/**************************************************************************
 *
 * Copyright 2017 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

#include "util/u_threaded_context.h"
#include "util/u_cpu_detect.h"
#include "util/format/u_format.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_upload_mgr.h"
#include "driver_trace/tr_context.h"
#include "util/log.h"
#include "compiler/shader_info.h"

#if TC_DEBUG >= 1
#define tc_assert assert
#else
#define tc_assert(x)
#endif

#if TC_DEBUG >= 2
#define tc_printf mesa_logi
#define tc_asprintf asprintf
#define tc_strcmp strcmp
#else
#define tc_printf(...)
#define tc_asprintf(...) 0
#define tc_strcmp(...) 0
#endif
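
/* As used in this file, the debug levels are cumulative: TC_DEBUG >= 1
 * enables tc_assert() (and the per-call sentinel in non-NDEBUG builds),
 * TC_DEBUG >= 2 enables logging via tc_printf(), and TC_DEBUG >= 3
 * additionally logs every enqueued and executed call by name.
 */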

#define TC_SENTINEL 0x5ca1ab1e

enum tc_call_id {
#define CALL(name) TC_CALL_##name,
#include "u_threaded_context_calls.h"
#undef CALL
   TC_NUM_CALLS,
};

#if TC_DEBUG >= 3
static const char *tc_call_names[] = {
#define CALL(name) #name,
#include "u_threaded_context_calls.h"
#undef CALL
};
#endif

typedef uint16_t (*tc_execute)(struct pipe_context *pipe, void *call, uint64_t *last);

static const tc_execute execute_func[TC_NUM_CALLS];

static void
tc_buffer_subdata(struct pipe_context *_pipe,
                  struct pipe_resource *resource,
                  unsigned usage, unsigned offset,
                  unsigned size, const void *data);

static void
tc_batch_check(UNUSED struct tc_batch *batch)
{
   tc_assert(batch->sentinel == TC_SENTINEL);
   tc_assert(batch->num_total_slots <= TC_SLOTS_PER_BATCH);
}

static void
tc_debug_check(struct threaded_context *tc)
{
   for (unsigned i = 0; i < TC_MAX_BATCHES; i++) {
      tc_batch_check(&tc->batch_slots[i]);
      tc_assert(tc->batch_slots[i].tc == tc);
   }
}

static void
tc_set_driver_thread(struct threaded_context *tc)
{
#ifndef NDEBUG
   tc->driver_thread = util_get_thread_id();
#endif
}

static void
tc_clear_driver_thread(struct threaded_context *tc)
{
#ifndef NDEBUG
   memset(&tc->driver_thread, 0, sizeof(tc->driver_thread));
#endif
}

static void *
to_call_check(void *ptr, unsigned num_slots)
{
#if TC_DEBUG >= 1
   struct tc_call_base *call = ptr;
   tc_assert(call->num_slots == num_slots);
#endif
   return ptr;
}
#define to_call(ptr, type) ((struct type *)to_call_check((void *)(ptr), call_size(type)))

#define size_to_slots(size)      DIV_ROUND_UP(size, 8)
#define call_size(type)          size_to_slots(sizeof(struct type))
#define call_size_with_slots(type, num_slots) size_to_slots( \
   sizeof(struct type) + sizeof(((struct type*)NULL)->slot[0]) * (num_slots))
#define get_next_call(ptr, type) ((struct type*)((uint64_t*)ptr + call_size(type)))
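
/* Illustration of the slot math above: batch slots are uint64_t, so e.g. a
 * 20-byte call struct takes size_to_slots(20) = DIV_ROUND_UP(20, 8) = 3
 * slots (24 bytes) in the batch. call_size_with_slots() additionally
 * reserves room for a variable-length trailing array, e.g. tc_sampler_states
 * with 4 sampler pointers needs
 * size_to_slots(sizeof(struct tc_sampler_states) + 4 * sizeof(void *)) slots.
 */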

/* Assign src to dst while dst is uninitialized. */
static inline void
tc_set_resource_reference(struct pipe_resource **dst, struct pipe_resource *src)
{
   *dst = src;
   pipe_reference(NULL, &src->reference); /* only increment refcount */
}

/* Assign src to dst while dst is uninitialized. */
static inline void
tc_set_vertex_state_reference(struct pipe_vertex_state **dst,
                              struct pipe_vertex_state *src)
{
   *dst = src;
   pipe_reference(NULL, &src->reference); /* only increment refcount */
}

/* Unreference dst but don't touch the dst pointer. */
static inline void
tc_drop_resource_reference(struct pipe_resource *dst)
{
   if (pipe_reference(&dst->reference, NULL)) /* only decrement refcount */
      pipe_resource_destroy(dst);
}

/* Unreference dst but don't touch the dst pointer. */
static inline void
tc_drop_surface_reference(struct pipe_surface *dst)
{
   if (pipe_reference(&dst->reference, NULL)) /* only decrement refcount */
      dst->context->surface_destroy(dst->context, dst);
}

/* Unreference dst but don't touch the dst pointer. */
static inline void
tc_drop_so_target_reference(struct pipe_stream_output_target *dst)
{
   if (pipe_reference(&dst->reference, NULL)) /* only decrement refcount */
      dst->context->stream_output_target_destroy(dst->context, dst);
}

/**
 * Subtract the given number of references.
 */
static inline void
tc_drop_vertex_state_references(struct pipe_vertex_state *dst, int num_refs)
{
   int count = p_atomic_add_return(&dst->reference.count, -num_refs);

   assert(count >= 0);
   /* Underflows shouldn't happen, but let's be safe. */
   if (count <= 0)
      dst->screen->vertex_state_destroy(dst->screen, dst);
}

/* We don't want to read or write min_index and max_index, because
 * they shouldn't be needed by drivers at this point.
 */
#define DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX \
   offsetof(struct pipe_draw_info, min_index)

static void
tc_batch_execute(void *job, UNUSED void *gdata, int thread_index)
{
   struct tc_batch *batch = job;
   struct pipe_context *pipe = batch->tc->pipe;
   uint64_t *last = &batch->slots[batch->num_total_slots];

   tc_batch_check(batch);
   tc_set_driver_thread(batch->tc);

   assert(!batch->token);

   for (uint64_t *iter = batch->slots; iter != last;) {
      struct tc_call_base *call = (struct tc_call_base *)iter;

      tc_assert(call->sentinel == TC_SENTINEL);

#if TC_DEBUG >= 3
      tc_printf("CALL: %s", tc_call_names[call->call_id]);
#endif

      iter += execute_func[call->call_id](pipe, call, last);
   }

   /* Add the fence to the list of fences for the driver to signal at the next
    * flush, which we use for tracking which buffers are referenced by
    * an unflushed command buffer.
    */
   struct threaded_context *tc = batch->tc;
   struct util_queue_fence *fence =
      &tc->buffer_lists[batch->buffer_list_index].driver_flushed_fence;

   if (tc->options.driver_calls_flush_notify) {
      tc->signal_fences_next_flush[tc->num_signal_fences_next_flush++] = fence;

      /* Since our buffer lists are chained as a ring, we need to flush
       * the context twice as we go around the ring to make the driver signal
       * the buffer list fences, so that the producer thread can reuse the buffer
       * list structures for the next batches without waiting.
       */
      unsigned half_ring = TC_MAX_BUFFER_LISTS / 2;
      if (batch->buffer_list_index % half_ring == half_ring - 1)
         pipe->flush(pipe, NULL, PIPE_FLUSH_ASYNC);
   } else {
      util_queue_fence_signal(fence);
   }

   tc_clear_driver_thread(batch->tc);
   tc_batch_check(batch);
   batch->num_total_slots = 0;
}

static void
tc_begin_next_buffer_list(struct threaded_context *tc)
{
   tc->next_buf_list = (tc->next_buf_list + 1) % TC_MAX_BUFFER_LISTS;

   tc->batch_slots[tc->next].buffer_list_index = tc->next_buf_list;

   /* Clear the buffer list in the new empty batch. */
   struct tc_buffer_list *buf_list = &tc->buffer_lists[tc->next_buf_list];
   assert(util_queue_fence_is_signalled(&buf_list->driver_flushed_fence));
   util_queue_fence_reset(&buf_list->driver_flushed_fence); /* set to unsignalled */
   BITSET_ZERO(buf_list->buffer_list);

   tc->add_all_gfx_bindings_to_buffer_list = true;
   tc->add_all_compute_bindings_to_buffer_list = true;
}
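
/* A rough sketch of the buffer-list lifecycle (see tc_batch_execute above):
 * the lists form a ring of TC_MAX_BUFFER_LISTS entries. This function hands
 * out the next entry with a freshly reset (unsignalled) fence and an empty
 * bitset, batches then record buffer IDs into it, and the fence is signalled
 * later, either directly in tc_batch_execute() or by the driver once it
 * flushes (driver_calls_flush_notify), after which the entry can be reused.
 */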

static void
tc_batch_flush(struct threaded_context *tc)
{
   struct tc_batch *next = &tc->batch_slots[tc->next];

   tc_assert(next->num_total_slots != 0);
   tc_batch_check(next);
   tc_debug_check(tc);
   tc->bytes_mapped_estimate = 0;
   p_atomic_add(&tc->num_offloaded_slots, next->num_total_slots);

   if (next->token) {
      next->token->tc = NULL;
      tc_unflushed_batch_token_reference(&next->token, NULL);
   }

   util_queue_add_job(&tc->queue, next, &next->fence, tc_batch_execute,
                      NULL, 0);
   tc->last = tc->next;
   tc->next = (tc->next + 1) % TC_MAX_BATCHES;
   tc_begin_next_buffer_list(tc);
}

/* This is the function that adds variable-sized calls into the current
 * batch. It also flushes the batch if there is not enough space in it.
 * All other higher-level "add" functions use it.
 */
static void *
tc_add_sized_call(struct threaded_context *tc, enum tc_call_id id,
                  unsigned num_slots)
{
   struct tc_batch *next = &tc->batch_slots[tc->next];
   assert(num_slots <= TC_SLOTS_PER_BATCH);
   tc_debug_check(tc);

   if (unlikely(next->num_total_slots + num_slots > TC_SLOTS_PER_BATCH)) {
      tc_batch_flush(tc);
      next = &tc->batch_slots[tc->next];
      tc_assert(next->num_total_slots == 0);
   }

   tc_assert(util_queue_fence_is_signalled(&next->fence));

   struct tc_call_base *call = (struct tc_call_base*)&next->slots[next->num_total_slots];
   next->num_total_slots += num_slots;

#if !defined(NDEBUG) && TC_DEBUG >= 1
   call->sentinel = TC_SENTINEL;
#endif
   call->call_id = id;
   call->num_slots = num_slots;

#if TC_DEBUG >= 3
   tc_printf("ENQUEUE: %s", tc_call_names[id]);
#endif

   tc_debug_check(tc);
   return call;
}

#define tc_add_call(tc, execute, type) \
   ((struct type*)tc_add_sized_call(tc, execute, call_size(type)))

#define tc_add_slot_based_call(tc, execute, type, num_slots) \
   ((struct type*)tc_add_sized_call(tc, execute, \
                                    call_size_with_slots(type, num_slots)))
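
/* Typical enqueue pattern built on these helpers (compare e.g.
 * tc_set_patch_vertices further below):
 *
 *    struct tc_patch_vertices *p =
 *       tc_add_call(tc, TC_CALL_set_patch_vertices, tc_patch_vertices);
 *    p->patch_vertices = patch_vertices;
 *
 * The matching tc_call_*() function later unpacks the struct in the driver
 * thread and returns how many uint64_t slots the call occupied.
 */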

static bool
tc_is_sync(struct threaded_context *tc)
{
   struct tc_batch *last = &tc->batch_slots[tc->last];
   struct tc_batch *next = &tc->batch_slots[tc->next];

   return util_queue_fence_is_signalled(&last->fence) &&
          !next->num_total_slots;
}

static void
_tc_sync(struct threaded_context *tc, UNUSED const char *info, UNUSED const char *func)
{
   struct tc_batch *last = &tc->batch_slots[tc->last];
   struct tc_batch *next = &tc->batch_slots[tc->next];
   bool synced = false;

   tc_debug_check(tc);

   /* Only wait for queued calls... */
   if (!util_queue_fence_is_signalled(&last->fence)) {
      util_queue_fence_wait(&last->fence);
      synced = true;
   }

   tc_debug_check(tc);

   if (next->token) {
      next->token->tc = NULL;
      tc_unflushed_batch_token_reference(&next->token, NULL);
   }

   /* ... and execute unflushed calls directly. */
   if (next->num_total_slots) {
      p_atomic_add(&tc->num_direct_slots, next->num_total_slots);
      tc->bytes_mapped_estimate = 0;
      tc_batch_execute(next, NULL, 0);
      tc_begin_next_buffer_list(tc);
      synced = true;
   }

   if (synced) {
      p_atomic_inc(&tc->num_syncs);

      if (tc_strcmp(func, "tc_destroy") != 0) {
         tc_printf("sync %s %s", func, info);
      }
   }

   tc_debug_check(tc);
}

#define tc_sync(tc) _tc_sync(tc, "", __func__)
#define tc_sync_msg(tc, info) _tc_sync(tc, info, __func__)

/**
 * Call this from fence_finish for same-context fence waits of deferred fences
 * that haven't been flushed yet.
 *
 * The passed pipe_context must be the one passed to pipe_screen::fence_finish,
 * i.e., the wrapped one.
 */
void
threaded_context_flush(struct pipe_context *_pipe,
                       struct tc_unflushed_batch_token *token,
                       bool prefer_async)
{
   struct threaded_context *tc = threaded_context(_pipe);

   /* This is called from the gallium frontend / application thread. */
   if (token->tc && token->tc == tc) {
      struct tc_batch *last = &tc->batch_slots[tc->last];

      /* Prefer to do the flush in the driver thread if it is already
       * running. That should be better for cache locality.
       */
      if (prefer_async || !util_queue_fence_is_signalled(&last->fence))
         tc_batch_flush(tc);
      else
         tc_sync(token->tc);
   }
}

/* Must be called before TC binds, maps, invalidates, or adds a buffer to a buffer list. */
static void tc_touch_buffer(struct threaded_context *tc, struct threaded_resource *buf)
{
   const struct threaded_context *first_user = buf->first_user;

   /* Fast path exit to avoid additional branches */
   if (likely(first_user == tc))
      return;

   if (!first_user)
      first_user = p_atomic_cmpxchg_ptr(&buf->first_user, NULL, tc);

   /* The NULL check might seem unnecessary here but it's actually critical:
    * p_atomic_cmpxchg will return NULL if it succeeds, meaning that NULL is
    * equivalent to "we're the first user" here. (It's equally important not
    * to ignore the result of the cmpxchg above, since it might fail.)
    * Without the NULL check, we'd set the flag unconditionally, which is bad.
    */
   if (first_user && first_user != tc && !buf->used_by_multiple_contexts)
      buf->used_by_multiple_contexts = true;
}

static bool tc_is_buffer_shared(struct threaded_resource *buf)
{
   return buf->is_shared || buf->used_by_multiple_contexts;
}

static void
tc_add_to_buffer_list(struct threaded_context *tc, struct tc_buffer_list *next, struct pipe_resource *buf)
{
   struct threaded_resource *tbuf = threaded_resource(buf);
   tc_touch_buffer(tc, tbuf);

   uint32_t id = tbuf->buffer_id_unique;
   BITSET_SET(next->buffer_list, id & TC_BUFFER_ID_MASK);
}

/* Set a buffer binding and add it to the buffer list. */
static void
tc_bind_buffer(struct threaded_context *tc, uint32_t *binding, struct tc_buffer_list *next, struct pipe_resource *buf)
{
   struct threaded_resource *tbuf = threaded_resource(buf);
   tc_touch_buffer(tc, tbuf);

   uint32_t id = tbuf->buffer_id_unique;
   *binding = id;
   BITSET_SET(next->buffer_list, id & TC_BUFFER_ID_MASK);
}
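
/* Note that buffer_id_unique is folded into the fixed-size bitset with
 * TC_BUFFER_ID_MASK, so two different buffers can map to the same bit.
 * Presumably that only makes the busy tracking in tc_is_buffer_busy() more
 * conservative (a spurious "busy" answer), never a missed one.
 */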

/* Reset a buffer binding. */
static void
tc_unbind_buffer(uint32_t *binding)
{
   *binding = 0;
}

/* Reset a range of buffer binding slots. */
static void
tc_unbind_buffers(uint32_t *binding, unsigned count)
{
   if (count)
      memset(binding, 0, sizeof(*binding) * count);
}

static void
tc_add_bindings_to_buffer_list(BITSET_WORD *buffer_list, const uint32_t *bindings,
                               unsigned count)
{
   for (unsigned i = 0; i < count; i++) {
      if (bindings[i])
         BITSET_SET(buffer_list, bindings[i] & TC_BUFFER_ID_MASK);
   }
}

static unsigned
tc_rebind_bindings(uint32_t old_id, uint32_t new_id, uint32_t *bindings,
                   unsigned count)
{
   unsigned rebind_count = 0;

   for (unsigned i = 0; i < count; i++) {
      if (bindings[i] == old_id) {
         bindings[i] = new_id;
         rebind_count++;
      }
   }
   return rebind_count;
}

static void
tc_add_shader_bindings_to_buffer_list(struct threaded_context *tc,
                                      BITSET_WORD *buffer_list,
                                      enum pipe_shader_type shader)
{
   tc_add_bindings_to_buffer_list(buffer_list, tc->const_buffers[shader],
                                  tc->max_const_buffers);
   if (tc->seen_shader_buffers[shader]) {
      tc_add_bindings_to_buffer_list(buffer_list, tc->shader_buffers[shader],
                                     tc->max_shader_buffers);
   }
   if (tc->seen_image_buffers[shader]) {
      tc_add_bindings_to_buffer_list(buffer_list, tc->image_buffers[shader],
                                     tc->max_images);
   }
   if (tc->seen_sampler_buffers[shader]) {
      tc_add_bindings_to_buffer_list(buffer_list, tc->sampler_buffers[shader],
                                     tc->max_samplers);
   }
}

static unsigned
tc_rebind_shader_bindings(struct threaded_context *tc, uint32_t old_id,
                          uint32_t new_id, enum pipe_shader_type shader, uint32_t *rebind_mask)
{
   unsigned ubo = 0, ssbo = 0, img = 0, sampler = 0;

   ubo = tc_rebind_bindings(old_id, new_id, tc->const_buffers[shader],
                            tc->max_const_buffers);
   if (ubo)
      *rebind_mask |= BITFIELD_BIT(TC_BINDING_UBO_VS) << shader;
   if (tc->seen_shader_buffers[shader]) {
      ssbo = tc_rebind_bindings(old_id, new_id, tc->shader_buffers[shader],
                                tc->max_shader_buffers);
      if (ssbo)
         *rebind_mask |= BITFIELD_BIT(TC_BINDING_SSBO_VS) << shader;
   }
   if (tc->seen_image_buffers[shader]) {
      img = tc_rebind_bindings(old_id, new_id, tc->image_buffers[shader],
                               tc->max_images);
      if (img)
         *rebind_mask |= BITFIELD_BIT(TC_BINDING_IMAGE_VS) << shader;
   }
   if (tc->seen_sampler_buffers[shader]) {
      sampler = tc_rebind_bindings(old_id, new_id, tc->sampler_buffers[shader],
                                   tc->max_samplers);
      if (sampler)
         *rebind_mask |= BITFIELD_BIT(TC_BINDING_SAMPLERVIEW_VS) << shader;
   }
   return ubo + ssbo + img + sampler;
}

/* Add all bound buffers used by VS/TCS/TES/GS/FS to the buffer list.
 * This is called by the first draw call in a batch when we want to inherit
 * all bindings set by the previous batch.
 */
static void
tc_add_all_gfx_bindings_to_buffer_list(struct threaded_context *tc)
{
   BITSET_WORD *buffer_list = tc->buffer_lists[tc->next_buf_list].buffer_list;

   tc_add_bindings_to_buffer_list(buffer_list, tc->vertex_buffers, tc->max_vertex_buffers);
   if (tc->seen_streamout_buffers)
      tc_add_bindings_to_buffer_list(buffer_list, tc->streamout_buffers, PIPE_MAX_SO_BUFFERS);

   tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_VERTEX);
   tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_FRAGMENT);

   if (tc->seen_tcs)
      tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_TESS_CTRL);
   if (tc->seen_tes)
      tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_TESS_EVAL);
   if (tc->seen_gs)
      tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_GEOMETRY);

   tc->add_all_gfx_bindings_to_buffer_list = false;
}

/* Add all bound buffers used by compute to the buffer list.
 * This is called by the first compute call in a batch when we want to inherit
 * all bindings set by the previous batch.
 */
static void
tc_add_all_compute_bindings_to_buffer_list(struct threaded_context *tc)
{
   BITSET_WORD *buffer_list = tc->buffer_lists[tc->next_buf_list].buffer_list;

   tc_add_shader_bindings_to_buffer_list(tc, buffer_list, PIPE_SHADER_COMPUTE);
   tc->add_all_compute_bindings_to_buffer_list = false;
}

static unsigned
tc_rebind_buffer(struct threaded_context *tc, uint32_t old_id, uint32_t new_id, uint32_t *rebind_mask)
{
   unsigned vbo = 0, so = 0;

   vbo = tc_rebind_bindings(old_id, new_id, tc->vertex_buffers,
                            tc->max_vertex_buffers);
   if (vbo)
      *rebind_mask |= BITFIELD_BIT(TC_BINDING_VERTEX_BUFFER);

   if (tc->seen_streamout_buffers) {
      so = tc_rebind_bindings(old_id, new_id, tc->streamout_buffers,
                              PIPE_MAX_SO_BUFFERS);
      if (so)
         *rebind_mask |= BITFIELD_BIT(TC_BINDING_STREAMOUT_BUFFER);
   }
   unsigned rebound = vbo + so;

   rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_VERTEX, rebind_mask);
   rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_FRAGMENT, rebind_mask);

   if (tc->seen_tcs)
      rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_TESS_CTRL, rebind_mask);
   if (tc->seen_tes)
      rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_TESS_EVAL, rebind_mask);
   if (tc->seen_gs)
      rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_GEOMETRY, rebind_mask);

   rebound += tc_rebind_shader_bindings(tc, old_id, new_id, PIPE_SHADER_COMPUTE, rebind_mask);

   if (rebound)
      BITSET_SET(tc->buffer_lists[tc->next_buf_list].buffer_list, new_id & TC_BUFFER_ID_MASK);
   return rebound;
}

static bool
tc_is_buffer_bound_with_mask(uint32_t id, uint32_t *bindings, unsigned binding_mask)
{
   while (binding_mask) {
      if (bindings[u_bit_scan(&binding_mask)] == id)
         return true;
   }
   return false;
}

static bool
tc_is_buffer_shader_bound_for_write(struct threaded_context *tc, uint32_t id,
                                    enum pipe_shader_type shader)
{
   if (tc->seen_shader_buffers[shader] &&
       tc_is_buffer_bound_with_mask(id, tc->shader_buffers[shader],
                                    tc->shader_buffers_writeable_mask[shader]))
      return true;

   if (tc->seen_image_buffers[shader] &&
       tc_is_buffer_bound_with_mask(id, tc->image_buffers[shader],
                                    tc->image_buffers_writeable_mask[shader]))
      return true;

   return false;
}

static bool
tc_is_buffer_bound_for_write(struct threaded_context *tc, uint32_t id)
{
   if (tc->seen_streamout_buffers &&
       tc_is_buffer_bound_with_mask(id, tc->streamout_buffers,
                                    BITFIELD_MASK(PIPE_MAX_SO_BUFFERS)))
      return true;

   if (tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_VERTEX) ||
       tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_FRAGMENT) ||
       tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_COMPUTE))
      return true;

   if (tc->seen_tcs &&
       tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_TESS_CTRL))
      return true;

   if (tc->seen_tes &&
       tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_TESS_EVAL))
      return true;

   if (tc->seen_gs &&
       tc_is_buffer_shader_bound_for_write(tc, id, PIPE_SHADER_GEOMETRY))
      return true;

   return false;
}

static bool
tc_is_buffer_busy(struct threaded_context *tc, struct threaded_resource *tbuf,
                  unsigned map_usage)
{
   if (!tc->options.is_resource_busy)
      return true;

   uint32_t id_hash = tbuf->buffer_id_unique & TC_BUFFER_ID_MASK;

   for (unsigned i = 0; i < TC_MAX_BUFFER_LISTS; i++) {
      struct tc_buffer_list *buf_list = &tc->buffer_lists[i];

      /* If the buffer is referenced by a batch that hasn't been flushed (by tc or the driver),
       * then the buffer is considered busy. */
      if (!util_queue_fence_is_signalled(&buf_list->driver_flushed_fence) &&
          BITSET_TEST(buf_list->buffer_list, id_hash))
         return true;
   }

   /* The buffer isn't referenced by any unflushed batch: we can safely ask the driver
    * whether this buffer is busy or not. */
   return tc->options.is_resource_busy(tc->pipe->screen, tbuf->latest, map_usage);
}

/**
 * allow_cpu_storage should be false for user memory and imported buffers.
 */
void
threaded_resource_init(struct pipe_resource *res, bool allow_cpu_storage)
{
   struct threaded_resource *tres = threaded_resource(res);

   tres->first_user = NULL;
   tres->used_by_multiple_contexts = false;
   tres->latest = &tres->b;
   tres->cpu_storage = NULL;
   util_range_init(&tres->valid_buffer_range);
   tres->is_shared = false;
   tres->is_user_ptr = false;
   tres->buffer_id_unique = 0;
   tres->pending_staging_uploads = 0;
   util_range_init(&tres->pending_staging_uploads_range);

   if (allow_cpu_storage &&
       !(res->flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
                       PIPE_RESOURCE_FLAG_SPARSE |
                       PIPE_RESOURCE_FLAG_ENCRYPTED)) &&
       /* We need buffer invalidation and buffer busyness tracking for the CPU
        * storage, which aren't supported with pipe_vertex_state. */
       !(res->bind & PIPE_BIND_VERTEX_STATE))
      tres->allow_cpu_storage = true;
   else
      tres->allow_cpu_storage = false;
}

void
threaded_resource_deinit(struct pipe_resource *res)
{
   struct threaded_resource *tres = threaded_resource(res);

   if (tres->latest != &tres->b)
      pipe_resource_reference(&tres->latest, NULL);
   util_range_destroy(&tres->valid_buffer_range);
   util_range_destroy(&tres->pending_staging_uploads_range);
   align_free(tres->cpu_storage);
}

struct pipe_context *
threaded_context_unwrap_sync(struct pipe_context *pipe)
{
   if (!pipe || !pipe->priv)
      return pipe;

   tc_sync(threaded_context(pipe));
   return (struct pipe_context*)pipe->priv;
}


/********************************************************************
 * simple functions
 */

#define TC_FUNC1(func, qualifier, type, deref, addr, ...) \
   struct tc_call_##func { \
      struct tc_call_base base; \
      type state; \
   }; \
   \
   static uint16_t \
   tc_call_##func(struct pipe_context *pipe, void *call, uint64_t *last) \
   { \
      pipe->func(pipe, addr(to_call(call, tc_call_##func)->state)); \
      return call_size(tc_call_##func); \
   } \
   \
   static void \
   tc_##func(struct pipe_context *_pipe, qualifier type deref param) \
   { \
      struct threaded_context *tc = threaded_context(_pipe); \
      struct tc_call_##func *p = (struct tc_call_##func*) \
                     tc_add_call(tc, TC_CALL_##func, tc_call_##func); \
      p->state = deref(param); \
      __VA_ARGS__; \
   }
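
/* For illustration, TC_FUNC1(set_sample_mask, , unsigned, , ) expands to
 * roughly:
 *
 *    struct tc_call_set_sample_mask { struct tc_call_base base; unsigned state; };
 *
 *    static uint16_t
 *    tc_call_set_sample_mask(struct pipe_context *pipe, void *call, uint64_t *last)
 *    {
 *       pipe->set_sample_mask(pipe, to_call(call, tc_call_set_sample_mask)->state);
 *       return call_size(tc_call_set_sample_mask);
 *    }
 *
 *    static void
 *    tc_set_sample_mask(struct pipe_context *_pipe, unsigned param)
 *    {
 *       struct threaded_context *tc = threaded_context(_pipe);
 *       struct tc_call_set_sample_mask *p = (struct tc_call_set_sample_mask*)
 *          tc_add_call(tc, TC_CALL_set_sample_mask, tc_call_set_sample_mask);
 *       p->state = param;
 *    }
 */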

TC_FUNC1(set_active_query_state, , bool, , )

TC_FUNC1(set_blend_color, const, struct pipe_blend_color, *, &)
TC_FUNC1(set_stencil_ref, const, struct pipe_stencil_ref, , )
TC_FUNC1(set_clip_state, const, struct pipe_clip_state, *, &)
TC_FUNC1(set_sample_mask, , unsigned, , )
TC_FUNC1(set_min_samples, , unsigned, , )
TC_FUNC1(set_polygon_stipple, const, struct pipe_poly_stipple, *, &)

TC_FUNC1(texture_barrier, , unsigned, , )
TC_FUNC1(memory_barrier, , unsigned, , )
TC_FUNC1(delete_texture_handle, , uint64_t, , )
TC_FUNC1(delete_image_handle, , uint64_t, , )
TC_FUNC1(set_frontend_noop, , bool, , )


/********************************************************************
 * queries
 */

static struct pipe_query *
tc_create_query(struct pipe_context *_pipe, unsigned query_type,
                unsigned index)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct pipe_context *pipe = tc->pipe;

   return pipe->create_query(pipe, query_type, index);
}

static struct pipe_query *
tc_create_batch_query(struct pipe_context *_pipe, unsigned num_queries,
                      unsigned *query_types)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct pipe_context *pipe = tc->pipe;

   return pipe->create_batch_query(pipe, num_queries, query_types);
}

struct tc_query_call {
   struct tc_call_base base;
   struct pipe_query *query;
};

static uint16_t
tc_call_destroy_query(struct pipe_context *pipe, void *call, uint64_t *last)
{
   struct pipe_query *query = to_call(call, tc_query_call)->query;
   struct threaded_query *tq = threaded_query(query);

   if (list_is_linked(&tq->head_unflushed))
      list_del(&tq->head_unflushed);

   pipe->destroy_query(pipe, query);
   return call_size(tc_query_call);
}

static void
tc_destroy_query(struct pipe_context *_pipe, struct pipe_query *query)
{
   struct threaded_context *tc = threaded_context(_pipe);

   tc_add_call(tc, TC_CALL_destroy_query, tc_query_call)->query = query;
}

static uint16_t
tc_call_begin_query(struct pipe_context *pipe, void *call, uint64_t *last)
{
   pipe->begin_query(pipe, to_call(call, tc_query_call)->query);
   return call_size(tc_query_call);
}

static bool
tc_begin_query(struct pipe_context *_pipe, struct pipe_query *query)
{
   struct threaded_context *tc = threaded_context(_pipe);

   tc_add_call(tc, TC_CALL_begin_query, tc_query_call)->query = query;
   return true; /* we don't care about the return value for this call */
}

struct tc_end_query_call {
   struct tc_call_base base;
   struct threaded_context *tc;
   struct pipe_query *query;
};

static uint16_t
tc_call_end_query(struct pipe_context *pipe, void *call, uint64_t *last)
{
   struct tc_end_query_call *p = to_call(call, tc_end_query_call);
   struct threaded_query *tq = threaded_query(p->query);

   if (!list_is_linked(&tq->head_unflushed))
      list_add(&tq->head_unflushed, &p->tc->unflushed_queries);

   pipe->end_query(pipe, p->query);
   return call_size(tc_end_query_call);
}

static bool
tc_end_query(struct pipe_context *_pipe, struct pipe_query *query)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct threaded_query *tq = threaded_query(query);
   struct tc_end_query_call *call =
      tc_add_call(tc, TC_CALL_end_query, tc_end_query_call);

   call->tc = tc;
   call->query = query;

   tq->flushed = false;

   return true; /* we don't care about the return value for this call */
}

static bool
tc_get_query_result(struct pipe_context *_pipe,
                    struct pipe_query *query, bool wait,
                    union pipe_query_result *result)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct threaded_query *tq = threaded_query(query);
   struct pipe_context *pipe = tc->pipe;
   bool flushed = tq->flushed;

   if (!flushed) {
      tc_sync_msg(tc, wait ? "wait" : "nowait");
      tc_set_driver_thread(tc);
   }

   bool success = pipe->get_query_result(pipe, query, wait, result);

   if (!flushed)
      tc_clear_driver_thread(tc);

   if (success) {
      tq->flushed = true;
      if (list_is_linked(&tq->head_unflushed)) {
         /* This is safe because it can only happen after we sync'd. */
         list_del(&tq->head_unflushed);
      }
   }
   return success;
}

struct tc_query_result_resource {
   struct tc_call_base base;
   enum pipe_query_flags flags:8;
   enum pipe_query_value_type result_type:8;
   int8_t index; /* it can be -1 */
   unsigned offset;
   struct pipe_query *query;
   struct pipe_resource *resource;
};

static uint16_t
tc_call_get_query_result_resource(struct pipe_context *pipe, void *call, uint64_t *last)
{
   struct tc_query_result_resource *p = to_call(call, tc_query_result_resource);

   pipe->get_query_result_resource(pipe, p->query, p->flags, p->result_type,
                                   p->index, p->resource, p->offset);
   tc_drop_resource_reference(p->resource);
   return call_size(tc_query_result_resource);
}

static void
tc_get_query_result_resource(struct pipe_context *_pipe,
                             struct pipe_query *query,
                             enum pipe_query_flags flags,
                             enum pipe_query_value_type result_type, int index,
                             struct pipe_resource *resource, unsigned offset)
{
   struct threaded_context *tc = threaded_context(_pipe);

   tc_buffer_disable_cpu_storage(resource);

   struct tc_query_result_resource *p =
      tc_add_call(tc, TC_CALL_get_query_result_resource,
                  tc_query_result_resource);
   p->query = query;
   p->flags = flags;
   p->result_type = result_type;
   p->index = index;
   tc_set_resource_reference(&p->resource, resource);
   tc_add_to_buffer_list(tc, &tc->buffer_lists[tc->next_buf_list], resource);
   p->offset = offset;
}

struct tc_render_condition {
   struct tc_call_base base;
   bool condition;
   unsigned mode;
   struct pipe_query *query;
};

static uint16_t
tc_call_render_condition(struct pipe_context *pipe, void *call, uint64_t *last)
{
   struct tc_render_condition *p = to_call(call, tc_render_condition);
   pipe->render_condition(pipe, p->query, p->condition, p->mode);
   return call_size(tc_render_condition);
}

static void
tc_render_condition(struct pipe_context *_pipe,
                    struct pipe_query *query, bool condition,
                    enum pipe_render_cond_flag mode)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct tc_render_condition *p =
      tc_add_call(tc, TC_CALL_render_condition, tc_render_condition);

   p->query = query;
   p->condition = condition;
   p->mode = mode;
}


/********************************************************************
 * constant (immutable) states
 */

#define TC_CSO_CREATE(name, sname) \
   static void * \
   tc_create_##name##_state(struct pipe_context *_pipe, \
                            const struct pipe_##sname##_state *state) \
   { \
      struct pipe_context *pipe = threaded_context(_pipe)->pipe; \
      return pipe->create_##name##_state(pipe, state); \
   }

#define TC_CSO_BIND(name, ...) TC_FUNC1(bind_##name##_state, , void *, , , ##__VA_ARGS__)
#define TC_CSO_DELETE(name) TC_FUNC1(delete_##name##_state, , void *, , )

#define TC_CSO(name, sname, ...) \
   TC_CSO_CREATE(name, sname) \
   TC_CSO_BIND(name, ##__VA_ARGS__) \
   TC_CSO_DELETE(name)

#define TC_CSO_WHOLE(name) TC_CSO(name, name)
#define TC_CSO_SHADER(name) TC_CSO(name, shader)
#define TC_CSO_SHADER_TRACK(name) TC_CSO(name, shader, tc->seen_##name = true;)
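
/* For example, TC_CSO_WHOLE(blend) generates tc_create_blend_state(),
 * tc_bind_blend_state() and tc_delete_blend_state(): the create call is
 * forwarded straight to the driver from the frontend thread, while bind and
 * delete are enqueued through TC_FUNC1. TC_CSO_SHADER_TRACK(gs) additionally
 * sets tc->seen_gs = true when the shader is bound.
 */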

TC_CSO_WHOLE(blend)
TC_CSO_WHOLE(rasterizer)
TC_CSO_WHOLE(depth_stencil_alpha)
TC_CSO_WHOLE(compute)
TC_CSO_SHADER(fs)
TC_CSO_SHADER(vs)
TC_CSO_SHADER_TRACK(gs)
TC_CSO_SHADER_TRACK(tcs)
TC_CSO_SHADER_TRACK(tes)
TC_CSO_CREATE(sampler, sampler)
TC_CSO_DELETE(sampler)
TC_CSO_BIND(vertex_elements)
TC_CSO_DELETE(vertex_elements)

static void *
tc_create_vertex_elements_state(struct pipe_context *_pipe, unsigned count,
                                const struct pipe_vertex_element *elems)
{
   struct pipe_context *pipe = threaded_context(_pipe)->pipe;

   return pipe->create_vertex_elements_state(pipe, count, elems);
}

struct tc_sampler_states {
   struct tc_call_base base;
   ubyte shader, start, count;
   void *slot[0]; /* more will be allocated if needed */
};

static uint16_t
tc_call_bind_sampler_states(struct pipe_context *pipe, void *call, uint64_t *last)
{
   struct tc_sampler_states *p = (struct tc_sampler_states *)call;

   pipe->bind_sampler_states(pipe, p->shader, p->start, p->count, p->slot);
   return p->base.num_slots;
}

static void
tc_bind_sampler_states(struct pipe_context *_pipe,
                       enum pipe_shader_type shader,
                       unsigned start, unsigned count, void **states)
{
   if (!count)
      return;

   struct threaded_context *tc = threaded_context(_pipe);
   struct tc_sampler_states *p =
      tc_add_slot_based_call(tc, TC_CALL_bind_sampler_states, tc_sampler_states, count);

   p->shader = shader;
   p->start = start;
   p->count = count;
   memcpy(p->slot, states, count * sizeof(states[0]));
}

static void
tc_link_shader(struct pipe_context *_pipe, void **shaders)
{
   struct threaded_context *tc = threaded_context(_pipe);
   tc->pipe->link_shader(tc->pipe, shaders);
}

/********************************************************************
 * immediate states
 */

struct tc_framebuffer {
   struct tc_call_base base;
   struct pipe_framebuffer_state state;
};

static uint16_t
tc_call_set_framebuffer_state(struct pipe_context *pipe, void *call, uint64_t *last)
{
   struct pipe_framebuffer_state *p = &to_call(call, tc_framebuffer)->state;

   pipe->set_framebuffer_state(pipe, p);

   unsigned nr_cbufs = p->nr_cbufs;
   for (unsigned i = 0; i < nr_cbufs; i++)
      tc_drop_surface_reference(p->cbufs[i]);
   tc_drop_surface_reference(p->zsbuf);
   return call_size(tc_framebuffer);
}

static void
tc_set_framebuffer_state(struct pipe_context *_pipe,
                         const struct pipe_framebuffer_state *fb)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct tc_framebuffer *p =
      tc_add_call(tc, TC_CALL_set_framebuffer_state, tc_framebuffer);
   unsigned nr_cbufs = fb->nr_cbufs;

   p->state.width = fb->width;
   p->state.height = fb->height;
   p->state.samples = fb->samples;
   p->state.layers = fb->layers;
   p->state.nr_cbufs = nr_cbufs;

   for (unsigned i = 0; i < nr_cbufs; i++) {
      p->state.cbufs[i] = NULL;
      pipe_surface_reference(&p->state.cbufs[i], fb->cbufs[i]);
   }
   p->state.zsbuf = NULL;
   pipe_surface_reference(&p->state.zsbuf, fb->zsbuf);
}

struct tc_tess_state {
   struct tc_call_base base;
   float state[6];
};

static uint16_t
tc_call_set_tess_state(struct pipe_context *pipe, void *call, uint64_t *last)
{
   float *p = to_call(call, tc_tess_state)->state;

   pipe->set_tess_state(pipe, p, p + 4);
   return call_size(tc_tess_state);
}

static void
tc_set_tess_state(struct pipe_context *_pipe,
                  const float default_outer_level[4],
                  const float default_inner_level[2])
{
   struct threaded_context *tc = threaded_context(_pipe);
   float *p = tc_add_call(tc, TC_CALL_set_tess_state, tc_tess_state)->state;

   memcpy(p, default_outer_level, 4 * sizeof(float));
   memcpy(p + 4, default_inner_level, 2 * sizeof(float));
}
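
/* The six floats are packed as outer[0..3] followed by inner[0..1], which is
 * why tc_call_set_tess_state() above passes p and p + 4 to the driver.
 */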

struct tc_patch_vertices {
   struct tc_call_base base;
   ubyte patch_vertices;
};

static uint16_t
tc_call_set_patch_vertices(struct pipe_context *pipe, void *call, uint64_t *last)
{
   uint8_t patch_vertices = to_call(call, tc_patch_vertices)->patch_vertices;

   pipe->set_patch_vertices(pipe, patch_vertices);
   return call_size(tc_patch_vertices);
}

static void
tc_set_patch_vertices(struct pipe_context *_pipe, uint8_t patch_vertices)
{
   struct threaded_context *tc = threaded_context(_pipe);

   tc_add_call(tc, TC_CALL_set_patch_vertices,
               tc_patch_vertices)->patch_vertices = patch_vertices;
}

struct tc_constant_buffer_base {
   struct tc_call_base base;
   ubyte shader, index;
   bool is_null;
};

struct tc_constant_buffer {
   struct tc_constant_buffer_base base;
   struct pipe_constant_buffer cb;
};

static uint16_t
tc_call_set_constant_buffer(struct pipe_context *pipe, void *call, uint64_t *last)
{
   struct tc_constant_buffer *p = (struct tc_constant_buffer *)call;

   if (unlikely(p->base.is_null)) {
      pipe->set_constant_buffer(pipe, p->base.shader, p->base.index, false, NULL);
      return call_size(tc_constant_buffer_base);
   }

   pipe->set_constant_buffer(pipe, p->base.shader, p->base.index, true, &p->cb);
   return call_size(tc_constant_buffer);
}

static void
tc_set_constant_buffer(struct pipe_context *_pipe,
                       enum pipe_shader_type shader, uint index,
                       bool take_ownership,
                       const struct pipe_constant_buffer *cb)
{
   struct threaded_context *tc = threaded_context(_pipe);

   if (unlikely(!cb || (!cb->buffer && !cb->user_buffer))) {
      struct tc_constant_buffer_base *p =
         tc_add_call(tc, TC_CALL_set_constant_buffer, tc_constant_buffer_base);
      p->shader = shader;
      p->index = index;
      p->is_null = true;
      tc_unbind_buffer(&tc->const_buffers[shader][index]);
      return;
   }

   struct pipe_resource *buffer;
   unsigned offset;

   if (cb->user_buffer) {
      /* This must be done before adding set_constant_buffer, because it could
       * generate e.g. transfer_unmap and flush partially-uninitialized
       * set_constant_buffer to the driver if it was done afterwards.
       */
      buffer = NULL;
      u_upload_data(tc->base.const_uploader, 0, cb->buffer_size,
                    tc->ubo_alignment, cb->user_buffer, &offset, &buffer);
      u_upload_unmap(tc->base.const_uploader);
      take_ownership = true;
   } else {
      buffer = cb->buffer;
      offset = cb->buffer_offset;
   }

   struct tc_constant_buffer *p =
      tc_add_call(tc, TC_CALL_set_constant_buffer, tc_constant_buffer);
   p->base.shader = shader;
   p->base.index = index;
   p->base.is_null = false;
   p->cb.user_buffer = NULL;
   p->cb.buffer_offset = offset;
   p->cb.buffer_size = cb->buffer_size;

   if (take_ownership)
      p->cb.buffer = buffer;
   else
      tc_set_resource_reference(&p->cb.buffer, buffer);

   if (buffer) {
      tc_bind_buffer(tc, &tc->const_buffers[shader][index],
                     &tc->buffer_lists[tc->next_buf_list], buffer);
   } else {
      tc_unbind_buffer(&tc->const_buffers[shader][index]);
   }
}

struct tc_inlinable_constants {
   struct tc_call_base base;
   ubyte shader;
   ubyte num_values;
   uint32_t values[MAX_INLINABLE_UNIFORMS];
};

static uint16_t
tc_call_set_inlinable_constants(struct pipe_context *pipe, void *call, uint64_t *last)
{
   struct tc_inlinable_constants *p = to_call(call, tc_inlinable_constants);

   pipe->set_inlinable_constants(pipe, p->shader, p->num_values, p->values);
   return call_size(tc_inlinable_constants);
}

static void
tc_set_inlinable_constants(struct pipe_context *_pipe,
                           enum pipe_shader_type shader,
                           uint num_values, uint32_t *values)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct tc_inlinable_constants *p =
      tc_add_call(tc, TC_CALL_set_inlinable_constants, tc_inlinable_constants);
   p->shader = shader;
   p->num_values = num_values;
   memcpy(p->values, values, num_values * 4);
}

struct tc_sample_locations {
   struct tc_call_base base;
   uint16_t size;
   uint8_t slot[0];
};


static uint16_t
tc_call_set_sample_locations(struct pipe_context *pipe, void *call, uint64_t *last)
{
   struct tc_sample_locations *p = (struct tc_sample_locations *)call;

   pipe->set_sample_locations(pipe, p->size, p->slot);
   return p->base.num_slots;
}

static void
tc_set_sample_locations(struct pipe_context *_pipe, size_t size, const uint8_t *locations)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct tc_sample_locations *p =
      tc_add_slot_based_call(tc, TC_CALL_set_sample_locations,
                             tc_sample_locations, size);

   p->size = size;
   memcpy(p->slot, locations, size);
}

struct tc_scissors {
   struct tc_call_base base;
   ubyte start, count;
   struct pipe_scissor_state slot[0]; /* more will be allocated if needed */
};

static uint16_t
tc_call_set_scissor_states(struct pipe_context *pipe, void *call, uint64_t *last)
{
   struct tc_scissors *p = (struct tc_scissors *)call;

   pipe->set_scissor_states(pipe, p->start, p->count, p->slot);
   return p->base.num_slots;
}

static void
tc_set_scissor_states(struct pipe_context *_pipe,
                      unsigned start, unsigned count,
                      const struct pipe_scissor_state *states)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct tc_scissors *p =
      tc_add_slot_based_call(tc, TC_CALL_set_scissor_states, tc_scissors, count);

   p->start = start;
   p->count = count;
   memcpy(&p->slot, states, count * sizeof(states[0]));
}

struct tc_viewports {
   struct tc_call_base base;
   ubyte start, count;
   struct pipe_viewport_state slot[0]; /* more will be allocated if needed */
};

static uint16_t
tc_call_set_viewport_states(struct pipe_context *pipe, void *call, uint64_t *last)
{
   struct tc_viewports *p = (struct tc_viewports *)call;

   pipe->set_viewport_states(pipe, p->start, p->count, p->slot);
   return p->base.num_slots;
}

static void
tc_set_viewport_states(struct pipe_context *_pipe,
                       unsigned start, unsigned count,
                       const struct pipe_viewport_state *states)
{
   if (!count)
      return;

   struct threaded_context *tc = threaded_context(_pipe);
   struct tc_viewports *p =
      tc_add_slot_based_call(tc, TC_CALL_set_viewport_states, tc_viewports, count);

   p->start = start;
   p->count = count;
   memcpy(&p->slot, states, count * sizeof(states[0]));
}

struct tc_window_rects {
   struct tc_call_base base;
   bool include;
   ubyte count;
   struct pipe_scissor_state slot[0]; /* more will be allocated if needed */
};

static uint16_t
tc_call_set_window_rectangles(struct pipe_context *pipe, void *call, uint64_t *last)
{
   struct tc_window_rects *p = (struct tc_window_rects *)call;

   pipe->set_window_rectangles(pipe, p->include, p->count, p->slot);
   return p->base.num_slots;
}

static void
tc_set_window_rectangles(struct pipe_context *_pipe, bool include,
                         unsigned count,
                         const struct pipe_scissor_state *rects)
{
   struct threaded_context *tc = threaded_context(_pipe);
   struct tc_window_rects *p =
      tc_add_slot_based_call(tc, TC_CALL_set_window_rectangles, tc_window_rects, count);

   p->include = include;
   p->count = count;
   memcpy(p->slot, rects, count * sizeof(rects[0]));
}

struct tc_sampler_views {
   struct tc_call_base base;
   ubyte shader, start, count, unbind_num_trailing_slots;
   struct pipe_sampler_view *slot[0]; /* more will be allocated if needed */
};

static uint16_t
tc_call_set_sampler_views(struct pipe_context *pipe, void *call, uint64_t *last)
{
   struct tc_sampler_views *p = (struct tc_sampler_views *)call;

   pipe->set_sampler_views(pipe, p->shader, p->start, p->count,
                           p->unbind_num_trailing_slots, true, p->slot);
   return p->base.num_slots;
}

static void
tc_set_sampler_views(struct pipe_context *_pipe,
                     enum pipe_shader_type shader,
                     unsigned start, unsigned count,
                     unsigned unbind_num_trailing_slots, bool take_ownership,
                     struct pipe_sampler_view **views)
{
   if (!count && !unbind_num_trailing_slots)
      return;

   struct threaded_context *tc = threaded_context(_pipe);
   struct tc_sampler_views *p =
      tc_add_slot_based_call(tc, TC_CALL_set_sampler_views, tc_sampler_views,
                             views ? count : 0);

   p->shader = shader;
   p->start = start;

   if (views) {
      struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];

      p->count = count;
      p->unbind_num_trailing_slots = unbind_num_trailing_slots;

      if (take_ownership) {
         memcpy(p->slot, views, sizeof(*views) * count);

         for (unsigned i = 0; i < count; i++) {
            if (views[i] && views[i]->target == PIPE_BUFFER) {
               tc_bind_buffer(tc, &tc->sampler_buffers[shader][start + i], next,
                              views[i]->texture);
            } else {
               tc_unbind_buffer(&tc->sampler_buffers[shader][start + i]);
            }
         }
      } else {
         for (unsigned i = 0; i < count; i++) {
            p->slot[i] = NULL;
            pipe_sampler_view_reference(&p->slot[i], views[i]);

            if (views[i] && views[i]->target == PIPE_BUFFER) {
               tc_bind_buffer(tc, &tc->sampler_buffers[shader][start + i], next,
                              views[i]->texture);
            } else {
               tc_unbind_buffer(&tc->sampler_buffers[shader][start + i]);
            }
         }
      }

      tc_unbind_buffers(&tc->sampler_buffers[shader][start + count],
                        unbind_num_trailing_slots);
      tc->seen_sampler_buffers[shader] = true;
   } else {
      p->count = 0;
      p->unbind_num_trailing_slots = count + unbind_num_trailing_slots;

      tc_unbind_buffers(&tc->sampler_buffers[shader][start],
                        count + unbind_num_trailing_slots);
   }
}
1494
1495struct tc_shader_images {
1496   struct tc_call_base base;
1497   ubyte shader, start, count;
1498   ubyte unbind_num_trailing_slots;
1499   struct pipe_image_view slot[0]; /* more will be allocated if needed */
1500};
1501
1502static uint16_t
1503tc_call_set_shader_images(struct pipe_context *pipe, void *call, uint64_t *last)
1504{
1505   struct tc_shader_images *p = (struct tc_shader_images *)call;
1506   unsigned count = p->count;
1507
1508   if (!p->count) {
1509      pipe->set_shader_images(pipe, p->shader, p->start, 0,
1510                              p->unbind_num_trailing_slots, NULL);
1511      return call_size(tc_shader_images);
1512   }
1513
1514   pipe->set_shader_images(pipe, p->shader, p->start, p->count,
1515                           p->unbind_num_trailing_slots, p->slot);
1516
1517   for (unsigned i = 0; i < count; i++)
1518      tc_drop_resource_reference(p->slot[i].resource);
1519
1520   return p->base.num_slots;
1521}
1522
1523static void
1524tc_set_shader_images(struct pipe_context *_pipe,
1525                     enum pipe_shader_type shader,
1526                     unsigned start, unsigned count,
1527                     unsigned unbind_num_trailing_slots,
1528                     const struct pipe_image_view *images)
1529{
1530   if (!count && !unbind_num_trailing_slots)
1531      return;
1532
1533   struct threaded_context *tc = threaded_context(_pipe);
1534   struct tc_shader_images *p =
1535      tc_add_slot_based_call(tc, TC_CALL_set_shader_images, tc_shader_images,
1536                             images ? count : 0);
1537   unsigned writable_buffers = 0;
1538
1539   p->shader = shader;
1540   p->start = start;
1541
1542   if (images) {
1543      p->count = count;
1544      p->unbind_num_trailing_slots = unbind_num_trailing_slots;
1545
1546      struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
1547
1548      for (unsigned i = 0; i < count; i++) {
1549         struct pipe_resource *resource = images[i].resource;
1550
1551         tc_set_resource_reference(&p->slot[i].resource, resource);
1552
1553         if (resource && resource->target == PIPE_BUFFER) {
1554            tc_bind_buffer(tc, &tc->image_buffers[shader][start + i], next, resource);
1555
1556            if (images[i].access & PIPE_IMAGE_ACCESS_WRITE) {
1557               struct threaded_resource *tres = threaded_resource(resource);
1558
1559               tc_buffer_disable_cpu_storage(resource);
1560               util_range_add(&tres->b, &tres->valid_buffer_range,
1561                              images[i].u.buf.offset,
1562                              images[i].u.buf.offset + images[i].u.buf.size);
1563               writable_buffers |= BITFIELD_BIT(start + i);
1564            }
1565         } else {
1566            tc_unbind_buffer(&tc->image_buffers[shader][start + i]);
1567         }
1568      }
1569      memcpy(p->slot, images, count * sizeof(images[0]));
1570
1571      tc_unbind_buffers(&tc->image_buffers[shader][start + count],
1572                        unbind_num_trailing_slots);
1573      tc->seen_image_buffers[shader] = true;
1574   } else {
1575      p->count = 0;
1576      p->unbind_num_trailing_slots = count + unbind_num_trailing_slots;
1577
1578      tc_unbind_buffers(&tc->image_buffers[shader][start],
1579                        count + unbind_num_trailing_slots);
1580   }
1581
1582   tc->image_buffers_writeable_mask[shader] &= ~BITFIELD_RANGE(start, count);
1583   tc->image_buffers_writeable_mask[shader] |= writable_buffers;
1584}
1585
1586struct tc_shader_buffers {
1587   struct tc_call_base base;
1588   ubyte shader, start, count;
1589   bool unbind;
1590   unsigned writable_bitmask;
1591   struct pipe_shader_buffer slot[0]; /* more will be allocated if needed */
1592};
1593
1594static uint16_t
1595tc_call_set_shader_buffers(struct pipe_context *pipe, void *call, uint64_t *last)
1596{
1597   struct tc_shader_buffers *p = (struct tc_shader_buffers *)call;
1598   unsigned count = p->count;
1599
1600   if (p->unbind) {
1601      pipe->set_shader_buffers(pipe, p->shader, p->start, p->count, NULL, 0);
1602      return call_size(tc_shader_buffers);
1603   }
1604
1605   pipe->set_shader_buffers(pipe, p->shader, p->start, p->count, p->slot,
1606                            p->writable_bitmask);
1607
1608   for (unsigned i = 0; i < count; i++)
1609      tc_drop_resource_reference(p->slot[i].buffer);
1610
1611   return p->base.num_slots;
1612}
1613
1614static void
1615tc_set_shader_buffers(struct pipe_context *_pipe,
1616                      enum pipe_shader_type shader,
1617                      unsigned start, unsigned count,
1618                      const struct pipe_shader_buffer *buffers,
1619                      unsigned writable_bitmask)
1620{
1621   if (!count)
1622      return;
1623
1624   struct threaded_context *tc = threaded_context(_pipe);
1625   struct tc_shader_buffers *p =
1626      tc_add_slot_based_call(tc, TC_CALL_set_shader_buffers, tc_shader_buffers,
1627                             buffers ? count : 0);
1628
1629   p->shader = shader;
1630   p->start = start;
1631   p->count = count;
1632   p->unbind = buffers == NULL;
1633   p->writable_bitmask = writable_bitmask;
1634
1635   if (buffers) {
1636      struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
1637
1638      for (unsigned i = 0; i < count; i++) {
1639         struct pipe_shader_buffer *dst = &p->slot[i];
1640         const struct pipe_shader_buffer *src = buffers + i;
1641
1642         tc_set_resource_reference(&dst->buffer, src->buffer);
1643         dst->buffer_offset = src->buffer_offset;
1644         dst->buffer_size = src->buffer_size;
1645
1646         if (src->buffer) {
1647            struct threaded_resource *tres = threaded_resource(src->buffer);
1648
1649            tc_bind_buffer(tc, &tc->shader_buffers[shader][start + i], next, &tres->b);
1650
1651            if (writable_bitmask & BITFIELD_BIT(i)) {
1652               tc_buffer_disable_cpu_storage(src->buffer);
1653               util_range_add(&tres->b, &tres->valid_buffer_range,
1654                              src->buffer_offset,
1655                              src->buffer_offset + src->buffer_size);
1656            }
1657         } else {
1658            tc_unbind_buffer(&tc->shader_buffers[shader][start + i]);
1659         }
1660      }
1661      tc->seen_shader_buffers[shader] = true;
1662   } else {
1663      tc_unbind_buffers(&tc->shader_buffers[shader][start], count);
1664   }
1665
1666   tc->shader_buffers_writeable_mask[shader] &= ~BITFIELD_RANGE(start, count);
1667   tc->shader_buffers_writeable_mask[shader] |= writable_bitmask << start;
1668}
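
/* Worked example of the writeable-mask update above (illustrative numbers):
 * with start = 2, count = 3 and writable_bitmask = 0b101, the code clears
 * BITFIELD_RANGE(2, 3) = 0b11100 from the per-shader mask and then ORs in
 * 0b101 << 2 = 0b10100, so slots 2 and 4 end up writable and slot 3 ends up
 * read-only.
 */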
1669
1670struct tc_vertex_buffers {
1671   struct tc_call_base base;
1672   ubyte start, count;
1673   ubyte unbind_num_trailing_slots;
1674   struct pipe_vertex_buffer slot[0]; /* more will be allocated if needed */
1675};
1676
1677static uint16_t
1678tc_call_set_vertex_buffers(struct pipe_context *pipe, void *call, uint64_t *last)
1679{
1680   struct tc_vertex_buffers *p = (struct tc_vertex_buffers *)call;
1681   unsigned count = p->count;
1682
1683   if (!count) {
1684      pipe->set_vertex_buffers(pipe, p->start, 0,
1685                               p->unbind_num_trailing_slots, false, NULL);
1686      return call_size(tc_vertex_buffers);
1687   }
1688
1689   for (unsigned i = 0; i < count; i++)
1690      tc_assert(!p->slot[i].is_user_buffer);
1691
1692   pipe->set_vertex_buffers(pipe, p->start, count,
1693                            p->unbind_num_trailing_slots, true, p->slot);
1694   return p->base.num_slots;
1695}
1696
1697static void
1698tc_set_vertex_buffers(struct pipe_context *_pipe,
1699                      unsigned start, unsigned count,
1700                      unsigned unbind_num_trailing_slots,
1701                      bool take_ownership,
1702                      const struct pipe_vertex_buffer *buffers)
1703{
1704   struct threaded_context *tc = threaded_context(_pipe);
1705
1706   if (!count && !unbind_num_trailing_slots)
1707      return;
1708
1709   if (count && buffers) {
1710      struct tc_vertex_buffers *p =
1711         tc_add_slot_based_call(tc, TC_CALL_set_vertex_buffers, tc_vertex_buffers, count);
1712      p->start = start;
1713      p->count = count;
1714      p->unbind_num_trailing_slots = unbind_num_trailing_slots;
1715
1716      struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
1717
1718      if (take_ownership) {
1719         memcpy(p->slot, buffers, count * sizeof(struct pipe_vertex_buffer));
1720
1721         for (unsigned i = 0; i < count; i++) {
1722            struct pipe_resource *buf = buffers[i].buffer.resource;
1723
1724            if (buf) {
1725               tc_bind_buffer(tc, &tc->vertex_buffers[start + i], next, buf);
1726            } else {
1727               tc_unbind_buffer(&tc->vertex_buffers[start + i]);
1728            }
1729         }
1730      } else {
1731         for (unsigned i = 0; i < count; i++) {
1732            struct pipe_vertex_buffer *dst = &p->slot[i];
1733            const struct pipe_vertex_buffer *src = buffers + i;
1734            struct pipe_resource *buf = src->buffer.resource;
1735
1736            tc_assert(!src->is_user_buffer);
1737            dst->stride = src->stride;
1738            dst->is_user_buffer = false;
1739            tc_set_resource_reference(&dst->buffer.resource, buf);
1740            dst->buffer_offset = src->buffer_offset;
1741
1742            if (buf) {
1743               tc_bind_buffer(tc, &tc->vertex_buffers[start + i], next, buf);
1744            } else {
1745               tc_unbind_buffer(&tc->vertex_buffers[start + i]);
1746            }
1747         }
1748      }
1749
1750      tc_unbind_buffers(&tc->vertex_buffers[start + count],
1751                        unbind_num_trailing_slots);
1752   } else {
1753      struct tc_vertex_buffers *p =
1754         tc_add_slot_based_call(tc, TC_CALL_set_vertex_buffers, tc_vertex_buffers, 0);
1755      p->start = start;
1756      p->count = 0;
1757      p->unbind_num_trailing_slots = count + unbind_num_trailing_slots;
1758
1759      tc_unbind_buffers(&tc->vertex_buffers[start],
1760                        count + unbind_num_trailing_slots);
1761   }
1762}
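
/* Note on the take_ownership path above: the caller's buffer references are
 * transferred into the enqueued call via memcpy instead of being incremented,
 * which is why that branch skips tc_set_resource_reference. In both branches
 * the references are consumed later on the driver thread, because
 * tc_call_set_vertex_buffers passes take_ownership = true to the driver.
 */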
1763
1764struct tc_stream_outputs {
1765   struct tc_call_base base;
1766   unsigned count;
1767   struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS];
1768   unsigned offsets[PIPE_MAX_SO_BUFFERS];
1769};
1770
1771static uint16_t
1772tc_call_set_stream_output_targets(struct pipe_context *pipe, void *call, uint64_t *last)
1773{
1774   struct tc_stream_outputs *p = to_call(call, tc_stream_outputs);
1775   unsigned count = p->count;
1776
1777   pipe->set_stream_output_targets(pipe, count, p->targets, p->offsets);
1778   for (unsigned i = 0; i < count; i++)
1779      tc_drop_so_target_reference(p->targets[i]);
1780
1781   return call_size(tc_stream_outputs);
1782}
1783
1784static void
1785tc_set_stream_output_targets(struct pipe_context *_pipe,
1786                             unsigned count,
1787                             struct pipe_stream_output_target **tgs,
1788                             const unsigned *offsets)
1789{
1790   struct threaded_context *tc = threaded_context(_pipe);
1791   struct tc_stream_outputs *p =
1792      tc_add_call(tc, TC_CALL_set_stream_output_targets, tc_stream_outputs);
1793   struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
1794
1795   for (unsigned i = 0; i < count; i++) {
1796      p->targets[i] = NULL;
1797      pipe_so_target_reference(&p->targets[i], tgs[i]);
1798      if (tgs[i]) {
1799         tc_buffer_disable_cpu_storage(tgs[i]->buffer);
1800         tc_bind_buffer(tc, &tc->streamout_buffers[i], next, tgs[i]->buffer);
1801      } else {
1802         tc_unbind_buffer(&tc->streamout_buffers[i]);
1803      }
1804   }
1805   p->count = count;
1806   memcpy(p->offsets, offsets, count * sizeof(unsigned));
1807
1808   tc_unbind_buffers(&tc->streamout_buffers[count], PIPE_MAX_SO_BUFFERS - count);
1809   if (count)
1810      tc->seen_streamout_buffers = true;
1811}
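
/* The tc_buffer_disable_cpu_storage call above mirrors what the writable
 * shader-buffer and image paths do: streamout targets are written by the GPU,
 * and (as far as this file is concerned) a CPU-side shadow copy can't track
 * such writes, so the CPU storage is turned off for those buffers.
 */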
1812
1813static void
1814tc_set_compute_resources(struct pipe_context *_pipe, unsigned start,
1815                         unsigned count, struct pipe_surface **resources)
1816{
1817   struct threaded_context *tc = threaded_context(_pipe);
1818   struct pipe_context *pipe = tc->pipe;
1819
1820   tc_sync(tc);
1821   pipe->set_compute_resources(pipe, start, count, resources);
1822}
1823
1824static void
1825tc_set_global_binding(struct pipe_context *_pipe, unsigned first,
1826                      unsigned count, struct pipe_resource **resources,
1827                      uint32_t **handles)
1828{
1829   struct threaded_context *tc = threaded_context(_pipe);
1830   struct pipe_context *pipe = tc->pipe;
1831
1832   tc_sync(tc);
1833   pipe->set_global_binding(pipe, first, count, resources, handles);
1834}
1835
1836
1837/********************************************************************
1838 * views
1839 */
1840
1841static struct pipe_surface *
1842tc_create_surface(struct pipe_context *_pipe,
1843                  struct pipe_resource *resource,
1844                  const struct pipe_surface *surf_tmpl)
1845{
1846   struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1847   struct pipe_surface *view =
1848         pipe->create_surface(pipe, resource, surf_tmpl);
1849
1850   if (view)
1851      view->context = _pipe;
1852   return view;
1853}
1854
1855static void
1856tc_surface_destroy(struct pipe_context *_pipe,
1857                   struct pipe_surface *surf)
1858{
1859   struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1860
1861   pipe->surface_destroy(pipe, surf);
1862}
1863
1864static struct pipe_sampler_view *
1865tc_create_sampler_view(struct pipe_context *_pipe,
1866                       struct pipe_resource *resource,
1867                       const struct pipe_sampler_view *templ)
1868{
1869   struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1870   struct pipe_sampler_view *view =
1871         pipe->create_sampler_view(pipe, resource, templ);
1872
1873   if (view)
1874      view->context = _pipe;
1875   return view;
1876}
1877
1878static void
1879tc_sampler_view_destroy(struct pipe_context *_pipe,
1880                        struct pipe_sampler_view *view)
1881{
1882   struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1883
1884   pipe->sampler_view_destroy(pipe, view);
1885}
1886
1887static struct pipe_stream_output_target *
1888tc_create_stream_output_target(struct pipe_context *_pipe,
1889                               struct pipe_resource *res,
1890                               unsigned buffer_offset,
1891                               unsigned buffer_size)
1892{
1893   struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1894   struct threaded_resource *tres = threaded_resource(res);
1895   struct pipe_stream_output_target *view;
1896
1897   util_range_add(&tres->b, &tres->valid_buffer_range, buffer_offset,
1898                  buffer_offset + buffer_size);
1899
1900   view = pipe->create_stream_output_target(pipe, res, buffer_offset,
1901                                            buffer_size);
1902   if (view)
1903      view->context = _pipe;
1904   return view;
1905}
1906
1907static void
1908tc_stream_output_target_destroy(struct pipe_context *_pipe,
1909                                struct pipe_stream_output_target *target)
1910{
1911   struct pipe_context *pipe = threaded_context(_pipe)->pipe;
1912
1913   pipe->stream_output_target_destroy(pipe, target);
1914}
1915
1916
1917/********************************************************************
1918 * bindless
1919 */
1920
1921static uint64_t
1922tc_create_texture_handle(struct pipe_context *_pipe,
1923                         struct pipe_sampler_view *view,
1924                         const struct pipe_sampler_state *state)
1925{
1926   struct threaded_context *tc = threaded_context(_pipe);
1927   struct pipe_context *pipe = tc->pipe;
1928
1929   tc_sync(tc);
1930   return pipe->create_texture_handle(pipe, view, state);
1931}
1932
1933struct tc_make_texture_handle_resident {
1934   struct tc_call_base base;
1935   bool resident;
1936   uint64_t handle;
1937};
1938
1939static uint16_t
1940tc_call_make_texture_handle_resident(struct pipe_context *pipe, void *call, uint64_t *last)
1941{
1942   struct tc_make_texture_handle_resident *p =
1943      to_call(call, tc_make_texture_handle_resident);
1944
1945   pipe->make_texture_handle_resident(pipe, p->handle, p->resident);
1946   return call_size(tc_make_texture_handle_resident);
1947}
1948
1949static void
1950tc_make_texture_handle_resident(struct pipe_context *_pipe, uint64_t handle,
1951                                bool resident)
1952{
1953   struct threaded_context *tc = threaded_context(_pipe);
1954   struct tc_make_texture_handle_resident *p =
1955      tc_add_call(tc, TC_CALL_make_texture_handle_resident,
1956                  tc_make_texture_handle_resident);
1957
1958   p->handle = handle;
1959   p->resident = resident;
1960}
1961
1962static uint64_t
1963tc_create_image_handle(struct pipe_context *_pipe,
1964                       const struct pipe_image_view *image)
1965{
1966   struct threaded_context *tc = threaded_context(_pipe);
1967   struct pipe_context *pipe = tc->pipe;
1968
1969   if (image->resource->target == PIPE_BUFFER)
1970      tc_buffer_disable_cpu_storage(image->resource);
1971
1972   tc_sync(tc);
1973   return pipe->create_image_handle(pipe, image);
1974}
1975
1976struct tc_make_image_handle_resident {
1977   struct tc_call_base base;
1978   bool resident;
1979   unsigned access;
1980   uint64_t handle;
1981};
1982
1983static uint16_t
1984tc_call_make_image_handle_resident(struct pipe_context *pipe, void *call, uint64_t *last)
1985{
1986   struct tc_make_image_handle_resident *p =
1987      to_call(call, tc_make_image_handle_resident);
1988
1989   pipe->make_image_handle_resident(pipe, p->handle, p->access, p->resident);
1990   return call_size(tc_make_image_handle_resident);
1991}
1992
1993static void
1994tc_make_image_handle_resident(struct pipe_context *_pipe, uint64_t handle,
1995                              unsigned access, bool resident)
1996{
1997   struct threaded_context *tc = threaded_context(_pipe);
1998   struct tc_make_image_handle_resident *p =
1999      tc_add_call(tc, TC_CALL_make_image_handle_resident,
2000                  tc_make_image_handle_resident);
2001
2002   p->handle = handle;
2003   p->access = access;
2004   p->resident = resident;
2005}
2006
2007
2008/********************************************************************
2009 * transfer
2010 */
2011
2012struct tc_replace_buffer_storage {
2013   struct tc_call_base base;
2014   uint16_t num_rebinds;
2015   uint32_t rebind_mask;
2016   uint32_t delete_buffer_id;
2017   struct pipe_resource *dst;
2018   struct pipe_resource *src;
2019   tc_replace_buffer_storage_func func;
2020};
2021
2022static uint16_t
2023tc_call_replace_buffer_storage(struct pipe_context *pipe, void *call, uint64_t *last)
2024{
2025   struct tc_replace_buffer_storage *p = to_call(call, tc_replace_buffer_storage);
2026
2027   p->func(pipe, p->dst, p->src, p->num_rebinds, p->rebind_mask, p->delete_buffer_id);
2028
2029   tc_drop_resource_reference(p->dst);
2030   tc_drop_resource_reference(p->src);
2031   return call_size(tc_replace_buffer_storage);
2032}
2033
2034/* Return true if the buffer has been invalidated or is idle.
2035 * Note that callers must've called tc_touch_buffer before calling
2036 * this function. */
2037static bool
2038tc_invalidate_buffer(struct threaded_context *tc,
2039                     struct threaded_resource *tbuf)
2040{
2041   if (!tc_is_buffer_busy(tc, tbuf, PIPE_MAP_READ_WRITE)) {
      /* It's idle, so invalidation would be a no-op, but we can still clear
       * the valid range because we are technically doing an invalidation;
       * we just skip the reallocation because it's unnecessary.
       *
       * If the buffer is bound for write, we can't clear the valid range.
       */
2048      if (!tc_is_buffer_bound_for_write(tc, tbuf->buffer_id_unique))
2049         util_range_set_empty(&tbuf->valid_buffer_range);
2050      return true;
2051   }
2052
2053   struct pipe_screen *screen = tc->base.screen;
2054   struct pipe_resource *new_buf;
2055
2056   /* Shared, pinned, and sparse buffers can't be reallocated. */
2057   if (tc_is_buffer_shared(tbuf) ||
2058       tbuf->is_user_ptr ||
2059       tbuf->b.flags & (PIPE_RESOURCE_FLAG_SPARSE | PIPE_RESOURCE_FLAG_UNMAPPABLE))
2060      return false;
2061
2062   /* Allocate a new one. */
2063   new_buf = screen->resource_create(screen, &tbuf->b);
2064   if (!new_buf)
2065      return false;
2066
2067   /* Replace the "latest" pointer. */
2068   if (tbuf->latest != &tbuf->b)
2069      pipe_resource_reference(&tbuf->latest, NULL);
2070
2071   tbuf->latest = new_buf;
2072
2073   uint32_t delete_buffer_id = tbuf->buffer_id_unique;
2074
2075   /* Enqueue storage replacement of the original buffer. */
2076   struct tc_replace_buffer_storage *p =
2077      tc_add_call(tc, TC_CALL_replace_buffer_storage,
2078                  tc_replace_buffer_storage);
2079
2080   p->func = tc->replace_buffer_storage;
2081   tc_set_resource_reference(&p->dst, &tbuf->b);
2082   tc_set_resource_reference(&p->src, new_buf);
2083   p->delete_buffer_id = delete_buffer_id;
2084   p->rebind_mask = 0;
2085
2086   /* Treat the current buffer as the new buffer. */
2087   bool bound_for_write = tc_is_buffer_bound_for_write(tc, tbuf->buffer_id_unique);
2088   p->num_rebinds = tc_rebind_buffer(tc, tbuf->buffer_id_unique,
2089                                     threaded_resource(new_buf)->buffer_id_unique,
2090                                     &p->rebind_mask);
2091
2092   /* If the buffer is not bound for write, clear the valid range. */
2093   if (!bound_for_write)
2094      util_range_set_empty(&tbuf->valid_buffer_range);
2095
2096   tbuf->buffer_id_unique = threaded_resource(new_buf)->buffer_id_unique;
2097   threaded_resource(new_buf)->buffer_id_unique = 0;
2098
2099   return true;
2100}
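
/* A minimal sketch of the driver-side replace_buffer_storage hook that the
 * call enqueued above ends up invoking (the name and body are illustrative;
 * the real callback is supplied by the driver, and the parameter order
 * follows tc_call_replace_buffer_storage above):
 *
 *    static void
 *    example_replace_buffer_storage(struct pipe_context *ctx,
 *                                   struct pipe_resource *dst,
 *                                   struct pipe_resource *src,
 *                                   unsigned num_rebinds,
 *                                   uint32_t rebind_mask,
 *                                   uint32_t delete_buffer_id)
 *    {
 *       // make "dst" point at the storage of "src", rebind it where the
 *       // rebind mask says so, and release bookkeeping for delete_buffer_id
 *    }
 */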
2101
2102/* Note that callers must've called tc_touch_buffer first before
2103 * calling tc_improve_map_buffer_flags. */
2104static unsigned
2105tc_improve_map_buffer_flags(struct threaded_context *tc,
2106                            struct threaded_resource *tres, unsigned usage,
2107                            unsigned offset, unsigned size)
2108{
2109   /* Never invalidate inside the driver and never infer "unsynchronized". */
2110   unsigned tc_flags = TC_TRANSFER_MAP_NO_INVALIDATE |
2111                       TC_TRANSFER_MAP_NO_INFER_UNSYNCHRONIZED;
2112
2113   /* Prevent a reentry. */
2114   if (usage & tc_flags)
2115      return usage;
2116
2117   /* Use the staging upload if it's preferred. */
2118   if (usage & (PIPE_MAP_DISCARD_RANGE |
2119                PIPE_MAP_DISCARD_WHOLE_RESOURCE) &&
2120       !(usage & PIPE_MAP_PERSISTENT) &&
2121       tres->b.flags & PIPE_RESOURCE_FLAG_DONT_MAP_DIRECTLY &&
2122       tc->use_forced_staging_uploads) {
2123      usage &= ~(PIPE_MAP_DISCARD_WHOLE_RESOURCE |
2124                 PIPE_MAP_UNSYNCHRONIZED);
2125
2126      return usage | tc_flags | PIPE_MAP_DISCARD_RANGE;
2127   }
2128
2129   /* Sparse buffers can't be mapped directly and can't be reallocated
2130    * (fully invalidated). That may just be a radeonsi limitation, but
2131    * the threaded context must obey it with radeonsi.
2132    */
2133   if (tres->b.flags & (PIPE_RESOURCE_FLAG_SPARSE | PIPE_RESOURCE_FLAG_UNMAPPABLE)) {
2134      /* We can use DISCARD_RANGE instead of full discard. This is the only
2135       * fast path for sparse buffers that doesn't need thread synchronization.
2136       */
2137      if (usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE)
2138         usage |= PIPE_MAP_DISCARD_RANGE;
2139
      /* Allow DISCARD_WHOLE_RESOURCE and inferring UNSYNCHRONIZED in drivers.
       * The threaded context doesn't do unsynchronized mappings and
       * invalidations of sparse buffers, therefore correct driver behavior
       * won't result in incorrect behavior with the threaded context.
       */
2145      return usage;
2146   }
2147
2148   usage |= tc_flags;
2149
2150   /* Handle CPU reads trivially. */
2151   if (usage & PIPE_MAP_READ) {
2152      if (usage & PIPE_MAP_UNSYNCHRONIZED)
2153         usage |= TC_TRANSFER_MAP_THREADED_UNSYNC; /* don't sync */
2154
2155      /* Drivers aren't allowed to do buffer invalidations. */
2156      return usage & ~PIPE_MAP_DISCARD_WHOLE_RESOURCE;
2157   }
2158
2159   /* See if the buffer range being mapped has never been initialized or
2160    * the buffer is idle, in which case it can be mapped unsynchronized. */
2161   if (!(usage & PIPE_MAP_UNSYNCHRONIZED) &&
2162       ((!tres->is_shared &&
2163         !util_ranges_intersect(&tres->valid_buffer_range, offset, offset + size)) ||
2164        !tc_is_buffer_busy(tc, tres, usage)))
2165      usage |= PIPE_MAP_UNSYNCHRONIZED;
2166
2167   if (!(usage & PIPE_MAP_UNSYNCHRONIZED)) {
2168      /* If discarding the entire range, discard the whole resource instead. */
2169      if (usage & PIPE_MAP_DISCARD_RANGE &&
2170          offset == 0 && size == tres->b.width0)
2171         usage |= PIPE_MAP_DISCARD_WHOLE_RESOURCE;
2172
2173      /* Discard the whole resource if needed. */
2174      if (usage & PIPE_MAP_DISCARD_WHOLE_RESOURCE) {
2175         if (tc_invalidate_buffer(tc, tres))
2176            usage |= PIPE_MAP_UNSYNCHRONIZED;
2177         else
2178            usage |= PIPE_MAP_DISCARD_RANGE; /* fallback */
2179      }
2180   }
2181
2182   /* We won't need this flag anymore. */
2183   /* TODO: We might not need TC_TRANSFER_MAP_NO_INVALIDATE with this. */
2184   usage &= ~PIPE_MAP_DISCARD_WHOLE_RESOURCE;
2185
2186   /* GL_AMD_pinned_memory and persistent mappings can't use staging
2187    * buffers. */
2188   if (usage & (PIPE_MAP_UNSYNCHRONIZED |
2189                PIPE_MAP_PERSISTENT) ||
2190       tres->is_user_ptr)
2191      usage &= ~PIPE_MAP_DISCARD_RANGE;
2192
   /* Unsynchronized buffer mappings don't have to synchronize the thread. */
2194   if (usage & PIPE_MAP_UNSYNCHRONIZED) {
2195      usage &= ~PIPE_MAP_DISCARD_RANGE;
2196      usage |= TC_TRANSFER_MAP_THREADED_UNSYNC; /* notify the driver */
2197   }
2198
2199   return usage;
2200}
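
/* Worked example for tc_improve_map_buffer_flags (illustrative): mapping a
 * range of a non-shared buffer that doesn't intersect valid_buffer_range,
 * without PIPE_MAP_UNSYNCHRONIZED, gets promoted to PIPE_MAP_UNSYNCHRONIZED |
 * TC_TRANSFER_MAP_THREADED_UNSYNC and loses DISCARD_RANGE, which lets
 * tc_buffer_map call into the driver without syncing the driver thread.
 */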
2201
2202static void *
2203tc_buffer_map(struct pipe_context *_pipe,
2204              struct pipe_resource *resource, unsigned level,
2205              unsigned usage, const struct pipe_box *box,
2206              struct pipe_transfer **transfer)
2207{
2208   struct threaded_context *tc = threaded_context(_pipe);
2209   struct threaded_resource *tres = threaded_resource(resource);
2210   struct pipe_context *pipe = tc->pipe;
2211
   /* PIPE_MAP_THREAD_SAFE is for glthread, which shouldn't use the CPU storage.
    * This shouldn't normally be necessary because glthread only uses large buffers.
    */
2215   if (usage & PIPE_MAP_THREAD_SAFE)
2216      tc_buffer_disable_cpu_storage(resource);
2217
2218   tc_touch_buffer(tc, tres);
2219
2220   /* CPU storage relies on buffer invalidation never failing. With shared buffers,
2221    * invalidation might not always be possible, so CPU storage can't be used.
2222    */
2223   if (tc_is_buffer_shared(tres))
2224      tc_buffer_disable_cpu_storage(resource);
2225
2226   usage = tc_improve_map_buffer_flags(tc, tres, usage, box->x, box->width);
2227
2228   /* If the CPU storage is enabled, return it directly. */
2229   if (tres->allow_cpu_storage && !(usage & TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE)) {
2230      /* We can't let resource_copy_region disable the CPU storage. */
2231      assert(!(tres->b.flags & PIPE_RESOURCE_FLAG_DONT_MAP_DIRECTLY));
2232
2233      if (!tres->cpu_storage) {
2234         tres->cpu_storage = align_malloc(resource->width0, tc->map_buffer_alignment);
2235
2236         if (tres->cpu_storage && tres->valid_buffer_range.end) {
            /* The GPU buffer contains valid data. Copy it to the CPU storage. */
2238            struct pipe_box box2;
2239            struct pipe_transfer *transfer2;
2240
2241            unsigned valid_range_len = tres->valid_buffer_range.end - tres->valid_buffer_range.start;
2242            u_box_1d(tres->valid_buffer_range.start, valid_range_len, &box2);
2243
2244            tc_sync_msg(tc, "cpu storage GPU -> CPU copy");
2245            tc_set_driver_thread(tc);
2246
2247            void *ret = pipe->buffer_map(pipe, tres->latest ? tres->latest : resource,
2248                                         0, PIPE_MAP_READ, &box2, &transfer2);
2249            memcpy(&((uint8_t*)tres->cpu_storage)[tres->valid_buffer_range.start],
2250                   ret,
2251                   valid_range_len);
2252            pipe->buffer_unmap(pipe, transfer2);
2253
2254            tc_clear_driver_thread(tc);
2255         }
2256      }
2257
2258      if (tres->cpu_storage) {
2259         struct threaded_transfer *ttrans = slab_zalloc(&tc->pool_transfers);
2260         ttrans->b.resource = resource;
2261         ttrans->b.usage = usage;
2262         ttrans->b.box = *box;
2263         ttrans->valid_buffer_range = &tres->valid_buffer_range;
2264         ttrans->cpu_storage_mapped = true;
2265         *transfer = &ttrans->b;
2266
2267         return (uint8_t*)tres->cpu_storage + box->x;
2268      } else {
2269         tres->allow_cpu_storage = false;
2270      }
2271   }
2272
2273   /* Do a staging transfer within the threaded context. The driver should
2274    * only get resource_copy_region.
2275    */
2276   if (usage & PIPE_MAP_DISCARD_RANGE) {
2277      struct threaded_transfer *ttrans = slab_zalloc(&tc->pool_transfers);
2278      uint8_t *map;
2279
2280      u_upload_alloc(tc->base.stream_uploader, 0,
2281                     box->width + (box->x % tc->map_buffer_alignment),
2282                     tc->map_buffer_alignment, &ttrans->b.offset,
2283                     &ttrans->staging, (void**)&map);
2284      if (!map) {
2285         slab_free(&tc->pool_transfers, ttrans);
2286         return NULL;
2287      }
2288
2289      ttrans->b.resource = resource;
2290      ttrans->b.level = 0;
2291      ttrans->b.usage = usage;
2292      ttrans->b.box = *box;
2293      ttrans->b.stride = 0;
2294      ttrans->b.layer_stride = 0;
2295      ttrans->valid_buffer_range = &tres->valid_buffer_range;
2296      ttrans->cpu_storage_mapped = false;
2297      *transfer = &ttrans->b;
2298
2299      p_atomic_inc(&tres->pending_staging_uploads);
2300      util_range_add(resource, &tres->pending_staging_uploads_range,
2301                     box->x, box->x + box->width);
2302
2303      return map + (box->x % tc->map_buffer_alignment);
2304   }
2305
2306   if (usage & PIPE_MAP_UNSYNCHRONIZED &&
2307       p_atomic_read(&tres->pending_staging_uploads) &&
2308       util_ranges_intersect(&tres->pending_staging_uploads_range, box->x, box->x + box->width)) {
2309      /* Write conflict detected between a staging transfer and the direct mapping we're
2310       * going to do. Resolve the conflict by ignoring UNSYNCHRONIZED so the direct mapping
2311       * will have to wait for the staging transfer completion.
2312       * Note: The conflict detection is only based on the mapped range, not on the actual
2313       * written range(s).
2314       */
2315      usage &= ~PIPE_MAP_UNSYNCHRONIZED & ~TC_TRANSFER_MAP_THREADED_UNSYNC;
2316      tc->use_forced_staging_uploads = false;
2317   }
2318
   /* Unsynchronized buffer mappings don't have to synchronize the thread. */
2320   if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC)) {
2321      tc_sync_msg(tc, usage & PIPE_MAP_DISCARD_RANGE ? "  discard_range" :
2322                      usage & PIPE_MAP_READ ? "  read" : "  staging conflict");
2323      tc_set_driver_thread(tc);
2324   }
2325
2326   tc->bytes_mapped_estimate += box->width;
2327
2328   void *ret = pipe->buffer_map(pipe, tres->latest ? tres->latest : resource,
2329                                level, usage, box, transfer);
2330   threaded_transfer(*transfer)->valid_buffer_range = &tres->valid_buffer_range;
2331   threaded_transfer(*transfer)->cpu_storage_mapped = false;
2332
2333   if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC))
2334      tc_clear_driver_thread(tc);
2335
2336   return ret;
2337}
2338
2339static void *
2340tc_texture_map(struct pipe_context *_pipe,
2341               struct pipe_resource *resource, unsigned level,
2342               unsigned usage, const struct pipe_box *box,
2343               struct pipe_transfer **transfer)
2344{
2345   struct threaded_context *tc = threaded_context(_pipe);
2346   struct threaded_resource *tres = threaded_resource(resource);
2347   struct pipe_context *pipe = tc->pipe;
2348
2349   tc_sync_msg(tc, "texture");
2350   tc_set_driver_thread(tc);
2351
2352   tc->bytes_mapped_estimate += box->width;
2353
2354   void *ret = pipe->texture_map(pipe, tres->latest ? tres->latest : resource,
2355                                 level, usage, box, transfer);
2356
2357   if (!(usage & TC_TRANSFER_MAP_THREADED_UNSYNC))
2358      tc_clear_driver_thread(tc);
2359
2360   return ret;
2361}
2362
2363struct tc_transfer_flush_region {
2364   struct tc_call_base base;
2365   struct pipe_box box;
2366   struct pipe_transfer *transfer;
2367};
2368
2369static uint16_t
2370tc_call_transfer_flush_region(struct pipe_context *pipe, void *call, uint64_t *last)
2371{
2372   struct tc_transfer_flush_region *p = to_call(call, tc_transfer_flush_region);
2373
2374   pipe->transfer_flush_region(pipe, p->transfer, &p->box);
2375   return call_size(tc_transfer_flush_region);
2376}
2377
2378struct tc_resource_copy_region {
2379   struct tc_call_base base;
2380   unsigned dst_level;
2381   unsigned dstx, dsty, dstz;
2382   unsigned src_level;
2383   struct pipe_box src_box;
2384   struct pipe_resource *dst;
2385   struct pipe_resource *src;
2386};
2387
2388static void
2389tc_resource_copy_region(struct pipe_context *_pipe,
2390                        struct pipe_resource *dst, unsigned dst_level,
2391                        unsigned dstx, unsigned dsty, unsigned dstz,
2392                        struct pipe_resource *src, unsigned src_level,
2393                        const struct pipe_box *src_box);
2394
2395static void
2396tc_buffer_do_flush_region(struct threaded_context *tc,
2397                          struct threaded_transfer *ttrans,
2398                          const struct pipe_box *box)
2399{
2400   struct threaded_resource *tres = threaded_resource(ttrans->b.resource);
2401
2402   if (ttrans->staging) {
2403      struct pipe_box src_box;
2404
2405      u_box_1d(ttrans->b.offset + ttrans->b.box.x % tc->map_buffer_alignment +
2406               (box->x - ttrans->b.box.x),
2407               box->width, &src_box);
2408
2409      /* Copy the staging buffer into the original one. */
2410      tc_resource_copy_region(&tc->base, ttrans->b.resource, 0, box->x, 0, 0,
2411                              ttrans->staging, 0, &src_box);
2412   }
2413
2414   /* Don't update the valid range when we're uploading the CPU storage
2415    * because it includes the uninitialized range too.
2416    */
2417   if (!(ttrans->b.usage & TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE)) {
2418      util_range_add(&tres->b, ttrans->valid_buffer_range,
2419                     box->x, box->x + box->width);
2420   }
2421}
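
/* Worked example of the staging offset math above (illustrative numbers):
 * with map_buffer_alignment = 256 and an original map at box.x = 300, the
 * DISCARD_RANGE path in tc_buffer_map returned map + (300 % 256) = map + 44.
 * Flushing a sub-box at x = 310 therefore copies from staging offset
 * ttrans->b.offset + 44 + (310 - 300), which is exactly what the u_box_1d
 * call above computes.
 */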
2422
2423static void
2424tc_transfer_flush_region(struct pipe_context *_pipe,
2425                         struct pipe_transfer *transfer,
2426                         const struct pipe_box *rel_box)
2427{
2428   struct threaded_context *tc = threaded_context(_pipe);
2429   struct threaded_transfer *ttrans = threaded_transfer(transfer);
2430   struct threaded_resource *tres = threaded_resource(transfer->resource);
2431   unsigned required_usage = PIPE_MAP_WRITE |
2432                             PIPE_MAP_FLUSH_EXPLICIT;
2433
2434   if (tres->b.target == PIPE_BUFFER) {
2435      if ((transfer->usage & required_usage) == required_usage) {
2436         struct pipe_box box;
2437
2438         u_box_1d(transfer->box.x + rel_box->x, rel_box->width, &box);
2439         tc_buffer_do_flush_region(tc, ttrans, &box);
2440      }
2441
2442      /* Staging transfers don't send the call to the driver.
2443       *
2444       * Transfers using the CPU storage shouldn't call transfer_flush_region
2445       * in the driver because the buffer is not really mapped on the driver
2446       * side and the CPU storage always re-uploads everything (flush_region
2447       * makes no difference).
2448       */
2449      if (ttrans->staging || ttrans->cpu_storage_mapped)
2450         return;
2451   }
2452
2453   struct tc_transfer_flush_region *p =
2454      tc_add_call(tc, TC_CALL_transfer_flush_region, tc_transfer_flush_region);
2455   p->transfer = transfer;
2456   p->box = *rel_box;
2457}
2458
2459static void
2460tc_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence,
2461         unsigned flags);
2462
2463struct tc_buffer_unmap {
2464   struct tc_call_base base;
2465   bool was_staging_transfer;
2466   union {
2467      struct pipe_transfer *transfer;
2468      struct pipe_resource *resource;
2469   };
2470};
2471
2472static uint16_t
2473tc_call_buffer_unmap(struct pipe_context *pipe, void *call, uint64_t *last)
2474{
2475   struct tc_buffer_unmap *p = to_call(call, tc_buffer_unmap);
2476
2477   if (p->was_staging_transfer) {
2478      struct threaded_resource *tres = threaded_resource(p->resource);
2479      /* Nothing to do except keeping track of staging uploads */
2480      assert(tres->pending_staging_uploads > 0);
2481      p_atomic_dec(&tres->pending_staging_uploads);
2482      tc_drop_resource_reference(p->resource);
2483   } else {
2484      pipe->buffer_unmap(pipe, p->transfer);
2485   }
2486
2487   return call_size(tc_buffer_unmap);
2488}
2489
2490static void
2491tc_buffer_unmap(struct pipe_context *_pipe, struct pipe_transfer *transfer)
2492{
2493   struct threaded_context *tc = threaded_context(_pipe);
2494   struct threaded_transfer *ttrans = threaded_transfer(transfer);
2495   struct threaded_resource *tres = threaded_resource(transfer->resource);
2496
2497   /* PIPE_MAP_THREAD_SAFE is only valid with UNSYNCHRONIZED. It can be
2498    * called from any thread and bypasses all multithreaded queues.
2499    */
2500   if (transfer->usage & PIPE_MAP_THREAD_SAFE) {
2501      assert(transfer->usage & PIPE_MAP_UNSYNCHRONIZED);
2502      assert(!(transfer->usage & (PIPE_MAP_FLUSH_EXPLICIT |
2503                                  PIPE_MAP_DISCARD_RANGE)));
2504
2505      struct pipe_context *pipe = tc->pipe;
2506      util_range_add(&tres->b, ttrans->valid_buffer_range,
2507                      transfer->box.x, transfer->box.x + transfer->box.width);
2508
2509      pipe->buffer_unmap(pipe, transfer);
2510      return;
2511   }
2512
2513   if (transfer->usage & PIPE_MAP_WRITE &&
2514       !(transfer->usage & PIPE_MAP_FLUSH_EXPLICIT))
2515      tc_buffer_do_flush_region(tc, ttrans, &transfer->box);
2516
2517   if (ttrans->cpu_storage_mapped) {
2518      /* GL allows simultaneous GPU stores with mapped buffers as long as GPU stores don't
2519       * touch the mapped range. That's a problem because GPU stores free the CPU storage.
2520       * If that happens, we just ignore the unmap call and don't upload anything to prevent
2521       * a crash.
2522       *
2523       * Disallow the CPU storage in the driver to work around this.
2524       */
2525      assert(tres->cpu_storage);
2526
2527      if (tres->cpu_storage) {
2528         /* Invalidations shouldn't fail as long as CPU storage is allowed. */
2529         ASSERTED bool invalidated = tc_invalidate_buffer(tc, tres);
2530         assert(invalidated);
2531
2532         tc_buffer_subdata(&tc->base, &tres->b,
2533                           PIPE_MAP_UNSYNCHRONIZED |
2534                           TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE,
2535                           0, tres->b.width0, tres->cpu_storage);
2536         /* This shouldn't have been freed by buffer_subdata. */
2537         assert(tres->cpu_storage);
2538      } else {
2539         static bool warned_once = false;
2540         if (!warned_once) {
2541            fprintf(stderr, "This application is incompatible with cpu_storage.\n");
2542            fprintf(stderr, "Use tc_max_cpu_storage_size=0 to disable it and report this issue to Mesa.\n");
2543            warned_once = true;
2544         }
2545      }
2546
2547      tc_drop_resource_reference(ttrans->staging);
2548      slab_free(&tc->pool_transfers, ttrans);
2549      return;
2550   }
2551
2552   bool was_staging_transfer = false;
2553
2554   if (ttrans->staging) {
2555      was_staging_transfer = true;
2556
2557      tc_drop_resource_reference(ttrans->staging);
2558      slab_free(&tc->pool_transfers, ttrans);
2559   }
2560
2561   struct tc_buffer_unmap *p = tc_add_call(tc, TC_CALL_buffer_unmap,
2562                                           tc_buffer_unmap);
2563   if (was_staging_transfer) {
2564      tc_set_resource_reference(&p->resource, &tres->b);
2565      p->was_staging_transfer = true;
2566   } else {
2567      p->transfer = transfer;
2568      p->was_staging_transfer = false;
2569   }
2570
   /* tc_buffer_map directly maps the buffers, but tc_buffer_unmap
    * defers the unmap operation to batch execution.
    * bytes_mapped_estimate is an estimate of the map/unmap byte delta;
    * if it exceeds an optional limit, the current batch is flushed
    * to reclaim some RAM. */
2576   if (!ttrans->staging && tc->bytes_mapped_limit &&
2577       tc->bytes_mapped_estimate > tc->bytes_mapped_limit) {
2578      tc_flush(_pipe, NULL, PIPE_FLUSH_ASYNC);
2579   }
2580}
2581
2582struct tc_texture_unmap {
2583   struct tc_call_base base;
2584   struct pipe_transfer *transfer;
2585};
2586
2587static uint16_t
2588tc_call_texture_unmap(struct pipe_context *pipe, void *call, uint64_t *last)
2589{
2590   struct tc_texture_unmap *p = (struct tc_texture_unmap *) call;
2591
2592   pipe->texture_unmap(pipe, p->transfer);
2593   return call_size(tc_texture_unmap);
2594}
2595
2596static void
2597tc_texture_unmap(struct pipe_context *_pipe, struct pipe_transfer *transfer)
2598{
2599   struct threaded_context *tc = threaded_context(_pipe);
2600   struct threaded_transfer *ttrans = threaded_transfer(transfer);
2601
2602   tc_add_call(tc, TC_CALL_texture_unmap, tc_texture_unmap)->transfer = transfer;
2603
   /* tc_texture_map directly maps the textures, but tc_texture_unmap
    * defers the unmap operation to batch execution.
    * bytes_mapped_estimate is an estimate of the map/unmap byte delta;
    * if it exceeds an optional limit, the current batch is flushed
    * to reclaim some RAM. */
2609   if (!ttrans->staging && tc->bytes_mapped_limit &&
2610       tc->bytes_mapped_estimate > tc->bytes_mapped_limit) {
2611      tc_flush(_pipe, NULL, PIPE_FLUSH_ASYNC);
2612   }
2613}
2614
2615struct tc_buffer_subdata {
2616   struct tc_call_base base;
2617   unsigned usage, offset, size;
2618   struct pipe_resource *resource;
2619   char slot[0]; /* more will be allocated if needed */
2620};
2621
2622static uint16_t
2623tc_call_buffer_subdata(struct pipe_context *pipe, void *call, uint64_t *last)
2624{
2625   struct tc_buffer_subdata *p = (struct tc_buffer_subdata *)call;
2626
2627   pipe->buffer_subdata(pipe, p->resource, p->usage, p->offset, p->size,
2628                        p->slot);
2629   tc_drop_resource_reference(p->resource);
2630   return p->base.num_slots;
2631}
2632
2633static void
2634tc_buffer_subdata(struct pipe_context *_pipe,
2635                  struct pipe_resource *resource,
2636                  unsigned usage, unsigned offset,
2637                  unsigned size, const void *data)
2638{
2639   struct threaded_context *tc = threaded_context(_pipe);
2640   struct threaded_resource *tres = threaded_resource(resource);
2641
2642   if (!size)
2643      return;
2644
2645   tc_touch_buffer(tc, tres);
2646
2647   usage |= PIPE_MAP_WRITE;
2648
   /* PIPE_MAP_DIRECTLY suppresses implicit DISCARD_RANGE. */
2650   if (!(usage & PIPE_MAP_DIRECTLY))
2651      usage |= PIPE_MAP_DISCARD_RANGE;
2652
2653   usage = tc_improve_map_buffer_flags(tc, tres, usage, offset, size);
2654
   /* Unsynchronized and big transfers should use transfer_map. Also handle
    * full invalidations, because drivers aren't allowed to do them.
    */
2658   if (usage & (PIPE_MAP_UNSYNCHRONIZED |
2659                PIPE_MAP_DISCARD_WHOLE_RESOURCE) ||
2660       size > TC_MAX_SUBDATA_BYTES ||
2661       tres->cpu_storage) {
2662      struct pipe_transfer *transfer;
2663      struct pipe_box box;
2664      uint8_t *map = NULL;
2665
2666      u_box_1d(offset, size, &box);
2667
      /* CPU storage is only useful for partial updates. It can add overhead
       * to glBufferData calls, so avoid using it for full-buffer uploads.
       */
2671      if (!tres->cpu_storage && offset == 0 && size == resource->width0)
2672         usage |= TC_TRANSFER_MAP_UPLOAD_CPU_STORAGE;
2673
2674      map = tc_buffer_map(_pipe, resource, 0, usage, &box, &transfer);
2675      if (map) {
2676         memcpy(map, data, size);
2677         tc_buffer_unmap(_pipe, transfer);
2678      }
2679      return;
2680   }
2681
2682   util_range_add(&tres->b, &tres->valid_buffer_range, offset, offset + size);
2683
2684   /* The upload is small. Enqueue it. */
2685   struct tc_buffer_subdata *p =
2686      tc_add_slot_based_call(tc, TC_CALL_buffer_subdata, tc_buffer_subdata, size);
2687
2688   tc_set_resource_reference(&p->resource, resource);
   /* The buffer will always be busy here, because if it wasn't,
    * tc_improve_map_buffer_flags would have set UNSYNCHRONIZED and we
    * wouldn't get here.
    */
2692   tc_add_to_buffer_list(tc, &tc->buffer_lists[tc->next_buf_list], resource);
2693   p->usage = usage;
2694   p->offset = offset;
2695   p->size = size;
2696   memcpy(p->slot, data, size);
2697}
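
/* Summary of the tc_buffer_subdata paths above: unsynchronized or
 * whole-resource-discard uploads, uploads larger than TC_MAX_SUBDATA_BYTES,
 * and buffers with CPU storage go through tc_buffer_map/tc_buffer_unmap;
 * everything else is small enough to be copied into the batch itself and
 * replayed later by tc_call_buffer_subdata on the driver thread.
 */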
2698
2699struct tc_texture_subdata {
2700   struct tc_call_base base;
2701   unsigned level, usage, stride, layer_stride;
2702   struct pipe_box box;
2703   struct pipe_resource *resource;
2704   char slot[0]; /* more will be allocated if needed */
2705};
2706
2707static uint16_t
2708tc_call_texture_subdata(struct pipe_context *pipe, void *call, uint64_t *last)
2709{
2710   struct tc_texture_subdata *p = (struct tc_texture_subdata *)call;
2711
2712   pipe->texture_subdata(pipe, p->resource, p->level, p->usage, &p->box,
2713                         p->slot, p->stride, p->layer_stride);
2714   tc_drop_resource_reference(p->resource);
2715   return p->base.num_slots;
2716}
2717
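/* The size estimate in tc_texture_subdata below is
 * (depth - 1) * layer_stride + (height - 1) * stride + width * blocksize.
 * Illustrative example: a 4x4x1 box of a 4-byte-per-texel format with
 * stride = 64 and layer_stride = 0 gives (4 - 1) * 64 + 4 * 4 = 208 bytes;
 * uploads at or below TC_MAX_SUBDATA_BYTES are enqueued, larger ones sync.
 */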
2718static void
2719tc_texture_subdata(struct pipe_context *_pipe,
2720                   struct pipe_resource *resource,
2721                   unsigned level, unsigned usage,
2722                   const struct pipe_box *box,
2723                   const void *data, unsigned stride,
2724                   unsigned layer_stride)
2725{
2726   struct threaded_context *tc = threaded_context(_pipe);
2727   unsigned size;
2728
2729   assert(box->height >= 1);
2730   assert(box->depth >= 1);
2731
2732   size = (box->depth - 1) * layer_stride +
2733          (box->height - 1) * stride +
2734          box->width * util_format_get_blocksize(resource->format);
2735   if (!size)
2736      return;
2737
   /* Small uploads can be enqueued; big uploads must sync. */
2739   if (size <= TC_MAX_SUBDATA_BYTES) {
2740      struct tc_texture_subdata *p =
2741         tc_add_slot_based_call(tc, TC_CALL_texture_subdata, tc_texture_subdata, size);
2742
2743      tc_set_resource_reference(&p->resource, resource);
2744      p->level = level;
2745      p->usage = usage;
2746      p->box = *box;
2747      p->stride = stride;
2748      p->layer_stride = layer_stride;
2749      memcpy(p->slot, data, size);
2750   } else {
2751      struct pipe_context *pipe = tc->pipe;
2752
2753      tc_sync(tc);
2754      tc_set_driver_thread(tc);
2755      pipe->texture_subdata(pipe, resource, level, usage, box, data,
2756                            stride, layer_stride);
2757      tc_clear_driver_thread(tc);
2758   }
2759}
2760
2761
2762/********************************************************************
2763 * miscellaneous
2764 */
2765
2766#define TC_FUNC_SYNC_RET0(ret_type, func) \
2767   static ret_type \
2768   tc_##func(struct pipe_context *_pipe) \
2769   { \
2770      struct threaded_context *tc = threaded_context(_pipe); \
2771      struct pipe_context *pipe = tc->pipe; \
2772      tc_sync(tc); \
2773      return pipe->func(pipe); \
2774   }
2775
2776TC_FUNC_SYNC_RET0(uint64_t, get_timestamp)
2777
2778static void
2779tc_get_sample_position(struct pipe_context *_pipe,
2780                       unsigned sample_count, unsigned sample_index,
2781                       float *out_value)
2782{
2783   struct threaded_context *tc = threaded_context(_pipe);
2784   struct pipe_context *pipe = tc->pipe;
2785
2786   tc_sync(tc);
2787   pipe->get_sample_position(pipe, sample_count, sample_index,
2788                             out_value);
2789}
2790
2791static enum pipe_reset_status
2792tc_get_device_reset_status(struct pipe_context *_pipe)
2793{
2794   struct threaded_context *tc = threaded_context(_pipe);
2795   struct pipe_context *pipe = tc->pipe;
2796
2797   if (!tc->options.unsynchronized_get_device_reset_status)
2798      tc_sync(tc);
2799
2800   return pipe->get_device_reset_status(pipe);
2801}
2802
2803static void
2804tc_set_device_reset_callback(struct pipe_context *_pipe,
2805                             const struct pipe_device_reset_callback *cb)
2806{
2807   struct threaded_context *tc = threaded_context(_pipe);
2808   struct pipe_context *pipe = tc->pipe;
2809
2810   tc_sync(tc);
2811   pipe->set_device_reset_callback(pipe, cb);
2812}
2813
2814struct tc_string_marker {
2815   struct tc_call_base base;
2816   int len;
2817   char slot[0]; /* more will be allocated if needed */
2818};
2819
2820static uint16_t
2821tc_call_emit_string_marker(struct pipe_context *pipe, void *call, uint64_t *last)
2822{
2823   struct tc_string_marker *p = (struct tc_string_marker *)call;
2824   pipe->emit_string_marker(pipe, p->slot, p->len);
2825   return p->base.num_slots;
2826}
2827
2828static void
2829tc_emit_string_marker(struct pipe_context *_pipe,
2830                      const char *string, int len)
2831{
2832   struct threaded_context *tc = threaded_context(_pipe);
2833
2834   if (len <= TC_MAX_STRING_MARKER_BYTES) {
2835      struct tc_string_marker *p =
2836         tc_add_slot_based_call(tc, TC_CALL_emit_string_marker, tc_string_marker, len);
2837
2838      memcpy(p->slot, string, len);
2839      p->len = len;
2840   } else {
2841      struct pipe_context *pipe = tc->pipe;
2842
2843      tc_sync(tc);
2844      tc_set_driver_thread(tc);
2845      pipe->emit_string_marker(pipe, string, len);
2846      tc_clear_driver_thread(tc);
2847   }
2848}
2849
2850static void
2851tc_dump_debug_state(struct pipe_context *_pipe, FILE *stream,
2852                    unsigned flags)
2853{
2854   struct threaded_context *tc = threaded_context(_pipe);
2855   struct pipe_context *pipe = tc->pipe;
2856
2857   tc_sync(tc);
2858   pipe->dump_debug_state(pipe, stream, flags);
2859}
2860
2861static void
2862tc_set_debug_callback(struct pipe_context *_pipe,
2863                      const struct util_debug_callback *cb)
2864{
2865   struct threaded_context *tc = threaded_context(_pipe);
2866   struct pipe_context *pipe = tc->pipe;
2867
2868   tc_sync(tc);
2869
2870   /* Drop all synchronous debug callbacks. Drivers are expected to be OK
2871    * with this. shader-db will use an environment variable to disable
2872    * the threaded context.
2873    */
2874   if (cb && !cb->async)
2875      pipe->set_debug_callback(pipe, NULL);
2876   else
2877      pipe->set_debug_callback(pipe, cb);
2878}
2879
2880static void
2881tc_set_log_context(struct pipe_context *_pipe, struct u_log_context *log)
2882{
2883   struct threaded_context *tc = threaded_context(_pipe);
2884   struct pipe_context *pipe = tc->pipe;
2885
2886   tc_sync(tc);
2887   pipe->set_log_context(pipe, log);
2888}
2889
2890static void
2891tc_create_fence_fd(struct pipe_context *_pipe,
2892                   struct pipe_fence_handle **fence, int fd,
2893                   enum pipe_fd_type type)
2894{
2895   struct threaded_context *tc = threaded_context(_pipe);
2896   struct pipe_context *pipe = tc->pipe;
2897
2898   tc_sync(tc);
2899   pipe->create_fence_fd(pipe, fence, fd, type);
2900}
2901
2902struct tc_fence_call {
2903   struct tc_call_base base;
2904   struct pipe_fence_handle *fence;
2905};
2906
2907static uint16_t
2908tc_call_fence_server_sync(struct pipe_context *pipe, void *call, uint64_t *last)
2909{
2910   struct pipe_fence_handle *fence = to_call(call, tc_fence_call)->fence;
2911
2912   pipe->fence_server_sync(pipe, fence);
2913   pipe->screen->fence_reference(pipe->screen, &fence, NULL);
2914   return call_size(tc_fence_call);
2915}
2916
2917static void
2918tc_fence_server_sync(struct pipe_context *_pipe,
2919                     struct pipe_fence_handle *fence)
2920{
2921   struct threaded_context *tc = threaded_context(_pipe);
2922   struct pipe_screen *screen = tc->pipe->screen;
2923   struct tc_fence_call *call = tc_add_call(tc, TC_CALL_fence_server_sync,
2924                                            tc_fence_call);
2925
2926   call->fence = NULL;
2927   screen->fence_reference(screen, &call->fence, fence);
2928}
2929
2930static void
2931tc_fence_server_signal(struct pipe_context *_pipe,
                        struct pipe_fence_handle *fence)
2933{
2934   struct threaded_context *tc = threaded_context(_pipe);
2935   struct pipe_context *pipe = tc->pipe;
2936   tc_sync(tc);
2937   pipe->fence_server_signal(pipe, fence);
2938}
2939
2940static struct pipe_video_codec *
2941tc_create_video_codec(UNUSED struct pipe_context *_pipe,
2942                      UNUSED const struct pipe_video_codec *templ)
2943{
2944   unreachable("Threaded context should not be enabled for video APIs");
2945   return NULL;
2946}
2947
2948static struct pipe_video_buffer *
2949tc_create_video_buffer(UNUSED struct pipe_context *_pipe,
2950                       UNUSED const struct pipe_video_buffer *templ)
2951{
2952   unreachable("Threaded context should not be enabled for video APIs");
2953   return NULL;
2954}
2955
2956struct tc_context_param {
2957   struct tc_call_base base;
2958   enum pipe_context_param param;
2959   unsigned value;
2960};
2961
2962static uint16_t
2963tc_call_set_context_param(struct pipe_context *pipe, void *call, uint64_t *last)
2964{
2965   struct tc_context_param *p = to_call(call, tc_context_param);
2966
2967   if (pipe->set_context_param)
2968      pipe->set_context_param(pipe, p->param, p->value);
2969
2970   return call_size(tc_context_param);
2971}
2972
2973static void
2974tc_set_context_param(struct pipe_context *_pipe,
                     enum pipe_context_param param,
                     unsigned value)
2977{
2978   struct threaded_context *tc = threaded_context(_pipe);
2979
2980   if (param == PIPE_CONTEXT_PARAM_PIN_THREADS_TO_L3_CACHE) {
2981      /* Pin the gallium thread as requested. */
2982      util_set_thread_affinity(tc->queue.threads[0],
2983                               util_get_cpu_caps()->L3_affinity_mask[value],
2984                               NULL, util_get_cpu_caps()->num_cpu_mask_bits);
2985
2986      /* Execute this immediately (without enqueuing).
2987       * It's required to be thread-safe.
2988       */
2989      struct pipe_context *pipe = tc->pipe;
2990      if (pipe->set_context_param)
2991         pipe->set_context_param(pipe, param, value);
2992      return;
2993   }
2994
2995   if (tc->pipe->set_context_param) {
2996      struct tc_context_param *call =
2997         tc_add_call(tc, TC_CALL_set_context_param, tc_context_param);
2998
2999      call->param = param;
3000      call->value = value;
3001   }
3002}
3003
3004
3005/********************************************************************
3006 * draw, launch, clear, blit, copy, flush
3007 */
3008
3009struct tc_flush_call {
3010   struct tc_call_base base;
3011   unsigned flags;
3012   struct threaded_context *tc;
3013   struct pipe_fence_handle *fence;
3014};
3015
3016static void
3017tc_flush_queries(struct threaded_context *tc)
3018{
3019   struct threaded_query *tq, *tmp;
3020   LIST_FOR_EACH_ENTRY_SAFE(tq, tmp, &tc->unflushed_queries, head_unflushed) {
3021      list_del(&tq->head_unflushed);
3022
3023      /* Memory release semantics: due to a possible race with
3024       * tc_get_query_result, we must ensure that the linked list changes
3025       * are visible before setting tq->flushed.
3026       */
3027      p_atomic_set(&tq->flushed, true);
3028   }
3029}
3030
3031static uint16_t
3032tc_call_flush(struct pipe_context *pipe, void *call, uint64_t *last)
3033{
3034   struct tc_flush_call *p = to_call(call, tc_flush_call);
3035   struct pipe_screen *screen = pipe->screen;
3036
3037   pipe->flush(pipe, p->fence ? &p->fence : NULL, p->flags);
3038   screen->fence_reference(screen, &p->fence, NULL);
3039
3040   if (!(p->flags & PIPE_FLUSH_DEFERRED))
3041      tc_flush_queries(p->tc);
3042
3043   return call_size(tc_flush_call);
3044}
3045
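/* For asynchronous flushes, tc_flush below creates the fence up front through
 * the driver-provided create_fence callback (keyed by a per-batch token), so
 * the caller gets a fence without waiting for the driver thread; the actual
 * pipe->flush happens later when tc_call_flush is replayed.
 */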
3046static void
3047tc_flush(struct pipe_context *_pipe, struct pipe_fence_handle **fence,
3048         unsigned flags)
3049{
3050   struct threaded_context *tc = threaded_context(_pipe);
3051   struct pipe_context *pipe = tc->pipe;
3052   struct pipe_screen *screen = pipe->screen;
3053   bool async = flags & (PIPE_FLUSH_DEFERRED | PIPE_FLUSH_ASYNC);
3054
3055   if (async && tc->options.create_fence) {
3056      if (fence) {
3057         struct tc_batch *next = &tc->batch_slots[tc->next];
3058
3059         if (!next->token) {
3060            next->token = malloc(sizeof(*next->token));
3061            if (!next->token)
3062               goto out_of_memory;
3063
3064            pipe_reference_init(&next->token->ref, 1);
3065            next->token->tc = tc;
3066         }
3067
3068         screen->fence_reference(screen, fence,
3069                                 tc->options.create_fence(pipe, next->token));
3070         if (!*fence)
3071            goto out_of_memory;
3072      }
3073
3074      struct tc_flush_call *p = tc_add_call(tc, TC_CALL_flush, tc_flush_call);
3075      p->tc = tc;
3076      p->fence = fence ? *fence : NULL;
3077      p->flags = flags | TC_FLUSH_ASYNC;
3078
3079      if (!(flags & PIPE_FLUSH_DEFERRED))
3080         tc_batch_flush(tc);
3081      return;
3082   }
3083
3084out_of_memory:
3085   tc_sync_msg(tc, flags & PIPE_FLUSH_END_OF_FRAME ? "end of frame" :
3086                   flags & PIPE_FLUSH_DEFERRED ? "deferred fence" : "normal");
3087
3088   if (!(flags & PIPE_FLUSH_DEFERRED))
3089      tc_flush_queries(tc);
3090   tc_set_driver_thread(tc);
3091   pipe->flush(pipe, fence, flags);
3092   tc_clear_driver_thread(tc);
3093}
3094
3095struct tc_draw_single {
3096   struct tc_call_base base;
3097   unsigned index_bias;
3098   struct pipe_draw_info info;
3099};
3100
3101struct tc_draw_single_drawid {
3102   struct tc_draw_single base;
3103   unsigned drawid_offset;
3104};
3105
3106static uint16_t
3107tc_call_draw_single_drawid(struct pipe_context *pipe, void *call, uint64_t *last)
3108{
3109   struct tc_draw_single_drawid *info_drawid = to_call(call, tc_draw_single_drawid);
3110   struct tc_draw_single *info = &info_drawid->base;
3111
3112   /* u_threaded_context stores start/count in min/max_index for single draws. */
3113   /* Drivers using u_threaded_context shouldn't use min/max_index. */
3114   struct pipe_draw_start_count_bias draw;
3115
3116   draw.start = info->info.min_index;
3117   draw.count = info->info.max_index;
3118   draw.index_bias = info->index_bias;
3119
3120   info->info.index_bounds_valid = false;
3121   info->info.has_user_indices = false;
3122   info->info.take_index_buffer_ownership = false;
3123
3124   pipe->draw_vbo(pipe, &info->info, info_drawid->drawid_offset, NULL, &draw, 1);
3125   if (info->info.index_size)
3126      tc_drop_resource_reference(info->info.index.resource);
3127
3128   return call_size(tc_draw_single_drawid);
3129}
3130
3131static void
3132simplify_draw_info(struct pipe_draw_info *info)
3133{
3134   /* Clear these fields to facilitate draw merging.
3135    * Drivers shouldn't use them.
3136    */
3137   info->has_user_indices = false;
3138   info->index_bounds_valid = false;
3139   info->take_index_buffer_ownership = false;
3140   info->index_bias_varies = false;
3141   info->_pad = 0;
3142
3143   /* This shouldn't be set when merging single draws. */
3144   info->increment_draw_id = false;
3145
3146   if (info->index_size) {
3147      if (!info->primitive_restart)
3148         info->restart_index = 0;
3149   } else {
3150      assert(!info->primitive_restart);
3151      info->primitive_restart = false;
3152      info->restart_index = 0;
3153      info->index.resource = NULL;
3154   }
3155}
3156
3157static bool
3158is_next_call_a_mergeable_draw(struct tc_draw_single *first,
3159                              struct tc_draw_single *next)
3160{
3161   if (next->base.call_id != TC_CALL_draw_single)
3162      return false;
3163
3164   simplify_draw_info(&next->info);
3165
3166   STATIC_ASSERT(offsetof(struct pipe_draw_info, min_index) ==
3167                 sizeof(struct pipe_draw_info) - 8);
3168   STATIC_ASSERT(offsetof(struct pipe_draw_info, max_index) ==
3169                 sizeof(struct pipe_draw_info) - 4);
3170   /* All fields must be the same except start and count. */
3171   /* u_threaded_context stores start/count in min/max_index for single draws. */
3172   return memcmp((uint32_t*)&first->info, (uint32_t*)&next->info,
3173                 DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX) == 0;
3174}
3175
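/* Execute a recorded single draw, merging it with the draws that follow it
 * when possible.
 *
 * Consecutive TC_CALL_draw_single calls whose pipe_draw_info matches in every
 * field except start/count (which u_threaded_context stores in
 * min_index/max_index) are collapsed into one pipe->draw_vbo call with
 * num_draws > 1.  For example, three recorded draws
 *
 *    draw_single {start = 0,  count = 6}
 *    draw_single {start = 6,  count = 6}
 *    draw_single {start = 12, count = 6}
 *
 * reach the driver as a single draw_vbo(..., draws, 3).  The "multi" array
 * below is sized for the worst case of a batch consisting entirely of
 * draw_single calls.
 */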
3176static uint16_t
3177tc_call_draw_single(struct pipe_context *pipe, void *call, uint64_t *last_ptr)
3178{
3179   /* Draw call merging. */
3180   struct tc_draw_single *first = to_call(call, tc_draw_single);
3181   struct tc_draw_single *last = (struct tc_draw_single *)last_ptr;
3182   struct tc_draw_single *next = get_next_call(first, tc_draw_single);
3183
3184   /* If at least 2 consecutive draw calls can be merged... */
3185   if (next != last &&
3186       next->base.call_id == TC_CALL_draw_single) {
3187      simplify_draw_info(&first->info);
3188
3189      if (is_next_call_a_mergeable_draw(first, next)) {
3190         /* The maximum number of merged draws is given by the batch size. */
3191         struct pipe_draw_start_count_bias multi[TC_SLOTS_PER_BATCH / call_size(tc_draw_single)];
3192         unsigned num_draws = 2;
3193         bool index_bias_varies = first->index_bias != next->index_bias;
3194
3195         /* u_threaded_context stores start/count in min/max_index for single draws. */
3196         multi[0].start = first->info.min_index;
3197         multi[0].count = first->info.max_index;
3198         multi[0].index_bias = first->index_bias;
3199         multi[1].start = next->info.min_index;
3200         multi[1].count = next->info.max_index;
3201         multi[1].index_bias = next->index_bias;
3202
3203         /* Find how many other draws can be merged. */
3204         next = get_next_call(next, tc_draw_single);
3205         for (; next != last && is_next_call_a_mergeable_draw(first, next);
3206              next = get_next_call(next, tc_draw_single), num_draws++) {
3207            /* u_threaded_context stores start/count in min/max_index for single draws. */
3208            multi[num_draws].start = next->info.min_index;
3209            multi[num_draws].count = next->info.max_index;
3210            multi[num_draws].index_bias = next->index_bias;
3211            index_bias_varies |= first->index_bias != next->index_bias;
3212         }
3213
3214         first->info.index_bias_varies = index_bias_varies;
3215         pipe->draw_vbo(pipe, &first->info, 0, NULL, multi, num_draws);
3216
3217         /* Since all draws use the same index buffer, drop all references at once. */
3218         if (first->info.index_size)
3219            pipe_drop_resource_references(first->info.index.resource, num_draws);
3220
3221         return call_size(tc_draw_single) * num_draws;
3222      }
3223   }
3224
3225   /* u_threaded_context stores start/count in min/max_index for single draws. */
3226   /* Drivers using u_threaded_context shouldn't use min/max_index. */
3227   struct pipe_draw_start_count_bias draw;
3228
3229   draw.start = first->info.min_index;
3230   draw.count = first->info.max_index;
3231   draw.index_bias = first->index_bias;
3232
3233   first->info.index_bounds_valid = false;
3234   first->info.has_user_indices = false;
3235   first->info.take_index_buffer_ownership = false;
3236
3237   pipe->draw_vbo(pipe, &first->info, 0, NULL, &draw, 1);
3238   if (first->info.index_size)
3239      tc_drop_resource_reference(first->info.index.resource);
3240
3241   return call_size(tc_draw_single);
3242}
3243
3244struct tc_draw_indirect {
3245   struct tc_call_base base;
3246   struct pipe_draw_start_count_bias draw;
3247   struct pipe_draw_info info;
3248   struct pipe_draw_indirect_info indirect;
3249};
3250
3251static uint16_t
3252tc_call_draw_indirect(struct pipe_context *pipe, void *call, uint64_t *last)
3253{
3254   struct tc_draw_indirect *info = to_call(call, tc_draw_indirect);
3255
3256   info->info.index_bounds_valid = false;
3257   info->info.take_index_buffer_ownership = false;
3258
3259   pipe->draw_vbo(pipe, &info->info, 0, &info->indirect, &info->draw, 1);
3260   if (info->info.index_size)
3261      tc_drop_resource_reference(info->info.index.resource);
3262
3263   tc_drop_resource_reference(info->indirect.buffer);
3264   tc_drop_resource_reference(info->indirect.indirect_draw_count);
3265   tc_drop_so_target_reference(info->indirect.count_from_stream_output);
3266   return call_size(tc_draw_indirect);
3267}
3268
3269struct tc_draw_multi {
3270   struct tc_call_base base;
3271   unsigned num_draws;
3272   struct pipe_draw_info info;
3273   struct pipe_draw_start_count_bias slot[]; /* variable-sized array */
3274};
3275
3276static uint16_t
3277tc_call_draw_multi(struct pipe_context *pipe, void *call, uint64_t *last)
3278{
3279   struct tc_draw_multi *info = (struct tc_draw_multi*)call;
3280
3281   info->info.has_user_indices = false;
3282   info->info.index_bounds_valid = false;
3283   info->info.take_index_buffer_ownership = false;
3284
3285   pipe->draw_vbo(pipe, &info->info, 0, NULL, info->slot, info->num_draws);
3286   if (info->info.index_size)
3287      tc_drop_resource_reference(info->info.index.resource);
3288
3289   return info->base.num_slots;
3290}
3291
3292#define DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX \
3293   offsetof(struct pipe_draw_info, index)
3294
3295void
3296tc_draw_vbo(struct pipe_context *_pipe, const struct pipe_draw_info *info,
3297            unsigned drawid_offset,
3298            const struct pipe_draw_indirect_info *indirect,
3299            const struct pipe_draw_start_count_bias *draws,
3300            unsigned num_draws)
3301{
3302   STATIC_ASSERT(DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX +
3303                 sizeof(intptr_t) == offsetof(struct pipe_draw_info, min_index));
3304
3305   struct threaded_context *tc = threaded_context(_pipe);
3306   unsigned index_size = info->index_size;
3307   bool has_user_indices = info->has_user_indices;
3308
3309   if (unlikely(indirect)) {
3310      assert(!has_user_indices);
3311      assert(num_draws == 1);
3312
3313      struct tc_draw_indirect *p =
3314         tc_add_call(tc, TC_CALL_draw_indirect, tc_draw_indirect);
3315      struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
3316
3317      if (index_size) {
3318         if (!info->take_index_buffer_ownership) {
3319            tc_set_resource_reference(&p->info.index.resource,
3320                                      info->index.resource);
3321         }
3322         tc_add_to_buffer_list(tc, next, info->index.resource);
3323      }
3324      memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX);
3325
3326      tc_set_resource_reference(&p->indirect.buffer, indirect->buffer);
3327      tc_set_resource_reference(&p->indirect.indirect_draw_count,
3328                                indirect->indirect_draw_count);
3329      p->indirect.count_from_stream_output = NULL;
3330      pipe_so_target_reference(&p->indirect.count_from_stream_output,
3331                               indirect->count_from_stream_output);
3332
3333      if (indirect->buffer)
3334         tc_add_to_buffer_list(tc, next, indirect->buffer);
3335      if (indirect->indirect_draw_count)
3336         tc_add_to_buffer_list(tc, next, indirect->indirect_draw_count);
3337      if (indirect->count_from_stream_output)
3338         tc_add_to_buffer_list(tc, next, indirect->count_from_stream_output->buffer);
3339
3340      memcpy(&p->indirect, indirect, sizeof(*indirect));
3341      p->draw.start = draws[0].start;
3342
3343      /* This must be after tc_add_call, which can flush the batch. */
3344      if (unlikely(tc->add_all_gfx_bindings_to_buffer_list))
3345         tc_add_all_gfx_bindings_to_buffer_list(tc);
3346      return;
3347   }
3348
3349   if (num_draws == 1) {
3350      /* Single draw. */
3351      if (index_size && has_user_indices) {
3352         unsigned size = draws[0].count * index_size;
3353         struct pipe_resource *buffer = NULL;
3354         unsigned offset;
3355
3356         if (!size)
3357            return;
3358
         /* This must be done before adding draw_vbo, because the upload may
          * add calls such as transfer_unmap that can flush the current batch,
          * which would send a partially-initialized draw_vbo call to the
          * driver if the draw had already been added.
          */
3363         u_upload_data(tc->base.stream_uploader, 0, size, 4,
3364                       (uint8_t*)info->index.user + draws[0].start * index_size,
3365                       &offset, &buffer);
3366         if (unlikely(!buffer))
3367            return;
3368
3369         struct tc_draw_single *p = drawid_offset > 0 ?
3370            &tc_add_call(tc, TC_CALL_draw_single_drawid, tc_draw_single_drawid)->base :
3371            tc_add_call(tc, TC_CALL_draw_single, tc_draw_single);
3372         memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX);
3373         p->info.index.resource = buffer;
3374         if (drawid_offset > 0)
3375            ((struct tc_draw_single_drawid*)p)->drawid_offset = drawid_offset;
3376         /* u_threaded_context stores start/count in min/max_index for single draws. */
3377         p->info.min_index = offset >> util_logbase2(index_size);
3378         p->info.max_index = draws[0].count;
3379         p->index_bias = draws[0].index_bias;
3380      } else {
3381         /* Non-indexed call or indexed with a real index buffer. */
3382         struct tc_draw_single *p = drawid_offset > 0 ?
3383            &tc_add_call(tc, TC_CALL_draw_single_drawid, tc_draw_single_drawid)->base :
3384            tc_add_call(tc, TC_CALL_draw_single, tc_draw_single);
3385         if (index_size) {
3386            if (!info->take_index_buffer_ownership) {
3387               tc_set_resource_reference(&p->info.index.resource,
3388                                         info->index.resource);
3389            }
3390            tc_add_to_buffer_list(tc, &tc->buffer_lists[tc->next_buf_list], info->index.resource);
3391         }
3392         if (drawid_offset > 0)
3393            ((struct tc_draw_single_drawid*)p)->drawid_offset = drawid_offset;
3394         memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX);
3395         /* u_threaded_context stores start/count in min/max_index for single draws. */
3396         p->info.min_index = draws[0].start;
3397         p->info.max_index = draws[0].count;
3398         p->index_bias = draws[0].index_bias;
3399      }
3400
3401      /* This must be after tc_add_call, which can flush the batch. */
3402      if (unlikely(tc->add_all_gfx_bindings_to_buffer_list))
3403         tc_add_all_gfx_bindings_to_buffer_list(tc);
3404      return;
3405   }
3406
3407   const int draw_overhead_bytes = sizeof(struct tc_draw_multi);
3408   const int one_draw_slot_bytes = sizeof(((struct tc_draw_multi*)NULL)->slot[0]);
3409   const int slots_for_one_draw = DIV_ROUND_UP(draw_overhead_bytes + one_draw_slot_bytes,
3410                                               sizeof(struct tc_call_base));
3411   /* Multi draw. */
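   /* The draws are recorded as one or more tc_draw_multi calls, each sized to
    * fit in the space remaining in the current batch.  When not even one draw
    * fits, the chunk is sized for a whole batch on the assumption that the
    * call will be placed in the next, empty batch.
    */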
3412   if (index_size && has_user_indices) {
3413      struct pipe_resource *buffer = NULL;
3414      unsigned buffer_offset, total_count = 0;
3415      unsigned index_size_shift = util_logbase2(index_size);
3416      uint8_t *ptr = NULL;
3417
3418      /* Get the total count. */
3419      for (unsigned i = 0; i < num_draws; i++)
3420         total_count += draws[i].count;
3421
3422      if (!total_count)
3423         return;
3424
      /* Allocate space for all index buffers.
       *
       * This must be done before adding draw_vbo, because the allocation may
       * add calls such as transfer_unmap that can flush the current batch,
       * which would send a partially-initialized draw_vbo call to the driver
       * if the draw had already been added.
       */
3431      u_upload_alloc(tc->base.stream_uploader, 0,
3432                     total_count << index_size_shift, 4,
3433                     &buffer_offset, &buffer, (void**)&ptr);
3434      if (unlikely(!buffer))
3435         return;
3436
3437      int total_offset = 0;
3438      unsigned offset = 0;
3439      while (num_draws) {
3440         struct tc_batch *next = &tc->batch_slots[tc->next];
3441
3442         int nb_slots_left = TC_SLOTS_PER_BATCH - next->num_total_slots;
         /* If there isn't enough space left for even one draw, size for the
          * next (empty) batch instead.
          */
3444         if (nb_slots_left < slots_for_one_draw)
3445            nb_slots_left = TC_SLOTS_PER_BATCH;
3446         const int size_left_bytes = nb_slots_left * sizeof(struct tc_call_base);
3447
3448         /* How many draws can we fit in the current batch */
3449         const int dr = MIN2(num_draws, (size_left_bytes - draw_overhead_bytes) / one_draw_slot_bytes);
3450
3451         struct tc_draw_multi *p =
3452            tc_add_slot_based_call(tc, TC_CALL_draw_multi, tc_draw_multi,
3453                                   dr);
3454         memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_INDEXBUF_AND_MIN_MAX_INDEX);
3455
3456         if (total_offset == 0)
3457            /* the first slot inherits the reference from u_upload_alloc() */
3458            p->info.index.resource = buffer;
3459         else
3460            /* all following slots need a new reference */
3461            tc_set_resource_reference(&p->info.index.resource, buffer);
3462
3463         p->num_draws = dr;
3464
3465         /* Upload index buffers. */
3466         for (unsigned i = 0; i < dr; i++) {
3467            unsigned count = draws[i + total_offset].count;
3468
3469            if (!count) {
3470               p->slot[i].start = 0;
3471               p->slot[i].count = 0;
3472               p->slot[i].index_bias = 0;
3473               continue;
3474            }
3475
3476            unsigned size = count << index_size_shift;
3477            memcpy(ptr + offset,
3478                   (uint8_t*)info->index.user +
3479                   (draws[i + total_offset].start << index_size_shift), size);
3480            p->slot[i].start = (buffer_offset + offset) >> index_size_shift;
3481            p->slot[i].count = count;
3482            p->slot[i].index_bias = draws[i + total_offset].index_bias;
3483            offset += size;
3484         }
3485
3486         total_offset += dr;
3487         num_draws -= dr;
3488      }
3489   } else {
3490      int total_offset = 0;
3491      bool take_index_buffer_ownership = info->take_index_buffer_ownership;
3492      while (num_draws) {
3493         struct tc_batch *next = &tc->batch_slots[tc->next];
3494
3495         int nb_slots_left = TC_SLOTS_PER_BATCH - next->num_total_slots;
         /* If there isn't enough space left for even one draw, size for the
          * next (empty) batch instead.
          */
3497         if (nb_slots_left < slots_for_one_draw)
3498            nb_slots_left = TC_SLOTS_PER_BATCH;
3499         const int size_left_bytes = nb_slots_left * sizeof(struct tc_call_base);
3500
3501         /* How many draws can we fit in the current batch */
3502         const int dr = MIN2(num_draws, (size_left_bytes - draw_overhead_bytes) / one_draw_slot_bytes);
3503
3504         /* Non-indexed call or indexed with a real index buffer. */
3505         struct tc_draw_multi *p =
3506            tc_add_slot_based_call(tc, TC_CALL_draw_multi, tc_draw_multi,
3507                                   dr);
3508         if (index_size) {
3509            if (!take_index_buffer_ownership) {
3510               tc_set_resource_reference(&p->info.index.resource,
3511                                         info->index.resource);
3512            }
3513            tc_add_to_buffer_list(tc, &tc->buffer_lists[tc->next_buf_list], info->index.resource);
3514         }
3515         take_index_buffer_ownership = false;
3516         memcpy(&p->info, info, DRAW_INFO_SIZE_WITHOUT_MIN_MAX_INDEX);
3517         p->num_draws = dr;
3518         memcpy(p->slot, &draws[total_offset], sizeof(draws[0]) * dr);
3519         num_draws -= dr;
3520
3521         total_offset += dr;
3522      }
3523   }
3524
3525   /* This must be after tc_add_*call, which can flush the batch. */
3526   if (unlikely(tc->add_all_gfx_bindings_to_buffer_list))
3527      tc_add_all_gfx_bindings_to_buffer_list(tc);
3528}
3529
3530struct tc_draw_vstate_single {
3531   struct tc_call_base base;
3532   struct pipe_draw_start_count_bias draw;
3533
   /* The following fields must be laid out contiguously, without padding
    * holes, because draw merging compares them with a single memcmp.
    */
3537   struct pipe_vertex_state *state;
3538   uint32_t partial_velem_mask;
3539   struct pipe_draw_vertex_state_info info;
3540};
3541
3542static bool
3543is_next_call_a_mergeable_draw_vstate(struct tc_draw_vstate_single *first,
3544                                     struct tc_draw_vstate_single *next)
3545{
3546   if (next->base.call_id != TC_CALL_draw_vstate_single)
3547      return false;
3548
3549   return !memcmp(&first->state, &next->state,
3550                  offsetof(struct tc_draw_vstate_single, info) +
3551                  sizeof(struct pipe_draw_vertex_state_info) -
3552                  offsetof(struct tc_draw_vstate_single, state));
3553}
3554
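/* Execute a recorded draw_vertex_state call, merging consecutive mergeable
 * calls the same way tc_call_draw_single merges draws.  Mergeability is
 * decided by the single memcmp above, which is why state, partial_velem_mask
 * and info must stay contiguous in tc_draw_vstate_single.
 */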
3555static uint16_t
3556tc_call_draw_vstate_single(struct pipe_context *pipe, void *call, uint64_t *last_ptr)
3557{
3558   /* Draw call merging. */
3559   struct tc_draw_vstate_single *first = to_call(call, tc_draw_vstate_single);
3560   struct tc_draw_vstate_single *last = (struct tc_draw_vstate_single *)last_ptr;
3561   struct tc_draw_vstate_single *next = get_next_call(first, tc_draw_vstate_single);
3562
3563   /* If at least 2 consecutive draw calls can be merged... */
3564   if (next != last &&
3565       is_next_call_a_mergeable_draw_vstate(first, next)) {
3566      /* The maximum number of merged draws is given by the batch size. */
3567      struct pipe_draw_start_count_bias draws[TC_SLOTS_PER_BATCH /
3568                                              call_size(tc_draw_vstate_single)];
3569      unsigned num_draws = 2;
3570
3571      draws[0] = first->draw;
3572      draws[1] = next->draw;
3573
3574      /* Find how many other draws can be merged. */
3575      next = get_next_call(next, tc_draw_vstate_single);
3576      for (; next != last &&
3577           is_next_call_a_mergeable_draw_vstate(first, next);
3578           next = get_next_call(next, tc_draw_vstate_single),
3579           num_draws++)
3580         draws[num_draws] = next->draw;
3581
3582      pipe->draw_vertex_state(pipe, first->state, first->partial_velem_mask,
3583                              first->info, draws, num_draws);
3584      /* Since all draws use the same state, drop all references at once. */
3585      tc_drop_vertex_state_references(first->state, num_draws);
3586
3587      return call_size(tc_draw_vstate_single) * num_draws;
3588   }
3589
3590   pipe->draw_vertex_state(pipe, first->state, first->partial_velem_mask,
3591                           first->info, &first->draw, 1);
3592   tc_drop_vertex_state_references(first->state, 1);
3593   return call_size(tc_draw_vstate_single);
3594}
3595
3596struct tc_draw_vstate_multi {
3597   struct tc_call_base base;
3598   uint32_t partial_velem_mask;
3599   struct pipe_draw_vertex_state_info info;
3600   unsigned num_draws;
3601   struct pipe_vertex_state *state;
   struct pipe_draw_start_count_bias slot[]; /* variable-sized array */
3603};
3604
3605static uint16_t
3606tc_call_draw_vstate_multi(struct pipe_context *pipe, void *call, uint64_t *last)
3607{
3608   struct tc_draw_vstate_multi *info = (struct tc_draw_vstate_multi*)call;
3609
3610   pipe->draw_vertex_state(pipe, info->state, info->partial_velem_mask,
3611                           info->info, info->slot, info->num_draws);
3612   tc_drop_vertex_state_references(info->state, 1);
3613   return info->base.num_slots;
3614}
3615
3616static void
3617tc_draw_vertex_state(struct pipe_context *_pipe,
3618                     struct pipe_vertex_state *state,
3619                     uint32_t partial_velem_mask,
3620                     struct pipe_draw_vertex_state_info info,
3621                     const struct pipe_draw_start_count_bias *draws,
3622                     unsigned num_draws)
3623{
3624   struct threaded_context *tc = threaded_context(_pipe);
3625
3626   if (num_draws == 1) {
3627      /* Single draw. */
3628      struct tc_draw_vstate_single *p =
3629         tc_add_call(tc, TC_CALL_draw_vstate_single, tc_draw_vstate_single);
3630      p->partial_velem_mask = partial_velem_mask;
3631      p->draw = draws[0];
3632      p->info.mode = info.mode;
3633      p->info.take_vertex_state_ownership = false;
3634
      /* This should always be 0 for simplicity, because we assume that
       * index_bias doesn't vary.
       */
3638      assert(draws[0].index_bias == 0);
3639
3640      if (!info.take_vertex_state_ownership)
3641         tc_set_vertex_state_reference(&p->state, state);
3642      else
3643         p->state = state;
3644
3646      /* This must be after tc_add_*call, which can flush the batch. */
3647      if (unlikely(tc->add_all_gfx_bindings_to_buffer_list))
3648         tc_add_all_gfx_bindings_to_buffer_list(tc);
3649      return;
3650   }
3651
3652   const int draw_overhead_bytes = sizeof(struct tc_draw_vstate_multi);
3653   const int one_draw_slot_bytes = sizeof(((struct tc_draw_vstate_multi*)NULL)->slot[0]);
3654   const int slots_for_one_draw = DIV_ROUND_UP(draw_overhead_bytes + one_draw_slot_bytes,
3655                                               sizeof(struct tc_call_base));
3656   /* Multi draw. */
3657   int total_offset = 0;
3658   bool take_vertex_state_ownership = info.take_vertex_state_ownership;
3659   while (num_draws) {
3660      struct tc_batch *next = &tc->batch_slots[tc->next];
3661
3662      int nb_slots_left = TC_SLOTS_PER_BATCH - next->num_total_slots;
      /* If there isn't enough space left for even one draw, size for the
       * next (empty) batch instead.
       */
3664      if (nb_slots_left < slots_for_one_draw)
3665         nb_slots_left = TC_SLOTS_PER_BATCH;
3666      const int size_left_bytes = nb_slots_left * sizeof(struct tc_call_base);
3667
3668      /* How many draws can we fit in the current batch */
3669      const int dr = MIN2(num_draws, (size_left_bytes - draw_overhead_bytes) / one_draw_slot_bytes);
3670
3671      /* Non-indexed call or indexed with a real index buffer. */
3672      struct tc_draw_vstate_multi *p =
3673         tc_add_slot_based_call(tc, TC_CALL_draw_vstate_multi, tc_draw_vstate_multi, dr);
3674
3675      if (!take_vertex_state_ownership)
3676         tc_set_vertex_state_reference(&p->state, state);
3677      else
3678         p->state = state;
3679
3680      take_vertex_state_ownership = false;
3681      p->partial_velem_mask = partial_velem_mask;
3682      p->info.mode = info.mode;
3683      p->info.take_vertex_state_ownership = false;
3684      p->num_draws = dr;
3685      memcpy(p->slot, &draws[total_offset], sizeof(draws[0]) * dr);
3686      num_draws -= dr;
3687
3688      total_offset += dr;
3689   }
3690
3692   /* This must be after tc_add_*call, which can flush the batch. */
3693   if (unlikely(tc->add_all_gfx_bindings_to_buffer_list))
3694      tc_add_all_gfx_bindings_to_buffer_list(tc);
3695}
3696
3697struct tc_launch_grid_call {
3698   struct tc_call_base base;
3699   struct pipe_grid_info info;
3700};
3701
3702static uint16_t
3703tc_call_launch_grid(struct pipe_context *pipe, void *call, uint64_t *last)
3704{
3705   struct pipe_grid_info *p = &to_call(call, tc_launch_grid_call)->info;
3706
3707   pipe->launch_grid(pipe, p);
3708   tc_drop_resource_reference(p->indirect);
3709   return call_size(tc_launch_grid_call);
3710}
3711
3712static void
3713tc_launch_grid(struct pipe_context *_pipe,
3714               const struct pipe_grid_info *info)
3715{
3716   struct threaded_context *tc = threaded_context(_pipe);
3717   struct tc_launch_grid_call *p = tc_add_call(tc, TC_CALL_launch_grid,
3718                                               tc_launch_grid_call);
3719   assert(info->input == NULL);
3720
3721   tc_set_resource_reference(&p->info.indirect, info->indirect);
3722   memcpy(&p->info, info, sizeof(*info));
3723
3724   if (info->indirect)
3725      tc_add_to_buffer_list(tc, &tc->buffer_lists[tc->next_buf_list], info->indirect);
3726
3727   /* This must be after tc_add_*call, which can flush the batch. */
3728   if (unlikely(tc->add_all_compute_bindings_to_buffer_list))
3729      tc_add_all_compute_bindings_to_buffer_list(tc);
3730}
3731
3732static uint16_t
3733tc_call_resource_copy_region(struct pipe_context *pipe, void *call, uint64_t *last)
3734{
3735   struct tc_resource_copy_region *p = to_call(call, tc_resource_copy_region);
3736
3737   pipe->resource_copy_region(pipe, p->dst, p->dst_level, p->dstx, p->dsty,
3738                              p->dstz, p->src, p->src_level, &p->src_box);
3739   tc_drop_resource_reference(p->dst);
3740   tc_drop_resource_reference(p->src);
3741   return call_size(tc_resource_copy_region);
3742}
3743
3744static void
3745tc_resource_copy_region(struct pipe_context *_pipe,
3746                        struct pipe_resource *dst, unsigned dst_level,
3747                        unsigned dstx, unsigned dsty, unsigned dstz,
3748                        struct pipe_resource *src, unsigned src_level,
3749                        const struct pipe_box *src_box)
3750{
3751   struct threaded_context *tc = threaded_context(_pipe);
3752   struct threaded_resource *tdst = threaded_resource(dst);
3753   struct tc_resource_copy_region *p =
3754      tc_add_call(tc, TC_CALL_resource_copy_region,
3755                  tc_resource_copy_region);
3756
3757   if (dst->target == PIPE_BUFFER)
3758      tc_buffer_disable_cpu_storage(dst);
3759
3760   tc_set_resource_reference(&p->dst, dst);
3761   p->dst_level = dst_level;
3762   p->dstx = dstx;
3763   p->dsty = dsty;
3764   p->dstz = dstz;
3765   tc_set_resource_reference(&p->src, src);
3766   p->src_level = src_level;
3767   p->src_box = *src_box;
3768
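   /* For buffer copies, track both buffers in the current buffer list and
    * extend the destination's valid range so the threaded context knows this
    * region now contains valid data.
    */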
3769   if (dst->target == PIPE_BUFFER) {
3770      struct tc_buffer_list *next = &tc->buffer_lists[tc->next_buf_list];
3771
3772      tc_add_to_buffer_list(tc, next, src);
3773      tc_add_to_buffer_list(tc, next, dst);
3774
3775      util_range_add(&tdst->b, &tdst->valid_buffer_range,
3776                     dstx, dstx + src_box->width);
3777   }
3778}
3779
3780struct tc_blit_call {
3781   struct tc_call_base base;
3782   struct pipe_blit_info info;
3783};
3784
3785static uint16_t
3786tc_call_blit(struct pipe_context *pipe, void *call, uint64_t *last)
3787{
3788   struct pipe_blit_info *blit = &to_call(call, tc_blit_call)->info;
3789
3790   pipe->blit(pipe, blit);
3791   tc_drop_resource_reference(blit->dst.resource);
3792   tc_drop_resource_reference(blit->src.resource);
3793   return call_size(tc_blit_call);
3794}
3795
3796static void
3797tc_blit(struct pipe_context *_pipe, const struct pipe_blit_info *info)
3798{
3799   struct threaded_context *tc = threaded_context(_pipe);
3800   struct tc_blit_call *blit = tc_add_call(tc, TC_CALL_blit, tc_blit_call);
3801
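   /* Take the new references first, then copy the whole blit info.  The
    * memcpy rewrites the resource pointers with the same values that were
    * just referenced, so the reference counts remain correct.
    */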
3802   tc_set_resource_reference(&blit->info.dst.resource, info->dst.resource);
3803   tc_set_resource_reference(&blit->info.src.resource, info->src.resource);
3804   memcpy(&blit->info, info, sizeof(*info));
3805}
3806
3807struct tc_generate_mipmap {
3808   struct tc_call_base base;
3809   enum pipe_format format;
3810   unsigned base_level;
3811   unsigned last_level;
3812   unsigned first_layer;
3813   unsigned last_layer;
3814   struct pipe_resource *res;
3815};
3816
3817static uint16_t
3818tc_call_generate_mipmap(struct pipe_context *pipe, void *call, uint64_t *last)
3819{
3820   struct tc_generate_mipmap *p = to_call(call, tc_generate_mipmap);
3821   ASSERTED bool result = pipe->generate_mipmap(pipe, p->res, p->format,
3822                                                    p->base_level,
3823                                                    p->last_level,
3824                                                    p->first_layer,
3825                                                    p->last_layer);
3826   assert(result);
3827   tc_drop_resource_reference(p->res);
3828   return call_size(tc_generate_mipmap);
3829}
3830
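/* generate_mipmap returns a bool, so the format-support check has to happen
 * synchronously on the application thread; only the actual mipmap generation
 * is recorded and executed later on the driver thread.
 */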
3831static bool
3832tc_generate_mipmap(struct pipe_context *_pipe,
3833                   struct pipe_resource *res,
3834                   enum pipe_format format,
3835                   unsigned base_level,
3836                   unsigned last_level,
3837                   unsigned first_layer,
3838                   unsigned last_layer)
3839{
3840   struct threaded_context *tc = threaded_context(_pipe);
3841   struct pipe_context *pipe = tc->pipe;
3842   struct pipe_screen *screen = pipe->screen;
3843   unsigned bind = PIPE_BIND_SAMPLER_VIEW;
3844
3845   if (util_format_is_depth_or_stencil(format))
3846      bind = PIPE_BIND_DEPTH_STENCIL;
3847   else
3848      bind = PIPE_BIND_RENDER_TARGET;
3849
3850   if (!screen->is_format_supported(screen, format, res->target,
3851                                    res->nr_samples, res->nr_storage_samples,
3852                                    bind))
3853      return false;
3854
3855   struct tc_generate_mipmap *p =
3856      tc_add_call(tc, TC_CALL_generate_mipmap, tc_generate_mipmap);
3857
3858   tc_set_resource_reference(&p->res, res);
3859   p->format = format;
3860   p->base_level = base_level;
3861   p->last_level = last_level;
3862   p->first_layer = first_layer;
3863   p->last_layer = last_layer;
3864   return true;
3865}
3866
3867struct tc_resource_call {
3868   struct tc_call_base base;
3869   struct pipe_resource *resource;
3870};
3871
3872static uint16_t
3873tc_call_flush_resource(struct pipe_context *pipe, void *call, uint64_t *last)
3874{
3875   struct pipe_resource *resource = to_call(call, tc_resource_call)->resource;
3876
3877   pipe->flush_resource(pipe, resource);
3878   tc_drop_resource_reference(resource);
3879   return call_size(tc_resource_call);
3880}
3881
3882static void
3883tc_flush_resource(struct pipe_context *_pipe, struct pipe_resource *resource)
3884{
3885   struct threaded_context *tc = threaded_context(_pipe);
3886   struct tc_resource_call *call = tc_add_call(tc, TC_CALL_flush_resource,
3887                                               tc_resource_call);
3888
3889   tc_set_resource_reference(&call->resource, resource);
3890}
3891
3892static uint16_t
3893tc_call_invalidate_resource(struct pipe_context *pipe, void *call, uint64_t *last)
3894{
3895   struct pipe_resource *resource = to_call(call, tc_resource_call)->resource;
3896
3897   pipe->invalidate_resource(pipe, resource);
3898   tc_drop_resource_reference(resource);
3899   return call_size(tc_resource_call);
3900}
3901
3902static void
3903tc_invalidate_resource(struct pipe_context *_pipe,
3904                       struct pipe_resource *resource)
3905{
3906   struct threaded_context *tc = threaded_context(_pipe);
3907
3908   if (resource->target == PIPE_BUFFER) {
      /* tc_invalidate_buffer can fail, in which case we simply ignore the
       * invalidation request.
       */
3910      struct threaded_resource *tbuf = threaded_resource(resource);
3911      tc_touch_buffer(tc, tbuf);
3912      tc_invalidate_buffer(tc, tbuf);
3913      return;
3914   }
3915
3916   struct tc_resource_call *call = tc_add_call(tc, TC_CALL_invalidate_resource,
3917                                               tc_resource_call);
3918   tc_set_resource_reference(&call->resource, resource);
3919}
3920
3921struct tc_clear {
3922   struct tc_call_base base;
3923   bool scissor_state_set;
3924   uint8_t stencil;
3925   uint16_t buffers;
3926   float depth;
3927   struct pipe_scissor_state scissor_state;
3928   union pipe_color_union color;
3929};
3930
3931static uint16_t
3932tc_call_clear(struct pipe_context *pipe, void *call, uint64_t *last)
3933{
3934   struct tc_clear *p = to_call(call, tc_clear);
3935
3936   pipe->clear(pipe, p->buffers, p->scissor_state_set ? &p->scissor_state : NULL, &p->color, p->depth, p->stencil);
3937   return call_size(tc_clear);
3938}
3939
3940static void
3941tc_clear(struct pipe_context *_pipe, unsigned buffers, const struct pipe_scissor_state *scissor_state,
3942         const union pipe_color_union *color, double depth,
3943         unsigned stencil)
3944{
3945   struct threaded_context *tc = threaded_context(_pipe);
3946   struct tc_clear *p = tc_add_call(tc, TC_CALL_clear, tc_clear);
3947
3948   p->buffers = buffers;
3949   if (scissor_state)
3950      p->scissor_state = *scissor_state;
3951   p->scissor_state_set = !!scissor_state;
3952   p->color = *color;
3953   p->depth = depth;
3954   p->stencil = stencil;
3955}
3956
3957struct tc_clear_render_target {
3958   struct tc_call_base base;
3959   bool render_condition_enabled;
3960   unsigned dstx;
3961   unsigned dsty;
3962   unsigned width;
3963   unsigned height;
3964   union pipe_color_union color;
3965   struct pipe_surface *dst;
3966};
3967
3968static uint16_t
3969tc_call_clear_render_target(struct pipe_context *pipe, void *call, uint64_t *last)
3970{
3971   struct tc_clear_render_target *p = to_call(call, tc_clear_render_target);
3972
3973   pipe->clear_render_target(pipe, p->dst, &p->color, p->dstx, p->dsty, p->width, p->height,
3974                             p->render_condition_enabled);
3975   tc_drop_surface_reference(p->dst);
3976   return call_size(tc_clear_render_target);
3977}
3978
3979static void
3980tc_clear_render_target(struct pipe_context *_pipe,
3981                       struct pipe_surface *dst,
3982                       const union pipe_color_union *color,
3983                       unsigned dstx, unsigned dsty,
3984                       unsigned width, unsigned height,
3985                       bool render_condition_enabled)
3986{
3987   struct threaded_context *tc = threaded_context(_pipe);
3988   struct tc_clear_render_target *p = tc_add_call(tc, TC_CALL_clear_render_target, tc_clear_render_target);
3989   p->dst = NULL;
3990   pipe_surface_reference(&p->dst, dst);
3991   p->color = *color;
3992   p->dstx = dstx;
3993   p->dsty = dsty;
3994   p->width = width;
3995   p->height = height;
3996   p->render_condition_enabled = render_condition_enabled;
3997}
3998
4000struct tc_clear_depth_stencil {
4001   struct tc_call_base base;
4002   bool render_condition_enabled;
4003   float depth;
4004   unsigned clear_flags;
4005   unsigned stencil;
4006   unsigned dstx;
4007   unsigned dsty;
4008   unsigned width;
4009   unsigned height;
4010   struct pipe_surface *dst;
4011};
4012
4014static uint16_t
4015tc_call_clear_depth_stencil(struct pipe_context *pipe, void *call, uint64_t *last)
4016{
4017   struct tc_clear_depth_stencil *p = to_call(call, tc_clear_depth_stencil);
4018
4019   pipe->clear_depth_stencil(pipe, p->dst, p->clear_flags, p->depth, p->stencil,
4020                             p->dstx, p->dsty, p->width, p->height,
4021                             p->render_condition_enabled);
4022   tc_drop_surface_reference(p->dst);
4023   return call_size(tc_clear_depth_stencil);
4024}
4025
4026static void
4027tc_clear_depth_stencil(struct pipe_context *_pipe,
4028                       struct pipe_surface *dst, unsigned clear_flags,
4029                       double depth, unsigned stencil, unsigned dstx,
4030                       unsigned dsty, unsigned width, unsigned height,
4031                       bool render_condition_enabled)
4032{
4033   struct threaded_context *tc = threaded_context(_pipe);
4034   struct tc_clear_depth_stencil *p = tc_add_call(tc, TC_CALL_clear_depth_stencil, tc_clear_depth_stencil);
4035   p->dst = NULL;
4036   pipe_surface_reference(&p->dst, dst);
4037   p->clear_flags = clear_flags;
4038   p->depth = depth;
4039   p->stencil = stencil;
4040   p->dstx = dstx;
4041   p->dsty = dsty;
4042   p->width = width;
4043   p->height = height;
4044   p->render_condition_enabled = render_condition_enabled;
4045}
4046
4047struct tc_clear_buffer {
4048   struct tc_call_base base;
4049   uint8_t clear_value_size;
4050   unsigned offset;
4051   unsigned size;
4052   char clear_value[16];
4053   struct pipe_resource *res;
4054};
4055
4056static uint16_t
4057tc_call_clear_buffer(struct pipe_context *pipe, void *call, uint64_t *last)
4058{
4059   struct tc_clear_buffer *p = to_call(call, tc_clear_buffer);
4060
4061   pipe->clear_buffer(pipe, p->res, p->offset, p->size, p->clear_value,
4062                      p->clear_value_size);
4063   tc_drop_resource_reference(p->res);
4064   return call_size(tc_clear_buffer);
4065}
4066
4067static void
4068tc_clear_buffer(struct pipe_context *_pipe, struct pipe_resource *res,
4069                unsigned offset, unsigned size,
4070                const void *clear_value, int clear_value_size)
4071{
4072   struct threaded_context *tc = threaded_context(_pipe);
4073   struct threaded_resource *tres = threaded_resource(res);
4074   struct tc_clear_buffer *p =
4075      tc_add_call(tc, TC_CALL_clear_buffer, tc_clear_buffer);
4076
4077   tc_buffer_disable_cpu_storage(res);
4078
4079   tc_set_resource_reference(&p->res, res);
4080   tc_add_to_buffer_list(tc, &tc->buffer_lists[tc->next_buf_list], res);
4081   p->offset = offset;
4082   p->size = size;
4083   memcpy(p->clear_value, clear_value, clear_value_size);
4084   p->clear_value_size = clear_value_size;
4085
4086   util_range_add(&tres->b, &tres->valid_buffer_range, offset, offset + size);
4087}
4088
4089struct tc_clear_texture {
4090   struct tc_call_base base;
4091   unsigned level;
4092   struct pipe_box box;
4093   char data[16];
4094   struct pipe_resource *res;
4095};
4096
4097static uint16_t
4098tc_call_clear_texture(struct pipe_context *pipe, void *call, uint64_t *last)
4099{
4100   struct tc_clear_texture *p = to_call(call, tc_clear_texture);
4101
4102   pipe->clear_texture(pipe, p->res, p->level, &p->box, p->data);
4103   tc_drop_resource_reference(p->res);
4104   return call_size(tc_clear_texture);
4105}
4106
4107static void
4108tc_clear_texture(struct pipe_context *_pipe, struct pipe_resource *res,
4109                 unsigned level, const struct pipe_box *box, const void *data)
4110{
4111   struct threaded_context *tc = threaded_context(_pipe);
4112   struct tc_clear_texture *p =
4113      tc_add_call(tc, TC_CALL_clear_texture, tc_clear_texture);
4114
4115   tc_set_resource_reference(&p->res, res);
4116   p->level = level;
4117   p->box = *box;
4118   memcpy(p->data, data,
4119          util_format_get_blocksize(res->format));
4120}
4121
4122struct tc_resource_commit {
4123   struct tc_call_base base;
4124   bool commit;
4125   unsigned level;
4126   struct pipe_box box;
4127   struct pipe_resource *res;
4128};
4129
4130static uint16_t
4131tc_call_resource_commit(struct pipe_context *pipe, void *call, uint64_t *last)
4132{
4133   struct tc_resource_commit *p = to_call(call, tc_resource_commit);
4134
4135   pipe->resource_commit(pipe, p->res, p->level, &p->box, p->commit);
4136   tc_drop_resource_reference(p->res);
4137   return call_size(tc_resource_commit);
4138}
4139
4140static bool
4141tc_resource_commit(struct pipe_context *_pipe, struct pipe_resource *res,
4142                   unsigned level, struct pipe_box *box, bool commit)
4143{
4144   struct threaded_context *tc = threaded_context(_pipe);
4145   struct tc_resource_commit *p =
4146      tc_add_call(tc, TC_CALL_resource_commit, tc_resource_commit);
4147
4148   tc_set_resource_reference(&p->res, res);
4149   p->level = level;
4150   p->box = *box;
4151   p->commit = commit;
4152   return true; /* we don't care about the return value for this call */
4153}
4154
4155static unsigned
4156tc_init_intel_perf_query_info(struct pipe_context *_pipe)
4157{
4158   struct threaded_context *tc = threaded_context(_pipe);
4159   struct pipe_context *pipe = tc->pipe;
4160
4161   return pipe->init_intel_perf_query_info(pipe);
4162}
4163
4164static void
4165tc_get_intel_perf_query_info(struct pipe_context *_pipe,
4166                             unsigned query_index,
4167                             const char **name,
4168                             uint32_t *data_size,
4169                             uint32_t *n_counters,
4170                             uint32_t *n_active)
4171{
4172   struct threaded_context *tc = threaded_context(_pipe);
4173   struct pipe_context *pipe = tc->pipe;
4174
4175   tc_sync(tc); /* n_active vs begin/end_intel_perf_query */
4176   pipe->get_intel_perf_query_info(pipe, query_index, name, data_size,
4177         n_counters, n_active);
4178}
4179
4180static void
4181tc_get_intel_perf_query_counter_info(struct pipe_context *_pipe,
4182                                     unsigned query_index,
4183                                     unsigned counter_index,
4184                                     const char **name,
4185                                     const char **desc,
4186                                     uint32_t *offset,
4187                                     uint32_t *data_size,
4188                                     uint32_t *type_enum,
4189                                     uint32_t *data_type_enum,
4190                                     uint64_t *raw_max)
4191{
4192   struct threaded_context *tc = threaded_context(_pipe);
4193   struct pipe_context *pipe = tc->pipe;
4194
4195   pipe->get_intel_perf_query_counter_info(pipe, query_index, counter_index,
4196         name, desc, offset, data_size, type_enum, data_type_enum, raw_max);
4197}
4198
4199static struct pipe_query *
4200tc_new_intel_perf_query_obj(struct pipe_context *_pipe, unsigned query_index)
4201{
4202   struct threaded_context *tc = threaded_context(_pipe);
4203   struct pipe_context *pipe = tc->pipe;
4204
4205   return pipe->new_intel_perf_query_obj(pipe, query_index);
4206}
4207
4208static uint16_t
4209tc_call_begin_intel_perf_query(struct pipe_context *pipe, void *call, uint64_t *last)
4210{
4211   (void)pipe->begin_intel_perf_query(pipe, to_call(call, tc_query_call)->query);
4212   return call_size(tc_query_call);
4213}
4214
4215static bool
4216tc_begin_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q)
4217{
4218   struct threaded_context *tc = threaded_context(_pipe);
4219
4220   tc_add_call(tc, TC_CALL_begin_intel_perf_query, tc_query_call)->query = q;
4221
   /* Assume success; a failed begin can still be signaled later from
    * get_intel_perf_query_data.
    */
4223   return true;
4224}
4225
4226static uint16_t
4227tc_call_end_intel_perf_query(struct pipe_context *pipe, void *call, uint64_t *last)
4228{
4229   pipe->end_intel_perf_query(pipe, to_call(call, tc_query_call)->query);
4230   return call_size(tc_query_call);
4231}
4232
4233static void
4234tc_end_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q)
4235{
4236   struct threaded_context *tc = threaded_context(_pipe);
4237
4238   tc_add_call(tc, TC_CALL_end_intel_perf_query, tc_query_call)->query = q;
4239}
4240
4241static void
4242tc_delete_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q)
4243{
4244   struct threaded_context *tc = threaded_context(_pipe);
4245   struct pipe_context *pipe = tc->pipe;
4246
4247   tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */
4248   pipe->delete_intel_perf_query(pipe, q);
4249}
4250
4251static void
4252tc_wait_intel_perf_query(struct pipe_context *_pipe, struct pipe_query *q)
4253{
4254   struct threaded_context *tc = threaded_context(_pipe);
4255   struct pipe_context *pipe = tc->pipe;
4256
4257   tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */
4258   pipe->wait_intel_perf_query(pipe, q);
4259}
4260
4261static bool
4262tc_is_intel_perf_query_ready(struct pipe_context *_pipe, struct pipe_query *q)
4263{
4264   struct threaded_context *tc = threaded_context(_pipe);
4265   struct pipe_context *pipe = tc->pipe;
4266
4267   tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */
4268   return pipe->is_intel_perf_query_ready(pipe, q);
4269}
4270
4271static bool
4272tc_get_intel_perf_query_data(struct pipe_context *_pipe,
4273                             struct pipe_query *q,
4274                             size_t data_size,
4275                             uint32_t *data,
4276                             uint32_t *bytes_written)
4277{
4278   struct threaded_context *tc = threaded_context(_pipe);
4279   struct pipe_context *pipe = tc->pipe;
4280
4281   tc_sync(tc); /* flush potentially pending begin/end_intel_perf_queries */
4282   return pipe->get_intel_perf_query_data(pipe, q, data_size, data, bytes_written);
4283}
4284
4285/********************************************************************
4286 * callback
4287 */
4288
4289struct tc_callback_call {
4290   struct tc_call_base base;
4291   void (*fn)(void *data);
4292   void *data;
4293};
4294
4295static uint16_t
4296tc_call_callback(UNUSED struct pipe_context *pipe, void *call, uint64_t *last)
4297{
4298   struct tc_callback_call *p = to_call(call, tc_callback_call);
4299
4300   p->fn(p->data);
4301   return call_size(tc_callback_call);
4302}
4303
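/* Run the callback immediately if "asap" is set and the threaded context is
 * currently synchronized (nothing is queued); otherwise record it so it
 * executes in order with the other queued calls.
 */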
4304static void
4305tc_callback(struct pipe_context *_pipe, void (*fn)(void *), void *data,
4306            bool asap)
4307{
4308   struct threaded_context *tc = threaded_context(_pipe);
4309
4310   if (asap && tc_is_sync(tc)) {
4311      fn(data);
4312      return;
4313   }
4314
4315   struct tc_callback_call *p =
4316      tc_add_call(tc, TC_CALL_callback, tc_callback_call);
4317   p->fn = fn;
4318   p->data = data;
4319}
4320
4321
4322/********************************************************************
4323 * create & destroy
4324 */
4325
4326static void
4327tc_destroy(struct pipe_context *_pipe)
4328{
4329   struct threaded_context *tc = threaded_context(_pipe);
4330   struct pipe_context *pipe = tc->pipe;
4331
4332   if (tc->base.const_uploader &&
4333       tc->base.stream_uploader != tc->base.const_uploader)
4334      u_upload_destroy(tc->base.const_uploader);
4335
4336   if (tc->base.stream_uploader)
4337      u_upload_destroy(tc->base.stream_uploader);
4338
4339   tc_sync(tc);
4340
4341   if (util_queue_is_initialized(&tc->queue)) {
4342      util_queue_destroy(&tc->queue);
4343
4344      for (unsigned i = 0; i < TC_MAX_BATCHES; i++) {
4345         util_queue_fence_destroy(&tc->batch_slots[i].fence);
4346         assert(!tc->batch_slots[i].token);
4347      }
4348   }
4349
4350   slab_destroy_child(&tc->pool_transfers);
4351   assert(tc->batch_slots[tc->next].num_total_slots == 0);
4352   pipe->destroy(pipe);
4353
4354   for (unsigned i = 0; i < TC_MAX_BUFFER_LISTS; i++) {
4355      if (!util_queue_fence_is_signalled(&tc->buffer_lists[i].driver_flushed_fence))
4356         util_queue_fence_signal(&tc->buffer_lists[i].driver_flushed_fence);
4357      util_queue_fence_destroy(&tc->buffer_lists[i].driver_flushed_fence);
4358   }
4359
4360   FREE(tc);
4361}
4362
4363static const tc_execute execute_func[TC_NUM_CALLS] = {
4364#define CALL(name) tc_call_##name,
4365#include "u_threaded_context_calls.h"
4366#undef CALL
4367};
4368
4369void tc_driver_internal_flush_notify(struct threaded_context *tc)
4370{
4371   /* Allow drivers to call this function even for internal contexts that
4372    * don't have tc. It simplifies drivers.
4373    */
4374   if (!tc)
4375      return;
4376
4377   /* Signal fences set by tc_batch_execute. */
4378   for (unsigned i = 0; i < tc->num_signal_fences_next_flush; i++)
4379      util_queue_fence_signal(tc->signal_fences_next_flush[i]);
4380
4381   tc->num_signal_fences_next_flush = 0;
4382}
4383
4384/**
4385 * Wrap an existing pipe_context into a threaded_context.
4386 *
4387 * \param pipe                 pipe_context to wrap
4388 * \param parent_transfer_pool parent slab pool set up for creating pipe_-
4389 *                             transfer objects; the driver should have one
4390 *                             in pipe_screen.
4391 * \param replace_buffer  callback for replacing a pipe_resource's storage
4392 *                        with another pipe_resource's storage.
4393 * \param options         optional TC options/callbacks
 * \param out  if non-NULL and creation succeeds, the threaded_context is
 *             also returned here in addition to the return value
4396 */
4397struct pipe_context *
4398threaded_context_create(struct pipe_context *pipe,
4399                        struct slab_parent_pool *parent_transfer_pool,
4400                        tc_replace_buffer_storage_func replace_buffer,
4401                        const struct threaded_context_options *options,
4402                        struct threaded_context **out)
4403{
4404   struct threaded_context *tc;
4405
4406   if (!pipe)
4407      return NULL;
4408
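   /* Threading can be disabled with GALLIUM_THREAD=0; by default it's enabled
    * only on systems with more than one CPU core.
    */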
4409   if (!debug_get_bool_option("GALLIUM_THREAD", util_get_cpu_caps()->nr_cpus > 1))
4410      return pipe;
4411
4412   tc = CALLOC_STRUCT(threaded_context);
4413   if (!tc) {
4414      pipe->destroy(pipe);
4415      return NULL;
4416   }
4417
4418   if (options)
4419      tc->options = *options;
4420
4421   pipe = trace_context_create_threaded(pipe->screen, pipe, &replace_buffer, &tc->options);
4422
4423   /* The driver context isn't wrapped, so set its "priv" to NULL. */
4424   pipe->priv = NULL;
4425
4426   tc->pipe = pipe;
4427   tc->replace_buffer_storage = replace_buffer;
4428   tc->map_buffer_alignment =
4429      pipe->screen->get_param(pipe->screen, PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT);
4430   tc->ubo_alignment =
4431      MAX2(pipe->screen->get_param(pipe->screen, PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT), 64);
4432   tc->base.priv = pipe; /* priv points to the wrapped driver context */
4433   tc->base.screen = pipe->screen;
4434   tc->base.destroy = tc_destroy;
4435   tc->base.callback = tc_callback;
4436
4437   tc->base.stream_uploader = u_upload_clone(&tc->base, pipe->stream_uploader);
4438   if (pipe->stream_uploader == pipe->const_uploader)
4439      tc->base.const_uploader = tc->base.stream_uploader;
4440   else
4441      tc->base.const_uploader = u_upload_clone(&tc->base, pipe->const_uploader);
4442
4443   if (!tc->base.stream_uploader || !tc->base.const_uploader)
4444      goto fail;
4445
4446   tc->use_forced_staging_uploads = true;
4447
4448   /* The queue size is the number of batches "waiting". Batches are removed
4449    * from the queue before being executed, so keep one tc_batch slot for that
4450    * execution. Also, keep one unused slot for an unflushed batch.
4451    */
4452   if (!util_queue_init(&tc->queue, "gdrv", TC_MAX_BATCHES - 2, 1, 0, NULL))
4453      goto fail;
4454
4455   for (unsigned i = 0; i < TC_MAX_BATCHES; i++) {
4456#if !defined(NDEBUG) && TC_DEBUG >= 1
4457      tc->batch_slots[i].sentinel = TC_SENTINEL;
4458#endif
4459      tc->batch_slots[i].tc = tc;
4460      util_queue_fence_init(&tc->batch_slots[i].fence);
4461   }
4462   for (unsigned i = 0; i < TC_MAX_BUFFER_LISTS; i++)
4463      util_queue_fence_init(&tc->buffer_lists[i].driver_flushed_fence);
4464
4465   list_inithead(&tc->unflushed_queries);
4466
4467   slab_create_child(&tc->pool_transfers, parent_transfer_pool);
4468
4469   /* If you have different limits in each shader stage, set the maximum. */
   struct pipe_screen *screen = pipe->screen;
4471   tc->max_vertex_buffers =
4472      screen->get_param(screen, PIPE_CAP_MAX_VERTEX_BUFFERS);
4473   tc->max_const_buffers =
4474      screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
4475                               PIPE_SHADER_CAP_MAX_CONST_BUFFERS);
4476   tc->max_shader_buffers =
4477      screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
4478                               PIPE_SHADER_CAP_MAX_SHADER_BUFFERS);
4479   tc->max_images =
4480      screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
4481                               PIPE_SHADER_CAP_MAX_SHADER_IMAGES);
4482   tc->max_samplers =
4483      screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
4484                               PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS);
4485
4486   tc->base.set_context_param = tc_set_context_param; /* always set this */
4487
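   /* Wrap each driver entrypoint only if the driver implements it, so callers
    * can still detect missing functionality by checking for NULL.
    */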
4488#define CTX_INIT(_member) \
4489   tc->base._member = tc->pipe->_member ? tc_##_member : NULL
4490
4491   CTX_INIT(flush);
4492   CTX_INIT(draw_vbo);
4493   CTX_INIT(draw_vertex_state);
4494   CTX_INIT(launch_grid);
4495   CTX_INIT(resource_copy_region);
4496   CTX_INIT(blit);
4497   CTX_INIT(clear);
4498   CTX_INIT(clear_render_target);
4499   CTX_INIT(clear_depth_stencil);
4500   CTX_INIT(clear_buffer);
4501   CTX_INIT(clear_texture);
4502   CTX_INIT(flush_resource);
4503   CTX_INIT(generate_mipmap);
4504   CTX_INIT(render_condition);
4505   CTX_INIT(create_query);
4506   CTX_INIT(create_batch_query);
4507   CTX_INIT(destroy_query);
4508   CTX_INIT(begin_query);
4509   CTX_INIT(end_query);
4510   CTX_INIT(get_query_result);
4511   CTX_INIT(get_query_result_resource);
4512   CTX_INIT(set_active_query_state);
4513   CTX_INIT(create_blend_state);
4514   CTX_INIT(bind_blend_state);
4515   CTX_INIT(delete_blend_state);
4516   CTX_INIT(create_sampler_state);
4517   CTX_INIT(bind_sampler_states);
4518   CTX_INIT(delete_sampler_state);
4519   CTX_INIT(create_rasterizer_state);
4520   CTX_INIT(bind_rasterizer_state);
4521   CTX_INIT(delete_rasterizer_state);
4522   CTX_INIT(create_depth_stencil_alpha_state);
4523   CTX_INIT(bind_depth_stencil_alpha_state);
4524   CTX_INIT(delete_depth_stencil_alpha_state);
4525   CTX_INIT(link_shader);
4526   CTX_INIT(create_fs_state);
4527   CTX_INIT(bind_fs_state);
4528   CTX_INIT(delete_fs_state);
4529   CTX_INIT(create_vs_state);
4530   CTX_INIT(bind_vs_state);
4531   CTX_INIT(delete_vs_state);
4532   CTX_INIT(create_gs_state);
4533   CTX_INIT(bind_gs_state);
4534   CTX_INIT(delete_gs_state);
4535   CTX_INIT(create_tcs_state);
4536   CTX_INIT(bind_tcs_state);
4537   CTX_INIT(delete_tcs_state);
4538   CTX_INIT(create_tes_state);
4539   CTX_INIT(bind_tes_state);
4540   CTX_INIT(delete_tes_state);
4541   CTX_INIT(create_compute_state);
4542   CTX_INIT(bind_compute_state);
4543   CTX_INIT(delete_compute_state);
4544   CTX_INIT(create_vertex_elements_state);
4545   CTX_INIT(bind_vertex_elements_state);
4546   CTX_INIT(delete_vertex_elements_state);
4547   CTX_INIT(set_blend_color);
4548   CTX_INIT(set_stencil_ref);
4549   CTX_INIT(set_sample_mask);
4550   CTX_INIT(set_min_samples);
4551   CTX_INIT(set_clip_state);
4552   CTX_INIT(set_constant_buffer);
4553   CTX_INIT(set_inlinable_constants);
4554   CTX_INIT(set_framebuffer_state);
4555   CTX_INIT(set_polygon_stipple);
4556   CTX_INIT(set_sample_locations);
4557   CTX_INIT(set_scissor_states);
4558   CTX_INIT(set_viewport_states);
4559   CTX_INIT(set_window_rectangles);
4560   CTX_INIT(set_sampler_views);
4561   CTX_INIT(set_tess_state);
4562   CTX_INIT(set_patch_vertices);
4563   CTX_INIT(set_shader_buffers);
4564   CTX_INIT(set_shader_images);
4565   CTX_INIT(set_vertex_buffers);
4566   CTX_INIT(create_stream_output_target);
4567   CTX_INIT(stream_output_target_destroy);
4568   CTX_INIT(set_stream_output_targets);
4569   CTX_INIT(create_sampler_view);
4570   CTX_INIT(sampler_view_destroy);
4571   CTX_INIT(create_surface);
4572   CTX_INIT(surface_destroy);
4573   CTX_INIT(buffer_map);
4574   CTX_INIT(texture_map);
4575   CTX_INIT(transfer_flush_region);
4576   CTX_INIT(buffer_unmap);
4577   CTX_INIT(texture_unmap);
4578   CTX_INIT(buffer_subdata);
4579   CTX_INIT(texture_subdata);
4580   CTX_INIT(texture_barrier);
4581   CTX_INIT(memory_barrier);
4582   CTX_INIT(resource_commit);
4583   CTX_INIT(create_video_codec);
4584   CTX_INIT(create_video_buffer);
4585   CTX_INIT(set_compute_resources);
4586   CTX_INIT(set_global_binding);
4587   CTX_INIT(get_sample_position);
4588   CTX_INIT(invalidate_resource);
4589   CTX_INIT(get_device_reset_status);
4590   CTX_INIT(set_device_reset_callback);
4591   CTX_INIT(dump_debug_state);
4592   CTX_INIT(set_log_context);
4593   CTX_INIT(emit_string_marker);
4594   CTX_INIT(set_debug_callback);
4595   CTX_INIT(create_fence_fd);
4596   CTX_INIT(fence_server_sync);
4597   CTX_INIT(fence_server_signal);
4598   CTX_INIT(get_timestamp);
4599   CTX_INIT(create_texture_handle);
4600   CTX_INIT(delete_texture_handle);
4601   CTX_INIT(make_texture_handle_resident);
4602   CTX_INIT(create_image_handle);
4603   CTX_INIT(delete_image_handle);
4604   CTX_INIT(make_image_handle_resident);
4605   CTX_INIT(set_frontend_noop);
4606   CTX_INIT(init_intel_perf_query_info);
4607   CTX_INIT(get_intel_perf_query_info);
4608   CTX_INIT(get_intel_perf_query_counter_info);
4609   CTX_INIT(new_intel_perf_query_obj);
4610   CTX_INIT(begin_intel_perf_query);
4611   CTX_INIT(end_intel_perf_query);
4612   CTX_INIT(delete_intel_perf_query);
4613   CTX_INIT(wait_intel_perf_query);
4614   CTX_INIT(is_intel_perf_query_ready);
4615   CTX_INIT(get_intel_perf_query_data);
4616#undef CTX_INIT
4617
4618   if (out)
4619      *out = tc;
4620
4621   tc_begin_next_buffer_list(tc);
4622   return &tc->base;
4623
4624fail:
4625   tc_destroy(&tc->base);
4626   return NULL;
4627}
4628
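/* Set the mapped-buffer memory limit to total_ram / divisor (e.g. a divisor
 * of 4 gives a quarter of system RAM); on 32-bit builds the limit is further
 * clamped to 512 MB.
 */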
4629void
4630threaded_context_init_bytes_mapped_limit(struct threaded_context *tc, unsigned divisor)
4631{
4632   uint64_t total_ram;
4633   if (os_get_total_physical_memory(&total_ram)) {
4634      tc->bytes_mapped_limit = total_ram / divisor;
4635      if (sizeof(void*) == 4)
4636         tc->bytes_mapped_limit = MIN2(tc->bytes_mapped_limit, 512*1024*1024UL);
4637   }
4638}
4639