1/*
2 * Copyright © Microsoft Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include "d3d12_bufmgr.h"
25#include "d3d12_context.h"
26#include "d3d12_format.h"
27#include "d3d12_resource.h"
28#include "d3d12_resource_state.h"
29#include "d3d12_screen.h"
30
31#include <dxguids/dxguids.h>
32
33#include <assert.h>
34
/* Sentinel stored in a desired-state slot to mean "no transition has been
 * requested for this (sub)resource". The expansion is fully parenthesized so
 * the macro behaves as a single operand wherever it is used. */
#define UNKNOWN_RESOURCE_STATE ((D3D12_RESOURCE_STATES) 0x8000u)
36
37/* Stores the current desired state of either an entire resource, or each subresource. */
38struct desired_resource_state
39{
40   bool homogenous;
41   uint32_t num_subresources;
42   D3D12_RESOURCE_STATES *subresource_states;
43};
44
45static bool
46desired_resource_state_init(desired_resource_state *state, uint32_t subresource_count)
47{
48   state->homogenous = true;
49   state->num_subresources = subresource_count;
50   state->subresource_states = (D3D12_RESOURCE_STATES *)calloc(subresource_count, sizeof(D3D12_RESOURCE_STATES));
51   return state->subresource_states != nullptr;
52}
53
54static void
55desired_resource_state_cleanup(desired_resource_state *state)
56{
57   free(state->subresource_states);
58}
59
60static D3D12_RESOURCE_STATES
61get_desired_subresource_state(const desired_resource_state *state, uint32_t subresource_index)
62{
63   if (state->homogenous)
64      subresource_index = 0;
65   return state->subresource_states[subresource_index];
66}
67
68static void
69update_subresource_state(D3D12_RESOURCE_STATES *existing_state, D3D12_RESOURCE_STATES new_state)
70{
71   if (*existing_state == UNKNOWN_RESOURCE_STATE || new_state == UNKNOWN_RESOURCE_STATE ||
72       d3d12_is_write_state(new_state)) {
73      *existing_state = new_state;
74   } else {
75      /* Accumulate read state state bits */
76      *existing_state |= new_state;
77   }
78}
79
80static void
81set_desired_resource_state(desired_resource_state *state_obj, D3D12_RESOURCE_STATES state)
82{
83   state_obj->homogenous = true;
84   update_subresource_state(&state_obj->subresource_states[0], state);
85}
86
87static void
88set_desired_subresource_state(desired_resource_state *state_obj,
89                                    uint32_t subresource,
90                                    D3D12_RESOURCE_STATES state)
91{
92   if (state_obj->homogenous && state_obj->num_subresources > 1) {
93      for (unsigned i = 1; i < state_obj->num_subresources; ++i) {
94         state_obj->subresource_states[i] = state_obj->subresource_states[0];
95      }
96      state_obj->homogenous = false;
97   }
98
99   update_subresource_state(&state_obj->subresource_states[subresource], state);
100}
101
102static void
103reset_desired_resource_state(desired_resource_state *state_obj)
104{
105   set_desired_resource_state(state_obj, UNKNOWN_RESOURCE_STATE);
106}
107
108bool
109d3d12_resource_state_init(d3d12_resource_state *state, uint32_t subresource_count, bool simultaneous_access)
110{
111   state->homogenous = true;
112   state->supports_simultaneous_access = simultaneous_access;
113   state->num_subresources = subresource_count;
114   state->subresource_states = (d3d12_subresource_state *)calloc(subresource_count, sizeof(d3d12_subresource_state));
115   return state->subresource_states != nullptr;
116}
117
118void
119d3d12_resource_state_cleanup(d3d12_resource_state *state)
120{
121   free(state->subresource_states);
122}
123
124static const d3d12_subresource_state *
125get_subresource_state(const d3d12_resource_state *state, uint32_t subresource)
126{
127   if (state->homogenous)
128      subresource = 0;
129   return &state->subresource_states[subresource];
130}
131
132static void
133set_resource_state(d3d12_resource_state *state_obj, const d3d12_subresource_state *state)
134{
135   state_obj->homogenous = true;
136   state_obj->subresource_states[0] = *state;
137}
138
139static void
140set_subresource_state(d3d12_resource_state *state_obj, uint32_t subresource, const d3d12_subresource_state *state)
141{
142   if (state_obj->homogenous && state_obj->num_subresources > 1) {
143      for (unsigned i = 1; i < state_obj->num_subresources; ++i) {
144         state_obj->subresource_states[i] = state_obj->subresource_states[0];
145      }
146      state_obj->homogenous = false;
147   }
148
149   state_obj->subresource_states[subresource] = *state;
150}
151
152static void
153reset_resource_state(d3d12_resource_state *state)
154{
155   d3d12_subresource_state subres_state = {};
156   set_resource_state(state, &subres_state);
157}
158
159static D3D12_RESOURCE_STATES
160resource_state_if_promoted(D3D12_RESOURCE_STATES desired_state,
161                           bool simultaneous_access,
162                           const d3d12_subresource_state *current_state)
163{
164   const D3D12_RESOURCE_STATES promotable_states = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE |
165                                                   D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE |
166                                                   D3D12_RESOURCE_STATE_COPY_SOURCE | D3D12_RESOURCE_STATE_COPY_DEST;
167
168   if (simultaneous_access ||
169       (desired_state & promotable_states) != D3D12_RESOURCE_STATE_COMMON) {
170      // If the current state is COMMON...
171      if (current_state->state == D3D12_RESOURCE_STATE_COMMON)
172         // ...then promotion is allowed
173         return desired_state;
174
175      // If the current state is a read state resulting from previous promotion...
176      if (current_state->is_promoted &&
177          (current_state->state & D3D12_RESOURCE_STATE_GENERIC_READ) != D3D12_RESOURCE_STATE_COMMON)
178         // ...then (accumulated) promotion is allowed
179         return desired_state | current_state->state;
180   }
181
182   return D3D12_RESOURCE_STATE_COMMON;
183}
184
185static void
186copy_resource_state(d3d12_resource_state *dest, d3d12_resource_state *src)
187{
188   assert(dest->num_subresources == src->num_subresources);
189   if (src->homogenous)
190      set_resource_state(dest, &src->subresource_states[0]);
191   else {
192      dest->homogenous = false;
193      for (unsigned i = 0; i < src->num_subresources; ++i)
194         dest->subresource_states[i] = src->subresource_states[i];
195   }
196}
197
198struct d3d12_context_state_table_entry
199{
200   struct desired_resource_state desired;
201   struct d3d12_resource_state batch_begin, batch_end;
202};
203
204static void
205destroy_context_state_table_entry(d3d12_context_state_table_entry *entry)
206{
207   desired_resource_state_cleanup(&entry->desired);
208   d3d12_resource_state_cleanup(&entry->batch_begin);
209   d3d12_resource_state_cleanup(&entry->batch_end);
210   free(entry);
211}
212
213void
214d3d12_context_state_table_init(struct d3d12_context *ctx)
215{
216   ctx->bo_state_table = _mesa_hash_table_u64_create(nullptr);
217   ctx->pending_barriers_bos = _mesa_pointer_set_create(nullptr);
218}
219
220void
221d3d12_context_state_table_destroy(struct d3d12_context *ctx)
222{
223   hash_table_foreach(ctx->bo_state_table->table, entry)
224      destroy_context_state_table_entry((d3d12_context_state_table_entry *)entry->data);
225   _mesa_hash_table_u64_destroy(ctx->bo_state_table);
226   util_dynarray_fini(&ctx->barrier_scratch);
227   if (ctx->state_fixup_cmdlist)
228      ctx->state_fixup_cmdlist->Release();
229   _mesa_set_destroy(ctx->pending_barriers_bos, nullptr);
230}
231
232static unsigned
233get_subresource_count(const D3D12_RESOURCE_DESC *desc)
234{
235   unsigned array_size = desc->Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D ? 1 : desc->DepthOrArraySize;
236   return desc->MipLevels * array_size * d3d12_non_opaque_plane_count(desc->Format);
237}
238
239static void
240init_state_table_entry(d3d12_context_state_table_entry *bo_state, d3d12_bo *bo)
241{
242   /* Default parameters for bos for suballocated buffers */
243   unsigned subresource_count = 1;
244   bool supports_simultaneous_access = true;
245   if (bo->res) {
246      D3D12_RESOURCE_DESC desc = GetDesc(bo->res);
247      subresource_count = get_subresource_count(&desc);
248      supports_simultaneous_access = d3d12_resource_supports_simultaneous_access(&desc);
249   }
250
251   desired_resource_state_init(&bo_state->desired, subresource_count);
252   d3d12_resource_state_init(&bo_state->batch_end, subresource_count, supports_simultaneous_access);
253
254   /* We'll never need state fixups for simultaneous access resources, so don't bother initializing this second state */
255   if (!supports_simultaneous_access)
256      d3d12_resource_state_init(&bo_state->batch_begin, subresource_count, supports_simultaneous_access);
257}
258
259static d3d12_context_state_table_entry *
260find_or_create_state_entry(struct hash_table_u64 *table, d3d12_bo *bo)
261{
262   d3d12_context_state_table_entry *bo_state =
263      (d3d12_context_state_table_entry *) _mesa_hash_table_u64_search(table, bo->unique_id);
264   if (!bo_state) {
265      bo_state = CALLOC_STRUCT(d3d12_context_state_table_entry);
266      init_state_table_entry(bo_state, bo);
267      _mesa_hash_table_u64_insert(table, bo->unique_id, bo_state);
268   }
269   return bo_state;
270}
271
272static ID3D12GraphicsCommandList *
273ensure_state_fixup_cmdlist(struct d3d12_context *ctx, ID3D12CommandAllocator *alloc)
274{
275   if (!ctx->state_fixup_cmdlist) {
276      struct d3d12_screen *screen = d3d12_screen(ctx->base.screen);
277      screen->dev->CreateCommandList(0,
278                                     D3D12_COMMAND_LIST_TYPE_DIRECT,
279                                     alloc,
280                                     nullptr,
281                                     IID_PPV_ARGS(&ctx->state_fixup_cmdlist));
282   } else if (FAILED(ctx->state_fixup_cmdlist->Reset(alloc, nullptr))) {
283      ctx->state_fixup_cmdlist->Release();
284      ctx->state_fixup_cmdlist = nullptr;
285   }
286
287   return ctx->state_fixup_cmdlist;
288}
289
290static bool
291transition_required(D3D12_RESOURCE_STATES current_state, D3D12_RESOURCE_STATES *destination_state)
292{
293   // An exact match never needs a transition.
294   if (current_state == *destination_state) {
295      return false;
296   }
297
298   if (current_state == D3D12_RESOURCE_STATE_COMMON || *destination_state == D3D12_RESOURCE_STATE_COMMON) {
299      return true;
300   }
301
302   // Current state already contains the destination state, we're good.
303   if ((current_state & *destination_state) == *destination_state) {
304      *destination_state = current_state;
305      return false;
306   }
307
308   // If the transition involves a write state, then the destination should just be the requested destination.
309   // Otherwise, accumulate read states to minimize future transitions (by triggering the above condition).
310   if (!d3d12_is_write_state(*destination_state) && !d3d12_is_write_state(current_state)) {
311      *destination_state |= current_state;
312   }
313   return true;
314}
315
316static void
317resolve_global_state(struct d3d12_context *ctx, ID3D12Resource *res, d3d12_resource_state *batch_state, d3d12_resource_state *res_state)
318{
319   assert(batch_state->num_subresources == res_state->num_subresources);
320   unsigned num_subresources = batch_state->homogenous && res_state->homogenous ? 1 : batch_state->num_subresources;
321   for (unsigned i = 0; i < num_subresources; ++i) {
322      const d3d12_subresource_state *current_state = get_subresource_state(res_state, i);
323      const d3d12_subresource_state *target_state = get_subresource_state(batch_state, i);
324      D3D12_RESOURCE_STATES promotable_state =
325         resource_state_if_promoted(target_state->state, false, current_state);
326
327      D3D12_RESOURCE_STATES after = target_state->state;
328      if ((promotable_state & target_state->state) == target_state->state ||
329          !transition_required(current_state->state, &after))
330         continue;
331
332      D3D12_RESOURCE_BARRIER barrier = { D3D12_RESOURCE_BARRIER_TYPE_TRANSITION };
333      barrier.Transition.pResource = res;
334      barrier.Transition.StateBefore = current_state->state;
335      barrier.Transition.StateAfter = after;
336      barrier.Transition.Subresource = num_subresources == 1 ? D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES : i;
337      util_dynarray_append(&ctx->barrier_scratch, D3D12_RESOURCE_BARRIER, barrier);
338   }
339}
340
341bool
342d3d12_context_state_resolve_submission(struct d3d12_context *ctx, struct d3d12_batch *batch)
343{
344   util_dynarray_foreach(&ctx->recently_destroyed_bos, uint64_t, id) {
345      void *data = _mesa_hash_table_u64_search(ctx->bo_state_table, *id);
346      if (data)
347         destroy_context_state_table_entry((d3d12_context_state_table_entry *)data);
348      _mesa_hash_table_u64_remove(ctx->bo_state_table, *id);
349   }
350
351   util_dynarray_clear(&ctx->recently_destroyed_bos);
352
353   hash_table_foreach(batch->bos, bo_entry) {
354      d3d12_bo *bo = (d3d12_bo *)bo_entry->key;
355      d3d12_context_state_table_entry *bo_state = find_or_create_state_entry(ctx->bo_state_table, bo);
356      if (!bo_state->batch_end.supports_simultaneous_access) {
357         assert(bo->res && bo->global_state.subresource_states);
358
359         resolve_global_state(ctx, bo->res, &bo_state->batch_begin, &bo->global_state);
360
361         copy_resource_state(&bo_state->batch_begin, &bo_state->batch_end);
362         copy_resource_state(&bo->global_state, &bo_state->batch_end);
363      } else {
364         reset_resource_state(&bo_state->batch_end);
365      }
366   }
367
368   bool needs_execute_fixup = false;
369   if (ctx->barrier_scratch.size) {
370      ID3D12GraphicsCommandList *cmdlist = ensure_state_fixup_cmdlist(ctx, batch->cmdalloc);
371      if (cmdlist) {
372         cmdlist->ResourceBarrier(util_dynarray_num_elements(&ctx->barrier_scratch, D3D12_RESOURCE_BARRIER),
373                                  (D3D12_RESOURCE_BARRIER *)ctx->barrier_scratch.data);
374         needs_execute_fixup = SUCCEEDED(cmdlist->Close());
375      }
376
377      util_dynarray_clear(&ctx->barrier_scratch);
378   }
379   return needs_execute_fixup;
380}
381
382static void
383append_barrier(struct d3d12_context *ctx,
384               d3d12_bo *bo,
385               d3d12_context_state_table_entry *state_entry,
386               D3D12_RESOURCE_STATES after,
387               UINT subresource,
388               bool is_implicit_dispatch)
389{
390   uint64_t offset;
391   ID3D12Resource *res = d3d12_bo_get_base(bo, &offset)->res;
392   d3d12_resource_state *current_state = &state_entry->batch_end;
393
394   D3D12_RESOURCE_BARRIER transition_desc = { D3D12_RESOURCE_BARRIER_TYPE_TRANSITION };
395   transition_desc.Transition.pResource = res;
396   transition_desc.Transition.Subresource = subresource;
397
398   // This is a transition into a state that is both write and non-write.
399   // This is invalid according to D3D12. We're venturing into undefined behavior
400   // land, but let's just pick the write state.
401   if (d3d12_is_write_state(after) && (after & ~RESOURCE_STATE_ALL_WRITE_BITS) != 0) {
402      after &= RESOURCE_STATE_ALL_WRITE_BITS;
403
404      // For now, this is the only way I've seen where this can happen.
405      assert(after == D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
406   }
407
408   assert((subresource == D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES && current_state->homogenous) ||
409          subresource < current_state->num_subresources);
410   d3d12_subresource_state current_subresource_state = *get_subresource_state(current_state, subresource);
411
412   // If the last time this state was set was in a different execution
413   // period and is decayable then decay the current state to COMMON
414   if (ctx->submit_id != current_subresource_state.execution_id && current_subresource_state.may_decay) {
415      current_subresource_state.state = D3D12_RESOURCE_STATE_COMMON;
416      current_subresource_state.is_promoted = false;
417   }
418   bool may_decay = false;
419   bool is_promotion = false;
420
421   D3D12_RESOURCE_STATES state_if_promoted =
422      resource_state_if_promoted(after, current_state->supports_simultaneous_access, &current_subresource_state);
423
424   if (D3D12_RESOURCE_STATE_COMMON == state_if_promoted) {
425      // No promotion
426      if (current_subresource_state.state == D3D12_RESOURCE_STATE_UNORDERED_ACCESS &&
427            after == D3D12_RESOURCE_STATE_UNORDERED_ACCESS &&
428            is_implicit_dispatch) {
429         D3D12_RESOURCE_BARRIER uav_barrier = { D3D12_RESOURCE_BARRIER_TYPE_UAV };
430         uav_barrier.UAV.pResource = res;
431         util_dynarray_append(&ctx->barrier_scratch, D3D12_RESOURCE_BARRIER, uav_barrier);
432      } else if (transition_required(current_subresource_state.state, /*inout*/ &after)) {
433         // Insert a single concrete barrier (for non-simultaneous access resources).
434         transition_desc.Transition.StateBefore = current_subresource_state.state;
435         transition_desc.Transition.StateAfter = after;
436         assert(transition_desc.Transition.StateBefore != transition_desc.Transition.StateAfter);
437         util_dynarray_append(&ctx->barrier_scratch, D3D12_RESOURCE_BARRIER, transition_desc);
438
439         may_decay = current_state->supports_simultaneous_access && !d3d12_is_write_state(after);
440         is_promotion = false;
441      }
442   } else if (after != state_if_promoted) {
443      after = state_if_promoted;
444      may_decay = !d3d12_is_write_state(after);
445      is_promotion = true;
446   }
447
448   d3d12_subresource_state new_subresource_state { after, ctx->submit_id, is_promotion, may_decay };
449   if (subresource == D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES)
450      set_resource_state(current_state, &new_subresource_state);
451   else
452      set_subresource_state(current_state, subresource, &new_subresource_state);
453}
454
455void
456d3d12_transition_resource_state(struct d3d12_context *ctx,
457                                struct d3d12_resource *res,
458                                D3D12_RESOURCE_STATES state,
459                                d3d12_transition_flags flags)
460{
461   if (flags & D3D12_TRANSITION_FLAG_INVALIDATE_BINDINGS)
462      d3d12_invalidate_context_bindings(ctx, res);
463
464   d3d12_context_state_table_entry *state_entry = find_or_create_state_entry(ctx->bo_state_table, res->bo);
465   if (flags & D3D12_TRANSITION_FLAG_ACCUMULATE_STATE) {
466      set_desired_resource_state(&state_entry->desired, state);
467      _mesa_set_add(ctx->pending_barriers_bos, res->bo);
468   } else if (state_entry->batch_end.homogenous) {
469      append_barrier(ctx, res->bo, state_entry, state, D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, false);
470   } else {
471      for (unsigned i = 0; i < state_entry->batch_end.num_subresources; ++i) {
472         append_barrier(ctx, res->bo, state_entry, state, i, false);
473      }
474   }
475}
476
477void
478d3d12_transition_subresources_state(struct d3d12_context *ctx,
479                                    struct d3d12_resource *res,
480                                    uint32_t start_level, uint32_t num_levels,
481                                    uint32_t start_layer, uint32_t num_layers,
482                                    uint32_t start_plane, uint32_t num_planes,
483                                    D3D12_RESOURCE_STATES state,
484                                    d3d12_transition_flags flags)
485{
486   if(flags & D3D12_TRANSITION_FLAG_INVALIDATE_BINDINGS)
487      d3d12_invalidate_context_bindings(ctx, res);
488
489   d3d12_context_state_table_entry *state_entry = find_or_create_state_entry(ctx->bo_state_table, res->bo);
490   bool is_whole_resource = num_levels * num_layers * num_planes == state_entry->batch_end.num_subresources;
491   bool is_accumulate = (flags & D3D12_TRANSITION_FLAG_ACCUMULATE_STATE) != 0;
492
493   if (is_whole_resource && is_accumulate) {
494      set_desired_resource_state(&state_entry->desired, state);
495   } else if (is_whole_resource && state_entry->batch_end.homogenous) {
496      append_barrier(ctx, res->bo, state_entry, state, D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES, false);
497   } else {
498      for (uint32_t l = 0; l < num_levels; l++) {
499         const uint32_t level = start_level + l;
500         for (uint32_t a = 0; a < num_layers; a++) {
501            const uint32_t layer = start_layer + a;
502            for (uint32_t p = 0; p < num_planes; p++) {
503               const uint32_t plane = start_plane + p;
504               uint32_t subres_id =
505                  level + (layer * res->mip_levels) + plane * (res->mip_levels * res->base.b.array_size);
506               assert(subres_id < state_entry->desired.num_subresources);
507               if (is_accumulate)
508                  set_desired_subresource_state(&state_entry->desired, subres_id, state);
509               else
510                  append_barrier(ctx, res->bo, state_entry, state, subres_id, false);
511            }
512         }
513      }
514   }
515
516   if (is_accumulate)
517      _mesa_set_add(ctx->pending_barriers_bos, res->bo);
518}
519
520void
521d3d12_apply_resource_states(struct d3d12_context *ctx, bool is_implicit_dispatch)
522{
523   set_foreach_remove(ctx->pending_barriers_bos, entry) {
524      d3d12_bo *bo = (d3d12_bo *)entry->key;
525
526      d3d12_context_state_table_entry *state_entry = find_or_create_state_entry(ctx->bo_state_table, bo);
527      desired_resource_state *destination_state = &state_entry->desired;
528      d3d12_resource_state *current_state = &state_entry->batch_end;
529
530      // Figure out the set of subresources that are transitioning
531      bool all_resources_at_once = current_state->homogenous && destination_state->homogenous;
532
533      UINT num_subresources = all_resources_at_once ? 1 : current_state->num_subresources;
534      for (UINT i = 0; i < num_subresources; ++i) {
535         D3D12_RESOURCE_STATES after = get_desired_subresource_state(destination_state, i);
536         UINT subresource = num_subresources == 1 ? D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES : i;
537
538         // Is this subresource currently being used, or is it just being iterated over?
539         if (after == UNKNOWN_RESOURCE_STATE) {
540            // This subresource doesn't have any transition requested - move on to the next.
541            continue;
542         }
543
544         append_barrier(ctx, bo, state_entry, after, subresource, is_implicit_dispatch);
545      }
546
547      // Update destination states.
548      reset_desired_resource_state(destination_state);
549   }
550
551   if (ctx->barrier_scratch.size) {
552      ctx->cmdlist->ResourceBarrier(util_dynarray_num_elements(&ctx->barrier_scratch, D3D12_RESOURCE_BARRIER),
553                                    (D3D12_RESOURCE_BARRIER *) ctx->barrier_scratch.data);
554      util_dynarray_clear(&ctx->barrier_scratch);
555   }
556}
557