1/*
2 * Copyright © Microsoft Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include "d3d12_cmd_signature.h"
25#include "d3d12_compiler.h"
26#include "d3d12_compute_transforms.h"
27#include "d3d12_context.h"
28#include "d3d12_format.h"
29#include "d3d12_query.h"
30#include "d3d12_resource.h"
31#include "d3d12_root_signature.h"
32#include "d3d12_screen.h"
33#include "d3d12_surface.h"
34
35#include "util/u_debug.h"
36#include "util/u_draw.h"
37#include "util/u_helpers.h"
38#include "util/u_inlines.h"
39#include "util/u_prim.h"
40#include "util/u_prim_restart.h"
41#include "util/u_math.h"
42
43extern "C" {
44#include "indices/u_primconvert.h"
45}
46
47static const D3D12_RECT MAX_SCISSOR = { D3D12_VIEWPORT_BOUNDS_MIN,
48                                        D3D12_VIEWPORT_BOUNDS_MIN,
49                                        D3D12_VIEWPORT_BOUNDS_MAX,
50                                        D3D12_VIEWPORT_BOUNDS_MAX };
51
52static const D3D12_RECT MAX_SCISSOR_ARRAY[] = {
53   MAX_SCISSOR, MAX_SCISSOR, MAX_SCISSOR, MAX_SCISSOR,
54   MAX_SCISSOR, MAX_SCISSOR, MAX_SCISSOR, MAX_SCISSOR,
55   MAX_SCISSOR, MAX_SCISSOR, MAX_SCISSOR, MAX_SCISSOR,
56   MAX_SCISSOR, MAX_SCISSOR, MAX_SCISSOR, MAX_SCISSOR
57};
58static_assert(ARRAY_SIZE(MAX_SCISSOR_ARRAY) == PIPE_MAX_VIEWPORTS, "Wrong scissor count");
59
60static D3D12_GPU_DESCRIPTOR_HANDLE
61fill_cbv_descriptors(struct d3d12_context *ctx,
62                     struct d3d12_shader *shader,
63                     int stage)
64{
65   struct d3d12_batch *batch = d3d12_current_batch(ctx);
66   struct d3d12_descriptor_handle table_start;
67   d2d12_descriptor_heap_get_next_handle(batch->view_heap, &table_start);
68
69   for (unsigned i = 0; i < shader->num_cb_bindings; i++) {
70      unsigned binding = shader->cb_bindings[i].binding;
71      struct pipe_constant_buffer *buffer = &ctx->cbufs[stage][binding];
72
73      D3D12_CONSTANT_BUFFER_VIEW_DESC cbv_desc = {};
74      if (buffer && buffer->buffer) {
75         struct d3d12_resource *res = d3d12_resource(buffer->buffer);
76         d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER, D3D12_TRANSITION_FLAG_ACCUMULATE_STATE);
77         cbv_desc.BufferLocation = d3d12_resource_gpu_virtual_address(res) + buffer->buffer_offset;
78         cbv_desc.SizeInBytes = MIN2(D3D12_REQ_CONSTANT_BUFFER_ELEMENT_COUNT * 16,
79            align(buffer->buffer_size, 256));
80         d3d12_batch_reference_resource(batch, res, false);
81      }
82
83      struct d3d12_descriptor_handle handle;
84      d3d12_descriptor_heap_alloc_handle(batch->view_heap, &handle);
85      d3d12_screen(ctx->base.screen)->dev->CreateConstantBufferView(&cbv_desc, handle.cpu_handle);
86   }
87
88   return table_start.gpu_handle;
89}
90
91static D3D12_GPU_DESCRIPTOR_HANDLE
92fill_srv_descriptors(struct d3d12_context *ctx,
93                     struct d3d12_shader *shader,
94                     unsigned stage)
95{
96   struct d3d12_batch *batch = d3d12_current_batch(ctx);
97   struct d3d12_screen *screen = d3d12_screen(ctx->base.screen);
98   D3D12_CPU_DESCRIPTOR_HANDLE descs[PIPE_MAX_SHADER_SAMPLER_VIEWS];
99   struct d3d12_descriptor_handle table_start;
100
101   d2d12_descriptor_heap_get_next_handle(batch->view_heap, &table_start);
102
103   for (unsigned i = shader->begin_srv_binding; i < shader->end_srv_binding; i++)
104   {
105      struct d3d12_sampler_view *view;
106
107      if (i == shader->pstipple_binding) {
108         view = (struct d3d12_sampler_view*)ctx->pstipple.sampler_view;
109      } else {
110         view = (struct d3d12_sampler_view*)ctx->sampler_views[stage][i];
111      }
112
113      unsigned desc_idx = i - shader->begin_srv_binding;
114      if (view != NULL) {
115         descs[desc_idx] = view->handle.cpu_handle;
116         d3d12_batch_reference_sampler_view(batch, view);
117
118         struct d3d12_resource *res = d3d12_resource(view->base.texture);
119         /* If this is a buffer that's been replaced, re-create the descriptor */
120         if (view->texture_generation_id != res->generation_id) {
121            d3d12_init_sampler_view_descriptor(view);
122            view->texture_generation_id = res->generation_id;
123         }
124
125         D3D12_RESOURCE_STATES state = (stage == PIPE_SHADER_FRAGMENT) ?
126                                       D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE :
127                                       D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE;
128         if (view->base.texture->target == PIPE_BUFFER) {
129            d3d12_transition_resource_state(ctx, d3d12_resource(view->base.texture),
130                                            state,
131                                            D3D12_TRANSITION_FLAG_ACCUMULATE_STATE);
132         } else {
133            d3d12_transition_subresources_state(ctx, d3d12_resource(view->base.texture),
134                                                view->base.u.tex.first_level, view->mip_levels,
135                                                view->base.u.tex.first_layer, view->array_size,
136                                                d3d12_get_format_start_plane(view->base.format),
137                                                d3d12_get_format_num_planes(view->base.format),
138                                                state,
139                                                D3D12_TRANSITION_FLAG_ACCUMULATE_STATE);
140         }
141      } else {
142         descs[desc_idx] = screen->null_srvs[shader->srv_bindings[i].dimension].cpu_handle;
143      }
144   }
145
146   d3d12_descriptor_heap_append_handles(batch->view_heap, descs, shader->end_srv_binding - shader->begin_srv_binding);
147
148   return table_start.gpu_handle;
149}
150
151static D3D12_GPU_DESCRIPTOR_HANDLE
152fill_ssbo_descriptors(struct d3d12_context *ctx,
153                     const struct d3d12_shader *shader,
154                     int stage)
155{
156   struct d3d12_batch *batch = d3d12_current_batch(ctx);
157   struct d3d12_descriptor_handle table_start;
158
159   d2d12_descriptor_heap_get_next_handle(batch->view_heap, &table_start);
160
161   for (unsigned i = 0; i < shader->nir->info.num_ssbos; i++)
162   {
163      struct pipe_shader_buffer *view = &ctx->ssbo_views[stage][i];
164
165      D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc;
166      uav_desc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
167      uav_desc.Format = DXGI_FORMAT_R32_TYPELESS;
168      uav_desc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW;
169      uav_desc.Buffer.StructureByteStride = 0;
170      uav_desc.Buffer.CounterOffsetInBytes = 0;
171      ID3D12Resource *d3d12_res = nullptr;
172      if (view->buffer) {
173         struct d3d12_resource *res = d3d12_resource(view->buffer);
174         uint64_t res_offset = 0;
175         d3d12_res = d3d12_resource_underlying(res, &res_offset);
176         d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_TRANSITION_FLAG_ACCUMULATE_STATE);
177         uav_desc.Buffer.FirstElement = (view->buffer_offset + res_offset) / 4;
178         uav_desc.Buffer.NumElements = DIV_ROUND_UP(view->buffer_size, 4);
179         d3d12_batch_reference_resource(batch, res, true);
180      }
181
182      struct d3d12_descriptor_handle handle;
183      d3d12_descriptor_heap_alloc_handle(batch->view_heap, &handle);
184      d3d12_screen(ctx->base.screen)->dev->CreateUnorderedAccessView(d3d12_res, nullptr, &uav_desc, handle.cpu_handle);
185   }
186
187   return table_start.gpu_handle;
188}
189
190static D3D12_GPU_DESCRIPTOR_HANDLE
191fill_sampler_descriptors(struct d3d12_context *ctx,
192                         const struct d3d12_shader_selector *shader_sel,
193                         unsigned stage)
194{
195   const struct d3d12_shader *shader = shader_sel->current;
196   struct d3d12_batch *batch = d3d12_current_batch(ctx);
197   D3D12_CPU_DESCRIPTOR_HANDLE descs[PIPE_MAX_SHADER_SAMPLER_VIEWS];
198   struct d3d12_descriptor_handle table_start;
199
200   d2d12_descriptor_heap_get_next_handle(batch->sampler_heap, &table_start);
201
202   for (unsigned i = shader->begin_srv_binding; i < shader->end_srv_binding; i++)
203   {
204      struct d3d12_sampler_state *sampler;
205
206      if (i == shader->pstipple_binding) {
207         sampler = ctx->pstipple.sampler_cso;
208      } else {
209         sampler = ctx->samplers[stage][i];
210      }
211
212      unsigned desc_idx = i - shader->begin_srv_binding;
213      if (sampler != NULL) {
214         if (sampler->is_shadow_sampler && shader_sel->compare_with_lod_bias_grad)
215            descs[desc_idx] = sampler->handle_without_shadow.cpu_handle;
216         else
217            descs[desc_idx] = sampler->handle.cpu_handle;
218      } else
219         descs[desc_idx] = ctx->null_sampler.cpu_handle;
220   }
221
222   d3d12_descriptor_heap_append_handles(batch->sampler_heap, descs, shader->end_srv_binding - shader->begin_srv_binding);
223   return table_start.gpu_handle;
224}
225
226static D3D12_UAV_DIMENSION
227image_view_dimension(enum pipe_texture_target target)
228{
229   switch (target) {
230   case PIPE_BUFFER: return D3D12_UAV_DIMENSION_BUFFER;
231   case PIPE_TEXTURE_1D: return D3D12_UAV_DIMENSION_TEXTURE1D;
232   case PIPE_TEXTURE_1D_ARRAY: return D3D12_UAV_DIMENSION_TEXTURE1DARRAY;
233   case PIPE_TEXTURE_RECT:
234   case PIPE_TEXTURE_2D:
235      return D3D12_UAV_DIMENSION_TEXTURE2D;
236   case PIPE_TEXTURE_2D_ARRAY:
237   case PIPE_TEXTURE_CUBE:
238   case PIPE_TEXTURE_CUBE_ARRAY:
239      return D3D12_UAV_DIMENSION_TEXTURE2DARRAY;
240   case PIPE_TEXTURE_3D: return D3D12_UAV_DIMENSION_TEXTURE3D;
241   default:
242      unreachable("unexpected target");
243   }
244}
245
246static D3D12_GPU_DESCRIPTOR_HANDLE
247fill_image_descriptors(struct d3d12_context *ctx,
248                       const struct d3d12_shader *shader,
249                       int stage)
250{
251   struct d3d12_screen *screen = d3d12_screen(ctx->base.screen);
252   struct d3d12_batch *batch = d3d12_current_batch(ctx);
253   struct d3d12_descriptor_handle table_start;
254
255   d2d12_descriptor_heap_get_next_handle(batch->view_heap, &table_start);
256
257   for (unsigned i = 0; i < shader->nir->info.num_images; i++)
258   {
259      struct pipe_image_view *view = &ctx->image_views[stage][i];
260
261      if (view->resource) {
262         D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc;
263         struct d3d12_resource *res = d3d12_resource(view->resource);
264         uint64_t offset = 0;
265         ID3D12Resource *d3d12_res = d3d12_resource_underlying(res, &offset);
266
267         enum pipe_format view_format = ctx->image_view_emulation_formats[stage][i];
268         if (view_format == PIPE_FORMAT_NONE)
269            view_format = view->format;
270         uav_desc.Format = d3d12_get_format(view_format);
271         uav_desc.ViewDimension = image_view_dimension(res->base.b.target);
272
273         unsigned array_size = view->u.tex.last_layer - view->u.tex.first_layer + 1;
274         switch (uav_desc.ViewDimension) {
275         case D3D12_UAV_DIMENSION_TEXTURE1D:
276            if (view->u.tex.first_layer > 0)
277               debug_printf("D3D12: can't create 1D UAV from layer %d\n",
278                            view->u.tex.first_layer);
279            uav_desc.Texture1D.MipSlice = view->u.tex.level;
280            break;
281         case D3D12_UAV_DIMENSION_TEXTURE1DARRAY:
282            uav_desc.Texture1DArray.FirstArraySlice = view->u.tex.first_layer;
283            uav_desc.Texture1DArray.ArraySize = array_size;
284            uav_desc.Texture1DArray.MipSlice = view->u.tex.level;
285            break;
286         case D3D12_UAV_DIMENSION_TEXTURE2D:
287            if (view->u.tex.first_layer > 0)
288               debug_printf("D3D12: can't create 2D UAV from layer %d\n",
289                            view->u.tex.first_layer);
290            uav_desc.Texture2D.MipSlice = view->u.tex.level;
291            uav_desc.Texture2D.PlaneSlice = 0;
292            break;
293         case D3D12_UAV_DIMENSION_TEXTURE2DARRAY:
294            uav_desc.Texture2DArray.FirstArraySlice = view->u.tex.first_layer;
295            uav_desc.Texture2DArray.ArraySize = array_size;
296            uav_desc.Texture2DArray.MipSlice = view->u.tex.level;
297            uav_desc.Texture2DArray.PlaneSlice = 0;
298            break;
299         case D3D12_UAV_DIMENSION_TEXTURE3D:
300            uav_desc.Texture3D.MipSlice = view->u.tex.level;
301            uav_desc.Texture3D.FirstWSlice = view->u.tex.first_layer;
302            uav_desc.Texture3D.WSize = array_size;
303            break;
304         case D3D12_UAV_DIMENSION_BUFFER: {
305            uav_desc.Format = d3d12_get_format(shader->uav_bindings[i].format);
306            uint format_size = util_format_get_blocksize(shader->uav_bindings[i].format);
307            offset += view->u.buf.offset;
308            uav_desc.Buffer.CounterOffsetInBytes = 0;
309            uav_desc.Buffer.FirstElement = offset / format_size;
310            uav_desc.Buffer.NumElements = view->u.buf.size / format_size;
311            uav_desc.Buffer.StructureByteStride = 0;
312            uav_desc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_NONE;
313            break;
314         }
315         default:
316            unreachable("Unexpected image view dimension");
317         }
318
319         if (!batch->pending_memory_barrier) {
320            if (res->base.b.target == PIPE_BUFFER) {
321               d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_TRANSITION_FLAG_ACCUMULATE_STATE);
322            } else {
323               unsigned transition_first_layer = view->u.tex.first_layer;
324               unsigned transition_array_size = array_size;
325               if (res->base.b.target == PIPE_TEXTURE_3D) {
326                  transition_first_layer = 0;
327                  transition_array_size = 0;
328               }
329               d3d12_transition_subresources_state(ctx, res,
330                                                   view->u.tex.level, 1,
331                                                   transition_first_layer, transition_array_size,
332                                                   0, 1,
333                                                   D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
334                                                   D3D12_TRANSITION_FLAG_ACCUMULATE_STATE);
335            }
336         }
337         d3d12_batch_reference_resource(batch, res, true);
338
339         struct d3d12_descriptor_handle handle;
340         d3d12_descriptor_heap_alloc_handle(batch->view_heap, &handle);
341         d3d12_screen(ctx->base.screen)->dev->CreateUnorderedAccessView(d3d12_res, nullptr, &uav_desc, handle.cpu_handle);
342      } else {
343         d3d12_descriptor_heap_append_handles(batch->view_heap, &screen->null_uavs[shader->uav_bindings[i].dimension].cpu_handle, 1);
344      }
345   }
346
347   return table_start.gpu_handle;
348}
349
350static unsigned
351fill_graphics_state_vars(struct d3d12_context *ctx,
352                         const struct pipe_draw_info *dinfo,
353                         unsigned drawid,
354                         const struct pipe_draw_start_count_bias *draw,
355                         struct d3d12_shader *shader,
356                         uint32_t *values,
357                         struct d3d12_cmd_signature_key *cmd_sig_key)
358{
359   unsigned size = 0;
360
361   for (unsigned j = 0; j < shader->num_state_vars; ++j) {
362      uint32_t *ptr = values + size;
363
364      switch (shader->state_vars[j].var) {
365      case D3D12_STATE_VAR_Y_FLIP:
366         ptr[0] = fui(ctx->flip_y);
367         size += 4;
368         break;
369      case D3D12_STATE_VAR_PT_SPRITE:
370         ptr[0] = fui(1.0 / ctx->viewports[0].Width);
371         ptr[1] = fui(1.0 / ctx->viewports[0].Height);
372         ptr[2] = fui(ctx->gfx_pipeline_state.rast->base.point_size);
373         ptr[3] = fui(D3D12_MAX_POINT_SIZE);
374         size += 4;
375         break;
376      case D3D12_STATE_VAR_DRAW_PARAMS:
377         ptr[0] = dinfo->index_size ? draw->index_bias : draw->start;
378         ptr[1] = dinfo->start_instance;
379         ptr[2] = drawid;
380         ptr[3] = dinfo->index_size ? -1 : 0;
381         cmd_sig_key->draw_or_dispatch_params = 1;
382         cmd_sig_key->root_sig = ctx->gfx_pipeline_state.root_signature;
383         cmd_sig_key->params_root_const_offset = size;
384         size += 4;
385         break;
386      case D3D12_STATE_VAR_DEPTH_TRANSFORM:
387         ptr[0] = fui(2.0f * ctx->viewport_states[0].scale[2]);
388         ptr[1] = fui(ctx->viewport_states[0].translate[2] - ctx->viewport_states[0].scale[2]);
389         size += 4;
390         break;
391      case D3D12_STATE_VAR_DEFAULT_INNER_TESS_LEVEL:
392         memcpy(ptr, ctx->default_inner_tess_factor, sizeof(ctx->default_inner_tess_factor));
393         size += 4;
394         break;
395      case D3D12_STATE_VAR_DEFAULT_OUTER_TESS_LEVEL:
396         memcpy(ptr, ctx->default_outer_tess_factor, sizeof(ctx->default_outer_tess_factor));
397         size += 4;
398         break;
399      case D3D12_STATE_VAR_PATCH_VERTICES_IN:
400         ptr[0] = ctx->patch_vertices;
401         size += 4;
402         break;
403      default:
404         unreachable("unknown state variable");
405      }
406   }
407
408   return size;
409}
410
411static unsigned
412fill_compute_state_vars(struct d3d12_context *ctx,
413                        const struct pipe_grid_info *info,
414                        struct d3d12_shader *shader,
415                        uint32_t *values,
416                        struct d3d12_cmd_signature_key *cmd_sig_key)
417{
418   unsigned size = 0;
419
420   for (unsigned j = 0; j < shader->num_state_vars; ++j) {
421      uint32_t *ptr = values + size;
422
423      switch (shader->state_vars[j].var) {
424      case D3D12_STATE_VAR_NUM_WORKGROUPS:
425         ptr[0] = info->grid[0];
426         ptr[1] = info->grid[1];
427         ptr[2] = info->grid[2];
428         cmd_sig_key->draw_or_dispatch_params = 1;
429         cmd_sig_key->root_sig = ctx->compute_pipeline_state.root_signature;
430         cmd_sig_key->params_root_const_offset = size;
431         size += 4;
432         break;
433      case D3D12_STATE_VAR_TRANSFORM_GENERIC0: {
434         unsigned idx = shader->state_vars[j].var - D3D12_STATE_VAR_TRANSFORM_GENERIC0;
435         ptr[0] = ctx->transform_state_vars[idx * 4];
436         ptr[1] = ctx->transform_state_vars[idx * 4 + 1];
437         ptr[2] = ctx->transform_state_vars[idx * 4 + 2];
438         ptr[3] = ctx->transform_state_vars[idx * 4 + 3];
439         size += 4;
440         break;
441      }
442      default:
443         unreachable("unknown state variable");
444      }
445   }
446
447   return size;
448}
449
450static bool
451check_descriptors_left(struct d3d12_context *ctx, bool compute)
452{
453   struct d3d12_batch *batch = d3d12_current_batch(ctx);
454   unsigned needed_descs = 0;
455
456   unsigned count = compute ? 1 : D3D12_GFX_SHADER_STAGES;
457   for (unsigned i = 0; i < count; ++i) {
458      struct d3d12_shader_selector *shader = compute ? ctx->compute_state : ctx->gfx_stages[i];
459
460      if (!shader)
461         continue;
462
463      needed_descs += shader->current->num_cb_bindings;
464      needed_descs += shader->current->end_srv_binding - shader->current->begin_srv_binding;
465      needed_descs += shader->current->nir->info.num_ssbos;
466      needed_descs += shader->current->nir->info.num_images;
467   }
468
469   if (d3d12_descriptor_heap_get_remaining_handles(batch->view_heap) < needed_descs)
470      return false;
471
472   needed_descs = 0;
473   for (unsigned i = 0; i < count; ++i) {
474      struct d3d12_shader_selector *shader = compute ? ctx->compute_state : ctx->gfx_stages[i];
475
476      if (!shader)
477         continue;
478
479      needed_descs += shader->current->end_srv_binding - shader->current->begin_srv_binding;
480   }
481
482   if (d3d12_descriptor_heap_get_remaining_handles(batch->sampler_heap) < needed_descs)
483      return false;
484
485   return true;
486}
487
/* Capacity of the per-draw descriptor table arrays. A single shader stage can
 * contribute up to five tables (CBV, SRV, sampler, SSBO and image), so size
 * for five per graphics stage; sizing for four could overflow the arrays in
 * builds where the bounds asserts are compiled out. */
#define MAX_DESCRIPTOR_TABLES (D3D12_GFX_SHADER_STAGES * 5)
489
490static void
491update_shader_stage_root_parameters(struct d3d12_context *ctx,
492                                    const struct d3d12_shader_selector *shader_sel,
493                                    unsigned &num_params,
494                                    unsigned &num_root_descriptors,
495                                    D3D12_GPU_DESCRIPTOR_HANDLE root_desc_tables[MAX_DESCRIPTOR_TABLES],
496                                    int root_desc_indices[MAX_DESCRIPTOR_TABLES])
497{
498   auto stage = shader_sel->stage;
499   struct d3d12_shader *shader = shader_sel->current;
500   uint64_t dirty = ctx->shader_dirty[stage];
501   assert(shader);
502
503   if (shader->num_cb_bindings > 0) {
504      if (dirty & D3D12_SHADER_DIRTY_CONSTBUF) {
505         assert(num_root_descriptors < MAX_DESCRIPTOR_TABLES);
506         root_desc_tables[num_root_descriptors] = fill_cbv_descriptors(ctx, shader, stage);
507         root_desc_indices[num_root_descriptors++] = num_params;
508      }
509      num_params++;
510   }
511   if (shader->end_srv_binding > 0) {
512      if (dirty & D3D12_SHADER_DIRTY_SAMPLER_VIEWS) {
513         assert(num_root_descriptors < MAX_DESCRIPTOR_TABLES);
514         root_desc_tables[num_root_descriptors] = fill_srv_descriptors(ctx, shader, stage);
515         root_desc_indices[num_root_descriptors++] = num_params;
516      }
517      num_params++;
518      if (dirty & D3D12_SHADER_DIRTY_SAMPLERS) {
519         assert(num_root_descriptors < MAX_DESCRIPTOR_TABLES);
520         root_desc_tables[num_root_descriptors] = fill_sampler_descriptors(ctx, shader_sel, stage);
521         root_desc_indices[num_root_descriptors++] = num_params;
522      }
523      num_params++;
524   }
525   if (shader->nir->info.num_ssbos > 0) {
526      if (dirty & D3D12_SHADER_DIRTY_SSBO) {
527         assert(num_root_descriptors < MAX_DESCRIPTOR_TABLES);
528         root_desc_tables[num_root_descriptors] = fill_ssbo_descriptors(ctx, shader, stage);
529         root_desc_indices[num_root_descriptors++] = num_params;
530      }
531      num_params++;
532   }
533   if (shader->nir->info.num_images > 0) {
534      if (dirty & D3D12_SHADER_DIRTY_IMAGE) {
535         assert(num_root_descriptors < MAX_DESCRIPTOR_TABLES);
536         root_desc_tables[num_root_descriptors] = fill_image_descriptors(ctx, shader, stage);
537         root_desc_indices[num_root_descriptors++] = num_params;
538      }
539      num_params++;
540   }
541}
542
543static unsigned
544update_graphics_root_parameters(struct d3d12_context *ctx,
545                                const struct pipe_draw_info *dinfo,
546                                unsigned drawid,
547                                const struct pipe_draw_start_count_bias *draw,
548                                D3D12_GPU_DESCRIPTOR_HANDLE root_desc_tables[MAX_DESCRIPTOR_TABLES],
549                                int root_desc_indices[MAX_DESCRIPTOR_TABLES],
550                                struct d3d12_cmd_signature_key *cmd_sig_key)
551{
552   unsigned num_params = 0;
553   unsigned num_root_descriptors = 0;
554
555   for (unsigned i = 0; i < D3D12_GFX_SHADER_STAGES; ++i) {
556      struct d3d12_shader_selector *shader_sel = ctx->gfx_stages[i];
557      if (!shader_sel)
558         continue;
559
560      update_shader_stage_root_parameters(ctx, shader_sel, num_params, num_root_descriptors, root_desc_tables, root_desc_indices);
561      /* TODO Don't always update state vars */
562      if (shader_sel->current->num_state_vars > 0) {
563         uint32_t constants[D3D12_MAX_GRAPHICS_STATE_VARS * 4];
564         unsigned size = fill_graphics_state_vars(ctx, dinfo, drawid, draw, shader_sel->current, constants, cmd_sig_key);
565         if (cmd_sig_key->draw_or_dispatch_params)
566            cmd_sig_key->params_root_const_param = num_params;
567         ctx->cmdlist->SetGraphicsRoot32BitConstants(num_params, size, constants, 0);
568         num_params++;
569      }
570   }
571   return num_root_descriptors;
572}
573
574static unsigned
575update_compute_root_parameters(struct d3d12_context *ctx,
576                               const struct pipe_grid_info *info,
577                               D3D12_GPU_DESCRIPTOR_HANDLE root_desc_tables[MAX_DESCRIPTOR_TABLES],
578                               int root_desc_indices[MAX_DESCRIPTOR_TABLES],
579                               struct d3d12_cmd_signature_key *cmd_sig_key)
580{
581   unsigned num_params = 0;
582   unsigned num_root_descriptors = 0;
583
584   struct d3d12_shader_selector *shader_sel = ctx->compute_state;
585   if (shader_sel) {
586      update_shader_stage_root_parameters(ctx, shader_sel, num_params, num_root_descriptors, root_desc_tables, root_desc_indices);
587      /* TODO Don't always update state vars */
588      if (shader_sel->current->num_state_vars > 0) {
589         uint32_t constants[D3D12_MAX_COMPUTE_STATE_VARS * 4];
590         unsigned size = fill_compute_state_vars(ctx, info, shader_sel->current, constants, cmd_sig_key);
591         if (cmd_sig_key->draw_or_dispatch_params)
592            cmd_sig_key->params_root_const_param = num_params;
593         ctx->cmdlist->SetComputeRoot32BitConstants(num_params, size, constants, 0);
594         num_params++;
595      }
596   }
597   return num_root_descriptors;
598}
599
600static bool
601validate_stream_output_targets(struct d3d12_context *ctx)
602{
603   unsigned factor = 0;
604
605   if (ctx->gfx_pipeline_state.num_so_targets &&
606       ctx->gfx_pipeline_state.stages[PIPE_SHADER_GEOMETRY])
607      factor = ctx->gfx_pipeline_state.stages[PIPE_SHADER_GEOMETRY]->key.gs.stream_output_factor;
608
609   if (factor > 1)
610      return d3d12_enable_fake_so_buffers(ctx, factor);
611   else
612      return d3d12_disable_fake_so_buffers(ctx);
613}
614
615static D3D_PRIMITIVE_TOPOLOGY
616topology(enum pipe_prim_type prim_type, uint8_t patch_vertices)
617{
618   switch (prim_type) {
619   case PIPE_PRIM_POINTS:
620      return D3D_PRIMITIVE_TOPOLOGY_POINTLIST;
621
622   case PIPE_PRIM_LINES:
623      return D3D_PRIMITIVE_TOPOLOGY_LINELIST;
624
625   case PIPE_PRIM_LINE_STRIP:
626      return D3D_PRIMITIVE_TOPOLOGY_LINESTRIP;
627
628   case PIPE_PRIM_TRIANGLES:
629      return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
630
631   case PIPE_PRIM_TRIANGLE_STRIP:
632      return D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP;
633
634   case PIPE_PRIM_LINES_ADJACENCY:
635      return D3D_PRIMITIVE_TOPOLOGY_LINELIST_ADJ;
636
637   case PIPE_PRIM_LINE_STRIP_ADJACENCY:
638      return D3D_PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ;
639
640   case PIPE_PRIM_TRIANGLES_ADJACENCY:
641      return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ;
642
643   case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
644      return D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ;
645
646   case PIPE_PRIM_PATCHES:
647      return (D3D_PRIMITIVE_TOPOLOGY)(D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST + patch_vertices - 1);
648
649   case PIPE_PRIM_QUADS:
650   case PIPE_PRIM_QUAD_STRIP:
651      return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; /* HACK: this is just wrong! */
652
653   default:
654      debug_printf("pipe_prim_type: %s\n", u_prim_name(prim_type));
655      unreachable("unexpected enum pipe_prim_type");
656   }
657}
658
659static DXGI_FORMAT
660ib_format(unsigned index_size)
661{
662   switch (index_size) {
663   case 1: return DXGI_FORMAT_R8_UINT;
664   case 2: return DXGI_FORMAT_R16_UINT;
665   case 4: return DXGI_FORMAT_R32_UINT;
666
667   default:
668      unreachable("unexpected index-buffer size");
669   }
670}
671
672static void
673twoface_emulation(struct d3d12_context *ctx,
674                  struct d3d12_rasterizer_state *rast,
675                  const struct pipe_draw_info *dinfo,
676                  const struct pipe_draw_indirect_info *indirect,
677                  const struct pipe_draw_start_count_bias *draw)
678{
679   /* draw backfaces */
680   ctx->base.bind_rasterizer_state(&ctx->base, rast->twoface_back);
681   d3d12_draw_vbo(&ctx->base, dinfo, 0, indirect, draw, 1);
682
683   /* restore real state */
684   ctx->base.bind_rasterizer_state(&ctx->base, rast);
685}
686
687static void
688transition_surface_subresources_state(struct d3d12_context *ctx,
689                                      struct pipe_surface *psurf,
690                                      struct pipe_resource *pres,
691                                      D3D12_RESOURCE_STATES state)
692{
693   struct d3d12_resource *res = d3d12_resource(pres);
694   unsigned start_layer, num_layers;
695   if (!d3d12_subresource_id_uses_layer(res->base.b.target)) {
696      start_layer = 0;
697      num_layers = 1;
698   } else {
699      start_layer = psurf->u.tex.first_layer;
700      num_layers = psurf->u.tex.last_layer - psurf->u.tex.first_layer + 1;
701   }
702   d3d12_transition_subresources_state(ctx, res,
703                                       psurf->u.tex.level, 1,
704                                       start_layer, num_layers,
705                                       d3d12_get_format_start_plane(psurf->format),
706                                       d3d12_get_format_num_planes(psurf->format),
707                                       state,
708                                       D3D12_TRANSITION_FLAG_ACCUMULATE_STATE);
709}
710
711static bool
712prim_supported(enum pipe_prim_type prim_type)
713{
714   switch (prim_type) {
715   case PIPE_PRIM_POINTS:
716   case PIPE_PRIM_LINES:
717   case PIPE_PRIM_LINE_STRIP:
718   case PIPE_PRIM_TRIANGLES:
719   case PIPE_PRIM_TRIANGLE_STRIP:
720   case PIPE_PRIM_LINES_ADJACENCY:
721   case PIPE_PRIM_LINE_STRIP_ADJACENCY:
722   case PIPE_PRIM_TRIANGLES_ADJACENCY:
723   case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:
724   case PIPE_PRIM_PATCHES:
725      return true;
726
727   default:
728      return false;
729   }
730}
731
732static inline struct d3d12_shader_selector *
733d3d12_last_vertex_stage(struct d3d12_context *ctx)
734{
735   struct d3d12_shader_selector *sel = ctx->gfx_stages[PIPE_SHADER_GEOMETRY];
736   if (!sel || sel->is_variant)
737      sel = ctx->gfx_stages[PIPE_SHADER_TESS_EVAL];
738   if (!sel)
739      sel = ctx->gfx_stages[PIPE_SHADER_VERTEX];
740   return sel;
741}
742
743static bool
744update_draw_indirect_with_sysvals(struct d3d12_context *ctx,
745   const struct pipe_draw_info *dinfo,
746   unsigned drawid,
747   const struct pipe_draw_indirect_info **indirect_inout,
748   struct pipe_draw_indirect_info *indirect_out)
749{
750   if (*indirect_inout == nullptr ||
751      ctx->gfx_stages[PIPE_SHADER_VERTEX] == nullptr)
752      return false;
753
754   unsigned sysvals[] = {
755      SYSTEM_VALUE_VERTEX_ID_ZERO_BASE,
756      SYSTEM_VALUE_BASE_VERTEX,
757      SYSTEM_VALUE_FIRST_VERTEX,
758      SYSTEM_VALUE_BASE_INSTANCE,
759      SYSTEM_VALUE_DRAW_ID,
760   };
761   bool any = false;
762   for (unsigned sysval : sysvals) {
763      any |= (BITSET_TEST(ctx->gfx_stages[PIPE_SHADER_VERTEX]->initial->info.system_values_read, sysval));
764   }
765   if (!any)
766      return false;
767
768   d3d12_compute_transform_save_restore save;
769   d3d12_save_compute_transform_state(ctx, &save);
770
771   auto indirect_in = *indirect_inout;
772   *indirect_inout = indirect_out;
773
774   d3d12_compute_transform_key key;
775   memset(&key, 0, sizeof(key));
776   key.type = d3d12_compute_transform_type::base_vertex;
777   key.base_vertex.indexed = dinfo->index_size > 0;
778   key.base_vertex.dynamic_count = indirect_in->indirect_draw_count != nullptr;
779   ctx->base.bind_compute_state(&ctx->base, d3d12_get_compute_transform(ctx, &key));
780
781   ctx->transform_state_vars[0] = indirect_in->stride;
782   ctx->transform_state_vars[1] = indirect_in->offset;
783   ctx->transform_state_vars[2] = drawid;
784
785   if (indirect_in->indirect_draw_count) {
786      pipe_constant_buffer draw_count_cbuf;
787      draw_count_cbuf.buffer = indirect_in->indirect_draw_count;
788      draw_count_cbuf.buffer_offset = indirect_in->indirect_draw_count_offset;
789      draw_count_cbuf.buffer_size = 4;
790      draw_count_cbuf.user_buffer = nullptr;
791      ctx->base.set_constant_buffer(&ctx->base, PIPE_SHADER_COMPUTE, 1, false, &draw_count_cbuf);
792   }
793
794   pipe_shader_buffer new_cs_ssbos[2];
795   new_cs_ssbos[0].buffer = indirect_in->buffer;
796   new_cs_ssbos[0].buffer_offset = 0;
797   new_cs_ssbos[0].buffer_size = indirect_in->buffer->width0;
798
799   /* 4 additional uints for base vertex, base instance, draw ID, and a bool for indexed draw */
800   unsigned out_stride = sizeof(uint32_t) * ((key.base_vertex.indexed ? 5 : 4) + 4);
801   pipe_resource output_buf_templ = {};
802   output_buf_templ.target = PIPE_BUFFER;
803   output_buf_templ.width0 = out_stride * indirect_in->draw_count;
804   output_buf_templ.height0 = output_buf_templ.depth0 = output_buf_templ.array_size =
805      output_buf_templ.last_level = 1;
806   output_buf_templ.usage = PIPE_USAGE_DEFAULT;
807
808   new_cs_ssbos[1].buffer = ctx->base.screen->resource_create(ctx->base.screen, &output_buf_templ);
809   new_cs_ssbos[1].buffer_offset = 0;
810   new_cs_ssbos[1].buffer_size = output_buf_templ.width0;
811   ctx->base.set_shader_buffers(&ctx->base, PIPE_SHADER_COMPUTE, 0, 2, new_cs_ssbos, 2);
812
813   pipe_grid_info grid = {};
814   grid.block[0] = grid.block[1] = grid.block[2] = 1;
815   grid.grid[0] = indirect_in->draw_count;
816   grid.grid[1] = grid.grid[2] = 1;
817   ctx->base.launch_grid(&ctx->base, &grid);
818
819   d3d12_restore_compute_transform_state(ctx, &save);
820
821   *indirect_out = *indirect_in;
822   indirect_out->buffer = new_cs_ssbos[1].buffer;
823   indirect_out->offset = 0;
824   indirect_out->stride = out_stride;
825   return true;
826}
827
828static bool
829update_draw_auto(struct d3d12_context *ctx,
830   const struct pipe_draw_indirect_info **indirect_inout,
831   struct pipe_draw_indirect_info *indirect_out)
832{
833   if (*indirect_inout == nullptr ||
834       (*indirect_inout)->count_from_stream_output == nullptr ||
835       ctx->gfx_stages[PIPE_SHADER_VERTEX] == nullptr)
836      return false;
837
838   d3d12_compute_transform_save_restore save;
839   d3d12_save_compute_transform_state(ctx, &save);
840
841   auto indirect_in = *indirect_inout;
842   *indirect_inout = indirect_out;
843
844   d3d12_compute_transform_key key;
845   memset(&key, 0, sizeof(key));
846   key.type = d3d12_compute_transform_type::draw_auto;
847   ctx->base.bind_compute_state(&ctx->base, d3d12_get_compute_transform(ctx, &key));
848
849   auto so_arg = indirect_in->count_from_stream_output;
850   d3d12_stream_output_target *target = (d3d12_stream_output_target *)so_arg;
851
852   ctx->transform_state_vars[0] = ctx->vbs[0].stride;
853   ctx->transform_state_vars[1] = ctx->vbs[0].buffer_offset - so_arg->buffer_offset;
854
855   pipe_shader_buffer new_cs_ssbo;
856   new_cs_ssbo.buffer = target->fill_buffer;
857   new_cs_ssbo.buffer_offset = target->fill_buffer_offset;
858   new_cs_ssbo.buffer_size = target->fill_buffer->width0 - new_cs_ssbo.buffer_offset;
859   ctx->base.set_shader_buffers(&ctx->base, PIPE_SHADER_COMPUTE, 0, 1, &new_cs_ssbo, 1);
860
861   pipe_grid_info grid = {};
862   grid.block[0] = grid.block[1] = grid.block[2] = 1;
863   grid.grid[0] = grid.grid[1] = grid.grid[2] = 1;
864   ctx->base.launch_grid(&ctx->base, &grid);
865
866   d3d12_restore_compute_transform_state(ctx, &save);
867
868   *indirect_out = *indirect_in;
869   pipe_resource_reference(&indirect_out->buffer, target->fill_buffer);
870   indirect_out->offset = target->fill_buffer_offset + 4;
871   indirect_out->stride = sizeof(D3D12_DRAW_ARGUMENTS);
872   indirect_out->count_from_stream_output = nullptr;
873   return true;
874}
875
876void
877d3d12_draw_vbo(struct pipe_context *pctx,
878               const struct pipe_draw_info *dinfo,
879               unsigned drawid_offset,
880               const struct pipe_draw_indirect_info *indirect,
881               const struct pipe_draw_start_count_bias *draws,
882               unsigned num_draws)
883{
884   if (num_draws > 1) {
885      util_draw_multi(pctx, dinfo, drawid_offset, indirect, draws, num_draws);
886      return;
887   }
888
889   if (!indirect && (!draws[0].count || !dinfo->instance_count))
890      return;
891
892   struct d3d12_context *ctx = d3d12_context(pctx);
893   struct d3d12_screen *screen = d3d12_screen(pctx->screen);
894   struct d3d12_batch *batch;
895   struct pipe_resource *index_buffer = NULL;
896   unsigned index_offset = 0;
897   enum d3d12_surface_conversion_mode conversion_modes[PIPE_MAX_COLOR_BUFS] = {};
898   struct pipe_draw_indirect_info patched_indirect = {};
899
900   if (!prim_supported((enum pipe_prim_type)dinfo->mode) ||
901       dinfo->index_size == 1 ||
902       (dinfo->primitive_restart && dinfo->restart_index != 0xffff &&
903        dinfo->restart_index != 0xffffffff)) {
904
905      if (!dinfo->primitive_restart &&
906          !indirect &&
907          !u_trim_pipe_prim((enum pipe_prim_type)dinfo->mode, (unsigned *)&draws[0].count))
908         return;
909
910      ctx->initial_api_prim = (enum pipe_prim_type)dinfo->mode;
911      util_primconvert_save_rasterizer_state(ctx->primconvert, &ctx->gfx_pipeline_state.rast->base);
912      util_primconvert_draw_vbo(ctx->primconvert, dinfo, drawid_offset, indirect, draws, num_draws);
913      return;
914   }
915
916   bool draw_auto = update_draw_auto(ctx, &indirect, &patched_indirect);
917   bool indirect_with_sysvals = !draw_auto && update_draw_indirect_with_sysvals(ctx, dinfo, drawid_offset, &indirect, &patched_indirect);
918   struct d3d12_cmd_signature_key cmd_sig_key;
919   memset(&cmd_sig_key, 0, sizeof(cmd_sig_key));
920
921   if (indirect) {
922      cmd_sig_key.compute = false;
923      cmd_sig_key.indexed = dinfo->index_size > 0;
924      if (indirect->draw_count > 1 ||
925          indirect->indirect_draw_count ||
926          indirect_with_sysvals)
927         cmd_sig_key.multi_draw_stride = indirect->stride;
928      else if (cmd_sig_key.indexed)
929         cmd_sig_key.multi_draw_stride = sizeof(D3D12_DRAW_INDEXED_ARGUMENTS);
930      else
931         cmd_sig_key.multi_draw_stride = sizeof(D3D12_DRAW_ARGUMENTS);
932   }
933
934   for (int i = 0; i < ctx->fb.nr_cbufs; ++i) {
935      if (ctx->fb.cbufs[i]) {
936         struct d3d12_surface *surface = d3d12_surface(ctx->fb.cbufs[i]);
937         conversion_modes[i] = d3d12_surface_update_pre_draw(pctx, surface, d3d12_rtv_format(ctx, i));
938         if (conversion_modes[i] != D3D12_SURFACE_CONVERSION_NONE)
939            ctx->cmdlist_dirty |= D3D12_DIRTY_FRAMEBUFFER;
940      }
941   }
942
943   struct d3d12_rasterizer_state *rast = ctx->gfx_pipeline_state.rast;
944   if (rast->twoface_back) {
945      enum pipe_prim_type saved_mode = ctx->initial_api_prim;
946      twoface_emulation(ctx, rast, dinfo, indirect, &draws[0]);
947      ctx->initial_api_prim = saved_mode;
948   }
949
950   if (ctx->pstipple.enabled && ctx->gfx_pipeline_state.rast->base.poly_stipple_enable)
951      ctx->shader_dirty[PIPE_SHADER_FRAGMENT] |= D3D12_SHADER_DIRTY_SAMPLER_VIEWS |
952                                                 D3D12_SHADER_DIRTY_SAMPLERS;
953
954   /* this should *really* be fixed at a higher level than here! */
955   enum pipe_prim_type reduced_prim = u_reduced_prim((enum pipe_prim_type)dinfo->mode);
956   if (reduced_prim == PIPE_PRIM_TRIANGLES &&
957       ctx->gfx_pipeline_state.rast->base.cull_face == PIPE_FACE_FRONT_AND_BACK)
958      return;
959
960   if (ctx->gfx_pipeline_state.prim_type != dinfo->mode) {
961      ctx->gfx_pipeline_state.prim_type = (enum pipe_prim_type)dinfo->mode;
962      ctx->state_dirty |= D3D12_DIRTY_PRIM_MODE;
963   }
964
965   d3d12_select_shader_variants(ctx, dinfo);
966   d3d12_validate_queries(ctx);
967   for (unsigned i = 0; i < D3D12_GFX_SHADER_STAGES; ++i) {
968      struct d3d12_shader *shader = ctx->gfx_stages[i] ? ctx->gfx_stages[i]->current : NULL;
969      if (ctx->gfx_pipeline_state.stages[i] != shader) {
970         ctx->gfx_pipeline_state.stages[i] = shader;
971         ctx->state_dirty |= D3D12_DIRTY_SHADER;
972      }
973   }
974
975   /* Reset to an invalid value after it's been used */
976   ctx->initial_api_prim = PIPE_PRIM_MAX;
977
978   /* Copy the stream output info from the current vertex/geometry shader */
979   if (ctx->state_dirty & D3D12_DIRTY_SHADER) {
980      struct d3d12_shader_selector *sel = d3d12_last_vertex_stage(ctx);
981      if (sel) {
982         ctx->gfx_pipeline_state.so_info = sel->so_info;
983      } else {
984         memset(&ctx->gfx_pipeline_state.so_info, 0, sizeof(sel->so_info));
985      }
986   }
987   if (!validate_stream_output_targets(ctx)) {
988      debug_printf("validate_stream_output_targets() failed\n");
989      return;
990   }
991
992   D3D12_INDEX_BUFFER_STRIP_CUT_VALUE ib_strip_cut_value =
993      D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_DISABLED;
994   if (dinfo->index_size > 0) {
995      assert(dinfo->index_size != 1);
996
997      if (dinfo->has_user_indices) {
998         if (!util_upload_index_buffer(pctx, dinfo, &draws[0], &index_buffer,
999             &index_offset, 4)) {
1000            debug_printf("util_upload_index_buffer() failed\n");
1001            return;
1002         }
1003      } else {
1004         index_buffer = dinfo->index.resource;
1005      }
1006
1007      if (dinfo->primitive_restart) {
1008         assert(dinfo->restart_index == 0xffff ||
1009                dinfo->restart_index == 0xffffffff);
1010         ib_strip_cut_value = dinfo->restart_index == 0xffff ?
1011            D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFF :
1012            D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFFFFFF;
1013      }
1014   }
1015
1016   if (ctx->gfx_pipeline_state.ib_strip_cut_value != ib_strip_cut_value) {
1017      ctx->gfx_pipeline_state.ib_strip_cut_value = ib_strip_cut_value;
1018      ctx->state_dirty |= D3D12_DIRTY_STRIP_CUT_VALUE;
1019   }
1020
1021   if (!ctx->gfx_pipeline_state.root_signature || ctx->state_dirty & D3D12_DIRTY_SHADER) {
1022      ID3D12RootSignature *root_signature = d3d12_get_root_signature(ctx, false);
1023      if (ctx->gfx_pipeline_state.root_signature != root_signature) {
1024         ctx->gfx_pipeline_state.root_signature = root_signature;
1025         ctx->state_dirty |= D3D12_DIRTY_ROOT_SIGNATURE;
1026         for (int i = 0; i < D3D12_GFX_SHADER_STAGES; ++i)
1027            ctx->shader_dirty[i] |= D3D12_SHADER_DIRTY_ALL;
1028      }
1029   }
1030
1031   if (!ctx->current_gfx_pso || ctx->state_dirty & D3D12_DIRTY_GFX_PSO) {
1032      ctx->current_gfx_pso = d3d12_get_gfx_pipeline_state(ctx);
1033      assert(ctx->current_gfx_pso);
1034   }
1035
1036   ctx->cmdlist_dirty |= ctx->state_dirty;
1037
1038   if (!check_descriptors_left(ctx, false))
1039      d3d12_flush_cmdlist(ctx);
1040   batch = d3d12_current_batch(ctx);
1041
1042   if (ctx->cmdlist_dirty & D3D12_DIRTY_ROOT_SIGNATURE) {
1043      d3d12_batch_reference_object(batch, ctx->gfx_pipeline_state.root_signature);
1044      ctx->cmdlist->SetGraphicsRootSignature(ctx->gfx_pipeline_state.root_signature);
1045   }
1046
1047   if (ctx->cmdlist_dirty & D3D12_DIRTY_GFX_PSO) {
1048      assert(ctx->current_gfx_pso);
1049      d3d12_batch_reference_object(batch, ctx->current_gfx_pso);
1050      ctx->cmdlist->SetPipelineState(ctx->current_gfx_pso);
1051   }
1052
1053   D3D12_GPU_DESCRIPTOR_HANDLE root_desc_tables[MAX_DESCRIPTOR_TABLES];
1054   int root_desc_indices[MAX_DESCRIPTOR_TABLES];
1055   unsigned num_root_descriptors = update_graphics_root_parameters(ctx, dinfo, drawid_offset, &draws[0],
1056      root_desc_tables, root_desc_indices, &cmd_sig_key);
1057
1058   bool need_zero_one_depth_range = d3d12_need_zero_one_depth_range(ctx);
1059   if (need_zero_one_depth_range != ctx->need_zero_one_depth_range) {
1060      ctx->cmdlist_dirty |= D3D12_DIRTY_VIEWPORT;
1061      ctx->need_zero_one_depth_range = need_zero_one_depth_range;
1062   }
1063
1064   if (ctx->cmdlist_dirty & D3D12_DIRTY_VIEWPORT) {
1065      D3D12_VIEWPORT viewports[PIPE_MAX_VIEWPORTS];
1066      for (unsigned i = 0; i < ctx->num_viewports; ++i) {
1067         viewports[i] = ctx->viewports[i];
1068         if (ctx->need_zero_one_depth_range) {
1069            viewports[i].MinDepth = 0.0f;
1070            viewports[i].MaxDepth = 1.0f;
1071         }
1072         if (ctx->fb.nr_cbufs == 0 && !ctx->fb.zsbuf) {
1073            viewports[i].TopLeftX = MAX2(0.0f, viewports[i].TopLeftX);
1074            viewports[i].TopLeftY = MAX2(0.0f, viewports[i].TopLeftY);
1075            viewports[i].Width = MIN2(ctx->fb.width, viewports[i].Width);
1076            viewports[i].Height = MIN2(ctx->fb.height, viewports[i].Height);
1077         }
1078      }
1079      ctx->cmdlist->RSSetViewports(ctx->num_viewports, viewports);
1080   }
1081
1082   if (ctx->cmdlist_dirty & D3D12_DIRTY_SCISSOR) {
1083      if (ctx->gfx_pipeline_state.rast->base.scissor && ctx->num_viewports > 0)
1084         ctx->cmdlist->RSSetScissorRects(ctx->num_viewports, ctx->scissors);
1085      else
1086         ctx->cmdlist->RSSetScissorRects(PIPE_MAX_VIEWPORTS, MAX_SCISSOR_ARRAY);
1087   }
1088
1089   if (ctx->cmdlist_dirty & D3D12_DIRTY_BLEND_COLOR) {
1090      unsigned blend_factor_flags = ctx->gfx_pipeline_state.blend->blend_factor_flags;
1091      if (blend_factor_flags & (D3D12_BLEND_FACTOR_COLOR | D3D12_BLEND_FACTOR_ANY)) {
1092         ctx->cmdlist->OMSetBlendFactor(ctx->blend_factor);
1093      } else if (blend_factor_flags & D3D12_BLEND_FACTOR_ALPHA) {
1094         float alpha_const[4] = { ctx->blend_factor[3], ctx->blend_factor[3],
1095                                 ctx->blend_factor[3], ctx->blend_factor[3] };
1096         ctx->cmdlist->OMSetBlendFactor(alpha_const);
1097      }
1098   }
1099
1100   if (ctx->cmdlist_dirty & D3D12_DIRTY_STENCIL_REF)
1101      ctx->cmdlist->OMSetStencilRef(ctx->stencil_ref.ref_value[0]);
1102
1103   if (ctx->cmdlist_dirty & D3D12_DIRTY_PRIM_MODE)
1104      ctx->cmdlist->IASetPrimitiveTopology(topology((enum pipe_prim_type)dinfo->mode, ctx->patch_vertices));
1105
1106   for (unsigned i = 0; i < ctx->num_vbs; ++i) {
1107      if (ctx->vbs[i].buffer.resource) {
1108         struct d3d12_resource *res = d3d12_resource(ctx->vbs[i].buffer.resource);
1109         d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER, D3D12_TRANSITION_FLAG_ACCUMULATE_STATE);
1110         if (ctx->cmdlist_dirty & D3D12_DIRTY_VERTEX_BUFFERS)
1111            d3d12_batch_reference_resource(batch, res, false);
1112      }
1113   }
1114   if (ctx->cmdlist_dirty & D3D12_DIRTY_VERTEX_BUFFERS)
1115      ctx->cmdlist->IASetVertexBuffers(0, ctx->num_vbs, ctx->vbvs);
1116
1117   if (index_buffer) {
1118      D3D12_INDEX_BUFFER_VIEW ibv;
1119      struct d3d12_resource *res = d3d12_resource(index_buffer);
1120      ibv.BufferLocation = d3d12_resource_gpu_virtual_address(res) + index_offset;
1121      ibv.SizeInBytes = res->base.b.width0 - index_offset;
1122      ibv.Format = ib_format(dinfo->index_size);
1123      d3d12_transition_resource_state(ctx, res, D3D12_RESOURCE_STATE_INDEX_BUFFER, D3D12_TRANSITION_FLAG_ACCUMULATE_STATE);
1124      if (ctx->cmdlist_dirty & D3D12_DIRTY_INDEX_BUFFER ||
1125          memcmp(&ctx->ibv, &ibv, sizeof(D3D12_INDEX_BUFFER_VIEW)) != 0) {
1126         ctx->ibv = ibv;
1127         d3d12_batch_reference_resource(batch, res, false);
1128         ctx->cmdlist->IASetIndexBuffer(&ibv);
1129      }
1130
1131      if (dinfo->has_user_indices)
1132         pipe_resource_reference(&index_buffer, NULL);
1133   }
1134
1135   if (ctx->cmdlist_dirty & D3D12_DIRTY_FRAMEBUFFER) {
1136      D3D12_CPU_DESCRIPTOR_HANDLE render_targets[PIPE_MAX_COLOR_BUFS] = {};
1137      D3D12_CPU_DESCRIPTOR_HANDLE *depth_desc = NULL, tmp_desc;
1138      for (int i = 0; i < ctx->fb.nr_cbufs; ++i) {
1139         if (ctx->fb.cbufs[i]) {
1140            struct d3d12_surface *surface = d3d12_surface(ctx->fb.cbufs[i]);
1141            render_targets[i] = d3d12_surface_get_handle(surface, conversion_modes[i]);
1142            d3d12_batch_reference_surface_texture(batch, surface);
1143         } else
1144            render_targets[i] = screen->null_rtv.cpu_handle;
1145      }
1146      if (ctx->fb.zsbuf) {
1147         struct d3d12_surface *surface = d3d12_surface(ctx->fb.zsbuf);
1148         tmp_desc = surface->desc_handle.cpu_handle;
1149         d3d12_batch_reference_surface_texture(batch, surface);
1150         depth_desc = &tmp_desc;
1151      }
1152      ctx->cmdlist->OMSetRenderTargets(ctx->fb.nr_cbufs, render_targets, FALSE, depth_desc);
1153   }
1154
1155   struct pipe_stream_output_target **so_targets = ctx->fake_so_buffer_factor ? ctx->fake_so_targets
1156                                                                              : ctx->so_targets;
1157   D3D12_STREAM_OUTPUT_BUFFER_VIEW *so_buffer_views = ctx->fake_so_buffer_factor ? ctx->fake_so_buffer_views
1158                                                                                 : ctx->so_buffer_views;
1159   for (unsigned i = 0; i < ctx->gfx_pipeline_state.num_so_targets; ++i) {
1160      struct d3d12_stream_output_target *target = (struct d3d12_stream_output_target *)so_targets[i];
1161
1162      if (!target)
1163         continue;
1164
1165      struct d3d12_resource *so_buffer = d3d12_resource(target->base.buffer);
1166      struct d3d12_resource *fill_buffer = d3d12_resource(target->fill_buffer);
1167
1168      if (ctx->cmdlist_dirty & D3D12_DIRTY_STREAM_OUTPUT) {
1169         d3d12_batch_reference_resource(batch, so_buffer, true);
1170         d3d12_batch_reference_resource(batch, fill_buffer, true);
1171      }
1172
1173      d3d12_transition_resource_state(ctx, so_buffer, D3D12_RESOURCE_STATE_STREAM_OUT, D3D12_TRANSITION_FLAG_ACCUMULATE_STATE);
1174      d3d12_transition_resource_state(ctx, fill_buffer, D3D12_RESOURCE_STATE_STREAM_OUT, D3D12_TRANSITION_FLAG_ACCUMULATE_STATE);
1175   }
1176   if (ctx->cmdlist_dirty & D3D12_DIRTY_STREAM_OUTPUT)
1177      ctx->cmdlist->SOSetTargets(0, 4, so_buffer_views);
1178
1179   for (int i = 0; i < ctx->fb.nr_cbufs; ++i) {
1180      struct pipe_surface *psurf = ctx->fb.cbufs[i];
1181      if (!psurf)
1182         continue;
1183
1184      struct pipe_resource *pres = conversion_modes[i] == D3D12_SURFACE_CONVERSION_BGRA_UINT ?
1185                                      d3d12_surface(psurf)->rgba_texture : psurf->texture;
1186      transition_surface_subresources_state(ctx, psurf, pres,
1187         D3D12_RESOURCE_STATE_RENDER_TARGET);
1188   }
1189   if (ctx->fb.zsbuf) {
1190      struct pipe_surface *psurf = ctx->fb.zsbuf;
1191      transition_surface_subresources_state(ctx, psurf, psurf->texture,
1192         D3D12_RESOURCE_STATE_DEPTH_WRITE);
1193   }
1194
1195   ID3D12Resource *indirect_arg_buf = nullptr;
1196   ID3D12Resource *indirect_count_buf = nullptr;
1197   uint64_t indirect_arg_offset = 0, indirect_count_offset = 0;
1198   if (indirect) {
1199      if (indirect->buffer) {
1200         struct d3d12_resource *indirect_buf = d3d12_resource(indirect->buffer);
1201         uint64_t buf_offset = 0;
1202         indirect_arg_buf = d3d12_resource_underlying(indirect_buf, &buf_offset);
1203         indirect_arg_offset = indirect->offset + buf_offset;
1204         d3d12_transition_resource_state(ctx, indirect_buf,
1205            D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT, D3D12_TRANSITION_FLAG_ACCUMULATE_STATE);
1206         d3d12_batch_reference_resource(batch, indirect_buf, false);
1207      }
1208      if (indirect->indirect_draw_count) {
1209         struct d3d12_resource *count_buf = d3d12_resource(indirect->indirect_draw_count);
1210         uint64_t count_offset = 0;
1211         indirect_count_buf = d3d12_resource_underlying(count_buf, &count_offset);
1212         indirect_count_offset = indirect->indirect_draw_count_offset + count_offset;
1213         d3d12_transition_resource_state(ctx, count_buf,
1214            D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT, D3D12_TRANSITION_FLAG_ACCUMULATE_STATE);
1215         d3d12_batch_reference_resource(batch, count_buf, false);
1216      }
1217      assert(!indirect->count_from_stream_output);
1218   }
1219
1220   d3d12_apply_resource_states(ctx, false);
1221
1222   for (unsigned i = 0; i < num_root_descriptors; ++i)
1223      ctx->cmdlist->SetGraphicsRootDescriptorTable(root_desc_indices[i], root_desc_tables[i]);
1224
1225   if (indirect) {
1226      unsigned draw_count = draw_auto ? 1 : indirect->draw_count;
1227      ID3D12CommandSignature *cmd_sig = d3d12_get_cmd_signature(ctx, &cmd_sig_key);
1228      ctx->cmdlist->ExecuteIndirect(cmd_sig, draw_count, indirect_arg_buf,
1229         indirect_arg_offset, indirect_count_buf, indirect_count_offset);
1230   } else {
1231      if (dinfo->index_size > 0)
1232         ctx->cmdlist->DrawIndexedInstanced(draws[0].count, dinfo->instance_count,
1233                                            draws[0].start, draws[0].index_bias,
1234                                            dinfo->start_instance);
1235      else
1236         ctx->cmdlist->DrawInstanced(draws[0].count, dinfo->instance_count,
1237                                     draws[0].start, dinfo->start_instance);
1238   }
1239
1240   ctx->state_dirty &= D3D12_DIRTY_COMPUTE_MASK;
1241   batch->pending_memory_barrier = false;
1242
1243   ctx->cmdlist_dirty &= D3D12_DIRTY_COMPUTE_MASK |
1244      (index_buffer ? 0 : D3D12_DIRTY_INDEX_BUFFER);
1245
1246   /* The next dispatch needs to reassert the compute PSO */
1247   ctx->cmdlist_dirty |= D3D12_DIRTY_COMPUTE_SHADER;
1248
1249   for (unsigned i = 0; i < D3D12_GFX_SHADER_STAGES; ++i)
1250      ctx->shader_dirty[i] = 0;
1251
1252   for (int i = 0; i < ctx->fb.nr_cbufs; ++i) {
1253      if (ctx->fb.cbufs[i]) {
1254         struct d3d12_surface *surface = d3d12_surface(ctx->fb.cbufs[i]);
1255         d3d12_surface_update_post_draw(pctx, surface, conversion_modes[i]);
1256      }
1257   }
1258
1259   pipe_resource_reference(&patched_indirect.buffer, NULL);
1260}
1261
1262static bool
1263update_dispatch_indirect_with_sysvals(struct d3d12_context *ctx,
1264                                      struct pipe_resource **indirect_inout,
1265                                      unsigned *indirect_offset_inout,
1266                                      struct pipe_resource **indirect_out)
1267{
1268   if (*indirect_inout == nullptr ||
1269       ctx->compute_state == nullptr)
1270      return false;
1271
1272   if (!BITSET_TEST(ctx->compute_state->current->nir->info.system_values_read, SYSTEM_VALUE_NUM_WORKGROUPS))
1273      return false;
1274
1275   if (ctx->current_predication)
1276      ctx->cmdlist->SetPredication(nullptr, 0, D3D12_PREDICATION_OP_EQUAL_ZERO);
1277
1278   auto indirect_in = *indirect_inout;
1279
1280   /* 6 uints: 2 copies of the indirect arg buffer */
1281   pipe_resource output_buf_templ = {};
1282   output_buf_templ.target = PIPE_BUFFER;
1283   output_buf_templ.width0 = sizeof(uint32_t) * 6;
1284   output_buf_templ.height0 = output_buf_templ.depth0 = output_buf_templ.array_size =
1285      output_buf_templ.last_level = 1;
1286   output_buf_templ.usage = PIPE_USAGE_DEFAULT;
1287   *indirect_out = ctx->base.screen->resource_create(ctx->base.screen, &output_buf_templ);
1288
1289   struct pipe_box src_box = { (int)*indirect_offset_inout, 0, 0, sizeof(uint32_t) * 3, 1, 1 };
1290   ctx->base.resource_copy_region(&ctx->base, *indirect_out, 0, 0, 0, 0, indirect_in, 0, &src_box);
1291   ctx->base.resource_copy_region(&ctx->base, *indirect_out, 0, src_box.width, 0, 0, indirect_in, 0, &src_box);
1292
1293   if (ctx->current_predication)
1294      d3d12_enable_predication(ctx);
1295
1296   *indirect_inout = *indirect_out;
1297   *indirect_offset_inout = 0;
1298   return true;
1299}
1300
1301void
1302d3d12_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info)
1303{
1304   struct d3d12_context *ctx = d3d12_context(pctx);
1305   struct d3d12_batch *batch;
1306   struct pipe_resource *patched_indirect = nullptr;
1307
1308   struct d3d12_cmd_signature_key cmd_sig_key;
1309   memset(&cmd_sig_key, 0, sizeof(cmd_sig_key));
1310   cmd_sig_key.compute = 1;
1311   cmd_sig_key.multi_draw_stride = sizeof(D3D12_DISPATCH_ARGUMENTS);
1312
1313   struct pipe_resource *indirect = info->indirect;
1314   unsigned indirect_offset = info->indirect_offset;
1315   if (indirect && update_dispatch_indirect_with_sysvals(ctx, &indirect, &indirect_offset, &patched_indirect))
1316      cmd_sig_key.multi_draw_stride = sizeof(D3D12_DISPATCH_ARGUMENTS) * 2;
1317
1318   d3d12_select_compute_shader_variants(ctx, info);
1319   d3d12_validate_queries(ctx);
1320   struct d3d12_shader *shader = ctx->compute_state ? ctx->compute_state->current : NULL;
1321   if (ctx->compute_pipeline_state.stage != shader) {
1322      ctx->compute_pipeline_state.stage = shader;
1323      ctx->state_dirty |= D3D12_DIRTY_COMPUTE_SHADER;
1324   }
1325
1326   if (!ctx->compute_pipeline_state.root_signature || ctx->state_dirty & D3D12_DIRTY_COMPUTE_SHADER) {
1327      ID3D12RootSignature *root_signature = d3d12_get_root_signature(ctx, true);
1328      if (ctx->compute_pipeline_state.root_signature != root_signature) {
1329         ctx->compute_pipeline_state.root_signature = root_signature;
1330         ctx->state_dirty |= D3D12_DIRTY_COMPUTE_ROOT_SIGNATURE;
1331         ctx->shader_dirty[PIPE_SHADER_COMPUTE] |= D3D12_SHADER_DIRTY_ALL;
1332      }
1333   }
1334
1335   if (!ctx->current_compute_pso || ctx->state_dirty & D3D12_DIRTY_COMPUTE_PSO) {
1336      ctx->current_compute_pso = d3d12_get_compute_pipeline_state(ctx);
1337      assert(ctx->current_compute_pso);
1338   }
1339
1340   ctx->cmdlist_dirty |= ctx->state_dirty;
1341
1342   if (!check_descriptors_left(ctx, true))
1343      d3d12_flush_cmdlist(ctx);
1344   batch = d3d12_current_batch(ctx);
1345
1346   if (ctx->cmdlist_dirty & D3D12_DIRTY_COMPUTE_ROOT_SIGNATURE) {
1347      d3d12_batch_reference_object(batch, ctx->compute_pipeline_state.root_signature);
1348      ctx->cmdlist->SetComputeRootSignature(ctx->compute_pipeline_state.root_signature);
1349   }
1350
1351   if (ctx->cmdlist_dirty & D3D12_DIRTY_COMPUTE_PSO) {
1352      assert(ctx->current_compute_pso);
1353      d3d12_batch_reference_object(batch, ctx->current_compute_pso);
1354      ctx->cmdlist->SetPipelineState(ctx->current_compute_pso);
1355   }
1356
1357   D3D12_GPU_DESCRIPTOR_HANDLE root_desc_tables[MAX_DESCRIPTOR_TABLES];
1358   int root_desc_indices[MAX_DESCRIPTOR_TABLES];
1359   unsigned num_root_descriptors = update_compute_root_parameters(ctx, info, root_desc_tables, root_desc_indices, &cmd_sig_key);
1360
1361   ID3D12Resource *indirect_arg_buf = nullptr;
1362   uint64_t indirect_arg_offset = 0;
1363   if (indirect) {
1364      struct d3d12_resource *indirect_buf = d3d12_resource(indirect);
1365      uint64_t buf_offset = 0;
1366      indirect_arg_buf = d3d12_resource_underlying(indirect_buf, &buf_offset);
1367      indirect_arg_offset = indirect_offset + buf_offset;
1368      d3d12_transition_resource_state(ctx, indirect_buf,
1369         D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT, D3D12_TRANSITION_FLAG_ACCUMULATE_STATE);
1370      d3d12_batch_reference_resource(batch, indirect_buf, false);
1371   }
1372
1373   d3d12_apply_resource_states(ctx, ctx->compute_state->is_variant);
1374
1375   for (unsigned i = 0; i < num_root_descriptors; ++i)
1376      ctx->cmdlist->SetComputeRootDescriptorTable(root_desc_indices[i], root_desc_tables[i]);
1377
1378   if (indirect) {
1379      ID3D12CommandSignature *cmd_sig = d3d12_get_cmd_signature(ctx, &cmd_sig_key);
1380      ctx->cmdlist->ExecuteIndirect(cmd_sig, 1, indirect_arg_buf, indirect_arg_offset, nullptr, 0);
1381   } else {
1382      ctx->cmdlist->Dispatch(info->grid[0], info->grid[1], info->grid[2]);
1383   }
1384
1385   ctx->state_dirty &= D3D12_DIRTY_GFX_MASK;
1386   ctx->cmdlist_dirty &= D3D12_DIRTY_GFX_MASK;
1387
1388   /* The next draw needs to reassert the graphics PSO */
1389   ctx->cmdlist_dirty |= D3D12_DIRTY_SHADER;
1390   batch->pending_memory_barrier = false;
1391
1392   ctx->shader_dirty[PIPE_SHADER_COMPUTE] = 0;
1393   pipe_resource_reference(&patched_indirect, nullptr);
1394}
1395