/*
 * Copyright © Microsoft Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "dzn_private.h"

#include "spirv/nir_spirv.h"

#include "dxil_nir.h"
#include "nir_to_dxil.h"
#include "dxil_spirv_nir.h"
#include "spirv_to_dxil.h"

#include "dxil_validator.h"

#include "vk_alloc.h"
#include "vk_util.h"
#include "vk_format.h"
#include "vk_pipeline.h"
#include "vk_pipeline_cache.h"

#include "util/u_debug.h"

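/* Carve a new subobject out of a D3D12 pipeline state stream. Each
 * subobject is a (type, payload) pair; the macro aligns the stream,
 * advances its size, tags the subobject type, and leaves __desc pointing
 * at the zero-initialized payload. Illustrative usage, mirroring the call
 * sites below:
 *
 *    d3d12_gfx_pipeline_state_stream_new_desc(out, RASTERIZER,
 *                                             D3D12_RASTERIZER_DESC, desc);
 *    desc->FillMode = D3D12_FILL_MODE_SOLID;
 */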
#define d3d12_pipeline_state_stream_new_desc(__stream, __maxstreamsz, __id, __type, __desc) \
   __type *__desc; \
   do { \
      struct { \
         D3D12_PIPELINE_STATE_SUBOBJECT_TYPE type; \
         __type desc; \
      } *__wrapper; \
      (__stream)->SizeInBytes = ALIGN_POT((__stream)->SizeInBytes, alignof(void *)); \
      __wrapper = (void *)((uint8_t *)(__stream)->pPipelineStateSubobjectStream + (__stream)->SizeInBytes); \
      (__stream)->SizeInBytes += sizeof(*__wrapper); \
      assert((__stream)->SizeInBytes <= __maxstreamsz); \
      __wrapper->type = D3D12_PIPELINE_STATE_SUBOBJECT_TYPE_ ## __id; \
      __desc = &__wrapper->desc; \
      memset(__desc, 0, sizeof(*__desc)); \
   } while (0)

#define d3d12_gfx_pipeline_state_stream_new_desc(__stream, __id, __type, __desc) \
   d3d12_pipeline_state_stream_new_desc(__stream, MAX_GFX_PIPELINE_STATE_STREAM_SIZE, __id, __type, __desc)

#define d3d12_compute_pipeline_state_stream_new_desc(__stream, __id, __type, __desc) \
   d3d12_pipeline_state_stream_new_desc(__stream, MAX_COMPUTE_PIPELINE_STATE_STREAM_SIZE, __id, __type, __desc)

static bool
gfx_pipeline_variant_key_equal(const void *a, const void *b)
{
   return !memcmp(a, b, sizeof(struct dzn_graphics_pipeline_variant_key));
}

static uint32_t
gfx_pipeline_variant_key_hash(const void *key)
{
   return _mesa_hash_data(key, sizeof(struct dzn_graphics_pipeline_variant_key));
}

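/* Generic pipeline-cache object wrapping a raw byte blob keyed by a SHA1
 * hash. Both compiled DXIL shaders and serialized graphics pipeline
 * descriptions are stored through this type (see the cache helpers below).
 */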
struct dzn_cached_blob {
   struct vk_pipeline_cache_object base;
   uint8_t hash[SHA1_DIGEST_LENGTH];
   const void *data;
   size_t size;
};

static bool
dzn_cached_blob_serialize(struct vk_pipeline_cache_object *object,
                          struct blob *blob)
{
   struct dzn_cached_blob *cached_blob =
      container_of(object, struct dzn_cached_blob, base);

   blob_write_bytes(blob, cached_blob->data, cached_blob->size);
   return true;
}

static void
dzn_cached_blob_destroy(struct vk_pipeline_cache_object *object)
{
   struct dzn_cached_blob *shader =
      container_of(object, struct dzn_cached_blob, base);

   vk_free(&shader->base.device->alloc, shader);
}

static struct vk_pipeline_cache_object *
dzn_cached_blob_create(struct vk_device *device,
                       const void *hash,
                       const void *data,
                       size_t data_size);

static struct vk_pipeline_cache_object *
dzn_cached_blob_deserialize(struct vk_device *device,
                            const void *key_data,
                            size_t key_size,
                            struct blob_reader *blob)
{
   size_t data_size = blob->end - blob->current;
   assert(key_size == SHA1_DIGEST_LENGTH);

   return dzn_cached_blob_create(device, key_data,
                                 blob_read_bytes(blob, data_size),
                                 data_size);
}

const struct vk_pipeline_cache_object_ops dzn_cached_blob_ops = {
   .serialize = dzn_cached_blob_serialize,
   .deserialize = dzn_cached_blob_deserialize,
   .destroy = dzn_cached_blob_destroy,
};


static struct vk_pipeline_cache_object *
dzn_cached_blob_create(struct vk_device *device,
                       const void *hash,
                       const void *data,
                       size_t data_size)
{
   VK_MULTIALLOC(ma);
   VK_MULTIALLOC_DECL(&ma, struct dzn_cached_blob, blob, 1);
   VK_MULTIALLOC_DECL(&ma, uint8_t, copy, data_size);

   if (!vk_multialloc_alloc(&ma, &device->alloc,
                            VK_SYSTEM_ALLOCATION_SCOPE_DEVICE))
      return NULL;

   memcpy(blob->hash, hash, sizeof(blob->hash));

   vk_pipeline_cache_object_init(device, &blob->base,
                                 &dzn_cached_blob_ops,
                                 blob->hash, sizeof(blob->hash));

   if (data)
      memcpy(copy, data, data_size);
   blob->data = copy;
   blob->size = data_size;

   return &blob->base;
}

static VkResult
dzn_graphics_pipeline_prepare_for_variants(struct dzn_device *device,
                                           struct dzn_graphics_pipeline *pipeline)
{
   if (pipeline->variants)
      return VK_SUCCESS;

   pipeline->variants =
      _mesa_hash_table_create(NULL,
                              gfx_pipeline_variant_key_hash,
                              gfx_pipeline_variant_key_equal);
   if (!pipeline->variants)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   return VK_SUCCESS;
}

static dxil_spirv_shader_stage
to_dxil_shader_stage(VkShaderStageFlagBits in)
{
   switch (in) {
   case VK_SHADER_STAGE_VERTEX_BIT: return DXIL_SPIRV_SHADER_VERTEX;
   case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT: return DXIL_SPIRV_SHADER_TESS_CTRL;
   case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT: return DXIL_SPIRV_SHADER_TESS_EVAL;
   case VK_SHADER_STAGE_GEOMETRY_BIT: return DXIL_SPIRV_SHADER_GEOMETRY;
   case VK_SHADER_STAGE_FRAGMENT_BIT: return DXIL_SPIRV_SHADER_FRAGMENT;
   case VK_SHADER_STAGE_COMPUTE_BIT: return DXIL_SPIRV_SHADER_COMPUTE;
   default: unreachable("Unsupported stage");
   }
}

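/* Translate one SPIR-V stage into NIR and run the dzn-specific lowering
 * passes (runtime-data CBV, push-constant CBV, y/z flip, vertex-input
 * format conversion). The result is looked up in / added to the pipeline
 * cache under the SHA1 computed by the caller, so the expensive
 * spirv-to-nir path is only taken on a cache miss.
 */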
static VkResult
dzn_pipeline_get_nir_shader(struct dzn_device *device,
                            const struct dzn_pipeline_layout *layout,
                            struct vk_pipeline_cache *cache,
                            const uint8_t *hash,
                            const VkPipelineShaderStageCreateInfo *stage_info,
                            gl_shader_stage stage,
                            enum dxil_spirv_yz_flip_mode yz_flip_mode,
                            uint16_t y_flip_mask, uint16_t z_flip_mask,
                            bool force_sample_rate_shading,
                            enum pipe_format *vi_conversions,
                            const nir_shader_compiler_options *nir_opts,
                            nir_shader **nir)
{
   if (cache) {
      *nir = vk_pipeline_cache_lookup_nir(cache, hash, SHA1_DIGEST_LENGTH,
                                          nir_opts, NULL, NULL);
      if (*nir)
         return VK_SUCCESS;
   }

   VK_FROM_HANDLE(vk_shader_module, module, stage_info->module);
   struct spirv_to_nir_options spirv_opts = {
      .caps = {
         .draw_parameters = true,
      },
      .ubo_addr_format = nir_address_format_32bit_index_offset,
      .ssbo_addr_format = nir_address_format_32bit_index_offset,
      .shared_addr_format = nir_address_format_32bit_offset_as_64bit,

      /* use_deref_buffer_array_length + nir_lower_explicit_io force
       * get_ssbo_size to take in the return from load_vulkan_descriptor
       * instead of vulkan_resource_index. This makes it much easier to
       * get the DXIL handle for the SSBO.
       */
      .use_deref_buffer_array_length = true
   };

   VkResult result =
      vk_shader_module_to_nir(&device->vk, module, stage,
                              stage_info->pName, stage_info->pSpecializationInfo,
                              &spirv_opts, nir_opts, NULL, nir);
   if (result != VK_SUCCESS)
      return result;

   struct dxil_spirv_runtime_conf conf = {
      .runtime_data_cbv = {
         .register_space = DZN_REGISTER_SPACE_SYSVALS,
         .base_shader_register = 0,
      },
      .push_constant_cbv = {
         .register_space = DZN_REGISTER_SPACE_PUSH_CONSTANT,
         .base_shader_register = 0,
      },
      .zero_based_vertex_instance_id = false,
      .yz_flip = {
         .mode = yz_flip_mode,
         .y_mask = y_flip_mask,
         .z_mask = z_flip_mask,
      },
      .read_only_images_as_srvs = true,
      .force_sample_rate_shading = force_sample_rate_shading,
   };

   bool requires_runtime_data;
   dxil_spirv_nir_passes(*nir, &conf, &requires_runtime_data);

   if (stage == MESA_SHADER_VERTEX) {
      bool needs_conv = false;
      for (uint32_t i = 0; i < MAX_VERTEX_GENERIC_ATTRIBS; i++) {
         if (vi_conversions[i] != PIPE_FORMAT_NONE)
            needs_conv = true;
      }

      if (needs_conv)
         NIR_PASS_V(*nir, dxil_nir_lower_vs_vertex_conversion, vi_conversions);
   }

   if (cache)
      vk_pipeline_cache_add_nir(cache, hash, SHA1_DIGEST_LENGTH, *nir);

   return VK_SUCCESS;
}

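/* Vulkan (set, binding) pairs don't map directly to D3D12 register slots:
 * the pipeline layout carries a per-set translation table
 * (binding_translation[set].base_reg[binding]) that the two passes below
 * apply, both to vulkan_resource_index intrinsics and to variable
 * bindings. The optional bindings_hash captures the remapping so it can be
 * folded into the DXIL cache key.
 */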
static bool
adjust_resource_index_binding(struct nir_builder *builder, nir_instr *instr,
                              void *cb_data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

   if (intrin->intrinsic != nir_intrinsic_vulkan_resource_index)
      return false;

   const struct dzn_pipeline_layout *layout = cb_data;
   unsigned set = nir_intrinsic_desc_set(intrin);
   unsigned binding = nir_intrinsic_binding(intrin);

   if (set >= layout->set_count ||
       binding >= layout->binding_translation[set].binding_count)
      return false;

   binding = layout->binding_translation[set].base_reg[binding];
   nir_intrinsic_set_binding(intrin, binding);

   return true;
}

static bool
adjust_var_bindings(nir_shader *shader,
                    const struct dzn_pipeline_layout *layout,
                    uint8_t *bindings_hash)
{
   uint32_t modes = nir_var_image | nir_var_uniform | nir_var_mem_ubo | nir_var_mem_ssbo;
   struct mesa_sha1 bindings_hash_ctx;

   if (bindings_hash)
      _mesa_sha1_init(&bindings_hash_ctx);

   nir_foreach_variable_with_modes(var, shader, modes) {
      if (var->data.mode == nir_var_uniform) {
         const struct glsl_type *type = glsl_without_array(var->type);

         if (!glsl_type_is_sampler(type) && !glsl_type_is_texture(type))
            continue;
      }

      unsigned s = var->data.descriptor_set, b = var->data.binding;

      if (s >= layout->set_count)
         continue;

      assert(b < layout->binding_translation[s].binding_count);
      var->data.binding = layout->binding_translation[s].base_reg[b];

      if (bindings_hash) {
         _mesa_sha1_update(&bindings_hash_ctx, &s, sizeof(s));
         _mesa_sha1_update(&bindings_hash_ctx, &b, sizeof(b));
         _mesa_sha1_update(&bindings_hash_ctx, &var->data.binding, sizeof(var->data.binding));
      }
   }

   if (bindings_hash)
      _mesa_sha1_final(&bindings_hash_ctx, bindings_hash);

   return nir_shader_instructions_pass(shader, adjust_resource_index_binding,
                                       nir_metadata_all, (void *)layout);
}

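/* Compile a NIR shader into a DXIL module. On Windows builds the module is
 * also run through the DXIL validator (and optionally disassembled for
 * debugging); a module that fails validation would normally be rejected by
 * the D3D12 runtime, so the failure is surfaced as a pipeline compilation
 * error here.
 */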
static VkResult
dzn_pipeline_compile_shader(struct dzn_device *device,
                            nir_shader *nir,
                            D3D12_SHADER_BYTECODE *slot)
{
   struct dzn_instance *instance =
      container_of(device->vk.physical->instance, struct dzn_instance, vk);
   struct nir_to_dxil_options opts = {
      .environment = DXIL_ENVIRONMENT_VULKAN,
      .shader_model_max = SHADER_MODEL_6_2,
#ifdef _WIN32
      .validator_version_max = dxil_get_validator_version(instance->dxil_validator),
#endif
   };
   struct blob dxil_blob;
   VkResult result = VK_SUCCESS;

   if (instance->debug_flags & DZN_DEBUG_NIR)
      nir_print_shader(nir, stderr);

   if (nir_to_dxil(nir, &opts, &dxil_blob)) {
      blob_finish_get_buffer(&dxil_blob, (void **)&slot->pShaderBytecode,
                             (size_t *)&slot->BytecodeLength);
   } else {
      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   if (dxil_blob.allocated)
      blob_finish(&dxil_blob);

   if (result != VK_SUCCESS)
      return result;

#ifdef _WIN32
   char *err;
   bool res = dxil_validate_module(instance->dxil_validator,
                                   (void *)slot->pShaderBytecode,
                                   slot->BytecodeLength, &err);

   if (instance->debug_flags & DZN_DEBUG_DXIL) {
      char *disasm = dxil_disasm_module(instance->dxil_validator,
                                        (void *)slot->pShaderBytecode,
                                        slot->BytecodeLength);
      if (disasm) {
         fprintf(stderr,
                 "== BEGIN SHADER ============================================\n"
                 "%s\n"
                 "== END SHADER ==============================================\n",
                 disasm);
         ralloc_free(disasm);
      }
   }

   if (!res) {
      if (err) {
         fprintf(stderr,
                 "== VALIDATION ERROR =============================================\n"
                 "%s\n"
                 "== END ==========================================================\n",
                 err);
         ralloc_free(err);
      }
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
   }
#endif

   return VK_SUCCESS;
}

static D3D12_SHADER_BYTECODE *
dzn_pipeline_get_gfx_shader_slot(D3D12_PIPELINE_STATE_STREAM_DESC *stream,
                                 gl_shader_stage in)
{
   switch (in) {
   case MESA_SHADER_VERTEX: {
      d3d12_gfx_pipeline_state_stream_new_desc(stream, VS, D3D12_SHADER_BYTECODE, desc);
      return desc;
   }
   case MESA_SHADER_TESS_CTRL: {
      /* D3D12 hull shaders (HS) map to Vulkan tessellation control. */
      d3d12_gfx_pipeline_state_stream_new_desc(stream, HS, D3D12_SHADER_BYTECODE, desc);
      return desc;
   }
   case MESA_SHADER_TESS_EVAL: {
      /* D3D12 domain shaders (DS) map to Vulkan tessellation evaluation. */
      d3d12_gfx_pipeline_state_stream_new_desc(stream, DS, D3D12_SHADER_BYTECODE, desc);
      return desc;
   }
   case MESA_SHADER_GEOMETRY: {
      d3d12_gfx_pipeline_state_stream_new_desc(stream, GS, D3D12_SHADER_BYTECODE, desc);
      return desc;
   }
   case MESA_SHADER_FRAGMENT: {
      d3d12_gfx_pipeline_state_stream_new_desc(stream, PS, D3D12_SHADER_BYTECODE, desc);
      return desc;
   }
   default: unreachable("Unsupported stage");
   }
}

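/* Cached DXIL shaders are stored as a small header followed by the raw
 * bytecode, keyed by the "DXIL hash" computed in
 * dzn_graphics_pipeline_compile_shaders():
 *
 *    [ stage | size | data[size] ]
 */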
struct dzn_cached_dxil_shader_header {
   gl_shader_stage stage;
   size_t size;
   uint8_t data[0];
};

static VkResult
dzn_pipeline_cache_lookup_dxil_shader(struct vk_pipeline_cache *cache,
                                      const uint8_t *dxil_hash,
                                      gl_shader_stage *stage,
                                      D3D12_SHADER_BYTECODE *bc)
{
   *stage = MESA_SHADER_NONE;

   if (!cache)
      return VK_SUCCESS;

   struct vk_pipeline_cache_object *cache_obj = NULL;

   cache_obj =
      vk_pipeline_cache_lookup_object(cache, dxil_hash, SHA1_DIGEST_LENGTH,
                                      &dzn_cached_blob_ops,
                                      NULL);
   if (!cache_obj)
      return VK_SUCCESS;

   struct dzn_cached_blob *cached_blob =
      container_of(cache_obj, struct dzn_cached_blob, base);
   VkResult ret = VK_SUCCESS;

   assert(sizeof(struct dzn_cached_dxil_shader_header) <= cached_blob->size);

   const struct dzn_cached_dxil_shader_header *info =
      (struct dzn_cached_dxil_shader_header *)(cached_blob->data);

   assert(sizeof(struct dzn_cached_dxil_shader_header) + info->size <= cached_blob->size);
   assert(info->stage > MESA_SHADER_NONE && info->stage < MESA_VULKAN_SHADER_STAGES);
   assert(info->size > 0);

   void *code = malloc(info->size);
   if (!code) {
      ret = vk_error(cache->base.device, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto out;
   }

   memcpy(code, info->data, info->size);

   bc->pShaderBytecode = code;
   bc->BytecodeLength = info->size;
   *stage = info->stage;

out:
   vk_pipeline_cache_object_unref(cache_obj);
   return ret;
}

static void
dzn_pipeline_cache_add_dxil_shader(struct vk_pipeline_cache *cache,
                                   const uint8_t *dxil_hash,
                                   gl_shader_stage stage,
                                   const D3D12_SHADER_BYTECODE *bc)
{
   size_t size = sizeof(struct dzn_cached_dxil_shader_header) +
                 bc->BytecodeLength;

   struct vk_pipeline_cache_object *cache_obj =
      dzn_cached_blob_create(cache->base.device, dxil_hash, NULL, size);
   if (!cache_obj)
      return;

   struct dzn_cached_blob *cached_blob =
      container_of(cache_obj, struct dzn_cached_blob, base);
   struct dzn_cached_dxil_shader_header *info =
      (struct dzn_cached_dxil_shader_header *)(cached_blob->data);
   info->stage = stage;
   info->size = bc->BytecodeLength;
   memcpy(info->data, bc->pShaderBytecode, bc->BytecodeLength);

   cache_obj = vk_pipeline_cache_add_object(cache, cache_obj);
   vk_pipeline_cache_object_unref(cache_obj);
}

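/* A cached graphics pipeline doesn't embed D3D12 bytecode directly: it
 * serializes the input layout plus one SHA1 per active stage, each
 * pointing at a separately cached DXIL shader:
 *
 *    [ header | aligned D3D12_INPUT_ELEMENT_DESC[input_count] |
 *      one SHA1 per bit set in header.stages ]
 *
 * The lookup and add paths below must agree on this layout (including the
 * alignment of the input element array).
 */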
struct dzn_cached_gfx_pipeline_header {
   uint32_t stages;
   uint32_t input_count;
};

static VkResult
dzn_pipeline_cache_lookup_gfx_pipeline(struct dzn_graphics_pipeline *pipeline,
                                       struct vk_pipeline_cache *cache,
                                       const uint8_t *pipeline_hash,
                                       bool *cache_hit)
{
   *cache_hit = false;

   if (!cache)
      return VK_SUCCESS;

   struct vk_pipeline_cache_object *cache_obj = NULL;

   cache_obj =
      vk_pipeline_cache_lookup_object(cache, pipeline_hash, SHA1_DIGEST_LENGTH,
                                      &dzn_cached_blob_ops,
                                      NULL);
   if (!cache_obj)
      return VK_SUCCESS;

   struct dzn_cached_blob *cached_blob =
      container_of(cache_obj, struct dzn_cached_blob, base);
   D3D12_PIPELINE_STATE_STREAM_DESC *stream_desc =
      &pipeline->templates.stream_desc;

   const struct dzn_cached_gfx_pipeline_header *info =
      (const struct dzn_cached_gfx_pipeline_header *)(cached_blob->data);
   size_t offset = sizeof(*info);

   assert(cached_blob->size >= sizeof(*info));

   if (info->input_count > 0) {
      /* Must match the alignment used by dzn_pipeline_cache_add_gfx_pipeline(). */
      offset = ALIGN_POT(offset, alignof(D3D12_INPUT_ELEMENT_DESC));
      const D3D12_INPUT_ELEMENT_DESC *inputs =
         (const D3D12_INPUT_ELEMENT_DESC *)((uint8_t *)cached_blob->data + offset);

      assert(cached_blob->size >= offset + sizeof(*inputs) * info->input_count);

      memcpy(pipeline->templates.inputs, inputs,
             sizeof(*inputs) * info->input_count);
      d3d12_gfx_pipeline_state_stream_new_desc(stream_desc, INPUT_LAYOUT, D3D12_INPUT_LAYOUT_DESC, desc);
      desc->pInputElementDescs = pipeline->templates.inputs;
      desc->NumElements = info->input_count;
      offset += sizeof(*inputs) * info->input_count;
   }

   assert(cached_blob->size == offset + util_bitcount(info->stages) * SHA1_DIGEST_LENGTH);

   u_foreach_bit(s, info->stages) {
      uint8_t *dxil_hash = (uint8_t *)cached_blob->data + offset;
      gl_shader_stage stage;

      D3D12_SHADER_BYTECODE *slot =
         dzn_pipeline_get_gfx_shader_slot(stream_desc, s);

      VkResult ret =
         dzn_pipeline_cache_lookup_dxil_shader(cache, dxil_hash, &stage, slot);
      if (ret != VK_SUCCESS)
         return ret;

      assert(stage == s);
      offset += SHA1_DIGEST_LENGTH;
   }

   *cache_hit = true;

   vk_pipeline_cache_object_unref(cache_obj);
   return VK_SUCCESS;
}

static void
dzn_pipeline_cache_add_gfx_pipeline(struct dzn_graphics_pipeline *pipeline,
                                    struct vk_pipeline_cache *cache,
                                    uint32_t vertex_input_count,
                                    const uint8_t *pipeline_hash,
                                    const uint8_t *const *dxil_hashes)
{
   size_t offset =
      ALIGN_POT(sizeof(struct dzn_cached_gfx_pipeline_header), alignof(D3D12_INPUT_ELEMENT_DESC)) +
      (sizeof(D3D12_INPUT_ELEMENT_DESC) * vertex_input_count);
   uint32_t stages = 0;

   for (uint32_t i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) {
      if (pipeline->templates.shaders[i].bc) {
         stages |= BITFIELD_BIT(i);
         offset += SHA1_DIGEST_LENGTH;
      }
   }

   struct vk_pipeline_cache_object *cache_obj =
      dzn_cached_blob_create(cache->base.device, pipeline_hash, NULL, offset);
   if (!cache_obj)
      return;

   struct dzn_cached_blob *cached_blob =
      container_of(cache_obj, struct dzn_cached_blob, base);

   offset = 0;
   struct dzn_cached_gfx_pipeline_header *info =
      (struct dzn_cached_gfx_pipeline_header *)(cached_blob->data);

   info->input_count = vertex_input_count;
   info->stages = stages;

   offset = ALIGN_POT(offset + sizeof(*info), alignof(D3D12_INPUT_ELEMENT_DESC));

   D3D12_INPUT_ELEMENT_DESC *inputs =
      (D3D12_INPUT_ELEMENT_DESC *)((uint8_t *)cached_blob->data + offset);
   memcpy(inputs, pipeline->templates.inputs,
          sizeof(*inputs) * vertex_input_count);
   offset += sizeof(*inputs) * vertex_input_count;

   u_foreach_bit(s, stages) {
      uint8_t *dxil_hash = (uint8_t *)cached_blob->data + offset;

      memcpy(dxil_hash, dxil_hashes[s], SHA1_DIGEST_LENGTH);
      offset += SHA1_DIGEST_LENGTH;
   }

   cache_obj = vk_pipeline_cache_add_object(cache, cache_obj);
   vk_pipeline_cache_object_unref(cache_obj);
}

static void
dzn_graphics_pipeline_hash_attribs(D3D12_INPUT_ELEMENT_DESC *attribs,
                                   enum pipe_format *vi_conversions,
                                   uint8_t *result)
{
   struct mesa_sha1 ctx;

   _mesa_sha1_init(&ctx);
   _mesa_sha1_update(&ctx, attribs, sizeof(*attribs) * MAX_VERTEX_GENERIC_ATTRIBS);
   _mesa_sha1_update(&ctx, vi_conversions, sizeof(*vi_conversions) * MAX_VERTEX_GENERIC_ATTRIBS);
   _mesa_sha1_final(&ctx, result);
}

static VkResult
dzn_graphics_pipeline_compile_shaders(struct dzn_device *device,
                                      struct dzn_graphics_pipeline *pipeline,
                                      struct vk_pipeline_cache *cache,
                                      const struct dzn_pipeline_layout *layout,
                                      D3D12_PIPELINE_STATE_STREAM_DESC *out,
                                      D3D12_INPUT_ELEMENT_DESC *attribs,
                                      enum pipe_format *vi_conversions,
                                      const VkGraphicsPipelineCreateInfo *info)
{
   const VkPipelineViewportStateCreateInfo *vp_info =
      info->pRasterizationState->rasterizerDiscardEnable ?
      NULL : info->pViewportState;
   struct {
      const VkPipelineShaderStageCreateInfo *info;
      uint8_t spirv_hash[SHA1_DIGEST_LENGTH];
      uint8_t dxil_hash[SHA1_DIGEST_LENGTH];
   } stages[MESA_VULKAN_SHADER_STAGES] = { 0 };
   const uint8_t *dxil_hashes[MESA_VULKAN_SHADER_STAGES] = { 0 };
   uint8_t attribs_hash[SHA1_DIGEST_LENGTH];
   uint8_t pipeline_hash[SHA1_DIGEST_LENGTH];
   gl_shader_stage yz_flip_stage = MESA_SHADER_NONE;
   uint32_t active_stage_mask = 0;
   VkResult ret;

   /* First step: collect stage info in a table indexed by gl_shader_stage
    * so we can iterate over stages in pipeline order or reverse pipeline
    * order.
    */
   for (uint32_t i = 0; i < info->stageCount; i++) {
      gl_shader_stage stage =
         vk_to_mesa_shader_stage(info->pStages[i].stage);

      assert(stage <= MESA_SHADER_FRAGMENT);

      if ((stage == MESA_SHADER_VERTEX ||
           stage == MESA_SHADER_TESS_EVAL ||
           stage == MESA_SHADER_GEOMETRY) &&
          yz_flip_stage < stage)
         yz_flip_stage = stage;

      if (stage == MESA_SHADER_FRAGMENT &&
          info->pRasterizationState &&
          (info->pRasterizationState->rasterizerDiscardEnable ||
           info->pRasterizationState->cullMode == VK_CULL_MODE_FRONT_AND_BACK)) {
         /* Disable rasterization (AKA leave fragment shader NULL) when
          * front+back culling or discard is set.
          */
         continue;
      }

      stages[stage].info = &info->pStages[i];
      active_stage_mask |= BITFIELD_BIT(stage);
   }

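   /* D3D12 doesn't support Vulkan's Y-down convention (positive-height
    * viewports) or flipped depth ranges (minDepth > maxDepth), so position
    * is patched in the last pre-rasterization stage (yz_flip_stage). With
    * static viewports the flip masks are baked into the shader; with
    * dynamic viewport state the decision is deferred to draw time via
    * DXIL_SPIRV_YZ_FLIP_CONDITIONAL.
    */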
   enum dxil_spirv_yz_flip_mode yz_flip_mode = DXIL_SPIRV_YZ_FLIP_NONE;
   uint16_t y_flip_mask = 0, z_flip_mask = 0;

   if (pipeline->vp.dynamic) {
      yz_flip_mode = DXIL_SPIRV_YZ_FLIP_CONDITIONAL;
   } else if (vp_info) {
      for (uint32_t i = 0; vp_info->pViewports && i < vp_info->viewportCount; i++) {
         if (vp_info->pViewports[i].height > 0)
            y_flip_mask |= BITFIELD_BIT(i);

         if (vp_info->pViewports[i].minDepth > vp_info->pViewports[i].maxDepth)
            z_flip_mask |= BITFIELD_BIT(i);
      }

      if (y_flip_mask && z_flip_mask)
         yz_flip_mode = DXIL_SPIRV_YZ_FLIP_UNCONDITIONAL;
      else if (z_flip_mask)
         yz_flip_mode = DXIL_SPIRV_Z_FLIP_UNCONDITIONAL;
      else if (y_flip_mask)
         yz_flip_mode = DXIL_SPIRV_Y_FLIP_UNCONDITIONAL;
   }

   bool force_sample_rate_shading =
      info->pMultisampleState &&
      info->pMultisampleState->sampleShadingEnable;

   if (cache) {
      dzn_graphics_pipeline_hash_attribs(attribs, vi_conversions, attribs_hash);

      struct mesa_sha1 pipeline_hash_ctx;

      _mesa_sha1_init(&pipeline_hash_ctx);
      _mesa_sha1_update(&pipeline_hash_ctx, attribs_hash, sizeof(attribs_hash));
      _mesa_sha1_update(&pipeline_hash_ctx, &yz_flip_mode, sizeof(yz_flip_mode));
      _mesa_sha1_update(&pipeline_hash_ctx, &y_flip_mask, sizeof(y_flip_mask));
      _mesa_sha1_update(&pipeline_hash_ctx, &z_flip_mask, sizeof(z_flip_mask));
      _mesa_sha1_update(&pipeline_hash_ctx, &force_sample_rate_shading, sizeof(force_sample_rate_shading));

      u_foreach_bit(stage, active_stage_mask) {
         vk_pipeline_hash_shader_stage(stages[stage].info, stages[stage].spirv_hash);
         _mesa_sha1_update(&pipeline_hash_ctx, stages[stage].spirv_hash, sizeof(stages[stage].spirv_hash));
         _mesa_sha1_update(&pipeline_hash_ctx, layout->stages[stage].hash, sizeof(layout->stages[stage].hash));
      }
      _mesa_sha1_final(&pipeline_hash_ctx, pipeline_hash);

      bool cache_hit;
      ret = dzn_pipeline_cache_lookup_gfx_pipeline(pipeline, cache, pipeline_hash,
                                                   &cache_hit);
      if (ret != VK_SUCCESS)
         return ret;

      if (cache_hit)
         return VK_SUCCESS;
   }

   /* Second step: get NIR shaders for all stages. */
   nir_shader_compiler_options nir_opts = *dxil_get_nir_compiler_options();
   nir_opts.lower_base_vertex = true;
   u_foreach_bit(stage, active_stage_mask) {
      struct mesa_sha1 nir_hash_ctx;
      uint8_t nir_hash[SHA1_DIGEST_LENGTH];

      if (cache) {
         _mesa_sha1_init(&nir_hash_ctx);
         if (stage == MESA_SHADER_VERTEX)
            _mesa_sha1_update(&nir_hash_ctx, attribs_hash, sizeof(attribs_hash));
         if (stage == yz_flip_stage) {
            _mesa_sha1_update(&nir_hash_ctx, &yz_flip_mode, sizeof(yz_flip_mode));
            _mesa_sha1_update(&nir_hash_ctx, &y_flip_mask, sizeof(y_flip_mask));
            _mesa_sha1_update(&nir_hash_ctx, &z_flip_mask, sizeof(z_flip_mask));
         }
         _mesa_sha1_update(&nir_hash_ctx, stages[stage].spirv_hash, sizeof(stages[stage].spirv_hash));
         _mesa_sha1_final(&nir_hash_ctx, nir_hash);
      }

      ret = dzn_pipeline_get_nir_shader(device, layout,
                                        cache, nir_hash,
                                        stages[stage].info, stage,
                                        stage == yz_flip_stage ? yz_flip_mode : DXIL_SPIRV_YZ_FLIP_NONE,
                                        y_flip_mask, z_flip_mask,
                                        stage == MESA_SHADER_FRAGMENT ? force_sample_rate_shading : false,
                                        vi_conversions,
                                        &nir_opts, &pipeline->templates.shaders[stage].nir);
      if (ret != VK_SUCCESS)
         return ret;
   }

   /* Third step: link those NIR shaders. We iterate in reverse order
    * so we can eliminate outputs that are never read by the next stage.
    */
   uint32_t link_mask = active_stage_mask;
   while (link_mask != 0) {
      gl_shader_stage stage = util_last_bit(link_mask) - 1;
      link_mask &= ~BITFIELD_BIT(stage);
      gl_shader_stage prev_stage = util_last_bit(link_mask) - 1;

      assert(pipeline->templates.shaders[stage].nir);
      dxil_spirv_nir_link(pipeline->templates.shaders[stage].nir,
                          prev_stage != MESA_SHADER_NONE ?
                          pipeline->templates.shaders[prev_stage].nir : NULL);
   }

   u_foreach_bit(stage, active_stage_mask) {
      uint8_t bindings_hash[SHA1_DIGEST_LENGTH];

      NIR_PASS_V(pipeline->templates.shaders[stage].nir, adjust_var_bindings, layout,
                 cache ? bindings_hash : NULL);

      if (cache) {
         struct mesa_sha1 dxil_hash_ctx;

         _mesa_sha1_init(&dxil_hash_ctx);

         if (stage == MESA_SHADER_VERTEX)
            _mesa_sha1_update(&dxil_hash_ctx, attribs_hash, sizeof(attribs_hash));

         if (stage == yz_flip_stage) {
            _mesa_sha1_update(&dxil_hash_ctx, &yz_flip_mode, sizeof(yz_flip_mode));
            _mesa_sha1_update(&dxil_hash_ctx, &y_flip_mask, sizeof(y_flip_mask));
            _mesa_sha1_update(&dxil_hash_ctx, &z_flip_mask, sizeof(z_flip_mask));
         }

         if (stage == MESA_SHADER_FRAGMENT)
            _mesa_sha1_update(&dxil_hash_ctx, &force_sample_rate_shading, sizeof(force_sample_rate_shading));

         _mesa_sha1_update(&dxil_hash_ctx, stages[stage].spirv_hash, sizeof(stages[stage].spirv_hash));
         _mesa_sha1_update(&dxil_hash_ctx, bindings_hash, sizeof(bindings_hash));
         _mesa_sha1_final(&dxil_hash_ctx, stages[stage].dxil_hash);
         dxil_hashes[stage] = stages[stage].dxil_hash;

         gl_shader_stage cached_stage;
         D3D12_SHADER_BYTECODE bc;
         ret = dzn_pipeline_cache_lookup_dxil_shader(cache, stages[stage].dxil_hash, &cached_stage, &bc);
         if (ret != VK_SUCCESS)
            return ret;

         if (cached_stage != MESA_SHADER_NONE) {
            assert(cached_stage == stage);
            D3D12_SHADER_BYTECODE *slot =
               dzn_pipeline_get_gfx_shader_slot(out, stage);
            *slot = bc;
            pipeline->templates.shaders[stage].bc = slot;
         }
      }
   }

   uint32_t vert_input_count = 0;
   if (pipeline->templates.shaders[MESA_SHADER_VERTEX].nir) {
      /* Now, declare one D3D12_INPUT_ELEMENT_DESC per VS input variable, so
       * we can handle location overlaps properly.
       */
      nir_foreach_shader_in_variable(var, pipeline->templates.shaders[MESA_SHADER_VERTEX].nir) {
         assert(var->data.location >= VERT_ATTRIB_GENERIC0);
         unsigned loc = var->data.location - VERT_ATTRIB_GENERIC0;
         assert(vert_input_count < D3D12_VS_INPUT_REGISTER_COUNT);
         assert(loc < MAX_VERTEX_GENERIC_ATTRIBS);

         pipeline->templates.inputs[vert_input_count] = attribs[loc];
         pipeline->templates.inputs[vert_input_count].SemanticIndex = vert_input_count;
         var->data.driver_location = vert_input_count++;
      }

      if (vert_input_count > 0) {
         d3d12_gfx_pipeline_state_stream_new_desc(out, INPUT_LAYOUT, D3D12_INPUT_LAYOUT_DESC, desc);
         desc->pInputElementDescs = pipeline->templates.inputs;
         desc->NumElements = vert_input_count;
      }
   }

   /* Last step: translate NIR shaders into DXIL modules */
   u_foreach_bit(stage, active_stage_mask) {
      /* Cache hit, we can skip the compilation. */
      if (pipeline->templates.shaders[stage].bc)
         continue;

      if (stage == MESA_SHADER_FRAGMENT) {
         gl_shader_stage prev_stage =
            util_last_bit(active_stage_mask & BITFIELD_MASK(MESA_SHADER_FRAGMENT)) - 1;
         /* Disable rasterization if the last geometry stage doesn't
          * write the position.
          */
         if (prev_stage == MESA_SHADER_NONE ||
             !(pipeline->templates.shaders[prev_stage].nir->info.outputs_written & VARYING_BIT_POS))
            continue;
      }

      D3D12_SHADER_BYTECODE *slot =
         dzn_pipeline_get_gfx_shader_slot(out, stage);

      ret = dzn_pipeline_compile_shader(device, pipeline->templates.shaders[stage].nir, slot);
      if (ret != VK_SUCCESS)
         return ret;

      pipeline->templates.shaders[stage].bc = slot;

      if (cache)
         dzn_pipeline_cache_add_dxil_shader(cache, stages[stage].dxil_hash, stage, slot);
   }

   if (cache)
      dzn_pipeline_cache_add_gfx_pipeline(pipeline, cache, vert_input_count, pipeline_hash,
                                          dxil_hashes);

   return VK_SUCCESS;
}

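/* DXGI has no direct equivalent for some Vulkan vertex formats (the SCALED
 * variants and several 10-10-10-2 packings), so those attributes are
 * fetched with a compatible raw format and converted in the vertex shader
 * by dxil_nir_lower_vs_vertex_conversion(), driven by the pipe_format
 * recorded in vi_conversions[] (see dzn_graphics_pipeline_translate_vi()).
 */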
VkFormat
dzn_graphics_pipeline_patch_vi_format(VkFormat format)
{
   switch (format) {
   case VK_FORMAT_A2R10G10B10_SNORM_PACK32:
   case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
   case VK_FORMAT_A2R10G10B10_SSCALED_PACK32:
   case VK_FORMAT_A2R10G10B10_USCALED_PACK32:
   case VK_FORMAT_A2B10G10R10_SNORM_PACK32:
   case VK_FORMAT_A2B10G10R10_SSCALED_PACK32:
   case VK_FORMAT_A2B10G10R10_USCALED_PACK32:
      return VK_FORMAT_R32_UINT;
   case VK_FORMAT_R8G8B8A8_SSCALED:
      return VK_FORMAT_R8G8B8A8_SINT;
   case VK_FORMAT_R8G8B8A8_USCALED:
      return VK_FORMAT_R8G8B8A8_UINT;
   case VK_FORMAT_R16G16B16A16_USCALED:
      return VK_FORMAT_R16G16B16A16_UINT;
   case VK_FORMAT_R16G16B16A16_SSCALED:
      return VK_FORMAT_R16G16B16A16_SINT;
   default:
      return format;
   }
}

static VkResult
dzn_graphics_pipeline_translate_vi(struct dzn_graphics_pipeline *pipeline,
                                   const VkGraphicsPipelineCreateInfo *in,
                                   D3D12_INPUT_ELEMENT_DESC *inputs,
                                   enum pipe_format *vi_conversions)
{
   const VkPipelineVertexInputStateCreateInfo *in_vi =
      in->pVertexInputState;
   const VkPipelineVertexInputDivisorStateCreateInfoEXT *divisors =
      (const VkPipelineVertexInputDivisorStateCreateInfoEXT *)
      vk_find_struct_const(in_vi, PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT);

   if (!in_vi->vertexAttributeDescriptionCount)
      return VK_SUCCESS;

   D3D12_INPUT_CLASSIFICATION slot_class[MAX_VBS];

   pipeline->vb.count = 0;
   for (uint32_t i = 0; i < in_vi->vertexBindingDescriptionCount; i++) {
      const struct VkVertexInputBindingDescription *bdesc =
         &in_vi->pVertexBindingDescriptions[i];

      pipeline->vb.count = MAX2(pipeline->vb.count, bdesc->binding + 1);
      pipeline->vb.strides[bdesc->binding] = bdesc->stride;
      if (bdesc->inputRate == VK_VERTEX_INPUT_RATE_INSTANCE) {
         slot_class[bdesc->binding] = D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA;
      } else {
         assert(bdesc->inputRate == VK_VERTEX_INPUT_RATE_VERTEX);
         slot_class[bdesc->binding] = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA;
      }
   }

   for (uint32_t i = 0; i < in_vi->vertexAttributeDescriptionCount; i++) {
      const VkVertexInputAttributeDescription *attr =
         &in_vi->pVertexAttributeDescriptions[i];
      const VkVertexInputBindingDivisorDescriptionEXT *divisor = NULL;

      if (slot_class[attr->binding] == D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA &&
          divisors) {
         for (uint32_t d = 0; d < divisors->vertexBindingDivisorCount; d++) {
            if (attr->binding == divisors->pVertexBindingDivisors[d].binding) {
               divisor = &divisors->pVertexBindingDivisors[d];
               break;
            }
         }
      }

      VkFormat patched_format = dzn_graphics_pipeline_patch_vi_format(attr->format);
      if (patched_format != attr->format)
         vi_conversions[attr->location] = vk_format_to_pipe_format(attr->format);

      /* nir_to_dxil() names all vertex inputs as TEXCOORDx */
      inputs[attr->location] = (D3D12_INPUT_ELEMENT_DESC) {
         .SemanticName = "TEXCOORD",
         .Format = dzn_buffer_get_dxgi_format(patched_format),
         .InputSlot = attr->binding,
         .InputSlotClass = slot_class[attr->binding],
         .InstanceDataStepRate =
            divisor ? divisor->divisor :
            slot_class[attr->binding] == D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA ? 1 : 0,
         .AlignedByteOffset = attr->offset,
      };
   }

   return VK_SUCCESS;
}

static D3D12_PRIMITIVE_TOPOLOGY_TYPE
to_prim_topology_type(VkPrimitiveTopology in)
{
   switch (in) {
   case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
      return D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT;
   case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
   case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
   case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
   case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
      return D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE;
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
      return D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
   case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
      return D3D12_PRIMITIVE_TOPOLOGY_TYPE_PATCH;
   default: unreachable("Invalid primitive topology");
   }
}

static D3D12_PRIMITIVE_TOPOLOGY
to_prim_topology(VkPrimitiveTopology in, unsigned patch_control_points)
{
   switch (in) {
   case VK_PRIMITIVE_TOPOLOGY_POINT_LIST: return D3D_PRIMITIVE_TOPOLOGY_POINTLIST;
   case VK_PRIMITIVE_TOPOLOGY_LINE_LIST: return D3D_PRIMITIVE_TOPOLOGY_LINELIST;
   case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP: return D3D_PRIMITIVE_TOPOLOGY_LINESTRIP;
   case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY: return D3D_PRIMITIVE_TOPOLOGY_LINELIST_ADJ;
   case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY: return D3D_PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ;
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP;
   /* Triangle fans are emulated using an intermediate index buffer. */
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ;
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY: return D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ;
   case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
      assert(patch_control_points);
      return (D3D12_PRIMITIVE_TOPOLOGY)(D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST + patch_control_points - 1);
   default: unreachable("Invalid primitive topology");
   }
}

static VkResult
dzn_graphics_pipeline_translate_ia(struct dzn_device *device,
                                   struct dzn_graphics_pipeline *pipeline,
                                   D3D12_PIPELINE_STATE_STREAM_DESC *out,
                                   const VkGraphicsPipelineCreateInfo *in)
{
   const VkPipelineInputAssemblyStateCreateInfo *in_ia =
      in->pInputAssemblyState;
   bool has_tes = false;
   for (uint32_t i = 0; i < in->stageCount; i++) {
      if (in->pStages[i].stage == VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT ||
          in->pStages[i].stage == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) {
         has_tes = true;
         break;
      }
   }
   const VkPipelineTessellationStateCreateInfo *in_tes =
      has_tes ? in->pTessellationState : NULL;
   VkResult ret = VK_SUCCESS;

   d3d12_gfx_pipeline_state_stream_new_desc(out, PRIMITIVE_TOPOLOGY, D3D12_PRIMITIVE_TOPOLOGY_TYPE, prim_top_type);
   *prim_top_type = to_prim_topology_type(in_ia->topology);
   pipeline->ia.triangle_fan = in_ia->topology == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN;
   pipeline->ia.topology =
      to_prim_topology(in_ia->topology, in_tes ? in_tes->patchControlPoints : 0);

   if (in_ia->primitiveRestartEnable) {
      d3d12_gfx_pipeline_state_stream_new_desc(out, IB_STRIP_CUT_VALUE, D3D12_INDEX_BUFFER_STRIP_CUT_VALUE, ib_strip_cut);
      pipeline->templates.desc_offsets.ib_strip_cut =
         (uintptr_t)ib_strip_cut - (uintptr_t)out->pPipelineStateSubobjectStream;
      *ib_strip_cut = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_DISABLED;
      ret = dzn_graphics_pipeline_prepare_for_variants(device, pipeline);
   }

   return ret;
}

static D3D12_FILL_MODE
translate_polygon_mode(VkPolygonMode in)
{
   switch (in) {
   case VK_POLYGON_MODE_FILL: return D3D12_FILL_MODE_SOLID;
   case VK_POLYGON_MODE_LINE: return D3D12_FILL_MODE_WIREFRAME;
   default: unreachable("Unsupported polygon mode");
   }
}

static D3D12_CULL_MODE
translate_cull_mode(VkCullModeFlags in)
{
   switch (in) {
   case VK_CULL_MODE_NONE: return D3D12_CULL_MODE_NONE;
   case VK_CULL_MODE_FRONT_BIT: return D3D12_CULL_MODE_FRONT;
   case VK_CULL_MODE_BACK_BIT: return D3D12_CULL_MODE_BACK;
   /* Front+back face culling is equivalent to 'rasterization disabled' */
   case VK_CULL_MODE_FRONT_AND_BACK: return D3D12_CULL_MODE_NONE;
   default: unreachable("Unsupported cull mode");
   }
}

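/* Vulkan expresses the constant depth bias as a float, while
 * D3D12_RASTERIZER_DESC::DepthBias is an INT, so the value is clamped to
 * the representable range before the implicit conversion.
 */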
static int32_t
translate_depth_bias(double depth_bias)
{
   if (depth_bias > INT32_MAX)
      return INT32_MAX;
   else if (depth_bias < INT32_MIN)
      return INT32_MIN;

   return depth_bias;
}

static void
dzn_graphics_pipeline_translate_rast(struct dzn_graphics_pipeline *pipeline,
                                     D3D12_PIPELINE_STATE_STREAM_DESC *out,
                                     const VkGraphicsPipelineCreateInfo *in)
{
   const VkPipelineRasterizationStateCreateInfo *in_rast =
      in->pRasterizationState;
   const VkPipelineViewportStateCreateInfo *in_vp =
      in_rast->rasterizerDiscardEnable ? NULL : in->pViewportState;

   if (in_vp) {
      pipeline->vp.count = in_vp->viewportCount;
      if (in_vp->pViewports) {
         for (uint32_t i = 0; i < in_vp->viewportCount; i++)
            dzn_translate_viewport(&pipeline->vp.desc[i], &in_vp->pViewports[i]);
      }

      pipeline->scissor.count = in_vp->scissorCount;
      if (in_vp->pScissors) {
         for (uint32_t i = 0; i < in_vp->scissorCount; i++)
            dzn_translate_rect(&pipeline->scissor.desc[i], &in_vp->pScissors[i]);
      }
   }

   d3d12_gfx_pipeline_state_stream_new_desc(out, RASTERIZER, D3D12_RASTERIZER_DESC, desc);
   pipeline->templates.desc_offsets.rast =
      (uintptr_t)desc - (uintptr_t)out->pPipelineStateSubobjectStream;
   desc->DepthClipEnable = !in_rast->depthClampEnable;
   desc->FillMode = translate_polygon_mode(in_rast->polygonMode);
   desc->CullMode = translate_cull_mode(in_rast->cullMode);
   desc->FrontCounterClockwise =
      in_rast->frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE;
   if (in_rast->depthBiasEnable) {
      desc->DepthBias = translate_depth_bias(in_rast->depthBiasConstantFactor);
      desc->SlopeScaledDepthBias = in_rast->depthBiasSlopeFactor;
      desc->DepthBiasClamp = in_rast->depthBiasClamp;
   }

   assert(in_rast->lineWidth == 1.0f);
}

static void
dzn_graphics_pipeline_translate_ms(struct dzn_graphics_pipeline *pipeline,
                                   D3D12_PIPELINE_STATE_STREAM_DESC *out,
                                   const VkGraphicsPipelineCreateInfo *in)
{
   const VkPipelineRasterizationStateCreateInfo *in_rast =
      in->pRasterizationState;
   const VkPipelineMultisampleStateCreateInfo *in_ms =
      in_rast->rasterizerDiscardEnable ? NULL : in->pMultisampleState;

   if (!in_ms)
      return;

   /* TODO: minSampleShading (use VRS), alphaToOneEnable */
   d3d12_gfx_pipeline_state_stream_new_desc(out, SAMPLE_DESC, DXGI_SAMPLE_DESC, desc);
   desc->Count = in_ms->rasterizationSamples;
   desc->Quality = 0;

   if (!in_ms->pSampleMask)
      return;

   d3d12_gfx_pipeline_state_stream_new_desc(out, SAMPLE_MASK, UINT, mask);
   *mask = *in_ms->pSampleMask;
}

static D3D12_STENCIL_OP
translate_stencil_op(VkStencilOp in)
{
   switch (in) {
   case VK_STENCIL_OP_KEEP: return D3D12_STENCIL_OP_KEEP;
   case VK_STENCIL_OP_ZERO: return D3D12_STENCIL_OP_ZERO;
   case VK_STENCIL_OP_REPLACE: return D3D12_STENCIL_OP_REPLACE;
   case VK_STENCIL_OP_INCREMENT_AND_CLAMP: return D3D12_STENCIL_OP_INCR_SAT;
   case VK_STENCIL_OP_DECREMENT_AND_CLAMP: return D3D12_STENCIL_OP_DECR_SAT;
   case VK_STENCIL_OP_INCREMENT_AND_WRAP: return D3D12_STENCIL_OP_INCR;
   case VK_STENCIL_OP_DECREMENT_AND_WRAP: return D3D12_STENCIL_OP_DECR;
   case VK_STENCIL_OP_INVERT: return D3D12_STENCIL_OP_INVERT;
   default: unreachable("Invalid stencil op");
   }
}

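/* D3D12_DEPTH_STENCIL_DESC1 exposes a single StencilReadMask/StencilWriteMask
 * pair and a single draw-time stencil reference, while Vulkan allows
 * independent front/back masks and references. The logic below tracks
 * whether the two faces actually diverge (independent_front_back, currently
 * unsupported, see the FIXME and assert at the end) and otherwise picks the
 * face whose state matters given the cull mode.
 */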
static void
translate_stencil_test(struct dzn_graphics_pipeline *pipeline,
                       D3D12_DEPTH_STENCIL_DESC1 *out,
                       const VkGraphicsPipelineCreateInfo *in)
{
   const VkPipelineDepthStencilStateCreateInfo *in_zsa =
      in->pDepthStencilState;

   bool front_test_uses_ref =
      !(in->pRasterizationState->cullMode & VK_CULL_MODE_FRONT_BIT) &&
      in_zsa->front.compareOp != VK_COMPARE_OP_NEVER &&
      in_zsa->front.compareOp != VK_COMPARE_OP_ALWAYS &&
      (pipeline->zsa.stencil_test.dynamic_compare_mask ||
       in_zsa->front.compareMask != 0);
   bool back_test_uses_ref =
      !(in->pRasterizationState->cullMode & VK_CULL_MODE_BACK_BIT) &&
      in_zsa->back.compareOp != VK_COMPARE_OP_NEVER &&
      in_zsa->back.compareOp != VK_COMPARE_OP_ALWAYS &&
      (pipeline->zsa.stencil_test.dynamic_compare_mask ||
       in_zsa->back.compareMask != 0);

   if (front_test_uses_ref && pipeline->zsa.stencil_test.dynamic_compare_mask)
      pipeline->zsa.stencil_test.front.compare_mask = UINT32_MAX;
   else if (front_test_uses_ref)
      pipeline->zsa.stencil_test.front.compare_mask = in_zsa->front.compareMask;
   else
      pipeline->zsa.stencil_test.front.compare_mask = 0;

   if (back_test_uses_ref && pipeline->zsa.stencil_test.dynamic_compare_mask)
      pipeline->zsa.stencil_test.back.compare_mask = UINT32_MAX;
   else if (back_test_uses_ref)
      pipeline->zsa.stencil_test.back.compare_mask = in_zsa->back.compareMask;
   else
      pipeline->zsa.stencil_test.back.compare_mask = 0;

   bool diff_wr_mask =
      in->pRasterizationState->cullMode == VK_CULL_MODE_NONE &&
      (pipeline->zsa.stencil_test.dynamic_write_mask ||
       in_zsa->back.writeMask != in_zsa->front.writeMask);
   bool diff_ref =
      in->pRasterizationState->cullMode == VK_CULL_MODE_NONE &&
      (pipeline->zsa.stencil_test.dynamic_ref ||
       in_zsa->back.reference != in_zsa->front.reference);
   bool diff_cmp_mask =
      back_test_uses_ref && front_test_uses_ref &&
      (pipeline->zsa.stencil_test.dynamic_compare_mask ||
       pipeline->zsa.stencil_test.front.compare_mask != pipeline->zsa.stencil_test.back.compare_mask);

   if (diff_cmp_mask || diff_wr_mask)
      pipeline->zsa.stencil_test.independent_front_back = true;

   bool back_wr_uses_ref =
      !(in->pRasterizationState->cullMode & VK_CULL_MODE_BACK_BIT) &&
      ((in_zsa->back.compareOp != VK_COMPARE_OP_ALWAYS &&
        in_zsa->back.failOp == VK_STENCIL_OP_REPLACE) ||
       (in_zsa->back.compareOp != VK_COMPARE_OP_NEVER &&
        (!in_zsa->depthTestEnable || in_zsa->depthCompareOp != VK_COMPARE_OP_NEVER) &&
        in_zsa->back.passOp == VK_STENCIL_OP_REPLACE) ||
       (in_zsa->depthTestEnable &&
        in_zsa->depthCompareOp != VK_COMPARE_OP_ALWAYS &&
        in_zsa->back.depthFailOp == VK_STENCIL_OP_REPLACE));
   bool front_wr_uses_ref =
      !(in->pRasterizationState->cullMode & VK_CULL_MODE_FRONT_BIT) &&
      ((in_zsa->front.compareOp != VK_COMPARE_OP_ALWAYS &&
        in_zsa->front.failOp == VK_STENCIL_OP_REPLACE) ||
       (in_zsa->front.compareOp != VK_COMPARE_OP_NEVER &&
        (!in_zsa->depthTestEnable || in_zsa->depthCompareOp != VK_COMPARE_OP_NEVER) &&
        in_zsa->front.passOp == VK_STENCIL_OP_REPLACE) ||
       (in_zsa->depthTestEnable &&
        in_zsa->depthCompareOp != VK_COMPARE_OP_ALWAYS &&
        in_zsa->front.depthFailOp == VK_STENCIL_OP_REPLACE));

   pipeline->zsa.stencil_test.front.write_mask =
      (pipeline->zsa.stencil_test.dynamic_write_mask ||
       (in->pRasterizationState->cullMode & VK_CULL_MODE_FRONT_BIT)) ?
      0 : in_zsa->front.writeMask;
   pipeline->zsa.stencil_test.back.write_mask =
      (pipeline->zsa.stencil_test.dynamic_write_mask ||
       (in->pRasterizationState->cullMode & VK_CULL_MODE_BACK_BIT)) ?
      0 : in_zsa->back.writeMask;

   pipeline->zsa.stencil_test.front.uses_ref = front_test_uses_ref || front_wr_uses_ref;
   pipeline->zsa.stencil_test.back.uses_ref = back_test_uses_ref || back_wr_uses_ref;

   if (diff_ref &&
       pipeline->zsa.stencil_test.front.uses_ref &&
       pipeline->zsa.stencil_test.back.uses_ref)
      pipeline->zsa.stencil_test.independent_front_back = true;

   pipeline->zsa.stencil_test.front.ref =
      pipeline->zsa.stencil_test.dynamic_ref ? 0 : in_zsa->front.reference;
   pipeline->zsa.stencil_test.back.ref =
      pipeline->zsa.stencil_test.dynamic_ref ? 0 : in_zsa->back.reference;

   /* FIXME: We don't support independent {compare,write}_mask and stencil
    * reference. Until we have proper support for independent front/back
    * stencil test, let's prioritize the front setup when both are active.
    */
   out->StencilReadMask =
      front_test_uses_ref ?
      pipeline->zsa.stencil_test.front.compare_mask :
      back_test_uses_ref ?
      pipeline->zsa.stencil_test.back.compare_mask : 0;
   out->StencilWriteMask =
      pipeline->zsa.stencil_test.front.write_mask ?
      pipeline->zsa.stencil_test.front.write_mask :
      pipeline->zsa.stencil_test.back.write_mask;

   assert(!pipeline->zsa.stencil_test.independent_front_back);
}

static void
dzn_graphics_pipeline_translate_zsa(struct dzn_graphics_pipeline *pipeline,
                                    D3D12_PIPELINE_STATE_STREAM_DESC *out,
                                    const VkGraphicsPipelineCreateInfo *in)
{
   const VkPipelineRasterizationStateCreateInfo *in_rast =
      in->pRasterizationState;
   const VkPipelineDepthStencilStateCreateInfo *in_zsa =
      in_rast->rasterizerDiscardEnable ? NULL : in->pDepthStencilState;

   if (!in_zsa)
      return;

   d3d12_gfx_pipeline_state_stream_new_desc(out, DEPTH_STENCIL1, D3D12_DEPTH_STENCIL_DESC1, desc);
   pipeline->templates.desc_offsets.ds =
      (uintptr_t)desc - (uintptr_t)out->pPipelineStateSubobjectStream;

   desc->DepthEnable =
      in_zsa->depthTestEnable || in_zsa->depthBoundsTestEnable;
   desc->DepthWriteMask =
      in_zsa->depthWriteEnable ?
      D3D12_DEPTH_WRITE_MASK_ALL : D3D12_DEPTH_WRITE_MASK_ZERO;
   desc->DepthFunc =
      in_zsa->depthTestEnable ?
      dzn_translate_compare_op(in_zsa->depthCompareOp) :
      D3D12_COMPARISON_FUNC_ALWAYS;
   pipeline->zsa.depth_bounds.enable = in_zsa->depthBoundsTestEnable;
   pipeline->zsa.depth_bounds.min = in_zsa->minDepthBounds;
   pipeline->zsa.depth_bounds.max = in_zsa->maxDepthBounds;
   desc->DepthBoundsTestEnable = in_zsa->depthBoundsTestEnable;
   desc->StencilEnable = in_zsa->stencilTestEnable;
   if (in_zsa->stencilTestEnable) {
      desc->FrontFace.StencilFailOp =
        translate_stencil_op(in_zsa->front.failOp);
      desc->FrontFace.StencilDepthFailOp =
        translate_stencil_op(in_zsa->front.depthFailOp);
      desc->FrontFace.StencilPassOp =
        translate_stencil_op(in_zsa->front.passOp);
      desc->FrontFace.StencilFunc =
        dzn_translate_compare_op(in_zsa->front.compareOp);
      desc->BackFace.StencilFailOp =
        translate_stencil_op(in_zsa->back.failOp);
      desc->BackFace.StencilDepthFailOp =
        translate_stencil_op(in_zsa->back.depthFailOp);
      desc->BackFace.StencilPassOp =
        translate_stencil_op(in_zsa->back.passOp);
      desc->BackFace.StencilFunc =
        dzn_translate_compare_op(in_zsa->back.compareOp);

      pipeline->zsa.stencil_test.enable = true;

      translate_stencil_test(pipeline, desc, in);
   }
}

static D3D12_BLEND
translate_blend_factor(VkBlendFactor in, bool is_alpha)
{
   switch (in) {
   case VK_BLEND_FACTOR_ZERO: return D3D12_BLEND_ZERO;
   case VK_BLEND_FACTOR_ONE: return D3D12_BLEND_ONE;
   case VK_BLEND_FACTOR_SRC_COLOR:
      return is_alpha ? D3D12_BLEND_SRC_ALPHA : D3D12_BLEND_SRC_COLOR;
   case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
      return is_alpha ? D3D12_BLEND_INV_SRC_ALPHA : D3D12_BLEND_INV_SRC_COLOR;
   case VK_BLEND_FACTOR_DST_COLOR:
      return is_alpha ? D3D12_BLEND_DEST_ALPHA : D3D12_BLEND_DEST_COLOR;
   case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
      return is_alpha ? D3D12_BLEND_INV_DEST_ALPHA : D3D12_BLEND_INV_DEST_COLOR;
   case VK_BLEND_FACTOR_SRC_ALPHA: return D3D12_BLEND_SRC_ALPHA;
   case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA: return D3D12_BLEND_INV_SRC_ALPHA;
   case VK_BLEND_FACTOR_DST_ALPHA: return D3D12_BLEND_DEST_ALPHA;
   case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA: return D3D12_BLEND_INV_DEST_ALPHA;
1397   /* FIXME: no way to isolate the alpla and color constants */
1398   case VK_BLEND_FACTOR_CONSTANT_COLOR:
1399   case VK_BLEND_FACTOR_CONSTANT_ALPHA:
1400      return D3D12_BLEND_BLEND_FACTOR;
1401   case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
1402   case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
1403      return D3D12_BLEND_INV_BLEND_FACTOR;
1404   case VK_BLEND_FACTOR_SRC1_COLOR:
1405      return is_alpha ? D3D12_BLEND_SRC1_ALPHA : D3D12_BLEND_SRC1_COLOR;
1406   case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
1407      return is_alpha ? D3D12_BLEND_INV_SRC1_ALPHA : D3D12_BLEND_INV_SRC1_COLOR;
1408   case VK_BLEND_FACTOR_SRC1_ALPHA: return D3D12_BLEND_SRC1_ALPHA;
1409   case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA: return D3D12_BLEND_INV_SRC1_ALPHA;
1410   case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE: return D3D12_BLEND_SRC_ALPHA_SAT;
1411   default: unreachable("Invalid blend factor");
1412   }
1413}
1414
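/* Direct 1:1 mapping from VkBlendOp to D3D12_BLEND_OP. */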
static D3D12_BLEND_OP
translate_blend_op(VkBlendOp in)
{
   switch (in) {
   case VK_BLEND_OP_ADD: return D3D12_BLEND_OP_ADD;
   case VK_BLEND_OP_SUBTRACT: return D3D12_BLEND_OP_SUBTRACT;
   case VK_BLEND_OP_REVERSE_SUBTRACT: return D3D12_BLEND_OP_REV_SUBTRACT;
   case VK_BLEND_OP_MIN: return D3D12_BLEND_OP_MIN;
   case VK_BLEND_OP_MAX: return D3D12_BLEND_OP_MAX;
   default: unreachable("Invalid blend op");
   }
}

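/* Direct 1:1 mapping from VkLogicOp to D3D12_LOGIC_OP. */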
static D3D12_LOGIC_OP
translate_logic_op(VkLogicOp in)
{
   switch (in) {
   case VK_LOGIC_OP_CLEAR: return D3D12_LOGIC_OP_CLEAR;
   case VK_LOGIC_OP_AND: return D3D12_LOGIC_OP_AND;
   case VK_LOGIC_OP_AND_REVERSE: return D3D12_LOGIC_OP_AND_REVERSE;
   case VK_LOGIC_OP_COPY: return D3D12_LOGIC_OP_COPY;
   case VK_LOGIC_OP_AND_INVERTED: return D3D12_LOGIC_OP_AND_INVERTED;
   case VK_LOGIC_OP_NO_OP: return D3D12_LOGIC_OP_NOOP;
   case VK_LOGIC_OP_XOR: return D3D12_LOGIC_OP_XOR;
   case VK_LOGIC_OP_OR: return D3D12_LOGIC_OP_OR;
   case VK_LOGIC_OP_NOR: return D3D12_LOGIC_OP_NOR;
   case VK_LOGIC_OP_EQUIVALENT: return D3D12_LOGIC_OP_EQUIV;
   case VK_LOGIC_OP_INVERT: return D3D12_LOGIC_OP_INVERT;
   case VK_LOGIC_OP_OR_REVERSE: return D3D12_LOGIC_OP_OR_REVERSE;
   case VK_LOGIC_OP_COPY_INVERTED: return D3D12_LOGIC_OP_COPY_INVERTED;
   case VK_LOGIC_OP_OR_INVERTED: return D3D12_LOGIC_OP_OR_INVERTED;
   case VK_LOGIC_OP_NAND: return D3D12_LOGIC_OP_NAND;
   case VK_LOGIC_OP_SET: return D3D12_LOGIC_OP_SET;
   default: unreachable("Invalid logic op");
   }
}

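/* Translate VkPipelineColorBlendStateCreateInfo into a D3D12_BLEND_DESC
 * subobject. When logicOpEnable is set, the logic op replaces blending on
 * every render target, mirroring the Vulkan rule that the two are mutually
 * exclusive.
 */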
static void
dzn_graphics_pipeline_translate_blend(struct dzn_graphics_pipeline *pipeline,
                                      D3D12_PIPELINE_STATE_STREAM_DESC *out,
                                      const VkGraphicsPipelineCreateInfo *in)
{
   const VkPipelineRasterizationStateCreateInfo *in_rast =
      in->pRasterizationState;
   const VkPipelineColorBlendStateCreateInfo *in_blend =
      in_rast->rasterizerDiscardEnable ? NULL : in->pColorBlendState;
   const VkPipelineMultisampleStateCreateInfo *in_ms =
      in_rast->rasterizerDiscardEnable ? NULL : in->pMultisampleState;

   if (!in_blend || !in_ms)
      return;

   d3d12_gfx_pipeline_state_stream_new_desc(out, BLEND, D3D12_BLEND_DESC, desc);
   D3D12_LOGIC_OP logicop =
      in_blend->logicOpEnable ?
      translate_logic_op(in_blend->logicOp) : D3D12_LOGIC_OP_NOOP;
   desc->AlphaToCoverageEnable = in_ms->alphaToCoverageEnable;
   memcpy(pipeline->blend.constants, in_blend->blendConstants,
          sizeof(pipeline->blend.constants));

   for (uint32_t i = 0; i < in_blend->attachmentCount; i++) {
      /* Attachments with different blend state need independent blending. */
      if (i > 0 &&
          memcmp(&in_blend->pAttachments[i - 1], &in_blend->pAttachments[i],
                 sizeof(*in_blend->pAttachments)) != 0)
         desc->IndependentBlendEnable = true;

      desc->RenderTarget[i].BlendEnable =
         in_blend->pAttachments[i].blendEnable;
      desc->RenderTarget[i].RenderTargetWriteMask =
         in_blend->pAttachments[i].colorWriteMask;

      if (in_blend->logicOpEnable) {
         desc->RenderTarget[i].LogicOpEnable = true;
         desc->RenderTarget[i].LogicOp = logicop;
      } else {
         desc->RenderTarget[i].SrcBlend =
            translate_blend_factor(in_blend->pAttachments[i].srcColorBlendFactor, false);
         desc->RenderTarget[i].DestBlend =
            translate_blend_factor(in_blend->pAttachments[i].dstColorBlendFactor, false);
         desc->RenderTarget[i].BlendOp =
            translate_blend_op(in_blend->pAttachments[i].colorBlendOp);
         desc->RenderTarget[i].SrcBlendAlpha =
            translate_blend_factor(in_blend->pAttachments[i].srcAlphaBlendFactor, true);
         desc->RenderTarget[i].DestBlendAlpha =
            translate_blend_factor(in_blend->pAttachments[i].dstAlphaBlendFactor, true);
         desc->RenderTarget[i].BlendOpAlpha =
            translate_blend_op(in_blend->pAttachments[i].alphaBlendOp);
      }
   }
}

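/* Common pipeline initialization: copy the root-signature layout info the
 * pipeline needs at bind time, take a reference on the root signature, and
 * append it as the first subobject of the pipeline state stream.
 */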
static void
dzn_pipeline_init(struct dzn_pipeline *pipeline,
                  struct dzn_device *device,
                  VkPipelineBindPoint type,
                  struct dzn_pipeline_layout *layout,
                  D3D12_PIPELINE_STATE_STREAM_DESC *stream_desc)
{
   pipeline->type = type;
   pipeline->root.sets_param_count = layout->root.sets_param_count;
   pipeline->root.sysval_cbv_param_idx = layout->root.sysval_cbv_param_idx;
   pipeline->root.push_constant_cbv_param_idx = layout->root.push_constant_cbv_param_idx;
   STATIC_ASSERT(sizeof(pipeline->root.type) == sizeof(layout->root.type));
   memcpy(pipeline->root.type, layout->root.type, sizeof(pipeline->root.type));
   pipeline->root.sig = layout->root.sig;
   ID3D12RootSignature_AddRef(pipeline->root.sig);

   STATIC_ASSERT(sizeof(layout->desc_count) == sizeof(pipeline->desc_count));
   memcpy(pipeline->desc_count, layout->desc_count, sizeof(pipeline->desc_count));

   STATIC_ASSERT(sizeof(layout->sets) == sizeof(pipeline->sets));
   memcpy(pipeline->sets, layout->sets, sizeof(pipeline->sets));
   vk_object_base_init(&device->vk, &pipeline->base, VK_OBJECT_TYPE_PIPELINE);

   ASSERTED uint32_t max_streamsz =
      type == VK_PIPELINE_BIND_POINT_GRAPHICS ?
      MAX_GFX_PIPELINE_STATE_STREAM_SIZE :
      MAX_COMPUTE_PIPELINE_STATE_STREAM_SIZE;

   d3d12_pipeline_state_stream_new_desc(stream_desc, max_streamsz, ROOT_SIGNATURE,
                                        ID3D12RootSignature *, root_sig);
   *root_sig = pipeline->root.sig;
}

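/* Release the D3D12 objects owned by the common pipeline object and tear
 * down the Vulkan object base.
 */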
static void
dzn_pipeline_finish(struct dzn_pipeline *pipeline)
{
   if (pipeline->state)
      ID3D12PipelineState_Release(pipeline->state);
   if (pipeline->root.sig)
      ID3D12RootSignature_Release(pipeline->root.sig);

   vk_object_base_finish(&pipeline->base);
}

static void
dzn_graphics_pipeline_delete_variant(struct hash_entry *he)
{
   struct dzn_graphics_pipeline_variant *variant = he->data;

   if (variant->state)
      ID3D12PipelineState_Release(variant->state);
}

static void
dzn_graphics_pipeline_cleanup_nir_shaders(struct dzn_graphics_pipeline *pipeline)
{
   for (uint32_t i = 0; i < ARRAY_SIZE(pipeline->templates.shaders); i++) {
      ralloc_free(pipeline->templates.shaders[i].nir);
      pipeline->templates.shaders[i].nir = NULL;
   }
}

static void
dzn_graphics_pipeline_cleanup_dxil_shaders(struct dzn_graphics_pipeline *pipeline)
{
   for (uint32_t i = 0; i < ARRAY_SIZE(pipeline->templates.shaders); i++) {
      if (pipeline->templates.shaders[i].bc) {
         free((void *)pipeline->templates.shaders[i].bc->pShaderBytecode);
         pipeline->templates.shaders[i].bc = NULL;
      }
   }
}

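/* Destroy a graphics pipeline: release all cached variants, shader blobs,
 * and indirect command signatures before freeing the object itself.
 */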
static void
dzn_graphics_pipeline_destroy(struct dzn_graphics_pipeline *pipeline,
                              const VkAllocationCallbacks *alloc)
{
   if (!pipeline)
      return;

   _mesa_hash_table_destroy(pipeline->variants,
                            dzn_graphics_pipeline_delete_variant);

   dzn_graphics_pipeline_cleanup_nir_shaders(pipeline);
   dzn_graphics_pipeline_cleanup_dxil_shaders(pipeline);

   for (uint32_t i = 0; i < ARRAY_SIZE(pipeline->indirect_cmd_sigs); i++) {
      if (pipeline->indirect_cmd_sigs[i])
         ID3D12CommandSignature_Release(pipeline->indirect_cmd_sigs[i]);
   }

   dzn_pipeline_finish(&pipeline->base);
   vk_free2(&pipeline->base.base.device->alloc, alloc, pipeline);
}

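/* Build a graphics pipeline: translate the fixed-function state into D3D12
 * pipeline state stream subobjects, resolve attachment formats from either
 * the render pass or VkPipelineRenderingCreateInfo, compile the shaders,
 * and finally create the D3D12 pipeline state. Creation of the actual
 * ID3D12PipelineState is deferred when dynamic states require variants.
 */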
static VkResult
dzn_graphics_pipeline_create(struct dzn_device *device,
                             VkPipelineCache cache,
                             const VkGraphicsPipelineCreateInfo *pCreateInfo,
                             const VkAllocationCallbacks *pAllocator,
                             VkPipeline *out)
{
   const VkPipelineRenderingCreateInfo *ri = (const VkPipelineRenderingCreateInfo *)
      vk_find_struct_const(pCreateInfo, PIPELINE_RENDERING_CREATE_INFO);
   VK_FROM_HANDLE(vk_pipeline_cache, pcache, cache);
   VK_FROM_HANDLE(vk_render_pass, pass, pCreateInfo->renderPass);
   VK_FROM_HANDLE(dzn_pipeline_layout, layout, pCreateInfo->layout);
   uint32_t color_count = 0;
   VkFormat color_fmts[MAX_RTS] = { 0 };
   VkFormat zs_fmt = VK_FORMAT_UNDEFINED;
   VkResult ret;
   HRESULT hres = 0;

   struct dzn_graphics_pipeline *pipeline =
      vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!pipeline)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   D3D12_PIPELINE_STATE_STREAM_DESC *stream_desc = &pipeline->templates.stream_desc;
   stream_desc->pPipelineStateSubobjectStream = pipeline->templates.stream_buf;

   dzn_pipeline_init(&pipeline->base, device,
                     VK_PIPELINE_BIND_POINT_GRAPHICS,
                     layout, stream_desc);
   D3D12_INPUT_ELEMENT_DESC attribs[MAX_VERTEX_GENERIC_ATTRIBS] = { 0 };
   enum pipe_format vi_conversions[MAX_VERTEX_GENERIC_ATTRIBS] = { 0 };

   ret = dzn_graphics_pipeline_translate_vi(pipeline, pCreateInfo,
                                            attribs, vi_conversions);
   if (ret != VK_SUCCESS)
      goto out;

   if (pCreateInfo->pDynamicState) {
      for (uint32_t i = 0; i < pCreateInfo->pDynamicState->dynamicStateCount; i++) {
         switch (pCreateInfo->pDynamicState->pDynamicStates[i]) {
         case VK_DYNAMIC_STATE_VIEWPORT:
            pipeline->vp.dynamic = true;
            break;
         case VK_DYNAMIC_STATE_SCISSOR:
            pipeline->scissor.dynamic = true;
            break;
         case VK_DYNAMIC_STATE_STENCIL_REFERENCE:
            pipeline->zsa.stencil_test.dynamic_ref = true;
            break;
         case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK:
            pipeline->zsa.stencil_test.dynamic_compare_mask = true;
            break;
         case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK:
            pipeline->zsa.stencil_test.dynamic_write_mask = true;
            break;
         case VK_DYNAMIC_STATE_BLEND_CONSTANTS:
            pipeline->blend.dynamic_constants = true;
            break;
         case VK_DYNAMIC_STATE_DEPTH_BOUNDS:
            pipeline->zsa.depth_bounds.dynamic = true;
            break;
         case VK_DYNAMIC_STATE_DEPTH_BIAS:
            pipeline->zsa.dynamic_depth_bias = true;
            ret = dzn_graphics_pipeline_prepare_for_variants(device, pipeline);
            if (ret)
               goto out;
            break;
         case VK_DYNAMIC_STATE_LINE_WIDTH:
            /* Nothing to do since we only support lineWidth = 1. */
            break;
         default: unreachable("Unsupported dynamic state");
         }
      }
   }

   ret = dzn_graphics_pipeline_translate_ia(device, pipeline, stream_desc, pCreateInfo);
   if (ret)
      goto out;

   dzn_graphics_pipeline_translate_rast(pipeline, stream_desc, pCreateInfo);
   dzn_graphics_pipeline_translate_ms(pipeline, stream_desc, pCreateInfo);
   dzn_graphics_pipeline_translate_zsa(pipeline, stream_desc, pCreateInfo);
   dzn_graphics_pipeline_translate_blend(pipeline, stream_desc, pCreateInfo);

   if (pass) {
      const struct vk_subpass *subpass = &pass->subpasses[pCreateInfo->subpass];
      color_count = subpass->color_count;
      for (uint32_t i = 0; i < subpass->color_count; i++) {
         uint32_t idx = subpass->color_attachments[i].attachment;

         if (idx == VK_ATTACHMENT_UNUSED)
            continue;

         const struct vk_render_pass_attachment *attachment =
            &pass->attachments[idx];

         color_fmts[i] = attachment->format;
      }

      if (subpass->depth_stencil_attachment &&
          subpass->depth_stencil_attachment->attachment != VK_ATTACHMENT_UNUSED) {
         const struct vk_render_pass_attachment *attachment =
            &pass->attachments[subpass->depth_stencil_attachment->attachment];

         zs_fmt = attachment->format;
      }
   } else if (ri) {
      color_count = ri->colorAttachmentCount;
      memcpy(color_fmts, ri->pColorAttachmentFormats,
             sizeof(color_fmts[0]) * color_count);
      if (ri->depthAttachmentFormat != VK_FORMAT_UNDEFINED)
         zs_fmt = ri->depthAttachmentFormat;
      else if (ri->stencilAttachmentFormat != VK_FORMAT_UNDEFINED)
         zs_fmt = ri->stencilAttachmentFormat;
   }

   if (color_count > 0) {
      d3d12_gfx_pipeline_state_stream_new_desc(stream_desc, RENDER_TARGET_FORMATS, struct D3D12_RT_FORMAT_ARRAY, rts);
      rts->NumRenderTargets = color_count;
      for (uint32_t i = 0; i < color_count; i++) {
         rts->RTFormats[i] =
            dzn_image_get_dxgi_format(color_fmts[i],
                                      VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
                                      VK_IMAGE_ASPECT_COLOR_BIT);
      }
   }

   if (zs_fmt != VK_FORMAT_UNDEFINED) {
      d3d12_gfx_pipeline_state_stream_new_desc(stream_desc, DEPTH_STENCIL_FORMAT, DXGI_FORMAT, ds_fmt);
      *ds_fmt =
         dzn_image_get_dxgi_format(zs_fmt,
                                   VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
                                   VK_IMAGE_ASPECT_DEPTH_BIT |
                                   VK_IMAGE_ASPECT_STENCIL_BIT);
   }

   ret = dzn_graphics_pipeline_compile_shaders(device, pipeline, pcache,
                                               layout, stream_desc,
                                               attribs, vi_conversions,
                                               pCreateInfo);
   if (ret != VK_SUCCESS)
      goto out;

   if (!pipeline->variants) {
      hres = ID3D12Device2_CreatePipelineState(device->dev, stream_desc,
                                               &IID_ID3D12PipelineState,
                                               (void **)&pipeline->base.state);
      if (FAILED(hres)) {
         ret = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
         goto out;
      }

      dzn_graphics_pipeline_cleanup_dxil_shaders(pipeline);
   }

   dzn_graphics_pipeline_cleanup_nir_shaders(pipeline);
   ret = VK_SUCCESS;

out:
   if (ret != VK_SUCCESS)
      dzn_graphics_pipeline_destroy(pipeline, pAllocator);
   else
      *out = dzn_graphics_pipeline_to_handle(pipeline);

   return ret;
}

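/* Return (and lazily create) the ID3D12PipelineState matching the given
 * dynamic state key. The key is first masked so that only the dynamic
 * states that actually influence this pipeline contribute to the variant
 * lookup.
 */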
ID3D12PipelineState *
dzn_graphics_pipeline_get_state(struct dzn_graphics_pipeline *pipeline,
                                const struct dzn_graphics_pipeline_variant_key *key)
{
   if (!pipeline->variants)
      return pipeline->base.state;

   struct dzn_graphics_pipeline_variant_key masked_key = { 0 };

   if (dzn_graphics_pipeline_get_desc_template(pipeline, ib_strip_cut))
      masked_key.ib_strip_cut = key->ib_strip_cut;

   if (dzn_graphics_pipeline_get_desc_template(pipeline, rast) &&
       pipeline->zsa.dynamic_depth_bias)
      masked_key.depth_bias = key->depth_bias;

   const D3D12_DEPTH_STENCIL_DESC1 *ds_templ =
      dzn_graphics_pipeline_get_desc_template(pipeline, ds);
   if (ds_templ && ds_templ->StencilEnable) {
      if (ds_templ->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
          ds_templ->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS)
         masked_key.stencil_test.front.compare_mask = key->stencil_test.front.compare_mask;
      if (ds_templ->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
          ds_templ->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS)
         masked_key.stencil_test.back.compare_mask = key->stencil_test.back.compare_mask;
      if (pipeline->zsa.stencil_test.dynamic_write_mask) {
         masked_key.stencil_test.front.write_mask = key->stencil_test.front.write_mask;
         masked_key.stencil_test.back.write_mask = key->stencil_test.back.write_mask;
      }
   }

   struct dzn_device *device =
      container_of(pipeline->base.base.device, struct dzn_device, vk);
   struct hash_entry *he =
      _mesa_hash_table_search(pipeline->variants, &masked_key);

   struct dzn_graphics_pipeline_variant *variant;

   if (!he) {
      variant = rzalloc(pipeline->variants, struct dzn_graphics_pipeline_variant);
      variant->key = masked_key;

      uintptr_t stream_buf[MAX_GFX_PIPELINE_STATE_STREAM_SIZE / sizeof(uintptr_t)];
      D3D12_PIPELINE_STATE_STREAM_DESC stream_desc = {
         .SizeInBytes = pipeline->templates.stream_desc.SizeInBytes,
         .pPipelineStateSubobjectStream = stream_buf,
      };

      memcpy(stream_buf, pipeline->templates.stream_buf, stream_desc.SizeInBytes);

      D3D12_INDEX_BUFFER_STRIP_CUT_VALUE *ib_strip_cut =
         dzn_graphics_pipeline_get_desc(pipeline, stream_buf, ib_strip_cut);
      if (ib_strip_cut)
         *ib_strip_cut = masked_key.ib_strip_cut;

      D3D12_RASTERIZER_DESC *rast =
         dzn_graphics_pipeline_get_desc(pipeline, stream_buf, rast);
      if (rast && pipeline->zsa.dynamic_depth_bias) {
         rast->DepthBias = translate_depth_bias(masked_key.depth_bias.constant_factor);
         rast->DepthBiasClamp = masked_key.depth_bias.clamp;
         rast->SlopeScaledDepthBias = masked_key.depth_bias.slope_factor;
      }

      D3D12_DEPTH_STENCIL_DESC1 *ds =
         dzn_graphics_pipeline_get_desc(pipeline, stream_buf, ds);
      if (ds && ds->StencilEnable) {
         if (pipeline->zsa.stencil_test.dynamic_compare_mask) {
            if (ds->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
                ds->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS) {
               ds->StencilReadMask = masked_key.stencil_test.front.compare_mask;
            }

            if (ds->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
                ds->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS) {
               ds->StencilReadMask = masked_key.stencil_test.back.compare_mask;
            }

            if (ds->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
                ds->FrontFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS &&
                ds->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_NEVER &&
                ds->BackFace.StencilFunc != D3D12_COMPARISON_FUNC_ALWAYS)
               assert(masked_key.stencil_test.front.compare_mask == masked_key.stencil_test.back.compare_mask);
         }

         if (pipeline->zsa.stencil_test.dynamic_write_mask) {
            assert(!masked_key.stencil_test.front.write_mask ||
                   !masked_key.stencil_test.back.write_mask ||
                   masked_key.stencil_test.front.write_mask == masked_key.stencil_test.back.write_mask);
            ds->StencilWriteMask =
               masked_key.stencil_test.front.write_mask |
               masked_key.stencil_test.back.write_mask;
         }
      }

      ASSERTED HRESULT hres = ID3D12Device2_CreatePipelineState(device->dev, &stream_desc,
                                                                &IID_ID3D12PipelineState,
                                                                (void **)&variant->state);
      assert(!FAILED(hres));
      he = _mesa_hash_table_insert(pipeline->variants, &variant->key, variant);
      assert(he);
   } else {
      variant = he->data;
   }

   if (variant->state)
      ID3D12PipelineState_AddRef(variant->state);

   if (pipeline->base.state)
      ID3D12PipelineState_Release(pipeline->base.state);

   pipeline->base.state = variant->state;
   return variant->state;
}

#define DZN_INDIRECT_CMD_SIG_MAX_ARGS 4

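/* Lazily create the command signature used to execute indirect draws. Root
 * constants patch the vertex runtime data (two 32-bit values starting at
 * first_vertex, plus draw_id) before each draw, and triangle-fan draws
 * additionally get their index buffer view rebound.
 */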
ID3D12CommandSignature *
dzn_graphics_pipeline_get_indirect_cmd_sig(struct dzn_graphics_pipeline *pipeline,
                                           enum dzn_indirect_draw_cmd_sig_type type)
{
   assert(type < DZN_NUM_INDIRECT_DRAW_CMD_SIGS);

   struct dzn_device *device =
      container_of(pipeline->base.base.device, struct dzn_device, vk);
   ID3D12CommandSignature *cmdsig = pipeline->indirect_cmd_sigs[type];

   if (cmdsig)
      return cmdsig;

   bool triangle_fan = type == DZN_INDIRECT_DRAW_TRIANGLE_FAN_CMD_SIG;
   bool indexed = type == DZN_INDIRECT_INDEXED_DRAW_CMD_SIG || triangle_fan;

   uint32_t cmd_arg_count = 0;
   D3D12_INDIRECT_ARGUMENT_DESC cmd_args[DZN_INDIRECT_CMD_SIG_MAX_ARGS];

   if (triangle_fan) {
      cmd_args[cmd_arg_count++] = (D3D12_INDIRECT_ARGUMENT_DESC) {
         .Type = D3D12_INDIRECT_ARGUMENT_TYPE_INDEX_BUFFER_VIEW,
      };
   }

   cmd_args[cmd_arg_count++] = (D3D12_INDIRECT_ARGUMENT_DESC) {
      .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT,
      .Constant = {
         .RootParameterIndex = pipeline->base.root.sysval_cbv_param_idx,
         .DestOffsetIn32BitValues = offsetof(struct dxil_spirv_vertex_runtime_data, first_vertex) / 4,
         .Num32BitValuesToSet = 2,
      },
   };

   cmd_args[cmd_arg_count++] = (D3D12_INDIRECT_ARGUMENT_DESC) {
      .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT,
      .Constant = {
         .RootParameterIndex = pipeline->base.root.sysval_cbv_param_idx,
         .DestOffsetIn32BitValues = offsetof(struct dxil_spirv_vertex_runtime_data, draw_id) / 4,
         .Num32BitValuesToSet = 1,
      },
   };

   cmd_args[cmd_arg_count++] = (D3D12_INDIRECT_ARGUMENT_DESC) {
      .Type = indexed ?
              D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED :
              D3D12_INDIRECT_ARGUMENT_TYPE_DRAW,
   };

   assert(cmd_arg_count <= ARRAY_SIZE(cmd_args));
   assert(offsetof(struct dxil_spirv_vertex_runtime_data, first_vertex) == 0);

   D3D12_COMMAND_SIGNATURE_DESC cmd_sig_desc = {
      .ByteStride =
         triangle_fan ?
         sizeof(struct dzn_indirect_triangle_fan_draw_exec_params) :
         sizeof(struct dzn_indirect_draw_exec_params),
      .NumArgumentDescs = cmd_arg_count,
      .pArgumentDescs = cmd_args,
   };
   HRESULT hres =
      ID3D12Device1_CreateCommandSignature(device->dev, &cmd_sig_desc,
                                           pipeline->base.root.sig,
                                           &IID_ID3D12CommandSignature,
                                           (void **)&cmdsig);
   if (FAILED(hres))
      return NULL;

   pipeline->indirect_cmd_sigs[type] = cmdsig;
   return cmdsig;
}

VKAPI_ATTR VkResult VKAPI_CALL
dzn_CreateGraphicsPipelines(VkDevice dev,
                            VkPipelineCache pipelineCache,
                            uint32_t count,
                            const VkGraphicsPipelineCreateInfo *pCreateInfos,
                            const VkAllocationCallbacks *pAllocator,
                            VkPipeline *pPipelines)
{
   VK_FROM_HANDLE(dzn_device, device, dev);
   VkResult result = VK_SUCCESS;

   unsigned i;
   for (i = 0; i < count; i++) {
      result = dzn_graphics_pipeline_create(device,
                                            pipelineCache,
                                            &pCreateInfos[i],
                                            pAllocator,
                                            &pPipelines[i]);
      if (result != VK_SUCCESS) {
         pPipelines[i] = VK_NULL_HANDLE;

         /* Bail out on the first error other than
          * VK_PIPELINE_COMPILE_REQUIRED, as it is not obvious which error
          * should be reported when two different failures occur.
          */
         if (result != VK_PIPELINE_COMPILE_REQUIRED)
            break;

         if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
            break;
      }
   }

   for (; i < count; i++)
      pPipelines[i] = VK_NULL_HANDLE;

   return result;
}

static void
dzn_compute_pipeline_destroy(struct dzn_compute_pipeline *pipeline,
                             const VkAllocationCallbacks *alloc)
{
   if (!pipeline)
      return;

   if (pipeline->indirect_cmd_sig)
      ID3D12CommandSignature_Release(pipeline->indirect_cmd_sig);

   dzn_pipeline_finish(&pipeline->base);
   vk_free2(&pipeline->base.base.device->alloc, alloc, pipeline);
}

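/* Look up a compute pipeline in the cache. The cached object for a
 * pipeline hash is simply the SHA1 of its DXIL shader, which is then used
 * to fetch the actual bytecode and append it as a CS subobject.
 */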
static VkResult
dzn_pipeline_cache_lookup_compute_pipeline(struct vk_pipeline_cache *cache,
                                           uint8_t *pipeline_hash,
                                           D3D12_PIPELINE_STATE_STREAM_DESC *stream_desc,
                                           D3D12_SHADER_BYTECODE *dxil,
                                           bool *cache_hit)
{
   *cache_hit = false;

   if (!cache)
      return VK_SUCCESS;

   struct vk_pipeline_cache_object *cache_obj = NULL;

   cache_obj =
      vk_pipeline_cache_lookup_object(cache, pipeline_hash, SHA1_DIGEST_LENGTH,
                                      &dzn_cached_blob_ops,
                                      NULL);
   if (!cache_obj)
      return VK_SUCCESS;

   struct dzn_cached_blob *cached_blob =
      container_of(cache_obj, struct dzn_cached_blob, base);

   assert(cached_blob->size == SHA1_DIGEST_LENGTH);

   const uint8_t *dxil_hash = cached_blob->data;
   gl_shader_stage stage;

   VkResult ret =
      dzn_pipeline_cache_lookup_dxil_shader(cache, dxil_hash, &stage, dxil);

   if (ret != VK_SUCCESS || stage == MESA_SHADER_NONE)
      goto out;

   assert(stage == MESA_SHADER_COMPUTE);

   d3d12_compute_pipeline_state_stream_new_desc(stream_desc, CS, D3D12_SHADER_BYTECODE, slot);
   *slot = *dxil;
   *cache_hit = true;

out:
   vk_pipeline_cache_object_unref(cache_obj);
   return ret;
}

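/* Record the pipeline-hash -> dxil-hash mapping in the cache. */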
static void
dzn_pipeline_cache_add_compute_pipeline(struct vk_pipeline_cache *cache,
                                        uint8_t *pipeline_hash,
                                        uint8_t *dxil_hash)
{
   struct vk_pipeline_cache_object *cache_obj =
      dzn_cached_blob_create(cache->base.device, pipeline_hash, NULL, SHA1_DIGEST_LENGTH);
   if (!cache_obj)
      return;

   struct dzn_cached_blob *cached_blob =
      container_of(cache_obj, struct dzn_cached_blob, base);

   memcpy((void *)cached_blob->data, dxil_hash, SHA1_DIGEST_LENGTH);

   cache_obj = vk_pipeline_cache_add_object(cache, cache_obj);
   vk_pipeline_cache_object_unref(cache_obj);
}

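/* Compile the compute shader, with two cache levels: a pipeline-level
 * entry mapping the (SPIR-V, layout) hash to a DXIL hash, and a
 * shader-level entry holding the DXIL bytecode itself. On a miss, the
 * SPIR-V is lowered to NIR, bindings are adjusted, and the result is
 * compiled to DXIL and added to both cache levels.
 */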
static VkResult
dzn_compute_pipeline_compile_shader(struct dzn_device *device,
                                    struct dzn_compute_pipeline *pipeline,
                                    struct vk_pipeline_cache *cache,
                                    const struct dzn_pipeline_layout *layout,
                                    D3D12_PIPELINE_STATE_STREAM_DESC *stream_desc,
                                    D3D12_SHADER_BYTECODE *shader,
                                    const VkComputePipelineCreateInfo *info)
{
   uint8_t spirv_hash[SHA1_DIGEST_LENGTH], pipeline_hash[SHA1_DIGEST_LENGTH];
   VkResult ret = VK_SUCCESS;
   nir_shader *nir = NULL;

   if (cache) {
      struct mesa_sha1 pipeline_hash_ctx;

      _mesa_sha1_init(&pipeline_hash_ctx);
      vk_pipeline_hash_shader_stage(&info->stage, spirv_hash);
      _mesa_sha1_update(&pipeline_hash_ctx, spirv_hash, sizeof(spirv_hash));
      _mesa_sha1_update(&pipeline_hash_ctx, layout->stages[MESA_SHADER_COMPUTE].hash,
                        sizeof(layout->stages[MESA_SHADER_COMPUTE].hash));
      _mesa_sha1_final(&pipeline_hash_ctx, pipeline_hash);

      bool cache_hit = false;
      ret = dzn_pipeline_cache_lookup_compute_pipeline(cache, pipeline_hash,
                                                       stream_desc, shader,
                                                       &cache_hit);
      if (ret != VK_SUCCESS || cache_hit)
         goto out;
   }

   ret = dzn_pipeline_get_nir_shader(device, layout, cache, spirv_hash,
                                     &info->stage, MESA_SHADER_COMPUTE,
                                     DXIL_SPIRV_YZ_FLIP_NONE, 0, 0,
                                     false, NULL,
                                     dxil_get_nir_compiler_options(), &nir);
   if (ret != VK_SUCCESS)
      return ret;

   uint8_t bindings_hash[SHA1_DIGEST_LENGTH], dxil_hash[SHA1_DIGEST_LENGTH];

   NIR_PASS_V(nir, adjust_var_bindings, layout, cache ? bindings_hash : NULL);

   if (cache) {
      struct mesa_sha1 dxil_hash_ctx;

      _mesa_sha1_init(&dxil_hash_ctx);
      _mesa_sha1_update(&dxil_hash_ctx, spirv_hash, sizeof(spirv_hash));
      _mesa_sha1_update(&dxil_hash_ctx, bindings_hash, sizeof(bindings_hash));
      _mesa_sha1_final(&dxil_hash_ctx, dxil_hash);

      gl_shader_stage stage;

      ret = dzn_pipeline_cache_lookup_dxil_shader(cache, dxil_hash, &stage, shader);
      if (ret != VK_SUCCESS)
         goto out;

      if (stage != MESA_SHADER_NONE) {
         assert(stage == MESA_SHADER_COMPUTE);
         d3d12_compute_pipeline_state_stream_new_desc(stream_desc, CS, D3D12_SHADER_BYTECODE, cs);
         *cs = *shader;
         dzn_pipeline_cache_add_compute_pipeline(cache, pipeline_hash, dxil_hash);
         goto out;
      }
   }

   ret = dzn_pipeline_compile_shader(device, nir, shader);
   if (ret != VK_SUCCESS)
      goto out;

   d3d12_compute_pipeline_state_stream_new_desc(stream_desc, CS, D3D12_SHADER_BYTECODE, cs);
   *cs = *shader;

   if (cache) {
      dzn_pipeline_cache_add_dxil_shader(cache, dxil_hash, MESA_SHADER_COMPUTE, shader);
      dzn_pipeline_cache_add_compute_pipeline(cache, pipeline_hash, dxil_hash);
   }

out:
   ralloc_free(nir);
   return ret;
}

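/* Build a compute pipeline: a root signature subobject plus a single CS
 * subobject in the pipeline state stream.
 */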
static VkResult
dzn_compute_pipeline_create(struct dzn_device *device,
                            VkPipelineCache cache,
                            const VkComputePipelineCreateInfo *pCreateInfo,
                            const VkAllocationCallbacks *pAllocator,
                            VkPipeline *out)
{
   VK_FROM_HANDLE(dzn_pipeline_layout, layout, pCreateInfo->layout);
   VK_FROM_HANDLE(vk_pipeline_cache, pcache, cache);

   struct dzn_compute_pipeline *pipeline =
      vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,
                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!pipeline)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   uintptr_t state_buf[MAX_COMPUTE_PIPELINE_STATE_STREAM_SIZE / sizeof(uintptr_t)];
   D3D12_PIPELINE_STATE_STREAM_DESC stream_desc = {
      .pPipelineStateSubobjectStream = state_buf,
   };

   dzn_pipeline_init(&pipeline->base, device,
                     VK_PIPELINE_BIND_POINT_COMPUTE,
                     layout, &stream_desc);

   D3D12_SHADER_BYTECODE shader = { 0 };
   VkResult ret =
      dzn_compute_pipeline_compile_shader(device, pipeline, pcache, layout,
                                          &stream_desc, &shader, pCreateInfo);
   if (ret != VK_SUCCESS)
      goto out;

   if (FAILED(ID3D12Device2_CreatePipelineState(device->dev, &stream_desc,
                                                &IID_ID3D12PipelineState,
                                                (void **)&pipeline->base.state)))
      ret = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

out:
   free((void *)shader.pShaderBytecode);
   if (ret != VK_SUCCESS)
      dzn_compute_pipeline_destroy(pipeline, pAllocator);
   else
      *out = dzn_compute_pipeline_to_handle(pipeline);

   return ret;
}

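/* Lazily create the command signature for indirect dispatches. Three root
 * constants mirror the dispatch parameters into the sysval root parameter
 * (presumably feeding the num-workgroups sysval) before the dispatch is
 * issued.
 */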
ID3D12CommandSignature *
dzn_compute_pipeline_get_indirect_cmd_sig(struct dzn_compute_pipeline *pipeline)
{
   if (pipeline->indirect_cmd_sig)
      return pipeline->indirect_cmd_sig;

   struct dzn_device *device =
      container_of(pipeline->base.base.device, struct dzn_device, vk);

   D3D12_INDIRECT_ARGUMENT_DESC indirect_dispatch_args[] = {
      {
         .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT,
         .Constant = {
            .RootParameterIndex = pipeline->base.root.sysval_cbv_param_idx,
            .DestOffsetIn32BitValues = 0,
            .Num32BitValuesToSet = 3,
         },
      },
      {
         .Type = D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH,
      },
   };

   D3D12_COMMAND_SIGNATURE_DESC indirect_dispatch_desc = {
      .ByteStride = sizeof(D3D12_DISPATCH_ARGUMENTS) * 2,
      .NumArgumentDescs = ARRAY_SIZE(indirect_dispatch_args),
      .pArgumentDescs = indirect_dispatch_args,
   };

   HRESULT hres =
      ID3D12Device1_CreateCommandSignature(device->dev, &indirect_dispatch_desc,
                                           pipeline->base.root.sig,
                                           &IID_ID3D12CommandSignature,
                                           (void **)&pipeline->indirect_cmd_sig);
   if (FAILED(hres))
      return NULL;

   return pipeline->indirect_cmd_sig;
}

VKAPI_ATTR VkResult VKAPI_CALL
dzn_CreateComputePipelines(VkDevice dev,
                           VkPipelineCache pipelineCache,
                           uint32_t count,
                           const VkComputePipelineCreateInfo *pCreateInfos,
                           const VkAllocationCallbacks *pAllocator,
                           VkPipeline *pPipelines)
{
   VK_FROM_HANDLE(dzn_device, device, dev);
   VkResult result = VK_SUCCESS;

   unsigned i;
   for (i = 0; i < count; i++) {
      result = dzn_compute_pipeline_create(device,
                                           pipelineCache,
                                           &pCreateInfos[i],
                                           pAllocator,
                                           &pPipelines[i]);
      if (result != VK_SUCCESS) {
         pPipelines[i] = VK_NULL_HANDLE;

         /* Bail out on the first error other than
          * VK_PIPELINE_COMPILE_REQUIRED, as it is not obvious which error
          * should be reported when two different failures occur.
          */
         if (result != VK_PIPELINE_COMPILE_REQUIRED)
            break;

         if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
            break;
      }
   }

   for (; i < count; i++)
      pPipelines[i] = VK_NULL_HANDLE;

   return result;
}

VKAPI_ATTR void VKAPI_CALL
dzn_DestroyPipeline(VkDevice device,
                    VkPipeline pipeline,
                    const VkAllocationCallbacks *pAllocator)
{
   VK_FROM_HANDLE(dzn_pipeline, pipe, pipeline);

   if (!pipe)
      return;

   if (pipe->type == VK_PIPELINE_BIND_POINT_GRAPHICS) {
      struct dzn_graphics_pipeline *gfx = container_of(pipe, struct dzn_graphics_pipeline, base);
      dzn_graphics_pipeline_destroy(gfx, pAllocator);
   } else {
      assert(pipe->type == VK_PIPELINE_BIND_POINT_COMPUTE);
      struct dzn_compute_pipeline *compute = container_of(pipe, struct dzn_compute_pipeline, base);
      dzn_compute_pipeline_destroy(compute, pAllocator);
   }
}
