/*
 * Copyright © 2019 Red Hat.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "lvp_private.h"
#include "vk_pipeline.h"
#include "vk_render_pass.h"
#include "vk_util.h"
#include "glsl_types.h"
#include "util/os_time.h"
#include "spirv/nir_spirv.h"
#include "nir/nir_builder.h"
#include "lvp_lower_vulkan_resource.h"
#include "pipe/p_state.h"
#include "pipe/p_context.h"
#include "tgsi/tgsi_from_mesa.h"
#include "nir/nir_xfb_info.h"

#define SPIR_V_MAGIC_NUMBER 0x07230203

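/* Copy an array of `count` elements of `type` into the pipeline's ralloc
 * context (a `mem_ctx` variable must be in scope in the caller), bailing out
 * with VK_ERROR_OUT_OF_HOST_MEMORY if the allocation fails.
 */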
#define LVP_PIPELINE_DUP(dst, src, type, count) do {             \
      type *temp = ralloc_array(mem_ctx, type, count);           \
      if (!temp) return VK_ERROR_OUT_OF_HOST_MEMORY;             \
      memcpy(temp, (src), sizeof(type) * count);                 \
      dst = temp;                                                \
   } while(0)

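/* Free everything owned by the pipeline: the gallium shader CSOs, the
 * per-stage NIR, the reference on the pipeline layout, and finally the
 * pipeline allocation itself.
 */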
void
lvp_pipeline_destroy(struct lvp_device *device, struct lvp_pipeline *pipeline)
{
   if (pipeline->shader_cso[PIPE_SHADER_VERTEX])
      device->queue.ctx->delete_vs_state(device->queue.ctx, pipeline->shader_cso[PIPE_SHADER_VERTEX]);
   if (pipeline->shader_cso[PIPE_SHADER_FRAGMENT])
      device->queue.ctx->delete_fs_state(device->queue.ctx, pipeline->shader_cso[PIPE_SHADER_FRAGMENT]);
   if (pipeline->shader_cso[PIPE_SHADER_GEOMETRY])
      device->queue.ctx->delete_gs_state(device->queue.ctx, pipeline->shader_cso[PIPE_SHADER_GEOMETRY]);
   if (pipeline->shader_cso[PIPE_SHADER_TESS_CTRL])
      device->queue.ctx->delete_tcs_state(device->queue.ctx, pipeline->shader_cso[PIPE_SHADER_TESS_CTRL]);
   if (pipeline->shader_cso[PIPE_SHADER_TESS_EVAL])
      device->queue.ctx->delete_tes_state(device->queue.ctx, pipeline->shader_cso[PIPE_SHADER_TESS_EVAL]);
   if (pipeline->shader_cso[PIPE_SHADER_COMPUTE])
      device->queue.ctx->delete_compute_state(device->queue.ctx, pipeline->shader_cso[PIPE_SHADER_COMPUTE]);

   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++)
      ralloc_free(pipeline->pipeline_nir[i]);

   if (pipeline->layout)
      vk_pipeline_layout_unref(&device->vk, &pipeline->layout->vk);

   ralloc_free(pipeline->mem_ctx);
   vk_free(&device->vk.alloc, pipeline->state_data);
   vk_object_base_finish(&pipeline->base);
   vk_free(&device->vk.alloc, pipeline);
}

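/* Destruction is deferred: the pipeline is appended to the queue's destroy
 * list under the pipeline lock rather than freed immediately, so the queue
 * can call lvp_pipeline_destroy() once it is done with it.
 */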
VKAPI_ATTR void VKAPI_CALL lvp_DestroyPipeline(
   VkDevice                                    _device,
   VkPipeline                                  _pipeline,
   const VkAllocationCallbacks*                pAllocator)
{
   LVP_FROM_HANDLE(lvp_device, device, _device);
   LVP_FROM_HANDLE(lvp_pipeline, pipeline, _pipeline);

   if (!_pipeline)
      return;

   simple_mtx_lock(&device->queue.pipeline_lock);
   util_dynarray_append(&device->queue.pipeline_destroys, struct lvp_pipeline*, pipeline);
   simple_mtx_unlock(&device->queue.pipeline_lock);
}

static inline unsigned
st_shader_stage_to_ptarget(gl_shader_stage stage)
{
   switch (stage) {
   case MESA_SHADER_VERTEX:
      return PIPE_SHADER_VERTEX;
   case MESA_SHADER_FRAGMENT:
      return PIPE_SHADER_FRAGMENT;
   case MESA_SHADER_GEOMETRY:
      return PIPE_SHADER_GEOMETRY;
   case MESA_SHADER_TESS_CTRL:
      return PIPE_SHADER_TESS_CTRL;
   case MESA_SHADER_TESS_EVAL:
      return PIPE_SHADER_TESS_EVAL;
   case MESA_SHADER_COMPUTE:
      return PIPE_SHADER_COMPUTE;
   default:
      break;
   }

   assert(!"should not be reached");
   return PIPE_SHADER_VERTEX;
}

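/* Size/align callback for nir_lower_vars_to_explicit_types: shared variables
 * get natural component alignment, with booleans occupying 4 bytes.
 */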
static void
shared_var_info(const struct glsl_type *type, unsigned *size, unsigned *align)
{
   assert(glsl_type_is_vector_or_scalar(type));

   uint32_t comp_size = glsl_type_is_boolean(type)
      ? 4 : glsl_get_bit_size(type) / 8;
   unsigned length = glsl_get_vector_elements(type);
   *size = comp_size * length;
   *align = comp_size;
}

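/* Record which storage images the shader reads and/or writes.  The bit index
 * is the image's flat position in the pipeline layout: the image counts of
 * all lower-numbered descriptor sets plus the image index of the variable's
 * own binding.
 */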
static void
set_image_access(struct lvp_pipeline *pipeline, nir_shader *nir,
                   nir_intrinsic_instr *instr,
                   bool reads, bool writes)
{
   nir_variable *var = nir_intrinsic_get_var(instr, 0);
   /* calculate the variable's offset in the layout */
   uint64_t value = 0;
   const struct lvp_descriptor_set_binding_layout *binding =
      get_binding_layout(pipeline->layout, var->data.descriptor_set, var->data.binding);
   for (unsigned s = 0; s < var->data.descriptor_set; s++) {
     if (pipeline->layout->vk.set_layouts[s])
        value += get_set_layout(pipeline->layout, s)->stage[nir->info.stage].image_count;
   }
   value += binding->stage[nir->info.stage].image_index;
   const unsigned size = glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1;
   uint64_t mask = BITFIELD64_MASK(MAX2(size, 1)) << value;

   if (reads)
      pipeline->access[nir->info.stage].images_read |= mask;
   if (writes)
      pipeline->access[nir->info.stage].images_written |= mask;
}

static void
set_buffer_access(struct lvp_pipeline *pipeline, nir_shader *nir,
                    nir_intrinsic_instr *instr)
{
   nir_variable *var = nir_intrinsic_get_var(instr, 0);
   if (!var) {
      nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
      if (deref->modes != nir_var_mem_ssbo)
         return;
      nir_binding b = nir_chase_binding(instr->src[0]);
      var = nir_get_binding_variable(nir, b);
      if (!var)
         return;
   }
   if (var->data.mode != nir_var_mem_ssbo)
      return;
   /* calculate the variable's offset in the layout */
   uint64_t value = 0;
   const struct lvp_descriptor_set_binding_layout *binding =
      get_binding_layout(pipeline->layout, var->data.descriptor_set, var->data.binding);
   for (unsigned s = 0; s < var->data.descriptor_set; s++) {
     if (pipeline->layout->vk.set_layouts[s])
        value += get_set_layout(pipeline->layout, s)->stage[nir->info.stage].shader_buffer_count;
   }
   value += binding->stage[nir->info.stage].shader_buffer_index;
   /* Structs have been lowered already, so get_aoa_size is sufficient. */
   const unsigned size = glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1;
   uint64_t mask = BITFIELD64_MASK(MAX2(size, 1)) << value;
   pipeline->access[nir->info.stage].buffers_written |= mask;
}

static void
scan_intrinsic(struct lvp_pipeline *pipeline, nir_shader *nir, nir_intrinsic_instr *instr)
{
   switch (instr->intrinsic) {
   case nir_intrinsic_image_deref_sparse_load:
   case nir_intrinsic_image_deref_load:
   case nir_intrinsic_image_deref_size:
   case nir_intrinsic_image_deref_samples:
      set_image_access(pipeline, nir, instr, true, false);
      break;
   case nir_intrinsic_image_deref_store:
      set_image_access(pipeline, nir, instr, false, true);
      break;
   case nir_intrinsic_image_deref_atomic_add:
   case nir_intrinsic_image_deref_atomic_imin:
   case nir_intrinsic_image_deref_atomic_umin:
   case nir_intrinsic_image_deref_atomic_imax:
   case nir_intrinsic_image_deref_atomic_umax:
   case nir_intrinsic_image_deref_atomic_and:
   case nir_intrinsic_image_deref_atomic_or:
   case nir_intrinsic_image_deref_atomic_xor:
   case nir_intrinsic_image_deref_atomic_exchange:
   case nir_intrinsic_image_deref_atomic_comp_swap:
   case nir_intrinsic_image_deref_atomic_fadd:
      set_image_access(pipeline, nir, instr, true, true);
      break;
   case nir_intrinsic_deref_atomic_add:
   case nir_intrinsic_deref_atomic_and:
   case nir_intrinsic_deref_atomic_comp_swap:
   case nir_intrinsic_deref_atomic_exchange:
   case nir_intrinsic_deref_atomic_fadd:
   case nir_intrinsic_deref_atomic_fcomp_swap:
   case nir_intrinsic_deref_atomic_fmax:
   case nir_intrinsic_deref_atomic_fmin:
   case nir_intrinsic_deref_atomic_imax:
   case nir_intrinsic_deref_atomic_imin:
   case nir_intrinsic_deref_atomic_or:
   case nir_intrinsic_deref_atomic_umax:
   case nir_intrinsic_deref_atomic_umin:
   case nir_intrinsic_deref_atomic_xor:
   case nir_intrinsic_store_deref:
      set_buffer_access(pipeline, nir, instr);
      break;
   default: break;
   }
}

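/* Walk every instruction in the shader and accumulate the per-stage image
 * and SSBO access masks in pipeline->access.
 */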
static void
scan_pipeline_info(struct lvp_pipeline *pipeline, nir_shader *nir)
{
   nir_foreach_function(function, nir) {
      if (function->impl)
         nir_foreach_block(block, function->impl) {
            nir_foreach_instr(instr, block) {
               if (instr->type == nir_instr_type_intrinsic)
                  scan_intrinsic(pipeline, nir, nir_instr_as_intrinsic(instr));
            }
         }
   }
}

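/* Drop scoped barriers the rasterizer does not need.  For compute shaders
 * (data != NULL) workgroup- and device-scope memory barriers are kept;
 * everything else is removed.
 */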
static bool
remove_scoped_barriers_impl(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_scoped_barrier)
      return false;
   if (data) {
      if (nir_intrinsic_memory_scope(intr) == NIR_SCOPE_WORKGROUP ||
          nir_intrinsic_memory_scope(intr) == NIR_SCOPE_DEVICE)
         return false;
   }
   nir_instr_remove(instr);
   return true;
}

static bool
remove_scoped_barriers(nir_shader *nir, bool is_compute)
{
   return nir_shader_instructions_pass(nir, remove_scoped_barriers_impl, nir_metadata_dominance, (void*)is_compute);
}

static bool
lower_demote_impl(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic == nir_intrinsic_demote || intr->intrinsic == nir_intrinsic_terminate) {
      intr->intrinsic = nir_intrinsic_discard;
      return true;
   }
   if (intr->intrinsic == nir_intrinsic_demote_if || intr->intrinsic == nir_intrinsic_terminate_if) {
      intr->intrinsic = nir_intrinsic_discard_if;
      return true;
   }
   return false;
}

static bool
lower_demote(nir_shader *nir)
{
   return nir_shader_instructions_pass(nir, lower_demote_impl, nir_metadata_dominance, NULL);
}

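/* Fold a constant nir_tex_src_texture_offset source into texture_index and
 * drop the source, so constant "indirect" texture offsets become direct
 * indexing.
 */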
static bool
find_tex(const nir_instr *instr, const void *data_cb)
{
   if (instr->type == nir_instr_type_tex)
      return true;
   return false;
}

static nir_ssa_def *
fixup_tex_instr(struct nir_builder *b, nir_instr *instr, void *data_cb)
{
   nir_tex_instr *tex_instr = nir_instr_as_tex(instr);
   unsigned offset = 0;

   int idx = nir_tex_instr_src_index(tex_instr, nir_tex_src_texture_offset);
   if (idx == -1)
      return NULL;

   if (!nir_src_is_const(tex_instr->src[idx].src))
      return NULL;
   offset = nir_src_comp_as_uint(tex_instr->src[idx].src, 0);

   nir_tex_instr_remove_src(tex_instr, idx);
   tex_instr->texture_index += offset;
   return NIR_LOWER_INSTR_PROGRESS;
}

static bool
lvp_nir_fixup_indirect_tex(nir_shader *shader)
{
   return nir_shader_lower_instructions(shader, find_tex, fixup_tex_instr, NULL);
}

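/* Standard NIR optimization loop, repeated until no pass reports progress. */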
static void
optimize(nir_shader *nir)
{
   bool progress = false;
   do {
      progress = false;

      NIR_PASS(progress, nir, nir_lower_flrp, 32|64, true);
      NIR_PASS(progress, nir, nir_split_array_vars, nir_var_function_temp);
      NIR_PASS(progress, nir, nir_shrink_vec_array_vars, nir_var_function_temp);
      NIR_PASS(progress, nir, nir_opt_deref);
      NIR_PASS(progress, nir, nir_lower_vars_to_ssa);

      NIR_PASS(progress, nir, nir_opt_copy_prop_vars);

      NIR_PASS(progress, nir, nir_copy_prop);
      NIR_PASS(progress, nir, nir_opt_dce);
      NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);

      NIR_PASS(progress, nir, nir_opt_algebraic);
      NIR_PASS(progress, nir, nir_opt_constant_folding);

      NIR_PASS(progress, nir, nir_opt_remove_phis);
      bool trivial_continues = false;
      NIR_PASS(trivial_continues, nir, nir_opt_trivial_continues);
      progress |= trivial_continues;
      if (trivial_continues) {
         /* If nir_opt_trivial_continues makes progress, then we need to clean
          * things up if we want any hope of nir_opt_if or nir_opt_loop_unroll
          * to make progress.
          */
         NIR_PASS(progress, nir, nir_copy_prop);
         NIR_PASS(progress, nir, nir_opt_dce);
         NIR_PASS(progress, nir, nir_opt_remove_phis);
      }
      NIR_PASS(progress, nir, nir_opt_if, nir_opt_if_aggressive_last_continue | nir_opt_if_optimize_phi_true_false);
      NIR_PASS(progress, nir, nir_opt_dead_cf);
      NIR_PASS(progress, nir, nir_opt_conditional_discard);
      NIR_PASS(progress, nir, nir_opt_remove_phis);
      NIR_PASS(progress, nir, nir_opt_cse);
      NIR_PASS(progress, nir, nir_opt_undef);

      NIR_PASS(progress, nir, nir_opt_deref);
      NIR_PASS(progress, nir, nir_lower_alu_to_scalar, NULL, NULL);
      NIR_PASS(progress, nir, nir_opt_loop_unroll);
      NIR_PASS(progress, nir, lvp_nir_fixup_indirect_tex);
   } while (progress);
}

void
lvp_shader_optimize(nir_shader *nir)
{
   optimize(nir);
   NIR_PASS_V(nir, nir_lower_var_copies);
   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
   NIR_PASS_V(nir, nir_opt_dce);
   nir_sweep(nir);
}

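/* Translate a single shader stage from SPIR-V to NIR, run the lowering and
 * optimization pipeline, and stash the result in pipeline->pipeline_nir[stage].
 */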
static VkResult
lvp_shader_compile_to_ir(struct lvp_pipeline *pipeline,
                         const VkPipelineShaderStageCreateInfo *sinfo)
{
   struct lvp_device *pdevice = pipeline->device;
   gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
   const nir_shader_compiler_options *drv_options = pdevice->pscreen->get_compiler_options(pipeline->device->pscreen, PIPE_SHADER_IR_NIR, st_shader_stage_to_ptarget(stage));
   VkResult result;
   nir_shader *nir;

   const struct spirv_to_nir_options spirv_options = {
      .environment = NIR_SPIRV_VULKAN,
      .caps = {
         .float64 = (pdevice->pscreen->get_param(pdevice->pscreen, PIPE_CAP_DOUBLES) == 1),
         .int16 = true,
         .int64 = (pdevice->pscreen->get_param(pdevice->pscreen, PIPE_CAP_INT64) == 1),
         .tessellation = true,
         .float_controls = true,
         .image_ms_array = true,
         .image_read_without_format = true,
         .image_write_without_format = true,
         .storage_image_ms = true,
         .geometry_streams = true,
         .storage_8bit = true,
         .storage_16bit = true,
         .variable_pointers = true,
         .stencil_export = true,
         .post_depth_coverage = true,
         .transform_feedback = true,
         .device_group = true,
         .draw_parameters = true,
         .shader_viewport_index_layer = true,
         .multiview = true,
         .physical_storage_buffer_address = true,
         .int64_atomics = true,
         .subgroup_arithmetic = true,
         .subgroup_basic = true,
         .subgroup_ballot = true,
         .subgroup_quad = true,
#if LLVM_VERSION_MAJOR >= 10
         .subgroup_shuffle = true,
#endif
         .subgroup_vote = true,
         .vk_memory_model = true,
         .vk_memory_model_device_scope = true,
         .int8 = true,
         .float16 = true,
         .demote_to_helper_invocation = true,
      },
      .ubo_addr_format = nir_address_format_32bit_index_offset,
      .ssbo_addr_format = nir_address_format_32bit_index_offset,
      .phys_ssbo_addr_format = nir_address_format_64bit_global,
      .push_const_addr_format = nir_address_format_logical,
      .shared_addr_format = nir_address_format_32bit_offset,
   };

437
438   result = vk_pipeline_shader_stage_to_nir(&pdevice->vk, sinfo,
439                                            &spirv_options, drv_options,
440                                            NULL, &nir);
441   if (result != VK_SUCCESS)
442      return result;
443
444   if (nir->info.stage != MESA_SHADER_TESS_CTRL)
445      NIR_PASS_V(nir, remove_scoped_barriers, nir->info.stage == MESA_SHADER_COMPUTE);
446
447   const struct nir_lower_sysvals_to_varyings_options sysvals_to_varyings = {
448      .frag_coord = true,
449      .point_coord = true,
450   };
451   NIR_PASS_V(nir, nir_lower_sysvals_to_varyings, &sysvals_to_varyings);
452
453   struct nir_lower_subgroups_options subgroup_opts = {0};
454   subgroup_opts.lower_quad = true;
455   subgroup_opts.ballot_components = 1;
456   subgroup_opts.ballot_bit_size = 32;
457   NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_opts);
458
459   if (stage == MESA_SHADER_FRAGMENT)
460      lvp_lower_input_attachments(nir, false);
461   NIR_PASS_V(nir, nir_lower_is_helper_invocation);
462   NIR_PASS_V(nir, lower_demote);
463   NIR_PASS_V(nir, nir_lower_system_values);
464   NIR_PASS_V(nir, nir_lower_compute_system_values, NULL);
465
466   NIR_PASS_V(nir, nir_remove_dead_variables,
467              nir_var_uniform | nir_var_image, NULL);
468
469   scan_pipeline_info(pipeline, nir);
470
471   optimize(nir);
472   lvp_lower_pipeline_layout(pipeline->device, pipeline->layout, nir);
473
474   NIR_PASS_V(nir, nir_lower_io_to_temporaries, nir_shader_get_entrypoint(nir), true, true);
475   NIR_PASS_V(nir, nir_split_var_copies);
476   NIR_PASS_V(nir, nir_lower_global_vars_to_local);
477
478   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_push_const,
479              nir_address_format_32bit_offset);
480
481   NIR_PASS_V(nir, nir_lower_explicit_io,
482              nir_var_mem_ubo | nir_var_mem_ssbo,
483              nir_address_format_32bit_index_offset);
484
485   NIR_PASS_V(nir, nir_lower_explicit_io,
486              nir_var_mem_global,
487              nir_address_format_64bit_global);
488
489   if (nir->info.stage == MESA_SHADER_COMPUTE) {
490      NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_mem_shared, shared_var_info);
491      NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_shared, nir_address_format_32bit_offset);
492   }
493
494   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
495
496   if (nir->info.stage == MESA_SHADER_VERTEX ||
497       nir->info.stage == MESA_SHADER_GEOMETRY) {
498      NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
499   } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
500      NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, true);
501   }
502
503   // TODO: also optimize the tex srcs. see radeonSI for reference */
504   /* Skip if there are potentially conflicting rounding modes */
505   struct nir_fold_16bit_tex_image_options fold_16bit_options = {
506      .rounding_mode = nir_rounding_mode_undef,
507      .fold_tex_dest = true,
508   };
509   NIR_PASS_V(nir, nir_fold_16bit_tex_image, &fold_16bit_options);
510
511   lvp_shader_optimize(nir);
512
513   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
514
515   if (nir->info.stage != MESA_SHADER_VERTEX)
516      nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, nir->info.stage);
517   else {
518      nir->num_inputs = util_last_bit64(nir->info.inputs_read);
519      nir_foreach_shader_in_variable(var, nir) {
520         var->data.driver_location = var->data.location - VERT_ATTRIB_GENERIC0;
521      }
522   }
523   nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,
524                               nir->info.stage);
525
526   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
527   if (impl->ssa_alloc > 100) //skip for small shaders
528      pipeline->inlines[stage].must_inline = lvp_find_inlinable_uniforms(pipeline, nir);
529   pipeline->pipeline_nir[stage] = nir;
530
531   return VK_SUCCESS;
532}
533
static void
merge_tess_info(struct shader_info *tes_info,
                const struct shader_info *tcs_info)
{
   /* The Vulkan 1.0.38 spec, section 21.1 Tessellator says:
    *
    *    "PointMode. Controls generation of points rather than triangles
    *     or lines. This functionality defaults to disabled, and is
    *     enabled if either shader stage includes the execution mode."
    *
    * and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw,
    * PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd,
    * and OutputVertices, it says:
    *
    *    "One mode must be set in at least one of the tessellation
    *     shader stages."
    *
    * So, the fields can be set in either the TCS or TES, but they must
    * agree if set in both.  Our backend looks at TES, so bitwise-or in
    * the values from the TCS.
    */
   assert(tcs_info->tess.tcs_vertices_out == 0 ||
          tes_info->tess.tcs_vertices_out == 0 ||
          tcs_info->tess.tcs_vertices_out == tes_info->tess.tcs_vertices_out);
   tes_info->tess.tcs_vertices_out |= tcs_info->tess.tcs_vertices_out;

   assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
          tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
          tcs_info->tess.spacing == tes_info->tess.spacing);
   tes_info->tess.spacing |= tcs_info->tess.spacing;

   assert(tcs_info->tess._primitive_mode == 0 ||
          tes_info->tess._primitive_mode == 0 ||
          tcs_info->tess._primitive_mode == tes_info->tess._primitive_mode);
   tes_info->tess._primitive_mode |= tcs_info->tess._primitive_mode;
   tes_info->tess.ccw |= tcs_info->tess.ccw;
   tes_info->tess.point_mode |= tcs_info->tess.point_mode;
}

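/* Build the gallium stream-output state from the NIR xfb info of the last
 * pre-rasterization stage (geometry, then tess eval, then vertex).
 */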
static void
lvp_pipeline_xfb_init(struct lvp_pipeline *pipeline)
{
   gl_shader_stage stage = MESA_SHADER_VERTEX;
   if (pipeline->pipeline_nir[MESA_SHADER_GEOMETRY])
      stage = MESA_SHADER_GEOMETRY;
   else if (pipeline->pipeline_nir[MESA_SHADER_TESS_EVAL])
      stage = MESA_SHADER_TESS_EVAL;
   pipeline->last_vertex = stage;

   nir_xfb_info *xfb_info = pipeline->pipeline_nir[stage]->xfb_info;
   if (xfb_info) {
      uint8_t output_mapping[VARYING_SLOT_TESS_MAX];
      memset(output_mapping, 0, sizeof(output_mapping));

      nir_foreach_shader_out_variable(var, pipeline->pipeline_nir[stage]) {
         unsigned slots = var->data.compact ? DIV_ROUND_UP(glsl_get_length(var->type), 4)
                                            : glsl_count_attribute_slots(var->type, false);
         for (unsigned i = 0; i < slots; i++)
            output_mapping[var->data.location + i] = var->data.driver_location + i;
      }

      pipeline->stream_output.num_outputs = xfb_info->output_count;
      for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
         if (xfb_info->buffers_written & (1 << i)) {
            pipeline->stream_output.stride[i] = xfb_info->buffers[i].stride / 4;
         }
      }
      for (unsigned i = 0; i < xfb_info->output_count; i++) {
         pipeline->stream_output.output[i].output_buffer = xfb_info->outputs[i].buffer;
         pipeline->stream_output.output[i].dst_offset = xfb_info->outputs[i].offset / 4;
         pipeline->stream_output.output[i].register_index = output_mapping[xfb_info->outputs[i].location];
         pipeline->stream_output.output[i].num_components = util_bitcount(xfb_info->outputs[i].component_mask);
         pipeline->stream_output.output[i].start_component = ffs(xfb_info->outputs[i].component_mask) - 1;
         pipeline->stream_output.output[i].stream = xfb_info->buffer_to_stream[xfb_info->outputs[i].buffer];
      }
   }
}

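/* Create the gallium CSO for a finalized NIR shader; stream output is only
 * attached to the last vertex-processing stage.
 */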
void *
lvp_pipeline_compile_stage(struct lvp_pipeline *pipeline, nir_shader *nir)
{
   struct lvp_device *device = pipeline->device;
   if (nir->info.stage == MESA_SHADER_COMPUTE) {
      struct pipe_compute_state shstate = {0};
      shstate.prog = nir;
      shstate.ir_type = PIPE_SHADER_IR_NIR;
      shstate.req_local_mem = nir->info.shared_size;
      return device->queue.ctx->create_compute_state(device->queue.ctx, &shstate);
   } else {
      struct pipe_shader_state shstate = {0};
      shstate.type = PIPE_SHADER_IR_NIR;
      shstate.ir.nir = nir;
      if (nir->info.stage == pipeline->last_vertex)
         memcpy(&shstate.stream_output, &pipeline->stream_output, sizeof(shstate.stream_output));

      switch (nir->info.stage) {
      case MESA_SHADER_FRAGMENT:
         return device->queue.ctx->create_fs_state(device->queue.ctx, &shstate);
      case MESA_SHADER_VERTEX:
         return device->queue.ctx->create_vs_state(device->queue.ctx, &shstate);
      case MESA_SHADER_GEOMETRY:
         return device->queue.ctx->create_gs_state(device->queue.ctx, &shstate);
      case MESA_SHADER_TESS_CTRL:
         return device->queue.ctx->create_tcs_state(device->queue.ctx, &shstate);
      case MESA_SHADER_TESS_EVAL:
         return device->queue.ctx->create_tes_state(device->queue.ctx, &shstate);
      default:
         unreachable("illegal shader");
         break;
      }
   }
   return NULL;
}

void *
lvp_pipeline_compile(struct lvp_pipeline *pipeline, nir_shader *nir)
{
   struct lvp_device *device = pipeline->device;
   device->physical_device->pscreen->finalize_nir(device->physical_device->pscreen, nir);
   return lvp_pipeline_compile_stage(pipeline, nir);
}

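/* Debug-only helper for the merge_layouts() assertions below: checks that two
 * descriptor set layouts describe the same bindings and immutable samplers.
 */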
#ifndef NDEBUG
static bool
layouts_equal(const struct lvp_descriptor_set_layout *a, const struct lvp_descriptor_set_layout *b)
{
   const uint8_t *pa = (const uint8_t*)a, *pb = (const uint8_t*)b;
   uint32_t hash_start_offset = sizeof(struct vk_descriptor_set_layout);
   uint32_t binding_offset = offsetof(struct lvp_descriptor_set_layout, binding);
   /* base equal */
   if (memcmp(pa + hash_start_offset, pb + hash_start_offset, binding_offset - hash_start_offset))
      return false;

   /* bindings equal */
   if (a->binding_count != b->binding_count)
      return false;
   size_t binding_size = a->binding_count * sizeof(struct lvp_descriptor_set_binding_layout);
   const struct lvp_descriptor_set_binding_layout *la = a->binding;
   const struct lvp_descriptor_set_binding_layout *lb = b->binding;
   if (memcmp(la, lb, binding_size)) {
      for (unsigned i = 0; i < a->binding_count; i++) {
         if (memcmp(&la[i], &lb[i], offsetof(struct lvp_descriptor_set_binding_layout, immutable_samplers)))
            return false;
      }
   }

   /* immutable sampler equal */
   if (a->immutable_sampler_count != b->immutable_sampler_count)
      return false;
   if (a->immutable_sampler_count) {
      size_t sampler_size = a->immutable_sampler_count * sizeof(struct lvp_sampler *);
      if (memcmp(pa + binding_offset + binding_size, pb + binding_offset + binding_size, sampler_size)) {
         struct lvp_sampler **sa = (struct lvp_sampler **)(pa + binding_offset);
         struct lvp_sampler **sb = (struct lvp_sampler **)(pb + binding_offset);
         for (unsigned i = 0; i < a->immutable_sampler_count; i++) {
            if (memcmp(sa[i], sb[i], sizeof(struct lvp_sampler)))
               return false;
         }
      }
   }
   return true;
}
#endif

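/* Merge a (possibly partial, independent-sets) pipeline layout into the
 * pipeline's own layout.  The first merge copies the source layout into the
 * pipeline's ralloc context; later merges only fill set layouts that are
 * still missing and accumulate the push-constant size and stages.
 */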
static void
merge_layouts(struct lvp_pipeline *dst, struct lvp_pipeline_layout *src)
{
   if (!src)
      return;
   if (!dst->layout) {
      /* no layout created yet: copy onto ralloc ctx allocation for auto-free */
      dst->layout = ralloc(dst->mem_ctx, struct lvp_pipeline_layout);
      memcpy(dst->layout, src, sizeof(struct lvp_pipeline_layout));
      return;
   }
#ifndef NDEBUG
   /* verify that layouts match */
   const struct lvp_pipeline_layout *smaller = dst->layout->vk.set_count < src->vk.set_count ? dst->layout : src;
   const struct lvp_pipeline_layout *bigger = smaller == dst->layout ? src : dst->layout;
   for (unsigned i = 0; i < smaller->vk.set_count; i++) {
      if (!smaller->vk.set_layouts[i] || !bigger->vk.set_layouts[i] ||
          smaller->vk.set_layouts[i] == bigger->vk.set_layouts[i])
         continue;

      const struct lvp_descriptor_set_layout *smaller_set_layout =
         vk_to_lvp_descriptor_set_layout(smaller->vk.set_layouts[i]);
      const struct lvp_descriptor_set_layout *bigger_set_layout =
         vk_to_lvp_descriptor_set_layout(bigger->vk.set_layouts[i]);

      assert(!smaller_set_layout->binding_count ||
             !bigger_set_layout->binding_count ||
             layouts_equal(smaller_set_layout, bigger_set_layout));
   }
#endif
   for (unsigned i = 0; i < src->vk.set_count; i++) {
      if (!dst->layout->vk.set_layouts[i])
         dst->layout->vk.set_layouts[i] = src->vk.set_layouts[i];
   }
   dst->layout->vk.set_count = MAX2(dst->layout->vk.set_count,
                                    src->vk.set_count);
   dst->layout->push_constant_size += src->push_constant_size;
   dst->layout->push_constant_stages |= src->push_constant_stages;
}

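/* Initialize a graphics pipeline: resolve graphics-pipeline-library state,
 * compile the shader stages that belong to this pipeline, and create the
 * gallium CSOs unless uniform inlining defers compilation to draw time.
 */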
static VkResult
lvp_graphics_pipeline_init(struct lvp_pipeline *pipeline,
                           struct lvp_device *device,
                           struct lvp_pipeline_cache *cache,
                           const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
   VkResult result;

   const VkGraphicsPipelineLibraryCreateInfoEXT *libinfo = vk_find_struct_const(pCreateInfo,
                                                                                GRAPHICS_PIPELINE_LIBRARY_CREATE_INFO_EXT);
   const VkPipelineLibraryCreateInfoKHR *libstate = vk_find_struct_const(pCreateInfo,
                                                                         PIPELINE_LIBRARY_CREATE_INFO_KHR);
   const VkGraphicsPipelineLibraryFlagsEXT layout_stages = VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT |
                                                           VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT;
   if (libinfo)
      pipeline->stages = libinfo->flags;
   else if (!libstate)
      pipeline->stages = VK_GRAPHICS_PIPELINE_LIBRARY_VERTEX_INPUT_INTERFACE_BIT_EXT |
                         VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT |
                         VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT |
                         VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT;
   pipeline->mem_ctx = ralloc_context(NULL);

   if (pCreateInfo->flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR)
      pipeline->library = true;

   struct lvp_pipeline_layout *layout = lvp_pipeline_layout_from_handle(pCreateInfo->layout);
   if (layout)
      vk_pipeline_layout_ref(&layout->vk);

   if (!layout || !(layout->vk.create_flags & VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT))
      /* this is a regular pipeline with no partials: directly reuse */
      pipeline->layout = layout;
   else if (pipeline->stages & layout_stages) {
      if ((pipeline->stages & layout_stages) == layout_stages)
         /* this has all the layout stages: directly reuse */
         pipeline->layout = layout;
      else {
         /* this is a partial: copy for later merging to avoid modifying another layout */
         merge_layouts(pipeline, layout);
      }
   }

   if (libstate) {
      for (unsigned i = 0; i < libstate->libraryCount; i++) {
         LVP_FROM_HANDLE(lvp_pipeline, p, libstate->pLibraries[i]);
         vk_graphics_pipeline_state_merge(&pipeline->graphics_state,
                                          &p->graphics_state);
         if (p->stages & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) {
            pipeline->line_smooth = p->line_smooth;
            pipeline->disable_multisample = p->disable_multisample;
            pipeline->line_rectangular = p->line_rectangular;
            pipeline->last_vertex = p->last_vertex;
            memcpy(&pipeline->stream_output, &p->stream_output, sizeof(p->stream_output));
            memcpy(&pipeline->access, &p->access, sizeof(p->access));
         }
         if (p->stages & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT)
            pipeline->force_min_sample = p->force_min_sample;
         if (p->stages & layout_stages) {
            if (!layout || (layout->vk.create_flags & VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT))
               merge_layouts(pipeline, p->layout);
         }
         pipeline->stages |= p->stages;
      }
   }

   result = vk_graphics_pipeline_state_fill(&device->vk,
                                            &pipeline->graphics_state,
                                            pCreateInfo, NULL, NULL, NULL,
                                            VK_SYSTEM_ALLOCATION_SCOPE_OBJECT,
                                            &pipeline->state_data);
   if (result != VK_SUCCESS)
      return result;

   assert(pipeline->library || pipeline->stages == (VK_GRAPHICS_PIPELINE_LIBRARY_VERTEX_INPUT_INTERFACE_BIT_EXT |
                                                    VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT |
                                                    VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT |
                                                    VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT));

   pipeline->device = device;

   for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
      const VkPipelineShaderStageCreateInfo *sinfo = &pCreateInfo->pStages[i];
      gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
      if (stage == MESA_SHADER_FRAGMENT) {
         if (!(pipeline->stages & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT))
            continue;
      } else {
         if (!(pipeline->stages & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT))
            continue;
      }
      result = lvp_shader_compile_to_ir(pipeline, sinfo);
      if (result != VK_SUCCESS)
         goto fail;

      switch (stage) {
      case MESA_SHADER_GEOMETRY:
         pipeline->gs_output_lines = pipeline->pipeline_nir[MESA_SHADER_GEOMETRY] &&
                                     pipeline->pipeline_nir[MESA_SHADER_GEOMETRY]->info.gs.output_primitive == SHADER_PRIM_LINES;
         break;
      case MESA_SHADER_FRAGMENT:
         if (pipeline->pipeline_nir[MESA_SHADER_FRAGMENT]->info.fs.uses_sample_shading)
            pipeline->force_min_sample = true;
         break;
      default: break;
      }
   }
   if (pCreateInfo->stageCount && pipeline->pipeline_nir[MESA_SHADER_TESS_EVAL]) {
      nir_lower_patch_vertices(pipeline->pipeline_nir[MESA_SHADER_TESS_EVAL], pipeline->pipeline_nir[MESA_SHADER_TESS_CTRL]->info.tess.tcs_vertices_out, NULL);
      merge_tess_info(&pipeline->pipeline_nir[MESA_SHADER_TESS_EVAL]->info, &pipeline->pipeline_nir[MESA_SHADER_TESS_CTRL]->info);
      if (pipeline->graphics_state.ts->domain_origin == VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT)
         pipeline->pipeline_nir[MESA_SHADER_TESS_EVAL]->info.tess.ccw = !pipeline->pipeline_nir[MESA_SHADER_TESS_EVAL]->info.tess.ccw;
   }
   if (libstate) {
      for (unsigned i = 0; i < libstate->libraryCount; i++) {
         LVP_FROM_HANDLE(lvp_pipeline, p, libstate->pLibraries[i]);
         if (p->stages & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) {
            if (p->pipeline_nir[MESA_SHADER_FRAGMENT])
               pipeline->pipeline_nir[MESA_SHADER_FRAGMENT] = nir_shader_clone(pipeline->mem_ctx, p->pipeline_nir[MESA_SHADER_FRAGMENT]);
         }
         if (p->stages & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) {
            for (unsigned j = MESA_SHADER_VERTEX; j < MESA_SHADER_FRAGMENT; j++) {
               if (p->pipeline_nir[j])
                  pipeline->pipeline_nir[j] = nir_shader_clone(pipeline->mem_ctx, p->pipeline_nir[j]);
            }
         }
      }
   } else if (pipeline->stages & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) {
      const struct vk_rasterization_state *rs = pipeline->graphics_state.rs;
      if (rs) {
         /* always draw bresenham if !smooth */
         pipeline->line_smooth = rs->line.mode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT;
         pipeline->disable_multisample = rs->line.mode == VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT ||
                                         rs->line.mode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT;
         pipeline->line_rectangular = rs->line.mode != VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT;
      } else
         pipeline->line_rectangular = true;
      lvp_pipeline_xfb_init(pipeline);
   }

   if (!pipeline->library) {
      bool has_fragment_shader = false;
      for (uint32_t i = 0; i < ARRAY_SIZE(pipeline->pipeline_nir); i++) {
         if (!pipeline->pipeline_nir[i])
            continue;

         gl_shader_stage stage = i;
         assert(stage == pipeline->pipeline_nir[i]->info.stage);
         enum pipe_shader_type pstage = pipe_shader_type_from_mesa(stage);
         if (!pipeline->inlines[stage].can_inline)
            pipeline->shader_cso[pstage] = lvp_pipeline_compile(pipeline,
                                                                nir_shader_clone(NULL, pipeline->pipeline_nir[stage]));
         if (stage == MESA_SHADER_FRAGMENT)
            has_fragment_shader = true;
      }

      if (has_fragment_shader == false) {
         /* create a dummy fragment shader for this pipeline. */
         nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL,
                                                        "dummy_frag");

         pipeline->pipeline_nir[MESA_SHADER_FRAGMENT] = b.shader;
         struct pipe_shader_state shstate = {0};
         shstate.type = PIPE_SHADER_IR_NIR;
         shstate.ir.nir = nir_shader_clone(NULL, pipeline->pipeline_nir[MESA_SHADER_FRAGMENT]);
         pipeline->shader_cso[PIPE_SHADER_FRAGMENT] = device->queue.ctx->create_fs_state(device->queue.ctx, &shstate);
      }
   }
   return VK_SUCCESS;

fail:
   for (unsigned i = 0; i < ARRAY_SIZE(pipeline->pipeline_nir); i++) {
      if (pipeline->pipeline_nir[i])
         ralloc_free(pipeline->pipeline_nir[i]);
   }
   vk_free(&device->vk.alloc, pipeline->state_data);

   return result;
}

static VkResult
lvp_graphics_pipeline_create(
   VkDevice _device,
   VkPipelineCache _cache,
   const VkGraphicsPipelineCreateInfo *pCreateInfo,
   VkPipeline *pPipeline)
{
   LVP_FROM_HANDLE(lvp_device, device, _device);
   LVP_FROM_HANDLE(lvp_pipeline_cache, cache, _cache);
   struct lvp_pipeline *pipeline;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);

   pipeline = vk_zalloc(&device->vk.alloc, sizeof(*pipeline), 8,
                         VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (pipeline == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk, &pipeline->base,
                       VK_OBJECT_TYPE_PIPELINE);
   uint64_t t0 = os_time_get_nano();
   result = lvp_graphics_pipeline_init(pipeline, device, cache, pCreateInfo);
   if (result != VK_SUCCESS) {
      vk_free(&device->vk.alloc, pipeline);
      return result;
   }

   VkPipelineCreationFeedbackCreateInfo *feedback = (void*)vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
   if (feedback) {
      feedback->pPipelineCreationFeedback->duration = os_time_get_nano() - t0;
      feedback->pPipelineCreationFeedback->flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
      memset(feedback->pPipelineStageCreationFeedbacks, 0, sizeof(VkPipelineCreationFeedback) * feedback->pipelineStageCreationFeedbackCount);
   }

   *pPipeline = lvp_pipeline_to_handle(pipeline);

   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL lvp_CreateGraphicsPipelines(
   VkDevice                                    _device,
   VkPipelineCache                             pipelineCache,
   uint32_t                                    count,
   const VkGraphicsPipelineCreateInfo*         pCreateInfos,
   const VkAllocationCallbacks*                pAllocator,
   VkPipeline*                                 pPipelines)
{
   VkResult result = VK_SUCCESS;
   unsigned i = 0;

   for (; i < count; i++) {
      VkResult r = VK_PIPELINE_COMPILE_REQUIRED;
      if (!(pCreateInfos[i].flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT))
         r = lvp_graphics_pipeline_create(_device,
                                          pipelineCache,
                                          &pCreateInfos[i],
                                          &pPipelines[i]);
      if (r != VK_SUCCESS) {
         result = r;
         pPipelines[i] = VK_NULL_HANDLE;
         if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
            break;
      }
   }
   if (result != VK_SUCCESS) {
      for (; i < count; i++)
         pPipelines[i] = VK_NULL_HANDLE;
   }

   return result;
}

static VkResult
lvp_compute_pipeline_init(struct lvp_pipeline *pipeline,
                          struct lvp_device *device,
                          struct lvp_pipeline_cache *cache,
                          const VkComputePipelineCreateInfo *pCreateInfo)
{
   pipeline->device = device;
   pipeline->layout = lvp_pipeline_layout_from_handle(pCreateInfo->layout);
   vk_pipeline_layout_ref(&pipeline->layout->vk);
   pipeline->force_min_sample = false;

   pipeline->mem_ctx = ralloc_context(NULL);
   pipeline->is_compute_pipeline = true;

   VkResult result = lvp_shader_compile_to_ir(pipeline, &pCreateInfo->stage);
   if (result != VK_SUCCESS)
      return result;

   if (!pipeline->inlines[MESA_SHADER_COMPUTE].can_inline)
      pipeline->shader_cso[PIPE_SHADER_COMPUTE] = lvp_pipeline_compile(pipeline, nir_shader_clone(NULL, pipeline->pipeline_nir[MESA_SHADER_COMPUTE]));
   return VK_SUCCESS;
}

static VkResult
lvp_compute_pipeline_create(
   VkDevice _device,
   VkPipelineCache _cache,
   const VkComputePipelineCreateInfo *pCreateInfo,
   VkPipeline *pPipeline)
{
   LVP_FROM_HANDLE(lvp_device, device, _device);
   LVP_FROM_HANDLE(lvp_pipeline_cache, cache, _cache);
   struct lvp_pipeline *pipeline;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO);

   pipeline = vk_zalloc(&device->vk.alloc, sizeof(*pipeline), 8,
                         VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (pipeline == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk, &pipeline->base,
                       VK_OBJECT_TYPE_PIPELINE);
   uint64_t t0 = os_time_get_nano();
   result = lvp_compute_pipeline_init(pipeline, device, cache, pCreateInfo);
   if (result != VK_SUCCESS) {
      vk_free(&device->vk.alloc, pipeline);
      return result;
   }

   const VkPipelineCreationFeedbackCreateInfo *feedback = (void*)vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO);
   if (feedback) {
      feedback->pPipelineCreationFeedback->duration = os_time_get_nano() - t0;
      feedback->pPipelineCreationFeedback->flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT;
      memset(feedback->pPipelineStageCreationFeedbacks, 0, sizeof(VkPipelineCreationFeedback) * feedback->pipelineStageCreationFeedbackCount);
   }

   *pPipeline = lvp_pipeline_to_handle(pipeline);

   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL lvp_CreateComputePipelines(
   VkDevice                                    _device,
   VkPipelineCache                             pipelineCache,
   uint32_t                                    count,
   const VkComputePipelineCreateInfo*          pCreateInfos,
   const VkAllocationCallbacks*                pAllocator,
   VkPipeline*                                 pPipelines)
{
   VkResult result = VK_SUCCESS;
   unsigned i = 0;

   for (; i < count; i++) {
      VkResult r = VK_PIPELINE_COMPILE_REQUIRED;
      if (!(pCreateInfos[i].flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT))
         r = lvp_compute_pipeline_create(_device,
                                         pipelineCache,
                                         &pCreateInfos[i],
                                         &pPipelines[i]);
      if (r != VK_SUCCESS) {
         result = r;
         pPipelines[i] = VK_NULL_HANDLE;
         if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
            break;
      }
   }
   if (result != VK_SUCCESS) {
      for (; i < count; i++)
         pPipelines[i] = VK_NULL_HANDLE;
   }

   return result;
}