1/*
2 * Copyright © 2020 Valve Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 */
24#include "helpers.h"
25#include "vulkan/vk_format.h"
26#include "common/amd_family.h"
27#include <stdio.h>
28#include <sstream>
29#include <llvm-c/Target.h>
30#include <mutex>
31
32using namespace aco;
33
34extern "C" {
35PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
36	VkInstance                                  instance,
37	const char*                                 pName);
38}
39
40ac_shader_config config;
41aco_shader_info info;
42std::unique_ptr<Program> program;
43Builder bld(NULL);
44Temp inputs[16];
45
46static VkInstance instance_cache[CHIP_LAST] = {VK_NULL_HANDLE};
47static VkDevice device_cache[CHIP_LAST] = {VK_NULL_HANDLE};
48static std::mutex create_device_mutex;
49
50#define FUNCTION_LIST\
51   ITEM(CreateInstance)\
52   ITEM(DestroyInstance)\
53   ITEM(EnumeratePhysicalDevices)\
54   ITEM(GetPhysicalDeviceProperties2)\
55   ITEM(CreateDevice)\
56   ITEM(DestroyDevice)\
57   ITEM(CreateShaderModule)\
58   ITEM(DestroyShaderModule)\
59   ITEM(CreateGraphicsPipelines)\
60   ITEM(CreateComputePipelines)\
61   ITEM(DestroyPipeline)\
62   ITEM(CreateDescriptorSetLayout)\
63   ITEM(DestroyDescriptorSetLayout)\
64   ITEM(CreatePipelineLayout)\
65   ITEM(DestroyPipelineLayout)\
66   ITEM(CreateRenderPass)\
67   ITEM(DestroyRenderPass)\
68   ITEM(GetPipelineExecutablePropertiesKHR)\
69   ITEM(GetPipelineExecutableInternalRepresentationsKHR)
70
71#define ITEM(n) PFN_vk##n n;
72FUNCTION_LIST
73#undef ITEM
74
75void create_program(enum amd_gfx_level gfx_level, Stage stage, unsigned wave_size, enum radeon_family family)
76{
77   memset(&config, 0, sizeof(config));
78   info.wave_size = wave_size;
79
80   program.reset(new Program);
81   aco::init_program(program.get(), stage, &info, gfx_level, family, false, &config);
82   program->workgroup_size = UINT_MAX;
83   calc_min_waves(program.get());
84
85   program->debug.func = nullptr;
86   program->debug.private_data = nullptr;
87
88   program->debug.output = output;
89   program->debug.shorten_messages = true;
90   program->debug.func = nullptr;
91   program->debug.private_data = nullptr;
92
93   Block *block = program->create_and_insert_block();
94   block->kind = block_kind_top_level;
95
96   bld = Builder(program.get(), &program->blocks[0]);
97
98   config.float_mode = program->blocks[0].fp_mode.val;
99}
100
101bool setup_cs(const char *input_spec, enum amd_gfx_level gfx_level,
102              enum radeon_family family, const char* subvariant,
103              unsigned wave_size)
104{
105   if (!set_variant(gfx_level, subvariant))
106      return false;
107
108   memset(&info, 0, sizeof(info));
109   create_program(gfx_level, compute_cs, wave_size, family);
110
111   if (input_spec) {
112      std::vector<RegClass> input_classes;
113      while (input_spec[0]) {
114         RegType type = input_spec[0] == 'v' ? RegType::vgpr : RegType::sgpr;
115         unsigned size = input_spec[1] - '0';
116         bool in_bytes = input_spec[2] == 'b';
117         input_classes.push_back(RegClass::get(type, size * (in_bytes ? 1 : 4)));
118
119         input_spec += 2 + in_bytes;
120         while (input_spec[0] == ' ') input_spec++;
121      }
122
123      aco_ptr<Instruction> startpgm{create_instruction<Pseudo_instruction>(
124         aco_opcode::p_startpgm, Format::PSEUDO, 0, input_classes.size())};
125      for (unsigned i = 0; i < input_classes.size(); i++) {
126         inputs[i] = bld.tmp(input_classes[i]);
127         startpgm->definitions[i] = Definition(inputs[i]);
128      }
129      bld.insert(std::move(startpgm));
130   }
131
132   return true;
133}
134
135void finish_program(Program *prog)
136{
137   for (Block& BB : prog->blocks) {
138      for (unsigned idx : BB.linear_preds)
139         prog->blocks[idx].linear_succs.emplace_back(BB.index);
140      for (unsigned idx : BB.logical_preds)
141         prog->blocks[idx].logical_succs.emplace_back(BB.index);
142   }
143
144   for (Block& block : prog->blocks) {
145      if (block.linear_succs.size() == 0) {
146         block.kind |= block_kind_uniform;
147         Builder(prog, &block).sopp(aco_opcode::s_endpgm);
148      }
149   }
150}
151
152void finish_validator_test()
153{
154   finish_program(program.get());
155   aco_print_program(program.get(), output);
156   fprintf(output, "Validation results:\n");
157   if (aco::validate_ir(program.get()))
158      fprintf(output, "Validation passed\n");
159   else
160      fprintf(output, "Validation failed\n");
161}
162
163void finish_opt_test()
164{
165   finish_program(program.get());
166   if (!aco::validate_ir(program.get())) {
167      fail_test("Validation before optimization failed");
168      return;
169   }
170   aco::optimize(program.get());
171   if (!aco::validate_ir(program.get())) {
172      fail_test("Validation after optimization failed");
173      return;
174   }
175   aco_print_program(program.get(), output);
176}
177
178void finish_ra_test(ra_test_policy policy, bool lower)
179{
180   finish_program(program.get());
181   if (!aco::validate_ir(program.get())) {
182      fail_test("Validation before register allocation failed");
183      return;
184   }
185
186   program->workgroup_size = program->wave_size;
187   aco::live live_vars = aco::live_var_analysis(program.get());
188   aco::register_allocation(program.get(), live_vars.live_out, policy);
189
190   if (aco::validate_ra(program.get())) {
191      fail_test("Validation after register allocation failed");
192      return;
193   }
194
195   if (lower) {
196      aco::ssa_elimination(program.get());
197      aco::lower_to_hw_instr(program.get());
198   }
199
200   aco_print_program(program.get(), output);
201}
202
203void finish_optimizer_postRA_test()
204{
205   finish_program(program.get());
206   aco::optimize_postRA(program.get());
207   aco_print_program(program.get(), output);
208}
209
210void finish_to_hw_instr_test()
211{
212   finish_program(program.get());
213   aco::lower_to_hw_instr(program.get());
214   aco_print_program(program.get(), output);
215}
216
217void finish_insert_nops_test()
218{
219   finish_program(program.get());
220   aco::insert_NOPs(program.get());
221   aco_print_program(program.get(), output);
222}
223
224void finish_form_hard_clause_test()
225{
226   finish_program(program.get());
227   aco::form_hard_clauses(program.get());
228   aco_print_program(program.get(), output);
229}
230
231void finish_assembler_test()
232{
233   finish_program(program.get());
234   std::vector<uint32_t> binary;
235   unsigned exec_size = emit_program(program.get(), binary);
236
237   /* we could use CLRX for disassembly but that would require it to be
238    * installed */
239   if (program->gfx_level >= GFX8) {
240      print_asm(program.get(), binary, exec_size / 4u, output);
241   } else {
242      //TODO: maybe we should use CLRX and skip this test if it's not available?
243      for (uint32_t dword : binary)
244         fprintf(output, "%.8x\n", dword);
245   }
246}
247
248void writeout(unsigned i, Temp tmp)
249{
250   if (tmp.id())
251      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), tmp);
252   else
253      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i));
254}
255
256void writeout(unsigned i, aco::Builder::Result res)
257{
258   bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), res);
259}
260
261void writeout(unsigned i, Operand op)
262{
263   bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), op);
264}
265
266void writeout(unsigned i, Operand op0, Operand op1)
267{
268   bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), op0, op1);
269}
270
271Temp fneg(Temp src, Builder b)
272{
273   if (src.bytes() == 2)
274      return b.vop2(aco_opcode::v_mul_f16, b.def(v2b), Operand::c16(0xbc00u), src);
275   else
276      return b.vop2(aco_opcode::v_mul_f32, b.def(v1), Operand::c32(0xbf800000u), src);
277}
278
279Temp fabs(Temp src, Builder b)
280{
281   if (src.bytes() == 2) {
282      Builder::Result res = b.vop2_e64(aco_opcode::v_mul_f16, b.def(v2b), Operand::c16(0x3c00), src);
283      res.instr->vop3().abs[1] = true;
284      return res;
285   } else {
286      Builder::Result res = b.vop2_e64(aco_opcode::v_mul_f32, b.def(v1), Operand::c32(0x3f800000u), src);
287      res.instr->vop3().abs[1] = true;
288      return res;
289   }
290}
291
292Temp f2f32(Temp src, Builder b)
293{
294   return b.vop1(aco_opcode::v_cvt_f32_f16, b.def(v1), src);
295}
296
297Temp f2f16(Temp src, Builder b)
298{
299   return b.vop1(aco_opcode::v_cvt_f16_f32, b.def(v2b), src);
300}
301
302Temp u2u16(Temp src, Builder b)
303{
304   return b.pseudo(aco_opcode::p_extract_vector, b.def(v2b), src, Operand::zero());
305}
306
307Temp fadd(Temp src0, Temp src1, Builder b)
308{
309   if (src0.bytes() == 2)
310      return b.vop2(aco_opcode::v_add_f16, b.def(v2b), src0, src1);
311   else
312      return b.vop2(aco_opcode::v_add_f32, b.def(v1), src0, src1);
313}
314
315Temp fmul(Temp src0, Temp src1, Builder b)
316{
317   if (src0.bytes() == 2)
318      return b.vop2(aco_opcode::v_mul_f16, b.def(v2b), src0, src1);
319   else
320      return b.vop2(aco_opcode::v_mul_f32, b.def(v1), src0, src1);
321}
322
323Temp fma(Temp src0, Temp src1, Temp src2, Builder b)
324{
325   if (src0.bytes() == 2)
326      return b.vop3(aco_opcode::v_fma_f16, b.def(v2b), src0, src1, src2);
327   else
328      return b.vop3(aco_opcode::v_fma_f32, b.def(v1), src0, src1, src2);
329}
330
331Temp fsat(Temp src, Builder b)
332{
333   if (src.bytes() == 2)
334      return b.vop3(aco_opcode::v_med3_f16, b.def(v2b), Operand::c16(0u),
335                    Operand::c16(0x3c00u), src);
336   else
337      return b.vop3(aco_opcode::v_med3_f32, b.def(v1), Operand::zero(),
338                    Operand::c32(0x3f800000u), src);
339}
340
341Temp ext_ushort(Temp src, unsigned idx, Builder b)
342{
343   return b.pseudo(aco_opcode::p_extract, b.def(src.regClass()), src, Operand::c32(idx),
344                   Operand::c32(16u), Operand::c32(false));
345}
346
347Temp ext_ubyte(Temp src, unsigned idx, Builder b)
348{
349   return b.pseudo(aco_opcode::p_extract, b.def(src.regClass()), src, Operand::c32(idx),
350                   Operand::c32(8u), Operand::c32(false));
351}
352
353VkDevice get_vk_device(enum amd_gfx_level gfx_level)
354{
355   enum radeon_family family;
356   switch (gfx_level) {
357   case GFX6:
358      family = CHIP_TAHITI;
359      break;
360   case GFX7:
361      family = CHIP_BONAIRE;
362      break;
363   case GFX8:
364      family = CHIP_POLARIS10;
365      break;
366   case GFX9:
367      family = CHIP_VEGA10;
368      break;
369   case GFX10:
370      family = CHIP_NAVI10;
371      break;
372   case GFX10_3:
373      family = CHIP_NAVI21;
374      break;
375   case GFX11:
376      family = CHIP_GFX1100;
377      break;
378   default:
379      family = CHIP_UNKNOWN;
380      break;
381   }
382   return get_vk_device(family);
383}
384
385VkDevice get_vk_device(enum radeon_family family)
386{
387   assert(family != CHIP_UNKNOWN);
388
389   std::lock_guard<std::mutex> guard(create_device_mutex);
390
391   if (device_cache[family])
392      return device_cache[family];
393
394   setenv("RADV_FORCE_FAMILY", ac_get_family_name(family), 1);
395
396   VkApplicationInfo app_info = {};
397   app_info.pApplicationName = "aco_tests";
398   app_info.apiVersion = VK_API_VERSION_1_2;
399   VkInstanceCreateInfo instance_create_info = {};
400   instance_create_info.pApplicationInfo = &app_info;
401   instance_create_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
402   ASSERTED VkResult result = ((PFN_vkCreateInstance)vk_icdGetInstanceProcAddr(NULL, "vkCreateInstance"))(&instance_create_info, NULL, &instance_cache[family]);
403   assert(result == VK_SUCCESS);
404
405   #define ITEM(n) n = (PFN_vk##n)vk_icdGetInstanceProcAddr(instance_cache[family], "vk" #n);
406   FUNCTION_LIST
407   #undef ITEM
408
409   uint32_t device_count = 1;
410   VkPhysicalDevice device = VK_NULL_HANDLE;
411   result = EnumeratePhysicalDevices(instance_cache[family], &device_count, &device);
412   assert(result == VK_SUCCESS);
413   assert(device != VK_NULL_HANDLE);
414
415   VkDeviceCreateInfo device_create_info = {};
416   device_create_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
417   static const char *extensions[] = {"VK_KHR_pipeline_executable_properties"};
418   device_create_info.enabledExtensionCount = sizeof(extensions) / sizeof(extensions[0]);
419   device_create_info.ppEnabledExtensionNames = extensions;
420   result = CreateDevice(device, &device_create_info, NULL, &device_cache[family]);
421
422   return device_cache[family];
423}
424
425static struct DestroyDevices {
426   ~DestroyDevices() {
427      for (unsigned i = 0; i < CHIP_LAST; i++) {
428         if (!device_cache[i])
429            continue;
430         DestroyDevice(device_cache[i], NULL);
431         DestroyInstance(instance_cache[i], NULL);
432      }
433   }
434} destroy_devices;
435
436void print_pipeline_ir(VkDevice device, VkPipeline pipeline, VkShaderStageFlagBits stages,
437                       const char *name, bool remove_encoding)
438{
439   uint32_t executable_count = 16;
440   VkPipelineExecutablePropertiesKHR executables[16];
441   VkPipelineInfoKHR pipeline_info;
442   pipeline_info.sType = VK_STRUCTURE_TYPE_PIPELINE_INFO_KHR;
443   pipeline_info.pNext = NULL;
444   pipeline_info.pipeline = pipeline;
445   ASSERTED VkResult result = GetPipelineExecutablePropertiesKHR(device, &pipeline_info, &executable_count, executables);
446   assert(result == VK_SUCCESS);
447
448   uint32_t executable = 0;
449   for (; executable < executable_count; executable++) {
450      if (executables[executable].stages == stages)
451         break;
452   }
453   assert(executable != executable_count);
454
455   VkPipelineExecutableInfoKHR exec_info;
456   exec_info.sType = VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_INFO_KHR;
457   exec_info.pNext = NULL;
458   exec_info.pipeline = pipeline;
459   exec_info.executableIndex = executable;
460
461   uint32_t ir_count = 16;
462   VkPipelineExecutableInternalRepresentationKHR ir[16];
463   memset(ir, 0, sizeof(ir));
464   result = GetPipelineExecutableInternalRepresentationsKHR(device, &exec_info, &ir_count, ir);
465   assert(result == VK_SUCCESS);
466
467   VkPipelineExecutableInternalRepresentationKHR* requested_ir = nullptr;
468   for (unsigned i = 0; i < ir_count; ++i) {
469      if (strcmp(ir[i].name, name) == 0) {
470         requested_ir = &ir[i];
471         break;
472      }
473   }
474   assert(requested_ir && "Could not find requested IR");
475
476   char *data = (char*)malloc(requested_ir->dataSize);
477   requested_ir->pData = data;
478   result = GetPipelineExecutableInternalRepresentationsKHR(device, &exec_info, &ir_count, ir);
479   assert(result == VK_SUCCESS);
480
481   if (remove_encoding) {
482      for (char *c = data; *c; c++) {
483         if (*c == ';') {
484            for (; *c && *c != '\n'; c++)
485               *c = ' ';
486         }
487      }
488   }
489
490   fprintf(output, "%s", data);
491   free(data);
492}
493
494VkShaderModule __qoCreateShaderModule(VkDevice dev, const QoShaderModuleCreateInfo *module_info)
495{
496    VkShaderModuleCreateInfo vk_module_info;
497    vk_module_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
498    vk_module_info.pNext = NULL;
499    vk_module_info.flags = 0;
500    vk_module_info.codeSize = module_info->spirvSize;
501    vk_module_info.pCode = (const uint32_t*)module_info->pSpirv;
502
503    VkShaderModule module;
504    ASSERTED VkResult result = CreateShaderModule(dev, &vk_module_info, NULL, &module);
505    assert(result == VK_SUCCESS);
506
507    return module;
508}
509
510PipelineBuilder::PipelineBuilder(VkDevice dev) {
511   memset(this, 0, sizeof(*this));
512   topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
513   device = dev;
514}
515
516PipelineBuilder::~PipelineBuilder()
517{
518   DestroyPipeline(device, pipeline, NULL);
519
520   for (unsigned i = 0; i < (is_compute() ? 1 : gfx_pipeline_info.stageCount); i++) {
521      VkPipelineShaderStageCreateInfo *stage_info = &stages[i];
522      if (owned_stages & stage_info->stage)
523         DestroyShaderModule(device, stage_info->module, NULL);
524   }
525
526   DestroyPipelineLayout(device, pipeline_layout, NULL);
527
528   for (unsigned i = 0; i < util_bitcount64(desc_layouts_used); i++)
529      DestroyDescriptorSetLayout(device, desc_layouts[i], NULL);
530
531   DestroyRenderPass(device, render_pass, NULL);
532}
533
534void PipelineBuilder::add_desc_binding(VkShaderStageFlags stage_flags, uint32_t layout,
535                                       uint32_t binding, VkDescriptorType type, uint32_t count)
536{
537   desc_layouts_used |= 1ull << layout;
538   desc_bindings[layout][num_desc_bindings[layout]++] = {binding, type, count, stage_flags, NULL};
539}
540
541void PipelineBuilder::add_vertex_binding(uint32_t binding, uint32_t stride, VkVertexInputRate rate)
542{
543   vs_bindings[vs_input.vertexBindingDescriptionCount++] = {binding, stride, rate};
544}
545
546void PipelineBuilder::add_vertex_attribute(uint32_t location, uint32_t binding, VkFormat format, uint32_t offset)
547{
548   vs_attributes[vs_input.vertexAttributeDescriptionCount++] = {location, binding, format, offset};
549}
550
551void PipelineBuilder::add_resource_decls(QoShaderModuleCreateInfo *module)
552{
553   for (unsigned i = 0; i < module->declarationCount; i++) {
554      const QoShaderDecl *decl = &module->pDeclarations[i];
555      switch (decl->decl_type) {
556      case QoShaderDeclType_ubo:
557         add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
558         break;
559      case QoShaderDeclType_ssbo:
560         add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
561         break;
562      case QoShaderDeclType_img_buf:
563         add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER);
564         break;
565      case QoShaderDeclType_img:
566         add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE);
567         break;
568      case QoShaderDeclType_tex_buf:
569         add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER);
570         break;
571      case QoShaderDeclType_combined:
572         add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);
573         break;
574      case QoShaderDeclType_tex:
575         add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE);
576         break;
577      case QoShaderDeclType_samp:
578         add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_SAMPLER);
579         break;
580      default:
581         break;
582      }
583   }
584}
585
586void PipelineBuilder::add_io_decls(QoShaderModuleCreateInfo *module)
587{
588   unsigned next_vtx_offset = 0;
589   for (unsigned i = 0; i < module->declarationCount; i++) {
590      const QoShaderDecl *decl = &module->pDeclarations[i];
591      switch (decl->decl_type) {
592      case QoShaderDeclType_in:
593         if (module->stage == VK_SHADER_STAGE_VERTEX_BIT) {
594            if (!strcmp(decl->type, "float") || decl->type[0] == 'v')
595               add_vertex_attribute(decl->location, 0, VK_FORMAT_R32G32B32A32_SFLOAT, next_vtx_offset);
596            else if (decl->type[0] == 'u')
597               add_vertex_attribute(decl->location, 0, VK_FORMAT_R32G32B32A32_UINT, next_vtx_offset);
598            else if (decl->type[0] == 'i')
599               add_vertex_attribute(decl->location, 0, VK_FORMAT_R32G32B32A32_SINT, next_vtx_offset);
600            next_vtx_offset += 16;
601         }
602         break;
603      case QoShaderDeclType_out:
604         if (module->stage == VK_SHADER_STAGE_FRAGMENT_BIT) {
605            if (!strcmp(decl->type, "float") || decl->type[0] == 'v')
606               color_outputs[decl->location] = VK_FORMAT_R32G32B32A32_SFLOAT;
607            else if (decl->type[0] == 'u')
608               color_outputs[decl->location] = VK_FORMAT_R32G32B32A32_UINT;
609            else if (decl->type[0] == 'i')
610               color_outputs[decl->location] = VK_FORMAT_R32G32B32A32_SINT;
611         }
612         break;
613      default:
614         break;
615      }
616   }
617   if (next_vtx_offset)
618      add_vertex_binding(0, next_vtx_offset);
619}
620
621void PipelineBuilder::add_stage(VkShaderStageFlagBits stage, VkShaderModule module, const char *name)
622{
623   VkPipelineShaderStageCreateInfo *stage_info;
624   if (stage == VK_SHADER_STAGE_COMPUTE_BIT)
625      stage_info = &stages[0];
626   else
627      stage_info = &stages[gfx_pipeline_info.stageCount++];
628   stage_info->sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
629   stage_info->pNext = NULL;
630   stage_info->flags = 0;
631   stage_info->stage = stage;
632   stage_info->module = module;
633   stage_info->pName = name;
634   stage_info->pSpecializationInfo = NULL;
635   owned_stages |= stage;
636}
637
638void PipelineBuilder::add_stage(VkShaderStageFlagBits stage, QoShaderModuleCreateInfo module, const char *name)
639{
640   add_stage(stage, __qoCreateShaderModule(device, &module), name);
641   add_resource_decls(&module);
642   add_io_decls(&module);
643}
644
645void PipelineBuilder::add_vsfs(VkShaderModule vs, VkShaderModule fs)
646{
647   add_stage(VK_SHADER_STAGE_VERTEX_BIT, vs);
648   add_stage(VK_SHADER_STAGE_FRAGMENT_BIT, fs);
649}
650
651void PipelineBuilder::add_vsfs(QoShaderModuleCreateInfo vs, QoShaderModuleCreateInfo fs)
652{
653   add_stage(VK_SHADER_STAGE_VERTEX_BIT, vs);
654   add_stage(VK_SHADER_STAGE_FRAGMENT_BIT, fs);
655}
656
657void PipelineBuilder::add_cs(VkShaderModule cs)
658{
659   add_stage(VK_SHADER_STAGE_COMPUTE_BIT, cs);
660}
661
662void PipelineBuilder::add_cs(QoShaderModuleCreateInfo cs)
663{
664   add_stage(VK_SHADER_STAGE_COMPUTE_BIT, cs);
665}
666
667bool PipelineBuilder::is_compute() {
668   return gfx_pipeline_info.stageCount == 0;
669}
670
671void PipelineBuilder::create_compute_pipeline() {
672   VkComputePipelineCreateInfo create_info;
673   create_info.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
674   create_info.pNext = NULL;
675   create_info.flags = VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR;
676   create_info.stage = stages[0];
677   create_info.layout = pipeline_layout;
678   create_info.basePipelineHandle = VK_NULL_HANDLE;
679   create_info.basePipelineIndex = 0;
680
681   ASSERTED VkResult result = CreateComputePipelines(device, VK_NULL_HANDLE, 1, &create_info, NULL, &pipeline);
682   assert(result == VK_SUCCESS);
683}
684
685void PipelineBuilder::create_graphics_pipeline() {
686   /* create the create infos */
687   if (!samples)
688      samples = VK_SAMPLE_COUNT_1_BIT;
689
690   unsigned num_color_attachments = 0;
691   VkPipelineColorBlendAttachmentState blend_attachment_states[16];
692   VkAttachmentReference color_attachments[16];
693   VkAttachmentDescription attachment_descs[17];
694   for (unsigned i = 0; i < 16; i++) {
695      if (color_outputs[i] == VK_FORMAT_UNDEFINED)
696         continue;
697
698      VkAttachmentDescription *desc = &attachment_descs[num_color_attachments];
699      desc->flags = 0;
700      desc->format = color_outputs[i];
701      desc->samples = samples;
702      desc->loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
703      desc->storeOp = VK_ATTACHMENT_STORE_OP_STORE;
704      desc->stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
705      desc->stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
706      desc->initialLayout = VK_IMAGE_LAYOUT_GENERAL;
707      desc->finalLayout = VK_IMAGE_LAYOUT_GENERAL;
708
709      VkAttachmentReference *ref = &color_attachments[num_color_attachments];
710      ref->attachment = num_color_attachments;
711      ref->layout = VK_IMAGE_LAYOUT_GENERAL;
712
713      VkPipelineColorBlendAttachmentState *blend = &blend_attachment_states[num_color_attachments];
714      blend->blendEnable = false;
715      blend->colorWriteMask = VK_COLOR_COMPONENT_R_BIT |
716                              VK_COLOR_COMPONENT_G_BIT |
717                              VK_COLOR_COMPONENT_B_BIT |
718                              VK_COLOR_COMPONENT_A_BIT;
719
720      num_color_attachments++;
721   }
722
723   unsigned num_attachments = num_color_attachments;
724   VkAttachmentReference ds_attachment;
725   if (ds_output != VK_FORMAT_UNDEFINED) {
726      VkAttachmentDescription *desc = &attachment_descs[num_attachments];
727      desc->flags = 0;
728      desc->format = ds_output;
729      desc->samples = samples;
730      desc->loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
731      desc->storeOp = VK_ATTACHMENT_STORE_OP_STORE;
732      desc->stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
733      desc->stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
734      desc->initialLayout = VK_IMAGE_LAYOUT_GENERAL;
735      desc->finalLayout = VK_IMAGE_LAYOUT_GENERAL;
736
737      ds_attachment.attachment = num_color_attachments;
738      ds_attachment.layout = VK_IMAGE_LAYOUT_GENERAL;
739
740      num_attachments++;
741   }
742
743   vs_input.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO;
744   vs_input.pNext = NULL;
745   vs_input.flags = 0;
746   vs_input.pVertexBindingDescriptions = vs_bindings;
747   vs_input.pVertexAttributeDescriptions = vs_attributes;
748
749   VkPipelineInputAssemblyStateCreateInfo assembly_state;
750   assembly_state.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
751   assembly_state.pNext = NULL;
752   assembly_state.flags = 0;
753   assembly_state.topology = topology;
754   assembly_state.primitiveRestartEnable = false;
755
756   VkPipelineTessellationStateCreateInfo tess_state;
757   tess_state.sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO;
758   tess_state.pNext = NULL;
759   tess_state.flags = 0;
760   tess_state.patchControlPoints = patch_size;
761
762   VkPipelineViewportStateCreateInfo viewport_state;
763   viewport_state.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO;
764   viewport_state.pNext = NULL;
765   viewport_state.flags = 0;
766   viewport_state.viewportCount = 1;
767   viewport_state.pViewports = NULL;
768   viewport_state.scissorCount = 1;
769   viewport_state.pScissors = NULL;
770
771   VkPipelineRasterizationStateCreateInfo rasterization_state;
772   rasterization_state.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
773   rasterization_state.pNext = NULL;
774   rasterization_state.flags = 0;
775   rasterization_state.depthClampEnable = false;
776   rasterization_state.rasterizerDiscardEnable = false;
777   rasterization_state.polygonMode = VK_POLYGON_MODE_FILL;
778   rasterization_state.cullMode = VK_CULL_MODE_NONE;
779   rasterization_state.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE;
780   rasterization_state.depthBiasEnable = false;
781   rasterization_state.lineWidth = 1.0;
782
783   VkPipelineMultisampleStateCreateInfo ms_state;
784   ms_state.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO;
785   ms_state.pNext = NULL;
786   ms_state.flags = 0;
787   ms_state.rasterizationSamples = samples;
788   ms_state.sampleShadingEnable = sample_shading_enable;
789   ms_state.minSampleShading = min_sample_shading;
790   VkSampleMask sample_mask = 0xffffffff;
791   ms_state.pSampleMask = &sample_mask;
792   ms_state.alphaToCoverageEnable = false;
793   ms_state.alphaToOneEnable = false;
794
795   VkPipelineDepthStencilStateCreateInfo ds_state;
796   ds_state.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO;
797   ds_state.pNext = NULL;
798   ds_state.flags = 0;
799   ds_state.depthTestEnable = ds_output != VK_FORMAT_UNDEFINED;
800   ds_state.depthWriteEnable = true;
801   ds_state.depthCompareOp = VK_COMPARE_OP_ALWAYS;
802   ds_state.depthBoundsTestEnable = false;
803   ds_state.stencilTestEnable = true;
804   ds_state.front.failOp = VK_STENCIL_OP_KEEP;
805   ds_state.front.passOp = VK_STENCIL_OP_REPLACE;
806   ds_state.front.depthFailOp = VK_STENCIL_OP_REPLACE;
807   ds_state.front.compareOp = VK_COMPARE_OP_ALWAYS;
808   ds_state.front.compareMask = 0xffffffff,
809   ds_state.front.writeMask = 0;
810   ds_state.front.reference = 0;
811   ds_state.back = ds_state.front;
812
813   VkPipelineColorBlendStateCreateInfo color_blend_state;
814   color_blend_state.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO;
815   color_blend_state.pNext = NULL;
816   color_blend_state.flags = 0;
817   color_blend_state.logicOpEnable = false;
818   color_blend_state.attachmentCount = num_color_attachments;
819   color_blend_state.pAttachments = blend_attachment_states;
820
821   VkDynamicState dynamic_states[9] = {
822      VK_DYNAMIC_STATE_VIEWPORT,
823      VK_DYNAMIC_STATE_SCISSOR,
824      VK_DYNAMIC_STATE_LINE_WIDTH,
825      VK_DYNAMIC_STATE_DEPTH_BIAS,
826      VK_DYNAMIC_STATE_BLEND_CONSTANTS,
827      VK_DYNAMIC_STATE_DEPTH_BOUNDS,
828      VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
829      VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
830      VK_DYNAMIC_STATE_STENCIL_REFERENCE
831   };
832
833   VkPipelineDynamicStateCreateInfo dynamic_state;
834   dynamic_state.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO;
835   dynamic_state.pNext = NULL;
836   dynamic_state.flags = 0;
837   dynamic_state.dynamicStateCount = sizeof(dynamic_states) / sizeof(VkDynamicState);
838   dynamic_state.pDynamicStates = dynamic_states;
839
840   gfx_pipeline_info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
841   gfx_pipeline_info.pNext = NULL;
842   gfx_pipeline_info.flags = VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR;
843   gfx_pipeline_info.pVertexInputState = &vs_input;
844   gfx_pipeline_info.pInputAssemblyState = &assembly_state;
845   gfx_pipeline_info.pTessellationState = &tess_state;
846   gfx_pipeline_info.pViewportState = &viewport_state;
847   gfx_pipeline_info.pRasterizationState = &rasterization_state;
848   gfx_pipeline_info.pMultisampleState = &ms_state;
849   gfx_pipeline_info.pDepthStencilState = &ds_state;
850   gfx_pipeline_info.pColorBlendState = &color_blend_state;
851   gfx_pipeline_info.pDynamicState = &dynamic_state;
852   gfx_pipeline_info.subpass = 0;
853
854   /* create the objects used to create the pipeline */
855   VkSubpassDescription subpass;
856   subpass.flags = 0;
857   subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
858   subpass.inputAttachmentCount = 0;
859   subpass.pInputAttachments = NULL;
860   subpass.colorAttachmentCount = num_color_attachments;
861   subpass.pColorAttachments = color_attachments;
862   subpass.pResolveAttachments = NULL;
863   subpass.pDepthStencilAttachment = ds_output == VK_FORMAT_UNDEFINED ? NULL : &ds_attachment;
864   subpass.preserveAttachmentCount = 0;
865   subpass.pPreserveAttachments = NULL;
866
867   VkRenderPassCreateInfo renderpass_info;
868   renderpass_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO;
869   renderpass_info.pNext = NULL;
870   renderpass_info.flags = 0;
871   renderpass_info.attachmentCount = num_attachments;
872   renderpass_info.pAttachments = attachment_descs;
873   renderpass_info.subpassCount = 1;
874   renderpass_info.pSubpasses = &subpass;
875   renderpass_info.dependencyCount = 0;
876   renderpass_info.pDependencies = NULL;
877
878   ASSERTED VkResult result = CreateRenderPass(device, &renderpass_info, NULL, &render_pass);
879   assert(result == VK_SUCCESS);
880
881   gfx_pipeline_info.layout = pipeline_layout;
882   gfx_pipeline_info.renderPass = render_pass;
883
884   /* create the pipeline */
885   gfx_pipeline_info.pStages = stages;
886
887   result = CreateGraphicsPipelines(device, VK_NULL_HANDLE, 1, &gfx_pipeline_info, NULL, &pipeline);
888   assert(result == VK_SUCCESS);
889}
890
891void PipelineBuilder::create_pipeline() {
892   unsigned num_desc_layouts = 0;
893   for (unsigned i = 0; i < 64; i++) {
894      if (!(desc_layouts_used & (1ull << i)))
895         continue;
896
897      VkDescriptorSetLayoutCreateInfo desc_layout_info;
898      desc_layout_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
899      desc_layout_info.pNext = NULL;
900      desc_layout_info.flags = 0;
901      desc_layout_info.bindingCount = num_desc_bindings[i];
902      desc_layout_info.pBindings = desc_bindings[i];
903
904      ASSERTED VkResult result = CreateDescriptorSetLayout(device, &desc_layout_info, NULL, &desc_layouts[num_desc_layouts]);
905      assert(result == VK_SUCCESS);
906      num_desc_layouts++;
907   }
908
909   VkPipelineLayoutCreateInfo pipeline_layout_info;
910   pipeline_layout_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
911   pipeline_layout_info.pNext = NULL;
912   pipeline_layout_info.flags = 0;
913   pipeline_layout_info.pushConstantRangeCount = 1;
914   pipeline_layout_info.pPushConstantRanges = &push_constant_range;
915   pipeline_layout_info.setLayoutCount = num_desc_layouts;
916   pipeline_layout_info.pSetLayouts = desc_layouts;
917
918   ASSERTED VkResult result = CreatePipelineLayout(device, &pipeline_layout_info, NULL, &pipeline_layout);
919   assert(result == VK_SUCCESS);
920
921   if (is_compute())
922      create_compute_pipeline();
923   else
924      create_graphics_pipeline();
925}
926
927void PipelineBuilder::print_ir(VkShaderStageFlagBits stage_flags, const char *name, bool remove_encoding)
928{
929   if (!pipeline)
930      create_pipeline();
931   print_pipeline_ir(device, pipeline, stage_flags, name, remove_encoding);
932}
933