1/* 2 * Copyright © 2020 Valve Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 */ 24#include "helpers.h" 25#include "vulkan/vk_format.h" 26#include "common/amd_family.h" 27#include <stdio.h> 28#include <sstream> 29#include <llvm-c/Target.h> 30#include <mutex> 31 32using namespace aco; 33 34extern "C" { 35PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr( 36 VkInstance instance, 37 const char* pName); 38} 39 40ac_shader_config config; 41aco_shader_info info; 42std::unique_ptr<Program> program; 43Builder bld(NULL); 44Temp inputs[16]; 45 46static VkInstance instance_cache[CHIP_LAST] = {VK_NULL_HANDLE}; 47static VkDevice device_cache[CHIP_LAST] = {VK_NULL_HANDLE}; 48static std::mutex create_device_mutex; 49 50#define FUNCTION_LIST\ 51 ITEM(CreateInstance)\ 52 ITEM(DestroyInstance)\ 53 ITEM(EnumeratePhysicalDevices)\ 54 ITEM(GetPhysicalDeviceProperties2)\ 55 ITEM(CreateDevice)\ 56 ITEM(DestroyDevice)\ 57 ITEM(CreateShaderModule)\ 58 ITEM(DestroyShaderModule)\ 59 ITEM(CreateGraphicsPipelines)\ 60 ITEM(CreateComputePipelines)\ 61 ITEM(DestroyPipeline)\ 62 ITEM(CreateDescriptorSetLayout)\ 63 ITEM(DestroyDescriptorSetLayout)\ 64 ITEM(CreatePipelineLayout)\ 65 ITEM(DestroyPipelineLayout)\ 66 ITEM(CreateRenderPass)\ 67 ITEM(DestroyRenderPass)\ 68 ITEM(GetPipelineExecutablePropertiesKHR)\ 69 ITEM(GetPipelineExecutableInternalRepresentationsKHR) 70 71#define ITEM(n) PFN_vk##n n; 72FUNCTION_LIST 73#undef ITEM 74 75void create_program(enum amd_gfx_level gfx_level, Stage stage, unsigned wave_size, enum radeon_family family) 76{ 77 memset(&config, 0, sizeof(config)); 78 info.wave_size = wave_size; 79 80 program.reset(new Program); 81 aco::init_program(program.get(), stage, &info, gfx_level, family, false, &config); 82 program->workgroup_size = UINT_MAX; 83 calc_min_waves(program.get()); 84 85 program->debug.func = nullptr; 86 program->debug.private_data = nullptr; 87 88 program->debug.output = output; 89 program->debug.shorten_messages = true; 90 program->debug.func = nullptr; 91 program->debug.private_data = nullptr; 92 93 Block *block = program->create_and_insert_block(); 94 block->kind = block_kind_top_level; 95 96 bld = Builder(program.get(), &program->blocks[0]); 97 98 config.float_mode = program->blocks[0].fp_mode.val; 99} 100 101bool setup_cs(const char *input_spec, enum amd_gfx_level gfx_level, 102 enum radeon_family family, const char* subvariant, 103 unsigned wave_size) 104{ 105 if (!set_variant(gfx_level, subvariant)) 106 return false; 107 108 memset(&info, 0, sizeof(info)); 109 create_program(gfx_level, compute_cs, wave_size, family); 110 111 if (input_spec) { 112 std::vector<RegClass> input_classes; 113 while (input_spec[0]) { 114 RegType type = input_spec[0] == 'v' ? RegType::vgpr : RegType::sgpr; 115 unsigned size = input_spec[1] - '0'; 116 bool in_bytes = input_spec[2] == 'b'; 117 input_classes.push_back(RegClass::get(type, size * (in_bytes ? 1 : 4))); 118 119 input_spec += 2 + in_bytes; 120 while (input_spec[0] == ' ') input_spec++; 121 } 122 123 aco_ptr<Instruction> startpgm{create_instruction<Pseudo_instruction>( 124 aco_opcode::p_startpgm, Format::PSEUDO, 0, input_classes.size())}; 125 for (unsigned i = 0; i < input_classes.size(); i++) { 126 inputs[i] = bld.tmp(input_classes[i]); 127 startpgm->definitions[i] = Definition(inputs[i]); 128 } 129 bld.insert(std::move(startpgm)); 130 } 131 132 return true; 133} 134 135void finish_program(Program *prog) 136{ 137 for (Block& BB : prog->blocks) { 138 for (unsigned idx : BB.linear_preds) 139 prog->blocks[idx].linear_succs.emplace_back(BB.index); 140 for (unsigned idx : BB.logical_preds) 141 prog->blocks[idx].logical_succs.emplace_back(BB.index); 142 } 143 144 for (Block& block : prog->blocks) { 145 if (block.linear_succs.size() == 0) { 146 block.kind |= block_kind_uniform; 147 Builder(prog, &block).sopp(aco_opcode::s_endpgm); 148 } 149 } 150} 151 152void finish_validator_test() 153{ 154 finish_program(program.get()); 155 aco_print_program(program.get(), output); 156 fprintf(output, "Validation results:\n"); 157 if (aco::validate_ir(program.get())) 158 fprintf(output, "Validation passed\n"); 159 else 160 fprintf(output, "Validation failed\n"); 161} 162 163void finish_opt_test() 164{ 165 finish_program(program.get()); 166 if (!aco::validate_ir(program.get())) { 167 fail_test("Validation before optimization failed"); 168 return; 169 } 170 aco::optimize(program.get()); 171 if (!aco::validate_ir(program.get())) { 172 fail_test("Validation after optimization failed"); 173 return; 174 } 175 aco_print_program(program.get(), output); 176} 177 178void finish_ra_test(ra_test_policy policy, bool lower) 179{ 180 finish_program(program.get()); 181 if (!aco::validate_ir(program.get())) { 182 fail_test("Validation before register allocation failed"); 183 return; 184 } 185 186 program->workgroup_size = program->wave_size; 187 aco::live live_vars = aco::live_var_analysis(program.get()); 188 aco::register_allocation(program.get(), live_vars.live_out, policy); 189 190 if (aco::validate_ra(program.get())) { 191 fail_test("Validation after register allocation failed"); 192 return; 193 } 194 195 if (lower) { 196 aco::ssa_elimination(program.get()); 197 aco::lower_to_hw_instr(program.get()); 198 } 199 200 aco_print_program(program.get(), output); 201} 202 203void finish_optimizer_postRA_test() 204{ 205 finish_program(program.get()); 206 aco::optimize_postRA(program.get()); 207 aco_print_program(program.get(), output); 208} 209 210void finish_to_hw_instr_test() 211{ 212 finish_program(program.get()); 213 aco::lower_to_hw_instr(program.get()); 214 aco_print_program(program.get(), output); 215} 216 217void finish_insert_nops_test() 218{ 219 finish_program(program.get()); 220 aco::insert_NOPs(program.get()); 221 aco_print_program(program.get(), output); 222} 223 224void finish_form_hard_clause_test() 225{ 226 finish_program(program.get()); 227 aco::form_hard_clauses(program.get()); 228 aco_print_program(program.get(), output); 229} 230 231void finish_assembler_test() 232{ 233 finish_program(program.get()); 234 std::vector<uint32_t> binary; 235 unsigned exec_size = emit_program(program.get(), binary); 236 237 /* we could use CLRX for disassembly but that would require it to be 238 * installed */ 239 if (program->gfx_level >= GFX8) { 240 print_asm(program.get(), binary, exec_size / 4u, output); 241 } else { 242 //TODO: maybe we should use CLRX and skip this test if it's not available? 243 for (uint32_t dword : binary) 244 fprintf(output, "%.8x\n", dword); 245 } 246} 247 248void writeout(unsigned i, Temp tmp) 249{ 250 if (tmp.id()) 251 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), tmp); 252 else 253 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i)); 254} 255 256void writeout(unsigned i, aco::Builder::Result res) 257{ 258 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), res); 259} 260 261void writeout(unsigned i, Operand op) 262{ 263 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), op); 264} 265 266void writeout(unsigned i, Operand op0, Operand op1) 267{ 268 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), op0, op1); 269} 270 271Temp fneg(Temp src, Builder b) 272{ 273 if (src.bytes() == 2) 274 return b.vop2(aco_opcode::v_mul_f16, b.def(v2b), Operand::c16(0xbc00u), src); 275 else 276 return b.vop2(aco_opcode::v_mul_f32, b.def(v1), Operand::c32(0xbf800000u), src); 277} 278 279Temp fabs(Temp src, Builder b) 280{ 281 if (src.bytes() == 2) { 282 Builder::Result res = b.vop2_e64(aco_opcode::v_mul_f16, b.def(v2b), Operand::c16(0x3c00), src); 283 res.instr->vop3().abs[1] = true; 284 return res; 285 } else { 286 Builder::Result res = b.vop2_e64(aco_opcode::v_mul_f32, b.def(v1), Operand::c32(0x3f800000u), src); 287 res.instr->vop3().abs[1] = true; 288 return res; 289 } 290} 291 292Temp f2f32(Temp src, Builder b) 293{ 294 return b.vop1(aco_opcode::v_cvt_f32_f16, b.def(v1), src); 295} 296 297Temp f2f16(Temp src, Builder b) 298{ 299 return b.vop1(aco_opcode::v_cvt_f16_f32, b.def(v2b), src); 300} 301 302Temp u2u16(Temp src, Builder b) 303{ 304 return b.pseudo(aco_opcode::p_extract_vector, b.def(v2b), src, Operand::zero()); 305} 306 307Temp fadd(Temp src0, Temp src1, Builder b) 308{ 309 if (src0.bytes() == 2) 310 return b.vop2(aco_opcode::v_add_f16, b.def(v2b), src0, src1); 311 else 312 return b.vop2(aco_opcode::v_add_f32, b.def(v1), src0, src1); 313} 314 315Temp fmul(Temp src0, Temp src1, Builder b) 316{ 317 if (src0.bytes() == 2) 318 return b.vop2(aco_opcode::v_mul_f16, b.def(v2b), src0, src1); 319 else 320 return b.vop2(aco_opcode::v_mul_f32, b.def(v1), src0, src1); 321} 322 323Temp fma(Temp src0, Temp src1, Temp src2, Builder b) 324{ 325 if (src0.bytes() == 2) 326 return b.vop3(aco_opcode::v_fma_f16, b.def(v2b), src0, src1, src2); 327 else 328 return b.vop3(aco_opcode::v_fma_f32, b.def(v1), src0, src1, src2); 329} 330 331Temp fsat(Temp src, Builder b) 332{ 333 if (src.bytes() == 2) 334 return b.vop3(aco_opcode::v_med3_f16, b.def(v2b), Operand::c16(0u), 335 Operand::c16(0x3c00u), src); 336 else 337 return b.vop3(aco_opcode::v_med3_f32, b.def(v1), Operand::zero(), 338 Operand::c32(0x3f800000u), src); 339} 340 341Temp ext_ushort(Temp src, unsigned idx, Builder b) 342{ 343 return b.pseudo(aco_opcode::p_extract, b.def(src.regClass()), src, Operand::c32(idx), 344 Operand::c32(16u), Operand::c32(false)); 345} 346 347Temp ext_ubyte(Temp src, unsigned idx, Builder b) 348{ 349 return b.pseudo(aco_opcode::p_extract, b.def(src.regClass()), src, Operand::c32(idx), 350 Operand::c32(8u), Operand::c32(false)); 351} 352 353VkDevice get_vk_device(enum amd_gfx_level gfx_level) 354{ 355 enum radeon_family family; 356 switch (gfx_level) { 357 case GFX6: 358 family = CHIP_TAHITI; 359 break; 360 case GFX7: 361 family = CHIP_BONAIRE; 362 break; 363 case GFX8: 364 family = CHIP_POLARIS10; 365 break; 366 case GFX9: 367 family = CHIP_VEGA10; 368 break; 369 case GFX10: 370 family = CHIP_NAVI10; 371 break; 372 case GFX10_3: 373 family = CHIP_NAVI21; 374 break; 375 case GFX11: 376 family = CHIP_GFX1100; 377 break; 378 default: 379 family = CHIP_UNKNOWN; 380 break; 381 } 382 return get_vk_device(family); 383} 384 385VkDevice get_vk_device(enum radeon_family family) 386{ 387 assert(family != CHIP_UNKNOWN); 388 389 std::lock_guard<std::mutex> guard(create_device_mutex); 390 391 if (device_cache[family]) 392 return device_cache[family]; 393 394 setenv("RADV_FORCE_FAMILY", ac_get_family_name(family), 1); 395 396 VkApplicationInfo app_info = {}; 397 app_info.pApplicationName = "aco_tests"; 398 app_info.apiVersion = VK_API_VERSION_1_2; 399 VkInstanceCreateInfo instance_create_info = {}; 400 instance_create_info.pApplicationInfo = &app_info; 401 instance_create_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; 402 ASSERTED VkResult result = ((PFN_vkCreateInstance)vk_icdGetInstanceProcAddr(NULL, "vkCreateInstance"))(&instance_create_info, NULL, &instance_cache[family]); 403 assert(result == VK_SUCCESS); 404 405 #define ITEM(n) n = (PFN_vk##n)vk_icdGetInstanceProcAddr(instance_cache[family], "vk" #n); 406 FUNCTION_LIST 407 #undef ITEM 408 409 uint32_t device_count = 1; 410 VkPhysicalDevice device = VK_NULL_HANDLE; 411 result = EnumeratePhysicalDevices(instance_cache[family], &device_count, &device); 412 assert(result == VK_SUCCESS); 413 assert(device != VK_NULL_HANDLE); 414 415 VkDeviceCreateInfo device_create_info = {}; 416 device_create_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; 417 static const char *extensions[] = {"VK_KHR_pipeline_executable_properties"}; 418 device_create_info.enabledExtensionCount = sizeof(extensions) / sizeof(extensions[0]); 419 device_create_info.ppEnabledExtensionNames = extensions; 420 result = CreateDevice(device, &device_create_info, NULL, &device_cache[family]); 421 422 return device_cache[family]; 423} 424 425static struct DestroyDevices { 426 ~DestroyDevices() { 427 for (unsigned i = 0; i < CHIP_LAST; i++) { 428 if (!device_cache[i]) 429 continue; 430 DestroyDevice(device_cache[i], NULL); 431 DestroyInstance(instance_cache[i], NULL); 432 } 433 } 434} destroy_devices; 435 436void print_pipeline_ir(VkDevice device, VkPipeline pipeline, VkShaderStageFlagBits stages, 437 const char *name, bool remove_encoding) 438{ 439 uint32_t executable_count = 16; 440 VkPipelineExecutablePropertiesKHR executables[16]; 441 VkPipelineInfoKHR pipeline_info; 442 pipeline_info.sType = VK_STRUCTURE_TYPE_PIPELINE_INFO_KHR; 443 pipeline_info.pNext = NULL; 444 pipeline_info.pipeline = pipeline; 445 ASSERTED VkResult result = GetPipelineExecutablePropertiesKHR(device, &pipeline_info, &executable_count, executables); 446 assert(result == VK_SUCCESS); 447 448 uint32_t executable = 0; 449 for (; executable < executable_count; executable++) { 450 if (executables[executable].stages == stages) 451 break; 452 } 453 assert(executable != executable_count); 454 455 VkPipelineExecutableInfoKHR exec_info; 456 exec_info.sType = VK_STRUCTURE_TYPE_PIPELINE_EXECUTABLE_INFO_KHR; 457 exec_info.pNext = NULL; 458 exec_info.pipeline = pipeline; 459 exec_info.executableIndex = executable; 460 461 uint32_t ir_count = 16; 462 VkPipelineExecutableInternalRepresentationKHR ir[16]; 463 memset(ir, 0, sizeof(ir)); 464 result = GetPipelineExecutableInternalRepresentationsKHR(device, &exec_info, &ir_count, ir); 465 assert(result == VK_SUCCESS); 466 467 VkPipelineExecutableInternalRepresentationKHR* requested_ir = nullptr; 468 for (unsigned i = 0; i < ir_count; ++i) { 469 if (strcmp(ir[i].name, name) == 0) { 470 requested_ir = &ir[i]; 471 break; 472 } 473 } 474 assert(requested_ir && "Could not find requested IR"); 475 476 char *data = (char*)malloc(requested_ir->dataSize); 477 requested_ir->pData = data; 478 result = GetPipelineExecutableInternalRepresentationsKHR(device, &exec_info, &ir_count, ir); 479 assert(result == VK_SUCCESS); 480 481 if (remove_encoding) { 482 for (char *c = data; *c; c++) { 483 if (*c == ';') { 484 for (; *c && *c != '\n'; c++) 485 *c = ' '; 486 } 487 } 488 } 489 490 fprintf(output, "%s", data); 491 free(data); 492} 493 494VkShaderModule __qoCreateShaderModule(VkDevice dev, const QoShaderModuleCreateInfo *module_info) 495{ 496 VkShaderModuleCreateInfo vk_module_info; 497 vk_module_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; 498 vk_module_info.pNext = NULL; 499 vk_module_info.flags = 0; 500 vk_module_info.codeSize = module_info->spirvSize; 501 vk_module_info.pCode = (const uint32_t*)module_info->pSpirv; 502 503 VkShaderModule module; 504 ASSERTED VkResult result = CreateShaderModule(dev, &vk_module_info, NULL, &module); 505 assert(result == VK_SUCCESS); 506 507 return module; 508} 509 510PipelineBuilder::PipelineBuilder(VkDevice dev) { 511 memset(this, 0, sizeof(*this)); 512 topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; 513 device = dev; 514} 515 516PipelineBuilder::~PipelineBuilder() 517{ 518 DestroyPipeline(device, pipeline, NULL); 519 520 for (unsigned i = 0; i < (is_compute() ? 1 : gfx_pipeline_info.stageCount); i++) { 521 VkPipelineShaderStageCreateInfo *stage_info = &stages[i]; 522 if (owned_stages & stage_info->stage) 523 DestroyShaderModule(device, stage_info->module, NULL); 524 } 525 526 DestroyPipelineLayout(device, pipeline_layout, NULL); 527 528 for (unsigned i = 0; i < util_bitcount64(desc_layouts_used); i++) 529 DestroyDescriptorSetLayout(device, desc_layouts[i], NULL); 530 531 DestroyRenderPass(device, render_pass, NULL); 532} 533 534void PipelineBuilder::add_desc_binding(VkShaderStageFlags stage_flags, uint32_t layout, 535 uint32_t binding, VkDescriptorType type, uint32_t count) 536{ 537 desc_layouts_used |= 1ull << layout; 538 desc_bindings[layout][num_desc_bindings[layout]++] = {binding, type, count, stage_flags, NULL}; 539} 540 541void PipelineBuilder::add_vertex_binding(uint32_t binding, uint32_t stride, VkVertexInputRate rate) 542{ 543 vs_bindings[vs_input.vertexBindingDescriptionCount++] = {binding, stride, rate}; 544} 545 546void PipelineBuilder::add_vertex_attribute(uint32_t location, uint32_t binding, VkFormat format, uint32_t offset) 547{ 548 vs_attributes[vs_input.vertexAttributeDescriptionCount++] = {location, binding, format, offset}; 549} 550 551void PipelineBuilder::add_resource_decls(QoShaderModuleCreateInfo *module) 552{ 553 for (unsigned i = 0; i < module->declarationCount; i++) { 554 const QoShaderDecl *decl = &module->pDeclarations[i]; 555 switch (decl->decl_type) { 556 case QoShaderDeclType_ubo: 557 add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER); 558 break; 559 case QoShaderDeclType_ssbo: 560 add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); 561 break; 562 case QoShaderDeclType_img_buf: 563 add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER); 564 break; 565 case QoShaderDeclType_img: 566 add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE); 567 break; 568 case QoShaderDeclType_tex_buf: 569 add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER); 570 break; 571 case QoShaderDeclType_combined: 572 add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER); 573 break; 574 case QoShaderDeclType_tex: 575 add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE); 576 break; 577 case QoShaderDeclType_samp: 578 add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_SAMPLER); 579 break; 580 default: 581 break; 582 } 583 } 584} 585 586void PipelineBuilder::add_io_decls(QoShaderModuleCreateInfo *module) 587{ 588 unsigned next_vtx_offset = 0; 589 for (unsigned i = 0; i < module->declarationCount; i++) { 590 const QoShaderDecl *decl = &module->pDeclarations[i]; 591 switch (decl->decl_type) { 592 case QoShaderDeclType_in: 593 if (module->stage == VK_SHADER_STAGE_VERTEX_BIT) { 594 if (!strcmp(decl->type, "float") || decl->type[0] == 'v') 595 add_vertex_attribute(decl->location, 0, VK_FORMAT_R32G32B32A32_SFLOAT, next_vtx_offset); 596 else if (decl->type[0] == 'u') 597 add_vertex_attribute(decl->location, 0, VK_FORMAT_R32G32B32A32_UINT, next_vtx_offset); 598 else if (decl->type[0] == 'i') 599 add_vertex_attribute(decl->location, 0, VK_FORMAT_R32G32B32A32_SINT, next_vtx_offset); 600 next_vtx_offset += 16; 601 } 602 break; 603 case QoShaderDeclType_out: 604 if (module->stage == VK_SHADER_STAGE_FRAGMENT_BIT) { 605 if (!strcmp(decl->type, "float") || decl->type[0] == 'v') 606 color_outputs[decl->location] = VK_FORMAT_R32G32B32A32_SFLOAT; 607 else if (decl->type[0] == 'u') 608 color_outputs[decl->location] = VK_FORMAT_R32G32B32A32_UINT; 609 else if (decl->type[0] == 'i') 610 color_outputs[decl->location] = VK_FORMAT_R32G32B32A32_SINT; 611 } 612 break; 613 default: 614 break; 615 } 616 } 617 if (next_vtx_offset) 618 add_vertex_binding(0, next_vtx_offset); 619} 620 621void PipelineBuilder::add_stage(VkShaderStageFlagBits stage, VkShaderModule module, const char *name) 622{ 623 VkPipelineShaderStageCreateInfo *stage_info; 624 if (stage == VK_SHADER_STAGE_COMPUTE_BIT) 625 stage_info = &stages[0]; 626 else 627 stage_info = &stages[gfx_pipeline_info.stageCount++]; 628 stage_info->sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; 629 stage_info->pNext = NULL; 630 stage_info->flags = 0; 631 stage_info->stage = stage; 632 stage_info->module = module; 633 stage_info->pName = name; 634 stage_info->pSpecializationInfo = NULL; 635 owned_stages |= stage; 636} 637 638void PipelineBuilder::add_stage(VkShaderStageFlagBits stage, QoShaderModuleCreateInfo module, const char *name) 639{ 640 add_stage(stage, __qoCreateShaderModule(device, &module), name); 641 add_resource_decls(&module); 642 add_io_decls(&module); 643} 644 645void PipelineBuilder::add_vsfs(VkShaderModule vs, VkShaderModule fs) 646{ 647 add_stage(VK_SHADER_STAGE_VERTEX_BIT, vs); 648 add_stage(VK_SHADER_STAGE_FRAGMENT_BIT, fs); 649} 650 651void PipelineBuilder::add_vsfs(QoShaderModuleCreateInfo vs, QoShaderModuleCreateInfo fs) 652{ 653 add_stage(VK_SHADER_STAGE_VERTEX_BIT, vs); 654 add_stage(VK_SHADER_STAGE_FRAGMENT_BIT, fs); 655} 656 657void PipelineBuilder::add_cs(VkShaderModule cs) 658{ 659 add_stage(VK_SHADER_STAGE_COMPUTE_BIT, cs); 660} 661 662void PipelineBuilder::add_cs(QoShaderModuleCreateInfo cs) 663{ 664 add_stage(VK_SHADER_STAGE_COMPUTE_BIT, cs); 665} 666 667bool PipelineBuilder::is_compute() { 668 return gfx_pipeline_info.stageCount == 0; 669} 670 671void PipelineBuilder::create_compute_pipeline() { 672 VkComputePipelineCreateInfo create_info; 673 create_info.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; 674 create_info.pNext = NULL; 675 create_info.flags = VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR; 676 create_info.stage = stages[0]; 677 create_info.layout = pipeline_layout; 678 create_info.basePipelineHandle = VK_NULL_HANDLE; 679 create_info.basePipelineIndex = 0; 680 681 ASSERTED VkResult result = CreateComputePipelines(device, VK_NULL_HANDLE, 1, &create_info, NULL, &pipeline); 682 assert(result == VK_SUCCESS); 683} 684 685void PipelineBuilder::create_graphics_pipeline() { 686 /* create the create infos */ 687 if (!samples) 688 samples = VK_SAMPLE_COUNT_1_BIT; 689 690 unsigned num_color_attachments = 0; 691 VkPipelineColorBlendAttachmentState blend_attachment_states[16]; 692 VkAttachmentReference color_attachments[16]; 693 VkAttachmentDescription attachment_descs[17]; 694 for (unsigned i = 0; i < 16; i++) { 695 if (color_outputs[i] == VK_FORMAT_UNDEFINED) 696 continue; 697 698 VkAttachmentDescription *desc = &attachment_descs[num_color_attachments]; 699 desc->flags = 0; 700 desc->format = color_outputs[i]; 701 desc->samples = samples; 702 desc->loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; 703 desc->storeOp = VK_ATTACHMENT_STORE_OP_STORE; 704 desc->stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD; 705 desc->stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE; 706 desc->initialLayout = VK_IMAGE_LAYOUT_GENERAL; 707 desc->finalLayout = VK_IMAGE_LAYOUT_GENERAL; 708 709 VkAttachmentReference *ref = &color_attachments[num_color_attachments]; 710 ref->attachment = num_color_attachments; 711 ref->layout = VK_IMAGE_LAYOUT_GENERAL; 712 713 VkPipelineColorBlendAttachmentState *blend = &blend_attachment_states[num_color_attachments]; 714 blend->blendEnable = false; 715 blend->colorWriteMask = VK_COLOR_COMPONENT_R_BIT | 716 VK_COLOR_COMPONENT_G_BIT | 717 VK_COLOR_COMPONENT_B_BIT | 718 VK_COLOR_COMPONENT_A_BIT; 719 720 num_color_attachments++; 721 } 722 723 unsigned num_attachments = num_color_attachments; 724 VkAttachmentReference ds_attachment; 725 if (ds_output != VK_FORMAT_UNDEFINED) { 726 VkAttachmentDescription *desc = &attachment_descs[num_attachments]; 727 desc->flags = 0; 728 desc->format = ds_output; 729 desc->samples = samples; 730 desc->loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; 731 desc->storeOp = VK_ATTACHMENT_STORE_OP_STORE; 732 desc->stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD; 733 desc->stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE; 734 desc->initialLayout = VK_IMAGE_LAYOUT_GENERAL; 735 desc->finalLayout = VK_IMAGE_LAYOUT_GENERAL; 736 737 ds_attachment.attachment = num_color_attachments; 738 ds_attachment.layout = VK_IMAGE_LAYOUT_GENERAL; 739 740 num_attachments++; 741 } 742 743 vs_input.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; 744 vs_input.pNext = NULL; 745 vs_input.flags = 0; 746 vs_input.pVertexBindingDescriptions = vs_bindings; 747 vs_input.pVertexAttributeDescriptions = vs_attributes; 748 749 VkPipelineInputAssemblyStateCreateInfo assembly_state; 750 assembly_state.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; 751 assembly_state.pNext = NULL; 752 assembly_state.flags = 0; 753 assembly_state.topology = topology; 754 assembly_state.primitiveRestartEnable = false; 755 756 VkPipelineTessellationStateCreateInfo tess_state; 757 tess_state.sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO; 758 tess_state.pNext = NULL; 759 tess_state.flags = 0; 760 tess_state.patchControlPoints = patch_size; 761 762 VkPipelineViewportStateCreateInfo viewport_state; 763 viewport_state.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; 764 viewport_state.pNext = NULL; 765 viewport_state.flags = 0; 766 viewport_state.viewportCount = 1; 767 viewport_state.pViewports = NULL; 768 viewport_state.scissorCount = 1; 769 viewport_state.pScissors = NULL; 770 771 VkPipelineRasterizationStateCreateInfo rasterization_state; 772 rasterization_state.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; 773 rasterization_state.pNext = NULL; 774 rasterization_state.flags = 0; 775 rasterization_state.depthClampEnable = false; 776 rasterization_state.rasterizerDiscardEnable = false; 777 rasterization_state.polygonMode = VK_POLYGON_MODE_FILL; 778 rasterization_state.cullMode = VK_CULL_MODE_NONE; 779 rasterization_state.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE; 780 rasterization_state.depthBiasEnable = false; 781 rasterization_state.lineWidth = 1.0; 782 783 VkPipelineMultisampleStateCreateInfo ms_state; 784 ms_state.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; 785 ms_state.pNext = NULL; 786 ms_state.flags = 0; 787 ms_state.rasterizationSamples = samples; 788 ms_state.sampleShadingEnable = sample_shading_enable; 789 ms_state.minSampleShading = min_sample_shading; 790 VkSampleMask sample_mask = 0xffffffff; 791 ms_state.pSampleMask = &sample_mask; 792 ms_state.alphaToCoverageEnable = false; 793 ms_state.alphaToOneEnable = false; 794 795 VkPipelineDepthStencilStateCreateInfo ds_state; 796 ds_state.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; 797 ds_state.pNext = NULL; 798 ds_state.flags = 0; 799 ds_state.depthTestEnable = ds_output != VK_FORMAT_UNDEFINED; 800 ds_state.depthWriteEnable = true; 801 ds_state.depthCompareOp = VK_COMPARE_OP_ALWAYS; 802 ds_state.depthBoundsTestEnable = false; 803 ds_state.stencilTestEnable = true; 804 ds_state.front.failOp = VK_STENCIL_OP_KEEP; 805 ds_state.front.passOp = VK_STENCIL_OP_REPLACE; 806 ds_state.front.depthFailOp = VK_STENCIL_OP_REPLACE; 807 ds_state.front.compareOp = VK_COMPARE_OP_ALWAYS; 808 ds_state.front.compareMask = 0xffffffff, 809 ds_state.front.writeMask = 0; 810 ds_state.front.reference = 0; 811 ds_state.back = ds_state.front; 812 813 VkPipelineColorBlendStateCreateInfo color_blend_state; 814 color_blend_state.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; 815 color_blend_state.pNext = NULL; 816 color_blend_state.flags = 0; 817 color_blend_state.logicOpEnable = false; 818 color_blend_state.attachmentCount = num_color_attachments; 819 color_blend_state.pAttachments = blend_attachment_states; 820 821 VkDynamicState dynamic_states[9] = { 822 VK_DYNAMIC_STATE_VIEWPORT, 823 VK_DYNAMIC_STATE_SCISSOR, 824 VK_DYNAMIC_STATE_LINE_WIDTH, 825 VK_DYNAMIC_STATE_DEPTH_BIAS, 826 VK_DYNAMIC_STATE_BLEND_CONSTANTS, 827 VK_DYNAMIC_STATE_DEPTH_BOUNDS, 828 VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, 829 VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, 830 VK_DYNAMIC_STATE_STENCIL_REFERENCE 831 }; 832 833 VkPipelineDynamicStateCreateInfo dynamic_state; 834 dynamic_state.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; 835 dynamic_state.pNext = NULL; 836 dynamic_state.flags = 0; 837 dynamic_state.dynamicStateCount = sizeof(dynamic_states) / sizeof(VkDynamicState); 838 dynamic_state.pDynamicStates = dynamic_states; 839 840 gfx_pipeline_info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; 841 gfx_pipeline_info.pNext = NULL; 842 gfx_pipeline_info.flags = VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR; 843 gfx_pipeline_info.pVertexInputState = &vs_input; 844 gfx_pipeline_info.pInputAssemblyState = &assembly_state; 845 gfx_pipeline_info.pTessellationState = &tess_state; 846 gfx_pipeline_info.pViewportState = &viewport_state; 847 gfx_pipeline_info.pRasterizationState = &rasterization_state; 848 gfx_pipeline_info.pMultisampleState = &ms_state; 849 gfx_pipeline_info.pDepthStencilState = &ds_state; 850 gfx_pipeline_info.pColorBlendState = &color_blend_state; 851 gfx_pipeline_info.pDynamicState = &dynamic_state; 852 gfx_pipeline_info.subpass = 0; 853 854 /* create the objects used to create the pipeline */ 855 VkSubpassDescription subpass; 856 subpass.flags = 0; 857 subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS; 858 subpass.inputAttachmentCount = 0; 859 subpass.pInputAttachments = NULL; 860 subpass.colorAttachmentCount = num_color_attachments; 861 subpass.pColorAttachments = color_attachments; 862 subpass.pResolveAttachments = NULL; 863 subpass.pDepthStencilAttachment = ds_output == VK_FORMAT_UNDEFINED ? NULL : &ds_attachment; 864 subpass.preserveAttachmentCount = 0; 865 subpass.pPreserveAttachments = NULL; 866 867 VkRenderPassCreateInfo renderpass_info; 868 renderpass_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO; 869 renderpass_info.pNext = NULL; 870 renderpass_info.flags = 0; 871 renderpass_info.attachmentCount = num_attachments; 872 renderpass_info.pAttachments = attachment_descs; 873 renderpass_info.subpassCount = 1; 874 renderpass_info.pSubpasses = &subpass; 875 renderpass_info.dependencyCount = 0; 876 renderpass_info.pDependencies = NULL; 877 878 ASSERTED VkResult result = CreateRenderPass(device, &renderpass_info, NULL, &render_pass); 879 assert(result == VK_SUCCESS); 880 881 gfx_pipeline_info.layout = pipeline_layout; 882 gfx_pipeline_info.renderPass = render_pass; 883 884 /* create the pipeline */ 885 gfx_pipeline_info.pStages = stages; 886 887 result = CreateGraphicsPipelines(device, VK_NULL_HANDLE, 1, &gfx_pipeline_info, NULL, &pipeline); 888 assert(result == VK_SUCCESS); 889} 890 891void PipelineBuilder::create_pipeline() { 892 unsigned num_desc_layouts = 0; 893 for (unsigned i = 0; i < 64; i++) { 894 if (!(desc_layouts_used & (1ull << i))) 895 continue; 896 897 VkDescriptorSetLayoutCreateInfo desc_layout_info; 898 desc_layout_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; 899 desc_layout_info.pNext = NULL; 900 desc_layout_info.flags = 0; 901 desc_layout_info.bindingCount = num_desc_bindings[i]; 902 desc_layout_info.pBindings = desc_bindings[i]; 903 904 ASSERTED VkResult result = CreateDescriptorSetLayout(device, &desc_layout_info, NULL, &desc_layouts[num_desc_layouts]); 905 assert(result == VK_SUCCESS); 906 num_desc_layouts++; 907 } 908 909 VkPipelineLayoutCreateInfo pipeline_layout_info; 910 pipeline_layout_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; 911 pipeline_layout_info.pNext = NULL; 912 pipeline_layout_info.flags = 0; 913 pipeline_layout_info.pushConstantRangeCount = 1; 914 pipeline_layout_info.pPushConstantRanges = &push_constant_range; 915 pipeline_layout_info.setLayoutCount = num_desc_layouts; 916 pipeline_layout_info.pSetLayouts = desc_layouts; 917 918 ASSERTED VkResult result = CreatePipelineLayout(device, &pipeline_layout_info, NULL, &pipeline_layout); 919 assert(result == VK_SUCCESS); 920 921 if (is_compute()) 922 create_compute_pipeline(); 923 else 924 create_graphics_pipeline(); 925} 926 927void PipelineBuilder::print_ir(VkShaderStageFlagBits stage_flags, const char *name, bool remove_encoding) 928{ 929 if (!pipeline) 930 create_pipeline(); 931 print_pipeline_ir(device, pipeline, stage_flags, name, remove_encoding); 932} 933