1/*------------------------------------------------------------------------ 2 * Vulkan Conformance Tests 3 * ------------------------ 4 * 5 * Copyright (c) 2021 The Khronos Group Inc. 6 * Copyright (c) 2021 Valve Corporation. 7 * 8 * Licensed under the Apache License, Version 2.0 (the "License"); 9 * you may not use this file except in compliance with the License. 10 * You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 * 20 *//*! 21 * \file 22 * \brief Mesh Shader Synchronization Tests for VK_EXT_mesh_shader 23 *//*--------------------------------------------------------------------*/ 24 25#include "vktMeshShaderSyncTestsEXT.hpp" 26#include "vktMeshShaderUtil.hpp" 27#include "vktTestCase.hpp" 28 29#include "vkDefs.hpp" 30#include "vkTypeUtil.hpp" 31#include "vkImageWithMemory.hpp" 32#include "vkBufferWithMemory.hpp" 33#include "vkObjUtil.hpp" 34#include "vkBuilderUtil.hpp" 35#include "vkCmdUtil.hpp" 36#include "vkBarrierUtil.hpp" 37#include "vkImageUtil.hpp" 38 39#include "deUniquePtr.hpp" 40 41#include <iostream> 42#include <sstream> 43#include <vector> 44#include <set> 45 46namespace vkt 47{ 48namespace MeshShader 49{ 50 51namespace 52{ 53 54using GroupPtr = de::MovePtr<tcu::TestCaseGroup>; 55 56using namespace vk; 57 58// Stages that will be used in these tests. Shader stages sorted in pipeline order. 59enum class Stage 60{ 61 HOST = 0, 62 TRANSFER, 63 TASK, 64 MESH, 65 FRAG, 66}; 67 68std::ostream& operator<< (std::ostream& stream, Stage stage) 69{ 70 switch (stage) 71 { 72 case Stage::HOST: stream << "host"; break; 73 case Stage::TRANSFER: stream << "transfer"; break; 74 case Stage::TASK: stream << "task"; break; 75 case Stage::MESH: stream << "mesh"; break; 76 case Stage::FRAG: stream << "frag"; break; 77 default: DE_ASSERT(false); break; 78 } 79 80 return stream; 81} 82 83bool isShaderStage (Stage stage) 84{ 85 return (stage == Stage::TASK || stage == Stage::MESH || stage == Stage::FRAG); 86} 87 88VkPipelineStageFlags stageToFlags (Stage stage) 89{ 90 switch (stage) 91 { 92 case Stage::HOST: return VK_PIPELINE_STAGE_HOST_BIT; 93 case Stage::TRANSFER: return VK_PIPELINE_STAGE_TRANSFER_BIT; 94 case Stage::TASK: return VK_PIPELINE_STAGE_TASK_SHADER_BIT_EXT; 95 case Stage::MESH: return VK_PIPELINE_STAGE_MESH_SHADER_BIT_EXT; 96 case Stage::FRAG: return VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; 97 default: DE_ASSERT(false); break; 98 } 99 100 // Unreachable. 101 DE_ASSERT(false); 102 return 0u; 103} 104 105VkFormat getImageFormat () 106{ 107 return VK_FORMAT_R32_UINT; 108} 109 110VkExtent3D getImageExtent () 111{ 112 return makeExtent3D(1u, 1u, 1u); 113} 114 115// Types of resources we will use. 
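// Note: image resources below are single-texel (1x1) R32_UINT images (see getImageFormat and getImageExtent above),
// so each resource holds exactly one uint32_t test value regardless of its type.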
116enum class ResourceType 117{ 118 UNIFORM_BUFFER = 0, 119 STORAGE_BUFFER, 120 STORAGE_IMAGE, 121 SAMPLED_IMAGE, 122}; 123 124VkDescriptorType resourceTypeToDescriptor (ResourceType resType) 125{ 126 switch (resType) 127 { 128 case ResourceType::UNIFORM_BUFFER: return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; 129 case ResourceType::STORAGE_BUFFER: return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; 130 case ResourceType::STORAGE_IMAGE: return VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; 131 case ResourceType::SAMPLED_IMAGE: return VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER; 132 default: DE_ASSERT(false); break; 133 } 134 135 // Unreachable. 136 DE_ASSERT(false); 137 return VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR; 138} 139 140// Will the test use a specific barrier or a general memory barrier? 141enum class BarrierType 142{ 143 GENERAL = 0, 144 SPECIFIC, 145 DEPENDENCY, 146}; 147 148// Types of writes we will use. 149enum class WriteAccess 150{ 151 HOST_WRITE = 0, 152 TRANSFER_WRITE, 153 SHADER_WRITE, 154}; 155 156VkAccessFlags writeAccessToFlags (WriteAccess access) 157{ 158 switch (access) 159 { 160 case WriteAccess::HOST_WRITE: return VK_ACCESS_HOST_WRITE_BIT; 161 case WriteAccess::TRANSFER_WRITE: return VK_ACCESS_TRANSFER_WRITE_BIT; 162 case WriteAccess::SHADER_WRITE: return VK_ACCESS_SHADER_WRITE_BIT; 163 default: DE_ASSERT(false); break; 164 } 165 166 // Unreachable. 167 DE_ASSERT(false); 168 return 0u; 169} 170 171// Types of reads we will use. 172enum class ReadAccess 173{ 174 HOST_READ = 0, 175 TRANSFER_READ, 176 SHADER_READ, 177 UNIFORM_READ, 178}; 179 180VkAccessFlags readAccessToFlags (ReadAccess access) 181{ 182 switch (access) 183 { 184 case ReadAccess::HOST_READ: return VK_ACCESS_HOST_READ_BIT; 185 case ReadAccess::TRANSFER_READ: return VK_ACCESS_TRANSFER_READ_BIT; 186 case ReadAccess::SHADER_READ: return VK_ACCESS_SHADER_READ_BIT; 187 case ReadAccess::UNIFORM_READ: return VK_ACCESS_UNIFORM_READ_BIT; 188 default: DE_ASSERT(false); break; 189 } 190 191 // Unreachable. 192 DE_ASSERT(false); 193 return 0u; 194} 195 196// Auxiliary functions to verify certain combinations are possible. 197 198// Check if the writing stage can use the specified write access. 199bool canWriteFromStageAsAccess (Stage writeStage, WriteAccess access) 200{ 201 switch (writeStage) 202 { 203 case Stage::HOST: return (access == WriteAccess::HOST_WRITE); 204 case Stage::TRANSFER: return (access == WriteAccess::TRANSFER_WRITE); 205 case Stage::TASK: // fallthrough 206 case Stage::MESH: // fallthrough 207 case Stage::FRAG: return (access == WriteAccess::SHADER_WRITE); 208 default: DE_ASSERT(false); break; 209 } 210 211 return false; 212} 213 214// Check if the reading stage can use the specified read access. 215bool canReadFromStageAsAccess (Stage readStage, ReadAccess access) 216{ 217 switch (readStage) 218 { 219 case Stage::HOST: return (access == ReadAccess::HOST_READ); 220 case Stage::TRANSFER: return (access == ReadAccess::TRANSFER_READ); 221 case Stage::TASK: // fallthrough 222 case Stage::MESH: // fallthrough 223 case Stage::FRAG: return (access == ReadAccess::SHADER_READ || access == ReadAccess::UNIFORM_READ); 224 default: DE_ASSERT(false); break; 225 } 226 227 return false; 228} 229 230// Check if reading the given resource type is possible with the given type of read access. 
231bool canReadResourceAsAccess (ResourceType resType, ReadAccess access) 232{ 233 if (access == ReadAccess::UNIFORM_READ) 234 return (resType == ResourceType::UNIFORM_BUFFER); 235 return true; 236} 237 238// Check if writing to the given resource type is possible with the given type of write access. 239bool canWriteResourceAsAccess (ResourceType resType, WriteAccess access) 240{ 241 if (resType == ResourceType::UNIFORM_BUFFER) 242 return (access != WriteAccess::SHADER_WRITE); 243 return true; 244} 245 246// Check if the given stage can write to the given resource type. 247bool canWriteTo (Stage stage, ResourceType resType) 248{ 249 switch (stage) 250 { 251 case Stage::HOST: return (resType == ResourceType::UNIFORM_BUFFER || resType == ResourceType::STORAGE_BUFFER); 252 case Stage::TRANSFER: return true; 253 case Stage::TASK: // fallthrough 254 case Stage::MESH: // fallthrough 255 case Stage::FRAG: return (resType == ResourceType::STORAGE_BUFFER || resType == ResourceType::STORAGE_IMAGE); 256 default: DE_ASSERT(false); break; 257 } 258 259 return false; 260} 261 262// Check if the given stage can read from the given resource type. 263bool canReadFrom (Stage stage, ResourceType resType) 264{ 265 switch (stage) 266 { 267 case Stage::HOST: return (resType == ResourceType::UNIFORM_BUFFER || resType == ResourceType::STORAGE_BUFFER); 268 case Stage::TRANSFER: // fallthrough 269 case Stage::TASK: // fallthrough 270 case Stage::MESH: // fallthrough 271 case Stage::FRAG: return true; 272 default: DE_ASSERT(false); break; 273 } 274 275 return false; 276} 277 278// Will we need to store the test value in an auxiliar buffer to be read? 279bool needsAuxiliarSourceBuffer (Stage fromStage, Stage toStage) 280{ 281 DE_UNREF(toStage); 282 return (fromStage == Stage::TRANSFER); 283} 284 285// Will we need to store the read operation result into an auxiliar buffer to be checked? 286bool needsAuxiliarDestBuffer (Stage fromStage, Stage toStage) 287{ 288 DE_UNREF(fromStage); 289 return (toStage == Stage::TRANSFER); 290} 291 292// Needs any auxiliar buffer for any case? 293bool needsAuxiliarBuffer (Stage fromStage, Stage toStage) 294{ 295 return (needsAuxiliarSourceBuffer(fromStage, toStage) || needsAuxiliarDestBuffer(fromStage, toStage)); 296} 297 298// Will the final value be stored in the auxiliar destination buffer? 299bool valueInAuxiliarDestBuffer (Stage toStage) 300{ 301 return (toStage == Stage::TRANSFER); 302} 303 304// Will the final value be stored in the resource buffer itself? 305bool valueInResourceBuffer (Stage toStage) 306{ 307 return (toStage == Stage::HOST); 308} 309 310// Will the final value be stored in the color buffer? 311bool valueInColorBuffer (Stage toStage) 312{ 313 return (!valueInAuxiliarDestBuffer(toStage) && !valueInResourceBuffer(toStage)); 314} 315 316// Image usage flags for the image resource. 317VkImageUsageFlags resourceImageUsageFlags (ResourceType resourceType) 318{ 319 VkImageUsageFlags flags = (VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT); 320 321 switch (resourceType) 322 { 323 case ResourceType::STORAGE_IMAGE: flags |= VK_IMAGE_USAGE_STORAGE_BIT; break; 324 case ResourceType::SAMPLED_IMAGE: flags |= VK_IMAGE_USAGE_SAMPLED_BIT; break; 325 default: DE_ASSERT(false); break; 326 } 327 328 return flags; 329} 330 331// Buffer usage flags for the buffer resource. 
332VkBufferUsageFlags resourceBufferUsageFlags (ResourceType resourceType) 333{ 334 VkBufferUsageFlags flags = (VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT); 335 336 switch (resourceType) 337 { 338 case ResourceType::UNIFORM_BUFFER: flags |= VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT; break; 339 case ResourceType::STORAGE_BUFFER: flags |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT; break; 340 default: DE_ASSERT(false); break; 341 } 342 343 return flags; 344} 345 346// Returns true if both the write and read stages are shader stages. 347bool fromShaderToShader (Stage fromStage, Stage toStage) 348{ 349 return (isShaderStage(fromStage) && isShaderStage(toStage)); 350} 351 352// Supposing we'll use two subpasses, decide the stages of a subpass based on the mandatory stages and the one we're interested in. 353std::vector<Stage> subpassStages (Stage wantedStage, bool lastSubpass) 354{ 355 std::set<Stage> stages; 356 stages.insert(wantedStage); 357 stages.insert(Stage::MESH); // This one is mandatory. 358 if (lastSubpass) 359 stages.insert(Stage::FRAG); // In the last subpass we always need a fragment shader (passthrough). 360 return std::vector<Stage>(begin(stages), end(stages)); 361} 362 363// Is the task shader in the list? 364bool hasTask (const std::vector<Stage>& stages) 365{ 366 return de::contains(begin(stages), end(stages), Stage::TASK); 367} 368 369// Is the frag shader in the list? 370bool hasFrag (const std::vector<Stage>& stages) 371{ 372 return de::contains(begin(stages), end(stages), Stage::FRAG); 373} 374 375struct TestParams 376{ 377 Stage fromStage; 378 Stage toStage; 379 ResourceType resourceType; 380 BarrierType barrierType; 381 WriteAccess writeAccess; 382 ReadAccess readAccess; 383 uint32_t testValue; 384 385protected: 386 bool readsOrWritesIn (Stage stage) const 387 { 388 DE_ASSERT(fromStage != toStage); 389 return (fromStage == stage || toStage == stage); 390 } 391 392public: 393 bool needsTask () const 394 { 395 return readsOrWritesIn(Stage::TASK); 396 } 397 398 bool readsOrWritesInMesh () const 399 { 400 return readsOrWritesIn(Stage::MESH); 401 } 402 403 std::string getResourceDecl () const 404 { 405 const auto imgFormat = ((resourceType == ResourceType::STORAGE_IMAGE) ? ", r32ui" : ""); 406 const auto storagePrefix = ((writeAccess == WriteAccess::SHADER_WRITE) ? "" : "readonly "); 407 std::ostringstream decl; 408 409 decl << "layout (set=0, binding=0" << imgFormat << ") "; 410 switch (resourceType) 411 { 412 case ResourceType::UNIFORM_BUFFER: decl << "uniform UniformBuffer { uint value; } ub;"; break; 413 case ResourceType::STORAGE_BUFFER: decl << storagePrefix << "buffer StorageBuffer { uint value; } sb;"; break; 414 case ResourceType::STORAGE_IMAGE: decl << storagePrefix << "uniform uimage2D si;"; break; 415 case ResourceType::SAMPLED_IMAGE: decl << "uniform usampler2D sampled;"; break; 416 default: DE_ASSERT(false); break; 417 } 418 419 decl << "\n"; 420 return decl.str(); 421 } 422 423 struct PushConstantStruct 424 { 425 uint32_t writeVal; 426 uint32_t readVal; 427 }; 428 429 // Get declaration for the "pc" push constant block. Must match the structure above. 
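// Note: pc.writeVal and pc.readVal act as runtime switches for getWriteStatement() and getReadStatement() below,
// which lets the same shader be reused for a write-only and a read-only draw (see the subpass self-dependency path in the test instance).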
430 std::string getPushConstantDecl () const 431 { 432 std::ostringstream pc; 433 pc 434 << "layout (push_constant, std430) uniform PushConstantBlock {\n" 435 << " uint writeVal;\n" 436 << " uint readVal;\n" 437 << "} pc;\n" 438 ; 439 return pc.str(); 440 } 441 442 std::string getReadStatement (const std::string& outName) const 443 { 444 std::ostringstream statement; 445 statement << " if (pc.readVal > 0u) { " << outName << " = "; 446 447 switch (resourceType) 448 { 449 case ResourceType::UNIFORM_BUFFER: statement << "ub.value"; break; 450 case ResourceType::STORAGE_BUFFER: statement << "sb.value"; break; 451 case ResourceType::STORAGE_IMAGE: statement << "imageLoad(si, ivec2(0, 0)).x"; break; 452 case ResourceType::SAMPLED_IMAGE: statement << "texture(sampled, vec2(0.5, 0.5)).x"; break; 453 default: DE_ASSERT(false); break; 454 } 455 456 statement << "; }\n"; 457 return statement.str(); 458 } 459 460 std::string getWriteStatement (const std::string& valueName) const 461 { 462 std::ostringstream statement; 463 statement << " if (pc.writeVal > 0u) { "; 464 465 switch (resourceType) 466 { 467 case ResourceType::STORAGE_BUFFER: statement << "sb.value = " << valueName; break; 468 case ResourceType::STORAGE_IMAGE: statement << "imageStore(si, ivec2(0, 0), uvec4(" << valueName << ", 0, 0, 0))"; break; 469 case ResourceType::UNIFORM_BUFFER: // fallthrough 470 case ResourceType::SAMPLED_IMAGE: // fallthrough 471 default: DE_ASSERT(false); break; 472 } 473 474 statement << "; }\n"; 475 return statement.str(); 476 } 477 478 VkShaderStageFlags getResourceShaderStages () const 479 { 480 VkShaderStageFlags flags = 0u; 481 482 if (fromStage == Stage::TASK || toStage == Stage::TASK) flags |= VK_SHADER_STAGE_TASK_BIT_EXT; 483 if (fromStage == Stage::MESH || toStage == Stage::MESH) flags |= VK_SHADER_STAGE_MESH_BIT_EXT; 484 if (fromStage == Stage::FRAG || toStage == Stage::FRAG) flags |= VK_SHADER_STAGE_FRAGMENT_BIT; 485 486 // We assume at least something must be done either on the task or mesh shaders for the tests to be interesting. 487 DE_ASSERT((flags & (VK_SHADER_STAGE_TASK_BIT_EXT | VK_SHADER_STAGE_MESH_BIT_EXT)) != 0u); 488 return flags; 489 } 490 491 // We'll prefer to keep the image in the general layout if it will be written to from a shader stage or if the barrier is going to be a generic memory barrier. 492 bool preferGeneralLayout () const 493 { 494 return (isShaderStage(fromStage) || (barrierType == BarrierType::GENERAL) || (resourceType == ResourceType::STORAGE_IMAGE)); 495 } 496 497 // We need two pipelines if both the writing and reading stage are shaders, and either: 498 // - The writing stage comes after the reading stage in the pipeline. 499 // - The barrier to use is not a dependency. 500 bool needsTwoPipelines () const 501 { 502 return (fromShaderToShader(fromStage, toStage) && 503 (static_cast<int>(fromStage) >= static_cast<int>(toStage) || 504 barrierType != BarrierType::DEPENDENCY)); 505 } 506 507 // We need to use generic barriers when using subpass self-dependencies (single subpass and pipeline). 508 // Note: barrierType == BarrierType::DEPENDENCY is technically redundant with !needsTwoPipelines(). 
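	// Sketch of what the instance records for the self-dependency case (single subpass, single pipeline):
	//   begin render pass
	//   draw mesh tasks     (pc.writeVal = 1, pc.readVal = 0)
	//   pipeline barrier    (general memory barrier matching the subpass self-dependency)
	//   draw mesh tasks     (pc.writeVal = 0, pc.readVal = 1)
	//   end render pass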
509 bool subpassSelfDependency () const 510 { 511 return (fromShaderToShader(fromStage, toStage) && barrierType == BarrierType::DEPENDENCY && !needsTwoPipelines()); 512 } 513 514}; 515 516class MeshShaderSyncCase : public vkt::TestCase 517{ 518public: 519 MeshShaderSyncCase (tcu::TestContext& testCtx, const std::string& name, const TestParams& params) 520 : vkt::TestCase (testCtx, name), m_params (params) 521 {} 522 523 virtual ~MeshShaderSyncCase (void) {} 524 525 void checkSupport (Context& context) const override; 526 void initPrograms (vk::SourceCollections& programCollection) const override; 527 TestInstance* createInstance (Context& context) const override; 528 529protected: 530 TestParams m_params; 531}; 532 533class MeshShaderSyncInstance : public vkt::TestInstance 534{ 535public: 536 MeshShaderSyncInstance (Context& context, const TestParams& params) : vkt::TestInstance(context), m_params(params) {} 537 virtual ~MeshShaderSyncInstance (void) {} 538 539 tcu::TestStatus iterate (void) override; 540 541protected: 542 TestParams m_params; 543}; 544 545void MeshShaderSyncCase::checkSupport (Context& context) const 546{ 547 checkTaskMeshShaderSupportEXT(context, m_params.needsTask(), true); 548 549 if (m_params.writeAccess == WriteAccess::SHADER_WRITE) 550 { 551 context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_VERTEX_PIPELINE_STORES_AND_ATOMICS); 552 } 553} 554 555void MeshShaderSyncCase::initPrograms (vk::SourceCollections& programCollection) const 556{ 557 const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion); 558 const bool needsTaskShader = m_params.needsTask(); 559 const auto valueStr = de::toString(m_params.testValue); 560 const auto resourceDecl = m_params.getResourceDecl(); 561 const auto pcDecl = m_params.getPushConstantDecl(); 562 const std::string tdDecl = "struct TaskData { uint value; }; taskPayloadSharedEXT TaskData td;\n"; 563 564 if (needsTaskShader) 565 { 566 std::ostringstream task; 567 task 568 << "#version 450\n" 569 << "#extension GL_EXT_mesh_shader : enable\n" 570 << "\n" 571 << "layout(local_size_x=1) in;\n" 572 << "\n" 573 << tdDecl 574 << "\n" 575 << resourceDecl 576 << pcDecl 577 << "\n" 578 << "void main ()\n" 579 << "{\n" 580 << " td.value = 0u;\n" 581 << ((m_params.fromStage == Stage::TASK) ? m_params.getWriteStatement(valueStr) : "") 582 << ((m_params.toStage == Stage::TASK) ? m_params.getReadStatement("td.value") : "") 583 << " EmitMeshTasksEXT(1u, 1u, 1u);\n" 584 << "}\n" 585 ; 586 programCollection.glslSources.add("task") << glu::TaskSource(task.str()) << buildOptions; 587 } 588 589 { 590 // In the mesh-to-task case, we need non-passthrough mesh and task shaders but the mesh shader doesn't have a previous task shader. 591 // In the task-to-mesh case, the second pipeline will have the main mesh shader but no previous task shader either. 592 const bool prevTaskInMainMesh = (needsTaskShader 593 && !(m_params.fromStage == Stage::MESH && m_params.toStage == Stage::TASK) 594 && !(m_params.fromStage == Stage::TASK && m_params.toStage == Stage::MESH)); 595 const bool rwInMeshStage = m_params.readsOrWritesInMesh(); 596 597 std::ostringstream mesh; 598 mesh 599 << "#version 450\n" 600 << "#extension GL_EXT_mesh_shader : enable\n" 601 << "\n" 602 << "layout(local_size_x=1) in;\n" 603 << "layout(triangles) out;\n" 604 << "layout(max_vertices=3, max_primitives=1) out;\n" 605 << "\n" 606 << (prevTaskInMainMesh ? 
tdDecl : "") 607 << "layout (location=0) out perprimitiveEXT uint primitiveValue[];\n" 608 << "\n" 609 << (rwInMeshStage ? resourceDecl : "") 610 << (rwInMeshStage ? pcDecl : "") 611 << "\n" 612 << "void main ()\n" 613 << "{\n" 614 << " SetMeshOutputsEXT(3u, 1u);\n" 615 << (prevTaskInMainMesh ? " primitiveValue[0] = td.value;\n" : "") 616 << ((m_params.fromStage == Stage::MESH) ? m_params.getWriteStatement(valueStr) : "") 617 << ((m_params.toStage == Stage::MESH) ? m_params.getReadStatement("primitiveValue[0]") : "") 618 << "\n" 619 << " gl_MeshVerticesEXT[0].gl_Position = vec4(-1.0, -1.0, 0.0, 1.0);\n" 620 << " gl_MeshVerticesEXT[1].gl_Position = vec4(-1.0, 3.0, 0.0, 1.0);\n" 621 << " gl_MeshVerticesEXT[2].gl_Position = vec4( 3.0, -1.0, 0.0, 1.0);\n" 622 << " gl_PrimitiveTriangleIndicesEXT[0] = uvec3(0, 1, 2);\n" 623 << "}\n" 624 ; 625 programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions; 626 } 627 628 { 629 const bool readFromFrag = (m_params.toStage == Stage::FRAG); 630 const bool writeFromFrag = (m_params.fromStage == Stage::FRAG); 631 const bool rwInFragStage = (readFromFrag || writeFromFrag); 632 std::ostringstream frag; 633 634 frag 635 << "#version 450\n" 636 << "#extension GL_EXT_mesh_shader : enable\n" 637 << "\n" 638 << "layout (location=0) in perprimitiveEXT flat uint primitiveValue;\n" 639 << "layout (location=0) out uvec4 outColor;\n" 640 << "\n" 641 << (rwInFragStage ? resourceDecl : "") 642 << (rwInFragStage ? pcDecl : "") 643 << "\n" 644 << "void main ()\n" 645 << "{\n" 646 << " outColor = uvec4(primitiveValue, 0, 0, 0);\n" 647 << " uint readVal = 0u;\n" 648 << (readFromFrag ? m_params.getReadStatement("readVal") : "") 649 << (readFromFrag ? " outColor = uvec4(readVal, 0, 0, 0);\n" : "") 650 << (writeFromFrag ? m_params.getWriteStatement(valueStr) : "") 651 << "}\n" 652 ; 653 programCollection.glslSources.add("frag") << glu::FragmentSource(frag.str()) << buildOptions; 654 } 655 656 // Passthrough shaders. 657 { 658 const std::string task = 659 "#version 450\n" 660 "#extension GL_EXT_mesh_shader : enable\n" 661 "\n" 662 "layout(local_size_x=1) in;\n" 663 "\n" 664 + tdDecl + 665 "\n" 666 "void main ()\n" 667 "{\n" 668 " td.value = 0u;\n" 669 " EmitMeshTasksEXT(1u, 1u, 1u);\n" 670 "}\n" 671 ; 672 programCollection.glslSources.add("taskPassthrough") << glu::TaskSource(task) << buildOptions; 673 674 const std::string frag = 675 "#version 450\n" 676 "#extension GL_EXT_mesh_shader : enable\n" 677 "\n" 678 "layout (location=0) in perprimitiveEXT flat uint primitiveValue;\n" 679 "layout (location=0) out uvec4 outColor;\n" 680 "\n" 681 "void main ()\n" 682 "{\n" 683 " outColor = uvec4(primitiveValue, 0, 0, 0);\n" 684 "}\n" 685 ; 686 programCollection.glslSources.add("fragPassthrough") << glu::FragmentSource(frag) << buildOptions; 687 688 for (int i = 0; i < 2; ++i) 689 { 690 const bool prevTask = (i > 0); 691 const std::string nameSuffix = (prevTask ? "WithTask" : ""); 692 const std::string mesh = 693 "#version 450\n" 694 "#extension GL_EXT_mesh_shader : enable\n" 695 "\n" 696 "layout(local_size_x=1) in;\n" 697 "layout(triangles) out;\n" 698 "layout(max_vertices=3, max_primitives=1) out;\n" 699 "\n" 700 + (prevTask ? tdDecl : "") + 701 "layout (location=0) out perprimitiveEXT uint primitiveValue[];\n" 702 "\n" 703 "void main ()\n" 704 "{\n" 705 " SetMeshOutputsEXT(3u, 1u);\n" 706 " " + (prevTask ? 
"primitiveValue[0] = td.value;" : "primitiveValue[0] = 0u;") + "\n" 707 "\n" 708 " gl_MeshVerticesEXT[0].gl_Position = vec4(-1.0, -1.0, 0.0, 1.0);\n" 709 " gl_MeshVerticesEXT[1].gl_Position = vec4(-1.0, 3.0, 0.0, 1.0);\n" 710 " gl_MeshVerticesEXT[2].gl_Position = vec4( 3.0, -1.0, 0.0, 1.0);\n" 711 " gl_PrimitiveTriangleIndicesEXT[0] = uvec3(0, 1, 2);\n" 712 "}\n" 713 ; 714 programCollection.glslSources.add("meshPassthrough" + nameSuffix) << glu::MeshSource(mesh) << buildOptions; 715 } 716 } 717} 718 719TestInstance* MeshShaderSyncCase::createInstance (Context& context) const 720{ 721 return new MeshShaderSyncInstance(context, m_params); 722} 723 724// General description behind these tests. 725// 726// From To 727// ============================== 728// HOST TASK Prepare buffer from host. Only valid for uniform and storage buffers. Read value from task into td.value. Verify color buffer. 729// HOST MESH Same situation. Read value from mesh into primitiveValue[0]. Verify color buffer. 730// TRANSFER TASK Prepare auxiliary host-coherent source buffer from host. Copy buffer to buffer or buffer to image. Read from task into td.value. Verify color buffer. 731// TRANSFER MESH Same initial steps. Read from mesh into primitiveValue[0]. Verify color buffer. 732// TASK MESH Write value to buffer or image from task shader. Only valid for storage buffers and images. Read from mesh into primitiveValue[0]. Verify color buffer. 733// TASK FRAG Same write procedure and restrictions. Read from frag into outColor. Verify color buffer. 734// TASK TRANSFER Same write procedure and restrictions. Prepare auxiliary host-coherent read buffer and copy buffer to buffer or image to buffer. Verify auxiliary buffer. 735// TASK HOST Due to From/To restrictions, only valid for storage buffers. Same write procedure. Read and verify buffer directly. 736// MESH FRAG Same as task to frag but the write instructions need to be in the mesh shader. 737// MESH TRANSFER Same as task to transfer but the write instructions need to be in the mesh shader. 738// MESH HOST Same as task to host but the write instructions need to be in the mesh shader. 739// 740// The following cases require two pipelines 741// ========================================= 742// MESH TASK Write value to buffer or image from mesh shader. Only valid for storage buffers and images. Read from task into td.value. Verify color buffer. 743// Sequence: mesh, task, mesh*, frag*. 744// FRAG TASK Same as mesh to task, but writing from the first fragment shader. 745// Sequence: mesh*, frag, task, mesh*, frag*. 746// FRAG MESH Similar to frag to task, but reading from mesh into primitiveValue[0]. Verify color buffer after second fragment shader. 747// Sequence: mesh*, frag, mesh, frag*. 748// 749 750// Create one or two render passes with the right dependencies depending on the test parameters. 751std::vector<Move<VkRenderPass>> createCustomRenderPasses (const DeviceInterface& vkd, VkDevice device, VkFormat colorFormat, const TestParams& params) 752{ 753 std::vector<Move<VkRenderPass>> renderPasses; 754 const bool useDependencies = (params.barrierType == BarrierType::DEPENDENCY); 755 const bool twoPipelines = params.needsTwoPipelines(); 756 const bool twoSubpasses = (twoPipelines && useDependencies); 757 const uint32_t pipelineCount = (twoPipelines ? 2u : 1u); 758 const uint32_t subpassCount = (twoSubpasses ? 2u : 1u); 759 const uint32_t renderPassCount = ((twoPipelines && !twoSubpasses) ? 
2u : 1u); 760 761 const std::vector<VkAttachmentDescription> attachmentDescs = 762 { 763 { 764 0u, // VkAttachmentDescriptionFlags flags; 765 colorFormat, // VkFormat format; 766 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples; 767 VK_ATTACHMENT_LOAD_OP_CLEAR, // VkAttachmentLoadOp loadOp; 768 VK_ATTACHMENT_STORE_OP_STORE, // VkAttachmentStoreOp storeOp; 769 VK_ATTACHMENT_LOAD_OP_DONT_CARE, // VkAttachmentLoadOp stencilLoadOp; 770 VK_ATTACHMENT_STORE_OP_DONT_CARE, // VkAttachmentStoreOp stencilStoreOp; 771 VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout initialLayout; 772 VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, // VkImageLayout finalLayout; 773 } 774 }; 775 776 const std::vector<VkAttachmentReference> attachmentRefs = { { 0u, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL } }; 777 778 // One or two identical subpasses. 779 const VkSubpassDescription subpassDesc = 780 { 781 0u, // VkSubpassDescriptionFlags flags; 782 VK_PIPELINE_BIND_POINT_GRAPHICS, // VkPipelineBindPoint pipelineBindPoint; 783 0u, // uint32_t inputAttachmentCount; 784 nullptr, // const VkAttachmentReference* pInputAttachments; 785 static_cast<uint32_t>(attachmentRefs.size()), // uint32_t colorAttachmentCount; 786 de::dataOrNull(attachmentRefs), // const VkAttachmentReference* pColorAttachments; 787 nullptr, // const VkAttachmentReference* pResolveAttachments; 788 nullptr, // const VkAttachmentReference* pDepthStencilAttachment; 789 0u, // uint32_t preserveAttachmentCount; 790 nullptr, // const uint32_t* pPreserveAttachments; 791 }; 792 793 const std::vector<VkSubpassDescription> subpassDescs (subpassCount, subpassDesc); 794 795 std::vector<VkSubpassDependency> dependencies; 796 if (fromShaderToShader(params.fromStage, params.toStage) && useDependencies) 797 { 798 const VkSubpassDependency dependency = 799 { 800 0u, // uint32_t srcSubpass; 801 pipelineCount - 1u, // uint32_t dstSubpass; 802 stageToFlags(params.fromStage), // VkPipelineStageFlags srcStageMask; 803 stageToFlags(params.toStage), // VkPipelineStageFlags dstStageMask; 804 writeAccessToFlags(params.writeAccess), // VkAccessFlags srcAccessMask; 805 readAccessToFlags(params.readAccess), // VkAccessFlags dstAccessMask; 806 0u, // VkDependencyFlags dependencyFlags; 807 }; 808 dependencies.push_back(dependency); 809 } 810 811 const VkRenderPassCreateInfo createInfo = 812 { 813 VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, // VkStructureType sType; 814 nullptr, // const void* pNext; 815 0u, // VkRenderPassCreateFlags flags; 816 static_cast<uint32_t>(attachmentDescs.size()), // uint32_t attachmentCount; 817 de::dataOrNull(attachmentDescs), // const VkAttachmentDescription* pAttachments; 818 static_cast<uint32_t>(subpassDescs.size()), // uint32_t subpassCount; 819 de::dataOrNull(subpassDescs), // const VkSubpassDescription* pSubpasses; 820 static_cast<uint32_t>(dependencies.size()), // uint32_t dependencyCount; 821 de::dataOrNull(dependencies), // const VkSubpassDependency* pDependencies; 822 }; 823 824 for (uint32_t renderPassIdx = 0u; renderPassIdx < renderPassCount; ++renderPassIdx) 825 renderPasses.push_back(createRenderPass(vkd, device, &createInfo)); 826 827 return renderPasses; 828} 829 830void hostToTransferMemoryBarrier (const DeviceInterface& vkd, VkCommandBuffer cmdBuffer) 831{ 832 const auto barrier = makeMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT); 833 cmdPipelineMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, &barrier); 834} 835 836void transferToHostMemoryBarrier (const DeviceInterface& vkd, 
VkCommandBuffer cmdBuffer) 837{ 838 const auto barrier = makeMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT); 839 cmdPipelineMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, &barrier); 840} 841 842tcu::TestStatus MeshShaderSyncInstance::iterate (void) 843{ 844 const auto& vkd = m_context.getDeviceInterface(); 845 const auto device = m_context.getDevice(); 846 auto& alloc = m_context.getDefaultAllocator(); 847 const auto queueIndex = m_context.getUniversalQueueFamilyIndex(); 848 const auto queue = m_context.getUniversalQueue(); 849 850 const auto imageFormat = getImageFormat(); 851 const auto imageExtent = getImageExtent(); 852 const auto colorBufferUsage = (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT); 853 const auto colorSRR = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u); 854 const auto colorSRL = makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u); 855 const auto bufferSize = static_cast<VkDeviceSize>(sizeof(m_params.testValue)); 856 const auto descriptorType = resourceTypeToDescriptor(m_params.resourceType); 857 const auto resourceStages = m_params.getResourceShaderStages(); 858 const auto auxiliarBufferUsage = (VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT); 859 const auto useGeneralLayout = m_params.preferGeneralLayout(); 860 861 const auto writeAccessFlags = writeAccessToFlags(m_params.writeAccess); 862 const auto readAccessFlags = readAccessToFlags(m_params.readAccess); 863 const auto fromStageFlags = stageToFlags(m_params.fromStage); 864 const auto toStageFlags = stageToFlags(m_params.toStage); 865 866 // Prepare color buffer. 867 const VkImageCreateInfo colorBufferCreateInfo = 868 { 869 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType; 870 nullptr, // const void* pNext; 871 0u, // VkImageCreateFlags flags; 872 VK_IMAGE_TYPE_2D, // VkImageType imageType; 873 imageFormat, // VkFormat format; 874 imageExtent, // VkExtent3D extent; 875 1u, // uint32_t mipLevels; 876 1u, // uint32_t arrayLayers; 877 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples; 878 VK_IMAGE_TILING_OPTIMAL, // VkImageTiling tiling; 879 colorBufferUsage, // VkImageUsageFlags usage; 880 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode; 881 0u, // uint32_t queueFamilyIndexCount; 882 nullptr, // const uint32_t* pQueueFamilyIndices; 883 VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout initialLayout; 884 }; 885 ImageWithMemory colorBuffer (vkd, device, alloc, colorBufferCreateInfo, MemoryRequirement::Any); 886 const auto colorBufferView = makeImageView(vkd, device, colorBuffer.get(), VK_IMAGE_VIEW_TYPE_2D, imageFormat, colorSRR); 887 888 // Main resource. 889 using ImageWithMemoryPtr = de::MovePtr<ImageWithMemory>; 890 using BufferWithMemoryPtr = de::MovePtr<BufferWithMemory>; 891 892 ImageWithMemoryPtr imageResource; 893 Move<VkImageView> imageResourceView; 894 VkImageLayout imageDescriptorLayout = (useGeneralLayout ? 
VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); 895 VkImageLayout currentLayout = VK_IMAGE_LAYOUT_UNDEFINED; 896 BufferWithMemoryPtr bufferResource; 897 898 bool useImageResource = false; 899 bool useBufferResource = false; 900 901 switch (m_params.resourceType) 902 { 903 case ResourceType::UNIFORM_BUFFER: 904 case ResourceType::STORAGE_BUFFER: 905 useBufferResource = true; 906 break; 907 case ResourceType::STORAGE_IMAGE: 908 case ResourceType::SAMPLED_IMAGE: 909 useImageResource = true; 910 break; 911 default: 912 DE_ASSERT(false); 913 break; 914 } 915 916 // One resource needed. 917 DE_ASSERT(useImageResource != useBufferResource); 918 919 if (useImageResource) 920 { 921 const auto resourceImageUsage = resourceImageUsageFlags(m_params.resourceType); 922 923 const VkImageCreateInfo resourceCreateInfo = 924 { 925 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType; 926 nullptr, // const void* pNext; 927 0u, // VkImageCreateFlags flags; 928 VK_IMAGE_TYPE_2D, // VkImageType imageType; 929 imageFormat, // VkFormat format; 930 imageExtent, // VkExtent3D extent; 931 1u, // uint32_t mipLevels; 932 1u, // uint32_t arrayLayers; 933 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples; 934 VK_IMAGE_TILING_OPTIMAL, // VkImageTiling tiling; 935 resourceImageUsage, // VkImageUsageFlags usage; 936 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode; 937 0u, // uint32_t queueFamilyIndexCount; 938 nullptr, // const uint32_t* pQueueFamilyIndices; 939 VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout initialLayout; 940 }; 941 imageResource = ImageWithMemoryPtr(new ImageWithMemory(vkd, device, alloc, resourceCreateInfo, MemoryRequirement::Any)); 942 imageResourceView = makeImageView(vkd, device, imageResource->get(), VK_IMAGE_VIEW_TYPE_2D, imageFormat, colorSRR); 943 } 944 else 945 { 946 const auto resourceBufferUsage = resourceBufferUsageFlags(m_params.resourceType); 947 const auto resourceBufferCreateInfo = makeBufferCreateInfo(bufferSize, resourceBufferUsage); 948 bufferResource = BufferWithMemoryPtr(new BufferWithMemory(vkd, device, alloc, resourceBufferCreateInfo, MemoryRequirement::HostVisible)); 949 } 950 951 Move<VkSampler> sampler; 952 if (descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) 953 { 954 const VkSamplerCreateInfo samplerCreateInfo = 955 { 956 VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, // VkStructureType sType; 957 nullptr, // const void* pNext; 958 0u, // VkSamplerCreateFlags flags; 959 VK_FILTER_NEAREST, // VkFilter magFilter; 960 VK_FILTER_NEAREST, // VkFilter minFilter; 961 VK_SAMPLER_MIPMAP_MODE_NEAREST, // VkSamplerMipmapMode mipmapMode; 962 VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // VkSamplerAddressMode addressModeU; 963 VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // VkSamplerAddressMode addressModeV; 964 VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // VkSamplerAddressMode addressModeW; 965 0.0f, // float mipLodBias; 966 VK_FALSE, // VkBool32 anisotropyEnable; 967 1.0f, // float maxAnisotropy; 968 VK_FALSE, // VkBool32 compareEnable; 969 VK_COMPARE_OP_NEVER, // VkCompareOp compareOp; 970 0.0f, // float minLod; 971 0.0f, // float maxLod; 972 VK_BORDER_COLOR_INT_TRANSPARENT_BLACK, // VkBorderColor borderColor; 973 VK_FALSE, // VkBool32 unnormalizedCoordinates; 974 }; 975 sampler = createSampler(vkd, device, &samplerCreateInfo); 976 } 977 978 // Auxiliary host-coherent buffer for some cases. Being host-coherent lets us avoid extra barriers that would "pollute" synchronization tests. 
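	// The auxiliary buffer is only created when the transfer stage acts as the source or destination of the synchronization (see needsAuxiliarBuffer).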
979 BufferWithMemoryPtr hostCoherentBuffer; 980 void* hostCoherentDataPtr = nullptr; 981 if (needsAuxiliarBuffer(m_params.fromStage, m_params.toStage)) 982 { 983 const auto auxiliarBufferCreateInfo = makeBufferCreateInfo(bufferSize, auxiliarBufferUsage); 984 hostCoherentBuffer = BufferWithMemoryPtr(new BufferWithMemory(vkd, device, alloc, auxiliarBufferCreateInfo, (MemoryRequirement::HostVisible | MemoryRequirement::Coherent))); 985 hostCoherentDataPtr = hostCoherentBuffer->getAllocation().getHostPtr(); 986 } 987 988 // Descriptor pool. 989 Move<VkDescriptorPool> descriptorPool; 990 { 991 DescriptorPoolBuilder poolBuilder; 992 poolBuilder.addType(descriptorType); 993 descriptorPool = poolBuilder.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u); 994 } 995 996 // Descriptor set layout. 997 Move<VkDescriptorSetLayout> setLayout; 998 { 999 DescriptorSetLayoutBuilder layoutBuilder; 1000 layoutBuilder.addSingleBinding(descriptorType, resourceStages); 1001 setLayout = layoutBuilder.build(vkd, device); 1002 } 1003 1004 // Descriptor set. 1005 const auto descriptorSet = makeDescriptorSet(vkd, device, descriptorPool.get(), setLayout.get()); 1006 1007 // Update descriptor set. 1008 { 1009 DescriptorSetUpdateBuilder updateBuilder; 1010 const auto location = DescriptorSetUpdateBuilder::Location::binding(0u); 1011 1012 switch (descriptorType) 1013 { 1014 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: 1015 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: 1016 { 1017 const auto bufferInfo = makeDescriptorBufferInfo(bufferResource->get(), 0ull, bufferSize); 1018 updateBuilder.writeSingle(descriptorSet.get(), location, descriptorType, &bufferInfo); 1019 } 1020 break; 1021 case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: 1022 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: 1023 { 1024 auto descriptorImageInfo = makeDescriptorImageInfo(sampler.get(), imageResourceView.get(), imageDescriptorLayout); 1025 updateBuilder.writeSingle(descriptorSet.get(), location, descriptorType, &descriptorImageInfo); 1026 } 1027 break; 1028 default: 1029 DE_ASSERT(false); break; 1030 } 1031 1032 updateBuilder.update(vkd, device); 1033 } 1034 1035 // Render passes and framebuffers. 1036 const auto renderPasses = createCustomRenderPasses(vkd, device, imageFormat, m_params); 1037 const bool multiRenderPass = (renderPasses.size() > 1u); 1038 DE_ASSERT(renderPasses.size() > 0u); 1039 1040 std::vector<Move<VkFramebuffer>> framebuffers; 1041 framebuffers.reserve(renderPasses.size()); 1042 1043 for (const auto& renderPass : renderPasses) 1044 framebuffers.push_back(makeFramebuffer(vkd, device, renderPass.get(), colorBufferView.get(), imageExtent.width, imageExtent.height)); 1045 1046 // Viewports and scissors. 1047 std::vector<VkViewport> viewports (1u, makeViewport(imageExtent)); 1048 std::vector<VkRect2D> scissors (1u, makeRect2D(imageExtent)); 1049 1050 using PushConstantStruct = TestParams::PushConstantStruct; 1051 1052 // Pipeline layout. 1053 const auto pcSize = static_cast<uint32_t>(sizeof(PushConstantStruct)); 1054 const auto pcRange = makePushConstantRange(resourceStages, 0u, pcSize); 1055 const auto pipelineLayout = makePipelineLayout(vkd, device, setLayout.get(), &pcRange); 1056 1057 // Shader modules, pipelines and pipeline layouts. 1058 const auto twoPipelines = m_params.needsTwoPipelines(); 1059 const auto selfDeps = m_params.subpassSelfDependency(); 1060 1061 // Both at the same time does not make sense. 1062 DE_ASSERT(!(twoPipelines && selfDeps)); 1063 1064 const auto pipelineCount = (twoPipelines ? 
2u : 1u); 1065 const auto drawCount = (selfDeps ? 2u : 1u); 1066 const auto iterationCount = std::max(pipelineCount, drawCount); 1067 1068 std::vector<Move<VkPipeline>> pipelines; 1069 pipelines.reserve(pipelineCount); 1070 1071 // Shader modules. 1072 const auto& binaries = m_context.getBinaryCollection(); 1073 1074 Move<VkShaderModule> taskShader; 1075 if (m_params.needsTask()) 1076 taskShader = createShaderModule(vkd, device, binaries.get("task")); 1077 1078 const auto meshShader = createShaderModule(vkd, device, binaries.get("mesh")); 1079 const auto fragShader = createShaderModule(vkd, device, binaries.get("frag")); 1080 const auto taskPassthroughShader = createShaderModule(vkd, device, binaries.get("taskPassthrough")); 1081 const auto fragPassthroughShader = createShaderModule(vkd, device, binaries.get("fragPassthrough")); 1082 const auto meshPassthroughShader = createShaderModule(vkd, device, binaries.get("meshPassthrough")); 1083 const auto meshPassthroughWithTaskShader = createShaderModule(vkd, device, binaries.get("meshPassthroughWithTask")); 1084 1085 if (pipelineCount == 1u) 1086 { 1087 // Pipeline. 1088 pipelines.push_back(makeGraphicsPipeline(vkd, device, pipelineLayout.get(), taskShader.get(), meshShader.get(), fragShader.get(), renderPasses.at(0u).get(), viewports, scissors)); 1089 } 1090 else if (pipelineCount == 2u) 1091 { 1092 // Mandatory stages in each pipeline: the first pipeline will contain the "from" stage (write) and the second one the "to" stage (read). 1093 const std::vector<Stage> mandatoryStages { m_params.fromStage, m_params.toStage }; 1094 1095 // One pipeline per mandatory stage. 1096 for (uint32_t pipelineIdx = 0u; pipelineIdx < pipelineCount; ++pipelineIdx) 1097 { 1098 const auto& stage = mandatoryStages.at(pipelineIdx); 1099 1100 VkShaderModule taskModule = DE_NULL; 1101 VkShaderModule meshModule = DE_NULL; 1102 VkShaderModule fragModule = DE_NULL; 1103 1104 const bool lastSubpass = (pipelineIdx == pipelineCount - 1u); 1105 const auto pipelineStages = subpassStages(stage, lastSubpass); 1106 const bool hasTaskShader = hasTask(pipelineStages); 1107 const bool hasFragShader = hasFrag(pipelineStages); 1108 1109 // Decide which shaders to use for this one. 1110 if (hasTaskShader) 1111 taskModule = ((stage == Stage::TASK) ? taskShader.get() : taskPassthroughShader.get()); 1112 1113 if (stage == Stage::MESH) 1114 meshModule = meshShader.get(); 1115 else 1116 { 1117 meshModule = (hasTaskShader ? meshPassthroughWithTaskShader.get() : meshPassthroughShader.get()); 1118 } 1119 1120 if (hasFragShader) 1121 fragModule = ((stage == Stage::FRAG) ? fragShader.get() : fragPassthroughShader.get()); 1122 1123 // Create pipeline. When using multiple render passes, the subpass is always zero. When using a single render pass, each pipeline is prepared for one subpass. 1124 const auto renderPass = (multiRenderPass ? renderPasses.at(pipelineIdx).get() : renderPasses[0].get()); 1125 const auto subpass = (multiRenderPass ? 0u : pipelineIdx); 1126 1127 pipelines.push_back(makeGraphicsPipeline(vkd, device, pipelineLayout.get(), 1128 taskModule, meshModule, fragModule, 1129 renderPass, viewports, scissors, subpass)); 1130 } 1131 } 1132 else 1133 { 1134 DE_ASSERT(false); 1135 } 1136 1137 // Command pool and buffer. 
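	// Everything is recorded into a single primary command buffer: resource preparation, barriers, the render pass(es) with the mesh draws, and the final copies used for verification.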
const auto cmdPool      = makeCommandPool(vkd, device, queueIndex);
const auto cmdBufferPtr = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
const auto cmdBuffer    = cmdBufferPtr.get();

beginCommandBuffer(vkd, cmdBuffer);

if (m_params.fromStage == Stage::HOST)
{
	// Prepare buffer from host when the source stage is the host.
	DE_ASSERT(useBufferResource);

	auto& resourceBufferAlloc   = bufferResource->getAllocation();
	void* resourceBufferDataPtr = resourceBufferAlloc.getHostPtr();

	deMemcpy(resourceBufferDataPtr, &m_params.testValue, sizeof(m_params.testValue));
	flushAlloc(vkd, device, resourceBufferAlloc);
}
else if (m_params.fromStage == Stage::TRANSFER)
{
	// Put value in host-coherent buffer and transfer it to the resource buffer or image.
	deMemcpy(hostCoherentDataPtr, &m_params.testValue, sizeof(m_params.testValue));
	hostToTransferMemoryBarrier(vkd, cmdBuffer);

	if (useBufferResource)
	{
		const auto copyRegion = makeBufferCopy(0ull, 0ull, bufferSize);
		vkd.cmdCopyBuffer(cmdBuffer, hostCoherentBuffer->get(), bufferResource->get(), 1u, &copyRegion);
	}
	else
	{
		// Move image to the right layout for transfer.
		const auto newLayout = (useGeneralLayout ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
		if (newLayout != currentLayout)
		{
			const auto preCopyBarrier = makeImageMemoryBarrier(0u, VK_ACCESS_TRANSFER_WRITE_BIT, currentLayout, newLayout, imageResource->get(), colorSRR);
			cmdPipelineImageMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, &preCopyBarrier);
			currentLayout = newLayout;
		}
		const auto copyRegion = makeBufferImageCopy(imageExtent, colorSRL);
		vkd.cmdCopyBufferToImage(cmdBuffer, hostCoherentBuffer->get(), imageResource->get(), currentLayout, 1u, &copyRegion);
	}
}
else if (isShaderStage(m_params.fromStage))
{
	// The image or buffer will be written to from shaders. Images need to be in the right layout.
	if (useImageResource)
	{
		const auto newLayout = VK_IMAGE_LAYOUT_GENERAL;
		if (newLayout != currentLayout)
		{
			const auto preWriteBarrier = makeImageMemoryBarrier(0u, (VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT), currentLayout, newLayout, imageResource->get(), colorSRR);
			cmdPipelineImageMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, fromStageFlags, &preWriteBarrier);
			currentLayout = newLayout;
		}
	}
}
else
{
	DE_ASSERT(false);
}

// If the resource is going to be read from shaders and written from a non-shader stage, we'll insert the main barrier before running the pipeline.
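// This covers the host-to-shader and transfer-to-shader cases; the symmetric shader-to-host/transfer barrier is recorded after the render pass further below.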
1200 if (isShaderStage(m_params.toStage) && !isShaderStage(m_params.fromStage)) 1201 { 1202 if (m_params.barrierType == BarrierType::GENERAL) 1203 { 1204 const auto memoryBarrier = makeMemoryBarrier(writeAccessFlags, readAccessFlags); 1205 cmdPipelineMemoryBarrier(vkd, cmdBuffer, fromStageFlags, toStageFlags, &memoryBarrier); 1206 } 1207 else if (m_params.barrierType == BarrierType::SPECIFIC) 1208 { 1209 if (useBufferResource) 1210 { 1211 const auto bufferBarrier = makeBufferMemoryBarrier(writeAccessFlags, readAccessFlags, bufferResource->get(), 0ull, bufferSize); 1212 cmdPipelineBufferMemoryBarrier(vkd, cmdBuffer, fromStageFlags, toStageFlags, &bufferBarrier); 1213 } 1214 else 1215 { 1216 const auto newLayout = (useGeneralLayout ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); 1217 const auto imageBarrier = makeImageMemoryBarrier(writeAccessFlags, readAccessFlags, currentLayout, newLayout, imageResource->get(), colorSRR); 1218 1219 cmdPipelineImageMemoryBarrier(vkd, cmdBuffer, fromStageFlags, toStageFlags, &imageBarrier); 1220 currentLayout = newLayout; 1221 } 1222 } 1223 // For subpass dependencies, they have already been included in the render pass or loop below. 1224 } 1225 1226 // Run the pipeline. 1227 if (!multiRenderPass) 1228 beginRenderPass(vkd, cmdBuffer, renderPasses[0].get(), framebuffers[0].get(), scissors.at(0), tcu::UVec4(0u)); 1229 1230 vkd.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout.get(), 0u, 1u, &descriptorSet.get(), 0u, nullptr); 1231 1232 for (uint32_t iterationIdx = 0u; iterationIdx < iterationCount; ++iterationIdx) 1233 { 1234 if (iterationIdx > 0u && !multiRenderPass && twoPipelines) 1235 vkd.cmdNextSubpass(cmdBuffer, VK_SUBPASS_CONTENTS_INLINE); 1236 1237 if (multiRenderPass) 1238 beginRenderPass(vkd, cmdBuffer, renderPasses.at(iterationIdx).get(), framebuffers.at(iterationIdx).get(), scissors.at(0), tcu::UVec4(0u)); 1239 1240 if (twoPipelines || iterationIdx == 0u) 1241 vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelines.at(iterationIdx).get()); 1242 1243 PushConstantStruct pcData; 1244 if (selfDeps) 1245 { 1246 // First draw writes, second draw reads. 1247 pcData.writeVal = 1u - iterationIdx; 1248 pcData.readVal = iterationIdx; 1249 } 1250 else 1251 { 1252 // Otherwise reads and writes freely according to the pipeline shaders. 1253 pcData.writeVal = 1u; 1254 pcData.readVal = 1u; 1255 } 1256 vkd.cmdPushConstants(cmdBuffer, pipelineLayout.get(), resourceStages, 0u, pcSize, &pcData); 1257 vkd.cmdDrawMeshTasksEXT(cmdBuffer, 1u, 1u, 1u); 1258 1259 if (multiRenderPass) 1260 endRenderPass(vkd, cmdBuffer); 1261 1262 // If there are self-dependencies or multiple render passes, synchronize resource between draw calls. 1263 if ((multiRenderPass || selfDeps) && iterationIdx == 0u) 1264 { 1265 // In the case of self-dependencies, the barrier type is BarrierType::DEPENDENCY and we'll insert a general barrier because: 1266 // * VUID-vkCmdPipelineBarrier-bufferMemoryBarrierCount-01178 forbids using buffer barriers inside render passes. 1267 // * VUID-vkCmdPipelineBarrier-image-04073 forbids using image memory barriers inside render passes with resources that are not attachments. 
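			// When multiple render passes are used instead, the barrier below is recorded between them (outside any render pass), so specific buffer or image barriers are also legal in that case.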
1268 if (m_params.barrierType == BarrierType::GENERAL || m_params.barrierType == BarrierType::DEPENDENCY) 1269 { 1270 const auto memoryBarrier = makeMemoryBarrier(writeAccessFlags, readAccessFlags); 1271 cmdPipelineMemoryBarrier(vkd, cmdBuffer, fromStageFlags, toStageFlags, &memoryBarrier); 1272 } 1273 else if (m_params.barrierType == BarrierType::SPECIFIC) 1274 { 1275 if (useBufferResource) 1276 { 1277 const auto bufferBarrier = makeBufferMemoryBarrier(writeAccessFlags, readAccessFlags, bufferResource->get(), 0ull, bufferSize); 1278 cmdPipelineBufferMemoryBarrier(vkd, cmdBuffer, fromStageFlags, toStageFlags, &bufferBarrier); 1279 } 1280 else 1281 { 1282 // Note: the image will only be read from shader stages or from the transfer stage. 1283 DE_ASSERT(useGeneralLayout); 1284 const auto newLayout = VK_IMAGE_LAYOUT_GENERAL; 1285 const auto imageBarrier = makeImageMemoryBarrier(writeAccessFlags, readAccessFlags, currentLayout, newLayout, imageResource->get(), colorSRR); 1286 1287 cmdPipelineImageMemoryBarrier(vkd, cmdBuffer, fromStageFlags, toStageFlags, &imageBarrier); 1288 currentLayout = newLayout; 1289 } 1290 } 1291 else 1292 { 1293 DE_ASSERT(false); 1294 } 1295 1296 if (multiRenderPass) 1297 { 1298 // Sync color attachment writes. 1299 const auto colorWritesBarrier = makeMemoryBarrier(VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT); 1300 cmdPipelineMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, &colorWritesBarrier); 1301 } 1302 } 1303 } 1304 1305 if (!multiRenderPass) 1306 endRenderPass(vkd, cmdBuffer); 1307 1308 // If the resource was written to from shaders and will be read from a non-shader stage, insert the main barrier after running the pipeline. 1309 if (isShaderStage(m_params.fromStage) && !isShaderStage(m_params.toStage)) 1310 { 1311 if (m_params.barrierType == BarrierType::GENERAL) 1312 { 1313 const auto memoryBarrier = makeMemoryBarrier(writeAccessFlags, readAccessFlags); 1314 cmdPipelineMemoryBarrier(vkd, cmdBuffer, fromStageFlags, toStageFlags, &memoryBarrier); 1315 } 1316 else if (m_params.barrierType == BarrierType::SPECIFIC) 1317 { 1318 if (useBufferResource) 1319 { 1320 const auto bufferBarrier = makeBufferMemoryBarrier(writeAccessFlags, readAccessFlags, bufferResource->get(), 0ull, bufferSize); 1321 cmdPipelineBufferMemoryBarrier(vkd, cmdBuffer, fromStageFlags, toStageFlags, &bufferBarrier); 1322 } 1323 else 1324 { 1325 // Note: the image will only be read from shader stages or from the transfer stage. 1326 const auto newLayout = (useGeneralLayout ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); 1327 const auto imageBarrier = makeImageMemoryBarrier(writeAccessFlags, readAccessFlags, currentLayout, newLayout, imageResource->get(), colorSRR); 1328 1329 cmdPipelineImageMemoryBarrier(vkd, cmdBuffer, fromStageFlags, toStageFlags, &imageBarrier); 1330 currentLayout = newLayout; 1331 } 1332 } 1333 // For subpass dependencies, they have already been included in the render pass and loop. 1334 } 1335 1336 // Read resource from the destination stage if needed. 1337 if (m_params.toStage == Stage::HOST) 1338 { 1339 // Nothing to do. The test value should be in the resource buffer already, which is host-visible. 1340 } 1341 else if (m_params.toStage == Stage::TRANSFER) 1342 { 1343 // Copy value from resource to host-coherent buffer to be verified later. 
	if (useBufferResource)
	{
		const auto copyRegion = makeBufferCopy(0ull, 0ull, bufferSize);
		vkd.cmdCopyBuffer(cmdBuffer, bufferResource->get(), hostCoherentBuffer->get(), 1u, &copyRegion);
	}
	else
	{
		const auto copyRegion = makeBufferImageCopy(imageExtent, colorSRL);
		vkd.cmdCopyImageToBuffer(cmdBuffer, imageResource->get(), currentLayout, hostCoherentBuffer->get(), 1u, &copyRegion);
	}

	transferToHostMemoryBarrier(vkd, cmdBuffer);
}

// If the output value will be available in the color buffer, take the chance to transfer its contents to a host-coherent buffer.
BufferWithMemoryPtr colorVerificationBuffer;
void*               colorVerificationDataPtr = nullptr;

if (valueInColorBuffer(m_params.toStage))
{
	const auto auxiliarBufferCreateInfo = makeBufferCreateInfo(bufferSize, auxiliarBufferUsage);
	colorVerificationBuffer  = BufferWithMemoryPtr(new BufferWithMemory(vkd, device, alloc, auxiliarBufferCreateInfo, (MemoryRequirement::HostVisible | MemoryRequirement::Coherent)));
	colorVerificationDataPtr = colorVerificationBuffer->getAllocation().getHostPtr();

	const auto srcAccess    = (VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT);
	const auto dstAccess    = VK_ACCESS_TRANSFER_READ_BIT;
	const auto colorBarrier = makeImageMemoryBarrier(srcAccess, dstAccess, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, colorBuffer.get(), colorSRR);
	cmdPipelineImageMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, &colorBarrier);

	const auto copyRegion = makeBufferImageCopy(imageExtent, colorSRL);
	vkd.cmdCopyImageToBuffer(cmdBuffer, colorBuffer.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, colorVerificationBuffer->get(), 1u, &copyRegion);

	transferToHostMemoryBarrier(vkd, cmdBuffer);
}

endCommandBuffer(vkd, cmdBuffer);
submitCommandsAndWait(vkd, device, queue, cmdBuffer);

// Verify output resources as needed.
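// Depending on the destination stage, the test value ends up in the auxiliary host-coherent buffer (transfer), in the resource buffer itself (host) or in the color verification buffer (shader stages).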
1384 1385 if (valueInAuxiliarDestBuffer(m_params.toStage)) 1386 { 1387 uint32_t bufferValue; 1388 deMemcpy(&bufferValue, hostCoherentDataPtr, sizeof(bufferValue)); 1389 1390 if (bufferValue != m_params.testValue) 1391 { 1392 std::ostringstream msg; 1393 msg << "Unexpected value in auxiliar host-coherent buffer: found " << bufferValue << " and expected " << m_params.testValue; 1394 TCU_FAIL(msg.str()); 1395 } 1396 } 1397 1398 if (valueInResourceBuffer(m_params.toStage)) 1399 { 1400 auto& resourceBufferAlloc = bufferResource->getAllocation(); 1401 void* resourceBufferDataPtr = resourceBufferAlloc.getHostPtr(); 1402 uint32_t bufferValue; 1403 1404 invalidateAlloc(vkd, device, resourceBufferAlloc); 1405 deMemcpy(&bufferValue, resourceBufferDataPtr, sizeof(bufferValue)); 1406 1407 if (bufferValue != m_params.testValue) 1408 { 1409 std::ostringstream msg; 1410 msg << "Unexpected value in resource buffer: found " << bufferValue << " and expected " << m_params.testValue; 1411 TCU_FAIL(msg.str()); 1412 } 1413 } 1414 1415 if (valueInColorBuffer(m_params.toStage)) 1416 { 1417 uint32_t bufferValue; 1418 deMemcpy(&bufferValue, colorVerificationDataPtr, sizeof(bufferValue)); 1419 1420 if (bufferValue != m_params.testValue) 1421 { 1422 std::ostringstream msg; 1423 msg << "Unexpected value in color verification buffer: found " << bufferValue << " and expected " << m_params.testValue; 1424 TCU_FAIL(msg.str()); 1425 } 1426 } 1427 1428 return tcu::TestStatus::pass("Pass"); 1429} 1430 1431// Specific test to check a barrier that crosses secondary command buffers and goes from compute to task. 1432class BarrierAcrossSecondaryCase : public vkt::TestCase 1433{ 1434public: 1435 BarrierAcrossSecondaryCase (tcu::TestContext& testCtx, const std::string& name) 1436 : vkt::TestCase(testCtx, name) 1437 {} 1438 virtual ~BarrierAcrossSecondaryCase (void) {} 1439 1440 void checkSupport (Context& context) const override; 1441 TestInstance* createInstance (Context& context) const override; 1442 void initPrograms (vk::SourceCollections& programCollection) const override; 1443 1444 static constexpr uint32_t kLocalSize = 128u; 1445 static constexpr uint32_t kNumWorkGroups = 16384u; 1446}; 1447 1448class BarrierAcrossSecondaryInstance : public vkt::TestInstance 1449{ 1450public: 1451 BarrierAcrossSecondaryInstance (Context& context) : vkt::TestInstance(context) {} 1452 virtual ~BarrierAcrossSecondaryInstance (void) {} 1453 1454 tcu::TestStatus iterate (void) override; 1455 1456}; 1457 1458void BarrierAcrossSecondaryCase::checkSupport (Context &context) const 1459{ 1460 checkTaskMeshShaderSupportEXT(context, true, true); 1461 context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_VERTEX_PIPELINE_STORES_AND_ATOMICS); 1462} 1463 1464TestInstance* BarrierAcrossSecondaryCase::createInstance (Context &context) const 1465{ 1466 return new BarrierAcrossSecondaryInstance(context); 1467} 1468 1469void BarrierAcrossSecondaryCase::initPrograms (vk::SourceCollections &programCollection) const 1470{ 1471 const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion); 1472 1473 const std::string descriptorDecl = 1474 "layout (set=0, binding=0, std430) buffer OutputBlock {\n" 1475 " uint values[];\n" 1476 "} outBuffer;\n" 1477 "layout (set=0, binding=1, std430) buffer VerificationBlock {\n" 1478 " uint values[];\n" 1479 "} verificationBuffer;\n" 1480 ; 1481 1482 // The compute shader will fill the output buffer. 
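	// Each invocation stores its own global invocation index, which the task shader checks below.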
    std::ostringstream comp;
    comp
        << "#version 450\n"
        << "layout(local_size_x=" << kLocalSize << ") in;\n"
        << descriptorDecl
        << "void main ()\n"
        << "{\n"
        << "    outBuffer.values[gl_GlobalInvocationID.x] = gl_GlobalInvocationID.x;\n"
        << "}\n"
        ;
    programCollection.glslSources.add("comp") << glu::ComputeSource(comp.str());

    // The task shader will read it, verify its contents and write the verification buffer.
    std::ostringstream task;
    task
        << "#version 450\n"
        << "#extension GL_EXT_mesh_shader : enable\n"
        << "layout(local_size_x=" << kLocalSize << ") in;\n"
        << descriptorDecl
        << "void main ()\n"
        << "{\n"
        << "    const uint verifResult = ((outBuffer.values[gl_GlobalInvocationID.x] == gl_GlobalInvocationID.x) ? 1u : 0u);\n"
        << "    verificationBuffer.values[gl_GlobalInvocationID.x] = verifResult;\n"
        << "    EmitMeshTasksEXT(0u, 0u, 0u);\n"
        << "}\n"
        ;
    programCollection.glslSources.add("task") << glu::TaskSource(task.str()) << buildOptions;

    std::ostringstream mesh;
    mesh
        << "#version 450\n"
        << "#extension GL_EXT_mesh_shader : enable\n"
        << "\n"
        << "layout(local_size_x=1) in;\n"
        << "layout(triangles) out;\n"
        << "layout(max_vertices=3, max_primitives=1) out;\n"
        << "\n"
        << "void main ()\n"
        << "{\n"
        << "    SetMeshOutputsEXT(0u, 0u);\n"
        << "}\n"
        ;
    programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
}

tcu::TestStatus BarrierAcrossSecondaryInstance::iterate (void)
{
    const auto& vkd = m_context.getDeviceInterface();
    const auto device = m_context.getDevice();
    auto& alloc = m_context.getDefaultAllocator();
    const auto queueIndex = m_context.getUniversalQueueFamilyIndex();
    const auto queue = m_context.getUniversalQueue();
    const auto kLocalSize = BarrierAcrossSecondaryCase::kLocalSize;
    const auto kNumWorkGroups = BarrierAcrossSecondaryCase::kNumWorkGroups;
    const auto bindingStages = (VK_SHADER_STAGE_COMPUTE_BIT | VK_SHADER_STAGE_TASK_BIT_EXT);
    const auto extent = makeExtent3D(1u, 1u, 1u);

    // Output buffer.
    const auto outputBufferSize = static_cast<VkDeviceSize>(kLocalSize * kNumWorkGroups * sizeof(uint32_t));
    const auto outputBufferInfo = makeBufferCreateInfo(outputBufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
    BufferWithMemory outputBuffer (vkd, device, alloc, outputBufferInfo, MemoryRequirement::HostVisible);
    auto& outputBufferAlloc = outputBuffer.getAllocation();
    void* outputBufferData = outputBufferAlloc.getHostPtr();

    // Verification buffer.
    const auto verificationBufferSize = outputBufferSize;
    const auto verificationBufferInfo = outputBufferInfo;
    BufferWithMemory verificationBuffer (vkd, device, alloc, verificationBufferInfo, MemoryRequirement::HostVisible);
    auto& verificationBufferAlloc = verificationBuffer.getAllocation();
    void* verificationBufferData = verificationBufferAlloc.getHostPtr();

    // Prepare buffer data.
    deMemset(outputBufferData, 0, static_cast<size_t>(outputBufferSize));
    deMemset(verificationBufferData, 0, static_cast<size_t>(verificationBufferSize));
    flushAlloc(vkd, device, outputBufferAlloc);
    flushAlloc(vkd, device, verificationBufferAlloc);

    // Descriptor set layout.
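    // A single set with two storage buffer bindings (output and verification), visible to both the compute and task stages.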
    DescriptorSetLayoutBuilder setLayoutBuilder;
    setLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, bindingStages);
    setLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, bindingStages);
    const auto setLayout = setLayoutBuilder.build(vkd, device);

    // Pipeline layout.
    const auto pipelineLayout = makePipelineLayout(vkd, device, setLayout.get());

    // Descriptor pool and set.
    DescriptorPoolBuilder poolBuilder;
    poolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2u);
    const auto descriptorPool = poolBuilder.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
    const auto descriptorSet = makeDescriptorSet(vkd, device, descriptorPool.get(), setLayout.get());

    // Update descriptor set.
    DescriptorSetUpdateBuilder updateBuilder;
    const auto outputBufferDescInfo = makeDescriptorBufferInfo(outputBuffer.get(), 0ull, outputBufferSize);
    const auto verificationBufferDescInfo = makeDescriptorBufferInfo(verificationBuffer.get(), 0ull, verificationBufferSize);
    updateBuilder.writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputBufferDescInfo);
    updateBuilder.writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &verificationBufferDescInfo);
    updateBuilder.update(vkd, device);

    // Graphics pipeline auxiliary data.
    const auto renderPass = makeRenderPass(vkd, device);
    const auto framebuffer = makeFramebuffer(vkd, device, renderPass.get(), 0u, nullptr, extent.width, extent.height);
    const std::vector<VkViewport> viewports (1u, makeViewport(extent));
    const std::vector<VkRect2D> scissors (1u, makeRect2D(extent));

    // Create pipelines.
    const auto& binaries = m_context.getBinaryCollection();
    const auto compModule = createShaderModule(vkd, device, binaries.get("comp"));
    const auto taskModule = createShaderModule(vkd, device, binaries.get("task"));
    const auto meshModule = createShaderModule(vkd, device, binaries.get("mesh"));

    const auto computePipeline = makeComputePipeline(vkd, device, pipelineLayout.get(), compModule.get());
    const auto meshPipeline = makeGraphicsPipeline(vkd, device, pipelineLayout.get(),
                                                   taskModule.get(), meshModule.get(), DE_NULL,
                                                   renderPass.get(), viewports, scissors);

    // Command pool and command buffers.
    const auto cmdPool = makeCommandPool(vkd, device, queueIndex);
    const auto primaryCmdBuffer = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
    const auto compCmdBuffer = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_SECONDARY);
    const auto meshCmdBuffer = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_SECONDARY);

    // Use compute pipeline and record barrier to task shader.
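    // Note the compute dispatch and the compute-to-task memory barrier are recorded in one secondary command buffer, while
    // the task/mesh draw that reads the data is recorded in a different secondary command buffer executed inside a render
    // pass; the dependency therefore has to take effect across secondary command buffer boundaries.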
    {
        const auto cmdBuffer = compCmdBuffer.get();
        const auto comp2TaskBarrier = makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);

        beginSecondaryCommandBuffer(vkd, cmdBuffer);
        vkd.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout.get(), 0u, 1u, &descriptorSet.get(), 0u, nullptr);
        vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, computePipeline.get());
        vkd.cmdDispatch(cmdBuffer, kNumWorkGroups, 1u, 1u);
        cmdPipelineMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TASK_SHADER_BIT_EXT, &comp2TaskBarrier);
        endCommandBuffer(vkd, cmdBuffer);
    }

    // Use mesh pipeline and record barrier to host.
    {
        const auto cmdBuffer = meshCmdBuffer.get();

        beginSecondaryCommandBuffer(vkd, cmdBuffer, renderPass.get(), framebuffer.get());
        vkd.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout.get(), 0u, 1u, &descriptorSet.get(), 0u, nullptr);
        vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, meshPipeline.get());
        vkd.cmdDrawMeshTasksEXT(cmdBuffer, kNumWorkGroups, 1u, 1u);
        endCommandBuffer(vkd, cmdBuffer);
    }

    // Use both secondary command buffers.
    {
        const auto cmdBuffer = primaryCmdBuffer.get();
        const auto task2HostBarrier = makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);

        beginCommandBuffer(vkd, cmdBuffer);
        vkd.cmdExecuteCommands(cmdBuffer, 1u, &compCmdBuffer.get());
        beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0u), VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS);
        vkd.cmdExecuteCommands(cmdBuffer, 1u, &meshCmdBuffer.get());
        endRenderPass(vkd, cmdBuffer);
        cmdPipelineMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_TASK_SHADER_BIT_EXT, VK_PIPELINE_STAGE_HOST_BIT, &task2HostBarrier);
        endCommandBuffer(vkd, cmdBuffer);
        submitCommandsAndWait(vkd, device, queue, cmdBuffer);
    }

    // Verify buffer contents.
    invalidateAlloc(vkd, device, verificationBufferAlloc);
    const std::vector<uint32_t> expectedResult (kNumWorkGroups * kLocalSize, 1u);

    if (deMemCmp(expectedResult.data(), verificationBufferData, de::dataSize(expectedResult)) != 0)
        TCU_FAIL("Unexpected values found in verification buffer");

    return tcu::TestStatus::pass("Pass");
}

} // anonymous

tcu::TestCaseGroup* createMeshShaderSyncTestsEXT (tcu::TestContext& testCtx)
{
    const struct
    {
        Stage fromStage;
        Stage toStage;
    } stageCombinations[] =
    {
        // Combinations where the source and destination stages involve mesh shaders.
        // Note: this could be tested procedurally.
        { Stage::HOST,      Stage::TASK },
        { Stage::HOST,      Stage::MESH },
        { Stage::TRANSFER,  Stage::TASK },
        { Stage::TRANSFER,  Stage::MESH },
        { Stage::TASK,      Stage::MESH },
        { Stage::TASK,      Stage::FRAG },
        { Stage::TASK,      Stage::TRANSFER },
        { Stage::TASK,      Stage::HOST },
        { Stage::MESH,      Stage::FRAG },
        { Stage::MESH,      Stage::TRANSFER },
        { Stage::MESH,      Stage::HOST },

        // These require two pipelines, since the source stage comes after the destination stage in pipeline order.
        { Stage::MESH,      Stage::TASK },
        { Stage::FRAG,      Stage::TASK },
        { Stage::FRAG,      Stage::MESH },
    };

    const struct
    {
        ResourceType resourceType;
        const char* name;
    } resourceTypes[] =
    {
        { ResourceType::UNIFORM_BUFFER, "uniform_buffer" },
        { ResourceType::STORAGE_BUFFER, "storage_buffer" },
        { ResourceType::STORAGE_IMAGE,  "storage_image" },
        { ResourceType::SAMPLED_IMAGE,  "sampled_image" },
    };

    const struct
    {
        BarrierType barrierType;
        const char* name;
    } barrierTypes[] =
    {
        { BarrierType::GENERAL,     "memory_barrier" },
        { BarrierType::SPECIFIC,    "specific_barrier" },
        { BarrierType::DEPENDENCY,  "subpass_dependency" },
    };

    const struct
    {
        WriteAccess writeAccess;
        const char* name;
    } writeAccesses[] =
    {
        { WriteAccess::HOST_WRITE,      "host_write" },
        { WriteAccess::TRANSFER_WRITE,  "transfer_write" },
        { WriteAccess::SHADER_WRITE,    "shader_write" },
    };

    const struct
    {
        ReadAccess readAccess;
        const char* name;
    } readAccesses[] =
    {
        { ReadAccess::HOST_READ,        "host_read" },
        { ReadAccess::TRANSFER_READ,    "transfer_read" },
        { ReadAccess::SHADER_READ,      "shader_read" },
        { ReadAccess::UNIFORM_READ,     "uniform_read" },
    };

    uint32_t testValue = 1628510124u;

    GroupPtr mainGroup (new tcu::TestCaseGroup(testCtx, "synchronization"));

    for (const auto& stageCombination : stageCombinations)
    {
        const std::string combinationName = de::toString(stageCombination.fromStage) + "_to_" + de::toString(stageCombination.toStage);
        GroupPtr combinationGroup (new tcu::TestCaseGroup(testCtx, combinationName.c_str()));

        for (const auto& resourceCase : resourceTypes)
        {
            if (!canWriteTo(stageCombination.fromStage, resourceCase.resourceType))
                continue;

            if (!canReadFrom(stageCombination.toStage, resourceCase.resourceType))
                continue;

            GroupPtr resourceGroup (new tcu::TestCaseGroup(testCtx, resourceCase.name));

            for (const auto& barrierCase : barrierTypes)
            {
                const auto shaderToShader = fromShaderToShader(stageCombination.fromStage, stageCombination.toStage);
                const auto barrierIsDependency = (barrierCase.barrierType == BarrierType::DEPENDENCY);

                // Subpass dependencies can only be used in shader to shader situations.
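                // (Host access and transfer commands cannot take place inside a render pass, so a subpass dependency can
                // only order a write and a read that both happen in shader stages.)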
                if (barrierIsDependency && !shaderToShader)
                    continue;

                GroupPtr barrierGroup (new tcu::TestCaseGroup(testCtx, barrierCase.name));

                for (const auto& writeCase : writeAccesses)
                for (const auto& readCase : readAccesses)
                {
                    if (!canReadResourceAsAccess(resourceCase.resourceType, readCase.readAccess))
                        continue;
                    if (!canWriteResourceAsAccess(resourceCase.resourceType, writeCase.writeAccess))
                        continue;
                    if (!canReadFromStageAsAccess(stageCombination.toStage, readCase.readAccess))
                        continue;
                    if (!canWriteFromStageAsAccess(stageCombination.fromStage, writeCase.writeAccess))
                        continue;

                    const std::string accessCaseName = writeCase.name + std::string("_") + readCase.name;

                    const TestParams testParams =
                    {
                        stageCombination.fromStage,     // Stage        fromStage;
                        stageCombination.toStage,       // Stage        toStage;
                        resourceCase.resourceType,      // ResourceType resourceType;
                        barrierCase.barrierType,        // BarrierType  barrierType;
                        writeCase.writeAccess,          // WriteAccess  writeAccess;
                        readCase.readAccess,            // ReadAccess   readAccess;
                        testValue++,                    // uint32_t     testValue;
                    };

                    barrierGroup->addChild(new MeshShaderSyncCase(testCtx, accessCaseName, testParams));
                }

                resourceGroup->addChild(barrierGroup.release());
            }

            combinationGroup->addChild(resourceGroup.release());
        }

        mainGroup->addChild(combinationGroup.release());
    }

    {
        // Additional synchronization tests
        GroupPtr otherGroup (new tcu::TestCaseGroup(testCtx, "other"));

        // Check synchronizing compute to task across secondary command buffer boundaries
        otherGroup->addChild(new BarrierAcrossSecondaryCase(testCtx, "barrier_across_secondary"));

        mainGroup->addChild(otherGroup.release());
    }

    return mainGroup.release();
}

} // MeshShader
} // vkt