/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2016 The Khronos Group Inc.
 * Copyright (c) 2016 The Android Open Source Project
 * Copyright (c) 2023 LunarG, Inc.
 * Copyright (c) 2023 Nintendo
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Indirect Compute Dispatch tests
 *//*--------------------------------------------------------------------*/

#include "vktComputeIndirectComputeDispatchTests.hpp"
#include "vktComputeTestsUtil.hpp"
#include "vktCustomInstancesDevices.hpp"
#include "vkSafetyCriticalUtil.hpp"

#include <map>
#include <set>
#include <string>
#include <vector>

#include "vkDefs.hpp"
#include "vkRef.hpp"
#include "vkRefUtil.hpp"
#include "vktTestCase.hpp"
#include "vktTestCaseUtil.hpp"
#include "vkPlatform.hpp"
#include "vkPrograms.hpp"
#include "vkMemUtil.hpp"
#include "vkBarrierUtil.hpp"
#include "vkBuilderUtil.hpp"
#include "vkQueryUtil.hpp"
#include "vkDeviceUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vkObjUtil.hpp"
#include "vkBufferWithMemory.hpp"

#include "tcuVector.hpp"
#include "tcuVectorUtil.hpp"
#include "tcuTestLog.hpp"
#include "tcuRGBA.hpp"
#include "tcuStringTemplate.hpp"

#include "deUniquePtr.hpp"
#include "deSharedPtr.hpp"
#include "deStringUtil.hpp"
#include "deArrayUtil.hpp"

#include "gluShaderUtil.hpp"
#include "tcuCommandLine.hpp"

namespace vkt
{
namespace compute
{
namespace
{

std::vector<std::string> removeCoreExtensions (const std::vector<std::string>& supportedExtensions, const std::vector<const char*>& coreExtensions)
{
    std::vector<std::string> nonCoreExtensions;
    std::set<std::string>    excludedExtensions (coreExtensions.begin(), coreExtensions.end());

    for (const auto& supportedExtension : supportedExtensions)
    {
        if (!de::contains(excludedExtensions, supportedExtension))
            nonCoreExtensions.push_back(supportedExtension);
    }

    return nonCoreExtensions;
}

// Creates a device with a queue family that supports compute but not graphics.
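// Two queues are requested: one from the context's universal queue family and one from the first
// family that reports VK_QUEUE_COMPUTE_BIT but not VK_QUEUE_GRAPHICS_BIT; the index of the latter
// is returned through queueFamilyIndex.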
vk::Move<vk::VkDevice> createCustomDevice (Context& context,
#ifdef CTS_USES_VULKANSC
                                           const vkt::CustomInstance& customInstance,
#endif // CTS_USES_VULKANSC
                                           uint32_t& queueFamilyIndex)
{
#ifdef CTS_USES_VULKANSC
    const vk::InstanceInterface& instanceDriver = customInstance.getDriver();
    const vk::VkPhysicalDevice   physicalDevice = chooseDevice(instanceDriver, customInstance, context.getTestContext().getCommandLine());
#else
    const vk::InstanceInterface& instanceDriver = context.getInstanceInterface();
    const vk::VkPhysicalDevice   physicalDevice = context.getPhysicalDevice();
#endif // CTS_USES_VULKANSC

    const std::vector<vk::VkQueueFamilyProperties> queueFamilies = getPhysicalDeviceQueueFamilyProperties(instanceDriver, physicalDevice);

    queueFamilyIndex = 0;
    for (const auto& queueFamily : queueFamilies)
    {
        if ((queueFamily.queueFlags & vk::VK_QUEUE_COMPUTE_BIT) && !(queueFamily.queueFlags & vk::VK_QUEUE_GRAPHICS_BIT))
            break;
        else
            queueFamilyIndex++;
    }

    // A queue family without the graphics bit must have been found, since this is checked in checkSupport().
    DE_ASSERT(queueFamilyIndex < queueFamilies.size());

    const float queuePriority = 1.0f;
    const vk::VkDeviceQueueCreateInfo deviceQueueCreateInfos[] = {
        {
            vk::VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, // VkStructureType             sType;
            DE_NULL,                                        // const void*                 pNext;
            (vk::VkDeviceQueueCreateFlags)0u,               // VkDeviceQueueCreateFlags    flags;
            context.getUniversalQueueFamilyIndex(),         // uint32_t                    queueFamilyIndex;
            1u,                                             // uint32_t                    queueCount;
            &queuePriority,                                 // const float*                pQueuePriorities;
        },
        {
            vk::VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, // VkStructureType             sType;
            DE_NULL,                                        // const void*                 pNext;
            (vk::VkDeviceQueueCreateFlags)0u,               // VkDeviceQueueCreateFlags    flags;
            queueFamilyIndex,                               // uint32_t                    queueFamilyIndex;
            1u,                                             // uint32_t                    queueCount;
            &queuePriority,                                 // const float*                pQueuePriorities;
        }
    };

    // context.getDeviceExtensions() returns the supported device extensions, including those that have
    // been promoted to Vulkan core. The promoted extensions must be removed from the list.
    std::vector<const char*> coreExtensions;
    vk::getCoreDeviceExtensions(context.getUsedApiVersion(), coreExtensions);
    std::vector<std::string> nonCoreExtensions (removeCoreExtensions(context.getDeviceExtensions(), coreExtensions));

    std::vector<const char*> extensionNames;
    extensionNames.reserve(nonCoreExtensions.size());
    for (const std::string& extension : nonCoreExtensions)
        extensionNames.push_back(extension.c_str());

    const auto& deviceFeatures2 = context.getDeviceFeatures2();

    const void* pNext = &deviceFeatures2;
#ifdef CTS_USES_VULKANSC
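    // In a subprocess run, device objects are reserved according to the maximum statistics recorded
    // by the resource interface (getStatMax()); otherwise a default (reset) reservation structure is used.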
    VkDeviceObjectReservationCreateInfo memReservationInfo = context.getTestContext().getCommandLine().isSubProcess() ?
        context.getResourceInterface()->getStatMax() : resetDeviceObjectReservationCreateInfo();
    memReservationInfo.pNext = pNext;
    pNext = &memReservationInfo;

    VkPipelineCacheCreateInfo       pcCI;
    std::vector<VkPipelinePoolSize> poolSizes;
    if (context.getTestContext().getCommandLine().isSubProcess())
    {
        if (context.getResourceInterface()->getCacheDataSize() > 0)
        {
            pcCI =
            {
                VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO,           // VkStructureType               sType;
                DE_NULL,                                                // const void*                   pNext;
                VK_PIPELINE_CACHE_CREATE_READ_ONLY_BIT |
                VK_PIPELINE_CACHE_CREATE_USE_APPLICATION_STORAGE_BIT,   // VkPipelineCacheCreateFlags    flags;
                context.getResourceInterface()->getCacheDataSize(),     // deUintptr                     initialDataSize;
                context.getResourceInterface()->getCacheData()          // const void*                   pInitialData;
            };
            memReservationInfo.pipelineCacheCreateInfoCount = 1;
            memReservationInfo.pPipelineCacheCreateInfos    = &pcCI;
        }
        poolSizes = context.getResourceInterface()->getPipelinePoolSizes();
        if (!poolSizes.empty())
        {
            memReservationInfo.pipelinePoolSizeCount = deUint32(poolSizes.size());
            memReservationInfo.pPipelinePoolSizes    = poolSizes.data();
        }
    }
#endif // CTS_USES_VULKANSC

    const vk::VkDeviceCreateInfo deviceCreateInfo =
    {
        vk::VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,       // VkStructureType                    sType;
        pNext,                                          // const void*                        pNext;
        (vk::VkDeviceCreateFlags)0u,                    // VkDeviceCreateFlags                flags;
        DE_LENGTH_OF_ARRAY(deviceQueueCreateInfos),     // uint32_t                           queueCreateInfoCount;
        deviceQueueCreateInfos,                         // const VkDeviceQueueCreateInfo*     pQueueCreateInfos;
        0u,                                             // uint32_t                           enabledLayerCount;
        DE_NULL,                                        // const char* const*                 ppEnabledLayerNames;
        static_cast<uint32_t>(extensionNames.size()),   // uint32_t                           enabledExtensionCount;
        extensionNames.data(),                          // const char* const*                 ppEnabledExtensionNames;
        DE_NULL,                                        // const VkPhysicalDeviceFeatures*    pEnabledFeatures;
    };

    return vkt::createCustomDevice(context.getTestContext().getCommandLine().isValidationEnabled(),
                                   context.getPlatformInterface(),
#ifdef CTS_USES_VULKANSC
                                   customInstance,
#else
                                   context.getInstance(),
#endif
                                   instanceDriver, physicalDevice, &deviceCreateInfo);
}

enum
{
    RESULT_BLOCK_BASE_SIZE         = 4 * (int)sizeof(deUint32), // uvec3 + uint
    RESULT_BLOCK_NUM_PASSED_OFFSET = 3 * (int)sizeof(deUint32),
    INDIRECT_COMMAND_OFFSET        = 3 * (int)sizeof(deUint32),
};

vk::VkDeviceSize getResultBlockAlignedSize (const vk::InstanceInterface& instanceInterface,
                                            const vk::VkPhysicalDevice   physicalDevice,
                                            const vk::VkDeviceSize       baseSize)
{
    // TODO getPhysicalDeviceProperties() was added to vkQueryUtil in 41-image-load-store-tests. Use it once it's merged.
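    // Each result block is bound at a multiple of minStorageBufferOffsetAlignment, so the base size is
    // rounded up to that alignment: e.g. a 16-byte block with an alignment of 64 becomes
    // (16 / 64 + 1) * 64 = 64 bytes, while an already aligned size is returned unchanged.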
    vk::VkPhysicalDeviceProperties deviceProperties;
    instanceInterface.getPhysicalDeviceProperties(physicalDevice, &deviceProperties);
    vk::VkDeviceSize alignment = deviceProperties.limits.minStorageBufferOffsetAlignment;

    if (alignment == 0 || (baseSize % alignment == 0))
        return baseSize;
    else
        return (baseSize / alignment + 1) * alignment;
}

struct DispatchCommand
{
    DispatchCommand (const deIntptr    offset,
                     const tcu::UVec3& numWorkGroups)
        : m_offset        (offset)
        , m_numWorkGroups (numWorkGroups) {}

    deIntptr   m_offset;
    tcu::UVec3 m_numWorkGroups;
};

typedef std::vector<DispatchCommand> DispatchCommandsVec;

struct DispatchCaseDesc
{
    DispatchCaseDesc (const char*                name,
                      const deUintptr            bufferSize,
                      const tcu::UVec3           workGroupSize,
                      const DispatchCommandsVec& dispatchCommands,
                      const bool                 computeQueueOnly)
        : m_name             (name)
        , m_bufferSize       (bufferSize)
        , m_workGroupSize    (workGroupSize)
        , m_dispatchCommands (dispatchCommands)
        , m_computeOnlyQueue (computeQueueOnly) {}

    const char*               m_name;
    const deUintptr           m_bufferSize;
    const tcu::UVec3          m_workGroupSize;
    const DispatchCommandsVec m_dispatchCommands;
    const bool                m_computeOnlyQueue;
};

class IndirectDispatchInstanceBufferUpload : public vkt::TestInstance
{
public:
    IndirectDispatchInstanceBufferUpload (Context&                                  context,
                                          const std::string&                        name,
                                          const deUintptr                           bufferSize,
                                          const tcu::UVec3&                         workGroupSize,
                                          const DispatchCommandsVec&                dispatchCommands,
                                          const bool                                computeQueueOnly,
                                          const vk::ComputePipelineConstructionType computePipelineConstructionType);

    virtual ~IndirectDispatchInstanceBufferUpload (void) {}

    virtual tcu::TestStatus iterate (void);

protected:
    virtual void fillIndirectBufferData (const vk::VkCommandBuffer   commandBuffer,
                                         const vk::DeviceInterface&  vkdi,
                                         const vk::BufferWithMemory& indirectBuffer);

    deBool verifyResultBuffer (const vk::BufferWithMemory& resultBuffer,
                               const vk::DeviceInterface&  vkdi,
                               const vk::VkDeviceSize      resultBlockSize) const;

    Context&                            m_context;
    const std::string                   m_name;

    vk::VkDevice                        m_device;
#ifdef CTS_USES_VULKANSC
    const CustomInstance                m_customInstance;
#endif // CTS_USES_VULKANSC
    vk::Move<vk::VkDevice>              m_customDevice;
#ifndef CTS_USES_VULKANSC
    de::MovePtr<vk::DeviceDriver>       m_deviceDriver;
#else
    de::MovePtr<vk::DeviceDriverSC, vk::DeinitDeviceDeleter> m_deviceDriver;
#endif // CTS_USES_VULKANSC

    vk::VkQueue                         m_queue;
    deUint32                            m_queueFamilyIndex;

    const deUintptr                     m_bufferSize;
    const tcu::UVec3                    m_workGroupSize;
    const DispatchCommandsVec           m_dispatchCommands;

    de::MovePtr<vk::Allocator>          m_allocator;

    const bool                          m_computeQueueOnly;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;

private:
    IndirectDispatchInstanceBufferUpload (const vkt::TestInstance&);
    IndirectDispatchInstanceBufferUpload& operator= (const vkt::TestInstance&);
};

IndirectDispatchInstanceBufferUpload::IndirectDispatchInstanceBufferUpload (Context&                                  context,
                                                                            const std::string&                        name,
                                                                            const deUintptr                           bufferSize,
                                                                            const tcu::UVec3&                         workGroupSize,
                                                                            const DispatchCommandsVec&                dispatchCommands,
                                                                            const bool                                computeQueueOnly,
                                                                            const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : vkt::TestInstance (context)
    , m_context         (context)
    , m_name            (name)
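    // m_device initially refers to the context's default device; iterate() replaces it with a custom
    // compute-only device when the case requires one.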
    , m_device           (context.getDevice())
#ifdef CTS_USES_VULKANSC
    , m_customInstance   (createCustomInstanceFromContext(context))
#endif // CTS_USES_VULKANSC
    , m_queue            (context.getUniversalQueue())
    , m_queueFamilyIndex (context.getUniversalQueueFamilyIndex())
    , m_bufferSize       (bufferSize)
    , m_workGroupSize    (workGroupSize)
    , m_dispatchCommands (dispatchCommands)
    , m_computeQueueOnly (computeQueueOnly)
    , m_computePipelineConstructionType (computePipelineConstructionType)
{
}

void IndirectDispatchInstanceBufferUpload::fillIndirectBufferData (const vk::VkCommandBuffer commandBuffer, const vk::DeviceInterface& vkdi, const vk::BufferWithMemory& indirectBuffer)
{
    DE_UNREF(commandBuffer);

    const vk::Allocation& alloc           = indirectBuffer.getAllocation();
    deUint8*              indirectDataPtr = reinterpret_cast<deUint8*>(alloc.getHostPtr());

    for (DispatchCommandsVec::const_iterator cmdIter = m_dispatchCommands.begin(); cmdIter != m_dispatchCommands.end(); ++cmdIter)
    {
        DE_ASSERT(cmdIter->m_offset >= 0);
        DE_ASSERT(cmdIter->m_offset % sizeof(deUint32) == 0);
        DE_ASSERT(cmdIter->m_offset + INDIRECT_COMMAND_OFFSET <= (deIntptr)m_bufferSize);

        deUint32* const dstPtr = (deUint32*)&indirectDataPtr[cmdIter->m_offset];

        dstPtr[0] = cmdIter->m_numWorkGroups[0];
        dstPtr[1] = cmdIter->m_numWorkGroups[1];
        dstPtr[2] = cmdIter->m_numWorkGroups[2];
    }

    vk::flushAlloc(vkdi, m_device, alloc);
}

tcu::TestStatus IndirectDispatchInstanceBufferUpload::iterate (void)
{
#ifdef CTS_USES_VULKANSC
    const vk::InstanceInterface& vki = m_customInstance.getDriver();
#else
    const vk::InstanceInterface& vki = m_context.getInstanceInterface();
#endif // CTS_USES_VULKANSC
    tcu::TestContext& testCtx = m_context.getTestContext();

    testCtx.getLog() << tcu::TestLog::Message << "Indirect dispatch buffer size = " << m_bufferSize << tcu::TestLog::EndMessage;
    {
        tcu::ScopedLogSection section (testCtx.getLog(), "Commands", "Indirect Dispatch Commands (" + de::toString(m_dispatchCommands.size()) + " in total)");

        for (deUint32 cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); ++cmdNdx)
        {
            testCtx.getLog()
                << tcu::TestLog::Message
                << cmdNdx << ": " << "offset = " << m_dispatchCommands[cmdNdx].m_offset << ", numWorkGroups = " << m_dispatchCommands[cmdNdx].m_numWorkGroups
                << tcu::TestLog::EndMessage;
        }
    }

    if (m_computeQueueOnly)
    {
        // m_queueFamilyIndex will be updated in createCustomDevice() to match the requested queue type.
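        // The default device, queue and allocator are all replaced below so that every subsequent
        // operation in this test runs on the compute-only queue.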
        m_customDevice = createCustomDevice(m_context,
#ifdef CTS_USES_VULKANSC
                                            m_customInstance,
#endif
                                            m_queueFamilyIndex);
        m_device = m_customDevice.get();
#ifndef CTS_USES_VULKANSC
        m_deviceDriver = de::MovePtr<vk::DeviceDriver>(new vk::DeviceDriver(m_context.getPlatformInterface(), m_context.getInstance(), m_device, m_context.getUsedApiVersion()));
#else
        m_deviceDriver = de::MovePtr<vk::DeviceDriverSC, vk::DeinitDeviceDeleter>(
            new vk::DeviceDriverSC(m_context.getPlatformInterface(), m_customInstance, m_device,
                                   m_context.getTestContext().getCommandLine(), m_context.getResourceInterface(),
                                   m_context.getDeviceVulkanSC10Properties(), m_context.getDeviceProperties(),
                                   m_context.getUsedApiVersion()),
            vk::DeinitDeviceDeleter(m_context.getResourceInterface().get(), m_device));
#endif // CTS_USES_VULKANSC
    }
#ifndef CTS_USES_VULKANSC
    const vk::DeviceInterface& vkdi = m_context.getDeviceInterface();
#else
    const vk::DeviceInterface& vkdi = (m_computeQueueOnly && (DE_NULL != m_deviceDriver)) ? *m_deviceDriver : m_context.getDeviceInterface();
#endif // CTS_USES_VULKANSC
    if (m_computeQueueOnly)
    {
        m_queue     = getDeviceQueue(vkdi, m_device, m_queueFamilyIndex, 0u);
        m_allocator = de::MovePtr<vk::Allocator>(new vk::SimpleAllocator(vkdi, m_device, vk::getPhysicalDeviceMemoryProperties(vki, m_context.getPhysicalDevice())));
    }
    vk::Allocator& allocator = m_allocator.get() ? *m_allocator : m_context.getDefaultAllocator();

    // Create result buffer
    const vk::VkDeviceSize resultBlockSize  = getResultBlockAlignedSize(vki, m_context.getPhysicalDevice(), RESULT_BLOCK_BASE_SIZE);
    const vk::VkDeviceSize resultBufferSize = resultBlockSize * (deUint32)m_dispatchCommands.size();

    vk::BufferWithMemory resultBuffer (
        vkdi, m_device, allocator,
        vk::makeBufferCreateInfo(resultBufferSize, vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
        vk::MemoryRequirement::HostVisible);

    {
        const vk::Allocation& alloc         = resultBuffer.getAllocation();
        deUint8*              resultDataPtr = reinterpret_cast<deUint8*>(alloc.getHostPtr());

        for (deUint32 cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); ++cmdNdx)
        {
            deUint8* const dstPtr = &resultDataPtr[resultBlockSize * cmdNdx];

            *(deUint32*)(dstPtr + 0 * sizeof(deUint32))           = m_dispatchCommands[cmdNdx].m_numWorkGroups[0];
            *(deUint32*)(dstPtr + 1 * sizeof(deUint32))           = m_dispatchCommands[cmdNdx].m_numWorkGroups[1];
            *(deUint32*)(dstPtr + 2 * sizeof(deUint32))           = m_dispatchCommands[cmdNdx].m_numWorkGroups[2];
            *(deUint32*)(dstPtr + RESULT_BLOCK_NUM_PASSED_OFFSET) = 0;
        }

        vk::flushAlloc(vkdi, m_device, alloc);
    }

    // Create descriptor set layout
    vk::DescriptorSetLayoutBuilder layoutBuilder;
    layoutBuilder.addSingleBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, vk::VK_SHADER_STAGE_COMPUTE_BIT);
    vk::Unique<vk::VkDescriptorSetLayout> descriptorSetLayout (layoutBuilder.build(vkdi, m_device));

    // Create compute pipeline
    vk::ComputePipelineWrapper computePipeline (vkdi, m_device, m_computePipelineConstructionType, m_context.getBinaryCollection().get("indirect_dispatch_" + m_name + "_verify"));
    computePipeline.setDescriptorSetLayout(descriptorSetLayout.get());
    computePipeline.buildPipeline();

    // Create descriptor pool
    const vk::Unique<vk::VkDescriptorPool> descriptorPool (
        vk::DescriptorPoolBuilder()
        .addType(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, (deUint32)m_dispatchCommands.size())
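        // One descriptor set per dispatch command; each set is updated below to view its own result block.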
        .build(vkdi, m_device, vk::VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, static_cast<deUint32>(m_dispatchCommands.size())));

    const vk::VkBufferMemoryBarrier ssboPostBarrier = makeBufferMemoryBarrier(
        vk::VK_ACCESS_SHADER_WRITE_BIT, vk::VK_ACCESS_HOST_READ_BIT, *resultBuffer, 0ull, resultBufferSize);

    // Create command buffer
    const vk::Unique<vk::VkCommandPool>   cmdPool   (makeCommandPool(vkdi, m_device, m_queueFamilyIndex));
    const vk::Unique<vk::VkCommandBuffer> cmdBuffer (allocateCommandBuffer(vkdi, m_device, *cmdPool, vk::VK_COMMAND_BUFFER_LEVEL_PRIMARY));

    // Begin recording commands
    beginCommandBuffer(vkdi, *cmdBuffer);

    // Create indirect buffer
    vk::BufferWithMemory indirectBuffer (
        vkdi, m_device, allocator,
        vk::makeBufferCreateInfo(m_bufferSize, vk::VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT | vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
        vk::MemoryRequirement::HostVisible);
    fillIndirectBufferData(*cmdBuffer, vkdi, indirectBuffer);

    // Bind compute pipeline
    computePipeline.bind(*cmdBuffer);

    // Allocate descriptor sets
    typedef de::SharedPtr<vk::Unique<vk::VkDescriptorSet> > SharedVkDescriptorSet;
    std::vector<SharedVkDescriptorSet> descriptorSets (m_dispatchCommands.size());

    vk::VkDeviceSize curOffset = 0;

    // Create descriptor sets
    for (deUint32 cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); ++cmdNdx)
    {
        descriptorSets[cmdNdx] = SharedVkDescriptorSet(new vk::Unique<vk::VkDescriptorSet>(
            makeDescriptorSet(vkdi, m_device, *descriptorPool, *descriptorSetLayout)));

        const vk::VkDescriptorBufferInfo resultDescriptorInfo = makeDescriptorBufferInfo(*resultBuffer, curOffset, resultBlockSize);

        vk::DescriptorSetUpdateBuilder descriptorSetBuilder;
        descriptorSetBuilder.writeSingle(**descriptorSets[cmdNdx], vk::DescriptorSetUpdateBuilder::Location::binding(0u), vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultDescriptorInfo);
        descriptorSetBuilder.update(vkdi, m_device);

        // Bind descriptor set
        vkdi.cmdBindDescriptorSets(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, computePipeline.getPipelineLayout(), 0u, 1u, &(**descriptorSets[cmdNdx]), 0u, DE_NULL);

        // Dispatch indirect compute command
        vkdi.cmdDispatchIndirect(*cmdBuffer, *indirectBuffer, m_dispatchCommands[cmdNdx].m_offset);

        curOffset += resultBlockSize;
    }

    // Insert memory barrier
    vkdi.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT, (vk::VkDependencyFlags)0,
                            0, (const vk::VkMemoryBarrier*)DE_NULL,
                            1, &ssboPostBarrier,
                            0, (const vk::VkImageMemoryBarrier*)DE_NULL);

    // End recording commands
    endCommandBuffer(vkdi, *cmdBuffer);

    // Submit the command buffer and wait for execution to finish
    submitCommandsAndWait(vkdi, m_device, m_queue, *cmdBuffer);

    // Check whether the result buffer contains valid values
    if (verifyResultBuffer(resultBuffer, vkdi, resultBlockSize))
        return tcu::TestStatus(QP_TEST_RESULT_PASS, "Pass");
    else
        return tcu::TestStatus(QP_TEST_RESULT_FAIL, "Invalid values in result buffer");
}

deBool IndirectDispatchInstanceBufferUpload::verifyResultBuffer (const vk::BufferWithMemory& resultBuffer,
                                                                 const vk::DeviceInterface&  vkdi,
                                                                 const vk::VkDeviceSize      resultBlockSize) const
{
    deBool allOk = true;
    const vk::Allocation& alloc = resultBuffer.getAllocation();
    vk::invalidateAlloc(vkdi, m_device, alloc);
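    // The allocation has just been invalidated, so the values written by the verification shader can
    // be read directly from host-visible memory.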
    const deUint8* const resultDataPtr = reinterpret_cast<deUint8*>(alloc.getHostPtr());

    for (deUint32 cmdNdx = 0; cmdNdx < m_dispatchCommands.size(); cmdNdx++)
    {
        const DispatchCommand& cmd                    = m_dispatchCommands[cmdNdx];
        const deUint8* const   srcPtr                 = (const deUint8*)resultDataPtr + cmdNdx * resultBlockSize;
        const deUint32         numPassed              = *(const deUint32*)(srcPtr + RESULT_BLOCK_NUM_PASSED_OFFSET);
        const deUint32         numInvocationsPerGroup = m_workGroupSize[0] * m_workGroupSize[1] * m_workGroupSize[2];
        const deUint32         numGroups              = cmd.m_numWorkGroups[0] * cmd.m_numWorkGroups[1] * cmd.m_numWorkGroups[2];
        const deUint32         expectedCount          = numInvocationsPerGroup * numGroups;

        if (numPassed != expectedCount)
        {
            tcu::TestContext& testCtx = m_context.getTestContext();

            testCtx.getLog()
                << tcu::TestLog::Message
                << "ERROR: got invalid result for invocation " << cmdNdx
                << ": got numPassed = " << numPassed << ", expected " << expectedCount
                << tcu::TestLog::EndMessage;

            allOk = false;
        }
    }

    return allOk;
}

class IndirectDispatchCaseBufferUpload : public vkt::TestCase
{
public:
    IndirectDispatchCaseBufferUpload (tcu::TestContext&                         testCtx,
                                      const DispatchCaseDesc&                   caseDesc,
                                      const glu::GLSLVersion                    glslVersion,
                                      const vk::ComputePipelineConstructionType computePipelineConstructionType);

    virtual ~IndirectDispatchCaseBufferUpload (void) {}

    virtual void          initPrograms   (vk::SourceCollections& programCollection) const;
    virtual TestInstance* createInstance (Context& context) const;
    virtual void          checkSupport   (Context& context) const;

protected:
    const deUintptr                     m_bufferSize;
    const tcu::UVec3                    m_workGroupSize;
    const DispatchCommandsVec           m_dispatchCommands;
    const glu::GLSLVersion              m_glslVersion;
    const bool                          m_computeOnlyQueue;
    vk::ComputePipelineConstructionType m_computePipelineConstructionType;

private:
    IndirectDispatchCaseBufferUpload (const vkt::TestCase&);
    IndirectDispatchCaseBufferUpload& operator= (const vkt::TestCase&);
};

IndirectDispatchCaseBufferUpload::IndirectDispatchCaseBufferUpload (tcu::TestContext&                         testCtx,
                                                                    const DispatchCaseDesc&                   caseDesc,
                                                                    const glu::GLSLVersion                    glslVersion,
                                                                    const vk::ComputePipelineConstructionType computePipelineConstructionType)
    : vkt::TestCase                     (testCtx, caseDesc.m_name)
    , m_bufferSize                      (caseDesc.m_bufferSize)
    , m_workGroupSize                   (caseDesc.m_workGroupSize)
    , m_dispatchCommands                (caseDesc.m_dispatchCommands)
    , m_glslVersion                     (glslVersion)
    , m_computeOnlyQueue                (caseDesc.m_computeOnlyQueue)
    , m_computePipelineConstructionType (computePipelineConstructionType)
{
}

void IndirectDispatchCaseBufferUpload::initPrograms (vk::SourceCollections& programCollection) const
{
    const char* const versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion);

    std::ostringstream verifyBuffer;

    verifyBuffer
        << versionDecl << "\n"
        << "layout(local_size_x = ${LOCAL_SIZE_X}, local_size_y = ${LOCAL_SIZE_Y}, local_size_z = ${LOCAL_SIZE_Z}) in;\n"
        << "layout(set = 0, binding = 0, std430) buffer Result\n"
        << "{\n"
        << "    uvec3 expectedGroupCount;\n"
        << "    coherent uint numPassed;\n"
        << "} result;\n"
        << "void main (void)\n"
        << "{\n"
        << "    if (all(equal(result.expectedGroupCount, gl_NumWorkGroups)))\n"
        << "        atomicAdd(result.numPassed, 1u);\n"
        << "}\n";

    std::map<std::string, std::string> args;
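    // Fill the ${LOCAL_SIZE_*} template parameters from the work group size of this case.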
    args["LOCAL_SIZE_X"] = de::toString(m_workGroupSize.x());
    args["LOCAL_SIZE_Y"] = de::toString(m_workGroupSize.y());
    args["LOCAL_SIZE_Z"] = de::toString(m_workGroupSize.z());

    std::string verifyProgramString = tcu::StringTemplate(verifyBuffer.str()).specialize(args);

    programCollection.glslSources.add("indirect_dispatch_" + m_name + "_verify") << glu::ComputeSource(verifyProgramString);
}

TestInstance* IndirectDispatchCaseBufferUpload::createInstance (Context& context) const
{
    return new IndirectDispatchInstanceBufferUpload(context, m_name, m_bufferSize, m_workGroupSize, m_dispatchCommands, m_computeOnlyQueue, m_computePipelineConstructionType);
}

void IndirectDispatchCaseBufferUpload::checkSupport (Context& context) const
{
    // Find at least one queue family that supports compute but NOT graphics.
    if (m_computeOnlyQueue)
    {
        bool foundQueue = false;
        const std::vector<vk::VkQueueFamilyProperties> queueFamilies = getPhysicalDeviceQueueFamilyProperties(
            context.getInstanceInterface(), context.getPhysicalDevice());

        for (const auto& queueFamily : queueFamilies)
        {
            if ((queueFamily.queueFlags & vk::VK_QUEUE_COMPUTE_BIT) &&
                !(queueFamily.queueFlags & vk::VK_QUEUE_GRAPHICS_BIT))
            {
                foundQueue = true;
                break;
            }
        }
        if (!foundQueue)
            TCU_THROW(NotSupportedError, "No queue family found that supports compute but not graphics.");
    }

    checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(), m_computePipelineConstructionType);
}

class IndirectDispatchInstanceBufferGenerate : public IndirectDispatchInstanceBufferUpload
{
public:
    IndirectDispatchInstanceBufferGenerate (Context&                                  context,
                                            const std::string&                        name,
                                            const deUintptr                           bufferSize,
                                            const tcu::UVec3&                         workGroupSize,
                                            const DispatchCommandsVec&                dispatchCommands,
                                            const bool                                computeOnlyQueue,
                                            const vk::ComputePipelineConstructionType computePipelineConstructionType)
        : IndirectDispatchInstanceBufferUpload(context, name, bufferSize, workGroupSize, dispatchCommands, computeOnlyQueue, computePipelineConstructionType) {}

    virtual ~IndirectDispatchInstanceBufferGenerate (void) {}

protected:
    virtual void fillIndirectBufferData (const vk::VkCommandBuffer   commandBuffer,
                                         const vk::DeviceInterface&  vkdi,
                                         const vk::BufferWithMemory& indirectBuffer);

    vk::Move<vk::VkDescriptorSetLayout> m_descriptorSetLayout;
    vk::Move<vk::VkDescriptorPool>      m_descriptorPool;
    vk::Move<vk::VkDescriptorSet>       m_descriptorSet;
    vk::Move<vk::VkPipelineLayout>      m_pipelineLayout;
    vk::Move<vk::VkPipeline>            m_computePipeline;

private:
    IndirectDispatchInstanceBufferGenerate (const vkt::TestInstance&);
    IndirectDispatchInstanceBufferGenerate& operator= (const vkt::TestInstance&);
};

void IndirectDispatchInstanceBufferGenerate::fillIndirectBufferData (const vk::VkCommandBuffer commandBuffer, const vk::DeviceInterface& vkdi, const vk::BufferWithMemory& indirectBuffer)
{
    // Create compute shader that generates data for the indirect buffer
    const vk::Unique<vk::VkShaderModule> genIndirectBufferDataShader (createShaderModule(
        vkdi, m_device, m_context.getBinaryCollection().get("indirect_dispatch_" + m_name + "_generate"), 0u));

    // Create descriptor set layout
    m_descriptorSetLayout = vk::DescriptorSetLayoutBuilder()
        .addSingleBinding(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, vk::VK_SHADER_STAGE_COMPUTE_BIT)
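        // Single SSBO binding: the indirect buffer itself, which the generator shader fills with commands.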
        .build(vkdi, m_device);

    // Create compute pipeline
    m_pipelineLayout  = makePipelineLayout(vkdi, m_device, *m_descriptorSetLayout);
    m_computePipeline = makeComputePipeline(vkdi, m_device, *m_pipelineLayout, *genIndirectBufferDataShader);

    // Create descriptor pool
    m_descriptorPool = vk::DescriptorPoolBuilder()
        .addType(vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
        .build(vkdi, m_device, vk::VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);

    // Create descriptor set
    m_descriptorSet = makeDescriptorSet(vkdi, m_device, *m_descriptorPool, *m_descriptorSetLayout);

    const vk::VkDescriptorBufferInfo indirectDescriptorInfo = makeDescriptorBufferInfo(*indirectBuffer, 0ull, m_bufferSize);

    vk::DescriptorSetUpdateBuilder descriptorSetBuilder;
    descriptorSetBuilder.writeSingle(*m_descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(0u), vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &indirectDescriptorInfo);
    descriptorSetBuilder.update(vkdi, m_device);

    const vk::VkBufferMemoryBarrier bufferBarrier = makeBufferMemoryBarrier(
        vk::VK_ACCESS_SHADER_WRITE_BIT, vk::VK_ACCESS_INDIRECT_COMMAND_READ_BIT, *indirectBuffer, 0ull, m_bufferSize);

    // Bind compute pipeline
    vkdi.cmdBindPipeline(commandBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *m_computePipeline);

    // Bind descriptor set
    vkdi.cmdBindDescriptorSets(commandBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *m_pipelineLayout, 0u, 1u, &m_descriptorSet.get(), 0u, DE_NULL);

    // Dispatch compute command
    vkdi.cmdDispatch(commandBuffer, 1u, 1u, 1u);

    // Insert memory barrier
    vkdi.cmdPipelineBarrier(commandBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, (vk::VkDependencyFlags)0,
                            0, (const vk::VkMemoryBarrier*)DE_NULL,
                            1, &bufferBarrier,
                            0, (const vk::VkImageMemoryBarrier*)DE_NULL);
}

class IndirectDispatchCaseBufferGenerate : public IndirectDispatchCaseBufferUpload
{
public:
    IndirectDispatchCaseBufferGenerate (tcu::TestContext&                         testCtx,
                                        const DispatchCaseDesc&                   caseDesc,
                                        const glu::GLSLVersion                    glslVersion,
                                        const vk::ComputePipelineConstructionType computePipelineConstructionType)
        : IndirectDispatchCaseBufferUpload(testCtx, caseDesc, glslVersion, computePipelineConstructionType) {}

    virtual ~IndirectDispatchCaseBufferGenerate (void) {}

    virtual void          initPrograms   (vk::SourceCollections& programCollection) const;
    virtual TestInstance* createInstance (Context& context) const;

private:
    IndirectDispatchCaseBufferGenerate (const vkt::TestCase&);
    IndirectDispatchCaseBufferGenerate& operator= (const vkt::TestCase&);
};

void IndirectDispatchCaseBufferGenerate::initPrograms (vk::SourceCollections& programCollection) const
{
    IndirectDispatchCaseBufferUpload::initPrograms(programCollection);

    const char* const versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion);

    std::ostringstream computeBuffer;

    // Header
    computeBuffer
        << versionDecl << "\n"
        << "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
        << "layout(set = 0, binding = 0, std430) buffer Out\n"
        << "{\n"
        << "    highp uint data[];\n"
        << "};\n"
        << "void writeCmd (uint offset, uvec3 numWorkGroups)\n"
        << "{\n"
        << "    data[offset+0u] = numWorkGroups.x;\n"
        << "    data[offset+1u] = numWorkGroups.y;\n"
        << "    data[offset+2u] = numWorkGroups.z;\n"
        << "}\n"
        << "void main (void)\n"
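        // The body generated below contains one writeCmd() call per dispatch command of this case.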
        << "{\n";

    // Dispatch commands
    for (DispatchCommandsVec::const_iterator cmdIter = m_dispatchCommands.begin(); cmdIter != m_dispatchCommands.end(); ++cmdIter)
    {
        const deUint32 offs = (deUint32)(cmdIter->m_offset / sizeof(deUint32));
        DE_ASSERT((size_t)offs * sizeof(deUint32) == (size_t)cmdIter->m_offset);

        computeBuffer
            << "\twriteCmd(" << offs << "u, uvec3("
            << cmdIter->m_numWorkGroups.x() << "u, "
            << cmdIter->m_numWorkGroups.y() << "u, "
            << cmdIter->m_numWorkGroups.z() << "u));\n";
    }

    // Ending
    computeBuffer << "}\n";

    std::string computeString = computeBuffer.str();

    programCollection.glslSources.add("indirect_dispatch_" + m_name + "_generate") << glu::ComputeSource(computeString);
}

TestInstance* IndirectDispatchCaseBufferGenerate::createInstance (Context& context) const
{
    return new IndirectDispatchInstanceBufferGenerate(context, m_name, m_bufferSize, m_workGroupSize, m_dispatchCommands, m_computeOnlyQueue, m_computePipelineConstructionType);
}

DispatchCommandsVec commandsVec (const DispatchCommand& cmd)
{
    DispatchCommandsVec vec;
    vec.push_back(cmd);
    return vec;
}

DispatchCommandsVec commandsVec (const DispatchCommand& cmd0,
                                 const DispatchCommand& cmd1,
                                 const DispatchCommand& cmd2,
                                 const DispatchCommand& cmd3,
                                 const DispatchCommand& cmd4)
{
    DispatchCommandsVec vec;
    vec.push_back(cmd0);
    vec.push_back(cmd1);
    vec.push_back(cmd2);
    vec.push_back(cmd3);
    vec.push_back(cmd4);
    return vec;
}

DispatchCommandsVec commandsVec (const DispatchCommand& cmd0,
                                 const DispatchCommand& cmd1,
                                 const DispatchCommand& cmd2,
                                 const DispatchCommand& cmd3,
                                 const DispatchCommand& cmd4,
                                 const DispatchCommand& cmd5,
                                 const DispatchCommand& cmd6)
{
    DispatchCommandsVec vec;
    vec.push_back(cmd0);
    vec.push_back(cmd1);
    vec.push_back(cmd2);
    vec.push_back(cmd3);
    vec.push_back(cmd4);
    vec.push_back(cmd5);
    vec.push_back(cmd6);
    return vec;
}

} // anonymous

tcu::TestCaseGroup* createIndirectComputeDispatchTests (tcu::TestContext& testCtx, vk::ComputePipelineConstructionType computePipelineConstructionType)
{
    static const DispatchCaseDesc s_dispatchCases[] =
    {
        // Single invocation only from offset 0
        DispatchCaseDesc("single_invocation", INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
            commandsVec(DispatchCommand(0, tcu::UVec3(1, 1, 1))), false
        ),
        // Multiple groups dispatched from offset 0
        DispatchCaseDesc("multiple_groups", INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
            commandsVec(DispatchCommand(0, tcu::UVec3(2, 3, 5))), false
        ),
        // Multiple groups of size 2x3x1 from offset 0
        DispatchCaseDesc("multiple_groups_multiple_invocations", INDIRECT_COMMAND_OFFSET, tcu::UVec3(2, 3, 1),
            commandsVec(DispatchCommand(0, tcu::UVec3(1, 2, 3))), false
        ),
        // Single group dispatched from a small non-zero offset
        DispatchCaseDesc("small_offset", 16 + INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
            commandsVec(DispatchCommand(16, tcu::UVec3(1, 1, 1))), false
        ),
        // Single group dispatched from a large offset
        DispatchCaseDesc("large_offset", (2 << 20), tcu::UVec3(1, 1, 1),
            commandsVec(DispatchCommand((1 << 20) + 12, tcu::UVec3(1, 1, 1))), false
        ),
        // Multiple invocations dispatched from a large offset
        DispatchCaseDesc("large_offset_multiple_invocations", (2 << 20), tcu::UVec3(2, 3, 1),
            commandsVec(DispatchCommand((1 << 20) + 12, tcu::UVec3(1, 2, 3))), false
        ),
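        // Zero-sized dispatch: a (0, 0, 0) command launches no work groups, so numPassed must remain 0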
        DispatchCaseDesc("empty_command", INDIRECT_COMMAND_OFFSET, tcu::UVec3(1, 1, 1),
            commandsVec(DispatchCommand(0, tcu::UVec3(0, 0, 0))), false
        ),
        // Dispatch multiple compute commands from a single buffer
        DispatchCaseDesc("multi_dispatch", 1 << 10, tcu::UVec3(3, 1, 2),
            commandsVec(DispatchCommand(0, tcu::UVec3(1, 1, 1)),
                        DispatchCommand(INDIRECT_COMMAND_OFFSET, tcu::UVec3(2, 1, 1)),
                        DispatchCommand(104, tcu::UVec3(1, 3, 1)),
                        DispatchCommand(40, tcu::UVec3(1, 1, 7)),
                        DispatchCommand(52, tcu::UVec3(1, 1, 4))), false
        ),
        // Dispatch multiple compute commands from a single buffer, reusing some offsets
        DispatchCaseDesc("multi_dispatch_reuse_command", 1 << 10, tcu::UVec3(3, 1, 2),
            commandsVec(DispatchCommand(0, tcu::UVec3(1, 1, 1)),
                        DispatchCommand(0, tcu::UVec3(1, 1, 1)),
                        DispatchCommand(0, tcu::UVec3(1, 1, 1)),
                        DispatchCommand(104, tcu::UVec3(1, 3, 1)),
                        DispatchCommand(104, tcu::UVec3(1, 3, 1)),
                        DispatchCommand(52, tcu::UVec3(1, 1, 4)),
                        DispatchCommand(52, tcu::UVec3(1, 1, 4))), false
        ),
    };

    de::MovePtr<tcu::TestCaseGroup> indirectComputeDispatchTests (new tcu::TestCaseGroup(testCtx, "indirect_dispatch"));

    tcu::TestCaseGroup* const groupBufferUpload = new tcu::TestCaseGroup(testCtx, "upload_buffer");
    indirectComputeDispatchTests->addChild(groupBufferUpload);

    for (deUint32 ndx = 0; ndx < DE_LENGTH_OF_ARRAY(s_dispatchCases); ndx++)
    {
        // Each case is added twice: once on the universal queue and once restricted to a compute-only queue.
        DispatchCaseDesc caseDesc        = s_dispatchCases[ndx];
        std::string      computeName     = std::string(caseDesc.m_name) + std::string("_compute_only_queue");
        DispatchCaseDesc computeOnlyDesc = DispatchCaseDesc(computeName.c_str(), caseDesc.m_bufferSize, caseDesc.m_workGroupSize,
                                                            caseDesc.m_dispatchCommands, true);
        groupBufferUpload->addChild(new IndirectDispatchCaseBufferUpload(testCtx, caseDesc, glu::GLSL_VERSION_310_ES, computePipelineConstructionType));
        groupBufferUpload->addChild(new IndirectDispatchCaseBufferUpload(testCtx, computeOnlyDesc, glu::GLSL_VERSION_310_ES, computePipelineConstructionType));
    }

    tcu::TestCaseGroup* const groupBufferGenerate = new tcu::TestCaseGroup(testCtx, "gen_in_compute");
    indirectComputeDispatchTests->addChild(groupBufferGenerate);

    for (deUint32 ndx = 0; ndx < DE_LENGTH_OF_ARRAY(s_dispatchCases); ndx++)
    {
        DispatchCaseDesc caseDesc        = s_dispatchCases[ndx];
        std::string      computeName     = std::string(caseDesc.m_name) + std::string("_compute_only_queue");
        DispatchCaseDesc computeOnlyDesc = DispatchCaseDesc(computeName.c_str(), caseDesc.m_bufferSize, caseDesc.m_workGroupSize,
                                                            caseDesc.m_dispatchCommands, true);
        groupBufferGenerate->addChild(new IndirectDispatchCaseBufferGenerate(testCtx, caseDesc, glu::GLSL_VERSION_310_ES, computePipelineConstructionType));
        groupBufferGenerate->addChild(new IndirectDispatchCaseBufferGenerate(testCtx, computeOnlyDesc, glu::GLSL_VERSION_310_ES, computePipelineConstructionType));
    }

    return indirectComputeDispatchTests.release();
}

} // compute
} // vkt