1/*------------------------------------------------------------------------ 2 * Vulkan Conformance Tests 3 * ------------------------ 4 * 5 * Copyright (c) 2021 The Khronos Group Inc. 6 * Copyright (c) 2021 Valve Corporation. 7 * 8 * Licensed under the Apache License, Version 2.0 (the "License"); 9 * you may not use this file except in compliance with the License. 10 * You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 * 20 *//*! 21 * \file 22 * \brief Mesh Shader API Tests 23 *//*--------------------------------------------------------------------*/ 24 25#include "vktMeshShaderApiTests.hpp" 26#include "vktMeshShaderUtil.hpp" 27#include "vktTestCase.hpp" 28 29#include "vkTypeUtil.hpp" 30#include "vkImageWithMemory.hpp" 31#include "vkBufferWithMemory.hpp" 32#include "vkObjUtil.hpp" 33#include "vkBuilderUtil.hpp" 34#include "vkCmdUtil.hpp" 35#include "vkImageUtil.hpp" 36 37#include "tcuMaybe.hpp" 38#include "tcuTestLog.hpp" 39#include "tcuImageCompare.hpp" 40 41#include "deRandom.hpp" 42 43#include <iostream> 44#include <sstream> 45#include <vector> 46#include <algorithm> 47#include <iterator> 48#include <limits> 49 50namespace vkt 51{ 52namespace MeshShader 53{ 54 55namespace 56{ 57 58using namespace vk; 59 60using GroupPtr = de::MovePtr<tcu::TestCaseGroup>; 61using ImageWithMemoryPtr = de::MovePtr<ImageWithMemory>; 62using BufferWithMemoryPtr = de::MovePtr<BufferWithMemory>; 63 64enum class DrawType 65{ 66 DRAW = 0, 67 DRAW_INDIRECT, 68 DRAW_INDIRECT_COUNT, 69}; 70 71std::ostream& operator<< (std::ostream& stream, DrawType drawType) 72{ 73 switch (drawType) 74 { 75 case DrawType::DRAW: stream << "draw"; break; 76 case DrawType::DRAW_INDIRECT: stream << "draw_indirect"; break; 77 case DrawType::DRAW_INDIRECT_COUNT: stream << "draw_indirect_count"; break; 78 default: DE_ASSERT(false); break; 79 } 80 return stream; 81} 82 83 84// This helps test the maxDrawCount rule for the DRAW_INDIRECT_COUNT case. 85enum class IndirectCountLimitType 86{ 87 BUFFER_VALUE = 0, // The actual count will be given by the count buffer. 88 MAX_COUNT, // The actual count will be given by the maxDrawCount argument passed to the draw command. 89}; 90 91struct IndirectArgs 92{ 93 uint32_t offset; 94 uint32_t stride; 95}; 96 97struct TestParams 98{ 99 DrawType drawType; 100 uint32_t seed; 101 uint32_t drawCount; // Equivalent to taskCount or drawCount. 102 uint32_t firstTask; // Equivalent to firstTask in every call. 103 tcu::Maybe<IndirectArgs> indirectArgs; // Only used for DRAW_INDIRECT*. 104 tcu::Maybe<IndirectCountLimitType> indirectCountLimit; // Only used for DRAW_INDIRECT_COUNT. 105 tcu::Maybe<uint32_t> indirectCountOffset; // Only used for DRAW_INDIRECT_COUNT. 106 bool useTask; 107}; 108 109// The framebuffer will have a number of rows and 32 columns. Each mesh shader workgroup will generate geometry to fill a single 110// framebuffer row, using a triangle list with 32 triangles of different colors, each covering a framebuffer pixel. 111// 112// Note: the total framebuffer rows is called "full" below (e.g. 64). When using a task shader to generate work, each workgroup will 113// generate a single mesh workgroup using a push constant instead of a compile-time constant. 114// 115// When using DRAW, the task count will tell us how many rows of pixels will be filled in the framebuffer. 116// 117// When using indirect draws, the full framebuffer will always be drawn into by using multiple draw command structures, except in 118// the case of drawCount==0. Each draw will spawn the needed number of tasks to fill the whole framebuffer. In addition, in order to 119// make all argument structures different, the number of tasks in each draw count will be slightly different and assigned 120// pseudorandomly. 121// 122// DRAW: taskCount=0, taskCount=1, taskCount=2, taskCount=half, taskCount=full 123// 124// DRAW_INDIRECT: drawCount=0, drawCount=1, drawCount=2, drawCount=half, drawCount=full. 125// * With offset 0 and pseudorandom (multiples of 4). 126// * With stride adding a padding of 0 and pseudorandom (multiples of 4). 127// 128// DRAW_INDIRECT_COUNT: same as indirect in two variants: 129// 1. Passing the count in a buffer with a large maximum. 130// 2. Passing a large value in the buffer and limiting it with the maximum. 131 132class MeshApiCase : public vkt::TestCase 133{ 134public: 135 MeshApiCase (tcu::TestContext& testCtx, const std::string& name, const TestParams& params) 136 : vkt::TestCase (testCtx, name) 137 , m_params (params) 138 {} 139 virtual ~MeshApiCase (void) {} 140 141 void initPrograms (vk::SourceCollections& programCollection) const override; 142 void checkSupport (Context& context) const override; 143 TestInstance* createInstance (Context& context) const override; 144 145protected: 146 TestParams m_params; 147}; 148 149class MeshApiInstance : public vkt::TestInstance 150{ 151public: 152 MeshApiInstance (Context& context, const TestParams& params) 153 : vkt::TestInstance (context) 154 , m_params (params) 155 {} 156 virtual ~MeshApiInstance (void) {} 157 158 tcu::TestStatus iterate (void) override; 159 160protected: 161 TestParams m_params; 162}; 163 164TestInstance* MeshApiCase::createInstance (Context& context) const 165{ 166 return new MeshApiInstance(context, m_params); 167} 168 169struct PushConstantData 170{ 171 uint32_t width; 172 uint32_t height; 173 uint32_t firstTaskMesh; 174 uint32_t one; 175 uint32_t firstTaskTask; 176 177 std::vector<VkPushConstantRange> getRanges (bool includeTask) const 178 { 179 constexpr uint32_t offsetMesh = 0u; 180 constexpr uint32_t offsetTask = static_cast<uint32_t>(offsetof(PushConstantData, one)); 181 constexpr uint32_t sizeMesh = offsetTask; 182 constexpr uint32_t sizeTask = static_cast<uint32_t>(sizeof(PushConstantData)) - offsetTask; 183 184 const VkPushConstantRange meshRange = 185 { 186 VK_SHADER_STAGE_MESH_BIT_NV, // VkShaderStageFlags stageFlags; 187 offsetMesh, // uint32_t offset; 188 sizeMesh, // uint32_t size; 189 }; 190 const VkPushConstantRange taskRange = 191 { 192 VK_SHADER_STAGE_TASK_BIT_NV, // VkShaderStageFlags stageFlags; 193 offsetTask, // uint32_t offset; 194 sizeTask, // uint32_t size; 195 }; 196 197 std::vector<VkPushConstantRange> ranges (1u, meshRange); 198 if (includeTask) 199 ranges.push_back(taskRange); 200 return ranges; 201 } 202}; 203 204void MeshApiCase::initPrograms (vk::SourceCollections& programCollection) const 205{ 206 const std::string taskDataDecl = 207 "taskNV TaskData {\n" 208 " uint blockNumber;\n" 209 " uint blockRow;\n" 210 "} td;\n" 211 ; 212 213 // Task shader if needed. 214 if (m_params.useTask) 215 { 216 std::ostringstream task; 217 task 218 << "#version 460\n" 219 << "#extension GL_NV_mesh_shader : enable\n" 220 << "\n" 221 << "layout (local_size_x=1) in;\n" 222 << "\n" 223 << "layout (push_constant, std430) uniform TaskPushConstantBlock {\n" 224 << " layout (offset=12) uint one;\n" 225 << " layout (offset=16) uint firstTask;\n" 226 << "} pc;\n" 227 << "\n" 228 << "out " << taskDataDecl 229 << "\n" 230 << "void main ()\n" 231 << "{\n" 232 << " gl_TaskCountNV = pc.one;\n" 233 << " td.blockNumber = uint(gl_DrawID);\n" 234 << " td.blockRow = gl_WorkGroupID.x - pc.firstTask;\n" 235 << "}\n" 236 ; 237 programCollection.glslSources.add("task") << glu::TaskSource(task.str()); 238 } 239 240 // Mesh shader. 241 { 242 std::ostringstream mesh; 243 mesh 244 << "#version 460\n" 245 << "#extension GL_NV_mesh_shader : enable\n" 246 << "\n" 247 << "layout (local_size_x=32) in;\n" 248 << "layout (triangles) out;\n" 249 << "layout (max_vertices=96, max_primitives=32) out;\n" 250 << "\n" 251 << "layout (push_constant, std430) uniform MeshPushConstantBlock {\n" 252 << " uint width;\n" 253 << " uint height;\n" 254 << " uint firstTask;\n" 255 << "} pc;\n" 256 << "\n" 257 << "layout (location=0) perprimitiveNV out vec4 primitiveColor[];\n" 258 << "\n" 259 << (m_params.useTask ? ("in " + taskDataDecl): "") 260 << "\n" 261 << "layout (set=0, binding=0, std430) readonly buffer BlockSizes {\n" 262 << " uint blockSize[];\n" 263 << "} bsz;\n" 264 << "\n" 265 << "uint startOfBlock (uint blockNumber)\n" 266 << "{\n" 267 << " uint start = 0;\n" 268 << " for (uint i = 0; i < blockNumber; i++)\n" 269 << " start += bsz.blockSize[i];\n" 270 << " return start;\n" 271 << "}\n" 272 << "\n" 273 << "void main ()\n" 274 << "{\n" 275 << " const uint blockNumber = " << (m_params.useTask ? "td.blockNumber" : "uint(gl_DrawID)") << ";\n" 276 << " const uint blockRow = " << (m_params.useTask ? "td.blockRow" : "(gl_WorkGroupID.x - pc.firstTask)") << ";\n" 277 << "\n" 278 << " // Each workgroup will fill one row, and each invocation will generate a\n" 279 << " // triangle around the pixel center in each column.\n" 280 << " const uint row = startOfBlock(blockNumber) + blockRow;\n" 281 << " const uint col = gl_LocalInvocationID.x;\n" 282 << "\n" 283 << " const float fHeight = float(pc.height);\n" 284 << " const float fWidth = float(pc.width);\n" 285 << "\n" 286 << " // Pixel coordinates, normalized.\n" 287 << " const float rowNorm = (float(row) + 0.5) / fHeight;\n" 288 << " const float colNorm = (float(col) + 0.5) / fWidth;\n" 289 << "\n" 290 << " // Framebuffer coordinates.\n" 291 << " const float coordX = (colNorm * 2.0) - 1.0;\n" 292 << " const float coordY = (rowNorm * 2.0) - 1.0;\n" 293 << "\n" 294 << " const float pixelWidth = 2.0 / fWidth;\n" 295 << " const float pixelHeight = 2.0 / fHeight;\n" 296 << "\n" 297 << " const float offsetX = pixelWidth / 2.0;\n" 298 << " const float offsetY = pixelHeight / 2.0;\n" 299 << "\n" 300 << " const uint baseIndex = col*3;\n" 301 << " const uvec3 indices = uvec3(baseIndex, baseIndex + 1, baseIndex + 2);\n" 302 << "\n" 303 << " gl_PrimitiveCountNV = 32u;\n" 304 << " primitiveColor[col] = vec4(rowNorm, colNorm, 0.0, 1.0);\n" 305 << "\n" 306 << " gl_PrimitiveIndicesNV[indices.x] = indices.x;\n" 307 << " gl_PrimitiveIndicesNV[indices.y] = indices.y;\n" 308 << " gl_PrimitiveIndicesNV[indices.z] = indices.z;\n" 309 << "\n" 310 << " gl_MeshVerticesNV[indices.x].gl_Position = vec4(coordX - offsetX, coordY + offsetY, 0.0, 1.0);\n" 311 << " gl_MeshVerticesNV[indices.y].gl_Position = vec4(coordX + offsetX, coordY + offsetY, 0.0, 1.0);\n" 312 << " gl_MeshVerticesNV[indices.z].gl_Position = vec4(coordX, coordY - offsetY, 0.0, 1.0);\n" 313 << "}\n" 314 ; 315 programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()); 316 } 317 318 // Frag shader. 319 { 320 std::ostringstream frag; 321 frag 322 << "#version 460\n" 323 << "#extension GL_NV_mesh_shader : enable\n" 324 << "\n" 325 << "layout (location=0) perprimitiveNV in vec4 primitiveColor;\n" 326 << "layout (location=0) out vec4 outColor;\n" 327 << "\n" 328 << "void main ()\n" 329 << "{\n" 330 << " outColor = primitiveColor;\n" 331 << "}\n" 332 ; 333 programCollection.glslSources.add("frag") << glu::FragmentSource(frag.str()); 334 } 335} 336 337void MeshApiCase::checkSupport (Context& context) const 338{ 339 checkTaskMeshShaderSupportNV(context, m_params.useTask, true); 340 341 // VUID-vkCmdDrawMeshTasksIndirectNV-drawCount-02718 342 if (m_params.drawType == DrawType::DRAW_INDIRECT && m_params.drawCount > 1u) 343 { 344 context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_MULTI_DRAW_INDIRECT); 345 } 346 347 // VUID-vkCmdDrawMeshTasksIndirectCountNV-None-04445 348 if (m_params.drawType == DrawType::DRAW_INDIRECT_COUNT) 349 context.requireDeviceFunctionality("VK_KHR_draw_indirect_count"); 350} 351 352template <typename T> 353BufferWithMemoryPtr makeStridedBuffer(const DeviceInterface& vkd, VkDevice device, Allocator& alloc, const std::vector<T>& elements, uint32_t offset, uint32_t stride, VkBufferUsageFlags usage, uint32_t endPadding) 354{ 355 const auto elementSize = static_cast<uint32_t>(sizeof(T)); 356 const auto actualStride = std::max(elementSize, stride); 357 const auto bufferSize = static_cast<size_t>(offset) + static_cast<size_t>(actualStride) * elements.size() + static_cast<size_t>(endPadding); 358 const auto bufferInfo = makeBufferCreateInfo(static_cast<VkDeviceSize>(bufferSize), usage); 359 360 BufferWithMemoryPtr buffer(new BufferWithMemory(vkd, device, alloc, bufferInfo, MemoryRequirement::HostVisible)); 361 auto& bufferAlloc = buffer->getAllocation(); 362 char* bufferDataPtr = reinterpret_cast<char*>(bufferAlloc.getHostPtr()); 363 364 char* itr = bufferDataPtr + offset; 365 for (const auto& elem : elements) 366 { 367 deMemcpy(itr, &elem, sizeof(elem)); 368 itr += actualStride; 369 } 370 if (endPadding > 0u) 371 deMemset(itr, 0xFF, endPadding); 372 373 flushAlloc(vkd, device, bufferAlloc); 374 375 return buffer; 376} 377 378VkExtent3D getExtent () 379{ 380 return makeExtent3D(32u, 64u, 1u); 381} 382 383tcu::TestStatus MeshApiInstance::iterate (void) 384{ 385 const auto& vkd = m_context.getDeviceInterface(); 386 const auto device = m_context.getDevice(); 387 auto& alloc = m_context.getDefaultAllocator(); 388 const auto queueIndex = m_context.getUniversalQueueFamilyIndex(); 389 const auto queue = m_context.getUniversalQueue(); 390 391 const auto extent = getExtent(); 392 const auto iExtent3D = tcu::IVec3(static_cast<int>(extent.width), static_cast<int>(extent.height), static_cast<int>(extent.depth)); 393 const auto iExtent2D = tcu::IVec2(iExtent3D.x(), iExtent3D.y()); 394 const auto format = VK_FORMAT_R8G8B8A8_UNORM; 395 const auto tcuFormat = mapVkFormat(format); 396 const auto colorUsage = (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT); 397 const auto colorSRR = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u); 398 const tcu::Vec4 clearColor (0.0f, 0.0f, 0.0f, 1.0f); 399 const float colorThres = 0.005f; // 1/255 < 0.005 < 2/255 400 const tcu::Vec4 threshold (colorThres, colorThres, 0.0f, 0.0f); 401 402 ImageWithMemoryPtr colorBuffer; 403 Move<VkImageView> colorBufferView; 404 { 405 const VkImageCreateInfo colorBufferInfo = 406 { 407 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType; 408 nullptr, // const void* pNext; 409 0u, // VkImageCreateFlags flags; 410 VK_IMAGE_TYPE_2D, // VkImageType imageType; 411 format, // VkFormat format; 412 extent, // VkExtent3D extent; 413 1u, // uint32_t mipLevels; 414 1u, // uint32_t arrayLayers; 415 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples; 416 VK_IMAGE_TILING_OPTIMAL, // VkImageTiling tiling; 417 colorUsage, // VkImageUsageFlags usage; 418 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode; 419 0u, // uint32_t queueFamilyIndexCount; 420 nullptr, // const uint32_t* pQueueFamilyIndices; 421 VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout initialLayout; 422 }; 423 colorBuffer = ImageWithMemoryPtr(new ImageWithMemory(vkd, device, alloc, colorBufferInfo, MemoryRequirement::Any)); 424 colorBufferView = makeImageView(vkd, device, colorBuffer->get(), VK_IMAGE_VIEW_TYPE_2D, format, colorSRR); 425 } 426 427 // Prepare buffer containing the array of block sizes. 428 de::Random rnd (m_params.seed); 429 std::vector<uint32_t> blockSizes; 430 431 const uint32_t vectorSize = std::max(1u, m_params.drawCount); 432 const uint32_t largeDrawCount = vectorSize + 1u; // The indirect buffer needs to have some padding at the end. See below. 433 const uint32_t evenBlockSize = extent.height / vectorSize; 434 uint32_t remainingRows = extent.height; 435 436 blockSizes.reserve(vectorSize); 437 for (uint32_t i = 0; i < vectorSize - 1u; ++i) 438 { 439 const auto blockSize = static_cast<uint32_t>(rnd.getInt(1, evenBlockSize)); 440 remainingRows -= blockSize; 441 blockSizes.push_back(blockSize); 442 } 443 blockSizes.push_back(remainingRows); 444 445 const auto blockSizesBufferSize = static_cast<VkDeviceSize>(de::dataSize(blockSizes)); 446 BufferWithMemoryPtr blockSizesBuffer = makeStridedBuffer(vkd, device, alloc, blockSizes, 0u, 0u, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, 0u); 447 448 // Descriptor set layout, pool and set. 449 DescriptorSetLayoutBuilder layoutBuilder; 450 layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_MESH_BIT_NV); 451 const auto setLayout = layoutBuilder.build(vkd, device); 452 453 DescriptorPoolBuilder poolBuilder; 454 poolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER); 455 const auto descriptorPool = poolBuilder.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u); 456 457 const auto descriptorSet = makeDescriptorSet(vkd, device, descriptorPool.get(), setLayout.get()); 458 459 // Update descriptor set. 460 { 461 DescriptorSetUpdateBuilder updateBuilder; 462 463 const auto location = DescriptorSetUpdateBuilder::Location::binding(0u); 464 const auto descriptorBufferInfo = makeDescriptorBufferInfo(blockSizesBuffer->get(), 0ull, blockSizesBufferSize); 465 466 updateBuilder.writeSingle(descriptorSet.get(), location, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &descriptorBufferInfo); 467 updateBuilder.update(vkd, device); 468 } 469 470 // Pipeline layout. 471 PushConstantData pcData; 472 const auto pcRanges = pcData.getRanges(m_params.useTask); 473 const auto pipelineLayout = makePipelineLayout(vkd, device, 1u, &setLayout.get(), static_cast<uint32_t>(pcRanges.size()), de::dataOrNull(pcRanges)); 474 475 // Push constants. 476 pcData.width = extent.width; 477 pcData.height = extent.height; 478 pcData.firstTaskMesh = m_params.firstTask; 479 pcData.one = 1u; 480 pcData.firstTaskTask = m_params.firstTask; 481 482 // Render pass and framebuffer. 483 const auto renderPass = makeRenderPass(vkd, device, format); 484 const auto framebuffer = makeFramebuffer(vkd, device, renderPass.get(), colorBufferView.get(), extent.width, extent.height); 485 486 // Pipeline. 487 Move<VkShaderModule> taskModule; 488 Move<VkShaderModule> meshModule; 489 Move<VkShaderModule> fragModule; 490 491 const auto& binaries = m_context.getBinaryCollection(); 492 if (m_params.useTask) 493 taskModule = createShaderModule(vkd, device, binaries.get("task")); 494 meshModule = createShaderModule(vkd, device, binaries.get("mesh")); 495 fragModule = createShaderModule(vkd, device, binaries.get("frag")); 496 497 const std::vector<VkViewport> viewports (1u, makeViewport(extent)); 498 const std::vector<VkRect2D> scissors (1u, makeRect2D(extent)); 499 500 const auto pipeline = makeGraphicsPipeline(vkd, device, pipelineLayout.get(), 501 taskModule.get(), meshModule.get(), fragModule.get(), 502 renderPass.get(), viewports, scissors); 503 504 // Command pool and buffer. 505 const auto cmdPool = makeCommandPool(vkd, device, queueIndex); 506 const auto cmdBufferPtr = allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY); 507 const auto cmdBuffer = cmdBufferPtr.get(); 508 509 // Indirect and count buffers if needed. 510 BufferWithMemoryPtr indirectBuffer; 511 BufferWithMemoryPtr countBuffer; 512 513 if (m_params.drawType != DrawType::DRAW) 514 { 515 // Indirect draws. 516 DE_ASSERT(static_cast<bool>(m_params.indirectArgs)); 517 const auto& indirectArgs = m_params.indirectArgs.get(); 518 519 // Check stride and offset validity. 520 DE_ASSERT(indirectArgs.offset % 4u == 0u); 521 DE_ASSERT(indirectArgs.stride % 4u == 0u && (indirectArgs.stride == 0u || indirectArgs.stride >= static_cast<uint32_t>(sizeof(VkDrawMeshTasksIndirectCommandNV)))); 522 523 // Prepare struct vector, which will be converted to a buffer with the proper stride and offset later. 524 std::vector<VkDrawMeshTasksIndirectCommandNV> commands; 525 commands.reserve(blockSizes.size()); 526 527 std::transform(begin(blockSizes), end(blockSizes), std::back_inserter(commands), 528 [this](uint32_t blockSize) { return VkDrawMeshTasksIndirectCommandNV{blockSize, this->m_params.firstTask}; }); 529 530 const auto padding = static_cast<uint32_t>(sizeof(VkDrawMeshTasksIndirectCommandNV)); 531 indirectBuffer = makeStridedBuffer(vkd, device, alloc, commands, indirectArgs.offset, indirectArgs.stride, VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT, padding); 532 533 // Prepare count buffer if needed. 534 if (m_params.drawType == DrawType::DRAW_INDIRECT_COUNT) 535 { 536 DE_ASSERT(static_cast<bool>(m_params.indirectCountLimit)); 537 DE_ASSERT(static_cast<bool>(m_params.indirectCountOffset)); 538 539 const auto countBufferValue = ((m_params.indirectCountLimit.get() == IndirectCountLimitType::BUFFER_VALUE) 540 ? m_params.drawCount 541 : largeDrawCount); 542 543 const std::vector<uint32_t> singleCount (1u, countBufferValue); 544 countBuffer = makeStridedBuffer(vkd, device, alloc, singleCount, m_params.indirectCountOffset.get(), static_cast<uint32_t>(sizeof(uint32_t)), VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT, 0u); 545 } 546 } 547 548 // Submit commands. 549 beginCommandBuffer(vkd, cmdBuffer); 550 beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0), clearColor); 551 552 vkd.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout.get(), 0u, 1u, &descriptorSet.get(), 0u, nullptr); 553 { 554 const char* pcDataPtr = reinterpret_cast<const char*>(&pcData); 555 for (const auto& range : pcRanges) 556 vkd.cmdPushConstants(cmdBuffer, pipelineLayout.get(), range.stageFlags, range.offset, range.size, pcDataPtr + range.offset); 557 } 558 vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline.get()); 559 560 if (m_params.drawType == DrawType::DRAW) 561 { 562 vkd.cmdDrawMeshTasksNV(cmdBuffer, m_params.drawCount, m_params.firstTask); 563 } 564 else if (m_params.drawType == DrawType::DRAW_INDIRECT) 565 { 566 const auto& indirectArgs = m_params.indirectArgs.get(); 567 vkd.cmdDrawMeshTasksIndirectNV(cmdBuffer, indirectBuffer->get(), indirectArgs.offset, m_params.drawCount, indirectArgs.stride); 568 } 569 else if (m_params.drawType == DrawType::DRAW_INDIRECT_COUNT) 570 { 571 const auto& indirectArgs = m_params.indirectArgs.get(); 572 const auto& indirectCountOffset = m_params.indirectCountOffset.get(); 573 const auto& indirectCountLimit = m_params.indirectCountLimit.get(); 574 575 const auto maxCount = ((indirectCountLimit == IndirectCountLimitType::MAX_COUNT) 576 ? m_params.drawCount 577 : largeDrawCount); 578 vkd.cmdDrawMeshTasksIndirectCountNV(cmdBuffer, indirectBuffer->get(), indirectArgs.offset, countBuffer->get(), indirectCountOffset, maxCount, indirectArgs.stride); 579 } 580 else 581 DE_ASSERT(false); 582 583 endRenderPass(vkd, cmdBuffer); 584 585 // Output buffer to extract the color buffer. 586 BufferWithMemoryPtr outBuffer; 587 void* outBufferData = nullptr; 588 { 589 const auto outBufferSize = static_cast<VkDeviceSize>(static_cast<uint32_t>(tcu::getPixelSize(tcuFormat)) * extent.width * extent.height); 590 const auto outBufferUsage = VK_BUFFER_USAGE_TRANSFER_DST_BIT; 591 const auto outBufferInfo = makeBufferCreateInfo(outBufferSize, outBufferUsage); 592 593 outBuffer = BufferWithMemoryPtr(new BufferWithMemory(vkd, device, alloc, outBufferInfo, MemoryRequirement::HostVisible)); 594 outBufferData = outBuffer->getAllocation().getHostPtr(); 595 } 596 597 copyImageToBuffer(vkd, cmdBuffer, colorBuffer->get(), outBuffer->get(), iExtent2D); 598 endCommandBuffer(vkd, cmdBuffer); 599 submitCommandsAndWait(vkd, device, queue, cmdBuffer); 600 601 // Generate reference image and compare. 602 { 603 auto& log = m_context.getTestContext().getLog(); 604 auto& outBufferAlloc = outBuffer->getAllocation(); 605 tcu::ConstPixelBufferAccess result (tcuFormat, iExtent3D, outBufferData); 606 tcu::TextureLevel referenceLevel (tcuFormat, iExtent3D.x(), iExtent3D.y()); 607 const auto reference = referenceLevel.getAccess(); 608 const auto setName = de::toString(m_params.drawType) + "_draw_count_" + de::toString(m_params.drawCount) + (m_params.useTask ? "_with_task" : "_no_task"); 609 const auto fHeight = static_cast<float>(extent.height); 610 const auto fWidth = static_cast<float>(extent.width); 611 612 invalidateAlloc(vkd, device, outBufferAlloc); 613 614 for (int y = 0; y < iExtent3D.y(); ++y) 615 for (int x = 0; x < iExtent3D.x(); ++x) 616 { 617 const tcu::Vec4 refColor = ((m_params.drawCount == 0u || (m_params.drawType == DrawType::DRAW && y >= static_cast<int>(m_params.drawCount))) 618 ? clearColor 619 : tcu::Vec4( 620 // These match the per-primitive color set by the mesh shader. 621 (static_cast<float>(y) + 0.5f) / fHeight, 622 (static_cast<float>(x) + 0.5f) / fWidth, 623 0.0f, 624 1.0f)); 625 reference.setPixel(refColor, x, y); 626 } 627 628 if (!tcu::floatThresholdCompare(log, setName.c_str(), "", reference, result, threshold, tcu::COMPARE_LOG_ON_ERROR)) 629 return tcu::TestStatus::fail("Image comparison failed; check log for details"); 630 } 631 632 return tcu::TestStatus::pass("Pass"); 633} 634 635} // anonymous 636 637tcu::TestCaseGroup* createMeshShaderApiTests (tcu::TestContext& testCtx) 638{ 639 GroupPtr mainGroup (new tcu::TestCaseGroup(testCtx, "api")); 640 641 const DrawType drawCases[] = 642 { 643 DrawType::DRAW, 644 DrawType::DRAW_INDIRECT, 645 DrawType::DRAW_INDIRECT_COUNT, 646 }; 647 648 const auto extent = getExtent(); 649 const uint32_t drawCountCases[] = { 0u, 1u, 2u, extent.height / 2u, extent.height }; 650 651 const uint32_t normalStride = static_cast<uint32_t>(sizeof(VkDrawMeshTasksIndirectCommandNV)); 652 const uint32_t largeStride = 2u * normalStride + 4u; 653 const uint32_t altOffset = 20u; 654 655 const struct 656 { 657 tcu::Maybe<IndirectArgs> indirectArgs; 658 const char* name; 659 } indirectArgsCases[] = 660 { 661 { tcu::nothing<IndirectArgs>(), "no_indirect_args" }, 662 663 // Offset 0, varying strides. 664 { tcu::just(IndirectArgs{ 0u, 0u }), "offset_0_stride_0" }, 665 { tcu::just(IndirectArgs{ 0u, normalStride }), "offset_0_stride_normal" }, 666 { tcu::just(IndirectArgs{ 0u, largeStride }), "offset_0_stride_large" }, 667 668 // Nonzero offset, varying strides. 669 { tcu::just(IndirectArgs{ altOffset, 0u }), "offset_alt_stride_0" }, 670 { tcu::just(IndirectArgs{ altOffset, normalStride }), "offset_alt_stride_normal" }, 671 { tcu::just(IndirectArgs{ altOffset, largeStride }), "offset_alt_stride_large" }, 672 }; 673 674 const struct 675 { 676 tcu::Maybe<IndirectCountLimitType> limitType; 677 const char* name; 678 } countLimitCases[] = 679 { 680 { tcu::nothing<IndirectCountLimitType>(), "no_count_limit" }, 681 { tcu::just(IndirectCountLimitType::BUFFER_VALUE), "count_limit_buffer" }, 682 { tcu::just(IndirectCountLimitType::MAX_COUNT), "count_limit_max_count" }, 683 }; 684 685 const struct 686 { 687 tcu::Maybe<uint32_t> countOffset; 688 const char* name; 689 } countOffsetCases[] = 690 { 691 { tcu::nothing<uint32_t>(), "no_count_offset" }, 692 { tcu::just(uint32_t{0u}), "count_offset_0" }, 693 { tcu::just(altOffset), "count_offset_alt" }, 694 }; 695 696 const struct 697 { 698 bool useTask; 699 const char* name; 700 } taskCases[] = 701 { 702 { false, "no_task_shader" }, 703 { true, "with_task_shader" }, 704 }; 705 706 const struct 707 { 708 uint32_t firstTask; 709 const char* name; 710 } firstTaskCases[] = 711 { 712 { 0u, "first_task_zero" }, 713 { 1001u, "first_task_nonzero" }, 714 }; 715 716 uint32_t seed = 1628678795u; 717 718 for (const auto& drawCase : drawCases) 719 { 720 const auto drawCaseName = de::toString(drawCase); 721 const bool isIndirect = (drawCase != DrawType::DRAW); 722 const bool isIndirectNoCount = (drawCase == DrawType::DRAW_INDIRECT); 723 const bool isIndirectCount = (drawCase == DrawType::DRAW_INDIRECT_COUNT); 724 725 GroupPtr drawGroup(new tcu::TestCaseGroup(testCtx, drawCaseName.c_str())); 726 727 for (const auto& drawCountCase : drawCountCases) 728 { 729 const auto drawCountName = "draw_count_" + de::toString(drawCountCase); 730 GroupPtr drawCountGroup(new tcu::TestCaseGroup(testCtx, drawCountName.c_str())); 731 732 for (const auto& indirectArgsCase : indirectArgsCases) 733 { 734 const bool hasIndirectArgs = static_cast<bool>(indirectArgsCase.indirectArgs); 735 const bool strideZero = (hasIndirectArgs && indirectArgsCase.indirectArgs.get().stride == 0u); 736 737 if (isIndirect != hasIndirectArgs) 738 continue; 739 740 // VUID-vkCmdDrawMeshTasksIndirectNV-drawCount-02146 and VUID-vkCmdDrawMeshTasksIndirectCountNV-stride-02182. 741 if (((isIndirectNoCount && drawCountCase > 1u) || isIndirectCount) && strideZero) 742 continue; 743 744 GroupPtr indirectArgsGroup(new tcu::TestCaseGroup(testCtx, indirectArgsCase.name)); 745 746 for (const auto& countLimitCase : countLimitCases) 747 { 748 const bool hasCountLimit = static_cast<bool>(countLimitCase.limitType); 749 750 if (isIndirectCount != hasCountLimit) 751 continue; 752 753 GroupPtr countLimitGroup(new tcu::TestCaseGroup(testCtx, countLimitCase.name)); 754 755 for (const auto& countOffsetCase : countOffsetCases) 756 { 757 const bool hasCountOffsetType = static_cast<bool>(countOffsetCase.countOffset); 758 759 if (isIndirectCount != hasCountOffsetType) 760 continue; 761 762 GroupPtr countOffsetGroup(new tcu::TestCaseGroup(testCtx, countOffsetCase.name)); 763 764 for (const auto& taskCase : taskCases) 765 { 766 GroupPtr taskCaseGrp(new tcu::TestCaseGroup(testCtx, taskCase.name)); 767 768 for (const auto& firstTaskCase : firstTaskCases) 769 { 770 const TestParams params = 771 { 772 drawCase, // DrawType drawType; 773 seed++, // uint32_t seed; 774 drawCountCase, // uint32_t drawCount; 775 firstTaskCase.firstTask, // uint32_t firstTask; 776 indirectArgsCase.indirectArgs, // tcu::Maybe<IndirectArgs> indirectArgs; 777 countLimitCase.limitType, // tcu::Maybe<IndirectCountLimitType> indirectCountLimit; 778 countOffsetCase.countOffset, // tcu::Maybe<uint32_t> indirectCountOffset; 779 taskCase.useTask, // bool useTask; 780 }; 781 782 taskCaseGrp->addChild(new MeshApiCase(testCtx, firstTaskCase.name, params)); 783 } 784 785 countOffsetGroup->addChild(taskCaseGrp.release()); 786 } 787 788 countLimitGroup->addChild(countOffsetGroup.release()); 789 } 790 791 indirectArgsGroup->addChild(countLimitGroup.release()); 792 } 793 794 drawCountGroup->addChild(indirectArgsGroup.release()); 795 } 796 797 drawGroup->addChild(drawCountGroup.release()); 798 } 799 800 mainGroup->addChild(drawGroup.release()); 801 } 802 803 return mainGroup.release(); 804} 805 806} // MeshShader 807} // vkt 808