1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2020-2022 The Khronos Group Inc.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *	  http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Basic cmdTraceRays* tests.
22  *//*--------------------------------------------------------------------*/
23 
24 #include "vktRayTracingTraceRaysTests.hpp"
25 
26 #include "vkDefs.hpp"
27 
28 #include "vktTestCase.hpp"
29 #include "vktTestGroupUtil.hpp"
30 #include "vkCmdUtil.hpp"
31 #include "vkObjUtil.hpp"
32 #include "vkBuilderUtil.hpp"
33 #include "vkBarrierUtil.hpp"
34 #include "vkBufferWithMemory.hpp"
35 #include "vkImageWithMemory.hpp"
36 #include "vkTypeUtil.hpp"
37 
38 #include "vkRayTracingUtil.hpp"
39 
40 #include <limits>
41 #include <tuple>
42 
43 namespace vkt
44 {
45 namespace RayTracing
46 {
47 namespace
48 {
49 using namespace vk;
50 using namespace vkt;
51 
// Mask with every ray tracing shader stage set. It is used as the stage mask of the
// storage image and acceleration structure bindings of the ray tracing descriptor set layout.
static const VkFlags	ALL_RAY_TRACING_STAGES	= VK_SHADER_STAGE_RAYGEN_BIT_KHR
												| VK_SHADER_STAGE_ANY_HIT_BIT_KHR
												| VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR
												| VK_SHADER_STAGE_MISS_BIT_KHR
												| VK_SHADER_STAGE_INTERSECTION_BIT_KHR
												| VK_SHADER_STAGE_CALLABLE_BIT_KHR;

// Values that may end up in the first component of the result image.
constexpr deUint32		kClearColorValue	= 0xFFu;	// Used for the initial clear of the result image.
constexpr deUint32		kHitColorValue		= 2u;		// First payload component written by the closest hit shader.
constexpr deUint32		kMissColorValue		= 1u;		// First payload component written by the miss shader.
62 
// Selects how the trace dimensions are provided to the trace rays command.
enum class TraceType
{
	DIRECT			= 0,	// No indirect buffer is created for this variant.
	INDIRECT_CPU	= 1,	// VkTraceRaysIndirectCommandKHR is copied from the host straight into the indirect buffer.
	INDIRECT_GPU	= 2,	// VkTraceRaysIndirectCommandKHR is copied into a uniform buffer; a compute pipeline is created for this variant.
	INDIRECT2_GPU	= 3,	// Like INDIRECT_GPU, but with VkTraceRaysIndirectCommand2KHR.
	INDIRECT2_CPU	= 4,	// Like INDIRECT_CPU, but with VkTraceRaysIndirectCommand2KHR.
};
71 
// Parameters of the RayTracingTraceRaysIndirect* test case and instance.
struct TestParams
{
	TraceType						traceType;						// How the trace dimensions are supplied.
	VkTraceRaysIndirectCommandKHR	traceDimensions;				// Note: to be used for both direct and indirect variants.
	bool							useKhrMaintenance1Semantics;	// When set, checkSupport() requires VK_KHR_ray_tracing_maintenance1 and shaderInt64,
																	// and the image extent is derived from extendedTraceDimensions.
	VkTraceRaysIndirectCommand2KHR	extendedTraceDimensions;		// Shader binding table fields are overwritten in runTest().
};
// Second parameter set. NOTE(review): its users are outside this section of the file,
// so the exact meaning of each field should be confirmed against them.
struct TestParams2
{
	TraceType		traceType;
	VkExtent3D		traceDimensions;
	bool			partialCopy;
	VkQueueFlagBits	submitQueue;
};
86 
getShaderGroupSize(const InstanceInterface& vki, const VkPhysicalDevice physicalDevice)87 deUint32 getShaderGroupSize (const InstanceInterface&	vki,
88 							 const VkPhysicalDevice		physicalDevice)
89 {
90 	de::MovePtr<RayTracingProperties>	rayTracingPropertiesKHR;
91 
92 	rayTracingPropertiesKHR	= makeRayTracingProperties(vki, physicalDevice);
93 	return rayTracingPropertiesKHR->getShaderGroupHandleSize();
94 }
95 
getShaderGroupBaseAlignment(const InstanceInterface& vki, const VkPhysicalDevice physicalDevice)96 deUint32 getShaderGroupBaseAlignment (const InstanceInterface&	vki,
97 									  const VkPhysicalDevice	physicalDevice)
98 {
99 	de::MovePtr<RayTracingProperties>	rayTracingPropertiesKHR;
100 
101 	rayTracingPropertiesKHR = makeRayTracingProperties(vki, physicalDevice);
102 	return rayTracingPropertiesKHR->getShaderGroupBaseAlignment();
103 }
104 
// Tells whether a trace command launches no rays at all, i.e. at least one of its
// width/height/depth members is zero. T only needs to expose those three members.
template<typename T>
bool isNullTrace (const T cmd)
{
	if (cmd.width == 0u)
		return true;

	if (cmd.height == 0u)
		return true;

	return (cmd.depth == 0u);
}
110 
111 template<typename T>
getImageExtent(const T cmd)112 VkExtent3D getImageExtent (const T cmd)
113 {
114 	return (isNullTrace(cmd) ? makeExtent3D(8u, 8u, 1u) : makeExtent3D(cmd.width, cmd.height, cmd.depth));
115 }
116 
isNullExtent(const VkExtent3D& extent)117 bool isNullExtent (const VkExtent3D& extent)
118 {
119 	return (extent.width == 0u || extent.height == 0u || extent.depth == 0u);
120 }
121 
// Returns the given extent, or 8x8x1 as a replacement when any of its dimensions is zero.
VkExtent3D getNonNullImageExtent (const VkExtent3D& extent)
{
	if (isNullExtent(extent))
		return makeExtent3D(8u, 8u, 1u);

	return makeExtent3D(extent.width, extent.height, extent.depth);
}
126 
makeImageCreateInfo(deUint32 width, deUint32 height, deUint32 depth, VkFormat format)127 VkImageCreateInfo makeImageCreateInfo (deUint32 width, deUint32 height, deUint32 depth, VkFormat format)
128 {
129 	const VkImageCreateInfo			imageCreateInfo			=
130 	{
131 		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,																// VkStructureType			sType;
132 		DE_NULL,																							// const void*				pNext;
133 		(VkImageCreateFlags)0u,																				// VkImageCreateFlags		flags;
134 		VK_IMAGE_TYPE_3D,																					// VkImageType				imageType;
135 		format,																								// VkFormat					format;
136 		makeExtent3D(width, height, depth),																	// VkExtent3D				extent;
137 		1u,																									// deUint32					mipLevels;
138 		1u,																									// deUint32					arrayLayers;
139 		VK_SAMPLE_COUNT_1_BIT,																				// VkSampleCountFlagBits	samples;
140 		VK_IMAGE_TILING_OPTIMAL,																			// VkImageTiling			tiling;
141 		VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT,		// VkImageUsageFlags		usage;
142 		VK_SHARING_MODE_EXCLUSIVE,																			// VkSharingMode			sharingMode;
143 		0u,																									// deUint32					queueFamilyIndexCount;
144 		DE_NULL,																							// const deUint32*			pQueueFamilyIndices;
145 		VK_IMAGE_LAYOUT_UNDEFINED																			// VkImageLayout			initialLayout;
146 	};
147 
148 	return imageCreateInfo;
149 }
150 
getQueueFamilyIndexAtExact(const DeviceInterface& vkd, const InstanceInterface& vki, VkPhysicalDevice physDevice, VkDevice device, VkQueueFlagBits bits, deUint32 queueIndex = 0)151 std::tuple<bool, VkQueue, deUint32> getQueueFamilyIndexAtExact (const DeviceInterface&		vkd,
152 																const InstanceInterface&	vki,
153 																VkPhysicalDevice			physDevice,
154 																VkDevice					device,
155 																VkQueueFlagBits				bits,
156 																deUint32					queueIndex = 0)
157 {
158 	bool		found				= false;
159 	VkQueue		queue				= 0;
160 	deUint32	queueFamilyCount	= 0;
161 	deUint32	queueFamilyIndex	= std::numeric_limits<deUint32>::max();
162 
163 	vki.getPhysicalDeviceQueueFamilyProperties(physDevice, &queueFamilyCount, nullptr);
164 
165 	std::vector<VkQueueFamilyProperties> queueFamilies(queueFamilyCount);
166 	vki.getPhysicalDeviceQueueFamilyProperties(physDevice, &queueFamilyCount, queueFamilies.data());
167 
168 	for (uint32_t index = 0; index < queueFamilyCount; ++index)
169 	{
170 		if ((queueFamilies[index].queueFlags & bits) == bits)
171 		{
172 			queueFamilyIndex = index;
173 			break;
174 		}
175 	}
176 
177 	if (std::numeric_limits<deUint32>::max() != queueFamilyIndex)
178 	{
179 		found = true;
180 		vkd.getDeviceQueue(device, queueFamilyIndex, queueIndex, &queue);
181 	}
182 #ifdef __cpp_lib_constexpr_tuple
183 	return { found, queue, queueFamilyIndex };
184 #else
185     return std::tuple<bool, VkQueue, deUint32>(found, queue, queueFamilyIndex);
186 #endif
187 }
188 
189 typedef std::vector<de::SharedPtr<BottomLevelAccelerationStructure>>	BlasVec;
190 auto initTopAccelerationStructure (VkCommandBuffer		cmdBuffer,
191 								   const BlasVec&		bottomLevelAccelerationStructures,
192 								   Context&				context,
193 								   const VkExtent3D&	imageExtent) -> de::MovePtr<TopLevelAccelerationStructure>
194 {
195 	const DeviceInterface&						vkd				= context.getDeviceInterface();
196 	const VkDevice								device			= context.getDevice();
197 	Allocator&									allocator		= context.getDefaultAllocator();
198 	const deUint32								instanceCount	= imageExtent.depth * imageExtent.height * imageExtent.width / 2;
199 
200 	de::MovePtr<TopLevelAccelerationStructure>	result = makeTopLevelAccelerationStructure();
201 	result->setInstanceCount(instanceCount);
202 
203 	deUint32 currentInstanceIndex = 0;
204 
205 	for (deUint32 z = 0; z < imageExtent.depth; ++z)
206 	for (deUint32 y = 0; y < imageExtent.height; ++y)
207 	for (deUint32 x = 0; x < imageExtent.width; ++x)
208 	{
209 		if (((x + y + z) % 2) == 0)
210 			continue;
211 		result->addInstance(bottomLevelAccelerationStructures[currentInstanceIndex++]);
212 	}
213 	result->createAndBuild(vkd, device, cmdBuffer, allocator);
214 
215 	return result;
216 }
217 
218 class RayTracingTraceRaysIndirectTestCase : public TestCase
219 {
220 	public:
221 							RayTracingTraceRaysIndirectTestCase		(tcu::TestContext& context, const char* name, const char* desc, const TestParams data);
222 							~RayTracingTraceRaysIndirectTestCase	(void);
223 
224 	virtual void			checkSupport								(Context& context) const;
225 	virtual	void			initPrograms								(SourceCollections& programCollection) const;
226 	virtual TestInstance*	createInstance								(Context& context) const;
227 private:
228 	TestParams				m_data;
229 };
230 
231 class RayTracingTraceRaysIndirectTestInstance : public TestInstance
232 {
233 public:
234 																	RayTracingTraceRaysIndirectTestInstance			(Context& context, const TestParams& data);
235 																	~RayTracingTraceRaysIndirectTestInstance		(void);
236 	tcu::TestStatus													iterate								(void);
237 
238 protected:
239 	std::vector<de::SharedPtr<BottomLevelAccelerationStructure>>	initBottomAccelerationStructures	(VkCommandBuffer												cmdBuffer);
240 	de::MovePtr<BufferWithMemory>									runTest								();
241 
242 private:
243 	TestParams														m_data;
244 	VkExtent3D														m_imageExtent;
245 };
246 
247 
RayTracingTraceRaysIndirectTestCase(tcu::TestContext& context, const char* name, const char* desc, const TestParams data)248 RayTracingTraceRaysIndirectTestCase::RayTracingTraceRaysIndirectTestCase (tcu::TestContext& context, const char* name, const char* desc, const TestParams data)
249 	: vkt::TestCase	(context, name, desc)
250 	, m_data		(data)
251 {
252 }
253 
~RayTracingTraceRaysIndirectTestCase(void)254 RayTracingTraceRaysIndirectTestCase::~RayTracingTraceRaysIndirectTestCase	(void)
255 {
256 }
257 
checkSupport(Context& context) const258 void RayTracingTraceRaysIndirectTestCase::checkSupport(Context& context) const
259 {
260 	context.requireDeviceFunctionality("VK_KHR_acceleration_structure");
261 	context.requireDeviceFunctionality("VK_KHR_ray_tracing_pipeline");
262 
263 	const VkPhysicalDeviceRayTracingPipelineFeaturesKHR&	rayTracingPipelineFeaturesKHR		= context.getRayTracingPipelineFeatures();
264 	if (rayTracingPipelineFeaturesKHR.rayTracingPipeline == DE_FALSE )
265 		TCU_THROW(NotSupportedError, "Requires VkPhysicalDeviceRayTracingPipelineFeaturesKHR.rayTracingPipeline");
266 
267 	if (rayTracingPipelineFeaturesKHR.rayTracingPipelineTraceRaysIndirect == DE_FALSE)
268 		TCU_THROW(NotSupportedError, "Requires VkPhysicalDeviceRayTracingPipelineFeaturesKHR.rayTracingPipelineTraceRaysIndirect");
269 
270 	if (m_data.useKhrMaintenance1Semantics) {
271 		context.requireDeviceFunctionality("VK_KHR_ray_tracing_maintenance1");
272 
273 		const VkPhysicalDeviceFeatures deviceFeatures = getPhysicalDeviceFeatures(context.getInstanceInterface(), context.getPhysicalDevice());
274 		if (!deviceFeatures.shaderInt64)
275 		{
276 			TCU_THROW(NotSupportedError, "Device feature shaderInt64 is not supported");
277 		}
278 	}
279 
280 	const VkPhysicalDeviceAccelerationStructureFeaturesKHR&	accelerationStructureFeaturesKHR	= context.getAccelerationStructureFeatures();
281 	if (accelerationStructureFeaturesKHR.accelerationStructure == DE_FALSE)
282 		TCU_THROW(TestError, "VK_KHR_ray_tracing_pipeline requires VkPhysicalDeviceAccelerationStructureFeaturesKHR.accelerationStructure");
283 }
284 
initPrograms(SourceCollections& programCollection) const285 void RayTracingTraceRaysIndirectTestCase::initPrograms (SourceCollections& programCollection) const
286 {
287 	const vk::ShaderBuildOptions	buildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4, 0u, true);
288 	{
289 		std::stringstream css;
290 		css <<
291 			"#version 460 core\n"
292 			<< (m_data.useKhrMaintenance1Semantics ? "#extension GL_ARB_gpu_shader_int64: enable\n" : "\n") <<
293 			"struct TraceRaysIndirectCommand\n"
294 			"{\n";
295 		if (m_data.useKhrMaintenance1Semantics)
296 		{
297 			css <<
298 				"	uint64_t raygenShaderRecordAddress;\n"
299 				"	uint64_t raygenShaderRecordSize;\n"
300 				"	uint64_t missShaderBindingTableAddress;\n"
301 				"	uint64_t missShaderBindingTableSize;\n"
302 				"	uint64_t missShaderBindingTableStride;\n"
303 				"	uint64_t hitShaderBindingTableAddress;\n"
304 				"	uint64_t hitShaderBindingTableSize;\n"
305 				"	uint64_t hitShaderBindingTableStride;\n"
306 				"	uint64_t callableShaderBindingTableAddress;\n"
307 				"	uint64_t callableShaderBindingTableSize;\n"
308 				"	uint64_t callableShaderBindingTableStride;\n";
309 		}
310 		css <<
311 			"	uint width;\n"
312 			"	uint height;\n"
313 			"	uint depth;\n"
314 			"};\n"
315 			"layout(binding = 0) uniform IndirectCommandsUBO\n"
316 			"{\n"
317 			"	TraceRaysIndirectCommand indirectCommands;\n"
318 			"} ubo;\n"
319 			"layout(binding = 1) buffer IndirectCommandsSBO\n"
320 			"{\n"
321 			"	TraceRaysIndirectCommand indirectCommands;\n"
322 			"};\n"
323 			"void main()\n"
324 			"{\n";
325 		if (m_data.useKhrMaintenance1Semantics)
326 		{
327 			css <<
328 				"  indirectCommands.raygenShaderRecordAddress         = ubo.indirectCommands.raygenShaderRecordAddress;\n"
329 				"  indirectCommands.raygenShaderRecordSize            = ubo.indirectCommands.raygenShaderRecordSize;\n"
330 				"  indirectCommands.missShaderBindingTableAddress     = ubo.indirectCommands.missShaderBindingTableAddress;\n"
331 				"  indirectCommands.missShaderBindingTableSize        = ubo.indirectCommands.missShaderBindingTableSize;\n"
332 				"  indirectCommands.missShaderBindingTableStride      = ubo.indirectCommands.missShaderBindingTableStride;\n"
333 				"  indirectCommands.hitShaderBindingTableAddress      = ubo.indirectCommands.hitShaderBindingTableAddress;\n"
334 				"  indirectCommands.hitShaderBindingTableSize         = ubo.indirectCommands.hitShaderBindingTableSize;\n"
335 				"  indirectCommands.hitShaderBindingTableStride       = ubo.indirectCommands.hitShaderBindingTableStride;\n"
336 				"  indirectCommands.callableShaderBindingTableAddress = ubo.indirectCommands.callableShaderBindingTableAddress;\n"
337 				"  indirectCommands.callableShaderBindingTableSize    = ubo.indirectCommands.callableShaderBindingTableSize;\n"
338 				"  indirectCommands.callableShaderBindingTableStride  = ubo.indirectCommands.callableShaderBindingTableStride;\n";
339 		}
340 		css <<
341 			"  indirectCommands.width  = ubo.indirectCommands.width;\n"
342 			"  indirectCommands.height = ubo.indirectCommands.height;\n"
343 			"  indirectCommands.depth  = ubo.indirectCommands.depth;\n"
344 			"}\n";
345 
346 		programCollection.glslSources.add("compute_indirect_command") << glu::ComputeSource(css.str()) << buildOptions;
347 	}
348 
349 	{
350 		std::stringstream css;
351 		css <<
352 			"#version 460 core\n"
353 			"#extension GL_EXT_ray_tracing : require\n"
354 			"layout(location = 0) rayPayloadEXT uvec4 hitValue;\n"
355 			"layout(r32ui, set = 0, binding = 0) uniform uimage3D result;\n"
356 			"layout(set = 0, binding = 1) uniform accelerationStructureEXT topLevelAS;\n"
357 			"\n"
358 			"void main()\n"
359 			"{\n"
360 			"  float tmin     = 0.0;\n"
361 			"  float tmax     = 1.0;\n"
362 			"  vec3  origin   = vec3(float(gl_LaunchIDEXT.x) + 0.5f, float(gl_LaunchIDEXT.y) + 0.5f, float(gl_LaunchIDEXT.z + 0.5f));\n"
363 			"  vec3  direct   = vec3(0.0, 0.0, -1.0);\n"
364 			"  hitValue       = uvec4(0,0,0,0);\n"
365 			"  traceRayEXT(topLevelAS, 0, 0xFF, 0, 0, 0, origin, tmin, direct, tmax, 0);\n"
366 			"  imageStore(result, ivec3(gl_LaunchIDEXT), hitValue);\n"
367 			"}\n";
368 		programCollection.glslSources.add("rgen") << glu::RaygenSource(updateRayTracingGLSL(css.str())) << buildOptions;
369 	}
370 
371 	{
372 		std::stringstream css;
373 		css <<
374 			"#version 460 core\n"
375 			"#extension GL_EXT_ray_tracing : require\n"
376 			"layout(location = 0) rayPayloadInEXT uvec4 hitValue;\n"
377 			"void main()\n"
378 			"{\n"
379 			"  hitValue = uvec4(" << kHitColorValue << ",0,0,1);\n"
380 			"}\n";
381 		programCollection.glslSources.add("chit") << glu::ClosestHitSource(updateRayTracingGLSL(css.str())) << buildOptions;
382 	}
383 
384 	{
385 		std::stringstream css;
386 		css <<
387 			"#version 460 core\n"
388 			"#extension GL_EXT_ray_tracing : require\n"
389 			"layout(location = 0) rayPayloadInEXT uvec4 hitValue;\n"
390 			"void main()\n"
391 			"{\n"
392 			"  hitValue = uvec4(" << kMissColorValue << ",0,0,1);\n"
393 			"}\n";
394 
395 		programCollection.glslSources.add("miss") << glu::MissSource(updateRayTracingGLSL(css.str())) << buildOptions;
396 	}
397 }
398 
createInstance(Context& context) const399 TestInstance* RayTracingTraceRaysIndirectTestCase::createInstance (Context& context) const
400 {
401 	return new RayTracingTraceRaysIndirectTestInstance(context, m_data);
402 }
403 
RayTracingTraceRaysIndirectTestInstance(Context& context, const TestParams& data)404 RayTracingTraceRaysIndirectTestInstance::RayTracingTraceRaysIndirectTestInstance (Context& context, const TestParams& data)
405 	: vkt::TestInstance		(context)
406 	, m_data				(data)
407 {
408 	m_imageExtent = data.useKhrMaintenance1Semantics ? getImageExtent(data.extendedTraceDimensions) : getImageExtent(data.traceDimensions);
409 }
410 
~RayTracingTraceRaysIndirectTestInstance(void)411 RayTracingTraceRaysIndirectTestInstance::~RayTracingTraceRaysIndirectTestInstance (void)
412 {
413 }
414 
initBottomAccelerationStructures(VkCommandBuffer cmdBuffer)415 std::vector<de::SharedPtr<BottomLevelAccelerationStructure> > RayTracingTraceRaysIndirectTestInstance::initBottomAccelerationStructures (VkCommandBuffer cmdBuffer)
416 {
417 	const DeviceInterface&											vkd			= m_context.getDeviceInterface();
418 	const VkDevice													device		= m_context.getDevice();
419 	Allocator&														allocator	= m_context.getDefaultAllocator();
420 	std::vector<de::SharedPtr<BottomLevelAccelerationStructure> >	result;
421 
422 	tcu::Vec3 v0(0.0, 1.0, 0.0);
423 	tcu::Vec3 v1(0.0, 0.0, 0.0);
424 	tcu::Vec3 v2(1.0, 1.0, 0.0);
425 	tcu::Vec3 v3(1.0, 0.0, 0.0);
426 
427 	for (deUint32 z = 0; z < m_imageExtent.depth; ++z)
428 	for (deUint32 y = 0; y < m_imageExtent.height; ++y)
429 	for (deUint32 x = 0; x < m_imageExtent.width; ++x)
430 	{
431 		// let's build a 3D chessboard of geometries
432 		if (((x + y + z) % 2) == 0)
433 			continue;
434 		tcu::Vec3 xyz((float)x, (float)y, (float)z);
435 		std::vector<tcu::Vec3>	geometryData;
436 
437 		de::MovePtr<BottomLevelAccelerationStructure>	bottomLevelAccelerationStructure = makeBottomLevelAccelerationStructure();
438 		bottomLevelAccelerationStructure->setGeometryCount(1u);
439 
440 		geometryData.push_back(xyz + v0);
441 		geometryData.push_back(xyz + v1);
442 		geometryData.push_back(xyz + v2);
443 		geometryData.push_back(xyz + v2);
444 		geometryData.push_back(xyz + v1);
445 		geometryData.push_back(xyz + v3);
446 
447 		bottomLevelAccelerationStructure->addGeometry(geometryData, true);
448 		bottomLevelAccelerationStructure->createAndBuild(vkd, device, cmdBuffer, allocator);
449 		result.push_back(de::SharedPtr<BottomLevelAccelerationStructure>(bottomLevelAccelerationStructure.release()));
450 	}
451 
452 	return result;
453 }
454 
runTest()455 de::MovePtr<BufferWithMemory> RayTracingTraceRaysIndirectTestInstance::runTest()
456 {
457 	const InstanceInterface&			vki									= m_context.getInstanceInterface();
458 	const DeviceInterface&				vkd									= m_context.getDeviceInterface();
459 	const VkDevice						device								= m_context.getDevice();
460 	const VkPhysicalDevice				physicalDevice						= m_context.getPhysicalDevice();
461 	const deUint32						queueFamilyIndex					= m_context.getUniversalQueueFamilyIndex();
462 	const VkQueue						queue								= m_context.getUniversalQueue();
463 	Allocator&							allocator							= m_context.getDefaultAllocator();
464 	const deUint32						pixelCount							= m_imageExtent.depth * m_imageExtent.height * m_imageExtent.width;
465 	const deUint32						shaderGroupHandleSize				= getShaderGroupSize(vki, physicalDevice);
466 	const deUint32						shaderGroupBaseAlignment			= getShaderGroupBaseAlignment(vki, physicalDevice);
467 
468 	Move<VkDescriptorSetLayout>			computeDescriptorSetLayout;
469 	Move<VkDescriptorPool>				computeDescriptorPool;
470 	Move<VkDescriptorSet>				computeDescriptorSet;
471 	Move<VkPipelineLayout>				computePipelineLayout;
472 	Move<VkShaderModule>				computeShader;
473 	Move<VkPipeline>					computePipeline;
474 
475 	if (m_data.traceType == TraceType::INDIRECT_GPU || m_data.traceType == TraceType::INDIRECT2_GPU)
476 	{
477 		computeDescriptorSetLayout			= DescriptorSetLayoutBuilder()
478 													.addSingleBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
479 													.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
480 													.build(vkd, device);
481 		computeDescriptorPool				= DescriptorPoolBuilder()
482 													.addType(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER)
483 													.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
484 													.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
485 		computeDescriptorSet				= makeDescriptorSet(vkd, device, *computeDescriptorPool, *computeDescriptorSetLayout);
486 		computePipelineLayout				= makePipelineLayout(vkd, device, computeDescriptorSetLayout.get());
487 
488 		computeShader						= createShaderModule(vkd, device, m_context.getBinaryCollection().get("compute_indirect_command"), 0);
489 		const VkPipelineShaderStageCreateInfo pipelineShaderStageParams =
490 		{
491 			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,	// VkStructureType						sType;
492 			DE_NULL,												// const void*							pNext;
493 			VkPipelineShaderStageCreateFlags(0u),					// VkPipelineShaderStageCreateFlags		flags;
494 			VK_SHADER_STAGE_COMPUTE_BIT,							// VkShaderStageFlagBits				stage;
495 			*computeShader,											// VkShaderModule						module;
496 			"main",													// const char*							pName;
497 			DE_NULL,												// const VkSpecializationInfo*			pSpecializationInfo;
498 		};
499 		const VkComputePipelineCreateInfo pipelineCreateInfo =
500 		{
501 			VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,		// VkStructureType					sType;
502 			DE_NULL,											// const void*						pNext;
503 			VkPipelineCreateFlags(0u),							// VkPipelineCreateFlags			flags;
504 			pipelineShaderStageParams,							// VkPipelineShaderStageCreateInfo	stage;
505 			*computePipelineLayout,								// VkPipelineLayout					layout;
506 			DE_NULL,											// VkPipeline						basePipelineHandle;
507 			0,													// deInt32							basePipelineIndex;
508 		};
509 
510 		computePipeline = vk::createComputePipeline(vkd, device, (VkPipelineCache)0u, &pipelineCreateInfo);
511 	}
512 
513 	const Move<VkDescriptorSetLayout>	descriptorSetLayout					= DescriptorSetLayoutBuilder()
514 																					.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, ALL_RAY_TRACING_STAGES)
515 																					.addSingleBinding(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, ALL_RAY_TRACING_STAGES)
516 																					.build(vkd, device);
517 	const Move<VkDescriptorPool>		descriptorPool						= DescriptorPoolBuilder()
518 																					.addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
519 																					.addType(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR)
520 																					.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
521 	const Move<VkDescriptorSet>			descriptorSet						= makeDescriptorSet(vkd, device, *descriptorPool, *descriptorSetLayout);
522 	const Move<VkPipelineLayout>		pipelineLayout						= makePipelineLayout(vkd, device, descriptorSetLayout.get());
523 
524 	de::MovePtr<RayTracingPipeline>		rayTracingPipeline					= de::newMovePtr<RayTracingPipeline>();
525 	rayTracingPipeline->addShader(VK_SHADER_STAGE_RAYGEN_BIT_KHR,		createShaderModule(vkd, device, m_context.getBinaryCollection().get("rgen"), 0), 0);
526 	rayTracingPipeline->addShader(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR,	createShaderModule(vkd, device, m_context.getBinaryCollection().get("chit"), 0), 1);
527 	rayTracingPipeline->addShader(VK_SHADER_STAGE_MISS_BIT_KHR,			createShaderModule(vkd, device, m_context.getBinaryCollection().get("miss"), 0), 2);
528 	Move<VkPipeline>					pipeline							= rayTracingPipeline->createPipeline(vkd, device, *pipelineLayout);
529 
530 	const de::MovePtr<BufferWithMemory>	raygenShaderBindingTable			= rayTracingPipeline->createShaderBindingTable(vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 0, 1 );
531 	const de::MovePtr<BufferWithMemory>	hitShaderBindingTable				= rayTracingPipeline->createShaderBindingTable(vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 1, 1 );
532 	const de::MovePtr<BufferWithMemory>	missShaderBindingTable				= rayTracingPipeline->createShaderBindingTable(vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 2, 1 );
533 
534 	const VkStridedDeviceAddressRegionKHR	raygenShaderBindingTableRegion	= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, raygenShaderBindingTable->get(), 0), shaderGroupHandleSize, shaderGroupHandleSize);
535 	const VkStridedDeviceAddressRegionKHR	missShaderBindingTableRegion	= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, missShaderBindingTable->get(), 0), shaderGroupHandleSize, shaderGroupHandleSize);
536 	const VkStridedDeviceAddressRegionKHR	hitShaderBindingTableRegion		= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, hitShaderBindingTable->get(), 0), shaderGroupHandleSize, shaderGroupHandleSize);
537 	const VkStridedDeviceAddressRegionKHR	callableShaderBindingTableRegion= makeStridedDeviceAddressRegionKHR(DE_NULL, 0, 0);
538 
539 	const VkFormat						imageFormat							= VK_FORMAT_R32_UINT;
540 	const VkImageCreateInfo				imageCreateInfo						= makeImageCreateInfo(m_imageExtent.width, m_imageExtent.height, m_imageExtent.depth, imageFormat);
541 	const VkImageSubresourceRange		imageSubresourceRange				= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0, 1u);
542 	const de::MovePtr<ImageWithMemory>	image								= de::MovePtr<ImageWithMemory>(new ImageWithMemory(vkd, device, allocator, imageCreateInfo, MemoryRequirement::Any));
543 	const Move<VkImageView>				imageView							= makeImageView(vkd, device, **image, VK_IMAGE_VIEW_TYPE_3D, imageFormat, imageSubresourceRange);
544 
545 	const VkBufferCreateInfo			resultBufferCreateInfo				= makeBufferCreateInfo(pixelCount*sizeof(deUint32), VK_BUFFER_USAGE_TRANSFER_DST_BIT);
546 	const VkImageSubresourceLayers		resultBufferImageSubresourceLayers	= makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
547 	const VkBufferImageCopy				resultBufferImageRegion				= makeBufferImageCopy(m_imageExtent, resultBufferImageSubresourceLayers);
548 	de::MovePtr<BufferWithMemory>		resultBuffer						= de::MovePtr<BufferWithMemory>(new BufferWithMemory(vkd, device, allocator, resultBufferCreateInfo, MemoryRequirement::HostVisible));
549 
550 	const VkDescriptorImageInfo			descriptorImageInfo					= makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
551 
552 	// create indirect command buffer and fill it with parameter values
553 	de::MovePtr<BufferWithMemory>		indirectBuffer;
554 	de::MovePtr<BufferWithMemory>		uniformBuffer;
555 
556 	// Update trace details according to VK_KHR_ray_tracing_maintenance1 semantics
557 	m_data.extendedTraceDimensions.raygenShaderRecordAddress			= raygenShaderBindingTableRegion.deviceAddress;
558 	m_data.extendedTraceDimensions.raygenShaderRecordSize				= raygenShaderBindingTableRegion.size;
559 	m_data.extendedTraceDimensions.missShaderBindingTableAddress		= missShaderBindingTableRegion.deviceAddress;
560 	m_data.extendedTraceDimensions.missShaderBindingTableSize			= missShaderBindingTableRegion.size;
561 	m_data.extendedTraceDimensions.missShaderBindingTableStride			= missShaderBindingTableRegion.stride;
562 	m_data.extendedTraceDimensions.hitShaderBindingTableAddress			= hitShaderBindingTableRegion.deviceAddress;
563 	m_data.extendedTraceDimensions.hitShaderBindingTableSize			= hitShaderBindingTableRegion.size;
564 	m_data.extendedTraceDimensions.hitShaderBindingTableStride			= hitShaderBindingTableRegion.stride;
565 	m_data.extendedTraceDimensions.callableShaderBindingTableAddress	= callableShaderBindingTableRegion.deviceAddress;
566 	m_data.extendedTraceDimensions.callableShaderBindingTableSize		= callableShaderBindingTableRegion.size;
567 	m_data.extendedTraceDimensions.callableShaderBindingTableStride		= callableShaderBindingTableRegion.stride;
568 
569 	if (m_data.traceType != TraceType::DIRECT)
570 	{
571 		const bool							indirectGpu = (m_data.traceType == TraceType::INDIRECT_GPU || m_data.traceType == TraceType::INDIRECT2_GPU);
572 		VkDeviceSize						bufferSize = m_data.useKhrMaintenance1Semantics ?  sizeof(VkTraceRaysIndirectCommand2KHR) : sizeof(VkTraceRaysIndirectCommandKHR);
573 		VkBufferUsageFlags					indirectBufferUsageFlags = VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | (indirectGpu ? VK_BUFFER_USAGE_STORAGE_BUFFER_BIT : VK_BUFFER_USAGE_TRANSFER_DST_BIT);
574 		const VkBufferCreateInfo			indirectBufferCreateInfo = makeBufferCreateInfo(bufferSize, indirectBufferUsageFlags);
575 		vk::MemoryRequirement				indirectBufferMemoryRequirement = MemoryRequirement::DeviceAddress | (indirectGpu ? MemoryRequirement::Any : MemoryRequirement::HostVisible);
576 		indirectBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vkd, device, allocator, indirectBufferCreateInfo, indirectBufferMemoryRequirement));
577 	}
578 
579 	if (m_data.traceType == TraceType::INDIRECT_GPU)
580 	{
581 		const VkBufferCreateInfo			uniformBufferCreateInfo = makeBufferCreateInfo(sizeof(VkTraceRaysIndirectCommandKHR), VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT);
582 		uniformBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vkd, device, allocator, uniformBufferCreateInfo, MemoryRequirement::HostVisible));
583 		deMemcpy(uniformBuffer->getAllocation().getHostPtr(), &m_data.traceDimensions, sizeof(VkTraceRaysIndirectCommandKHR));
584 		flushMappedMemoryRange(vkd, device, uniformBuffer->getAllocation().getMemory(), uniformBuffer->getAllocation().getOffset(), VK_WHOLE_SIZE);
585 	}
586 	else if (m_data.traceType == TraceType::INDIRECT_CPU)
587 	{
588 		deMemcpy(indirectBuffer->getAllocation().getHostPtr(), &m_data.traceDimensions, sizeof(VkTraceRaysIndirectCommandKHR));
589 		flushMappedMemoryRange(vkd, device, indirectBuffer->getAllocation().getMemory(), indirectBuffer->getAllocation().getOffset(), VK_WHOLE_SIZE);
590 	}
591 	else if (m_data.traceType == TraceType::INDIRECT2_GPU)
592 	{
593 		const VkBufferCreateInfo			uniformBufferCreateInfo = makeBufferCreateInfo(sizeof(VkTraceRaysIndirectCommand2KHR), VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT);
594 		uniformBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(vkd, device, allocator, uniformBufferCreateInfo, MemoryRequirement::HostVisible));
595 		deMemcpy(uniformBuffer->getAllocation().getHostPtr(), &m_data.extendedTraceDimensions, sizeof(VkTraceRaysIndirectCommand2KHR));
596 		flushMappedMemoryRange(vkd, device, uniformBuffer->getAllocation().getMemory(), uniformBuffer->getAllocation().getOffset(), VK_WHOLE_SIZE);
597 	}
598 	else if (m_data.traceType == TraceType::INDIRECT2_CPU)
599 	{
600 		deMemcpy(indirectBuffer->getAllocation().getHostPtr(), &m_data.extendedTraceDimensions, sizeof(VkTraceRaysIndirectCommand2KHR));
601 		flushMappedMemoryRange(vkd, device, indirectBuffer->getAllocation().getMemory(), indirectBuffer->getAllocation().getOffset(), VK_WHOLE_SIZE);
602 	}
603 
604 	const Move<VkCommandPool>			cmdPool								= createCommandPool(vkd, device, 0, queueFamilyIndex);
605 	const Move<VkCommandBuffer>			cmdBuffer							= allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
606 
607 	std::vector<de::SharedPtr<BottomLevelAccelerationStructure> >	bottomLevelAccelerationStructures;
608 	de::MovePtr<TopLevelAccelerationStructure>						topLevelAccelerationStructure;
609 
610 	beginCommandBuffer(vkd, *cmdBuffer, 0u);
611 	{
612 		const VkImageMemoryBarrier			preImageBarrier						= makeImageMemoryBarrier(0u, VK_ACCESS_TRANSFER_WRITE_BIT,
613 																					VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
614 																					**image, imageSubresourceRange);
615 		cmdPipelineImageMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, &preImageBarrier);
616 
617 		const VkClearValue					clearValue							= makeClearValueColorU32(kClearColorValue, 0u, 0u, 0u);
618 		vkd.cmdClearColorImage(*cmdBuffer, **image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clearValue.color, 1, &imageSubresourceRange);
619 
620 		const VkImageMemoryBarrier			postImageBarrier					= makeImageMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR,
621 																					VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL,
622 																					**image, imageSubresourceRange);
623 		cmdPipelineImageMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, &postImageBarrier);
624 
625 		bottomLevelAccelerationStructures	= initBottomAccelerationStructures(*cmdBuffer);
626 		topLevelAccelerationStructure		= initTopAccelerationStructure(*cmdBuffer, bottomLevelAccelerationStructures, m_context, m_imageExtent);
627 
628 		if (m_data.traceType == TraceType::INDIRECT_GPU)
629 		{
630 			const VkDescriptorBufferInfo	uniformBufferDescriptorInfo = makeDescriptorBufferInfo(uniformBuffer->get(), 0ull, sizeof(VkTraceRaysIndirectCommandKHR));
631 			const VkDescriptorBufferInfo	indirectBufferDescriptorInfo = makeDescriptorBufferInfo(indirectBuffer->get(), 0ull, sizeof(VkTraceRaysIndirectCommandKHR));
632 
633 			DescriptorSetUpdateBuilder()
634 				.writeSingle(*computeDescriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &uniformBufferDescriptorInfo)
635 				.writeSingle(*computeDescriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &indirectBufferDescriptorInfo)
636 				.update(vkd, device);
637 
638 			vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);
639 			vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipelineLayout, 0u, 1u, &computeDescriptorSet.get(), 0u, DE_NULL);
640 			vkd.cmdDispatch(*cmdBuffer, 1, 1, 1);
641 
642 			const VkBufferMemoryBarrier		fillIndirectBufferMemoryBarrier	= makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_INDIRECT_COMMAND_READ_BIT,
643 																				indirectBuffer->get(), 0ull, sizeof(VkTraceRaysIndirectCommandKHR));
644 			cmdPipelineBufferMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, &fillIndirectBufferMemoryBarrier);
645 		}
646 		else if (m_data.traceType == TraceType::INDIRECT2_GPU)
647 		{
648 			const VkDescriptorBufferInfo	uniformBufferDescriptorInfo = makeDescriptorBufferInfo(uniformBuffer->get(), 0ull, sizeof(VkTraceRaysIndirectCommand2KHR));
649 			const VkDescriptorBufferInfo	indirectBufferDescriptorInfo = makeDescriptorBufferInfo(indirectBuffer->get(), 0ull, sizeof(VkTraceRaysIndirectCommand2KHR));
650 
651 			DescriptorSetUpdateBuilder()
652 				.writeSingle(*computeDescriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &uniformBufferDescriptorInfo)
653 				.writeSingle(*computeDescriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &indirectBufferDescriptorInfo)
654 				.update(vkd, device);
655 
656 			vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);
657 			vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipelineLayout, 0u, 1u, &computeDescriptorSet.get(), 0u, DE_NULL);
658 			vkd.cmdDispatch(*cmdBuffer, 1, 1, 1);
659 
660 			const VkBufferMemoryBarrier		fillIndirectBufferMemoryBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_INDIRECT_COMMAND_READ_BIT,
661 																				indirectBuffer->get(), 0ull, sizeof(VkTraceRaysIndirectCommand2KHR));
662 			cmdPipelineBufferMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, &fillIndirectBufferMemoryBarrier);
663 
664 		}
665 
666 		const TopLevelAccelerationStructure*			topLevelAccelerationStructurePtr		= topLevelAccelerationStructure.get();
667 		VkWriteDescriptorSetAccelerationStructureKHR	accelerationStructureWriteDescriptorSet	=
668 		{
669 			VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR,	//  VkStructureType						sType;
670 			DE_NULL,															//  const void*							pNext;
671 			1u,																	//  deUint32							accelerationStructureCount;
672 			topLevelAccelerationStructurePtr->getPtr(),							//  const VkAccelerationStructureKHR*	pAccelerationStructures;
673 		};
674 
675 		DescriptorSetUpdateBuilder()
676 			.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descriptorImageInfo)
677 			.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, &accelerationStructureWriteDescriptorSet)
678 			.update(vkd, device);
679 
680 		vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineLayout, 0, 1, &descriptorSet.get(), 0, DE_NULL);
681 
682 		vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipeline);
683 
684 		// Both calls should give the same results.
685 		if (m_data.traceType == TraceType::DIRECT)
686 		{
687 			cmdTraceRays(vkd,
688 				*cmdBuffer,
689 				&raygenShaderBindingTableRegion,
690 				&missShaderBindingTableRegion,
691 				&hitShaderBindingTableRegion,
692 				&callableShaderBindingTableRegion,
693 				m_data.traceDimensions.width, m_data.traceDimensions.height, m_data.traceDimensions.depth);
694 		}
695 		else if(m_data.traceType == TraceType::INDIRECT_CPU || m_data.traceType == TraceType::INDIRECT_GPU)
696 		{
697 			cmdTraceRaysIndirect(vkd,
698 				*cmdBuffer,
699 				&raygenShaderBindingTableRegion,
700 				&missShaderBindingTableRegion,
701 				&hitShaderBindingTableRegion,
702 				&callableShaderBindingTableRegion,
703 				getBufferDeviceAddress(vkd, device, indirectBuffer->get(), 0));
704 		}
705 		else if (m_data.traceType == TraceType::INDIRECT2_CPU || m_data.traceType == TraceType::INDIRECT2_GPU)
706 		{
707 			vkd.cmdTraceRaysIndirect2KHR(
708 				*cmdBuffer,
709 				getBufferDeviceAddress(vkd, device, indirectBuffer->get(), 0));
710 		}
711 
712 		const VkMemoryBarrier							postTraceMemoryBarrier					= makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
713 		const VkMemoryBarrier							postCopyMemoryBarrier					= makeMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
714 		cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, VK_PIPELINE_STAGE_TRANSFER_BIT, &postTraceMemoryBarrier);
715 
716 		vkd.cmdCopyImageToBuffer(*cmdBuffer, **image, VK_IMAGE_LAYOUT_GENERAL, **resultBuffer, 1u, &resultBufferImageRegion);
717 
718 		cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, &postCopyMemoryBarrier);
719 	}
720 	endCommandBuffer(vkd, *cmdBuffer);
721 
722 	submitCommandsAndWait(vkd, device, queue, cmdBuffer.get());
723 
724 	invalidateMappedMemoryRange(vkd, device, resultBuffer->getAllocation().getMemory(), resultBuffer->getAllocation().getOffset(), VK_WHOLE_SIZE);
725 
726 	return resultBuffer;
727 }
728 
iterate(void)729 tcu::TestStatus RayTracingTraceRaysIndirectTestInstance::iterate (void)
730 {
731 	// run test using arrays of pointers
732 	const de::MovePtr<BufferWithMemory>	buffer		= runTest();
733 	const deUint32*						bufferPtr	= (deUint32*)buffer->getAllocation().getHostPtr();
734 	const bool							noWrites	= m_data.useKhrMaintenance1Semantics ? isNullTrace(m_data.extendedTraceDimensions) : isNullTrace(m_data.traceDimensions);
735 
736 	deUint32							failures		= 0;
737 	deUint32							pos				= 0;
738 
739 	// verify results
740 	for (deUint32 z = 0; z < m_imageExtent.depth; ++z)
741 	for (deUint32 y = 0; y < m_imageExtent.height; ++y)
742 	for (deUint32 x = 0; x < m_imageExtent.width; ++x)
743 	{
744 		const deUint32 expectedResult = (noWrites ? kClearColorValue : (((x + y + z) % 2) ? kHitColorValue : kMissColorValue));
745 		if (bufferPtr[pos] != expectedResult)
746 			failures++;
747 		++pos;
748 	}
749 
750 	if (failures == 0)
751 		return tcu::TestStatus::pass("Pass");
752 	else
753 		return tcu::TestStatus::fail("Fail (failures=" + de::toString(failures) + ")");
754 }
755 
// Builds a "<width>_<height>_<depth>" string from any object exposing
// streamable width, height and depth members.
template<typename T>
std::string makeDimensionsName (const T cmd)
{
	const char* const	separator	= "_";
	std::ostringstream	stream;

	stream << cmd.width;
	stream << separator << cmd.height;
	stream << separator << cmd.depth;

	return stream.str();
}
763 
764 using namespace tcu;
765 
766 class TraceRaysIndirect2Instance : public TestInstance
767 {
768 public:
769 						TraceRaysIndirect2Instance	(Context&					context,
770 													 const TestParams2&			params);
771 	virtual				~TraceRaysIndirect2Instance	(void) override = default;
772 	virtual TestStatus	iterate						(void) override;
773 
774 protected:
775 	void				makeIndirectStructAndFlush	(BufferWithMemory&			buffer,
776 													 const bool					source,
777 													 const BufferWithMemory&	rgenSbt,
778 													 const BufferWithMemory&	hitSbt,
779 													 const BufferWithMemory&	missSbt,
780 													 const BufferWithMemory&	callSbt) const;
781 	void				initBottomAccellStructures	(VkCommandBuffer			cmdBuffer,
782 													 BottomLevelAccelerationStructurePool&	pool,
783 													 const deUint32&			batchStructCount) const;
784 private:
785 	TestParams2			m_params;
786 	const VkExtent3D	m_imageExtent;
787 };
788 
789 class TraceRaysIndirect2Case : public TestCase
790 {
791 public:
792 							TraceRaysIndirect2Case	(TestContext& testCtx, const std::string& name, const TestParams2& params);
793 	virtual					~TraceRaysIndirect2Case	(void) override = default;
794 	virtual void			initPrograms	(SourceCollections& programCollection) const override;
795 	virtual TestInstance*	createInstance	(Context& context) const override;
796 	virtual void			checkSupport	(Context& context) const override;
797 private:
798 	TestParams2	m_params;
799 };
800 
TraceRaysIndirect2Case(TestContext& testCtx, const std::string& name, const TestParams2& params)801 TraceRaysIndirect2Case::TraceRaysIndirect2Case (TestContext& testCtx, const std::string& name, const TestParams2& params)
802 	: TestCase	(testCtx, name, std::string())
803 	, m_params	(params)
804 {
805 }
806 
createInstance(Context& context) const807 TestInstance* TraceRaysIndirect2Case::createInstance (Context& context) const
808 {
809 	return new TraceRaysIndirect2Instance(context, m_params);
810 }
811 
checkSupport(Context& context) const812 void TraceRaysIndirect2Case::checkSupport (Context& context) const
813 {
814 	context.requireInstanceFunctionality(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME);
815 	context.requireDeviceFunctionality(VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME);
816 	context.requireDeviceFunctionality(VK_KHR_RAY_TRACING_MAINTENANCE_1_EXTENSION_NAME);
817 
818 	const VkPhysicalDeviceFeatures& features = context.getDeviceFeatures();
819 	if (features.shaderInt64 == VK_FALSE)
820 		TCU_THROW(NotSupportedError, "64-bit integers not supported by device");
821 
822 	const VkPhysicalDeviceAccelerationStructureFeaturesKHR&	accelerationStructureFeaturesKHR = context.getAccelerationStructureFeatures();
823 	if (accelerationStructureFeaturesKHR.accelerationStructure == VK_FALSE)
824 		TCU_THROW(NotSupportedError, "Requires VkPhysicalDeviceAccelerationStructureFeaturesKHR::accelerationStructure");
825 
826 	const VkPhysicalDeviceRayTracingMaintenance1FeaturesKHR& maintenance1FeaturesKHR = context.getRayTracingMaintenance1Features();
827 	if (maintenance1FeaturesKHR.rayTracingMaintenance1 == VK_FALSE)
828 		TCU_THROW(NotSupportedError, "Requires VkPhysicalDeviceRayTracingMaintenance1FeaturesKHR::rayTracingMaintenance1");
829 	if (maintenance1FeaturesKHR.rayTracingPipelineTraceRaysIndirect2 == VK_FALSE)
830 		TCU_THROW(NotSupportedError, "Requires VkPhysicalDeviceRayTracingMaintenance1FeaturesKHR::rayTracingPipelineTraceRaysIndirect2");
831 
832 	auto desiredQueue	= getQueueFamilyIndexAtExact(context.getDeviceInterface(),
833 													 context.getInstanceInterface(),
834 													 context.getPhysicalDevice(),
835 													 context.getDevice(),
836 													 m_params.submitQueue);
837 	if (!std::get<0>(desiredQueue))
838 	{
839 		std::stringstream errorMsg;
840 		errorMsg << "Desired queue " << m_params.submitQueue << " is not supported by device";
841 		errorMsg.flush();
842 		TCU_THROW(NotSupportedError, errorMsg.str());
843 	}
844 }
845 
initPrograms(SourceCollections& programCollection) const846 void TraceRaysIndirect2Case::initPrograms (SourceCollections& programCollection) const
847 {
848 	const vk::ShaderBuildOptions	buildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4, 0u, true);
849 	{
850 		std::stringstream css;
851 		std::string comp(R"(
852 		#version 460 core
853 		#extension GL_ARB_gpu_shader_int64: enable
854 		struct TraceRaysIndirectCommand
855 		{
856 			uint64_t raygenShaderRecordAddress;
857 			uint64_t raygenShaderRecordSize;
858 			uint64_t missShaderBindingTableAddress;
859 			uint64_t missShaderBindingTableSize;
860 			uint64_t missShaderBindingTableStride;
861 			uint64_t hitShaderBindingTableAddress;
862 			uint64_t hitShaderBindingTableSize;
863 			uint64_t hitShaderBindingTableStride;
864 			uint64_t callableShaderBindingTableAddress;
865 			uint64_t callableShaderBindingTableSize;
866 			uint64_t callableShaderBindingTableStride;
867 			uint     width;
868 			uint     height;
869 			uint     depth;
870 		};
871 		layout(push_constant) uniform CopyStyle {
872 			uint full;
873 		} cs;
874 		layout(binding = 0) uniform IndirectCommandsUBO {
875 			TraceRaysIndirectCommand indirectCommands;
876 		} ubo;
877 		layout(binding = 1) buffer IndirectCommandsSBO {
878 			TraceRaysIndirectCommand indirectCommands;
879 		};
880 		void main()
881 		{
882 			if (cs.full != 0) {
883 				indirectCommands.raygenShaderRecordAddress         = ubo.indirectCommands.raygenShaderRecordAddress;
884 				indirectCommands.raygenShaderRecordSize            = ubo.indirectCommands.raygenShaderRecordSize;
885 				indirectCommands.missShaderBindingTableAddress     = ubo.indirectCommands.missShaderBindingTableAddress;
886 				indirectCommands.missShaderBindingTableSize        = ubo.indirectCommands.missShaderBindingTableSize;
887 				indirectCommands.missShaderBindingTableStride      = ubo.indirectCommands.missShaderBindingTableStride;
888 				indirectCommands.hitShaderBindingTableAddress      = ubo.indirectCommands.hitShaderBindingTableAddress;
889 				indirectCommands.hitShaderBindingTableSize         = ubo.indirectCommands.hitShaderBindingTableSize;
890 				indirectCommands.hitShaderBindingTableStride       = ubo.indirectCommands.hitShaderBindingTableStride;
891 				indirectCommands.callableShaderBindingTableAddress = ubo.indirectCommands.callableShaderBindingTableAddress;
892 				indirectCommands.callableShaderBindingTableSize    = ubo.indirectCommands.callableShaderBindingTableSize;
893 				indirectCommands.callableShaderBindingTableStride  = ubo.indirectCommands.callableShaderBindingTableStride;
894 			}
895 			else {
896 				indirectCommands.raygenShaderRecordAddress         = ubo.indirectCommands.raygenShaderRecordAddress;
897 
898 				indirectCommands.missShaderBindingTableStride      = ubo.indirectCommands.missShaderBindingTableStride;
899 
900 				indirectCommands.hitShaderBindingTableSize         = ubo.indirectCommands.hitShaderBindingTableSize;
901 
902 				indirectCommands.callableShaderBindingTableAddress = ubo.indirectCommands.callableShaderBindingTableAddress;
903 				indirectCommands.callableShaderBindingTableStride  = ubo.indirectCommands.callableShaderBindingTableStride;
904 			}
905 
906 			indirectCommands.width                                 = ubo.indirectCommands.width;
907 			indirectCommands.height                                = ubo.indirectCommands.height;
908 			indirectCommands.depth                                 = ubo.indirectCommands.depth;
909 
910 		})");
911 
912 		programCollection.glslSources.add("compute_indirect_command") << glu::ComputeSource(comp) << buildOptions;
913 	}
914 
915 	{
916 		std::stringstream css;
917 		css <<
918 			"#version 460 core\n"
919 			"#extension GL_EXT_ray_tracing : require\n"
920 			"layout(location = 0) rayPayloadEXT uvec4 hitValue;\n"
921 			"layout(r32ui, set = 0, binding = 0) uniform uimage3D result;\n"
922 			"layout(set = 0, binding = 1) uniform accelerationStructureEXT topLevelAS;\n"
923 			"\n"
924 			"void main()\n"
925 			"{\n"
926 			"  float tmin     = 0.0;\n"
927 			"  float tmax     = 1.0;\n"
928 			"  vec3  origin   = vec3(float(gl_LaunchIDEXT.x) + 0.5f, float(gl_LaunchIDEXT.y) + 0.5f, float(gl_LaunchIDEXT.z + 0.5f));\n"
929 			"  vec3  direct   = vec3(0.0, 0.0, -1.0);\n"
930 			"  hitValue       = uvec4(0,0,0,0);\n"
931 			"  traceRayEXT(topLevelAS, 0, 0xFF, 0, 0, 0, origin, tmin, direct, tmax, 0);\n"
932 			"  imageStore(result, ivec3(gl_LaunchIDEXT), hitValue);\n"
933 			"}\n";
934 		programCollection.glslSources.add("rgen") << glu::RaygenSource(updateRayTracingGLSL(css.str())) << buildOptions;
935 	}
936 
937 	{
938 		std::stringstream css;
939 		css <<
940 			"#version 460 core\n"
941 			"#extension GL_EXT_ray_tracing : require\n"
942 			"layout(location = 0) rayPayloadInEXT uvec4 hitValue;\n"
943 			"void main()\n"
944 			"{\n"
945 			"  hitValue = uvec4(" << kHitColorValue << ",0,0,1);\n"
946 			"}\n";
947 		programCollection.glslSources.add("chit") << glu::ClosestHitSource(updateRayTracingGLSL(css.str())) << buildOptions;
948 	}
949 
950 	{
951 		std::stringstream css;
952 		css <<
953 			"#version 460 core\n"
954 			"#extension GL_EXT_ray_tracing : require\n"
955 			"layout(location = 0) rayPayloadInEXT uvec4 hitValue;\n"
956 			"void main()\n"
957 			"{\n"
958 			"  hitValue = uvec4(" << kMissColorValue << ",0,0,1);\n"
959 			"}\n";
960 
961 		programCollection.glslSources.add("miss") << glu::MissSource(updateRayTracingGLSL(css.str())) << buildOptions;
962 	}
963 }
964 
TraceRaysIndirect2Instance(Context& context, const TestParams2& params)965 TraceRaysIndirect2Instance::TraceRaysIndirect2Instance (Context& context, const TestParams2& params)
966 	: TestInstance	(context)
967 	, m_params		(params)
968 	, m_imageExtent	(getNonNullImageExtent(params.traceDimensions))
969 {
970 }
971 
makeIndirectStructAndFlush(BufferWithMemory& buffer, const bool source, const BufferWithMemory& rgenSbt, const BufferWithMemory& hitSbt, const BufferWithMemory& missSbt, const BufferWithMemory& callSbt) const972 void TraceRaysIndirect2Instance::makeIndirectStructAndFlush	(BufferWithMemory&			buffer,
973 															 const bool					source,
974 															 const BufferWithMemory&	rgenSbt,
975 															 const BufferWithMemory&	hitSbt,
976 															 const BufferWithMemory&	missSbt,
977 															 const BufferWithMemory&	callSbt) const
978 {
979 	DE_UNREF(callSbt);
980 
981 	const DeviceInterface&				vkd						= m_context.getDeviceInterface();
982 	const InstanceInterface&			vki						= m_context.getInstanceInterface();
983 	const VkPhysicalDevice				physicalDevice			= m_context.getPhysicalDevice();
984 	const VkDevice						device					= m_context.getDevice();
985 	const deUint32						shaderGroupHandleSize	= getShaderGroupSize(vki, physicalDevice);
986 	Allocation&							alloc					= buffer.getAllocation();
987 
988 	VkTraceRaysIndirectCommand2KHR		data					{};
989 
990 	if (m_params.traceType == TraceType::INDIRECT_GPU && m_params.partialCopy)
991 	{
992 		if (source)
993 		{
994 			data.raygenShaderRecordAddress			= getBufferDeviceAddress(vkd, device, *rgenSbt, 0);
995 			data.missShaderBindingTableStride		= shaderGroupHandleSize;
996 			data.hitShaderBindingTableSize			= shaderGroupHandleSize;
997 			data.callableShaderBindingTableAddress	= 0;
998 			data.callableShaderBindingTableStride	= 0;
999 		}
1000 		else
1001 		{
1002 			data.raygenShaderRecordSize				= shaderGroupHandleSize;
1003 			data.missShaderBindingTableAddress		= getBufferDeviceAddress(vkd, device, *missSbt, 0);
1004 			data.missShaderBindingTableSize			= shaderGroupHandleSize;
1005 			data.hitShaderBindingTableAddress		= getBufferDeviceAddress(vkd, device, *hitSbt, 0);
1006 			data.hitShaderBindingTableStride		= shaderGroupHandleSize;
1007 			data.callableShaderBindingTableSize		= 0;
1008 		}
1009 	}
1010 	else
1011 	{
1012 		data.raygenShaderRecordAddress				= getBufferDeviceAddress(vkd, device, *rgenSbt, 0);
1013 		data.raygenShaderRecordSize					= shaderGroupHandleSize;
1014 
1015 		data.missShaderBindingTableAddress			= getBufferDeviceAddress(vkd, device, *missSbt, 0);
1016 		data.missShaderBindingTableSize				= shaderGroupHandleSize;
1017 		data.missShaderBindingTableStride			= shaderGroupHandleSize;
1018 
1019 		data.hitShaderBindingTableAddress			= getBufferDeviceAddress(vkd, device, *hitSbt, 0);
1020 		data.hitShaderBindingTableSize				= shaderGroupHandleSize;
1021 		data.hitShaderBindingTableStride			= shaderGroupHandleSize;
1022 
1023 		data.callableShaderBindingTableAddress		= 0;
1024 		data.callableShaderBindingTableSize			= 0;
1025 		data.callableShaderBindingTableStride		= 0;
1026 	}
1027 
1028 	data.width	= m_params.traceDimensions.width;
1029 	data.height	= m_params.traceDimensions.height;
1030 	data.depth	= m_params.traceDimensions.depth;
1031 
1032 	deMemcpy(alloc.getHostPtr(), &data, sizeof(data));
1033 	flushMappedMemoryRange(vkd, device, alloc.getMemory(), alloc.getOffset(), VK_WHOLE_SIZE);
1034 }
1035 
initBottomAccellStructures(VkCommandBuffer cmdBuffer, BottomLevelAccelerationStructurePool& pool, const deUint32& batchStructCount) const1036 void TraceRaysIndirect2Instance::initBottomAccellStructures (VkCommandBuffer						cmdBuffer,
1037 															 BottomLevelAccelerationStructurePool&	pool,
1038 															 const deUint32&						batchStructCount) const
1039 {
1040 	const DeviceInterface&											vkd			= m_context.getDeviceInterface();
1041 	const VkDevice													device		= m_context.getDevice();
1042 	Allocator&														allocator	= m_context.getDefaultAllocator();
1043 
1044 	pool.batchStructCount(batchStructCount);
1045 	pool.batchGeomCount(batchStructCount * 8);
1046 
1047 	tcu::Vec3 v0(0.0, 1.0, 0.0);
1048 	tcu::Vec3 v1(0.0, 0.0, 0.0);
1049 	tcu::Vec3 v2(1.0, 1.0, 0.0);
1050 	tcu::Vec3 v3(1.0, 0.0, 0.0);
1051 
1052 	for (deUint32 z = 0; z < m_imageExtent.depth; ++z)
1053 	for (deUint32 y = 0; y < m_imageExtent.height; ++y)
1054 	for (deUint32 x = 0; x < m_imageExtent.width; ++x)
1055 	{
1056 		// let's build a 3D chessboard of geometries
1057 		if (((x + y + z) % 2) == 0)
1058 			continue;
1059 		tcu::Vec3 xyz((float)x, (float)y, (float)z);
1060 		std::vector<tcu::Vec3>	geometryData;
1061 
1062 		auto bottomLevelAccelerationStructure = pool.add();
1063 		bottomLevelAccelerationStructure->setGeometryCount(1u);
1064 
1065 		geometryData.push_back(xyz + v0);
1066 		geometryData.push_back(xyz + v1);
1067 		geometryData.push_back(xyz + v2);
1068 		geometryData.push_back(xyz + v2);
1069 		geometryData.push_back(xyz + v1);
1070 		geometryData.push_back(xyz + v3);
1071 
1072 		bottomLevelAccelerationStructure->addGeometry(geometryData, true);
1073 	}
1074 
1075 	pool.batchCreate(vkd, device, allocator);
1076 	pool.batchBuild(vkd, device, cmdBuffer);
1077 }
1078 
iterate(void)1079 TestStatus TraceRaysIndirect2Instance::iterate (void)
1080 {
1081 	const InstanceInterface&			vki									= m_context.getInstanceInterface();
1082 	const DeviceInterface&				vkd									= m_context.getDeviceInterface();
1083 	const VkDevice						device								= m_context.getDevice();
1084 	const VkPhysicalDevice				physicalDevice						= m_context.getPhysicalDevice();
1085 	const auto							queueAndFamilyIndex					= getQueueFamilyIndexAtExact(vkd, vki, physicalDevice, device, m_params.submitQueue);
1086 	const VkQueue						queue								= std::get<1>(queueAndFamilyIndex);
1087 	const deUint32						queueFamilyIndex					= std::get<2>(queueAndFamilyIndex);
1088 	Allocator&							allocator							= m_context.getDefaultAllocator();
1089 	const deUint32						width								= m_imageExtent.width;
1090 	const deUint32						height								= m_imageExtent.height;
1091 	const deUint32						depth								= m_imageExtent.depth;
1092 	const deUint32						pixelCount							= width * height * depth;
1093 	const deUint32						shaderGroupHandleSize				= getShaderGroupSize(vki, physicalDevice);
1094 	const deUint32						shaderGroupBaseAlignment			= getShaderGroupBaseAlignment(vki, physicalDevice);
1095 
1096 	Move<VkDescriptorSetLayout>			computeDescriptorSetLayout;
1097 	Move<VkDescriptorPool>				computeDescriptorPool;
1098 	Move<VkDescriptorSet>				computeDescriptorSet;
1099 	Move<VkPipelineLayout>				computePipelineLayout;
1100 	Move<VkShaderModule>				computeShader;
1101 	Move<VkPipeline>					computePipeline;
1102 
1103 	if (m_params.traceType == TraceType::INDIRECT_GPU)
1104 	{
1105 		computeDescriptorSetLayout			= DescriptorSetLayoutBuilder()
1106 													.addSingleBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1107 													.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
1108 													.build(vkd, device);
1109 		computeDescriptorPool				= DescriptorPoolBuilder()
1110 													.addType(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER)
1111 													.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER)
1112 													.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
1113 		const VkPushConstantRange	full	{ VK_SHADER_STAGE_COMPUTE_BIT, 0, deUint32(sizeof(deUint32)) };
1114 		computeDescriptorSet				= makeDescriptorSet(vkd, device, *computeDescriptorPool, *computeDescriptorSetLayout);
1115 		computePipelineLayout				= makePipelineLayout(vkd, device, 1, &computeDescriptorSetLayout.get(), 1, &full);
1116 
1117 		computeShader						= createShaderModule(vkd, device, m_context.getBinaryCollection().get("compute_indirect_command"), 0);
1118 		const VkPipelineShaderStageCreateInfo pipelineShaderStageParams =
1119 		{
1120 			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,	// VkStructureType						sType;
1121 			DE_NULL,												// const void*							pNext;
1122 			VkPipelineShaderStageCreateFlags(0u),					// VkPipelineShaderStageCreateFlags		flags;
1123 			VK_SHADER_STAGE_COMPUTE_BIT,							// VkShaderStageFlagBits				stage;
1124 			*computeShader,											// VkShaderModule						module;
1125 			"main",													// const char*							pName;
1126 			DE_NULL,												// const VkSpecializationInfo*			pSpecializationInfo;
1127 		};
1128 		const VkComputePipelineCreateInfo pipelineCreateInfo =
1129 		{
1130 			VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,		// VkStructureType					sType;
1131 			DE_NULL,											// const void*						pNext;
1132 			VkPipelineCreateFlags(0u),							// VkPipelineCreateFlags			flags;
1133 			pipelineShaderStageParams,							// VkPipelineShaderStageCreateInfo	stage;
1134 			*computePipelineLayout,								// VkPipelineLayout					layout;
1135 			DE_NULL,											// VkPipeline						basePipelineHandle;
1136 			0,													// deInt32							basePipelineIndex;
1137 		};
1138 
1139 		computePipeline = vk::createComputePipeline(vkd, device, (VkPipelineCache)0u, &pipelineCreateInfo);
1140 	}
1141 
1142 	const Move<VkDescriptorSetLayout>	descriptorSetLayout					= DescriptorSetLayoutBuilder()
1143 																					.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, ALL_RAY_TRACING_STAGES)
1144 																					.addSingleBinding(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, ALL_RAY_TRACING_STAGES)
1145 																					.build(vkd, device);
1146 	const Move<VkDescriptorPool>		descriptorPool						= DescriptorPoolBuilder()
1147 																					.addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
1148 																					.addType(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR)
1149 																					.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
1150 	const Move<VkDescriptorSet>			descriptorSet						= makeDescriptorSet(vkd, device, *descriptorPool, *descriptorSetLayout);
1151 	const Move<VkPipelineLayout>		pipelineLayout						= makePipelineLayout(vkd, device, descriptorSetLayout.get());
1152 
1153 	de::MovePtr<RayTracingPipeline>		rayTracingPipeline					= de::newMovePtr<RayTracingPipeline>();
1154 	rayTracingPipeline->addShader(VK_SHADER_STAGE_RAYGEN_BIT_KHR,		createShaderModule(vkd, device, m_context.getBinaryCollection().get("rgen"), 0), 0);
1155 	rayTracingPipeline->addShader(VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR,	createShaderModule(vkd, device, m_context.getBinaryCollection().get("chit"), 0), 1);
1156 	rayTracingPipeline->addShader(VK_SHADER_STAGE_MISS_BIT_KHR,			createShaderModule(vkd, device, m_context.getBinaryCollection().get("miss"), 0), 2);
1157 	Move<VkPipeline>					pipeline							= rayTracingPipeline->createPipeline(vkd, device, *pipelineLayout);
1158 
1159 	const de::MovePtr<BufferWithMemory>	rgenSbt								= rayTracingPipeline->createShaderBindingTable(vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 0, 1 );
1160 	const de::MovePtr<BufferWithMemory>	hitSbt								= rayTracingPipeline->createShaderBindingTable(vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 1, 1 );
1161 	const de::MovePtr<BufferWithMemory>	missSbt								= rayTracingPipeline->createShaderBindingTable(vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 2, 1 );
1162 
1163 	const VkFormat						imageFormat							= VK_FORMAT_R32_UINT;
1164 	const VkImageCreateInfo				imageCreateInfo						= makeImageCreateInfo(width, height, depth, imageFormat);
1165 	const VkImageSubresourceRange		imageSubresourceRange				= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0, 1u);
1166 	const de::MovePtr<ImageWithMemory>	image								= de::MovePtr<ImageWithMemory>(new ImageWithMemory(vkd, device, allocator, imageCreateInfo, MemoryRequirement::Any));
1167 	const Move<VkImageView>				imageView							= makeImageView(vkd, device, **image, VK_IMAGE_VIEW_TYPE_3D, imageFormat, imageSubresourceRange);
1168 
1169 	const VkBufferCreateInfo			resultBufferCreateInfo				= makeBufferCreateInfo(pixelCount*sizeof(deUint32), VK_BUFFER_USAGE_TRANSFER_DST_BIT);
1170 	const VkImageSubresourceLayers		resultBufferImageSubresourceLayers	= makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
1171 	const VkBufferImageCopy				resultBufferImageRegion				= makeBufferImageCopy(m_params.traceDimensions, resultBufferImageSubresourceLayers);
1172 	de::MovePtr<BufferWithMemory>		resultBuffer						= de::MovePtr<BufferWithMemory>(new BufferWithMemory(vkd, device, allocator, resultBufferCreateInfo, MemoryRequirement::HostVisible));
1173 	Allocation&							resultBufferAllocation				= resultBuffer->getAllocation();
1174 
1175 	const VkDescriptorImageInfo			descriptorImageInfo					= makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
1176 
1177 	// create indirect command buffer and fill it with parameter values
1178 	const VkDeviceSize					bufferSize							= sizeof(VkTraceRaysIndirectCommand2KHR);
1179 	de::MovePtr<BufferWithMemory>		indirectBuffer;
1180 	de::MovePtr<BufferWithMemory>		uniformBuffer;
1181 
1182 	const bool							indirectGpu							= (m_params.traceType == TraceType::INDIRECT_GPU);
1183 	VkBufferUsageFlags					indirectBufferUsageFlags			= VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | ( indirectGpu ? VK_BUFFER_USAGE_STORAGE_BUFFER_BIT : VK_BUFFER_USAGE_TRANSFER_DST_BIT );
1184 	const VkBufferCreateInfo			indirectBufferCreateInfo			= makeBufferCreateInfo(bufferSize, indirectBufferUsageFlags);
1185 	vk::MemoryRequirement				indirectBufferMemoryRequirement		= MemoryRequirement::DeviceAddress | MemoryRequirement::HostVisible;
1186 	indirectBuffer															= de::MovePtr<BufferWithMemory>(new BufferWithMemory(vkd, device, allocator, indirectBufferCreateInfo, indirectBufferMemoryRequirement));
1187 
1188 	if (m_params.traceType == TraceType::INDIRECT_GPU)
1189 	{
1190 		const VkBufferCreateInfo			uniformBufferCreateInfo			= makeBufferCreateInfo(bufferSize, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT);
1191 		uniformBuffer														= de::MovePtr<BufferWithMemory>(new BufferWithMemory(vkd, device, allocator, uniformBufferCreateInfo, MemoryRequirement::HostVisible));
1192 		makeIndirectStructAndFlush(*uniformBuffer, true, *rgenSbt, *hitSbt, *missSbt, *missSbt);
1193 		makeIndirectStructAndFlush(*indirectBuffer, false, *rgenSbt, *hitSbt, *missSbt, *missSbt);
1194 	}
1195 	else if (m_params.traceType == TraceType::INDIRECT_CPU)
1196 	{
1197 		makeIndirectStructAndFlush(*indirectBuffer, true, *rgenSbt, *hitSbt, *missSbt, *missSbt);
1198 	}
1199 	else
1200 	{
1201 		TCU_THROW(NotSupportedError, "Invalid test parameters");
1202 	}
1203 
1204 	de::MovePtr<TopLevelAccelerationStructure>	topLevelAccelerationStructure;
1205 	BottomLevelAccelerationStructurePool	blasPool;
1206 	const Move<VkCommandPool>				cmdPool							= createCommandPool(vkd, device, 0, queueFamilyIndex);
1207 	const Move<VkCommandBuffer>				cmdBuffer						= allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1208 
1209 	beginCommandBuffer(vkd, *cmdBuffer, 0u);
1210 	{
1211 		const VkImageMemoryBarrier			preImageBarrier						= makeImageMemoryBarrier(0u, VK_ACCESS_TRANSFER_WRITE_BIT,
1212 																					VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
1213 																					**image, imageSubresourceRange);
1214 		cmdPipelineImageMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, &preImageBarrier);
1215 
1216 		const VkClearValue					clearValue							= makeClearValueColorU32(kClearColorValue, 0u, 0u, 0u);
1217 		vkd.cmdClearColorImage(*cmdBuffer, **image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clearValue.color, 1, &imageSubresourceRange);
1218 
1219 		const VkImageMemoryBarrier			postImageBarrier					= makeImageMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR,
1220 																					VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL,
1221 																					**image, imageSubresourceRange);
1222 		cmdPipelineImageMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, &postImageBarrier);
1223 
1224 
1225 		initBottomAccellStructures(*cmdBuffer, blasPool, 4);
1226 		topLevelAccelerationStructure		= initTopAccelerationStructure(*cmdBuffer, blasPool.structures(), m_context, m_imageExtent);
1227 
1228 		if (m_params.traceType == TraceType::INDIRECT_GPU)
1229 		{
1230 			const deUint32					fullCopyStyle					= m_params.partialCopy ? 0 : 1;
1231 			const VkDescriptorBufferInfo	uniformBufferDescriptorInfo		= makeDescriptorBufferInfo(**uniformBuffer, 0ull, bufferSize);
1232 			const VkDescriptorBufferInfo	indirectBufferDescriptorInfo	= makeDescriptorBufferInfo(**indirectBuffer, 0ull, bufferSize);
1233 			DescriptorSetUpdateBuilder()
1234 				.writeSingle(*computeDescriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &uniformBufferDescriptorInfo)
1235 				.writeSingle(*computeDescriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &indirectBufferDescriptorInfo)
1236 				.update(vkd, device);
1237 
1238 			vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);
1239 			vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipelineLayout, 0u, 1u, &computeDescriptorSet.get(), 0u, DE_NULL);
1240 			vkd.cmdPushConstants(*cmdBuffer, *computePipelineLayout, VK_SHADER_STAGE_COMPUTE_BIT, 0, deUint32(sizeof(deUint32)), &fullCopyStyle);
1241 			vkd.cmdDispatch(*cmdBuffer, 1, 1, 1);
1242 
1243 			const VkBufferMemoryBarrier		fillIndirectBufferMemoryBarrier	= makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_INDIRECT_COMMAND_READ_BIT,
1244 																				**indirectBuffer, 0ull, bufferSize);
1245 			cmdPipelineBufferMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT, &fillIndirectBufferMemoryBarrier);
1246 		}
1247 
1248 		const TopLevelAccelerationStructure*			topLevelAccelerationStructurePtr		= topLevelAccelerationStructure.get();
1249 		VkWriteDescriptorSetAccelerationStructureKHR	accelerationStructureWriteDescriptorSet	=
1250 		{
1251 			VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR,	//  VkStructureType						sType;
1252 			DE_NULL,															//  const void*							pNext;
1253 			1u,																	//  deUint32							accelerationStructureCount;
1254 			topLevelAccelerationStructurePtr->getPtr(),							//  const VkAccelerationStructureKHR*	pAccelerationStructures;
1255 		};
1256 
1257 		DescriptorSetUpdateBuilder()
1258 			.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descriptorImageInfo)
1259 			.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, &accelerationStructureWriteDescriptorSet)
1260 			.update(vkd, device);
1261 
1262 		vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineLayout, 0, 1, &descriptorSet.get(), 0, DE_NULL);
1263 
1264 		vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipeline);
1265 
1266 		cmdTraceRaysIndirect2(vkd, *cmdBuffer, getBufferDeviceAddress(vkd, device, **indirectBuffer, 0));
1267 
1268 		const VkMemoryBarrier							postTraceMemoryBarrier					= makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
1269 		const VkMemoryBarrier							postCopyMemoryBarrier					= makeMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
1270 		cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, VK_PIPELINE_STAGE_TRANSFER_BIT, &postTraceMemoryBarrier);
1271 
1272 		vkd.cmdCopyImageToBuffer(*cmdBuffer, **image, VK_IMAGE_LAYOUT_GENERAL, **resultBuffer, 1u, &resultBufferImageRegion);
1273 
1274 		cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, &postCopyMemoryBarrier);
1275 	}
1276 	endCommandBuffer(vkd, *cmdBuffer);
1277 
1278 	submitCommandsAndWait(vkd, device, queue, cmdBuffer.get());
1279 
1280 	invalidateMappedMemoryRange(vkd, device, resultBufferAllocation.getMemory(), resultBufferAllocation.getOffset(), VK_WHOLE_SIZE);
1281 
1282 	// run test using arrays of pointers
1283 	const deUint32*						bufferPtr	= (deUint32*)resultBufferAllocation.getHostPtr();
1284 	const bool							noWrites	= isNullExtent(m_params.traceDimensions);
1285 
1286 	const auto							allocationCount	= blasPool.getAllocationCount();
1287 	deUint32							failures		= 0;
1288 	deUint32							pos				= 0;
1289 	deUint32							all				= 0;
1290 
1291 	// verify results
1292 	for (deUint32 z = 0; z < depth; ++z)
1293 	for (deUint32 y = 0; y < height; ++y)
1294 	for (deUint32 x = 0; x < width; ++x)
1295 	{
1296 		const deUint32 expectedResult = (noWrites ? kClearColorValue : (((x + y + z) % 2) ? kHitColorValue : kMissColorValue));
1297 		if (bufferPtr[pos] != expectedResult)
1298 			failures++;
1299 		++pos;
1300 		++all;
1301 	}
1302 
1303 	if (failures == 0)
1304 		return tcu::TestStatus::pass(std::to_string(allocationCount) +" allocations");
1305 	else
1306 	{
1307 		const auto msg = std::to_string(allocationCount) +" allocations, " + std::to_string(failures) + " failures from " + std::to_string(all);
1308 		return tcu::TestStatus::fail(msg);
1309 	}
1310 }
1311 
makeDimensionsName(const VkTraceRaysIndirectCommandKHR& cmd)1312 std::string makeDimensionsName (const VkTraceRaysIndirectCommandKHR& cmd)
1313 {
1314 	std::ostringstream name;
1315 	name << cmd.width << "_" << cmd.height << "_" << cmd.depth;
1316 	return name.str();
1317 }
1318 
makeDimensionsName(const VkExtent3D& extent)1319 std::string makeDimensionsName (const VkExtent3D& extent)
1320 {
1321 	std::ostringstream name;
1322 	name << extent.width << "x" << extent.height << "x" << extent.depth;
1323 	return name.str();
1324 }
1325 
1326 }	// anonymous
1327 
createTraceRaysTests(tcu::TestContext& testCtx)1328 tcu::TestCaseGroup* createTraceRaysTests(tcu::TestContext& testCtx)
1329 {
1330 	de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "trace_rays_cmds", "Tests veryfying vkCmdTraceRays* commands"));
1331 
1332 	struct BufferSourceTypeData
1333 	{
1334 		TraceType								traceType;
1335 		const char*								name;
1336 	} bufferSourceTypes[] =
1337 	{
1338 		{ TraceType::DIRECT,		"direct"			},
1339 		{ TraceType::INDIRECT_CPU,	"indirect_cpu"		},
1340 		{ TraceType::INDIRECT_GPU,	"indirect_gpu"		},
1341 	};
1342 
1343 	const VkTraceRaysIndirectCommandKHR traceDimensions[] =
1344 	{
1345 		{  0,  0, 0 },
1346 		{  0,  1, 1 },
1347 		{  1,  0, 1 },
1348 		{  1,  1, 0 },
1349 		{  8,  1, 1 },
1350 		{  8,  8, 1 },
1351 		{  8,  8, 8 },
1352 		{ 11,  1, 1 },
1353 		{ 11, 13, 1 },
1354 		{ 11, 13, 5 },
1355 	};
1356 
1357 	for (size_t bufferSourceNdx = 0; bufferSourceNdx < DE_LENGTH_OF_ARRAY(bufferSourceTypes); ++bufferSourceNdx)
1358 	{
1359 		de::MovePtr<tcu::TestCaseGroup> bufferSourceGroup(new tcu::TestCaseGroup(group->getTestContext(), bufferSourceTypes[bufferSourceNdx].name, ""));
1360 
1361 		for (size_t traceDimensionsIdx = 0; traceDimensionsIdx < DE_LENGTH_OF_ARRAY(traceDimensions); ++traceDimensionsIdx)
1362 		{
1363 			TestParams testParams
1364 			{
1365 				bufferSourceTypes[bufferSourceNdx].traceType,
1366 				traceDimensions[traceDimensionsIdx],
1367 				false,
1368 				{/* Intentionally empty */},
1369 			};
1370 			const auto testName = makeDimensionsName(traceDimensions[traceDimensionsIdx]);
1371 			bufferSourceGroup->addChild(new RayTracingTraceRaysIndirectTestCase(group->getTestContext(), testName.c_str(), "", testParams));
1372 		}
1373 
1374 		group->addChild(bufferSourceGroup.release());
1375 	}
1376 
1377 	return group.release();
1378 }
1379 
createTraceRaysMaintenance1Tests(tcu::TestContext& testCtx)1380 tcu::TestCaseGroup* createTraceRaysMaintenance1Tests(tcu::TestContext& testCtx)
1381 {
1382 	de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "trace_rays_cmds_maintenance_1", "Tests veryfying vkCmdTraceRays* commands"));
1383 
1384 	struct BufferSourceTypeData
1385 	{
1386 		TraceType								traceType;
1387 		const char*								name;
1388 	} bufferSourceTypes[] =
1389 	{
1390 		{ TraceType::INDIRECT2_CPU,	"indirect2_cpu"		},
1391 		{ TraceType::INDIRECT2_GPU,	"indirect2_gpu"		},
1392 	};
1393 
1394 	const VkTraceRaysIndirectCommand2KHR extendedTraceDimensions[] =
1395 	{
1396 		{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  0,  0, 0 },
1397 		{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  0,  1, 1 },
1398 		{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  1,  0, 1 },
1399 		{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  1,  1, 0 },
1400 		{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  8,  1, 1 },
1401 		{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  8,  8, 1 },
1402 		{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  8,  8, 8 },
1403 		{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11,  1, 1 },
1404 		{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 13, 1 },
1405 		{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 13, 5 },
1406 	};
1407 
1408 	for (size_t bufferSourceNdx = 0; bufferSourceNdx < DE_LENGTH_OF_ARRAY(bufferSourceTypes); ++bufferSourceNdx)
1409 	{
1410 		de::MovePtr<tcu::TestCaseGroup> bufferSourceGroup(new tcu::TestCaseGroup(group->getTestContext(), bufferSourceTypes[bufferSourceNdx].name, ""));
1411 
1412 		for (size_t extendedTraceDimensionsIdx = 0; extendedTraceDimensionsIdx < DE_LENGTH_OF_ARRAY(extendedTraceDimensions); ++extendedTraceDimensionsIdx)
1413 		{
1414 			TestParams testParams
1415 			{
1416 				bufferSourceTypes[bufferSourceNdx].traceType,
1417 				{/* Intentionally empty */},
1418 				true,
1419 				extendedTraceDimensions[extendedTraceDimensionsIdx],
1420 			};
1421 			const auto testName = makeDimensionsName(extendedTraceDimensions[extendedTraceDimensionsIdx]);
1422 			bufferSourceGroup->addChild(new RayTracingTraceRaysIndirectTestCase(group->getTestContext(), testName.c_str(), "", testParams));
1423 		}
1424 
1425 		group->addChild(bufferSourceGroup.release());
1426 	}
1427 
1428 	return group.release();
1429 }
1430 
createTraceRays2Tests(tcu::TestContext& testCtx)1431 tcu::TestCaseGroup*	createTraceRays2Tests(tcu::TestContext& testCtx)
1432 {
1433 	auto group	= new tcu::TestCaseGroup(testCtx, "trace_rays_indirect2", "Tests veryfying vkCmdTraceRaysIndirect2KHR command");
1434 
1435 	std::pair<TraceType, const char*> const	bufferSources[]
1436 	{
1437 		{ TraceType::INDIRECT_CPU,	"indirect_cpu"		},
1438 		{ TraceType::INDIRECT_GPU,	"indirect_gpu"		},
1439 	};
1440 
1441 	std::pair<bool, const char*> const copyStyles[]
1442 	{
1443 		{ true,		"full_copy"	},
1444 		{ false,	"partial_copy" }
1445 	};
1446 
1447 	std::pair<VkQueueFlagBits, const char*> submitQueues[]
1448 	{
1449 		{ VK_QUEUE_GRAPHICS_BIT,	"submit_graphics" },
1450 		{ VK_QUEUE_COMPUTE_BIT,		"submit_compute" }
1451 	};
1452 
1453 	const VkExtent3D traceDimensions[] =
1454 	{
1455 		{ 11, 17, 1 },
1456 		{ 19, 11, 2 },
1457 		{ 23, 47, 3 },
1458 		{ 47, 19, 4 }
1459 	};
1460 
1461 	for (const auto& bufferSource : bufferSources)
1462 	{
1463 		auto bufferSourceGroup	= new TestCaseGroup(testCtx, bufferSource.second, "");
1464 
1465 		for (const auto& copyStyle : copyStyles)
1466 		{
1467 			auto copyStyleGroup	= new TestCaseGroup(testCtx, copyStyle.second, "");
1468 
1469 			for (const auto& submitQueue : submitQueues)
1470 			{
1471 				auto submitQueueGroup = new TestCaseGroup(testCtx, submitQueue.second, "");
1472 
1473 				for (const auto& traceDimension : traceDimensions)
1474 				{
1475 					TestParams2 testParams
1476 					{
1477 						bufferSource.first,
1478 						traceDimension,
1479 						copyStyle.first,
1480 						submitQueue.first
1481 					};
1482 					const auto testName = makeDimensionsName(traceDimension);
1483 					submitQueueGroup->addChild(new TraceRaysIndirect2Case(testCtx, testName.c_str(), testParams));
1484 				}
1485 				copyStyleGroup->addChild(submitQueueGroup);
1486 			}
1487 			bufferSourceGroup->addChild(copyStyleGroup);
1488 		}
1489 		group->addChild(bufferSourceGroup);
1490 	}
1491 
1492 	return group;
1493 }
1494 
1495 }	// RayTracing
1496 
1497 }	// vkt
1498