1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2019 The Khronos Group Inc.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *	  http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Ray Tracing Build tests
22  *//*--------------------------------------------------------------------*/
23 
24 #include "vktRayTracingBuildTests.hpp"
25 
26 #include "vkDefs.hpp"
27 
28 #include "vktTestCase.hpp"
29 #include "vkCmdUtil.hpp"
30 #include "vkObjUtil.hpp"
31 #include "vkBuilderUtil.hpp"
32 #include "vkBarrierUtil.hpp"
33 #include "vkBufferWithMemory.hpp"
34 #include "vkImageWithMemory.hpp"
35 #include "vkImageUtil.hpp"
36 #include "vkTypeUtil.hpp"
37 
38 #include "tcuTextureUtil.hpp"
39 
40 #include "vkRayTracingUtil.hpp"
41 
42 #include "deClock.h"
43 
44 #include <cmath>
45 #include <limits>
46 #include <iostream>
47 
48 namespace vkt
49 {
50 namespace RayTracing
51 {
52 namespace
53 {
54 using namespace vk;
55 using namespace std;
56 
57 static const VkFlags	ALL_RAY_TRACING_STAGES	= VK_SHADER_STAGE_RAYGEN_BIT_KHR
58 												| VK_SHADER_STAGE_ANY_HIT_BIT_KHR
59 												| VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR
60 												| VK_SHADER_STAGE_MISS_BIT_KHR
61 												| VK_SHADER_STAGE_INTERSECTION_BIT_KHR
62 												| VK_SHADER_STAGE_CALLABLE_BIT_KHR;
63 
// Kind of geometry placed into the bottom-level acceleration structures.
enum TestType
{
	TEST_TYPE_TRIANGLES	= 0,	// Every bottom-level structure holds triangle geometry.
	TEST_TYPE_AABBS		= 1,	// Every bottom-level structure holds AABB geometry.
	TEST_TYPE_MIXED		= 2,	// Even-indexed structures hold triangles, odd-indexed ones AABBs.
};
70 
71 struct CaseDef
72 {
73 	TestType	testType;
74 	deUint32	width;
75 	deUint32	height;
76 	deUint32	squaresGroupCount;
77 	deUint32	geometriesGroupCount;
78 	deUint32	instancesGroupCount;
79 	bool		deferredOperation;
80 	deUint32	workerThreadsCount;
81 	bool		deviceBuild;
82 };
83 
getShaderGroupSize(const InstanceInterface& vki, const VkPhysicalDevice physicalDevice)84 deUint32 getShaderGroupSize (const InstanceInterface&	vki,
85 							 const VkPhysicalDevice		physicalDevice)
86 {
87 	de::MovePtr<RayTracingProperties>	rayTracingPropertiesKHR;
88 
89 	rayTracingPropertiesKHR	= makeRayTracingProperties(vki, physicalDevice);
90 	return rayTracingPropertiesKHR->getShaderGroupHandleSize();
91 }
92 
getShaderGroupBaseAlignment(const InstanceInterface& vki, const VkPhysicalDevice physicalDevice)93 deUint32 getShaderGroupBaseAlignment (const InstanceInterface&	vki,
94 									  const VkPhysicalDevice	physicalDevice)
95 {
96 	de::MovePtr<RayTracingProperties>	rayTracingPropertiesKHR;
97 
98 	rayTracingPropertiesKHR = makeRayTracingProperties(vki, physicalDevice);
99 	return rayTracingPropertiesKHR->getShaderGroupBaseAlignment();
100 }
101 
makeImageCreateInfo(deUint32 width, deUint32 height, VkFormat format)102 VkImageCreateInfo makeImageCreateInfo (deUint32 width, deUint32 height, VkFormat format)
103 {
104 	const VkImageUsageFlags	usage			= VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
105 	const VkImageCreateInfo	imageCreateInfo	=
106 	{
107 		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,	// VkStructureType			sType;
108 		DE_NULL,								// const void*				pNext;
109 		(VkImageCreateFlags)0u,					// VkImageCreateFlags		flags;
110 		VK_IMAGE_TYPE_2D,						// VkImageType				imageType;
111 		format,									// VkFormat					format;
112 		makeExtent3D(width, height, 1u),		// VkExtent3D				extent;
113 		1u,										// deUint32					mipLevels;
114 		1u,										// deUint32					arrayLayers;
115 		VK_SAMPLE_COUNT_1_BIT,					// VkSampleCountFlagBits	samples;
116 		VK_IMAGE_TILING_OPTIMAL,				// VkImageTiling			tiling;
117 		usage,									// VkImageUsageFlags		usage;
118 		VK_SHARING_MODE_EXCLUSIVE,				// VkSharingMode			sharingMode;
119 		0u,										// deUint32					queueFamilyIndexCount;
120 		DE_NULL,								// const deUint32*			pQueueFamilyIndices;
121 		VK_IMAGE_LAYOUT_UNDEFINED				// VkImageLayout			initialLayout;
122 	};
123 
124 	return imageCreateInfo;
125 }
126 
127 class RayTracingBuildTestInstance : public TestInstance
128 {
129 public:
130 	typedef de::SharedPtr<BottomLevelAccelerationStructure> BlasPtr;
131 	typedef de::SharedPtr<TopLevelAccelerationStructure>	TlasPtr;
132 	typedef BottomLevelAccelerationStructurePool			BlasPool;
133 
134 									RayTracingBuildTestInstance			(Context& context, const CaseDef& data);
135 									~RayTracingBuildTestInstance		(void);
136 	tcu::TestStatus					iterate								(void);
137 
138 protected:
139 	bool							verifyAllocationCount				() const;
140 	void							checkSupportInInstance				(void) const;
141 	deUint32						validateBuffer						(de::MovePtr<BufferWithMemory>		buffer);
142 	de::MovePtr<BufferWithMemory>	runTest								(bool								useGpuBuild,
143 																		 deUint32							workerThreadsCount);
144 	TlasPtr							initTopAccelerationStructure		(bool								useGpuBuild,
145 																		 deUint32							workerThreadsCount,
146 																		 const BlasPool&					pool);
147 	void							createTopAccelerationStructure		(VkCommandBuffer					cmdBuffer,
148 																		 TopLevelAccelerationStructure*		tlas);
149 	void							initBottomAccelerationStructures	(BlasPool&							pool,
150 																		 bool								useGpuBuild,
151 																		 deUint32							workerThreadsCount) const;
152 	void							initBottomAccelerationStructure		(BlasPtr							blas,
153 																		 bool								useGpuBuild,
154 																		 deUint32							workerThreadsCount,
155 																		 tcu::UVec2&						startPos,
156 																		 bool								triangles) const;
157 
158 private:
159 	CaseDef							m_data;
160 	const VkFormat					m_format;
161 };
162 
RayTracingBuildTestInstance(Context& context, const CaseDef& data)163 RayTracingBuildTestInstance::RayTracingBuildTestInstance (Context& context, const CaseDef& data)
164 	: vkt::TestInstance		(context)
165 	, m_data				(data)
166 	, m_format				(VK_FORMAT_R32_UINT)
167 {
168 }
169 
~RayTracingBuildTestInstance(void)170 RayTracingBuildTestInstance::~RayTracingBuildTestInstance (void)
171 {
172 }
173 
174 class RayTracingTestCase : public TestCase
175 {
176 	public:
177 							RayTracingTestCase	(tcu::TestContext& context, const char* name, const char* desc, const CaseDef data);
178 							~RayTracingTestCase	(void);
179 
180 	virtual	void			initPrograms		(SourceCollections& programCollection) const;
181 	virtual TestInstance*	createInstance		(Context& context) const;
182 	virtual void			checkSupport		(Context& context) const;
183 
184 private:
185 	CaseDef					m_data;
186 };
187 
RayTracingTestCase(tcu::TestContext& context, const char* name, const char* desc, const CaseDef data)188 RayTracingTestCase::RayTracingTestCase (tcu::TestContext& context, const char* name, const char* desc, const CaseDef data)
189 	: vkt::TestCase	(context, name, desc)
190 	, m_data		(data)
191 {
192 	DE_ASSERT((m_data.width * m_data.height) == (m_data.squaresGroupCount * m_data.geometriesGroupCount * m_data.instancesGroupCount));
193 }
194 
~RayTracingTestCase(void)195 RayTracingTestCase::~RayTracingTestCase	(void)
196 {
197 }
198 
checkSupport(Context& context) const199 void RayTracingTestCase::checkSupport (Context& context) const
200 {
201 	context.requireDeviceFunctionality("VK_KHR_acceleration_structure");
202 	context.requireDeviceFunctionality("VK_KHR_ray_tracing_pipeline");
203 
204 	const VkPhysicalDeviceRayTracingPipelineFeaturesKHR&	rayTracingPipelineFeaturesKHR		= context.getRayTracingPipelineFeatures();
205 	if (rayTracingPipelineFeaturesKHR.rayTracingPipeline == DE_FALSE )
206 		TCU_THROW(NotSupportedError, "Requires VkPhysicalDeviceRayTracingPipelineFeaturesKHR.rayTracingPipeline");
207 
208 	const VkPhysicalDeviceAccelerationStructureFeaturesKHR&	accelerationStructureFeaturesKHR	= context.getAccelerationStructureFeatures();
209 	if (accelerationStructureFeaturesKHR.accelerationStructure == DE_FALSE)
210 		TCU_THROW(TestError, "VK_KHR_ray_tracing_pipeline requires VkPhysicalDeviceAccelerationStructureFeaturesKHR.accelerationStructure");
211 
212 	if (!m_data.deviceBuild)
213 	{
214 		context.requireDeviceFunctionality("VK_KHR_deferred_host_operations");
215 		if (accelerationStructureFeaturesKHR.accelerationStructureHostCommands == DE_FALSE)
216 			TCU_THROW(NotSupportedError, "Requires VkPhysicalDeviceAccelerationStructureFeaturesKHR.accelerationStructureHostCommands");
217 	}
218 }
219 
initPrograms(SourceCollections& programCollection) const220 void RayTracingTestCase::initPrograms (SourceCollections& programCollection) const
221 {
222 	const vk::ShaderBuildOptions	buildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_4, 0u, true);
223 	{
224 		std::stringstream css;
225 		css <<
226 			"#version 460 core\n"
227 			"#extension GL_EXT_ray_tracing : require\n"
228 			"layout(location = 0) rayPayloadInEXT vec3 hitValue;\n"
229 			"hitAttributeEXT vec3 attribs;\n"
230 			"layout(r32ui, set = 0, binding = 0) uniform uimage2D result;\n"
231 			"void main()\n"
232 			"{\n"
233 			"  uvec4 color = uvec4(1,0,0,1);\n"
234 			"  imageStore(result, ivec2(gl_LaunchIDEXT.xy), color);\n"
235 			"}\n";
236 
237 		programCollection.glslSources.add("ahit") << glu::AnyHitSource(updateRayTracingGLSL(css.str())) << buildOptions;
238 	}
239 
240 	{
241 		std::stringstream css;
242 		css <<
243 			"#version 460 core\n"
244 			"#extension GL_EXT_ray_tracing : require\n"
245 			"layout(location = 0) rayPayloadInEXT dummyPayload { vec4 dummy; };\n"
246 			"layout(r32ui, set = 0, binding = 0) uniform uimage2D result;\n"
247 			"void main()\n"
248 			"{\n"
249 			"  uvec4 color = uvec4(2,0,0,1);\n"
250 			"  imageStore(result, ivec2(gl_LaunchIDEXT.xy), color);\n"
251 			"}\n";
252 
253 		programCollection.glslSources.add("miss") << glu::MissSource(updateRayTracingGLSL(css.str())) << buildOptions;
254 	}
255 
256 	{
257 		std::stringstream css;
258 		css <<
259 			"#version 460 core\n"
260 			"#extension GL_EXT_ray_tracing : require\n"
261 			"hitAttributeEXT vec3 hitAttribute;\n"
262 			"void main()\n"
263 			"{\n"
264 			"  reportIntersectionEXT(1.0f, 0);\n"
265 			"}\n";
266 
267 		programCollection.glslSources.add("sect") << glu::IntersectionSource(updateRayTracingGLSL(css.str())) << buildOptions;
268 	}
269 
270 	programCollection.glslSources.add("rgen") << glu::RaygenSource(updateRayTracingGLSL(getCommonRayGenerationShader())) << buildOptions;
271 }
272 
createInstance(Context& context) const273 TestInstance* RayTracingTestCase::createInstance (Context& context) const
274 {
275 	return new RayTracingBuildTestInstance(context, m_data);
276 }
277 
278 auto RayTracingBuildTestInstance::initTopAccelerationStructure (bool			useGpuBuild,
279 																deUint32		workerThreadsCount,
280 																const BlasPool&	pool) -> TlasPtr
281 {
282 	de::MovePtr<TopLevelAccelerationStructure>	result		= makeTopLevelAccelerationStructure();
283 	const std::vector<BlasPtr>&					blases		= pool.structures();
284 
285 	result->setInstanceCount(blases.size());
286 	result->setBuildType(useGpuBuild ? VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR : VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR);
287 	result->setDeferredOperation(m_data.deferredOperation, workerThreadsCount);
288 
289 	for (size_t instanceNdx = 0; instanceNdx < blases.size(); ++instanceNdx)
290 	{
291 		const bool	triangles								= (m_data.testType == TEST_TYPE_TRIANGLES) || (m_data.testType == TEST_TYPE_MIXED && (instanceNdx & 1) == 0);
292 		deUint32	instanceShaderBindingTableRecordOffset	= triangles ? 0 : 1;
293 
294 		result->addInstance(blases[instanceNdx], vk::identityMatrix3x4, 0, 0xFF, instanceShaderBindingTableRecordOffset);
295 	}
296 
297 	return TlasPtr(result.release());
298 }
299 
createTopAccelerationStructure(VkCommandBuffer cmdBuffer, TopLevelAccelerationStructure* tlas)300 void RayTracingBuildTestInstance::createTopAccelerationStructure (VkCommandBuffer					cmdBuffer,
301 																  TopLevelAccelerationStructure*	tlas)
302 {
303 	const DeviceInterface&						vkd			= m_context.getDeviceInterface();
304 	const VkDevice								device		= m_context.getDevice();
305 	Allocator&									allocator	= m_context.getDefaultAllocator();
306 
307 	tlas->createAndBuild(vkd, device, cmdBuffer, allocator);
308 }
309 
initBottomAccelerationStructure(BlasPtr blas, bool useGpuBuild, deUint32 workerThreadsCount, tcu::UVec2& startPos, bool triangles) const310 void RayTracingBuildTestInstance::initBottomAccelerationStructure (BlasPtr		blas,
311 																   bool			useGpuBuild,
312 																   deUint32		workerThreadsCount,
313 																   tcu::UVec2&	startPos,
314 																   bool			triangles) const
315 {
316 	blas->setBuildType(useGpuBuild ? VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR : VK_ACCELERATION_STRUCTURE_BUILD_TYPE_HOST_KHR);
317 	blas->setDeferredOperation(m_data.deferredOperation, workerThreadsCount);
318 	blas->setGeometryCount(m_data.geometriesGroupCount);
319 
320 	for (size_t geometryNdx = 0; geometryNdx < m_data.geometriesGroupCount; ++geometryNdx)
321 	{
322 		std::vector<tcu::Vec3>	geometryData;
323 
324 		geometryData.reserve(m_data.squaresGroupCount * (triangles ? 3u : 2u));
325 
326 		for (size_t squareNdx = 0; squareNdx < m_data.squaresGroupCount; ++squareNdx)
327 		{
328 			const deUint32	n	= m_data.width * startPos.y() + startPos.x();
329 			const float		x0	= float(startPos.x() + 0) / float(m_data.width);
330 			const float		y0	= float(startPos.y() + 0) / float(m_data.height);
331 			const float		x1	= float(startPos.x() + 1) / float(m_data.width);
332 			const float		y1	= float(startPos.y() + 1) / float(m_data.height);
333 			const float		z	= (n % 7 == 0) ? +1.0f : -1.0f;
334 			const deUint32	m	= (n + 13) % (m_data.width * m_data.height);
335 
336 			if (triangles)
337 			{
338 				const float	xm	= (x0 + x1) / 2.0f;
339 				const float	ym	= (y0 + y1) / 2.0f;
340 
341 				geometryData.push_back(tcu::Vec3(x0, y0, z));
342 				geometryData.push_back(tcu::Vec3(x1, ym, z));
343 				geometryData.push_back(tcu::Vec3(xm, y1, z));
344 			}
345 			else
346 			{
347 				geometryData.push_back(tcu::Vec3(x0, y0, z));
348 				geometryData.push_back(tcu::Vec3(x1, y1, z));
349 			}
350 
351 			startPos.y() = m / m_data.width;
352 			startPos.x() = m % m_data.width;
353 		}
354 
355 		blas->addGeometry(geometryData, triangles);
356 	}
357 }
358 
initBottomAccelerationStructures(BlasPool& pool, bool useGpuBuild, deUint32 workerThreadsCount) const359 void RayTracingBuildTestInstance::initBottomAccelerationStructures	(BlasPool&	pool,
360 																	 bool		useGpuBuild,
361 																	 deUint32	workerThreadsCount) const
362 {
363 	tcu::UVec2					startPos	{};
364 	const DeviceInterface&		vkd			= m_context.getDeviceInterface();
365 	const VkDevice				device		= m_context.getDevice();
366 	Allocator&					allocator	= m_context.getDefaultAllocator();
367 	const VkDeviceSize			maxBuffSize	= 3 * (VkDeviceSize(1) << 30); // 3GB
368 
369 	for (size_t instanceNdx = 0; instanceNdx < m_data.instancesGroupCount; ++instanceNdx)	pool.add();
370 
371 	const std::vector<BlasPtr>&	blases		= pool.structures();
372 
373 	for (size_t instanceNdx = 0; instanceNdx < m_data.instancesGroupCount; ++instanceNdx)
374 	{
375 		const bool	triangles	= (m_data.testType == TEST_TYPE_TRIANGLES) || (m_data.testType == TEST_TYPE_MIXED && (instanceNdx & 1) == 0);
376 		initBottomAccelerationStructure(blases[instanceNdx], useGpuBuild, workerThreadsCount, startPos, triangles);
377 	}
378 
379 	pool.batchCreateAdjust(vkd, device, allocator, maxBuffSize);
380 }
381 
verifyAllocationCount() const382 bool RayTracingBuildTestInstance::verifyAllocationCount () const
383 {
384 	BlasPool					pool					{};
385 	tcu::UVec2					startPos				{};
386 	const DeviceInterface&		vkd						= m_context.getDeviceInterface();
387 	const VkDevice				device					= m_context.getDevice();
388 	auto&						log						= m_context.getTestContext().getLog();
389 	const size_t				avvailableAllocCount	= m_context.getDeviceProperties().limits.maxMemoryAllocationCount;
390 	const VkDeviceSize			maxBufferSize			= 3 * (VkDeviceSize(1) << 30); // 3GB
391 
392 
393 	for (size_t instanceNdx = 0; instanceNdx < m_data.instancesGroupCount; ++instanceNdx)	pool.add();
394 
395 	const std::vector<BlasPtr>&	blases		= pool.structures();
396 
397 	for (size_t instanceNdx = 0; instanceNdx < m_data.instancesGroupCount; ++instanceNdx)
398 	{
399 		const bool	triangles	= (m_data.testType == TEST_TYPE_TRIANGLES) || (m_data.testType == TEST_TYPE_MIXED && (instanceNdx & 1) == 0);
400 		initBottomAccelerationStructure(blases[instanceNdx], true, 0, startPos, triangles);
401 	}
402 
403 	const size_t	poolAllocationCount		= pool.getAllocationCount(vkd, device, maxBufferSize);
404 	const size_t	requiredAllocationCount = poolAllocationCount + 120;
405 
406 	log << tcu::TestLog::Message
407 		<< "The test consumes " << poolAllocationCount
408 		<< " allocations out of " << avvailableAllocCount << " available"
409 		<< tcu::TestLog::EndMessage;
410 
411 	return (requiredAllocationCount < avvailableAllocCount);
412 }
413 
runTest(bool useGpuBuild, deUint32 workerThreadsCount)414 de::MovePtr<BufferWithMemory> RayTracingBuildTestInstance::runTest (bool useGpuBuild, deUint32 workerThreadsCount)
415 {
416 	const InstanceInterface&					vki									= m_context.getInstanceInterface();
417 	const DeviceInterface&						vkd									= m_context.getDeviceInterface();
418 	const VkDevice								device								= m_context.getDevice();
419 	const VkPhysicalDevice						physicalDevice						= m_context.getPhysicalDevice();
420 	const deUint32								queueFamilyIndex					= m_context.getUniversalQueueFamilyIndex();
421 	const VkQueue								queue								= m_context.getUniversalQueue();
422 	Allocator&									allocator							= m_context.getDefaultAllocator();
423 	const deUint32								pixelCount							= m_data.width * m_data.height;
424 	const deUint32								shaderGroupHandleSize				= getShaderGroupSize(vki, physicalDevice);
425 	const deUint32								shaderGroupBaseAlignment			= getShaderGroupBaseAlignment(vki, physicalDevice);
426 
427 	const Move<VkDescriptorSetLayout>			descriptorSetLayout					= DescriptorSetLayoutBuilder()
428 																							.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, ALL_RAY_TRACING_STAGES)
429 																							.addSingleBinding(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, ALL_RAY_TRACING_STAGES)
430 																							.build(vkd, device);
431 	const Move<VkDescriptorPool>				descriptorPool						= DescriptorPoolBuilder()
432 																							.addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
433 																							.addType(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR)
434 																							.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
435 	const Move<VkDescriptorSet>					descriptorSet						= makeDescriptorSet(vkd, device, *descriptorPool, *descriptorSetLayout);
436 	const Move<VkPipelineLayout>				pipelineLayout						= makePipelineLayout(vkd, device, descriptorSetLayout.get());
437 	const Move<VkCommandPool>					cmdPool								= createCommandPool(vkd, device, 0, queueFamilyIndex);
438 	const Move<VkCommandBuffer>					cmdBuffer							= allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
439 
440 	de::MovePtr<RayTracingPipeline>				rayTracingPipeline					= de::newMovePtr<RayTracingPipeline>();
441 	Move<VkShaderModule>						raygenShader						= createShaderModule(vkd, device, m_context.getBinaryCollection().get("rgen"), 0);
442 	Move<VkShaderModule>						hitShader							= createShaderModule(vkd, device, m_context.getBinaryCollection().get("ahit"), 0);
443 	Move<VkShaderModule>						missShader							= createShaderModule(vkd, device, m_context.getBinaryCollection().get("miss"), 0);
444 	Move<VkShaderModule>						intersectionShader					= createShaderModule(vkd, device, m_context.getBinaryCollection().get("sect"), 0);
445 	rayTracingPipeline->addShader(VK_SHADER_STAGE_RAYGEN_BIT_KHR,		*raygenShader,			0u);
446 	rayTracingPipeline->addShader(VK_SHADER_STAGE_ANY_HIT_BIT_KHR,		*hitShader,				1u);
447 	rayTracingPipeline->addShader(VK_SHADER_STAGE_ANY_HIT_BIT_KHR,		*hitShader,				2u);
448 	rayTracingPipeline->addShader(VK_SHADER_STAGE_INTERSECTION_BIT_KHR, *intersectionShader,	2u);
449 	rayTracingPipeline->addShader(VK_SHADER_STAGE_MISS_BIT_KHR,			*missShader,			3u);
450 	Move<VkPipeline>							pipeline							= rayTracingPipeline->createPipeline(vkd, device, *pipelineLayout);
451 	const de::MovePtr<BufferWithMemory>			raygenShaderBindingTable			= rayTracingPipeline->createShaderBindingTable(vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 0u, 1u);
452 	const de::MovePtr<BufferWithMemory>			hitShaderBindingTable				= rayTracingPipeline->createShaderBindingTable(vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 1u, 2u);
453 	const de::MovePtr<BufferWithMemory>			missShaderBindingTable				= rayTracingPipeline->createShaderBindingTable(vkd, device, *pipeline, allocator, shaderGroupHandleSize, shaderGroupBaseAlignment, 3u, 1u);
454 	const VkStridedDeviceAddressRegionKHR		raygenShaderBindingTableRegion		= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, raygenShaderBindingTable->get(), 0), shaderGroupHandleSize, shaderGroupHandleSize);
455 	const VkStridedDeviceAddressRegionKHR		hitShaderBindingTableRegion			= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, hitShaderBindingTable->get(), 0), shaderGroupHandleSize, 2u * shaderGroupHandleSize);
456 	const VkStridedDeviceAddressRegionKHR		missShaderBindingTableRegion		= makeStridedDeviceAddressRegionKHR(getBufferDeviceAddress(vkd, device, missShaderBindingTable->get(), 0), shaderGroupHandleSize, shaderGroupHandleSize);
457 	const VkStridedDeviceAddressRegionKHR		callableShaderBindingTableRegion	= makeStridedDeviceAddressRegionKHR(DE_NULL, 0, 0);
458 
459 	const VkImageCreateInfo						imageCreateInfo						= makeImageCreateInfo(m_data.width, m_data.height, m_format);
460 	const VkImageSubresourceRange				imageSubresourceRange				= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0, 1u);
461 	const de::MovePtr<ImageWithMemory>			image								= de::MovePtr<ImageWithMemory>(new ImageWithMemory(vkd, device, allocator, imageCreateInfo, MemoryRequirement::Any));
462 	const Move<VkImageView>						imageView							= makeImageView(vkd, device, **image, VK_IMAGE_VIEW_TYPE_2D, m_format, imageSubresourceRange);
463 
464 	const VkBufferCreateInfo					bufferCreateInfo					= makeBufferCreateInfo(pixelCount*sizeof(deUint32), VK_BUFFER_USAGE_TRANSFER_DST_BIT);
465 	const VkImageSubresourceLayers				bufferImageSubresourceLayers		= makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
466 	const VkBufferImageCopy						bufferImageRegion					= makeBufferImageCopy(makeExtent3D(m_data.width, m_data.height, 1u), bufferImageSubresourceLayers);
467 	de::MovePtr<BufferWithMemory>				buffer								= de::MovePtr<BufferWithMemory>(new BufferWithMemory(vkd, device, allocator, bufferCreateInfo, MemoryRequirement::HostVisible));
468 
469 	const VkDescriptorImageInfo					descriptorImageInfo					= makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
470 
471 	const VkImageMemoryBarrier					preImageBarrier						= makeImageMemoryBarrier(0u, VK_ACCESS_TRANSFER_WRITE_BIT,
472 																						VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
473 																						**image, imageSubresourceRange);
474 	const VkImageMemoryBarrier					postImageBarrier					= makeImageMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT,
475 																						VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_GENERAL,
476 																						**image, imageSubresourceRange);
477 	const VkMemoryBarrier						postTraceMemoryBarrier				= makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
478 	const VkMemoryBarrier						postCopyMemoryBarrier				= makeMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
479 	const VkClearValue							clearValue							= makeClearValueColorU32(5u, 5u, 5u, 255u);
480 
481 	TlasPtr										topLevelAccelerationStructure;
482 	BottomLevelAccelerationStructurePool		blasPool;
483 
484 	initBottomAccelerationStructures(blasPool, useGpuBuild, workerThreadsCount);
485 	blasPool.batchBuild(vkd, device, *cmdPool, queue);
486 
487 	beginCommandBuffer(vkd, *cmdBuffer, 0u);
488 	{
489 		cmdPipelineImageMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, &preImageBarrier);
490 		vkd.cmdClearColorImage(*cmdBuffer, **image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clearValue.color, 1, &imageSubresourceRange);
491 		cmdPipelineImageMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, &postImageBarrier);
492 
493 		topLevelAccelerationStructure = initTopAccelerationStructure(useGpuBuild, workerThreadsCount, blasPool);
494 		createTopAccelerationStructure(*cmdBuffer, topLevelAccelerationStructure.get());
495 
496 		VkWriteDescriptorSetAccelerationStructureKHR	accelerationStructureWriteDescriptorSet	=
497 		{
498 			VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR,	//  VkStructureType						sType;
499 			DE_NULL,															//  const void*							pNext;
500 			1u,																	//  deUint32							accelerationStructureCount;
501 			topLevelAccelerationStructure->getPtr(),							//  const VkAccelerationStructureKHR*	pAccelerationStructures;
502 		};
503 
504 		DescriptorSetUpdateBuilder()
505 			.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descriptorImageInfo)
506 			.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, &accelerationStructureWriteDescriptorSet)
507 			.update(vkd, device);
508 
509 		vkd.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipelineLayout, 0, 1, &descriptorSet.get(), 0, DE_NULL);
510 
511 		vkd.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, *pipeline);
512 
513 		cmdTraceRays(vkd,
514 			*cmdBuffer,
515 			&raygenShaderBindingTableRegion,
516 			&missShaderBindingTableRegion,
517 			&hitShaderBindingTableRegion,
518 			&callableShaderBindingTableRegion,
519 			m_data.width, m_data.height, 1);
520 
521 		cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, VK_PIPELINE_STAGE_TRANSFER_BIT, &postTraceMemoryBarrier);
522 
523 		vkd.cmdCopyImageToBuffer(*cmdBuffer, **image, VK_IMAGE_LAYOUT_GENERAL, **buffer, 1u, &bufferImageRegion);
524 
525 		cmdPipelineMemoryBarrier(vkd, *cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, &postCopyMemoryBarrier);
526 	}
527 	endCommandBuffer(vkd, *cmdBuffer);
528 
529 	submitCommandsAndWait(vkd, device, queue, cmdBuffer.get());
530 
531 	invalidateMappedMemoryRange(vkd, device, buffer->getAllocation().getMemory(), buffer->getAllocation().getOffset(), pixelCount * sizeof(deUint32));
532 
533 	return buffer;
534 }
535 
checkSupportInInstance(void) const536 void RayTracingBuildTestInstance::checkSupportInInstance (void) const
537 {
538 	const InstanceInterface&				vki						= m_context.getInstanceInterface();
539 	const VkPhysicalDevice					physicalDevice			= m_context.getPhysicalDevice();
540 	de::MovePtr<RayTracingProperties>		rayTracingProperties	= makeRayTracingProperties(vki, physicalDevice);
541 
542 	if (rayTracingProperties->getMaxPrimitiveCount() < m_data.squaresGroupCount)
543 		TCU_THROW(NotSupportedError, "Triangles required more than supported");
544 
545 	if (rayTracingProperties->getMaxGeometryCount() < m_data.geometriesGroupCount)
546 		TCU_THROW(NotSupportedError, "Geometries required more than supported");
547 
548 	if (rayTracingProperties->getMaxInstanceCount() < m_data.instancesGroupCount)
549 		TCU_THROW(NotSupportedError, "Instances required more than supported");
550 
551 	if (!verifyAllocationCount())
552 		TCU_THROW(NotSupportedError, "Memory allocations required more than supported");
553 }
554 
validateBuffer(de::MovePtr<BufferWithMemory> buffer)555 deUint32 RayTracingBuildTestInstance::validateBuffer (de::MovePtr<BufferWithMemory>	buffer)
556 {
557 	const deUint32*	bufferPtr	= (deUint32*)buffer->getAllocation().getHostPtr();
558 	deUint32		failures	= 0;
559 	deUint32		pos			= 0;
560 
561 	for (deUint32 y = 0; y < m_data.height; ++y)
562 	for (deUint32 x = 0; x < m_data.width; ++x)
563 	{
564 		const deUint32	anyHitValue		= 1;
565 		const deUint32	missValue		= 2;
566 
567 		const deUint32	n				= m_data.width * y + x;
568 		const deUint32	expectedValue	= (n % 7 == 0) ? missValue : anyHitValue;
569 
570 		if (bufferPtr[pos] != expectedValue)
571 		{
572 			if (m_data.testType == TEST_TYPE_AABBS || m_data.testType == TEST_TYPE_MIXED)
573 			{
574 				// In the case of AABB geometries, implementations may increase their size in
575 				// an acceleration structure in order to mitigate precision issues. This may
576 				// result in false positives being reported to the application."
577 
578 				if (bufferPtr[pos] != anyHitValue)
579 				{
580 					failures++;
581 				}
582 			}
583 			else
584 			{
585 				failures++;
586 			}
587 		}
588 
589 		++pos;
590 	}
591 
592 	return failures;
593 }
594 
iterate(void)595 tcu::TestStatus RayTracingBuildTestInstance::iterate (void)
596 {
597 	checkSupportInInstance();
598 
599 	const deUint32	failures = validateBuffer(runTest(m_data.deviceBuild, m_data.workerThreadsCount));
600 
601 	return (failures == 0) ? tcu::TestStatus::pass("Pass") : tcu::TestStatus::fail("failures=" + de::toString(failures));
602 }
603 
604 }	// anonymous
605 
buildTest(tcu::TestCaseGroup* testParentGroup, deUint32 threadsCount, bool deviceBuild)606 static void buildTest (tcu::TestCaseGroup* testParentGroup, deUint32 threadsCount, bool deviceBuild)
607 {
608 	const char*		tests[]	=
609 	{
610 		"level_primitives",
611 		"level_geometries",
612 		"level_instances"
613 	};
614 	const deUint32		sizes[]				= { 4, 16, 64, 256, 1024 };
615 	const deUint32		factors[]			= { 1, 4 };
616 	const bool			deferredOperation	= threadsCount != 0;
617 	tcu::TestContext&	testCtx				= testParentGroup->getTestContext();
618 
619 	for (size_t testsNdx = 0; testsNdx < DE_LENGTH_OF_ARRAY(tests); ++testsNdx)
620 	{
621 		de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, tests[testsNdx], ""));
622 
623 		for (size_t factorNdx = 0; factorNdx < DE_LENGTH_OF_ARRAY(factors); ++factorNdx)
624 		for (size_t sizesNdx = 0; sizesNdx < DE_LENGTH_OF_ARRAY(sizes); ++sizesNdx)
625 		{
626 			const deUint32	factor					= factors[factorNdx];
627 			const deUint32	largestGroup			= sizes[sizesNdx] * sizes[sizesNdx] / factor / factor;
628 			const deUint32	squaresGroupCount		= testsNdx == 0 ? largestGroup : factor;
629 			const deUint32	geometriesGroupCount	= testsNdx == 1 ? largestGroup : factor;
630 			const deUint32	instancesGroupCount		= testsNdx == 2 ? largestGroup : factor;
631 			const CaseDef	caseDef					=
632 			{
633 				TEST_TYPE_TRIANGLES,	//  TestType	testType;
634 				sizes[sizesNdx],		//  deUint32	width;
635 				sizes[sizesNdx],		//  deUint32	height;
636 				squaresGroupCount,		//  deUint32	squaresGroupCount;
637 				geometriesGroupCount,	//  deUint32	geometriesGroupCount;
638 				instancesGroupCount,	//  deUint32	instancesGroupCount;
639 				deferredOperation,		//  bool		deferredOperation;
640 				threadsCount,			//  deUint32	workerThreadsCount;
641 				deviceBuild				//  bool		deviceBuild;
642 			};
643 			const std::string	suffix		= de::toString(caseDef.instancesGroupCount) + '_' + de::toString(caseDef.geometriesGroupCount) + '_' + de::toString(caseDef.squaresGroupCount);
644 			const std::string	testName	= "triangles_" + suffix;
645 
646 			if (squaresGroupCount == 0 || geometriesGroupCount == 0 || instancesGroupCount == 0)
647 				continue;
648 
649 			group->addChild(new RayTracingTestCase(testCtx, testName.c_str(), "", caseDef));
650 		}
651 
652 		for (size_t factorNdx = 0; factorNdx < DE_LENGTH_OF_ARRAY(factors); ++factorNdx)
653 		for (size_t sizesNdx = 0; sizesNdx < DE_LENGTH_OF_ARRAY(sizes); ++sizesNdx)
654 		{
655 			const deUint32	factor					= factors[factorNdx];
656 			const deUint32	largestGroup			= sizes[sizesNdx] * sizes[sizesNdx] / factor / factor;
657 			const deUint32	squaresGroupCount		= testsNdx == 0 ? largestGroup : factor;
658 			const deUint32	geometriesGroupCount	= testsNdx == 1 ? largestGroup : factor;
659 			const deUint32	instancesGroupCount		= testsNdx == 2 ? largestGroup : factor;
660 			const CaseDef	caseDef					=
661 			{
662 				TEST_TYPE_AABBS,		//  TestType	testType;
663 				sizes[sizesNdx],		//  deUint32	width;
664 				sizes[sizesNdx],		//  deUint32	height;
665 				squaresGroupCount,		//  deUint32	squaresGroupCount;
666 				geometriesGroupCount,	//  deUint32	geometriesGroupCount;
667 				instancesGroupCount,	//  deUint32	instancesGroupCount;
668 				deferredOperation,		//  bool		deferredOperation;
669 				threadsCount,			//  deUint32	workerThreadsCount;
670 				deviceBuild				//  bool		deviceBuild;
671 			};
672 			const std::string	suffix		= de::toString(caseDef.instancesGroupCount) + '_' + de::toString(caseDef.geometriesGroupCount) + '_' + de::toString(caseDef.squaresGroupCount);
673 			const std::string	testName	= "aabbs_" + suffix;
674 
675 			if (squaresGroupCount == 0 || geometriesGroupCount == 0 || instancesGroupCount == 0)
676 				continue;
677 
678 			group->addChild(new RayTracingTestCase(testCtx, testName.c_str(), "", caseDef));
679 		}
680 
681 		for (size_t factorNdx = 0; factorNdx < DE_LENGTH_OF_ARRAY(factors); ++factorNdx)
682 		for (size_t sizesNdx = 0; sizesNdx < DE_LENGTH_OF_ARRAY(sizes); ++sizesNdx)
683 		{
684 			const deUint32	factor					= factors[factorNdx];
685 			const deUint32	largestGroup			= sizes[sizesNdx] * sizes[sizesNdx] / factor / factor;
686 			const deUint32	squaresGroupCount		= testsNdx == 0 ? largestGroup : factor;
687 			const deUint32	geometriesGroupCount	= testsNdx == 1 ? largestGroup : factor;
688 			const deUint32	instancesGroupCount		= testsNdx == 2 ? largestGroup : factor;
689 			const CaseDef	caseDef					=
690 			{
691 				TEST_TYPE_MIXED,		//  TestType	testType;
692 				sizes[sizesNdx],		//  deUint32	width;
693 				sizes[sizesNdx],		//  deUint32	height;
694 				squaresGroupCount,		//  deUint32	squaresGroupCount;
695 				geometriesGroupCount,	//  deUint32	geometriesGroupCount;
696 				instancesGroupCount,	//  deUint32	instancesGroupCount;
697 				deferredOperation,		//  bool		deferredOperation;
698 				threadsCount,			//  deUint32	workerThreadsCount;
699 				deviceBuild				//  bool		deviceBuild;
700 			};
701 			const std::string	suffix		= de::toString(caseDef.instancesGroupCount) + '_' + de::toString(caseDef.geometriesGroupCount) + '_' + de::toString(caseDef.squaresGroupCount);
702 			const std::string	testName	= "mixed_" + suffix;
703 
704 			if (squaresGroupCount < 2 || geometriesGroupCount < 2 || instancesGroupCount < 2)
705 				continue;
706 
707 			group->addChild(new RayTracingTestCase(testCtx, testName.c_str(), "", caseDef));
708 		}
709 
710 		testParentGroup->addChild(group.release());
711 	}
712 }
713 
createBuildTests(tcu::TestContext& testCtx)714 tcu::TestCaseGroup*	createBuildTests (tcu::TestContext& testCtx)
715 {
716 	de::MovePtr<tcu::TestCaseGroup> buildGroup(new tcu::TestCaseGroup(testCtx, "build", "Ray tracing build tests"));
717 
718 	const deUint32	threads[]	= { 0, 1, 2, 3, 4, 8, std::numeric_limits<deUint32>::max() };
719 
720 	for (const auto threadCount : threads)
721 	{
722 		auto buildTargeGroup = [&](bool deviceBuild) -> void
723 		{
724 			DE_ASSERT(!(threadCount != 0 && deviceBuild));
725 
726 			string	groupName, groupDesc;
727 			if (deviceBuild)
728 			{
729 				groupName = "gpu";
730 				groupDesc = "Compare results of run with acceleration structures build on GPU";
731 			}
732 			else
733 			{
734 				groupName = "cpu";
735 				groupDesc = "Compare results of run with acceleration structures build on CPU";
736 			}
737 
738 			if (threadCount != 0)
739 			{
740 				groupName += threadCount == std::numeric_limits<deUint32>::max()
741 												? "ht_max" : "ht_" + de::toString(threadCount);
742 				groupDesc = "Compare results of run with acceleration structures build on CPU and using host threading";
743 			}
744 
745 			de::MovePtr<tcu::TestCaseGroup> groupGpuCpuHt(new tcu::TestCaseGroup(testCtx, groupName.c_str(), groupDesc.c_str()));
746 			buildTest(groupGpuCpuHt.get(), threadCount, deviceBuild);
747 			buildGroup->addChild(groupGpuCpuHt.release());
748 		};
749 
750 		if (threadCount == 0)
751 		{
752 			buildTargeGroup(true);
753 		}
754 		buildTargeGroup(false);
755 	}
756 
757 	return buildGroup.release();
758 }
759 
760 }	// RayTracing
761 }	// vkt
762