/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2017-2019 The Khronos Group Inc.
 * Copyright (c) 2018-2019 NVIDIA Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *	  http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Tests for VK_EXT_buffer_device_address.
 *//*--------------------------------------------------------------------*/

#include "vktBindingBufferDeviceAddressTests.hpp"

#include "vkBufferWithMemory.hpp"
#include "vkImageWithMemory.hpp"
#include "vkQueryUtil.hpp"
#include "vkBuilderUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vkTypeUtil.hpp"
#include "vkObjUtil.hpp"

#include "vktTestGroupUtil.hpp"
#include "vktTestCase.hpp"

#include "deDefs.h"
#include "deMath.h"
#include "deRandom.h"
#include "deRandom.hpp"
#include "deSharedPtr.hpp"
#include "deString.h"

#include "tcuTestCase.hpp"
#include "tcuTestLog.hpp"

#include <string>
#include <sstream>

namespace vkt
{
namespace BindingModel
{
namespace
{
using namespace vk;
using namespace std;

typedef de::MovePtr<Unique<VkBuffer> >	VkBufferSp;
typedef de::MovePtr<Allocation>			AllocationSp;

static const deUint32 DIM = 8;
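// Each test dispatches (or draws) DIM x DIM invocations; every invocation walks
// the whole pointer chain independently and writes its pass/fail result to one
// texel of a DIM x DIM r32ui storage image.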

typedef enum
{
	BASE_UBO = 0,
	BASE_SSBO,
} Base;

#define ENABLE_RAYTRACING 0

typedef enum
{
	STAGE_COMPUTE = 0,
	STAGE_VERTEX,
	STAGE_FRAGMENT,
	STAGE_RAYGEN,
} Stage;

typedef enum
{
	BT_SINGLE = 0,
	BT_MULTI,
	BT_REPLAY,
} BufType;

typedef enum
{
	LAYOUT_STD140 = 0,
	LAYOUT_SCALAR,
} Layout;

typedef enum
{
	CONVERT_NONE = 0,
	CONVERT_UINT64,
	CONVERT_UVEC2,
	CONVERT_U64CMP,
	CONVERT_UVEC2CMP,
	CONVERT_UVEC2TOU64,
	CONVERT_U64TOUVEC2,
} Convert;

typedef enum
{
	OFFSET_ZERO = 0,
	OFFSET_NONZERO,
} MemoryOffset;

struct CaseDef
{
	deUint32		set;
	deUint32		depth;
	Base			base;
	Stage			stage;
	Convert			convertUToPtr;
	bool			storeInLocal;
	BufType			bufType;
	Layout			layout;
	MemoryOffset	memoryOffset;
};
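
// Each case builds a ternary tree of buffers with "depth" levels below the
// root. The root block (T2) is bound through a descriptor as either a UBO or
// an SSBO ("base"), while every child is reached purely through
// buffer_reference pointers (T1), optionally round-tripped through uint64_t or
// uvec2 ("convertUToPtr"), optionally cached in a local variable
// ("storeInLocal"), and laid out with std140 or scalar rules ("layout").
// Roughly, the generated shader checks something like:
//
//   accum |= x.a[0] - 0;            // root payload
//   accum |= x.c[0].a[0] - 3;       // first child, reached via pointer
//   accum |= x.c[0].c[0].a[0] - 12; // grandchild, and so on down to "depth"
//
// (illustrative sketch only; checkBuffer() below emits the exact code).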

class BufferAddressTestInstance : public TestInstance
{
public:
						BufferAddressTestInstance	(Context& context, const CaseDef& data);
						~BufferAddressTestInstance	(void);
	tcu::TestStatus		iterate						(void);
	virtual	void		fillBuffer					(const std::vector<deUint8 *>& cpuAddrs,
													 const std::vector<deUint64>& gpuAddrs,
													 deUint32 bufNum, deUint32 curDepth) const;
private:
	CaseDef				m_data;

	enum
	{
		WIDTH = 256,
		HEIGHT = 256
	};
};

BufferAddressTestInstance::BufferAddressTestInstance (Context& context, const CaseDef& data)
	: vkt::TestInstance		(context)
	, m_data				(data)
{
}

BufferAddressTestInstance::~BufferAddressTestInstance (void)
{
}

class BufferAddressTestCase : public TestCase
{
public:
							BufferAddressTestCase	(tcu::TestContext& context, const char* name, const CaseDef data);
							~BufferAddressTestCase	(void);
	virtual	void			initPrograms			(SourceCollections& programCollection) const;
	virtual TestInstance*	createInstance			(Context& context) const;
	virtual void			checkSupport			(Context& context) const;
	virtual	void			checkBuffer				(std::stringstream& checks, deUint32 bufNum, deUint32 curDepth, const std::string &prefix) const;

private:
	CaseDef					m_data;
};

BufferAddressTestCase::BufferAddressTestCase (tcu::TestContext& context, const char* name, const CaseDef data)
	: vkt::TestCase	(context, name)
	, m_data		(data)
{
}

BufferAddressTestCase::~BufferAddressTestCase	(void)
{
}

void BufferAddressTestCase::checkSupport (Context& context) const
{
	if (!context.isBufferDeviceAddressSupported())
		TCU_THROW(NotSupportedError, "Physical storage buffer pointers not supported");

	if (m_data.stage == STAGE_VERTEX && !context.getDeviceFeatures().vertexPipelineStoresAndAtomics)
		TCU_THROW(NotSupportedError, "Vertex pipeline stores and atomics not supported");

	if (m_data.set >= context.getDeviceProperties().limits.maxBoundDescriptorSets)
		TCU_THROW(NotSupportedError, "descriptor set number not supported");

#ifndef CTS_USES_VULKANSC
	bool isBufferDeviceAddressWithCaptureReplaySupported =
			(context.isDeviceFunctionalitySupported("VK_KHR_buffer_device_address") && context.getBufferDeviceAddressFeatures().bufferDeviceAddressCaptureReplay) ||
			(context.isDeviceFunctionalitySupported("VK_EXT_buffer_device_address") && context.getBufferDeviceAddressFeaturesEXT().bufferDeviceAddressCaptureReplay);
#else
	bool isBufferDeviceAddressWithCaptureReplaySupported =
			(context.isDeviceFunctionalitySupported("VK_KHR_buffer_device_address") && context.getBufferDeviceAddressFeatures().bufferDeviceAddressCaptureReplay);
#endif

	if (m_data.bufType == BT_REPLAY && !isBufferDeviceAddressWithCaptureReplaySupported)
		TCU_THROW(NotSupportedError, "Capture/replay of physical storage buffer pointers not supported");

	if (m_data.layout == LAYOUT_SCALAR && !context.getScalarBlockLayoutFeatures().scalarBlockLayout)
		TCU_THROW(NotSupportedError, "Scalar block layout not supported");

#if ENABLE_RAYTRACING
	if (m_data.stage == STAGE_RAYGEN &&
		!context.isDeviceFunctionalitySupported("VK_NV_ray_tracing"))
	{
		TCU_THROW(NotSupportedError, "Ray tracing not supported");
	}
#endif

	const bool needsInt64	= (	m_data.convertUToPtr == CONVERT_UINT64		||
								m_data.convertUToPtr == CONVERT_U64CMP		||
								m_data.convertUToPtr == CONVERT_U64TOUVEC2	||
								m_data.convertUToPtr == CONVERT_UVEC2TOU64	);

	const bool needsKHR		= (	m_data.convertUToPtr == CONVERT_UVEC2		||
								m_data.convertUToPtr == CONVERT_UVEC2CMP	||
								m_data.convertUToPtr == CONVERT_U64TOUVEC2	||
								m_data.convertUToPtr == CONVERT_UVEC2TOU64	);

	if (needsInt64 && !context.getDeviceFeatures().shaderInt64)
		TCU_THROW(NotSupportedError, "Int64 not supported");
	if (needsKHR && !context.isDeviceFunctionalitySupported("VK_KHR_buffer_device_address"))
		TCU_THROW(NotSupportedError, "VK_KHR_buffer_device_address not supported");
}

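// Recursively emit GLSL that follows the pointer tree rooted at "prefix",
// comparing every member against the values fillBuffer() writes and OR-ing any
// mismatch into "accum". Buffer bufNum's children are numbered
// bufNum*3+1 .. bufNum*3+3, matching the numbering used by fillBuffer().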
void BufferAddressTestCase::checkBuffer (std::stringstream& checks, deUint32 bufNum, deUint32 curDepth, const std::string &prefix) const
{
	string newPrefix = prefix;
	if (curDepth > 0)
	{
		if (m_data.convertUToPtr == CONVERT_UINT64 || m_data.convertUToPtr == CONVERT_UVEC2TOU64)
			newPrefix = "T1(uint64_t(T1(" + newPrefix + ")))";
		else if (m_data.convertUToPtr == CONVERT_UVEC2 || m_data.convertUToPtr == CONVERT_U64TOUVEC2)
			newPrefix = "T1(uvec2(T1(" + newPrefix + ")))";
	}

	if (m_data.storeInLocal && curDepth != 0)
	{
		std::string localName = "l" + de::toString(bufNum);
		checks << "   " << ((bufNum & 1) ? "restrict " : "") << "T1 " << localName << " = " << newPrefix << ";\n";
		newPrefix = localName;
	}

	checks << "   accum |= " << newPrefix << ".a[0] - " << bufNum*3+0 << ";\n";
	checks << "   accum |= " << newPrefix << ".a[pc.identity[1]] - " << bufNum*3+1 << ";\n";
	checks << "   accum |= " << newPrefix << ".b - " << bufNum*3+2 << ";\n";
	checks << "   accum |= int(" << newPrefix << ".e[0][0] - " << bufNum*3+3 << ");\n";
	checks << "   accum |= int(" << newPrefix << ".e[0][1] - " << bufNum*3+5 << ");\n";
	checks << "   accum |= int(" << newPrefix << ".e[1][0] - " << bufNum*3+4 << ");\n";
	checks << "   accum |= int(" << newPrefix << ".e[1][1] - " << bufNum*3+6 << ");\n";

	if (m_data.layout == LAYOUT_SCALAR)
	{
		checks << "   f = " << newPrefix << ".f;\n";
		checks << "   accum |= f.x - " << bufNum*3+7 << ";\n";
		checks << "   accum |= f.y - " << bufNum*3+8 << ";\n";
		checks << "   accum |= f.z - " << bufNum*3+9 << ";\n";
	}

	const std::string localPrefix = "l" + de::toString(bufNum);

	if (m_data.convertUToPtr == CONVERT_U64CMP || m_data.convertUToPtr == CONVERT_UVEC2CMP)
	{
		const std::string type = ((m_data.convertUToPtr == CONVERT_U64CMP) ? "uint64_t" : "uvec2");

		checks << "   " << type << " " << localPrefix << "c0 = " << type << "(" << newPrefix << ".c[0]);\n";
		checks << "   " << type << " " << localPrefix << "c1 = " << type << "(" << newPrefix << ".c[pc.identity[1]]);\n";
		checks << "   " << type << " " << localPrefix << "d  = " << type << "(" << newPrefix << ".d);\n";
	}

	if (curDepth != m_data.depth)
	{
		// Check that the pointers are non-null and distinct from each other.
		if (m_data.convertUToPtr == CONVERT_U64CMP)
		{
			checks << "   if (" << localPrefix << "c0 == zero ||\n"
				   << "       " << localPrefix << "c1 == zero ||\n"
				   << "       " << localPrefix << "d  == zero ||\n"
				   << "       " << localPrefix << "c0 == " << localPrefix << "c1 ||\n"
				   << "       " << localPrefix << "c1 == " << localPrefix << "d  ||\n"
				   << "       " << localPrefix << "c0 == " << localPrefix << "d  ) {\n"
				   << "     accum |= 1;\n"
				   << "   }\n";
		}
		else if (m_data.convertUToPtr == CONVERT_UVEC2CMP)
		{
			checks << "   if (all(equal(" << localPrefix << "c0, zero)) ||\n"
				   << "       all(equal(" << localPrefix << "c1, zero)) ||\n"
				   << "       all(equal(" << localPrefix << "d , zero)) ||\n"
				   << "       all(equal(" << localPrefix << "c0, " << localPrefix << "c1)) ||\n"
				   << "       all(equal(" << localPrefix << "c1, " << localPrefix << "d )) ||\n"
				   << "       all(equal(" << localPrefix << "c0, " << localPrefix << "d )) ) {\n"
				   << "     accum |= 1;\n"
				   << "   }\n";
		}

		checkBuffer(checks, bufNum*3+1, curDepth+1, newPrefix + ".c[0]");
		checkBuffer(checks, bufNum*3+2, curDepth+1, newPrefix + ".c[pc.identity[1]]");
		checkBuffer(checks, bufNum*3+3, curDepth+1, newPrefix + ".d");
	}
	else
	{
		// Check null pointers implicitly: at the maximum depth c0, c1 and d
		// are all null, so they must all compare equal to each other.
		if (m_data.convertUToPtr == CONVERT_U64CMP)
		{
			checks << "   if (!(" << localPrefix << "c0 == " << localPrefix << "c1 &&\n"
				   << "         " << localPrefix << "c1 == " << localPrefix << "d  &&\n"
				   << "         " << localPrefix << "c0 == " << localPrefix << "d  )) {\n"
				   << "     accum |= 1;\n"
				   << "   }\n";
		}
		else if (m_data.convertUToPtr == CONVERT_UVEC2CMP)
		{
			checks << "   if (!(all(equal(" << localPrefix << "c0, " << localPrefix << "c1)) &&\n"
				   << "         all(equal(" << localPrefix << "c1, " << localPrefix << "d )) &&\n"
				   << "         all(equal(" << localPrefix << "c0, " << localPrefix << "d )) )) {\n"
				   << "     accum |= 1;\n"
				   << "   }\n";
		}
	}
}

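// Host-side mirror of checkBuffer(): write the expected payload of buffer
// "bufNum" at the offsets declared in the shader (a at 0, b at 32, c at 48,
// d at 80, e at 96, and f at 36 for scalar layout), store the children's GPU
// addresses in c and d, and recurse; at the maximum depth c and d are written
// as null (zero) addresses instead.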
void BufferAddressTestInstance::fillBuffer (const std::vector<deUint8 *>& cpuAddrs,
											const std::vector<deUint64>& gpuAddrs,
											deUint32 bufNum, deUint32 curDepth) const
{
	deUint8 *buf = cpuAddrs[bufNum];

	deUint32 aStride = m_data.layout == LAYOUT_SCALAR ? 1 : 4; // (in deUint32s)
	deUint32 cStride = m_data.layout == LAYOUT_SCALAR ? 1 : 2; // (in deUint64s)
	deUint32 matStride = m_data.layout == LAYOUT_SCALAR ? 2 : 4; // (in floats)

	// a
	((deUint32 *)(buf+0))[0] = bufNum*3+0;
	((deUint32 *)(buf+0))[aStride] = bufNum*3+1;
	// b
	((deUint32 *)(buf+32))[0] = bufNum*3+2;
	if (m_data.layout == LAYOUT_SCALAR)
	{
		// f
		((deUint32 *)(buf+36))[0] = bufNum*3+7;
		((deUint32 *)(buf+36))[1] = bufNum*3+8;
		((deUint32 *)(buf+36))[2] = bufNum*3+9;
	}
	// e
	((float *)(buf+96))[0] = (float)(bufNum*3+3);
	((float *)(buf+96))[1] = (float)(bufNum*3+4);
	((float *)(buf+96))[matStride] = (float)(bufNum*3+5);
	((float *)(buf+96))[matStride+1] = (float)(bufNum*3+6);

	if (curDepth != m_data.depth)
	{
		// c
		((deUint64 *)(buf+48))[0] = gpuAddrs[bufNum*3+1];
		((deUint64 *)(buf+48))[cStride] = gpuAddrs[bufNum*3+2];
		// d
		((deUint64 *)(buf+80))[0] = gpuAddrs[bufNum*3+3];

		fillBuffer(cpuAddrs, gpuAddrs, bufNum*3+1, curDepth+1);
		fillBuffer(cpuAddrs, gpuAddrs, bufNum*3+2, curDepth+1);
		fillBuffer(cpuAddrs, gpuAddrs, bufNum*3+3, curDepth+1);
	}
	else
	{
		// c
		((deUint64 *)(buf+48))[0] = 0ull;
		((deUint64 *)(buf+48))[cStride] = 0ull;
		// d
		((deUint64 *)(buf+80))[0] = 0ull;
	}
}

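// Generate the test shader for the selected stage. T2 is the descriptor-bound
// root block and T1 the buffer_reference child type; both use explicit offsets
// so the std140 and scalar variants share one layout. The shader accumulates
// mismatches into "accum" and stores uvec4(1,0,0,1) on success, or
// uvec4(0,0,0,0) on failure, into the result image.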
void BufferAddressTestCase::initPrograms (SourceCollections& programCollection) const
{
	std::stringstream decls, checks, localDecls;

	std::string baseStorage = m_data.base == BASE_UBO ? "uniform" : "buffer";
	std::string memberStorage = "buffer";

	decls << "layout(r32ui, set = " << m_data.set << ", binding = 0) uniform uimage2D image0_0;\n";
	decls << "layout(buffer_reference) " << memberStorage << " T1;\n";

	std::string refType;
	switch (m_data.convertUToPtr)
	{
	case CONVERT_UINT64:
	case CONVERT_U64TOUVEC2:
		refType = "uint64_t";
		break;

	case CONVERT_UVEC2:
	case CONVERT_UVEC2TOU64:
		refType = "uvec2";
		break;

	default:
		refType = "T1";
		break;
	}

	std::string layout = m_data.layout == LAYOUT_SCALAR ? "scalar" : "std140";
	decls <<
			"layout(set = " << m_data.set << ", binding = 1, " << layout << ") " << baseStorage << " T2 {\n"
			"   layout(offset = 0) int a[2]; // stride = 4 for scalar, 16 for std140\n"
			"   layout(offset = 32) int b;\n"
			<< ((m_data.layout == LAYOUT_SCALAR) ? "   layout(offset = 36) ivec3 f;\n" : "") <<
			"   layout(offset = 48) " << refType << " c[2]; // stride = 8 for scalar, 16 for std140\n"
			"   layout(offset = 80) " << refType << " d;\n"
			"   layout(offset = 96, row_major) mat2 e; // tightly packed for scalar, 16 byte matrix stride for std140\n"
			"} x;\n";
	decls <<
			"layout(buffer_reference, " << layout << ") " << memberStorage << " T1 {\n"
			"   layout(offset = 0) int a[2]; // stride = 4 for scalar, 16 for std140\n"
			"   layout(offset = 32) int b;\n"
			<< ((m_data.layout == LAYOUT_SCALAR) ? "   layout(offset = 36) ivec3 f;\n" : "") <<
			"   layout(offset = 48) " << refType << " c[2]; // stride = 8 for scalar, 16 for std140\n"
			"   layout(offset = 80) " << refType << " d;\n"
			"   layout(offset = 96, row_major) mat2 e; // tightly packed for scalar, 16 byte matrix stride for std140\n"
			"};\n";

	if (m_data.convertUToPtr == CONVERT_U64CMP)
		localDecls << "  uint64_t zero = uint64_t(0);\n";
	else if (m_data.convertUToPtr == CONVERT_UVEC2CMP)
		localDecls << "  uvec2 zero = uvec2(0, 0);\n";

	checkBuffer(checks, 0, 0, "x");

	std::stringstream pushdecl;
	pushdecl << "layout (push_constant, std430) uniform Block { int identity[32]; } pc;\n";

	vk::ShaderBuildOptions::Flags flags = vk::ShaderBuildOptions::Flags(0);
	if (m_data.layout == LAYOUT_SCALAR)
		flags = vk::ShaderBuildOptions::FLAG_ALLOW_SCALAR_OFFSETS;

	// The uvec2 conversion-and-comparison variant needs SPIR-V 1.5 for OpBitcast between pointers and uvec2.
	const vk::SpirvVersion spirvVersion = ((m_data.convertUToPtr == CONVERT_UVEC2CMP) ? vk::SPIRV_VERSION_1_5 : vk::SPIRV_VERSION_1_0);

	switch (m_data.stage)
	{
	default: DE_ASSERT(0); // Fallthrough
	case STAGE_COMPUTE:
		{
			std::stringstream css;
			css <<
				"#version 450 core\n"
				"#extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable\n"
				"#extension GL_EXT_buffer_reference : enable\n"
				"#extension GL_EXT_scalar_block_layout : enable\n"
				"#extension GL_EXT_buffer_reference_uvec2 : enable\n"
				<< pushdecl.str()
				<< decls.str() <<
				"layout(local_size_x = 1, local_size_y = 1) in;\n"
				"void main()\n"
				"{\n"
				"  int accum = 0, temp;\n"
				"  ivec3 f;\n"
				<< localDecls.str()
				<< checks.str() <<
				"  uvec4 color = (accum != 0) ? uvec4(0,0,0,0) : uvec4(1,0,0,1);\n"
				"  imageStore(image0_0, ivec2(gl_GlobalInvocationID.xy), color);\n"
				"}\n";

			programCollection.glslSources.add("test") << glu::ComputeSource(css.str())
				<< vk::ShaderBuildOptions(programCollection.usedVulkanVersion, spirvVersion, flags);
			break;
		}
#if ENABLE_RAYTRACING
	case STAGE_RAYGEN:
		{
			std::stringstream css;
			css <<
				"#version 460 core\n"
				"#extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable\n"
				"#extension GL_EXT_buffer_reference : enable\n"
				"#extension GL_EXT_scalar_block_layout : enable\n"
				"#extension GL_EXT_buffer_reference_uvec2 : enable\n"
				"#extension GL_NV_ray_tracing : require\n"
				<< pushdecl.str()
				<< decls.str() <<
				"void main()\n"
				"{\n"
				"  int accum = 0, temp;\n"
				"  ivec3 f;\n"
				<< localDecls.str()
				<< checks.str() <<
				"  uvec4 color = (accum != 0) ? uvec4(0,0,0,0) : uvec4(1,0,0,1);\n"
				"  imageStore(image0_0, ivec2(gl_LaunchIDNV.xy), color);\n"
				"}\n";

			programCollection.glslSources.add("test") << glu::RaygenSource(css.str())
				<< vk::ShaderBuildOptions(programCollection.usedVulkanVersion, spirvVersion, flags);
			break;
		}
#endif
	case STAGE_VERTEX:
		{
			std::stringstream vss;
			vss <<
				"#version 450 core\n"
				"#extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable\n"
				"#extension GL_EXT_buffer_reference : enable\n"
				"#extension GL_EXT_scalar_block_layout : enable\n"
				"#extension GL_EXT_buffer_reference_uvec2 : enable\n"
				<< pushdecl.str()
				<< decls.str() <<
				"void main()\n"
				"{\n"
				"  int accum = 0, temp;\n"
				"  ivec3 f;\n"
				<< localDecls.str()
				<< checks.str() <<
				"  uvec4 color = (accum != 0) ? uvec4(0,0,0,0) : uvec4(1,0,0,1);\n"
				"  imageStore(image0_0, ivec2(gl_VertexIndex % " << DIM << ", gl_VertexIndex / " << DIM << "), color);\n"
				"  gl_PointSize = 1.0f;\n"
				"}\n";

			programCollection.glslSources.add("test") << glu::VertexSource(vss.str())
				<< vk::ShaderBuildOptions(programCollection.usedVulkanVersion, spirvVersion, flags);
			break;
		}
	case STAGE_FRAGMENT:
		{
			std::stringstream vss;
			vss <<
				"#version 450 core\n"
				"void main()\n"
				"{\n"
				// full-viewport quad
				"  gl_Position = vec4( 2.0*float(gl_VertexIndex&2) - 1.0, 4.0*(gl_VertexIndex&1)-1.0, 1.0 - 2.0 * float(gl_VertexIndex&1), 1);\n"
				"}\n";

			programCollection.glslSources.add("vert") << glu::VertexSource(vss.str());

			std::stringstream fss;
			fss <<
				"#version 450 core\n"
				"#extension GL_EXT_shader_explicit_arithmetic_types_int64 : enable\n"
				"#extension GL_EXT_buffer_reference : enable\n"
				"#extension GL_EXT_scalar_block_layout : enable\n"
				"#extension GL_EXT_buffer_reference_uvec2 : enable\n"
				<< pushdecl.str()
				<< decls.str() <<
				"void main()\n"
				"{\n"
				"  int accum = 0, temp;\n"
				"  ivec3 f;\n"
				<< localDecls.str()
				<< checks.str() <<
				"  uvec4 color = (accum != 0) ? uvec4(0,0,0,0) : uvec4(1,0,0,1);\n"
				"  imageStore(image0_0, ivec2(gl_FragCoord.x, gl_FragCoord.y), color);\n"
				"}\n";

			programCollection.glslSources.add("test") << glu::FragmentSource(fss.str())
				<< vk::ShaderBuildOptions(programCollection.usedVulkanVersion, spirvVersion, flags);
			break;
		}
	}
}

TestInstance* BufferAddressTestCase::createInstance (Context& context) const
{
	return new BufferAddressTestInstance(context, m_data);
}

VkBufferCreateInfo makeBufferCreateInfo (const void*				pNext,
										 const VkDeviceSize			bufferSize,
										 const VkBufferUsageFlags	usage,
										 const VkBufferCreateFlags	flags)
{
	const VkBufferCreateInfo bufferCreateInfo =
	{
		VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,	// VkStructureType		sType;
		pNext,									// const void*			pNext;
		flags,									// VkBufferCreateFlags	flags;
		bufferSize,								// VkDeviceSize			size;
		usage,									// VkBufferUsageFlags	usage;
		VK_SHARING_MODE_EXCLUSIVE,				// VkSharingMode		sharingMode;
		0u,										// deUint32				queueFamilyIndexCount;
		DE_NULL,								// const deUint32*		pQueueFamilyIndices;
	};
	return bufferCreateInfo;
}

tcu::TestStatus BufferAddressTestInstance::iterate (void)
{
	const InstanceInterface&	vki						= m_context.getInstanceInterface();
	const DeviceInterface&		vk						= m_context.getDeviceInterface();
	const VkPhysicalDevice&		physDevice				= m_context.getPhysicalDevice();
	const VkDevice				device					= m_context.getDevice();
	Allocator&					allocator				= m_context.getDefaultAllocator();
	const bool					useKHR					= m_context.isDeviceFunctionalitySupported("VK_KHR_buffer_device_address");

	VkFlags allShaderStages = VK_SHADER_STAGE_COMPUTE_BIT | VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT;
	VkFlags allPipelineStages = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;

#if ENABLE_RAYTRACING
	if (m_data.stage == STAGE_RAYGEN)
	{
		allShaderStages = VK_SHADER_STAGE_RAYGEN_BIT_NV;
		allPipelineStages = VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_NV;
	}
#endif

	VkPhysicalDeviceProperties2 properties;
	deMemset(&properties, 0, sizeof(properties));
	properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;

#if ENABLE_RAYTRACING
	VkPhysicalDeviceRayTracingPropertiesNV rayTracingProperties;
	deMemset(&rayTracingProperties, 0, sizeof(rayTracingProperties));
	rayTracingProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PROPERTIES_NV;

	if (m_context.isDeviceFunctionalitySupported("VK_NV_ray_tracing"))
	{
		properties.pNext = &rayTracingProperties;
	}
#endif

	m_context.getInstanceInterface().getPhysicalDeviceProperties2(m_context.getPhysicalDevice(), &properties);

	VkPipelineBindPoint bindPoint;

	switch (m_data.stage)
	{
	case STAGE_COMPUTE:
		bindPoint = VK_PIPELINE_BIND_POINT_COMPUTE;
		break;
#if ENABLE_RAYTRACING
	case STAGE_RAYGEN:
		bindPoint = VK_PIPELINE_BIND_POINT_RAY_TRACING_NV;
		break;
#endif
	default:
		bindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
		break;
	}

	Move<vk::VkDescriptorPool>	descriptorPool;
	Move<vk::VkDescriptorSet>	descriptorSet;

	VkDescriptorPoolCreateFlags poolCreateFlags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT;

	VkDescriptorSetLayoutBinding bindings[2];
	bindings[0] =
	{
		0,									// deUint32				binding;
		VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,	// VkDescriptorType		descriptorType;
		1,									// deUint32				descriptorCount;
		allShaderStages,					// VkShaderStageFlags	stageFlags;
		DE_NULL								// const VkSampler*		pImmutableSamplers;
	};
	bindings[1] =
	{
		1,									// deUint32				binding;
		m_data.base == BASE_UBO ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER : VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,	// VkDescriptorType		descriptorType;
		1,									// deUint32				descriptorCount;
		allShaderStages,					// VkShaderStageFlags	stageFlags;
		DE_NULL								// const VkSampler*		pImmutableSamplers;
	};

	// Create a layout and allocate a descriptor set for it.
	VkDescriptorSetLayoutCreateInfo setLayoutCreateInfo =
	{
		vk::VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
		DE_NULL,

		0,
		(deUint32)2,
		&bindings[0]
	};

	Move<vk::VkDescriptorSetLayout>	descriptorSetLayout = vk::createDescriptorSetLayout(vk, device, &setLayoutCreateInfo);

	setLayoutCreateInfo.bindingCount = 0;
	Move<vk::VkDescriptorSetLayout>	emptyDescriptorSetLayout = vk::createDescriptorSetLayout(vk, device, &setLayoutCreateInfo);

	vk::DescriptorPoolBuilder poolBuilder;
	poolBuilder.addType(bindings[1].descriptorType, 1);
	poolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1);

	descriptorPool = poolBuilder.build(vk, device, poolCreateFlags, 1u);
	descriptorSet = makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout);

	VkDeviceSize	align = de::max(de::max(properties.properties.limits.minUniformBufferOffsetAlignment,
											properties.properties.limits.minStorageBufferOffsetAlignment),
											(VkDeviceSize)128 /*sizeof(T1)*/);

	deUint32 numBindings = 1;
	for (deUint32 d = 0; d < m_data.depth; ++d)
	{
		numBindings = numBindings*3+1;
	}
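	// depth d gives (3^(d+1) - 1) / 2 bindings: 1, 4, 13, 40, ... for d = 0, 1, 2, 3.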

#ifndef CTS_USES_VULKANSC
	VkBufferDeviceAddressCreateInfoEXT addressCreateInfoEXT =
	{
		VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_CREATE_INFO_EXT,	// VkStructureType	 sType;
		DE_NULL,													// const void*		 pNext;
		0x000000000ULL,												// VkDeviceSize		 deviceAddress
	};
#endif

	VkBufferOpaqueCaptureAddressCreateInfo bufferOpaqueCaptureAddressCreateInfo =
	{
		VK_STRUCTURE_TYPE_BUFFER_OPAQUE_CAPTURE_ADDRESS_CREATE_INFO,	// VkStructureType	 sType;
		DE_NULL,														// const void*		 pNext;
		0x000000000ULL,													// VkDeviceSize		 opaqueCaptureAddress
	};
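	// The structures above feed the capture/replay cases: with
	// VK_KHR_buffer_device_address, opaque capture addresses (queried before
	// memory is bound) are replayed through the pNext chains; on the EXT-only
	// path the raw device address itself is replayed via
	// VkBufferDeviceAddressCreateInfoEXT.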

	std::vector<deUint8 *> cpuAddrs(numBindings);
	std::vector<VkDeviceAddress> gpuAddrs(numBindings);
	std::vector<deUint64> opaqueBufferAddrs(numBindings);
	std::vector<deUint64> opaqueMemoryAddrs(numBindings);

	VkBufferDeviceAddressInfo bufferDeviceAddressInfo =
	{
		VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,	// VkStructureType	 sType;
		DE_NULL,										// const void*		 pNext;
		0,												// VkBuffer			 buffer
	};

	VkDeviceMemoryOpaqueCaptureAddressInfo deviceMemoryOpaqueCaptureAddressInfo =
	{
		VK_STRUCTURE_TYPE_DEVICE_MEMORY_OPAQUE_CAPTURE_ADDRESS_INFO,	// VkStructureType	 sType;
		DE_NULL,														// const void*		 pNext;
		0,																// VkDeviceMemory	 memory;
	};

	bool multiBuffer = m_data.bufType != BT_SINGLE;
	bool offsetNonZero = m_data.memoryOffset == OFFSET_NONZERO;
	deUint32 numBuffers = multiBuffer ? numBindings : 1;
	VkDeviceSize bufferSize = multiBuffer ? align : (align*numBindings);
	VkDeviceSize memoryOffset = 0;

	vector<VkBufferSp>			buffers(numBuffers);
	vector<AllocationSp>		allocations(numBuffers);

	VkBufferCreateInfo			bufferCreateInfo = makeBufferCreateInfo(DE_NULL, bufferSize,
														VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
														VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
														VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
														m_data.bufType == BT_REPLAY ? VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT : 0);

	// VkMemoryAllocateFlags to be filled out later
	VkMemoryAllocateFlagsInfo	allocFlagsInfo =
	{
		VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO,	//	VkStructureType	sType
		DE_NULL,										//	const void*		pNext
		0,												//	VkMemoryAllocateFlags    flags
		0,												//	uint32_t                 deviceMask
	};

	VkMemoryOpaqueCaptureAddressAllocateInfo memoryOpaqueCaptureAddressAllocateInfo =
	{
		VK_STRUCTURE_TYPE_MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO,	// VkStructureType    sType;
		DE_NULL,														// const void*        pNext;
		0,																// uint64_t           opaqueCaptureAddress;
	};

	if (useKHR)
		allocFlagsInfo.flags |= VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT;

	if (useKHR && m_data.bufType == BT_REPLAY)
	{
		allocFlagsInfo.flags |= VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT;
		allocFlagsInfo.pNext = &memoryOpaqueCaptureAddressAllocateInfo;
	}

	for (deUint32 i = 0; i < numBuffers; ++i)
	{
		buffers[i] = VkBufferSp(new Unique<VkBuffer>(createBuffer(vk, device, &bufferCreateInfo)));

		// query opaque capture address before binding memory
		if (useKHR)
		{
			bufferDeviceAddressInfo.buffer = **buffers[i];
			opaqueBufferAddrs[i] = vk.getBufferOpaqueCaptureAddress(device, &bufferDeviceAddressInfo);
		}

		VkMemoryRequirements memReq = getBufferMemoryRequirements(vk, device, **buffers[i]);
		if (offsetNonZero)
		{
			memoryOffset = memReq.alignment;
			memReq.size += memoryOffset;
		}

		allocations[i] = AllocationSp(allocateExtended(vki, vk, physDevice, device, memReq, MemoryRequirement::HostVisible, &allocFlagsInfo));

		if (useKHR)
		{
			deviceMemoryOpaqueCaptureAddressInfo.memory = allocations[i]->getMemory();
			opaqueMemoryAddrs[i] = vk.getDeviceMemoryOpaqueCaptureAddress(device, &deviceMemoryOpaqueCaptureAddressInfo);
		}

		VK_CHECK(vk.bindBufferMemory(device, **buffers[i], allocations[i]->getMemory(), memoryOffset));
	}

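	// For capture/replay cases: record every buffer's device address, destroy
	// and recreate the buffers and allocations with the captured (opaque)
	// addresses chained in, and verify that the replayed buffers land at the
	// same device addresses.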
	if (m_data.bufType == BT_REPLAY)
	{
		for (deUint32 i = 0; i < numBuffers; ++i)
		{
			bufferDeviceAddressInfo.buffer = **buffers[i];
			gpuAddrs[i] = vk.getBufferDeviceAddress(device, &bufferDeviceAddressInfo);
		}
		buffers.clear();
		buffers.resize(numBuffers);
		allocations.clear();
		allocations.resize(numBuffers);

#ifndef CTS_USES_VULKANSC
		bufferCreateInfo.pNext = useKHR ? (void *)&bufferOpaqueCaptureAddressCreateInfo : (void *)&addressCreateInfoEXT;
#else
		bufferCreateInfo.pNext = (void *)&bufferOpaqueCaptureAddressCreateInfo;
#endif

		for (deInt32 i = numBuffers-1; i >= 0; --i)
		{
#ifndef CTS_USES_VULKANSC
			addressCreateInfoEXT.deviceAddress = gpuAddrs[i];
#endif
			bufferOpaqueCaptureAddressCreateInfo.opaqueCaptureAddress = opaqueBufferAddrs[i];
			memoryOpaqueCaptureAddressAllocateInfo.opaqueCaptureAddress = opaqueMemoryAddrs[i];

			buffers[i] = VkBufferSp(new Unique<VkBuffer>(createBuffer(vk, device, &bufferCreateInfo)));
			allocations[i] = AllocationSp(allocateExtended(vki, vk, physDevice, device, getBufferMemoryRequirements(vk, device, **buffers[i]), MemoryRequirement::HostVisible, &allocFlagsInfo));
			VK_CHECK(vk.bindBufferMemory(device, **buffers[i], allocations[i]->getMemory(), 0));

			bufferDeviceAddressInfo.buffer = **buffers[i];
			VkDeviceSize newAddr = vk.getBufferDeviceAddress(device, &bufferDeviceAddressInfo);

			if (newAddr != gpuAddrs[i])
				return tcu::TestStatus(QP_TEST_RESULT_FAIL, "address mismatch");
		}
	}

	// Compute the GPU and CPU address of each binding. With a single buffer the
	// bindings are packed "align" bytes apart; otherwise each binding lives in
	// its own buffer.
	for (deUint32 i = 0; i < numBindings; ++i)
	{
		bufferDeviceAddressInfo.buffer = **buffers[multiBuffer ? i : 0];
		gpuAddrs[i] = vk.getBufferDeviceAddress(device, &bufferDeviceAddressInfo);

		cpuAddrs[i] = (deUint8 *)allocations[multiBuffer ? i : 0]->getHostPtr() + memoryOffset;
		if (!multiBuffer)
		{
			cpuAddrs[i] = cpuAddrs[i] + align*i;
			gpuAddrs[i] = gpuAddrs[i] + align*i;
		}
		//printf("addr 0x%08x`%08x\n", (unsigned)(gpuAddrs[i]>>32), (unsigned)(gpuAddrs[i]));
	}

	fillBuffer(cpuAddrs, gpuAddrs, 0, 0);

	for (deUint32 i = 0; i < numBuffers; ++i)
		flushAlloc(vk, device, *allocations[i]);

	const VkQueue					queue					= m_context.getUniversalQueue();
	Move<VkCommandPool>				cmdPool					= createCommandPool(vk, device, 0, m_context.getUniversalQueueFamilyIndex());
	Move<VkCommandBuffer>			cmdBuffer				= allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);

	beginCommandBuffer(vk, *cmdBuffer, 0u);

	// Push constants are used for dynamic indexing. PushConstant[i] = i.

	const VkPushConstantRange pushConstRange =
	{
		allShaderStages,		// VkShaderStageFlags	stageFlags
		0,						// deUint32				offset
		128						// deUint32				size
	};

	deUint32 nonEmptySetLimit = m_data.base == BASE_UBO ? properties.properties.limits.maxPerStageDescriptorUniformBuffers :
														  properties.properties.limits.maxPerStageDescriptorStorageBuffers;
	nonEmptySetLimit = de::min(nonEmptySetLimit, properties.properties.limits.maxPerStageDescriptorStorageImages);

	vector<vk::VkDescriptorSetLayout>	descriptorSetLayoutsRaw(m_data.set+1);
	for (size_t i = 0; i < m_data.set+1; ++i)
	{
		// Use the non-empty layout for as many sets as the per-stage descriptor
		// limits allow (and always for the set under test); pad the rest with
		// empty layouts.
		if (i < nonEmptySetLimit - 1 || i == m_data.set)
			descriptorSetLayoutsRaw[i] = descriptorSetLayout.get();
		else
			descriptorSetLayoutsRaw[i] = emptyDescriptorSetLayout.get();
	}

	const VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo =
	{
		VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,				// sType
		DE_NULL,													// pNext
		(VkPipelineLayoutCreateFlags)0,
		m_data.set+1,												// setLayoutCount
		&descriptorSetLayoutsRaw[0],								// pSetLayouts
		1u,															// pushConstantRangeCount
		&pushConstRange,											// pPushConstantRanges
	};

	Move<VkPipelineLayout> pipelineLayout = createPipelineLayout(vk, device, &pipelineLayoutCreateInfo, NULL);

	// PushConstant[i] = i
	for (deUint32 i = 0; i < (deUint32)(128 / sizeof(deUint32)); ++i)
	{
		vk.cmdPushConstants(*cmdBuffer, *pipelineLayout, allShaderStages,
							(deUint32)(i * sizeof(deUint32)), (deUint32)sizeof(deUint32), &i);
	}

	de::MovePtr<BufferWithMemory> copyBuffer;
	copyBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
		vk, device, allocator, makeBufferCreateInfo(DE_NULL, DIM*DIM*sizeof(deUint32), VK_BUFFER_USAGE_TRANSFER_DST_BIT, 0), MemoryRequirement::HostVisible));

	const VkImageCreateInfo			imageCreateInfo			=
	{
		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,	// VkStructureType			sType;
		DE_NULL,								// const void*				pNext;
		(VkImageCreateFlags)0u,					// VkImageCreateFlags		flags;
		VK_IMAGE_TYPE_2D,						// VkImageType				imageType;
		VK_FORMAT_R32_UINT,						// VkFormat					format;
		{
			DIM,								// deUint32	width;
			DIM,								// deUint32	height;
			1u									// deUint32	depth;
		},										// VkExtent3D				extent;
		1u,										// deUint32					mipLevels;
		1u,										// deUint32					arrayLayers;
		VK_SAMPLE_COUNT_1_BIT,					// VkSampleCountFlagBits	samples;
		VK_IMAGE_TILING_OPTIMAL,				// VkImageTiling			tiling;
		VK_IMAGE_USAGE_STORAGE_BIT
		| VK_IMAGE_USAGE_TRANSFER_SRC_BIT
		| VK_IMAGE_USAGE_TRANSFER_DST_BIT,		// VkImageUsageFlags		usage;
		VK_SHARING_MODE_EXCLUSIVE,				// VkSharingMode			sharingMode;
		0u,										// deUint32					queueFamilyIndexCount;
		DE_NULL,								// const deUint32*			pQueueFamilyIndices;
		VK_IMAGE_LAYOUT_UNDEFINED				// VkImageLayout			initialLayout;
	};

	VkImageViewCreateInfo		imageViewCreateInfo		=
	{
		VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,	// VkStructureType			sType;
		DE_NULL,									// const void*				pNext;
		(VkImageViewCreateFlags)0u,					// VkImageViewCreateFlags	flags;
		DE_NULL,									// VkImage					image;
		VK_IMAGE_VIEW_TYPE_2D,						// VkImageViewType			viewType;
		VK_FORMAT_R32_UINT,							// VkFormat					format;
		{
			VK_COMPONENT_SWIZZLE_R,					// VkComponentSwizzle	r;
			VK_COMPONENT_SWIZZLE_G,					// VkComponentSwizzle	g;
			VK_COMPONENT_SWIZZLE_B,					// VkComponentSwizzle	b;
			VK_COMPONENT_SWIZZLE_A					// VkComponentSwizzle	a;
		},											// VkComponentMapping		 components;
		{
			VK_IMAGE_ASPECT_COLOR_BIT,				// VkImageAspectFlags	aspectMask;
			0u,										// deUint32				baseMipLevel;
			1u,										// deUint32				levelCount;
			0u,										// deUint32				baseArrayLayer;
			1u										// deUint32				layerCount;
		}											// VkImageSubresourceRange	subresourceRange;
	};

	de::MovePtr<ImageWithMemory> image;
	Move<VkImageView> imageView;

	image = de::MovePtr<ImageWithMemory>(new ImageWithMemory(
		vk, device, allocator, imageCreateInfo, MemoryRequirement::Any));
	imageViewCreateInfo.image = **image;
	imageView = createImageView(vk, device, &imageViewCreateInfo, NULL);

	VkDescriptorImageInfo imageInfo = makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
	VkDescriptorBufferInfo bufferInfo = makeDescriptorBufferInfo(**buffers[0], 0, align);

	VkWriteDescriptorSet w =
	{
		VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,							// sType
		DE_NULL,														// pNext
		*descriptorSet,													// dstSet
		(deUint32)0,													// dstBinding
		0,																// dstArrayElement
		1u,																// descriptorCount
		bindings[0].descriptorType,										// descriptorType
		&imageInfo,														// pImageInfo
		&bufferInfo,													// pBufferInfo
		DE_NULL,														// pTexelBufferView
	};
	vk.updateDescriptorSets(device, 1, &w, 0, NULL);

	w.dstBinding = 1;
	w.descriptorType = bindings[1].descriptorType;
	vk.updateDescriptorSets(device, 1, &w, 0, NULL);
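	// Note that "w" sets both pImageInfo and pBufferInfo above; Vulkan only
	// reads the member matching descriptorType, so the same struct serves both
	// the image write (binding 0) and the buffer write (binding 1).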

	vk.cmdBindDescriptorSets(*cmdBuffer, bindPoint, *pipelineLayout, m_data.set, 1, &descriptorSet.get(), 0, DE_NULL);

	Move<VkPipeline> pipeline;
	Move<VkRenderPass> renderPass;
	Move<VkFramebuffer> framebuffer;
	de::MovePtr<BufferWithMemory> sbtBuffer;

	m_context.getTestContext().touchWatchdogAndDisableIntervalTimeLimit();
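	// Pipeline creation below can be slow for deep pointer chains (the
	// generated shader is large), which is presumably why the watchdog
	// interval limit is suspended around it.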

	if (m_data.stage == STAGE_COMPUTE)
	{
		const Unique<VkShaderModule>	shader(createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0));

		const VkPipelineShaderStageCreateInfo	shaderCreateInfo =
		{
			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
			DE_NULL,
			(VkPipelineShaderStageCreateFlags)0,
			VK_SHADER_STAGE_COMPUTE_BIT,								// stage
			*shader,													// shader
			"main",
			DE_NULL,													// pSpecializationInfo
		};

		const VkComputePipelineCreateInfo		pipelineCreateInfo =
		{
			VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
			DE_NULL,
			0u,															// flags
			shaderCreateInfo,											// cs
			*pipelineLayout,											// layout
			(vk::VkPipeline)0,											// basePipelineHandle
			0u,															// basePipelineIndex
		};
		pipeline = createComputePipeline(vk, device, DE_NULL, &pipelineCreateInfo, NULL);
	}
#if ENABLE_RAYTRACING
	else if (m_data.stage == STAGE_RAYGEN)
	{
		const Unique<VkShaderModule>	shader(createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0));

		const VkPipelineShaderStageCreateInfo	shaderCreateInfo =
		{
			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
			DE_NULL,
			(VkPipelineShaderStageCreateFlags)0,
			VK_SHADER_STAGE_RAYGEN_BIT_NV,								// stage
			*shader,													// shader
			"main",
			DE_NULL,													// pSpecializationInfo
		};

		VkRayTracingShaderGroupCreateInfoNV group =
		{
			VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV,
			DE_NULL,
			VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_NV,			// type
			0,														// generalShader
			VK_SHADER_UNUSED_NV,									// closestHitShader
			VK_SHADER_UNUSED_NV,									// anyHitShader
			VK_SHADER_UNUSED_NV,									// intersectionShader
		};

		VkRayTracingPipelineCreateInfoNV pipelineCreateInfo = {
			VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_NV,	// sType
			DE_NULL,												// pNext
			0,														// flags
			1,														// stageCount
			&shaderCreateInfo,										// pStages
			1,														// groupCount
			&group,													// pGroups
			0,														// maxRecursionDepth
			*pipelineLayout,										// layout
			(vk::VkPipeline)0,										// basePipelineHandle
			0u,														// basePipelineIndex
		};

		pipeline = createRayTracingPipelineNV(vk, device, DE_NULL, &pipelineCreateInfo, NULL);

		sbtBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
			vk, device, allocator, makeBufferCreateInfo(DE_NULL, rayTracingProperties.shaderGroupHandleSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_RAY_TRACING_BIT_NV, 0), MemoryRequirement::HostVisible));
		deUint32 *ptr = (deUint32 *)sbtBuffer->getAllocation().getHostPtr();
		invalidateAlloc(vk, device, sbtBuffer->getAllocation());

		vk.getRayTracingShaderGroupHandlesNV(device, *pipeline, 0, 1, rayTracingProperties.shaderGroupHandleSize, ptr);
	}
#endif
	else
	{
		const vk::VkSubpassDescription		subpassDesc			=
		{
			(vk::VkSubpassDescriptionFlags)0,
			vk::VK_PIPELINE_BIND_POINT_GRAPHICS,					// pipelineBindPoint
			0u,														// inputCount
			DE_NULL,												// pInputAttachments
			0u,														// colorCount
			DE_NULL,												// pColorAttachments
			DE_NULL,												// pResolveAttachments
			DE_NULL,												// depthStencilAttachment
			0u,														// preserveCount
			DE_NULL,												// pPreserveAttachments
		};
		const vk::VkRenderPassCreateInfo	renderPassParams	=
		{
			vk::VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,			// sType
			DE_NULL,												// pNext
			(vk::VkRenderPassCreateFlags)0,
			0u,														// attachmentCount
			DE_NULL,												// pAttachments
			1u,														// subpassCount
			&subpassDesc,											// pSubpasses
			0u,														// dependencyCount
			DE_NULL,												// pDependencies
		};

		renderPass = createRenderPass(vk, device, &renderPassParams);

		const vk::VkFramebufferCreateInfo	framebufferParams	=
		{
			vk::VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,	// sType
			DE_NULL,										// pNext
			(vk::VkFramebufferCreateFlags)0,
			*renderPass,									// renderPass
			0u,												// attachmentCount
			DE_NULL,										// pAttachments
			DIM,											// width
			DIM,											// height
			1u,												// layers
		};

		framebuffer = createFramebuffer(vk, device, &framebufferParams);

		const VkPipelineVertexInputStateCreateInfo		vertexInputStateCreateInfo		=
		{
			VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,	// VkStructureType							sType;
			DE_NULL,													// const void*								pNext;
			(VkPipelineVertexInputStateCreateFlags)0,					// VkPipelineVertexInputStateCreateFlags	flags;
			0u,															// deUint32									vertexBindingDescriptionCount;
			DE_NULL,													// const VkVertexInputBindingDescription*	pVertexBindingDescriptions;
			0u,															// deUint32									vertexAttributeDescriptionCount;
			DE_NULL														// const VkVertexInputAttributeDescription*	pVertexAttributeDescriptions;
		};

		const VkPipelineInputAssemblyStateCreateInfo	inputAssemblyStateCreateInfo	=
		{
			VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,	// VkStructureType							sType;
			DE_NULL,														// const void*								pNext;
			(VkPipelineInputAssemblyStateCreateFlags)0,						// VkPipelineInputAssemblyStateCreateFlags	flags;
			(m_data.stage == STAGE_VERTEX) ? VK_PRIMITIVE_TOPOLOGY_POINT_LIST : VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, // VkPrimitiveTopology						topology;
			VK_FALSE														// VkBool32									primitiveRestartEnable;
		};

		const VkPipelineRasterizationStateCreateInfo	rasterizationStateCreateInfo	=
		{
			VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,		// VkStructureType							sType;
			DE_NULL,														// const void*								pNext;
			(VkPipelineRasterizationStateCreateFlags)0,						// VkPipelineRasterizationStateCreateFlags	flags;
			VK_FALSE,														// VkBool32									depthClampEnable;
			(m_data.stage == STAGE_VERTEX) ? VK_TRUE : VK_FALSE,			// VkBool32									rasterizerDiscardEnable;
			VK_POLYGON_MODE_FILL,											// VkPolygonMode							polygonMode;
			VK_CULL_MODE_NONE,												// VkCullModeFlags							cullMode;
			VK_FRONT_FACE_CLOCKWISE,										// VkFrontFace								frontFace;
			VK_FALSE,														// VkBool32									depthBiasEnable;
			0.0f,															// float									depthBiasConstantFactor;
			0.0f,															// float									depthBiasClamp;
			0.0f,															// float									depthBiasSlopeFactor;
			1.0f															// float									lineWidth;
		};

		const VkPipelineMultisampleStateCreateInfo		multisampleStateCreateInfo =
		{
			VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,	// VkStructureType							sType
			DE_NULL,													// const void*								pNext
			0u,															// VkPipelineMultisampleStateCreateFlags	flags
			VK_SAMPLE_COUNT_1_BIT,										// VkSampleCountFlagBits					rasterizationSamples
			VK_FALSE,													// VkBool32									sampleShadingEnable
			1.0f,														// float									minSampleShading
			DE_NULL,													// const VkSampleMask*						pSampleMask
			VK_FALSE,													// VkBool32									alphaToCoverageEnable
			VK_FALSE													// VkBool32									alphaToOneEnable
		};

		VkViewport viewport = makeViewport(DIM, DIM);
		VkRect2D scissor = makeRect2D(DIM, DIM);

		const VkPipelineViewportStateCreateInfo			viewportStateCreateInfo				=
		{
			VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,	// VkStructureType							sType
			DE_NULL,												// const void*								pNext
			(VkPipelineViewportStateCreateFlags)0,					// VkPipelineViewportStateCreateFlags		flags
			1u,														// deUint32									viewportCount
			&viewport,												// const VkViewport*						pViewports
			1u,														// deUint32									scissorCount
			&scissor												// const VkRect2D*							pScissors
		};

		Move<VkShaderModule> fs;
		Move<VkShaderModule> vs;

		deUint32 numStages;
		if (m_data.stage == STAGE_VERTEX)
		{
			vs = createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0);
			fs = createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0); // bogus
			numStages = 1u;
		}
		else
		{
			vs = createShaderModule(vk, device, m_context.getBinaryCollection().get("vert"), 0);
			fs = createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0);
			numStages = 2u;
		}

		const VkPipelineShaderStageCreateInfo	shaderCreateInfo[2] =
		{
			{
				VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
				DE_NULL,
				(VkPipelineShaderStageCreateFlags)0,
				VK_SHADER_STAGE_VERTEX_BIT,									// stage
				*vs,														// shader
				"main",
				DE_NULL,													// pSpecializationInfo
			},
			{
				VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
				DE_NULL,
				(VkPipelineShaderStageCreateFlags)0,
				VK_SHADER_STAGE_FRAGMENT_BIT,								// stage
				*fs,														// shader
				"main",
				DE_NULL,													// pSpecializationInfo
			}
		};

		const VkGraphicsPipelineCreateInfo				graphicsPipelineCreateInfo		=
		{
			VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,	// VkStructureType									sType;
			DE_NULL,											// const void*										pNext;
			(VkPipelineCreateFlags)0,							// VkPipelineCreateFlags							flags;
			numStages,											// deUint32											stageCount;
			&shaderCreateInfo[0],								// const VkPipelineShaderStageCreateInfo*			pStages;
			&vertexInputStateCreateInfo,						// const VkPipelineVertexInputStateCreateInfo*		pVertexInputState;
			&inputAssemblyStateCreateInfo,						// const VkPipelineInputAssemblyStateCreateInfo*	pInputAssemblyState;
			DE_NULL,											// const VkPipelineTessellationStateCreateInfo*		pTessellationState;
			&viewportStateCreateInfo,							// const VkPipelineViewportStateCreateInfo*			pViewportState;
			&rasterizationStateCreateInfo,						// const VkPipelineRasterizationStateCreateInfo*	pRasterizationState;
			&multisampleStateCreateInfo,						// const VkPipelineMultisampleStateCreateInfo*		pMultisampleState;
			DE_NULL,											// const VkPipelineDepthStencilStateCreateInfo*		pDepthStencilState;
			DE_NULL,											// const VkPipelineColorBlendStateCreateInfo*		pColorBlendState;
			DE_NULL,											// const VkPipelineDynamicStateCreateInfo*			pDynamicState;
			pipelineLayout.get(),								// VkPipelineLayout									layout;
			renderPass.get(),									// VkRenderPass										renderPass;
			0u,													// deUint32											subpass;
			DE_NULL,											// VkPipeline										basePipelineHandle;
			0													// int												basePipelineIndex;
		};

		pipeline = createGraphicsPipeline(vk, device, DE_NULL, &graphicsPipelineCreateInfo);
	}

	m_context.getTestContext().touchWatchdogAndEnableIntervalTimeLimit();

	const VkImageMemoryBarrier imageBarrier =
	{
		VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,				// VkStructureType		sType
		DE_NULL,											// const void*			pNext
		0u,													// VkAccessFlags		srcAccessMask
		VK_ACCESS_TRANSFER_WRITE_BIT,						// VkAccessFlags		dstAccessMask
		VK_IMAGE_LAYOUT_UNDEFINED,							// VkImageLayout		oldLayout
		VK_IMAGE_LAYOUT_GENERAL,							// VkImageLayout		newLayout
		VK_QUEUE_FAMILY_IGNORED,							// uint32_t				srcQueueFamilyIndex
		VK_QUEUE_FAMILY_IGNORED,							// uint32_t				dstQueueFamilyIndex
		**image,											// VkImage				image
		{
			VK_IMAGE_ASPECT_COLOR_BIT,				// VkImageAspectFlags	aspectMask
			0u,										// uint32_t				baseMipLevel
			1u,										// uint32_t				mipLevels,
			0u,										// uint32_t				baseArray
			1u,										// uint32_t				arraySize
		}
	};

	vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
							(VkDependencyFlags)0,
							0, (const VkMemoryBarrier*)DE_NULL,
							0, (const VkBufferMemoryBarrier*)DE_NULL,
							1, &imageBarrier);

	vk.cmdBindPipeline(*cmdBuffer, bindPoint, *pipeline);

	VkImageSubresourceRange range = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
	VkClearValue clearColor = makeClearValueColorU32(0,0,0,0);

	VkMemoryBarrier					memBarrier =
	{
		VK_STRUCTURE_TYPE_MEMORY_BARRIER,	// sType
		DE_NULL,							// pNext
		0u,									// srcAccessMask
		0u,									// dstAccessMask
	};

	vk.cmdClearColorImage(*cmdBuffer, **image, VK_IMAGE_LAYOUT_GENERAL, &clearColor.color, 1, &range);

	memBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
	memBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
	vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, allPipelineStages,
		0, 1, &memBarrier, 0, DE_NULL, 0, DE_NULL);

	if (m_data.stage == STAGE_COMPUTE)
	{
		vk.cmdDispatch(*cmdBuffer, DIM, DIM, 1);
	}
#if ENABLE_RAYTRACING
	else if (m_data.stage == STAGE_RAYGEN)
	{
		vk.cmdTraceRaysNV(*cmdBuffer,
			**sbtBuffer, 0,
			DE_NULL, 0, 0,
			DE_NULL, 0, 0,
			DE_NULL, 0, 0,
			DIM, DIM, 1);
	}
#endif
	else
	{
		beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer,
						makeRect2D(DIM, DIM),
						0, DE_NULL, VK_SUBPASS_CONTENTS_INLINE);
		// Draw a point cloud for vertex shader testing, and a single quad for fragment shader testing
		if (m_data.stage == STAGE_VERTEX)
		{
			vk.cmdDraw(*cmdBuffer, DIM*DIM, 1u, 0u, 0u);
		}
		else
		{
			vk.cmdDraw(*cmdBuffer, 4u, 1u, 0u, 0u);
		}
		endRenderPass(vk, *cmdBuffer);
	}

	memBarrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
	memBarrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT;
	vk.cmdPipelineBarrier(*cmdBuffer, allPipelineStages, VK_PIPELINE_STAGE_TRANSFER_BIT,
		0, 1, &memBarrier, 0, DE_NULL, 0, DE_NULL);

	const VkBufferImageCopy copyRegion = makeBufferImageCopy(makeExtent3D(DIM, DIM, 1u),
															 makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u));
	vk.cmdCopyImageToBuffer(*cmdBuffer, **image, VK_IMAGE_LAYOUT_GENERAL, **copyBuffer, 1u, &copyRegion);

	endCommandBuffer(vk, *cmdBuffer);

	submitCommandsAndWait(vk, device, queue, cmdBuffer.get());

	deUint32 *ptr = (deUint32 *)copyBuffer->getAllocation().getHostPtr();
	invalidateAlloc(vk, device, copyBuffer->getAllocation());

	qpTestResult res = QP_TEST_RESULT_PASS;

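	// On success every invocation stored uvec4(1,0,0,1); only the R channel
	// lands in the r32ui image, so each texel must read back as exactly 1.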
	for (deUint32 i = 0; i < DIM*DIM; ++i)
	{
		if (ptr[i] != 1)
		{
			res = QP_TEST_RESULT_FAIL;
		}
	}

	return tcu::TestStatus(res, qpGetTestResultName(res));
}

class CaptureReplayTestCase : public TestCase
{
public:
							CaptureReplayTestCase	(tcu::TestContext& context, const char* name, deUint32 seed);
							~CaptureReplayTestCase	(void);
	virtual	void			initPrograms			(SourceCollections& programCollection) const { DE_UNREF(programCollection); }
	virtual TestInstance*	createInstance			(Context& context) const;
	virtual void			checkSupport			(Context& context) const;
private:
	deUint32				m_seed;
};

CaptureReplayTestCase::CaptureReplayTestCase (tcu::TestContext& context, const char* name, deUint32 seed)
	: vkt::TestCase	(context, name)
	, m_seed(seed)
{
}

CaptureReplayTestCase::~CaptureReplayTestCase	(void)
{
}

void CaptureReplayTestCase::checkSupport (Context& context) const
{
	if (!context.isBufferDeviceAddressSupported())
		TCU_THROW(NotSupportedError, "Physical storage buffer pointers not supported");

#ifndef CTS_USES_VULKANSC
	bool isBufferDeviceAddressWithCaptureReplaySupported =
			(context.isDeviceFunctionalitySupported("VK_KHR_buffer_device_address") && context.getBufferDeviceAddressFeatures().bufferDeviceAddressCaptureReplay) ||
			(context.isDeviceFunctionalitySupported("VK_EXT_buffer_device_address") && context.getBufferDeviceAddressFeaturesEXT().bufferDeviceAddressCaptureReplay);
#else
	bool isBufferDeviceAddressWithCaptureReplaySupported =
			(context.isDeviceFunctionalitySupported("VK_KHR_buffer_device_address") && context.getBufferDeviceAddressFeatures().bufferDeviceAddressCaptureReplay);
#endif

	if (!isBufferDeviceAddressWithCaptureReplaySupported)
		TCU_THROW(NotSupportedError, "Capture/replay of physical storage buffer pointers not supported");
}

class CaptureReplayTestInstance : public TestInstance
{
public:
						CaptureReplayTestInstance	(Context& context, deUint32 seed);
						~CaptureReplayTestInstance	(void);
	tcu::TestStatus		iterate						(void);
private:
	deUint32			m_seed;
};

CaptureReplayTestInstance::CaptureReplayTestInstance (Context& context, deUint32 seed)
	: vkt::TestInstance		(context)
	, m_seed(seed)
{
}

CaptureReplayTestInstance::~CaptureReplayTestInstance (void)
{
}

TestInstance* CaptureReplayTestCase::createInstance (Context& context) const
{
	return new CaptureReplayTestInstance(context, m_seed);
}

tcu::TestStatus CaptureReplayTestInstance::iterate (void)
{
1429	const InstanceInterface&vki						= m_context.getInstanceInterface();
1430	const DeviceInterface&	vk						= m_context.getDeviceInterface();
1431	const VkPhysicalDevice&	physDevice				= m_context.getPhysicalDevice();
1432	const VkDevice			device					= m_context.getDevice();
1433	const bool				useKHR					= m_context.isDeviceFunctionalitySupported("VK_KHR_buffer_device_address");
1434	de::Random				rng(m_seed);
1435
1436#ifndef CTS_USES_VULKANSC
1437	VkBufferDeviceAddressCreateInfoEXT addressCreateInfoEXT =
1438	{
1439		VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_CREATE_INFO_EXT,	// VkStructureType	 sType;
1440		DE_NULL,													// const void*		 pNext;
1441		0x000000000ULL,												// VkDeviceSize		 deviceAddress
1442	};
1443#endif
1444
1445	VkBufferOpaqueCaptureAddressCreateInfo bufferOpaqueCaptureAddressCreateInfo =
1446	{
1447		VK_STRUCTURE_TYPE_BUFFER_OPAQUE_CAPTURE_ADDRESS_CREATE_INFO,	// VkStructureType	 sType;
1448		DE_NULL,														// const void*		 pNext;
1449		0x000000000ULL,													// VkDeviceSize		 opaqueCaptureAddress
1450	};

	const deUint32 numBuffers = 100;
	std::vector<VkDeviceSize> bufferSizes(numBuffers);
	// Random sizes: powers of two in [4 KiB, 4 MiB]
	for (deUint32 i = 0; i < numBuffers; ++i)
		bufferSizes[i] = 4096 << (rng.getUint32() % 11);

	std::vector<VkDeviceAddress> gpuAddrs(numBuffers);
	std::vector<deUint64> opaqueBufferAddrs(numBuffers);
	std::vector<deUint64> opaqueMemoryAddrs(numBuffers);

	VkBufferDeviceAddressInfo bufferDeviceAddressInfo =
	{
		VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,	// VkStructureType	 sType;
		DE_NULL,										// const void*		 pNext;
		0,												// VkBuffer			 buffer;
	};

	VkDeviceMemoryOpaqueCaptureAddressInfo deviceMemoryOpaqueCaptureAddressInfo =
	{
		VK_STRUCTURE_TYPE_DEVICE_MEMORY_OPAQUE_CAPTURE_ADDRESS_INFO,	// VkStructureType	 sType;
		DE_NULL,														// const void*		 pNext;
		0,																// VkDeviceMemory	 memory;
	};

	vector<VkBufferSp>			buffers(numBuffers);
	vector<AllocationSp>		allocations(numBuffers);

	VkBufferCreateInfo			bufferCreateInfo = makeBufferCreateInfo(DE_NULL, 0,
														VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
														VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
														VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT,
														VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT);

	// VkMemoryAllocateFlags are filled in below, depending on which extension is in use
	VkMemoryAllocateFlagsInfo	allocFlagsInfo =
	{
		VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO,	//	VkStructureType			sType;
		DE_NULL,										//	const void*				pNext;
		0,												//	VkMemoryAllocateFlags	flags;
		0,												//	uint32_t				deviceMask;
	};

	VkMemoryOpaqueCaptureAddressAllocateInfo memoryOpaqueCaptureAddressAllocateInfo =
	{
		VK_STRUCTURE_TYPE_MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO,	// VkStructureType	 sType;
		DE_NULL,														// const void*		 pNext;
		0,																// uint64_t			 opaqueCaptureAddress;
	};

	if (useKHR)
	{
		allocFlagsInfo.flags |= VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT | VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT;
		allocFlagsInfo.pNext = &memoryOpaqueCaptureAddressAllocateInfo;
	}

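	// Capture phase: create each buffer, record its opaque capture address (KHR path),
	// then allocate and bind host-visible memory, recording the memory's opaque capture
	// address as well.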
	for (deUint32 i = 0; i < numBuffers; ++i)
	{
		bufferCreateInfo.size = bufferSizes[i];
		buffers[i] = VkBufferSp(new Unique<VkBuffer>(createBuffer(vk, device, &bufferCreateInfo)));

		// Query the buffer's opaque capture address before binding memory
		if (useKHR)
		{
			bufferDeviceAddressInfo.buffer = **buffers[i];
			opaqueBufferAddrs[i] = vk.getBufferOpaqueCaptureAddress(device, &bufferDeviceAddressInfo);
		}

		allocations[i] = AllocationSp(allocateExtended(vki, vk, physDevice, device, getBufferMemoryRequirements(vk, device, **buffers[i]), MemoryRequirement::HostVisible, &allocFlagsInfo));

		if (useKHR)
		{
			deviceMemoryOpaqueCaptureAddressInfo.memory = allocations[i]->getMemory();
			opaqueMemoryAddrs[i] = vk.getDeviceMemoryOpaqueCaptureAddress(device, &deviceMemoryOpaqueCaptureAddressInfo);
		}

		VK_CHECK(vk.bindBufferMemory(device, **buffers[i], allocations[i]->getMemory(), 0));
	}

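	// Record each buffer's device address before tearing everything down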
	for (deUint32 i = 0; i < numBuffers; ++i)
	{
		bufferDeviceAddressInfo.buffer = **buffers[i];
		gpuAddrs[i] = vk.getBufferDeviceAddress(device, &bufferDeviceAddressInfo);
	}
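	// Destroy all buffers and free their memory so that the replay phase can recreate them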
	buffers.clear();
	buffers.resize(numBuffers);
	allocations.clear();
	allocations.resize(numBuffers);

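	// Chain the captured addresses into buffer creation for the replay phase: the KHR path
	// replays the buffer's opaque capture address, the EXT path replays the raw device address.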
#ifndef CTS_USES_VULKANSC
	bufferCreateInfo.pNext = useKHR ? (void *)&bufferOpaqueCaptureAddressCreateInfo : (void *)&addressCreateInfoEXT;
#else
	bufferCreateInfo.pNext = (void *)&bufferOpaqueCaptureAddressCreateInfo;
#endif

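	// Replay phase: recreate the buffers (in reverse order) from the captured addresses and
	// verify that each buffer reports the same device address it had during capture.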
	for (deInt32 i = numBuffers - 1; i >= 0; --i)
	{
#ifndef CTS_USES_VULKANSC
		addressCreateInfoEXT.deviceAddress = gpuAddrs[i];
#endif
		bufferOpaqueCaptureAddressCreateInfo.opaqueCaptureAddress = opaqueBufferAddrs[i];
		memoryOpaqueCaptureAddressAllocateInfo.opaqueCaptureAddress = opaqueMemoryAddrs[i];

		bufferCreateInfo.size = bufferSizes[i];
		buffers[i] = VkBufferSp(new Unique<VkBuffer>(createBuffer(vk, device, &bufferCreateInfo)));
		allocations[i] = AllocationSp(allocateExtended(vki, vk, physDevice, device, getBufferMemoryRequirements(vk, device, **buffers[i]), MemoryRequirement::HostVisible, &allocFlagsInfo));
		VK_CHECK(vk.bindBufferMemory(device, **buffers[i], allocations[i]->getMemory(), 0));

		bufferDeviceAddressInfo.buffer = **buffers[i];
		const VkDeviceAddress newAddr = vk.getBufferDeviceAddress(device, &bufferDeviceAddressInfo);

		if (newAddr != gpuAddrs[i])
			return tcu::TestStatus(QP_TEST_RESULT_FAIL, "Replayed buffer device address does not match captured address");
	}

	return tcu::TestStatus(QP_TEST_RESULT_PASS, qpGetTestResultName(QP_TEST_RESULT_PASS));
}

}	// anonymous

tcu::TestCaseGroup*	createBufferDeviceAddressTests (tcu::TestContext& testCtx)
{
	de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(testCtx, "buffer_device_address"));

	typedef struct
	{
		deUint32				count;
		const char*				name;
	} TestGroupCase;

	TestGroupCase setCases[] =
	{
		{ 0,	"set0"},
		{ 3,	"set3"},
		{ 7,	"set7"},
		{ 15,	"set15"},
		{ 31,	"set31"},
	};

	TestGroupCase depthCases[] =
	{
		{ 1,	"depth1"},
		{ 2,	"depth2"},
		{ 3,	"depth3"},
	};

	TestGroupCase baseCases[] =
	{
		{ BASE_UBO,		"baseubo"},
		{ BASE_SSBO,	"basessbo"},
	};

	TestGroupCase cvtCases[] =
	{
		// load reference
		{ CONVERT_NONE,			"load"},
		// load and convert reference
		{ CONVERT_UINT64,		"convert"},
		// load and convert reference to uvec2
		{ CONVERT_UVEC2,		"convertuvec2"},
		// load, convert and compare references as uint64_t
		{ CONVERT_U64CMP,		"convertchecku64"},
		// load, convert and compare references as uvec2
		{ CONVERT_UVEC2CMP,		"convertcheckuv2"},
		// load reference as uint64_t and convert it to uvec2
		{ CONVERT_UVEC2TOU64,	"crossconvertu2p"},
		// load reference as uvec2 and convert it to uint64_t
		{ CONVERT_U64TOUVEC2,	"crossconvertp2u"},
	};

	TestGroupCase storeCases[] =
	{
		// don't store intermediate reference
		{ 0,	"nostore"},
		// store intermediate reference
		{ 1,	"store"},
	};

	TestGroupCase btCases[] =
	{
		// single buffer
		{ BT_SINGLE,	"single"},
		// multiple buffers
		{ BT_MULTI,		"multi"},
		// multiple buffers and VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT
		{ BT_REPLAY,	"replay"},
	};

	TestGroupCase layoutCases[] =
	{
		{ LAYOUT_STD140,	"std140"},
		{ LAYOUT_SCALAR,	"scalar"},
	};

	TestGroupCase stageCases[] =
	{
		{ STAGE_COMPUTE,	"comp"},
		{ STAGE_FRAGMENT,	"frag"},
		{ STAGE_VERTEX,		"vert"},
#if ENABLE_RAYTRACING
		// raygen
		{ STAGE_RAYGEN,		"rgen"},
#endif
	};

	TestGroupCase offsetCases[] =
	{
		{ OFFSET_ZERO,		"offset_zero"},
		{ OFFSET_NONZERO,	"offset_nonzero"},
	};

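	// Build the test hierarchy: set / depth / base / convert / store / buffer type / layout,
	// with the shader stage and memory offset folded into the leaf case names.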
	for (int setNdx = 0; setNdx < DE_LENGTH_OF_ARRAY(setCases); setNdx++)
	{
		de::MovePtr<tcu::TestCaseGroup> setGroup(new tcu::TestCaseGroup(testCtx, setCases[setNdx].name));
		for (int depthNdx = 0; depthNdx < DE_LENGTH_OF_ARRAY(depthCases); depthNdx++)
		{
			de::MovePtr<tcu::TestCaseGroup> depthGroup(new tcu::TestCaseGroup(testCtx, depthCases[depthNdx].name));
			for (int baseNdx = 0; baseNdx < DE_LENGTH_OF_ARRAY(baseCases); baseNdx++)
			{
				de::MovePtr<tcu::TestCaseGroup> baseGroup(new tcu::TestCaseGroup(testCtx, baseCases[baseNdx].name));
				for (int cvtNdx = 0; cvtNdx < DE_LENGTH_OF_ARRAY(cvtCases); cvtNdx++)
				{
					de::MovePtr<tcu::TestCaseGroup> cvtGroup(new tcu::TestCaseGroup(testCtx, cvtCases[cvtNdx].name));
					for (int storeNdx = 0; storeNdx < DE_LENGTH_OF_ARRAY(storeCases); storeNdx++)
					{
						de::MovePtr<tcu::TestCaseGroup> storeGroup(new tcu::TestCaseGroup(testCtx, storeCases[storeNdx].name));
						for (int btNdx = 0; btNdx < DE_LENGTH_OF_ARRAY(btCases); btNdx++)
						{
							de::MovePtr<tcu::TestCaseGroup> btGroup(new tcu::TestCaseGroup(testCtx, btCases[btNdx].name));
							for (int layoutNdx = 0; layoutNdx < DE_LENGTH_OF_ARRAY(layoutCases); layoutNdx++)
							{
								de::MovePtr<tcu::TestCaseGroup> layoutGroup(new tcu::TestCaseGroup(testCtx, layoutCases[layoutNdx].name));
								for (int stageNdx = 0; stageNdx < DE_LENGTH_OF_ARRAY(stageCases); stageNdx++)
								{
									for (int offsetNdx = 0; offsetNdx < DE_LENGTH_OF_ARRAY(offsetCases); offsetNdx++)
									{
										CaseDef c =
										{
											setCases[setNdx].count,						// deUint32 set;
											depthCases[depthNdx].count,					// deUint32 depth;
											(Base)baseCases[baseNdx].count,				// Base base;
											(Stage)stageCases[stageNdx].count,			// Stage stage;
											(Convert)cvtCases[cvtNdx].count,			// Convert convertUToPtr;
											!!storeCases[storeNdx].count,				// bool storeInLocal;
											(BufType)btCases[btNdx].count,				// BufType bufType;
											(Layout)layoutCases[layoutNdx].count,		// Layout layout;
											(MemoryOffset)offsetCases[offsetNdx].count,	// MemoryOffset memoryOffset;
										};

										// Skip more complex test cases for most descriptor sets, to reduce runtime.
										if (c.set != 3 && (c.depth == 3 || c.layout != LAYOUT_STD140))
											continue;

										// Memory offset tests are only for single buffer test cases.
										if (c.memoryOffset == OFFSET_NONZERO && c.bufType != BT_SINGLE)
											continue;

										std::ostringstream caseName;
										caseName << stageCases[stageNdx].name;
										if (c.memoryOffset == OFFSET_NONZERO)
											caseName << "_offset_nonzero";

										layoutGroup->addChild(new BufferAddressTestCase(testCtx, caseName.str().c_str(), c));
									}
								}
								btGroup->addChild(layoutGroup.release());
							}
							storeGroup->addChild(btGroup.release());
						}
						cvtGroup->addChild(storeGroup.release());
					}
					baseGroup->addChild(cvtGroup.release());
				}
				depthGroup->addChild(baseGroup.release());
			}
			setGroup->addChild(depthGroup.release());
		}
		group->addChild(setGroup.release());
	}

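	// Capture/replay stress tests, one per random seed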
	de::MovePtr<tcu::TestCaseGroup> capGroup(new tcu::TestCaseGroup(testCtx, "capture_replay_stress"));
	for (deUint32 i = 0; i < 10; ++i)
	{
		capGroup->addChild(new CaptureReplayTestCase(testCtx, (std::string("seed_") + de::toString(i)).c_str(), i));
	}
	group->addChild(capGroup.release());
	return group.release();
}

}	// BindingModel
}	// vkt
