/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2017-2019 The Khronos Group Inc.
 * Copyright (c) 2018-2019 NVIDIA Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *	  http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Vulkan Memory Model tests
 *//*--------------------------------------------------------------------*/

#include "vktMemoryModelTests.hpp"
#include "vktMemoryModelPadding.hpp"
#include "vktMemoryModelSharedLayout.hpp"
#include "vktAmberTestCase.hpp"

#include "vkBufferWithMemory.hpp"
#include "vkImageWithMemory.hpp"
#include "vkQueryUtil.hpp"
#include "vkBuilderUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vkTypeUtil.hpp"
#include "vkObjUtil.hpp"

#include "vktTestCase.hpp"

#include "deDefs.h"
#include "deMath.h"
#include "deSharedPtr.hpp"
#include "deString.h"

#include "tcuTestCase.hpp"
#include "tcuTestLog.hpp"

#include <string>
#include <sstream>

namespace vkt
{
namespace MemoryModel
{
namespace
{
using namespace vk;
using namespace std;

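// Which litmus test pattern each pair of invocations runs. In the message
// passing (MP) pattern one invocation stores the payload and then releases the
// guard, while its partner acquires the guard and then loads the payload. In
// the write-after-read (WAR) pattern an invocation loads its partner's payload
// before signalling the guard, and the partner overwrites that payload only
// after observing the signal.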
typedef enum
{
	TT_MP = 0,  // message passing
	TT_WAR, // write-after-read hazard
} TestType;

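// Which operation carries the release/acquire semantics on each side of the
// synchronization: a standalone memoryBarrier() (FENCE), the guard atomic
// itself (ATOMIC), or a control barrier (optionally paired with separate
// memory barriers).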
typedef enum
{
	ST_FENCE_FENCE = 0,
	ST_FENCE_ATOMIC,
	ST_ATOMIC_FENCE,
	ST_ATOMIC_ATOMIC,
	ST_CONTROL_BARRIER,
	ST_CONTROL_AND_MEMORY_BARRIER,
} SyncType;

typedef enum
{
	SC_BUFFER = 0,
	SC_IMAGE,
	SC_WORKGROUP,
	SC_PHYSBUFFER,
} StorageClass;

typedef enum
{
	SCOPE_DEVICE = 0,
	SCOPE_QUEUEFAMILY,
	SCOPE_WORKGROUP,
	SCOPE_SUBGROUP,
} Scope;

typedef enum
{
	STAGE_COMPUTE = 0,
	STAGE_VERTEX,
	STAGE_FRAGMENT,
} Stage;

typedef enum
{
	DATA_TYPE_UINT = 0,
	DATA_TYPE_UINT64,
	DATA_TYPE_FLOAT32,
	DATA_TYPE_FLOAT64,
} DataType;

const VkFlags allShaderStages = VK_SHADER_STAGE_COMPUTE_BIT | VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT;
const VkFlags allPipelineStages = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;

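// Parameters for one generated test case. payloadMemLocal/guardMemLocal select
// device-local vs. non-device-local memory for the corresponding resource,
// coherent selects coherent qualifiers instead of explicit availability/
// visibility semantics, core11 restricts the shader to Vulkan 1.1 (no Vulkan
// memory model pragma), and transitive/transitiveVis select the availability/
// visibility chain variants built by initProgramsTransitive().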
struct CaseDef
{
	bool payloadMemLocal;
	bool guardMemLocal;
	bool coherent;
	bool core11;
	bool atomicRMW;
	TestType testType;
	StorageClass payloadSC;
	StorageClass guardSC;
	Scope scope;
	SyncType syncType;
	Stage stage;
	DataType dataType;
	bool transitive;
	bool transitiveVis;
};

class MemoryModelTestInstance : public TestInstance
{
public:
						MemoryModelTestInstance	(Context& context, const CaseDef& data);
						~MemoryModelTestInstance	(void);
	tcu::TestStatus		iterate				(void);
private:
	CaseDef			m_data;

	enum
	{
		WIDTH = 256,
		HEIGHT = 256
	};
};

MemoryModelTestInstance::MemoryModelTestInstance (Context& context, const CaseDef& data)
	: vkt::TestInstance		(context)
	, m_data				(data)
{
}

MemoryModelTestInstance::~MemoryModelTestInstance (void)
{
}

class MemoryModelTestCase : public TestCase
{
public:
								MemoryModelTestCase		(tcu::TestContext& context, const char* name, const CaseDef data);
								~MemoryModelTestCase	(void);
	virtual	void				initPrograms		(SourceCollections& programCollection) const;
	virtual	void				initProgramsTransitive(SourceCollections& programCollection) const;
	virtual TestInstance*		createInstance		(Context& context) const;
	virtual void				checkSupport		(Context& context) const;

private:
	CaseDef					m_data;
};

MemoryModelTestCase::MemoryModelTestCase (tcu::TestContext& context, const char* name, const CaseDef data)
	: vkt::TestCase	(context, name)
	, m_data		(data)
{
}

MemoryModelTestCase::~MemoryModelTestCase	(void)
{
}

void MemoryModelTestCase::checkSupport(Context& context) const
{
	if (!context.contextSupports(vk::ApiVersion(0, 1, 1, 0)))
	{
		TCU_THROW(NotSupportedError, "Vulkan 1.1 not supported");
	}

	if (!m_data.core11)
	{
		if (!context.getVulkanMemoryModelFeatures().vulkanMemoryModel)
		{
			TCU_THROW(NotSupportedError, "vulkanMemoryModel not supported");
		}

		if (m_data.scope == SCOPE_DEVICE && !context.getVulkanMemoryModelFeatures().vulkanMemoryModelDeviceScope)
		{
			TCU_THROW(NotSupportedError, "vulkanMemoryModelDeviceScope not supported");
		}
	}

	if (m_data.scope == SCOPE_SUBGROUP)
	{
		// Check for subgroup support for scope_subgroup tests.
		VkPhysicalDeviceSubgroupProperties subgroupProperties;
		subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
		subgroupProperties.pNext = DE_NULL;
		subgroupProperties.supportedOperations = 0;

		VkPhysicalDeviceProperties2 properties;
		properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
		properties.pNext = &subgroupProperties;

		context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);

		if (!(subgroupProperties.supportedOperations & VK_SUBGROUP_FEATURE_BASIC_BIT) ||
			!(subgroupProperties.supportedOperations & VK_SUBGROUP_FEATURE_BALLOT_BIT) ||
			!(subgroupProperties.supportedOperations & VK_SUBGROUP_FEATURE_SHUFFLE_BIT))
		{
			TCU_THROW(NotSupportedError, "Subgroup features not supported");
		}

		VkShaderStageFlags stage = VK_SHADER_STAGE_COMPUTE_BIT;
		if (m_data.stage == STAGE_VERTEX)
		{
			stage = VK_SHADER_STAGE_VERTEX_BIT;
		}
		else if (m_data.stage == STAGE_COMPUTE)
		{
			stage = VK_SHADER_STAGE_COMPUTE_BIT;
		}
		else if (m_data.stage == STAGE_FRAGMENT)
		{
			stage = VK_SHADER_STAGE_FRAGMENT_BIT;
		}

		if ((subgroupProperties.supportedStages & stage) == 0)
		{
			TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage");
		}
	}
	if (m_data.dataType == DATA_TYPE_UINT64)
	{
		if (!context.getDeviceFeatures().shaderInt64)
		{
			TCU_THROW(NotSupportedError, "64-bit integer in shaders not supported");
		}
		if (!context.getShaderAtomicInt64Features().shaderBufferInt64Atomics &&
			(m_data.guardSC == SC_BUFFER || m_data.guardSC == SC_PHYSBUFFER))
		{
			TCU_THROW(NotSupportedError, "64-bit integer buffer atomics not supported");
		}
		if (!context.getShaderAtomicInt64Features().shaderSharedInt64Atomics &&
			m_data.guardSC == SC_WORKGROUP)
		{
			TCU_THROW(NotSupportedError, "64-bit integer shared atomics not supported");
		}
	}

	if (m_data.dataType == DATA_TYPE_FLOAT32)
	{
		if (!context.isDeviceFunctionalitySupported("VK_EXT_shader_atomic_float"))
			TCU_THROW(NotSupportedError, "Missing extension: VK_EXT_shader_atomic_float");

		if ((m_data.guardSC == SC_BUFFER || m_data.guardSC == SC_PHYSBUFFER) &&
			(!context.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat32Atomics))
		{
			TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point buffer atomic operations not supported");
		}

		if (m_data.guardSC == SC_IMAGE && (!context.getShaderAtomicFloatFeaturesEXT().shaderImageFloat32Atomics))
		{
			TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point image atomic operations not supported");
		}

		if (m_data.guardSC == SC_WORKGROUP && (!context.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat32Atomics))
		{
			TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point shared atomic operations not supported");
		}
	}

	if (m_data.dataType == DATA_TYPE_FLOAT64)
	{
		if (!context.isDeviceFunctionalitySupported("VK_EXT_shader_atomic_float"))
			TCU_THROW(NotSupportedError, "Missing extension: VK_EXT_shader_atomic_float");

		if ((m_data.guardSC == SC_BUFFER || m_data.guardSC == SC_PHYSBUFFER) &&
			(!context.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat64Atomics))
		{
			TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point buffer atomic operations not supported");
		}

		if (m_data.guardSC == SC_IMAGE || m_data.payloadSC == SC_IMAGE)
		{
			TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point image atomic operations not supported");
		}

		if (m_data.guardSC == SC_WORKGROUP && (!context.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat64Atomics))
		{
			TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point shared atomic operations not supported");
		}
	}

	if (m_data.transitive &&
		!context.getVulkanMemoryModelFeatures().vulkanMemoryModelAvailabilityVisibilityChains)
		TCU_THROW(NotSupportedError, "vulkanMemoryModelAvailabilityVisibilityChains not supported");

	if ((m_data.payloadSC == SC_PHYSBUFFER || m_data.guardSC == SC_PHYSBUFFER) && !context.isBufferDeviceAddressSupported())
		TCU_THROW(NotSupportedError, "Physical storage buffer pointers not supported");

	if (m_data.stage == STAGE_VERTEX)
	{
		if (!context.getDeviceFeatures().vertexPipelineStoresAndAtomics)
		{
			TCU_THROW(NotSupportedError, "vertexPipelineStoresAndAtomics not supported");
		}
	}
	if (m_data.stage == STAGE_FRAGMENT)
	{
		if (!context.getDeviceFeatures().fragmentStoresAndAtomics)
		{
			TCU_THROW(NotSupportedError, "fragmentStoresAndAtomics not supported");
		}
	}
}

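// initPrograms() assembles the GLSL test shader from the CaseDef. For
// illustration (this exact text is not emitted verbatim), a TT_MP /
// ST_ATOMIC_ATOMIC / SC_BUFFER / SCOPE_DEVICE / non-coherent uint case boils
// down to roughly this core sequence in each invocation:
//
//	payload.x[bufferCoord] = bufferCoord + (payload.x[partnerBufferCoord]>>31);
//	atomicStore(guard.x[bufferCoord], uint(1u), gl_ScopeDevice,
//		gl_StorageSemanticsBuffer, gl_SemanticsRelease | gl_SemanticsMakeAvailable);
//	skip = atomicLoad(guard.x[partnerBufferCoord], gl_ScopeDevice,
//		gl_StorageSemanticsBuffer, gl_SemanticsAcquire | gl_SemanticsMakeVisible) == 0;
//	uint r = payload.x[partnerBufferCoord];
//	if (!skip && r != uint(partnerBufferCoord)) { fail.x[bufferCoord] = 1; }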
void MemoryModelTestCase::initPrograms (SourceCollections& programCollection) const
{
	if (m_data.transitive)
	{
		initProgramsTransitive(programCollection);
		return;
	}
	DE_ASSERT(!m_data.transitiveVis);

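	// Shared memory is only visible within a workgroup, so if either resource
	// lives in SC_WORKGROUP the invocations must be paired within a workgroup
	// even for device/queue-family scope tests.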
	Scope invocationMapping = m_data.scope;
	if ((m_data.scope == SCOPE_DEVICE || m_data.scope == SCOPE_QUEUEFAMILY) &&
		(m_data.payloadSC == SC_WORKGROUP || m_data.guardSC == SC_WORKGROUP))
	{
		invocationMapping = SCOPE_WORKGROUP;
	}

	const char *scopeStr;
	switch (m_data.scope)
	{
	default: DE_ASSERT(0); // fall through
	case SCOPE_DEVICE:		scopeStr = "gl_ScopeDevice"; break;
	case SCOPE_QUEUEFAMILY:	scopeStr = "gl_ScopeQueueFamily"; break;
	case SCOPE_WORKGROUP:	scopeStr = "gl_ScopeWorkgroup"; break;
	case SCOPE_SUBGROUP:	scopeStr = "gl_ScopeSubgroup"; break;
	}

	const char *typeStr = (m_data.dataType == DATA_TYPE_UINT64) ? "uint64_t" : (m_data.dataType == DATA_TYPE_FLOAT32) ? "float" :
		(m_data.dataType == DATA_TYPE_FLOAT64) ? "double" : "uint";
	const bool intType = (m_data.dataType == DATA_TYPE_UINT || m_data.dataType == DATA_TYPE_UINT64);

	// Construct storageSemantics strings. Both release and acquire
	// always have the payload storage class. They only include the
	// guard storage class if they're using FENCE for that side of the
	// sync.
	std::stringstream storageSemanticsRelease;
	switch (m_data.payloadSC)
	{
	default: DE_ASSERT(0); // fall through
	case SC_PHYSBUFFER: // fall through
	case SC_BUFFER:		storageSemanticsRelease << "gl_StorageSemanticsBuffer"; break;
	case SC_IMAGE:		storageSemanticsRelease << "gl_StorageSemanticsImage"; break;
	case SC_WORKGROUP:	storageSemanticsRelease << "gl_StorageSemanticsShared"; break;
	}
	std::stringstream storageSemanticsAcquire;
	storageSemanticsAcquire << storageSemanticsRelease.str();
	if (m_data.syncType == ST_FENCE_ATOMIC || m_data.syncType == ST_FENCE_FENCE)
	{
		switch (m_data.guardSC)
		{
		default: DE_ASSERT(0); // fall through
		case SC_PHYSBUFFER: // fall through
		case SC_BUFFER:		storageSemanticsRelease << " | gl_StorageSemanticsBuffer"; break;
		case SC_IMAGE:		storageSemanticsRelease << " | gl_StorageSemanticsImage"; break;
		case SC_WORKGROUP:	storageSemanticsRelease << " | gl_StorageSemanticsShared"; break;
		}
	}
	if (m_data.syncType == ST_ATOMIC_FENCE || m_data.syncType == ST_FENCE_FENCE)
	{
		switch (m_data.guardSC)
		{
		default: DE_ASSERT(0); // fall through
		case SC_PHYSBUFFER: // fall through
		case SC_BUFFER:		storageSemanticsAcquire << " | gl_StorageSemanticsBuffer"; break;
		case SC_IMAGE:		storageSemanticsAcquire << " | gl_StorageSemanticsImage"; break;
		case SC_WORKGROUP:	storageSemanticsAcquire << " | gl_StorageSemanticsShared"; break;
		}
	}

	std::stringstream semanticsRelease, semanticsAcquire, semanticsAcquireRelease;

	semanticsRelease << "gl_SemanticsRelease";
	semanticsAcquire << "gl_SemanticsAcquire";
	semanticsAcquireRelease << "gl_SemanticsAcquireRelease";
	if (!m_data.coherent && m_data.testType != TT_WAR)
	{
		DE_ASSERT(!m_data.core11);
		semanticsRelease << " | gl_SemanticsMakeAvailable";
		semanticsAcquire << " | gl_SemanticsMakeVisible";
		semanticsAcquireRelease << " | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible";
	}
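	// E.g. for a non-coherent MP test, the strings assembled above read
	// "gl_SemanticsRelease | gl_SemanticsMakeAvailable" and
	// "gl_SemanticsAcquire | gl_SemanticsMakeVisible".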

	std::stringstream css;
	css << "#version 450 core\n";
	if (!m_data.core11)
	{
		css << "#pragma use_vulkan_memory_model\n";
	}
	if (!intType)
	{
		css <<
			"#extension GL_EXT_shader_atomic_float : enable\n"
			"#extension GL_KHR_memory_scope_semantics : enable\n";
	}
	css <<
		"#extension GL_KHR_shader_subgroup_basic : enable\n"
		"#extension GL_KHR_shader_subgroup_shuffle : enable\n"
		"#extension GL_KHR_shader_subgroup_ballot : enable\n"
		"#extension GL_KHR_memory_scope_semantics : enable\n"
		"#extension GL_ARB_gpu_shader_int64 : enable\n"
		"#extension GL_EXT_buffer_reference : enable\n"
		"// DIM/NUM_WORKGROUP_EACH_DIM overridden by spec constants\n"
		"layout(constant_id = 0) const int DIM = 1;\n"
		"layout(constant_id = 1) const int NUM_WORKGROUP_EACH_DIM = 1;\n"
		"struct S { " << typeStr << " x[DIM*DIM]; };\n";

	if (m_data.stage == STAGE_COMPUTE)
	{
		css << "layout(local_size_x_id = 0, local_size_y_id = 0, local_size_z = 1) in;\n";
	}
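	// Note both local_size_x_id and local_size_y_id above map to constant_id 0
	// (DIM), so a compute workgroup is DIM x DIM invocations.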

	const char *memqual = "";
	if (m_data.coherent)
	{
		if (m_data.core11)
		{
			// Vulkan 1.1 only has "coherent", use it regardless of scope
			memqual = "coherent";
		}
		else
		{
			switch (m_data.scope)
			{
			default: DE_ASSERT(0); // fall through
			case SCOPE_DEVICE:		memqual = "devicecoherent"; break;
			case SCOPE_QUEUEFAMILY:	memqual = "queuefamilycoherent"; break;
			case SCOPE_WORKGROUP:	memqual = "workgroupcoherent"; break;
			case SCOPE_SUBGROUP:	memqual = "subgroupcoherent"; break;
			}
		}
	}
	else
	{
		DE_ASSERT(!m_data.core11);
		memqual = "nonprivate";
	}
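	// The qualifier chosen above is spliced into the payload declaration
	// below, e.g. "layout(set=0, binding=0) devicecoherent buffer Payload ...".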

	stringstream pushConstMembers;

	// Declare payload, guard, and fail resources
	switch (m_data.payloadSC)
	{
	default: DE_ASSERT(0); // fall through
	case SC_PHYSBUFFER: css << "layout(buffer_reference) buffer PayloadRef { " << typeStr << " x[]; };\n";
						pushConstMembers << "   layout(offset = 0) PayloadRef payloadref;\n"; break;
	case SC_BUFFER:		css << "layout(set=0, binding=0) " << memqual << " buffer Payload { " << typeStr << " x[]; } payload;\n"; break;
	case SC_IMAGE:
		if (intType)
			css << "layout(set=0, binding=0, r32ui) uniform " << memqual << " uimage2D payload;\n";
		else
			css << "layout(set=0, binding=0, r32f) uniform " << memqual << " image2D payload;\n";
		break;
	case SC_WORKGROUP:	css << "shared S payload;\n"; break;
	}
	if (m_data.syncType != ST_CONTROL_AND_MEMORY_BARRIER && m_data.syncType != ST_CONTROL_BARRIER)
	{
		// The guard variable is only accessed with atomics and need not be declared coherent.
		switch (m_data.guardSC)
		{
		default: DE_ASSERT(0); // fall through
		case SC_PHYSBUFFER: css << "layout(buffer_reference) buffer GuardRef { " << typeStr << " x[]; };\n";
							pushConstMembers << "layout(offset = 8) GuardRef guard;\n"; break;
		case SC_BUFFER:		css << "layout(set=0, binding=1) buffer Guard { " << typeStr << " x[]; } guard;\n"; break;
		case SC_IMAGE:
			if (intType)
				css << "layout(set=0, binding=1, r32ui) uniform " << memqual << " uimage2D guard;\n";
			else
				css << "layout(set=0, binding=1, r32f) uniform " << memqual << " image2D guard;\n";
			break;
		case SC_WORKGROUP:	css << "shared S guard;\n"; break;
		}
	}

	css << "layout(set=0, binding=2) buffer Fail { uint x[]; } fail;\n";

	if (pushConstMembers.str().size() != 0) {
		css << "layout (push_constant, std430) uniform PC {\n" << pushConstMembers.str() << "};\n";
	}

	css <<
		"void main()\n"
		"{\n"
		"   bool pass = true;\n"
		"   bool skip = false;\n";

	if (m_data.payloadSC == SC_PHYSBUFFER)
		css << "   " << memqual << " PayloadRef payload = payloadref;\n";

	if (m_data.stage == STAGE_FRAGMENT)
	{
		// Kill helper invocations so they don't load outside the bounds of the SSBO.
		// Helper pixels are also initially "active" and if a thread gets one as its
		// partner in SCOPE_SUBGROUP mode, it can't run the test.
		css << "   if (gl_HelperInvocation) { return; }\n";
	}

	// Compute coordinates based on the storage class and scope.
	// For workgroup scope, we pair up LocalInvocationID and DIM-1-LocalInvocationID.
	// For device scope, we pair up GlobalInvocationID and DIM*NUMWORKGROUPS-1-GlobalInvocationID.
	// For subgroup scope, we pair up LocalInvocationID and LocalInvocationID from subgroupId^(subgroupSize-1)
	switch (invocationMapping)
	{
	default: DE_ASSERT(0); // fall through
	case SCOPE_SUBGROUP:
		// If the partner invocation isn't active, the shuffle below will be undefined. Bail.
		css << "   uvec4 ballot = subgroupBallot(true);\n"
			   "   if (!subgroupBallotBitExtract(ballot, gl_SubgroupInvocationID^(gl_SubgroupSize-1))) { return; }\n";

		switch (m_data.stage)
		{
		default: DE_ASSERT(0); // fall through
		case STAGE_COMPUTE:
			css <<
			"   ivec2 localId           = ivec2(gl_LocalInvocationID.xy);\n"
			"   ivec2 partnerLocalId    = subgroupShuffleXor(localId, gl_SubgroupSize-1);\n"
			"   uint sharedCoord        = localId.y * DIM + localId.x;\n"
			"   uint partnerSharedCoord = partnerLocalId.y * DIM + partnerLocalId.x;\n"
			"   uint bufferCoord        = (gl_WorkGroupID.y * NUM_WORKGROUP_EACH_DIM + gl_WorkGroupID.x)*DIM*DIM + sharedCoord;\n"
			"   uint partnerBufferCoord = (gl_WorkGroupID.y * NUM_WORKGROUP_EACH_DIM + gl_WorkGroupID.x)*DIM*DIM + partnerSharedCoord;\n"
			"   ivec2 imageCoord        = ivec2(gl_WorkGroupID.xy * gl_WorkGroupSize.xy + localId);\n"
			"   ivec2 partnerImageCoord = ivec2(gl_WorkGroupID.xy * gl_WorkGroupSize.xy + partnerLocalId);\n";
			break;
		case STAGE_VERTEX:
			css <<
			"   uint bufferCoord        = gl_VertexIndex;\n"
			"   uint partnerBufferCoord = subgroupShuffleXor(gl_VertexIndex, gl_SubgroupSize-1);\n"
			"   ivec2 imageCoord        = ivec2(gl_VertexIndex % (DIM*NUM_WORKGROUP_EACH_DIM), gl_VertexIndex / (DIM*NUM_WORKGROUP_EACH_DIM));\n"
			"   ivec2 partnerImageCoord = subgroupShuffleXor(imageCoord, gl_SubgroupSize-1);\n"
			"   gl_PointSize            = 1.0f;\n"
			"   gl_Position             = vec4(0.0f, 0.0f, 0.0f, 1.0f);\n\n";
			break;
		case STAGE_FRAGMENT:
			css <<
			"   ivec2 localId        = ivec2(gl_FragCoord.xy) % ivec2(DIM);\n"
			"   ivec2 groupId        = ivec2(gl_FragCoord.xy) / ivec2(DIM);\n"
			"   ivec2 partnerLocalId = subgroupShuffleXor(localId, gl_SubgroupSize-1);\n"
			"   ivec2 partnerGroupId = subgroupShuffleXor(groupId, gl_SubgroupSize-1);\n"
			"   uint sharedCoord     = localId.y * DIM + localId.x;\n"
			"   uint partnerSharedCoord = partnerLocalId.y * DIM + partnerLocalId.x;\n"
			"   uint bufferCoord     = (groupId.y * NUM_WORKGROUP_EACH_DIM + groupId.x)*DIM*DIM + sharedCoord;\n"
			"   uint partnerBufferCoord = (partnerGroupId.y * NUM_WORKGROUP_EACH_DIM + partnerGroupId.x)*DIM*DIM + partnerSharedCoord;\n"
			"   ivec2 imageCoord     = ivec2(groupId.xy * ivec2(DIM) + localId);\n"
			"   ivec2 partnerImageCoord = ivec2(partnerGroupId.xy * ivec2(DIM) + partnerLocalId);\n";
			break;
		}
		break;
	case SCOPE_WORKGROUP:
		css <<
		"   ivec2 localId           = ivec2(gl_LocalInvocationID.xy);\n"
		"   ivec2 partnerLocalId    = ivec2(DIM-1)-ivec2(gl_LocalInvocationID.xy);\n"
		"   uint sharedCoord        = localId.y * DIM + localId.x;\n"
		"   uint partnerSharedCoord = partnerLocalId.y * DIM + partnerLocalId.x;\n"
		"   uint bufferCoord        = (gl_WorkGroupID.y * NUM_WORKGROUP_EACH_DIM + gl_WorkGroupID.x)*DIM*DIM + sharedCoord;\n"
		"   uint partnerBufferCoord = (gl_WorkGroupID.y * NUM_WORKGROUP_EACH_DIM + gl_WorkGroupID.x)*DIM*DIM + partnerSharedCoord;\n"
		"   ivec2 imageCoord        = ivec2(gl_WorkGroupID.xy * gl_WorkGroupSize.xy + localId);\n"
		"   ivec2 partnerImageCoord = ivec2(gl_WorkGroupID.xy * gl_WorkGroupSize.xy + partnerLocalId);\n";
		break;
	case SCOPE_QUEUEFAMILY:
	case SCOPE_DEVICE:
		switch (m_data.stage)
		{
		default: DE_ASSERT(0); // fall through
		case STAGE_COMPUTE:
			css <<
			"   ivec2 globalId          = ivec2(gl_GlobalInvocationID.xy);\n"
			"   ivec2 partnerGlobalId   = ivec2(DIM*NUM_WORKGROUP_EACH_DIM-1) - ivec2(gl_GlobalInvocationID.xy);\n"
			"   uint bufferCoord        = globalId.y * DIM*NUM_WORKGROUP_EACH_DIM + globalId.x;\n"
			"   uint partnerBufferCoord = partnerGlobalId.y * DIM*NUM_WORKGROUP_EACH_DIM + partnerGlobalId.x;\n"
			"   ivec2 imageCoord        = globalId;\n"
			"   ivec2 partnerImageCoord = partnerGlobalId;\n";
			break;
		case STAGE_VERTEX:
			css <<
			"   ivec2 globalId          = ivec2(gl_VertexIndex % (DIM*NUM_WORKGROUP_EACH_DIM), gl_VertexIndex / (DIM*NUM_WORKGROUP_EACH_DIM));\n"
			"   ivec2 partnerGlobalId   = ivec2(DIM*NUM_WORKGROUP_EACH_DIM-1) - globalId;\n"
			"   uint bufferCoord        = globalId.y * DIM*NUM_WORKGROUP_EACH_DIM + globalId.x;\n"
			"   uint partnerBufferCoord = partnerGlobalId.y * DIM*NUM_WORKGROUP_EACH_DIM + partnerGlobalId.x;\n"
			"   ivec2 imageCoord        = globalId;\n"
			"   ivec2 partnerImageCoord = partnerGlobalId;\n"
			"   gl_PointSize            = 1.0f;\n"
			"   gl_Position             = vec4(0.0f, 0.0f, 0.0f, 1.0f);\n\n";
			break;
		case STAGE_FRAGMENT:
			css <<
			"   ivec2 localId       = ivec2(gl_FragCoord.xy) % ivec2(DIM);\n"
			"   ivec2 groupId       = ivec2(gl_FragCoord.xy) / ivec2(DIM);\n"
			"   ivec2 partnerLocalId = ivec2(DIM-1)-localId;\n"
			"   ivec2 partnerGroupId = groupId;\n"
			"   uint sharedCoord    = localId.y * DIM + localId.x;\n"
			"   uint partnerSharedCoord = partnerLocalId.y * DIM + partnerLocalId.x;\n"
			"   uint bufferCoord    = (groupId.y * NUM_WORKGROUP_EACH_DIM + groupId.x)*DIM*DIM + sharedCoord;\n"
			"   uint partnerBufferCoord = (partnerGroupId.y * NUM_WORKGROUP_EACH_DIM + partnerGroupId.x)*DIM*DIM + partnerSharedCoord;\n"
			"   ivec2 imageCoord    = ivec2(groupId.xy * ivec2(DIM) + localId);\n"
			"   ivec2 partnerImageCoord = ivec2(partnerGroupId.xy * ivec2(DIM) + partnerLocalId);\n";
			break;
		}
		break;
	}

	// Initialize shared memory, followed by a barrier
	if (m_data.payloadSC == SC_WORKGROUP)
	{
		css << "   payload.x[sharedCoord] = 0;\n";
	}
	if (m_data.guardSC == SC_WORKGROUP)
	{
		css << "   guard.x[sharedCoord] = 0;\n";
	}
	if (m_data.payloadSC == SC_WORKGROUP || m_data.guardSC == SC_WORKGROUP)
	{
		switch (invocationMapping)
		{
		default: DE_ASSERT(0); // fall through
		case SCOPE_SUBGROUP:	css << "   subgroupBarrier();\n"; break;
		case SCOPE_WORKGROUP:	css << "   barrier();\n"; break;
		}
	}

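	// The "(... >> 31)" term in the payload stores below always evaluates to
	// zero for the coordinate values used here, so the store also forces a
	// read of the partner's payload location.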
	if (m_data.testType == TT_MP)
	{
		if (intType)
		{
			// Store payload
			switch (m_data.payloadSC)
			{
			default: DE_ASSERT(0); // fall through
			case SC_PHYSBUFFER: // fall through
			case SC_BUFFER:		css << "   payload.x[bufferCoord] = bufferCoord + (payload.x[partnerBufferCoord]>>31);\n"; break;
			case SC_IMAGE:		css << "   imageStore(payload, imageCoord, uvec4(bufferCoord + (imageLoad(payload, partnerImageCoord).x>>31), 0, 0, 0));\n"; break;
			case SC_WORKGROUP:	css << "   payload.x[sharedCoord] = bufferCoord + (payload.x[partnerSharedCoord]>>31);\n"; break;
			}
		}
		else
		{
			// Store payload
			switch (m_data.payloadSC)
			{
			default: DE_ASSERT(0); // fall through
			case SC_PHYSBUFFER: // fall through
			case SC_BUFFER:		css << "   payload.x[bufferCoord] = " << typeStr << "(bufferCoord) + ((floatBitsToInt(float(payload.x[partnerBufferCoord])))>>31);\n"; break;
			case SC_IMAGE:		css << "   imageStore(payload, imageCoord, vec4(" << typeStr << "(bufferCoord + (floatBitsToInt(float(imageLoad(payload, partnerImageCoord).x))>>31)), 0, 0, 0)); \n"; break;
			case SC_WORKGROUP:	css << "   payload.x[sharedCoord] = " << typeStr << "(bufferCoord) + ((floatBitsToInt(float(payload.x[partnerSharedCoord])))>>31);\n"; break;
			}
		}
	}
	else
	{
		DE_ASSERT(m_data.testType == TT_WAR);
		// Load payload
		switch (m_data.payloadSC)
		{
		default: DE_ASSERT(0); // fall through
		case SC_PHYSBUFFER: // fall through
		case SC_BUFFER:		css << "   " << typeStr << " r = payload.x[partnerBufferCoord];\n"; break;
		case SC_IMAGE:		css << "   " << typeStr << " r = imageLoad(payload, partnerImageCoord).x;\n"; break;
		case SC_WORKGROUP:	css << "   " << typeStr << " r = payload.x[partnerSharedCoord];\n"; break;
		}
	}
	if (m_data.syncType == ST_CONTROL_AND_MEMORY_BARRIER)
	{
		// Acquire and release separate from control barrier
		css << "   memoryBarrier(" << scopeStr << ", " << storageSemanticsRelease.str() << ", " << semanticsRelease.str() << ");\n"
			   "   controlBarrier(" << scopeStr << ", gl_ScopeInvocation, 0, 0);\n"
			   "   memoryBarrier(" << scopeStr << ", " << storageSemanticsAcquire.str() << ", " << semanticsAcquire.str() << ");\n";
	}
	else if (m_data.syncType == ST_CONTROL_BARRIER)
	{
		// Control barrier performs both acquire and release
		css << "   controlBarrier(" << scopeStr << ", " << scopeStr << ", "
									<< storageSemanticsRelease.str() << " | " << storageSemanticsAcquire.str() << ", "
									<< semanticsAcquireRelease.str() << ");\n";
	}
	else
	{
		// Don't type cast for 64 bit image atomics
		const char* typeCastStr = (m_data.dataType == DATA_TYPE_UINT64 || m_data.dataType == DATA_TYPE_FLOAT64) ? "" : typeStr;
		// Release barrier
		std::stringstream atomicReleaseSemantics;
		if (m_data.syncType == ST_FENCE_ATOMIC || m_data.syncType == ST_FENCE_FENCE)
		{
			css << "   memoryBarrier(" << scopeStr << ", " << storageSemanticsRelease.str() << ", " << semanticsRelease.str() << ");\n";
			atomicReleaseSemantics << ", 0, 0";
		}
		else
		{
			atomicReleaseSemantics << ", " << storageSemanticsRelease.str() << ", " << semanticsRelease.str();
		}
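		// When a fence carried the release above, the guard atomic below is
		// relaxed (", 0, 0"); otherwise the semantics ride on the atomic.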
		// Atomic store guard
		if (m_data.atomicRMW)
		{
			switch (m_data.guardSC)
			{
			default: DE_ASSERT(0); // fall through
			case SC_PHYSBUFFER: // fall through
			case SC_BUFFER:		css << "   atomicExchange(guard.x[bufferCoord], " << typeStr << "(1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break;
			case SC_IMAGE:		css << "   imageAtomicExchange(guard, imageCoord, " << typeCastStr << "(1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break;
			case SC_WORKGROUP:	css << "   atomicExchange(guard.x[sharedCoord], " << typeStr << "(1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break;
			}
		}
		else
		{
			switch (m_data.guardSC)
			{
			default: DE_ASSERT(0); // fall through
			case SC_PHYSBUFFER: // fall through
			case SC_BUFFER:		css << "   atomicStore(guard.x[bufferCoord], " << typeStr << "(1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break;
			case SC_IMAGE:		css << "   imageAtomicStore(guard, imageCoord, " << typeCastStr << "(1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break;
			case SC_WORKGROUP:	css << "   atomicStore(guard.x[sharedCoord], " << typeStr << "(1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break;
			}
		}

		std::stringstream atomicAcquireSemantics;
		if (m_data.syncType == ST_ATOMIC_FENCE || m_data.syncType == ST_FENCE_FENCE)
		{
			atomicAcquireSemantics << ", 0, 0";
		}
		else
		{
			atomicAcquireSemantics << ", " << storageSemanticsAcquire.str() << ", " << semanticsAcquire.str();
		}
		// Atomic load guard
		if (m_data.atomicRMW)
		{
			switch (m_data.guardSC)
			{
			default: DE_ASSERT(0); // fall through
			case SC_PHYSBUFFER: // fall through
			case SC_BUFFER: css << "   skip = atomicExchange(guard.x[partnerBufferCoord], " << typeStr << "(2u), " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break;
			case SC_IMAGE:  css << "   skip = imageAtomicExchange(guard, partnerImageCoord, " << typeCastStr << "(2u), " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break;
			case SC_WORKGROUP: css << "   skip = atomicExchange(guard.x[partnerSharedCoord], " << typeStr << "(2u), " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break;
			}
		}
		else
		{
			switch (m_data.guardSC)
			{
			default: DE_ASSERT(0); // fall through
			case SC_PHYSBUFFER: // fall through
			case SC_BUFFER:		css << "   skip = atomicLoad(guard.x[partnerBufferCoord], " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break;
			case SC_IMAGE:		css << "   skip = imageAtomicLoad(guard, partnerImageCoord, " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break;
			case SC_WORKGROUP:	css << "   skip = atomicLoad(guard.x[partnerSharedCoord], " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break;
			}
		}
		// Acquire barrier
		if (m_data.syncType == ST_ATOMIC_FENCE || m_data.syncType == ST_FENCE_FENCE)
		{
			css << "   memoryBarrier(" << scopeStr << ", " << storageSemanticsAcquire.str() << ", " << semanticsAcquire.str() << ");\n";
		}
	}
	if (m_data.testType == TT_MP)
	{
		// Load payload
		switch (m_data.payloadSC)
		{
		default: DE_ASSERT(0); // fall through
		case SC_PHYSBUFFER: // fall through
		case SC_BUFFER:		css << "   " << typeStr << " r = payload.x[partnerBufferCoord];\n"; break;
		case SC_IMAGE:		css << "   " << typeStr << " r = imageLoad(payload, partnerImageCoord).x;\n"; break;
		case SC_WORKGROUP:	css << "   " << typeStr << " r = payload.x[partnerSharedCoord];\n"; break;
		}
		css <<
			"   if (!skip && r != " << typeStr << "(partnerBufferCoord)) { fail.x[bufferCoord] = 1; }\n"
			"}\n";
	}
	else
	{
		DE_ASSERT(m_data.testType == TT_WAR);
		// Store payload, only if the partner invocation has already done its read
		css << "   if (!skip) {\n   ";
		switch (m_data.payloadSC)
		{
		default: DE_ASSERT(0); // fall through
		case SC_PHYSBUFFER: // fall through
		case SC_BUFFER:		css << "   payload.x[bufferCoord] = " << typeStr << "(bufferCoord);\n"; break;
		case SC_IMAGE:
			if (intType) {
				css << "   imageStore(payload, imageCoord, uvec4(bufferCoord, 0, 0, 0));\n";
			}
			else {
				css << "   imageStore(payload, imageCoord, vec4(" << typeStr << "(bufferCoord), 0, 0, 0));\n";
			}
			break;
		case SC_WORKGROUP:	css << "   payload.x[sharedCoord] = " << typeStr << "(bufferCoord);\n"; break;
		}
		css <<
			"   }\n"
			"   if (r != 0) { fail.x[bufferCoord] = 1; }\n"
			"}\n";
	}

	// Draw a fullscreen triangle strip based on gl_VertexIndex
	std::stringstream vss;
	vss <<
		"#version 450 core\n"
		"vec2 coords[4] = {vec2(-1,-1), vec2(-1, 1), vec2(1, -1), vec2(1, 1)};\n"
		"void main() { gl_Position = vec4(coords[gl_VertexIndex], 0, 1); }\n";

	const vk::ShaderBuildOptions	buildOptions	(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);

	switch (m_data.stage)
	{
	default: DE_ASSERT(0); // fall through
	case STAGE_COMPUTE:
		programCollection.glslSources.add("test") << glu::ComputeSource(css.str()) << buildOptions;
		break;
	case STAGE_VERTEX:
		programCollection.glslSources.add("test") << glu::VertexSource(css.str()) << buildOptions;
		break;
	case STAGE_FRAGMENT:
		programCollection.glslSources.add("vert") << glu::VertexSource(vss.str());
		programCollection.glslSources.add("test") << glu::FragmentSource(css.str()) << buildOptions;
		break;
	}
}


void MemoryModelTestCase::initProgramsTransitive (SourceCollections& programCollection) const
{
	Scope invocationMapping = m_data.scope;

	const char* typeStr = (m_data.dataType == DATA_TYPE_UINT64) ? "uint64_t" : (m_data.dataType == DATA_TYPE_FLOAT32) ? "float" :
		(m_data.dataType == DATA_TYPE_FLOAT64) ? "double" : "uint";
	const bool intType = (m_data.dataType == DATA_TYPE_UINT || m_data.dataType == DATA_TYPE_UINT64);

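	// Transitive variant: invocations first synchronize within their workgroup
	// at workgroup scope, then only invocation (0,0) of each workgroup performs
	// the device-scope release/acquire on the guard, exercising the memory
	// model's availability/visibility chains.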
	// Construct storageSemantics strings for the payload and guard storage
	// classes. storageSemanticsAll combines both, for use by the fence
	// variants of the sync.
	std::stringstream storageSemanticsPayload;
	switch (m_data.payloadSC)
	{
	default: DE_ASSERT(0); // fall through
	case SC_PHYSBUFFER: // fall through
	case SC_BUFFER:		storageSemanticsPayload << "gl_StorageSemanticsBuffer"; break;
	case SC_IMAGE:		storageSemanticsPayload << "gl_StorageSemanticsImage"; break;
	}
	std::stringstream storageSemanticsGuard;
	switch (m_data.guardSC)
	{
	default: DE_ASSERT(0); // fall through
	case SC_PHYSBUFFER: // fall through
	case SC_BUFFER:		storageSemanticsGuard << "gl_StorageSemanticsBuffer"; break;
	case SC_IMAGE:		storageSemanticsGuard << "gl_StorageSemanticsImage"; break;
	}
	std::stringstream storageSemanticsAll;
	storageSemanticsAll << storageSemanticsPayload.str() << " | " << storageSemanticsGuard.str();

	std::stringstream css;
	css << "#version 450 core\n";
	css << "#pragma use_vulkan_memory_model\n";
	if (!intType)
	{
		css <<
			"#extension GL_EXT_shader_atomic_float : enable\n"
			"#extension GL_KHR_memory_scope_semantics : enable\n";
	}
	css <<
		"#extension GL_KHR_shader_subgroup_basic : enable\n"
		"#extension GL_KHR_shader_subgroup_shuffle : enable\n"
		"#extension GL_KHR_shader_subgroup_ballot : enable\n"
		"#extension GL_KHR_memory_scope_semantics : enable\n"
		"#extension GL_ARB_gpu_shader_int64 : enable\n"
		"#extension GL_EXT_buffer_reference : enable\n"
		"// DIM/NUM_WORKGROUP_EACH_DIM overridden by spec constants\n"
		"layout(constant_id = 0) const int DIM = 1;\n"
		"layout(constant_id = 1) const int NUM_WORKGROUP_EACH_DIM = 1;\n"
		"shared bool sharedSkip;\n";

	css << "layout(local_size_x_id = 0, local_size_y_id = 0, local_size_z = 1) in;\n";

	const char *memqual = "";
	const char *semAvail = "";
	const char *semVis = "";
	if (m_data.coherent)
	{
		memqual = "workgroupcoherent";
	}
	else
	{
		memqual = "nonprivate";
		semAvail = " | gl_SemanticsMakeAvailable";
		semVis = " | gl_SemanticsMakeVisible";
	}

	stringstream pushConstMembers;

	// Declare payload, guard, and fail resources
	switch (m_data.payloadSC)
	{
	default: DE_ASSERT(0); // fall through
	case SC_PHYSBUFFER: css << "layout(buffer_reference) buffer PayloadRef { " << typeStr << " x[]; };\n";
						pushConstMembers << "   layout(offset = 0) PayloadRef payloadref;\n"; break;
	case SC_BUFFER:		css << "layout(set=0, binding=0) " << memqual << " buffer Payload { " << typeStr << " x[]; } payload;\n"; break;
	case SC_IMAGE:
		if (intType)
			css << "layout(set=0, binding=0, r32ui) uniform " << memqual << " uimage2D payload;\n";
		else
			css << "layout(set=0, binding=0, r32f) uniform " << memqual << " image2D payload;\n";
		break;
	}
	// The guard variable is only accessed with atomics and need not be declared coherent.
	switch (m_data.guardSC)
	{
	default: DE_ASSERT(0); // fall through
	case SC_PHYSBUFFER: css << "layout(buffer_reference) buffer GuardRef { " << typeStr << " x[]; };\n";
						pushConstMembers << "layout(offset = 8) GuardRef guard;\n"; break;
	case SC_BUFFER:		css << "layout(set=0, binding=1) buffer Guard { " << typeStr << " x[]; } guard;\n"; break;
	case SC_IMAGE:
		if (intType)
			css << "layout(set=0, binding=1, r32ui) uniform " << memqual << " uimage2D guard;\n";
		else
			css << "layout(set=0, binding=1, r32f) uniform " << memqual << " image2D guard;\n";
		break;
	}

	css << "layout(set=0, binding=2) buffer Fail { uint x[]; } fail;\n";

	if (pushConstMembers.str().size() != 0) {
		css << "layout (push_constant, std430) uniform PC {\n" << pushConstMembers.str() << "};\n";
	}

	css <<
		"void main()\n"
		"{\n"
		"   bool pass = true;\n"
		"   bool skip = false;\n"
		"   sharedSkip = false;\n";

	if (m_data.payloadSC == SC_PHYSBUFFER)
		css << "   " << memqual << " PayloadRef payload = payloadref;\n";

	// Compute coordinates based on the storage class and scope.
	switch (invocationMapping)
	{
	default: DE_ASSERT(0); // fall through
	case SCOPE_DEVICE:
		css <<
		"   ivec2 globalId          = ivec2(gl_GlobalInvocationID.xy);\n"
		"   ivec2 partnerGlobalId   = ivec2(DIM*NUM_WORKGROUP_EACH_DIM-1) - ivec2(gl_GlobalInvocationID.xy);\n"
		"   uint bufferCoord        = globalId.y * DIM*NUM_WORKGROUP_EACH_DIM + globalId.x;\n"
		"   uint partnerBufferCoord = partnerGlobalId.y * DIM*NUM_WORKGROUP_EACH_DIM + partnerGlobalId.x;\n"
		"   ivec2 imageCoord        = globalId;\n"
		"   ivec2 partnerImageCoord = partnerGlobalId;\n"
		"   ivec2 globalId00          = ivec2(DIM) * ivec2(gl_WorkGroupID.xy);\n"
		"   ivec2 partnerGlobalId00   = ivec2(DIM) * (ivec2(NUM_WORKGROUP_EACH_DIM-1) - ivec2(gl_WorkGroupID.xy));\n"
		"   uint bufferCoord00        = globalId00.y * DIM*NUM_WORKGROUP_EACH_DIM + globalId00.x;\n"
		"   uint partnerBufferCoord00 = partnerGlobalId00.y * DIM*NUM_WORKGROUP_EACH_DIM + partnerGlobalId00.x;\n"
		"   ivec2 imageCoord00        = globalId00;\n"
		"   ivec2 partnerImageCoord00 = partnerGlobalId00;\n";
		break;
	}

	// Store payload
	if (intType)
	{
		switch (m_data.payloadSC)
		{
		default: DE_ASSERT(0); // fall through
		case SC_PHYSBUFFER: // fall through
		case SC_BUFFER:		css << "   payload.x[bufferCoord] = bufferCoord + (payload.x[partnerBufferCoord]>>31);\n"; break;
		case SC_IMAGE:		css << "   imageStore(payload, imageCoord, uvec4(bufferCoord + (imageLoad(payload, partnerImageCoord).x>>31), 0, 0, 0));\n"; break;
		}
	}
	else
	{
		switch (m_data.payloadSC)
		{
		default: DE_ASSERT(0); // fall through
		case SC_PHYSBUFFER: // fall through
		case SC_BUFFER:	css << "   payload.x[bufferCoord] = " << typeStr << "(bufferCoord) + ((floatBitsToInt(float(payload.x[partnerBufferCoord])))>>31);\n"; break;
		case SC_IMAGE:	css << "   imageStore(payload, imageCoord, vec4(" << typeStr << "(bufferCoord + (floatBitsToInt(float(imageLoad(payload, partnerImageCoord).x)>>31))), 0, 0, 0)); \n"; break;
		}
	}

	// Sync to other threads in the workgroup
	css << "   controlBarrier(gl_ScopeWorkgroup, "
							 "gl_ScopeWorkgroup, " <<
							  storageSemanticsPayload.str() << " | gl_StorageSemanticsShared, "
							 "gl_SemanticsAcquireRelease" << semAvail << ");\n";

	// Device-scope release/availability in invocation(0,0)
	css << "   if (all(equal(gl_LocalInvocationID.xy, uvec2(0,0)))) {\n";
	const char* typeCastStr = (m_data.dataType == DATA_TYPE_UINT64 || m_data.dataType == DATA_TYPE_FLOAT64) ? "" : typeStr;
	if (m_data.syncType == ST_ATOMIC_ATOMIC || m_data.syncType == ST_ATOMIC_FENCE) {
		switch (m_data.guardSC)
		{
		default: DE_ASSERT(0); // fall through
		case SC_PHYSBUFFER: // fall through
		case SC_BUFFER:		css << "       atomicStore(guard.x[bufferCoord], " << typeStr << "(1u), gl_ScopeDevice, " << storageSemanticsPayload.str() << ", gl_SemanticsRelease | gl_SemanticsMakeAvailable);\n"; break;
		case SC_IMAGE:		css << "       imageAtomicStore(guard, imageCoord, " << typeCastStr << "(1u), gl_ScopeDevice, " << storageSemanticsPayload.str() << ", gl_SemanticsRelease | gl_SemanticsMakeAvailable);\n"; break;
		}
	} else {
		css << "       memoryBarrier(gl_ScopeDevice, " << storageSemanticsAll.str() << ", gl_SemanticsRelease | gl_SemanticsMakeAvailable);\n";
		switch (m_data.guardSC)
		{
		default: DE_ASSERT(0); // fall through
		case SC_PHYSBUFFER: // fall through
		case SC_BUFFER:		css << "       atomicStore(guard.x[bufferCoord], " << typeStr << "(1u), gl_ScopeDevice, 0, 0);\n"; break;
		case SC_IMAGE:		css << "       imageAtomicStore(guard, imageCoord, " << typeCastStr << "(1u), gl_ScopeDevice, 0, 0);\n"; break;
		}
	}

	// Device-scope acquire/visibility either in invocation(0,0) or in every invocation
	if (!m_data.transitiveVis) {
		css << "   }\n";
	}
	if (m_data.syncType == ST_ATOMIC_ATOMIC || m_data.syncType == ST_FENCE_ATOMIC) {
		switch (m_data.guardSC)
		{
		default: DE_ASSERT(0); // fall through
		case SC_PHYSBUFFER: // fall through
		case SC_BUFFER:		css << "       skip = atomicLoad(guard.x[partnerBufferCoord00], gl_ScopeDevice, " << storageSemanticsPayload.str() << ", gl_SemanticsAcquire | gl_SemanticsMakeVisible) == 0;\n"; break;
		case SC_IMAGE:		css << "       skip = imageAtomicLoad(guard, partnerImageCoord00, gl_ScopeDevice, " << storageSemanticsPayload.str() << ", gl_SemanticsAcquire | gl_SemanticsMakeVisible) == 0;\n"; break;
		}
	} else {
		switch (m_data.guardSC)
		{
		default: DE_ASSERT(0); // fall through
		case SC_PHYSBUFFER: // fall through
		case SC_BUFFER:		css << "       skip = atomicLoad(guard.x[partnerBufferCoord00], gl_ScopeDevice, 0, 0) == 0;\n"; break;
		case SC_IMAGE:		css << "       skip = imageAtomicLoad(guard, partnerImageCoord00, gl_ScopeDevice, 0, 0) == 0;\n"; break;
		}
		css << "       memoryBarrier(gl_ScopeDevice, " << storageSemanticsAll.str() << ", gl_SemanticsAcquire | gl_SemanticsMakeVisible);\n";
	}

	// If invocation(0,0) did the acquire then store "skip" to shared memory and
	// synchronize with the workgroup
	if (m_data.transitiveVis) {
		css << "       sharedSkip = skip;\n";
		css << "   }\n";

		css << "   controlBarrier(gl_ScopeWorkgroup, "
								 "gl_ScopeWorkgroup, " <<
								  storageSemanticsPayload.str() << " | gl_StorageSemanticsShared, "
								 "gl_SemanticsAcquireRelease" << semVis << ");\n";
		css << "   skip = sharedSkip;\n";
	}

	// Load payload
	switch (m_data.payloadSC)
	{
	default: DE_ASSERT(0); // fall through
	case SC_PHYSBUFFER: // fall through
	case SC_BUFFER:		css << "   " << typeStr << " r = payload.x[partnerBufferCoord];\n"; break;
	case SC_IMAGE:		css << "   " << typeStr << " r = imageLoad(payload, partnerImageCoord).x;\n"; break;
	}
	css <<
		"   if (!skip && r != " << typeStr << "(partnerBufferCoord)) { fail.x[bufferCoord] = 1; }\n"
		"}\n";

	const vk::ShaderBuildOptions	buildOptions	(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);

	programCollection.glslSources.add("test") << glu::ComputeSource(css.str()) << buildOptions;
}

TestInstance* MemoryModelTestCase::createInstance (Context& context) const
{
	return new MemoryModelTestInstance(context, m_data);
}

tcu::TestStatus MemoryModelTestInstance::iterate (void)
{
	const DeviceInterface&	vk						= m_context.getDeviceInterface();
	const VkDevice			device					= m_context.getDevice();
	Allocator&				allocator				= m_context.getDefaultAllocator();

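	// Overall flow: create the payload/guard/fail resources, build a compute
	// or graphics pipeline from the generated "test" shader, launch one
	// invocation per test thread, then read the fail buffer back on the host.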
	VkPhysicalDeviceProperties2 properties;
	properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
	properties.pNext = DE_NULL;

	m_context.getInstanceInterface().getPhysicalDeviceProperties2(m_context.getPhysicalDevice(), &properties);

	deUint32 DIM = 31;
	deUint32 NUM_WORKGROUP_EACH_DIM = 8;
	// If necessary, shrink workgroup size to fit HW limits
	if (DIM*DIM > properties.properties.limits.maxComputeWorkGroupInvocations)
	{
		DIM = (deUint32)deFloatSqrt((float)properties.properties.limits.maxComputeWorkGroupInvocations);
	}
	deUint32 NUM_INVOCATIONS = (DIM * DIM * NUM_WORKGROUP_EACH_DIM * NUM_WORKGROUP_EACH_DIM);

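	// Buffer 0 holds the payload, buffer 1 the guard, and buffer 2 the fail
	// flags (one uint per invocation, set to 1 by the shader on failure).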
	VkDeviceSize bufferSizes[3];
	de::MovePtr<BufferWithMemory> buffers[3];
	vk::VkDescriptorBufferInfo bufferDescriptors[3];
	de::MovePtr<BufferWithMemory> copyBuffer;

	for (deUint32 i = 0; i < 3; ++i)
	{
		size_t elementSize = (m_data.dataType == DATA_TYPE_UINT64 || m_data.dataType == DATA_TYPE_FLOAT64) ? sizeof(deUint64) : sizeof(deUint32);
		// buffer2 is the "fail" buffer, and is always uint
		if (i == 2)
			elementSize = sizeof(deUint32);
		bufferSizes[i] = NUM_INVOCATIONS * elementSize;

		vk::VkFlags usageFlags = vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT;

		bool memoryDeviceAddress = false;

		bool local;
		switch (i)
		{
		default: DE_ASSERT(0); // fall through
		case 0:
			if (m_data.payloadSC != SC_BUFFER && m_data.payloadSC != SC_PHYSBUFFER)
				continue;
			local = m_data.payloadMemLocal;
			if (m_data.payloadSC == SC_PHYSBUFFER)
			{
				usageFlags |= vk::VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT;
				if (m_context.isDeviceFunctionalitySupported("VK_KHR_buffer_device_address"))
					memoryDeviceAddress = true;
			}
			break;
		case 1:
			if (m_data.guardSC != SC_BUFFER && m_data.guardSC != SC_PHYSBUFFER)
				continue;
			local = m_data.guardMemLocal;
			if (m_data.guardSC == SC_PHYSBUFFER)
			{
				usageFlags |= vk::VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT;
				if (m_context.isDeviceFunctionalitySupported("VK_KHR_buffer_device_address"))
					memoryDeviceAddress = true;
			}
			break;
		case 2: local = true; break;
		}

		try
		{
			buffers[i] = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
				vk, device, allocator, makeBufferCreateInfo(bufferSizes[i], usageFlags),
				(memoryDeviceAddress ? MemoryRequirement::DeviceAddress : MemoryRequirement::Any) |
				(local ? MemoryRequirement::Local : MemoryRequirement::NonLocal)));
		}
		catch (const tcu::NotSupportedError&)
		{
			if (!local)
			{
				TCU_THROW(NotSupportedError, "Test variant uses non-device-local memory, which is not supported");
			}
			throw;
		}
		bufferDescriptors[i] = makeDescriptorBufferInfo(**buffers[i], 0, bufferSizes[i]);
	}

	// Try to use cached host memory for the buffer the CPU will read from, else fall back to host-visible.
	try
	{
		copyBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
			vk, device, allocator, makeBufferCreateInfo(bufferSizes[2], VK_BUFFER_USAGE_TRANSFER_DST_BIT), MemoryRequirement::HostVisible | MemoryRequirement::Cached));
	}
	catch (const tcu::NotSupportedError&)
	{
		copyBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
			vk, device, allocator, makeBufferCreateInfo(bufferSizes[2], VK_BUFFER_USAGE_TRANSFER_DST_BIT), MemoryRequirement::HostVisible));
	}

	VkFormat imageFormat;
	switch (m_data.dataType)
	{
	case DATA_TYPE_UINT:
	case DATA_TYPE_UINT64:
		imageFormat = VK_FORMAT_R32_UINT;
		break;
	case DATA_TYPE_FLOAT32:
	case DATA_TYPE_FLOAT64:
		imageFormat = VK_FORMAT_R32_SFLOAT;
		break;
	default:
		TCU_FAIL("Invalid data type.");
	}

	const VkImageCreateInfo			imageCreateInfo			=
	{
		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,	// VkStructureType			sType;
		DE_NULL,								// const void*				pNext;
		(VkImageCreateFlags)0u,					// VkImageCreateFlags		flags;
		VK_IMAGE_TYPE_2D,						// VkImageType				imageType;
		imageFormat,							// VkFormat					format;
		{
			DIM*NUM_WORKGROUP_EACH_DIM,	// deUint32	width;
			DIM*NUM_WORKGROUP_EACH_DIM,	// deUint32	height;
			1u							// deUint32	depth;
		},										// VkExtent3D				extent;
		1u,										// deUint32					mipLevels;
		1u,										// deUint32					arrayLayers;
		VK_SAMPLE_COUNT_1_BIT,					// VkSampleCountFlagBits	samples;
		VK_IMAGE_TILING_OPTIMAL,				// VkImageTiling			tiling;
		VK_IMAGE_USAGE_STORAGE_BIT
		| VK_IMAGE_USAGE_TRANSFER_SRC_BIT
		| VK_IMAGE_USAGE_TRANSFER_DST_BIT,		// VkImageUsageFlags		usage;
		VK_SHARING_MODE_EXCLUSIVE,				// VkSharingMode			sharingMode;
		0u,										// deUint32					queueFamilyIndexCount;
		DE_NULL,								// const deUint32*			pQueueFamilyIndices;
		VK_IMAGE_LAYOUT_UNDEFINED				// VkImageLayout			initialLayout;
	};
	VkImageViewCreateInfo		imageViewCreateInfo		=
	{
		VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,	// VkStructureType			sType;
		DE_NULL,									// const void*				pNext;
		(VkImageViewCreateFlags)0u,					// VkImageViewCreateFlags	flags;
		DE_NULL,									// VkImage					image;
		VK_IMAGE_VIEW_TYPE_2D,						// VkImageViewType			viewType;
		imageFormat,								// VkFormat					format;
		{
			VK_COMPONENT_SWIZZLE_R,	// VkComponentSwizzle	r;
			VK_COMPONENT_SWIZZLE_G,	// VkComponentSwizzle	g;
			VK_COMPONENT_SWIZZLE_B,	// VkComponentSwizzle	b;
			VK_COMPONENT_SWIZZLE_A	// VkComponentSwizzle	a;
		},											// VkComponentMapping		components;
		{
			VK_IMAGE_ASPECT_COLOR_BIT,	// VkImageAspectFlags	aspectMask;
			0u,							// deUint32				baseMipLevel;
			1u,							// deUint32				levelCount;
			0u,							// deUint32				baseArrayLayer;
			1u							// deUint32				layerCount;
		}											// VkImageSubresourceRange	subresourceRange;
	};
1241 
1242 	de::MovePtr<ImageWithMemory> images[2];
1243 	Move<VkImageView> imageViews[2];
1244 	vk::VkDescriptorImageInfo imageDescriptors[2];
1245 
1246 	for (deUint32 i = 0; i < 2; ++i)
1247 	{
1248 
1249 		bool local;
1250 		switch (i)
1251 		{
1252 		default: DE_ASSERT(0); // fall through
1253 		case 0:
1254 			if (m_data.payloadSC != SC_IMAGE)
1255 				continue;
1256 			local = m_data.payloadMemLocal;
1257 			break;
1258 		case 1:
1259 			if (m_data.guardSC != SC_IMAGE)
1260 				continue;
1261 			local = m_data.guardMemLocal;
1262 			break;
1263 		}
1264 
1265 		try
1266 		{
1267 			images[i] = de::MovePtr<ImageWithMemory>(new ImageWithMemory(
1268 				vk, device, allocator, imageCreateInfo, local ? MemoryRequirement::Local : MemoryRequirement::NonLocal));
1269 		}
1270 		catch (const tcu::NotSupportedError&)
1271 		{
1272 			if (!local)
1273 			{
1274 				TCU_THROW(NotSupportedError, "Test variant uses non-device-local memory, which is not supported");
1275 			}
1276 			throw;
1277 		}
1278 		imageViewCreateInfo.image = **images[i];
1279 		imageViews[i] = createImageView(vk, device, &imageViewCreateInfo, NULL);
1280 
1281 		imageDescriptors[i] = makeDescriptorImageInfo(DE_NULL, *imageViews[i], VK_IMAGE_LAYOUT_GENERAL);
1282 	}
1283 
	vk::DescriptorSetLayoutBuilder layoutBuilder;

	switch (m_data.payloadSC)
	{
	default:
	case SC_BUFFER:	layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, allShaderStages); break;
	case SC_IMAGE:	layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, allShaderStages); break;
	}
	switch (m_data.guardSC)
	{
	default:
	case SC_BUFFER:	layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, allShaderStages); break;
	case SC_IMAGE:	layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, allShaderStages); break;
	}
	layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, allShaderStages);

	vk::Unique<vk::VkDescriptorSetLayout>	descriptorSetLayout(layoutBuilder.build(vk, device));

	vk::Unique<vk::VkDescriptorPool>		descriptorPool(vk::DescriptorPoolBuilder()
		.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 3u)
		.addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 3u)
		.build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
	vk::Unique<vk::VkDescriptorSet>			descriptorSet		(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));

	vk::DescriptorSetUpdateBuilder setUpdateBuilder;
	switch (m_data.payloadSC)
	{
	default: DE_ASSERT(0); // fall through
	case SC_PHYSBUFFER:
	case SC_WORKGROUP:
		break;
	case SC_BUFFER:
		setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(0),
			VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[0]);
		break;
	case SC_IMAGE:
		setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(0),
			VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptors[0]);
		break;
	}
	switch (m_data.guardSC)
	{
	default: DE_ASSERT(0); // fall through
	case SC_PHYSBUFFER:
	case SC_WORKGROUP:
		break;
	case SC_BUFFER:
		setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(1),
			VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[1]);
		break;
	case SC_IMAGE:
		setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(1),
			VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptors[1]);
		break;
	}
	setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(2),
		VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[2]);

	setUpdateBuilder.update(vk, device);

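	// The 16-byte push constant range below holds two VkDeviceAddress values: the payload
	// buffer address at offset 0 and the guard buffer address at offset 8. They are only
	// pushed by the physical-storage-buffer variants (see the command buffer recording).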
	const VkPushConstantRange pushConstRange =
	{
		allShaderStages,		// VkShaderStageFlags	stageFlags
		0,						// deUint32				offset
		16						// deUint32				size
	};

	const VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo =
	{
		VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,				// sType
		DE_NULL,													// pNext
		(VkPipelineLayoutCreateFlags)0,
		1,															// setLayoutCount
		&descriptorSetLayout.get(),									// pSetLayouts
		1u,															// pushConstantRangeCount
		&pushConstRange,											// pPushConstantRanges
	};

	Move<VkPipelineLayout> pipelineLayout = createPipelineLayout(vk, device, &pipelineLayoutCreateInfo, NULL);

	Move<VkPipeline> pipeline;
	Move<VkRenderPass> renderPass;
	Move<VkFramebuffer> framebuffer;

	VkPipelineBindPoint bindPoint = m_data.stage == STAGE_COMPUTE ? VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS;

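	// Specialization constants 0 and 1 feed DIM and NUM_WORKGROUP_EACH_DIM to the test shaders.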
	const deUint32 specData[2] = {DIM, NUM_WORKGROUP_EACH_DIM};

	const vk::VkSpecializationMapEntry entries[2] =
	{
		{0, sizeof(deUint32) * 0, sizeof(deUint32)},
		{1, sizeof(deUint32) * 1, sizeof(deUint32)},
	};

	const vk::VkSpecializationInfo specInfo =
	{
		2,						// mapEntryCount
		entries,				// pMapEntries
		sizeof(specData),		// dataSize
		specData				// pData
	};

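	// Build a compute pipeline or a minimal graphics pipeline depending on the stage under
	// test; in every case the shader binary named "test" contains the actual test body.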
	if (m_data.stage == STAGE_COMPUTE)
	{
		const Unique<VkShaderModule>	shader						(createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0));

		const VkPipelineShaderStageCreateInfo	shaderCreateInfo =
		{
			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
			DE_NULL,
			(VkPipelineShaderStageCreateFlags)0,
			VK_SHADER_STAGE_COMPUTE_BIT,								// stage
			*shader,													// shader
			"main",
			&specInfo,													// pSpecializationInfo
		};

		const VkComputePipelineCreateInfo		pipelineCreateInfo =
		{
			VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
			DE_NULL,
			0u,															// flags
			shaderCreateInfo,											// cs
			*pipelineLayout,											// layout
			(vk::VkPipeline)0,											// basePipelineHandle
			0u,															// basePipelineIndex
		};
		pipeline = createComputePipeline(vk, device, DE_NULL, &pipelineCreateInfo, NULL);
	}
	else
	{

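		// The render pass has no attachments: results are written through the storage
		// descriptors, so the framebuffer only defines the render area dimensions.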
		const vk::VkSubpassDescription		subpassDesc			=
		{
			(vk::VkSubpassDescriptionFlags)0,
			vk::VK_PIPELINE_BIND_POINT_GRAPHICS,					// pipelineBindPoint
			0u,														// inputCount
			DE_NULL,												// pInputAttachments
			0u,														// colorCount
			DE_NULL,												// pColorAttachments
			DE_NULL,												// pResolveAttachments
			DE_NULL,												// depthStencilAttachment
			0u,														// preserveCount
			DE_NULL,												// pPreserveAttachments
		};
		const vk::VkRenderPassCreateInfo	renderPassParams	=
		{
			vk::VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,			// sType
			DE_NULL,												// pNext
			(vk::VkRenderPassCreateFlags)0,
			0u,														// attachmentCount
			DE_NULL,												// pAttachments
			1u,														// subpassCount
			&subpassDesc,											// pSubpasses
			0u,														// dependencyCount
			DE_NULL,												// pDependencies
		};

		renderPass = createRenderPass(vk, device, &renderPassParams);

		const vk::VkFramebufferCreateInfo	framebufferParams	=
		{
			vk::VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,	// sType
			DE_NULL,										// pNext
			(vk::VkFramebufferCreateFlags)0,
			*renderPass,									// renderPass
			0u,												// attachmentCount
			DE_NULL,										// pAttachments
			DIM*NUM_WORKGROUP_EACH_DIM,						// width
			DIM*NUM_WORKGROUP_EACH_DIM,						// height
			1u,												// layers
		};

		framebuffer = createFramebuffer(vk, device, &framebufferParams);

		const VkPipelineVertexInputStateCreateInfo		vertexInputStateCreateInfo		=
		{
			VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,	// VkStructureType							sType;
			DE_NULL,													// const void*								pNext;
			(VkPipelineVertexInputStateCreateFlags)0,					// VkPipelineVertexInputStateCreateFlags	flags;
			0u,															// deUint32									vertexBindingDescriptionCount;
			DE_NULL,													// const VkVertexInputBindingDescription*	pVertexBindingDescriptions;
			0u,															// deUint32									vertexAttributeDescriptionCount;
			DE_NULL														// const VkVertexInputAttributeDescription*	pVertexAttributeDescriptions;
		};

		const VkPipelineInputAssemblyStateCreateInfo	inputAssemblyStateCreateInfo	=
		{
			VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,	// VkStructureType							sType;
			DE_NULL,														// const void*								pNext;
			(VkPipelineInputAssemblyStateCreateFlags)0,						// VkPipelineInputAssemblyStateCreateFlags	flags;
			(m_data.stage == STAGE_VERTEX) ? VK_PRIMITIVE_TOPOLOGY_POINT_LIST : VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, // VkPrimitiveTopology						topology;
			VK_FALSE														// VkBool32									primitiveRestartEnable;
		};

		const VkPipelineRasterizationStateCreateInfo	rasterizationStateCreateInfo	=
		{
			VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,		// VkStructureType							sType;
			DE_NULL,														// const void*								pNext;
			(VkPipelineRasterizationStateCreateFlags)0,						// VkPipelineRasterizationStateCreateFlags	flags;
			VK_FALSE,														// VkBool32									depthClampEnable;
			(m_data.stage == STAGE_VERTEX) ? VK_TRUE : VK_FALSE,			// VkBool32									rasterizerDiscardEnable;
			VK_POLYGON_MODE_FILL,											// VkPolygonMode							polygonMode;
			VK_CULL_MODE_NONE,												// VkCullModeFlags							cullMode;
			VK_FRONT_FACE_CLOCKWISE,										// VkFrontFace								frontFace;
			VK_FALSE,														// VkBool32									depthBiasEnable;
			0.0f,															// float									depthBiasConstantFactor;
			0.0f,															// float									depthBiasClamp;
			0.0f,															// float									depthBiasSlopeFactor;
			1.0f															// float									lineWidth;
		};

		const VkPipelineMultisampleStateCreateInfo		multisampleStateCreateInfo =
		{
			VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,	// VkStructureType							sType
			DE_NULL,													// const void*								pNext
			0u,															// VkPipelineMultisampleStateCreateFlags	flags
			VK_SAMPLE_COUNT_1_BIT,										// VkSampleCountFlagBits					rasterizationSamples
			VK_FALSE,													// VkBool32									sampleShadingEnable
			1.0f,														// float									minSampleShading
			DE_NULL,													// const VkSampleMask*						pSampleMask
			VK_FALSE,													// VkBool32									alphaToCoverageEnable
			VK_FALSE													// VkBool32									alphaToOneEnable
		};

		VkViewport viewport = makeViewport(DIM*NUM_WORKGROUP_EACH_DIM, DIM*NUM_WORKGROUP_EACH_DIM);
		VkRect2D scissor = makeRect2D(DIM*NUM_WORKGROUP_EACH_DIM, DIM*NUM_WORKGROUP_EACH_DIM);

		const VkPipelineViewportStateCreateInfo			viewportStateCreateInfo				=
		{
			VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,	// VkStructureType						sType
			DE_NULL,												// const void*							pNext
			(VkPipelineViewportStateCreateFlags)0,					// VkPipelineViewportStateCreateFlags	flags
			1u,														// deUint32								viewportCount
			&viewport,												// const VkViewport*					pViewports
			1u,														// deUint32								scissorCount
			&scissor												// const VkRect2D*						pScissors
		};

		Move<VkShaderModule> fs;
		Move<VkShaderModule> vs;

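		// For STAGE_VERTEX the "test" shader runs as the vertex stage with rasterization
		// discarded, so no fragment stage is needed; for STAGE_FRAGMENT a helper "vert"
		// shader feeds the "test" fragment shader.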
		deUint32 numStages;
		if (m_data.stage == STAGE_VERTEX)
		{
			vs = createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0);
			fs = createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0); // unused: numStages == 1, so only the vertex stage is bound
			numStages = 1u;
		}
		else
		{
			vs = createShaderModule(vk, device, m_context.getBinaryCollection().get("vert"), 0);
			fs = createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0);
			numStages = 2u;
		}

		const VkPipelineShaderStageCreateInfo	shaderCreateInfo[2] = {
			{
				VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
				DE_NULL,
				(VkPipelineShaderStageCreateFlags)0,
				VK_SHADER_STAGE_VERTEX_BIT,									// stage
				*vs,														// shader
				"main",
				&specInfo,													// pSpecializationInfo
			},
			{
				VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
				DE_NULL,
				(VkPipelineShaderStageCreateFlags)0,
				VK_SHADER_STAGE_FRAGMENT_BIT,								// stage
				*fs,														// shader
				"main",
				&specInfo,													// pSpecializationInfo
			}
		};

		const VkGraphicsPipelineCreateInfo				graphicsPipelineCreateInfo		=
		{
			VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,	// VkStructureType									sType;
			DE_NULL,											// const void*										pNext;
			(VkPipelineCreateFlags)0,							// VkPipelineCreateFlags							flags;
			numStages,											// deUint32											stageCount;
			&shaderCreateInfo[0],								// const VkPipelineShaderStageCreateInfo*			pStages;
			&vertexInputStateCreateInfo,						// const VkPipelineVertexInputStateCreateInfo*		pVertexInputState;
			&inputAssemblyStateCreateInfo,						// const VkPipelineInputAssemblyStateCreateInfo*	pInputAssemblyState;
			DE_NULL,											// const VkPipelineTessellationStateCreateInfo*		pTessellationState;
			&viewportStateCreateInfo,							// const VkPipelineViewportStateCreateInfo*			pViewportState;
			&rasterizationStateCreateInfo,						// const VkPipelineRasterizationStateCreateInfo*	pRasterizationState;
			&multisampleStateCreateInfo,						// const VkPipelineMultisampleStateCreateInfo*		pMultisampleState;
			DE_NULL,											// const VkPipelineDepthStencilStateCreateInfo*		pDepthStencilState;
			DE_NULL,											// const VkPipelineColorBlendStateCreateInfo*		pColorBlendState;
			DE_NULL,											// const VkPipelineDynamicStateCreateInfo*			pDynamicState;
			pipelineLayout.get(),								// VkPipelineLayout									layout;
			renderPass.get(),									// VkRenderPass										renderPass;
			0u,													// deUint32											subpass;
			DE_NULL,											// VkPipeline										basePipelineHandle;
			0													// int												basePipelineIndex;
		};

		pipeline = createGraphicsPipeline(vk, device, DE_NULL, &graphicsPipelineCreateInfo);
	}

	const VkQueue					queue					= m_context.getUniversalQueue();
	Move<VkCommandPool>				cmdPool					= createCommandPool(vk, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, m_context.getUniversalQueueFamilyIndex());
	Move<VkCommandBuffer>			cmdBuffer				= allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);

	VkBufferDeviceAddressInfo addrInfo =
	{
		VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,	// VkStructureType	sType;
		DE_NULL,										// const void*		pNext;
		0,												// VkBuffer			buffer;
	};

	VkImageSubresourceRange range = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
	VkClearValue clearColor = makeClearValueColorU32(0,0,0,0);

	VkMemoryBarrier					memBarrier =
	{
		VK_STRUCTURE_TYPE_MEMORY_BARRIER,	// sType
		DE_NULL,							// pNext
		0u,									// srcAccessMask
		0u,									// dstAccessMask
	};

	const VkBufferCopy	copyParams =
	{
		(VkDeviceSize)0u,						// srcOffset
		(VkDeviceSize)0u,						// dstOffset
		bufferSizes[2]							// size
	};

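	// Record and submit the same workload NUM_SUBMITS times. The result buffer is zeroed
	// once, before the first submit, so failures accumulate across all submits.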
	const deUint32 NUM_SUBMITS = 4;

	for (deUint32 x = 0; x < NUM_SUBMITS; ++x)
	{
		beginCommandBuffer(vk, *cmdBuffer, 0u);

		if (x == 0)
			vk.cmdFillBuffer(*cmdBuffer, **buffers[2], 0, bufferSizes[2], 0);

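		// Transition any test images from UNDEFINED to GENERAL before the transfer clears
		// and shader accesses below.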
		for (deUint32 i = 0; i < 2; ++i)
		{
			if (!images[i])
				continue;

			const VkImageMemoryBarrier imageBarrier =
			{
				VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,				// VkStructureType		sType
				DE_NULL,											// const void*			pNext
				0u,													// VkAccessFlags		srcAccessMask
				VK_ACCESS_TRANSFER_WRITE_BIT,						// VkAccessFlags		dstAccessMask
				VK_IMAGE_LAYOUT_UNDEFINED,							// VkImageLayout		oldLayout
				VK_IMAGE_LAYOUT_GENERAL,							// VkImageLayout		newLayout
				VK_QUEUE_FAMILY_IGNORED,							// uint32_t				srcQueueFamilyIndex
				VK_QUEUE_FAMILY_IGNORED,							// uint32_t				dstQueueFamilyIndex
				**images[i],										// VkImage				image
				{
					VK_IMAGE_ASPECT_COLOR_BIT,				// VkImageAspectFlags	aspectMask
					0u,										// uint32_t				baseMipLevel
					1u,										// uint32_t				levelCount
					0u,										// uint32_t				baseArrayLayer
					1u,										// uint32_t				layerCount
				}
			};

			vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
								  (VkDependencyFlags)0,
								  0, (const VkMemoryBarrier*)DE_NULL,
								  0, (const VkBufferMemoryBarrier*)DE_NULL,
								  1, &imageBarrier);
		}

		vk.cmdBindDescriptorSets(*cmdBuffer, bindPoint, *pipelineLayout, 0u, 1, &*descriptorSet, 0u, DE_NULL);
		vk.cmdBindPipeline(*cmdBuffer, bindPoint, *pipeline);

		if (m_data.payloadSC == SC_PHYSBUFFER)
		{
			addrInfo.buffer = **buffers[0];
			VkDeviceAddress addr = vk.getBufferDeviceAddress(device, &addrInfo);
			vk.cmdPushConstants(*cmdBuffer, *pipelineLayout, allShaderStages,
								0, sizeof(VkDeviceSize), &addr);
		}
		if (m_data.guardSC == SC_PHYSBUFFER)
		{
			addrInfo.buffer = **buffers[1];
			VkDeviceAddress addr = vk.getBufferDeviceAddress(device, &addrInfo);
			vk.cmdPushConstants(*cmdBuffer, *pipelineLayout, allShaderStages,
								8, sizeof(VkDeviceSize), &addr);
		}

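		// Each submit runs 50 iterations: re-zero the payload and guard, make the clears
		// visible to the shaders, run one dispatch/draw, then make shader writes available
		// to the next iteration's transfer commands.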
		for (deUint32 iters = 0; iters < 50; ++iters)
		{
			for (deUint32 i = 0; i < 2; ++i)
			{
				if (buffers[i])
					vk.cmdFillBuffer(*cmdBuffer, **buffers[i], 0, bufferSizes[i], 0);
				if (images[i])
					vk.cmdClearColorImage(*cmdBuffer, **images[i], VK_IMAGE_LAYOUT_GENERAL, &clearColor.color, 1, &range);
			}

			memBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
			memBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
			vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, allPipelineStages,
				0, 1, &memBarrier, 0, DE_NULL, 0, DE_NULL);

			if (m_data.stage == STAGE_COMPUTE)
			{
				vk.cmdDispatch(*cmdBuffer, NUM_WORKGROUP_EACH_DIM, NUM_WORKGROUP_EACH_DIM, 1);
			}
			else
			{
				beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer,
								makeRect2D(DIM*NUM_WORKGROUP_EACH_DIM, DIM*NUM_WORKGROUP_EACH_DIM),
								0, DE_NULL, VK_SUBPASS_CONTENTS_INLINE);
				// Draw a point cloud for vertex shader testing, and a single quad for fragment shader testing
				if (m_data.stage == STAGE_VERTEX)
				{
					vk.cmdDraw(*cmdBuffer, DIM*DIM*NUM_WORKGROUP_EACH_DIM*NUM_WORKGROUP_EACH_DIM, 1u, 0u, 0u);
				}
				else
				{
					vk.cmdDraw(*cmdBuffer, 4u, 1u, 0u, 0u);
				}
				endRenderPass(vk, *cmdBuffer);
			}

			memBarrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
			memBarrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT;
			vk.cmdPipelineBarrier(*cmdBuffer, allPipelineStages, VK_PIPELINE_STAGE_TRANSFER_BIT,
				0, 1, &memBarrier, 0, DE_NULL, 0, DE_NULL);
		}

		if (x == NUM_SUBMITS - 1)
		{
			vk.cmdCopyBuffer(*cmdBuffer, **buffers[2], **copyBuffer, 1, &copyParams);
			memBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
			memBarrier.dstAccessMask = VK_ACCESS_HOST_READ_BIT;
			vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
				0, 1, &memBarrier, 0, DE_NULL, 0, DE_NULL);
		}

		endCommandBuffer(vk, *cmdBuffer);

		submitCommandsAndWait(vk, device, queue, cmdBuffer.get());

		m_context.resetCommandPoolForVKSC(device, *cmdPool);
	}

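	// Read back the result buffer: one 32-bit word per invocation, nonzero meaning that
	// invocation failed.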
	tcu::TestLog& log = m_context.getTestContext().getLog();

	deUint32 *ptr = (deUint32 *)copyBuffer->getAllocation().getHostPtr();
	invalidateAlloc(vk, device, copyBuffer->getAllocation());
	qpTestResult res = QP_TEST_RESULT_PASS;

	deUint32 numErrors = 0;
	for (deUint32 i = 0; i < NUM_INVOCATIONS; ++i)
	{
		if (ptr[i] != 0)
		{
			if (numErrors < 256)
			{
				log << tcu::TestLog::Message << "Failed invocation: " << i << tcu::TestLog::EndMessage;
			}
			numErrors++;
			res = QP_TEST_RESULT_FAIL;
		}
	}

	if (numErrors)
	{
		log << tcu::TestLog::Message << "Total Errors: " << numErrors << tcu::TestLog::EndMessage;
	}

	return tcu::TestStatus(res, qpGetTestResultName(res));
}

#ifndef CTS_USES_VULKANSC
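// Support check shared by the Amber permuted-index cases; the .amber scripts appear to
// assume X-dimension workgroup counts, sizes and total invocations of at least 256.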
void checkPermutedIndexTestSupport (Context& context, std::string testName)
{
	DE_UNREF(testName);

	const auto		maxComputeWorkGroupCount		= context.getDeviceProperties().limits.maxComputeWorkGroupCount;
	const auto		maxComputeWorkGroupSize			= context.getDeviceProperties().limits.maxComputeWorkGroupSize;
	const auto		maxComputeWorkGroupInvocations	= context.getDeviceProperties().limits.maxComputeWorkGroupInvocations;

	if (maxComputeWorkGroupCount[0] < 256u)
		TCU_THROW(NotSupportedError, "Minimum of 256 required for maxComputeWorkGroupCount.x");

	if (maxComputeWorkGroupSize[0] < 256u)
		TCU_THROW(NotSupportedError, "Minimum of 256 required for maxComputeWorkGroupSize.x");

	if (maxComputeWorkGroupInvocations < 256u)
		TCU_THROW(NotSupportedError, "Minimum of 256 required for maxComputeWorkGroupInvocations");
}

tcu::TestCaseGroup* createPermutedIndexTests (tcu::TestContext& testCtx)
{
	de::MovePtr<tcu::TestCaseGroup> permutedIndex (new tcu::TestCaseGroup(testCtx, "permuted_index"));
	static const char			dataDir[]	= "memory_model/message_passing/permuted_index";
	static const std::string	cases[]		=
	{
		"barrier",
		"release_acquire",
		"release_acquire_atomic_payload"
	};

	for (const auto& test : cases)
	{
		cts_amber::AmberTestCase* testCase = cts_amber::createAmberTestCase(testCtx, test.c_str(), dataDir, (test + ".amber").c_str());
		testCase->setCheckSupportCallback(checkPermutedIndexTestSupport);

		permutedIndex->addChild(testCase);
	}

	return permutedIndex.release();
}
#endif // CTS_USES_VULKANSC

}	// anonymous

tcu::TestCaseGroup*	createTests (tcu::TestContext& testCtx, const std::string& name)
{
	de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(
			testCtx, name.c_str(), "Memory model tests"));

	typedef struct
	{
		deUint32				value;
		const char*				name;
	} TestGroupCase;

	TestGroupCase ttCases[] =
	{
		{ TT_MP,	"message_passing"},
		{ TT_WAR,	"write_after_read"},
	};

	TestGroupCase core11Cases[] =
	{
		// Supported by Vulkan 1.1
		{ 1,	"core11"},
		// Requires the VK_KHR_vulkan_memory_model extension
		{ 0,	"ext"},
	};

	TestGroupCase dtCases[] =
	{
		// uint32_t atomics
		{ DATA_TYPE_UINT,		"u32"},
		// uint64_t atomics
		{ DATA_TYPE_UINT64,		"u64"},
		// float32 atomics
		{ DATA_TYPE_FLOAT32,	"f32"},
		// float64 atomics
		{ DATA_TYPE_FLOAT64,	"f64"},
	};

	TestGroupCase cohCases[] =
	{
		// coherent payload variable
		{ 1,	"coherent"},
		// noncoherent payload variable
		{ 0,	"noncoherent"},
	};

	TestGroupCase stCases[] =
	{
		// release fence, acquire fence
		{ ST_FENCE_FENCE,					"fence_fence"},
		// release fence, atomic acquire
		{ ST_FENCE_ATOMIC,					"fence_atomic"},
		// atomic release, acquire fence
		{ ST_ATOMIC_FENCE,					"atomic_fence"},
		// atomic release, atomic acquire
		{ ST_ATOMIC_ATOMIC,					"atomic_atomic"},
		// control barrier
		{ ST_CONTROL_BARRIER,				"control_barrier"},
		// control barrier with release/acquire
		{ ST_CONTROL_AND_MEMORY_BARRIER,	"control_and_memory_barrier"},
	};

	TestGroupCase rmwCases[] =
	{
		{ 0,	"atomicwrite"},
		{ 1,	"atomicrmw"},
	};

	TestGroupCase scopeCases[] =
	{
		{ SCOPE_DEVICE,			"device"},
		{ SCOPE_QUEUEFAMILY,	"queuefamily"},
		{ SCOPE_WORKGROUP,		"workgroup"},
		{ SCOPE_SUBGROUP,		"subgroup"},
	};

	TestGroupCase plCases[] =
	{
		// payload variable in non-local memory
		{ 0,	"payload_nonlocal"},
		// payload variable in local memory
		{ 1,	"payload_local"},
	};

	TestGroupCase pscCases[] =
	{
		// payload variable in buffer memory
		{ SC_BUFFER,	"buffer"},
		// payload variable in image memory
		{ SC_IMAGE,		"image"},
		// payload variable in workgroup memory
		{ SC_WORKGROUP,	"workgroup"},
		// payload variable in physical storage buffer memory
		{ SC_PHYSBUFFER,"physbuffer"},
	};

	TestGroupCase glCases[] =
	{
		// guard variable in non-local memory
		{ 0,	"guard_nonlocal"},
		// guard variable in local memory
		{ 1,	"guard_local"},
	};

	TestGroupCase gscCases[] =
	{
		// guard variable in buffer memory
		{ SC_BUFFER,	"buffer"},
		// guard variable in image memory
		{ SC_IMAGE,		"image"},
		// guard variable in workgroup memory
		{ SC_WORKGROUP,	"workgroup"},
		// guard variable in physical storage buffer memory
		{ SC_PHYSBUFFER,"physbuffer"},
	};

	TestGroupCase stageCases[] =
	{
		{ STAGE_COMPUTE,	"comp"},
		{ STAGE_VERTEX,		"vert"},
		{ STAGE_FRAGMENT,	"frag"},
	};

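	// Build the full parameter matrix. Invalid or redundant combinations are filtered out
	// in the innermost loop below before a test case is created.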
	for (int ttNdx = 0; ttNdx < DE_LENGTH_OF_ARRAY(ttCases); ttNdx++)
	{
		de::MovePtr<tcu::TestCaseGroup> ttGroup(new tcu::TestCaseGroup(testCtx, ttCases[ttNdx].name));

#ifndef CTS_USES_VULKANSC
		// Permuted index tests for message passing.
		if (ttCases[ttNdx].value == TT_MP)
			ttGroup->addChild(createPermutedIndexTests(testCtx));
#endif // CTS_USES_VULKANSC

		for (int core11Ndx = 0; core11Ndx < DE_LENGTH_OF_ARRAY(core11Cases); core11Ndx++)
		{
			de::MovePtr<tcu::TestCaseGroup> core11Group(new tcu::TestCaseGroup(testCtx, core11Cases[core11Ndx].name));
			for (int dtNdx = 0; dtNdx < DE_LENGTH_OF_ARRAY(dtCases); dtNdx++)
			{
				de::MovePtr<tcu::TestCaseGroup> dtGroup(new tcu::TestCaseGroup(testCtx, dtCases[dtNdx].name));
				for (int cohNdx = 0; cohNdx < DE_LENGTH_OF_ARRAY(cohCases); cohNdx++)
				{
					de::MovePtr<tcu::TestCaseGroup> cohGroup(new tcu::TestCaseGroup(testCtx, cohCases[cohNdx].name));
					for (int stNdx = 0; stNdx < DE_LENGTH_OF_ARRAY(stCases); stNdx++)
					{
						de::MovePtr<tcu::TestCaseGroup> stGroup(new tcu::TestCaseGroup(testCtx, stCases[stNdx].name));
						for (int rmwNdx = 0; rmwNdx < DE_LENGTH_OF_ARRAY(rmwCases); rmwNdx++)
						{
							de::MovePtr<tcu::TestCaseGroup> rmwGroup(new tcu::TestCaseGroup(testCtx, rmwCases[rmwNdx].name));
							for (int scopeNdx = 0; scopeNdx < DE_LENGTH_OF_ARRAY(scopeCases); scopeNdx++)
							{
								de::MovePtr<tcu::TestCaseGroup> scopeGroup(new tcu::TestCaseGroup(testCtx, scopeCases[scopeNdx].name));
								for (int plNdx = 0; plNdx < DE_LENGTH_OF_ARRAY(plCases); plNdx++)
								{
									de::MovePtr<tcu::TestCaseGroup> plGroup(new tcu::TestCaseGroup(testCtx, plCases[plNdx].name));
									for (int pscNdx = 0; pscNdx < DE_LENGTH_OF_ARRAY(pscCases); pscNdx++)
									{
										de::MovePtr<tcu::TestCaseGroup> pscGroup(new tcu::TestCaseGroup(testCtx, pscCases[pscNdx].name));
										for (int glNdx = 0; glNdx < DE_LENGTH_OF_ARRAY(glCases); glNdx++)
										{
											de::MovePtr<tcu::TestCaseGroup> glGroup(new tcu::TestCaseGroup(testCtx, glCases[glNdx].name));
											for (int gscNdx = 0; gscNdx < DE_LENGTH_OF_ARRAY(gscCases); gscNdx++)
											{
												de::MovePtr<tcu::TestCaseGroup> gscGroup(new tcu::TestCaseGroup(testCtx, gscCases[gscNdx].name));
												for (int stageNdx = 0; stageNdx < DE_LENGTH_OF_ARRAY(stageCases); stageNdx++)
												{
													CaseDef c =
													{
														!!plCases[plNdx].value,					// bool payloadMemLocal;
														!!glCases[glNdx].value,					// bool guardMemLocal;
														!!cohCases[cohNdx].value,				// bool coherent;
														!!core11Cases[core11Ndx].value,			// bool core11;
														!!rmwCases[rmwNdx].value,				// bool atomicRMW;
														(TestType)ttCases[ttNdx].value,			// TestType testType;
														(StorageClass)pscCases[pscNdx].value,	// StorageClass payloadSC;
														(StorageClass)gscCases[gscNdx].value,	// StorageClass guardSC;
														(Scope)scopeCases[scopeNdx].value,		// Scope scope;
														(SyncType)stCases[stNdx].value,			// SyncType syncType;
														(Stage)stageCases[stageNdx].value,		// Stage stage;
														(DataType)dtCases[dtNdx].value,			// DataType dataType;
														false,									// bool transitive;
														false,									// bool transitiveVis;
													};

													// Mustpass11 tests should only exercise things we expect to work on
													// existing implementations. Exclude noncoherent tests, which require
													// new extensions, and assume atomic synchronization wouldn't work
													// (i.e. atomics may be implemented as relaxed atomics). Exclude
													// queuefamily scope, which doesn't exist in Vulkan 1.1, and physical
													// storage buffer, which doesn't support the legacy decorations.
													if (c.core11 &&
														(c.coherent == 0 ||
														c.syncType == ST_FENCE_ATOMIC ||
														c.syncType == ST_ATOMIC_FENCE ||
														c.syncType == ST_ATOMIC_ATOMIC ||
														c.dataType == DATA_TYPE_UINT64 ||
														c.dataType == DATA_TYPE_FLOAT64 ||
														c.scope == SCOPE_QUEUEFAMILY ||
														c.payloadSC == SC_PHYSBUFFER ||
														c.guardSC == SC_PHYSBUFFER))
													{
														continue;
													}

													if (c.stage != STAGE_COMPUTE &&
														c.scope == SCOPE_WORKGROUP)
													{
														continue;
													}

													// Don't exercise local and non-local variants for workgroup memory,
													// and don't exercise workgroup memory for non-compute stages
													if (c.payloadSC == SC_WORKGROUP && (c.payloadMemLocal != 0 || c.stage != STAGE_COMPUTE))
													{
														continue;
													}
													if (c.guardSC == SC_WORKGROUP && (c.guardMemLocal != 0 || c.stage != STAGE_COMPUTE))
													{
														continue;
													}
													// Can't do a control barrier with larger than workgroup scope, or in non-compute stages
													if ((c.syncType == ST_CONTROL_BARRIER || c.syncType == ST_CONTROL_AND_MEMORY_BARRIER) &&
														(c.scope == SCOPE_DEVICE || c.scope == SCOPE_QUEUEFAMILY || c.stage != STAGE_COMPUTE))
													{
														continue;
													}

													// Limit RMW atomics to ST_ATOMIC_ATOMIC, just to reduce the number of test cases
													if (c.atomicRMW && c.syncType != ST_ATOMIC_ATOMIC)
													{
														continue;
													}

													// uint64/float32/float64 testing is primarily for atomics, so only test them with ST_ATOMIC_ATOMIC
													const bool atomicTesting = (c.dataType == DATA_TYPE_UINT64 || c.dataType == DATA_TYPE_FLOAT32 || c.dataType == DATA_TYPE_FLOAT64);
													if (atomicTesting && c.syncType != ST_ATOMIC_ATOMIC)
													{
														continue;
													}

													// There are no 64-bit image types, so skip tests with both payload and guard in image memory
													if (c.dataType == DATA_TYPE_UINT64 && c.payloadSC == SC_IMAGE && c.guardSC == SC_IMAGE)
													{
														continue;
													}

													// No support for atomic operations on 64-bit floating point images
													if (c.dataType == DATA_TYPE_FLOAT64 && (c.payloadSC == SC_IMAGE || c.guardSC == SC_IMAGE))
													{
														continue;
													}
													// Control barrier tests don't use a guard variable, so only generate them
													// for the default guard parameters (guardSC == SC_BUFFER, guard_nonlocal)
													if ((c.syncType == ST_CONTROL_BARRIER || c.syncType == ST_CONTROL_AND_MEMORY_BARRIER) &&
														(c.guardSC != 0 || c.guardMemLocal != 0))
													{
														continue;
													}

													gscGroup->addChild(new MemoryModelTestCase(testCtx, stageCases[stageNdx].name, c));
												}
												glGroup->addChild(gscGroup.release());
											}
											pscGroup->addChild(glGroup.release());
										}
										plGroup->addChild(pscGroup.release());
									}
									scopeGroup->addChild(plGroup.release());
								}
								rmwGroup->addChild(scopeGroup.release());
							}
							stGroup->addChild(rmwGroup.release());
						}
						cohGroup->addChild(stGroup.release());
					}
					dtGroup->addChild(cohGroup.release());
				}
				core11Group->addChild(dtGroup.release());
			}
			ttGroup->addChild(core11Group.release());
		}
		group->addChild(ttGroup.release());
	}

	TestGroupCase transVisCases[] =
	{
		// destination invocation acquires
		{ 0,	"nontransvis"},
		// invocation 0,0 acquires
		{ 1,	"transvis"},
	};

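	// Transitive visibility tests: compute-only, device scope, message passing on uint32
	// data. In the "transvis" variants invocation (0,0) performs the acquire on behalf of
	// the destination invocation, rather than the destination invocation acquiring itself.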
	de::MovePtr<tcu::TestCaseGroup> transGroup(new tcu::TestCaseGroup(testCtx, "transitive"));
	for (int cohNdx = 0; cohNdx < DE_LENGTH_OF_ARRAY(cohCases); cohNdx++)
	{
		de::MovePtr<tcu::TestCaseGroup> cohGroup(new tcu::TestCaseGroup(testCtx, cohCases[cohNdx].name));
		for (int stNdx = 0; stNdx < DE_LENGTH_OF_ARRAY(stCases); stNdx++)
		{
			de::MovePtr<tcu::TestCaseGroup> stGroup(new tcu::TestCaseGroup(testCtx, stCases[stNdx].name));
			for (int plNdx = 0; plNdx < DE_LENGTH_OF_ARRAY(plCases); plNdx++)
			{
				de::MovePtr<tcu::TestCaseGroup> plGroup(new tcu::TestCaseGroup(testCtx, plCases[plNdx].name));
				for (int pscNdx = 0; pscNdx < DE_LENGTH_OF_ARRAY(pscCases); pscNdx++)
				{
					de::MovePtr<tcu::TestCaseGroup> pscGroup(new tcu::TestCaseGroup(testCtx, pscCases[pscNdx].name));
					for (int glNdx = 0; glNdx < DE_LENGTH_OF_ARRAY(glCases); glNdx++)
					{
						de::MovePtr<tcu::TestCaseGroup> glGroup(new tcu::TestCaseGroup(testCtx, glCases[glNdx].name));
						for (int gscNdx = 0; gscNdx < DE_LENGTH_OF_ARRAY(gscCases); gscNdx++)
						{
							de::MovePtr<tcu::TestCaseGroup> gscGroup(new tcu::TestCaseGroup(testCtx, gscCases[gscNdx].name));
							for (int visNdx = 0; visNdx < DE_LENGTH_OF_ARRAY(transVisCases); visNdx++)
							{
								CaseDef c =
								{
									!!plCases[plNdx].value,					// bool payloadMemLocal;
									!!glCases[glNdx].value,					// bool guardMemLocal;
									!!cohCases[cohNdx].value,				// bool coherent;
									false,									// bool core11;
									false,									// bool atomicRMW;
									TT_MP,									// TestType testType;
									(StorageClass)pscCases[pscNdx].value,	// StorageClass payloadSC;
									(StorageClass)gscCases[gscNdx].value,	// StorageClass guardSC;
									SCOPE_DEVICE,							// Scope scope;
									(SyncType)stCases[stNdx].value,			// SyncType syncType;
									STAGE_COMPUTE,							// Stage stage;
									DATA_TYPE_UINT,							// DataType dataType;
									true,									// bool transitive;
									!!transVisCases[visNdx].value,			// bool transitiveVis;
								};
								if (c.payloadSC == SC_WORKGROUP || c.guardSC == SC_WORKGROUP)
								{
									continue;
								}
								if (c.syncType == ST_CONTROL_BARRIER || c.syncType == ST_CONTROL_AND_MEMORY_BARRIER)
								{
									continue;
								}
								gscGroup->addChild(new MemoryModelTestCase(testCtx, transVisCases[visNdx].name, c));
							}
							glGroup->addChild(gscGroup.release());
						}
						pscGroup->addChild(glGroup.release());
					}
					plGroup->addChild(pscGroup.release());
				}
				stGroup->addChild(plGroup.release());
			}
			cohGroup->addChild(stGroup.release());
		}
		transGroup->addChild(cohGroup.release());
	}
	group->addChild(transGroup.release());

	// Padding tests.
	group->addChild(createPaddingTests(testCtx));
	// Shared memory layout tests.
	group->addChild(createSharedMemoryLayoutTests(testCtx));

	return group.release();
}

}	// MemoryModel
}	// vkt