1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2017-2019 The Khronos Group Inc.
6 * Copyright (c) 2018-2019 NVIDIA Corporation
7 *
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 *
20 *//*!
21 * \file
22 * \brief Vulkan Memory Model tests
23 *//*--------------------------------------------------------------------*/
24
25 #include "vktMemoryModelTests.hpp"
26 #include "vktMemoryModelPadding.hpp"
27 #include "vktMemoryModelSharedLayout.hpp"
28 #include "vktAmberTestCase.hpp"
29
30 #include "vkBufferWithMemory.hpp"
31 #include "vkImageWithMemory.hpp"
32 #include "vkQueryUtil.hpp"
33 #include "vkBuilderUtil.hpp"
34 #include "vkCmdUtil.hpp"
35 #include "vkTypeUtil.hpp"
36 #include "vkObjUtil.hpp"
37
38 #include "vktTestCase.hpp"
39
40 #include "deDefs.h"
41 #include "deMath.h"
42 #include "deSharedPtr.hpp"
43 #include "deString.h"
44
45 #include "tcuTestCase.hpp"
46 #include "tcuTestLog.hpp"
47
48 #include <string>
49 #include <sstream>
50
51 namespace vkt
52 {
53 namespace MemoryModel
54 {
55 namespace
56 {
57 using namespace vk;
58 using namespace std;
59
60 typedef enum
61 {
62 TT_MP = 0, // message passing
63 TT_WAR, // write-after-read hazard
64 } TestType;
65
66 typedef enum
67 {
68 ST_FENCE_FENCE = 0,
69 ST_FENCE_ATOMIC,
70 ST_ATOMIC_FENCE,
71 ST_ATOMIC_ATOMIC,
72 ST_CONTROL_BARRIER,
73 ST_CONTROL_AND_MEMORY_BARRIER,
74 } SyncType;
75
76 typedef enum
77 {
78 SC_BUFFER = 0,
79 SC_IMAGE,
80 SC_WORKGROUP,
81 SC_PHYSBUFFER,
82 } StorageClass;
83
84 typedef enum
85 {
86 SCOPE_DEVICE = 0,
87 SCOPE_QUEUEFAMILY,
88 SCOPE_WORKGROUP,
89 SCOPE_SUBGROUP,
90 } Scope;
91
92 typedef enum
93 {
94 STAGE_COMPUTE = 0,
95 STAGE_VERTEX,
96 STAGE_FRAGMENT,
97 } Stage;
98
99 typedef enum
100 {
101 DATA_TYPE_UINT = 0,
102 DATA_TYPE_UINT64,
103 DATA_TYPE_FLOAT32,
104 DATA_TYPE_FLOAT64,
105 } DataType;
106
107 const VkFlags allShaderStages = VK_SHADER_STAGE_COMPUTE_BIT | VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT;
108 const VkFlags allPipelineStages = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
109
110 struct CaseDef
111 {
112 bool payloadMemLocal;
113 bool guardMemLocal;
114 bool coherent;
115 bool core11;
116 bool atomicRMW;
117 TestType testType;
118 StorageClass payloadSC;
119 StorageClass guardSC;
120 Scope scope;
121 SyncType syncType;
122 Stage stage;
123 DataType dataType;
124 bool transitive;
125 bool transitiveVis;
126 };
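// Illustrative example only (one arbitrary combination, not the full matrix):
// a typical message-passing variant could be
//   { payloadMemLocal=true, guardMemLocal=true, coherent=true, core11=false,
//     atomicRMW=false, TT_MP, SC_BUFFER, SC_BUFFER, SCOPE_DEVICE,
//     ST_ATOMIC_ATOMIC, STAGE_COMPUTE, DATA_TYPE_UINT, transitive=false,
//     transitiveVis=false }
// i.e. one invocation writes the payload buffer and releases a guard atomic,
// while its partner acquires the guard and then checks the payload value.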
127
128 class MemoryModelTestInstance : public TestInstance
129 {
130 public:
131 MemoryModelTestInstance (Context& context, const CaseDef& data);
132 ~MemoryModelTestInstance (void);
133 tcu::TestStatus iterate (void);
134 private:
135 CaseDef m_data;
136
137 enum
138 {
139 WIDTH = 256,
140 HEIGHT = 256
141 };
142 };
143
MemoryModelTestInstance::MemoryModelTestInstance (Context& context, const CaseDef& data)
145 : vkt::TestInstance (context)
146 , m_data (data)
147 {
148 }
149
MemoryModelTestInstance::~MemoryModelTestInstance (void)
151 {
152 }
153
154 class MemoryModelTestCase : public TestCase
155 {
156 public:
157 MemoryModelTestCase (tcu::TestContext& context, const char* name, const CaseDef data);
158 ~MemoryModelTestCase (void);
159 virtual void initPrograms (SourceCollections& programCollection) const;
160 virtual void initProgramsTransitive(SourceCollections& programCollection) const;
161 virtual TestInstance* createInstance (Context& context) const;
162 virtual void checkSupport (Context& context) const;
163
164 private:
165 CaseDef m_data;
166 };
167
MemoryModelTestCase::MemoryModelTestCase (tcu::TestContext& context, const char* name, const CaseDef data)
169 : vkt::TestCase (context, name)
170 , m_data (data)
171 {
172 }
173
MemoryModelTestCase::~MemoryModelTestCase (void)
175 {
176 }
177
void MemoryModelTestCase::checkSupport(Context& context) const
179 {
180 if (!context.contextSupports(vk::ApiVersion(0, 1, 1, 0)))
181 {
182 TCU_THROW(NotSupportedError, "Vulkan 1.1 not supported");
183 }
184
185 if (!m_data.core11)
186 {
187 if (!context.getVulkanMemoryModelFeatures().vulkanMemoryModel)
188 {
189 TCU_THROW(NotSupportedError, "vulkanMemoryModel not supported");
190 }
191
192 if (m_data.scope == SCOPE_DEVICE && !context.getVulkanMemoryModelFeatures().vulkanMemoryModelDeviceScope)
193 {
194 TCU_THROW(NotSupportedError, "vulkanMemoryModelDeviceScope not supported");
195 }
196 }
197
198 if (m_data.scope == SCOPE_SUBGROUP)
199 {
200 // Check for subgroup support for scope_subgroup tests.
201 VkPhysicalDeviceSubgroupProperties subgroupProperties;
202 subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
203 subgroupProperties.pNext = DE_NULL;
204 subgroupProperties.supportedOperations = 0;
205
206 VkPhysicalDeviceProperties2 properties;
207 properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
208 properties.pNext = &subgroupProperties;
209
210 context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
211
212 if (!(subgroupProperties.supportedOperations & VK_SUBGROUP_FEATURE_BASIC_BIT) ||
213 !(subgroupProperties.supportedOperations & VK_SUBGROUP_FEATURE_BALLOT_BIT) ||
214 !(subgroupProperties.supportedOperations & VK_SUBGROUP_FEATURE_SHUFFLE_BIT))
215 {
216 TCU_THROW(NotSupportedError, "Subgroup features not supported");
217 }
218
219 VkShaderStageFlags stage= VK_SHADER_STAGE_COMPUTE_BIT;
220 if (m_data.stage == STAGE_VERTEX)
221 {
222 stage = VK_SHADER_STAGE_VERTEX_BIT;
223 }
224 else if (m_data.stage == STAGE_COMPUTE)
225 {
226 stage = VK_SHADER_STAGE_COMPUTE_BIT;
227 }
228 else if (m_data.stage == STAGE_FRAGMENT)
229 {
230 stage = VK_SHADER_STAGE_FRAGMENT_BIT;
231 }
232
	if ((subgroupProperties.supportedStages & stage) == 0)
234 {
235 TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage");
236 }
237 }
238 if (m_data.dataType == DATA_TYPE_UINT64)
239 {
240 if (!context.getDeviceFeatures().shaderInt64)
241 {
242 TCU_THROW(NotSupportedError, "64-bit integer in shaders not supported");
243 }
244 if (!context.getShaderAtomicInt64Features().shaderBufferInt64Atomics &&
245 (m_data.guardSC == SC_BUFFER || m_data.guardSC == SC_PHYSBUFFER))
246 {
247 TCU_THROW(NotSupportedError, "64-bit integer buffer atomics not supported");
248 }
249 if (!context.getShaderAtomicInt64Features().shaderSharedInt64Atomics &&
250 m_data.guardSC == SC_WORKGROUP)
251 {
252 TCU_THROW(NotSupportedError, "64-bit integer shared atomics not supported");
253 }
254 }
255
256 if (m_data.dataType == DATA_TYPE_FLOAT32)
257 {
258 if (!context.isDeviceFunctionalitySupported("VK_EXT_shader_atomic_float"))
259 TCU_THROW(NotSupportedError, "Missing extension: VK_EXT_shader_atomic_float");
260
261 if ((m_data.guardSC == SC_BUFFER || m_data.guardSC == SC_PHYSBUFFER) &&
262 (!context.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat32Atomics))
263 {
264 TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point buffer atomic operations not supported");
265 }
266
267 if (m_data.guardSC == SC_IMAGE && (!context.getShaderAtomicFloatFeaturesEXT().shaderImageFloat32Atomics))
268 {
269 TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point image atomic operations not supported");
270 }
271
272 if (m_data.guardSC == SC_WORKGROUP && (!context.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat32Atomics))
273 {
274 TCU_THROW(NotSupportedError, "VkShaderAtomicFloat32: 32-bit floating point shared atomic operations not supported");
275 }
276 }
277
278 if (m_data.dataType == DATA_TYPE_FLOAT64)
279 {
280 if (!context.isDeviceFunctionalitySupported("VK_EXT_shader_atomic_float"))
281 TCU_THROW(NotSupportedError, "Missing extension: VK_EXT_shader_atomic_float");
282
283 if ((m_data.guardSC == SC_BUFFER || m_data.guardSC == SC_PHYSBUFFER) &&
284 (!context.getShaderAtomicFloatFeaturesEXT().shaderBufferFloat64Atomics))
285 {
286 TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point buffer atomic operations not supported");
287 }
288
289 if (m_data.guardSC == SC_IMAGE || m_data.payloadSC == SC_IMAGE)
290 {
291 TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point image atomic operations not supported");
292 }
293
294 if (m_data.guardSC == SC_WORKGROUP && (!context.getShaderAtomicFloatFeaturesEXT().shaderSharedFloat64Atomics))
295 {
296 TCU_THROW(NotSupportedError, "VkShaderAtomicFloat64: 64-bit floating point shared atomic operations not supported");
297 }
298 }
299
300 if (m_data.transitive &&
301 !context.getVulkanMemoryModelFeatures().vulkanMemoryModelAvailabilityVisibilityChains)
302 TCU_THROW(NotSupportedError, "vulkanMemoryModelAvailabilityVisibilityChains not supported");
303
304 if ((m_data.payloadSC == SC_PHYSBUFFER || m_data.guardSC == SC_PHYSBUFFER) && !context.isBufferDeviceAddressSupported())
305 TCU_THROW(NotSupportedError, "Physical storage buffer pointers not supported");
306
307 if (m_data.stage == STAGE_VERTEX)
308 {
309 if (!context.getDeviceFeatures().vertexPipelineStoresAndAtomics)
310 {
311 TCU_THROW(NotSupportedError, "vertexPipelineStoresAndAtomics not supported");
312 }
313 }
314 if (m_data.stage == STAGE_FRAGMENT)
315 {
316 if (!context.getDeviceFeatures().fragmentStoresAndAtomics)
317 {
318 TCU_THROW(NotSupportedError, "fragmentStoresAndAtomics not supported");
319 }
320 }
321 }
322
323
void MemoryModelTestCase::initPrograms (SourceCollections& programCollection) const
325 {
326 if (m_data.transitive)
327 {
328 initProgramsTransitive(programCollection);
329 return;
330 }
331 DE_ASSERT(!m_data.transitiveVis);
332
333 Scope invocationMapping = m_data.scope;
334 if ((m_data.scope == SCOPE_DEVICE || m_data.scope == SCOPE_QUEUEFAMILY) &&
335 (m_data.payloadSC == SC_WORKGROUP || m_data.guardSC == SC_WORKGROUP))
336 {
337 invocationMapping = SCOPE_WORKGROUP;
338 }
339
340 const char *scopeStr;
341 switch (m_data.scope)
342 {
343 default: DE_ASSERT(0); // fall through
344 case SCOPE_DEVICE: scopeStr = "gl_ScopeDevice"; break;
345 case SCOPE_QUEUEFAMILY: scopeStr = "gl_ScopeQueueFamily"; break;
346 case SCOPE_WORKGROUP: scopeStr = "gl_ScopeWorkgroup"; break;
347 case SCOPE_SUBGROUP: scopeStr = "gl_ScopeSubgroup"; break;
348 }
349
350 const char *typeStr = (m_data.dataType == DATA_TYPE_UINT64) ? "uint64_t" : (m_data.dataType == DATA_TYPE_FLOAT32) ? "float" :
351 (m_data.dataType == DATA_TYPE_FLOAT64) ? "double" : "uint";
352 const bool intType = (m_data.dataType == DATA_TYPE_UINT || m_data.dataType == DATA_TYPE_UINT64);
353
354 // Construct storageSemantics strings. Both release and acquire
355 // always have the payload storage class. They only include the
356 // guard storage class if they're using FENCE for that side of the
357 // sync.
358 std::stringstream storageSemanticsRelease;
359 switch (m_data.payloadSC)
360 {
361 default: DE_ASSERT(0); // fall through
362 case SC_PHYSBUFFER: // fall through
363 case SC_BUFFER: storageSemanticsRelease << "gl_StorageSemanticsBuffer"; break;
364 case SC_IMAGE: storageSemanticsRelease << "gl_StorageSemanticsImage"; break;
365 case SC_WORKGROUP: storageSemanticsRelease << "gl_StorageSemanticsShared"; break;
366 }
367 std::stringstream storageSemanticsAcquire;
368 storageSemanticsAcquire << storageSemanticsRelease.str();
369 if (m_data.syncType == ST_FENCE_ATOMIC || m_data.syncType == ST_FENCE_FENCE)
370 {
371 switch (m_data.guardSC)
372 {
373 default: DE_ASSERT(0); // fall through
374 case SC_PHYSBUFFER: // fall through
375 case SC_BUFFER: storageSemanticsRelease << " | gl_StorageSemanticsBuffer"; break;
376 case SC_IMAGE: storageSemanticsRelease << " | gl_StorageSemanticsImage"; break;
377 case SC_WORKGROUP: storageSemanticsRelease << " | gl_StorageSemanticsShared"; break;
378 }
379 }
380 if (m_data.syncType == ST_ATOMIC_FENCE || m_data.syncType == ST_FENCE_FENCE)
381 {
382 switch (m_data.guardSC)
383 {
384 default: DE_ASSERT(0); // fall through
385 case SC_PHYSBUFFER: // fall through
386 case SC_BUFFER: storageSemanticsAcquire << " | gl_StorageSemanticsBuffer"; break;
387 case SC_IMAGE: storageSemanticsAcquire << " | gl_StorageSemanticsImage"; break;
388 case SC_WORKGROUP: storageSemanticsAcquire << " | gl_StorageSemanticsShared"; break;
389 }
390 }
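
	// For example, with payloadSC == SC_BUFFER, guardSC == SC_IMAGE and
	// syncType == ST_FENCE_FENCE both strings end up as
	//   "gl_StorageSemanticsBuffer | gl_StorageSemanticsImage"
	// whereas with ST_ATOMIC_ATOMIC both stay as just "gl_StorageSemanticsBuffer".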
391
392 std::stringstream semanticsRelease, semanticsAcquire, semanticsAcquireRelease;
393
394 semanticsRelease << "gl_SemanticsRelease";
395 semanticsAcquire << "gl_SemanticsAcquire";
396 semanticsAcquireRelease << "gl_SemanticsAcquireRelease";
397 if (!m_data.coherent && m_data.testType != TT_WAR)
398 {
399 DE_ASSERT(!m_data.core11);
400 semanticsRelease << " | gl_SemanticsMakeAvailable";
401 semanticsAcquire << " | gl_SemanticsMakeVisible";
402 semanticsAcquireRelease << " | gl_SemanticsMakeAvailable | gl_SemanticsMakeVisible";
403 }
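
	// E.g. for a non-coherent TT_MP variant the strings become
	// "gl_SemanticsRelease | gl_SemanticsMakeAvailable" and
	// "gl_SemanticsAcquire | gl_SemanticsMakeVisible"; coherent variants rely
	// on the coherent memory qualifiers instead and keep the plain semantics.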
404
405 std::stringstream css;
406 css << "#version 450 core\n";
407 if (!m_data.core11)
408 {
409 css << "#pragma use_vulkan_memory_model\n";
410 }
411 if (!intType)
412 {
413 css <<
414 "#extension GL_EXT_shader_atomic_float : enable\n"
415 "#extension GL_KHR_memory_scope_semantics : enable\n";
416 }
417 css <<
418 "#extension GL_KHR_shader_subgroup_basic : enable\n"
419 "#extension GL_KHR_shader_subgroup_shuffle : enable\n"
420 "#extension GL_KHR_shader_subgroup_ballot : enable\n"
421 "#extension GL_KHR_memory_scope_semantics : enable\n"
422 "#extension GL_ARB_gpu_shader_int64 : enable\n"
423 "#extension GL_EXT_buffer_reference : enable\n"
424 "// DIM/NUM_WORKGROUP_EACH_DIM overriden by spec constants\n"
425 "layout(constant_id = 0) const int DIM = 1;\n"
426 "layout(constant_id = 1) const int NUM_WORKGROUP_EACH_DIM = 1;\n"
427 "struct S { " << typeStr << " x[DIM*DIM]; };\n";
428
429 if (m_data.stage == STAGE_COMPUTE)
430 {
431 css << "layout(local_size_x_id = 0, local_size_y_id = 0, local_size_z = 1) in;\n";
432 }
433
434 const char *memqual = "";
435 if (m_data.coherent)
436 {
437 if (m_data.core11)
438 {
439 // Vulkan 1.1 only has "coherent", use it regardless of scope
440 memqual = "coherent";
441 }
442 else
443 {
444 switch (m_data.scope)
445 {
446 default: DE_ASSERT(0); // fall through
447 case SCOPE_DEVICE: memqual = "devicecoherent"; break;
448 case SCOPE_QUEUEFAMILY: memqual = "queuefamilycoherent"; break;
449 case SCOPE_WORKGROUP: memqual = "workgroupcoherent"; break;
450 case SCOPE_SUBGROUP: memqual = "subgroupcoherent"; break;
451 }
452 }
453 }
454 else
455 {
456 DE_ASSERT(!m_data.core11);
457 memqual = "nonprivate";
458 }
459
460 stringstream pushConstMembers;
461
462 // Declare payload, guard, and fail resources
463 switch (m_data.payloadSC)
464 {
465 default: DE_ASSERT(0); // fall through
466 case SC_PHYSBUFFER: css << "layout(buffer_reference) buffer PayloadRef { " << typeStr << " x[]; };\n";
467 pushConstMembers << " layout(offset = 0) PayloadRef payloadref;\n"; break;
468 case SC_BUFFER: css << "layout(set=0, binding=0) " << memqual << " buffer Payload { " << typeStr << " x[]; } payload;\n"; break;
469 case SC_IMAGE:
470 if (intType)
471 css << "layout(set=0, binding=0, r32ui) uniform " << memqual << " uimage2D payload;\n";
472 else
473 css << "layout(set=0, binding=0, r32f) uniform " << memqual << " image2D payload;\n";
474 break;
475 case SC_WORKGROUP: css << "shared S payload;\n"; break;
476 }
477 if (m_data.syncType != ST_CONTROL_AND_MEMORY_BARRIER && m_data.syncType != ST_CONTROL_BARRIER)
478 {
479 // The guard variable is only accessed with atomics and need not be declared coherent.
480 switch (m_data.guardSC)
481 {
482 default: DE_ASSERT(0); // fall through
483 case SC_PHYSBUFFER: css << "layout(buffer_reference) buffer GuardRef { " << typeStr << " x[]; };\n";
484 pushConstMembers << "layout(offset = 8) GuardRef guard;\n"; break;
485 case SC_BUFFER: css << "layout(set=0, binding=1) buffer Guard { " << typeStr << " x[]; } guard;\n"; break;
486 case SC_IMAGE:
487 if (intType)
488 css << "layout(set=0, binding=1, r32ui) uniform " << memqual << " uimage2D guard;\n";
489 else
490 css << "layout(set=0, binding=1, r32f) uniform " << memqual << " image2D guard;\n";
491 break;
492 case SC_WORKGROUP: css << "shared S guard;\n"; break;
493 }
494 }
495
496 css << "layout(set=0, binding=2) buffer Fail { uint x[]; } fail;\n";
497
498 if (pushConstMembers.str().size() != 0) {
499 css << "layout (push_constant, std430) uniform PC {\n" << pushConstMembers.str() << "};\n";
500 }
501
502 css <<
503 "void main()\n"
504 "{\n"
505 " bool pass = true;\n"
506 " bool skip = false;\n";
507
508 if (m_data.payloadSC == SC_PHYSBUFFER)
509 css << " " << memqual << " PayloadRef payload = payloadref;\n";
510
511 if (m_data.stage == STAGE_FRAGMENT)
512 {
513 // Kill helper invocations so they don't load outside the bounds of the SSBO.
514 // Helper pixels are also initially "active" and if a thread gets one as its
515 // partner in SCOPE_SUBGROUP mode, it can't run the test.
516 css << " if (gl_HelperInvocation) { return; }\n";
517 }
518
	// Compute coordinates based on the storage class and scope.
	// For workgroup scope, we pair up LocalInvocationID and DIM-1-LocalInvocationID.
	// For device scope, we pair up GlobalInvocationID and DIM*NUMWORKGROUPS-1-GlobalInvocationID.
	// For subgroup scope, we pair up each invocation with the invocation whose
	// gl_SubgroupInvocationID is its own XORed with gl_SubgroupSize-1.
523 switch (invocationMapping)
524 {
525 default: DE_ASSERT(0); // fall through
526 case SCOPE_SUBGROUP:
527 // If the partner invocation isn't active, the shuffle below will be undefined. Bail.
528 css << " uvec4 ballot = subgroupBallot(true);\n"
529 " if (!subgroupBallotBitExtract(ballot, gl_SubgroupInvocationID^(gl_SubgroupSize-1))) { return; }\n";
530
531 switch (m_data.stage)
532 {
533 default: DE_ASSERT(0); // fall through
534 case STAGE_COMPUTE:
535 css <<
536 " ivec2 localId = ivec2(gl_LocalInvocationID.xy);\n"
537 " ivec2 partnerLocalId = subgroupShuffleXor(localId, gl_SubgroupSize-1);\n"
538 " uint sharedCoord = localId.y * DIM + localId.x;\n"
539 " uint partnerSharedCoord = partnerLocalId.y * DIM + partnerLocalId.x;\n"
540 " uint bufferCoord = (gl_WorkGroupID.y * NUM_WORKGROUP_EACH_DIM + gl_WorkGroupID.x)*DIM*DIM + sharedCoord;\n"
541 " uint partnerBufferCoord = (gl_WorkGroupID.y * NUM_WORKGROUP_EACH_DIM + gl_WorkGroupID.x)*DIM*DIM + partnerSharedCoord;\n"
542 " ivec2 imageCoord = ivec2(gl_WorkGroupID.xy * gl_WorkGroupSize.xy + localId);\n"
543 " ivec2 partnerImageCoord = ivec2(gl_WorkGroupID.xy * gl_WorkGroupSize.xy + partnerLocalId);\n";
544 break;
545 case STAGE_VERTEX:
546 css <<
547 " uint bufferCoord = gl_VertexIndex;\n"
548 " uint partnerBufferCoord = subgroupShuffleXor(gl_VertexIndex, gl_SubgroupSize-1);\n"
549 " ivec2 imageCoord = ivec2(gl_VertexIndex % (DIM*NUM_WORKGROUP_EACH_DIM), gl_VertexIndex / (DIM*NUM_WORKGROUP_EACH_DIM));\n"
550 " ivec2 partnerImageCoord = subgroupShuffleXor(imageCoord, gl_SubgroupSize-1);\n"
551 " gl_PointSize = 1.0f;\n"
552 " gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);\n\n";
553 break;
554 case STAGE_FRAGMENT:
555 css <<
556 " ivec2 localId = ivec2(gl_FragCoord.xy) % ivec2(DIM);\n"
557 " ivec2 groupId = ivec2(gl_FragCoord.xy) / ivec2(DIM);\n"
558 " ivec2 partnerLocalId = subgroupShuffleXor(localId, gl_SubgroupSize-1);\n"
559 " ivec2 partnerGroupId = subgroupShuffleXor(groupId, gl_SubgroupSize-1);\n"
560 " uint sharedCoord = localId.y * DIM + localId.x;\n"
561 " uint partnerSharedCoord = partnerLocalId.y * DIM + partnerLocalId.x;\n"
562 " uint bufferCoord = (groupId.y * NUM_WORKGROUP_EACH_DIM + groupId.x)*DIM*DIM + sharedCoord;\n"
563 " uint partnerBufferCoord = (partnerGroupId.y * NUM_WORKGROUP_EACH_DIM + partnerGroupId.x)*DIM*DIM + partnerSharedCoord;\n"
564 " ivec2 imageCoord = ivec2(groupId.xy * ivec2(DIM) + localId);\n"
565 " ivec2 partnerImageCoord = ivec2(partnerGroupId.xy * ivec2(DIM) + partnerLocalId);\n";
566 break;
567 }
568 break;
569 case SCOPE_WORKGROUP:
570 css <<
571 " ivec2 localId = ivec2(gl_LocalInvocationID.xy);\n"
572 " ivec2 partnerLocalId = ivec2(DIM-1)-ivec2(gl_LocalInvocationID.xy);\n"
573 " uint sharedCoord = localId.y * DIM + localId.x;\n"
574 " uint partnerSharedCoord = partnerLocalId.y * DIM + partnerLocalId.x;\n"
575 " uint bufferCoord = (gl_WorkGroupID.y * NUM_WORKGROUP_EACH_DIM + gl_WorkGroupID.x)*DIM*DIM + sharedCoord;\n"
576 " uint partnerBufferCoord = (gl_WorkGroupID.y * NUM_WORKGROUP_EACH_DIM + gl_WorkGroupID.x)*DIM*DIM + partnerSharedCoord;\n"
577 " ivec2 imageCoord = ivec2(gl_WorkGroupID.xy * gl_WorkGroupSize.xy + localId);\n"
578 " ivec2 partnerImageCoord = ivec2(gl_WorkGroupID.xy * gl_WorkGroupSize.xy + partnerLocalId);\n";
579 break;
580 case SCOPE_QUEUEFAMILY:
581 case SCOPE_DEVICE:
582 switch (m_data.stage)
583 {
584 default: DE_ASSERT(0); // fall through
585 case STAGE_COMPUTE:
586 css <<
587 " ivec2 globalId = ivec2(gl_GlobalInvocationID.xy);\n"
588 " ivec2 partnerGlobalId = ivec2(DIM*NUM_WORKGROUP_EACH_DIM-1) - ivec2(gl_GlobalInvocationID.xy);\n"
589 " uint bufferCoord = globalId.y * DIM*NUM_WORKGROUP_EACH_DIM + globalId.x;\n"
590 " uint partnerBufferCoord = partnerGlobalId.y * DIM*NUM_WORKGROUP_EACH_DIM + partnerGlobalId.x;\n"
591 " ivec2 imageCoord = globalId;\n"
592 " ivec2 partnerImageCoord = partnerGlobalId;\n";
593 break;
594 case STAGE_VERTEX:
595 css <<
596 " ivec2 globalId = ivec2(gl_VertexIndex % (DIM*NUM_WORKGROUP_EACH_DIM), gl_VertexIndex / (DIM*NUM_WORKGROUP_EACH_DIM));\n"
597 " ivec2 partnerGlobalId = ivec2(DIM*NUM_WORKGROUP_EACH_DIM-1) - globalId;\n"
598 " uint bufferCoord = globalId.y * DIM*NUM_WORKGROUP_EACH_DIM + globalId.x;\n"
599 " uint partnerBufferCoord = partnerGlobalId.y * DIM*NUM_WORKGROUP_EACH_DIM + partnerGlobalId.x;\n"
600 " ivec2 imageCoord = globalId;\n"
601 " ivec2 partnerImageCoord = partnerGlobalId;\n"
602 " gl_PointSize = 1.0f;\n"
603 " gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);\n\n";
604 break;
605 case STAGE_FRAGMENT:
606 css <<
607 " ivec2 localId = ivec2(gl_FragCoord.xy) % ivec2(DIM);\n"
608 " ivec2 groupId = ivec2(gl_FragCoord.xy) / ivec2(DIM);\n"
609 " ivec2 partnerLocalId = ivec2(DIM-1)-localId;\n"
610 " ivec2 partnerGroupId = groupId;\n"
611 " uint sharedCoord = localId.y * DIM + localId.x;\n"
612 " uint partnerSharedCoord = partnerLocalId.y * DIM + partnerLocalId.x;\n"
613 " uint bufferCoord = (groupId.y * NUM_WORKGROUP_EACH_DIM + groupId.x)*DIM*DIM + sharedCoord;\n"
614 " uint partnerBufferCoord = (partnerGroupId.y * NUM_WORKGROUP_EACH_DIM + partnerGroupId.x)*DIM*DIM + partnerSharedCoord;\n"
615 " ivec2 imageCoord = ivec2(groupId.xy * ivec2(DIM) + localId);\n"
616 " ivec2 partnerImageCoord = ivec2(partnerGroupId.xy * ivec2(DIM) + partnerLocalId);\n";
617 break;
618 }
619 break;
620 }
621
622 // Initialize shared memory, followed by a barrier
623 if (m_data.payloadSC == SC_WORKGROUP)
624 {
625 css << " payload.x[sharedCoord] = 0;\n";
626 }
627 if (m_data.guardSC == SC_WORKGROUP)
628 {
629 css << " guard.x[sharedCoord] = 0;\n";
630 }
631 if (m_data.payloadSC == SC_WORKGROUP || m_data.guardSC == SC_WORKGROUP)
632 {
633 switch (invocationMapping)
634 {
635 default: DE_ASSERT(0); // fall through
636 case SCOPE_SUBGROUP: css << " subgroupBarrier();\n"; break;
637 case SCOPE_WORKGROUP: css << " barrier();\n"; break;
638 }
639 }
640
641 if (m_data.testType == TT_MP)
642 {
643 if (intType)
644 {
645 // Store payload
646 switch (m_data.payloadSC)
647 {
648 default: DE_ASSERT(0); // fall through
649 case SC_PHYSBUFFER: // fall through
650 case SC_BUFFER: css << " payload.x[bufferCoord] = bufferCoord + (payload.x[partnerBufferCoord]>>31);\n"; break;
651 case SC_IMAGE: css << " imageStore(payload, imageCoord, uvec4(bufferCoord + (imageLoad(payload, partnerImageCoord).x>>31), 0, 0, 0));\n"; break;
652 case SC_WORKGROUP: css << " payload.x[sharedCoord] = bufferCoord + (payload.x[partnerSharedCoord]>>31);\n"; break;
653 }
654 }
655 else
656 {
657 // Store payload
658 switch (m_data.payloadSC)
659 {
660 default: DE_ASSERT(0); // fall through
661 case SC_PHYSBUFFER: // fall through
662 case SC_BUFFER: css << " payload.x[bufferCoord] = " << typeStr << "(bufferCoord) + ((floatBitsToInt(float(payload.x[partnerBufferCoord])))>>31);\n"; break;
663 case SC_IMAGE: css << " imageStore(payload, imageCoord, vec4(" << typeStr << "(bufferCoord + (floatBitsToInt(float(imageLoad(payload, partnerImageCoord).x))>>31)), 0, 0, 0)); \n"; break;
664 case SC_WORKGROUP: css << " payload.x[sharedCoord] = " << typeStr << "(bufferCoord) + ((floatBitsToInt(float(payload.x[partnerSharedCoord])))>>31);\n"; break;
665 }
666 }
667 }
668 else
669 {
670 DE_ASSERT(m_data.testType == TT_WAR);
671 // Load payload
672 switch (m_data.payloadSC)
673 {
674 default: DE_ASSERT(0); // fall through
675 case SC_PHYSBUFFER: // fall through
676 case SC_BUFFER: css << " " << typeStr << " r = payload.x[partnerBufferCoord];\n"; break;
677 case SC_IMAGE: css << " " << typeStr << " r = imageLoad(payload, partnerImageCoord).x;\n"; break;
678 case SC_WORKGROUP: css << " " << typeStr << " r = payload.x[partnerSharedCoord];\n"; break;
679 }
680 }
681 if (m_data.syncType == ST_CONTROL_AND_MEMORY_BARRIER)
682 {
683 // Acquire and release separate from control barrier
684 css << " memoryBarrier(" << scopeStr << ", " << storageSemanticsRelease.str() << ", " << semanticsRelease.str() << ");\n"
685 " controlBarrier(" << scopeStr << ", gl_ScopeInvocation, 0, 0);\n"
686 " memoryBarrier(" << scopeStr << ", " << storageSemanticsAcquire.str() << ", " << semanticsAcquire.str() << ");\n";
687 }
688 else if (m_data.syncType == ST_CONTROL_BARRIER)
689 {
690 // Control barrier performs both acquire and release
691 css << " controlBarrier(" << scopeStr << ", " << scopeStr << ", "
692 << storageSemanticsRelease.str() << " | " << storageSemanticsAcquire.str() << ", "
693 << semanticsAcquireRelease.str() << ");\n";
694 }
695 else
696 {
697 // Don't type cast for 64 bit image atomics
698 const char* typeCastStr = (m_data.dataType == DATA_TYPE_UINT64 || m_data.dataType == DATA_TYPE_FLOAT64) ? "" : typeStr;
699 // Release barrier
700 std::stringstream atomicReleaseSemantics;
701 if (m_data.syncType == ST_FENCE_ATOMIC || m_data.syncType == ST_FENCE_FENCE)
702 {
703 css << " memoryBarrier(" << scopeStr << ", " << storageSemanticsRelease.str() << ", " << semanticsRelease.str() << ");\n";
704 atomicReleaseSemantics << ", 0, 0";
705 }
706 else
707 {
708 atomicReleaseSemantics << ", " << storageSemanticsRelease.str() << ", " << semanticsRelease.str();
709 }
710 // Atomic store guard
711 if (m_data.atomicRMW)
712 {
713 switch (m_data.guardSC)
714 {
715 default: DE_ASSERT(0); // fall through
716 case SC_PHYSBUFFER: // fall through
717 case SC_BUFFER: css << " atomicExchange(guard.x[bufferCoord], " << typeStr << "(1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break;
718 case SC_IMAGE: css << " imageAtomicExchange(guard, imageCoord, " << typeCastStr << "(1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break;
719 case SC_WORKGROUP: css << " atomicExchange(guard.x[sharedCoord], " << typeStr << "(1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break;
720 }
721 }
722 else
723 {
724 switch (m_data.guardSC)
725 {
726 default: DE_ASSERT(0); // fall through
727 case SC_PHYSBUFFER: // fall through
728 case SC_BUFFER: css << " atomicStore(guard.x[bufferCoord], " << typeStr << "(1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break;
729 case SC_IMAGE: css << " imageAtomicStore(guard, imageCoord, " << typeCastStr << "(1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break;
730 case SC_WORKGROUP: css << " atomicStore(guard.x[sharedCoord], " << typeStr << "(1u), " << scopeStr << atomicReleaseSemantics.str() << ");\n"; break;
731 }
732 }
733
734 std::stringstream atomicAcquireSemantics;
735 if (m_data.syncType == ST_ATOMIC_FENCE || m_data.syncType == ST_FENCE_FENCE)
736 {
737 atomicAcquireSemantics << ", 0, 0";
738 }
739 else
740 {
741 atomicAcquireSemantics << ", " << storageSemanticsAcquire.str() << ", " << semanticsAcquire.str();
742 }
743 // Atomic load guard
744 if (m_data.atomicRMW)
745 {
746 switch (m_data.guardSC)
747 {
748 default: DE_ASSERT(0); // fall through
749 case SC_PHYSBUFFER: // fall through
750 case SC_BUFFER: css << " skip = atomicExchange(guard.x[partnerBufferCoord], " << typeStr << "(2u), " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break;
751 case SC_IMAGE: css << " skip = imageAtomicExchange(guard, partnerImageCoord, " << typeCastStr << "(2u), " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break;
752 case SC_WORKGROUP: css << " skip = atomicExchange(guard.x[partnerSharedCoord], " << typeStr << "(2u), " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break;
753 }
754 } else
755 {
756 switch (m_data.guardSC)
757 {
758 default: DE_ASSERT(0); // fall through
759 case SC_PHYSBUFFER: // fall through
760 case SC_BUFFER: css << " skip = atomicLoad(guard.x[partnerBufferCoord], " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break;
761 case SC_IMAGE: css << " skip = imageAtomicLoad(guard, partnerImageCoord, " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break;
762 case SC_WORKGROUP: css << " skip = atomicLoad(guard.x[partnerSharedCoord], " << scopeStr << atomicAcquireSemantics.str() << ") == 0;\n"; break;
763 }
764 }
765 // Acquire barrier
766 if (m_data.syncType == ST_ATOMIC_FENCE || m_data.syncType == ST_FENCE_FENCE)
767 {
768 css << " memoryBarrier(" << scopeStr << ", " << storageSemanticsAcquire.str() << ", " << semanticsAcquire.str() << ");\n";
769 }
770 }
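	// As one concrete illustration (coherent SC_BUFFER payload and guard, uint data,
	// SCOPE_DEVICE, ST_ATOMIC_ATOMIC, no RMW), the release/acquire block above emits
	// roughly:
	//   atomicStore(guard.x[bufferCoord], uint(1u), gl_ScopeDevice,
	//               gl_StorageSemanticsBuffer, gl_SemanticsRelease);
	//   skip = atomicLoad(guard.x[partnerBufferCoord], gl_ScopeDevice,
	//                     gl_StorageSemanticsBuffer, gl_SemanticsAcquire) == 0;
	// The FENCE variants instead pass 0, 0 to the atomics and emit the corresponding
	// memoryBarrier() before the store and/or after the load.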
771 if (m_data.testType == TT_MP)
772 {
773 // Load payload
774 switch (m_data.payloadSC)
775 {
776 default: DE_ASSERT(0); // fall through
777 case SC_PHYSBUFFER: // fall through
778 case SC_BUFFER: css << " " << typeStr << " r = payload.x[partnerBufferCoord];\n"; break;
779 case SC_IMAGE: css << " " << typeStr << " r = imageLoad(payload, partnerImageCoord).x;\n"; break;
780 case SC_WORKGROUP: css << " " << typeStr << " r = payload.x[partnerSharedCoord];\n"; break;
781 }
782 css <<
783 " if (!skip && r != " << typeStr << "(partnerBufferCoord)) { fail.x[bufferCoord] = 1; }\n"
784 "}\n";
785 }
786 else
787 {
788 DE_ASSERT(m_data.testType == TT_WAR);
789 // Store payload, only if the partner invocation has already done its read
790 css << " if (!skip) {\n ";
791 switch (m_data.payloadSC)
792 {
793 default: DE_ASSERT(0); // fall through
794 case SC_PHYSBUFFER: // fall through
795 case SC_BUFFER: css << " payload.x[bufferCoord] = " << typeStr << "(bufferCoord);\n"; break;
796 case SC_IMAGE:
797 if (intType) {
798 css << " imageStore(payload, imageCoord, uvec4(bufferCoord, 0, 0, 0));\n";
799 }
800 else {
801 css << " imageStore(payload, imageCoord, vec4(" << typeStr << "(bufferCoord), 0, 0, 0));\n";
802 }
803 break;
804 case SC_WORKGROUP: css << " payload.x[sharedCoord] = " << typeStr << "(bufferCoord);\n"; break;
805 }
806 css <<
807 " }\n"
808 " if (r != 0) { fail.x[bufferCoord] = 1; }\n"
809 "}\n";
810 }
811
812 // Draw a fullscreen triangle strip based on gl_VertexIndex
813 std::stringstream vss;
814 vss <<
815 "#version 450 core\n"
816 "vec2 coords[4] = {ivec2(-1,-1), ivec2(-1, 1), ivec2(1, -1), ivec2(1, 1)};\n"
817 "void main() { gl_Position = vec4(coords[gl_VertexIndex], 0, 1); }\n";
818
819 const vk::ShaderBuildOptions buildOptions (programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
820
821 switch (m_data.stage)
822 {
823 default: DE_ASSERT(0); // fall through
824 case STAGE_COMPUTE:
825 programCollection.glslSources.add("test") << glu::ComputeSource(css.str()) << buildOptions;
826 break;
827 case STAGE_VERTEX:
828 programCollection.glslSources.add("test") << glu::VertexSource(css.str()) << buildOptions;
829 break;
830 case STAGE_FRAGMENT:
831 programCollection.glslSources.add("vert") << glu::VertexSource(vss.str());
832 programCollection.glslSources.add("test") << glu::FragmentSource(css.str()) << buildOptions;
833 break;
834 }
835 }
836
837
void MemoryModelTestCase::initProgramsTransitive (SourceCollections& programCollection) const
839 {
840 Scope invocationMapping = m_data.scope;
841
842 const char* typeStr = (m_data.dataType == DATA_TYPE_UINT64) ? "uint64_t" : (m_data.dataType == DATA_TYPE_FLOAT32) ? "float" :
843 (m_data.dataType == DATA_TYPE_FLOAT64) ? "double" : "uint";
844 const bool intType = (m_data.dataType == DATA_TYPE_UINT || m_data.dataType == DATA_TYPE_UINT64);
845
	// Construct storageSemantics strings for the payload and guard storage
	// classes, along with their union, which is what the device-scope
	// release/acquire fences below operate on.
850 std::stringstream storageSemanticsPayload;
851 switch (m_data.payloadSC)
852 {
853 default: DE_ASSERT(0); // fall through
854 case SC_PHYSBUFFER: // fall through
855 case SC_BUFFER: storageSemanticsPayload << "gl_StorageSemanticsBuffer"; break;
856 case SC_IMAGE: storageSemanticsPayload << "gl_StorageSemanticsImage"; break;
857 }
858 std::stringstream storageSemanticsGuard;
859 switch (m_data.guardSC)
860 {
861 default: DE_ASSERT(0); // fall through
862 case SC_PHYSBUFFER: // fall through
863 case SC_BUFFER: storageSemanticsGuard << "gl_StorageSemanticsBuffer"; break;
864 case SC_IMAGE: storageSemanticsGuard << "gl_StorageSemanticsImage"; break;
865 }
866 std::stringstream storageSemanticsAll;
867 storageSemanticsAll << storageSemanticsPayload.str() << " | " << storageSemanticsGuard.str();
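
	// E.g. with payloadSC == SC_BUFFER and guardSC == SC_IMAGE, storageSemanticsAll
	// ends up as "gl_StorageSemanticsBuffer | gl_StorageSemanticsImage".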
868
869 std::stringstream css;
870 css << "#version 450 core\n";
871 css << "#pragma use_vulkan_memory_model\n";
872 if (!intType)
873 {
874 css <<
875 "#extension GL_EXT_shader_atomic_float : enable\n"
876 "#extension GL_KHR_memory_scope_semantics : enable\n";
877 }
878 css <<
879 "#extension GL_KHR_shader_subgroup_basic : enable\n"
880 "#extension GL_KHR_shader_subgroup_shuffle : enable\n"
881 "#extension GL_KHR_shader_subgroup_ballot : enable\n"
882 "#extension GL_KHR_memory_scope_semantics : enable\n"
883 "#extension GL_ARB_gpu_shader_int64 : enable\n"
884 "#extension GL_EXT_buffer_reference : enable\n"
885 "// DIM/NUM_WORKGROUP_EACH_DIM overriden by spec constants\n"
886 "layout(constant_id = 0) const int DIM = 1;\n"
887 "layout(constant_id = 1) const int NUM_WORKGROUP_EACH_DIM = 1;\n"
888 "shared bool sharedSkip;\n";
889
890 css << "layout(local_size_x_id = 0, local_size_y_id = 0, local_size_z = 1) in;\n";
891
892 const char *memqual = "";
893 const char *semAvail = "";
894 const char *semVis = "";
895 if (m_data.coherent)
896 {
897 memqual = "workgroupcoherent";
898 }
899 else
900 {
901 memqual = "nonprivate";
902 semAvail = " | gl_SemanticsMakeAvailable";
903 semVis = " | gl_SemanticsMakeVisible";
904 }
905
906 stringstream pushConstMembers;
907
908 // Declare payload, guard, and fail resources
909 switch (m_data.payloadSC)
910 {
911 default: DE_ASSERT(0); // fall through
912 case SC_PHYSBUFFER: css << "layout(buffer_reference) buffer PayloadRef { " << typeStr << " x[]; };\n";
913 pushConstMembers << " layout(offset = 0) PayloadRef payloadref;\n"; break;
914 case SC_BUFFER: css << "layout(set=0, binding=0) " << memqual << " buffer Payload { " << typeStr << " x[]; } payload;\n"; break;
915 case SC_IMAGE:
916 if (intType)
917 css << "layout(set=0, binding=0, r32ui) uniform " << memqual << " uimage2D payload;\n";
918 else
919 css << "layout(set=0, binding=0, r32f) uniform " << memqual << " image2D payload;\n";
920 break;
921 }
922 // The guard variable is only accessed with atomics and need not be declared coherent.
923 switch (m_data.guardSC)
924 {
925 default: DE_ASSERT(0); // fall through
926 case SC_PHYSBUFFER: css << "layout(buffer_reference) buffer GuardRef { " << typeStr << " x[]; };\n";
927 pushConstMembers << "layout(offset = 8) GuardRef guard;\n"; break;
928 case SC_BUFFER: css << "layout(set=0, binding=1) buffer Guard { " << typeStr << " x[]; } guard;\n"; break;
929 case SC_IMAGE:
930 if (intType)
931 css << "layout(set=0, binding=1, r32ui) uniform " << memqual << " uimage2D guard;\n";
932 else
933 css << "layout(set=0, binding=1, r32f) uniform " << memqual << " image2D guard;\n";
934 break;
935 }
936
937 css << "layout(set=0, binding=2) buffer Fail { uint x[]; } fail;\n";
938
939 if (pushConstMembers.str().size() != 0) {
940 css << "layout (push_constant, std430) uniform PC {\n" << pushConstMembers.str() << "};\n";
941 }
942
943 css <<
944 "void main()\n"
945 "{\n"
946 " bool pass = true;\n"
947 " bool skip = false;\n"
948 " sharedSkip = false;\n";
949
950 if (m_data.payloadSC == SC_PHYSBUFFER)
951 css << " " << memqual << " PayloadRef payload = payloadref;\n";
952
953 // Compute coordinates based on the storage class and scope.
954 switch (invocationMapping)
955 {
956 default: DE_ASSERT(0); // fall through
957 case SCOPE_DEVICE:
958 css <<
959 " ivec2 globalId = ivec2(gl_GlobalInvocationID.xy);\n"
960 " ivec2 partnerGlobalId = ivec2(DIM*NUM_WORKGROUP_EACH_DIM-1) - ivec2(gl_GlobalInvocationID.xy);\n"
961 " uint bufferCoord = globalId.y * DIM*NUM_WORKGROUP_EACH_DIM + globalId.x;\n"
962 " uint partnerBufferCoord = partnerGlobalId.y * DIM*NUM_WORKGROUP_EACH_DIM + partnerGlobalId.x;\n"
963 " ivec2 imageCoord = globalId;\n"
964 " ivec2 partnerImageCoord = partnerGlobalId;\n"
965 " ivec2 globalId00 = ivec2(DIM) * ivec2(gl_WorkGroupID.xy);\n"
966 " ivec2 partnerGlobalId00 = ivec2(DIM) * (ivec2(NUM_WORKGROUP_EACH_DIM-1) - ivec2(gl_WorkGroupID.xy));\n"
967 " uint bufferCoord00 = globalId00.y * DIM*NUM_WORKGROUP_EACH_DIM + globalId00.x;\n"
968 " uint partnerBufferCoord00 = partnerGlobalId00.y * DIM*NUM_WORKGROUP_EACH_DIM + partnerGlobalId00.x;\n"
969 " ivec2 imageCoord00 = globalId00;\n"
970 " ivec2 partnerImageCoord00 = partnerGlobalId00;\n";
971 break;
972 }
973
974 // Store payload
975 if (intType)
976 {
977 switch (m_data.payloadSC)
978 {
979 default: DE_ASSERT(0); // fall through
980 case SC_PHYSBUFFER: // fall through
981 case SC_BUFFER: css << " payload.x[bufferCoord] = bufferCoord + (payload.x[partnerBufferCoord]>>31);\n"; break;
982 case SC_IMAGE: css << " imageStore(payload, imageCoord, uvec4(bufferCoord + (imageLoad(payload, partnerImageCoord).x>>31), 0, 0, 0));\n"; break;
983 }
984 }
985 else
986 {
987 switch (m_data.payloadSC)
988 {
989 default: DE_ASSERT(0); // fall through
990 case SC_PHYSBUFFER: // fall through
991 case SC_BUFFER: css << " payload.x[bufferCoord] = " << typeStr << "(bufferCoord) + ((floatBitsToInt(float(payload.x[partnerBufferCoord])))>>31);\n"; break;
992 case SC_IMAGE: css << " imageStore(payload, imageCoord, vec4(" << typeStr << "(bufferCoord + (floatBitsToInt(float(imageLoad(payload, partnerImageCoord).x)>>31))), 0, 0, 0)); \n"; break;
993 }
994 }
995
996 // Sync to other threads in the workgroup
997 css << " controlBarrier(gl_ScopeWorkgroup, "
998 "gl_ScopeWorkgroup, " <<
999 storageSemanticsPayload.str() << " | gl_StorageSemanticsShared, "
1000 "gl_SemanticsAcquireRelease" << semAvail << ");\n";
1001
1002 // Device-scope release/availability in invocation(0,0)
1003 css << " if (all(equal(gl_LocalInvocationID.xy, ivec2(0,0)))) {\n";
1004 const char* typeCastStr = (m_data.dataType == DATA_TYPE_UINT64 || m_data.dataType == DATA_TYPE_FLOAT64) ? "" : typeStr;
1005 if (m_data.syncType == ST_ATOMIC_ATOMIC || m_data.syncType == ST_ATOMIC_FENCE) {
1006 switch (m_data.guardSC)
1007 {
1008 default: DE_ASSERT(0); // fall through
1009 case SC_PHYSBUFFER: // fall through
1010 case SC_BUFFER: css << " atomicStore(guard.x[bufferCoord], " << typeStr << "(1u), gl_ScopeDevice, " << storageSemanticsPayload.str() << ", gl_SemanticsRelease | gl_SemanticsMakeAvailable);\n"; break;
1011 case SC_IMAGE: css << " imageAtomicStore(guard, imageCoord, " << typeCastStr << "(1u), gl_ScopeDevice, " << storageSemanticsPayload.str() << ", gl_SemanticsRelease | gl_SemanticsMakeAvailable);\n"; break;
1012 }
1013 } else {
1014 css << " memoryBarrier(gl_ScopeDevice, " << storageSemanticsAll.str() << ", gl_SemanticsRelease | gl_SemanticsMakeAvailable);\n";
1015 switch (m_data.guardSC)
1016 {
1017 default: DE_ASSERT(0); // fall through
1018 case SC_PHYSBUFFER: // fall through
1019 case SC_BUFFER: css << " atomicStore(guard.x[bufferCoord], " << typeStr << "(1u), gl_ScopeDevice, 0, 0);\n"; break;
1020 case SC_IMAGE: css << " imageAtomicStore(guard, imageCoord, " << typeCastStr << "(1u), gl_ScopeDevice, 0, 0);\n"; break;
1021 }
1022 }
1023
1024 // Device-scope acquire/visibility either in invocation(0,0) or in every invocation
1025 if (!m_data.transitiveVis) {
1026 css << " }\n";
1027 }
1028 if (m_data.syncType == ST_ATOMIC_ATOMIC || m_data.syncType == ST_FENCE_ATOMIC) {
1029 switch (m_data.guardSC)
1030 {
1031 default: DE_ASSERT(0); // fall through
1032 case SC_PHYSBUFFER: // fall through
1033 case SC_BUFFER: css << " skip = atomicLoad(guard.x[partnerBufferCoord00], gl_ScopeDevice, " << storageSemanticsPayload.str() << ", gl_SemanticsAcquire | gl_SemanticsMakeVisible) == 0;\n"; break;
1034 case SC_IMAGE: css << " skip = imageAtomicLoad(guard, partnerImageCoord00, gl_ScopeDevice, " << storageSemanticsPayload.str() << ", gl_SemanticsAcquire | gl_SemanticsMakeVisible) == 0;\n"; break;
1035 }
1036 } else {
1037 switch (m_data.guardSC)
1038 {
1039 default: DE_ASSERT(0); // fall through
1040 case SC_PHYSBUFFER: // fall through
1041 case SC_BUFFER: css << " skip = atomicLoad(guard.x[partnerBufferCoord00], gl_ScopeDevice, 0, 0) == 0;\n"; break;
1042 case SC_IMAGE: css << " skip = imageAtomicLoad(guard, partnerImageCoord00, gl_ScopeDevice, 0, 0) == 0;\n"; break;
1043 }
1044 css << " memoryBarrier(gl_ScopeDevice, " << storageSemanticsAll.str() << ", gl_SemanticsAcquire | gl_SemanticsMakeVisible);\n";
1045 }
1046
1047 // If invocation(0,0) did the acquire then store "skip" to shared memory and
1048 // synchronize with the workgroup
1049 if (m_data.transitiveVis) {
1050 css << " sharedSkip = skip;\n";
1051 css << " }\n";
1052
1053 css << " controlBarrier(gl_ScopeWorkgroup, "
1054 "gl_ScopeWorkgroup, " <<
1055 storageSemanticsPayload.str() << " | gl_StorageSemanticsShared, "
1056 "gl_SemanticsAcquireRelease" << semVis << ");\n";
1057 css << " skip = sharedSkip;\n";
1058 }
1059
1060 // Load payload
1061 switch (m_data.payloadSC)
1062 {
1063 default: DE_ASSERT(0); // fall through
1064 case SC_PHYSBUFFER: // fall through
1065 case SC_BUFFER: css << " " << typeStr << " r = payload.x[partnerBufferCoord];\n"; break;
1066 case SC_IMAGE: css << " " << typeStr << " r = imageLoad(payload, partnerImageCoord).x;\n"; break;
1067 }
1068 css <<
1069 " if (!skip && r != " << typeStr << "(partnerBufferCoord)) { fail.x[bufferCoord] = 1; }\n"
1070 "}\n";
1071
1072 const vk::ShaderBuildOptions buildOptions (programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
1073
1074 programCollection.glslSources.add("test") << glu::ComputeSource(css.str()) << buildOptions;
1075 }
1076
TestInstance* MemoryModelTestCase::createInstance (Context& context) const
1078 {
1079 return new MemoryModelTestInstance(context, m_data);
1080 }
1081
tcu::TestStatus MemoryModelTestInstance::iterate (void)
1083 {
1084 const DeviceInterface& vk = m_context.getDeviceInterface();
1085 const VkDevice device = m_context.getDevice();
1086 Allocator& allocator = m_context.getDefaultAllocator();
1087
1088 VkPhysicalDeviceProperties2 properties;
1089 properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
1090 properties.pNext = NULL;
1091
1092 m_context.getInstanceInterface().getPhysicalDeviceProperties2(m_context.getPhysicalDevice(), &properties);
1093
1094 deUint32 DIM = 31;
1095 deUint32 NUM_WORKGROUP_EACH_DIM = 8;
1096 // If necessary, shrink workgroup size to fit HW limits
1097 if (DIM*DIM > properties.properties.limits.maxComputeWorkGroupInvocations)
1098 {
1099 DIM = (deUint32)deFloatSqrt((float)properties.properties.limits.maxComputeWorkGroupInvocations);
1100 }
1101 deUint32 NUM_INVOCATIONS = (DIM * DIM * NUM_WORKGROUP_EACH_DIM * NUM_WORKGROUP_EACH_DIM);
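	// Worked example: with the defaults above this is 31*31*8*8 = 61504 invocations;
	// if a device caps maxComputeWorkGroupInvocations at, say, 256, DIM shrinks to
	// floor(sqrt(256)) = 16 and NUM_INVOCATIONS drops to 16*16*8*8 = 16384.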
1102
1103 VkDeviceSize bufferSizes[3];
1104 de::MovePtr<BufferWithMemory> buffers[3];
1105 vk::VkDescriptorBufferInfo bufferDescriptors[3];
1106 de::MovePtr<BufferWithMemory> copyBuffer;
1107
1108 for (deUint32 i = 0; i < 3; ++i)
1109 {
1110 size_t elementSize = (m_data.dataType == DATA_TYPE_UINT64 || m_data.dataType == DATA_TYPE_FLOAT64)? sizeof(deUint64) : sizeof(deUint32);
1111 // buffer2 is the "fail" buffer, and is always uint
1112 if (i == 2)
1113 elementSize = sizeof(deUint32);
1114 bufferSizes[i] = NUM_INVOCATIONS * elementSize;
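		// E.g. with DIM=31 that is 61504 elements, i.e. roughly 240KB per buffer
		// for 32-bit data types and roughly 480KB for 64-bit data types.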
1115
1116 vk::VkFlags usageFlags = vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
1117
1118 bool memoryDeviceAddress = false;
1119
1120 bool local;
1121 switch (i)
1122 {
1123 default: DE_ASSERT(0); // fall through
1124 case 0:
1125 if (m_data.payloadSC != SC_BUFFER && m_data.payloadSC != SC_PHYSBUFFER)
1126 continue;
1127 local = m_data.payloadMemLocal;
1128 if (m_data.payloadSC == SC_PHYSBUFFER)
1129 {
1130 usageFlags |= vk::VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT;
1131 if (m_context.isDeviceFunctionalitySupported("VK_KHR_buffer_device_address"))
1132 memoryDeviceAddress = true;
1133 }
1134 break;
1135 case 1:
1136 if (m_data.guardSC != SC_BUFFER && m_data.guardSC != SC_PHYSBUFFER)
1137 continue;
1138 local = m_data.guardMemLocal;
1139 if (m_data.guardSC == SC_PHYSBUFFER)
1140 {
1141 usageFlags |= vk::VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT;
1142 if (m_context.isDeviceFunctionalitySupported("VK_KHR_buffer_device_address"))
1143 memoryDeviceAddress = true;
1144 }
1145 break;
1146 case 2: local = true; break;
1147 }
1148
1149 try
1150 {
1151 buffers[i] = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
1152 vk, device, allocator, makeBufferCreateInfo(bufferSizes[i], usageFlags),
1153 (memoryDeviceAddress ? MemoryRequirement::DeviceAddress : MemoryRequirement::Any) |
1154 (local ? MemoryRequirement::Local : MemoryRequirement::NonLocal)));
1155 }
1156 catch (const tcu::NotSupportedError&)
1157 {
1158 if (!local)
1159 {
1160 TCU_THROW(NotSupportedError, "Test variant uses non-device-local memory, which is not supported");
1161 }
1162 throw;
1163 }
1164 bufferDescriptors[i] = makeDescriptorBufferInfo(**buffers[i], 0, bufferSizes[i]);
1165 }
1166
1167 // Try to use cached host memory for the buffer the CPU will read from, else fallback to host visible.
1168 try
1169 {
1170 copyBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
1171 vk, device, allocator, makeBufferCreateInfo(bufferSizes[2], VK_BUFFER_USAGE_TRANSFER_DST_BIT), MemoryRequirement::HostVisible | MemoryRequirement::Cached));
1172 }
1173 catch (const tcu::NotSupportedError&)
1174 {
1175 copyBuffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
1176 vk, device, allocator, makeBufferCreateInfo(bufferSizes[2], VK_BUFFER_USAGE_TRANSFER_DST_BIT), MemoryRequirement::HostVisible));
1177 }
1178
1179 VkFormat imageFormat;
1180 switch (m_data.dataType)
1181 {
1182 case DATA_TYPE_UINT:
1183 case DATA_TYPE_UINT64:
1184 imageFormat = VK_FORMAT_R32_UINT;
1185 break;
1186 case DATA_TYPE_FLOAT32:
1187 case DATA_TYPE_FLOAT64:
1188 imageFormat = VK_FORMAT_R32_SFLOAT;
1189 break;
1190 default:
1191 TCU_FAIL("Invalid data type.");
1192 }
1193
1194 const VkImageCreateInfo imageCreateInfo =
1195 {
1196 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
1197 DE_NULL, // const void* pNext;
1198 (VkImageCreateFlags)0u, // VkImageCreateFlags flags;
1199 VK_IMAGE_TYPE_2D, // VkImageType imageType;
1200 imageFormat, // VkFormat format;
1201 {
1202 DIM*NUM_WORKGROUP_EACH_DIM, // deUint32 width;
1203 DIM*NUM_WORKGROUP_EACH_DIM, // deUint32 height;
1204 1u // deUint32 depth;
1205 }, // VkExtent3D extent;
1206 1u, // deUint32 mipLevels;
1207 1u, // deUint32 arrayLayers;
1208 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
1209 VK_IMAGE_TILING_OPTIMAL, // VkImageTiling tiling;
1210 VK_IMAGE_USAGE_STORAGE_BIT
1211 | VK_IMAGE_USAGE_TRANSFER_SRC_BIT
1212 | VK_IMAGE_USAGE_TRANSFER_DST_BIT, // VkImageUsageFlags usage;
1213 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
1214 0u, // deUint32 queueFamilyIndexCount;
1215 DE_NULL, // const deUint32* pQueueFamilyIndices;
1216 VK_IMAGE_LAYOUT_UNDEFINED // VkImageLayout initialLayout;
1217 };
1218 VkImageViewCreateInfo imageViewCreateInfo =
1219 {
1220 VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, // VkStructureType sType;
1221 DE_NULL, // const void* pNext;
1222 (VkImageViewCreateFlags)0u, // VkImageViewCreateFlags flags;
1223 DE_NULL, // VkImage image;
1224 VK_IMAGE_VIEW_TYPE_2D, // VkImageViewType viewType;
1225 imageFormat, // VkFormat format;
1226 {
1227 VK_COMPONENT_SWIZZLE_R, // VkComponentSwizzle r;
1228 VK_COMPONENT_SWIZZLE_G, // VkComponentSwizzle g;
1229 VK_COMPONENT_SWIZZLE_B, // VkComponentSwizzle b;
1230 VK_COMPONENT_SWIZZLE_A // VkComponentSwizzle a;
1231 }, // VkComponentMapping components;
1232 {
1233 VK_IMAGE_ASPECT_COLOR_BIT, // VkImageAspectFlags aspectMask;
1234 0u, // deUint32 baseMipLevel;
1235 1u, // deUint32 levelCount;
1236 0u, // deUint32 baseArrayLayer;
1237 1u // deUint32 layerCount;
1238 } // VkImageSubresourceRange subresourceRange;
1239 };
1240
1241
1242 de::MovePtr<ImageWithMemory> images[2];
1243 Move<VkImageView> imageViews[2];
1244 vk::VkDescriptorImageInfo imageDescriptors[2];
1245
1246 for (deUint32 i = 0; i < 2; ++i)
1247 {
1248
1249 bool local;
1250 switch (i)
1251 {
1252 default: DE_ASSERT(0); // fall through
1253 case 0:
1254 if (m_data.payloadSC != SC_IMAGE)
1255 continue;
1256 local = m_data.payloadMemLocal;
1257 break;
1258 case 1:
1259 if (m_data.guardSC != SC_IMAGE)
1260 continue;
1261 local = m_data.guardMemLocal;
1262 break;
1263 }
1264
1265 try
1266 {
1267 images[i] = de::MovePtr<ImageWithMemory>(new ImageWithMemory(
1268 vk, device, allocator, imageCreateInfo, local ? MemoryRequirement::Local : MemoryRequirement::NonLocal));
1269 }
1270 catch (const tcu::NotSupportedError&)
1271 {
1272 if (!local)
1273 {
1274 TCU_THROW(NotSupportedError, "Test variant uses non-device-local memory, which is not supported");
1275 }
1276 throw;
1277 }
1278 imageViewCreateInfo.image = **images[i];
1279 imageViews[i] = createImageView(vk, device, &imageViewCreateInfo, NULL);
1280
1281 imageDescriptors[i] = makeDescriptorImageInfo(DE_NULL, *imageViews[i], VK_IMAGE_LAYOUT_GENERAL);
1282 }
1283
1284 vk::DescriptorSetLayoutBuilder layoutBuilder;
1285
1286 switch (m_data.payloadSC)
1287 {
1288 default:
1289 case SC_BUFFER: layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, allShaderStages); break;
1290 case SC_IMAGE: layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, allShaderStages); break;
1291 }
1292 switch (m_data.guardSC)
1293 {
1294 default:
1295 case SC_BUFFER: layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, allShaderStages); break;
1296 case SC_IMAGE: layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, allShaderStages); break;
1297 }
1298 layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, allShaderStages);
1299
1300 vk::Unique<vk::VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
1301
1302 vk::Unique<vk::VkDescriptorPool> descriptorPool(vk::DescriptorPoolBuilder()
1303 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 3u)
1304 .addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 3u)
1305 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
1306 vk::Unique<vk::VkDescriptorSet> descriptorSet (makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
1307
1308 vk::DescriptorSetUpdateBuilder setUpdateBuilder;
1309 switch (m_data.payloadSC)
1310 {
1311 default: DE_ASSERT(0); // fall through
1312 case SC_PHYSBUFFER:
1313 case SC_WORKGROUP:
1314 break;
1315 case SC_BUFFER:
1316 setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(0),
1317 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[0]);
1318 break;
1319 case SC_IMAGE:
1320 setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(0),
1321 VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptors[0]);
1322 break;
1323 }
1324 switch (m_data.guardSC)
1325 {
1326 default: DE_ASSERT(0); // fall through
1327 case SC_PHYSBUFFER:
1328 case SC_WORKGROUP:
1329 break;
1330 case SC_BUFFER:
1331 setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(1),
1332 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[1]);
1333 break;
1334 case SC_IMAGE:
1335 setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(1),
1336 VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageDescriptors[1]);
1337 break;
1338 }
1339 setUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding(2),
1340 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptors[2]);
1341
1342 setUpdateBuilder.update(vk, device);
1343
1344 const VkPushConstantRange pushConstRange =
1345 {
1346 allShaderStages, // VkShaderStageFlags stageFlags
1347 0, // deUint32 offset
1348 16 // deUint32 size
1349 };
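	// 16 bytes covers the worst case: two 8-byte buffer_reference device addresses
	// (payloadref at offset 0, guard at offset 8), matching the layout(offset = ...)
	// push constant members declared in the generated shaders.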
1350
1351 const VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo =
1352 {
1353 VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, // sType
1354 DE_NULL, // pNext
1355 (VkPipelineLayoutCreateFlags)0,
1356 1, // setLayoutCount
1357 &descriptorSetLayout.get(), // pSetLayouts
1358 1u, // pushConstantRangeCount
1359 &pushConstRange, // pPushConstantRanges
1360 };
1361
1362 Move<VkPipelineLayout> pipelineLayout = createPipelineLayout(vk, device, &pipelineLayoutCreateInfo, NULL);
1363
1364 Move<VkPipeline> pipeline;
1365 Move<VkRenderPass> renderPass;
1366 Move<VkFramebuffer> framebuffer;
1367
1368 VkPipelineBindPoint bindPoint = m_data.stage == STAGE_COMPUTE ? VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS;
1369
1370 const deUint32 specData[2] = {DIM, NUM_WORKGROUP_EACH_DIM};
1371
	const vk::VkSpecializationMapEntry entries[2] =
1373 {
1374 {0, sizeof(deUint32) * 0, sizeof(deUint32)},
1375 {1, sizeof(deUint32) * 1, sizeof(deUint32)},
1376 };
1377
1378 const vk::VkSpecializationInfo specInfo =
1379 {
1380 2, // mapEntryCount
1381 entries, // pMapEntries
1382 sizeof(specData), // dataSize
1383 specData // pData
1384 };
1385
1386 if (m_data.stage == STAGE_COMPUTE)
1387 {
1388 const Unique<VkShaderModule> shader (createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0));
1389
1390 const VkPipelineShaderStageCreateInfo shaderCreateInfo =
1391 {
1392 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1393 DE_NULL,
1394 (VkPipelineShaderStageCreateFlags)0,
1395 VK_SHADER_STAGE_COMPUTE_BIT, // stage
1396 *shader, // shader
1397 "main",
1398 &specInfo, // pSpecializationInfo
1399 };
1400
1401 const VkComputePipelineCreateInfo pipelineCreateInfo =
1402 {
1403 VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
1404 DE_NULL,
1405 0u, // flags
1406 shaderCreateInfo, // cs
1407 *pipelineLayout, // layout
1408 (vk::VkPipeline)0, // basePipelineHandle
1409 0u, // basePipelineIndex
1410 };
1411 pipeline = createComputePipeline(vk, device, DE_NULL, &pipelineCreateInfo, NULL);
1412 }
1413 else
1414 {
1415
1416 const vk::VkSubpassDescription subpassDesc =
1417 {
1418 (vk::VkSubpassDescriptionFlags)0,
1419 vk::VK_PIPELINE_BIND_POINT_GRAPHICS, // pipelineBindPoint
1420 0u, // inputCount
1421 DE_NULL, // pInputAttachments
1422 0u, // colorCount
1423 DE_NULL, // pColorAttachments
1424 DE_NULL, // pResolveAttachments
1425 DE_NULL, // depthStencilAttachment
1426 0u, // preserveCount
1427 DE_NULL, // pPreserveAttachments
1428
1429 };
1430 const vk::VkRenderPassCreateInfo renderPassParams =
1431 {
1432 vk::VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, // sType
1433 DE_NULL, // pNext
1434 (vk::VkRenderPassCreateFlags)0,
1435 0u, // attachmentCount
1436 DE_NULL, // pAttachments
1437 1u, // subpassCount
1438 &subpassDesc, // pSubpasses
1439 0u, // dependencyCount
1440 DE_NULL, // pDependencies
1441 };
1442
1443 renderPass = createRenderPass(vk, device, &renderPassParams);
1444
1445 const vk::VkFramebufferCreateInfo framebufferParams =
1446 {
1447 vk::VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, // sType
1448 DE_NULL, // pNext
1449 (vk::VkFramebufferCreateFlags)0,
1450 *renderPass, // renderPass
1451 0u, // attachmentCount
1452 DE_NULL, // pAttachments
1453 DIM*NUM_WORKGROUP_EACH_DIM, // width
1454 DIM*NUM_WORKGROUP_EACH_DIM, // height
1455 1u, // layers
1456 };
1457
1458 framebuffer = createFramebuffer(vk, device, &framebufferParams);
1459
1460 const VkPipelineVertexInputStateCreateInfo vertexInputStateCreateInfo =
1461 {
1462 VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType;
1463 DE_NULL, // const void* pNext;
1464 (VkPipelineVertexInputStateCreateFlags)0, // VkPipelineVertexInputStateCreateFlags flags;
1465 0u, // deUint32 vertexBindingDescriptionCount;
1466 DE_NULL, // const VkVertexInputBindingDescription* pVertexBindingDescriptions;
1467 0u, // deUint32 vertexAttributeDescriptionCount;
1468 DE_NULL // const VkVertexInputAttributeDescription* pVertexAttributeDescriptions;
1469 };
1470
1471 const VkPipelineInputAssemblyStateCreateInfo inputAssemblyStateCreateInfo =
1472 {
1473 VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, // VkStructureType sType;
1474 DE_NULL, // const void* pNext;
1475 (VkPipelineInputAssemblyStateCreateFlags)0, // VkPipelineInputAssemblyStateCreateFlags flags;
1476 (m_data.stage == STAGE_VERTEX) ? VK_PRIMITIVE_TOPOLOGY_POINT_LIST : VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, // VkPrimitiveTopology topology;
1477 VK_FALSE // VkBool32 primitiveRestartEnable;
1478 };
1479
1480 const VkPipelineRasterizationStateCreateInfo rasterizationStateCreateInfo =
1481 {
1482 VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, // VkStructureType sType;
1483 DE_NULL, // const void* pNext;
1484 (VkPipelineRasterizationStateCreateFlags)0, // VkPipelineRasterizationStateCreateFlags flags;
1485 VK_FALSE, // VkBool32 depthClampEnable;
1486 (m_data.stage == STAGE_VERTEX) ? VK_TRUE : VK_FALSE, // VkBool32 rasterizerDiscardEnable;
1487 VK_POLYGON_MODE_FILL, // VkPolygonMode polygonMode;
1488 VK_CULL_MODE_NONE, // VkCullModeFlags cullMode;
1489 VK_FRONT_FACE_CLOCKWISE, // VkFrontFace frontFace;
1490 VK_FALSE, // VkBool32 depthBiasEnable;
1491 0.0f, // float depthBiasConstantFactor;
1492 0.0f, // float depthBiasClamp;
1493 0.0f, // float depthBiasSlopeFactor;
1494 1.0f // float lineWidth;
1495 };
1496
1497 const VkPipelineMultisampleStateCreateInfo multisampleStateCreateInfo =
1498 {
1499 VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, // VkStructureType sType
1500 DE_NULL, // const void* pNext
1501 0u, // VkPipelineMultisampleStateCreateFlags flags
1502 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits rasterizationSamples
1503 VK_FALSE, // VkBool32 sampleShadingEnable
1504 1.0f, // float minSampleShading
1505 DE_NULL, // const VkSampleMask* pSampleMask
1506 VK_FALSE, // VkBool32 alphaToCoverageEnable
1507 VK_FALSE // VkBool32 alphaToOneEnable
1508 };
1509
1510 VkViewport viewport = makeViewport(DIM*NUM_WORKGROUP_EACH_DIM, DIM*NUM_WORKGROUP_EACH_DIM);
1511 VkRect2D scissor = makeRect2D(DIM*NUM_WORKGROUP_EACH_DIM, DIM*NUM_WORKGROUP_EACH_DIM);
1512
1513 const VkPipelineViewportStateCreateInfo viewportStateCreateInfo =
1514 {
1515 VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, // VkStructureType sType
1516 DE_NULL, // const void* pNext
1517 (VkPipelineViewportStateCreateFlags)0, // VkPipelineViewportStateCreateFlags flags
1518 1u, // deUint32 viewportCount
1519 &viewport, // const VkViewport* pViewports
1520 1u, // deUint32 scissorCount
1521 &scissor // const VkRect2D* pScissors
1522 };
1523
1524 Move<VkShaderModule> fs;
1525 Move<VkShaderModule> vs;
1526
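// Vertex-stage tests attach only the "test" vertex shader (numStages == 1) and rely
// on rasterizer discard; fragment-stage tests pair the separate "vert" vertex shader
// with the "test" fragment shader (numStages == 2).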
1527 deUint32 numStages;
1528 if (m_data.stage == STAGE_VERTEX)
1529 {
1530 vs = createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0);
1531 fs = createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0); // bogus
1532 numStages = 1u;
1533 }
1534 else
1535 {
1536 vs = createShaderModule(vk, device, m_context.getBinaryCollection().get("vert"), 0);
1537 fs = createShaderModule(vk, device, m_context.getBinaryCollection().get("test"), 0);
1538 numStages = 2u;
1539 }
1540
1541 const VkPipelineShaderStageCreateInfo shaderCreateInfo[2] = {
1542 {
1543 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1544 DE_NULL,
1545 (VkPipelineShaderStageCreateFlags)0,
1546 VK_SHADER_STAGE_VERTEX_BIT, // stage
1547 *vs, // shader
1548 "main",
1549 &specInfo, // pSpecializationInfo
1550 },
1551 {
1552 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1553 DE_NULL,
1554 (VkPipelineShaderStageCreateFlags)0,
1555 VK_SHADER_STAGE_FRAGMENT_BIT, // stage
1556 *fs, // shader
1557 "main",
1558 &specInfo, // pSpecializationInfo
1559 }
1560 };
1561
1562 const VkGraphicsPipelineCreateInfo graphicsPipelineCreateInfo =
1563 {
1564 VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, // VkStructureType sType;
1565 DE_NULL, // const void* pNext;
1566 (VkPipelineCreateFlags)0, // VkPipelineCreateFlags flags;
1567 numStages, // deUint32 stageCount;
1568 &shaderCreateInfo[0], // const VkPipelineShaderStageCreateInfo* pStages;
1569 &vertexInputStateCreateInfo, // const VkPipelineVertexInputStateCreateInfo* pVertexInputState;
1570 &inputAssemblyStateCreateInfo, // const VkPipelineInputAssemblyStateCreateInfo* pInputAssemblyState;
1571 DE_NULL, // const VkPipelineTessellationStateCreateInfo* pTessellationState;
1572 &viewportStateCreateInfo, // const VkPipelineViewportStateCreateInfo* pViewportState;
1573 &rasterizationStateCreateInfo, // const VkPipelineRasterizationStateCreateInfo* pRasterizationState;
1574 &multisampleStateCreateInfo, // const VkPipelineMultisampleStateCreateInfo* pMultisampleState;
1575 DE_NULL, // const VkPipelineDepthStencilStateCreateInfo* pDepthStencilState;
1576 DE_NULL, // const VkPipelineColorBlendStateCreateInfo* pColorBlendState;
1577 DE_NULL, // const VkPipelineDynamicStateCreateInfo* pDynamicState;
1578 pipelineLayout.get(), // VkPipelineLayout layout;
1579 renderPass.get(), // VkRenderPass renderPass;
1580 0u, // deUint32 subpass;
1581 DE_NULL, // VkPipeline basePipelineHandle;
1582 0 // int basePipelineIndex;
1583 };
1584
1585 pipeline = createGraphicsPipeline(vk, device, DE_NULL, &graphicsPipelineCreateInfo);
1586 }
1587
1588 const VkQueue queue = m_context.getUniversalQueue();
1589 Move<VkCommandPool> cmdPool = createCommandPool(vk, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, m_context.getUniversalQueueFamilyIndex());
1590 Move<VkCommandBuffer> cmdBuffer = allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1591
1592 VkBufferDeviceAddressInfo addrInfo =
1593 {
1594 VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, // VkStructureType sType;
1595 DE_NULL, // const void* pNext;
1596 0, // VkBuffer buffer
1597 };
1598
1599 VkImageSubresourceRange range = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
1600 VkClearValue clearColor = makeClearValueColorU32(0,0,0,0);
1601
1602 VkMemoryBarrier memBarrier =
1603 {
1604 VK_STRUCTURE_TYPE_MEMORY_BARRIER, // sType
1605 DE_NULL, // pNext
1606 0u, // srcAccessMask
1607 0u, // dstAccessMask
1608 };
1609
1610 const VkBufferCopy copyParams =
1611 {
1612 (VkDeviceSize)0u, // srcOffset
1613 (VkDeviceSize)0u, // dstOffset
1614 bufferSizes[2] // size
1615 };
1616
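// Record and submit NUM_SUBMITS command buffers. The first submission zero-fills the
// failure buffer, every submission re-runs the test shader 50 times with the payload
// and guard resources cleared before each run, and the last submission copies the
// failure buffer back for host inspection.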
1617 const deUint32 NUM_SUBMITS = 4;
1618
1619 for (deUint32 x = 0; x < NUM_SUBMITS; ++x)
1620 {
1621 beginCommandBuffer(vk, *cmdBuffer, 0u);
1622
1623 if (x == 0)
1624 vk.cmdFillBuffer(*cmdBuffer, **buffers[2], 0, bufferSizes[2], 0);
1625
1626 for (deUint32 i = 0; i < 2; ++i)
1627 {
1628 if (!images[i])
1629 continue;
1630
1631 const VkImageMemoryBarrier imageBarrier =
1632 {
1633 VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, // VkStructureType sType
1634 DE_NULL, // const void* pNext
1635 0u, // VkAccessFlags srcAccessMask
1636 VK_ACCESS_TRANSFER_WRITE_BIT, // VkAccessFlags dstAccessMask
1637 VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout oldLayout
1638 VK_IMAGE_LAYOUT_GENERAL, // VkImageLayout newLayout
1639 VK_QUEUE_FAMILY_IGNORED, // uint32_t srcQueueFamilyIndex
1640 VK_QUEUE_FAMILY_IGNORED, // uint32_t dstQueueFamilyIndex
1641 **images[i], // VkImage image
1642 {
1643 VK_IMAGE_ASPECT_COLOR_BIT, // VkImageAspectFlags aspectMask
1644 0u, // uint32_t baseMipLevel
1645 1u, // uint32_t mipLevels,
1646 0u, // uint32_t baseArray
1647 1u, // uint32_t arraySize
1648 }
1649 };
1650
1651 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
1652 (VkDependencyFlags)0,
1653 0, (const VkMemoryBarrier*)DE_NULL,
1654 0, (const VkBufferMemoryBarrier*)DE_NULL,
1655 1, &imageBarrier);
1656 }
1657
1658 vk.cmdBindDescriptorSets(*cmdBuffer, bindPoint, *pipelineLayout, 0u, 1, &*descriptorSet, 0u, DE_NULL);
1659 vk.cmdBindPipeline(*cmdBuffer, bindPoint, *pipeline);
1660
1661 if (m_data.payloadSC == SC_PHYSBUFFER)
1662 {
1663 addrInfo.buffer = **buffers[0];
1664 VkDeviceAddress addr = vk.getBufferDeviceAddress(device, &addrInfo);
1665 vk.cmdPushConstants(*cmdBuffer, *pipelineLayout, allShaderStages,
1666 0, sizeof(VkDeviceSize), &addr);
1667 }
1668 if (m_data.guardSC == SC_PHYSBUFFER)
1669 {
1670 addrInfo.buffer = **buffers[1];
1671 VkDeviceAddress addr = vk.getBufferDeviceAddress(device, &addrInfo);
1672 vk.cmdPushConstants(*cmdBuffer, *pipelineLayout, allShaderStages,
1673 8, sizeof(VkDeviceSize), &addr);
1674 }
1675
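// Per iteration: reset the payload and guard to zero, make the transfer writes
// visible to all shader stages, run the shader, then make the shader writes
// available to the next iteration's transfers.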
1676 for (deUint32 iters = 0; iters < 50; ++iters)
1677 {
1678 for (deUint32 i = 0; i < 2; ++i)
1679 {
1680 if (buffers[i])
1681 vk.cmdFillBuffer(*cmdBuffer, **buffers[i], 0, bufferSizes[i], 0);
1682 if (images[i])
1683 vk.cmdClearColorImage(*cmdBuffer, **images[i], VK_IMAGE_LAYOUT_GENERAL, &clearColor.color, 1, &range);
1684 }
1685
1686 memBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
1687 memBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
1688 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, allPipelineStages,
1689 0, 1, &memBarrier, 0, DE_NULL, 0, DE_NULL);
1690
1691 if (m_data.stage == STAGE_COMPUTE)
1692 {
1693 vk.cmdDispatch(*cmdBuffer, NUM_WORKGROUP_EACH_DIM, NUM_WORKGROUP_EACH_DIM, 1);
1694 }
1695 else
1696 {
1697 beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer,
1698 makeRect2D(DIM*NUM_WORKGROUP_EACH_DIM, DIM*NUM_WORKGROUP_EACH_DIM),
1699 0, DE_NULL, VK_SUBPASS_CONTENTS_INLINE);
1700 // Draw a point cloud for vertex shader testing, and a single quad for fragment shader testing
1701 if (m_data.stage == STAGE_VERTEX)
1702 {
1703 vk.cmdDraw(*cmdBuffer, DIM*DIM*NUM_WORKGROUP_EACH_DIM*NUM_WORKGROUP_EACH_DIM, 1u, 0u, 0u);
1704 }
1705 else
1706 {
1707 vk.cmdDraw(*cmdBuffer, 4u, 1u, 0u, 0u);
1708 }
1709 endRenderPass(vk, *cmdBuffer);
1710 }
1711
1712 memBarrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
1713 memBarrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT;
1714 vk.cmdPipelineBarrier(*cmdBuffer, allPipelineStages, VK_PIPELINE_STAGE_TRANSFER_BIT,
1715 0, 1, &memBarrier, 0, DE_NULL, 0, DE_NULL);
1716 }
1717
1718 if (x == NUM_SUBMITS - 1)
1719 {
1720 vk.cmdCopyBuffer(*cmdBuffer, **buffers[2], **copyBuffer, 1, &copyParams);
1721 memBarrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
1722 memBarrier.dstAccessMask = VK_ACCESS_HOST_READ_BIT;
1723 vk.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT,
1724 0, 1, &memBarrier, 0, DE_NULL, 0, DE_NULL);
1725 }
1726
1727 endCommandBuffer(vk, *cmdBuffer);
1728
1729 submitCommandsAndWait(vk, device, queue, cmdBuffer.get());
1730
1731 m_context.resetCommandPoolForVKSC(device, *cmdPool);
1732 }
1733
1734 tcu::TestLog& log = m_context.getTestContext().getLog();
1735
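// A nonzero value in the copied-back failure buffer marks a failing invocation;
// log the first 256 failures and the total error count.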
1736 deUint32 *ptr = (deUint32 *)copyBuffer->getAllocation().getHostPtr();
1737 invalidateAlloc(vk, device, copyBuffer->getAllocation());
1738 qpTestResult res = QP_TEST_RESULT_PASS;
1739
1740 deUint32 numErrors = 0;
1741 for (deUint32 i = 0; i < NUM_INVOCATIONS; ++i)
1742 {
1743 if (ptr[i] != 0)
1744 {
1745 if (numErrors < 256)
1746 {
1747 log << tcu::TestLog::Message << "Failed invocation: " << i << tcu::TestLog::EndMessage;
1748 }
1749 numErrors++;
1750 res = QP_TEST_RESULT_FAIL;
1751 }
1752 }
1753
1754 if (numErrors)
1755 {
1756 log << tcu::TestLog::Message << "Total Errors: " << numErrors << tcu::TestLog::EndMessage;
1757 }
1758
1759 return tcu::TestStatus(res, qpGetTestResultName(res));
1760 }
1761
1762 #ifndef CTS_USES_VULKANSC
1763 void checkPermutedIndexTestSupport (Context& context, std::string testName)
1764 {
1765 DE_UNREF(testName);
1766
1767 const auto maxComputeWorkGroupCount = context.getDeviceProperties().limits.maxComputeWorkGroupCount;
1768 const auto maxComputeWorkGroupSize = context.getDeviceProperties().limits.maxComputeWorkGroupSize;
1769 const auto maxComputeWorkGroupInvocations = context.getDeviceProperties().limits.maxComputeWorkGroupInvocations;
1770
1771 if (maxComputeWorkGroupCount[0] < 256u)
1772 TCU_THROW(NotSupportedError, "Minimum of 256 required for maxComputeWorkGroupCount.x");
1773
1774 if (maxComputeWorkGroupSize[0] < 256u)
1775 TCU_THROW(NotSupportedError, "Minimum of 256 required for maxComputeWorkGroupSize.x");
1776
1777 if (maxComputeWorkGroupInvocations < 256u)
1778 TCU_THROW(NotSupportedError, "Minimum of 256 required for maxComputeWorkGroupInvocations");
1779 }
1780
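// The permuted-index cases are Amber scripts loaded from the data directory below;
// each one reuses the compute-limit support check above.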
1781 tcu::TestCaseGroup* createPermutedIndexTests (tcu::TestContext& testCtx)
1782 {
1783 de::MovePtr<tcu::TestCaseGroup> permutedIndex (new tcu::TestCaseGroup(testCtx, "permuted_index"));
1784 static const char dataDir[] = "memory_model/message_passing/permuted_index";
1785 static const std::string cases[] =
1786 {
1787 "barrier",
1788 "release_acquire",
1789 "release_acquire_atomic_payload"
1790 };
1791
1792 for (const auto& test : cases)
1793 {
1794 cts_amber::AmberTestCase* testCase = cts_amber::createAmberTestCase(testCtx, test.c_str(), dataDir, (test + ".amber").c_str());
1795 testCase->setCheckSupportCallback(checkPermutedIndexTestSupport);
1796
1797 permutedIndex->addChild(testCase);
1798 }
1799
1800 return permutedIndex.release();
1801 }
1802 #endif // CTS_USES_VULKANSC
1803
1804 } // anonymous
1805
1806 tcu::TestCaseGroup* createTests (tcu::TestContext& testCtx, const std::string& name)
1807 {
1808 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(
1809 testCtx, name.c_str(), "Memory model tests"));
1810
1811 typedef struct
1812 {
1813 deUint32 value;
1814 const char* name;
1815 } TestGroupCase;
1816
1817 TestGroupCase ttCases[] =
1818 {
1819 { TT_MP, "message_passing"},
1820 { TT_WAR, "write_after_read"},
1821 };
1822
1823 TestGroupCase core11Cases[] =
1824 {
1825 // Supported by Vulkan 1.1
1826 { 1, "core11"},
1827 // Requires VK_KHR_vulkan_memory_model extension
1828 { 0, "ext"},
1829 };
1830
1831 TestGroupCase dtCases[] =
1832 {
1833 // uint32_t atomics
1834 { DATA_TYPE_UINT, "u32"},
1835 // uint64_t atomics
1836 { DATA_TYPE_UINT64, "u64"},
1837 // float32 atomics
1838 { DATA_TYPE_FLOAT32, "f32"},
1839 // float64 atomics
1840 { DATA_TYPE_FLOAT64, "f64"},
1841 };
1842
1843 TestGroupCase cohCases[] =
1844 {
1845 // coherent payload variable
1846 { 1, "coherent"},
1847 // noncoherent payload variable
1848 { 0, "noncoherent"},
1849 };
1850
1851 TestGroupCase stCases[] =
1852 {
1853 // release fence, acquire fence
1854 { ST_FENCE_FENCE, "fence_fence"},
1855 // release fence, atomic acquire
1856 { ST_FENCE_ATOMIC, "fence_atomic"},
1857 // atomic release, acquire fence
1858 { ST_ATOMIC_FENCE, "atomic_fence"},
1859 // atomic release, atomic acquire
1860 { ST_ATOMIC_ATOMIC, "atomic_atomic"},
1861 // control barrier
1862 { ST_CONTROL_BARRIER, "control_barrier"},
1863 // control barrier with release/acquire
1864 { ST_CONTROL_AND_MEMORY_BARRIER, "control_and_memory_barrier"},
1865 };
1866
1867 TestGroupCase rmwCases[] =
1868 {
1869 { 0, "atomicwrite"},
1870 { 1, "atomicrmw"},
1871 };
1872
1873 TestGroupCase scopeCases[] =
1874 {
1875 { SCOPE_DEVICE, "device"},
1876 { SCOPE_QUEUEFAMILY, "queuefamily"},
1877 { SCOPE_WORKGROUP, "workgroup"},
1878 { SCOPE_SUBGROUP, "subgroup"},
1879 };
1880
1881 TestGroupCase plCases[] =
1882 {
1883 // payload variable in non-local memory
1884 { 0, "payload_nonlocal"},
1885 // payload variable in local memory
1886 { 1, "payload_local"},
1887 };
1888
1889 TestGroupCase pscCases[] =
1890 {
1891 // payload variable in buffer memory
1892 { SC_BUFFER, "buffer"},
1893 // payload variable in image memory
1894 { SC_IMAGE, "image"},
1895 // payload variable in workgroup memory
1896 { SC_WORKGROUP, "workgroup"},
1897 // payload variable in physical storage buffer memory
1898 { SC_PHYSBUFFER,"physbuffer"},
1899 };
1900
1901 TestGroupCase glCases[] =
1902 {
1903 // guard variable in non-local memory
1904 { 0, "guard_nonlocal"},
1905 // guard variable in local memory
1906 { 1, "guard_local"},
1907 };
1908
1909 TestGroupCase gscCases[] =
1910 {
1911 // guard variable in buffer memory
1912 { SC_BUFFER, "buffer"},
1913 // guard variable in image memory
1914 { SC_IMAGE, "image"},
1915 // guard variable in workgroup memory
1916 { SC_WORKGROUP, "workgroup"},
1917 // guard variable in physical storage buffer memory
1918 { SC_PHYSBUFFER,"physbuffer"},
1919 };
1920
1921 TestGroupCase stageCases[] =
1922 {
1923 { STAGE_COMPUTE, "comp"},
1924 { STAGE_VERTEX, "vert"},
1925 { STAGE_FRAGMENT, "frag"},
1926 };
1927
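// Build the test hierarchy: testType / core11|ext / data type / coherency / sync type /
// atomic kind / scope / payload locality / payload storage class / guard locality /
// guard storage class, with one leaf case per shader stage.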
1928 for (int ttNdx = 0; ttNdx < DE_LENGTH_OF_ARRAY(ttCases); ttNdx++)
1929 {
1930 de::MovePtr<tcu::TestCaseGroup> ttGroup(new tcu::TestCaseGroup(testCtx, ttCases[ttNdx].name));
1931
1932 #ifndef CTS_USES_VULKANSC
1933 // Permuted index tests for message passing.
1934 if (ttCases[ttNdx].value == TT_MP)
1935 ttGroup->addChild(createPermutedIndexTests(testCtx));
1936 #endif // CTS_USES_VULKANSC
1937
1938 for (int core11Ndx = 0; core11Ndx < DE_LENGTH_OF_ARRAY(core11Cases); core11Ndx++)
1939 {
1940 de::MovePtr<tcu::TestCaseGroup> core11Group(new tcu::TestCaseGroup(testCtx, core11Cases[core11Ndx].name));
1941 for (int dtNdx = 0; dtNdx < DE_LENGTH_OF_ARRAY(dtCases); dtNdx++)
1942 {
1943 de::MovePtr<tcu::TestCaseGroup> dtGroup(new tcu::TestCaseGroup(testCtx, dtCases[dtNdx].name));
1944 for (int cohNdx = 0; cohNdx < DE_LENGTH_OF_ARRAY(cohCases); cohNdx++)
1945 {
1946 de::MovePtr<tcu::TestCaseGroup> cohGroup(new tcu::TestCaseGroup(testCtx, cohCases[cohNdx].name));
1947 for (int stNdx = 0; stNdx < DE_LENGTH_OF_ARRAY(stCases); stNdx++)
1948 {
1949 de::MovePtr<tcu::TestCaseGroup> stGroup(new tcu::TestCaseGroup(testCtx, stCases[stNdx].name));
1950 for (int rmwNdx = 0; rmwNdx < DE_LENGTH_OF_ARRAY(rmwCases); rmwNdx++)
1951 {
1952 de::MovePtr<tcu::TestCaseGroup> rmwGroup(new tcu::TestCaseGroup(testCtx, rmwCases[rmwNdx].name));
1953 for (int scopeNdx = 0; scopeNdx < DE_LENGTH_OF_ARRAY(scopeCases); scopeNdx++)
1954 {
1955 de::MovePtr<tcu::TestCaseGroup> scopeGroup(new tcu::TestCaseGroup(testCtx, scopeCases[scopeNdx].name));
1956 for (int plNdx = 0; plNdx < DE_LENGTH_OF_ARRAY(plCases); plNdx++)
1957 {
1958 de::MovePtr<tcu::TestCaseGroup> plGroup(new tcu::TestCaseGroup(testCtx, plCases[plNdx].name));
1959 for (int pscNdx = 0; pscNdx < DE_LENGTH_OF_ARRAY(pscCases); pscNdx++)
1960 {
1961 de::MovePtr<tcu::TestCaseGroup> pscGroup(new tcu::TestCaseGroup(testCtx, pscCases[pscNdx].name));
1962 for (int glNdx = 0; glNdx < DE_LENGTH_OF_ARRAY(glCases); glNdx++)
1963 {
1964 de::MovePtr<tcu::TestCaseGroup> glGroup(new tcu::TestCaseGroup(testCtx, glCases[glNdx].name));
1965 for (int gscNdx = 0; gscNdx < DE_LENGTH_OF_ARRAY(gscCases); gscNdx++)
1966 {
1967 de::MovePtr<tcu::TestCaseGroup> gscGroup(new tcu::TestCaseGroup(testCtx, gscCases[gscNdx].name));
1968 for (int stageNdx = 0; stageNdx < DE_LENGTH_OF_ARRAY(stageCases); stageNdx++)
1969 {
1970 CaseDef c =
1971 {
1972 !!plCases[plNdx].value, // bool payloadMemLocal;
1973 !!glCases[glNdx].value, // bool guardMemLocal;
1974 !!cohCases[cohNdx].value, // bool coherent;
1975 !!core11Cases[core11Ndx].value, // bool core11;
1976 !!rmwCases[rmwNdx].value, // bool atomicRMW;
1977 (TestType)ttCases[ttNdx].value, // TestType testType;
1978 (StorageClass)pscCases[pscNdx].value, // StorageClass payloadSC;
1979 (StorageClass)gscCases[gscNdx].value, // StorageClass guardSC;
1980 (Scope)scopeCases[scopeNdx].value, // Scope scope;
1981 (SyncType)stCases[stNdx].value, // SyncType syncType;
1982 (Stage)stageCases[stageNdx].value, // Stage stage;
1983 (DataType)dtCases[dtNdx].value, // DataType dataType;
1984 false, // bool transitive;
1985 false, // bool transitiveVis;
1986 };
1987
1988 // Mustpass11 tests should only exercise things we expect to work on
1989 // existing implementations. Exclude noncoherent tests which require
1990 // new extensions, and assume atomic synchronization wouldn't work
1991 // (i.e. atomics may be implemented as relaxed atomics). Exclude
1992 // queuefamily scope which doesn't exist in Vulkan 1.1. Exclude
1993 // physical storage buffer which doesn't support the legacy decorations.
1994 if (c.core11 &&
1995 (c.coherent == 0 ||
1996 c.syncType == ST_FENCE_ATOMIC ||
1997 c.syncType == ST_ATOMIC_FENCE ||
1998 c.syncType == ST_ATOMIC_ATOMIC ||
1999 c.dataType == DATA_TYPE_UINT64 ||
2000 c.dataType == DATA_TYPE_FLOAT64 ||
2001 c.scope == SCOPE_QUEUEFAMILY ||
2002 c.payloadSC == SC_PHYSBUFFER ||
2003 c.guardSC == SC_PHYSBUFFER))
2004 {
2005 continue;
2006 }
2007
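// Workgroup scope is only meaningful for compute shaders.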
2008 if (c.stage != STAGE_COMPUTE &&
2009 c.scope == SCOPE_WORKGROUP)
2010 {
2011 continue;
2012 }
2013
2014 // Don't exercise local and non-local for workgroup memory
2015 // Also don't exercise workgroup memory for non-compute stages
2016 if (c.payloadSC == SC_WORKGROUP && (c.payloadMemLocal != 0 || c.stage != STAGE_COMPUTE))
2017 {
2018 continue;
2019 }
2020 if (c.guardSC == SC_WORKGROUP && (c.guardMemLocal != 0 || c.stage != STAGE_COMPUTE))
2021 {
2022 continue;
2023 }
2024 // Can't do control barrier with larger than workgroup scope, or non-compute stages
2025 if ((c.syncType == ST_CONTROL_BARRIER || c.syncType == ST_CONTROL_AND_MEMORY_BARRIER) &&
2026 (c.scope == SCOPE_DEVICE || c.scope == SCOPE_QUEUEFAMILY || c.stage != STAGE_COMPUTE))
2027 {
2028 continue;
2029 }
2030
2031 // Limit RMW atomics to ST_ATOMIC_ATOMIC, just to reduce # of test cases
2032 if (c.atomicRMW && c.syncType != ST_ATOMIC_ATOMIC)
2033 {
2034 continue;
2035 }
2036
2037 // uint64/float32/float64 testing is primarily for atomics, so only test it for ST_ATOMIC_ATOMIC
2038 const bool atomicTesting = (c.dataType == DATA_TYPE_UINT64 || c.dataType == DATA_TYPE_FLOAT32 || c.dataType == DATA_TYPE_FLOAT64);
2039 if (atomicTesting && c.syncType != ST_ATOMIC_ATOMIC)
2040 {
2041 continue;
2042 }
2043
2044 // No 64-bit image types, so skip tests with both payload and guard in image memory
2045 if (c.dataType == DATA_TYPE_UINT64 && c.payloadSC == SC_IMAGE && c.guardSC == SC_IMAGE)
2046 {
2047 continue;
2048 }
2049
2050 // No support for atomic operations on 64-bit floating point images
2051 if (c.dataType == DATA_TYPE_FLOAT64 && (c.payloadSC == SC_IMAGE || c.guardSC == SC_IMAGE))
2052 {
2053 continue;
2054 }
2055 // Control barrier tests don't use a guard variable, so only run them with gsc,gl==0
2056 if ((c.syncType == ST_CONTROL_BARRIER || c.syncType == ST_CONTROL_AND_MEMORY_BARRIER) &&
2057 (c.guardSC != 0 || c.guardMemLocal != 0))
2058 {
2059 continue;
2060 }
2061
2062 gscGroup->addChild(new MemoryModelTestCase(testCtx, stageCases[stageNdx].name, c));
2063 }
2064 glGroup->addChild(gscGroup.release());
2065 }
2066 pscGroup->addChild(glGroup.release());
2067 }
2068 plGroup->addChild(pscGroup.release());
2069 }
2070 scopeGroup->addChild(plGroup.release());
2071 }
2072 rmwGroup->addChild(scopeGroup.release());
2073 }
2074 stGroup->addChild(rmwGroup.release());
2075 }
2076 cohGroup->addChild(stGroup.release());
2077 }
2078 dtGroup->addChild(cohGroup.release());
2079 }
2080 core11Group->addChild(dtGroup.release());
2081 }
2082 ttGroup->addChild(core11Group.release());
2083 }
2084 group->addChild(ttGroup.release());
2085 }
2086
2087 TestGroupCase transVisCases[] =
2088 {
2089 // destination invocation acquires
2090 { 0, "nontransvis"},
2091 // invocation 0,0 acquires
2092 { 1, "transvis"},
2093 };
2094
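// Transitivity tests: fixed to message passing at device scope in a compute shader
// with uint32 data. "transvis" cases have invocation (0,0) perform the acquire
// instead of the destination invocation.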
2095 de::MovePtr<tcu::TestCaseGroup> transGroup(new tcu::TestCaseGroup(testCtx, "transitive"));
2096 for (int cohNdx = 0; cohNdx < DE_LENGTH_OF_ARRAY(cohCases); cohNdx++)
2097 {
2098 de::MovePtr<tcu::TestCaseGroup> cohGroup(new tcu::TestCaseGroup(testCtx, cohCases[cohNdx].name));
2099 for (int stNdx = 0; stNdx < DE_LENGTH_OF_ARRAY(stCases); stNdx++)
2100 {
2101 de::MovePtr<tcu::TestCaseGroup> stGroup(new tcu::TestCaseGroup(testCtx, stCases[stNdx].name));
2102 for (int plNdx = 0; plNdx < DE_LENGTH_OF_ARRAY(plCases); plNdx++)
2103 {
2104 de::MovePtr<tcu::TestCaseGroup> plGroup(new tcu::TestCaseGroup(testCtx, plCases[plNdx].name));
2105 for (int pscNdx = 0; pscNdx < DE_LENGTH_OF_ARRAY(pscCases); pscNdx++)
2106 {
2107 de::MovePtr<tcu::TestCaseGroup> pscGroup(new tcu::TestCaseGroup(testCtx, pscCases[pscNdx].name));
2108 for (int glNdx = 0; glNdx < DE_LENGTH_OF_ARRAY(glCases); glNdx++)
2109 {
2110 de::MovePtr<tcu::TestCaseGroup> glGroup(new tcu::TestCaseGroup(testCtx, glCases[glNdx].name));
2111 for (int gscNdx = 0; gscNdx < DE_LENGTH_OF_ARRAY(gscCases); gscNdx++)
2112 {
2113 de::MovePtr<tcu::TestCaseGroup> gscGroup(new tcu::TestCaseGroup(testCtx, gscCases[gscNdx].name));
2114 for (int visNdx = 0; visNdx < DE_LENGTH_OF_ARRAY(transVisCases); visNdx++)
2115 {
2116 CaseDef c =
2117 {
2118 !!plCases[plNdx].value, // bool payloadMemLocal;
2119 !!glCases[glNdx].value, // bool guardMemLocal;
2120 !!cohCases[cohNdx].value, // bool coherent;
2121 false, // bool core11;
2122 false, // bool atomicRMW;
2123 TT_MP, // TestType testType;
2124 (StorageClass)pscCases[pscNdx].value, // StorageClass payloadSC;
2125 (StorageClass)gscCases[gscNdx].value, // StorageClass guardSC;
2126 SCOPE_DEVICE, // Scope scope;
2127 (SyncType)stCases[stNdx].value, // SyncType syncType;
2128 STAGE_COMPUTE, // Stage stage;
2129 DATA_TYPE_UINT, // DataType dataType;
2130 true, // bool transitive;
2131 !!transVisCases[visNdx].value, // bool transitiveVis;
2132 };
2133 if (c.payloadSC == SC_WORKGROUP || c.guardSC == SC_WORKGROUP)
2134 {
2135 continue;
2136 }
2137 if (c.syncType == ST_CONTROL_BARRIER || c.syncType == ST_CONTROL_AND_MEMORY_BARRIER)
2138 {
2139 continue;
2140 }
2141 gscGroup->addChild(new MemoryModelTestCase(testCtx, transVisCases[visNdx].name, c));
2142 }
2143 glGroup->addChild(gscGroup.release());
2144 }
2145 pscGroup->addChild(glGroup.release());
2146 }
2147 plGroup->addChild(pscGroup.release());
2148 }
2149 stGroup->addChild(plGroup.release());
2150 }
2151 cohGroup->addChild(stGroup.release());
2152 }
2153 transGroup->addChild(cohGroup.release());
2154 }
2155 group->addChild(transGroup.release());
2156
2157 // Padding tests.
2158 group->addChild(createPaddingTests(testCtx));
2159 // Shared memory layout tests.
2160 group->addChild(createSharedMemoryLayoutTests(testCtx));
2161
2162 return group.release();
2163 }
2164
2165 } // MemoryModel
2166 } // vkt
2167