1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2020 The Khronos Group Inc.
6 * Copyright (c) 2020 Intel Corporation
7 * Copyright (c) 2023 LunarG, Inc.
8 * Copyright (c) 2023 Nintendo
9 *
10 * Licensed under the Apache License, Version 2.0 (the "License");
11 * you may not use this file except in compliance with the License.
12 * You may obtain a copy of the License at
13 *
14 * http://www.apache.org/licenses/LICENSE-2.0
15 *
16 * Unless required by applicable law or agreed to in writing, software
17 * distributed under the License is distributed on an "AS IS" BASIS,
18 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19 * See the License for the specific language governing permissions and
20 * limitations under the License.
21 *
22 *//*!
23 * \file
24 * \brief VK_KHR_workgroup_memory_explicit_layout tests
25 *//*--------------------------------------------------------------------*/
26
27 #include "vktComputeWorkgroupMemoryExplicitLayoutTests.hpp"
28 #include "vktAmberTestCase.hpp"
29 #include "vktTestCase.hpp"
30 #include "vktTestCaseUtil.hpp"
31 #include "vktTestGroupUtil.hpp"
32
33 #include "vkBufferWithMemory.hpp"
34 #include "vkImageWithMemory.hpp"
35 #include "vkQueryUtil.hpp"
36 #include "vkBuilderUtil.hpp"
37 #include "vkCmdUtil.hpp"
38 #include "vkTypeUtil.hpp"
39 #include "vkObjUtil.hpp"
40 #include "vkDefs.hpp"
41 #include "vkRef.hpp"
42
43 #include "tcuCommandLine.hpp"
44 #include "tcuTestLog.hpp"
45
46 #include "deRandom.hpp"
47 #include "deStringUtil.hpp"
48 #include "deUniquePtr.hpp"
49
50 #include <algorithm>
51 #include <vector>
52
53 using namespace vk;
54
55 namespace vkt
56 {
57 namespace compute
58 {
59 namespace
60 {
61
62 struct CheckSupportParams
63 {
64 bool needsScalar;
65 bool needsInt8;
66 bool needsInt16;
67 bool needsInt64;
68 bool needsFloat16;
69 bool needsFloat64;
70 vk::ComputePipelineConstructionType computePipelineConstructionType;
71
useTypevkt::compute::__anon27878::CheckSupportParams72 void useType(glu::DataType dt)
73 {
74 using namespace glu;
75
76 needsInt8 |= isDataTypeIntOrIVec8Bit(dt) || isDataTypeUintOrUVec8Bit(dt);
77 needsInt16 |= isDataTypeIntOrIVec16Bit(dt) || isDataTypeUintOrUVec16Bit(dt);
78 needsFloat16 |= isDataTypeFloat16OrVec(dt);
79 needsFloat64 |= isDataTypeDoubleOrDVec(dt);
80 }
81 };
82
checkSupportWithParams(Context& context, const CheckSupportParams& params)83 void checkSupportWithParams(Context& context, const CheckSupportParams& params)
84 {
85 context.requireDeviceFunctionality("VK_KHR_workgroup_memory_explicit_layout");
86 context.requireDeviceFunctionality("VK_KHR_spirv_1_4");
87 checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(), params.computePipelineConstructionType);
88
89 VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR layout_features;
90 deMemset(&layout_features, 0, sizeof(layout_features));
91 layout_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR;
92 layout_features.pNext = DE_NULL;
93
94 VkPhysicalDeviceShaderFloat16Int8Features f16_i8_features;
95 deMemset(&f16_i8_features, 0, sizeof(f16_i8_features));
96 f16_i8_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES;
97 f16_i8_features.pNext = &layout_features;
98
99 VkPhysicalDeviceFeatures2 features2;
100 deMemset(&features2, 0, sizeof(features2));
101 features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
102 features2.pNext = &f16_i8_features;
103 context.getInstanceInterface().getPhysicalDeviceFeatures2(context.getPhysicalDevice(), &features2);
104
105 if (params.needsScalar)
106 {
107 if (layout_features.workgroupMemoryExplicitLayoutScalarBlockLayout != VK_TRUE)
108 TCU_THROW(NotSupportedError, "workgroupMemoryExplicitLayoutScalarBlockLayout not supported");
109 }
110
111 if (params.needsInt8)
112 {
113 if (f16_i8_features.shaderInt8 != VK_TRUE)
114 TCU_THROW(NotSupportedError, "shaderInt8 not supported");
115 if (layout_features.workgroupMemoryExplicitLayout8BitAccess != VK_TRUE)
116 TCU_THROW(NotSupportedError, "workgroupMemoryExplicitLayout8BitAccess not supported");
117 }
118
119 if (params.needsInt16)
120 {
121 if (features2.features.shaderInt16 != VK_TRUE)
122 TCU_THROW(NotSupportedError, "shaderInt16 not supported");
123 if (layout_features.workgroupMemoryExplicitLayout16BitAccess != VK_TRUE)
124 TCU_THROW(NotSupportedError, "workgroupMemoryExplicitLayout16BitAccess not supported");
125 }
126
127 if (params.needsInt64)
128 {
129 if (features2.features.shaderInt64 != VK_TRUE)
130 TCU_THROW(NotSupportedError, "shaderInt64 not supported");
131 }
132
133 if (params.needsFloat16)
134 {
135 if (f16_i8_features.shaderFloat16 != VK_TRUE)
136 TCU_THROW(NotSupportedError, "shaderFloat16 not supported");
137 if (layout_features.workgroupMemoryExplicitLayout16BitAccess != VK_TRUE)
138 TCU_THROW(NotSupportedError, "workgroupMemoryExplicitLayout16BitAccess not supported");
139 }
140
141 if (params.needsFloat64)
142 {
143 if (features2.features.shaderFloat64 != VK_TRUE)
144 TCU_THROW(NotSupportedError, "shaderFloat64 not supported");
145 }
146 }
147
runCompute(Context& context, deUint32 workgroupSize, const vk::ComputePipelineConstructionType computePipelineConstructionType)148 tcu::TestStatus runCompute(Context& context, deUint32 workgroupSize, const vk::ComputePipelineConstructionType computePipelineConstructionType)
149 {
150 const DeviceInterface& vk = context.getDeviceInterface();
151 const VkDevice device = context.getDevice();
152 Allocator& allocator = context.getDefaultAllocator();
153 tcu::TestLog& log = context.getTestContext().getLog();
154
155 de::MovePtr<BufferWithMemory> buffer;
156 VkDescriptorBufferInfo bufferDescriptor;
157
158 VkDeviceSize size = sizeof(deUint32) * workgroupSize;
159
160 buffer = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
161 vk, device, allocator, makeBufferCreateInfo(size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT|VK_BUFFER_USAGE_TRANSFER_DST_BIT|VK_BUFFER_USAGE_TRANSFER_SRC_BIT),
162 MemoryRequirement::HostVisible | MemoryRequirement::Cached));
163 bufferDescriptor = makeDescriptorBufferInfo(**buffer, 0, size);
164
165 deUint32* ptr = (deUint32*)buffer->getAllocation().getHostPtr();
166
167 deMemset(ptr, 0xFF, static_cast<std::size_t>(size));
168
169 DescriptorSetLayoutBuilder layoutBuilder;
170 layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);
171
172 Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
173 Unique<VkDescriptorPool> descriptorPool(DescriptorPoolBuilder()
174 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1u)
175 .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
176 Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
177
178 VkPipelineBindPoint bindPoint = VK_PIPELINE_BIND_POINT_COMPUTE;
179 flushAlloc(vk, device, buffer->getAllocation());
180
181 ComputePipelineWrapper pipeline(vk, device, computePipelineConstructionType, context.getBinaryCollection().get("comp"));
182 pipeline.setDescriptorSetLayout(descriptorSetLayout.get());
183 pipeline.buildPipeline();
184
185 const VkQueue queue = context.getUniversalQueue();
186 Move<VkCommandPool> cmdPool = createCommandPool(vk, device,
187 VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
188 context.getUniversalQueueFamilyIndex());
189 Move<VkCommandBuffer> cmdBuffer = allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
190
191 DescriptorSetUpdateBuilder setUpdateBuilder;
192 setUpdateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0),
193 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptor);
194 setUpdateBuilder.update(vk, device);
195
196 beginCommandBuffer(vk, *cmdBuffer, 0);
197
198 vk.cmdBindDescriptorSets(*cmdBuffer, bindPoint, pipeline.getPipelineLayout(), 0u, 1, &*descriptorSet, 0u, DE_NULL);
199 pipeline.bind(*cmdBuffer);
200
201 vk.cmdDispatch(*cmdBuffer, 1, 1, 1);
202
203 endCommandBuffer(vk, *cmdBuffer);
204
205 submitCommandsAndWait(vk, device, queue, cmdBuffer.get());
206
207 invalidateAlloc(vk, device, buffer->getAllocation());
208 for (deUint32 i = 0; i < workgroupSize; ++i)
209 {
210 deUint32 expected = i;
211 if (ptr[i] != expected)
212 {
213 log << tcu::TestLog::Message << "failure at index " << i << ": expected " << expected << ", got: " << ptr[i] << tcu::TestLog::EndMessage;
214 return tcu::TestStatus::fail("compute failed");
215 }
216 }
217
218 return tcu::TestStatus::pass("compute succeeded");
219 }
220
221 class AliasTest : public vkt::TestCase
222 {
223 public:
224 enum Requirements
225 {
226 RequirementNone = 0,
227 RequirementFloat16 = 1 << 0,
228 RequirementFloat64 = 1 << 1,
229 RequirementInt8 = 1 << 2,
230 RequirementInt16 = 1 << 3,
231 RequirementInt64 = 1 << 4,
232 };
233
234 enum Flags
235 {
236 FlagNone = 0,
237 FlagLayoutStd430 = 1 << 0,
238 FlagLayoutStd140 = 1 << 1,
239 FlagLayoutScalar = 1 << 2,
240 FlagFunction = 1 << 3,
241 FlagBarrier = 1 << 4,
242 };
243
244 enum LayoutFlags
245 {
246 LayoutNone = 0,
247
248 LayoutDefault = 1 << 0,
249 LayoutStd140 = 1 << 1,
250 LayoutStd430 = 1 << 2,
251 LayoutScalar = 1 << 3,
252 LayoutAll = LayoutDefault | LayoutStd140 | LayoutStd430 | LayoutScalar,
253
254 LayoutCount = 4,
255 };
256
257 enum Function
258 {
259 FunctionNone = 0,
260 FunctionRead,
261 FunctionWrite,
262 FunctionReadWrite,
263 FunctionCount,
264 };
265
266 enum Synchronization
267 {
268 SynchronizationNone = 0,
269 SynchronizationBarrier,
270 SynchronizationCount,
271 };
272
273 struct CaseDef
274 {
275 std::string extraTypes;
276
277 std::string writeDesc;
278 std::string writeType;
279 std::string writeValue;
280
281 std::string readDesc;
282 std::string readType;
283 std::string readValue;
284
285 LayoutFlags layout;
286 Function func;
287 Synchronization sync;
288 Requirements requirements;
289
CaseDefvkt::compute::__anon27878::AliasTest::CaseDef290 CaseDef (const std::string& extraTypes_,
291 const std::string& writeDesc_,
292 const std::string& writeType_,
293 const std::string& writeValue_,
294 const std::string& readDesc_,
295 const std::string& readType_,
296 const std::string& readValue_,
297 LayoutFlags layout_,
298 Function func_,
299 Synchronization sync_,
300 Requirements requirements_)
301 : extraTypes (extraTypes_)
302 , writeDesc (writeDesc_)
303 , writeType (writeType_)
304 , writeValue (writeValue_)
305 , readDesc (readDesc_)
306 , readType (readType_)
307 , readValue (readValue_)
308 , layout (layout_)
309 , func (func_)
310 , sync (sync_)
311 , requirements (requirements_)
312 {}
313
testNamevkt::compute::__anon27878::AliasTest::CaseDef314 std::string testName() const
315 {
316 std::string name = writeDesc + "_to_" + readDesc;
317
318 // In a valid test case, only one flag will be set.
319 switch (layout)
320 {
321 case LayoutDefault:
322 name += "_default";
323 break;
324 case LayoutStd140:
325 name += "_std140";
326 break;
327 case LayoutStd430:
328 name += "_std430";
329 break;
330 case LayoutScalar:
331 name += "_scalar";
332 break;
333 default:
334 DE_ASSERT(0);
335 break;
336 }
337
338 switch (func)
339 {
340 case FunctionNone:
341 break;
342 case FunctionRead:
343 name += "_func_read";
344 break;
345 case FunctionWrite:
346 name += "_func_write";
347 break;
348 case FunctionReadWrite:
349 name += "_func_read_write";
350 break;
351 default:
352 DE_ASSERT(0);
353 break;
354 }
355
356 switch (sync)
357 {
358 case SynchronizationNone:
359 break;
360 case SynchronizationBarrier:
361 name += "_barrier";
362 break;
363 default:
364 DE_ASSERT(0);
365 break;
366 }
367
368 return name;
369 }
370 };
371
AliasTest(tcu::TestContext& testCtx, const CaseDef& caseDef, const vk::ComputePipelineConstructionType computePipelineConstructionType)372 AliasTest(tcu::TestContext& testCtx, const CaseDef& caseDef, const vk::ComputePipelineConstructionType computePipelineConstructionType)
373 : TestCase (testCtx, caseDef.testName())
374 , m_caseDef (caseDef)
375 , m_computePipelineConstructionType (computePipelineConstructionType)
376 {
377 }
378
379 virtual void checkSupport(Context& context) const;
380 void initPrograms(SourceCollections& sourceCollections) const;
381
382 class Instance : public vkt::TestInstance
383 {
384 public:
Instance(Context& context, const CaseDef& caseDef, const vk::ComputePipelineConstructionType computePipelineConstructionType)385 Instance(Context& context, const CaseDef& caseDef, const vk::ComputePipelineConstructionType computePipelineConstructionType)
386 : TestInstance (context)
387 , m_caseDef (caseDef)
388 , m_computePipelineConstructionType (computePipelineConstructionType)
389 {
390 }
391
iterate(void)392 tcu::TestStatus iterate(void)
393 {
394 return runCompute(m_context, 1u, m_computePipelineConstructionType);
395 }
396
397 private:
398 CaseDef m_caseDef;
399 vk::ComputePipelineConstructionType m_computePipelineConstructionType;
400 };
401
createInstance(Context& context) const402 TestInstance* createInstance(Context& context) const
403 {
404 return new Instance(context, m_caseDef, m_computePipelineConstructionType);
405 }
406
407 private:
408 CaseDef m_caseDef;
409 vk::ComputePipelineConstructionType m_computePipelineConstructionType;
410 };
411
checkSupport(Context& context) const412 void AliasTest::checkSupport(Context& context) const
413 {
414 CheckSupportParams p;
415 deMemset(&p, 0, sizeof(p));
416
417 p.needsScalar = m_caseDef.layout == LayoutScalar;
418 p.needsInt8 = m_caseDef.requirements & RequirementInt8;
419 p.needsInt16 = m_caseDef.requirements & RequirementInt16;
420 p.needsInt64 = m_caseDef.requirements & RequirementInt64;
421 p.needsFloat16 = m_caseDef.requirements & RequirementFloat16;
422 p.needsFloat64 = m_caseDef.requirements & RequirementFloat64;
423 p.computePipelineConstructionType = m_computePipelineConstructionType;
424
425 checkSupportWithParams(context, p);
426 }
427
initPrograms(SourceCollections& sourceCollections) const428 void AliasTest::initPrograms(SourceCollections& sourceCollections) const
429 {
430 std::string layout;
431 switch (m_caseDef.layout)
432 {
433 case LayoutStd140:
434 layout = "layout(std140)";
435 break;
436 case LayoutStd430:
437 layout = "layout(std430)";
438 break;
439 case LayoutScalar:
440 layout = "layout(scalar)";
441 break;
442 default:
443 // No layout specified.
444 break;
445 }
446
447 std::ostringstream src;
448
449 src << "#version 450\n";
450 src << "#extension GL_EXT_shared_memory_block : enable\n";
451 src << "#extension GL_EXT_shader_explicit_arithmetic_types : enable\n";
452
453 if (m_caseDef.layout == LayoutScalar)
454 src << "#extension GL_EXT_scalar_block_layout : enable\n";
455
456 src << "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n";
457
458 if (!m_caseDef.extraTypes.empty())
459 src << m_caseDef.extraTypes << ";\n";
460
461 src << layout << "shared A { " << m_caseDef.writeType << "; } a;\n";
462 src << layout << "shared B { " << m_caseDef.readType << "; } b;\n";
463 src << "layout(set = 0, binding = 0) buffer Result { uint result; };\n";
464
465 if (m_caseDef.func == FunctionRead ||
466 m_caseDef.func == FunctionReadWrite)
467 {
468 src << "void read(int index) {\n";
469 src << " if (b.v == " << m_caseDef.readValue << ")\n";
470 src << " result = index;\n";
471 src << "}\n";
472 }
473
474 if (m_caseDef.func == FunctionWrite ||
475 m_caseDef.func == FunctionReadWrite)
476 {
477 src << "void write(int index) {\n";
478 src << " if (index == 0)\n";
479 src << " a.v = " << m_caseDef.writeValue << ";\n";
480 src << "}\n";
481 }
482
483 src << "void main() {\n";
484 src << " int index = int(gl_LocalInvocationIndex);\n";
485
486 if (m_caseDef.func == FunctionWrite)
487 src << " write(index);\n";
488 else
489 src << " a.v = " << m_caseDef.writeValue << ";\n";
490
491 if (m_caseDef.sync == SynchronizationBarrier)
492 src << " barrier();\n";
493
494 if (m_caseDef.func == FunctionRead ||
495 m_caseDef.func == FunctionReadWrite)
496 {
497 src << " read(index);\n";
498 }
499 else
500 {
501 src << " if (b.v == " << m_caseDef.readValue << ")\n";
502 src << " result = index;\n";
503 }
504 src << "}\n";
505
506 deUint32 buildFlags =
507 m_caseDef.layout == LayoutScalar ? ShaderBuildOptions::FLAG_ALLOW_WORKGROUP_SCALAR_OFFSETS :
508 ShaderBuildOptions::Flags(0u);
509
510 sourceCollections.glslSources.add("comp")
511 << glu::ComputeSource(src.str())
512 << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_4, buildFlags, true);
513 }
514
makeArray(const std::string& type, const std::vector<deUint64>& values)515 std::string makeArray(const std::string& type, const std::vector<deUint64>& values)
516 {
517 std::ostringstream s;
518 s << type << "[](";
519 for (std::size_t i = 0; i < values.size(); i++)
520 {
521 s << type << "(" << std::to_string(values[i]) << ")";
522 if (i != values.size() - 1)
523 s << ", ";
524 }
525 s << ")";
526 return s.str();
527 }
528
makeU8Array(const std::vector<deUint64>& values)529 std::string makeU8Array(const std::vector<deUint64>& values)
530 {
531 return makeArray("uint8_t", values);
532 }
533
makeU16Array(const std::vector<deUint64>& values)534 std::string makeU16Array(const std::vector<deUint64>& values)
535 {
536 return makeArray("uint16_t", values);
537 }
538
makeU32Array(const std::vector<deUint64>& values)539 std::string makeU32Array(const std::vector<deUint64>& values)
540 {
541 return makeArray("uint32_t", values);
542 }
543
AddAliasTests(tcu::TestCaseGroup* group, vk::ComputePipelineConstructionType computePipelineConstructionType)544 void AddAliasTests(tcu::TestCaseGroup* group, vk::ComputePipelineConstructionType computePipelineConstructionType)
545 {
546 const int DEFAULT = AliasTest::LayoutDefault;
547 const int STD140 = AliasTest::LayoutStd140;
548 const int STD430 = AliasTest::LayoutStd430;
549 const int SCALAR = AliasTest::LayoutScalar;
550 const int ALL = DEFAULT | STD140 | STD430 | SCALAR;
551
552 const int FLOAT16 = AliasTest::RequirementFloat16;
553 const int FLOAT64 = AliasTest::RequirementFloat64;
554 const int INT8 = AliasTest::RequirementInt8;
555 const int INT16 = AliasTest::RequirementInt16;
556 const int INT64 = AliasTest::RequirementInt64;
557
558 #define CASE_EXTRA(L, R, E, D1, T1, V1, D2, T2, V2) \
559 { E, D1, T1, V1, D2, T2, V2, AliasTest::LayoutFlags(L), AliasTest::FunctionNone, AliasTest::SynchronizationNone, AliasTest::Requirements(R) }
560
561 #define CASE_EXTRA_WITH_REVERSE(L, R, E, D1, T1, V1, D2, T2, V2) \
562 CASE_EXTRA(L, R, E, D1, T1, V1, D2, T2, V2), \
563 CASE_EXTRA(L, R, E, D2, T2, V2, D1, T1, V1)
564
565 #define CASE_WITH_REVERSE(L, R, D1, T1, V1, D2, T2, V2) CASE_EXTRA_WITH_REVERSE(L, R, "", D1, T1, V1, D2, T2, V2)
566 #define CASE_SAME_TYPE(R, D, T, V) CASE_EXTRA(ALL, R, "", D, T, V, D, T, V)
567 #define CASE(L, R, D1, T1, V1, D2, T2, V2) CASE_EXTRA(L, R, "", D1, T1, V1, D2, T2, V2)
568
569
570 const std::vector<AliasTest::CaseDef> cases
571 {
572 CASE_SAME_TYPE(0, "bool_true", "bool v", "true"),
573 CASE_SAME_TYPE(0, "bool_false", "bool v", "false"),
574 CASE_SAME_TYPE(0, "bvec2", "bvec2 v", "bvec2(false, true)"),
575 CASE_SAME_TYPE(0, "bvec3", "bvec3 v", "bvec3(false, true, true)"),
576 CASE_SAME_TYPE(0, "bvec4", "bvec4 v", "bvec4(false, true, true, false)"),
577 CASE_SAME_TYPE(INT8, "u8", "uint8_t v", "uint8_t(10)"),
578 CASE_SAME_TYPE(INT8, "u8vec2", "u8vec2 v", "u8vec2(10, 20)"),
579 CASE_SAME_TYPE(INT8, "u8vec3", "u8vec3 v", "u8vec3(10, 20, 30)"),
580 CASE_SAME_TYPE(INT8, "u8vec4", "u8vec4 v", "u8vec4(10, 20, 30, 40)"),
581 CASE_SAME_TYPE(INT8, "i8", "int8_t v", "int8_t(-10)"),
582 CASE_SAME_TYPE(INT8, "i8vec2", "i8vec2 v", "i8vec2(-10, 20)"),
583 CASE_SAME_TYPE(INT8, "i8vec3", "i8vec3 v", "i8vec3(-10, 20, -30)"),
584 CASE_SAME_TYPE(INT8, "i8vec4", "i8vec4 v", "i8vec4(-10, 20, -30, 40)"),
585 CASE_SAME_TYPE(INT16, "u16", "uint16_t v", "uint16_t(1000)"),
586 CASE_SAME_TYPE(INT16, "u16vec2", "u16vec2 v", "u16vec2(1000, 2000)"),
587 CASE_SAME_TYPE(INT16, "u16vec3", "u16vec3 v", "u16vec3(1000, 2000, 3000)"),
588 CASE_SAME_TYPE(INT16, "u16vec4", "u16vec4 v", "u16vec4(1000, 2000, 3000, 4000)"),
589 CASE_SAME_TYPE(INT16, "i16", "int16_t v", "int16_t(-1000)"),
590 CASE_SAME_TYPE(INT16, "i16vec2", "i16vec2 v", "i16vec2(-1000, 2000)"),
591 CASE_SAME_TYPE(INT16, "i16vec3", "i16vec3 v", "i16vec3(-1000, 2000, -3000)"),
592 CASE_SAME_TYPE(INT16, "i16vec4", "i16vec4 v", "i16vec4(-1000, 2000, -3000, 4000)"),
593 CASE_SAME_TYPE(0, "u32", "uint32_t v", "uint32_t(100)"),
594 CASE_SAME_TYPE(0, "uvec2", "uvec2 v", "uvec2(100, 200)"),
595 CASE_SAME_TYPE(0, "uvec3", "uvec3 v", "uvec3(100, 200, 300)"),
596 CASE_SAME_TYPE(0, "uvec4", "uvec4 v", "uvec4(100, 200, 300, 400)"),
597 CASE_SAME_TYPE(0, "i32", "int32_t v", "int32_t(-100)"),
598 CASE_SAME_TYPE(0, "ivec2", "ivec2 v", "ivec2(-100, 200)"),
599 CASE_SAME_TYPE(0, "ivec3", "ivec3 v", "ivec3(-100, 200, -300)"),
600 CASE_SAME_TYPE(0, "ivec4", "ivec4 v", "ivec4(-100, 200, -300, 400)"),
601 CASE_SAME_TYPE(INT64, "u64", "uint64_t v", "uint64_t(1000)"),
602 CASE_SAME_TYPE(INT64, "u64vec2", "u64vec2 v", "u64vec2(1000, 2000)"),
603 CASE_SAME_TYPE(INT64, "u64vec3", "u64vec3 v", "u64vec3(1000, 2000, 3000)"),
604 CASE_SAME_TYPE(INT64, "u64vec4", "u64vec4 v", "u64vec4(1000, 2000, 3000, 4000)"),
605 CASE_SAME_TYPE(INT64, "i64", "int64_t v", "int64_t(-1000)"),
606 CASE_SAME_TYPE(INT64, "i64vec2", "i64vec2 v", "i64vec2(-1000, 2000)"),
607 CASE_SAME_TYPE(INT64, "i64vec3", "i64vec3 v", "i64vec3(-1000, 2000, -3000)"),
608 CASE_SAME_TYPE(INT64, "i64vec4", "i64vec4 v", "i64vec4(-1000, 2000, -3000, 4000)"),
609 CASE_SAME_TYPE(FLOAT16, "f16", "float16_t v", "float16_t(-100.0)"),
610 CASE_SAME_TYPE(FLOAT16, "f16vec2", "f16vec2 v", "f16vec2(100.0, -200.0)"),
611 CASE_SAME_TYPE(FLOAT16, "f16vec3", "f16vec3 v", "f16vec3(100.0, -200.0, 300.0)"),
612 CASE_SAME_TYPE(FLOAT16, "f16vec4", "f16vec4 v", "f16vec4(100.0, -200.0, 300.0, -400.0)"),
613 CASE_SAME_TYPE(0, "f32", "float32_t v", "float32_t(-100.0)"),
614 CASE_SAME_TYPE(0, "f32vec2", "f32vec2 v", "f32vec2(100.0, -200.0)"),
615 CASE_SAME_TYPE(0, "f32vec3", "f32vec3 v", "f32vec3(100.0, -200.0, 300.0)"),
616 CASE_SAME_TYPE(0, "f32vec4", "f32vec4 v", "f32vec4(100.0, -200.0, 300.0, -400.0)"),
617 CASE_SAME_TYPE(FLOAT64, "f64", "float64_t v", "float32_t(-100.0)"),
618 CASE_SAME_TYPE(FLOAT64, "f64vec2", "f64vec2 v", "f64vec2(100.0, -200.0)"),
619 CASE_SAME_TYPE(FLOAT64, "f64vec3", "f64vec3 v", "f64vec3(100.0, -200.0, 300.0)"),
620 CASE_SAME_TYPE(FLOAT64, "f64vec4", "f64vec4 v", "f64vec4(100.0, -200.0, 300.0, -400.0)"),
621 CASE_SAME_TYPE(FLOAT16, "f16mat2x2", "f16mat2x2 v", "f16mat2x2(1, 2, 3, 4)"),
622 CASE_SAME_TYPE(FLOAT16, "f16mat2x3", "f16mat2x3 v", "f16mat2x3(1, 2, 3, 4, 5, 6)"),
623 CASE_SAME_TYPE(FLOAT16, "f16mat2x4", "f16mat2x4 v", "f16mat2x4(1, 2, 3, 4, 5, 6, 7, 8)"),
624 CASE_SAME_TYPE(FLOAT16, "f16mat3x2", "f16mat3x2 v", "f16mat3x2(1, 2, 3, 4, 5, 6)"),
625 CASE_SAME_TYPE(FLOAT16, "f16mat3x3", "f16mat3x3 v", "f16mat3x3(1, 2, 3, 4, 5, 6, 7, 8, 9)"),
626 CASE_SAME_TYPE(FLOAT16, "f16mat3x4", "f16mat3x4 v", "f16mat3x4(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)"),
627 CASE_SAME_TYPE(FLOAT16, "f16mat4x2", "f16mat4x2 v", "f16mat4x2(1, 2, 3, 4, 5, 6, 7, 8)"),
628 CASE_SAME_TYPE(FLOAT16, "f16mat4x3", "f16mat4x3 v", "f16mat4x3(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)"),
629 CASE_SAME_TYPE(FLOAT16, "f16mat4x4", "f16mat4x4 v", "f16mat4x4(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)"),
630 CASE_SAME_TYPE(0, "f32mat2x2", "f32mat2x2 v", "f32mat2x2(1, 2, 3, 4)"),
631 CASE_SAME_TYPE(0, "f32mat2x3", "f32mat2x3 v", "f32mat2x3(1, 2, 3, 4, 5, 6)"),
632 CASE_SAME_TYPE(0, "f32mat2x4", "f32mat2x4 v", "f32mat2x4(1, 2, 3, 4, 5, 6, 7, 8)"),
633 CASE_SAME_TYPE(0, "f32mat3x2", "f32mat3x2 v", "f32mat3x2(1, 2, 3, 4, 5, 6)"),
634 CASE_SAME_TYPE(0, "f32mat3x3", "f32mat3x3 v", "f32mat3x3(1, 2, 3, 4, 5, 6, 7, 8, 9)"),
635 CASE_SAME_TYPE(0, "f32mat3x4", "f32mat3x4 v", "f32mat3x4(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)"),
636 CASE_SAME_TYPE(0, "f32mat4x2", "f32mat4x2 v", "f32mat4x2(1, 2, 3, 4, 5, 6, 7, 8)"),
637 CASE_SAME_TYPE(0, "f32mat4x3", "f32mat4x3 v", "f32mat4x3(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)"),
638 CASE_SAME_TYPE(0, "f32mat4x4", "f32mat4x4 v", "f32mat4x4(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)"),
639 CASE_SAME_TYPE(FLOAT64, "f64mat2x2", "f64mat2x2 v", "f64mat2x2(1, 2, 3, 4)"),
640 CASE_SAME_TYPE(FLOAT64, "f64mat2x3", "f64mat2x3 v", "f64mat2x3(1, 2, 3, 4, 5, 6)"),
641 CASE_SAME_TYPE(FLOAT64, "f64mat2x4", "f64mat2x4 v", "f64mat2x4(1, 2, 3, 4, 5, 6, 7, 8)"),
642 CASE_SAME_TYPE(FLOAT64, "f64mat3x2", "f64mat3x2 v", "f64mat3x2(1, 2, 3, 4, 5, 6)"),
643 CASE_SAME_TYPE(FLOAT64, "f64mat3x3", "f64mat3x3 v", "f64mat3x3(1, 2, 3, 4, 5, 6, 7, 8, 9)"),
644 CASE_SAME_TYPE(FLOAT64, "f64mat3x4", "f64mat3x4 v", "f64mat3x4(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)"),
645 CASE_SAME_TYPE(FLOAT64, "f64mat4x2", "f64mat4x2 v", "f64mat4x2(1, 2, 3, 4, 5, 6, 7, 8)"),
646 CASE_SAME_TYPE(FLOAT64, "f64mat4x3", "f64mat4x3 v", "f64mat4x3(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)"),
647 CASE_SAME_TYPE(FLOAT64, "f64mat4x4", "f64mat4x4 v", "f64mat4x4(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)"),
648
649 CASE_WITH_REVERSE(ALL, INT8,
650 "i8", "int8_t v", "int8_t(-2)",
651 "u8", "uint8_t v", "uint8_t(0xFE)"),
652 CASE_WITH_REVERSE(ALL, INT16,
653 "i16", "int16_t v", "int16_t(-2)",
654 "u16", "uint16_t v", "uint16_t(0xFFFE)"),
655 CASE_WITH_REVERSE(ALL, 0,
656 "i32", "int32_t v", "int32_t(-2)",
657 "u32", "uint32_t v", "uint32_t(0xFFFFFFFE)"),
658 CASE_WITH_REVERSE(ALL, INT64,
659 "i64", "int64_t v", "int64_t(-2UL)",
660 "u64", "uint64_t v", "uint64_t(0xFFFFFFFFFFFFFFFEUL)"),
661 CASE_WITH_REVERSE(ALL, FLOAT16 | INT16,
662 "f16", "float16_t v", "float16_t(1.0)",
663 "u16", "uint16_t v", "uint16_t(0x3C00)"),
664 CASE_WITH_REVERSE(ALL, 0,
665 "f32", "float32_t v", "float32_t(1.0)",
666 "u32", "uint32_t v", "uint32_t(0x3F800000)"),
667 CASE_WITH_REVERSE(ALL, FLOAT64 | INT64,
668 "f64", "float64_t v", "float64_t(1.0)",
669 "u64", "uint64_t v", "uint64_t(0x3FF0000000000000UL)"),
670
671 CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT16 | INT8,
672 "u16", "uint16_t v", "uint16_t(0x1234)",
673 "u8_array", "uint8_t v[2]", makeU8Array({0x34, 0x12})),
674 CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT8,
675 "u32", "uint32_t v", "uint32_t(0x12345678)",
676 "u8_array", "uint8_t v[4]", makeU8Array({0x78, 0x56, 0x34, 0x12})),
677 CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT16,
678 "u32", "uint32_t v", "uint32_t(0x12345678)",
679 "u16_array", "uint16_t v[2]", makeU16Array({0x5678, 0x1234})),
680 CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT64 | INT8,
681 "u64", "uint64_t v", "uint64_t(0x1234567890ABCDEFUL)",
682 "u8_array", "uint8_t v[8]", makeU8Array({0xEF, 0xCD, 0xAB, 0x90, 0x78, 0x56, 0x34, 0x12})),
683 CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT64 | INT16,
684 "u64", "uint64_t v", "uint64_t(0x1234567890ABCDEFUL)",
685 "u16_array", "uint16_t v[4]", makeU16Array({0xCDEF, 0x90AB, 0x5678, 0x1234})),
686 CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT64,
687 "u64", "uint64_t v", "uint64_t(0x1234567890ABCDEFUL)",
688 "u32_array", "uint32_t v[2]", makeU32Array({0x90ABCDEF, 0x12345678})),
689 CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT16 | INT8,
690 "i16", "int16_t v", "int16_t(-2)",
691 "u8_array", "uint8_t v[2]", makeU8Array({0xFE, 0xFF})),
692 CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT8,
693 "i32", "int32_t v", "int32_t(-2)",
694 "u8_array", "uint8_t v[4]", makeU8Array({0xFE, 0xFF, 0xFF, 0xFF})),
695 CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT16,
696 "i32", "int32_t v", "int32_t(-2)",
697 "u16_array", "uint16_t v[2]", makeU16Array({0xFFFE, 0xFFFF})),
698 CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT64 | INT8,
699 "i64", "int64_t v", "int64_t(-2UL)",
700 "u8_array", "uint8_t v[8]", makeU8Array({0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF})),
701 CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT64 | INT16,
702 "i64", "int64_t v", "int64_t(-2UL)",
703 "u16_array", "uint16_t v[4]", makeU16Array({0xFFFE, 0xFFFF, 0xFFFF, 0xFFFF})),
704 CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT64,
705 "i64", "int64_t v", "int64_t(-2UL)",
706 "u32_array", "uint32_t v[2]", makeU32Array({0xFFFFFFFE, 0xFFFFFFFF})),
707 CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, FLOAT16 | INT8,
708 "f16", "float16_t v", "float16_t(1.0)",
709 "u8_array", "uint8_t v[2]", makeU8Array({0x00, 0x3C})),
710 CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT8,
711 "f32", "float32_t v", "float32_t(1.0)",
712 "u8_array", "uint8_t v[4]", makeU8Array({0x00, 0x00, 0x80, 0x3F})),
713 CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT16,
714 "f32", "float32_t v", "float32_t(1.0)",
715 "u16_array", "uint16_t v[2]", makeU16Array({0x0000, 0x3F80})),
716 CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, FLOAT64 | INT8,
717 "f64", "float64_t v", "float64_t(1.0)",
718 "u8_array", "uint8_t v[8]", makeU8Array({0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xF0, 0x3F})),
719 CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, FLOAT64 | INT16,
720 "f64", "float64_t v", "float64_t(1.0)",
721 "u16_array", "uint16_t v[4]", makeU16Array({0x0000, 0x0000, 0x0000, 0x3FF0})),
722 CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, FLOAT64,
723 "f64", "float64_t v", "float64_t(1.0)",
724 "u32_array", "uint32_t v[2]", makeU32Array({0x00000000, 0x3FF00000})),
725
726 CASE(DEFAULT | STD430, 0,
727 "vec4_array", "vec4 v[3]", "vec4[](vec4(1, 1, 2, 2), vec4(3, 3, 4, 4), vec4(5, 5, 6, 6))",
728 "vec2_array", "vec2 v[6]", "vec2[](vec2(1), vec2(2), vec2(3), vec2(4), vec2(5), vec2(6))"),
729 CASE(STD140, 0,
730 "vec4_array", "vec4 v[3]", "vec4[](vec4(1, 1, 999, 999), vec4(2, 2, 999, 999), vec4(3, 3, 999, 999))",
731 "vec2_array", "vec2 v[3]", "vec2[](vec2(1), vec2(2), vec2(3))"),
732 CASE(SCALAR, 0,
733 "vec4_array", "vec4 v[3]", "vec4[](vec4(1, 1, 2, 2), vec4(3, 3, 4, 4), vec4(5, 5, 6, 6))",
734 "vec2_array", "vec2 v[6]", "vec2[](vec2(1), vec2(2), vec2(3), vec2(4), vec2(5), vec2(6))"),
735
736 CASE(DEFAULT | STD430, 0,
737 "vec4_array", "vec4 v[3]", "vec4[](vec4(1, 1, 1, 999), vec4(2, 2, 2, 999), vec4(3, 3, 3, 999))",
738 "vec3_array", "vec3 v[3]", "vec3[](vec3(1), vec3(2), vec3(3))"),
739 CASE(STD140, 0,
740 "vec4_array", "vec4 v[3]", "vec4[](vec4(1, 1, 1, 999), vec4(2, 2, 2, 999), vec4(3, 3, 3, 999))",
741 "vec3_array", "vec3 v[3]", "vec3[](vec3(1), vec3(2), vec3(3))"),
742 CASE(SCALAR, 0,
743 "vec4_array", "vec4 v[3]", "vec4[](vec4(1, 1, 1, 2), vec4(2, 2, 3, 3), vec4(3, 4, 4, 4))",
744 "vec3_array", "vec3 v[4]", "vec3[](vec3(1), vec3(2), vec3(3), vec3(4))"),
745
746 CASE_EXTRA(DEFAULT | STD430 | SCALAR, INT8,
747 "struct s { int a; int b; }",
748 "u8_array", "uint8_t v[8]", makeU8Array({2, 0, 0, 0, 0xFE, 0xFF, 0xFF, 0xFF}),
749 "struct_int_int", "s v", "s(2, -2)"),
750 CASE_EXTRA(ALL, 0,
751 "struct s { int a; int b; }",
752 "uvec2", "uvec2 v", "uvec2(2, 0xFFFFFFFE)",
753 "struct_int_int", "s v", "s(2, -2)"),
754 };
755
756 #undef CASE_EXTRA
757 #undef CASE_EXTRA_WITH_REVERSE
758 #undef CASE_WITH_REVERSE
759 #undef CASE_SAME_TYPE
760 #undef CASE
761
762 for (deUint32 i = 0; i < cases.size(); i++)
763 {
764 for (int syncIndex = 0; syncIndex < AliasTest::SynchronizationCount; syncIndex++)
765 {
766 const AliasTest::Synchronization sync = AliasTest::Synchronization(syncIndex);
767
768 for (int funcIndex = 0; funcIndex < AliasTest::FunctionCount; funcIndex++)
769 {
770 const AliasTest::Function func = AliasTest::Function(funcIndex);
771
772 for (int layoutIndex = 0; layoutIndex < AliasTest::LayoutCount; layoutIndex++)
773 {
774 const AliasTest::LayoutFlags layout = AliasTest::LayoutFlags(1 << layoutIndex);
775
776 AliasTest::CaseDef c = cases[i];
777
778 if (c.writeDesc == c.readDesc)
779 continue;
780
781 if ((c.layout & layout) == 0)
782 continue;
783
784 c.layout = layout;
785 c.func = func;
786 c.sync = sync;
787
788 group->addChild(new AliasTest(group->getTestContext(), c, computePipelineConstructionType));
789 }
790 }
791 }
792 }
793 }
794
795 class ZeroTest : public vkt::TestCase
796 {
797 public:
798 struct CaseDef
799 {
800 glu::DataType zeroElementType;
801 glu::DataType fieldType[2];
802 deUint32 elements;
803
testNamevkt::compute::__anon27878::ZeroTest::CaseDef804 std::string testName() const
805 {
806 std::string name = glu::getDataTypeName(zeroElementType);
807 name += "_array_to";
808
809 for (deUint32 i = 0; i < DE_LENGTH_OF_ARRAY(fieldType); ++i)
810 {
811 if (fieldType[i] == glu::TYPE_INVALID)
812 break;
813 name += "_";
814 name += glu::getDataTypeName(fieldType[i]);
815 }
816 name += "_array_" + de::toString(elements);
817 return name;
818 }
819 };
820
ZeroTest(tcu::TestContext& testCtx, const CaseDef& caseDef, const vk::ComputePipelineConstructionType computePipelineConstructionType)821 ZeroTest(tcu::TestContext& testCtx, const CaseDef& caseDef, const vk::ComputePipelineConstructionType computePipelineConstructionType)
822 : TestCase (testCtx, caseDef.testName())
823 , m_caseDef (caseDef)
824 , m_computePipelineConstructionType (computePipelineConstructionType)
825 {
826 }
827
828 virtual void checkSupport(Context& context) const;
829 void initPrograms(SourceCollections& sourceCollections) const;
830
831 class Instance : public vkt::TestInstance
832 {
833 public:
Instance(Context& context, const vk::ComputePipelineConstructionType computePipelineConstructionType)834 Instance(Context& context, const vk::ComputePipelineConstructionType computePipelineConstructionType)
835 : TestInstance (context)
836 , m_computePipelineConstructionType (computePipelineConstructionType)
837 {
838 }
839
iterate(void)840 tcu::TestStatus iterate(void)
841 {
842 return runCompute(m_context, 1u, m_computePipelineConstructionType);
843 }
844 private:
845 vk::ComputePipelineConstructionType m_computePipelineConstructionType;
846 };
847
createInstance(Context& context) const848 TestInstance* createInstance(Context& context) const
849 {
850 return new Instance(context, m_computePipelineConstructionType);
851 }
852
853 private:
854 CaseDef m_caseDef;
855 vk::ComputePipelineConstructionType m_computePipelineConstructionType;
856 };
857
checkSupport(Context& context) const858 void ZeroTest::checkSupport(Context& context) const
859 {
860 CheckSupportParams p;
861 deMemset(&p, 0, sizeof(p));
862
863 DE_ASSERT(!glu::isDataTypeFloat16OrVec(m_caseDef.zeroElementType));
864
865 p.useType(m_caseDef.zeroElementType);
866 p.useType(m_caseDef.fieldType[0]);
867 p.useType(m_caseDef.fieldType[1]);
868 p.computePipelineConstructionType = m_computePipelineConstructionType;
869
870 checkSupportWithParams(context, p);
871 }
872
getDataTypeLiteral(glu::DataType dt, std::string baseValue)873 std::string getDataTypeLiteral(glu::DataType dt, std::string baseValue)
874 {
875 using namespace glu;
876
877 if (isDataTypeVector(dt))
878 {
879 std::string elemValue = getDataTypeLiteral(getDataTypeScalarType(dt), baseValue);
880
881 std::ostringstream result;
882 result << getDataTypeName(dt) << "(";
883 for (int i = 0; i < getDataTypeScalarSize(dt); ++i)
884 {
885 if (i > 0)
886 result << ", ";
887 result << elemValue;
888 }
889 result << ")";
890 return result.str();
891 }
892 else if (isDataTypeScalar(dt))
893 {
894 return getDataTypeName(dt) + std::string("(") + baseValue + std::string(")");
895 }
896 else
897 {
898 DE_ASSERT(0);
899 return std::string();
900 }
901 }
902
initPrograms(SourceCollections& sourceCollections) const903 void ZeroTest::initPrograms(SourceCollections& sourceCollections) const
904 {
905 using namespace glu;
906
907 std::ostringstream src;
908
909 src << "#version 450\n"
910 << "#extension GL_EXT_shared_memory_block : enable\n"
911 << "#extension GL_EXT_shader_explicit_arithmetic_types : enable\n"
912 << "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n";
913
914 // Large enough to cover the largest B block even if just 8-bit elements.
915 // Small enough to fit in the minimum shared memory size limit even if with uvec4.
916 src << "shared A { " << getDataTypeName(m_caseDef.zeroElementType) << " arr[256]; } zero;\n";
917
918 src << "struct st {\n"
919 << " " << getDataTypeName(m_caseDef.fieldType[0]) << " x;\n";
920 if (m_caseDef.fieldType[1])
921 src << " " << getDataTypeName(m_caseDef.fieldType[1]) << " y;\n";
922 src << "};\n";
923
924
925 src << "shared B { st arr[4]; };\n"
926 << "layout(set = 0, binding = 0) buffer Result { uint result; };\n"
927 << "void main() {\n"
928 << "for (int i = 0; i < zero.arr.length(); i++) {\n"
929 << " zero.arr[i] = " << getDataTypeLiteral(m_caseDef.zeroElementType, "1") << ";\n"
930 << " }\n"
931 << " for (int i = 0; i < zero.arr.length(); i++) {\n"
932 << " zero.arr[i] = " << getDataTypeLiteral(m_caseDef.zeroElementType, "0") << ";\n"
933 << " }\n"
934 << " result = (\n";
935
936 for (deUint32 i = 0; i < 4; i++)
937 {
938 src << " ";
939 if (i > 0)
940 src << "&& ";
941 src << "(arr[" << de::toString(i) << "].x == " << getDataTypeLiteral(m_caseDef.fieldType[0], "0") << ")\n";
942 if (m_caseDef.fieldType[1])
943 src << " && (arr[" << de::toString(i) << "].y == " << getDataTypeLiteral(m_caseDef.fieldType[1], "0") << ")\n";
944 }
945
946 src << " ) ? 0 : 0xFF;\n"
947 << "}\n";
948
949 sourceCollections.glslSources.add("comp")
950 << ComputeSource(src.str())
951 << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_4,
952 vk::ShaderBuildOptions::Flags(0u), true);
953 }
954
isTestedZeroElementType(glu::DataType dt)955 bool isTestedZeroElementType(glu::DataType dt)
956 {
957 using namespace glu;
958
959 // Select only a few interesting types.
960 switch (dt)
961 {
962 case TYPE_UINT:
963 case TYPE_UINT_VEC4:
964 case TYPE_UINT8:
965 case TYPE_UINT8_VEC4:
966 case TYPE_UINT16:
967 return true;
968 default:
969 return false;
970 }
971 }
972
isTestedFieldType(glu::DataType dt)973 bool isTestedFieldType(glu::DataType dt)
974 {
975 using namespace glu;
976
977 // Select only a few interesting types.
978 switch (dt)
979 {
980 case TYPE_UINT:
981 case TYPE_UINT_VEC3:
982 case TYPE_UINT8:
983 case TYPE_UINT16:
984 case TYPE_FLOAT:
985 case TYPE_FLOAT_VEC4:
986 case TYPE_FLOAT16:
987 case TYPE_DOUBLE:
988 case TYPE_DOUBLE_VEC4:
989 case TYPE_BOOL:
990 return true;
991
992 default:
993 return false;
994 }
995 }
996
AddZeroTests(tcu::TestCaseGroup* group, vk::ComputePipelineConstructionType computePipelineConstructionType)997 void AddZeroTests(tcu::TestCaseGroup* group, vk::ComputePipelineConstructionType computePipelineConstructionType)
998 {
999 using namespace glu;
1000
1001 ZeroTest::CaseDef c;
1002
1003 for (deUint32 i = 0; i < TYPE_LAST; ++i)
1004 {
1005 c.zeroElementType = DataType(i);
1006
1007 if (isTestedZeroElementType(c.zeroElementType))
1008 {
1009 deUint32 idx[2] = { 0, 0 };
1010
1011 while (idx[1] < TYPE_LAST && idx[0] < TYPE_LAST)
1012 {
1013 c.fieldType[0] = DataType(idx[0]);
1014 c.fieldType[1] = DataType(idx[1]);
1015
1016 if (isTestedFieldType(c.fieldType[0]) &&
1017 (c.fieldType[1] == TYPE_INVALID || isTestedFieldType(c.fieldType[1])))
1018 {
1019 for (deUint32 elements = 1; elements <= 4; ++elements)
1020 {
1021 c.elements = elements;
1022 group->addChild(new ZeroTest(group->getTestContext(), c, computePipelineConstructionType));
1023 }
1024 }
1025
1026 idx[0]++;
1027 if (idx[0] >= TYPE_LAST)
1028 {
1029 idx[1]++;
1030 idx[0] = 0;
1031 }
1032 }
1033 }
1034 }
1035 }
1036
1037 class PaddingTest : public vkt::TestCase
1038 {
1039 public:
1040 struct CaseDef
1041 {
1042 std::vector<glu::DataType> types;
1043 std::vector<deUint32> offsets;
1044 std::vector<std::string> values;
1045 deUint32 expected[32];
1046
testNamevkt::compute::__anon27878::PaddingTest::CaseDef1047 std::string testName() const
1048 {
1049 DE_ASSERT(types.size() > 0);
1050 DE_ASSERT(types.size() == offsets.size());
1051 DE_ASSERT(types.size() == values.size());
1052
1053 std::string name;
1054 for (deUint32 i = 0; i < types.size(); ++i)
1055 {
1056 if (i > 0)
1057 name += "_";
1058 name += glu::getDataTypeName(types[i]);
1059 name += "_" + de::toString(offsets[i]);
1060 }
1061 return name;
1062 }
1063
addvkt::compute::__anon27878::PaddingTest::CaseDef1064 void add(glu::DataType dt, deUint32 offset, const std::string& v)
1065 {
1066 types.push_back(dt);
1067 offsets.push_back(offset);
1068 values.push_back(v);
1069 }
1070
needsScalarvkt::compute::__anon27878::PaddingTest::CaseDef1071 bool needsScalar() const
1072 {
1073 for (deUint32 i = 0; i < offsets.size(); ++i)
1074 {
1075 if (offsets[i] % 4 != 0)
1076 return true;
1077 }
1078 return false;
1079 }
1080 };
1081
PaddingTest(tcu::TestContext& testCtx, const CaseDef& caseDef, const vk::ComputePipelineConstructionType computePipelineConstructionType)1082 PaddingTest(tcu::TestContext& testCtx, const CaseDef& caseDef, const vk::ComputePipelineConstructionType computePipelineConstructionType)
1083 : TestCase (testCtx, caseDef.testName())
1084 , m_caseDef (caseDef)
1085 , m_computePipelineConstructionType (computePipelineConstructionType)
1086 {
1087 }
1088
1089 virtual void checkSupport(Context& context) const;
1090 void initPrograms(SourceCollections& sourceCollections) const;
1091
1092 class Instance : public vkt::TestInstance
1093 {
1094 public:
Instance(Context& context, const CaseDef& caseDef, const vk::ComputePipelineConstructionType computePipelineConstructionType)1095 Instance(Context& context, const CaseDef& caseDef, const vk::ComputePipelineConstructionType computePipelineConstructionType)
1096 : TestInstance (context)
1097 , m_caseDef (caseDef)
1098 , m_computePipelineConstructionType (computePipelineConstructionType)
1099 {
1100 }
1101
iterate(void)1102 tcu::TestStatus iterate(void)
1103 {
1104 return runCompute(m_context, 1u, m_computePipelineConstructionType);
1105 }
1106
1107 private:
1108 CaseDef m_caseDef;
1109 vk::ComputePipelineConstructionType m_computePipelineConstructionType;
1110 };
1111
createInstance(Context& context) const1112 TestInstance* createInstance(Context& context) const
1113 {
1114 return new Instance(context, m_caseDef, m_computePipelineConstructionType);
1115 }
1116
1117 private:
1118 CaseDef m_caseDef;
1119 vk::ComputePipelineConstructionType m_computePipelineConstructionType;
1120 };
1121
checkSupport(Context& context) const1122 void PaddingTest::checkSupport(Context& context) const
1123 {
1124 CheckSupportParams p;
1125 deMemset(&p, 0, sizeof(p));
1126
1127 for (deUint32 i = 0; i < m_caseDef.types.size(); ++i)
1128 p.useType(m_caseDef.types[i]);
1129
1130 p.needsScalar = m_caseDef.needsScalar();
1131 p.computePipelineConstructionType = m_computePipelineConstructionType;
1132
1133 checkSupportWithParams(context, p);
1134 }
1135
initPrograms(SourceCollections& sourceCollections) const1136 void PaddingTest::initPrograms(SourceCollections& sourceCollections) const
1137 {
1138 using namespace glu;
1139
1140 std::ostringstream src;
1141
1142 src << "#version 450\n"
1143 << "#extension GL_EXT_shared_memory_block : enable\n"
1144 << "#extension GL_EXT_shader_explicit_arithmetic_types : enable\n"
1145 << "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n";
1146
1147 src << "shared A { uint32_t words[32]; };\n";
1148
1149 if (m_caseDef.needsScalar())
1150 {
1151 src << "#extension GL_EXT_scalar_block_layout : enable\n"
1152 << "layout (scalar) ";
1153 }
1154
1155 src << "shared B {\n";
1156
1157 for (deUint32 i = 0; i < m_caseDef.types.size(); ++i)
1158 {
1159 src << " layout(offset = " << m_caseDef.offsets[i] << ") "
1160 << glu::getDataTypeName(m_caseDef.types[i]) << " x" << i << ";\n";
1161 }
1162
1163 src << "};\n"
1164 << "layout(set = 0, binding = 0) buffer Result { uint result; };\n";
1165
1166 src << "void main() {\n"
1167 << "for (int i = 0; i < 32; i++) words[i] = 0;\n";
1168
1169 for (deUint32 i = 0; i < m_caseDef.values.size(); ++i)
1170 src << "x" << i << " = " << m_caseDef.values[i] << ";\n";
1171
1172 src << "result = 32;\n";
1173 for (deUint32 i = 0; i < 32; ++i)
1174 {
1175 src << "if (words[" << std::dec << i << "] == 0x"
1176 << std::uppercase << std::hex << m_caseDef.expected[i]
1177 << ") result--;\n";
1178 }
1179
1180 src << "}\n";
1181
1182 sourceCollections.glslSources.add("comp")
1183 << ComputeSource(src.str())
1184 << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_4,
1185 vk::ShaderBuildOptions::Flags(0u), true);
1186 }
1187
AddPaddingTests(tcu::TestCaseGroup* group, vk::ComputePipelineConstructionType computePipelineConstructionType)1188 void AddPaddingTests(tcu::TestCaseGroup* group, vk::ComputePipelineConstructionType computePipelineConstructionType)
1189 {
1190 using namespace glu;
1191
1192 for (deUint32 i = 0; i < 31; ++i)
1193 {
1194 for (deUint32 j = i + 1; j < 32; j += 4)
1195 {
1196 PaddingTest::CaseDef c;
1197 deMemset(&c, 0, sizeof(c));
1198
1199 c.add(TYPE_UINT, 4 * i, "0x1234");
1200 c.expected[i] = 0x1234;
1201
1202 c.add(TYPE_UINT, 4 * j, "0x5678");
1203 c.expected[j] = 0x5678;
1204
1205 group->addChild(new PaddingTest(group->getTestContext(), c, computePipelineConstructionType));
1206 }
1207 }
1208
1209 for (deUint32 i = 0; i < 127; ++i)
1210 {
1211 for (deUint32 j = i + 1; j < 32; j += 16)
1212 {
1213 PaddingTest::CaseDef c;
1214 deMemset(&c, 0, sizeof(c));
1215
1216 deUint8* expected = reinterpret_cast<deUint8*>(c.expected);
1217
1218 c.add(TYPE_UINT8, i, "uint8_t(0xAA)");
1219 expected[i] = 0xAA;
1220
1221 c.add(TYPE_UINT8, j, "uint8_t(0xBB)");
1222 expected[j] = 0xBB;
1223
1224 group->addChild(new PaddingTest(group->getTestContext(), c, computePipelineConstructionType));
1225 }
1226 }
1227 }
1228
1229 class SizeTest : public vkt::TestCase
1230 {
1231 public:
SizeTest(tcu::TestContext& testCtx, deUint32 size, const vk::ComputePipelineConstructionType computePipelineConstructionType)1232 SizeTest(tcu::TestContext& testCtx, deUint32 size, const vk::ComputePipelineConstructionType computePipelineConstructionType)
1233 : TestCase (testCtx, de::toString(size))
1234 , m_size (size)
1235 , m_computePipelineConstructionType (computePipelineConstructionType)
1236 {
1237 DE_ASSERT(size % 8 == 0);
1238 }
1239
1240 virtual void checkSupport(Context& context) const;
1241 void initPrograms(SourceCollections& sourceCollections) const;
1242
1243 class Instance : public vkt::TestInstance
1244 {
1245 public:
Instance(Context& context, const vk::ComputePipelineConstructionType computePipelineConstructionType)1246 Instance(Context& context, const vk::ComputePipelineConstructionType computePipelineConstructionType)
1247 : TestInstance (context)
1248 , m_computePipelineConstructionType (computePipelineConstructionType)
1249 {
1250 }
1251
iterate(void)1252 tcu::TestStatus iterate(void)
1253 {
1254 return runCompute(m_context, 1u, m_computePipelineConstructionType);
1255 }
1256 private:
1257 vk::ComputePipelineConstructionType m_computePipelineConstructionType;
1258 };
1259
createInstance(Context& context) const1260 TestInstance* createInstance(Context& context) const
1261 {
1262 return new Instance(context, m_computePipelineConstructionType);
1263 }
1264
1265 private:
1266 deUint32 m_size;
1267 vk::ComputePipelineConstructionType m_computePipelineConstructionType;
1268 };
1269
checkSupport(Context& context) const1270 void SizeTest::checkSupport(Context& context) const
1271 {
1272 context.requireDeviceFunctionality("VK_KHR_workgroup_memory_explicit_layout");
1273 context.requireDeviceFunctionality("VK_KHR_spirv_1_4");
1274
1275 if (context.getDeviceProperties().limits.maxComputeSharedMemorySize < m_size)
1276 TCU_THROW(NotSupportedError, "Not enough shared memory supported.");
1277
1278 checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(), m_computePipelineConstructionType);
1279 }
1280
initPrograms(SourceCollections& sourceCollections) const1281 void SizeTest::initPrograms(SourceCollections& sourceCollections) const
1282 {
1283 using namespace glu;
1284
1285 std::ostringstream src;
1286
1287 src << "#version 450\n";
1288 src << "#extension GL_EXT_shared_memory_block : enable\n";
1289 src << "#extension GL_EXT_shader_explicit_arithmetic_types : enable\n";
1290 src << "layout(local_size_x = 8, local_size_y = 1, local_size_z = 1) in;\n";
1291
1292 for (deUint32 i = 0; i < 8; ++i)
1293 src << "shared B" << i << " { uint32_t words[" << (m_size / 4) << "]; } b" << i << ";\n";
1294
1295 src << "layout(set = 0, binding = 0) buffer Result { uint result; };\n";
1296
1297 src << "void main() {\n";
1298 src << " int index = int(gl_LocalInvocationIndex);\n";
1299 src << " int size = " << (m_size / 4) << ";\n";
1300
1301 src << " if (index == 0) for (int x = 0; x < size; x++) b0.words[x] = 0xFFFF;\n";
1302 src << " barrier();\n";
1303
1304 src << " for (int x = 0; x < size; x++) {\n";
1305 src << " if (x % 8 != index) continue;\n";
1306 for (deUint32 i = 0; i < 8; ++i)
1307 src << " if (index == " << i << ") b" << i << ".words[x] = (x << 3) | " << i << ";\n";
1308 src << " }\n";
1309
1310 src << " barrier();\n";
1311 src << " if (index != 0) return;\n";
1312
1313 src << " int r = size;\n";
1314 src << " for (int x = 0; x < size; x++) {\n";
1315 src << " int expected = (x << 3) | (x % 8);\n";
1316 src << " if (b0.words[x] == expected) r--;\n";
1317 src << " }\n";
1318 src << " result = r;\n";
1319 src << "}\n";
1320
1321 sourceCollections.glslSources.add("comp")
1322 << ComputeSource(src.str())
1323 << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_4,
1324 vk::ShaderBuildOptions::Flags(0u), true);
1325 }
1326
AddSizeTests(tcu::TestCaseGroup* group, vk::ComputePipelineConstructionType computePipelineConstructionType)1327 void AddSizeTests(tcu::TestCaseGroup* group, vk::ComputePipelineConstructionType computePipelineConstructionType)
1328 {
1329 deUint32 sizes[] =
1330 {
1331 8u,
1332 64u,
1333 4096u,
1334
1335 // Dynamic generation of shaders based on properties reported
1336 // by devices is not allowed in the CTS, so let's create a few
1337 // variants based on common known maximum sizes.
1338 16384u,
1339 32768u,
1340 49152u,
1341 65536u,
1342 };
1343
1344 for (deUint32 i = 0; i < DE_LENGTH_OF_ARRAY(sizes); ++i)
1345 group->addChild(new SizeTest(group->getTestContext(), sizes[i], computePipelineConstructionType));
1346 }
1347
CreateAmberTestCase(tcu::TestContext& testCtx, const char* name, const std::string& filename, const std::vector<std::string>& requirements = std::vector<std::string>(), bool zeroinit = false, bool shaderObjects = false)1348 cts_amber::AmberTestCase* CreateAmberTestCase(tcu::TestContext& testCtx,
1349 const char* name,
1350 const std::string& filename,
1351 const std::vector<std::string>& requirements = std::vector<std::string>(),
1352 bool zeroinit = false,
1353 bool shaderObjects = false)
1354 {
1355 vk::SpirVAsmBuildOptions asm_options(VK_MAKE_API_VERSION(0, 1, 1, 0), vk::SPIRV_VERSION_1_4);
1356 asm_options.supports_VK_KHR_spirv_1_4 = true;
1357
1358 const std::string test_filename = shaderObjects ? "shader_object_" + std::string(filename) : filename;
1359
1360 cts_amber::AmberTestCase *t = cts_amber::createAmberTestCase(testCtx, name, "compute/workgroup_memory_explicit_layout", test_filename.c_str(), requirements);
1361 t->setSpirVAsmBuildOptions(asm_options);
1362 t->addRequirement("VK_KHR_workgroup_memory_explicit_layout");
1363 t->addRequirement("VK_KHR_spirv_1_4");
1364 if (zeroinit)
1365 {
1366 t->addRequirement("VK_KHR_zero_initialize_workgroup_memory");
1367 }
1368 if (shaderObjects)
1369 {
1370 t->addRequirement("VK_EXT_shader_object");
1371 }
1372 return t;
1373 }
1374
AddCopyMemoryTests(tcu::TestCaseGroup* group, vk::ComputePipelineConstructionType pipelineConstructionType)1375 void AddCopyMemoryTests(tcu::TestCaseGroup* group, vk::ComputePipelineConstructionType pipelineConstructionType)
1376 {
1377 tcu::TestContext& testCtx = group->getTestContext();
1378
1379 bool shaderObject = (pipelineConstructionType == COMPUTE_PIPELINE_CONSTRUCTION_TYPE_SHADER_OBJECT_SPIRV) || (pipelineConstructionType == COMPUTE_PIPELINE_CONSTRUCTION_TYPE_SHADER_OBJECT_BINARY);
1380
1381 group->addChild(CreateAmberTestCase(testCtx, "basic", "copy_memory_basic.amber", {}, false, shaderObject));
1382 group->addChild(CreateAmberTestCase(testCtx, "two_invocations", "copy_memory_two_invocations.amber", {}, false, shaderObject));
1383 group->addChild(CreateAmberTestCase(testCtx, "variable_pointers", "copy_memory_variable_pointers.amber",
1384 { "VariablePointerFeatures.variablePointers" }, false, shaderObject));
1385 }
1386
AddZeroInitializeExtensionTests(tcu::TestCaseGroup* group, vk::ComputePipelineConstructionType pipelineConstructionType)1387 void AddZeroInitializeExtensionTests(tcu::TestCaseGroup* group, vk::ComputePipelineConstructionType pipelineConstructionType)
1388 {
1389 tcu::TestContext& testCtx = group->getTestContext();
1390
1391 bool shaderObject = (pipelineConstructionType == COMPUTE_PIPELINE_CONSTRUCTION_TYPE_SHADER_OBJECT_SPIRV) || (pipelineConstructionType == COMPUTE_PIPELINE_CONSTRUCTION_TYPE_SHADER_OBJECT_BINARY);
1392
1393 group->addChild(CreateAmberTestCase(testCtx, "block", "zero_ext_block.amber", std::vector<std::string>(), true, shaderObject));
1394 group->addChild(CreateAmberTestCase(testCtx, "other_block", "zero_ext_other_block.amber", std::vector<std::string>(), true, shaderObject));
1395 group->addChild(CreateAmberTestCase(testCtx, "block_with_offset", "zero_ext_block_with_offset.amber", std::vector<std::string>(), true, shaderObject));
1396 }
1397
1398 } // anonymous
1399
createWorkgroupMemoryExplicitLayoutTests(tcu::TestContext& testCtx, vk::ComputePipelineConstructionType computePipelineConstructionType)1400 tcu::TestCaseGroup* createWorkgroupMemoryExplicitLayoutTests(tcu::TestContext& testCtx, vk::ComputePipelineConstructionType computePipelineConstructionType)
1401 {
1402 de::MovePtr<tcu::TestCaseGroup> tests(new tcu::TestCaseGroup(testCtx, "workgroup_memory_explicit_layout"));
1403
1404 // Aliasing between different blocks and types
1405 tcu::TestCaseGroup* alias = new tcu::TestCaseGroup(testCtx, "alias");
1406 AddAliasTests(alias, computePipelineConstructionType);
1407 tests->addChild(alias);
1408
1409 tcu::TestCaseGroup* zero = new tcu::TestCaseGroup(testCtx, "zero", "Manually zero initialize a block and read from another");
1410 AddZeroTests(zero, computePipelineConstructionType);
1411 tests->addChild(zero);
1412
1413 tcu::TestCaseGroup* padding = new tcu::TestCaseGroup(testCtx, "padding", "Padding as part of the explicit layout");
1414 AddPaddingTests(padding, computePipelineConstructionType);
1415 tests->addChild(padding);
1416
1417 tcu::TestCaseGroup* size = new tcu::TestCaseGroup(testCtx, "size", "Test blocks of various sizes");
1418 AddSizeTests(size, computePipelineConstructionType);
1419 tests->addChild(size);
1420
1421 tcu::TestCaseGroup* copy_memory = new tcu::TestCaseGroup(testCtx, "copy_memory", "Test OpCopyMemory with Workgroup memory");
1422 AddCopyMemoryTests(copy_memory, computePipelineConstructionType);
1423 tests->addChild(copy_memory);
1424
1425 tcu::TestCaseGroup* zero_ext = new tcu::TestCaseGroup(testCtx, "zero_ext", "Test interaction with VK_KHR_zero_initialize_workgroup_memory");
1426 AddZeroInitializeExtensionTests(zero_ext, computePipelineConstructionType);
1427 tests->addChild(zero_ext);
1428
1429 return tests.release();
1430 }
1431
1432 } // compute
1433 } // vkt
1434