1/*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2021 The Khronos Group Inc.
6 * Copyright (c) 2021 Valve Corporation.
7 *
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 *      http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 *
20 *//*!
21 * \file
22 * \brief Mesh Shader Synchronization Tests for VK_EXT_mesh_shader
23 *//*--------------------------------------------------------------------*/
24
25#include "vktMeshShaderSyncTestsEXT.hpp"
26#include "vktMeshShaderUtil.hpp"
27#include "vktTestCase.hpp"
28
29#include "vkDefs.hpp"
30#include "vkTypeUtil.hpp"
31#include "vkImageWithMemory.hpp"
32#include "vkBufferWithMemory.hpp"
33#include "vkObjUtil.hpp"
34#include "vkBuilderUtil.hpp"
35#include "vkCmdUtil.hpp"
36#include "vkBarrierUtil.hpp"
37#include "vkImageUtil.hpp"
38
39#include "deUniquePtr.hpp"
40
41#include <iostream>
42#include <sstream>
43#include <vector>
44#include <set>
45
46namespace vkt
47{
48namespace MeshShader
49{
50
51namespace
52{
53
54using GroupPtr = de::MovePtr<tcu::TestCaseGroup>;
55
56using namespace vk;
57
58// Stages that will be used in these tests. Shader stages sorted in pipeline order.
enum class Stage
{
	HOST = 0,	// Access from the host (CPU).
	TRANSFER,	// Access via a transfer (copy) operation.
	TASK,		// Access from a task shader.
	MESH,		// Access from a mesh shader.
	FRAG,		// Access from a fragment shader.
};
67
68std::ostream& operator<< (std::ostream& stream, Stage stage)
69{
70	switch (stage)
71	{
72	case Stage::HOST:		stream << "host";		break;
73	case Stage::TRANSFER:	stream << "transfer";	break;
74	case Stage::TASK:		stream << "task";		break;
75	case Stage::MESH:		stream << "mesh";		break;
76	case Stage::FRAG:		stream << "frag";		break;
77	default: DE_ASSERT(false); break;
78	}
79
80	return stream;
81}
82
83bool isShaderStage (Stage stage)
84{
85	return (stage == Stage::TASK || stage == Stage::MESH || stage == Stage::FRAG);
86}
87
88VkPipelineStageFlags stageToFlags (Stage stage)
89{
90	switch (stage)
91	{
92	case Stage::HOST:		return VK_PIPELINE_STAGE_HOST_BIT;
93	case Stage::TRANSFER:	return VK_PIPELINE_STAGE_TRANSFER_BIT;
94	case Stage::TASK:		return VK_PIPELINE_STAGE_TASK_SHADER_BIT_EXT;
95	case Stage::MESH:		return VK_PIPELINE_STAGE_MESH_SHADER_BIT_EXT;
96	case Stage::FRAG:		return VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
97	default:				DE_ASSERT(false); break;
98	}
99
100	// Unreachable.
101	DE_ASSERT(false);
102	return 0u;
103}
104
// Format used for the color buffer and image resources: a single 32-bit unsigned channel.
VkFormat getImageFormat ()
{
	return VK_FORMAT_R32_UINT;
}
109
// Images used by these tests are a single texel (1x1x1).
VkExtent3D getImageExtent ()
{
	return makeExtent3D(1u, 1u, 1u);
}
114
115// Types of resources we will use.
enum class ResourceType
{
	UNIFORM_BUFFER = 0,	// Uniform buffer (never shader-written, see canWriteResourceAsAccess).
	STORAGE_BUFFER,		// Storage buffer.
	STORAGE_IMAGE,		// Storage image.
	SAMPLED_IMAGE,		// Image accessed through a combined image sampler.
};
123
124VkDescriptorType resourceTypeToDescriptor (ResourceType resType)
125{
126	switch (resType)
127	{
128	case ResourceType::UNIFORM_BUFFER:	return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
129	case ResourceType::STORAGE_BUFFER:	return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
130	case ResourceType::STORAGE_IMAGE:	return VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
131	case ResourceType::SAMPLED_IMAGE:	return VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
132	default:							DE_ASSERT(false); break;
133	}
134
135	// Unreachable.
136	DE_ASSERT(false);
137	return VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR;
138}
139
140// Will the test use a specific barrier or a general memory barrier?
enum class BarrierType
{
	GENERAL = 0,	// Generic memory barrier covering all resources.
	SPECIFIC,		// Barrier targeting the specific buffer or image resource.
	DEPENDENCY,		// Subpass dependency (see createCustomRenderPasses).
};
147
148// Types of writes we will use.
enum class WriteAccess
{
	HOST_WRITE = 0,		// VK_ACCESS_HOST_WRITE_BIT
	TRANSFER_WRITE,		// VK_ACCESS_TRANSFER_WRITE_BIT
	SHADER_WRITE,		// VK_ACCESS_SHADER_WRITE_BIT
};
155
156VkAccessFlags writeAccessToFlags (WriteAccess access)
157{
158	switch (access)
159	{
160	case WriteAccess::HOST_WRITE:		return VK_ACCESS_HOST_WRITE_BIT;
161	case WriteAccess::TRANSFER_WRITE:	return VK_ACCESS_TRANSFER_WRITE_BIT;
162	case WriteAccess::SHADER_WRITE:		return VK_ACCESS_SHADER_WRITE_BIT;
163	default:							DE_ASSERT(false); break;
164	}
165
166	// Unreachable.
167	DE_ASSERT(false);
168	return 0u;
169}
170
171// Types of reads we will use.
enum class ReadAccess
{
	HOST_READ = 0,		// VK_ACCESS_HOST_READ_BIT
	TRANSFER_READ,		// VK_ACCESS_TRANSFER_READ_BIT
	SHADER_READ,		// VK_ACCESS_SHADER_READ_BIT
	UNIFORM_READ,		// VK_ACCESS_UNIFORM_READ_BIT (only valid for uniform buffers).
};
179
180VkAccessFlags readAccessToFlags (ReadAccess access)
181{
182	switch (access)
183	{
184	case ReadAccess::HOST_READ:			return VK_ACCESS_HOST_READ_BIT;
185	case ReadAccess::TRANSFER_READ:		return VK_ACCESS_TRANSFER_READ_BIT;
186	case ReadAccess::SHADER_READ:		return VK_ACCESS_SHADER_READ_BIT;
187	case ReadAccess::UNIFORM_READ:		return VK_ACCESS_UNIFORM_READ_BIT;
188	default:							DE_ASSERT(false); break;
189	}
190
191	// Unreachable.
192	DE_ASSERT(false);
193	return 0u;
194}
195
196// Auxiliary functions to verify certain combinations are possible.
197
198// Check if the writing stage can use the specified write access.
199bool canWriteFromStageAsAccess (Stage writeStage, WriteAccess access)
200{
201	switch (writeStage)
202	{
203	case Stage::HOST:		return (access == WriteAccess::HOST_WRITE);
204	case Stage::TRANSFER:	return (access == WriteAccess::TRANSFER_WRITE);
205	case Stage::TASK:		// fallthrough
206	case Stage::MESH:		// fallthrough
207	case Stage::FRAG:		return (access == WriteAccess::SHADER_WRITE);
208	default:				DE_ASSERT(false); break;
209	}
210
211	return false;
212}
213
214// Check if the reading stage can use the specified read access.
215bool canReadFromStageAsAccess (Stage readStage, ReadAccess access)
216{
217	switch (readStage)
218	{
219	case Stage::HOST:		return (access == ReadAccess::HOST_READ);
220	case Stage::TRANSFER:	return (access == ReadAccess::TRANSFER_READ);
221	case Stage::TASK:		// fallthrough
222	case Stage::MESH:		// fallthrough
223	case Stage::FRAG:		return (access == ReadAccess::SHADER_READ || access == ReadAccess::UNIFORM_READ);
224	default:				DE_ASSERT(false); break;
225	}
226
227	return false;
228}
229
230// Check if reading the given resource type is possible with the given type of read access.
231bool canReadResourceAsAccess (ResourceType resType, ReadAccess access)
232{
233	if (access == ReadAccess::UNIFORM_READ)
234		return (resType == ResourceType::UNIFORM_BUFFER);
235	return true;
236}
237
238// Check if writing to the given resource type is possible with the given type of write access.
239bool canWriteResourceAsAccess (ResourceType resType, WriteAccess access)
240{
241	if (resType == ResourceType::UNIFORM_BUFFER)
242		return (access != WriteAccess::SHADER_WRITE);
243	return true;
244}
245
246// Check if the given stage can write to the given resource type.
247bool canWriteTo (Stage stage, ResourceType resType)
248{
249	switch (stage)
250	{
251	case Stage::HOST:		return (resType == ResourceType::UNIFORM_BUFFER || resType == ResourceType::STORAGE_BUFFER);
252	case Stage::TRANSFER:	return true;
253	case Stage::TASK:		// fallthrough
254	case Stage::MESH:		// fallthrough
255	case Stage::FRAG:		return (resType == ResourceType::STORAGE_BUFFER || resType == ResourceType::STORAGE_IMAGE);
256	default:				DE_ASSERT(false); break;
257	}
258
259	return false;
260}
261
262// Check if the given stage can read from the given resource type.
263bool canReadFrom (Stage stage, ResourceType resType)
264{
265	switch (stage)
266	{
267	case Stage::HOST:		return (resType == ResourceType::UNIFORM_BUFFER || resType == ResourceType::STORAGE_BUFFER);
268	case Stage::TRANSFER:	// fallthrough
269	case Stage::TASK:		// fallthrough
270	case Stage::MESH:		// fallthrough
271	case Stage::FRAG:		return true;
272	default:				DE_ASSERT(false); break;
273	}
274
275	return false;
276}
277
278// Will we need to store the test value in an auxiliar buffer to be read?
279bool needsAuxiliarSourceBuffer (Stage fromStage, Stage toStage)
280{
281	DE_UNREF(toStage);
282	return (fromStage == Stage::TRANSFER);
283}
284
285// Will we need to store the read operation result into an auxiliar buffer to be checked?
286bool needsAuxiliarDestBuffer (Stage fromStage, Stage toStage)
287{
288	DE_UNREF(fromStage);
289	return (toStage == Stage::TRANSFER);
290}
291
292// Needs any auxiliar buffer for any case?
293bool needsAuxiliarBuffer (Stage fromStage, Stage toStage)
294{
295	return (needsAuxiliarSourceBuffer(fromStage, toStage) || needsAuxiliarDestBuffer(fromStage, toStage));
296}
297
298// Will the final value be stored in the auxiliar destination buffer?
299bool valueInAuxiliarDestBuffer (Stage toStage)
300{
301	return (toStage == Stage::TRANSFER);
302}
303
304// Will the final value be stored in the resource buffer itself?
305bool valueInResourceBuffer (Stage toStage)
306{
307	return (toStage == Stage::HOST);
308}
309
310// Will the final value be stored in the color buffer?
311bool valueInColorBuffer (Stage toStage)
312{
313	return (!valueInAuxiliarDestBuffer(toStage) && !valueInResourceBuffer(toStage));
314}
315
316// Image usage flags for the image resource.
317VkImageUsageFlags resourceImageUsageFlags (ResourceType resourceType)
318{
319	VkImageUsageFlags flags = (VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT);
320
321	switch (resourceType)
322	{
323	case ResourceType::STORAGE_IMAGE:	flags |= VK_IMAGE_USAGE_STORAGE_BIT;	break;
324	case ResourceType::SAMPLED_IMAGE:	flags |= VK_IMAGE_USAGE_SAMPLED_BIT;	break;
325	default: DE_ASSERT(false); break;
326	}
327
328	return flags;
329}
330
331// Buffer usage flags for the buffer resource.
332VkBufferUsageFlags resourceBufferUsageFlags (ResourceType resourceType)
333{
334	VkBufferUsageFlags flags = (VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT);
335
336	switch (resourceType)
337	{
338	case ResourceType::UNIFORM_BUFFER:	flags |= VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;	break;
339	case ResourceType::STORAGE_BUFFER:	flags |= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;	break;
340	default: DE_ASSERT(false); break;
341	}
342
343	return flags;
344}
345
346// Returns true if both the write and read stages are shader stages.
347bool fromShaderToShader (Stage fromStage, Stage toStage)
348{
349	return (isShaderStage(fromStage) && isShaderStage(toStage));
350}
351
352// Supposing we'll use two subpasses, decide the stages of a subpass based on the mandatory stages and the one we're interested in.
353std::vector<Stage> subpassStages (Stage wantedStage, bool lastSubpass)
354{
355	std::set<Stage> stages;
356	stages.insert(wantedStage);
357	stages.insert(Stage::MESH);		// This one is mandatory.
358	if (lastSubpass)
359		stages.insert(Stage::FRAG);	// In the last subpass we always need a fragment shader (passthrough).
360	return std::vector<Stage>(begin(stages), end(stages));
361}
362
363// Is the task shader in the list?
364bool hasTask (const std::vector<Stage>& stages)
365{
366	return de::contains(begin(stages), end(stages), Stage::TASK);
367}
368
369// Is the frag shader in the list?
370bool hasFrag (const std::vector<Stage>& stages)
371{
372	return de::contains(begin(stages), end(stages), Stage::FRAG);
373}
374
// Aggregates all the parameters defining one synchronization test case and provides helpers
// to generate the matching GLSL snippets and derive pipeline-related decisions.
struct TestParams
{
	Stage			fromStage;		// Stage performing the write.
	Stage			toStage;		// Stage performing the read.
	ResourceType	resourceType;	// Resource used to communicate the value between stages.
	BarrierType		barrierType;	// Synchronization mechanism under test.
	WriteAccess		writeAccess;	// Access type used for the write.
	ReadAccess		readAccess;		// Access type used for the read.
	uint32_t		testValue;		// Value written in fromStage and verified after reading in toStage.

protected:
	// True if the given stage participates in the test, either writing or reading.
	bool readsOrWritesIn (Stage stage) const
	{
		DE_ASSERT(fromStage != toStage);
		return (fromStage == stage || toStage == stage);
	}

public:
	// A task shader is needed when the task stage reads or writes.
	bool needsTask () const
	{
		return readsOrWritesIn(Stage::TASK);
	}

	bool readsOrWritesInMesh () const
	{
		return readsOrWritesIn(Stage::MESH);
	}

	// GLSL declaration of the resource at set 0, binding 0. Buffers and storage images are
	// declared readonly unless the test writes to them from a shader.
	std::string getResourceDecl () const
	{
		const auto			imgFormat		= ((resourceType == ResourceType::STORAGE_IMAGE) ? ", r32ui" : "");
		const auto			storagePrefix	= ((writeAccess == WriteAccess::SHADER_WRITE) ? "" : "readonly ");
		std::ostringstream	decl;

		decl << "layout (set=0, binding=0" << imgFormat << ") ";
		switch (resourceType)
		{
		case ResourceType::UNIFORM_BUFFER:	decl << "uniform UniformBuffer { uint value; } ub;";					break;
		case ResourceType::STORAGE_BUFFER:	decl << storagePrefix << "buffer StorageBuffer { uint value; } sb;";	break;
		case ResourceType::STORAGE_IMAGE:	decl << storagePrefix << "uniform uimage2D si;";						break;
		case ResourceType::SAMPLED_IMAGE:	decl << "uniform usampler2D sampled;";									break;
		default:							DE_ASSERT(false);														break;
		}

		decl << "\n";
		return decl.str();
	}

	// Push constant contents controlling whether the shaders perform the write and/or the read.
	// Must match the block declared by getPushConstantDecl() below.
	struct PushConstantStruct
	{
		uint32_t writeVal;
		uint32_t readVal;
	};

	// Get declaration for the "pc" push constant block. Must match the structure above.
	std::string getPushConstantDecl () const
	{
		std::ostringstream pc;
		pc
			<< "layout (push_constant, std430) uniform PushConstantBlock {\n"
			<< "    uint writeVal;\n"
			<< "    uint readVal;\n"
			<< "} pc;\n"
			;
		return pc.str();
	}

	// GLSL statement reading the resource value into outName, guarded by pc.readVal.
	std::string getReadStatement (const std::string& outName) const
	{
		std::ostringstream statement;
		statement << "    if (pc.readVal > 0u) { " << outName << " = ";

		switch (resourceType)
		{
		case ResourceType::UNIFORM_BUFFER:	statement << "ub.value";							break;
		case ResourceType::STORAGE_BUFFER:	statement << "sb.value";							break;
		case ResourceType::STORAGE_IMAGE:	statement << "imageLoad(si, ivec2(0, 0)).x";		break;
		case ResourceType::SAMPLED_IMAGE:	statement << "texture(sampled, vec2(0.5, 0.5)).x";	break;
		default:							DE_ASSERT(false); break;
		}

		statement << "; }\n";
		return statement.str();
	}

	// GLSL statement writing valueName to the resource, guarded by pc.writeVal.
	// Only storage buffers and storage images can be written from shaders.
	std::string getWriteStatement (const std::string& valueName) const
	{
		std::ostringstream statement;
		statement << "    if (pc.writeVal > 0u) { ";

		switch (resourceType)
		{
		case ResourceType::STORAGE_BUFFER:	statement << "sb.value = " << valueName;											break;
		case ResourceType::STORAGE_IMAGE:	statement << "imageStore(si, ivec2(0, 0), uvec4(" << valueName << ", 0, 0, 0))";	break;
		case ResourceType::UNIFORM_BUFFER:	// fallthrough
		case ResourceType::SAMPLED_IMAGE:	// fallthrough
		default:							DE_ASSERT(false); break;
		}

		statement << "; }\n";
		return statement.str();
	}

	// Shader stages that will access the resource descriptor (for the descriptor set layout).
	VkShaderStageFlags getResourceShaderStages () const
	{
		VkShaderStageFlags flags = 0u;

		if (fromStage == Stage::TASK || toStage == Stage::TASK)	flags |= VK_SHADER_STAGE_TASK_BIT_EXT;
		if (fromStage == Stage::MESH || toStage == Stage::MESH)	flags |= VK_SHADER_STAGE_MESH_BIT_EXT;
		if (fromStage == Stage::FRAG || toStage == Stage::FRAG)	flags |= VK_SHADER_STAGE_FRAGMENT_BIT;

		// We assume at least something must be done either on the task or mesh shaders for the tests to be interesting.
		DE_ASSERT((flags & (VK_SHADER_STAGE_TASK_BIT_EXT | VK_SHADER_STAGE_MESH_BIT_EXT)) != 0u);
		return flags;
	}

	// We'll prefer to keep the image in the general layout if it will be written to from a shader stage or if the barrier is going to be a generic memory barrier.
	bool preferGeneralLayout () const
	{
		return (isShaderStage(fromStage) || (barrierType == BarrierType::GENERAL) || (resourceType == ResourceType::STORAGE_IMAGE));
	}

	// We need two pipelines if both the writing and reading stage are shaders, and either:
	// - The writing stage comes after the reading stage in the pipeline.
	// - The barrier to use is not a dependency.
	bool needsTwoPipelines () const
	{
		return (fromShaderToShader(fromStage, toStage) &&
				(static_cast<int>(fromStage) >= static_cast<int>(toStage) ||
				barrierType != BarrierType::DEPENDENCY));
	}

	// We need to use generic barriers when using subpass self-dependencies (single subpass and pipeline).
	// Note: barrierType == BarrierType::DEPENDENCY is technically redundant with !needsTwoPipelines().
	bool subpassSelfDependency () const
	{
		return (fromShaderToShader(fromStage, toStage) && barrierType == BarrierType::DEPENDENCY && !needsTwoPipelines());
	}

};
515
// Test case verifying synchronization of a write in one stage followed by a read in another,
// according to the given parameters.
class MeshShaderSyncCase : public vkt::TestCase
{
public:
					MeshShaderSyncCase		(tcu::TestContext& testCtx, const std::string& name, const TestParams& params)
						: vkt::TestCase (testCtx, name), m_params (params)
						{}

	virtual			~MeshShaderSyncCase		(void) {}

	// Checks mesh shader support (and task shader support if needed) plus shader-write features.
	void			checkSupport			(Context& context) const override;
	// Generates the main and passthrough task, mesh and fragment shaders.
	void			initPrograms			(vk::SourceCollections& programCollection) const override;
	TestInstance*	createInstance			(Context& context) const override;

protected:
	TestParams		m_params;
};
532
// Instance running a single synchronization test according to the stored parameters.
class MeshShaderSyncInstance : public vkt::TestInstance
{
public:
						MeshShaderSyncInstance	(Context& context, const TestParams& params) : vkt::TestInstance(context), m_params(params) {}
	virtual				~MeshShaderSyncInstance	(void) {}

	tcu::TestStatus		iterate					(void) override;

protected:
	TestParams			m_params;
};
544
545void MeshShaderSyncCase::checkSupport (Context& context) const
546{
547	checkTaskMeshShaderSupportEXT(context, m_params.needsTask(), true);
548
549	if (m_params.writeAccess == WriteAccess::SHADER_WRITE)
550	{
551		context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_VERTEX_PIPELINE_STORES_AND_ATOMICS);
552	}
553}
554
// Generates GLSL sources: main task, mesh and fragment shaders with the read/write statements
// inserted in the stages indicated by the test parameters, plus passthrough variants used when
// a second pipeline is needed (see the two-pipeline cases described below createInstance).
void MeshShaderSyncCase::initPrograms (vk::SourceCollections& programCollection) const
{
	const auto			buildOptions	= getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
	const bool			needsTaskShader	= m_params.needsTask();
	const auto			valueStr		= de::toString(m_params.testValue);
	const auto			resourceDecl	= m_params.getResourceDecl();
	const auto			pcDecl			= m_params.getPushConstantDecl();
	const std::string	tdDecl			= "struct TaskData { uint value; }; taskPayloadSharedEXT TaskData td;\n";

	// Main task shader: only generated when the task stage reads or writes.
	if (needsTaskShader)
	{
		std::ostringstream task;
		task
			<< "#version 450\n"
			<< "#extension GL_EXT_mesh_shader : enable\n"
			<< "\n"
			<< "layout(local_size_x=1) in;\n"
			<< "\n"
			<< tdDecl
			<< "\n"
			<< resourceDecl
			<< pcDecl
			<< "\n"
			<< "void main ()\n"
			<< "{\n"
			<< "    td.value = 0u;\n"
			<< ((m_params.fromStage == Stage::TASK)	? m_params.getWriteStatement(valueStr)	: "")
			<< ((m_params.toStage == Stage::TASK)	? m_params.getReadStatement("td.value")	: "")
			<< "    EmitMeshTasksEXT(1u, 1u, 1u);\n"
			<< "}\n"
			;
		programCollection.glslSources.add("task") << glu::TaskSource(task.str()) << buildOptions;
	}

	// Main mesh shader: emits a single full-screen triangle and passes the value on as a
	// per-primitive output.
	{
		// In the mesh-to-task case, we need non-passthrough mesh and task shaders but the mesh shader doesn't have a previous task shader.
		// In the task-to-mesh case, the second pipeline will have the main mesh shader but no previous task shader either.
		const bool prevTaskInMainMesh	= (needsTaskShader
											&& !(m_params.fromStage == Stage::MESH && m_params.toStage == Stage::TASK)
											&& !(m_params.fromStage == Stage::TASK && m_params.toStage == Stage::MESH));
		const bool rwInMeshStage		= m_params.readsOrWritesInMesh();

		std::ostringstream mesh;
		mesh
			<< "#version 450\n"
			<< "#extension GL_EXT_mesh_shader : enable\n"
			<< "\n"
			<< "layout(local_size_x=1) in;\n"
			<< "layout(triangles) out;\n"
			<< "layout(max_vertices=3, max_primitives=1) out;\n"
			<< "\n"
			<< (prevTaskInMainMesh ? tdDecl : "")
			<< "layout (location=0) out perprimitiveEXT uint primitiveValue[];\n"
			<< "\n"
			<< (rwInMeshStage ? resourceDecl : "")
			<< (rwInMeshStage ? pcDecl : "")
			<< "\n"
			<< "void main ()\n"
			<< "{\n"
			<< "    SetMeshOutputsEXT(3u, 1u);\n"
			<< (prevTaskInMainMesh ? "    primitiveValue[0] = td.value;\n" : "")
			<< ((m_params.fromStage == Stage::MESH)	? m_params.getWriteStatement(valueStr)				: "")
			<< ((m_params.toStage == Stage::MESH)	? m_params.getReadStatement("primitiveValue[0]")	: "")
			<< "\n"
			<< "    gl_MeshVerticesEXT[0].gl_Position = vec4(-1.0, -1.0, 0.0, 1.0);\n"
			<< "    gl_MeshVerticesEXT[1].gl_Position = vec4(-1.0,  3.0, 0.0, 1.0);\n"
			<< "    gl_MeshVerticesEXT[2].gl_Position = vec4( 3.0, -1.0, 0.0, 1.0);\n"
			<< "    gl_PrimitiveTriangleIndicesEXT[0] = uvec3(0, 1, 2);\n"
			<< "}\n"
			;
		programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
	}

	// Main fragment shader: copies the per-primitive value to the color output, optionally
	// replacing it with a fresh read or performing the write from the fragment stage.
	{
		const bool			readFromFrag	= (m_params.toStage == Stage::FRAG);
		const bool			writeFromFrag	= (m_params.fromStage == Stage::FRAG);
		const bool			rwInFragStage	= (readFromFrag || writeFromFrag);
		std::ostringstream	frag;

		frag
			<< "#version 450\n"
			<< "#extension GL_EXT_mesh_shader : enable\n"
			<< "\n"
			<< "layout (location=0) in perprimitiveEXT flat uint primitiveValue;\n"
			<< "layout (location=0) out uvec4 outColor;\n"
			<< "\n"
			<< (rwInFragStage ? resourceDecl : "")
			<< (rwInFragStage ? pcDecl : "")
			<< "\n"
			<< "void main ()\n"
			<< "{\n"
			<< "    outColor = uvec4(primitiveValue, 0, 0, 0);\n"
			<< "    uint readVal = 0u;\n"
			<< (readFromFrag ?	m_params.getReadStatement("readVal")	: "")
			<< (readFromFrag ?	"    outColor = uvec4(readVal, 0, 0, 0);\n"		: "")
			<< (writeFromFrag ?	m_params.getWriteStatement(valueStr)			: "")
			<< "}\n"
			;
		programCollection.glslSources.add("frag") << glu::FragmentSource(frag.str()) << buildOptions;
	}

	// Passthrough shaders.
	{
		const std::string task =
			"#version 450\n"
			"#extension GL_EXT_mesh_shader : enable\n"
			"\n"
			"layout(local_size_x=1) in;\n"
			"\n"
			+ tdDecl +
			"\n"
			"void main ()\n"
			"{\n"
			"    td.value = 0u;\n"
			"    EmitMeshTasksEXT(1u, 1u, 1u);\n"
			"}\n"
			;
		programCollection.glslSources.add("taskPassthrough") << glu::TaskSource(task) << buildOptions;

		const std::string frag =
			"#version 450\n"
			"#extension GL_EXT_mesh_shader : enable\n"
			"\n"
			"layout (location=0) in perprimitiveEXT flat uint primitiveValue;\n"
			"layout (location=0) out uvec4 outColor;\n"
			"\n"
			"void main ()\n"
			"{\n"
			"	outColor = uvec4(primitiveValue, 0, 0, 0);\n"
			"}\n"
			;
		programCollection.glslSources.add("fragPassthrough") << glu::FragmentSource(frag) << buildOptions;

		// Two passthrough mesh shader variants: with and without a previous task shader stage.
		for (int i = 0; i < 2; ++i)
		{
			const bool			prevTask	= (i > 0);
			const std::string	nameSuffix	= (prevTask ? "WithTask" : "");
			const std::string	mesh		=
				"#version 450\n"
				"#extension GL_EXT_mesh_shader : enable\n"
				"\n"
				"layout(local_size_x=1) in;\n"
				"layout(triangles) out;\n"
				"layout(max_vertices=3, max_primitives=1) out;\n"
				"\n"
				+ (prevTask ? tdDecl : "") +
				"layout (location=0) out perprimitiveEXT uint primitiveValue[];\n"
				"\n"
				"void main ()\n"
				"{\n"
				"    SetMeshOutputsEXT(3u, 1u);\n"
				"    " + (prevTask ? "primitiveValue[0] = td.value;" : "primitiveValue[0] = 0u;") + "\n"
				"\n"
				"    gl_MeshVerticesEXT[0].gl_Position = vec4(-1.0, -1.0, 0.0, 1.0);\n"
				"    gl_MeshVerticesEXT[1].gl_Position = vec4(-1.0,  3.0, 0.0, 1.0);\n"
				"    gl_MeshVerticesEXT[2].gl_Position = vec4( 3.0, -1.0, 0.0, 1.0);\n"
				"    gl_PrimitiveTriangleIndicesEXT[0] = uvec3(0, 1, 2);\n"
				"}\n"
				;
			programCollection.glslSources.add("meshPassthrough" + nameSuffix) << glu::MeshSource(mesh) << buildOptions;
		}
	}
}
718
719TestInstance* MeshShaderSyncCase::createInstance (Context& context) const
720{
721	return new MeshShaderSyncInstance(context, m_params);
722}
723
724// General description behind these tests.
725//
726//	From				To
727//	==============================
728//	HOST				TASK			Prepare buffer from host. Only valid for uniform and storage buffers. Read value from task into td.value. Verify color buffer.
729//	HOST				MESH			Same situation. Read value from mesh into primitiveValue[0]. Verify color buffer.
730//	TRANSFER			TASK			Prepare auxiliary host-coherent source buffer from host. Copy buffer to buffer or buffer to image. Read from task into td.value. Verify color buffer.
731//	TRANSFER			MESH			Same initial steps. Read from mesh into primitiveValue[0]. Verify color buffer.
732//	TASK				MESH			Write value to buffer or image from task shader. Only valid for storage buffers and images. Read from mesh into primitiveValue[0]. Verify color buffer.
733//	TASK				FRAG			Same write procedure and restrictions. Read from frag into outColor. Verify color buffer.
734//	TASK				TRANSFER		Same write procedure and restrictions. Prepare auxiliary host-coherent read buffer and copy buffer to buffer or image to buffer. Verify auxiliary buffer.
735//	TASK				HOST			Due to From/To restrictions, only valid for storage buffers. Same write procedure. Read and verify buffer directly.
736//	MESH				FRAG			Same as task to frag but the write instructions need to be in the mesh shader.
737//	MESH				TRANSFER		Same as task to transfer but the write instructions need to be in the mesh shader.
738//	MESH				HOST			Same as task to host but the write instructions need to be in the mesh shader.
739//
740//	The following cases require two pipelines
741//	=========================================
742//	MESH				TASK			Write value to buffer or image from mesh shader. Only valid for storage buffers and images. Read from task into td.value. Verify color buffer.
743//		Sequence: mesh, task, mesh*, frag*.
744//	FRAG				TASK			Same as mesh to task, but writing from the first fragment shader.
745//		Sequence: mesh*, frag, task, mesh*, frag*.
746//	FRAG				MESH			Similar to frag to task, but reading from mesh into primitiveValue[0]. Verify color buffer after second fragment shader.
747//		Sequence: mesh*, frag, mesh, frag*.
748//
749
750// Create one or two render passes with the right dependencies depending on the test parameters.
// Create one or two render passes with the right dependencies depending on the test parameters.
// When two pipelines synchronize via a subpass dependency they share one render pass with two
// subpasses; with any other barrier type each pipeline gets its own single-subpass render pass.
std::vector<Move<VkRenderPass>> createCustomRenderPasses (const DeviceInterface& vkd, VkDevice device, VkFormat colorFormat, const TestParams& params)
{
	std::vector<Move<VkRenderPass>>	renderPasses;
	const bool						useDependencies	= (params.barrierType == BarrierType::DEPENDENCY);
	const bool						twoPipelines	= params.needsTwoPipelines();
	const bool						twoSubpasses	= (twoPipelines && useDependencies);
	const uint32_t					pipelineCount	= (twoPipelines ? 2u : 1u);
	const uint32_t					subpassCount	= (twoSubpasses ? 2u : 1u);
	const uint32_t					renderPassCount	= ((twoPipelines && !twoSubpasses) ? 2u : 1u);

	const std::vector<VkAttachmentDescription> attachmentDescs =
	{
		{
			0u,											//	VkAttachmentDescriptionFlags	flags;
			colorFormat,								//	VkFormat						format;
			VK_SAMPLE_COUNT_1_BIT,						//	VkSampleCountFlagBits			samples;
			VK_ATTACHMENT_LOAD_OP_CLEAR,				//	VkAttachmentLoadOp				loadOp;
			VK_ATTACHMENT_STORE_OP_STORE,				//	VkAttachmentStoreOp				storeOp;
			VK_ATTACHMENT_LOAD_OP_DONT_CARE,			//	VkAttachmentLoadOp				stencilLoadOp;
			VK_ATTACHMENT_STORE_OP_DONT_CARE,			//	VkAttachmentStoreOp				stencilStoreOp;
			VK_IMAGE_LAYOUT_UNDEFINED,					//	VkImageLayout					initialLayout;
			VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,	//	VkImageLayout					finalLayout;
		}
	};

	const std::vector<VkAttachmentReference> attachmentRefs = { { 0u, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL } };

	// One or two identical subpasses.
	const VkSubpassDescription subpassDesc =
	{
		0u,												//	VkSubpassDescriptionFlags		flags;
		VK_PIPELINE_BIND_POINT_GRAPHICS,				//	VkPipelineBindPoint				pipelineBindPoint;
		0u,												//	uint32_t						inputAttachmentCount;
		nullptr,										//	const VkAttachmentReference*	pInputAttachments;
		static_cast<uint32_t>(attachmentRefs.size()),	//	uint32_t						colorAttachmentCount;
		de::dataOrNull(attachmentRefs),					//	const VkAttachmentReference*	pColorAttachments;
		nullptr,										//	const VkAttachmentReference*	pResolveAttachments;
		nullptr,										//	const VkAttachmentReference*	pDepthStencilAttachment;
		0u,												//	uint32_t						preserveAttachmentCount;
		nullptr,										//	const uint32_t*					pPreserveAttachments;
	};

	const std::vector<VkSubpassDescription> subpassDescs (subpassCount, subpassDesc);

	// Shader-to-shader cases using dependencies get a subpass dependency from the writing to the
	// reading stage. With a single pipeline, dstSubpass equals srcSubpass (a self-dependency).
	std::vector<VkSubpassDependency> dependencies;
	if (fromShaderToShader(params.fromStage, params.toStage) && useDependencies)
	{
		const VkSubpassDependency dependency =
		{
			0u,											//	uint32_t				srcSubpass;
			pipelineCount - 1u,							//	uint32_t				dstSubpass;
			stageToFlags(params.fromStage),				//	VkPipelineStageFlags	srcStageMask;
			stageToFlags(params.toStage),				//	VkPipelineStageFlags	dstStageMask;
			writeAccessToFlags(params.writeAccess),		//	VkAccessFlags			srcAccessMask;
			readAccessToFlags(params.readAccess),		//	VkAccessFlags			dstAccessMask;
			0u,											//	VkDependencyFlags		dependencyFlags;
		};
		dependencies.push_back(dependency);
	}

	const VkRenderPassCreateInfo createInfo =
	{
		VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,		//	VkStructureType					sType;
		nullptr,										//	const void*						pNext;
		0u,												//	VkRenderPassCreateFlags			flags;
		static_cast<uint32_t>(attachmentDescs.size()),	//	uint32_t						attachmentCount;
		de::dataOrNull(attachmentDescs),				//	const VkAttachmentDescription*	pAttachments;
		static_cast<uint32_t>(subpassDescs.size()),		//	uint32_t						subpassCount;
		de::dataOrNull(subpassDescs),					//	const VkSubpassDescription*		pSubpasses;
		static_cast<uint32_t>(dependencies.size()),		//	uint32_t						dependencyCount;
		de::dataOrNull(dependencies),					//	const VkSubpassDependency*		pDependencies;
	};

	// All render passes are created with identical parameters.
	for (uint32_t renderPassIdx = 0u; renderPassIdx < renderPassCount; ++renderPassIdx)
		renderPasses.push_back(createRenderPass(vkd, device, &createInfo));

	return renderPasses;
}
829
830void hostToTransferMemoryBarrier (const DeviceInterface& vkd, VkCommandBuffer cmdBuffer)
831{
832	const auto barrier = makeMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT);
833	cmdPipelineMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, &barrier);
834}
835
836void transferToHostMemoryBarrier (const DeviceInterface& vkd, VkCommandBuffer cmdBuffer)
837{
838	const auto barrier = makeMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
839	cmdPipelineMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, &barrier);
840}
841
// General test flow:
//   1) Write the test value into the resource (buffer or image) from the "from" stage (host, transfer or a shader stage).
//   2) Synchronize writes and reads with the barrier type indicated by the test parameters
//      (a general memory barrier, a resource-specific barrier, or subpass dependencies baked into the render passes).
//   3) Read the value back from the "to" stage and make it reach a host-visible buffer.
//   4) Verify the value on the host, checking whichever destination the "to" stage wrote to.
tcu::TestStatus MeshShaderSyncInstance::iterate (void)
{
	const auto&	vkd						= m_context.getDeviceInterface();
	const auto	device					= m_context.getDevice();
	auto&		alloc					= m_context.getDefaultAllocator();
	const auto	queueIndex				= m_context.getUniversalQueueFamilyIndex();
	const auto	queue					= m_context.getUniversalQueue();

	const auto	imageFormat				= getImageFormat();
	const auto	imageExtent				= getImageExtent();
	const auto	colorBufferUsage		= (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
	const auto	colorSRR				= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
	const auto	colorSRL				= makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
	const auto	bufferSize				= static_cast<VkDeviceSize>(sizeof(m_params.testValue));
	const auto	descriptorType			= resourceTypeToDescriptor(m_params.resourceType);
	const auto	resourceStages			= m_params.getResourceShaderStages();
	const auto	auxiliarBufferUsage		= (VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT);
	const auto	useGeneralLayout		= m_params.preferGeneralLayout();

	// Access masks and stage masks used by every barrier that synchronizes the tested write/read pair.
	const auto	writeAccessFlags		= writeAccessToFlags(m_params.writeAccess);
	const auto	readAccessFlags			= readAccessToFlags(m_params.readAccess);
	const auto	fromStageFlags			= stageToFlags(m_params.fromStage);
	const auto	toStageFlags			= stageToFlags(m_params.toStage);

	// Prepare color buffer.
	const VkImageCreateInfo colorBufferCreateInfo =
	{
		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,	//	VkStructureType			sType;
		nullptr,								//	const void*				pNext;
		0u,										//	VkImageCreateFlags		flags;
		VK_IMAGE_TYPE_2D,						//	VkImageType				imageType;
		imageFormat,							//	VkFormat				format;
		imageExtent,							//	VkExtent3D				extent;
		1u,										//	uint32_t				mipLevels;
		1u,										//	uint32_t				arrayLayers;
		VK_SAMPLE_COUNT_1_BIT,					//	VkSampleCountFlagBits	samples;
		VK_IMAGE_TILING_OPTIMAL,				//	VkImageTiling			tiling;
		colorBufferUsage,						//	VkImageUsageFlags		usage;
		VK_SHARING_MODE_EXCLUSIVE,				//	VkSharingMode			sharingMode;
		0u,										//	uint32_t				queueFamilyIndexCount;
		nullptr,								//	const uint32_t*			pQueueFamilyIndices;
		VK_IMAGE_LAYOUT_UNDEFINED,				//	VkImageLayout			initialLayout;
	};
	ImageWithMemory	colorBuffer		(vkd, device, alloc, colorBufferCreateInfo, MemoryRequirement::Any);
	const auto		colorBufferView	= makeImageView(vkd, device, colorBuffer.get(), VK_IMAGE_VIEW_TYPE_2D, imageFormat, colorSRR);

	// Main resource.
	using ImageWithMemoryPtr	= de::MovePtr<ImageWithMemory>;
	using BufferWithMemoryPtr	= de::MovePtr<BufferWithMemory>;

	// Only one of imageResource/bufferResource is created, depending on the resource type.
	// currentLayout tracks the image layout through the recorded transitions below.
	ImageWithMemoryPtr	imageResource;
	Move<VkImageView>	imageResourceView;
	VkImageLayout		imageDescriptorLayout	= (useGeneralLayout ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
	VkImageLayout		currentLayout			= VK_IMAGE_LAYOUT_UNDEFINED;
	BufferWithMemoryPtr	bufferResource;

	bool useImageResource	= false;
	bool useBufferResource	= false;

	switch (m_params.resourceType)
	{
	case ResourceType::UNIFORM_BUFFER:
	case ResourceType::STORAGE_BUFFER:
		useBufferResource = true;
		break;
	case ResourceType::STORAGE_IMAGE:
	case ResourceType::SAMPLED_IMAGE:
		useImageResource = true;
		break;
	default:
		DE_ASSERT(false);
		break;
	}

	// One resource needed.
	DE_ASSERT(useImageResource != useBufferResource);

	if (useImageResource)
	{
		const auto resourceImageUsage = resourceImageUsageFlags(m_params.resourceType);

		const VkImageCreateInfo resourceCreateInfo =
		{
			VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,	//	VkStructureType			sType;
			nullptr,								//	const void*				pNext;
			0u,										//	VkImageCreateFlags		flags;
			VK_IMAGE_TYPE_2D,						//	VkImageType				imageType;
			imageFormat,							//	VkFormat				format;
			imageExtent,							//	VkExtent3D				extent;
			1u,										//	uint32_t				mipLevels;
			1u,										//	uint32_t				arrayLayers;
			VK_SAMPLE_COUNT_1_BIT,					//	VkSampleCountFlagBits	samples;
			VK_IMAGE_TILING_OPTIMAL,				//	VkImageTiling			tiling;
			resourceImageUsage,						//	VkImageUsageFlags		usage;
			VK_SHARING_MODE_EXCLUSIVE,				//	VkSharingMode			sharingMode;
			0u,										//	uint32_t				queueFamilyIndexCount;
			nullptr,								//	const uint32_t*			pQueueFamilyIndices;
			VK_IMAGE_LAYOUT_UNDEFINED,				//	VkImageLayout			initialLayout;
		};
		imageResource		= ImageWithMemoryPtr(new ImageWithMemory(vkd, device, alloc, resourceCreateInfo, MemoryRequirement::Any));
		imageResourceView	= makeImageView(vkd, device, imageResource->get(), VK_IMAGE_VIEW_TYPE_2D, imageFormat, colorSRR);
	}
	else
	{
		// Host-visible so it can be written directly from the host and verified at the end without extra copies.
		const auto resourceBufferUsage		= resourceBufferUsageFlags(m_params.resourceType);
		const auto resourceBufferCreateInfo	= makeBufferCreateInfo(bufferSize, resourceBufferUsage);
		bufferResource = BufferWithMemoryPtr(new BufferWithMemory(vkd, device, alloc, resourceBufferCreateInfo, MemoryRequirement::HostVisible));
	}

	// Trivial sampler, only needed for combined image samplers (sampled image resources).
	Move<VkSampler> sampler;
	if (descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
	{
		const VkSamplerCreateInfo samplerCreateInfo =
		{
			VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,	//	VkStructureType			sType;
			nullptr,								//	const void*				pNext;
			0u,										//	VkSamplerCreateFlags	flags;
			VK_FILTER_NEAREST,						//	VkFilter				magFilter;
			VK_FILTER_NEAREST,						//	VkFilter				minFilter;
			VK_SAMPLER_MIPMAP_MODE_NEAREST,			//	VkSamplerMipmapMode		mipmapMode;
			VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,	//	VkSamplerAddressMode	addressModeU;
			VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,	//	VkSamplerAddressMode	addressModeV;
			VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,	//	VkSamplerAddressMode	addressModeW;
			0.0f,									//	float					mipLodBias;
			VK_FALSE,								//	VkBool32				anisotropyEnable;
			1.0f,									//	float					maxAnisotropy;
			VK_FALSE,								//	VkBool32				compareEnable;
			VK_COMPARE_OP_NEVER,					//	VkCompareOp				compareOp;
			0.0f,									//	float					minLod;
			0.0f,									//	float					maxLod;
			VK_BORDER_COLOR_INT_TRANSPARENT_BLACK,	//	VkBorderColor			borderColor;
			VK_FALSE,								//	VkBool32				unnormalizedCoordinates;
		};
		sampler = createSampler(vkd, device, &samplerCreateInfo);
	}

	// Auxiliary host-coherent buffer for some cases. Being host-coherent lets us avoid extra barriers that would "pollute" synchronization tests.
	BufferWithMemoryPtr hostCoherentBuffer;
	void*				hostCoherentDataPtr = nullptr;
	if (needsAuxiliarBuffer(m_params.fromStage, m_params.toStage))
	{
		const auto auxiliarBufferCreateInfo = makeBufferCreateInfo(bufferSize, auxiliarBufferUsage);
		hostCoherentBuffer	= BufferWithMemoryPtr(new BufferWithMemory(vkd, device, alloc, auxiliarBufferCreateInfo, (MemoryRequirement::HostVisible | MemoryRequirement::Coherent)));
		hostCoherentDataPtr	= hostCoherentBuffer->getAllocation().getHostPtr();
	}

	// Descriptor pool.
	Move<VkDescriptorPool> descriptorPool;
	{
		DescriptorPoolBuilder poolBuilder;
		poolBuilder.addType(descriptorType);
		descriptorPool = poolBuilder.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
	}

	// Descriptor set layout.
	Move<VkDescriptorSetLayout> setLayout;
	{
		DescriptorSetLayoutBuilder layoutBuilder;
		layoutBuilder.addSingleBinding(descriptorType, resourceStages);
		setLayout = layoutBuilder.build(vkd, device);
	}

	// Descriptor set.
	const auto descriptorSet = makeDescriptorSet(vkd, device, descriptorPool.get(), setLayout.get());

	// Update descriptor set.
	{
		DescriptorSetUpdateBuilder	updateBuilder;
		const auto					location = DescriptorSetUpdateBuilder::Location::binding(0u);

		switch (descriptorType)
		{
		case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
		case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
			{
				const auto bufferInfo = makeDescriptorBufferInfo(bufferResource->get(), 0ull, bufferSize);
				updateBuilder.writeSingle(descriptorSet.get(), location, descriptorType, &bufferInfo);
			}
			break;
		case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
		case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
			{
				auto descriptorImageInfo = makeDescriptorImageInfo(sampler.get(), imageResourceView.get(), imageDescriptorLayout);
				updateBuilder.writeSingle(descriptorSet.get(), location, descriptorType, &descriptorImageInfo);
			}
			break;
		default:
			DE_ASSERT(false); break;
		}

		updateBuilder.update(vkd, device);
	}

	// Render passes and framebuffers.
	// Some test variants use more than one render pass; in that case each pipeline/draw uses its own render pass and framebuffer.
	const auto renderPasses		= createCustomRenderPasses(vkd, device, imageFormat, m_params);
	const bool multiRenderPass	= (renderPasses.size() > 1u);
	DE_ASSERT(renderPasses.size() > 0u);

	std::vector<Move<VkFramebuffer>> framebuffers;
	framebuffers.reserve(renderPasses.size());

	for (const auto& renderPass : renderPasses)
		framebuffers.push_back(makeFramebuffer(vkd, device, renderPass.get(), colorBufferView.get(), imageExtent.width, imageExtent.height));

	// Viewports and scissors.
	std::vector<VkViewport>	viewports	(1u, makeViewport(imageExtent));
	std::vector<VkRect2D>	scissors	(1u, makeRect2D(imageExtent));

	using PushConstantStruct = TestParams::PushConstantStruct;

	// Pipeline layout.
	const auto pcSize			= static_cast<uint32_t>(sizeof(PushConstantStruct));
	const auto pcRange			= makePushConstantRange(resourceStages, 0u, pcSize);
	const auto pipelineLayout	= makePipelineLayout(vkd, device, setLayout.get(), &pcRange);

	// Shader modules, pipelines and pipeline layouts.
	const auto						twoPipelines	= m_params.needsTwoPipelines();
	const auto						selfDeps		= m_params.subpassSelfDependency();

	// Both at the same time does not make sense.
	DE_ASSERT(!(twoPipelines && selfDeps));

	// Either two pipelines with one draw each, or one pipeline drawn twice (self-dependency case).
	const auto						pipelineCount	= (twoPipelines ? 2u : 1u);
	const auto						drawCount		= (selfDeps ? 2u : 1u);
	const auto						iterationCount	= std::max(pipelineCount, drawCount);

	std::vector<Move<VkPipeline>>	pipelines;
	pipelines.reserve(pipelineCount);

	// Shader modules.
	const auto& binaries = m_context.getBinaryCollection();

	Move<VkShaderModule> taskShader;
	if (m_params.needsTask())
		taskShader = createShaderModule(vkd, device, binaries.get("task"));

	const auto meshShader						= createShaderModule(vkd, device, binaries.get("mesh"));
	const auto fragShader						= createShaderModule(vkd, device, binaries.get("frag"));
	const auto taskPassthroughShader			= createShaderModule(vkd, device, binaries.get("taskPassthrough"));
	const auto fragPassthroughShader			= createShaderModule(vkd, device, binaries.get("fragPassthrough"));
	const auto meshPassthroughShader			= createShaderModule(vkd, device, binaries.get("meshPassthrough"));
	const auto meshPassthroughWithTaskShader	= createShaderModule(vkd, device, binaries.get("meshPassthroughWithTask"));

	if (pipelineCount == 1u)
	{
		// Pipeline.
		pipelines.push_back(makeGraphicsPipeline(vkd, device, pipelineLayout.get(), taskShader.get(), meshShader.get(), fragShader.get(), renderPasses.at(0u).get(), viewports, scissors));
	}
	else if (pipelineCount == 2u)
	{
		// Mandatory stages in each pipeline: the first pipeline will contain the "from" stage (write) and the second one the "to" stage (read).
		const std::vector<Stage> mandatoryStages { m_params.fromStage, m_params.toStage };

		// One pipeline per mandatory stage.
		for (uint32_t pipelineIdx = 0u; pipelineIdx < pipelineCount; ++pipelineIdx)
		{
			const auto& stage = mandatoryStages.at(pipelineIdx);

			VkShaderModule taskModule = DE_NULL;
			VkShaderModule meshModule = DE_NULL;
			VkShaderModule fragModule = DE_NULL;

			const bool lastSubpass		= (pipelineIdx == pipelineCount - 1u);
			const auto pipelineStages	= subpassStages(stage, lastSubpass);
			const bool hasTaskShader	= hasTask(pipelineStages);
			const bool hasFragShader	= hasFrag(pipelineStages);

			// Decide which shaders to use for this one.
			// The mandatory stage gets the "real" shader; other stages get passthrough variants.
			if (hasTaskShader)
				taskModule = ((stage == Stage::TASK) ? taskShader.get() : taskPassthroughShader.get());

			if (stage == Stage::MESH)
				meshModule = meshShader.get();
			else
			{
				meshModule = (hasTaskShader ? meshPassthroughWithTaskShader.get() : meshPassthroughShader.get());
			}

			if (hasFragShader)
				fragModule = ((stage == Stage::FRAG) ? fragShader.get() : fragPassthroughShader.get());

			// Create pipeline. When using multiple render passes, the subpass is always zero. When using a single render pass, each pipeline is prepared for one subpass.
			const auto renderPass	= (multiRenderPass ? renderPasses.at(pipelineIdx).get() : renderPasses[0].get());
			const auto subpass		= (multiRenderPass ? 0u : pipelineIdx);

			pipelines.push_back(makeGraphicsPipeline(vkd, device, pipelineLayout.get(),
				taskModule, meshModule, fragModule,
				renderPass, viewports, scissors, subpass));
		}
	}
	else
	{
		DE_ASSERT(false);
	}

	// Command pool and buffer.
	const auto cmdPool		= makeCommandPool(vkd, device, queueIndex);
	const auto cmdBufferPtr	= allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
	const auto cmdBuffer	= cmdBufferPtr.get();

	beginCommandBuffer(vkd, cmdBuffer);

	// Step 1: write the test value into the resource from the "from" stage.
	if (m_params.fromStage == Stage::HOST)
	{
		// Prepare buffer from host when the source stage is the host.
		DE_ASSERT(useBufferResource);

		auto& resourceBufferAlloc	= bufferResource->getAllocation();
		void* resourceBufferDataPtr	= resourceBufferAlloc.getHostPtr();

		deMemcpy(resourceBufferDataPtr, &m_params.testValue, sizeof(m_params.testValue));
		flushAlloc(vkd, device, resourceBufferAlloc);
	}
	else if (m_params.fromStage == Stage::TRANSFER)
	{
		// Put value in host-coherent buffer and transfer it to the resource buffer or image.
		deMemcpy(hostCoherentDataPtr, &m_params.testValue, sizeof(m_params.testValue));
		hostToTransferMemoryBarrier(vkd, cmdBuffer);

		if (useBufferResource)
		{
			const auto copyRegion = makeBufferCopy(0ull, 0ull, bufferSize);
			vkd.cmdCopyBuffer(cmdBuffer, hostCoherentBuffer->get(), bufferResource->get(), 1u, &copyRegion);
		}
		else
		{
			// Move image to the right layout for transfer.
			const auto newLayout = (useGeneralLayout ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
			if (newLayout != currentLayout)
			{
				const auto preCopyBarrier = makeImageMemoryBarrier(0u, VK_ACCESS_TRANSFER_WRITE_BIT, currentLayout, newLayout, imageResource->get(), colorSRR);
				cmdPipelineImageMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, &preCopyBarrier);
				currentLayout = newLayout;
			}
			const auto copyRegion = makeBufferImageCopy(imageExtent, colorSRL);
			vkd.cmdCopyBufferToImage(cmdBuffer, hostCoherentBuffer->get(), imageResource->get(), currentLayout, 1u, &copyRegion);
		}
	}
	else if (isShaderStage(m_params.fromStage))
	{
		// The image or buffer will be written to from shaders. Images need to be in the right layout.
		if (useImageResource)
		{
			const auto newLayout = VK_IMAGE_LAYOUT_GENERAL;
			if (newLayout != currentLayout)
			{
				const auto preWriteBarrier = makeImageMemoryBarrier(0u, (VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT), currentLayout, newLayout, imageResource->get(), colorSRR);
				cmdPipelineImageMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, fromStageFlags, &preWriteBarrier);
				currentLayout = newLayout;
			}
		}
	}
	else
	{
		DE_ASSERT(false);
	}

	// If the resource is going to be read from shaders and written from a non-shader stage, we'll insert the main barrier before running the pipeline.
	if (isShaderStage(m_params.toStage) && !isShaderStage(m_params.fromStage))
	{
		if (m_params.barrierType == BarrierType::GENERAL)
		{
			const auto memoryBarrier = makeMemoryBarrier(writeAccessFlags, readAccessFlags);
			cmdPipelineMemoryBarrier(vkd, cmdBuffer, fromStageFlags, toStageFlags, &memoryBarrier);
		}
		else if (m_params.barrierType == BarrierType::SPECIFIC)
		{
			if (useBufferResource)
			{
				const auto bufferBarrier = makeBufferMemoryBarrier(writeAccessFlags, readAccessFlags, bufferResource->get(), 0ull, bufferSize);
				cmdPipelineBufferMemoryBarrier(vkd, cmdBuffer, fromStageFlags, toStageFlags, &bufferBarrier);
			}
			else
			{
				const auto newLayout	= (useGeneralLayout ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
				const auto imageBarrier	= makeImageMemoryBarrier(writeAccessFlags, readAccessFlags, currentLayout, newLayout, imageResource->get(), colorSRR);

				cmdPipelineImageMemoryBarrier(vkd, cmdBuffer, fromStageFlags, toStageFlags, &imageBarrier);
				currentLayout = newLayout;
			}
		}
		// For subpass dependencies, they have already been included in the render pass or loop below.
	}

	// Run the pipeline.
	if (!multiRenderPass)
		beginRenderPass(vkd, cmdBuffer, renderPasses[0].get(), framebuffers[0].get(), scissors.at(0), tcu::UVec4(0u));

	vkd.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout.get(), 0u, 1u, &descriptorSet.get(), 0u, nullptr);

	for (uint32_t iterationIdx = 0u; iterationIdx < iterationCount; ++iterationIdx)
	{
		// Single render pass with two pipelines: advance to the next subpass before the second draw.
		if (iterationIdx > 0u && !multiRenderPass && twoPipelines)
			vkd.cmdNextSubpass(cmdBuffer, VK_SUBPASS_CONTENTS_INLINE);

		if (multiRenderPass)
			beginRenderPass(vkd, cmdBuffer, renderPasses.at(iterationIdx).get(), framebuffers.at(iterationIdx).get(), scissors.at(0), tcu::UVec4(0u));

		// With a single pipeline drawn twice (self-dependency), the bind from the first iteration is reused.
		if (twoPipelines || iterationIdx == 0u)
			vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelines.at(iterationIdx).get());

		PushConstantStruct pcData;
		if (selfDeps)
		{
			// First draw writes, second draw reads.
			pcData.writeVal	= 1u - iterationIdx;
			pcData.readVal	= iterationIdx;
		}
		else
		{
			// Otherwise reads and writes freely according to the pipeline shaders.
			pcData.writeVal	= 1u;
			pcData.readVal	= 1u;
		}
		vkd.cmdPushConstants(cmdBuffer, pipelineLayout.get(), resourceStages, 0u, pcSize, &pcData);
		vkd.cmdDrawMeshTasksEXT(cmdBuffer, 1u, 1u, 1u);

		if (multiRenderPass)
			endRenderPass(vkd, cmdBuffer);

		// If there are self-dependencies or multiple render passes, synchronize resource between draw calls.
		if ((multiRenderPass || selfDeps) && iterationIdx == 0u)
		{
			// In the case of self-dependencies, the barrier type is BarrierType::DEPENDENCY and we'll insert a general barrier because:
			//    * VUID-vkCmdPipelineBarrier-bufferMemoryBarrierCount-01178 forbids using buffer barriers inside render passes.
			//    * VUID-vkCmdPipelineBarrier-image-04073 forbids using image memory barriers inside render passes with resources that are not attachments.
			if (m_params.barrierType == BarrierType::GENERAL || m_params.barrierType == BarrierType::DEPENDENCY)
			{
				const auto memoryBarrier = makeMemoryBarrier(writeAccessFlags, readAccessFlags);
				cmdPipelineMemoryBarrier(vkd, cmdBuffer, fromStageFlags, toStageFlags, &memoryBarrier);
			}
			else if (m_params.barrierType == BarrierType::SPECIFIC)
			{
				if (useBufferResource)
				{
					const auto bufferBarrier = makeBufferMemoryBarrier(writeAccessFlags, readAccessFlags, bufferResource->get(), 0ull, bufferSize);
					cmdPipelineBufferMemoryBarrier(vkd, cmdBuffer, fromStageFlags, toStageFlags, &bufferBarrier);
				}
				else
				{
					// Note: the image will only be read from shader stages or from the transfer stage.
					DE_ASSERT(useGeneralLayout);
					const auto newLayout	= VK_IMAGE_LAYOUT_GENERAL;
					const auto imageBarrier	= makeImageMemoryBarrier(writeAccessFlags, readAccessFlags, currentLayout, newLayout, imageResource->get(), colorSRR);

					cmdPipelineImageMemoryBarrier(vkd, cmdBuffer, fromStageFlags, toStageFlags, &imageBarrier);
					currentLayout = newLayout;
				}
			}
			else
			{
				DE_ASSERT(false);
			}

			if (multiRenderPass)
			{
				// Sync color attachment writes.
				const auto colorWritesBarrier = makeMemoryBarrier(VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT);
				cmdPipelineMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, &colorWritesBarrier);
			}
		}
	}

	if (!multiRenderPass)
		endRenderPass(vkd, cmdBuffer);

	// If the resource was written to from shaders and will be read from a non-shader stage, insert the main barrier after running the pipeline.
	if (isShaderStage(m_params.fromStage) && !isShaderStage(m_params.toStage))
	{
		if (m_params.barrierType == BarrierType::GENERAL)
		{
			const auto memoryBarrier = makeMemoryBarrier(writeAccessFlags, readAccessFlags);
			cmdPipelineMemoryBarrier(vkd, cmdBuffer, fromStageFlags, toStageFlags, &memoryBarrier);
		}
		else if (m_params.barrierType == BarrierType::SPECIFIC)
		{
			if (useBufferResource)
			{
				const auto bufferBarrier = makeBufferMemoryBarrier(writeAccessFlags, readAccessFlags, bufferResource->get(), 0ull, bufferSize);
				cmdPipelineBufferMemoryBarrier(vkd, cmdBuffer, fromStageFlags, toStageFlags, &bufferBarrier);
			}
			else
			{
				// Note: the image will only be read from shader stages or from the transfer stage.
				const auto newLayout	= (useGeneralLayout ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
				const auto imageBarrier	= makeImageMemoryBarrier(writeAccessFlags, readAccessFlags, currentLayout, newLayout, imageResource->get(), colorSRR);

				cmdPipelineImageMemoryBarrier(vkd, cmdBuffer, fromStageFlags, toStageFlags, &imageBarrier);
				currentLayout = newLayout;
			}
		}
		// For subpass dependencies, they have already been included in the render pass and loop.
	}

	// Read resource from the destination stage if needed.
	if (m_params.toStage == Stage::HOST)
	{
		// Nothing to do. The test value should be in the resource buffer already, which is host-visible.
	}
	else if (m_params.toStage == Stage::TRANSFER)
	{
		// Copy value from resource to host-coherent buffer to be verified later.
		if (useBufferResource)
		{
			const auto copyRegion = makeBufferCopy(0ull, 0ull, bufferSize);
			vkd.cmdCopyBuffer(cmdBuffer, bufferResource->get(), hostCoherentBuffer->get(), 1u, &copyRegion);
		}
		else
		{
			const auto copyRegion = makeBufferImageCopy(imageExtent, colorSRL);
			vkd.cmdCopyImageToBuffer(cmdBuffer, imageResource->get(), currentLayout, hostCoherentBuffer->get(), 1u, &copyRegion);
		}

		transferToHostMemoryBarrier(vkd, cmdBuffer);
	}

	// If the output value will be available in the color buffer, take the chance to transfer its contents to a host-coherent buffer.
	BufferWithMemoryPtr colorVerificationBuffer;
	void*				colorVerificationDataPtr = nullptr;

	if (valueInColorBuffer(m_params.toStage))
	{
		const auto auxiliarBufferCreateInfo = makeBufferCreateInfo(bufferSize, auxiliarBufferUsage);
		colorVerificationBuffer		= BufferWithMemoryPtr(new BufferWithMemory(vkd, device, alloc, auxiliarBufferCreateInfo, (MemoryRequirement::HostVisible | MemoryRequirement::Coherent)));
		colorVerificationDataPtr	= colorVerificationBuffer->getAllocation().getHostPtr();

		const auto srcAccess	= (VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT);
		const auto dstAccess	= VK_ACCESS_TRANSFER_READ_BIT;
		const auto colorBarrier	= makeImageMemoryBarrier(srcAccess, dstAccess, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, colorBuffer.get(), colorSRR);
		cmdPipelineImageMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, &colorBarrier);

		const auto copyRegion = makeBufferImageCopy(imageExtent, colorSRL);
		vkd.cmdCopyImageToBuffer(cmdBuffer, colorBuffer.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, colorVerificationBuffer->get(), 1u, &copyRegion);

		transferToHostMemoryBarrier(vkd, cmdBuffer);
	}


	endCommandBuffer(vkd, cmdBuffer);
	submitCommandsAndWait(vkd, device, queue, cmdBuffer);

	// Verify output resources as needed.

	if (valueInAuxiliarDestBuffer(m_params.toStage))
	{
		// Host-coherent memory: no invalidation needed before reading.
		uint32_t bufferValue;
		deMemcpy(&bufferValue, hostCoherentDataPtr, sizeof(bufferValue));

		if (bufferValue != m_params.testValue)
		{
			std::ostringstream msg;
			msg << "Unexpected value in auxiliar host-coherent buffer: found " << bufferValue << " and expected " << m_params.testValue;
			TCU_FAIL(msg.str());
		}
	}

	if (valueInResourceBuffer(m_params.toStage))
	{
		auto&		resourceBufferAlloc		= bufferResource->getAllocation();
		void*		resourceBufferDataPtr	= resourceBufferAlloc.getHostPtr();
		uint32_t	bufferValue;

		invalidateAlloc(vkd, device, resourceBufferAlloc);
		deMemcpy(&bufferValue, resourceBufferDataPtr, sizeof(bufferValue));

		if (bufferValue != m_params.testValue)
		{
			std::ostringstream msg;
			msg << "Unexpected value in resource buffer: found " << bufferValue << " and expected " << m_params.testValue;
			TCU_FAIL(msg.str());
		}
	}

	if (valueInColorBuffer(m_params.toStage))
	{
		uint32_t bufferValue;
		deMemcpy(&bufferValue, colorVerificationDataPtr, sizeof(bufferValue));

		if (bufferValue != m_params.testValue)
		{
			std::ostringstream msg;
			msg << "Unexpected value in color verification buffer: found " << bufferValue << " and expected " << m_params.testValue;
			TCU_FAIL(msg.str());
		}
	}

	return tcu::TestStatus::pass("Pass");
}
1430
1431// Specific test to check a barrier that crosses secondary command buffers and goes from compute to task.
// Specific test to check a barrier that crosses secondary command buffers and goes from compute to task.
class BarrierAcrossSecondaryCase : public vkt::TestCase
{
public:
					BarrierAcrossSecondaryCase	(tcu::TestContext& testCtx, const std::string& name)
						: vkt::TestCase(testCtx, name)
						{}
	virtual			~BarrierAcrossSecondaryCase	(void) {}

	// Requires task/mesh shader support plus vertexPipelineStoresAndAtomics (the task shader writes to storage buffers).
	void			checkSupport				(Context& context) const override;
	TestInstance*	createInstance				(Context& context) const override;
	void			initPrograms				(vk::SourceCollections& programCollection) const override;

	// Workgroup size and dispatch size shared by the compute and task shaders.
	static constexpr uint32_t kLocalSize		= 128u;
	static constexpr uint32_t kNumWorkGroups	= 16384u;
};
1447
// Instance for BarrierAcrossSecondaryCase: records compute work and mesh work in different
// command buffers and synchronizes them with a barrier (see iterate below).
class BarrierAcrossSecondaryInstance : public vkt::TestInstance
{
public:
						BarrierAcrossSecondaryInstance	(Context& context) : vkt::TestInstance(context) {}
	virtual				~BarrierAcrossSecondaryInstance	(void) {}

	tcu::TestStatus		iterate							(void) override;

};
1457
void BarrierAcrossSecondaryCase::checkSupport (Context &context) const
{
	// Both task and mesh shaders are required by this test.
	checkTaskMeshShaderSupportEXT(context, true, true);
	// The task shader writes to storage buffers, which requires this core feature.
	context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_VERTEX_PIPELINE_STORES_AND_ATOMICS);
}
1463
// Factory method: the instance does the actual recording and verification work.
TestInstance* BarrierAcrossSecondaryCase::createInstance (Context &context) const
{
	return new BarrierAcrossSecondaryInstance(context);
}
1468
// Generates three shaders:
//   comp - fills the output buffer with each invocation's global index.
//   task - re-reads the output buffer, records a per-invocation pass/fail flag in the
//          verification buffer, and emits zero mesh workgroups (no drawing is needed).
//   mesh - trivial passthrough that emits no geometry; required to form a complete mesh pipeline.
void BarrierAcrossSecondaryCase::initPrograms (vk::SourceCollections &programCollection) const
{
	const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);

	// Shared descriptor declarations: binding 0 is written by compute and read by task;
	// binding 1 receives the task shader's verification results.
	const std::string descriptorDecl =
		"layout (set=0, binding=0, std430) buffer OutputBlock {\n"
		"    uint values[];\n"
		"} outBuffer;\n"
		"layout (set=0, binding=1, std430) buffer VerificationBlock {\n"
		"    uint values[];\n"
		"} verificationBuffer;\n"
		;

	// The compute shader will fill the output buffer.
	std::ostringstream comp;
	comp
		<< "#version 450\n"
		<< "layout(local_size_x=" << kLocalSize << ") in;\n"
		<< descriptorDecl
		<< "void main ()\n"
		<< "{\n"
		<< "    outBuffer.values[gl_GlobalInvocationID.x] = gl_GlobalInvocationID.x;\n"
		<< "}\n"
		;
	programCollection.glslSources.add("comp") << glu::ComputeSource(comp.str());

	// The task shader will read it, verify its contents and write the verification buffer.
	std::ostringstream task;
	task
		<< "#version 450\n"
		<< "#extension GL_EXT_mesh_shader : enable\n"
		<< "layout(local_size_x=" << kLocalSize << ") in;\n"
		<< descriptorDecl
		<< "void main ()\n"
		<< "{\n"
		<< "    const uint verifResult = ((outBuffer.values[gl_GlobalInvocationID.x] == gl_GlobalInvocationID.x) ? 1u : 0u);\n"
		<< "    verificationBuffer.values[gl_GlobalInvocationID.x] = verifResult;\n"
		// Emitting zero groups: the mesh stage does no useful work in this test.
		<< "    EmitMeshTasksEXT(0u, 0u, 0u);\n"
		<< "}\n"
		;
	programCollection.glslSources.add("task") << glu::TaskSource(task.str()) << buildOptions;

	// Passthrough mesh shader that emits no primitives.
	std::ostringstream mesh;
	mesh
		<< "#version 450\n"
		<< "#extension GL_EXT_mesh_shader : enable\n"
		<< "\n"
		<< "layout(local_size_x=1) in;\n"
		<< "layout(triangles) out;\n"
		<< "layout(max_vertices=3, max_primitives=1) out;\n"
		<< "\n"
		<< "void main ()\n"
		<< "{\n"
		<< "    SetMeshOutputsEXT(0u, 0u);\n"
		<< "}\n"
		;
	programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
}
1527
1528tcu::TestStatus BarrierAcrossSecondaryInstance::iterate (void)
1529{
1530	const auto&			vkd				= m_context.getDeviceInterface();
1531	const auto			device			= m_context.getDevice();
1532	auto&				alloc			= m_context.getDefaultAllocator();
1533	const auto			queueIndex		= m_context.getUniversalQueueFamilyIndex();
1534	const auto			queue			= m_context.getUniversalQueue();
1535	const auto			kLocalSize		= BarrierAcrossSecondaryCase::kLocalSize;
1536	const auto			kNumWorkGroups	= BarrierAcrossSecondaryCase::kNumWorkGroups;
1537	const auto			bindingStages	= (VK_SHADER_STAGE_COMPUTE_BIT | VK_SHADER_STAGE_TASK_BIT_EXT);
1538	const auto			extent			= makeExtent3D(1u, 1u, 1u);
1539
1540	// Output buffer.
1541	const auto			outputBufferSize	= static_cast<VkDeviceSize>(kLocalSize * kNumWorkGroups * sizeof(uint32_t));
1542	const auto			outputBufferInfo	= makeBufferCreateInfo(outputBufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
1543	BufferWithMemory	outputBuffer		(vkd, device, alloc, outputBufferInfo, MemoryRequirement::HostVisible);
1544	auto&				outputBufferAlloc	= outputBuffer.getAllocation();
1545	void*				outputBufferData	= outputBufferAlloc.getHostPtr();
1546
1547	// Verification buffer.
1548	const auto			verificationBufferSize	= outputBufferSize;
1549	const auto			verificationBufferInfo	= outputBufferInfo;
1550	BufferWithMemory	verificationBuffer		(vkd, device, alloc, verificationBufferInfo, MemoryRequirement::HostVisible);
1551	auto&				verificationBufferAlloc	= verificationBuffer.getAllocation();
1552	void*				verificationBufferData	= verificationBufferAlloc.getHostPtr();
1553
1554	// Prepare buffer data.
1555	deMemset(outputBufferData, 0, static_cast<size_t>(outputBufferSize));
1556	deMemset(verificationBufferData, 0, static_cast<size_t>(verificationBufferSize));
1557	flushAlloc(vkd, device, outputBufferAlloc);
1558	flushAlloc(vkd, device, verificationBufferAlloc);
1559
1560	// Descriptor set layout.
1561	DescriptorSetLayoutBuilder setLayoutBuilder;
1562	setLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, bindingStages);
1563	setLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, bindingStages);
1564	const auto setLayout = setLayoutBuilder.build(vkd, device);
1565
1566	// Pipeline layout.
1567	const auto pipelineLayout = makePipelineLayout(vkd, device, setLayout.get());
1568
1569	// Descriptor pool and set.
1570	DescriptorPoolBuilder poolBuilder;
1571	poolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2u);
1572	const auto descriptorPool	= poolBuilder.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
1573	const auto descriptorSet	= makeDescriptorSet(vkd, device, descriptorPool.get(), setLayout.get());
1574
1575	// Update descriptor set.
1576	DescriptorSetUpdateBuilder	updateBuilder;
1577	const auto					outputBufferDescInfo		= makeDescriptorBufferInfo(outputBuffer.get(), 0ull, outputBufferSize);
1578	const auto					verificationBufferDescInfo	= makeDescriptorBufferInfo(verificationBuffer.get(), 0ull, verificationBufferSize);
1579	updateBuilder.writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputBufferDescInfo);
1580	updateBuilder.writeSingle(descriptorSet.get(), DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &verificationBufferDescInfo);
1581	updateBuilder.update(vkd, device);
1582
1583	// Graphics pipeline auxiliary data.
1584	const auto						renderPass	= makeRenderPass(vkd, device);
1585	const auto						framebuffer	= makeFramebuffer(vkd, device, renderPass.get(), 0u, nullptr, extent.width, extent.height);
1586	const std::vector<VkViewport>	viewports	(1u, makeViewport(extent));
1587	const std::vector<VkRect2D>		scissors	(1u, makeRect2D(extent));
1588
1589	// Create pipelines.
1590	const auto&	binaries	= m_context.getBinaryCollection();
1591	const auto	compModule	= createShaderModule(vkd, device, binaries.get("comp"));
1592	const auto	taskModule	= createShaderModule(vkd, device, binaries.get("task"));
1593	const auto	meshModule	= createShaderModule(vkd, device, binaries.get("mesh"));
1594
1595	const auto computePipeline	= makeComputePipeline(vkd, device, pipelineLayout.get(), compModule.get());
1596	const auto meshPipeline		= makeGraphicsPipeline(vkd, device, pipelineLayout.get(),
1597		taskModule.get(), meshModule.get(), DE_NULL,
1598		renderPass.get(), viewports, scissors);
1599
1600	// Command pool and command buffers.
1601	const auto cmdPool			= makeCommandPool(vkd, device, queueIndex);
1602	const auto primaryCmdBuffer	= allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1603	const auto compCmdBuffer	= allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_SECONDARY);
1604	const auto meshCmdBuffer	= allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_SECONDARY);
1605
1606	// Use compute pipeline and record barrier to task shader.
1607	{
1608		const auto cmdBuffer		= compCmdBuffer.get();
1609		const auto comp2TaskBarrier	= makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT);
1610
1611		beginSecondaryCommandBuffer(vkd, cmdBuffer);
1612		vkd.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout.get(), 0u, 1u, &descriptorSet.get(), 0u, nullptr);
1613		vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, computePipeline.get());
1614		vkd.cmdDispatch(cmdBuffer, kNumWorkGroups, 1u, 1u);
1615		cmdPipelineMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TASK_SHADER_BIT_EXT, &comp2TaskBarrier);
1616		endCommandBuffer(vkd, cmdBuffer);
1617	}
1618
1619	// Use mesh pipeline and record barrier to host.
1620	{
1621		const auto cmdBuffer = meshCmdBuffer.get();
1622
1623		beginSecondaryCommandBuffer(vkd, cmdBuffer, renderPass.get(), framebuffer.get());
1624		vkd.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout.get(), 0u, 1u, &descriptorSet.get(), 0u, nullptr);
1625		vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, meshPipeline.get());
1626		vkd.cmdDrawMeshTasksEXT(cmdBuffer, kNumWorkGroups, 1u, 1u);
1627		endCommandBuffer(vkd, cmdBuffer);
1628	}
1629
1630	// Use both secondary command buffers.
1631	{
1632		const auto cmdBuffer		= primaryCmdBuffer.get();
1633		const auto task2HostBarrier	= makeMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
1634
1635		beginCommandBuffer(vkd, cmdBuffer);
1636		vkd.cmdExecuteCommands(cmdBuffer, 1u, &compCmdBuffer.get());
1637		beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0u), VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS);
1638		vkd.cmdExecuteCommands(cmdBuffer, 1u, &meshCmdBuffer.get());
1639		endRenderPass(vkd, cmdBuffer);
1640		cmdPipelineMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_TASK_SHADER_BIT_EXT, VK_PIPELINE_STAGE_HOST_BIT, &task2HostBarrier);
1641		endCommandBuffer(vkd, cmdBuffer);
1642		submitCommandsAndWait(vkd, device, queue, cmdBuffer);
1643	}
1644
1645	// Verify buffer contents.
1646	invalidateAlloc(vkd, device, verificationBufferAlloc);
1647	const std::vector<uint32_t> expectedResult (kNumWorkGroups * kLocalSize, 1u);
1648
1649	if (deMemCmp(expectedResult.data(), verificationBufferData, de::dataSize(expectedResult)) != 0)
1650		TCU_FAIL("Unexpected values found in verification buffer");
1651
1652	return tcu::TestStatus::pass("Pass");
1653}
1654
1655} // anonymous
1656
1657tcu::TestCaseGroup* createMeshShaderSyncTestsEXT (tcu::TestContext& testCtx)
1658{
1659	const struct
1660	{
1661		Stage		fromStage;
1662		Stage		toStage;
1663	} stageCombinations[] =
1664	{
1665		// Combinations where the source and destination stages involve mesh shaders.
1666		// Note: this could be tested procedurally.
1667		{	Stage::HOST,		Stage::TASK			},
1668		{	Stage::HOST,		Stage::MESH			},
1669		{	Stage::TRANSFER,	Stage::TASK			},
1670		{	Stage::TRANSFER,	Stage::MESH			},
1671		{	Stage::TASK,		Stage::MESH			},
1672		{	Stage::TASK,		Stage::FRAG			},
1673		{	Stage::TASK,		Stage::TRANSFER		},
1674		{	Stage::TASK,		Stage::HOST			},
1675		{	Stage::MESH,		Stage::FRAG			},
1676		{	Stage::MESH,		Stage::TRANSFER		},
1677		{	Stage::MESH,		Stage::HOST			},
1678
1679		// These require two pipelines.
1680		{	Stage::MESH,		Stage::TASK			},
1681		{	Stage::FRAG,		Stage::TASK			},
1682		{	Stage::FRAG,		Stage::MESH			},
1683	};
1684
1685	const struct
1686	{
1687		ResourceType	resourceType;
1688		const char*		name;
1689	} resourceTypes[] =
1690	{
1691		{ ResourceType::UNIFORM_BUFFER,	"uniform_buffer"	},
1692		{ ResourceType::STORAGE_BUFFER,	"storage_buffer"	},
1693		{ ResourceType::STORAGE_IMAGE,	"storage_image"		},
1694		{ ResourceType::SAMPLED_IMAGE,	"sampled_image"		},
1695	};
1696
1697	const struct
1698	{
1699		BarrierType		barrierType;
1700		const char*		name;
1701	} barrierTypes[] =
1702	{
1703		{	BarrierType::GENERAL,		"memory_barrier"		},
1704		{	BarrierType::SPECIFIC,		"specific_barrier"		},
1705		{	BarrierType::DEPENDENCY,	"subpass_dependency"	},
1706	};
1707
1708	const struct
1709	{
1710		WriteAccess		writeAccess;
1711		const char*		name;
1712	} writeAccesses[] =
1713	{
1714		{	WriteAccess::HOST_WRITE,		"host_write"		},
1715		{	WriteAccess::TRANSFER_WRITE,	"transfer_write"	},
1716		{	WriteAccess::SHADER_WRITE,		"shader_write"		},
1717	};
1718
1719	const struct
1720	{
1721		ReadAccess		readAccess;
1722		const char*		name;
1723	} readAccesses[] =
1724	{
1725		{	ReadAccess::HOST_READ,		"host_read"		},
1726		{	ReadAccess::TRANSFER_READ,	"transfer_read"	},
1727		{	ReadAccess::SHADER_READ,	"shader_read"	},
1728		{	ReadAccess::UNIFORM_READ,	"uniform_read"	},
1729	};
1730
1731	uint32_t testValue = 1628510124u;
1732
1733	GroupPtr mainGroup (new tcu::TestCaseGroup(testCtx, "synchronization"));
1734
1735	for (const auto& stageCombination : stageCombinations)
1736	{
1737		const std::string	combinationName		= de::toString(stageCombination.fromStage) + "_to_" + de::toString(stageCombination.toStage);
1738		GroupPtr			combinationGroup	(new tcu::TestCaseGroup(testCtx, combinationName.c_str()));
1739
1740		for (const auto& resourceCase : resourceTypes)
1741		{
1742			if (!canWriteTo(stageCombination.fromStage, resourceCase.resourceType))
1743				continue;
1744
1745			if (!canReadFrom(stageCombination.toStage, resourceCase.resourceType))
1746				continue;
1747
1748			GroupPtr resourceGroup (new tcu::TestCaseGroup(testCtx, resourceCase.name));
1749
1750			for (const auto& barrierCase : barrierTypes)
1751			{
1752				const auto shaderToShader      = fromShaderToShader(stageCombination.fromStage, stageCombination.toStage);
1753				const auto barrierIsDependency = (barrierCase.barrierType == BarrierType::DEPENDENCY);
1754
1755				// Subpass dependencies can only be used in shader to shader situations.
1756				if (barrierIsDependency && !shaderToShader)
1757						continue;
1758
1759				GroupPtr barrierGroup (new tcu::TestCaseGroup(testCtx, barrierCase.name));
1760
1761				for (const auto& writeCase	: writeAccesses)
1762				for (const auto& readCase	: readAccesses)
1763				{
1764					if (!canReadResourceAsAccess(resourceCase.resourceType, readCase.readAccess))
1765						continue;
1766					if (!canWriteResourceAsAccess(resourceCase.resourceType, writeCase.writeAccess))
1767						continue;
1768					if (!canReadFromStageAsAccess(stageCombination.toStage, readCase.readAccess))
1769						continue;
1770					if (!canWriteFromStageAsAccess(stageCombination.fromStage, writeCase.writeAccess))
1771						continue;
1772
1773					const std::string accessCaseName = writeCase.name + std::string("_") + readCase.name;
1774
1775					const TestParams testParams =
1776					{
1777						stageCombination.fromStage,	//	Stage			fromStage;
1778						stageCombination.toStage,	//	Stage			toStage;
1779						resourceCase.resourceType,	//	ResourceType	resourceType;
1780						barrierCase.barrierType,	//	BarrierType		barrierType;
1781						writeCase.writeAccess,		//	WriteAccess		writeAccess;
1782						readCase.readAccess,		//	ReadAccess		readAccess;
1783						testValue++,				//	uint32_t		testValue;
1784					};
1785
1786					barrierGroup->addChild(new MeshShaderSyncCase(testCtx, accessCaseName, testParams));
1787				}
1788
1789				resourceGroup->addChild(barrierGroup.release());
1790			}
1791
1792			combinationGroup->addChild(resourceGroup.release());
1793		}
1794
1795		mainGroup->addChild(combinationGroup.release());
1796	}
1797
1798	{
1799		// Additional synchronization tests
1800		GroupPtr otherGroup (new tcu::TestCaseGroup(testCtx, "other"));
1801
1802		// Check synchronizing compute to task across secondary command buffer boundaries
1803		otherGroup->addChild(new BarrierAcrossSecondaryCase(testCtx, "barrier_across_secondary"));
1804
1805		mainGroup->addChild(otherGroup.release());
1806	}
1807
1808	return mainGroup.release();
1809}
1810
1811} // MeshShader
1812} // vkt
1813