1/*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2021 The Khronos Group Inc.
6 * Copyright (c) 2021 Valve Corporation.
7 * Copyright (c) 2023 LunarG, Inc.
8 * Copyright (c) 2023 Nintendo
9 *
10 * Licensed under the Apache License, Version 2.0 (the "License");
11 * you may not use this file except in compliance with the License.
12 * You may obtain a copy of the License at
13 *
14 *      http://www.apache.org/licenses/LICENSE-2.0
15 *
16 * Unless required by applicable law or agreed to in writing, software
17 * distributed under the License is distributed on an "AS IS" BASIS,
18 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19 * See the License for the specific language governing permissions and
20 * limitations under the License.
21 *
22 *//*!
23 * \file
24 * \brief Mesh Shader Smoke Tests for VK_EXT_mesh_shader
25 *//*--------------------------------------------------------------------*/
26
27#include "vktMeshShaderSmokeTestsEXT.hpp"
28#include "vktMeshShaderUtil.hpp"
29#include "vktTestCase.hpp"
30#include "vktTestCaseUtil.hpp"
31
32#include "vkBuilderUtil.hpp"
33#include "vkImageWithMemory.hpp"
34#include "vkBufferWithMemory.hpp"
35#include "vkObjUtil.hpp"
36#include "vkTypeUtil.hpp"
37#include "vkCmdUtil.hpp"
38#include "vkImageUtil.hpp"
39#include "vkBarrierUtil.hpp"
40#include "vkPipelineConstructionUtil.hpp"
41
42#include "tcuImageCompare.hpp"
43#include "tcuTestLog.hpp"
44#include "tcuTextureUtil.hpp"
45
46#include "deRandom.hpp"
47
48#include <utility>
49#include <vector>
50#include <string>
51#include <sstream>
52#include <set>
53#include <memory>
54
55namespace vkt
56{
57namespace MeshShader
58{
59
60namespace
61{
62
63using GroupPtr = de::MovePtr<tcu::TestCaseGroup>;
64
65using namespace vk;
66
// GLSL source of the fragment shader shared by the triangle cases.
// It copies the per-primitive input color (location 0) straight to the output color.
std::string commonMeshFragShader ()
{
	return
		"#version 450\n"
		"#extension GL_EXT_mesh_shader : enable\n"
		"\n"
		"layout (location=0) in perprimitiveEXT vec4 triangleColor;\n"
		"layout (location=0) out vec4 outColor;\n"
		"\n"
		"void main ()\n"
		"{\n"
		"	outColor = triangleColor;\n"
		"}\n"
		;
}
83
84tcu::Vec4 getClearColor ()
85{
86	return tcu::Vec4(0.0f, 0.0f, 0.0f, 1.0f);
87}
88
89void makeMeshGraphicsPipeline (	GraphicsPipelineWrapper&							maker,
90								const PipelineLayoutWrapper&						pipelineLayout,
91								const ShaderWrapper									taskShader,
92								const ShaderWrapper									meshShader,
93								const ShaderWrapper									fragShader,
94								const VkRenderPass									renderPass,
95								const std::vector<VkViewport>&						viewports,
96								const std::vector<VkRect2D>&						scissors,
97								const uint32_t										subpass = 0u,
98								const VkPipelineDepthStencilStateCreateInfo*		depthStencilStateCreateInfo = nullptr,
99								VkPipelineFragmentShadingRateStateCreateInfoKHR*	fragmentShadingRateStateCreateInfo = nullptr)
100{
101#ifndef CTS_USES_VULKANSC
102	maker.setDefaultMultisampleState()
103		 .setDefaultColorBlendState()
104		 .setDefaultRasterizationState()
105		 .setDefaultDepthStencilState()
106		 .setupPreRasterizationMeshShaderState(viewports,
107											   scissors,
108											   pipelineLayout,
109											   renderPass,
110											   subpass,
111											   taskShader,
112											   meshShader,
113											   nullptr,
114											   nullptr,
115											   nullptr,
116											   fragmentShadingRateStateCreateInfo)
117		 .setupFragmentShaderState(pipelineLayout,
118								   renderPass,
119								   subpass,
120								   fragShader,
121								   depthStencilStateCreateInfo)
122		 .setupFragmentOutputState(renderPass, subpass)
123		 .setMonolithicPipelineLayout(pipelineLayout)
124		 .buildPipeline();
125#else
126	DE_ASSERT(false);
127#endif // CTS_USES_VULKANSC
128}
129
130struct MeshTriangleRendererParams
131{
132	PipelineConstructionType	constructionType;
133	std::vector<tcu::Vec4>		vertexCoords;
134	std::vector<uint32_t>		vertexIndices;
135	uint32_t					taskCount;
136	tcu::Vec4					expectedColor;
137	bool						rasterizationDisabled;
138
139	MeshTriangleRendererParams (PipelineConstructionType	constructionType_,
140								std::vector<tcu::Vec4>		vertexCoords_,
141								std::vector<uint32_t>		vertexIndices_,
142								uint32_t					taskCount_,
143								const tcu::Vec4&			expectedColor_,
144								bool						rasterizationDisabled_ = false)
145		: constructionType		(constructionType_)
146		, vertexCoords			(std::move(vertexCoords_))
147		, vertexIndices			(std::move(vertexIndices_))
148		, taskCount				(taskCount_)
149		, expectedColor			(expectedColor_)
150		, rasterizationDisabled	(rasterizationDisabled_)
151	{}
152
153	MeshTriangleRendererParams (MeshTriangleRendererParams&& other)
154		: MeshTriangleRendererParams (other.constructionType,
155									  std::move(other.vertexCoords),
156									  std::move(other.vertexIndices),
157									  other.taskCount,
158									  other.expectedColor,
159									  other.rasterizationDisabled)
160	{}
161};
162
163class MeshOnlyTriangleCase : public vkt::TestCase
164{
165public:
166					MeshOnlyTriangleCase			(tcu::TestContext& testCtx, const std::string& name,
167													 PipelineConstructionType constructionType, bool rasterizationDisabled = false)
168						: vkt::TestCase				(testCtx, name)
169						, m_constructionType		(constructionType)
170						, m_rasterizationDisabled	(rasterizationDisabled)
171						{}
172	virtual			~MeshOnlyTriangleCase	(void) {}
173
174	void			initPrograms			(vk::SourceCollections& programCollection) const override;
175	TestInstance*	createInstance			(Context& context) const override;
176	void			checkSupport			(Context& context) const override;
177
178protected:
179	const PipelineConstructionType	m_constructionType;
180	const bool						m_rasterizationDisabled;
181};
182
183class MeshTaskTriangleCase : public vkt::TestCase
184{
185public:
186					MeshTaskTriangleCase	(tcu::TestContext& testCtx, const std::string& name, PipelineConstructionType constructionType)
187						: vkt::TestCase			(testCtx, name)
188						, m_constructionType	(constructionType)
189						{}
190	virtual			~MeshTaskTriangleCase	(void) {}
191
192	void			initPrograms			(vk::SourceCollections& programCollection) const override;
193	TestInstance*	createInstance			(Context& context) const override;
194	void			checkSupport			(Context& context) const override;
195
196protected:
197	const PipelineConstructionType m_constructionType;
198};
199
200// Note: not actually task-only. The task shader will not emit mesh shader work groups.
201class TaskOnlyTriangleCase : public vkt::TestCase
202{
203public:
204					TaskOnlyTriangleCase	(tcu::TestContext& testCtx, const std::string& name, PipelineConstructionType constructionType)
205						: vkt::TestCase			(testCtx, name)
206						, m_constructionType	(constructionType)
207						{}
208	virtual			~TaskOnlyTriangleCase	(void) {}
209
210	void			initPrograms			(vk::SourceCollections& programCollection) const override;
211	TestInstance*	createInstance			(Context& context) const override;
212	void			checkSupport			(Context& context) const override;
213
214protected:
215	const PipelineConstructionType m_constructionType;
216};
217
218class MeshTriangleRenderer : public vkt::TestInstance
219{
220public:
221						MeshTriangleRenderer	(Context& context, MeshTriangleRendererParams params) : vkt::TestInstance(context), m_params(std::move(params)) {}
222	virtual				~MeshTriangleRenderer	(void) {}
223
224	tcu::TestStatus		iterate					(void) override;
225
226protected:
227	MeshTriangleRendererParams	m_params;
228};
229
230void MeshOnlyTriangleCase::checkSupport (Context& context) const
231{
232	checkTaskMeshShaderSupportEXT(context, false, true);
233	checkPipelineConstructionRequirements(context.getInstanceInterface(), context.getPhysicalDevice(), m_constructionType);
234}
235
236void MeshTaskTriangleCase::checkSupport (Context& context) const
237{
238	checkTaskMeshShaderSupportEXT(context, true, true);
239	checkPipelineConstructionRequirements(context.getInstanceInterface(), context.getPhysicalDevice(), m_constructionType);
240}
241
242void TaskOnlyTriangleCase::checkSupport (Context& context) const
243{
244	checkTaskMeshShaderSupportEXT(context, true, true);
245	checkPipelineConstructionRequirements(context.getInstanceInterface(), context.getPhysicalDevice(), m_constructionType);
246}
247
248void MeshOnlyTriangleCase::initPrograms (SourceCollections& dst) const
249{
250	const auto buildOptions = getMinMeshEXTBuildOptions(dst.usedVulkanVersion);
251
252	std::ostringstream mesh;
253	mesh
254		<< "#version 450\n"
255		<< "#extension GL_EXT_mesh_shader : enable\n"
256		<< "\n"
257		// We will actually output a single triangle and most invocations will do no work.
258		<< "layout(local_size_x=8, local_size_y=4, local_size_z=4) in;\n"
259		<< "layout(triangles) out;\n"
260		<< "layout(max_vertices=256, max_primitives=256) out;\n"
261		<< "\n"
262		// Unique vertex coordinates.
263		<< "layout (set=0, binding=0) uniform CoordsBuffer {\n"
264		<< "    vec4 coords[3];\n"
265		<< "} cb;\n"
266		// Unique vertex indices.
267		<< "layout (set=0, binding=1, std430) readonly buffer IndexBuffer {\n"
268		<< "    uint indices[3];\n"
269		<< "} ib;\n"
270		<< "\n"
271		// Triangle color.
272		<< "layout (location=0) out perprimitiveEXT vec4 triangleColor[];\n"
273		<< "\n"
274		<< "void main ()\n"
275		<< "{\n"
276		<< "    SetMeshOutputsEXT(3u, 1u);\n"
277		<< "    triangleColor[0] = vec4(0.0, 0.0, 1.0, 1.0);\n"
278		<< "\n"
279		<< "    const uint vertexIndex = gl_LocalInvocationIndex;\n"
280		<< "    if (vertexIndex < 3u)\n"
281		<< "    {\n"
282		<< "        const uint coordsIndex = ib.indices[vertexIndex];\n"
283		<< "        gl_MeshVerticesEXT[vertexIndex].gl_Position = cb.coords[coordsIndex];\n"
284		<< "    }\n"
285		<< "    if (vertexIndex == 0u)\n"
286		<< "    {\n"
287		<< "        gl_PrimitiveTriangleIndicesEXT[0] = uvec3(0, 1, 2);\n"
288		<< "    }\n"
289		<< "}\n"
290		;
291	dst.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
292
293	dst.glslSources.add("frag") << glu::FragmentSource(commonMeshFragShader()) << buildOptions;
294}
295
296void MeshTaskTriangleCase::initPrograms (SourceCollections& dst) const
297{
298	const auto buildOptions = getMinMeshEXTBuildOptions(dst.usedVulkanVersion);
299
300	std::string taskDataDecl =
301		"struct TaskData {\n"
302		"    uint triangleIndex;\n"
303		"};\n"
304		"taskPayloadSharedEXT TaskData td;\n"
305		;
306
307	std::ostringstream task;
308	task
309		// Each work group spawns 1 task each (2 in total) and each task will draw 1 triangle.
310		<< "#version 460\n"
311		<< "#extension GL_EXT_mesh_shader : enable\n"
312		<< "\n"
313		<< "layout(local_size_x=8, local_size_y=4, local_size_z=4) in;\n"
314		<< "\n"
315		<< taskDataDecl
316		<< "\n"
317		<< "void main ()\n"
318		<< "{\n"
319		<< "    if (gl_LocalInvocationIndex == 0u)\n"
320		<< "    {\n"
321		<< "        td.triangleIndex = gl_WorkGroupID.x;\n"
322		<< "    }\n"
323		<< "    EmitMeshTasksEXT(1u, 1u, 1u);\n"
324		<< "}\n"
325		;
326		;
327	dst.glslSources.add("task") << glu::TaskSource(task.str()) << buildOptions;
328
329	std::ostringstream mesh;
330	mesh
331		<< "#version 460\n"
332		<< "#extension GL_EXT_mesh_shader : enable\n"
333		<< "\n"
334		// We will actually output a single triangle and most invocations will do no work.
335		<< "layout(local_size_x=8, local_size_y=4, local_size_z=4) in;\n"
336		<< "layout(triangles) out;\n"
337		<< "layout(max_vertices=256, max_primitives=256) out;\n"
338		<< "\n"
339		// Unique vertex coordinates.
340		<< "layout (set=0, binding=0) uniform CoordsBuffer {\n"
341		<< "    vec4 coords[4];\n"
342		<< "} cb;\n"
343		// Unique vertex indices.
344		<< "layout (set=0, binding=1, std430) readonly buffer IndexBuffer {\n"
345		<< "    uint indices[6];\n"
346		<< "} ib;\n"
347		<< "\n"
348		// Triangle color.
349		<< "layout (location=0) out perprimitiveEXT vec4 triangleColor[];\n"
350		<< "\n"
351		<< taskDataDecl
352		<< "\n"
353		<< "void main ()\n"
354		<< "{\n"
355		<< "    SetMeshOutputsEXT(3u, 1u);\n"
356		<< "\n"
357		// Each "active" invocation will copy one vertex.
358		<< "    const uint triangleVertex = gl_LocalInvocationIndex;\n"
359		<< "    const uint indexArrayPos  = td.triangleIndex * 3u + triangleVertex;\n"
360		<< "\n"
361		<< "    if (triangleVertex < 3u)\n"
362		<< "    {\n"
363		<< "        const uint coordsIndex = ib.indices[indexArrayPos];\n"
364		// Copy vertex coordinates.
365		<< "        gl_MeshVerticesEXT[triangleVertex].gl_Position = cb.coords[coordsIndex];\n"
366		// Index renumbering: final indices will always be 0, 1, 2.
367		<< "    }\n"
368		<< "    if (triangleVertex == 0u)\n"
369		<< "    {\n"
370		<< "        gl_PrimitiveTriangleIndicesEXT[0] = uvec3(0, 1, 2);\n"
371		<< "        triangleColor[0] = vec4(0.0, 0.0, 1.0, 1.0);\n"
372		<< "    }\n"
373		<< "}\n"
374		;
375	dst.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
376
377	dst.glslSources.add("frag") << glu::FragmentSource(commonMeshFragShader()) << buildOptions;
378}
379
380void TaskOnlyTriangleCase::initPrograms (SourceCollections& dst) const
381{
382	const auto buildOptions = getMinMeshEXTBuildOptions(dst.usedVulkanVersion);
383
384	// The task shader does not spawn any mesh shader invocations.
385	std::ostringstream task;
386	task
387		<< "#version 450\n"
388		<< "#extension GL_EXT_mesh_shader : enable\n"
389		<< "\n"
390		<< "layout(local_size_x=1) in;\n"
391		<< "\n"
392		<< "void main ()\n"
393		<< "{\n"
394		<< "    EmitMeshTasksEXT(0u, 0u, 0u);\n"
395		<< "}\n"
396		;
397	dst.glslSources.add("task") << glu::TaskSource(task.str()) << buildOptions;
398
399	// Same shader as the mesh only case, but it should not be launched.
400	std::ostringstream mesh;
401	mesh
402		<< "#version 450\n"
403		<< "#extension GL_EXT_mesh_shader : enable\n"
404		<< "\n"
405		// We will actually output a single triangle and most invocations will do no work.
406		<< "layout(local_size_x=8, local_size_y=4, local_size_z=4) in;\n"
407		<< "layout(triangles) out;\n"
408		<< "layout(max_vertices=256, max_primitives=256) out;\n"
409		<< "\n"
410		<< "layout (set=0, binding=0) uniform CoordsBuffer {\n"
411		<< "    vec4 coords[3];\n"
412		<< "} cb;\n"
413		<< "layout (set=0, binding=1, std430) readonly buffer IndexBuffer {\n"
414		<< "    uint indices[3];\n"
415		<< "} ib;\n"
416		<< "\n"
417		<< "layout (location=0) out perprimitiveEXT vec4 triangleColor[];\n"
418		<< "\n"
419		<< "void main ()\n"
420		<< "{\n"
421		<< "    SetMeshOutputsEXT(3u, 1u);\n"
422		<< "    triangleColor[0] = vec4(0.0, 0.0, 1.0, 1.0);\n"
423		<< "\n"
424		<< "    const uint vertexIndex = gl_LocalInvocationIndex;\n"
425		<< "    if (vertexIndex < 3u)\n"
426		<< "    {\n"
427		<< "        const uint coordsIndex = ib.indices[vertexIndex];\n"
428		<< "        gl_MeshVerticesEXT[vertexIndex].gl_Position = cb.coords[coordsIndex];\n"
429		<< "    }\n"
430		<< "    if (vertexIndex == 0u)\n"
431		<< "    {\n"
432		<< "        gl_PrimitiveTriangleIndicesEXT[0] = uvec3(0, 1, 2);\n"
433		<< "    }\n"
434		<< "}\n"
435		;
436	dst.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
437
438	dst.glslSources.add("frag") << glu::FragmentSource(commonMeshFragShader()) << buildOptions;
439}
440
441TestInstance* MeshOnlyTriangleCase::createInstance (Context& context) const
442{
443	const std::vector<tcu::Vec4>	vertexCoords	=
444	{
445		tcu::Vec4(-1.0f, -1.0f, 0.0f, 1.0f),
446		tcu::Vec4(-1.0f,  3.0f, 0.0f, 1.0f),
447		tcu::Vec4( 3.0f, -1.0f, 0.0f, 1.0f),
448	};
449	const std::vector<uint32_t>		vertexIndices	= { 0u, 1u, 2u };
450	const auto						expectedColor	= (m_rasterizationDisabled ? getClearColor() : tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f));
451	MeshTriangleRendererParams		params			(m_constructionType, std::move(vertexCoords), std::move(vertexIndices), 1u, expectedColor, m_rasterizationDisabled);
452
453	return new MeshTriangleRenderer(context, std::move(params));
454}
455
456TestInstance* MeshTaskTriangleCase::createInstance (Context& context) const
457{
458	const std::vector<tcu::Vec4>	vertexCoords	=
459	{
460		tcu::Vec4(-1.0f, -1.0f, 0.0f, 1.0f),
461		tcu::Vec4(-1.0f,  1.0f, 0.0f, 1.0f),
462		tcu::Vec4( 1.0f, -1.0f, 0.0f, 1.0f),
463		tcu::Vec4( 1.0f,  1.0f, 0.0f, 1.0f),
464	};
465	const std::vector<uint32_t>		vertexIndices	= { 2u, 0u, 1u, 1u, 3u, 2u };
466	MeshTriangleRendererParams		params			(m_constructionType, std::move(vertexCoords), std::move(vertexIndices), 2u, tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f));
467
468	return new MeshTriangleRenderer(context, std::move(params));
469}
470
471TestInstance* TaskOnlyTriangleCase::createInstance (Context& context) const
472{
473	const std::vector<tcu::Vec4>	vertexCoords	=
474	{
475		tcu::Vec4(-1.0f, -1.0f, 0.0f, 1.0f),
476		tcu::Vec4(-1.0f,  3.0f, 0.0f, 1.0f),
477		tcu::Vec4( 3.0f, -1.0f, 0.0f, 1.0f),
478	};
479	const std::vector<uint32_t>		vertexIndices	= { 0u, 1u, 2u };
480	// Note we expect the clear color.
481	MeshTriangleRendererParams		params			(m_constructionType, std::move(vertexCoords), std::move(vertexIndices), 1u, getClearColor());
482
483	return new MeshTriangleRenderer(context, std::move(params));
484}
485
486tcu::TestStatus MeshTriangleRenderer::iterate ()
487{
488	const auto&		vki					= m_context.getInstanceInterface();
489	const auto&		vkd					= m_context.getDeviceInterface();
490	const auto		physicalDevice		= m_context.getPhysicalDevice();
491	const auto		device				= m_context.getDevice();
492	auto&			alloc				= m_context.getDefaultAllocator();
493	const auto		qIndex				= m_context.getUniversalQueueFamilyIndex();
494	const auto		queue				= m_context.getUniversalQueue();
495
496	const auto		vertexBufferStages	= VK_SHADER_STAGE_MESH_BIT_EXT;
497	const auto		vertexBufferSize	= static_cast<VkDeviceSize>(de::dataSize(m_params.vertexCoords));
498	const auto		vertexBufferUsage	= VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
499	const auto		vertexBufferLoc		= DescriptorSetUpdateBuilder::Location::binding(0u);
500	const auto		vertexBufferType	= VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
501
502	const auto		indexBufferStages	= VK_SHADER_STAGE_MESH_BIT_EXT;
503	const auto		indexBufferSize		= static_cast<VkDeviceSize>(de::dataSize(m_params.vertexIndices));
504	const auto		indexBufferUsage	= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
505	const auto		indexBufferLoc		= DescriptorSetUpdateBuilder::Location::binding(1u);
506	const auto		indexBufferType		= VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
507
508	// Vertex buffer.
509	const auto			vertexBufferInfo	= makeBufferCreateInfo(vertexBufferSize, vertexBufferUsage);
510	BufferWithMemory	vertexBuffer		(vkd, device, alloc, vertexBufferInfo, MemoryRequirement::HostVisible);
511	auto&				vertexBufferAlloc	= vertexBuffer.getAllocation();
512	void*				vertexBufferDataPtr	= vertexBufferAlloc.getHostPtr();
513
514	deMemcpy(vertexBufferDataPtr, m_params.vertexCoords.data(), static_cast<size_t>(vertexBufferSize));
515	flushAlloc(vkd, device, vertexBufferAlloc);
516
517	// Index buffer.
518	const auto			indexBufferInfo		= makeBufferCreateInfo(indexBufferSize, indexBufferUsage);
519	BufferWithMemory	indexBuffer			(vkd, device, alloc, indexBufferInfo, MemoryRequirement::HostVisible);
520	auto&				indexBufferAlloc	= indexBuffer.getAllocation();
521	void*				indexBufferDataPtr	= indexBufferAlloc.getHostPtr();
522
523	deMemcpy(indexBufferDataPtr, m_params.vertexIndices.data(), static_cast<size_t>(indexBufferSize));
524	flushAlloc(vkd, device, indexBufferAlloc);
525
526	// Color buffer.
527	const auto	colorBufferFormat	= VK_FORMAT_R8G8B8A8_UNORM;
528	const auto	colorBufferExtent	= makeExtent3D(8u, 8u, 1u);
529	const auto	colorBufferUsage	= (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
530
531	const VkImageCreateInfo colorBufferInfo =
532	{
533		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,	//	VkStructureType			sType;
534		nullptr,								//	const void*				pNext;
535		0u,										//	VkImageCreateFlags		flags;
536		VK_IMAGE_TYPE_2D,						//	VkImageType				imageType;
537		colorBufferFormat,						//	VkFormat				format;
538		colorBufferExtent,						//	VkExtent3D				extent;
539		1u,										//	uint32_t				mipLevels;
540		1u,										//	uint32_t				arrayLayers;
541		VK_SAMPLE_COUNT_1_BIT,					//	VkSampleCountFlagBits	samples;
542		VK_IMAGE_TILING_OPTIMAL,				//	VkImageTiling			tiling;
543		colorBufferUsage,						//	VkImageUsageFlags		usage;
544		VK_SHARING_MODE_EXCLUSIVE,				//	VkSharingMode			sharingMode;
545		0u,										//	uint32_t				queueFamilyIndexCount;
546		nullptr,								//	const uint32_t*			pQueueFamilyIndices;
547		VK_IMAGE_LAYOUT_UNDEFINED,				//	VkImageLayout			initialLayout;
548	};
549	ImageWithMemory colorBuffer(vkd, device, alloc, colorBufferInfo, MemoryRequirement::Any);
550
551	const auto colorSRR			= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
552	const auto colorBufferView	= makeImageView(vkd, device, colorBuffer.get(), VK_IMAGE_VIEW_TYPE_2D, colorBufferFormat, colorSRR);
553
554	// Render pass.
555	const auto renderPass = makeRenderPass(vkd, device, colorBufferFormat);
556
557	// Framebuffer.
558	const auto framebuffer = makeFramebuffer(vkd, device, renderPass.get(), colorBufferView.get(), colorBufferExtent.width, colorBufferExtent.height);
559
560	// Set layout.
561	DescriptorSetLayoutBuilder layoutBuilder;
562	layoutBuilder.addSingleBinding(vertexBufferType, vertexBufferStages);
563	layoutBuilder.addSingleBinding(indexBufferType, indexBufferStages);
564	const auto setLayout = layoutBuilder.build(vkd, device);
565
566	// Descriptor pool.
567	DescriptorPoolBuilder poolBuilder;
568	poolBuilder.addType(vertexBufferType);
569	poolBuilder.addType(indexBufferType);
570	const auto descriptorPool = poolBuilder.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
571
572	// Descriptor set.
573	const auto descriptorSet = makeDescriptorSet(vkd, device, descriptorPool.get(), setLayout.get());
574
575	// Update descriptor set.
576	DescriptorSetUpdateBuilder updateBuilder;
577	const auto vertexBufferDescInfo	= makeDescriptorBufferInfo(vertexBuffer.get(), 0ull, vertexBufferSize);
578	const auto indexBufferDescInfo	= makeDescriptorBufferInfo(indexBuffer.get(), 0ull, indexBufferSize);
579	updateBuilder.writeSingle(descriptorSet.get(), vertexBufferLoc, vertexBufferType, &vertexBufferDescInfo);
580	updateBuilder.writeSingle(descriptorSet.get(), indexBufferLoc, indexBufferType, &indexBufferDescInfo);
581	updateBuilder.update(vkd, device);
582
583	// Pipeline layout.
584	const PipelineLayoutWrapper pipelineLayout (m_params.constructionType, vkd, device, setLayout.get());
585
586	// Shader modules.
587	ShaderWrapper			taskModule;
588	ShaderWrapper			fragModule;
589	const auto&				binaries = m_context.getBinaryCollection();
590
591	if (binaries.contains("task"))
592		taskModule = ShaderWrapper(vkd, device, binaries.get("task"), 0u);
593	if (!m_params.rasterizationDisabled)
594		fragModule = ShaderWrapper(vkd, device, binaries.get("frag"), 0u);
595	const auto meshModule = ShaderWrapper(vkd, device, binaries.get("mesh"), 0u);
596
597	// Graphics pipeline.
598	std::vector<VkViewport>	viewports		(1u, makeViewport(colorBufferExtent));
599	std::vector<VkRect2D>	scissors		(1u, makeRect2D(colorBufferExtent));
600	GraphicsPipelineWrapper pipelineMaker	(vki, vkd, physicalDevice, device, m_context.getDeviceExtensions(), m_params.constructionType);
601
602	makeMeshGraphicsPipeline(pipelineMaker, pipelineLayout, taskModule, meshModule, fragModule, renderPass.get(), viewports, scissors);
603	const auto				pipeline		= pipelineMaker.getPipeline();
604
605	// Command pool and buffer.
606	const auto cmdPool			= makeCommandPool(vkd, device, qIndex);
607	const auto cmdBufferPtr		= allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
608	const auto cmdBuffer		= cmdBufferPtr.get();
609
610	// Output buffer.
611	const auto	tcuFormat		= mapVkFormat(colorBufferFormat);
612	const auto	outBufferSize	= static_cast<VkDeviceSize>(static_cast<uint32_t>(tcu::getPixelSize(tcuFormat)) * colorBufferExtent.width * colorBufferExtent.height);
613	const auto	outBufferUsage	= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
614	const auto	outBufferInfo	= makeBufferCreateInfo(outBufferSize, outBufferUsage);
615	BufferWithMemory outBuffer (vkd, device, alloc, outBufferInfo, MemoryRequirement::HostVisible);
616	auto&		outBufferAlloc	= outBuffer.getAllocation();
617	void*		outBufferData	= outBufferAlloc.getHostPtr();
618
619	// Draw triangle.
620	beginCommandBuffer(vkd, cmdBuffer);
621	beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0), getClearColor());
622	vkd.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout.get(), 0u, 1u, &descriptorSet.get(), 0u, nullptr);
623	vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
624	vkd.cmdDrawMeshTasksEXT(cmdBuffer, m_params.taskCount, 1u, 1u);
625	endRenderPass(vkd, cmdBuffer);
626
627	// Copy color buffer to output buffer.
628	const tcu::IVec3 imageDim	(static_cast<int>(colorBufferExtent.width), static_cast<int>(colorBufferExtent.height), static_cast<int>(colorBufferExtent.depth));
629	const tcu::IVec2 imageSize	(imageDim.x(), imageDim.y());
630
631	copyImageToBuffer(vkd, cmdBuffer, colorBuffer.get(), outBuffer.get(), imageSize);
632	endCommandBuffer(vkd, cmdBuffer);
633	submitCommandsAndWait(vkd, device, queue, cmdBuffer);
634
635	// Invalidate alloc.
636	invalidateAlloc(vkd, device, outBufferAlloc);
637	tcu::ConstPixelBufferAccess outPixels(tcuFormat, imageDim, outBufferData);
638
639	auto& log = m_context.getTestContext().getLog();
640	const tcu::Vec4 threshold (0.0f); // The color can be represented exactly.
641
642	if (!tcu::floatThresholdCompare(log, "Result", "", m_params.expectedColor, outPixels, threshold, tcu::COMPARE_LOG_EVERYTHING))
643		return tcu::TestStatus::fail("Failed; check log for details");
644
645	return tcu::TestStatus::pass("Pass");
646}
647
648VkExtent3D gradientImageExtent ()
649{
650	return makeExtent3D(256u, 256u, 1u);
651}
652
653struct GradientParams
654{
655	tcu::Maybe<FragmentSize> fragmentSize;
656	PipelineConstructionType constructionType;
657
658	GradientParams (const tcu::Maybe<FragmentSize>& fragmentSize_, PipelineConstructionType constructionType_)
659		: fragmentSize		(fragmentSize_)
660		, constructionType	(constructionType_)
661		{}
662};
663
664void checkMeshSupport (Context& context, GradientParams params)
665{
666	checkTaskMeshShaderSupportEXT(context, false, true);
667
668	if (static_cast<bool>(params.fragmentSize))
669	{
670		const auto& features = context.getMeshShaderFeaturesEXT();
671		if (!features.primitiveFragmentShadingRateMeshShader)
672			TCU_THROW(NotSupportedError, "Primitive fragment shading rate not supported in mesh shaders");
673	}
674
675	checkPipelineConstructionRequirements(context.getInstanceInterface(), context.getPhysicalDevice(), params.constructionType);
676}
677
678void initGradientPrograms (vk::SourceCollections& programCollection, GradientParams params)
679{
680	const auto buildOptions	= getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
681	const auto extent		= gradientImageExtent();
682
683	std::ostringstream frag;
684	frag
685		<< "#version 450\n"
686		<< "\n"
687		<< "layout (location=0) in  vec4 inColor;\n"
688		<< "layout (location=0) out vec4 outColor;\n"
689		<< "\n"
690		<< "void main ()\n"
691		<< "{\n"
692		<< "    outColor = inColor;\n"
693		<< "}\n"
694		;
695	programCollection.glslSources.add("frag") << glu::FragmentSource(frag.str());
696
697	std::string fragmentSizeStr;
698	const auto useFragmentSize	= static_cast<bool>(params.fragmentSize);
699
700	if (useFragmentSize)
701	{
702		const auto& fragSize = params.fragmentSize.get();
703		fragmentSizeStr	= getGLSLShadingRateMask(fragSize);
704
705		const auto val	= getSPVShadingRateValue(fragSize);
706		DE_ASSERT(val != 0);
707		DE_UNREF(val); // For release builds.
708	}
709
710	std::ostringstream mesh;
711	mesh
712		<< "#version 450\n"
713		<< "#extension GL_EXT_mesh_shader : enable\n"
714		;
715
716	if (useFragmentSize)
717		mesh << "#extension GL_EXT_fragment_shading_rate : enable\n";
718
719	mesh
720		<< "\n"
721		<< "layout(local_size_x=4) in;\n"
722		<< "layout(triangles) out;\n"
723		<< "layout(max_vertices=256, max_primitives=256) out;\n"
724		<< "\n"
725		<< "layout (location=0) out vec4 outColor[];\n"
726		<< "\n"
727		;
728
729	if (useFragmentSize)
730	{
731		mesh
732			<< "perprimitiveEXT out gl_MeshPerPrimitiveEXT {\n"
733			<< "   int gl_PrimitiveShadingRateEXT;\n"
734			<< "} gl_MeshPrimitivesEXT[];\n"
735			<< "\n"
736			;
737	}
738
739	mesh
740		<< "void main ()\n"
741		<< "{\n"
742		<< "    SetMeshOutputsEXT(4u, 2u);\n"
743		<< "\n"
744		<< "    const uint vertex    = gl_LocalInvocationIndex;\n"
745		<< "    const uint primitive = gl_LocalInvocationIndex;\n"
746		<< "\n"
747		<< "    const vec4 topLeft      = vec4(-1.0, -1.0, 0.0, 1.0);\n"
748		<< "    const vec4 botLeft      = vec4(-1.0,  1.0, 0.0, 1.0);\n"
749		<< "    const vec4 topRight     = vec4( 1.0, -1.0, 0.0, 1.0);\n"
750		<< "    const vec4 botRight     = vec4( 1.0,  1.0, 0.0, 1.0);\n"
751		<< "    const vec4 positions[4] = vec4[](topLeft, botLeft, topRight, botRight);\n"
752		<< "\n"
753		// Green changes according to the width.
754		// Blue changes according to the height.
755		// Value 0 at the center of the first pixel and value 1 at the center of the last pixel.
756		<< "    const float width      = " << extent.width << ";\n"
757		<< "    const float height     = " << extent.height << ";\n"
758		<< "    const float halfWidth  = (1.0 / (width - 1.0)) / 2.0;\n"
759		<< "    const float halfHeight = (1.0 / (height - 1.0)) / 2.0;\n"
760		<< "    const float minGreen   = -halfWidth;\n"
761		<< "    const float maxGreen   = 1.0+halfWidth;\n"
762		<< "    const float minBlue    = -halfHeight;\n"
763		<< "    const float maxBlue    = 1.0+halfHeight;\n"
764		<< "    const vec4  colors[4]  = vec4[](\n"
765		<< "        vec4(0, minGreen, minBlue, 1.0),\n"
766		<< "        vec4(0, minGreen, maxBlue, 1.0),\n"
767		<< "        vec4(0, maxGreen, minBlue, 1.0),\n"
768		<< "        vec4(0, maxGreen, maxBlue, 1.0)\n"
769		<< "    );\n"
770		<< "\n"
771		<< "    const uvec3 indices[2] = uvec3[](\n"
772		<< "        uvec3(0, 1, 2),\n"
773		<< "        uvec3(1, 3, 2)\n"
774		<< "    );\n"
775		<< "    if (vertex < 4u)\n"
776		<< "    {\n"
777		<< "        gl_MeshVerticesEXT[vertex].gl_Position = positions[vertex];\n"
778		<< "        outColor[vertex] = colors[vertex];\n"
779		<< "    }\n"
780		<< "    if (primitive < 2u)\n"
781		<< "    {\n"
782		;
783
784	if (useFragmentSize)
785	{
786		mesh
787			<< "        gl_MeshPrimitivesEXT[primitive].gl_PrimitiveShadingRateEXT = " << fragmentSizeStr << ";\n"
788			;
789	}
790
791	mesh
792		<< "        gl_PrimitiveTriangleIndicesEXT[primitive] = indices[primitive];\n"
793		<< "    }\n"
794		<< "}\n"
795		;
796		;
797	programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
798}
799
800std::string coordColorFormat (int x, int y, const tcu::Vec4& color)
801{
802	std::ostringstream msg;
803	msg << "[" << x << ", " << y << "]=(" << color.x() << ", " << color.y() << ", " << color.z() << ", " << color.w() << ")";
804	return msg.str();
805}
806
807tcu::TestStatus testFullscreenGradient (Context& context, GradientParams params)
808{
809	const auto&		vki					= context.getInstanceInterface();
810	const auto&		vkd					= context.getDeviceInterface();
811	const auto		physicalDevice		= context.getPhysicalDevice();
812	const auto		device				= context.getDevice();
813	auto&			alloc				= context.getDefaultAllocator();
814	const auto		qIndex				= context.getUniversalQueueFamilyIndex();
815	const auto		queue				= context.getUniversalQueue();
816	const auto		useFragmentSize		= static_cast<bool>(params.fragmentSize);
817	const auto		defaultFragmentSize	= FragmentSize::SIZE_1X1;
818	const auto		rateSize			= getShadingRateSize(useFragmentSize ? params.fragmentSize.get() : defaultFragmentSize);
819
820	// Color buffer.
821	const auto	colorBufferFormat	= VK_FORMAT_R8G8B8A8_UNORM;
822	const auto	colorBufferExtent	= makeExtent3D(256u, 256u, 1u); // Big enough for a detailed gradient, small enough to get unique colors.
823	const auto	colorBufferUsage	= (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
824
825	const VkImageCreateInfo colorBufferInfo =
826	{
827		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,	//	VkStructureType			sType;
828		nullptr,								//	const void*				pNext;
829		0u,										//	VkImageCreateFlags		flags;
830		VK_IMAGE_TYPE_2D,						//	VkImageType				imageType;
831		colorBufferFormat,						//	VkFormat				format;
832		colorBufferExtent,						//	VkExtent3D				extent;
833		1u,										//	uint32_t				mipLevels;
834		1u,										//	uint32_t				arrayLayers;
835		VK_SAMPLE_COUNT_1_BIT,					//	VkSampleCountFlagBits	samples;
836		VK_IMAGE_TILING_OPTIMAL,				//	VkImageTiling			tiling;
837		colorBufferUsage,						//	VkImageUsageFlags		usage;
838		VK_SHARING_MODE_EXCLUSIVE,				//	VkSharingMode			sharingMode;
839		0u,										//	uint32_t				queueFamilyIndexCount;
840		nullptr,								//	const uint32_t*			pQueueFamilyIndices;
841		VK_IMAGE_LAYOUT_UNDEFINED,				//	VkImageLayout			initialLayout;
842	};
843	ImageWithMemory colorBuffer(vkd, device, alloc, colorBufferInfo, MemoryRequirement::Any);
844
845	const auto colorSRR			= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
846	const auto colorBufferView	= makeImageView(vkd, device, colorBuffer.get(), VK_IMAGE_VIEW_TYPE_2D, colorBufferFormat, colorSRR);
847
848	// Render pass.
849	const auto renderPass = makeRenderPass(vkd, device, colorBufferFormat);
850
851	// Framebuffer.
852	const auto framebuffer = makeFramebuffer(vkd, device, renderPass.get(), colorBufferView.get(), colorBufferExtent.width, colorBufferExtent.height);
853
854	// Set layout.
855	DescriptorSetLayoutBuilder layoutBuilder;
856	const auto setLayout = layoutBuilder.build(vkd, device);
857
858	// Pipeline layout.
859	const PipelineLayoutWrapper pipelineLayout (params.constructionType, vkd, device, setLayout.get());
860
861	// Shader modules.
862	ShaderWrapper			taskModule;
863	const auto&				binaries = context.getBinaryCollection();
864
865	const auto meshModule = ShaderWrapper(vkd, device, binaries.get("mesh"), 0u);
866	const auto fragModule = ShaderWrapper(vkd, device, binaries.get("frag"), 0u);
867
868	using ShadingRateInfoPtr = de::MovePtr<VkPipelineFragmentShadingRateStateCreateInfoKHR>;
869	ShadingRateInfoPtr pNext;
870	if (useFragmentSize)
871	{
872		pNext = ShadingRateInfoPtr(new VkPipelineFragmentShadingRateStateCreateInfoKHR);
873		*pNext = initVulkanStructure();
874
875		pNext->fragmentSize		= getShadingRateSize(FragmentSize::SIZE_1X1); // 1x1 will not be used as the primitive rate in tests with fragment size.
876		pNext->combinerOps[0]	= VK_FRAGMENT_SHADING_RATE_COMBINER_OP_REPLACE_KHR;
877		pNext->combinerOps[1]	= VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR;
878	}
879
880	// Graphics pipeline.
881	std::vector<VkViewport>	viewports		(1u, makeViewport(colorBufferExtent));
882	std::vector<VkRect2D>	scissors		(1u, makeRect2D(colorBufferExtent));
883	GraphicsPipelineWrapper pipelineMaker	(vki, vkd, physicalDevice, device, context.getDeviceExtensions(), params.constructionType);
884
885	makeMeshGraphicsPipeline(pipelineMaker, pipelineLayout,
886							 taskModule, meshModule, fragModule,
887							 renderPass.get(), viewports, scissors, 0u, nullptr, pNext.get());
888	const auto pipeline = pipelineMaker.getPipeline();
889
890	// Command pool and buffer.
891	const auto cmdPool			= makeCommandPool(vkd, device, qIndex);
892	const auto cmdBufferPtr		= allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
893	const auto cmdBuffer		= cmdBufferPtr.get();
894
895	// Output buffer.
896	const auto	tcuFormat		= mapVkFormat(colorBufferFormat);
897	const auto	outBufferSize	= static_cast<VkDeviceSize>(static_cast<uint32_t>(tcu::getPixelSize(tcuFormat)) * colorBufferExtent.width * colorBufferExtent.height);
898	const auto	outBufferUsage	= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
899	const auto	outBufferInfo	= makeBufferCreateInfo(outBufferSize, outBufferUsage);
900	BufferWithMemory outBuffer (vkd, device, alloc, outBufferInfo, MemoryRequirement::HostVisible);
901	auto&		outBufferAlloc	= outBuffer.getAllocation();
902	void*		outBufferData	= outBufferAlloc.getHostPtr();
903
904	// Draw triangles.
905	beginCommandBuffer(vkd, cmdBuffer);
906	beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0), getClearColor());
907	vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
908	vkd.cmdDrawMeshTasksEXT(cmdBuffer, 1u, 1u, 1u);
909	endRenderPass(vkd, cmdBuffer);
910
911	// Copy color buffer to output buffer.
912	const tcu::IVec3 imageDim	(static_cast<int>(colorBufferExtent.width), static_cast<int>(colorBufferExtent.height), static_cast<int>(colorBufferExtent.depth));
913	const tcu::IVec2 imageSize	(imageDim.x(), imageDim.y());
914
915	copyImageToBuffer(vkd, cmdBuffer, colorBuffer.get(), outBuffer.get(), imageSize);
916	endCommandBuffer(vkd, cmdBuffer);
917	submitCommandsAndWait(vkd, device, queue, cmdBuffer);
918
919	// Invalidate alloc.
920	invalidateAlloc(vkd, device, outBufferAlloc);
921	tcu::ConstPixelBufferAccess outPixels(tcuFormat, imageDim, outBufferData);
922
923	// Create reference image.
924	tcu::TextureLevel		refLevel	(tcuFormat, imageDim.x(), imageDim.y(), imageDim.z());
925	tcu::PixelBufferAccess	refAccess	(refLevel);
926	for (int y = 0; y < imageDim.y(); ++y)
927		for (int x = 0; x < imageDim.x(); ++x)
928		{
929			const tcu::IVec4 color (0, x, y, 255);
930			refAccess.setPixel(color, x, y);
931		}
932
933	const tcu::TextureFormat	maskFormat	(tcu::TextureFormat::RGBA, tcu::TextureFormat::UNORM_INT8);
934	tcu::TextureLevel			errorMask	(maskFormat, imageDim.x(), imageDim.y(), imageDim.z());
935	tcu::PixelBufferAccess		errorAccess	(errorMask);
936	const tcu::Vec4				green		(0.0f, 1.0f, 0.0f, 1.0f);
937	const tcu::Vec4				red			(1.0f, 0.0f, 0.0f, 1.0f);
938	auto&						log			= context.getTestContext().getLog();
939
940	// Each block needs to have the same color and be equal to one of the pixel colors of that block in the reference image.
941	const auto blockWidth	= static_cast<int>(rateSize.width);
942	const auto blockHeight	= static_cast<int>(rateSize.height);
943
944	tcu::clear(errorAccess, green);
945	bool globalFail = false;
946
947	for (int y = 0; y < imageDim.y() / blockHeight; ++y)
948		for (int x = 0; x < imageDim.x() / blockWidth; ++x)
949		{
950			bool					blockFail	= false;
951			std::vector<tcu::Vec4>	candidates;
952
953			candidates.reserve(rateSize.width * rateSize.height);
954
955			const auto cornerY		= y * blockHeight;
956			const auto cornerX		= x * blockWidth;
957			const auto cornerColor	= outPixels.getPixel(cornerX, cornerY);
958
959			for (int blockY = 0; blockY < blockHeight; ++blockY)
960				for (int blockX = 0; blockX < blockWidth; ++blockX)
961				{
962					const auto absY		= cornerY + blockY;
963					const auto absX		= cornerX + blockX;
964					const auto resColor	= outPixels.getPixel(absX, absY);
965
966					candidates.push_back(refAccess.getPixel(absX, absY));
967
968					if (cornerColor != resColor)
969					{
970						std::ostringstream msg;
971						msg << "Block not uniform: "
972							<< coordColorFormat(cornerX, cornerY, cornerColor)
973							<< " vs "
974							<< coordColorFormat(absX, absY, resColor);
975						log << tcu::TestLog::Message << msg.str() << tcu::TestLog::EndMessage;
976
977						blockFail = true;
978					}
979				}
980
981			if (!de::contains(begin(candidates), end(candidates), cornerColor))
982			{
983				std::ostringstream msg;
984				msg << "Block color does not match any reference color at [" << cornerX << ", " << cornerY << "]";
985				log << tcu::TestLog::Message << msg.str() << tcu::TestLog::EndMessage;
986				blockFail = true;
987			}
988
989			if (blockFail)
990			{
991				const auto blockAccess = tcu::getSubregion(errorAccess, cornerX, cornerY, blockWidth, blockHeight);
992				tcu::clear(blockAccess, red);
993				globalFail = true;
994			}
995		}
996
997	if (globalFail)
998	{
999		log << tcu::TestLog::Image("Result", "", outPixels);
1000		log << tcu::TestLog::Image("Reference", "", refAccess);
1001		log << tcu::TestLog::Image("ErrorMask", "", errorAccess);
1002
1003		TCU_FAIL("Color mismatch; check log for more details");
1004	}
1005
1006	return tcu::TestStatus::pass("Pass");
1007}
1008
// Smoke test that emits one triangle per pixel plus one more global background triangle, but doesn't use every triangle. It only
// draws half the front triangles. It gets information from a mix of vertex buffers, per primitive buffers and push constants.
//
// Parameters that select the variant of the partial usage test.
struct PartialUsageParams
{
	PipelineConstructionType	constructionType;	// Passed to the pipeline and pipeline layout wrappers, and checked in checkSupport.
	bool						compactVertices;	// When false, the mesh shader also emits extra unused vertices at the start of the vertex array.
};
1016
1017class PartialUsageCase : public vkt::TestCase
1018{
1019public:
1020	static constexpr uint32_t kWidth            = 16u;
1021	static constexpr uint32_t kHeight           = 16u;
1022	static constexpr uint32_t kLocalInvocations = 64u;
1023	static constexpr uint32_t kMaxPrimitives    = kLocalInvocations;
1024	static constexpr uint32_t kMaxVertices      = kMaxPrimitives * 3u;
1025	static constexpr uint32_t kNumWorkGroups    = 2u;
1026	static constexpr uint32_t kTotalPrimitives  = kNumWorkGroups * kMaxPrimitives;
1027
1028					PartialUsageCase	(tcu::TestContext& testCtx, const std::string& name, const PartialUsageParams& params)
1029						: vkt::TestCase(testCtx, name)
1030						, m_params(params)
1031						{}
1032	virtual			~PartialUsageCase	(void) {}
1033
1034	void			checkSupport		(Context& context) const override;
1035	void			initPrograms		(vk::SourceCollections& programCollection) const override;
1036	TestInstance*	createInstance		(Context& context) const override;
1037
1038	struct IndexAndColor
1039	{
1040		uint32_t	index;
1041		float		color;
1042	};
1043
1044	struct PushConstants
1045	{
1046		uint32_t	totalTriangles;
1047		float		depth;
1048		float		red;
1049	};
1050
1051protected:
1052	PartialUsageParams m_params;
1053};
1054
1055class PartialUsageInstance : public vkt::TestInstance
1056{
1057public:
1058						PartialUsageInstance	(Context& context, PipelineConstructionType constructionType)
1059							: vkt::TestInstance		(context)
1060							, m_constructionType	(constructionType)
1061							{}
1062	virtual				~PartialUsageInstance	(void) {}
1063
1064	tcu::TestStatus		iterate					(void) override;
1065
1066protected:
1067	const PipelineConstructionType m_constructionType;
1068};
1069
1070void PartialUsageCase::checkSupport (Context& context) const
1071{
1072	checkTaskMeshShaderSupportEXT(context, true, true);
1073	checkPipelineConstructionRequirements(context.getInstanceInterface(), context.getPhysicalDevice(), m_params.constructionType);
1074}
1075
1076TestInstance* PartialUsageCase::createInstance (Context &context) const
1077{
1078	return new PartialUsageInstance(context, m_params.constructionType);
1079}
1080
1081void PartialUsageCase::initPrograms (vk::SourceCollections &programCollection) const
1082{
1083	const auto buildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
1084
1085	// The task shader will always emit two mesh shader work groups, which may do some work.
1086	std::ostringstream task;
1087	task
1088		<< "#version 450\n"
1089		<< "#extension GL_EXT_mesh_shader : enable\n"
1090		<< "\n"
1091		<< "layout (local_size_x=1) in;\n"
1092		<< "\n"
1093		<< "void main ()\n"
1094		<< "{\n"
1095		<< "    EmitMeshTasksEXT(" << kNumWorkGroups << ", 1u, 1u);\n"
1096		<< "}\n"
1097		;
1098	programCollection.glslSources.add("task") << glu::TaskSource(task.str()) << buildOptions;
1099
1100	// The frag shader will color the output with the indicated color;
1101	std::ostringstream frag;
1102	frag
1103		<< "#version 450\n"
1104		<< "#extension GL_EXT_mesh_shader : enable\n"
1105		<< "\n"
1106		<< "layout (location=0) perprimitiveEXT in vec4 primitiveColor;\n"
1107		<< "layout (location=0) out vec4 outColor;\n"
1108		<< "\n"
1109		<< "void main ()\n"
1110		<< "{\n"
1111		<< "    outColor = primitiveColor;\n"
1112		<< "}\n"
1113		;
1114	programCollection.glslSources.add("frag") << glu::FragmentSource(frag.str()) << buildOptions;
1115
1116	// The mesh shader reads primitive indices and vertices data from buffers and push constants. The primitive data block contains
1117	// primitive indices and primitive colors that must be read by the current invocation using an index that depends on its global
1118	// invocation index. The primitive index allows access into the triangle vertices buffer. Depending on the current work group
1119	// index and total number of triangles (set by push constants), the current invocation may have to emit a primitive or not.
1120	//
1121	// In addition, the non-compacted variant emits some extra unused vertices at the start of the array.
1122	const auto kExtraVertices		= (m_params.compactVertices ? 0u : kLocalInvocations);
1123	const auto kLocationMaxVertices	= kMaxVertices + kExtraVertices;
1124
1125	if (!m_params.compactVertices)
1126		DE_ASSERT(kLocationMaxVertices <= 256u);
1127
1128	std::ostringstream mesh;
1129	mesh
1130		<< "#version 450\n"
1131		<< "#extension GL_EXT_mesh_shader : enable\n"
1132		<< "\n"
1133		<< "layout (local_size_x=" << kLocalInvocations << ", local_size_y=1, local_size_z=1) in;\n"
1134		<< "layout (triangles) out;\n"
1135		<< "layout (max_vertices=" << kLocationMaxVertices << ", max_primitives=" << kMaxPrimitives << ") out;\n"
1136		<< "\n"
1137		<< "layout (location=0) perprimitiveEXT out vec4 primitiveColor[];\n"
1138		<< "\n"
1139		<< "layout (set=0, binding=0, std430) readonly buffer VerticesBlock {\n"
1140		<< "    vec2 coords[];\n" // 3 vertices per triangle.
1141		<< "} vertex;\n"
1142		<< "\n"
1143		<< "struct IndexAndColor {\n"
1144		<< "    uint  index;\n"   // Triangle index (for accessing the coordinates buffer above).
1145		<< "    float color;\n"   // Triangle blue color component.
1146		<< "};\n"
1147		<< "\n"
1148		<< "layout (set=0, binding=1, std430) readonly buffer PrimitiveDataBlock {\n"
1149		<< "    IndexAndColor data[];\n"
1150		<< "} primitive;\n"
1151		<< "\n"
1152		<< "layout (push_constant, std430) uniform PushConstantBlock {\n"
1153		<< "    uint  totalTriangles;\n" // How many triangles in total we have to emit.
1154		<< "    float depth;\n"          // Triangle depth (allows painting the background with a different color).
1155		<< "    float red;\n"            // Triangle red color component.
1156		<< "} pc;\n"
1157		<< "\n"
1158		<< "void main ()\n"
1159		<< "{\n"
1160		// First primitive for this work group, plus the work group primitive and vertex count.
1161		<< "    const uint firstPrimitive   = gl_WorkGroupID.x * gl_WorkGroupSize.x;\n"
1162		<< "    const uint wgTriangleCount  = ((pc.totalTriangles >= firstPrimitive) ? min(pc.totalTriangles - firstPrimitive, " << kLocalInvocations << ") : 0u);\n"
1163		<< "    const uint wgVertexCount    = wgTriangleCount * 3u + " << kExtraVertices << "u;\n"
1164		<< "\n"
1165		;
1166
1167	if (!m_params.compactVertices)
1168	{
1169		// Produce extra unused vertices.
1170		mesh
1171			<< "    {\n"
1172			<< "        const float proportion = float(gl_LocalInvocationIndex) / float(gl_WorkGroupSize.x);\n"
1173			<< "        gl_MeshVerticesEXT[gl_LocalInvocationIndex].gl_Position = vec4(proportion, 1.0 - proportion, pc.depth, 1.0);\n"
1174			<< "    }\n"
1175			<< "\n"
1176			;
1177	}
1178
1179	mesh
1180		<< "    SetMeshOutputsEXT(wgVertexCount, wgTriangleCount);\n"
1181		<< "\n"
1182		// Calculate global invocation primitive id, and use it to access the per-primitive buffer. From there, get the primitive index in the
1183		// vertex buffer and the blue color component.
1184		<< "    if (gl_LocalInvocationIndex < wgTriangleCount) {\n"
1185		<< "        const uint  primitiveID         = firstPrimitive + gl_LocalInvocationIndex;\n"
1186		<< "        const uint  primitiveIndex      = primitive.data[primitiveID].index;\n"
1187		<< "        const float blue                = primitive.data[primitiveID].color;\n"
1188		<< "        const uint  firstVertexIndex    = primitiveIndex * 3u;\n"
1189		<< "        const uvec3 globalVertexIndices = uvec3(firstVertexIndex, firstVertexIndex+1u, firstVertexIndex+2u);\n"
1190		<< "        const uint  localPrimitiveID    = gl_LocalInvocationIndex;\n"
1191		<< "        const uint  firstLocalVertex    = localPrimitiveID * 3u + " << kExtraVertices << "u;\n"
1192		<< "        const uvec3 localVertexIndices  = uvec3(firstLocalVertex, firstLocalVertex+1u, firstLocalVertex+2u);\n"
1193		<< "\n"
1194		<< "        gl_MeshVerticesEXT[localVertexIndices.x].gl_Position = vec4(vertex.coords[globalVertexIndices.x], pc.depth, 1.0);\n"
1195		<< "        gl_MeshVerticesEXT[localVertexIndices.y].gl_Position = vec4(vertex.coords[globalVertexIndices.y], pc.depth, 1.0);\n"
1196		<< "        gl_MeshVerticesEXT[localVertexIndices.z].gl_Position = vec4(vertex.coords[globalVertexIndices.z], pc.depth, 1.0);\n"
1197		<< "\n"
1198		<< "        gl_PrimitiveTriangleIndicesEXT[localPrimitiveID] = localVertexIndices;\n"
1199		<< "        primitiveColor[localPrimitiveID]                 = vec4(pc.red, 0.0, blue, 1.0f);\n"
1200		<< "    }\n"
1201		<< "}\n"
1202		;
1203	programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << buildOptions;
1204}
1205
// Converts a pixel index along one axis into the framebuffer coordinate of the pixel center, in the [-1, 1] range.
// pixelId is the zero-based pixel index and totalPixels is the number of pixels along that axis.
inline float pixelToFBCoords (uint32_t pixelId, uint32_t totalPixels)
{
	const float pixelCenter	= static_cast<float>(pixelId) + 0.5f;
	const float normalized	= pixelCenter / static_cast<float>(totalPixels);

	// Map [0, 1] to [-1, 1].
	return normalized * 2.0f - 1.0f;
}
1210
1211tcu::TestStatus PartialUsageInstance::iterate ()
1212{
1213	const auto&			vki					= m_context.getInstanceInterface();
1214	const auto&			vkd					= m_context.getDeviceInterface();
1215	const auto			physicalDevice		= m_context.getPhysicalDevice();
1216	const auto			device				= m_context.getDevice();
1217	const auto			queueIndex			= m_context.getUniversalQueueFamilyIndex();
1218	const auto			queue				= m_context.getUniversalQueue();
1219	auto&				alloc				= m_context.getDefaultAllocator();
1220	const auto			bufferUsage			= VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
1221	const auto			bufferDescType		= VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
1222	const auto			bufferDescStages	= VK_SHADER_STAGE_MESH_BIT_EXT;
1223	const auto			pcSize				= static_cast<VkDeviceSize>(sizeof(PartialUsageCase::PushConstants));
1224	const auto			pcStages			= bufferDescStages;
1225	const auto			pcRange				= makePushConstantRange(pcStages, 0u, static_cast<uint32_t>(pcSize));
1226	const auto			fbExtent			= makeExtent3D(PartialUsageCase::kWidth, PartialUsageCase::kHeight, 1u);
1227	const tcu::IVec3	iExtent				(static_cast<int>(fbExtent.width), static_cast<int>(fbExtent.height), static_cast<int>(fbExtent.depth));
1228	const auto			colorFormat			= VK_FORMAT_R8G8B8A8_UNORM;
1229	const auto			colorTcuFormat		= mapVkFormat(colorFormat);
1230	const auto			dsFormat			= VK_FORMAT_D16_UNORM;
1231	const auto			vertexSize			= sizeof(tcu::Vec2);
1232	const auto			verticesPerTriangle	= 3u;
1233	const auto			pixelCount			= fbExtent.width * fbExtent.height * fbExtent.depth;
1234	const auto			vertexCount			= pixelCount * verticesPerTriangle;
1235	const auto			triangleSize		= vertexSize * verticesPerTriangle;
1236	const auto			colorThreshold		= 0.005f; // 1/255 < 0.005 < 2/255
1237	const float			fgRed				= 0.0f;
1238	const float			bgRed				= 1.0f;
1239	const float			bgBlue				= 1.0f;
1240
1241	// Quarter of the pixel width and height in framebuffer coordinates.
1242	const float			pixelWidth4			= 2.0f / (static_cast<float>(fbExtent.width) * 4.0f);
1243	const float			pixelHeight4		= 2.0f / (static_cast<float>(fbExtent.height) * 4.0f);
1244
1245	// Offsets for each triangle vertex from the pixel center.
1246	//	+-------------------+
1247	//	|         2         |
1248	//	|         x         |
1249	//	|        x x        |
1250	//	|       x   x       |
1251	//	|      x  x  x      |
1252	//	|     x       x     |
1253	//	|    xxxxxxxxxxx    |
1254	//	|   0           1   |
1255	//	+-------------------+
1256	const std::vector<tcu::Vec2> offsets
1257	{
1258		tcu::Vec2(-pixelWidth4, +pixelHeight4),
1259		tcu::Vec2(+pixelWidth4, +pixelHeight4),
1260		tcu::Vec2(        0.0f, -pixelHeight4),
1261	};
1262
1263	// We'll use two draw calls: triangles on the front and triangle that sets the background color, so we need two vertex buffers
1264	// and two primitive data buffers.
1265	const auto			vertexBufferFrontSize	= static_cast<VkDeviceSize>(triangleSize * pixelCount);
1266	const auto			vertexBufferFrontInfo	= makeBufferCreateInfo(vertexBufferFrontSize, bufferUsage);
1267	BufferWithMemory	vertexBufferFront		(vkd, device, alloc, vertexBufferFrontInfo, MemoryRequirement::HostVisible);
1268	auto&				vertexBufferFrontAlloc	= vertexBufferFront.getAllocation();
1269	void*				vertexBufferFrontData	= vertexBufferFrontAlloc.getHostPtr();
1270
1271	std::vector<tcu::Vec2> trianglePerPixel;
1272	trianglePerPixel.reserve(vertexCount);
1273
1274	// Fill front vertex buffer.
1275	for (uint32_t y = 0u; y < PartialUsageCase::kHeight; ++y)
1276		for (uint32_t x = 0u; x < PartialUsageCase::kWidth; ++x)
1277			for (uint32_t v = 0u; v < verticesPerTriangle; ++v)
1278			{
1279				const auto&	offset = offsets.at(v);
1280				const auto	xCoord = pixelToFBCoords(x, PartialUsageCase::kWidth) + offset.x();
1281				const auto	yCoord = pixelToFBCoords(y, PartialUsageCase::kHeight) + offset.y();
1282				trianglePerPixel.emplace_back(xCoord, yCoord);
1283			}
1284	deMemcpy(vertexBufferFrontData, trianglePerPixel.data(), de::dataSize(trianglePerPixel));
1285
1286	// For the front triangles we will select some pixels randomly.
1287	using IndexAndColor = PartialUsageCase::IndexAndColor;
1288
1289	std::set<uint32_t>			selectedPixels;
1290	std::vector<IndexAndColor>	indicesAndColors;
1291	de::Random					rnd					(1646058327u);
1292	const auto					maxId				= static_cast<int>(pixelCount) - 1;
1293	const auto					fTotalTriangles		= static_cast<float>(PartialUsageCase::kTotalPrimitives);
1294
1295	while (selectedPixels.size() < PartialUsageCase::kTotalPrimitives)
1296	{
1297		const auto pixelId = static_cast<uint32_t>(rnd.getInt(0, maxId));
1298		if (!selectedPixels.count(pixelId))
1299		{
1300			selectedPixels.insert(pixelId);
1301
1302			const float			colorVal		= static_cast<float>(selectedPixels.size()) / fTotalTriangles;
1303			const IndexAndColor	indexAndColor	{ pixelId, colorVal };
1304
1305			indicesAndColors.push_back(indexAndColor);
1306		}
1307	}
1308
1309	const auto			primDataBufferFrontSize		= static_cast<VkDeviceSize>(de::dataSize(indicesAndColors));
1310	const auto			primDataBufferFrontInfo		= makeBufferCreateInfo(primDataBufferFrontSize, bufferUsage);
1311	BufferWithMemory	primDataBufferFront			(vkd, device, alloc, primDataBufferFrontInfo, MemoryRequirement::HostVisible);
1312	auto&				primDataBufferFrontAlloc	= primDataBufferFront.getAllocation();
1313	void*				primDataBufferFrontData		= primDataBufferFrontAlloc.getHostPtr();
1314	deMemcpy(primDataBufferFrontData, indicesAndColors.data(), de::dataSize(indicesAndColors));
1315
1316	// Generate reference image based on the previous data.
1317	tcu::TextureLevel		referenceLevel	(colorTcuFormat, iExtent.x(), iExtent.y(), iExtent.z());
1318	tcu::PixelBufferAccess	referenceAccess	= referenceLevel.getAccess();
1319	const tcu::Vec4			bgColor			(bgRed, 0.0f, bgBlue, 1.0f);
1320
1321	tcu::clear(referenceAccess, bgColor);
1322	for (const auto& indexAndColor : indicesAndColors)
1323	{
1324		const int		xCoord	= static_cast<int>(indexAndColor.index % fbExtent.width);
1325		const int		yCoord	= static_cast<int>(indexAndColor.index / fbExtent.width);
1326		const tcu::Vec4	color	(fgRed, 0.0f, indexAndColor.color, 1.0f);
1327
1328		referenceAccess.setPixel(color, xCoord, yCoord);
1329	}
1330
1331	// Background buffers. These will only contain one triangle.
1332	const std::vector<tcu::Vec2> backgroundTriangle
1333	{
1334		tcu::Vec2(-1.0f, -1.0f),
1335		tcu::Vec2(-1.0f,  3.0f),
1336		tcu::Vec2( 3.0f, -1.0f),
1337	};
1338
1339	const PartialUsageCase::IndexAndColor backgroundTriangleData { 0u, bgBlue };
1340
1341	const auto			vertexBufferBackSize	= static_cast<VkDeviceSize>(de::dataSize(backgroundTriangle));
1342	const auto			vertexBufferBackInfo	= makeBufferCreateInfo(vertexBufferBackSize, bufferUsage);
1343	BufferWithMemory	vertexBufferBack		(vkd, device, alloc, vertexBufferBackInfo, MemoryRequirement::HostVisible);
1344	auto&				vertexBufferBackAlloc	= vertexBufferBack.getAllocation();
1345	void*				vertexBufferBackData	= vertexBufferBackAlloc.getHostPtr();
1346	deMemcpy(vertexBufferBackData, backgroundTriangle.data(), de::dataSize(backgroundTriangle));
1347
1348	const auto			primDataBufferBackSize	= static_cast<VkDeviceSize>(sizeof(backgroundTriangleData));
1349	const auto			primDataBufferBackInfo	= makeBufferCreateInfo(primDataBufferBackSize, bufferUsage);
1350	BufferWithMemory	primDataBufferBack		(vkd, device, alloc, primDataBufferBackInfo, MemoryRequirement::HostVisible);
1351	auto&				primDataBufferBackAlloc	= primDataBufferBack.getAllocation();
1352	void*				primDataBufferBackData	= primDataBufferBackAlloc.getHostPtr();
1353	deMemcpy(primDataBufferBackData, &backgroundTriangleData, sizeof(backgroundTriangleData));
1354
1355	// Descriptor pool and descriptor sets.
1356	DescriptorPoolBuilder poolBuilder;
1357	poolBuilder.addType(bufferDescType, 4u);
1358	const auto descriptorPool = poolBuilder.build(vkd, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 2u);
1359
1360	DescriptorSetLayoutBuilder setLayoutBuilder;
1361	setLayoutBuilder.addSingleBinding(bufferDescType, bufferDescStages);
1362	setLayoutBuilder.addSingleBinding(bufferDescType, bufferDescStages);
1363	const auto setLayout = setLayoutBuilder.build(vkd, device);
1364
1365	const auto setFront	= makeDescriptorSet(vkd, device, descriptorPool.get(), setLayout.get());
1366	const auto setBack	= makeDescriptorSet(vkd, device, descriptorPool.get(), setLayout.get());
1367
1368	// Update descriptor sets.
1369	DescriptorSetUpdateBuilder updateBuilder;
1370	{
1371		const auto bufferInfo = makeDescriptorBufferInfo(vertexBufferFront.get(), 0ull, vertexBufferFrontSize);
1372		updateBuilder.writeSingle(setFront.get(), DescriptorSetUpdateBuilder::Location::binding(0u), bufferDescType, &bufferInfo);
1373	}
1374	{
1375		const auto bufferInfo = makeDescriptorBufferInfo(primDataBufferFront.get(), 0ull, primDataBufferFrontSize);
1376		updateBuilder.writeSingle(setFront.get(), DescriptorSetUpdateBuilder::Location::binding(1u), bufferDescType, &bufferInfo);
1377	}
1378	{
1379		const auto bufferInfo = makeDescriptorBufferInfo(vertexBufferBack.get(), 0ull, vertexBufferBackSize);
1380		updateBuilder.writeSingle(setBack.get(), DescriptorSetUpdateBuilder::Location::binding(0u), bufferDescType, &bufferInfo);
1381	}
1382	{
1383		const auto bufferInfo = makeDescriptorBufferInfo(primDataBufferBack.get(), 0ull, primDataBufferBackSize);
1384		updateBuilder.writeSingle(setBack.get(), DescriptorSetUpdateBuilder::Location::binding(1u), bufferDescType, &bufferInfo);
1385	}
1386	updateBuilder.update(vkd, device);
1387
1388	// Pipeline layout.
1389	const PipelineLayoutWrapper pipelineLayout (m_constructionType, vkd, device, setLayout.get(), &pcRange);
1390
1391	// Shader modules.
1392	const auto&	binaries	= m_context.getBinaryCollection();
1393	const auto	taskShader	= ShaderWrapper(vkd, device, binaries.get("task"));
1394	const auto	meshShader	= ShaderWrapper(vkd, device, binaries.get("mesh"));
1395	const auto	fragShader	= ShaderWrapper(vkd, device, binaries.get("frag"));
1396
1397	// Render pass.
1398	const auto renderPass = makeRenderPass(vkd, device, colorFormat, dsFormat);
1399
1400	// Color and depth/stencil buffers.
1401	const VkImageCreateInfo imageCreateInfo =
1402	{
1403		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,	//	VkStructureType			sType;
1404		nullptr,								//	const void*				pNext;
1405		0u,										//	VkImageCreateFlags		flags;
1406		VK_IMAGE_TYPE_2D,						//	VkImageType				imageType;
1407		VK_FORMAT_UNDEFINED,					//	VkFormat				format;
1408		fbExtent,								//	VkExtent3D				extent;
1409		1u,										//	uint32_t				mipLevels;
1410		1u,										//	uint32_t				arrayLayers;
1411		VK_SAMPLE_COUNT_1_BIT,					//	VkSampleCountFlagBits	samples;
1412		VK_IMAGE_TILING_OPTIMAL,				//	VkImageTiling			tiling;
1413		0u,										//	VkImageUsageFlags		usage;
1414		VK_SHARING_MODE_EXCLUSIVE,				//	VkSharingMode			sharingMode;
1415		0u,										//	uint32_t				queueFamilyIndexCount;
1416		nullptr,								//	const uint32_t*			pQueueFamilyIndices;
1417		VK_IMAGE_LAYOUT_UNDEFINED,				//	VkImageLayout			initialLayout;
1418	};
1419
1420	std::unique_ptr<ImageWithMemory> colorAttachment;
1421	{
1422		auto colorAttCreateInfo		= imageCreateInfo;
1423		colorAttCreateInfo.format	= colorFormat;
1424		colorAttCreateInfo.usage	= (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
1425
1426		colorAttachment.reset(new ImageWithMemory(vkd, device, alloc, colorAttCreateInfo, MemoryRequirement::Any));
1427	}
1428
1429	std::unique_ptr<ImageWithMemory> dsAttachment;
1430	{
1431		auto dsAttCreateInfo	= imageCreateInfo;
1432		dsAttCreateInfo.format	= dsFormat;
1433		dsAttCreateInfo.usage	= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
1434
1435		dsAttachment.reset(new ImageWithMemory(vkd, device, alloc, dsAttCreateInfo, MemoryRequirement::Any));
1436	}
1437
1438	const auto colorSRR	= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u);
1439	const auto colorSRL	= makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u);
1440	const auto dsSRR	= makeImageSubresourceRange(VK_IMAGE_ASPECT_DEPTH_BIT, 0u, 1u, 0u, 1u);
1441
1442	const auto colorView	= makeImageView(vkd, device, colorAttachment->get(), VK_IMAGE_VIEW_TYPE_2D, colorFormat, colorSRR);
1443	const auto dsView		= makeImageView(vkd, device, dsAttachment->get(), VK_IMAGE_VIEW_TYPE_2D, dsFormat, dsSRR);
1444
1445	// Create verification buffer.
1446	const auto			verificationBufferSize	= static_cast<VkDeviceSize>(tcu::getPixelSize(colorTcuFormat) * iExtent.x() * iExtent.y() * iExtent.z());
1447	const auto			verificationBufferInfo	= makeBufferCreateInfo(verificationBufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
1448	BufferWithMemory	verificationBuffer		(vkd, device, alloc, verificationBufferInfo, MemoryRequirement::HostVisible);
1449	auto&				verificationBufferAlloc	= verificationBuffer.getAllocation();
1450	void*				verificationBufferData	= verificationBufferAlloc.getHostPtr();
1451
1452	// Framebuffer.
1453	const std::vector<VkImageView>	fbViews		{ colorView.get(), dsView.get() };
1454	const auto						framebuffer	= makeFramebuffer(
1455		vkd, device, renderPass.get(),
1456		static_cast<uint32_t>(fbViews.size()), de::dataOrNull(fbViews),
1457		fbExtent.width, fbExtent.height);
1458
1459	// Viewports and scissors.
1460	const std::vector<VkViewport>	viewports	(1u, makeViewport(fbExtent));
1461	const std::vector<VkRect2D>		scissors	(1u, makeRect2D(fbExtent));
1462
1463	// Pipeline.
1464	const VkStencilOpState						stencilOpState	= {};
1465	const VkPipelineDepthStencilStateCreateInfo	dsInfo			=
1466	{
1467		VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,		//	VkStructureType							sType;
1468		nullptr,														//	const void*								pNext;
1469		0u,																//	VkPipelineDepthStencilStateCreateFlags	flags;
1470		VK_TRUE,														//	VkBool32								depthTestEnable;
1471		VK_TRUE,														//	VkBool32								depthWriteEnable;
1472		VK_COMPARE_OP_LESS,												//	VkCompareOp								depthCompareOp;
1473		VK_FALSE,														//	VkBool32								depthBoundsTestEnable;
1474		VK_FALSE,														//	VkBool32								stencilTestEnable;
1475		stencilOpState,													//	VkStencilOpState						front;
1476		stencilOpState,													//	VkStencilOpState						back;
1477		0.0f,															//	float									minDepthBounds;
1478		1.0f,															//	float									maxDepthBounds;
1479	};
1480
1481	GraphicsPipelineWrapper pipelineMaker(vki, vkd, physicalDevice, device, m_context.getDeviceExtensions(), m_constructionType);
1482	makeMeshGraphicsPipeline(pipelineMaker, pipelineLayout,
1483							 taskShader, meshShader, fragShader,
1484							 renderPass.get(), viewports, scissors, 0u, &dsInfo);
1485	const auto pipeline = pipelineMaker.getPipeline();
1486
1487	// Command pool and buffer.
1488	const auto cmdPool		= makeCommandPool(vkd, device, queueIndex);
1489	const auto cmdBufferPtr	= allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1490	const auto cmdBuffer	= cmdBufferPtr.get();
1491
1492	// Draw the triangles in the front, then the triangle in the back.
1493	const tcu::Vec4	clearColor		(0.0f, 0.0f, 0.0f, 1.0f);
1494	const float		clearDepth		= 1.0f;
1495	const uint32_t	clearStencil	= 0u;
1496
1497	const PartialUsageCase::PushConstants pcFront	= { PartialUsageCase::kTotalPrimitives, 0.0f, fgRed };
1498	const PartialUsageCase::PushConstants pcBack	= { 1u, 0.5f, bgRed };
1499
1500	beginCommandBuffer(vkd, cmdBuffer);
1501	beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0u), clearColor, clearDepth, clearStencil);
1502	vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
1503
1504	// Front triangles.
1505	vkd.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout.get(), 0u, 1u, &setFront.get(), 0u, nullptr);
1506	vkd.cmdPushConstants(cmdBuffer, pipelineLayout.get(), pcStages, 0u, static_cast<uint32_t>(pcSize), &pcFront);
1507	vkd.cmdDrawMeshTasksEXT(cmdBuffer, 1u, 1u, 1u);
1508
1509	// Back triangles.
1510	vkd.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout.get(), 0u, 1u, &setBack.get(), 0u, nullptr);
1511	vkd.cmdPushConstants(cmdBuffer, pipelineLayout.get(), pcStages, 0u, static_cast<uint32_t>(pcSize), &pcBack);
1512	vkd.cmdDrawMeshTasksEXT(cmdBuffer, 1u, 1u, 1u);
1513
1514	endRenderPass(vkd, cmdBuffer);
1515
1516	// Copy color attachment to verification buffer.
1517	const auto colorToTransferBarrier	= makeImageMemoryBarrier(
1518		VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT,
1519		VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
1520		colorAttachment->get(), colorSRR);
1521	const auto transferToHostBarrier	= makeMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
1522	const auto copyRegion				= makeBufferImageCopy(fbExtent, colorSRL);
1523
1524	cmdPipelineImageMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, &colorToTransferBarrier);
1525	vkd.cmdCopyImageToBuffer(cmdBuffer, colorAttachment->get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, verificationBuffer.get(), 1u, &copyRegion);
1526	cmdPipelineMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, &transferToHostBarrier);
1527
1528	endCommandBuffer(vkd, cmdBuffer);
1529	submitCommandsAndWait(vkd, device, queue, cmdBuffer);
1530
1531	// Verify color attachment.
1532	invalidateAlloc(vkd, device, verificationBufferAlloc);
1533
1534	tcu::ConstPixelBufferAccess	resultAccess	(colorTcuFormat, iExtent, verificationBufferData);
1535	auto&						log				= m_context.getTestContext().getLog();
1536	const tcu::Vec4				errorThreshold	(colorThreshold, 0.0f, colorThreshold, 0.0f);
1537
1538	if (!tcu::floatThresholdCompare(log, "Result", "", referenceAccess, resultAccess, errorThreshold, tcu::COMPARE_LOG_ON_ERROR))
1539		TCU_FAIL("Result does not match reference -- check log for details");
1540
1541	return tcu::TestStatus::pass("Pass");
1542}
1543
1544// Create a classic and a mesh shading pipeline using graphics pipeline libraries. Both pipelines will use the same fragment shader
1545// pipeline library, and the fragment shader will use the gl_Layer built-in, which is per-primitive in mesh shaders and per-vertex
1546// in vertex shaders.
1547class SharedFragLibraryCase : public vkt::TestCase
1548{
1549public:
1550					SharedFragLibraryCase	(tcu::TestContext& testCtx, const std::string& name, PipelineConstructionType constructionType)
1551						: vkt::TestCase			(testCtx, name)
1552						, m_constructionType	(constructionType)
1553						{}
1554	virtual			~SharedFragLibraryCase	(void) {}
1555
1556	void			checkSupport			(Context& context) const override;
1557	void			initPrograms			(vk::SourceCollections& programCollection) const override;
1558	TestInstance*	createInstance			(Context& context) const override;
1559
1560	static std::vector<tcu::Vec4> getLayerColors (void);
1561
1562protected:
1563	PipelineConstructionType m_constructionType;
1564};
1565
1566class SharedFragLibraryInstance : public vkt::TestInstance
1567{
1568public:
1569						SharedFragLibraryInstance	(Context& context, PipelineConstructionType constructionType)
1570							: vkt::TestInstance		(context)
1571							, m_constructionType	(constructionType)
1572							{}
1573	virtual				~SharedFragLibraryInstance	(void) {}
1574
1575	tcu::TestStatus		iterate						(void) override;
1576
1577protected:
1578	PipelineConstructionType m_constructionType;
1579};
1580
1581std::vector<tcu::Vec4> SharedFragLibraryCase::getLayerColors (void)
1582{
1583	std::vector<tcu::Vec4> layerColors
1584	{
1585		tcu::Vec4(0.0f, 0.0f, 0.0f, 1.0f),
1586		tcu::Vec4(0.0f, 0.0f, 1.0f, 1.0f),
1587		tcu::Vec4(1.0f, 1.0f, 0.0f, 1.0f),
1588	};
1589
1590	return layerColors;
1591}
1592
1593void SharedFragLibraryCase::checkSupport (Context& context) const
1594{
1595	checkTaskMeshShaderSupportEXT(context, false/*requireTask*/, true/*requireMesh*/);
1596
1597	if (context.getUsedApiVersion() < VK_API_VERSION_1_2)
1598		context.requireDeviceFunctionality("VK_EXT_shader_viewport_index_layer");
1599	else
1600	{
1601		// More fine-grained: we do not need shaderViewportIndex.
1602		const auto& vk12Features = context.getDeviceVulkan12Features();
1603		if (!vk12Features.shaderOutputLayer)
1604			TCU_THROW(NotSupportedError, "shaderOutputLayer not supported");
1605	}
1606
1607	checkPipelineConstructionRequirements(context.getInstanceInterface(), context.getPhysicalDevice(), m_constructionType);
1608}
1609
1610void SharedFragLibraryCase::initPrograms (vk::SourceCollections &programCollection) const
1611{
1612	const auto meshBuildOptions = getMinMeshEXTBuildOptions(programCollection.usedVulkanVersion);
1613
1614	const std::string vtxPositions =
1615		"vec2 positions[3] = vec2[](\n"
1616		"    vec2(-1.0, -1.0),\n"
1617		"    vec2(-1.0, 3.0),\n"
1618		"    vec2(3.0, -1.0)\n"
1619		");\n"
1620		;
1621
1622	// The vertex shader emits geometry to layer 1.
1623	std::ostringstream vert;
1624	vert
1625		<< "#version 450\n"
1626		<< "#extension GL_ARB_shader_viewport_layer_array : enable\n"
1627		<< "\n"
1628		<< vtxPositions
1629		<< "void main ()\n"
1630		<< "{\n"
1631		<< "    gl_Position = vec4(positions[gl_VertexIndex], 0.0, 1.0);\n"
1632		<< "    gl_Layer = 1;\n"
1633		<< "}\n"
1634		;
1635	programCollection.glslSources.add("vert") << glu::VertexSource(vert.str());
1636	programCollection.glslSources.add("vert_1_2") << glu::VertexSource(vert.str()) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_5, 0u, true);
1637
1638	// The mesh shader emits geometry to layer 2.
1639	std::ostringstream mesh;
1640	mesh
1641		<< "#version 450\n"
1642		<< "#extension GL_EXT_mesh_shader : enable\n"
1643		<< "\n"
1644		<< "layout (local_size_x=1, local_size_y=1, local_size_z=1) in;\n"
1645		<< "layout (triangles) out;\n"
1646		<< "layout (max_vertices=3, max_primitives=1) out;\n"
1647		<< "\n"
1648		<< "perprimitiveEXT out gl_MeshPerPrimitiveEXT {\n"
1649		<< "   int gl_Layer;\n"
1650		<< "} gl_MeshPrimitivesEXT[];\n"
1651		<< "\n"
1652		<< vtxPositions
1653		<< "void main ()\n"
1654		<< "{\n"
1655		<< "    SetMeshOutputsEXT(3u, 1u);\n"
1656		<< "    for (uint i = 0; i < 3; ++i)\n"
1657		<< "        gl_MeshVerticesEXT[i].gl_Position = vec4(positions[i], 0.0, 1.0);\n"
1658		<< "    gl_PrimitiveTriangleIndicesEXT[0] = uvec3(0, 1, 2);\n"
1659		<< "    gl_MeshPrimitivesEXT[0].gl_Layer = 2;\n"
1660		<< "}\n"
1661		;
1662	programCollection.glslSources.add("mesh") << glu::MeshSource(mesh.str()) << meshBuildOptions;
1663
1664	// The frag shader uses the gl_Layer built-in to choose an output color.
1665	const auto outColors = getLayerColors();
1666	DE_ASSERT(outColors.size() == 3);
1667
1668	std::ostringstream frag;
1669	frag
1670		<< "#version 450\n"
1671		<< "\n"
1672		<< "layout (location=0) out vec4 outColor;\n"
1673		<< "\n"
1674		<< "vec4 outColors[3] = vec4[](\n"
1675		<< "	vec4" << outColors.at(0) << ",\n"
1676		<< "	vec4" << outColors.at(1) << ",\n"
1677		<< "	vec4" << outColors.at(2) << "\n"
1678		<< ");\n"
1679		<< "\n"
1680		<< "void main ()\n"
1681		<< "{\n"
1682		<< "	outColor = outColors[gl_Layer];\n"
1683		<< "}\n"
1684		;
1685	programCollection.glslSources.add("frag") << glu::FragmentSource(frag.str());
1686}
1687
1688TestInstance* SharedFragLibraryCase::createInstance (Context& context) const
1689{
1690	return new SharedFragLibraryInstance(context, m_constructionType);
1691}
1692
1693VkGraphicsPipelineLibraryCreateInfoEXT makeLibCreateInfo (VkGraphicsPipelineLibraryFlagsEXT flags, void* pNext = nullptr)
1694{
1695	const VkGraphicsPipelineLibraryCreateInfoEXT createInfo =
1696	{
1697		VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_LIBRARY_CREATE_INFO_EXT,	//	VkStructureType						sType;
1698		pNext,															//	void*								pNext;
1699		flags,															//	VkGraphicsPipelineLibraryFlagsEXT	flags;
1700	};
1701
1702	return createInfo;
1703}
1704
1705tcu::TestStatus SharedFragLibraryInstance::iterate (void)
1706{
1707	const auto&			vkd					= m_context.getDeviceInterface();
1708	const auto&			device				= m_context.getDevice();
1709	const auto			queueIndex			= m_context.getUniversalQueueFamilyIndex();
1710	const auto			queue				= m_context.getUniversalQueue();
1711	auto&				alloc				= m_context.getDefaultAllocator();
1712	const auto			layerColors			= SharedFragLibraryCase::getLayerColors();
1713	const auto&			clearColor			= layerColors.front();
1714	const auto			layerCount			= static_cast<uint32_t>(layerColors.size());
1715	const auto			fbExtent			= makeExtent3D(1u, 1u, 1u);
1716	const tcu::IVec3	iExtent				(static_cast<int>(fbExtent.width), static_cast<int>(fbExtent.height), static_cast<int>(layerCount));
1717	const auto			fbFormat			= VK_FORMAT_R8G8B8A8_UNORM;
1718	const auto			tcuFormat			= mapVkFormat(fbFormat);
1719	const auto			pixelSize			= tcu::getPixelSize(tcuFormat);
1720	const auto			pixelCount			= fbExtent.width * fbExtent.height * layerCount;
1721	const auto			fbUsage				= (VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT);
1722	const bool			optimized			= (m_constructionType == PIPELINE_CONSTRUCTION_TYPE_LINK_TIME_OPTIMIZED_LIBRARY);
1723	const auto			libExtraFlags		= (optimized ? VK_PIPELINE_CREATE_RETAIN_LINK_TIME_OPTIMIZATION_INFO_BIT_EXT : 0);
1724	const auto			libCompileFlags		= (VK_PIPELINE_CREATE_LIBRARY_BIT_KHR | libExtraFlags);
1725	const auto			pipelineLinkFlags	= (optimized ? VK_PIPELINE_CREATE_LINK_TIME_OPTIMIZATION_BIT_EXT : 0);
1726
1727	// Color buffer.
1728	const VkImageCreateInfo colorBufferCreateInfo =
1729	{
1730		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,	//	VkStructureType			sType;
1731		nullptr,								//	const void*				pNext;
1732		0u,										//	VkImageCreateFlags		flags;
1733		VK_IMAGE_TYPE_2D,						//	VkImageType				imageType;
1734		fbFormat,								//	VkFormat				format;
1735		fbExtent,								//	VkExtent3D				extent;
1736		1u,										//	uint32_t				mipLevels;
1737		layerCount,								//	uint32_t				arrayLayers;
1738		VK_SAMPLE_COUNT_1_BIT,					//	VkSampleCountFlagBits	samples;
1739		VK_IMAGE_TILING_OPTIMAL,				//	VkImageTiling			tiling;
1740		fbUsage,								//	VkImageUsageFlags		usage;
1741		VK_SHARING_MODE_EXCLUSIVE,				//	VkSharingMode			sharingMode;
1742		0u,										//	uint32_t				queueFamilyIndexCount;
1743		nullptr,								//	const uint32_t*			pQueueFamilyIndices;
1744		VK_IMAGE_LAYOUT_UNDEFINED,				//	VkImageLayout			initialLayout;
1745	};
1746
1747	ImageWithMemory	colorBuffer		(vkd, device, alloc, colorBufferCreateInfo, MemoryRequirement::Any);
1748	const auto		colorBufferSRR	= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, layerCount);
1749	const auto		colorBufferSRL	= makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, layerCount);
1750	const auto		colorBufferView	= makeImageView(vkd, device, colorBuffer.get(), VK_IMAGE_VIEW_TYPE_2D_ARRAY, fbFormat, colorBufferSRR);
1751
1752	// Render pass.
1753	const auto renderPass = makeRenderPass(vkd, device, fbFormat);
1754
1755	// Framebuffer.
1756	const auto framebuffer = makeFramebuffer(vkd, device, renderPass.get(), colorBufferView.get(), fbExtent.width, fbExtent.height, layerCount);
1757
1758	// Verification buffer.
1759	const auto			verificationBufferSize	= static_cast<VkDeviceSize>(static_cast<int>(pixelCount) * pixelSize);
1760	const auto			verificationBufferInfo	= makeBufferCreateInfo(verificationBufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
1761	BufferWithMemory	verificationBuffer		(vkd, device, alloc, verificationBufferInfo, MemoryRequirement::HostVisible);
1762	auto&				verificationBufferAlloc	= verificationBuffer.getAllocation();
1763	void*				verificationBufferData	= verificationBufferAlloc.getHostPtr();
1764
1765	// Pipeline layout (common).
1766	const auto pipelineLayout = makePipelineLayout(vkd, device);
1767
1768	// Shader modules.
1769	const auto&	binaries	= m_context.getBinaryCollection();
1770	const auto	vertModule	= createShaderModule(vkd, device, (m_context.contextSupports(VK_API_VERSION_1_2)) ? binaries.get("vert_1_2") : binaries.get("vert"));
1771	const auto	meshModule	= createShaderModule(vkd, device, binaries.get("mesh"));
1772	const auto	fragModule	= createShaderModule(vkd, device, binaries.get("frag"));
1773
1774	// Fragment output state library (common).
1775	const VkColorComponentFlags					colorComponentFlags			= (	VK_COLOR_COMPONENT_R_BIT
1776																			|	VK_COLOR_COMPONENT_G_BIT
1777																			|	VK_COLOR_COMPONENT_B_BIT
1778																			|	VK_COLOR_COMPONENT_A_BIT);
1779	const VkPipelineColorBlendAttachmentState	colorBlendAttachmentState	=
1780	{
1781		VK_FALSE,				// VkBool32					blendEnable
1782		VK_BLEND_FACTOR_ZERO,	// VkBlendFactor			srcColorBlendFactor
1783		VK_BLEND_FACTOR_ZERO,	// VkBlendFactor			dstColorBlendFactor
1784		VK_BLEND_OP_ADD,		// VkBlendOp				colorBlendOp
1785		VK_BLEND_FACTOR_ZERO,	// VkBlendFactor			srcAlphaBlendFactor
1786		VK_BLEND_FACTOR_ZERO,	// VkBlendFactor			dstAlphaBlendFactor
1787		VK_BLEND_OP_ADD,		// VkBlendOp				alphaBlendOp
1788		colorComponentFlags,	// VkColorComponentFlags	colorWriteMask
1789	};
1790
1791	const VkPipelineColorBlendStateCreateInfo colorBlendState =
1792	{
1793		VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,	//	VkStructureType								sType;
1794		nullptr,													//	const void*									pNext;
1795		0u,															//	VkPipelineColorBlendStateCreateFlags		flags;
1796		VK_FALSE,													//	VkBool32									logicOpEnable;
1797		VK_LOGIC_OP_CLEAR,											//	VkLogicOp									logicOp;
1798		1u,															//	uint32_t									attachmentCount;
1799		&colorBlendAttachmentState,									//	const VkPipelineColorBlendAttachmentState*	pAttachments;
1800		{ 0.0f, 0.0f, 0.0f, 0.0f },									//	float										blendConstants[4];
1801	};
1802
1803	const VkPipelineMultisampleStateCreateInfo multisampleState =
1804	{
1805		VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,	// VkStructureType								sType
1806		nullptr,													// const void*									pNext
1807		0u,															// VkPipelineMultisampleStateCreateFlags		flags
1808		VK_SAMPLE_COUNT_1_BIT,										// VkSampleCountFlagBits						rasterizationSamples
1809		VK_FALSE,													// VkBool32										sampleShadingEnable
1810		1.0f,														// float										minSampleShading
1811		nullptr,													// const VkSampleMask*							pSampleMask
1812		VK_FALSE,													// VkBool32										alphaToCoverageEnable
1813		VK_FALSE													// VkBool32										alphaToOneEnable
1814	};
1815
1816	const auto fragOutputLibInfo = makeLibCreateInfo(VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT);
1817
1818	VkGraphicsPipelineCreateInfo fragOutputInfo	= initVulkanStructure();
1819	fragOutputInfo.layout						= pipelineLayout.get();
1820	fragOutputInfo.renderPass					= renderPass.get();
1821	fragOutputInfo.pColorBlendState				= &colorBlendState;
1822	fragOutputInfo.pMultisampleState			= &multisampleState;
1823	fragOutputInfo.flags						= libCompileFlags;
1824	fragOutputInfo.pNext						= &fragOutputLibInfo;
1825
1826	const auto fragOutputLib = createGraphicsPipeline(vkd, device, DE_NULL, &fragOutputInfo);
1827
1828	// Fragment shader lib (shared among the classic and mesh pipelines).
1829	const VkPipelineDepthStencilStateCreateInfo depthStencilStateCreateInfo = initVulkanStructure();
1830
1831	const VkPipelineShaderStageCreateInfo fragShaderStageCreateInfo =
1832	{
1833		VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,	//	VkStructureType						sType;
1834		nullptr,												//	const void*							pNext;
1835		0u,														//	VkPipelineShaderStageCreateFlags	flags;
1836		VK_SHADER_STAGE_FRAGMENT_BIT,							//	VkShaderStageFlagBits				stage;
1837		fragModule.get(),										//	VkShaderModule						module;
1838		"main",													//	const char*							pName;
1839		nullptr,												//	const VkSpecializationInfo*			pSpecializationInfo;
1840	};
1841
1842	const auto fragShaderLibInfo = makeLibCreateInfo(VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT);
1843
1844	VkGraphicsPipelineCreateInfo fragShaderInfo	= initVulkanStructure();
1845	fragShaderInfo.layout						= pipelineLayout.get();
1846	fragShaderInfo.renderPass					= renderPass.get();
1847	fragShaderInfo.pMultisampleState			= &multisampleState;
1848	fragShaderInfo.pDepthStencilState			= &depthStencilStateCreateInfo;
1849	fragShaderInfo.stageCount					= 1u;
1850	fragShaderInfo.pStages						= &fragShaderStageCreateInfo;
1851	fragShaderInfo.flags						= libCompileFlags;
1852	fragShaderInfo.pNext						= &fragShaderLibInfo;
1853
1854	const auto fragShaderLib = createGraphicsPipeline(vkd, device, DE_NULL, &fragShaderInfo);
1855
1856	// Vertex input state (common, but should be unused by the mesh shading pipeline).
1857	const VkPipelineVertexInputStateCreateInfo	vertexInputStateCreateInfo		= initVulkanStructure();
1858	VkPipelineInputAssemblyStateCreateInfo		inputAssemblyStateCreateInfo	= initVulkanStructure();
1859	inputAssemblyStateCreateInfo.topology										= VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
1860	const auto									vertexInputLibInfo				= makeLibCreateInfo(VK_GRAPHICS_PIPELINE_LIBRARY_VERTEX_INPUT_INTERFACE_BIT_EXT);
1861
1862	VkGraphicsPipelineCreateInfo vertexInputInfo	= initVulkanStructure();
1863	vertexInputInfo.layout							= pipelineLayout.get();
1864	vertexInputInfo.pVertexInputState				= &vertexInputStateCreateInfo;
1865	vertexInputInfo.pInputAssemblyState				= &inputAssemblyStateCreateInfo;
1866	vertexInputInfo.flags							= libCompileFlags;
1867	vertexInputInfo.pNext							= &vertexInputLibInfo;
1868
1869	const auto vertexInputLib = createGraphicsPipeline(vkd, device, DE_NULL, &vertexInputInfo);
1870
1871	// Pre-rasterization shader state: common pieces.
1872	const std::vector<VkViewport>	viewports	(1u, makeViewport(fbExtent));
1873	const std::vector<VkRect2D>		scissors	(1u, makeRect2D(fbExtent));
1874
1875	const VkPipelineViewportStateCreateInfo viewportStateCreateInfo =
1876	{
1877		VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,	//	VkStructureType						sType;
1878		nullptr,												//	const void*							pNext;
1879		0u,														//	VkPipelineViewportStateCreateFlags	flags;
1880		static_cast<uint32_t>(viewports.size()),				//	uint32_t							viewportCount;
1881		de::dataOrNull(viewports),								//	const VkViewport*					pViewports;
1882		static_cast<uint32_t>(scissors.size()),					//	uint32_t							scissorCount;
1883		de::dataOrNull(scissors),								//	const VkRect2D*						pScissors;
1884	};
1885
1886	const VkPipelineRasterizationStateCreateInfo rasterizationStateCreateInfo =
1887	{
1888		VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,		//	VkStructureType							sType;
1889		nullptr,														//	const void*								pNext;
1890		0u,																//	VkPipelineRasterizationStateCreateFlags	flags;
1891		VK_FALSE,														//	VkBool32								depthClampEnable;
1892		VK_FALSE,														//	VkBool32								rasterizerDiscardEnable;
1893		VK_POLYGON_MODE_FILL,											//	VkPolygonMode							polygonMode;
1894		VK_CULL_MODE_NONE,												//	VkCullModeFlags							cullMode;
1895		VK_FRONT_FACE_COUNTER_CLOCKWISE,								//	VkFrontFace								frontFace;
1896		VK_FALSE,														//	VkBool32								depthBiasEnable;
1897		0.0f,															//	float									depthBiasConstantFactor;
1898		0.0f,															//	float									depthBiasClamp;
1899		0.0f,															//	float									depthBiasSlopeFactor;
1900		1.0f,															//	float									lineWidth;
1901	};
1902
1903	const auto preRastLibInfo = makeLibCreateInfo(VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT);
1904
1905	VkGraphicsPipelineCreateInfo preRastShaderInfo	= initVulkanStructure();
1906	preRastShaderInfo.layout						= pipelineLayout.get();
1907	preRastShaderInfo.pViewportState				= &viewportStateCreateInfo;
1908	preRastShaderInfo.pRasterizationState			= &rasterizationStateCreateInfo;
1909	preRastShaderInfo.renderPass					= renderPass.get();
1910	preRastShaderInfo.flags							= libCompileFlags;
1911	preRastShaderInfo.pNext							= &preRastLibInfo;
1912	preRastShaderInfo.stageCount					= 1u;
1913
1914	// Vertex stage info.
1915	const VkPipelineShaderStageCreateInfo vertShaderStageCreateInfo =
1916	{
1917		VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,	//	VkStructureType						sType;
1918		nullptr,												//	const void*							pNext;
1919		0u,														//	VkPipelineShaderStageCreateFlags	flags;
1920		VK_SHADER_STAGE_VERTEX_BIT,								//	VkShaderStageFlagBits				stage;
1921		vertModule.get(),										//	VkShaderModule						module;
1922		"main",													//	const char*							pName;
1923		nullptr,												//	const VkSpecializationInfo*			pSpecializationInfo;
1924	};
1925
1926	// Mesh stage info.
1927	const VkPipelineShaderStageCreateInfo meshShaderStageCreateInfo =
1928	{
1929		VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,	//	VkStructureType						sType;
1930		nullptr,												//	const void*							pNext;
1931		0u,														//	VkPipelineShaderStageCreateFlags	flags;
1932		VK_SHADER_STAGE_MESH_BIT_EXT,							//	VkShaderStageFlagBits				stage;
1933		meshModule.get(),										//	VkShaderModule						module;
1934		"main",													//	const char*							pName;
1935		nullptr,												//	const VkSpecializationInfo*			pSpecializationInfo;
1936	};
1937
1938	// Pre-rasterization shader libs.
1939	preRastShaderInfo.pStages = &vertShaderStageCreateInfo;
1940	const auto preRastClassicLib = createGraphicsPipeline(vkd, device, DE_NULL, &preRastShaderInfo);
1941
1942	preRastShaderInfo.pStages = &meshShaderStageCreateInfo;
1943	const auto preRastMeshLib = createGraphicsPipeline(vkd, device, DE_NULL, &preRastShaderInfo);
1944
1945	// Pipelines.
1946	const std::vector<VkPipeline> classicLibs	{ vertexInputLib.get(), preRastClassicLib.get(),	fragShaderLib.get(), fragOutputLib.get() };
1947	const std::vector<VkPipeline> meshLibs		{ vertexInputLib.get(), preRastMeshLib.get(),		fragShaderLib.get(), fragOutputLib.get() };
1948
1949	const VkPipelineLibraryCreateInfoKHR classicLinkInfo =
1950	{
1951		VK_STRUCTURE_TYPE_PIPELINE_LIBRARY_CREATE_INFO_KHR,	//	VkStructureType		sType;
1952		nullptr,											//	const void*			pNext;
1953		static_cast<uint32_t>(classicLibs.size()),			//	uint32_t			libraryCount;
1954		de::dataOrNull(classicLibs),						//	const VkPipeline*	pLibraries;
1955	};
1956
1957	const VkPipelineLibraryCreateInfoKHR meshLinkInfo =
1958	{
1959		VK_STRUCTURE_TYPE_PIPELINE_LIBRARY_CREATE_INFO_KHR,	//	VkStructureType		sType;
1960		nullptr,											//	const void*			pNext;
1961		static_cast<uint32_t>(meshLibs.size()),				//	uint32_t			libraryCount;
1962		de::dataOrNull(meshLibs),							//	const VkPipeline*	pLibraries;
1963	};
1964
1965	VkGraphicsPipelineCreateInfo classicPipelineCreateInfo = initVulkanStructure();
1966	classicPipelineCreateInfo.flags		= pipelineLinkFlags;
1967	classicPipelineCreateInfo.layout	= pipelineLayout.get();
1968	classicPipelineCreateInfo.pNext		= &classicLinkInfo;
1969
1970	VkGraphicsPipelineCreateInfo meshPipelineCreateInfo = initVulkanStructure();
1971	meshPipelineCreateInfo.flags	= pipelineLinkFlags;
1972	meshPipelineCreateInfo.layout	= pipelineLayout.get();
1973	meshPipelineCreateInfo.pNext	= &meshLinkInfo;
1974
1975	const auto classicPipeline	= createGraphicsPipeline(vkd, device, DE_NULL, &classicPipelineCreateInfo);
1976	const auto meshPipeline		= createGraphicsPipeline(vkd, device, DE_NULL, &meshPipelineCreateInfo);
1977
1978	// Record commands with both pipelines.
1979	const auto cmdPool		= makeCommandPool(vkd, device, queueIndex);
1980	const auto cmdBufferPtr	= allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1981	const auto cmdBuffer	= cmdBufferPtr.get();
1982
1983	beginCommandBuffer(vkd, cmdBuffer);
1984
1985	// Draw using both pipelines.
1986	beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0), clearColor);
1987	vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, classicPipeline.get());
1988	vkd.cmdDraw(cmdBuffer, 3u, 1u, 0u, 0u);
1989	vkd.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, meshPipeline.get());
1990	vkd.cmdDrawMeshTasksEXT(cmdBuffer, 1u, 1u, 1u);
1991	endRenderPass(vkd, cmdBuffer);
1992
1993	// Copy color buffer to verification buffer.
1994	const auto preTransferBarrier = makeImageMemoryBarrier(
1995		VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT,
1996		VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
1997		colorBuffer.get(), colorBufferSRR);
1998
1999	const auto postTransferBarrier = makeMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
2000
2001	const auto copyRegion = makeBufferImageCopy(fbExtent, colorBufferSRL);
2002
2003	cmdPipelineImageMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, &preTransferBarrier);
2004	vkd.cmdCopyImageToBuffer(cmdBuffer, colorBuffer.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, verificationBuffer.get(), 1u, &copyRegion);
2005	cmdPipelineMemoryBarrier(vkd, cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, &postTransferBarrier);
2006
2007	endCommandBuffer(vkd, cmdBuffer);
2008	submitCommandsAndWait(vkd, device, queue, cmdBuffer);
2009
2010	// Validate color buffer.
2011	invalidateAlloc(vkd, device, verificationBufferAlloc);
2012
2013	tcu::ConstPixelBufferAccess	resultAccess	(tcuFormat, iExtent, verificationBufferData);
2014	auto&						log				= m_context.getTestContext().getLog();
2015	bool						fail			= false;
2016
2017	for (int z = 0; z < iExtent.z(); ++z)
2018	{
2019		const auto& expectedColor = layerColors.at(z);
2020		for (int y = 0; y < iExtent.y(); ++y)
2021			for (int x = 0; x < iExtent.x(); ++x)
2022			{
2023				const auto resultColor = resultAccess.getPixel(x, y, z);
2024				if (resultColor != expectedColor)
2025				{
2026					std::ostringstream msg;
2027					msg << "Unexpected color at coordinates (x=" << x << ", y=" << y << ", layer=" << z << "): expected " << expectedColor << " but found " << resultColor;
2028					log << tcu::TestLog::Message << msg.str() << tcu::TestLog::EndMessage;
2029					fail = true;
2030				}
2031			}
2032	}
2033
2034	if (fail)
2035		return tcu::TestStatus::fail("Failed; check log for details");
2036	return tcu::TestStatus::pass("Pass");
2037}
2038
2039} // anonymous namespace
2040
2041tcu::TestCaseGroup* createMeshShaderSmokeTestsEXT (tcu::TestContext& testCtx)
2042{
2043	struct
2044	{
2045		PipelineConstructionType	constructionType;
2046		const char*					name;
2047	} constructionTypes[] =
2048	{
2049		{ PIPELINE_CONSTRUCTION_TYPE_MONOLITHIC,					"monolithic"		},
2050		{ PIPELINE_CONSTRUCTION_TYPE_LINK_TIME_OPTIMIZED_LIBRARY,	"optimized_lib"		},
2051		{ PIPELINE_CONSTRUCTION_TYPE_FAST_LINKED_LIBRARY,			"fast_lib"			},
2052	};
2053
2054	GroupPtr smokeTests (new tcu::TestCaseGroup(testCtx, "smoke"));
2055
2056	for (const auto& constructionCase : constructionTypes)
2057	{
2058		GroupPtr constructionGroup(new tcu::TestCaseGroup(testCtx, constructionCase.name));
2059
2060		const auto& cType = constructionCase.constructionType;
2061
2062		constructionGroup->addChild(new MeshOnlyTriangleCase(testCtx, "mesh_shader_triangle", cType));
2063		constructionGroup->addChild(new MeshOnlyTriangleCase(testCtx, "mesh_shader_triangle_rasterization_disabled", cType, true/*rasterizationDisabled*/));
2064		constructionGroup->addChild(new MeshTaskTriangleCase(testCtx, "mesh_task_shader_triangle", cType));
2065		constructionGroup->addChild(new TaskOnlyTriangleCase(testCtx, "task_only_shader_triangle", cType));
2066
2067		for (int i = 0; i < 2; ++i)
2068		{
2069			const bool					compaction	= (i == 0);
2070			const std::string			nameSuffix	= (compaction ? "" : "_without_compaction");
2071			const PartialUsageParams	params		{ cType, compaction };
2072
2073			constructionGroup->addChild(new PartialUsageCase(testCtx, "partial_usage" + nameSuffix, params));
2074		}
2075
2076		addFunctionCaseWithPrograms(constructionGroup.get(), "fullscreen_gradient",			checkMeshSupport, initGradientPrograms, testFullscreenGradient, GradientParams(tcu::nothing<FragmentSize>(), cType));
2077		addFunctionCaseWithPrograms(constructionGroup.get(), "fullscreen_gradient_fs2x2",	checkMeshSupport, initGradientPrograms, testFullscreenGradient, GradientParams(tcu::just(FragmentSize::SIZE_2X2), cType));
2078		addFunctionCaseWithPrograms(constructionGroup.get(), "fullscreen_gradient_fs2x1",	checkMeshSupport, initGradientPrograms, testFullscreenGradient, GradientParams(tcu::just(FragmentSize::SIZE_2X1), cType));
2079
2080		if (cType != PIPELINE_CONSTRUCTION_TYPE_MONOLITHIC)
2081		{
2082			constructionGroup->addChild(new SharedFragLibraryCase(testCtx, "shared_frag_library", cType));
2083		}
2084
2085		smokeTests->addChild(constructionGroup.release());
2086	}
2087
2088	return smokeTests.release();
2089}
2090
2091} // MeshShader
2092} // vkt
2093