/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2016 The Khronos Group Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file  vktSparseResourcesImageSparseResidency.cpp
 * \brief Sparse partially resident images tests
 *//*--------------------------------------------------------------------*/

#include "vktSparseResourcesBufferSparseBinding.hpp"
#include "vktSparseResourcesTestsUtil.hpp"
#include "vktSparseResourcesBase.hpp"
#include "vktTestCaseUtil.hpp"

#include "vkDefs.hpp"
#include "vkRef.hpp"
#include "vkRefUtil.hpp"
#include "vkPlatform.hpp"
#include "vkPrograms.hpp"
#include "vkMemUtil.hpp"
#include "vkBarrierUtil.hpp"
#include "vkBuilderUtil.hpp"
#include "vkImageUtil.hpp"
#include "vkQueryUtil.hpp"
#include "vkTypeUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vkObjUtil.hpp"
#include "tcuTestLog.hpp"

#include "deMath.h"
#include "deUniquePtr.hpp"
#include "deStringUtil.hpp"

#include "tcuTextureUtil.hpp"
#include "tcuTexVerifierUtil.hpp"

#include <algorithm>	// for std::min and std::sort
#include <string>
#include <vector>
#include <sstream>

using namespace vk;

namespace vkt
{
namespace sparse
{
namespace
{

std::string getFormatValueString	(const std::vector<std::pair<deUint32, deUint32>>& channelsOnPlane,
									 const std::vector<std::string>& formatValueStrings)
{
	std::vector<std::string> usedValues { "0", "0", "0", "0" }; // Default values.

	for (const auto& channel : channelsOnPlane)
	{
		const auto channelIdx = channel.first;
		usedValues[channelIdx] = formatValueStrings[channelIdx];
	}

	std::string result;
	for (const auto& value : usedValues)
	{
		const auto prefix = (result.empty() ? "" : ", ");
		result += prefix + value;
	}
	result = "(" + result + ")";
	return result;
}

const std::string getCoordStr	(const ImageType	imageType,
								 const std::string&	x,
								 const std::string&	y,
								 const std::string&	z)
{
	switch (imageType)
	{
		case IMAGE_TYPE_1D:
		case IMAGE_TYPE_BUFFER:
			return x;

		case IMAGE_TYPE_1D_ARRAY:
		case IMAGE_TYPE_2D:
			return "ivec2(" + x + "," + y + ")";

		case IMAGE_TYPE_2D_ARRAY:
		case IMAGE_TYPE_3D:
		case IMAGE_TYPE_CUBE:
		case IMAGE_TYPE_CUBE_ARRAY:
			return "ivec3(" + x + "," + y + "," + z + ")";

		default:
			DE_ASSERT(false);
			return "";
	}
}

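// Clamp the work group size per axis so that the total invocation count never exceeds
// 128 and each axis stays within the assumed per-axis limits of (128, 128, 64).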
tcu::UVec3 computeWorkGroupSize (const VkExtent3D& planeExtent)
{
	const deUint32		maxComputeWorkGroupInvocations	= 128u;
	const tcu::UVec3	maxComputeWorkGroupSize			= tcu::UVec3(128u, 128u, 64u);

	const deUint32 xWorkGroupSize = std::min(std::min(planeExtent.width,	maxComputeWorkGroupSize.x()), maxComputeWorkGroupInvocations);
	const deUint32 yWorkGroupSize = std::min(std::min(planeExtent.height,	maxComputeWorkGroupSize.y()), maxComputeWorkGroupInvocations /  xWorkGroupSize);
	const deUint32 zWorkGroupSize = std::min(std::min(planeExtent.depth,	maxComputeWorkGroupSize.z()), maxComputeWorkGroupInvocations / (xWorkGroupSize * yWorkGroupSize));

	return tcu::UVec3(xWorkGroupSize, yWorkGroupSize, zWorkGroupSize);
}

class ImageSparseResidencyCase : public TestCase
{
public:
	ImageSparseResidencyCase		(tcu::TestContext&		testCtx,
									 const std::string&		name,
									 const ImageType		imageType,
									 const tcu::UVec3&		imageSize,
									 const VkFormat			format,
									 const glu::GLSLVersion	glslVersion,
									 const bool				useDeviceGroups);

	void			initPrograms	(SourceCollections&		sourceCollections) const;
	virtual void	checkSupport	(Context&				context) const;
	TestInstance*	createInstance	(Context&				context) const;

private:
	const bool				m_useDeviceGroups;
	const ImageType			m_imageType;
	const tcu::UVec3		m_imageSize;
	const VkFormat			m_format;
	const glu::GLSLVersion	m_glslVersion;
};

ImageSparseResidencyCase::ImageSparseResidencyCase	(tcu::TestContext&		testCtx,
													 const std::string&		name,
													 const ImageType		imageType,
													 const tcu::UVec3&		imageSize,
													 const VkFormat			format,
													 const glu::GLSLVersion	glslVersion,
													 const bool				useDeviceGroups)
	: TestCase			(testCtx, name)
	, m_useDeviceGroups	(useDeviceGroups)
	, m_imageType		(imageType)
	, m_imageSize		(imageSize)
	, m_format			(format)
	, m_glslVersion		(glslVersion)
{
}

void ImageSparseResidencyCase::initPrograms (SourceCollections&	sourceCollections) const
{
	// Create compute program
	const char* const				versionDecl			= glu::getGLSLVersionDeclaration(m_glslVersion);
	const PlanarFormatDescription	formatDescription	= getPlanarFormatDescription(m_format);
	const std::string				imageTypeStr		= getShaderImageType(formatDescription, m_imageType);
	const std::string				formatDataStr		= getShaderImageDataType(formatDescription);
	const tcu::UVec3				shaderGridSize		= getShaderGridSize(m_imageType, m_imageSize);
	const auto						isAlphaOnly			= isAlphaOnlyFormat(m_format);

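	// Reference values written by the shader: each channel is derived from the corresponding
	// global invocation coordinate modulo 127 (normalized to [0, 1] for non-integer formats).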
	std::vector<std::string>		formatValueStrings;
	switch (formatDescription.channels[isAlphaOnly ? 3 : 0].type)
	{
		case tcu::TEXTURECHANNELCLASS_SIGNED_INTEGER:
		case tcu::TEXTURECHANNELCLASS_UNSIGNED_INTEGER:
			formatValueStrings = {
				"int(gl_GlobalInvocationID.x) % 127",
				"int(gl_GlobalInvocationID.y) % 127",
				"int(gl_GlobalInvocationID.z) % 127",
				"1"
			};
			break;
		case tcu::TEXTURECHANNELCLASS_UNSIGNED_FIXED_POINT:
		case tcu::TEXTURECHANNELCLASS_SIGNED_FIXED_POINT:
		case tcu::TEXTURECHANNELCLASS_FLOATING_POINT:
			// For A8_UNORM, exchange the red and alpha channels.
			formatValueStrings = {
				(isAlphaOnly ? "1.0" : "float(int(gl_GlobalInvocationID.x) % 127) / 127.0"),
				"float(int(gl_GlobalInvocationID.y) % 127) / 127.0",
				"float(int(gl_GlobalInvocationID.z) % 127) / 127.0",
				(isAlphaOnly ? "float(int(gl_GlobalInvocationID.x) % 127) / 127.0" : "1.0"),
			};
			break;
		default:	DE_ASSERT(false);	break;
	}

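	// Emit one compute shader per plane; each shader fills its entire plane with the
	// reference values, bounds-checked against the plane extent.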
	for (deUint32 planeNdx = 0; planeNdx < formatDescription.numPlanes; ++planeNdx)
	{
		VkFormat						planeCompatibleFormat		= getPlaneCompatibleFormatForWriting(formatDescription, planeNdx);
		vk::PlanarFormatDescription		compatibleFormatDescription	= (planeCompatibleFormat != getPlaneCompatibleFormat(formatDescription, planeNdx)) ? getPlanarFormatDescription(planeCompatibleFormat) : formatDescription;
		VkExtent3D						compatibleShaderGridSize	{ shaderGridSize.x() / formatDescription.blockWidth, shaderGridSize.y() / formatDescription.blockHeight, shaderGridSize.z() / 1u };

		std::vector<std::pair<deUint32, deUint32>> channelsOnPlane;
		for (deUint32 channelNdx = 0; channelNdx < 4; ++channelNdx)
		{
			if (!formatDescription.hasChannelNdx(channelNdx))
				continue;
			if (formatDescription.channels[channelNdx].planeNdx != planeNdx)
				continue;
			channelsOnPlane.push_back({ channelNdx, formatDescription.channels[channelNdx].offsetBits });
		}
		// Reorder channels for multi-planar images
		if (formatDescription.numPlanes > 1)
			std::sort(begin(channelsOnPlane), end(channelsOnPlane), [](const std::pair<deUint32, deUint32>& lhs, const std::pair<deUint32, deUint32>& rhs) { return lhs.second < rhs.second; });
		std::string			formatValueStr		= getFormatValueString(channelsOnPlane, formatValueStrings);
		VkExtent3D			shaderExtent		= getPlaneExtent(compatibleFormatDescription, compatibleShaderGridSize, planeNdx, 0);
		const std::string	formatQualifierStr	= (isAlphaOnly ? "" : ", " + getShaderImageFormatQualifier(planeCompatibleFormat));
		const tcu::UVec3	workGroupSize		= computeWorkGroupSize(shaderExtent);

		std::ostringstream src;
		src << versionDecl << "\n";
		if (formatIsR64(m_format))
		{
			src << "#extension GL_EXT_shader_explicit_arithmetic_types_int64 : require\n"
				<< "#extension GL_EXT_shader_image_int64 : require\n";
		}
		if (isAlphaOnly)
		{
			src << "#extension GL_EXT_shader_image_load_formatted : require\n";
		}
		src << "layout (local_size_x = " << workGroupSize.x() << ", local_size_y = " << workGroupSize.y() << ", local_size_z = " << workGroupSize.z() << ") in;\n"
			<< "layout (binding = 0" << formatQualifierStr << ") writeonly uniform highp " << imageTypeStr << " u_image;\n"
			<< "void main (void)\n"
			<< "{\n"
			<< "	if( gl_GlobalInvocationID.x < " << shaderExtent.width << " ) \n"
			<< "	if( gl_GlobalInvocationID.y < " << shaderExtent.height << " ) \n"
			<< "	if( gl_GlobalInvocationID.z < " << shaderExtent.depth << " ) \n"
			<< "	{\n"
			<< "		imageStore(u_image, " << getCoordStr(m_imageType, "gl_GlobalInvocationID.x", "gl_GlobalInvocationID.y", "gl_GlobalInvocationID.z") << ","
			<< formatDataStr << formatValueStr << ");\n"
			<< "	}\n"
			<< "}\n";
		std::ostringstream shaderName;
		shaderName << "comp" << planeNdx;
		sourceCollections.glslSources.add(shaderName.str()) << glu::ComputeSource(src.str())
			<< vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_3, vk::ShaderBuildOptions::FLAG_ALLOW_SCALAR_OFFSETS);
	}
}

void ImageSparseResidencyCase::checkSupport(Context& context) const
{
	const InstanceInterface&	instance = context.getInstanceInterface();
	const VkPhysicalDevice		physicalDevice = context.getPhysicalDevice();

#ifndef CTS_USES_VULKANSC
	if (m_format == VK_FORMAT_A8_UNORM_KHR)
	{
		context.requireDeviceFunctionality("VK_KHR_maintenance5");
		const auto properties = context.getFormatProperties(m_format);
		if ((properties.optimalTilingFeatures & VK_FORMAT_FEATURE_2_STORAGE_WRITE_WITHOUT_FORMAT_BIT_KHR) == 0u)
			TCU_THROW(NotSupportedError, "Format does not support writes without format");
	}
#endif // CTS_USES_VULKANSC

	// Check if image size does not exceed device limits
	if (!isImageSizeSupported(instance, physicalDevice, m_imageType, m_imageSize))
		TCU_THROW(NotSupportedError, "Image size not supported for device");

	// Check if device supports sparse operations for image type
	if (!checkSparseSupportForImageType(instance, physicalDevice, m_imageType))
		TCU_THROW(NotSupportedError, "Sparse residency for image type is not supported");

	// Check if image format supports storage images
	const VkFormatProperties	formatProperties = getPhysicalDeviceFormatProperties(instance, physicalDevice, m_format);
	if ((formatProperties.optimalTilingFeatures & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT) == 0)
		TCU_THROW(NotSupportedError, "Storage images are not supported for this format");

	if (formatIsR64(m_format))
	{
		context.requireDeviceFunctionality("VK_EXT_shader_image_atomic_int64");

		if (context.getShaderImageAtomicInt64FeaturesEXT().shaderImageInt64Atomics == VK_FALSE)
		{
			TCU_THROW(NotSupportedError, "shaderImageInt64Atomics is not supported");
		}

		if (context.getShaderImageAtomicInt64FeaturesEXT().sparseImageInt64Atomics == VK_FALSE)
		{
			TCU_THROW(NotSupportedError, "sparseImageInt64Atomics is not supported for device");
		}
	}
}

class ImageSparseResidencyInstance : public SparseResourcesBaseInstance
{
public:
	ImageSparseResidencyInstance	(Context&			context,
									 const ImageType	imageType,
									 const tcu::UVec3&	imageSize,
									 const VkFormat		format,
									 const bool			useDeviceGroups);


	tcu::TestStatus	iterate			(void);

private:
	const bool			m_useDeviceGroups;
	const ImageType		m_imageType;
	const tcu::UVec3	m_imageSize;
	const VkFormat		m_format;
};

ImageSparseResidencyInstance::ImageSparseResidencyInstance	(Context&			context,
															 const ImageType	imageType,
															 const tcu::UVec3&	imageSize,
															 const VkFormat		format,
															 const bool			useDeviceGroups)
	: SparseResourcesBaseInstance	(context, useDeviceGroups)
	, m_useDeviceGroups				(useDeviceGroups)
	, m_imageType					(imageType)
	, m_imageSize					(imageSize)
	, m_format						(format)
{
}

tcu::TestStatus ImageSparseResidencyInstance::iterate (void)
{
	const auto					isAlphaOnly			= isAlphaOnlyFormat(m_format);
	const float					epsilon				= 1e-5f;
	const InstanceInterface&	instance			= m_context.getInstanceInterface();

	{
		// Create logical device supporting both sparse and compute queues
		QueueRequirementsVec queueRequirements;
		queueRequirements.push_back(QueueRequirements(VK_QUEUE_SPARSE_BINDING_BIT, 1u));
		queueRequirements.push_back(QueueRequirements(VK_QUEUE_COMPUTE_BIT, 1u));

		createDeviceSupportingQueues(queueRequirements, formatIsR64(m_format), isAlphaOnly);
	}

	VkImageCreateInfo			imageCreateInfo;
	std::vector<DeviceMemorySp>	deviceMemUniquePtrVec;

	const DeviceInterface&			deviceInterface		= getDeviceInterface();
	const Queue&					sparseQueue			= getQueue(VK_QUEUE_SPARSE_BINDING_BIT, 0);
	const Queue&					computeQueue		= getQueue(VK_QUEUE_COMPUTE_BIT, 0);
	const PlanarFormatDescription	formatDescription	= getPlanarFormatDescription(m_format);

	// Go through all physical devices
	for (deUint32 physDevID = 0; physDevID < m_numPhysicalDevices; physDevID++)
	{
		const deUint32						firstDeviceID				= physDevID;
		const deUint32						secondDeviceID				= (firstDeviceID + 1) % m_numPhysicalDevices;

		const VkPhysicalDevice				physicalDevice				= getPhysicalDevice(firstDeviceID);
		const VkPhysicalDeviceProperties	physicalDeviceProperties	= getPhysicalDeviceProperties(instance, physicalDevice);

		imageCreateInfo.sType					= VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
		imageCreateInfo.pNext					= DE_NULL;
		imageCreateInfo.flags					= VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT | VK_IMAGE_CREATE_SPARSE_BINDING_BIT;
		imageCreateInfo.imageType				= mapImageType(m_imageType);
		imageCreateInfo.format					= m_format;
		imageCreateInfo.extent					= makeExtent3D(getLayerSize(m_imageType, m_imageSize));
		imageCreateInfo.mipLevels				= 1u;
		imageCreateInfo.arrayLayers				= getNumLayers(m_imageType, m_imageSize);
		imageCreateInfo.samples					= VK_SAMPLE_COUNT_1_BIT;
		imageCreateInfo.tiling					= VK_IMAGE_TILING_OPTIMAL;
		imageCreateInfo.initialLayout			= VK_IMAGE_LAYOUT_UNDEFINED;
		imageCreateInfo.usage					= VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
												  VK_IMAGE_USAGE_STORAGE_BIT;
		imageCreateInfo.sharingMode				= VK_SHARING_MODE_EXCLUSIVE;
		imageCreateInfo.queueFamilyIndexCount	= 0u;
		imageCreateInfo.pQueueFamilyIndices		= DE_NULL;

		if (m_imageType == IMAGE_TYPE_CUBE || m_imageType == IMAGE_TYPE_CUBE_ARRAY)
		{
			imageCreateInfo.flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
		}

		// Check if we need to create the VkImageView with a different VkFormat than the VkImage format
		VkFormat planeCompatibleFormat0 = getPlaneCompatibleFormatForWriting(formatDescription, 0);
		if (planeCompatibleFormat0 != getPlaneCompatibleFormat(formatDescription, 0))
		{
			imageCreateInfo.flags |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
		}

		// Check if device supports sparse operations for image format
		if (!checkSparseSupportForImageFormat(instance, physicalDevice, imageCreateInfo))
			TCU_THROW(NotSupportedError, "The image format does not support sparse operations");

		// Create sparse image
		const Unique<VkImage> imageSparse(createImage(deviceInterface, getDevice(), &imageCreateInfo));

		// Create sparse image memory bind semaphore
		const Unique<VkSemaphore> imageMemoryBindSemaphore(createSemaphore(deviceInterface, getDevice()));

		std::vector<VkSparseImageMemoryRequirements> sparseMemoryRequirements;

		{
			// Get image general memory requirements
			const VkMemoryRequirements imageMemoryRequirements = getImageMemoryRequirements(deviceInterface, getDevice(), *imageSparse);

			if (imageMemoryRequirements.size > physicalDeviceProperties.limits.sparseAddressSpaceSize)
				TCU_THROW(NotSupportedError, "Required memory size for sparse resource exceeds device limits");

			DE_ASSERT((imageMemoryRequirements.size % imageMemoryRequirements.alignment) == 0);

			const deUint32 memoryType = findMatchingMemoryType(instance, getPhysicalDevice(secondDeviceID), imageMemoryRequirements, MemoryRequirement::Any);

			if (memoryType == NO_MATCH_FOUND)
				return tcu::TestStatus::fail("No matching memory type found");

			if (firstDeviceID != secondDeviceID)
			{
				VkPeerMemoryFeatureFlags	peerMemoryFeatureFlags = (VkPeerMemoryFeatureFlags)0;
				const deUint32				heapIndex = getHeapIndexForMemoryType(instance, getPhysicalDevice(secondDeviceID), memoryType);
				deviceInterface.getDeviceGroupPeerMemoryFeatures(getDevice(), heapIndex, firstDeviceID, secondDeviceID, &peerMemoryFeatureFlags);

				if (((peerMemoryFeatureFlags & VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT)    == 0) ||
					((peerMemoryFeatureFlags & VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT) == 0))
				{
					TCU_THROW(NotSupportedError, "Peer memory does not support COPY_SRC and GENERIC_DST");
				}
			}

			// Get sparse image sparse memory requirements
			sparseMemoryRequirements = getImageSparseMemoryRequirements(deviceInterface, getDevice(), *imageSparse);
			DE_ASSERT(sparseMemoryRequirements.size() != 0);

			const deUint32 metadataAspectIndex = getSparseAspectRequirementsIndex(sparseMemoryRequirements, VK_IMAGE_ASPECT_METADATA_BIT);

			std::vector<VkSparseImageMemoryBind>	imageResidencyMemoryBinds;
			std::vector<VkSparseMemoryBind>			imageMipTailMemoryBinds;

			// Bind device memory for each aspect
			for (deUint32 planeNdx = 0; planeNdx < formatDescription.numPlanes; ++planeNdx)
			{
				const VkImageAspectFlags		aspect				= (formatDescription.numPlanes > 1) ? getPlaneAspect(planeNdx) : VK_IMAGE_ASPECT_COLOR_BIT;
				const deUint32					aspectIndex			= getSparseAspectRequirementsIndex(sparseMemoryRequirements, aspect);

				if (aspectIndex == NO_MATCH_FOUND)
					TCU_THROW(NotSupportedError, "Image aspect not supported");

				VkSparseImageMemoryRequirements	aspectRequirements	= sparseMemoryRequirements[aspectIndex];
				VkExtent3D						imageGranularity	= aspectRequirements.formatProperties.imageGranularity;

				for (deUint32 layerNdx = 0; layerNdx < imageCreateInfo.arrayLayers; ++layerNdx)
				{
					for (deUint32 mipLevelNdx = 0; mipLevelNdx < aspectRequirements.imageMipTailFirstLod; ++mipLevelNdx)
					{
						const VkImageSubresource subresource		= { aspect, mipLevelNdx, layerNdx };
						const VkExtent3D		 planeExtent		= getPlaneExtent(formatDescription, imageCreateInfo.extent, planeNdx, mipLevelNdx);
						const tcu::UVec3		 numSparseBinds		= alignedDivide(planeExtent, imageGranularity);
						const tcu::UVec3		 lastBlockExtent	= tcu::UVec3(planeExtent.width  % imageGranularity.width  ? planeExtent.width  % imageGranularity.width  : imageGranularity.width,
																				 planeExtent.height % imageGranularity.height ? planeExtent.height % imageGranularity.height : imageGranularity.height,
																				 planeExtent.depth  % imageGranularity.depth  ? planeExtent.depth  % imageGranularity.depth  : imageGranularity.depth);

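						// Bind device memory only to blocks with an even linear index; odd blocks are
						// deliberately left unbound to exercise partial residency.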
						for (deUint32 z = 0; z < numSparseBinds.z(); ++z)
						for (deUint32 y = 0; y < numSparseBinds.y(); ++y)
						for (deUint32 x = 0; x < numSparseBinds.x(); ++x)
						{
							const deUint32 linearIndex = x + y * numSparseBinds.x() + z * numSparseBinds.x() * numSparseBinds.y() + layerNdx * numSparseBinds.x() * numSparseBinds.y() * numSparseBinds.z();

							if (linearIndex % 2u == 0u)
							{
								VkOffset3D offset;
								offset.x		= x * imageGranularity.width;
								offset.y		= y * imageGranularity.height;
								offset.z		= z * imageGranularity.depth;

								VkExtent3D extent;
								extent.width	= (x == numSparseBinds.x() - 1) ? lastBlockExtent.x() : imageGranularity.width;
								extent.height	= (y == numSparseBinds.y() - 1) ? lastBlockExtent.y() : imageGranularity.height;
								extent.depth	= (z == numSparseBinds.z() - 1) ? lastBlockExtent.z() : imageGranularity.depth;

								const VkSparseImageMemoryBind imageMemoryBind = makeSparseImageMemoryBind(deviceInterface, getDevice(),
									imageMemoryRequirements.alignment, memoryType, subresource, offset, extent);

								deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(imageMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL))));

								imageResidencyMemoryBinds.push_back(imageMemoryBind);
							}
						}
					}

					if (!(aspectRequirements.formatProperties.flags & VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT) && aspectRequirements.imageMipTailFirstLod < imageCreateInfo.mipLevels)
					{
						const VkSparseMemoryBind imageMipTailMemoryBind = makeSparseMemoryBind(deviceInterface, getDevice(),
							aspectRequirements.imageMipTailSize, memoryType, aspectRequirements.imageMipTailOffset + layerNdx * aspectRequirements.imageMipTailStride);

						deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(imageMipTailMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL))));

						imageMipTailMemoryBinds.push_back(imageMipTailMemoryBind);
					}

					// Metadata
					if (metadataAspectIndex != NO_MATCH_FOUND)
					{
						const VkSparseImageMemoryRequirements metadataAspectRequirements = sparseMemoryRequirements[metadataAspectIndex];

						if (!(metadataAspectRequirements.formatProperties.flags & VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT))
						{
							const VkSparseMemoryBind imageMipTailMemoryBind = makeSparseMemoryBind(deviceInterface, getDevice(),
								metadataAspectRequirements.imageMipTailSize, memoryType,
								metadataAspectRequirements.imageMipTailOffset + layerNdx * metadataAspectRequirements.imageMipTailStride,
								VK_SPARSE_MEMORY_BIND_METADATA_BIT);

							deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(imageMipTailMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL))));

							imageMipTailMemoryBinds.push_back(imageMipTailMemoryBind);
						}
					}
				}

				if ((aspectRequirements.formatProperties.flags & VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT) && aspectRequirements.imageMipTailFirstLod < imageCreateInfo.mipLevels)
				{
					const VkSparseMemoryBind imageMipTailMemoryBind = makeSparseMemoryBind(deviceInterface, getDevice(),
						aspectRequirements.imageMipTailSize, memoryType, aspectRequirements.imageMipTailOffset);

					deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(imageMipTailMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL))));

					imageMipTailMemoryBinds.push_back(imageMipTailMemoryBind);
				}
			}

			// Metadata
			if (metadataAspectIndex != NO_MATCH_FOUND)
			{
				const VkSparseImageMemoryRequirements metadataAspectRequirements = sparseMemoryRequirements[metadataAspectIndex];

				if ((metadataAspectRequirements.formatProperties.flags & VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT))
				{
					const VkSparseMemoryBind imageMipTailMemoryBind = makeSparseMemoryBind(deviceInterface, getDevice(),
						metadataAspectRequirements.imageMipTailSize, memoryType, metadataAspectRequirements.imageMipTailOffset,
						VK_SPARSE_MEMORY_BIND_METADATA_BIT);

					deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(imageMipTailMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL))));

					imageMipTailMemoryBinds.push_back(imageMipTailMemoryBind);
				}
			}

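			// With device groups, the sparse resource lives on firstDeviceID while the backing
			// memory is allocated on secondDeviceID.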
			const VkDeviceGroupBindSparseInfo devGroupBindSparseInfo =
			{
				VK_STRUCTURE_TYPE_DEVICE_GROUP_BIND_SPARSE_INFO,		//VkStructureType							sType;
				DE_NULL,												//const void*								pNext;
				firstDeviceID,											//deUint32									resourceDeviceIndex;
				secondDeviceID,											//deUint32									memoryDeviceIndex;
			};

			VkBindSparseInfo bindSparseInfo =
			{
				VK_STRUCTURE_TYPE_BIND_SPARSE_INFO,						//VkStructureType							sType;
				m_useDeviceGroups ? &devGroupBindSparseInfo : DE_NULL,	//const void*								pNext;
				0u,														//deUint32									waitSemaphoreCount;
				DE_NULL,												//const VkSemaphore*						pWaitSemaphores;
				0u,														//deUint32									bufferBindCount;
				DE_NULL,												//const VkSparseBufferMemoryBindInfo*		pBufferBinds;
				0u,														//deUint32									imageOpaqueBindCount;
				DE_NULL,												//const VkSparseImageOpaqueMemoryBindInfo*	pImageOpaqueBinds;
				0u,														//deUint32									imageBindCount;
				DE_NULL,												//const VkSparseImageMemoryBindInfo*		pImageBinds;
				1u,														//deUint32									signalSemaphoreCount;
				&imageMemoryBindSemaphore.get()							//const VkSemaphore*						pSignalSemaphores;
			};

			VkSparseImageMemoryBindInfo			imageResidencyBindInfo;
			VkSparseImageOpaqueMemoryBindInfo	imageMipTailBindInfo;

			if (imageResidencyMemoryBinds.size() > 0)
			{
				imageResidencyBindInfo.image		= *imageSparse;
				imageResidencyBindInfo.bindCount	= static_cast<deUint32>(imageResidencyMemoryBinds.size());
				imageResidencyBindInfo.pBinds		= imageResidencyMemoryBinds.data();

				bindSparseInfo.imageBindCount		= 1u;
				bindSparseInfo.pImageBinds			= &imageResidencyBindInfo;
			}

			if (imageMipTailMemoryBinds.size() > 0)
			{
				imageMipTailBindInfo.image			= *imageSparse;
				imageMipTailBindInfo.bindCount		= static_cast<deUint32>(imageMipTailMemoryBinds.size());
				imageMipTailBindInfo.pBinds			= imageMipTailMemoryBinds.data();

				bindSparseInfo.imageOpaqueBindCount = 1u;
				bindSparseInfo.pImageOpaqueBinds	= &imageMipTailBindInfo;
			}

			// Submit sparse bind commands for execution
			VK_CHECK(deviceInterface.queueBindSparse(sparseQueue.queueHandle, 1u, &bindSparseInfo, DE_NULL));
		}

		// Create command buffer for compute and transfer operations
		const Unique<VkCommandPool>		commandPool(makeCommandPool(deviceInterface, getDevice(), computeQueue.queueFamilyIndex));
		const Unique<VkCommandBuffer>	commandBuffer(allocateCommandBuffer(deviceInterface, getDevice(), *commandPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

		// Start recording commands
		beginCommandBuffer(deviceInterface, *commandBuffer);

		// Create descriptor set layout
		const Unique<VkDescriptorSetLayout> descriptorSetLayout(
			DescriptorSetLayoutBuilder()
			.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
			.build(deviceInterface, getDevice()));

		// Create and bind descriptor set
		const Unique<VkDescriptorPool> descriptorPool(
			DescriptorPoolBuilder()
			.addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1u)
			.build(deviceInterface, getDevice(), VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, vk::PlanarFormatDescription::MAX_PLANES));

		const Unique<VkPipelineLayout>	pipelineLayout(makePipelineLayout(deviceInterface, getDevice(), *descriptorSetLayout));
		std::vector<de::SharedPtr<vk::Unique<vk::VkShaderModule>>>		shaderModules;
		std::vector<de::SharedPtr<vk::Unique<vk::VkPipeline>>>			computePipelines;
		std::vector<de::SharedPtr<vk::Unique<vk::VkDescriptorSet>>>	descriptorSets;
		std::vector<de::SharedPtr<vk::Unique<vk::VkImageView>>>		imageViews;

		const tcu::UVec3 shaderGridSize = getShaderGridSize(m_imageType, m_imageSize);

		// Run compute shader for each image plane
		for (deUint32 planeNdx = 0; planeNdx < formatDescription.numPlanes; ++planeNdx)
		{
			const VkImageAspectFlags		aspect						= (formatDescription.numPlanes > 1) ? getPlaneAspect(planeNdx) : VK_IMAGE_ASPECT_COLOR_BIT;
			const VkImageSubresourceRange	subresourceRange			= makeImageSubresourceRange(aspect, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
			VkFormat						planeCompatibleFormat		= getPlaneCompatibleFormatForWriting(formatDescription, planeNdx);
			vk::PlanarFormatDescription		compatibleFormatDescription	= (planeCompatibleFormat != getPlaneCompatibleFormat(formatDescription, planeNdx)) ? getPlanarFormatDescription(planeCompatibleFormat) : formatDescription;
			const tcu::UVec3				compatibleShaderGridSize	(shaderGridSize.x() / formatDescription.blockWidth, shaderGridSize.y() / formatDescription.blockHeight, shaderGridSize.z() / 1u);
			VkExtent3D						shaderExtent				= getPlaneExtent(compatibleFormatDescription, VkExtent3D{ compatibleShaderGridSize.x(), compatibleShaderGridSize.y(), compatibleShaderGridSize.z() }, planeNdx, 0u);

			// Create and bind compute pipeline
			std::ostringstream shaderName;
			shaderName << "comp" << planeNdx;
			auto shaderModule		= makeVkSharedPtr(createShaderModule(deviceInterface, getDevice(), m_context.getBinaryCollection().get(shaderName.str()), DE_NULL));
			shaderModules.push_back(shaderModule);
			auto computePipeline	= makeVkSharedPtr(makeComputePipeline(deviceInterface, getDevice(), *pipelineLayout, shaderModule->get()));
			computePipelines.push_back(computePipeline);
			deviceInterface.cmdBindPipeline	(*commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, computePipeline->get());

			auto descriptorSet		= makeVkSharedPtr(makeDescriptorSet(deviceInterface, getDevice(), *descriptorPool, *descriptorSetLayout));
			descriptorSets.push_back(descriptorSet);

			auto imageView			= makeVkSharedPtr(makeImageView(deviceInterface, getDevice(), *imageSparse, mapImageViewType(m_imageType), planeCompatibleFormat, subresourceRange));
			imageViews.push_back(imageView);
			const VkDescriptorImageInfo		imageSparseInfo			= makeDescriptorImageInfo(DE_NULL, imageView->get(), VK_IMAGE_LAYOUT_GENERAL);

			DescriptorSetUpdateBuilder()
				.writeSingle(descriptorSet->get(), DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &imageSparseInfo)
				.update(deviceInterface, getDevice());

			deviceInterface.cmdBindDescriptorSets(*commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet->get(), 0u, DE_NULL);

			{
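				// Transition the plane to VK_IMAGE_LAYOUT_GENERAL for shader writes, transferring
				// queue family ownership if the sparse and compute queues belong to different families.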
				const VkImageMemoryBarrier imageSparseLayoutChangeBarrier = makeImageMemoryBarrier
				(
					0u,
					VK_ACCESS_SHADER_WRITE_BIT,
					VK_IMAGE_LAYOUT_UNDEFINED,
					VK_IMAGE_LAYOUT_GENERAL,
					*imageSparse,
					subresourceRange,
					sparseQueue.queueFamilyIndex != computeQueue.queueFamilyIndex ? sparseQueue.queueFamilyIndex : VK_QUEUE_FAMILY_IGNORED,
					sparseQueue.queueFamilyIndex != computeQueue.queueFamilyIndex ? computeQueue.queueFamilyIndex : VK_QUEUE_FAMILY_IGNORED
				);

				deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0u, 0u, DE_NULL, 0u, DE_NULL, 1u, &imageSparseLayoutChangeBarrier);
			}

			{
				const tcu::UVec3 workGroupSize = computeWorkGroupSize(shaderExtent);

				const deUint32 xWorkGroupCount = shaderExtent.width  / workGroupSize.x() + (shaderExtent.width  % workGroupSize.x() ? 1u : 0u);
				const deUint32 yWorkGroupCount = shaderExtent.height / workGroupSize.y() + (shaderExtent.height % workGroupSize.y() ? 1u : 0u);
				const deUint32 zWorkGroupCount = shaderExtent.depth  / workGroupSize.z() + (shaderExtent.depth  % workGroupSize.z() ? 1u : 0u);

				const tcu::UVec3 maxComputeWorkGroupCount = tcu::UVec3(65535u, 65535u, 65535u);

				if (maxComputeWorkGroupCount.x() < xWorkGroupCount ||
					maxComputeWorkGroupCount.y() < yWorkGroupCount ||
					maxComputeWorkGroupCount.z() < zWorkGroupCount)
				{
					TCU_THROW(NotSupportedError, "Image size is not supported");
				}

				deviceInterface.cmdDispatch(*commandBuffer, xWorkGroupCount, yWorkGroupCount, zWorkGroupCount);
			}

			{
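				// Make the shader writes visible to the transfer stage and move the plane to
				// VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL for the copy to the output buffer.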
				const VkImageMemoryBarrier imageSparseTransferBarrier = makeImageMemoryBarrier
				(
					VK_ACCESS_SHADER_WRITE_BIT,
					VK_ACCESS_TRANSFER_READ_BIT,
					VK_IMAGE_LAYOUT_GENERAL,
					VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
					*imageSparse,
					subresourceRange
				);

				deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, DE_NULL, 0u, DE_NULL, 1u, &imageSparseTransferBarrier);
			}
		}

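		// Compute tightly packed per-plane offsets and row pitches for the readback buffer.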
		deUint32	imageSizeInBytes = 0;
		deUint32	planeOffsets[PlanarFormatDescription::MAX_PLANES];
		deUint32	planeRowPitches[PlanarFormatDescription::MAX_PLANES];

		for (deUint32 planeNdx = 0; planeNdx < formatDescription.numPlanes; ++planeNdx)
		{
			planeOffsets[planeNdx]		= imageSizeInBytes;
			const deUint32	planeW		= imageCreateInfo.extent.width / (formatDescription.blockWidth * formatDescription.planes[planeNdx].widthDivisor);
			planeRowPitches[planeNdx]	= formatDescription.planes[planeNdx].elementSizeBytes * planeW;
			imageSizeInBytes			+= getImageMipLevelSizeInBytes(imageCreateInfo.extent, imageCreateInfo.arrayLayers, formatDescription, planeNdx, 0, BUFFER_IMAGE_COPY_OFFSET_GRANULARITY);
		}

		const VkBufferCreateInfo		outputBufferCreateInfo	= makeBufferCreateInfo(imageSizeInBytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
		const Unique<VkBuffer>			outputBuffer			(createBuffer(deviceInterface, getDevice(), &outputBufferCreateInfo));
		const de::UniquePtr<Allocation>	outputBufferAlloc		(bindBuffer(deviceInterface, getDevice(), getAllocator(), *outputBuffer, MemoryRequirement::HostVisible));
		std::vector<VkBufferImageCopy>	bufferImageCopy			(formatDescription.numPlanes);

		for (deUint32 planeNdx = 0; planeNdx < formatDescription.numPlanes; ++planeNdx)
		{
			const VkImageAspectFlags aspect = (formatDescription.numPlanes > 1) ? getPlaneAspect(planeNdx) : VK_IMAGE_ASPECT_COLOR_BIT;

			bufferImageCopy[planeNdx] =
			{
				planeOffsets[planeNdx],														//	VkDeviceSize				bufferOffset;
				0u,																			//	deUint32					bufferRowLength;
				0u,																			//	deUint32					bufferImageHeight;
				makeImageSubresourceLayers(aspect, 0u, 0u, imageCreateInfo.arrayLayers),	//	VkImageSubresourceLayers	imageSubresource;
				makeOffset3D(0, 0, 0),														//	VkOffset3D					imageOffset;
				vk::getPlaneExtent(formatDescription, imageCreateInfo.extent, planeNdx, 0)	//	VkExtent3D					imageExtent;
			};
		}
		deviceInterface.cmdCopyImageToBuffer(*commandBuffer, *imageSparse, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *outputBuffer, static_cast<deUint32>(bufferImageCopy.size()), bufferImageCopy.data());

		{
			const VkBufferMemoryBarrier outputBufferHostReadBarrier = makeBufferMemoryBarrier
			(
				VK_ACCESS_TRANSFER_WRITE_BIT,
				VK_ACCESS_HOST_READ_BIT,
				*outputBuffer,
				0u,
				imageSizeInBytes
			);

			deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 0u, DE_NULL, 1u, &outputBufferHostReadBarrier, 0u, DE_NULL);
		}

		// End recording commands
		endCommandBuffer(deviceInterface, *commandBuffer);

		// The stage at which execution will wait for the sparse binding operations to finish
		const VkPipelineStageFlags stageBits[] = { VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT };

		// Submit commands for execution and wait for completion
		submitCommandsAndWait(deviceInterface, getDevice(), computeQueue.queueHandle, *commandBuffer, 1u, &imageMemoryBindSemaphore.get(), stageBits,
			0, DE_NULL, m_useDeviceGroups, firstDeviceID);

		// Retrieve data from buffer to host memory
		invalidateAlloc(deviceInterface, getDevice(), *outputBufferAlloc);
		deUint8*	outputData	= static_cast<deUint8*>(outputBufferAlloc->getHostPtr());
		void*		planePointers[PlanarFormatDescription::MAX_PLANES];

		for (deUint32 planeNdx = 0; planeNdx < formatDescription.numPlanes; ++planeNdx)
			planePointers[planeNdx] = outputData + static_cast<size_t>(planeOffsets[planeNdx]);

		// Wait for sparse queue to become idle (note: known to fail under VulkanSC)
		deviceInterface.queueWaitIdle(sparseQueue.queueHandle);

		// Write result images to the log file
		for (deUint32 channelNdx = 0; channelNdx < 4; ++channelNdx)
		{
			if (!formatDescription.hasChannelNdx(channelNdx))
				continue;
			deUint32					planeNdx					= formatDescription.channels[channelNdx].planeNdx;
			vk::VkFormat				planeCompatibleFormat		= getPlaneCompatibleFormatForWriting(formatDescription, planeNdx);
			vk::PlanarFormatDescription	compatibleFormatDescription	= (planeCompatibleFormat != getPlaneCompatibleFormat(formatDescription, planeNdx)) ? getPlanarFormatDescription(planeCompatibleFormat) : formatDescription;
			const tcu::UVec3			compatibleShaderGridSize	(shaderGridSize.x() / formatDescription.blockWidth, shaderGridSize.y() / formatDescription.blockHeight, shaderGridSize.z() / 1u);
			tcu::ConstPixelBufferAccess	pixelBuffer					= vk::getChannelAccess(compatibleFormatDescription, compatibleShaderGridSize, planeRowPitches, (const void* const*)planePointers, channelNdx);
			std::ostringstream str;
			str << "image" << channelNdx;
			m_context.getTestContext().getLog() << tcu::LogImage(str.str(), str.str(), pixelBuffer);
		}

		// Validate results
		for (deUint32 channelNdx = 0; channelNdx < 4; ++channelNdx)
		{
			if (!formatDescription.hasChannelNdx(channelNdx))
				continue;

			deUint32						planeNdx					= formatDescription.channels[channelNdx].planeNdx;
			const VkImageAspectFlags		aspect						= (formatDescription.numPlanes > 1) ? getPlaneAspect(planeNdx) : VK_IMAGE_ASPECT_COLOR_BIT;
			const deUint32					aspectIndex					= getSparseAspectRequirementsIndex(sparseMemoryRequirements, aspect);

			if (aspectIndex == NO_MATCH_FOUND)
				TCU_THROW(NotSupportedError, "Image aspect not supported");

			VkSparseImageMemoryRequirements	aspectRequirements			= sparseMemoryRequirements[aspectIndex];

			vk::VkFormat					planeCompatibleFormat		= getPlaneCompatibleFormatForWriting(formatDescription, planeNdx);
			vk::PlanarFormatDescription		compatibleFormatDescription	= (planeCompatibleFormat != getPlaneCompatibleFormat(formatDescription, planeNdx)) ? getPlanarFormatDescription(planeCompatibleFormat) : formatDescription;
			const tcu::UVec3				compatibleShaderGridSize	(shaderGridSize.x() / formatDescription.blockWidth, shaderGridSize.y() / formatDescription.blockHeight, shaderGridSize.z() / 1u);
			VkExtent3D						compatibleImageSize			{ imageCreateInfo.extent.width / formatDescription.blockWidth, imageCreateInfo.extent.height / formatDescription.blockHeight, imageCreateInfo.extent.depth / 1u };
			VkExtent3D						compatibleImageGranularity	{ aspectRequirements.formatProperties.imageGranularity.width / formatDescription.blockWidth,
																		  aspectRequirements.formatProperties.imageGranularity.height / formatDescription.blockHeight,
																		  aspectRequirements.formatProperties.imageGranularity.depth / 1u };
			tcu::ConstPixelBufferAccess		pixelBuffer					= vk::getChannelAccess(compatibleFormatDescription, compatibleShaderGridSize, planeRowPitches, (const void* const*)planePointers, channelNdx);
			VkExtent3D						planeExtent					= getPlaneExtent(compatibleFormatDescription, compatibleImageSize, planeNdx, 0u);
			tcu::IVec3						pixelDivider				= pixelBuffer.getDivider();

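			// If mip level 0 lies outside the mip tail, only every other memory block was bound:
			// bound blocks must contain the reference values, while unbound blocks must read as
			// zero on implementations reporting residencyNonResidentStrict.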
			if (aspectRequirements.imageMipTailFirstLod > 0u)
			{
				const tcu::UVec3					numSparseBinds	= alignedDivide(planeExtent, compatibleImageGranularity);
				const tcu::UVec3					lastBlockExtent	= tcu::UVec3(planeExtent.width  % compatibleImageGranularity.width  ? planeExtent.width  % compatibleImageGranularity.width  : compatibleImageGranularity.width,
																				 planeExtent.height % compatibleImageGranularity.height ? planeExtent.height % compatibleImageGranularity.height : compatibleImageGranularity.height,
																				 planeExtent.depth  % compatibleImageGranularity.depth  ? planeExtent.depth  % compatibleImageGranularity.depth  : compatibleImageGranularity.depth);

				for (deUint32 layerNdx = 0; layerNdx < imageCreateInfo.arrayLayers; ++layerNdx)
				{
					for (deUint32 z = 0; z < numSparseBinds.z(); ++z)
					for (deUint32 y = 0; y < numSparseBinds.y(); ++y)
					for (deUint32 x = 0; x < numSparseBinds.x(); ++x)
					{
						VkExtent3D offset;
						offset.width	= x * compatibleImageGranularity.width;
						offset.height	= y * compatibleImageGranularity.height;
						offset.depth	= z * compatibleImageGranularity.depth + layerNdx * numSparseBinds.z() * compatibleImageGranularity.depth;

						VkExtent3D extent;
						extent.width	= (x == numSparseBinds.x() - 1) ? lastBlockExtent.x() : compatibleImageGranularity.width;
						extent.height	= (y == numSparseBinds.y() - 1) ? lastBlockExtent.y() : compatibleImageGranularity.height;
						extent.depth	= (z == numSparseBinds.z() - 1) ? lastBlockExtent.z() : compatibleImageGranularity.depth;

						const deUint32 linearIndex = x + y * numSparseBinds.x() + z * numSparseBinds.x() * numSparseBinds.y() + layerNdx * numSparseBinds.x() * numSparseBinds.y() * numSparseBinds.z();

						if (linearIndex % 2u == 0u)
						{
							for (deUint32 offsetZ = offset.depth; offsetZ < offset.depth + extent.depth; ++offsetZ)
							for (deUint32 offsetY = offset.height; offsetY < offset.height + extent.height; ++offsetY)
							for (deUint32 offsetX = offset.width; offsetX < offset.width + extent.width; ++offsetX)
							{
								deUint32	iReferenceValue;
								float		fReferenceValue;

								switch (channelNdx)
								{
									case 0:
										iReferenceValue = offsetX % 127u;
										fReferenceValue = static_cast<float>(iReferenceValue) / 127.f;
										break;
									case 1:
										iReferenceValue = offsetY % 127u;
										fReferenceValue = static_cast<float>(iReferenceValue) / 127.f;
										break;
									case 2:
										iReferenceValue = offsetZ % 127u;
										fReferenceValue = static_cast<float>(iReferenceValue) / 127.f;
										break;
									case 3:
										// For A8_UNORM we use the same values as the normal red channel, as per the shader.
										iReferenceValue = (isAlphaOnly ? offsetX % 127u : 1u);
										fReferenceValue = (isAlphaOnly ? static_cast<float>(iReferenceValue) / 127.f : 1.f);
										break;
									default:	DE_FATAL("Unexpected channel index");	break;
								}

								float acceptableError = epsilon;

								switch (formatDescription.channels[channelNdx].type)
								{
									case tcu::TEXTURECHANNELCLASS_SIGNED_INTEGER:
									case tcu::TEXTURECHANNELCLASS_UNSIGNED_INTEGER:
									{
										const tcu::UVec4 outputValue = pixelBuffer.getPixelUint(offsetX * pixelDivider.x(), offsetY * pixelDivider.y(), offsetZ * pixelDivider.z());

										if (outputValue.x() != iReferenceValue)
											return tcu::TestStatus::fail("Failed");

										break;
									}
									case tcu::TEXTURECHANNELCLASS_UNSIGNED_FIXED_POINT:
									case tcu::TEXTURECHANNELCLASS_SIGNED_FIXED_POINT:
									{
										float fixedPointError = tcu::TexVerifierUtil::computeFixedPointError(formatDescription.channels[channelNdx].sizeBits);
										acceptableError += fixedPointError;
										const tcu::Vec4 outputValue = pixelBuffer.getPixel(offsetX * pixelDivider.x(), offsetY * pixelDivider.y(), offsetZ * pixelDivider.z());

										if (deAbs(outputValue.x() - fReferenceValue) > acceptableError)
											return tcu::TestStatus::fail("Failed");

										break;
									}
									case tcu::TEXTURECHANNELCLASS_FLOATING_POINT:
									{
										const tcu::Vec4 outputValue = pixelBuffer.getPixel(offsetX * pixelDivider.x(), offsetY * pixelDivider.y(), offsetZ * pixelDivider.z());

										if (deAbs(outputValue.x() - fReferenceValue) > acceptableError)
											return tcu::TestStatus::fail("Failed");

										break;
									}
									default:	DE_FATAL("Unexpected channel type");	break;
								}
							}
						}
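						// Unbound blocks: with residencyNonResidentStrict, reads must return zero.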
						else if (physicalDeviceProperties.sparseProperties.residencyNonResidentStrict)
						{
							for (deUint32 offsetZ = offset.depth; offsetZ < offset.depth + extent.depth; ++offsetZ)
							for (deUint32 offsetY = offset.height; offsetY < offset.height + extent.height; ++offsetY)
							for (deUint32 offsetX = offset.width; offsetX < offset.width + extent.width; ++offsetX)
							{
								float acceptableError = epsilon;

								switch (formatDescription.channels[channelNdx].type)
								{
									case tcu::TEXTURECHANNELCLASS_SIGNED_INTEGER:
									case tcu::TEXTURECHANNELCLASS_UNSIGNED_INTEGER:
									{
										const tcu::UVec4 outputValue = pixelBuffer.getPixelUint(offsetX * pixelDivider.x(), offsetY * pixelDivider.y(), offsetZ * pixelDivider.z());

										if (outputValue.x() != 0u)
											return tcu::TestStatus::fail("Failed");

										break;
									}
									case tcu::TEXTURECHANNELCLASS_UNSIGNED_FIXED_POINT:
									case tcu::TEXTURECHANNELCLASS_SIGNED_FIXED_POINT:
									{
										float fixedPointError = tcu::TexVerifierUtil::computeFixedPointError(formatDescription.channels[channelNdx].sizeBits);
										acceptableError += fixedPointError;
										const tcu::Vec4 outputValue = pixelBuffer.getPixel(offsetX * pixelDivider.x(), offsetY * pixelDivider.y(), offsetZ * pixelDivider.z());

										if (deAbs(outputValue.x()) > acceptableError)
											return tcu::TestStatus::fail("Failed");

										break;
									}
									case tcu::TEXTURECHANNELCLASS_FLOATING_POINT:
									{
										const tcu::Vec4 outputValue = pixelBuffer.getPixel(offsetX * pixelDivider.x(), offsetY * pixelDivider.y(), offsetZ * pixelDivider.z());

										if (deAbs(outputValue.x()) > acceptableError)
											return tcu::TestStatus::fail("Failed");

										break;
									}
									default:	DE_FATAL("Unexpected channel type");	break;
								}
							}
						}
					}
				}
			}
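			// Mip level 0 lies within the (fully bound) mip tail, so every texel must match the reference.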
			else
			{
				for (deUint32 offsetZ = 0u; offsetZ < planeExtent.depth * imageCreateInfo.arrayLayers; ++offsetZ)
				for (deUint32 offsetY = 0u; offsetY < planeExtent.height; ++offsetY)
				for (deUint32 offsetX = 0u; offsetX < planeExtent.width; ++offsetX)
				{
					deUint32	iReferenceValue;
					float		fReferenceValue;
					switch (channelNdx)
					{
						case 0:
							iReferenceValue = offsetX % 127u;
							fReferenceValue = static_cast<float>(iReferenceValue) / 127.f;
							break;
						case 1:
							iReferenceValue = offsetY % 127u;
							fReferenceValue = static_cast<float>(iReferenceValue) / 127.f;
							break;
						case 2:
							iReferenceValue = offsetZ % 127u;
							fReferenceValue = static_cast<float>(iReferenceValue) / 127.f;
							break;
						case 3:
							iReferenceValue = (isAlphaOnly ? offsetX % 127u : 1u);
							fReferenceValue = (isAlphaOnly ? static_cast<float>(iReferenceValue) / 127.f : 1.f);
							break;
						default:	DE_FATAL("Unexpected channel index");	break;
					}
					float acceptableError = epsilon;

					switch (formatDescription.channels[channelNdx].type)
					{
						case tcu::TEXTURECHANNELCLASS_SIGNED_INTEGER:
						case tcu::TEXTURECHANNELCLASS_UNSIGNED_INTEGER:
						{
							const tcu::UVec4 outputValue = pixelBuffer.getPixelUint(offsetX * pixelDivider.x(), offsetY * pixelDivider.y(), offsetZ * pixelDivider.z());

							if (outputValue.x() != iReferenceValue)
								return tcu::TestStatus::fail("Failed");

							break;
						}
						case tcu::TEXTURECHANNELCLASS_UNSIGNED_FIXED_POINT:
						case tcu::TEXTURECHANNELCLASS_SIGNED_FIXED_POINT:
						{
							float fixedPointError = tcu::TexVerifierUtil::computeFixedPointError(formatDescription.channels[channelNdx].sizeBits);
							acceptableError += fixedPointError;
							const tcu::Vec4 outputValue = pixelBuffer.getPixel(offsetX * pixelDivider.x(), offsetY * pixelDivider.y(), offsetZ * pixelDivider.z());

							if (deAbs(outputValue.x() - fReferenceValue) > acceptableError)
								return tcu::TestStatus::fail("Failed");

							break;
						}
						case tcu::TEXTURECHANNELCLASS_FLOATING_POINT:
						{
							const tcu::Vec4 outputValue = pixelBuffer.getPixel(offsetX * pixelDivider.x(), offsetY * pixelDivider.y(), offsetZ * pixelDivider.z());

							if (deAbs(outputValue.x() - fReferenceValue) > acceptableError)
								return tcu::TestStatus::fail("Failed");

							break;
						}
						default:	DE_FATAL("Unexpected channel type");	break;
					}
				}
			}
		}
	}

	return tcu::TestStatus::pass("Passed");
}

TestInstance* ImageSparseResidencyCase::createInstance (Context& context) const
{
	return new ImageSparseResidencyInstance(context, m_imageType, m_imageSize, m_format, m_useDeviceGroups);
}

std::vector<TestFormat> getSparseResidencyTestFormats (ImageType imageType, bool addExtraFormat)
{
	auto formats = getTestFormats(imageType);
#ifndef CTS_USES_VULKANSC
	if (addExtraFormat)
		formats.push_back(TestFormat{ VK_FORMAT_A8_UNORM_KHR });
#endif // CTS_USES_VULKANSC
	return formats;
}

} // anonymous ns

tcu::TestCaseGroup* createImageSparseResidencyTestsCommon (tcu::TestContext& testCtx, de::MovePtr<tcu::TestCaseGroup> testGroup, const bool useDeviceGroup = false)
{
	const std::vector<TestImageParameters> imageParameters
	{
		{ IMAGE_TYPE_2D,			{ tcu::UVec3(512u, 256u,  1u),	tcu::UVec3(1024u, 128u, 1u),	tcu::UVec3(11u,  137u, 1u) },	getSparseResidencyTestFormats(IMAGE_TYPE_2D, !useDeviceGroup) },
		{ IMAGE_TYPE_2D_ARRAY,		{ tcu::UVec3(512u, 256u,  6u),	tcu::UVec3(1024u, 128u, 8u),	tcu::UVec3(11u,  137u, 3u) },	getSparseResidencyTestFormats(IMAGE_TYPE_2D_ARRAY, !useDeviceGroup) },
		{ IMAGE_TYPE_CUBE,			{ tcu::UVec3(256u, 256u,  1u),	tcu::UVec3(128u,  128u, 1u),	tcu::UVec3(137u, 137u, 1u) },	getSparseResidencyTestFormats(IMAGE_TYPE_CUBE, !useDeviceGroup) },
		{ IMAGE_TYPE_CUBE_ARRAY,	{ tcu::UVec3(256u, 256u,  6u),	tcu::UVec3(128u,  128u, 8u),	tcu::UVec3(137u, 137u, 3u) },	getSparseResidencyTestFormats(IMAGE_TYPE_CUBE_ARRAY, !useDeviceGroup) },
		{ IMAGE_TYPE_3D,			{ tcu::UVec3(512u, 256u, 16u),	tcu::UVec3(1024u, 128u, 8u),	tcu::UVec3(11u,  137u, 3u) },	getSparseResidencyTestFormats(IMAGE_TYPE_3D, !useDeviceGroup) },
	};

	for (size_t imageTypeNdx = 0; imageTypeNdx < imageParameters.size(); ++imageTypeNdx)
	{
		const ImageType					imageType = imageParameters[imageTypeNdx].imageType;
		de::MovePtr<tcu::TestCaseGroup> imageTypeGroup(new tcu::TestCaseGroup(testCtx, getImageTypeName(imageType).c_str()));

		for (size_t formatNdx = 0; formatNdx < imageParameters[imageTypeNdx].formats.size(); ++formatNdx)
		{
			const VkFormat					format				= imageParameters[imageTypeNdx].formats[formatNdx].format;
			tcu::UVec3						imageSizeAlignment	= getImageSizeAlignment(format);
			de::MovePtr<tcu::TestCaseGroup> formatGroup			(new tcu::TestCaseGroup(testCtx, getImageFormatID(format).c_str()));

			for (size_t imageSizeNdx = 0; imageSizeNdx < imageParameters[imageTypeNdx].imageSizes.size(); ++imageSizeNdx)
			{
				const tcu::UVec3 imageSize = imageParameters[imageTypeNdx].imageSizes[imageSizeNdx];

				// Skip tests for images with odd sizes for some YCbCr formats
				if ((imageSize.x() % imageSizeAlignment.x()) != 0)
					continue;
				if ((imageSize.y() % imageSizeAlignment.y()) != 0)
					continue;

				std::ostringstream stream;
				stream << imageSize.x() << "_" << imageSize.y() << "_" << imageSize.z();

				formatGroup->addChild(new ImageSparseResidencyCase(testCtx, stream.str(), imageType, imageSize, format, glu::GLSL_VERSION_440, useDeviceGroup));
			}
			imageTypeGroup->addChild(formatGroup.release());
		}
		testGroup->addChild(imageTypeGroup.release());
	}

	return testGroup.release();
}

tcu::TestCaseGroup* createImageSparseResidencyTests (tcu::TestContext& testCtx)
{
	de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "image_sparse_residency"));
	return createImageSparseResidencyTestsCommon(testCtx, testGroup);
}

tcu::TestCaseGroup* createDeviceGroupImageSparseResidencyTests (tcu::TestContext& testCtx)
{
	de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "device_group_image_sparse_residency"));
	return createImageSparseResidencyTestsCommon(testCtx, testGroup, true);
}

} // sparse
} // vkt