1/*-------------------------------------------------------------------------
2 * drawElements Quality Program OpenGL ES 3.1 Module
3 * -------------------------------------------------
4 *
5 * Copyright 2014 The Android Open Source Project
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 *      http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 *//*!
20 * \file
21 * \brief Synchronization Tests
22 *//*--------------------------------------------------------------------*/
23
24#include "es31fSynchronizationTests.hpp"
25#include "tcuTestLog.hpp"
26#include "tcuStringTemplate.hpp"
27#include "tcuSurface.hpp"
28#include "tcuRenderTarget.hpp"
29#include "gluRenderContext.hpp"
30#include "gluShaderProgram.hpp"
31#include "gluObjectWrapper.hpp"
32#include "gluPixelTransfer.hpp"
33#include "gluContextInfo.hpp"
34#include "glwFunctions.hpp"
35#include "glwEnums.hpp"
36#include "deStringUtil.hpp"
37#include "deSharedPtr.hpp"
38#include "deMemory.h"
39#include "deRandom.hpp"
40
41#include <map>
42
43namespace deqp
44{
45namespace gles31
46{
47namespace Functional
48{
49namespace
50{
51
52static bool checkSupport(Context& ctx)
53{
54	auto ctxType = ctx.getRenderContext().getType();
55	return contextSupports(ctxType, glu::ApiType::es(3, 2)) ||
56		   contextSupports(ctxType, glu::ApiType::core(4, 5)) ||
57		   ctx.getContextInfo().isExtensionSupported("GL_OES_shader_image_atomic");
58}
59
60static bool validateSortedAtomicRampAdditionValueChain (const std::vector<deUint32>& valueChain, deUint32 sumValue, int& invalidOperationNdx, deUint32& errorDelta, deUint32& errorExpected)
61{
62	std::vector<deUint32> chainDelta(valueChain.size());
63
64	for (int callNdx = 0; callNdx < (int)valueChain.size(); ++callNdx)
65		chainDelta[callNdx] = ((callNdx + 1 == (int)valueChain.size()) ? (sumValue) : (valueChain[callNdx+1])) - valueChain[callNdx];
66
67	// chainDelta contains now the actual additions applied to the value
68	// check there exists an addition ramp form 1 to ...
69	std::sort(chainDelta.begin(), chainDelta.end());
70
71	for (int callNdx = 0; callNdx < (int)valueChain.size(); ++callNdx)
72	{
73		if ((int)chainDelta[callNdx] != callNdx+1)
74		{
75			invalidOperationNdx = callNdx;
76			errorDelta = chainDelta[callNdx];
77			errorExpected = callNdx+1;
78
79			return false;
80		}
81	}
82
83	return true;
84}
85
86static void readBuffer (const glw::Functions& gl, deUint32 target, int numElements, std::vector<deUint32>& result)
87{
88	const void* ptr = gl.mapBufferRange(target, 0, (int)(sizeof(deUint32) * numElements), GL_MAP_READ_BIT);
89	GLU_EXPECT_NO_ERROR(gl.getError(), "map");
90
91	if (!ptr)
92		throw tcu::TestError("mapBufferRange returned NULL");
93
94	result.resize(numElements);
95	memcpy(&result[0], ptr, sizeof(deUint32) * numElements);
96
97	if (gl.unmapBuffer(target) == GL_FALSE)
98		throw tcu::TestError("unmapBuffer returned false");
99}
100
101static deUint32 readBufferUint32 (const glw::Functions& gl, deUint32 target)
102{
103	std::vector<deUint32> vec;
104
105	readBuffer(gl, target, 1, vec);
106
107	return vec[0];
108}
109
110//! Generate a ramp of values from 1 to numElements, and shuffle it
111void generateShuffledRamp (int numElements, std::vector<int>& ramp)
112{
113	de::Random rng(0xabcd);
114
115	// some positive (non-zero) unique values
116	ramp.resize(numElements);
117	for (int callNdx = 0; callNdx < numElements; ++callNdx)
118		ramp[callNdx] = callNdx + 1;
119
120	rng.shuffle(ramp.begin(), ramp.end());
121}
122
123static std::string specializeShader(Context& context, const char* code)
124{
125	auto					ctxType			= context.getRenderContext().getType();
126	const bool				isES32orGL45	= glu::contextSupports(ctxType, glu::ApiType::es(3, 2)) ||
127											  glu::contextSupports(ctxType, glu::ApiType::core(4, 5));
128	const glu::GLSLVersion	glslVersion		= glu::getContextTypeGLSLVersion(ctxType);
129
130	std::map<std::string, std::string> specializationMap;
131	specializationMap["GLSL_VERSION_DECL"]				= glu::getGLSLVersionDeclaration(glslVersion);
132	specializationMap["SHADER_IMAGE_ATOMIC_REQUIRE"]	= isES32orGL45 ? "" : "#extension GL_OES_shader_image_atomic : require";
133
134	return tcu::StringTemplate(code).specialize(specializationMap);
135}
136
137class InterInvocationTestCase : public TestCase
138{
139public:
140	enum StorageType
141	{
142		STORAGE_BUFFER = 0,
143		STORAGE_IMAGE,
144
145		STORAGE_LAST
146	};
147	enum CaseFlags
148	{
149		FLAG_ATOMIC				= 0x1,
150		FLAG_ALIASING_STORAGES	= 0x2,
151		FLAG_IN_GROUP			= 0x4,
152	};
153
154						InterInvocationTestCase		(Context& context, const char* name, const char* desc, StorageType storage, int flags = 0);
155						~InterInvocationTestCase	(void);
156
157private:
158	void				init						(void);
159	void				deinit						(void);
160	IterateResult		iterate						(void);
161
162	void				runCompute					(void);
163	bool				verifyResults				(void);
164	virtual std::string	genShaderSource				(void) const = 0;
165
166protected:
167	std::string			genBarrierSource			(void) const;
168
169	const StorageType	m_storage;
170	const bool			m_useAtomic;
171	const bool			m_aliasingStorages;
172	const bool			m_syncWithGroup;
173	const int			m_workWidth;				// !< total work width
174	const int			m_workHeight;				// !<     ...    height
175	const int			m_localWidth;				// !< group width
176	const int			m_localHeight;				// !< group height
177	const int			m_elementsPerInvocation;	// !< elements accessed by a single invocation
178
179private:
180	glw::GLuint			m_storageBuf;
181	glw::GLuint			m_storageTex;
182	glw::GLuint			m_resultBuf;
183	glu::ShaderProgram*	m_program;
184};
185
186InterInvocationTestCase::InterInvocationTestCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
187	: TestCase					(context, name, desc)
188	, m_storage					(storage)
189	, m_useAtomic				((flags & FLAG_ATOMIC) != 0)
190	, m_aliasingStorages		((flags & FLAG_ALIASING_STORAGES) != 0)
191	, m_syncWithGroup			((flags & FLAG_IN_GROUP) != 0)
192	, m_workWidth				(256)
193	, m_workHeight				(256)
194	, m_localWidth				(16)
195	, m_localHeight				(8)
196	, m_elementsPerInvocation	(8)
197	, m_storageBuf				(0)
198	, m_storageTex				(0)
199	, m_resultBuf				(0)
200	, m_program					(DE_NULL)
201{
202	DE_ASSERT(m_storage < STORAGE_LAST);
203	DE_ASSERT(m_localWidth*m_localHeight <= 128); // minimum MAX_COMPUTE_WORK_GROUP_INVOCATIONS value
204}
205
206InterInvocationTestCase::~InterInvocationTestCase (void)
207{
208	deinit();
209}
210
211void InterInvocationTestCase::init (void)
212{
213	const glw::Functions&	gl				= m_context.getRenderContext().getFunctions();
214
215	// requirements
216
217	if (m_useAtomic && m_storage == STORAGE_IMAGE && !checkSupport(m_context))
218		throw tcu::NotSupportedError("Test requires GL_OES_shader_image_atomic extension");
219
220	// program
221
222	m_program = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genShaderSource()));
223	m_testCtx.getLog() << *m_program;
224	if (!m_program->isOk())
225		throw tcu::TestError("could not build program");
226
227	// source
228
229	if (m_storage == STORAGE_BUFFER)
230	{
231		const int				bufferElements	= m_workWidth * m_workHeight * m_elementsPerInvocation;
232		const int				bufferSize		= bufferElements * (int)sizeof(deUint32);
233		std::vector<deUint32>	zeroBuffer		(bufferElements, 0);
234
235		m_testCtx.getLog() << tcu::TestLog::Message << "Allocating zero-filled buffer for storage, size " << bufferElements << " elements, " << bufferSize << " bytes." << tcu::TestLog::EndMessage;
236
237		gl.genBuffers(1, &m_storageBuf);
238		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_storageBuf);
239		gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &zeroBuffer[0], GL_STATIC_DRAW);
240		GLU_EXPECT_NO_ERROR(gl.getError(), "gen storage buf");
241	}
242	else if (m_storage == STORAGE_IMAGE)
243	{
244		const int				bufferElements	= m_workWidth * m_workHeight * m_elementsPerInvocation;
245		const int				bufferSize		= bufferElements * (int)sizeof(deUint32);
246
247		m_testCtx.getLog() << tcu::TestLog::Message << "Allocating image for storage, size " << m_workWidth << "x" << m_workHeight * m_elementsPerInvocation << ", " << bufferSize << " bytes." << tcu::TestLog::EndMessage;
248
249		gl.genTextures(1, &m_storageTex);
250		gl.bindTexture(GL_TEXTURE_2D, m_storageTex);
251		gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32I, m_workWidth, m_workHeight * m_elementsPerInvocation);
252		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
253		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
254		GLU_EXPECT_NO_ERROR(gl.getError(), "gen storage image");
255
256		// Zero-fill
257		m_testCtx.getLog() << tcu::TestLog::Message << "Filling image with 0." << tcu::TestLog::EndMessage;
258
259		{
260			const std::vector<deInt32> zeroBuffer(m_workWidth * m_workHeight * m_elementsPerInvocation, 0);
261			gl.texSubImage2D(GL_TEXTURE_2D, 0, 0, 0, m_workWidth, m_workHeight * m_elementsPerInvocation, GL_RED_INTEGER, GL_INT, &zeroBuffer[0]);
262			GLU_EXPECT_NO_ERROR(gl.getError(), "specify image contents");
263		}
264	}
265	else
266		DE_ASSERT(DE_FALSE);
267
268	// destination
269
270	{
271		const int				bufferElements	= m_workWidth * m_workHeight;
272		const int				bufferSize		= bufferElements * (int)sizeof(deUint32);
273		std::vector<deInt32>	negativeBuffer	(bufferElements, -1);
274
275		m_testCtx.getLog() << tcu::TestLog::Message << "Allocating -1 filled buffer for results, size " << bufferElements << " elements, " << bufferSize << " bytes." << tcu::TestLog::EndMessage;
276
277		gl.genBuffers(1, &m_resultBuf);
278		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_resultBuf);
279		gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &negativeBuffer[0], GL_STATIC_DRAW);
280		GLU_EXPECT_NO_ERROR(gl.getError(), "gen storage buf");
281	}
282}
283
284void InterInvocationTestCase::deinit (void)
285{
286	if (m_storageBuf)
287	{
288		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_storageBuf);
289		m_storageBuf = DE_NULL;
290	}
291
292	if (m_storageTex)
293	{
294		m_context.getRenderContext().getFunctions().deleteTextures(1, &m_storageTex);
295		m_storageTex = DE_NULL;
296	}
297
298	if (m_resultBuf)
299	{
300		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_resultBuf);
301		m_resultBuf = DE_NULL;
302	}
303
304	delete m_program;
305	m_program = DE_NULL;
306}
307
308InterInvocationTestCase::IterateResult InterInvocationTestCase::iterate (void)
309{
310	// Dispatch
311	runCompute();
312
313	// Verify buffer contents
314	if (verifyResults())
315		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
316	else
317		m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, (std::string((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) + " content verification failed").c_str());
318
319	return STOP;
320}
321
322void InterInvocationTestCase::runCompute (void)
323{
324	const glw::Functions&	gl		= m_context.getRenderContext().getFunctions();
325	const int				groupsX	= m_workWidth / m_localWidth;
326	const int				groupsY	= m_workHeight / m_localHeight;
327
328	DE_ASSERT((m_workWidth % m_localWidth) == 0);
329	DE_ASSERT((m_workHeight % m_localHeight) == 0);
330
331	m_testCtx.getLog()
332		<< tcu::TestLog::Message
333		<< "Dispatching compute.\n"
334		<< "	group size: " << m_localWidth << "x" << m_localHeight << "\n"
335		<< "	dispatch size: " << groupsX << "x" << groupsY << "\n"
336		<< "	total work size: " << m_workWidth << "x" << m_workHeight << "\n"
337		<< tcu::TestLog::EndMessage;
338
339	gl.useProgram(m_program->getProgram());
340
341	// source
342	if (m_storage == STORAGE_BUFFER && !m_aliasingStorages)
343	{
344		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storageBuf);
345		GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buf");
346	}
347	else if (m_storage == STORAGE_BUFFER && m_aliasingStorages)
348	{
349		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storageBuf);
350		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, m_storageBuf);
351		GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buf");
352
353		m_testCtx.getLog() << tcu::TestLog::Message << "Binding same buffer object to buffer storages." << tcu::TestLog::EndMessage;
354	}
355	else if (m_storage == STORAGE_IMAGE && !m_aliasingStorages)
356	{
357		gl.bindImageTexture(1, m_storageTex, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32I);
358		GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buf");
359	}
360	else if (m_storage == STORAGE_IMAGE && m_aliasingStorages)
361	{
362		gl.bindImageTexture(1, m_storageTex, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32I);
363		gl.bindImageTexture(2, m_storageTex, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32I);
364
365		GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buf");
366
367		m_testCtx.getLog() << tcu::TestLog::Message << "Binding same texture level to image storages." << tcu::TestLog::EndMessage;
368	}
369	else
370		DE_ASSERT(DE_FALSE);
371
372	// destination
373	gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_resultBuf);
374	GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buf");
375
376	// dispatch
377	gl.dispatchCompute(groupsX, groupsY, 1);
378	GLU_EXPECT_NO_ERROR(gl.getError(), "dispatchCompute");
379}
380
381bool InterInvocationTestCase::verifyResults (void)
382{
383	const glw::Functions&	gl					= m_context.getRenderContext().getFunctions();
384	const int				errorFloodThreshold	= 5;
385	int						numErrorsLogged		= 0;
386	const void*				mapped				= DE_NULL;
387	std::vector<deInt32>	results				(m_workWidth * m_workHeight);
388	bool					error				= false;
389
390	gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_resultBuf);
391	gl.memoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
392	mapped = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, m_workWidth * m_workHeight * sizeof(deInt32), GL_MAP_READ_BIT);
393	GLU_EXPECT_NO_ERROR(gl.getError(), "map buffer");
394
395	// copy to properly aligned array
396	deMemcpy(&results[0], mapped, m_workWidth * m_workHeight * sizeof(deUint32));
397
398	if (gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER) != GL_TRUE)
399		throw tcu::TestError("memory map store corrupted");
400
401	// check the results
402	for (int ndx = 0; ndx < (int)results.size(); ++ndx)
403	{
404		if (results[ndx] != 1)
405		{
406			error = true;
407
408			if (numErrorsLogged == 0)
409				m_testCtx.getLog() << tcu::TestLog::Message << "Result buffer failed, got unexpected values.\n" << tcu::TestLog::EndMessage;
410			if (numErrorsLogged++ < errorFloodThreshold)
411				m_testCtx.getLog() << tcu::TestLog::Message << "	Error at index " << ndx << ": expected 1, got " << results[ndx] << ".\n" << tcu::TestLog::EndMessage;
412			else
413			{
414				// after N errors, no point continuing verification
415				m_testCtx.getLog() << tcu::TestLog::Message << "	-- too many errors, skipping verification --\n" << tcu::TestLog::EndMessage;
416				break;
417			}
418		}
419	}
420
421	if (!error)
422		m_testCtx.getLog() << tcu::TestLog::Message << "Result buffer ok." << tcu::TestLog::EndMessage;
423	return !error;
424}
425
426std::string InterInvocationTestCase::genBarrierSource (void) const
427{
428	std::ostringstream buf;
429
430	if (m_syncWithGroup)
431	{
432		// Wait until all invocations in this work group have their texture/buffer read/write operations complete
433		// \note We could also use memoryBarrierBuffer() or memoryBarrierImage() in place of groupMemoryBarrier() but
434		//       we only require intra-workgroup synchronization.
435		buf << "\n"
436			<< "	groupMemoryBarrier();\n"
437			<< "	barrier();\n"
438			<< "\n";
439	}
440	else if (m_storage == STORAGE_BUFFER)
441	{
442		DE_ASSERT(!m_syncWithGroup);
443
444		// Waiting only for data written by this invocation. Since all buffer reads and writes are
445		// processed in order (within a single invocation), we don't have to do anything.
446		buf << "\n";
447	}
448	else if (m_storage == STORAGE_IMAGE)
449	{
450		DE_ASSERT(!m_syncWithGroup);
451
452		// Waiting only for data written by this invocation. But since operations complete in undefined
453		// order, we have to wait for them to complete.
454		buf << "\n"
455			<< "	memoryBarrierImage();\n"
456			<< "\n";
457	}
458	else
459		DE_ASSERT(DE_FALSE);
460
461	return buf.str();
462}
463
464class InvocationBasicCase : public InterInvocationTestCase
465{
466public:
467							InvocationBasicCase		(Context& context, const char* name, const char* desc, StorageType storage, int flags);
468private:
469	std::string				genShaderSource			(void) const;
470	virtual std::string		genShaderMainBlock		(void) const = 0;
471};
472
473InvocationBasicCase::InvocationBasicCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
474	: InterInvocationTestCase(context, name, desc, storage, flags)
475{
476}
477
478std::string InvocationBasicCase::genShaderSource (void) const
479{
480	const bool			useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
481	std::ostringstream	buf;
482
483	buf << "${GLSL_VERSION_DECL}\n"
484		<< ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
485		<< "layout (local_size_x=" << m_localWidth << ", local_size_y=" << m_localHeight << ") in;\n"
486		<< "layout(binding=0, std430) buffer Output\n"
487		<< "{\n"
488		<< "	highp int values[];\n"
489		<< "} sb_result;\n";
490
491	if (m_storage == STORAGE_BUFFER)
492		buf << "layout(binding=1, std430) coherent buffer Storage\n"
493			<< "{\n"
494			<< "	highp int values[];\n"
495			<< "} sb_store;\n"
496			<< "\n"
497			<< "highp int getIndex (in highp uvec2 localID, in highp int element)\n"
498			<< "{\n"
499			<< "	highp uint groupNdx = gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
500			<< "	return int((localID.y * gl_NumWorkGroups.x * gl_NumWorkGroups.y * gl_WorkGroupSize.x) + (groupNdx * gl_WorkGroupSize.x) + localID.x) * " << m_elementsPerInvocation << " + element;\n"
501			<< "}\n";
502	else if (m_storage == STORAGE_IMAGE)
503		buf << "layout(r32i, binding=1) coherent uniform highp iimage2D u_image;\n"
504			<< "\n"
505			<< "highp ivec2 getCoord (in highp uvec2 localID, in highp int element)\n"
506			<< "{\n"
507			<< "	return ivec2(int(gl_WorkGroupID.x * gl_WorkGroupSize.x + localID.x), int(gl_WorkGroupID.y * gl_WorkGroupSize.y + localID.y) + element * " << m_workHeight << ");\n"
508			<< "}\n";
509	else
510		DE_ASSERT(DE_FALSE);
511
512	buf << "\n"
513		<< "void main (void)\n"
514		<< "{\n"
515		<< "	int resultNdx   = int(gl_GlobalInvocationID.y * gl_NumWorkGroups.x * gl_WorkGroupSize.x + gl_GlobalInvocationID.x);\n"
516		<< "	int groupNdx    = int(gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x);\n"
517		<< "	bool allOk      = true;\n"
518		<< "\n"
519		<< genShaderMainBlock()
520		<< "\n"
521		<< "	sb_result.values[resultNdx] = (allOk) ? (1) : (0);\n"
522		<< "}\n";
523
524	return specializeShader(m_context, buf.str().c_str());
525}
526
527class InvocationWriteReadCase : public InvocationBasicCase
528{
529public:
530					InvocationWriteReadCase		(Context& context, const char* name, const char* desc, StorageType storage, int flags);
531private:
532	std::string		genShaderMainBlock			(void) const;
533};
534
535InvocationWriteReadCase::InvocationWriteReadCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
536	: InvocationBasicCase(context, name, desc, storage, flags)
537{
538}
539
540std::string InvocationWriteReadCase::genShaderMainBlock (void) const
541{
542	std::ostringstream buf;
543
544	// write
545
546	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
547	{
548		if (m_storage == STORAGE_BUFFER && m_useAtomic)
549			buf << "\tatomicAdd(sb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], groupNdx);\n";
550		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
551			buf << "\tsb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] = groupNdx;\n";
552		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
553			buf << "\timageAtomicAdd(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), int(groupNdx));\n";
554		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
555			buf << "\timageStore(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), ivec4(int(groupNdx), 0, 0, 0));\n";
556		else
557			DE_ASSERT(DE_FALSE);
558	}
559
560	// barrier
561
562	buf << genBarrierSource();
563
564	// read
565
566	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
567	{
568		const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+1) + ", " + de::toString(2*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");
569
570		if (m_storage == STORAGE_BUFFER && m_useAtomic)
571			buf << "\tallOk = allOk && (atomicExchange(sb_store.values[getIndex(" << localID << ", " << ndx << ")], 0) == groupNdx);\n";
572		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
573			buf << "\tallOk = allOk && (sb_store.values[getIndex(" << localID << ", " << ndx << ")] == groupNdx);\n";
574		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
575			buf << "\tallOk = allOk && (imageAtomicExchange(u_image, getCoord(" << localID << ", " << ndx << "), 0) == groupNdx);\n";
576		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
577			buf << "\tallOk = allOk && (imageLoad(u_image, getCoord(" << localID << ", " << ndx << ")).x == groupNdx);\n";
578		else
579			DE_ASSERT(DE_FALSE);
580	}
581
582	return buf.str();
583}
584
585class InvocationReadWriteCase : public InvocationBasicCase
586{
587public:
588					InvocationReadWriteCase		(Context& context, const char* name, const char* desc, StorageType storage, int flags);
589private:
590	std::string		genShaderMainBlock			(void) const;
591};
592
593InvocationReadWriteCase::InvocationReadWriteCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
594	: InvocationBasicCase(context, name, desc, storage, flags)
595{
596}
597
598std::string InvocationReadWriteCase::genShaderMainBlock (void) const
599{
600	std::ostringstream buf;
601
602	// read
603
604	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
605	{
606		const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+1) + ", " + de::toString(2*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");
607
608		if (m_storage == STORAGE_BUFFER && m_useAtomic)
609			buf << "\tallOk = allOk && (atomicExchange(sb_store.values[getIndex(" << localID << ", " << ndx << ")], 123) == 0);\n";
610		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
611			buf << "\tallOk = allOk && (sb_store.values[getIndex(" << localID << ", " << ndx << ")] == 0);\n";
612		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
613			buf << "\tallOk = allOk && (imageAtomicExchange(u_image, getCoord(" << localID << ", " << ndx << "), 123) == 0);\n";
614		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
615			buf << "\tallOk = allOk && (imageLoad(u_image, getCoord(" << localID << ", " << ndx << ")).x == 0);\n";
616		else
617			DE_ASSERT(DE_FALSE);
618	}
619
620	// barrier
621
622	buf << genBarrierSource();
623
624	// write
625
626	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
627	{
628		if (m_storage == STORAGE_BUFFER && m_useAtomic)
629			buf << "\tatomicAdd(sb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], groupNdx);\n";
630		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
631			buf << "\tsb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] = groupNdx;\n";
632		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
633			buf << "\timageAtomicAdd(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), int(groupNdx));\n";
634		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
635			buf << "\timageStore(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), ivec4(int(groupNdx), 0, 0, 0));\n";
636		else
637			DE_ASSERT(DE_FALSE);
638	}
639
640	return buf.str();
641}
642
643class InvocationOverWriteCase : public InvocationBasicCase
644{
645public:
646					InvocationOverWriteCase		(Context& context, const char* name, const char* desc, StorageType storage, int flags);
647private:
648	std::string		genShaderMainBlock			(void) const;
649};
650
651InvocationOverWriteCase::InvocationOverWriteCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
652	: InvocationBasicCase(context, name, desc, storage, flags)
653{
654}
655
656std::string InvocationOverWriteCase::genShaderMainBlock (void) const
657{
658	std::ostringstream buf;
659
660	// write
661
662	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
663	{
664		if (m_storage == STORAGE_BUFFER && m_useAtomic)
665			buf << "\tatomicAdd(sb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], 456);\n";
666		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
667			buf << "\tsb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] = 456;\n";
668		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
669			buf << "\timageAtomicAdd(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), 456);\n";
670		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
671			buf << "\timageStore(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), ivec4(456, 0, 0, 0));\n";
672		else
673			DE_ASSERT(DE_FALSE);
674	}
675
676	// barrier
677
678	buf << genBarrierSource();
679
680	// write over
681
682	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
683	{
684		// write another invocation's value or our own value depending on test type
685		const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+4) + ", " + de::toString(3*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");
686
687		if (m_storage == STORAGE_BUFFER && m_useAtomic)
688			buf << "\tatomicExchange(sb_store.values[getIndex(" << localID << ", " << ndx << ")], groupNdx);\n";
689		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
690			buf << "\tsb_store.values[getIndex(" << localID << ", " << ndx << ")] = groupNdx;\n";
691		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
692			buf << "\timageAtomicExchange(u_image, getCoord(" << localID << ", " << ndx << "), groupNdx);\n";
693		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
694			buf << "\timageStore(u_image, getCoord(" << localID << ", " << ndx << "), ivec4(groupNdx, 0, 0, 0));\n";
695		else
696			DE_ASSERT(DE_FALSE);
697	}
698
699	// barrier
700
701	buf << genBarrierSource();
702
703	// read
704
705	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
706	{
707		// check another invocation's value or our own value depending on test type
708		const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+1) + ", " + de::toString(2*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");
709
710		if (m_storage == STORAGE_BUFFER && m_useAtomic)
711			buf << "\tallOk = allOk && (atomicExchange(sb_store.values[getIndex(" << localID << ", " << ndx << ")], 123) == groupNdx);\n";
712		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
713			buf << "\tallOk = allOk && (sb_store.values[getIndex(" << localID << ", " << ndx << ")] == groupNdx);\n";
714		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
715			buf << "\tallOk = allOk && (imageAtomicExchange(u_image, getCoord(" << localID << ", " << ndx << "), 123) == groupNdx);\n";
716		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
717			buf << "\tallOk = allOk && (imageLoad(u_image, getCoord(" << localID << ", " << ndx << ")).x == groupNdx);\n";
718		else
719			DE_ASSERT(DE_FALSE);
720	}
721
722	return buf.str();
723}
724
725class InvocationAliasWriteCase : public InterInvocationTestCase
726{
727public:
728	enum TestType
729	{
730		TYPE_WRITE = 0,
731		TYPE_OVERWRITE,
732
733		TYPE_LAST
734	};
735
736					InvocationAliasWriteCase	(Context& context, const char* name, const char* desc, TestType type, StorageType storage, int flags);
737private:
738	std::string		genShaderSource				(void) const;
739
740	const TestType	m_type;
741};
742
743InvocationAliasWriteCase::InvocationAliasWriteCase (Context& context, const char* name, const char* desc, TestType type, StorageType storage, int flags)
744	: InterInvocationTestCase	(context, name, desc, storage, flags | FLAG_ALIASING_STORAGES)
745	, m_type					(type)
746{
747	DE_ASSERT(type < TYPE_LAST);
748}
749
750std::string InvocationAliasWriteCase::genShaderSource (void) const
751{
752	const bool			useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
753	std::ostringstream	buf;
754
755	buf << "${GLSL_VERSION_DECL}\n"
756		<< ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
757		<< "layout (local_size_x=" << m_localWidth << ", local_size_y=" << m_localHeight << ") in;\n"
758		<< "layout(binding=0, std430) buffer Output\n"
759		<< "{\n"
760		<< "	highp int values[];\n"
761		<< "} sb_result;\n";
762
763	if (m_storage == STORAGE_BUFFER)
764		buf << "layout(binding=1, std430) coherent buffer Storage0\n"
765			<< "{\n"
766			<< "	highp int values[];\n"
767			<< "} sb_store0;\n"
768			<< "layout(binding=2, std430) coherent buffer Storage1\n"
769			<< "{\n"
770			<< "	highp int values[];\n"
771			<< "} sb_store1;\n"
772			<< "\n"
773			<< "highp int getIndex (in highp uvec2 localID, in highp int element)\n"
774			<< "{\n"
775			<< "	highp uint groupNdx = gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
776			<< "	return int((localID.y * gl_NumWorkGroups.x * gl_NumWorkGroups.y * gl_WorkGroupSize.x) + (groupNdx * gl_WorkGroupSize.x) + localID.x) * " << m_elementsPerInvocation << " + element;\n"
777			<< "}\n";
778	else if (m_storage == STORAGE_IMAGE)
779		buf << "layout(r32i, binding=1) coherent uniform highp iimage2D u_image0;\n"
780			<< "layout(r32i, binding=2) coherent uniform highp iimage2D u_image1;\n"
781			<< "\n"
782			<< "highp ivec2 getCoord (in highp uvec2 localID, in highp int element)\n"
783			<< "{\n"
784			<< "	return ivec2(int(gl_WorkGroupID.x * gl_WorkGroupSize.x + localID.x), int(gl_WorkGroupID.y * gl_WorkGroupSize.y + localID.y) + element * " << m_workHeight << ");\n"
785			<< "}\n";
786	else
787		DE_ASSERT(DE_FALSE);
788
789	buf << "\n"
790		<< "void main (void)\n"
791		<< "{\n"
792		<< "	int resultNdx   = int(gl_GlobalInvocationID.y * gl_NumWorkGroups.x * gl_WorkGroupSize.x + gl_GlobalInvocationID.x);\n"
793		<< "	int groupNdx    = int(gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x);\n"
794		<< "	bool allOk      = true;\n"
795		<< "\n";
796
797	if (m_type == TYPE_OVERWRITE)
798	{
799		// write
800
801		for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
802		{
803			if (m_storage == STORAGE_BUFFER && m_useAtomic)
804				buf << "\tatomicAdd(sb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], 456);\n";
805			else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
806				buf << "\tsb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] = 456;\n";
807			else if (m_storage == STORAGE_IMAGE && m_useAtomic)
808				buf << "\timageAtomicAdd(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx << "), 456);\n";
809			else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
810				buf << "\timageStore(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx << "), ivec4(456, 0, 0, 0));\n";
811			else
812				DE_ASSERT(DE_FALSE);
813		}
814
815		// barrier
816
817		buf << genBarrierSource();
818	}
819	else
820		DE_ASSERT(m_type == TYPE_WRITE);
821
822	// write (again)
823
824	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
825	{
826		const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+2) + ", " + de::toString(2*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");
827
828		if (m_storage == STORAGE_BUFFER && m_useAtomic)
829			buf << "\tatomicExchange(sb_store1.values[getIndex(" << localID << ", " << ndx << ")], groupNdx);\n";
830		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
831			buf << "\tsb_store1.values[getIndex(" << localID << ", " << ndx << ")] = groupNdx;\n";
832		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
833			buf << "\timageAtomicExchange(u_image1, getCoord(" << localID << ", " << ndx << "), groupNdx);\n";
834		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
835			buf << "\timageStore(u_image1, getCoord(" << localID << ", " << ndx << "), ivec4(groupNdx, 0, 0, 0));\n";
836		else
837			DE_ASSERT(DE_FALSE);
838	}
839
840	// barrier
841
842	buf << genBarrierSource();
843
844	// read
845
846	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
847	{
848		if (m_storage == STORAGE_BUFFER && m_useAtomic)
849			buf << "\tallOk = allOk && (atomicExchange(sb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], 123) == groupNdx);\n";
850		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
851			buf << "\tallOk = allOk && (sb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] == groupNdx);\n";
852		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
853			buf << "\tallOk = allOk && (imageAtomicExchange(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx << "), 123) == groupNdx);\n";
854		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
855			buf << "\tallOk = allOk && (imageLoad(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx << ")).x == groupNdx);\n";
856		else
857			DE_ASSERT(DE_FALSE);
858	}
859
860	// return result
861
862	buf << "\n"
863		<< "	sb_result.values[resultNdx] = (allOk) ? (1) : (0);\n"
864		<< "}\n";
865
866	return specializeShader(m_context, buf.str().c_str());
867}
868
869namespace op
870{
871
872struct WriteData
873{
874	int targetHandle;
875	int seed;
876
877	static WriteData Generate(int targetHandle, int seed)
878	{
879		WriteData retVal;
880
881		retVal.targetHandle = targetHandle;
882		retVal.seed = seed;
883
884		return retVal;
885	}
886};
887
888struct ReadData
889{
890	int targetHandle;
891	int seed;
892
893	static ReadData Generate(int targetHandle, int seed)
894	{
895		ReadData retVal;
896
897		retVal.targetHandle = targetHandle;
898		retVal.seed = seed;
899
900		return retVal;
901	}
902};
903
904struct Barrier
905{
906};
907
908struct WriteDataInterleaved
909{
910	int		targetHandle;
911	int		seed;
912	bool	evenOdd;
913
914	static WriteDataInterleaved Generate(int targetHandle, int seed, bool evenOdd)
915	{
916		WriteDataInterleaved retVal;
917
918		retVal.targetHandle = targetHandle;
919		retVal.seed = seed;
920		retVal.evenOdd = evenOdd;
921
922		return retVal;
923	}
924};
925
926struct ReadDataInterleaved
927{
928	int targetHandle;
929	int seed0;
930	int seed1;
931
932	static ReadDataInterleaved Generate(int targetHandle, int seed0, int seed1)
933	{
934		ReadDataInterleaved retVal;
935
936		retVal.targetHandle = targetHandle;
937		retVal.seed0 = seed0;
938		retVal.seed1 = seed1;
939
940		return retVal;
941	}
942};
943
944struct ReadMultipleData
945{
946	int targetHandle0;
947	int seed0;
948	int targetHandle1;
949	int seed1;
950
951	static ReadMultipleData Generate(int targetHandle0, int seed0, int targetHandle1, int seed1)
952	{
953		ReadMultipleData retVal;
954
955		retVal.targetHandle0 = targetHandle0;
956		retVal.seed0 = seed0;
957		retVal.targetHandle1 = targetHandle1;
958		retVal.seed1 = seed1;
959
960		return retVal;
961	}
962};
963
964struct ReadZeroData
965{
966	int targetHandle;
967
968	static ReadZeroData Generate(int targetHandle)
969	{
970		ReadZeroData retVal;
971
972		retVal.targetHandle = targetHandle;
973
974		return retVal;
975	}
976};
977
978} // namespace op
979
980class InterCallTestCase;
981
982class InterCallOperations
983{
984public:
985	InterCallOperations& operator<< (const op::WriteData&);
986	InterCallOperations& operator<< (const op::ReadData&);
987	InterCallOperations& operator<< (const op::Barrier&);
988	InterCallOperations& operator<< (const op::ReadMultipleData&);
989	InterCallOperations& operator<< (const op::WriteDataInterleaved&);
990	InterCallOperations& operator<< (const op::ReadDataInterleaved&);
991	InterCallOperations& operator<< (const op::ReadZeroData&);
992
993private:
994	struct Command
995	{
996		enum CommandType
997		{
998			TYPE_WRITE = 0,
999			TYPE_READ,
1000			TYPE_BARRIER,
1001			TYPE_READ_MULTIPLE,
1002			TYPE_WRITE_INTERLEAVE,
1003			TYPE_READ_INTERLEAVE,
1004			TYPE_READ_ZERO,
1005
1006			TYPE_LAST
1007		};
1008
1009		CommandType type;
1010
1011		union CommandUnion
1012		{
1013			op::WriteData				write;
1014			op::ReadData				read;
1015			op::Barrier					barrier;
1016			op::ReadMultipleData		readMulti;
1017			op::WriteDataInterleaved	writeInterleave;
1018			op::ReadDataInterleaved		readInterleave;
1019			op::ReadZeroData			readZero;
1020		} u_cmd;
1021	};
1022
1023	friend class InterCallTestCase;
1024
1025	std::vector<Command> m_cmds;
1026};
1027
1028InterCallOperations& InterCallOperations::operator<< (const op::WriteData& cmd)
1029{
1030	m_cmds.push_back(Command());
1031	m_cmds.back().type = Command::TYPE_WRITE;
1032	m_cmds.back().u_cmd.write = cmd;
1033
1034	return *this;
1035}
1036
1037InterCallOperations& InterCallOperations::operator<< (const op::ReadData& cmd)
1038{
1039	m_cmds.push_back(Command());
1040	m_cmds.back().type = Command::TYPE_READ;
1041	m_cmds.back().u_cmd.read = cmd;
1042
1043	return *this;
1044}
1045
1046InterCallOperations& InterCallOperations::operator<< (const op::Barrier& cmd)
1047{
1048	m_cmds.push_back(Command());
1049	m_cmds.back().type = Command::TYPE_BARRIER;
1050	m_cmds.back().u_cmd.barrier = cmd;
1051
1052	return *this;
1053}
1054
1055InterCallOperations& InterCallOperations::operator<< (const op::ReadMultipleData& cmd)
1056{
1057	m_cmds.push_back(Command());
1058	m_cmds.back().type = Command::TYPE_READ_MULTIPLE;
1059	m_cmds.back().u_cmd.readMulti = cmd;
1060
1061	return *this;
1062}
1063
1064InterCallOperations& InterCallOperations::operator<< (const op::WriteDataInterleaved& cmd)
1065{
1066	m_cmds.push_back(Command());
1067	m_cmds.back().type = Command::TYPE_WRITE_INTERLEAVE;
1068	m_cmds.back().u_cmd.writeInterleave = cmd;
1069
1070	return *this;
1071}
1072
1073InterCallOperations& InterCallOperations::operator<< (const op::ReadDataInterleaved& cmd)
1074{
1075	m_cmds.push_back(Command());
1076	m_cmds.back().type = Command::TYPE_READ_INTERLEAVE;
1077	m_cmds.back().u_cmd.readInterleave = cmd;
1078
1079	return *this;
1080}
1081
1082InterCallOperations& InterCallOperations::operator<< (const op::ReadZeroData& cmd)
1083{
1084	m_cmds.push_back(Command());
1085	m_cmds.back().type = Command::TYPE_READ_ZERO;
1086	m_cmds.back().u_cmd.readZero = cmd;
1087
1088	return *this;
1089}
1090
1091class InterCallTestCase : public TestCase
1092{
1093public:
1094	enum StorageType
1095	{
1096		STORAGE_BUFFER = 0,
1097		STORAGE_IMAGE,
1098
1099		STORAGE_LAST
1100	};
1101	enum Flags
1102	{
1103		FLAG_USE_ATOMIC	= 1,
1104		FLAG_USE_INT	= 2,
1105	};
1106													InterCallTestCase			(Context& context, const char* name, const char* desc, StorageType storage, int flags, const InterCallOperations& ops);
1107													~InterCallTestCase			(void);
1108
1109private:
1110	void											init						(void);
1111	void											deinit						(void);
1112	IterateResult									iterate						(void);
1113	bool											verifyResults				(void);
1114
1115	void											runCommand					(const op::WriteData& cmd, int stepNdx, int& programFriendlyName);
1116	void											runCommand					(const op::ReadData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
1117	void											runCommand					(const op::Barrier&);
1118	void											runCommand					(const op::ReadMultipleData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
1119	void											runCommand					(const op::WriteDataInterleaved& cmd, int stepNdx, int& programFriendlyName);
1120	void											runCommand					(const op::ReadDataInterleaved& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
1121	void											runCommand					(const op::ReadZeroData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
1122	void											runSingleRead				(int targetHandle, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
1123
1124	glw::GLuint										genStorage					(int friendlyName);
1125	glw::GLuint										genResultStorage			(void);
1126	glu::ShaderProgram*								genWriteProgram				(int seed);
1127	glu::ShaderProgram*								genReadProgram				(int seed);
1128	glu::ShaderProgram*								genReadMultipleProgram		(int seed0, int seed1);
1129	glu::ShaderProgram*								genWriteInterleavedProgram	(int seed, bool evenOdd);
1130	glu::ShaderProgram*								genReadInterleavedProgram	(int seed0, int seed1);
1131	glu::ShaderProgram*								genReadZeroProgram			(void);
1132
1133	const StorageType								m_storage;
1134	const int										m_invocationGridSize;	// !< width and height of the two dimensional work dispatch
1135	const int										m_perInvocationSize;	// !< number of elements accessed in single invocation
1136	const std::vector<InterCallOperations::Command>	m_cmds;
1137	const bool										m_useAtomic;
1138	const bool										m_formatInteger;
1139
1140	std::vector<glu::ShaderProgram*>				m_operationPrograms;
1141	std::vector<glw::GLuint>						m_operationResultStorages;
1142	std::map<int, glw::GLuint>						m_storageIDs;
1143};
1144
1145InterCallTestCase::InterCallTestCase (Context& context, const char* name, const char* desc, StorageType storage, int flags, const InterCallOperations& ops)
1146	: TestCase					(context, name, desc)
1147	, m_storage					(storage)
1148	, m_invocationGridSize		(512)
1149	, m_perInvocationSize		(2)
1150	, m_cmds					(ops.m_cmds)
1151	, m_useAtomic				((flags & FLAG_USE_ATOMIC) != 0)
1152	, m_formatInteger			((flags & FLAG_USE_INT) != 0)
1153{
1154}
1155
1156InterCallTestCase::~InterCallTestCase (void)
1157{
1158	deinit();
1159}
1160
1161void InterCallTestCase::init (void)
1162{
1163	int			programFriendlyName = 0;
1164
1165	// requirements
1166
1167	if (m_useAtomic && m_storage == STORAGE_IMAGE && !checkSupport(m_context))
1168		throw tcu::NotSupportedError("Test requires GL_OES_shader_image_atomic extension");
1169
1170	// generate resources and validate command list
1171
1172	m_operationPrograms.resize(m_cmds.size(), DE_NULL);
1173	m_operationResultStorages.resize(m_cmds.size(), 0);
1174
1175	for (int step = 0; step < (int)m_cmds.size(); ++step)
1176	{
1177		switch (m_cmds[step].type)
1178		{
1179			case InterCallOperations::Command::TYPE_WRITE:
1180			{
1181				const op::WriteData& cmd = m_cmds[step].u_cmd.write;
1182
1183				// new storage handle?
1184				if (m_storageIDs.find(cmd.targetHandle) == m_storageIDs.end())
1185					m_storageIDs[cmd.targetHandle] = genStorage(cmd.targetHandle);
1186
1187				// program
1188				{
1189					glu::ShaderProgram* program = genWriteProgram(cmd.seed);
1190
1191					m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1192					m_testCtx.getLog() << *program;
1193
1194					if (!program->isOk())
1195						throw tcu::TestError("could not build program");
1196
1197					m_operationPrograms[step] = program;
1198				}
1199				break;
1200			}
1201
1202			case InterCallOperations::Command::TYPE_READ:
1203			{
1204				const op::ReadData& cmd = m_cmds[step].u_cmd.read;
1205				DE_ASSERT(m_storageIDs.find(cmd.targetHandle) != m_storageIDs.end());
1206
1207				// program and result storage
1208				{
1209					glu::ShaderProgram* program = genReadProgram(cmd.seed);
1210
1211					m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1212					m_testCtx.getLog() << *program;
1213
1214					if (!program->isOk())
1215						throw tcu::TestError("could not build program");
1216
1217					m_operationPrograms[step] = program;
1218					m_operationResultStorages[step] = genResultStorage();
1219				}
1220				break;
1221			}
1222
1223			case InterCallOperations::Command::TYPE_BARRIER:
1224			{
1225				break;
1226			}
1227
1228			case InterCallOperations::Command::TYPE_READ_MULTIPLE:
1229			{
1230				const op::ReadMultipleData& cmd = m_cmds[step].u_cmd.readMulti;
1231				DE_ASSERT(m_storageIDs.find(cmd.targetHandle0) != m_storageIDs.end());
1232				DE_ASSERT(m_storageIDs.find(cmd.targetHandle1) != m_storageIDs.end());
1233
1234				// program
1235				{
1236					glu::ShaderProgram* program = genReadMultipleProgram(cmd.seed0, cmd.seed1);
1237
1238					m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1239					m_testCtx.getLog() << *program;
1240
1241					if (!program->isOk())
1242						throw tcu::TestError("could not build program");
1243
1244					m_operationPrograms[step] = program;
1245					m_operationResultStorages[step] = genResultStorage();
1246				}
1247				break;
1248			}
1249
1250			case InterCallOperations::Command::TYPE_WRITE_INTERLEAVE:
1251			{
1252				const op::WriteDataInterleaved& cmd = m_cmds[step].u_cmd.writeInterleave;
1253
1254				// new storage handle?
1255				if (m_storageIDs.find(cmd.targetHandle) == m_storageIDs.end())
1256					m_storageIDs[cmd.targetHandle] = genStorage(cmd.targetHandle);
1257
1258				// program
1259				{
1260					glu::ShaderProgram* program = genWriteInterleavedProgram(cmd.seed, cmd.evenOdd);
1261
1262					m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1263					m_testCtx.getLog() << *program;
1264
1265					if (!program->isOk())
1266						throw tcu::TestError("could not build program");
1267
1268					m_operationPrograms[step] = program;
1269				}
1270				break;
1271			}
1272
1273			case InterCallOperations::Command::TYPE_READ_INTERLEAVE:
1274			{
1275				const op::ReadDataInterleaved& cmd = m_cmds[step].u_cmd.readInterleave;
1276				DE_ASSERT(m_storageIDs.find(cmd.targetHandle) != m_storageIDs.end());
1277
1278				// program
1279				{
1280					glu::ShaderProgram* program = genReadInterleavedProgram(cmd.seed0, cmd.seed1);
1281
1282					m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1283					m_testCtx.getLog() << *program;
1284
1285					if (!program->isOk())
1286						throw tcu::TestError("could not build program");
1287
1288					m_operationPrograms[step] = program;
1289					m_operationResultStorages[step] = genResultStorage();
1290				}
1291				break;
1292			}
1293
1294			case InterCallOperations::Command::TYPE_READ_ZERO:
1295			{
1296				const op::ReadZeroData& cmd = m_cmds[step].u_cmd.readZero;
1297
1298				// new storage handle?
1299				if (m_storageIDs.find(cmd.targetHandle) == m_storageIDs.end())
1300					m_storageIDs[cmd.targetHandle] = genStorage(cmd.targetHandle);
1301
1302				// program
1303				{
1304					glu::ShaderProgram* program = genReadZeroProgram();
1305
1306					m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
1307					m_testCtx.getLog() << *program;
1308
1309					if (!program->isOk())
1310						throw tcu::TestError("could not build program");
1311
1312					m_operationPrograms[step] = program;
1313					m_operationResultStorages[step] = genResultStorage();
1314				}
1315				break;
1316			}
1317
1318			default:
1319				DE_ASSERT(DE_FALSE);
1320		}
1321	}
1322}
1323
1324void InterCallTestCase::deinit (void)
1325{
1326	// programs
1327	for (int ndx = 0; ndx < (int)m_operationPrograms.size(); ++ndx)
1328		delete m_operationPrograms[ndx];
1329	m_operationPrograms.clear();
1330
1331	// result storages
1332	for (int ndx = 0; ndx < (int)m_operationResultStorages.size(); ++ndx)
1333	{
1334		if (m_operationResultStorages[ndx])
1335			m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_operationResultStorages[ndx]);
1336	}
1337	m_operationResultStorages.clear();
1338
1339	// storage
1340	for (std::map<int, glw::GLuint>::const_iterator it = m_storageIDs.begin(); it != m_storageIDs.end(); ++it)
1341	{
1342		const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1343
1344		if (m_storage == STORAGE_BUFFER)
1345			gl.deleteBuffers(1, &it->second);
1346		else if (m_storage == STORAGE_IMAGE)
1347			gl.deleteTextures(1, &it->second);
1348		else
1349			DE_ASSERT(DE_FALSE);
1350	}
1351	m_storageIDs.clear();
1352}
1353
1354InterCallTestCase::IterateResult InterCallTestCase::iterate (void)
1355{
1356	int programFriendlyName			= 0;
1357	int resultStorageFriendlyName	= 0;
1358
1359	m_testCtx.getLog() << tcu::TestLog::Message << "Running operations:" << tcu::TestLog::EndMessage;
1360
1361	// run steps
1362
1363	for (int step = 0; step < (int)m_cmds.size(); ++step)
1364	{
1365		switch (m_cmds[step].type)
1366		{
1367			case InterCallOperations::Command::TYPE_WRITE:				runCommand(m_cmds[step].u_cmd.write,			step,	programFriendlyName);								break;
1368			case InterCallOperations::Command::TYPE_READ:				runCommand(m_cmds[step].u_cmd.read,				step,	programFriendlyName, resultStorageFriendlyName);	break;
1369			case InterCallOperations::Command::TYPE_BARRIER:			runCommand(m_cmds[step].u_cmd.barrier);																		break;
1370			case InterCallOperations::Command::TYPE_READ_MULTIPLE:		runCommand(m_cmds[step].u_cmd.readMulti,		step,	programFriendlyName, resultStorageFriendlyName);	break;
1371			case InterCallOperations::Command::TYPE_WRITE_INTERLEAVE:	runCommand(m_cmds[step].u_cmd.writeInterleave,	step,	programFriendlyName);								break;
1372			case InterCallOperations::Command::TYPE_READ_INTERLEAVE:	runCommand(m_cmds[step].u_cmd.readInterleave,	step,	programFriendlyName, resultStorageFriendlyName);	break;
1373			case InterCallOperations::Command::TYPE_READ_ZERO:			runCommand(m_cmds[step].u_cmd.readZero,			step,	programFriendlyName, resultStorageFriendlyName);	break;
1374			default:
1375				DE_ASSERT(DE_FALSE);
1376		}
1377	}
1378
1379	// read results from result buffers
1380	if (verifyResults())
1381		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
1382	else
1383		m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, (std::string((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) + " content verification failed").c_str());
1384
1385	return STOP;
1386}
1387
1388bool InterCallTestCase::verifyResults (void)
1389{
1390	int		resultBufferFriendlyName	= 0;
1391	bool	allResultsOk				= true;
1392	bool	anyResult					= false;
1393
1394	m_testCtx.getLog() << tcu::TestLog::Message << "Reading verifier program results" << tcu::TestLog::EndMessage;
1395
1396	for (int step = 0; step < (int)m_cmds.size(); ++step)
1397	{
1398		const int	errorFloodThreshold	= 5;
1399		int			numErrorsLogged		= 0;
1400
1401		if (m_operationResultStorages[step])
1402		{
1403			const glw::Functions&	gl		= m_context.getRenderContext().getFunctions();
1404			const void*				mapped	= DE_NULL;
1405			std::vector<deInt32>	results	(m_invocationGridSize * m_invocationGridSize);
1406			bool					error	= false;
1407
1408			anyResult = true;
1409
1410			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_operationResultStorages[step]);
1411			mapped = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, m_invocationGridSize * m_invocationGridSize * sizeof(deUint32), GL_MAP_READ_BIT);
1412			GLU_EXPECT_NO_ERROR(gl.getError(), "map buffer");
1413
1414			// copy to properly aligned array
1415			deMemcpy(&results[0], mapped, m_invocationGridSize * m_invocationGridSize * sizeof(deUint32));
1416
1417			if (gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER) != GL_TRUE)
1418				throw tcu::TestError("memory map store corrupted");
1419
1420			// check the results
1421			for (int ndx = 0; ndx < (int)results.size(); ++ndx)
1422			{
1423				if (results[ndx] != 1)
1424				{
1425					error = true;
1426
1427					if (numErrorsLogged == 0)
1428						m_testCtx.getLog() << tcu::TestLog::Message << "Result storage #" << ++resultBufferFriendlyName << " failed, got unexpected values.\n" << tcu::TestLog::EndMessage;
1429					if (numErrorsLogged++ < errorFloodThreshold)
1430						m_testCtx.getLog() << tcu::TestLog::Message << "	Error at index " << ndx << ": expected 1, got " << results[ndx] << ".\n" << tcu::TestLog::EndMessage;
1431					else
1432					{
1433						// after N errors, no point continuing verification
1434						m_testCtx.getLog() << tcu::TestLog::Message << "	-- too many errors, skipping verification --\n" << tcu::TestLog::EndMessage;
1435						break;
1436					}
1437				}
1438			}
1439
1440			if (error)
1441			{
1442				allResultsOk = false;
1443			}
1444			else
1445				m_testCtx.getLog() << tcu::TestLog::Message << "Result storage #" << ++resultBufferFriendlyName << " ok." << tcu::TestLog::EndMessage;
1446		}
1447	}
1448
1449	DE_ASSERT(anyResult);
1450	DE_UNREF(anyResult);
1451
1452	return allResultsOk;
1453}
1454
1455void InterCallTestCase::runCommand (const op::WriteData& cmd, int stepNdx, int& programFriendlyName)
1456{
1457	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1458
1459	m_testCtx.getLog()
1460		<< tcu::TestLog::Message
1461		<< "Running program #" << ++programFriendlyName << " to write " << ((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) << " #" << cmd.targetHandle << ".\n"
1462		<< "	Dispatch size: " << m_invocationGridSize << "x" << m_invocationGridSize << "."
1463		<< tcu::TestLog::EndMessage;
1464
1465	gl.useProgram(m_operationPrograms[stepNdx]->getProgram());
1466
1467	// set destination
1468	if (m_storage == STORAGE_BUFFER)
1469	{
1470		DE_ASSERT(m_storageIDs[cmd.targetHandle]);
1471
1472		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storageIDs[cmd.targetHandle]);
1473		GLU_EXPECT_NO_ERROR(gl.getError(), "bind destination buffer");
1474	}
1475	else if (m_storage == STORAGE_IMAGE)
1476	{
1477		DE_ASSERT(m_storageIDs[cmd.targetHandle]);
1478
1479		gl.bindImageTexture(0, m_storageIDs[cmd.targetHandle], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_WRITE_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
1480		GLU_EXPECT_NO_ERROR(gl.getError(), "bind destination image");
1481	}
1482	else
1483		DE_ASSERT(DE_FALSE);
1484
1485	// calc
1486	gl.dispatchCompute(m_invocationGridSize, m_invocationGridSize, 1);
1487	GLU_EXPECT_NO_ERROR(gl.getError(), "dispatch write");
1488}
1489
1490void InterCallTestCase::runCommand (const op::ReadData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
1491{
1492	runSingleRead(cmd.targetHandle, stepNdx, programFriendlyName, resultStorageFriendlyName);
1493}
1494
1495void InterCallTestCase::runCommand (const op::Barrier& cmd)
1496{
1497	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1498
1499	DE_UNREF(cmd);
1500
1501	if (m_storage == STORAGE_BUFFER)
1502	{
1503		m_testCtx.getLog() << tcu::TestLog::Message << "Memory Barrier\n\tbits = GL_SHADER_STORAGE_BARRIER_BIT" << tcu::TestLog::EndMessage;
1504		gl.memoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
1505	}
1506	else if (m_storage == STORAGE_IMAGE)
1507	{
1508		m_testCtx.getLog() << tcu::TestLog::Message << "Memory Barrier\n\tbits = GL_SHADER_IMAGE_ACCESS_BARRIER_BIT" << tcu::TestLog::EndMessage;
1509		gl.memoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
1510	}
1511	else
1512		DE_ASSERT(DE_FALSE);
1513}
1514
1515void InterCallTestCase::runCommand (const op::ReadMultipleData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
1516{
1517	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1518
1519	m_testCtx.getLog()
1520		<< tcu::TestLog::Message
1521		<< "Running program #" << ++programFriendlyName << " to verify " << ((m_storage == STORAGE_BUFFER) ? ("buffers") : ("images")) << " #" << cmd.targetHandle0 << " and #" << cmd.targetHandle1 << ".\n"
1522		<< "	Writing results to result storage #" << ++resultStorageFriendlyName << ".\n"
1523		<< "	Dispatch size: " << m_invocationGridSize << "x" << m_invocationGridSize << "."
1524		<< tcu::TestLog::EndMessage;
1525
1526	gl.useProgram(m_operationPrograms[stepNdx]->getProgram());
1527
1528	// set sources
1529	if (m_storage == STORAGE_BUFFER)
1530	{
1531		DE_ASSERT(m_storageIDs[cmd.targetHandle0]);
1532		DE_ASSERT(m_storageIDs[cmd.targetHandle1]);
1533
1534		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storageIDs[cmd.targetHandle0]);
1535		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, m_storageIDs[cmd.targetHandle1]);
1536		GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buffers");
1537	}
1538	else if (m_storage == STORAGE_IMAGE)
1539	{
1540		DE_ASSERT(m_storageIDs[cmd.targetHandle0]);
1541		DE_ASSERT(m_storageIDs[cmd.targetHandle1]);
1542
1543		gl.bindImageTexture(1, m_storageIDs[cmd.targetHandle0], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_READ_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
1544		gl.bindImageTexture(2, m_storageIDs[cmd.targetHandle1], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_READ_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
1545		GLU_EXPECT_NO_ERROR(gl.getError(), "bind source images");
1546	}
1547	else
1548		DE_ASSERT(DE_FALSE);
1549
1550	// set destination
1551	DE_ASSERT(m_operationResultStorages[stepNdx]);
1552	gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_operationResultStorages[stepNdx]);
1553	GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buffer");
1554
1555	// calc
1556	gl.dispatchCompute(m_invocationGridSize, m_invocationGridSize, 1);
1557	GLU_EXPECT_NO_ERROR(gl.getError(), "dispatch read multi");
1558}
1559
1560void InterCallTestCase::runCommand (const op::WriteDataInterleaved& cmd, int stepNdx, int& programFriendlyName)
1561{
1562	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1563
1564	m_testCtx.getLog()
1565		<< tcu::TestLog::Message
1566		<< "Running program #" << ++programFriendlyName << " to write " << ((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) << " #" << cmd.targetHandle << ".\n"
1567		<< "	Writing to every " << ((cmd.evenOdd) ? ("even") : ("odd")) << " " << ((m_storage == STORAGE_BUFFER) ? ("element") : ("column")) << ".\n"
1568		<< "	Dispatch size: " << m_invocationGridSize / 2 << "x" << m_invocationGridSize << "."
1569		<< tcu::TestLog::EndMessage;
1570
1571	gl.useProgram(m_operationPrograms[stepNdx]->getProgram());
1572
1573	// set destination
1574	if (m_storage == STORAGE_BUFFER)
1575	{
1576		DE_ASSERT(m_storageIDs[cmd.targetHandle]);
1577
1578		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storageIDs[cmd.targetHandle]);
1579		GLU_EXPECT_NO_ERROR(gl.getError(), "bind destination buffer");
1580	}
1581	else if (m_storage == STORAGE_IMAGE)
1582	{
1583		DE_ASSERT(m_storageIDs[cmd.targetHandle]);
1584
1585		gl.bindImageTexture(0, m_storageIDs[cmd.targetHandle], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_WRITE_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
1586		GLU_EXPECT_NO_ERROR(gl.getError(), "bind destination image");
1587	}
1588	else
1589		DE_ASSERT(DE_FALSE);
1590
1591	// calc
1592	gl.dispatchCompute(m_invocationGridSize / 2, m_invocationGridSize, 1);
1593	GLU_EXPECT_NO_ERROR(gl.getError(), "dispatch write");
1594}
1595
1596void InterCallTestCase::runCommand (const op::ReadDataInterleaved& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
1597{
1598	runSingleRead(cmd.targetHandle, stepNdx, programFriendlyName, resultStorageFriendlyName);
1599}
1600
1601void InterCallTestCase::runCommand (const op::ReadZeroData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
1602{
1603	runSingleRead(cmd.targetHandle, stepNdx, programFriendlyName, resultStorageFriendlyName);
1604}
1605
1606void InterCallTestCase::runSingleRead (int targetHandle, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
1607{
1608	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1609
1610	m_testCtx.getLog()
1611		<< tcu::TestLog::Message
1612		<< "Running program #" << ++programFriendlyName << " to verify " << ((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) << " #" << targetHandle << ".\n"
1613		<< "	Writing results to result storage #" << ++resultStorageFriendlyName << ".\n"
1614		<< "	Dispatch size: " << m_invocationGridSize << "x" << m_invocationGridSize << "."
1615		<< tcu::TestLog::EndMessage;
1616
1617	gl.useProgram(m_operationPrograms[stepNdx]->getProgram());
1618
1619	// set source
1620	if (m_storage == STORAGE_BUFFER)
1621	{
1622		DE_ASSERT(m_storageIDs[targetHandle]);
1623
1624		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storageIDs[targetHandle]);
1625		GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buffer");
1626	}
1627	else if (m_storage == STORAGE_IMAGE)
1628	{
1629		DE_ASSERT(m_storageIDs[targetHandle]);
1630
1631		gl.bindImageTexture(1, m_storageIDs[targetHandle], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_READ_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
1632		GLU_EXPECT_NO_ERROR(gl.getError(), "bind source image");
1633	}
1634	else
1635		DE_ASSERT(DE_FALSE);
1636
1637	// set destination
1638	DE_ASSERT(m_operationResultStorages[stepNdx]);
1639	gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_operationResultStorages[stepNdx]);
1640	GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buffer");
1641
1642	// calc
1643	gl.dispatchCompute(m_invocationGridSize, m_invocationGridSize, 1);
1644	GLU_EXPECT_NO_ERROR(gl.getError(), "dispatch read");
1645}
1646
1647glw::GLuint InterCallTestCase::genStorage (int friendlyName)
1648{
1649	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
1650
1651	if (m_storage == STORAGE_BUFFER)
1652	{
1653		const int		numElements		= m_invocationGridSize * m_invocationGridSize * m_perInvocationSize;
1654		const int		bufferSize		= numElements * (int)((m_formatInteger) ? (sizeof(deInt32)) : (sizeof(glw::GLfloat)));
1655		glw::GLuint		retVal			= 0;
1656
1657		m_testCtx.getLog() << tcu::TestLog::Message << "Creating buffer #" << friendlyName << ", size " << bufferSize << " bytes." << tcu::TestLog::EndMessage;
1658
1659		gl.genBuffers(1, &retVal);
1660		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, retVal);
1661
1662		if (m_formatInteger)
1663		{
1664			const std::vector<deUint32> zeroBuffer(numElements, 0);
1665			gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &zeroBuffer[0], GL_STATIC_DRAW);
1666		}
1667		else
1668		{
1669			const std::vector<float> zeroBuffer(numElements, 0.0f);
1670			gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &zeroBuffer[0], GL_STATIC_DRAW);
1671		}
1672		GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffer");
1673
1674		return retVal;
1675	}
1676	else if (m_storage == STORAGE_IMAGE)
1677	{
1678		const int	imageWidth	= m_invocationGridSize;
1679		const int	imageHeight	= m_invocationGridSize * m_perInvocationSize;
1680		glw::GLuint	retVal		= 0;
1681
1682		m_testCtx.getLog()
1683			<< tcu::TestLog::Message
1684			<< "Creating image #" << friendlyName << ", size " << imageWidth << "x" << imageHeight
1685			<< ", internalformat = " << ((m_formatInteger) ? ("r32i") : ("r32f"))
1686			<< ", size = " << (imageWidth*imageHeight*sizeof(deUint32)) << " bytes."
1687			<< tcu::TestLog::EndMessage;
1688
1689		gl.genTextures(1, &retVal);
1690		gl.bindTexture(GL_TEXTURE_2D, retVal);
1691
1692		if (m_formatInteger)
1693			gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32I, imageWidth, imageHeight);
1694		else
1695			gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32F, imageWidth, imageHeight);
1696
1697		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
1698		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
1699		GLU_EXPECT_NO_ERROR(gl.getError(), "gen image");
1700
1701		m_testCtx.getLog()
1702			<< tcu::TestLog::Message
1703			<< "Filling image with 0"
1704			<< tcu::TestLog::EndMessage;
1705
1706		if (m_formatInteger)
1707		{
1708			const std::vector<deInt32> zeroBuffer(imageWidth * imageHeight, 0);
1709			gl.texSubImage2D(GL_TEXTURE_2D, 0, 0, 0, imageWidth, imageHeight, GL_RED_INTEGER, GL_INT, &zeroBuffer[0]);
1710		}
1711		else
1712		{
1713			const std::vector<float> zeroBuffer(imageWidth * imageHeight, 0.0f);
1714			gl.texSubImage2D(GL_TEXTURE_2D, 0, 0, 0, imageWidth, imageHeight, GL_RED, GL_FLOAT, &zeroBuffer[0]);
1715		}
1716
1717		GLU_EXPECT_NO_ERROR(gl.getError(), "specify image contents");
1718
1719		return retVal;
1720	}
1721	else
1722	{
1723		DE_ASSERT(DE_FALSE);
1724		return 0;
1725	}
1726}
1727
1728glw::GLuint InterCallTestCase::genResultStorage (void)
1729{
1730	const glw::Functions&	gl		= m_context.getRenderContext().getFunctions();
1731	glw::GLuint				retVal	= 0;
1732
1733	gl.genBuffers(1, &retVal);
1734	gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, retVal);
1735	gl.bufferData(GL_SHADER_STORAGE_BUFFER, m_invocationGridSize * m_invocationGridSize * sizeof(deUint32), DE_NULL, GL_STATIC_DRAW);
1736	GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffer");
1737
1738	return retVal;
1739}
1740
1741glu::ShaderProgram* InterCallTestCase::genWriteProgram (int seed)
1742{
1743	const bool			useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
1744	std::ostringstream	buf;
1745
1746	buf << "${GLSL_VERSION_DECL}\n"
1747		<< ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
1748		<< "layout (local_size_x = 1, local_size_y = 1) in;\n";
1749
1750	if (m_storage == STORAGE_BUFFER)
1751		buf << "layout(binding=0, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
1752			<< "{\n"
1753			<< "	highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
1754			<< "} sb_out;\n";
1755	else if (m_storage == STORAGE_IMAGE)
1756		buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=0) " << ((m_useAtomic) ? ("coherent ") : ("writeonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageOut;\n";
1757	else
1758		DE_ASSERT(DE_FALSE);
1759
1760	buf << "\n"
1761		<< "void main (void)\n"
1762		<< "{\n"
1763		<< "	uvec3 size    = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1764		<< "	int groupNdx  = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
1765		<< "\n";
1766
1767	// Write to buffer/image m_perInvocationSize elements
1768	if (m_storage == STORAGE_BUFFER)
1769	{
1770		for (int writeNdx = 0; writeNdx < m_perInvocationSize; ++writeNdx)
1771		{
1772			if (m_useAtomic)
1773				buf << "	atomicExchange(";
1774			else
1775				buf << "	";
1776
1777			buf << "sb_out.values[(groupNdx + " << seed + writeNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "]";
1778
1779			if (m_useAtomic)
1780				buf << ", " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1781			else
1782				buf << " = " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx);\n";
1783		}
1784	}
1785	else if (m_storage == STORAGE_IMAGE)
1786	{
1787		for (int writeNdx = 0; writeNdx < m_perInvocationSize; ++writeNdx)
1788		{
1789			if (m_useAtomic)
1790				buf << "	imageAtomicExchange";
1791			else
1792				buf << "	imageStore";
1793
1794			buf << "(u_imageOut, ivec2((int(gl_GlobalInvocationID.x) + " << (seed + writeNdx*100) << ") % " << m_invocationGridSize << ", int(gl_GlobalInvocationID.y) + " << writeNdx*m_invocationGridSize << "), ";
1795
1796			if (m_useAtomic)
1797				buf << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1798			else
1799				buf << ((m_formatInteger) ? ("ivec4(int(groupNdx), 0, 0, 0)") : ("vec4(float(groupNdx), 0.0, 0.0, 0.0)")) << ");\n";
1800		}
1801	}
1802	else
1803		DE_ASSERT(DE_FALSE);
1804
1805	buf << "}\n";
1806
1807	return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(specializeShader(m_context, buf.str().c_str())));
1808}
1809
1810glu::ShaderProgram* InterCallTestCase::genReadProgram (int seed)
1811{
1812	const bool			useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
1813	std::ostringstream	buf;
1814
1815	buf << "${GLSL_VERSION_DECL}\n"
1816		<< ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
1817		<< "layout (local_size_x = 1, local_size_y = 1) in;\n";
1818
1819	if (m_storage == STORAGE_BUFFER)
1820		buf << "layout(binding=1, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
1821			<< "{\n"
1822			<< "	highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
1823			<< "} sb_in;\n";
1824	else if (m_storage == STORAGE_IMAGE)
1825		buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=1) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn;\n";
1826	else
1827		DE_ASSERT(DE_FALSE);
1828
1829	buf << "layout(binding=0, std430) buffer ResultBuffer\n"
1830		<< "{\n"
1831		<< "	highp int resultOk[];\n"
1832		<< "} sb_result;\n"
1833		<< "\n"
1834		<< "void main (void)\n"
1835		<< "{\n"
1836		<< "	uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1837		<< "	int groupNdx = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
1838		<< "	" << ((m_formatInteger) ? ("int") : ("float")) << " zero = " << ((m_formatInteger) ? ("0") : ("0.0")) << ";\n"
1839		<< "	bool allOk = true;\n"
1840		<< "\n";
1841
1842	// Verify data
1843
1844	if (m_storage == STORAGE_BUFFER)
1845	{
1846		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
1847		{
1848			if (!m_useAtomic)
1849				buf << "	allOk = allOk && (sb_in.values[(groupNdx + "
1850					<< seed + readNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "] == "
1851					<< ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1852			else
1853				buf << "	allOk = allOk && (atomicExchange(sb_in.values[(groupNdx + "
1854					<< seed + readNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "], zero) == "
1855					<< ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1856		}
1857	}
1858	else if (m_storage == STORAGE_IMAGE)
1859	{
1860		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
1861		{
1862			if (!m_useAtomic)
1863				buf	<< "	allOk = allOk && (imageLoad(u_imageIn, ivec2((gl_GlobalInvocationID.x + "
1864					<< (seed + readNdx*100) << "u) % " << m_invocationGridSize << "u, gl_GlobalInvocationID.y + " << readNdx*m_invocationGridSize << "u)).x == "
1865					<< ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1866			else
1867				buf << "	allOk = allOk && (imageAtomicExchange(u_imageIn, ivec2((gl_GlobalInvocationID.x + "
1868					<< (seed + readNdx*100) << "u) % " << m_invocationGridSize << "u, gl_GlobalInvocationID.y + " << readNdx*m_invocationGridSize << "u), zero) == "
1869					<< ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1870		}
1871	}
1872	else
1873		DE_ASSERT(DE_FALSE);
1874
1875	buf << "	sb_result.resultOk[groupNdx] = (allOk) ? (1) : (0);\n"
1876		<< "}\n";
1877
1878	return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(specializeShader(m_context, buf.str().c_str())));
1879}
1880
1881glu::ShaderProgram* InterCallTestCase::genReadMultipleProgram (int seed0, int seed1)
1882{
1883	const bool			useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
1884	std::ostringstream	buf;
1885
1886	buf << "${GLSL_VERSION_DECL}\n"
1887		<< ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
1888		<< "layout (local_size_x = 1, local_size_y = 1) in;\n";
1889
1890	if (m_storage == STORAGE_BUFFER)
1891		buf << "layout(binding=1, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer0\n"
1892			<< "{\n"
1893			<< "	highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
1894			<< "} sb_in0;\n"
1895			<< "layout(binding=2, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer1\n"
1896			<< "{\n"
1897			<< "	highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
1898			<< "} sb_in1;\n";
1899	else if (m_storage == STORAGE_IMAGE)
1900		buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=1) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn0;\n"
1901			<< "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=2) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn1;\n";
1902	else
1903		DE_ASSERT(DE_FALSE);
1904
1905	buf << "layout(binding=0, std430) buffer ResultBuffer\n"
1906		<< "{\n"
1907		<< "	highp int resultOk[];\n"
1908		<< "} sb_result;\n"
1909		<< "\n"
1910		<< "void main (void)\n"
1911		<< "{\n"
1912		<< "	uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1913		<< "	int groupNdx = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
1914		<< "	" << ((m_formatInteger) ? ("int") : ("float")) << " zero = " << ((m_formatInteger) ? ("0") : ("0.0")) << ";\n"
1915		<< "	bool allOk = true;\n"
1916		<< "\n";
1917
1918	// Verify data
1919
1920	if (m_storage == STORAGE_BUFFER)
1921	{
1922		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
1923			buf << "	allOk = allOk && (" << ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in0.values[(groupNdx + " << seed0 + readNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "]" << ((m_useAtomic) ? (", zero)") : ("")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n"
1924				<< "	allOk = allOk && (" << ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in1.values[(groupNdx + " << seed1 + readNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "]" << ((m_useAtomic) ? (", zero)") : ("")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1925	}
1926	else if (m_storage == STORAGE_IMAGE)
1927	{
1928		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
1929			buf << "	allOk = allOk && (" << ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad")) << "(u_imageIn0, ivec2((gl_GlobalInvocationID.x + " << (seed0 + readNdx*100) << "u) % " << m_invocationGridSize << "u, gl_GlobalInvocationID.y + " << readNdx*m_invocationGridSize << "u)" << ((m_useAtomic) ? (", zero)") : (").x")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n"
1930				<< "	allOk = allOk && (" << ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad")) << "(u_imageIn1, ivec2((gl_GlobalInvocationID.x + " << (seed1 + readNdx*100) << "u) % " << m_invocationGridSize << "u, gl_GlobalInvocationID.y + " << readNdx*m_invocationGridSize << "u)" << ((m_useAtomic) ? (", zero)") : (").x")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1931	}
1932	else
1933		DE_ASSERT(DE_FALSE);
1934
1935	buf << "	sb_result.resultOk[groupNdx] = (allOk) ? (1) : (0);\n"
1936		<< "}\n";
1937
1938	return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(specializeShader(m_context, buf.str().c_str())));
1939}
1940
1941glu::ShaderProgram* InterCallTestCase::genWriteInterleavedProgram (int seed, bool evenOdd)
1942{
1943	const bool			useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
1944	std::ostringstream	buf;
1945
1946	buf << "${GLSL_VERSION_DECL}\n"
1947		<< ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
1948		<< "layout (local_size_x = 1, local_size_y = 1) in;\n";
1949
1950	if (m_storage == STORAGE_BUFFER)
1951		buf << "layout(binding=0, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
1952			<< "{\n"
1953			<< "	highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
1954			<< "} sb_out;\n";
1955	else if (m_storage == STORAGE_IMAGE)
1956		buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=0) " << ((m_useAtomic) ? ("coherent ") : ("writeonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageOut;\n";
1957	else
1958		DE_ASSERT(DE_FALSE);
1959
1960	buf << "\n"
1961		<< "void main (void)\n"
1962		<< "{\n"
1963		<< "	uvec3 size    = gl_NumWorkGroups * gl_WorkGroupSize;\n"
1964		<< "	int groupNdx  = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
1965		<< "\n";
1966
1967	// Write to buffer/image m_perInvocationSize elements
1968	if (m_storage == STORAGE_BUFFER)
1969	{
1970		for (int writeNdx = 0; writeNdx < m_perInvocationSize; ++writeNdx)
1971		{
1972			if (m_useAtomic)
1973				buf << "	atomicExchange(";
1974			else
1975				buf << "	";
1976
1977			buf << "sb_out.values[((groupNdx + " << seed + writeNdx*m_invocationGridSize*m_invocationGridSize / 2 << ") % " << m_invocationGridSize*m_invocationGridSize / 2 * m_perInvocationSize  << ") * 2 + " << ((evenOdd) ? (0) : (1)) << "]";
1978
1979			if (m_useAtomic)
1980				buf << ", " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1981			else
1982				buf << "= " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx);\n";
1983		}
1984	}
1985	else if (m_storage == STORAGE_IMAGE)
1986	{
1987		for (int writeNdx = 0; writeNdx < m_perInvocationSize; ++writeNdx)
1988		{
1989			if (m_useAtomic)
1990				buf << "	imageAtomicExchange";
1991			else
1992				buf << "	imageStore";
1993
1994			buf << "(u_imageOut, ivec2(((int(gl_GlobalInvocationID.x) + " << (seed + writeNdx*100) << ") % " << m_invocationGridSize / 2 << ") * 2 + " << ((evenOdd) ? (0) : (1)) << ", int(gl_GlobalInvocationID.y) + " << writeNdx*m_invocationGridSize << "), ";
1995
1996			if (m_useAtomic)
1997				buf << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
1998			else
1999				buf << ((m_formatInteger) ? ("ivec4(int(groupNdx), 0, 0, 0)") : ("vec4(float(groupNdx), 0.0, 0.0, 0.0)")) << ");\n";
2000		}
2001	}
2002	else
2003		DE_ASSERT(DE_FALSE);
2004
2005	buf << "}\n";
2006
2007	return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(specializeShader(m_context, buf.str().c_str())));
2008}
2009
2010glu::ShaderProgram* InterCallTestCase::genReadInterleavedProgram (int seed0, int seed1)
2011{
2012	const bool			useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
2013	std::ostringstream	buf;
2014
2015	buf << "${GLSL_VERSION_DECL}\n"
2016		<< ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
2017		<< "layout (local_size_x = 1, local_size_y = 1) in;\n";
2018
2019	if (m_storage == STORAGE_BUFFER)
2020		buf << "layout(binding=1, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
2021			<< "{\n"
2022			<< "	highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
2023			<< "} sb_in;\n";
2024	else if (m_storage == STORAGE_IMAGE)
2025		buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=1) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn;\n";
2026	else
2027		DE_ASSERT(DE_FALSE);
2028
2029	buf << "layout(binding=0, std430) buffer ResultBuffer\n"
2030		<< "{\n"
2031		<< "	highp int resultOk[];\n"
2032		<< "} sb_result;\n"
2033		<< "\n"
2034		<< "void main (void)\n"
2035		<< "{\n"
2036		<< "	uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
2037		<< "	int groupNdx = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
2038		<< "	int interleavedGroupNdx = int((size.x >> 1U) * size.y * gl_GlobalInvocationID.z + (size.x >> 1U) * gl_GlobalInvocationID.y + (gl_GlobalInvocationID.x >> 1U));\n"
2039		<< "	" << ((m_formatInteger) ? ("int") : ("float")) << " zero = " << ((m_formatInteger) ? ("0") : ("0.0")) << ";\n"
2040		<< "	bool allOk = true;\n"
2041		<< "\n";
2042
2043	// Verify data
2044
2045	if (m_storage == STORAGE_BUFFER)
2046	{
2047		buf << "	if (groupNdx % 2 == 0)\n"
2048			<< "	{\n";
2049		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
2050			buf << "		allOk = allOk && ("
2051				<< ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in.values[((interleavedGroupNdx + " << seed0 + readNdx*m_invocationGridSize*m_invocationGridSize / 2 << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize / 2 << ") * 2 + 0]"
2052				<< ((m_useAtomic) ? (", zero)") : ("")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(interleavedGroupNdx));\n";
2053		buf << "	}\n"
2054			<< "	else\n"
2055			<< "	{\n";
2056		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
2057			buf << "		allOk = allOk && ("
2058				<< ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in.values[((interleavedGroupNdx + " << seed1 + readNdx*m_invocationGridSize*m_invocationGridSize / 2 << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize / 2 << ") * 2 + 1]"
2059				<< ((m_useAtomic) ? (", zero)") : ("")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(interleavedGroupNdx));\n";
2060		buf << "	}\n";
2061	}
2062	else if (m_storage == STORAGE_IMAGE)
2063	{
2064		buf << "	if (groupNdx % 2 == 0)\n"
2065			<< "	{\n";
2066		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
2067			buf << "		allOk = allOk && ("
2068				<< ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad"))
2069				<< "(u_imageIn, ivec2(((int(gl_GlobalInvocationID.x >> 1U) + " << (seed0 + readNdx*100) << ") % " << m_invocationGridSize / 2 << ") * 2 + 0, int(gl_GlobalInvocationID.y) + " << readNdx*m_invocationGridSize << ")"
2070				<< ((m_useAtomic) ? (", zero)") : (").x")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(interleavedGroupNdx));\n";
2071		buf << "	}\n"
2072			<< "	else\n"
2073			<< "	{\n";
2074		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
2075			buf << "		allOk = allOk && ("
2076				<< ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad"))
2077				<< "(u_imageIn, ivec2(((int(gl_GlobalInvocationID.x >> 1U) + " << (seed1 + readNdx*100) << ") % " << m_invocationGridSize / 2 << ") * 2 + 1, int(gl_GlobalInvocationID.y) + " << readNdx*m_invocationGridSize << ")"
2078				<< ((m_useAtomic) ? (", zero)") : (").x")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(interleavedGroupNdx));\n";
2079		buf << "	}\n";
2080	}
2081	else
2082		DE_ASSERT(DE_FALSE);
2083
2084	buf << "	sb_result.resultOk[groupNdx] = (allOk) ? (1) : (0);\n"
2085		<< "}\n";
2086
2087	return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(specializeShader(m_context, buf.str().c_str())));
2088}
2089
2090glu::ShaderProgram*	InterCallTestCase::genReadZeroProgram (void)
2091{
2092	const bool			useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
2093	std::ostringstream	buf;
2094
2095	buf << "${GLSL_VERSION_DECL}\n"
2096		<< ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
2097		<< "layout (local_size_x = 1, local_size_y = 1) in;\n";
2098
2099	if (m_storage == STORAGE_BUFFER)
2100		buf << "layout(binding=1, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
2101			<< "{\n"
2102			<< "	highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
2103			<< "} sb_in;\n";
2104	else if (m_storage == STORAGE_IMAGE)
2105		buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=1) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn;\n";
2106	else
2107		DE_ASSERT(DE_FALSE);
2108
2109	buf << "layout(binding=0, std430) buffer ResultBuffer\n"
2110		<< "{\n"
2111		<< "	highp int resultOk[];\n"
2112		<< "} sb_result;\n"
2113		<< "\n"
2114		<< "void main (void)\n"
2115		<< "{\n"
2116		<< "	uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
2117		<< "	int groupNdx = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
2118		<< "	" << ((m_formatInteger) ? ("int") : ("float")) << " anything = " << ((m_formatInteger) ? ("5") : ("5.0")) << ";\n"
2119		<< "	bool allOk = true;\n"
2120		<< "\n";
2121
2122	// Verify data
2123
2124	if (m_storage == STORAGE_BUFFER)
2125	{
2126		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
2127			buf << "	allOk = allOk && ("
2128				<< ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in.values[groupNdx * " << m_perInvocationSize << " + " << readNdx << "]"
2129				<< ((m_useAtomic) ? (", anything)") : ("")) << " == " << ((m_formatInteger) ? ("0") : ("0.0")) << ");\n";
2130	}
2131	else if (m_storage == STORAGE_IMAGE)
2132	{
2133		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
2134			buf << "	allOk = allOk && ("
2135			<< ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad")) << "(u_imageIn, ivec2(gl_GlobalInvocationID.x, gl_GlobalInvocationID.y + " << (readNdx*m_invocationGridSize) << "u)"
2136			<< ((m_useAtomic) ? (", anything)") : (").x")) << " == " << ((m_formatInteger) ? ("0") : ("0.0")) << ");\n";
2137	}
2138	else
2139		DE_ASSERT(DE_FALSE);
2140
2141	buf << "	sb_result.resultOk[groupNdx] = (allOk) ? (1) : (0);\n"
2142		<< "}\n";
2143
2144	return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(specializeShader(m_context, buf.str().c_str())));
2145}
2146
2147class SSBOConcurrentAtomicCase : public TestCase
2148{
2149public:
2150
2151							SSBOConcurrentAtomicCase	(Context& context, const char* name, const char* description, int numCalls, int workSize);
2152							~SSBOConcurrentAtomicCase	(void);
2153
2154	void					init						(void);
2155	void					deinit						(void);
2156	IterateResult			iterate						(void);
2157
2158private:
2159	std::string				genComputeSource			(void) const;
2160
2161	const int				m_numCalls;
2162	const int				m_workSize;
2163	glu::ShaderProgram*		m_program;
2164	deUint32				m_bufferID;
2165	std::vector<deUint32>	m_intermediateResultBuffers;
2166};
2167
2168SSBOConcurrentAtomicCase::SSBOConcurrentAtomicCase (Context& context, const char* name, const char* description, int numCalls, int workSize)
2169	: TestCase		(context, name, description)
2170	, m_numCalls	(numCalls)
2171	, m_workSize	(workSize)
2172	, m_program		(DE_NULL)
2173	, m_bufferID	(DE_NULL)
2174{
2175}
2176
2177SSBOConcurrentAtomicCase::~SSBOConcurrentAtomicCase (void)
2178{
2179	deinit();
2180}
2181
2182void SSBOConcurrentAtomicCase::init (void)
2183{
2184	const glw::Functions&	gl					= m_context.getRenderContext().getFunctions();
2185	std::vector<deUint32>	zeroData			(m_workSize, 0);
2186
2187	// gen buffers
2188
2189	gl.genBuffers(1, &m_bufferID);
2190	gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_bufferID);
2191	gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32) * m_workSize, &zeroData[0], GL_DYNAMIC_COPY);
2192
2193	for (int ndx = 0; ndx < m_numCalls; ++ndx)
2194	{
2195		deUint32 buffer = 0;
2196
2197		gl.genBuffers(1, &buffer);
2198		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, buffer);
2199		gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32) * m_workSize, &zeroData[0], GL_DYNAMIC_COPY);
2200
2201		m_intermediateResultBuffers.push_back(buffer);
2202		GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffers");
2203	}
2204
2205	// gen program
2206
2207	m_program = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genComputeSource()));
2208	m_testCtx.getLog() << *m_program;
2209	if (!m_program->isOk())
2210		throw tcu::TestError("could not build program");
2211}
2212
2213void SSBOConcurrentAtomicCase::deinit (void)
2214{
2215	if (m_bufferID)
2216	{
2217		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_bufferID);
2218		m_bufferID = 0;
2219	}
2220
2221	for (int ndx = 0; ndx < (int)m_intermediateResultBuffers.size(); ++ndx)
2222		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_intermediateResultBuffers[ndx]);
2223	m_intermediateResultBuffers.clear();
2224
2225	delete m_program;
2226	m_program = DE_NULL;
2227}
2228
2229TestCase::IterateResult SSBOConcurrentAtomicCase::iterate (void)
2230{
2231	const glw::Functions&	gl				= m_context.getRenderContext().getFunctions();
2232	const deUint32			sumValue		= (deUint32)(m_numCalls * (m_numCalls + 1) / 2);
2233	std::vector<int>		deltas;
2234
2235	// generate unique deltas
2236	generateShuffledRamp(m_numCalls, deltas);
2237
2238	// invoke program N times, each with a different delta
2239	{
2240		const int deltaLocation = gl.getUniformLocation(m_program->getProgram(), "u_atomicDelta");
2241
2242		m_testCtx.getLog()
2243			<< tcu::TestLog::Message
2244			<< "Running shader " << m_numCalls << " times.\n"
2245			<< "Num groups = (" << m_workSize << ", 1, 1)\n"
2246			<< "Setting u_atomicDelta to a unique value for each call.\n"
2247			<< tcu::TestLog::EndMessage;
2248
2249		if (deltaLocation == -1)
2250			throw tcu::TestError("u_atomicDelta location was -1");
2251
2252		gl.useProgram(m_program->getProgram());
2253		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, m_bufferID);
2254
2255		for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2256		{
2257			m_testCtx.getLog()
2258				<< tcu::TestLog::Message
2259				<< "Call " << callNdx << ": u_atomicDelta = " << deltas[callNdx]
2260				<< tcu::TestLog::EndMessage;
2261
2262			gl.uniform1ui(deltaLocation, deltas[callNdx]);
2263			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_intermediateResultBuffers[callNdx]);
2264			gl.dispatchCompute(m_workSize, 1, 1);
2265		}
2266
2267		GLU_EXPECT_NO_ERROR(gl.getError(), "post dispatch");
2268	}
2269
2270	// Verify result
2271	{
2272		std::vector<deUint32> result;
2273
2274		m_testCtx.getLog() << tcu::TestLog::Message << "Verifying work buffer, it should be filled with value " << sumValue << tcu::TestLog::EndMessage;
2275
2276		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_bufferID);
2277		readBuffer(gl, GL_SHADER_STORAGE_BUFFER, m_workSize, result);
2278
2279		for (int ndx = 0; ndx < m_workSize; ++ndx)
2280		{
2281			if (result[ndx] != sumValue)
2282			{
2283				m_testCtx.getLog()
2284					<< tcu::TestLog::Message
2285					<< "Work buffer error, at index " << ndx << " expected value " << (sumValue) << ", got " << result[ndx] << "\n"
2286					<< "Work buffer contains invalid values."
2287					<< tcu::TestLog::EndMessage;
2288
2289				m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
2290				return STOP;
2291			}
2292		}
2293
2294		m_testCtx.getLog() << tcu::TestLog::Message << "Work buffer contents are valid." << tcu::TestLog::EndMessage;
2295	}
2296
2297	// verify steps
2298	{
2299		std::vector<std::vector<deUint32> >	intermediateResults	(m_numCalls);
2300		std::vector<deUint32>				valueChain			(m_numCalls);
2301
2302		m_testCtx.getLog() << tcu::TestLog::Message << "Verifying intermediate results. " << tcu::TestLog::EndMessage;
2303
2304		// collect results
2305
2306		for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2307		{
2308			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_intermediateResultBuffers[callNdx]);
2309			readBuffer(gl, GL_SHADER_STORAGE_BUFFER, m_workSize, intermediateResults[callNdx]);
2310		}
2311
2312		// verify values
2313
2314		for (int valueNdx = 0; valueNdx < m_workSize; ++valueNdx)
2315		{
2316			int			invalidOperationNdx;
2317			deUint32	errorDelta;
2318			deUint32	errorExpected;
2319
2320			// collect result chain for each element
2321			for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2322				valueChain[callNdx] = intermediateResults[callNdx][valueNdx];
2323
2324			// check there exists a path from 0 to sumValue using each addition once
2325			// decompose cumulative results to addition operations (all additions positive => this works)
2326
2327			std::sort(valueChain.begin(), valueChain.end());
2328
2329			// validate chain
2330			if (!validateSortedAtomicRampAdditionValueChain(valueChain, sumValue, invalidOperationNdx, errorDelta, errorExpected))
2331			{
2332				m_testCtx.getLog()
2333					<< tcu::TestLog::Message
2334					<< "Intermediate buffer error, at value index " << valueNdx << ", applied operation index " << invalidOperationNdx << ", value was increased by " << errorDelta << ", but expected " << errorExpected << ".\n"
2335					<< "Intermediate buffer contains invalid values. Values at index " << valueNdx << "\n"
2336					<< tcu::TestLog::EndMessage;
2337
2338				for (int logCallNdx = 0; logCallNdx < m_numCalls; ++logCallNdx)
2339					m_testCtx.getLog() << tcu::TestLog::Message << "Value[" << logCallNdx << "] = " << intermediateResults[logCallNdx][valueNdx] << tcu::TestLog::EndMessage;
2340				m_testCtx.getLog() << tcu::TestLog::Message << "Result = " << sumValue << tcu::TestLog::EndMessage;
2341
2342				m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
2343				return STOP;
2344			}
2345		}
2346
2347		m_testCtx.getLog() << tcu::TestLog::Message << "Intermediate buffers are valid." << tcu::TestLog::EndMessage;
2348	}
2349
2350	m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
2351	return STOP;
2352}
2353
2354std::string SSBOConcurrentAtomicCase::genComputeSource (void) const
2355{
2356	std::ostringstream buf;
2357
2358	buf	<< "${GLSL_VERSION_DECL}\n"
2359		<< "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
2360		<< "layout (binding = 1, std430) writeonly buffer IntermediateResults\n"
2361		<< "{\n"
2362		<< "	highp uint values[" << m_workSize << "];\n"
2363		<< "} sb_ires;\n"
2364		<< "\n"
2365		<< "layout (binding = 2, std430) volatile buffer WorkBuffer\n"
2366		<< "{\n"
2367		<< "	highp uint values[" << m_workSize << "];\n"
2368		<< "} sb_work;\n"
2369		<< "uniform highp uint u_atomicDelta;\n"
2370		<< "\n"
2371		<< "void main ()\n"
2372		<< "{\n"
2373		<< "	highp uint invocationIndex = gl_GlobalInvocationID.x;\n"
2374		<< "	sb_ires.values[invocationIndex] = atomicAdd(sb_work.values[invocationIndex], u_atomicDelta);\n"
2375		<< "}";
2376
2377	return specializeShader(m_context, buf.str().c_str());
2378}
2379
2380class ConcurrentAtomicCounterCase : public TestCase
2381{
2382public:
2383
2384							ConcurrentAtomicCounterCase		(Context& context, const char* name, const char* description, int numCalls, int workSize);
2385							~ConcurrentAtomicCounterCase	(void);
2386
2387	void					init							(void);
2388	void					deinit							(void);
2389	IterateResult			iterate							(void);
2390
2391private:
2392	std::string				genComputeSource				(bool evenOdd) const;
2393
2394	const int				m_numCalls;
2395	const int				m_workSize;
2396	glu::ShaderProgram*		m_evenProgram;
2397	glu::ShaderProgram*		m_oddProgram;
2398	deUint32				m_counterBuffer;
2399	deUint32				m_intermediateResultBuffer;
2400};
2401
2402ConcurrentAtomicCounterCase::ConcurrentAtomicCounterCase (Context& context, const char* name, const char* description, int numCalls, int workSize)
2403	: TestCase					(context, name, description)
2404	, m_numCalls				(numCalls)
2405	, m_workSize				(workSize)
2406	, m_evenProgram				(DE_NULL)
2407	, m_oddProgram				(DE_NULL)
2408	, m_counterBuffer			(DE_NULL)
2409	, m_intermediateResultBuffer(DE_NULL)
2410{
2411}
2412
2413ConcurrentAtomicCounterCase::~ConcurrentAtomicCounterCase (void)
2414{
2415	deinit();
2416}
2417
2418void ConcurrentAtomicCounterCase::init (void)
2419{
2420	const glw::Functions&		gl			= m_context.getRenderContext().getFunctions();
2421	const std::vector<deUint32>	zeroData	(m_numCalls * m_workSize, 0);
2422
2423	// gen buffer
2424
2425	gl.genBuffers(1, &m_counterBuffer);
2426	gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_counterBuffer);
2427	gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32), &zeroData[0], GL_DYNAMIC_COPY);
2428
2429	gl.genBuffers(1, &m_intermediateResultBuffer);
2430	gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_intermediateResultBuffer);
2431	gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32) * m_numCalls * m_workSize, &zeroData[0], GL_DYNAMIC_COPY);
2432
2433	GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffers");
2434
2435	// gen programs
2436
2437	{
2438		const tcu::ScopedLogSection section(m_testCtx.getLog(), "EvenProgram", "Even program");
2439
2440		m_evenProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genComputeSource(true)));
2441		m_testCtx.getLog() << *m_evenProgram;
2442		if (!m_evenProgram->isOk())
2443			throw tcu::TestError("could not build program");
2444	}
2445	{
2446		const tcu::ScopedLogSection section(m_testCtx.getLog(), "OddProgram", "Odd program");
2447
2448		m_oddProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genComputeSource(false)));
2449		m_testCtx.getLog() << *m_oddProgram;
2450		if (!m_oddProgram->isOk())
2451			throw tcu::TestError("could not build program");
2452	}
2453}
2454
2455void ConcurrentAtomicCounterCase::deinit (void)
2456{
2457	if (m_counterBuffer)
2458	{
2459		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_counterBuffer);
2460		m_counterBuffer = 0;
2461	}
2462	if (m_intermediateResultBuffer)
2463	{
2464		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_intermediateResultBuffer);
2465		m_intermediateResultBuffer = 0;
2466	}
2467
2468	delete m_evenProgram;
2469	m_evenProgram = DE_NULL;
2470
2471	delete m_oddProgram;
2472	m_oddProgram = DE_NULL;
2473}
2474
2475TestCase::IterateResult ConcurrentAtomicCounterCase::iterate (void)
2476{
2477	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
2478
2479	// invoke program N times, each with a different delta
2480	{
2481		const int evenCallNdxLocation	= gl.getUniformLocation(m_evenProgram->getProgram(), "u_callNdx");
2482		const int oddCallNdxLocation	= gl.getUniformLocation(m_oddProgram->getProgram(), "u_callNdx");
2483
2484		m_testCtx.getLog()
2485			<< tcu::TestLog::Message
2486			<< "Running shader pair (even & odd) " << m_numCalls << " times.\n"
2487			<< "Num groups = (" << m_workSize << ", 1, 1)\n"
2488			<< tcu::TestLog::EndMessage;
2489
2490		if (evenCallNdxLocation == -1)
2491			throw tcu::TestError("u_callNdx location was -1");
2492		if (oddCallNdxLocation == -1)
2493			throw tcu::TestError("u_callNdx location was -1");
2494
2495		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_intermediateResultBuffer);
2496		gl.bindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, m_counterBuffer);
2497
2498		for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2499		{
2500			gl.useProgram(m_evenProgram->getProgram());
2501			gl.uniform1ui(evenCallNdxLocation, (deUint32)callNdx);
2502			gl.dispatchCompute(m_workSize, 1, 1);
2503
2504			gl.useProgram(m_oddProgram->getProgram());
2505			gl.uniform1ui(oddCallNdxLocation, (deUint32)callNdx);
2506			gl.dispatchCompute(m_workSize, 1, 1);
2507		}
2508
2509		GLU_EXPECT_NO_ERROR(gl.getError(), "post dispatch");
2510	}
2511
2512	// Verify result
2513	{
2514		deUint32 result;
2515
2516		m_testCtx.getLog() << tcu::TestLog::Message << "Verifying work buffer, it should be " << m_numCalls*m_workSize << tcu::TestLog::EndMessage;
2517
2518		gl.bindBuffer(GL_ATOMIC_COUNTER_BUFFER, m_counterBuffer);
2519		result = readBufferUint32(gl, GL_ATOMIC_COUNTER_BUFFER);
2520
2521		if ((int)result != m_numCalls*m_workSize)
2522		{
2523			m_testCtx.getLog()
2524				<< tcu::TestLog::Message
2525				<< "Counter buffer error, expected value " << (m_numCalls*m_workSize) << ", got " << result << "\n"
2526				<< tcu::TestLog::EndMessage;
2527
2528			m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
2529			return STOP;
2530		}
2531
2532		m_testCtx.getLog() << tcu::TestLog::Message << "Counter buffer is valid." << tcu::TestLog::EndMessage;
2533	}
2534
2535	// verify steps
2536	{
2537		std::vector<deUint32> intermediateResults;
2538
2539		m_testCtx.getLog() << tcu::TestLog::Message << "Verifying intermediate results. " << tcu::TestLog::EndMessage;
2540
2541		// collect results
2542
2543		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_intermediateResultBuffer);
2544		readBuffer(gl, GL_SHADER_STORAGE_BUFFER, m_numCalls * m_workSize, intermediateResults);
2545
2546		// verify values
2547
2548		std::sort(intermediateResults.begin(), intermediateResults.end());
2549
2550		for (int valueNdx = 0; valueNdx < m_workSize * m_numCalls; ++valueNdx)
2551		{
2552			if ((int)intermediateResults[valueNdx] != valueNdx)
2553			{
2554				m_testCtx.getLog()
2555					<< tcu::TestLog::Message
2556					<< "Intermediate buffer error, at value index " << valueNdx << ", expected " << valueNdx << ", got " << intermediateResults[valueNdx] << ".\n"
2557					<< "Intermediate buffer contains invalid values. Intermediate results:\n"
2558					<< tcu::TestLog::EndMessage;
2559
2560				for (int logCallNdx = 0; logCallNdx < m_workSize * m_numCalls; ++logCallNdx)
2561					m_testCtx.getLog() << tcu::TestLog::Message << "Value[" << logCallNdx << "] = " << intermediateResults[logCallNdx] << tcu::TestLog::EndMessage;
2562
2563				m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
2564				return STOP;
2565			}
2566		}
2567
2568		m_testCtx.getLog() << tcu::TestLog::Message << "Intermediate buffers are valid." << tcu::TestLog::EndMessage;
2569	}
2570
2571	m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
2572	return STOP;
2573}
2574
2575std::string ConcurrentAtomicCounterCase::genComputeSource (bool evenOdd) const
2576{
2577	std::ostringstream buf;
2578
2579	buf	<< "${GLSL_VERSION_DECL}\n"
2580		<< "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
2581		<< "layout (binding = 1, std430) writeonly buffer IntermediateResults\n"
2582		<< "{\n"
2583		<< "	highp uint values[" << m_workSize * m_numCalls << "];\n"
2584		<< "} sb_ires;\n"
2585		<< "\n"
2586		<< "layout (binding = 0, offset = 0) uniform atomic_uint u_counter;\n"
2587		<< "uniform highp uint u_callNdx;\n"
2588		<< "\n"
2589		<< "void main ()\n"
2590		<< "{\n"
2591		<< "	highp uint dataNdx = u_callNdx * " << m_workSize << "u + gl_GlobalInvocationID.x;\n"
2592		<< "	if ((dataNdx % 2u) == " << ((evenOdd) ? (0) : (1)) << "u)\n"
2593		<< "		sb_ires.values[dataNdx] = atomicCounterIncrement(u_counter);\n"
2594		<< "}";
2595
2596	return specializeShader(m_context, buf.str().c_str());
2597}
2598
2599class ConcurrentImageAtomicCase : public TestCase
2600{
2601public:
2602
2603							ConcurrentImageAtomicCase	(Context& context, const char* name, const char* description, int numCalls, int workSize);
2604							~ConcurrentImageAtomicCase	(void);
2605
2606	void					init						(void);
2607	void					deinit						(void);
2608	IterateResult			iterate						(void);
2609
2610private:
2611	void					readWorkImage				(std::vector<deUint32>& result);
2612
2613	std::string				genComputeSource			(void) const;
2614	std::string				genImageReadSource			(void) const;
2615	std::string				genImageClearSource			(void) const;
2616
2617	const int				m_numCalls;
2618	const int				m_workSize;
2619	glu::ShaderProgram*		m_program;
2620	glu::ShaderProgram*		m_imageReadProgram;
2621	glu::ShaderProgram*		m_imageClearProgram;
2622	deUint32				m_imageID;
2623	std::vector<deUint32>	m_intermediateResultBuffers;
2624};
2625
2626ConcurrentImageAtomicCase::ConcurrentImageAtomicCase (Context& context, const char* name, const char* description, int numCalls, int workSize)
2627	: TestCase				(context, name, description)
2628	, m_numCalls			(numCalls)
2629	, m_workSize			(workSize)
2630	, m_program				(DE_NULL)
2631	, m_imageReadProgram	(DE_NULL)
2632	, m_imageClearProgram	(DE_NULL)
2633	, m_imageID				(DE_NULL)
2634{
2635}
2636
2637ConcurrentImageAtomicCase::~ConcurrentImageAtomicCase (void)
2638{
2639	deinit();
2640}
2641
2642void ConcurrentImageAtomicCase::init (void)
2643{
2644	const glw::Functions&	gl					= m_context.getRenderContext().getFunctions();
2645	std::vector<deUint32>	zeroData			(m_workSize * m_workSize, 0);
2646
2647	if (!checkSupport(m_context))
2648		throw tcu::NotSupportedError("Test requires GL_OES_shader_image_atomic");
2649
2650	// gen image
2651
2652	gl.genTextures(1, &m_imageID);
2653	gl.bindTexture(GL_TEXTURE_2D, m_imageID);
2654	gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, m_workSize, m_workSize);
2655	gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
2656	gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
2657	GLU_EXPECT_NO_ERROR(gl.getError(), "gen tex");
2658
2659	// gen buffers
2660
2661	for (int ndx = 0; ndx < m_numCalls; ++ndx)
2662	{
2663		deUint32 buffer = 0;
2664
2665		gl.genBuffers(1, &buffer);
2666		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, buffer);
2667		gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32) * m_workSize * m_workSize, &zeroData[0], GL_DYNAMIC_COPY);
2668
2669		m_intermediateResultBuffers.push_back(buffer);
2670		GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffers");
2671	}
2672
2673	// gen programs
2674
2675	m_program = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genComputeSource()));
2676	m_testCtx.getLog() << *m_program;
2677	if (!m_program->isOk())
2678		throw tcu::TestError("could not build program");
2679
2680	m_imageReadProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genImageReadSource()));
2681	if (!m_imageReadProgram->isOk())
2682	{
2683		const tcu::ScopedLogSection section(m_testCtx.getLog(), "ImageReadProgram", "Image read program");
2684
2685		m_testCtx.getLog() << *m_imageReadProgram;
2686		throw tcu::TestError("could not build program");
2687	}
2688
2689	m_imageClearProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genImageClearSource()));
2690	if (!m_imageClearProgram->isOk())
2691	{
2692		const tcu::ScopedLogSection section(m_testCtx.getLog(), "ImageClearProgram", "Image read program");
2693
2694		m_testCtx.getLog() << *m_imageClearProgram;
2695		throw tcu::TestError("could not build program");
2696	}
2697}
2698
2699void ConcurrentImageAtomicCase::deinit (void)
2700{
2701	if (m_imageID)
2702	{
2703		m_context.getRenderContext().getFunctions().deleteTextures(1, &m_imageID);
2704		m_imageID = 0;
2705	}
2706
2707	for (int ndx = 0; ndx < (int)m_intermediateResultBuffers.size(); ++ndx)
2708		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_intermediateResultBuffers[ndx]);
2709	m_intermediateResultBuffers.clear();
2710
2711	delete m_program;
2712	m_program = DE_NULL;
2713
2714	delete m_imageReadProgram;
2715	m_imageReadProgram = DE_NULL;
2716
2717	delete m_imageClearProgram;
2718	m_imageClearProgram = DE_NULL;
2719}
2720
2721TestCase::IterateResult ConcurrentImageAtomicCase::iterate (void)
2722{
2723	const glw::Functions&	gl				= m_context.getRenderContext().getFunctions();
2724	const deUint32			sumValue		= (deUint32)(m_numCalls * (m_numCalls + 1) / 2);
2725	std::vector<int>		deltas;
2726
2727	// generate unique deltas
2728	generateShuffledRamp(m_numCalls, deltas);
2729
2730	// clear image
2731	{
2732		m_testCtx.getLog() << tcu::TestLog::Message << "Clearing image contents" << tcu::TestLog::EndMessage;
2733
2734		gl.useProgram(m_imageClearProgram->getProgram());
2735		gl.bindImageTexture(2, m_imageID, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R32UI);
2736		gl.dispatchCompute(m_workSize, m_workSize, 1);
2737		gl.memoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
2738
2739		GLU_EXPECT_NO_ERROR(gl.getError(), "clear");
2740	}
2741
2742	// invoke program N times, each with a different delta
2743	{
2744		const int deltaLocation = gl.getUniformLocation(m_program->getProgram(), "u_atomicDelta");
2745
2746		m_testCtx.getLog()
2747			<< tcu::TestLog::Message
2748			<< "Running shader " << m_numCalls << " times.\n"
2749			<< "Num groups = (" << m_workSize << ", " << m_workSize << ", 1)\n"
2750			<< "Setting u_atomicDelta to a unique value for each call.\n"
2751			<< tcu::TestLog::EndMessage;
2752
2753		if (deltaLocation == -1)
2754			throw tcu::TestError("u_atomicDelta location was -1");
2755
2756		gl.useProgram(m_program->getProgram());
2757		gl.bindImageTexture(2, m_imageID, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32UI);
2758
2759		for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2760		{
2761			m_testCtx.getLog()
2762				<< tcu::TestLog::Message
2763				<< "Call " << callNdx << ": u_atomicDelta = " << deltas[callNdx]
2764				<< tcu::TestLog::EndMessage;
2765
2766			gl.uniform1ui(deltaLocation, deltas[callNdx]);
2767			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_intermediateResultBuffers[callNdx]);
2768			gl.dispatchCompute(m_workSize, m_workSize, 1);
2769		}
2770
2771		GLU_EXPECT_NO_ERROR(gl.getError(), "post dispatch");
2772	}
2773
2774	// Verify result
2775	{
2776		std::vector<deUint32> result;
2777
2778		m_testCtx.getLog() << tcu::TestLog::Message << "Verifying work image, it should be filled with value " << sumValue << tcu::TestLog::EndMessage;
2779
2780		readWorkImage(result);
2781
2782		for (int ndx = 0; ndx < m_workSize * m_workSize; ++ndx)
2783		{
2784			if (result[ndx] != sumValue)
2785			{
2786				m_testCtx.getLog()
2787					<< tcu::TestLog::Message
2788					<< "Work image error, at index (" << ndx % m_workSize << ", " << ndx / m_workSize << ") expected value " << (sumValue) << ", got " << result[ndx] << "\n"
2789					<< "Work image contains invalid values."
2790					<< tcu::TestLog::EndMessage;
2791
2792				m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Image contents invalid");
2793				return STOP;
2794			}
2795		}
2796
2797		m_testCtx.getLog() << tcu::TestLog::Message << "Work image contents are valid." << tcu::TestLog::EndMessage;
2798	}
2799
2800	// verify steps
2801	{
2802		std::vector<std::vector<deUint32> >	intermediateResults	(m_numCalls);
2803		std::vector<deUint32>				valueChain			(m_numCalls);
2804		std::vector<deUint32>				chainDelta			(m_numCalls);
2805
2806		m_testCtx.getLog() << tcu::TestLog::Message << "Verifying intermediate results. " << tcu::TestLog::EndMessage;
2807
2808		// collect results
2809
2810		for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2811		{
2812			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_intermediateResultBuffers[callNdx]);
2813			readBuffer(gl, GL_SHADER_STORAGE_BUFFER, m_workSize * m_workSize, intermediateResults[callNdx]);
2814		}
2815
2816		// verify values
2817
2818		for (int valueNdx = 0; valueNdx < m_workSize; ++valueNdx)
2819		{
2820			int			invalidOperationNdx;
2821			deUint32	errorDelta;
2822			deUint32	errorExpected;
2823
2824			// collect result chain for each element
2825			for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2826				valueChain[callNdx] = intermediateResults[callNdx][valueNdx];
2827
2828			// check there exists a path from 0 to sumValue using each addition once
2829			// decompose cumulative results to addition operations (all additions positive => this works)
2830
2831			std::sort(valueChain.begin(), valueChain.end());
2832
2833			for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
2834				chainDelta[callNdx] = ((callNdx + 1 == m_numCalls) ? (sumValue) : (valueChain[callNdx+1])) - valueChain[callNdx];
2835
2836			// chainDelta contains now the actual additions applied to the value
2837			std::sort(chainDelta.begin(), chainDelta.end());
2838
2839			// validate chain
2840			if (!validateSortedAtomicRampAdditionValueChain(valueChain, sumValue, invalidOperationNdx, errorDelta, errorExpected))
2841			{
2842				m_testCtx.getLog()
2843					<< tcu::TestLog::Message
2844					<< "Intermediate buffer error, at index (" << valueNdx % m_workSize << ", " << valueNdx / m_workSize << "), applied operation index "
2845					<< invalidOperationNdx << ", value was increased by " << errorDelta << ", but expected " << errorExpected << ".\n"
2846					<< "Intermediate buffer contains invalid values. Values at index (" << valueNdx % m_workSize << ", " << valueNdx / m_workSize << ")\n"
2847					<< tcu::TestLog::EndMessage;
2848
2849				for (int logCallNdx = 0; logCallNdx < m_numCalls; ++logCallNdx)
2850					m_testCtx.getLog() << tcu::TestLog::Message << "Value[" << logCallNdx << "] = " << intermediateResults[logCallNdx][valueNdx] << tcu::TestLog::EndMessage;
2851				m_testCtx.getLog() << tcu::TestLog::Message << "Result = " << sumValue << tcu::TestLog::EndMessage;
2852
2853				m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
2854				return STOP;
2855			}
2856		}
2857
2858		m_testCtx.getLog() << tcu::TestLog::Message << "Intermediate buffers are valid." << tcu::TestLog::EndMessage;
2859	}
2860
2861	m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
2862	return STOP;
2863}
2864
2865void ConcurrentImageAtomicCase::readWorkImage (std::vector<deUint32>& result)
2866{
2867	const glw::Functions&	gl				= m_context.getRenderContext().getFunctions();
2868	glu::Buffer				resultBuffer	(m_context.getRenderContext());
2869
2870	// Read image to an ssbo
2871
2872	{
2873		const std::vector<deUint32> zeroData(m_workSize*m_workSize, 0);
2874
2875		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *resultBuffer);
2876		gl.bufferData(GL_SHADER_STORAGE_BUFFER, (int)(sizeof(deUint32) * m_workSize * m_workSize), &zeroData[0], GL_DYNAMIC_COPY);
2877
2878		gl.memoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
2879		gl.useProgram(m_imageReadProgram->getProgram());
2880
2881		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, *resultBuffer);
2882		gl.bindImageTexture(2, m_imageID, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R32UI);
2883		gl.dispatchCompute(m_workSize, m_workSize, 1);
2884
2885		GLU_EXPECT_NO_ERROR(gl.getError(), "read");
2886	}
2887
2888	// Read ssbo
2889	{
2890		const void* ptr = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, (int)(sizeof(deUint32) * m_workSize * m_workSize), GL_MAP_READ_BIT);
2891		GLU_EXPECT_NO_ERROR(gl.getError(), "map");
2892
2893		if (!ptr)
2894			throw tcu::TestError("mapBufferRange returned NULL");
2895
2896		result.resize(m_workSize * m_workSize);
2897		memcpy(&result[0], ptr, sizeof(deUint32) * m_workSize * m_workSize);
2898
2899		if (gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER) == GL_FALSE)
2900			throw tcu::TestError("unmapBuffer returned false");
2901	}
2902}
2903
2904std::string ConcurrentImageAtomicCase::genComputeSource (void) const
2905{
2906	std::ostringstream buf;
2907
2908	buf	<< "${GLSL_VERSION_DECL}\n"
2909		<< "${SHADER_IMAGE_ATOMIC_REQUIRE}\n"
2910		<< "\n"
2911		<< "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
2912		<< "layout (binding = 1, std430) writeonly buffer IntermediateResults\n"
2913		<< "{\n"
2914		<< "	highp uint values[" << m_workSize * m_workSize << "];\n"
2915		<< "} sb_ires;\n"
2916		<< "\n"
2917		<< "layout (binding = 2, r32ui) volatile uniform highp uimage2D u_workImage;\n"
2918		<< "uniform highp uint u_atomicDelta;\n"
2919		<< "\n"
2920		<< "void main ()\n"
2921		<< "{\n"
2922		<< "	highp uint invocationIndex = gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * uint(" << m_workSize <<");\n"
2923		<< "	sb_ires.values[invocationIndex] = imageAtomicAdd(u_workImage, ivec2(gl_GlobalInvocationID.xy), u_atomicDelta);\n"
2924		<< "}";
2925
2926	return specializeShader(m_context, buf.str().c_str());
2927}
2928
2929std::string ConcurrentImageAtomicCase::genImageReadSource (void) const
2930{
2931	std::ostringstream buf;
2932
2933	buf	<< "${GLSL_VERSION_DECL}\n"
2934		<< "\n"
2935		<< "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
2936		<< "layout (binding = 1, std430) writeonly buffer ImageValues\n"
2937		<< "{\n"
2938		<< "	highp uint values[" << m_workSize * m_workSize << "];\n"
2939		<< "} sb_res;\n"
2940		<< "\n"
2941		<< "layout (binding = 2, r32ui) readonly uniform highp uimage2D u_workImage;\n"
2942		<< "\n"
2943		<< "void main ()\n"
2944		<< "{\n"
2945		<< "	highp uint invocationIndex = gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * uint(" << m_workSize <<");\n"
2946		<< "	sb_res.values[invocationIndex] = imageLoad(u_workImage, ivec2(gl_GlobalInvocationID.xy)).x;\n"
2947		<< "}";
2948
2949	return specializeShader(m_context, buf.str().c_str());
2950}
2951
2952std::string ConcurrentImageAtomicCase::genImageClearSource (void) const
2953{
2954	std::ostringstream buf;
2955
2956	buf	<< "${GLSL_VERSION_DECL}\n"
2957		<< "\n"
2958		<< "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
2959		<< "layout (binding = 2, r32ui) writeonly uniform highp uimage2D u_workImage;\n"
2960		<< "\n"
2961		<< "void main ()\n"
2962		<< "{\n"
2963		<< "	imageStore(u_workImage, ivec2(gl_GlobalInvocationID.xy), uvec4(0, 0, 0, 0));\n"
2964		<< "}";
2965
2966	return specializeShader(m_context, buf.str().c_str());
2967}
2968
2969class ConcurrentSSBOAtomicCounterMixedCase : public TestCase
2970{
2971public:
2972							ConcurrentSSBOAtomicCounterMixedCase	(Context& context, const char* name, const char* description, int numCalls, int workSize);
2973							~ConcurrentSSBOAtomicCounterMixedCase	(void);
2974
2975	void					init									(void);
2976	void					deinit									(void);
2977	IterateResult			iterate									(void);
2978
2979private:
2980	std::string				genSSBOComputeSource					(void) const;
2981	std::string				genAtomicCounterComputeSource			(void) const;
2982
2983	const int				m_numCalls;
2984	const int				m_workSize;
2985	deUint32				m_bufferID;
2986	glu::ShaderProgram*		m_ssboAtomicProgram;
2987	glu::ShaderProgram*		m_atomicCounterProgram;
2988};
2989
2990ConcurrentSSBOAtomicCounterMixedCase::ConcurrentSSBOAtomicCounterMixedCase (Context& context, const char* name, const char* description, int numCalls, int workSize)
2991	: TestCase					(context, name, description)
2992	, m_numCalls				(numCalls)
2993	, m_workSize				(workSize)
2994	, m_bufferID				(DE_NULL)
2995	, m_ssboAtomicProgram		(DE_NULL)
2996	, m_atomicCounterProgram	(DE_NULL)
2997{
2998	// SSBO atomic XORs cancel out
2999	DE_ASSERT((workSize * numCalls) % (16 * 2) == 0);
3000}
3001
3002ConcurrentSSBOAtomicCounterMixedCase::~ConcurrentSSBOAtomicCounterMixedCase (void)
3003{
3004	deinit();
3005}
3006
3007void ConcurrentSSBOAtomicCounterMixedCase::init (void)
3008{
3009	const glw::Functions&		gl			= m_context.getRenderContext().getFunctions();
3010	const deUint32				zeroBuf[2]	= { 0, 0 };
3011
3012	// gen buffer
3013
3014	gl.genBuffers(1, &m_bufferID);
3015	gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_bufferID);
3016	gl.bufferData(GL_SHADER_STORAGE_BUFFER, (int)(sizeof(deUint32) * 2), zeroBuf, GL_DYNAMIC_COPY);
3017
3018	GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffers");
3019
3020	// gen programs
3021
3022	{
3023		const tcu::ScopedLogSection section(m_testCtx.getLog(), "SSBOProgram", "SSBO atomic program");
3024
3025		m_ssboAtomicProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genSSBOComputeSource()));
3026		m_testCtx.getLog() << *m_ssboAtomicProgram;
3027		if (!m_ssboAtomicProgram->isOk())
3028			throw tcu::TestError("could not build program");
3029	}
3030	{
3031		const tcu::ScopedLogSection section(m_testCtx.getLog(), "AtomicCounterProgram", "Atomic counter program");
3032
3033		m_atomicCounterProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genAtomicCounterComputeSource()));
3034		m_testCtx.getLog() << *m_atomicCounterProgram;
3035		if (!m_atomicCounterProgram->isOk())
3036			throw tcu::TestError("could not build program");
3037	}
3038}
3039
3040void ConcurrentSSBOAtomicCounterMixedCase::deinit (void)
3041{
3042	if (m_bufferID)
3043	{
3044		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_bufferID);
3045		m_bufferID = 0;
3046	}
3047
3048	delete m_ssboAtomicProgram;
3049	m_ssboAtomicProgram = DE_NULL;
3050
3051	delete m_atomicCounterProgram;
3052	m_atomicCounterProgram = DE_NULL;
3053}
3054
3055TestCase::IterateResult ConcurrentSSBOAtomicCounterMixedCase::iterate (void)
3056{
3057	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
3058
3059	m_testCtx.getLog() << tcu::TestLog::Message << "Testing atomic counters and SSBO atomic operations with both backed by the same buffer." << tcu::TestLog::EndMessage;
3060
3061	// invoke programs N times
3062	{
3063		m_testCtx.getLog()
3064			<< tcu::TestLog::Message
3065			<< "Running SSBO atomic program and atomic counter program " << m_numCalls << " times. (interleaved)\n"
3066			<< "Num groups = (" << m_workSize << ", 1, 1)\n"
3067			<< tcu::TestLog::EndMessage;
3068
3069		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_bufferID);
3070		gl.bindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, m_bufferID);
3071
3072		for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
3073		{
3074			gl.useProgram(m_atomicCounterProgram->getProgram());
3075			gl.dispatchCompute(m_workSize, 1, 1);
3076
3077			gl.useProgram(m_ssboAtomicProgram->getProgram());
3078			gl.dispatchCompute(m_workSize, 1, 1);
3079		}
3080
3081		GLU_EXPECT_NO_ERROR(gl.getError(), "post dispatch");
3082	}
3083
3084	// Verify result
3085	{
3086		deUint32 result;
3087
3088		// XORs cancel out, only addition is left
3089		m_testCtx.getLog() << tcu::TestLog::Message << "Verifying work buffer, it should be " << m_numCalls*m_workSize << tcu::TestLog::EndMessage;
3090
3091		gl.bindBuffer(GL_ATOMIC_COUNTER_BUFFER, m_bufferID);
3092		result = readBufferUint32(gl, GL_ATOMIC_COUNTER_BUFFER);
3093
3094		if ((int)result != m_numCalls*m_workSize)
3095		{
3096			m_testCtx.getLog()
3097				<< tcu::TestLog::Message
3098				<< "Buffer value error, expected value " << (m_numCalls*m_workSize) << ", got " << result << "\n"
3099				<< tcu::TestLog::EndMessage;
3100
3101			m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
3102			return STOP;
3103		}
3104
3105		m_testCtx.getLog() << tcu::TestLog::Message << "Buffer is valid." << tcu::TestLog::EndMessage;
3106	}
3107
3108	m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
3109	return STOP;
3110}
3111
3112std::string ConcurrentSSBOAtomicCounterMixedCase::genSSBOComputeSource (void) const
3113{
3114	std::ostringstream buf;
3115
3116	buf	<< "${GLSL_VERSION_DECL}\n"
3117		<< "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
3118		<< "layout (binding = 1, std430) volatile buffer WorkBuffer\n"
3119		<< "{\n"
3120		<< "	highp uint targetValue;\n"
3121		<< "	highp uint unused;\n"
3122		<< "} sb_work;\n"
3123		<< "\n"
3124		<< "void main ()\n"
3125		<< "{\n"
3126		<< "	// flip high bits\n"
3127		<< "	highp uint mask = uint(1) << (24u + (gl_GlobalInvocationID.x % 8u));\n"
3128		<< "	sb_work.unused = atomicXor(sb_work.targetValue, mask);\n"
3129		<< "}";
3130
3131	return specializeShader(m_context, buf.str().c_str());
3132}
3133
3134std::string ConcurrentSSBOAtomicCounterMixedCase::genAtomicCounterComputeSource (void) const
3135{
3136	std::ostringstream buf;
3137
3138	buf	<< "${GLSL_VERSION_DECL}\n"
3139		<< "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
3140		<< "\n"
3141		<< "layout (binding = 0, offset = 0) uniform atomic_uint u_counter;\n"
3142		<< "\n"
3143		<< "void main ()\n"
3144		<< "{\n"
3145		<< "	atomicCounterIncrement(u_counter);\n"
3146		<< "}";
3147
3148	return specializeShader(m_context, buf.str().c_str());
3149}
3150
3151} // anonymous
3152
3153SynchronizationTests::SynchronizationTests (Context& context)
3154	: TestCaseGroup(context, "synchronization", "Synchronization tests")
3155{
3156}
3157
3158SynchronizationTests::~SynchronizationTests (void)
3159{
3160}
3161
3162void SynchronizationTests::init (void)
3163{
3164	tcu::TestCaseGroup* const inInvocationGroup		= new tcu::TestCaseGroup(m_testCtx, "in_invocation",	"Test intra-invocation synchronization");
3165	tcu::TestCaseGroup* const interInvocationGroup	= new tcu::TestCaseGroup(m_testCtx, "inter_invocation", "Test inter-invocation synchronization");
3166	tcu::TestCaseGroup* const interCallGroup		= new tcu::TestCaseGroup(m_testCtx, "inter_call",       "Test inter-call synchronization");
3167
3168	addChild(inInvocationGroup);
3169	addChild(interInvocationGroup);
3170	addChild(interCallGroup);
3171
3172	// .in_invocation & .inter_invocation
3173	{
3174		static const struct CaseConfig
3175		{
3176			const char*									namePrefix;
3177			const InterInvocationTestCase::StorageType	storage;
3178			const int									flags;
3179		} configs[] =
3180		{
3181			{ "image",			InterInvocationTestCase::STORAGE_IMAGE,		0										},
3182			{ "image_atomic",	InterInvocationTestCase::STORAGE_IMAGE,		InterInvocationTestCase::FLAG_ATOMIC	},
3183			{ "ssbo",			InterInvocationTestCase::STORAGE_BUFFER,	0										},
3184			{ "ssbo_atomic",	InterInvocationTestCase::STORAGE_BUFFER,	InterInvocationTestCase::FLAG_ATOMIC	},
3185		};
3186
3187		for (int groupNdx = 0; groupNdx < 2; ++groupNdx)
3188		{
3189			tcu::TestCaseGroup* const	targetGroup	= (groupNdx == 0) ? (inInvocationGroup) : (interInvocationGroup);
3190			const int					extraFlags	= (groupNdx == 0) ? (0) : (InterInvocationTestCase::FLAG_IN_GROUP);
3191
3192			for (int configNdx = 0; configNdx < DE_LENGTH_OF_ARRAY(configs); ++configNdx)
3193			{
3194				const char* const target = (configs[configNdx].storage == InterInvocationTestCase::STORAGE_BUFFER) ? ("buffer") : ("image");
3195
3196				targetGroup->addChild(new InvocationWriteReadCase(m_context,
3197																  (std::string(configs[configNdx].namePrefix) + "_write_read").c_str(),
3198																  (std::string("Write to ") + target + " and read it").c_str(),
3199																  configs[configNdx].storage,
3200																  configs[configNdx].flags | extraFlags));
3201
3202				targetGroup->addChild(new InvocationReadWriteCase(m_context,
3203																  (std::string(configs[configNdx].namePrefix) + "_read_write").c_str(),
3204																  (std::string("Read form ") + target + " and then write to it").c_str(),
3205																  configs[configNdx].storage,
3206																  configs[configNdx].flags | extraFlags));
3207
3208				targetGroup->addChild(new InvocationOverWriteCase(m_context,
3209																  (std::string(configs[configNdx].namePrefix) + "_overwrite").c_str(),
3210																  (std::string("Write to ") + target + " twice and read it").c_str(),
3211																  configs[configNdx].storage,
3212																  configs[configNdx].flags | extraFlags));
3213
3214				targetGroup->addChild(new InvocationAliasWriteCase(m_context,
3215																   (std::string(configs[configNdx].namePrefix) + "_alias_write").c_str(),
3216																   (std::string("Write to aliasing ") + target + " and read it").c_str(),
3217																   InvocationAliasWriteCase::TYPE_WRITE,
3218																   configs[configNdx].storage,
3219																   configs[configNdx].flags | extraFlags));
3220
3221				targetGroup->addChild(new InvocationAliasWriteCase(m_context,
3222																   (std::string(configs[configNdx].namePrefix) + "_alias_overwrite").c_str(),
3223																   (std::string("Write to aliasing ") + target + "s and read it").c_str(),
3224																   InvocationAliasWriteCase::TYPE_OVERWRITE,
3225																   configs[configNdx].storage,
3226																   configs[configNdx].flags | extraFlags));
3227			}
3228		}
3229	}
3230
3231	// .inter_call
3232	{
3233		tcu::TestCaseGroup* const withBarrierGroup		= new tcu::TestCaseGroup(m_testCtx, "with_memory_barrier", "Synchronize with memory barrier");
3234		tcu::TestCaseGroup* const withoutBarrierGroup	= new tcu::TestCaseGroup(m_testCtx, "without_memory_barrier", "Synchronize without memory barrier");
3235
3236		interCallGroup->addChild(withBarrierGroup);
3237		interCallGroup->addChild(withoutBarrierGroup);
3238
3239		// .with_memory_barrier
3240		{
3241			static const struct CaseConfig
3242			{
3243				const char*								namePrefix;
3244				const InterCallTestCase::StorageType	storage;
3245				const int								flags;
3246			} configs[] =
3247			{
3248				{ "image",			InterCallTestCase::STORAGE_IMAGE,	0																		},
3249				{ "image_atomic",	InterCallTestCase::STORAGE_IMAGE,	InterCallTestCase::FLAG_USE_ATOMIC | InterCallTestCase::FLAG_USE_INT	},
3250				{ "ssbo",			InterCallTestCase::STORAGE_BUFFER,	0																		},
3251				{ "ssbo_atomic",	InterCallTestCase::STORAGE_BUFFER,	InterCallTestCase::FLAG_USE_ATOMIC | InterCallTestCase::FLAG_USE_INT	},
3252			};
3253
3254			const int seed0 = 123;
3255			const int seed1 = 457;
3256
3257			for (int configNdx = 0; configNdx < DE_LENGTH_OF_ARRAY(configs); ++configNdx)
3258			{
3259				const char* const target = (configs[configNdx].storage == InterCallTestCase::STORAGE_BUFFER) ? ("buffer") : ("image");
3260
3261				withBarrierGroup->addChild(new InterCallTestCase(m_context,
3262																 (std::string(configs[configNdx].namePrefix) + "_write_read").c_str(),
3263																 (std::string("Write to ") + target + " and read it").c_str(),
3264																 configs[configNdx].storage,
3265																 configs[configNdx].flags,
3266																 InterCallOperations()
3267																	<< op::WriteData::Generate(1, seed0)
3268																	<< op::Barrier()
3269																	<< op::ReadData::Generate(1, seed0)));
3270
3271				withBarrierGroup->addChild(new InterCallTestCase(m_context,
3272																 (std::string(configs[configNdx].namePrefix) + "_read_write").c_str(),
3273																 (std::string("Read from ") + target + " and then write to it").c_str(),
3274																 configs[configNdx].storage,
3275																 configs[configNdx].flags,
3276																 InterCallOperations()
3277																	<< op::ReadZeroData::Generate(1)
3278																	<< op::Barrier()
3279																	<< op::WriteData::Generate(1, seed0)));
3280
3281				withBarrierGroup->addChild(new InterCallTestCase(m_context,
3282																 (std::string(configs[configNdx].namePrefix) + "_overwrite").c_str(),
3283																 (std::string("Write to ") + target + " twice and read it").c_str(),
3284																 configs[configNdx].storage,
3285																 configs[configNdx].flags,
3286																 InterCallOperations()
3287																	<< op::WriteData::Generate(1, seed0)
3288																	<< op::Barrier()
3289																	<< op::WriteData::Generate(1, seed1)
3290																	<< op::Barrier()
3291																	<< op::ReadData::Generate(1, seed1)));
3292
3293				withBarrierGroup->addChild(new InterCallTestCase(m_context,
3294																 (std::string(configs[configNdx].namePrefix) + "_multiple_write_read").c_str(),
3295																 (std::string("Write to multiple ") + target + "s and read them").c_str(),
3296																 configs[configNdx].storage,
3297																 configs[configNdx].flags,
3298																 InterCallOperations()
3299																	<< op::WriteData::Generate(1, seed0)
3300																	<< op::WriteData::Generate(2, seed1)
3301																	<< op::Barrier()
3302																	<< op::ReadMultipleData::Generate(1, seed0, 2, seed1)));
3303
3304				withBarrierGroup->addChild(new InterCallTestCase(m_context,
3305																 (std::string(configs[configNdx].namePrefix) + "_multiple_interleaved_write_read").c_str(),
3306																 (std::string("Write to same ") + target + " in multiple calls and read it").c_str(),
3307																 configs[configNdx].storage,
3308																 configs[configNdx].flags,
3309																 InterCallOperations()
3310																	<< op::WriteDataInterleaved::Generate(1, seed0, true)
3311																	<< op::WriteDataInterleaved::Generate(1, seed1, false)
3312																	<< op::Barrier()
3313																	<< op::ReadDataInterleaved::Generate(1, seed0, seed1)));
3314
3315				withBarrierGroup->addChild(new InterCallTestCase(m_context,
3316																 (std::string(configs[configNdx].namePrefix) + "_multiple_unrelated_write_read_ordered").c_str(),
3317																 (std::string("Two unrelated ") + target + " write-reads").c_str(),
3318																 configs[configNdx].storage,
3319																 configs[configNdx].flags,
3320																 InterCallOperations()
3321																	<< op::WriteData::Generate(1, seed0)
3322																	<< op::WriteData::Generate(2, seed1)
3323																	<< op::Barrier()
3324																	<< op::ReadData::Generate(1, seed0)
3325																	<< op::ReadData::Generate(2, seed1)));
3326
3327				withBarrierGroup->addChild(new InterCallTestCase(m_context,
3328																 (std::string(configs[configNdx].namePrefix) + "_multiple_unrelated_write_read_non_ordered").c_str(),
3329																 (std::string("Two unrelated ") + target + " write-reads").c_str(),
3330																 configs[configNdx].storage,
3331																 configs[configNdx].flags,
3332																 InterCallOperations()
3333																	<< op::WriteData::Generate(1, seed0)
3334																	<< op::WriteData::Generate(2, seed1)
3335																	<< op::Barrier()
3336																	<< op::ReadData::Generate(2, seed1)
3337																	<< op::ReadData::Generate(1, seed0)));
3338			}
3339
3340			// .without_memory_barrier
3341			{
3342				struct InvocationConfig
3343				{
3344					const char*	name;
3345					int			count;
3346				};
3347
3348				static const InvocationConfig ssboInvocations[] =
3349				{
3350					{ "1k",		1024	},
3351					{ "4k",		4096	},
3352					{ "32k",	32768	},
3353				};
3354				static const InvocationConfig imageInvocations[] =
3355				{
3356					{ "8x8",		8	},
3357					{ "32x32",		32	},
3358					{ "128x128",	128	},
3359				};
3360				static const InvocationConfig counterInvocations[] =
3361				{
3362					{ "32",		32		},
3363					{ "128",	128		},
3364					{ "1k",		1024	},
3365				};
3366				static const int callCounts[] = { 2, 5, 100 };
3367
3368				for (int invocationNdx = 0; invocationNdx < DE_LENGTH_OF_ARRAY(ssboInvocations); ++invocationNdx)
3369					for (int callCountNdx = 0; callCountNdx < DE_LENGTH_OF_ARRAY(callCounts); ++callCountNdx)
3370						withoutBarrierGroup->addChild(new SSBOConcurrentAtomicCase(m_context, (std::string("ssbo_atomic_dispatch_") + de::toString(callCounts[callCountNdx]) + "_calls_" + ssboInvocations[invocationNdx].name + "_invocations").c_str(),	"", callCounts[callCountNdx], ssboInvocations[invocationNdx].count));
3371
3372				for (int invocationNdx = 0; invocationNdx < DE_LENGTH_OF_ARRAY(imageInvocations); ++invocationNdx)
3373					for (int callCountNdx = 0; callCountNdx < DE_LENGTH_OF_ARRAY(callCounts); ++callCountNdx)
3374						withoutBarrierGroup->addChild(new ConcurrentImageAtomicCase(m_context, (std::string("image_atomic_dispatch_") + de::toString(callCounts[callCountNdx]) + "_calls_" + imageInvocations[invocationNdx].name + "_invocations").c_str(),	"", callCounts[callCountNdx], imageInvocations[invocationNdx].count));
3375
3376				for (int invocationNdx = 0; invocationNdx < DE_LENGTH_OF_ARRAY(counterInvocations); ++invocationNdx)
3377					for (int callCountNdx = 0; callCountNdx < DE_LENGTH_OF_ARRAY(callCounts); ++callCountNdx)
3378						withoutBarrierGroup->addChild(new ConcurrentAtomicCounterCase(m_context, (std::string("atomic_counter_dispatch_") + de::toString(callCounts[callCountNdx]) + "_calls_" + counterInvocations[invocationNdx].name + "_invocations").c_str(),	"", callCounts[callCountNdx], counterInvocations[invocationNdx].count));
3379
3380				for (int invocationNdx = 0; invocationNdx < DE_LENGTH_OF_ARRAY(counterInvocations); ++invocationNdx)
3381					for (int callCountNdx = 0; callCountNdx < DE_LENGTH_OF_ARRAY(callCounts); ++callCountNdx)
3382						withoutBarrierGroup->addChild(new ConcurrentSSBOAtomicCounterMixedCase(m_context, (std::string("ssbo_atomic_counter_mixed_dispatch_") + de::toString(callCounts[callCountNdx]) + "_calls_" + counterInvocations[invocationNdx].name + "_invocations").c_str(),	"", callCounts[callCountNdx], counterInvocations[invocationNdx].count));
3383			}
3384		}
3385	}
3386}
3387
3388} // Functional
3389} // gles31
3390} // deqp
3391