1/*-------------------------------------------------------------------------
2 * OpenGL Conformance Test Suite
3 * -----------------------------
4 *
5 * Copyright (c) 2014-2016 The Khronos Group Inc.
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 *      http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 */ /*!
20 * \file
21 * \brief
22 */ /*-------------------------------------------------------------------*/
23
24#include "es31cComputeShaderTests.hpp"
25#include "gluContextInfo.hpp"
26#include "gluPlatform.hpp"
27#include "glwEnums.hpp"
28#include "glwFunctions.hpp"
29#include "tcuCommandLine.hpp"
30#include "tcuMatrix.hpp"
31#include "tcuMatrixUtil.hpp"
32#include "tcuPlatform.hpp"
33#include "tcuRenderTarget.hpp"
34#include <cstdarg>
35#include <sstream>
36
37namespace glcts
38{
39
40using namespace glw;
41using tcu::Vec2;
42using tcu::Vec3;
43using tcu::Vec4;
44using tcu::UVec4;
45using tcu::UVec3;
46using tcu::Mat4;
47
48namespace
49{
50
51typedef Vec3  vec2;
52typedef Vec3  vec3;
53typedef Vec4  vec4;
54typedef UVec3 uvec3;
55typedef UVec4 uvec4;
56typedef Mat4  mat4;
57
58const char* const kGLSLVer = "#version 310 es\n";
59
60class ComputeShaderBase : public glcts::SubcaseBase
61{
62
63public:
64	virtual ~ComputeShaderBase()
65	{
66	}
67
68	ComputeShaderBase()
69		: renderTarget(m_context.getRenderContext().getRenderTarget()), pixelFormat(renderTarget.getPixelFormat())
70	{
71		g_color_eps = vec4(1.f / (1 << 13));
72		if (pixelFormat.redBits != 0)
73		{
74			g_color_eps.x() += 1.f / (static_cast<float>(1 << pixelFormat.redBits) - 1.0f);
75		}
76		if (pixelFormat.greenBits != 0)
77		{
78			g_color_eps.y() += 1.f / (static_cast<float>(1 << pixelFormat.greenBits) - 1.0f);
79		}
80		if (pixelFormat.blueBits != 0)
81		{
82			g_color_eps.z() += 1.f / (static_cast<float>(1 << pixelFormat.blueBits) - 1.0f);
83		}
84		if (pixelFormat.alphaBits != 0)
85		{
86			g_color_eps.w() += 1.f / (static_cast<float>(1 << pixelFormat.alphaBits) - 1.0f);
87		}
88	}
89
90	const tcu::RenderTarget& renderTarget;
91	const tcu::PixelFormat&  pixelFormat;
92	vec4					 g_color_eps;
93
94	uvec3 IndexTo3DCoord(GLuint idx, GLuint max_x, GLuint max_y)
95	{
96		const GLuint x = idx % max_x;
97		idx /= max_x;
98		const GLuint y = idx % max_y;
99		idx /= max_y;
100		const GLuint z = idx;
101		return uvec3(x, y, z);
102	}
103
104	bool CheckProgram(GLuint program, bool* compile_error = NULL)
105	{
106		GLint compile_status = GL_TRUE;
107		GLint status;
108		glGetProgramiv(program, GL_LINK_STATUS, &status);
109
110		if (status == GL_FALSE)
111		{
112			GLint attached_shaders = 0;
113			glGetProgramiv(program, GL_ATTACHED_SHADERS, &attached_shaders);
114
115			if (attached_shaders > 0)
116			{
117				std::vector<GLuint> shaders(attached_shaders);
118				glGetAttachedShaders(program, attached_shaders, NULL, &shaders[0]);
119
120				for (GLint i = 0; i < attached_shaders; ++i)
121				{
122					GLenum type;
123					glGetShaderiv(shaders[i], GL_SHADER_TYPE, reinterpret_cast<GLint*>(&type));
124					switch (type)
125					{
126					case GL_VERTEX_SHADER:
127						m_context.getTestContext().getLog()
128							<< tcu::TestLog::Message << "*** Vertex Shader ***" << tcu::TestLog::EndMessage;
129						break;
130					case GL_FRAGMENT_SHADER:
131						m_context.getTestContext().getLog()
132							<< tcu::TestLog::Message << "*** Fragment Shader ***" << tcu::TestLog::EndMessage;
133						break;
134					case GL_COMPUTE_SHADER:
135						m_context.getTestContext().getLog()
136							<< tcu::TestLog::Message << "*** Compute Shader ***" << tcu::TestLog::EndMessage;
137						break;
138					default:
139						m_context.getTestContext().getLog()
140							<< tcu::TestLog::Message << "*** Unknown Shader ***" << tcu::TestLog::EndMessage;
141						break;
142					}
143
144					GLint res;
145					glGetShaderiv(shaders[i], GL_COMPILE_STATUS, &res);
146					if (res != GL_TRUE)
147						compile_status = res;
148
149					GLint length = 0;
150					glGetShaderiv(shaders[i], GL_SHADER_SOURCE_LENGTH, &length);
151					if (length > 0)
152					{
153						std::vector<GLchar> source(length);
154						glGetShaderSource(shaders[i], length, NULL, &source[0]);
155						m_context.getTestContext().getLog()
156							<< tcu::TestLog::Message << &source[0] << tcu::TestLog::EndMessage;
157					}
158
159					glGetShaderiv(shaders[i], GL_INFO_LOG_LENGTH, &length);
160					if (length > 0)
161					{
162						std::vector<GLchar> log(length);
163						glGetShaderInfoLog(shaders[i], length, NULL, &log[0]);
164						m_context.getTestContext().getLog()
165							<< tcu::TestLog::Message << &log[0] << tcu::TestLog::EndMessage;
166					}
167				}
168			}
169
170			GLint length;
171			glGetProgramiv(program, GL_INFO_LOG_LENGTH, &length);
172			if (length > 0)
173			{
174				std::vector<GLchar> log(length);
175				glGetProgramInfoLog(program, length, NULL, &log[0]);
176				m_context.getTestContext().getLog() << tcu::TestLog::Message << &log[0] << tcu::TestLog::EndMessage;
177			}
178		}
179
180		if (compile_error)
181			*compile_error = (compile_status == GL_TRUE ? false : true);
182		if (compile_status != GL_TRUE)
183			return false;
184		return status == GL_TRUE ? true : false;
185	}
186
187	GLuint CreateComputeProgram(const std::string& cs)
188	{
189		const GLuint p = glCreateProgram();
190
191		if (!cs.empty())
192		{
193			const GLuint sh = glCreateShader(GL_COMPUTE_SHADER);
194			glAttachShader(p, sh);
195			glDeleteShader(sh);
196			const char* const src[2] = { kGLSLVer, cs.c_str() };
197			glShaderSource(sh, 2, src, NULL);
198			glCompileShader(sh);
199		}
200
201		return p;
202	}
203
204	GLuint CreateProgram(const std::string& vs, const std::string& fs)
205	{
206		const GLuint p = glCreateProgram();
207
208		if (!vs.empty())
209		{
210			const GLuint sh = glCreateShader(GL_VERTEX_SHADER);
211			glAttachShader(p, sh);
212			glDeleteShader(sh);
213			const char* const src[2] = { kGLSLVer, vs.c_str() };
214			glShaderSource(sh, 2, src, NULL);
215			glCompileShader(sh);
216		}
217		if (!fs.empty())
218		{
219			const GLuint sh = glCreateShader(GL_FRAGMENT_SHADER);
220			glAttachShader(p, sh);
221			glDeleteShader(sh);
222			const char* const src[2] = { kGLSLVer, fs.c_str() };
223			glShaderSource(sh, 2, src, NULL);
224			glCompileShader(sh);
225		}
226
227		return p;
228	}
229
230	GLuint BuildShaderProgram(GLenum type, const std::string& source)
231	{
232		const char* const src[2] = { kGLSLVer, source.c_str() };
233		return glCreateShaderProgramv(type, 2, src);
234	}
235
236	GLfloat distance(GLfloat p0, GLfloat p1)
237	{
238		return de::abs(p0 - p1);
239	}
240
241	inline bool ColorEqual(const vec4& c0, const vec4& c1, const vec4& epsilon)
242	{
243		if (distance(c0.x(), c1.x()) > epsilon.x())
244			return false;
245		if (distance(c0.y(), c1.y()) > epsilon.y())
246			return false;
247		if (distance(c0.z(), c1.z()) > epsilon.z())
248			return false;
249		if (distance(c0.w(), c1.w()) > epsilon.w())
250			return false;
251		return true;
252	}
253
254	inline bool ColorEqual(const vec3& c0, const vec3& c1, const vec4& epsilon)
255	{
256		if (distance(c0.x(), c1.x()) > epsilon.x())
257			return false;
258		if (distance(c0.y(), c1.y()) > epsilon.y())
259			return false;
260		if (distance(c0.z(), c1.z()) > epsilon.z())
261			return false;
262		return true;
263	}
264
265	bool ValidateReadBuffer(int x, int y, int w, int h, const vec4& expected)
266	{
267		std::vector<vec4>	display(w * h);
268		std::vector<GLubyte> data(w * h * 4);
269		glReadPixels(x, y, w, h, GL_RGBA, GL_UNSIGNED_BYTE, &data[0]);
270
271		for (int i = 0; i < w * h * 4; i += 4)
272		{
273			display[i / 4] = vec4(static_cast<GLfloat>(data[i] / 255.), static_cast<GLfloat>(data[i + 1] / 255.),
274								  static_cast<GLfloat>(data[i + 2] / 255.), static_cast<GLfloat>(data[i + 3] / 255.));
275		}
276
277		for (int j = 0; j < h; ++j)
278		{
279			for (int i = 0; i < w; ++i)
280			{
281				if (!ColorEqual(display[j * w + i], expected, g_color_eps))
282				{
283					m_context.getTestContext().getLog()
284						<< tcu::TestLog::Message << "Color at (" << x + i << ", " << y + j << ") is ["
285						<< display[j * w + i].x() << ", " << display[j * w + i].y() << ", " << display[j * w + i].z()
286						<< ", " << display[j * w + i].w() << "] should be [" << expected.x() << ", " << expected.y()
287						<< ", " << expected.z() << ", " << expected.w() << "]." << tcu::TestLog::EndMessage;
288					return false;
289				}
290			}
291		}
292
293		return true;
294	}
295
296	bool ValidateReadBufferCenteredQuad(int width, int height, const vec3& expected)
297	{
298		bool				 result = true;
299		std::vector<vec3>	fb(width * height);
300		std::vector<GLubyte> data(width * height * 4);
301		glReadPixels(0, 0, width, height, GL_RGBA, GL_UNSIGNED_BYTE, &data[0]);
302
303		for (int i = 0; i < width * height * 4; i += 4)
304		{
305			fb[i / 4] = vec3(static_cast<GLfloat>(data[i] / 255.), static_cast<GLfloat>(data[i + 1] / 255.),
306							 static_cast<GLfloat>(data[i + 2] / 255.));
307		}
308
309		int startx = int((static_cast<float>(width) * 0.1f) + 1);
310		int starty = int((static_cast<float>(height) * 0.1f) + 1);
311		int endx   = int(static_cast<float>(width) - 2 * ((static_cast<float>(width) * 0.1f) + 1) - 1);
312		int endy   = int(static_cast<float>(height) - 2 * ((static_cast<float>(height) * 0.1f) + 1) - 1);
313
314		for (int y = starty; y < endy; ++y)
315		{
316			for (int x = startx; x < endx; ++x)
317			{
318				const int idx = y * width + x;
319				if (!ColorEqual(fb[idx], expected, g_color_eps))
320				{
321					return false;
322				}
323			}
324		}
325
326		if (!ColorEqual(fb[2 * width + 2], vec3(0), g_color_eps))
327		{
328			result = false;
329		}
330		if (!ColorEqual(fb[2 * width + (width - 3)], vec3(0), g_color_eps))
331		{
332			result = false;
333		}
334		if (!ColorEqual(fb[(height - 3) * width + (width - 3)], vec3(0), g_color_eps))
335		{
336			result = false;
337		}
338		if (!ColorEqual(fb[(height - 3) * width + 2], vec3(0), g_color_eps))
339		{
340			result = false;
341		}
342
343		return result;
344	}
345
346	int getWindowWidth()
347	{
348		return renderTarget.getWidth();
349	}
350
351	int getWindowHeight()
352	{
353		return renderTarget.getHeight();
354	}
355
356	bool ValidateWindow4Quads(const vec3& lb, const vec3& rb, const vec3& rt, const vec3& lt)
357	{
358		int					 width  = 100;
359		int					 height = 100;
360		std::vector<vec3>	fb(width * height);
361		std::vector<GLubyte> data(width * height * 4);
362		glReadPixels(0, 0, width, height, GL_RGBA, GL_UNSIGNED_BYTE, &data[0]);
363
364		for (int i = 0; i < width * height * 4; i += 4)
365		{
366			fb[i / 4] = vec3(static_cast<GLfloat>(data[i] / 255.), static_cast<GLfloat>(data[i + 1] / 255.),
367							 static_cast<GLfloat>(data[i + 2] / 255.));
368		}
369
370		bool status = true;
371
372		// left-bottom quad
373		for (int y = 10; y < height / 2 - 10; ++y)
374		{
375			for (int x = 10; x < width / 2 - 10; ++x)
376			{
377				const int idx = y * width + x;
378				if (!ColorEqual(fb[idx], lb, g_color_eps))
379				{
380					m_context.getTestContext().getLog()
381						<< tcu::TestLog::Message << "First bad color (" << x << ", " << y << "): " << fb[idx].x() << " "
382						<< fb[idx].y() << " " << fb[idx].z() << tcu::TestLog::EndMessage;
383					status = false;
384				}
385			}
386		}
387		// right-bottom quad
388		for (int y = 10; y < height / 2 - 10; ++y)
389		{
390			for (int x = width / 2 + 10; x < width - 10; ++x)
391			{
392				const int idx = y * width + x;
393				if (!ColorEqual(fb[idx], rb, g_color_eps))
394				{
395					m_context.getTestContext().getLog()
396						<< tcu::TestLog::Message << "Bad color at (" << x << ", " << y << "): " << fb[idx].x() << " "
397						<< fb[idx].y() << " " << fb[idx].z() << tcu::TestLog::EndMessage;
398					status = false;
399				}
400			}
401		}
402		// right-top quad
403		for (int y = height / 2 + 10; y < height - 10; ++y)
404		{
405			for (int x = width / 2 + 10; x < width - 10; ++x)
406			{
407				const int idx = y * width + x;
408				if (!ColorEqual(fb[idx], rt, g_color_eps))
409				{
410					m_context.getTestContext().getLog()
411						<< tcu::TestLog::Message << "Bad color at (" << x << ", " << y << "): " << fb[idx].x() << " "
412						<< fb[idx].y() << " " << fb[idx].z() << tcu::TestLog::EndMessage;
413					status = false;
414				}
415			}
416		}
417		// left-top quad
418		for (int y = height / 2 + 10; y < height - 10; ++y)
419		{
420			for (int x = 10; x < width / 2 - 10; ++x)
421			{
422				const int idx = y * width + x;
423				if (!ColorEqual(fb[idx], lt, g_color_eps))
424				{
425					m_context.getTestContext().getLog()
426						<< tcu::TestLog::Message << "Bad color at (" << x << ", " << y << "): " << fb[idx].x() << " "
427						<< fb[idx].y() << " " << fb[idx].z() << tcu::TestLog::EndMessage;
428					status = false;
429				}
430			}
431		}
432		// middle horizontal line should be black
433		for (int y = height / 2 - 2; y < height / 2 + 2; ++y)
434		{
435			for (int x = 0; x < width; ++x)
436			{
437				const int idx = y * width + x;
438				if (!ColorEqual(fb[idx], vec3(0), g_color_eps))
439				{
440					m_context.getTestContext().getLog()
441						<< tcu::TestLog::Message << "Bad color at (" << x << ", " << y << "): " << fb[idx].x() << " "
442						<< fb[idx].y() << " " << fb[idx].z() << tcu::TestLog::EndMessage;
443					status = false;
444				}
445			}
446		}
447		// middle vertical line should be black
448		for (int y = 0; y < height; ++y)
449		{
450			for (int x = width / 2 - 2; x < width / 2 + 2; ++x)
451			{
452				const int idx = y * width + x;
453				if (!ColorEqual(fb[idx], vec3(0), g_color_eps))
454				{
455					m_context.getTestContext().getLog()
456						<< tcu::TestLog::Message << "Bad color at (" << x << ", " << y << "): " << fb[idx].x() << " "
457						<< fb[idx].y() << " " << fb[idx].z() << tcu::TestLog::EndMessage;
458					status = false;
459				}
460			}
461		}
462
463		return status;
464	}
465
466	bool IsEqual(vec4 a, vec4 b)
467	{
468		return (a.x() == b.x()) && (a.y() == b.y()) && (a.z() == b.z()) && (a.w() == b.w());
469	}
470
471	bool IsEqual(uvec4 a, uvec4 b)
472	{
473		return (a.x() == b.x()) && (a.y() == b.y()) && (a.z() == b.z()) && (a.w() == b.w());
474	}
475};
476
477class SimpleCompute : public ComputeShaderBase
478{
479
480	virtual std::string Title()
481	{
482		return "Simplest possible Compute Shader";
483	}
484
485	virtual std::string Purpose()
486	{
487		return "1. Verify that CS can be created, compiled and linked.\n"
488			   "2. Verify that local work size can be queried with GetProgramiv command.\n"
489			   "3. Verify that CS can be dispatched with DispatchCompute command.\n"
490			   "4. Verify that CS can write to SSBO.";
491	}
492
493	virtual std::string Method()
494	{
495		return "Create and dispatch CS. Verify SSBO content.";
496	}
497
498	virtual std::string PassCriteria()
499	{
500		return "Everything works as expected.";
501	}
502
503	GLuint m_program;
504	GLuint m_buffer;
505
506	virtual long Setup()
507	{
508
509		const char* const glsl_cs =
510			NL "layout(local_size_x = 1, local_size_y = 1) in;" NL "layout(std430) buffer Output {" NL "  vec4 data;" NL
511			   "} g_out;" NL "void main() {" NL "  g_out.data = vec4(1.0, 2.0, 3.0, 4.0);" NL "}";
512		m_program = CreateComputeProgram(glsl_cs);
513		glLinkProgram(m_program);
514		if (!CheckProgram(m_program))
515			return ERROR;
516
517		GLint v[3];
518		glGetProgramiv(m_program, GL_COMPUTE_WORK_GROUP_SIZE, v);
519		if (v[0] != 1 || v[1] != 1 || v[2] != 1)
520		{
521			m_context.getTestContext().getLog()
522				<< tcu::TestLog::Message << "Got " << v[0] << ", " << v[1] << ", " << v[2]
523				<< ", expected: 1, 1, 1 in GL_COMPUTE_WORK_GROUP_SIZE check" << tcu::TestLog::EndMessage;
524			return ERROR;
525		}
526
527		glGenBuffers(1, &m_buffer);
528		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_buffer);
529		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(vec4), NULL, GL_DYNAMIC_DRAW);
530		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
531
532		return NO_ERROR;
533	}
534
535	virtual long Run()
536	{
537		glUseProgram(m_program);
538		glDispatchCompute(1, 1, 1);
539
540		vec4* data;
541		glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_buffer);
542		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
543		data	   = static_cast<vec4*>(glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(vec4), GL_MAP_READ_BIT));
544		long error = NO_ERROR;
545		if (!IsEqual(data[0], vec4(1.0f, 2.0f, 3.0f, 4.0f)))
546		{
547			error = ERROR;
548		}
549		glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
550		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
551		return error;
552	}
553
554	virtual long Cleanup()
555	{
556		glUseProgram(0);
557		glDeleteProgram(m_program);
558		glDeleteBuffers(1, &m_buffer);
559		return NO_ERROR;
560	}
561};
562
// Body of the "long" compute shader (no #version line; CreateComputeProgram prepends kGLSLVer).
// With n = elements[1] and value = n / 100, the nested loops count value * (n / value)
// iterations and the total is atomically added to elements[0].
static const char* const glsl_cs_long = R"(
	layout(local_size_x = 1, local_size_y = 1) in;
	layout(std430) buffer;
	layout(binding = 0) buffer Output {
		int elements[2];
	} output_data;

	void main() {
		int temp = 0;
		int value = output_data.elements[1]/100;
		for (int i = 0; i < value; i++) {
			for (int j = 0; j < output_data.elements[1]/value; j++) {
				temp += 1;
			}
		}
		atomicAdd(output_data.elements[0], temp);
	}
)";
581
// Body of the "short" compute shader (no #version line; CreateComputeProgram prepends kGLSLVer).
// Uses the same Output block layout as glsl_cs_long and increments elements[0] by one.
static const char* const glsl_cs_short = R"(
	layout(local_size_x = 1, local_size_y = 1) in;
	layout(std430) buffer;
	layout(binding = 0) buffer Output {
		int elements[2];
	} output_data;

	void main() {
		output_data.elements[0] += 1;
	}
)";
593
594class LongRunningComputeFenceTest : public ComputeShaderBase
595{
596
597	std::string Title() override
598	{
599		return "Synchronization test for two compute tests";
600	}
601
602	std::string Purpose() override
603	{
604		return "Verify that fence works correctly across different contexts.";
605	}
606
607	std::string Method() override
608	{
609		return R"(1. Create two CS(Long and Short) an SSBO and a new shared context.
610			  2. Dispatch long CS with DispatchCompute and generate a fence object.
611			  3. Change the context to the newly created shared context.
612			  4. Issue a glWaitSync() followed by a call to DispatchCompute on the short CS.
613			  5. Issue a glFinish() to wait for both CS to finish.
614			  6. Verify the value is correctly updated in the SSBO.)";
615	}
616
617	std::string PassCriteria() override
618	{
619		return "Everything works as expected.";
620	}
621
622	glu::RenderContext*		m_sharedContext;
623	GLuint				m_program1;
624	GLuint				m_program2;
625	GLuint				m_buffer;
626	GLsync				m_gl_sync;
627	const int			m_total_count = 5000000;
628	const int			m_shorter_count = 50000;
629	int				m_dataLoadStore[2] = {0, m_shorter_count};
630	int				*m_read_data;
631
632	long Setup() override
633	{
634		glu::RenderContext&		base_render_context = m_context.getRenderContext();
635		tcu::TestContext&		m_testcontext	    = m_context.getTestContext();
636		glu::ContextType		contextType(base_render_context.getType().getAPI());
637		glu::RenderConfig		config(contextType);
638		const tcu::CommandLine&		cmdLine = m_testcontext.getCommandLine();
639
640		glGenBuffers(2, &m_buffer);
641
642		m_program1 = CreateComputeProgram(glsl_cs_long);
643		glLinkProgram(m_program1);
644		if (!CheckProgram(m_program1))
645			return ERROR;
646
647		m_program2 = CreateComputeProgram(glsl_cs_short);
648		glLinkProgram(m_program2);
649		if (!CheckProgram(m_program2))
650			return ERROR;
651
652		glu::parseRenderConfig(&config, cmdLine);
653
654#if (DE_OS == DE_OS_ANDROID) || defined(DEQP_SURFACELESS) || defined(NULLWS)
655		// Can only have one Window created at a time
656		// Note that this surface type is not supported on all platforms
657		config.surfaceType = glu::RenderConfig::SURFACETYPE_OFFSCREEN_GENERIC;
658#endif
659
660		m_sharedContext = glu::createRenderContext(m_testcontext.getPlatform(), cmdLine, config, &base_render_context);
661		if (!m_sharedContext)
662			return ERROR;
663
664		base_render_context.makeCurrent();
665
666		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_buffer);
667		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(int) * 2, &m_dataLoadStore, GL_STREAM_COPY);
668
669		return NO_ERROR;
670	}
671
672	long Run() override
673	{
674		long				error	= NO_ERROR;
675		glu::RenderContext&		base_render_context = m_context.getRenderContext();
676
677		glUseProgram(m_program1);
678		for (int i = 0; i < m_total_count/m_shorter_count; i++)
679			glDispatchCompute(1, 1, 1);
680
681		glMemoryBarrier(GL_ALL_BARRIER_BITS);
682		m_gl_sync = glFenceSync(
683			/*condition=*/GL_SYNC_GPU_COMMANDS_COMPLETE, /*flags=*/0);
684		glFlush();
685
686		m_sharedContext->makeCurrent();
687
688		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_buffer);
689		glUseProgram(m_program2);
690
691		glWaitSync(m_gl_sync, 0, GL_TIMEOUT_IGNORED);
692
693		glMemoryBarrier(GL_ALL_BARRIER_BITS);
694		glDispatchCompute(1, 1, 1);
695		glMemoryBarrier(GL_ALL_BARRIER_BITS);
696		glFinish();
697
698		m_read_data =
699			static_cast<int*>(glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(int) * 2, GL_MAP_READ_BIT));
700		if (m_read_data[0] != (m_total_count + 1))
701		{
702			m_context.getTestContext().getLog()
703				<< tcu::TestLog::Message << "Invalid read data: " << m_read_data[0] << tcu::TestLog::EndMessage;
704			error = ERROR;
705		}
706
707		glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
708		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
709
710		base_render_context.makeCurrent();
711
712		return error;
713	}
714
715	long Cleanup() override
716	{
717		glUseProgram(0);
718
719		glDeleteProgram(m_program1);
720		glDeleteProgram(m_program2);
721		glDeleteBuffers(2, &m_buffer);
722
723		return NO_ERROR;
724	}
725};
726
727static
728decltype(glw::Functions::bufferStorage) getBufferStorageFunction(glu::RenderContext& renderContext)
729{
730	decltype(glw::Functions::bufferStorage) bufferStorageFunc;
731
732	bufferStorageFunc = (decltype(bufferStorageFunc))renderContext.getProcAddress("glBufferStorageEXT");
733	DE_ASSERT(bufferStorageFunc);
734
735	return bufferStorageFunc;
736}
737
738class LongRunningPersistentSSBOComputeTest : public ComputeShaderBase
739{
740
741	std::string Title() override
742	{
743		return "Synchronization test for Persistent Buffers";
744	}
745
746	std::string Purpose() override
747	{
748		return "Verify that fence works correctly across different contexts.";
749	}
750
751	std::string Method() override
752	{
753		return  R"(1. Create two Long CS, an SSBO and a new shared context.
754			   2. Dispatch long CS with DispatchCompute and generate a fence object.
755			   3. Change the context to the newly created shared context.
756			   4. Issue a glClientWaitSync().
757			   5. Verify the value is correctly updated in the SSBO.)";
758	}
759
760	std::string PassCriteria() override
761	{
762		return "Everything works as expected.";
763	}
764
765	glu::RenderContext*	m_sharedContext = NULL;
766	GLuint			m_buffer;
767	volatile int*		m_dataLoadStore = NULL;
768	const int		m_total_count = 5000000;
769	const int		m_shorter_count = 50000;
770
771	long Setup() override
772	{
773		glu::RenderContext&		base_render_context = m_context.getRenderContext();
774		const glu::ContextInfo&		base_render_context_info = m_context.getContextInfo();
775		tcu::TestContext&		m_testcontext = m_context.getTestContext();
776		const tcu::CommandLine&		cmdLine = m_testcontext.getCommandLine();
777		glu::ContextType		contextType(base_render_context.getType().getAPI());
778		glu::RenderConfig		config(contextType);
779
780		glu::parseRenderConfig(&config, cmdLine);
781
782		if (!base_render_context_info.isExtensionSupported("GL_EXT_buffer_storage"))
783		{
784			OutputNotSupported("GL_EXT_buffer_storage not supported");
785			return NOT_SUPPORTED;
786		}
787
788#if (DE_OS == DE_OS_ANDROID) || defined(DEQP_SURFACELESS) || defined(NULLWS)
789		// Android can only have one Window created at a time
790		// Note that this surface type is not supported on all platforms
791		config.surfaceType = glu::RenderConfig::SURFACETYPE_OFFSCREEN_GENERIC;
792#endif
793
794		m_sharedContext = glu::createRenderContext(m_testcontext.getPlatform(), cmdLine, config, &base_render_context);
795		if (!m_sharedContext)
796			return ERROR;
797
798		base_render_context.makeCurrent();
799
800		return NO_ERROR;
801	}
802
803	long RunComputePersistent()
804	{
805		glw::glBufferStorageFunc	GLBUFFERSTORAGEEXTFUNC = NULL;
806		GLbitfield			buffer_flags = GL_MAP_READ_BIT | GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT;
807		GLuint				program	= CreateComputeProgram(glsl_cs_long);
808
809		glLinkProgram(program);
810		if (!CheckProgram(program))
811			return ERROR;
812
813		glUseProgram(program);
814
815		glGenBuffers(2, &m_buffer);
816		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_buffer);
817
818		GLU_EXPECT_NO_ERROR(glGetError(), "Error in binding buffer!");
819
820		GLBUFFERSTORAGEEXTFUNC = getBufferStorageFunction(*m_sharedContext);
821		if (!GLBUFFERSTORAGEEXTFUNC)
822		{
823			m_context.getTestContext().getLog()
824				<< tcu::TestLog::Message << "Empty function!" << tcu::TestLog::EndMessage;
825			return ERROR;
826		}
827
828		GLBUFFERSTORAGEEXTFUNC(GL_SHADER_STORAGE_BUFFER, sizeof(int) * 2, NULL, buffer_flags);
829		GLU_EXPECT_NO_ERROR(glGetError(), "Error when setting default value to Buffer");
830
831		m_dataLoadStore	 = static_cast<int*>(glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(int) * 2, buffer_flags));
832		m_dataLoadStore[0] = 0;
833		m_dataLoadStore[1] = m_shorter_count;
834
835		for (int i = 0; i < m_total_count/m_shorter_count; i++)
836			glDispatchCompute(1, 1, 1);
837
838		glMemoryBarrier(GL_ALL_BARRIER_BITS);
839		glFlush();
840
841		return NO_ERROR;
842	}
843
844	void PollClientWait(GLsync gl_sync)
845	{
846		while (true)
847		{
848			GLenum status = glClientWaitSync(gl_sync, 0, 100000000);
849			switch (status)
850			{
851			case GL_ALREADY_SIGNALED:
852				m_context.getTestContext().getLog()
853					<< tcu::TestLog::Message << "glClientWaitSync --- GL_ALREADY_SIGNALED" << tcu::TestLog::EndMessage;
854				return;
855			case GL_CONDITION_SATISFIED:
856				m_context.getTestContext().getLog()
857					<< tcu::TestLog::Message << "glClientWaitSync --- GL_CONDITION_SATISFIED"
858					<< tcu::TestLog::EndMessage;
859				return;
860			case GL_WAIT_FAILED:
861				m_context.getTestContext().getLog()
862					<< tcu::TestLog::Message << "glClientWaitSync --- GL_WAIT_FAILED" << tcu::TestLog::EndMessage;
863				return;
864			case GL_TIMEOUT_EXPIRED:
865				m_context.getTestContext().getLog()
866					<< tcu::TestLog::Message << "glClientWaitSync --- GL_TIMEOUT_EXPIRED" << tcu::TestLog::EndMessage;
867				break;
868			}
869		}
870	}
871
872	long Run() override
873	{
874		long				error = NO_ERROR;
875		GLsync				gl_sync;
876		glu::RenderContext& base_render_context = m_context.getRenderContext();
877
878		m_sharedContext->makeCurrent();
879
880		if (RunComputePersistent() == ERROR)
881			return ERROR;
882
883		gl_sync = glFenceSync(
884			/*condition=*/GL_SYNC_GPU_COMMANDS_COMPLETE, /*flags=*/0);
885		glFlush();
886
887		PollClientWait(gl_sync);
888
889		if (m_dataLoadStore[0] != m_total_count)
890		{
891			m_context.getTestContext().getLog()
892				<< tcu::TestLog::Message << "Invalid read data: " << m_dataLoadStore[0] << tcu::TestLog::EndMessage;
893			error = ERROR;
894		}
895
896		base_render_context.makeCurrent();
897
898		glDeleteSync(gl_sync);
899
900		return error;
901	}
902
903	long Cleanup() override
904	{
905		glDeleteBuffers(2, &m_buffer);
906		m_dataLoadStore = NULL;
907		return NO_ERROR;
908	}
909};
910
911class BasicOneWorkGroup : public ComputeShaderBase
912{
913
914	virtual std::string Title()
915	{
916		return "One work group with various local sizes";
917	}
918
919	virtual std::string Purpose()
920	{
921		return NL "1. Verify that declared local work size has correct effect." NL
922				  "2. Verify that the number of shader invocations is correct." NL
923				  "3. Verify that the built-in variables: gl_WorkGroupSize, gl_WorkGroupID, gl_GlobalInvocationID," NL
924				  "    gl_LocalInvocationID and gl_LocalInvocationIndex has correct values." NL
925				  "4. Verify that DispatchCompute and DispatchComputeIndirect commands work as expected.";
926	}
927
928	virtual std::string Method()
929	{
930		return NL "1. Create several CS with various local sizes." NL
931				  "2. Dispatch each CS with DispatchCompute and DispatchComputeIndirect commands." NL
932				  "3. Verify SSBO content.";
933	}
934
935	virtual std::string PassCriteria()
936	{
937		return "Everything works as expected.";
938	}
939
940	GLuint m_program;
941	GLuint m_storage_buffer;
942	GLuint m_dispatch_buffer;
943
944	std::string GenSource(int x, int y, int z, GLuint binding)
945	{
946		std::stringstream ss;
947		ss << NL "layout(local_size_x = " << x << ", local_size_y = " << y << ", local_size_z = " << z
948		   << ") in;" NL "layout(std430, binding = " << binding
949		   << ") buffer Output {" NL "  uvec4 local_id[];" NL "} g_out;" NL "void main() {" NL
950			  "  if (gl_WorkGroupSize == uvec3("
951		   << x << ", " << y << ", " << z
952		   << ") && gl_WorkGroupID == uvec3(0) &&" NL "      gl_GlobalInvocationID == gl_LocalInvocationID) {" NL
953			  "    g_out.local_id[gl_LocalInvocationIndex] = uvec4(gl_LocalInvocationID, 0);" NL "  } else {" NL
954			  "    g_out.local_id[gl_LocalInvocationIndex] = uvec4(0xffff);" NL "  }" NL "}";
955		return ss.str();
956	}
957
958	bool RunIteration(int local_size_x, int local_size_y, int local_size_z, GLuint binding, bool dispatch_indirect)
959	{
960		if (m_program != 0)
961			glDeleteProgram(m_program);
962		m_program = CreateComputeProgram(GenSource(local_size_x, local_size_y, local_size_z, binding));
963		glLinkProgram(m_program);
964		if (!CheckProgram(m_program))
965			return false;
966
967		GLint v[3];
968		glGetProgramiv(m_program, GL_COMPUTE_WORK_GROUP_SIZE, v);
969		if (v[0] != local_size_x || v[1] != local_size_y || v[2] != local_size_z)
970		{
971			m_context.getTestContext().getLog()
972				<< tcu::TestLog::Message << "GL_COMPUTE_LOCAL_WORK_SIZE is (" << v[0] << " " << v[1] << " " << v[2]
973				<< ") should be (" << local_size_x << " " << local_size_y << " " << local_size_z << ")"
974				<< tcu::TestLog::EndMessage;
975			return false;
976		}
977
978		const int kSize = local_size_x * local_size_y * local_size_z;
979
980		if (m_storage_buffer == 0)
981			glGenBuffers(1, &m_storage_buffer);
982		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, binding, m_storage_buffer);
983		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(uvec4) * kSize, NULL, GL_DYNAMIC_DRAW);
984		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
985
986		glUseProgram(m_program);
987		if (dispatch_indirect)
988		{
989			const GLuint num_groups[3] = { 1, 1, 1 };
990			if (m_dispatch_buffer == 0)
991				glGenBuffers(1, &m_dispatch_buffer);
992			glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer);
993			glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(num_groups), num_groups, GL_STATIC_DRAW);
994			glDispatchComputeIndirect(0);
995		}
996		else
997		{
998			glDispatchCompute(1, 1, 1);
999		}
1000
1001		uvec4* data;
1002		glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer);
1003		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
1004		data =
1005			static_cast<uvec4*>(glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, kSize * sizeof(uvec4), GL_MAP_READ_BIT));
1006
1007		bool ret = true;
1008		for (int z = 0; z < local_size_z; ++z)
1009		{
1010			for (int y = 0; y < local_size_y; ++y)
1011			{
1012				for (int x = 0; x < local_size_x; ++x)
1013				{
1014					const int index = z * local_size_x * local_size_y + y * local_size_x + x;
1015					if (!IsEqual(data[index], uvec4(x, y, z, 0)))
1016					{
1017						m_context.getTestContext().getLog()
1018							<< tcu::TestLog::Message << "Invalid data at offset " << index << tcu::TestLog::EndMessage;
1019						ret = false;
1020					}
1021				}
1022			}
1023		}
1024		glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
1025		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
1026		return ret;
1027	}
1028
1029	virtual long Setup()
1030	{
1031		m_program		  = 0;
1032		m_storage_buffer  = 0;
1033		m_dispatch_buffer = 0;
1034		return NO_ERROR;
1035	}
1036
1037	virtual long Run()
1038	{
1039		if (!RunIteration(16, 1, 1, 0, true))
1040			return ERROR;
1041		if (!RunIteration(8, 8, 1, 1, false))
1042			return ERROR;
1043		if (!RunIteration(4, 4, 4, 2, true))
1044			return ERROR;
1045		if (!RunIteration(1, 2, 3, 3, false))
1046			return ERROR;
1047		if (!RunIteration(128, 1, 1, 3, true))
1048			return ERROR;
1049		if (!RunIteration(2, 8, 8, 3, false))
1050			return ERROR;
1051		if (!RunIteration(2, 2, 32, 7, true))
1052			return ERROR;
1053		return NO_ERROR;
1054	}
1055
1056	virtual long Cleanup()
1057	{
1058		glUseProgram(0);
1059		glDeleteProgram(m_program);
1060		glDeleteBuffers(1, &m_storage_buffer);
1061		glDeleteBuffers(1, &m_dispatch_buffer);
1062		return NO_ERROR;
1063	}
1064};
1065
1066class BasicResourceUBO : public ComputeShaderBase
1067{
1068
1069	virtual std::string Title()
1070	{
1071		return "Compute Shader resources - UBOs";
1072	}
1073
1074	virtual std::string Purpose()
1075	{
1076		return "Verify that CS is able to read data from UBOs and write it to SSBO.";
1077	}
1078
1079	virtual std::string Method()
1080	{
1081		return NL "1. Create CS which uses array of UBOs." NL
1082				  "2. Dispatch CS with DispatchCompute and DispatchComputeIndirect commands." NL
1083				  "3. Read data from each UBO and write it to SSBO." NL "4. Verify SSBO content." NL
1084				  "5. Repeat for different buffer and CS work sizes.";
1085	}
1086
1087	virtual std::string PassCriteria()
1088	{
1089		return "Everything works as expected.";
1090	}
1091
1092	GLuint m_program;
1093	GLuint m_storage_buffer;
1094	GLuint m_uniform_buffer[12];
1095	GLuint m_dispatch_buffer;
1096
1097	std::string GenSource(const uvec3& local_size, const uvec3& num_groups)
1098	{
1099		const uvec3		  global_size = local_size * num_groups;
1100		std::stringstream ss;
1101		ss << NL "layout(local_size_x = " << local_size.x() << ", local_size_y = " << local_size.y()
1102		   << ", local_size_z = " << local_size.z() << ") in;" NL "const uvec3 kGlobalSize = uvec3(" << global_size.x()
1103		   << ", " << global_size.y() << ", " << global_size.z()
1104		   << ");" NL "layout(std140) uniform InputBuffer {" NL "  vec4 data["
1105		   << global_size.x() * global_size.y() * global_size.z()
1106		   << "];" NL "} g_in_buffer[12];" NL "layout(std430) buffer OutputBuffer {" NL "  vec4 data0["
1107		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data1["
1108		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data2["
1109		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data3["
1110		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data4["
1111		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data5["
1112		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data6["
1113		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data7["
1114		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data8["
1115		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data9["
1116		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data10["
1117		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data11["
1118		   << global_size.x() * global_size.y() * global_size.z()
1119		   << "];" NL "} g_out_buffer;" NL "void main() {" NL "  uint global_index = gl_GlobalInvocationID.x +" NL
1120			  "                      gl_GlobalInvocationID.y * kGlobalSize.x +" NL
1121			  "                      gl_GlobalInvocationID.z * kGlobalSize.x * kGlobalSize.y;" NL
1122			  "  g_out_buffer.data0[global_index] = g_in_buffer[0].data[global_index];" NL
1123			  "  g_out_buffer.data1[global_index] = g_in_buffer[1].data[global_index];" NL
1124			  "  g_out_buffer.data2[global_index] = g_in_buffer[2].data[global_index];" NL
1125			  "  g_out_buffer.data3[global_index] = g_in_buffer[3].data[global_index];" NL
1126			  "  g_out_buffer.data4[global_index] = g_in_buffer[4].data[global_index];" NL
1127			  "  g_out_buffer.data5[global_index] = g_in_buffer[5].data[global_index];" NL
1128			  "  g_out_buffer.data6[global_index] = g_in_buffer[6].data[global_index];" NL
1129			  "  g_out_buffer.data7[global_index] = g_in_buffer[7].data[global_index];" NL
1130			  "  g_out_buffer.data8[global_index] = g_in_buffer[8].data[global_index];" NL
1131			  "  g_out_buffer.data9[global_index] = g_in_buffer[9].data[global_index];" NL
1132			  "  g_out_buffer.data10[global_index] = g_in_buffer[10].data[global_index];" NL
1133			  "  g_out_buffer.data11[global_index] = g_in_buffer[11].data[global_index];" NL "}";
1134		return ss.str();
1135	}
1136
1137	bool RunIteration(const uvec3& local_size, const uvec3& num_groups, bool dispatch_indirect)
1138	{
1139		if (m_program != 0)
1140			glDeleteProgram(m_program);
1141		m_program = CreateComputeProgram(GenSource(local_size, num_groups));
1142		glLinkProgram(m_program);
1143		if (!CheckProgram(m_program))
1144			return false;
1145
1146		for (GLuint i = 0; i < 12; ++i)
1147		{
1148			char name[32];
1149			sprintf(name, "InputBuffer[%u]", i);
1150			const GLuint index = glGetUniformBlockIndex(m_program, name);
1151			glUniformBlockBinding(m_program, index, i);
1152		}
1153
1154		const GLuint kBufferSize =
1155			local_size.x() * num_groups.x() * local_size.y() * num_groups.y() * local_size.z() * num_groups.z();
1156
1157		if (m_storage_buffer == 0)
1158			glGenBuffers(1, &m_storage_buffer);
1159		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
1160		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(vec4) * kBufferSize * 12, NULL, GL_DYNAMIC_DRAW);
1161		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
1162
1163		if (m_uniform_buffer[0] == 0)
1164			glGenBuffers(12, m_uniform_buffer);
1165		for (GLuint i = 0; i < 12; ++i)
1166		{
1167			std::vector<vec4> data(kBufferSize);
1168			for (GLuint j = 0; j < kBufferSize; ++j)
1169			{
1170				data[j] = vec4(static_cast<float>(i) * static_cast<float>(kBufferSize) + static_cast<float>(j));
1171			}
1172			glBindBufferBase(GL_UNIFORM_BUFFER, i, m_uniform_buffer[i]);
1173			glBufferData(GL_UNIFORM_BUFFER, sizeof(vec4) * kBufferSize, &data[0], GL_DYNAMIC_DRAW);
1174		}
1175		glBindBuffer(GL_UNIFORM_BUFFER, 0);
1176
1177		glUseProgram(m_program);
1178		if (dispatch_indirect)
1179		{
1180			if (m_dispatch_buffer == 0)
1181				glGenBuffers(1, &m_dispatch_buffer);
1182			glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer);
1183			glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(num_groups), &num_groups[0], GL_STATIC_DRAW);
1184			glDispatchComputeIndirect(0);
1185		}
1186		else
1187		{
1188			glDispatchCompute(num_groups.x(), num_groups.y(), num_groups.z());
1189		}
1190
1191		vec4* data;
1192		glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer);
1193		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
1194		data = static_cast<vec4*>(
1195			glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(vec4) * 12 * kBufferSize, GL_MAP_READ_BIT));
1196
1197		bool ret = true;
1198		for (GLuint z = 0; z < local_size.z() * num_groups.z(); ++z)
1199		{
1200			for (GLuint y = 0; y < local_size.y() * num_groups.y(); ++y)
1201			{
1202				for (GLuint x = 0; x < local_size.x() * num_groups.x(); ++x)
1203				{
1204					const GLuint index = z * local_size.x() * num_groups.x() * local_size.y() * num_groups.y() +
1205										 y * local_size.x() * num_groups.x() + x;
1206					for (int i = 0; i < 1; ++i)
1207					{
1208						if (!IsEqual(data[index * 12 + i], vec4(static_cast<float>(index * 12 + i))))
1209						{
1210							m_context.getTestContext().getLog() << tcu::TestLog::Message << "Invalid data at offset "
1211																<< index * 12 + i << tcu::TestLog::EndMessage;
1212							ret = false;
1213						}
1214					}
1215				}
1216			}
1217		}
1218		glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
1219		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
1220		return ret;
1221	}
1222
1223	virtual long Setup()
1224	{
1225		m_program		 = 0;
1226		m_storage_buffer = 0;
1227		memset(m_uniform_buffer, 0, sizeof(m_uniform_buffer));
1228		m_dispatch_buffer = 0;
1229		return NO_ERROR;
1230	}
1231
1232	virtual long Run()
1233	{
1234		if (!RunIteration(uvec3(64, 1, 1), uvec3(8, 1, 1), false))
1235			return ERROR;
1236		if (!RunIteration(uvec3(2, 2, 2), uvec3(2, 2, 2), true))
1237			return ERROR;
1238		if (!RunIteration(uvec3(2, 4, 2), uvec3(2, 4, 1), false))
1239			return ERROR;
1240		return NO_ERROR;
1241	}
1242
1243	virtual long Cleanup()
1244	{
1245		glUseProgram(0);
1246		glDeleteProgram(m_program);
1247		glDeleteBuffers(1, &m_storage_buffer);
1248		glDeleteBuffers(12, m_uniform_buffer);
1249		glDeleteBuffers(1, &m_dispatch_buffer);
1250		return NO_ERROR;
1251	}
1252};
1253
1254class BasicResourceTexture : public ComputeShaderBase
1255{
1256
1257	virtual std::string Title()
1258	{
1259		return NL "Compute Shader resources - Textures";
1260	}
1261
1262	virtual std::string Purpose()
1263	{
1264		return NL "Verify that texture access works correctly in CS.";
1265	}
1266
1267	virtual std::string Method()
1268	{
1269		return NL "1. Create CS which uses all sampler types (sampler2D, sampler3D," NL "    sampler2DArray)." NL
1270				  "2. Dispatch CS with DispatchCompute and DispatchComputeIndirect commands." NL
1271				  "3. Sample each texture and write sampled value to SSBO." NL "4. Verify SSBO content." NL
1272				  "5. Repeat for different texture and CS work sizes.";
1273	}
1274
1275	virtual std::string PassCriteria()
1276	{
1277		return NL "Everything works as expected.";
1278	}
1279
1280	GLuint m_program;
1281	GLuint m_storage_buffer;
1282	GLuint m_texture[3];
1283	GLuint m_dispatch_buffer;
1284
1285	std::string GenSource(const uvec3& local_size, const uvec3& num_groups)
1286	{
1287		const uvec3		  global_size = local_size * num_groups;
1288		std::stringstream ss;
1289		ss << NL "layout(local_size_x = " << local_size.x() << ", local_size_y = " << local_size.y()
1290		   << ", local_size_z = " << local_size.z() << ") in;" NL "const uvec3 kGlobalSize = uvec3(" << global_size.x()
1291		   << ", " << global_size.y() << ", " << global_size.z()
1292		   << ");" NL "uniform sampler2D g_sampler0;" NL "uniform lowp sampler3D g_sampler1;" NL
1293			  "uniform mediump sampler2DArray g_sampler2;" NL "layout(std430) buffer OutputBuffer {" NL "  vec4 data0["
1294		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data1["
1295		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  vec4 data2["
1296		   << global_size.x() * global_size.y() * global_size.z()
1297		   << "];" NL "} g_out_buffer;" NL "void main() {" NL "  uint global_index = gl_GlobalInvocationID.x +" NL
1298			  "                            gl_GlobalInvocationID.y * kGlobalSize.x +" NL
1299			  "                            gl_GlobalInvocationID.z * kGlobalSize.x * kGlobalSize.y;" NL
1300			  "  g_out_buffer.data0[global_index] = texture(g_sampler0, vec2(gl_GlobalInvocationID) / "
1301			  "vec2(kGlobalSize));" NL "  g_out_buffer.data1[global_index] = textureProj(g_sampler1, "
1302			  "vec4(vec3(gl_GlobalInvocationID) / vec3(kGlobalSize), 1.0));" NL
1303			  "  g_out_buffer.data2[global_index] = texelFetchOffset(g_sampler2, ivec3(gl_GlobalInvocationID), 0, "
1304			  "ivec2(0));" NL "}";
1305		return ss.str();
1306	}
1307
1308	bool RunIteration(const uvec3& local_size, const uvec3& num_groups, bool dispatch_indirect)
1309	{
1310		if (m_program != 0)
1311			glDeleteProgram(m_program);
1312		m_program = CreateComputeProgram(GenSource(local_size, num_groups));
1313		glLinkProgram(m_program);
1314		if (!CheckProgram(m_program))
1315			return false;
1316
1317		glUseProgram(m_program);
1318		for (int i = 0; i < 4; ++i)
1319		{
1320			char name[32];
1321			sprintf(name, "g_sampler%d", i);
1322			glUniform1i(glGetUniformLocation(m_program, name), i);
1323		}
1324		glUseProgram(0);
1325
1326		const GLuint kBufferSize =
1327			local_size.x() * num_groups.x() * local_size.y() * num_groups.y() * local_size.z() * num_groups.z();
1328		const GLint kWidth  = static_cast<GLint>(local_size.x() * num_groups.x());
1329		const GLint kHeight = static_cast<GLint>(local_size.y() * num_groups.y());
1330		const GLint kDepth  = static_cast<GLint>(local_size.z() * num_groups.z());
1331
1332		std::vector<vec4> buffer_data(kBufferSize * 4);
1333		if (m_storage_buffer == 0)
1334			glGenBuffers(1, &m_storage_buffer);
1335		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
1336		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(vec4) * kBufferSize * 4, &buffer_data[0], GL_DYNAMIC_DRAW);
1337		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
1338
1339		std::vector<vec4> texture_data(kBufferSize, vec4(123.0f));
1340		if (m_texture[0] == 0)
1341			glGenTextures(3, m_texture);
1342
1343		glActiveTexture(GL_TEXTURE0);
1344		glBindTexture(GL_TEXTURE_2D, m_texture[0]);
1345		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
1346		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
1347		glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA32F, kWidth, kHeight, 0, GL_RGBA, GL_FLOAT, &texture_data[0]);
1348
1349		glActiveTexture(GL_TEXTURE1);
1350		glBindTexture(GL_TEXTURE_3D, m_texture[1]);
1351		glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
1352		glTexParameteri(GL_TEXTURE_3D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
1353		glTexImage3D(GL_TEXTURE_3D, 0, GL_RGBA32F, kWidth, kHeight, kDepth, 0, GL_RGBA, GL_FLOAT, &texture_data[0]);
1354
1355		glActiveTexture(GL_TEXTURE2);
1356		glBindTexture(GL_TEXTURE_2D_ARRAY, m_texture[2]);
1357		glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
1358		glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
1359		glTexImage3D(GL_TEXTURE_2D_ARRAY, 0, GL_RGBA32F, kWidth, kHeight, kDepth, 0, GL_RGBA, GL_FLOAT,
1360					 &texture_data[0]);
1361
1362		glUseProgram(m_program);
1363		if (dispatch_indirect)
1364		{
1365			if (m_dispatch_buffer == 0)
1366				glGenBuffers(1, &m_dispatch_buffer);
1367			glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer);
1368			glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(num_groups), &num_groups[0], GL_STATIC_DRAW);
1369			glDispatchComputeIndirect(0);
1370		}
1371		else
1372		{
1373			glDispatchCompute(num_groups.x(), num_groups.y(), num_groups.z());
1374		}
1375
1376		vec4* data;
1377		glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer);
1378		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
1379
1380		data = static_cast<vec4*>(
1381			glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(vec4) * 3 * kBufferSize, GL_MAP_READ_BIT));
1382		bool ret = true;
1383		for (GLuint index = 0; index < kBufferSize * 3; ++index)
1384		{
1385			if (!IsEqual(data[index], vec4(123.0f)))
1386			{
1387				m_context.getTestContext().getLog()
1388					<< tcu::TestLog::Message << "Invalid data at index " << index << tcu::TestLog::EndMessage;
1389				ret = false;
1390			}
1391		}
1392		glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
1393		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
1394
1395		return ret;
1396	}
1397
1398	virtual long Setup()
1399	{
1400		m_program		 = 0;
1401		m_storage_buffer = 0;
1402		memset(m_texture, 0, sizeof(m_texture));
1403		m_dispatch_buffer = 0;
1404		return NO_ERROR;
1405	}
1406
1407	virtual long Run()
1408	{
1409		if (!RunIteration(uvec3(4, 4, 4), uvec3(8, 1, 1), false))
1410			return ERROR;
1411		if (!RunIteration(uvec3(2, 4, 2), uvec3(2, 4, 1), true))
1412			return ERROR;
1413		if (!RunIteration(uvec3(2, 2, 2), uvec3(2, 2, 2), false))
1414			return ERROR;
1415		return NO_ERROR;
1416	}
1417
1418	virtual long Cleanup()
1419	{
1420		glActiveTexture(GL_TEXTURE0);
1421		glUseProgram(0);
1422		glDeleteProgram(m_program);
1423		glDeleteBuffers(1, &m_storage_buffer);
1424		glDeleteTextures(3, m_texture);
1425		glDeleteBuffers(1, &m_dispatch_buffer);
1426		return NO_ERROR;
1427	}
1428};
1429
1430class BasicResourceImage : public ComputeShaderBase
1431{
1432
1433	virtual std::string Title()
1434	{
1435		return NL "Compute Shader resources - Images";
1436	}
1437
1438	virtual std::string Purpose()
1439	{
1440		return NL "Verify that reading/writing GPU memory via image variables work as expected.";
1441	}
1442
1443	virtual std::string Method()
1444	{
1445		return NL "1. Create CS which uses two image2D variables to read and write underlying GPU memory." NL
1446				  "2. Dispatch CS with DispatchCompute and DispatchComputeIndirect commands." NL
1447				  "3. Verify memory content." NL "4. Repeat for different texture and CS work sizes.";
1448	}
1449
1450	virtual std::string PassCriteria()
1451	{
1452		return NL "Everything works as expected.";
1453	}
1454
1455	GLuint m_program;
1456	GLuint m_draw_program;
1457	GLuint m_texture[2];
1458	GLuint m_dispatch_buffer;
1459	GLuint m_vertex_array;
1460
1461	std::string GenSource(const uvec3& local_size, const uvec3& num_groups)
1462	{
1463		const uvec3		  global_size = local_size * num_groups;
1464		std::stringstream ss;
1465		if (m_context.getContextInfo().isExtensionSupported("GL_OES_shader_image_atomic"))
1466		{
1467			ss << NL "#extension GL_OES_shader_image_atomic : enable";
1468		}
1469		ss << NL "layout(local_size_x = " << local_size.x() << ", local_size_y = " << local_size.y()
1470		   << ", local_size_z = " << local_size.z()
1471		   << ") in;" NL "layout(r32ui, binding=0) coherent uniform mediump uimage2D g_image1;" NL
1472			  "layout(r32ui, binding=1) uniform mediump uimage2D g_image2;" NL "const uvec3 kGlobalSize = uvec3("
1473		   << global_size.x() << ", " << global_size.y() << ", " << global_size.z()
1474		   << ");" NL "void main() {" NL
1475			  "  if (gl_GlobalInvocationID.x >= kGlobalSize.x || gl_GlobalInvocationID.y >= kGlobalSize.y) return;" NL
1476			  "  uvec4 color = uvec4(gl_GlobalInvocationID.x + gl_GlobalInvocationID.y);";
1477		if (!m_context.getContextInfo().isExtensionSupported("GL_OES_shader_image_atomic"))
1478		{
1479			m_context.getTestContext().getLog()
1480				<< tcu::TestLog::Message << "Function imageAtomicAdd not supported, using imageStore"
1481				<< tcu::TestLog::EndMessage;
1482			ss << NL "  imageStore(g_image1, ivec2(gl_GlobalInvocationID), color);" NL
1483					 "  uvec4 c = imageLoad(g_image1, ivec2(gl_GlobalInvocationID));" NL
1484					 "  imageStore(g_image2, ivec2(gl_GlobalInvocationID), c);" NL "}";
1485		}
1486		else
1487		{
1488			m_context.getTestContext().getLog()
1489				<< tcu::TestLog::Message << "Using imageAtomicAdd" << tcu::TestLog::EndMessage;
1490			ss << NL "  imageStore(g_image1, ivec2(gl_GlobalInvocationID), uvec4(0));" NL
1491					 "  imageAtomicAdd(g_image1, ivec2(gl_GlobalInvocationID), color.x);" NL
1492					 "  uvec4 c = imageLoad(g_image1, ivec2(gl_GlobalInvocationID));" NL
1493					 "  imageStore(g_image2, ivec2(gl_GlobalInvocationID), c);" NL "}";
1494		}
1495
1496		return ss.str();
1497	}
1498
1499	bool RunIteration(const uvec3& local_size, const uvec3& num_groups, bool dispatch_indirect)
1500	{
1501		if (m_program != 0)
1502			glDeleteProgram(m_program);
1503		m_program = CreateComputeProgram(GenSource(local_size, num_groups));
1504		glLinkProgram(m_program);
1505		if (!CheckProgram(m_program))
1506			return false;
1507
1508		const GLint  kWidth  = static_cast<GLint>(local_size.x() * num_groups.x());
1509		const GLint  kHeight = static_cast<GLint>(local_size.y() * num_groups.y());
1510		const GLint  kDepth  = static_cast<GLint>(local_size.z() * num_groups.z());
1511		const GLuint kSize   = kWidth * kHeight * kDepth;
1512
1513		std::vector<uvec4> data(kSize);
1514		glGenTextures(2, m_texture);
1515
1516		for (int i = 0; i < 2; ++i)
1517		{
1518			glBindTexture(GL_TEXTURE_2D, m_texture[i]);
1519			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
1520			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
1521			glTexStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, kWidth, kHeight);
1522			glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, kWidth, kHeight, GL_RED_INTEGER, GL_UNSIGNED_INT, &data[0]);
1523		}
1524		glBindTexture(GL_TEXTURE_2D, 0);
1525
1526		glBindImageTexture(0, m_texture[0], 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32UI);
1527		glBindImageTexture(1, m_texture[1], 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R32UI);
1528		glUseProgram(m_program);
1529		if (dispatch_indirect)
1530		{
1531			if (m_dispatch_buffer == 0)
1532				glGenBuffers(1, &m_dispatch_buffer);
1533			glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer);
1534			glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(num_groups), &num_groups[0], GL_STATIC_DRAW);
1535			glDispatchComputeIndirect(0);
1536		}
1537		else
1538		{
1539			glDispatchCompute(num_groups.x(), num_groups.y(), num_groups.z());
1540		}
1541		glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
1542
1543		glClear(GL_COLOR_BUFFER_BIT);
1544		glActiveTexture(GL_TEXTURE0);
1545		glBindTexture(GL_TEXTURE_2D, m_texture[0]);
1546		glActiveTexture(GL_TEXTURE1);
1547		glBindTexture(GL_TEXTURE_2D, m_texture[1]);
1548		glUseProgram(m_draw_program);
1549		glBindVertexArray(m_vertex_array);
1550		glViewport(0, 0, kWidth, kHeight);
1551		glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, 1);
1552
1553		std::vector<vec4>	display(kWidth * kHeight);
1554		std::vector<GLubyte> colorData(kWidth * kHeight * 4);
1555		glReadPixels(0, 0, kWidth, kHeight, GL_RGBA, GL_UNSIGNED_BYTE, &colorData[0]);
1556		glDeleteTextures(2, m_texture);
1557
1558		for (int i = 0; i < kWidth * kHeight * 4; i += 4)
1559		{
1560			display[i / 4] =
1561				vec4(static_cast<GLfloat>(colorData[i] / 255.), static_cast<GLfloat>(colorData[i + 1] / 255.),
1562					 static_cast<GLfloat>(colorData[i + 2] / 255.), static_cast<GLfloat>(colorData[i + 3] / 255.));
1563		}
1564
1565		/* As the colors are converted R8->Rx and then read back as Rx->R8,
1566		 need to add both conversions to the epsilon. */
1567		vec4 kColorEps = g_color_eps;
1568		kColorEps.x() += 1.f / ((1 << 8) - 1.0f);
1569		for (int y = 0; y < kHeight; ++y)
1570		{
1571			for (int x = 0; x < kWidth; ++x)
1572			{
1573				if (y >= getWindowHeight() || x >= getWindowWidth())
1574				{
1575					continue;
1576				}
1577				const vec4 c = vec4(float(y + x) / 255.0f, 1.0f, 1.0f, 1.0f);
1578				if (!ColorEqual(display[y * kWidth + x], c, kColorEps))
1579				{
1580					m_context.getTestContext().getLog()
1581						<< tcu::TestLog::Message << "Got red: " << display[y * kWidth + x].x() << ", expected " << c.x()
1582						<< ", at (" << x << ", " << y << ")" << tcu::TestLog::EndMessage;
1583					return false;
1584				}
1585			}
1586		}
1587
1588		return true;
1589	}
1590
1591	virtual long Setup()
1592	{
1593		m_program = 0;
1594		memset(m_texture, 0, sizeof(m_texture));
1595		m_dispatch_buffer = 0;
1596		return NO_ERROR;
1597	}
1598
1599	virtual long Run()
1600	{
1601
1602		const char* const glsl_vs =
1603			NL "const vec2 g_quad[] = vec2[](vec2(-1, -1), vec2(1, -1), vec2(-1, 1), vec2(1, 1));" NL "void main() {" NL
1604			   "  gl_Position = vec4(g_quad[gl_VertexID], 0, 1);" NL "}";
1605
1606		const char* glsl_fs =
1607			NL "layout(location = 0) out mediump vec4 o_color;" NL "uniform mediump usampler2D g_image1;" NL
1608			   "uniform mediump usampler2D g_image2;" NL "void main() {" NL
1609			   "  mediump uvec4 c1 = texelFetch(g_image1, ivec2(gl_FragCoord.xy), 0);" NL
1610			   "  mediump uvec4 c2 = texelFetch(g_image2, ivec2(gl_FragCoord.xy), 0);" NL
1611			   "  if (c1 == c2) o_color = vec4(float(c1.x)/255.0, 1.0, 1.0, 1.0);" NL
1612			   "  else o_color = vec4(1, 0, 0, 1);" NL "}";
1613
1614		m_draw_program = CreateProgram(glsl_vs, glsl_fs);
1615		glLinkProgram(m_draw_program);
1616		if (!CheckProgram(m_draw_program))
1617			return ERROR;
1618
1619		glUseProgram(m_draw_program);
1620		glUniform1i(glGetUniformLocation(m_draw_program, "g_image1"), 0);
1621		glUniform1i(glGetUniformLocation(m_draw_program, "g_image2"), 1);
1622		glUseProgram(0);
1623
1624		glGenVertexArrays(1, &m_vertex_array);
1625
1626		if (!RunIteration(uvec3(8, 16, 1), uvec3(8, 4, 1), true))
1627			return ERROR;
1628		if (!RunIteration(uvec3(4, 32, 1), uvec3(16, 2, 1), false))
1629			return ERROR;
1630		if (!RunIteration(uvec3(16, 4, 1), uvec3(4, 16, 1), false))
1631			return ERROR;
1632		if (!RunIteration(uvec3(8, 8, 1), uvec3(8, 8, 1), true))
1633			return ERROR;
1634
1635		return NO_ERROR;
1636	}
1637
1638	virtual long Cleanup()
1639	{
1640		glUseProgram(0);
1641		glDeleteProgram(m_program);
1642		glDeleteProgram(m_draw_program);
1643		glDeleteVertexArrays(1, &m_vertex_array);
1644		glDeleteTextures(2, m_texture);
1645		glDeleteBuffers(1, &m_dispatch_buffer);
1646		glViewport(0, 0, getWindowWidth(), getWindowHeight());
1647		return NO_ERROR;
1648	}
1649};
1650
1651class BasicResourceAtomicCounter : public ComputeShaderBase
1652{
1653
1654	virtual std::string Title()
1655	{
1656		return "Compute Shader resources - Atomic Counters";
1657	}
1658
1659	virtual std::string Purpose()
1660	{
1661		return NL
1662			"1. Verify that Atomic Counters work as expected in CS." NL
1663			"2. Verify that built-in functions: atomicCounterIncrement and atomicCounterDecrement work correctly.";
1664	}
1665
1666	virtual std::string Method()
1667	{
1668		return NL
1669			"1. Create CS which uses two atomic_uint variables." NL
1670			"2. In CS write values returned by atomicCounterIncrement and atomicCounterDecrement functions to SSBO." NL
1671			"3. Dispatch CS with DispatchCompute and DispatchComputeIndirect commands." NL "4. Verify SSBO content." NL
1672			"5. Repeat for different buffer and CS work sizes.";
1673	}
1674
1675	virtual std::string PassCriteria()
1676	{
1677		return "Everything works as expected.";
1678	}
1679
1680	GLuint m_program;
1681	GLuint m_storage_buffer;
1682	GLuint m_counter_buffer;
1683	GLuint m_dispatch_buffer;
1684
1685	std::string GenSource(const uvec3& local_size, const uvec3& num_groups)
1686	{
1687		const uvec3		  global_size = local_size * num_groups;
1688		std::stringstream ss;
1689		ss << NL "layout(local_size_x = " << local_size.x() << ", local_size_y = " << local_size.y()
1690		   << ", local_size_z = " << local_size.z()
1691		   << ") in;" NL "layout(std430, binding = 0) buffer Output {" NL "  uint inc_data["
1692		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  uint dec_data["
1693		   << global_size.x() * global_size.y() * global_size.z()
1694		   << "];" NL "};" NL "layout(binding = 0, offset = 0) uniform atomic_uint g_inc_counter;" NL
1695			  "layout(binding = 0, offset = 4) uniform atomic_uint g_dec_counter;" NL "void main() {" NL
1696			  "  uint index = atomicCounterIncrement(g_inc_counter);" NL "  inc_data[index] = index;" NL
1697			  "  dec_data[index] = atomicCounterDecrement(g_dec_counter);" NL "}";
1698		return ss.str();
1699	}
1700
1701	bool RunIteration(const uvec3& local_size, const uvec3& num_groups, bool dispatch_indirect)
1702	{
1703		if (m_program != 0)
1704			glDeleteProgram(m_program);
1705		m_program = CreateComputeProgram(GenSource(local_size, num_groups));
1706		glLinkProgram(m_program);
1707		if (!CheckProgram(m_program))
1708			return false;
1709
1710		const GLint  kWidth  = static_cast<GLint>(local_size.x() * num_groups.x());
1711		const GLint  kHeight = static_cast<GLint>(local_size.y() * num_groups.y());
1712		const GLint  kDepth  = static_cast<GLint>(local_size.z() * num_groups.z());
1713		const GLuint kSize   = kWidth * kHeight * kDepth;
1714
1715		if (m_storage_buffer == 0)
1716			glGenBuffers(1, &m_storage_buffer);
1717		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
1718		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(GLuint) * kSize * 2, NULL, GL_DYNAMIC_DRAW);
1719		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
1720
1721		if (m_counter_buffer == 0)
1722			glGenBuffers(1, &m_counter_buffer);
1723
1724		glBindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, m_counter_buffer);
1725		glBufferData(GL_ATOMIC_COUNTER_BUFFER, 2 * sizeof(GLuint), NULL, GL_STREAM_DRAW);
1726		*static_cast<GLuint*>(glMapBufferRange(GL_ATOMIC_COUNTER_BUFFER, 0, sizeof(GLuint), GL_MAP_WRITE_BIT)) = 0;
1727		glUnmapBuffer(GL_ATOMIC_COUNTER_BUFFER);
1728		*static_cast<GLuint*>(
1729			glMapBufferRange(GL_ATOMIC_COUNTER_BUFFER, sizeof(GLuint), sizeof(GLuint), GL_MAP_WRITE_BIT)) = kSize;
1730		glUnmapBuffer(GL_ATOMIC_COUNTER_BUFFER);
1731
1732		glBindBuffer(GL_ATOMIC_COUNTER_BUFFER, 0);
1733
1734		glUseProgram(m_program);
1735		if (dispatch_indirect)
1736		{
1737			if (m_dispatch_buffer == 0)
1738				glGenBuffers(1, &m_dispatch_buffer);
1739			glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer);
1740			glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(num_groups), &num_groups[0], GL_STATIC_DRAW);
1741			glDispatchComputeIndirect(0);
1742		}
1743		else
1744		{
1745			glDispatchCompute(num_groups.x(), num_groups.y(), num_groups.z());
1746		}
1747
1748		GLuint* data;
1749		glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer);
1750		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
1751		data = static_cast<GLuint*>(
1752			glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(GLuint) * kSize, GL_MAP_READ_BIT));
1753
1754		bool ret = true;
1755		for (GLuint i = 0; i < kSize; ++i)
1756		{
1757			if (data[i] != i)
1758			{
1759				m_context.getTestContext().getLog() << tcu::TestLog::Message << "Value at index " << i << " is "
1760													<< data[i] << " should be " << i << tcu::TestLog::EndMessage;
1761				ret = false;
1762			}
1763		}
1764		glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
1765		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
1766
1767		GLuint* value;
1768		glBindBuffer(GL_ATOMIC_COUNTER_BUFFER, m_counter_buffer);
1769		value =
1770			static_cast<GLuint*>(glMapBufferRange(GL_ATOMIC_COUNTER_BUFFER, 0, 2 * sizeof(GLuint), GL_MAP_READ_BIT));
1771		if (value[0] != kSize)
1772		{
1773			m_context.getTestContext().getLog()
1774				<< tcu::TestLog::Message << "Final atomic counter value (buffer 0, offset 0) is " << value[0]
1775				<< " should be " << kSize << tcu::TestLog::EndMessage;
1776			ret = false;
1777		}
1778		if (value[1] != 0)
1779		{
1780			m_context.getTestContext().getLog()
1781				<< tcu::TestLog::Message << "Final atomic counter value (buffer 0, offset 4) is " << value[1]
1782				<< " should be 0" << tcu::TestLog::EndMessage;
1783			ret = false;
1784		}
1785		glUnmapBuffer(GL_ATOMIC_COUNTER_BUFFER);
1786		glBindBuffer(GL_ATOMIC_COUNTER_BUFFER, 0);
1787
1788		return ret;
1789	}
1790
1791	virtual long Setup()
1792	{
1793		m_program		  = 0;
1794		m_storage_buffer  = 0;
1795		m_counter_buffer  = 0;
1796		m_dispatch_buffer = 0;
1797		return NO_ERROR;
1798	}
1799
1800	virtual long Run()
1801	{
1802		if (!RunIteration(uvec3(4, 3, 2), uvec3(2, 3, 4), false))
1803			return ERROR;
1804		if (!RunIteration(uvec3(1, 1, 1), uvec3(1, 1, 1), true))
1805			return ERROR;
1806		if (!RunIteration(uvec3(1, 6, 1), uvec3(1, 1, 8), false))
1807			return ERROR;
1808		if (!RunIteration(uvec3(4, 1, 2), uvec3(10, 3, 4), true))
1809			return ERROR;
1810		return NO_ERROR;
1811	}
1812
1813	virtual long Cleanup()
1814	{
1815		glUseProgram(0);
1816		glDeleteProgram(m_program);
1817		glDeleteBuffers(1, &m_counter_buffer);
1818		glDeleteBuffers(1, &m_dispatch_buffer);
1819		glDeleteBuffers(1, &m_storage_buffer);
1820		return NO_ERROR;
1821	}
1822};
1823
1824class BasicResourceUniform : public ComputeShaderBase
1825{
1826
1827	virtual std::string Title()
1828	{
1829		return "Compute Shader resources - Uniforms";
1830	}
1831
1832	virtual std::string Purpose()
1833	{
1834		return NL "1. Verify that all types of uniform variables work as expected in CS." NL
1835				  "2. Verify that uniform variables can be updated with Uniform* commands.";
1836	}
1837
1838	virtual std::string Method()
1839	{
1840		return NL "1. Create CS which uses all (single precision and integer) types of uniform variables." NL
1841				  "2. Update uniform variables with Uniform* commands." NL
1842				  "3. Verify that uniform variables were updated correctly.";
1843	}
1844
1845	virtual std::string PassCriteria()
1846	{
1847		return "Everything works as expected.";
1848	}
1849
1850	GLuint m_program;
1851	GLuint m_storage_buffer;
1852
1853	virtual long Setup()
1854	{
1855		m_program		 = 0;
1856		m_storage_buffer = 0;
1857		return NO_ERROR;
1858	}
1859
1860	virtual long Run()
1861	{
1862		const char* const glsl_cs = NL
1863			"layout(local_size_x = 1) in;" NL "buffer Result {" NL "  int g_result;" NL "};" NL "uniform float g_0;" NL
1864			"uniform vec2 g_1;" NL "uniform vec3 g_2;" NL "uniform vec4 g_3;" NL "uniform mat2 g_4;" NL
1865			"uniform mat2x3 g_5;" NL "uniform mat2x4 g_6;" NL "uniform mat3x2 g_7;" NL "uniform mat3 g_8;" NL
1866			"uniform mat3x4 g_9;" NL "uniform mat4x2 g_10;" NL "uniform mat4x3 g_11;" NL "uniform mat4 g_12;" NL
1867			"uniform int g_13;" NL "uniform ivec2 g_14;" NL "uniform ivec3 g_15;" NL "uniform ivec4 g_16;" NL
1868			"uniform uint g_17;" NL "uniform uvec2 g_18;" NL "uniform uvec3 g_19;" NL "uniform uvec4 g_20;" NL NL
1869			"void main() {" NL "  g_result = 1;" NL NL "  if (g_0 != 1.0) g_result = 0;" NL
1870			"  if (g_1 != vec2(2.0, 3.0)) g_result = 0;" NL "  if (g_2 != vec3(4.0, 5.0, 6.0)) g_result = 0;" NL
1871			"  if (g_3 != vec4(7.0, 8.0, 9.0, 10.0)) g_result = 0;" NL NL
1872			"  if (g_4 != mat2(11.0, 12.0, 13.0, 14.0)) g_result = 0;" NL
1873			"  if (g_5 != mat2x3(15.0, 16.0, 17.0, 18.0, 19.0, 20.0)) g_result = 0;" NL
1874			"  if (g_6 != mat2x4(21.0, 22.0, 23.0, 24.0, 25.0, 26.0, 27.0, 28.0)) g_result = 0;" NL NL
1875			"  if (g_7 != mat3x2(29.0, 30.0, 31.0, 32.0, 33.0, 34.0)) g_result = 0;" NL
1876			"  if (g_8 != mat3(35.0, 36.0, 37.0, 38.0, 39.0, 40.0, 41.0, 42.0, 43.0)) g_result = 0;" NL
1877			"  if (g_9 != mat3x4(44.0, 45.0, 46.0, 47.0, 48.0, 49.0, 50.0, 51.0, 52.0, 53.0, 54.0, 55.0)) g_result = "
1878			"0;" NL NL "  if (g_10 != mat4x2(56.0, 57.0, 58.0, 59.0, 60.0, 61.0, 62.0, 63.0)) g_result = 0;" NL
1879			"  if (g_11 != mat4x3(63.0, 64.0, 65.0, 66.0, 67.0, 68.0, 69.0, 70.0, 71.0, 27.0, 73, 74.0)) g_result = "
1880			"0;" NL "  if (g_12 != mat4(75.0, 76.0, 77.0, 78.0, 79.0, 80.0, 81.0, 82.0, 83.0, 84.0, 85.0, 86.0, 87.0, "
1881			"88.0, 89.0, 90.0)) g_result = 0;" NL NL "  if (g_13 != 91) g_result = 0;" NL
1882			"  if (g_14 != ivec2(92, 93)) g_result = 0;" NL "  if (g_15 != ivec3(94, 95, 96)) g_result = 0;" NL
1883			"  if (g_16 != ivec4(97, 98, 99, 100)) g_result = 0;" NL NL "  if (g_17 != 101u) g_result = 0;" NL
1884			"  if (g_18 != uvec2(102u, 103u)) g_result = 0;" NL
1885			"  if (g_19 != uvec3(104u, 105u, 106u)) g_result = 0;" NL
1886			"  if (g_20 != uvec4(107u, 108u, 109u, 110u)) g_result = 0;" NL "}";
1887
1888		m_program = CreateComputeProgram(glsl_cs);
1889		glLinkProgram(m_program);
1890		glUseProgram(m_program);
1891		if (!CheckProgram(m_program))
1892			return ERROR;
1893
1894		glGenBuffers(1, &m_storage_buffer);
1895		/* create buffer */
1896		{
1897			const int data = 123;
1898			glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
1899			glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(data), &data, GL_STATIC_DRAW);
1900		}
1901
1902		glUniform1f(glGetUniformLocation(m_program, "g_0"), 1.0f);
1903		glUniform2f(glGetUniformLocation(m_program, "g_1"), 2.0f, 3.0f);
1904		glUniform3f(glGetUniformLocation(m_program, "g_2"), 4.0f, 5.0f, 6.0f);
1905		glUniform4f(glGetUniformLocation(m_program, "g_3"), 7.0f, 8.0f, 9.0f, 10.0f);
1906
1907		/* mat2 */
1908		{
1909			const GLfloat value[4] = { 11.0f, 12.0f, 13.0f, 14.0f };
1910			glUniformMatrix2fv(glGetUniformLocation(m_program, "g_4"), 1, GL_FALSE, value);
1911		}
1912		/* mat2x3 */
1913		{
1914			const GLfloat value[6] = { 15.0f, 16.0f, 17.0f, 18.0f, 19.0f, 20.0f };
1915			glUniformMatrix2x3fv(glGetUniformLocation(m_program, "g_5"), 1, GL_FALSE, value);
1916		}
1917		/* mat2x4 */
1918		{
1919			const GLfloat value[8] = { 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, 27.0f, 28.0f };
1920			glUniformMatrix2x4fv(glGetUniformLocation(m_program, "g_6"), 1, GL_FALSE, value);
1921		}
1922
1923		/* mat3x2 */
1924		{
1925			const GLfloat value[6] = { 29.0f, 30.0f, 31.0f, 32.0f, 33.0f, 34.0f };
1926			glUniformMatrix3x2fv(glGetUniformLocation(m_program, "g_7"), 1, GL_FALSE, value);
1927		}
1928		/* mat3 */
1929		{
1930			const GLfloat value[9] = { 35.0f, 36.0f, 37.0f, 38.0f, 39.0f, 40.0f, 41.0f, 42.0f, 43.0f };
1931			glUniformMatrix3fv(glGetUniformLocation(m_program, "g_8"), 1, GL_FALSE, value);
1932		}
1933		/* mat3x4 */
1934		{
1935			const GLfloat value[12] = { 44.0f, 45.0f, 46.0f, 47.0f, 48.0f, 49.0f,
1936										50.0f, 51.0f, 52.0f, 53.0f, 54.0f, 55.0f };
1937			glUniformMatrix3x4fv(glGetUniformLocation(m_program, "g_9"), 1, GL_FALSE, value);
1938		}
1939
1940		/* mat4x2 */
1941		{
1942			const GLfloat value[8] = { 56.0f, 57.0f, 58.0f, 59.0f, 60.0f, 61.0f, 62.0f, 63.0f };
1943			glUniformMatrix4x2fv(glGetUniformLocation(m_program, "g_10"), 1, GL_FALSE, value);
1944		}
1945		/* mat4x3 */
1946		{
1947			const GLfloat value[12] = {
1948				63.0f, 64.0f, 65.0f, 66.0f, 67.0f, 68.0f, 69.0f, 70.0f, 71.0f, 27.0f, 73, 74.0f
1949			};
1950			glUniformMatrix4x3fv(glGetUniformLocation(m_program, "g_11"), 1, GL_FALSE, value);
1951		}
1952		/* mat4 */
1953		{
1954			const GLfloat value[16] = { 75.0f, 76.0f, 77.0f, 78.0f, 79.0f, 80.0f, 81.0f, 82.0f,
1955										83.0f, 84.0f, 85.0f, 86.0f, 87.0f, 88.0f, 89.0f, 90.0f };
1956			glUniformMatrix4fv(glGetUniformLocation(m_program, "g_12"), 1, GL_FALSE, value);
1957		}
1958
1959		glUniform1i(glGetUniformLocation(m_program, "g_13"), 91);
1960		glUniform2i(glGetUniformLocation(m_program, "g_14"), 92, 93);
1961		glUniform3i(glGetUniformLocation(m_program, "g_15"), 94, 95, 96);
1962		glUniform4i(glGetUniformLocation(m_program, "g_16"), 97, 98, 99, 100);
1963
1964		glUniform1ui(glGetUniformLocation(m_program, "g_17"), 101);
1965		glUniform2ui(glGetUniformLocation(m_program, "g_18"), 102, 103);
1966		glUniform3ui(glGetUniformLocation(m_program, "g_19"), 104, 105, 106);
1967		glUniform4ui(glGetUniformLocation(m_program, "g_20"), 107, 108, 109, 110);
1968
1969		glDispatchCompute(1, 1, 1);
1970		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
1971
1972		long error = NO_ERROR;
1973		/* validate */
1974		{
1975			int* data;
1976			data = static_cast<int*>(glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(int), GL_MAP_READ_BIT));
1977			if (data[0] != 1)
1978			{
1979				m_context.getTestContext().getLog()
1980					<< tcu::TestLog::Message << "Data is " << data[0] << " should be 1." << tcu::TestLog::EndMessage;
1981				error = ERROR;
1982			}
1983			glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
1984			glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
1985		}
1986
1987		return error;
1988	}
1989
1990	virtual long Cleanup()
1991	{
1992		glUseProgram(0);
1993		glDeleteProgram(m_program);
1994		glDeleteBuffers(1, &m_storage_buffer);
1995		return NO_ERROR;
1996	}
1997};
1998
1999class BasicBuiltinVariables : public ComputeShaderBase
2000{
2001
2002	virtual std::string Title()
2003	{
2004		return "CS built-in variables";
2005	}
2006
2007	virtual std::string Purpose()
2008	{
2009		return NL "Verify that all (gl_WorkGroupSize, gl_WorkGroupID, gl_LocalInvocationID," NL
2010				  "gl_GlobalInvocationID, gl_NumWorkGroups, gl_WorkGroupSize)" NL
2011				  "CS built-in variables has correct values.";
2012	}
2013
2014	virtual std::string Method()
2015	{
2016		return NL "1. Create CS which writes all built-in variables to SSBO." NL
2017				  "2. Dispatch CS with DispatchCompute and DispatchComputeIndirect commands." NL
2018				  "3. Verify SSBO content." NL "4. Repeat for several different local and global work sizes.";
2019	}
2020
2021	virtual std::string PassCriteria()
2022	{
2023		return "Everything works as expected.";
2024	}
2025
2026	GLuint m_program;
2027	GLuint m_storage_buffer;
2028	GLuint m_dispatch_buffer;
2029
2030	std::string GenSource(const uvec3& local_size, const uvec3& num_groups)
2031	{
2032		const uvec3		  global_size = local_size * num_groups;
2033		std::stringstream ss;
2034		ss << NL "layout(local_size_x = " << local_size.x() << ", local_size_y = " << local_size.y()
2035		   << ", local_size_z = " << local_size.z() << ") in;" NL "const uvec3 kGlobalSize = uvec3(" << global_size.x()
2036		   << ", " << global_size.y() << ", " << global_size.z()
2037		   << ");" NL "layout(std430) buffer OutputBuffer {" NL "  uvec4 num_work_groups["
2038		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  uvec4 work_group_size["
2039		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  uvec4 work_group_id["
2040		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  uvec4 local_invocation_id["
2041		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  uvec4 global_invocation_id["
2042		   << global_size.x() * global_size.y() * global_size.z() << "];" NL "  uvec4 local_invocation_index["
2043		   << global_size.x() * global_size.y() * global_size.z()
2044		   << "];" NL "} g_out_buffer;" NL "void main() {" NL
2045			  "  if ((gl_WorkGroupSize * gl_WorkGroupID + gl_LocalInvocationID) != gl_GlobalInvocationID) return;" NL
2046			  "  uint global_index = gl_GlobalInvocationID.x +" NL
2047			  "                      gl_GlobalInvocationID.y * kGlobalSize.x +" NL
2048			  "                      gl_GlobalInvocationID.z * kGlobalSize.x * kGlobalSize.y;" NL
2049			  "  g_out_buffer.num_work_groups[global_index] = uvec4(gl_NumWorkGroups, 0);" NL
2050			  "  g_out_buffer.work_group_size[global_index] = uvec4(gl_WorkGroupSize, 0);" NL
2051			  "  g_out_buffer.work_group_id[global_index] = uvec4(gl_WorkGroupID, 0);" NL
2052			  "  g_out_buffer.local_invocation_id[global_index] = uvec4(gl_LocalInvocationID, 0);" NL
2053			  "  g_out_buffer.global_invocation_id[global_index] = uvec4(gl_GlobalInvocationID, 0);" NL
2054			  "  g_out_buffer.local_invocation_index[global_index] = uvec4(gl_LocalInvocationIndex);" NL "}";
2055		return ss.str();
2056	}
2057
2058	bool RunIteration(const uvec3& local_size, const uvec3& num_groups, bool dispatch_indirect)
2059	{
2060		if (m_program != 0)
2061			glDeleteProgram(m_program);
2062		m_program = CreateComputeProgram(GenSource(local_size, num_groups));
2063		glLinkProgram(m_program);
2064		if (!CheckProgram(m_program))
2065			return false;
2066
2067		const GLuint kBufferSize =
2068			local_size.x() * num_groups.x() * local_size.y() * num_groups.y() * local_size.z() * num_groups.z();
2069
2070		std::vector<uvec4> data(kBufferSize * 6);
2071		if (m_storage_buffer == 0)
2072			glGenBuffers(1, &m_storage_buffer);
2073		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
2074		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(uvec4) * kBufferSize * 6, &data[0], GL_DYNAMIC_DRAW);
2075		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
2076
2077		glUseProgram(m_program);
2078		if (dispatch_indirect)
2079		{
2080			if (m_dispatch_buffer == 0)
2081				glGenBuffers(1, &m_dispatch_buffer);
2082			glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer);
2083			glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(num_groups), &num_groups[0], GL_STATIC_DRAW);
2084			glDispatchComputeIndirect(0);
2085		}
2086		else
2087		{
2088			glDispatchCompute(num_groups.x(), num_groups.y(), num_groups.z());
2089		}
2090
2091		glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer);
2092		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
2093		uvec4* result;
2094		result = static_cast<uvec4*>(
2095			glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(uvec4) * kBufferSize * 6, GL_MAP_READ_BIT));
2096
2097		// gl_NumWorkGroups
2098		for (GLuint index = 0; index < kBufferSize; ++index)
2099		{
2100			if (!IsEqual(result[index], uvec4(num_groups.x(), num_groups.y(), num_groups.z(), 0)))
2101			{
2102				m_context.getTestContext().getLog()
2103					<< tcu::TestLog::Message << "gl_NumWorkGroups: Invalid data at index " << index
2104					<< tcu::TestLog::EndMessage;
2105				return false;
2106			}
2107		}
2108		// gl_WorkGroupSize
2109		for (GLuint index = kBufferSize; index < 2 * kBufferSize; ++index)
2110		{
2111			if (!IsEqual(result[index], uvec4(local_size.x(), local_size.y(), local_size.z(), 0)))
2112			{
2113				m_context.getTestContext().getLog()
2114					<< tcu::TestLog::Message << "gl_WorkGroupSize: Invalid data at index " << index
2115					<< tcu::TestLog::EndMessage;
2116				return false;
2117			}
2118		}
2119		// gl_WorkGroupID
2120		for (GLuint index = 2 * kBufferSize; index < 3 * kBufferSize; ++index)
2121		{
2122			uvec3 expected = IndexTo3DCoord(index - 2 * kBufferSize, local_size.x() * num_groups.x(),
2123											local_size.y() * num_groups.y());
2124			expected.x() /= local_size.x();
2125			expected.y() /= local_size.y();
2126			expected.z() /= local_size.z();
2127			if (!IsEqual(result[index], uvec4(expected.x(), expected.y(), expected.z(), 0)))
2128			{
2129				m_context.getTestContext().getLog()
2130					<< tcu::TestLog::Message << "gl_WorkGroupSize: Invalid data at index " << index
2131					<< tcu::TestLog::EndMessage;
2132				return false;
2133			}
2134		}
2135		// gl_LocalInvocationID
2136		for (GLuint index = 3 * kBufferSize; index < 4 * kBufferSize; ++index)
2137		{
2138			uvec3 expected = IndexTo3DCoord(index - 3 * kBufferSize, local_size.x() * num_groups.x(),
2139											local_size.y() * num_groups.y());
2140			expected.x() %= local_size.x();
2141			expected.y() %= local_size.y();
2142			expected.z() %= local_size.z();
2143			if (!IsEqual(result[index], uvec4(expected.x(), expected.y(), expected.z(), 0)))
2144			{
2145				m_context.getTestContext().getLog()
2146					<< tcu::TestLog::Message << "gl_LocalInvocationID: Invalid data at index " << index
2147					<< tcu::TestLog::EndMessage;
2148				return false;
2149			}
2150		}
2151		// gl_GlobalInvocationID
2152		for (GLuint index = 4 * kBufferSize; index < 5 * kBufferSize; ++index)
2153		{
2154			uvec3 expected = IndexTo3DCoord(index - 4 * kBufferSize, local_size.x() * num_groups.x(),
2155											local_size.y() * num_groups.y());
2156			if (!IsEqual(result[index], uvec4(expected.x(), expected.y(), expected.z(), 0)))
2157			{
2158				m_context.getTestContext().getLog()
2159					<< tcu::TestLog::Message << "gl_GlobalInvocationID: Invalid data at index " << index
2160					<< tcu::TestLog::EndMessage;
2161				return false;
2162			}
2163		}
2164		// gl_LocalInvocationIndex
2165		for (GLuint index = 5 * kBufferSize; index < 6 * kBufferSize; ++index)
2166		{
2167			uvec3 coord = IndexTo3DCoord(index - 5 * kBufferSize, local_size.x() * num_groups.x(),
2168										 local_size.y() * num_groups.y());
2169			const GLuint expected = (coord.x() % local_size.x()) + (coord.y() % local_size.y()) * local_size.x() +
2170									(coord.z() % local_size.z()) * local_size.x() * local_size.y();
2171			if (!IsEqual(result[index], uvec4(expected)))
2172			{
2173				m_context.getTestContext().getLog()
2174					<< tcu::TestLog::Message << "gl_LocalInvocationIndex: Invalid data at index " << index
2175					<< tcu::TestLog::EndMessage;
2176				return false;
2177			}
2178		}
2179		glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
2180		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
2181		return true;
2182	}
2183
2184	virtual long Setup()
2185	{
2186		m_program		  = 0;
2187		m_storage_buffer  = 0;
2188		m_dispatch_buffer = 0;
2189		return NO_ERROR;
2190	}
2191
2192	virtual long Run()
2193	{
2194		if (!RunIteration(uvec3(64, 1, 1), uvec3(8, 1, 1), false))
2195			return ERROR;
2196		if (!RunIteration(uvec3(1, 1, 64), uvec3(1, 5, 2), true))
2197			return ERROR;
2198		if (!RunIteration(uvec3(1, 1, 4), uvec3(2, 2, 2), false))
2199			return ERROR;
2200		if (!RunIteration(uvec3(3, 2, 1), uvec3(1, 2, 3), true))
2201			return ERROR;
2202		if (!RunIteration(uvec3(2, 4, 2), uvec3(2, 4, 1), false))
2203			return ERROR;
2204		if (!RunIteration(uvec3(2, 4, 7), uvec3(2, 1, 4), true))
2205			return ERROR;
2206		return NO_ERROR;
2207	}
2208
2209	virtual long Cleanup()
2210	{
2211		glUseProgram(0);
2212		glDeleteProgram(m_program);
2213		glDeleteBuffers(1, &m_storage_buffer);
2214		glDeleteBuffers(1, &m_dispatch_buffer);
2215		return NO_ERROR;
2216	}
2217};
2218
2219class BasicMax : public ComputeShaderBase
2220{
2221
2222	virtual std::string Title()
2223	{
2224		return NL "CS max values";
2225	}
2226
2227	virtual std::string Purpose()
2228	{
2229		return NL "Verify (on the API and GLSL side) that all GL_MAX_COMPUTE_* values are not less than" NL
2230				  "required by the OpenGL specification.";
2231	}
2232
2233	virtual std::string Method()
2234	{
2235		return NL "1. Use all API commands to query all GL_MAX_COMPUTE_* values. Verify that they are correct." NL
2236				  "2. Verify all gl_MaxCompute* constants in the GLSL.";
2237	}
2238
2239	virtual std::string PassCriteria()
2240	{
2241		return NL "Everything works as expected.";
2242	}
2243
2244	GLuint m_program;
2245	GLuint m_buffer;
2246
2247	bool CheckIndexed(GLenum target, const GLint* min_values)
2248	{
2249		GLint   i;
2250		GLint64 i64;
2251
2252		for (GLuint c = 0; c < 3; c++)
2253		{
2254			glGetIntegeri_v(target, c, &i);
2255			if (i < min_values[c])
2256			{
2257				m_context.getTestContext().getLog() << tcu::TestLog::Message << "Is " << i << " should be at least "
2258													<< min_values[c] << tcu::TestLog::EndMessage;
2259				return false;
2260			}
2261		}
2262		for (GLuint c = 0; c < 3; c++)
2263		{
2264			glGetInteger64i_v(target, c, &i64);
2265			if (static_cast<GLint>(i64) < min_values[c])
2266			{
2267				m_context.getTestContext().getLog()
2268					<< tcu::TestLog::Message << "Is " << static_cast<GLint>(i64) << " should be at least "
2269					<< min_values[c] << tcu::TestLog::EndMessage;
2270				return false;
2271			}
2272		}
2273
2274		return true;
2275	}
2276
2277	bool Check(GLenum target, const GLint min_value)
2278	{
2279		GLint	 i;
2280		GLint64   i64;
2281		GLfloat   f;
2282		GLboolean b;
2283
2284		glGetIntegerv(target, &i);
2285		if (i < min_value)
2286		{
2287			m_context.getTestContext().getLog() << tcu::TestLog::Message << "Is " << i << " should be at least "
2288												<< min_value << tcu::TestLog::EndMessage;
2289			return false;
2290		}
2291		glGetInteger64v(target, &i64);
2292		if (i64 < static_cast<GLint64>(min_value))
2293		{
2294			m_context.getTestContext().getLog() << tcu::TestLog::Message << "Is " << i64
2295												<< " should be at least " << static_cast<GLint64>(min_value) << tcu::TestLog::EndMessage;
2296			return false;
2297		}
2298		glGetFloatv(target, &f);
2299		if (f < static_cast<GLfloat>(min_value))
2300		{
2301			m_context.getTestContext().getLog() << tcu::TestLog::Message << "Is " << f
2302												<< " should be at least " << static_cast<GLfloat>(min_value) << tcu::TestLog::EndMessage;
2303			return false;
2304		}
2305		glGetBooleanv(target, &b);
2306		if (b == GL_FALSE)
2307		{
2308			m_context.getTestContext().getLog() << tcu::TestLog::Message << "Is GL_FALSE should be at least GL_TRUE."
2309												<< min_value << tcu::TestLog::EndMessage;
2310			return false;
2311		}
2312
2313		return true;
2314	}
2315
2316	virtual long Setup()
2317	{
2318		m_program = 0;
2319		m_buffer  = 0;
2320		return NO_ERROR;
2321	}
2322
2323	virtual long Run()
2324	{
2325		const GLint work_group_count[3] = { 65535, 65535, 65535 };
2326		if (!CheckIndexed(GL_MAX_COMPUTE_WORK_GROUP_COUNT, work_group_count))
2327			return ERROR;
2328
2329		const GLint work_group_size[3] = { 128, 128, 64 };
2330		if (!CheckIndexed(GL_MAX_COMPUTE_WORK_GROUP_SIZE, work_group_size))
2331			return ERROR;
2332
2333		if (!Check(GL_MAX_COMPUTE_UNIFORM_BLOCKS, 12))
2334			return ERROR;
2335		if (!Check(GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS, 16))
2336			return ERROR;
2337		if (!Check(GL_MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS, 1))
2338			return ERROR;
2339		if (!Check(GL_MAX_COMPUTE_ATOMIC_COUNTERS, 8))
2340			return ERROR;
2341		if (!Check(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE, 16384))
2342			return ERROR;
2343		if (!Check(GL_MAX_COMPUTE_UNIFORM_COMPONENTS, 512))
2344			return ERROR;
2345		if (!Check(GL_MAX_COMPUTE_IMAGE_UNIFORMS, 4))
2346			return ERROR;
2347		if (!Check(GL_MAX_COMBINED_COMPUTE_UNIFORM_COMPONENTS, 512))
2348			return ERROR;
2349
2350		const char* const glsl_cs =
2351			NL "layout(local_size_x = 1) in;" NL "layout(std430) buffer Output {" NL "  int g_output;" NL "};" NL
2352			   "uniform ivec3 MaxComputeWorkGroupCount;" NL "uniform ivec3 MaxComputeWorkGroupSize;" NL
2353			   "uniform int MaxComputeUniformComponents;" NL "uniform int MaxComputeTextureImageUnits;" NL
2354			   "uniform int MaxComputeImageUniforms;" NL "uniform int MaxComputeAtomicCounters;" NL
2355			   "uniform int MaxComputeAtomicCounterBuffers;" NL "void main() {" NL "  g_output = 1;" NL
2356			   "  if (MaxComputeWorkGroupCount != gl_MaxComputeWorkGroupCount) g_output = 0;" NL
2357			   "  if (MaxComputeWorkGroupSize != gl_MaxComputeWorkGroupSize) g_output = 0;" NL
2358			   "  if (MaxComputeUniformComponents != gl_MaxComputeUniformComponents) g_output = 0;" NL
2359			   "  if (MaxComputeTextureImageUnits != gl_MaxComputeTextureImageUnits) g_output = 0;" NL
2360			   "  if (MaxComputeImageUniforms != gl_MaxComputeImageUniforms) g_output = 0;" NL
2361			   "  if (MaxComputeAtomicCounters != gl_MaxComputeAtomicCounters) g_output = 0;" NL
2362			   "  if (MaxComputeAtomicCounterBuffers != gl_MaxComputeAtomicCounterBuffers) g_output = 0;" NL "}";
2363		m_program = CreateComputeProgram(glsl_cs);
2364		glLinkProgram(m_program);
2365		if (!CheckProgram(m_program))
2366			return ERROR;
2367		glUseProgram(m_program);
2368
2369		GLint p[3];
2370		glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 0, &p[0]);
2371		glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 1, &p[1]);
2372		glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 2, &p[2]);
2373		glUniform3i(glGetUniformLocation(m_program, "MaxComputeWorkGroupCount"), p[0], p[1], p[2]);
2374
2375		glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE, 0, &p[0]);
2376		glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE, 1, &p[1]);
2377		glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE, 2, &p[2]);
2378		glUniform3iv(glGetUniformLocation(m_program, "MaxComputeWorkGroupSize"), 1, p);
2379
2380		glGetIntegerv(GL_MAX_COMPUTE_UNIFORM_COMPONENTS, p);
2381		glUniform1i(glGetUniformLocation(m_program, "MaxComputeUniformComponents"), p[0]);
2382
2383		glGetIntegerv(GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS, p);
2384		glUniform1iv(glGetUniformLocation(m_program, "MaxComputeTextureImageUnits"), 1, p);
2385
2386		glGetIntegerv(GL_MAX_COMPUTE_IMAGE_UNIFORMS, p);
2387		glUniform1i(glGetUniformLocation(m_program, "MaxComputeImageUniforms"), p[0]);
2388
2389		glGetIntegerv(GL_MAX_COMPUTE_ATOMIC_COUNTERS, p);
2390		glUniform1i(glGetUniformLocation(m_program, "MaxComputeAtomicCounters"), p[0]);
2391
2392		glGetIntegerv(GL_MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS, p);
2393		glUniform1i(glGetUniformLocation(m_program, "MaxComputeAtomicCounterBuffers"), p[0]);
2394
2395		GLint data = 0xffff;
2396		glGenBuffers(1, &m_buffer);
2397		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_buffer);
2398		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(GLint), &data, GL_DYNAMIC_DRAW);
2399
2400		glDispatchCompute(1, 1, 1);
2401
2402		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
2403
2404		GLint* result;
2405		result	 = static_cast<GLint*>(glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(GLint), GL_MAP_READ_BIT));
2406		long error = NO_ERROR;
2407		if (result[0] != 1)
2408		{
2409			error = ERROR;
2410		}
2411
2412		glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
2413		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
2414		return error;
2415	}
2416	virtual long Cleanup()
2417	{
2418		glUseProgram(0);
2419		glDeleteProgram(m_program);
2420		glDeleteBuffers(1, &m_buffer);
2421		return NO_ERROR;
2422	}
2423};
2424
2425class NegativeAttachShader : public ComputeShaderBase
2426{
2427
2428	virtual std::string Title()
2429	{
2430		return "Api Attach Shader";
2431	}
2432
2433	virtual std::string Purpose()
2434	{
2435		return NL "Verify that calling AttachShader with multiple shader objects of type COMPUTE_SHADER generates "
2436				  "INVALID_OPERATION.";
2437	}
2438
2439	virtual std::string Method()
2440	{
2441		return NL "Try to attach multiple shader objects of the same type and verify that proper error is generated.";
2442	}
2443
2444	virtual std::string PassCriteria()
2445	{
2446		return "INVALID_OPERATION is generated.";
2447	}
2448
2449	virtual long Run()
2450	{
2451		const char* const cs1[2] = { "#version 310 es", NL "layout(local_size_x = 1) in;" NL "void Run();" NL
2452														   "void main() {" NL "  Run();" NL "}" };
2453
2454		const char* const cs2 =
2455			"#version 310 es" NL "layout(binding = 0, std430) buffer Output {" NL "  vec4 g_output;" NL "};" NL
2456			"vec4 CalculateOutput();" NL "void Run() {" NL "  g_output = CalculateOutput();" NL "}";
2457
2458		const char* const cs3 =
2459			"#version 310 es" NL "layout(local_size_x = 1) in;" NL "layout(binding = 0, std430) buffer Output {" NL
2460			"  vec4 g_output;" NL "};" NL "vec4 CalculateOutput() {" NL "  g_output = vec4(0);" NL
2461			"  return vec4(1, 2, 3, 4);" NL "}";
2462
2463		const GLuint sh1 = glCreateShader(GL_COMPUTE_SHADER);
2464
2465		GLint type;
2466		glGetShaderiv(sh1, GL_SHADER_TYPE, &type);
2467		if (static_cast<GLenum>(type) != GL_COMPUTE_SHADER)
2468		{
2469			m_context.getTestContext().getLog()
2470				<< tcu::TestLog::Message << "SHADER_TYPE should be COMPUTE_SHADER." << tcu::TestLog::EndMessage;
2471			glDeleteShader(sh1);
2472			return false;
2473		}
2474
2475		glShaderSource(sh1, 2, cs1, NULL);
2476		glCompileShader(sh1);
2477
2478		const GLuint sh2 = glCreateShader(GL_COMPUTE_SHADER);
2479		glShaderSource(sh2, 1, &cs2, NULL);
2480		glCompileShader(sh2);
2481
2482		const GLuint sh3 = glCreateShader(GL_COMPUTE_SHADER);
2483		glShaderSource(sh3, 1, &cs3, NULL);
2484		glCompileShader(sh3);
2485
2486		const GLuint p = glCreateProgram();
2487		glAttachShader(p, sh1);
2488		glAttachShader(p, sh2);
2489		if (glGetError() != GL_INVALID_OPERATION)
2490		{
2491			m_context.getTestContext().getLog()
2492				<< tcu::TestLog::Message
2493				<< "GL_INVALID_OPERATION error expected after attaching shader of the same type."
2494				<< tcu::TestLog::EndMessage;
2495			return ERROR;
2496		}
2497		glAttachShader(p, sh3);
2498		if (glGetError() != GL_INVALID_OPERATION)
2499		{
2500			m_context.getTestContext().getLog()
2501				<< tcu::TestLog::Message
2502				<< "GL_INVALID_OPERATION error expected after attaching shader of the same type."
2503				<< tcu::TestLog::EndMessage;
2504			return ERROR;
2505		}
2506
2507		glDeleteShader(sh1);
2508		glDeleteShader(sh2);
2509		glDeleteShader(sh3);
2510
2511		glUseProgram(0);
2512		glDeleteProgram(p);
2513
2514		return NO_ERROR;
2515	}
2516};
2517
2518class BasicBuildSeparable : public ComputeShaderBase
2519{
2520
2521	virtual std::string Title()
2522	{
2523		return "Building CS separable program";
2524	}
2525
2526	virtual std::string Purpose()
2527	{
2528		return NL "1. Verify that building separable CS program works as expected." NL
2529				  "2. Verify that program consisting from 4 strings works as expected.";
2530	}
2531
2532	virtual std::string Method()
2533	{
2534		return NL "1. Create, compile and link CS using CreateShaderProgramv command." NL
2535				  "2. Dispatch and verify CS program.";
2536	}
2537
2538	virtual std::string PassCriteria()
2539	{
2540		return "Everything works as expected.";
2541	}
2542
2543	virtual long Run()
2544	{
2545		const char* const cs[4] = {
2546			"#version 310 es",
2547
2548			NL "layout(local_size_x = 1) in;" NL "void Run();" NL "void main() {" NL "  Run();" NL "}",
2549
2550			NL "layout(binding = 0, std430) buffer Output {" NL "  vec4 g_output;" NL "};" NL
2551			   "vec4 CalculateOutput();" NL "void Run() {" NL "  g_output = CalculateOutput();" NL "}",
2552
2553			NL "vec4 CalculateOutput() {" NL "  g_output = vec4(0);" NL "  return vec4(1, 2, 3, 4);" NL "}"
2554		};
2555
2556		const GLuint p   = glCreateShaderProgramv(GL_COMPUTE_SHADER, 4, cs);
2557		bool		 res = CheckProgram(p);
2558
2559		GLuint buffer;
2560		glGenBuffers(1, &buffer);
2561		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, buffer);
2562		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(vec4), &vec4(0.0f)[0], GL_DYNAMIC_DRAW);
2563		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
2564
2565		glUseProgram(p);
2566		glDispatchCompute(1, 1, 1);
2567
2568		vec4* data;
2569		glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer);
2570		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
2571		data = static_cast<vec4*>(glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(vec4), GL_MAP_READ_BIT));
2572		if (!IsEqual(data[0], vec4(1.0f, 2.0f, 3.0f, 4.0f)))
2573		{
2574			m_context.getTestContext().getLog()
2575				<< tcu::TestLog::Message << "Invalid value!" << tcu::TestLog::EndMessage;
2576			res = false;
2577		}
2578		glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
2579
2580		glBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(vec4), &vec4(0.0f)[0]);
2581
2582		GLuint pipeline;
2583		glGenProgramPipelines(1, &pipeline);
2584		glUseProgramStages(pipeline, GL_COMPUTE_SHADER_BIT, p);
2585
2586		glUseProgram(0);
2587		glBindProgramPipeline(pipeline);
2588		glDispatchCompute(1, 1, 1);
2589
2590		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
2591		data = static_cast<vec4*>(glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(vec4), GL_MAP_READ_BIT));
2592
2593		if (!IsEqual(data[0], vec4(1.0f, 2.0f, 3.0f, 4.0f)))
2594		{
2595			m_context.getTestContext().getLog()
2596				<< tcu::TestLog::Message << "Invalid value!" << tcu::TestLog::EndMessage;
2597			res = false;
2598		}
2599
2600		glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
2601		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
2602		glDeleteProgramPipelines(1, &pipeline);
2603		glDeleteBuffers(1, &buffer);
2604		glDeleteProgram(p);
2605
2606		return res == true ? NO_ERROR : ERROR;
2607	}
2608};
2609
2610class BasicSharedSimple : public ComputeShaderBase
2611{
2612	virtual std::string Title()
2613	{
2614		return "Shared Memory - simple usage";
2615	}
2616
2617	virtual std::string Purpose()
2618	{
2619		return NL "1. Verify that shared array of uints works as expected." NL
2620				  "2. Verify that shared memory written by one invocation is observable by other invocations" NL
2621				  "    when groupMemoryBarrier() and barrier() built-in functions are used.";
2622	}
2623
2624	virtual std::string Method()
2625	{
2626		return NL "1. Create and dispatch CS with DispatchCompute and DispatchComputeIndirect commands." NL
2627				  "2. Verify results written by CS to SSBO." NL
2628				  "3. Repeat for several different number of work groups.";
2629	}
2630
2631	virtual std::string PassCriteria()
2632	{
2633		return "Everything works as expected.";
2634	}
2635
2636	GLuint m_program;
2637	GLuint m_storage_buffer;
2638	GLuint m_dispatch_buffer;
2639
2640	bool RunIteration(const GLuint num_groups, bool dispatch_indirect)
2641	{
2642		const GLuint kBufferSize = 128 * num_groups;
2643
2644		std::vector<GLuint> data(kBufferSize, 0xffff);
2645		if (m_storage_buffer == 0)
2646			glGenBuffers(1, &m_storage_buffer);
2647		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
2648		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(GLuint) * kBufferSize, &data[0], GL_DYNAMIC_DRAW);
2649		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
2650
2651		glUseProgram(m_program);
2652		if (dispatch_indirect)
2653		{
2654			const GLuint groups[3] = { num_groups, 1, 1 };
2655			if (m_dispatch_buffer == 0)
2656				glGenBuffers(1, &m_dispatch_buffer);
2657			glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer);
2658			glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(groups), groups, GL_STATIC_DRAW);
2659			glDispatchComputeIndirect(0);
2660		}
2661		else
2662		{
2663			glDispatchCompute(num_groups, 1, 1);
2664		}
2665
2666		glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer);
2667		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
2668		GLuint* result;
2669		result = static_cast<GLuint*>(
2670			glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(GLuint) * kBufferSize, GL_MAP_READ_BIT));
2671		bool res = true;
2672		for (GLuint i = 0; i < kBufferSize; ++i)
2673		{
2674			if (result[i] != 1)
2675			{
2676				m_context.getTestContext().getLog() << tcu::TestLog::Message << "Data at index " << i << " is "
2677													<< result[i] << " should be 1." << tcu::TestLog::EndMessage;
2678				res = false;
2679			}
2680		}
2681		glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
2682		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
2683		return res;
2684	}
2685
2686	virtual long Setup()
2687	{
2688		m_program		  = 0;
2689		m_storage_buffer  = 0;
2690		m_dispatch_buffer = 0;
2691		return NO_ERROR;
2692	}
2693
2694	virtual long Run()
2695	{
2696		const char* const glsl_cs =
2697			NL "layout(local_size_x = 128) in;" NL "layout(std430) buffer Output {" NL "  uint g_output[];" NL "};" NL
2698			   "shared uint g_shared_data[128];" NL "void main() {" NL
2699			   "  g_shared_data[gl_LocalInvocationID.x] = gl_LocalInvocationIndex;" NL
2700			   "  groupMemoryBarrier();" // flush memory stores
2701			NL "  barrier();"			 // wait for all stores to finish
2702			NL "  g_output[gl_GlobalInvocationID.x] = 1u;" NL "  if (gl_LocalInvocationIndex < 127u) {" NL
2703			   "    uint res = g_shared_data[gl_LocalInvocationID.x + "
2704			   "1u];" // load data from shared memory filled by other thread
2705			NL "    if (res != (gl_LocalInvocationIndex + 1u)) {" NL "      g_output[gl_GlobalInvocationID.x] = 0u;" NL
2706			   "    }" NL "  }" NL "}";
2707		m_program = CreateComputeProgram(glsl_cs);
2708		glLinkProgram(m_program);
2709		if (!CheckProgram(m_program))
2710			return ERROR;
2711
2712		if (!RunIteration(1, false))
2713			return ERROR;
2714		if (!RunIteration(8, true))
2715			return ERROR;
2716		if (!RunIteration(13, false))
2717			return ERROR;
2718		if (!RunIteration(7, true))
2719			return ERROR;
2720		return NO_ERROR;
2721	}
2722	virtual long Cleanup()
2723	{
2724		glUseProgram(0);
2725		glDeleteProgram(m_program);
2726		glDeleteBuffers(1, &m_storage_buffer);
2727		glDeleteBuffers(1, &m_dispatch_buffer);
2728		return NO_ERROR;
2729	}
2730};
2731
2732class BasicSharedStruct : public ComputeShaderBase
2733{
2734	virtual std::string Title()
2735	{
2736		return "Shared Memory - arrays and structers";
2737	}
2738
2739	virtual std::string Purpose()
2740	{
2741		return NL "1. Verify that vectors, matrices, structers and arrays of those can be used" NL
2742				  "    as a shared memory." NL
2743				  "2. Verify that shared memory can be indexed with constant values, built-in" NL
2744				  "    variables and dynamic expressions." NL
2745				  "3. Verify that memoryBarrierAtomicCounter(), memoryBarrierImage(), memoryBarrier()," NL
2746				  "     memoryBarrierBuffer() and memoryBarrierShared() built-in functions are accepted" NL
2747				  "     by the GLSL compiler.";
2748	}
2749
2750	virtual std::string Method()
2751	{
2752		return NL "1. Create and dispatch CS with DispatchCompute and DispatchComputeIndirect commands." NL
2753				  "2. Verify results written by CS to SSBO.";
2754	}
2755
2756	virtual std::string PassCriteria()
2757	{
2758		return "Everything works as expected.";
2759	}
2760
2761	GLuint m_program;
2762	GLuint m_storage_buffer;
2763	GLuint m_dispatch_buffer;
2764
2765	bool RunIteration(bool dispatch_indirect)
2766	{
2767		const GLuint kBufferSize = 256;
2768
2769		std::vector<vec4> data(kBufferSize);
2770		if (m_storage_buffer == 0)
2771			glGenBuffers(1, &m_storage_buffer);
2772		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
2773		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(vec4) * kBufferSize, &data[0], GL_DYNAMIC_DRAW);
2774		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
2775
2776		glUseProgram(m_program);
2777		if (dispatch_indirect)
2778		{
2779			const GLuint groups[3] = { 1, 1, 1 };
2780			if (m_dispatch_buffer == 0)
2781				glGenBuffers(1, &m_dispatch_buffer);
2782			glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer);
2783			glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(groups), groups, GL_STATIC_DRAW);
2784			glDispatchComputeIndirect(0);
2785		}
2786		else
2787		{
2788			glDispatchCompute(1, 1, 1);
2789		}
2790
2791		glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer);
2792		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
2793		vec4* result;
2794		result = static_cast<vec4*>(
2795			glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(vec4) * kBufferSize, GL_MAP_READ_BIT));
2796		bool res = true;
2797		for (GLuint i = 0; i < kBufferSize; ++i)
2798		{
2799			if (!IsEqual(result[i], vec4(static_cast<float>(i))))
2800			{
2801				m_context.getTestContext().getLog()
2802					<< tcu::TestLog::Message << "Invalid data at index " << i << tcu::TestLog::EndMessage;
2803				res = false;
2804			}
2805		}
2806		glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
2807		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
2808		return res;
2809	}
2810
2811	virtual long Setup()
2812	{
2813		m_program		  = 0;
2814		m_storage_buffer  = 0;
2815		m_dispatch_buffer = 0;
2816		return NO_ERROR;
2817	}
2818
2819	virtual long Run()
2820	{
2821		const char* const glsl_cs = NL
2822			"layout(local_size_x = 128) in;" NL "layout(std430) buffer Output {" NL "  vec4 g_output[256];" NL "};" NL
2823			"struct SubData {" NL "  mat2x4 data;" NL "};" NL "struct Data {" NL "  vec3 data0;" NL "  uint index;" NL
2824			"  SubData data1;" NL "};" NL "shared Data g_shared_data[256];" NL "shared int g_shared_buf[2];" NL
2825			"void main() {" NL "  if (gl_LocalInvocationID.x == 0u) {" NL "    g_shared_buf[1] = 1;" NL
2826			"    g_shared_buf[1u + gl_LocalInvocationID.x] = 0;" NL "    g_shared_buf[0] = 128;" NL
2827			"    g_output[0] = vec4(g_shared_buf[1]);" NL "    g_output[128] = vec4(g_shared_buf[0]);" NL
2828			"    memoryBarrierBuffer();" // note: this call is not needed here, just check if compiler accepts it
2829			NL "  } else {" NL "    uint index = gl_LocalInvocationIndex;" NL
2830			"    g_shared_data[index].index = index;" NL "    g_shared_data[index + 128u].index = index + 128u;" NL
2831			"    g_shared_data[index].data1.data = mat2x4(0.0);" NL
2832			"    g_shared_data[index + 128u].data1.data = mat2x4(0.0);" NL
2833			"    g_output[index] = vec4(g_shared_data[index].index);" // load data from shared memory
2834			NL "    g_output[index + 128u] = vec4(g_shared_data[index + 128u].index);" NL
2835			"    memoryBarrierShared();" // note: this call is not needed here, just check if compiler accepts it
2836			NL "  }" NL "  memoryBarrierAtomicCounter();" NL "  memoryBarrierImage();" NL
2837			"  memoryBarrier();" // note: these calls are not needed here, just check if compiler accepts them
2838			NL "}";
2839		m_program = CreateComputeProgram(glsl_cs);
2840		glLinkProgram(m_program);
2841		if (!CheckProgram(m_program))
2842			return ERROR;
2843
2844		if (!RunIteration(false))
2845			return ERROR;
2846		if (!RunIteration(true))
2847			return ERROR;
2848		return NO_ERROR;
2849	}
2850
2851	virtual long Cleanup()
2852	{
2853		glUseProgram(0);
2854		glDeleteProgram(m_program);
2855		glDeleteBuffers(1, &m_storage_buffer);
2856		glDeleteBuffers(1, &m_dispatch_buffer);
2857		return NO_ERROR;
2858	}
2859};
2860
2861class BasicDispatchIndirect : public ComputeShaderBase
2862{
2863	virtual std::string Title()
2864	{
2865		return NL "DispatchComputeIndirect command";
2866	}
2867
2868	virtual std::string Purpose()
2869	{
2870		return NL
2871			"1. Verify that DispatchComputeIndirect command works as described in the OpenGL specification." NL
2872			"2. Verify that <offset> parameter is correctly applied." NL
2873			"3. Verify that updating dispatch buffer with different methods (BufferData, BufferSubData, MapBuffer)" NL
2874			"    just before DispatchComputeIndirect call works as expected." NL
2875			"4. Verify that GL_DISPATCH_INDIRECT_BUFFER_BINDING binding point is set correctly.";
2876	}
2877
2878	virtual std::string Method()
2879	{
2880		return NL
2881			"1. Create CS and dispatch indirect buffer." NL "2. Dispatch CS with DispatchComputeIndirect command." NL
2882			"3. Update dispatch indirect buffer." NL
2883			"4. Repeat several times updating dispatch buffer with different methods and changing <offset> parameter.";
2884	}
2885
2886	virtual std::string PassCriteria()
2887	{
2888		return NL "Everything works as expected.";
2889	}
2890
2891	GLuint m_program;
2892	GLuint m_storage_buffer;
2893	GLuint m_dispatch_buffer[2];
2894
2895	bool RunIteration(GLintptr offset, GLuint buffer_size)
2896	{
2897		std::vector<GLuint> data(buffer_size);
2898		if (m_storage_buffer == 0)
2899			glGenBuffers(1, &m_storage_buffer);
2900		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
2901		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(GLuint) * buffer_size, &data[0], GL_DYNAMIC_DRAW);
2902
2903		glDispatchComputeIndirect(offset);
2904
2905		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
2906		GLuint* result;
2907		result = static_cast<GLuint*>(
2908			glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(GLuint) * buffer_size, GL_MAP_READ_BIT));
2909		bool res = true;
2910		for (GLuint i = 0; i < buffer_size; ++i)
2911		{
2912			if (result[i] != i)
2913			{
2914				m_context.getTestContext().getLog() << tcu::TestLog::Message << "Data at index " << i << " is "
2915													<< result[i] << " should be " << i << tcu::TestLog::EndMessage;
2916				res = false;
2917			}
2918		}
2919		glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
2920		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
2921		return res;
2922	}
2923
2924	bool CheckBinding(GLuint expected)
2925	{
2926		GLint	 i;
2927		GLint64   i64;
2928		GLfloat   f;
2929		GLboolean b;
2930
2931		GLfloat expectedFloat = static_cast<GLfloat>(expected);
2932
2933		glGetIntegerv(GL_DISPATCH_INDIRECT_BUFFER_BINDING, &i);
2934		if (static_cast<GLuint>(i) != expected)
2935		{
2936			return false;
2937		}
2938		glGetInteger64v(GL_DISPATCH_INDIRECT_BUFFER_BINDING, &i64);
2939		if (static_cast<GLuint>(i64) != expected)
2940		{
2941			return false;
2942		}
2943		glGetFloatv(GL_DISPATCH_INDIRECT_BUFFER_BINDING, &f);
2944		if (f != expectedFloat)
2945		{
2946			return false;
2947		}
2948		glGetBooleanv(GL_DISPATCH_INDIRECT_BUFFER_BINDING, &b);
2949		if (b != (expected != 0 ? GL_TRUE : GL_FALSE))
2950		{
2951			return false;
2952		}
2953
2954		return true;
2955	}
2956
2957	virtual long Setup()
2958	{
2959		m_program		 = 0;
2960		m_storage_buffer = 0;
2961		memset(m_dispatch_buffer, 0, sizeof(m_dispatch_buffer));
2962		return NO_ERROR;
2963	}
2964
2965	virtual long Run()
2966	{
2967		const char* const glsl_cs =
2968			NL "layout(local_size_x = 1) in;" NL "layout(std430) buffer Output {" NL "  uint g_output[];" NL "};" NL
2969			   "uniform uvec3 g_global_size;" NL "void main() {" NL "  uint global_index = gl_GlobalInvocationID.x +" NL
2970			   "                      gl_GlobalInvocationID.y * g_global_size.x +" NL
2971			   "                      gl_GlobalInvocationID.z * g_global_size.x * g_global_size.y;" NL
2972			   "  if (gl_NumWorkGroups != g_global_size) {" NL "    g_output[global_index] = 0xffffu;" NL
2973			   "    return;" NL "  }" NL "  g_output[global_index] = global_index;" NL "}";
2974		m_program = CreateComputeProgram(glsl_cs);
2975		glLinkProgram(m_program);
2976		if (!CheckProgram(m_program))
2977			return ERROR;
2978
2979		if (!CheckBinding(0))
2980			return ERROR;
2981
2982		glGenBuffers(2, m_dispatch_buffer);
2983
2984		const GLuint data[]  = { 1, 2, 3, 4, 5, 6, 7, 8 };
2985		const GLuint data2[] = { 3, 1, 4, 4 };
2986
2987		glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer[0]);
2988		glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(data), data, GL_STREAM_DRAW);
2989		if (!CheckBinding(m_dispatch_buffer[0]))
2990			return ERROR;
2991
2992		glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer[1]);
2993		glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(data2), data2, GL_STREAM_READ);
2994		if (!CheckBinding(m_dispatch_buffer[1]))
2995			return ERROR;
2996
2997		glUseProgram(m_program);
2998		glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer[0]);
2999
3000		glUniform3ui(glGetUniformLocation(m_program, "g_global_size"), 1, 2, 3);
3001		if (!RunIteration(0, 6))
3002			return ERROR;
3003
3004		glUniform3ui(glGetUniformLocation(m_program, "g_global_size"), 2, 3, 4);
3005		if (!RunIteration(4, 24))
3006			return ERROR;
3007
3008		glUniform3ui(glGetUniformLocation(m_program, "g_global_size"), 4, 5, 6);
3009		if (!RunIteration(12, 120))
3010			return ERROR;
3011
3012		glBufferSubData(GL_DISPATCH_INDIRECT_BUFFER, 20, 12, data);
3013		glUniform3ui(glGetUniformLocation(m_program, "g_global_size"), 1, 2, 3);
3014		if (!RunIteration(20, 6))
3015			return ERROR;
3016
3017		GLuint* ptr = static_cast<GLuint*>(
3018			glMapBufferRange(GL_DISPATCH_INDIRECT_BUFFER, 0, sizeof(GLuint) * 4, GL_MAP_WRITE_BIT));
3019		*ptr++ = 4;
3020		*ptr++ = 4;
3021		*ptr++ = 4;
3022		glUnmapBuffer(GL_DISPATCH_INDIRECT_BUFFER);
3023
3024		glUniform3ui(glGetUniformLocation(m_program, "g_global_size"), 4, 4, 4);
3025		if (!RunIteration(0, 64))
3026			return ERROR;
3027
3028		glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer[1]);
3029
3030		glUniform3ui(glGetUniformLocation(m_program, "g_global_size"), 1, 4, 4);
3031		if (!RunIteration(4, 16))
3032			return ERROR;
3033
3034		glDeleteBuffers(2, m_dispatch_buffer);
3035		memset(m_dispatch_buffer, 0, sizeof(m_dispatch_buffer));
3036
3037		if (!CheckBinding(0))
3038			return ERROR;
3039
3040		return NO_ERROR;
3041	}
3042	virtual long Cleanup()
3043	{
3044		glUseProgram(0);
3045		glDeleteProgram(m_program);
3046		glDeleteBuffers(1, &m_storage_buffer);
3047		glDeleteBuffers(2, m_dispatch_buffer);
3048		return NO_ERROR;
3049	}
3050};
3051
3052class BasicSSOComputePipeline : public ComputeShaderBase
3053{
3054	virtual std::string Title()
3055	{
3056		return NL "Separable CS Programs - Compute and non-compute stages (1)";
3057	}
3058	virtual std::string Purpose()
3059	{
3060		return NL "1. Verify that compute and non-compute stages can be attached to one pipeline object." NL
3061				  "2. Verify that DrawArrays and ComputeDispatch commands works as expected in this case.";
3062	}
3063	virtual std::string Method()
3064	{
3065		return NL "1. Create VS, FS and CS. Attach all created stages to one pipeline object." NL
3066				  "2. Bind pipeline object." NL "3. Invoke compute stage with DispatchCompute commmand." NL
3067				  "4. Issue MemoryBarrier command." NL
3068				  "5. Issue DrawArrays command which uses data written by the compute stage." NL "6. Verify result.";
3069	}
3070	virtual std::string PassCriteria()
3071	{
3072		return NL "Everything works as expected.";
3073	}
3074
3075	GLuint m_vsp, m_fsp, m_csp;
3076	GLuint m_storage_buffer;
3077	GLuint m_vertex_array;
3078	GLuint m_pipeline;
3079
3080	virtual long Setup()
3081	{
3082		m_vsp = m_fsp = m_csp = 0;
3083		m_storage_buffer	  = 0;
3084		m_vertex_array		  = 0;
3085		m_pipeline			  = 0;
3086		return NO_ERROR;
3087	}
3088	virtual long Run()
3089	{
3090		const char* const glsl_cs = NL
3091			"layout(local_size_x = 4) in;" NL "layout(std430) buffer Output {" NL "  vec4 g_output[4];" NL "};" NL
3092			"void main() {" NL "  const vec2 quad[4] = vec2[](vec2(-1, -1), vec2(1, -1), vec2(-1, 1), vec2(1, 1));" NL
3093			"  g_output[gl_GlobalInvocationID.x] = vec4(quad[gl_GlobalInvocationID.x], 0, 1);" NL "}";
3094		m_csp = CreateComputeProgram(glsl_cs);
3095		glProgramParameteri(m_csp, GL_PROGRAM_SEPARABLE, GL_TRUE);
3096		glLinkProgram(m_csp);
3097		if (!CheckProgram(m_csp))
3098			return ERROR;
3099
3100		const char* const glsl_vs =
3101			NL "layout(location = 0) in vec4 i_position;" NL "void main() {" NL "  gl_Position = i_position;" NL "}";
3102		m_vsp = BuildShaderProgram(GL_VERTEX_SHADER, glsl_vs);
3103		if (!CheckProgram(m_vsp))
3104			return ERROR;
3105
3106		const char* const glsl_fs = NL "layout(location = 0) out mediump vec4 o_color;" NL "void main() {" NL
3107									   "  o_color = vec4(0, 1, 0, 1);" NL "}";
3108		m_fsp = BuildShaderProgram(GL_FRAGMENT_SHADER, glsl_fs);
3109		if (!CheckProgram(m_fsp))
3110			return ERROR;
3111
3112		glGenProgramPipelines(1, &m_pipeline);
3113		glUseProgramStages(m_pipeline, GL_VERTEX_SHADER_BIT, m_vsp);
3114		glUseProgramStages(m_pipeline, GL_FRAGMENT_SHADER_BIT, m_fsp);
3115		glUseProgramStages(m_pipeline, GL_COMPUTE_SHADER_BIT, m_csp);
3116
3117		glGenBuffers(1, &m_storage_buffer);
3118		glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer);
3119		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(vec4) * 4, NULL, GL_DYNAMIC_DRAW);
3120		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
3121
3122		glGenVertexArrays(1, &m_vertex_array);
3123		glBindVertexArray(m_vertex_array);
3124		glBindBuffer(GL_ARRAY_BUFFER, m_storage_buffer);
3125		glVertexAttribPointer(0, 4, GL_FLOAT, GL_FALSE, 0, 0);
3126		glBindBuffer(GL_ARRAY_BUFFER, 0);
3127		glEnableVertexAttribArray(0);
3128		glBindVertexArray(0);
3129
3130		glBindProgramPipeline(m_pipeline);
3131		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
3132		glDispatchCompute(1, 1, 1);
3133
3134		glClear(GL_COLOR_BUFFER_BIT);
3135		glBindVertexArray(m_vertex_array);
3136		glMemoryBarrier(GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT);
3137		glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
3138
3139		if (!ValidateReadBuffer(0, 0, getWindowWidth(), getWindowHeight(), vec4(0, 1, 0, 1)))
3140			return ERROR;
3141		return NO_ERROR;
3142	}
3143
3144	virtual long Cleanup()
3145	{
3146		glDeleteProgram(m_vsp);
3147		glDeleteProgram(m_fsp);
3148		glDeleteProgram(m_csp);
3149		glDeleteBuffers(1, &m_storage_buffer);
3150		glDeleteVertexArrays(1, &m_vertex_array);
3151		glDeleteProgramPipelines(1, &m_pipeline);
3152		return NO_ERROR;
3153	}
3154};
3155
3156class BasicSSOCase2 : public ComputeShaderBase
3157{
3158	virtual std::string Title()
3159	{
3160		return NL "Separable CS Programs - Compute and non-compute stages (2)";
3161	}
3162	virtual std::string Purpose()
3163	{
3164		return NL "1. Verify that data computed by the compute stage is visible to non-compute stage after "
3165				  "MemoryBarrier command." NL "2. Verify that ProgramParameteri(program, GL_PROGRAM_SEPARABLE, "
3166				  "GL_TRUE) command works correctly for CS." NL
3167				  "3. Verify that gl_WorkGroupSize built-in variable is a contant and can be used as an array size.";
3168	}
3169	virtual std::string Method()
3170	{
3171		return NL "1. Create VS, FS and CS. Attach all created stages to one pipeline object." NL
3172				  "2. Bind pipeline object." NL "3. Invoke compute stage with DispatchCompute commmand." NL
3173				  "4. Issue MemoryBarrier command." NL
3174				  "5. Issue DrawArrays command which uses data written to the buffer object by the compute stage." NL
3175				  "6. Verify result.";
3176	}
3177	virtual std::string PassCriteria()
3178	{
3179		return NL "Everything works as expected.";
3180	}
3181
3182	GLuint m_program_ab;
3183	GLuint m_program_c;
3184	GLuint m_pipeline;
3185	GLuint m_storage_buffer;
3186	GLuint m_vao;
3187
3188	virtual long Setup()
3189	{
3190		m_program_ab	 = 0;
3191		m_program_c		 = 0;
3192		m_pipeline		 = 0;
3193		m_storage_buffer = 0;
3194		m_vao			 = 0;
3195		return NO_ERROR;
3196	}
3197	virtual long Run()
3198	{
3199		GLint res;
3200		glGetIntegerv(GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, &res);
3201		if (res <= 0)
3202		{
3203			OutputNotSupported("GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS <= 0");
3204			return NO_ERROR;
3205		}
3206
3207		const char* const glsl_a =
3208			"#version 310 es" NL "layout(binding = 1, std430) buffer Input {" NL "  mediump vec2 g_input[4];" NL "};" NL
3209			"flat out mediump vec3 color;" NL "void main() {" NL
3210			"  gl_Position = vec4(g_input[gl_VertexID], 0.0, 1.0);" NL "  color = vec3(0.0, 1.0, 0.0);" NL "}";
3211		const char* const glsl_b =
3212			"#version 310 es" NL "flat in mediump vec3 color;" NL "layout(location = 0) out mediump vec4 g_color;" NL
3213			"void main() {" NL "  g_color = vec4(color, 1.0);" NL "}";
3214		const char* const glsl_c =
3215			"#version 310 es" NL "layout(local_size_x = 4) in;" NL "layout(binding = 1, std430) buffer Output {" NL
3216			"  vec2 g_output[gl_WorkGroupSize.x];" NL "};" NL "void main() {" NL
3217			"  if (gl_GlobalInvocationID.x == 0u) {" NL "    g_output[0] = vec2(-0.8, -0.8);" NL
3218			"  } else if (gl_GlobalInvocationID.x == 1u) {" NL "    g_output[1] = vec2(0.8, -0.8);" NL
3219			"  } else if (gl_GlobalInvocationID.x == 2u) {" NL "    g_output[2] = vec2(-0.8, 0.8);" NL
3220			"  } else if (gl_GlobalInvocationID.x == 3u) {" NL "    g_output[3] = vec2(0.8, 0.8);" NL "  }" NL "}";
3221
3222		m_program_ab = glCreateProgram();
3223		GLuint sh	= glCreateShader(GL_VERTEX_SHADER);
3224		glAttachShader(m_program_ab, sh);
3225		glDeleteShader(sh);
3226		glShaderSource(sh, 1, &glsl_a, NULL);
3227		glCompileShader(sh);
3228
3229		sh = glCreateShader(GL_FRAGMENT_SHADER);
3230		glAttachShader(m_program_ab, sh);
3231		glDeleteShader(sh);
3232		glShaderSource(sh, 1, &glsl_b, NULL);
3233		glCompileShader(sh);
3234
3235		glProgramParameteri(m_program_ab, GL_PROGRAM_SEPARABLE, GL_TRUE);
3236		glLinkProgram(m_program_ab);
3237
3238		m_program_c = glCreateShaderProgramv(GL_COMPUTE_SHADER, 1, &glsl_c);
3239
3240		glGenVertexArrays(1, &m_vao);
3241		glGenProgramPipelines(1, &m_pipeline);
3242		glUseProgramStages(m_pipeline, GL_ALL_SHADER_BITS, m_program_ab);
3243		glUseProgramStages(m_pipeline, GL_COMPUTE_SHADER_BIT, m_program_c);
3244
3245		glGenBuffers(1, &m_storage_buffer);
3246		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storage_buffer);
3247		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(vec2) * 4, NULL, GL_STREAM_DRAW);
3248
3249		glClear(GL_COLOR_BUFFER_BIT);
3250		glBindProgramPipeline(m_pipeline);
3251		glDispatchCompute(1, 1, 1);
3252		glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
3253		glBindVertexArray(m_vao);
3254		glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
3255
3256		if (getWindowWidth() < 500 &&
3257			!ValidateReadBufferCenteredQuad(getWindowWidth(), getWindowHeight(), vec3(0, 1, 0)))
3258		{
3259			return ERROR;
3260		}
3261		return NO_ERROR;
3262	}
3263	virtual long Cleanup()
3264	{
3265		glDeleteProgram(m_program_ab);
3266		glDeleteProgram(m_program_c);
3267		glDeleteProgramPipelines(1, &m_pipeline);
3268		glDeleteBuffers(1, &m_storage_buffer);
3269		glDeleteVertexArrays(1, &m_vao);
3270		return NO_ERROR;
3271	}
3272};
3273
3274class BasicSSOCase3 : public ComputeShaderBase
3275{
3276	virtual std::string Title()
3277	{
3278		return NL "Separable CS Programs - Compute stage";
3279	}
3280	virtual std::string Purpose()
3281	{
3282		return NL "Verify that compute shader stage selected with UseProgram command has precedence" NL
3283				  "over compute shader stage selected with BindProgramPipeline command.";
3284	}
3285	virtual std::string Method()
3286	{
3287		return NL "1. Create CS0 with CreateProgram command. Create CS1 with CreateShaderProgramv command." NL
3288				  "2. Verify that CS program selected with UseProgram is dispatched even if there is active" NL
3289				  "    compute stage bound by BindProgramPipeline.";
3290	}
3291	virtual std::string PassCriteria()
3292	{
3293		return NL "Everything works as expected.";
3294	}
3295
3296	GLuint m_program_a;
3297	GLuint m_program_b;
3298	GLuint m_pipeline;
3299	GLuint m_storage_buffer;
3300
3301	virtual long Setup()
3302	{
3303		m_program_a		 = 0;
3304		m_program_b		 = 0;
3305		m_pipeline		 = 0;
3306		m_storage_buffer = 0;
3307		return NO_ERROR;
3308	}
3309	virtual long Run()
3310	{
3311		const char* const glsl_a =
3312			"#version 310 es" NL "layout(local_size_x = 1) in;" NL "layout(binding = 3, std430) buffer Output {" NL
3313			"  int g_output;" NL "};" NL "void main() {" NL "  g_output = 1;" NL "}";
3314		const char* const glsl_b =
3315			"#version 310 es" NL "layout(local_size_x = 1) in;" NL "layout(binding = 3, std430) buffer Output {" NL
3316			"  int g_output;" NL "};" NL "void main() {" NL "  g_output = 2;" NL "}";
3317		/* create program A */
3318		{
3319			m_program_a = glCreateProgram();
3320			GLuint sh   = glCreateShader(GL_COMPUTE_SHADER);
3321			glAttachShader(m_program_a, sh);
3322			glDeleteShader(sh);
3323			glShaderSource(sh, 1, &glsl_a, NULL);
3324			glCompileShader(sh);
3325			glProgramParameteri(m_program_a, GL_PROGRAM_SEPARABLE, GL_TRUE);
3326			glLinkProgram(m_program_a);
3327		}
3328		m_program_b = glCreateShaderProgramv(GL_COMPUTE_SHADER, 1, &glsl_b);
3329
3330		/* create storage buffer */
3331		{
3332			int data = 0;
3333			glGenBuffers(1, &m_storage_buffer);
3334			glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, m_storage_buffer);
3335			glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(int), &data, GL_STREAM_READ);
3336		}
3337
3338		glGenProgramPipelines(1, &m_pipeline);
3339		glUseProgramStages(m_pipeline, GL_ALL_SHADER_BITS, m_program_b);
3340
3341		glUseProgram(m_program_a);
3342		glBindProgramPipeline(m_pipeline);
3343		glDispatchCompute(1, 1, 1);
3344		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
3345
3346		long error = NO_ERROR;
3347		{
3348			int* data;
3349			data = static_cast<int*>(glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(int), GL_MAP_READ_BIT));
3350			if (data[0] != 1)
3351			{
3352				m_context.getTestContext().getLog()
3353					<< tcu::TestLog::Message << "Data is " << data[0] << " should be 1." << tcu::TestLog::EndMessage;
3354				error = ERROR;
3355			}
3356			glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
3357		}
3358
3359		glUseProgram(0);
3360		glDispatchCompute(1, 1, 1);
3361		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
3362
3363		{
3364			int* data;
3365			data = static_cast<int*>(glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(int), GL_MAP_READ_BIT));
3366			if (data[0] != 2)
3367			{
3368				m_context.getTestContext().getLog()
3369					<< tcu::TestLog::Message << "Data is " << data[0] << " should be 2." << tcu::TestLog::EndMessage;
3370				error = ERROR;
3371			}
3372			glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
3373		}
3374
3375		glUseProgram(m_program_b);
3376		glDispatchCompute(1, 1, 1);
3377		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
3378
3379		{
3380			int* data;
3381			data = static_cast<int*>(glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(int), GL_MAP_READ_BIT));
3382			if (data[0] != 2)
3383			{
3384				m_context.getTestContext().getLog()
3385					<< tcu::TestLog::Message << "Data is " << data[0] << " should be 2." << tcu::TestLog::EndMessage;
3386				error = ERROR;
3387			}
3388			glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
3389		}
3390
3391		glUseProgram(0);
3392		glUseProgramStages(m_pipeline, GL_COMPUTE_SHADER_BIT, m_program_a);
3393		glDispatchCompute(1, 1, 1);
3394		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
3395
3396		{
3397			int* data;
3398			data = static_cast<int*>(glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(int), GL_MAP_READ_BIT));
3399			if (data[0] != 1)
3400			{
3401				m_context.getTestContext().getLog()
3402					<< tcu::TestLog::Message << "Data is " << data[0] << " should be 1." << tcu::TestLog::EndMessage;
3403				error = ERROR;
3404			}
3405			glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
3406		}
3407
3408		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
3409		return error;
3410	}
3411	virtual long Cleanup()
3412	{
3413		glDeleteProgram(m_program_a);
3414		glDeleteProgram(m_program_b);
3415		glDeleteProgramPipelines(1, &m_pipeline);
3416		glDeleteBuffers(1, &m_storage_buffer);
3417		return NO_ERROR;
3418	}
3419};
3420
3421class BasicAtomicCase1 : public ComputeShaderBase
3422{
3423	virtual std::string Title()
3424	{
3425		return NL "Atomic functions";
3426	}
3427	virtual std::string Purpose()
3428	{
3429		return NL "1. Verify that atomicAdd function works as expected with int and uint parameters." NL
3430				  "2. Verify that shared memory can be used with atomic functions." NL
3431				  "3. Verify that groupMemoryBarrier() and barrier() built-in functions work as expected.";
3432	}
3433	virtual std::string Method()
3434	{
3435		return NL "1. Use shared memory as a 'counter' with-in one CS work group." NL
3436				  "2. Each shader invocation increments/decrements 'counter' value using atomicAdd function." NL
3437				  "3. Values returned by atomicAdd function are written to SSBO." NL
3438				  "4. Verify SSBO content (values from 0 to 7 should be written).";
3439	}
3440	virtual std::string PassCriteria()
3441	{
3442		return NL "Everything works as expected.";
3443	}
3444
3445	GLuint m_program;
3446	GLuint m_storage_buffer;
3447
3448	virtual long Setup()
3449	{
3450		m_program		 = 0;
3451		m_storage_buffer = 0;
3452		return NO_ERROR;
3453	}
3454	virtual long Run()
3455	{
3456		const char* const glsl_cs =
3457			NL "layout(local_size_x = 8) in;" NL "layout(std430, binding = 0) buffer Output {" NL
3458			   "  uint g_add_output[8];" NL "  int g_sub_output[8];" NL "};" NL "shared uint g_add_value;" NL
3459			   "shared int g_sub_value;" NL "void main() {" NL "  if (gl_LocalInvocationIndex == 0u) {" NL
3460			   "    g_add_value = 0u;" NL "    g_sub_value = 7;" NL "  }" NL
3461			   "  g_add_output[gl_LocalInvocationIndex] = 0u;" NL "  g_sub_output[gl_LocalInvocationIndex] = 0;" NL
3462			   "  groupMemoryBarrier();" NL "  barrier();" NL
3463			   "  g_add_output[gl_LocalInvocationIndex] = atomicAdd(g_add_value, 1u);" NL
3464			   "  g_sub_output[gl_LocalInvocationIndex] = atomicAdd(g_sub_value, -1);" NL "}";
3465		m_program = CreateComputeProgram(glsl_cs);
3466		glLinkProgram(m_program);
3467		if (!CheckProgram(m_program))
3468			return ERROR;
3469
3470		glGenBuffers(1, &m_storage_buffer);
3471		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
3472		glBufferData(GL_SHADER_STORAGE_BUFFER, 16 * sizeof(int), NULL, GL_STATIC_DRAW);
3473
3474		glUseProgram(m_program);
3475		glDispatchCompute(1, 1, 1);
3476		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
3477
3478		int* data;
3479		data = static_cast<int*>(glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(int) * 8, GL_MAP_READ_BIT));
3480		std::sort(data, data + 8);
3481		long error = NO_ERROR;
3482		for (int i = 0; i < 8; ++i)
3483		{
3484			if (data[i] != i)
3485			{
3486				m_context.getTestContext().getLog() << tcu::TestLog::Message << "Data at index " << i << " is "
3487													<< data[i] << " should be " << i << tcu::TestLog::EndMessage;
3488				error = ERROR;
3489			}
3490		}
3491		glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
3492
3493		data = static_cast<int*>(
3494			glMapBufferRange(GL_SHADER_STORAGE_BUFFER, sizeof(int) * 8, sizeof(int) * 8, GL_MAP_READ_BIT));
3495		std::sort(data, data + 8);
3496		for (int i = 0; i < 8; ++i)
3497		{
3498			if (data[i] != i)
3499			{
3500				m_context.getTestContext().getLog() << tcu::TestLog::Message << "Data at index " << i << " is "
3501													<< data[i] << " should be " << i << tcu::TestLog::EndMessage;
3502				error = ERROR;
3503			}
3504		}
3505		glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
3506		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
3507		return error;
3508	}
3509	virtual long Cleanup()
3510	{
3511		glUseProgram(0);
3512		glDeleteProgram(m_program);
3513		glDeleteBuffers(1, &m_storage_buffer);
3514		return NO_ERROR;
3515	}
3516};
3517
3518class BasicAtomicCase2 : public ComputeShaderBase
3519{
3520	virtual std::string Title()
3521	{
3522		return NL "Atomic functions - buffer variables";
3523	}
3524	virtual std::string Purpose()
3525	{
3526		return NL "1. Verify that all atomic functions (atomicExchange, atomicMin, atomicMax," NL
3527				  "    atomicAnd, atomicOr, atomicXor and atomicCompSwap) works as expected with buffer variables." NL
3528				  "2. Verify that atomic functions work with parameters being constants and" NL
3529				  "    with parameters being uniforms." NL
3530				  "3. Verify that barrier() built-in function can be used in a control flow.";
3531	}
3532	virtual std::string Method()
3533	{
3534		return NL "1. Create CS that uses all atomic functions. Values returned by the atomic functions are written to "
3535				  "SSBO." NL "2. Dispatch CS with DispatchCompute and DispatchComputeIndirect commands." NL
3536				  "3. Verify SSBO content." NL
3537				  "4. Repeat for different number of work groups and different work group sizes.";
3538	}
3539	virtual std::string PassCriteria()
3540	{
3541		return NL "Everything works as expected.";
3542	}
3543
3544	GLuint m_program;
3545	GLuint m_storage_buffer[2];
3546	GLuint m_dispatch_buffer;
3547
3548	std::string GenSource(const uvec3& local_size, const uvec3& num_groups)
3549	{
3550		const uvec3		  global_size = local_size * num_groups;
3551		std::stringstream ss;
3552		ss << NL "layout(local_size_x = " << local_size.x() << ", local_size_y = " << local_size.y()
3553		   << ", local_size_z = " << local_size.z() << ") in;" NL "const uvec3 kGlobalSize = uvec3(" << global_size.x()
3554		   << ", " << global_size.y() << ", " << global_size.z()
3555		   << ");" NL "layout(std430, binding = 0) buffer OutputU {" NL "  uint g_uint_out["
3556		   << global_size.x() * global_size.y() * global_size.z()
3557		   << "];" NL "};" NL "layout(std430, binding = 1) buffer OutputI {" NL "  int data["
3558		   << global_size.x() * global_size.y() * global_size.z()
3559		   << "];" NL "} g_int_out;" NL "uniform uint g_uint_value[8];" NL "void main() {" NL
3560			  "  uint global_index = gl_GlobalInvocationID.x +" NL
3561			  "                      gl_GlobalInvocationID.y * kGlobalSize.x +" NL
3562			  "                      gl_GlobalInvocationID.z * kGlobalSize.x * kGlobalSize.y;" NL
3563			  "  atomicExchange(g_uint_out[global_index], g_uint_value[0]);" NL
3564			  "  atomicMin(g_uint_out[global_index], g_uint_value[1]);" NL
3565			  "  atomicMax(g_uint_out[global_index], g_uint_value[2]);" NL
3566			  "  atomicAnd(g_uint_out[global_index], g_uint_value[3]);" NL
3567			  "  atomicOr(g_uint_out[global_index], g_uint_value[4]);" NL "  if (g_uint_value[0] > 0u) {" NL
3568			  "    barrier();" // not needed here, just check if compiler accepts it in a control flow
3569			NL "    atomicXor(g_uint_out[global_index], g_uint_value[5]);" NL "  }" NL
3570			  "  atomicCompSwap(g_uint_out[global_index], g_uint_value[6], g_uint_value[7]);" NL NL
3571			  "  atomicExchange(g_int_out.data[global_index], 3);" NL "  atomicMin(g_int_out.data[global_index], 1);" NL
3572			  "  atomicMax(g_int_out.data[global_index], 2);" NL "  atomicAnd(g_int_out.data[global_index], 0x1);" NL
3573			  "  atomicOr(g_int_out.data[global_index], 0x3);" NL "  atomicXor(g_int_out.data[global_index], 0x1);" NL
3574			  "  atomicCompSwap(g_int_out.data[global_index], 0x2, 0x7);" NL "}";
3575		return ss.str();
3576	}
3577	bool RunIteration(const uvec3& local_size, const uvec3& num_groups, bool dispatch_indirect)
3578	{
3579		if (m_program != 0)
3580			glDeleteProgram(m_program);
3581		m_program = CreateComputeProgram(GenSource(local_size, num_groups));
3582		glLinkProgram(m_program);
3583		if (!CheckProgram(m_program))
3584			return false;
3585
3586		const GLuint kBufferSize =
3587			local_size.x() * num_groups.x() * local_size.y() * num_groups.y() * local_size.z() * num_groups.z();
3588
3589		if (m_storage_buffer[0] == 0)
3590			glGenBuffers(2, m_storage_buffer);
3591		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer[0]);
3592		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(GLuint) * kBufferSize, NULL, GL_DYNAMIC_DRAW);
3593		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storage_buffer[1]);
3594		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(GLint) * kBufferSize, NULL, GL_DYNAMIC_DRAW);
3595
3596		glUseProgram(m_program);
3597		GLuint values[8] = { 3u, 1u, 2u, 0x1u, 0x3u, 0x1u, 0x2u, 0x7u };
3598		glUniform1uiv(glGetUniformLocation(m_program, "g_uint_value"), 8, values);
3599		if (dispatch_indirect)
3600		{
3601			if (m_dispatch_buffer == 0)
3602				glGenBuffers(1, &m_dispatch_buffer);
3603			glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer);
3604			glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(num_groups), &num_groups[0], GL_STATIC_DRAW);
3605			glDispatchComputeIndirect(0);
3606		}
3607		else
3608		{
3609			glDispatchCompute(num_groups.x(), num_groups.y(), num_groups.z());
3610		}
3611		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
3612
3613		bool	res = true;
3614		GLuint* udata;
3615		glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer[0]);
3616		udata = static_cast<GLuint*>(
3617			glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(GLuint) * kBufferSize, GL_MAP_READ_BIT));
3618		for (GLuint i = 0; i < kBufferSize; ++i)
3619		{
3620			if (udata[i] != 7)
3621			{
3622				m_context.getTestContext().getLog() << tcu::TestLog::Message << "uData at index " << i << " is "
3623													<< udata[i] << " should be 7." << tcu::TestLog::EndMessage;
3624				res = false;
3625			}
3626		}
3627		glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
3628		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
3629
3630		GLint* idata;
3631		glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer[1]);
3632		idata = static_cast<GLint*>(
3633			glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(GLint) * kBufferSize, GL_MAP_READ_BIT));
3634		for (GLint i = 0; i < static_cast<GLint>(kBufferSize); ++i)
3635		{
3636			if (idata[i] != 7)
3637			{
3638				m_context.getTestContext().getLog() << tcu::TestLog::Message << "iData at index " << i << " is "
3639													<< idata[i] << " should be 7." << tcu::TestLog::EndMessage;
3640				res = false;
3641			}
3642		}
3643		glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
3644		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
3645		return res;
3646	}
3647	virtual long Setup()
3648	{
3649		m_program			= 0;
3650		m_storage_buffer[0] = m_storage_buffer[1] = 0;
3651		m_dispatch_buffer						  = 0;
3652		return NO_ERROR;
3653	}
3654	virtual long Run()
3655	{
3656		if (!RunIteration(uvec3(64, 1, 1), uvec3(8, 1, 1), false))
3657			return ERROR;
3658		if (!RunIteration(uvec3(1, 1, 64), uvec3(1, 5, 2), true))
3659			return ERROR;
3660		if (!RunIteration(uvec3(1, 1, 4), uvec3(2, 2, 2), false))
3661			return ERROR;
3662		if (!RunIteration(uvec3(3, 2, 1), uvec3(1, 2, 3), true))
3663			return ERROR;
3664		if (!RunIteration(uvec3(2, 4, 2), uvec3(2, 4, 1), false))
3665			return ERROR;
3666		if (!RunIteration(uvec3(2, 4, 7), uvec3(2, 1, 4), true))
3667			return ERROR;
3668		return NO_ERROR;
3669	}
3670	virtual long Cleanup()
3671	{
3672		glUseProgram(0);
3673		glDeleteProgram(m_program);
3674		glDeleteBuffers(2, m_storage_buffer);
3675		glDeleteBuffers(1, &m_dispatch_buffer);
3676		return NO_ERROR;
3677	}
3678};
3679
3680class BasicAtomicCase3 : public ComputeShaderBase
3681{
3682	virtual std::string Title()
3683	{
3684		return NL "Atomic functions - shared variables";
3685	}
3686	virtual std::string Purpose()
3687	{
3688		return NL "1. Verify that all atomic functions (atomicExchange, atomicMin, atomicMax," NL
3689				  "    atomicAnd, atomicOr, atomicXor and atomicCompSwap) works as expected with shared variables." NL
3690				  "2. Verify that atomic functions work with parameters being constants and" NL
3691				  "    with parameters being uniforms." NL
3692				  "3. Verify that atomic functions can be used in a control flow.";
3693	}
3694	virtual std::string Method()
3695	{
3696		return NL "1. Create CS that uses all atomic functions. Values returned by the atomic functions are written to "
3697				  "SSBO." NL "2. Dispatch CS with DispatchCompute and DispatchComputeIndirect commands." NL
3698				  "3. Verify SSBO content." NL
3699				  "4. Repeat for different number of work groups and different work group sizes.";
3700	}
3701	virtual std::string PassCriteria()
3702	{
3703		return NL "Everything works as expected.";
3704	}
3705
3706	GLuint m_program;
3707	GLuint m_storage_buffer;
3708	GLuint m_dispatch_buffer;
3709
3710	std::string GenSource(const uvec3& local_size)
3711	{
3712		std::stringstream ss;
3713		ss << NL "layout(local_size_x = " << local_size.x() << ", local_size_y = " << local_size.y()
3714		   << ", local_size_z = " << local_size.z()
3715		   << ") in;" NL "layout(std430, binding = 0) buffer Output {" NL "  uint g_uint_out["
3716		   << local_size.x() * local_size.y() * local_size.z() << "];" NL "  int g_int_out["
3717		   << local_size.x() * local_size.y() * local_size.z() << "];" NL "};" NL "shared uint g_shared_uint["
3718		   << local_size.x() * local_size.y() * local_size.z() << "];" NL "shared int g_shared_int["
3719		   << local_size.x() * local_size.y() * local_size.z()
3720		   << "];" NL "uniform uint g_uint_value[8];" NL "void main() {" NL
3721			  "  atomicExchange(g_shared_uint[gl_LocalInvocationIndex], g_uint_value[0]);" NL
3722			  "  atomicMin(g_shared_uint[gl_LocalInvocationIndex], g_uint_value[1]);" NL
3723			  "  atomicMax(g_shared_uint[gl_LocalInvocationIndex], g_uint_value[2]);" NL
3724			  "  atomicAnd(g_shared_uint[gl_LocalInvocationIndex], g_uint_value[3]);" NL
3725			  "  atomicOr(g_shared_uint[gl_LocalInvocationIndex], g_uint_value[4]);" NL
3726			  "  atomicXor(g_shared_uint[gl_LocalInvocationIndex], g_uint_value[5]);" NL
3727			  "  atomicCompSwap(g_shared_uint[gl_LocalInvocationIndex], g_uint_value[6], g_uint_value[7]);" NL NL
3728			  "  atomicExchange(g_shared_int[gl_LocalInvocationIndex], 3);" NL
3729			  "  atomicMin(g_shared_int[gl_LocalInvocationIndex], 1);" NL
3730			  "  atomicMax(g_shared_int[gl_LocalInvocationIndex], 2);" NL
3731			  "  atomicAnd(g_shared_int[gl_LocalInvocationIndex], 0x1);" NL "  if (g_uint_value[1] > 0u) {" NL
3732			  "    atomicOr(g_shared_int[gl_LocalInvocationIndex], 0x3);" NL
3733			  "    atomicXor(g_shared_int[gl_LocalInvocationIndex], 0x1);" NL
3734			  "    atomicCompSwap(g_shared_int[gl_LocalInvocationIndex], 0x2, 0x7);" NL "  }" NL NL
3735			  "  g_uint_out[gl_LocalInvocationIndex] = g_shared_uint[gl_LocalInvocationIndex];" NL
3736			  "  g_int_out[gl_LocalInvocationIndex] = g_shared_int[gl_LocalInvocationIndex];" NL "}";
3737		return ss.str();
3738	}
3739	bool RunIteration(const uvec3& local_size, bool dispatch_indirect)
3740	{
3741		if (m_program != 0)
3742			glDeleteProgram(m_program);
3743		m_program = CreateComputeProgram(GenSource(local_size));
3744		glLinkProgram(m_program);
3745		if (!CheckProgram(m_program))
3746			return false;
3747
3748		const GLuint kBufferSize = local_size.x() * local_size.y() * local_size.z();
3749
3750		if (m_storage_buffer == 0)
3751			glGenBuffers(1, &m_storage_buffer);
3752		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
3753		glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(GLuint) * kBufferSize * 2, NULL, GL_DYNAMIC_DRAW);
3754
3755		glUseProgram(m_program);
3756		GLuint values[8] = { 3u, 1u, 2u, 0x1u, 0x3u, 0x1u, 0x2u, 0x7u };
3757		glUniform1uiv(glGetUniformLocation(m_program, "g_uint_value"), 8, values);
3758		if (dispatch_indirect)
3759		{
3760			const GLuint num_groups[3] = { 1, 1, 1 };
3761			if (m_dispatch_buffer == 0)
3762				glGenBuffers(1, &m_dispatch_buffer);
3763			glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer);
3764			glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(num_groups), &num_groups[0], GL_STATIC_DRAW);
3765			glDispatchComputeIndirect(0);
3766		}
3767		else
3768		{
3769			glDispatchCompute(1, 1, 1);
3770		}
3771		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
3772
3773		bool	ret = true;
3774		GLuint* udata;
3775		udata = static_cast<GLuint*>(
3776			glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(GLuint) * kBufferSize, GL_MAP_READ_BIT));
3777		for (GLuint i = 0; i < kBufferSize; ++i)
3778		{
3779			if (udata[i] != 7)
3780			{
3781				m_context.getTestContext().getLog() << tcu::TestLog::Message << "uData at index " << i << " is "
3782													<< udata[i] << " should be 7." << tcu::TestLog::EndMessage;
3783				ret = false;
3784			}
3785		}
3786		glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
3787
3788		GLint* idata;
3789		idata = static_cast<GLint*>(glMapBufferRange(GL_SHADER_STORAGE_BUFFER, sizeof(GLuint) * kBufferSize,
3790													 sizeof(GLint) * kBufferSize, GL_MAP_READ_BIT));
3791		for (GLint i = 0; i < static_cast<GLint>(kBufferSize); ++i)
3792		{
3793			if (idata[i] != 7)
3794			{
3795				m_context.getTestContext().getLog() << tcu::TestLog::Message << "iData at index " << i << " is "
3796													<< idata[i] << " should be 7." << tcu::TestLog::EndMessage;
3797				ret = false;
3798			}
3799		}
3800		glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
3801		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
3802
3803		return ret;
3804	}
3805	virtual long Setup()
3806	{
3807		m_program		  = 0;
3808		m_storage_buffer  = 0;
3809		m_dispatch_buffer = 0;
3810		return NO_ERROR;
3811	}
3812	virtual long Run()
3813	{
3814		if (!RunIteration(uvec3(64, 1, 1), false))
3815			return ERROR;
3816		if (!RunIteration(uvec3(1, 1, 64), true))
3817			return ERROR;
3818		if (!RunIteration(uvec3(1, 1, 4), false))
3819			return ERROR;
3820		if (!RunIteration(uvec3(3, 2, 1), true))
3821			return ERROR;
3822		if (!RunIteration(uvec3(2, 4, 2), false))
3823			return ERROR;
3824		if (!RunIteration(uvec3(2, 4, 7), true))
3825			return ERROR;
3826		return NO_ERROR;
3827	}
3828	virtual long Cleanup()
3829	{
3830		glUseProgram(0);
3831		glDeleteProgram(m_program);
3832		glDeleteBuffers(1, &m_storage_buffer);
3833		glDeleteBuffers(1, &m_dispatch_buffer);
3834		return NO_ERROR;
3835	}
3836};
3837
3838class AdvancedCopyImage : public ComputeShaderBase
3839{
3840	virtual std::string Title()
3841	{
3842		return NL "Copy Image";
3843	}
3844	virtual std::string Purpose()
3845	{
3846		return NL "Verify that copying two textures using CS works as expected.";
3847	}
3848	virtual std::string Method()
3849	{
3850		return NL "Use shader image load and store operations to copy two textures in the CS.";
3851	}
3852	virtual std::string PassCriteria()
3853	{
3854		return NL "Everything works as expected.";
3855	}
3856
3857	GLuint m_program;
3858	GLuint m_texture[2];
3859	GLuint m_fbo;
3860
3861	virtual long Setup()
3862	{
3863		m_program = 0;
3864		m_fbo	 = 0;
3865		memset(m_texture, 0, sizeof(m_texture));
3866		return NO_ERROR;
3867	}
3868	virtual long Run()
3869	{
3870		const char* const glsl_cs =
3871			NL "#define TILE_WIDTH 8" NL "#define TILE_HEIGHT 8" NL
3872			   "const ivec2 kTileSize = ivec2(TILE_WIDTH, TILE_HEIGHT);" NL NL
3873			   "layout(binding = 0, rgba8) readonly uniform mediump image2D g_input_image;" NL
3874			   "layout(binding = 1, rgba8) writeonly uniform mediump image2D g_output_image;" NL NL
3875			   "layout(local_size_x=TILE_WIDTH, local_size_y=TILE_HEIGHT) in;" NL				 NL "void main() {" NL
3876			   "  ivec2 tile_xy = ivec2(gl_WorkGroupID);" NL "  ivec2 thread_xy = ivec2(gl_LocalInvocationID);" NL
3877			   "  ivec2 pixel_xy = tile_xy * kTileSize + thread_xy;" NL NL
3878			   "  vec4 pixel = imageLoad(g_input_image, pixel_xy);" NL
3879			   "  imageStore(g_output_image, pixel_xy, pixel);" NL "}";
3880		m_program = CreateComputeProgram(glsl_cs);
3881		glLinkProgram(m_program);
3882		if (!CheckProgram(m_program))
3883			return ERROR;
3884
3885		std::vector<GLubyte> in_image(64 * 64 * 4, 0x0f);
3886		std::vector<GLubyte> out_image(64 * 64 * 4, 0xff);
3887
3888		glGenTextures(2, m_texture);
3889		glBindTexture(GL_TEXTURE_2D, m_texture[0]);
3890		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
3891		glTexStorage2D(GL_TEXTURE_2D, 1, GL_RGBA8, 64, 64);
3892		glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 64, 64, GL_RGBA, GL_UNSIGNED_BYTE, &in_image[0]);
3893
3894		glBindTexture(GL_TEXTURE_2D, m_texture[1]);
3895		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
3896		glTexStorage2D(GL_TEXTURE_2D, 1, GL_RGBA8, 64, 64);
3897		glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 64, 64, GL_RGBA, GL_UNSIGNED_BYTE, &out_image[0]);
3898
3899		glUseProgram(m_program);
3900		glBindImageTexture(0, m_texture[0], 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA8);
3901		glBindImageTexture(1, m_texture[1], 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8);
3902		glDispatchCompute(9, 8,
3903						  1); // 9 is on purpose, to ensure that out of bounds image load and stores have no effect
3904		glMemoryBarrier(GL_TEXTURE_UPDATE_BARRIER_BIT);
3905
3906		std::vector<GLubyte> data(64 * 64 * 4);
3907		glGenFramebuffers(1, &m_fbo);
3908		glBindFramebuffer(GL_FRAMEBUFFER, m_fbo);
3909		glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_texture[1], 0);
3910		glReadPixels(0, 0, 64, 64, GL_RGBA, GL_UNSIGNED_BYTE, &data[0]);
3911		for (std::size_t i = 0; i < data.size(); ++i)
3912		{
3913			if (data[i] != 0x0f)
3914			{
3915				m_context.getTestContext().getLog() << tcu::TestLog::Message << "Data at index " << i << " is "
3916													<< data[i] << " should be " << 0x0f << tcu::TestLog::EndMessage;
3917				return ERROR;
3918			}
3919		}
3920
3921		return NO_ERROR;
3922	}
3923	virtual long Cleanup()
3924	{
3925		glUseProgram(0);
3926		glDeleteProgram(m_program);
3927		glDeleteFramebuffers(1, &m_fbo);
3928		glDeleteTextures(2, m_texture);
3929		return NO_ERROR;
3930	}
3931};
3932
3933class AdvancedPipelinePreVS : public ComputeShaderBase
3934{
3935	virtual std::string Title()
3936	{
3937		return NL "CS as an additional pipeline stage - Before VS (1)";
3938	}
3939	virtual std::string Purpose()
3940	{
3941		return NL "Verify that CS which runs just before VS and modifies VBO content works as expected.";
3942	}
3943	virtual std::string Method()
3944	{
3945		return NL "1. Prepare VBO and VAO for a drawing operation." NL "2. Run CS to modify existing VBO content." NL
3946				  "3. Issue MemoryBarrier(GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT) command." NL
3947				  "4. Issue draw call command." NL "5. Verify that the framebuffer content is as expected.";
3948	}
3949	virtual std::string PassCriteria()
3950	{
3951		return NL "Everything works as expected.";
3952	}
3953
3954	GLuint m_program[2];
3955	GLuint m_vertex_buffer;
3956	GLuint m_vertex_array;
3957
3958	virtual long Setup()
3959	{
3960		memset(m_program, 0, sizeof(m_program));
3961		m_vertex_buffer = 0;
3962		m_vertex_array  = 0;
3963		return NO_ERROR;
3964	}
3965	virtual long Run()
3966	{
3967		const char* const glsl_cs = NL "layout(local_size_x = 4) in;" NL "struct Vertex {" NL "  vec4 position;" NL
3968									   "  vec4 color;" NL "};" NL "layout(binding = 0, std430) buffer VertexBuffer {" NL
3969									   "  Vertex g_vertex[];" NL "};" NL "uniform float g_scale;" NL "void main() {" NL
3970									   "  g_vertex[gl_GlobalInvocationID.x].position.xyz *= g_scale;" NL
3971									   "  g_vertex[gl_GlobalInvocationID.x].color *= vec4(0.0, 1.0, 0.0, 1.0);" NL "}";
3972		m_program[0] = CreateComputeProgram(glsl_cs);
3973		glLinkProgram(m_program[0]);
3974		glUseProgram(m_program[0]);
3975		glUniform1f(glGetUniformLocation(m_program[0], "g_scale"), 0.8f);
3976		glUseProgram(0);
3977		if (!CheckProgram(m_program[0]))
3978			return ERROR;
3979
3980		const char* const glsl_vs =
3981			NL "layout(location = 0) in mediump vec4 g_position;" NL "layout(location = 1) in mediump vec4 g_color;" NL
3982			   "flat out mediump vec4 color;" NL "void main() {" NL "  gl_Position = g_position;" NL
3983			   "  color = g_color;" NL "}";
3984		const char* const glsl_fs =
3985			NL "flat in mediump vec4 color;" NL "layout(location = 0) out mediump vec4 g_color;" NL "void main() {" NL
3986			   "  g_color = color;" NL "}";
3987		m_program[1] = CreateProgram(glsl_vs, glsl_fs);
3988		glLinkProgram(m_program[1]);
3989		if (!CheckProgram(m_program[1]))
3990			return ERROR;
3991
3992		/* vertex buffer */
3993		{
3994			const float data[] = { -1, -1, 0, 1, 1, 1, 1, 1, 1, -1, 0, 1, 1, 1, 1, 1,
3995								   -1, 1,  0, 1, 1, 1, 1, 1, 1, 1,  0, 1, 1, 1, 1, 1 };
3996			glGenBuffers(1, &m_vertex_buffer);
3997			glBindBuffer(GL_ARRAY_BUFFER, m_vertex_buffer);
3998			glBufferData(GL_ARRAY_BUFFER, sizeof(data), data, GL_STATIC_DRAW);
3999			glBindBuffer(GL_ARRAY_BUFFER, 0);
4000		}
4001
4002		glGenVertexArrays(1, &m_vertex_array);
4003		glBindVertexArray(m_vertex_array);
4004		glBindBuffer(GL_ARRAY_BUFFER, m_vertex_buffer);
4005		glVertexAttribPointer(0, 4, GL_FLOAT, GL_FALSE, 2 * sizeof(vec4), 0);
4006		glVertexAttribPointer(1, 4, GL_FLOAT, GL_FALSE, 2 * sizeof(vec4), reinterpret_cast<void*>(sizeof(vec4)));
4007		glBindBuffer(GL_ARRAY_BUFFER, 0);
4008		glEnableVertexAttribArray(0);
4009		glEnableVertexAttribArray(1);
4010		glBindVertexArray(0);
4011
4012		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_vertex_buffer);
4013		glUseProgram(m_program[0]);
4014		glDispatchCompute(1, 1, 1);
4015
4016		glClear(GL_COLOR_BUFFER_BIT);
4017		glUseProgram(m_program[1]);
4018		glBindVertexArray(m_vertex_array);
4019		glMemoryBarrier(GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT);
4020		glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, 1);
4021
4022		if (getWindowWidth() < 500 &&
4023			!ValidateReadBufferCenteredQuad(getWindowWidth(), getWindowHeight(), vec3(0, 1, 0)))
4024		{
4025			return ERROR;
4026		}
4027		return NO_ERROR;
4028	}
4029	virtual long Cleanup()
4030	{
4031		glUseProgram(0);
4032		for (int i = 0; i < 2; ++i)
4033			glDeleteProgram(m_program[i]);
4034		glDeleteBuffers(1, &m_vertex_buffer);
4035		glDeleteVertexArrays(1, &m_vertex_array);
4036		return NO_ERROR;
4037	}
4038};
4039
4040class AdvancedPipelineGenDrawCommands : public ComputeShaderBase
4041{
4042	virtual std::string Title()
4043	{
4044		return NL "CS as an additional pipeline stage - Before VS (2)";
4045	}
4046	virtual std::string Purpose()
4047	{
4048		return NL "Verify that a complex scenario where CS is used to generate drawing commands" NL
4049				  "and write them to a draw indirect buffer works as expected. This is a practial usage of CS." NL
4050				  "CS is used for culling objects which are outside of the viewing frustum.";
4051	}
4052	virtual std::string Method()
4053	{
4054		return NL "1. Run CS which will generate four sets of draw call parameters and write them to the draw indirect "
4055				  "buffer." NL "2. One set of draw call parameters will be: 0, 0, 0, 0" NL
4056				  "    (which means that an object is outside of the viewing frustum and should not be drawn)." NL
4057				  "3. Issue MemoryBarrier(GL_COMMAND_BARRIER_BIT) command." NL
4058				  "4. Issue four draw indirect commands." NL "5. Verify that the framebuffer content is as expected.";
4059	}
4060	virtual std::string PassCriteria()
4061	{
4062		return NL "Everything works as expected.";
4063	}
4064
4065	GLuint m_program[2];
4066	GLuint m_vertex_buffer;
4067	GLuint m_index_buffer;
4068	GLuint m_vertex_array;
4069	GLuint m_draw_buffer;
4070	GLuint m_object_buffer;
4071
4072	virtual long Setup()
4073	{
4074		memset(m_program, 0, sizeof(m_program));
4075		m_vertex_buffer = 0;
4076		m_index_buffer  = 0;
4077		m_vertex_array  = 0;
4078		m_draw_buffer   = 0;
4079		m_object_buffer = 0;
4080		return NO_ERROR;
4081	}
4082	virtual long Run()
4083	{
4084		GLint res;
4085		glGetIntegerv(GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, &res);
4086		if (res <= 0)
4087		{
4088			OutputNotSupported("GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS <= 0");
4089			return NOT_SUPPORTED;
4090		}
4091
4092		const char* const glsl_cs =
4093			NL "layout(local_size_x = 4) in;" NL "struct DrawCommand {" NL "  uint count;" NL
4094			   "  uint instance_count;" NL "  uint first_index;" NL "  int base_vertex;" NL "  uint base_instance;" NL
4095			   "};" NL "layout(std430) buffer;" NL "layout(binding = 0) readonly buffer ObjectBuffer {" NL
4096			   "  mat4 transform[4];" NL "  uint count[4];" NL "  uint first_index[4];" NL "} g_objects;" NL
4097			   "layout(binding = 1) writeonly buffer DrawCommandBuffer {" NL "  DrawCommand g_command[4];" NL "};" NL
4098			   "bool IsObjectVisible(uint id) {" NL
4099			   "  if (g_objects.transform[id][3].x < -1.0 || g_objects.transform[id][3].x > 1.0) return false;" NL
4100			   "  if (g_objects.transform[id][3][1] < -1.0 || g_objects.transform[id][3][1] > 1.0) return false;" NL
4101			   "  if (g_objects.transform[id][3][2] < -1.0 || g_objects.transform[id][3].z > 1.0) return false;" NL
4102			   "  return true;" NL "}" NL "void main() {" NL "  uint id = gl_GlobalInvocationID.x;" NL
4103			   "  g_command[id].count = 0u;" NL "  g_command[id].instance_count = 0u;" NL
4104			   "  g_command[id].first_index = 0u;" NL "  g_command[id].base_vertex = int(0);" NL
4105			   "  g_command[id].base_instance = 0u;" NL "  if (IsObjectVisible(id)) {" NL
4106			   "    g_command[id].count = g_objects.count[id];" NL "    g_command[id].instance_count = 1u;" NL
4107			   "    g_command[id].first_index = g_objects.first_index[id];" NL "  }" NL "}";
4108		m_program[0] = CreateComputeProgram(glsl_cs);
4109		glLinkProgram(m_program[0]);
4110		if (!CheckProgram(m_program[0]))
4111			return ERROR;
4112
4113		const char* const glsl_vs =
4114			NL "layout(location = 0) in mediump vec4 g_position;" NL "layout(location = 1) in mediump vec3 g_color;" NL
4115			   "flat out mediump vec3 color;" NL "layout(binding = 0, std430) buffer ObjectBuffer {" NL
4116			   "  mediump mat4 transform[4];" NL "  uint count[4];" NL "  uint first_index[4];" NL "} g_objects;" NL
4117			   "uniform int g_object_id;" NL "void main() {" NL
4118			   "  gl_Position = g_objects.transform[g_object_id] * g_position;" NL "  color = g_color;" NL "}";
4119		const char* const glsl_fs =
4120			NL "flat in mediump vec3 color;" NL "layout(location = 0) out mediump vec4 g_color;" NL "void main() {" NL
4121			   "  g_color = vec4(color, 1.0);" NL "}";
4122		m_program[1] = CreateProgram(glsl_vs, glsl_fs);
4123		glLinkProgram(m_program[1]);
4124		if (!CheckProgram(m_program[1]))
4125			return ERROR;
4126		glViewport(0, 0, 100, 100);
4127
4128		/* object buffer */
4129		{
4130			struct
4131			{
4132				mat4   transform[4];
4133				GLuint count[4];
4134				GLuint first_index[4];
4135			} data = {
4136				{ tcu::translationMatrix(vec3(-1.5f, -0.5f, 0.0f)), tcu::translationMatrix(vec3(0.5f, -0.5f, 0.0f)),
4137				  tcu::translationMatrix(vec3(-0.5f, 0.5f, 0.0f)), tcu::translationMatrix(vec3(0.5f, 0.5f, 0.0f)) },
4138				{ 4, 4, 4, 4 },
4139				{ 0, 4, 8, 12 }
4140			};
4141			glGenBuffers(1, &m_object_buffer);
4142			glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_object_buffer);
4143			glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(data), &data, GL_STATIC_DRAW);
4144		}
4145		/* vertex buffer */
4146		{
4147			const vec3 data[] = { vec3(-0.4f, -0.4f, 0.0f), vec3(1, 0, 0), vec3(0.4f, -0.4f, 0.0f), vec3(1, 0, 0),
4148								  vec3(-0.4f, 0.4f, 0.0f),  vec3(1, 0, 0), vec3(0.4f, 0.4f, 0.0f),  vec3(1, 0, 0),
4149								  vec3(-0.4f, -0.4f, 0.0f), vec3(0, 1, 0), vec3(0.4f, -0.4f, 0.0f), vec3(0, 1, 0),
4150								  vec3(-0.4f, 0.4f, 0.0f),  vec3(0, 1, 0), vec3(0.4f, 0.4f, 0.0f),  vec3(0, 1, 0),
4151								  vec3(-0.4f, -0.4f, 0.0f), vec3(0, 0, 1), vec3(0.4f, -0.4f, 0.0f), vec3(0, 0, 1),
4152								  vec3(-0.4f, 0.4f, 0.0f),  vec3(0, 0, 1), vec3(0.4f, 0.4f, 0.0f),  vec3(0, 0, 1),
4153								  vec3(-0.4f, -0.4f, 0.0f), vec3(1, 1, 0), vec3(0.4f, -0.4f, 0.0f), vec3(1, 1, 0),
4154								  vec3(-0.4f, 0.4f, 0.0f),  vec3(1, 1, 0), vec3(0.4f, 0.4f, 0.0f),  vec3(1, 1, 0) };
4155			glGenBuffers(1, &m_vertex_buffer);
4156			glBindBuffer(GL_ARRAY_BUFFER, m_vertex_buffer);
4157			glBufferData(GL_ARRAY_BUFFER, sizeof(data), data, GL_STATIC_DRAW);
4158			glBindBuffer(GL_ARRAY_BUFFER, 0);
4159		}
4160		/* index buffer */
4161		{
4162			const GLushort data[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
4163			glGenBuffers(1, &m_index_buffer);
4164			glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_index_buffer);
4165			glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(data), data, GL_DYNAMIC_DRAW);
4166			glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);
4167		}
4168		glGenBuffers(1, &m_draw_buffer);
4169		glBindBuffer(GL_DRAW_INDIRECT_BUFFER, m_draw_buffer);
4170		glBufferData(GL_DRAW_INDIRECT_BUFFER, 4 * sizeof(GLuint) * 5, NULL, GL_DYNAMIC_DRAW);
4171		glBindBuffer(GL_DRAW_INDIRECT_BUFFER, 0);
4172
4173		glGenVertexArrays(1, &m_vertex_array);
4174		glBindVertexArray(m_vertex_array);
4175		glBindBuffer(GL_ARRAY_BUFFER, m_vertex_buffer);
4176		glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, 2 * sizeof(vec3), 0);
4177		glVertexAttribPointer(1, 3, GL_FLOAT, GL_FALSE, 2 * sizeof(vec3), reinterpret_cast<void*>(sizeof(vec3)));
4178		glBindBuffer(GL_ARRAY_BUFFER, 0);
4179		glEnableVertexAttribArray(0);
4180		glEnableVertexAttribArray(1);
4181		glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_index_buffer);
4182		glBindVertexArray(0);
4183
4184		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_draw_buffer);
4185		glUseProgram(m_program[0]);
4186		glDispatchCompute(1, 1, 1);
4187
4188		glClear(GL_COLOR_BUFFER_BIT);
4189		glUseProgram(m_program[1]);
4190		glBindVertexArray(m_vertex_array);
4191		glBindBuffer(GL_DRAW_INDIRECT_BUFFER, m_draw_buffer);
4192		glMemoryBarrier(GL_COMMAND_BARRIER_BIT);
4193		/* draw (CPU draw calls dispatch, could be done by the GPU with ARB_multi_draw_indirect) */
4194		{
4195			GLsizeiptr offset = 0;
4196			for (int i = 0; i < 4; ++i)
4197			{
4198				glUniform1i(glGetUniformLocation(m_program[1], "g_object_id"), i);
4199				glDrawElementsIndirect(GL_TRIANGLE_STRIP, GL_UNSIGNED_SHORT, reinterpret_cast<void*>(offset));
4200				offset += 5 * sizeof(GLuint);
4201			}
4202		}
4203		if (getWindowWidth() >= 100 && getWindowHeight() >= 100 &&
4204			!ValidateWindow4Quads(vec3(0), vec3(0, 1, 0), vec3(1, 1, 0), vec3(0, 0, 1)))
4205		{
4206			return ERROR;
4207		}
4208		return NO_ERROR;
4209	}
4210	virtual long Cleanup()
4211	{
4212		glUseProgram(0);
4213		for (int i = 0; i < 2; ++i)
4214			glDeleteProgram(m_program[i]);
4215		glDeleteBuffers(1, &m_vertex_buffer);
4216		glDeleteBuffers(1, &m_index_buffer);
4217		glDeleteVertexArrays(1, &m_vertex_array);
4218		glDeleteBuffers(1, &m_draw_buffer);
4219		glDeleteBuffers(1, &m_object_buffer);
4220		glViewport(0, 0, getWindowWidth(), getWindowHeight());
4221		return NO_ERROR;
4222	}
4223};
4224
4225class AdvancedPipelineComputeChain : public ComputeShaderBase
4226{
4227	virtual std::string Title()
4228	{
4229		return NL "Compute Chain";
4230	}
4231	virtual std::string Purpose()
4232	{
4233		return NL "1. Verify that dispatching several compute kernels that work in a sequence" NL
4234				  "    with a common set of resources works as expected." NL
4235				  "2. Verify that indexing nested structures with built-in variables work as expected." NL
4236				  "3. Verify that two kernels can write to the same resource without MemoryBarrier" NL
4237				  "    command if target regions of memory do not overlap.";
4238	}
4239	virtual std::string Method()
4240	{
4241		return NL "1. Create a set of GPU resources (buffers, images, atomic counters)." NL
4242				  "2. Dispatch Kernel0 that write to these resources." NL "3. Issue MemoryBarrier command." NL
4243				  "4. Dispatch Kernel1 that read/write from/to these resources." NL "5. Issue MemoryBarrier command." NL
4244				  "6. Dispatch Kernel2 that read/write from/to these resources." NL
4245				  "7. Verify that content of all resources is as expected.";
4246	}
4247	virtual std::string PassCriteria()
4248	{
4249		return NL "Everything works as expected.";
4250	}
4251
4252	GLuint m_program[3];
4253	GLuint m_storage_buffer[4];
4254	GLuint m_counter_buffer;
4255	GLuint m_texture;
4256	GLuint m_fbo;
4257
4258	std::string Common()
4259	{
4260		return NL "precision highp image2D;" NL "struct S0 {" NL "  int m0[8];" NL "};" NL "struct S1 {" NL
4261				  "  S0 m0[8];" NL "};" NL "layout(binding = 0, std430) buffer Buffer0 {" NL "  int m0[5];" NL
4262				  "  S1 m1[8];" NL "} g_buffer0;" NL "layout(binding = 1, std430) buffer Buffer1 {" NL
4263				  "  uint data[8];" NL "} g_buffer1;" NL "layout(binding = 2, std430) buffer Buffer2 {" NL
4264				  "  int data[256];" NL "} g_buffer2;" NL "layout(binding = 3, std430) buffer Buffer3 {" NL
4265				  "  int data[256];" NL "} g_buffer3;" NL "layout(binding = 4, std430) buffer Buffer4 {" NL
4266				  "  mat4 data0;" NL "  mat4 data1;" NL "} g_buffer4;" NL
4267				  "layout(binding = 0, rgba8) writeonly uniform mediump image2D g_image0;" NL
4268				  "layout(binding = 0, offset = 8) uniform atomic_uint g_counter[2];";
4269	}
4270	std::string GenGLSL(int p)
4271	{
4272		std::stringstream ss;
4273		ss << Common();
4274		if (p == 0)
4275		{
4276			ss << NL "layout(local_size_x = 4, local_size_y = 4, local_size_z = 4) in;" NL
4277					 "void UpdateBuffer0(uvec3 id, int add_val) {" NL "  if (id.x < 8u && id.y < 8u && id.z < 8u) {" NL
4278					 "    g_buffer0.m1[id.z].m0[id.y].m0[id.x] += add_val;" NL "  }" NL "}" NL
4279					 "uniform int g_add_value;" NL "uniform uint g_counter_y;" NL "uniform vec4 g_image_value;" NL
4280					 "void main() {" NL "  uvec3 id = gl_GlobalInvocationID;" NL "  UpdateBuffer0(id, 1);" NL
4281					 "  UpdateBuffer0(id, g_add_value);" NL "  if (id == uvec3(1, g_counter_y, 1)) {" NL
4282					 "    uint idx = atomicCounterIncrement(g_counter[1]);" NL "    g_buffer1.data[idx] = idx;" NL
4283					 "    idx = atomicCounterIncrement(g_counter[1]);" NL "    g_buffer1.data[idx] = idx;" NL "  }" NL
4284					 "  if (id.x < 4u && id.y < 4u && id.z == 0u) {" NL
4285					 "    imageStore(g_image0, ivec2(id.xy), g_image_value);" NL "  }" NL
4286					 "  if (id.x < 2u && id.y == 0u && id.z == 0u) {" NL
4287					 "    g_buffer2.data[id.x] -= int(g_counter_y);" NL "  }" NL "}";
4288		}
4289		else if (p == 1)
4290		{
4291			ss << NL "layout(local_size_x = 4, local_size_y = 4, local_size_z = 1) in;"
4292				// translation matrix
4293				NL "uniform mat4 g_mvp;" NL "void main() {" NL "  if (gl_GlobalInvocationID == uvec3(0)) {" NL
4294					 "    g_buffer4.data0 *= g_mvp;" NL "  }" NL "  if (gl_WorkGroupID == uvec3(0)) {" NL
4295					 "    g_buffer4.data1[gl_LocalInvocationID.y][gl_LocalInvocationID.x] = "
4296					 "g_mvp[gl_LocalInvocationID.x][gl_LocalInvocationID.y];" NL "  }" NL "}";
4297		}
4298		else if (p == 2)
4299		{
4300			ss << NL "layout(local_size_x = 4, local_size_y = 4, local_size_z = 4) in;" NL "void main() {" NL "}";
4301		}
4302		return ss.str();
4303	}
4304	virtual long Setup()
4305	{
4306		memset(m_program, 0, sizeof(m_program));
4307		memset(m_storage_buffer, 0, sizeof(m_storage_buffer));
4308		m_counter_buffer = 0;
4309		m_texture		 = 0;
4310		m_fbo			 = 0;
4311		return NO_ERROR;
4312	}
4313	virtual long Run()
4314	{
4315		using namespace tcu;
4316
4317		for (int i = 0; i < 3; ++i)
4318		{
4319			m_program[i] = CreateComputeProgram(GenGLSL(i));
4320			glLinkProgram(m_program[i]);
4321			if (i == 0)
4322			{
4323				glUseProgram(m_program[i]);
4324				glUniform1i(glGetUniformLocation(m_program[i], "g_add_value"), 1);
4325				glUniform1ui(glGetUniformLocation(m_program[i], "g_counter_y"), 1u);
4326				glUniform4f(glGetUniformLocation(m_program[i], "g_image_value"), 0.25f, 0.5f, 0.75f, 1.0f);
4327				glUseProgram(0);
4328			}
4329			else if (i == 1)
4330			{
4331				glUseProgram(m_program[i]);
4332				GLfloat values[16] = { 1.0f, 0.0f, 0.0f, 0.0f, 0.0f,  1.0f,  0.0f,  0.0f,
4333									   0.0f, 0.0f, 1.0f, 0.0f, 10.0f, 20.0f, 30.0f, 1.0f };
4334				glUniformMatrix4fv(glGetUniformLocation(m_program[i], "g_mvp"), 1, GL_FALSE, values);
4335				glUseProgram(0);
4336			}
4337			if (!CheckProgram(m_program[i]))
4338				return ERROR;
4339		}
4340
4341		glGenBuffers(4, m_storage_buffer);
4342		/* storage buffer 0 */
4343		{
4344			std::vector<int> data(5 + 8 * 8 * 8);
4345			glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer[0]);
4346			glBufferData(GL_SHADER_STORAGE_BUFFER, (GLsizeiptr)(data.size() * sizeof(int)), &data[0], GL_STATIC_COPY);
4347		}
4348		/* storage buffer 1 */
4349		{
4350			const GLuint data[8] = { 0 };
4351			glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storage_buffer[1]);
4352			glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(data), data, GL_STATIC_COPY);
4353		}
4354		/* storage buffer 2 & 3 */
4355		{
4356			std::vector<GLint> data(512, 7);
4357			glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer[2]);
4358			glBufferData(GL_SHADER_STORAGE_BUFFER, (GLsizeiptr)(data.size() * sizeof(GLint)), &data[0], GL_STATIC_COPY);
4359
4360			glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 2, m_storage_buffer[2], 0,
4361							  (GLsizeiptr)(sizeof(GLint) * data.size() / 2));
4362			glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 3, m_storage_buffer[2],
4363							  (GLintptr)(sizeof(GLint) * data.size() / 2),
4364							  (GLsizeiptr)(sizeof(GLint) * data.size() / 2));
4365		}
4366		/* storage buffer 4 */
4367		{
4368			std::vector<mat4> data(2);
4369			data[0] = mat4(1);
4370			glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, m_storage_buffer[3]);
4371			glBufferData(GL_SHADER_STORAGE_BUFFER, (GLsizeiptr)(data.size() * sizeof(mat4)), &data[0], GL_STATIC_COPY);
4372		}
4373		/* counter buffer */
4374		{
4375			GLuint data[4] = { 0 };
4376			glGenBuffers(1, &m_counter_buffer);
4377			glBindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, m_counter_buffer);
4378			glBufferData(GL_ATOMIC_COUNTER_BUFFER, sizeof(data), data, GL_STATIC_COPY);
4379		}
4380		/* texture */
4381		{
4382			std::vector<GLint> data(4 * 4 * 4, 0);
4383			glGenTextures(1, &m_texture);
4384			glBindTexture(GL_TEXTURE_2D, m_texture);
4385			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
4386			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
4387			glTexStorage2D(GL_TEXTURE_2D, 1, GL_RGBA8, 4, 4);
4388			glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 4, 4, GL_RGBA, GL_UNSIGNED_BYTE, &data[0]);
4389			glBindTexture(GL_TEXTURE_2D, 0);
4390		}
4391
4392		glUseProgram(m_program[0]);
4393		glBindImageTexture(0, m_texture, 0, GL_FALSE, 0, GL_READ_WRITE, GL_RGBA8);
4394		glDispatchCompute(2, 2, 2);
4395		glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
4396		glDispatchCompute(3, 2, 2);
4397
4398		glUseProgram(m_program[1]);
4399		glDispatchCompute(4, 3, 7);
4400
4401		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT | GL_TEXTURE_UPDATE_BARRIER_BIT |
4402						GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
4403
4404		long error = NO_ERROR;
4405		/* validate storage buffer 0 */
4406		{
4407			int* data;
4408			glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer[0]);
4409			data = static_cast<int*>(
4410				glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(int) * (5 + 8 * 8 * 8), GL_MAP_READ_BIT));
4411			for (std::size_t i = 5; i < 5 + 8 * 8 * 8; ++i)
4412			{
4413				if (data[i] != 4)
4414				{
4415					m_context.getTestContext().getLog() << tcu::TestLog::Message << "Data is: " << data[i]
4416														<< " should be: 2." << tcu::TestLog::EndMessage;
4417					error = ERROR;
4418				}
4419			}
4420			glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
4421			glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
4422		}
4423		/* validate storage buffer 1 */
4424		{
4425			GLuint* data;
4426			glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer[1]);
4427			data = static_cast<GLuint*>(
4428				glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(GLuint) * 8, GL_MAP_READ_BIT));
4429			for (GLuint i = 0; i < 4; ++i)
4430			{
4431				if (data[i] != i)
4432				{
4433					m_context.getTestContext().getLog() << tcu::TestLog::Message << "Data is: " << data[i]
4434														<< " should be: " << i << tcu::TestLog::EndMessage;
4435					error = ERROR;
4436				}
4437			}
4438			glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
4439			glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
4440		}
4441		/* validate storage buffer 2 & 3 */
4442		{
4443			GLint* data;
4444			glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer[2]);
4445			data = static_cast<GLint*>(
4446				glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(GLint) * 512, GL_MAP_READ_BIT));
4447			for (int i = 0; i < 2; ++i)
4448			{
4449				if (data[i] != 5)
4450				{
4451					m_context.getTestContext().getLog() << tcu::TestLog::Message << "Data is: " << data[i]
4452														<< " should be: 5." << tcu::TestLog::EndMessage;
4453					error = ERROR;
4454				}
4455				if (data[i + 256] != 7)
4456				{
4457					m_context.getTestContext().getLog() << tcu::TestLog::Message << "Data is: " << data[i + 256]
4458														<< " should be: 7." << tcu::TestLog::EndMessage;
4459					error = ERROR;
4460				}
4461			}
4462			glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
4463			glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
4464		}
4465		/* validate storage buffer 4 */
4466		{
4467			mat4* data;
4468			glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer[3]);
4469			data = static_cast<mat4*>(glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(mat4) * 2, GL_MAP_READ_BIT));
4470			if (transpose(data[1]) != translationMatrix(vec3(10.0f, 20.0f, 30.0f)))
4471			{
4472				m_context.getTestContext().getLog()
4473					<< tcu::TestLog::Message << "Data is incorrect." << tcu::TestLog::EndMessage;
4474				error = ERROR;
4475			}
4476			if (transpose(data[0]) != transpose(translationMatrix(vec3(10.0f, 20.0f, 30.0f))))
4477			{
4478				m_context.getTestContext().getLog()
4479					<< tcu::TestLog::Message << "Data is incorrect." << tcu::TestLog::EndMessage;
4480				error = ERROR;
4481			}
4482			glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
4483			glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
4484		}
4485		/* validate counter buffer */
4486		{
4487			GLuint* data;
4488			data = static_cast<GLuint*>(
4489				glMapBufferRange(GL_ATOMIC_COUNTER_BUFFER, 0, sizeof(GLuint) * 4, GL_MAP_READ_BIT));
4490			if (data[3] != 4)
4491			{
4492				m_context.getTestContext().getLog() << tcu::TestLog::Message << "Data is: " << data[3]
4493													<< " should be: " << 4 << tcu::TestLog::EndMessage;
4494				error = ERROR;
4495			}
4496			glUnmapBuffer(GL_ATOMIC_COUNTER_BUFFER);
4497		}
4498		/* validate texture */
4499		{
4500			std::vector<vec4> data(4 * 4);
4501			glBindTexture(GL_TEXTURE_2D, m_texture);
4502			glGenFramebuffers(1, &m_fbo);
4503			glBindFramebuffer(GL_FRAMEBUFFER, m_fbo);
4504			glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_texture, 0);
4505			std::vector<GLubyte> colorData(4 * 4 * 4);
4506			glReadPixels(0, 0, 4, 4, GL_RGBA, GL_UNSIGNED_BYTE, &colorData[0]);
4507			for (int i = 0; i < 4 * 4 * 4; i += 4)
4508			{
4509				data[i / 4] =
4510					vec4(static_cast<GLfloat>(colorData[i] / 255.), static_cast<GLfloat>(colorData[i + 1] / 255.),
4511						 static_cast<GLfloat>(colorData[i + 2] / 255.), static_cast<GLfloat>(colorData[i + 3] / 255.));
4512			}
4513			vec4 epsilon = vec4(1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f); // texture format is RGBA8.
4514			for (std::size_t i = 0; i < data.size(); ++i)
4515			{
4516				if (!ColorEqual(data[i], vec4(0.25f, 0.5f, 0.75f, 1.0f), epsilon))
4517				{
4518					m_context.getTestContext().getLog()
4519						<< tcu::TestLog::Message << "Invalid data at texture." << tcu::TestLog::EndMessage;
4520					return ERROR;
4521				}
4522			}
4523		}
4524
4525		return error;
4526	}
4527	virtual long Cleanup()
4528	{
4529		glUseProgram(0);
4530		for (int i = 0; i < 3; ++i)
4531			glDeleteProgram(m_program[i]);
4532		glDeleteBuffers(4, m_storage_buffer);
4533		glDeleteBuffers(1, &m_counter_buffer);
4534		glDeleteTextures(1, &m_texture);
4535		glDeleteFramebuffers(1, &m_fbo);
4536		return NO_ERROR;
4537	}
4538};
4539
4540class AdvancedPipelinePostFS : public ComputeShaderBase
4541{
4542	virtual std::string Title()
4543	{
4544		return NL "CS as an additional pipeline stage - After FS";
4545	}
4546	virtual std::string Purpose()
4547	{
4548		return NL "1. Verify that CS which runs just after FS to do a post-processing on a rendered image works as "
4549				  "expected." NL "2. Verify that CS used as a post-processing filter works as expected." NL
4550				  "3. Verify that several CS kernels which run in a sequence to do a post-processing on a rendered "
4551				  "image works as expected.";
4552	}
4553	virtual std::string Method()
4554	{
4555		return NL
4556			"1. Render image to Texture0 using VS and FS." NL
4557			"2. Use Texture0 as an input to Kernel0 which performs post-processing and writes result to Texture1." NL
4558			"3. Issue MemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT) command." NL
4559			"4. Use Texture1 as an input to Kernel1 which performs post-processing and writes result to Texture0." NL
4560			"5. Issue MemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT) command." NL
4561			"6. Verify content of the final post-processed image (Texture0).";
4562	}
4563	virtual std::string PassCriteria()
4564	{
4565		return NL "Everything works as expected.";
4566	}
4567
4568	GLuint m_program[3];
4569	GLuint m_render_target[2];
4570	GLuint m_framebuffer;
4571	GLuint m_vertex_array;
4572	GLuint m_fbo;
4573
4574	virtual long Setup()
4575	{
4576		memset(m_program, 0, sizeof(m_program));
4577		memset(m_render_target, 0, sizeof(m_render_target));
4578		m_framebuffer  = 0;
4579		m_vertex_array = 0;
4580		m_fbo		   = 0;
4581		return NO_ERROR;
4582	}
4583
4584	virtual long Run()
4585	{
4586		const char* const glsl_vs =
4587			NL "const mediump vec2 g_vertex[4] = vec2[4](vec2(0.0), vec2(-1.0, -1.0), vec2(3.0, -1.0), vec2(-1.0, "
4588			   "3.0));" NL "void main() {" NL "  gl_Position = vec4(g_vertex[gl_VertexID], 0.0, 1.0);" NL "}";
4589		const char* const glsl_fs = NL "layout(location = 0) out mediump vec4 g_color;" NL "void main() {" NL
4590									   "  g_color = vec4(1.0, 0.0, 0.0, 1.0);" NL "}";
4591		m_program[0] = CreateProgram(glsl_vs, glsl_fs);
4592		glLinkProgram(m_program[0]);
4593		if (!CheckProgram(m_program[0]))
4594			return ERROR;
4595
4596		const char* const glsl_cs =
4597			NL "#define TILE_WIDTH 4" NL "#define TILE_HEIGHT 4" NL
4598			   "const ivec2 kTileSize = ivec2(TILE_WIDTH, TILE_HEIGHT);" NL NL
4599			   "layout(binding = 0, rgba8) readonly uniform mediump image2D g_input_image;" NL
4600			   "layout(binding = 1, rgba8) writeonly uniform mediump image2D g_output_image;" NL NL
4601			   "layout(local_size_x = TILE_WIDTH, local_size_y=TILE_HEIGHT) in;" NL				 NL "void main() {" NL
4602			   "  ivec2 tile_xy = ivec2(gl_WorkGroupID);" NL "  ivec2 thread_xy = ivec2(gl_LocalInvocationID);" NL NL
4603			   "  if (thread_xy == ivec2(0)) {" NL "    ivec2 pixel_xy = tile_xy * kTileSize;" NL
4604			   "    for (int y = 0; y < TILE_HEIGHT; ++y) {" NL "      for (int x = 0; x < TILE_WIDTH; ++x) {" NL
4605			   "        imageStore(g_output_image, pixel_xy + ivec2(x, y), vec4(0, 1, 0, 1));" NL "      }" NL
4606			   "    }" NL "  }" NL "}";
4607
4608		m_program[1] = CreateComputeProgram(glsl_cs);
4609		glLinkProgram(m_program[1]);
4610		if (!CheckProgram(m_program[1]))
4611			return ERROR;
4612
4613		const char* const glsl_cs2 =
4614			NL "#define TILE_WIDTH 8" NL "#define TILE_HEIGHT 8" NL
4615			   "const ivec2 kTileSize = ivec2(TILE_WIDTH, TILE_HEIGHT);" NL NL
4616			   "layout(binding = 0, rgba8) readonly uniform mediump image2D g_input_image;" NL
4617			   "layout(binding = 1, rgba8) writeonly uniform mediump image2D g_output_image;" NL NL
4618			   "layout(local_size_x = TILE_WIDTH, local_size_y=TILE_HEIGHT) in;" NL NL "vec4 Process(vec4 ic) {" NL
4619			   "  return ic + vec4(1.0, 0.0, 0.0, 0.0);" NL "}" NL "void main() {" NL
4620			   "  ivec2 tile_xy = ivec2(gl_WorkGroupID);" NL "  ivec2 thread_xy = ivec2(gl_LocalInvocationID);" NL
4621			   "  ivec2 pixel_xy = tile_xy * kTileSize + thread_xy;" NL
4622			   "  vec4 ic = imageLoad(g_input_image, pixel_xy);" NL
4623			   "  imageStore(g_output_image, pixel_xy, Process(ic));" NL "}";
4624
4625		m_program[2] = CreateComputeProgram(glsl_cs2);
4626		glLinkProgram(m_program[2]);
4627		if (!CheckProgram(m_program[2]))
4628			return ERROR;
4629
4630		glGenVertexArrays(1, &m_vertex_array);
4631
4632		/* init render targets */
4633		{
4634			std::vector<GLint> data(128 * 128 * 4);
4635			glGenTextures(2, m_render_target);
4636			for (int i = 0; i < 2; ++i)
4637			{
4638				glBindTexture(GL_TEXTURE_2D, m_render_target[i]);
4639				glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
4640				glTexStorage2D(GL_TEXTURE_2D, 1, GL_RGBA8, 128, 128);
4641				glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 128, 128, GL_RGBA, GL_UNSIGNED_BYTE, &data[0]);
4642			}
4643			glBindTexture(GL_TEXTURE_2D, 0);
4644		}
4645
4646		glGenFramebuffers(1, &m_framebuffer);
4647		glBindFramebuffer(GL_FRAMEBUFFER, m_framebuffer);
4648		glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_render_target[0], 0);
4649		glBindFramebuffer(GL_FRAMEBUFFER, 0);
4650
4651		glBindFramebuffer(GL_FRAMEBUFFER, m_framebuffer);
4652		glUseProgram(m_program[0]);
4653		glBindVertexArray(m_vertex_array);
4654		glClear(GL_COLOR_BUFFER_BIT);
4655		glViewport(0, 0, 128, 128);
4656		// draw full-viewport triangle
4657		glDrawArrays(GL_TRIANGLES, 1,
4658					 3); // note: <first> is 1 this means that gl_VertexID in the VS will be: 1, 2 and 3
4659		glBindFramebuffer(GL_FRAMEBUFFER, 0);
4660
4661		glBindImageTexture(0, m_render_target[0], 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA8);  // input
4662		glBindImageTexture(1, m_render_target[1], 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8); // output
4663		glUseProgram(m_program[1]);
4664		glDispatchCompute(128 / 4, 128 / 4, 1);
4665
4666		glBindImageTexture(0, m_render_target[1], 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA8);  // input
4667		glBindImageTexture(1, m_render_target[0], 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8); // output
4668		glUseProgram(m_program[2]);
4669		glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
4670		glDispatchCompute(128 / 8, 128 / 8, 1);
4671
4672		/* validate render target */
4673		{
4674			std::vector<vec4> data(128 * 128);
4675			glBindTexture(GL_TEXTURE_2D, m_render_target[0]);
4676			glMemoryBarrier(GL_TEXTURE_UPDATE_BARRIER_BIT);
4677			glGenFramebuffers(1, &m_fbo);
4678			glBindFramebuffer(GL_FRAMEBUFFER, m_fbo);
4679			glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_render_target[0], 0);
4680			std::vector<GLubyte> colorData(128 * 128 * 4);
4681			glReadPixels(0, 0, 128, 128, GL_RGBA, GL_UNSIGNED_BYTE, &colorData[0]);
4682			for (int i = 0; i < 128 * 128 * 4; i += 4)
4683			{
4684				data[i / 4] =
4685					vec4(static_cast<GLfloat>(colorData[i] / 255.), static_cast<GLfloat>(colorData[i + 1] / 255.),
4686						 static_cast<GLfloat>(colorData[i + 2] / 255.), static_cast<GLfloat>(colorData[i + 3] / 255.));
4687			}
4688			for (std::size_t i = 0; i < data.size(); ++i)
4689			{
4690				if (!IsEqual(data[i], vec4(1, 1, 0, 1)))
4691				{
4692					m_context.getTestContext().getLog()
4693						<< tcu::TestLog::Message << "Invalid data at index " << i << ": " << data[i].x() << ", "
4694						<< data[i].y() << ", " << data[i].z() << ", " << data[i].w() << tcu::TestLog::EndMessage;
4695					return ERROR;
4696				}
4697			}
4698		}
4699		return NO_ERROR;
4700	}
4701
4702	virtual long Cleanup()
4703	{
4704		glViewport(0, 0, getWindowWidth(), getWindowHeight());
4705		glUseProgram(0);
4706		for (int i = 0; i < 3; ++i)
4707			glDeleteProgram(m_program[i]);
4708		glDeleteTextures(2, m_render_target);
4709		glDeleteVertexArrays(1, &m_vertex_array);
4710		glDeleteFramebuffers(1, &m_framebuffer);
4711		glDeleteFramebuffers(1, &m_fbo);
4712		return NO_ERROR;
4713	}
4714};
4715
4716class AdvancedPipelinePostXFB : public ComputeShaderBase
4717{
4718	virtual std::string Title()
4719	{
4720		return NL "CS as an additional pipeline stage - After XFB";
4721	}
4722	virtual std::string Purpose()
4723	{
4724		return NL "1. Verify that CS which process data fedback by VS works as expected." NL
4725				  "2. Verify that XFB and SSBO works correctly together in one shader." NL
4726				  "3. Verify that 'switch' statment which selects different execution path for each CS thread works as "
4727				  "expected.";
4728	}
4729	virtual std::string Method()
4730	{
4731		return NL "1. Draw triangle with XFB enabled. Some data is written to the XFB buffer." NL
4732				  "2. Use XFB buffer as 'input SSBO' in CS. Process data and write it to 'output SSBO'." NL
4733				  "3. Verify 'output SSBO' content.";
4734	}
4735	virtual std::string PassCriteria()
4736	{
4737		return NL "Everything works as expected.";
4738	}
4739
4740	GLuint m_program[2];
4741	GLuint m_storage_buffer;
4742	GLuint m_xfb_buffer;
4743	GLuint m_vertex_buffer;
4744	GLuint m_vertex_array;
4745
4746	virtual long Setup()
4747	{
4748		memset(m_program, 0, sizeof(m_program));
4749		m_storage_buffer = 0;
4750		m_xfb_buffer	 = 0;
4751		m_vertex_buffer  = 0;
4752		m_vertex_array   = 0;
4753		return NO_ERROR;
4754	}
4755	virtual long Run()
4756	{
4757		GLint res;
4758		glGetIntegerv(GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, &res);
4759		if (res <= 0)
4760		{
4761			OutputNotSupported("GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS <= 0");
4762			return NOT_SUPPORTED;
4763		}
4764
4765		const char* const glsl_vs =
4766			NL "layout(location = 0) in mediump vec4 g_position;" NL "layout(location = 1) in mediump vec4 g_color;" NL
4767			   "struct Vertex {" NL "  mediump vec4 position;" NL "  mediump vec4 color;" NL "};" NL
4768			   "flat out mediump vec4 color;" NL "layout(binding = 0) buffer StageData {" NL "  Vertex vertex[];" NL
4769			   "} g_vs_buffer;" NL "void main() {" NL "  gl_Position = g_position;" NL "  color = g_color;" NL
4770			   "  g_vs_buffer.vertex[gl_VertexID].position = g_position;" NL
4771			   "  g_vs_buffer.vertex[gl_VertexID].color = g_color;" NL "}";
4772		const char* const glsl_fs =
4773			NL "flat mediump in vec4 color;" NL "layout(location = 0) out mediump vec4 g_color;" NL "void main() {" NL
4774			   "  g_color = color;" NL "}";
4775		m_program[0] = CreateProgram(glsl_vs, glsl_fs);
4776		/* setup xfb varyings */
4777		{
4778			const char* const var[2] = { "gl_Position", "color" };
4779			glTransformFeedbackVaryings(m_program[0], 2, var, GL_INTERLEAVED_ATTRIBS);
4780		}
4781		glLinkProgram(m_program[0]);
4782		if (!CheckProgram(m_program[0]))
4783			return ERROR;
4784
4785		const char* const glsl_cs =
4786			NL "layout(local_size_x = 3) in;" NL "struct Vertex {" NL "  vec4 position;" NL "  vec4 color;" NL "};" NL
4787			   "layout(binding = 3, std430) buffer Buffer {" NL "  Vertex g_vertex[3];" NL "};" NL
4788			   "uniform vec4 g_color1;" NL "uniform int g_two;" NL "void UpdateVertex2(int i) {" NL
4789			   "  g_vertex[i].color -= vec4(-1, 1, 0, 0);" NL "}" NL "void main() {" NL
4790			   "  switch (gl_GlobalInvocationID.x) {" NL
4791			   "    case 0u: g_vertex[gl_GlobalInvocationID.x].color += vec4(1, 0, 0, 0); break;" NL
4792			   "    case 1u: g_vertex[1].color += g_color1; break;" NL "    case 2u: UpdateVertex2(g_two); break;" NL
4793			   "    default: return;" NL "  }" NL "}";
4794		m_program[1] = CreateComputeProgram(glsl_cs);
4795		glLinkProgram(m_program[1]);
4796		glUseProgram(m_program[1]);
4797		glUniform4f(glGetUniformLocation(m_program[1], "g_color1"), 0.f, 0.f, 1.f, 0.f);
4798		glUniform1i(glGetUniformLocation(m_program[1], "g_two"), 2);
4799		glUseProgram(0);
4800		if (!CheckProgram(m_program[1]))
4801			return ERROR;
4802
4803		glGenBuffers(1, &m_storage_buffer);
4804		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
4805		glBufferData(GL_SHADER_STORAGE_BUFFER, 3 * sizeof(vec4) * 2, NULL, GL_STATIC_COPY);
4806
4807		glGenBuffers(1, &m_xfb_buffer);
4808		glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0, m_xfb_buffer);
4809		glBufferData(GL_TRANSFORM_FEEDBACK_BUFFER, 3 * sizeof(vec4) * 2, NULL, GL_STREAM_COPY);
4810
4811		const float in_data[3 * 8] = { -1, -1, 0, 1, 0, 1, 0, 1, 3, -1, 0, 1, 0, 1, 0, 1, -1, 3, 0, 1, 0, 1, 0, 1 };
4812		glGenBuffers(1, &m_vertex_buffer);
4813		glBindBuffer(GL_ARRAY_BUFFER, m_vertex_buffer);
4814		glBufferData(GL_ARRAY_BUFFER, sizeof(in_data), in_data, GL_STATIC_DRAW);
4815		glBindBuffer(GL_ARRAY_BUFFER, 0);
4816
4817		glGenVertexArrays(1, &m_vertex_array);
4818		glBindVertexArray(m_vertex_array);
4819		glBindBuffer(GL_ARRAY_BUFFER, m_vertex_buffer);
4820		glVertexAttribPointer(0, 4, GL_FLOAT, GL_FALSE, 2 * sizeof(vec4), 0);
4821		glVertexAttribPointer(1, 4, GL_FLOAT, GL_FALSE, 2 * sizeof(vec4), reinterpret_cast<void*>(sizeof(vec4)));
4822		glBindBuffer(GL_ARRAY_BUFFER, 0);
4823		glEnableVertexAttribArray(0);
4824		glEnableVertexAttribArray(1);
4825		glBindVertexArray(0);
4826
4827		glClear(GL_COLOR_BUFFER_BIT);
4828		glUseProgram(m_program[0]);
4829		glBindVertexArray(m_vertex_array);
4830		glBeginTransformFeedback(GL_TRIANGLES);
4831		glDrawArrays(GL_TRIANGLES, 0, 3);
4832		glEndTransformFeedback();
4833
4834		glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
4835
4836		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, m_xfb_buffer);
4837		glUseProgram(m_program[1]);
4838		glDispatchCompute(1, 1, 1);
4839
4840		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
4841
4842		long error = NO_ERROR;
4843		/* validate storage buffer */
4844		{
4845			float* data;
4846			glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer);
4847			data = static_cast<float*>(
4848				glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(float) * 3 * 8, GL_MAP_READ_BIT));
4849			if (memcmp(data, in_data, sizeof(float) * 3 * 8) != 0)
4850			{
4851				m_context.getTestContext().getLog()
4852					<< tcu::TestLog::Message << "Data in shader storage buffer is incorrect."
4853					<< tcu::TestLog::EndMessage;
4854				error = ERROR;
4855			}
4856			glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
4857			glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
4858		}
4859		/* validate xfb buffer */
4860		{
4861			const float ref_data[3 * 8] = {
4862				-1, -1, 0, 1, 1, 1, 0, 1, 3, -1, 0, 1, 0, 1, 1, 1, -1, 3, 0, 1, 1, 0, 0, 1
4863			};
4864			float* data;
4865			data = static_cast<float*>(
4866				glMapBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, 0, sizeof(float) * 3 * 8, GL_MAP_READ_BIT));
4867			if (memcmp(data, ref_data, sizeof(float) * 3 * 8) != 0)
4868			{
4869				m_context.getTestContext().getLog()
4870					<< tcu::TestLog::Message << "Data in xfb buffer is incorrect." << tcu::TestLog::EndMessage;
4871				error = ERROR;
4872			}
4873			glUnmapBuffer(GL_TRANSFORM_FEEDBACK_BUFFER);
4874			glBindBuffer(GL_TRANSFORM_FEEDBACK_BUFFER, 0);
4875		}
4876		if (!ValidateReadBuffer(0, 0, getWindowWidth(), getWindowHeight(), vec4(0, 1, 0, 1)))
4877		{
4878			error = ERROR;
4879		}
4880		return error;
4881	}
4882	virtual long Cleanup()
4883	{
4884		glUseProgram(0);
4885		for (int i = 0; i < 2; ++i)
4886			glDeleteProgram(m_program[i]);
4887		glDeleteBuffers(1, &m_vertex_buffer);
4888		glDeleteBuffers(1, &m_storage_buffer);
4889		glDeleteBuffers(1, &m_xfb_buffer);
4890		glDeleteVertexArrays(1, &m_vertex_array);
4891		return NO_ERROR;
4892	}
4893};
4894
4895class AdvancedSharedIndexing : public ComputeShaderBase
4896{
4897	virtual std::string Title()
4898	{
4899		return NL "Shared Memory - Indexing";
4900	}
4901	virtual std::string Purpose()
4902	{
4903		return NL "1. Verify that indexing various types of shared memory works as expected." NL
4904				  "2. Verify that indexing shared memory with different types of expressions work as expected." NL
4905				  "3. Verify that all declaration types of shared structures are supported by the GLSL compiler.";
4906	}
4907	virtual std::string Method()
4908	{
4909		return NL "1. Create CS which uses shared memory in many different ways." NL
4910				  "2. Write to shared memory using different expressions." NL "3. Validate shared memory content." NL
4911				  "4. Use synchronization primitives (barrier, groupMemoryBarrier) where applicable.";
4912	}
4913	virtual std::string PassCriteria()
4914	{
4915		return NL "Everyting works as expected.";
4916	}
4917
4918	GLuint m_program;
4919	GLuint m_texture;
4920	GLuint m_fbo;
4921
4922	virtual long Setup()
4923	{
4924		m_program = 0;
4925		m_texture = 0;
4926		m_fbo	 = 0;
4927		return NO_ERROR;
4928	}
4929	virtual long Run()
4930	{
4931		const char* const glsl_cs =
4932			NL "layout(binding = 3, rgba8) uniform mediump writeonly image2D g_result_image;" NL
4933			   "layout (local_size_x = 4,local_size_y=4 ) in;" NL "shared vec4 g_shared1[4];" NL
4934			   "shared mat4 g_shared2;" NL "shared struct {" NL "  float data[4];" NL "} g_shared3[4];" NL
4935			   "shared struct Type { float data[4]; } g_shared4[4];" NL "shared Type g_shared5[4];" NL
4936			   "uniform bool g_true;" NL "uniform float g_values[16];" NL NL "void Sync() {" NL
4937			   "  groupMemoryBarrier();" NL "  barrier();" NL "}" NL "void SetMemory(ivec2 xy, float value) {" NL
4938			   "  g_shared1[xy.y][gl_LocalInvocationID.x] = value;" NL "  g_shared2[xy.y][xy.x] = value;" NL
4939			   "  g_shared3[xy[1]].data[xy[0]] = value;" NL "  g_shared4[xy.y].data[xy[0]] = value;" NL
4940			   "  g_shared5[gl_LocalInvocationID.y].data[gl_LocalInvocationID.x] = value;" NL "}" NL
4941			   "bool CheckMemory(ivec2 xy, float expected) {" NL
4942			   "  if (g_shared1[xy.y][xy[0]] != expected) return false;" NL
4943			   "  if (g_shared2[xy[1]][xy[0]] != expected) return false;" NL
4944			   "  if (g_shared3[gl_LocalInvocationID.y].data[gl_LocalInvocationID.x] != expected) return false;" NL
4945			   "  if (g_shared4[gl_LocalInvocationID.y].data[xy.x] != expected) return false;" NL
4946			   "  if (g_shared5[xy.y].data[xy.x] != expected) return false;" NL "  return true;" NL "}" NL
4947			   "void main() {" NL "  ivec2 thread_xy = ivec2(gl_LocalInvocationID);" NL
4948			   "  vec4 result = vec4(0.0, 1.0, 0.0, 1.0);" NL NL
4949			   "  SetMemory(thread_xy, g_values[gl_LocalInvocationIndex] * 1.0);" NL "  Sync();" NL
4950			   "  if (!CheckMemory(thread_xy, g_values[gl_LocalInvocationIndex] * 1.0)) result = vec4(1.0, 0.0, 0.0, "
4951			   "1.0);" NL NL "  SetMemory(thread_xy, g_values[gl_LocalInvocationIndex] * -1.0);" NL "  Sync();" NL
4952			   "  if (!CheckMemory(thread_xy, g_values[gl_LocalInvocationIndex] * -1.0)) result = vec4(1.0, 0.0, 0.0, "
4953			   "1.0);" NL NL "  if (g_true && gl_LocalInvocationID.x < 10u) {" NL
4954			   "    SetMemory(thread_xy, g_values[gl_LocalInvocationIndex] * 7.0);" NL "    Sync();" NL
4955			   "    if (!CheckMemory(thread_xy, g_values[gl_LocalInvocationIndex] * 7.0)) result = vec4(1.0, 0.0, 0.0, "
4956			   "1.0);" NL "  }" NL NL "  imageStore(g_result_image, thread_xy, result);" NL "}";
4957
4958		m_program = CreateComputeProgram(glsl_cs);
4959		glLinkProgram(m_program);
4960		if (!CheckProgram(m_program))
4961			return ERROR;
4962
4963		/* init texture */
4964		{
4965			std::vector<GLint> data(4 * 4 * 4);
4966			glGenTextures(1, &m_texture);
4967			glBindTexture(GL_TEXTURE_2D, m_texture);
4968			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
4969			glTexStorage2D(GL_TEXTURE_2D, 1, GL_RGBA8, 4, 4);
4970			glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 4, 4, GL_RGBA, GL_UNSIGNED_BYTE, &data[0]);
4971			glBindTexture(GL_TEXTURE_2D, 0);
4972		}
4973
4974		glBindImageTexture(3, m_texture, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8);
4975		glUseProgram(m_program);
4976		glUniform1i(glGetUniformLocation(m_program, "g_true"), GL_TRUE);
4977		GLfloat values[16] = { 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f, 8.f, 9.f, 10.f, 11.f, 12.f, 13.f, 14.f, 15.f, 16.f };
4978		glUniform1fv(glGetUniformLocation(m_program, "g_values"), 16, values);
4979		glDispatchCompute(1, 1, 1);
4980
4981		/* validate render target */
4982		{
4983			std::vector<vec4> data(4 * 4);
4984			glBindTexture(GL_TEXTURE_2D, m_texture);
4985			glMemoryBarrier(GL_TEXTURE_UPDATE_BARRIER_BIT);
4986			glGenFramebuffers(1, &m_fbo);
4987			glBindFramebuffer(GL_FRAMEBUFFER, m_fbo);
4988			glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, m_texture, 0);
4989			std::vector<GLubyte> colorData(4 * 4 * 4);
4990			glReadPixels(0, 0, 4, 4, GL_RGBA, GL_UNSIGNED_BYTE, &colorData[0]);
4991			for (int i = 0; i < 4 * 4 * 4; i += 4)
4992			{
4993				data[i / 4] =
4994					vec4(static_cast<GLfloat>(colorData[i] / 255.), static_cast<GLfloat>(colorData[i + 1] / 255.),
4995						 static_cast<GLfloat>(colorData[i + 2] / 255.), static_cast<GLfloat>(colorData[i + 3] / 255.));
4996			}
4997			for (std::size_t i = 0; i < data.size(); ++i)
4998			{
4999				if (!IsEqual(data[i], vec4(0, 1, 0, 1)))
5000				{
5001					m_context.getTestContext().getLog()
5002						<< tcu::TestLog::Message << "Invalid data at index " << i << tcu::TestLog::EndMessage;
5003					return ERROR;
5004				}
5005			}
5006		}
5007		return NO_ERROR;
5008	}
	/* Releases the GL objects used by this test case.
	 *
	 * Unbinds the current program, then deletes the program, texture and
	 * framebuffer objects named by m_program, m_texture and m_fbo.
	 * Always returns NO_ERROR. */
	virtual long Cleanup()
	{
		glUseProgram(0);
		glDeleteProgram(m_program);
		glDeleteTextures(1, &m_texture);
		glDeleteFramebuffers(1, &m_fbo);
		return NO_ERROR;
	}
5017};
5018
5019class AdvancedSharedMax : public ComputeShaderBase
5020{
5021	virtual std::string Title()
5022	{
5023		return NL "Shared Memory - 16K";
5024	}
5025	virtual std::string Purpose()
5026	{
5027		return NL "Support for 16K of shared memory is required by the OpenGL specifaction. Verify if an "
5028				  "implementation supports it.";
5029	}
5030	virtual std::string Method()
5031	{
5032		return NL "Create and dispatch CS which uses 16K of shared memory.";
5033	}
5034	virtual std::string PassCriteria()
5035	{
5036		return NL "Everything works as expected.";
5037	}
5038
5039	GLuint m_program;
5040	GLuint m_buffer;
5041
5042	virtual long Setup()
5043	{
5044		m_program = 0;
5045		m_buffer  = 0;
5046		return NO_ERROR;
5047	}
5048	virtual long Run()
5049	{
5050		const char* const glsl_cs = NL
5051			"layout(local_size_x = 64) in;" NL
5052			"shared struct Type { vec4 v[16]; } g_shared[64];" // 16384 bytes of shared memory
5053			NL "layout(std430) buffer Output {" NL "  Type g_output[64];" NL "};" NL NL "void main() {" NL
5054			"  int id = int(gl_GlobalInvocationID.x);" NL
5055			"  g_shared[id].v = vec4[16](vec4(1.0), vec4(1.0), vec4(1.0), vec4(1.0), vec4(1.0), vec4(1.0), vec4(1.0), "
5056			"vec4(1.0)," NL "                            vec4(1.0), vec4(1.0), vec4(1.0), vec4(1.0), vec4(1.0), "
5057			"vec4(1.0), vec4(1.0), vec4(1.0));" NL "  memoryBarrierShared();" NL "  barrier();" NL NL
5058			"  vec4 sum = vec4(0.0);" NL "  int sum_count = 0;" NL "  for (int i = id - 6; i < id + 9; ++i) {" NL
5059			"    if (id >= 0 && id < g_shared.length()) {" NL "      sum += g_shared[id].v[0];" NL
5060			"      sum += g_shared[id].v[1];" NL "      sum += g_shared[id].v[2];" NL
5061			"      sum += g_shared[id].v[3];" NL "      sum += g_shared[id].v[4];" NL
5062			"      sum += g_shared[id].v[5];" NL "      sum += g_shared[id].v[6];" NL
5063			"      sum += g_shared[id].v[7];" NL "      sum += g_shared[id].v[8];" NL
5064			"      sum += g_shared[id].v[9];" NL "      sum += g_shared[id].v[10];" NL
5065			"      sum += g_shared[id].v[11];" NL "      sum += g_shared[id].v[12];" NL
5066			"      sum += g_shared[id].v[13];" NL "      sum += g_shared[id].v[14];" NL
5067			"      sum += g_shared[id].v[15];" NL "      sum_count += 16;" NL "    }" NL "  }" NL
5068			"  sum = abs((sum / float(sum_count)) - vec4(1.0));" NL
5069			"  if (sum.x > 0.0000001f || sum.y > 0.0000001f || sum.z > 0.0000001f || sum.w > 0.0000001f) return;" NL NL
5070			"  g_output[id] = g_shared[id];" NL "}";
5071
5072		m_program = CreateComputeProgram(glsl_cs);
5073		glLinkProgram(m_program);
5074		if (!CheckProgram(m_program))
5075			return ERROR;
5076
5077		/* init buffer */
5078		{
5079			std::vector<vec4> data(1024);
5080			glGenBuffers(1, &m_buffer);
5081			glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_buffer);
5082			glBufferData(GL_SHADER_STORAGE_BUFFER, (GLsizeiptr)(sizeof(vec4) * data.size()), &data[0][0],
5083						 GL_DYNAMIC_COPY);
5084		}
5085
5086		glUseProgram(m_program);
5087		glDispatchCompute(1, 1, 1);
5088		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
5089
5090		long error = NO_ERROR;
5091		/* validate buffer */
5092		{
5093			vec4* data;
5094			data =
5095				static_cast<vec4*>(glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(vec4) * 1024, GL_MAP_READ_BIT));
5096			for (std::size_t i = 0; i < 1024; ++i)
5097			{
5098				if (!IsEqual(data[i], vec4(1.0f)))
5099				{
5100					m_context.getTestContext().getLog()
5101						<< tcu::TestLog::Message << "Invalid data at index " << i << tcu::TestLog::EndMessage;
5102					error = ERROR;
5103				}
5104			}
5105		}
5106		glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
5107		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
5108		return error;
5109	}
5110	virtual long Cleanup()
5111	{
5112		glUseProgram(0);
5113		glDeleteProgram(m_program);
5114		glDeleteBuffers(1, &m_buffer);
5115		return NO_ERROR;
5116	}
5117};
5118
5119class AdvancedResourcesMax : public ComputeShaderBase
5120{
5121	virtual std::string Title()
5122	{
5123		return NL "Maximum number of resources in one shader";
5124	}
5125	virtual std::string Purpose()
5126	{
5127		return NL "1. Verify that using 4 SSBOs, 12 UBOs, 8 atomic counters" NL "   in one CS works as expected.";
5128	}
5129	virtual std::string Method()
5130	{
5131		return NL "Create and dispatch CS. Verify result.";
5132	}
5133	virtual std::string PassCriteria()
5134	{
5135		return NL "Everything works as expected.";
5136	}
5137
5138	GLuint m_program;
5139	GLuint m_storage_buffer[4];
5140	GLuint m_uniform_buffer[12];
5141	GLuint m_atomic_buffer;
5142
5143	bool RunIteration(GLuint index)
5144	{
5145		for (GLuint i = 0; i < 4; ++i)
5146		{
5147			const GLuint data = i + 1;
5148			glBindBufferBase(GL_SHADER_STORAGE_BUFFER, i, m_storage_buffer[i]);
5149			glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(data), &data, GL_STATIC_DRAW);
5150		}
5151		for (GLuint i = 0; i < 12; ++i)
5152		{
5153			const GLuint data = i + 1;
5154			glBindBufferBase(GL_UNIFORM_BUFFER, i, m_uniform_buffer[i]);
5155			glBufferData(GL_UNIFORM_BUFFER, sizeof(data), &data, GL_STATIC_DRAW);
5156		}
5157		{
5158			GLuint data[8];
5159			for (GLuint i = 0; i < 8; ++i)
5160				data[i]   = i + 1;
5161			glBindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, m_atomic_buffer);
5162			glBufferData(GL_ATOMIC_COUNTER_BUFFER, sizeof(data), &data[0], GL_STATIC_DRAW);
5163		}
5164
5165		glUseProgram(m_program);
5166		glUniform1ui(glGetUniformLocation(m_program, "g_index"), index);
5167		/* uniform array */
5168		{
5169			std::vector<GLuint> data(480);
5170			for (GLuint i = 0; i < static_cast<GLuint>(data.size()); ++i)
5171				data[i]   = i + 1;
5172			glUniform1uiv(glGetUniformLocation(m_program, "g_uniform_def"), static_cast<GLsizei>(data.size()),
5173						  &data[0]);
5174		}
5175		glDispatchCompute(1, 1, 1);
5176		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
5177
5178		bool ret = true;
5179		/* validate buffer */
5180		{
5181			GLuint* data;
5182			glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer[index]);
5183			data = static_cast<GLuint*>(glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(GLuint), GL_MAP_READ_BIT));
5184			if (data[0] != (index + 1) * 4)
5185			{
5186				m_context.getTestContext().getLog() << tcu::TestLog::Message << "Data is " << data[0] << " should be "
5187													<< ((index + 1) * 4) << tcu::TestLog::EndMessage;
5188				ret = false;
5189			}
5190		}
5191		glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
5192		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
5193		return ret;
5194	}
5195	virtual long Setup()
5196	{
5197		m_program = 0;
5198		memset(m_storage_buffer, 0, sizeof(m_storage_buffer));
5199		memset(m_uniform_buffer, 0, sizeof(m_uniform_buffer));
5200		m_atomic_buffer = 0;
5201		return NO_ERROR;
5202	}
5203	virtual long Run()
5204	{
5205		const char* const glsl_cs =
5206			NL "layout(local_size_x = 1) in;" NL "layout(std140, binding = 0) buffer ShaderStorageBlock {" NL
5207			   "  uint data;" NL "} g_shader_storage[4];" NL "layout(std140, binding = 0) uniform UniformBlock {" NL
5208			   "  uint data;" NL "} g_uniform[12];" NL
5209			   "layout(binding = 0, offset =  0) uniform atomic_uint g_atomic_counter0;" NL
5210			   "layout(binding = 0, offset =  4) uniform atomic_uint g_atomic_counter1;" NL
5211			   "layout(binding = 0, offset =  8) uniform atomic_uint g_atomic_counter2;" NL
5212			   "layout(binding = 0, offset = 12) uniform atomic_uint g_atomic_counter3;" NL
5213			   "layout(binding = 0, offset = 16) uniform atomic_uint g_atomic_counter4;" NL
5214			   "layout(binding = 0, offset = 20) uniform atomic_uint g_atomic_counter5;" NL
5215			   "layout(binding = 0, offset = 24) uniform atomic_uint g_atomic_counter6;" NL
5216			   "layout(binding = 0, offset = 28) uniform atomic_uint g_atomic_counter7;" NL
5217			   "uniform uint g_uniform_def[480];" NL "uniform uint g_index;" NL NL "uint Add() {" NL
5218			   "  switch (g_index) {" NL "    case 0u: return atomicCounter(g_atomic_counter0);" NL
5219			   "    case 1u: return atomicCounter(g_atomic_counter1);" NL
5220			   "    case 2u: return atomicCounter(g_atomic_counter2);" NL
5221			   "    case 3u: return atomicCounter(g_atomic_counter3);" NL
5222			   "    case 4u: return atomicCounter(g_atomic_counter4);" NL
5223			   "    case 5u: return atomicCounter(g_atomic_counter5);" NL
5224			   "    case 6u: return atomicCounter(g_atomic_counter6);" NL
5225			   "    case 7u: return atomicCounter(g_atomic_counter7);" NL "  }" NL "}" NL "void main() {" NL
5226			   "  switch (g_index) {" NL "    case 0u: {" NL "      g_shader_storage[0].data += g_uniform[0].data;" NL
5227			   "      g_shader_storage[0].data += Add();" NL "      g_shader_storage[0].data += g_uniform_def[0];" NL
5228			   "      break;" NL "    }" NL "    case 1u: {" NL
5229			   "      g_shader_storage[1].data += g_uniform[1].data;" NL "      g_shader_storage[1].data += Add();" NL
5230			   "      g_shader_storage[1].data += g_uniform_def[1];" NL "      break;" NL "    }" NL "    case 2u: {" NL
5231			   "      g_shader_storage[2].data += g_uniform[2].data;" NL "      g_shader_storage[2].data += Add();" NL
5232			   "      g_shader_storage[2].data += g_uniform_def[2];" NL "      break;" NL "    }" NL "    case 3u: {" NL
5233			   "      g_shader_storage[3].data += g_uniform[3].data;" NL "      g_shader_storage[3].data += Add();" NL
5234			   "      g_shader_storage[3].data += g_uniform_def[3];" NL "      break;" NL "    }" NL "  }" NL "}";
5235		m_program = CreateComputeProgram(glsl_cs);
5236		glLinkProgram(m_program);
5237		if (!CheckProgram(m_program))
5238			return ERROR;
5239
5240		glGenBuffers(4, m_storage_buffer);
5241		glGenBuffers(12, m_uniform_buffer);
5242		glGenBuffers(1, &m_atomic_buffer);
5243
5244		if (!RunIteration(0))
5245			return ERROR;
5246		if (!RunIteration(1))
5247			return ERROR;
5248		if (!RunIteration(3))
5249			return ERROR;
5250
5251		return NO_ERROR;
5252	}
5253	virtual long Cleanup()
5254	{
5255		glUseProgram(0);
5256		glDeleteProgram(m_program);
5257		glDeleteBuffers(4, m_storage_buffer);
5258		glDeleteBuffers(12, m_uniform_buffer);
5259		glDeleteBuffers(1, &m_atomic_buffer);
5260		return NO_ERROR;
5261	}
5262};
5263
5264class WorkGroupSizeUsage : public ComputeShaderBase
5265{
5266	virtual std::string Title()
5267	{
5268		return NL "gl_WorkGroupSize usage";
5269	}
5270	virtual std::string Purpose()
5271	{
5272		return NL "Verify gl_WorkGroupSize usage rules.";
5273	}
5274	virtual std::string Method()
5275	{
5276		return NL "";
5277	}
5278	virtual std::string PassCriteria()
5279	{
5280		return NL "";
5281	}
5282
5283	virtual long Run()
5284	{
5285		// local group size declared with some dimensions omitted - omitted dimensions should have size of 1
5286		if (!CheckOmittedDimensions('x') || !CheckOmittedDimensions('y') || !CheckOmittedDimensions('z'))
5287			return ERROR;
5288
5289		// check if compilation error is generated when shader doesn't declare
5290		// fixed local group size and tries to use gl_WorkGroupSize
5291		if (!CheckCompilationError("#version 310 es" NL "layout(std430) buffer Output {" NL "  uint g_output;" NL
5292								   "};" NL "void main() {" NL "  g_output = gl_WorkGroupSize.x;" NL "}"))
5293			return ERROR;
5294
5295		// check if compilation error is generated when shader tries using
5296		// gl_WorkGroupSize in a function before declaring local group size
5297		if (!CheckCompilationError("#version 310 es" NL "layout(std430) buffer Output {" NL "  uint g_output;" NL
5298								   "};" NL "void main() {" NL "  g_output = gl_WorkGroupSize.x;" NL "}" NL
5299								   "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;"))
5300			return ERROR;
5301
5302		return NO_ERROR;
5303	}
5304
5305	bool CheckOmittedDimensions(GLchar defined_component)
5306	{
5307		std::stringstream ss;
5308		ss << "layout(std430) buffer Output {" NL "  uint g_output;" NL "};" NL "layout(local_size_"
5309		   << defined_component
5310		   << " = 1) in;" NL "void main() {" NL "  g_output = gl_WorkGroupSize.x + gl_WorkGroupSize.z;" NL "}";
5311
5312		std::string glsl_cs = ss.str();
5313		GLuint		program = CreateComputeProgram(glsl_cs);
5314		glLinkProgram(program);
5315		if (!CheckProgram(program))
5316			return false;
5317
5318		GLint v[3];
5319		glGetProgramiv(program, GL_COMPUTE_WORK_GROUP_SIZE, v);
5320		if (v[0] != 1 || v[1] != 1 || v[2] != 1)
5321		{
5322			m_context.getTestContext().getLog()
5323				<< tcu::TestLog::Message << "Got " << v[0] << ", " << v[1] << ", " << v[2]
5324				<< ", expected: 1, 1, 1 in GL_COMPUTE_WORK_GROUP_SIZE check" << tcu::TestLog::EndMessage;
5325			return false;
5326		}
5327
5328		glDeleteProgram(program);
5329		return true;
5330	}
5331
5332	bool CheckCompilationError(const std::string& source)
5333	{
5334		const GLuint sh = glCreateShader(GL_COMPUTE_SHADER);
5335
5336		const char* const src = source.c_str();
5337		glShaderSource(sh, 1, &src, NULL);
5338		glCompileShader(sh);
5339
5340		GLchar log[1024];
5341		glGetShaderInfoLog(sh, sizeof(log), NULL, log);
5342		m_context.getTestContext().getLog() << tcu::TestLog::Message << "Shader Info Log:\n"
5343											<< log << tcu::TestLog::EndMessage;
5344
5345		GLint status;
5346		glGetShaderiv(sh, GL_COMPILE_STATUS, &status);
5347		glDeleteShader(sh);
5348
5349		if (status == GL_TRUE)
5350		{
5351			m_context.getTestContext().getLog()
5352				<< tcu::TestLog::Message << "Compilation should fail." << tcu::TestLog::EndMessage;
5353			return false;
5354		}
5355
5356		return true;
5357	}
5358};
5359
5360class NegativeAPINoActiveProgram : public ComputeShaderBase
5361{
5362	virtual std::string Title()
5363	{
5364		return NL "API errors - no active program";
5365	}
5366	virtual std::string Purpose()
5367	{
5368		return NL "Verify that appropriate errors are generated by the OpenGL API.";
5369	}
5370	virtual std::string Method()
5371	{
5372		return NL "";
5373	}
5374	virtual std::string PassCriteria()
5375	{
5376		return NL "";
5377	}
5378
5379	GLuint m_program;
5380
5381	virtual long Setup()
5382	{
5383		m_program = 0;
5384		return NO_ERROR;
5385	}
5386	virtual long Run()
5387	{
5388		glDispatchCompute(1, 2, 3);
5389		if (glGetError() != GL_INVALID_OPERATION)
5390		{
5391			m_context.getTestContext().getLog()
5392				<< tcu::TestLog::Message << "INVALID_OPERATION is generated by DispatchCompute or\n"
5393											"DispatchComputeIndirect if there is no active program for the compute\n"
5394											"shader stage."
5395				<< tcu::TestLog::EndMessage;
5396			return ERROR;
5397		}
5398
5399		/* indirect dispatch */
5400		{
5401			GLuint		 buffer;
5402			const GLuint num_group[3] = { 3, 2, 1 };
5403			glGenBuffers(1, &buffer);
5404			glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, buffer);
5405			glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(num_group), num_group, GL_STATIC_DRAW);
5406			glDispatchComputeIndirect(0);
5407			glDeleteBuffers(1, &buffer);
5408			if (glGetError() != GL_INVALID_OPERATION)
5409			{
5410				m_context.getTestContext().getLog()
5411					<< tcu::TestLog::Message
5412					<< "INVALID_OPERATION is generated by DispatchCompute or\n"
5413					   "DispatchComputeIndirect if there is no active program for the compute\n"
5414					   "shader stage."
5415					<< tcu::TestLog::EndMessage;
5416				return ERROR;
5417			}
5418		}
5419
5420		const char* const glsl_vs = NL "layout(location = 0) in mediump vec4 g_position;" NL "void main() {" NL
5421									   "  gl_Position = g_position;" NL "}";
5422		const char* const glsl_fs =
5423			NL "layout(location = 0) out mediump vec4 g_color;" NL "void main() {" NL "  g_color = vec4(1);" NL "}";
5424		m_program = CreateProgram(glsl_vs, glsl_fs);
5425		glLinkProgram(m_program);
5426		if (!CheckProgram(m_program))
5427			return ERROR;
5428
5429		glUseProgram(m_program);
5430
5431		glDispatchCompute(1, 2, 3);
5432		if (glGetError() != GL_INVALID_OPERATION)
5433		{
5434			m_context.getTestContext().getLog()
5435				<< tcu::TestLog::Message << "INVALID_OPERATION is generated by DispatchCompute or\n"
5436											"DispatchComputeIndirect if there is no active program for the compute\n"
5437											"shader stage."
5438				<< tcu::TestLog::EndMessage;
5439			return ERROR;
5440		}
5441
5442		/* indirect dispatch */
5443		{
5444			GLuint		 buffer;
5445			const GLuint num_group[3] = { 3, 2, 1 };
5446			glGenBuffers(1, &buffer);
5447			glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, buffer);
5448			glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(num_group), num_group, GL_STATIC_DRAW);
5449			glDispatchComputeIndirect(0);
5450			glDeleteBuffers(1, &buffer);
5451			if (glGetError() != GL_INVALID_OPERATION)
5452			{
5453				m_context.getTestContext().getLog()
5454					<< tcu::TestLog::Message
5455					<< "INVALID_OPERATION is generated by DispatchCompute or\n"
5456					   "DispatchComputeIndirect if there is no active program for the compute\n"
5457					   "shader stage."
5458					<< tcu::TestLog::EndMessage;
5459				return ERROR;
5460			}
5461		}
5462
5463		return NO_ERROR;
5464	}
5465	virtual long Cleanup()
5466	{
5467		glUseProgram(0);
5468		glDeleteProgram(m_program);
5469		return NO_ERROR;
5470	}
5471};
5472
5473class NegativeAPIWorkGroupCount : public ComputeShaderBase
5474{
5475	virtual std::string Title()
5476	{
5477		return NL "API errors - invalid work group count";
5478	}
5479	virtual std::string Purpose()
5480	{
5481		return NL "Verify that appropriate errors are generated by the OpenGL API.";
5482	}
5483	virtual std::string Method()
5484	{
5485		return NL "";
5486	}
5487	virtual std::string PassCriteria()
5488	{
5489		return NL "";
5490	}
5491
5492	GLuint m_program;
5493	GLuint m_storage_buffer;
5494
5495	virtual long Setup()
5496	{
5497		m_program		 = 0;
5498		m_storage_buffer = 0;
5499		return NO_ERROR;
5500	}
5501	virtual long Run()
5502	{
5503		const char* const glsl_cs =
5504			NL "layout(local_size_x = 1) in;" NL "layout(std430) buffer Output {" NL "  uint g_output[];" NL "};" NL
5505			   "void main() {" NL
5506			   "  g_output[gl_GlobalInvocationID.x * gl_GlobalInvocationID.y * gl_GlobalInvocationID.z] = 0u;" NL "}";
5507		m_program = CreateComputeProgram(glsl_cs);
5508		glLinkProgram(m_program);
5509		if (!CheckProgram(m_program))
5510			return ERROR;
5511
5512		glGenBuffers(1, &m_storage_buffer);
5513		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
5514		glBufferData(GL_SHADER_STORAGE_BUFFER, 100000, NULL, GL_DYNAMIC_DRAW);
5515
5516		GLint x, y, z;
5517		glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 0, &x);
5518		glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 1, &y);
5519		glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_COUNT, 2, &z);
5520
5521		glUseProgram(m_program);
5522
5523		glDispatchCompute(x + 1, 1, 1);
5524		if (glGetError() != GL_INVALID_VALUE)
5525		{
5526			m_context.getTestContext().getLog()
5527				<< tcu::TestLog::Message << "INVALID_VALUE is generated by DispatchCompute if any of <num_groups_x>,\n"
5528											"<num_groups_y> or <num_groups_z> is greater than the value of\n"
5529											"MAX_COMPUTE_WORK_GROUP_COUNT for the corresponding dimension."
5530				<< tcu::TestLog::EndMessage;
5531			return ERROR;
5532		}
5533
5534		glDispatchCompute(1, y + 1, 1);
5535		if (glGetError() != GL_INVALID_VALUE)
5536		{
5537			m_context.getTestContext().getLog()
5538				<< tcu::TestLog::Message << "INVALID_VALUE is generated by DispatchCompute if any of <num_groups_x>,\n"
5539											"<num_groups_y> or <num_groups_z> is greater than the value of\n"
5540											"MAX_COMPUTE_WORK_GROUP_COUNT for the corresponding dimension."
5541				<< tcu::TestLog::EndMessage;
5542			return ERROR;
5543		}
5544
5545		glDispatchCompute(1, 1, z + 1);
5546		if (glGetError() != GL_INVALID_VALUE)
5547		{
5548			m_context.getTestContext().getLog()
5549				<< tcu::TestLog::Message << "INVALID_VALUE is generated by DispatchCompute if any of <num_groups_x>,\n"
5550											"<num_groups_y> or <num_groups_z> is greater than the value of\n"
5551											"MAX_COMPUTE_WORK_GROUP_COUNT for the corresponding dimension."
5552				<< tcu::TestLog::EndMessage;
5553			return ERROR;
5554		}
5555
5556		return NO_ERROR;
5557	}
5558	virtual long Cleanup()
5559	{
5560		glUseProgram(0);
5561		glDeleteProgram(m_program);
5562		glDeleteBuffers(1, &m_storage_buffer);
5563		return NO_ERROR;
5564	}
5565};
5566
5567class NegativeAPIIndirect : public ComputeShaderBase
5568{
5569	virtual std::string Title()
5570	{
5571		return NL "API errors - incorrect DispatchComputeIndirect usage";
5572	}
5573	virtual std::string Purpose()
5574	{
5575		return NL "Verify that appropriate errors are generated by the OpenGL API.";
5576	}
5577	virtual std::string Method()
5578	{
5579		return NL "";
5580	}
5581	virtual std::string PassCriteria()
5582	{
5583		return NL "";
5584	}
5585
5586	GLuint m_program;
5587	GLuint m_storage_buffer;
5588	GLuint m_dispatch_buffer;
5589
5590	virtual long Setup()
5591	{
5592		m_program		  = 0;
5593		m_storage_buffer  = 0;
5594		m_dispatch_buffer = 0;
5595		return NO_ERROR;
5596	}
5597
5598	virtual long Run()
5599	{
5600		const char* const glsl_cs =
5601			NL "layout(local_size_x = 1) in;" NL "layout(std430) buffer Output {" NL "  uint g_output[];" NL "};" NL
5602			   "void main() {" NL "  g_output[gl_GlobalInvocationID.x] = 0u;" NL "}";
5603		m_program = CreateComputeProgram(glsl_cs);
5604		glLinkProgram(m_program);
5605		if (!CheckProgram(m_program))
5606			return ERROR;
5607		glUseProgram(m_program);
5608
5609		glGenBuffers(1, &m_storage_buffer);
5610		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
5611		glBufferData(GL_SHADER_STORAGE_BUFFER, 100000, NULL, GL_DYNAMIC_DRAW);
5612
5613		const GLuint num_groups[6] = { 1, 1, 1, 1, 1, 1 };
5614		glGenBuffers(1, &m_dispatch_buffer);
5615		glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, m_dispatch_buffer);
5616		glBufferData(GL_DISPATCH_INDIRECT_BUFFER, sizeof(num_groups), num_groups, GL_STATIC_COPY);
5617
5618		glDispatchComputeIndirect(-2);
5619		if (glGetError() != GL_INVALID_VALUE)
5620		{
5621			m_context.getTestContext().getLog()
5622				<< tcu::TestLog::Message << "INVALID_VALUE is generated by DispatchComputeIndirect if <indirect> is\n"
5623											"less than zero or not a multiple of four."
5624				<< tcu::TestLog::EndMessage;
5625			return ERROR;
5626		}
5627
5628		glDispatchComputeIndirect(3);
5629		if (glGetError() != GL_INVALID_VALUE)
5630		{
5631			m_context.getTestContext().getLog()
5632				<< tcu::TestLog::Message << "INVALID_VALUE is generated by DispatchComputeIndirect if <indirect> is\n"
5633											"less than zero or not a multiple of four."
5634				<< tcu::TestLog::EndMessage;
5635			return ERROR;
5636		}
5637
5638		glDispatchComputeIndirect(16);
5639		if (glGetError() != GL_INVALID_OPERATION)
5640		{
5641			m_context.getTestContext().getLog()
5642				<< tcu::TestLog::Message
5643				<< "INVALID_OPERATION is generated by DispatchComputeIndirect if no buffer is\n"
5644				   "bound to DISPATCH_INDIRECT_BUFFER or if the command would source data\n"
5645				   "beyond the end of the bound buffer object."
5646				<< tcu::TestLog::EndMessage;
5647			return ERROR;
5648		}
5649
5650		glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, 0);
5651		glDispatchComputeIndirect(0);
5652		if (glGetError() != GL_INVALID_OPERATION)
5653		{
5654			m_context.getTestContext().getLog()
5655				<< tcu::TestLog::Message
5656				<< "INVALID_OPERATION is generated by DispatchComputeIndirect if no buffer is\n"
5657				   "bound to DISPATCH_INDIRECT_BUFFER or if the command would source data\n"
5658				   "beyond the end of the bound buffer object."
5659				<< tcu::TestLog::EndMessage;
5660			return ERROR;
5661		}
5662
5663		return NO_ERROR;
5664	}
5665	virtual long Cleanup()
5666	{
5667		glUseProgram(0);
5668		glDeleteProgram(m_program);
5669		glDeleteBuffers(1, &m_storage_buffer);
5670		glDeleteBuffers(1, &m_dispatch_buffer);
5671		return NO_ERROR;
5672	}
5673};
5674
5675class NegativeAPIProgram : public ComputeShaderBase
5676{
5677	virtual std::string Title()
5678	{
5679		return NL "API errors - program state";
5680	}
5681	virtual std::string Purpose()
5682	{
5683		return NL "Verify that appropriate errors are generated by the OpenGL API.";
5684	}
5685	virtual std::string Method()
5686	{
5687		return NL "";
5688	}
5689	virtual std::string PassCriteria()
5690	{
5691		return NL "";
5692	}
5693
5694	GLuint m_program;
5695	GLuint m_storage_buffer;
5696
5697	virtual long Setup()
5698	{
5699		m_program		 = 0;
5700		m_storage_buffer = 0;
5701		return NO_ERROR;
5702	}
5703	virtual long Run()
5704	{
5705		const char* const glsl_vs = NL "layout(location = 0) in mediump vec4 g_position;" NL "void main() {" NL
5706									   "  gl_Position = g_position;" NL "}";
5707		const char* const glsl_fs =
5708			NL "layout(location = 0) out mediump vec4 g_color;" NL "void main() {" NL "  g_color = vec4(1);" NL "}";
5709		m_program = CreateProgram(glsl_vs, glsl_fs);
5710
5711		GLint v[3];
5712		glGetProgramiv(m_program, GL_COMPUTE_WORK_GROUP_SIZE, v);
5713		if (glGetError() != GL_INVALID_OPERATION)
5714		{
5715			m_context.getTestContext().getLog()
5716				<< tcu::TestLog::Message << "INVALID_OPERATION is generated by GetProgramiv if <pname> is\n"
5717											"COMPUTE_LOCAL_WORK_SIZE and either the program has not been linked\n"
5718											"successfully, or has been linked but contains no compute shaders."
5719				<< tcu::TestLog::EndMessage;
5720			return ERROR;
5721		}
5722
5723		glLinkProgram(m_program);
5724		if (!CheckProgram(m_program))
5725			return ERROR;
5726
5727		glGetProgramiv(m_program, GL_COMPUTE_WORK_GROUP_SIZE, v);
5728		if (glGetError() != GL_INVALID_OPERATION)
5729		{
5730			m_context.getTestContext().getLog()
5731				<< tcu::TestLog::Message << "INVALID_OPERATION is generated by GetProgramiv if <pname> is\n"
5732											"COMPUTE_LOCAL_WORK_SIZE and either the program has not been linked\n"
5733											"successfully, or has been linked but contains no compute shaders."
5734				<< tcu::TestLog::EndMessage;
5735			return ERROR;
5736		}
5737		glDeleteProgram(m_program);
5738
5739		const char* const glsl_cs =
5740			"#version 310 es" NL "layout(local_size_x = 1) in;" NL "layout(std430) buffer Output {" NL
5741			"  uint g_output[];" NL "};" NL "void main() {" NL "  g_output[gl_GlobalInvocationID.x] = 0;" NL "}";
5742		m_program = glCreateProgram();
5743
5744		GLuint sh = glCreateShader(GL_COMPUTE_SHADER);
5745		glAttachShader(m_program, sh);
5746		glDeleteShader(sh);
5747		glShaderSource(sh, 1, &glsl_cs, NULL);
5748		glCompileShader(sh);
5749
5750		sh = glCreateShader(GL_VERTEX_SHADER);
5751		glAttachShader(m_program, sh);
5752		glDeleteShader(sh);
5753		glShaderSource(sh, 1, &glsl_vs, NULL);
5754		glCompileShader(sh);
5755
5756		sh = glCreateShader(GL_FRAGMENT_SHADER);
5757		glAttachShader(m_program, sh);
5758		glDeleteShader(sh);
5759		glShaderSource(sh, 1, &glsl_fs, NULL);
5760		glCompileShader(sh);
5761
5762		glLinkProgram(m_program);
5763		GLint status;
5764		glGetProgramiv(m_program, GL_LINK_STATUS, &status);
5765		if (status == GL_TRUE)
5766		{
5767			m_context.getTestContext().getLog()
5768				<< tcu::TestLog::Message << "LinkProgram will fail if <program> contains a combination of compute and\n"
5769											"non-compute shaders."
5770				<< tcu::TestLog::EndMessage;
5771			return ERROR;
5772		}
5773
5774		return NO_ERROR;
5775	}
5776	virtual long Cleanup()
5777	{
5778		glUseProgram(0);
5779		glDeleteProgram(m_program);
5780		glDeleteBuffers(1, &m_storage_buffer);
5781		return NO_ERROR;
5782	}
5783};
5784
5785class NegativeGLSLCompileTimeErrors : public ComputeShaderBase
5786{
5787	virtual std::string Title()
5788	{
5789		return NL "Compile-time errors";
5790	}
5791	virtual std::string Purpose()
5792	{
5793		return NL "Verify that appropriate errors are generated by the GLSL compiler.";
5794	}
5795	virtual std::string Method()
5796	{
5797		return NL "";
5798	}
5799	virtual std::string PassCriteria()
5800	{
5801		return NL "";
5802	}
5803
5804	static std::string Shader1(int x, int y, int z)
5805	{
5806		std::stringstream ss;
5807		ss << "#version 310 es" NL "layout(local_size_x = " << x << ", local_size_y = " << y << ", local_size_z = " << z
5808		   << ") in;" NL "layout(std430) buffer Output {" NL "  uint g_output[];" NL "};" NL "void main() {" NL
5809			  "  g_output[gl_GlobalInvocationID.x] = 0;" NL "}";
5810		return ss.str();
5811	}
5812	virtual long Run()
5813	{
5814		// gl_GlobalInvocationID requires "#version 310" or later
5815		if (!Compile("#version 300 es" NL "layout(local_size_x = 1) in;" NL "layout(std430) buffer Output {" NL
5816					 "  uint g_output[];" NL "};" NL "void main() {" NL "  g_output[gl_GlobalInvocationID.x] = 0;" NL
5817					 "}"))
5818			return ERROR;
5819
5820		if (!Compile("#version 310 es" NL "layout(local_size_x = 1) in;" NL "layout(local_size_x = 2) in;" NL
5821					 "layout(std430) buffer Output {" NL "  uint g_output[];" NL "};" NL "void main() {" NL
5822					 "  g_output[gl_GlobalInvocationID.x] = 0;" NL "}"))
5823			return ERROR;
5824
5825		if (!Compile("#version 310 es" NL "layout(local_size_x = 1) in;" NL "in uint x;" NL
5826					 "layout(std430) buffer Output {" NL "  uint g_output[];" NL "};" NL "void main() {" NL
5827					 "  g_output[gl_GlobalInvocationID.x] = x;" NL "}"))
5828			return ERROR;
5829
5830		if (!Compile("#version 310 es" NL "layout(local_size_x = 1) in;" NL "out uint x;" NL
5831					 "layout(std430) buffer Output {" NL "  uint g_output[];" NL "};" NL "void main() {" NL
5832					 "  g_output[gl_GlobalInvocationID.x] = 0;" NL "  x = 0;" NL "}"))
5833			return ERROR;
5834
5835		{
5836			GLint x, y, z;
5837			glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE, 0, &x);
5838			glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE, 0, &y);
5839			glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE, 0, &z);
5840
5841			if (!Compile(Shader1(x + 1, 1, 1)))
5842				return ERROR;
5843			if (!Compile(Shader1(1, y + 1, 1)))
5844				return ERROR;
5845			if (!Compile(Shader1(1, 1, z + 1)))
5846				return ERROR;
5847		}
5848
5849		return NO_ERROR;
5850	}
5851
5852	bool Compile(const std::string& source)
5853	{
5854		const GLuint sh = glCreateShader(GL_COMPUTE_SHADER);
5855
5856		const char* const src = source.c_str();
5857		glShaderSource(sh, 1, &src, NULL);
5858		glCompileShader(sh);
5859
5860		GLchar log[1024];
5861		glGetShaderInfoLog(sh, sizeof(log), NULL, log);
5862		m_context.getTestContext().getLog() << tcu::TestLog::Message << "Shader Info Log:\n"
5863											<< log << tcu::TestLog::EndMessage;
5864
5865		GLint status;
5866		glGetShaderiv(sh, GL_COMPILE_STATUS, &status);
5867		glDeleteShader(sh);
5868
5869		if (status == GL_TRUE)
5870		{
5871			m_context.getTestContext().getLog()
5872				<< tcu::TestLog::Message << "Compilation should fail." << tcu::TestLog::EndMessage;
5873			return false;
5874		}
5875
5876		return true;
5877	}
5878};
5879
5880class NegativeGLSLLinkTimeErrors : public ComputeShaderBase
5881{
5882	virtual std::string Title()
5883	{
5884		return NL "Link-time errors";
5885	}
5886	virtual std::string Purpose()
5887	{
5888		return NL "Verify that appropriate errors are generated by the GLSL linker.";
5889	}
5890	virtual std::string Method()
5891	{
5892		return NL "";
5893	}
5894	virtual std::string PassCriteria()
5895	{
5896		return NL "";
5897	}
5898
5899	virtual long Run()
5900	{
5901		const char* const glsl_cs =
5902			NL "layout(local_size_x = 1, local_size_y = 1) in;" NL "layout(std430) buffer Output {" NL "  vec4 data;" NL
5903			   "} g_out;" NL "void main() {" NL "  g_out.data = vec4(1.0, 2.0, 3.0, 4.0);" NL "}";
5904		const char* const glsl_vs = NL "layout(location = 0) in mediump vec4 g_position;" NL "void main() {" NL
5905									   "  gl_Position = g_position;" NL "}";
5906		const char* const glsl_fs =
5907			NL "layout(location = 0) out mediump vec4 g_color;" NL "void main() {" NL "  g_color = vec4(1);" NL "}";
5908
5909		GLuint p = CreateComputeProgram(glsl_cs);
5910
5911		{
5912			const GLuint sh = glCreateShader(GL_VERTEX_SHADER);
5913			glAttachShader(p, sh);
5914			glDeleteShader(sh);
5915			const char* const src[2] = { kGLSLVer, glsl_vs };
5916			glShaderSource(sh, 2, src, NULL);
5917			glCompileShader(sh);
5918		}
5919		{
5920			const GLuint sh = glCreateShader(GL_FRAGMENT_SHADER);
5921			glAttachShader(p, sh);
5922			glDeleteShader(sh);
5923			const char* const src[2] = { kGLSLVer, glsl_fs };
5924			glShaderSource(sh, 2, src, NULL);
5925			glCompileShader(sh);
5926		}
5927		long error = NO_ERROR;
5928		glLinkProgram(p);
5929		if (CheckProgram(p))
5930			error = ERROR;
5931
5932		/* no layout */
5933		const char* const glsl_cs2 = NL "layout(std430) buffer Output {" NL "  vec4 data;" NL "} g_out;" NL
5934										"void main() {" NL "  g_out.data = vec4(1.0, 2.0, 3.0, 4.0);" NL "}";
5935
5936		GLuint p2 = CreateComputeProgram(glsl_cs2);
5937		glLinkProgram(p2);
5938		if (CheckProgram(p2))
5939			error = ERROR;
5940
5941		glDeleteProgram(p);
5942		glDeleteProgram(p2);
5943		return error;
5944	}
5945};
5946
5947class BasicWorkGroupSizeIsConst : public ComputeShaderBase
5948{
5949	virtual std::string Title()
5950	{
5951		return NL "gl_WorkGroupSize is an constant";
5952	}
5953	virtual std::string Purpose()
5954	{
5955		return NL "Verify that gl_WorkGroupSize can be used as an constant expression.";
5956	}
5957	virtual std::string Method()
5958	{
5959		return NL "";
5960	}
5961	virtual std::string PassCriteria()
5962	{
5963		return NL "";
5964	}
5965
5966	GLuint m_program;
5967	GLuint m_storage_buffer;
5968
5969	virtual long Setup()
5970	{
5971		m_program		 = 0;
5972		m_storage_buffer = 0;
5973		return NO_ERROR;
5974	}
5975
5976	virtual long Run()
5977	{
5978		const char* const glsl_cs =
5979			NL "layout(local_size_x = 2, local_size_y = 3, local_size_z = 4) in;" NL
5980			   "layout(std430, binding = 0) buffer Output {" NL "  uint g_buffer[22u + gl_WorkGroupSize.x];" NL "};" NL
5981			   "shared uint g_shared[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];" NL
5982			   "uniform uint g_uniform[gl_WorkGroupSize.z + 20u];" NL "void main() {" NL
5983			   "  g_shared[gl_LocalInvocationIndex] = 1U;" NL "  groupMemoryBarrier();" NL "  barrier();" NL
5984			   "  uint sum = 0u;" NL
5985			   "  for (uint i = 0u; i < gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z; ++i) {" NL
5986			   "    sum += g_shared[i];" NL "  }" NL "  sum += g_uniform[gl_LocalInvocationIndex];" NL
5987			   "  g_buffer[gl_LocalInvocationIndex] = sum;" NL "}";
5988		m_program = CreateComputeProgram(glsl_cs);
5989		glLinkProgram(m_program);
5990		if (!CheckProgram(m_program))
5991			return ERROR;
5992
5993		glGenBuffers(1, &m_storage_buffer);
5994		glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storage_buffer);
5995		glBufferData(GL_SHADER_STORAGE_BUFFER, 24 * sizeof(GLuint), NULL, GL_STATIC_DRAW);
5996
5997		glUseProgram(m_program);
5998		GLuint values[24] = { 1u,  2u,  3u,  4u,  5u,  6u,  7u,  8u,  9u,  10u, 11u, 12u,
5999							  13u, 14u, 15u, 16u, 17u, 18u, 19u, 20u, 21u, 22u, 23u, 24u };
6000		glUniform1uiv(glGetUniformLocation(m_program, "g_uniform"), 24, values);
6001		glDispatchCompute(1, 1, 1);
6002		glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
6003
6004		long	error = NO_ERROR;
6005		GLuint* data;
6006		glBindBuffer(GL_SHADER_STORAGE_BUFFER, m_storage_buffer);
6007		data =
6008			static_cast<GLuint*>(glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(GLuint) * 24, GL_MAP_READ_BIT));
6009		for (GLuint i = 0; i < 24; ++i)
6010		{
6011			if (data[i] != (i + 25))
6012			{
6013				m_context.getTestContext().getLog() << tcu::TestLog::Message << "Data at index " << i << " is "
6014													<< data[i] << " should be" << (i + 25) << tcu::TestLog::EndMessage;
6015				error = ERROR;
6016			}
6017		}
6018		glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
6019		glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
6020		return error;
6021	}
6022
6023	virtual long Cleanup()
6024	{
6025		glUseProgram(0);
6026		glDeleteProgram(m_program);
6027		glDeleteBuffers(1, &m_storage_buffer);
6028		return NO_ERROR;
6029	}
6030};
6031
6032} // anonymous namespace
6033
6034ComputeShaderTests::ComputeShaderTests(glcts::Context& context) : TestCaseGroup(context, "compute_shader", "")
6035{
6036}
6037
6038ComputeShaderTests::~ComputeShaderTests(void)
6039{
6040}
6041
/**
 * Registers every compute-shader subcase as a child of this group.
 *
 * Each child is a TestSubcase constructed from m_context, the subcase name and a
 * TestSubcase::Create<T> factory for the test class T. Children are added in the
 * order listed below.
 */
void ComputeShaderTests::init()
{
	using namespace glcts;
	// Simple and long-running compute tests
	addChild(new TestSubcase(m_context, "simple-compute", TestSubcase::Create<SimpleCompute>));
	addChild(new TestSubcase(m_context, "simple-compute-shared_context", TestSubcase::Create<LongRunningComputeFenceTest>));
	addChild(new TestSubcase(m_context, "simple-compute-shared_context-persistent-buffer", TestSubcase::Create<LongRunningPersistentSSBOComputeTest>));
	// Basic* tests
	addChild(new TestSubcase(m_context, "one-work-group", TestSubcase::Create<BasicOneWorkGroup>));
	addChild(new TestSubcase(m_context, "resource-ubo", TestSubcase::Create<BasicResourceUBO>));
	addChild(new TestSubcase(m_context, "resource-texture", TestSubcase::Create<BasicResourceTexture>));
	addChild(new TestSubcase(m_context, "resource-image", TestSubcase::Create<BasicResourceImage>));
	addChild(new TestSubcase(m_context, "resource-atomic-counter", TestSubcase::Create<BasicResourceAtomicCounter>));
	addChild(new TestSubcase(m_context, "resource-uniform", TestSubcase::Create<BasicResourceUniform>));
	addChild(new TestSubcase(m_context, "built-in-variables", TestSubcase::Create<BasicBuiltinVariables>));
	addChild(new TestSubcase(m_context, "max", TestSubcase::Create<BasicMax>));
	addChild(new TestSubcase(m_context, "work-group-size", TestSubcase::Create<BasicWorkGroupSizeIsConst>));
	addChild(new TestSubcase(m_context, "build-separable", TestSubcase::Create<BasicBuildSeparable>));
	addChild(new TestSubcase(m_context, "shared-simple", TestSubcase::Create<BasicSharedSimple>));
	addChild(new TestSubcase(m_context, "shared-struct", TestSubcase::Create<BasicSharedStruct>));
	addChild(new TestSubcase(m_context, "dispatch-indirect", TestSubcase::Create<BasicDispatchIndirect>));
	addChild(new TestSubcase(m_context, "sso-compute-pipeline", TestSubcase::Create<BasicSSOComputePipeline>));
	addChild(new TestSubcase(m_context, "sso-case2", TestSubcase::Create<BasicSSOCase2>));
	addChild(new TestSubcase(m_context, "sso-case3", TestSubcase::Create<BasicSSOCase3>));
	addChild(new TestSubcase(m_context, "atomic-case1", TestSubcase::Create<BasicAtomicCase1>));
	addChild(new TestSubcase(m_context, "atomic-case2", TestSubcase::Create<BasicAtomicCase2>));
	addChild(new TestSubcase(m_context, "atomic-case3", TestSubcase::Create<BasicAtomicCase3>));
	// Advanced* tests
	addChild(new TestSubcase(m_context, "copy-image", TestSubcase::Create<AdvancedCopyImage>));
	addChild(new TestSubcase(m_context, "pipeline-pre-vs", TestSubcase::Create<AdvancedPipelinePreVS>));
	addChild(
		new TestSubcase(m_context, "pipeline-gen-draw-commands", TestSubcase::Create<AdvancedPipelineGenDrawCommands>));
	addChild(new TestSubcase(m_context, "pipeline-compute-chain", TestSubcase::Create<AdvancedPipelineComputeChain>));
	addChild(new TestSubcase(m_context, "pipeline-post-fs", TestSubcase::Create<AdvancedPipelinePostFS>));
	addChild(new TestSubcase(m_context, "pipeline-post-xfb", TestSubcase::Create<AdvancedPipelinePostXFB>));
	addChild(new TestSubcase(m_context, "shared-indexing", TestSubcase::Create<AdvancedSharedIndexing>));
	addChild(new TestSubcase(m_context, "shared-max", TestSubcase::Create<AdvancedSharedMax>));
	addChild(new TestSubcase(m_context, "resources-max", TestSubcase::Create<AdvancedResourcesMax>));
	addChild(new TestSubcase(m_context, "work-group-size-usage", TestSubcase::Create<WorkGroupSizeUsage>));
	// Negative* tests
	addChild(new TestSubcase(m_context, "api-no-active-program", TestSubcase::Create<NegativeAPINoActiveProgram>));
	addChild(new TestSubcase(m_context, "api-work-group-count", TestSubcase::Create<NegativeAPIWorkGroupCount>));
	addChild(new TestSubcase(m_context, "api-indirect", TestSubcase::Create<NegativeAPIIndirect>));
	addChild(new TestSubcase(m_context, "api-program", TestSubcase::Create<NegativeAPIProgram>));
	addChild(
		new TestSubcase(m_context, "glsl-compile-time-errors", TestSubcase::Create<NegativeGLSLCompileTimeErrors>));
	addChild(new TestSubcase(m_context, "glsl-link-time-errors", TestSubcase::Create<NegativeGLSLLinkTimeErrors>));
	addChild(new TestSubcase(m_context, "api-attach-shader", TestSubcase::Create<NegativeAttachShader>));
}
6087} // glcts namespace
6088