1 /*------------------------------------------------------------------------
2  * OpenGL Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2017-2019 The Khronos Group Inc.
6  * Copyright (c) 2017 Codeplay Software Ltd.
7  * Copyright (c) 2019 NVIDIA Corporation.
8  *
9  * Licensed under the Apache License, Version 2.0 (the "License");
10  * you may not use this file except in compliance with the License.
11  * You may obtain a copy of the License at
12  *
13  *      http://www.apache.org/licenses/LICENSE-2.0
14  *
15  * Unless required by applicable law or agreed to in writing, software
16  * distributed under the License is distributed on an "AS IS" BASIS,
17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18  * See the License for the specific language governing permissions and
19  * limitations under the License.
20  *
21  */ /*!
22  * \file
23  * \brief Subgroups Tests Utils
24  */ /*--------------------------------------------------------------------*/
25 
26 #include "glcSubgroupsTestsUtils.hpp"
27 #include "deRandom.hpp"
28 #include "tcuCommandLine.hpp"
29 #include "tcuStringTemplate.hpp"
30 #include "gluContextInfo.hpp"
31 #include "gluShaderUtil.hpp"
32 
33 using namespace deqp;
34 using namespace std;
35 using namespace glc;
36 using namespace glw;
37 
38 namespace
39 {
40 // debug callback function
41 // To use:
42 //	  gl.enable(GL_DEBUG_OUTPUT);
43 //	  gl.debugMessageCallback(debugCallback, &context);
44 //
debugCallback(GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length, const char * message, const void * userParam)45 void debugCallback(GLenum source, GLenum type, GLuint id, GLenum severity,
46 					GLsizei length, const char * message, const void * userParam)
47 {
48 	glc::Context *context = (glc::Context *)userParam;
49 
50 	tcu::TestLog& log	= context->getDeqpContext().getTestContext().getLog();
51 
52 	log << tcu::TestLog::Message
53 		<< "DEBUG: source = " << source << ", type= " << type << ", id = " << id << ", severity = " << severity
54 		<< ", length = " << length << "\n"
55 		<< "DEBUG: `" << message << "`"
56 		<< tcu::TestLog::EndMessage;
57 
58 }
59 
60 // getFormatReadInfo
61 // returns the stride in bytes
getFormatReadInfo(const subgroups::Format format, GLenum &readFormat, GLenum &readType)62 deUint32 getFormatReadInfo(const subgroups::Format format, GLenum &readFormat, GLenum &readType)
63 {
64 	using namespace subgroups;
65 	switch (format)
66 	{
67 		default:
68 			DE_FATAL("Unhandled format!");
69 			// fall-through
70 		case FORMAT_R32G32B32A32_SFLOAT:
71 			readFormat = GL_RGBA;
72 			readType = GL_FLOAT;
73 			return 4u;
74 		case FORMAT_R32G32_SFLOAT:
75 			readFormat = GL_RG;
76 			readType = GL_FLOAT;
77 			return 2u;
78 		case FORMAT_R32_UINT:
79 			readFormat = GL_RED_INTEGER;
80 			readType = GL_UNSIGNED_INT;
81 			return 1u;
82 		case FORMAT_R32G32B32A32_UINT:
83 			readFormat = GL_RGBA_INTEGER;
84 			readType = GL_UNSIGNED_INT;
85 			return 4u;
86 	}
87 }
88 
getMaxWidth()89 deUint32 getMaxWidth ()
90 {
91 	return 1024u;
92 }
93 
// Returns the width that follows the given one.
// Below 128 (the max subgroup size) the width advances by one so that every
// value is tested; from 128 onwards it doubles, which keeps the widths at
// powers of two and reduces testing time.
deUint32 getNextWidth (const deUint32 width)
{
	const deUint32 linearLimit = 128u;

	return (width >= linearLimit) ? (width * 2) : (width + 1);
}
107 
getFormatSizeInBytes(const subgroups::Format format)108 deUint32 getFormatSizeInBytes(const subgroups::Format format)
109 {
110 	using namespace subgroups;
111 	switch (format)
112 	{
113 		default:
114 			DE_FATAL("Unhandled format!");
115 			return 0;
116 		case FORMAT_R32_SINT:
117 		case FORMAT_R32_UINT:
118 			return sizeof(deInt32);
119 		case FORMAT_R32G32_SINT:
120 		case FORMAT_R32G32_UINT:
121 			return static_cast<deUint32>(sizeof(deInt32) * 2);
122 		case FORMAT_R32G32B32_SINT:
123 		case FORMAT_R32G32B32_UINT:
124 		case FORMAT_R32G32B32A32_SINT:
125 		case FORMAT_R32G32B32A32_UINT:
126 			return static_cast<deUint32>(sizeof(deInt32) * 4);
127 		case FORMAT_R32_SFLOAT:
128 			return 4;
129 		case FORMAT_R32G32_SFLOAT:
130 			return 8;
131 		case FORMAT_R32G32B32_SFLOAT:
132 			return 16;
133 		case FORMAT_R32G32B32A32_SFLOAT:
134 			return 16;
135 		case FORMAT_R64_SFLOAT:
136 			return 8;
137 		case FORMAT_R64G64_SFLOAT:
138 			return 16;
139 		case FORMAT_R64G64B64_SFLOAT:
140 			return 32;
141 		case FORMAT_R64G64B64A64_SFLOAT:
142 			return 32;
143 		// The below formats are used to represent bool and bvec* types. These
144 		// types are passed to the shader as int and ivec* types, before the
145 		// calculations are done as booleans. We need a distinct type here so
146 		// that the shader generators can switch on it and generate the correct
147 		// shader source for testing.
148 		case FORMAT_R32_BOOL:
149 			return sizeof(deInt32);
150 		case FORMAT_R32G32_BOOL:
151 			return static_cast<deUint32>(sizeof(deInt32) * 2);
152 		case FORMAT_R32G32B32_BOOL:
153 		case FORMAT_R32G32B32A32_BOOL:
154 			return static_cast<deUint32>(sizeof(deInt32) * 4);
155 	}
156 }
157 
getElementSizeInBytes( const subgroups::Format format, const subgroups::SSBOData::InputDataLayoutType layout)158 deUint32 getElementSizeInBytes(
159 	const subgroups::Format format,
160 	const subgroups::SSBOData::InputDataLayoutType layout)
161 {
162 	deUint32 bytes = getFormatSizeInBytes(format);
163 	if (layout == subgroups::SSBOData::LayoutStd140)
164 		return bytes < 16 ? 16 : bytes;
165 	else
166 		return bytes;
167 }
168 
169 
makeGraphicsPipeline(glc::Context& context, const subgroups::ShaderStageFlags stages, const GlslSource * vshader, const GlslSource * fshader, const GlslSource * gshader, const GlslSource * tcshader, const GlslSource * teshader)170 de::MovePtr<glu::ShaderProgram> makeGraphicsPipeline(glc::Context&				context,
171 									  const subgroups::ShaderStageFlags			stages,
172 									  const GlslSource *						vshader,
173 									  const GlslSource *						fshader,
174 									  const GlslSource *						gshader,
175 									  const GlslSource *						tcshader,
176 									  const GlslSource *						teshader)
177 {
178 	tcu::TestLog&	log			= context.getDeqpContext().getTestContext().getLog();
179 	const bool		doShaderLog	= log.isShaderLoggingEnabled();
180 	DE_UNREF(stages);			// only used for asserts
181 
182 	map<string, string> templateArgs;
183 	string				versionDecl(getGLSLVersionDeclaration(context.getGLSLVersion()));
184 	string				tessExtension =
185 		 context.getDeqpContext().getContextInfo().isExtensionSupported("GL_EXT_tessellation_shader") ?
186 						   "#extension GL_EXT_tessellation_shader : require" :
187 						   "";
188 	templateArgs.insert(pair<string, string>("VERSION_DECL", versionDecl));
189 	templateArgs.insert(pair<string, string>("TESS_EXTENSION", tessExtension));
190 
191 	string vertSource, tescSource, teseSource, geomSource, fragSource;
192 	if (vshader)
193 	{
194 		DE_ASSERT(stages & subgroups::SHADER_STAGE_VERTEX_BIT);
195 		tcu::StringTemplate shaderTemplate(vshader->sources[glu::SHADERTYPE_VERTEX][0]);
196 		string shaderSource(shaderTemplate.specialize(templateArgs));
197 		if (doShaderLog)
198 		{
199 			log << tcu::TestLog::Message << "vertex shader:\n"
200 				<< shaderSource << "\n:end:" << tcu::TestLog::EndMessage;
201 		}
202 		vertSource = shaderSource;
203 	}
204 	if (tcshader)
205 	{
206 		DE_ASSERT(stages & subgroups::SHADER_STAGE_TESS_CONTROL_BIT);
207 		tcu::StringTemplate shaderTemplate(tcshader->sources[glu::SHADERTYPE_TESSELLATION_CONTROL][0]);
208 		string shaderSource(shaderTemplate.specialize(templateArgs));
209 		if (doShaderLog)
210 		{
211 			log << tcu::TestLog::Message << "tess control shader:\n"
212 				<< shaderSource << "\n:end:" << tcu::TestLog::EndMessage;
213 		}
214 		tescSource = shaderSource;
215 	}
216 	if (teshader)
217 	{
218 		DE_ASSERT(stages & subgroups::SHADER_STAGE_TESS_EVALUATION_BIT);
219 		tcu::StringTemplate shaderTemplate(teshader->sources[glu::SHADERTYPE_TESSELLATION_EVALUATION][0]);
220 		string shaderSource(shaderTemplate.specialize(templateArgs));
221 		if (doShaderLog)
222 		{
223 			log << tcu::TestLog::Message << "tess eval shader:\n"
224 				<< shaderSource << "\n:end:" << tcu::TestLog::EndMessage;
225 		}
226 		teseSource = shaderSource;
227 	}
228 	if (gshader)
229 	{
230 		DE_ASSERT(stages & subgroups::SHADER_STAGE_GEOMETRY_BIT);
231 		tcu::StringTemplate shaderTemplate(gshader->sources[glu::SHADERTYPE_GEOMETRY][0]);
232 		string shaderSource(shaderTemplate.specialize(templateArgs));
233 		if (doShaderLog)
234 		{
235 			log << tcu::TestLog::Message << "geometry shader:\n"
236 				<< shaderSource << "\n:end:" << tcu::TestLog::EndMessage;
237 		}
238 		geomSource = shaderSource;
239 	}
240 	if (fshader)
241 	{
242 		DE_ASSERT(stages & subgroups::SHADER_STAGE_FRAGMENT_BIT);
243 		tcu::StringTemplate shaderTemplate(fshader->sources[glu::SHADERTYPE_FRAGMENT][0]);
244 		string shaderSource(shaderTemplate.specialize(templateArgs));
245 		if (doShaderLog)
246 		{
247 			log << tcu::TestLog::Message << "fragment shader:\n"
248 				<< shaderSource << "\n:end:" << tcu::TestLog::EndMessage;
249 		}
250 		fragSource = shaderSource;
251 	}
252 
253 	glu::ShaderProgram *program = DE_NULL;
254 	if(context.getShaderType() == SHADER_TYPE_GLSL)
255 	{
256 		glu::ProgramSources sources;
257 		if (vshader)
258 			sources << glu::VertexSource(vertSource);
259 		if (tcshader)
260 			sources << glu::TessellationControlSource(tescSource);
261 		if (teshader)
262 			sources << glu::TessellationEvaluationSource(teseSource);
263 		if (gshader)
264 			sources << glu::GeometrySource(geomSource);
265 		if (fshader)
266 			sources << glu::FragmentSource(fragSource);
267 
268 		program = new glu::ShaderProgram(context.getDeqpContext().getRenderContext().getFunctions(), sources);
269 	} else {
270 		DE_ASSERT(context.getShaderType() == SHADER_TYPE_SPIRV);
271 
272 		glu::ProgramBinaries binaries;
273 		if (vshader)
274 			binaries << spirvUtils::makeSpirV(log, glu::VertexSource(vertSource), spirvUtils::SPIRV_VERSION_1_3);
275 		if (tcshader)
276 			binaries << spirvUtils::makeSpirV(log, glu::TessellationControlSource(tescSource), spirvUtils::SPIRV_VERSION_1_3);
277 		if (teshader)
278 			binaries << spirvUtils::makeSpirV(log, glu::TessellationEvaluationSource(teseSource), spirvUtils::SPIRV_VERSION_1_3);
279 		if (gshader)
280 			binaries << spirvUtils::makeSpirV(log, glu::GeometrySource(geomSource), spirvUtils::SPIRV_VERSION_1_3);
281 		if (fshader)
282 			binaries << spirvUtils::makeSpirV(log, glu::FragmentSource(fragSource), spirvUtils::SPIRV_VERSION_1_3);
283 
284 		program = new glu::ShaderProgram(context.getDeqpContext().getRenderContext().getFunctions(), binaries);
285 	}
286 
287 	if (!program->isOk())
288 	{
289 		log << tcu::TestLog::Message << "Shader build failed.\n"
290 			<< "Vertex: " << (vshader ? program->getShaderInfo(glu::SHADERTYPE_VERTEX).infoLog : "n/a") << "\n"
291 			<< "Tess Cont: " << (tcshader ? program->getShaderInfo(glu::SHADERTYPE_TESSELLATION_CONTROL).infoLog : "n/a") << "\n"
292 			<< "Tess Eval: " << (teshader ? program->getShaderInfo(glu::SHADERTYPE_TESSELLATION_EVALUATION).infoLog : "n/a") << "\n"
293 			<< "Geometry: " << (gshader ? program->getShaderInfo(glu::SHADERTYPE_GEOMETRY).infoLog : "n/a") << "\n"
294 			<< "Fragment: " << (fshader ? program->getShaderInfo(glu::SHADERTYPE_FRAGMENT).infoLog : "n/a") << "\n"
295 			<< "Program: " << program->getProgramInfo().infoLog << tcu::TestLog::EndMessage;
296 	}
297 	return de::MovePtr<glu::ShaderProgram>(program);
298 }
299 
makeComputePipeline(glc::Context& context, const GlslSource &glslTemplate, deUint32 localSizeX, deUint32 localSizeY, deUint32 localSizeZ)300 de::MovePtr<glu::ShaderProgram> makeComputePipeline(glc::Context& context, const GlslSource &glslTemplate,
301 									 deUint32 localSizeX, deUint32 localSizeY, deUint32 localSizeZ)
302 {
303 
304 	tcu::TestLog&	log			= context.getDeqpContext().getTestContext().getLog();
305 	const bool		doShaderLog	= log.isShaderLoggingEnabled();
306 
307 	tcu::StringTemplate computeTemplate(glslTemplate.sources[glu::SHADERTYPE_COMPUTE][0]);
308 
309 	map<string, string>		templateArgs;
310 	{
311 		stringstream localSize;
312 		localSize << "local_size_x = " << localSizeX;
313 		templateArgs.insert(pair<string, string>("LOCAL_SIZE_X", localSize.str()));
314 	}
315 	{
316 		stringstream localSize;
317 		localSize << "local_size_y = " << localSizeY;
318 		templateArgs.insert(pair<string, string>("LOCAL_SIZE_Y", localSize.str()));
319 	}
320 	{
321 		stringstream localSize;
322 		localSize << "local_size_z = " << localSizeZ;
323 		templateArgs.insert(pair<string, string>("LOCAL_SIZE_Z", localSize.str()));
324 	}
325 	string versionDecl(getGLSLVersionDeclaration(context.getGLSLVersion()));
326 	templateArgs.insert(pair<string, string>("VERSION_DECL", versionDecl));
327 
328 	glu::ComputeSource cshader(glu::ComputeSource(computeTemplate.specialize(templateArgs)));
329 
330 	if (doShaderLog)
331 	{
332 		log << tcu::TestLog::Message << "compute shader specialized source:\n"
333 			<< cshader.source << "\n:end:" << tcu::TestLog::EndMessage;
334 	}
335 
336 	glu::ShaderProgram *program = DE_NULL;
337 	if(context.getShaderType() == SHADER_TYPE_GLSL)
338 	{
339 		glu::ProgramSources sources;
340 		sources << cshader;
341 		program = new glu::ShaderProgram(context.getDeqpContext().getRenderContext().getFunctions(), sources);
342 	} else {
343 		DE_ASSERT(context.getShaderType() == SHADER_TYPE_SPIRV);
344 
345 		glu::ProgramBinaries binaries;
346 		binaries << spirvUtils::makeSpirV(log, cshader, spirvUtils::SPIRV_VERSION_1_3);
347 
348 		program = new glu::ShaderProgram(context.getDeqpContext().getRenderContext().getFunctions(), binaries);
349 	}
350 
351 	if (!program->isOk())
352 	{
353 		log << tcu::TestLog::Message << "Shader build failed.\n"
354 			<< "Compute: " << program->getShaderInfo(glu::SHADERTYPE_COMPUTE).infoLog << "\n"
355 			<< "Program: " << program->getProgramInfo().infoLog << tcu::TestLog::EndMessage;
356 	}
357 	return de::MovePtr<glu::ShaderProgram>(program);
358 }
359 
360 struct Buffer;
361 struct Image;
362 
363 struct BufferOrImage
364 {
isImage__anon27609::BufferOrImage365 	bool isImage() const
366 	{
367 		return m_isImage;
368 	}
369 
getAsBuffer__anon27609::BufferOrImage370 	Buffer* getAsBuffer()
371 	{
372 		if (m_isImage) DE_FATAL("Trying to get a buffer as an image!");
373 		return reinterpret_cast<Buffer* >(this);
374 	}
375 
getAsImage__anon27609::BufferOrImage376 	Image* getAsImage()
377 	{
378 		if (!m_isImage) DE_FATAL("Trying to get an image as a buffer!");
379 		return reinterpret_cast<Image*>(this);
380 	}
381 
getType__anon27609::BufferOrImage382 	virtual subgroups::DescriptorType getType() const
383 	{
384 		if (m_isImage)
385 		{
386 			return subgroups::DESCRIPTOR_TYPE_STORAGE_IMAGE;
387 		}
388 		else
389 		{
390 			return subgroups::DESCRIPTOR_TYPE_STORAGE_BUFFER;
391 		}
392 	}
393 
getId__anon27609::BufferOrImage394 	GLuint getId()
395 	{
396 		return m_objectId;
397 	}
398 
~BufferOrImage__anon27609::BufferOrImage399 	virtual ~BufferOrImage() {}
400 
401 protected:
BufferOrImage__anon27609::BufferOrImage402 	explicit BufferOrImage(glc::Context& context, bool image)
403 		: m_gl(context.getDeqpContext().getRenderContext().getFunctions())
404 		, m_isImage(image)
405 		, m_objectId(0) {}
406 
407 	const glw::Functions &	m_gl;
408 	bool					m_isImage;
409 	GLuint					m_objectId;
410 };
411 
412 struct Buffer : public BufferOrImage
413 {
Buffer__anon27609::Buffer414 	explicit Buffer(
415 		glc::Context& context, deUint64 sizeInBytes, GLenum target = GL_SHADER_STORAGE_BUFFER)
416 		: BufferOrImage		(context, false)
417 		, m_sizeInBytes		(sizeInBytes)
418 		, m_target			(target)
419 	{
420 		m_gl.genBuffers(1, &m_objectId);
421 		GLU_EXPECT_NO_ERROR(m_gl.getError(), "genBuffers");
422 		m_gl.bindBuffer(m_target, m_objectId);
423 		GLU_EXPECT_NO_ERROR(m_gl.getError(), "bindBuffer");
424 		m_gl.bufferData(m_target, m_sizeInBytes, NULL, GL_DYNAMIC_DRAW);
425 		GLU_EXPECT_NO_ERROR(m_gl.getError(), "bufferData");
426 		m_gl.bindBuffer(m_target, 0);
427 		GLU_EXPECT_NO_ERROR(m_gl.getError(), "bindBuffer(0)");
428 	}
429 
~Buffer__anon27609::Buffer430 	virtual ~Buffer()
431 	{
432 		if (m_objectId != 0)
433 		{
434 			m_gl.deleteBuffers(1, &m_objectId);
435 			GLU_EXPECT_NO_ERROR(m_gl.getError(), "glDeleteBuffers");
436 		}
437 	}
438 
getType__anon27609::Buffer439 	virtual subgroups::DescriptorType getType() const
440 	{
441 		if (GL_UNIFORM_BUFFER == m_target)
442 		{
443 			return subgroups::DESCRIPTOR_TYPE_UNIFORM_BUFFER;
444 		}
445 		return subgroups::DESCRIPTOR_TYPE_STORAGE_BUFFER;
446 	}
447 
mapBufferPtr__anon27609::Buffer448 	glw::GLvoid* mapBufferPtr() {
449 		glw::GLvoid *ptr;
450 
451 		m_gl.bindBuffer(m_target, m_objectId);
452 		GLU_EXPECT_NO_ERROR(m_gl.getError(), "glBindBuffer");
453 
454 		ptr = m_gl.mapBufferRange(m_target, 0, m_sizeInBytes, GL_MAP_READ_BIT | GL_MAP_WRITE_BIT);
455 		GLU_EXPECT_NO_ERROR(m_gl.getError(), "glMapBuffer");
456 
457 		m_gl.bindBuffer(m_target, 0);
458 		GLU_EXPECT_NO_ERROR(m_gl.getError(), "glBindBuffer(0)");
459 
460 		return ptr;
461 	}
462 
unmapBufferPtr__anon27609::Buffer463 	void unmapBufferPtr() {
464 		m_gl.bindBuffer(m_target, m_objectId);
465 		GLU_EXPECT_NO_ERROR(m_gl.getError(), "glBindBuffer");
466 
467 		m_gl.unmapBuffer(m_target);
468 		GLU_EXPECT_NO_ERROR(m_gl.getError(), "glUnmapBuffer");
469 
470 		m_gl.bindBuffer(m_target, 0);
471 		GLU_EXPECT_NO_ERROR(m_gl.getError(), "glBindBuffer(0)");
472 	}
473 
getSize__anon27609::Buffer474 	deUint64 getSize() const {
475 		return m_sizeInBytes;
476 	}
477 
478 private:
479 	deUint64					m_sizeInBytes;
480 	const GLenum				m_target;
481 };
482 
483 struct Image : public BufferOrImage
484 {
Image__anon27609::Image485 	explicit Image(glc::Context& context, deUint32 width, deUint32 height,
486 				   subgroups::Format format)
487 		: BufferOrImage(context, true)
488 	{
489 		m_gl.genTextures(1, &m_objectId);
490 		GLU_EXPECT_NO_ERROR(m_gl.getError(), "glGenTextures");
491 		m_gl.bindTexture(GL_TEXTURE_2D, m_objectId);
492 		GLU_EXPECT_NO_ERROR(m_gl.getError(), "glBindTexture");
493 		m_gl.texStorage2D(GL_TEXTURE_2D, 1, format, width, height);
494 		GLU_EXPECT_NO_ERROR(m_gl.getError(), "glTexStorage2D");
495 
496 		m_gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
497 		GLU_EXPECT_NO_ERROR(m_gl.getError(), "glTexParameteri");
498 		m_gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
499 		GLU_EXPECT_NO_ERROR(m_gl.getError(), "glTexParameteri");
500 	}
501 
~Image__anon27609::Image502 	virtual ~Image()
503 	{
504 		if (m_objectId != 0)
505 		{
506 			m_gl.deleteTextures(1, &m_objectId);
507 			GLU_EXPECT_NO_ERROR(m_gl.getError(), "glDeleteTextures");
508 		}
509 	}
510 
511 private:
512 };
513 
514 struct Vao
515 {
Vao__anon27609::Vao516 	explicit Vao(glc::Context& context)
517 		: m_gl(context.getDeqpContext().getRenderContext().getFunctions())
518 		, m_objectId(0)
519 	{
520 		m_gl.genVertexArrays(1, &m_objectId);
521 		GLU_EXPECT_NO_ERROR(m_gl.getError(), "glGenVertexArrays");
522 		m_gl.bindVertexArray(m_objectId);
523 		GLU_EXPECT_NO_ERROR(m_gl.getError(), "glBindVertexArray");
524 	}
525 
~Vao__anon27609::Vao526 	~Vao()
527 	{
528 		if (m_objectId != 0)
529 		{
530 			m_gl.deleteVertexArrays(1, &m_objectId);
531 			GLU_EXPECT_NO_ERROR(m_gl.getError(), "glDeleteVertexArrays");
532 		}
533 	}
534 
535 private:
536 	const glw::Functions &	m_gl;
537 	GLuint					m_objectId;
538 };
539 
540 struct Fbo
541 {
Fbo__anon27609::Fbo542 	explicit Fbo(glc::Context& context)
543 		: m_gl(context.getDeqpContext().getRenderContext().getFunctions())
544 		, m_objectId(0)
545 	{
546 		m_gl.genFramebuffers(1, &m_objectId);
547 		GLU_EXPECT_NO_ERROR(m_gl.getError(), "glGenFramebuffers");
548 		m_gl.bindFramebuffer(GL_FRAMEBUFFER, m_objectId);
549 		GLU_EXPECT_NO_ERROR(m_gl.getError(), "glBindFramebuffer");
550 	}
551 
~Fbo__anon27609::Fbo552 	~Fbo()
553 	{
554 		if (m_objectId != 0)
555 		{
556 			m_gl.deleteFramebuffers(1, &m_objectId);
557 			GLU_EXPECT_NO_ERROR(m_gl.getError(), "deleteFramebuffers");
558 		}
559 	}
560 
bind2D__anon27609::Fbo561 	void bind2D(Image &img)
562 	{
563 		m_gl.framebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, img.getId(), 0);
564 		GLU_EXPECT_NO_ERROR(m_gl.getError(), "glFramebufferTexture2D");
565 	}
566 
567 private:
568 	const glw::Functions &	m_gl;
569 	GLuint					m_objectId;
570 
571 };
572 }
573 
getSharedMemoryBallotHelper()574 std::string glc::subgroups::getSharedMemoryBallotHelper()
575 {
576 	return	"shared uvec4 superSecretComputeShaderHelper[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];\n"
577 			"uvec4 sharedMemoryBallot(bool vote)\n"
578 			"{\n"
579 			"  uint groupOffset = gl_SubgroupID;\n"
580 			"  // One invocation in the group 0's the whole group's data\n"
581 			"  if (subgroupElect())\n"
582 			"  {\n"
583 			"    superSecretComputeShaderHelper[groupOffset] = uvec4(0);\n"
584 			"  }\n"
585 			"  subgroupMemoryBarrierShared();\n"
586 			"  if (vote)\n"
587 			"  {\n"
588 			"    highp uint invocationId = gl_SubgroupInvocationID % 32u;\n"
589 			"    highp uint bitToSet = 1u << invocationId;\n"
590 			"    switch (gl_SubgroupInvocationID / 32u)\n"
591 			"    {\n"
592 			"    case 0u: atomicOr(superSecretComputeShaderHelper[groupOffset].x, bitToSet); break;\n"
593 			"    case 1u: atomicOr(superSecretComputeShaderHelper[groupOffset].y, bitToSet); break;\n"
594 			"    case 2u: atomicOr(superSecretComputeShaderHelper[groupOffset].z, bitToSet); break;\n"
595 			"    case 3u: atomicOr(superSecretComputeShaderHelper[groupOffset].w, bitToSet); break;\n"
596 			"    }\n"
597 			"  }\n"
598 			"  subgroupMemoryBarrierShared();\n"
599 			"  return superSecretComputeShaderHelper[groupOffset];\n"
600 			"}\n";
601 }
602 
getSubgroupSize(Context& context)603 deUint32 glc::subgroups::getSubgroupSize(Context& context)
604 {
605 	int subgroupSize = context.getDeqpContext().getContextInfo().getInt(GL_SUBGROUP_SIZE_KHR);
606 
607 	return subgroupSize;
608 }
609 
maxSupportedSubgroupSize()610 deUint32 glc::subgroups::maxSupportedSubgroupSize() {
611 	return 128u;
612 }
613 
getShaderStageName(ShaderStageFlags stage)614 std::string glc::subgroups::getShaderStageName(ShaderStageFlags stage)
615 {
616 	DE_ASSERT(stage & SHADER_STAGE_ALL_VALID);
617 	switch (stage)
618 	{
619 		default:
620 			DE_FATAL("Unhandled stage!");
621 			return "";
622 		case SHADER_STAGE_COMPUTE_BIT:
623 			return "compute";
624 		case SHADER_STAGE_FRAGMENT_BIT:
625 			return "fragment";
626 		case SHADER_STAGE_VERTEX_BIT:
627 			return "vertex";
628 		case SHADER_STAGE_GEOMETRY_BIT:
629 			return "geometry";
630 		case SHADER_STAGE_TESS_CONTROL_BIT:
631 			return "tess_control";
632 		case SHADER_STAGE_TESS_EVALUATION_BIT:
633 			return "tess_eval";
634 	}
635 }
636 
getSubgroupFeatureName(SubgroupFeatureFlags bit)637 std::string glc::subgroups::getSubgroupFeatureName(SubgroupFeatureFlags bit)
638 {
639 	DE_ASSERT(bit & SUBGROUP_FEATURE_ALL_VALID);
640 	switch (bit)
641 	{
642 		default:
643 			DE_FATAL("Unknown subgroup feature category!");
644 			return "";
645 		case SUBGROUP_FEATURE_BASIC_BIT:
646 			return "GL_SUBGROUP_FEATURE_BASIC_BIT_KHR";
647 		case SUBGROUP_FEATURE_VOTE_BIT:
648 			return "GL_SUBGROUP_FEATURE_VOTE_BIT_KHR";
649 		case SUBGROUP_FEATURE_ARITHMETIC_BIT:
650 			return "GL_SUBGROUP_FEATURE_ARITHMETIC_BIT_KHR";
651 		case SUBGROUP_FEATURE_BALLOT_BIT:
652 			return "GL_SUBGROUP_FEATURE_BALLOT_BIT_KHR";
653 		case SUBGROUP_FEATURE_SHUFFLE_BIT:
654 			return "GL_SUBGROUP_FEATURE_SHUFFLE_BIT_KHR";
655 		case SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT:
656 			return "GL_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT_KHR";
657 		case SUBGROUP_FEATURE_CLUSTERED_BIT:
658 			return "GL_SUBGROUP_FEATURE_CLUSTERED_BIT_KHR";
659 		case SUBGROUP_FEATURE_QUAD_BIT:
660 			return "GL_SUBGROUP_FEATURE_QUAD_BIT_KHR";
661 		case SUBGROUP_FEATURE_PARTITIONED_BIT_NV:
662 			return "GL_SUBGROUP_FEATURE_PARTITIONED_BIT_NV";
663 	}
664 }
665 
addNoSubgroupShader(SourceCollections& programCollection)666 void glc::subgroups::addNoSubgroupShader (SourceCollections& programCollection)
667 {
668 	{
669 		const std::string vertNoSubgroupGLSL =
670 			"${VERSION_DECL}\n"
671 			"void main (void)\n"
672 			"{\n"
673 			"  float pixelSize = 2.0f/1024.0f;\n"
674 			"   float pixelPosition = pixelSize/2.0f - 1.0f;\n"
675 			"  gl_Position = vec4(float(gl_VertexID) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
676 			"  gl_PointSize = 1.0f;\n"
677 			"}\n";
678 		programCollection.add("vert_noSubgroup") << glu::VertexSource(vertNoSubgroupGLSL);
679 	}
680 
681 	{
682 		const std::string tescNoSubgroupGLSL =
683 			"${VERSION_DECL}\n"
684 			"layout(vertices=1) out;\n"
685 			"\n"
686 			"void main (void)\n"
687 			"{\n"
688 			"  if (gl_InvocationID == 0)\n"
689 			"  {\n"
690 			"    gl_TessLevelOuter[0] = 1.0f;\n"
691 			"    gl_TessLevelOuter[1] = 1.0f;\n"
692 			"  }\n"
693 			"  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
694 			"}\n";
695 		programCollection.add("tesc_noSubgroup") << glu::TessellationControlSource(tescNoSubgroupGLSL);
696 	}
697 
698 	{
699 		const std::string teseNoSubgroupGLSL =
700 			"${VERSION_DECL}\n"
701 			"layout(isolines) in;\n"
702 			"\n"
703 			"void main (void)\n"
704 			"{\n"
705 			"  float pixelSize = 2.0f/1024.0f;\n"
706 			"  gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
707 			"}\n";
708 		programCollection.add("tese_noSubgroup") << glu::TessellationEvaluationSource(teseNoSubgroupGLSL);
709 	}
710 
711 }
712 
getVertShaderForStage(const ShaderStageFlags stage)713 std::string glc::subgroups::getVertShaderForStage(const ShaderStageFlags stage)
714 {
715 	DE_ASSERT(stage & SHADER_STAGE_ALL_VALID);
716 	switch (stage)
717 	{
718 		default:
719 			DE_FATAL("Unhandled stage!");
720 			return "";
721 		case SHADER_STAGE_FRAGMENT_BIT:
722 			return
723 				"${VERSION_DECL}\n"
724 				"void main (void)\n"
725 				"{\n"
726 				"  float pixelSize = 2.0f/1024.0f;\n"
727 				"   float pixelPosition = pixelSize/2.0f - 1.0f;\n"
728 				"  gl_Position = vec4(float(gl_VertexID) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
729 				"}\n";
730 		case SHADER_STAGE_GEOMETRY_BIT:
731 			return
732 				"${VERSION_DECL}\n"
733 				"void main (void)\n"
734 				"{\n"
735 				"}\n";
736 		case SHADER_STAGE_TESS_CONTROL_BIT:
737 		case SHADER_STAGE_TESS_EVALUATION_BIT:
738 			return
739 				"${VERSION_DECL}\n"
740 				"void main (void)\n"
741 				"{\n"
742 				"}\n";
743 	}
744 }
745 
isSubgroupSupported(Context& context)746 bool glc::subgroups::isSubgroupSupported(Context& context)
747 {
748 	return context.getDeqpContext().getContextInfo().isExtensionSupported("GL_KHR_shader_subgroup");
749 }
750 
areSubgroupOperationsSupportedForStage( Context& context, const ShaderStageFlags stage)751 bool glc::subgroups::areSubgroupOperationsSupportedForStage(
752 	Context& context, const ShaderStageFlags stage)
753 {
754 	DE_ASSERT(stage & SHADER_STAGE_ALL_VALID);
755 	int supportedStages = context.getDeqpContext().getContextInfo().getInt(GL_SUBGROUP_SUPPORTED_STAGES_KHR);
756 
757 	return (stage & supportedStages) ? true : false;
758 }
759 
areSubgroupOperationsRequiredForStage( const ShaderStageFlags stage)760 bool glc::subgroups::areSubgroupOperationsRequiredForStage(
761 	const ShaderStageFlags stage)
762 {
763 	DE_ASSERT(stage & SHADER_STAGE_ALL_VALID);
764 	switch (stage)
765 	{
766 		default:
767 			return false;
768 		case SHADER_STAGE_COMPUTE_BIT:
769 			return true;
770 	}
771 }
772 
isSubgroupFeatureSupportedForDevice( Context& context, const SubgroupFeatureFlags bit)773 bool glc::subgroups::isSubgroupFeatureSupportedForDevice(
774 	Context& context,
775 	const SubgroupFeatureFlags bit)
776 {
777 	DE_ASSERT(bit & SUBGROUP_FEATURE_ALL_VALID);
778 
779 	int supportedOperations = context.getDeqpContext().getContextInfo().getInt(GL_SUBGROUP_SUPPORTED_FEATURES_KHR);
780 
781 	return (bit & supportedOperations) ? true : false;
782 }
783 
isFragmentSSBOSupportedForDevice(Context& context)784 bool glc::subgroups::isFragmentSSBOSupportedForDevice(Context& context)
785 {
786 	int numFragmentSSBOs = context.getDeqpContext().getContextInfo().getInt(GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS);
787 
788 	return (numFragmentSSBOs > 0) ? true : false;
789 }
790 
isVertexSSBOSupportedForDevice(Context& context)791 bool glc::subgroups::isVertexSSBOSupportedForDevice(Context& context)
792 {
793 	int numVertexSSBOs = context.getDeqpContext().getContextInfo().getInt(GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS);
794 
795 	return (numVertexSSBOs > 0) ? true : false;
796 }
797 
isImageSupportedForStageOnDevice(Context& context, const ShaderStageFlags stage)798 bool glc::subgroups::isImageSupportedForStageOnDevice(Context& context, const ShaderStageFlags stage)
799 {
800 	glw::GLint stageQuery;
801 	DE_ASSERT(stage & SHADER_STAGE_ALL_VALID);
802 
803 	// image uniforms are optional in VTG stages
804 	switch (stage)
805 	{
806 		case SHADER_STAGE_FRAGMENT_BIT:
807 		case SHADER_STAGE_COMPUTE_BIT:
808 		default:
809 			return true;
810 		case SHADER_STAGE_VERTEX_BIT:
811 			stageQuery = GL_MAX_VERTEX_IMAGE_UNIFORMS;
812 			break;
813 		case SHADER_STAGE_TESS_CONTROL_BIT:
814 			stageQuery = GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS;
815 			break;
816 		case SHADER_STAGE_TESS_EVALUATION_BIT:
817 			stageQuery = GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS;
818 			break;
819 		case SHADER_STAGE_GEOMETRY_BIT:
820 			stageQuery = GL_MAX_GEOMETRY_IMAGE_UNIFORMS;
821 			break;
822 	}
823 
824 	int numImages = context.getDeqpContext().getContextInfo().getInt(stageQuery);
825 
826 	return (numImages > 0) ? true : false;
827 }
828 
isDoubleSupportedForDevice(Context& context)829 bool glc::subgroups::isDoubleSupportedForDevice(Context& context)
830 {
831 	glu::ContextType contextType = context.getDeqpContext().getRenderContext().getType();
832 	return (glu::contextSupports(contextType, glu::ApiType::core(4, 0)) ||
833 			context.getDeqpContext().getContextInfo().isExtensionSupported("GL_ARB_gpu_shader_fp64"));
834 }
835 
isDoubleFormat(Format format)836 bool glc::subgroups::isDoubleFormat(Format format)
837 {
838 	switch (format)
839 	{
840 		default:
841 			return false;
842 		case FORMAT_R64_SFLOAT:
843 		case FORMAT_R64G64_SFLOAT:
844 		case FORMAT_R64G64B64_SFLOAT:
845 		case FORMAT_R64G64B64A64_SFLOAT:
846 			return true;
847 	}
848 }
849 
getFormatNameForGLSL(Format format)850 std::string glc::subgroups::getFormatNameForGLSL (Format format)
851 {
852 	switch (format)
853 	{
854 		default:
855 			DE_FATAL("Unhandled format!");
856 			return "";
857 		case FORMAT_R32_SINT:
858 			return "int";
859 		case FORMAT_R32G32_SINT:
860 			return "ivec2";
861 		case FORMAT_R32G32B32_SINT:
862 			return "ivec3";
863 		case FORMAT_R32G32B32A32_SINT:
864 			return "ivec4";
865 		case FORMAT_R32_UINT:
866 			return "uint";
867 		case FORMAT_R32G32_UINT:
868 			return "uvec2";
869 		case FORMAT_R32G32B32_UINT:
870 			return "uvec3";
871 		case FORMAT_R32G32B32A32_UINT:
872 			return "uvec4";
873 		case FORMAT_R32_SFLOAT:
874 			return "float";
875 		case FORMAT_R32G32_SFLOAT:
876 			return "vec2";
877 		case FORMAT_R32G32B32_SFLOAT:
878 			return "vec3";
879 		case FORMAT_R32G32B32A32_SFLOAT:
880 			return "vec4";
881 		case FORMAT_R64_SFLOAT:
882 			return "double";
883 		case FORMAT_R64G64_SFLOAT:
884 			return "dvec2";
885 		case FORMAT_R64G64B64_SFLOAT:
886 			return "dvec3";
887 		case FORMAT_R64G64B64A64_SFLOAT:
888 			return "dvec4";
889 		case FORMAT_R32_BOOL:
890 			return "bool";
891 		case FORMAT_R32G32_BOOL:
892 			return "bvec2";
893 		case FORMAT_R32G32B32_BOOL:
894 			return "bvec3";
895 		case FORMAT_R32G32B32A32_BOOL:
896 			return "bvec4";
897 	}
898 }
899 
setVertexShaderFrameBuffer(SourceCollections& programCollection)900 void glc::subgroups::setVertexShaderFrameBuffer (SourceCollections& programCollection)
901 {
902 	programCollection.add("vert") << glu::VertexSource(
903 		"${VERSION_DECL}\n"
904 		"layout(location = 0) in highp vec4 in_position;\n"
905 		"void main (void)\n"
906 		"{\n"
907 		"  gl_Position = in_position;\n"
908 		"}\n");
909 }
910 
setFragmentShaderFrameBuffer(SourceCollections& programCollection)911 void glc::subgroups::setFragmentShaderFrameBuffer (SourceCollections& programCollection)
912 {
913 	programCollection.add("fragment") << glu::FragmentSource(
914 		"${VERSION_DECL}\n"
915 		"precision highp int;\n"
916 		"layout(location = 0) in highp float in_color;\n"
917 		"layout(location = 0) out uint out_color;\n"
918 		"void main()\n"
919 		"{\n"
920 		"	out_color = uint(in_color);\n"
921 		"}\n");
922 }
923 
setTesCtrlShaderFrameBuffer(SourceCollections& programCollection)924 void glc::subgroups::setTesCtrlShaderFrameBuffer (SourceCollections& programCollection)
925 {
926 	programCollection.add("tesc") << glu::TessellationControlSource(
927 		"${VERSION_DECL}\n"
928 		"#extension GL_KHR_shader_subgroup_basic: enable\n"
929 		"${TESS_EXTENSION}\n"
930 		"layout(vertices = 2) out;\n"
931 		"void main (void)\n"
932 		"{\n"
933 		"  if (gl_InvocationID == 0)\n"
934 		"  {\n"
935 		"    gl_TessLevelOuter[0] = 1.0f;\n"
936 		"    gl_TessLevelOuter[1] = 1.0f;\n"
937 		"  }\n"
938 		"  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
939 		"}\n");
940 }
941 
setTesEvalShaderFrameBuffer(SourceCollections& programCollection)942 void glc::subgroups::setTesEvalShaderFrameBuffer (SourceCollections& programCollection)
943 {
944 	programCollection.add("tese") << glu::TessellationEvaluationSource(
945 		"${VERSION_DECL}\n"
946 		"#extension GL_KHR_shader_subgroup_ballot: enable\n"
947 		"${TESS_EXTENSION}\n"
948 		"layout(isolines, equal_spacing, ccw ) in;\n"
949 		"layout(location = 0) in float in_color[];\n"
950 		"layout(location = 0) out float out_color;\n"
951 		"\n"
952 		"void main (void)\n"
953 		"{\n"
954 		"  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
955 		"  out_color = in_color[0];\n"
956 		"}\n");
957 }
958 
addGeometryShadersFromTemplate(const std::string& glslTemplate, SourceCollections& collection)959 void glc::subgroups::addGeometryShadersFromTemplate (const std::string& glslTemplate, SourceCollections& collection)
960 {
961 	tcu::StringTemplate geometryTemplate(glslTemplate);
962 
963 	map<string, string>		linesParams;
964 	linesParams.insert(pair<string, string>("TOPOLOGY", "lines"));
965 
966 	map<string, string>		pointsParams;
967 	pointsParams.insert(pair<string, string>("TOPOLOGY", "points"));
968 
969 	collection.add("geometry_lines")	<< glu::GeometrySource("${VERSION_DECL}\n" + geometryTemplate.specialize(linesParams));
970 	collection.add("geometry_points")	<< glu::GeometrySource("${VERSION_DECL}\n" + geometryTemplate.specialize(pointsParams));
971 }
972 
initializeMemory(deqp::Context& context, glw::GLvoid *hostPtr, subgroups::SSBOData& data)973 void initializeMemory(deqp::Context& context, glw::GLvoid *hostPtr, subgroups::SSBOData& data)
974 {
975 	using namespace subgroups;
976 	const Format format = data.format;
977 	const deUint64 size = data.numElements *
978 		(data.isImage ? getFormatSizeInBytes(format) : getElementSizeInBytes(format, data.layout));
979 	if (subgroups::SSBOData::InitializeNonZero == data.initializeType)
980 	{
981 		de::Random rnd(context.getTestContext().getCommandLine().getBaseSeed());
982 		switch (format)
983 		{
984 			default:
985 				DE_FATAL("Illegal buffer format");
986 				break;
987 			case FORMAT_R32_BOOL:
988 			case FORMAT_R32G32_BOOL:
989 			case FORMAT_R32G32B32_BOOL:
990 			case FORMAT_R32G32B32A32_BOOL:
991 			{
992 				deUint32* ptr = reinterpret_cast<deUint32*>(hostPtr);
993 
994 				for (deUint64 k = 0; k < (size / sizeof(deUint32)); k++)
995 				{
996 					deUint32 r = rnd.getUint32();
997 					ptr[k] = (r & 1) ? r : 0;
998 				}
999 			}
1000 			break;
1001 			case FORMAT_R32_SINT:
1002 			case FORMAT_R32G32_SINT:
1003 			case FORMAT_R32G32B32_SINT:
1004 			case FORMAT_R32G32B32A32_SINT:
1005 			case FORMAT_R32_UINT:
1006 			case FORMAT_R32G32_UINT:
1007 			case FORMAT_R32G32B32_UINT:
1008 			case FORMAT_R32G32B32A32_UINT:
1009 			{
1010 				deUint32* ptr = reinterpret_cast<deUint32*>(hostPtr);
1011 
1012 				for (deUint64 k = 0; k < (size / sizeof(deUint32)); k++)
1013 				{
1014 					ptr[k] = rnd.getUint32();
1015 				}
1016 			}
1017 			break;
1018 			case FORMAT_R32_SFLOAT:
1019 			case FORMAT_R32G32_SFLOAT:
1020 			case FORMAT_R32G32B32_SFLOAT:
1021 			case FORMAT_R32G32B32A32_SFLOAT:
1022 			{
1023 				float* ptr = reinterpret_cast<float*>(hostPtr);
1024 
1025 				for (deUint64 k = 0; k < (size / sizeof(float)); k++)
1026 				{
1027 					ptr[k] = rnd.getFloat();
1028 				}
1029 			}
1030 			break;
1031 			case FORMAT_R64_SFLOAT:
1032 			case FORMAT_R64G64_SFLOAT:
1033 			case FORMAT_R64G64B64_SFLOAT:
1034 			case FORMAT_R64G64B64A64_SFLOAT:
1035 			{
1036 				double* ptr = reinterpret_cast<double*>(hostPtr);
1037 
1038 				for (deUint64 k = 0; k < (size / sizeof(double)); k++)
1039 				{
1040 					ptr[k] = rnd.getDouble();
1041 				}
1042 			}
1043 			break;
1044 		}
1045 	}
1046 	else if (subgroups::SSBOData::InitializeZero == data.initializeType)
1047 	{
1048 		deUint32* ptr = reinterpret_cast<deUint32*>(hostPtr);
1049 
1050 		for (deUint64 k = 0; k < size / 4; k++)
1051 		{
1052 			ptr[k] = 0;
1053 		}
1054 	}
1055 
1056 	if (subgroups::SSBOData::InitializeNone != data.initializeType)
1057 	{
1058 		// nothing to do for GL
1059 	}
1060 }
1061 
getResultBinding(const glc::subgroups::ShaderStageFlags shaderStage)1062 deUint32 getResultBinding (const glc::subgroups::ShaderStageFlags shaderStage)
1063 {
1064 	using namespace glc::subgroups;
1065 	switch(shaderStage)
1066 	{
1067 		case SHADER_STAGE_VERTEX_BIT:
1068 			return 0u;
1069 		case SHADER_STAGE_TESS_CONTROL_BIT:
1070 			return 1u;
1071 		case SHADER_STAGE_TESS_EVALUATION_BIT:
1072 			return 2u;
1073 		case SHADER_STAGE_GEOMETRY_BIT:
1074 			return 3u;
1075 		default:
1076 			DE_ASSERT(0);
1077 			return -1;
1078 	}
1079 	DE_ASSERT(0);
1080 	return -1;
1081 }
1082 
makeTessellationEvaluationFrameBufferTest( Context& context, Format format, SSBOData* extraData, deUint32 extraDataCount, bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize), const ShaderStageFlags shaderStage)1083 tcu::TestStatus glc::subgroups::makeTessellationEvaluationFrameBufferTest(
1084 	Context& context, Format format, SSBOData* extraData,
1085 	deUint32 extraDataCount,
1086 	bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
1087 	const ShaderStageFlags shaderStage)
1088 {
1089 	tcu::TestLog& log	= context.getDeqpContext().getTestContext().getLog();
1090 	const glw::Functions& gl = context.getDeqpContext().getRenderContext().getFunctions();
1091 
1092 	const deUint32							maxWidth				= getMaxWidth();
1093 	vector<de::SharedPtr<BufferOrImage> >	inputBuffers			(extraDataCount);
1094 
1095 	const GlslSource& vshader = context.getSourceCollection().get("vert");
1096 	const GlslSource& tcshader = context.getSourceCollection().get("tesc");
1097 	const GlslSource& teshader = context.getSourceCollection().get("tese");
1098 	const GlslSource& fshader = context.getSourceCollection().get("fragment");
1099 
1100 	for (deUint32 i = 0u; i < extraDataCount; i++)
1101 	{
1102 		if (extraData[i].isImage)
1103 		{
1104 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
1105 			// haven't implemented init for images yet
1106 			DE_ASSERT(extraData[i].initializeType == subgroups::SSBOData::InitializeNone);
1107 		}
1108 		else
1109 		{
1110 			deUint64 size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
1111 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, GL_UNIFORM_BUFFER));
1112 
1113 			glw::GLvoid *ptr = inputBuffers[i]->getAsBuffer()->mapBufferPtr();
1114 			initializeMemory(context.getDeqpContext(), ptr, extraData[i]);
1115 			inputBuffers[i]->getAsBuffer()->unmapBufferPtr();
1116 		}
1117 	}
1118 
1119 	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
1120 	{
1121 		log << tcu::TestLog::Message
1122 			<< "binding inputBuffers[" << ndx << "](" << inputBuffers[ndx]->getType() << ", " << inputBuffers[ndx]->getId() << " ), "
1123 			<< "stage = " << shaderStage << " , binding = " << extraData[ndx].binding << "\n"
1124 			<< tcu::TestLog::EndMessage;
1125 
1126 		if (inputBuffers[ndx]->isImage())
1127 		{
1128 			gl.bindImageTexture(extraData[ndx].binding, inputBuffers[ndx]->getId(),
1129 								0, GL_FALSE, 0, GL_READ_ONLY, extraData[ndx].format);
1130 			GLU_EXPECT_NO_ERROR(gl.getError(), "glBindImageTexture()");
1131 		} else
1132 		{
1133 			gl.bindBufferBase(inputBuffers[ndx]->getType(), extraData[ndx].binding, inputBuffers[ndx]->getId());
1134 			GLU_EXPECT_NO_ERROR(gl.getError(), "glBindBufferBase()");
1135 		}
1136 	}
1137 
1138 	de::MovePtr<glu::ShaderProgram> pipeline(
1139 			makeGraphicsPipeline(context, (ShaderStageFlags)(SHADER_STAGE_VERTEX_BIT | SHADER_STAGE_FRAGMENT_BIT | SHADER_STAGE_TESS_CONTROL_BIT | SHADER_STAGE_TESS_EVALUATION_BIT),
1140 								 &vshader, &fshader, DE_NULL, &tcshader, &teshader));
1141 	if (!pipeline->isOk())
1142 	{
1143 		return tcu::TestStatus::fail("tese graphics program build failed");
1144 	}
1145 
1146 	const deUint32							subgroupSize			= getSubgroupSize(context);
1147 	const deUint64							vertexBufferSize		= 2ull * maxWidth * sizeof(tcu::Vec4);
1148 	Buffer									vertexBuffer			(context, vertexBufferSize, GL_ARRAY_BUFFER);
1149 	unsigned								totalIterations			= 0u;
1150 	unsigned								failedIterations		= 0u;
1151 	Image									discardableImage		(context, maxWidth, 1u, format);
1152 
1153 	{
1154 		glw::GLvoid *			bufferPtr			= vertexBuffer.mapBufferPtr();
1155 		std::vector<tcu::Vec4>	data				(2u * maxWidth, tcu::Vec4(1.0f, 0.0f, 1.0f, 1.0f));
1156 		const float				pixelSize			= 2.0f / static_cast<float>(maxWidth);
1157 		float					leftHandPosition	= -1.0f;
1158 
1159 		for(deUint32 ndx = 0u; ndx < data.size(); ndx+=2u)
1160 		{
1161 			data[ndx][0] = leftHandPosition;
1162 			leftHandPosition += pixelSize;
1163 			data[ndx+1][0] = leftHandPosition;
1164 		}
1165 
1166 		deMemcpy(bufferPtr, &data[0], data.size() * sizeof(tcu::Vec4));
1167 		vertexBuffer.unmapBufferPtr();
1168 	}
1169 
1170 	Vao vao(context);
1171 	Fbo fbo(context);
1172 	fbo.bind2D(discardableImage);
1173 
1174 	gl.viewport(0, 0, maxWidth, 1u);
1175 	GLU_EXPECT_NO_ERROR(gl.getError(), "glViewport");
1176 
1177 	const deUint64				imageResultSize		= getFormatSizeInBytes(format) * maxWidth;
1178 	vector<glw::GLubyte>		imageBufferResult(imageResultSize);
1179 	const deUint64				vertexBufferOffset	= 0u;
1180 
1181 	for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
1182 	{
1183 		totalIterations++;
1184 
1185 		{
1186 			gl.clearColor(0.0f, 0.0f, 0.0f, 0.0f);
1187 			GLU_EXPECT_NO_ERROR(gl.getError(), "glClearColor");
1188 			gl.clear(GL_COLOR_BUFFER_BIT);
1189 			GLU_EXPECT_NO_ERROR(gl.getError(), "glClear");
1190 
1191 			gl.useProgram(pipeline->getProgram());
1192 			GLU_EXPECT_NO_ERROR(gl.getError(), "glUseProgram");
1193 
1194 			gl.enableVertexAttribArray(0);
1195 			GLU_EXPECT_NO_ERROR(gl.getError(), "glEnableVertexAttribArray");
1196 
1197 			gl.bindBuffer(GL_ARRAY_BUFFER, vertexBuffer.getId());
1198 			GLU_EXPECT_NO_ERROR(gl.getError(), "glBindBuffer");
1199 
1200 			gl.vertexAttribPointer(0, 4, GL_FLOAT, GL_FALSE, sizeof(tcu::Vec4), glu::BufferOffsetAsPointer(vertexBufferOffset));
1201 			GLU_EXPECT_NO_ERROR(gl.getError(), "glVertexAttribPointer");
1202 
1203 			gl.patchParameteri(GL_PATCH_VERTICES, 2u);
1204 			GLU_EXPECT_NO_ERROR(gl.getError(), "glPatchParameter(PATCH_VERTICES)");
1205 
1206 			gl.drawArrays(GL_PATCHES, 0, 2 * width);
1207 			GLU_EXPECT_NO_ERROR(gl.getError(), "glDrawArrays");
1208 
1209 			gl.disableVertexAttribArray(0);
1210 
1211 			GLenum readFormat;
1212 			GLenum readType;
1213 			getFormatReadInfo(format, readFormat, readType);
1214 
1215 			gl.readPixels(0, 0, width, 1, readFormat, readType, (GLvoid*)&imageBufferResult[0]);
1216 			GLU_EXPECT_NO_ERROR(gl.getError(), "glReadPixels");
1217 		}
1218 
1219 		{
1220 			std::vector<const void*> datas;
1221 			datas.push_back(&imageBufferResult[0]);
1222 			if (!checkResult(datas, width/2u, subgroupSize))
1223 				failedIterations++;
1224 		}
1225 	}
1226 
1227 	if (0 < failedIterations)
1228 	{
1229 		log		<< tcu::TestLog::Message << (totalIterations - failedIterations) << " / "
1230 				<< totalIterations << " values passed" << tcu::TestLog::EndMessage;
1231 		return tcu::TestStatus::fail("Failed!");
1232 	} else
1233 	{
1234 		log	<< tcu::TestLog::Message << (totalIterations - failedIterations) << " / "
1235 				<<totalIterations << " values passed" << tcu::TestLog::EndMessage;
1236 	}
1237 
1238 	return tcu::TestStatus::pass("OK");
1239 }
1240 
check(std::vector<const void*> datas, deUint32 width, deUint32 ref)1241 bool glc::subgroups::check(std::vector<const void*> datas,
1242 	deUint32 width, deUint32 ref)
1243 {
1244 	const deUint32* data = reinterpret_cast<const deUint32*>(datas[0]);
1245 
1246 	for (deUint32 n = 0; n < width; ++n)
1247 	{
1248 		if (data[n] != ref)
1249 		{
1250 			return false;
1251 		}
1252 	}
1253 
1254 	return true;
1255 }
1256 
checkCompute(std::vector<const void*> datas, const deUint32 numWorkgroups[3], const deUint32 localSize[3], deUint32 ref)1257 bool glc::subgroups::checkCompute(std::vector<const void*> datas,
1258 	const deUint32 numWorkgroups[3], const deUint32 localSize[3],
1259 	deUint32 ref)
1260 {
1261 	const deUint32 globalSizeX = numWorkgroups[0] * localSize[0];
1262 	const deUint32 globalSizeY = numWorkgroups[1] * localSize[1];
1263 	const deUint32 globalSizeZ = numWorkgroups[2] * localSize[2];
1264 
1265 	return check(datas, globalSizeX * globalSizeY * globalSizeZ, ref);
1266 }
1267 
1268 
/*!
 * \brief Runs a framebuffer-based test with a vert + geometry + fragment
 *        program and validates the rendered pixels with checkResult.
 *
 * Extra inputs described by extraData are created as uniform buffers or
 * images and bound at their requested binding index. A vertex buffer with
 * maxWidth points is drawn with increasing point counts into a maxWidth x 1
 * image; after each draw `width` pixels are read back and handed to
 * checkResult as datas[0].
 *
 * \param context         Test context (GL functions, log, shader sources).
 * \param format          Format of the render target and of the read-back.
 * \param extraData       Array of additional inputs; may be images or buffers.
 * \param extraDataCount  Number of entries in extraData.
 * \param checkResult     Validation callback invoked once per tested width.
 * \return fail if the program does not build or any iteration fails the
 *         check, pass otherwise.
 */
tcu::TestStatus glc::subgroups::makeGeometryFrameBufferTest(
	Context& context, Format format, SSBOData* extraData,
	deUint32 extraDataCount,
	bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize))
{
	tcu::TestLog& log	= context.getDeqpContext().getTestContext().getLog();
	const glw::Functions& gl = context.getDeqpContext().getRenderContext().getFunctions();

	const deUint32							maxWidth				= getMaxWidth();
	vector<de::SharedPtr<BufferOrImage> >	inputBuffers			(extraDataCount);

	// Shader sources are looked up by name in the context's source collection.
	const GlslSource& vshader = context.getSourceCollection().get("vert");
	const GlslSource& gshader = context.getSourceCollection().get("geometry");
	const GlslSource& fshader = context.getSourceCollection().get("fragment");

	// Create the extra inputs; only buffers are given initial contents.
	for (deUint32 i = 0u; i < extraDataCount; i++)
	{
		if (extraData[i].isImage)
		{
			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
			// haven't implemented init for images yet
			DE_ASSERT(extraData[i].initializeType == subgroups::SSBOData::InitializeNone);
		}
		else
		{
			deUint64 size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, GL_UNIFORM_BUFFER));

			glw::GLvoid *ptr = inputBuffers[i]->getAsBuffer()->mapBufferPtr();
			initializeMemory(context.getDeqpContext(), ptr, extraData[i]);
			inputBuffers[i]->getAsBuffer()->unmapBufferPtr();
		}
	}

	// Bind every extra input at the binding index requested by its descriptor.
	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
	{
		log << tcu::TestLog::Message
			<< "binding inputBuffers[" << ndx << "](" << inputBuffers[ndx]->getType() << ", " << inputBuffers[ndx]->getId() << " ), "
			<< "GEOMETRY, binding = " << extraData[ndx].binding << "\n"
			<< tcu::TestLog::EndMessage;

		if (inputBuffers[ndx]->isImage())
		{
			gl.bindImageTexture(extraData[ndx].binding, inputBuffers[ndx]->getId(),
								0, GL_FALSE, 0, GL_READ_ONLY, extraData[ndx].format);
			GLU_EXPECT_NO_ERROR(gl.getError(), "glBindImageTexture()");
		} else
		{
			gl.bindBufferBase(inputBuffers[ndx]->getType(), extraData[ndx].binding, inputBuffers[ndx]->getId());
			GLU_EXPECT_NO_ERROR(gl.getError(), "glBindBufferBase()");
		}
	}

	// No tessellation stages are supplied for this program.
	de::MovePtr<glu::ShaderProgram> pipeline(
			makeGraphicsPipeline(context, (ShaderStageFlags)(SHADER_STAGE_VERTEX_BIT | SHADER_STAGE_FRAGMENT_BIT | SHADER_STAGE_GEOMETRY_BIT),
								 &vshader, &fshader, &gshader, DE_NULL, DE_NULL));
	if (!pipeline->isOk())
	{
		return tcu::TestStatus::fail("geom graphics program build failed");
	}

	const deUint32							subgroupSize			= getSubgroupSize(context);
	const deUint64							vertexBufferSize		= maxWidth * sizeof(tcu::Vec4);
	Buffer									vertexBuffer			(context, vertexBufferSize, GL_ARRAY_BUFFER);
	unsigned								totalIterations			= 0u;
	unsigned								failedIterations		= 0u;
	Image									discardableImage		(context, maxWidth, 1u, format);

	{
		// One point per pixel column: x is placed half a pixelSize to the right
		// of each column's left edge, starting from -1.
		glw::GLvoid *			bufferPtr			= vertexBuffer.mapBufferPtr();
		std::vector<tcu::Vec4>	data				(maxWidth, tcu::Vec4(1.0f, 0.5f, 1.0f, 1.0f));
		const float				pixelSize			= 2.0f / static_cast<float>(maxWidth);
		float					leftHandPosition	= -1.0f;

		for(deUint32 ndx = 0u; ndx < maxWidth; ++ndx)
		{
			data[ndx][0] = leftHandPosition + pixelSize / 2.0f;
			leftHandPosition += pixelSize;
		}

		deMemcpy(bufferPtr, &data[0], maxWidth * sizeof(tcu::Vec4));
		vertexBuffer.unmapBufferPtr();
	}

	Vao vao(context);
	Fbo fbo(context);
	fbo.bind2D(discardableImage);

	gl.viewport(0, 0, maxWidth, 1u);
	GLU_EXPECT_NO_ERROR(gl.getError(), "glViewport");

	// Read-back storage sized for a full maxWidth row of `format` pixels.
	const deUint64				imageResultSize		= getFormatSizeInBytes(format) * maxWidth;
	vector<glw::GLubyte>		imageBufferResult(imageResultSize);
	const deUint64				vertexBufferOffset	= 0u;

	// One iteration per width in the sequence 1, getNextWidth(1), ... below maxWidth.
	for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
	{
		totalIterations++;

		// Re-initialize the buffer inputs before every draw.
		for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
		{
			if (inputBuffers[ndx]->isImage())
			{
				DE_ASSERT(extraData[ndx].initializeType == subgroups::SSBOData::InitializeNone);
			} else
			{
				glw::GLvoid *ptr = inputBuffers[ndx]->getAsBuffer()->mapBufferPtr();
				initializeMemory(context.getDeqpContext(), ptr, extraData[ndx]);
				inputBuffers[ndx]->getAsBuffer()->unmapBufferPtr();
			}
		}

		{
			gl.clearColor(0.0f, 0.0f, 0.0f, 0.0f);
			GLU_EXPECT_NO_ERROR(gl.getError(), "glClearColor");
			gl.clear(GL_COLOR_BUFFER_BIT);
			GLU_EXPECT_NO_ERROR(gl.getError(), "glClear");

			gl.useProgram(pipeline->getProgram());
			GLU_EXPECT_NO_ERROR(gl.getError(), "glUseProgram");

			gl.enableVertexAttribArray(0);
			GLU_EXPECT_NO_ERROR(gl.getError(), "glEnableVertexAttribArray");

			gl.bindBuffer(GL_ARRAY_BUFFER, vertexBuffer.getId());
			GLU_EXPECT_NO_ERROR(gl.getError(), "glBindBuffer");

			gl.vertexAttribPointer(0, 4, GL_FLOAT, GL_FALSE, sizeof(tcu::Vec4), glu::BufferOffsetAsPointer(vertexBufferOffset));
			GLU_EXPECT_NO_ERROR(gl.getError(), "glVertexAttribPointer");

			// Draw the first `width` points of the vertex buffer.
			gl.drawArrays(GL_POINTS, 0, width);
			GLU_EXPECT_NO_ERROR(gl.getError(), "glDrawArrays");

			gl.disableVertexAttribArray(0);
			GLU_EXPECT_NO_ERROR(gl.getError(), "glDisableVertexAttribArray");

			GLenum readFormat;
			GLenum readType;
			getFormatReadInfo(format, readFormat, readType);

			// Only the leftmost `width` pixels of the single row are read back.
			gl.readPixels(0, 0, width, 1, readFormat, readType, (GLvoid*)&imageBufferResult[0]);
			GLU_EXPECT_NO_ERROR(gl.getError(), "glReadPixels");
		}

		{
			// The read-back pixels are the only data passed to the callback.
			std::vector<const void*> datas;
			datas.push_back(&imageBufferResult[0]);
			if (!checkResult(datas, width, subgroupSize))
				failedIterations++;
		}
	}

	// Both branches log the same summary; only the returned status differs.
	if (0 < failedIterations)
	{
		log	<< tcu::TestLog::Message << (totalIterations - failedIterations) << " / "
				<< totalIterations << " values passed" << tcu::TestLog::EndMessage;
		return tcu::TestStatus::fail("Failed!");
	} else
	{
		log	<< tcu::TestLog::Message << (totalIterations - failedIterations) << " / "
				<<totalIterations << " values passed" << tcu::TestLog::EndMessage;
	}

	return tcu::TestStatus::pass("OK");
}
1434 
allStages( Context& context, Format format, SSBOData* extraDatas, deUint32 extraDatasCount, bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize), const ShaderStageFlags shaderStageTested)1435 tcu::TestStatus glc::subgroups::allStages(
1436 	Context& context, Format format, SSBOData* extraDatas,
1437 	deUint32 extraDatasCount,
1438 	bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
1439 	const ShaderStageFlags shaderStageTested)
1440 {
1441 	const deUint32					maxWidth			= getMaxWidth();
1442 	vector<ShaderStageFlags>		stagesVector;
1443 	ShaderStageFlags				shaderStageRequired	= (ShaderStageFlags)0ull;
1444 	tcu::TestLog&					log					= context.getDeqpContext().getTestContext().getLog();
1445 	const glw::Functions&			gl					= context.getDeqpContext().getRenderContext().getFunctions();
1446 
1447 	if (shaderStageTested & SHADER_STAGE_VERTEX_BIT)
1448 	{
1449 		stagesVector.push_back(SHADER_STAGE_VERTEX_BIT);
1450 	}
1451 	if (shaderStageTested & SHADER_STAGE_TESS_CONTROL_BIT)
1452 	{
1453 		stagesVector.push_back(SHADER_STAGE_TESS_CONTROL_BIT);
1454 		shaderStageRequired = (ShaderStageFlags)((deUint32)shaderStageRequired | ((deUint32)(shaderStageTested & SHADER_STAGE_TESS_EVALUATION_BIT) ? 0u : (deUint32)SHADER_STAGE_TESS_EVALUATION_BIT));
1455 		shaderStageRequired = (ShaderStageFlags)((deUint32)shaderStageRequired | ((deUint32)(shaderStageTested & SHADER_STAGE_VERTEX_BIT) ? 0u : (deUint32)SHADER_STAGE_VERTEX_BIT));
1456 	}
1457 	if (shaderStageTested & SHADER_STAGE_TESS_EVALUATION_BIT)
1458 	{
1459 		stagesVector.push_back(SHADER_STAGE_TESS_EVALUATION_BIT);
1460 		shaderStageRequired = (ShaderStageFlags)((deUint32)shaderStageRequired | ((deUint32)(shaderStageTested & SHADER_STAGE_VERTEX_BIT) ? 0u : (deUint32)SHADER_STAGE_VERTEX_BIT));
1461 		shaderStageRequired = (ShaderStageFlags)((deUint32)shaderStageRequired | ((deUint32)(shaderStageTested & SHADER_STAGE_TESS_CONTROL_BIT) ? 0u : (deUint32)SHADER_STAGE_TESS_CONTROL_BIT));
1462 	}
1463 	if (shaderStageTested & SHADER_STAGE_GEOMETRY_BIT)
1464 	{
1465 		stagesVector.push_back(SHADER_STAGE_GEOMETRY_BIT);
1466 		const ShaderStageFlags required = SHADER_STAGE_VERTEX_BIT;
1467 		shaderStageRequired = (ShaderStageFlags)((deUint32)shaderStageRequired | ((deUint32)(shaderStageTested & required) ? 0u : (deUint32)required));
1468 	}
1469 	if (shaderStageTested & SHADER_STAGE_FRAGMENT_BIT)
1470 	{
1471 		const ShaderStageFlags required = SHADER_STAGE_VERTEX_BIT;
1472 		shaderStageRequired = (ShaderStageFlags)((deUint32)shaderStageRequired | ((deUint32)(shaderStageTested & required) ? 0u : (deUint32)required));
1473 	}
1474 
1475 	const deUint32	stagesCount	= static_cast<deUint32>(stagesVector.size());
1476 	const string	vert		= (shaderStageRequired & SHADER_STAGE_VERTEX_BIT)			? "vert_noSubgroup"		: "vert";
1477 	const string	tesc		= (shaderStageRequired & SHADER_STAGE_TESS_CONTROL_BIT)		? "tesc_noSubgroup"		: "tesc";
1478 	const string	tese		= (shaderStageRequired & SHADER_STAGE_TESS_EVALUATION_BIT)	? "tese_noSubgroup"		: "tese";
1479 
1480 	shaderStageRequired = (ShaderStageFlags)(shaderStageTested | shaderStageRequired);
1481 
1482 	const GlslSource *vshader = &context.getSourceCollection().get(vert);
1483 	const GlslSource *fshader = DE_NULL;
1484 	const GlslSource *gshader = DE_NULL;
1485 	const GlslSource *tcshader = DE_NULL;
1486 	const GlslSource *teshader = DE_NULL;
1487 
1488 	if (shaderStageRequired & SHADER_STAGE_TESS_CONTROL_BIT)
1489 	{
1490 		tcshader = &context.getSourceCollection().get(tesc);
1491 		teshader = &context.getSourceCollection().get(tese);
1492 	}
1493 	if (shaderStageRequired & SHADER_STAGE_GEOMETRY_BIT)
1494 	{
1495 		if (shaderStageRequired & SHADER_STAGE_TESS_EVALUATION_BIT)
1496 		{
1497 			// tessellation shaders output line primitives
1498 			gshader = &context.getSourceCollection().get("geometry_lines");
1499 		}
1500 		else
1501 		{
1502 			// otherwise points are processed by geometry shader
1503 			gshader = &context.getSourceCollection().get("geometry_points");
1504 		}
1505 	}
1506 	if (shaderStageRequired & SHADER_STAGE_FRAGMENT_BIT)
1507 	{
1508 		fshader = &context.getSourceCollection().get("fragment");
1509 	}
1510 
1511 	std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(stagesCount + extraDatasCount);
1512 
1513 	// The implicit result SSBO we use to store our outputs from the shader
1514 	for (deUint32 ndx = 0u; ndx < stagesCount; ++ndx)
1515 	{
1516 		const deUint64 shaderSize = (stagesVector[ndx] == SHADER_STAGE_TESS_EVALUATION_BIT) ? maxWidth * 2 : maxWidth;
1517 		const deUint64 size = getElementSizeInBytes(format, SSBOData::LayoutStd430) * shaderSize;
1518 		inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
1519 
1520 		log << tcu::TestLog::Message
1521 			<< "binding inputBuffers[" << ndx << "](" << inputBuffers[ndx]->getType() << ", "
1522 			<< inputBuffers[ndx]->getId() << ", " << size << "), "
1523 			<< "inputstage[" << ndx << "] = " << stagesVector[ndx] << " binding = " << getResultBinding(stagesVector[ndx])
1524 			<< tcu::TestLog::EndMessage;
1525 
1526 		gl.bindBufferBase(inputBuffers[ndx]->getType(), getResultBinding(stagesVector[ndx]), inputBuffers[ndx]->getId());
1527 		GLU_EXPECT_NO_ERROR(gl.getError(), "glBindBufferBase(ndx, inputBuffers[ndx])");
1528 	}
1529 
1530 	for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
1531 	{
1532 		const deUint32 datasNdx = ndx - stagesCount;
1533 		if (extraDatas[datasNdx].isImage)
1534 		{
1535 			inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraDatas[datasNdx].numElements), 1, extraDatas[datasNdx].format));
1536 
1537 			// haven't implemented init for images yet
1538 			DE_ASSERT(extraDatas[datasNdx].initializeType == subgroups::SSBOData::InitializeNone);
1539 		}
1540 		else
1541 		{
1542 			const deUint64 size = getElementSizeInBytes(extraDatas[datasNdx].format, extraDatas[datasNdx].layout) * extraDatas[datasNdx].numElements;
1543 			inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
1544 
1545 			glw::GLvoid *ptr = inputBuffers[ndx]->getAsBuffer()->mapBufferPtr();
1546 			initializeMemory(context.getDeqpContext(), ptr, extraDatas[datasNdx]);
1547 			inputBuffers[ndx]->getAsBuffer()->unmapBufferPtr();
1548 		}
1549 
1550 		log << tcu::TestLog::Message
1551 			<< "binding inputBuffers[" << ndx << "](" << inputBuffers[ndx]->getType() << ", "
1552 			<< inputBuffers[ndx]->getId() << ", " << extraDatas[datasNdx].numElements << " els), "
1553 			<< "extrastage[" << datasNdx << "] = " << extraDatas[datasNdx].stages << " binding = " << extraDatas[datasNdx].binding
1554 			<< tcu::TestLog::EndMessage;
1555 
1556 		if (inputBuffers[ndx]->isImage())
1557 		{
1558 			gl.bindImageTexture(extraDatas[datasNdx].binding, inputBuffers[ndx]->getId(),
1559 								0, GL_FALSE, 0, GL_READ_WRITE, extraDatas[datasNdx].format);
1560 			GLU_EXPECT_NO_ERROR(gl.getError(), "glBindImageTexture(extraDatas[datasNdx])");
1561 		} else
1562 		{
1563 			gl.bindBufferBase(inputBuffers[ndx]->getType(), extraDatas[datasNdx].binding, inputBuffers[ndx]->getId());
1564 			GLU_EXPECT_NO_ERROR(gl.getError(), "glBindBufferBase(extraDatas[datasNdx])");
1565 		}
1566 	}
1567 
1568 	de::MovePtr<glu::ShaderProgram> pipeline(
1569 			makeGraphicsPipeline(context, shaderStageRequired, vshader, fshader, gshader, tcshader, teshader));
1570 
1571 	if (!pipeline->isOk())
1572 	{
1573 		return tcu::TestStatus::fail("allstages graphics program build failed");
1574 	}
1575 
1576 	{
1577 		const deUint32					subgroupSize			= getSubgroupSize(context);
1578 		unsigned						totalIterations			= 0u;
1579 		unsigned						failedIterations		= 0u;
1580 		Image							resultImage				(context, maxWidth, 1, format);
1581 		const deUint64					imageResultSize			= getFormatSizeInBytes(format) * maxWidth;
1582 		vector<glw::GLubyte>			imageBufferResult(imageResultSize);
1583 
1584 		Vao vao(context);
1585 		Fbo fbo(context);
1586 		fbo.bind2D(resultImage);
1587 
1588 		gl.viewport(0, 0, maxWidth, 1u);
1589 		GLU_EXPECT_NO_ERROR(gl.getError(), "viewport");
1590 
1591 		for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
1592 		{
1593 			for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
1594 			{
1595 				// re-init the data
1596 				if (extraDatas[ndx - stagesCount].isImage)
1597 				{
1598 					// haven't implemented init for images yet
1599 					DE_ASSERT(extraDatas[ndx - stagesCount].initializeType == subgroups::SSBOData::InitializeNone);
1600 				} else
1601 				{
1602 					glw::GLvoid *ptr = inputBuffers[ndx]->getAsBuffer()->mapBufferPtr();
1603 					initializeMemory(context.getDeqpContext(), ptr, extraDatas[ndx - stagesCount]);
1604 					inputBuffers[ndx]->getAsBuffer()->unmapBufferPtr();
1605 				}
1606 			}
1607 
1608 			totalIterations++;
1609 
1610 			gl.clearColor(0.0f, 0.0f, 0.0f, 0.0f);
1611 			GLU_EXPECT_NO_ERROR(gl.getError(), "glClearColor");
1612 			gl.clear(GL_COLOR_BUFFER_BIT);
1613 			GLU_EXPECT_NO_ERROR(gl.getError(), "glClear");
1614 
1615 			gl.useProgram(pipeline->getProgram());
1616 			GLU_EXPECT_NO_ERROR(gl.getError(), "glUseProgram");
1617 
1618 			glw::GLenum drawType;
1619 			if (shaderStageRequired & SHADER_STAGE_TESS_CONTROL_BIT)
1620 			{
1621 				drawType = GL_PATCHES;
1622 				gl.patchParameteri(GL_PATCH_VERTICES, 1u);
1623 				GLU_EXPECT_NO_ERROR(gl.getError(), "glPatchParameter(PATCH_VERTICES)");
1624 			} else
1625 			{
1626 				drawType = GL_POINTS;
1627 			}
1628 
1629 			gl.drawArrays(drawType, 0, width);
1630 			GLU_EXPECT_NO_ERROR(gl.getError(), "glDrawArrays");
1631 
1632 			GLenum readFormat;
1633 			GLenum readType;
1634 			getFormatReadInfo(format, readFormat, readType);
1635 
1636 			gl.readPixels(0, 0, width, 1, readFormat, readType, (GLvoid*)&imageBufferResult[0]);
1637 			GLU_EXPECT_NO_ERROR(gl.getError(), "glReadPixels");
1638 
1639 			for (deUint32 ndx = 0u; ndx < stagesCount; ++ndx)
1640 			{
1641 				std::vector<const void*> datas;
1642 				std::vector<Buffer *> buffersToUnmap;
1643 
1644 				if (!inputBuffers[ndx]->isImage())
1645 				{
1646 					glw::GLvoid * resultData = inputBuffers[ndx]->getAsBuffer()->mapBufferPtr();
1647 					buffersToUnmap.push_back(inputBuffers[ndx]->getAsBuffer());
1648 					// we always have our result data first
1649 					datas.push_back(resultData);
1650 				}
1651 
1652 				for (deUint32 index = stagesCount; index < stagesCount + extraDatasCount; ++index)
1653 				{
1654 					const deUint32 datasNdx = index - stagesCount;
1655 					if ((stagesVector[ndx] & extraDatas[datasNdx].stages) && (!inputBuffers[index]->isImage()))
1656 					{
1657 						glw::GLvoid * resultData = inputBuffers[index]->getAsBuffer()->mapBufferPtr();
1658 						buffersToUnmap.push_back(inputBuffers[index]->getAsBuffer());
1659 						datas.push_back(resultData);
1660 					}
1661 				}
1662 
1663 				if (!checkResult(datas, (stagesVector[ndx] == SHADER_STAGE_TESS_EVALUATION_BIT) ? width * 2 : width , subgroupSize))
1664 					failedIterations++;
1665 
1666 				while( !buffersToUnmap.empty() )
1667 				{
1668 					Buffer * buf = buffersToUnmap.back();
1669 					buf->unmapBufferPtr();
1670 					buffersToUnmap.pop_back();
1671 				}
1672 			}
1673 			if (shaderStageTested & SHADER_STAGE_FRAGMENT_BIT)
1674 			{
1675 				std::vector<const void*> datas;
1676 				std::vector<Buffer *> buffersToUnmap;
1677 
1678 				// we always have our result data first
1679 				datas.push_back(&imageBufferResult[0]);
1680 
1681 				for (deUint32 index = stagesCount; index < stagesCount + extraDatasCount; ++index)
1682 				{
1683 					const deUint32 datasNdx = index - stagesCount;
1684 					if (SHADER_STAGE_FRAGMENT_BIT & extraDatas[datasNdx].stages && (!inputBuffers[index]->isImage()))
1685 					{
1686 						glw::GLvoid * resultData = inputBuffers[index]->getAsBuffer()->mapBufferPtr();
1687 						buffersToUnmap.push_back(inputBuffers[index]->getAsBuffer());
1688 						// we always have our result data first
1689 						datas.push_back(resultData);
1690 					}
1691 				}
1692 
1693 				if (!checkResult(datas, width, subgroupSize))
1694 					failedIterations++;
1695 
1696 				while( !buffersToUnmap.empty() )
1697 				{
1698 					Buffer * buf = buffersToUnmap.back();
1699 					buf->unmapBufferPtr();
1700 					buffersToUnmap.pop_back();
1701 				}
1702 			}
1703 
1704 		}
1705 
1706 		if (0 < failedIterations)
1707 		{
1708 			log		<< tcu::TestLog::Message << (totalIterations - failedIterations) << " / "
1709 					<< totalIterations << " values passed" << tcu::TestLog::EndMessage;
1710 			return tcu::TestStatus::fail("Failed!");
1711 		} else
1712 		{
1713 			log		<< tcu::TestLog::Message << (totalIterations - failedIterations) << " / "
1714 					<< totalIterations << " values passed" << tcu::TestLog::EndMessage;
1715 		}
1716 	}
1717 	return tcu::TestStatus::pass("OK");
1718 }
1719 
makeVertexFrameBufferTest(Context& context, Format format, SSBOData* extraData, deUint32 extraDataCount, bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize))1720 tcu::TestStatus glc::subgroups::makeVertexFrameBufferTest(Context& context, Format format,
1721 	SSBOData* extraData, deUint32 extraDataCount,
1722 	bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize))
1723 {
1724 	tcu::TestLog& log	= context.getDeqpContext().getTestContext().getLog();
1725 	const glw::Functions& gl = context.getDeqpContext().getRenderContext().getFunctions();
1726 
1727 	const deUint32							maxWidth				= getMaxWidth();
1728 	vector<de::SharedPtr<BufferOrImage> >	inputBuffers			(extraDataCount);
1729 
1730 	const GlslSource& vshader = context.getSourceCollection().get("vert");
1731 	const GlslSource& fshader = context.getSourceCollection().get("fragment");
1732 
1733 	for (deUint32 i = 0u; i < extraDataCount; i++)
1734 	{
1735 		if (extraData[i].isImage)
1736 		{
1737 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
1738 
1739 			// haven't implemented init for images yet
1740 			DE_ASSERT(extraData[i].initializeType == subgroups::SSBOData::InitializeNone);
1741 		}
1742 		else
1743 		{
1744 			deUint64 size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
1745 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, GL_UNIFORM_BUFFER));
1746 
1747 			glw::GLvoid *ptr = inputBuffers[i]->getAsBuffer()->mapBufferPtr();
1748 			initializeMemory(context.getDeqpContext(), ptr, extraData[i]);
1749 			inputBuffers[i]->getAsBuffer()->unmapBufferPtr();
1750 		}
1751 	}
1752 
1753 	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
1754 	{
1755 		log << tcu::TestLog::Message
1756 			<< "binding inputBuffers[" << ndx << "](" << inputBuffers[ndx]->getType() << ", " << inputBuffers[ndx]->getId() << " ), "
1757 			<< "VERTEX, binding = " << extraData[ndx].binding << "\n"
1758 			<< tcu::TestLog::EndMessage;
1759 
1760 		if (inputBuffers[ndx]->isImage())
1761 		{
1762 			gl.bindImageTexture(extraData[ndx].binding, inputBuffers[ndx]->getId(),
1763 								0, GL_FALSE, 0, GL_READ_ONLY, extraData[ndx].format);
1764 			GLU_EXPECT_NO_ERROR(gl.getError(), "glBindImageTexture()");
1765 		} else
1766 		{
1767 			gl.bindBufferBase(inputBuffers[ndx]->getType(), extraData[ndx].binding, inputBuffers[ndx]->getId());
1768 			GLU_EXPECT_NO_ERROR(gl.getError(), "glBindBufferBase()");
1769 		}
1770 	}
1771 
1772 	de::MovePtr<glu::ShaderProgram> pipeline(
1773 			makeGraphicsPipeline(context, (ShaderStageFlags)(SHADER_STAGE_VERTEX_BIT | SHADER_STAGE_FRAGMENT_BIT),
1774 								 &vshader, &fshader, DE_NULL, DE_NULL, DE_NULL));
1775 
1776 	if (!pipeline->isOk())
1777 	{
1778 		return tcu::TestStatus::fail("vert graphics program build failed");
1779 	}
1780 
1781 	const deUint32							subgroupSize			= getSubgroupSize(context);
1782 
1783 	const deUint64							vertexBufferSize		= maxWidth * sizeof(tcu::Vec4);
1784 	Buffer									vertexBuffer			(context, vertexBufferSize, GL_ARRAY_BUFFER);
1785 
1786 	unsigned								totalIterations			= 0u;
1787 	unsigned								failedIterations		= 0u;
1788 
1789 	Image									discardableImage		(context, maxWidth, 1u, format);
1790 
1791 	{
1792 		glw::GLvoid *			bufferPtr			= vertexBuffer.mapBufferPtr();
1793 		std::vector<tcu::Vec4>	data				(maxWidth, tcu::Vec4(1.0f, 0.5f, 1.0f, 1.0f));
1794 		const float				pixelSize			= 2.0f / static_cast<float>(maxWidth);
1795 		float					leftHandPosition	= -1.0f;
1796 
1797 		for(deUint32 ndx = 0u; ndx < maxWidth; ++ndx)
1798 		{
1799 			data[ndx][0] = leftHandPosition + pixelSize / 2.0f;
1800 			leftHandPosition += pixelSize;
1801 		}
1802 
1803 		deMemcpy(bufferPtr, &data[0], maxWidth * sizeof(tcu::Vec4));
1804 		vertexBuffer.unmapBufferPtr();
1805 	}
1806 
1807 	Vao vao(context);
1808 	Fbo fbo(context);
1809 	fbo.bind2D(discardableImage);
1810 
1811 	gl.viewport(0, 0, maxWidth, 1u);
1812 	GLU_EXPECT_NO_ERROR(gl.getError(), "glViewport");
1813 
1814 	const deUint64				imageResultSize		= getFormatSizeInBytes(format) * maxWidth;
1815 	vector<glw::GLubyte>		imageBufferResult(imageResultSize);
1816 	const deUint64				vertexBufferOffset	= 0u;
1817 
1818 	for (deUint32 width = 1u; width < maxWidth; width = getNextWidth(width))
1819 	{
1820 		totalIterations++;
1821 
1822 		for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
1823 		{
1824 			if (inputBuffers[ndx]->isImage())
1825 			{
1826 				DE_ASSERT(extraData[ndx].initializeType == subgroups::SSBOData::InitializeNone);
1827 			} else
1828 			{
1829 				glw::GLvoid *ptr = inputBuffers[ndx]->getAsBuffer()->mapBufferPtr();
1830 				initializeMemory(context.getDeqpContext(), ptr, extraData[ndx]);
1831 				inputBuffers[ndx]->getAsBuffer()->unmapBufferPtr();
1832 			}
1833 		}
1834 
1835 		{
1836 			gl.clearColor(0.0f, 0.0f, 0.0f, 0.0f);
1837 			GLU_EXPECT_NO_ERROR(gl.getError(), "glClearColor");
1838 			gl.clear(GL_COLOR_BUFFER_BIT);
1839 			GLU_EXPECT_NO_ERROR(gl.getError(), "glClear");
1840 
1841 			gl.useProgram(pipeline->getProgram());
1842 			GLU_EXPECT_NO_ERROR(gl.getError(), "glUseProgram");
1843 
1844 			gl.enableVertexAttribArray(0);
1845 			GLU_EXPECT_NO_ERROR(gl.getError(), "glEnableVertexAttribArray");
1846 
1847 			gl.bindBuffer(GL_ARRAY_BUFFER, vertexBuffer.getId());
1848 			GLU_EXPECT_NO_ERROR(gl.getError(), "glBindBuffer");
1849 
1850 			gl.vertexAttribPointer(0, 4, GL_FLOAT, GL_FALSE, sizeof(tcu::Vec4), glu::BufferOffsetAsPointer(vertexBufferOffset));
1851 			GLU_EXPECT_NO_ERROR(gl.getError(), "glVertexAttribPointer");
1852 
1853 			gl.drawArrays(GL_POINTS, 0, width);
1854 			GLU_EXPECT_NO_ERROR(gl.getError(), "glDrawArrays");
1855 
1856 			gl.disableVertexAttribArray(0);
1857 			GLU_EXPECT_NO_ERROR(gl.getError(), "glDisableVertexAttribArray");
1858 
1859 			GLenum readFormat;
1860 			GLenum readType;
1861 			getFormatReadInfo(format, readFormat, readType);
1862 
1863 			gl.readPixels(0, 0, width, 1, readFormat, readType, (GLvoid*)&imageBufferResult[0]);
1864 			GLU_EXPECT_NO_ERROR(gl.getError(), "glReadPixels");
1865 		}
1866 
1867 		{
1868 			std::vector<const void*> datas;
1869 			datas.push_back(&imageBufferResult[0]);
1870 			if (!checkResult(datas, width, subgroupSize))
1871 				failedIterations++;
1872 		}
1873 	}
1874 
1875 	if (0 < failedIterations)
1876 	{
1877 		log	<< tcu::TestLog::Message << (totalIterations - failedIterations) << " / "
1878 			<< totalIterations << " values passed" << tcu::TestLog::EndMessage;
1879 		return tcu::TestStatus::fail("Failed!");
1880 	} else
1881 	{
1882 		log	<< tcu::TestLog::Message << (totalIterations - failedIterations) << " / "
1883 				<<totalIterations << " values passed" << tcu::TestLog::EndMessage;
1884 	}
1885 
1886 	return tcu::TestStatus::pass("OK");
1887 }
1888 
1889 
makeFragmentFrameBufferTest(Context& context, Format format, SSBOData* extraDatas, deUint32 extraDatasCount, bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 height, deUint32 subgroupSize))1890 tcu::TestStatus glc::subgroups::makeFragmentFrameBufferTest	(Context& context, Format format, SSBOData* extraDatas,
1891 	deUint32 extraDatasCount,
1892 	bool (*checkResult)(std::vector<const void*> datas, deUint32 width,
1893 						deUint32 height, deUint32 subgroupSize))
1894 {
1895 	tcu::TestLog& log	= context.getDeqpContext().getTestContext().getLog();
1896 	const glw::Functions& gl = context.getDeqpContext().getRenderContext().getFunctions();
1897 
1898 	const GlslSource& vshader = context.getSourceCollection().get("vert");
1899 	const GlslSource& fshader = context.getSourceCollection().get("fragment");
1900 
1901 	std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(extraDatasCount);
1902 
1903 	for (deUint32 i = 0; i < extraDatasCount; i++)
1904 	{
1905 		if (extraDatas[i].isImage)
1906 		{
1907 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context,
1908 										static_cast<deUint32>(extraDatas[i].numElements), 1, extraDatas[i].format));
1909 
1910 			// haven't implemented init for images yet
1911 			DE_ASSERT(extraDatas[i].initializeType == subgroups::SSBOData::InitializeNone);
1912 		}
1913 		else
1914 		{
1915 			deUint64 size =
1916 				getElementSizeInBytes(extraDatas[i].format, extraDatas[i].layout) * extraDatas[i].numElements;
1917 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, GL_UNIFORM_BUFFER));
1918 
1919 			glw::GLvoid *ptr = inputBuffers[i]->getAsBuffer()->mapBufferPtr();
1920 			initializeMemory(context.getDeqpContext(), ptr, extraDatas[i]);
1921 			inputBuffers[i]->getAsBuffer()->unmapBufferPtr();
1922 		}
1923 	}
1924 
1925 	for (deUint32 i = 0; i < extraDatasCount; i++)
1926 	{
1927 		log << tcu::TestLog::Message
1928 			<< "binding inputBuffers[" << i << "](" << inputBuffers[i]->getType() << ", " << inputBuffers[i]->getId() << " ), "
1929 			<< "FRAGMENT, binding = " << extraDatas[i].binding << "\n"
1930 			<< tcu::TestLog::EndMessage;
1931 
1932 		if (inputBuffers[i]->isImage())
1933 		{
1934 			gl.bindImageTexture(extraDatas[i].binding, inputBuffers[i]->getId(),
1935 								0, GL_FALSE, 0, GL_READ_ONLY, extraDatas[i].format);
1936 			GLU_EXPECT_NO_ERROR(gl.getError(), "glBindImageTexture()");
1937 		} else
1938 		{
1939 			gl.bindBufferBase(inputBuffers[i]->getType(), extraDatas[i].binding, inputBuffers[i]->getId());
1940 			GLU_EXPECT_NO_ERROR(gl.getError(), "glBindBufferBase()");
1941 		}
1942 	}
1943 
1944 	de::MovePtr<glu::ShaderProgram> pipeline(
1945 			makeGraphicsPipeline(context, (ShaderStageFlags)(SHADER_STAGE_VERTEX_BIT | SHADER_STAGE_FRAGMENT_BIT),
1946 								 &vshader, &fshader, DE_NULL, DE_NULL, DE_NULL));
1947 
1948 	if (!pipeline->isOk())
1949 	{
1950 		return tcu::TestStatus::fail("frag graphics program build failed");
1951 	}
1952 
1953 	const deUint32 subgroupSize = getSubgroupSize(context);
1954 
1955 	unsigned totalIterations = 0;
1956 	unsigned failedIterations = 0;
1957 
1958 	Vao vao(context);
1959 	Fbo fbo(context);
1960 
1961 	for (deUint32 width = 8; width <= subgroupSize; width *= 2)
1962 	{
1963 		for (deUint32 height = 8; height <= subgroupSize; height *= 2)
1964 		{
1965 			totalIterations++;
1966 
1967 			// re-init the data
1968 			for (deUint32 i = 0; i < extraDatasCount; i++)
1969 			{
1970 				if (inputBuffers[i]->isImage())
1971 				{
1972 					DE_ASSERT(extraDatas[i].initializeType == subgroups::SSBOData::InitializeNone);
1973 				} else
1974 				{
1975 					glw::GLvoid *ptr = inputBuffers[i]->getAsBuffer()->mapBufferPtr();
1976 					initializeMemory(context.getDeqpContext(), ptr, extraDatas[i]);
1977 					inputBuffers[i]->getAsBuffer()->unmapBufferPtr();
1978 				}
1979 			}
1980 
1981 			deUint64 formatSize = getFormatSizeInBytes(format);
1982 			const deUint64 resultImageSizeInBytes =
1983 				width * height * formatSize;
1984 
1985 			Image resultImage(context, width, height, format);
1986 
1987 			vector<glw::GLubyte>  resultBuffer(resultImageSizeInBytes);
1988 
1989 			fbo.bind2D(resultImage);
1990 
1991 			gl.viewport(0, 0, width, height);
1992 			GLU_EXPECT_NO_ERROR(gl.getError(), "glViewport");
1993 
1994 			gl.clearColor(0.0f, 0.0f, 0.0f, 0.0f);
1995 			GLU_EXPECT_NO_ERROR(gl.getError(), "glClearColor");
1996 			gl.clear(GL_COLOR_BUFFER_BIT);
1997 			GLU_EXPECT_NO_ERROR(gl.getError(), "glClear");
1998 
1999 			gl.useProgram(pipeline->getProgram());
2000 			GLU_EXPECT_NO_ERROR(gl.getError(), "glUseProgram");
2001 
2002 			gl.drawArrays(GL_TRIANGLE_STRIP, 0, 4);
2003 			GLU_EXPECT_NO_ERROR(gl.getError(), "glDrawArrays");
2004 
2005 			GLenum readFormat;
2006 			GLenum readType;
2007 			getFormatReadInfo(format, readFormat, readType);
2008 
2009 			gl.readPixels(0, 0, width, height, readFormat, readType, (GLvoid*)&resultBuffer[0]);
2010 			GLU_EXPECT_NO_ERROR(gl.getError(), "glReadPixels");
2011 
2012 			std::vector<const void*> datas;
2013 			{
2014 				// we always have our result data first
2015 				datas.push_back(&resultBuffer[0]);
2016 			}
2017 
2018 			if (!checkResult(datas, width, height, subgroupSize))
2019 			{
2020 				failedIterations++;
2021 			}
2022 		}
2023 	}
2024 
2025 	if (0 < failedIterations)
2026 	{
2027 		log		<< tcu::TestLog::Message << (totalIterations - failedIterations) << " / "
2028 				<< totalIterations << " values passed" << tcu::TestLog::EndMessage;
2029 		return tcu::TestStatus::fail("Failed!");
2030 	} else
2031 	{
2032 		log		<< tcu::TestLog::Message << (totalIterations - failedIterations) << " / "
2033 				<<totalIterations << " values passed" << tcu::TestLog::EndMessage;
2034 	}
2035 	return tcu::TestStatus::pass("OK");
2036 }
2037 
makeComputeTest( Context& context, Format format, SSBOData* inputs, deUint32 inputsCount, bool (*checkResult)(std::vector<const void*> datas, const deUint32 numWorkgroups[3], const deUint32 localSize[3], deUint32 subgroupSize))2038 tcu::TestStatus glc::subgroups::makeComputeTest(
2039 	Context& context, Format format, SSBOData* inputs, deUint32 inputsCount,
2040 	bool (*checkResult)(std::vector<const void*> datas,
2041 						const deUint32 numWorkgroups[3], const deUint32 localSize[3],
2042 						deUint32 subgroupSize))
2043 {
2044 	const glw::Functions& gl = context.getDeqpContext().getRenderContext().getFunctions();
2045 	deUint64 elementSize = getFormatSizeInBytes(format);
2046 
2047 	const deUint64 resultBufferSize = maxSupportedSubgroupSize() *
2048 										  maxSupportedSubgroupSize() *
2049 										  maxSupportedSubgroupSize();
2050 	const deUint64 resultBufferSizeInBytes = resultBufferSize * elementSize;
2051 
2052 	Buffer resultBuffer(
2053 		context, resultBufferSizeInBytes);
2054 
2055 	std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(inputsCount);
2056 
2057 	for (deUint32 i = 0; i < inputsCount; i++)
2058 	{
2059 		if (inputs[i].isImage)
2060 		{
2061 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context,
2062 										static_cast<deUint32>(inputs[i].numElements), 1, inputs[i].format));
2063 			// haven't implemented init for images yet
2064 			DE_ASSERT(inputs[i].initializeType == subgroups::SSBOData::InitializeNone);
2065 		}
2066 		else
2067 		{
2068 			deUint64 size =
2069 				getElementSizeInBytes(inputs[i].format, inputs[i].layout) * inputs[i].numElements;
2070 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
2071 
2072 			glw::GLvoid *ptr = inputBuffers[i]->getAsBuffer()->mapBufferPtr();
2073 			initializeMemory(context.getDeqpContext(), ptr, inputs[i]);
2074 			inputBuffers[i]->getAsBuffer()->unmapBufferPtr();
2075 		}
2076 
2077 	}
2078 
2079 	tcu::TestLog& log	= context.getDeqpContext().getTestContext().getLog();
2080 	log << tcu::TestLog::Message
2081 		<< "binding resultbuffer(type=" << resultBuffer.getType()
2082 		<< ", id=" << resultBuffer.getId() << ", binding=0), COMPUTE"
2083 		<< tcu::TestLog::EndMessage;
2084 
2085 	gl.bindBufferBase(resultBuffer.getType(), 0, resultBuffer.getId());
2086 	GLU_EXPECT_NO_ERROR(gl.getError(), "glBindBufferBase(0, resultBuffer)");
2087 
2088 	for (deUint32 i = 0; i < inputsCount; i++)
2089 	{
2090 		log << tcu::TestLog::Message
2091 			<< "binding inputBuffers[" << i << "](type=" << inputBuffers[i]->getType()
2092 			<< ", id=" << inputBuffers[i]->getId() << ", binding="
2093 			<< inputs[i].binding << "), 1, COMPUTE"
2094 			<< tcu::TestLog::EndMessage;
2095 
2096 		if (inputBuffers[i]->isImage())
2097 		{
2098 			gl.bindImageTexture(inputs[i].binding, inputBuffers[i]->getId(),
2099 								0, GL_FALSE, 0, GL_READ_WRITE, inputs[i].format);
2100 			GLU_EXPECT_NO_ERROR(gl.getError(), "glBindImageTexture(inputBuffer[i]");
2101 		} else
2102 		{
2103 			gl.bindBufferBase(inputBuffers[i]->getType(), inputs[i].binding, inputBuffers[i]->getId());
2104 			GLU_EXPECT_NO_ERROR(gl.getError(), "glBindBufferBase(inputBuffer[i])");
2105 		}
2106 	}
2107 
2108 	const GlslSource &cshader = context.getSourceCollection().get("comp");
2109 
2110 	unsigned totalIterations = 0;
2111 	unsigned failedIterations = 0;
2112 
2113 	const deUint32 subgroupSize = getSubgroupSize(context);
2114 
2115 	const deUint32 numWorkgroups[3] = {4, 2, 2};
2116 
2117 	const deUint32 localSizesToTestCount = 15;
2118 	deUint32 localSizesToTest[localSizesToTestCount][3] =
2119 	{
2120 		{1, 1, 1},
2121 		{32, 4, 1},
2122 		{32, 1, 4},
2123 		{1, 32, 4},
2124 		{1, 4, 32},
2125 		{4, 1, 32},
2126 		{4, 32, 1},
2127 		{subgroupSize, 1, 1},
2128 		{1, subgroupSize, 1},
2129 		{1, 1, subgroupSize},
2130 		{3, 5, 7},
2131 		{128, 1, 1},
2132 		{1, 128, 1},
2133 		{1, 1, 64},
2134 		{1, 1, 1} // Isn't used, just here to make double buffering checks easier
2135 	};
2136 
2137 
2138 	de::MovePtr<glu::ShaderProgram> lastPipeline(
2139 		makeComputePipeline(context, cshader,
2140 							localSizesToTest[0][0], localSizesToTest[0][1], localSizesToTest[0][2]));
2141 
2142 	for (deUint32 index = 0; index < (localSizesToTestCount - 1); index++)
2143 	{
2144 		const deUint32 nextX = localSizesToTest[index + 1][0];
2145 		const deUint32 nextY = localSizesToTest[index + 1][1];
2146 		const deUint32 nextZ = localSizesToTest[index + 1][2];
2147 
2148 		// we are running one test
2149 		totalIterations++;
2150 
2151 		if (!lastPipeline->isOk())
2152 		{
2153 			return tcu::TestStatus::fail("compute shaders build failed");
2154 		}
2155 
2156 		gl.useProgram(lastPipeline->getProgram());
2157 		GLU_EXPECT_NO_ERROR(gl.getError(), "glUseProgram");
2158 
2159 		gl.dispatchCompute(numWorkgroups[0], numWorkgroups[1], numWorkgroups[2]);
2160 		GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute");
2161 
2162 		de::MovePtr<glu::ShaderProgram> nextPipeline(
2163 			makeComputePipeline(context, cshader, nextX, nextY, nextZ));
2164 
2165 		std::vector<const void*> datas;
2166 
2167 		{
2168 			glw::GLvoid * resultData = resultBuffer.mapBufferPtr();
2169 
2170 			// we always have our result data first
2171 			datas.push_back(resultData);
2172 		}
2173 
2174 		for (deUint32 i = 0; i < inputsCount; i++)
2175 		{
2176 			if (!inputBuffers[i]->isImage())
2177 			{
2178 				glw::GLvoid *resultData = inputBuffers[i]->getAsBuffer()->mapBufferPtr();
2179 
2180 				// we always have our result data first
2181 				datas.push_back(resultData);
2182 			}
2183 		}
2184 
2185 		if (!checkResult(datas, numWorkgroups, localSizesToTest[index], subgroupSize))
2186 		{
2187 			failedIterations++;
2188 		}
2189 
2190 		resultBuffer.unmapBufferPtr();
2191 		for (deUint32 i = 0; i < inputsCount; i++)
2192 		{
2193 			if (!inputBuffers[i]->isImage())
2194 			{
2195 				inputBuffers[i]->getAsBuffer()->unmapBufferPtr();
2196 			}
2197 		}
2198 
2199 		lastPipeline = nextPipeline;
2200 	}
2201 
2202 	if (0 < failedIterations)
2203 	{
2204 		log		<< tcu::TestLog::Message << (totalIterations - failedIterations) << " / "
2205 				<< totalIterations << " values passed" << tcu::TestLog::EndMessage;
2206 		return tcu::TestStatus::fail("Failed!");
2207 	} else
2208 	{
2209 		log		<< tcu::TestLog::Message << (totalIterations - failedIterations) << " / "
2210 				<< totalIterations << " values passed" << tcu::TestLog::EndMessage;
2211 	}
2212 
2213 	return tcu::TestStatus::pass("OK");
2214 }
2215