1e5c31af7Sopenharmony_ci/*------------------------------------------------------------------------
2e5c31af7Sopenharmony_ci * OpenGL Conformance Tests
3e5c31af7Sopenharmony_ci * ------------------------
4e5c31af7Sopenharmony_ci *
5e5c31af7Sopenharmony_ci * Copyright (c) 2017-2019 The Khronos Group Inc.
6e5c31af7Sopenharmony_ci * Copyright (c) 2017 Codeplay Software Ltd.
7e5c31af7Sopenharmony_ci * Copyright (c) 2019 NVIDIA Corporation.
8e5c31af7Sopenharmony_ci *
9e5c31af7Sopenharmony_ci * Licensed under the Apache License, Version 2.0 (the "License");
10e5c31af7Sopenharmony_ci * you may not use this file except in compliance with the License.
11e5c31af7Sopenharmony_ci * You may obtain a copy of the License at
12e5c31af7Sopenharmony_ci *
13e5c31af7Sopenharmony_ci *      http://www.apache.org/licenses/LICENSE-2.0
14e5c31af7Sopenharmony_ci *
15e5c31af7Sopenharmony_ci * Unless required by applicable law or agreed to in writing, software
16e5c31af7Sopenharmony_ci * distributed under the License is distributed on an "AS IS" BASIS,
17e5c31af7Sopenharmony_ci * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18e5c31af7Sopenharmony_ci * See the License for the specific language governing permissions and
19e5c31af7Sopenharmony_ci * limitations under the License.
20e5c31af7Sopenharmony_ci *
21e5c31af7Sopenharmony_ci */ /*!
22e5c31af7Sopenharmony_ci * \file
23e5c31af7Sopenharmony_ci * \brief Subgroups Tests
24e5c31af7Sopenharmony_ci */ /*--------------------------------------------------------------------*/
25e5c31af7Sopenharmony_ci
26e5c31af7Sopenharmony_ci#include "glcSubgroupsShapeTests.hpp"
27e5c31af7Sopenharmony_ci#include "glcSubgroupsTestsUtils.hpp"
28e5c31af7Sopenharmony_ci
29e5c31af7Sopenharmony_ci#include <string>
30e5c31af7Sopenharmony_ci#include <vector>
31e5c31af7Sopenharmony_ci
32e5c31af7Sopenharmony_ciusing namespace tcu;
33e5c31af7Sopenharmony_ciusing namespace std;
34e5c31af7Sopenharmony_ci
35e5c31af7Sopenharmony_cinamespace glc
36e5c31af7Sopenharmony_ci{
37e5c31af7Sopenharmony_cinamespace subgroups
38e5c31af7Sopenharmony_ci{
39e5c31af7Sopenharmony_cinamespace
40e5c31af7Sopenharmony_ci{
41e5c31af7Sopenharmony_cistatic bool checkVertexPipelineStages(std::vector<const void*> datas,
42e5c31af7Sopenharmony_ci									  deUint32 width, deUint32)
43e5c31af7Sopenharmony_ci{
44e5c31af7Sopenharmony_ci	return glc::subgroups::check(datas, width, 1);
45e5c31af7Sopenharmony_ci}
46e5c31af7Sopenharmony_ci
47e5c31af7Sopenharmony_cistatic bool checkComputeStage(std::vector<const void*> datas,
48e5c31af7Sopenharmony_ci						 const deUint32 numWorkgroups[3], const deUint32 localSize[3],
49e5c31af7Sopenharmony_ci						 deUint32)
50e5c31af7Sopenharmony_ci{
51e5c31af7Sopenharmony_ci	return glc::subgroups::checkCompute(datas, numWorkgroups, localSize, 1);
52e5c31af7Sopenharmony_ci}
53e5c31af7Sopenharmony_ci
// Subgroup operation families exercised by the shape tests. The values are
// used as plain ints (loop counter in the test-group factory, CaseDefinition
// field), so they must stay contiguous and start at zero.
enum OpType
{
	OPTYPE_CLUSTERED	= 0,	// shader body is built around subgroupClusteredOr
	OPTYPE_QUAD			= 1,	// shader body is built around subgroupQuadBroadcast
	OPTYPE_LAST					// one past the last real op type; loop bound only
};
60e5c31af7Sopenharmony_ci
61e5c31af7Sopenharmony_cistd::string getOpTypeName(int opType)
62e5c31af7Sopenharmony_ci{
63e5c31af7Sopenharmony_ci	switch (opType)
64e5c31af7Sopenharmony_ci	{
65e5c31af7Sopenharmony_ci		default:
66e5c31af7Sopenharmony_ci			DE_FATAL("Unsupported op type");
67e5c31af7Sopenharmony_ci			return "";
68e5c31af7Sopenharmony_ci		case OPTYPE_CLUSTERED:
69e5c31af7Sopenharmony_ci			return "clustered";
70e5c31af7Sopenharmony_ci		case OPTYPE_QUAD:
71e5c31af7Sopenharmony_ci			return "quad";
72e5c31af7Sopenharmony_ci	}
73e5c31af7Sopenharmony_ci}
74e5c31af7Sopenharmony_ci
75e5c31af7Sopenharmony_cistruct CaseDefinition
76e5c31af7Sopenharmony_ci{
77e5c31af7Sopenharmony_ci	int					opType;
78e5c31af7Sopenharmony_ci	ShaderStageFlags	shaderStage;
79e5c31af7Sopenharmony_ci};
80e5c31af7Sopenharmony_ci
81e5c31af7Sopenharmony_civoid initFrameBufferPrograms (SourceCollections& programCollection, CaseDefinition caseDef)
82e5c31af7Sopenharmony_ci{
83e5c31af7Sopenharmony_ci	std::ostringstream				bdy;
84e5c31af7Sopenharmony_ci	std::string						extension = (OPTYPE_CLUSTERED == caseDef.opType) ?
85e5c31af7Sopenharmony_ci										"#extension GL_KHR_shader_subgroup_clustered: enable\n" :
86e5c31af7Sopenharmony_ci										"#extension GL_KHR_shader_subgroup_quad: enable\n";
87e5c31af7Sopenharmony_ci
88e5c31af7Sopenharmony_ci	subgroups::setFragmentShaderFrameBuffer(programCollection);
89e5c31af7Sopenharmony_ci
90e5c31af7Sopenharmony_ci	if (SHADER_STAGE_VERTEX_BIT != caseDef.shaderStage)
91e5c31af7Sopenharmony_ci		subgroups::setVertexShaderFrameBuffer(programCollection);
92e5c31af7Sopenharmony_ci
93e5c31af7Sopenharmony_ci	extension += "#extension GL_KHR_shader_subgroup_ballot: enable\n";
94e5c31af7Sopenharmony_ci
95e5c31af7Sopenharmony_ci	bdy << "  uint tempResult = 0x1u;\n"
96e5c31af7Sopenharmony_ci		<< "  uvec4 mask = subgroupBallot(true);\n";
97e5c31af7Sopenharmony_ci
98e5c31af7Sopenharmony_ci	if (OPTYPE_CLUSTERED == caseDef.opType)
99e5c31af7Sopenharmony_ci	{
100e5c31af7Sopenharmony_ci		for (deUint32 i = 1; i <= subgroups::maxSupportedSubgroupSize(); i *= 2)
101e5c31af7Sopenharmony_ci		{
102e5c31af7Sopenharmony_ci			bdy << "  if (gl_SubgroupSize >= " << i << "u)\n"
103e5c31af7Sopenharmony_ci				<< "  {\n"
104e5c31af7Sopenharmony_ci				<< "    uvec4 contribution = uvec4(0);\n"
105e5c31af7Sopenharmony_ci				<< "    uint modID = gl_SubgroupInvocationID % 32u;\n"
106e5c31af7Sopenharmony_ci				<< "    switch (gl_SubgroupInvocationID / 32u)\n"
107e5c31af7Sopenharmony_ci				<< "    {\n"
108e5c31af7Sopenharmony_ci				<< "    case 0u: contribution.x = 1u << modID; break;\n"
109e5c31af7Sopenharmony_ci				<< "    case 1u: contribution.y = 1u << modID; break;\n"
110e5c31af7Sopenharmony_ci				<< "    case 2u: contribution.z = 1u << modID; break;\n"
111e5c31af7Sopenharmony_ci				<< "    case 3u: contribution.w = 1u << modID; break;\n"
112e5c31af7Sopenharmony_ci				<< "    }\n"
113e5c31af7Sopenharmony_ci				<< "    uvec4 result = subgroupClusteredOr(contribution, " << i << "u);\n"
114e5c31af7Sopenharmony_ci				<< "    uint rootID = gl_SubgroupInvocationID & ~(" << i - 1 << "u);\n"
115e5c31af7Sopenharmony_ci				<< "    for (uint i = 0u; i < " << i << "u; i++)\n"
116e5c31af7Sopenharmony_ci				<< "    {\n"
117e5c31af7Sopenharmony_ci				<< "      uint nextID = rootID + i;\n"
118e5c31af7Sopenharmony_ci				<< "      if (subgroupBallotBitExtract(mask, nextID) ^^ subgroupBallotBitExtract(result, nextID))\n"
119e5c31af7Sopenharmony_ci				<< "      {\n"
120e5c31af7Sopenharmony_ci				<< "        tempResult = 0u;\n"
121e5c31af7Sopenharmony_ci				<< "      }\n"
122e5c31af7Sopenharmony_ci				<< "    }\n"
123e5c31af7Sopenharmony_ci				<< "  }\n";
124e5c31af7Sopenharmony_ci		}
125e5c31af7Sopenharmony_ci	}
126e5c31af7Sopenharmony_ci	else
127e5c31af7Sopenharmony_ci	{
128e5c31af7Sopenharmony_ci		bdy << "  uint cluster[4] =\n"
129e5c31af7Sopenharmony_ci			<< "  uint[](\n"
130e5c31af7Sopenharmony_ci			<< "    subgroupQuadBroadcast(gl_SubgroupInvocationID, 0u),\n"
131e5c31af7Sopenharmony_ci			<< "    subgroupQuadBroadcast(gl_SubgroupInvocationID, 1u),\n"
132e5c31af7Sopenharmony_ci			<< "    subgroupQuadBroadcast(gl_SubgroupInvocationID, 2u),\n"
133e5c31af7Sopenharmony_ci			<< "    subgroupQuadBroadcast(gl_SubgroupInvocationID, 3u)\n"
134e5c31af7Sopenharmony_ci			<< "  );\n"
135e5c31af7Sopenharmony_ci			<< "  uint rootID = gl_SubgroupInvocationID & ~0x3u;\n"
136e5c31af7Sopenharmony_ci			<< "  for (uint i = 0u; i < 4u; i++)\n"
137e5c31af7Sopenharmony_ci			<< "  {\n"
138e5c31af7Sopenharmony_ci			<< "    uint nextID = rootID + i;\n"
139e5c31af7Sopenharmony_ci			<< "    if (subgroupBallotBitExtract(mask, nextID) && (cluster[i] != nextID))\n"
140e5c31af7Sopenharmony_ci			<< "    {\n"
141e5c31af7Sopenharmony_ci			<< "      tempResult = mask.x;\n"
142e5c31af7Sopenharmony_ci			<< "    }\n"
143e5c31af7Sopenharmony_ci			<< "  }\n";
144e5c31af7Sopenharmony_ci	}
145e5c31af7Sopenharmony_ci
146e5c31af7Sopenharmony_ci	if (SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
147e5c31af7Sopenharmony_ci	{
148e5c31af7Sopenharmony_ci		std::ostringstream vertexSrc;
149e5c31af7Sopenharmony_ci		vertexSrc << "${VERSION_DECL}\n"
150e5c31af7Sopenharmony_ci			<< extension
151e5c31af7Sopenharmony_ci			<< "layout(location = 0) in highp vec4 in_position;\n"
152e5c31af7Sopenharmony_ci			<< "layout(location = 0) out float result;\n"
153e5c31af7Sopenharmony_ci			<< "\n"
154e5c31af7Sopenharmony_ci			<< "void main (void)\n"
155e5c31af7Sopenharmony_ci			<< "{\n"
156e5c31af7Sopenharmony_ci			<< bdy.str()
157e5c31af7Sopenharmony_ci			<< "  result = float(tempResult);\n"
158e5c31af7Sopenharmony_ci			<< "  gl_Position = in_position;\n"
159e5c31af7Sopenharmony_ci			<< "  gl_PointSize = 1.0f;\n"
160e5c31af7Sopenharmony_ci			<< "}\n";
161e5c31af7Sopenharmony_ci		programCollection.add("vert") << glu::VertexSource(vertexSrc.str());
162e5c31af7Sopenharmony_ci	}
163e5c31af7Sopenharmony_ci	else if (SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
164e5c31af7Sopenharmony_ci	{
165e5c31af7Sopenharmony_ci		std::ostringstream geometry;
166e5c31af7Sopenharmony_ci
167e5c31af7Sopenharmony_ci		geometry << "${VERSION_DECL}\n"
168e5c31af7Sopenharmony_ci			<< extension
169e5c31af7Sopenharmony_ci			<< "layout(points) in;\n"
170e5c31af7Sopenharmony_ci			<< "layout(points, max_vertices = 1) out;\n"
171e5c31af7Sopenharmony_ci			<< "layout(location = 0) out float out_color;\n"
172e5c31af7Sopenharmony_ci			<< "\n"
173e5c31af7Sopenharmony_ci			<< "void main (void)\n"
174e5c31af7Sopenharmony_ci			<< "{\n"
175e5c31af7Sopenharmony_ci			<< bdy.str()
176e5c31af7Sopenharmony_ci			<< "  out_color = float(tempResult);\n"
177e5c31af7Sopenharmony_ci			<< "  gl_Position = gl_in[0].gl_Position;\n"
178e5c31af7Sopenharmony_ci			<< "  EmitVertex();\n"
179e5c31af7Sopenharmony_ci			<< "  EndPrimitive();\n"
180e5c31af7Sopenharmony_ci			<< "}\n";
181e5c31af7Sopenharmony_ci
182e5c31af7Sopenharmony_ci		programCollection.add("geometry") << glu::GeometrySource(geometry.str());
183e5c31af7Sopenharmony_ci	}
184e5c31af7Sopenharmony_ci	else if (SHADER_STAGE_TESS_CONTROL_BIT == caseDef.shaderStage)
185e5c31af7Sopenharmony_ci	{
186e5c31af7Sopenharmony_ci		std::ostringstream controlSource;
187e5c31af7Sopenharmony_ci
188e5c31af7Sopenharmony_ci		controlSource << "${VERSION_DECL}\n"
189e5c31af7Sopenharmony_ci			<< extension
190e5c31af7Sopenharmony_ci			<< "layout(vertices = 2) out;\n"
191e5c31af7Sopenharmony_ci			<< "layout(location = 0) out float out_color[];\n"
192e5c31af7Sopenharmony_ci			<< "\n"
193e5c31af7Sopenharmony_ci			<< "void main (void)\n"
194e5c31af7Sopenharmony_ci			<< "{\n"
195e5c31af7Sopenharmony_ci			<< "  if (gl_InvocationID == 0)\n"
196e5c31af7Sopenharmony_ci			<<"  {\n"
197e5c31af7Sopenharmony_ci			<< "    gl_TessLevelOuter[0] = 1.0f;\n"
198e5c31af7Sopenharmony_ci			<< "    gl_TessLevelOuter[1] = 1.0f;\n"
199e5c31af7Sopenharmony_ci			<< "  }\n"
200e5c31af7Sopenharmony_ci			<< bdy.str()
201e5c31af7Sopenharmony_ci			<< "  out_color[gl_InvocationID] = float(tempResult);\n"
202e5c31af7Sopenharmony_ci			<< "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
203e5c31af7Sopenharmony_ci			<< "}\n";
204e5c31af7Sopenharmony_ci
205e5c31af7Sopenharmony_ci		programCollection.add("tesc") << glu::TessellationControlSource(controlSource.str());
206e5c31af7Sopenharmony_ci		subgroups::setTesEvalShaderFrameBuffer(programCollection);
207e5c31af7Sopenharmony_ci	}
208e5c31af7Sopenharmony_ci	else if (SHADER_STAGE_TESS_EVALUATION_BIT == caseDef.shaderStage)
209e5c31af7Sopenharmony_ci	{
210e5c31af7Sopenharmony_ci		std::ostringstream evaluationSource;
211e5c31af7Sopenharmony_ci
212e5c31af7Sopenharmony_ci		evaluationSource << "${VERSION_DECL}\n"
213e5c31af7Sopenharmony_ci			<< extension
214e5c31af7Sopenharmony_ci			<< "layout(isolines, equal_spacing, ccw) in;\n"
215e5c31af7Sopenharmony_ci			<< "layout(location = 0) out float out_color;\n"
216e5c31af7Sopenharmony_ci			<< "void main (void)\n"
217e5c31af7Sopenharmony_ci			<< "{\n"
218e5c31af7Sopenharmony_ci			<< bdy.str()
219e5c31af7Sopenharmony_ci			<< "  out_color = float(tempResult);\n"
220e5c31af7Sopenharmony_ci			<< "  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
221e5c31af7Sopenharmony_ci			<< "}\n";
222e5c31af7Sopenharmony_ci
223e5c31af7Sopenharmony_ci		subgroups::setTesCtrlShaderFrameBuffer(programCollection);
224e5c31af7Sopenharmony_ci		programCollection.add("tese") << glu::TessellationEvaluationSource(evaluationSource.str());
225e5c31af7Sopenharmony_ci	}
226e5c31af7Sopenharmony_ci	else
227e5c31af7Sopenharmony_ci	{
228e5c31af7Sopenharmony_ci		DE_FATAL("Unsupported shader stage");
229e5c31af7Sopenharmony_ci	}
230e5c31af7Sopenharmony_ci}
231e5c31af7Sopenharmony_ci
232e5c31af7Sopenharmony_civoid initPrograms(SourceCollections& programCollection, CaseDefinition caseDef)
233e5c31af7Sopenharmony_ci{
234e5c31af7Sopenharmony_ci	std::string extension = (OPTYPE_CLUSTERED == caseDef.opType) ?
235e5c31af7Sopenharmony_ci							"#extension GL_KHR_shader_subgroup_clustered: enable\n" :
236e5c31af7Sopenharmony_ci							"#extension GL_KHR_shader_subgroup_quad: enable\n";
237e5c31af7Sopenharmony_ci
238e5c31af7Sopenharmony_ci	extension += "#extension GL_KHR_shader_subgroup_ballot: enable\n";
239e5c31af7Sopenharmony_ci
240e5c31af7Sopenharmony_ci	std::ostringstream bdy;
241e5c31af7Sopenharmony_ci
242e5c31af7Sopenharmony_ci	bdy << "  uint tempResult = 0x1u;\n"
243e5c31af7Sopenharmony_ci		<< "  uvec4 mask = subgroupBallot(true);\n";
244e5c31af7Sopenharmony_ci
245e5c31af7Sopenharmony_ci	if (OPTYPE_CLUSTERED == caseDef.opType)
246e5c31af7Sopenharmony_ci	{
247e5c31af7Sopenharmony_ci		for (deUint32 i = 1; i <= subgroups::maxSupportedSubgroupSize(); i *= 2)
248e5c31af7Sopenharmony_ci		{
249e5c31af7Sopenharmony_ci			bdy << "  if (gl_SubgroupSize >= " << i << "u)\n"
250e5c31af7Sopenharmony_ci				<< "  {\n"
251e5c31af7Sopenharmony_ci				<< "    uvec4 contribution = uvec4(0);\n"
252e5c31af7Sopenharmony_ci				<< "    uint modID = gl_SubgroupInvocationID % 32u;\n"
253e5c31af7Sopenharmony_ci				<< "    switch (gl_SubgroupInvocationID / 32u)\n"
254e5c31af7Sopenharmony_ci				<< "    {\n"
255e5c31af7Sopenharmony_ci				<< "    case 0u: contribution.x = 1u << modID; break;\n"
256e5c31af7Sopenharmony_ci				<< "    case 1u: contribution.y = 1u << modID; break;\n"
257e5c31af7Sopenharmony_ci				<< "    case 2u: contribution.z = 1u << modID; break;\n"
258e5c31af7Sopenharmony_ci				<< "    case 3u: contribution.w = 1u << modID; break;\n"
259e5c31af7Sopenharmony_ci				<< "    }\n"
260e5c31af7Sopenharmony_ci				<< "    uvec4 result = subgroupClusteredOr(contribution, " << i << "u);\n"
261e5c31af7Sopenharmony_ci				<< "    uint rootID = gl_SubgroupInvocationID & ~(" << i - 1 << "u);\n"
262e5c31af7Sopenharmony_ci				<< "    for (uint i = 0u; i < " << i << "u; i++)\n"
263e5c31af7Sopenharmony_ci				<< "    {\n"
264e5c31af7Sopenharmony_ci				<< "      uint nextID = rootID + i;\n"
265e5c31af7Sopenharmony_ci				<< "      if (subgroupBallotBitExtract(mask, nextID) ^^ subgroupBallotBitExtract(result, nextID))\n"
266e5c31af7Sopenharmony_ci				<< "      {\n"
267e5c31af7Sopenharmony_ci				<< "        tempResult = 0u;\n"
268e5c31af7Sopenharmony_ci				<< "      }\n"
269e5c31af7Sopenharmony_ci				<< "    }\n"
270e5c31af7Sopenharmony_ci				<< "  }\n";
271e5c31af7Sopenharmony_ci		}
272e5c31af7Sopenharmony_ci	}
273e5c31af7Sopenharmony_ci	else
274e5c31af7Sopenharmony_ci	{
275e5c31af7Sopenharmony_ci		bdy << "  uint cluster[4] =\n"
276e5c31af7Sopenharmony_ci			<< "  uint[](\n"
277e5c31af7Sopenharmony_ci			<< "    subgroupQuadBroadcast(gl_SubgroupInvocationID, 0u),\n"
278e5c31af7Sopenharmony_ci			<< "    subgroupQuadBroadcast(gl_SubgroupInvocationID, 1u),\n"
279e5c31af7Sopenharmony_ci			<< "    subgroupQuadBroadcast(gl_SubgroupInvocationID, 2u),\n"
280e5c31af7Sopenharmony_ci			<< "    subgroupQuadBroadcast(gl_SubgroupInvocationID, 3u)\n"
281e5c31af7Sopenharmony_ci			<< "  );\n"
282e5c31af7Sopenharmony_ci			<< "  uint rootID = gl_SubgroupInvocationID & ~0x3u;\n"
283e5c31af7Sopenharmony_ci			<< "  for (uint i = 0u; i < 4u; i++)\n"
284e5c31af7Sopenharmony_ci			<< "  {\n"
285e5c31af7Sopenharmony_ci			<< "    uint nextID = rootID + i;\n"
286e5c31af7Sopenharmony_ci			<< "    if (subgroupBallotBitExtract(mask, nextID) && (cluster[i] != nextID))\n"
287e5c31af7Sopenharmony_ci			<< "    {\n"
288e5c31af7Sopenharmony_ci			<< "      tempResult = mask.x;\n"
289e5c31af7Sopenharmony_ci			<< "    }\n"
290e5c31af7Sopenharmony_ci			<< "  }\n";
291e5c31af7Sopenharmony_ci	}
292e5c31af7Sopenharmony_ci
293e5c31af7Sopenharmony_ci	if (SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
294e5c31af7Sopenharmony_ci	{
295e5c31af7Sopenharmony_ci		std::ostringstream src;
296e5c31af7Sopenharmony_ci
297e5c31af7Sopenharmony_ci		src << "${VERSION_DECL}\n"
298e5c31af7Sopenharmony_ci			<< extension
299e5c31af7Sopenharmony_ci			<< "layout (${LOCAL_SIZE_X}, ${LOCAL_SIZE_Y}, ${LOCAL_SIZE_Z}) in;\n"
300e5c31af7Sopenharmony_ci			<< "layout(binding = 0, std430) buffer Buffer0\n"
301e5c31af7Sopenharmony_ci			<< "{\n"
302e5c31af7Sopenharmony_ci			<< "  uint result[];\n"
303e5c31af7Sopenharmony_ci			<< "};\n"
304e5c31af7Sopenharmony_ci			<< "\n"
305e5c31af7Sopenharmony_ci			<< "void main (void)\n"
306e5c31af7Sopenharmony_ci			<< "{\n"
307e5c31af7Sopenharmony_ci			<< "  uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
308e5c31af7Sopenharmony_ci			<< "  highp uint offset = globalSize.x * ((globalSize.y * "
309e5c31af7Sopenharmony_ci			"gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
310e5c31af7Sopenharmony_ci			"gl_GlobalInvocationID.x;\n"
311e5c31af7Sopenharmony_ci			<< bdy.str()
312e5c31af7Sopenharmony_ci			<< "  result[offset] = tempResult;\n"
313e5c31af7Sopenharmony_ci			<< "}\n";
314e5c31af7Sopenharmony_ci
315e5c31af7Sopenharmony_ci		programCollection.add("comp") << glu::ComputeSource(src.str());
316e5c31af7Sopenharmony_ci	}
317e5c31af7Sopenharmony_ci	else
318e5c31af7Sopenharmony_ci	{
319e5c31af7Sopenharmony_ci		{
320e5c31af7Sopenharmony_ci			const string vertex =
321e5c31af7Sopenharmony_ci				"${VERSION_DECL}\n"
322e5c31af7Sopenharmony_ci				+ extension +
323e5c31af7Sopenharmony_ci				"layout(binding = 0, std430) buffer Buffer0\n"
324e5c31af7Sopenharmony_ci				"{\n"
325e5c31af7Sopenharmony_ci				"  uint result[];\n"
326e5c31af7Sopenharmony_ci				"} b0;\n"
327e5c31af7Sopenharmony_ci				"\n"
328e5c31af7Sopenharmony_ci				"void main (void)\n"
329e5c31af7Sopenharmony_ci				"{\n"
330e5c31af7Sopenharmony_ci				+ bdy.str() +
331e5c31af7Sopenharmony_ci				"  b0.result[gl_VertexID] = tempResult;\n"
332e5c31af7Sopenharmony_ci				"  float pixelSize = 2.0f/1024.0f;\n"
333e5c31af7Sopenharmony_ci				"  float pixelPosition = pixelSize/2.0f - 1.0f;\n"
334e5c31af7Sopenharmony_ci				"  gl_Position = vec4(float(gl_VertexID) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
335e5c31af7Sopenharmony_ci				"}\n";
336e5c31af7Sopenharmony_ci
337e5c31af7Sopenharmony_ci			programCollection.add("vert") << glu::VertexSource(vertex);
338e5c31af7Sopenharmony_ci		}
339e5c31af7Sopenharmony_ci
340e5c31af7Sopenharmony_ci		{
341e5c31af7Sopenharmony_ci			const string tesc =
342e5c31af7Sopenharmony_ci				"${VERSION_DECL}\n"
343e5c31af7Sopenharmony_ci				+ extension +
344e5c31af7Sopenharmony_ci				"layout(vertices=1) out;\n"
345e5c31af7Sopenharmony_ci				"layout(binding = 1, std430) buffer Buffer1\n"
346e5c31af7Sopenharmony_ci				"{\n"
347e5c31af7Sopenharmony_ci				"  uint result[];\n"
348e5c31af7Sopenharmony_ci				"} b1;\n"
349e5c31af7Sopenharmony_ci				"\n"
350e5c31af7Sopenharmony_ci				"void main (void)\n"
351e5c31af7Sopenharmony_ci				"{\n"
352e5c31af7Sopenharmony_ci				+ bdy.str() +
353e5c31af7Sopenharmony_ci				"  b1.result[gl_PrimitiveID] = 1u;\n"
354e5c31af7Sopenharmony_ci				"  if (gl_InvocationID == 0)\n"
355e5c31af7Sopenharmony_ci				"  {\n"
356e5c31af7Sopenharmony_ci				"    gl_TessLevelOuter[0] = 1.0f;\n"
357e5c31af7Sopenharmony_ci				"    gl_TessLevelOuter[1] = 1.0f;\n"
358e5c31af7Sopenharmony_ci				"  }\n"
359e5c31af7Sopenharmony_ci				"  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
360e5c31af7Sopenharmony_ci				"}\n";
361e5c31af7Sopenharmony_ci
362e5c31af7Sopenharmony_ci			programCollection.add("tesc") << glu::TessellationControlSource(tesc);
363e5c31af7Sopenharmony_ci		}
364e5c31af7Sopenharmony_ci
365e5c31af7Sopenharmony_ci		{
366e5c31af7Sopenharmony_ci			const string tese =
367e5c31af7Sopenharmony_ci				"${VERSION_DECL}\n"
368e5c31af7Sopenharmony_ci				+ extension +
369e5c31af7Sopenharmony_ci				"layout(isolines) in;\n"
370e5c31af7Sopenharmony_ci				"layout(binding = 2, std430) buffer Buffer2\n"
371e5c31af7Sopenharmony_ci				"{\n"
372e5c31af7Sopenharmony_ci				"  uint result[];\n"
373e5c31af7Sopenharmony_ci				"} b2;\n"
374e5c31af7Sopenharmony_ci				"\n"
375e5c31af7Sopenharmony_ci				"void main (void)\n"
376e5c31af7Sopenharmony_ci				"{\n"
377e5c31af7Sopenharmony_ci				+ bdy.str() +
378e5c31af7Sopenharmony_ci				"  b2.result[gl_PrimitiveID * 2 + int(gl_TessCoord.x + 0.5)] = 1u;\n"
379e5c31af7Sopenharmony_ci				"  float pixelSize = 2.0f/1024.0f;\n"
380e5c31af7Sopenharmony_ci				"  gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
381e5c31af7Sopenharmony_ci				"}\n";
382e5c31af7Sopenharmony_ci
383e5c31af7Sopenharmony_ci			programCollection.add("tese") << glu::TessellationEvaluationSource(tese);
384e5c31af7Sopenharmony_ci		}
385e5c31af7Sopenharmony_ci
386e5c31af7Sopenharmony_ci		{
387e5c31af7Sopenharmony_ci			const string geometry =
388e5c31af7Sopenharmony_ci				// version added by addGeometryShadersFromTemplate
389e5c31af7Sopenharmony_ci				extension +
390e5c31af7Sopenharmony_ci				"layout(${TOPOLOGY}) in;\n"
391e5c31af7Sopenharmony_ci				"layout(points, max_vertices = 1) out;\n"
392e5c31af7Sopenharmony_ci				"layout(binding = 3, std430) buffer Buffer3\n"
393e5c31af7Sopenharmony_ci				"{\n"
394e5c31af7Sopenharmony_ci				"  uint result[];\n"
395e5c31af7Sopenharmony_ci				"} b3;\n"
396e5c31af7Sopenharmony_ci				"\n"
397e5c31af7Sopenharmony_ci				"void main (void)\n"
398e5c31af7Sopenharmony_ci				"{\n"
399e5c31af7Sopenharmony_ci				+ bdy.str() +
400e5c31af7Sopenharmony_ci				"  b3.result[gl_PrimitiveIDIn] = tempResult;\n"
401e5c31af7Sopenharmony_ci				"  gl_Position = gl_in[0].gl_Position;\n"
402e5c31af7Sopenharmony_ci				"  EmitVertex();\n"
403e5c31af7Sopenharmony_ci				"  EndPrimitive();\n"
404e5c31af7Sopenharmony_ci				"}\n";
405e5c31af7Sopenharmony_ci
406e5c31af7Sopenharmony_ci			subgroups::addGeometryShadersFromTemplate(geometry, programCollection);
407e5c31af7Sopenharmony_ci		}
408e5c31af7Sopenharmony_ci
409e5c31af7Sopenharmony_ci		{
410e5c31af7Sopenharmony_ci			const string fragment =
411e5c31af7Sopenharmony_ci				"${VERSION_DECL}\n"
412e5c31af7Sopenharmony_ci				+ extension +
413e5c31af7Sopenharmony_ci				"precision highp int;\n"
414e5c31af7Sopenharmony_ci				"layout(location = 0) out uint result;\n"
415e5c31af7Sopenharmony_ci				"void main (void)\n"
416e5c31af7Sopenharmony_ci				"{\n"
417e5c31af7Sopenharmony_ci				+ bdy.str() +
418e5c31af7Sopenharmony_ci				"  result = tempResult;\n"
419e5c31af7Sopenharmony_ci				"}\n";
420e5c31af7Sopenharmony_ci
421e5c31af7Sopenharmony_ci			programCollection.add("fragment") << glu::FragmentSource(fragment);
422e5c31af7Sopenharmony_ci		}
423e5c31af7Sopenharmony_ci		subgroups::addNoSubgroupShader(programCollection);
424e5c31af7Sopenharmony_ci	}
425e5c31af7Sopenharmony_ci}
426e5c31af7Sopenharmony_ci
427e5c31af7Sopenharmony_civoid supportedCheck (Context& context, CaseDefinition caseDef)
428e5c31af7Sopenharmony_ci{
429e5c31af7Sopenharmony_ci	if (!subgroups::isSubgroupSupported(context))
430e5c31af7Sopenharmony_ci		TCU_THROW(NotSupportedError, "Subgroup operations are not supported");
431e5c31af7Sopenharmony_ci
432e5c31af7Sopenharmony_ci	if (!subgroups::isSubgroupFeatureSupportedForDevice(context, SUBGROUP_FEATURE_BALLOT_BIT))
433e5c31af7Sopenharmony_ci	{
434e5c31af7Sopenharmony_ci		TCU_THROW(NotSupportedError, "Device does not support subgroup ballot operations");
435e5c31af7Sopenharmony_ci	}
436e5c31af7Sopenharmony_ci
437e5c31af7Sopenharmony_ci	if (OPTYPE_CLUSTERED == caseDef.opType)
438e5c31af7Sopenharmony_ci	{
439e5c31af7Sopenharmony_ci		if (!subgroups::isSubgroupFeatureSupportedForDevice(context, SUBGROUP_FEATURE_CLUSTERED_BIT))
440e5c31af7Sopenharmony_ci		{
441e5c31af7Sopenharmony_ci			TCU_THROW(NotSupportedError, "Subgroup shape tests require that clustered operations are supported!");
442e5c31af7Sopenharmony_ci		}
443e5c31af7Sopenharmony_ci	}
444e5c31af7Sopenharmony_ci
445e5c31af7Sopenharmony_ci	if (OPTYPE_QUAD == caseDef.opType)
446e5c31af7Sopenharmony_ci	{
447e5c31af7Sopenharmony_ci		if (!subgroups::isSubgroupFeatureSupportedForDevice(context, SUBGROUP_FEATURE_QUAD_BIT))
448e5c31af7Sopenharmony_ci		{
449e5c31af7Sopenharmony_ci			TCU_THROW(NotSupportedError, "Subgroup shape tests require that quad operations are supported!");
450e5c31af7Sopenharmony_ci		}
451e5c31af7Sopenharmony_ci	}
452e5c31af7Sopenharmony_ci}
453e5c31af7Sopenharmony_ci
454e5c31af7Sopenharmony_citcu::TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef)
455e5c31af7Sopenharmony_ci{
456e5c31af7Sopenharmony_ci	if (!subgroups::areSubgroupOperationsSupportedForStage(
457e5c31af7Sopenharmony_ci				context, caseDef.shaderStage))
458e5c31af7Sopenharmony_ci	{
459e5c31af7Sopenharmony_ci		if (subgroups::areSubgroupOperationsRequiredForStage(
460e5c31af7Sopenharmony_ci					caseDef.shaderStage))
461e5c31af7Sopenharmony_ci		{
462e5c31af7Sopenharmony_ci			return tcu::TestStatus::fail(
463e5c31af7Sopenharmony_ci					   "Shader stage " +
464e5c31af7Sopenharmony_ci					   subgroups::getShaderStageName(caseDef.shaderStage) +
465e5c31af7Sopenharmony_ci					   " is required to support subgroup operations!");
466e5c31af7Sopenharmony_ci		}
467e5c31af7Sopenharmony_ci		else
468e5c31af7Sopenharmony_ci		{
469e5c31af7Sopenharmony_ci			TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage");
470e5c31af7Sopenharmony_ci		}
471e5c31af7Sopenharmony_ci	}
472e5c31af7Sopenharmony_ci
473e5c31af7Sopenharmony_ci	if (SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
474e5c31af7Sopenharmony_ci		return subgroups::makeVertexFrameBufferTest(context, FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages);
475e5c31af7Sopenharmony_ci	else if (SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
476e5c31af7Sopenharmony_ci		return subgroups::makeGeometryFrameBufferTest(context, FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages);
477e5c31af7Sopenharmony_ci	else if (SHADER_STAGE_TESS_CONTROL_BIT == caseDef.shaderStage)
478e5c31af7Sopenharmony_ci		return subgroups::makeTessellationEvaluationFrameBufferTest(context, FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages, SHADER_STAGE_TESS_CONTROL_BIT);
479e5c31af7Sopenharmony_ci	else if (SHADER_STAGE_TESS_EVALUATION_BIT == caseDef.shaderStage)
480e5c31af7Sopenharmony_ci		return subgroups::makeTessellationEvaluationFrameBufferTest(context,  FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages, SHADER_STAGE_TESS_EVALUATION_BIT);
481e5c31af7Sopenharmony_ci	else
482e5c31af7Sopenharmony_ci		TCU_THROW(InternalError, "Unhandled shader stage");
483e5c31af7Sopenharmony_ci}
484e5c31af7Sopenharmony_ci
485e5c31af7Sopenharmony_ci
486e5c31af7Sopenharmony_citcu::TestStatus test(Context& context, const CaseDefinition caseDef)
487e5c31af7Sopenharmony_ci{
488e5c31af7Sopenharmony_ci	if (!subgroups::isSubgroupFeatureSupportedForDevice(context, SUBGROUP_FEATURE_BASIC_BIT))
489e5c31af7Sopenharmony_ci	{
490e5c31af7Sopenharmony_ci		return tcu::TestStatus::fail(
491e5c31af7Sopenharmony_ci				   "Subgroup feature " +
492e5c31af7Sopenharmony_ci				   subgroups::getSubgroupFeatureName(SUBGROUP_FEATURE_BASIC_BIT) +
493e5c31af7Sopenharmony_ci				   " is a required capability!");
494e5c31af7Sopenharmony_ci	}
495e5c31af7Sopenharmony_ci
496e5c31af7Sopenharmony_ci	if (SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
497e5c31af7Sopenharmony_ci	{
498e5c31af7Sopenharmony_ci		if (!subgroups::areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage))
499e5c31af7Sopenharmony_ci		{
500e5c31af7Sopenharmony_ci			return tcu::TestStatus::fail(
501e5c31af7Sopenharmony_ci					   "Shader stage " +
502e5c31af7Sopenharmony_ci					   subgroups::getShaderStageName(caseDef.shaderStage) +
503e5c31af7Sopenharmony_ci					   " is required to support subgroup operations!");
504e5c31af7Sopenharmony_ci		}
505e5c31af7Sopenharmony_ci		return subgroups::makeComputeTest(context, FORMAT_R32_UINT, DE_NULL, 0, checkComputeStage);
506e5c31af7Sopenharmony_ci	}
507e5c31af7Sopenharmony_ci	else
508e5c31af7Sopenharmony_ci	{
509e5c31af7Sopenharmony_ci		int supportedStages = context.getDeqpContext().getContextInfo().getInt(GL_SUBGROUP_SUPPORTED_STAGES_KHR);
510e5c31af7Sopenharmony_ci
511e5c31af7Sopenharmony_ci		ShaderStageFlags stages = (ShaderStageFlags)(caseDef.shaderStage & supportedStages);
512e5c31af7Sopenharmony_ci
513e5c31af7Sopenharmony_ci		if (SHADER_STAGE_FRAGMENT_BIT != stages && !subgroups::isVertexSSBOSupportedForDevice(context))
514e5c31af7Sopenharmony_ci		{
515e5c31af7Sopenharmony_ci			if ( (stages & SHADER_STAGE_FRAGMENT_BIT) == 0)
516e5c31af7Sopenharmony_ci				TCU_THROW(NotSupportedError, "Device does not support vertex stage SSBO writes");
517e5c31af7Sopenharmony_ci			else
518e5c31af7Sopenharmony_ci				stages = SHADER_STAGE_FRAGMENT_BIT;
519e5c31af7Sopenharmony_ci		}
520e5c31af7Sopenharmony_ci
521e5c31af7Sopenharmony_ci		if ((ShaderStageFlags)0u == stages)
522e5c31af7Sopenharmony_ci			TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphic shader");
523e5c31af7Sopenharmony_ci
524e5c31af7Sopenharmony_ci		return subgroups::allStages(context, FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages, stages);
525e5c31af7Sopenharmony_ci	}
526e5c31af7Sopenharmony_ci}
527e5c31af7Sopenharmony_ci}
528e5c31af7Sopenharmony_ci
529e5c31af7Sopenharmony_cideqp::TestCaseGroup* createSubgroupsShapeTests(deqp::Context& testCtx)
530e5c31af7Sopenharmony_ci{
531e5c31af7Sopenharmony_ci	de::MovePtr<deqp::TestCaseGroup> graphicGroup(new deqp::TestCaseGroup(
532e5c31af7Sopenharmony_ci		testCtx, "graphics", "Subgroup shape category tests: graphics"));
533e5c31af7Sopenharmony_ci	de::MovePtr<deqp::TestCaseGroup> computeGroup(new deqp::TestCaseGroup(
534e5c31af7Sopenharmony_ci		testCtx, "compute", "Subgroup shape category tests: compute"));
535e5c31af7Sopenharmony_ci	de::MovePtr<deqp::TestCaseGroup> framebufferGroup(new deqp::TestCaseGroup(
536e5c31af7Sopenharmony_ci		testCtx, "framebuffer", "Subgroup shape category tests: framebuffer"));
537e5c31af7Sopenharmony_ci
538e5c31af7Sopenharmony_ci	const ShaderStageFlags stages[] =
539e5c31af7Sopenharmony_ci	{
540e5c31af7Sopenharmony_ci		SHADER_STAGE_VERTEX_BIT,
541e5c31af7Sopenharmony_ci		SHADER_STAGE_TESS_EVALUATION_BIT,
542e5c31af7Sopenharmony_ci		SHADER_STAGE_TESS_CONTROL_BIT,
543e5c31af7Sopenharmony_ci		SHADER_STAGE_GEOMETRY_BIT,
544e5c31af7Sopenharmony_ci	};
545e5c31af7Sopenharmony_ci
546e5c31af7Sopenharmony_ci	for (int opTypeIndex = 0; opTypeIndex < OPTYPE_LAST; ++opTypeIndex)
547e5c31af7Sopenharmony_ci	{
548e5c31af7Sopenharmony_ci		const std::string op = de::toLower(getOpTypeName(opTypeIndex));
549e5c31af7Sopenharmony_ci
550e5c31af7Sopenharmony_ci		{
551e5c31af7Sopenharmony_ci			const CaseDefinition caseDef = {opTypeIndex, SHADER_STAGE_COMPUTE_BIT};
552e5c31af7Sopenharmony_ci			SubgroupFactory<CaseDefinition>::addFunctionCaseWithPrograms(computeGroup.get(), op, "", supportedCheck, initPrograms, test, caseDef);
553e5c31af7Sopenharmony_ci
554e5c31af7Sopenharmony_ci		}
555e5c31af7Sopenharmony_ci
556e5c31af7Sopenharmony_ci		{
557e5c31af7Sopenharmony_ci			const CaseDefinition caseDef =
558e5c31af7Sopenharmony_ci			{
559e5c31af7Sopenharmony_ci				opTypeIndex,
560e5c31af7Sopenharmony_ci				SHADER_STAGE_ALL_GRAPHICS
561e5c31af7Sopenharmony_ci			};
562e5c31af7Sopenharmony_ci			SubgroupFactory<CaseDefinition>::addFunctionCaseWithPrograms(graphicGroup.get(),
563e5c31af7Sopenharmony_ci									op, "",
564e5c31af7Sopenharmony_ci									supportedCheck, initPrograms, test, caseDef);
565e5c31af7Sopenharmony_ci		}
566e5c31af7Sopenharmony_ci
567e5c31af7Sopenharmony_ci		for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(stages); ++stageIndex)
568e5c31af7Sopenharmony_ci		{
569e5c31af7Sopenharmony_ci			const CaseDefinition caseDef = {opTypeIndex, stages[stageIndex]};
570e5c31af7Sopenharmony_ci			SubgroupFactory<CaseDefinition>::addFunctionCaseWithPrograms(framebufferGroup.get(),op + "_" + getShaderStageName(caseDef.shaderStage), "",
571e5c31af7Sopenharmony_ci										supportedCheck, initFrameBufferPrograms, noSSBOtest, caseDef);
572e5c31af7Sopenharmony_ci		}
573e5c31af7Sopenharmony_ci	}
574e5c31af7Sopenharmony_ci
575e5c31af7Sopenharmony_ci	de::MovePtr<deqp::TestCaseGroup> group(new deqp::TestCaseGroup(
576e5c31af7Sopenharmony_ci		testCtx, "shape", "Subgroup shape category tests"));
577e5c31af7Sopenharmony_ci
578e5c31af7Sopenharmony_ci	group->addChild(graphicGroup.release());
579e5c31af7Sopenharmony_ci	group->addChild(computeGroup.release());
580e5c31af7Sopenharmony_ci	group->addChild(framebufferGroup.release());
581e5c31af7Sopenharmony_ci
582e5c31af7Sopenharmony_ci	return group.release();
583e5c31af7Sopenharmony_ci}
584e5c31af7Sopenharmony_ci
585e5c31af7Sopenharmony_ci} // subgroups
586e5c31af7Sopenharmony_ci} // glc
587