/*------------------------------------------------------------------------ * OpenGL Conformance Tests * ------------------------ * * Copyright (c) 2017-2019 The Khronos Group Inc. * Copyright (c) 2017 Codeplay Software Ltd. * Copyright (c) 2019 NVIDIA Corporation. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * */ /*! * \file * \brief Subgroups Tests */ /*--------------------------------------------------------------------*/ #include "glcSubgroupsBasicTests.hpp" #include "glcSubgroupsTestsUtils.hpp" #include #include #include "tcuStringTemplate.hpp" using namespace tcu; using namespace std; namespace glc { namespace subgroups { namespace { static const deUint32 ELECTED_VALUE = 42u; static const deUint32 UNELECTED_VALUE = 13u; static const deUint64 SHADER_BUFFER_SIZE = 4096ull; static bool checkFragmentSubgroupBarriersNoSSBO(std::vector datas, deUint32 width, deUint32 height, deUint32) { const float* const resultData = reinterpret_cast(datas[0]); for (deUint32 x = 0u; x < width; ++x) { for (deUint32 y = 0u; y < height; ++y) { const deUint32 ndx = (x * height + y) * 4u; if (1.0f == resultData[ndx +2]) { if(resultData[ndx] != resultData[ndx +1]) { return false; } } else if (resultData[ndx] != resultData[ndx +3]) { return false; } } } return true; } static bool checkVertexPipelineStagesSubgroupElectNoSSBO(std::vector datas, deUint32 width, deUint32) { const float* const resultData = reinterpret_cast(datas[0]); float poisonValuesFound = 0.0f; float numSubgroupsUsed = 0.0f; for (deUint32 x = 0; x < width; ++x) { deUint32 val = static_cast(resultData[x * 2]); numSubgroupsUsed += resultData[x * 2 + 1]; switch (val) { default: // some garbage value was found! return false; case UNELECTED_VALUE: break; case ELECTED_VALUE: poisonValuesFound += 1.0f; break; } } return numSubgroupsUsed == poisonValuesFound; } static bool checkVertexPipelineStagesSubgroupElect(std::vector datas, deUint32 width, deUint32) { const deUint32* const resultData = reinterpret_cast(datas[0]); deUint32 poisonValuesFound = 0; for (deUint32 x = 0; x < width; ++x) { deUint32 val = resultData[x]; switch (val) { default: // some garbage value was found! return false; case UNELECTED_VALUE: break; case ELECTED_VALUE: poisonValuesFound++; break; } } // we used an atomicly incremented counter to note how many subgroups we used for the vertex shader const deUint32 numSubgroupsUsed = *reinterpret_cast(datas[1]); return numSubgroupsUsed == poisonValuesFound; } static bool checkVertexPipelineStagesSubgroupBarriers(std::vector datas, deUint32 width, deUint32) { const deUint32* const resultData = reinterpret_cast(datas[0]); // We used this SSBO to generate our unique value! const deUint32 ref = *reinterpret_cast(datas[1]); for (deUint32 x = 0; x < width; ++x) { deUint32 val = resultData[x]; if (val != ref) return false; } return true; } static bool checkVertexPipelineStagesSubgroupBarriersNoSSBO(std::vector datas, deUint32 width, deUint32) { const float* const resultData = reinterpret_cast(datas[0]); for (deUint32 x = 0u; x < width; ++x) { const deUint32 ndx = x*4u; if (1.0f == resultData[ndx +2]) { if(resultData[ndx] != resultData[ndx +1]) return false; } else if (resultData[ndx] != resultData[ndx +3]) { return false; } } return true; } static bool checkTessellationEvaluationSubgroupBarriersNoSSBO(std::vector datas, deUint32 width, deUint32) { const float* const resultData = reinterpret_cast(datas[0]); for (deUint32 x = 0u; x < width; ++x) { const deUint32 ndx = x*4u; if (0.0f == resultData[ndx +2] && resultData[ndx] != resultData[ndx +3]) { return false; } } return true; } static bool checkComputeSubgroupElect(std::vector datas, const deUint32 numWorkgroups[3], const deUint32 localSize[3], deUint32) { return glc::subgroups::checkCompute(datas, numWorkgroups, localSize, 1); } static bool checkComputeSubgroupBarriers(std::vector datas, const deUint32 numWorkgroups[3], const deUint32 localSize[3], deUint32) { // We used this SSBO to generate our unique value! const deUint32 ref = *reinterpret_cast(datas[1]); return glc::subgroups::checkCompute(datas, numWorkgroups, localSize, ref); } enum OpType { OPTYPE_ELECT = 0, OPTYPE_SUBGROUP_BARRIER, OPTYPE_SUBGROUP_MEMORY_BARRIER, OPTYPE_SUBGROUP_MEMORY_BARRIER_BUFFER, OPTYPE_SUBGROUP_MEMORY_BARRIER_SHARED, OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE, OPTYPE_LAST }; std::string getOpTypeName(int opType) { switch (opType) { default: DE_FATAL("Unsupported op type"); return ""; case OPTYPE_ELECT: return "subgroupElect"; case OPTYPE_SUBGROUP_BARRIER: return "subgroupBarrier"; case OPTYPE_SUBGROUP_MEMORY_BARRIER: return "subgroupMemoryBarrier"; case OPTYPE_SUBGROUP_MEMORY_BARRIER_BUFFER: return "subgroupMemoryBarrierBuffer"; case OPTYPE_SUBGROUP_MEMORY_BARRIER_SHARED: return "subgroupMemoryBarrierShared"; case OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE: return "subgroupMemoryBarrierImage"; } } struct CaseDefinition { int opType; subgroups::ShaderStageFlags shaderStage; }; void initFrameBufferPrograms(SourceCollections& programCollection, CaseDefinition caseDef) { if(subgroups::SHADER_STAGE_FRAGMENT_BIT != caseDef.shaderStage) { const string fragmentGLSL = "${VERSION_DECL}\n" "layout(location = 0) in highp vec4 in_color;\n" "layout(location = 0) out highp vec4 out_color;\n" "void main()\n" "{\n" " out_color = in_color;\n" "}\n"; programCollection.add("fragment") << glu::FragmentSource(fragmentGLSL); } if (subgroups::SHADER_STAGE_FRAGMENT_BIT == caseDef.shaderStage) { const string vertexGLSL = "${VERSION_DECL}\n" "void main (void)\n" "{\n" " vec2 uv = vec2((gl_VertexID << 1) & 2, gl_VertexID & 2);\n" " gl_Position = vec4(uv * 2.0f + -1.0f, 0.0f, 1.0f);\n" " gl_PointSize = 1.0f;\n" "}\n"; programCollection.add("vert") << glu::VertexSource(vertexGLSL); } else if (subgroups::SHADER_STAGE_VERTEX_BIT != caseDef.shaderStage) subgroups::setVertexShaderFrameBuffer(programCollection); if (OPTYPE_ELECT == caseDef.opType) { std::ostringstream electedValue ; std::ostringstream unelectedValue; electedValue << ELECTED_VALUE; unelectedValue << UNELECTED_VALUE; if (subgroups::SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage) { const string vertexGLSL = "${VERSION_DECL}\n" "#extension GL_KHR_shader_subgroup_basic: enable\n" "layout(location = 0) out vec4 out_color;\n" "layout(location = 0) in highp vec4 in_position;\n" "\n" "void main (void)\n" "{\n" " if (subgroupElect())\n" " {\n" " out_color.r = " + electedValue.str() + ".0f;\n" " out_color.g = 1.0f;\n" " }\n" " else\n" " {\n" " out_color.r = " + unelectedValue.str() + ".0f;\n" " out_color.g = 0.0f;\n" " }\n" " gl_Position = in_position;\n" " gl_PointSize = 1.0f;\n" "}\n"; programCollection.add("vert") << glu::VertexSource(vertexGLSL); } else if (subgroups::SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage) { const string geometryGLSL = "${VERSION_DECL}\n" "#extension GL_KHR_shader_subgroup_basic: enable\n" "layout(points) in;\n" "layout(points, max_vertices = 1) out;\n" "layout(location = 0) out vec4 out_color;\n" "void main (void)\n" "{\n" " if (subgroupElect())\n" " {\n" " out_color.r = " + electedValue.str() + ".0f;\n" " out_color.g = 1.0f;\n" " }\n" " else\n" " {\n" " out_color.r = " + unelectedValue.str() + ".0f;\n" " out_color.g = 0.0f;\n" " }\n" " gl_Position = gl_in[0].gl_Position;\n" " EmitVertex();\n" " EndPrimitive();\n" "}\n"; programCollection.add("geometry") << glu::GeometrySource(geometryGLSL); } else if (subgroups::SHADER_STAGE_TESS_EVALUATION_BIT == caseDef.shaderStage) { const string controlSourceGLSL = "${VERSION_DECL}\n" "#extension GL_KHR_shader_subgroup_basic: enable\n" "${TESS_EXTENSION}\n" "layout(vertices = 2) out;\n" "void main (void)\n" "{\n" " if (gl_InvocationID == 0)\n" " {\n" " gl_TessLevelOuter[0] = 1.0f;\n" " gl_TessLevelOuter[1] = 1.0f;\n" " }\n" " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n" "}\n"; programCollection.add("tesc") << glu::TessellationControlSource(controlSourceGLSL); const string evaluationSourceGLSL = "${VERSION_DECL}\n" "#extension GL_KHR_shader_subgroup_basic: enable\n" "${TESS_EXTENSION}\n" "layout(isolines, equal_spacing, ccw ) in;\n" "layout(location = 0) out vec4 out_color;\n" "\n" "void main (void)\n" "{\n" " if (subgroupElect())\n" " {\n" " out_color.r = 2.0f * " + electedValue.str() + ".0f - " + unelectedValue.str() + ".0f;\n" " out_color.g = 2.0f;\n" " }\n" " else\n" " {\n" " out_color.r = " + unelectedValue.str() + ".0f;\n" " out_color.g = 0.0f;\n" " }\n" " gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n" "}\n"; programCollection.add("tese") << glu::TessellationEvaluationSource(evaluationSourceGLSL); } else if (subgroups::SHADER_STAGE_TESS_CONTROL_BIT == caseDef.shaderStage) { const string controlSourceGLSL = "${VERSION_DECL}\n" "#extension GL_KHR_shader_subgroup_basic: enable\n" "${TESS_EXTENSION}\n" "layout(vertices = 2) out;\n" "layout(location = 0) out vec4 out_color[];\n" "void main (void)\n" "{\n" " if (gl_InvocationID == 0)\n" " {\n" " gl_TessLevelOuter[0] = 1.0f;\n" " gl_TessLevelOuter[1] = 1.0f;\n" " }\n" " if (subgroupElect())\n" " {\n" " out_color[gl_InvocationID].r = " + electedValue.str() + ".0f;\n" " out_color[gl_InvocationID].g = 1.0f;\n" " }\n" " else\n" " {\n" " out_color[gl_InvocationID].r = " + unelectedValue.str() + ".0f;\n" " out_color[gl_InvocationID].g = 0.0f;\n" " }\n" " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n" "}\n"; programCollection.add("tesc") << glu::TessellationControlSource(controlSourceGLSL); const string evaluationSourceGLSL = "${VERSION_DECL}\n" "#extension GL_KHR_shader_subgroup_ballot: enable\n" "${TESS_EXTENSION}\n" "layout(isolines, equal_spacing, ccw ) in;\n" "layout(location = 0) in vec4 in_color[];\n" "layout(location = 0) out vec4 out_color;\n" "\n" "void main (void)\n" "{\n" " gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n" " out_color = in_color[0];\n" "}\n"; programCollection.add("tese") << glu::TessellationEvaluationSource(evaluationSourceGLSL); } else { DE_FATAL("Unsupported shader stage"); } } else { std::ostringstream bdy; string color = (subgroups::SHADER_STAGE_TESS_CONTROL_BIT == caseDef.shaderStage) ? "out_color[gl_InvocationID].b = 1.0f;\n" : "out_color.b = 1.0f;\n"; switch (caseDef.opType) { default: DE_FATAL("Unhandled op type!"); break; case OPTYPE_SUBGROUP_BARRIER: case OPTYPE_SUBGROUP_MEMORY_BARRIER: case OPTYPE_SUBGROUP_MEMORY_BARRIER_BUFFER: { bdy << " tempResult2 = tempBuffer[id];\n" << " if (subgroupElect())\n" << " {\n" << " tempResult = value;\n" << " " << color << " }\n" << " else\n" << " {\n" << " tempResult = tempBuffer[id];\n" << " }\n" << " " << getOpTypeName(caseDef.opType) << "();\n"; break; } case OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE: bdy << " tempResult2 = imageLoad(tempImage, ivec2(id, 0)).x;\n" << " if (subgroupElect())\n" << " {\n" << " tempResult = value;\n" << " " << color << " }\n" << " else\n" << " {\n" << " tempResult = imageLoad(tempImage, ivec2(id, 0)).x;\n" << " }\n" << " subgroupMemoryBarrierImage();\n"; break; } if (subgroups::SHADER_STAGE_FRAGMENT_BIT == caseDef.shaderStage) { std::ostringstream fragment; fragment << "${VERSION_DECL}\n" << "#extension GL_KHR_shader_subgroup_basic: enable\n" << "#extension GL_KHR_shader_subgroup_ballot: enable\n" << "precision highp int;\n" << "layout(location = 0) out highp vec4 out_color;\n" << "\n" << "layout(binding = 0, std140) uniform Buffer1\n" << "{\n" << " uint tempBuffer["< bufferNameMapping; bufferNameMapping.insert(pair("SSBO1", "1")); bufferNameMapping.insert(pair("IMG1", "0")); src << "${VERSION_DECL}\n" << "#extension GL_KHR_shader_subgroup_basic: enable\n" << "layout (${LOCAL_SIZE_X}, ${LOCAL_SIZE_Y}, ${LOCAL_SIZE_Z}) in;\n" << "layout(binding = 0, std430) buffer Buffer0\n" << "{\n" << " uint result[];\n" << "} b0;\n" << "layout(binding = 1, std430) buffer Buffer1\n" << "{\n" << " uint value;\n" << " uint tempBuffer[];\n" << "} b1;\n" << (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ? "layout(binding = 0, r32ui) uniform highp uimage2D tempImage0;\n" : "\n") << "shared uint tempShared[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];\n" << "\n" << "void main (void)\n" << "{\n" << " uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n" << " highp uint offset = globalSize.x * ((globalSize.y * " "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + " "gl_GlobalInvocationID.x;\n" << " uint localId = gl_SubgroupID;\n" << " uint id = globalSize.x * ((globalSize.y * " "gl_WorkGroupID.z) + gl_WorkGroupID.y) + " "gl_WorkGroupID.x + localId;\n" << " uint tempResult = 0u;\n" << bdyTemplate.specialize(bufferNameMapping) << " b0.result[offset] = tempResult;\n" << "}\n"; programCollection.add("comp") << glu::ComputeSource(src.str()); } else { { map bufferNameMapping; bufferNameMapping.insert(pair("SSBO1", "4")); bufferNameMapping.insert(pair("IMG1", "0")); std::ostringstream vertex; vertex << "${VERSION_DECL}\n" "#extension GL_KHR_shader_subgroup_basic: enable\n" "#extension GL_KHR_shader_subgroup_ballot: enable\n" "layout(binding = 0, std430) buffer Buffer0\n" "{\n" " uint result[];\n" "} b0;\n" "layout(binding = 4, std430) buffer Buffer4\n" "{\n" " uint value;\n" " uint tempBuffer[];\n" "} b4;\n" "layout(binding = 5, std430) buffer Buffer5\n" "{\n" " uint subgroupID;\n" "} b5;\n" << (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ? "layout(binding = 0, r32ui) uniform highp uimage2D tempImage0;\n" : "") << "void main (void)\n" "{\n" " uint id = 0u;\n" " if (subgroupElect())\n" " {\n" " id = atomicAdd(b5.subgroupID, 1u);\n" " }\n" " id = subgroupBroadcastFirst(id);\n" " uint localId = id;\n" " uint tempResult = 0u;\n" + bdyTemplate.specialize(bufferNameMapping) + " b0.result[gl_VertexID] = tempResult;\n" " float pixelSize = 2.0f/1024.0f;\n" " float pixelPosition = pixelSize/2.0f - 1.0f;\n" " gl_Position = vec4(float(gl_VertexID) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n" " gl_PointSize = 1.0f;\n" "}\n"; programCollection.add("vert") << glu::VertexSource(vertex.str()); } { map bufferNameMapping; bufferNameMapping.insert(pair("SSBO1", "6")); bufferNameMapping.insert(pair("IMG1", "1")); std::ostringstream tesc; tesc << "${VERSION_DECL}\n" "#extension GL_KHR_shader_subgroup_basic: enable\n" "#extension GL_KHR_shader_subgroup_ballot: enable\n" "layout(vertices=1) out;\n" "layout(binding = 1, std430) buffer Buffer1\n" "{\n" " uint result[];\n" "} b1;\n" "layout(binding = 6, std430) buffer Buffer6\n" "{\n" " uint value;\n" " uint tempBuffer[];\n" "} b6;\n" "layout(binding = 7, std430) buffer Buffer7\n" "{\n" " uint subgroupID;\n" "} b7;\n" << (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ? "layout(binding = 1, r32ui) uniform highp uimage2D tempImage1;\n" : "") << "void main (void)\n" "{\n" " uint id = 0u;\n" " if (subgroupElect())\n" " {\n" " id = atomicAdd(b7.subgroupID, 1u);\n" " }\n" " id = subgroupBroadcastFirst(id);\n" " uint localId = id;\n" " uint tempResult = 0u;\n" + bdyTemplate.specialize(bufferNameMapping) + " b1.result[gl_PrimitiveID] = tempResult;\n" " if (gl_InvocationID == 0)\n" " {\n" " gl_TessLevelOuter[0] = 1.0f;\n" " gl_TessLevelOuter[1] = 1.0f;\n" " }\n" " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n" "}\n"; programCollection.add("tesc") << glu::TessellationControlSource(tesc.str()); } { map bufferNameMapping; bufferNameMapping.insert(pair("SSBO1", "8")); bufferNameMapping.insert(pair("IMG1", "2")); std::ostringstream tese; tese << "${VERSION_DECL}\n" "#extension GL_KHR_shader_subgroup_basic: enable\n" "#extension GL_KHR_shader_subgroup_ballot: enable\n" "layout(isolines) in;\n" "layout(binding = 2, std430) buffer Buffer2\n" "{\n" " uint result[];\n" "} b2;\n" "layout(binding = 8, std430) buffer Buffer8\n" "{\n" " uint value;\n" " uint tempBuffer[];\n" "} b8;\n" "layout(binding = 9, std430) buffer Buffer9\n" "{\n" " uint subgroupID;\n" "} b9;\n" << (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ? "layout(binding = 2, r32ui) uniform highp uimage2D tempImage2;\n" : "") << "void main (void)\n" "{\n" " uint id = 0u;\n" " if (subgroupElect())\n" " {\n" " id = atomicAdd(b9.subgroupID, 1u);\n" " }\n" " id = subgroupBroadcastFirst(id);\n" " uint localId = id;\n" " uint tempResult = 0u;\n" + bdyTemplate.specialize(bufferNameMapping) + " b2.result[gl_PrimitiveID * 2 + int(gl_TessCoord.x + 0.5)] = tempResult;\n" " float pixelSize = 2.0f/1024.0f;\n"" gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n" "}\n"; programCollection.add("tese") << glu::TessellationEvaluationSource(tese.str()); } { map bufferNameMapping; bufferNameMapping.insert(pair("SSBO1", "10")); bufferNameMapping.insert(pair("IMG1", "3")); std::ostringstream geometry; geometry << "#extension GL_KHR_shader_subgroup_basic: enable\n" "#extension GL_KHR_shader_subgroup_ballot: enable\n" "layout(${TOPOLOGY}) in;\n" "layout(points, max_vertices = 1) out;\n" "layout(binding = 3, std430) buffer Buffer3\n" "{\n" " uint result[];\n" "} b3;\n" "layout(binding = 10, std430) buffer Buffer10\n" "{\n" " uint value;\n" " uint tempBuffer[];\n" "} b10;\n" "layout(binding = 11, std430) buffer Buffer11\n" "{\n" " uint subgroupID;\n" "} b11;\n" << (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ? "layout(binding = 3, r32ui) uniform highp uimage2D tempImage3;\n" : "") << "void main (void)\n" "{\n" " uint id = 0u;\n" " if (subgroupElect())\n" " {\n" " id = atomicAdd(b11.subgroupID, 1u);\n" " }\n" " id = subgroupBroadcastFirst(id);\n" " uint localId = id;\n" " uint tempResult = 0u;\n" + bdyTemplate.specialize(bufferNameMapping) + " b3.result[gl_PrimitiveIDIn] = tempResult;\n" " gl_Position = gl_in[0].gl_Position;\n" " EmitVertex();\n" " EndPrimitive();\n" "}\n"; subgroups::addGeometryShadersFromTemplate(geometry.str(), programCollection); } { map bufferNameMapping; bufferNameMapping.insert(pair("SSBO1", "12")); bufferNameMapping.insert(pair("IMG1", "4")); std::ostringstream fragment; fragment << "${VERSION_DECL}\n" "#extension GL_KHR_shader_subgroup_basic: enable\n" "#extension GL_KHR_shader_subgroup_ballot: enable\n" "precision highp int;\n" "layout(location = 0) out uint result;\n" "layout(binding = 12, std430) buffer Buffer12\n" "{\n" " uint value;\n" " uint tempBuffer[];\n" "} b12;\n" "layout(binding = 13, std430) buffer Buffer13\n" "{\n" " uint subgroupID;\n" "} b13;\n" << (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ? "layout(binding = 4, r32ui) uniform highp uimage2D tempImage4;\n" : "") << "void main (void)\n" "{\n" " if (gl_HelperInvocation) return;\n" " uint id = 0u;\n" " if (subgroupElect())\n" " {\n" " id = atomicAdd(b13.subgroupID, 1u);\n" " }\n" " id = subgroupBroadcastFirst(id);\n" " uint localId = id;\n" " uint tempResult = 0u;\n" + bdyTemplate.specialize(bufferNameMapping) + " result = tempResult;\n" "}\n"; programCollection.add("fragment") << glu::FragmentSource(fragment.str()); } subgroups::addNoSubgroupShader(programCollection); } } } void supportedCheck (Context& context, CaseDefinition caseDef) { DE_UNREF(caseDef); if (!subgroups::isSubgroupSupported(context)) TCU_THROW(NotSupportedError, "Subgroup operations are not supported"); } tcu::TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef) { if (!subgroups::areSubgroupOperationsSupportedForStage( context, caseDef.shaderStage)) { if (subgroups::areSubgroupOperationsRequiredForStage( caseDef.shaderStage)) { return tcu::TestStatus::fail( "Shader stage " + subgroups::getShaderStageName(caseDef.shaderStage) + " is required to support subgroup operations!"); } else { TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage"); } } if (!subgroups::isSubgroupFeatureSupportedForDevice(context, subgroups::SUBGROUP_FEATURE_BASIC_BIT)) { return tcu::TestStatus::fail( "Subgroup feature " + subgroups::getSubgroupFeatureName(subgroups::SUBGROUP_FEATURE_BASIC_BIT) + " is a required capability!"); } if (OPTYPE_ELECT != caseDef.opType && subgroups::SHADER_STAGE_COMPUTE_BIT != caseDef.shaderStage) { if (!subgroups::isSubgroupFeatureSupportedForDevice(context, subgroups::SUBGROUP_FEATURE_BALLOT_BIT)) { TCU_THROW(NotSupportedError, "Subgroup basic operation non-compute stage test required that ballot operations are supported!"); } } if (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType) { if (!subgroups::isImageSupportedForStageOnDevice(context, caseDef.shaderStage)) { TCU_THROW(NotSupportedError, "Subgroup basic memory barrier image test for " + subgroups::getShaderStageName(caseDef.shaderStage) + " stage requires that image uniforms be supported on this stage"); } } const deUint32 inputDatasCount = OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ? 3u : 2u; std::vector inputDatas (inputDatasCount); inputDatas[0].format = subgroups::FORMAT_R32_UINT; inputDatas[0].layout = subgroups::SSBOData::LayoutStd140; inputDatas[0].numElements = SHADER_BUFFER_SIZE/4ull; inputDatas[0].initializeType = subgroups::SSBOData::InitializeNonZero; inputDatas[0].binding = 0u; inputDatas[1].format = subgroups::FORMAT_R32_UINT; inputDatas[1].layout = subgroups::SSBOData::LayoutStd140; inputDatas[1].numElements = 1ull; inputDatas[1].initializeType = subgroups::SSBOData::InitializeNonZero; inputDatas[1].binding = 1u; if(OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ) { inputDatas[2].format = subgroups::FORMAT_R32_UINT; inputDatas[2].layout = subgroups::SSBOData::LayoutPacked; inputDatas[2].numElements = SHADER_BUFFER_SIZE; inputDatas[2].initializeType = subgroups::SSBOData::InitializeNone; inputDatas[2].isImage = true; inputDatas[2].binding = 0u; } if (subgroups::SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage) { if (OPTYPE_ELECT == caseDef.opType) return subgroups::makeVertexFrameBufferTest(context, subgroups::FORMAT_R32G32_SFLOAT, DE_NULL, 0u, checkVertexPipelineStagesSubgroupElectNoSSBO); else return subgroups::makeVertexFrameBufferTest(context, subgroups::FORMAT_R32G32B32A32_SFLOAT, &inputDatas[0], inputDatasCount, checkVertexPipelineStagesSubgroupBarriersNoSSBO); } else if (subgroups::SHADER_STAGE_FRAGMENT_BIT == caseDef.shaderStage) { return subgroups::makeFragmentFrameBufferTest(context, subgroups::FORMAT_R32G32B32A32_SFLOAT, &inputDatas[0], inputDatasCount, checkFragmentSubgroupBarriersNoSSBO); } else if (subgroups::SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage) { if (OPTYPE_ELECT == caseDef.opType) return subgroups::makeGeometryFrameBufferTest(context, subgroups::FORMAT_R32G32_SFLOAT, DE_NULL, 0u, checkVertexPipelineStagesSubgroupElectNoSSBO); else return subgroups::makeGeometryFrameBufferTest(context, subgroups::FORMAT_R32G32B32A32_SFLOAT, &inputDatas[0], inputDatasCount, checkVertexPipelineStagesSubgroupBarriersNoSSBO); } if (OPTYPE_ELECT == caseDef.opType) return subgroups::makeTessellationEvaluationFrameBufferTest(context, subgroups::FORMAT_R32G32_SFLOAT, DE_NULL, 0u, checkVertexPipelineStagesSubgroupElectNoSSBO, caseDef.shaderStage); return subgroups::makeTessellationEvaluationFrameBufferTest(context, subgroups::FORMAT_R32G32B32A32_SFLOAT, &inputDatas[0], inputDatasCount, (subgroups::SHADER_STAGE_TESS_CONTROL_BIT == caseDef.shaderStage)? checkVertexPipelineStagesSubgroupBarriersNoSSBO : checkTessellationEvaluationSubgroupBarriersNoSSBO, caseDef.shaderStage); } tcu::TestStatus test(Context& context, const CaseDefinition caseDef) { if (!subgroups::isSubgroupFeatureSupportedForDevice(context, subgroups::SUBGROUP_FEATURE_BASIC_BIT)) { return tcu::TestStatus::fail( "Subgroup feature " + subgroups::getSubgroupFeatureName(subgroups::SUBGROUP_FEATURE_BASIC_BIT) + " is a required capability!"); } if (OPTYPE_ELECT != caseDef.opType && subgroups::SHADER_STAGE_COMPUTE_BIT != caseDef.shaderStage) { if (!subgroups::isSubgroupFeatureSupportedForDevice(context, subgroups::SUBGROUP_FEATURE_BALLOT_BIT)) { TCU_THROW(NotSupportedError, "Subgroup basic operation non-compute stage test required that ballot operations are supported!"); } } if (subgroups::SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage) { if (!subgroups::areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage)) { return tcu::TestStatus::fail("Shader stage " + subgroups::getShaderStageName(caseDef.shaderStage) + " is required to support subgroup operations!"); } if (OPTYPE_ELECT == caseDef.opType) { return subgroups::makeComputeTest(context, subgroups::FORMAT_R32_UINT, DE_NULL, 0, checkComputeSubgroupElect); } else { const deUint32 inputDatasCount = 2; subgroups::SSBOData inputDatas[inputDatasCount]; inputDatas[0].format = subgroups::FORMAT_R32_UINT; inputDatas[0].layout = subgroups::SSBOData::LayoutStd430; inputDatas[0].numElements = 1 + SHADER_BUFFER_SIZE; inputDatas[0].initializeType = subgroups::SSBOData::InitializeNonZero; inputDatas[0].binding = 1u; inputDatas[1].format = subgroups::FORMAT_R32_UINT; inputDatas[1].layout = subgroups::SSBOData::LayoutPacked; inputDatas[1].numElements = SHADER_BUFFER_SIZE; inputDatas[1].initializeType = subgroups::SSBOData::InitializeNone; inputDatas[1].isImage = true; inputDatas[1].binding = 0u; return subgroups::makeComputeTest(context, subgroups::FORMAT_R32_UINT, inputDatas, inputDatasCount, checkComputeSubgroupBarriers); } } else { if (!subgroups::isFragmentSSBOSupportedForDevice(context)) { TCU_THROW(NotSupportedError, "Subgroup basic operation require that the fragment stage be able to write to SSBOs!"); } int supportedStages = context.getDeqpContext().getContextInfo().getInt(GL_SUBGROUP_SUPPORTED_STAGES_KHR); int combinedSSBOs = context.getDeqpContext().getContextInfo().getInt(GL_MAX_COMBINED_SHADER_STORAGE_BLOCKS); subgroups::ShaderStageFlags stages = (subgroups::ShaderStageFlags)(caseDef.shaderStage & supportedStages); if ( subgroups::SHADER_STAGE_FRAGMENT_BIT != stages && !subgroups::isVertexSSBOSupportedForDevice(context)) { if ( (stages & subgroups::SHADER_STAGE_FRAGMENT_BIT) == 0) TCU_THROW(NotSupportedError, "Device does not support vertex stage SSBO writes"); else stages = subgroups::SHADER_STAGE_FRAGMENT_BIT; } if ((subgroups::ShaderStageFlags)0u == stages) TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphic shader"); // with sufficient effort we could dynamically assign the binding points // based on the number of stages actually supported, etc, but we already // have the framebuffer tests which cover those cases, so there doesn't seem // to be much benefit in doing that right now. if (combinedSSBOs < 14) TCU_THROW(NotSupportedError, "Device does not support enough combined SSBOs for this test (14)"); if (OPTYPE_ELECT == caseDef.opType) { const deUint32 inputCount = 5u; subgroups::SSBOData inputData[inputCount]; inputData[0].format = subgroups::FORMAT_R32_UINT; inputData[0].layout = subgroups::SSBOData::LayoutStd430; inputData[0].numElements = 1; inputData[0].initializeType = subgroups::SSBOData::InitializeZero; inputData[0].binding = 4u; inputData[0].stages = subgroups::SHADER_STAGE_VERTEX_BIT; inputData[1].format = subgroups::FORMAT_R32_UINT; inputData[1].layout = subgroups::SSBOData::LayoutStd430; inputData[1].numElements = 1; inputData[1].initializeType = subgroups::SSBOData::InitializeZero; inputData[1].binding = 5u; inputData[1].stages = subgroups::SHADER_STAGE_TESS_CONTROL_BIT; inputData[2].format = subgroups::FORMAT_R32_UINT; inputData[2].layout = subgroups::SSBOData::LayoutStd430; inputData[2].numElements = 1; inputData[2].initializeType = subgroups::SSBOData::InitializeZero; inputData[2].binding = 6u; inputData[2].stages = subgroups::SHADER_STAGE_TESS_EVALUATION_BIT; inputData[3].format = subgroups::FORMAT_R32_UINT; inputData[3].layout = subgroups::SSBOData::LayoutStd430; inputData[3].numElements = 1; inputData[3].initializeType = subgroups::SSBOData::InitializeZero; inputData[3].binding = 7u; inputData[3].stages = subgroups::SHADER_STAGE_GEOMETRY_BIT; inputData[4].format = subgroups::FORMAT_R32_UINT; inputData[4].layout = subgroups::SSBOData::LayoutStd430; inputData[4].numElements = 1; inputData[4].initializeType = subgroups::SSBOData::InitializeZero; inputData[4].binding = 8u; inputData[4].stages = subgroups::SHADER_STAGE_FRAGMENT_BIT; return subgroups::allStages(context, subgroups::FORMAT_R32_UINT, inputData, inputCount, checkVertexPipelineStagesSubgroupElect, stages); } else { const subgroups::ShaderStageFlags stagesBits[] = { subgroups::SHADER_STAGE_VERTEX_BIT, subgroups::SHADER_STAGE_TESS_CONTROL_BIT, subgroups::SHADER_STAGE_TESS_EVALUATION_BIT, subgroups::SHADER_STAGE_GEOMETRY_BIT, subgroups::SHADER_STAGE_FRAGMENT_BIT, }; const deUint32 inputDatasCount = DE_LENGTH_OF_ARRAY(stagesBits) * 3u; subgroups::SSBOData inputDatas[inputDatasCount]; for (int ndx = 0; ndx < DE_LENGTH_OF_ARRAY(stagesBits); ++ndx) { const deUint32 index = ndx*3; const deUint32 ssboIndex = ndx*2; const deUint32 imgIndex = ndx; inputDatas[index].format = subgroups::FORMAT_R32_UINT; inputDatas[index].layout = subgroups::SSBOData::LayoutStd430; inputDatas[index].numElements = 1 + SHADER_BUFFER_SIZE; inputDatas[index].initializeType = subgroups::SSBOData::InitializeNonZero; inputDatas[index].binding = ssboIndex + 4u; inputDatas[index].stages = stagesBits[ndx]; inputDatas[index + 1].format = subgroups::FORMAT_R32_UINT; inputDatas[index + 1].layout = subgroups::SSBOData::LayoutStd430; inputDatas[index + 1].numElements = 1; inputDatas[index + 1].initializeType = subgroups::SSBOData::InitializeZero; inputDatas[index + 1].binding = ssboIndex + 5u; inputDatas[index + 1].stages = stagesBits[ndx]; inputDatas[index + 2].format = subgroups::FORMAT_R32_UINT; inputDatas[index + 2].layout = subgroups::SSBOData::LayoutPacked; inputDatas[index + 2].numElements = SHADER_BUFFER_SIZE; inputDatas[index + 2].initializeType = subgroups::SSBOData::InitializeNone; inputDatas[index + 2].isImage = true; inputDatas[index + 2].binding = imgIndex; inputDatas[index + 2].stages = stagesBits[ndx]; } return subgroups::allStages(context, subgroups::FORMAT_R32_UINT, inputDatas, inputDatasCount, checkVertexPipelineStagesSubgroupBarriers, stages); } } } } deqp::TestCaseGroup* createSubgroupsBasicTests(deqp::Context& testCtx) { de::MovePtr graphicGroup(new deqp::TestCaseGroup( testCtx, "graphics", "Subgroup basic category tests: graphics")); de::MovePtr computeGroup(new deqp::TestCaseGroup( testCtx, "compute", "Subgroup basic category tests: compute")); de::MovePtr framebufferGroup(new deqp::TestCaseGroup( testCtx, "framebuffer", "Subgroup basic category tests: framebuffer")); const subgroups::ShaderStageFlags stages[] = { SHADER_STAGE_FRAGMENT_BIT, SHADER_STAGE_VERTEX_BIT, SHADER_STAGE_TESS_EVALUATION_BIT, SHADER_STAGE_TESS_CONTROL_BIT, SHADER_STAGE_GEOMETRY_BIT, }; for (int opTypeIndex = 0; opTypeIndex < OPTYPE_LAST; ++opTypeIndex) { const std::string op = de::toLower(getOpTypeName(opTypeIndex)); { const CaseDefinition caseDef = {opTypeIndex, SHADER_STAGE_COMPUTE_BIT}; SubgroupFactory::addFunctionCaseWithPrograms(computeGroup.get(), op, "", supportedCheck, initPrograms, test, caseDef); } if (OPTYPE_SUBGROUP_MEMORY_BARRIER_SHARED == opTypeIndex) { // Shared isn't available in non compute shaders. continue; } { const CaseDefinition caseDef = {opTypeIndex, SHADER_STAGE_ALL_GRAPHICS}; SubgroupFactory::addFunctionCaseWithPrograms(graphicGroup.get(), op, "", supportedCheck, initPrograms, test, caseDef); } for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(stages); ++stageIndex) { if (opTypeIndex == OPTYPE_ELECT && stageIndex == 0) continue; // This is not tested. I don't know why. const CaseDefinition caseDef = {opTypeIndex, stages[stageIndex]}; SubgroupFactory::addFunctionCaseWithPrograms(framebufferGroup.get(), op + "_" + getShaderStageName(caseDef.shaderStage), "", supportedCheck, initFrameBufferPrograms, noSSBOtest, caseDef); } } de::MovePtr group(new deqp::TestCaseGroup( testCtx, "basic", "Subgroup basic category tests")); group->addChild(graphicGroup.release()); group->addChild(computeGroup.release()); group->addChild(framebufferGroup.release()); return group.release(); } } // subgroups } // glc