1/*------------------------------------------------------------------------ 2 * OpenGL Conformance Tests 3 * ------------------------ 4 * 5 * Copyright (c) 2017-2019 The Khronos Group Inc. 6 * Copyright (c) 2017 Codeplay Software Ltd. 7 * Copyright (c) 2019 NVIDIA Corporation. 8 * 9 * Licensed under the Apache License, Version 2.0 (the "License"); 10 * you may not use this file except in compliance with the License. 11 * You may obtain a copy of the License at 12 * 13 * http://www.apache.org/licenses/LICENSE-2.0 14 * 15 * Unless required by applicable law or agreed to in writing, software 16 * distributed under the License is distributed on an "AS IS" BASIS, 17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 * See the License for the specific language governing permissions and 19 * limitations under the License. 20 * 21 */ /*! 22 * \file 23 * \brief Subgroups Tests 24 */ /*--------------------------------------------------------------------*/ 25 26#include "glcSubgroupsShapeTests.hpp" 27#include "glcSubgroupsTestsUtils.hpp" 28 29#include <string> 30#include <vector> 31 32using namespace tcu; 33using namespace std; 34 35namespace glc 36{ 37namespace subgroups 38{ 39namespace 40{ 41static bool checkVertexPipelineStages(std::vector<const void*> datas, 42 deUint32 width, deUint32) 43{ 44 return glc::subgroups::check(datas, width, 1); 45} 46 47static bool checkComputeStage(std::vector<const void*> datas, 48 const deUint32 numWorkgroups[3], const deUint32 localSize[3], 49 deUint32) 50{ 51 return glc::subgroups::checkCompute(datas, numWorkgroups, localSize, 1); 52} 53 54enum OpType 55{ 56 OPTYPE_CLUSTERED = 0, 57 OPTYPE_QUAD, 58 OPTYPE_LAST 59}; 60 61std::string getOpTypeName(int opType) 62{ 63 switch (opType) 64 { 65 default: 66 DE_FATAL("Unsupported op type"); 67 return ""; 68 case OPTYPE_CLUSTERED: 69 return "clustered"; 70 case OPTYPE_QUAD: 71 return "quad"; 72 } 73} 74 75struct CaseDefinition 76{ 77 int opType; 78 ShaderStageFlags shaderStage; 79}; 80 81void initFrameBufferPrograms (SourceCollections& programCollection, CaseDefinition caseDef) 82{ 83 std::ostringstream bdy; 84 std::string extension = (OPTYPE_CLUSTERED == caseDef.opType) ? 85 "#extension GL_KHR_shader_subgroup_clustered: enable\n" : 86 "#extension GL_KHR_shader_subgroup_quad: enable\n"; 87 88 subgroups::setFragmentShaderFrameBuffer(programCollection); 89 90 if (SHADER_STAGE_VERTEX_BIT != caseDef.shaderStage) 91 subgroups::setVertexShaderFrameBuffer(programCollection); 92 93 extension += "#extension GL_KHR_shader_subgroup_ballot: enable\n"; 94 95 bdy << " uint tempResult = 0x1u;\n" 96 << " uvec4 mask = subgroupBallot(true);\n"; 97 98 if (OPTYPE_CLUSTERED == caseDef.opType) 99 { 100 for (deUint32 i = 1; i <= subgroups::maxSupportedSubgroupSize(); i *= 2) 101 { 102 bdy << " if (gl_SubgroupSize >= " << i << "u)\n" 103 << " {\n" 104 << " uvec4 contribution = uvec4(0);\n" 105 << " uint modID = gl_SubgroupInvocationID % 32u;\n" 106 << " switch (gl_SubgroupInvocationID / 32u)\n" 107 << " {\n" 108 << " case 0u: contribution.x = 1u << modID; break;\n" 109 << " case 1u: contribution.y = 1u << modID; break;\n" 110 << " case 2u: contribution.z = 1u << modID; break;\n" 111 << " case 3u: contribution.w = 1u << modID; break;\n" 112 << " }\n" 113 << " uvec4 result = subgroupClusteredOr(contribution, " << i << "u);\n" 114 << " uint rootID = gl_SubgroupInvocationID & ~(" << i - 1 << "u);\n" 115 << " for (uint i = 0u; i < " << i << "u; i++)\n" 116 << " {\n" 117 << " uint nextID = rootID + i;\n" 118 << " if (subgroupBallotBitExtract(mask, nextID) ^^ subgroupBallotBitExtract(result, nextID))\n" 119 << " {\n" 120 << " tempResult = 0u;\n" 121 << " }\n" 122 << " }\n" 123 << " }\n"; 124 } 125 } 126 else 127 { 128 bdy << " uint cluster[4] =\n" 129 << " uint[](\n" 130 << " subgroupQuadBroadcast(gl_SubgroupInvocationID, 0u),\n" 131 << " subgroupQuadBroadcast(gl_SubgroupInvocationID, 1u),\n" 132 << " subgroupQuadBroadcast(gl_SubgroupInvocationID, 2u),\n" 133 << " subgroupQuadBroadcast(gl_SubgroupInvocationID, 3u)\n" 134 << " );\n" 135 << " uint rootID = gl_SubgroupInvocationID & ~0x3u;\n" 136 << " for (uint i = 0u; i < 4u; i++)\n" 137 << " {\n" 138 << " uint nextID = rootID + i;\n" 139 << " if (subgroupBallotBitExtract(mask, nextID) && (cluster[i] != nextID))\n" 140 << " {\n" 141 << " tempResult = mask.x;\n" 142 << " }\n" 143 << " }\n"; 144 } 145 146 if (SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage) 147 { 148 std::ostringstream vertexSrc; 149 vertexSrc << "${VERSION_DECL}\n" 150 << extension 151 << "layout(location = 0) in highp vec4 in_position;\n" 152 << "layout(location = 0) out float result;\n" 153 << "\n" 154 << "void main (void)\n" 155 << "{\n" 156 << bdy.str() 157 << " result = float(tempResult);\n" 158 << " gl_Position = in_position;\n" 159 << " gl_PointSize = 1.0f;\n" 160 << "}\n"; 161 programCollection.add("vert") << glu::VertexSource(vertexSrc.str()); 162 } 163 else if (SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage) 164 { 165 std::ostringstream geometry; 166 167 geometry << "${VERSION_DECL}\n" 168 << extension 169 << "layout(points) in;\n" 170 << "layout(points, max_vertices = 1) out;\n" 171 << "layout(location = 0) out float out_color;\n" 172 << "\n" 173 << "void main (void)\n" 174 << "{\n" 175 << bdy.str() 176 << " out_color = float(tempResult);\n" 177 << " gl_Position = gl_in[0].gl_Position;\n" 178 << " EmitVertex();\n" 179 << " EndPrimitive();\n" 180 << "}\n"; 181 182 programCollection.add("geometry") << glu::GeometrySource(geometry.str()); 183 } 184 else if (SHADER_STAGE_TESS_CONTROL_BIT == caseDef.shaderStage) 185 { 186 std::ostringstream controlSource; 187 188 controlSource << "${VERSION_DECL}\n" 189 << extension 190 << "layout(vertices = 2) out;\n" 191 << "layout(location = 0) out float out_color[];\n" 192 << "\n" 193 << "void main (void)\n" 194 << "{\n" 195 << " if (gl_InvocationID == 0)\n" 196 <<" {\n" 197 << " gl_TessLevelOuter[0] = 1.0f;\n" 198 << " gl_TessLevelOuter[1] = 1.0f;\n" 199 << " }\n" 200 << bdy.str() 201 << " out_color[gl_InvocationID] = float(tempResult);\n" 202 << " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n" 203 << "}\n"; 204 205 programCollection.add("tesc") << glu::TessellationControlSource(controlSource.str()); 206 subgroups::setTesEvalShaderFrameBuffer(programCollection); 207 } 208 else if (SHADER_STAGE_TESS_EVALUATION_BIT == caseDef.shaderStage) 209 { 210 std::ostringstream evaluationSource; 211 212 evaluationSource << "${VERSION_DECL}\n" 213 << extension 214 << "layout(isolines, equal_spacing, ccw) in;\n" 215 << "layout(location = 0) out float out_color;\n" 216 << "void main (void)\n" 217 << "{\n" 218 << bdy.str() 219 << " out_color = float(tempResult);\n" 220 << " gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n" 221 << "}\n"; 222 223 subgroups::setTesCtrlShaderFrameBuffer(programCollection); 224 programCollection.add("tese") << glu::TessellationEvaluationSource(evaluationSource.str()); 225 } 226 else 227 { 228 DE_FATAL("Unsupported shader stage"); 229 } 230} 231 232void initPrograms(SourceCollections& programCollection, CaseDefinition caseDef) 233{ 234 std::string extension = (OPTYPE_CLUSTERED == caseDef.opType) ? 235 "#extension GL_KHR_shader_subgroup_clustered: enable\n" : 236 "#extension GL_KHR_shader_subgroup_quad: enable\n"; 237 238 extension += "#extension GL_KHR_shader_subgroup_ballot: enable\n"; 239 240 std::ostringstream bdy; 241 242 bdy << " uint tempResult = 0x1u;\n" 243 << " uvec4 mask = subgroupBallot(true);\n"; 244 245 if (OPTYPE_CLUSTERED == caseDef.opType) 246 { 247 for (deUint32 i = 1; i <= subgroups::maxSupportedSubgroupSize(); i *= 2) 248 { 249 bdy << " if (gl_SubgroupSize >= " << i << "u)\n" 250 << " {\n" 251 << " uvec4 contribution = uvec4(0);\n" 252 << " uint modID = gl_SubgroupInvocationID % 32u;\n" 253 << " switch (gl_SubgroupInvocationID / 32u)\n" 254 << " {\n" 255 << " case 0u: contribution.x = 1u << modID; break;\n" 256 << " case 1u: contribution.y = 1u << modID; break;\n" 257 << " case 2u: contribution.z = 1u << modID; break;\n" 258 << " case 3u: contribution.w = 1u << modID; break;\n" 259 << " }\n" 260 << " uvec4 result = subgroupClusteredOr(contribution, " << i << "u);\n" 261 << " uint rootID = gl_SubgroupInvocationID & ~(" << i - 1 << "u);\n" 262 << " for (uint i = 0u; i < " << i << "u; i++)\n" 263 << " {\n" 264 << " uint nextID = rootID + i;\n" 265 << " if (subgroupBallotBitExtract(mask, nextID) ^^ subgroupBallotBitExtract(result, nextID))\n" 266 << " {\n" 267 << " tempResult = 0u;\n" 268 << " }\n" 269 << " }\n" 270 << " }\n"; 271 } 272 } 273 else 274 { 275 bdy << " uint cluster[4] =\n" 276 << " uint[](\n" 277 << " subgroupQuadBroadcast(gl_SubgroupInvocationID, 0u),\n" 278 << " subgroupQuadBroadcast(gl_SubgroupInvocationID, 1u),\n" 279 << " subgroupQuadBroadcast(gl_SubgroupInvocationID, 2u),\n" 280 << " subgroupQuadBroadcast(gl_SubgroupInvocationID, 3u)\n" 281 << " );\n" 282 << " uint rootID = gl_SubgroupInvocationID & ~0x3u;\n" 283 << " for (uint i = 0u; i < 4u; i++)\n" 284 << " {\n" 285 << " uint nextID = rootID + i;\n" 286 << " if (subgroupBallotBitExtract(mask, nextID) && (cluster[i] != nextID))\n" 287 << " {\n" 288 << " tempResult = mask.x;\n" 289 << " }\n" 290 << " }\n"; 291 } 292 293 if (SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage) 294 { 295 std::ostringstream src; 296 297 src << "${VERSION_DECL}\n" 298 << extension 299 << "layout (${LOCAL_SIZE_X}, ${LOCAL_SIZE_Y}, ${LOCAL_SIZE_Z}) in;\n" 300 << "layout(binding = 0, std430) buffer Buffer0\n" 301 << "{\n" 302 << " uint result[];\n" 303 << "};\n" 304 << "\n" 305 << "void main (void)\n" 306 << "{\n" 307 << " uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n" 308 << " highp uint offset = globalSize.x * ((globalSize.y * " 309 "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + " 310 "gl_GlobalInvocationID.x;\n" 311 << bdy.str() 312 << " result[offset] = tempResult;\n" 313 << "}\n"; 314 315 programCollection.add("comp") << glu::ComputeSource(src.str()); 316 } 317 else 318 { 319 { 320 const string vertex = 321 "${VERSION_DECL}\n" 322 + extension + 323 "layout(binding = 0, std430) buffer Buffer0\n" 324 "{\n" 325 " uint result[];\n" 326 "} b0;\n" 327 "\n" 328 "void main (void)\n" 329 "{\n" 330 + bdy.str() + 331 " b0.result[gl_VertexID] = tempResult;\n" 332 " float pixelSize = 2.0f/1024.0f;\n" 333 " float pixelPosition = pixelSize/2.0f - 1.0f;\n" 334 " gl_Position = vec4(float(gl_VertexID) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n" 335 "}\n"; 336 337 programCollection.add("vert") << glu::VertexSource(vertex); 338 } 339 340 { 341 const string tesc = 342 "${VERSION_DECL}\n" 343 + extension + 344 "layout(vertices=1) out;\n" 345 "layout(binding = 1, std430) buffer Buffer1\n" 346 "{\n" 347 " uint result[];\n" 348 "} b1;\n" 349 "\n" 350 "void main (void)\n" 351 "{\n" 352 + bdy.str() + 353 " b1.result[gl_PrimitiveID] = 1u;\n" 354 " if (gl_InvocationID == 0)\n" 355 " {\n" 356 " gl_TessLevelOuter[0] = 1.0f;\n" 357 " gl_TessLevelOuter[1] = 1.0f;\n" 358 " }\n" 359 " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n" 360 "}\n"; 361 362 programCollection.add("tesc") << glu::TessellationControlSource(tesc); 363 } 364 365 { 366 const string tese = 367 "${VERSION_DECL}\n" 368 + extension + 369 "layout(isolines) in;\n" 370 "layout(binding = 2, std430) buffer Buffer2\n" 371 "{\n" 372 " uint result[];\n" 373 "} b2;\n" 374 "\n" 375 "void main (void)\n" 376 "{\n" 377 + bdy.str() + 378 " b2.result[gl_PrimitiveID * 2 + int(gl_TessCoord.x + 0.5)] = 1u;\n" 379 " float pixelSize = 2.0f/1024.0f;\n" 380 " gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n" 381 "}\n"; 382 383 programCollection.add("tese") << glu::TessellationEvaluationSource(tese); 384 } 385 386 { 387 const string geometry = 388 // version added by addGeometryShadersFromTemplate 389 extension + 390 "layout(${TOPOLOGY}) in;\n" 391 "layout(points, max_vertices = 1) out;\n" 392 "layout(binding = 3, std430) buffer Buffer3\n" 393 "{\n" 394 " uint result[];\n" 395 "} b3;\n" 396 "\n" 397 "void main (void)\n" 398 "{\n" 399 + bdy.str() + 400 " b3.result[gl_PrimitiveIDIn] = tempResult;\n" 401 " gl_Position = gl_in[0].gl_Position;\n" 402 " EmitVertex();\n" 403 " EndPrimitive();\n" 404 "}\n"; 405 406 subgroups::addGeometryShadersFromTemplate(geometry, programCollection); 407 } 408 409 { 410 const string fragment = 411 "${VERSION_DECL}\n" 412 + extension + 413 "precision highp int;\n" 414 "layout(location = 0) out uint result;\n" 415 "void main (void)\n" 416 "{\n" 417 + bdy.str() + 418 " result = tempResult;\n" 419 "}\n"; 420 421 programCollection.add("fragment") << glu::FragmentSource(fragment); 422 } 423 subgroups::addNoSubgroupShader(programCollection); 424 } 425} 426 427void supportedCheck (Context& context, CaseDefinition caseDef) 428{ 429 if (!subgroups::isSubgroupSupported(context)) 430 TCU_THROW(NotSupportedError, "Subgroup operations are not supported"); 431 432 if (!subgroups::isSubgroupFeatureSupportedForDevice(context, SUBGROUP_FEATURE_BALLOT_BIT)) 433 { 434 TCU_THROW(NotSupportedError, "Device does not support subgroup ballot operations"); 435 } 436 437 if (OPTYPE_CLUSTERED == caseDef.opType) 438 { 439 if (!subgroups::isSubgroupFeatureSupportedForDevice(context, SUBGROUP_FEATURE_CLUSTERED_BIT)) 440 { 441 TCU_THROW(NotSupportedError, "Subgroup shape tests require that clustered operations are supported!"); 442 } 443 } 444 445 if (OPTYPE_QUAD == caseDef.opType) 446 { 447 if (!subgroups::isSubgroupFeatureSupportedForDevice(context, SUBGROUP_FEATURE_QUAD_BIT)) 448 { 449 TCU_THROW(NotSupportedError, "Subgroup shape tests require that quad operations are supported!"); 450 } 451 } 452} 453 454tcu::TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef) 455{ 456 if (!subgroups::areSubgroupOperationsSupportedForStage( 457 context, caseDef.shaderStage)) 458 { 459 if (subgroups::areSubgroupOperationsRequiredForStage( 460 caseDef.shaderStage)) 461 { 462 return tcu::TestStatus::fail( 463 "Shader stage " + 464 subgroups::getShaderStageName(caseDef.shaderStage) + 465 " is required to support subgroup operations!"); 466 } 467 else 468 { 469 TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage"); 470 } 471 } 472 473 if (SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage) 474 return subgroups::makeVertexFrameBufferTest(context, FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages); 475 else if (SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage) 476 return subgroups::makeGeometryFrameBufferTest(context, FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages); 477 else if (SHADER_STAGE_TESS_CONTROL_BIT == caseDef.shaderStage) 478 return subgroups::makeTessellationEvaluationFrameBufferTest(context, FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages, SHADER_STAGE_TESS_CONTROL_BIT); 479 else if (SHADER_STAGE_TESS_EVALUATION_BIT == caseDef.shaderStage) 480 return subgroups::makeTessellationEvaluationFrameBufferTest(context, FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages, SHADER_STAGE_TESS_EVALUATION_BIT); 481 else 482 TCU_THROW(InternalError, "Unhandled shader stage"); 483} 484 485 486tcu::TestStatus test(Context& context, const CaseDefinition caseDef) 487{ 488 if (!subgroups::isSubgroupFeatureSupportedForDevice(context, SUBGROUP_FEATURE_BASIC_BIT)) 489 { 490 return tcu::TestStatus::fail( 491 "Subgroup feature " + 492 subgroups::getSubgroupFeatureName(SUBGROUP_FEATURE_BASIC_BIT) + 493 " is a required capability!"); 494 } 495 496 if (SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage) 497 { 498 if (!subgroups::areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage)) 499 { 500 return tcu::TestStatus::fail( 501 "Shader stage " + 502 subgroups::getShaderStageName(caseDef.shaderStage) + 503 " is required to support subgroup operations!"); 504 } 505 return subgroups::makeComputeTest(context, FORMAT_R32_UINT, DE_NULL, 0, checkComputeStage); 506 } 507 else 508 { 509 int supportedStages = context.getDeqpContext().getContextInfo().getInt(GL_SUBGROUP_SUPPORTED_STAGES_KHR); 510 511 ShaderStageFlags stages = (ShaderStageFlags)(caseDef.shaderStage & supportedStages); 512 513 if (SHADER_STAGE_FRAGMENT_BIT != stages && !subgroups::isVertexSSBOSupportedForDevice(context)) 514 { 515 if ( (stages & SHADER_STAGE_FRAGMENT_BIT) == 0) 516 TCU_THROW(NotSupportedError, "Device does not support vertex stage SSBO writes"); 517 else 518 stages = SHADER_STAGE_FRAGMENT_BIT; 519 } 520 521 if ((ShaderStageFlags)0u == stages) 522 TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphic shader"); 523 524 return subgroups::allStages(context, FORMAT_R32_UINT, DE_NULL, 0, checkVertexPipelineStages, stages); 525 } 526} 527} 528 529deqp::TestCaseGroup* createSubgroupsShapeTests(deqp::Context& testCtx) 530{ 531 de::MovePtr<deqp::TestCaseGroup> graphicGroup(new deqp::TestCaseGroup( 532 testCtx, "graphics", "Subgroup shape category tests: graphics")); 533 de::MovePtr<deqp::TestCaseGroup> computeGroup(new deqp::TestCaseGroup( 534 testCtx, "compute", "Subgroup shape category tests: compute")); 535 de::MovePtr<deqp::TestCaseGroup> framebufferGroup(new deqp::TestCaseGroup( 536 testCtx, "framebuffer", "Subgroup shape category tests: framebuffer")); 537 538 const ShaderStageFlags stages[] = 539 { 540 SHADER_STAGE_VERTEX_BIT, 541 SHADER_STAGE_TESS_EVALUATION_BIT, 542 SHADER_STAGE_TESS_CONTROL_BIT, 543 SHADER_STAGE_GEOMETRY_BIT, 544 }; 545 546 for (int opTypeIndex = 0; opTypeIndex < OPTYPE_LAST; ++opTypeIndex) 547 { 548 const std::string op = de::toLower(getOpTypeName(opTypeIndex)); 549 550 { 551 const CaseDefinition caseDef = {opTypeIndex, SHADER_STAGE_COMPUTE_BIT}; 552 SubgroupFactory<CaseDefinition>::addFunctionCaseWithPrograms(computeGroup.get(), op, "", supportedCheck, initPrograms, test, caseDef); 553 554 } 555 556 { 557 const CaseDefinition caseDef = 558 { 559 opTypeIndex, 560 SHADER_STAGE_ALL_GRAPHICS 561 }; 562 SubgroupFactory<CaseDefinition>::addFunctionCaseWithPrograms(graphicGroup.get(), 563 op, "", 564 supportedCheck, initPrograms, test, caseDef); 565 } 566 567 for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(stages); ++stageIndex) 568 { 569 const CaseDefinition caseDef = {opTypeIndex, stages[stageIndex]}; 570 SubgroupFactory<CaseDefinition>::addFunctionCaseWithPrograms(framebufferGroup.get(),op + "_" + getShaderStageName(caseDef.shaderStage), "", 571 supportedCheck, initFrameBufferPrograms, noSSBOtest, caseDef); 572 } 573 } 574 575 de::MovePtr<deqp::TestCaseGroup> group(new deqp::TestCaseGroup( 576 testCtx, "shape", "Subgroup shape category tests")); 577 578 group->addChild(graphicGroup.release()); 579 group->addChild(computeGroup.release()); 580 group->addChild(framebufferGroup.release()); 581 582 return group.release(); 583} 584 585} // subgroups 586} // glc 587