#version 450
// Compute shader that calls built-ins from each GL_KHR_shader_subgroup_*
// extension required below: basic, ballot, vote, shuffle, shuffle_relative,
// arithmetic, clustered and quad.
//
// Most results are stored in locals that are never read again, and FragColor
// is overwritten repeatedly.
// NOTE(review): this looks like a compile-coverage test input rather than a
// shader whose output is meaningful -- confirm before "cleaning up" unused
// locals or reordering statements.
#extension GL_KHR_shader_subgroup_basic : require
#extension GL_KHR_shader_subgroup_ballot : require
#extension GL_KHR_shader_subgroup_vote : require
#extension GL_KHR_shader_subgroup_shuffle : require
#extension GL_KHR_shader_subgroup_shuffle_relative : require
#extension GL_KHR_shader_subgroup_arithmetic : require
#extension GL_KHR_shader_subgroup_clustered : require
#extension GL_KHR_shader_subgroup_quad : require
layout(local_size_x = 1) in;

// Single-float storage buffer; the only externally visible output of main().
layout(std430, binding = 0) buffer SSBO
{
    float FragColor;
};

void main()
{
    // basic
    // Each store below replaces the previous value of FragColor.
    FragColor = float(gl_NumSubgroups);
    FragColor = float(gl_SubgroupID);
    FragColor = float(gl_SubgroupSize);
    FragColor = float(gl_SubgroupInvocationID);
    subgroupBarrier();
    subgroupMemoryBarrier();
    subgroupMemoryBarrierBuffer();
    subgroupMemoryBarrierShared();
    subgroupMemoryBarrierImage();
    bool elected = subgroupElect();

    // ballot
    // The gl_Subgroup*Mask built-ins are uvec4; float(uvec4) converts the
    // first component only.
    FragColor = float(gl_SubgroupEqMask);
    FragColor = float(gl_SubgroupGeMask);
    FragColor = float(gl_SubgroupGtMask);
    FragColor = float(gl_SubgroupLeMask);
    FragColor = float(gl_SubgroupLtMask);
    vec4 broadcasted = subgroupBroadcast(vec4(10.0), 8u);
    bvec2 broadcasted_bool = subgroupBroadcast(bvec2(true), 8u);
    vec3 first = subgroupBroadcastFirst(vec3(20.0));
    bvec4 first_bool = subgroupBroadcastFirst(bvec4(false));
    uvec4 ballot_value = subgroupBallot(true);
    bool inverse_ballot_value = subgroupInverseBallot(ballot_value);
    bool bit_extracted = subgroupBallotBitExtract(uvec4(10u), 8u);
    uint bit_count = subgroupBallotBitCount(ballot_value);
    uint inclusive_bit_count = subgroupBallotInclusiveBitCount(ballot_value);
    uint exclusive_bit_count = subgroupBallotExclusiveBitCount(ballot_value);
    uint lsb = subgroupBallotFindLSB(ballot_value);
    uint msb = subgroupBallotFindMSB(ballot_value);

    // shuffle
    uint shuffled = subgroupShuffle(10u, 8u);
    bool shuffled_bool = subgroupShuffle(true, 9u);
    uint shuffled_xor = subgroupShuffleXor(30u, 8u);
    bool shuffled_xor_bool = subgroupShuffleXor(false, 9u);

    // shuffle relative
    uint shuffled_up = subgroupShuffleUp(20u, 4u);
    bool shuffled_up_bool = subgroupShuffleUp(true, 4u);
    uint shuffled_down = subgroupShuffleDown(20u, 4u);
    bool shuffled_down_bool = subgroupShuffleDown(false, 4u);

    // vote
    bool has_all = subgroupAll(true);
    bool has_any = subgroupAny(true);
    // subgroupAllEqual is called with int, bool, vec3 and bvec4 arguments.
    bool has_equal = subgroupAllEqual(0);
    has_equal = subgroupAllEqual(true);
    has_equal = subgroupAllEqual(vec3(0.0, 1.0, 2.0));
    has_equal = subgroupAllEqual(bvec4(true, true, false, true));

    // arithmetic
    // Reductions across float, signed and unsigned vector types.
    vec4 added = subgroupAdd(vec4(20.0));
    ivec4 iadded = subgroupAdd(ivec4(20));
    vec4 multiplied = subgroupMul(vec4(20.0));
    ivec4 imultiplied = subgroupMul(ivec4(20));
    vec4 lo = subgroupMin(vec4(20.0));
    vec4 hi = subgroupMax(vec4(20.0));
    ivec4 slo = subgroupMin(ivec4(20));
    ivec4 shi = subgroupMax(ivec4(20));
    uvec4 ulo = subgroupMin(uvec4(20));
    uvec4 uhi = subgroupMax(uvec4(20));
    uvec4 anded = subgroupAnd(ballot_value);
    uvec4 ored = subgroupOr(ballot_value);
    uvec4 xored = subgroupXor(ballot_value);

    // Inclusive scans: only Add and Mul are enabled; the remaining operations
    // are left commented out per the FIXME below.
    added = subgroupInclusiveAdd(added);
    iadded = subgroupInclusiveAdd(iadded);
    multiplied = subgroupInclusiveMul(multiplied);
    imultiplied = subgroupInclusiveMul(imultiplied);
    //lo = subgroupInclusiveMin(lo);  // FIXME: Unsupported by Metal
    //hi = subgroupInclusiveMax(hi);
    //slo = subgroupInclusiveMin(slo);
    //shi = subgroupInclusiveMax(shi);
    //ulo = subgroupInclusiveMin(ulo);
    //uhi = subgroupInclusiveMax(uhi);
    //anded = subgroupInclusiveAnd(anded);
    //ored = subgroupInclusiveOr(ored);
    //xored = subgroupInclusiveXor(ored);
    //added = subgroupExclusiveAdd(lo);

    // Exclusive scans: same restriction as the inclusive scans above.
    added = subgroupExclusiveAdd(multiplied);
    multiplied = subgroupExclusiveMul(multiplied);
    iadded = subgroupExclusiveAdd(imultiplied);
    imultiplied = subgroupExclusiveMul(imultiplied);
    //lo = subgroupExclusiveMin(lo);  // FIXME: Unsupported by Metal
    //hi = subgroupExclusiveMax(hi);
    //ulo = subgroupExclusiveMin(ulo);
    //uhi = subgroupExclusiveMax(uhi);
    //slo = subgroupExclusiveMin(slo);
    //shi = subgroupExclusiveMax(shi);
    //anded = subgroupExclusiveAnd(anded);
    //ored = subgroupExclusiveOr(ored);
    //xored = subgroupExclusiveXor(ored);

    // clustered
    // Every clustered operation uses a cluster size of 4.
    added = subgroupClusteredAdd(added, 4u);
    multiplied = subgroupClusteredMul(multiplied, 4u);
    iadded = subgroupClusteredAdd(iadded, 4u);
    imultiplied = subgroupClusteredMul(imultiplied, 4u);
    lo = subgroupClusteredMin(lo, 4u);
    hi = subgroupClusteredMax(hi, 4u);
    ulo = subgroupClusteredMin(ulo, 4u);
    uhi = subgroupClusteredMax(uhi, 4u);
    slo = subgroupClusteredMin(slo, 4u);
    shi = subgroupClusteredMax(shi, 4u);
    anded = subgroupClusteredAnd(anded, 4u);
    ored = subgroupClusteredOr(ored, 4u);
    xored = subgroupClusteredXor(xored, 4u);

    // quad
    vec4 swap_horiz = subgroupQuadSwapHorizontal(vec4(20.0));
    bvec4 swap_horiz_bool = subgroupQuadSwapHorizontal(bvec4(true));
    vec4 swap_vertical = subgroupQuadSwapVertical(vec4(20.0));
    bvec4 swap_vertical_bool = subgroupQuadSwapVertical(bvec4(true));
    vec4 swap_diagonal = subgroupQuadSwapDiagonal(vec4(20.0));
    bvec4 swap_diagonal_bool = subgroupQuadSwapDiagonal(bvec4(true));
    vec4 quad_broadcast = subgroupQuadBroadcast(vec4(20.0), 3u);
    bvec4 quad_broadcast_bool = subgroupQuadBroadcast(bvec4(true), 3u);
}