#version 460
// Negative cases: every subgroup built-in below is used above the #extension line that enables it.
vec4 undeclared_errors(vec4 f4)
{
    vec4 result;
    gl_SubgroupSize;  // ERROR, extension not enabled (basic)
    gl_SubgroupInvocationID;  // ERROR, extension not enabled (basic)
    subgroupBarrier();  // ERROR, extension not enabled (basic)
    subgroupMemoryBarrier();  // ERROR, extension not enabled (basic)
    subgroupMemoryBarrierBuffer();  // ERROR, extension not enabled (basic)
    subgroupMemoryBarrierImage();  // ERROR, extension not enabled (basic)
    subgroupElect();  // ERROR, extension not enabled (basic)
    gl_NumSubgroups;  // ERROR, extension not enabled (basic)
    gl_SubgroupID;  // ERROR, extension not enabled (basic)
    subgroupMemoryBarrierShared();  // ERROR, extension not enabled (basic)

    subgroupAll(true);  // ERROR extension not enabled (vote)
    subgroupAny(false);  // ERROR extension not enabled (vote)
    subgroupAllEqual(f4);  // ERROR extension not enabled (vote)

    gl_SubgroupEqMask;  // ERROR extension not enabled (ballot)
    gl_SubgroupGeMask;  // ERROR extension not enabled (ballot)
    gl_SubgroupGtMask;  // ERROR extension not enabled (ballot)
    gl_SubgroupLeMask;  // ERROR extension not enabled (ballot)
    gl_SubgroupLtMask;  // ERROR extension not enabled (ballot)
    subgroupBroadcast(f4, 0);  // ERROR extension not enabled (ballot)
    subgroupBroadcastFirst(f4);  // ERROR extension not enabled (ballot)
    uvec4 ballot = subgroupBallot(false);  // ERROR extension not enabled (ballot)
    subgroupInverseBallot(uvec4(0x1));  // ERROR extension not enabled (ballot)
    subgroupBallotBitExtract(ballot, 0);  // ERROR extension not enabled (ballot)
    subgroupBallotBitCount(ballot);  // ERROR extension not enabled (ballot)
    subgroupBallotInclusiveBitCount(ballot);  // ERROR extension not enabled (ballot)
    subgroupBallotExclusiveBitCount(ballot);  // ERROR extension not enabled (ballot)
    subgroupBallotFindLSB(ballot);  // ERROR extension not enabled (ballot)
    subgroupBallotFindMSB(ballot);  // ERROR extension not enabled (ballot)

    subgroupShuffle(f4, 0);  // ERROR extension not enabled (shuffle)
    subgroupShuffleXor(f4, 0x1);  // ERROR extension not enabled (shuffle)
    subgroupShuffleUp(f4, 1);  // ERROR extension not enabled (shuffle_relative)
    subgroupShuffleDown(f4, 1);  // ERROR extension not enabled (shuffle_relative)

    result = subgroupAdd(f4);  // ERROR, extension not enabled (arith)
    subgroupMul(f4);  // ERROR, extension not enabled (arith)
    subgroupMin(f4);  // ERROR, extension not enabled (arith)
    subgroupMax(f4);  // ERROR, extension not enabled (arith)
    subgroupAnd(ballot);  // ERROR, extension not enabled (arith)
    subgroupOr(ballot);  // ERROR, extension not enabled (arith)
    subgroupXor(ballot);  // ERROR, extension not enabled (arith)
    subgroupInclusiveAdd(f4);  // ERROR, extension not enabled (arith)
    subgroupInclusiveMul(f4);  // ERROR, extension not enabled (arith)
    subgroupInclusiveMin(f4);  // ERROR, extension not enabled (arith)
    subgroupInclusiveMax(f4);  // ERROR, extension not enabled (arith)
    subgroupInclusiveAnd(ballot);  // ERROR, extension not enabled (arith)
    subgroupInclusiveOr(ballot);  // ERROR, extension not enabled (arith)
    subgroupInclusiveXor(ballot);  // ERROR, extension not enabled (arith)
    subgroupExclusiveAdd(f4);  // ERROR, extension not enabled (arith)
    subgroupExclusiveMul(f4);  // ERROR, extension not enabled (arith)
    subgroupExclusiveMin(f4);  // ERROR, extension not enabled (arith)
    subgroupExclusiveMax(f4);  // ERROR, extension not enabled (arith)
    subgroupExclusiveAnd(ballot);  // ERROR, extension not enabled (arith)
    subgroupExclusiveOr(ballot);  // ERROR, extension not enabled (arith)
    subgroupExclusiveXor(ballot);  // ERROR, extension not enabled (arith)

    subgroupClusteredAdd(f4, 2);  // ERROR, extension not enabled (clustered)
    subgroupClusteredMul(f4, 2);  // ERROR, extension not enabled (clustered)
    subgroupClusteredMin(f4, 2);  // ERROR, extension not enabled (clustered)
    subgroupClusteredMax(f4, 2);  // ERROR, extension not enabled (clustered)
    subgroupClusteredAnd(ballot, 2);  // ERROR, extension not enabled (clustered)
    subgroupClusteredOr(ballot, 2);  // ERROR, extension not enabled (clustered)
    subgroupClusteredXor(ballot, 2);  // ERROR, extension not enabled (clustered)

    subgroupQuadBroadcast(f4, 0);  // ERROR, extension not enabled (quad)
    subgroupQuadSwapHorizontal(f4);  // ERROR, extension not enabled (quad)
    subgroupQuadSwapVertical(f4);  // ERROR, extension not enabled (quad)
    subgroupQuadSwapDiagonal(f4);  // ERROR, extension not enabled (quad)

    uvec4 parti = subgroupPartitionNV(f4);  // ERROR, extension not enabled (partitioned)
    subgroupPartitionedAddNV(f4, parti);  // ERROR, extension not enabled (partitioned)
    subgroupPartitionedMulNV(f4, parti);  // ERROR, extension not enabled (partitioned)
    subgroupPartitionedMinNV(f4, parti);  // ERROR, extension not enabled (partitioned)
    subgroupPartitionedMaxNV(f4, parti);  // ERROR, extension not enabled (partitioned)
    subgroupPartitionedAndNV(ballot, parti);  // ERROR, extension not enabled (partitioned)
    subgroupPartitionedOrNV(ballot, parti);  // ERROR, extension not enabled (partitioned)
    subgroupPartitionedXorNV(ballot, parti);  // ERROR, extension not enabled (partitioned)
    subgroupPartitionedInclusiveAddNV(f4, parti);  // ERROR, extension not enabled (partitioned)
    subgroupPartitionedInclusiveMulNV(f4, parti);  // ERROR, extension not enabled (partitioned)
    subgroupPartitionedInclusiveMinNV(f4, parti);  // ERROR, extension not enabled (partitioned)
    subgroupPartitionedInclusiveMaxNV(f4, parti);  // ERROR, extension not enabled (partitioned)
    subgroupPartitionedInclusiveAndNV(ballot, parti);  // ERROR, extension not enabled (partitioned)
    subgroupPartitionedInclusiveOrNV(ballot, parti);  // ERROR, extension not enabled (partitioned)
    subgroupPartitionedInclusiveXorNV(ballot, parti);  // ERROR, extension not enabled (partitioned)
    subgroupPartitionedExclusiveAddNV(f4, parti);  // ERROR, extension not enabled (partitioned)
    subgroupPartitionedExclusiveMulNV(f4, parti);  // ERROR, extension not enabled (partitioned)
    subgroupPartitionedExclusiveMinNV(f4, parti);  // ERROR, extension not enabled (partitioned)
    subgroupPartitionedExclusiveMaxNV(f4, parti);  // ERROR, extension not enabled (partitioned)
    subgroupPartitionedExclusiveAndNV(ballot, parti);  // ERROR, extension not enabled (partitioned)
    subgroupPartitionedExclusiveOrNV(ballot, parti);  // ERROR, extension not enabled (partitioned)
    subgroupPartitionedExclusiveXorNV(ballot, parti);  // ERROR, extension not enabled (partitioned)

    return result;
}
// MAX_VIEWS aliases gl_MaxMeshViewCountNV; main() uses it as the modulus when indexing gl_MeshViewIndicesNV.
#define MAX_VIEWS gl_MaxMeshViewCountNV
// BARRIER() expands to memoryBarrierShared(); barrier(); -- the call site adds one more ';'.
#define BARRIER() \
    memoryBarrierShared(); \
    barrier();

#extension GL_NV_mesh_shader : enable

layout(local_size_x = 32) in;

// test use of shared memory in task shaders:
layout(binding=0) writeonly uniform image2D uni_image;
uniform block0 {
    uint uni_value;
};
shared vec4 mem[10];

// test use of task memory in task shaders:
taskNV out Task {
    vec2 dummy;
    vec2 submesh[3];
    uint viewID;
} mytask;
// Entry point: writes shared memory and the image, fills the Task block, then sets gl_TaskCountNV.
void main()
{
    uint iid = gl_LocalInvocationID.x;
    uint gid = gl_WorkGroupID.x;
    uint viewID = gl_MeshViewIndicesNV[gl_MeshViewCountNV%MAX_VIEWS];

    // 1. shared memory load and stores
    for (uint i = 0; i < 10; ++i) {
        mem[i] = vec4(i + uni_value);
    }
    imageStore(uni_image, ivec2(iid), mem[gid]);
    imageStore(uni_image, ivec2(iid), mem[gid+1]);

    BARRIER();

    // 2. task memory stores

    mytask.dummy = vec2(30.0, 31.0);
    mytask.submesh[0] = vec2(32.0, 33.0);
    mytask.submesh[1] = vec2(34.0, 35.0);
    mytask.submesh[2] = mytask.submesh[gid%2];
    mytask.viewID = viewID;

    BARRIER();

    // 3. set task count
    gl_TaskCountNV = 3;
}
// Positive cases: each *_works function below follows the #extension line for the built-ins it uses.
#extension GL_KHR_shader_subgroup_basic: enable
void basic_works (void)
{
    gl_SubgroupSize;
    gl_SubgroupInvocationID;
    subgroupBarrier();
    subgroupMemoryBarrier();
    subgroupMemoryBarrierBuffer();
    subgroupMemoryBarrierImage();
    subgroupElect();
    gl_NumSubgroups;  // allowed in task
    gl_SubgroupID;  // allowed in task
    subgroupMemoryBarrierShared();  // allowed in task
}

#extension GL_KHR_shader_subgroup_ballot: enable
void ballot_works(vec4 f4) {
    gl_SubgroupEqMask;
    gl_SubgroupGeMask;
    gl_SubgroupGtMask;
    gl_SubgroupLeMask;
    gl_SubgroupLtMask;
    subgroupBroadcast(f4, 0);
    subgroupBroadcastFirst(f4);
    uvec4 ballot = subgroupBallot(false);
    subgroupInverseBallot(uvec4(0x1));
    subgroupBallotBitExtract(ballot, 0);
    subgroupBallotBitCount(ballot);
    subgroupBallotInclusiveBitCount(ballot);
    subgroupBallotExclusiveBitCount(ballot);
    subgroupBallotFindLSB(ballot);
    subgroupBallotFindMSB(ballot);
}

#extension GL_KHR_shader_subgroup_vote: enable
void vote_works(vec4 f4)
{
    subgroupAll(true);
    subgroupAny(false);
    subgroupAllEqual(f4);
}

#extension GL_KHR_shader_subgroup_shuffle: enable
#extension GL_KHR_shader_subgroup_shuffle_relative: enable
void shuffle_works(vec4 f4)
{
    subgroupShuffle(f4, 0);
    subgroupShuffleXor(f4, 0x1);
    subgroupShuffleUp(f4, 1);
    subgroupShuffleDown(f4, 1);
}

#extension GL_KHR_shader_subgroup_arithmetic: enable
void arith_works(vec4 f4)
{
    uvec4 ballot;
    subgroupAdd(f4);
    subgroupMul(f4);
    subgroupMin(f4);
    subgroupMax(f4);
    subgroupAnd(ballot);
    subgroupOr(ballot);
    subgroupXor(ballot);
    subgroupInclusiveAdd(f4);
    subgroupInclusiveMul(f4);
    subgroupInclusiveMin(f4);
    subgroupInclusiveMax(f4);
    subgroupInclusiveAnd(ballot);
    subgroupInclusiveOr(ballot);
    subgroupInclusiveXor(ballot);
    subgroupExclusiveAdd(f4);
    subgroupExclusiveMul(f4);
    subgroupExclusiveMin(f4);
    subgroupExclusiveMax(f4);
    subgroupExclusiveAnd(ballot);
    subgroupExclusiveOr(ballot);
    subgroupExclusiveXor(ballot);
}

#extension GL_KHR_shader_subgroup_clustered: enable
void clustered_works(vec4 f4)
{
    uvec4 ballot = uvec4(0x55,0,0,0);
    subgroupClusteredAdd(f4, 2);
    subgroupClusteredMul(f4, 2);
    subgroupClusteredMin(f4, 2);
    subgroupClusteredMax(f4, 2);
    subgroupClusteredAnd(ballot, 2);
    subgroupClusteredOr(ballot, 2);
    subgroupClusteredXor(ballot, 2);
}

#extension GL_KHR_shader_subgroup_quad: enable
void quad_works(vec4 f4)
{
    subgroupQuadBroadcast(f4, 0);
    subgroupQuadSwapHorizontal(f4);
    subgroupQuadSwapVertical(f4);
    subgroupQuadSwapDiagonal(f4);
}

#extension GL_NV_shader_subgroup_partitioned: enable
void partitioned_works(vec4 f4)
{
    uvec4 parti = subgroupPartitionNV(f4);
    uvec4 ballot = uvec4(0x55,0,0,0);
    subgroupPartitionedAddNV(f4, parti);
    subgroupPartitionedMulNV(f4, parti);
    subgroupPartitionedMinNV(f4, parti);
    subgroupPartitionedMaxNV(f4, parti);
    subgroupPartitionedAndNV(ballot, parti);
    subgroupPartitionedOrNV(ballot, parti);
    subgroupPartitionedXorNV(ballot, parti);
    subgroupPartitionedInclusiveAddNV(f4, parti);
    subgroupPartitionedInclusiveMulNV(f4, parti);
    subgroupPartitionedInclusiveMinNV(f4, parti);
    subgroupPartitionedInclusiveMaxNV(f4, parti);
    subgroupPartitionedInclusiveAndNV(ballot, parti);
    subgroupPartitionedInclusiveOrNV(ballot, parti);
    subgroupPartitionedInclusiveXorNV(ballot, parti);
    subgroupPartitionedExclusiveAddNV(f4, parti);
    subgroupPartitionedExclusiveMulNV(f4, parti);
    subgroupPartitionedExclusiveMinNV(f4, parti);
    subgroupPartitionedExclusiveMaxNV(f4, parti);
    subgroupPartitionedExclusiveAndNV(ballot, parti);
    subgroupPartitionedExclusiveOrNV(ballot, parti);
    subgroupPartitionedExclusiveXorNV(ballot, parti);
}

// tests for NV_shader_sm_builtins
void sm_builtins_err()
{
    gl_WarpsPerSMNV;  // ERROR, no extension
    gl_SMCountNV;  // ERROR, no extension
    gl_WarpIDNV;  // ERROR, no extension
    gl_SMIDNV;  // ERROR, no extension
}

#ifdef GL_NV_shader_sm_builtins
#extension GL_NV_shader_sm_builtins : enable
#endif
// Same four built-ins as sm_builtins_err(), referenced after the guarded #extension directive above.
void sm_builtins()
{
    gl_WarpsPerSMNV;
    gl_SMCountNV;
    gl_WarpIDNV;
    gl_SMIDNV;
}