#version 450 core
// Compute shader exercising the integer cooperative-matrix types
// (icoopmatNV / ucoopmatNV) alongside fcoopmatNV: declarations at global and
// function scope, specialization-constant dimensions, .length(), arithmetic,
// component indexing, int<->float matrix conversion, load/store against
// storage buffers, buffer references and shared memory, and
// coopMatMulAddNV.
#extension GL_KHR_memory_scope_semantics : enable
#extension GL_NV_cooperative_matrix : enable
#extension GL_NV_integer_cooperative_matrix : enable
#extension GL_EXT_shader_explicit_arithmetic_types : enable
#extension GL_EXT_buffer_reference : enable

layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;

// Matrix dimensions: X is a plain constant, Y is specialization constant 0,
// and Z is derived from both, so Z is itself specialization-dependent.
const int X = 8;
layout(constant_id = 0) const int Y = 2;
const int Z = X*Y;

// Global cooperative matrices (single and arrayed) whose row count is Z.
icoopmatNV<8, gl_ScopeSubgroup, Z, 8> miC;
icoopmatNV<8, gl_ScopeSubgroup, Z, 8> miC2[3];
ucoopmatNV<8, gl_ScopeSubgroup, Z, 8> muC;
ucoopmatNV<8, gl_ScopeSubgroup, Z, 8> muC2[3];

// Arrays sized by the .length() of the matrices declared above.
int iarr[miC.length()];
int iarr2[miC2[1].length()];
int uarr[muC.length()];
int uarr2[muC2[1].length()];

// Constant matrices built with the single-scalar constructor.
const icoopmatNV<32, gl_ScopeSubgroup, Z, 8> mD = icoopmatNV<32, gl_ScopeSubgroup, Z, 8>(1);
const ucoopmatNV<8, gl_ScopeSubgroup, 8, 8> mD2 = ucoopmatNV<8, gl_ScopeSubgroup, 8, 8>(1);

struct S { int a; int b; int c; };

const S s = S(12, 23, 34);

// Buffer-reference block of 32-bit unsigned data: a fixed-size array followed
// by a runtime-sized array.
layout(set = 0, binding = 0, buffer_reference) coherent buffer Block {
    uint y[1024*1024];
    uint x[];
} block;

// Storage block of 8-bit signed data that also holds a reference to Block.
// The type is named Block16 although its members are int8_t.
layout(set = 0, binding = 0) coherent buffer Block16 {
    int8_t y[1024*1024];
    int8_t x[];

    Block b;
} block8;

// Cooperative matrices as function parameters and return values.
icoopmatNV<8, gl_ScopeSubgroup, 8, 8> ineg(icoopmatNV<8, gl_ScopeSubgroup, 8, 8> m) { return -m; }
ucoopmatNV<8, gl_ScopeSubgroup, 8, 8> umul(ucoopmatNV<8, gl_ScopeSubgroup, 8, 8> m) { return m * uint8_t(2); }

// Specialization constant 2 drives both matrix dimensions and both array
// extents of scm.
layout(constant_id = 2) const int SC = 1;
ucoopmatNV<32, gl_ScopeSubgroup, SC, SC> scm[SC][SC];

// sized for icoopmatNV<8, gl_ScopeSubgroup, 16, 16>
// NOTE(review): 16*16*2/16 is 32 uvec4s, i.e. 512 bytes; a 16x16 matrix with
// 8-bit components has 256 one-byte components. The factor of 2 presumably
// carries over from a 16-bit component layout - confirm.
shared uvec4 shmatrix[16*16*2/16];

void main()
{
    // Column count is written as a constant expression, (2>1?8:4), rather
    // than a literal.
    ucoopmatNV<8, gl_ScopeSubgroup, 16, (2>1?8:4)> mu = ucoopmatNV<8, gl_ScopeSubgroup, 16, (2>1?8:4)>(2);
    icoopmatNV<8, gl_ScopeSubgroup, 16, (2>1?8:4)> mi = icoopmatNV<8, gl_ScopeSubgroup, 16, (2>1?8:4)>(2);

    // Matrix add/subtract, negation, and matrix-times-scalar.
    mu = mu + mu;
    mu = mu - mu;
    mi = -mi;
    mi = mi * int8_t(2);

    // Construct 16- and 32-bit float matrices from the unsigned and signed
    // 8-bit integer matrices of the same dimensions.
    fcoopmatNV<16, gl_ScopeSubgroup, 16, 8> mf16_0 = fcoopmatNV<16, gl_ScopeSubgroup, 16, 8>(mu);
    fcoopmatNV<32, gl_ScopeSubgroup, 16, 8> mf32_0 = fcoopmatNV<32, gl_ScopeSubgroup, 16, 8>(mu);
    fcoopmatNV<16, gl_ScopeSubgroup, 16, 8> mf16_1 = fcoopmatNV<16, gl_ScopeSubgroup, 16, 8>(mi);
    fcoopmatNV<32, gl_ScopeSubgroup, 16, 8> mf32_1 = fcoopmatNV<32, gl_ScopeSubgroup, 16, 8>(mi);

    // Component read from the unsigned matrix, converted and written into the
    // signed one.
    uint8_t x = mu[1];
    mi[0] = int8_t(x);

    // Load/store through the runtime-sized arrays: 32-bit data in block,
    // 8-bit data in block8, and 32-bit data reached through the buffer
    // reference block8.b.
    coopMatLoadNV(mi, block.x, 16, 128, false);
    coopMatStoreNV(mi, block.x, 16, 128, false);
    coopMatLoadNV(mu, block8.x, 16, 128, false);
    coopMatStoreNV(mu, block8.x, 16, 128, false);
    coopMatLoadNV(mi, block8.b.x, 16, 128, false);
    coopMatStoreNV(mi, block8.b.x, 16, 128, false);

    // D = A * B + C with A 16x8, B 8x8, and C, D 16x8. The operands are left
    // uninitialized.
    ucoopmatNV<8, gl_ScopeSubgroup, 16, 8> A;
    ucoopmatNV<8, gl_ScopeSubgroup, 8, 8> B;
    ucoopmatNV<8, gl_ScopeSubgroup, 16, 8> C;
    ucoopmatNV<8, gl_ScopeSubgroup, 16, 8> D;
    D = coopMatMulAddNV(A, B, C);

    int l = D.length();


    // Function-local array of matrices, then a component write into one
    // element of it.
    icoopmatNV<8, gl_ScopeSubgroup, 16, (2>1?8:4)> a[5];
    a[3][0] = int8_t(1);

    int md1 = mD[1];

    // Indexes the result of a compound assignment with the constant 1234.
    // NOTE(review): 1234 looks larger than any plausible component count;
    // presumably deliberate for this test - confirm before changing it.
    md1 += (mi += mi)[1234];

    // Whole-matrix copy between array elements, then a component copy from a
    // signed global matrix into an unsigned one.
    muC2[0] = muC2[1];
    muC2[1][0] = (miC2[2][0]);

    // Same load/store calls against the fixed-size arrays.
    coopMatLoadNV(mi, block.y, 16, 128, false);
    coopMatStoreNV(mi, block.y, 16, 128, false);
    coopMatLoadNV(mu, block8.y, 16, 128, false);
    coopMatStoreNV(mu, block8.y, 16, 128, false);

    icoopmatNV<8, gl_ScopeSubgroup, 8, 8> p1;
    ucoopmatNV<8, gl_ScopeSubgroup, 8, 8> p2;

    // Pass matrices through the helper functions defined above.
    p1 = ineg(p1);
    p2 = umul(p2);

    // Compound division by a matrix and compound multiplication by a scalar.
    p1 /= p1;
    p2 /= p2;

    p1 *= int8_t(2);
    p2 *= uint8_t(4);

    // Load/store against the shared uvec4 array with an 8-bit matrix.
    icoopmatNV<8, gl_ScopeSubgroup, 16, 8> ms;
    coopMatLoadNV(ms, shmatrix, 1, 2, false);
    coopMatStoreNV(ms, shmatrix, 1, 2, false);

    // The same shared-memory load/store with 16-bit signed and unsigned
    // matrices.
    icoopmatNV<16, gl_ScopeSubgroup, 16, 8> i16 = icoopmatNV<16, gl_ScopeSubgroup, 16, 8>(0);
    ucoopmatNV<16, gl_ScopeSubgroup, 16, 8> u16 = ucoopmatNV<16, gl_ScopeSubgroup, 16, 8>(0);
    coopMatLoadNV(i16, shmatrix, 1, 2, false);
    coopMatStoreNV(i16, shmatrix, 1, 2, false);
    coopMatLoadNV(u16, shmatrix, 1, 2, false);
    coopMatStoreNV(u16, shmatrix, 1, 2, false);
}