#version 450 core
#extension GL_KHR_memory_scope_semantics : enable
#extension GL_KHR_cooperative_matrix : enable
#extension GL_EXT_shader_explicit_arithmetic_types : enable
#extension GL_EXT_buffer_reference : enable

// Compute shader exercising the GL_KHR_cooperative_matrix `coopmat` type:
// declarations at file and function scope, constructors and conversions,
// arithmetic, element indexing, load/store against storage buffers and
// shared memory, multiply-add, and passing matrices through functions.
//
// NOTE(review): this looks like a compiler test input, so statement order,
// unused variables, discarded results and unusual literals are presumably
// deliberate -- confirm before "cleaning up" any of them.

layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;

// Z is derived from the specialization constant Y, so every coopmat
// dimension written as Z below depends on a specialization constant.
const int X = 8;
layout(constant_id = 0) const int Y = 2;
const int Z = X*Y;

// File-scope cooperative matrices: a single matrix and an array of three.
coopmat<float16_t, gl_ScopeSubgroup, Z, 8, gl_MatrixUseAccumulator> mC;
coopmat<float16_t, gl_ScopeSubgroup, Z, 8, gl_MatrixUseAccumulator> mC2[3];

// Float specialization constant. A local variable in main() reuses the name F.
layout(constant_id = 1) const float F = 3.0;

// Constant cooperative matrices built from a scalar constructor argument
// (a float literal for mD, an integer literal for mD2).
const coopmat<float, gl_ScopeSubgroup, Z, 8, gl_MatrixUseAccumulator> mD = coopmat<float, gl_ScopeSubgroup, Z, 8, gl_MatrixUseAccumulator>(0.0);
const coopmat<float16_t, gl_ScopeSubgroup, 8, 8, gl_MatrixUseAccumulator> mD2 = coopmat<float16_t, gl_ScopeSubgroup, 8, 8, gl_MatrixUseAccumulator>(1);

// Plain struct and a constant instance of it; neither is referenced in main().
struct S { int a; int b; int c; };

const S s = S(12, 23, 34);

// 32-bit float storage: a fixed-size array followed by a runtime-sized array.
// Declared with buffer_reference, and also used as the type of Block16.b.
layout(set = 0, binding = 0, buffer_reference) coherent buffer Block {
    float y[1024*1024];
    float x[];
} block;

// 16-bit float storage with the same y/x shape, plus a member `b` of type Block.
layout(set = 0, binding = 0) coherent buffer Block16 {
    float16_t y[1024*1024];
    float16_t x[];

    Block b;
} block16;

// Helpers that take a cooperative matrix by value and return its negation,
// one per component type (float16_t and float).
coopmat<float16_t, gl_ScopeSubgroup, 8, 8, gl_MatrixUseAccumulator> f16(coopmat<float16_t, gl_ScopeSubgroup, 8, 8, gl_MatrixUseAccumulator> m) { return -m; }
coopmat<float, gl_ScopeSubgroup, 8, 8, gl_MatrixUseAccumulator> f32(coopmat<float, gl_ScopeSubgroup, 8, 8, gl_MatrixUseAccumulator> m) { return -m; }

// The specialization constant SC is used both as the matrix dimensions and
// as the array dimensions of scm.
layout(constant_id = 2) const int SC = 1;
coopmat<float16_t, gl_ScopeSubgroup, SC, SC, gl_MatrixUseAccumulator> scm[SC][SC];

// sized for coopmat<float16_t, gl_ScopeSubgroup, 16, 16, gl_MatrixUseAccumulator>
shared uvec4 shmatrix[16*16*2/16];

void main()
{
    // Matrix dimension given as a constant expression: (2>1?8:4).
    coopmat<float, gl_ScopeSubgroup, 16, (2>1?8:4), gl_MatrixUseAccumulator> m = coopmat<float, gl_ScopeSubgroup, 16, (2>1?8:4), gl_MatrixUseAccumulator>(0.0);

    // Matrix/matrix add and subtract, unary negation, and scalar scaling
    // with the scalar on either side.
    m = m + m;
    m = m - m;
    m = -m;
    m = 2.0*m;
    m = m*2.0;

    // Conversion constructor: float16_t matrix built from the float matrix m.
    coopmat<float16_t, gl_ScopeSubgroup, 16, 8, gl_MatrixUseAccumulator> m2 = coopmat<float16_t, gl_ScopeSubgroup, 16, 8, gl_MatrixUseAccumulator>(m);

    // Element read and write through the index operator.
    float x = m[1];
    m[0] = x;

    // Load/store against the runtime-sized arrays: float matrix with the
    // float block, float16_t matrix with the float16_t block, then the float
    // matrix again through the buffer-reference member block16.b.
    coopMatLoad(m, block.x, 16, 128, gl_CooperativeMatrixLayoutRowMajor);
    coopMatStore(m, block.x, 16, 128, gl_CooperativeMatrixLayoutRowMajor);
    coopMatLoad(m2, block16.x, 16, 128, gl_CooperativeMatrixLayoutRowMajor);
    coopMatStore(m2, block16.x, 16, 128, gl_CooperativeMatrixLayoutRowMajor);
    coopMatLoad(m, block16.b.x, 16, 128, gl_CooperativeMatrixLayoutRowMajor);
    coopMatStore(m, block16.b.x, 16, 128, gl_CooperativeMatrixLayoutRowMajor);

    // Multiply-add with float16_t A (16x8) and B (8x8) operands and float
    // accumulators C and D (16x8). A, B and C are used without being assigned.
    coopmat<float16_t, gl_ScopeSubgroup, 16, 8, gl_MatrixUseA> A;
    coopmat<float16_t, gl_ScopeSubgroup, 8, 8, gl_MatrixUseB> B;
    coopmat<float, gl_ScopeSubgroup, 16, 8, gl_MatrixUseAccumulator> C;
    coopmat<float, gl_ScopeSubgroup, 16, 8, gl_MatrixUseAccumulator> D;
    D = coopMatMulAdd(A, B, C);

    // length() on a cooperative matrix, stored in an int.
    int l = D.length();

    // Declared but never used.
    coopmat<float16_t, gl_ScopeSubgroup, 8, 8, gl_MatrixUseAccumulator> E;

    // This local F shadows the file-scope specialization constant F; both
    // dimensions come from the spec-constant-derived Z.
    coopmat<float16_t, gl_ScopeSubgroup, Z, Z, gl_MatrixUseAccumulator> F = coopmat<float16_t, gl_ScopeSubgroup, Z, Z, gl_MatrixUseAccumulator>(0.0);

    // Local array of cooperative matrices; writes one element of one matrix.
    coopmat<float, gl_ScopeSubgroup, 16, (2>1?8:4), gl_MatrixUseAccumulator> a[5];
    a[3][0] = 1.0;

    // Element read from the file-scope constant matrix.
    float md1 = mD[1];

    // Index applied directly to the result of a compound assignment.
    // NOTE(review): the literal index 1234 is kept exactly as written;
    // presumably intentional for this test -- confirm.
    md1 += (m += m)[1234];

    // Whole-matrix assignment between elements of the file-scope array.
    mC2[1] = mC2[2];

    // Same load/store pairs as above, now against the fixed-size arrays.
    coopMatLoad(m, block.y, 16, 128, gl_CooperativeMatrixLayoutRowMajor);
    coopMatStore(m, block.y, 16, 128, gl_CooperativeMatrixLayoutRowMajor);
    coopMatLoad(m2, block16.y, 16, 128, gl_CooperativeMatrixLayoutRowMajor);
    coopMatStore(m2, block16.y, 16, 128, gl_CooperativeMatrixLayoutRowMajor);

    coopmat<float16_t, gl_ScopeSubgroup, 8, 8, gl_MatrixUseAccumulator> p1;
    coopmat<float, gl_ScopeSubgroup, 8, 8, gl_MatrixUseAccumulator> p2;

    // Pass matrices through the helper functions and assign the results back.
    p1 = f16(p1);
    p2 = f32(p2);

    // Re-initialize both from a scalar constructor.
    p1 = coopmat<float16_t, gl_ScopeSubgroup, 8, 8, gl_MatrixUseAccumulator>(0.0);
    p2 = coopmat<float, gl_ScopeSubgroup, 8, 8, gl_MatrixUseAccumulator>(0.0);

    // Compound matrix/matrix divide.
    p1 /= p1;

    // Compound scalar multiply, with a scalar matching each component type.
    p1 *= float16_t(2.0);
    p2 *= 4.0;

    // Load/store a float16_t matrix through the shared uvec4 array.
    coopmat<float16_t, gl_ScopeSubgroup, 16, 8, gl_MatrixUseAccumulator> ms;
    coopMatLoad(ms, shmatrix, 1, 2, gl_CooperativeMatrixLayoutRowMajor);
    coopMatStore(ms, shmatrix, 1, 2, gl_CooperativeMatrixLayoutRowMajor);

    // int8_t multiply-add in three call forms: no operands argument, a literal
    // 0, and gl_MatrixOperandsSaturatingAccumulation. The results are discarded.
    coopmat<int8_t, gl_ScopeSubgroup, 8, 8, gl_MatrixUseA> ms8A;
    coopmat<int8_t, gl_ScopeSubgroup, 8, 8, gl_MatrixUseB> ms8B;
    coopmat<int8_t, gl_ScopeSubgroup, 8, 8, gl_MatrixUseAccumulator> ms8C;
    coopMatMulAdd(ms8A, ms8B, ms8C);
    coopMatMulAdd(ms8A, ms8B, ms8C, 0);
    coopMatMulAdd(ms8A, ms8B, ms8C, gl_MatrixOperandsSaturatingAccumulation);

    // Store an int16_t matrix through the same shared uvec4 array.
    coopmat<int16_t, gl_ScopeSubgroup, 8, 8, gl_MatrixUseA> m16;
    coopMatStore(m16, shmatrix, 1, 2, gl_CooperativeMatrixLayoutRowMajor);
}