1617a3babSopenharmony_ci#version 450 core
2617a3babSopenharmony_ci#extension GL_KHR_memory_scope_semantics : enable
3617a3babSopenharmony_ci#extension GL_KHR_cooperative_matrix : enable
4617a3babSopenharmony_ci#extension GL_EXT_shader_explicit_arithmetic_types : enable
5617a3babSopenharmony_ci#extension GL_EXT_buffer_reference : enable
6617a3babSopenharmony_ci
// Workgroup size: 64 invocations along x, 1 along y and z.
7617a3babSopenharmony_cilayout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
8617a3babSopenharmony_ci
// X is an ordinary constant, Y is specialization constant 0 (default 2), so
// Z = X*Y is specialization-dependent and evaluates to 16 with the defaults.
9617a3babSopenharmony_ciconst int X = 8;
10617a3babSopenharmony_cilayout(constant_id = 0) const int Y = 2;
11617a3babSopenharmony_ciconst int Z = X*Y;
12617a3babSopenharmony_ci
// Global float16_t accumulator matrices whose first dimension is Z:
// a single matrix and an array of three.
13617a3babSopenharmony_cicoopmat<float16_t, gl_ScopeSubgroup, Z, 8, gl_MatrixUseAccumulator> mC;
14617a3babSopenharmony_cicoopmat<float16_t, gl_ScopeSubgroup, Z, 8, gl_MatrixUseAccumulator> mC2[3];
15617a3babSopenharmony_ci
// Specialization constant 1 (default 3.0). main() declares a local that reuses the name F.
16617a3babSopenharmony_cilayout(constant_id = 1) const float F = 3.0;
17617a3babSopenharmony_ci
// Constant matrices built with the scalar constructor: mD from the float 0.0
// (Z x 8, float) and mD2 from the integer literal 1 (8 x 8, float16_t).
18617a3babSopenharmony_ciconst coopmat<float, gl_ScopeSubgroup, Z, 8, gl_MatrixUseAccumulator> mD = coopmat<float, gl_ScopeSubgroup, Z, 8, gl_MatrixUseAccumulator>(0.0);
19617a3babSopenharmony_ciconst coopmat<float16_t, gl_ScopeSubgroup, 8, 8, gl_MatrixUseAccumulator> mD2 = coopmat<float16_t, gl_ScopeSubgroup, 8, 8, gl_MatrixUseAccumulator>(1);
20617a3babSopenharmony_ci
// Plain three-int struct and a constant instance built with its constructor.
// Neither S nor s is referenced again in the visible source.
21617a3babSopenharmony_cistruct S { int a; int b; int c; };
22617a3babSopenharmony_ci
23617a3babSopenharmony_ciconst S s = S(12, 23, 34);
24617a3babSopenharmony_ci
// Coherent buffer_reference block of floats: a fixed 1024*1024-element array y
// followed by an unsized array x. Declared at set 0, binding 0 (the same
// set/binding that Block16 uses) with instance name `block`.
25617a3babSopenharmony_cilayout(set = 0, binding = 0, buffer_reference) coherent buffer Block {
26617a3babSopenharmony_ci    float y[1024*1024];
27617a3babSopenharmony_ci    float x[];
28617a3babSopenharmony_ci} block;
29617a3babSopenharmony_ci
// Coherent storage block of float16_t: a fixed 1024*1024-element array y, an
// unsized array x, and then a member b of the buffer_reference type Block.
// NOTE(review): the unsized array x is not the last member here; this is how
// the fixture is written -- confirm before "fixing" it.
30617a3babSopenharmony_cilayout(set = 0, binding = 0) coherent buffer Block16 {
31617a3babSopenharmony_ci    float16_t y[1024*1024];
32617a3babSopenharmony_ci    float16_t x[];
33617a3babSopenharmony_ci
34617a3babSopenharmony_ci    Block b;
35617a3babSopenharmony_ci} block16;
36617a3babSopenharmony_ci
// Takes an 8x8 float16_t accumulator matrix by value and returns its negation (-m).
37617a3babSopenharmony_cicoopmat<float16_t, gl_ScopeSubgroup, 8, 8, gl_MatrixUseAccumulator> f16(coopmat<float16_t, gl_ScopeSubgroup, 8, 8, gl_MatrixUseAccumulator> m) { return -m; }
// Takes an 8x8 float accumulator matrix by value and returns its negation (-m).
38617a3babSopenharmony_cicoopmat<float, gl_ScopeSubgroup, 8, 8, gl_MatrixUseAccumulator> f32(coopmat<float, gl_ScopeSubgroup, 8, 8, gl_MatrixUseAccumulator> m) { return -m; }
39617a3babSopenharmony_ci
// Specialization constant 2 (default 1) used both as the matrix dimensions and
// as both array extents of scm.
40617a3babSopenharmony_cilayout(constant_id = 2) const int SC = 1;
41617a3babSopenharmony_cicoopmat<float16_t, gl_ScopeSubgroup, SC, SC, gl_MatrixUseAccumulator> scm[SC][SC];
42617a3babSopenharmony_ci
// Shared-memory scratch array of uvec4. The extent 16*16*2/16 evaluates to 32:
// 16*16 components * 2 bytes each, divided by 16 bytes per uvec4.
43617a3babSopenharmony_ci// sized for coopmat<float16_t, gl_ScopeSubgroup, 16, 16, gl_MatrixUseAccumulator>
44617a3babSopenharmony_cishared uvec4 shmatrix[16*16*2/16];
45617a3babSopenharmony_ci
// Shader entry point. Runs through the cooperative-matrix operations in turn:
// construction, arithmetic, element access, load/store against buffer and
// shared arrays, multiply-add, length(), arrays of matrices, function calls
// and compound assignment. Most results are left unused.
46617a3babSopenharmony_civoid main()
47617a3babSopenharmony_ci{
// Second dimension is written as the constant expression (2>1?8:4), which is 8.
48617a3babSopenharmony_ci    coopmat<float, gl_ScopeSubgroup, 16, (2>1?8:4), gl_MatrixUseAccumulator> m = coopmat<float, gl_ScopeSubgroup, 16, (2>1?8:4), gl_MatrixUseAccumulator>(0.0);
49617a3babSopenharmony_ci
// Matrix+matrix, matrix-matrix, unary minus, and scalar multiply with the
// scalar on either side.
50617a3babSopenharmony_ci    m = m + m;
51617a3babSopenharmony_ci    m = m - m;
52617a3babSopenharmony_ci    m = -m;
53617a3babSopenharmony_ci    m = 2.0*m;
54617a3babSopenharmony_ci    m = m*2.0;
55617a3babSopenharmony_ci
// Conversion constructor: float16_t 16x8 matrix built from the float 16x8 matrix m.
56617a3babSopenharmony_ci    coopmat<float16_t, gl_ScopeSubgroup, 16, 8, gl_MatrixUseAccumulator> m2 = coopmat<float16_t, gl_ScopeSubgroup, 16, 8, gl_MatrixUseAccumulator>(m);
57617a3babSopenharmony_ci
// Component read and write through operator[].
58617a3babSopenharmony_ci    float x = m[1];
59617a3babSopenharmony_ci    m[0] = x;
60617a3babSopenharmony_ci
// Load/store through the unsized arrays: float block.x, float16_t block16.x,
// and the float array block16.b.x reached via the buffer_reference member.
// Per the GL_KHR_cooperative_matrix signature the integer arguments are the
// element offset (16) and the stride (128).
61617a3babSopenharmony_ci    coopMatLoad(m, block.x, 16, 128, gl_CooperativeMatrixLayoutRowMajor);
62617a3babSopenharmony_ci    coopMatStore(m, block.x, 16, 128, gl_CooperativeMatrixLayoutRowMajor);
63617a3babSopenharmony_ci    coopMatLoad(m2, block16.x, 16, 128, gl_CooperativeMatrixLayoutRowMajor);
64617a3babSopenharmony_ci    coopMatStore(m2, block16.x, 16, 128, gl_CooperativeMatrixLayoutRowMajor);
65617a3babSopenharmony_ci    coopMatLoad(m, block16.b.x, 16, 128, gl_CooperativeMatrixLayoutRowMajor);
66617a3babSopenharmony_ci    coopMatStore(m, block16.b.x, 16, 128, gl_CooperativeMatrixLayoutRowMajor);
67617a3babSopenharmony_ci
// Multiply-add with float16_t A (16x8) and B (8x8) operands and float 16x8
// accumulators. A, B and C are declared without initializers.
68617a3babSopenharmony_ci    coopmat<float16_t, gl_ScopeSubgroup, 16, 8, gl_MatrixUseA> A;
69617a3babSopenharmony_ci    coopmat<float16_t, gl_ScopeSubgroup, 8, 8, gl_MatrixUseB> B;
70617a3babSopenharmony_ci    coopmat<float, gl_ScopeSubgroup, 16, 8, gl_MatrixUseAccumulator> C;
71617a3babSopenharmony_ci    coopmat<float, gl_ScopeSubgroup, 16, 8, gl_MatrixUseAccumulator> D;
72617a3babSopenharmony_ci    D = coopMatMulAdd(A, B, C);
73617a3babSopenharmony_ci
// length() method on a matrix.
74617a3babSopenharmony_ci    int l = D.length();
75617a3babSopenharmony_ci
// Declaration only; E is not used again in this function.
76617a3babSopenharmony_ci    coopmat<float16_t, gl_ScopeSubgroup, 8, 8, gl_MatrixUseAccumulator> E;
77617a3babSopenharmony_ci
// Both dimensions come from Z, which depends on a specialization constant.
// This local is named F, the same name as the file-scope float constant.
78617a3babSopenharmony_ci    coopmat<float16_t, gl_ScopeSubgroup, Z, Z, gl_MatrixUseAccumulator> F = coopmat<float16_t, gl_ScopeSubgroup, Z, Z, gl_MatrixUseAccumulator>(0.0);
79617a3babSopenharmony_ci
// Local array of five matrices; writes component 0 of element 3.
80617a3babSopenharmony_ci    coopmat<float, gl_ScopeSubgroup, 16, (2>1?8:4), gl_MatrixUseAccumulator> a[5];
81617a3babSopenharmony_ci    a[3][0] = 1.0;
82617a3babSopenharmony_ci
// Component read from the file-scope constant matrix mD.
83617a3babSopenharmony_ci    float md1 = mD[1];
84617a3babSopenharmony_ci
// Indexes the result of a compound assignment expression.
// NOTE(review): 1234 is larger than the 16*8 = 128 components of m;
// presumably deliberate for this fixture -- confirm before changing.
85617a3babSopenharmony_ci    md1 += (m += m)[1234];
86617a3babSopenharmony_ci
// Whole-matrix assignment between elements of the global array mC2.
87617a3babSopenharmony_ci    mC2[1] = mC2[2];
88617a3babSopenharmony_ci
// Same load/store calls as above, this time against the fixed-size arrays y.
89617a3babSopenharmony_ci    coopMatLoad(m, block.y, 16, 128, gl_CooperativeMatrixLayoutRowMajor);
90617a3babSopenharmony_ci    coopMatStore(m, block.y, 16, 128, gl_CooperativeMatrixLayoutRowMajor);
91617a3babSopenharmony_ci    coopMatLoad(m2, block16.y, 16, 128, gl_CooperativeMatrixLayoutRowMajor);
92617a3babSopenharmony_ci    coopMatStore(m2, block16.y, 16, 128, gl_CooperativeMatrixLayoutRowMajor);
93617a3babSopenharmony_ci
// Matrices as function parameters and return values (f16/f32 return -m).
94617a3babSopenharmony_ci    coopmat<float16_t, gl_ScopeSubgroup, 8, 8, gl_MatrixUseAccumulator> p1;
95617a3babSopenharmony_ci    coopmat<float, gl_ScopeSubgroup, 8, 8, gl_MatrixUseAccumulator> p2;
96617a3babSopenharmony_ci
97617a3babSopenharmony_ci    p1 = f16(p1);
98617a3babSopenharmony_ci    p2 = f32(p2);
99617a3babSopenharmony_ci
// Reassign both from the scalar constructor with 0.0.
100617a3babSopenharmony_ci    p1 = coopmat<float16_t, gl_ScopeSubgroup, 8, 8, gl_MatrixUseAccumulator>(0.0);
101617a3babSopenharmony_ci    p2 = coopmat<float, gl_ScopeSubgroup, 8, 8, gl_MatrixUseAccumulator>(0.0);
102617a3babSopenharmony_ci
// Compound divide of a matrix by itself; p1 was just constructed from 0.0.
103617a3babSopenharmony_ci    p1 /= p1;
104617a3babSopenharmony_ci
// Compound scalar multiply: explicit float16_t scalar for the float16_t
// matrix, plain float literal for the float matrix.
105617a3babSopenharmony_ci    p1 *= float16_t(2.0);
106617a3babSopenharmony_ci    p2 *= 4.0;
107617a3babSopenharmony_ci
// Load/store against the shared uvec4 array shmatrix, whose element type
// differs from the matrix component type (float16_t).
108617a3babSopenharmony_ci    coopmat<float16_t, gl_ScopeSubgroup, 16, 8, gl_MatrixUseAccumulator> ms;
109617a3babSopenharmony_ci    coopMatLoad(ms, shmatrix, 1, 2, gl_CooperativeMatrixLayoutRowMajor);
110617a3babSopenharmony_ci    coopMatStore(ms, shmatrix, 1, 2, gl_CooperativeMatrixLayoutRowMajor);
111617a3babSopenharmony_ci
// int8_t multiply-add in three call forms: no operands argument, an explicit
// 0, and gl_MatrixOperandsSaturatingAccumulation. The return values are
// discarded.
112617a3babSopenharmony_ci    coopmat<int8_t, gl_ScopeSubgroup, 8, 8, gl_MatrixUseA> ms8A;
113617a3babSopenharmony_ci    coopmat<int8_t, gl_ScopeSubgroup, 8, 8, gl_MatrixUseB> ms8B;
114617a3babSopenharmony_ci    coopmat<int8_t, gl_ScopeSubgroup, 8, 8, gl_MatrixUseAccumulator> ms8C;
115617a3babSopenharmony_ci    coopMatMulAdd(ms8A, ms8B, ms8C);
116617a3babSopenharmony_ci    coopMatMulAdd(ms8A, ms8B, ms8C, 0);
117617a3babSopenharmony_ci    coopMatMulAdd(ms8A, ms8B, ms8C, gl_MatrixOperandsSaturatingAccumulation);
118617a3babSopenharmony_ci
// Store of an int16_t use-A matrix (declared without initializer) to shmatrix.
119617a3babSopenharmony_ci    coopmat<int16_t, gl_ScopeSubgroup, 8, 8, gl_MatrixUseA> m16;
120617a3babSopenharmony_ci    coopMatStore(m16, shmatrix, 1, 2, gl_CooperativeMatrixLayoutRowMajor);
121617a3babSopenharmony_ci}
122