#version 450 core
#extension GL_KHR_memory_scope_semantics : enable
#extension GL_NV_cooperative_matrix : enable
#extension GL_NV_integer_cooperative_matrix : enable
#extension GL_EXT_shader_explicit_arithmetic_types : enable
#extension GL_EXT_buffer_reference : enable

// Compute workgroup size: 64 x 1 x 1 invocations.
layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;

// Z is the product of an ordinary constant (X) and a specialization constant
// (Y, constant_id = 0), so its final value depends on specialization.
const int X = 8;
layout(constant_id = 0) const int Y = 2;
const int Z = X*Y;

// Module-scope signed (icoopmatNV) and unsigned (ucoopmatNV) cooperative
// matrices, single and arrayed, whose row count is the constant Z.
icoopmatNV<8, gl_ScopeSubgroup, Z, 8> miC;
icoopmatNV<8, gl_ScopeSubgroup, Z, 8> miC2[3];
ucoopmatNV<8, gl_ScopeSubgroup, Z, 8> muC;
ucoopmatNV<8, gl_ScopeSubgroup, Z, 8> muC2[3];

// Arrays whose sizes are taken from length() of the matrices above
// (both directly and through an array element).
int iarr[miC.length()];
int iarr2[miC2[1].length()];
int uarr[muC.length()];
int uarr2[muC2[1].length()];

// Constant matrices initialized with the single-scalar constructor.
const icoopmatNV<32, gl_ScopeSubgroup, Z, 8> mD = icoopmatNV<32, gl_ScopeSubgroup, Z, 8>(1);
const ucoopmatNV<8, gl_ScopeSubgroup, 8, 8> mD2 = ucoopmatNV<8, gl_ScopeSubgroup, 8, 8>(1);

// Plain struct of three ints, plus a constant instance built with its
// constructor.
struct S { int a; int b; int c; };

const S s = S(12, 23, 34);

// Coherent buffer_reference block holding a fixed-size uint array (y,
// 1024*1024 elements) followed by a runtime-sized uint array (x).
layout(set = 0, binding = 0, buffer_reference) coherent buffer Block {
    uint y[1024*1024];
    uint x[];
} block;

// Coherent buffer block at set 0 / binding 0 holding a fixed-size int8_t array
// (y), a runtime-sized int8_t array (x), and a member b of the
// buffer_reference type Block.
// NOTE(review): the block is named Block16 although its arrays are int8_t;
// the name is kept as-is because it is part of the shader's interface.
layout(set = 0, binding = 0) coherent buffer Block16 {
    int8_t y[1024*1024];
    int8_t x[];

    Block b;
} block8;

// Returns the unary negation of the 8x8 signed cooperative matrix m.
icoopmatNV<8, gl_ScopeSubgroup, 8, 8> ineg(icoopmatNV<8, gl_ScopeSubgroup, 8, 8> m) { return -m; }
// Returns the 8x8 unsigned cooperative matrix m multiplied by the scalar
// uint8_t(2).
ucoopmatNV<8, gl_ScopeSubgroup, 8, 8> umul(ucoopmatNV<8, gl_ScopeSubgroup, 8, 8> m) { return m * uint8_t(2); }

// Specialization constant (constant_id = 2) used both as the matrix
// dimensions and as the array sizes of scm.
layout(constant_id = 2) const int SC = 1;
ucoopmatNV<32, gl_ScopeSubgroup, SC, SC> scm[SC][SC];

// sized for icoopmatNV<8, gl_ScopeSubgroup, 16, 16>
// NOTE(review): the expression below is 16*16*2 bytes divided by 16 (the size
// of a uvec4), i.e. 32 elements. The *2 factor corresponds to 2-byte
// components rather than the 8-bit type named above - confirm the intent.
shared uvec4 shmatrix[16*16*2/16];

// Entry point. Exercises integer cooperative-matrix operations: construction,
// arithmetic, conversion to float matrices, element access, load/store against
// buffer and shared storage, multiply-add, length(), arrays of matrices,
// function calls, and compound assignment. Statement order is kept exactly as
// written.
void main()
{
    // Column count is given by a constant expression; (2>1?8:4) evaluates to 8.
    // Both matrices are filled via the single-scalar constructor.
    ucoopmatNV<8, gl_ScopeSubgroup, 16, (2>1?8:4)> mu = ucoopmatNV<8, gl_ScopeSubgroup, 16, (2>1?8:4)>(2);
    icoopmatNV<8, gl_ScopeSubgroup, 16, (2>1?8:4)> mi = icoopmatNV<8, gl_ScopeSubgroup, 16, (2>1?8:4)>(2);

    // Matrix add / subtract, unary negate, and matrix-times-scalar.
    mu = mu + mu;
    mu = mu - mu;
    mi = -mi;
    mi = mi * int8_t(2);

    // Construct 16-bit and 32-bit float matrices from the unsigned and signed
    // integer matrices.
    fcoopmatNV<16, gl_ScopeSubgroup, 16, 8> mf16_0 = fcoopmatNV<16, gl_ScopeSubgroup, 16, 8>(mu);
    fcoopmatNV<32, gl_ScopeSubgroup, 16, 8> mf32_0 = fcoopmatNV<32, gl_ScopeSubgroup, 16, 8>(mu);
    fcoopmatNV<16, gl_ScopeSubgroup, 16, 8> mf16_1 = fcoopmatNV<16, gl_ScopeSubgroup, 16, 8>(mi);
    fcoopmatNV<32, gl_ScopeSubgroup, 16, 8> mf32_1 = fcoopmatNV<32, gl_ScopeSubgroup, 16, 8>(mi);

    // Read one component by index and write it back into another matrix.
    uint8_t x = mu[1];
    mi[0] = int8_t(x);

    // Load/store against the runtime-sized arrays: the uint array of Block,
    // the int8_t array of Block16, and the uint array reached through the
    // buffer reference block8.b. Per GL_NV_cooperative_matrix the trailing
    // arguments are (element, stride, colMajor).
    coopMatLoadNV(mi, block.x, 16, 128, false);
    coopMatStoreNV(mi, block.x, 16, 128, false);
    coopMatLoadNV(mu, block8.x, 16, 128, false);
    coopMatStoreNV(mu, block8.x, 16, 128, false);
    coopMatLoadNV(mi, block8.b.x, 16, 128, false);
    coopMatStoreNV(mi, block8.b.x, 16, 128, false);

    // Multiply-add with a 16x8 A, 8x8 B and 16x8 C. A, B and C are declared
    // without initializers and not written before this call.
    ucoopmatNV<8, gl_ScopeSubgroup, 16, 8> A;
    ucoopmatNV<8, gl_ScopeSubgroup, 8, 8> B;
    ucoopmatNV<8, gl_ScopeSubgroup, 16, 8> C;
    ucoopmatNV<8, gl_ScopeSubgroup, 16, 8> D;
    D = coopMatMulAddNV(A, B, C);

    // length() of a local matrix, stored in an int.
    int l = D.length();


    // Local array of matrices; writes component 0 of element 3.
    icoopmatNV<8, gl_ScopeSubgroup, 16, (2>1?8:4)> a[5];
    a[3][0] = int8_t(1);

    // Index into the module-scope constant matrix mD.
    int md1 = mD[1];

    // Index the result of a compound assignment.
    // NOTE(review): 1234 is a literal index; whether it is below
    // mi.length() cannot be determined from this file - presumably
    // intentional for a compiler test, confirm before changing.
    md1 += (mi += mi)[1234];

    // Whole-matrix copy between array elements, then a component copy from a
    // signed matrix into an unsigned one.
    muC2[0] = muC2[1];
    muC2[1][0] = (miC2[2][0]);

    // Same load/store pattern as above, against the fixed-size arrays.
    coopMatLoadNV(mi, block.y, 16, 128, false);
    coopMatStoreNV(mi, block.y, 16, 128, false);
    coopMatLoadNV(mu, block8.y, 16, 128, false);
    coopMatStoreNV(mu, block8.y, 16, 128, false);

    // p1 and p2 are declared without initializers and passed straight to the
    // helper functions.
    icoopmatNV<8, gl_ScopeSubgroup, 8, 8> p1;
    ucoopmatNV<8, gl_ScopeSubgroup, 8, 8> p2;

    p1 = ineg(p1);
    p2 = umul(p2);

    // Compound divide by a matrix and compound multiply by a scalar.
    p1 /= p1;
    p2 /= p2;

    p1 *= int8_t(2);
    p2 *= uint8_t(4);

    // Load/store an 8-bit matrix through the shared uvec4 array.
    icoopmatNV<8, gl_ScopeSubgroup, 16, 8> ms;
    coopMatLoadNV(ms, shmatrix, 1, 2, false);
    coopMatStoreNV(ms, shmatrix, 1, 2, false);

    // 16-bit signed and unsigned matrices, zero-filled, then loaded/stored
    // through the same shared array.
    icoopmatNV<16, gl_ScopeSubgroup, 16, 8> i16 = icoopmatNV<16, gl_ScopeSubgroup, 16, 8>(0);
    ucoopmatNV<16, gl_ScopeSubgroup, 16, 8> u16 = ucoopmatNV<16, gl_ScopeSubgroup, 16, 8>(0);
    coopMatLoadNV(i16, shmatrix, 1, 2, false);
    coopMatStoreNV(i16, shmatrix, 1, 2, false);
    coopMatLoadNV(u16, shmatrix, 1, 2, false);
    coopMatStoreNV(u16, shmatrix, 1, 2, false);
}