1/*
2 * Copyright (C) 2022 Collabora, Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24#include "bi_test.h"
25#include "bi_builder.h"
26#include "va_compiler.h"
27#include "valhall_enums.h"
28
29#include <gtest/gtest.h>
30
31static void
32strip_nops(bi_context *ctx)
33{
34   bi_foreach_instr_global_safe(ctx, I) {
35      if (I->op == BI_OPCODE_NOP)
36         bi_remove_instruction(I);
37   }
38}
39
/*
 * Run one flow-control insertion test case.
 *
 * `test` is a statement (in this file always a braced block) that emits
 * instructions through a builder named `b`. It is expanded twice, once for
 * each of two freshly created builders:
 *
 *  - the first shader has all of its NOPs stripped and is then run through
 *    va_insert_flow_control_nops();
 *  - the second shader is left exactly as `test` built it.
 *
 * The two shaders are finally compared with ASSERT_SHADER_EQUAL, so `test`
 * spells out the output expected from the pass, NOPs included.
 *
 * `shader_stage` is pasted onto MESA_SHADER_ to select the stage of both
 * shaders. A `mem_ctx` must be in scope at the call site.
 */
#define CASE(shader_stage, test)                                             \
   do {                                                                      \
      bi_builder *bld_actual = bit_builder(mem_ctx);                         \
      bi_builder *bld_expected = bit_builder(mem_ctx);                       \
                                                                             \
      /* Build the input, drop its NOPs and let the pass re-insert them */   \
      {                                                                      \
         UNUSED bi_builder *b = bld_actual;                                  \
         bld_actual->shader->stage = MESA_SHADER_##shader_stage;             \
         test;                                                               \
      }                                                                      \
      strip_nops(bld_actual->shader);                                        \
      va_insert_flow_control_nops(bld_actual->shader);                       \
                                                                             \
      /* Build the reference shader, kept verbatim */                        \
      {                                                                      \
         UNUSED bi_builder *b = bld_expected;                                \
         bld_expected->shader->stage = MESA_SHADER_##shader_stage;           \
         test;                                                               \
      }                                                                      \
                                                                             \
      ASSERT_SHADER_EQUAL(bld_actual->shader, bld_expected->shader);         \
   } while (0)
57
/*
 * Emit a NOP through the in-scope builder `b` and set its flow field to
 * VA_FLOW_<f>.
 *
 * The replacement list is parenthesized so the assignment stays a single
 * expression regardless of the context the macro is expanded in.
 */
#define flow(f) (bi_nop(b)->flow = VA_FLOW_##f)
59
60class InsertFlow : public testing::Test {
61protected:
62   InsertFlow() {
63      mem_ctx = ralloc_context(NULL);
64   }
65
66   ~InsertFlow() {
67      ralloc_free(mem_ctx);
68   }
69
70   void *mem_ctx;
71};
72
73TEST_F(InsertFlow, PreserveEmptyShader) {
74   CASE(FRAGMENT, {});
75}
76
77TEST_F(InsertFlow, TilebufferWait7) {
78   CASE(FRAGMENT, {
79        flow(DISCARD);
80        bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
81        flow(WAIT);
82        bi_blend_to(b, bi_register(0), bi_register(4), bi_register(5),
83                    bi_register(6), bi_register(7), bi_register(8),
84                    BI_REGISTER_FORMAT_AUTO, 4, 4);
85        flow(END);
86   });
87
88   CASE(FRAGMENT, {
89        flow(DISCARD);
90        bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
91        flow(WAIT);
92        bi_st_tile(b, bi_register(0), bi_register(4), bi_register(5),
93                    bi_register(6), BI_REGISTER_FORMAT_AUTO, BI_VECSIZE_V4);
94        flow(END);
95   });
96
97   CASE(FRAGMENT, {
98        flow(DISCARD);
99        bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
100        flow(WAIT);
101        bi_ld_tile_to(b, bi_register(0), bi_register(4), bi_register(5),
102                    bi_register(6), BI_REGISTER_FORMAT_AUTO, BI_VECSIZE_V4);
103        flow(END);
104   });
105}
106
107TEST_F(InsertFlow, AtestWait6AndWait0After) {
108   CASE(FRAGMENT, {
109        flow(DISCARD);
110        bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
111        flow(WAIT0126);
112        bi_atest_to(b, bi_register(0), bi_register(4), bi_register(5));
113        flow(WAIT0);
114        flow(END);
115   });
116}
117
118TEST_F(InsertFlow, ZSEmitWait6) {
119   CASE(FRAGMENT, {
120        flow(DISCARD);
121        bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
122        flow(WAIT0126);
123        bi_zs_emit_to(b, bi_register(0), bi_register(4), bi_register(5),
124                      bi_register(6), true, true);
125        flow(END);
126   });
127}
128
129TEST_F(InsertFlow, LoadThenUnrelatedThenUse) {
130   CASE(VERTEX, {
131         bi_ld_attr_imm_to(b, bi_register(16), bi_register(60), bi_register(61),
132                           BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4, 1);
133         bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
134         flow(WAIT0);
135         bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(19));
136         flow(END);
137   });
138}
139
140TEST_F(InsertFlow, SingleLdVar) {
141   CASE(FRAGMENT, {
142         flow(DISCARD);
143         bi_ld_var_buf_imm_f16_to(b, bi_register(2), bi_register(61),
144                                 BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTER,
145                                 BI_SOURCE_FORMAT_F16,
146                                 BI_UPDATE_RETRIEVE, BI_VECSIZE_V4, 0);
147         flow(WAIT0);
148         flow(END);
149   });
150}
151
152TEST_F(InsertFlow, SerializeLdVars) {
153   CASE(FRAGMENT, {
154         flow(DISCARD);
155         bi_ld_var_buf_imm_f16_to(b, bi_register(16), bi_register(61),
156                                 BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTER,
157                                 BI_SOURCE_FORMAT_F16,
158                                 BI_UPDATE_STORE, BI_VECSIZE_V4, 0);
159         bi_ld_var_buf_imm_f16_to(b, bi_register(2), bi_register(61),
160                                 BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTER,
161                                 BI_SOURCE_FORMAT_F16,
162                                 BI_UPDATE_RETRIEVE, BI_VECSIZE_V4, 0);
163         flow(WAIT0);
164         bi_ld_var_buf_imm_f16_to(b, bi_register(8), bi_register(61),
165                                 BI_REGISTER_FORMAT_F16, BI_SAMPLE_CENTER,
166                                 BI_SOURCE_FORMAT_F16,
167                                 BI_UPDATE_STORE, BI_VECSIZE_V4, 1);
168         flow(WAIT0);
169         flow(END);
170   });
171}
172
173TEST_F(InsertFlow, Clper) {
174   CASE(FRAGMENT, {
175         bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
176         bi_clper_i32_to(b, bi_register(0), bi_register(4), bi_register(8),
177                         BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE,
178                         BI_SUBGROUP_SUBGROUP4);
179         flow(DISCARD);
180         bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
181         flow(END);
182   });
183}
184
185TEST_F(InsertFlow, TextureImplicit) {
186   CASE(FRAGMENT, {
187         bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
188         bi_tex_single_to(b, bi_register(0), bi_register(4), bi_register(8),
189                          bi_register(12), false, BI_DIMENSION_2D,
190                          BI_REGISTER_FORMAT_F32, false, false,
191                          BI_VA_LOD_MODE_COMPUTED_LOD, BI_WRITE_MASK_RGBA, 4);
192         flow(DISCARD);
193         flow(WAIT0);
194         bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
195         flow(END);
196   });
197}
198
199TEST_F(InsertFlow, TextureExplicit) {
200   CASE(FRAGMENT, {
201         flow(DISCARD);
202         bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
203         bi_tex_single_to(b, bi_register(0), bi_register(4), bi_register(8),
204                          bi_register(12), false, BI_DIMENSION_2D,
205                          BI_REGISTER_FORMAT_F32, false, false,
206                          BI_VA_LOD_MODE_ZERO_LOD, BI_WRITE_MASK_RGBA, 4);
207         flow(WAIT0);
208         bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
209         flow(END);
210   });
211}
212
213/*      A
214 *     / \
215 *    B   C
216 *     \ /
217 *      D
218 */
219TEST_F(InsertFlow, DiamondCFG) {
220   CASE(FRAGMENT, {
221         bi_block *A = bi_start_block(&b->shader->blocks);
222         bi_block *B = bit_block(b->shader);
223         bi_block *C = bit_block(b->shader);
224         bi_block *D = bit_block(b->shader);
225
226         bi_block_add_successor(A, B);
227         bi_block_add_successor(A, C);
228
229         bi_block_add_successor(B, D);
230         bi_block_add_successor(C, D);
231
232         /* B uses helper invocations, no other block does.
233          *
234          * That means B and C need to discard helpers.
235          */
236         b->cursor = bi_after_block(B);
237         bi_clper_i32_to(b, bi_register(0), bi_register(4), bi_register(8),
238               BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE,
239               BI_SUBGROUP_SUBGROUP4);
240         flow(DISCARD);
241         flow(RECONVERGE);
242
243         b->cursor = bi_after_block(C);
244         flow(DISCARD);
245         bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
246         flow(RECONVERGE);
247
248         b->cursor = bi_after_block(D);
249         flow(END);
250   });
251}
252
253TEST_F(InsertFlow, BarrierBug) {
254   CASE(KERNEL, {
255         bi_instr *I = bi_store_i32(b, bi_register(0), bi_register(2), bi_register(4), BI_SEG_NONE, 0);
256         I->slot = 2;
257
258         bi_fadd_f32_to(b, bi_register(10), bi_register(10), bi_register(10));
259         flow(WAIT2);
260         bi_barrier(b);
261         flow(WAIT);
262         flow(END);
263   });
264}
265