1/*
2 * Copyright (C) 2022 Collabora, Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24#include "bi_test.h"
25#include "bi_builder.h"
26#include "va_compiler.h"
27#include "valhall_enums.h"
28
29#include <gtest/gtest.h>
30
/*
 * CASE(test, expected): check va_merge_flow() against a hand-written result.
 *
 * Two builders, A and B, are created from the fixture's mem_ctx and each
 * shader's stage is set to MESA_SHADER_FRAGMENT. `test` runs with `b` bound
 * to A, va_merge_flow() is then applied to A's shader only, `expected` runs
 * with `b` bound to B, and the two shaders are compared with
 * ASSERT_SHADER_EQUAL.
 */
#define CASE(test, expected) \
   do { \
      bi_builder *A = bit_builder(mem_ctx); \
      bi_builder *B = bit_builder(mem_ctx); \
      \
      A->shader->stage = MESA_SHADER_FRAGMENT; \
      { \
         bi_builder *b = A; \
         test; \
      } \
      va_merge_flow(A->shader); \
      \
      B->shader->stage = MESA_SHADER_FRAGMENT; \
      { \
         bi_builder *b = B; \
         expected; \
      } \
      ASSERT_SHADER_EQUAL(A->shader, B->shader); \
   } while (0)
47
/*
 * Negative case: the same statements build both the input and the expected
 * shader, so the check passes only if va_merge_flow() leaves the input as is.
 */
#define NEGCASE(test) \
   CASE(test, test)
49
/*
 * flow(f): call bi_nop() on the builder named `b` at the expansion site and
 * set the returned instruction's flow field to VA_FLOW_<f>.
 *
 * The replacement list is parenthesized so the macro always expands to a
 * single expression, whatever operators surround it.
 */
#define flow(f) (bi_nop(b)->flow = VA_FLOW_ ## f)
51
52class MergeFlow : public testing::Test {
53protected:
54   MergeFlow() {
55      mem_ctx = ralloc_context(NULL);
56   }
57
58   ~MergeFlow() {
59      ralloc_free(mem_ctx);
60   }
61
62   void *mem_ctx;
63   bi_instr *I;
64};
65
66TEST_F(MergeFlow, End) {
67   CASE({
68        bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
69        bi_blend_to(b, bi_register(0), bi_register(4), bi_register(5),
70                    bi_register(6), bi_register(7), bi_register(8),
71                    BI_REGISTER_FORMAT_AUTO, 4, 4);
72        flow(END);
73   },
74   {
75        bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
76        I = bi_blend_to(b, bi_register(0), bi_register(4), bi_register(5),
77                        bi_register(6), bi_register(7), bi_register(8),
78                        BI_REGISTER_FORMAT_AUTO, 4, 4);
79        I->flow = VA_FLOW_END;
80   });
81}
82
83TEST_F(MergeFlow, Reconverge) {
84   CASE({
85        bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
86        bi_blend_to(b, bi_register(0), bi_register(4), bi_register(5),
87                    bi_register(6), bi_register(7), bi_register(8),
88                    BI_REGISTER_FORMAT_AUTO, 4, 4);
89        flow(RECONVERGE);
90   },
91   {
92        bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
93        I = bi_blend_to(b, bi_register(0), bi_register(4), bi_register(5),
94                        bi_register(6), bi_register(7), bi_register(8),
95                        BI_REGISTER_FORMAT_AUTO, 4, 4);
96        I->flow = VA_FLOW_RECONVERGE;
97   });
98}
99
100TEST_F(MergeFlow, TrivialWait) {
101   CASE({
102        bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
103        flow(WAIT0126);
104        bi_atest_to(b, bi_register(0), bi_register(4), bi_register(5));
105   },
106   {
107        I = bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
108        I->flow = VA_FLOW_WAIT0126;
109        bi_atest_to(b, bi_register(0), bi_register(4), bi_register(5));
110   });
111}
112
113TEST_F(MergeFlow, LoadThenUnrelatedThenUse) {
114   CASE({
115         bi_ld_attr_imm_to(b, bi_register(16), bi_register(60), bi_register(61),
116                           BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4, 1);
117         bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
118         flow(WAIT0);
119         bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(19));
120         flow(END);
121   },
122   {
123         bi_ld_attr_imm_to(b, bi_register(16), bi_register(60), bi_register(61),
124                           BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4, 1);
125         I = bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
126         I->flow = VA_FLOW_WAIT0;
127         I = bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(19));
128         I->flow = VA_FLOW_END;
129   });
130}
131
132TEST_F(MergeFlow, TrivialDiscard) {
133   CASE({
134         bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
135         bi_clper_i32_to(b, bi_register(0), bi_register(4), bi_register(8),
136                         BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE,
137                         BI_SUBGROUP_SUBGROUP4);
138         flow(DISCARD);
139         bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
140         flow(END);
141   },
142   {
143         bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
144         I = bi_clper_i32_to(b, bi_register(0), bi_register(4), bi_register(8),
145                         BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE,
146                         BI_SUBGROUP_SUBGROUP4);
147         I->flow = VA_FLOW_DISCARD;
148         I = bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
149         I->flow = VA_FLOW_END;
150   });
151}
152
153TEST_F(MergeFlow, TrivialDiscardAtTheStart) {
154   CASE({
155         flow(DISCARD);
156         bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
157   },
158   {
159         I = bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
160         I->flow = VA_FLOW_DISCARD;
161   });
162}
163
164TEST_F(MergeFlow, MoveDiscardPastWait) {
165   CASE({
166         bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
167         bi_clper_i32_to(b, bi_register(0), bi_register(4), bi_register(8),
168                         BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE,
169                         BI_SUBGROUP_SUBGROUP4);
170         flow(DISCARD);
171         flow(WAIT0);
172         bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
173   },
174   {
175         bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
176         I = bi_clper_i32_to(b, bi_register(0), bi_register(4), bi_register(8),
177                         BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE,
178                         BI_SUBGROUP_SUBGROUP4);
179         I->flow = VA_FLOW_WAIT0;
180         I = bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
181         I->flow = VA_FLOW_DISCARD;
182   });
183}
184
185TEST_F(MergeFlow, OccludedWaitsAndDiscard) {
186   CASE({
187         bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
188         bi_clper_i32_to(b, bi_register(0), bi_register(4), bi_register(8),
189                         BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE,
190                         BI_SUBGROUP_SUBGROUP4);
191         flow(WAIT0);
192         flow(DISCARD);
193         flow(WAIT2);
194         bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
195   },
196   {
197         bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
198         I = bi_clper_i32_to(b, bi_register(0), bi_register(4), bi_register(8),
199                         BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE,
200                         BI_SUBGROUP_SUBGROUP4);
201         I->flow = VA_FLOW_WAIT02;
202         I = bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
203         I->flow = VA_FLOW_DISCARD;
204   });
205}
206
207TEST_F(MergeFlow, DeleteUselessWaits) {
208   CASE({
209         bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
210         bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
211         flow(WAIT0);
212         flow(WAIT2);
213         flow(END);
214   },
215   {
216         bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
217         I = bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
218         I->flow = VA_FLOW_END;
219   });
220}
221
222TEST_F(MergeFlow, BlockFullOfUselessWaits) {
223   CASE({
224         flow(WAIT0);
225         flow(WAIT2);
226         flow(DISCARD);
227         flow(END);
228   },
229   {
230         flow(END);
231   });
232}
233
234TEST_F(MergeFlow, WaitWithMessage) {
235   CASE({
236         bi_ld_attr_imm_to(b, bi_register(16), bi_register(60), bi_register(61),
237                           BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4, 1);
238         flow(WAIT0);
239   },
240   {
241         I = bi_ld_attr_imm_to(b, bi_register(16), bi_register(60), bi_register(61),
242                               BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4, 1);
243         I->flow = VA_FLOW_WAIT0;
244   });
245}
246
247TEST_F(MergeFlow, CantMoveWaitPastMessage) {
248   NEGCASE({
249         bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
250         I = bi_ld_attr_imm_to(b, bi_register(16), bi_register(60), bi_register(61),
251                           BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4, 1);
252
253         /* Pretend it's blocked for some reason. This doesn't actually happen
254          * with the current algorithm, but it's good to handle the special
255          * cases correctly in case we change later on.
256          */
257         I->flow = VA_FLOW_DISCARD;
258         flow(WAIT0);
259   });
260}
261
262TEST_F(MergeFlow, DeletePointlessDiscard) {
263   CASE({
264         bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
265         bi_tex_single_to(b, bi_register(0), bi_register(4), bi_register(8),
266                          bi_register(12), false, BI_DIMENSION_2D,
267                          BI_REGISTER_FORMAT_F32, false, false,
268                          BI_VA_LOD_MODE_COMPUTED_LOD, BI_WRITE_MASK_RGBA, 4);
269         flow(DISCARD);
270         flow(WAIT0);
271         flow(WAIT0126);
272         bi_atest_to(b, bi_register(0), bi_register(4), bi_register(5));
273         flow(WAIT);
274         bi_blend_to(b, bi_register(0), bi_register(4), bi_register(5),
275                     bi_register(6), bi_register(7), bi_register(8),
276                     BI_REGISTER_FORMAT_AUTO, 4, 4);
277         flow(END);
278   },
279   {
280         bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0));
281         I = bi_tex_single_to(b, bi_register(0), bi_register(4), bi_register(8),
282                          bi_register(12), false, BI_DIMENSION_2D,
283                          BI_REGISTER_FORMAT_F32, false, false,
284                          BI_VA_LOD_MODE_COMPUTED_LOD, BI_WRITE_MASK_RGBA, 4);
285         I->flow = VA_FLOW_WAIT0126;
286         I = bi_atest_to(b, bi_register(0), bi_register(4), bi_register(5));
287         I->flow = VA_FLOW_WAIT;
288         I = bi_blend_to(b, bi_register(0), bi_register(4), bi_register(5),
289                         bi_register(6), bi_register(7), bi_register(8),
290                         BI_REGISTER_FORMAT_AUTO, 4, 4);
291         I->flow = VA_FLOW_END;
292   });
293}
294
295TEST_F(MergeFlow, PreserveTerminalBarriers) {
296   CASE({
297         bi_barrier(b);
298         flow(WAIT);
299         flow(END);
300   },
301   {
302         bi_barrier(b)->flow = VA_FLOW_WAIT;
303         flow(END);
304   });
305}
306