1/* 2 * Copyright (C) 2022 Collabora, Ltd. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 */ 23 24#include "bi_test.h" 25#include "bi_builder.h" 26#include "va_compiler.h" 27#include "valhall_enums.h" 28 29#include <gtest/gtest.h> 30 31#define CASE(test, expected) do { \ 32 bi_builder *A = bit_builder(mem_ctx); \ 33 bi_builder *B = bit_builder(mem_ctx); \ 34 { \ 35 bi_builder *b = A; \ 36 A->shader->stage = MESA_SHADER_FRAGMENT; \ 37 test; \ 38 } \ 39 va_merge_flow(A->shader); \ 40 { \ 41 bi_builder *b = B; \ 42 B->shader->stage = MESA_SHADER_FRAGMENT; \ 43 expected; \ 44 } \ 45 ASSERT_SHADER_EQUAL(A->shader, B->shader); \ 46} while(0) 47 48#define NEGCASE(test) CASE(test, test) 49 50#define flow(f) bi_nop(b)->flow = VA_FLOW_ ## f 51 52class MergeFlow : public testing::Test { 53protected: 54 MergeFlow() { 55 mem_ctx = ralloc_context(NULL); 56 } 57 58 ~MergeFlow() { 59 ralloc_free(mem_ctx); 60 } 61 62 void *mem_ctx; 63 bi_instr *I; 64}; 65 66TEST_F(MergeFlow, End) { 67 CASE({ 68 bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); 69 bi_blend_to(b, bi_register(0), bi_register(4), bi_register(5), 70 bi_register(6), bi_register(7), bi_register(8), 71 BI_REGISTER_FORMAT_AUTO, 4, 4); 72 flow(END); 73 }, 74 { 75 bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); 76 I = bi_blend_to(b, bi_register(0), bi_register(4), bi_register(5), 77 bi_register(6), bi_register(7), bi_register(8), 78 BI_REGISTER_FORMAT_AUTO, 4, 4); 79 I->flow = VA_FLOW_END; 80 }); 81} 82 83TEST_F(MergeFlow, Reconverge) { 84 CASE({ 85 bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); 86 bi_blend_to(b, bi_register(0), bi_register(4), bi_register(5), 87 bi_register(6), bi_register(7), bi_register(8), 88 BI_REGISTER_FORMAT_AUTO, 4, 4); 89 flow(RECONVERGE); 90 }, 91 { 92 bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); 93 I = bi_blend_to(b, bi_register(0), bi_register(4), bi_register(5), 94 bi_register(6), bi_register(7), bi_register(8), 95 BI_REGISTER_FORMAT_AUTO, 4, 4); 96 I->flow = VA_FLOW_RECONVERGE; 97 }); 98} 99 100TEST_F(MergeFlow, TrivialWait) { 101 CASE({ 102 bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); 103 flow(WAIT0126); 104 bi_atest_to(b, bi_register(0), bi_register(4), bi_register(5)); 105 }, 106 { 107 I = bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); 108 I->flow = VA_FLOW_WAIT0126; 109 bi_atest_to(b, bi_register(0), bi_register(4), bi_register(5)); 110 }); 111} 112 113TEST_F(MergeFlow, LoadThenUnrelatedThenUse) { 114 CASE({ 115 bi_ld_attr_imm_to(b, bi_register(16), bi_register(60), bi_register(61), 116 BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4, 1); 117 bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); 118 flow(WAIT0); 119 bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(19)); 120 flow(END); 121 }, 122 { 123 bi_ld_attr_imm_to(b, bi_register(16), bi_register(60), bi_register(61), 124 BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4, 1); 125 I = bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); 126 I->flow = VA_FLOW_WAIT0; 127 I = bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(19)); 128 I->flow = VA_FLOW_END; 129 }); 130} 131 132TEST_F(MergeFlow, TrivialDiscard) { 133 CASE({ 134 bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); 135 bi_clper_i32_to(b, bi_register(0), bi_register(4), bi_register(8), 136 BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE, 137 BI_SUBGROUP_SUBGROUP4); 138 flow(DISCARD); 139 bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); 140 flow(END); 141 }, 142 { 143 bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); 144 I = bi_clper_i32_to(b, bi_register(0), bi_register(4), bi_register(8), 145 BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE, 146 BI_SUBGROUP_SUBGROUP4); 147 I->flow = VA_FLOW_DISCARD; 148 I = bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); 149 I->flow = VA_FLOW_END; 150 }); 151} 152 153TEST_F(MergeFlow, TrivialDiscardAtTheStart) { 154 CASE({ 155 flow(DISCARD); 156 bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); 157 }, 158 { 159 I = bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); 160 I->flow = VA_FLOW_DISCARD; 161 }); 162} 163 164TEST_F(MergeFlow, MoveDiscardPastWait) { 165 CASE({ 166 bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); 167 bi_clper_i32_to(b, bi_register(0), bi_register(4), bi_register(8), 168 BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE, 169 BI_SUBGROUP_SUBGROUP4); 170 flow(DISCARD); 171 flow(WAIT0); 172 bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); 173 }, 174 { 175 bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); 176 I = bi_clper_i32_to(b, bi_register(0), bi_register(4), bi_register(8), 177 BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE, 178 BI_SUBGROUP_SUBGROUP4); 179 I->flow = VA_FLOW_WAIT0; 180 I = bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); 181 I->flow = VA_FLOW_DISCARD; 182 }); 183} 184 185TEST_F(MergeFlow, OccludedWaitsAndDiscard) { 186 CASE({ 187 bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); 188 bi_clper_i32_to(b, bi_register(0), bi_register(4), bi_register(8), 189 BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE, 190 BI_SUBGROUP_SUBGROUP4); 191 flow(WAIT0); 192 flow(DISCARD); 193 flow(WAIT2); 194 bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); 195 }, 196 { 197 bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); 198 I = bi_clper_i32_to(b, bi_register(0), bi_register(4), bi_register(8), 199 BI_INACTIVE_RESULT_ZERO, BI_LANE_OP_NONE, 200 BI_SUBGROUP_SUBGROUP4); 201 I->flow = VA_FLOW_WAIT02; 202 I = bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); 203 I->flow = VA_FLOW_DISCARD; 204 }); 205} 206 207TEST_F(MergeFlow, DeleteUselessWaits) { 208 CASE({ 209 bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); 210 bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); 211 flow(WAIT0); 212 flow(WAIT2); 213 flow(END); 214 }, 215 { 216 bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); 217 I = bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); 218 I->flow = VA_FLOW_END; 219 }); 220} 221 222TEST_F(MergeFlow, BlockFullOfUselessWaits) { 223 CASE({ 224 flow(WAIT0); 225 flow(WAIT2); 226 flow(DISCARD); 227 flow(END); 228 }, 229 { 230 flow(END); 231 }); 232} 233 234TEST_F(MergeFlow, WaitWithMessage) { 235 CASE({ 236 bi_ld_attr_imm_to(b, bi_register(16), bi_register(60), bi_register(61), 237 BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4, 1); 238 flow(WAIT0); 239 }, 240 { 241 I = bi_ld_attr_imm_to(b, bi_register(16), bi_register(60), bi_register(61), 242 BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4, 1); 243 I->flow = VA_FLOW_WAIT0; 244 }); 245} 246 247TEST_F(MergeFlow, CantMoveWaitPastMessage) { 248 NEGCASE({ 249 bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); 250 I = bi_ld_attr_imm_to(b, bi_register(16), bi_register(60), bi_register(61), 251 BI_REGISTER_FORMAT_F32, BI_VECSIZE_V4, 1); 252 253 /* Pretend it's blocked for some reason. This doesn't actually happen 254 * with the current algorithm, but it's good to handle the special 255 * cases correctly in case we change later on. 256 */ 257 I->flow = VA_FLOW_DISCARD; 258 flow(WAIT0); 259 }); 260} 261 262TEST_F(MergeFlow, DeletePointlessDiscard) { 263 CASE({ 264 bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); 265 bi_tex_single_to(b, bi_register(0), bi_register(4), bi_register(8), 266 bi_register(12), false, BI_DIMENSION_2D, 267 BI_REGISTER_FORMAT_F32, false, false, 268 BI_VA_LOD_MODE_COMPUTED_LOD, BI_WRITE_MASK_RGBA, 4); 269 flow(DISCARD); 270 flow(WAIT0); 271 flow(WAIT0126); 272 bi_atest_to(b, bi_register(0), bi_register(4), bi_register(5)); 273 flow(WAIT); 274 bi_blend_to(b, bi_register(0), bi_register(4), bi_register(5), 275 bi_register(6), bi_register(7), bi_register(8), 276 BI_REGISTER_FORMAT_AUTO, 4, 4); 277 flow(END); 278 }, 279 { 280 bi_fadd_f32_to(b, bi_register(0), bi_register(0), bi_register(0)); 281 I = bi_tex_single_to(b, bi_register(0), bi_register(4), bi_register(8), 282 bi_register(12), false, BI_DIMENSION_2D, 283 BI_REGISTER_FORMAT_F32, false, false, 284 BI_VA_LOD_MODE_COMPUTED_LOD, BI_WRITE_MASK_RGBA, 4); 285 I->flow = VA_FLOW_WAIT0126; 286 I = bi_atest_to(b, bi_register(0), bi_register(4), bi_register(5)); 287 I->flow = VA_FLOW_WAIT; 288 I = bi_blend_to(b, bi_register(0), bi_register(4), bi_register(5), 289 bi_register(6), bi_register(7), bi_register(8), 290 BI_REGISTER_FORMAT_AUTO, 4, 4); 291 I->flow = VA_FLOW_END; 292 }); 293} 294 295TEST_F(MergeFlow, PreserveTerminalBarriers) { 296 CASE({ 297 bi_barrier(b); 298 flow(WAIT); 299 flow(END); 300 }, 301 { 302 bi_barrier(b)->flow = VA_FLOW_WAIT; 303 flow(END); 304 }); 305} 306