1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright (C) 2021 Collabora, Ltd. 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21bf215546Sopenharmony_ci * SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include "compiler.h" 25bf215546Sopenharmony_ci#include "bi_test.h" 26bf215546Sopenharmony_ci#include "bi_builder.h" 27bf215546Sopenharmony_ci 28bf215546Sopenharmony_ci#include <gtest/gtest.h> 29bf215546Sopenharmony_ci 30bf215546Sopenharmony_cistatic void 31bf215546Sopenharmony_cibi_optimizer(bi_context *ctx) 32bf215546Sopenharmony_ci{ 33bf215546Sopenharmony_ci bi_opt_mod_prop_forward(ctx); 34bf215546Sopenharmony_ci bi_opt_mod_prop_backward(ctx); 35bf215546Sopenharmony_ci bi_opt_dead_code_eliminate(ctx); 36bf215546Sopenharmony_ci} 37bf215546Sopenharmony_ci 38bf215546Sopenharmony_ci#define CASE(instr, expected) INSTRUCTION_CASE(instr, expected, bi_optimizer) 39bf215546Sopenharmony_ci#define NEGCASE(instr) CASE(instr, instr) 40bf215546Sopenharmony_ci 41bf215546Sopenharmony_ciclass Optimizer : public testing::Test { 42bf215546Sopenharmony_ciprotected: 43bf215546Sopenharmony_ci Optimizer() { 44bf215546Sopenharmony_ci mem_ctx = ralloc_context(NULL); 45bf215546Sopenharmony_ci 46bf215546Sopenharmony_ci reg = bi_register(0); 47bf215546Sopenharmony_ci x = bi_register(1); 48bf215546Sopenharmony_ci y = bi_register(2); 49bf215546Sopenharmony_ci negabsx = bi_neg(bi_abs(x)); 50bf215546Sopenharmony_ci } 51bf215546Sopenharmony_ci 52bf215546Sopenharmony_ci ~Optimizer() { 53bf215546Sopenharmony_ci ralloc_free(mem_ctx); 54bf215546Sopenharmony_ci } 55bf215546Sopenharmony_ci 56bf215546Sopenharmony_ci void *mem_ctx; 57bf215546Sopenharmony_ci 58bf215546Sopenharmony_ci bi_index reg; 59bf215546Sopenharmony_ci bi_index x; 60bf215546Sopenharmony_ci bi_index y; 61bf215546Sopenharmony_ci bi_index negabsx; 62bf215546Sopenharmony_ci}; 63bf215546Sopenharmony_ci 64bf215546Sopenharmony_ciTEST_F(Optimizer, FusedFABSNEG) 65bf215546Sopenharmony_ci{ 66bf215546Sopenharmony_ci CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_abs(x)), y), 67bf215546Sopenharmony_ci bi_fadd_f32_to(b, reg, bi_abs(x), y)); 68bf215546Sopenharmony_ci 69bf215546Sopenharmony_ci CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_neg(x)), y), 70bf215546Sopenharmony_ci bi_fadd_f32_to(b, reg, bi_neg(x), y)); 71bf215546Sopenharmony_ci 72bf215546Sopenharmony_ci CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, negabsx), y), 73bf215546Sopenharmony_ci bi_fadd_f32_to(b, reg, negabsx, y)); 74bf215546Sopenharmony_ci 75bf215546Sopenharmony_ci CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, x), y), 76bf215546Sopenharmony_ci bi_fadd_f32_to(b, reg, x, y)); 77bf215546Sopenharmony_ci 78bf215546Sopenharmony_ci CASE(bi_fmin_f32_to(b, reg, bi_fabsneg_f32(b, negabsx), bi_neg(y)), 79bf215546Sopenharmony_ci bi_fmin_f32_to(b, reg, negabsx, bi_neg(y))); 80bf215546Sopenharmony_ci} 81bf215546Sopenharmony_ci 82bf215546Sopenharmony_ciTEST_F(Optimizer, FusedFABSNEGForFP16) 83bf215546Sopenharmony_ci{ 84bf215546Sopenharmony_ci CASE(bi_fadd_v2f16_to(b, reg, bi_fabsneg_v2f16(b, negabsx), y), 85bf215546Sopenharmony_ci bi_fadd_v2f16_to(b, reg, negabsx, y)); 86bf215546Sopenharmony_ci 87bf215546Sopenharmony_ci CASE(bi_fmin_v2f16_to(b, reg, bi_fabsneg_v2f16(b, negabsx), bi_neg(y)), 88bf215546Sopenharmony_ci bi_fmin_v2f16_to(b, reg, negabsx, bi_neg(y))); 89bf215546Sopenharmony_ci} 90bf215546Sopenharmony_ci 91bf215546Sopenharmony_ciTEST_F(Optimizer, FuseFADD_F32WithEqualSourcesAbsAbsAndClamp) 92bf215546Sopenharmony_ci{ 93bf215546Sopenharmony_ci CASE({ 94bf215546Sopenharmony_ci bi_instr *I = bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_abs(x)), bi_abs(x)); 95bf215546Sopenharmony_ci I->clamp = BI_CLAMP_CLAMP_0_1; 96bf215546Sopenharmony_ci }, { 97bf215546Sopenharmony_ci bi_instr *I = bi_fadd_f32_to(b, reg, bi_abs(x), bi_abs(x)); 98bf215546Sopenharmony_ci I->clamp = BI_CLAMP_CLAMP_0_1; 99bf215546Sopenharmony_ci }); 100bf215546Sopenharmony_ci 101bf215546Sopenharmony_ci CASE({ 102bf215546Sopenharmony_ci bi_instr *I = bi_fadd_f32_to(b, reg, bi_abs(x), bi_fabsneg_f32(b, bi_abs(x))); 103bf215546Sopenharmony_ci I->clamp = BI_CLAMP_CLAMP_0_1; 104bf215546Sopenharmony_ci }, { 105bf215546Sopenharmony_ci bi_instr *I = bi_fadd_f32_to(b, reg, bi_abs(x), bi_abs(x)); 106bf215546Sopenharmony_ci I->clamp = BI_CLAMP_CLAMP_0_1; 107bf215546Sopenharmony_ci }); 108bf215546Sopenharmony_ci 109bf215546Sopenharmony_ci CASE({ 110bf215546Sopenharmony_ci bi_instr *I = bi_fclamp_f32_to(b, reg, bi_fadd_f32(b, bi_abs(x), bi_abs(x))); 111bf215546Sopenharmony_ci I->clamp = BI_CLAMP_CLAMP_0_INF; 112bf215546Sopenharmony_ci }, { 113bf215546Sopenharmony_ci bi_instr *I = bi_fadd_f32_to(b, reg, bi_abs(x), bi_abs(x)); 114bf215546Sopenharmony_ci I->clamp = BI_CLAMP_CLAMP_0_INF; 115bf215546Sopenharmony_ci }); 116bf215546Sopenharmony_ci} 117bf215546Sopenharmony_ci 118bf215546Sopenharmony_ciTEST_F(Optimizer, FuseFADD_V2F16WithDifferentSourcesAbsAbsAndClamp) 119bf215546Sopenharmony_ci{ 120bf215546Sopenharmony_ci CASE({ 121bf215546Sopenharmony_ci bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_fabsneg_v2f16(b, bi_abs(x)), bi_abs(y)); 122bf215546Sopenharmony_ci I->clamp = BI_CLAMP_CLAMP_0_1; 123bf215546Sopenharmony_ci }, { 124bf215546Sopenharmony_ci bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_abs(y)); 125bf215546Sopenharmony_ci I->clamp = BI_CLAMP_CLAMP_0_1; 126bf215546Sopenharmony_ci }); 127bf215546Sopenharmony_ci 128bf215546Sopenharmony_ci CASE({ 129bf215546Sopenharmony_ci bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_fabsneg_v2f16(b, bi_abs(y))); 130bf215546Sopenharmony_ci I->clamp = BI_CLAMP_CLAMP_0_1; 131bf215546Sopenharmony_ci }, { 132bf215546Sopenharmony_ci bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_abs(y)); 133bf215546Sopenharmony_ci I->clamp = BI_CLAMP_CLAMP_0_1; 134bf215546Sopenharmony_ci }); 135bf215546Sopenharmony_ci 136bf215546Sopenharmony_ci CASE({ 137bf215546Sopenharmony_ci bi_instr *I = bi_fclamp_v2f16_to(b, reg, bi_fadd_v2f16(b, bi_abs(x), bi_abs(y))); 138bf215546Sopenharmony_ci I->clamp = BI_CLAMP_CLAMP_0_INF; 139bf215546Sopenharmony_ci }, { 140bf215546Sopenharmony_ci bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_abs(y)); 141bf215546Sopenharmony_ci I->clamp = BI_CLAMP_CLAMP_0_INF; 142bf215546Sopenharmony_ci }); 143bf215546Sopenharmony_ci} 144bf215546Sopenharmony_ci 145bf215546Sopenharmony_ciTEST_F(Optimizer, AvoidFADD_V2F16WithEqualSourcesAbsAbsAndClamp) 146bf215546Sopenharmony_ci{ 147bf215546Sopenharmony_ci NEGCASE({ 148bf215546Sopenharmony_ci bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_fabsneg_v2f16(b, bi_abs(x)), bi_abs(x)); 149bf215546Sopenharmony_ci I->clamp = BI_CLAMP_CLAMP_0_1; 150bf215546Sopenharmony_ci }); 151bf215546Sopenharmony_ci 152bf215546Sopenharmony_ci NEGCASE({ 153bf215546Sopenharmony_ci bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_abs(x), bi_fabsneg_v2f16(b, bi_abs(x))); 154bf215546Sopenharmony_ci I->clamp = BI_CLAMP_CLAMP_0_1; 155bf215546Sopenharmony_ci }); 156bf215546Sopenharmony_ci 157bf215546Sopenharmony_ci NEGCASE({ 158bf215546Sopenharmony_ci bi_instr *I = bi_fclamp_v2f16_to(b, reg, bi_fadd_v2f16(b, bi_abs(x), bi_abs(x))); 159bf215546Sopenharmony_ci I->clamp = BI_CLAMP_CLAMP_0_INF; 160bf215546Sopenharmony_ci }); 161bf215546Sopenharmony_ci} 162bf215546Sopenharmony_ci 163bf215546Sopenharmony_ciTEST_F(Optimizer, SwizzlesComposedForFP16) 164bf215546Sopenharmony_ci{ 165bf215546Sopenharmony_ci CASE(bi_fadd_v2f16_to(b, reg, bi_fabsneg_v2f16(b, bi_swz_16(negabsx, true, false)), y), 166bf215546Sopenharmony_ci bi_fadd_v2f16_to(b, reg, bi_swz_16(negabsx, true, false), y)); 167bf215546Sopenharmony_ci 168bf215546Sopenharmony_ci CASE(bi_fadd_v2f16_to(b, reg, bi_swz_16(bi_fabsneg_v2f16(b, negabsx), true, false), y), 169bf215546Sopenharmony_ci bi_fadd_v2f16_to(b, reg, bi_swz_16(negabsx, true, false), y)); 170bf215546Sopenharmony_ci 171bf215546Sopenharmony_ci CASE(bi_fadd_v2f16_to(b, reg, bi_swz_16(bi_fabsneg_v2f16(b, bi_swz_16(negabsx, true, false)), true, false), y), 172bf215546Sopenharmony_ci bi_fadd_v2f16_to(b, reg, negabsx, y)); 173bf215546Sopenharmony_ci 174bf215546Sopenharmony_ci CASE(bi_fadd_v2f16_to(b, reg, bi_swz_16(bi_fabsneg_v2f16(b, bi_half(negabsx, false)), true, false), y), 175bf215546Sopenharmony_ci bi_fadd_v2f16_to(b, reg, bi_half(negabsx, false), y)); 176bf215546Sopenharmony_ci 177bf215546Sopenharmony_ci CASE(bi_fadd_v2f16_to(b, reg, bi_swz_16(bi_fabsneg_v2f16(b, bi_half(negabsx, true)), true, false), y), 178bf215546Sopenharmony_ci bi_fadd_v2f16_to(b, reg, bi_half(negabsx, true), y)); 179bf215546Sopenharmony_ci} 180bf215546Sopenharmony_ci 181bf215546Sopenharmony_ciTEST_F(Optimizer, PreserveWidens) 182bf215546Sopenharmony_ci{ 183bf215546Sopenharmony_ci /* Check that widens are passed through */ 184bf215546Sopenharmony_ci CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_half(negabsx, false)), y), 185bf215546Sopenharmony_ci bi_fadd_f32_to(b, reg, bi_half(negabsx, false), y)); 186bf215546Sopenharmony_ci 187bf215546Sopenharmony_ci CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_half(negabsx, true)), y), 188bf215546Sopenharmony_ci bi_fadd_f32_to(b, reg, bi_half(negabsx, true), y)); 189bf215546Sopenharmony_ci 190bf215546Sopenharmony_ci CASE(bi_fadd_f32_to(b, reg, bi_fabsneg_f32(b, bi_half(x, true)), bi_fabsneg_f32(b, bi_half(x, false))), 191bf215546Sopenharmony_ci bi_fadd_f32_to(b, reg, bi_half(x, true), bi_half(x, false))); 192bf215546Sopenharmony_ci} 193bf215546Sopenharmony_ci 194bf215546Sopenharmony_ciTEST_F(Optimizer, DoNotMixSizesForFABSNEG) 195bf215546Sopenharmony_ci{ 196bf215546Sopenharmony_ci /* Refuse to mix sizes for fabsneg, that's wrong */ 197bf215546Sopenharmony_ci NEGCASE(bi_fadd_f32_to(b, reg, bi_fabsneg_v2f16(b, negabsx), y)); 198bf215546Sopenharmony_ci NEGCASE(bi_fadd_v2f16_to(b, reg, bi_fabsneg_f32(b, negabsx), y)); 199bf215546Sopenharmony_ci} 200bf215546Sopenharmony_ci 201bf215546Sopenharmony_ciTEST_F(Optimizer, AvoidZeroAndFABSNEGFootguns) 202bf215546Sopenharmony_ci{ 203bf215546Sopenharmony_ci /* It's tempting to use addition by 0.0 as the absneg primitive, but that 204bf215546Sopenharmony_ci * has footguns around signed zero and round modes. Check we don't 205bf215546Sopenharmony_ci * incorrectly fuse these rules. */ 206bf215546Sopenharmony_ci 207bf215546Sopenharmony_ci bi_index zero = bi_zero(); 208bf215546Sopenharmony_ci 209bf215546Sopenharmony_ci NEGCASE(bi_fadd_f32_to(b, reg, bi_fadd_f32(b, bi_abs(x), zero), y)); 210bf215546Sopenharmony_ci NEGCASE(bi_fadd_f32_to(b, reg, bi_fadd_f32(b, bi_neg(x), zero), y)); 211bf215546Sopenharmony_ci NEGCASE(bi_fadd_f32_to(b, reg, bi_fadd_f32(b, bi_neg(bi_abs(x)), zero), y)); 212bf215546Sopenharmony_ci NEGCASE(bi_fadd_f32_to(b, reg, bi_fadd_f32(b, x, zero), y)); 213bf215546Sopenharmony_ci} 214bf215546Sopenharmony_ci 215bf215546Sopenharmony_ciTEST_F(Optimizer, ClampsPropagated) 216bf215546Sopenharmony_ci{ 217bf215546Sopenharmony_ci CASE({ 218bf215546Sopenharmony_ci bi_instr *I = bi_fclamp_f32_to(b, reg, bi_fadd_f32(b, x, y)); 219bf215546Sopenharmony_ci I->clamp = BI_CLAMP_CLAMP_0_INF; 220bf215546Sopenharmony_ci }, { 221bf215546Sopenharmony_ci bi_instr *I = bi_fadd_f32_to(b, reg, x, y); 222bf215546Sopenharmony_ci I->clamp = BI_CLAMP_CLAMP_0_INF; 223bf215546Sopenharmony_ci }); 224bf215546Sopenharmony_ci 225bf215546Sopenharmony_ci CASE({ 226bf215546Sopenharmony_ci bi_instr *I = bi_fclamp_v2f16_to(b, reg, bi_fadd_v2f16(b, x, y)); 227bf215546Sopenharmony_ci I->clamp = BI_CLAMP_CLAMP_0_1; 228bf215546Sopenharmony_ci }, { 229bf215546Sopenharmony_ci bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y); 230bf215546Sopenharmony_ci I->clamp = BI_CLAMP_CLAMP_0_1; 231bf215546Sopenharmony_ci }); 232bf215546Sopenharmony_ci} 233bf215546Sopenharmony_ci 234bf215546Sopenharmony_ci 235bf215546Sopenharmony_ciTEST_F(Optimizer, ClampsComposed) 236bf215546Sopenharmony_ci{ 237bf215546Sopenharmony_ci CASE({ 238bf215546Sopenharmony_ci bi_instr *I = bi_fadd_f32_to(b, bi_temp(b->shader), x, y); 239bf215546Sopenharmony_ci bi_instr *J = bi_fclamp_f32_to(b, reg, I->dest[0]); 240bf215546Sopenharmony_ci I->clamp = BI_CLAMP_CLAMP_M1_1; 241bf215546Sopenharmony_ci J->clamp = BI_CLAMP_CLAMP_0_INF; 242bf215546Sopenharmony_ci }, { 243bf215546Sopenharmony_ci bi_instr *I = bi_fadd_f32_to(b, reg, x, y); 244bf215546Sopenharmony_ci I->clamp = BI_CLAMP_CLAMP_0_1; 245bf215546Sopenharmony_ci }); 246bf215546Sopenharmony_ci 247bf215546Sopenharmony_ci CASE({ 248bf215546Sopenharmony_ci bi_instr *I = bi_fadd_f32_to(b, bi_temp(b->shader), x, y); 249bf215546Sopenharmony_ci bi_instr *J = bi_fclamp_f32_to(b, reg, I->dest[0]); 250bf215546Sopenharmony_ci I->clamp = BI_CLAMP_CLAMP_0_1; 251bf215546Sopenharmony_ci J->clamp = BI_CLAMP_CLAMP_0_INF; 252bf215546Sopenharmony_ci }, { 253bf215546Sopenharmony_ci bi_instr *I = bi_fadd_f32_to(b, reg, x, y); 254bf215546Sopenharmony_ci I->clamp = BI_CLAMP_CLAMP_0_1; 255bf215546Sopenharmony_ci }); 256bf215546Sopenharmony_ci 257bf215546Sopenharmony_ci CASE({ 258bf215546Sopenharmony_ci bi_instr *I = bi_fadd_f32_to(b, bi_temp(b->shader), x, y); 259bf215546Sopenharmony_ci bi_instr *J = bi_fclamp_f32_to(b, reg, I->dest[0]); 260bf215546Sopenharmony_ci I->clamp = BI_CLAMP_CLAMP_0_INF; 261bf215546Sopenharmony_ci J->clamp = BI_CLAMP_CLAMP_0_INF; 262bf215546Sopenharmony_ci }, { 263bf215546Sopenharmony_ci bi_instr *I = bi_fadd_f32_to(b, reg, x, y); 264bf215546Sopenharmony_ci I->clamp = BI_CLAMP_CLAMP_0_INF; 265bf215546Sopenharmony_ci }); 266bf215546Sopenharmony_ci 267bf215546Sopenharmony_ci CASE({ 268bf215546Sopenharmony_ci bi_instr *I = bi_fadd_v2f16_to(b, bi_temp(b->shader), x, y); 269bf215546Sopenharmony_ci bi_instr *J = bi_fclamp_v2f16_to(b, reg, I->dest[0]); 270bf215546Sopenharmony_ci I->clamp = BI_CLAMP_CLAMP_M1_1; 271bf215546Sopenharmony_ci J->clamp = BI_CLAMP_CLAMP_0_INF; 272bf215546Sopenharmony_ci }, { 273bf215546Sopenharmony_ci bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y); 274bf215546Sopenharmony_ci I->clamp = BI_CLAMP_CLAMP_0_1; 275bf215546Sopenharmony_ci }); 276bf215546Sopenharmony_ci 277bf215546Sopenharmony_ci CASE({ 278bf215546Sopenharmony_ci bi_instr *I = bi_fadd_v2f16_to(b, bi_temp(b->shader), x, y); 279bf215546Sopenharmony_ci bi_instr *J = bi_fclamp_v2f16_to(b, reg, I->dest[0]); 280bf215546Sopenharmony_ci I->clamp = BI_CLAMP_CLAMP_0_1; 281bf215546Sopenharmony_ci J->clamp = BI_CLAMP_CLAMP_0_INF; 282bf215546Sopenharmony_ci }, { 283bf215546Sopenharmony_ci bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y); 284bf215546Sopenharmony_ci I->clamp = BI_CLAMP_CLAMP_0_1; 285bf215546Sopenharmony_ci }); 286bf215546Sopenharmony_ci 287bf215546Sopenharmony_ci CASE({ 288bf215546Sopenharmony_ci bi_instr *I = bi_fadd_v2f16_to(b, bi_temp(b->shader), x, y); 289bf215546Sopenharmony_ci bi_instr *J = bi_fclamp_v2f16_to(b, reg, I->dest[0]); 290bf215546Sopenharmony_ci I->clamp = BI_CLAMP_CLAMP_0_INF; 291bf215546Sopenharmony_ci J->clamp = BI_CLAMP_CLAMP_0_INF; 292bf215546Sopenharmony_ci }, { 293bf215546Sopenharmony_ci bi_instr *I = bi_fadd_v2f16_to(b, reg, x, y); 294bf215546Sopenharmony_ci I->clamp = BI_CLAMP_CLAMP_0_INF; 295bf215546Sopenharmony_ci }); 296bf215546Sopenharmony_ci} 297bf215546Sopenharmony_ci 298bf215546Sopenharmony_ciTEST_F(Optimizer, DoNotMixSizesWhenClamping) 299bf215546Sopenharmony_ci{ 300bf215546Sopenharmony_ci NEGCASE({ 301bf215546Sopenharmony_ci bi_instr *I = bi_fclamp_f32_to(b, reg, bi_fadd_v2f16(b, x, y)); 302bf215546Sopenharmony_ci I->clamp = BI_CLAMP_CLAMP_0_1; 303bf215546Sopenharmony_ci }); 304bf215546Sopenharmony_ci 305bf215546Sopenharmony_ci NEGCASE({ 306bf215546Sopenharmony_ci bi_instr *I = bi_fclamp_v2f16_to(b, reg, bi_fadd_f32(b, x, y)); 307bf215546Sopenharmony_ci I->clamp = BI_CLAMP_CLAMP_0_1; 308bf215546Sopenharmony_ci }); 309bf215546Sopenharmony_ci} 310bf215546Sopenharmony_ci 311bf215546Sopenharmony_ciTEST_F(Optimizer, DoNotUseAdditionByZeroForClamps) 312bf215546Sopenharmony_ci{ 313bf215546Sopenharmony_ci bi_index zero = bi_zero(); 314bf215546Sopenharmony_ci 315bf215546Sopenharmony_ci /* We can't use addition by 0.0 for clamps due to signed zeros. */ 316bf215546Sopenharmony_ci NEGCASE({ 317bf215546Sopenharmony_ci bi_instr *I = bi_fadd_f32_to(b, reg, bi_fadd_f32(b, x, y), zero); 318bf215546Sopenharmony_ci I->clamp = BI_CLAMP_CLAMP_M1_1; 319bf215546Sopenharmony_ci }); 320bf215546Sopenharmony_ci 321bf215546Sopenharmony_ci NEGCASE({ 322bf215546Sopenharmony_ci bi_instr *I = bi_fadd_v2f16_to(b, reg, bi_fadd_v2f16(b, x, y), zero); 323bf215546Sopenharmony_ci I->clamp = BI_CLAMP_CLAMP_0_1; 324bf215546Sopenharmony_ci }); 325bf215546Sopenharmony_ci} 326bf215546Sopenharmony_ci 327bf215546Sopenharmony_ciTEST_F(Optimizer, FuseComparisonsWithDISCARD) 328bf215546Sopenharmony_ci{ 329bf215546Sopenharmony_ci CASE(bi_discard_b32(b, bi_fcmp_f32(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_F1)), 330bf215546Sopenharmony_ci bi_discard_f32(b, x, y, BI_CMPF_LE)); 331bf215546Sopenharmony_ci 332bf215546Sopenharmony_ci CASE(bi_discard_b32(b, bi_fcmp_f32(b, x, y, BI_CMPF_NE, BI_RESULT_TYPE_I1)), 333bf215546Sopenharmony_ci bi_discard_f32(b, x, y, BI_CMPF_NE)); 334bf215546Sopenharmony_ci 335bf215546Sopenharmony_ci CASE(bi_discard_b32(b, bi_fcmp_f32(b, x, y, BI_CMPF_EQ, BI_RESULT_TYPE_M1)), 336bf215546Sopenharmony_ci bi_discard_f32(b, x, y, BI_CMPF_EQ)); 337bf215546Sopenharmony_ci 338bf215546Sopenharmony_ci for (unsigned h = 0; h < 2; ++h) { 339bf215546Sopenharmony_ci CASE(bi_discard_b32(b, bi_half(bi_fcmp_v2f16(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_F1), h)), 340bf215546Sopenharmony_ci bi_discard_f32(b, bi_half(x, h), bi_half(y, h), BI_CMPF_LE)); 341bf215546Sopenharmony_ci 342bf215546Sopenharmony_ci CASE(bi_discard_b32(b, bi_half(bi_fcmp_v2f16(b, x, y, BI_CMPF_NE, BI_RESULT_TYPE_I1), h)), 343bf215546Sopenharmony_ci bi_discard_f32(b, bi_half(x, h), bi_half(y, h), BI_CMPF_NE)); 344bf215546Sopenharmony_ci 345bf215546Sopenharmony_ci CASE(bi_discard_b32(b, bi_half(bi_fcmp_v2f16(b, x, y, BI_CMPF_EQ, BI_RESULT_TYPE_M1), h)), 346bf215546Sopenharmony_ci bi_discard_f32(b, bi_half(x, h), bi_half(y, h), BI_CMPF_EQ)); 347bf215546Sopenharmony_ci } 348bf215546Sopenharmony_ci} 349bf215546Sopenharmony_ci 350bf215546Sopenharmony_ciTEST_F(Optimizer, DoNotFuseSpecialComparisons) 351bf215546Sopenharmony_ci{ 352bf215546Sopenharmony_ci NEGCASE(bi_discard_b32(b, bi_fcmp_f32(b, x, y, BI_CMPF_GTLT, BI_RESULT_TYPE_F1))); 353bf215546Sopenharmony_ci NEGCASE(bi_discard_b32(b, bi_fcmp_f32(b, x, y, BI_CMPF_TOTAL, BI_RESULT_TYPE_F1))); 354bf215546Sopenharmony_ci} 355bf215546Sopenharmony_ci 356bf215546Sopenharmony_ciTEST_F(Optimizer, FuseResultType) 357bf215546Sopenharmony_ci{ 358bf215546Sopenharmony_ci CASE(bi_mux_i32_to(b, reg, bi_imm_f32(0.0), bi_imm_f32(1.0), 359bf215546Sopenharmony_ci bi_fcmp_f32(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_M1), 360bf215546Sopenharmony_ci BI_MUX_INT_ZERO), 361bf215546Sopenharmony_ci bi_fcmp_f32_to(b, reg, x, y, BI_CMPF_LE, BI_RESULT_TYPE_F1)); 362bf215546Sopenharmony_ci 363bf215546Sopenharmony_ci CASE(bi_mux_i32_to(b, reg, bi_imm_f32(0.0), bi_imm_f32(1.0), 364bf215546Sopenharmony_ci bi_fcmp_f32(b, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_M1), 365bf215546Sopenharmony_ci BI_MUX_INT_ZERO), 366bf215546Sopenharmony_ci bi_fcmp_f32_to(b, reg, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_F1)); 367bf215546Sopenharmony_ci 368bf215546Sopenharmony_ci CASE(bi_mux_i32_to(b, reg, bi_imm_u32(0), bi_imm_u32(1), 369bf215546Sopenharmony_ci bi_fcmp_f32(b, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_M1), 370bf215546Sopenharmony_ci BI_MUX_INT_ZERO), 371bf215546Sopenharmony_ci bi_fcmp_f32_to(b, reg, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_I1)); 372bf215546Sopenharmony_ci 373bf215546Sopenharmony_ci CASE(bi_mux_v2i16_to(b, reg, bi_imm_f16(0.0), bi_imm_f16(1.0), 374bf215546Sopenharmony_ci bi_fcmp_v2f16(b, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_M1), 375bf215546Sopenharmony_ci BI_MUX_INT_ZERO), 376bf215546Sopenharmony_ci bi_fcmp_v2f16_to(b, reg, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_F1)); 377bf215546Sopenharmony_ci 378bf215546Sopenharmony_ci CASE(bi_mux_v2i16_to(b, reg, bi_imm_u16(0), bi_imm_u16(1), 379bf215546Sopenharmony_ci bi_fcmp_v2f16(b, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_M1), 380bf215546Sopenharmony_ci BI_MUX_INT_ZERO), 381bf215546Sopenharmony_ci bi_fcmp_v2f16_to(b, reg, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_I1)); 382bf215546Sopenharmony_ci 383bf215546Sopenharmony_ci CASE(bi_mux_i32_to(b, reg, bi_imm_u32(0), bi_imm_u32(1), 384bf215546Sopenharmony_ci bi_icmp_u32(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_M1), 385bf215546Sopenharmony_ci BI_MUX_INT_ZERO), 386bf215546Sopenharmony_ci bi_icmp_u32_to(b, reg, x, y, BI_CMPF_LE, BI_RESULT_TYPE_I1)); 387bf215546Sopenharmony_ci 388bf215546Sopenharmony_ci CASE(bi_mux_v2i16_to(b, reg, bi_imm_u16(0), bi_imm_u16(1), 389bf215546Sopenharmony_ci bi_icmp_v2u16(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_M1), 390bf215546Sopenharmony_ci BI_MUX_INT_ZERO), 391bf215546Sopenharmony_ci bi_icmp_v2u16_to(b, reg, x, y, BI_CMPF_LE, BI_RESULT_TYPE_I1)); 392bf215546Sopenharmony_ci 393bf215546Sopenharmony_ci CASE(bi_mux_v4i8_to(b, reg, bi_imm_u8(0), bi_imm_u8(1), 394bf215546Sopenharmony_ci bi_icmp_v4u8(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_M1), 395bf215546Sopenharmony_ci BI_MUX_INT_ZERO), 396bf215546Sopenharmony_ci bi_icmp_v4u8_to(b, reg, x, y, BI_CMPF_LE, BI_RESULT_TYPE_I1)); 397bf215546Sopenharmony_ci 398bf215546Sopenharmony_ci CASE(bi_mux_i32_to(b, reg, bi_imm_u32(0), bi_imm_u32(1), 399bf215546Sopenharmony_ci bi_icmp_s32(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_M1), 400bf215546Sopenharmony_ci BI_MUX_INT_ZERO), 401bf215546Sopenharmony_ci bi_icmp_s32_to(b, reg, x, y, BI_CMPF_LE, BI_RESULT_TYPE_I1)); 402bf215546Sopenharmony_ci 403bf215546Sopenharmony_ci CASE(bi_mux_v2i16_to(b, reg, bi_imm_u16(0), bi_imm_u16(1), 404bf215546Sopenharmony_ci bi_icmp_v2s16(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_M1), 405bf215546Sopenharmony_ci BI_MUX_INT_ZERO), 406bf215546Sopenharmony_ci bi_icmp_v2s16_to(b, reg, x, y, BI_CMPF_LE, BI_RESULT_TYPE_I1)); 407bf215546Sopenharmony_ci 408bf215546Sopenharmony_ci CASE(bi_mux_v4i8_to(b, reg, bi_imm_u8(0), bi_imm_u8(1), 409bf215546Sopenharmony_ci bi_icmp_v4s8(b, x, y, BI_CMPF_LE, BI_RESULT_TYPE_M1), 410bf215546Sopenharmony_ci BI_MUX_INT_ZERO), 411bf215546Sopenharmony_ci bi_icmp_v4s8_to(b, reg, x, y, BI_CMPF_LE, BI_RESULT_TYPE_I1)); 412bf215546Sopenharmony_ci} 413bf215546Sopenharmony_ci 414bf215546Sopenharmony_ciTEST_F(Optimizer, DoNotFuseMixedSizeResultType) 415bf215546Sopenharmony_ci{ 416bf215546Sopenharmony_ci NEGCASE(bi_mux_i32_to(b, reg, bi_imm_f32(0.0), bi_imm_f32(1.0), 417bf215546Sopenharmony_ci bi_fcmp_v2f16(b, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_M1), 418bf215546Sopenharmony_ci BI_MUX_INT_ZERO)); 419bf215546Sopenharmony_ci 420bf215546Sopenharmony_ci NEGCASE(bi_mux_v2i16_to(b, reg, bi_imm_f16(0.0), bi_imm_f16(1.0), 421bf215546Sopenharmony_ci bi_fcmp_f32(b, bi_abs(x), bi_neg(y), BI_CMPF_LE, BI_RESULT_TYPE_M1), 422bf215546Sopenharmony_ci BI_MUX_INT_ZERO)); 423bf215546Sopenharmony_ci} 424bf215546Sopenharmony_ci 425bf215546Sopenharmony_ciTEST_F(Optimizer, VarTexCoord32) 426bf215546Sopenharmony_ci{ 427bf215546Sopenharmony_ci CASE({ 428bf215546Sopenharmony_ci bi_index ld = bi_ld_var_imm(b, bi_null(), BI_REGISTER_FORMAT_F32, BI_SAMPLE_CENTER, BI_UPDATE_STORE, BI_VECSIZE_V2, 0); 429bf215546Sopenharmony_ci 430bf215546Sopenharmony_ci bi_index x = bi_temp(b->shader); 431bf215546Sopenharmony_ci bi_index y = bi_temp(b->shader); 432bf215546Sopenharmony_ci bi_instr *split = bi_split_i32_to(b, x, ld); 433bf215546Sopenharmony_ci split->nr_dests = 2; 434bf215546Sopenharmony_ci split->dest[1] = y; 435bf215546Sopenharmony_ci 436bf215546Sopenharmony_ci bi_texs_2d_f32_to(b, reg, x, y, false, 0, 0); 437bf215546Sopenharmony_ci }, { 438bf215546Sopenharmony_ci bi_var_tex_f32_to(b, reg, false, BI_SAMPLE_CENTER, BI_UPDATE_STORE, 0, 0); 439bf215546Sopenharmony_ci }); 440bf215546Sopenharmony_ci} 441