1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2021 Valve Corporation 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci * 23bf215546Sopenharmony_ci */ 24bf215546Sopenharmony_ci 25bf215546Sopenharmony_ci#include "helpers.h" 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_ciusing namespace aco; 28bf215546Sopenharmony_ci 29bf215546Sopenharmony_ciBEGIN_TEST(optimizer_postRA.vcmp) 30bf215546Sopenharmony_ci PhysReg reg_v0(256); 31bf215546Sopenharmony_ci PhysReg reg_s0(0); 32bf215546Sopenharmony_ci PhysReg reg_s2(2); 33bf215546Sopenharmony_ci PhysReg reg_s4(4); 34bf215546Sopenharmony_ci 35bf215546Sopenharmony_ci //>> v1: %a:v[0] = p_startpgm 36bf215546Sopenharmony_ci ASSERTED bool setup_ok = setup_cs("v1", GFX8); 37bf215546Sopenharmony_ci assert(setup_ok); 38bf215546Sopenharmony_ci 39bf215546Sopenharmony_ci auto &startpgm = bld.instructions->at(0); 40bf215546Sopenharmony_ci assert(startpgm->opcode == aco_opcode::p_startpgm); 41bf215546Sopenharmony_ci startpgm->definitions[0].setFixed(reg_v0); 42bf215546Sopenharmony_ci 43bf215546Sopenharmony_ci Temp v_in = inputs[0]; 44bf215546Sopenharmony_ci 45bf215546Sopenharmony_ci { 46bf215546Sopenharmony_ci /* Recognize when the result of VOPC goes to VCC, and use that for the branching then. */ 47bf215546Sopenharmony_ci 48bf215546Sopenharmony_ci //! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0] 49bf215546Sopenharmony_ci //! s2: %e:s[2-3] = p_cbranch_z %b:vcc 50bf215546Sopenharmony_ci //! p_unit_test 0, %e:s[2-3] 51bf215546Sopenharmony_ci auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(), 52bf215546Sopenharmony_ci Operand(v_in, reg_v0)); 53bf215546Sopenharmony_ci auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp), Operand(exec, bld.lm)); 54bf215546Sopenharmony_ci auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp())); 55bf215546Sopenharmony_ci writeout(0, Operand(br, reg_s2)); 56bf215546Sopenharmony_ci } 57bf215546Sopenharmony_ci 58bf215546Sopenharmony_ci //; del b, e 59bf215546Sopenharmony_ci 60bf215546Sopenharmony_ci { 61bf215546Sopenharmony_ci /* When VCC is overwritten inbetween, don't optimize. */ 62bf215546Sopenharmony_ci 63bf215546Sopenharmony_ci //! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0] 64bf215546Sopenharmony_ci //! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec 65bf215546Sopenharmony_ci //! s2: %f:vcc = s_mov_b64 0 66bf215546Sopenharmony_ci //! s2: %e:s[2-3] = p_cbranch_z %d:scc 67bf215546Sopenharmony_ci //! p_unit_test 1, %e:s[2-3], %f:vcc 68bf215546Sopenharmony_ci auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(), 69bf215546Sopenharmony_ci Operand(v_in, reg_v0)); 70bf215546Sopenharmony_ci auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp), Operand(exec, bld.lm)); 71bf215546Sopenharmony_ci auto ovrwr = bld.sop1(Builder::s_mov, bld.def(bld.lm, vcc), Operand::zero()); 72bf215546Sopenharmony_ci auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp())); 73bf215546Sopenharmony_ci writeout(1, Operand(br, reg_s2), Operand(ovrwr, vcc)); 74bf215546Sopenharmony_ci } 75bf215546Sopenharmony_ci 76bf215546Sopenharmony_ci //; del b, c, d, e, f 77bf215546Sopenharmony_ci 78bf215546Sopenharmony_ci { 79bf215546Sopenharmony_ci /* When the result of VOPC goes to an SGPR pair other than VCC, don't optimize */ 80bf215546Sopenharmony_ci 81bf215546Sopenharmony_ci //! s2: %b:s[4-5] = v_cmp_eq_u32 0, %a:v[0] 82bf215546Sopenharmony_ci //! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:s[4-5], %x:exec 83bf215546Sopenharmony_ci //! s2: %e:s[2-3] = p_cbranch_z %d:scc 84bf215546Sopenharmony_ci //! p_unit_test 2, %e:s[2-3] 85bf215546Sopenharmony_ci auto vcmp = bld.vopc_e64(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, reg_s4), Operand::zero(), 86bf215546Sopenharmony_ci Operand(v_in, reg_v0)); 87bf215546Sopenharmony_ci auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), Operand(vcmp, reg_s4), Operand(exec, bld.lm)); 88bf215546Sopenharmony_ci auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp())); 89bf215546Sopenharmony_ci writeout(2, Operand(br, reg_s2)); 90bf215546Sopenharmony_ci } 91bf215546Sopenharmony_ci 92bf215546Sopenharmony_ci //; del b, c, d, e 93bf215546Sopenharmony_ci 94bf215546Sopenharmony_ci { 95bf215546Sopenharmony_ci /* When the VCC isn't written by VOPC, don't optimize */ 96bf215546Sopenharmony_ci 97bf215546Sopenharmony_ci //! s2: %b:vcc, s1: %f:scc = s_or_b64 1, %0:s[4-5] 98bf215546Sopenharmony_ci //! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec 99bf215546Sopenharmony_ci //! s2: %e:s[2-3] = p_cbranch_z %d:scc 100bf215546Sopenharmony_ci //! p_unit_test 2, %e:s[2-3] 101bf215546Sopenharmony_ci auto salu = bld.sop2(Builder::s_or, bld.def(bld.lm, vcc), bld.def(s1, scc), 102bf215546Sopenharmony_ci Operand::c32(1u), Operand(reg_s4, bld.lm)); 103bf215546Sopenharmony_ci auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), Operand(salu, vcc), Operand(exec, bld.lm)); 104bf215546Sopenharmony_ci auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp())); 105bf215546Sopenharmony_ci writeout(2, Operand(br, reg_s2)); 106bf215546Sopenharmony_ci } 107bf215546Sopenharmony_ci 108bf215546Sopenharmony_ci //; del b, c, d, e, f, x 109bf215546Sopenharmony_ci 110bf215546Sopenharmony_ci { 111bf215546Sopenharmony_ci /* When EXEC is overwritten inbetween, don't optimize. */ 112bf215546Sopenharmony_ci 113bf215546Sopenharmony_ci //! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0] 114bf215546Sopenharmony_ci //! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec 115bf215546Sopenharmony_ci //! s2: %f:exec = s_mov_b64 42 116bf215546Sopenharmony_ci //! s2: %e:s[2-3] = p_cbranch_z %d:scc 117bf215546Sopenharmony_ci //! p_unit_test 4, %e:s[2-3], %f:exec 118bf215546Sopenharmony_ci auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(), 119bf215546Sopenharmony_ci Operand(v_in, reg_v0)); 120bf215546Sopenharmony_ci auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp), Operand(exec, bld.lm)); 121bf215546Sopenharmony_ci auto ovrwr = bld.sop1(Builder::s_mov, bld.def(bld.lm, exec), Operand::c32(42u)); 122bf215546Sopenharmony_ci auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp())); 123bf215546Sopenharmony_ci writeout(4, Operand(br, reg_s2), Operand(ovrwr, exec)); 124bf215546Sopenharmony_ci } 125bf215546Sopenharmony_ci 126bf215546Sopenharmony_ci //; del b, c, d, e, f, x 127bf215546Sopenharmony_ci 128bf215546Sopenharmony_ci finish_optimizer_postRA_test(); 129bf215546Sopenharmony_ciEND_TEST 130bf215546Sopenharmony_ci 131bf215546Sopenharmony_ciBEGIN_TEST(optimizer_postRA.scc_nocmp_opt) 132bf215546Sopenharmony_ci //>> s1: %a, s2: %y, s1: %z = p_startpgm 133bf215546Sopenharmony_ci ASSERTED bool setup_ok = setup_cs("s1 s2 s1", GFX6); 134bf215546Sopenharmony_ci assert(setup_ok); 135bf215546Sopenharmony_ci 136bf215546Sopenharmony_ci PhysReg reg_s0{0}; 137bf215546Sopenharmony_ci PhysReg reg_s1{1}; 138bf215546Sopenharmony_ci PhysReg reg_s2{2}; 139bf215546Sopenharmony_ci PhysReg reg_s3{3}; 140bf215546Sopenharmony_ci PhysReg reg_s4{4}; 141bf215546Sopenharmony_ci PhysReg reg_s6{6}; 142bf215546Sopenharmony_ci 143bf215546Sopenharmony_ci Temp in_0 = inputs[0]; 144bf215546Sopenharmony_ci Temp in_1 = inputs[1]; 145bf215546Sopenharmony_ci Temp in_2 = inputs[2]; 146bf215546Sopenharmony_ci Operand op_in_0(in_0); 147bf215546Sopenharmony_ci op_in_0.setFixed(reg_s0); 148bf215546Sopenharmony_ci Operand op_in_1(in_1); 149bf215546Sopenharmony_ci op_in_1.setFixed(reg_s4); 150bf215546Sopenharmony_ci Operand op_in_2(in_2); 151bf215546Sopenharmony_ci op_in_2.setFixed(reg_s6); 152bf215546Sopenharmony_ci 153bf215546Sopenharmony_ci { 154bf215546Sopenharmony_ci //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018 155bf215546Sopenharmony_ci //! s2: %f:vcc = p_cbranch_nz %e:scc 156bf215546Sopenharmony_ci //! p_unit_test 0, %f:vcc 157bf215546Sopenharmony_ci auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0, 158bf215546Sopenharmony_ci Operand::c32(0x40018u)); 159bf215546Sopenharmony_ci auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2), 160bf215546Sopenharmony_ci Operand::zero()); 161bf215546Sopenharmony_ci auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp)); 162bf215546Sopenharmony_ci writeout(0, Operand(br, vcc)); 163bf215546Sopenharmony_ci } 164bf215546Sopenharmony_ci 165bf215546Sopenharmony_ci //; del d, e, f 166bf215546Sopenharmony_ci 167bf215546Sopenharmony_ci { 168bf215546Sopenharmony_ci //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018 169bf215546Sopenharmony_ci //! s2: %f:vcc = p_cbranch_z %e:scc 170bf215546Sopenharmony_ci //! p_unit_test 1, %f:vcc 171bf215546Sopenharmony_ci auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0, 172bf215546Sopenharmony_ci Operand::c32(0x40018u)); 173bf215546Sopenharmony_ci auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand(salu, reg_s2), 174bf215546Sopenharmony_ci Operand::zero()); 175bf215546Sopenharmony_ci auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp)); 176bf215546Sopenharmony_ci writeout(1, Operand(br, vcc)); 177bf215546Sopenharmony_ci } 178bf215546Sopenharmony_ci 179bf215546Sopenharmony_ci //; del d, e, f 180bf215546Sopenharmony_ci 181bf215546Sopenharmony_ci { 182bf215546Sopenharmony_ci //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018 183bf215546Sopenharmony_ci //! s2: %f:vcc = p_cbranch_z %e:scc 184bf215546Sopenharmony_ci //! p_unit_test 2, %f:vcc 185bf215546Sopenharmony_ci auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0, 186bf215546Sopenharmony_ci Operand::c32(0x40018u)); 187bf215546Sopenharmony_ci auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2), 188bf215546Sopenharmony_ci Operand::zero()); 189bf215546Sopenharmony_ci auto br = bld.branch(aco_opcode::p_cbranch_nz, bld.def(s2, vcc), bld.scc(scmp)); 190bf215546Sopenharmony_ci writeout(2, Operand(br, vcc)); 191bf215546Sopenharmony_ci } 192bf215546Sopenharmony_ci 193bf215546Sopenharmony_ci //; del d, e, f 194bf215546Sopenharmony_ci 195bf215546Sopenharmony_ci { 196bf215546Sopenharmony_ci //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018 197bf215546Sopenharmony_ci //! s2: %f:vcc = p_cbranch_nz %e:scc 198bf215546Sopenharmony_ci //! p_unit_test 3, %f:vcc 199bf215546Sopenharmony_ci auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0, 200bf215546Sopenharmony_ci Operand::c32(0x40018u)); 201bf215546Sopenharmony_ci auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand(salu, reg_s2), 202bf215546Sopenharmony_ci Operand::zero()); 203bf215546Sopenharmony_ci auto br = bld.branch(aco_opcode::p_cbranch_nz, bld.def(s2, vcc), bld.scc(scmp)); 204bf215546Sopenharmony_ci writeout(3, Operand(br, vcc)); 205bf215546Sopenharmony_ci } 206bf215546Sopenharmony_ci 207bf215546Sopenharmony_ci //; del d, e, f 208bf215546Sopenharmony_ci 209bf215546Sopenharmony_ci { 210bf215546Sopenharmony_ci //! s2: %d:s[2-3], s1: %e:scc = s_and_b64 %y:s[4-5], 0x12345 211bf215546Sopenharmony_ci //! s2: %f:vcc = p_cbranch_z %e:scc 212bf215546Sopenharmony_ci //! p_unit_test 4, %f:vcc 213bf215546Sopenharmony_ci auto salu = bld.sop2(aco_opcode::s_and_b64, bld.def(s2, reg_s2), bld.def(s1, scc), op_in_1, 214bf215546Sopenharmony_ci Operand::c32(0x12345u)); 215bf215546Sopenharmony_ci auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u64, bld.def(s1, scc), Operand(salu, reg_s2), 216bf215546Sopenharmony_ci Operand::zero(8)); 217bf215546Sopenharmony_ci auto br = bld.branch(aco_opcode::p_cbranch_nz, bld.def(s2, vcc), bld.scc(scmp)); 218bf215546Sopenharmony_ci writeout(4, Operand(br, vcc)); 219bf215546Sopenharmony_ci } 220bf215546Sopenharmony_ci 221bf215546Sopenharmony_ci //; del d, e, f 222bf215546Sopenharmony_ci 223bf215546Sopenharmony_ci { 224bf215546Sopenharmony_ci /* SCC is overwritten in between, don't optimize */ 225bf215546Sopenharmony_ci 226bf215546Sopenharmony_ci //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018 227bf215546Sopenharmony_ci //! s1: %h:s[3], s1: %x:scc = s_add_u32 %a:s[0], 1 228bf215546Sopenharmony_ci //! s1: %g:scc = s_cmp_eq_u32 %d:s[2], 0 229bf215546Sopenharmony_ci //! s2: %f:vcc = p_cbranch_z %g:scc 230bf215546Sopenharmony_ci //! p_unit_test 5, %f:vcc, %h:s[3] 231bf215546Sopenharmony_ci auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0, 232bf215546Sopenharmony_ci Operand::c32(0x40018u)); 233bf215546Sopenharmony_ci auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0, 234bf215546Sopenharmony_ci Operand::c32(1u)); 235bf215546Sopenharmony_ci auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2), 236bf215546Sopenharmony_ci Operand::zero()); 237bf215546Sopenharmony_ci auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp)); 238bf215546Sopenharmony_ci writeout(5, Operand(br, vcc), Operand(ovrw, reg_s3)); 239bf215546Sopenharmony_ci } 240bf215546Sopenharmony_ci 241bf215546Sopenharmony_ci //; del d, e, f, g, h, x 242bf215546Sopenharmony_ci 243bf215546Sopenharmony_ci { 244bf215546Sopenharmony_ci //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018 245bf215546Sopenharmony_ci //! s1: %f:s[4] = s_cselect_b32 %z:s[6], %a:s[0], %e:scc 246bf215546Sopenharmony_ci //! p_unit_test 6, %f:s[4] 247bf215546Sopenharmony_ci auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0, 248bf215546Sopenharmony_ci Operand::c32(0x40018u)); 249bf215546Sopenharmony_ci auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2), 250bf215546Sopenharmony_ci Operand::zero()); 251bf215546Sopenharmony_ci auto br = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1, reg_s4), Operand(op_in_0), Operand(op_in_2), bld.scc(scmp)); 252bf215546Sopenharmony_ci writeout(6, Operand(br, reg_s4)); 253bf215546Sopenharmony_ci } 254bf215546Sopenharmony_ci 255bf215546Sopenharmony_ci //; del d, e, f 256bf215546Sopenharmony_ci 257bf215546Sopenharmony_ci { 258bf215546Sopenharmony_ci /* SCC is overwritten in between, don't optimize */ 259bf215546Sopenharmony_ci 260bf215546Sopenharmony_ci //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018 261bf215546Sopenharmony_ci //! s1: %h:s[3], s1: %x:scc = s_add_u32 %a:s[0], 1 262bf215546Sopenharmony_ci //! s1: %g:scc = s_cmp_eq_u32 %d:s[2], 0 263bf215546Sopenharmony_ci //! s1: %f:s[4] = s_cselect_b32 %a:s[0], %z:s[6], %g:scc 264bf215546Sopenharmony_ci //! p_unit_test 7, %f:s[4], %h:s[3] 265bf215546Sopenharmony_ci auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0, 266bf215546Sopenharmony_ci Operand::c32(0x40018u)); 267bf215546Sopenharmony_ci auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0, 268bf215546Sopenharmony_ci Operand::c32(1u)); 269bf215546Sopenharmony_ci auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2), 270bf215546Sopenharmony_ci Operand::zero()); 271bf215546Sopenharmony_ci auto br = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1, reg_s4), Operand(op_in_0), Operand(op_in_2), bld.scc(scmp)); 272bf215546Sopenharmony_ci writeout(7, Operand(br, reg_s4), Operand(ovrw, reg_s3)); 273bf215546Sopenharmony_ci } 274bf215546Sopenharmony_ci 275bf215546Sopenharmony_ci //; del d, e, f, g, h, x 276bf215546Sopenharmony_ci 277bf215546Sopenharmony_ci finish_optimizer_postRA_test(); 278bf215546Sopenharmony_ciEND_TEST 279bf215546Sopenharmony_ci 280bf215546Sopenharmony_ciBEGIN_TEST(optimizer_postRA.dpp) 281bf215546Sopenharmony_ci //>> v1: %a:v[0], v1: %b:v[1], s2: %c:vcc, s2: %d:s[0-1] = p_startpgm 282bf215546Sopenharmony_ci if (!setup_cs("v1 v1 s2 s2", GFX10_3)) 283bf215546Sopenharmony_ci return; 284bf215546Sopenharmony_ci 285bf215546Sopenharmony_ci bld.instructions->at(0)->definitions[0].setFixed(PhysReg(256)); 286bf215546Sopenharmony_ci bld.instructions->at(0)->definitions[1].setFixed(PhysReg(257)); 287bf215546Sopenharmony_ci bld.instructions->at(0)->definitions[2].setFixed(vcc); 288bf215546Sopenharmony_ci bld.instructions->at(0)->definitions[3].setFixed(PhysReg(0)); 289bf215546Sopenharmony_ci 290bf215546Sopenharmony_ci PhysReg reg_v0(256); 291bf215546Sopenharmony_ci PhysReg reg_v2(258); 292bf215546Sopenharmony_ci Operand a(inputs[0], PhysReg(256)); 293bf215546Sopenharmony_ci Operand b(inputs[1], PhysReg(257)); 294bf215546Sopenharmony_ci Operand c(inputs[2], vcc); 295bf215546Sopenharmony_ci Operand d(inputs[3], PhysReg(0)); 296bf215546Sopenharmony_ci 297bf215546Sopenharmony_ci /* basic optimization */ 298bf215546Sopenharmony_ci //! v1: %res0:v[2] = v_add_f32 %a:v[0], %b:v[1] row_mirror bound_ctrl:1 299bf215546Sopenharmony_ci //! p_unit_test 0, %res0:v[2] 300bf215546Sopenharmony_ci Temp tmp0 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror); 301bf215546Sopenharmony_ci Temp res0 = bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp0, reg_v2), b); 302bf215546Sopenharmony_ci writeout(0, Operand(res0, reg_v2)); 303bf215546Sopenharmony_ci 304bf215546Sopenharmony_ci /* operand swapping */ 305bf215546Sopenharmony_ci //! v1: %res1:v[2] = v_subrev_f32 %a:v[0], %b:v[1] row_mirror bound_ctrl:1 306bf215546Sopenharmony_ci //! p_unit_test 1, %res1:v[2] 307bf215546Sopenharmony_ci Temp tmp1 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror); 308bf215546Sopenharmony_ci Temp res1 = bld.vop2(aco_opcode::v_sub_f32, bld.def(v1, reg_v2), b, Operand(tmp1, reg_v2)); 309bf215546Sopenharmony_ci writeout(1, Operand(res1, reg_v2)); 310bf215546Sopenharmony_ci 311bf215546Sopenharmony_ci //! v1: %tmp2:v[2] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1 312bf215546Sopenharmony_ci //! v1: %res2:v[2] = v_sub_f32 %b:v[1], %tmp2:v[2] row_half_mirror bound_ctrl:1 313bf215546Sopenharmony_ci //! p_unit_test 2, %res2:v[2] 314bf215546Sopenharmony_ci Temp tmp2 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror); 315bf215546Sopenharmony_ci Temp res2 = bld.vop2_dpp(aco_opcode::v_sub_f32, bld.def(v1, reg_v2), b, Operand(tmp2, reg_v2), dpp_row_half_mirror); 316bf215546Sopenharmony_ci writeout(2, Operand(res2, reg_v2)); 317bf215546Sopenharmony_ci 318bf215546Sopenharmony_ci /* modifiers */ 319bf215546Sopenharmony_ci //! v1: %res3:v[2] = v_add_f32 -%a:v[0], %b:v[1] row_mirror bound_ctrl:1 320bf215546Sopenharmony_ci //! p_unit_test 3, %res3:v[2] 321bf215546Sopenharmony_ci auto tmp3 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror); 322bf215546Sopenharmony_ci tmp3.instr->dpp16().neg[0] = true; 323bf215546Sopenharmony_ci Temp res3 = bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp3, reg_v2), b); 324bf215546Sopenharmony_ci writeout(3, Operand(res3, reg_v2)); 325bf215546Sopenharmony_ci 326bf215546Sopenharmony_ci //! v1: %res4:v[2] = v_add_f32 -%a:v[0], %b:v[1] row_mirror bound_ctrl:1 327bf215546Sopenharmony_ci //! p_unit_test 4, %res4:v[2] 328bf215546Sopenharmony_ci Temp tmp4 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror); 329bf215546Sopenharmony_ci auto res4 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp4, reg_v2), b); 330bf215546Sopenharmony_ci res4.instr->vop3().neg[0] = true; 331bf215546Sopenharmony_ci writeout(4, Operand(res4, reg_v2)); 332bf215546Sopenharmony_ci 333bf215546Sopenharmony_ci //! v1: %tmp5:v[2] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1 334bf215546Sopenharmony_ci //! v1: %res5:v[2] = v_add_f32 %tmp5:v[2], %b:v[1] clamp 335bf215546Sopenharmony_ci //! p_unit_test 5, %res5:v[2] 336bf215546Sopenharmony_ci Temp tmp5 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror); 337bf215546Sopenharmony_ci auto res5 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp5, reg_v2), b); 338bf215546Sopenharmony_ci res5.instr->vop3().clamp = true; 339bf215546Sopenharmony_ci writeout(5, Operand(res5, reg_v2)); 340bf215546Sopenharmony_ci 341bf215546Sopenharmony_ci //! v1: %res6:v[2] = v_add_f32 |%a:v[0]|, %b:v[1] row_mirror bound_ctrl:1 342bf215546Sopenharmony_ci //! p_unit_test 6, %res6:v[2] 343bf215546Sopenharmony_ci auto tmp6 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror); 344bf215546Sopenharmony_ci tmp6.instr->dpp16().neg[0] = true; 345bf215546Sopenharmony_ci auto res6 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp6, reg_v2), b); 346bf215546Sopenharmony_ci res6.instr->vop3().abs[0] = true; 347bf215546Sopenharmony_ci writeout(6, Operand(res6, reg_v2)); 348bf215546Sopenharmony_ci 349bf215546Sopenharmony_ci //! v1: %res7:v[2] = v_subrev_f32 %a:v[0], |%b:v[1]| row_mirror bound_ctrl:1 350bf215546Sopenharmony_ci //! p_unit_test 7, %res7:v[2] 351bf215546Sopenharmony_ci Temp tmp7 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror); 352bf215546Sopenharmony_ci auto res7 = bld.vop2_e64(aco_opcode::v_sub_f32, bld.def(v1, reg_v2), b, Operand(tmp7, reg_v2)); 353bf215546Sopenharmony_ci res7.instr->vop3().abs[0] = true; 354bf215546Sopenharmony_ci writeout(7, Operand(res7, reg_v2)); 355bf215546Sopenharmony_ci 356bf215546Sopenharmony_ci /* vcc */ 357bf215546Sopenharmony_ci //! v1: %res8:v[2] = v_cndmask_b32 %a:v[0], %b:v[1], %c:vcc row_mirror bound_ctrl:1 358bf215546Sopenharmony_ci //! p_unit_test 8, %res8:v[2] 359bf215546Sopenharmony_ci Temp tmp8 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror); 360bf215546Sopenharmony_ci Temp res8 = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1, reg_v2), Operand(tmp8, reg_v2), b, c); 361bf215546Sopenharmony_ci writeout(8, Operand(res8, reg_v2)); 362bf215546Sopenharmony_ci 363bf215546Sopenharmony_ci //! v1: %tmp9:v[2] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1 364bf215546Sopenharmony_ci //! v1: %res9:v[2] = v_cndmask_b32 %tmp9:v[2], %b:v[1], %d:s[0-1] 365bf215546Sopenharmony_ci //! p_unit_test 9, %res9:v[2] 366bf215546Sopenharmony_ci Temp tmp9 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror); 367bf215546Sopenharmony_ci Temp res9 = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1, reg_v2), Operand(tmp9, reg_v2), b, d); 368bf215546Sopenharmony_ci writeout(9, Operand(res9, reg_v2)); 369bf215546Sopenharmony_ci 370bf215546Sopenharmony_ci /* control flow */ 371bf215546Sopenharmony_ci //! BB1 372bf215546Sopenharmony_ci //! /* logical preds: BB0, / linear preds: BB0, / kind: uniform, */ 373bf215546Sopenharmony_ci //! v1: %res10:v[2] = v_add_f32 %a:v[0], %b:v[1] row_mirror bound_ctrl:1 374bf215546Sopenharmony_ci //! p_unit_test 10, %res10:v[2] 375bf215546Sopenharmony_ci Temp tmp10 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror); 376bf215546Sopenharmony_ci 377bf215546Sopenharmony_ci bld.reset(program->create_and_insert_block()); 378bf215546Sopenharmony_ci program->blocks[0].linear_succs.push_back(1); 379bf215546Sopenharmony_ci program->blocks[0].logical_succs.push_back(1); 380bf215546Sopenharmony_ci program->blocks[1].linear_preds.push_back(0); 381bf215546Sopenharmony_ci program->blocks[1].logical_preds.push_back(0); 382bf215546Sopenharmony_ci 383bf215546Sopenharmony_ci Temp res10 = bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp10, reg_v2), b); 384bf215546Sopenharmony_ci writeout(10, Operand(res10, reg_v2)); 385bf215546Sopenharmony_ci 386bf215546Sopenharmony_ci /* can't combine if the v_mov_b32's operand is modified */ 387bf215546Sopenharmony_ci //! v1: %tmp11_1:v[2] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1 388bf215546Sopenharmony_ci //! v1: %tmp11_2:v[0] = v_mov_b32 0 389bf215546Sopenharmony_ci //! v1: %res11:v[2] = v_add_f32 %tmp11_1:v[2], %b:v[1] 390bf215546Sopenharmony_ci //! p_unit_test 11, %res11_1:v[2], %tmp11_2:v[0] 391bf215546Sopenharmony_ci Temp tmp11_1 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror); 392bf215546Sopenharmony_ci Temp tmp11_2 = bld.vop1(aco_opcode::v_mov_b32, bld.def(v1, reg_v0), Operand::c32(0)); 393bf215546Sopenharmony_ci Temp res11 = bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp11_1, reg_v2), b); 394bf215546Sopenharmony_ci writeout(11, Operand(res11, reg_v2), Operand(tmp11_2, reg_v0)); 395bf215546Sopenharmony_ci 396bf215546Sopenharmony_ci finish_optimizer_postRA_test(); 397bf215546Sopenharmony_ciEND_TEST 398bf215546Sopenharmony_ci 399