1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright © 2021 Valve Corporation
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21bf215546Sopenharmony_ci * IN THE SOFTWARE.
22bf215546Sopenharmony_ci *
23bf215546Sopenharmony_ci */
24bf215546Sopenharmony_ci
25bf215546Sopenharmony_ci#include "helpers.h"
26bf215546Sopenharmony_ci
27bf215546Sopenharmony_ciusing namespace aco;
28bf215546Sopenharmony_ci
29bf215546Sopenharmony_ciBEGIN_TEST(optimizer_postRA.vcmp)
30bf215546Sopenharmony_ci    PhysReg reg_v0(256);
31bf215546Sopenharmony_ci    PhysReg reg_s0(0);
32bf215546Sopenharmony_ci    PhysReg reg_s2(2);
33bf215546Sopenharmony_ci    PhysReg reg_s4(4);
34bf215546Sopenharmony_ci
35bf215546Sopenharmony_ci    //>> v1: %a:v[0] = p_startpgm
36bf215546Sopenharmony_ci    ASSERTED bool setup_ok = setup_cs("v1", GFX8);
37bf215546Sopenharmony_ci    assert(setup_ok);
38bf215546Sopenharmony_ci
39bf215546Sopenharmony_ci    auto &startpgm = bld.instructions->at(0);
40bf215546Sopenharmony_ci    assert(startpgm->opcode == aco_opcode::p_startpgm);
41bf215546Sopenharmony_ci    startpgm->definitions[0].setFixed(reg_v0);
42bf215546Sopenharmony_ci
43bf215546Sopenharmony_ci    Temp v_in = inputs[0];
44bf215546Sopenharmony_ci
45bf215546Sopenharmony_ci    {
46bf215546Sopenharmony_ci        /* Recognize when the result of VOPC goes to VCC, and use that for the branching then. */
47bf215546Sopenharmony_ci
48bf215546Sopenharmony_ci        //! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0]
49bf215546Sopenharmony_ci        //! s2: %e:s[2-3] = p_cbranch_z %b:vcc
50bf215546Sopenharmony_ci        //! p_unit_test 0, %e:s[2-3]
51bf215546Sopenharmony_ci        auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(),
52bf215546Sopenharmony_ci                             Operand(v_in, reg_v0));
53bf215546Sopenharmony_ci        auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp), Operand(exec, bld.lm));
54bf215546Sopenharmony_ci        auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
55bf215546Sopenharmony_ci        writeout(0, Operand(br, reg_s2));
56bf215546Sopenharmony_ci    }
57bf215546Sopenharmony_ci
58bf215546Sopenharmony_ci    //; del b, e
59bf215546Sopenharmony_ci
60bf215546Sopenharmony_ci    {
61bf215546Sopenharmony_ci        /* When VCC is overwritten inbetween, don't optimize. */
62bf215546Sopenharmony_ci
63bf215546Sopenharmony_ci        //! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0]
64bf215546Sopenharmony_ci        //! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec
65bf215546Sopenharmony_ci        //! s2: %f:vcc = s_mov_b64 0
66bf215546Sopenharmony_ci        //! s2: %e:s[2-3] = p_cbranch_z %d:scc
67bf215546Sopenharmony_ci        //! p_unit_test 1, %e:s[2-3], %f:vcc
68bf215546Sopenharmony_ci        auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(),
69bf215546Sopenharmony_ci                             Operand(v_in, reg_v0));
70bf215546Sopenharmony_ci        auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp), Operand(exec, bld.lm));
71bf215546Sopenharmony_ci        auto ovrwr = bld.sop1(Builder::s_mov, bld.def(bld.lm, vcc), Operand::zero());
72bf215546Sopenharmony_ci        auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
73bf215546Sopenharmony_ci        writeout(1, Operand(br, reg_s2), Operand(ovrwr, vcc));
74bf215546Sopenharmony_ci    }
75bf215546Sopenharmony_ci
76bf215546Sopenharmony_ci    //; del b, c, d, e, f
77bf215546Sopenharmony_ci
78bf215546Sopenharmony_ci    {
79bf215546Sopenharmony_ci        /* When the result of VOPC goes to an SGPR pair other than VCC, don't optimize */
80bf215546Sopenharmony_ci
81bf215546Sopenharmony_ci        //! s2: %b:s[4-5] = v_cmp_eq_u32 0, %a:v[0]
82bf215546Sopenharmony_ci        //! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:s[4-5], %x:exec
83bf215546Sopenharmony_ci        //! s2: %e:s[2-3] = p_cbranch_z %d:scc
84bf215546Sopenharmony_ci        //! p_unit_test 2, %e:s[2-3]
85bf215546Sopenharmony_ci        auto vcmp = bld.vopc_e64(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, reg_s4), Operand::zero(),
86bf215546Sopenharmony_ci                                 Operand(v_in, reg_v0));
87bf215546Sopenharmony_ci        auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), Operand(vcmp, reg_s4), Operand(exec, bld.lm));
88bf215546Sopenharmony_ci        auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
89bf215546Sopenharmony_ci        writeout(2, Operand(br, reg_s2));
90bf215546Sopenharmony_ci    }
91bf215546Sopenharmony_ci
92bf215546Sopenharmony_ci    //; del b, c, d, e
93bf215546Sopenharmony_ci
94bf215546Sopenharmony_ci    {
95bf215546Sopenharmony_ci        /* When the VCC isn't written by VOPC, don't optimize */
96bf215546Sopenharmony_ci
97bf215546Sopenharmony_ci        //! s2: %b:vcc, s1: %f:scc = s_or_b64 1, %0:s[4-5]
98bf215546Sopenharmony_ci        //! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec
99bf215546Sopenharmony_ci        //! s2: %e:s[2-3] = p_cbranch_z %d:scc
100bf215546Sopenharmony_ci        //! p_unit_test 2, %e:s[2-3]
101bf215546Sopenharmony_ci        auto salu = bld.sop2(Builder::s_or, bld.def(bld.lm, vcc), bld.def(s1, scc),
102bf215546Sopenharmony_ci                             Operand::c32(1u), Operand(reg_s4, bld.lm));
103bf215546Sopenharmony_ci        auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), Operand(salu, vcc), Operand(exec, bld.lm));
104bf215546Sopenharmony_ci        auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
105bf215546Sopenharmony_ci        writeout(2, Operand(br, reg_s2));
106bf215546Sopenharmony_ci    }
107bf215546Sopenharmony_ci
108bf215546Sopenharmony_ci    //; del b, c, d, e, f, x
109bf215546Sopenharmony_ci
110bf215546Sopenharmony_ci    {
111bf215546Sopenharmony_ci        /* When EXEC is overwritten inbetween, don't optimize. */
112bf215546Sopenharmony_ci
113bf215546Sopenharmony_ci        //! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0]
114bf215546Sopenharmony_ci        //! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec
115bf215546Sopenharmony_ci        //! s2: %f:exec = s_mov_b64 42
116bf215546Sopenharmony_ci        //! s2: %e:s[2-3] = p_cbranch_z %d:scc
117bf215546Sopenharmony_ci        //! p_unit_test 4, %e:s[2-3], %f:exec
118bf215546Sopenharmony_ci        auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(),
119bf215546Sopenharmony_ci                             Operand(v_in, reg_v0));
120bf215546Sopenharmony_ci        auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp), Operand(exec, bld.lm));
121bf215546Sopenharmony_ci        auto ovrwr = bld.sop1(Builder::s_mov, bld.def(bld.lm, exec), Operand::c32(42u));
122bf215546Sopenharmony_ci        auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
123bf215546Sopenharmony_ci        writeout(4, Operand(br, reg_s2), Operand(ovrwr, exec));
124bf215546Sopenharmony_ci    }
125bf215546Sopenharmony_ci
126bf215546Sopenharmony_ci    //; del b, c, d, e, f, x
127bf215546Sopenharmony_ci
128bf215546Sopenharmony_ci    finish_optimizer_postRA_test();
129bf215546Sopenharmony_ciEND_TEST
130bf215546Sopenharmony_ci
131bf215546Sopenharmony_ciBEGIN_TEST(optimizer_postRA.scc_nocmp_opt)
132bf215546Sopenharmony_ci    //>> s1: %a, s2: %y, s1: %z = p_startpgm
133bf215546Sopenharmony_ci    ASSERTED bool setup_ok = setup_cs("s1 s2 s1", GFX6);
134bf215546Sopenharmony_ci    assert(setup_ok);
135bf215546Sopenharmony_ci
136bf215546Sopenharmony_ci    PhysReg reg_s0{0};
137bf215546Sopenharmony_ci    PhysReg reg_s1{1};
138bf215546Sopenharmony_ci    PhysReg reg_s2{2};
139bf215546Sopenharmony_ci    PhysReg reg_s3{3};
140bf215546Sopenharmony_ci    PhysReg reg_s4{4};
141bf215546Sopenharmony_ci    PhysReg reg_s6{6};
142bf215546Sopenharmony_ci
143bf215546Sopenharmony_ci    Temp in_0 = inputs[0];
144bf215546Sopenharmony_ci    Temp in_1 = inputs[1];
145bf215546Sopenharmony_ci    Temp in_2 = inputs[2];
146bf215546Sopenharmony_ci    Operand op_in_0(in_0);
147bf215546Sopenharmony_ci    op_in_0.setFixed(reg_s0);
148bf215546Sopenharmony_ci    Operand op_in_1(in_1);
149bf215546Sopenharmony_ci    op_in_1.setFixed(reg_s4);
150bf215546Sopenharmony_ci    Operand op_in_2(in_2);
151bf215546Sopenharmony_ci    op_in_2.setFixed(reg_s6);
152bf215546Sopenharmony_ci
153bf215546Sopenharmony_ci    {
154bf215546Sopenharmony_ci        //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
155bf215546Sopenharmony_ci        //! s2: %f:vcc = p_cbranch_nz %e:scc
156bf215546Sopenharmony_ci        //! p_unit_test 0, %f:vcc
157bf215546Sopenharmony_ci        auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
158bf215546Sopenharmony_ci                             Operand::c32(0x40018u));
159bf215546Sopenharmony_ci        auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
160bf215546Sopenharmony_ci                             Operand::zero());
161bf215546Sopenharmony_ci        auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp));
162bf215546Sopenharmony_ci        writeout(0, Operand(br, vcc));
163bf215546Sopenharmony_ci    }
164bf215546Sopenharmony_ci
165bf215546Sopenharmony_ci    //; del d, e, f
166bf215546Sopenharmony_ci
167bf215546Sopenharmony_ci    {
168bf215546Sopenharmony_ci        //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
169bf215546Sopenharmony_ci        //! s2: %f:vcc = p_cbranch_z %e:scc
170bf215546Sopenharmony_ci        //! p_unit_test 1, %f:vcc
171bf215546Sopenharmony_ci        auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
172bf215546Sopenharmony_ci                             Operand::c32(0x40018u));
173bf215546Sopenharmony_ci        auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand(salu, reg_s2),
174bf215546Sopenharmony_ci                             Operand::zero());
175bf215546Sopenharmony_ci        auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp));
176bf215546Sopenharmony_ci        writeout(1, Operand(br, vcc));
177bf215546Sopenharmony_ci    }
178bf215546Sopenharmony_ci
179bf215546Sopenharmony_ci    //; del d, e, f
180bf215546Sopenharmony_ci
181bf215546Sopenharmony_ci    {
182bf215546Sopenharmony_ci        //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
183bf215546Sopenharmony_ci        //! s2: %f:vcc = p_cbranch_z %e:scc
184bf215546Sopenharmony_ci        //! p_unit_test 2, %f:vcc
185bf215546Sopenharmony_ci        auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
186bf215546Sopenharmony_ci                             Operand::c32(0x40018u));
187bf215546Sopenharmony_ci        auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
188bf215546Sopenharmony_ci                             Operand::zero());
189bf215546Sopenharmony_ci        auto br = bld.branch(aco_opcode::p_cbranch_nz, bld.def(s2, vcc), bld.scc(scmp));
190bf215546Sopenharmony_ci        writeout(2, Operand(br, vcc));
191bf215546Sopenharmony_ci    }
192bf215546Sopenharmony_ci
193bf215546Sopenharmony_ci    //; del d, e, f
194bf215546Sopenharmony_ci
195bf215546Sopenharmony_ci    {
196bf215546Sopenharmony_ci        //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
197bf215546Sopenharmony_ci        //! s2: %f:vcc = p_cbranch_nz %e:scc
198bf215546Sopenharmony_ci        //! p_unit_test 3, %f:vcc
199bf215546Sopenharmony_ci        auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
200bf215546Sopenharmony_ci                             Operand::c32(0x40018u));
201bf215546Sopenharmony_ci        auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand(salu, reg_s2),
202bf215546Sopenharmony_ci                             Operand::zero());
203bf215546Sopenharmony_ci        auto br = bld.branch(aco_opcode::p_cbranch_nz, bld.def(s2, vcc), bld.scc(scmp));
204bf215546Sopenharmony_ci        writeout(3, Operand(br, vcc));
205bf215546Sopenharmony_ci    }
206bf215546Sopenharmony_ci
207bf215546Sopenharmony_ci    //; del d, e, f
208bf215546Sopenharmony_ci
209bf215546Sopenharmony_ci    {
210bf215546Sopenharmony_ci        //! s2: %d:s[2-3], s1: %e:scc = s_and_b64 %y:s[4-5], 0x12345
211bf215546Sopenharmony_ci        //! s2: %f:vcc = p_cbranch_z %e:scc
212bf215546Sopenharmony_ci        //! p_unit_test 4, %f:vcc
213bf215546Sopenharmony_ci        auto salu = bld.sop2(aco_opcode::s_and_b64, bld.def(s2, reg_s2), bld.def(s1, scc), op_in_1,
214bf215546Sopenharmony_ci                             Operand::c32(0x12345u));
215bf215546Sopenharmony_ci        auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u64, bld.def(s1, scc), Operand(salu, reg_s2),
216bf215546Sopenharmony_ci                             Operand::zero(8));
217bf215546Sopenharmony_ci        auto br = bld.branch(aco_opcode::p_cbranch_nz, bld.def(s2, vcc), bld.scc(scmp));
218bf215546Sopenharmony_ci        writeout(4, Operand(br, vcc));
219bf215546Sopenharmony_ci    }
220bf215546Sopenharmony_ci
221bf215546Sopenharmony_ci    //; del d, e, f
222bf215546Sopenharmony_ci
223bf215546Sopenharmony_ci    {
224bf215546Sopenharmony_ci        /* SCC is overwritten in between, don't optimize */
225bf215546Sopenharmony_ci
226bf215546Sopenharmony_ci        //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
227bf215546Sopenharmony_ci        //! s1: %h:s[3], s1: %x:scc = s_add_u32 %a:s[0], 1
228bf215546Sopenharmony_ci        //! s1: %g:scc = s_cmp_eq_u32 %d:s[2], 0
229bf215546Sopenharmony_ci        //! s2: %f:vcc = p_cbranch_z %g:scc
230bf215546Sopenharmony_ci        //! p_unit_test 5, %f:vcc, %h:s[3]
231bf215546Sopenharmony_ci        auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
232bf215546Sopenharmony_ci                             Operand::c32(0x40018u));
233bf215546Sopenharmony_ci        auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0,
234bf215546Sopenharmony_ci                             Operand::c32(1u));
235bf215546Sopenharmony_ci        auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
236bf215546Sopenharmony_ci                             Operand::zero());
237bf215546Sopenharmony_ci        auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp));
238bf215546Sopenharmony_ci        writeout(5, Operand(br, vcc), Operand(ovrw, reg_s3));
239bf215546Sopenharmony_ci    }
240bf215546Sopenharmony_ci
241bf215546Sopenharmony_ci    //; del d, e, f, g, h, x
242bf215546Sopenharmony_ci
243bf215546Sopenharmony_ci    {
244bf215546Sopenharmony_ci        //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
245bf215546Sopenharmony_ci        //! s1: %f:s[4] = s_cselect_b32 %z:s[6], %a:s[0], %e:scc
246bf215546Sopenharmony_ci        //! p_unit_test 6, %f:s[4]
247bf215546Sopenharmony_ci        auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
248bf215546Sopenharmony_ci                             Operand::c32(0x40018u));
249bf215546Sopenharmony_ci        auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
250bf215546Sopenharmony_ci                             Operand::zero());
251bf215546Sopenharmony_ci        auto br = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1, reg_s4), Operand(op_in_0), Operand(op_in_2), bld.scc(scmp));
252bf215546Sopenharmony_ci        writeout(6, Operand(br, reg_s4));
253bf215546Sopenharmony_ci    }
254bf215546Sopenharmony_ci
255bf215546Sopenharmony_ci    //; del d, e, f
256bf215546Sopenharmony_ci
257bf215546Sopenharmony_ci    {
258bf215546Sopenharmony_ci        /* SCC is overwritten in between, don't optimize */
259bf215546Sopenharmony_ci
260bf215546Sopenharmony_ci        //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
261bf215546Sopenharmony_ci        //! s1: %h:s[3], s1: %x:scc = s_add_u32 %a:s[0], 1
262bf215546Sopenharmony_ci        //! s1: %g:scc = s_cmp_eq_u32 %d:s[2], 0
263bf215546Sopenharmony_ci        //! s1: %f:s[4] = s_cselect_b32 %a:s[0], %z:s[6], %g:scc
264bf215546Sopenharmony_ci        //! p_unit_test 7, %f:s[4], %h:s[3]
265bf215546Sopenharmony_ci        auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
266bf215546Sopenharmony_ci                             Operand::c32(0x40018u));
267bf215546Sopenharmony_ci        auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0,
268bf215546Sopenharmony_ci                             Operand::c32(1u));
269bf215546Sopenharmony_ci        auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
270bf215546Sopenharmony_ci                             Operand::zero());
271bf215546Sopenharmony_ci        auto br = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1, reg_s4), Operand(op_in_0), Operand(op_in_2), bld.scc(scmp));
272bf215546Sopenharmony_ci        writeout(7, Operand(br, reg_s4), Operand(ovrw, reg_s3));
273bf215546Sopenharmony_ci    }
274bf215546Sopenharmony_ci
275bf215546Sopenharmony_ci    //; del d, e, f, g, h, x
276bf215546Sopenharmony_ci
277bf215546Sopenharmony_ci    finish_optimizer_postRA_test();
278bf215546Sopenharmony_ciEND_TEST
279bf215546Sopenharmony_ci
280bf215546Sopenharmony_ciBEGIN_TEST(optimizer_postRA.dpp)
281bf215546Sopenharmony_ci   //>> v1: %a:v[0], v1: %b:v[1], s2: %c:vcc, s2: %d:s[0-1] = p_startpgm
282bf215546Sopenharmony_ci   if (!setup_cs("v1 v1 s2 s2", GFX10_3))
283bf215546Sopenharmony_ci      return;
284bf215546Sopenharmony_ci
285bf215546Sopenharmony_ci   bld.instructions->at(0)->definitions[0].setFixed(PhysReg(256));
286bf215546Sopenharmony_ci   bld.instructions->at(0)->definitions[1].setFixed(PhysReg(257));
287bf215546Sopenharmony_ci   bld.instructions->at(0)->definitions[2].setFixed(vcc);
288bf215546Sopenharmony_ci   bld.instructions->at(0)->definitions[3].setFixed(PhysReg(0));
289bf215546Sopenharmony_ci
290bf215546Sopenharmony_ci   PhysReg reg_v0(256);
291bf215546Sopenharmony_ci   PhysReg reg_v2(258);
292bf215546Sopenharmony_ci   Operand a(inputs[0], PhysReg(256));
293bf215546Sopenharmony_ci   Operand b(inputs[1], PhysReg(257));
294bf215546Sopenharmony_ci   Operand c(inputs[2], vcc);
295bf215546Sopenharmony_ci   Operand d(inputs[3], PhysReg(0));
296bf215546Sopenharmony_ci
297bf215546Sopenharmony_ci   /* basic optimization */
298bf215546Sopenharmony_ci   //! v1: %res0:v[2] = v_add_f32 %a:v[0], %b:v[1] row_mirror bound_ctrl:1
299bf215546Sopenharmony_ci   //! p_unit_test 0, %res0:v[2]
300bf215546Sopenharmony_ci   Temp tmp0 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
301bf215546Sopenharmony_ci   Temp res0 = bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp0, reg_v2), b);
302bf215546Sopenharmony_ci   writeout(0, Operand(res0, reg_v2));
303bf215546Sopenharmony_ci
304bf215546Sopenharmony_ci   /* operand swapping */
305bf215546Sopenharmony_ci   //! v1: %res1:v[2] = v_subrev_f32 %a:v[0], %b:v[1] row_mirror bound_ctrl:1
306bf215546Sopenharmony_ci   //! p_unit_test 1, %res1:v[2]
307bf215546Sopenharmony_ci   Temp tmp1 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
308bf215546Sopenharmony_ci   Temp res1 = bld.vop2(aco_opcode::v_sub_f32, bld.def(v1, reg_v2), b, Operand(tmp1, reg_v2));
309bf215546Sopenharmony_ci   writeout(1, Operand(res1, reg_v2));
310bf215546Sopenharmony_ci
311bf215546Sopenharmony_ci   //! v1: %tmp2:v[2] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1
312bf215546Sopenharmony_ci   //! v1: %res2:v[2] = v_sub_f32 %b:v[1], %tmp2:v[2] row_half_mirror bound_ctrl:1
313bf215546Sopenharmony_ci   //! p_unit_test 2, %res2:v[2]
314bf215546Sopenharmony_ci   Temp tmp2 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
315bf215546Sopenharmony_ci   Temp res2 = bld.vop2_dpp(aco_opcode::v_sub_f32, bld.def(v1, reg_v2), b, Operand(tmp2, reg_v2), dpp_row_half_mirror);
316bf215546Sopenharmony_ci   writeout(2, Operand(res2, reg_v2));
317bf215546Sopenharmony_ci
318bf215546Sopenharmony_ci   /* modifiers */
319bf215546Sopenharmony_ci   //! v1: %res3:v[2] = v_add_f32 -%a:v[0], %b:v[1] row_mirror bound_ctrl:1
320bf215546Sopenharmony_ci   //! p_unit_test 3, %res3:v[2]
321bf215546Sopenharmony_ci   auto tmp3 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
322bf215546Sopenharmony_ci   tmp3.instr->dpp16().neg[0] = true;
323bf215546Sopenharmony_ci   Temp res3 = bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp3, reg_v2), b);
324bf215546Sopenharmony_ci   writeout(3, Operand(res3, reg_v2));
325bf215546Sopenharmony_ci
326bf215546Sopenharmony_ci   //! v1: %res4:v[2] = v_add_f32 -%a:v[0], %b:v[1] row_mirror bound_ctrl:1
327bf215546Sopenharmony_ci   //! p_unit_test 4, %res4:v[2]
328bf215546Sopenharmony_ci   Temp tmp4 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
329bf215546Sopenharmony_ci   auto res4 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp4, reg_v2), b);
330bf215546Sopenharmony_ci   res4.instr->vop3().neg[0] = true;
331bf215546Sopenharmony_ci   writeout(4, Operand(res4, reg_v2));
332bf215546Sopenharmony_ci
333bf215546Sopenharmony_ci   //! v1: %tmp5:v[2] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1
334bf215546Sopenharmony_ci   //! v1: %res5:v[2] = v_add_f32 %tmp5:v[2], %b:v[1] clamp
335bf215546Sopenharmony_ci   //! p_unit_test 5, %res5:v[2]
336bf215546Sopenharmony_ci   Temp tmp5 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
337bf215546Sopenharmony_ci   auto res5 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp5, reg_v2), b);
338bf215546Sopenharmony_ci   res5.instr->vop3().clamp = true;
339bf215546Sopenharmony_ci   writeout(5, Operand(res5, reg_v2));
340bf215546Sopenharmony_ci
341bf215546Sopenharmony_ci   //! v1: %res6:v[2] = v_add_f32 |%a:v[0]|, %b:v[1] row_mirror bound_ctrl:1
342bf215546Sopenharmony_ci   //! p_unit_test 6, %res6:v[2]
343bf215546Sopenharmony_ci   auto tmp6 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
344bf215546Sopenharmony_ci   tmp6.instr->dpp16().neg[0] = true;
345bf215546Sopenharmony_ci   auto res6 = bld.vop2_e64(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp6, reg_v2), b);
346bf215546Sopenharmony_ci   res6.instr->vop3().abs[0] = true;
347bf215546Sopenharmony_ci   writeout(6, Operand(res6, reg_v2));
348bf215546Sopenharmony_ci
349bf215546Sopenharmony_ci   //! v1: %res7:v[2] = v_subrev_f32 %a:v[0], |%b:v[1]| row_mirror bound_ctrl:1
350bf215546Sopenharmony_ci   //! p_unit_test 7, %res7:v[2]
351bf215546Sopenharmony_ci   Temp tmp7 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
352bf215546Sopenharmony_ci   auto res7 = bld.vop2_e64(aco_opcode::v_sub_f32, bld.def(v1, reg_v2), b, Operand(tmp7, reg_v2));
353bf215546Sopenharmony_ci   res7.instr->vop3().abs[0] = true;
354bf215546Sopenharmony_ci   writeout(7, Operand(res7, reg_v2));
355bf215546Sopenharmony_ci
356bf215546Sopenharmony_ci   /* vcc */
357bf215546Sopenharmony_ci   //! v1: %res8:v[2] = v_cndmask_b32 %a:v[0], %b:v[1], %c:vcc row_mirror bound_ctrl:1
358bf215546Sopenharmony_ci   //! p_unit_test 8, %res8:v[2]
359bf215546Sopenharmony_ci   Temp tmp8 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
360bf215546Sopenharmony_ci   Temp res8 = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1, reg_v2), Operand(tmp8, reg_v2), b, c);
361bf215546Sopenharmony_ci   writeout(8, Operand(res8, reg_v2));
362bf215546Sopenharmony_ci
363bf215546Sopenharmony_ci   //! v1: %tmp9:v[2] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1
364bf215546Sopenharmony_ci   //! v1: %res9:v[2] = v_cndmask_b32 %tmp9:v[2], %b:v[1], %d:s[0-1]
365bf215546Sopenharmony_ci   //! p_unit_test 9, %res9:v[2]
366bf215546Sopenharmony_ci   Temp tmp9 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
367bf215546Sopenharmony_ci   Temp res9 = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1, reg_v2), Operand(tmp9, reg_v2), b, d);
368bf215546Sopenharmony_ci   writeout(9, Operand(res9, reg_v2));
369bf215546Sopenharmony_ci
370bf215546Sopenharmony_ci   /* control flow */
371bf215546Sopenharmony_ci   //! BB1
372bf215546Sopenharmony_ci   //! /* logical preds: BB0, / linear preds: BB0, / kind: uniform, */
373bf215546Sopenharmony_ci   //! v1: %res10:v[2] = v_add_f32 %a:v[0], %b:v[1] row_mirror bound_ctrl:1
374bf215546Sopenharmony_ci   //! p_unit_test 10, %res10:v[2]
375bf215546Sopenharmony_ci   Temp tmp10 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
376bf215546Sopenharmony_ci
377bf215546Sopenharmony_ci   bld.reset(program->create_and_insert_block());
378bf215546Sopenharmony_ci   program->blocks[0].linear_succs.push_back(1);
379bf215546Sopenharmony_ci   program->blocks[0].logical_succs.push_back(1);
380bf215546Sopenharmony_ci   program->blocks[1].linear_preds.push_back(0);
381bf215546Sopenharmony_ci   program->blocks[1].logical_preds.push_back(0);
382bf215546Sopenharmony_ci
383bf215546Sopenharmony_ci   Temp res10 = bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp10, reg_v2), b);
384bf215546Sopenharmony_ci   writeout(10, Operand(res10, reg_v2));
385bf215546Sopenharmony_ci
386bf215546Sopenharmony_ci   /* can't combine if the v_mov_b32's operand is modified */
387bf215546Sopenharmony_ci   //! v1: %tmp11_1:v[2] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1
388bf215546Sopenharmony_ci   //! v1: %tmp11_2:v[0] = v_mov_b32 0
389bf215546Sopenharmony_ci   //! v1: %res11:v[2] = v_add_f32 %tmp11_1:v[2], %b:v[1]
390bf215546Sopenharmony_ci   //! p_unit_test 11, %res11_1:v[2], %tmp11_2:v[0]
391bf215546Sopenharmony_ci   Temp tmp11_1 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
392bf215546Sopenharmony_ci   Temp tmp11_2 = bld.vop1(aco_opcode::v_mov_b32, bld.def(v1, reg_v0), Operand::c32(0));
393bf215546Sopenharmony_ci   Temp res11 = bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v2), Operand(tmp11_1, reg_v2), b);
394bf215546Sopenharmony_ci   writeout(11, Operand(res11, reg_v2), Operand(tmp11_2, reg_v0));
395bf215546Sopenharmony_ci
396bf215546Sopenharmony_ci   finish_optimizer_postRA_test();
397bf215546Sopenharmony_ciEND_TEST
398bf215546Sopenharmony_ci
399