1/*
2 * Copyright © 2020 Valve Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 */
24#include "helpers.h"
25
26using namespace aco;
27
/*
 * Test case "builder.v_mul_imm": for every gfx level from GFX8 through GFX10
 * it sets up a program with the inputs "v1 v1 s1 s1" and calls
 * bld.v_mul_imm() on inputs[0] with a series of immediates, passing each
 * result to writeout() under an increasing index.
 *
 * NOTE(review): the specially-marked line comments in the body (text starting
 * with '!', '>>' or '~gfx...!' right after the two slashes) look like
 * expected-output patterns consumed by the test harness, with the '~gfx...'
 * prefix restricting a pattern to matching gfx levels - confirm against the
 * test framework. Treat them as test data: do not reword or reorder them.
 */
28BEGIN_TEST(builder.v_mul_imm)
29   for (unsigned i = GFX8; i <= GFX10; i++) {
30      //>> v1: %a, v1: %b, s1: %c, s1: %d = p_startpgm
      /* If setup_cs() reports failure for this gfx level, skip the level. */
31      if (!setup_cs("v1 v1 s1 s1", (amd_gfx_level)i))
32         continue;
33
34      /* simple optimizations */
35
      /* Multiplying by 0: the expected output is the constant 0, no instruction. */
36      //! p_unit_test 0, 0
37      writeout(0, bld.v_mul_imm(bld.def(v1), inputs[0], 0));
38
      /* Multiplying by 1: the expected output is the input %a itself. */
39      //! p_unit_test 1, %a
40      writeout(1, bld.v_mul_imm(bld.def(v1), inputs[0], 1));
41
      /* 4 == 1 << 2: a single left shift by 2 is expected. */
42      //! v1: %res2 = v_lshlrev_b32 2, %a
43      //! p_unit_test 2, %res2
44      writeout(2, bld.v_mul_imm(bld.def(v1), inputs[0], 4));
45
      /* 2147483648u == 1 << 31: a single left shift by 31 is expected. */
46      //! v1: %res3 = v_lshlrev_b32 31, %a
47      //! p_unit_test 3, %res3
48      writeout(3, bld.v_mul_imm(bld.def(v1), inputs[0], 2147483648u));
49
50      /* single lshl+add/sub */
51
      /* 9 == (1 << 3) + 1: shift then add on gfx8, fused v_lshl_add_u32 on gfx9/gfx10. */
52      //~gfx8! v1: %res4_tmp = v_lshlrev_b32 3, %a
53      //~gfx8! v1: %res4,  s2: %_ = v_add_co_u32 %res4_tmp, %a
54      //~gfx(9|10)! v1: %res4 = v_lshl_add_u32 %a, 3, %a
55      //! p_unit_test 4, %res4
56      writeout(4, bld.v_mul_imm(bld.def(v1), inputs[0], 9));
57
      /* 7 == (1 << 3) - 1: shift then subtract on gfx8/gfx9; gfx10 expects a plain v_mul_lo_u32. */
58      //~gfx[89]! v1: %res5_tmp = v_lshlrev_b32 3, %a
59      //~gfx8! v1: %res5,  s2: %_ = v_sub_co_u32 %res5_tmp, %a
60      //~gfx9! v1: %res5 = v_sub_u32 %res5_tmp, %a
61      //~gfx10! v1: %res5 = v_mul_lo_u32 7, %a
62      //! p_unit_test 5, %res5
63      writeout(5, bld.v_mul_imm(bld.def(v1), inputs[0], 7));
64
65      /* lshl+add optimization with literal */
66
      /* 4 | 64 == 0x44 (two set bits): shifts plus add on gfx8/gfx9, multiply by 0x44 on gfx10. */
67      //~gfx8! v1: %res6_tmp0 = v_lshlrev_b32 2, %a
68      //~gfx8! v1: %res6_tmp1 = v_lshlrev_b32 6, %a
69      //~gfx8! v1: %res6,  s2: %_ = v_add_co_u32 %res6_tmp1, %res6_tmp0
70      //~gfx9! v1: %res6_tmp = v_lshlrev_b32 2, %a
71      //~gfx9! v1: %res6 = v_lshl_add_u32 %a, 6, %res6_tmp
72      //~gfx10! v1: %res6 = v_mul_lo_u32 0x44, %a
73      //! p_unit_test 6, %res6
74      writeout(6, bld.v_mul_imm(bld.def(v1), inputs[0], 4 | 64));
75
      /* 4 | 64 | 256 == 0x144 (three set bits): gfx8 expects the constant copied
       * into an s1 temporary and multiplied, gfx9 a shift followed by two
       * v_lshl_add_u32, gfx10 a multiply by 0x144. */
76      //~gfx8! s1: %res7_tmp = p_parallelcopy 0x144
77      //~gfx8! v1: %res7 = v_mul_lo_u32 %res7_tmp, %a
78      //~gfx9! v1: %res7_tmp0 = v_lshlrev_b32 2, %a
79      //~gfx9! v1: %res7_tmp1 = v_lshl_add_u32 %a, 6, %res7_tmp0
80      //~gfx9! v1: %res7 = v_lshl_add_u32 %a, 8, %res7_tmp1
81      //~gfx10! v1: %res7 = v_mul_lo_u32 0x144, %a
82      //! p_unit_test 7, %res7
83      writeout(7, bld.v_mul_imm(bld.def(v1), inputs[0], 4 | 64 | 256));
84
      /* 4 | 64 | 256 | 2048 == 0x944 (four set bits): same shape as the previous
       * case, with one more v_lshl_add_u32 in the gfx9 expectation. */
85      //~gfx8! s1: %res8_tmp = p_parallelcopy 0x944
86      //~gfx8! v1: %res8 = v_mul_lo_u32 %res8_tmp, %a
87      //~gfx9! v1: %res8_tmp0 = v_lshlrev_b32 2, %a
88      //~gfx9! v1: %res8_tmp1 = v_lshl_add_u32 %a, 6, %res8_tmp0
89      //~gfx9! v1: %res8_tmp2 = v_lshl_add_u32 %a, 8, %res8_tmp1
90      //~gfx9! v1: %res8 = v_lshl_add_u32 %a, 11, %res8_tmp2
91      //~gfx10! v1: %res8 = v_mul_lo_u32 0x944, %a
92      //! p_unit_test 8, %res8
93      writeout(8, bld.v_mul_imm(bld.def(v1), inputs[0], 4 | 64 | 256 | 2048));
94
95      /* lshl+add optimization with inline constant */
96
      /* 2 | 4 == 6 (two set bits): shifts plus add on gfx8/gfx9, multiply by 6 on gfx10. */
97      //~gfx8! v1: %res9_tmp0 = v_lshlrev_b32 1, %a
98      //~gfx8! v1: %res9_tmp1 = v_lshlrev_b32 2, %a
99      //~gfx8! v1: %res9,  s2: %_ = v_add_co_u32 %res9_tmp1, %res9_tmp0
100      //~gfx9! v1: %res9_tmp0 = v_lshlrev_b32 1, %a
101      //~gfx9! v1: %res9 = v_lshl_add_u32 %a, 2, %res9_tmp0
102      //~gfx10! v1: %res9 = v_mul_lo_u32 6, %a
103      //! p_unit_test 9, %res9
104      writeout(9, bld.v_mul_imm(bld.def(v1), inputs[0], 2 | 4));
105
      /* 2 | 4 | 8 == 14 (three set bits): gfx8 and gfx10 expect a multiply by 14,
       * gfx9 a shift followed by two v_lshl_add_u32. */
106      //~gfx(8|10)! v1: %res10 = v_mul_lo_u32 14, %a
107      //~gfx9! v1: %res10_tmp0 = v_lshlrev_b32 1, %a
108      //~gfx9! v1: %res10_tmp1 = v_lshl_add_u32 %a, 2, %res10_tmp0
109      //~gfx9! v1: %res10 = v_lshl_add_u32 %a, 3, %res10_tmp1
110      //! p_unit_test 10, %res10
111      writeout(10, bld.v_mul_imm(bld.def(v1), inputs[0], 2 | 4 | 8));
112
      /* 2 | 4 | 8 | 16 == 30 (four set bits): every tested gfx level expects a multiply by 30. */
113      //! v1: %res11 = v_mul_lo_u32 30, %a
114      //! p_unit_test 11, %res11
115      writeout(11, bld.v_mul_imm(bld.def(v1), inputs[0], 2 | 4 | 8 | 16));
116
      /* NOTE(review): presumably finalizes the program and emits it for pattern
       * checking - confirm in helpers.h. */
117      finish_opt_test();
118   }
119END_TEST
120