1/*
2 * Copyright © 2020 Valve Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 */
24#include "helpers.h"
25
26using namespace aco;
27
28BEGIN_TEST(assembler.s_memtime)
29   for (unsigned i = GFX6; i <= GFX10; i++) {
30      if (!setup_cs(NULL, (amd_gfx_level)i))
31         continue;
32
33      //~gfx[6-7]>> c7800000
34      //~gfx[6-7]!  bf810000
35      //~gfx[8-9]>> s_memtime s[0:1] ; c0900000 00000000
36      //~gfx10>> s_memtime s[0:1] ; f4900000 fa000000
37      bld.smem(aco_opcode::s_memtime, bld.def(s2)).def(0).setFixed(PhysReg{0});
38
39      finish_assembler_test();
40   }
41END_TEST
42
43BEGIN_TEST(assembler.branch_3f)
44   if (!setup_cs(NULL, (amd_gfx_level)GFX10))
45      return;
46
47   //! BB0:
48   //! s_branch BB1                                                ; bf820040
49   //! s_nop 0                                                     ; bf800000
50   bld.sopp(aco_opcode::s_branch, Definition(PhysReg(0), s2), 1);
51
52   for (unsigned i = 0; i < 0x3f; i++)
53      bld.vop1(aco_opcode::v_nop);
54
55   bld.reset(program->create_and_insert_block());
56
57   program->blocks[1].linear_preds.push_back(0u);
58
59   finish_assembler_test();
60END_TEST
61
62BEGIN_TEST(assembler.long_jump.unconditional_forwards)
63   if (!setup_cs(NULL, (amd_gfx_level)GFX10))
64      return;
65
66   //!BB0:
67   //! s_getpc_b64 s[0:1]                                          ; be801f00
68   //! s_addc_u32 s0, s0, 0x20014                                  ; 8200ff00 00020014
69   //! s_bitcmp1_b32 s0, 0                                         ; bf0d8000
70   //! s_bitset0_b32 s0, 0                                         ; be801b80
71   //! s_setpc_b64 s[0:1]                                          ; be802000
72   bld.sopp(aco_opcode::s_branch, Definition(PhysReg(0), s2), 2);
73
74   bld.reset(program->create_and_insert_block());
75
76   //! s_nop 0                                                     ; bf800000
77   //!(then repeated 32767 times)
78   for (unsigned i = 0; i < INT16_MAX + 1; i++)
79      bld.sopp(aco_opcode::s_nop, -1, 0);
80
81   //! BB2:
82   //! s_endpgm                                                    ; bf810000
83   bld.reset(program->create_and_insert_block());
84
85   program->blocks[2].linear_preds.push_back(0u);
86   program->blocks[2].linear_preds.push_back(1u);
87
88   finish_assembler_test();
89END_TEST
90
91BEGIN_TEST(assembler.long_jump.conditional_forwards)
92   if (!setup_cs(NULL, (amd_gfx_level)GFX10))
93      return;
94
95   //! BB0:
96   //! s_cbranch_scc1 BB1                                          ; bf850006
97   //! s_getpc_b64 s[0:1]                                          ; be801f00
98   //! s_addc_u32 s0, s0, 0x20014                                  ; 8200ff00 00020014
99   //! s_bitcmp1_b32 s0, 0                                         ; bf0d8000
100   //! s_bitset0_b32 s0, 0                                         ; be801b80
101   //! s_setpc_b64 s[0:1]                                          ; be802000
102   bld.sopp(aco_opcode::s_cbranch_scc0, Definition(PhysReg(0), s2), 2);
103
104   bld.reset(program->create_and_insert_block());
105
106   //! BB1:
107   //! s_nop 0 ; bf800000
108   //!(then repeated 32767 times)
109   for (unsigned i = 0; i < INT16_MAX + 1; i++)
110      bld.sopp(aco_opcode::s_nop, -1, 0);
111
112   //! BB2:
113   //! s_endpgm                                                    ; bf810000
114   bld.reset(program->create_and_insert_block());
115
116   program->blocks[1].linear_preds.push_back(0u);
117   program->blocks[2].linear_preds.push_back(0u);
118   program->blocks[2].linear_preds.push_back(1u);
119
120   finish_assembler_test();
121END_TEST
122
123BEGIN_TEST(assembler.long_jump.unconditional_backwards)
124   if (!setup_cs(NULL, (amd_gfx_level)GFX10))
125      return;
126
127   //!BB0:
128   //! s_nop 0                                                     ; bf800000
129   //!(then repeated 32767 times)
130   for (unsigned i = 0; i < INT16_MAX + 1; i++)
131      bld.sopp(aco_opcode::s_nop, -1, 0);
132
133   //! s_getpc_b64 s[0:1]                                          ; be801f00
134   //! s_addc_u32 s0, s0, 0xfffdfffc                               ; 8200ff00 fffdfffc
135   //! s_bitcmp1_b32 s0, 0                                         ; bf0d8000
136   //! s_bitset0_b32 s0, 0                                         ; be801b80
137   //! s_setpc_b64 s[0:1]                                          ; be802000
138   bld.sopp(aco_opcode::s_branch, Definition(PhysReg(0), s2), 0);
139
140   //! BB1:
141   //! s_endpgm                                                    ; bf810000
142   bld.reset(program->create_and_insert_block());
143
144   program->blocks[0].linear_preds.push_back(0u);
145   program->blocks[1].linear_preds.push_back(0u);
146
147   finish_assembler_test();
148END_TEST
149
150BEGIN_TEST(assembler.long_jump.conditional_backwards)
151   if (!setup_cs(NULL, (amd_gfx_level)GFX10))
152      return;
153
154   //!BB0:
155   //! s_nop 0                                                     ; bf800000
156   //!(then repeated 32767 times)
157   for (unsigned i = 0; i < INT16_MAX + 1; i++)
158      bld.sopp(aco_opcode::s_nop, -1, 0);
159
160   //! s_cbranch_execz BB1                                         ; bf880006
161   //! s_getpc_b64 s[0:1]                                          ; be801f00
162   //! s_addc_u32 s0, s0, 0xfffdfff8                               ; 8200ff00 fffdfff8
163   //! s_bitcmp1_b32 s0, 0                                         ; bf0d8000
164   //! s_bitset0_b32 s0, 0                                         ; be801b80
165   //! s_setpc_b64 s[0:1]                                          ; be802000
166   bld.sopp(aco_opcode::s_cbranch_execnz, Definition(PhysReg(0), s2), 0);
167
168   //! BB1:
169   //! s_endpgm                                                    ; bf810000
170   bld.reset(program->create_and_insert_block());
171
172   program->blocks[0].linear_preds.push_back(0u);
173   program->blocks[1].linear_preds.push_back(0u);
174
175   finish_assembler_test();
176END_TEST
177
178BEGIN_TEST(assembler.long_jump.3f)
179   if (!setup_cs(NULL, (amd_gfx_level)GFX10))
180      return;
181
182   //! BB0:
183   //! s_branch BB1                                                ; bf820040
184   //! s_nop 0                                                     ; bf800000
185   bld.sopp(aco_opcode::s_branch, Definition(PhysReg(0), s2), 1);
186
187   for (unsigned i = 0; i < 0x3f - 6; i++) // a unconditional long jump is 6 dwords
188      bld.vop1(aco_opcode::v_nop);
189   bld.sopp(aco_opcode::s_branch, Definition(PhysReg(0), s2), 2);
190
191   bld.reset(program->create_and_insert_block());
192   for (unsigned i = 0; i < INT16_MAX + 1; i++)
193      bld.vop1(aco_opcode::v_nop);
194   bld.reset(program->create_and_insert_block());
195
196   program->blocks[1].linear_preds.push_back(0u);
197   program->blocks[2].linear_preds.push_back(0u);
198   program->blocks[2].linear_preds.push_back(1u);
199
200   finish_assembler_test();
201END_TEST
202
203BEGIN_TEST(assembler.long_jump.constaddr)
204   if (!setup_cs(NULL, (amd_gfx_level)GFX10))
205      return;
206
207   //>> s_getpc_b64 s[0:1]                                          ; be801f00
208   bld.sopp(aco_opcode::s_branch, Definition(PhysReg(0), s2), 2);
209
210   bld.reset(program->create_and_insert_block());
211
212   for (unsigned i = 0; i < INT16_MAX + 1; i++)
213      bld.sopp(aco_opcode::s_nop, -1, 0);
214
215   bld.reset(program->create_and_insert_block());
216
217   //>> s_getpc_b64 s[0:1]                                          ; be801f00
218   //! s_add_u32 s0, s0, 0xe4                                      ; 8000ff00 000000e4
219   bld.sop1(aco_opcode::p_constaddr_getpc, Definition(PhysReg(0), s2), Operand::zero());
220   bld.sop2(aco_opcode::p_constaddr_addlo, Definition(PhysReg(0), s1), bld.def(s1, scc),
221            Operand(PhysReg(0), s1), Operand::zero(), Operand::zero());
222
223   program->blocks[2].linear_preds.push_back(0u);
224   program->blocks[2].linear_preds.push_back(1u);
225
226   finish_assembler_test();
227END_TEST
228
229BEGIN_TEST(assembler.v_add3)
230   for (unsigned i = GFX9; i <= GFX10; i++) {
231      if (!setup_cs(NULL, (amd_gfx_level)i))
232         continue;
233
234      //~gfx9>> v_add3_u32 v0, 0, 0, 0 ; d1ff0000 02010080
235      //~gfx10>> v_add3_u32 v0, 0, 0, 0 ; d76d0000 02010080
236      aco_ptr<VOP3_instruction> add3{create_instruction<VOP3_instruction>(aco_opcode::v_add3_u32, Format::VOP3, 3, 1)};
237      add3->operands[0] = Operand::zero();
238      add3->operands[1] = Operand::zero();
239      add3->operands[2] = Operand::zero();
240      add3->definitions[0] = Definition(PhysReg(0), v1);
241      bld.insert(std::move(add3));
242
243      finish_assembler_test();
244   }
245END_TEST
246
247BEGIN_TEST(assembler.v_add3_clamp)
248   for (unsigned i = GFX9; i <= GFX10; i++) {
249      if (!setup_cs(NULL, (amd_gfx_level)i))
250         continue;
251
252      //~gfx9>> integer addition + clamp ; d1ff8000 02010080
253      //~gfx10>> integer addition + clamp ; d76d8000 02010080
254      aco_ptr<VOP3_instruction> add3{create_instruction<VOP3_instruction>(aco_opcode::v_add3_u32, Format::VOP3, 3, 1)};
255      add3->operands[0] = Operand::zero();
256      add3->operands[1] = Operand::zero();
257      add3->operands[2] = Operand::zero();
258      add3->definitions[0] = Definition(PhysReg(0), v1);
259      add3->clamp = 1;
260      bld.insert(std::move(add3));
261
262      finish_assembler_test();
263   }
264END_TEST
265
266BEGIN_TEST(assembler.smem_offset)
267   for (unsigned i = GFX9; i <= GFX10; i++) {
268      if (!setup_cs(NULL, (amd_gfx_level)i))
269         continue;
270
271      Definition dst(PhysReg(7), s1);
272      Operand sbase(PhysReg(6), s2);
273      Operand offset(PhysReg(5), s1);
274
275      //~gfx9>> s_load_dword s7, s[6:7], s5 ; c00001c3 00000005
276      //~gfx10>> s_load_dword s7, s[6:7], s5 ; f40001c3 0a000000
277      bld.smem(aco_opcode::s_load_dword, dst, sbase, offset);
278      //~gfx9! s_load_dword s7, s[6:7], 0x42 ; c00201c3 00000042
279      //~gfx10! s_load_dword s7, s[6:7], 0x42 ; f40001c3 fa000042
280      bld.smem(aco_opcode::s_load_dword, dst, sbase, Operand::c32(0x42));
281      if (i >= GFX9) {
282         //~gfx9! s_load_dword s7, s[6:7], s5 offset:0x42 ; c00241c3 0a000042
283         //~gfx10! s_load_dword s7, s[6:7], s5 offset:0x42 ; f40001c3 0a000042
284         bld.smem(aco_opcode::s_load_dword, dst, sbase, Operand::c32(0x42), offset);
285      }
286
287      finish_assembler_test();
288   }
289END_TEST
290
291BEGIN_TEST(assembler.p_constaddr)
292   if (!setup_cs(NULL, GFX9))
293      return;
294
295   Definition dst0 = bld.def(s2);
296   Definition dst1 = bld.def(s2);
297   dst0.setFixed(PhysReg(0));
298   dst1.setFixed(PhysReg(2));
299
300   //>> s_getpc_b64 s[0:1] ; be801c00
301   //! s_add_u32 s0, s0, 24 ; 8000ff00 00000018
302   bld.pseudo(aco_opcode::p_constaddr, dst0, Operand::zero());
303
304   //! s_getpc_b64 s[2:3] ; be821c00
305   //! s_add_u32 s2, s2, 44 ; 8002ff02 0000002c
306   bld.pseudo(aco_opcode::p_constaddr, dst1, Operand::c32(32));
307
308   aco::lower_to_hw_instr(program.get());
309   finish_assembler_test();
310END_TEST
311