1/* 2 * Copyright © 2020 Valve Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 */ 24#include "helpers.h" 25 26using namespace aco; 27 28BEGIN_TEST(assembler.s_memtime) 29 for (unsigned i = GFX6; i <= GFX10; i++) { 30 if (!setup_cs(NULL, (amd_gfx_level)i)) 31 continue; 32 33 //~gfx[6-7]>> c7800000 34 //~gfx[6-7]! bf810000 35 //~gfx[8-9]>> s_memtime s[0:1] ; c0900000 00000000 36 //~gfx10>> s_memtime s[0:1] ; f4900000 fa000000 37 bld.smem(aco_opcode::s_memtime, bld.def(s2)).def(0).setFixed(PhysReg{0}); 38 39 finish_assembler_test(); 40 } 41END_TEST 42 43BEGIN_TEST(assembler.branch_3f) 44 if (!setup_cs(NULL, (amd_gfx_level)GFX10)) 45 return; 46 47 //! BB0: 48 //! s_branch BB1 ; bf820040 49 //! s_nop 0 ; bf800000 50 bld.sopp(aco_opcode::s_branch, Definition(PhysReg(0), s2), 1); 51 52 for (unsigned i = 0; i < 0x3f; i++) 53 bld.vop1(aco_opcode::v_nop); 54 55 bld.reset(program->create_and_insert_block()); 56 57 program->blocks[1].linear_preds.push_back(0u); 58 59 finish_assembler_test(); 60END_TEST 61 62BEGIN_TEST(assembler.long_jump.unconditional_forwards) 63 if (!setup_cs(NULL, (amd_gfx_level)GFX10)) 64 return; 65 66 //!BB0: 67 //! s_getpc_b64 s[0:1] ; be801f00 68 //! s_addc_u32 s0, s0, 0x20014 ; 8200ff00 00020014 69 //! s_bitcmp1_b32 s0, 0 ; bf0d8000 70 //! s_bitset0_b32 s0, 0 ; be801b80 71 //! s_setpc_b64 s[0:1] ; be802000 72 bld.sopp(aco_opcode::s_branch, Definition(PhysReg(0), s2), 2); 73 74 bld.reset(program->create_and_insert_block()); 75 76 //! s_nop 0 ; bf800000 77 //!(then repeated 32767 times) 78 for (unsigned i = 0; i < INT16_MAX + 1; i++) 79 bld.sopp(aco_opcode::s_nop, -1, 0); 80 81 //! BB2: 82 //! s_endpgm ; bf810000 83 bld.reset(program->create_and_insert_block()); 84 85 program->blocks[2].linear_preds.push_back(0u); 86 program->blocks[2].linear_preds.push_back(1u); 87 88 finish_assembler_test(); 89END_TEST 90 91BEGIN_TEST(assembler.long_jump.conditional_forwards) 92 if (!setup_cs(NULL, (amd_gfx_level)GFX10)) 93 return; 94 95 //! BB0: 96 //! s_cbranch_scc1 BB1 ; bf850006 97 //! s_getpc_b64 s[0:1] ; be801f00 98 //! s_addc_u32 s0, s0, 0x20014 ; 8200ff00 00020014 99 //! s_bitcmp1_b32 s0, 0 ; bf0d8000 100 //! s_bitset0_b32 s0, 0 ; be801b80 101 //! s_setpc_b64 s[0:1] ; be802000 102 bld.sopp(aco_opcode::s_cbranch_scc0, Definition(PhysReg(0), s2), 2); 103 104 bld.reset(program->create_and_insert_block()); 105 106 //! BB1: 107 //! s_nop 0 ; bf800000 108 //!(then repeated 32767 times) 109 for (unsigned i = 0; i < INT16_MAX + 1; i++) 110 bld.sopp(aco_opcode::s_nop, -1, 0); 111 112 //! BB2: 113 //! s_endpgm ; bf810000 114 bld.reset(program->create_and_insert_block()); 115 116 program->blocks[1].linear_preds.push_back(0u); 117 program->blocks[2].linear_preds.push_back(0u); 118 program->blocks[2].linear_preds.push_back(1u); 119 120 finish_assembler_test(); 121END_TEST 122 123BEGIN_TEST(assembler.long_jump.unconditional_backwards) 124 if (!setup_cs(NULL, (amd_gfx_level)GFX10)) 125 return; 126 127 //!BB0: 128 //! s_nop 0 ; bf800000 129 //!(then repeated 32767 times) 130 for (unsigned i = 0; i < INT16_MAX + 1; i++) 131 bld.sopp(aco_opcode::s_nop, -1, 0); 132 133 //! s_getpc_b64 s[0:1] ; be801f00 134 //! s_addc_u32 s0, s0, 0xfffdfffc ; 8200ff00 fffdfffc 135 //! s_bitcmp1_b32 s0, 0 ; bf0d8000 136 //! s_bitset0_b32 s0, 0 ; be801b80 137 //! s_setpc_b64 s[0:1] ; be802000 138 bld.sopp(aco_opcode::s_branch, Definition(PhysReg(0), s2), 0); 139 140 //! BB1: 141 //! s_endpgm ; bf810000 142 bld.reset(program->create_and_insert_block()); 143 144 program->blocks[0].linear_preds.push_back(0u); 145 program->blocks[1].linear_preds.push_back(0u); 146 147 finish_assembler_test(); 148END_TEST 149 150BEGIN_TEST(assembler.long_jump.conditional_backwards) 151 if (!setup_cs(NULL, (amd_gfx_level)GFX10)) 152 return; 153 154 //!BB0: 155 //! s_nop 0 ; bf800000 156 //!(then repeated 32767 times) 157 for (unsigned i = 0; i < INT16_MAX + 1; i++) 158 bld.sopp(aco_opcode::s_nop, -1, 0); 159 160 //! s_cbranch_execz BB1 ; bf880006 161 //! s_getpc_b64 s[0:1] ; be801f00 162 //! s_addc_u32 s0, s0, 0xfffdfff8 ; 8200ff00 fffdfff8 163 //! s_bitcmp1_b32 s0, 0 ; bf0d8000 164 //! s_bitset0_b32 s0, 0 ; be801b80 165 //! s_setpc_b64 s[0:1] ; be802000 166 bld.sopp(aco_opcode::s_cbranch_execnz, Definition(PhysReg(0), s2), 0); 167 168 //! BB1: 169 //! s_endpgm ; bf810000 170 bld.reset(program->create_and_insert_block()); 171 172 program->blocks[0].linear_preds.push_back(0u); 173 program->blocks[1].linear_preds.push_back(0u); 174 175 finish_assembler_test(); 176END_TEST 177 178BEGIN_TEST(assembler.long_jump.3f) 179 if (!setup_cs(NULL, (amd_gfx_level)GFX10)) 180 return; 181 182 //! BB0: 183 //! s_branch BB1 ; bf820040 184 //! s_nop 0 ; bf800000 185 bld.sopp(aco_opcode::s_branch, Definition(PhysReg(0), s2), 1); 186 187 for (unsigned i = 0; i < 0x3f - 6; i++) // a unconditional long jump is 6 dwords 188 bld.vop1(aco_opcode::v_nop); 189 bld.sopp(aco_opcode::s_branch, Definition(PhysReg(0), s2), 2); 190 191 bld.reset(program->create_and_insert_block()); 192 for (unsigned i = 0; i < INT16_MAX + 1; i++) 193 bld.vop1(aco_opcode::v_nop); 194 bld.reset(program->create_and_insert_block()); 195 196 program->blocks[1].linear_preds.push_back(0u); 197 program->blocks[2].linear_preds.push_back(0u); 198 program->blocks[2].linear_preds.push_back(1u); 199 200 finish_assembler_test(); 201END_TEST 202 203BEGIN_TEST(assembler.long_jump.constaddr) 204 if (!setup_cs(NULL, (amd_gfx_level)GFX10)) 205 return; 206 207 //>> s_getpc_b64 s[0:1] ; be801f00 208 bld.sopp(aco_opcode::s_branch, Definition(PhysReg(0), s2), 2); 209 210 bld.reset(program->create_and_insert_block()); 211 212 for (unsigned i = 0; i < INT16_MAX + 1; i++) 213 bld.sopp(aco_opcode::s_nop, -1, 0); 214 215 bld.reset(program->create_and_insert_block()); 216 217 //>> s_getpc_b64 s[0:1] ; be801f00 218 //! s_add_u32 s0, s0, 0xe4 ; 8000ff00 000000e4 219 bld.sop1(aco_opcode::p_constaddr_getpc, Definition(PhysReg(0), s2), Operand::zero()); 220 bld.sop2(aco_opcode::p_constaddr_addlo, Definition(PhysReg(0), s1), bld.def(s1, scc), 221 Operand(PhysReg(0), s1), Operand::zero(), Operand::zero()); 222 223 program->blocks[2].linear_preds.push_back(0u); 224 program->blocks[2].linear_preds.push_back(1u); 225 226 finish_assembler_test(); 227END_TEST 228 229BEGIN_TEST(assembler.v_add3) 230 for (unsigned i = GFX9; i <= GFX10; i++) { 231 if (!setup_cs(NULL, (amd_gfx_level)i)) 232 continue; 233 234 //~gfx9>> v_add3_u32 v0, 0, 0, 0 ; d1ff0000 02010080 235 //~gfx10>> v_add3_u32 v0, 0, 0, 0 ; d76d0000 02010080 236 aco_ptr<VOP3_instruction> add3{create_instruction<VOP3_instruction>(aco_opcode::v_add3_u32, Format::VOP3, 3, 1)}; 237 add3->operands[0] = Operand::zero(); 238 add3->operands[1] = Operand::zero(); 239 add3->operands[2] = Operand::zero(); 240 add3->definitions[0] = Definition(PhysReg(0), v1); 241 bld.insert(std::move(add3)); 242 243 finish_assembler_test(); 244 } 245END_TEST 246 247BEGIN_TEST(assembler.v_add3_clamp) 248 for (unsigned i = GFX9; i <= GFX10; i++) { 249 if (!setup_cs(NULL, (amd_gfx_level)i)) 250 continue; 251 252 //~gfx9>> integer addition + clamp ; d1ff8000 02010080 253 //~gfx10>> integer addition + clamp ; d76d8000 02010080 254 aco_ptr<VOP3_instruction> add3{create_instruction<VOP3_instruction>(aco_opcode::v_add3_u32, Format::VOP3, 3, 1)}; 255 add3->operands[0] = Operand::zero(); 256 add3->operands[1] = Operand::zero(); 257 add3->operands[2] = Operand::zero(); 258 add3->definitions[0] = Definition(PhysReg(0), v1); 259 add3->clamp = 1; 260 bld.insert(std::move(add3)); 261 262 finish_assembler_test(); 263 } 264END_TEST 265 266BEGIN_TEST(assembler.smem_offset) 267 for (unsigned i = GFX9; i <= GFX10; i++) { 268 if (!setup_cs(NULL, (amd_gfx_level)i)) 269 continue; 270 271 Definition dst(PhysReg(7), s1); 272 Operand sbase(PhysReg(6), s2); 273 Operand offset(PhysReg(5), s1); 274 275 //~gfx9>> s_load_dword s7, s[6:7], s5 ; c00001c3 00000005 276 //~gfx10>> s_load_dword s7, s[6:7], s5 ; f40001c3 0a000000 277 bld.smem(aco_opcode::s_load_dword, dst, sbase, offset); 278 //~gfx9! s_load_dword s7, s[6:7], 0x42 ; c00201c3 00000042 279 //~gfx10! s_load_dword s7, s[6:7], 0x42 ; f40001c3 fa000042 280 bld.smem(aco_opcode::s_load_dword, dst, sbase, Operand::c32(0x42)); 281 if (i >= GFX9) { 282 //~gfx9! s_load_dword s7, s[6:7], s5 offset:0x42 ; c00241c3 0a000042 283 //~gfx10! s_load_dword s7, s[6:7], s5 offset:0x42 ; f40001c3 0a000042 284 bld.smem(aco_opcode::s_load_dword, dst, sbase, Operand::c32(0x42), offset); 285 } 286 287 finish_assembler_test(); 288 } 289END_TEST 290 291BEGIN_TEST(assembler.p_constaddr) 292 if (!setup_cs(NULL, GFX9)) 293 return; 294 295 Definition dst0 = bld.def(s2); 296 Definition dst1 = bld.def(s2); 297 dst0.setFixed(PhysReg(0)); 298 dst1.setFixed(PhysReg(2)); 299 300 //>> s_getpc_b64 s[0:1] ; be801c00 301 //! s_add_u32 s0, s0, 24 ; 8000ff00 00000018 302 bld.pseudo(aco_opcode::p_constaddr, dst0, Operand::zero()); 303 304 //! s_getpc_b64 s[2:3] ; be821c00 305 //! s_add_u32 s2, s2, 44 ; 8002ff02 0000002c 306 bld.pseudo(aco_opcode::p_constaddr, dst1, Operand::c32(32)); 307 308 aco::lower_to_hw_instr(program.get()); 309 finish_assembler_test(); 310END_TEST 311