1/* 2 * Copyright © 2019 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include <fcntl.h> 25#include <string.h> 26#include <xf86drm.h> 27 28#include <gtest/gtest.h> 29 30#include "c99_compat.h" 31#include "dev/intel_device_info.h" 32#include "drm-uapi/i915_drm.h" 33#include "genxml/gen_macros.h" 34#include "util/macros.h" 35 36class mi_builder_test; 37 38struct address { 39 uint32_t gem_handle; 40 uint32_t offset; 41}; 42 43#define __gen_address_type struct address 44#define __gen_user_data ::mi_builder_test 45 46uint64_t __gen_combine_address(mi_builder_test *test, void *location, 47 struct address addr, uint32_t delta); 48void * __gen_get_batch_dwords(mi_builder_test *test, unsigned num_dwords); 49struct address __gen_get_batch_address(mi_builder_test *test, 50 void *location); 51 52struct address 53__gen_address_offset(address addr, uint64_t offset) 54{ 55 addr.offset += offset; 56 return addr; 57} 58 59#if GFX_VERx10 >= 75 60#define RSVD_TEMP_REG 0x2678 /* MI_ALU_REG15 */ 61#else 62#define RSVD_TEMP_REG 0x2430 /* GFX7_3DPRIM_START_VERTEX */ 63#endif 64#define MI_BUILDER_NUM_ALLOC_GPRS 15 65#define INPUT_DATA_OFFSET 0 66#define OUTPUT_DATA_OFFSET 2048 67 68#define __genxml_cmd_length(cmd) cmd ## _length 69#define __genxml_cmd_length_bias(cmd) cmd ## _length_bias 70#define __genxml_cmd_header(cmd) cmd ## _header 71#define __genxml_cmd_pack(cmd) cmd ## _pack 72 73#include "genxml/genX_pack.h" 74#include "mi_builder.h" 75 76#define emit_cmd(cmd, name) \ 77 for (struct cmd name = { __genxml_cmd_header(cmd) }, \ 78 *_dst = (struct cmd *) emit_dwords(__genxml_cmd_length(cmd)); \ 79 __builtin_expect(_dst != NULL, 1); \ 80 __genxml_cmd_pack(cmd)(this, (void *)_dst, &name), _dst = NULL) 81 82#include <vector> 83 84class mi_builder_test : public ::testing::Test { 85public: 86 mi_builder_test(); 87 ~mi_builder_test(); 88 89 void SetUp(); 90 91 void *emit_dwords(int num_dwords); 92 void submit_batch(); 93 94 inline address in_addr(uint32_t offset) 95 { 96 address addr; 97 addr.gem_handle = data_bo_handle; 98 addr.offset = INPUT_DATA_OFFSET + offset; 99 return addr; 100 } 101 102 inline address out_addr(uint32_t offset) 103 { 104 address addr; 105 addr.gem_handle = data_bo_handle; 106 addr.offset = OUTPUT_DATA_OFFSET + offset; 107 return addr; 108 } 109 110 inline mi_value in_mem64(uint32_t offset) 111 { 112 return mi_mem64(in_addr(offset)); 113 } 114 115 inline mi_value in_mem32(uint32_t offset) 116 { 117 return mi_mem32(in_addr(offset)); 118 } 119 120 inline mi_value out_mem64(uint32_t offset) 121 { 122 return mi_mem64(out_addr(offset)); 123 } 124 125 inline mi_value out_mem32(uint32_t offset) 126 { 127 return mi_mem32(out_addr(offset)); 128 } 129 130 int fd; 131 int ctx_id; 132 intel_device_info devinfo; 133 134 uint32_t batch_bo_handle; 135#if GFX_VER >= 8 136 uint64_t batch_bo_addr; 137#endif 138 uint32_t batch_offset; 139 void *batch_map; 140 141#if GFX_VER < 8 142 std::vector<drm_i915_gem_relocation_entry> relocs; 143#endif 144 145 uint32_t data_bo_handle; 146#if GFX_VER >= 8 147 uint64_t data_bo_addr; 148#endif 149 void *data_map; 150 char *input; 151 char *output; 152 uint64_t canary; 153 154 mi_builder b; 155}; 156 157mi_builder_test::mi_builder_test() : 158 fd(-1) 159{ } 160 161mi_builder_test::~mi_builder_test() 162{ 163 close(fd); 164} 165 166// 1 MB of batch should be enough for anyone, right? 167#define BATCH_BO_SIZE (256 * 4096) 168#define DATA_BO_SIZE 4096 169 170void 171mi_builder_test::SetUp() 172{ 173 drmDevicePtr devices[8]; 174 int max_devices = drmGetDevices2(0, devices, 8); 175 176 int i; 177 for (i = 0; i < max_devices; i++) { 178 if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER && 179 devices[i]->bustype == DRM_BUS_PCI && 180 devices[i]->deviceinfo.pci->vendor_id == 0x8086) { 181 fd = open(devices[i]->nodes[DRM_NODE_RENDER], O_RDWR | O_CLOEXEC); 182 if (fd < 0) 183 continue; 184 185 /* We don't really need to do this when running on hardware because 186 * we can just pull it from the drmDevice. However, without doing 187 * this, intel_dump_gpu gets a bit of heartburn and we can't use the 188 * --device option with it. 189 */ 190 int device_id; 191 drm_i915_getparam getparam = drm_i915_getparam(); 192 getparam.param = I915_PARAM_CHIPSET_ID; 193 getparam.value = &device_id; 194 ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, 195 (void *)&getparam), 0) << strerror(errno); 196 197 ASSERT_TRUE(intel_get_device_info_from_pci_id(device_id, &devinfo)); 198 if (devinfo.ver != GFX_VER || 199 (devinfo.platform == INTEL_PLATFORM_HSW) != (GFX_VERx10 == 75)) { 200 close(fd); 201 fd = -1; 202 continue; 203 } 204 205 206 /* Found a device! */ 207 break; 208 } 209 } 210 ASSERT_TRUE(i < max_devices) << "Failed to find a DRM device"; 211 212 drm_i915_gem_context_create ctx_create = drm_i915_gem_context_create(); 213 ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, 214 (void *)&ctx_create), 0) << strerror(errno); 215 ctx_id = ctx_create.ctx_id; 216 217 if (GFX_VER >= 8) { 218 /* On gfx8+, we require softpin */ 219 int has_softpin; 220 drm_i915_getparam getparam = drm_i915_getparam(); 221 getparam.param = I915_PARAM_HAS_EXEC_SOFTPIN; 222 getparam.value = &has_softpin; 223 ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, 224 (void *)&getparam), 0) << strerror(errno); 225 ASSERT_TRUE(has_softpin); 226 } 227 228 // Create the batch buffer 229 drm_i915_gem_create gem_create = drm_i915_gem_create(); 230 gem_create.size = BATCH_BO_SIZE; 231 ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE, 232 (void *)&gem_create), 0) << strerror(errno); 233 batch_bo_handle = gem_create.handle; 234#if GFX_VER >= 8 235 batch_bo_addr = 0xffffffffdff70000ULL; 236#endif 237 238 drm_i915_gem_caching gem_caching = drm_i915_gem_caching(); 239 gem_caching.handle = batch_bo_handle; 240 gem_caching.caching = I915_CACHING_CACHED; 241 ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, 242 (void *)&gem_caching), 0) << strerror(errno); 243 244 drm_i915_gem_mmap gem_mmap = drm_i915_gem_mmap(); 245 gem_mmap.handle = batch_bo_handle; 246 gem_mmap.offset = 0; 247 gem_mmap.size = BATCH_BO_SIZE; 248 gem_mmap.flags = 0; 249 ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP, 250 (void *)&gem_mmap), 0) << strerror(errno); 251 batch_map = (void *)(uintptr_t)gem_mmap.addr_ptr; 252 253 // Start the batch at zero 254 batch_offset = 0; 255 256 // Create the data buffer 257 gem_create = drm_i915_gem_create(); 258 gem_create.size = DATA_BO_SIZE; 259 ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE, 260 (void *)&gem_create), 0) << strerror(errno); 261 data_bo_handle = gem_create.handle; 262#if GFX_VER >= 8 263 data_bo_addr = 0xffffffffefff0000ULL; 264#endif 265 266 gem_caching = drm_i915_gem_caching(); 267 gem_caching.handle = data_bo_handle; 268 gem_caching.caching = I915_CACHING_CACHED; 269 ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, 270 (void *)&gem_caching), 0) << strerror(errno); 271 272 gem_mmap = drm_i915_gem_mmap(); 273 gem_mmap.handle = data_bo_handle; 274 gem_mmap.offset = 0; 275 gem_mmap.size = DATA_BO_SIZE; 276 gem_mmap.flags = 0; 277 ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP, 278 (void *)&gem_mmap), 0) << strerror(errno); 279 data_map = (void *)(uintptr_t)gem_mmap.addr_ptr; 280 input = (char *)data_map + INPUT_DATA_OFFSET; 281 output = (char *)data_map + OUTPUT_DATA_OFFSET; 282 283 // Fill the test data with garbage 284 memset(data_map, 139, DATA_BO_SIZE); 285 memset(&canary, 139, sizeof(canary)); 286 287 mi_builder_init(&b, &devinfo, this); 288} 289 290void * 291mi_builder_test::emit_dwords(int num_dwords) 292{ 293 void *ptr = (void *)((char *)batch_map + batch_offset); 294 batch_offset += num_dwords * 4; 295 assert(batch_offset < BATCH_BO_SIZE); 296 return ptr; 297} 298 299void 300mi_builder_test::submit_batch() 301{ 302 mi_builder_emit(&b, GENX(MI_BATCH_BUFFER_END), bbe); 303 304 // Round batch up to an even number of dwords. 305 if (batch_offset & 4) 306 mi_builder_emit(&b, GENX(MI_NOOP), noop); 307 308 drm_i915_gem_exec_object2 objects[2]; 309 memset(objects, 0, sizeof(objects)); 310 311 objects[0].handle = data_bo_handle; 312 objects[0].relocation_count = 0; 313 objects[0].relocs_ptr = 0; 314#if GFX_VER >= 8 /* On gfx8+, we pin everything */ 315 objects[0].flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS | 316 EXEC_OBJECT_PINNED | 317 EXEC_OBJECT_WRITE; 318 objects[0].offset = data_bo_addr; 319#else 320 objects[0].flags = EXEC_OBJECT_WRITE; 321 objects[0].offset = -1; 322#endif 323 324 objects[1].handle = batch_bo_handle; 325#if GFX_VER >= 8 /* On gfx8+, we don't use relocations */ 326 objects[1].relocation_count = 0; 327 objects[1].relocs_ptr = 0; 328 objects[1].flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS | 329 EXEC_OBJECT_PINNED; 330 objects[1].offset = batch_bo_addr; 331#else 332 objects[1].relocation_count = relocs.size(); 333 objects[1].relocs_ptr = (uintptr_t)(void *)&relocs[0]; 334 objects[1].flags = 0; 335 objects[1].offset = -1; 336#endif 337 338 drm_i915_gem_execbuffer2 execbuf = drm_i915_gem_execbuffer2(); 339 execbuf.buffers_ptr = (uintptr_t)(void *)objects; 340 execbuf.buffer_count = 2; 341 execbuf.batch_start_offset = 0; 342 execbuf.batch_len = batch_offset; 343 execbuf.flags = I915_EXEC_HANDLE_LUT | I915_EXEC_RENDER; 344 execbuf.rsvd1 = ctx_id; 345 346 ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, 347 (void *)&execbuf), 0) << strerror(errno); 348 349 drm_i915_gem_wait gem_wait = drm_i915_gem_wait(); 350 gem_wait.bo_handle = batch_bo_handle; 351 gem_wait.timeout_ns = INT64_MAX; 352 ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_WAIT, 353 (void *)&gem_wait), 0) << strerror(errno); 354} 355 356uint64_t 357__gen_combine_address(mi_builder_test *test, void *location, 358 address addr, uint32_t delta) 359{ 360#if GFX_VER >= 8 361 uint64_t addr_u64 = addr.gem_handle == test->data_bo_handle ? 362 test->data_bo_addr : test->batch_bo_addr; 363 return addr_u64 + addr.offset + delta; 364#else 365 drm_i915_gem_relocation_entry reloc = drm_i915_gem_relocation_entry(); 366 reloc.target_handle = addr.gem_handle == test->data_bo_handle ? 0 : 1; 367 reloc.delta = addr.offset + delta; 368 reloc.offset = (char *)location - (char *)test->batch_map; 369 reloc.presumed_offset = -1; 370 test->relocs.push_back(reloc); 371 372 return reloc.delta; 373#endif 374} 375 376void * 377__gen_get_batch_dwords(mi_builder_test *test, unsigned num_dwords) 378{ 379 return test->emit_dwords(num_dwords); 380} 381 382struct address 383__gen_get_batch_address(mi_builder_test *test, void *location) 384{ 385 assert(location >= test->batch_map); 386 size_t offset = (char *)location - (char *)test->batch_map; 387 assert(offset < BATCH_BO_SIZE); 388 assert(offset <= UINT32_MAX); 389 390 return (struct address) { 391 .gem_handle = test->batch_bo_handle, 392 .offset = (uint32_t)offset, 393 }; 394} 395 396#include "genxml/genX_pack.h" 397#include "mi_builder.h" 398 399TEST_F(mi_builder_test, imm_mem) 400{ 401 const uint64_t value = 0x0123456789abcdef; 402 403 mi_store(&b, out_mem64(0), mi_imm(value)); 404 mi_store(&b, out_mem32(8), mi_imm(value)); 405 406 submit_batch(); 407 408 // 64 -> 64 409 EXPECT_EQ(*(uint64_t *)(output + 0), value); 410 411 // 64 -> 32 412 EXPECT_EQ(*(uint32_t *)(output + 8), (uint32_t)value); 413 EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary); 414} 415 416/* mem -> mem copies are only supported on HSW+ */ 417#if GFX_VERx10 >= 75 418TEST_F(mi_builder_test, mem_mem) 419{ 420 const uint64_t value = 0x0123456789abcdef; 421 *(uint64_t *)input = value; 422 423 mi_store(&b, out_mem64(0), in_mem64(0)); 424 mi_store(&b, out_mem32(8), in_mem64(0)); 425 mi_store(&b, out_mem32(16), in_mem32(0)); 426 mi_store(&b, out_mem64(24), in_mem32(0)); 427 428 submit_batch(); 429 430 // 64 -> 64 431 EXPECT_EQ(*(uint64_t *)(output + 0), value); 432 433 // 64 -> 32 434 EXPECT_EQ(*(uint32_t *)(output + 8), (uint32_t)value); 435 EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary); 436 437 // 32 -> 32 438 EXPECT_EQ(*(uint32_t *)(output + 16), (uint32_t)value); 439 EXPECT_EQ(*(uint32_t *)(output + 20), (uint32_t)canary); 440 441 // 32 -> 64 442 EXPECT_EQ(*(uint64_t *)(output + 24), (uint64_t)(uint32_t)value); 443} 444#endif 445 446TEST_F(mi_builder_test, imm_reg) 447{ 448 const uint64_t value = 0x0123456789abcdef; 449 450 mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary)); 451 mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(value)); 452 mi_store(&b, out_mem64(0), mi_reg64(RSVD_TEMP_REG)); 453 454 mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary)); 455 mi_store(&b, mi_reg32(RSVD_TEMP_REG), mi_imm(value)); 456 mi_store(&b, out_mem64(8), mi_reg64(RSVD_TEMP_REG)); 457 458 submit_batch(); 459 460 // 64 -> 64 461 EXPECT_EQ(*(uint64_t *)(output + 0), value); 462 463 // 64 -> 32 464 EXPECT_EQ(*(uint32_t *)(output + 8), (uint32_t)value); 465 EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary); 466} 467 468TEST_F(mi_builder_test, mem_reg) 469{ 470 const uint64_t value = 0x0123456789abcdef; 471 *(uint64_t *)input = value; 472 473 mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary)); 474 mi_store(&b, mi_reg64(RSVD_TEMP_REG), in_mem64(0)); 475 mi_store(&b, out_mem64(0), mi_reg64(RSVD_TEMP_REG)); 476 477 mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary)); 478 mi_store(&b, mi_reg32(RSVD_TEMP_REG), in_mem64(0)); 479 mi_store(&b, out_mem64(8), mi_reg64(RSVD_TEMP_REG)); 480 481 mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary)); 482 mi_store(&b, mi_reg32(RSVD_TEMP_REG), in_mem32(0)); 483 mi_store(&b, out_mem64(16), mi_reg64(RSVD_TEMP_REG)); 484 485 mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary)); 486 mi_store(&b, mi_reg64(RSVD_TEMP_REG), in_mem32(0)); 487 mi_store(&b, out_mem64(24), mi_reg64(RSVD_TEMP_REG)); 488 489 submit_batch(); 490 491 // 64 -> 64 492 EXPECT_EQ(*(uint64_t *)(output + 0), value); 493 494 // 64 -> 32 495 EXPECT_EQ(*(uint32_t *)(output + 8), (uint32_t)value); 496 EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary); 497 498 // 32 -> 32 499 EXPECT_EQ(*(uint32_t *)(output + 16), (uint32_t)value); 500 EXPECT_EQ(*(uint32_t *)(output + 20), (uint32_t)canary); 501 502 // 32 -> 64 503 EXPECT_EQ(*(uint64_t *)(output + 24), (uint64_t)(uint32_t)value); 504} 505 506TEST_F(mi_builder_test, memset) 507{ 508 const unsigned memset_size = 256; 509 510 mi_memset(&b, out_addr(0), 0xdeadbeef, memset_size); 511 512 submit_batch(); 513 514 uint32_t *out_u32 = (uint32_t *)output; 515 for (unsigned i = 0; i < memset_size / sizeof(*out_u32); i++) 516 EXPECT_EQ(out_u32[i], 0xdeadbeef); 517} 518 519TEST_F(mi_builder_test, memcpy) 520{ 521 const unsigned memcpy_size = 256; 522 523 uint8_t *in_u8 = (uint8_t *)input; 524 for (unsigned i = 0; i < memcpy_size; i++) 525 in_u8[i] = i; 526 527 mi_memcpy(&b, out_addr(0), in_addr(0), 256); 528 529 submit_batch(); 530 531 uint8_t *out_u8 = (uint8_t *)output; 532 for (unsigned i = 0; i < memcpy_size; i++) 533 EXPECT_EQ(out_u8[i], i); 534} 535 536/* Start of MI_MATH section */ 537#if GFX_VERx10 >= 75 538 539#define EXPECT_EQ_IMM(x, imm) EXPECT_EQ(x, mi_value_to_u64(imm)) 540 541TEST_F(mi_builder_test, inot) 542{ 543 const uint64_t value = 0x0123456789abcdef; 544 const uint32_t value_lo = (uint32_t)value; 545 const uint32_t value_hi = (uint32_t)(value >> 32); 546 memcpy(input, &value, sizeof(value)); 547 548 mi_store(&b, out_mem64(0), mi_inot(&b, in_mem64(0))); 549 mi_store(&b, out_mem64(8), mi_inot(&b, mi_inot(&b, in_mem64(0)))); 550 mi_store(&b, out_mem64(16), mi_inot(&b, in_mem32(0))); 551 mi_store(&b, out_mem64(24), mi_inot(&b, in_mem32(4))); 552 mi_store(&b, out_mem32(32), mi_inot(&b, in_mem64(0))); 553 mi_store(&b, out_mem32(36), mi_inot(&b, in_mem32(0))); 554 mi_store(&b, out_mem32(40), mi_inot(&b, mi_inot(&b, in_mem32(0)))); 555 mi_store(&b, out_mem32(44), mi_inot(&b, in_mem32(4))); 556 557 submit_batch(); 558 559 EXPECT_EQ(*(uint64_t *)(output + 0), ~value); 560 EXPECT_EQ(*(uint64_t *)(output + 8), value); 561 EXPECT_EQ(*(uint64_t *)(output + 16), ~(uint64_t)value_lo); 562 EXPECT_EQ(*(uint64_t *)(output + 24), ~(uint64_t)value_hi); 563 EXPECT_EQ(*(uint32_t *)(output + 32), (uint32_t)~value); 564 EXPECT_EQ(*(uint32_t *)(output + 36), (uint32_t)~value_lo); 565 EXPECT_EQ(*(uint32_t *)(output + 40), (uint32_t)value_lo); 566 EXPECT_EQ(*(uint32_t *)(output + 44), (uint32_t)~value_hi); 567} 568 569/* Test adding of immediates of all kinds including 570 * 571 * - All zeroes 572 * - All ones 573 * - inverted constants 574 */ 575TEST_F(mi_builder_test, add_imm) 576{ 577 const uint64_t value = 0x0123456789abcdef; 578 const uint64_t add = 0xdeadbeefac0ffee2; 579 memcpy(input, &value, sizeof(value)); 580 581 mi_store(&b, out_mem64(0), 582 mi_iadd(&b, in_mem64(0), mi_imm(0))); 583 mi_store(&b, out_mem64(8), 584 mi_iadd(&b, in_mem64(0), mi_imm(-1))); 585 mi_store(&b, out_mem64(16), 586 mi_iadd(&b, in_mem64(0), mi_inot(&b, mi_imm(0)))); 587 mi_store(&b, out_mem64(24), 588 mi_iadd(&b, in_mem64(0), mi_inot(&b, mi_imm(-1)))); 589 mi_store(&b, out_mem64(32), 590 mi_iadd(&b, in_mem64(0), mi_imm(add))); 591 mi_store(&b, out_mem64(40), 592 mi_iadd(&b, in_mem64(0), mi_inot(&b, mi_imm(add)))); 593 mi_store(&b, out_mem64(48), 594 mi_iadd(&b, mi_imm(0), in_mem64(0))); 595 mi_store(&b, out_mem64(56), 596 mi_iadd(&b, mi_imm(-1), in_mem64(0))); 597 mi_store(&b, out_mem64(64), 598 mi_iadd(&b, mi_inot(&b, mi_imm(0)), in_mem64(0))); 599 mi_store(&b, out_mem64(72), 600 mi_iadd(&b, mi_inot(&b, mi_imm(-1)), in_mem64(0))); 601 mi_store(&b, out_mem64(80), 602 mi_iadd(&b, mi_imm(add), in_mem64(0))); 603 mi_store(&b, out_mem64(88), 604 mi_iadd(&b, mi_inot(&b, mi_imm(add)), in_mem64(0))); 605 606 // And some add_imm just for good measure 607 mi_store(&b, out_mem64(96), mi_iadd_imm(&b, in_mem64(0), 0)); 608 mi_store(&b, out_mem64(104), mi_iadd_imm(&b, in_mem64(0), add)); 609 610 submit_batch(); 611 612 EXPECT_EQ(*(uint64_t *)(output + 0), value); 613 EXPECT_EQ(*(uint64_t *)(output + 8), value - 1); 614 EXPECT_EQ(*(uint64_t *)(output + 16), value - 1); 615 EXPECT_EQ(*(uint64_t *)(output + 24), value); 616 EXPECT_EQ(*(uint64_t *)(output + 32), value + add); 617 EXPECT_EQ(*(uint64_t *)(output + 40), value + ~add); 618 EXPECT_EQ(*(uint64_t *)(output + 48), value); 619 EXPECT_EQ(*(uint64_t *)(output + 56), value - 1); 620 EXPECT_EQ(*(uint64_t *)(output + 64), value - 1); 621 EXPECT_EQ(*(uint64_t *)(output + 72), value); 622 EXPECT_EQ(*(uint64_t *)(output + 80), value + add); 623 EXPECT_EQ(*(uint64_t *)(output + 88), value + ~add); 624 EXPECT_EQ(*(uint64_t *)(output + 96), value); 625 EXPECT_EQ(*(uint64_t *)(output + 104), value + add); 626} 627 628TEST_F(mi_builder_test, ult_uge_ieq_ine) 629{ 630 uint64_t values[8] = { 631 0x0123456789abcdef, 632 0xdeadbeefac0ffee2, 633 (uint64_t)-1, 634 1, 635 0, 636 1049571, 637 (uint64_t)-240058, 638 20204184, 639 }; 640 memcpy(input, values, sizeof(values)); 641 642 for (unsigned i = 0; i < ARRAY_SIZE(values); i++) { 643 for (unsigned j = 0; j < ARRAY_SIZE(values); j++) { 644 mi_store(&b, out_mem64(i * 256 + j * 32 + 0), 645 mi_ult(&b, in_mem64(i * 8), in_mem64(j * 8))); 646 mi_store(&b, out_mem64(i * 256 + j * 32 + 8), 647 mi_uge(&b, in_mem64(i * 8), in_mem64(j * 8))); 648 mi_store(&b, out_mem64(i * 256 + j * 32 + 16), 649 mi_ieq(&b, in_mem64(i * 8), in_mem64(j * 8))); 650 mi_store(&b, out_mem64(i * 256 + j * 32 + 24), 651 mi_ine(&b, in_mem64(i * 8), in_mem64(j * 8))); 652 } 653 } 654 655 submit_batch(); 656 657 for (unsigned i = 0; i < ARRAY_SIZE(values); i++) { 658 for (unsigned j = 0; j < ARRAY_SIZE(values); j++) { 659 uint64_t *out_u64 = (uint64_t *)(output + i * 256 + j * 32); 660 EXPECT_EQ_IMM(out_u64[0], mi_ult(&b, mi_imm(values[i]), 661 mi_imm(values[j]))); 662 EXPECT_EQ_IMM(out_u64[1], mi_uge(&b, mi_imm(values[i]), 663 mi_imm(values[j]))); 664 EXPECT_EQ_IMM(out_u64[2], mi_ieq(&b, mi_imm(values[i]), 665 mi_imm(values[j]))); 666 EXPECT_EQ_IMM(out_u64[3], mi_ine(&b, mi_imm(values[i]), 667 mi_imm(values[j]))); 668 } 669 } 670} 671 672TEST_F(mi_builder_test, z_nz) 673{ 674 uint64_t values[8] = { 675 0, 676 1, 677 UINT32_MAX, 678 UINT32_MAX + 1, 679 UINT64_MAX, 680 }; 681 memcpy(input, values, sizeof(values)); 682 683 for (unsigned i = 0; i < ARRAY_SIZE(values); i++) { 684 mi_store(&b, out_mem64(i * 16 + 0), mi_nz(&b, in_mem64(i * 8))); 685 mi_store(&b, out_mem64(i * 16 + 8), mi_z(&b, in_mem64(i * 8))); 686 } 687 688 submit_batch(); 689 690 for (unsigned i = 0; i < ARRAY_SIZE(values); i++) { 691 uint64_t *out_u64 = (uint64_t *)(output + i * 16); 692 EXPECT_EQ_IMM(out_u64[0], mi_nz(&b, mi_imm(values[i]))); 693 EXPECT_EQ_IMM(out_u64[1], mi_z(&b, mi_imm(values[i]))); 694 } 695} 696 697TEST_F(mi_builder_test, iand) 698{ 699 const uint64_t values[2] = { 700 0x0123456789abcdef, 701 0xdeadbeefac0ffee2, 702 }; 703 memcpy(input, values, sizeof(values)); 704 705 mi_store(&b, out_mem64(0), mi_iand(&b, in_mem64(0), in_mem64(8))); 706 707 submit_batch(); 708 709 EXPECT_EQ_IMM(*(uint64_t *)output, mi_iand(&b, mi_imm(values[0]), 710 mi_imm(values[1]))); 711} 712 713#if GFX_VERx10 >= 125 714TEST_F(mi_builder_test, ishl) 715{ 716 const uint64_t value = 0x0123456789abcdef; 717 memcpy(input, &value, sizeof(value)); 718 719 uint32_t shifts[] = { 0, 1, 2, 4, 8, 16, 32 }; 720 memcpy(input + 8, shifts, sizeof(shifts)); 721 722 for (unsigned i = 0; i < ARRAY_SIZE(shifts); i++) { 723 mi_store(&b, out_mem64(i * 8), 724 mi_ishl(&b, in_mem64(0), in_mem32(8 + i * 4))); 725 } 726 727 submit_batch(); 728 729 for (unsigned i = 0; i < ARRAY_SIZE(shifts); i++) { 730 EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8), 731 mi_ishl(&b, mi_imm(value), mi_imm(shifts[i]))); 732 } 733} 734 735TEST_F(mi_builder_test, ushr) 736{ 737 const uint64_t value = 0x0123456789abcdef; 738 memcpy(input, &value, sizeof(value)); 739 740 uint32_t shifts[] = { 0, 1, 2, 4, 8, 16, 32 }; 741 memcpy(input + 8, shifts, sizeof(shifts)); 742 743 for (unsigned i = 0; i < ARRAY_SIZE(shifts); i++) { 744 mi_store(&b, out_mem64(i * 8), 745 mi_ushr(&b, in_mem64(0), in_mem32(8 + i * 4))); 746 } 747 748 submit_batch(); 749 750 for (unsigned i = 0; i < ARRAY_SIZE(shifts); i++) { 751 EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8), 752 mi_ushr(&b, mi_imm(value), mi_imm(shifts[i]))); 753 } 754} 755 756TEST_F(mi_builder_test, ushr_imm) 757{ 758 const uint64_t value = 0x0123456789abcdef; 759 memcpy(input, &value, sizeof(value)); 760 761 const unsigned max_shift = 64; 762 763 for (unsigned i = 0; i <= max_shift; i++) 764 mi_store(&b, out_mem64(i * 8), mi_ushr_imm(&b, in_mem64(0), i)); 765 766 submit_batch(); 767 768 for (unsigned i = 0; i <= max_shift; i++) { 769 EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8), 770 mi_ushr_imm(&b, mi_imm(value), i)); 771 } 772} 773 774TEST_F(mi_builder_test, ishr) 775{ 776 const uint64_t values[] = { 777 0x0123456789abcdef, 778 0xfedcba9876543210, 779 }; 780 memcpy(input, values, sizeof(values)); 781 782 uint32_t shifts[] = { 0, 1, 2, 4, 8, 16, 32 }; 783 memcpy(input + 16, shifts, sizeof(shifts)); 784 785 for (unsigned i = 0; i < ARRAY_SIZE(values); i++) { 786 for (unsigned j = 0; j < ARRAY_SIZE(shifts); j++) { 787 mi_store(&b, out_mem64(i * 8 + j * 16), 788 mi_ishr(&b, in_mem64(i * 8), in_mem32(16 + j * 4))); 789 } 790 } 791 792 submit_batch(); 793 794 for (unsigned i = 0; i < ARRAY_SIZE(values); i++) { 795 for (unsigned j = 0; j < ARRAY_SIZE(shifts); j++) { 796 EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8 + j * 16), 797 mi_ishr(&b, mi_imm(values[i]), mi_imm(shifts[j]))); 798 } 799 } 800} 801 802TEST_F(mi_builder_test, ishr_imm) 803{ 804 const uint64_t value = 0x0123456789abcdef; 805 memcpy(input, &value, sizeof(value)); 806 807 const unsigned max_shift = 64; 808 809 for (unsigned i = 0; i <= max_shift; i++) 810 mi_store(&b, out_mem64(i * 8), mi_ishr_imm(&b, in_mem64(0), i)); 811 812 submit_batch(); 813 814 for (unsigned i = 0; i <= max_shift; i++) { 815 EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8), 816 mi_ishr_imm(&b, mi_imm(value), i)); 817 } 818} 819#endif /* if GFX_VERx10 >= 125 */ 820 821TEST_F(mi_builder_test, imul_imm) 822{ 823 uint64_t lhs[2] = { 824 0x0123456789abcdef, 825 0xdeadbeefac0ffee2, 826 }; 827 memcpy(input, lhs, sizeof(lhs)); 828 829 /* Some random 32-bit unsigned integers. The first four have been 830 * hand-chosen just to ensure some good low integers; the rest were 831 * generated with a python script. 832 */ 833 uint32_t rhs[20] = { 834 1, 2, 3, 5, 835 10800, 193, 64, 40, 836 3796, 256, 88, 473, 837 1421, 706, 175, 850, 838 39, 38985, 1941, 17, 839 }; 840 841 for (unsigned i = 0; i < ARRAY_SIZE(lhs); i++) { 842 for (unsigned j = 0; j < ARRAY_SIZE(rhs); j++) { 843 mi_store(&b, out_mem64(i * 160 + j * 8), 844 mi_imul_imm(&b, in_mem64(i * 8), rhs[j])); 845 } 846 } 847 848 submit_batch(); 849 850 for (unsigned i = 0; i < ARRAY_SIZE(lhs); i++) { 851 for (unsigned j = 0; j < ARRAY_SIZE(rhs); j++) { 852 EXPECT_EQ_IMM(*(uint64_t *)(output + i * 160 + j * 8), 853 mi_imul_imm(&b, mi_imm(lhs[i]), rhs[j])); 854 } 855 } 856} 857 858TEST_F(mi_builder_test, ishl_imm) 859{ 860 const uint64_t value = 0x0123456789abcdef; 861 memcpy(input, &value, sizeof(value)); 862 863 const unsigned max_shift = 64; 864 865 for (unsigned i = 0; i <= max_shift; i++) 866 mi_store(&b, out_mem64(i * 8), mi_ishl_imm(&b, in_mem64(0), i)); 867 868 submit_batch(); 869 870 for (unsigned i = 0; i <= max_shift; i++) { 871 EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8), 872 mi_ishl_imm(&b, mi_imm(value), i)); 873 } 874} 875 876TEST_F(mi_builder_test, ushr32_imm) 877{ 878 const uint64_t value = 0x0123456789abcdef; 879 memcpy(input, &value, sizeof(value)); 880 881 const unsigned max_shift = 64; 882 883 for (unsigned i = 0; i <= max_shift; i++) 884 mi_store(&b, out_mem64(i * 8), mi_ushr32_imm(&b, in_mem64(0), i)); 885 886 submit_batch(); 887 888 for (unsigned i = 0; i <= max_shift; i++) { 889 EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8), 890 mi_ushr32_imm(&b, mi_imm(value), i)); 891 } 892} 893 894TEST_F(mi_builder_test, udiv32_imm) 895{ 896 /* Some random 32-bit unsigned integers. The first four have been 897 * hand-chosen just to ensure some good low integers; the rest were 898 * generated with a python script. 899 */ 900 uint32_t values[20] = { 901 1, 2, 3, 5, 902 10800, 193, 64, 40, 903 3796, 256, 88, 473, 904 1421, 706, 175, 850, 905 39, 38985, 1941, 17, 906 }; 907 memcpy(input, values, sizeof(values)); 908 909 for (unsigned i = 0; i < ARRAY_SIZE(values); i++) { 910 for (unsigned j = 0; j < ARRAY_SIZE(values); j++) { 911 mi_store(&b, out_mem32(i * 80 + j * 4), 912 mi_udiv32_imm(&b, in_mem32(i * 4), values[j])); 913 } 914 } 915 916 submit_batch(); 917 918 for (unsigned i = 0; i < ARRAY_SIZE(values); i++) { 919 for (unsigned j = 0; j < ARRAY_SIZE(values); j++) { 920 EXPECT_EQ_IMM(*(uint32_t *)(output + i * 80 + j * 4), 921 mi_udiv32_imm(&b, mi_imm(values[i]), values[j])); 922 } 923 } 924} 925 926TEST_F(mi_builder_test, store_if) 927{ 928 uint64_t u64 = 0xb453b411deadc0deull; 929 uint32_t u32 = 0x1337d00d; 930 931 /* Write values with the predicate enabled */ 932 emit_cmd(GENX(MI_PREDICATE), mip) { 933 mip.LoadOperation = LOAD_LOAD; 934 mip.CombineOperation = COMBINE_SET; 935 mip.CompareOperation = COMPARE_TRUE; 936 } 937 938 mi_store_if(&b, out_mem64(0), mi_imm(u64)); 939 mi_store_if(&b, out_mem32(8), mi_imm(u32)); 940 941 /* Set predicate to false, write garbage that shouldn't land */ 942 emit_cmd(GENX(MI_PREDICATE), mip) { 943 mip.LoadOperation = LOAD_LOAD; 944 mip.CombineOperation = COMBINE_SET; 945 mip.CompareOperation = COMPARE_FALSE; 946 } 947 948 mi_store_if(&b, out_mem64(0), mi_imm(0xd0d0d0d0d0d0d0d0ull)); 949 mi_store_if(&b, out_mem32(8), mi_imm(0xc000c000)); 950 951 submit_batch(); 952 953 EXPECT_EQ(*(uint64_t *)(output + 0), u64); 954 EXPECT_EQ(*(uint32_t *)(output + 8), u32); 955 EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary); 956} 957 958#endif /* GFX_VERx10 >= 75 */ 959 960#if GFX_VERx10 >= 125 961 962/* 963 * Indirect load/store tests. Only available on XE_HP+ 964 */ 965 966TEST_F(mi_builder_test, load_mem64_offset) 967{ 968 uint64_t values[8] = { 969 0x0123456789abcdef, 970 0xdeadbeefac0ffee2, 971 (uint64_t)-1, 972 1, 973 0, 974 1049571, 975 (uint64_t)-240058, 976 20204184, 977 }; 978 memcpy(input, values, sizeof(values)); 979 980 uint32_t offsets[8] = { 0, 40, 24, 48, 56, 8, 32, 16 }; 981 memcpy(input + 64, offsets, sizeof(offsets)); 982 983 for (unsigned i = 0; i < ARRAY_SIZE(offsets); i++) { 984 mi_store(&b, out_mem64(i * 8), 985 mi_load_mem64_offset(&b, in_addr(0), in_mem32(i * 4 + 64))); 986 } 987 988 submit_batch(); 989 990 for (unsigned i = 0; i < ARRAY_SIZE(offsets); i++) 991 EXPECT_EQ(*(uint64_t *)(output + i * 8), values[offsets[i] / 8]); 992} 993 994TEST_F(mi_builder_test, store_mem64_offset) 995{ 996 uint64_t values[8] = { 997 0x0123456789abcdef, 998 0xdeadbeefac0ffee2, 999 (uint64_t)-1, 1000 1, 1001 0, 1002 1049571, 1003 (uint64_t)-240058, 1004 20204184, 1005 }; 1006 memcpy(input, values, sizeof(values)); 1007 1008 uint32_t offsets[8] = { 0, 40, 24, 48, 56, 8, 32, 16 }; 1009 memcpy(input + 64, offsets, sizeof(offsets)); 1010 1011 for (unsigned i = 0; i < ARRAY_SIZE(offsets); i++) { 1012 mi_store_mem64_offset(&b, out_addr(0), in_mem32(i * 4 + 64), 1013 in_mem64(i * 8)); 1014 } 1015 1016 submit_batch(); 1017 1018 for (unsigned i = 0; i < ARRAY_SIZE(offsets); i++) 1019 EXPECT_EQ(*(uint64_t *)(output + offsets[i]), values[i]); 1020} 1021 1022/* 1023 * Control-flow tests. Only available on XE_HP+ 1024 */ 1025 1026TEST_F(mi_builder_test, goto) 1027{ 1028 const uint64_t value = 0xb453b411deadc0deull; 1029 1030 mi_store(&b, out_mem64(0), mi_imm(value)); 1031 1032 struct mi_goto_target t = MI_GOTO_TARGET_INIT; 1033 mi_goto(&b, &t); 1034 1035 /* This one should be skipped */ 1036 mi_store(&b, out_mem64(0), mi_imm(0)); 1037 1038 mi_goto_target(&b, &t); 1039 1040 submit_batch(); 1041 1042 EXPECT_EQ(*(uint64_t *)(output + 0), value); 1043} 1044 1045#define MI_PREDICATE_RESULT 0x2418 1046 1047TEST_F(mi_builder_test, goto_if) 1048{ 1049 const uint64_t values[] = { 1050 0xb453b411deadc0deull, 1051 0x0123456789abcdefull, 1052 0, 1053 }; 1054 1055 mi_store(&b, out_mem64(0), mi_imm(values[0])); 1056 1057 emit_cmd(GENX(MI_PREDICATE), mip) { 1058 mip.LoadOperation = LOAD_LOAD; 1059 mip.CombineOperation = COMBINE_SET; 1060 mip.CompareOperation = COMPARE_FALSE; 1061 } 1062 1063 struct mi_goto_target t = MI_GOTO_TARGET_INIT; 1064 mi_goto_if(&b, mi_reg32(MI_PREDICATE_RESULT), &t); 1065 1066 mi_store(&b, out_mem64(0), mi_imm(values[1])); 1067 1068 emit_cmd(GENX(MI_PREDICATE), mip) { 1069 mip.LoadOperation = LOAD_LOAD; 1070 mip.CombineOperation = COMBINE_SET; 1071 mip.CompareOperation = COMPARE_TRUE; 1072 } 1073 1074 mi_goto_if(&b, mi_reg32(MI_PREDICATE_RESULT), &t); 1075 1076 /* This one should be skipped */ 1077 mi_store(&b, out_mem64(0), mi_imm(values[2])); 1078 1079 mi_goto_target(&b, &t); 1080 1081 submit_batch(); 1082 1083 EXPECT_EQ(*(uint64_t *)(output + 0), values[1]); 1084} 1085 1086TEST_F(mi_builder_test, loop_simple) 1087{ 1088 const uint64_t loop_count = 8; 1089 1090 mi_store(&b, out_mem64(0), mi_imm(0)); 1091 1092 mi_loop(&b) { 1093 mi_break_if(&b, mi_uge(&b, out_mem64(0), mi_imm(loop_count))); 1094 1095 mi_store(&b, out_mem64(0), mi_iadd_imm(&b, out_mem64(0), 1)); 1096 } 1097 1098 submit_batch(); 1099 1100 EXPECT_EQ(*(uint64_t *)(output + 0), loop_count); 1101} 1102 1103TEST_F(mi_builder_test, loop_break) 1104{ 1105 mi_loop(&b) { 1106 mi_store(&b, out_mem64(0), mi_imm(1)); 1107 1108 mi_break_if(&b, mi_imm(0)); 1109 1110 mi_store(&b, out_mem64(0), mi_imm(2)); 1111 1112 mi_break(&b); 1113 1114 mi_store(&b, out_mem64(0), mi_imm(3)); 1115 } 1116 1117 submit_batch(); 1118 1119 EXPECT_EQ(*(uint64_t *)(output + 0), 2); 1120} 1121 1122TEST_F(mi_builder_test, loop_continue) 1123{ 1124 const uint64_t loop_count = 8; 1125 1126 mi_store(&b, out_mem64(0), mi_imm(0)); 1127 mi_store(&b, out_mem64(8), mi_imm(0)); 1128 1129 mi_loop(&b) { 1130 mi_break_if(&b, mi_uge(&b, out_mem64(0), mi_imm(loop_count))); 1131 1132 mi_store(&b, out_mem64(0), mi_iadd_imm(&b, out_mem64(0), 1)); 1133 mi_store(&b, out_mem64(8), mi_imm(5)); 1134 1135 mi_continue(&b); 1136 1137 mi_store(&b, out_mem64(8), mi_imm(10)); 1138 } 1139 1140 submit_batch(); 1141 1142 EXPECT_EQ(*(uint64_t *)(output + 0), loop_count); 1143 EXPECT_EQ(*(uint64_t *)(output + 8), 5); 1144} 1145 1146TEST_F(mi_builder_test, loop_continue_if) 1147{ 1148 const uint64_t loop_count = 8; 1149 1150 mi_store(&b, out_mem64(0), mi_imm(0)); 1151 mi_store(&b, out_mem64(8), mi_imm(0)); 1152 1153 mi_loop(&b) { 1154 mi_break_if(&b, mi_uge(&b, out_mem64(0), mi_imm(loop_count))); 1155 1156 mi_store(&b, out_mem64(0), mi_iadd_imm(&b, out_mem64(0), 1)); 1157 mi_store(&b, out_mem64(8), mi_imm(5)); 1158 1159 emit_cmd(GENX(MI_PREDICATE), mip) { 1160 mip.LoadOperation = LOAD_LOAD; 1161 mip.CombineOperation = COMBINE_SET; 1162 mip.CompareOperation = COMPARE_FALSE; 1163 } 1164 1165 mi_continue_if(&b, mi_reg32(MI_PREDICATE_RESULT)); 1166 1167 mi_store(&b, out_mem64(8), mi_imm(10)); 1168 1169 emit_cmd(GENX(MI_PREDICATE), mip) { 1170 mip.LoadOperation = LOAD_LOAD; 1171 mip.CombineOperation = COMBINE_SET; 1172 mip.CompareOperation = COMPARE_TRUE; 1173 } 1174 1175 mi_continue_if(&b, mi_reg32(MI_PREDICATE_RESULT)); 1176 1177 mi_store(&b, out_mem64(8), mi_imm(15)); 1178 } 1179 1180 submit_batch(); 1181 1182 EXPECT_EQ(*(uint64_t *)(output + 0), loop_count); 1183 EXPECT_EQ(*(uint64_t *)(output + 8), 10); 1184} 1185#endif /* GFX_VERx10 >= 125 */ 1186