/*
 * Copyright © 2019 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <fcntl.h>
#include <string.h>
#include <xf86drm.h>

#include <gtest/gtest.h>

#include "c99_compat.h"
#include "dev/intel_device_info.h"
#include "drm-uapi/i915_drm.h"
#include "genxml/gen_macros.h"
#include "util/macros.h"

class mi_builder_test;

struct address {
   uint32_t gem_handle;
   uint32_t offset;
};

#define __gen_address_type struct address
#define __gen_user_data ::mi_builder_test

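/* Hooks that the genxml pack helpers and mi_builder expect from their
 * "user".  They let the generated packing code call back into the test
 * fixture to get batch space and to resolve addresses.
 */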
uint64_t __gen_combine_address(mi_builder_test *test, void *location,
                               struct address addr, uint32_t delta);
void * __gen_get_batch_dwords(mi_builder_test *test, unsigned num_dwords);
struct address __gen_get_batch_address(mi_builder_test *test,
                                       void *location);

struct address
__gen_address_offset(address addr, uint64_t offset)
{
   addr.offset += offset;
   return addr;
}

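/* Scratch MMIO register used by the tests.  On HSW+ we borrow ALU GPR15,
 * which is why only 15 GPRs are exposed to the builder's allocator below;
 * on older generations we reuse a 3DPRIM register that these tests never
 * otherwise touch.
 */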
#if GFX_VERx10 >= 75
#define RSVD_TEMP_REG 0x2678 /* MI_ALU_REG15 */
#else
#define RSVD_TEMP_REG 0x2430 /* GFX7_3DPRIM_START_VERTEX */
#endif
#define MI_BUILDER_NUM_ALLOC_GPRS 15
#define INPUT_DATA_OFFSET 0
#define OUTPUT_DATA_OFFSET 2048

#define __genxml_cmd_length(cmd) cmd ## _length
#define __genxml_cmd_length_bias(cmd) cmd ## _length_bias
#define __genxml_cmd_header(cmd) cmd ## _header
#define __genxml_cmd_pack(cmd) cmd ## _pack

#include "genxml/genX_pack.h"
#include "mi_builder.h"

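/* Emit a raw genxml command into the batch.  The single-iteration for-loop
 * lets the caller fill in command fields by name in the braced body before
 * the command is packed into the batch.
 */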
#define emit_cmd(cmd, name)                                           \
   for (struct cmd name = { __genxml_cmd_header(cmd) },               \
        *_dst = (struct cmd *) emit_dwords(__genxml_cmd_length(cmd)); \
        __builtin_expect(_dst != NULL, 1);                            \
        __genxml_cmd_pack(cmd)(this, (void *)_dst, &name), _dst = NULL)

#include <vector>

class mi_builder_test : public ::testing::Test {
public:
   mi_builder_test();
   ~mi_builder_test();

   void SetUp();

   void *emit_dwords(int num_dwords);
   void submit_batch();

   inline address in_addr(uint32_t offset)
   {
      address addr;
      addr.gem_handle = data_bo_handle;
      addr.offset = INPUT_DATA_OFFSET + offset;
      return addr;
   }

   inline address out_addr(uint32_t offset)
   {
      address addr;
      addr.gem_handle = data_bo_handle;
      addr.offset = OUTPUT_DATA_OFFSET + offset;
      return addr;
   }

   inline mi_value in_mem64(uint32_t offset)
   {
      return mi_mem64(in_addr(offset));
   }

   inline mi_value in_mem32(uint32_t offset)
   {
      return mi_mem32(in_addr(offset));
   }

   inline mi_value out_mem64(uint32_t offset)
   {
      return mi_mem64(out_addr(offset));
   }

   inline mi_value out_mem32(uint32_t offset)
   {
      return mi_mem32(out_addr(offset));
   }

   int fd;
   int ctx_id;
   intel_device_info devinfo;

   uint32_t batch_bo_handle;
#if GFX_VER >= 8
   uint64_t batch_bo_addr;
#endif
   uint32_t batch_offset;
   void *batch_map;

#if GFX_VER < 8
   std::vector<drm_i915_gem_relocation_entry> relocs;
#endif

   uint32_t data_bo_handle;
#if GFX_VER >= 8
   uint64_t data_bo_addr;
#endif
   void *data_map;
   char *input;
   char *output;
   uint64_t canary;

   mi_builder b;
};

mi_builder_test::mi_builder_test() :
  fd(-1)
{ }

mi_builder_test::~mi_builder_test()
{
   close(fd);
}

// 1 MB of batch should be enough for anyone, right?
#define BATCH_BO_SIZE (256 * 4096)
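// The 4k data BO is split in half: test inputs live at INPUT_DATA_OFFSET
// and results land at OUTPUT_DATA_OFFSET.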
#define DATA_BO_SIZE 4096

void
mi_builder_test::SetUp()
{
   drmDevicePtr devices[8];
   int max_devices = drmGetDevices2(0, devices, 8);

   int i;
   for (i = 0; i < max_devices; i++) {
      if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
          devices[i]->bustype == DRM_BUS_PCI &&
          devices[i]->deviceinfo.pci->vendor_id == 0x8086) {
         fd = open(devices[i]->nodes[DRM_NODE_RENDER], O_RDWR | O_CLOEXEC);
         if (fd < 0)
            continue;

         /* We don't really need to do this when running on hardware because
          * we can just pull it from the drmDevice.  However, without doing
          * this, intel_dump_gpu gets a bit of heartburn and we can't use the
          * --device option with it.
          */
         int device_id;
         drm_i915_getparam getparam = drm_i915_getparam();
         getparam.param = I915_PARAM_CHIPSET_ID;
         getparam.value = &device_id;
         ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GETPARAM,
                            (void *)&getparam), 0) << strerror(errno);

         ASSERT_TRUE(intel_get_device_info_from_pci_id(device_id, &devinfo));
         if (devinfo.ver != GFX_VER ||
             (devinfo.platform == INTEL_PLATFORM_HSW) != (GFX_VERx10 == 75)) {
            close(fd);
            fd = -1;
            continue;
         }

         /* Found a device! */
         break;
      }
   }
   ASSERT_TRUE(i < max_devices) << "Failed to find a DRM device";

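   // Create a GEM context for the test to submit its batches in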
   drm_i915_gem_context_create ctx_create = drm_i915_gem_context_create();
   ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE,
                      (void *)&ctx_create), 0) << strerror(errno);
   ctx_id = ctx_create.ctx_id;

   if (GFX_VER >= 8) {
      /* On gfx8+, we require softpin */
      int has_softpin;
      drm_i915_getparam getparam = drm_i915_getparam();
      getparam.param = I915_PARAM_HAS_EXEC_SOFTPIN;
      getparam.value = &has_softpin;
      ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GETPARAM,
                         (void *)&getparam), 0) << strerror(errno);
      ASSERT_TRUE(has_softpin);
   }

   // Create the batch buffer
   drm_i915_gem_create gem_create = drm_i915_gem_create();
   gem_create.size = BATCH_BO_SIZE;
   ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE,
                      (void *)&gem_create), 0) << strerror(errno);
   batch_bo_handle = gem_create.handle;
#if GFX_VER >= 8
   batch_bo_addr = 0xffffffffdff70000ULL;
#endif

   drm_i915_gem_caching gem_caching = drm_i915_gem_caching();
   gem_caching.handle = batch_bo_handle;
   gem_caching.caching = I915_CACHING_CACHED;
   ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING,
                      (void *)&gem_caching), 0) << strerror(errno);

   drm_i915_gem_mmap gem_mmap = drm_i915_gem_mmap();
   gem_mmap.handle = batch_bo_handle;
   gem_mmap.offset = 0;
   gem_mmap.size = BATCH_BO_SIZE;
   gem_mmap.flags = 0;
   ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP,
                      (void *)&gem_mmap), 0) << strerror(errno);
   batch_map = (void *)(uintptr_t)gem_mmap.addr_ptr;

   // Start the batch at zero
   batch_offset = 0;

   // Create the data buffer
   gem_create = drm_i915_gem_create();
   gem_create.size = DATA_BO_SIZE;
   ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE,
                      (void *)&gem_create), 0) << strerror(errno);
   data_bo_handle = gem_create.handle;
#if GFX_VER >= 8
   data_bo_addr = 0xffffffffefff0000ULL;
#endif

   gem_caching = drm_i915_gem_caching();
   gem_caching.handle = data_bo_handle;
   gem_caching.caching = I915_CACHING_CACHED;
   ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING,
                      (void *)&gem_caching), 0) << strerror(errno);

   gem_mmap = drm_i915_gem_mmap();
   gem_mmap.handle = data_bo_handle;
   gem_mmap.offset = 0;
   gem_mmap.size = DATA_BO_SIZE;
   gem_mmap.flags = 0;
   ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP,
                      (void *)&gem_mmap), 0) << strerror(errno);
   data_map = (void *)(uintptr_t)gem_mmap.addr_ptr;
   input = (char *)data_map + INPUT_DATA_OFFSET;
   output = (char *)data_map + OUTPUT_DATA_OFFSET;

   // Fill the test data with garbage
   memset(data_map, 139, DATA_BO_SIZE);
   memset(&canary, 139, sizeof(canary));

   mi_builder_init(&b, &devinfo, this);
}

void *
mi_builder_test::emit_dwords(int num_dwords)
{
   void *ptr = (void *)((char *)batch_map + batch_offset);
   batch_offset += num_dwords * 4;
   assert(batch_offset < BATCH_BO_SIZE);
   return ptr;
}

void
mi_builder_test::submit_batch()
{
   mi_builder_emit(&b, GENX(MI_BATCH_BUFFER_END), bbe);

   // Round batch up to an even number of dwords.
   if (batch_offset & 4)
      mi_builder_emit(&b, GENX(MI_NOOP), noop);

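   /* objects[0] is the data BO and objects[1] is the batch buffer;
    * EXECBUFFER2 expects the batch to be the last object in the list.
    */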
   drm_i915_gem_exec_object2 objects[2];
   memset(objects, 0, sizeof(objects));

   objects[0].handle = data_bo_handle;
   objects[0].relocation_count = 0;
   objects[0].relocs_ptr = 0;
#if GFX_VER >= 8 /* On gfx8+, we pin everything */
   objects[0].flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS |
                      EXEC_OBJECT_PINNED |
                      EXEC_OBJECT_WRITE;
   objects[0].offset = data_bo_addr;
#else
   objects[0].flags = EXEC_OBJECT_WRITE;
   objects[0].offset = -1;
#endif

   objects[1].handle = batch_bo_handle;
#if GFX_VER >= 8 /* On gfx8+, we don't use relocations */
   objects[1].relocation_count = 0;
   objects[1].relocs_ptr = 0;
   objects[1].flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS |
                      EXEC_OBJECT_PINNED;
   objects[1].offset = batch_bo_addr;
#else
   objects[1].relocation_count = relocs.size();
   objects[1].relocs_ptr = (uintptr_t)(void *)&relocs[0];
   objects[1].flags = 0;
   objects[1].offset = -1;
#endif

   drm_i915_gem_execbuffer2 execbuf = drm_i915_gem_execbuffer2();
   execbuf.buffers_ptr = (uintptr_t)(void *)objects;
   execbuf.buffer_count = 2;
   execbuf.batch_start_offset = 0;
   execbuf.batch_len = batch_offset;
   execbuf.flags = I915_EXEC_HANDLE_LUT | I915_EXEC_RENDER;
   execbuf.rsvd1 = ctx_id;

   ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2,
                      (void *)&execbuf), 0) << strerror(errno);

   drm_i915_gem_wait gem_wait = drm_i915_gem_wait();
   gem_wait.bo_handle = batch_bo_handle;
   gem_wait.timeout_ns = INT64_MAX;
   ASSERT_EQ(drmIoctl(fd, DRM_IOCTL_I915_GEM_WAIT,
                      (void *)&gem_wait), 0) << strerror(errno);
}

uint64_t
__gen_combine_address(mi_builder_test *test, void *location,
                      address addr, uint32_t delta)
{
#if GFX_VER >= 8
   uint64_t addr_u64 = addr.gem_handle == test->data_bo_handle ?
                       test->data_bo_addr : test->batch_bo_addr;
   return addr_u64 + addr.offset + delta;
#else
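   /* Pre-gen8 path: record a relocation; the kernel patches the final
    * address into the batch at execbuf time, so the value returned here is
    * only a presumed offset.
    */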
   drm_i915_gem_relocation_entry reloc = drm_i915_gem_relocation_entry();
   reloc.target_handle = addr.gem_handle == test->data_bo_handle ? 0 : 1;
   reloc.delta = addr.offset + delta;
   reloc.offset = (char *)location - (char *)test->batch_map;
   reloc.presumed_offset = -1;
   test->relocs.push_back(reloc);

   return reloc.delta;
#endif
}

void *
__gen_get_batch_dwords(mi_builder_test *test, unsigned num_dwords)
{
   return test->emit_dwords(num_dwords);
}

struct address
__gen_get_batch_address(mi_builder_test *test, void *location)
{
   assert(location >= test->batch_map);
   size_t offset = (char *)location - (char *)test->batch_map;
   assert(offset < BATCH_BO_SIZE);
   assert(offset <= UINT32_MAX);

   return (struct address) {
      .gem_handle = test->batch_bo_handle,
      .offset = (uint32_t)offset,
   };
}

#include "genxml/genX_pack.h"
#include "mi_builder.h"

TEST_F(mi_builder_test, imm_mem)
{
   const uint64_t value = 0x0123456789abcdef;

   mi_store(&b, out_mem64(0), mi_imm(value));
   mi_store(&b, out_mem32(8), mi_imm(value));

   submit_batch();

   // 64 -> 64
   EXPECT_EQ(*(uint64_t *)(output + 0),  value);

   // 64 -> 32
   EXPECT_EQ(*(uint32_t *)(output + 8),  (uint32_t)value);
   EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);
}

/* mem -> mem copies are only supported on HSW+ */
#if GFX_VERx10 >= 75
TEST_F(mi_builder_test, mem_mem)
{
   const uint64_t value = 0x0123456789abcdef;
   *(uint64_t *)input = value;

   mi_store(&b, out_mem64(0),   in_mem64(0));
   mi_store(&b, out_mem32(8),   in_mem64(0));
   mi_store(&b, out_mem32(16),  in_mem32(0));
   mi_store(&b, out_mem64(24),  in_mem32(0));

   submit_batch();

   // 64 -> 64
   EXPECT_EQ(*(uint64_t *)(output + 0),  value);

   // 64 -> 32
   EXPECT_EQ(*(uint32_t *)(output + 8),  (uint32_t)value);
   EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);

   // 32 -> 32
   EXPECT_EQ(*(uint32_t *)(output + 16), (uint32_t)value);
   EXPECT_EQ(*(uint32_t *)(output + 20), (uint32_t)canary);

   // 32 -> 64
   EXPECT_EQ(*(uint64_t *)(output + 24), (uint64_t)(uint32_t)value);
}
#endif

TEST_F(mi_builder_test, imm_reg)
{
   const uint64_t value = 0x0123456789abcdef;

   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(value));
   mi_store(&b, out_mem64(0), mi_reg64(RSVD_TEMP_REG));

   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg32(RSVD_TEMP_REG), mi_imm(value));
   mi_store(&b, out_mem64(8), mi_reg64(RSVD_TEMP_REG));

   submit_batch();

   // 64 -> 64
   EXPECT_EQ(*(uint64_t *)(output + 0),  value);

   // 64 -> 32
   EXPECT_EQ(*(uint32_t *)(output + 8),  (uint32_t)value);
   EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);
}

TEST_F(mi_builder_test, mem_reg)
{
   const uint64_t value = 0x0123456789abcdef;
   *(uint64_t *)input = value;

   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg64(RSVD_TEMP_REG), in_mem64(0));
   mi_store(&b, out_mem64(0), mi_reg64(RSVD_TEMP_REG));

   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg32(RSVD_TEMP_REG), in_mem64(0));
   mi_store(&b, out_mem64(8), mi_reg64(RSVD_TEMP_REG));

   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg32(RSVD_TEMP_REG), in_mem32(0));
   mi_store(&b, out_mem64(16), mi_reg64(RSVD_TEMP_REG));

   mi_store(&b, mi_reg64(RSVD_TEMP_REG), mi_imm(canary));
   mi_store(&b, mi_reg64(RSVD_TEMP_REG), in_mem32(0));
   mi_store(&b, out_mem64(24), mi_reg64(RSVD_TEMP_REG));

   submit_batch();

   // 64 -> 64
   EXPECT_EQ(*(uint64_t *)(output + 0),  value);

   // 64 -> 32
   EXPECT_EQ(*(uint32_t *)(output + 8),  (uint32_t)value);
   EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);

   // 32 -> 32
   EXPECT_EQ(*(uint32_t *)(output + 16), (uint32_t)value);
   EXPECT_EQ(*(uint32_t *)(output + 20), (uint32_t)canary);

   // 32 -> 64
   EXPECT_EQ(*(uint64_t *)(output + 24), (uint64_t)(uint32_t)value);
}

TEST_F(mi_builder_test, memset)
{
   const unsigned memset_size = 256;

   mi_memset(&b, out_addr(0), 0xdeadbeef, memset_size);

   submit_batch();

   uint32_t *out_u32 = (uint32_t *)output;
   for (unsigned i = 0; i < memset_size / sizeof(*out_u32); i++)
      EXPECT_EQ(out_u32[i], 0xdeadbeef);
}

TEST_F(mi_builder_test, memcpy)
{
   const unsigned memcpy_size = 256;

   uint8_t *in_u8 = (uint8_t *)input;
   for (unsigned i = 0; i < memcpy_size; i++)
      in_u8[i] = i;

   mi_memcpy(&b, out_addr(0), in_addr(0), memcpy_size);

   submit_batch();

   uint8_t *out_u8 = (uint8_t *)output;
   for (unsigned i = 0; i < memcpy_size; i++)
      EXPECT_EQ(out_u8[i], i);
}

/* Start of MI_MATH section */
#if GFX_VERx10 >= 75

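/* Compare a value the GPU wrote back against the result of evaluating the
 * same expression on immediates, which mi_builder constant-folds on the CPU.
 */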
#define EXPECT_EQ_IMM(x, imm) EXPECT_EQ(x, mi_value_to_u64(imm))

TEST_F(mi_builder_test, inot)
{
   const uint64_t value = 0x0123456789abcdef;
   const uint32_t value_lo = (uint32_t)value;
   const uint32_t value_hi = (uint32_t)(value >> 32);
   memcpy(input, &value, sizeof(value));

   mi_store(&b, out_mem64(0),  mi_inot(&b, in_mem64(0)));
   mi_store(&b, out_mem64(8),  mi_inot(&b, mi_inot(&b, in_mem64(0))));
   mi_store(&b, out_mem64(16), mi_inot(&b, in_mem32(0)));
   mi_store(&b, out_mem64(24), mi_inot(&b, in_mem32(4)));
   mi_store(&b, out_mem32(32), mi_inot(&b, in_mem64(0)));
   mi_store(&b, out_mem32(36), mi_inot(&b, in_mem32(0)));
   mi_store(&b, out_mem32(40), mi_inot(&b, mi_inot(&b, in_mem32(0))));
   mi_store(&b, out_mem32(44), mi_inot(&b, in_mem32(4)));

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0),  ~value);
   EXPECT_EQ(*(uint64_t *)(output + 8),  value);
   EXPECT_EQ(*(uint64_t *)(output + 16), ~(uint64_t)value_lo);
   EXPECT_EQ(*(uint64_t *)(output + 24), ~(uint64_t)value_hi);
   EXPECT_EQ(*(uint32_t *)(output + 32), (uint32_t)~value);
   EXPECT_EQ(*(uint32_t *)(output + 36), (uint32_t)~value_lo);
   EXPECT_EQ(*(uint32_t *)(output + 40), (uint32_t)value_lo);
   EXPECT_EQ(*(uint32_t *)(output + 44), (uint32_t)~value_hi);
}

/* Test adding of immediates of all kinds including
 *
 *  - All zeroes
 *  - All ones
 *  - inverted constants
 */
TEST_F(mi_builder_test, add_imm)
{
   const uint64_t value = 0x0123456789abcdef;
   const uint64_t add = 0xdeadbeefac0ffee2;
   memcpy(input, &value, sizeof(value));

   mi_store(&b, out_mem64(0),
                mi_iadd(&b, in_mem64(0), mi_imm(0)));
   mi_store(&b, out_mem64(8),
                mi_iadd(&b, in_mem64(0), mi_imm(-1)));
   mi_store(&b, out_mem64(16),
                mi_iadd(&b, in_mem64(0), mi_inot(&b, mi_imm(0))));
   mi_store(&b, out_mem64(24),
                mi_iadd(&b, in_mem64(0), mi_inot(&b, mi_imm(-1))));
   mi_store(&b, out_mem64(32),
                mi_iadd(&b, in_mem64(0), mi_imm(add)));
   mi_store(&b, out_mem64(40),
                mi_iadd(&b, in_mem64(0), mi_inot(&b, mi_imm(add))));
   mi_store(&b, out_mem64(48),
                mi_iadd(&b, mi_imm(0), in_mem64(0)));
   mi_store(&b, out_mem64(56),
                mi_iadd(&b, mi_imm(-1), in_mem64(0)));
   mi_store(&b, out_mem64(64),
                mi_iadd(&b, mi_inot(&b, mi_imm(0)), in_mem64(0)));
   mi_store(&b, out_mem64(72),
                mi_iadd(&b, mi_inot(&b, mi_imm(-1)), in_mem64(0)));
   mi_store(&b, out_mem64(80),
                mi_iadd(&b, mi_imm(add), in_mem64(0)));
   mi_store(&b, out_mem64(88),
                mi_iadd(&b, mi_inot(&b, mi_imm(add)), in_mem64(0)));

   // And some add_imm just for good measure
   mi_store(&b, out_mem64(96), mi_iadd_imm(&b, in_mem64(0), 0));
   mi_store(&b, out_mem64(104), mi_iadd_imm(&b, in_mem64(0), add));

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0),   value);
   EXPECT_EQ(*(uint64_t *)(output + 8),   value - 1);
   EXPECT_EQ(*(uint64_t *)(output + 16),  value - 1);
   EXPECT_EQ(*(uint64_t *)(output + 24),  value);
   EXPECT_EQ(*(uint64_t *)(output + 32),  value + add);
   EXPECT_EQ(*(uint64_t *)(output + 40),  value + ~add);
   EXPECT_EQ(*(uint64_t *)(output + 48),  value);
   EXPECT_EQ(*(uint64_t *)(output + 56),  value - 1);
   EXPECT_EQ(*(uint64_t *)(output + 64),  value - 1);
   EXPECT_EQ(*(uint64_t *)(output + 72),  value);
   EXPECT_EQ(*(uint64_t *)(output + 80),  value + add);
   EXPECT_EQ(*(uint64_t *)(output + 88),  value + ~add);
   EXPECT_EQ(*(uint64_t *)(output + 96),  value);
   EXPECT_EQ(*(uint64_t *)(output + 104), value + add);
}

TEST_F(mi_builder_test, ult_uge_ieq_ine)
{
   uint64_t values[8] = {
      0x0123456789abcdef,
      0xdeadbeefac0ffee2,
      (uint64_t)-1,
      1,
      0,
      1049571,
      (uint64_t)-240058,
      20204184,
   };
   memcpy(input, values, sizeof(values));

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(values); j++) {
         mi_store(&b, out_mem64(i * 256 + j * 32 + 0),
                      mi_ult(&b, in_mem64(i * 8), in_mem64(j * 8)));
         mi_store(&b, out_mem64(i * 256 + j * 32 + 8),
                      mi_uge(&b, in_mem64(i * 8), in_mem64(j * 8)));
         mi_store(&b, out_mem64(i * 256 + j * 32 + 16),
                      mi_ieq(&b, in_mem64(i * 8), in_mem64(j * 8)));
         mi_store(&b, out_mem64(i * 256 + j * 32 + 24),
                      mi_ine(&b, in_mem64(i * 8), in_mem64(j * 8)));
      }
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(values); j++) {
         uint64_t *out_u64 = (uint64_t *)(output + i * 256 + j * 32);
         EXPECT_EQ_IMM(out_u64[0], mi_ult(&b, mi_imm(values[i]),
                                              mi_imm(values[j])));
         EXPECT_EQ_IMM(out_u64[1], mi_uge(&b, mi_imm(values[i]),
                                              mi_imm(values[j])));
         EXPECT_EQ_IMM(out_u64[2], mi_ieq(&b, mi_imm(values[i]),
                                              mi_imm(values[j])));
         EXPECT_EQ_IMM(out_u64[3], mi_ine(&b, mi_imm(values[i]),
                                              mi_imm(values[j])));
      }
   }
}

TEST_F(mi_builder_test, z_nz)
{
   uint64_t values[8] = {
      0,
      1,
      UINT32_MAX,
      (uint64_t)UINT32_MAX + 1,
      UINT64_MAX,
   };
   memcpy(input, values, sizeof(values));

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      mi_store(&b, out_mem64(i * 16 + 0), mi_nz(&b, in_mem64(i * 8)));
      mi_store(&b, out_mem64(i * 16 + 8), mi_z(&b, in_mem64(i * 8)));
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      uint64_t *out_u64 = (uint64_t *)(output + i * 16);
      EXPECT_EQ_IMM(out_u64[0], mi_nz(&b, mi_imm(values[i])));
      EXPECT_EQ_IMM(out_u64[1], mi_z(&b, mi_imm(values[i])));
   }
}

TEST_F(mi_builder_test, iand)
{
   const uint64_t values[2] = {
      0x0123456789abcdef,
      0xdeadbeefac0ffee2,
   };
   memcpy(input, values, sizeof(values));

   mi_store(&b, out_mem64(0), mi_iand(&b, in_mem64(0), in_mem64(8)));

   submit_batch();

   EXPECT_EQ_IMM(*(uint64_t *)output, mi_iand(&b, mi_imm(values[0]),
                                                  mi_imm(values[1])));
}

#if GFX_VERx10 >= 125
TEST_F(mi_builder_test, ishl)
{
   const uint64_t value = 0x0123456789abcdef;
   memcpy(input, &value, sizeof(value));

   uint32_t shifts[] = { 0, 1, 2, 4, 8, 16, 32 };
   memcpy(input + 8, shifts, sizeof(shifts));

   for (unsigned i = 0; i < ARRAY_SIZE(shifts); i++) {
      mi_store(&b, out_mem64(i * 8),
                   mi_ishl(&b, in_mem64(0), in_mem32(8 + i * 4)));
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(shifts); i++) {
      EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),
                    mi_ishl(&b, mi_imm(value), mi_imm(shifts[i])));
   }
}

TEST_F(mi_builder_test, ushr)
{
   const uint64_t value = 0x0123456789abcdef;
   memcpy(input, &value, sizeof(value));

   uint32_t shifts[] = { 0, 1, 2, 4, 8, 16, 32 };
   memcpy(input + 8, shifts, sizeof(shifts));

   for (unsigned i = 0; i < ARRAY_SIZE(shifts); i++) {
      mi_store(&b, out_mem64(i * 8),
                   mi_ushr(&b, in_mem64(0), in_mem32(8 + i * 4)));
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(shifts); i++) {
      EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),
                    mi_ushr(&b, mi_imm(value), mi_imm(shifts[i])));
   }
}

TEST_F(mi_builder_test, ushr_imm)
{
   const uint64_t value = 0x0123456789abcdef;
   memcpy(input, &value, sizeof(value));

   const unsigned max_shift = 64;

   for (unsigned i = 0; i <= max_shift; i++)
      mi_store(&b, out_mem64(i * 8), mi_ushr_imm(&b, in_mem64(0), i));

   submit_batch();

   for (unsigned i = 0; i <= max_shift; i++) {
      EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),
                    mi_ushr_imm(&b, mi_imm(value), i));
   }
}

TEST_F(mi_builder_test, ishr)
{
   const uint64_t values[] = {
      0x0123456789abcdef,
      0xfedcba9876543210,
   };
   memcpy(input, values, sizeof(values));

   uint32_t shifts[] = { 0, 1, 2, 4, 8, 16, 32 };
   memcpy(input + 16, shifts, sizeof(shifts));

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(shifts); j++) {
         mi_store(&b, out_mem64(i * 8 + j * 16),
                      mi_ishr(&b, in_mem64(i * 8), in_mem32(16 + j * 4)));
      }
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(shifts); j++) {
         EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8 + j * 16),
                       mi_ishr(&b, mi_imm(values[i]), mi_imm(shifts[j])));
      }
   }
}

TEST_F(mi_builder_test, ishr_imm)
{
   const uint64_t value = 0x0123456789abcdef;
   memcpy(input, &value, sizeof(value));

   const unsigned max_shift = 64;

   for (unsigned i = 0; i <= max_shift; i++)
      mi_store(&b, out_mem64(i * 8), mi_ishr_imm(&b, in_mem64(0), i));

   submit_batch();

   for (unsigned i = 0; i <= max_shift; i++) {
      EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),
                    mi_ishr_imm(&b, mi_imm(value), i));
   }
}
#endif /* GFX_VERx10 >= 125 */

TEST_F(mi_builder_test, imul_imm)
{
   uint64_t lhs[2] = {
      0x0123456789abcdef,
      0xdeadbeefac0ffee2,
   };
   memcpy(input, lhs, sizeof(lhs));

   /* Some random 32-bit unsigned integers.  The first four have been
    * hand-chosen just to ensure some good low integers; the rest were
    * generated with a python script.
    */
   uint32_t rhs[20] = {
      1,       2,       3,       5,
      10800,   193,     64,      40,
      3796,    256,     88,      473,
      1421,    706,     175,     850,
      39,      38985,   1941,    17,
   };

   for (unsigned i = 0; i < ARRAY_SIZE(lhs); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(rhs); j++) {
         mi_store(&b, out_mem64(i * 160 + j * 8),
                      mi_imul_imm(&b, in_mem64(i * 8), rhs[j]));
      }
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(lhs); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(rhs); j++) {
         EXPECT_EQ_IMM(*(uint64_t *)(output + i * 160 + j * 8),
                       mi_imul_imm(&b, mi_imm(lhs[i]), rhs[j]));
      }
   }
}

TEST_F(mi_builder_test, ishl_imm)
{
   const uint64_t value = 0x0123456789abcdef;
   memcpy(input, &value, sizeof(value));

   const unsigned max_shift = 64;

   for (unsigned i = 0; i <= max_shift; i++)
      mi_store(&b, out_mem64(i * 8), mi_ishl_imm(&b, in_mem64(0), i));

   submit_batch();

   for (unsigned i = 0; i <= max_shift; i++) {
      EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),
                    mi_ishl_imm(&b, mi_imm(value), i));
   }
}

TEST_F(mi_builder_test, ushr32_imm)
{
   const uint64_t value = 0x0123456789abcdef;
   memcpy(input, &value, sizeof(value));

   const unsigned max_shift = 64;

   for (unsigned i = 0; i <= max_shift; i++)
      mi_store(&b, out_mem64(i * 8), mi_ushr32_imm(&b, in_mem64(0), i));

   submit_batch();

   for (unsigned i = 0; i <= max_shift; i++) {
      EXPECT_EQ_IMM(*(uint64_t *)(output + i * 8),
                    mi_ushr32_imm(&b, mi_imm(value), i));
   }
}

TEST_F(mi_builder_test, udiv32_imm)
{
   /* Some random 32-bit unsigned integers.  The first four have been
    * hand-chosen just to ensure some good low integers; the rest were
    * generated with a python script.
    */
   uint32_t values[20] = {
      1,       2,       3,       5,
      10800,   193,     64,      40,
      3796,    256,     88,      473,
      1421,    706,     175,     850,
      39,      38985,   1941,    17,
   };
   memcpy(input, values, sizeof(values));

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(values); j++) {
         mi_store(&b, out_mem32(i * 80 + j * 4),
                      mi_udiv32_imm(&b, in_mem32(i * 4), values[j]));
      }
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(values); i++) {
      for (unsigned j = 0; j < ARRAY_SIZE(values); j++) {
         EXPECT_EQ_IMM(*(uint32_t *)(output + i * 80 + j * 4),
                       mi_udiv32_imm(&b, mi_imm(values[i]), values[j]));
      }
   }
}

TEST_F(mi_builder_test, store_if)
{
   uint64_t u64 = 0xb453b411deadc0deull;
   uint32_t u32 = 0x1337d00d;

   /* Write values with the predicate enabled */
   emit_cmd(GENX(MI_PREDICATE), mip) {
      mip.LoadOperation    = LOAD_LOAD;
      mip.CombineOperation = COMBINE_SET;
      mip.CompareOperation = COMPARE_TRUE;
   }

   mi_store_if(&b, out_mem64(0), mi_imm(u64));
   mi_store_if(&b, out_mem32(8), mi_imm(u32));

   /* Set predicate to false, write garbage that shouldn't land */
   emit_cmd(GENX(MI_PREDICATE), mip) {
      mip.LoadOperation    = LOAD_LOAD;
      mip.CombineOperation = COMBINE_SET;
      mip.CompareOperation = COMPARE_FALSE;
   }

   mi_store_if(&b, out_mem64(0), mi_imm(0xd0d0d0d0d0d0d0d0ull));
   mi_store_if(&b, out_mem32(8), mi_imm(0xc000c000));

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), u64);
   EXPECT_EQ(*(uint32_t *)(output + 8), u32);
   EXPECT_EQ(*(uint32_t *)(output + 12), (uint32_t)canary);
}

#endif /* GFX_VERx10 >= 75 */

#if GFX_VERx10 >= 125

/*
 * Indirect load/store tests.  Only available on XE_HP+
 */

TEST_F(mi_builder_test, load_mem64_offset)
{
   uint64_t values[8] = {
      0x0123456789abcdef,
      0xdeadbeefac0ffee2,
      (uint64_t)-1,
      1,
      0,
      1049571,
      (uint64_t)-240058,
      20204184,
   };
   memcpy(input, values, sizeof(values));

   uint32_t offsets[8] = { 0, 40, 24, 48, 56, 8, 32, 16 };
   memcpy(input + 64, offsets, sizeof(offsets));

   for (unsigned i = 0; i < ARRAY_SIZE(offsets); i++) {
      mi_store(&b, out_mem64(i * 8),
               mi_load_mem64_offset(&b, in_addr(0), in_mem32(i * 4 + 64)));
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(offsets); i++)
      EXPECT_EQ(*(uint64_t *)(output + i * 8), values[offsets[i] / 8]);
}

TEST_F(mi_builder_test, store_mem64_offset)
{
   uint64_t values[8] = {
      0x0123456789abcdef,
      0xdeadbeefac0ffee2,
      (uint64_t)-1,
      1,
      0,
      1049571,
      (uint64_t)-240058,
      20204184,
   };
   memcpy(input, values, sizeof(values));

   uint32_t offsets[8] = { 0, 40, 24, 48, 56, 8, 32, 16 };
   memcpy(input + 64, offsets, sizeof(offsets));

   for (unsigned i = 0; i < ARRAY_SIZE(offsets); i++) {
      mi_store_mem64_offset(&b, out_addr(0), in_mem32(i * 4 + 64),
                                in_mem64(i * 8));
   }

   submit_batch();

   for (unsigned i = 0; i < ARRAY_SIZE(offsets); i++)
      EXPECT_EQ(*(uint64_t *)(output + offsets[i]), values[i]);
}

/*
 * Control-flow tests.  Only available on XE_HP+
 */

TEST_F(mi_builder_test, goto)
{
   const uint64_t value = 0xb453b411deadc0deull;

   mi_store(&b, out_mem64(0), mi_imm(value));

   struct mi_goto_target t = MI_GOTO_TARGET_INIT;
   mi_goto(&b, &t);

   /* This one should be skipped */
   mi_store(&b, out_mem64(0), mi_imm(0));

   mi_goto_target(&b, &t);

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), value);
}

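/* MMIO register holding the result of the last MI_PREDICATE; the conditional
 * goto/continue tests below read it as their condition.
 */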
#define MI_PREDICATE_RESULT  0x2418

TEST_F(mi_builder_test, goto_if)
{
   const uint64_t values[] = {
      0xb453b411deadc0deull,
      0x0123456789abcdefull,
      0,
   };

   mi_store(&b, out_mem64(0), mi_imm(values[0]));

   emit_cmd(GENX(MI_PREDICATE), mip) {
      mip.LoadOperation    = LOAD_LOAD;
      mip.CombineOperation = COMBINE_SET;
      mip.CompareOperation = COMPARE_FALSE;
   }

   struct mi_goto_target t = MI_GOTO_TARGET_INIT;
   mi_goto_if(&b, mi_reg32(MI_PREDICATE_RESULT), &t);

   mi_store(&b, out_mem64(0), mi_imm(values[1]));

   emit_cmd(GENX(MI_PREDICATE), mip) {
      mip.LoadOperation    = LOAD_LOAD;
      mip.CombineOperation = COMBINE_SET;
      mip.CompareOperation = COMPARE_TRUE;
   }

   mi_goto_if(&b, mi_reg32(MI_PREDICATE_RESULT), &t);

   /* This one should be skipped */
   mi_store(&b, out_mem64(0), mi_imm(values[2]));

   mi_goto_target(&b, &t);

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), values[1]);
}

TEST_F(mi_builder_test, loop_simple)
{
   const uint64_t loop_count = 8;

   mi_store(&b, out_mem64(0), mi_imm(0));

   mi_loop(&b) {
      mi_break_if(&b, mi_uge(&b, out_mem64(0), mi_imm(loop_count)));

      mi_store(&b, out_mem64(0), mi_iadd_imm(&b, out_mem64(0), 1));
   }

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), loop_count);
}

TEST_F(mi_builder_test, loop_break)
{
   mi_loop(&b) {
      mi_store(&b, out_mem64(0), mi_imm(1));

      mi_break_if(&b, mi_imm(0));

      mi_store(&b, out_mem64(0), mi_imm(2));

      mi_break(&b);

      mi_store(&b, out_mem64(0), mi_imm(3));
   }

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), 2);
}

TEST_F(mi_builder_test, loop_continue)
{
   const uint64_t loop_count = 8;

   mi_store(&b, out_mem64(0), mi_imm(0));
   mi_store(&b, out_mem64(8), mi_imm(0));

   mi_loop(&b) {
      mi_break_if(&b, mi_uge(&b, out_mem64(0), mi_imm(loop_count)));

      mi_store(&b, out_mem64(0), mi_iadd_imm(&b, out_mem64(0), 1));
      mi_store(&b, out_mem64(8), mi_imm(5));

      mi_continue(&b);

      mi_store(&b, out_mem64(8), mi_imm(10));
   }

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), loop_count);
   EXPECT_EQ(*(uint64_t *)(output + 8), 5);
}

TEST_F(mi_builder_test, loop_continue_if)
{
   const uint64_t loop_count = 8;

   mi_store(&b, out_mem64(0), mi_imm(0));
   mi_store(&b, out_mem64(8), mi_imm(0));

   mi_loop(&b) {
      mi_break_if(&b, mi_uge(&b, out_mem64(0), mi_imm(loop_count)));

      mi_store(&b, out_mem64(0), mi_iadd_imm(&b, out_mem64(0), 1));
      mi_store(&b, out_mem64(8), mi_imm(5));

      emit_cmd(GENX(MI_PREDICATE), mip) {
         mip.LoadOperation    = LOAD_LOAD;
         mip.CombineOperation = COMBINE_SET;
         mip.CompareOperation = COMPARE_FALSE;
      }

      mi_continue_if(&b, mi_reg32(MI_PREDICATE_RESULT));

      mi_store(&b, out_mem64(8), mi_imm(10));

      emit_cmd(GENX(MI_PREDICATE), mip) {
         mip.LoadOperation    = LOAD_LOAD;
         mip.CombineOperation = COMBINE_SET;
         mip.CompareOperation = COMPARE_TRUE;
      }

      mi_continue_if(&b, mi_reg32(MI_PREDICATE_RESULT));

      mi_store(&b, out_mem64(8), mi_imm(15));
   }

   submit_batch();

   EXPECT_EQ(*(uint64_t *)(output + 0), loop_count);
   EXPECT_EQ(*(uint64_t *)(output + 8), 10);
}
#endif /* GFX_VERx10 >= 125 */
