/*
 * Copyright © 2019 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef MI_BUILDER_H
#define MI_BUILDER_H

#include "dev/intel_device_info.h"
#include "genxml/genX_bits.h"
#include "util/bitscan.h"
#include "util/fast_idiv_by_const.h"
#include "util/u_math.h"

#ifndef MI_BUILDER_NUM_ALLOC_GPRS
/** The number of GPRs the MI builder is allowed to allocate
 *
 * This may be set by a user of this API so that it can reserve some GPRs at
 * the top end for its own use.
 */
#define MI_BUILDER_NUM_ALLOC_GPRS 16
#endif

/** These must be defined by the user of the builder
 *
 * void *__gen_get_batch_dwords(__gen_user_data *user_data,
 *                              unsigned num_dwords);
 *
 * __gen_address_type
 * __gen_address_offset(__gen_address_type addr, uint64_t offset);
 *
 *
 * If self-modifying batches are supported, we must be able to pass batch
 * addresses around as void*s, so pinning, batch chaining, or some other
 * mechanism for ensuring that batch pointers remain valid during building
 * is required.  The following function must also be defined; it returns an
 * address in canonical form:
 *
 * __gen_address_type
 * __gen_get_batch_address(__gen_user_data *user_data, void *location);
 *
 * Also, __gen_combine_address must accept a location value of NULL and return
 * a fully valid 64-bit address.
 */
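
/* For illustration only, a driver might implement the required hooks roughly
 * as follows (a hypothetical sketch; `example_batch` and its fields are not
 * part of this API):
 *
 *    struct example_batch { uint32_t *next, *end; };
 *
 *    static void *
 *    __gen_get_batch_dwords(__gen_user_data *user_data, unsigned num_dwords)
 *    {
 *       struct example_batch *batch = (struct example_batch *)user_data;
 *       assert(batch->next + num_dwords <= batch->end);
 *       uint32_t *p = batch->next;
 *       batch->next += num_dwords;
 *       return p;
 *    }
 *
 *    static __gen_address_type
 *    __gen_address_offset(__gen_address_type addr, uint64_t offset)
 *    {
 *       addr.offset += offset;
 *       return addr;
 *    }
 */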

/*
 * Start of the actual MI builder
 */

#define __genxml_cmd_length(cmd) cmd ## _length
#define __genxml_cmd_header(cmd) cmd ## _header
#define __genxml_cmd_pack(cmd) cmd ## _pack

#define mi_builder_pack(b, cmd, dst, name)                          \
   for (struct cmd name = { __genxml_cmd_header(cmd) },                 \
        *_dst = (struct cmd *)(dst); __builtin_expect(_dst != NULL, 1); \
        __genxml_cmd_pack(cmd)((b)->user_data, (void *)_dst, &name),    \
        _dst = NULL)

#define mi_builder_emit(b, cmd, name)                               \
   mi_builder_pack((b), cmd, __gen_get_batch_dwords((b)->user_data, __genxml_cmd_length(cmd)), name)

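/* mi_builder_emit allocates space for one command and packs it in place.
 * For example (usage sketch):
 *
 *    mi_builder_emit(b, GENX(MI_NOOP), noop);
 *
 *    mi_builder_emit(b, GENX(PIPE_CONTROL), pc) {
 *       pc.CommandStreamerStallEnable = true;
 *    }
 */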

enum mi_value_type {
   MI_VALUE_TYPE_IMM,
   MI_VALUE_TYPE_MEM32,
   MI_VALUE_TYPE_MEM64,
   MI_VALUE_TYPE_REG32,
   MI_VALUE_TYPE_REG64,
};

struct mi_value {
   enum mi_value_type type;

   union {
      uint64_t imm;
      __gen_address_type addr;
      uint32_t reg;
   };

#if GFX_VERx10 >= 75
   bool invert;
#endif
};

struct mi_reg_num {
   uint32_t num;
#if GFX_VER >= 11
   bool cs;
#endif
};

static inline struct mi_reg_num
mi_adjust_reg_num(uint32_t reg)
{
#if GFX_VER >= 11
   bool cs = reg >= 0x2000 && reg < 0x4000;
   return (struct mi_reg_num) {
      .num = reg - (cs ? 0x2000 : 0),
      .cs = cs,
   };
#else
   return (struct mi_reg_num) { .num = reg, };
#endif
}

#if GFX_VER >= 9
#define MI_BUILDER_MAX_MATH_DWORDS 256
#else
#define MI_BUILDER_MAX_MATH_DWORDS 64
#endif

struct mi_builder {
   const struct intel_device_info *devinfo;
   __gen_user_data *user_data;

#if GFX_VERx10 >= 75
   uint32_t gprs;
   uint8_t gpr_refs[MI_BUILDER_NUM_ALLOC_GPRS];

   unsigned num_math_dwords;
   uint32_t math_dwords[MI_BUILDER_MAX_MATH_DWORDS];
#endif
};

static inline void
mi_builder_init(struct mi_builder *b,
                const struct intel_device_info *devinfo,
                __gen_user_data *user_data)
{
   memset(b, 0, sizeof(*b));
   b->devinfo = devinfo;
   b->user_data = user_data;

#if GFX_VERx10 >= 75
   b->gprs = 0;
   b->num_math_dwords = 0;
#endif
}
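
/* Typical setup (sketch; `devinfo` and `batch` stand in for whatever the
 * driver uses as its device info and batch handle):
 *
 *    struct mi_builder b;
 *    mi_builder_init(&b, devinfo, batch);
 *    ...emit commands through `b`...
 *    mi_builder_flush_math(&b);  // flush any queued MI_MATH dwords
 */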

static inline void
mi_builder_flush_math(struct mi_builder *b)
{
#if GFX_VERx10 >= 75
   if (b->num_math_dwords == 0)
      return;

   uint32_t *dw = (uint32_t *)__gen_get_batch_dwords(b->user_data,
                                                     1 + b->num_math_dwords);
   mi_builder_pack(b, GENX(MI_MATH), dw, math) {
      math.DWordLength = 1 + b->num_math_dwords - GENX(MI_MATH_length_bias);
   }
   memcpy(dw + 1, b->math_dwords, b->num_math_dwords * sizeof(uint32_t));
   b->num_math_dwords = 0;
#endif
}

#define _MI_BUILDER_GPR_BASE 0x2600
/* The actual hardware limit on GPRs */
#define _MI_BUILDER_NUM_HW_GPRS 16

#if GFX_VERx10 >= 75

static inline bool
mi_value_is_reg(struct mi_value val)
{
   return val.type == MI_VALUE_TYPE_REG32 ||
          val.type == MI_VALUE_TYPE_REG64;
}

static inline bool
mi_value_is_gpr(struct mi_value val)
{
   return mi_value_is_reg(val) &&
          val.reg >= _MI_BUILDER_GPR_BASE &&
          val.reg < _MI_BUILDER_GPR_BASE +
                    _MI_BUILDER_NUM_HW_GPRS * 8;
}

static inline bool
_mi_value_is_allocated_gpr(struct mi_value val)
{
   return mi_value_is_reg(val) &&
          val.reg >= _MI_BUILDER_GPR_BASE &&
          val.reg < _MI_BUILDER_GPR_BASE +
                    MI_BUILDER_NUM_ALLOC_GPRS * 8;
}

static inline uint32_t
_mi_value_as_gpr(struct mi_value val)
{
   assert(mi_value_is_gpr(val));
   assert(val.reg % 8 == 0);
   return (val.reg - _MI_BUILDER_GPR_BASE) / 8;
}

static inline struct mi_value
mi_new_gpr(struct mi_builder *b)
{
   unsigned gpr = ffs(~b->gprs) - 1;
   assert(gpr < MI_BUILDER_NUM_ALLOC_GPRS);
   assert(b->gpr_refs[gpr] == 0);
   b->gprs |= (1u << gpr);
   b->gpr_refs[gpr] = 1;

   return (struct mi_value) {
      .type = MI_VALUE_TYPE_REG64,
      .reg = _MI_BUILDER_GPR_BASE + gpr * 8,
   };
}
#endif /* GFX_VERx10 >= 75 */

/** Take a reference to an mi_value
 *
 * The MI builder uses reference counting to automatically free ALU GPRs for
 * re-use in calculations.  All mi_* math functions consume the reference
 * they are handed for each source and return a reference to a value which the
 * caller must consume.  In particular, if you pass the same value into a
 * single mi_* math function twice (say to add a number to itself), you
 * are responsible for calling mi_value_ref() to get a second reference
 * because the mi_* math function will consume it twice.
 */
static inline struct mi_value
mi_value_ref(struct mi_builder *b, struct mi_value val)
{
#if GFX_VERx10 >= 75
   if (_mi_value_is_allocated_gpr(val)) {
      unsigned gpr = _mi_value_as_gpr(val);
      assert(gpr < MI_BUILDER_NUM_ALLOC_GPRS);
      assert(b->gprs & (1u << gpr));
      assert(b->gpr_refs[gpr] < UINT8_MAX);
      b->gpr_refs[gpr]++;
   }
#endif /* GFX_VERx10 >= 75 */

   return val;
}
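
/* For example, doubling a GPR-resident value hands the same value to
 * mi_iadd() twice, so a second reference must be taken first (sketch):
 *
 *    val = mi_value_to_gpr(b, val);
 *    val = mi_iadd(b, val, mi_value_ref(b, val));
 */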

/** Drop a reference to an mi_value
 *
 * See also mi_value_ref.
 */
static inline void
mi_value_unref(struct mi_builder *b, struct mi_value val)
{
#if GFX_VERx10 >= 75
   if (_mi_value_is_allocated_gpr(val)) {
      unsigned gpr = _mi_value_as_gpr(val);
      assert(gpr < MI_BUILDER_NUM_ALLOC_GPRS);
      assert(b->gprs & (1u << gpr));
      assert(b->gpr_refs[gpr] > 0);
      if (--b->gpr_refs[gpr] == 0)
         b->gprs &= ~(1u << gpr);
   }
#endif /* GFX_VERx10 >= 75 */
}

static inline struct mi_value
mi_imm(uint64_t imm)
{
   return (struct mi_value) {
      .type = MI_VALUE_TYPE_IMM,
      .imm = imm,
   };
}

static inline struct mi_value
mi_reg32(uint32_t reg)
{
   struct mi_value val = {
      .type = MI_VALUE_TYPE_REG32,
      .reg = reg,
   };
#if GFX_VERx10 >= 75
   assert(!_mi_value_is_allocated_gpr(val));
#endif
   return val;
}

static inline struct mi_value
mi_reg64(uint32_t reg)
{
   struct mi_value val = {
      .type = MI_VALUE_TYPE_REG64,
      .reg = reg,
   };
#if GFX_VERx10 >= 75
   assert(!_mi_value_is_allocated_gpr(val));
#endif
   return val;
}

static inline struct mi_value
mi_mem32(__gen_address_type addr)
{
   return (struct mi_value) {
      .type = MI_VALUE_TYPE_MEM32,
      .addr = addr,
   };
}

static inline struct mi_value
mi_mem64(__gen_address_type addr)
{
   return (struct mi_value) {
      .type = MI_VALUE_TYPE_MEM64,
      .addr = addr,
   };
}
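
/* Constructing a value emits nothing; values are just descriptors that the
 * store/math helpers below interpret (sketch; the addresses are
 * hypothetical):
 *
 *    struct mi_value zero  = mi_imm(0);
 *    struct mi_value count = mi_mem32(counter_addr);
 *    struct mi_value stamp = mi_reg64(0x2358);  // e.g. the RCS TIMESTAMP register
 */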

static inline struct mi_value
mi_value_half(struct mi_value value, bool top_32_bits)
{
   switch (value.type) {
   case MI_VALUE_TYPE_IMM:
      if (top_32_bits)
         value.imm >>= 32;
      else
         value.imm &= 0xffffffffu;
      return value;

   case MI_VALUE_TYPE_MEM32:
      assert(!top_32_bits);
      return value;

   case MI_VALUE_TYPE_MEM64:
      if (top_32_bits)
         value.addr = __gen_address_offset(value.addr, 4);
      value.type = MI_VALUE_TYPE_MEM32;
      return value;

   case MI_VALUE_TYPE_REG32:
      assert(!top_32_bits);
      return value;

   case MI_VALUE_TYPE_REG64:
      if (top_32_bits)
         value.reg += 4;
      value.type = MI_VALUE_TYPE_REG32;
      return value;
   }

   unreachable("Invalid mi_value type");
}

static inline void
_mi_copy_no_unref(struct mi_builder *b,
                  struct mi_value dst, struct mi_value src)
{
#if GFX_VERx10 >= 75
   /* TODO: We could handle src.invert by emitting a bit of math if we really
    * wanted to.
    */
   assert(!dst.invert && !src.invert);
#endif
   mi_builder_flush_math(b);

   switch (dst.type) {
   case MI_VALUE_TYPE_IMM:
      unreachable("Cannot copy to an immediate");

   case MI_VALUE_TYPE_MEM64:
   case MI_VALUE_TYPE_REG64:
      switch (src.type) {
      case MI_VALUE_TYPE_IMM:
         if (dst.type == MI_VALUE_TYPE_REG64) {
            uint32_t *dw = (uint32_t *)__gen_get_batch_dwords(b->user_data,
                                                              GENX(MI_LOAD_REGISTER_IMM_length) + 2);
            struct mi_reg_num reg = mi_adjust_reg_num(dst.reg);
            mi_builder_pack(b, GENX(MI_LOAD_REGISTER_IMM), dw, lri) {
               lri.DWordLength = GENX(MI_LOAD_REGISTER_IMM_length) + 2 -
                                 GENX(MI_LOAD_REGISTER_IMM_length_bias);
#if GFX_VER >= 11
               lri.AddCSMMIOStartOffset = reg.cs;
#endif
            }
            dw[1] = reg.num;
            dw[2] = src.imm;
            dw[3] = reg.num + 4;
            dw[4] = src.imm >> 32;
         } else {
#if GFX_VER >= 8
            assert(dst.type == MI_VALUE_TYPE_MEM64);
            uint32_t *dw = (uint32_t *)__gen_get_batch_dwords(b->user_data,
                                                              GENX(MI_STORE_DATA_IMM_length) + 1);
            mi_builder_pack(b, GENX(MI_STORE_DATA_IMM), dw, sdm) {
               sdm.DWordLength = GENX(MI_STORE_DATA_IMM_length) + 1 -
                                 GENX(MI_STORE_DATA_IMM_length_bias);
               sdm.StoreQword = true;
               sdm.Address = dst.addr;
            }
            dw[3] = src.imm;
            dw[4] = src.imm >> 32;
#else
            _mi_copy_no_unref(b, mi_value_half(dst, false),
                                 mi_value_half(src, false));
            _mi_copy_no_unref(b, mi_value_half(dst, true),
                                 mi_value_half(src, true));
#endif
         }
         break;
      case MI_VALUE_TYPE_REG32:
      case MI_VALUE_TYPE_MEM32:
         _mi_copy_no_unref(b, mi_value_half(dst, false),
                              mi_value_half(src, false));
         _mi_copy_no_unref(b, mi_value_half(dst, true),
                              mi_imm(0));
         break;
      case MI_VALUE_TYPE_REG64:
      case MI_VALUE_TYPE_MEM64:
         _mi_copy_no_unref(b, mi_value_half(dst, false),
                              mi_value_half(src, false));
         _mi_copy_no_unref(b, mi_value_half(dst, true),
                              mi_value_half(src, true));
         break;
      default:
         unreachable("Invalid mi_value type");
      }
      break;

   case MI_VALUE_TYPE_MEM32:
      switch (src.type) {
      case MI_VALUE_TYPE_IMM:
         mi_builder_emit(b, GENX(MI_STORE_DATA_IMM), sdi) {
            sdi.Address = dst.addr;
#if GFX_VER >= 12
            sdi.ForceWriteCompletionCheck = true;
#endif
            sdi.ImmediateData = src.imm;
         }
         break;

      case MI_VALUE_TYPE_MEM32:
      case MI_VALUE_TYPE_MEM64:
#if GFX_VER >= 8
         mi_builder_emit(b, GENX(MI_COPY_MEM_MEM), cmm) {
            cmm.DestinationMemoryAddress = dst.addr;
            cmm.SourceMemoryAddress = src.addr;
         }
#elif GFX_VERx10 == 75
         {
            struct mi_value tmp = mi_new_gpr(b);
            _mi_copy_no_unref(b, tmp, src);
            _mi_copy_no_unref(b, dst, tmp);
            mi_value_unref(b, tmp);
         }
#else
         unreachable("Cannot do mem <-> mem copy on IVB and earlier");
#endif
         break;

      case MI_VALUE_TYPE_REG32:
      case MI_VALUE_TYPE_REG64:
         mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
            struct mi_reg_num reg = mi_adjust_reg_num(src.reg);
            srm.RegisterAddress = reg.num;
#if GFX_VER >= 11
            srm.AddCSMMIOStartOffset = reg.cs;
#endif
            srm.MemoryAddress = dst.addr;
         }
         break;

      default:
         unreachable("Invalid mi_value type");
      }
      break;

   case MI_VALUE_TYPE_REG32:
      switch (src.type) {
      case MI_VALUE_TYPE_IMM:
         mi_builder_emit(b, GENX(MI_LOAD_REGISTER_IMM), lri) {
            struct mi_reg_num reg = mi_adjust_reg_num(dst.reg);
            lri.RegisterOffset = reg.num;
#if GFX_VER >= 11
            lri.AddCSMMIOStartOffset = reg.cs;
#endif
            lri.DataDWord = src.imm;
         }
         break;

      case MI_VALUE_TYPE_MEM32:
      case MI_VALUE_TYPE_MEM64:
#if GFX_VER >= 7
         mi_builder_emit(b, GENX(MI_LOAD_REGISTER_MEM), lrm) {
            struct mi_reg_num reg = mi_adjust_reg_num(dst.reg);
            lrm.RegisterAddress = reg.num;
#if GFX_VER >= 11
            lrm.AddCSMMIOStartOffset = reg.cs;
#endif
            lrm.MemoryAddress = src.addr;
         }
#else
         unreachable("Cannot do mem -> reg copy on SNB and earlier");
#endif
         break;

      case MI_VALUE_TYPE_REG32:
      case MI_VALUE_TYPE_REG64:
#if GFX_VERx10 >= 75
         if (src.reg != dst.reg) {
            mi_builder_emit(b, GENX(MI_LOAD_REGISTER_REG), lrr) {
               struct mi_reg_num reg = mi_adjust_reg_num(src.reg);
               lrr.SourceRegisterAddress = reg.num;
#if GFX_VER >= 11
               lrr.AddCSMMIOStartOffsetSource = reg.cs;
#endif
               reg = mi_adjust_reg_num(dst.reg);
               lrr.DestinationRegisterAddress = reg.num;
#if GFX_VER >= 11
               lrr.AddCSMMIOStartOffsetDestination = reg.cs;
#endif
            }
         }
#else
         unreachable("Cannot do reg <-> reg copy on IVB and earlier");
#endif
         break;

      default:
         unreachable("Invalid mi_value type");
      }
      break;

   default:
      unreachable("Invalid mi_value type");
   }
}

#if GFX_VERx10 >= 75
static inline struct mi_value
mi_resolve_invert(struct mi_builder *b, struct mi_value src);
#endif

/** Store the value in src to the value represented by dst
 *
 * If the bit sizes of src and dst mismatch, this function does an unsigned
 * integer cast.  If src has more bits than dst, it takes the bottom bits.  If
 * src has fewer bits than dst, it fills the top bits with zeros.
 *
 * This function consumes one reference for each of src and dst.
 */
static inline void
mi_store(struct mi_builder *b, struct mi_value dst, struct mi_value src)
{
#if GFX_VERx10 >= 75
   src = mi_resolve_invert(b, src);
#endif
   _mi_copy_no_unref(b, dst, src);
   mi_value_unref(b, src);
   mi_value_unref(b, dst);
}
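
/* Example (sketch; the addresses are hypothetical): raise a flag in memory
 * and copy a 64-bit word between two memory locations:
 *
 *    mi_store(b, mi_mem32(flag_addr), mi_imm(1));
 *    mi_store(b, mi_mem64(dst_addr), mi_mem64(src_addr));
 */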

static inline void
mi_memset(struct mi_builder *b, __gen_address_type dst,
          uint32_t value, uint32_t size)
{
#if GFX_VERx10 >= 75
   assert(b->num_math_dwords == 0);
#endif

   /* This memset operates in units of dwords. */
   assert(size % 4 == 0);

   for (uint32_t i = 0; i < size; i += 4) {
      mi_store(b, mi_mem32(__gen_address_offset(dst, i)),
                  mi_imm(value));
   }
}

/* NOTE: On IVB, this function stomps GFX7_3DPRIM_BASE_VERTEX */
static inline void
mi_memcpy(struct mi_builder *b, __gen_address_type dst,
          __gen_address_type src, uint32_t size)
{
#if GFX_VERx10 >= 75
   assert(b->num_math_dwords == 0);
#endif

   /* This memcpy operates in units of dwords. */
   assert(size % 4 == 0);

   for (uint32_t i = 0; i < size; i += 4) {
      struct mi_value dst_val = mi_mem32(__gen_address_offset(dst, i));
      struct mi_value src_val = mi_mem32(__gen_address_offset(src, i));
#if GFX_VERx10 >= 75
      mi_store(b, dst_val, src_val);
#else
      /* IVB does not have a general purpose register for command streamer
       * commands. Therefore, we use an alternate temporary register.
       */
      struct mi_value tmp_reg = mi_reg32(0x2440); /* GFX7_3DPRIM_BASE_VERTEX */
      mi_store(b, tmp_reg, src_val);
      mi_store(b, dst_val, tmp_reg);
#endif
   }
}
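
/* Example (sketch; the addresses are hypothetical): copy a 16-byte block,
 * then clear the source:
 *
 *    mi_memcpy(b, dst_addr, src_addr, 16);
 *    mi_memset(b, src_addr, 0, 16);
 */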

/*
 * MI_MATH Section.  Only available on Haswell+
 */

#if GFX_VERx10 >= 75

/**
 * Perform a predicated store (assuming the condition is already loaded
 * in the MI_PREDICATE_RESULT register) of the value in src to the memory
 * location specified by dst.  Non-memory destinations are not supported.
 *
 * This function consumes one reference for each of src and dst.
 */
static inline void
mi_store_if(struct mi_builder *b, struct mi_value dst, struct mi_value src)
{
   assert(!dst.invert && !src.invert);

   mi_builder_flush_math(b);

   /* We can only predicate MI_STORE_REGISTER_MEM, so restrict the
    * destination to be memory, and resolve the source to a temporary
    * register if it isn't in one already.
    */
   assert(dst.type == MI_VALUE_TYPE_MEM64 ||
          dst.type == MI_VALUE_TYPE_MEM32);

   if (src.type != MI_VALUE_TYPE_REG32 &&
       src.type != MI_VALUE_TYPE_REG64) {
      struct mi_value tmp = mi_new_gpr(b);
      _mi_copy_no_unref(b, tmp, src);
      src = tmp;
   }

   if (dst.type == MI_VALUE_TYPE_MEM64) {
      mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
         struct mi_reg_num reg = mi_adjust_reg_num(src.reg);
         srm.RegisterAddress = reg.num;
#if GFX_VER >= 11
         srm.AddCSMMIOStartOffset = reg.cs;
#endif
         srm.MemoryAddress = dst.addr;
         srm.PredicateEnable = true;
      }
      mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
         struct mi_reg_num reg = mi_adjust_reg_num(src.reg + 4);
         srm.RegisterAddress = reg.num;
#if GFX_VER >= 11
         srm.AddCSMMIOStartOffset = reg.cs;
#endif
         srm.MemoryAddress = __gen_address_offset(dst.addr, 4);
         srm.PredicateEnable = true;
      }
   } else {
      mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
         struct mi_reg_num reg = mi_adjust_reg_num(src.reg);
         srm.RegisterAddress = reg.num;
#if GFX_VER >= 11
         srm.AddCSMMIOStartOffset = reg.cs;
#endif
         srm.MemoryAddress = dst.addr;
         srm.PredicateEnable = true;
      }
   }

   mi_value_unref(b, src);
   mi_value_unref(b, dst);
}
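
/* Usage sketch: after the client has loaded a condition into
 * MI_PREDICATE_RESULT, write a marker only when the predicate passed:
 *
 *    mi_store_if(b, mi_mem32(marker_addr), mi_imm(1));
 */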

static inline void
_mi_builder_push_math(struct mi_builder *b,
                      const uint32_t *dwords,
                      unsigned num_dwords)
{
   assert(num_dwords < MI_BUILDER_MAX_MATH_DWORDS);
   if (b->num_math_dwords + num_dwords > MI_BUILDER_MAX_MATH_DWORDS)
      mi_builder_flush_math(b);

   memcpy(&b->math_dwords[b->num_math_dwords],
          dwords, num_dwords * sizeof(*dwords));
   b->num_math_dwords += num_dwords;
}

static inline uint32_t
_mi_pack_alu(uint32_t opcode, uint32_t operand1, uint32_t operand2)
{
   struct GENX(MI_MATH_ALU_INSTRUCTION) instr = {
      .Operand2 = operand2,
      .Operand1 = operand1,
      .ALUOpcode = opcode,
   };

   uint32_t dw;
   GENX(MI_MATH_ALU_INSTRUCTION_pack)(NULL, &dw, &instr);

   return dw;
}

static inline struct mi_value
mi_value_to_gpr(struct mi_builder *b, struct mi_value val)
{
   if (mi_value_is_gpr(val))
      return val;

   /* Save off the invert flag because it makes copy() grumpy */
   bool invert = val.invert;
   val.invert = false;

   struct mi_value tmp = mi_new_gpr(b);
   _mi_copy_no_unref(b, tmp, val);
   tmp.invert = invert;

   return tmp;
}

static inline uint64_t
mi_value_to_u64(struct mi_value val)
{
   assert(val.type == MI_VALUE_TYPE_IMM);
   return val.invert ? ~val.imm : val.imm;
}

static inline uint32_t
_mi_math_load_src(struct mi_builder *b, unsigned src, struct mi_value *val)
{
   if (val->type == MI_VALUE_TYPE_IMM &&
       (val->imm == 0 || val->imm == UINT64_MAX)) {
      uint64_t imm = val->invert ? ~val->imm : val->imm;
      return _mi_pack_alu(imm ? MI_ALU_LOAD1 : MI_ALU_LOAD0, src, 0);
   } else {
      *val = mi_value_to_gpr(b, *val);
      return _mi_pack_alu(val->invert ? MI_ALU_LOADINV : MI_ALU_LOAD,
                          src, _mi_value_as_gpr(*val));
   }
}

static inline struct mi_value
mi_math_binop(struct mi_builder *b, uint32_t opcode,
              struct mi_value src0, struct mi_value src1,
              uint32_t store_op, uint32_t store_src)
{
   struct mi_value dst = mi_new_gpr(b);

   uint32_t dw[4];
   dw[0] = _mi_math_load_src(b, MI_ALU_SRCA, &src0);
   dw[1] = _mi_math_load_src(b, MI_ALU_SRCB, &src1);
   dw[2] = _mi_pack_alu(opcode, 0, 0);
   dw[3] = _mi_pack_alu(store_op, _mi_value_as_gpr(dst), store_src);
   _mi_builder_push_math(b, dw, 4);

   mi_value_unref(b, src0);
   mi_value_unref(b, src1);

   return dst;
}
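
/* For example, mi_iadd(b, x, y) below queues four ALU dwords, conceptually:
 *
 *    LOAD  SRCA, <GPR holding x>
 *    LOAD  SRCB, <GPR holding y>
 *    ADD
 *    STORE <dst GPR>, ACCU
 */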

static inline struct mi_value
mi_inot(struct mi_builder *b, struct mi_value val)
{
   if (val.type == MI_VALUE_TYPE_IMM)
      return mi_imm(~mi_value_to_u64(val));

   val.invert = !val.invert;
   return val;
}

static inline struct mi_value
mi_resolve_invert(struct mi_builder *b, struct mi_value src)
{
   if (!src.invert)
      return src;

   assert(src.type != MI_VALUE_TYPE_IMM);
   return mi_math_binop(b, MI_ALU_ADD, src, mi_imm(0),
                        MI_ALU_STORE, MI_ALU_ACCU);
}

static inline struct mi_value
mi_iadd(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
{
   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src0) + mi_value_to_u64(src1));

   return mi_math_binop(b, MI_ALU_ADD, src0, src1,
                        MI_ALU_STORE, MI_ALU_ACCU);
}

static inline struct mi_value
mi_iadd_imm(struct mi_builder *b,
            struct mi_value src, uint64_t N)
{
   if (N == 0)
      return src;

   return mi_iadd(b, src, mi_imm(N));
}

static inline struct mi_value
mi_isub(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
{
   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src0) - mi_value_to_u64(src1));

   return mi_math_binop(b, MI_ALU_SUB, src0, src1,
                        MI_ALU_STORE, MI_ALU_ACCU);
}

static inline struct mi_value
mi_ieq(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
{
   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src0) == mi_value_to_u64(src1) ? ~0ull : 0);

   /* Compute "equal" by subtracting and storing the zero bit */
   return mi_math_binop(b, MI_ALU_SUB, src0, src1,
                        MI_ALU_STORE, MI_ALU_ZF);
}

static inline struct mi_value
mi_ine(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
{
   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src0) != mi_value_to_u64(src1) ? ~0ull : 0);

   /* Compute "not equal" by subtracting and storing the inverse zero bit */
   return mi_math_binop(b, MI_ALU_SUB, src0, src1,
                        MI_ALU_STOREINV, MI_ALU_ZF);
}

static inline struct mi_value
mi_ult(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
{
   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src0) < mi_value_to_u64(src1) ? ~0ull : 0);

   /* Compute "less than" by subtracting and storing the carry bit */
   return mi_math_binop(b, MI_ALU_SUB, src0, src1,
                        MI_ALU_STORE, MI_ALU_CF);
}

static inline struct mi_value
mi_uge(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
{
   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src0) >= mi_value_to_u64(src1) ? ~0ull : 0);

   /* Compute "greater than or equal" by subtracting and storing the inverse
    * carry bit
    */
   return mi_math_binop(b, MI_ALU_SUB, src0, src1,
                        MI_ALU_STOREINV, MI_ALU_CF);
}
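
/* The comparisons above yield all-ones or all-zeros masks, so they compose
 * with the bitwise ops below.  E.g. a range check, lo <= x < hi (sketch;
 * note the extra reference because x is consumed twice):
 *
 *    struct mi_value in_range =
 *       mi_iand(b, mi_uge(b, mi_value_ref(b, x), mi_imm(lo)),
 *                  mi_ult(b, x, mi_imm(hi)));
 */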

static inline struct mi_value
mi_iand(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
{
   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src0) & mi_value_to_u64(src1));

   return mi_math_binop(b, MI_ALU_AND, src0, src1,
                        MI_ALU_STORE, MI_ALU_ACCU);
}

static inline struct mi_value
mi_nz(struct mi_builder *b, struct mi_value src)
{
   if (src.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src) != 0 ? ~0ull : 0);

   return mi_math_binop(b, MI_ALU_ADD, src, mi_imm(0),
                        MI_ALU_STOREINV, MI_ALU_ZF);
}

static inline struct mi_value
mi_z(struct mi_builder *b, struct mi_value src)
{
   if (src.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src) == 0 ? ~0ull : 0);

   return mi_math_binop(b, MI_ALU_ADD, src, mi_imm(0),
                        MI_ALU_STORE, MI_ALU_ZF);
}

static inline struct mi_value
mi_ior(struct mi_builder *b,
       struct mi_value src0, struct mi_value src1)
{
   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src0) | mi_value_to_u64(src1));

   return mi_math_binop(b, MI_ALU_OR, src0, src1,
                        MI_ALU_STORE, MI_ALU_ACCU);
}

#if GFX_VERx10 >= 125
static inline struct mi_value
mi_ishl(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
{
   if (src1.type == MI_VALUE_TYPE_IMM) {
      assert(util_is_power_of_two_or_zero(mi_value_to_u64(src1)));
      assert(mi_value_to_u64(src1) <= 32);
   }

   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src0) << mi_value_to_u64(src1));

   return mi_math_binop(b, MI_ALU_SHL, src0, src1,
                        MI_ALU_STORE, MI_ALU_ACCU);
}

static inline struct mi_value
mi_ushr(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
{
   if (src1.type == MI_VALUE_TYPE_IMM) {
      assert(util_is_power_of_two_or_zero(mi_value_to_u64(src1)));
      assert(mi_value_to_u64(src1) <= 32);
   }

   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src0) >> mi_value_to_u64(src1));

   return mi_math_binop(b, MI_ALU_SHR, src0, src1,
                        MI_ALU_STORE, MI_ALU_ACCU);
}

static inline struct mi_value
mi_ushr_imm(struct mi_builder *b, struct mi_value src, uint32_t shift)
{
   if (shift == 0)
      return src;

   if (shift >= 64)
      return mi_imm(0);

   if (src.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src) >> shift);

   struct mi_value res = mi_value_to_gpr(b, src);

   /* Annoyingly, we only have power-of-two shifts */
   while (shift) {
      int bit = u_bit_scan(&shift);
      assert(bit <= 5);
      res = mi_ushr(b, res, mi_imm(1 << bit));
   }

   return res;
}
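
/* E.g. a shift by 12 decomposes into power-of-two shifts, lowest set bit
 * first:
 *
 *    res = mi_ushr(b, res, mi_imm(4));
 *    res = mi_ushr(b, res, mi_imm(8));
 */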

static inline struct mi_value
mi_ishr(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
{
   if (src1.type == MI_VALUE_TYPE_IMM) {
      assert(util_is_power_of_two_or_zero(mi_value_to_u64(src1)));
      assert(mi_value_to_u64(src1) <= 32);
   }

   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
      return mi_imm((int64_t)mi_value_to_u64(src0) >> mi_value_to_u64(src1));

   return mi_math_binop(b, MI_ALU_SAR, src0, src1,
                        MI_ALU_STORE, MI_ALU_ACCU);
}

static inline struct mi_value
mi_ishr_imm(struct mi_builder *b, struct mi_value src, uint32_t shift)
{
   if (shift == 0)
      return src;

   if (shift >= 64)
      return mi_imm(0);

   if (src.type == MI_VALUE_TYPE_IMM)
      return mi_imm((int64_t)mi_value_to_u64(src) >> shift);

   struct mi_value res = mi_value_to_gpr(b, src);

   /* Annoyingly, we only have power-of-two shifts */
   while (shift) {
      int bit = u_bit_scan(&shift);
      assert(bit <= 5);
      res = mi_ishr(b, res, mi_imm(1 << bit));
   }

   return res;
}
#endif /* if GFX_VERx10 >= 125 */

static inline struct mi_value
mi_imul_imm(struct mi_builder *b, struct mi_value src, uint32_t N)
{
   if (src.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src) * N);

   if (N == 0) {
      mi_value_unref(b, src);
      return mi_imm(0);
   }

   if (N == 1)
      return src;

   src = mi_value_to_gpr(b, src);

   struct mi_value res = mi_value_ref(b, src);

   unsigned top_bit = 31 - __builtin_clz(N);
   for (int i = top_bit - 1; i >= 0; i--) {
      res = mi_iadd(b, res, mi_value_ref(b, res));
      if (N & (1 << i))
         res = mi_iadd(b, res, mi_value_ref(b, src));
   }

   mi_value_unref(b, src);

   return res;
}
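
/* E.g. N = 10 (binary 1010) expands, walking the bits msb-first, to:
 *
 *    res = src
 *    res = res + res   // 2*src
 *    res = res + res   // 4*src
 *    res = res + src   // 5*src  (bit 1 of N is set)
 *    res = res + res   // 10*src
 */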

static inline struct mi_value
mi_ishl_imm(struct mi_builder *b, struct mi_value src, uint32_t shift)
{
   if (shift == 0)
      return src;

   if (shift >= 64)
      return mi_imm(0);

   if (src.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src) << shift);

   struct mi_value res = mi_value_to_gpr(b, src);

#if GFX_VERx10 >= 125
   /* Annoyingly, we only have power-of-two shifts */
   while (shift) {
      int bit = u_bit_scan(&shift);
      assert(bit <= 5);
      res = mi_ishl(b, res, mi_imm(1 << bit));
   }
#else
   for (unsigned i = 0; i < shift; i++)
      res = mi_iadd(b, res, mi_value_ref(b, res));
#endif

   return res;
}

static inline struct mi_value
mi_ushr32_imm(struct mi_builder *b, struct mi_value src, uint32_t shift)
{
   if (shift == 0)
      return src;

   if (shift >= 64)
      return mi_imm(0);

   /* We right-shift by left-shifting by 32 - shift and taking the top 32 bits
    * of the result.
    */
   if (src.type == MI_VALUE_TYPE_IMM)
      return mi_imm((mi_value_to_u64(src) >> shift) & UINT32_MAX);

   if (shift > 32) {
      struct mi_value tmp = mi_new_gpr(b);
      _mi_copy_no_unref(b, mi_value_half(tmp, false),
                           mi_value_half(src, true));
      _mi_copy_no_unref(b, mi_value_half(tmp, true), mi_imm(0));
      mi_value_unref(b, src);
      src = tmp;
      shift -= 32;
   }
   assert(shift <= 32);
   struct mi_value tmp = mi_ishl_imm(b, src, 32 - shift);
   struct mi_value dst = mi_new_gpr(b);
   _mi_copy_no_unref(b, mi_value_half(dst, false),
                        mi_value_half(tmp, true));
   _mi_copy_no_unref(b, mi_value_half(dst, true), mi_imm(0));
   mi_value_unref(b, tmp);
   return dst;
}

static inline struct mi_value
mi_udiv32_imm(struct mi_builder *b, struct mi_value N, uint32_t D)
{
   if (N.type == MI_VALUE_TYPE_IMM) {
      assert(mi_value_to_u64(N) <= UINT32_MAX);
      return mi_imm(mi_value_to_u64(N) / D);
   }

   /* We implicitly assume that N is only a 32-bit value */
   if (D == 0) {
      /* This is invalid but we should do something */
      return mi_imm(0);
   } else if (util_is_power_of_two_or_zero(D)) {
      return mi_ushr32_imm(b, N, util_logbase2(D));
   } else {
      struct util_fast_udiv_info m = util_compute_fast_udiv_info(D, 32, 32);
      assert(m.multiplier <= UINT32_MAX);

      if (m.pre_shift)
         N = mi_ushr32_imm(b, N, m.pre_shift);

      /* Do the 32x32 multiply into a temporary GPR */
      N = mi_imul_imm(b, N, m.multiplier);

      if (m.increment)
         N = mi_iadd(b, N, mi_imm(m.multiplier));

      N = mi_ushr32_imm(b, N, 32);

      if (m.post_shift)
         N = mi_ushr32_imm(b, N, m.post_shift);

      return N;
   }
}
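
/* E.g. for D = 3, util_compute_fast_udiv_info() yields the classic
 * multiplier 0xAAAAAAAB with post_shift = 1, so the emitted math computes
 * N / 3 = (N * 0xAAAAAAAB) >> 33.
 */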

#endif /* MI_MATH section */

/* This assumes addresses of strictly more than 32 bits (i.e. Gfx8+). */
#if MI_BUILDER_CAN_WRITE_BATCH

struct mi_address_token {
   /* Pointers to address memory fields in the batch. */
   uint64_t *ptrs[2];
};

static inline struct mi_address_token
mi_store_address(struct mi_builder *b, struct mi_value addr_reg)
{
   mi_builder_flush_math(b);

   assert(addr_reg.type == MI_VALUE_TYPE_REG64);

   struct mi_address_token token = {};

   for (unsigned i = 0; i < 2; i++) {
      mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
         srm.RegisterAddress = addr_reg.reg + (i * 4);

         const unsigned addr_dw =
            GENX(MI_STORE_REGISTER_MEM_MemoryAddress_start) / 8;
         token.ptrs[i] = (void *)_dst + addr_dw;
      }
   }

   mi_value_unref(b, addr_reg);
   return token;
}
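
/* Usage sketch: record the token while building, then patch it once the
 * final location of the data is known:
 *
 *    struct mi_address_token token = mi_store_address(b, addr_reg);
 *    ...keep building the batch...
 *    _mi_resolve_address_token(b, token, target_location);
 */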

static inline void
mi_self_mod_barrier(struct mi_builder *b)
{
   /* First make sure all the memory writes from previous modifying commands
    * have landed. We want to do this before going through the CS cache,
    * otherwise we could be fetching memory that hasn't been written to yet.
    */
   mi_builder_emit(b, GENX(PIPE_CONTROL), pc) {
      pc.CommandStreamerStallEnable = true;
   }
   /* Documentation says Gfx11+ should be able to invalidate the command
    * cache but experiments show it doesn't work properly, so for now just
    * step over the CS prefetch.
    */
   for (uint32_t i = 0; i < (b->devinfo->cs_prefetch_size / 4); i++)
      mi_builder_emit(b, GENX(MI_NOOP), noop);
}

static inline void
_mi_resolve_address_token(struct mi_builder *b,
                          struct mi_address_token token,
                          void *batch_location)
{
   __gen_address_type addr = __gen_get_batch_address(b->user_data,
                                                     batch_location);
   uint64_t addr_addr_u64 = __gen_combine_address(b->user_data, batch_location,
                                                  addr, 0);
   *(token.ptrs[0]) = addr_addr_u64;
   *(token.ptrs[1]) = addr_addr_u64 + 4;
}

#endif /* MI_BUILDER_CAN_WRITE_BATCH */

#if GFX_VERx10 >= 125

/*
 * Indirect load/store.  Only available on XE_HP+
 */

MUST_CHECK static inline struct mi_value
mi_load_mem64_offset(struct mi_builder *b,
                     __gen_address_type addr, struct mi_value offset)
{
   uint64_t addr_u64 = __gen_combine_address(b->user_data, NULL, addr, 0);
   struct mi_value addr_val = mi_imm(addr_u64);

   struct mi_value dst = mi_new_gpr(b);

   uint32_t dw[5];
   dw[0] = _mi_math_load_src(b, MI_ALU_SRCA, &addr_val);
   dw[1] = _mi_math_load_src(b, MI_ALU_SRCB, &offset);
   dw[2] = _mi_pack_alu(MI_ALU_ADD, 0, 0);
   dw[3] = _mi_pack_alu(MI_ALU_LOADIND, _mi_value_as_gpr(dst), MI_ALU_ACCU);
   dw[4] = _mi_pack_alu(MI_ALU_FENCE_RD, 0, 0);
   _mi_builder_push_math(b, dw, 5);

   mi_value_unref(b, addr_val);
   mi_value_unref(b, offset);

   return dst;
}

static inline void
mi_store_mem64_offset(struct mi_builder *b,
                      __gen_address_type addr, struct mi_value offset,
                      struct mi_value data)
{
   uint64_t addr_u64 = __gen_combine_address(b->user_data, NULL, addr, 0);
   struct mi_value addr_val = mi_imm(addr_u64);

   data = mi_value_to_gpr(b, mi_resolve_invert(b, data));

   uint32_t dw[5];
   dw[0] = _mi_math_load_src(b, MI_ALU_SRCA, &addr_val);
   dw[1] = _mi_math_load_src(b, MI_ALU_SRCB, &offset);
   dw[2] = _mi_pack_alu(MI_ALU_ADD, 0, 0);
   dw[3] = _mi_pack_alu(MI_ALU_STOREIND, MI_ALU_ACCU, _mi_value_as_gpr(data));
   dw[4] = _mi_pack_alu(MI_ALU_FENCE_WR, 0, 0);
   _mi_builder_push_math(b, dw, 5);

   mi_value_unref(b, addr_val);
   mi_value_unref(b, offset);
   mi_value_unref(b, data);

   /* This is the only math case which has side-effects outside of regular
    * registers, so flush the math afterwards so we don't confuse anyone.
    */
   mi_builder_flush_math(b);
}
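
/* Example (sketch; `idx` is a hypothetical mi_value holding an element
 * index): dst[idx] = src[idx] for qword elements:
 *
 *    struct mi_value offset = mi_imul_imm(b, idx, 8);
 *    struct mi_value data =
 *       mi_load_mem64_offset(b, src_addr, mi_value_ref(b, offset));
 *    mi_store_mem64_offset(b, dst_addr, offset, data);
 */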

/*
 * Control-flow Section.  Only available on XE_HP+
 */

struct _mi_goto {
   bool predicated;
   void *mi_bbs;
};

struct mi_goto_target {
   bool placed;
   unsigned num_gotos;
   struct _mi_goto gotos[8];
   __gen_address_type addr;
};

#define MI_GOTO_TARGET_INIT ((struct mi_goto_target) {})

#define MI_BUILDER_MI_PREDICATE_RESULT_num  0x2418

static inline void
mi_goto_if(struct mi_builder *b, struct mi_value cond,
           struct mi_goto_target *t)
{
   /* First, set up the predicate, if any */
   bool predicated;
   if (cond.type == MI_VALUE_TYPE_IMM) {
      /* If it's an immediate, the goto either doesn't happen or happens
       * unconditionally.
       */
      if (mi_value_to_u64(cond) == 0)
         return;

      assert(mi_value_to_u64(cond) == ~0ull);
      predicated = false;
   } else if (mi_value_is_reg(cond) &&
              cond.reg == MI_BUILDER_MI_PREDICATE_RESULT_num) {
      /* If it's MI_PREDICATE_RESULT, we use whatever predicate the client
       * provided us with
       */
      assert(cond.type == MI_VALUE_TYPE_REG32);
      predicated = true;
   } else {
      mi_store(b, mi_reg32(MI_BUILDER_MI_PREDICATE_RESULT_num), cond);
      predicated = true;
   }

   if (predicated) {
      mi_builder_emit(b, GENX(MI_SET_PREDICATE), sp) {
         sp.PredicateEnable = NOOPOnResultClear;
      }
   }
   if (t->placed) {
      mi_builder_emit(b, GENX(MI_BATCH_BUFFER_START), bbs) {
         bbs.PredicationEnable         = predicated;
         bbs.AddressSpaceIndicator     = ASI_PPGTT;
         bbs.BatchBufferStartAddress   = t->addr;
      }
   } else {
      assert(t->num_gotos < ARRAY_SIZE(t->gotos));
      struct _mi_goto g = {
         .predicated = predicated,
         .mi_bbs = __gen_get_batch_dwords(b->user_data,
                                          GENX(MI_BATCH_BUFFER_START_length)),
      };
      memset(g.mi_bbs, 0, 4 * GENX(MI_BATCH_BUFFER_START_length));
      t->gotos[t->num_gotos++] = g;
   }
   if (predicated) {
      mi_builder_emit(b, GENX(MI_SET_PREDICATE), sp) {
         sp.PredicateEnable = NOOPNever;
      }
   }
}

static inline void
mi_goto(struct mi_builder *b, struct mi_goto_target *t)
{
   mi_goto_if(b, mi_imm(-1), t);
}

static inline void
mi_goto_target(struct mi_builder *b, struct mi_goto_target *t)
{
   mi_builder_emit(b, GENX(MI_SET_PREDICATE), sp) {
      sp.PredicateEnable = NOOPNever;
      t->addr = __gen_get_batch_address(b->user_data, _dst);
   }
   t->placed = true;

   struct GENX(MI_BATCH_BUFFER_START) bbs = { GENX(MI_BATCH_BUFFER_START_header) };
   bbs.AddressSpaceIndicator     = ASI_PPGTT;
   bbs.BatchBufferStartAddress   = t->addr;

   for (unsigned i = 0; i < t->num_gotos; i++) {
      bbs.PredicationEnable = t->gotos[i].predicated;
      GENX(MI_BATCH_BUFFER_START_pack)(b->user_data, t->gotos[i].mi_bbs, &bbs);
   }
}

static inline struct mi_goto_target
mi_goto_target_init_and_place(struct mi_builder *b)
{
   struct mi_goto_target t = MI_GOTO_TARGET_INIT;
   mi_goto_target(b, &t);
   return t;
}

#define mi_loop(b) \
   for (struct mi_goto_target __break = MI_GOTO_TARGET_INIT, \
        __continue = mi_goto_target_init_and_place(b); !__break.placed; \
        mi_goto(b, &__continue), mi_goto_target(b, &__break))

#define mi_break(b) mi_goto(b, &__break)
#define mi_break_if(b, cond) mi_goto_if(b, cond, &__break)
#define mi_continue(b) mi_goto(b, &__continue)
#define mi_continue_if(b, cond) mi_goto_if(b, cond, &__continue)

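/* Example (sketch; the address is hypothetical): spin until a flag in memory
 * becomes non-zero:
 *
 *    mi_loop(b) {
 *       mi_break_if(b, mi_nz(b, mi_mem32(flag_addr)));
 *    }
 */
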
#endif /* GFX_VERx10 >= 125 */

#endif /* MI_BUILDER_H */