/*
 * Copyright © 2019 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef MI_BUILDER_H
#define MI_BUILDER_H

#include "dev/intel_device_info.h"
#include "genxml/genX_bits.h"
#include "util/bitscan.h"
#include "util/fast_idiv_by_const.h"
#include "util/u_math.h"

#ifndef MI_BUILDER_NUM_ALLOC_GPRS
/** The number of GPRs the MI builder is allowed to allocate
 *
 * This may be set by a user of this API so that it can reserve some GPRs at
 * the top end for its own use.
 */
#define MI_BUILDER_NUM_ALLOC_GPRS 16
#endif

/** These must be defined by the user of the builder
 *
 *    void *__gen_get_batch_dwords(__gen_user_data *user_data,
 *                                 unsigned num_dwords);
 *
 *    __gen_address_type
 *    __gen_address_offset(__gen_address_type addr, uint64_t offset);
 *
 *
 * If self-modifying batches are supported, we must be able to pass batch
 * addresses around as void*s, so pinning, batch chaining, or some other
 * mechanism for ensuring that batch pointers remain valid during building is
 * required. The following function must also be defined; it returns an
 * address in canonical form:
 *
 *    __gen_address_type
 *    __gen_get_batch_address(__gen_user_data *user_data, void *location);
 *
 * Also, __gen_combine_address must accept a location value of NULL and return
 * a fully valid 64-bit address.
 */
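/* A minimal sketch of what a driver embedding this builder might provide,
 * assuming a hypothetical example_batch type with a simple bump allocator;
 * real drivers wire these into their own batch machinery:
 *
 *    static inline void *
 *    __gen_get_batch_dwords(struct example_batch *batch, unsigned num_dwords)
 *    {
 *       uint32_t *dw = batch->next;          // current write pointer
 *       batch->next += num_dwords;           // reserve num_dwords dwords
 *       assert(batch->next <= batch->end);   // no batch chaining in sketch
 *       return dw;
 *    }
 *
 *    static inline struct example_address
 *    __gen_address_offset(struct example_address addr, uint64_t offset)
 *    {
 *       addr.offset += offset;               // addresses are (bo, offset)
 *       return addr;
 *    }
 */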
/*
 * Start of the actual MI builder
 */

#define __genxml_cmd_length(cmd) cmd ## _length
#define __genxml_cmd_header(cmd) cmd ## _header
#define __genxml_cmd_pack(cmd) cmd ## _pack

#define mi_builder_pack(b, cmd, dst, name)                              \
   for (struct cmd name = { __genxml_cmd_header(cmd) },                 \
        *_dst = (struct cmd *)(dst); __builtin_expect(_dst != NULL, 1); \
        __genxml_cmd_pack(cmd)((b)->user_data, (void *)_dst, &name),    \
        _dst = NULL)

#define mi_builder_emit(b, cmd, name)                                   \
   mi_builder_pack((b), cmd, __gen_get_batch_dwords((b)->user_data, __genxml_cmd_length(cmd)), name)


enum mi_value_type {
   MI_VALUE_TYPE_IMM,
   MI_VALUE_TYPE_MEM32,
   MI_VALUE_TYPE_MEM64,
   MI_VALUE_TYPE_REG32,
   MI_VALUE_TYPE_REG64,
};

struct mi_value {
   enum mi_value_type type;

   union {
      uint64_t imm;
      __gen_address_type addr;
      uint32_t reg;
   };

#if GFX_VERx10 >= 75
   bool invert;
#endif
};

struct mi_reg_num {
   uint32_t num;
#if GFX_VER >= 11
   bool cs;
#endif
};

static inline struct mi_reg_num
mi_adjust_reg_num(uint32_t reg)
{
#if GFX_VER >= 11
   bool cs = reg >= 0x2000 && reg < 0x4000;
   return (struct mi_reg_num) {
      .num = reg - (cs ? 0x2000 : 0),
      .cs = cs,
   };
#else
   return (struct mi_reg_num) { .num = reg, };
#endif
}

#if GFX_VER >= 9
#define MI_BUILDER_MAX_MATH_DWORDS 256
#else
#define MI_BUILDER_MAX_MATH_DWORDS 64
#endif

struct mi_builder {
   const struct intel_device_info *devinfo;
   __gen_user_data *user_data;

#if GFX_VERx10 >= 75
   uint32_t gprs;
   uint8_t gpr_refs[MI_BUILDER_NUM_ALLOC_GPRS];

   unsigned num_math_dwords;
   uint32_t math_dwords[MI_BUILDER_MAX_MATH_DWORDS];
#endif
};

static inline void
mi_builder_init(struct mi_builder *b,
                const struct intel_device_info *devinfo,
                __gen_user_data *user_data)
{
   memset(b, 0, sizeof(*b));
   b->devinfo = devinfo;
   b->user_data = user_data;

#if GFX_VERx10 >= 75
   b->gprs = 0;
   b->num_math_dwords = 0;
#endif
}

static inline void
mi_builder_flush_math(struct mi_builder *b)
{
#if GFX_VERx10 >= 75
   if (b->num_math_dwords == 0)
      return;

   uint32_t *dw = (uint32_t *)__gen_get_batch_dwords(b->user_data,
                                                     1 + b->num_math_dwords);
   mi_builder_pack(b, GENX(MI_MATH), dw, math) {
      math.DWordLength = 1 + b->num_math_dwords - GENX(MI_MATH_length_bias);
   }
   memcpy(dw + 1, b->math_dwords, b->num_math_dwords * sizeof(uint32_t));
   b->num_math_dwords = 0;
#endif
}

#define _MI_BUILDER_GPR_BASE 0x2600
/* The actual hardware limit on GPRs */
#define _MI_BUILDER_NUM_HW_GPRS 16

#if GFX_VERx10 >= 75

static inline bool
mi_value_is_reg(struct mi_value val)
{
   return val.type == MI_VALUE_TYPE_REG32 ||
          val.type == MI_VALUE_TYPE_REG64;
}

static inline bool
mi_value_is_gpr(struct mi_value val)
{
   return mi_value_is_reg(val) &&
          val.reg >= _MI_BUILDER_GPR_BASE &&
          val.reg < _MI_BUILDER_GPR_BASE +
                    _MI_BUILDER_NUM_HW_GPRS * 8;
}

static inline bool
_mi_value_is_allocated_gpr(struct mi_value val)
{
   return mi_value_is_reg(val) &&
          val.reg >= _MI_BUILDER_GPR_BASE &&
          val.reg < _MI_BUILDER_GPR_BASE +
                    MI_BUILDER_NUM_ALLOC_GPRS * 8;
}

static inline uint32_t
_mi_value_as_gpr(struct mi_value val)
{
   assert(mi_value_is_gpr(val));
   assert(val.reg % 8 == 0);
   return (val.reg - _MI_BUILDER_GPR_BASE) / 8;
}
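/* For example, each 64-bit GPR occupies 8 bytes of MMIO space, so allocated
 * GPR 2 lives at 0x2610 (low dword) and 0x2614 (high dword), and
 * _mi_value_as_gpr() maps that value back to the index 2.
 */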
static inline struct mi_value
mi_new_gpr(struct mi_builder *b)
{
   unsigned gpr = ffs(~b->gprs) - 1;
   assert(gpr < MI_BUILDER_NUM_ALLOC_GPRS);
   assert(b->gpr_refs[gpr] == 0);
   b->gprs |= (1u << gpr);
   b->gpr_refs[gpr] = 1;

   return (struct mi_value) {
      .type = MI_VALUE_TYPE_REG64,
      .reg = _MI_BUILDER_GPR_BASE + gpr * 8,
   };
}
#endif /* GFX_VERx10 >= 75 */

/** Take a reference to a mi_value
 *
 * The MI builder uses reference counting to automatically free ALU GPRs for
 * re-use in calculations. All mi_* math functions consume the reference
 * they are handed for each source and return a reference to a value which the
 * caller must consume. In particular, if you pass the same value into a
 * single mi_* math function twice (say to add a number to itself), you
 * are responsible for calling mi_value_ref() to get a second reference
 * because the mi_* math function will consume it twice.
 */
static inline struct mi_value
mi_value_ref(struct mi_builder *b, struct mi_value val)
{
#if GFX_VERx10 >= 75
   if (_mi_value_is_allocated_gpr(val)) {
      unsigned gpr = _mi_value_as_gpr(val);
      assert(gpr < MI_BUILDER_NUM_ALLOC_GPRS);
      assert(b->gprs & (1u << gpr));
      assert(b->gpr_refs[gpr] < UINT8_MAX);
      b->gpr_refs[gpr]++;
   }
#endif /* GFX_VERx10 >= 75 */

   return val;
}

/** Drop a reference to a mi_value
 *
 * See also mi_value_ref.
 */
static inline void
mi_value_unref(struct mi_builder *b, struct mi_value val)
{
#if GFX_VERx10 >= 75
   if (_mi_value_is_allocated_gpr(val)) {
      unsigned gpr = _mi_value_as_gpr(val);
      assert(gpr < MI_BUILDER_NUM_ALLOC_GPRS);
      assert(b->gprs & (1u << gpr));
      assert(b->gpr_refs[gpr] > 0);
      if (--b->gpr_refs[gpr] == 0)
         b->gprs &= ~(1u << gpr);
   }
#endif /* GFX_VERx10 >= 75 */
}
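/* A sketch of the add-a-number-to-itself case described above, assuming "b"
 * is a live builder and "x" a value the caller owns one reference to:
 *
 *    // mi_iadd consumes one reference per source, so doubling x needs an
 *    // extra reference taken up front.
 *    struct mi_value doubled = mi_iadd(b, x, mi_value_ref(b, x));
 */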
static inline struct mi_value
mi_imm(uint64_t imm)
{
   return (struct mi_value) {
      .type = MI_VALUE_TYPE_IMM,
      .imm = imm,
   };
}

static inline struct mi_value
mi_reg32(uint32_t reg)
{
   struct mi_value val = {
      .type = MI_VALUE_TYPE_REG32,
      .reg = reg,
   };
#if GFX_VERx10 >= 75
   assert(!_mi_value_is_allocated_gpr(val));
#endif
   return val;
}

static inline struct mi_value
mi_reg64(uint32_t reg)
{
   struct mi_value val = {
      .type = MI_VALUE_TYPE_REG64,
      .reg = reg,
   };
#if GFX_VERx10 >= 75
   assert(!_mi_value_is_allocated_gpr(val));
#endif
   return val;
}

static inline struct mi_value
mi_mem32(__gen_address_type addr)
{
   return (struct mi_value) {
      .type = MI_VALUE_TYPE_MEM32,
      .addr = addr,
   };
}

static inline struct mi_value
mi_mem64(__gen_address_type addr)
{
   return (struct mi_value) {
      .type = MI_VALUE_TYPE_MEM64,
      .addr = addr,
   };
}

static inline struct mi_value
mi_value_half(struct mi_value value, bool top_32_bits)
{
   switch (value.type) {
   case MI_VALUE_TYPE_IMM:
      if (top_32_bits)
         value.imm >>= 32;
      else
         value.imm &= 0xffffffffu;
      return value;

   case MI_VALUE_TYPE_MEM32:
      assert(!top_32_bits);
      return value;

   case MI_VALUE_TYPE_MEM64:
      if (top_32_bits)
         value.addr = __gen_address_offset(value.addr, 4);
      value.type = MI_VALUE_TYPE_MEM32;
      return value;

   case MI_VALUE_TYPE_REG32:
      assert(!top_32_bits);
      return value;

   case MI_VALUE_TYPE_REG64:
      if (top_32_bits)
         value.reg += 4;
      value.type = MI_VALUE_TYPE_REG32;
      return value;
   }

   unreachable("Invalid mi_value type");
}
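/* Worked example: mi_value_half(mi_imm(0x123456789ull), true) yields
 * mi_imm(0x1) and the bottom half yields mi_imm(0x23456789); for a 64-bit
 * register or memory value, the top half is simply the dword 4 bytes above
 * the bottom one.
 */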
static inline void
_mi_copy_no_unref(struct mi_builder *b,
                  struct mi_value dst, struct mi_value src)
{
#if GFX_VERx10 >= 75
   /* TODO: We could handle src.invert by emitting a bit of math if we really
    * wanted to.
    */
   assert(!dst.invert && !src.invert);
#endif
   mi_builder_flush_math(b);

   switch (dst.type) {
   case MI_VALUE_TYPE_IMM:
      unreachable("Cannot copy to an immediate");

   case MI_VALUE_TYPE_MEM64:
   case MI_VALUE_TYPE_REG64:
      switch (src.type) {
      case MI_VALUE_TYPE_IMM:
         if (dst.type == MI_VALUE_TYPE_REG64) {
            uint32_t *dw = (uint32_t *)__gen_get_batch_dwords(b->user_data,
                              GENX(MI_LOAD_REGISTER_IMM_length) + 2);
            struct mi_reg_num reg = mi_adjust_reg_num(dst.reg);
            mi_builder_pack(b, GENX(MI_LOAD_REGISTER_IMM), dw, lri) {
               lri.DWordLength = GENX(MI_LOAD_REGISTER_IMM_length) + 2 -
                                 GENX(MI_LOAD_REGISTER_IMM_length_bias);
#if GFX_VER >= 11
               lri.AddCSMMIOStartOffset = reg.cs;
#endif
            }
            dw[1] = reg.num;
            dw[2] = src.imm;
            dw[3] = reg.num + 4;
            dw[4] = src.imm >> 32;
         } else {
#if GFX_VER >= 8
            assert(dst.type == MI_VALUE_TYPE_MEM64);
            uint32_t *dw = (uint32_t *)__gen_get_batch_dwords(b->user_data,
                              GENX(MI_STORE_DATA_IMM_length) + 1);
            mi_builder_pack(b, GENX(MI_STORE_DATA_IMM), dw, sdm) {
               sdm.DWordLength = GENX(MI_STORE_DATA_IMM_length) + 1 -
                                 GENX(MI_STORE_DATA_IMM_length_bias);
               sdm.StoreQword = true;
               sdm.Address = dst.addr;
            }
            dw[3] = src.imm;
            dw[4] = src.imm >> 32;
#else
            _mi_copy_no_unref(b, mi_value_half(dst, false),
                                 mi_value_half(src, false));
            _mi_copy_no_unref(b, mi_value_half(dst, true),
                                 mi_value_half(src, true));
#endif
         }
         break;
      case MI_VALUE_TYPE_REG32:
      case MI_VALUE_TYPE_MEM32:
         _mi_copy_no_unref(b, mi_value_half(dst, false),
                              mi_value_half(src, false));
         _mi_copy_no_unref(b, mi_value_half(dst, true),
                              mi_imm(0));
         break;
      case MI_VALUE_TYPE_REG64:
      case MI_VALUE_TYPE_MEM64:
         _mi_copy_no_unref(b, mi_value_half(dst, false),
                              mi_value_half(src, false));
         _mi_copy_no_unref(b, mi_value_half(dst, true),
                              mi_value_half(src, true));
         break;
      default:
         unreachable("Invalid mi_value type");
      }
      break;

   case MI_VALUE_TYPE_MEM32:
      switch (src.type) {
      case MI_VALUE_TYPE_IMM:
         mi_builder_emit(b, GENX(MI_STORE_DATA_IMM), sdi) {
            sdi.Address = dst.addr;
#if GFX_VER >= 12
            sdi.ForceWriteCompletionCheck = true;
#endif
            sdi.ImmediateData = src.imm;
         }
         break;

      case MI_VALUE_TYPE_MEM32:
      case MI_VALUE_TYPE_MEM64:
#if GFX_VER >= 8
         mi_builder_emit(b, GENX(MI_COPY_MEM_MEM), cmm) {
            cmm.DestinationMemoryAddress = dst.addr;
            cmm.SourceMemoryAddress = src.addr;
         }
#elif GFX_VERx10 == 75
         {
            struct mi_value tmp = mi_new_gpr(b);
            _mi_copy_no_unref(b, tmp, src);
            _mi_copy_no_unref(b, dst, tmp);
            mi_value_unref(b, tmp);
         }
#else
         unreachable("Cannot do mem <-> mem copy on IVB and earlier");
#endif
         break;

      case MI_VALUE_TYPE_REG32:
      case MI_VALUE_TYPE_REG64:
         mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
            struct mi_reg_num reg = mi_adjust_reg_num(src.reg);
            srm.RegisterAddress = reg.num;
#if GFX_VER >= 11
            srm.AddCSMMIOStartOffset = reg.cs;
#endif
            srm.MemoryAddress = dst.addr;
         }
         break;

      default:
         unreachable("Invalid mi_value type");
      }
      break;

   case MI_VALUE_TYPE_REG32:
      switch (src.type) {
      case MI_VALUE_TYPE_IMM:
         mi_builder_emit(b, GENX(MI_LOAD_REGISTER_IMM), lri) {
            struct mi_reg_num reg = mi_adjust_reg_num(dst.reg);
            lri.RegisterOffset = reg.num;
#if GFX_VER >= 11
            lri.AddCSMMIOStartOffset = reg.cs;
#endif
            lri.DataDWord = src.imm;
         }
         break;

      case MI_VALUE_TYPE_MEM32:
      case MI_VALUE_TYPE_MEM64:
#if GFX_VER >= 7
         mi_builder_emit(b, GENX(MI_LOAD_REGISTER_MEM), lrm) {
            struct mi_reg_num reg = mi_adjust_reg_num(dst.reg);
            lrm.RegisterAddress = reg.num;
#if GFX_VER >= 11
            lrm.AddCSMMIOStartOffset = reg.cs;
#endif
            lrm.MemoryAddress = src.addr;
         }
#else
         unreachable("Cannot do mem -> reg copy on SNB and earlier");
#endif
         break;

      case MI_VALUE_TYPE_REG32:
      case MI_VALUE_TYPE_REG64:
#if GFX_VERx10 >= 75
         if (src.reg != dst.reg) {
            mi_builder_emit(b, GENX(MI_LOAD_REGISTER_REG), lrr) {
               struct mi_reg_num reg = mi_adjust_reg_num(src.reg);
               lrr.SourceRegisterAddress = reg.num;
#if GFX_VER >= 11
               lrr.AddCSMMIOStartOffsetSource = reg.cs;
#endif
               reg = mi_adjust_reg_num(dst.reg);
               lrr.DestinationRegisterAddress = reg.num;
#if GFX_VER >= 11
               lrr.AddCSMMIOStartOffsetDestination = reg.cs;
#endif
            }
         }
#else
         unreachable("Cannot do reg <-> reg copy on IVB and earlier");
#endif
         break;

      default:
         unreachable("Invalid mi_value type");
      }
      break;

   default:
      unreachable("Invalid mi_value type");
   }
}

#if GFX_VERx10 >= 75
static inline struct mi_value
mi_resolve_invert(struct mi_builder *b, struct mi_value src);
#endif

/** Store the value in src to the value represented by dst
 *
 * If the bit size of src and dst mismatch, this function does an unsigned
 * integer cast. If src has more bits than dst, it takes the bottom bits. If
 * src has fewer bits than dst, it fills the top bits with zeros.
 *
 * This function consumes one reference for each of src and dst.
 */
static inline void
mi_store(struct mi_builder *b, struct mi_value dst, struct mi_value src)
{
#if GFX_VERx10 >= 75
   src = mi_resolve_invert(b, src);
#endif
   _mi_copy_no_unref(b, dst, src);
   mi_value_unref(b, src);
   mi_value_unref(b, dst);
}
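/* Typical usage, assuming "addr" is a driver-specific __gen_address_type:
 *
 *    // Write an immediate to a 32-bit memory location.
 *    mi_store(b, mi_mem32(addr), mi_imm(42));
 *
 *    // Copy a 64-bit register into memory (e.g. 0x2358, the RCS TIMESTAMP
 *    // register); both references are consumed.
 *    mi_store(b, mi_mem64(addr), mi_reg64(0x2358));
 */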
static inline void
mi_memset(struct mi_builder *b, __gen_address_type dst,
          uint32_t value, uint32_t size)
{
#if GFX_VERx10 >= 75
   assert(b->num_math_dwords == 0);
#endif

   /* This memset operates in units of dwords. */
   assert(size % 4 == 0);

   for (uint32_t i = 0; i < size; i += 4) {
      mi_store(b, mi_mem32(__gen_address_offset(dst, i)),
                  mi_imm(value));
   }
}

/* NOTE: On IVB, this function stomps GFX7_3DPRIM_BASE_VERTEX */
static inline void
mi_memcpy(struct mi_builder *b, __gen_address_type dst,
          __gen_address_type src, uint32_t size)
{
#if GFX_VERx10 >= 75
   assert(b->num_math_dwords == 0);
#endif

   /* This memcpy operates in units of dwords. */
   assert(size % 4 == 0);

   for (uint32_t i = 0; i < size; i += 4) {
      struct mi_value dst_val = mi_mem32(__gen_address_offset(dst, i));
      struct mi_value src_val = mi_mem32(__gen_address_offset(src, i));
#if GFX_VERx10 >= 75
      mi_store(b, dst_val, src_val);
#else
      /* IVB does not have a general purpose register for command streamer
       * commands. Therefore, we use an alternate temporary register.
       */
      struct mi_value tmp_reg = mi_reg32(0x2440); /* GFX7_3DPRIM_BASE_VERTEX */
      mi_store(b, tmp_reg, src_val);
      mi_store(b, dst_val, tmp_reg);
#endif
   }
}

/*
 * MI_MATH Section. Only available on Haswell+
 */

#if GFX_VERx10 >= 75

/**
 * Perform a predicated store (assuming the condition is already loaded
 * in the MI_PREDICATE_RESULT register) of the value in src to the memory
 * location specified by dst. Non-memory destinations are not supported.
 *
 * This function consumes one reference for each of src and dst.
 */
static inline void
mi_store_if(struct mi_builder *b, struct mi_value dst, struct mi_value src)
{
   assert(!dst.invert && !src.invert);

   mi_builder_flush_math(b);

   /* We can only predicate MI_STORE_REGISTER_MEM, so restrict the
    * destination to be memory, and resolve the source to a temporary
    * register if it isn't in one already.
    */
   assert(dst.type == MI_VALUE_TYPE_MEM64 ||
          dst.type == MI_VALUE_TYPE_MEM32);

   if (src.type != MI_VALUE_TYPE_REG32 &&
       src.type != MI_VALUE_TYPE_REG64) {
      struct mi_value tmp = mi_new_gpr(b);
      _mi_copy_no_unref(b, tmp, src);
      src = tmp;
   }

   if (dst.type == MI_VALUE_TYPE_MEM64) {
      mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
         struct mi_reg_num reg = mi_adjust_reg_num(src.reg);
         srm.RegisterAddress = reg.num;
#if GFX_VER >= 11
         srm.AddCSMMIOStartOffset = reg.cs;
#endif
         srm.MemoryAddress = dst.addr;
         srm.PredicateEnable = true;
      }
      mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
         struct mi_reg_num reg = mi_adjust_reg_num(src.reg + 4);
         srm.RegisterAddress = reg.num;
#if GFX_VER >= 11
         srm.AddCSMMIOStartOffset = reg.cs;
#endif
         srm.MemoryAddress = __gen_address_offset(dst.addr, 4);
         srm.PredicateEnable = true;
      }
   } else {
      mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
         struct mi_reg_num reg = mi_adjust_reg_num(src.reg);
         srm.RegisterAddress = reg.num;
#if GFX_VER >= 11
         srm.AddCSMMIOStartOffset = reg.cs;
#endif
         srm.MemoryAddress = dst.addr;
         srm.PredicateEnable = true;
      }
   }

   mi_value_unref(b, src);
   mi_value_unref(b, dst);
}

static inline void
_mi_builder_push_math(struct mi_builder *b,
                      const uint32_t *dwords,
                      unsigned num_dwords)
{
   assert(num_dwords < MI_BUILDER_MAX_MATH_DWORDS);
   if (b->num_math_dwords + num_dwords > MI_BUILDER_MAX_MATH_DWORDS)
      mi_builder_flush_math(b);

   memcpy(&b->math_dwords[b->num_math_dwords],
          dwords, num_dwords * sizeof(*dwords));
   b->num_math_dwords += num_dwords;
}

static inline uint32_t
_mi_pack_alu(uint32_t opcode, uint32_t operand1, uint32_t operand2)
{
   struct GENX(MI_MATH_ALU_INSTRUCTION) instr = {
      .Operand2 = operand2,
      .Operand1 = operand1,
      .ALUOpcode = opcode,
   };

   uint32_t dw;
   GENX(MI_MATH_ALU_INSTRUCTION_pack)(NULL, &dw, &instr);

   return dw;
}

static inline struct mi_value
mi_value_to_gpr(struct mi_builder *b, struct mi_value val)
{
   if (mi_value_is_gpr(val))
      return val;

   /* Save off the invert flag because it makes copy() grumpy */
   bool invert = val.invert;
   val.invert = false;

   struct mi_value tmp = mi_new_gpr(b);
   _mi_copy_no_unref(b, tmp, val);
   tmp.invert = invert;

   return tmp;
}

static inline uint64_t
mi_value_to_u64(struct mi_value val)
{
   assert(val.type == MI_VALUE_TYPE_IMM);
   return val.invert ? ~val.imm : val.imm;
}
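/* Every binary operation below compiles to the same four-instruction ALU
 * program; for example, dst = src0 + src1 becomes:
 *
 *    LOAD  SRCA, src0's GPR
 *    LOAD  SRCB, src1's GPR
 *    ADD
 *    STORE dst's GPR, ACCU
 *
 * The instructions are buffered in the builder and only emitted as a single
 * MI_MATH command by mi_builder_flush_math().
 */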
static inline uint32_t
_mi_math_load_src(struct mi_builder *b, unsigned src, struct mi_value *val)
{
   if (val->type == MI_VALUE_TYPE_IMM &&
       (val->imm == 0 || val->imm == UINT64_MAX)) {
      uint64_t imm = val->invert ? ~val->imm : val->imm;
      return _mi_pack_alu(imm ? MI_ALU_LOAD1 : MI_ALU_LOAD0, src, 0);
   } else {
      *val = mi_value_to_gpr(b, *val);
      return _mi_pack_alu(val->invert ? MI_ALU_LOADINV : MI_ALU_LOAD,
                          src, _mi_value_as_gpr(*val));
   }
}

static inline struct mi_value
mi_math_binop(struct mi_builder *b, uint32_t opcode,
              struct mi_value src0, struct mi_value src1,
              uint32_t store_op, uint32_t store_src)
{
   struct mi_value dst = mi_new_gpr(b);

   uint32_t dw[4];
   dw[0] = _mi_math_load_src(b, MI_ALU_SRCA, &src0);
   dw[1] = _mi_math_load_src(b, MI_ALU_SRCB, &src1);
   dw[2] = _mi_pack_alu(opcode, 0, 0);
   dw[3] = _mi_pack_alu(store_op, _mi_value_as_gpr(dst), store_src);
   _mi_builder_push_math(b, dw, 4);

   mi_value_unref(b, src0);
   mi_value_unref(b, src1);

   return dst;
}

static inline struct mi_value
mi_inot(struct mi_builder *b, struct mi_value val)
{
   if (val.type == MI_VALUE_TYPE_IMM)
      return mi_imm(~mi_value_to_u64(val));

   val.invert = !val.invert;
   return val;
}

static inline struct mi_value
mi_resolve_invert(struct mi_builder *b, struct mi_value src)
{
   if (!src.invert)
      return src;

   assert(src.type != MI_VALUE_TYPE_IMM);
   return mi_math_binop(b, MI_ALU_ADD, src, mi_imm(0),
                           MI_ALU_STORE, MI_ALU_ACCU);
}

static inline struct mi_value
mi_iadd(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
{
   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src0) + mi_value_to_u64(src1));

   return mi_math_binop(b, MI_ALU_ADD, src0, src1,
                           MI_ALU_STORE, MI_ALU_ACCU);
}

static inline struct mi_value
mi_iadd_imm(struct mi_builder *b,
            struct mi_value src, uint64_t N)
{
   if (N == 0)
      return src;

   return mi_iadd(b, src, mi_imm(N));
}

static inline struct mi_value
mi_isub(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
{
   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src0) - mi_value_to_u64(src1));

   return mi_math_binop(b, MI_ALU_SUB, src0, src1,
                           MI_ALU_STORE, MI_ALU_ACCU);
}

static inline struct mi_value
mi_ieq(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
{
   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src0) == mi_value_to_u64(src1) ? ~0ull : 0);

   /* Compute "equal" by subtracting and storing the zero bit */
   return mi_math_binop(b, MI_ALU_SUB, src0, src1,
                           MI_ALU_STORE, MI_ALU_ZF);
}
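/* Note that mi_ieq() above and the comparison helpers that follow all use
 * the same boolean convention as their immediate-folding paths: "true" is
 * all ones (~0ull) and "false" is 0, so results can be fed directly into
 * mi_iand()/mi_ior() or used as a goto condition.
 */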
static inline struct mi_value
mi_ine(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
{
   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src0) != mi_value_to_u64(src1) ? ~0ull : 0);

   /* Compute "not equal" by subtracting and storing the inverse zero bit */
   return mi_math_binop(b, MI_ALU_SUB, src0, src1,
                           MI_ALU_STOREINV, MI_ALU_ZF);
}

static inline struct mi_value
mi_ult(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
{
   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src0) < mi_value_to_u64(src1) ? ~0ull : 0);

   /* Compute "less than" by subtracting and storing the carry bit */
   return mi_math_binop(b, MI_ALU_SUB, src0, src1,
                           MI_ALU_STORE, MI_ALU_CF);
}

static inline struct mi_value
mi_uge(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
{
   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src0) >= mi_value_to_u64(src1) ? ~0ull : 0);

   /* Compute "greater than or equal" by subtracting and storing the
    * inverse carry bit
    */
   return mi_math_binop(b, MI_ALU_SUB, src0, src1,
                           MI_ALU_STOREINV, MI_ALU_CF);
}

static inline struct mi_value
mi_iand(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
{
   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src0) & mi_value_to_u64(src1));

   return mi_math_binop(b, MI_ALU_AND, src0, src1,
                           MI_ALU_STORE, MI_ALU_ACCU);
}

static inline struct mi_value
mi_nz(struct mi_builder *b, struct mi_value src)
{
   if (src.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src) != 0 ? ~0ull : 0);

   return mi_math_binop(b, MI_ALU_ADD, src, mi_imm(0),
                           MI_ALU_STOREINV, MI_ALU_ZF);
}
static inline struct mi_value
mi_z(struct mi_builder *b, struct mi_value src)
{
   if (src.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src) == 0 ? ~0ull : 0);

   return mi_math_binop(b, MI_ALU_ADD, src, mi_imm(0),
                           MI_ALU_STORE, MI_ALU_ZF);
}

static inline struct mi_value
mi_ior(struct mi_builder *b,
       struct mi_value src0, struct mi_value src1)
{
   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src0) | mi_value_to_u64(src1));

   return mi_math_binop(b, MI_ALU_OR, src0, src1,
                           MI_ALU_STORE, MI_ALU_ACCU);
}

#if GFX_VERx10 >= 125
static inline struct mi_value
mi_ishl(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
{
   if (src1.type == MI_VALUE_TYPE_IMM) {
      assert(util_is_power_of_two_or_zero(mi_value_to_u64(src1)));
      assert(mi_value_to_u64(src1) <= 32);
   }

   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src0) << mi_value_to_u64(src1));

   return mi_math_binop(b, MI_ALU_SHL, src0, src1,
                           MI_ALU_STORE, MI_ALU_ACCU);
}

static inline struct mi_value
mi_ushr(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
{
   if (src1.type == MI_VALUE_TYPE_IMM) {
      assert(util_is_power_of_two_or_zero(mi_value_to_u64(src1)));
      assert(mi_value_to_u64(src1) <= 32);
   }

   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src0) >> mi_value_to_u64(src1));

   return mi_math_binop(b, MI_ALU_SHR, src0, src1,
                           MI_ALU_STORE, MI_ALU_ACCU);
}

static inline struct mi_value
mi_ushr_imm(struct mi_builder *b, struct mi_value src, uint32_t shift)
{
   if (shift == 0)
      return src;

   if (shift >= 64)
      return mi_imm(0);

   if (src.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src) >> shift);

   struct mi_value res = mi_value_to_gpr(b, src);

   /* Annoyingly, we only have power-of-two shifts */
   while (shift) {
      int bit = u_bit_scan(&shift);
      assert(bit <= 5);
      res = mi_ushr(b, res, mi_imm(1 << bit));
   }

   return res;
}

static inline struct mi_value
mi_ishr(struct mi_builder *b, struct mi_value src0, struct mi_value src1)
{
   if (src1.type == MI_VALUE_TYPE_IMM) {
      assert(util_is_power_of_two_or_zero(mi_value_to_u64(src1)));
      assert(mi_value_to_u64(src1) <= 32);
   }

   if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)
      return mi_imm((int64_t)mi_value_to_u64(src0) >> mi_value_to_u64(src1));

   return mi_math_binop(b, MI_ALU_SAR, src0, src1,
                           MI_ALU_STORE, MI_ALU_ACCU);
}

static inline struct mi_value
mi_ishr_imm(struct mi_builder *b, struct mi_value src, uint32_t shift)
{
   if (shift == 0)
      return src;

   if (shift >= 64)
      return mi_imm(0);

   if (src.type == MI_VALUE_TYPE_IMM)
      return mi_imm((int64_t)mi_value_to_u64(src) >> shift);

   struct mi_value res = mi_value_to_gpr(b, src);

   /* Annoyingly, we only have power-of-two shifts */
   while (shift) {
      int bit = u_bit_scan(&shift);
      assert(bit <= 5);
      res = mi_ishr(b, res, mi_imm(1 << bit));
   }

   return res;
}
#endif /* if GFX_VERx10 >= 125 */
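/* mi_imul_imm() below implements multiplication by a constant as a classic
 * shift-and-add (double-and-add) sequence. For example, with N = 5 (binary
 * 101), res starts at src, doubles once per remaining bit, and picks up an
 * extra src wherever a bit is set:
 *
 *    i = 1:  res = res + res            // res = 2 * src
 *    i = 0:  res = res + res            // res = 4 * src
 *            res = res + src            // bit 0 set: res = 5 * src
 */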
static inline struct mi_value
mi_imul_imm(struct mi_builder *b, struct mi_value src, uint32_t N)
{
   if (src.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src) * N);

   if (N == 0) {
      mi_value_unref(b, src);
      return mi_imm(0);
   }

   if (N == 1)
      return src;

   src = mi_value_to_gpr(b, src);

   struct mi_value res = mi_value_ref(b, src);

   unsigned top_bit = 31 - __builtin_clz(N);
   for (int i = top_bit - 1; i >= 0; i--) {
      res = mi_iadd(b, res, mi_value_ref(b, res));
      if (N & (1 << i))
         res = mi_iadd(b, res, mi_value_ref(b, src));
   }

   mi_value_unref(b, src);

   return res;
}

static inline struct mi_value
mi_ishl_imm(struct mi_builder *b, struct mi_value src, uint32_t shift)
{
   if (shift == 0)
      return src;

   if (shift >= 64)
      return mi_imm(0);

   if (src.type == MI_VALUE_TYPE_IMM)
      return mi_imm(mi_value_to_u64(src) << shift);

   struct mi_value res = mi_value_to_gpr(b, src);

#if GFX_VERx10 >= 125
   /* Annoyingly, we only have power-of-two shifts */
   while (shift) {
      int bit = u_bit_scan(&shift);
      assert(bit <= 5);
      res = mi_ishl(b, res, mi_imm(1 << bit));
   }
#else
   for (unsigned i = 0; i < shift; i++)
      res = mi_iadd(b, res, mi_value_ref(b, res));
#endif

   return res;
}

static inline struct mi_value
mi_ushr32_imm(struct mi_builder *b, struct mi_value src, uint32_t shift)
{
   if (shift == 0)
      return src;

   if (shift >= 64)
      return mi_imm(0);

   /* We right-shift by left-shifting by 32 - shift and taking the top 32 bits
    * of the result.
    */
   if (src.type == MI_VALUE_TYPE_IMM)
      return mi_imm((mi_value_to_u64(src) >> shift) & UINT32_MAX);

   if (shift > 32) {
      struct mi_value tmp = mi_new_gpr(b);
      _mi_copy_no_unref(b, mi_value_half(tmp, false),
                           mi_value_half(src, true));
      _mi_copy_no_unref(b, mi_value_half(tmp, true), mi_imm(0));
      mi_value_unref(b, src);
      src = tmp;
      shift -= 32;
   }
   assert(shift <= 32);
   struct mi_value tmp = mi_ishl_imm(b, src, 32 - shift);
   struct mi_value dst = mi_new_gpr(b);
   _mi_copy_no_unref(b, mi_value_half(dst, false),
                        mi_value_half(tmp, true));
   _mi_copy_no_unref(b, mi_value_half(dst, true), mi_imm(0));
   mi_value_unref(b, tmp);
   return dst;
}

static inline struct mi_value
mi_udiv32_imm(struct mi_builder *b, struct mi_value N, uint32_t D)
{
   if (N.type == MI_VALUE_TYPE_IMM) {
      assert(mi_value_to_u64(N) <= UINT32_MAX);
      return mi_imm(mi_value_to_u64(N) / D);
   }

   /* We implicitly assume that N is only a 32-bit value */
   if (D == 0) {
      /* This is invalid but we should do something */
      return mi_imm(0);
   } else if (util_is_power_of_two_or_zero(D)) {
      return mi_ushr32_imm(b, N, util_logbase2(D));
   } else {
      struct util_fast_udiv_info m = util_compute_fast_udiv_info(D, 32, 32);
      assert(m.multiplier <= UINT32_MAX);

      if (m.pre_shift)
         N = mi_ushr32_imm(b, N, m.pre_shift);

      /* Do the 32x32 multiply into gpr0 */
      N = mi_imul_imm(b, N, m.multiplier);

      if (m.increment)
         N = mi_iadd(b, N, mi_imm(m.multiplier));

      N = mi_ushr32_imm(b, N, 32);

      if (m.post_shift)
         N = mi_ushr32_imm(b, N, m.post_shift);

      return N;
   }
}
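/* The non-power-of-two path above is the standard fixed-point reciprocal
 * trick: util_compute_fast_udiv_info() picks a 32-bit multiplier M and
 * shifts such that, roughly, N / D == (N * M) >> (32 + post_shift), turning
 * the division into a multiply plus shifts the MI ALU can actually perform.
 */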
#endif /* MI_MATH section */

/* This assumes addresses of strictly more than 32bits (aka. Gfx8+). */
#if MI_BUILDER_CAN_WRITE_BATCH

struct mi_address_token {
   /* Pointers to address memory fields in the batch. */
   uint64_t *ptrs[2];
};

static inline struct mi_address_token
mi_store_address(struct mi_builder *b, struct mi_value addr_reg)
{
   mi_builder_flush_math(b);

   assert(addr_reg.type == MI_VALUE_TYPE_REG64);

   struct mi_address_token token = {};

   for (unsigned i = 0; i < 2; i++) {
      mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {
         srm.RegisterAddress = addr_reg.reg + (i * 4);

         const unsigned addr_dw =
            GENX(MI_STORE_REGISTER_MEM_MemoryAddress_start) / 8;
         token.ptrs[i] = (void *)_dst + addr_dw;
      }
   }

   mi_value_unref(b, addr_reg);
   return token;
}

static inline void
mi_self_mod_barrier(struct mi_builder *b)
{
   /* First make sure all the memory writes from previous modifying commands
    * have landed. We want to do this before going through the CS cache,
    * otherwise we could be fetching memory that hasn't been written to yet.
    */
   mi_builder_emit(b, GENX(PIPE_CONTROL), pc) {
      pc.CommandStreamerStallEnable = true;
   }
   /* Documentation says Gfx11+ should be able to invalidate the command
    * cache but experiments show it doesn't work properly, so for now just
    * step over the CS prefetch.
    */
   for (uint32_t i = 0; i < (b->devinfo->cs_prefetch_size / 4); i++)
      mi_builder_emit(b, GENX(MI_NOOP), noop);
}

static inline void
_mi_resolve_address_token(struct mi_builder *b,
                          struct mi_address_token token,
                          void *batch_location)
{
   __gen_address_type addr = __gen_get_batch_address(b->user_data,
                                                     batch_location);
   uint64_t addr_addr_u64 = __gen_combine_address(b->user_data, batch_location,
                                                  addr, 0);
   *(token.ptrs[0]) = addr_addr_u64;
   *(token.ptrs[1]) = addr_addr_u64 + 4;
}

#endif /* MI_BUILDER_CAN_WRITE_BATCH */
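/* Sketch of the self-modifying flow, assuming "addr_gpr" holds the address
 * to write and "address_field_location" points at the batch dword to patch
 * (both hypothetical names): mi_store_address() emits SRMs whose destination
 * is filled in later, _mi_resolve_address_token() points them at the field
 * once its batch location is known, and mi_self_mod_barrier() must execute
 * between the rewrite landing and the modified command running:
 *
 *    struct mi_address_token t = mi_store_address(b, addr_gpr);
 *    ... emit the command whose address field will be rewritten ...
 *    _mi_resolve_address_token(b, t, address_field_location);
 *    ... on the GPU timeline, before the modified command ...
 *    mi_self_mod_barrier(b);
 */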
#if GFX_VERx10 >= 125

/*
 * Indirect load/store. Only available on XE_HP+
 */

MUST_CHECK static inline struct mi_value
mi_load_mem64_offset(struct mi_builder *b,
                     __gen_address_type addr, struct mi_value offset)
{
   uint64_t addr_u64 = __gen_combine_address(b->user_data, NULL, addr, 0);
   struct mi_value addr_val = mi_imm(addr_u64);

   struct mi_value dst = mi_new_gpr(b);

   uint32_t dw[5];
   dw[0] = _mi_math_load_src(b, MI_ALU_SRCA, &addr_val);
   dw[1] = _mi_math_load_src(b, MI_ALU_SRCB, &offset);
   dw[2] = _mi_pack_alu(MI_ALU_ADD, 0, 0);
   dw[3] = _mi_pack_alu(MI_ALU_LOADIND, _mi_value_as_gpr(dst), MI_ALU_ACCU);
   dw[4] = _mi_pack_alu(MI_ALU_FENCE_RD, 0, 0);
   _mi_builder_push_math(b, dw, 5);

   mi_value_unref(b, addr_val);
   mi_value_unref(b, offset);

   return dst;
}

static inline void
mi_store_mem64_offset(struct mi_builder *b,
                      __gen_address_type addr, struct mi_value offset,
                      struct mi_value data)
{
   uint64_t addr_u64 = __gen_combine_address(b->user_data, NULL, addr, 0);
   struct mi_value addr_val = mi_imm(addr_u64);

   data = mi_value_to_gpr(b, mi_resolve_invert(b, data));

   uint32_t dw[5];
   dw[0] = _mi_math_load_src(b, MI_ALU_SRCA, &addr_val);
   dw[1] = _mi_math_load_src(b, MI_ALU_SRCB, &offset);
   dw[2] = _mi_pack_alu(MI_ALU_ADD, 0, 0);
   dw[3] = _mi_pack_alu(MI_ALU_STOREIND, MI_ALU_ACCU, _mi_value_as_gpr(data));
   dw[4] = _mi_pack_alu(MI_ALU_FENCE_WR, 0, 0);
   _mi_builder_push_math(b, dw, 5);

   mi_value_unref(b, addr_val);
   mi_value_unref(b, offset);
   mi_value_unref(b, data);

   /* This is the only math case which has side-effects outside of regular
    * registers, so flush math afterwards to avoid confusing anyone.
    */
   mi_builder_flush_math(b);
}

/*
 * Control-flow Section. Only available on XE_HP+
 */

struct _mi_goto {
   bool predicated;
   void *mi_bbs;
};

struct mi_goto_target {
   bool placed;
   unsigned num_gotos;
   struct _mi_goto gotos[8];
   __gen_address_type addr;
};

#define MI_GOTO_TARGET_INIT ((struct mi_goto_target) {})

#define MI_BUILDER_MI_PREDICATE_RESULT_num 0x2418

static inline void
mi_goto_if(struct mi_builder *b, struct mi_value cond,
           struct mi_goto_target *t)
{
   /* First, set up the predicate, if any */
   bool predicated;
   if (cond.type == MI_VALUE_TYPE_IMM) {
      /* If it's an immediate, the goto either doesn't happen or happens
       * unconditionally.
       */
      if (mi_value_to_u64(cond) == 0)
         return;

      assert(mi_value_to_u64(cond) == ~0ull);
      predicated = false;
   } else if (mi_value_is_reg(cond) &&
              cond.reg == MI_BUILDER_MI_PREDICATE_RESULT_num) {
      /* If it's MI_PREDICATE_RESULT, we use whatever predicate the client
       * provided us with
       */
      assert(cond.type == MI_VALUE_TYPE_REG32);
      predicated = true;
   } else {
      mi_store(b, mi_reg32(MI_BUILDER_MI_PREDICATE_RESULT_num), cond);
      predicated = true;
   }

   if (predicated) {
      mi_builder_emit(b, GENX(MI_SET_PREDICATE), sp) {
         sp.PredicateEnable = NOOPOnResultClear;
      }
   }
   if (t->placed) {
      mi_builder_emit(b, GENX(MI_BATCH_BUFFER_START), bbs) {
         bbs.PredicationEnable = predicated;
         bbs.AddressSpaceIndicator = ASI_PPGTT;
         bbs.BatchBufferStartAddress = t->addr;
      }
   } else {
      assert(t->num_gotos < ARRAY_SIZE(t->gotos));
      struct _mi_goto g = {
         .predicated = predicated,
         .mi_bbs = __gen_get_batch_dwords(b->user_data,
                                          GENX(MI_BATCH_BUFFER_START_length)),
      };
      memset(g.mi_bbs, 0, 4 * GENX(MI_BATCH_BUFFER_START_length));
      t->gotos[t->num_gotos++] = g;
   }
   if (predicated) {
      mi_builder_emit(b, GENX(MI_SET_PREDICATE), sp) {
         sp.PredicateEnable = NOOPNever;
      }
   }
}

static inline void
mi_goto(struct mi_builder *b, struct mi_goto_target *t)
{
   mi_goto_if(b, mi_imm(-1), t);
}

static inline void
mi_goto_target(struct mi_builder *b, struct mi_goto_target *t)
{
   mi_builder_emit(b, GENX(MI_SET_PREDICATE), sp) {
      sp.PredicateEnable = NOOPNever;
      t->addr = __gen_get_batch_address(b->user_data, _dst);
   }
   t->placed = true;

   struct GENX(MI_BATCH_BUFFER_START) bbs = { GENX(MI_BATCH_BUFFER_START_header) };
   bbs.AddressSpaceIndicator = ASI_PPGTT;
   bbs.BatchBufferStartAddress = t->addr;

   for (unsigned i = 0; i < t->num_gotos; i++) {
      bbs.PredicationEnable = t->gotos[i].predicated;
      GENX(MI_BATCH_BUFFER_START_pack)(b->user_data, t->gotos[i].mi_bbs, &bbs);
   }
}

static inline struct mi_goto_target
mi_goto_target_init_and_place(struct mi_builder *b)
{
   struct mi_goto_target t = MI_GOTO_TARGET_INIT;
   mi_goto_target(b, &t);
   return t;
}

#define mi_loop(b) \
   for (struct mi_goto_target __break = MI_GOTO_TARGET_INIT, \
        __continue = mi_goto_target_init_and_place(b); !__break.placed; \
        mi_goto(b, &__continue), mi_goto_target(b, &__break))

#define mi_break(b) mi_goto(b, &__break)
#define mi_break_if(b, cond) mi_goto_if(b, cond, &__break)
#define mi_continue(b) mi_goto(b, &__continue)
#define mi_continue_if(b, cond) mi_goto_if(b, cond, &__continue)
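/* A sketch of a GPU-side countdown loop using the macros above, assuming
 * "counter" is a value already resident in a GPR:
 *
 *    mi_loop(b) {
 *       // mi_z consumes its source, so keep counter alive with a ref.
 *       mi_break_if(b, mi_z(b, mi_value_ref(b, counter)));
 *       ... loop body emitted here ...
 *       counter = mi_isub(b, counter, mi_imm(1));
 *    }
 */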
#endif /* GFX_VERx10 >= 125 */

#endif /* MI_BUILDER_H */