1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2019 Intel Corporation 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#ifndef MI_BUILDER_H 25bf215546Sopenharmony_ci#define MI_BUILDER_H 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_ci#include "dev/intel_device_info.h" 28bf215546Sopenharmony_ci#include "genxml/genX_bits.h" 29bf215546Sopenharmony_ci#include "util/bitscan.h" 30bf215546Sopenharmony_ci#include "util/fast_idiv_by_const.h" 31bf215546Sopenharmony_ci#include "util/u_math.h" 32bf215546Sopenharmony_ci 33bf215546Sopenharmony_ci#ifndef MI_BUILDER_NUM_ALLOC_GPRS 34bf215546Sopenharmony_ci/** The number of GPRs the MI builder is allowed to allocate 35bf215546Sopenharmony_ci * 36bf215546Sopenharmony_ci * This may be set by a user of this API so that it can reserve some GPRs at 37bf215546Sopenharmony_ci * the top end for its own use. 38bf215546Sopenharmony_ci */ 39bf215546Sopenharmony_ci#define MI_BUILDER_NUM_ALLOC_GPRS 16 40bf215546Sopenharmony_ci#endif 41bf215546Sopenharmony_ci 42bf215546Sopenharmony_ci/** These must be defined by the user of the builder 43bf215546Sopenharmony_ci * 44bf215546Sopenharmony_ci * void *__gen_get_batch_dwords(__gen_user_data *user_data, 45bf215546Sopenharmony_ci * unsigned num_dwords); 46bf215546Sopenharmony_ci * 47bf215546Sopenharmony_ci * __gen_address_type 48bf215546Sopenharmony_ci * __gen_address_offset(__gen_address_type addr, uint64_t offset); 49bf215546Sopenharmony_ci * 50bf215546Sopenharmony_ci * 51bf215546Sopenharmony_ci * If self-modifying batches are supported, we must be able to pass batch 52bf215546Sopenharmony_ci * addresses around as void*s so pinning as well as batch chaining or some 53bf215546Sopenharmony_ci * other mechanism for ensuring batch pointers remain valid during building is 54bf215546Sopenharmony_ci * required. The following function must also be defined, it returns an 55bf215546Sopenharmony_ci * address in canonical form: 56bf215546Sopenharmony_ci * 57bf215546Sopenharmony_ci * __gen_address_type 58bf215546Sopenharmony_ci * __gen_get_batch_address(__gen_user_data *user_data, void *location); 59bf215546Sopenharmony_ci * 60bf215546Sopenharmony_ci * Also, __gen_combine_address must accept a location value of NULL and return 61bf215546Sopenharmony_ci * a fully valid 64-bit address. 62bf215546Sopenharmony_ci */ 63bf215546Sopenharmony_ci 64bf215546Sopenharmony_ci/* 65bf215546Sopenharmony_ci * Start of the actual MI builder 66bf215546Sopenharmony_ci */ 67bf215546Sopenharmony_ci 68bf215546Sopenharmony_ci#define __genxml_cmd_length(cmd) cmd ## _length 69bf215546Sopenharmony_ci#define __genxml_cmd_header(cmd) cmd ## _header 70bf215546Sopenharmony_ci#define __genxml_cmd_pack(cmd) cmd ## _pack 71bf215546Sopenharmony_ci 72bf215546Sopenharmony_ci#define mi_builder_pack(b, cmd, dst, name) \ 73bf215546Sopenharmony_ci for (struct cmd name = { __genxml_cmd_header(cmd) }, \ 74bf215546Sopenharmony_ci *_dst = (struct cmd *)(dst); __builtin_expect(_dst != NULL, 1); \ 75bf215546Sopenharmony_ci __genxml_cmd_pack(cmd)((b)->user_data, (void *)_dst, &name), \ 76bf215546Sopenharmony_ci _dst = NULL) 77bf215546Sopenharmony_ci 78bf215546Sopenharmony_ci#define mi_builder_emit(b, cmd, name) \ 79bf215546Sopenharmony_ci mi_builder_pack((b), cmd, __gen_get_batch_dwords((b)->user_data, __genxml_cmd_length(cmd)), name) 80bf215546Sopenharmony_ci 81bf215546Sopenharmony_ci 82bf215546Sopenharmony_cienum mi_value_type { 83bf215546Sopenharmony_ci MI_VALUE_TYPE_IMM, 84bf215546Sopenharmony_ci MI_VALUE_TYPE_MEM32, 85bf215546Sopenharmony_ci MI_VALUE_TYPE_MEM64, 86bf215546Sopenharmony_ci MI_VALUE_TYPE_REG32, 87bf215546Sopenharmony_ci MI_VALUE_TYPE_REG64, 88bf215546Sopenharmony_ci}; 89bf215546Sopenharmony_ci 90bf215546Sopenharmony_cistruct mi_value { 91bf215546Sopenharmony_ci enum mi_value_type type; 92bf215546Sopenharmony_ci 93bf215546Sopenharmony_ci union { 94bf215546Sopenharmony_ci uint64_t imm; 95bf215546Sopenharmony_ci __gen_address_type addr; 96bf215546Sopenharmony_ci uint32_t reg; 97bf215546Sopenharmony_ci }; 98bf215546Sopenharmony_ci 99bf215546Sopenharmony_ci#if GFX_VERx10 >= 75 100bf215546Sopenharmony_ci bool invert; 101bf215546Sopenharmony_ci#endif 102bf215546Sopenharmony_ci}; 103bf215546Sopenharmony_ci 104bf215546Sopenharmony_cistruct mi_reg_num { 105bf215546Sopenharmony_ci uint32_t num; 106bf215546Sopenharmony_ci#if GFX_VER >= 11 107bf215546Sopenharmony_ci bool cs; 108bf215546Sopenharmony_ci#endif 109bf215546Sopenharmony_ci}; 110bf215546Sopenharmony_ci 111bf215546Sopenharmony_cistatic inline struct mi_reg_num 112bf215546Sopenharmony_cimi_adjust_reg_num(uint32_t reg) 113bf215546Sopenharmony_ci{ 114bf215546Sopenharmony_ci#if GFX_VER >= 11 115bf215546Sopenharmony_ci bool cs = reg >= 0x2000 && reg < 0x4000; 116bf215546Sopenharmony_ci return (struct mi_reg_num) { 117bf215546Sopenharmony_ci .num = reg - (cs ? 0x2000 : 0), 118bf215546Sopenharmony_ci .cs = cs, 119bf215546Sopenharmony_ci }; 120bf215546Sopenharmony_ci#else 121bf215546Sopenharmony_ci return (struct mi_reg_num) { .num = reg, }; 122bf215546Sopenharmony_ci#endif 123bf215546Sopenharmony_ci} 124bf215546Sopenharmony_ci 125bf215546Sopenharmony_ci#if GFX_VER >= 9 126bf215546Sopenharmony_ci#define MI_BUILDER_MAX_MATH_DWORDS 256 127bf215546Sopenharmony_ci#else 128bf215546Sopenharmony_ci#define MI_BUILDER_MAX_MATH_DWORDS 64 129bf215546Sopenharmony_ci#endif 130bf215546Sopenharmony_ci 131bf215546Sopenharmony_cistruct mi_builder { 132bf215546Sopenharmony_ci const struct intel_device_info *devinfo; 133bf215546Sopenharmony_ci __gen_user_data *user_data; 134bf215546Sopenharmony_ci 135bf215546Sopenharmony_ci#if GFX_VERx10 >= 75 136bf215546Sopenharmony_ci uint32_t gprs; 137bf215546Sopenharmony_ci uint8_t gpr_refs[MI_BUILDER_NUM_ALLOC_GPRS]; 138bf215546Sopenharmony_ci 139bf215546Sopenharmony_ci unsigned num_math_dwords; 140bf215546Sopenharmony_ci uint32_t math_dwords[MI_BUILDER_MAX_MATH_DWORDS]; 141bf215546Sopenharmony_ci#endif 142bf215546Sopenharmony_ci}; 143bf215546Sopenharmony_ci 144bf215546Sopenharmony_cistatic inline void 145bf215546Sopenharmony_cimi_builder_init(struct mi_builder *b, 146bf215546Sopenharmony_ci const struct intel_device_info *devinfo, 147bf215546Sopenharmony_ci __gen_user_data *user_data) 148bf215546Sopenharmony_ci{ 149bf215546Sopenharmony_ci memset(b, 0, sizeof(*b)); 150bf215546Sopenharmony_ci b->devinfo = devinfo; 151bf215546Sopenharmony_ci b->user_data = user_data; 152bf215546Sopenharmony_ci 153bf215546Sopenharmony_ci#if GFX_VERx10 >= 75 154bf215546Sopenharmony_ci b->gprs = 0; 155bf215546Sopenharmony_ci b->num_math_dwords = 0; 156bf215546Sopenharmony_ci#endif 157bf215546Sopenharmony_ci} 158bf215546Sopenharmony_ci 159bf215546Sopenharmony_cistatic inline void 160bf215546Sopenharmony_cimi_builder_flush_math(struct mi_builder *b) 161bf215546Sopenharmony_ci{ 162bf215546Sopenharmony_ci#if GFX_VERx10 >= 75 163bf215546Sopenharmony_ci if (b->num_math_dwords == 0) 164bf215546Sopenharmony_ci return; 165bf215546Sopenharmony_ci 166bf215546Sopenharmony_ci uint32_t *dw = (uint32_t *)__gen_get_batch_dwords(b->user_data, 167bf215546Sopenharmony_ci 1 + b->num_math_dwords); 168bf215546Sopenharmony_ci mi_builder_pack(b, GENX(MI_MATH), dw, math) { 169bf215546Sopenharmony_ci math.DWordLength = 1 + b->num_math_dwords - GENX(MI_MATH_length_bias); 170bf215546Sopenharmony_ci } 171bf215546Sopenharmony_ci memcpy(dw + 1, b->math_dwords, b->num_math_dwords * sizeof(uint32_t)); 172bf215546Sopenharmony_ci b->num_math_dwords = 0; 173bf215546Sopenharmony_ci#endif 174bf215546Sopenharmony_ci} 175bf215546Sopenharmony_ci 176bf215546Sopenharmony_ci#define _MI_BUILDER_GPR_BASE 0x2600 177bf215546Sopenharmony_ci/* The actual hardware limit on GPRs */ 178bf215546Sopenharmony_ci#define _MI_BUILDER_NUM_HW_GPRS 16 179bf215546Sopenharmony_ci 180bf215546Sopenharmony_ci#if GFX_VERx10 >= 75 181bf215546Sopenharmony_ci 182bf215546Sopenharmony_cistatic inline bool 183bf215546Sopenharmony_cimi_value_is_reg(struct mi_value val) 184bf215546Sopenharmony_ci{ 185bf215546Sopenharmony_ci return val.type == MI_VALUE_TYPE_REG32 || 186bf215546Sopenharmony_ci val.type == MI_VALUE_TYPE_REG64; 187bf215546Sopenharmony_ci} 188bf215546Sopenharmony_ci 189bf215546Sopenharmony_cistatic inline bool 190bf215546Sopenharmony_cimi_value_is_gpr(struct mi_value val) 191bf215546Sopenharmony_ci{ 192bf215546Sopenharmony_ci return mi_value_is_reg(val) && 193bf215546Sopenharmony_ci val.reg >= _MI_BUILDER_GPR_BASE && 194bf215546Sopenharmony_ci val.reg < _MI_BUILDER_GPR_BASE + 195bf215546Sopenharmony_ci _MI_BUILDER_NUM_HW_GPRS * 8; 196bf215546Sopenharmony_ci} 197bf215546Sopenharmony_ci 198bf215546Sopenharmony_cistatic inline bool 199bf215546Sopenharmony_ci_mi_value_is_allocated_gpr(struct mi_value val) 200bf215546Sopenharmony_ci{ 201bf215546Sopenharmony_ci return mi_value_is_reg(val) && 202bf215546Sopenharmony_ci val.reg >= _MI_BUILDER_GPR_BASE && 203bf215546Sopenharmony_ci val.reg < _MI_BUILDER_GPR_BASE + 204bf215546Sopenharmony_ci MI_BUILDER_NUM_ALLOC_GPRS * 8; 205bf215546Sopenharmony_ci} 206bf215546Sopenharmony_ci 207bf215546Sopenharmony_cistatic inline uint32_t 208bf215546Sopenharmony_ci_mi_value_as_gpr(struct mi_value val) 209bf215546Sopenharmony_ci{ 210bf215546Sopenharmony_ci assert(mi_value_is_gpr(val)); 211bf215546Sopenharmony_ci assert(val.reg % 8 == 0); 212bf215546Sopenharmony_ci return (val.reg - _MI_BUILDER_GPR_BASE) / 8; 213bf215546Sopenharmony_ci} 214bf215546Sopenharmony_ci 215bf215546Sopenharmony_cistatic inline struct mi_value 216bf215546Sopenharmony_cimi_new_gpr(struct mi_builder *b) 217bf215546Sopenharmony_ci{ 218bf215546Sopenharmony_ci unsigned gpr = ffs(~b->gprs) - 1; 219bf215546Sopenharmony_ci assert(gpr < MI_BUILDER_NUM_ALLOC_GPRS); 220bf215546Sopenharmony_ci assert(b->gpr_refs[gpr] == 0); 221bf215546Sopenharmony_ci b->gprs |= (1u << gpr); 222bf215546Sopenharmony_ci b->gpr_refs[gpr] = 1; 223bf215546Sopenharmony_ci 224bf215546Sopenharmony_ci return (struct mi_value) { 225bf215546Sopenharmony_ci .type = MI_VALUE_TYPE_REG64, 226bf215546Sopenharmony_ci .reg = _MI_BUILDER_GPR_BASE + gpr * 8, 227bf215546Sopenharmony_ci }; 228bf215546Sopenharmony_ci} 229bf215546Sopenharmony_ci#endif /* GFX_VERx10 >= 75 */ 230bf215546Sopenharmony_ci 231bf215546Sopenharmony_ci/** Take a reference to a mi_value 232bf215546Sopenharmony_ci * 233bf215546Sopenharmony_ci * The MI builder uses reference counting to automatically free ALU GPRs for 234bf215546Sopenharmony_ci * re-use in calculations. All mi_* math functions consume the reference 235bf215546Sopenharmony_ci * they are handed for each source and return a reference to a value which the 236bf215546Sopenharmony_ci * caller must consume. In particular, if you pas the same value into a 237bf215546Sopenharmony_ci * single mi_* math function twice (say to add a number to itself), you 238bf215546Sopenharmony_ci * are responsible for calling mi_value_ref() to get a second reference 239bf215546Sopenharmony_ci * because the mi_* math function will consume it twice. 240bf215546Sopenharmony_ci */ 241bf215546Sopenharmony_cistatic inline struct mi_value 242bf215546Sopenharmony_cimi_value_ref(struct mi_builder *b, struct mi_value val) 243bf215546Sopenharmony_ci{ 244bf215546Sopenharmony_ci#if GFX_VERx10 >= 75 245bf215546Sopenharmony_ci if (_mi_value_is_allocated_gpr(val)) { 246bf215546Sopenharmony_ci unsigned gpr = _mi_value_as_gpr(val); 247bf215546Sopenharmony_ci assert(gpr < MI_BUILDER_NUM_ALLOC_GPRS); 248bf215546Sopenharmony_ci assert(b->gprs & (1u << gpr)); 249bf215546Sopenharmony_ci assert(b->gpr_refs[gpr] < UINT8_MAX); 250bf215546Sopenharmony_ci b->gpr_refs[gpr]++; 251bf215546Sopenharmony_ci } 252bf215546Sopenharmony_ci#endif /* GFX_VERx10 >= 75 */ 253bf215546Sopenharmony_ci 254bf215546Sopenharmony_ci return val; 255bf215546Sopenharmony_ci} 256bf215546Sopenharmony_ci 257bf215546Sopenharmony_ci/** Drop a reference to a mi_value 258bf215546Sopenharmony_ci * 259bf215546Sopenharmony_ci * See also mi_value_ref. 260bf215546Sopenharmony_ci */ 261bf215546Sopenharmony_cistatic inline void 262bf215546Sopenharmony_cimi_value_unref(struct mi_builder *b, struct mi_value val) 263bf215546Sopenharmony_ci{ 264bf215546Sopenharmony_ci#if GFX_VERx10 >= 75 265bf215546Sopenharmony_ci if (_mi_value_is_allocated_gpr(val)) { 266bf215546Sopenharmony_ci unsigned gpr = _mi_value_as_gpr(val); 267bf215546Sopenharmony_ci assert(gpr < MI_BUILDER_NUM_ALLOC_GPRS); 268bf215546Sopenharmony_ci assert(b->gprs & (1u << gpr)); 269bf215546Sopenharmony_ci assert(b->gpr_refs[gpr] > 0); 270bf215546Sopenharmony_ci if (--b->gpr_refs[gpr] == 0) 271bf215546Sopenharmony_ci b->gprs &= ~(1u << gpr); 272bf215546Sopenharmony_ci } 273bf215546Sopenharmony_ci#endif /* GFX_VERx10 >= 75 */ 274bf215546Sopenharmony_ci} 275bf215546Sopenharmony_ci 276bf215546Sopenharmony_cistatic inline struct mi_value 277bf215546Sopenharmony_cimi_imm(uint64_t imm) 278bf215546Sopenharmony_ci{ 279bf215546Sopenharmony_ci return (struct mi_value) { 280bf215546Sopenharmony_ci .type = MI_VALUE_TYPE_IMM, 281bf215546Sopenharmony_ci .imm = imm, 282bf215546Sopenharmony_ci }; 283bf215546Sopenharmony_ci} 284bf215546Sopenharmony_ci 285bf215546Sopenharmony_cistatic inline struct mi_value 286bf215546Sopenharmony_cimi_reg32(uint32_t reg) 287bf215546Sopenharmony_ci{ 288bf215546Sopenharmony_ci struct mi_value val = { 289bf215546Sopenharmony_ci .type = MI_VALUE_TYPE_REG32, 290bf215546Sopenharmony_ci .reg = reg, 291bf215546Sopenharmony_ci }; 292bf215546Sopenharmony_ci#if GFX_VERx10 >= 75 293bf215546Sopenharmony_ci assert(!_mi_value_is_allocated_gpr(val)); 294bf215546Sopenharmony_ci#endif 295bf215546Sopenharmony_ci return val; 296bf215546Sopenharmony_ci} 297bf215546Sopenharmony_ci 298bf215546Sopenharmony_cistatic inline struct mi_value 299bf215546Sopenharmony_cimi_reg64(uint32_t reg) 300bf215546Sopenharmony_ci{ 301bf215546Sopenharmony_ci struct mi_value val = { 302bf215546Sopenharmony_ci .type = MI_VALUE_TYPE_REG64, 303bf215546Sopenharmony_ci .reg = reg, 304bf215546Sopenharmony_ci }; 305bf215546Sopenharmony_ci#if GFX_VERx10 >= 75 306bf215546Sopenharmony_ci assert(!_mi_value_is_allocated_gpr(val)); 307bf215546Sopenharmony_ci#endif 308bf215546Sopenharmony_ci return val; 309bf215546Sopenharmony_ci} 310bf215546Sopenharmony_ci 311bf215546Sopenharmony_cistatic inline struct mi_value 312bf215546Sopenharmony_cimi_mem32(__gen_address_type addr) 313bf215546Sopenharmony_ci{ 314bf215546Sopenharmony_ci return (struct mi_value) { 315bf215546Sopenharmony_ci .type = MI_VALUE_TYPE_MEM32, 316bf215546Sopenharmony_ci .addr = addr, 317bf215546Sopenharmony_ci }; 318bf215546Sopenharmony_ci} 319bf215546Sopenharmony_ci 320bf215546Sopenharmony_cistatic inline struct mi_value 321bf215546Sopenharmony_cimi_mem64(__gen_address_type addr) 322bf215546Sopenharmony_ci{ 323bf215546Sopenharmony_ci return (struct mi_value) { 324bf215546Sopenharmony_ci .type = MI_VALUE_TYPE_MEM64, 325bf215546Sopenharmony_ci .addr = addr, 326bf215546Sopenharmony_ci }; 327bf215546Sopenharmony_ci} 328bf215546Sopenharmony_ci 329bf215546Sopenharmony_cistatic inline struct mi_value 330bf215546Sopenharmony_cimi_value_half(struct mi_value value, bool top_32_bits) 331bf215546Sopenharmony_ci{ 332bf215546Sopenharmony_ci switch (value.type) { 333bf215546Sopenharmony_ci case MI_VALUE_TYPE_IMM: 334bf215546Sopenharmony_ci if (top_32_bits) 335bf215546Sopenharmony_ci value.imm >>= 32; 336bf215546Sopenharmony_ci else 337bf215546Sopenharmony_ci value.imm &= 0xffffffffu; 338bf215546Sopenharmony_ci return value; 339bf215546Sopenharmony_ci 340bf215546Sopenharmony_ci case MI_VALUE_TYPE_MEM32: 341bf215546Sopenharmony_ci assert(!top_32_bits); 342bf215546Sopenharmony_ci return value; 343bf215546Sopenharmony_ci 344bf215546Sopenharmony_ci case MI_VALUE_TYPE_MEM64: 345bf215546Sopenharmony_ci if (top_32_bits) 346bf215546Sopenharmony_ci value.addr = __gen_address_offset(value.addr, 4); 347bf215546Sopenharmony_ci value.type = MI_VALUE_TYPE_MEM32; 348bf215546Sopenharmony_ci return value; 349bf215546Sopenharmony_ci 350bf215546Sopenharmony_ci case MI_VALUE_TYPE_REG32: 351bf215546Sopenharmony_ci assert(!top_32_bits); 352bf215546Sopenharmony_ci return value; 353bf215546Sopenharmony_ci 354bf215546Sopenharmony_ci case MI_VALUE_TYPE_REG64: 355bf215546Sopenharmony_ci if (top_32_bits) 356bf215546Sopenharmony_ci value.reg += 4; 357bf215546Sopenharmony_ci value.type = MI_VALUE_TYPE_REG32; 358bf215546Sopenharmony_ci return value; 359bf215546Sopenharmony_ci } 360bf215546Sopenharmony_ci 361bf215546Sopenharmony_ci unreachable("Invalid mi_value type"); 362bf215546Sopenharmony_ci} 363bf215546Sopenharmony_ci 364bf215546Sopenharmony_cistatic inline void 365bf215546Sopenharmony_ci_mi_copy_no_unref(struct mi_builder *b, 366bf215546Sopenharmony_ci struct mi_value dst, struct mi_value src) 367bf215546Sopenharmony_ci{ 368bf215546Sopenharmony_ci#if GFX_VERx10 >= 75 369bf215546Sopenharmony_ci /* TODO: We could handle src.invert by emitting a bit of math if we really 370bf215546Sopenharmony_ci * wanted to. 371bf215546Sopenharmony_ci */ 372bf215546Sopenharmony_ci assert(!dst.invert && !src.invert); 373bf215546Sopenharmony_ci#endif 374bf215546Sopenharmony_ci mi_builder_flush_math(b); 375bf215546Sopenharmony_ci 376bf215546Sopenharmony_ci switch (dst.type) { 377bf215546Sopenharmony_ci case MI_VALUE_TYPE_IMM: 378bf215546Sopenharmony_ci unreachable("Cannot copy to an immediate"); 379bf215546Sopenharmony_ci 380bf215546Sopenharmony_ci case MI_VALUE_TYPE_MEM64: 381bf215546Sopenharmony_ci case MI_VALUE_TYPE_REG64: 382bf215546Sopenharmony_ci switch (src.type) { 383bf215546Sopenharmony_ci case MI_VALUE_TYPE_IMM: 384bf215546Sopenharmony_ci if (dst.type == MI_VALUE_TYPE_REG64) { 385bf215546Sopenharmony_ci uint32_t *dw = (uint32_t *)__gen_get_batch_dwords(b->user_data, 386bf215546Sopenharmony_ci GENX(MI_LOAD_REGISTER_IMM_length) + 2); 387bf215546Sopenharmony_ci struct mi_reg_num reg = mi_adjust_reg_num(dst.reg); 388bf215546Sopenharmony_ci mi_builder_pack(b, GENX(MI_LOAD_REGISTER_IMM), dw, lri) { 389bf215546Sopenharmony_ci lri.DWordLength = GENX(MI_LOAD_REGISTER_IMM_length) + 2 - 390bf215546Sopenharmony_ci GENX(MI_LOAD_REGISTER_IMM_length_bias); 391bf215546Sopenharmony_ci#if GFX_VER >= 11 392bf215546Sopenharmony_ci lri.AddCSMMIOStartOffset = reg.cs; 393bf215546Sopenharmony_ci#endif 394bf215546Sopenharmony_ci } 395bf215546Sopenharmony_ci dw[1] = reg.num; 396bf215546Sopenharmony_ci dw[2] = src.imm; 397bf215546Sopenharmony_ci dw[3] = reg.num + 4; 398bf215546Sopenharmony_ci dw[4] = src.imm >> 32; 399bf215546Sopenharmony_ci } else { 400bf215546Sopenharmony_ci#if GFX_VER >= 8 401bf215546Sopenharmony_ci assert(dst.type == MI_VALUE_TYPE_MEM64); 402bf215546Sopenharmony_ci uint32_t *dw = (uint32_t *)__gen_get_batch_dwords(b->user_data, 403bf215546Sopenharmony_ci GENX(MI_STORE_DATA_IMM_length) + 1); 404bf215546Sopenharmony_ci mi_builder_pack(b, GENX(MI_STORE_DATA_IMM), dw, sdm) { 405bf215546Sopenharmony_ci sdm.DWordLength = GENX(MI_STORE_DATA_IMM_length) + 1 - 406bf215546Sopenharmony_ci GENX(MI_STORE_DATA_IMM_length_bias); 407bf215546Sopenharmony_ci sdm.StoreQword = true; 408bf215546Sopenharmony_ci sdm.Address = dst.addr; 409bf215546Sopenharmony_ci } 410bf215546Sopenharmony_ci dw[3] = src.imm; 411bf215546Sopenharmony_ci dw[4] = src.imm >> 32; 412bf215546Sopenharmony_ci#else 413bf215546Sopenharmony_ci _mi_copy_no_unref(b, mi_value_half(dst, false), 414bf215546Sopenharmony_ci mi_value_half(src, false)); 415bf215546Sopenharmony_ci _mi_copy_no_unref(b, mi_value_half(dst, true), 416bf215546Sopenharmony_ci mi_value_half(src, true)); 417bf215546Sopenharmony_ci#endif 418bf215546Sopenharmony_ci } 419bf215546Sopenharmony_ci break; 420bf215546Sopenharmony_ci case MI_VALUE_TYPE_REG32: 421bf215546Sopenharmony_ci case MI_VALUE_TYPE_MEM32: 422bf215546Sopenharmony_ci _mi_copy_no_unref(b, mi_value_half(dst, false), 423bf215546Sopenharmony_ci mi_value_half(src, false)); 424bf215546Sopenharmony_ci _mi_copy_no_unref(b, mi_value_half(dst, true), 425bf215546Sopenharmony_ci mi_imm(0)); 426bf215546Sopenharmony_ci break; 427bf215546Sopenharmony_ci case MI_VALUE_TYPE_REG64: 428bf215546Sopenharmony_ci case MI_VALUE_TYPE_MEM64: 429bf215546Sopenharmony_ci _mi_copy_no_unref(b, mi_value_half(dst, false), 430bf215546Sopenharmony_ci mi_value_half(src, false)); 431bf215546Sopenharmony_ci _mi_copy_no_unref(b, mi_value_half(dst, true), 432bf215546Sopenharmony_ci mi_value_half(src, true)); 433bf215546Sopenharmony_ci break; 434bf215546Sopenharmony_ci default: 435bf215546Sopenharmony_ci unreachable("Invalid mi_value type"); 436bf215546Sopenharmony_ci } 437bf215546Sopenharmony_ci break; 438bf215546Sopenharmony_ci 439bf215546Sopenharmony_ci case MI_VALUE_TYPE_MEM32: 440bf215546Sopenharmony_ci switch (src.type) { 441bf215546Sopenharmony_ci case MI_VALUE_TYPE_IMM: 442bf215546Sopenharmony_ci mi_builder_emit(b, GENX(MI_STORE_DATA_IMM), sdi) { 443bf215546Sopenharmony_ci sdi.Address = dst.addr; 444bf215546Sopenharmony_ci#if GFX_VER >= 12 445bf215546Sopenharmony_ci sdi.ForceWriteCompletionCheck = true; 446bf215546Sopenharmony_ci#endif 447bf215546Sopenharmony_ci sdi.ImmediateData = src.imm; 448bf215546Sopenharmony_ci } 449bf215546Sopenharmony_ci break; 450bf215546Sopenharmony_ci 451bf215546Sopenharmony_ci case MI_VALUE_TYPE_MEM32: 452bf215546Sopenharmony_ci case MI_VALUE_TYPE_MEM64: 453bf215546Sopenharmony_ci#if GFX_VER >= 8 454bf215546Sopenharmony_ci mi_builder_emit(b, GENX(MI_COPY_MEM_MEM), cmm) { 455bf215546Sopenharmony_ci cmm.DestinationMemoryAddress = dst.addr; 456bf215546Sopenharmony_ci cmm.SourceMemoryAddress = src.addr; 457bf215546Sopenharmony_ci } 458bf215546Sopenharmony_ci#elif GFX_VERx10 == 75 459bf215546Sopenharmony_ci { 460bf215546Sopenharmony_ci struct mi_value tmp = mi_new_gpr(b); 461bf215546Sopenharmony_ci _mi_copy_no_unref(b, tmp, src); 462bf215546Sopenharmony_ci _mi_copy_no_unref(b, dst, tmp); 463bf215546Sopenharmony_ci mi_value_unref(b, tmp); 464bf215546Sopenharmony_ci } 465bf215546Sopenharmony_ci#else 466bf215546Sopenharmony_ci unreachable("Cannot do mem <-> mem copy on IVB and earlier"); 467bf215546Sopenharmony_ci#endif 468bf215546Sopenharmony_ci break; 469bf215546Sopenharmony_ci 470bf215546Sopenharmony_ci case MI_VALUE_TYPE_REG32: 471bf215546Sopenharmony_ci case MI_VALUE_TYPE_REG64: 472bf215546Sopenharmony_ci mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) { 473bf215546Sopenharmony_ci struct mi_reg_num reg = mi_adjust_reg_num(src.reg); 474bf215546Sopenharmony_ci srm.RegisterAddress = reg.num; 475bf215546Sopenharmony_ci#if GFX_VER >= 11 476bf215546Sopenharmony_ci srm.AddCSMMIOStartOffset = reg.cs; 477bf215546Sopenharmony_ci#endif 478bf215546Sopenharmony_ci srm.MemoryAddress = dst.addr; 479bf215546Sopenharmony_ci } 480bf215546Sopenharmony_ci break; 481bf215546Sopenharmony_ci 482bf215546Sopenharmony_ci default: 483bf215546Sopenharmony_ci unreachable("Invalid mi_value type"); 484bf215546Sopenharmony_ci } 485bf215546Sopenharmony_ci break; 486bf215546Sopenharmony_ci 487bf215546Sopenharmony_ci case MI_VALUE_TYPE_REG32: 488bf215546Sopenharmony_ci switch (src.type) { 489bf215546Sopenharmony_ci case MI_VALUE_TYPE_IMM: 490bf215546Sopenharmony_ci mi_builder_emit(b, GENX(MI_LOAD_REGISTER_IMM), lri) { 491bf215546Sopenharmony_ci struct mi_reg_num reg = mi_adjust_reg_num(dst.reg); 492bf215546Sopenharmony_ci lri.RegisterOffset = reg.num; 493bf215546Sopenharmony_ci#if GFX_VER >= 11 494bf215546Sopenharmony_ci lri.AddCSMMIOStartOffset = reg.cs; 495bf215546Sopenharmony_ci#endif 496bf215546Sopenharmony_ci lri.DataDWord = src.imm; 497bf215546Sopenharmony_ci } 498bf215546Sopenharmony_ci break; 499bf215546Sopenharmony_ci 500bf215546Sopenharmony_ci case MI_VALUE_TYPE_MEM32: 501bf215546Sopenharmony_ci case MI_VALUE_TYPE_MEM64: 502bf215546Sopenharmony_ci#if GFX_VER >= 7 503bf215546Sopenharmony_ci mi_builder_emit(b, GENX(MI_LOAD_REGISTER_MEM), lrm) { 504bf215546Sopenharmony_ci struct mi_reg_num reg = mi_adjust_reg_num(dst.reg); 505bf215546Sopenharmony_ci lrm.RegisterAddress = reg.num; 506bf215546Sopenharmony_ci#if GFX_VER >= 11 507bf215546Sopenharmony_ci lrm.AddCSMMIOStartOffset = reg.cs; 508bf215546Sopenharmony_ci#endif 509bf215546Sopenharmony_ci lrm.MemoryAddress = src.addr; 510bf215546Sopenharmony_ci } 511bf215546Sopenharmony_ci#else 512bf215546Sopenharmony_ci unreachable("Cannot load do mem -> reg copy on SNB and earlier"); 513bf215546Sopenharmony_ci#endif 514bf215546Sopenharmony_ci break; 515bf215546Sopenharmony_ci 516bf215546Sopenharmony_ci case MI_VALUE_TYPE_REG32: 517bf215546Sopenharmony_ci case MI_VALUE_TYPE_REG64: 518bf215546Sopenharmony_ci#if GFX_VERx10 >= 75 519bf215546Sopenharmony_ci if (src.reg != dst.reg) { 520bf215546Sopenharmony_ci mi_builder_emit(b, GENX(MI_LOAD_REGISTER_REG), lrr) { 521bf215546Sopenharmony_ci struct mi_reg_num reg = mi_adjust_reg_num(src.reg); 522bf215546Sopenharmony_ci lrr.SourceRegisterAddress = reg.num; 523bf215546Sopenharmony_ci#if GFX_VER >= 11 524bf215546Sopenharmony_ci lrr.AddCSMMIOStartOffsetSource = reg.cs; 525bf215546Sopenharmony_ci#endif 526bf215546Sopenharmony_ci reg = mi_adjust_reg_num(dst.reg); 527bf215546Sopenharmony_ci lrr.DestinationRegisterAddress = reg.num; 528bf215546Sopenharmony_ci#if GFX_VER >= 11 529bf215546Sopenharmony_ci lrr.AddCSMMIOStartOffsetDestination = reg.cs; 530bf215546Sopenharmony_ci#endif 531bf215546Sopenharmony_ci } 532bf215546Sopenharmony_ci } 533bf215546Sopenharmony_ci#else 534bf215546Sopenharmony_ci unreachable("Cannot do reg <-> reg copy on IVB and earlier"); 535bf215546Sopenharmony_ci#endif 536bf215546Sopenharmony_ci break; 537bf215546Sopenharmony_ci 538bf215546Sopenharmony_ci default: 539bf215546Sopenharmony_ci unreachable("Invalid mi_value type"); 540bf215546Sopenharmony_ci } 541bf215546Sopenharmony_ci break; 542bf215546Sopenharmony_ci 543bf215546Sopenharmony_ci default: 544bf215546Sopenharmony_ci unreachable("Invalid mi_value type"); 545bf215546Sopenharmony_ci } 546bf215546Sopenharmony_ci} 547bf215546Sopenharmony_ci 548bf215546Sopenharmony_ci#if GFX_VERx10 >= 75 549bf215546Sopenharmony_cistatic inline struct mi_value 550bf215546Sopenharmony_cimi_resolve_invert(struct mi_builder *b, struct mi_value src); 551bf215546Sopenharmony_ci#endif 552bf215546Sopenharmony_ci 553bf215546Sopenharmony_ci/** Store the value in src to the value represented by dst 554bf215546Sopenharmony_ci * 555bf215546Sopenharmony_ci * If the bit size of src and dst mismatch, this function does an unsigned 556bf215546Sopenharmony_ci * integer cast. If src has more bits than dst, it takes the bottom bits. If 557bf215546Sopenharmony_ci * src has fewer bits then dst, it fills the top bits with zeros. 558bf215546Sopenharmony_ci * 559bf215546Sopenharmony_ci * This function consumes one reference for each of src and dst. 560bf215546Sopenharmony_ci */ 561bf215546Sopenharmony_cistatic inline void 562bf215546Sopenharmony_cimi_store(struct mi_builder *b, struct mi_value dst, struct mi_value src) 563bf215546Sopenharmony_ci{ 564bf215546Sopenharmony_ci#if GFX_VERx10 >= 75 565bf215546Sopenharmony_ci src = mi_resolve_invert(b, src); 566bf215546Sopenharmony_ci#endif 567bf215546Sopenharmony_ci _mi_copy_no_unref(b, dst, src); 568bf215546Sopenharmony_ci mi_value_unref(b, src); 569bf215546Sopenharmony_ci mi_value_unref(b, dst); 570bf215546Sopenharmony_ci} 571bf215546Sopenharmony_ci 572bf215546Sopenharmony_cistatic inline void 573bf215546Sopenharmony_cimi_memset(struct mi_builder *b, __gen_address_type dst, 574bf215546Sopenharmony_ci uint32_t value, uint32_t size) 575bf215546Sopenharmony_ci{ 576bf215546Sopenharmony_ci#if GFX_VERx10 >= 75 577bf215546Sopenharmony_ci assert(b->num_math_dwords == 0); 578bf215546Sopenharmony_ci#endif 579bf215546Sopenharmony_ci 580bf215546Sopenharmony_ci /* This memset operates in units of dwords. */ 581bf215546Sopenharmony_ci assert(size % 4 == 0); 582bf215546Sopenharmony_ci 583bf215546Sopenharmony_ci for (uint32_t i = 0; i < size; i += 4) { 584bf215546Sopenharmony_ci mi_store(b, mi_mem32(__gen_address_offset(dst, i)), 585bf215546Sopenharmony_ci mi_imm(value)); 586bf215546Sopenharmony_ci } 587bf215546Sopenharmony_ci} 588bf215546Sopenharmony_ci 589bf215546Sopenharmony_ci/* NOTE: On IVB, this function stomps GFX7_3DPRIM_BASE_VERTEX */ 590bf215546Sopenharmony_cistatic inline void 591bf215546Sopenharmony_cimi_memcpy(struct mi_builder *b, __gen_address_type dst, 592bf215546Sopenharmony_ci __gen_address_type src, uint32_t size) 593bf215546Sopenharmony_ci{ 594bf215546Sopenharmony_ci#if GFX_VERx10 >= 75 595bf215546Sopenharmony_ci assert(b->num_math_dwords == 0); 596bf215546Sopenharmony_ci#endif 597bf215546Sopenharmony_ci 598bf215546Sopenharmony_ci /* This memcpy operates in units of dwords. */ 599bf215546Sopenharmony_ci assert(size % 4 == 0); 600bf215546Sopenharmony_ci 601bf215546Sopenharmony_ci for (uint32_t i = 0; i < size; i += 4) { 602bf215546Sopenharmony_ci struct mi_value dst_val = mi_mem32(__gen_address_offset(dst, i)); 603bf215546Sopenharmony_ci struct mi_value src_val = mi_mem32(__gen_address_offset(src, i)); 604bf215546Sopenharmony_ci#if GFX_VERx10 >= 75 605bf215546Sopenharmony_ci mi_store(b, dst_val, src_val); 606bf215546Sopenharmony_ci#else 607bf215546Sopenharmony_ci /* IVB does not have a general purpose register for command streamer 608bf215546Sopenharmony_ci * commands. Therefore, we use an alternate temporary register. 609bf215546Sopenharmony_ci */ 610bf215546Sopenharmony_ci struct mi_value tmp_reg = mi_reg32(0x2440); /* GFX7_3DPRIM_BASE_VERTEX */ 611bf215546Sopenharmony_ci mi_store(b, tmp_reg, src_val); 612bf215546Sopenharmony_ci mi_store(b, dst_val, tmp_reg); 613bf215546Sopenharmony_ci#endif 614bf215546Sopenharmony_ci } 615bf215546Sopenharmony_ci} 616bf215546Sopenharmony_ci 617bf215546Sopenharmony_ci/* 618bf215546Sopenharmony_ci * MI_MATH Section. Only available on Haswell+ 619bf215546Sopenharmony_ci */ 620bf215546Sopenharmony_ci 621bf215546Sopenharmony_ci#if GFX_VERx10 >= 75 622bf215546Sopenharmony_ci 623bf215546Sopenharmony_ci/** 624bf215546Sopenharmony_ci * Perform a predicated store (assuming the condition is already loaded 625bf215546Sopenharmony_ci * in the MI_PREDICATE_RESULT register) of the value in src to the memory 626bf215546Sopenharmony_ci * location specified by dst. Non-memory destinations are not supported. 627bf215546Sopenharmony_ci * 628bf215546Sopenharmony_ci * This function consumes one reference for each of src and dst. 629bf215546Sopenharmony_ci */ 630bf215546Sopenharmony_cistatic inline void 631bf215546Sopenharmony_cimi_store_if(struct mi_builder *b, struct mi_value dst, struct mi_value src) 632bf215546Sopenharmony_ci{ 633bf215546Sopenharmony_ci assert(!dst.invert && !src.invert); 634bf215546Sopenharmony_ci 635bf215546Sopenharmony_ci mi_builder_flush_math(b); 636bf215546Sopenharmony_ci 637bf215546Sopenharmony_ci /* We can only predicate MI_STORE_REGISTER_MEM, so restrict the 638bf215546Sopenharmony_ci * destination to be memory, and resolve the source to a temporary 639bf215546Sopenharmony_ci * register if it isn't in one already. 640bf215546Sopenharmony_ci */ 641bf215546Sopenharmony_ci assert(dst.type == MI_VALUE_TYPE_MEM64 || 642bf215546Sopenharmony_ci dst.type == MI_VALUE_TYPE_MEM32); 643bf215546Sopenharmony_ci 644bf215546Sopenharmony_ci if (src.type != MI_VALUE_TYPE_REG32 && 645bf215546Sopenharmony_ci src.type != MI_VALUE_TYPE_REG64) { 646bf215546Sopenharmony_ci struct mi_value tmp = mi_new_gpr(b); 647bf215546Sopenharmony_ci _mi_copy_no_unref(b, tmp, src); 648bf215546Sopenharmony_ci src = tmp; 649bf215546Sopenharmony_ci } 650bf215546Sopenharmony_ci 651bf215546Sopenharmony_ci if (dst.type == MI_VALUE_TYPE_MEM64) { 652bf215546Sopenharmony_ci mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) { 653bf215546Sopenharmony_ci struct mi_reg_num reg = mi_adjust_reg_num(src.reg); 654bf215546Sopenharmony_ci srm.RegisterAddress = reg.num; 655bf215546Sopenharmony_ci#if GFX_VER >= 11 656bf215546Sopenharmony_ci srm.AddCSMMIOStartOffset = reg.cs; 657bf215546Sopenharmony_ci#endif 658bf215546Sopenharmony_ci srm.MemoryAddress = dst.addr; 659bf215546Sopenharmony_ci srm.PredicateEnable = true; 660bf215546Sopenharmony_ci } 661bf215546Sopenharmony_ci mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) { 662bf215546Sopenharmony_ci struct mi_reg_num reg = mi_adjust_reg_num(src.reg + 4); 663bf215546Sopenharmony_ci srm.RegisterAddress = reg.num; 664bf215546Sopenharmony_ci#if GFX_VER >= 11 665bf215546Sopenharmony_ci srm.AddCSMMIOStartOffset = reg.cs; 666bf215546Sopenharmony_ci#endif 667bf215546Sopenharmony_ci srm.MemoryAddress = __gen_address_offset(dst.addr, 4); 668bf215546Sopenharmony_ci srm.PredicateEnable = true; 669bf215546Sopenharmony_ci } 670bf215546Sopenharmony_ci } else { 671bf215546Sopenharmony_ci mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) { 672bf215546Sopenharmony_ci struct mi_reg_num reg = mi_adjust_reg_num(src.reg); 673bf215546Sopenharmony_ci srm.RegisterAddress = reg.num; 674bf215546Sopenharmony_ci#if GFX_VER >= 11 675bf215546Sopenharmony_ci srm.AddCSMMIOStartOffset = reg.cs; 676bf215546Sopenharmony_ci#endif 677bf215546Sopenharmony_ci srm.MemoryAddress = dst.addr; 678bf215546Sopenharmony_ci srm.PredicateEnable = true; 679bf215546Sopenharmony_ci } 680bf215546Sopenharmony_ci } 681bf215546Sopenharmony_ci 682bf215546Sopenharmony_ci mi_value_unref(b, src); 683bf215546Sopenharmony_ci mi_value_unref(b, dst); 684bf215546Sopenharmony_ci} 685bf215546Sopenharmony_ci 686bf215546Sopenharmony_cistatic inline void 687bf215546Sopenharmony_ci_mi_builder_push_math(struct mi_builder *b, 688bf215546Sopenharmony_ci const uint32_t *dwords, 689bf215546Sopenharmony_ci unsigned num_dwords) 690bf215546Sopenharmony_ci{ 691bf215546Sopenharmony_ci assert(num_dwords < MI_BUILDER_MAX_MATH_DWORDS); 692bf215546Sopenharmony_ci if (b->num_math_dwords + num_dwords > MI_BUILDER_MAX_MATH_DWORDS) 693bf215546Sopenharmony_ci mi_builder_flush_math(b); 694bf215546Sopenharmony_ci 695bf215546Sopenharmony_ci memcpy(&b->math_dwords[b->num_math_dwords], 696bf215546Sopenharmony_ci dwords, num_dwords * sizeof(*dwords)); 697bf215546Sopenharmony_ci b->num_math_dwords += num_dwords; 698bf215546Sopenharmony_ci} 699bf215546Sopenharmony_ci 700bf215546Sopenharmony_cistatic inline uint32_t 701bf215546Sopenharmony_ci_mi_pack_alu(uint32_t opcode, uint32_t operand1, uint32_t operand2) 702bf215546Sopenharmony_ci{ 703bf215546Sopenharmony_ci struct GENX(MI_MATH_ALU_INSTRUCTION) instr = { 704bf215546Sopenharmony_ci .Operand2 = operand2, 705bf215546Sopenharmony_ci .Operand1 = operand1, 706bf215546Sopenharmony_ci .ALUOpcode = opcode, 707bf215546Sopenharmony_ci }; 708bf215546Sopenharmony_ci 709bf215546Sopenharmony_ci uint32_t dw; 710bf215546Sopenharmony_ci GENX(MI_MATH_ALU_INSTRUCTION_pack)(NULL, &dw, &instr); 711bf215546Sopenharmony_ci 712bf215546Sopenharmony_ci return dw; 713bf215546Sopenharmony_ci} 714bf215546Sopenharmony_ci 715bf215546Sopenharmony_cistatic inline struct mi_value 716bf215546Sopenharmony_cimi_value_to_gpr(struct mi_builder *b, struct mi_value val) 717bf215546Sopenharmony_ci{ 718bf215546Sopenharmony_ci if (mi_value_is_gpr(val)) 719bf215546Sopenharmony_ci return val; 720bf215546Sopenharmony_ci 721bf215546Sopenharmony_ci /* Save off the invert flag because it makes copy() grumpy */ 722bf215546Sopenharmony_ci bool invert = val.invert; 723bf215546Sopenharmony_ci val.invert = false; 724bf215546Sopenharmony_ci 725bf215546Sopenharmony_ci struct mi_value tmp = mi_new_gpr(b); 726bf215546Sopenharmony_ci _mi_copy_no_unref(b, tmp, val); 727bf215546Sopenharmony_ci tmp.invert = invert; 728bf215546Sopenharmony_ci 729bf215546Sopenharmony_ci return tmp; 730bf215546Sopenharmony_ci} 731bf215546Sopenharmony_ci 732bf215546Sopenharmony_cistatic inline uint64_t 733bf215546Sopenharmony_cimi_value_to_u64(struct mi_value val) 734bf215546Sopenharmony_ci{ 735bf215546Sopenharmony_ci assert(val.type == MI_VALUE_TYPE_IMM); 736bf215546Sopenharmony_ci return val.invert ? ~val.imm : val.imm; 737bf215546Sopenharmony_ci} 738bf215546Sopenharmony_ci 739bf215546Sopenharmony_cistatic inline uint32_t 740bf215546Sopenharmony_ci_mi_math_load_src(struct mi_builder *b, unsigned src, struct mi_value *val) 741bf215546Sopenharmony_ci{ 742bf215546Sopenharmony_ci if (val->type == MI_VALUE_TYPE_IMM && 743bf215546Sopenharmony_ci (val->imm == 0 || val->imm == UINT64_MAX)) { 744bf215546Sopenharmony_ci uint64_t imm = val->invert ? ~val->imm : val->imm; 745bf215546Sopenharmony_ci return _mi_pack_alu(imm ? MI_ALU_LOAD1 : MI_ALU_LOAD0, src, 0); 746bf215546Sopenharmony_ci } else { 747bf215546Sopenharmony_ci *val = mi_value_to_gpr(b, *val); 748bf215546Sopenharmony_ci return _mi_pack_alu(val->invert ? MI_ALU_LOADINV : MI_ALU_LOAD, 749bf215546Sopenharmony_ci src, _mi_value_as_gpr(*val)); 750bf215546Sopenharmony_ci } 751bf215546Sopenharmony_ci} 752bf215546Sopenharmony_ci 753bf215546Sopenharmony_cistatic inline struct mi_value 754bf215546Sopenharmony_cimi_math_binop(struct mi_builder *b, uint32_t opcode, 755bf215546Sopenharmony_ci struct mi_value src0, struct mi_value src1, 756bf215546Sopenharmony_ci uint32_t store_op, uint32_t store_src) 757bf215546Sopenharmony_ci{ 758bf215546Sopenharmony_ci struct mi_value dst = mi_new_gpr(b); 759bf215546Sopenharmony_ci 760bf215546Sopenharmony_ci uint32_t dw[4]; 761bf215546Sopenharmony_ci dw[0] = _mi_math_load_src(b, MI_ALU_SRCA, &src0); 762bf215546Sopenharmony_ci dw[1] = _mi_math_load_src(b, MI_ALU_SRCB, &src1); 763bf215546Sopenharmony_ci dw[2] = _mi_pack_alu(opcode, 0, 0); 764bf215546Sopenharmony_ci dw[3] = _mi_pack_alu(store_op, _mi_value_as_gpr(dst), store_src); 765bf215546Sopenharmony_ci _mi_builder_push_math(b, dw, 4); 766bf215546Sopenharmony_ci 767bf215546Sopenharmony_ci mi_value_unref(b, src0); 768bf215546Sopenharmony_ci mi_value_unref(b, src1); 769bf215546Sopenharmony_ci 770bf215546Sopenharmony_ci return dst; 771bf215546Sopenharmony_ci} 772bf215546Sopenharmony_ci 773bf215546Sopenharmony_cistatic inline struct mi_value 774bf215546Sopenharmony_cimi_inot(struct mi_builder *b, struct mi_value val) 775bf215546Sopenharmony_ci{ 776bf215546Sopenharmony_ci if (val.type == MI_VALUE_TYPE_IMM) 777bf215546Sopenharmony_ci return mi_imm(~mi_value_to_u64(val)); 778bf215546Sopenharmony_ci 779bf215546Sopenharmony_ci val.invert = !val.invert; 780bf215546Sopenharmony_ci return val; 781bf215546Sopenharmony_ci} 782bf215546Sopenharmony_ci 783bf215546Sopenharmony_cistatic inline struct mi_value 784bf215546Sopenharmony_cimi_resolve_invert(struct mi_builder *b, struct mi_value src) 785bf215546Sopenharmony_ci{ 786bf215546Sopenharmony_ci if (!src.invert) 787bf215546Sopenharmony_ci return src; 788bf215546Sopenharmony_ci 789bf215546Sopenharmony_ci assert(src.type != MI_VALUE_TYPE_IMM); 790bf215546Sopenharmony_ci return mi_math_binop(b, MI_ALU_ADD, src, mi_imm(0), 791bf215546Sopenharmony_ci MI_ALU_STORE, MI_ALU_ACCU); 792bf215546Sopenharmony_ci} 793bf215546Sopenharmony_ci 794bf215546Sopenharmony_cistatic inline struct mi_value 795bf215546Sopenharmony_cimi_iadd(struct mi_builder *b, struct mi_value src0, struct mi_value src1) 796bf215546Sopenharmony_ci{ 797bf215546Sopenharmony_ci if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM) 798bf215546Sopenharmony_ci return mi_imm(mi_value_to_u64(src0) + mi_value_to_u64(src1)); 799bf215546Sopenharmony_ci 800bf215546Sopenharmony_ci return mi_math_binop(b, MI_ALU_ADD, src0, src1, 801bf215546Sopenharmony_ci MI_ALU_STORE, MI_ALU_ACCU); 802bf215546Sopenharmony_ci} 803bf215546Sopenharmony_ci 804bf215546Sopenharmony_cistatic inline struct mi_value 805bf215546Sopenharmony_cimi_iadd_imm(struct mi_builder *b, 806bf215546Sopenharmony_ci struct mi_value src, uint64_t N) 807bf215546Sopenharmony_ci{ 808bf215546Sopenharmony_ci if (N == 0) 809bf215546Sopenharmony_ci return src; 810bf215546Sopenharmony_ci 811bf215546Sopenharmony_ci return mi_iadd(b, src, mi_imm(N)); 812bf215546Sopenharmony_ci} 813bf215546Sopenharmony_ci 814bf215546Sopenharmony_cistatic inline struct mi_value 815bf215546Sopenharmony_cimi_isub(struct mi_builder *b, struct mi_value src0, struct mi_value src1) 816bf215546Sopenharmony_ci{ 817bf215546Sopenharmony_ci if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM) 818bf215546Sopenharmony_ci return mi_imm(mi_value_to_u64(src0) - mi_value_to_u64(src1)); 819bf215546Sopenharmony_ci 820bf215546Sopenharmony_ci return mi_math_binop(b, MI_ALU_SUB, src0, src1, 821bf215546Sopenharmony_ci MI_ALU_STORE, MI_ALU_ACCU); 822bf215546Sopenharmony_ci} 823bf215546Sopenharmony_ci 824bf215546Sopenharmony_cistatic inline struct mi_value 825bf215546Sopenharmony_cimi_ieq(struct mi_builder *b, struct mi_value src0, struct mi_value src1) 826bf215546Sopenharmony_ci{ 827bf215546Sopenharmony_ci if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM) 828bf215546Sopenharmony_ci return mi_imm(mi_value_to_u64(src0) == mi_value_to_u64(src1) ? ~0ull : 0); 829bf215546Sopenharmony_ci 830bf215546Sopenharmony_ci /* Compute "equal" by subtracting and storing the zero bit */ 831bf215546Sopenharmony_ci return mi_math_binop(b, MI_ALU_SUB, src0, src1, 832bf215546Sopenharmony_ci MI_ALU_STORE, MI_ALU_ZF); 833bf215546Sopenharmony_ci} 834bf215546Sopenharmony_ci 835bf215546Sopenharmony_cistatic inline struct mi_value 836bf215546Sopenharmony_cimi_ine(struct mi_builder *b, struct mi_value src0, struct mi_value src1) 837bf215546Sopenharmony_ci{ 838bf215546Sopenharmony_ci if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM) 839bf215546Sopenharmony_ci return mi_imm(mi_value_to_u64(src0) != mi_value_to_u64(src1) ? ~0ull : 0); 840bf215546Sopenharmony_ci 841bf215546Sopenharmony_ci /* Compute "not equal" by subtracting and storing the inverse zero bit */ 842bf215546Sopenharmony_ci return mi_math_binop(b, MI_ALU_SUB, src0, src1, 843bf215546Sopenharmony_ci MI_ALU_STOREINV, MI_ALU_ZF); 844bf215546Sopenharmony_ci} 845bf215546Sopenharmony_ci 846bf215546Sopenharmony_cistatic inline struct mi_value 847bf215546Sopenharmony_cimi_ult(struct mi_builder *b, struct mi_value src0, struct mi_value src1) 848bf215546Sopenharmony_ci{ 849bf215546Sopenharmony_ci if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM) 850bf215546Sopenharmony_ci return mi_imm(mi_value_to_u64(src0) < mi_value_to_u64(src1) ? ~0ull : 0); 851bf215546Sopenharmony_ci 852bf215546Sopenharmony_ci /* Compute "less than" by subtracting and storing the carry bit */ 853bf215546Sopenharmony_ci return mi_math_binop(b, MI_ALU_SUB, src0, src1, 854bf215546Sopenharmony_ci MI_ALU_STORE, MI_ALU_CF); 855bf215546Sopenharmony_ci} 856bf215546Sopenharmony_ci 857bf215546Sopenharmony_cistatic inline struct mi_value 858bf215546Sopenharmony_cimi_uge(struct mi_builder *b, struct mi_value src0, struct mi_value src1) 859bf215546Sopenharmony_ci{ 860bf215546Sopenharmony_ci if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM) 861bf215546Sopenharmony_ci return mi_imm(mi_value_to_u64(src0) >= mi_value_to_u64(src1) ? ~0ull : 0); 862bf215546Sopenharmony_ci 863bf215546Sopenharmony_ci /* Compute "less than" by subtracting and storing the carry bit */ 864bf215546Sopenharmony_ci return mi_math_binop(b, MI_ALU_SUB, src0, src1, 865bf215546Sopenharmony_ci MI_ALU_STOREINV, MI_ALU_CF); 866bf215546Sopenharmony_ci} 867bf215546Sopenharmony_ci 868bf215546Sopenharmony_cistatic inline struct mi_value 869bf215546Sopenharmony_cimi_iand(struct mi_builder *b, struct mi_value src0, struct mi_value src1) 870bf215546Sopenharmony_ci{ 871bf215546Sopenharmony_ci if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM) 872bf215546Sopenharmony_ci return mi_imm(mi_value_to_u64(src0) & mi_value_to_u64(src1)); 873bf215546Sopenharmony_ci 874bf215546Sopenharmony_ci return mi_math_binop(b, MI_ALU_AND, src0, src1, 875bf215546Sopenharmony_ci MI_ALU_STORE, MI_ALU_ACCU); 876bf215546Sopenharmony_ci} 877bf215546Sopenharmony_ci 878bf215546Sopenharmony_cistatic inline struct mi_value 879bf215546Sopenharmony_cimi_nz(struct mi_builder *b, struct mi_value src) 880bf215546Sopenharmony_ci{ 881bf215546Sopenharmony_ci if (src.type == MI_VALUE_TYPE_IMM) 882bf215546Sopenharmony_ci return mi_imm(mi_value_to_u64(src) != 0 ? ~0ull : 0); 883bf215546Sopenharmony_ci 884bf215546Sopenharmony_ci return mi_math_binop(b, MI_ALU_ADD, src, mi_imm(0), 885bf215546Sopenharmony_ci MI_ALU_STOREINV, MI_ALU_ZF); 886bf215546Sopenharmony_ci} 887bf215546Sopenharmony_ci 888bf215546Sopenharmony_cistatic inline struct mi_value 889bf215546Sopenharmony_cimi_z(struct mi_builder *b, struct mi_value src) 890bf215546Sopenharmony_ci{ 891bf215546Sopenharmony_ci if (src.type == MI_VALUE_TYPE_IMM) 892bf215546Sopenharmony_ci return mi_imm(mi_value_to_u64(src) == 0 ? ~0ull : 0); 893bf215546Sopenharmony_ci 894bf215546Sopenharmony_ci return mi_math_binop(b, MI_ALU_ADD, src, mi_imm(0), 895bf215546Sopenharmony_ci MI_ALU_STORE, MI_ALU_ZF); 896bf215546Sopenharmony_ci} 897bf215546Sopenharmony_ci 898bf215546Sopenharmony_cistatic inline struct mi_value 899bf215546Sopenharmony_cimi_ior(struct mi_builder *b, 900bf215546Sopenharmony_ci struct mi_value src0, struct mi_value src1) 901bf215546Sopenharmony_ci{ 902bf215546Sopenharmony_ci if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM) 903bf215546Sopenharmony_ci return mi_imm(mi_value_to_u64(src0) | mi_value_to_u64(src1)); 904bf215546Sopenharmony_ci 905bf215546Sopenharmony_ci return mi_math_binop(b, MI_ALU_OR, src0, src1, 906bf215546Sopenharmony_ci MI_ALU_STORE, MI_ALU_ACCU); 907bf215546Sopenharmony_ci} 908bf215546Sopenharmony_ci 909bf215546Sopenharmony_ci#if GFX_VERx10 >= 125 910bf215546Sopenharmony_cistatic inline struct mi_value 911bf215546Sopenharmony_cimi_ishl(struct mi_builder *b, struct mi_value src0, struct mi_value src1) 912bf215546Sopenharmony_ci{ 913bf215546Sopenharmony_ci if (src1.type == MI_VALUE_TYPE_IMM) { 914bf215546Sopenharmony_ci assert(util_is_power_of_two_or_zero(mi_value_to_u64(src1))); 915bf215546Sopenharmony_ci assert(mi_value_to_u64(src1) <= 32); 916bf215546Sopenharmony_ci } 917bf215546Sopenharmony_ci 918bf215546Sopenharmony_ci if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM) 919bf215546Sopenharmony_ci return mi_imm(mi_value_to_u64(src0) << mi_value_to_u64(src1)); 920bf215546Sopenharmony_ci 921bf215546Sopenharmony_ci return mi_math_binop(b, MI_ALU_SHL, src0, src1, 922bf215546Sopenharmony_ci MI_ALU_STORE, MI_ALU_ACCU); 923bf215546Sopenharmony_ci} 924bf215546Sopenharmony_ci 925bf215546Sopenharmony_cistatic inline struct mi_value 926bf215546Sopenharmony_cimi_ushr(struct mi_builder *b, struct mi_value src0, struct mi_value src1) 927bf215546Sopenharmony_ci{ 928bf215546Sopenharmony_ci if (src1.type == MI_VALUE_TYPE_IMM) { 929bf215546Sopenharmony_ci assert(util_is_power_of_two_or_zero(mi_value_to_u64(src1))); 930bf215546Sopenharmony_ci assert(mi_value_to_u64(src1) <= 32); 931bf215546Sopenharmony_ci } 932bf215546Sopenharmony_ci 933bf215546Sopenharmony_ci if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM) 934bf215546Sopenharmony_ci return mi_imm(mi_value_to_u64(src0) >> mi_value_to_u64(src1)); 935bf215546Sopenharmony_ci 936bf215546Sopenharmony_ci return mi_math_binop(b, MI_ALU_SHR, src0, src1, 937bf215546Sopenharmony_ci MI_ALU_STORE, MI_ALU_ACCU); 938bf215546Sopenharmony_ci} 939bf215546Sopenharmony_ci 940bf215546Sopenharmony_cistatic inline struct mi_value 941bf215546Sopenharmony_cimi_ushr_imm(struct mi_builder *b, struct mi_value src, uint32_t shift) 942bf215546Sopenharmony_ci{ 943bf215546Sopenharmony_ci if (shift == 0) 944bf215546Sopenharmony_ci return src; 945bf215546Sopenharmony_ci 946bf215546Sopenharmony_ci if (shift >= 64) 947bf215546Sopenharmony_ci return mi_imm(0); 948bf215546Sopenharmony_ci 949bf215546Sopenharmony_ci if (src.type == MI_VALUE_TYPE_IMM) 950bf215546Sopenharmony_ci return mi_imm(mi_value_to_u64(src) >> shift); 951bf215546Sopenharmony_ci 952bf215546Sopenharmony_ci struct mi_value res = mi_value_to_gpr(b, src); 953bf215546Sopenharmony_ci 954bf215546Sopenharmony_ci /* Annoyingly, we only have power-of-two shifts */ 955bf215546Sopenharmony_ci while (shift) { 956bf215546Sopenharmony_ci int bit = u_bit_scan(&shift); 957bf215546Sopenharmony_ci assert(bit <= 5); 958bf215546Sopenharmony_ci res = mi_ushr(b, res, mi_imm(1 << bit)); 959bf215546Sopenharmony_ci } 960bf215546Sopenharmony_ci 961bf215546Sopenharmony_ci return res; 962bf215546Sopenharmony_ci} 963bf215546Sopenharmony_ci 964bf215546Sopenharmony_cistatic inline struct mi_value 965bf215546Sopenharmony_cimi_ishr(struct mi_builder *b, struct mi_value src0, struct mi_value src1) 966bf215546Sopenharmony_ci{ 967bf215546Sopenharmony_ci if (src1.type == MI_VALUE_TYPE_IMM) { 968bf215546Sopenharmony_ci assert(util_is_power_of_two_or_zero(mi_value_to_u64(src1))); 969bf215546Sopenharmony_ci assert(mi_value_to_u64(src1) <= 32); 970bf215546Sopenharmony_ci } 971bf215546Sopenharmony_ci 972bf215546Sopenharmony_ci if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM) 973bf215546Sopenharmony_ci return mi_imm((int64_t)mi_value_to_u64(src0) >> mi_value_to_u64(src1)); 974bf215546Sopenharmony_ci 975bf215546Sopenharmony_ci return mi_math_binop(b, MI_ALU_SAR, src0, src1, 976bf215546Sopenharmony_ci MI_ALU_STORE, MI_ALU_ACCU); 977bf215546Sopenharmony_ci} 978bf215546Sopenharmony_ci 979bf215546Sopenharmony_cistatic inline struct mi_value 980bf215546Sopenharmony_cimi_ishr_imm(struct mi_builder *b, struct mi_value src, uint32_t shift) 981bf215546Sopenharmony_ci{ 982bf215546Sopenharmony_ci if (shift == 0) 983bf215546Sopenharmony_ci return src; 984bf215546Sopenharmony_ci 985bf215546Sopenharmony_ci if (shift >= 64) 986bf215546Sopenharmony_ci return mi_imm(0); 987bf215546Sopenharmony_ci 988bf215546Sopenharmony_ci if (src.type == MI_VALUE_TYPE_IMM) 989bf215546Sopenharmony_ci return mi_imm((int64_t)mi_value_to_u64(src) >> shift); 990bf215546Sopenharmony_ci 991bf215546Sopenharmony_ci struct mi_value res = mi_value_to_gpr(b, src); 992bf215546Sopenharmony_ci 993bf215546Sopenharmony_ci /* Annoyingly, we only have power-of-two shifts */ 994bf215546Sopenharmony_ci while (shift) { 995bf215546Sopenharmony_ci int bit = u_bit_scan(&shift); 996bf215546Sopenharmony_ci assert(bit <= 5); 997bf215546Sopenharmony_ci res = mi_ishr(b, res, mi_imm(1 << bit)); 998bf215546Sopenharmony_ci } 999bf215546Sopenharmony_ci 1000bf215546Sopenharmony_ci return res; 1001bf215546Sopenharmony_ci} 1002bf215546Sopenharmony_ci#endif /* if GFX_VERx10 >= 125 */ 1003bf215546Sopenharmony_ci 1004bf215546Sopenharmony_cistatic inline struct mi_value 1005bf215546Sopenharmony_cimi_imul_imm(struct mi_builder *b, struct mi_value src, uint32_t N) 1006bf215546Sopenharmony_ci{ 1007bf215546Sopenharmony_ci if (src.type == MI_VALUE_TYPE_IMM) 1008bf215546Sopenharmony_ci return mi_imm(mi_value_to_u64(src) * N); 1009bf215546Sopenharmony_ci 1010bf215546Sopenharmony_ci if (N == 0) { 1011bf215546Sopenharmony_ci mi_value_unref(b, src); 1012bf215546Sopenharmony_ci return mi_imm(0); 1013bf215546Sopenharmony_ci } 1014bf215546Sopenharmony_ci 1015bf215546Sopenharmony_ci if (N == 1) 1016bf215546Sopenharmony_ci return src; 1017bf215546Sopenharmony_ci 1018bf215546Sopenharmony_ci src = mi_value_to_gpr(b, src); 1019bf215546Sopenharmony_ci 1020bf215546Sopenharmony_ci struct mi_value res = mi_value_ref(b, src); 1021bf215546Sopenharmony_ci 1022bf215546Sopenharmony_ci unsigned top_bit = 31 - __builtin_clz(N); 1023bf215546Sopenharmony_ci for (int i = top_bit - 1; i >= 0; i--) { 1024bf215546Sopenharmony_ci res = mi_iadd(b, res, mi_value_ref(b, res)); 1025bf215546Sopenharmony_ci if (N & (1 << i)) 1026bf215546Sopenharmony_ci res = mi_iadd(b, res, mi_value_ref(b, src)); 1027bf215546Sopenharmony_ci } 1028bf215546Sopenharmony_ci 1029bf215546Sopenharmony_ci mi_value_unref(b, src); 1030bf215546Sopenharmony_ci 1031bf215546Sopenharmony_ci return res; 1032bf215546Sopenharmony_ci} 1033bf215546Sopenharmony_ci 1034bf215546Sopenharmony_cistatic inline struct mi_value 1035bf215546Sopenharmony_cimi_ishl_imm(struct mi_builder *b, struct mi_value src, uint32_t shift) 1036bf215546Sopenharmony_ci{ 1037bf215546Sopenharmony_ci if (shift == 0) 1038bf215546Sopenharmony_ci return src; 1039bf215546Sopenharmony_ci 1040bf215546Sopenharmony_ci if (shift >= 64) 1041bf215546Sopenharmony_ci return mi_imm(0); 1042bf215546Sopenharmony_ci 1043bf215546Sopenharmony_ci if (src.type == MI_VALUE_TYPE_IMM) 1044bf215546Sopenharmony_ci return mi_imm(mi_value_to_u64(src) << shift); 1045bf215546Sopenharmony_ci 1046bf215546Sopenharmony_ci struct mi_value res = mi_value_to_gpr(b, src); 1047bf215546Sopenharmony_ci 1048bf215546Sopenharmony_ci#if GFX_VERx10 >= 125 1049bf215546Sopenharmony_ci /* Annoyingly, we only have power-of-two shifts */ 1050bf215546Sopenharmony_ci while (shift) { 1051bf215546Sopenharmony_ci int bit = u_bit_scan(&shift); 1052bf215546Sopenharmony_ci assert(bit <= 5); 1053bf215546Sopenharmony_ci res = mi_ishl(b, res, mi_imm(1 << bit)); 1054bf215546Sopenharmony_ci } 1055bf215546Sopenharmony_ci#else 1056bf215546Sopenharmony_ci for (unsigned i = 0; i < shift; i++) 1057bf215546Sopenharmony_ci res = mi_iadd(b, res, mi_value_ref(b, res)); 1058bf215546Sopenharmony_ci#endif 1059bf215546Sopenharmony_ci 1060bf215546Sopenharmony_ci return res; 1061bf215546Sopenharmony_ci} 1062bf215546Sopenharmony_ci 1063bf215546Sopenharmony_cistatic inline struct mi_value 1064bf215546Sopenharmony_cimi_ushr32_imm(struct mi_builder *b, struct mi_value src, uint32_t shift) 1065bf215546Sopenharmony_ci{ 1066bf215546Sopenharmony_ci if (shift == 0) 1067bf215546Sopenharmony_ci return src; 1068bf215546Sopenharmony_ci 1069bf215546Sopenharmony_ci if (shift >= 64) 1070bf215546Sopenharmony_ci return mi_imm(0); 1071bf215546Sopenharmony_ci 1072bf215546Sopenharmony_ci /* We right-shift by left-shifting by 32 - shift and taking the top 32 bits 1073bf215546Sopenharmony_ci * of the result. 1074bf215546Sopenharmony_ci */ 1075bf215546Sopenharmony_ci if (src.type == MI_VALUE_TYPE_IMM) 1076bf215546Sopenharmony_ci return mi_imm((mi_value_to_u64(src) >> shift) & UINT32_MAX); 1077bf215546Sopenharmony_ci 1078bf215546Sopenharmony_ci if (shift > 32) { 1079bf215546Sopenharmony_ci struct mi_value tmp = mi_new_gpr(b); 1080bf215546Sopenharmony_ci _mi_copy_no_unref(b, mi_value_half(tmp, false), 1081bf215546Sopenharmony_ci mi_value_half(src, true)); 1082bf215546Sopenharmony_ci _mi_copy_no_unref(b, mi_value_half(tmp, true), mi_imm(0)); 1083bf215546Sopenharmony_ci mi_value_unref(b, src); 1084bf215546Sopenharmony_ci src = tmp; 1085bf215546Sopenharmony_ci shift -= 32; 1086bf215546Sopenharmony_ci } 1087bf215546Sopenharmony_ci assert(shift <= 32); 1088bf215546Sopenharmony_ci struct mi_value tmp = mi_ishl_imm(b, src, 32 - shift); 1089bf215546Sopenharmony_ci struct mi_value dst = mi_new_gpr(b); 1090bf215546Sopenharmony_ci _mi_copy_no_unref(b, mi_value_half(dst, false), 1091bf215546Sopenharmony_ci mi_value_half(tmp, true)); 1092bf215546Sopenharmony_ci _mi_copy_no_unref(b, mi_value_half(dst, true), mi_imm(0)); 1093bf215546Sopenharmony_ci mi_value_unref(b, tmp); 1094bf215546Sopenharmony_ci return dst; 1095bf215546Sopenharmony_ci} 1096bf215546Sopenharmony_ci 1097bf215546Sopenharmony_cistatic inline struct mi_value 1098bf215546Sopenharmony_cimi_udiv32_imm(struct mi_builder *b, struct mi_value N, uint32_t D) 1099bf215546Sopenharmony_ci{ 1100bf215546Sopenharmony_ci if (N.type == MI_VALUE_TYPE_IMM) { 1101bf215546Sopenharmony_ci assert(mi_value_to_u64(N) <= UINT32_MAX); 1102bf215546Sopenharmony_ci return mi_imm(mi_value_to_u64(N) / D); 1103bf215546Sopenharmony_ci } 1104bf215546Sopenharmony_ci 1105bf215546Sopenharmony_ci /* We implicitly assume that N is only a 32-bit value */ 1106bf215546Sopenharmony_ci if (D == 0) { 1107bf215546Sopenharmony_ci /* This is invalid but we should do something */ 1108bf215546Sopenharmony_ci return mi_imm(0); 1109bf215546Sopenharmony_ci } else if (util_is_power_of_two_or_zero(D)) { 1110bf215546Sopenharmony_ci return mi_ushr32_imm(b, N, util_logbase2(D)); 1111bf215546Sopenharmony_ci } else { 1112bf215546Sopenharmony_ci struct util_fast_udiv_info m = util_compute_fast_udiv_info(D, 32, 32); 1113bf215546Sopenharmony_ci assert(m.multiplier <= UINT32_MAX); 1114bf215546Sopenharmony_ci 1115bf215546Sopenharmony_ci if (m.pre_shift) 1116bf215546Sopenharmony_ci N = mi_ushr32_imm(b, N, m.pre_shift); 1117bf215546Sopenharmony_ci 1118bf215546Sopenharmony_ci /* Do the 32x32 multiply into gpr0 */ 1119bf215546Sopenharmony_ci N = mi_imul_imm(b, N, m.multiplier); 1120bf215546Sopenharmony_ci 1121bf215546Sopenharmony_ci if (m.increment) 1122bf215546Sopenharmony_ci N = mi_iadd(b, N, mi_imm(m.multiplier)); 1123bf215546Sopenharmony_ci 1124bf215546Sopenharmony_ci N = mi_ushr32_imm(b, N, 32); 1125bf215546Sopenharmony_ci 1126bf215546Sopenharmony_ci if (m.post_shift) 1127bf215546Sopenharmony_ci N = mi_ushr32_imm(b, N, m.post_shift); 1128bf215546Sopenharmony_ci 1129bf215546Sopenharmony_ci return N; 1130bf215546Sopenharmony_ci } 1131bf215546Sopenharmony_ci} 1132bf215546Sopenharmony_ci 1133bf215546Sopenharmony_ci#endif /* MI_MATH section */ 1134bf215546Sopenharmony_ci 1135bf215546Sopenharmony_ci/* This assumes addresses of strictly more than 32bits (aka. Gfx8+). */ 1136bf215546Sopenharmony_ci#if MI_BUILDER_CAN_WRITE_BATCH 1137bf215546Sopenharmony_ci 1138bf215546Sopenharmony_cistruct mi_address_token { 1139bf215546Sopenharmony_ci /* Pointers to address memory fields in the batch. */ 1140bf215546Sopenharmony_ci uint64_t *ptrs[2]; 1141bf215546Sopenharmony_ci}; 1142bf215546Sopenharmony_ci 1143bf215546Sopenharmony_cistatic inline struct mi_address_token 1144bf215546Sopenharmony_cimi_store_address(struct mi_builder *b, struct mi_value addr_reg) 1145bf215546Sopenharmony_ci{ 1146bf215546Sopenharmony_ci mi_builder_flush_math(b); 1147bf215546Sopenharmony_ci 1148bf215546Sopenharmony_ci assert(addr_reg.type == MI_VALUE_TYPE_REG64); 1149bf215546Sopenharmony_ci 1150bf215546Sopenharmony_ci struct mi_address_token token = {}; 1151bf215546Sopenharmony_ci 1152bf215546Sopenharmony_ci for (unsigned i = 0; i < 2; i++) { 1153bf215546Sopenharmony_ci mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) { 1154bf215546Sopenharmony_ci srm.RegisterAddress = addr_reg.reg + (i * 4); 1155bf215546Sopenharmony_ci 1156bf215546Sopenharmony_ci const unsigned addr_dw = 1157bf215546Sopenharmony_ci GENX(MI_STORE_REGISTER_MEM_MemoryAddress_start) / 8; 1158bf215546Sopenharmony_ci token.ptrs[i] = (void *)_dst + addr_dw; 1159bf215546Sopenharmony_ci } 1160bf215546Sopenharmony_ci } 1161bf215546Sopenharmony_ci 1162bf215546Sopenharmony_ci mi_value_unref(b, addr_reg); 1163bf215546Sopenharmony_ci return token; 1164bf215546Sopenharmony_ci} 1165bf215546Sopenharmony_ci 1166bf215546Sopenharmony_cistatic inline void 1167bf215546Sopenharmony_cimi_self_mod_barrier(struct mi_builder *b) 1168bf215546Sopenharmony_ci{ 1169bf215546Sopenharmony_ci /* First make sure all the memory writes from previous modifying commands 1170bf215546Sopenharmony_ci * have landed. We want to do this before going through the CS cache, 1171bf215546Sopenharmony_ci * otherwise we could be fetching memory that hasn't been written to yet. 1172bf215546Sopenharmony_ci */ 1173bf215546Sopenharmony_ci mi_builder_emit(b, GENX(PIPE_CONTROL), pc) { 1174bf215546Sopenharmony_ci pc.CommandStreamerStallEnable = true; 1175bf215546Sopenharmony_ci } 1176bf215546Sopenharmony_ci /* Documentation says Gfx11+ should be able to invalidate the command cache 1177bf215546Sopenharmony_ci * but experiment show it doesn't work properly, so for now just get over 1178bf215546Sopenharmony_ci * the CS prefetch. 1179bf215546Sopenharmony_ci */ 1180bf215546Sopenharmony_ci for (uint32_t i = 0; i < (b->devinfo->cs_prefetch_size / 4); i++) 1181bf215546Sopenharmony_ci mi_builder_emit(b, GENX(MI_NOOP), noop); 1182bf215546Sopenharmony_ci} 1183bf215546Sopenharmony_ci 1184bf215546Sopenharmony_cistatic inline void 1185bf215546Sopenharmony_ci_mi_resolve_address_token(struct mi_builder *b, 1186bf215546Sopenharmony_ci struct mi_address_token token, 1187bf215546Sopenharmony_ci void *batch_location) 1188bf215546Sopenharmony_ci{ 1189bf215546Sopenharmony_ci __gen_address_type addr = __gen_get_batch_address(b->user_data, 1190bf215546Sopenharmony_ci batch_location); 1191bf215546Sopenharmony_ci uint64_t addr_addr_u64 = __gen_combine_address(b->user_data, batch_location, 1192bf215546Sopenharmony_ci addr, 0); 1193bf215546Sopenharmony_ci *(token.ptrs[0]) = addr_addr_u64; 1194bf215546Sopenharmony_ci *(token.ptrs[1]) = addr_addr_u64 + 4; 1195bf215546Sopenharmony_ci} 1196bf215546Sopenharmony_ci 1197bf215546Sopenharmony_ci#endif /* MI_BUILDER_CAN_WRITE_BATCH */ 1198bf215546Sopenharmony_ci 1199bf215546Sopenharmony_ci#if GFX_VERx10 >= 125 1200bf215546Sopenharmony_ci 1201bf215546Sopenharmony_ci/* 1202bf215546Sopenharmony_ci * Indirect load/store. Only available on XE_HP+ 1203bf215546Sopenharmony_ci */ 1204bf215546Sopenharmony_ci 1205bf215546Sopenharmony_ciMUST_CHECK static inline struct mi_value 1206bf215546Sopenharmony_cimi_load_mem64_offset(struct mi_builder *b, 1207bf215546Sopenharmony_ci __gen_address_type addr, struct mi_value offset) 1208bf215546Sopenharmony_ci{ 1209bf215546Sopenharmony_ci uint64_t addr_u64 = __gen_combine_address(b->user_data, NULL, addr, 0); 1210bf215546Sopenharmony_ci struct mi_value addr_val = mi_imm(addr_u64); 1211bf215546Sopenharmony_ci 1212bf215546Sopenharmony_ci struct mi_value dst = mi_new_gpr(b); 1213bf215546Sopenharmony_ci 1214bf215546Sopenharmony_ci uint32_t dw[5]; 1215bf215546Sopenharmony_ci dw[0] = _mi_math_load_src(b, MI_ALU_SRCA, &addr_val); 1216bf215546Sopenharmony_ci dw[1] = _mi_math_load_src(b, MI_ALU_SRCB, &offset); 1217bf215546Sopenharmony_ci dw[2] = _mi_pack_alu(MI_ALU_ADD, 0, 0); 1218bf215546Sopenharmony_ci dw[3] = _mi_pack_alu(MI_ALU_LOADIND, _mi_value_as_gpr(dst), MI_ALU_ACCU); 1219bf215546Sopenharmony_ci dw[4] = _mi_pack_alu(MI_ALU_FENCE_RD, 0, 0); 1220bf215546Sopenharmony_ci _mi_builder_push_math(b, dw, 5); 1221bf215546Sopenharmony_ci 1222bf215546Sopenharmony_ci mi_value_unref(b, addr_val); 1223bf215546Sopenharmony_ci mi_value_unref(b, offset); 1224bf215546Sopenharmony_ci 1225bf215546Sopenharmony_ci return dst; 1226bf215546Sopenharmony_ci} 1227bf215546Sopenharmony_ci 1228bf215546Sopenharmony_cistatic inline void 1229bf215546Sopenharmony_cimi_store_mem64_offset(struct mi_builder *b, 1230bf215546Sopenharmony_ci __gen_address_type addr, struct mi_value offset, 1231bf215546Sopenharmony_ci struct mi_value data) 1232bf215546Sopenharmony_ci{ 1233bf215546Sopenharmony_ci uint64_t addr_u64 = __gen_combine_address(b->user_data, NULL, addr, 0); 1234bf215546Sopenharmony_ci struct mi_value addr_val = mi_imm(addr_u64); 1235bf215546Sopenharmony_ci 1236bf215546Sopenharmony_ci data = mi_value_to_gpr(b, mi_resolve_invert(b, data)); 1237bf215546Sopenharmony_ci 1238bf215546Sopenharmony_ci uint32_t dw[5]; 1239bf215546Sopenharmony_ci dw[0] = _mi_math_load_src(b, MI_ALU_SRCA, &addr_val); 1240bf215546Sopenharmony_ci dw[1] = _mi_math_load_src(b, MI_ALU_SRCB, &offset); 1241bf215546Sopenharmony_ci dw[2] = _mi_pack_alu(MI_ALU_ADD, 0, 0); 1242bf215546Sopenharmony_ci dw[3] = _mi_pack_alu(MI_ALU_STOREIND, MI_ALU_ACCU, _mi_value_as_gpr(data)); 1243bf215546Sopenharmony_ci dw[4] = _mi_pack_alu(MI_ALU_FENCE_WR, 0, 0); 1244bf215546Sopenharmony_ci _mi_builder_push_math(b, dw, 5); 1245bf215546Sopenharmony_ci 1246bf215546Sopenharmony_ci mi_value_unref(b, addr_val); 1247bf215546Sopenharmony_ci mi_value_unref(b, offset); 1248bf215546Sopenharmony_ci mi_value_unref(b, data); 1249bf215546Sopenharmony_ci 1250bf215546Sopenharmony_ci /* This is the only math case which has side-effects outside of regular 1251bf215546Sopenharmony_ci * registers to flush math afterwards so we don't confuse anyone. 1252bf215546Sopenharmony_ci */ 1253bf215546Sopenharmony_ci mi_builder_flush_math(b); 1254bf215546Sopenharmony_ci} 1255bf215546Sopenharmony_ci 1256bf215546Sopenharmony_ci/* 1257bf215546Sopenharmony_ci * Control-flow Section. Only available on XE_HP+ 1258bf215546Sopenharmony_ci */ 1259bf215546Sopenharmony_ci 1260bf215546Sopenharmony_cistruct _mi_goto { 1261bf215546Sopenharmony_ci bool predicated; 1262bf215546Sopenharmony_ci void *mi_bbs; 1263bf215546Sopenharmony_ci}; 1264bf215546Sopenharmony_ci 1265bf215546Sopenharmony_cistruct mi_goto_target { 1266bf215546Sopenharmony_ci bool placed; 1267bf215546Sopenharmony_ci unsigned num_gotos; 1268bf215546Sopenharmony_ci struct _mi_goto gotos[8]; 1269bf215546Sopenharmony_ci __gen_address_type addr; 1270bf215546Sopenharmony_ci}; 1271bf215546Sopenharmony_ci 1272bf215546Sopenharmony_ci#define MI_GOTO_TARGET_INIT ((struct mi_goto_target) {}) 1273bf215546Sopenharmony_ci 1274bf215546Sopenharmony_ci#define MI_BUILDER_MI_PREDICATE_RESULT_num 0x2418 1275bf215546Sopenharmony_ci 1276bf215546Sopenharmony_cistatic inline void 1277bf215546Sopenharmony_cimi_goto_if(struct mi_builder *b, struct mi_value cond, 1278bf215546Sopenharmony_ci struct mi_goto_target *t) 1279bf215546Sopenharmony_ci{ 1280bf215546Sopenharmony_ci /* First, set up the predicate, if any */ 1281bf215546Sopenharmony_ci bool predicated; 1282bf215546Sopenharmony_ci if (cond.type == MI_VALUE_TYPE_IMM) { 1283bf215546Sopenharmony_ci /* If it's an immediate, the goto either doesn't happen or happens 1284bf215546Sopenharmony_ci * unconditionally. 1285bf215546Sopenharmony_ci */ 1286bf215546Sopenharmony_ci if (mi_value_to_u64(cond) == 0) 1287bf215546Sopenharmony_ci return; 1288bf215546Sopenharmony_ci 1289bf215546Sopenharmony_ci assert(mi_value_to_u64(cond) == ~0ull); 1290bf215546Sopenharmony_ci predicated = false; 1291bf215546Sopenharmony_ci } else if (mi_value_is_reg(cond) && 1292bf215546Sopenharmony_ci cond.reg == MI_BUILDER_MI_PREDICATE_RESULT_num) { 1293bf215546Sopenharmony_ci /* If it's MI_PREDICATE_RESULT, we use whatever predicate the client 1294bf215546Sopenharmony_ci * provided us with 1295bf215546Sopenharmony_ci */ 1296bf215546Sopenharmony_ci assert(cond.type == MI_VALUE_TYPE_REG32); 1297bf215546Sopenharmony_ci predicated = true; 1298bf215546Sopenharmony_ci } else { 1299bf215546Sopenharmony_ci mi_store(b, mi_reg32(MI_BUILDER_MI_PREDICATE_RESULT_num), cond); 1300bf215546Sopenharmony_ci predicated = true; 1301bf215546Sopenharmony_ci } 1302bf215546Sopenharmony_ci 1303bf215546Sopenharmony_ci if (predicated) { 1304bf215546Sopenharmony_ci mi_builder_emit(b, GENX(MI_SET_PREDICATE), sp) { 1305bf215546Sopenharmony_ci sp.PredicateEnable = NOOPOnResultClear; 1306bf215546Sopenharmony_ci } 1307bf215546Sopenharmony_ci } 1308bf215546Sopenharmony_ci if (t->placed) { 1309bf215546Sopenharmony_ci mi_builder_emit(b, GENX(MI_BATCH_BUFFER_START), bbs) { 1310bf215546Sopenharmony_ci bbs.PredicationEnable = predicated; 1311bf215546Sopenharmony_ci bbs.AddressSpaceIndicator = ASI_PPGTT; 1312bf215546Sopenharmony_ci bbs.BatchBufferStartAddress = t->addr; 1313bf215546Sopenharmony_ci } 1314bf215546Sopenharmony_ci } else { 1315bf215546Sopenharmony_ci assert(t->num_gotos < ARRAY_SIZE(t->gotos)); 1316bf215546Sopenharmony_ci struct _mi_goto g = { 1317bf215546Sopenharmony_ci .predicated = predicated, 1318bf215546Sopenharmony_ci .mi_bbs = __gen_get_batch_dwords(b->user_data, 1319bf215546Sopenharmony_ci GENX(MI_BATCH_BUFFER_START_length)), 1320bf215546Sopenharmony_ci }; 1321bf215546Sopenharmony_ci memset(g.mi_bbs, 0, 4 * GENX(MI_BATCH_BUFFER_START_length)); 1322bf215546Sopenharmony_ci t->gotos[t->num_gotos++] = g; 1323bf215546Sopenharmony_ci } 1324bf215546Sopenharmony_ci if (predicated) { 1325bf215546Sopenharmony_ci mi_builder_emit(b, GENX(MI_SET_PREDICATE), sp) { 1326bf215546Sopenharmony_ci sp.PredicateEnable = NOOPNever; 1327bf215546Sopenharmony_ci } 1328bf215546Sopenharmony_ci } 1329bf215546Sopenharmony_ci} 1330bf215546Sopenharmony_ci 1331bf215546Sopenharmony_cistatic inline void 1332bf215546Sopenharmony_cimi_goto(struct mi_builder *b, struct mi_goto_target *t) 1333bf215546Sopenharmony_ci{ 1334bf215546Sopenharmony_ci mi_goto_if(b, mi_imm(-1), t); 1335bf215546Sopenharmony_ci} 1336bf215546Sopenharmony_ci 1337bf215546Sopenharmony_cistatic inline void 1338bf215546Sopenharmony_cimi_goto_target(struct mi_builder *b, struct mi_goto_target *t) 1339bf215546Sopenharmony_ci{ 1340bf215546Sopenharmony_ci mi_builder_emit(b, GENX(MI_SET_PREDICATE), sp) { 1341bf215546Sopenharmony_ci sp.PredicateEnable = NOOPNever; 1342bf215546Sopenharmony_ci t->addr = __gen_get_batch_address(b->user_data, _dst); 1343bf215546Sopenharmony_ci } 1344bf215546Sopenharmony_ci t->placed = true; 1345bf215546Sopenharmony_ci 1346bf215546Sopenharmony_ci struct GENX(MI_BATCH_BUFFER_START) bbs = { GENX(MI_BATCH_BUFFER_START_header) }; 1347bf215546Sopenharmony_ci bbs.AddressSpaceIndicator = ASI_PPGTT; 1348bf215546Sopenharmony_ci bbs.BatchBufferStartAddress = t->addr; 1349bf215546Sopenharmony_ci 1350bf215546Sopenharmony_ci for (unsigned i = 0; i < t->num_gotos; i++) { 1351bf215546Sopenharmony_ci bbs.PredicationEnable = t->gotos[i].predicated; 1352bf215546Sopenharmony_ci GENX(MI_BATCH_BUFFER_START_pack)(b->user_data, t->gotos[i].mi_bbs, &bbs); 1353bf215546Sopenharmony_ci } 1354bf215546Sopenharmony_ci} 1355bf215546Sopenharmony_ci 1356bf215546Sopenharmony_cistatic inline struct mi_goto_target 1357bf215546Sopenharmony_cimi_goto_target_init_and_place(struct mi_builder *b) 1358bf215546Sopenharmony_ci{ 1359bf215546Sopenharmony_ci struct mi_goto_target t = MI_GOTO_TARGET_INIT; 1360bf215546Sopenharmony_ci mi_goto_target(b, &t); 1361bf215546Sopenharmony_ci return t; 1362bf215546Sopenharmony_ci} 1363bf215546Sopenharmony_ci 1364bf215546Sopenharmony_ci#define mi_loop(b) \ 1365bf215546Sopenharmony_ci for (struct mi_goto_target __break = MI_GOTO_TARGET_INIT, \ 1366bf215546Sopenharmony_ci __continue = mi_goto_target_init_and_place(b); !__break.placed; \ 1367bf215546Sopenharmony_ci mi_goto(b, &__continue), mi_goto_target(b, &__break)) 1368bf215546Sopenharmony_ci 1369bf215546Sopenharmony_ci#define mi_break(b) mi_goto(b, &__break) 1370bf215546Sopenharmony_ci#define mi_break_if(b, cond) mi_goto_if(b, cond, &__break) 1371bf215546Sopenharmony_ci#define mi_continue(b) mi_goto(b, &__continue) 1372bf215546Sopenharmony_ci#define mi_continue_if(b, cond) mi_goto_if(b, cond, &__continue) 1373bf215546Sopenharmony_ci 1374bf215546Sopenharmony_ci#endif /* GFX_VERx10 >= 125 */ 1375bf215546Sopenharmony_ci 1376bf215546Sopenharmony_ci#endif /* MI_BUILDER_H */ 1377