/**************************************************************************
 *
 * Copyright 2019 Red Hat.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 **************************************************************************/

#include "lp_bld_nir.h"
#include "lp_bld_arit.h"
#include "lp_bld_bitarit.h"
#include "lp_bld_const.h"
#include "lp_bld_conv.h"
#include "lp_bld_gather.h"
#include "lp_bld_logic.h"
#include "lp_bld_quad.h"
#include "lp_bld_flow.h"
#include "lp_bld_intr.h"
#include "lp_bld_struct.h"
#include "lp_bld_debug.h"
#include "lp_bld_printf.h"
#include "nir_deref.h"
#include "nir_search_helpers.h"

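/*
 * NIR -> LLVM IR translation for gallivm: the visit_* functions below
 * walk a NIR shader and emit LLVM IR through the lp_bld_* helpers,
 * with backend-specific behaviour reached through the callbacks on
 * struct lp_build_nir_context.
 */
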
// Doing AOS (and linear) codegen?
static bool
is_aos(const struct lp_build_nir_context *bld_base)
{
   // AOS is used for vectors of uint8[16]
   return bld_base->base.type.length == 16 && bld_base->base.type.width == 8;
}


static void
visit_cf_list(struct lp_build_nir_context *bld_base,
              struct exec_list *list);


static LLVMValueRef
cast_type(struct lp_build_nir_context *bld_base, LLVMValueRef val,
          nir_alu_type alu_type, unsigned bit_size)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   switch (alu_type) {
   case nir_type_float:
      switch (bit_size) {
      case 16:
         return LLVMBuildBitCast(builder, val, bld_base->half_bld.vec_type, "");
      case 32:
         return LLVMBuildBitCast(builder, val, bld_base->base.vec_type, "");
      case 64:
         return LLVMBuildBitCast(builder, val, bld_base->dbl_bld.vec_type, "");
      default:
         assert(0);
         break;
      }
      break;
   case nir_type_int:
      switch (bit_size) {
      case 8:
         return LLVMBuildBitCast(builder, val, bld_base->int8_bld.vec_type, "");
      case 16:
         return LLVMBuildBitCast(builder, val, bld_base->int16_bld.vec_type, "");
      case 32:
         return LLVMBuildBitCast(builder, val, bld_base->int_bld.vec_type, "");
      case 64:
         return LLVMBuildBitCast(builder, val, bld_base->int64_bld.vec_type, "");
      default:
         assert(0);
         break;
      }
      break;
   case nir_type_uint:
      switch (bit_size) {
      case 8:
         return LLVMBuildBitCast(builder, val, bld_base->uint8_bld.vec_type, "");
      case 16:
         return LLVMBuildBitCast(builder, val, bld_base->uint16_bld.vec_type, "");
      case 1:
      case 32:
         return LLVMBuildBitCast(builder, val, bld_base->uint_bld.vec_type, "");
      case 64:
         return LLVMBuildBitCast(builder, val, bld_base->uint64_bld.vec_type, "");
      default:
         assert(0);
         break;
      }
      break;
   case nir_type_uint32:
      return LLVMBuildBitCast(builder, val, bld_base->uint_bld.vec_type, "");
   default:
      return val;
   }
   return NULL;
}

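
/*
 * Translate a GLSL sampler dimension (plus arrayness) into the
 * corresponding pipe_texture_target.  Subpass inputs are treated as
 * 2D array textures.
 */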
static unsigned
glsl_sampler_to_pipe(int sampler_dim, bool is_array)
{
   unsigned pipe_target = PIPE_BUFFER;
   switch (sampler_dim) {
   case GLSL_SAMPLER_DIM_1D:
      pipe_target = is_array ? PIPE_TEXTURE_1D_ARRAY : PIPE_TEXTURE_1D;
      break;
   case GLSL_SAMPLER_DIM_2D:
      pipe_target = is_array ? PIPE_TEXTURE_2D_ARRAY : PIPE_TEXTURE_2D;
      break;
   case GLSL_SAMPLER_DIM_SUBPASS:
   case GLSL_SAMPLER_DIM_SUBPASS_MS:
      pipe_target = PIPE_TEXTURE_2D_ARRAY;
      break;
   case GLSL_SAMPLER_DIM_3D:
      pipe_target = PIPE_TEXTURE_3D;
      break;
   case GLSL_SAMPLER_DIM_MS:
      pipe_target = is_array ? PIPE_TEXTURE_2D_ARRAY : PIPE_TEXTURE_2D;
      break;
   case GLSL_SAMPLER_DIM_CUBE:
      pipe_target = is_array ? PIPE_TEXTURE_CUBE_ARRAY : PIPE_TEXTURE_CUBE;
      break;
   case GLSL_SAMPLER_DIM_RECT:
      pipe_target = PIPE_TEXTURE_RECT;
      break;
   case GLSL_SAMPLER_DIM_BUF:
      pipe_target = PIPE_BUFFER;
      break;
   default:
      break;
   }
   return pipe_target;
}


static LLVMValueRef get_ssa_src(struct lp_build_nir_context *bld_base, nir_ssa_def *ssa)
{
   return bld_base->ssa_defs[ssa->index];
}


static LLVMValueRef
get_src(struct lp_build_nir_context *bld_base, nir_src src);


static LLVMValueRef
get_reg_src(struct lp_build_nir_context *bld_base, nir_reg_src src)
{
   struct hash_entry *entry = _mesa_hash_table_search(bld_base->regs, src.reg);
   LLVMValueRef reg_storage = (LLVMValueRef)entry->data;
   struct lp_build_context *reg_bld = get_int_bld(bld_base, true, src.reg->bit_size);
   LLVMValueRef indir_src = NULL;
   if (src.indirect)
      indir_src = get_src(bld_base, *src.indirect);
   return bld_base->load_reg(bld_base, reg_bld, &src, indir_src, reg_storage);
}


static LLVMValueRef
get_src(struct lp_build_nir_context *bld_base, nir_src src)
{
   if (src.is_ssa)
      return get_ssa_src(bld_base, src.ssa);
   else
      return get_reg_src(bld_base, src.reg);
}


static void
assign_ssa(struct lp_build_nir_context *bld_base, int idx, LLVMValueRef ptr)
{
   bld_base->ssa_defs[idx] = ptr;
}


static void
assign_ssa_dest(struct lp_build_nir_context *bld_base, const nir_ssa_def *ssa,
                LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS])
{
   if ((ssa->num_components == 1 || is_aos(bld_base))) {
      assign_ssa(bld_base, ssa->index, vals[0]);
   } else {
      assign_ssa(bld_base, ssa->index,
                 lp_nir_array_build_gather_values(bld_base->base.gallivm->builder,
                                                  vals, ssa->num_components));
   }
}

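
/*
 * Store vals to a NIR register.  A write_mask of 0 is shorthand for
 * "write all components", so it is normalized to 0xf before being
 * handed to the backend's store_reg hook.
 */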
static void
assign_reg(struct lp_build_nir_context *bld_base, const nir_reg_dest *reg,
           unsigned write_mask,
           LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS])
{
   struct hash_entry *entry = _mesa_hash_table_search(bld_base->regs, reg->reg);
   LLVMValueRef reg_storage = (LLVMValueRef)entry->data;
   struct lp_build_context *reg_bld = get_int_bld(bld_base, true, reg->reg->bit_size);
   LLVMValueRef indir_src = NULL;
   if (reg->indirect)
      indir_src = get_src(bld_base, *reg->indirect);
   bld_base->store_reg(bld_base, reg_bld, reg,
                       write_mask ? write_mask : 0xf,
                       indir_src, reg_storage, vals);
}


static void
assign_dest(struct lp_build_nir_context *bld_base,
            const nir_dest *dest,
            LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS])
{
   if (dest->is_ssa)
      assign_ssa_dest(bld_base, &dest->ssa, vals);
   else
      assign_reg(bld_base, &dest->reg, 0, vals);
}


static void
assign_alu_dest(struct lp_build_nir_context *bld_base,
                const nir_alu_dest *dest,
                LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS])
{
   if (dest->dest.is_ssa)
      assign_ssa_dest(bld_base, &dest->dest.ssa, vals);
   else
      assign_reg(bld_base, &dest->dest.reg, dest->write_mask, vals);
}


static LLVMValueRef
int_to_bool32(struct lp_build_nir_context *bld_base,
              uint32_t src_bit_size,
              bool is_unsigned,
              LLVMValueRef val)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   struct lp_build_context *int_bld =
      get_int_bld(bld_base, is_unsigned, src_bit_size);
   LLVMValueRef result = lp_build_compare(bld_base->base.gallivm,
                                          int_bld->type, PIPE_FUNC_NOTEQUAL,
                                          val, int_bld->zero);
   if (src_bit_size == 16)
      result = LLVMBuildSExt(builder, result, bld_base->int_bld.vec_type, "");
   else if (src_bit_size == 64)
      result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, "");
   return result;
}


static LLVMValueRef
flt_to_bool32(struct lp_build_nir_context *bld_base,
              uint32_t src_bit_size,
              LLVMValueRef val)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   struct lp_build_context *flt_bld = get_flt_bld(bld_base, src_bit_size);
   LLVMValueRef result =
      lp_build_cmp(flt_bld, PIPE_FUNC_NOTEQUAL, val, flt_bld->zero);
   if (src_bit_size == 64)
      result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, "");
   if (src_bit_size == 16)
      result = LLVMBuildSExt(builder, result, bld_base->int_bld.vec_type, "");
   return result;
}


static LLVMValueRef
fcmp32(struct lp_build_nir_context *bld_base,
       enum pipe_compare_func compare,
       uint32_t src_bit_size,
       LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   struct lp_build_context *flt_bld = get_flt_bld(bld_base, src_bit_size);
   LLVMValueRef result;

   if (compare != PIPE_FUNC_NOTEQUAL)
      result = lp_build_cmp_ordered(flt_bld, compare, src[0], src[1]);
   else
      result = lp_build_cmp(flt_bld, compare, src[0], src[1]);
   if (src_bit_size == 64)
      result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, "");
   else if (src_bit_size == 16)
      result = LLVMBuildSExt(builder, result, bld_base->int_bld.vec_type, "");
   return result;
}


static LLVMValueRef
icmp32(struct lp_build_nir_context *bld_base,
       enum pipe_compare_func compare,
       bool is_unsigned,
       uint32_t src_bit_size,
       LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   struct lp_build_context *i_bld =
      get_int_bld(bld_base, is_unsigned, src_bit_size);
   LLVMValueRef result = lp_build_cmp(i_bld, compare, src[0], src[1]);
   if (src_bit_size < 32)
      result = LLVMBuildSExt(builder, result, bld_base->int_bld.vec_type, "");
   else if (src_bit_size == 64)
      result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, "");
   return result;
}

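
/*
 * The comparison helpers above all produce NIR's 32-bit boolean
 * convention (~0 for true, 0 for false, per lane): masks computed at
 * narrower widths are sign-extended and 64-bit masks truncated, so the
 * result always ends up as a 32-bit integer vector.
 */
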
/**
 * Get a source register value for an ALU instruction.
 * This is where swizzles are handled.  There should be no negation
 * or absolute value modifiers.
 * num_components indicates the number of components needed in the
 * returned array or vector.
 */
static LLVMValueRef
get_alu_src(struct lp_build_nir_context *bld_base,
            nir_alu_src src,
            unsigned num_components)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef value = get_src(bld_base, src.src);
   bool need_swizzle = false;

   assert(value);

   if (is_aos(bld_base))
      return value;

   unsigned src_components = nir_src_num_components(src.src);
   for (unsigned i = 0; i < num_components; ++i) {
      assert(src.swizzle[i] < src_components);
      if (src.swizzle[i] != i)
         need_swizzle = true;
   }

   if (need_swizzle || num_components != src_components) {
      if (src_components > 1 && num_components == 1) {
         value = LLVMBuildExtractValue(gallivm->builder, value,
                                       src.swizzle[0], "");
      } else if (src_components == 1 && num_components > 1) {
         LLVMValueRef values[] = {value, value, value, value,
                                  value, value, value, value,
                                  value, value, value, value,
                                  value, value, value, value};
         value = lp_nir_array_build_gather_values(builder, values, num_components);
      } else {
         LLVMValueRef arr = LLVMGetUndef(LLVMArrayType(LLVMTypeOf(LLVMBuildExtractValue(builder, value, 0, "")), num_components));
         for (unsigned i = 0; i < num_components; i++)
            arr = LLVMBuildInsertValue(builder, arr, LLVMBuildExtractValue(builder, value, src.swizzle[i], ""), i, "");
         value = arr;
      }
   }
   assert(!src.negate);
   assert(!src.abs);
   return value;
}


static LLVMValueRef
emit_b2f(struct lp_build_nir_context *bld_base,
         LLVMValueRef src0,
         unsigned bitsize)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   LLVMValueRef result =
      LLVMBuildAnd(builder, cast_type(bld_base, src0, nir_type_int, 32),
                   LLVMBuildBitCast(builder,
                                    lp_build_const_vec(bld_base->base.gallivm,
                                                       bld_base->base.type,
                                                       1.0),
                                    bld_base->int_bld.vec_type, ""),
                   "");
   result = LLVMBuildBitCast(builder, result, bld_base->base.vec_type, "");
   switch (bitsize) {
   case 16:
      result = LLVMBuildFPTrunc(builder, result,
                                bld_base->half_bld.vec_type, "");
      break;
   case 32:
      break;
   case 64:
      result = LLVMBuildFPExt(builder, result,
                              bld_base->dbl_bld.vec_type, "");
      break;
   default:
      unreachable("unsupported bit size.");
   }
   return result;
}


static LLVMValueRef
emit_b2i(struct lp_build_nir_context *bld_base,
         LLVMValueRef src0,
         unsigned bitsize)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   LLVMValueRef result = LLVMBuildAnd(builder,
                          cast_type(bld_base, src0, nir_type_int, 32),
                          lp_build_const_int_vec(bld_base->base.gallivm,
                                                 bld_base->base.type, 1), "");
   switch (bitsize) {
   case 8:
      return LLVMBuildTrunc(builder, result, bld_base->int8_bld.vec_type, "");
   case 16:
      return LLVMBuildTrunc(builder, result, bld_base->int16_bld.vec_type, "");
   case 32:
      return result;
   case 64:
      return LLVMBuildZExt(builder, result, bld_base->int64_bld.vec_type, "");
   default:
      unreachable("unsupported bit size.");
   }
}

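
/*
 * b32csel: src[0] is a 32-bit boolean value.  Recompute a proper
 * per-lane mask by comparing it against zero, then select between
 * src[1] and src[2] at their own bit size.
 */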
static LLVMValueRef
emit_b32csel(struct lp_build_nir_context *bld_base,
             unsigned src_bit_size[NIR_MAX_VEC_COMPONENTS],
             LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef sel = cast_type(bld_base, src[0], nir_type_int, 32);
   LLVMValueRef v = lp_build_compare(bld_base->base.gallivm, bld_base->int_bld.type, PIPE_FUNC_NOTEQUAL, sel, bld_base->int_bld.zero);
   struct lp_build_context *bld = get_int_bld(bld_base, false, src_bit_size[1]);
   return lp_build_select(bld, v, src[1], src[2]);
}


static LLVMValueRef
split_64bit(struct lp_build_nir_context *bld_base,
            LLVMValueRef src,
            bool hi)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
   LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
   int len = bld_base->base.type.length * 2;
   for (unsigned i = 0; i < bld_base->base.type.length; i++) {
#if UTIL_ARCH_LITTLE_ENDIAN
      shuffles[i] = lp_build_const_int32(gallivm, i * 2);
      shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
#else
      shuffles[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
      shuffles2[i] = lp_build_const_int32(gallivm, (i * 2));
#endif
   }

   src = LLVMBuildBitCast(gallivm->builder, src,
                          LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), len), "");
   return LLVMBuildShuffleVector(gallivm->builder, src,
                                 LLVMGetUndef(LLVMTypeOf(src)),
                                 LLVMConstVector(hi ? shuffles2 : shuffles,
                                                 bld_base->base.type.length),
                                 "");
}


static LLVMValueRef
merge_64bit(struct lp_build_nir_context *bld_base,
            LLVMValueRef input,
            LLVMValueRef input2)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   int i;
   LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
   int len = bld_base->base.type.length * 2;
   assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));

   for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
#if UTIL_ARCH_LITTLE_ENDIAN
      shuffles[i] = lp_build_const_int32(gallivm, i / 2);
      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
#else
      shuffles[i] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2);
#endif
   }
   return LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
}

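
/*
 * The 16-bit split/merge below follow the same shuffle pattern as the
 * 64-bit variants above: e.g. for a 4-wide SoA vector, a <4 x i32>
 * source is bitcast to <8 x i16>, whose even lanes (0,2,4,6) hold the
 * low halves and odd lanes the high halves on little-endian hosts.
 */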
static LLVMValueRef
split_16bit(struct lp_build_nir_context *bld_base,
            LLVMValueRef src,
            bool hi)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
   LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
   int len = bld_base->base.type.length * 2;
   for (unsigned i = 0; i < bld_base->base.type.length; i++) {
#if UTIL_ARCH_LITTLE_ENDIAN
      shuffles[i] = lp_build_const_int32(gallivm, i * 2);
      shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
#else
      shuffles[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
      shuffles2[i] = lp_build_const_int32(gallivm, (i * 2));
#endif
   }

   src = LLVMBuildBitCast(gallivm->builder, src, LLVMVectorType(LLVMInt16TypeInContext(gallivm->context), len), "");
   return LLVMBuildShuffleVector(gallivm->builder, src,
                                 LLVMGetUndef(LLVMTypeOf(src)),
                                 LLVMConstVector(hi ? shuffles2 : shuffles,
                                                 bld_base->base.type.length),
                                 "");
}


static LLVMValueRef
merge_16bit(struct lp_build_nir_context *bld_base,
            LLVMValueRef input,
            LLVMValueRef input2)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   int i;
   LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
   int len = bld_base->int16_bld.type.length * 2;
   assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));

   for (i = 0; i < bld_base->int_bld.type.length * 2; i+=2) {
#if UTIL_ARCH_LITTLE_ENDIAN
      shuffles[i] = lp_build_const_int32(gallivm, i / 2);
      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
#else
      shuffles[i] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2);
#endif
   }
   return LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
}


static LLVMValueRef
get_signed_divisor(struct gallivm_state *gallivm,
                   struct lp_build_context *int_bld,
                   struct lp_build_context *mask_bld,
                   int src_bit_size,
                   LLVMValueRef src, LLVMValueRef divisor)
{
   LLVMBuilderRef builder = gallivm->builder;
   /* For signed divides a SIGFPE can occur if the numerator is INT_MIN
      and the divisor is -1. */
   /* set mask if numerator == INT_MIN */
   long long min_val;
   switch (src_bit_size) {
   case 8:
      min_val = INT8_MIN;
      break;
   case 16:
      min_val = INT16_MIN;
      break;
   default:
   case 32:
      min_val = INT_MIN;
      break;
   case 64:
      min_val = INT64_MIN;
      break;
   }
   LLVMValueRef div_mask2 = lp_build_cmp(mask_bld, PIPE_FUNC_EQUAL, src,
                                         lp_build_const_int_vec(gallivm, int_bld->type, min_val));
   /* set another mask if the divisor is -1 */
   LLVMValueRef div_mask3 = lp_build_cmp(mask_bld, PIPE_FUNC_EQUAL, divisor,
                                         lp_build_const_int_vec(gallivm, int_bld->type, -1));
   div_mask2 = LLVMBuildAnd(builder, div_mask2, div_mask3, "");

   divisor = lp_build_select(mask_bld, div_mask2, int_bld->one, divisor);
   return divisor;
}

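
/*
 * Integer division with the faulting cases masked off: lanes with a
 * zero divisor are diverted to divide by -1 (OR-ing the all-ones
 * comparison mask into the divisor), and for signed division the
 * INT_MIN / -1 lanes divide by 1 instead; both cases would otherwise
 * raise SIGFPE.  The bogus lane results are then patched up to the
 * API-mandated values afterwards.
 */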
static LLVMValueRef
do_int_divide(struct lp_build_nir_context *bld_base,
              bool is_unsigned, unsigned src_bit_size,
              LLVMValueRef src, LLVMValueRef src2)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *int_bld = get_int_bld(bld_base, is_unsigned, src_bit_size);
   struct lp_build_context *mask_bld = get_int_bld(bld_base, true, src_bit_size);

   /* avoid divide by zero: the OR below converts a 0 divisor to -1 */
   LLVMValueRef div_mask = lp_build_cmp(mask_bld, PIPE_FUNC_EQUAL, src2,
                                        mask_bld->zero);

   LLVMValueRef divisor = LLVMBuildOr(builder, div_mask, src2, "");
   if (!is_unsigned) {
      divisor = get_signed_divisor(gallivm, int_bld, mask_bld,
                                   src_bit_size, src, divisor);
   }
   LLVMValueRef result = lp_build_div(int_bld, src, divisor);

   if (!is_unsigned) {
      LLVMValueRef not_div_mask = LLVMBuildNot(builder, div_mask, "");
      return LLVMBuildAnd(builder, not_div_mask, result, "");
   } else
      /* udiv by zero is guaranteed to return 0xffffffff, at least with
       * d3d10; may as well do the same for idiv */
      return LLVMBuildOr(builder, div_mask, result, "");
}


static LLVMValueRef
do_int_mod(struct lp_build_nir_context *bld_base,
           bool is_unsigned, unsigned src_bit_size,
           LLVMValueRef src, LLVMValueRef src2)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *int_bld = get_int_bld(bld_base, is_unsigned, src_bit_size);
   struct lp_build_context *mask_bld = get_int_bld(bld_base, true, src_bit_size);
   LLVMValueRef div_mask = lp_build_cmp(mask_bld, PIPE_FUNC_EQUAL, src2,
                                        mask_bld->zero);
   LLVMValueRef divisor = LLVMBuildOr(builder,
                                      div_mask,
                                      src2, "");
   if (!is_unsigned) {
      divisor = get_signed_divisor(gallivm, int_bld, mask_bld,
                                   src_bit_size, src, divisor);
   }
   LLVMValueRef result = lp_build_mod(int_bld, src, divisor);
   return LLVMBuildOr(builder, div_mask, result, "");
}


static LLVMValueRef
do_quantize_to_f16(struct lp_build_nir_context *bld_base,
                   LLVMValueRef src)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef result, cond, cond2, temp;

   result = LLVMBuildFPTrunc(builder, src, bld_base->half_bld.vec_type, "");
   result = LLVMBuildFPExt(builder, result, bld_base->base.vec_type, "");

   temp = lp_build_abs(get_flt_bld(bld_base, 32), result);
   cond = LLVMBuildFCmp(builder, LLVMRealOGT,
                        LLVMBuildBitCast(builder, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, 0x38800000), bld_base->base.vec_type, ""),
                        temp, "");
   cond2 = LLVMBuildFCmp(builder, LLVMRealONE, temp, bld_base->base.zero, "");
   cond = LLVMBuildAnd(builder, cond, cond2, "");
   result = LLVMBuildSelect(builder, cond, bld_base->base.zero, result, "");
   return result;
}

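
/*
 * Emit the LLVM IR for one NIR ALU opcode.  The caller has already
 * split vector operands into per-channel values and cast them to the
 * opcode's input types, so src[] holds one LLVM value per operand.
 */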
static LLVMValueRef
do_alu_action(struct lp_build_nir_context *bld_base,
              const nir_alu_instr *instr,
              unsigned src_bit_size[NIR_MAX_VEC_COMPONENTS],
              LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef result;

   switch (instr->op) {
   case nir_op_b2f16:
      result = emit_b2f(bld_base, src[0], 16);
      break;
   case nir_op_b2f32:
      result = emit_b2f(bld_base, src[0], 32);
      break;
   case nir_op_b2f64:
      result = emit_b2f(bld_base, src[0], 64);
      break;
   case nir_op_b2i8:
      result = emit_b2i(bld_base, src[0], 8);
      break;
   case nir_op_b2i16:
      result = emit_b2i(bld_base, src[0], 16);
      break;
   case nir_op_b2i32:
      result = emit_b2i(bld_base, src[0], 32);
      break;
   case nir_op_b2i64:
      result = emit_b2i(bld_base, src[0], 64);
      break;
   case nir_op_b32csel:
      result = emit_b32csel(bld_base, src_bit_size, src);
      break;
   case nir_op_bit_count:
      result = lp_build_popcount(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
      if (src_bit_size[0] < 32)
         result = LLVMBuildZExt(builder, result, bld_base->int_bld.vec_type, "");
      else if (src_bit_size[0] > 32)
         result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, "");
      break;
   case nir_op_bitfield_select:
      result = lp_build_xor(&bld_base->uint_bld, src[2], lp_build_and(&bld_base->uint_bld, src[0], lp_build_xor(&bld_base->uint_bld, src[1], src[2])));
      break;
   case nir_op_bitfield_reverse:
      result = lp_build_bitfield_reverse(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
      break;
   case nir_op_f2b32:
      result = flt_to_bool32(bld_base, src_bit_size[0], src[0]);
      break;
   case nir_op_f2f16:
      if (src_bit_size[0] == 64)
         src[0] = LLVMBuildFPTrunc(builder, src[0],
                                   bld_base->base.vec_type, "");
      result = LLVMBuildFPTrunc(builder, src[0],
                                bld_base->half_bld.vec_type, "");
      break;
   case nir_op_f2f32:
      if (src_bit_size[0] < 32)
         result = LLVMBuildFPExt(builder, src[0],
                                 bld_base->base.vec_type, "");
      else
         result = LLVMBuildFPTrunc(builder, src[0],
                                   bld_base->base.vec_type, "");
      break;
   case nir_op_f2f64:
      result = LLVMBuildFPExt(builder, src[0],
                              bld_base->dbl_bld.vec_type, "");
      break;
   case nir_op_f2i8:
      result = LLVMBuildFPToSI(builder,
                               src[0],
                               bld_base->uint8_bld.vec_type, "");
      break;
   case nir_op_f2i16:
      result = LLVMBuildFPToSI(builder,
                               src[0],
                               bld_base->uint16_bld.vec_type, "");
      break;
   case nir_op_f2i32:
      result = LLVMBuildFPToSI(builder, src[0], bld_base->base.int_vec_type, "");
      break;
   case nir_op_f2u8:
      result = LLVMBuildFPToUI(builder,
                               src[0],
                               bld_base->uint8_bld.vec_type, "");
      break;
   case nir_op_f2u16:
      result = LLVMBuildFPToUI(builder,
                               src[0],
                               bld_base->uint16_bld.vec_type, "");
      break;
   case nir_op_f2u32:
      result = LLVMBuildFPToUI(builder,
                               src[0],
                               bld_base->base.int_vec_type, "");
      break;
   case nir_op_f2i64:
      result = LLVMBuildFPToSI(builder,
                               src[0],
                               bld_base->int64_bld.vec_type, "");
      break;
   case nir_op_f2u64:
      result = LLVMBuildFPToUI(builder,
                               src[0],
                               bld_base->uint64_bld.vec_type, "");
      break;
   case nir_op_fabs:
      result = lp_build_abs(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_fadd:
      result = lp_build_add(get_flt_bld(bld_base, src_bit_size[0]),
                            src[0], src[1]);
      break;
   case nir_op_fceil:
      result = lp_build_ceil(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_fcos:
      result = lp_build_cos(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_fddx:
   case nir_op_fddx_coarse:
   case nir_op_fddx_fine:
      result = lp_build_ddx(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_fddy:
   case nir_op_fddy_coarse:
   case nir_op_fddy_fine:
      result = lp_build_ddy(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_fdiv:
      result = lp_build_div(get_flt_bld(bld_base, src_bit_size[0]),
                            src[0], src[1]);
      break;
   case nir_op_feq32:
      result = fcmp32(bld_base, PIPE_FUNC_EQUAL, src_bit_size[0], src);
      break;
   case nir_op_fexp2:
      result = lp_build_exp2(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_ffloor:
      result = lp_build_floor(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_ffma:
      result = lp_build_fmuladd(builder, src[0], src[1], src[2]);
      break;
   case nir_op_ffract: {
      struct lp_build_context *flt_bld = get_flt_bld(bld_base, src_bit_size[0]);
      LLVMValueRef tmp = lp_build_floor(flt_bld, src[0]);
      result = lp_build_sub(flt_bld, src[0], tmp);
      break;
   }
   case nir_op_fge:
   case nir_op_fge32:
      result = fcmp32(bld_base, PIPE_FUNC_GEQUAL, src_bit_size[0], src);
      break;
   case nir_op_find_lsb: {
      struct lp_build_context *int_bld = get_int_bld(bld_base, false, src_bit_size[0]);
      result = lp_build_cttz(int_bld, src[0]);
      if (src_bit_size[0] < 32)
         result = LLVMBuildZExt(builder, result, bld_base->uint_bld.vec_type, "");
      else if (src_bit_size[0] > 32)
         result = LLVMBuildTrunc(builder, result, bld_base->uint_bld.vec_type, "");
      break;
   }
   case nir_op_fisfinite32:
      unreachable("Should have been lowered in nir_opt_algebraic_late.");
   case nir_op_flog2:
      result = lp_build_log2_safe(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_flt:
   case nir_op_flt32:
      result = fcmp32(bld_base, PIPE_FUNC_LESS, src_bit_size[0], src);
      break;
   case nir_op_fmax:
   case nir_op_fmin: {
      enum gallivm_nan_behavior minmax_nan;
      int first = 0;

      /* If one of the sources is known to be a number (i.e., not NaN), then
       * better code can be generated by passing that information along.
       */
      if (is_a_number(bld_base->range_ht, instr, 1,
                      0 /* unused num_components */,
                      NULL /* unused swizzle */)) {
         minmax_nan = GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN;
      } else if (is_a_number(bld_base->range_ht, instr, 0,
                             0 /* unused num_components */,
                             NULL /* unused swizzle */)) {
         first = 1;
         minmax_nan = GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN;
      } else {
         minmax_nan = GALLIVM_NAN_RETURN_OTHER;
      }

      if (instr->op == nir_op_fmin) {
         result = lp_build_min_ext(get_flt_bld(bld_base, src_bit_size[0]),
                                   src[first], src[1 - first], minmax_nan);
      } else {
         result = lp_build_max_ext(get_flt_bld(bld_base, src_bit_size[0]),
                                   src[first], src[1 - first], minmax_nan);
      }
      break;
   }
   case nir_op_fmod: {
      struct lp_build_context *flt_bld = get_flt_bld(bld_base, src_bit_size[0]);
      result = lp_build_div(flt_bld, src[0], src[1]);
      result = lp_build_floor(flt_bld, result);
      result = lp_build_mul(flt_bld, src[1], result);
      result = lp_build_sub(flt_bld, src[0], result);
      break;
   }
   case nir_op_fmul:
      result = lp_build_mul(get_flt_bld(bld_base, src_bit_size[0]),
                            src[0], src[1]);
      break;
   case nir_op_fneu32:
      result = fcmp32(bld_base, PIPE_FUNC_NOTEQUAL, src_bit_size[0], src);
      break;
   case nir_op_fneg:
      result = lp_build_negate(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_fpow:
      result = lp_build_pow(get_flt_bld(bld_base, src_bit_size[0]), src[0], src[1]);
      break;
   case nir_op_fquantize2f16:
      result = do_quantize_to_f16(bld_base, src[0]);
      break;
   case nir_op_frcp:
      result = lp_build_rcp(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_fround_even:
      if (src_bit_size[0] == 16) {
         struct lp_build_context *bld = get_flt_bld(bld_base, 16);
         char intrinsic[64];
         lp_format_intrinsic(intrinsic, 64, "llvm.roundeven", bld->vec_type);
         result = lp_build_intrinsic_unary(builder, intrinsic, bld->vec_type, src[0]);
      } else {
         result = lp_build_round(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      }
      break;
   case nir_op_frsq:
      result = lp_build_rsqrt(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_fsat:
      result = lp_build_clamp_zero_one_nanzero(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_fsign:
      result = lp_build_sgn(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_fsin:
      result = lp_build_sin(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_fsqrt:
      result = lp_build_sqrt(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_ftrunc:
      result = lp_build_trunc(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_i2b32:
      result = int_to_bool32(bld_base, src_bit_size[0], false, src[0]);
      break;
   case nir_op_i2f16:
      result = LLVMBuildSIToFP(builder, src[0],
                               bld_base->half_bld.vec_type, "");
      break;
   case nir_op_i2f32:
      result = lp_build_int_to_float(&bld_base->base, src[0]);
      break;
   case nir_op_i2f64:
      result = lp_build_int_to_float(&bld_base->dbl_bld, src[0]);
      break;
   case nir_op_i2i8:
      result = LLVMBuildTrunc(builder, src[0], bld_base->int8_bld.vec_type, "");
      break;
   case nir_op_i2i16:
      if (src_bit_size[0] < 16)
         result = LLVMBuildSExt(builder, src[0], bld_base->int16_bld.vec_type, "");
      else
         result = LLVMBuildTrunc(builder, src[0], bld_base->int16_bld.vec_type, "");
      break;
   case nir_op_i2i32:
      if (src_bit_size[0] < 32)
         result = LLVMBuildSExt(builder, src[0], bld_base->int_bld.vec_type, "");
      else
         result = LLVMBuildTrunc(builder, src[0], bld_base->int_bld.vec_type, "");
      break;
   case nir_op_i2i64:
      result = LLVMBuildSExt(builder, src[0], bld_base->int64_bld.vec_type, "");
      break;
   case nir_op_iabs:
      result = lp_build_abs(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
      break;
   case nir_op_iadd:
      result = lp_build_add(get_int_bld(bld_base, false, src_bit_size[0]),
                            src[0], src[1]);
      break;
   case nir_op_iand:
      result = lp_build_and(get_int_bld(bld_base, false, src_bit_size[0]),
                            src[0], src[1]);
      break;
   case nir_op_idiv:
      result = do_int_divide(bld_base, false, src_bit_size[0], src[0], src[1]);
      break;
   case nir_op_ieq32:
      result = icmp32(bld_base, PIPE_FUNC_EQUAL, false, src_bit_size[0], src);
      break;
   case nir_op_ige32:
      result = icmp32(bld_base, PIPE_FUNC_GEQUAL, false, src_bit_size[0], src);
      break;
   case nir_op_ilt32:
      result = icmp32(bld_base, PIPE_FUNC_LESS, false, src_bit_size[0], src);
      break;
   case nir_op_imax:
      result = lp_build_max(get_int_bld(bld_base, false, src_bit_size[0]), src[0], src[1]);
      break;
   case nir_op_imin:
      result = lp_build_min(get_int_bld(bld_base, false, src_bit_size[0]), src[0], src[1]);
      break;
   case nir_op_imul:
   case nir_op_imul24:
      result = lp_build_mul(get_int_bld(bld_base, false, src_bit_size[0]),
                            src[0], src[1]);
      break;
   case nir_op_imul_high: {
      LLVMValueRef hi_bits;
      lp_build_mul_32_lohi(get_int_bld(bld_base, false, src_bit_size[0]), src[0], src[1], &hi_bits);
      result = hi_bits;
      break;
   }
   case nir_op_ine32:
      result = icmp32(bld_base, PIPE_FUNC_NOTEQUAL, false, src_bit_size[0], src);
      break;
   case nir_op_ineg:
      result = lp_build_negate(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
      break;
   case nir_op_inot:
      result = lp_build_not(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
      break;
   case nir_op_ior:
      result = lp_build_or(get_int_bld(bld_base, false, src_bit_size[0]),
                           src[0], src[1]);
      break;
   case nir_op_imod:
   case nir_op_irem:
      result = do_int_mod(bld_base, false, src_bit_size[0], src[0], src[1]);
      break;
   case nir_op_ishl: {
      struct lp_build_context *uint_bld = get_int_bld(bld_base, true, src_bit_size[0]);
      struct lp_build_context *int_bld = get_int_bld(bld_base, false, src_bit_size[0]);
      if (src_bit_size[0] == 64)
         src[1] = LLVMBuildZExt(builder, src[1], uint_bld->vec_type, "");
      if (src_bit_size[0] < 32)
         src[1] = LLVMBuildTrunc(builder, src[1], uint_bld->vec_type, "");
      src[1] = lp_build_and(uint_bld, src[1], lp_build_const_int_vec(gallivm, uint_bld->type, (src_bit_size[0] - 1)));
      result = lp_build_shl(int_bld, src[0], src[1]);
      break;
   }
   case nir_op_ishr: {
      struct lp_build_context *uint_bld = get_int_bld(bld_base, true, src_bit_size[0]);
      struct lp_build_context *int_bld = get_int_bld(bld_base, false, src_bit_size[0]);
      if (src_bit_size[0] == 64)
         src[1] = LLVMBuildZExt(builder, src[1], uint_bld->vec_type, "");
      if (src_bit_size[0] < 32)
         src[1] = LLVMBuildTrunc(builder, src[1], uint_bld->vec_type, "");
      src[1] = lp_build_and(uint_bld, src[1], lp_build_const_int_vec(gallivm, uint_bld->type, (src_bit_size[0] - 1)));
      result = lp_build_shr(int_bld, src[0], src[1]);
      break;
   }
   case nir_op_isign:
      result = lp_build_sgn(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
      break;
   case nir_op_isub:
      result = lp_build_sub(get_int_bld(bld_base, false, src_bit_size[0]),
                            src[0], src[1]);
      break;
   case nir_op_ixor:
      result = lp_build_xor(get_int_bld(bld_base, false, src_bit_size[0]),
                            src[0], src[1]);
      break;
   case nir_op_mov:
      result = src[0];
      break;
   case nir_op_unpack_64_2x32_split_x:
      result = split_64bit(bld_base, src[0], false);
      break;
   case nir_op_unpack_64_2x32_split_y:
      result = split_64bit(bld_base, src[0], true);
      break;

   case nir_op_pack_32_2x16_split: {
      LLVMValueRef tmp = merge_16bit(bld_base, src[0], src[1]);
      result = LLVMBuildBitCast(builder, tmp, bld_base->base.vec_type, "");
      break;
   }
   case nir_op_unpack_32_2x16_split_x:
      result = split_16bit(bld_base, src[0], false);
      break;
   case nir_op_unpack_32_2x16_split_y:
      result = split_16bit(bld_base, src[0], true);
      break;
   case nir_op_pack_64_2x32_split: {
      LLVMValueRef tmp = merge_64bit(bld_base, src[0], src[1]);
      result = LLVMBuildBitCast(builder, tmp, bld_base->uint64_bld.vec_type, "");
      break;
   }
   case nir_op_pack_32_4x8_split: {
      LLVMValueRef tmp1 = merge_16bit(bld_base, src[0], src[1]);
      LLVMValueRef tmp2 = merge_16bit(bld_base, src[2], src[3]);
      tmp1 = LLVMBuildBitCast(builder, tmp1, bld_base->uint16_bld.vec_type, "");
      tmp2 = LLVMBuildBitCast(builder, tmp2, bld_base->uint16_bld.vec_type, "");
      LLVMValueRef tmp = merge_16bit(bld_base, tmp1, tmp2);
      result = LLVMBuildBitCast(builder, tmp, bld_base->uint_bld.vec_type, "");
      break;
   }
   case nir_op_u2f16:
      result = LLVMBuildUIToFP(builder, src[0],
                               bld_base->half_bld.vec_type, "");
      break;
   case nir_op_u2f32:
      result = LLVMBuildUIToFP(builder, src[0], bld_base->base.vec_type, "");
      break;
   case nir_op_u2f64:
      result = LLVMBuildUIToFP(builder, src[0], bld_base->dbl_bld.vec_type, "");
      break;
   case nir_op_u2u8:
      result = LLVMBuildTrunc(builder, src[0], bld_base->uint8_bld.vec_type, "");
      break;
   case nir_op_u2u16:
      if (src_bit_size[0] < 16)
         result = LLVMBuildZExt(builder, src[0], bld_base->uint16_bld.vec_type, "");
      else
         result = LLVMBuildTrunc(builder, src[0], bld_base->uint16_bld.vec_type, "");
      break;
   case nir_op_u2u32:
      if (src_bit_size[0] < 32)
         result = LLVMBuildZExt(builder, src[0], bld_base->uint_bld.vec_type, "");
      else
         result = LLVMBuildTrunc(builder, src[0], bld_base->uint_bld.vec_type, "");
      break;
   case nir_op_u2u64:
      result = LLVMBuildZExt(builder, src[0], bld_base->uint64_bld.vec_type, "");
      break;
   case nir_op_udiv:
      result = do_int_divide(bld_base, true, src_bit_size[0], src[0], src[1]);
      break;
   case nir_op_ufind_msb: {
      struct lp_build_context *uint_bld = get_int_bld(bld_base, true, src_bit_size[0]);
      result = lp_build_ctlz(uint_bld, src[0]);
      result = lp_build_sub(uint_bld, lp_build_const_int_vec(gallivm, uint_bld->type, src_bit_size[0] - 1), result);
      if (src_bit_size[0] < 32)
         result = LLVMBuildZExt(builder, result, bld_base->uint_bld.vec_type, "");
      else
         result = LLVMBuildTrunc(builder, result, bld_base->uint_bld.vec_type, "");
      break;
   }
   case nir_op_uge32:
      result = icmp32(bld_base, PIPE_FUNC_GEQUAL, true, src_bit_size[0], src);
      break;
   case nir_op_ult32:
      result = icmp32(bld_base, PIPE_FUNC_LESS, true, src_bit_size[0], src);
      break;
   case nir_op_umax:
      result = lp_build_max(get_int_bld(bld_base, true, src_bit_size[0]), src[0], src[1]);
      break;
   case nir_op_umin:
      result = lp_build_min(get_int_bld(bld_base, true, src_bit_size[0]), src[0], src[1]);
      break;
   case nir_op_umod:
      result = do_int_mod(bld_base, true, src_bit_size[0], src[0], src[1]);
      break;
   case nir_op_umul_high: {
      LLVMValueRef hi_bits;
      lp_build_mul_32_lohi(get_int_bld(bld_base, true, src_bit_size[0]), src[0], src[1], &hi_bits);
      result = hi_bits;
      break;
   }
   case nir_op_ushr: {
      struct lp_build_context *uint_bld = get_int_bld(bld_base, true, src_bit_size[0]);
      if (src_bit_size[0] == 64)
         src[1] = LLVMBuildZExt(builder, src[1], uint_bld->vec_type, "");
      if (src_bit_size[0] < 32)
         src[1] = LLVMBuildTrunc(builder, src[1], uint_bld->vec_type, "");
      src[1] = lp_build_and(uint_bld, src[1], lp_build_const_int_vec(gallivm, uint_bld->type, (src_bit_size[0] - 1)));
      result = lp_build_shr(uint_bld, src[0], src[1]);
      break;
   }
   case nir_op_bcsel: {
      LLVMTypeRef src1_type = LLVMTypeOf(src[1]);
      LLVMTypeRef src2_type = LLVMTypeOf(src[2]);

      if (LLVMGetTypeKind(src1_type) == LLVMPointerTypeKind &&
          LLVMGetTypeKind(src2_type) != LLVMPointerTypeKind) {
         src[2] = LLVMBuildIntToPtr(builder, src[2], src1_type, "");
      } else if (LLVMGetTypeKind(src2_type) == LLVMPointerTypeKind &&
                 LLVMGetTypeKind(src1_type) != LLVMPointerTypeKind) {
         src[1] = LLVMBuildIntToPtr(builder, src[1], src2_type, "");
      }

      for (int i = 1; i <= 2; i++) {
         LLVMTypeRef type = LLVMTypeOf(src[i]);
         if (LLVMGetTypeKind(type) == LLVMPointerTypeKind)
            break;
         src[i] = LLVMBuildBitCast(builder, src[i], get_int_bld(bld_base, true, src_bit_size[i])->vec_type, "");
      }
      return LLVMBuildSelect(builder, src[0], src[1], src[2], "");
   }
   default:
      assert(0);
      break;
   }
   return result;
}

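
/*
 * Visit a NIR ALU instruction: fetch and swizzle the sources, split
 * vector operands into per-channel values, run do_alu_action() on each
 * channel and recombine the results into the destination.
 */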
static void
visit_alu(struct lp_build_nir_context *bld_base,
          const nir_alu_instr *instr)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMValueRef src[NIR_MAX_VEC_COMPONENTS];
   unsigned src_bit_size[NIR_MAX_VEC_COMPONENTS];
   const unsigned num_components = nir_dest_num_components(instr->dest.dest);
   unsigned src_components;

   switch (instr->op) {
   case nir_op_vec2:
   case nir_op_vec3:
   case nir_op_vec4:
   case nir_op_vec8:
   case nir_op_vec16:
      src_components = 1;
      break;
   case nir_op_pack_half_2x16:
      src_components = 2;
      break;
   case nir_op_unpack_half_2x16:
      src_components = 1;
      break;
   case nir_op_cube_face_coord_amd:
   case nir_op_cube_face_index_amd:
      src_components = 3;
      break;
   case nir_op_fsum2:
   case nir_op_fsum3:
   case nir_op_fsum4:
      src_components = nir_op_infos[instr->op].input_sizes[0];
      break;
   default:
      src_components = num_components;
      break;
   }

   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
      src[i] = get_alu_src(bld_base, instr->src[i], src_components);
      src_bit_size[i] = nir_src_bit_size(instr->src[i].src);
   }

   if (instr->op == nir_op_mov &&
       is_aos(bld_base) &&
       !instr->dest.dest.is_ssa) {
      for (unsigned i = 0; i < 4; i++) {
         if (instr->dest.write_mask & (1 << i)) {
            assign_reg(bld_base, &instr->dest.dest.reg, (1 << i), src);
         }
      }
      return;
   }

   LLVMValueRef result[NIR_MAX_VEC_COMPONENTS];
   if (instr->op == nir_op_vec4 ||
       instr->op == nir_op_vec3 ||
       instr->op == nir_op_vec2 ||
       instr->op == nir_op_vec8 ||
       instr->op == nir_op_vec16) {
      for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
         result[i] = cast_type(bld_base, src[i],
                               nir_op_infos[instr->op].input_types[i],
                               src_bit_size[i]);
      }
   } else if (instr->op == nir_op_fsum4 ||
              instr->op == nir_op_fsum3 ||
              instr->op == nir_op_fsum2) {
      for (unsigned c = 0; c < nir_op_infos[instr->op].input_sizes[0]; c++) {
         LLVMValueRef temp_chan = LLVMBuildExtractValue(gallivm->builder,
                                                        src[0], c, "");
         temp_chan = cast_type(bld_base, temp_chan,
                               nir_op_infos[instr->op].input_types[0],
                               src_bit_size[0]);
         result[0] = (c == 0) ? temp_chan
            : lp_build_add(get_flt_bld(bld_base, src_bit_size[0]),
                           result[0], temp_chan);
      }
   } else if (is_aos(bld_base)) {
      if (instr->op == nir_op_fmul) {
         if (LLVMIsConstant(src[0])) {
            src[0] = lp_nir_aos_conv_const(gallivm, src[0], 1);
         }
         if (LLVMIsConstant(src[1])) {
            src[1] = lp_nir_aos_conv_const(gallivm, src[1], 1);
         }
      }
      result[0] = do_alu_action(bld_base, instr, src_bit_size, src);
   } else {
      /* Loop for R,G,B,A channels */
      for (unsigned c = 0; c < num_components; c++) {
         LLVMValueRef src_chan[NIR_MAX_VEC_COMPONENTS];

         /* Loop over instruction operands */
         for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
            if (num_components > 1) {
               src_chan[i] = LLVMBuildExtractValue(gallivm->builder,
                                                   src[i], c, "");
            } else {
               src_chan[i] = src[i];
            }
            src_chan[i] = cast_type(bld_base, src_chan[i],
                                    nir_op_infos[instr->op].input_types[i],
                                    src_bit_size[i]);
         }
         result[c] = do_alu_action(bld_base, instr, src_bit_size, src_chan);
         result[c] = cast_type(bld_base, result[c],
                               nir_op_infos[instr->op].output_type,
                               nir_dest_bit_size(instr->dest.dest));
      }
   }
   assign_alu_dest(bld_base, &instr->dest, result);
}


static void
visit_load_const(struct lp_build_nir_context *bld_base,
                 const nir_load_const_instr *instr)
{
   LLVMValueRef result[NIR_MAX_VEC_COMPONENTS];
   bld_base->load_const(bld_base, instr, result);
   assign_ssa_dest(bld_base, &instr->def, result);
}

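
/*
 * Resolve a variable deref chain into a constant offset (counted in
 * attribute slots) plus an optional per-lane indirect offset, and
 * optionally a vertex index for per-vertex IO.
 */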
static void
get_deref_offset(struct lp_build_nir_context *bld_base, nir_deref_instr *instr,
                 bool vs_in, unsigned *vertex_index_out,
                 LLVMValueRef *vertex_index_ref,
                 unsigned *const_out, LLVMValueRef *indir_out)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   nir_variable *var = nir_deref_instr_get_variable(instr);
   nir_deref_path path;
   unsigned idx_lvl = 1;

   nir_deref_path_init(&path, instr, NULL);

   if (vertex_index_out != NULL || vertex_index_ref != NULL) {
      if (vertex_index_ref) {
         *vertex_index_ref = get_src(bld_base, path.path[idx_lvl]->arr.index);
         if (vertex_index_out)
            *vertex_index_out = 0;
      } else {
         *vertex_index_out = nir_src_as_uint(path.path[idx_lvl]->arr.index);
      }
      ++idx_lvl;
   }

   uint32_t const_offset = 0;
   LLVMValueRef offset = NULL;

   if (var->data.compact && nir_src_is_const(instr->arr.index)) {
      assert(instr->deref_type == nir_deref_type_array);
      const_offset = nir_src_as_uint(instr->arr.index);
      goto out;
   }

   for (; path.path[idx_lvl]; ++idx_lvl) {
      const struct glsl_type *parent_type = path.path[idx_lvl - 1]->type;
      if (path.path[idx_lvl]->deref_type == nir_deref_type_struct) {
         unsigned index = path.path[idx_lvl]->strct.index;

         for (unsigned i = 0; i < index; i++) {
            const struct glsl_type *ft = glsl_get_struct_field(parent_type, i);
            const_offset += glsl_count_attribute_slots(ft, vs_in);
         }
      } else if (path.path[idx_lvl]->deref_type == nir_deref_type_array) {
         unsigned size = glsl_count_attribute_slots(path.path[idx_lvl]->type, vs_in);
         if (nir_src_is_const(path.path[idx_lvl]->arr.index)) {
            const_offset += nir_src_comp_as_int(path.path[idx_lvl]->arr.index, 0) * size;
         } else {
            LLVMValueRef idx_src = get_src(bld_base, path.path[idx_lvl]->arr.index);
            idx_src = cast_type(bld_base, idx_src, nir_type_uint, 32);
            LLVMValueRef array_off = lp_build_mul(&bld_base->uint_bld, lp_build_const_int_vec(bld_base->base.gallivm, bld_base->base.type, size),
                                                  idx_src);
            if (offset)
               offset = lp_build_add(&bld_base->uint_bld, offset, array_off);
            else
               offset = array_off;
         }
      } else
         unreachable("Unhandled deref type in get_deref_offset");
   }

out:
   nir_deref_path_finish(&path);

   if (const_offset && offset)
      offset = LLVMBuildAdd(builder, offset,
                            lp_build_const_int_vec(bld_base->base.gallivm, bld_base->uint_bld.type, const_offset),
                            "");
   *const_out = const_offset;
   *indir_out = offset;
}


static void
visit_load_input(struct lp_build_nir_context *bld_base,
                 nir_intrinsic_instr *instr,
                 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   nir_variable var = {0};
   var.data.location = nir_intrinsic_io_semantics(instr).location;
   var.data.driver_location = nir_intrinsic_base(instr);
   var.data.location_frac = nir_intrinsic_component(instr);

   unsigned nc = nir_dest_num_components(instr->dest);
   unsigned bit_size = nir_dest_bit_size(instr->dest);

   nir_src offset = *nir_get_io_offset_src(instr);
   bool indirect = !nir_src_is_const(offset);
   if (!indirect)
      assert(nir_src_as_uint(offset) == 0);
   LLVMValueRef indir_index = indirect ? get_src(bld_base, offset) : NULL;

   bld_base->load_var(bld_base, nir_var_shader_in, nc, bit_size, &var, 0, NULL, 0, indir_index, result);
}

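
/*
 * Like visit_load_input above, visit_store_output handles lowered IO:
 * a temporary nir_variable is rebuilt from the intrinsic's IO
 * semantics so the generic store_var callback can be reused.
 */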
static void
visit_store_output(struct lp_build_nir_context *bld_base,
                   nir_intrinsic_instr *instr)
{
   nir_variable var = {0};
   var.data.location = nir_intrinsic_io_semantics(instr).location;
   var.data.driver_location = nir_intrinsic_base(instr);
   var.data.location_frac = nir_intrinsic_component(instr);

   unsigned mask = nir_intrinsic_write_mask(instr);

   unsigned bit_size = nir_src_bit_size(instr->src[0]);
   LLVMValueRef src = get_src(bld_base, instr->src[0]);

   nir_src offset = *nir_get_io_offset_src(instr);
   bool indirect = !nir_src_is_const(offset);
   if (!indirect)
      assert(nir_src_as_uint(offset) == 0);
   LLVMValueRef indir_index = indirect ? get_src(bld_base, offset) : NULL;

   if (mask == 0x1 && LLVMGetTypeKind(LLVMTypeOf(src)) == LLVMArrayTypeKind) {
      src = LLVMBuildExtractValue(bld_base->base.gallivm->builder,
                                  src, 0, "");
   }

   bld_base->store_var(bld_base, nir_var_shader_out, util_last_bit(mask),
                       bit_size, &var, mask, NULL, 0, indir_index, src);
}


static void
visit_load_var(struct lp_build_nir_context *bld_base,
               nir_intrinsic_instr *instr,
               LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   assert(util_bitcount(deref->modes) == 1);
   nir_variable_mode mode = deref->modes;
   unsigned const_index;
   LLVMValueRef indir_index;
   LLVMValueRef indir_vertex_index = NULL;
   unsigned vertex_index = 0;
   unsigned nc = nir_dest_num_components(instr->dest);
   unsigned bit_size = nir_dest_bit_size(instr->dest);
   if (var) {
      bool vs_in = bld_base->shader->info.stage == MESA_SHADER_VERTEX &&
         var->data.mode == nir_var_shader_in;
      bool gs_in = bld_base->shader->info.stage == MESA_SHADER_GEOMETRY &&
         var->data.mode == nir_var_shader_in;
      bool tcs_in = bld_base->shader->info.stage == MESA_SHADER_TESS_CTRL &&
         var->data.mode == nir_var_shader_in;
      bool tcs_out = bld_base->shader->info.stage == MESA_SHADER_TESS_CTRL &&
         var->data.mode == nir_var_shader_out && !var->data.patch;
      bool tes_in = bld_base->shader->info.stage == MESA_SHADER_TESS_EVAL &&
         var->data.mode == nir_var_shader_in && !var->data.patch;

      mode = var->data.mode;

      get_deref_offset(bld_base, deref, vs_in,
                       gs_in ? &vertex_index : NULL,
                       (tcs_in || tcs_out || tes_in) ? &indir_vertex_index : NULL,
                       &const_index, &indir_index);
   }
   bld_base->load_var(bld_base, mode, nc, bit_size, var, vertex_index,
                      indir_vertex_index, const_index, indir_index, result);
}

static void
visit_store_var(struct lp_build_nir_context *bld_base,
                nir_intrinsic_instr *instr)
{
   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   assert(util_bitcount(deref->modes) == 1);
   nir_variable_mode mode = deref->modes;
   int writemask = instr->const_index[0];
   unsigned bit_size = nir_src_bit_size(instr->src[1]);
   LLVMValueRef src = get_src(bld_base, instr->src[1]);
   unsigned const_index = 0;
   LLVMValueRef indir_index, indir_vertex_index = NULL;
   if (var) {
      bool tcs_out = bld_base->shader->info.stage == MESA_SHADER_TESS_CTRL &&
         var->data.mode == nir_var_shader_out && !var->data.patch;
      get_deref_offset(bld_base, deref, false, NULL,
                       tcs_out ? &indir_vertex_index : NULL,
                       &const_index, &indir_index);
   }
   bld_base->store_var(bld_base, mode, instr->num_components, bit_size,
                       var, writemask, indir_vertex_index, const_index,
                       indir_index, src);
}


static void
visit_load_ubo(struct lp_build_nir_context *bld_base,
               nir_intrinsic_instr *instr,
               LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef idx = get_src(bld_base, instr->src[0]);
   LLVMValueRef offset = get_src(bld_base, instr->src[1]);

   bool offset_is_uniform = nir_src_is_always_uniform(instr->src[1]);
   idx = LLVMBuildExtractElement(builder, idx, lp_build_const_int32(gallivm, 0), "");
   bld_base->load_ubo(bld_base, nir_dest_num_components(instr->dest),
                      nir_dest_bit_size(instr->dest),
                      offset_is_uniform, idx, offset, result);
}


static void
visit_load_push_constant(struct lp_build_nir_context *bld_base,
                         nir_intrinsic_instr *instr,
                         LLVMValueRef result[4])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMValueRef offset = get_src(bld_base, instr->src[0]);
   LLVMValueRef idx = lp_build_const_int32(gallivm, 0);
   bool offset_is_uniform = nir_src_is_always_uniform(instr->src[0]);

   bld_base->load_ubo(bld_base, nir_dest_num_components(instr->dest),
                      nir_dest_bit_size(instr->dest),
                      offset_is_uniform, idx, offset, result);
}


static void
visit_load_ssbo(struct lp_build_nir_context *bld_base,
                nir_intrinsic_instr *instr,
                LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef idx = cast_type(bld_base, get_src(bld_base, instr->src[0]), nir_type_uint, 32);
   LLVMValueRef offset = get_src(bld_base, instr->src[1]);
   bool index_and_offset_are_uniform = nir_src_is_always_uniform(instr->src[0]) && nir_src_is_always_uniform(instr->src[1]);
   bld_base->load_mem(bld_base, nir_dest_num_components(instr->dest), nir_dest_bit_size(instr->dest),
                      index_and_offset_are_uniform, idx, offset, result);
}


static void
visit_store_ssbo(struct lp_build_nir_context *bld_base,
                 nir_intrinsic_instr *instr)
{
   LLVMValueRef val = get_src(bld_base, instr->src[0]);
   LLVMValueRef idx = cast_type(bld_base, get_src(bld_base, instr->src[1]), nir_type_uint, 32);
   LLVMValueRef offset = get_src(bld_base, instr->src[2]);
   bool index_and_offset_are_uniform = nir_src_is_always_uniform(instr->src[1]) && nir_src_is_always_uniform(instr->src[2]);
   int writemask = instr->const_index[0];
   int nc = nir_src_num_components(instr->src[0]);
   int bitsize = nir_src_bit_size(instr->src[0]);
   bld_base->store_mem(bld_base, writemask, nc, bitsize, index_and_offset_are_uniform, idx, offset, val);
}


static void
visit_get_ssbo_size(struct lp_build_nir_context *bld_base,
                    nir_intrinsic_instr *instr,
                    LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef idx = cast_type(bld_base,
                                get_src(bld_base, instr->src[0]),
                                nir_type_uint, 32);
   result[0] = bld_base->get_ssbo_size(bld_base, idx);
}

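
/*
 * SSBO atomics: src[0] is the buffer index, src[1] the byte offset and
 * src[2] the value operand; comp_swap additionally reads a second
 * value from src[3].
 */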
static void
visit_ssbo_atomic(struct lp_build_nir_context *bld_base,
                  nir_intrinsic_instr *instr,
                  LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef idx = cast_type(bld_base, get_src(bld_base, instr->src[0]),
                                nir_type_uint, 32);
   LLVMValueRef offset = get_src(bld_base, instr->src[1]);
   LLVMValueRef val = get_src(bld_base, instr->src[2]);
   LLVMValueRef val2 = NULL;
   int bitsize = nir_src_bit_size(instr->src[2]);
   if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap)
      val2 = get_src(bld_base, instr->src[3]);

   bld_base->atomic_mem(bld_base, instr->intrinsic, bitsize, idx,
                        offset, val, val2, &result[0]);
}

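
/*
 * Image load: the coordinate components are unpacked into coords[];
 * for 1D array images the layer moves from coords[1] to coords[2],
 * and multisampled images additionally pass a sample index.
 */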
static void
visit_store_image(struct lp_build_nir_context *bld_base,
                  nir_intrinsic_instr *instr)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   LLVMValueRef coord_val = get_src(bld_base, instr->src[1]);
   LLVMValueRef in_val = get_src(bld_base, instr->src[3]);
   LLVMValueRef coords[5];
   struct lp_img_params params;
   const struct glsl_type *type = glsl_without_array(var->type);
   unsigned const_index;
   LLVMValueRef indir_index;
   get_deref_offset(bld_base, deref, false, NULL, NULL,
                    &const_index, &indir_index);

   memset(&params, 0, sizeof(params));
   params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type),
                                        glsl_sampler_type_is_array(type));
   for (unsigned i = 0; i < 4; i++)
      coords[i] = LLVMBuildExtractValue(builder, coord_val, i, "");
   if (params.target == PIPE_TEXTURE_1D_ARRAY)
      coords[2] = coords[1];
   params.coords = coords;

   for (unsigned i = 0; i < 4; i++) {
      params.indata[i] = LLVMBuildExtractValue(builder, in_val, i, "");
      params.indata[i] = LLVMBuildBitCast(builder, params.indata[i],
                                          bld_base->base.vec_type, "");
   }
   if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_MS)
      params.ms_index = get_src(bld_base, instr->src[2]);
   params.img_op = LP_IMG_STORE;
   params.image_index = var->data.binding + (indir_index ? 0 : const_index);
   params.image_index_offset = indir_index;

   bld_base->image_op(bld_base, &params);
}


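/*
 * Image atomics map each NIR intrinsic onto the matching LLVM atomicrmw
 * opcode; comp_swap is dispatched as LP_IMG_ATOMIC_CAS instead and passes
 * its second operand through indata2.
 */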
static void
visit_atomic_image(struct lp_build_nir_context *bld_base,
                   nir_intrinsic_instr *instr,
                   LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   struct lp_img_params params;
   LLVMValueRef coord_val = get_src(bld_base, instr->src[1]);
   LLVMValueRef in_val = get_src(bld_base, instr->src[3]);
   LLVMValueRef coords[5];
   const struct glsl_type *type = glsl_without_array(var->type);
   unsigned const_index;
   LLVMValueRef indir_index;
   get_deref_offset(bld_base, deref, false, NULL, NULL,
                    &const_index, &indir_index);

   memset(&params, 0, sizeof(params));

   switch (instr->intrinsic) {
   case nir_intrinsic_image_deref_atomic_add:
      params.op = LLVMAtomicRMWBinOpAdd;
      break;
   case nir_intrinsic_image_deref_atomic_exchange:
      params.op = LLVMAtomicRMWBinOpXchg;
      break;
   case nir_intrinsic_image_deref_atomic_and:
      params.op = LLVMAtomicRMWBinOpAnd;
      break;
   case nir_intrinsic_image_deref_atomic_or:
      params.op = LLVMAtomicRMWBinOpOr;
      break;
   case nir_intrinsic_image_deref_atomic_xor:
      params.op = LLVMAtomicRMWBinOpXor;
      break;
   case nir_intrinsic_image_deref_atomic_umin:
      params.op = LLVMAtomicRMWBinOpUMin;
      break;
   case nir_intrinsic_image_deref_atomic_umax:
      params.op = LLVMAtomicRMWBinOpUMax;
      break;
   case nir_intrinsic_image_deref_atomic_imin:
      params.op = LLVMAtomicRMWBinOpMin;
      break;
   case nir_intrinsic_image_deref_atomic_imax:
      params.op = LLVMAtomicRMWBinOpMax;
      break;
   default:
      break;
   }

   params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type),
                                        glsl_sampler_type_is_array(type));
   for (unsigned i = 0; i < 4; i++) {
      coords[i] = LLVMBuildExtractValue(builder, coord_val, i, "");
   }
   if (params.target == PIPE_TEXTURE_1D_ARRAY) {
      coords[2] = coords[1];
   }

   params.coords = coords;

   if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_MS) {
      params.ms_index = get_src(bld_base, instr->src[2]);
   }
   if (instr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap) {
      LLVMValueRef cas_val = get_src(bld_base, instr->src[4]);
      params.indata[0] = in_val;
      params.indata2[0] = cas_val;
   } else {
      params.indata[0] = in_val;
   }

   params.outdata = result;
   params.img_op =
      (instr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap)
      ? LP_IMG_ATOMIC_CAS : LP_IMG_ATOMIC;
   params.image_index = var->data.binding + (indir_index ? 0 : const_index);
   params.image_index_offset = indir_index;

   bld_base->image_op(bld_base, &params);
}


static void
visit_image_size(struct lp_build_nir_context *bld_base,
                 nir_intrinsic_instr *instr,
                 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   struct lp_sampler_size_query_params params = { 0 };
   unsigned const_index;
   LLVMValueRef indir_index;
   const struct glsl_type *type = glsl_without_array(var->type);
   get_deref_offset(bld_base, deref, false, NULL, NULL,
                    &const_index, &indir_index);
   params.texture_unit = var->data.binding + (indir_index ? 0 : const_index);
   params.texture_unit_offset = indir_index;
   params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type),
                                        glsl_sampler_type_is_array(type));
   params.sizes_out = result;

   bld_base->image_size(bld_base, &params);
}


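/*
 * Sample-count queries reuse the size-query path with samples_only set.
 */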
static void
visit_image_samples(struct lp_build_nir_context *bld_base,
                    nir_intrinsic_instr *instr,
                    LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   struct lp_sampler_size_query_params params = { 0 };
   unsigned const_index;
   LLVMValueRef indir_index;
   const struct glsl_type *type = glsl_without_array(var->type);
   get_deref_offset(bld_base, deref, false, NULL, NULL,
                    &const_index, &indir_index);

   params.texture_unit = var->data.binding + (indir_index ? 0 : const_index);
   params.texture_unit_offset = indir_index;
   params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type),
                                        glsl_sampler_type_is_array(type));
   params.sizes_out = result;
   params.samples_only = true;

   bld_base->image_size(bld_base, &params);
}


static void
visit_shared_load(struct lp_build_nir_context *bld_base,
                  nir_intrinsic_instr *instr,
                  LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef offset = get_src(bld_base, instr->src[0]);
   bool offset_is_uniform = nir_src_is_always_uniform(instr->src[0]);
   bld_base->load_mem(bld_base, nir_dest_num_components(instr->dest),
                      nir_dest_bit_size(instr->dest),
                      offset_is_uniform, NULL, offset, result);
}


static void
visit_shared_store(struct lp_build_nir_context *bld_base,
                   nir_intrinsic_instr *instr)
{
   LLVMValueRef val = get_src(bld_base, instr->src[0]);
   LLVMValueRef offset = get_src(bld_base, instr->src[1]);
   bool offset_is_uniform = nir_src_is_always_uniform(instr->src[1]);
   int writemask = instr->const_index[1];
   int nc = nir_src_num_components(instr->src[0]);
   int bitsize = nir_src_bit_size(instr->src[0]);
   bld_base->store_mem(bld_base, writemask, nc, bitsize, offset_is_uniform,
                       NULL, offset, val);
}


static void
visit_shared_atomic(struct lp_build_nir_context *bld_base,
                    nir_intrinsic_instr *instr,
                    LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef offset = get_src(bld_base, instr->src[0]);
   LLVMValueRef val = get_src(bld_base, instr->src[1]);
   LLVMValueRef val2 = NULL;
   int bitsize = nir_src_bit_size(instr->src[1]);
   if (instr->intrinsic == nir_intrinsic_shared_atomic_comp_swap)
      val2 = get_src(bld_base, instr->src[2]);

   bld_base->atomic_mem(bld_base, instr->intrinsic, bitsize, NULL, offset,
                        val, val2, &result[0]);
}


static void
visit_barrier(struct lp_build_nir_context *bld_base)
{
   bld_base->barrier(bld_base);
}


static void
visit_discard(struct lp_build_nir_context *bld_base,
              nir_intrinsic_instr *instr)
{
   LLVMValueRef cond = NULL;
   if (instr->intrinsic == nir_intrinsic_discard_if) {
      cond = get_src(bld_base, instr->src[0]);
      cond = cast_type(bld_base, cond, nir_type_int, 32);
   }
   bld_base->discard(bld_base, cond);
}


static void
visit_load_kernel_input(struct lp_build_nir_context *bld_base,
                        nir_intrinsic_instr *instr,
                        LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef offset = get_src(bld_base, instr->src[0]);

   bool offset_is_uniform = nir_src_is_always_uniform(instr->src[0]);
   bld_base->load_kernel_arg(bld_base, nir_dest_num_components(instr->dest),
                             nir_dest_bit_size(instr->dest),
                             nir_src_bit_size(instr->src[0]),
                             offset_is_uniform, offset, result);
}


static void
visit_load_global(struct lp_build_nir_context *bld_base,
                  nir_intrinsic_instr *instr,
                  LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef addr = get_src(bld_base, instr->src[0]);
   bool offset_is_uniform = nir_src_is_always_uniform(instr->src[0]);
   bld_base->load_global(bld_base, nir_dest_num_components(instr->dest),
                         nir_dest_bit_size(instr->dest),
                         nir_src_bit_size(instr->src[0]),
                         offset_is_uniform, addr, result);
}


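/*
 * Global stores take the value from src[0] and the address from src[1];
 * the address bit size is forwarded separately so the backend can pick
 * the right pointer width.
 */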
static void
visit_store_global(struct lp_build_nir_context *bld_base,
                   nir_intrinsic_instr *instr)
{
   LLVMValueRef val = get_src(bld_base, instr->src[0]);
   int nc = nir_src_num_components(instr->src[0]);
   int bitsize = nir_src_bit_size(instr->src[0]);
   LLVMValueRef addr = get_src(bld_base, instr->src[1]);
   int addr_bitsize = nir_src_bit_size(instr->src[1]);
   int writemask = instr->const_index[0];
   bld_base->store_global(bld_base, writemask, nc, bitsize,
                          addr_bitsize, addr, val);
}


static void
visit_global_atomic(struct lp_build_nir_context *bld_base,
                    nir_intrinsic_instr *instr,
                    LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef addr = get_src(bld_base, instr->src[0]);
   LLVMValueRef val = get_src(bld_base, instr->src[1]);
   LLVMValueRef val2 = NULL;
   int addr_bitsize = nir_src_bit_size(instr->src[0]);
   int val_bitsize = nir_src_bit_size(instr->src[1]);
   if (instr->intrinsic == nir_intrinsic_global_atomic_comp_swap)
      val2 = get_src(bld_base, instr->src[2]);

   bld_base->atomic_global(bld_base, instr->intrinsic, addr_bitsize,
                           val_bitsize, addr, val, val2, &result[0]);
}


#if LLVM_VERSION_MAJOR >= 10
static void
visit_shuffle(struct lp_build_nir_context *bld_base,
              nir_intrinsic_instr *instr,
              LLVMValueRef dst[4])
{
   LLVMValueRef src = get_src(bld_base, instr->src[0]);
   src = cast_type(bld_base, src, nir_type_int,
                   nir_src_bit_size(instr->src[0]));
   LLVMValueRef index = get_src(bld_base, instr->src[1]);
   index = cast_type(bld_base, index, nir_type_uint,
                     nir_src_bit_size(instr->src[1]));

   bld_base->shuffle(bld_base, src, index, instr, dst);
}
#endif


static void
visit_interp(struct lp_build_nir_context *bld_base,
             nir_intrinsic_instr *instr,
             LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   unsigned num_components = nir_dest_num_components(instr->dest);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   unsigned const_index;
   LLVMValueRef indir_index;
   LLVMValueRef offsets[2] = { NULL, NULL };
   get_deref_offset(bld_base, deref, false, NULL, NULL,
                    &const_index, &indir_index);
   bool centroid = instr->intrinsic == nir_intrinsic_interp_deref_at_centroid;
   bool sample = false;
   if (instr->intrinsic == nir_intrinsic_interp_deref_at_offset) {
      for (unsigned i = 0; i < 2; i++) {
         offsets[i] = LLVMBuildExtractValue(builder,
                                            get_src(bld_base, instr->src[1]),
                                            i, "");
         offsets[i] = cast_type(bld_base, offsets[i], nir_type_float, 32);
      }
   } else if (instr->intrinsic == nir_intrinsic_interp_deref_at_sample) {
      offsets[0] = get_src(bld_base, instr->src[1]);
      offsets[0] = cast_type(bld_base, offsets[0], nir_type_int, 32);
      sample = true;
   }
   bld_base->interp_at(bld_base, num_components, var, centroid, sample,
                       const_index, indir_index, offsets, result);
}


static void
visit_load_scratch(struct lp_build_nir_context *bld_base,
                   nir_intrinsic_instr *instr,
                   LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef offset = get_src(bld_base, instr->src[0]);

   bld_base->load_scratch(bld_base, nir_dest_num_components(instr->dest),
                          nir_dest_bit_size(instr->dest), offset, result);
}


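/*
 * Scratch stores take the value from src[0] and the offset from src[1];
 * the writemask comes from the intrinsic's const indices.
 */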
static void
visit_store_scratch(struct lp_build_nir_context *bld_base,
                    nir_intrinsic_instr *instr)
{
   LLVMValueRef val = get_src(bld_base, instr->src[0]);
   LLVMValueRef offset = get_src(bld_base, instr->src[1]);
   int writemask = instr->const_index[2];
   int nc = nir_src_num_components(instr->src[0]);
   int bitsize = nir_src_bit_size(instr->src[0]);
   bld_base->store_scratch(bld_base, writemask, nc, bitsize, offset, val);
}


static void
visit_intrinsic(struct lp_build_nir_context *bld_base,
                nir_intrinsic_instr *instr)
{
   LLVMValueRef result[NIR_MAX_VEC_COMPONENTS] = {0};
   switch (instr->intrinsic) {
   case nir_intrinsic_load_input:
      visit_load_input(bld_base, instr, result);
      break;
   case nir_intrinsic_store_output:
      visit_store_output(bld_base, instr);
      break;
   case nir_intrinsic_load_deref:
      visit_load_var(bld_base, instr, result);
      break;
   case nir_intrinsic_store_deref:
      visit_store_var(bld_base, instr);
      break;
   case nir_intrinsic_load_ubo:
      visit_load_ubo(bld_base, instr, result);
      break;
   case nir_intrinsic_load_push_constant:
      visit_load_push_constant(bld_base, instr, result);
      break;
   case nir_intrinsic_load_ssbo:
      visit_load_ssbo(bld_base, instr, result);
      break;
   case nir_intrinsic_store_ssbo:
      visit_store_ssbo(bld_base, instr);
      break;
   case nir_intrinsic_get_ssbo_size:
      visit_get_ssbo_size(bld_base, instr, result);
      break;
   case nir_intrinsic_load_vertex_id:
   case nir_intrinsic_load_primitive_id:
   case nir_intrinsic_load_instance_id:
   case nir_intrinsic_load_base_instance:
   case nir_intrinsic_load_base_vertex:
   case nir_intrinsic_load_first_vertex:
   case nir_intrinsic_load_workgroup_id:
   case nir_intrinsic_load_local_invocation_id:
   case nir_intrinsic_load_local_invocation_index:
   case nir_intrinsic_load_num_workgroups:
   case nir_intrinsic_load_invocation_id:
   case nir_intrinsic_load_front_face:
   case nir_intrinsic_load_draw_id:
   case nir_intrinsic_load_workgroup_size:
   case nir_intrinsic_load_work_dim:
   case nir_intrinsic_load_tess_coord:
   case nir_intrinsic_load_tess_level_outer:
   case nir_intrinsic_load_tess_level_inner:
   case nir_intrinsic_load_patch_vertices_in:
   case nir_intrinsic_load_sample_id:
   case nir_intrinsic_load_sample_pos:
   case nir_intrinsic_load_sample_mask_in:
   case nir_intrinsic_load_view_index:
   case nir_intrinsic_load_subgroup_invocation:
   case nir_intrinsic_load_subgroup_id:
   case nir_intrinsic_load_num_subgroups:
      bld_base->sysval_intrin(bld_base, instr, result);
      break;
   case nir_intrinsic_load_helper_invocation:
      bld_base->helper_invocation(bld_base, &result[0]);
      break;
   case nir_intrinsic_discard_if:
   case nir_intrinsic_discard:
      visit_discard(bld_base, instr);
      break;
   case nir_intrinsic_emit_vertex:
      bld_base->emit_vertex(bld_base, nir_intrinsic_stream_id(instr));
      break;
   case nir_intrinsic_end_primitive:
      bld_base->end_primitive(bld_base, nir_intrinsic_stream_id(instr));
      break;
   case nir_intrinsic_ssbo_atomic_add:
   case nir_intrinsic_ssbo_atomic_imin:
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_ssbo_atomic_umin:
   case nir_intrinsic_ssbo_atomic_umax:
   case nir_intrinsic_ssbo_atomic_and:
   case nir_intrinsic_ssbo_atomic_or:
   case nir_intrinsic_ssbo_atomic_xor:
   case nir_intrinsic_ssbo_atomic_exchange:
   case nir_intrinsic_ssbo_atomic_comp_swap:
      visit_ssbo_atomic(bld_base, instr, result);
      break;
   case nir_intrinsic_image_deref_load:
      visit_load_image(bld_base, instr, result);
      break;
   case nir_intrinsic_image_deref_store:
      visit_store_image(bld_base, instr);
      break;
   case nir_intrinsic_image_deref_atomic_add:
   case nir_intrinsic_image_deref_atomic_imin:
   case nir_intrinsic_image_deref_atomic_imax:
   case nir_intrinsic_image_deref_atomic_umin:
   case nir_intrinsic_image_deref_atomic_umax:
   case nir_intrinsic_image_deref_atomic_and:
   case nir_intrinsic_image_deref_atomic_or:
   case nir_intrinsic_image_deref_atomic_xor:
   case nir_intrinsic_image_deref_atomic_exchange:
   case nir_intrinsic_image_deref_atomic_comp_swap:
      visit_atomic_image(bld_base, instr, result);
      break;
   case nir_intrinsic_image_deref_size:
      visit_image_size(bld_base, instr, result);
      break;
   case nir_intrinsic_image_deref_samples:
      visit_image_samples(bld_base, instr, result);
      break;
   case nir_intrinsic_load_shared:
      visit_shared_load(bld_base, instr, result);
      break;
   case nir_intrinsic_store_shared:
      visit_shared_store(bld_base, instr);
      break;
   case nir_intrinsic_shared_atomic_add:
   case nir_intrinsic_shared_atomic_imin:
   case nir_intrinsic_shared_atomic_umin:
   case nir_intrinsic_shared_atomic_imax:
   case nir_intrinsic_shared_atomic_umax:
   case nir_intrinsic_shared_atomic_and:
   case nir_intrinsic_shared_atomic_or:
   case nir_intrinsic_shared_atomic_xor:
   case nir_intrinsic_shared_atomic_exchange:
   case nir_intrinsic_shared_atomic_comp_swap:
      visit_shared_atomic(bld_base, instr, result);
      break;
   case nir_intrinsic_control_barrier:
   case nir_intrinsic_scoped_barrier:
      visit_barrier(bld_base);
      break;
   case nir_intrinsic_group_memory_barrier:
   case nir_intrinsic_memory_barrier:
   case nir_intrinsic_memory_barrier_shared:
   case nir_intrinsic_memory_barrier_buffer:
   case nir_intrinsic_memory_barrier_image:
   case nir_intrinsic_memory_barrier_tcs_patch:
      break;
   case nir_intrinsic_load_kernel_input:
      visit_load_kernel_input(bld_base, instr, result);
      break;
   case nir_intrinsic_load_global:
   case nir_intrinsic_load_global_constant:
      visit_load_global(bld_base, instr, result);
      break;
   case nir_intrinsic_store_global:
      visit_store_global(bld_base, instr);
      break;
   case nir_intrinsic_global_atomic_add:
   case nir_intrinsic_global_atomic_imin:
   case nir_intrinsic_global_atomic_umin:
   case nir_intrinsic_global_atomic_imax:
   case nir_intrinsic_global_atomic_umax:
   case nir_intrinsic_global_atomic_and:
   case nir_intrinsic_global_atomic_or:
   case nir_intrinsic_global_atomic_xor:
   case nir_intrinsic_global_atomic_exchange:
   case nir_intrinsic_global_atomic_comp_swap:
      visit_global_atomic(bld_base, instr, result);
      break;
   case nir_intrinsic_vote_all:
   case nir_intrinsic_vote_any:
   case nir_intrinsic_vote_ieq:
   case nir_intrinsic_vote_feq:
      bld_base->vote(bld_base,
                     cast_type(bld_base, get_src(bld_base, instr->src[0]),
                               nir_type_int,
                               nir_src_bit_size(instr->src[0])),
                     instr, result);
      break;
   case nir_intrinsic_elect:
      bld_base->elect(bld_base, result);
      break;
   case nir_intrinsic_reduce:
   case nir_intrinsic_inclusive_scan:
   case nir_intrinsic_exclusive_scan:
      bld_base->reduce(bld_base,
                       cast_type(bld_base, get_src(bld_base, instr->src[0]),
                                 nir_type_int,
                                 nir_src_bit_size(instr->src[0])),
                       instr, result);
      break;
   case nir_intrinsic_ballot:
      bld_base->ballot(bld_base,
                       cast_type(bld_base, get_src(bld_base, instr->src[0]),
                                 nir_type_int, 32),
                       instr, result);
      break;
#if LLVM_VERSION_MAJOR >= 10
   case nir_intrinsic_shuffle:
      visit_shuffle(bld_base, instr, result);
      break;
#endif
   case nir_intrinsic_read_invocation:
   case nir_intrinsic_read_first_invocation: {
      LLVMValueRef src1 = NULL;
      LLVMValueRef src0 = get_src(bld_base, instr->src[0]);
      if (instr->intrinsic == nir_intrinsic_read_invocation) {
         src1 = cast_type(bld_base, get_src(bld_base, instr->src[1]),
                          nir_type_int, 32);
         src0 = cast_type(bld_base, src0, nir_type_int,
                          nir_src_bit_size(instr->src[0]));
      }
      bld_base->read_invocation(bld_base, src0,
                                nir_src_bit_size(instr->src[0]),
                                src1, result);
      break;
   }
   case nir_intrinsic_interp_deref_at_offset:
   case nir_intrinsic_interp_deref_at_centroid:
   case nir_intrinsic_interp_deref_at_sample:
      visit_interp(bld_base, instr, result);
      break;
   case nir_intrinsic_load_scratch:
      visit_load_scratch(bld_base, instr, result);
      break;
   case nir_intrinsic_store_scratch:
      visit_store_scratch(bld_base, instr);
      break;
   default:
      fprintf(stderr, "Unsupported intrinsic: ");
      nir_print_instr(&instr->instr, stderr);
      fprintf(stderr, "\n");
      assert(0);
      break;
   }
   if (result[0]) {
      assign_dest(bld_base, &instr->dest, result);
   }
}


static void
visit_txs(struct lp_build_nir_context *bld_base, nir_tex_instr *instr)
{
   struct lp_sampler_size_query_params params = { 0 };
   LLVMValueRef sizes_out[NIR_MAX_VEC_COMPONENTS];
   LLVMValueRef explicit_lod = NULL;
   LLVMValueRef texture_unit_offset = NULL;
   for (unsigned i = 0; i < instr->num_srcs; i++) {
      switch (instr->src[i].src_type) {
      case nir_tex_src_lod:
         explicit_lod = cast_type(bld_base,
                                  get_src(bld_base, instr->src[i].src),
                                  nir_type_int, 32);
         break;
      case nir_tex_src_texture_offset:
         texture_unit_offset = get_src(bld_base, instr->src[i].src);
         break;
      default:
         break;
      }
   }

   params.target = glsl_sampler_to_pipe(instr->sampler_dim, instr->is_array);
   params.texture_unit = instr->texture_index;
   params.explicit_lod = explicit_lod;
   params.is_sviewinfo = TRUE;
   params.sizes_out = sizes_out;
   params.samples_only = (instr->op == nir_texop_texture_samples);
   params.texture_unit_offset = texture_unit_offset;

   if (instr->op == nir_texop_query_levels)
      params.explicit_lod = bld_base->uint_bld.zero;
   bld_base->tex_size(bld_base, &params);
   assign_dest(bld_base, &instr->dest,
               &sizes_out[instr->op == nir_texop_query_levels ? 3 : 0]);
}


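/*
 * Decide how the LOD is evaluated for a sample: a uniform LOD stays
 * scalar; fragment shaders normally compute it per quad, unless
 * GALLIVM_PERF_NO_QUAD_LOD forces per-element LOD; all other stages use
 * per-element LOD.
 */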
static enum lp_sampler_lod_property
lp_build_nir_lod_property(struct lp_build_nir_context *bld_base,
                          nir_src lod_src)
{
   enum lp_sampler_lod_property lod_property;

   if (nir_src_is_always_uniform(lod_src)) {
      lod_property = LP_SAMPLER_LOD_SCALAR;
   } else if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) {
      if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD)
         lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
      else
         lod_property = LP_SAMPLER_LOD_PER_QUAD;
   } else {
      lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
   }
   return lod_property;
}


static void
visit_tex(struct lp_build_nir_context *bld_base, nir_tex_instr *instr)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef coords[5];
   LLVMValueRef offsets[3] = { NULL };
   LLVMValueRef explicit_lod = NULL, ms_index = NULL;
   struct lp_sampler_params params;
   struct lp_derivatives derivs;
   unsigned sample_key = 0;
   nir_deref_instr *texture_deref_instr = NULL;
   nir_deref_instr *sampler_deref_instr = NULL;
   LLVMValueRef texture_unit_offset = NULL;
   LLVMValueRef texel[NIR_MAX_VEC_COMPONENTS];
   unsigned lod_src = 0;
   LLVMValueRef coord_undef = LLVMGetUndef(bld_base->base.int_vec_type);
   unsigned coord_vals = is_aos(bld_base) ? 1 : instr->coord_components;
   memset(&params, 0, sizeof(params));
   enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;

   if (instr->op == nir_texop_txs || instr->op == nir_texop_query_levels ||
       instr->op == nir_texop_texture_samples) {
      visit_txs(bld_base, instr);
      return;
   }
   if (instr->op == nir_texop_txf || instr->op == nir_texop_txf_ms)
      sample_key |= LP_SAMPLER_OP_FETCH << LP_SAMPLER_OP_TYPE_SHIFT;
   else if (instr->op == nir_texop_tg4) {
      sample_key |= LP_SAMPLER_OP_GATHER << LP_SAMPLER_OP_TYPE_SHIFT;
      sample_key |= (instr->component << LP_SAMPLER_GATHER_COMP_SHIFT);
   } else if (instr->op == nir_texop_lod)
      sample_key |= LP_SAMPLER_OP_LODQ << LP_SAMPLER_OP_TYPE_SHIFT;
   for (unsigned i = 0; i < instr->num_srcs; i++) {
      switch (instr->src[i].src_type) {
      case nir_tex_src_coord: {
         LLVMValueRef coord = get_src(bld_base, instr->src[i].src);
         if (coord_vals == 1)
            coords[0] = coord;
         else {
            for (unsigned chan = 0; chan < instr->coord_components; ++chan)
               coords[chan] = LLVMBuildExtractValue(builder, coord,
                                                    chan, "");
         }
         for (unsigned chan = coord_vals; chan < 5; chan++)
            coords[chan] = coord_undef;

         break;
      }
      case nir_tex_src_texture_deref:
         texture_deref_instr = nir_src_as_deref(instr->src[i].src);
         break;
      case nir_tex_src_sampler_deref:
         sampler_deref_instr = nir_src_as_deref(instr->src[i].src);
         break;
      case nir_tex_src_comparator:
         sample_key |= LP_SAMPLER_SHADOW;
         coords[4] = get_src(bld_base, instr->src[i].src);
         coords[4] = cast_type(bld_base, coords[4], nir_type_float, 32);
         break;
      case nir_tex_src_bias:
         sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
         lod_src = i;
         explicit_lod = cast_type(bld_base,
                                  get_src(bld_base, instr->src[i].src),
                                  nir_type_float, 32);
         break;
      case nir_tex_src_lod:
         sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
         lod_src = i;
         if (instr->op == nir_texop_txf)
            explicit_lod = cast_type(bld_base,
                                     get_src(bld_base, instr->src[i].src),
                                     nir_type_int, 32);
         else
            explicit_lod = cast_type(bld_base,
                                     get_src(bld_base, instr->src[i].src),
                                     nir_type_float, 32);
         break;
      case nir_tex_src_ddx: {
         int deriv_cnt = instr->coord_components;
         if (instr->is_array)
            deriv_cnt--;
         LLVMValueRef deriv_val = get_src(bld_base, instr->src[i].src);
         if (deriv_cnt == 1)
            derivs.ddx[0] = deriv_val;
         else
            for (unsigned chan = 0; chan < deriv_cnt; ++chan)
               derivs.ddx[chan] = LLVMBuildExtractValue(builder, deriv_val,
                                                        chan, "");
         for (unsigned chan = 0; chan < deriv_cnt; ++chan)
            derivs.ddx[chan] = cast_type(bld_base, derivs.ddx[chan],
                                         nir_type_float, 32);
         break;
      }
      case nir_tex_src_ddy: {
         int deriv_cnt = instr->coord_components;
         if (instr->is_array)
            deriv_cnt--;
         LLVMValueRef deriv_val = get_src(bld_base, instr->src[i].src);
         if (deriv_cnt == 1)
            derivs.ddy[0] = deriv_val;
         else
            for (unsigned chan = 0; chan < deriv_cnt; ++chan)
               derivs.ddy[chan] = LLVMBuildExtractValue(builder, deriv_val,
                                                        chan, "");
         for (unsigned chan = 0; chan < deriv_cnt; ++chan)
            derivs.ddy[chan] = cast_type(bld_base, derivs.ddy[chan],
                                         nir_type_float, 32);
         break;
      }
      case nir_tex_src_offset: {
         int offset_cnt = instr->coord_components;
         if (instr->is_array)
            offset_cnt--;
         LLVMValueRef offset_val = get_src(bld_base, instr->src[i].src);
         sample_key |= LP_SAMPLER_OFFSETS;
         if (offset_cnt == 1)
            offsets[0] = cast_type(bld_base, offset_val, nir_type_int, 32);
         else {
            for (unsigned chan = 0; chan < offset_cnt; ++chan) {
               offsets[chan] = LLVMBuildExtractValue(builder, offset_val,
                                                     chan, "");
               offsets[chan] = cast_type(bld_base, offsets[chan],
                                         nir_type_int, 32);
            }
         }
         break;
      }
      case nir_tex_src_ms_index:
         sample_key |= LP_SAMPLER_FETCH_MS;
         ms_index = cast_type(bld_base, get_src(bld_base, instr->src[i].src),
                              nir_type_int, 32);
         break;

      case nir_tex_src_texture_offset:
         texture_unit_offset = get_src(bld_base, instr->src[i].src);
         break;
      case nir_tex_src_sampler_offset:
         break;
      default:
         assert(0);
         break;
      }
   }
   if (!sampler_deref_instr)
      sampler_deref_instr = texture_deref_instr;

   if (explicit_lod)
      lod_property = lp_build_nir_lod_property(bld_base,
                                               instr->src[lod_src].src);

   if (instr->op == nir_texop_tex || instr->op == nir_texop_tg4 ||
       instr->op == nir_texop_txb || instr->op == nir_texop_txl ||
       instr->op == nir_texop_txd || instr->op == nir_texop_lod)
      for (unsigned chan = 0; chan < coord_vals; ++chan)
         coords[chan] = cast_type(bld_base, coords[chan], nir_type_float, 32);
   else if (instr->op == nir_texop_txf || instr->op == nir_texop_txf_ms)
      for (unsigned chan = 0; chan < instr->coord_components; ++chan)
         coords[chan] = cast_type(bld_base, coords[chan], nir_type_int, 32);
   if (instr->is_array && instr->sampler_dim == GLSL_SAMPLER_DIM_1D) {
      /* move layer coord for 1d arrays. */
      coords[2] = coords[1];
      coords[1] = coord_undef;
   }

   uint32_t samp_base_index = 0, tex_base_index = 0;
   if (!sampler_deref_instr) {
      int samp_src_index = nir_tex_instr_src_index(instr,
                                                   nir_tex_src_sampler_handle);
      if (samp_src_index == -1) {
         samp_base_index = instr->sampler_index;
      }
   }
   if (!texture_deref_instr) {
      int tex_src_index = nir_tex_instr_src_index(instr,
                                                  nir_tex_src_texture_handle);
      if (tex_src_index == -1) {
         tex_base_index = instr->texture_index;
      }
   }

   if (instr->op == nir_texop_txd) {
      sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
      params.derivs = &derivs;
      if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) {
         if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD)
            lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
         else
            lod_property = LP_SAMPLER_LOD_PER_QUAD;
      } else
         lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
   }

   sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
   params.sample_key = sample_key;
   params.offsets = offsets;
   params.texture_index = tex_base_index;
   params.texture_index_offset = texture_unit_offset;
   params.sampler_index = samp_base_index;
   params.coords = coords;
   params.texel = texel;
   params.lod = explicit_lod;
   params.ms_index = ms_index;
   params.aniso_filter_table = bld_base->aniso_filter_table;
   bld_base->tex(bld_base, &params);

   if (nir_dest_bit_size(instr->dest) != 32) {
      assert(nir_dest_bit_size(instr->dest) == 16);
      LLVMTypeRef vec_type = NULL;
      bool is_float = false;
      switch (nir_alu_type_get_base_type(instr->dest_type)) {
      case nir_type_float:
         is_float = true;
         break;
      case nir_type_int:
         vec_type = bld_base->int16_bld.vec_type;
         break;
      case nir_type_uint:
         vec_type = bld_base->uint16_bld.vec_type;
         break;
      default:
         unreachable("unexpected alu type");
      }
      for (int i = 0; i < nir_dest_num_components(instr->dest); ++i) {
         if (is_float) {
            texel[i] = lp_build_float_to_half(gallivm, texel[i]);
         } else {
            texel[i] = LLVMBuildBitCast(builder, texel[i],
                                        bld_base->int_bld.vec_type, "");
            texel[i] = LLVMBuildTrunc(builder, texel[i], vec_type, "");
         }
      }
   }

   assign_dest(bld_base, &instr->dest, texel);
}


static void
visit_ssa_undef(struct lp_build_nir_context *bld_base,
                const nir_ssa_undef_instr *instr)
{
   unsigned num_components = instr->def.num_components;
   LLVMValueRef undef[NIR_MAX_VEC_COMPONENTS];
   struct lp_build_context *undef_bld = get_int_bld(bld_base, true,
                                                    instr->def.bit_size);
   for (unsigned i = 0; i < num_components; i++)
      undef[i] = LLVMGetUndef(undef_bld->vec_type);
   /* Zero the unused tail; the memset size must be in bytes, not elements. */
   memset(&undef[num_components], 0,
          (NIR_MAX_VEC_COMPONENTS - num_components) * sizeof(LLVMValueRef));
   assign_ssa_dest(bld_base, &instr->def, undef);
}


static void
visit_jump(struct lp_build_nir_context *bld_base,
           const nir_jump_instr *instr)
{
   switch (instr->type) {
   case nir_jump_break:
      bld_base->break_stmt(bld_base);
      break;
   case nir_jump_continue:
      bld_base->continue_stmt(bld_base);
      break;
   default:
      unreachable("Unknown jump instr\n");
   }
}


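/*
 * Only shared- and global-memory derefs are handled here; other modes
 * return early. A variable deref resolves to the LLVM pointer recorded
 * for the variable in the vars table.
 */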
static void
visit_deref(struct lp_build_nir_context *bld_base,
            nir_deref_instr *instr)
{
   if (!nir_deref_mode_is_one_of(instr, nir_var_mem_shared |
                                        nir_var_mem_global)) {
      return;
   }

   LLVMValueRef result = NULL;
   switch (instr->deref_type) {
   case nir_deref_type_var: {
      struct hash_entry *entry =
         _mesa_hash_table_search(bld_base->vars, instr->var);
      result = entry->data;
      break;
   }
   default:
      unreachable("Unhandled deref_instr deref type");
   }

   assign_ssa(bld_base, instr->dest.ssa.index, result);
}


static void
visit_block(struct lp_build_nir_context *bld_base, nir_block *block)
{
   nir_foreach_instr(instr, block) {
      switch (instr->type) {
      case nir_instr_type_alu:
         visit_alu(bld_base, nir_instr_as_alu(instr));
         break;
      case nir_instr_type_load_const:
         visit_load_const(bld_base, nir_instr_as_load_const(instr));
         break;
      case nir_instr_type_intrinsic:
         visit_intrinsic(bld_base, nir_instr_as_intrinsic(instr));
         break;
      case nir_instr_type_tex:
         visit_tex(bld_base, nir_instr_as_tex(instr));
         break;
      case nir_instr_type_phi:
         assert(0);
         break;
      case nir_instr_type_ssa_undef:
         visit_ssa_undef(bld_base, nir_instr_as_ssa_undef(instr));
         break;
      case nir_instr_type_jump:
         visit_jump(bld_base, nir_instr_as_jump(instr));
         break;
      case nir_instr_type_deref:
         visit_deref(bld_base, nir_instr_as_deref(instr));
         break;
      default:
         fprintf(stderr, "Unknown NIR instr type: ");
         nir_print_instr(instr, stderr);
         fprintf(stderr, "\n");
         abort();
      }
   }
}


static void
visit_if(struct lp_build_nir_context *bld_base, nir_if *if_stmt)
{
   LLVMValueRef cond = get_src(bld_base, if_stmt->condition);

   bld_base->if_cond(bld_base, cond);
   visit_cf_list(bld_base, &if_stmt->then_list);

   if (!exec_list_is_empty(&if_stmt->else_list)) {
      bld_base->else_stmt(bld_base);
      visit_cf_list(bld_base, &if_stmt->else_list);
   }
   bld_base->endif_stmt(bld_base);
}


static void
visit_loop(struct lp_build_nir_context *bld_base, nir_loop *loop)
{
   bld_base->bgnloop(bld_base);
   visit_cf_list(bld_base, &loop->body);
   bld_base->endloop(bld_base);
}


static void
visit_cf_list(struct lp_build_nir_context *bld_base,
              struct exec_list *list)
{
   foreach_list_typed(nir_cf_node, node, node, list) {
      switch (node->type) {
      case nir_cf_node_block:
         visit_block(bld_base, nir_cf_node_as_block(node));
         break;
      case nir_cf_node_if:
         visit_if(bld_base, nir_cf_node_as_if(node));
         break;
      case nir_cf_node_loop:
         visit_loop(bld_base, nir_cf_node_as_loop(node));
         break;
      default:
         assert(0);
      }
   }
}


static void
handle_shader_output_decl(struct lp_build_nir_context *bld_base,
                          struct nir_shader *nir,
                          struct nir_variable *variable)
{
   bld_base->emit_var_decl(bld_base, variable);
}


/* Vector registers are stored as arrays on the LLVM side, so we can use GEP
 * on them; to do exec-mask stores we need to operate on a single component
 * at a time. The arrays are laid out as:
 *   0.x, 1.x, 2.x, 3.x
 *   0.y, 1.y, 2.y, 3.y
 *   ....
 */
static LLVMTypeRef
get_register_type(struct lp_build_nir_context *bld_base,
                  nir_register *reg)
{
   if (is_aos(bld_base))
      return bld_base->base.int_vec_type;

   struct lp_build_context *int_bld =
      get_int_bld(bld_base, true, reg->bit_size == 1 ? 32 : reg->bit_size);

   LLVMTypeRef type = int_bld->vec_type;
   if (reg->num_array_elems)
      type = LLVMArrayType(type, reg->num_array_elems);
   if (reg->num_components > 1)
      type = LLVMArrayType(type, reg->num_components);

   return type;
}


bool lp_build_nir_llvm(struct lp_build_nir_context *bld_base,
                       struct nir_shader *nir)
{
   struct nir_function *func;

   nir_convert_from_ssa(nir, true);
   nir_lower_locals_to_regs(nir);
   nir_remove_dead_derefs(nir);
   nir_remove_dead_variables(nir, nir_var_function_temp, NULL);

   if (is_aos(bld_base)) {
      nir_move_vec_src_uses_to_dest(nir);
      nir_lower_vec_to_movs(nir, NULL, NULL);
   }

   nir_foreach_shader_out_variable(variable, nir)
      handle_shader_output_decl(bld_base, nir, variable);

   if (nir->info.io_lowered) {
      uint64_t outputs_written = nir->info.outputs_written;

      while (outputs_written) {
         unsigned location = u_bit_scan64(&outputs_written);
         nir_variable var = {0};

         var.type = glsl_vec4_type();
         var.data.mode = nir_var_shader_out;
         var.data.location = location;
         var.data.driver_location = util_bitcount64(nir->info.outputs_written &
                                                    BITFIELD64_MASK(location));
         bld_base->emit_var_decl(bld_base, &var);
      }
   }

   bld_base->regs = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                            _mesa_key_pointer_equal);
   bld_base->vars = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                            _mesa_key_pointer_equal);
   bld_base->range_ht = _mesa_pointer_hash_table_create(NULL);

   func = (struct nir_function *)exec_list_get_head(&nir->functions);

   nir_foreach_register(reg, &func->impl->registers) {
      LLVMTypeRef type = get_register_type(bld_base, reg);
      LLVMValueRef reg_alloc = lp_build_alloca(bld_base->base.gallivm,
                                               type, "reg");
      _mesa_hash_table_insert(bld_base->regs, reg, reg_alloc);
   }
   nir_index_ssa_defs(func->impl);
   bld_base->ssa_defs = calloc(func->impl->ssa_alloc, sizeof(LLVMValueRef));
   visit_cf_list(bld_base, &func->impl->body);

   free(bld_base->ssa_defs);
   ralloc_free(bld_base->vars);
   ralloc_free(bld_base->regs);
   ralloc_free(bld_base->range_ht);
   return true;
}


/* Run some basic optimization passes to remove constructs we don't want to
 * see later on.
 */
void
lp_build_opt_nir(struct nir_shader *nir)
{
   bool progress;

   static const struct nir_lower_tex_options lower_tex_options = {
      .lower_tg4_offsets = true,
      .lower_txp = ~0u,
      .lower_invalid_implicit_lod = true,
   };
   NIR_PASS_V(nir, nir_lower_tex, &lower_tex_options);
   NIR_PASS_V(nir, nir_lower_frexp);

   NIR_PASS_V(nir, nir_lower_flrp, 16 | 32 | 64, true);
   NIR_PASS_V(nir, nir_lower_fp16_casts);
   do {
      progress = false;
      NIR_PASS(progress, nir, nir_opt_constant_folding);
      NIR_PASS(progress, nir, nir_opt_algebraic);
      NIR_PASS(progress, nir, nir_lower_pack);

      nir_lower_tex_options options = { .lower_invalid_implicit_lod = true, };
      NIR_PASS_V(nir, nir_lower_tex, &options);

      const nir_lower_subgroups_options subgroups_options = {
         .subgroup_size = lp_native_vector_width / 32,
         .ballot_bit_size = 32,
         .ballot_components = 1,
         .lower_to_scalar = true,
         .lower_subgroup_masks = true,
         .lower_relative_shuffle = true,
      };
      NIR_PASS(progress, nir, nir_lower_subgroups, &subgroups_options);
   } while (progress);

   do {
      progress = false;
      NIR_PASS(progress, nir, nir_opt_algebraic_late);
      if (progress) {
         NIR_PASS_V(nir, nir_copy_prop);
         NIR_PASS_V(nir, nir_opt_dce);
         NIR_PASS_V(nir, nir_opt_cse);
      }
   } while (progress);

   if (nir_lower_bool_to_int32(nir)) {
      NIR_PASS_V(nir, nir_copy_prop);
      NIR_PASS_V(nir, nir_opt_dce);
   }
}