1/************************************************************************** 2 * 3 * Copyright 2009 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28/** 29 * @file 30 * Helper functions for logical operations. 31 * 32 * @author Jose Fonseca <jfonseca@vmware.com> 33 */ 34 35#include <llvm/Config/llvm-config.h> 36 37#include "util/u_cpu_detect.h" 38#include "util/u_memory.h" 39#include "util/u_debug.h" 40 41#include "lp_bld_type.h" 42#include "lp_bld_const.h" 43#include "lp_bld_swizzle.h" 44#include "lp_bld_init.h" 45#include "lp_bld_intr.h" 46#include "lp_bld_debug.h" 47#include "lp_bld_logic.h" 48 49 50/* 51 * XXX 52 * 53 * Selection with vector conditional like 54 * 55 * select <4 x i1> %C, %A, %B 56 * 57 * is valid IR (e.g. 
llvm/test/Assembler/vector-select.ll), but it is only 58 * supported on some backends (x86) starting with llvm 3.1. 59 * 60 * Expanding the boolean vector to full SIMD register width, as in 61 * 62 * sext <4 x i1> %C to <4 x i32> 63 * 64 * is valid and supported (e.g., llvm/test/CodeGen/X86/vec_compare.ll), but 65 * it causes assertion failures in LLVM 2.6. It appears to work correctly on 66 * LLVM 2.7. 67 */ 68 69 70/** 71 * Build code to compare two values 'a' and 'b' of 'type' using the given func. 72 * \param func one of PIPE_FUNC_x 73 * If the ordered argument is true the function will use LLVM's ordered 74 * comparisons, otherwise unordered comparisons will be used. 75 * The result values will be 0 for false or ~0 for true. 76 */ 77static LLVMValueRef 78lp_build_compare_ext(struct gallivm_state *gallivm, 79 const struct lp_type type, 80 enum pipe_compare_func func, 81 LLVMValueRef a, 82 LLVMValueRef b, 83 boolean ordered) 84{ 85 LLVMBuilderRef builder = gallivm->builder; 86 LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type); 87 LLVMValueRef zeros = LLVMConstNull(int_vec_type); 88 LLVMValueRef ones = LLVMConstAllOnes(int_vec_type); 89 LLVMValueRef cond; 90 LLVMValueRef res; 91 92 assert(lp_check_value(type, a)); 93 assert(lp_check_value(type, b)); 94 95 if (func == PIPE_FUNC_NEVER) 96 return zeros; 97 if (func == PIPE_FUNC_ALWAYS) 98 return ones; 99 100 assert(func > PIPE_FUNC_NEVER); 101 assert(func < PIPE_FUNC_ALWAYS); 102 103 if (type.floating) { 104 LLVMRealPredicate op; 105 switch(func) { 106 case PIPE_FUNC_EQUAL: 107 op = ordered ? LLVMRealOEQ : LLVMRealUEQ; 108 break; 109 case PIPE_FUNC_NOTEQUAL: 110 op = ordered ? LLVMRealONE : LLVMRealUNE; 111 break; 112 case PIPE_FUNC_LESS: 113 op = ordered ? LLVMRealOLT : LLVMRealULT; 114 break; 115 case PIPE_FUNC_LEQUAL: 116 op = ordered ? LLVMRealOLE : LLVMRealULE; 117 break; 118 case PIPE_FUNC_GREATER: 119 op = ordered ? 
LLVMRealOGT : LLVMRealUGT; 120 break; 121 case PIPE_FUNC_GEQUAL: 122 op = ordered ? LLVMRealOGE : LLVMRealUGE; 123 break; 124 default: 125 assert(0); 126 return lp_build_undef(gallivm, type); 127 } 128 129 cond = LLVMBuildFCmp(builder, op, a, b, ""); 130 res = LLVMBuildSExt(builder, cond, int_vec_type, ""); 131 } 132 else { 133 LLVMIntPredicate op; 134 switch(func) { 135 case PIPE_FUNC_EQUAL: 136 op = LLVMIntEQ; 137 break; 138 case PIPE_FUNC_NOTEQUAL: 139 op = LLVMIntNE; 140 break; 141 case PIPE_FUNC_LESS: 142 op = type.sign ? LLVMIntSLT : LLVMIntULT; 143 break; 144 case PIPE_FUNC_LEQUAL: 145 op = type.sign ? LLVMIntSLE : LLVMIntULE; 146 break; 147 case PIPE_FUNC_GREATER: 148 op = type.sign ? LLVMIntSGT : LLVMIntUGT; 149 break; 150 case PIPE_FUNC_GEQUAL: 151 op = type.sign ? LLVMIntSGE : LLVMIntUGE; 152 break; 153 default: 154 assert(0); 155 return lp_build_undef(gallivm, type); 156 } 157 158 cond = LLVMBuildICmp(builder, op, a, b, ""); 159 res = LLVMBuildSExt(builder, cond, int_vec_type, ""); 160 } 161 162 return res; 163} 164 165/** 166 * Build code to compare two values 'a' and 'b' of 'type' using the given func. 167 * \param func one of PIPE_FUNC_x 168 * The result values will be 0 for false or ~0 for true. 169 */ 170LLVMValueRef 171lp_build_compare(struct gallivm_state *gallivm, 172 const struct lp_type type, 173 enum pipe_compare_func func, 174 LLVMValueRef a, 175 LLVMValueRef b) 176{ 177 LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type); 178 LLVMValueRef zeros = LLVMConstNull(int_vec_type); 179 LLVMValueRef ones = LLVMConstAllOnes(int_vec_type); 180 181 assert(lp_check_value(type, a)); 182 assert(lp_check_value(type, b)); 183 184 if (func == PIPE_FUNC_NEVER) 185 return zeros; 186 if (func == PIPE_FUNC_ALWAYS) 187 return ones; 188 189 assert(func > PIPE_FUNC_NEVER); 190 assert(func < PIPE_FUNC_ALWAYS); 191 192#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) 193 /* 194 * There are no unsigned integer comparison instructions in SSE. 
195 */ 196 197 if (!type.floating && !type.sign && 198 type.width * type.length == 128 && 199 util_get_cpu_caps()->has_sse2 && 200 (func == PIPE_FUNC_LESS || 201 func == PIPE_FUNC_LEQUAL || 202 func == PIPE_FUNC_GREATER || 203 func == PIPE_FUNC_GEQUAL) && 204 (gallivm_debug & GALLIVM_DEBUG_PERF)) { 205 debug_printf("%s: inefficient <%u x i%u> unsigned comparison\n", 206 __FUNCTION__, type.length, type.width); 207 } 208#endif 209 210 return lp_build_compare_ext(gallivm, type, func, a, b, FALSE); 211} 212 213/** 214 * Build code to compare two values 'a' and 'b' using the given func. 215 * \param func one of PIPE_FUNC_x 216 * If the operands are floating point numbers, the function will use 217 * ordered comparison which means that it will return true if both 218 * operands are not a NaN and the specified condition evaluates to true. 219 * The result values will be 0 for false or ~0 for true. 220 */ 221LLVMValueRef 222lp_build_cmp_ordered(struct lp_build_context *bld, 223 enum pipe_compare_func func, 224 LLVMValueRef a, 225 LLVMValueRef b) 226{ 227 return lp_build_compare_ext(bld->gallivm, bld->type, func, a, b, TRUE); 228} 229 230/** 231 * Build code to compare two values 'a' and 'b' using the given func. 232 * \param func one of PIPE_FUNC_x 233 * If the operands are floating point numbers, the function will use 234 * unordered comparison which means that it will return true if either 235 * operand is a NaN or the specified condition evaluates to true. 236 * The result values will be 0 for false or ~0 for true. 
237 */ 238LLVMValueRef 239lp_build_cmp(struct lp_build_context *bld, 240 enum pipe_compare_func func, 241 LLVMValueRef a, 242 LLVMValueRef b) 243{ 244 return lp_build_compare(bld->gallivm, bld->type, func, a, b); 245} 246 247 248/** 249 * Return (mask & a) | (~mask & b); 250 */ 251LLVMValueRef 252lp_build_select_bitwise(struct lp_build_context *bld, 253 LLVMValueRef mask, 254 LLVMValueRef a, 255 LLVMValueRef b) 256{ 257 LLVMBuilderRef builder = bld->gallivm->builder; 258 struct lp_type type = bld->type; 259 LLVMValueRef res; 260 LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->gallivm, type); 261 262 assert(lp_check_value(type, a)); 263 assert(lp_check_value(type, b)); 264 265 if (a == b) { 266 return a; 267 } 268 269 if (type.floating) { 270 a = LLVMBuildBitCast(builder, a, int_vec_type, ""); 271 b = LLVMBuildBitCast(builder, b, int_vec_type, ""); 272 } 273 274 if (type.width > 32) 275 mask = LLVMBuildSExt(builder, mask, int_vec_type, ""); 276 a = LLVMBuildAnd(builder, a, mask, ""); 277 278 /* This often gets translated to PANDN, but sometimes the NOT is 279 * pre-computed and stored in another constant. The best strategy depends 280 * on available registers, so it is not a big deal -- hopefully LLVM does 281 * the right decision attending the rest of the program. 282 */ 283 b = LLVMBuildAnd(builder, b, LLVMBuildNot(builder, mask, ""), ""); 284 285 res = LLVMBuildOr(builder, a, b, ""); 286 287 if (type.floating) { 288 LLVMTypeRef vec_type = lp_build_vec_type(bld->gallivm, type); 289 res = LLVMBuildBitCast(builder, res, vec_type, ""); 290 } 291 292 return res; 293} 294 295 296/** 297 * Return mask ? a : b; 298 * 299 * mask is a bitwise mask, composed of 0 or ~0 for each element. Any other value 300 * will yield unpredictable results. 
 */
LLVMValueRef
lp_build_select(struct lp_build_context *bld,
                LLVMValueRef mask,
                LLVMValueRef a,
                LLVMValueRef b)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMContextRef lc = bld->gallivm->context;
   struct lp_type type = bld->type;
   LLVMValueRef res;

   assert(lp_check_value(type, a));
   assert(lp_check_value(type, b));

   if (a == b)
      return a;

   if (type.length == 1) {
      /* Scalar case: reduce the 0/~0 mask to an i1 and use a plain select. */
      mask = LLVMBuildTrunc(builder, mask, LLVMInt1TypeInContext(lc), "");
      res = LLVMBuildSelect(builder, mask, a, b, "");
   }
   else if (LLVMIsConstant(mask) ||
            LLVMGetInstructionOpcode(mask) == LLVMSExt) {
      /* Generate a vector select.
       *
       * Using vector selects should avoid emitting intrinsics hence avoid
       * hindering optimization passes, but vector selects weren't properly
       * supported yet for a long time, and LLVM will generate poor code when
       * the mask is not the result of a comparison.
       * XXX: Even if the instruction was an SExt, this may still produce
       * terrible code. Try piglit stencil-twoside.
       */

      /* Convert the mask to a vector of booleans.
       *
       * XXX: In x86 the mask is controlled by the MSB, so if we shifted the
       * mask by `type.width - 1`, LLVM should realize the mask is ready. Alas
       * what really happens is that LLVM will emit two shifts back to back.
       */
      /* Deliberately disabled — kept for reference (see XXX above). */
      if (0) {
         LLVMValueRef shift =
            LLVMConstInt(bld->int_elem_type, bld->type.width - 1, 0);
         shift = lp_build_broadcast(bld->gallivm, bld->int_vec_type, shift);
         mask = LLVMBuildLShr(builder, mask, shift, "");
      }
      LLVMTypeRef bool_vec_type =
         LLVMVectorType(LLVMInt1TypeInContext(lc), type.length);
      /* Truncation keeps only the low bit of each 0/~0 element. */
      mask = LLVMBuildTrunc(builder, mask, bool_vec_type, "");

      res = LLVMBuildSelect(builder, mask, a, b, "");
   }
   else if (((util_get_cpu_caps()->has_sse4_1 &&
              type.width * type.length == 128) ||
             (util_get_cpu_caps()->has_avx &&
              type.width * type.length == 256 && type.width >= 32) ||
             (util_get_cpu_caps()->has_avx2 &&
              type.width * type.length == 256)) &&
            !LLVMIsConstant(a) &&
            !LLVMIsConstant(b) &&
            !LLVMIsConstant(mask)) {
      /* Blend via x86 blendv intrinsics when the CPU supports them and all
       * operands are runtime values.
       */
      const char *intrinsic;
      LLVMTypeRef arg_type;
      LLVMValueRef args[3];

      /* Widen the mask if its element width doesn't match the data type. */
      LLVMTypeRef mask_type = LLVMGetElementType(LLVMTypeOf(mask));
      if (LLVMGetIntTypeWidth(mask_type) != type.width) {
         LLVMTypeRef int_vec_type =
            LLVMVectorType(LLVMIntTypeInContext(lc, type.width), type.length);
         mask = LLVMBuildSExt(builder, mask, int_vec_type, "");
      }
      /*
       * There's only float blend in AVX but can just cast i32/i64
       * to float.
       */
      if (type.width * type.length == 256) {
         if (type.width == 64) {
            intrinsic = "llvm.x86.avx.blendv.pd.256";
            arg_type = LLVMVectorType(LLVMDoubleTypeInContext(lc), 4);
         }
         else if (type.width == 32) {
            intrinsic = "llvm.x86.avx.blendv.ps.256";
            arg_type = LLVMVectorType(LLVMFloatTypeInContext(lc), 8);
         } else {
            assert(util_get_cpu_caps()->has_avx2);
            intrinsic = "llvm.x86.avx2.pblendvb";
            arg_type = LLVMVectorType(LLVMInt8TypeInContext(lc), 32);
         }
      }
      else if (type.floating &&
               type.width == 64) {
         intrinsic = "llvm.x86.sse41.blendvpd";
         arg_type = LLVMVectorType(LLVMDoubleTypeInContext(lc), 2);
      } else if (type.floating &&
                 type.width == 32) {
         intrinsic = "llvm.x86.sse41.blendvps";
         arg_type = LLVMVectorType(LLVMFloatTypeInContext(lc), 4);
      } else {
         intrinsic = "llvm.x86.sse41.pblendvb";
         arg_type = LLVMVectorType(LLVMInt8TypeInContext(lc), 16);
      }

      if (arg_type != bld->int_vec_type) {
         mask = LLVMBuildBitCast(builder, mask, arg_type, "");
      }

      if (arg_type != bld->vec_type) {
         a = LLVMBuildBitCast(builder, a, arg_type, "");
         b = LLVMBuildBitCast(builder, b, arg_type, "");
      }

      /* NOTE(review): blendv picks the second source where the mask is set,
       * hence b goes in args[0] and a in args[1] — confirm against the
       * intrinsic's operand order before reordering.
       */
      args[0] = b;
      args[1] = a;
      args[2] = mask;

      res = lp_build_intrinsic(builder, intrinsic,
                               arg_type, args, ARRAY_SIZE(args), 0);

      if (arg_type != bld->vec_type) {
         res = LLVMBuildBitCast(builder, res, bld->vec_type, "");
      }
   }
   else {
      /* Portable fallback: (mask & a) | (~mask & b). */
      res = lp_build_select_bitwise(bld, mask, a, b);
   }

   return res;
}


/**
 * Return mask ? a : b;
 *
 * mask is a TGSI_WRITEMASK_xxx.
435 */ 436LLVMValueRef 437lp_build_select_aos(struct lp_build_context *bld, 438 unsigned mask, 439 LLVMValueRef a, 440 LLVMValueRef b, 441 unsigned num_channels) 442{ 443 LLVMBuilderRef builder = bld->gallivm->builder; 444 const struct lp_type type = bld->type; 445 const unsigned n = type.length; 446 447 assert((mask & ~0xf) == 0); 448 assert(lp_check_value(type, a)); 449 assert(lp_check_value(type, b)); 450 451 if (a == b) 452 return a; 453 if ((mask & 0xf) == 0xf) 454 return a; 455 if ((mask & 0xf) == 0x0) 456 return b; 457 if (a == bld->undef || b == bld->undef) 458 return bld->undef; 459 460 /* 461 * There are two major ways of accomplishing this: 462 * - with a shuffle 463 * - with a select 464 * 465 * The flip between these is empirical and might need to be adjusted. 466 */ 467 if (n <= 4) { 468 /* 469 * Shuffle. 470 */ 471 LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context); 472 LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; 473 474 for (unsigned j = 0; j < n; j += num_channels) 475 for (unsigned i = 0; i < num_channels; ++i) 476 shuffles[j + i] = LLVMConstInt(elem_type, 477 (mask & (1 << i) ? 0 : n) + j + i, 478 0); 479 480 return LLVMBuildShuffleVector(builder, a, b, 481 LLVMConstVector(shuffles, n), ""); 482 } 483 else { 484 LLVMValueRef mask_vec = lp_build_const_mask_aos(bld->gallivm, 485 type, mask, num_channels); 486 return lp_build_select(bld, mask_vec, a, b); 487 } 488} 489 490 491/** 492 * Return (scalar-cast)val ? 
true : false; 493 */ 494LLVMValueRef 495lp_build_any_true_range(struct lp_build_context *bld, 496 unsigned real_length, 497 LLVMValueRef val) 498{ 499 LLVMBuilderRef builder = bld->gallivm->builder; 500 LLVMTypeRef scalar_type; 501 LLVMTypeRef true_type; 502 503 assert(real_length <= bld->type.length); 504 505 true_type = LLVMIntTypeInContext(bld->gallivm->context, 506 bld->type.width * real_length); 507 scalar_type = LLVMIntTypeInContext(bld->gallivm->context, 508 bld->type.width * bld->type.length); 509 val = LLVMBuildBitCast(builder, val, scalar_type, ""); 510 /* 511 * We're using always native types so we can use intrinsics. 512 * However, if we don't do per-element calculations, we must ensure 513 * the excess elements aren't used since they may contain garbage. 514 */ 515 if (real_length < bld->type.length) { 516 val = LLVMBuildTrunc(builder, val, true_type, ""); 517 } 518 return LLVMBuildICmp(builder, LLVMIntNE, 519 val, LLVMConstNull(true_type), ""); 520} 521