1bf215546Sopenharmony_ci/************************************************************************** 2bf215546Sopenharmony_ci * 3bf215546Sopenharmony_ci * Copyright 2009 VMware, Inc. 4bf215546Sopenharmony_ci * All Rights Reserved. 5bf215546Sopenharmony_ci * 6bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 7bf215546Sopenharmony_ci * copy of this software and associated documentation files (the 8bf215546Sopenharmony_ci * "Software"), to deal in the Software without restriction, including 9bf215546Sopenharmony_ci * without limitation the rights to use, copy, modify, merge, publish, 10bf215546Sopenharmony_ci * distribute, sub license, and/or sell copies of the Software, and to 11bf215546Sopenharmony_ci * permit persons to whom the Software is furnished to do so, subject to 12bf215546Sopenharmony_ci * the following conditions: 13bf215546Sopenharmony_ci * 14bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the 15bf215546Sopenharmony_ci * next paragraph) shall be included in all copies or substantial portions 16bf215546Sopenharmony_ci * of the Software. 17bf215546Sopenharmony_ci * 18bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19bf215546Sopenharmony_ci * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20bf215546Sopenharmony_ci * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21bf215546Sopenharmony_ci * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22bf215546Sopenharmony_ci * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23bf215546Sopenharmony_ci * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24bf215546Sopenharmony_ci * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25bf215546Sopenharmony_ci * 26bf215546Sopenharmony_ci **************************************************************************/ 27bf215546Sopenharmony_ci 28bf215546Sopenharmony_ci/** 29bf215546Sopenharmony_ci * @file 30bf215546Sopenharmony_ci * Helper functions for logical operations. 31bf215546Sopenharmony_ci * 32bf215546Sopenharmony_ci * @author Jose Fonseca <jfonseca@vmware.com> 33bf215546Sopenharmony_ci */ 34bf215546Sopenharmony_ci 35bf215546Sopenharmony_ci#include <llvm/Config/llvm-config.h> 36bf215546Sopenharmony_ci 37bf215546Sopenharmony_ci#include "util/u_cpu_detect.h" 38bf215546Sopenharmony_ci#include "util/u_memory.h" 39bf215546Sopenharmony_ci#include "util/u_debug.h" 40bf215546Sopenharmony_ci 41bf215546Sopenharmony_ci#include "lp_bld_type.h" 42bf215546Sopenharmony_ci#include "lp_bld_const.h" 43bf215546Sopenharmony_ci#include "lp_bld_swizzle.h" 44bf215546Sopenharmony_ci#include "lp_bld_init.h" 45bf215546Sopenharmony_ci#include "lp_bld_intr.h" 46bf215546Sopenharmony_ci#include "lp_bld_debug.h" 47bf215546Sopenharmony_ci#include "lp_bld_logic.h" 48bf215546Sopenharmony_ci 49bf215546Sopenharmony_ci 50bf215546Sopenharmony_ci/* 51bf215546Sopenharmony_ci * XXX 52bf215546Sopenharmony_ci * 53bf215546Sopenharmony_ci * Selection with vector conditional like 54bf215546Sopenharmony_ci * 55bf215546Sopenharmony_ci * select <4 x i1> %C, %A, %B 56bf215546Sopenharmony_ci * 57bf215546Sopenharmony_ci * is valid IR (e.g. llvm/test/Assembler/vector-select.ll), but it is only 58bf215546Sopenharmony_ci * supported on some backends (x86) starting with llvm 3.1. 59bf215546Sopenharmony_ci * 60bf215546Sopenharmony_ci * Expanding the boolean vector to full SIMD register width, as in 61bf215546Sopenharmony_ci * 62bf215546Sopenharmony_ci * sext <4 x i1> %C to <4 x i32> 63bf215546Sopenharmony_ci * 64bf215546Sopenharmony_ci * is valid and supported (e.g., llvm/test/CodeGen/X86/vec_compare.ll), but 65bf215546Sopenharmony_ci * it causes assertion failures in LLVM 2.6. It appears to work correctly on 66bf215546Sopenharmony_ci * LLVM 2.7. 67bf215546Sopenharmony_ci */ 68bf215546Sopenharmony_ci 69bf215546Sopenharmony_ci 70bf215546Sopenharmony_ci/** 71bf215546Sopenharmony_ci * Build code to compare two values 'a' and 'b' of 'type' using the given func. 72bf215546Sopenharmony_ci * \param func one of PIPE_FUNC_x 73bf215546Sopenharmony_ci * If the ordered argument is true the function will use LLVM's ordered 74bf215546Sopenharmony_ci * comparisons, otherwise unordered comparisons will be used. 75bf215546Sopenharmony_ci * The result values will be 0 for false or ~0 for true. 76bf215546Sopenharmony_ci */ 77bf215546Sopenharmony_cistatic LLVMValueRef 78bf215546Sopenharmony_cilp_build_compare_ext(struct gallivm_state *gallivm, 79bf215546Sopenharmony_ci const struct lp_type type, 80bf215546Sopenharmony_ci enum pipe_compare_func func, 81bf215546Sopenharmony_ci LLVMValueRef a, 82bf215546Sopenharmony_ci LLVMValueRef b, 83bf215546Sopenharmony_ci boolean ordered) 84bf215546Sopenharmony_ci{ 85bf215546Sopenharmony_ci LLVMBuilderRef builder = gallivm->builder; 86bf215546Sopenharmony_ci LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type); 87bf215546Sopenharmony_ci LLVMValueRef zeros = LLVMConstNull(int_vec_type); 88bf215546Sopenharmony_ci LLVMValueRef ones = LLVMConstAllOnes(int_vec_type); 89bf215546Sopenharmony_ci LLVMValueRef cond; 90bf215546Sopenharmony_ci LLVMValueRef res; 91bf215546Sopenharmony_ci 92bf215546Sopenharmony_ci assert(lp_check_value(type, a)); 93bf215546Sopenharmony_ci assert(lp_check_value(type, b)); 94bf215546Sopenharmony_ci 95bf215546Sopenharmony_ci if (func == PIPE_FUNC_NEVER) 96bf215546Sopenharmony_ci return zeros; 97bf215546Sopenharmony_ci if (func == PIPE_FUNC_ALWAYS) 98bf215546Sopenharmony_ci return ones; 99bf215546Sopenharmony_ci 100bf215546Sopenharmony_ci assert(func > PIPE_FUNC_NEVER); 101bf215546Sopenharmony_ci assert(func < PIPE_FUNC_ALWAYS); 102bf215546Sopenharmony_ci 103bf215546Sopenharmony_ci if (type.floating) { 104bf215546Sopenharmony_ci LLVMRealPredicate op; 105bf215546Sopenharmony_ci switch(func) { 106bf215546Sopenharmony_ci case PIPE_FUNC_EQUAL: 107bf215546Sopenharmony_ci op = ordered ? LLVMRealOEQ : LLVMRealUEQ; 108bf215546Sopenharmony_ci break; 109bf215546Sopenharmony_ci case PIPE_FUNC_NOTEQUAL: 110bf215546Sopenharmony_ci op = ordered ? LLVMRealONE : LLVMRealUNE; 111bf215546Sopenharmony_ci break; 112bf215546Sopenharmony_ci case PIPE_FUNC_LESS: 113bf215546Sopenharmony_ci op = ordered ? LLVMRealOLT : LLVMRealULT; 114bf215546Sopenharmony_ci break; 115bf215546Sopenharmony_ci case PIPE_FUNC_LEQUAL: 116bf215546Sopenharmony_ci op = ordered ? LLVMRealOLE : LLVMRealULE; 117bf215546Sopenharmony_ci break; 118bf215546Sopenharmony_ci case PIPE_FUNC_GREATER: 119bf215546Sopenharmony_ci op = ordered ? LLVMRealOGT : LLVMRealUGT; 120bf215546Sopenharmony_ci break; 121bf215546Sopenharmony_ci case PIPE_FUNC_GEQUAL: 122bf215546Sopenharmony_ci op = ordered ? LLVMRealOGE : LLVMRealUGE; 123bf215546Sopenharmony_ci break; 124bf215546Sopenharmony_ci default: 125bf215546Sopenharmony_ci assert(0); 126bf215546Sopenharmony_ci return lp_build_undef(gallivm, type); 127bf215546Sopenharmony_ci } 128bf215546Sopenharmony_ci 129bf215546Sopenharmony_ci cond = LLVMBuildFCmp(builder, op, a, b, ""); 130bf215546Sopenharmony_ci res = LLVMBuildSExt(builder, cond, int_vec_type, ""); 131bf215546Sopenharmony_ci } 132bf215546Sopenharmony_ci else { 133bf215546Sopenharmony_ci LLVMIntPredicate op; 134bf215546Sopenharmony_ci switch(func) { 135bf215546Sopenharmony_ci case PIPE_FUNC_EQUAL: 136bf215546Sopenharmony_ci op = LLVMIntEQ; 137bf215546Sopenharmony_ci break; 138bf215546Sopenharmony_ci case PIPE_FUNC_NOTEQUAL: 139bf215546Sopenharmony_ci op = LLVMIntNE; 140bf215546Sopenharmony_ci break; 141bf215546Sopenharmony_ci case PIPE_FUNC_LESS: 142bf215546Sopenharmony_ci op = type.sign ? LLVMIntSLT : LLVMIntULT; 143bf215546Sopenharmony_ci break; 144bf215546Sopenharmony_ci case PIPE_FUNC_LEQUAL: 145bf215546Sopenharmony_ci op = type.sign ? LLVMIntSLE : LLVMIntULE; 146bf215546Sopenharmony_ci break; 147bf215546Sopenharmony_ci case PIPE_FUNC_GREATER: 148bf215546Sopenharmony_ci op = type.sign ? LLVMIntSGT : LLVMIntUGT; 149bf215546Sopenharmony_ci break; 150bf215546Sopenharmony_ci case PIPE_FUNC_GEQUAL: 151bf215546Sopenharmony_ci op = type.sign ? LLVMIntSGE : LLVMIntUGE; 152bf215546Sopenharmony_ci break; 153bf215546Sopenharmony_ci default: 154bf215546Sopenharmony_ci assert(0); 155bf215546Sopenharmony_ci return lp_build_undef(gallivm, type); 156bf215546Sopenharmony_ci } 157bf215546Sopenharmony_ci 158bf215546Sopenharmony_ci cond = LLVMBuildICmp(builder, op, a, b, ""); 159bf215546Sopenharmony_ci res = LLVMBuildSExt(builder, cond, int_vec_type, ""); 160bf215546Sopenharmony_ci } 161bf215546Sopenharmony_ci 162bf215546Sopenharmony_ci return res; 163bf215546Sopenharmony_ci} 164bf215546Sopenharmony_ci 165bf215546Sopenharmony_ci/** 166bf215546Sopenharmony_ci * Build code to compare two values 'a' and 'b' of 'type' using the given func. 167bf215546Sopenharmony_ci * \param func one of PIPE_FUNC_x 168bf215546Sopenharmony_ci * The result values will be 0 for false or ~0 for true. 169bf215546Sopenharmony_ci */ 170bf215546Sopenharmony_ciLLVMValueRef 171bf215546Sopenharmony_cilp_build_compare(struct gallivm_state *gallivm, 172bf215546Sopenharmony_ci const struct lp_type type, 173bf215546Sopenharmony_ci enum pipe_compare_func func, 174bf215546Sopenharmony_ci LLVMValueRef a, 175bf215546Sopenharmony_ci LLVMValueRef b) 176bf215546Sopenharmony_ci{ 177bf215546Sopenharmony_ci LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type); 178bf215546Sopenharmony_ci LLVMValueRef zeros = LLVMConstNull(int_vec_type); 179bf215546Sopenharmony_ci LLVMValueRef ones = LLVMConstAllOnes(int_vec_type); 180bf215546Sopenharmony_ci 181bf215546Sopenharmony_ci assert(lp_check_value(type, a)); 182bf215546Sopenharmony_ci assert(lp_check_value(type, b)); 183bf215546Sopenharmony_ci 184bf215546Sopenharmony_ci if (func == PIPE_FUNC_NEVER) 185bf215546Sopenharmony_ci return zeros; 186bf215546Sopenharmony_ci if (func == PIPE_FUNC_ALWAYS) 187bf215546Sopenharmony_ci return ones; 188bf215546Sopenharmony_ci 189bf215546Sopenharmony_ci assert(func > PIPE_FUNC_NEVER); 190bf215546Sopenharmony_ci assert(func < PIPE_FUNC_ALWAYS); 191bf215546Sopenharmony_ci 192bf215546Sopenharmony_ci#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) 193bf215546Sopenharmony_ci /* 194bf215546Sopenharmony_ci * There are no unsigned integer comparison instructions in SSE. 195bf215546Sopenharmony_ci */ 196bf215546Sopenharmony_ci 197bf215546Sopenharmony_ci if (!type.floating && !type.sign && 198bf215546Sopenharmony_ci type.width * type.length == 128 && 199bf215546Sopenharmony_ci util_get_cpu_caps()->has_sse2 && 200bf215546Sopenharmony_ci (func == PIPE_FUNC_LESS || 201bf215546Sopenharmony_ci func == PIPE_FUNC_LEQUAL || 202bf215546Sopenharmony_ci func == PIPE_FUNC_GREATER || 203bf215546Sopenharmony_ci func == PIPE_FUNC_GEQUAL) && 204bf215546Sopenharmony_ci (gallivm_debug & GALLIVM_DEBUG_PERF)) { 205bf215546Sopenharmony_ci debug_printf("%s: inefficient <%u x i%u> unsigned comparison\n", 206bf215546Sopenharmony_ci __FUNCTION__, type.length, type.width); 207bf215546Sopenharmony_ci } 208bf215546Sopenharmony_ci#endif 209bf215546Sopenharmony_ci 210bf215546Sopenharmony_ci return lp_build_compare_ext(gallivm, type, func, a, b, FALSE); 211bf215546Sopenharmony_ci} 212bf215546Sopenharmony_ci 213bf215546Sopenharmony_ci/** 214bf215546Sopenharmony_ci * Build code to compare two values 'a' and 'b' using the given func. 215bf215546Sopenharmony_ci * \param func one of PIPE_FUNC_x 216bf215546Sopenharmony_ci * If the operands are floating point numbers, the function will use 217bf215546Sopenharmony_ci * ordered comparison which means that it will return true if both 218bf215546Sopenharmony_ci * operands are not a NaN and the specified condition evaluates to true. 219bf215546Sopenharmony_ci * The result values will be 0 for false or ~0 for true. 220bf215546Sopenharmony_ci */ 221bf215546Sopenharmony_ciLLVMValueRef 222bf215546Sopenharmony_cilp_build_cmp_ordered(struct lp_build_context *bld, 223bf215546Sopenharmony_ci enum pipe_compare_func func, 224bf215546Sopenharmony_ci LLVMValueRef a, 225bf215546Sopenharmony_ci LLVMValueRef b) 226bf215546Sopenharmony_ci{ 227bf215546Sopenharmony_ci return lp_build_compare_ext(bld->gallivm, bld->type, func, a, b, TRUE); 228bf215546Sopenharmony_ci} 229bf215546Sopenharmony_ci 230bf215546Sopenharmony_ci/** 231bf215546Sopenharmony_ci * Build code to compare two values 'a' and 'b' using the given func. 232bf215546Sopenharmony_ci * \param func one of PIPE_FUNC_x 233bf215546Sopenharmony_ci * If the operands are floating point numbers, the function will use 234bf215546Sopenharmony_ci * unordered comparison which means that it will return true if either 235bf215546Sopenharmony_ci * operand is a NaN or the specified condition evaluates to true. 236bf215546Sopenharmony_ci * The result values will be 0 for false or ~0 for true. 237bf215546Sopenharmony_ci */ 238bf215546Sopenharmony_ciLLVMValueRef 239bf215546Sopenharmony_cilp_build_cmp(struct lp_build_context *bld, 240bf215546Sopenharmony_ci enum pipe_compare_func func, 241bf215546Sopenharmony_ci LLVMValueRef a, 242bf215546Sopenharmony_ci LLVMValueRef b) 243bf215546Sopenharmony_ci{ 244bf215546Sopenharmony_ci return lp_build_compare(bld->gallivm, bld->type, func, a, b); 245bf215546Sopenharmony_ci} 246bf215546Sopenharmony_ci 247bf215546Sopenharmony_ci 248bf215546Sopenharmony_ci/** 249bf215546Sopenharmony_ci * Return (mask & a) | (~mask & b); 250bf215546Sopenharmony_ci */ 251bf215546Sopenharmony_ciLLVMValueRef 252bf215546Sopenharmony_cilp_build_select_bitwise(struct lp_build_context *bld, 253bf215546Sopenharmony_ci LLVMValueRef mask, 254bf215546Sopenharmony_ci LLVMValueRef a, 255bf215546Sopenharmony_ci LLVMValueRef b) 256bf215546Sopenharmony_ci{ 257bf215546Sopenharmony_ci LLVMBuilderRef builder = bld->gallivm->builder; 258bf215546Sopenharmony_ci struct lp_type type = bld->type; 259bf215546Sopenharmony_ci LLVMValueRef res; 260bf215546Sopenharmony_ci LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->gallivm, type); 261bf215546Sopenharmony_ci 262bf215546Sopenharmony_ci assert(lp_check_value(type, a)); 263bf215546Sopenharmony_ci assert(lp_check_value(type, b)); 264bf215546Sopenharmony_ci 265bf215546Sopenharmony_ci if (a == b) { 266bf215546Sopenharmony_ci return a; 267bf215546Sopenharmony_ci } 268bf215546Sopenharmony_ci 269bf215546Sopenharmony_ci if (type.floating) { 270bf215546Sopenharmony_ci a = LLVMBuildBitCast(builder, a, int_vec_type, ""); 271bf215546Sopenharmony_ci b = LLVMBuildBitCast(builder, b, int_vec_type, ""); 272bf215546Sopenharmony_ci } 273bf215546Sopenharmony_ci 274bf215546Sopenharmony_ci if (type.width > 32) 275bf215546Sopenharmony_ci mask = LLVMBuildSExt(builder, mask, int_vec_type, ""); 276bf215546Sopenharmony_ci a = LLVMBuildAnd(builder, a, mask, ""); 277bf215546Sopenharmony_ci 278bf215546Sopenharmony_ci /* This often gets translated to PANDN, but sometimes the NOT is 279bf215546Sopenharmony_ci * pre-computed and stored in another constant. The best strategy depends 280bf215546Sopenharmony_ci * on available registers, so it is not a big deal -- hopefully LLVM does 281bf215546Sopenharmony_ci * the right decision attending the rest of the program. 282bf215546Sopenharmony_ci */ 283bf215546Sopenharmony_ci b = LLVMBuildAnd(builder, b, LLVMBuildNot(builder, mask, ""), ""); 284bf215546Sopenharmony_ci 285bf215546Sopenharmony_ci res = LLVMBuildOr(builder, a, b, ""); 286bf215546Sopenharmony_ci 287bf215546Sopenharmony_ci if (type.floating) { 288bf215546Sopenharmony_ci LLVMTypeRef vec_type = lp_build_vec_type(bld->gallivm, type); 289bf215546Sopenharmony_ci res = LLVMBuildBitCast(builder, res, vec_type, ""); 290bf215546Sopenharmony_ci } 291bf215546Sopenharmony_ci 292bf215546Sopenharmony_ci return res; 293bf215546Sopenharmony_ci} 294bf215546Sopenharmony_ci 295bf215546Sopenharmony_ci 296bf215546Sopenharmony_ci/** 297bf215546Sopenharmony_ci * Return mask ? a : b; 298bf215546Sopenharmony_ci * 299bf215546Sopenharmony_ci * mask is a bitwise mask, composed of 0 or ~0 for each element. Any other value 300bf215546Sopenharmony_ci * will yield unpredictable results. 301bf215546Sopenharmony_ci */ 302bf215546Sopenharmony_ciLLVMValueRef 303bf215546Sopenharmony_cilp_build_select(struct lp_build_context *bld, 304bf215546Sopenharmony_ci LLVMValueRef mask, 305bf215546Sopenharmony_ci LLVMValueRef a, 306bf215546Sopenharmony_ci LLVMValueRef b) 307bf215546Sopenharmony_ci{ 308bf215546Sopenharmony_ci LLVMBuilderRef builder = bld->gallivm->builder; 309bf215546Sopenharmony_ci LLVMContextRef lc = bld->gallivm->context; 310bf215546Sopenharmony_ci struct lp_type type = bld->type; 311bf215546Sopenharmony_ci LLVMValueRef res; 312bf215546Sopenharmony_ci 313bf215546Sopenharmony_ci assert(lp_check_value(type, a)); 314bf215546Sopenharmony_ci assert(lp_check_value(type, b)); 315bf215546Sopenharmony_ci 316bf215546Sopenharmony_ci if (a == b) 317bf215546Sopenharmony_ci return a; 318bf215546Sopenharmony_ci 319bf215546Sopenharmony_ci if (type.length == 1) { 320bf215546Sopenharmony_ci mask = LLVMBuildTrunc(builder, mask, LLVMInt1TypeInContext(lc), ""); 321bf215546Sopenharmony_ci res = LLVMBuildSelect(builder, mask, a, b, ""); 322bf215546Sopenharmony_ci } 323bf215546Sopenharmony_ci else if (LLVMIsConstant(mask) || 324bf215546Sopenharmony_ci LLVMGetInstructionOpcode(mask) == LLVMSExt) { 325bf215546Sopenharmony_ci /* Generate a vector select. 326bf215546Sopenharmony_ci * 327bf215546Sopenharmony_ci * Using vector selects should avoid emitting intrinsics hence avoid 328bf215546Sopenharmony_ci * hindering optimization passes, but vector selects weren't properly 329bf215546Sopenharmony_ci * supported yet for a long time, and LLVM will generate poor code when 330bf215546Sopenharmony_ci * the mask is not the result of a comparison. 331bf215546Sopenharmony_ci * XXX: Even if the instruction was an SExt, this may still produce 332bf215546Sopenharmony_ci * terrible code. Try piglit stencil-twoside. 333bf215546Sopenharmony_ci */ 334bf215546Sopenharmony_ci 335bf215546Sopenharmony_ci /* Convert the mask to a vector of booleans. 336bf215546Sopenharmony_ci * 337bf215546Sopenharmony_ci * XXX: In x86 the mask is controlled by the MSB, so if we shifted the 338bf215546Sopenharmony_ci * mask by `type.width - 1`, LLVM should realize the mask is ready. Alas 339bf215546Sopenharmony_ci * what really happens is that LLVM will emit two shifts back to back. 340bf215546Sopenharmony_ci */ 341bf215546Sopenharmony_ci if (0) { 342bf215546Sopenharmony_ci LLVMValueRef shift = 343bf215546Sopenharmony_ci LLVMConstInt(bld->int_elem_type, bld->type.width - 1, 0); 344bf215546Sopenharmony_ci shift = lp_build_broadcast(bld->gallivm, bld->int_vec_type, shift); 345bf215546Sopenharmony_ci mask = LLVMBuildLShr(builder, mask, shift, ""); 346bf215546Sopenharmony_ci } 347bf215546Sopenharmony_ci LLVMTypeRef bool_vec_type = 348bf215546Sopenharmony_ci LLVMVectorType(LLVMInt1TypeInContext(lc), type.length); 349bf215546Sopenharmony_ci mask = LLVMBuildTrunc(builder, mask, bool_vec_type, ""); 350bf215546Sopenharmony_ci 351bf215546Sopenharmony_ci res = LLVMBuildSelect(builder, mask, a, b, ""); 352bf215546Sopenharmony_ci } 353bf215546Sopenharmony_ci else if (((util_get_cpu_caps()->has_sse4_1 && 354bf215546Sopenharmony_ci type.width * type.length == 128) || 355bf215546Sopenharmony_ci (util_get_cpu_caps()->has_avx && 356bf215546Sopenharmony_ci type.width * type.length == 256 && type.width >= 32) || 357bf215546Sopenharmony_ci (util_get_cpu_caps()->has_avx2 && 358bf215546Sopenharmony_ci type.width * type.length == 256)) && 359bf215546Sopenharmony_ci !LLVMIsConstant(a) && 360bf215546Sopenharmony_ci !LLVMIsConstant(b) && 361bf215546Sopenharmony_ci !LLVMIsConstant(mask)) { 362bf215546Sopenharmony_ci const char *intrinsic; 363bf215546Sopenharmony_ci LLVMTypeRef arg_type; 364bf215546Sopenharmony_ci LLVMValueRef args[3]; 365bf215546Sopenharmony_ci 366bf215546Sopenharmony_ci LLVMTypeRef mask_type = LLVMGetElementType(LLVMTypeOf(mask)); 367bf215546Sopenharmony_ci if (LLVMGetIntTypeWidth(mask_type) != type.width) { 368bf215546Sopenharmony_ci LLVMTypeRef int_vec_type = 369bf215546Sopenharmony_ci LLVMVectorType(LLVMIntTypeInContext(lc, type.width), type.length); 370bf215546Sopenharmony_ci mask = LLVMBuildSExt(builder, mask, int_vec_type, ""); 371bf215546Sopenharmony_ci } 372bf215546Sopenharmony_ci /* 373bf215546Sopenharmony_ci * There's only float blend in AVX but can just cast i32/i64 374bf215546Sopenharmony_ci * to float. 375bf215546Sopenharmony_ci */ 376bf215546Sopenharmony_ci if (type.width * type.length == 256) { 377bf215546Sopenharmony_ci if (type.width == 64) { 378bf215546Sopenharmony_ci intrinsic = "llvm.x86.avx.blendv.pd.256"; 379bf215546Sopenharmony_ci arg_type = LLVMVectorType(LLVMDoubleTypeInContext(lc), 4); 380bf215546Sopenharmony_ci } 381bf215546Sopenharmony_ci else if (type.width == 32) { 382bf215546Sopenharmony_ci intrinsic = "llvm.x86.avx.blendv.ps.256"; 383bf215546Sopenharmony_ci arg_type = LLVMVectorType(LLVMFloatTypeInContext(lc), 8); 384bf215546Sopenharmony_ci } else { 385bf215546Sopenharmony_ci assert(util_get_cpu_caps()->has_avx2); 386bf215546Sopenharmony_ci intrinsic = "llvm.x86.avx2.pblendvb"; 387bf215546Sopenharmony_ci arg_type = LLVMVectorType(LLVMInt8TypeInContext(lc), 32); 388bf215546Sopenharmony_ci } 389bf215546Sopenharmony_ci } 390bf215546Sopenharmony_ci else if (type.floating && 391bf215546Sopenharmony_ci type.width == 64) { 392bf215546Sopenharmony_ci intrinsic = "llvm.x86.sse41.blendvpd"; 393bf215546Sopenharmony_ci arg_type = LLVMVectorType(LLVMDoubleTypeInContext(lc), 2); 394bf215546Sopenharmony_ci } else if (type.floating && 395bf215546Sopenharmony_ci type.width == 32) { 396bf215546Sopenharmony_ci intrinsic = "llvm.x86.sse41.blendvps"; 397bf215546Sopenharmony_ci arg_type = LLVMVectorType(LLVMFloatTypeInContext(lc), 4); 398bf215546Sopenharmony_ci } else { 399bf215546Sopenharmony_ci intrinsic = "llvm.x86.sse41.pblendvb"; 400bf215546Sopenharmony_ci arg_type = LLVMVectorType(LLVMInt8TypeInContext(lc), 16); 401bf215546Sopenharmony_ci } 402bf215546Sopenharmony_ci 403bf215546Sopenharmony_ci if (arg_type != bld->int_vec_type) { 404bf215546Sopenharmony_ci mask = LLVMBuildBitCast(builder, mask, arg_type, ""); 405bf215546Sopenharmony_ci } 406bf215546Sopenharmony_ci 407bf215546Sopenharmony_ci if (arg_type != bld->vec_type) { 408bf215546Sopenharmony_ci a = LLVMBuildBitCast(builder, a, arg_type, ""); 409bf215546Sopenharmony_ci b = LLVMBuildBitCast(builder, b, arg_type, ""); 410bf215546Sopenharmony_ci } 411bf215546Sopenharmony_ci 412bf215546Sopenharmony_ci args[0] = b; 413bf215546Sopenharmony_ci args[1] = a; 414bf215546Sopenharmony_ci args[2] = mask; 415bf215546Sopenharmony_ci 416bf215546Sopenharmony_ci res = lp_build_intrinsic(builder, intrinsic, 417bf215546Sopenharmony_ci arg_type, args, ARRAY_SIZE(args), 0); 418bf215546Sopenharmony_ci 419bf215546Sopenharmony_ci if (arg_type != bld->vec_type) { 420bf215546Sopenharmony_ci res = LLVMBuildBitCast(builder, res, bld->vec_type, ""); 421bf215546Sopenharmony_ci } 422bf215546Sopenharmony_ci } 423bf215546Sopenharmony_ci else { 424bf215546Sopenharmony_ci res = lp_build_select_bitwise(bld, mask, a, b); 425bf215546Sopenharmony_ci } 426bf215546Sopenharmony_ci 427bf215546Sopenharmony_ci return res; 428bf215546Sopenharmony_ci} 429bf215546Sopenharmony_ci 430bf215546Sopenharmony_ci 431bf215546Sopenharmony_ci/** 432bf215546Sopenharmony_ci * Return mask ? a : b; 433bf215546Sopenharmony_ci * 434bf215546Sopenharmony_ci * mask is a TGSI_WRITEMASK_xxx. 435bf215546Sopenharmony_ci */ 436bf215546Sopenharmony_ciLLVMValueRef 437bf215546Sopenharmony_cilp_build_select_aos(struct lp_build_context *bld, 438bf215546Sopenharmony_ci unsigned mask, 439bf215546Sopenharmony_ci LLVMValueRef a, 440bf215546Sopenharmony_ci LLVMValueRef b, 441bf215546Sopenharmony_ci unsigned num_channels) 442bf215546Sopenharmony_ci{ 443bf215546Sopenharmony_ci LLVMBuilderRef builder = bld->gallivm->builder; 444bf215546Sopenharmony_ci const struct lp_type type = bld->type; 445bf215546Sopenharmony_ci const unsigned n = type.length; 446bf215546Sopenharmony_ci 447bf215546Sopenharmony_ci assert((mask & ~0xf) == 0); 448bf215546Sopenharmony_ci assert(lp_check_value(type, a)); 449bf215546Sopenharmony_ci assert(lp_check_value(type, b)); 450bf215546Sopenharmony_ci 451bf215546Sopenharmony_ci if (a == b) 452bf215546Sopenharmony_ci return a; 453bf215546Sopenharmony_ci if ((mask & 0xf) == 0xf) 454bf215546Sopenharmony_ci return a; 455bf215546Sopenharmony_ci if ((mask & 0xf) == 0x0) 456bf215546Sopenharmony_ci return b; 457bf215546Sopenharmony_ci if (a == bld->undef || b == bld->undef) 458bf215546Sopenharmony_ci return bld->undef; 459bf215546Sopenharmony_ci 460bf215546Sopenharmony_ci /* 461bf215546Sopenharmony_ci * There are two major ways of accomplishing this: 462bf215546Sopenharmony_ci * - with a shuffle 463bf215546Sopenharmony_ci * - with a select 464bf215546Sopenharmony_ci * 465bf215546Sopenharmony_ci * The flip between these is empirical and might need to be adjusted. 466bf215546Sopenharmony_ci */ 467bf215546Sopenharmony_ci if (n <= 4) { 468bf215546Sopenharmony_ci /* 469bf215546Sopenharmony_ci * Shuffle. 470bf215546Sopenharmony_ci */ 471bf215546Sopenharmony_ci LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context); 472bf215546Sopenharmony_ci LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; 473bf215546Sopenharmony_ci 474bf215546Sopenharmony_ci for (unsigned j = 0; j < n; j += num_channels) 475bf215546Sopenharmony_ci for (unsigned i = 0; i < num_channels; ++i) 476bf215546Sopenharmony_ci shuffles[j + i] = LLVMConstInt(elem_type, 477bf215546Sopenharmony_ci (mask & (1 << i) ? 0 : n) + j + i, 478bf215546Sopenharmony_ci 0); 479bf215546Sopenharmony_ci 480bf215546Sopenharmony_ci return LLVMBuildShuffleVector(builder, a, b, 481bf215546Sopenharmony_ci LLVMConstVector(shuffles, n), ""); 482bf215546Sopenharmony_ci } 483bf215546Sopenharmony_ci else { 484bf215546Sopenharmony_ci LLVMValueRef mask_vec = lp_build_const_mask_aos(bld->gallivm, 485bf215546Sopenharmony_ci type, mask, num_channels); 486bf215546Sopenharmony_ci return lp_build_select(bld, mask_vec, a, b); 487bf215546Sopenharmony_ci } 488bf215546Sopenharmony_ci} 489bf215546Sopenharmony_ci 490bf215546Sopenharmony_ci 491bf215546Sopenharmony_ci/** 492bf215546Sopenharmony_ci * Return (scalar-cast)val ? true : false; 493bf215546Sopenharmony_ci */ 494bf215546Sopenharmony_ciLLVMValueRef 495bf215546Sopenharmony_cilp_build_any_true_range(struct lp_build_context *bld, 496bf215546Sopenharmony_ci unsigned real_length, 497bf215546Sopenharmony_ci LLVMValueRef val) 498bf215546Sopenharmony_ci{ 499bf215546Sopenharmony_ci LLVMBuilderRef builder = bld->gallivm->builder; 500bf215546Sopenharmony_ci LLVMTypeRef scalar_type; 501bf215546Sopenharmony_ci LLVMTypeRef true_type; 502bf215546Sopenharmony_ci 503bf215546Sopenharmony_ci assert(real_length <= bld->type.length); 504bf215546Sopenharmony_ci 505bf215546Sopenharmony_ci true_type = LLVMIntTypeInContext(bld->gallivm->context, 506bf215546Sopenharmony_ci bld->type.width * real_length); 507bf215546Sopenharmony_ci scalar_type = LLVMIntTypeInContext(bld->gallivm->context, 508bf215546Sopenharmony_ci bld->type.width * bld->type.length); 509bf215546Sopenharmony_ci val = LLVMBuildBitCast(builder, val, scalar_type, ""); 510bf215546Sopenharmony_ci /* 511bf215546Sopenharmony_ci * We're using always native types so we can use intrinsics. 512bf215546Sopenharmony_ci * However, if we don't do per-element calculations, we must ensure 513bf215546Sopenharmony_ci * the excess elements aren't used since they may contain garbage. 514bf215546Sopenharmony_ci */ 515bf215546Sopenharmony_ci if (real_length < bld->type.length) { 516bf215546Sopenharmony_ci val = LLVMBuildTrunc(builder, val, true_type, ""); 517bf215546Sopenharmony_ci } 518bf215546Sopenharmony_ci return LLVMBuildICmp(builder, LLVMIntNE, 519bf215546Sopenharmony_ci val, LLVMConstNull(true_type), ""); 520bf215546Sopenharmony_ci} 521