1bf215546Sopenharmony_ci/**************************************************************************
2bf215546Sopenharmony_ci *
3bf215546Sopenharmony_ci * Copyright 2009 VMware, Inc.
4bf215546Sopenharmony_ci * All Rights Reserved.
5bf215546Sopenharmony_ci *
6bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
7bf215546Sopenharmony_ci * copy of this software and associated documentation files (the
8bf215546Sopenharmony_ci * "Software"), to deal in the Software without restriction, including
9bf215546Sopenharmony_ci * without limitation the rights to use, copy, modify, merge, publish,
10bf215546Sopenharmony_ci * distribute, sub license, and/or sell copies of the Software, and to
11bf215546Sopenharmony_ci * permit persons to whom the Software is furnished to do so, subject to
12bf215546Sopenharmony_ci * the following conditions:
13bf215546Sopenharmony_ci *
14bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the
15bf215546Sopenharmony_ci * next paragraph) shall be included in all copies or substantial portions
16bf215546Sopenharmony_ci * of the Software.
17bf215546Sopenharmony_ci *
18bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19bf215546Sopenharmony_ci * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20bf215546Sopenharmony_ci * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21bf215546Sopenharmony_ci * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22bf215546Sopenharmony_ci * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23bf215546Sopenharmony_ci * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24bf215546Sopenharmony_ci * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25bf215546Sopenharmony_ci *
26bf215546Sopenharmony_ci **************************************************************************/
27bf215546Sopenharmony_ci
28bf215546Sopenharmony_ci/**
29bf215546Sopenharmony_ci * @file
30bf215546Sopenharmony_ci * Helper functions for logical operations.
31bf215546Sopenharmony_ci *
32bf215546Sopenharmony_ci * @author Jose Fonseca <jfonseca@vmware.com>
33bf215546Sopenharmony_ci */
34bf215546Sopenharmony_ci
35bf215546Sopenharmony_ci#include <llvm/Config/llvm-config.h>
36bf215546Sopenharmony_ci
37bf215546Sopenharmony_ci#include "util/u_cpu_detect.h"
38bf215546Sopenharmony_ci#include "util/u_memory.h"
39bf215546Sopenharmony_ci#include "util/u_debug.h"
40bf215546Sopenharmony_ci
41bf215546Sopenharmony_ci#include "lp_bld_type.h"
42bf215546Sopenharmony_ci#include "lp_bld_const.h"
43bf215546Sopenharmony_ci#include "lp_bld_swizzle.h"
44bf215546Sopenharmony_ci#include "lp_bld_init.h"
45bf215546Sopenharmony_ci#include "lp_bld_intr.h"
46bf215546Sopenharmony_ci#include "lp_bld_debug.h"
47bf215546Sopenharmony_ci#include "lp_bld_logic.h"
48bf215546Sopenharmony_ci
49bf215546Sopenharmony_ci
50bf215546Sopenharmony_ci/*
51bf215546Sopenharmony_ci * XXX
52bf215546Sopenharmony_ci *
53bf215546Sopenharmony_ci * Selection with vector conditional like
54bf215546Sopenharmony_ci *
55bf215546Sopenharmony_ci *    select <4 x i1> %C, %A, %B
56bf215546Sopenharmony_ci *
57bf215546Sopenharmony_ci * is valid IR (e.g. llvm/test/Assembler/vector-select.ll), but it is only
58bf215546Sopenharmony_ci * supported on some backends (x86) starting with llvm 3.1.
59bf215546Sopenharmony_ci *
60bf215546Sopenharmony_ci * Expanding the boolean vector to full SIMD register width, as in
61bf215546Sopenharmony_ci *
62bf215546Sopenharmony_ci *    sext <4 x i1> %C to <4 x i32>
63bf215546Sopenharmony_ci *
64bf215546Sopenharmony_ci * is valid and supported (e.g., llvm/test/CodeGen/X86/vec_compare.ll), but
65bf215546Sopenharmony_ci * it causes assertion failures in LLVM 2.6. It appears to work correctly on
66bf215546Sopenharmony_ci * LLVM 2.7.
67bf215546Sopenharmony_ci */
68bf215546Sopenharmony_ci
69bf215546Sopenharmony_ci
70bf215546Sopenharmony_ci/**
71bf215546Sopenharmony_ci * Build code to compare two values 'a' and 'b' of 'type' using the given func.
72bf215546Sopenharmony_ci * \param func  one of PIPE_FUNC_x
73bf215546Sopenharmony_ci * If the ordered argument is true the function will use LLVM's ordered
74bf215546Sopenharmony_ci * comparisons, otherwise unordered comparisons will be used.
75bf215546Sopenharmony_ci * The result values will be 0 for false or ~0 for true.
76bf215546Sopenharmony_ci */
77bf215546Sopenharmony_cistatic LLVMValueRef
78bf215546Sopenharmony_cilp_build_compare_ext(struct gallivm_state *gallivm,
79bf215546Sopenharmony_ci                     const struct lp_type type,
80bf215546Sopenharmony_ci                     enum pipe_compare_func func,
81bf215546Sopenharmony_ci                     LLVMValueRef a,
82bf215546Sopenharmony_ci                     LLVMValueRef b,
83bf215546Sopenharmony_ci                     boolean ordered)
84bf215546Sopenharmony_ci{
85bf215546Sopenharmony_ci   LLVMBuilderRef builder = gallivm->builder;
86bf215546Sopenharmony_ci   LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type);
87bf215546Sopenharmony_ci   LLVMValueRef zeros = LLVMConstNull(int_vec_type);
88bf215546Sopenharmony_ci   LLVMValueRef ones = LLVMConstAllOnes(int_vec_type);
89bf215546Sopenharmony_ci   LLVMValueRef cond;
90bf215546Sopenharmony_ci   LLVMValueRef res;
91bf215546Sopenharmony_ci
92bf215546Sopenharmony_ci   assert(lp_check_value(type, a));
93bf215546Sopenharmony_ci   assert(lp_check_value(type, b));
94bf215546Sopenharmony_ci
95bf215546Sopenharmony_ci   if (func == PIPE_FUNC_NEVER)
96bf215546Sopenharmony_ci      return zeros;
97bf215546Sopenharmony_ci   if (func == PIPE_FUNC_ALWAYS)
98bf215546Sopenharmony_ci      return ones;
99bf215546Sopenharmony_ci
100bf215546Sopenharmony_ci   assert(func > PIPE_FUNC_NEVER);
101bf215546Sopenharmony_ci   assert(func < PIPE_FUNC_ALWAYS);
102bf215546Sopenharmony_ci
103bf215546Sopenharmony_ci   if (type.floating) {
104bf215546Sopenharmony_ci      LLVMRealPredicate op;
105bf215546Sopenharmony_ci      switch(func) {
106bf215546Sopenharmony_ci      case PIPE_FUNC_EQUAL:
107bf215546Sopenharmony_ci         op = ordered ? LLVMRealOEQ : LLVMRealUEQ;
108bf215546Sopenharmony_ci         break;
109bf215546Sopenharmony_ci      case PIPE_FUNC_NOTEQUAL:
110bf215546Sopenharmony_ci         op = ordered ? LLVMRealONE : LLVMRealUNE;
111bf215546Sopenharmony_ci         break;
112bf215546Sopenharmony_ci      case PIPE_FUNC_LESS:
113bf215546Sopenharmony_ci         op = ordered ? LLVMRealOLT : LLVMRealULT;
114bf215546Sopenharmony_ci         break;
115bf215546Sopenharmony_ci      case PIPE_FUNC_LEQUAL:
116bf215546Sopenharmony_ci         op = ordered ? LLVMRealOLE : LLVMRealULE;
117bf215546Sopenharmony_ci         break;
118bf215546Sopenharmony_ci      case PIPE_FUNC_GREATER:
119bf215546Sopenharmony_ci         op = ordered ? LLVMRealOGT : LLVMRealUGT;
120bf215546Sopenharmony_ci         break;
121bf215546Sopenharmony_ci      case PIPE_FUNC_GEQUAL:
122bf215546Sopenharmony_ci         op = ordered ? LLVMRealOGE : LLVMRealUGE;
123bf215546Sopenharmony_ci         break;
124bf215546Sopenharmony_ci      default:
125bf215546Sopenharmony_ci         assert(0);
126bf215546Sopenharmony_ci         return lp_build_undef(gallivm, type);
127bf215546Sopenharmony_ci      }
128bf215546Sopenharmony_ci
129bf215546Sopenharmony_ci      cond = LLVMBuildFCmp(builder, op, a, b, "");
130bf215546Sopenharmony_ci      res = LLVMBuildSExt(builder, cond, int_vec_type, "");
131bf215546Sopenharmony_ci   }
132bf215546Sopenharmony_ci   else {
133bf215546Sopenharmony_ci      LLVMIntPredicate op;
134bf215546Sopenharmony_ci      switch(func) {
135bf215546Sopenharmony_ci      case PIPE_FUNC_EQUAL:
136bf215546Sopenharmony_ci         op = LLVMIntEQ;
137bf215546Sopenharmony_ci         break;
138bf215546Sopenharmony_ci      case PIPE_FUNC_NOTEQUAL:
139bf215546Sopenharmony_ci         op = LLVMIntNE;
140bf215546Sopenharmony_ci         break;
141bf215546Sopenharmony_ci      case PIPE_FUNC_LESS:
142bf215546Sopenharmony_ci         op = type.sign ? LLVMIntSLT : LLVMIntULT;
143bf215546Sopenharmony_ci         break;
144bf215546Sopenharmony_ci      case PIPE_FUNC_LEQUAL:
145bf215546Sopenharmony_ci         op = type.sign ? LLVMIntSLE : LLVMIntULE;
146bf215546Sopenharmony_ci         break;
147bf215546Sopenharmony_ci      case PIPE_FUNC_GREATER:
148bf215546Sopenharmony_ci         op = type.sign ? LLVMIntSGT : LLVMIntUGT;
149bf215546Sopenharmony_ci         break;
150bf215546Sopenharmony_ci      case PIPE_FUNC_GEQUAL:
151bf215546Sopenharmony_ci         op = type.sign ? LLVMIntSGE : LLVMIntUGE;
152bf215546Sopenharmony_ci         break;
153bf215546Sopenharmony_ci      default:
154bf215546Sopenharmony_ci         assert(0);
155bf215546Sopenharmony_ci         return lp_build_undef(gallivm, type);
156bf215546Sopenharmony_ci      }
157bf215546Sopenharmony_ci
158bf215546Sopenharmony_ci      cond = LLVMBuildICmp(builder, op, a, b, "");
159bf215546Sopenharmony_ci      res = LLVMBuildSExt(builder, cond, int_vec_type, "");
160bf215546Sopenharmony_ci   }
161bf215546Sopenharmony_ci
162bf215546Sopenharmony_ci   return res;
163bf215546Sopenharmony_ci}
164bf215546Sopenharmony_ci
165bf215546Sopenharmony_ci/**
166bf215546Sopenharmony_ci * Build code to compare two values 'a' and 'b' of 'type' using the given func.
167bf215546Sopenharmony_ci * \param func  one of PIPE_FUNC_x
168bf215546Sopenharmony_ci * The result values will be 0 for false or ~0 for true.
169bf215546Sopenharmony_ci */
170bf215546Sopenharmony_ciLLVMValueRef
171bf215546Sopenharmony_cilp_build_compare(struct gallivm_state *gallivm,
172bf215546Sopenharmony_ci                 const struct lp_type type,
173bf215546Sopenharmony_ci                 enum pipe_compare_func func,
174bf215546Sopenharmony_ci                 LLVMValueRef a,
175bf215546Sopenharmony_ci                 LLVMValueRef b)
176bf215546Sopenharmony_ci{
177bf215546Sopenharmony_ci   LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type);
178bf215546Sopenharmony_ci   LLVMValueRef zeros = LLVMConstNull(int_vec_type);
179bf215546Sopenharmony_ci   LLVMValueRef ones = LLVMConstAllOnes(int_vec_type);
180bf215546Sopenharmony_ci
181bf215546Sopenharmony_ci   assert(lp_check_value(type, a));
182bf215546Sopenharmony_ci   assert(lp_check_value(type, b));
183bf215546Sopenharmony_ci
184bf215546Sopenharmony_ci   if (func == PIPE_FUNC_NEVER)
185bf215546Sopenharmony_ci      return zeros;
186bf215546Sopenharmony_ci   if (func == PIPE_FUNC_ALWAYS)
187bf215546Sopenharmony_ci      return ones;
188bf215546Sopenharmony_ci
189bf215546Sopenharmony_ci   assert(func > PIPE_FUNC_NEVER);
190bf215546Sopenharmony_ci   assert(func < PIPE_FUNC_ALWAYS);
191bf215546Sopenharmony_ci
192bf215546Sopenharmony_ci#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
193bf215546Sopenharmony_ci   /*
194bf215546Sopenharmony_ci    * There are no unsigned integer comparison instructions in SSE.
195bf215546Sopenharmony_ci    */
196bf215546Sopenharmony_ci
197bf215546Sopenharmony_ci   if (!type.floating && !type.sign &&
198bf215546Sopenharmony_ci       type.width * type.length == 128 &&
199bf215546Sopenharmony_ci       util_get_cpu_caps()->has_sse2 &&
200bf215546Sopenharmony_ci       (func == PIPE_FUNC_LESS ||
201bf215546Sopenharmony_ci        func == PIPE_FUNC_LEQUAL ||
202bf215546Sopenharmony_ci        func == PIPE_FUNC_GREATER ||
203bf215546Sopenharmony_ci        func == PIPE_FUNC_GEQUAL) &&
204bf215546Sopenharmony_ci       (gallivm_debug & GALLIVM_DEBUG_PERF)) {
205bf215546Sopenharmony_ci         debug_printf("%s: inefficient <%u x i%u> unsigned comparison\n",
206bf215546Sopenharmony_ci                      __FUNCTION__, type.length, type.width);
207bf215546Sopenharmony_ci   }
208bf215546Sopenharmony_ci#endif
209bf215546Sopenharmony_ci
210bf215546Sopenharmony_ci   return lp_build_compare_ext(gallivm, type, func, a, b, FALSE);
211bf215546Sopenharmony_ci}
212bf215546Sopenharmony_ci
213bf215546Sopenharmony_ci/**
214bf215546Sopenharmony_ci * Build code to compare two values 'a' and 'b' using the given func.
215bf215546Sopenharmony_ci * \param func  one of PIPE_FUNC_x
216bf215546Sopenharmony_ci * If the operands are floating point numbers, the function will use
217bf215546Sopenharmony_ci * ordered comparison which means that it will return true if both
218bf215546Sopenharmony_ci * operands are not a NaN and the specified condition evaluates to true.
219bf215546Sopenharmony_ci * The result values will be 0 for false or ~0 for true.
220bf215546Sopenharmony_ci */
221bf215546Sopenharmony_ciLLVMValueRef
222bf215546Sopenharmony_cilp_build_cmp_ordered(struct lp_build_context *bld,
223bf215546Sopenharmony_ci                     enum pipe_compare_func func,
224bf215546Sopenharmony_ci                     LLVMValueRef a,
225bf215546Sopenharmony_ci                     LLVMValueRef b)
226bf215546Sopenharmony_ci{
227bf215546Sopenharmony_ci   return lp_build_compare_ext(bld->gallivm, bld->type, func, a, b, TRUE);
228bf215546Sopenharmony_ci}
229bf215546Sopenharmony_ci
230bf215546Sopenharmony_ci/**
231bf215546Sopenharmony_ci * Build code to compare two values 'a' and 'b' using the given func.
232bf215546Sopenharmony_ci * \param func  one of PIPE_FUNC_x
233bf215546Sopenharmony_ci * If the operands are floating point numbers, the function will use
234bf215546Sopenharmony_ci * unordered comparison which means that it will return true if either
235bf215546Sopenharmony_ci * operand is a NaN or the specified condition evaluates to true.
236bf215546Sopenharmony_ci * The result values will be 0 for false or ~0 for true.
237bf215546Sopenharmony_ci */
238bf215546Sopenharmony_ciLLVMValueRef
239bf215546Sopenharmony_cilp_build_cmp(struct lp_build_context *bld,
240bf215546Sopenharmony_ci             enum pipe_compare_func func,
241bf215546Sopenharmony_ci             LLVMValueRef a,
242bf215546Sopenharmony_ci             LLVMValueRef b)
243bf215546Sopenharmony_ci{
244bf215546Sopenharmony_ci   return lp_build_compare(bld->gallivm, bld->type, func, a, b);
245bf215546Sopenharmony_ci}
246bf215546Sopenharmony_ci
247bf215546Sopenharmony_ci
248bf215546Sopenharmony_ci/**
249bf215546Sopenharmony_ci * Return (mask & a) | (~mask & b);
250bf215546Sopenharmony_ci */
251bf215546Sopenharmony_ciLLVMValueRef
252bf215546Sopenharmony_cilp_build_select_bitwise(struct lp_build_context *bld,
253bf215546Sopenharmony_ci                        LLVMValueRef mask,
254bf215546Sopenharmony_ci                        LLVMValueRef a,
255bf215546Sopenharmony_ci                        LLVMValueRef b)
256bf215546Sopenharmony_ci{
257bf215546Sopenharmony_ci   LLVMBuilderRef builder = bld->gallivm->builder;
258bf215546Sopenharmony_ci   struct lp_type type = bld->type;
259bf215546Sopenharmony_ci   LLVMValueRef res;
260bf215546Sopenharmony_ci   LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->gallivm, type);
261bf215546Sopenharmony_ci
262bf215546Sopenharmony_ci   assert(lp_check_value(type, a));
263bf215546Sopenharmony_ci   assert(lp_check_value(type, b));
264bf215546Sopenharmony_ci
265bf215546Sopenharmony_ci   if (a == b) {
266bf215546Sopenharmony_ci      return a;
267bf215546Sopenharmony_ci   }
268bf215546Sopenharmony_ci
269bf215546Sopenharmony_ci   if (type.floating) {
270bf215546Sopenharmony_ci      a = LLVMBuildBitCast(builder, a, int_vec_type, "");
271bf215546Sopenharmony_ci      b = LLVMBuildBitCast(builder, b, int_vec_type, "");
272bf215546Sopenharmony_ci   }
273bf215546Sopenharmony_ci
274bf215546Sopenharmony_ci   if (type.width > 32)
275bf215546Sopenharmony_ci      mask = LLVMBuildSExt(builder, mask, int_vec_type, "");
276bf215546Sopenharmony_ci   a = LLVMBuildAnd(builder, a, mask, "");
277bf215546Sopenharmony_ci
278bf215546Sopenharmony_ci   /* This often gets translated to PANDN, but sometimes the NOT is
279bf215546Sopenharmony_ci    * pre-computed and stored in another constant. The best strategy depends
280bf215546Sopenharmony_ci    * on available registers, so it is not a big deal -- hopefully LLVM does
281bf215546Sopenharmony_ci    * the right decision attending the rest of the program.
282bf215546Sopenharmony_ci    */
283bf215546Sopenharmony_ci   b = LLVMBuildAnd(builder, b, LLVMBuildNot(builder, mask, ""), "");
284bf215546Sopenharmony_ci
285bf215546Sopenharmony_ci   res = LLVMBuildOr(builder, a, b, "");
286bf215546Sopenharmony_ci
287bf215546Sopenharmony_ci   if (type.floating) {
288bf215546Sopenharmony_ci      LLVMTypeRef vec_type = lp_build_vec_type(bld->gallivm, type);
289bf215546Sopenharmony_ci      res = LLVMBuildBitCast(builder, res, vec_type, "");
290bf215546Sopenharmony_ci   }
291bf215546Sopenharmony_ci
292bf215546Sopenharmony_ci   return res;
293bf215546Sopenharmony_ci}
294bf215546Sopenharmony_ci
295bf215546Sopenharmony_ci
296bf215546Sopenharmony_ci/**
297bf215546Sopenharmony_ci * Return mask ? a : b;
298bf215546Sopenharmony_ci *
299bf215546Sopenharmony_ci * mask is a bitwise mask, composed of 0 or ~0 for each element. Any other value
300bf215546Sopenharmony_ci * will yield unpredictable results.
301bf215546Sopenharmony_ci */
302bf215546Sopenharmony_ciLLVMValueRef
303bf215546Sopenharmony_cilp_build_select(struct lp_build_context *bld,
304bf215546Sopenharmony_ci                LLVMValueRef mask,
305bf215546Sopenharmony_ci                LLVMValueRef a,
306bf215546Sopenharmony_ci                LLVMValueRef b)
307bf215546Sopenharmony_ci{
308bf215546Sopenharmony_ci   LLVMBuilderRef builder = bld->gallivm->builder;
309bf215546Sopenharmony_ci   LLVMContextRef lc = bld->gallivm->context;
310bf215546Sopenharmony_ci   struct lp_type type = bld->type;
311bf215546Sopenharmony_ci   LLVMValueRef res;
312bf215546Sopenharmony_ci
313bf215546Sopenharmony_ci   assert(lp_check_value(type, a));
314bf215546Sopenharmony_ci   assert(lp_check_value(type, b));
315bf215546Sopenharmony_ci
316bf215546Sopenharmony_ci   if (a == b)
317bf215546Sopenharmony_ci      return a;
318bf215546Sopenharmony_ci
319bf215546Sopenharmony_ci   if (type.length == 1) {
320bf215546Sopenharmony_ci      mask = LLVMBuildTrunc(builder, mask, LLVMInt1TypeInContext(lc), "");
321bf215546Sopenharmony_ci      res = LLVMBuildSelect(builder, mask, a, b, "");
322bf215546Sopenharmony_ci   }
323bf215546Sopenharmony_ci   else if (LLVMIsConstant(mask) ||
324bf215546Sopenharmony_ci            LLVMGetInstructionOpcode(mask) == LLVMSExt) {
325bf215546Sopenharmony_ci      /* Generate a vector select.
326bf215546Sopenharmony_ci       *
327bf215546Sopenharmony_ci       * Using vector selects should avoid emitting intrinsics hence avoid
328bf215546Sopenharmony_ci       * hindering optimization passes, but vector selects weren't properly
329bf215546Sopenharmony_ci       * supported yet for a long time, and LLVM will generate poor code when
330bf215546Sopenharmony_ci       * the mask is not the result of a comparison.
331bf215546Sopenharmony_ci       * XXX: Even if the instruction was an SExt, this may still produce
332bf215546Sopenharmony_ci       * terrible code. Try piglit stencil-twoside.
333bf215546Sopenharmony_ci       */
334bf215546Sopenharmony_ci
335bf215546Sopenharmony_ci      /* Convert the mask to a vector of booleans.
336bf215546Sopenharmony_ci       *
337bf215546Sopenharmony_ci       * XXX: In x86 the mask is controlled by the MSB, so if we shifted the
338bf215546Sopenharmony_ci       * mask by `type.width - 1`, LLVM should realize the mask is ready.  Alas
339bf215546Sopenharmony_ci       * what really happens is that LLVM will emit two shifts back to back.
340bf215546Sopenharmony_ci       */
341bf215546Sopenharmony_ci      if (0) {
342bf215546Sopenharmony_ci         LLVMValueRef shift =
343bf215546Sopenharmony_ci            LLVMConstInt(bld->int_elem_type, bld->type.width - 1, 0);
344bf215546Sopenharmony_ci         shift = lp_build_broadcast(bld->gallivm, bld->int_vec_type, shift);
345bf215546Sopenharmony_ci         mask = LLVMBuildLShr(builder, mask, shift, "");
346bf215546Sopenharmony_ci      }
347bf215546Sopenharmony_ci      LLVMTypeRef bool_vec_type =
348bf215546Sopenharmony_ci         LLVMVectorType(LLVMInt1TypeInContext(lc), type.length);
349bf215546Sopenharmony_ci      mask = LLVMBuildTrunc(builder, mask, bool_vec_type, "");
350bf215546Sopenharmony_ci
351bf215546Sopenharmony_ci      res = LLVMBuildSelect(builder, mask, a, b, "");
352bf215546Sopenharmony_ci   }
353bf215546Sopenharmony_ci   else if (((util_get_cpu_caps()->has_sse4_1 &&
354bf215546Sopenharmony_ci              type.width * type.length == 128) ||
355bf215546Sopenharmony_ci             (util_get_cpu_caps()->has_avx &&
356bf215546Sopenharmony_ci              type.width * type.length == 256 && type.width >= 32) ||
357bf215546Sopenharmony_ci             (util_get_cpu_caps()->has_avx2 &&
358bf215546Sopenharmony_ci              type.width * type.length == 256)) &&
359bf215546Sopenharmony_ci            !LLVMIsConstant(a) &&
360bf215546Sopenharmony_ci            !LLVMIsConstant(b) &&
361bf215546Sopenharmony_ci            !LLVMIsConstant(mask)) {
362bf215546Sopenharmony_ci      const char *intrinsic;
363bf215546Sopenharmony_ci      LLVMTypeRef arg_type;
364bf215546Sopenharmony_ci      LLVMValueRef args[3];
365bf215546Sopenharmony_ci
366bf215546Sopenharmony_ci      LLVMTypeRef mask_type = LLVMGetElementType(LLVMTypeOf(mask));
367bf215546Sopenharmony_ci      if (LLVMGetIntTypeWidth(mask_type) != type.width) {
368bf215546Sopenharmony_ci         LLVMTypeRef int_vec_type =
369bf215546Sopenharmony_ci            LLVMVectorType(LLVMIntTypeInContext(lc, type.width), type.length);
370bf215546Sopenharmony_ci         mask = LLVMBuildSExt(builder, mask, int_vec_type, "");
371bf215546Sopenharmony_ci      }
372bf215546Sopenharmony_ci      /*
373bf215546Sopenharmony_ci       *  There's only float blend in AVX but can just cast i32/i64
374bf215546Sopenharmony_ci       *  to float.
375bf215546Sopenharmony_ci       */
376bf215546Sopenharmony_ci      if (type.width * type.length == 256) {
377bf215546Sopenharmony_ci         if (type.width == 64) {
378bf215546Sopenharmony_ci           intrinsic = "llvm.x86.avx.blendv.pd.256";
379bf215546Sopenharmony_ci           arg_type = LLVMVectorType(LLVMDoubleTypeInContext(lc), 4);
380bf215546Sopenharmony_ci         }
381bf215546Sopenharmony_ci         else if (type.width == 32) {
382bf215546Sopenharmony_ci            intrinsic = "llvm.x86.avx.blendv.ps.256";
383bf215546Sopenharmony_ci            arg_type = LLVMVectorType(LLVMFloatTypeInContext(lc), 8);
384bf215546Sopenharmony_ci         } else {
385bf215546Sopenharmony_ci            assert(util_get_cpu_caps()->has_avx2);
386bf215546Sopenharmony_ci            intrinsic = "llvm.x86.avx2.pblendvb";
387bf215546Sopenharmony_ci            arg_type = LLVMVectorType(LLVMInt8TypeInContext(lc), 32);
388bf215546Sopenharmony_ci         }
389bf215546Sopenharmony_ci      }
390bf215546Sopenharmony_ci      else if (type.floating &&
391bf215546Sopenharmony_ci               type.width == 64) {
392bf215546Sopenharmony_ci         intrinsic = "llvm.x86.sse41.blendvpd";
393bf215546Sopenharmony_ci         arg_type = LLVMVectorType(LLVMDoubleTypeInContext(lc), 2);
394bf215546Sopenharmony_ci      } else if (type.floating &&
395bf215546Sopenharmony_ci                 type.width == 32) {
396bf215546Sopenharmony_ci         intrinsic = "llvm.x86.sse41.blendvps";
397bf215546Sopenharmony_ci         arg_type = LLVMVectorType(LLVMFloatTypeInContext(lc), 4);
398bf215546Sopenharmony_ci      } else {
399bf215546Sopenharmony_ci         intrinsic = "llvm.x86.sse41.pblendvb";
400bf215546Sopenharmony_ci         arg_type = LLVMVectorType(LLVMInt8TypeInContext(lc), 16);
401bf215546Sopenharmony_ci      }
402bf215546Sopenharmony_ci
403bf215546Sopenharmony_ci      if (arg_type != bld->int_vec_type) {
404bf215546Sopenharmony_ci         mask = LLVMBuildBitCast(builder, mask, arg_type, "");
405bf215546Sopenharmony_ci      }
406bf215546Sopenharmony_ci
407bf215546Sopenharmony_ci      if (arg_type != bld->vec_type) {
408bf215546Sopenharmony_ci         a = LLVMBuildBitCast(builder, a, arg_type, "");
409bf215546Sopenharmony_ci         b = LLVMBuildBitCast(builder, b, arg_type, "");
410bf215546Sopenharmony_ci      }
411bf215546Sopenharmony_ci
412bf215546Sopenharmony_ci      args[0] = b;
413bf215546Sopenharmony_ci      args[1] = a;
414bf215546Sopenharmony_ci      args[2] = mask;
415bf215546Sopenharmony_ci
416bf215546Sopenharmony_ci      res = lp_build_intrinsic(builder, intrinsic,
417bf215546Sopenharmony_ci                               arg_type, args, ARRAY_SIZE(args), 0);
418bf215546Sopenharmony_ci
419bf215546Sopenharmony_ci      if (arg_type != bld->vec_type) {
420bf215546Sopenharmony_ci         res = LLVMBuildBitCast(builder, res, bld->vec_type, "");
421bf215546Sopenharmony_ci      }
422bf215546Sopenharmony_ci   }
423bf215546Sopenharmony_ci   else {
424bf215546Sopenharmony_ci      res = lp_build_select_bitwise(bld, mask, a, b);
425bf215546Sopenharmony_ci   }
426bf215546Sopenharmony_ci
427bf215546Sopenharmony_ci   return res;
428bf215546Sopenharmony_ci}
429bf215546Sopenharmony_ci
430bf215546Sopenharmony_ci
431bf215546Sopenharmony_ci/**
432bf215546Sopenharmony_ci * Return mask ? a : b;
433bf215546Sopenharmony_ci *
434bf215546Sopenharmony_ci * mask is a TGSI_WRITEMASK_xxx.
435bf215546Sopenharmony_ci */
436bf215546Sopenharmony_ciLLVMValueRef
437bf215546Sopenharmony_cilp_build_select_aos(struct lp_build_context *bld,
438bf215546Sopenharmony_ci                    unsigned mask,
439bf215546Sopenharmony_ci                    LLVMValueRef a,
440bf215546Sopenharmony_ci                    LLVMValueRef b,
441bf215546Sopenharmony_ci                    unsigned num_channels)
442bf215546Sopenharmony_ci{
443bf215546Sopenharmony_ci   LLVMBuilderRef builder = bld->gallivm->builder;
444bf215546Sopenharmony_ci   const struct lp_type type = bld->type;
445bf215546Sopenharmony_ci   const unsigned n = type.length;
446bf215546Sopenharmony_ci
447bf215546Sopenharmony_ci   assert((mask & ~0xf) == 0);
448bf215546Sopenharmony_ci   assert(lp_check_value(type, a));
449bf215546Sopenharmony_ci   assert(lp_check_value(type, b));
450bf215546Sopenharmony_ci
451bf215546Sopenharmony_ci   if (a == b)
452bf215546Sopenharmony_ci      return a;
453bf215546Sopenharmony_ci   if ((mask & 0xf) == 0xf)
454bf215546Sopenharmony_ci      return a;
455bf215546Sopenharmony_ci   if ((mask & 0xf) == 0x0)
456bf215546Sopenharmony_ci      return b;
457bf215546Sopenharmony_ci   if (a == bld->undef || b == bld->undef)
458bf215546Sopenharmony_ci      return bld->undef;
459bf215546Sopenharmony_ci
460bf215546Sopenharmony_ci   /*
461bf215546Sopenharmony_ci    * There are two major ways of accomplishing this:
462bf215546Sopenharmony_ci    * - with a shuffle
463bf215546Sopenharmony_ci    * - with a select
464bf215546Sopenharmony_ci    *
465bf215546Sopenharmony_ci    * The flip between these is empirical and might need to be adjusted.
466bf215546Sopenharmony_ci    */
467bf215546Sopenharmony_ci   if (n <= 4) {
468bf215546Sopenharmony_ci      /*
469bf215546Sopenharmony_ci       * Shuffle.
470bf215546Sopenharmony_ci       */
471bf215546Sopenharmony_ci      LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
472bf215546Sopenharmony_ci      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
473bf215546Sopenharmony_ci
474bf215546Sopenharmony_ci      for (unsigned j = 0; j < n; j += num_channels)
475bf215546Sopenharmony_ci         for (unsigned i = 0; i < num_channels; ++i)
476bf215546Sopenharmony_ci            shuffles[j + i] = LLVMConstInt(elem_type,
477bf215546Sopenharmony_ci                                           (mask & (1 << i) ? 0 : n) + j + i,
478bf215546Sopenharmony_ci                                           0);
479bf215546Sopenharmony_ci
480bf215546Sopenharmony_ci      return LLVMBuildShuffleVector(builder, a, b,
481bf215546Sopenharmony_ci                                    LLVMConstVector(shuffles, n), "");
482bf215546Sopenharmony_ci   }
483bf215546Sopenharmony_ci   else {
484bf215546Sopenharmony_ci      LLVMValueRef mask_vec = lp_build_const_mask_aos(bld->gallivm,
485bf215546Sopenharmony_ci                                                      type, mask, num_channels);
486bf215546Sopenharmony_ci      return lp_build_select(bld, mask_vec, a, b);
487bf215546Sopenharmony_ci   }
488bf215546Sopenharmony_ci}
489bf215546Sopenharmony_ci
490bf215546Sopenharmony_ci
491bf215546Sopenharmony_ci/**
492bf215546Sopenharmony_ci * Return (scalar-cast)val ? true : false;
493bf215546Sopenharmony_ci */
494bf215546Sopenharmony_ciLLVMValueRef
495bf215546Sopenharmony_cilp_build_any_true_range(struct lp_build_context *bld,
496bf215546Sopenharmony_ci                        unsigned real_length,
497bf215546Sopenharmony_ci                        LLVMValueRef val)
498bf215546Sopenharmony_ci{
499bf215546Sopenharmony_ci   LLVMBuilderRef builder = bld->gallivm->builder;
500bf215546Sopenharmony_ci   LLVMTypeRef scalar_type;
501bf215546Sopenharmony_ci   LLVMTypeRef true_type;
502bf215546Sopenharmony_ci
503bf215546Sopenharmony_ci   assert(real_length <= bld->type.length);
504bf215546Sopenharmony_ci
505bf215546Sopenharmony_ci   true_type = LLVMIntTypeInContext(bld->gallivm->context,
506bf215546Sopenharmony_ci                                    bld->type.width * real_length);
507bf215546Sopenharmony_ci   scalar_type = LLVMIntTypeInContext(bld->gallivm->context,
508bf215546Sopenharmony_ci                                      bld->type.width * bld->type.length);
509bf215546Sopenharmony_ci   val = LLVMBuildBitCast(builder, val, scalar_type, "");
510bf215546Sopenharmony_ci   /*
511bf215546Sopenharmony_ci    * We're using always native types so we can use intrinsics.
512bf215546Sopenharmony_ci    * However, if we don't do per-element calculations, we must ensure
513bf215546Sopenharmony_ci    * the excess elements aren't used since they may contain garbage.
514bf215546Sopenharmony_ci    */
515bf215546Sopenharmony_ci   if (real_length < bld->type.length) {
516bf215546Sopenharmony_ci      val = LLVMBuildTrunc(builder, val, true_type, "");
517bf215546Sopenharmony_ci   }
518bf215546Sopenharmony_ci   return LLVMBuildICmp(builder, LLVMIntNE,
519bf215546Sopenharmony_ci                        val, LLVMConstNull(true_type), "");
520bf215546Sopenharmony_ci}
521