1bf215546Sopenharmony_ci/**************************************************************************
2bf215546Sopenharmony_ci *
3bf215546Sopenharmony_ci * Copyright 2009 VMware, Inc.
4bf215546Sopenharmony_ci * All Rights Reserved.
5bf215546Sopenharmony_ci *
6bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
7bf215546Sopenharmony_ci * copy of this software and associated documentation files (the
8bf215546Sopenharmony_ci * "Software"), to deal in the Software without restriction, including
9bf215546Sopenharmony_ci * without limitation the rights to use, copy, modify, merge, publish,
10bf215546Sopenharmony_ci * distribute, sub license, and/or sell copies of the Software, and to
11bf215546Sopenharmony_ci * permit persons to whom the Software is furnished to do so, subject to
12bf215546Sopenharmony_ci * the following conditions:
13bf215546Sopenharmony_ci *
14bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the
15bf215546Sopenharmony_ci * next paragraph) shall be included in all copies or substantial portions
16bf215546Sopenharmony_ci * of the Software.
17bf215546Sopenharmony_ci *
18bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19bf215546Sopenharmony_ci * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20bf215546Sopenharmony_ci * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21bf215546Sopenharmony_ci * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22bf215546Sopenharmony_ci * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23bf215546Sopenharmony_ci * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24bf215546Sopenharmony_ci * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25bf215546Sopenharmony_ci *
26bf215546Sopenharmony_ci **************************************************************************/
27bf215546Sopenharmony_ci
28bf215546Sopenharmony_ci/**
29bf215546Sopenharmony_ci * @file
30bf215546Sopenharmony_ci * Helper functions for swizzling/shuffling.
31bf215546Sopenharmony_ci *
32bf215546Sopenharmony_ci * @author Jose Fonseca <jfonseca@vmware.com>
33bf215546Sopenharmony_ci */
34bf215546Sopenharmony_ci
35bf215546Sopenharmony_ci#include <inttypes.h>  /* for PRIx64 macro */
36bf215546Sopenharmony_ci#include "util/compiler.h"
37bf215546Sopenharmony_ci#include "util/u_debug.h"
38bf215546Sopenharmony_ci
39bf215546Sopenharmony_ci#include "lp_bld_type.h"
40bf215546Sopenharmony_ci#include "lp_bld_const.h"
41bf215546Sopenharmony_ci#include "lp_bld_init.h"
42bf215546Sopenharmony_ci#include "lp_bld_logic.h"
43bf215546Sopenharmony_ci#include "lp_bld_swizzle.h"
44bf215546Sopenharmony_ci#include "lp_bld_pack.h"
45bf215546Sopenharmony_ci
46bf215546Sopenharmony_ci
47bf215546Sopenharmony_ciLLVMValueRef
48bf215546Sopenharmony_cilp_build_broadcast(struct gallivm_state *gallivm,
49bf215546Sopenharmony_ci                   LLVMTypeRef vec_type,
50bf215546Sopenharmony_ci                   LLVMValueRef scalar)
51bf215546Sopenharmony_ci{
52bf215546Sopenharmony_ci   LLVMValueRef res;
53bf215546Sopenharmony_ci
54bf215546Sopenharmony_ci   if (LLVMGetTypeKind(vec_type) != LLVMVectorTypeKind) {
55bf215546Sopenharmony_ci      /* scalar */
56bf215546Sopenharmony_ci      assert(vec_type == LLVMTypeOf(scalar));
57bf215546Sopenharmony_ci      res = scalar;
58bf215546Sopenharmony_ci   } else {
59bf215546Sopenharmony_ci      LLVMBuilderRef builder = gallivm->builder;
60bf215546Sopenharmony_ci      const unsigned length = LLVMGetVectorSize(vec_type);
61bf215546Sopenharmony_ci      LLVMValueRef undef = LLVMGetUndef(vec_type);
62bf215546Sopenharmony_ci      /* The shuffle vector is always made of int32 elements */
63bf215546Sopenharmony_ci      LLVMTypeRef i32_type = LLVMInt32TypeInContext(gallivm->context);
64bf215546Sopenharmony_ci      LLVMTypeRef i32_vec_type = LLVMVectorType(i32_type, length);
65bf215546Sopenharmony_ci
66bf215546Sopenharmony_ci      assert(LLVMGetElementType(vec_type) == LLVMTypeOf(scalar));
67bf215546Sopenharmony_ci
68bf215546Sopenharmony_ci      res = LLVMBuildInsertElement(builder, undef, scalar, LLVMConstNull(i32_type), "");
69bf215546Sopenharmony_ci      res = LLVMBuildShuffleVector(builder, res, undef, LLVMConstNull(i32_vec_type), "");
70bf215546Sopenharmony_ci   }
71bf215546Sopenharmony_ci
72bf215546Sopenharmony_ci   return res;
73bf215546Sopenharmony_ci}
74bf215546Sopenharmony_ci
75bf215546Sopenharmony_ci
76bf215546Sopenharmony_ci/**
77bf215546Sopenharmony_ci * Broadcast
78bf215546Sopenharmony_ci */
79bf215546Sopenharmony_ciLLVMValueRef
80bf215546Sopenharmony_cilp_build_broadcast_scalar(struct lp_build_context *bld,
81bf215546Sopenharmony_ci                          LLVMValueRef scalar)
82bf215546Sopenharmony_ci{
83bf215546Sopenharmony_ci   assert(lp_check_elem_type(bld->type, LLVMTypeOf(scalar)));
84bf215546Sopenharmony_ci
85bf215546Sopenharmony_ci   return lp_build_broadcast(bld->gallivm, bld->vec_type, scalar);
86bf215546Sopenharmony_ci}
87bf215546Sopenharmony_ci
88bf215546Sopenharmony_ci
89bf215546Sopenharmony_ci/**
90bf215546Sopenharmony_ci * Combined extract and broadcast (mere shuffle in most cases)
91bf215546Sopenharmony_ci */
92bf215546Sopenharmony_ciLLVMValueRef
93bf215546Sopenharmony_cilp_build_extract_broadcast(struct gallivm_state *gallivm,
94bf215546Sopenharmony_ci                           struct lp_type src_type,
95bf215546Sopenharmony_ci                           struct lp_type dst_type,
96bf215546Sopenharmony_ci                           LLVMValueRef vector,
97bf215546Sopenharmony_ci                           LLVMValueRef index)
98bf215546Sopenharmony_ci{
99bf215546Sopenharmony_ci   LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
100bf215546Sopenharmony_ci   LLVMValueRef res;
101bf215546Sopenharmony_ci
102bf215546Sopenharmony_ci   assert(src_type.floating == dst_type.floating);
103bf215546Sopenharmony_ci   assert(src_type.width    == dst_type.width);
104bf215546Sopenharmony_ci
105bf215546Sopenharmony_ci   assert(lp_check_value(src_type, vector));
106bf215546Sopenharmony_ci   assert(LLVMTypeOf(index) == i32t);
107bf215546Sopenharmony_ci
108bf215546Sopenharmony_ci   if (src_type.length == 1) {
109bf215546Sopenharmony_ci      if (dst_type.length == 1) {
110bf215546Sopenharmony_ci         /*
111bf215546Sopenharmony_ci          * Trivial scalar -> scalar.
112bf215546Sopenharmony_ci          */
113bf215546Sopenharmony_ci         res = vector;
114bf215546Sopenharmony_ci      } else {
115bf215546Sopenharmony_ci         /*
116bf215546Sopenharmony_ci          * Broadcast scalar -> vector.
117bf215546Sopenharmony_ci          */
118bf215546Sopenharmony_ci         res = lp_build_broadcast(gallivm,
119bf215546Sopenharmony_ci                                  lp_build_vec_type(gallivm, dst_type),
120bf215546Sopenharmony_ci                                  vector);
121bf215546Sopenharmony_ci      }
122bf215546Sopenharmony_ci   } else {
123bf215546Sopenharmony_ci      if (dst_type.length > 1) {
124bf215546Sopenharmony_ci         /*
125bf215546Sopenharmony_ci          * shuffle - result can be of different length.
126bf215546Sopenharmony_ci          */
127bf215546Sopenharmony_ci         LLVMValueRef shuffle;
128bf215546Sopenharmony_ci         shuffle = lp_build_broadcast(gallivm,
129bf215546Sopenharmony_ci                                      LLVMVectorType(i32t, dst_type.length),
130bf215546Sopenharmony_ci                                      index);
131bf215546Sopenharmony_ci         res = LLVMBuildShuffleVector(gallivm->builder, vector,
132bf215546Sopenharmony_ci                                      LLVMGetUndef(lp_build_vec_type(gallivm, src_type)),
133bf215546Sopenharmony_ci                                      shuffle, "");
134bf215546Sopenharmony_ci      } else {
135bf215546Sopenharmony_ci         /*
136bf215546Sopenharmony_ci          * Trivial extract scalar from vector.
137bf215546Sopenharmony_ci          */
138bf215546Sopenharmony_ci          res = LLVMBuildExtractElement(gallivm->builder, vector, index, "");
139bf215546Sopenharmony_ci      }
140bf215546Sopenharmony_ci   }
141bf215546Sopenharmony_ci
142bf215546Sopenharmony_ci   return res;
143bf215546Sopenharmony_ci}
144bf215546Sopenharmony_ci
145bf215546Sopenharmony_ci
146bf215546Sopenharmony_ci/**
147bf215546Sopenharmony_ci * Swizzle one channel into other channels.
148bf215546Sopenharmony_ci */
149bf215546Sopenharmony_ciLLVMValueRef
150bf215546Sopenharmony_cilp_build_swizzle_scalar_aos(struct lp_build_context *bld,
151bf215546Sopenharmony_ci                            LLVMValueRef a,
152bf215546Sopenharmony_ci                            unsigned channel,
153bf215546Sopenharmony_ci                            unsigned num_channels)
154bf215546Sopenharmony_ci{
155bf215546Sopenharmony_ci   LLVMBuilderRef builder = bld->gallivm->builder;
156bf215546Sopenharmony_ci   const struct lp_type type = bld->type;
157bf215546Sopenharmony_ci   const unsigned n = type.length;
158bf215546Sopenharmony_ci
159bf215546Sopenharmony_ci   if (a == bld->undef || a == bld->zero || a == bld->one || num_channels == 1)
160bf215546Sopenharmony_ci      return a;
161bf215546Sopenharmony_ci
162bf215546Sopenharmony_ci   assert(num_channels == 2 || num_channels == 4);
163bf215546Sopenharmony_ci
164bf215546Sopenharmony_ci   /* XXX: SSE3 has PSHUFB which should be better than bitmasks, but forcing
165bf215546Sopenharmony_ci    * using shuffles here actually causes worst results. More investigation is
166bf215546Sopenharmony_ci    * needed. */
167bf215546Sopenharmony_ci   if (LLVMIsConstant(a) || type.width >= 16) {
168bf215546Sopenharmony_ci      /*
169bf215546Sopenharmony_ci       * Shuffle.
170bf215546Sopenharmony_ci       */
171bf215546Sopenharmony_ci      LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
172bf215546Sopenharmony_ci      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
173bf215546Sopenharmony_ci
174bf215546Sopenharmony_ci      for (unsigned j = 0; j < n; j += num_channels)
175bf215546Sopenharmony_ci         for (unsigned i = 0; i < num_channels; ++i)
176bf215546Sopenharmony_ci            shuffles[j + i] = LLVMConstInt(elem_type, j + channel, 0);
177bf215546Sopenharmony_ci
178bf215546Sopenharmony_ci      return LLVMBuildShuffleVector(builder, a, bld->undef, LLVMConstVector(shuffles, n), "");
179bf215546Sopenharmony_ci   } else if (num_channels == 2) {
180bf215546Sopenharmony_ci      /*
181bf215546Sopenharmony_ci       * Bit mask and shifts
182bf215546Sopenharmony_ci       *
183bf215546Sopenharmony_ci       *   XY XY .... XY  <= input
184bf215546Sopenharmony_ci       *   0Y 0Y .... 0Y
185bf215546Sopenharmony_ci       *   YY YY .... YY
186bf215546Sopenharmony_ci       *   YY YY .... YY  <= output
187bf215546Sopenharmony_ci       */
188bf215546Sopenharmony_ci      struct lp_type type2;
189bf215546Sopenharmony_ci      LLVMValueRef tmp = NULL;
190bf215546Sopenharmony_ci      int shift;
191bf215546Sopenharmony_ci
192bf215546Sopenharmony_ci      a = LLVMBuildAnd(builder, a,
193bf215546Sopenharmony_ci                       lp_build_const_mask_aos(bld->gallivm,
194bf215546Sopenharmony_ci                                               type, 1 << channel, num_channels), "");
195bf215546Sopenharmony_ci
196bf215546Sopenharmony_ci      type2 = type;
197bf215546Sopenharmony_ci      type2.floating = FALSE;
198bf215546Sopenharmony_ci      type2.width *= 2;
199bf215546Sopenharmony_ci      type2.length /= 2;
200bf215546Sopenharmony_ci
201bf215546Sopenharmony_ci      a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type2), "");
202bf215546Sopenharmony_ci
203bf215546Sopenharmony_ci      /*
204bf215546Sopenharmony_ci       * Vector element 0 is always channel X.
205bf215546Sopenharmony_ci       *
206bf215546Sopenharmony_ci       *                        76 54 32 10 (array numbering)
207bf215546Sopenharmony_ci       * Little endian reg in:  YX YX YX YX
208bf215546Sopenharmony_ci       * Little endian reg out: YY YY YY YY if shift right (shift == -1)
209bf215546Sopenharmony_ci       *                        XX XX XX XX if shift left (shift == 1)
210bf215546Sopenharmony_ci       *
211bf215546Sopenharmony_ci       *                        01 23 45 67 (array numbering)
212bf215546Sopenharmony_ci       * Big endian reg in:     XY XY XY XY
213bf215546Sopenharmony_ci       * Big endian reg out:    YY YY YY YY if shift left (shift == 1)
214bf215546Sopenharmony_ci       *                        XX XX XX XX if shift right (shift == -1)
215bf215546Sopenharmony_ci       *
216bf215546Sopenharmony_ci       */
217bf215546Sopenharmony_ci#if UTIL_ARCH_LITTLE_ENDIAN
218bf215546Sopenharmony_ci      shift = channel == 0 ? 1 : -1;
219bf215546Sopenharmony_ci#else
220bf215546Sopenharmony_ci      shift = channel == 0 ? -1 : 1;
221bf215546Sopenharmony_ci#endif
222bf215546Sopenharmony_ci
223bf215546Sopenharmony_ci      if (shift > 0) {
224bf215546Sopenharmony_ci         tmp = LLVMBuildShl(builder, a, lp_build_const_int_vec(bld->gallivm, type2, shift * type.width), "");
225bf215546Sopenharmony_ci      } else if (shift < 0) {
226bf215546Sopenharmony_ci         tmp = LLVMBuildLShr(builder, a, lp_build_const_int_vec(bld->gallivm, type2, -shift * type.width), "");
227bf215546Sopenharmony_ci      }
228bf215546Sopenharmony_ci
229bf215546Sopenharmony_ci      assert(tmp);
230bf215546Sopenharmony_ci      if (tmp) {
231bf215546Sopenharmony_ci         a = LLVMBuildOr(builder, a, tmp, "");
232bf215546Sopenharmony_ci      }
233bf215546Sopenharmony_ci
234bf215546Sopenharmony_ci      return LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type), "");
235bf215546Sopenharmony_ci   } else {
236bf215546Sopenharmony_ci      /*
237bf215546Sopenharmony_ci       * Bit mask and recursive shifts
238bf215546Sopenharmony_ci       *
239bf215546Sopenharmony_ci       * Little-endian registers:
240bf215546Sopenharmony_ci       *
241bf215546Sopenharmony_ci       *   7654 3210
242bf215546Sopenharmony_ci       *   WZYX WZYX .... WZYX  <= input
243bf215546Sopenharmony_ci       *   00Y0 00Y0 .... 00Y0  <= mask
244bf215546Sopenharmony_ci       *   00YY 00YY .... 00YY  <= shift right 1 (shift amount -1)
245bf215546Sopenharmony_ci       *   YYYY YYYY .... YYYY  <= shift left 2 (shift amount 2)
246bf215546Sopenharmony_ci       *
247bf215546Sopenharmony_ci       * Big-endian registers:
248bf215546Sopenharmony_ci       *
249bf215546Sopenharmony_ci       *   0123 4567
250bf215546Sopenharmony_ci       *   XYZW XYZW .... XYZW  <= input
251bf215546Sopenharmony_ci       *   0Y00 0Y00 .... 0Y00  <= mask
252bf215546Sopenharmony_ci       *   YY00 YY00 .... YY00  <= shift left 1 (shift amount 1)
253bf215546Sopenharmony_ci       *   YYYY YYYY .... YYYY  <= shift right 2 (shift amount -2)
254bf215546Sopenharmony_ci       *
255bf215546Sopenharmony_ci       * shifts[] gives little-endian shift amounts; we need to negate for big-endian.
256bf215546Sopenharmony_ci       */
257bf215546Sopenharmony_ci      static const int shifts[4][2] = {
258bf215546Sopenharmony_ci         { 1,  2},
259bf215546Sopenharmony_ci         {-1,  2},
260bf215546Sopenharmony_ci         { 1, -2},
261bf215546Sopenharmony_ci         {-1, -2}
262bf215546Sopenharmony_ci      };
263bf215546Sopenharmony_ci
264bf215546Sopenharmony_ci      a = LLVMBuildAnd(builder, a,
265bf215546Sopenharmony_ci                       lp_build_const_mask_aos(bld->gallivm,
266bf215546Sopenharmony_ci                                               type, 1 << channel, 4), "");
267bf215546Sopenharmony_ci
268bf215546Sopenharmony_ci      /*
269bf215546Sopenharmony_ci       * Build a type where each element is an integer that cover the four
270bf215546Sopenharmony_ci       * channels.
271bf215546Sopenharmony_ci       */
272bf215546Sopenharmony_ci
273bf215546Sopenharmony_ci      struct lp_type type4 = type;
274bf215546Sopenharmony_ci      type4.floating = FALSE;
275bf215546Sopenharmony_ci      type4.width *= 4;
276bf215546Sopenharmony_ci      type4.length /= 4;
277bf215546Sopenharmony_ci
278bf215546Sopenharmony_ci      a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type4), "");
279bf215546Sopenharmony_ci
280bf215546Sopenharmony_ci      for (unsigned i = 0; i < 2; ++i) {
281bf215546Sopenharmony_ci         LLVMValueRef tmp = NULL;
282bf215546Sopenharmony_ci         int shift = shifts[channel][i];
283bf215546Sopenharmony_ci
284bf215546Sopenharmony_ci         /* See endianness diagram above */
285bf215546Sopenharmony_ci#if UTIL_ARCH_BIG_ENDIAN
286bf215546Sopenharmony_ci         shift = -shift;
287bf215546Sopenharmony_ci#endif
288bf215546Sopenharmony_ci
289bf215546Sopenharmony_ci         if (shift > 0)
290bf215546Sopenharmony_ci            tmp = LLVMBuildShl(builder, a, lp_build_const_int_vec(bld->gallivm, type4, shift*type.width), "");
291bf215546Sopenharmony_ci         if (shift < 0)
292bf215546Sopenharmony_ci            tmp = LLVMBuildLShr(builder, a, lp_build_const_int_vec(bld->gallivm, type4, -shift*type.width), "");
293bf215546Sopenharmony_ci
294bf215546Sopenharmony_ci         assert(tmp);
295bf215546Sopenharmony_ci         if (tmp)
296bf215546Sopenharmony_ci            a = LLVMBuildOr(builder, a, tmp, "");
297bf215546Sopenharmony_ci      }
298bf215546Sopenharmony_ci
299bf215546Sopenharmony_ci      return LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type), "");
300bf215546Sopenharmony_ci   }
301bf215546Sopenharmony_ci}
302bf215546Sopenharmony_ci
303bf215546Sopenharmony_ci
304bf215546Sopenharmony_ci/**
305bf215546Sopenharmony_ci * Swizzle a vector consisting of an array of XYZW structs.
306bf215546Sopenharmony_ci *
307bf215546Sopenharmony_ci * This fills a vector of dst_len length with the swizzled channels from src.
308bf215546Sopenharmony_ci *
309bf215546Sopenharmony_ci * e.g. with swizzles = { 2, 1, 0 } and swizzle_count = 6 results in
310bf215546Sopenharmony_ci *      RGBA RGBA = BGR BGR BG
311bf215546Sopenharmony_ci *
312bf215546Sopenharmony_ci * @param swizzles        the swizzle array
313bf215546Sopenharmony_ci * @param num_swizzles    the number of elements in swizzles
314bf215546Sopenharmony_ci * @param dst_len         the length of the result
315bf215546Sopenharmony_ci */
316bf215546Sopenharmony_ciLLVMValueRef
317bf215546Sopenharmony_cilp_build_swizzle_aos_n(struct gallivm_state* gallivm,
318bf215546Sopenharmony_ci                       LLVMValueRef src,
319bf215546Sopenharmony_ci                       const unsigned char* swizzles,
320bf215546Sopenharmony_ci                       unsigned num_swizzles,
321bf215546Sopenharmony_ci                       unsigned dst_len)
322bf215546Sopenharmony_ci{
323bf215546Sopenharmony_ci   LLVMBuilderRef builder = gallivm->builder;
324bf215546Sopenharmony_ci   LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH];
325bf215546Sopenharmony_ci
326bf215546Sopenharmony_ci   assert(dst_len < LP_MAX_VECTOR_WIDTH);
327bf215546Sopenharmony_ci
328bf215546Sopenharmony_ci   for (unsigned i = 0; i < dst_len; ++i) {
329bf215546Sopenharmony_ci      int swizzle = swizzles[i % num_swizzles];
330bf215546Sopenharmony_ci
331bf215546Sopenharmony_ci      if (swizzle == LP_BLD_SWIZZLE_DONTCARE) {
332bf215546Sopenharmony_ci         shuffles[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
333bf215546Sopenharmony_ci      } else {
334bf215546Sopenharmony_ci         shuffles[i] = lp_build_const_int32(gallivm, swizzle);
335bf215546Sopenharmony_ci      }
336bf215546Sopenharmony_ci   }
337bf215546Sopenharmony_ci
338bf215546Sopenharmony_ci   return LLVMBuildShuffleVector(builder, src,
339bf215546Sopenharmony_ci                                 LLVMGetUndef(LLVMTypeOf(src)),
340bf215546Sopenharmony_ci                                 LLVMConstVector(shuffles, dst_len), "");
341bf215546Sopenharmony_ci}
342bf215546Sopenharmony_ci
343bf215546Sopenharmony_ci
344bf215546Sopenharmony_ciLLVMValueRef
345bf215546Sopenharmony_cilp_build_swizzle_aos(struct lp_build_context *bld,
346bf215546Sopenharmony_ci                     LLVMValueRef a,
347bf215546Sopenharmony_ci                     const unsigned char swizzles[4])
348bf215546Sopenharmony_ci{
349bf215546Sopenharmony_ci   LLVMBuilderRef builder = bld->gallivm->builder;
350bf215546Sopenharmony_ci   const struct lp_type type = bld->type;
351bf215546Sopenharmony_ci   const unsigned n = type.length;
352bf215546Sopenharmony_ci
353bf215546Sopenharmony_ci   if (swizzles[0] == PIPE_SWIZZLE_X &&
354bf215546Sopenharmony_ci       swizzles[1] == PIPE_SWIZZLE_Y &&
355bf215546Sopenharmony_ci       swizzles[2] == PIPE_SWIZZLE_Z &&
356bf215546Sopenharmony_ci       swizzles[3] == PIPE_SWIZZLE_W) {
357bf215546Sopenharmony_ci      return a;
358bf215546Sopenharmony_ci   }
359bf215546Sopenharmony_ci
360bf215546Sopenharmony_ci   if (swizzles[0] == swizzles[1] &&
361bf215546Sopenharmony_ci       swizzles[1] == swizzles[2] &&
362bf215546Sopenharmony_ci       swizzles[2] == swizzles[3]) {
363bf215546Sopenharmony_ci      switch (swizzles[0]) {
364bf215546Sopenharmony_ci      case PIPE_SWIZZLE_X:
365bf215546Sopenharmony_ci      case PIPE_SWIZZLE_Y:
366bf215546Sopenharmony_ci      case PIPE_SWIZZLE_Z:
367bf215546Sopenharmony_ci      case PIPE_SWIZZLE_W:
368bf215546Sopenharmony_ci         return lp_build_swizzle_scalar_aos(bld, a, swizzles[0], 4);
369bf215546Sopenharmony_ci      case PIPE_SWIZZLE_0:
370bf215546Sopenharmony_ci         return bld->zero;
371bf215546Sopenharmony_ci      case PIPE_SWIZZLE_1:
372bf215546Sopenharmony_ci         return bld->one;
373bf215546Sopenharmony_ci      case LP_BLD_SWIZZLE_DONTCARE:
374bf215546Sopenharmony_ci         return bld->undef;
375bf215546Sopenharmony_ci      default:
376bf215546Sopenharmony_ci         assert(0);
377bf215546Sopenharmony_ci         return bld->undef;
378bf215546Sopenharmony_ci      }
379bf215546Sopenharmony_ci   }
380bf215546Sopenharmony_ci
381bf215546Sopenharmony_ci   if (LLVMIsConstant(a) ||
382bf215546Sopenharmony_ci       type.width >= 16) {
383bf215546Sopenharmony_ci      /*
384bf215546Sopenharmony_ci       * Shuffle.
385bf215546Sopenharmony_ci       */
386bf215546Sopenharmony_ci      LLVMValueRef undef = LLVMGetUndef(lp_build_elem_type(bld->gallivm, type));
387bf215546Sopenharmony_ci      LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
388bf215546Sopenharmony_ci      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
389bf215546Sopenharmony_ci      LLVMValueRef aux[LP_MAX_VECTOR_LENGTH];
390bf215546Sopenharmony_ci
391bf215546Sopenharmony_ci      memset(aux, 0, sizeof aux);
392bf215546Sopenharmony_ci
393bf215546Sopenharmony_ci      for (unsigned j = 0; j < n; j += 4) {
394bf215546Sopenharmony_ci         for (unsigned i = 0; i < 4; ++i) {
395bf215546Sopenharmony_ci            unsigned shuffle;
396bf215546Sopenharmony_ci            switch (swizzles[i]) {
397bf215546Sopenharmony_ci            default:
398bf215546Sopenharmony_ci               assert(0);
399bf215546Sopenharmony_ci            case PIPE_SWIZZLE_X:
400bf215546Sopenharmony_ci            case PIPE_SWIZZLE_Y:
401bf215546Sopenharmony_ci            case PIPE_SWIZZLE_Z:
402bf215546Sopenharmony_ci            case PIPE_SWIZZLE_W:
403bf215546Sopenharmony_ci               shuffle = j + swizzles[i];
404bf215546Sopenharmony_ci               shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
405bf215546Sopenharmony_ci               break;
406bf215546Sopenharmony_ci            case PIPE_SWIZZLE_0:
407bf215546Sopenharmony_ci               shuffle = type.length + 0;
408bf215546Sopenharmony_ci               shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
409bf215546Sopenharmony_ci               if (!aux[0]) {
410bf215546Sopenharmony_ci                  aux[0] = lp_build_const_elem(bld->gallivm, type, 0.0);
411bf215546Sopenharmony_ci               }
412bf215546Sopenharmony_ci               break;
413bf215546Sopenharmony_ci            case PIPE_SWIZZLE_1:
414bf215546Sopenharmony_ci               shuffle = type.length + 1;
415bf215546Sopenharmony_ci               shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
416bf215546Sopenharmony_ci               if (!aux[1]) {
417bf215546Sopenharmony_ci                  aux[1] = lp_build_const_elem(bld->gallivm, type, 1.0);
418bf215546Sopenharmony_ci               }
419bf215546Sopenharmony_ci               break;
420bf215546Sopenharmony_ci            case LP_BLD_SWIZZLE_DONTCARE:
421bf215546Sopenharmony_ci               shuffles[j + i] = LLVMGetUndef(i32t);
422bf215546Sopenharmony_ci               break;
423bf215546Sopenharmony_ci            }
424bf215546Sopenharmony_ci         }
425bf215546Sopenharmony_ci      }
426bf215546Sopenharmony_ci
427bf215546Sopenharmony_ci      for (unsigned i = 0; i < n; ++i) {
428bf215546Sopenharmony_ci         if (!aux[i]) {
429bf215546Sopenharmony_ci            aux[i] = undef;
430bf215546Sopenharmony_ci         }
431bf215546Sopenharmony_ci      }
432bf215546Sopenharmony_ci
433bf215546Sopenharmony_ci      return LLVMBuildShuffleVector(builder, a,
434bf215546Sopenharmony_ci                                    LLVMConstVector(aux, n),
435bf215546Sopenharmony_ci                                    LLVMConstVector(shuffles, n), "");
436bf215546Sopenharmony_ci   } else {
437bf215546Sopenharmony_ci      /*
438bf215546Sopenharmony_ci       * Bit mask and shifts.
439bf215546Sopenharmony_ci       *
440bf215546Sopenharmony_ci       * For example, this will convert BGRA to RGBA by doing
441bf215546Sopenharmony_ci       *
442bf215546Sopenharmony_ci       * Little endian:
443bf215546Sopenharmony_ci       *   rgba = (bgra & 0x00ff0000) >> 16
444bf215546Sopenharmony_ci       *        | (bgra & 0xff00ff00)
445bf215546Sopenharmony_ci       *        | (bgra & 0x000000ff) << 16
446bf215546Sopenharmony_ci       *
447bf215546Sopenharmony_ci       * Big endian:A
448bf215546Sopenharmony_ci       *   rgba = (bgra & 0x0000ff00) << 16
449bf215546Sopenharmony_ci       *        | (bgra & 0x00ff00ff)
450bf215546Sopenharmony_ci       *        | (bgra & 0xff000000) >> 16
451bf215546Sopenharmony_ci       *
452bf215546Sopenharmony_ci       * This is necessary not only for faster cause, but because X86 backend
453bf215546Sopenharmony_ci       * will refuse shuffles of <4 x i8> vectors
454bf215546Sopenharmony_ci       */
455bf215546Sopenharmony_ci
456bf215546Sopenharmony_ci      /*
457bf215546Sopenharmony_ci       * Start with a mixture of 1 and 0.
458bf215546Sopenharmony_ci       */
459bf215546Sopenharmony_ci      unsigned cond = 0;
460bf215546Sopenharmony_ci      for (unsigned chan = 0; chan < 4; ++chan) {
461bf215546Sopenharmony_ci         if (swizzles[chan] == PIPE_SWIZZLE_1) {
462bf215546Sopenharmony_ci            cond |= 1 << chan;
463bf215546Sopenharmony_ci         }
464bf215546Sopenharmony_ci      }
465bf215546Sopenharmony_ci      LLVMValueRef res =
466bf215546Sopenharmony_ci         lp_build_select_aos(bld, cond, bld->one, bld->zero, 4);
467bf215546Sopenharmony_ci
468bf215546Sopenharmony_ci      /*
469bf215546Sopenharmony_ci       * Build a type where each element is an integer that cover the four
470bf215546Sopenharmony_ci       * channels.
471bf215546Sopenharmony_ci       */
472bf215546Sopenharmony_ci      struct lp_type type4 = type;
473bf215546Sopenharmony_ci      type4.floating = FALSE;
474bf215546Sopenharmony_ci      type4.width *= 4;
475bf215546Sopenharmony_ci      type4.length /= 4;
476bf215546Sopenharmony_ci
477bf215546Sopenharmony_ci      a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type4), "");
478bf215546Sopenharmony_ci      res = LLVMBuildBitCast(builder, res, lp_build_vec_type(bld->gallivm, type4), "");
479bf215546Sopenharmony_ci
480bf215546Sopenharmony_ci      /*
481bf215546Sopenharmony_ci       * Mask and shift the channels, trying to group as many channels in the
482bf215546Sopenharmony_ci       * same shift as possible.  The shift amount is positive for shifts left
483bf215546Sopenharmony_ci       * and negative for shifts right.
484bf215546Sopenharmony_ci       */
485bf215546Sopenharmony_ci      for (int shift = -3; shift <= 3; ++shift) {
486bf215546Sopenharmony_ci         uint64_t mask = 0;
487bf215546Sopenharmony_ci
488bf215546Sopenharmony_ci         assert(type4.width <= sizeof(mask)*8);
489bf215546Sopenharmony_ci
490bf215546Sopenharmony_ci         /*
491bf215546Sopenharmony_ci          * Vector element numbers follow the XYZW order, so 0 is always X,
492bf215546Sopenharmony_ci          * etc.  After widening 4 times we have:
493bf215546Sopenharmony_ci          *
494bf215546Sopenharmony_ci          *                                3210
495bf215546Sopenharmony_ci          * Little-endian register layout: WZYX
496bf215546Sopenharmony_ci          *
497bf215546Sopenharmony_ci          *                                0123
498bf215546Sopenharmony_ci          * Big-endian register layout:    XYZW
499bf215546Sopenharmony_ci          *
500bf215546Sopenharmony_ci          * For little-endian, higher-numbered channels are obtained by a
501bf215546Sopenharmony_ci          * shift right (negative shift amount) and lower-numbered channels by
502bf215546Sopenharmony_ci          * a shift left (positive shift amount).  The opposite is true for
503bf215546Sopenharmony_ci          * big-endian.
504bf215546Sopenharmony_ci          */
505bf215546Sopenharmony_ci         for (unsigned chan = 0; chan < 4; ++chan) {
506bf215546Sopenharmony_ci            if (swizzles[chan] < 4) {
507bf215546Sopenharmony_ci               /* We need to move channel swizzles[chan] into channel chan */
508bf215546Sopenharmony_ci#if UTIL_ARCH_LITTLE_ENDIAN
509bf215546Sopenharmony_ci               if (swizzles[chan] - chan == -shift) {
510bf215546Sopenharmony_ci                  mask |= ((1ULL << type.width) - 1) << (swizzles[chan] * type.width);
511bf215546Sopenharmony_ci               }
512bf215546Sopenharmony_ci#else
513bf215546Sopenharmony_ci               if (swizzles[chan] - chan == shift) {
514bf215546Sopenharmony_ci                  mask |= ((1ULL << type.width) - 1) << (type4.width - type.width) >> (swizzles[chan] * type.width);
515bf215546Sopenharmony_ci               }
516bf215546Sopenharmony_ci#endif
517bf215546Sopenharmony_ci            }
518bf215546Sopenharmony_ci         }
519bf215546Sopenharmony_ci
520bf215546Sopenharmony_ci         if (mask) {
521bf215546Sopenharmony_ci            LLVMValueRef masked;
522bf215546Sopenharmony_ci            LLVMValueRef shifted;
523bf215546Sopenharmony_ci            if (0)
524bf215546Sopenharmony_ci               debug_printf("shift = %i, mask = %" PRIx64 "\n", shift, mask);
525bf215546Sopenharmony_ci
526bf215546Sopenharmony_ci            masked = LLVMBuildAnd(builder, a,
527bf215546Sopenharmony_ci                                  lp_build_const_int_vec(bld->gallivm, type4, mask), "");
528bf215546Sopenharmony_ci            if (shift > 0) {
529bf215546Sopenharmony_ci               shifted = LLVMBuildShl(builder, masked,
530bf215546Sopenharmony_ci                                      lp_build_const_int_vec(bld->gallivm, type4, shift*type.width), "");
531bf215546Sopenharmony_ci            } else if (shift < 0) {
532bf215546Sopenharmony_ci               shifted = LLVMBuildLShr(builder, masked,
533bf215546Sopenharmony_ci                                       lp_build_const_int_vec(bld->gallivm, type4, -shift*type.width), "");
534bf215546Sopenharmony_ci            } else {
535bf215546Sopenharmony_ci               shifted = masked;
536bf215546Sopenharmony_ci            }
537bf215546Sopenharmony_ci
538bf215546Sopenharmony_ci            res = LLVMBuildOr(builder, res, shifted, "");
539bf215546Sopenharmony_ci         }
540bf215546Sopenharmony_ci      }
541bf215546Sopenharmony_ci
542bf215546Sopenharmony_ci      return LLVMBuildBitCast(builder, res,
543bf215546Sopenharmony_ci                              lp_build_vec_type(bld->gallivm, type), "");
544bf215546Sopenharmony_ci   }
545bf215546Sopenharmony_ci}
546bf215546Sopenharmony_ci
547bf215546Sopenharmony_ci
548bf215546Sopenharmony_ci/**
549bf215546Sopenharmony_ci * Extended swizzle of a single channel of a SoA vector.
550bf215546Sopenharmony_ci *
551bf215546Sopenharmony_ci * @param bld         building context
552bf215546Sopenharmony_ci * @param unswizzled  array with the 4 unswizzled values
553bf215546Sopenharmony_ci * @param swizzle     one of the PIPE_SWIZZLE_*
554bf215546Sopenharmony_ci *
555bf215546Sopenharmony_ci * @return  the swizzled value.
556bf215546Sopenharmony_ci */
557bf215546Sopenharmony_ciLLVMValueRef
558bf215546Sopenharmony_cilp_build_swizzle_soa_channel(struct lp_build_context *bld,
559bf215546Sopenharmony_ci                             const LLVMValueRef *unswizzled,
560bf215546Sopenharmony_ci                             enum pipe_swizzle swizzle)
561bf215546Sopenharmony_ci{
562bf215546Sopenharmony_ci   switch (swizzle) {
563bf215546Sopenharmony_ci   case PIPE_SWIZZLE_X:
564bf215546Sopenharmony_ci   case PIPE_SWIZZLE_Y:
565bf215546Sopenharmony_ci   case PIPE_SWIZZLE_Z:
566bf215546Sopenharmony_ci   case PIPE_SWIZZLE_W:
567bf215546Sopenharmony_ci      return unswizzled[swizzle];
568bf215546Sopenharmony_ci   case PIPE_SWIZZLE_0:
569bf215546Sopenharmony_ci      return bld->zero;
570bf215546Sopenharmony_ci   case PIPE_SWIZZLE_1:
571bf215546Sopenharmony_ci      return bld->one;
572bf215546Sopenharmony_ci   default:
573bf215546Sopenharmony_ci      assert(0);
574bf215546Sopenharmony_ci      return bld->undef;
575bf215546Sopenharmony_ci   }
576bf215546Sopenharmony_ci}
577bf215546Sopenharmony_ci
578bf215546Sopenharmony_ci
579bf215546Sopenharmony_ci/**
580bf215546Sopenharmony_ci * Extended swizzle of a SoA vector.
581bf215546Sopenharmony_ci *
582bf215546Sopenharmony_ci * @param bld         building context
583bf215546Sopenharmony_ci * @param unswizzled  array with the 4 unswizzled values
584bf215546Sopenharmony_ci * @param swizzles    array of PIPE_SWIZZLE_*
585bf215546Sopenharmony_ci * @param swizzled    output swizzled values
586bf215546Sopenharmony_ci */
587bf215546Sopenharmony_civoid
588bf215546Sopenharmony_cilp_build_swizzle_soa(struct lp_build_context *bld,
589bf215546Sopenharmony_ci                     const LLVMValueRef *unswizzled,
590bf215546Sopenharmony_ci                     const unsigned char swizzles[4],
591bf215546Sopenharmony_ci                     LLVMValueRef *swizzled)
592bf215546Sopenharmony_ci{
593bf215546Sopenharmony_ci   for (unsigned chan = 0; chan < 4; ++chan) {
594bf215546Sopenharmony_ci      swizzled[chan] = lp_build_swizzle_soa_channel(bld, unswizzled,
595bf215546Sopenharmony_ci                                                    swizzles[chan]);
596bf215546Sopenharmony_ci   }
597bf215546Sopenharmony_ci}
598bf215546Sopenharmony_ci
599bf215546Sopenharmony_ci
600bf215546Sopenharmony_ci/**
601bf215546Sopenharmony_ci * Do an extended swizzle of a SoA vector inplace.
602bf215546Sopenharmony_ci *
603bf215546Sopenharmony_ci * @param bld         building context
604bf215546Sopenharmony_ci * @param values      intput/output array with the 4 values
605bf215546Sopenharmony_ci * @param swizzles    array of PIPE_SWIZZLE_*
606bf215546Sopenharmony_ci */
607bf215546Sopenharmony_civoid
608bf215546Sopenharmony_cilp_build_swizzle_soa_inplace(struct lp_build_context *bld,
609bf215546Sopenharmony_ci                             LLVMValueRef *values,
610bf215546Sopenharmony_ci                             const unsigned char swizzles[4])
611bf215546Sopenharmony_ci{
612bf215546Sopenharmony_ci   LLVMValueRef unswizzled[4];
613bf215546Sopenharmony_ci
614bf215546Sopenharmony_ci   for (unsigned chan = 0; chan < 4; ++chan) {
615bf215546Sopenharmony_ci      unswizzled[chan] = values[chan];
616bf215546Sopenharmony_ci   }
617bf215546Sopenharmony_ci
618bf215546Sopenharmony_ci   lp_build_swizzle_soa(bld, unswizzled, swizzles, values);
619bf215546Sopenharmony_ci}
620bf215546Sopenharmony_ci
621bf215546Sopenharmony_ci
622bf215546Sopenharmony_ci/**
623bf215546Sopenharmony_ci * Transpose from AOS <-> SOA
624bf215546Sopenharmony_ci *
625bf215546Sopenharmony_ci * @param single_type_lp   type of pixels
626bf215546Sopenharmony_ci * @param src              the 4 * n pixel input
627bf215546Sopenharmony_ci * @param dst              the 4 * n pixel output
628bf215546Sopenharmony_ci */
629bf215546Sopenharmony_civoid
630bf215546Sopenharmony_cilp_build_transpose_aos(struct gallivm_state *gallivm,
631bf215546Sopenharmony_ci                       struct lp_type single_type_lp,
632bf215546Sopenharmony_ci                       const LLVMValueRef src[4],
633bf215546Sopenharmony_ci                       LLVMValueRef dst[4])
634bf215546Sopenharmony_ci{
635bf215546Sopenharmony_ci   struct lp_type double_type_lp = single_type_lp;
636bf215546Sopenharmony_ci   double_type_lp.length >>= 1;
637bf215546Sopenharmony_ci   double_type_lp.width  <<= 1;
638bf215546Sopenharmony_ci
639bf215546Sopenharmony_ci   LLVMTypeRef double_type = lp_build_vec_type(gallivm, double_type_lp);
640bf215546Sopenharmony_ci   LLVMTypeRef single_type = lp_build_vec_type(gallivm, single_type_lp);
641bf215546Sopenharmony_ci
642bf215546Sopenharmony_ci   LLVMValueRef double_type_zero = LLVMConstNull(double_type);
643bf215546Sopenharmony_ci   LLVMValueRef t0 = NULL, t1 = NULL, t2 = NULL, t3 = NULL;
644bf215546Sopenharmony_ci
645bf215546Sopenharmony_ci   /* Interleave x, y, z, w -> xy and zw */
646bf215546Sopenharmony_ci   if (src[0] || src[1]) {
647bf215546Sopenharmony_ci      LLVMValueRef src0 = src[0];
648bf215546Sopenharmony_ci      LLVMValueRef src1 = src[1];
649bf215546Sopenharmony_ci      if (!src0)
650bf215546Sopenharmony_ci         src0 = LLVMConstNull(single_type);
651bf215546Sopenharmony_ci      if (!src1)
652bf215546Sopenharmony_ci         src1 = LLVMConstNull(single_type);
653bf215546Sopenharmony_ci      t0 = lp_build_interleave2_half(gallivm, single_type_lp, src0, src1, 0);
654bf215546Sopenharmony_ci      t2 = lp_build_interleave2_half(gallivm, single_type_lp, src0, src1, 1);
655bf215546Sopenharmony_ci
656bf215546Sopenharmony_ci      /* Cast to double width type for second interleave */
657bf215546Sopenharmony_ci      t0 = LLVMBuildBitCast(gallivm->builder, t0, double_type, "t0");
658bf215546Sopenharmony_ci      t2 = LLVMBuildBitCast(gallivm->builder, t2, double_type, "t2");
659bf215546Sopenharmony_ci   }
660bf215546Sopenharmony_ci   if (src[2] || src[3]) {
661bf215546Sopenharmony_ci      LLVMValueRef src2 = src[2];
662bf215546Sopenharmony_ci      LLVMValueRef src3 = src[3];
663bf215546Sopenharmony_ci      if (!src2)
664bf215546Sopenharmony_ci         src2 = LLVMConstNull(single_type);
665bf215546Sopenharmony_ci      if (!src3)
666bf215546Sopenharmony_ci         src3 = LLVMConstNull(single_type);
667bf215546Sopenharmony_ci      t1 = lp_build_interleave2_half(gallivm, single_type_lp, src2, src3, 0);
668bf215546Sopenharmony_ci      t3 = lp_build_interleave2_half(gallivm, single_type_lp, src2, src3, 1);
669bf215546Sopenharmony_ci
670bf215546Sopenharmony_ci      /* Cast to double width type for second interleave */
671bf215546Sopenharmony_ci      t1 = LLVMBuildBitCast(gallivm->builder, t1, double_type, "t1");
672bf215546Sopenharmony_ci      t3 = LLVMBuildBitCast(gallivm->builder, t3, double_type, "t3");
673bf215546Sopenharmony_ci   }
674bf215546Sopenharmony_ci
675bf215546Sopenharmony_ci   if (!t0)
676bf215546Sopenharmony_ci      t0 = double_type_zero;
677bf215546Sopenharmony_ci   if (!t1)
678bf215546Sopenharmony_ci      t1 = double_type_zero;
679bf215546Sopenharmony_ci   if (!t2)
680bf215546Sopenharmony_ci      t2 = double_type_zero;
681bf215546Sopenharmony_ci   if (!t3)
682bf215546Sopenharmony_ci      t3 = double_type_zero;
683bf215546Sopenharmony_ci
684bf215546Sopenharmony_ci   /* Interleave xy, zw -> xyzw */
685bf215546Sopenharmony_ci   dst[0] = lp_build_interleave2_half(gallivm, double_type_lp, t0, t1, 0);
686bf215546Sopenharmony_ci   dst[1] = lp_build_interleave2_half(gallivm, double_type_lp, t0, t1, 1);
687bf215546Sopenharmony_ci   dst[2] = lp_build_interleave2_half(gallivm, double_type_lp, t2, t3, 0);
688bf215546Sopenharmony_ci   dst[3] = lp_build_interleave2_half(gallivm, double_type_lp, t2, t3, 1);
689bf215546Sopenharmony_ci
690bf215546Sopenharmony_ci   /* Cast back to original single width type */
691bf215546Sopenharmony_ci   dst[0] = LLVMBuildBitCast(gallivm->builder, dst[0], single_type, "dst0");
692bf215546Sopenharmony_ci   dst[1] = LLVMBuildBitCast(gallivm->builder, dst[1], single_type, "dst1");
693bf215546Sopenharmony_ci   dst[2] = LLVMBuildBitCast(gallivm->builder, dst[2], single_type, "dst2");
694bf215546Sopenharmony_ci   dst[3] = LLVMBuildBitCast(gallivm->builder, dst[3], single_type, "dst3");
695bf215546Sopenharmony_ci}
696bf215546Sopenharmony_ci
697bf215546Sopenharmony_ci
698bf215546Sopenharmony_ci/**
699bf215546Sopenharmony_ci * Transpose from AOS <-> SOA for num_srcs
700bf215546Sopenharmony_ci */
701bf215546Sopenharmony_civoid
702bf215546Sopenharmony_cilp_build_transpose_aos_n(struct gallivm_state *gallivm,
703bf215546Sopenharmony_ci                         struct lp_type type,
704bf215546Sopenharmony_ci                         const LLVMValueRef* src,
705bf215546Sopenharmony_ci                         unsigned num_srcs,
706bf215546Sopenharmony_ci                         LLVMValueRef* dst)
707bf215546Sopenharmony_ci{
708bf215546Sopenharmony_ci   switch (num_srcs) {
709bf215546Sopenharmony_ci   case 1:
710bf215546Sopenharmony_ci      dst[0] = src[0];
711bf215546Sopenharmony_ci      break;
712bf215546Sopenharmony_ci   case 2:
713bf215546Sopenharmony_ci   {
714bf215546Sopenharmony_ci      /* Note: we must use a temporary incase src == dst */
715bf215546Sopenharmony_ci      LLVMValueRef lo, hi;
716bf215546Sopenharmony_ci
717bf215546Sopenharmony_ci      lo = lp_build_interleave2_half(gallivm, type, src[0], src[1], 0);
718bf215546Sopenharmony_ci      hi = lp_build_interleave2_half(gallivm, type, src[0], src[1], 1);
719bf215546Sopenharmony_ci
720bf215546Sopenharmony_ci      dst[0] = lo;
721bf215546Sopenharmony_ci      dst[1] = hi;
722bf215546Sopenharmony_ci      break;
723bf215546Sopenharmony_ci   }
724bf215546Sopenharmony_ci   case 4:
725bf215546Sopenharmony_ci      lp_build_transpose_aos(gallivm, type, src, dst);
726bf215546Sopenharmony_ci      break;
727bf215546Sopenharmony_ci   default:
728bf215546Sopenharmony_ci      assert(0);
729bf215546Sopenharmony_ci   }
730bf215546Sopenharmony_ci}
731bf215546Sopenharmony_ci
732bf215546Sopenharmony_ci
733bf215546Sopenharmony_ci/**
734bf215546Sopenharmony_ci * Pack n-th element of aos values,
735bf215546Sopenharmony_ci * pad out to destination size.
736bf215546Sopenharmony_ci * i.e. x1 y1 _ _ x2 y2 _ _ will become x1 x2 _ _
737bf215546Sopenharmony_ci */
738bf215546Sopenharmony_ciLLVMValueRef
739bf215546Sopenharmony_cilp_build_pack_aos_scalars(struct gallivm_state *gallivm,
740bf215546Sopenharmony_ci                          struct lp_type src_type,
741bf215546Sopenharmony_ci                          struct lp_type dst_type,
742bf215546Sopenharmony_ci                          const LLVMValueRef src,
743bf215546Sopenharmony_ci                          unsigned channel)
744bf215546Sopenharmony_ci{
745bf215546Sopenharmony_ci   LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
746bf215546Sopenharmony_ci   LLVMValueRef undef = LLVMGetUndef(i32t);
747bf215546Sopenharmony_ci   LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
748bf215546Sopenharmony_ci   unsigned num_src = src_type.length / 4;
749bf215546Sopenharmony_ci   unsigned num_dst = dst_type.length;
750bf215546Sopenharmony_ci
751bf215546Sopenharmony_ci   assert(num_src <= num_dst);
752bf215546Sopenharmony_ci
753bf215546Sopenharmony_ci   for (unsigned i = 0; i < num_src; i++) {
754bf215546Sopenharmony_ci      shuffles[i] = LLVMConstInt(i32t, i * 4 + channel, 0);
755bf215546Sopenharmony_ci   }
756bf215546Sopenharmony_ci   for (unsigned i = num_src; i < num_dst; i++) {
757bf215546Sopenharmony_ci      shuffles[i] = undef;
758bf215546Sopenharmony_ci   }
759bf215546Sopenharmony_ci
760bf215546Sopenharmony_ci   if (num_dst == 1) {
761bf215546Sopenharmony_ci      return LLVMBuildExtractElement(gallivm->builder, src, shuffles[0], "");
762bf215546Sopenharmony_ci   }
763bf215546Sopenharmony_ci   else {
764bf215546Sopenharmony_ci      return LLVMBuildShuffleVector(gallivm->builder, src, src,
765bf215546Sopenharmony_ci                                    LLVMConstVector(shuffles, num_dst), "");
766bf215546Sopenharmony_ci   }
767bf215546Sopenharmony_ci}
768bf215546Sopenharmony_ci
769bf215546Sopenharmony_ci
770bf215546Sopenharmony_ci/**
771bf215546Sopenharmony_ci * Unpack and broadcast packed aos values consisting of only the
772bf215546Sopenharmony_ci * first value, i.e. x1 x2 _ _ will become x1 x1 x1 x1 x2 x2 x2 x2
773bf215546Sopenharmony_ci */
774bf215546Sopenharmony_ciLLVMValueRef
775bf215546Sopenharmony_cilp_build_unpack_broadcast_aos_scalars(struct gallivm_state *gallivm,
776bf215546Sopenharmony_ci                                      struct lp_type src_type,
777bf215546Sopenharmony_ci                                      struct lp_type dst_type,
778bf215546Sopenharmony_ci                                      const LLVMValueRef src)
779bf215546Sopenharmony_ci{
780bf215546Sopenharmony_ci   LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
781bf215546Sopenharmony_ci   LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
782bf215546Sopenharmony_ci   unsigned num_dst = dst_type.length;
783bf215546Sopenharmony_ci   unsigned num_src = dst_type.length / 4;
784bf215546Sopenharmony_ci
785bf215546Sopenharmony_ci   assert(num_dst / 4 <= src_type.length);
786bf215546Sopenharmony_ci
787bf215546Sopenharmony_ci   for (unsigned i = 0; i < num_src; i++) {
788bf215546Sopenharmony_ci      shuffles[i*4] = LLVMConstInt(i32t, i, 0);
789bf215546Sopenharmony_ci      shuffles[i*4+1] = LLVMConstInt(i32t, i, 0);
790bf215546Sopenharmony_ci      shuffles[i*4+2] = LLVMConstInt(i32t, i, 0);
791bf215546Sopenharmony_ci      shuffles[i*4+3] = LLVMConstInt(i32t, i, 0);
792bf215546Sopenharmony_ci   }
793bf215546Sopenharmony_ci
794bf215546Sopenharmony_ci   if (num_src == 1) {
795bf215546Sopenharmony_ci      return lp_build_extract_broadcast(gallivm, src_type, dst_type,
796bf215546Sopenharmony_ci                                        src, shuffles[0]);
797bf215546Sopenharmony_ci   } else {
798bf215546Sopenharmony_ci      return LLVMBuildShuffleVector(gallivm->builder, src, src,
799bf215546Sopenharmony_ci                                    LLVMConstVector(shuffles, num_dst), "");
800bf215546Sopenharmony_ci   }
801bf215546Sopenharmony_ci}
802bf215546Sopenharmony_ci
803