/**************************************************************************
 *
 * Copyright 2009 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/**
 * @file
 * Helper functions for swizzling/shuffling.
 *
 * @author Jose Fonseca <jfonseca@vmware.com>
 */

#include <inttypes.h>  /* for PRIx64 macro */
#include "util/compiler.h"
#include "util/u_debug.h"

#include "lp_bld_type.h"
#include "lp_bld_const.h"
#include "lp_bld_init.h"
#include "lp_bld_logic.h"
#include "lp_bld_swizzle.h"
#include "lp_bld_pack.h"


LLVMValueRef
lp_build_broadcast(struct gallivm_state *gallivm,
                   LLVMTypeRef vec_type,
                   LLVMValueRef scalar)
{
   LLVMValueRef res;

   if (LLVMGetTypeKind(vec_type) != LLVMVectorTypeKind) {
      /* scalar */
      assert(vec_type == LLVMTypeOf(scalar));
      res = scalar;
   } else {
      LLVMBuilderRef builder = gallivm->builder;
      const unsigned length = LLVMGetVectorSize(vec_type);
      LLVMValueRef undef = LLVMGetUndef(vec_type);
      /* The shuffle vector is always made of int32 elements */
      LLVMTypeRef i32_type = LLVMInt32TypeInContext(gallivm->context);
      LLVMTypeRef i32_vec_type = LLVMVectorType(i32_type, length);

      assert(LLVMGetElementType(vec_type) == LLVMTypeOf(scalar));

      res = LLVMBuildInsertElement(builder, undef, scalar, LLVMConstNull(i32_type), "");
      res = LLVMBuildShuffleVector(builder, res, undef, LLVMConstNull(i32_vec_type), "");
   }

   return res;
}


/**
 * Broadcast
 */
LLVMValueRef
lp_build_broadcast_scalar(struct lp_build_context *bld,
                          LLVMValueRef scalar)
{
   assert(lp_check_elem_type(bld->type, LLVMTypeOf(scalar)));

   return lp_build_broadcast(bld->gallivm, bld->vec_type, scalar);
}

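/*
 * Example usage sketch: broadcasting a constant into every lane of bld's
 * vector type (assuming a float build context; `two` and `vec` are
 * illustrative names, not part of this file).
 *
 *    LLVMValueRef two = lp_build_const_elem(bld->gallivm, bld->type, 2.0);
 *    LLVMValueRef vec = lp_build_broadcast_scalar(bld, two);
 *
 * For a 4 x float32 type this yields <2.0, 2.0, 2.0, 2.0>.
 */
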
/**
 * Combined extract and broadcast (mere shuffle in most cases)
 */
LLVMValueRef
lp_build_extract_broadcast(struct gallivm_state *gallivm,
                           struct lp_type src_type,
                           struct lp_type dst_type,
                           LLVMValueRef vector,
                           LLVMValueRef index)
{
   LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
   LLVMValueRef res;

   assert(src_type.floating == dst_type.floating);
   assert(src_type.width == dst_type.width);

   assert(lp_check_value(src_type, vector));
   assert(LLVMTypeOf(index) == i32t);

   if (src_type.length == 1) {
      if (dst_type.length == 1) {
         /*
          * Trivial scalar -> scalar.
          */
         res = vector;
      } else {
         /*
          * Broadcast scalar -> vector.
          */
         res = lp_build_broadcast(gallivm,
                                  lp_build_vec_type(gallivm, dst_type),
                                  vector);
      }
   } else {
      if (dst_type.length > 1) {
         /*
          * shuffle - result can be of different length.
          */
         LLVMValueRef shuffle;
         shuffle = lp_build_broadcast(gallivm,
                                      LLVMVectorType(i32t, dst_type.length),
                                      index);
         res = LLVMBuildShuffleVector(gallivm->builder, vector,
                                      LLVMGetUndef(lp_build_vec_type(gallivm, src_type)),
                                      shuffle, "");
      } else {
         /*
          * Trivial extract scalar from vector.
          */
         res = LLVMBuildExtractElement(gallivm->builder, vector, index, "");
      }
   }

   return res;
}

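/*
 * Example usage sketch: extracting lane 2 of a 4-wide vector and
 * broadcasting it into an 8-wide vector of the same element type.  `vec`,
 * `src_type` and `dst_type` are illustrative; only their lengths differ.
 *
 *    LLVMValueRef idx = lp_build_const_int32(gallivm, 2);
 *    LLVMValueRef bcast = lp_build_extract_broadcast(gallivm, src_type,
 *                                                    dst_type, vec, idx);
 *
 *    <a b c d>  ->  <c c c c c c c c>
 */
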
/**
 * Swizzle one channel into other channels.
 */
LLVMValueRef
lp_build_swizzle_scalar_aos(struct lp_build_context *bld,
                            LLVMValueRef a,
                            unsigned channel,
                            unsigned num_channels)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   const struct lp_type type = bld->type;
   const unsigned n = type.length;

   if (a == bld->undef || a == bld->zero || a == bld->one || num_channels == 1)
      return a;

   assert(num_channels == 2 || num_channels == 4);

   /* XXX: SSSE3 has PSHUFB which should be better than bitmasks, but forcing
    * the use of shuffles here actually causes worse results.  More
    * investigation is needed. */
   if (LLVMIsConstant(a) || type.width >= 16) {
      /*
       * Shuffle.
       */
      LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];

      for (unsigned j = 0; j < n; j += num_channels)
         for (unsigned i = 0; i < num_channels; ++i)
            shuffles[j + i] = LLVMConstInt(elem_type, j + channel, 0);

      return LLVMBuildShuffleVector(builder, a, bld->undef, LLVMConstVector(shuffles, n), "");
   } else if (num_channels == 2) {
      /*
       * Bit mask and shifts
       *
       * XY XY .... XY  <= input
       * 0Y 0Y .... 0Y
       * YY YY .... YY
       * YY YY .... YY  <= output
       */
      struct lp_type type2;
      LLVMValueRef tmp = NULL;
      int shift;

      a = LLVMBuildAnd(builder, a,
                       lp_build_const_mask_aos(bld->gallivm,
                                               type, 1 << channel, num_channels), "");

      type2 = type;
      type2.floating = FALSE;
      type2.width *= 2;
      type2.length /= 2;

      a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type2), "");

      /*
       * Vector element 0 is always channel X.
       *
       *                        76 54 32 10 (array numbering)
       * Little endian reg in:  YX YX YX YX
       * Little endian reg out: YY YY YY YY if shift right (shift == -1)
       *                        XX XX XX XX if shift left  (shift ==  1)
       *
       *                        01 23 45 67 (array numbering)
       * Big endian reg in:     XY XY XY XY
       * Big endian reg out:    YY YY YY YY if shift left  (shift ==  1)
       *                        XX XX XX XX if shift right (shift == -1)
       *
       */
#if UTIL_ARCH_LITTLE_ENDIAN
      shift = channel == 0 ? 1 : -1;
#else
      shift = channel == 0 ? -1 : 1;
#endif

      if (shift > 0) {
         tmp = LLVMBuildShl(builder, a, lp_build_const_int_vec(bld->gallivm, type2, shift * type.width), "");
      } else if (shift < 0) {
         tmp = LLVMBuildLShr(builder, a, lp_build_const_int_vec(bld->gallivm, type2, -shift * type.width), "");
      }

      assert(tmp);
      if (tmp) {
         a = LLVMBuildOr(builder, a, tmp, "");
      }

      return LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type), "");
   } else {
      /*
       * Bit mask and recursive shifts
       *
       * Little-endian registers:
       *
       *   7654 3210
       *   WZYX WZYX .... WZYX  <= input
       *   00Y0 00Y0 .... 00Y0  <= mask
       *   00YY 00YY .... 00YY  <= shift right 1 (shift amount -1)
       *   YYYY YYYY .... YYYY  <= shift left 2  (shift amount  2)
       *
       * Big-endian registers:
       *
       *   0123 4567
       *   XYZW XYZW .... XYZW  <= input
       *   0Y00 0Y00 .... 0Y00  <= mask
       *   YY00 YY00 .... YY00  <= shift left 1  (shift amount  1)
       *   YYYY YYYY .... YYYY  <= shift right 2 (shift amount -2)
       *
       * shifts[] gives little-endian shift amounts; we need to negate for
       * big-endian.
       */
      static const int shifts[4][2] = {
         { 1,  2},
         {-1,  2},
         { 1, -2},
         {-1, -2}
      };

      a = LLVMBuildAnd(builder, a,
                       lp_build_const_mask_aos(bld->gallivm,
                                               type, 1 << channel, 4), "");

      /*
       * Build a type where each element is an integer that covers the four
       * channels.
       */

      struct lp_type type4 = type;
      type4.floating = FALSE;
      type4.width *= 4;
      type4.length /= 4;

      a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type4), "");

      for (unsigned i = 0; i < 2; ++i) {
         LLVMValueRef tmp = NULL;
         int shift = shifts[channel][i];

         /* See endianness diagram above */
#if UTIL_ARCH_BIG_ENDIAN
         shift = -shift;
#endif

         if (shift > 0)
            tmp = LLVMBuildShl(builder, a, lp_build_const_int_vec(bld->gallivm, type4, shift*type.width), "");
         if (shift < 0)
            tmp = LLVMBuildLShr(builder, a, lp_build_const_int_vec(bld->gallivm, type4, -shift*type.width), "");

         assert(tmp);
         if (tmp)
            a = LLVMBuildOr(builder, a, tmp, "");
      }

      return LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type), "");
   }
}

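/*
 * Worked example of the bit-mask path above (a sketch, for 8-bit channels,
 * little endian, num_channels == 4, channel == 1, i.e. broadcasting Y):
 * per 32-bit XYZW group the two passes amount to
 *
 *    t = v & 0x0000ff00        mask out channel Y
 *    v = t | (t >> 8)          shifts[1][0] == -1, giving 0x0000ffff == YY
 *    v = v | (v << 16)         shifts[1][1] ==  2, giving 0xffffffff == YYYY
 *
 * with the shift amounts scaled by type.width (8 bits here).
 */
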
/**
 * Swizzle a vector consisting of an array of XYZW structs.
 *
 * This fills a vector of dst_len length with the swizzled channels from src.
 *
 * e.g. with swizzles = { 2, 1, 0 }, num_swizzles = 3 and dst_len = 8:
 *      RGBA RGBA -> BGR BGR BG
 *
 * @param swizzles      the swizzle array
 * @param num_swizzles  the number of elements in swizzles
 * @param dst_len       the length of the result
 */
LLVMValueRef
lp_build_swizzle_aos_n(struct gallivm_state* gallivm,
                       LLVMValueRef src,
                       const unsigned char* swizzles,
                       unsigned num_swizzles,
                       unsigned dst_len)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH];

   assert(dst_len < LP_MAX_VECTOR_WIDTH);

   for (unsigned i = 0; i < dst_len; ++i) {
      int swizzle = swizzles[i % num_swizzles];

      if (swizzle == LP_BLD_SWIZZLE_DONTCARE) {
         shuffles[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
      } else {
         shuffles[i] = lp_build_const_int32(gallivm, swizzle);
      }
   }

   return LLVMBuildShuffleVector(builder, src,
                                 LLVMGetUndef(LLVMTypeOf(src)),
                                 LLVMConstVector(shuffles, dst_len), "");
}

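/*
 * Example usage sketch: reversing the channel order of a 4-element AoS
 * vector (`rgba` stands for an illustrative value of that shape).
 *
 *    static const unsigned char rev[4] = { 3, 2, 1, 0 };
 *    LLVMValueRef abgr = lp_build_swizzle_aos_n(gallivm, rgba, rev, 4, 4);
 */
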
LLVMValueRef
lp_build_swizzle_aos(struct lp_build_context *bld,
                     LLVMValueRef a,
                     const unsigned char swizzles[4])
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   const struct lp_type type = bld->type;
   const unsigned n = type.length;

   if (swizzles[0] == PIPE_SWIZZLE_X &&
       swizzles[1] == PIPE_SWIZZLE_Y &&
       swizzles[2] == PIPE_SWIZZLE_Z &&
       swizzles[3] == PIPE_SWIZZLE_W) {
      return a;
   }

   if (swizzles[0] == swizzles[1] &&
       swizzles[1] == swizzles[2] &&
       swizzles[2] == swizzles[3]) {
      switch (swizzles[0]) {
      case PIPE_SWIZZLE_X:
      case PIPE_SWIZZLE_Y:
      case PIPE_SWIZZLE_Z:
      case PIPE_SWIZZLE_W:
         return lp_build_swizzle_scalar_aos(bld, a, swizzles[0], 4);
      case PIPE_SWIZZLE_0:
         return bld->zero;
      case PIPE_SWIZZLE_1:
         return bld->one;
      case LP_BLD_SWIZZLE_DONTCARE:
         return bld->undef;
      default:
         assert(0);
         return bld->undef;
      }
   }

   if (LLVMIsConstant(a) ||
       type.width >= 16) {
      /*
       * Shuffle.
       */
      LLVMValueRef undef = LLVMGetUndef(lp_build_elem_type(bld->gallivm, type));
      LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
      LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
      LLVMValueRef aux[LP_MAX_VECTOR_LENGTH];

      memset(aux, 0, sizeof aux);

      for (unsigned j = 0; j < n; j += 4) {
         for (unsigned i = 0; i < 4; ++i) {
            unsigned shuffle;
            switch (swizzles[i]) {
            default:
               assert(0);
            case PIPE_SWIZZLE_X:
            case PIPE_SWIZZLE_Y:
            case PIPE_SWIZZLE_Z:
            case PIPE_SWIZZLE_W:
               shuffle = j + swizzles[i];
               shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
               break;
            case PIPE_SWIZZLE_0:
               shuffle = type.length + 0;
               shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
               if (!aux[0]) {
                  aux[0] = lp_build_const_elem(bld->gallivm, type, 0.0);
               }
               break;
            case PIPE_SWIZZLE_1:
               shuffle = type.length + 1;
               shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
               if (!aux[1]) {
                  aux[1] = lp_build_const_elem(bld->gallivm, type, 1.0);
               }
               break;
            case LP_BLD_SWIZZLE_DONTCARE:
               shuffles[j + i] = LLVMGetUndef(i32t);
               break;
            }
         }
      }

      for (unsigned i = 0; i < n; ++i) {
         if (!aux[i]) {
            aux[i] = undef;
         }
      }

      return LLVMBuildShuffleVector(builder, a,
                                    LLVMConstVector(aux, n),
                                    LLVMConstVector(shuffles, n), "");
   } else {
      /*
       * Bit mask and shifts.
       *
       * For example, this will convert BGRA to RGBA by doing
       *
       * Little endian:
       *    rgba = (bgra & 0x00ff0000) >> 16
       *         | (bgra & 0xff00ff00)
       *         | (bgra & 0x000000ff) << 16
       *
       * Big endian:
       *    rgba = (bgra & 0x0000ff00) << 16
       *         | (bgra & 0x00ff00ff)
       *         | (bgra & 0xff000000) >> 16
       *
       * This is necessary not only for speed, but also because the X86
       * backend will refuse shuffles of <4 x i8> vectors.
       */

      /*
       * Start with a mixture of 1 and 0.
       */
      unsigned cond = 0;
      for (unsigned chan = 0; chan < 4; ++chan) {
         if (swizzles[chan] == PIPE_SWIZZLE_1) {
            cond |= 1 << chan;
         }
      }
      LLVMValueRef res =
         lp_build_select_aos(bld, cond, bld->one, bld->zero, 4);

      /*
       * Build a type where each element is an integer that covers the four
       * channels.
       */
      struct lp_type type4 = type;
      type4.floating = FALSE;
      type4.width *= 4;
      type4.length /= 4;

      a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type4), "");
      res = LLVMBuildBitCast(builder, res, lp_build_vec_type(bld->gallivm, type4), "");

      /*
       * Mask and shift the channels, trying to group as many channels in the
       * same shift as possible.  The shift amount is positive for shifts left
       * and negative for shifts right.
       */
      for (int shift = -3; shift <= 3; ++shift) {
         uint64_t mask = 0;

         assert(type4.width <= sizeof(mask)*8);

         /*
          * Vector element numbers follow the XYZW order, so 0 is always X,
          * etc.  After widening 4 times we have:
          *
          *                                  3210
          * Little-endian register layout:   WZYX
          *
          *                                  0123
          * Big-endian register layout:      XYZW
          *
          * For little-endian, higher-numbered channels are obtained by a
          * shift right (negative shift amount) and lower-numbered channels by
          * a shift left (positive shift amount).  The opposite is true for
          * big-endian.
          */
         for (unsigned chan = 0; chan < 4; ++chan) {
            if (swizzles[chan] < 4) {
               /* We need to move channel swizzles[chan] into channel chan */
#if UTIL_ARCH_LITTLE_ENDIAN
               if (swizzles[chan] - chan == -shift) {
                  mask |= ((1ULL << type.width) - 1) << (swizzles[chan] * type.width);
               }
#else
               if (swizzles[chan] - chan == shift) {
                  mask |= ((1ULL << type.width) - 1) << (type4.width - type.width) >> (swizzles[chan] * type.width);
               }
#endif
            }
         }

         if (mask) {
            LLVMValueRef masked;
            LLVMValueRef shifted;
            if (0)
               debug_printf("shift = %i, mask = %" PRIx64 "\n", shift, mask);

            masked = LLVMBuildAnd(builder, a,
                                  lp_build_const_int_vec(bld->gallivm, type4, mask), "");
            if (shift > 0) {
               shifted = LLVMBuildShl(builder, masked,
                                      lp_build_const_int_vec(bld->gallivm, type4, shift*type.width), "");
            } else if (shift < 0) {
               shifted = LLVMBuildLShr(builder, masked,
                                       lp_build_const_int_vec(bld->gallivm, type4, -shift*type.width), "");
            } else {
               shifted = masked;
            }

            res = LLVMBuildOr(builder, res, shifted, "");
         }
      }

      return LLVMBuildBitCast(builder, res,
                              lp_build_vec_type(bld->gallivm, type), "");
   }
}

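/*
 * Example usage sketch for lp_build_swizzle_aos(): converting BGRA to RGBA
 * order.  `bgra` stands for an illustrative 8-bit-per-channel AoS value of
 * bld's type; with type.width < 16 and a non-constant input the bit-mask
 * path above is taken, which reduces to the expressions in its comment.
 *
 *    static const unsigned char bgra_to_rgba[4] = {
 *       PIPE_SWIZZLE_Z, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_X, PIPE_SWIZZLE_W
 *    };
 *    LLVMValueRef rgba = lp_build_swizzle_aos(bld, bgra, bgra_to_rgba);
 */
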
/**
 * Extended swizzle of a single channel of a SoA vector.
 *
 * @param bld         building context
 * @param unswizzled  array with the 4 unswizzled values
 * @param swizzle     one of the PIPE_SWIZZLE_*
 *
 * @return the swizzled value.
 */
LLVMValueRef
lp_build_swizzle_soa_channel(struct lp_build_context *bld,
                             const LLVMValueRef *unswizzled,
                             enum pipe_swizzle swizzle)
{
   switch (swizzle) {
   case PIPE_SWIZZLE_X:
   case PIPE_SWIZZLE_Y:
   case PIPE_SWIZZLE_Z:
   case PIPE_SWIZZLE_W:
      return unswizzled[swizzle];
   case PIPE_SWIZZLE_0:
      return bld->zero;
   case PIPE_SWIZZLE_1:
      return bld->one;
   default:
      assert(0);
      return bld->undef;
   }
}


/**
 * Extended swizzle of a SoA vector.
 *
 * @param bld         building context
 * @param unswizzled  array with the 4 unswizzled values
 * @param swizzles    array of PIPE_SWIZZLE_*
 * @param swizzled    output swizzled values
 */
void
lp_build_swizzle_soa(struct lp_build_context *bld,
                     const LLVMValueRef *unswizzled,
                     const unsigned char swizzles[4],
                     LLVMValueRef *swizzled)
{
   for (unsigned chan = 0; chan < 4; ++chan) {
      swizzled[chan] = lp_build_swizzle_soa_channel(bld, unswizzled,
                                                    swizzles[chan]);
   }
}


/**
 * Do an extended swizzle of a SoA vector in place.
 *
 * @param bld       building context
 * @param values    input/output array with the 4 values
 * @param swizzles  array of PIPE_SWIZZLE_*
 */
void
lp_build_swizzle_soa_inplace(struct lp_build_context *bld,
                             LLVMValueRef *values,
                             const unsigned char swizzles[4])
{
   LLVMValueRef unswizzled[4];

   for (unsigned chan = 0; chan < 4; ++chan) {
      unswizzled[chan] = values[chan];
   }

   lp_build_swizzle_soa(bld, unswizzled, swizzles, values);
}

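/*
 * Example usage sketch for the SoA helpers: remapping four SoA registers
 * (one per channel) according to a format swizzle.  `soa_in` and `soa_out`
 * are illustrative names; PIPE_SWIZZLE_1 lanes simply pick up bld->one.
 *
 *    static const unsigned char sw[4] = {
 *       PIPE_SWIZZLE_Z, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_X, PIPE_SWIZZLE_1
 *    };
 *    LLVMValueRef soa_out[4];
 *    lp_build_swizzle_soa(bld, soa_in, sw, soa_out);
 */
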
/**
 * Transpose from AOS <-> SOA
 *
 * @param single_type_lp  type of pixels
 * @param src             the 4 * n pixel input
 * @param dst             the 4 * n pixel output
 */
void
lp_build_transpose_aos(struct gallivm_state *gallivm,
                       struct lp_type single_type_lp,
                       const LLVMValueRef src[4],
                       LLVMValueRef dst[4])
{
   struct lp_type double_type_lp = single_type_lp;
   double_type_lp.length >>= 1;
   double_type_lp.width <<= 1;

   LLVMTypeRef double_type = lp_build_vec_type(gallivm, double_type_lp);
   LLVMTypeRef single_type = lp_build_vec_type(gallivm, single_type_lp);

   LLVMValueRef double_type_zero = LLVMConstNull(double_type);
   LLVMValueRef t0 = NULL, t1 = NULL, t2 = NULL, t3 = NULL;

   /* Interleave x, y, z, w -> xy and zw */
   if (src[0] || src[1]) {
      LLVMValueRef src0 = src[0];
      LLVMValueRef src1 = src[1];
      if (!src0)
         src0 = LLVMConstNull(single_type);
      if (!src1)
         src1 = LLVMConstNull(single_type);
      t0 = lp_build_interleave2_half(gallivm, single_type_lp, src0, src1, 0);
      t2 = lp_build_interleave2_half(gallivm, single_type_lp, src0, src1, 1);

      /* Cast to double width type for second interleave */
      t0 = LLVMBuildBitCast(gallivm->builder, t0, double_type, "t0");
      t2 = LLVMBuildBitCast(gallivm->builder, t2, double_type, "t2");
   }
   if (src[2] || src[3]) {
      LLVMValueRef src2 = src[2];
      LLVMValueRef src3 = src[3];
      if (!src2)
         src2 = LLVMConstNull(single_type);
      if (!src3)
         src3 = LLVMConstNull(single_type);
      t1 = lp_build_interleave2_half(gallivm, single_type_lp, src2, src3, 0);
      t3 = lp_build_interleave2_half(gallivm, single_type_lp, src2, src3, 1);

      /* Cast to double width type for second interleave */
      t1 = LLVMBuildBitCast(gallivm->builder, t1, double_type, "t1");
      t3 = LLVMBuildBitCast(gallivm->builder, t3, double_type, "t3");
   }

   if (!t0)
      t0 = double_type_zero;
   if (!t1)
      t1 = double_type_zero;
   if (!t2)
      t2 = double_type_zero;
   if (!t3)
      t3 = double_type_zero;

   /* Interleave xy, zw -> xyzw */
   dst[0] = lp_build_interleave2_half(gallivm, double_type_lp, t0, t1, 0);
   dst[1] = lp_build_interleave2_half(gallivm, double_type_lp, t0, t1, 1);
   dst[2] = lp_build_interleave2_half(gallivm, double_type_lp, t2, t3, 0);
   dst[3] = lp_build_interleave2_half(gallivm, double_type_lp, t2, t3, 1);

   /* Cast back to original single width type */
   dst[0] = LLVMBuildBitCast(gallivm->builder, dst[0], single_type, "dst0");
   dst[1] = LLVMBuildBitCast(gallivm->builder, dst[1], single_type, "dst1");
   dst[2] = LLVMBuildBitCast(gallivm->builder, dst[2], single_type, "dst2");
   dst[3] = LLVMBuildBitCast(gallivm->builder, dst[3], single_type, "dst3");
}


/**
 * Transpose from AOS <-> SOA for num_srcs
 */
void
lp_build_transpose_aos_n(struct gallivm_state *gallivm,
                         struct lp_type type,
                         const LLVMValueRef* src,
                         unsigned num_srcs,
                         LLVMValueRef* dst)
{
   switch (num_srcs) {
   case 1:
      dst[0] = src[0];
      break;
   case 2:
   {
      /* Note: we must use a temporary in case src == dst */
      LLVMValueRef lo, hi;

      lo = lp_build_interleave2_half(gallivm, type, src[0], src[1], 0);
      hi = lp_build_interleave2_half(gallivm, type, src[0], src[1], 1);

      dst[0] = lo;
      dst[1] = hi;
      break;
   }
   case 4:
      lp_build_transpose_aos(gallivm, type, src, dst);
      break;
   default:
      assert(0);
   }
}

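/*
 * Illustration of lp_build_transpose_aos() for a 4-wide type (a sketch of
 * the data movement only): the two interleave passes turn four AoS pixels
 * into four SoA registers.
 *
 *    src[0] = x0 y0 z0 w0           dst[0] = x0 x1 x2 x3
 *    src[1] = x1 y1 z1 w1    --->   dst[1] = y0 y1 y2 y3
 *    src[2] = x2 y2 z2 w2           dst[2] = z0 z1 z2 z3
 *    src[3] = x3 y3 z3 w3           dst[3] = w0 w1 w2 w3
 */
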
/**
 * Pack n-th element of aos values,
 * pad out to destination size.
 * i.e. x1 y1 _ _ x2 y2 _ _ will become x1 x2 _ _
 */
LLVMValueRef
lp_build_pack_aos_scalars(struct gallivm_state *gallivm,
                          struct lp_type src_type,
                          struct lp_type dst_type,
                          const LLVMValueRef src,
                          unsigned channel)
{
   LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
   LLVMValueRef undef = LLVMGetUndef(i32t);
   LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
   unsigned num_src = src_type.length / 4;
   unsigned num_dst = dst_type.length;

   assert(num_src <= num_dst);

   for (unsigned i = 0; i < num_src; i++) {
      shuffles[i] = LLVMConstInt(i32t, i * 4 + channel, 0);
   }
   for (unsigned i = num_src; i < num_dst; i++) {
      shuffles[i] = undef;
   }

   if (num_dst == 1) {
      return LLVMBuildExtractElement(gallivm->builder, src, shuffles[0], "");
   }
   else {
      return LLVMBuildShuffleVector(gallivm->builder, src, src,
                                    LLVMConstVector(shuffles, num_dst), "");
   }
}


/**
 * Unpack and broadcast packed aos values consisting of only the
 * first value, i.e. x1 x2 _ _ will become x1 x1 x1 x1 x2 x2 x2 x2
 */
LLVMValueRef
lp_build_unpack_broadcast_aos_scalars(struct gallivm_state *gallivm,
                                      struct lp_type src_type,
                                      struct lp_type dst_type,
                                      const LLVMValueRef src)
{
   LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
   LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
   unsigned num_dst = dst_type.length;
   unsigned num_src = dst_type.length / 4;

   assert(num_dst / 4 <= src_type.length);

   for (unsigned i = 0; i < num_src; i++) {
      shuffles[i*4] = LLVMConstInt(i32t, i, 0);
      shuffles[i*4+1] = LLVMConstInt(i32t, i, 0);
      shuffles[i*4+2] = LLVMConstInt(i32t, i, 0);
      shuffles[i*4+3] = LLVMConstInt(i32t, i, 0);
   }

   if (num_src == 1) {
      return lp_build_extract_broadcast(gallivm, src_type, dst_type,
                                        src, shuffles[0]);
   } else {
      return LLVMBuildShuffleVector(gallivm->builder, src, src,
                                    LLVMConstVector(shuffles, num_dst), "");
   }
}

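/*
 * Example sketch for the two helpers above, assuming 4-element source and
 * destination types and channel == 0:
 *
 *    lp_build_pack_aos_scalars:              x1 y1 z1 w1  ->  x1 _ _ _
 *    lp_build_unpack_broadcast_aos_scalars:  x1 x2 x3 x4  ->  x1 x1 x1 x1
 */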