Lines Matching refs:items

92   const __m128i v = *(const __m128i *)(const void *)(items + (i) * 8); \

93 *( __m128i *)( void *)(items + (i) * 8) = \
105 SwapBytes2_128(CSwapUInt16 *items, const CSwapUInt16 *lim)
110 SWAP2_128(0) SWAP2_128(1) items += 2 * 8;
111 SWAP2_128(0) SWAP2_128(1) items += 2 * 8;
113 while (items != lim);
119 __m128i v = *(const __m128i *)(const void *)(items + (i) * 4); \
126 *(__m128i *)(void *)(items + (i) * 4) = _mm_packus_epi16(v0, v1); }
134 SwapBytes4_128_pack(CSwapUInt32 *items, const CSwapUInt32 *lim)
141 SWAP4_128_pack(0); items += 1 * 4;
142 // SWAP4_128_pack(0); SWAP4_128_pack(1); items += 2 * 4;
144 while (items != lim);
149 __m128i v = *(const __m128i *)(const void *)(items + (i) * 4); \
157 *(__m128i *)(void *)(items + (i) * 4) = _mm_or_si128(v2, v); }
165 SwapBytes4_128_shift(CSwapUInt32 *items, const CSwapUInt32 *lim)
172 // SWAP4_128_shift(0) SWAP4_128_shift(1) items += 2 * 4;
173 // SWAP4_128_shift(0) SWAP4_128_shift(1) items += 2 * 4;
174 SWAP4_128_shift(0); items += 1 * 4;
176 while (items != lim);
236 #define SHUF_128(i) *(items + (i)) = \
237 _mm_shuffle_epi8(*(items + (i)), mask); // SSSE3
249 __m128i *items = (__m128i *)items8;
260 SHUF_128(0) SHUF_128(1) items += 2;
261 SHUF_128(0) SHUF_128(1) items += 2;
263 while (items != lim);
278 #define SHUF_256(i) *(items + (i)) = \
279 _mm256_shuffle_epi8(*(items + (i)), mask); // AVX2
291 __m256i *items = (__m256i *)items8;
320 SHUF_256(0) SHUF_256(1) items += 2;
321 SHUF_256(0) SHUF_256(1) items += 2;
323 while (items != lim);
371 #define SWAP2_128(i) *(uint8x16_t *) (void *)(items + (i) * 8) = \
372 vrev16q_u8(*(const uint8x16_t *)(const void *)(items + (i) * 8));
373 #define SWAP4_128(i) *(uint8x16_t *) (void *)(items + (i) * 4) = \
374 vrev32q_u8(*(const uint8x16_t *)(const void *)(items + (i) * 4));
384 SwapBytes2_128(CSwapUInt16 *items, const CSwapUInt16 *lim)
389 SWAP2_128(0) SWAP2_128(1) items += 2 * 8;
390 SWAP2_128(0) SWAP2_128(1) items += 2 * 8;
392 while (items != lim);
403 SwapBytes4_128(CSwapUInt32 *items, const CSwapUInt32 *lim)
408 SWAP4_128(0) SWAP4_128(1) items += 2 * 4;
409 SWAP4_128(0) SWAP4_128(1) items += 2 * 4;
411 while (items != lim);
429 #define SWAP2_16(i) { UInt32 v = items[i]; v += (v << 16); v >>= 8; items[i] = (CSwapUInt16)v; }
432 #define SWAP2_16(i) { items[i] = _rotr16(items[i], 8); }
435 #define SWAP2_16(i) { CSwapUInt16 v = items[i]; items[i] = Z7_BSWAP16(v); }
439 #define SWAP4_32(i) { CSwapUInt32 v = items[i]; items[i] = Z7_BSWAP32(v); }
442 { UInt32 v = items[i]; \
445 items[i] = v; }
505 UInt64 v = *(const UInt64 *)(const void *)(items + (i) * 4); \
507 *(UInt64 *)(void *)(items + (i) * 4) = v; }
510 SwapBytes2_64(CSwapUInt16 *items, const CSwapUInt16 *lim)
515 SWAP2_64(0) SWAP2_64(1) items += 2 * 4;
516 SWAP2_64(0) SWAP2_64(1) items += 2 * 4;
518 while (items != lim);
534 UInt64 v = *(const UInt64 *)(const void *)(items + (i) * 2); \
536 *(UInt64 *)(void *)(items + (i) * 2) = v; }
539 SwapBytes4_64(CSwapUInt32 *items, const CSwapUInt32 *lim)
544 SWAP4_64(0) SWAP4_64(1) items += 2 * 2;
545 SWAP4_64(0) SWAP4_64(1) items += 2 * 2;
547 while (items != lim);
582 UInt32 v = *(const UInt32 *)(const void *)(items + (i) * 2); \
584 *(UInt32 *)(void *)(items + (i) * 2) = v; }
588 SwapBytes2_32(CSwapUInt16 *items, const CSwapUInt16 *lim)
593 SWAP2_32(0) SWAP2_32(1) items += 2 * 2;
594 SWAP2_32(0) SWAP2_32(1) items += 2 * 2;
596 while (items != lim);
601 SwapBytes4_32(CSwapUInt32 *items, const CSwapUInt32 *lim)
606 SWAP4_32(0) SWAP4_32(1) items += 2;
607 SWAP4_32(0) SWAP4_32(1) items += 2;
609 while (items != lim);
634 void z7_SwapBytes2(CSwapUInt16 *items, size_t numItems)
637 for (; numItems != 0 && ((unsigned)(ptrdiff_t)items & (SWAP_VECTOR_ALIGN_SIZE - 1)) != 0; numItems--)
640 items++;
648 lim = items + numItems2;
655 ShufBytes_256((__m256i *)(void *)items,
662 ShufBytes_128((__m128i *)(void *)items,
670 SwapBytes2_128(items, lim);
674 DEFAULT_Swap2(items, lim);
676 items = lim;
682 items++;
688 void z7_SwapBytes4(CSwapUInt32 *items, size_t numItems)
691 for (; numItems != 0 && ((unsigned)(ptrdiff_t)items & (SWAP_VECTOR_ALIGN_SIZE - 1)) != 0; numItems--)
694 items++;
702 lim = items + numItems2;
709 ShufBytes_256((__m256i *)(void *)items,
716 ShufBytes_128((__m128i *)(void *)items,
724 SwapBytes4_128(items, lim);
728 DEFAULT_Swap4(items, lim);
730 items = lim;
736 items++;