Lines Matching defs:vint4

(vint4 is the 4-wide 32-bit integer vector in the astcenc SSE vector math library. Each entry below is the matched source line number followed by the matched line only, so multi-line statements appear truncated.)
189 // vint4 data type
195 struct vint4
200 ASTCENC_SIMD_INLINE vint4() = default;
208 ASTCENC_SIMD_INLINE explicit vint4(const int *p)
216 ASTCENC_SIMD_INLINE explicit vint4(const uint8_t *p)
234 ASTCENC_SIMD_INLINE explicit vint4(int a)
244 ASTCENC_SIMD_INLINE explicit vint4(int a, int b, int c, int d)
252 ASTCENC_SIMD_INLINE explicit vint4(__m128i a)
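The scalar constructors map onto _mm_set1_epi32 (broadcast) and _mm_set_epi32 (per-lane). One detail worth calling out: _mm_set_epi32 takes its arguments from the highest lane down, so a constructor with the natural (a, b, c, d) order has to pass them reversed. A minimal standalone sketch with plain intrinsics (illustrative code, not the library's):

    #include <immintrin.h>
    #include <cstdio>

    int main()
    {
        // _mm_set_epi32 takes lanes high-to-low, so getting lane 0 == 1
        // means passing the arguments in reverse order.
        __m128i v = _mm_set_epi32(4, 3, 2, 1);   // lanes: [1, 2, 3, 4]

        int out[4];
        _mm_storeu_si128(reinterpret_cast<__m128i*>(out), v);
        std::printf("%d %d %d %d\n", out[0], out[1], out[2], out[3]);   // 1 2 3 4
        return 0;
    }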
283 static ASTCENC_SIMD_INLINE vint4 zero()
285 return vint4(_mm_setzero_si128());
291 static ASTCENC_SIMD_INLINE vint4 load1(const int* p)
293 return vint4(*p);
299 static ASTCENC_SIMD_INLINE vint4 load(const uint8_t* p)
302 return vint4(_mm_lddqu_si128(reinterpret_cast<const __m128i*>(p)));
304 return vint4(_mm_loadu_si128(reinterpret_cast<const __m128i*>(p)));
311 static ASTCENC_SIMD_INLINE vint4 loada(const int* p)
313 return vint4(_mm_load_si128(reinterpret_cast<const __m128i*>(p)));
319 static ASTCENC_SIMD_INLINE vint4 lane_id()
321 return vint4(_mm_set_epi32(3, 2, 1, 0));
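The factory functions split loads by alignment contract: load() tolerates any pointer (_mm_loadu_si128, or _mm_lddqu_si128 when SSE3 is available, which was historically faster for loads crossing a cache line), while loada() uses _mm_load_si128 and requires 16-byte alignment. A hedged sketch of the distinction, with illustrative names:

    #include <immintrin.h>
    #include <cstdint>

    // Unaligned load: valid for any pointer.
    __m128i load_any(const std::uint8_t* p)
    {
        return _mm_loadu_si128(reinterpret_cast<const __m128i*>(p));
    }

    // Aligned load: p must be 16-byte aligned or the load may fault.
    __m128i load_aligned16(const int* p)
    {
        return _mm_load_si128(reinterpret_cast<const __m128i*>(p));
    }

    // lane_id() == [0, 1, 2, 3]; again note the reversed argument order.
    __m128i lane_id_demo()
    {
        return _mm_set_epi32(3, 2, 1, 0);
    }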
360 vint4 mask(a == false ? 0 : -1);
371 vint4 mask(a == false ? 0 : -1,
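The `0 : -1` in these vmask4 constructors is the standard SSE mask encoding: there are no dedicated mask registers, so a true lane is all-ones and a false lane is all-zeros, which composes directly with AND/ANDNOT/OR and with blends. A one-line sketch of the widening:

    #include <immintrin.h>

    // Widen a bool to a full SSE lane mask: true -> 0xFFFFFFFF, false -> 0.
    __m128i mask_from_bool(bool b)
    {
        return _mm_set1_epi32(b ? -1 : 0);
    }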
440 // vint4 operators and functions
446 ASTCENC_SIMD_INLINE vint4 operator+(vint4 a, vint4 b)
448 return vint4(_mm_add_epi32(a.m, b.m));
454 ASTCENC_SIMD_INLINE vint4 operator-(vint4 a, vint4 b)
456 return vint4(_mm_sub_epi32(a.m, b.m));
462 ASTCENC_SIMD_INLINE vint4 operator*(vint4 a, vint4 b)
465 return vint4(_mm_mullo_epi32(a.m, b.m));
474 return vint4(r);
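operator* uses _mm_mullo_epi32 on SSE4.1; the `return vint4(r);` at line 474 is the tail of an SSE2 fallback, which has to be built from _mm_mul_epu32 because SSE2 has no 32-bit low multiply. A sketch of that classic emulation (illustrative name, assuming the usual construction):

    #include <immintrin.h>

    // 32-bit lane-wise multiply without SSE4.1. _mm_mul_epu32 multiplies
    // only lanes 0 and 2 (producing 64-bit products), so compute the even
    // lanes, shift the inputs to reach the odd lanes, and reassemble the
    // low 32 bits of each product.
    __m128i mullo_epi32_sse2(__m128i a, __m128i b)
    {
        __m128i even = _mm_mul_epu32(a, b);
        __m128i odd  = _mm_mul_epu32(_mm_srli_si128(a, 4), _mm_srli_si128(b, 4));
        even = _mm_shuffle_epi32(even, _MM_SHUFFLE(0, 0, 2, 0));
        odd  = _mm_shuffle_epi32(odd,  _MM_SHUFFLE(0, 0, 2, 0));
        return _mm_unpacklo_epi32(even, odd);
    }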
481 ASTCENC_SIMD_INLINE vint4 operator~(vint4 a)
483 return vint4(_mm_xor_si128(a.m, _mm_set1_epi32(-1)));
489 ASTCENC_SIMD_INLINE vint4 operator|(vint4 a, vint4 b)
491 return vint4(_mm_or_si128(a.m, b.m));
497 ASTCENC_SIMD_INLINE vint4 operator&(vint4 a, vint4 b)
499 return vint4(_mm_and_si128(a.m, b.m));
505 ASTCENC_SIMD_INLINE vint4 operator^(vint4 a, vint4 b)
507 return vint4(_mm_xor_si128(a.m, b.m));
513 ASTCENC_SIMD_INLINE vmask4 operator==(vint4 a, vint4 b)
521 ASTCENC_SIMD_INLINE vmask4 operator!=(vint4 a, vint4 b)
529 ASTCENC_SIMD_INLINE vmask4 operator<(vint4 a, vint4 b)
537 ASTCENC_SIMD_INLINE vmask4 operator>(vint4 a, vint4 b)
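The comparison operators return vmask4 because SSE integer compares already produce the all-ones/all-zeros lane masks described above. SSE2 only provides eq/lt/gt directly, so != has to be synthesized with the same xor-with-all-ones trick operator~ uses at line 483. Sketch:

    #include <immintrin.h>

    // SSE2 has no integer "not equal": build it as NOT (a == b).
    __m128i cmp_ne_epi32(__m128i a, __m128i b)
    {
        return _mm_xor_si128(_mm_cmpeq_epi32(a, b), _mm_set1_epi32(-1));
    }

    // < and > map directly onto the signed compare intrinsics.
    __m128i cmp_lt_demo(__m128i a, __m128i b) { return _mm_cmplt_epi32(a, b); }
    __m128i cmp_gt_demo(__m128i a, __m128i b) { return _mm_cmpgt_epi32(a, b); }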
545 template <int s> ASTCENC_SIMD_INLINE vint4 lsl(vint4 a)
547 return vint4(_mm_slli_epi32(a.m, s));
553 template <int s> ASTCENC_SIMD_INLINE vint4 lsr(vint4 a)
555 return vint4(_mm_srli_epi32(a.m, s));
561 template <int s> ASTCENC_SIMD_INLINE vint4 asr(vint4 a)
563 return vint4(_mm_srai_epi32(a.m, s));
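The shift helpers take the count as a template parameter rather than a runtime argument: the immediate-form shift intrinsics want a compile-time constant, and the lsl/lsr/asr naming makes the zero-fill vs sign-fill distinction explicit. Sketch with illustrative names:

    #include <immintrin.h>

    // Compile-time shift counts, as the immediate-form intrinsics expect.
    template <int S> __m128i lsl_demo(__m128i a) { return _mm_slli_epi32(a, S); }
    template <int S> __m128i lsr_demo(__m128i a) { return _mm_srli_epi32(a, S); }   // zero fill
    template <int S> __m128i asr_demo(__m128i a) { return _mm_srai_epi32(a, S); }   // sign fill

    // For a lane holding -8: lsr<1> gives 0x7FFFFFFC, asr<1> gives -4.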
569 ASTCENC_SIMD_INLINE vint4 min(vint4 a, vint4 b)
572 return vint4(_mm_min_epi32(a.m, b.m));
577 return vint4(_mm_or_si128(ap, bp));
584 ASTCENC_SIMD_INLINE vint4 max(vint4 a, vint4 b)
587 return vint4(_mm_max_epi32(a.m, b.m));
592 return vint4(_mm_or_si128(ap, bp));
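min() and max() use the SSE4.1 intrinsics when available; the _mm_or_si128(ap, bp) tails at lines 577 and 592 are the ends of SSE2 fallbacks that build the result from a compare plus AND/ANDNOT/OR. A sketch of the min case (illustrative name):

    #include <immintrin.h>

    // Signed 32-bit min without SSE4.1 _mm_min_epi32.
    __m128i min_epi32_sse2(__m128i a, __m128i b)
    {
        __m128i mask = _mm_cmplt_epi32(a, b);
        __m128i ap = _mm_and_si128(mask, a);      // keep a where a <  b
        __m128i bp = _mm_andnot_si128(mask, b);   // keep b where a >= b
        return _mm_or_si128(ap, bp);
    }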
599 ASTCENC_SIMD_INLINE vint4 hmin(vint4 a)
601 a = min(a, vint4(_mm_shuffle_epi32(a.m, _MM_SHUFFLE(0, 0, 3, 2))));
602 a = min(a, vint4(_mm_shuffle_epi32(a.m, _MM_SHUFFLE(0, 0, 0, 1))));
603 return vint4(_mm_shuffle_epi32(a.m, _MM_SHUFFLE(0, 0, 0, 0)));
609 ASTCENC_SIMD_INLINE vint4 hmax(vint4 a)
611 a = max(a, vint4(_mm_shuffle_epi32(a.m, _MM_SHUFFLE(0, 0, 3, 2))));
612 a = max(a, vint4(_mm_shuffle_epi32(a.m, _MM_SHUFFLE(0, 0, 0, 1))));
613 return vint4(_mm_shuffle_epi32(a.m, _MM_SHUFFLE(0, 0, 0, 0)));
619 ASTCENC_SIMD_INLINE int hadd_s(vint4 a)
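The horizontal reductions fold the vector onto itself in log2(4) = 2 steps: high half onto low half, then lane 1 onto lane 0. hmin/hmax broadcast the result back to all lanes; hadd_s returns it as a scalar. A self-contained sketch (using the SSE4.1 min for brevity):

    #include <immintrin.h>

    __m128i hmin_demo(__m128i a)
    {
        a = _mm_min_epi32(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0, 0, 3, 2)));
        a = _mm_min_epi32(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0, 0, 0, 1)));
        return _mm_shuffle_epi32(a, _MM_SHUFFLE(0, 0, 0, 0));   // broadcast lane 0
    }

    int hadd_s_demo(__m128i a)
    {
        a = _mm_add_epi32(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0, 0, 3, 2)));
        a = _mm_add_epi32(a, _mm_shuffle_epi32(a, _MM_SHUFFLE(0, 0, 0, 1)));
        return _mm_cvtsi128_si32(a);   // extract lane 0
    }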
635 ASTCENC_SIMD_INLINE void storea(vint4 a, int* p)
643 ASTCENC_SIMD_INLINE void store(vint4 a, int* p)
652 ASTCENC_SIMD_INLINE void store(vint4 a, uint8_t* p)
660 ASTCENC_SIMD_INLINE void store_nbytes(vint4 a, uint8_t* p)
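The store family mirrors the loads: storea() assumes a 16-byte aligned destination, store() does not, and store_nbytes() writes only the low four bytes, which pairs with pack_low_bytes() below (that function leaves one byte per lane in exactly those four bytes). A sketch with illustrative names:

    #include <immintrin.h>
    #include <cstdint>
    #include <cstring>

    void storea_demo(__m128i a, int* p)   // p must be 16-byte aligned
    {
        _mm_store_si128(reinterpret_cast<__m128i*>(p), a);
    }

    void store_nbytes_demo(__m128i a, std::uint8_t* p)
    {
        // Write only the low 32 bits of the register.
        std::uint32_t lo = static_cast<std::uint32_t>(_mm_cvtsi128_si32(a));
        std::memcpy(p, &lo, 4);
    }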
669 ASTCENC_SIMD_INLINE vint4 gatheri(const int* base, vint4 indices)
672 return vint4(_mm_i32gather_epi32(base, indices.m, 4));
676 return vint4(base[idx[0]], base[idx[1]], base[idx[2]], base[idx[3]]);
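gatheri() shows the common pattern for optional ISA extensions: a hardware gather on AVX2, otherwise the indices are spilled to memory and the elements fetched one at a time (the base[idx[0]]... line). A sketch of both paths:

    #include <immintrin.h>

    __m128i gatheri_demo(const int* base, __m128i indices)
    {
    #if defined(__AVX2__)
        // Scale 4: indices count elements, not bytes.
        return _mm_i32gather_epi32(base, indices, 4);
    #else
        alignas(16) int idx[4];
        _mm_store_si128(reinterpret_cast<__m128i*>(idx), indices);
        return _mm_set_epi32(base[idx[3]], base[idx[2]], base[idx[1]], base[idx[0]]);
    #endif
    }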
683 ASTCENC_SIMD_INLINE vint4 pack_low_bytes(vint4 a)
687 return vint4(_mm_shuffle_epi8(a.m, shuf));
691 return vint4(_mm_unpacklo_epi16(va, vb));
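pack_low_bytes() gathers byte 0 of each 32-bit lane into the low four bytes of the vector. With SSSE3 that is a single _mm_shuffle_epi8 whose control bytes pick source bytes 0, 4, 8, and 12; the unpacklo line at 691 is part of the pre-SSSE3 fallback. Sketch of the shuffle path:

    #include <immintrin.h>

    __m128i pack_low_bytes_demo(__m128i a)
    {
        // -128 == 0x80: an index with the top bit set zeroes that
        // destination byte; 0/4/8/12 pick each lane's least significant byte.
        const __m128i shuf = _mm_set_epi8(
            -128, -128, -128, -128, -128, -128, -128, -128,
            -128, -128, -128, -128,   12,    8,    4,    0);
        return _mm_shuffle_epi8(a, shuf);
    }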
698 ASTCENC_SIMD_INLINE vint4 select(vint4 a, vint4 b, vmask4 cond)
703 return vint4(_mm_blendv_epi8(a.m, b.m, condi));
705 return vint4(_mm_or_si128(_mm_and_si128(condi, b.m), _mm_andnot_si128(condi, a.m)));
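select() returns b where the mask lane is set and a elsewhere. The SSE4.1 path leans on _mm_blendv_epi8, which keys off the top bit of every byte (hence the condi value the listing derives from the mask's sign bits); pre-SSE4.1 it is the three-op AND/ANDNOT/OR idiom. Sketch:

    #include <immintrin.h>

    // cond must hold all-ones lanes for true, all-zeros for false.
    __m128i select_demo(__m128i a, __m128i b, __m128i cond)
    {
    #if defined(__SSE4_1__)
        return _mm_blendv_epi8(a, b, cond);
    #else
        return _mm_or_si128(_mm_and_si128(cond, b), _mm_andnot_si128(cond, a));
    #endif
    }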
918 ASTCENC_SIMD_INLINE vfloat4 gatherf(const float* base, vint4 indices)
948 ASTCENC_SIMD_INLINE vint4 float_to_int(vfloat4 a)
950 return vint4(_mm_cvttps_epi32(a.m));
956 ASTCENC_SIMD_INLINE vint4 float_to_int_rtn(vfloat4 a)
959 return vint4(_mm_cvttps_epi32(a.m));
965 ASTCENC_SIMD_INLINE vfloat4 int_to_float(vint4 a)
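Note the two t's in _mm_cvttps_epi32: it truncates toward zero. float_to_int_rtn() rounds to nearest before truncating (the rounding step sits on a line the matcher did not show). A sketch of the distinction, doing the rounding by adding a half, which assumes non-negative inputs:

    #include <immintrin.h>

    __m128i to_int_trunc(__m128 a)   { return _mm_cvttps_epi32(a); }

    __m128i to_int_nearest(__m128 a)
    {
        // Valid for non-negative inputs; _mm_cvtps_epi32 would instead use
        // the current MXCSR rounding mode (round-to-nearest-even by default).
        return _mm_cvttps_epi32(_mm_add_ps(a, _mm_set1_ps(0.5f)));
    }

    __m128 to_float(__m128i a)       { return _mm_cvtepi32_ps(a); }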
973 ASTCENC_SIMD_INLINE vint4 float_to_float16(vfloat4 a)
978 return vint4(f16);
980 return vint4(
1004 ASTCENC_SIMD_INLINE vfloat4 float16_to_float(vint4 a)
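With F16C the half-float conversions are one instruction each way; the multi-line `return vint4(` at line 980 is the start of a scalar soft-float fallback for targets without it. A sketch of the hardware path (assuming F16C plus SSE4.1 for the widen/narrow steps):

    #include <immintrin.h>

    #if defined(__F16C__)
    __m128i float_to_float16_demo(__m128 a)
    {
        __m128i f16 = _mm_cvtps_ph(a, 0);      // 0: round to nearest even
        return _mm_cvtepu16_epi32(f16);        // one half per 32-bit lane
    }

    __m128 float16_to_float_demo(__m128i a)
    {
        __m128i f16 = _mm_packus_epi32(a, a);  // narrow 32-bit lanes to u16
        return _mm_cvtph_ps(f16);
    }
    #endif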
1040 ASTCENC_SIMD_INLINE vint4 float_as_int(vfloat4 a)
1042 return vint4(_mm_castps_si128(a.m));
1052 ASTCENC_SIMD_INLINE vfloat4 int_as_float(vint4 v)
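float_as_int()/int_as_float() are pure bit reinterpretations: the _mm_cast* intrinsics compile to no instructions and only change the type the compiler tracks. That enables sign-bit and exponent tricks in the integer domain, e.g.:

    #include <immintrin.h>
    #include <climits>

    // Flip the sign of each float by XORing the sign bit as an integer.
    __m128 negate_via_bits(__m128 a)
    {
        __m128i bits = _mm_castps_si128(a);
        bits = _mm_xor_si128(bits, _mm_set1_epi32(INT_MIN));   // 0x80000000
        return _mm_castsi128_ps(bits);
    }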
1060 ASTCENC_SIMD_INLINE void vtable_prepare(vint4 t0, vint4& t0p)
1068 ASTCENC_SIMD_INLINE void vtable_prepare(vint4 t0, vint4 t1, vint4& t0p, vint4& t1p)
1083 vint4 t0, vint4 t1, vint4 t2, vint4 t3,
1084 vint4& t0p, vint4& t1p, vint4& t2p, vint4& t3p)
1102 ASTCENC_SIMD_INLINE vint4 vtable_8bt_32bi(vint4 t0, vint4 idx)
1109 return vint4(result);
1115 return vint4(table[idx.lane<0>()],
1125 ASTCENC_SIMD_INLINE vint4 vtable_8bt_32bi(vint4 t0, vint4 t1, vint4 idx)
1137 return vint4(result);
1144 return vint4(table[idx.lane<0>()],
1154 ASTCENC_SIMD_INLINE vint4 vtable_8bt_32bi(vint4 t0, vint4 t1, vint4 t2, vint4 t3, vint4 idx)
1174 return vint4(result);
1183 return vint4(table[idx.lane<0>()],
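The vtable functions implement small byte-table lookups with _mm_shuffle_epi8: vtable_prepare() pre-shuffles the table into the byte order the lookup wants, and vtable_8bt_32bi() narrows the four 32-bit indices to bytes, does all four lookups in one shuffle, and widens the results back (the scalar table[idx.lane<0>()]... lines are the non-SSSE3 fallback). A sketch of the 16-entry case, assuming the table is already in lookup order:

    #include <immintrin.h>

    __m128i vtable_16x8_demo(__m128i table, __m128i idx)
    {
        // Collapse each 32-bit index to a byte (low four bytes, rest
        // zeroed), then look up and zero-extend back to 32-bit lanes.
        const __m128i narrow = _mm_set_epi8(
            -128, -128, -128, -128, -128, -128, -128, -128,
            -128, -128, -128, -128,   12,    8,    4,    0);
        __m128i idx_bytes = _mm_shuffle_epi8(idx, narrow);
        __m128i result    = _mm_shuffle_epi8(table, idx_bytes);
        return _mm_cvtepu8_epi32(result);   // SSE4.1
    }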
1198 ASTCENC_SIMD_INLINE vint4 interleave_rgba8(vint4 r, vint4 g, vint4 b, vint4 a)
1207 return vint4(value);
1213 return vint4(value);
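interleave_rgba8() packs four byte-sized channels into each 32-bit lane so that the per-pixel memory layout is R, G, B, A on a little-endian target. Assuming the inputs are already in [0, 255], it reduces to shifts and ORs:

    #include <immintrin.h>

    __m128i interleave_rgba8_demo(__m128i r, __m128i g, __m128i b, __m128i a)
    {
        __m128i rg = _mm_or_si128(r, _mm_slli_epi32(g, 8));
        __m128i ba = _mm_or_si128(_mm_slli_epi32(b, 16), _mm_slli_epi32(a, 24));
        return _mm_or_si128(rg, ba);   // each lane: A<<24 | B<<16 | G<<8 | R
    }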
1230 ASTCENC_SIMD_INLINE void store_lanes_masked(uint8_t* base, vint4 data, vmask4 mask)
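store_lanes_masked() writes only the lanes whose mask is set, leaving the other destination bytes untouched. One way to express that on SSE is _mm_maskmoveu_si128, which stores byte-by-byte wherever the mask byte's top bit is set (a sketch, not necessarily the code behind this declaration):

    #include <immintrin.h>
    #include <cstdint>

    void store_lanes_masked_demo(std::uint8_t* base, __m128i data, __m128i mask)
    {
        // An all-ones lane mask has the top bit set in all four of its
        // bytes, so whole 32-bit lanes are written or skipped together.
        _mm_maskmoveu_si128(data, mask, reinterpret_cast<char*>(base));
    }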