Lines Matching defs:vfloat4
45 // vfloat4 data type
51 struct vfloat4
56 ASTCENC_SIMD_INLINE vfloat4() = default;
64 ASTCENC_SIMD_INLINE explicit vfloat4(const float *p)
74 ASTCENC_SIMD_INLINE explicit vfloat4(float a)
84 ASTCENC_SIMD_INLINE explicit vfloat4(float a, float b, float c, float d)
92 ASTCENC_SIMD_INLINE explicit vfloat4(__m128 a)
124 static ASTCENC_SIMD_INLINE vfloat4 zero()
126 return vfloat4(_mm_setzero_ps());
132 static ASTCENC_SIMD_INLINE vfloat4 load1(const float* p)
134 return vfloat4(_mm_load_ps1(p));
140 static ASTCENC_SIMD_INLINE vfloat4 loada(const float* p)
142 return vfloat4(_mm_load_ps(p));
148 static ASTCENC_SIMD_INLINE vfloat4 lane_id()
150 return vfloat4(_mm_set_ps(3, 2, 1, 0));
156 template <int l0, int l1> ASTCENC_SIMD_INLINE vfloat4 swz() const
158 vfloat4 result(_mm_shuffle_ps(m, m, l0 | l1 << 2));
167 template <int l0, int l1, int l2> ASTCENC_SIMD_INLINE vfloat4 swz() const
169 vfloat4 result(_mm_shuffle_ps(m, m, l0 | l1 << 2 | l2 << 4));
177 template <int l0, int l1, int l2, int l3> ASTCENC_SIMD_INLINE vfloat4 swz() const
179 return vfloat4(_mm_shuffle_ps(m, m, l0 | l1 << 2 | l2 << 4 | l3 << 6));
232 * Consider using vfloat4::zero() for constexpr zeros.
710 // vfloat4 operators and functions
716 ASTCENC_SIMD_INLINE vfloat4 operator+(vfloat4 a, vfloat4 b)
718 return vfloat4(_mm_add_ps(a.m, b.m));
724 ASTCENC_SIMD_INLINE vfloat4 operator-(vfloat4 a, vfloat4 b)
726 return vfloat4(_mm_sub_ps(a.m, b.m));
732 ASTCENC_SIMD_INLINE vfloat4 operator*(vfloat4 a, vfloat4 b)
734 return vfloat4(_mm_mul_ps(a.m, b.m));
740 ASTCENC_SIMD_INLINE vfloat4 operator/(vfloat4 a, vfloat4 b)
742 return vfloat4(_mm_div_ps(a.m, b.m));
748 ASTCENC_SIMD_INLINE vmask4 operator==(vfloat4 a, vfloat4 b)
756 ASTCENC_SIMD_INLINE vmask4 operator!=(vfloat4 a, vfloat4 b)
764 ASTCENC_SIMD_INLINE vmask4 operator<(vfloat4 a, vfloat4 b)
772 ASTCENC_SIMD_INLINE vmask4 operator>(vfloat4 a, vfloat4 b)
780 ASTCENC_SIMD_INLINE vmask4 operator<=(vfloat4 a, vfloat4 b)
788 ASTCENC_SIMD_INLINE vmask4 operator>=(vfloat4 a, vfloat4 b)
798 ASTCENC_SIMD_INLINE vfloat4 min(vfloat4 a, vfloat4 b)
801 return vfloat4(_mm_min_ps(a.m, b.m));
809 ASTCENC_SIMD_INLINE vfloat4 max(vfloat4 a, vfloat4 b)
812 return vfloat4(_mm_max_ps(a.m, b.m));
818 ASTCENC_SIMD_INLINE vfloat4 abs(vfloat4 a)
820 return vfloat4(_mm_max_ps(_mm_sub_ps(_mm_setzero_ps(), a.m), a.m));
826 ASTCENC_SIMD_INLINE vfloat4 round(vfloat4 a)
830 return vfloat4(_mm_round_ps(a.m, flags));
844 return vfloat4(_mm_xor_ps(r1, r2));
851 ASTCENC_SIMD_INLINE vfloat4 hmin(vfloat4 a)
853 a = min(a, vfloat4(_mm_shuffle_ps(a.m, a.m, _MM_SHUFFLE(0, 0, 3, 2))));
854 a = min(a, vfloat4(_mm_shuffle_ps(a.m, a.m, _MM_SHUFFLE(0, 0, 0, 1))));
855 return vfloat4(_mm_shuffle_ps(a.m, a.m, _MM_SHUFFLE(0, 0, 0, 0)));
861 ASTCENC_SIMD_INLINE vfloat4 hmax(vfloat4 a)
863 a = max(a, vfloat4(_mm_shuffle_ps(a.m, a.m, _MM_SHUFFLE(0, 0, 3, 2))));
864 a = max(a, vfloat4(_mm_shuffle_ps(a.m, a.m, _MM_SHUFFLE(0, 0, 0, 1))));
865 return vfloat4(_mm_shuffle_ps(a.m, a.m, _MM_SHUFFLE(0, 0, 0, 0)));
871 ASTCENC_SIMD_INLINE float hadd_s(vfloat4 a)
885 ASTCENC_SIMD_INLINE vfloat4 sqrt(vfloat4 a)
887 return vfloat4(_mm_sqrt_ps(a.m));
893 ASTCENC_SIMD_INLINE vfloat4 select(vfloat4 a, vfloat4 b, vmask4 cond)
896 return vfloat4(_mm_blendv_ps(a.m, b.m, cond.m));
898 return vfloat4(_mm_or_ps(_mm_and_ps(cond.m, b.m), _mm_andnot_ps(cond.m, a.m)));
905 ASTCENC_SIMD_INLINE vfloat4 select_msb(vfloat4 a, vfloat4 b, vmask4 cond)
908 return vfloat4(_mm_blendv_ps(a.m, b.m, cond.m));
911 return vfloat4(_mm_or_ps(_mm_and_ps(d, b.m), _mm_andnot_ps(d, a.m)));
918 ASTCENC_SIMD_INLINE vfloat4 gatherf(const float* base, vint4 indices)
921 return vfloat4(_mm_i32gather_ps(base, indices.m, 4));
925 return vfloat4(base[idx[0]], base[idx[1]], base[idx[2]], base[idx[3]]);
932 ASTCENC_SIMD_INLINE void store(vfloat4 a, float* p)
940 ASTCENC_SIMD_INLINE void storea(vfloat4 a, float* p)
948 ASTCENC_SIMD_INLINE vint4 float_to_int(vfloat4 a)
956 ASTCENC_SIMD_INLINE vint4 float_to_int_rtn(vfloat4 a)
958 a = a + vfloat4(0.5f);
965 ASTCENC_SIMD_INLINE vfloat4 int_to_float(vint4 a)
967 return vfloat4(_mm_cvtepi32_ps(a.m));
973 ASTCENC_SIMD_INLINE vint4 float_to_float16(vfloat4 a)
1004 ASTCENC_SIMD_INLINE vfloat4 float16_to_float(vint4 a)
1009 return vfloat4(f32);
1011 return vfloat4(
1040 ASTCENC_SIMD_INLINE vint4 float_as_int(vfloat4 a)
1052 ASTCENC_SIMD_INLINE vfloat4 int_as_float(vint4 v)
1054 return vfloat4(_mm_castsi128_ps(v.m));
1266 ASTCENC_SIMD_INLINE float dot_s(vfloat4 a, vfloat4 b)
1274 ASTCENC_SIMD_INLINE vfloat4 dot(vfloat4 a, vfloat4 b)
1276 return vfloat4(_mm_dp_ps(a.m, b.m, 0xFF));
1282 ASTCENC_SIMD_INLINE float dot3_s(vfloat4 a, vfloat4 b)
1290 ASTCENC_SIMD_INLINE vfloat4 dot3(vfloat4 a, vfloat4 b)
1292 return vfloat4(_mm_dp_ps(a.m, b.m, 0x77));