Lines Matching refs:Vec
11 // skvx::Vec<N,T> are SIMD vectors of N T's, a v1.5 successor to SkNx<N,T>.
18 // with across translation units. skvx::Vec<N,T> always has N*sizeof(T) size
57 struct alignas(N*sizeof(T)) Vec;
60 SI Vec<sizeof...(Ix),T> shuffle(const Vec<N,T>&);
65 // All Vec have the same simple memory layout, the same as `T vec[N]`.
71 Vec<N/2,T> lo, hi;
79 SKVX_ALWAYS_INLINE VecStorage(Vec<2,T> xy, T z, T w) : lo(xy), hi(z,w) {}
80 SKVX_ALWAYS_INLINE VecStorage(T x, T y, Vec<2,T> zw) : lo(x,y), hi(zw) {}
81 SKVX_ALWAYS_INLINE VecStorage(Vec<2,T> xy, Vec<2,T> zw) : lo(xy), hi(zw) {}
83 SKVX_ALWAYS_INLINE Vec<2,T>& xy() { return lo; }
84 SKVX_ALWAYS_INLINE Vec<2,T>& zw() { return hi; }
90 SKVX_ALWAYS_INLINE Vec<2,T> xy() const { return lo; }
91 SKVX_ALWAYS_INLINE Vec<2,T> zw() const { return hi; }
98 SKVX_ALWAYS_INLINE Vec<4,T> yxwz() const { return shuffle<1,0,3,2>(bit_pun<Vec<4,T>>(*this)); }
99 SKVX_ALWAYS_INLINE Vec<4,T> zwxy() const { return shuffle<2,3,0,1>(bit_pun<Vec<4,T>>(*this)); }
101 Vec<2,T> lo, hi;
117 SKVX_ALWAYS_INLINE Vec<2,T> yx() const { return shuffle<1,0>(bit_pun<Vec<2,T>>(*this)); }
119 SKVX_ALWAYS_INLINE Vec<4,T> xyxy() const {
120 return Vec<4,T>(bit_pun<Vec<2,T>>(*this), bit_pun<Vec<2,T>>(*this));
123 Vec<1,T> lo, hi;
127 struct alignas(N*sizeof(T)) Vec : public VecStorage<N,T> {
131 // Methods belong here in the class declaration of Vec only if:
134 // Other operations on Vec should be defined outside the type.
136 SKVX_ALWAYS_INLINE Vec() = default;
140 SKVX_ALWAYS_INLINE Vec(std::initializer_list<T> xs) {
144 this->lo = Vec<N/2,T>::Load(vals + 0);
145 this->hi = Vec<N/2,T>::Load(vals + N/2);
151 SKVX_ALWAYS_INLINE static Vec Load(const void* ptr) {
152 Vec v;
153 memcpy(&v, ptr, sizeof(Vec));
157 memcpy(ptr, this, sizeof(Vec));
162 struct Vec<1,T> {
165 SKVX_ALWAYS_INLINE Vec() = default;
167 Vec(T s) : val(s) {}
169 SKVX_ALWAYS_INLINE Vec(std::initializer_list<T> xs) : val(xs.size() ? *xs.begin() : 0) {}
174 SKVX_ALWAYS_INLINE static Vec Load(const void* ptr) {
175 Vec v;
176 memcpy(&v, ptr, sizeof(Vec));
180 memcpy(ptr, this, sizeof(Vec));
205 // Join two Vec<N,T> into one Vec<2N,T>.
206 SINT Vec<2*N,T> join(const Vec<N,T>& lo, const Vec<N,T>& hi) {
207 Vec<2*N,T> v;
213 // We have three strategies for implementing Vec operations:
217 // We can slot in platform-specific implementations as overloads for particular Vec<N,T>,
222 // VExt<N,T> types have the same size as Vec<N,T> and support most operations directly.
238 SI Vec<4,float> to_vec(VExt<4,float> v) { return bit_pun<Vec<4,float>>(v); }
241 SINT VExt<N,T> to_vext(const Vec<N,T>& v) { return bit_pun<VExt<N,T>>(v); }
242 SINT Vec <N,T> to_vec(const VExt<N,T>& v) { return bit_pun<Vec <N,T>>(v); }
244 SINT Vec<N,T> operator+(const Vec<N,T>& x, const Vec<N,T>& y) {
247 SINT Vec<N,T> operator-(const Vec<N,T>& x, const Vec<N,T>& y) {
250 SINT Vec<N,T> operator*(const Vec<N,T>& x, const Vec<N,T>& y) {
253 SINT Vec<N,T> operator/(const Vec<N,T>& x, const Vec<N,T>& y) {
257 SINT Vec<N,T> operator^(const Vec<N,T>& x, const Vec<N,T>& y) {
260 SINT Vec<N,T> operator&(const Vec<N,T>& x, const Vec<N,T>& y) {
263 SINT Vec<N,T> operator|(const Vec<N,T>& x, const Vec<N,T>& y) {
267 SINT Vec<N,T> operator!(const Vec<N,T>& x) { return to_vec<N,T>(!to_vext(x)); }
268 SINT Vec<N,T> operator-(const Vec<N,T>& x) { return to_vec<N,T>(-to_vext(x)); }
269 SINT Vec<N,T> operator~(const Vec<N,T>& x) { return to_vec<N,T>(~to_vext(x)); }
271 SINT Vec<N,T> operator<<(const Vec<N,T>& x, int k) { return to_vec<N,T>(to_vext(x) << k); }
272 SINT Vec<N,T> operator>>(const Vec<N,T>& x, int k) { return to_vec<N,T>(to_vext(x) >> k); }
274 SINT Vec<N,M<T>> operator==(const Vec<N,T>& x, const Vec<N,T>& y) {
275 return bit_pun<Vec<N,M<T>>>(to_vext(x) == to_vext(y));
277 SINT Vec<N,M<T>> operator!=(const Vec<N,T>& x, const Vec<N,T>& y) {
278 return bit_pun<Vec<N,M<T>>>(to_vext(x) != to_vext(y));
280 SINT Vec<N,M<T>> operator<=(const Vec<N,T>& x, const Vec<N,T>& y) {
281 return bit_pun<Vec<N,M<T>>>(to_vext(x) <= to_vext(y));
283 SINT Vec<N,M<T>> operator>=(const Vec<N,T>& x, const Vec<N,T>& y) {
284 return bit_pun<Vec<N,M<T>>>(to_vext(x) >= to_vext(y));
286 SINT Vec<N,M<T>> operator< (const Vec<N,T>& x, const Vec<N,T>& y) {
287 return bit_pun<Vec<N,M<T>>>(to_vext(x) < to_vext(y));
289 SINT Vec<N,M<T>> operator> (const Vec<N,T>& x, const Vec<N,T>& y) {
290 return bit_pun<Vec<N,M<T>>>(to_vext(x) > to_vext(y));
299 SIT Vec<1,T> operator+(const Vec<1,T>& x, const Vec<1,T>& y) { return x.val + y.val; }
300 SIT Vec<1,T> operator-(const Vec<1,T>& x, const Vec<1,T>& y) { return x.val - y.val; }
301 SIT Vec<1,T> operator*(const Vec<1,T>& x, const Vec<1,T>& y) { return x.val * y.val; }
302 SIT Vec<1,T> operator/(const Vec<1,T>& x, const Vec<1,T>& y) { return x.val / y.val; }
304 SIT Vec<1,T> operator^(const Vec<1,T>& x, const Vec<1,T>& y) { return x.val ^ y.val; }
305 SIT Vec<1,T> operator&(const Vec<1,T>& x, const Vec<1,T>& y) { return x.val & y.val; }
306 SIT Vec<1,T> operator|(const Vec<1,T>& x, const Vec<1,T>& y) { return x.val | y.val; }
308 SIT Vec<1,T> operator!(const Vec<1,T>& x) { return !x.val; }
309 SIT Vec<1,T> operator-(const Vec<1,T>& x) { return -x.val; }
310 SIT Vec<1,T> operator~(const Vec<1,T>& x) { return ~x.val; }
312 SIT Vec<1,T> operator<<(const Vec<1,T>& x, int k) { return x.val << k; }
313 SIT Vec<1,T> operator>>(const Vec<1,T>& x, int k) { return x.val >> k; }
315 SIT Vec<1,M<T>> operator==(const Vec<1,T>& x, const Vec<1,T>& y) {
318 SIT Vec<1,M<T>> operator!=(const Vec<1,T>& x, const Vec<1,T>& y) {
321 SIT Vec<1,M<T>> operator<=(const Vec<1,T>& x, const Vec<1,T>& y) {
324 SIT Vec<1,M<T>> operator>=(const Vec<1,T>& x, const Vec<1,T>& y) {
327 SIT Vec<1,M<T>> operator< (const Vec<1,T>& x, const Vec<1,T>& y) {
330 SIT Vec<1,M<T>> operator> (const Vec<1,T>& x, const Vec<1,T>& y) {
335 SINT Vec<N,T> operator+(const Vec<N,T>& x, const Vec<N,T>& y) {
338 SINT Vec<N,T> operator-(const Vec<N,T>& x, const Vec<N,T>& y) {
341 SINT Vec<N,T> operator*(const Vec<N,T>& x, const Vec<N,T>& y) {
344 SINT Vec<N,T> operator/(const Vec<N,T>& x, const Vec<N,T>& y) {
348 SINT Vec<N,T> operator^(const Vec<N,T>& x, const Vec<N,T>& y) {
351 SINT Vec<N,T> operator&(const Vec<N,T>& x, const Vec<N,T>& y) {
354 SINT Vec<N,T> operator|(const Vec<N,T>& x, const Vec<N,T>& y) {
358 SINT Vec<N,T> operator!(const Vec<N,T>& x) { return join(!x.lo, !x.hi); }
359 SINT Vec<N,T> operator-(const Vec<N,T>& x) { return join(-x.lo, -x.hi); }
360 SINT Vec<N,T> operator~(const Vec<N,T>& x) { return join(~x.lo, ~x.hi); }
362 SINT Vec<N,T> operator<<(const Vec<N,T>& x, int k) { return join(x.lo << k, x.hi << k); }
363 SINT Vec<N,T> operator>>(const Vec<N,T>& x, int k) { return join(x.lo >> k, x.hi >> k); }
365 SINT Vec<N,M<T>> operator==(const Vec<N,T>& x, const Vec<N,T>& y) {
368 SINT Vec<N,M<T>> operator!=(const Vec<N,T>& x, const Vec<N,T>& y) {
371 SINT Vec<N,M<T>> operator<=(const Vec<N,T>& x, const Vec<N,T>& y) {
374 SINT Vec<N,M<T>> operator>=(const Vec<N,T>& x, const Vec<N,T>& y) {
377 SINT Vec<N,M<T>> operator< (const Vec<N,T>& x, const Vec<N,T>& y) {
380 SINT Vec<N,M<T>> operator> (const Vec<N,T>& x, const Vec<N,T>& y) {
386 SINTU Vec<N,T> operator+ (U x, const Vec<N,T>& y) { return Vec<N,T>(x) + y; }
387 SINTU Vec<N,T> operator- (U x, const Vec<N,T>& y) { return Vec<N,T>(x) - y; }
388 SINTU Vec<N,T> operator* (U x, const Vec<N,T>& y) { return Vec<N,T>(x) * y; }
389 SINTU Vec<N,T> operator/ (U x, const Vec<N,T>& y) { return Vec<N,T>(x) / y; }
390 SINTU Vec<N,T> operator^ (U x, const Vec<N,T>& y) { return Vec<N,T>(x) ^ y; }
391 SINTU Vec<N,T> operator& (U x, const Vec<N,T>& y) { return Vec<N,T>(x) & y; }
392 SINTU Vec<N,T> operator| (U x, const Vec<N,T>& y) { return Vec<N,T>(x) | y; }
393 SINTU Vec<N,M<T>> operator==(U x, const Vec<N,T>& y) { return Vec<N,T>(x) == y; }
394 SINTU Vec<N,M<T>> operator!=(U x, const Vec<N,T>& y) { return Vec<N,T>(x) != y; }
395 SINTU Vec<N,M<T>> operator<=(U x, const Vec<N,T>& y) { return Vec<N,T>(x) <= y; }
396 SINTU Vec<N,M<T>> operator>=(U x, const Vec<N,T>& y) { return Vec<N,T>(x) >= y; }
397 SINTU Vec<N,M<T>> operator< (U x, const Vec<N,T>& y) { return Vec<N,T>(x) < y; }
398 SINTU Vec<N,M<T>> operator> (U x, const Vec<N,T>& y) { return Vec<N,T>(x) > y; }
400 SINTU Vec<N,T> operator+ (const Vec<N,T>& x, U y) { return x + Vec<N,T>(y); }
401 SINTU Vec<N,T> operator- (const Vec<N,T>& x, U y) { return x - Vec<N,T>(y); }
402 SINTU Vec<N,T> operator* (const Vec<N,T>& x, U y) { return x * Vec<N,T>(y); }
403 SINTU Vec<N,T> operator/ (const Vec<N,T>& x, U y) { return x / Vec<N,T>(y); }
404 SINTU Vec<N,T> operator^ (const Vec<N,T>& x, U y) { return x ^ Vec<N,T>(y); }
405 SINTU Vec<N,T> operator& (const Vec<N,T>& x, U y) { return x & Vec<N,T>(y); }
406 SINTU Vec<N,T> operator| (const Vec<N,T>& x, U y) { return x | Vec<N,T>(y); }
407 SINTU Vec<N,M<T>> operator==(const Vec<N,T>& x, U y) { return x == Vec<N,T>(y); }
408 SINTU Vec<N,M<T>> operator!=(const Vec<N,T>& x, U y) { return x != Vec<N,T>(y); }
409 SINTU Vec<N,M<T>> operator<=(const Vec<N,T>& x, U y) { return x <= Vec<N,T>(y); }
410 SINTU Vec<N,M<T>> operator>=(const Vec<N,T>& x, U y) { return x >= Vec<N,T>(y); }
411 SINTU Vec<N,M<T>> operator< (const Vec<N,T>& x, U y) { return x < Vec<N,T>(y); }
412 SINTU Vec<N,M<T>> operator> (const Vec<N,T>& x, U y) { return x > Vec<N,T>(y); }
414 SINT Vec<N,T>& operator+=(Vec<N,T>& x, const Vec<N,T>& y) { return (x = x + y); }
415 SINT Vec<N,T>& operator-=(Vec<N,T>& x, const Vec<N,T>& y) { return (x = x - y); }
416 SINT Vec<N,T>& operator*=(Vec<N,T>& x, const Vec<N,T>& y) { return (x = x * y); }
417 SINT Vec<N,T>& operator/=(Vec<N,T>& x, const Vec<N,T>& y) { return (x = x / y); }
418 SINT Vec<N,T>& operator^=(Vec<N,T>& x, const Vec<N,T>& y) { return (x = x ^ y); }
419 SINT Vec<N,T>& operator&=(Vec<N,T>& x, const Vec<N,T>& y) { return (x = x & y); }
420 SINT Vec<N,T>& operator|=(Vec<N,T>& x, const Vec<N,T>& y) { return (x = x | y); }
422 SINTU Vec<N,T>& operator+=(Vec<N,T>& x, U y) { return (x = x + Vec<N,T>(y)); }
423 SINTU Vec<N,T>& operator-=(Vec<N,T>& x, U y) { return (x = x - Vec<N,T>(y)); }
424 SINTU Vec<N,T>& operator*=(Vec<N,T>& x, U y) { return (x = x * Vec<N,T>(y)); }
425 SINTU Vec<N,T>& operator/=(Vec<N,T>& x, U y) { return (x = x / Vec<N,T>(y)); }
426 SINTU Vec<N,T>& operator^=(Vec<N,T>& x, U y) { return (x = x ^ Vec<N,T>(y)); }
427 SINTU Vec<N,T>& operator&=(Vec<N,T>& x, U y) { return (x = x & Vec<N,T>(y)); }
428 SINTU Vec<N,T>& operator|=(Vec<N,T>& x, U y) { return (x = x | Vec<N,T>(y)); }
430 SINT Vec<N,T>& operator<<=(Vec<N,T>& x, int bits) { return (x = x << bits); }
431 SINT Vec<N,T>& operator>>=(Vec<N,T>& x, int bits) { return (x = x >> bits); }
438 SINT Vec<N,T> naive_if_then_else(const Vec<N,M<T>>& cond, const Vec<N,T>& t, const Vec<N,T>& e) {
439 return bit_pun<Vec<N,T>>(( cond & bit_pun<Vec<N, M<T>>>(t)) |
440 (~cond & bit_pun<Vec<N, M<T>>>(e)) );
443 SIT Vec<1,T> if_then_else(const Vec<1,M<T>>& cond, const Vec<1,T>& t, const Vec<1,T>& e) {
445 return bit_pun<Vec<1,T>>(( cond & bit_pun<Vec<1, M<T>>>(t)) |
446 (~cond & bit_pun<Vec<1, M<T>>>(e)) );
448 SINT Vec<N,T> if_then_else(const Vec<N,M<T>>& cond, const Vec<N,T>& t, const Vec<N,T>& e) {
453 return unchecked_bit_pun<Vec<N,T>>(_mm256_blendv_epi8(unchecked_bit_pun<__m256i>(e),
460 return unchecked_bit_pun<Vec<N,T>>(_mm_blendv_epi8(unchecked_bit_pun<__m128i>(e),
467 return unchecked_bit_pun<Vec<N,T>>(vbslq_u8(unchecked_bit_pun<uint8x16_t>(cond),
481 SIT bool any(const Vec<1,T>& x) { return x.val != 0; }
482 SINT bool any(const Vec<N,T>& x) {
492 SIT bool all(const Vec<1,T>& x) { return x.val != 0; }
493 SINT bool all(const Vec<N,T>& x) {
515 // cast() Vec<N,S> to Vec<N,D>, as if applying a C-cast to each lane.
518 SI Vec<1,D> cast(const Vec<1,S>& src) { return (D)src.val; }
521 SI Vec<N,D> cast(const Vec<N,S>& src) {
530 SIT T min(const Vec<1,T>& x) { return x.val; }
531 SIT T max(const Vec<1,T>& x) { return x.val; }
532 SINT T min(const Vec<N,T>& x) { return std::min(min(x.lo), min(x.hi)); }
533 SINT T max(const Vec<N,T>& x) { return std::max(max(x.lo), max(x.hi)); }
535 SINT Vec<N,T> min(const Vec<N,T>& x, const Vec<N,T>& y) { return naive_if_then_else(y < x, y, x); }
536 SINT Vec<N,T> max(const Vec<N,T>& x, const Vec<N,T>& y) { return naive_if_then_else(x < y, y, x); }
538 SINTU Vec<N,T> min(const Vec<N,T>& x, U y) { return min(x, Vec<N,T>(y)); }
539 SINTU Vec<N,T> max(const Vec<N,T>& x, U y) { return max(x, Vec<N,T>(y)); }
540 SINTU Vec<N,T> min(U x, const Vec<N,T>& y) { return min(Vec<N,T>(x), y); }
541 SINTU Vec<N,T> max(U x, const Vec<N,T>& y) { return max(Vec<N,T>(x), y); }
545 SINT Vec<N,T> pin(const Vec<N,T>& x, const Vec<N,T>& lo, const Vec<N,T>& hi) {
550 // skvx::Vec<4,float> rgba = {R,G,B,A};
555 // The only real restriction is that the output also be a legal N=power-of-two skvx::Vec.
557 SI Vec<sizeof...(Ix),T> shuffle(const Vec<N,T>& x) {
571 Fn&& fn, const Args&... args) -> skvx::Vec<sizeof...(I), decltype(fn(args[0]...))> {
587 auto map(Fn&& fn, const Vec<N,T>& first, const Rest&... rest) {
592 SIN Vec<N,float> ceil(const Vec<N,float>& x) { return map( ceilf, x); }
593 SIN Vec<N,float> floor(const Vec<N,float>& x) { return map(floorf, x); }
594 SIN Vec<N,float> trunc(const Vec<N,float>& x) { return map(truncf, x); }
595 SIN Vec<N,float> round(const Vec<N,float>& x) { return map(roundf, x); }
596 SIN Vec<N,float> sqrt(const Vec<N,float>& x) { return map( sqrtf, x); }
597 SIN Vec<N,float> abs(const Vec<N,float>& x) { return map( fabsf, x); }
598 SIN Vec<N,float> fma(const Vec<N,float>& x,
599 const Vec<N,float>& y,
600 const Vec<N,float>& z) {
606 SI Vec<1,int> lrint(const Vec<1,float>& x) {
609 SIN Vec<N,int> lrint(const Vec<N,float>& x) {
612 return unchecked_bit_pun<Vec<N,int>>(_mm256_cvtps_epi32(unchecked_bit_pun<__m256>(x)));
617 return unchecked_bit_pun<Vec<N,int>>(_mm_cvtps_epi32(unchecked_bit_pun<__m128>(x)));
624 SIN Vec<N,float> fract(const Vec<N,float>& x) { return x - floor(x); }
631 SIN Vec<N,uint16_t> to_half_finite_ftz(const Vec<N,float>& x) {
632 Vec<N,uint32_t> sem = bit_pun<Vec<N,uint32_t>>(x),
636 return cast<uint16_t>(if_then_else(is_denorm, Vec<N,uint32_t>(0)
639 SIN Vec<N,float> from_half_finite_ftz(const Vec<N,uint16_t>& x) {
640 Vec<N,uint32_t> wide = cast<uint32_t>(x),
643 auto is_denorm = bit_pun<Vec<N,int32_t>>(em < 0x0400);
644 return if_then_else(is_denorm, Vec<N,float>(0)
645 , bit_pun<Vec<N,float>>( (s<<16) + (em<<13) + ((127-15)<<23) ));
649 SI Vec<1,uint16_t> to_half(const Vec<1,float>& x) { return to_half_finite_ftz(x); }
650 SI Vec<1,float> from_half(const Vec<1,uint16_t>& x) { return from_half_finite_ftz(x); }
652 SIN Vec<N,uint16_t> to_half(const Vec<N,float>& x) {
655 return unchecked_bit_pun<Vec<N,uint16_t>>(_mm256_cvtps_ph(unchecked_bit_pun<__m256>(x),
661 return unchecked_bit_pun<Vec<N,uint16_t>>(vcvt_f16_f32(unchecked_bit_pun<float32x4_t>(x)));
672 SIN Vec<N,float> from_half(const Vec<N,uint16_t>& x) {
675 return unchecked_bit_pun<Vec<N,float>>(_mm256_cvtph_ps(unchecked_bit_pun<__m128i>(x)));
680 return unchecked_bit_pun<Vec<N,float>>(vcvt_f32_f16(unchecked_bit_pun<float16x4_t>(x)));
691 SIN Vec<N,uint8_t> div255(const Vec<N,uint16_t>& x) {
697 SIN Vec<N,uint8_t> approx_scale(const Vec<N,uint8_t>& x, const Vec<N,uint8_t>& y) {
731 Vec<4, uint32_t> divide(const Vec<4, uint32_t>& numerator) const {
751 SI Vec<8,uint16_t> mull(const Vec<8,uint8_t>& x,
752 const Vec<8,uint8_t>& y) {
757 SIN std::enable_if_t<(N < 8), Vec<N,uint16_t>> mull(const Vec<N,uint8_t>& x,
758 const Vec<N,uint8_t>& y) {
764 SIN std::enable_if_t<(N > 8), Vec<N,uint16_t>> mull(const Vec<N,uint8_t>& x,
765 const Vec<N,uint8_t>& y) {
772 SIN Vec<N,uint16_t> mull(const Vec<N,uint8_t>& x,
773 const Vec<N,uint8_t>& y) {
796 SIN Vec<N,float> approx_acos(Vec<N,float> x) {
817 skvx::Vec<1,T>& a,
818 skvx::Vec<1,T>& b,
819 skvx::Vec<1,T>& c,
820 skvx::Vec<1,T>& d) {
827 skvx::Vec<N,T>& a,
828 skvx::Vec<N,T>& b,
829 skvx::Vec<N,T>& c,
830 skvx::Vec<N,T>& d) {
838 skvx::Vec<N,T>& a, \
839 skvx::Vec<N,T>& b, \
840 skvx::Vec<N,T>& c, \
841 skvx::Vec<N,T>& d) { \
843 a = skvx::bit_pun<skvx::Vec<N,T>>(mat.val[0]); \
844 b = skvx::bit_pun<skvx::Vec<N,T>>(mat.val[1]); \
845 c = skvx::bit_pun<skvx::Vec<N,T>>(mat.val[2]); \
846 d = skvx::bit_pun<skvx::Vec<N,T>>(mat.val[3]); \
865 Vec<4,float>& a,
866 Vec<4,float>& b,
867 Vec<4,float>& c,
868 Vec<4,float>& d) {
875 a = bit_pun<Vec<4,float>>(a_);
876 b = bit_pun<Vec<4,float>>(b_);
877 c = bit_pun<Vec<4,float>>(c_);
878 d = bit_pun<Vec<4,float>>(d_);
887 SIT void strided_load2(const T* v, skvx::Vec<1,T>& a, skvx::Vec<1,T>& b) {
891 SINT void strided_load2(const T* v, skvx::Vec<N,T>& a, skvx::Vec<N,T>& b) {
898 SI void strided_load2(const T* v, skvx::Vec<N,T>& a, skvx::Vec<N,T>& b) { \
900 a = skvx::bit_pun<skvx::Vec<N,T>>(mat.val[0]); \
901 b = skvx::bit_pun<skvx::Vec<N,T>>(mat.val[1]); \