SkVx.h - OpenGrok cross reference for /third_party/skia/include/private/SkVx.h

Lines Matching refs:Vec
11 // skvx::Vec<N,T> are SIMD vectors of N T's, a v1.5 successor to SkNx<N,T>.
18 // with across translation units.  skvx::Vec<N,T> always has N*sizeof(T) size
57 struct alignas(N*sizeof(T)) Vec;
60 SI Vec<sizeof...(Ix),T> shuffle(const Vec<N,T>&);
65 // All Vec have the same simple memory layout, the same as `T vec[N]`.
71     Vec<N/2,T> lo, hi;
79     SKVX_ALWAYS_INLINE VecStorage(Vec<2,T> xy, T z, T w) : lo(xy), hi(z,w) {}
80     SKVX_ALWAYS_INLINE VecStorage(T x, T y, Vec<2,T> zw) : lo(x,y), hi(zw) {}
81     SKVX_ALWAYS_INLINE VecStorage(Vec<2,T> xy, Vec<2,T> zw) : lo(xy), hi(zw) {}
83     SKVX_ALWAYS_INLINE Vec<2,T>& xy() { return lo; }
84     SKVX_ALWAYS_INLINE Vec<2,T>& zw() { return hi; }
90     SKVX_ALWAYS_INLINE Vec<2,T> xy() const { return lo; }
91     SKVX_ALWAYS_INLINE Vec<2,T> zw() const { return hi; }
98     SKVX_ALWAYS_INLINE Vec<4,T> yxwz() const { return shuffle<1,0,3,2>(bit_pun<Vec<4,T>>(*this)); }
99     SKVX_ALWAYS_INLINE Vec<4,T> zwxy() const { return shuffle<2,3,0,1>(bit_pun<Vec<4,T>>(*this)); }
101     Vec<2,T> lo, hi;
117     SKVX_ALWAYS_INLINE Vec<2,T> yx() const { return shuffle<1,0>(bit_pun<Vec<2,T>>(*this)); }
119     SKVX_ALWAYS_INLINE Vec<4,T> xyxy() const {
120         return Vec<4,T>(bit_pun<Vec<2,T>>(*this), bit_pun<Vec<2,T>>(*this));
123     Vec<1,T> lo, hi;
127 struct alignas(N*sizeof(T)) Vec : public VecStorage<N,T> {
131     // Methods belong here in the class declaration of Vec only if:
134     // Other operations on Vec should be defined outside the type.
136     SKVX_ALWAYS_INLINE Vec() = default;
140     SKVX_ALWAYS_INLINE Vec(std::initializer_list<T> xs) {
144         this->lo = Vec<N/2,T>::Load(vals +   0);
145         this->hi = Vec<N/2,T>::Load(vals + N/2);
151     SKVX_ALWAYS_INLINE static Vec Load(const void* ptr) {
152         Vec v;
153         memcpy(&v, ptr, sizeof(Vec));
157         memcpy(ptr, this, sizeof(Vec));
162 struct Vec<1,T> {
165     SKVX_ALWAYS_INLINE Vec() = default;
167     Vec(T s) : val(s) {}
169     SKVX_ALWAYS_INLINE Vec(std::initializer_list<T> xs) : val(xs.size() ? *xs.begin() : 0) {}
174     SKVX_ALWAYS_INLINE static Vec Load(const void* ptr) {
175         Vec v;
176         memcpy(&v, ptr, sizeof(Vec));
180         memcpy(ptr, this, sizeof(Vec));
205 // Join two Vec<N,T> into one Vec<2N,T>.
206 SINT Vec<2*N,T> join(const Vec<N,T>& lo, const Vec<N,T>& hi) {
207     Vec<2*N,T> v;
213 // We have three strategies for implementing Vec operations:
217 // We can slot in platform-specific implementations as overloads for particular Vec<N,T>,
222     // VExt<N,T> types have the same size as Vec<N,T> and support most operations directly.
238         SI Vec<4,float> to_vec(VExt<4,float> v) { return bit_pun<Vec<4,float>>(v); }
241     SINT VExt<N,T> to_vext(const Vec<N,T>& v) { return bit_pun<VExt<N,T>>(v); }
242     SINT Vec <N,T> to_vec(const VExt<N,T>& v) { return bit_pun<Vec <N,T>>(v); }
244     SINT Vec<N,T> operator+(const Vec<N,T>& x, const Vec<N,T>& y) {
247     SINT Vec<N,T> operator-(const Vec<N,T>& x, const Vec<N,T>& y) {
250     SINT Vec<N,T> operator*(const Vec<N,T>& x, const Vec<N,T>& y) {
253     SINT Vec<N,T> operator/(const Vec<N,T>& x, const Vec<N,T>& y) {
257     SINT Vec<N,T> operator^(const Vec<N,T>& x, const Vec<N,T>& y) {
260     SINT Vec<N,T> operator&(const Vec<N,T>& x, const Vec<N,T>& y) {
263     SINT Vec<N,T> operator|(const Vec<N,T>& x, const Vec<N,T>& y) {
267     SINT Vec<N,T> operator!(const Vec<N,T>& x) { return to_vec<N,T>(!to_vext(x)); }
268     SINT Vec<N,T> operator-(const Vec<N,T>& x) { return to_vec<N,T>(-to_vext(x)); }
269     SINT Vec<N,T> operator~(const Vec<N,T>& x) { return to_vec<N,T>(~to_vext(x)); }
271     SINT Vec<N,T> operator<<(const Vec<N,T>& x, int k) { return to_vec<N,T>(to_vext(x) << k); }
272     SINT Vec<N,T> operator>>(const Vec<N,T>& x, int k) { return to_vec<N,T>(to_vext(x) >> k); }
274     SINT Vec<N,M<T>> operator==(const Vec<N,T>& x, const Vec<N,T>& y) {
275         return bit_pun<Vec<N,M<T>>>(to_vext(x) == to_vext(y));
277     SINT Vec<N,M<T>> operator!=(const Vec<N,T>& x, const Vec<N,T>& y) {
278         return bit_pun<Vec<N,M<T>>>(to_vext(x) != to_vext(y));
280     SINT Vec<N,M<T>> operator<=(const Vec<N,T>& x, const Vec<N,T>& y) {
281         return bit_pun<Vec<N,M<T>>>(to_vext(x) <= to_vext(y));
283     SINT Vec<N,M<T>> operator>=(const Vec<N,T>& x, const Vec<N,T>& y) {
284         return bit_pun<Vec<N,M<T>>>(to_vext(x) >= to_vext(y));
286     SINT Vec<N,M<T>> operator< (const Vec<N,T>& x, const Vec<N,T>& y) {
287         return bit_pun<Vec<N,M<T>>>(to_vext(x) <  to_vext(y));
289     SINT Vec<N,M<T>> operator> (const Vec<N,T>& x, const Vec<N,T>& y) {
290         return bit_pun<Vec<N,M<T>>>(to_vext(x) >  to_vext(y));
299     SIT Vec<1,T> operator+(const Vec<1,T>& x, const Vec<1,T>& y) { return x.val + y.val; }
300     SIT Vec<1,T> operator-(const Vec<1,T>& x, const Vec<1,T>& y) { return x.val - y.val; }
301     SIT Vec<1,T> operator*(const Vec<1,T>& x, const Vec<1,T>& y) { return x.val * y.val; }
302     SIT Vec<1,T> operator/(const Vec<1,T>& x, const Vec<1,T>& y) { return x.val / y.val; }
304     SIT Vec<1,T> operator^(const Vec<1,T>& x, const Vec<1,T>& y) { return x.val ^ y.val; }
305     SIT Vec<1,T> operator&(const Vec<1,T>& x, const Vec<1,T>& y) { return x.val & y.val; }
306     SIT Vec<1,T> operator|(const Vec<1,T>& x, const Vec<1,T>& y) { return x.val | y.val; }
308     SIT Vec<1,T> operator!(const Vec<1,T>& x) { return !x.val; }
309     SIT Vec<1,T> operator-(const Vec<1,T>& x) { return -x.val; }
310     SIT Vec<1,T> operator~(const Vec<1,T>& x) { return ~x.val; }
312     SIT Vec<1,T> operator<<(const Vec<1,T>& x, int k) { return x.val << k; }
313     SIT Vec<1,T> operator>>(const Vec<1,T>& x, int k) { return x.val >> k; }
315     SIT Vec<1,M<T>> operator==(const Vec<1,T>& x, const Vec<1,T>& y) {
318     SIT Vec<1,M<T>> operator!=(const Vec<1,T>& x, const Vec<1,T>& y) {
321     SIT Vec<1,M<T>> operator<=(const Vec<1,T>& x, const Vec<1,T>& y) {
324     SIT Vec<1,M<T>> operator>=(const Vec<1,T>& x, const Vec<1,T>& y) {
327     SIT Vec<1,M<T>> operator< (const Vec<1,T>& x, const Vec<1,T>& y) {
330     SIT Vec<1,M<T>> operator> (const Vec<1,T>& x, const Vec<1,T>& y) {
335     SINT Vec<N,T> operator+(const Vec<N,T>& x, const Vec<N,T>& y) {
338     SINT Vec<N,T> operator-(const Vec<N,T>& x, const Vec<N,T>& y) {
341     SINT Vec<N,T> operator*(const Vec<N,T>& x, const Vec<N,T>& y) {
344     SINT Vec<N,T> operator/(const Vec<N,T>& x, const Vec<N,T>& y) {
348     SINT Vec<N,T> operator^(const Vec<N,T>& x, const Vec<N,T>& y) {
351     SINT Vec<N,T> operator&(const Vec<N,T>& x, const Vec<N,T>& y) {
354     SINT Vec<N,T> operator|(const Vec<N,T>& x, const Vec<N,T>& y) {
358     SINT Vec<N,T> operator!(const Vec<N,T>& x) { return join(!x.lo, !x.hi); }
359     SINT Vec<N,T> operator-(const Vec<N,T>& x) { return join(-x.lo, -x.hi); }
360     SINT Vec<N,T> operator~(const Vec<N,T>& x) { return join(~x.lo, ~x.hi); }
362     SINT Vec<N,T> operator<<(const Vec<N,T>& x, int k) { return join(x.lo << k, x.hi << k); }
363     SINT Vec<N,T> operator>>(const Vec<N,T>& x, int k) { return join(x.lo >> k, x.hi >> k); }
365     SINT Vec<N,M<T>> operator==(const Vec<N,T>& x, const Vec<N,T>& y) {
368     SINT Vec<N,M<T>> operator!=(const Vec<N,T>& x, const Vec<N,T>& y) {
371     SINT Vec<N,M<T>> operator<=(const Vec<N,T>& x, const Vec<N,T>& y) {
374     SINT Vec<N,M<T>> operator>=(const Vec<N,T>& x, const Vec<N,T>& y) {
377     SINT Vec<N,M<T>> operator< (const Vec<N,T>& x, const Vec<N,T>& y) {
380     SINT Vec<N,M<T>> operator> (const Vec<N,T>& x, const Vec<N,T>& y) {
386 SINTU Vec<N,T>    operator+ (U x, const Vec<N,T>& y) { return Vec<N,T>(x) +  y; }
387 SINTU Vec<N,T>    operator- (U x, const Vec<N,T>& y) { return Vec<N,T>(x) -  y; }
388 SINTU Vec<N,T>    operator* (U x, const Vec<N,T>& y) { return Vec<N,T>(x) *  y; }
389 SINTU Vec<N,T>    operator/ (U x, const Vec<N,T>& y) { return Vec<N,T>(x) /  y; }
390 SINTU Vec<N,T>    operator^ (U x, const Vec<N,T>& y) { return Vec<N,T>(x) ^  y; }
391 SINTU Vec<N,T>    operator& (U x, const Vec<N,T>& y) { return Vec<N,T>(x) &  y; }
392 SINTU Vec<N,T>    operator| (U x, const Vec<N,T>& y) { return Vec<N,T>(x) |  y; }
393 SINTU Vec<N,M<T>> operator==(U x, const Vec<N,T>& y) { return Vec<N,T>(x) == y; }
394 SINTU Vec<N,M<T>> operator!=(U x, const Vec<N,T>& y) { return Vec<N,T>(x) != y; }
395 SINTU Vec<N,M<T>> operator<=(U x, const Vec<N,T>& y) { return Vec<N,T>(x) <= y; }
396 SINTU Vec<N,M<T>> operator>=(U x, const Vec<N,T>& y) { return Vec<N,T>(x) >= y; }
397 SINTU Vec<N,M<T>> operator< (U x, const Vec<N,T>& y) { return Vec<N,T>(x) <  y; }
398 SINTU Vec<N,M<T>> operator> (U x, const Vec<N,T>& y) { return Vec<N,T>(x) >  y; }
400 SINTU Vec<N,T>    operator+ (const Vec<N,T>& x, U y) { return x +  Vec<N,T>(y); }
401 SINTU Vec<N,T>    operator- (const Vec<N,T>& x, U y) { return x -  Vec<N,T>(y); }
402 SINTU Vec<N,T>    operator* (const Vec<N,T>& x, U y) { return x *  Vec<N,T>(y); }
403 SINTU Vec<N,T>    operator/ (const Vec<N,T>& x, U y) { return x /  Vec<N,T>(y); }
404 SINTU Vec<N,T>    operator^ (const Vec<N,T>& x, U y) { return x ^  Vec<N,T>(y); }
405 SINTU Vec<N,T>    operator& (const Vec<N,T>& x, U y) { return x &  Vec<N,T>(y); }
406 SINTU Vec<N,T>    operator| (const Vec<N,T>& x, U y) { return x |  Vec<N,T>(y); }
407 SINTU Vec<N,M<T>> operator==(const Vec<N,T>& x, U y) { return x == Vec<N,T>(y); }
408 SINTU Vec<N,M<T>> operator!=(const Vec<N,T>& x, U y) { return x != Vec<N,T>(y); }
409 SINTU Vec<N,M<T>> operator<=(const Vec<N,T>& x, U y) { return x <= Vec<N,T>(y); }
410 SINTU Vec<N,M<T>> operator>=(const Vec<N,T>& x, U y) { return x >= Vec<N,T>(y); }
411 SINTU Vec<N,M<T>> operator< (const Vec<N,T>& x, U y) { return x <  Vec<N,T>(y); }
412 SINTU Vec<N,M<T>> operator> (const Vec<N,T>& x, U y) { return x >  Vec<N,T>(y); }
414 SINT Vec<N,T>& operator+=(Vec<N,T>& x, const Vec<N,T>& y) { return (x = x + y); }
415 SINT Vec<N,T>& operator-=(Vec<N,T>& x, const Vec<N,T>& y) { return (x = x - y); }
416 SINT Vec<N,T>& operator*=(Vec<N,T>& x, const Vec<N,T>& y) { return (x = x * y); }
417 SINT Vec<N,T>& operator/=(Vec<N,T>& x, const Vec<N,T>& y) { return (x = x / y); }
418 SINT Vec<N,T>& operator^=(Vec<N,T>& x, const Vec<N,T>& y) { return (x = x ^ y); }
419 SINT Vec<N,T>& operator&=(Vec<N,T>& x, const Vec<N,T>& y) { return (x = x & y); }
420 SINT Vec<N,T>& operator|=(Vec<N,T>& x, const Vec<N,T>& y) { return (x = x | y); }
422 SINTU Vec<N,T>& operator+=(Vec<N,T>& x, U y) { return (x = x + Vec<N,T>(y)); }
423 SINTU Vec<N,T>& operator-=(Vec<N,T>& x, U y) { return (x = x - Vec<N,T>(y)); }
424 SINTU Vec<N,T>& operator*=(Vec<N,T>& x, U y) { return (x = x * Vec<N,T>(y)); }
425 SINTU Vec<N,T>& operator/=(Vec<N,T>& x, U y) { return (x = x / Vec<N,T>(y)); }
426 SINTU Vec<N,T>& operator^=(Vec<N,T>& x, U y) { return (x = x ^ Vec<N,T>(y)); }
427 SINTU Vec<N,T>& operator&=(Vec<N,T>& x, U y) { return (x = x & Vec<N,T>(y)); }
428 SINTU Vec<N,T>& operator|=(Vec<N,T>& x, U y) { return (x = x | Vec<N,T>(y)); }
430 SINT Vec<N,T>& operator<<=(Vec<N,T>& x, int bits) { return (x = x << bits); }
431 SINT Vec<N,T>& operator>>=(Vec<N,T>& x, int bits) { return (x = x >> bits); }
438 SINT Vec<N,T> naive_if_then_else(const Vec<N,M<T>>& cond, const Vec<N,T>& t, const Vec<N,T>& e) {
439     return bit_pun<Vec<N,T>>(( cond & bit_pun<Vec<N, M<T>>>(t)) |
440                              (~cond & bit_pun<Vec<N, M<T>>>(e)) );
443 SIT Vec<1,T> if_then_else(const Vec<1,M<T>>& cond, const Vec<1,T>& t, const Vec<1,T>& e) {
445     return bit_pun<Vec<1,T>>(( cond & bit_pun<Vec<1, M<T>>>(t)) |
446                              (~cond & bit_pun<Vec<1, M<T>>>(e)) );
448 SINT Vec<N,T> if_then_else(const Vec<N,M<T>>& cond, const Vec<N,T>& t, const Vec<N,T>& e) {
453         return unchecked_bit_pun<Vec<N,T>>(_mm256_blendv_epi8(unchecked_bit_pun<__m256i>(e),
460         return unchecked_bit_pun<Vec<N,T>>(_mm_blendv_epi8(unchecked_bit_pun<__m128i>(e),
467         return unchecked_bit_pun<Vec<N,T>>(vbslq_u8(unchecked_bit_pun<uint8x16_t>(cond),
481 SIT  bool any(const Vec<1,T>& x) { return x.val != 0; }
482 SINT bool any(const Vec<N,T>& x) {
492 SIT  bool all(const Vec<1,T>& x) { return x.val != 0; }
493 SINT bool all(const Vec<N,T>& x) {
515 // cast() Vec<N,S> to Vec<N,D>, as if applying a C-cast to each lane.
518 SI Vec<1,D> cast(const Vec<1,S>& src) { return (D)src.val; }
521 SI Vec<N,D> cast(const Vec<N,S>& src) {
530 SIT  T min(const Vec<1,T>& x) { return x.val; }
531 SIT  T max(const Vec<1,T>& x) { return x.val; }
532 SINT T min(const Vec<N,T>& x) { return std::min(min(x.lo), min(x.hi)); }
533 SINT T max(const Vec<N,T>& x) { return std::max(max(x.lo), max(x.hi)); }
535 SINT Vec<N,T> min(const Vec<N,T>& x, const Vec<N,T>& y) { return naive_if_then_else(y < x, y, x); }
536 SINT Vec<N,T> max(const Vec<N,T>& x, const Vec<N,T>& y) { return naive_if_then_else(x < y, y, x); }
538 SINTU Vec<N,T> min(const Vec<N,T>& x, U y) { return min(x, Vec<N,T>(y)); }
539 SINTU Vec<N,T> max(const Vec<N,T>& x, U y) { return max(x, Vec<N,T>(y)); }
540 SINTU Vec<N,T> min(U x, const Vec<N,T>& y) { return min(Vec<N,T>(x), y); }
541 SINTU Vec<N,T> max(U x, const Vec<N,T>& y) { return max(Vec<N,T>(x), y); }
545 SINT Vec<N,T> pin(const Vec<N,T>& x, const Vec<N,T>& lo, const Vec<N,T>& hi) {
550 //    skvx::Vec<4,float> rgba = {R,G,B,A};
555 // The only real restriction is that the output also be a legal N=power-of-two sknx::Vec.
557 SI Vec<sizeof...(Ix),T> shuffle(const Vec<N,T>& x) {
571             Fn&& fn, const Args&... args) -> skvx::Vec<sizeof...(I), decltype(fn(args[0]...))> {
587 auto map(Fn&& fn, const Vec<N,T>& first, const Rest&... rest) {
592 SIN Vec<N,float>  ceil(const Vec<N,float>& x) { return map( ceilf, x); }
593 SIN Vec<N,float> floor(const Vec<N,float>& x) { return map(floorf, x); }
594 SIN Vec<N,float> trunc(const Vec<N,float>& x) { return map(truncf, x); }
595 SIN Vec<N,float> round(const Vec<N,float>& x) { return map(roundf, x); }
596 SIN Vec<N,float>  sqrt(const Vec<N,float>& x) { return map( sqrtf, x); }
597 SIN Vec<N,float>   abs(const Vec<N,float>& x) { return map( fabsf, x); }
598 SIN Vec<N,float>   fma(const Vec<N,float>& x,
599                        const Vec<N,float>& y,
600                        const Vec<N,float>& z) {
606 SI Vec<1,int> lrint(const Vec<1,float>& x) {
609 SIN Vec<N,int> lrint(const Vec<N,float>& x) {
612         return unchecked_bit_pun<Vec<N,int>>(_mm256_cvtps_epi32(unchecked_bit_pun<__m256>(x)));
617         return unchecked_bit_pun<Vec<N,int>>(_mm_cvtps_epi32(unchecked_bit_pun<__m128>(x)));
624 SIN Vec<N,float> fract(const Vec<N,float>& x) { return x - floor(x); }
631 SIN Vec<N,uint16_t> to_half_finite_ftz(const Vec<N,float>& x) {
632     Vec<N,uint32_t> sem = bit_pun<Vec<N,uint32_t>>(x),
636     return cast<uint16_t>(if_then_else(is_denorm, Vec<N,uint32_t>(0)
639 SIN Vec<N,float> from_half_finite_ftz(const Vec<N,uint16_t>& x) {
640     Vec<N,uint32_t> wide = cast<uint32_t>(x),
643     auto is_denorm = bit_pun<Vec<N,int32_t>>(em < 0x0400);
644     return if_then_else(is_denorm, Vec<N,float>(0)
645                                  , bit_pun<Vec<N,float>>( (s<<16) + (em<<13) + ((127-15)<<23) ));
649 SI Vec<1,uint16_t> to_half(const Vec<1,float>&    x) { return   to_half_finite_ftz(x); }
650 SI Vec<1,float>  from_half(const Vec<1,uint16_t>& x) { return from_half_finite_ftz(x); }
652 SIN Vec<N,uint16_t> to_half(const Vec<N,float>& x) {
655         return unchecked_bit_pun<Vec<N,uint16_t>>(_mm256_cvtps_ph(unchecked_bit_pun<__m256>(x),
661         return unchecked_bit_pun<Vec<N,uint16_t>>(vcvt_f16_f32(unchecked_bit_pun<float32x4_t>(x)));
672 SIN Vec<N,float> from_half(const Vec<N,uint16_t>& x) {
675         return unchecked_bit_pun<Vec<N,float>>(_mm256_cvtph_ps(unchecked_bit_pun<__m128i>(x)));
680         return unchecked_bit_pun<Vec<N,float>>(vcvt_f32_f16(unchecked_bit_pun<float16x4_t>(x)));
691 SIN Vec<N,uint8_t> div255(const Vec<N,uint16_t>& x) {
697 SIN Vec<N,uint8_t> approx_scale(const Vec<N,uint8_t>& x, const Vec<N,uint8_t>& y) {
731     Vec<4, uint32_t> divide(const Vec<4, uint32_t>& numerator) const {
751     SI Vec<8,uint16_t> mull(const Vec<8,uint8_t>& x,
752                             const Vec<8,uint8_t>& y) {
757     SIN std::enable_if_t<(N < 8), Vec<N,uint16_t>> mull(const Vec<N,uint8_t>& x,
758                                                         const Vec<N,uint8_t>& y) {
764     SIN std::enable_if_t<(N > 8), Vec<N,uint16_t>> mull(const Vec<N,uint8_t>& x,
765                                                         const Vec<N,uint8_t>& y) {
772     SIN Vec<N,uint16_t> mull(const Vec<N,uint8_t>& x,
773                              const Vec<N,uint8_t>& y) {
796 SIN Vec<N,float> approx_acos(Vec<N,float> x) {
817                        skvx::Vec<1,T>& a,
818                        skvx::Vec<1,T>& b,
819                        skvx::Vec<1,T>& c,
820                        skvx::Vec<1,T>& d) {
827                         skvx::Vec<N,T>& a,
828                         skvx::Vec<N,T>& b,
829                         skvx::Vec<N,T>& c,
830                         skvx::Vec<N,T>& d) {
838                       skvx::Vec<N,T>& a, \
839                       skvx::Vec<N,T>& b, \
840                       skvx::Vec<N,T>& c, \
841                       skvx::Vec<N,T>& d) { \
843     a = skvx::bit_pun<skvx::Vec<N,T>>(mat.val[0]); \
844     b = skvx::bit_pun<skvx::Vec<N,T>>(mat.val[1]); \
845     c = skvx::bit_pun<skvx::Vec<N,T>>(mat.val[2]); \
846     d = skvx::bit_pun<skvx::Vec<N,T>>(mat.val[3]); \
865                       Vec<4,float>& a,
866                       Vec<4,float>& b,
867                       Vec<4,float>& c,
868                       Vec<4,float>& d) {
875     a = bit_pun<Vec<4,float>>(a_);
876     b = bit_pun<Vec<4,float>>(b_);
877     c = bit_pun<Vec<4,float>>(c_);
878     d = bit_pun<Vec<4,float>>(d_);
887 SIT void strided_load2(const T* v, skvx::Vec<1,T>& a, skvx::Vec<1,T>& b) {
891 SINT void strided_load2(const T* v, skvx::Vec<N,T>& a, skvx::Vec<N,T>& b) {
898 SI void strided_load2(const T* v, skvx::Vec<N,T>& a, skvx::Vec<N,T>& b) { \
900     a = skvx::bit_pun<skvx::Vec<N,T>>(mat.val[0]); \
901     b = skvx::bit_pun<skvx::Vec<N,T>>(mat.val[1]); \