Lines Matching refs:SI
16 // Every function in this file should be marked static and inline using SI.
18 #define SI __attribute__((always_inline)) static inline
20 #define SI static inline
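The two SI definitions above are the whole trick: when the compiler supports the attribute, SI forces every helper in this file to be inlined into the pipeline stage that calls it (and gives it internal linkage); otherwise it degrades to a plain static inline. A minimal sketch of the pattern, assuming a GCC/Clang-style feature guard (the actual #if condition is not part of this listing) and a hypothetical helper:

    #if defined(__clang__) || defined(__GNUC__)
        #define SI __attribute__((always_inline)) static inline
    #else
        #define SI static inline
    #endif

    // Hypothetical helper in the same style: internal linkage, always inlined,
    // so the per-pixel call costs nothing at runtime.
    SI float lerp_scalar(float from, float to, float t) {
        return from + (to - from)*t;
    }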
24 SI Dst widen_cast(const Src& src) {
38 SI void* load_and_inc(void**& program) {
124 SI F mad(F f, F m, F a) { return f*m+a; }
125 SI F min(F a, F b) { return fminf(a,b); }
126 SI F max(F a, F b) { return fmaxf(a,b); }
127 SI F abs_ (F v) { return fabsf(v); }
128 SI F floor_(F v) { return floorf(v); }
129 SI F rcp_fast(F v) { return 1.0f / v; }
130 SI F rsqrt (F v) { return 1.0f / sqrtf(v); }
131 SI F sqrt_ (F v) { return sqrtf(v); }
132 SI F rcp_precise (F v) { return 1.0f / v; }
134 SI U32 round (F v, F scale) { return (uint32_t)(v*scale + 0.5f); }
135 SI U16 pack(U32 v) { return (U16)v; }
136 SI U8 pack(U16 v) { return (U8)v; }
138 SI F if_then_else(I32 c, F t, F e) { return c ? t : e; }
141 SI T gather(const T* p, U32 ix) { return p[ix]; }
143 SI void load2(const uint16_t* ptr, size_t tail, U16* r, U16* g) {
147 SI void store2(uint16_t* ptr, size_t tail, U16 r, U16 g) {
151 SI void load3(const uint16_t* ptr, size_t tail, U16* r, U16* g, U16* b) {
156 SI void load4(const uint16_t* ptr, size_t tail, U16* r, U16* g, U16* b, U16* a) {
162 SI void store4(uint16_t* ptr, size_t tail, U16 r, U16 g, U16 b, U16 a) {
169 SI void load2(const float* ptr, size_t tail, F* r, F* g) {
173 SI void store2(float* ptr, size_t tail, F r, F g) {
177 SI void load4(const float* ptr, size_t tail, F* r, F* g, F* b, F* a) {
183 SI void store4(float* ptr, size_t tail, F r, F g, F b, F a) {
201 SI F min(F a, F b) { return vminq_f32(a,b); }
202 SI F max(F a, F b) { return vmaxq_f32(a,b); }
203 SI F abs_ (F v) { return vabsq_f32(v); }
204 SI F rcp_fast(F v) { auto e = vrecpeq_f32 (v); return vrecpsq_f32 (v,e ) * e; }
205 SI F rcp_precise (F v) { auto e = rcp_fast(v); return vrecpsq_f32 (v,e ) * e; }
206 SI F rsqrt (F v) { auto e = vrsqrteq_f32(v); return vrsqrtsq_f32(v,e*e) * e; }
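The three NEON reciprocal helpers above are one Newton-Raphson step each: vrecpeq_f32 gives a rough reciprocal estimate, and vrecpsq_f32(v, e) evaluates 2 - v*e, so e * vrecpsq_f32(v, e) is the refined estimate; rcp_precise simply runs a second step on rcp_fast's result. vrsqrtsq_f32(v, e*e) likewise evaluates (3 - v*e*e)/2, the corresponding step for 1/sqrt(v). A scalar sketch of the same refinement, for intuition only:

    // One Newton-Raphson refinement step for 1/v; each step roughly doubles
    // the number of correct bits in the estimate e.
    static inline float refine_rcp(float v, float e) {
        return e * (2.0f - v*e);          // == e * vrecpsq_f32(v, e) per lane
    }

    // Starting from a hardware estimate e0 ~ 1/v (vrecpeq_f32):
    //   rcp_fast(v)    ~ refine_rcp(v, e0)
    //   rcp_precise(v) ~ refine_rcp(v, rcp_fast(v))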
208 SI U16 pack(U32 v) { return __builtin_convertvector(v, U16); }
209 SI U8 pack(U16 v) { return __builtin_convertvector(v, U8); }
211 SI F if_then_else(I32 c, F t, F e) { return vbslq_f32((U32)c,t,e); }
214 SI F mad(F f, F m, F a) { return vfmaq_f32(a,f,m); }
215 SI F floor_(F v) { return vrndmq_f32(v); }
216 SI F sqrt_(F v) { return vsqrtq_f32(v); }
217 SI U32 round(F v, F scale) { return vcvtnq_u32_f32(v*scale); }
219 SI F mad(F f, F m, F a) { return vmlaq_f32(a,f,m); }
220 SI F floor_(F v) {
225 SI F sqrt_(F v) {
232 SI U32 round(F v, F scale) {
239 SI V<T> gather(const T* p, U32 ix) {
242 SI void load2(const uint16_t* ptr, size_t tail, U16* r, U16* g) {
254 SI void store2(uint16_t* ptr, size_t tail, U16 r, U16 g) {
263 SI void load3(const uint16_t* ptr, size_t tail, U16* r, U16* g, U16* b) {
276 SI void load4(const uint16_t* ptr, size_t tail, U16* r, U16* g, U16* b, U16* a) {
291 SI void store4(uint16_t* ptr, size_t tail, U16 r, U16 g, U16 b, U16 a) {
300 SI void load2(const float* ptr, size_t tail, F* r, F* g) {
312 SI void store2(float* ptr, size_t tail, F r, F g) {
321 SI void load4(const float* ptr, size_t tail, F* r, F* g, F* b, F* a) {
335 SI void store4(float* ptr, size_t tail, F r, F g, F b, F a) {
355 SI F mad(F f, F m, F a) {
363 SI F min(F a, F b) { return _mm256_min_ps(a,b); }
364 SI F max(F a, F b) { return _mm256_max_ps(a,b); }
365 SI F abs_ (F v) { return _mm256_and_ps(v, 0-v); }
366 SI F floor_(F v) { return _mm256_floor_ps(v); }
367 SI F rcp_fast(F v) { return _mm256_rcp_ps (v); }
368 SI F rsqrt (F v) { return _mm256_rsqrt_ps(v); }
369 SI F sqrt_ (F v) { return _mm256_sqrt_ps (v); }
370 SI F rcp_precise (F v) {
380 SI U32 round (F v, F scale) { return _mm256_cvtps_epi32(v*scale); }
381 SI U16 pack(U32 v) {
385 SI U8 pack(U16 v) {
390 SI F if_then_else(I32 c, F t, F e) { return _mm256_blendv_ps(e,t,c); }
393 SI V<T> gather(const T* p, U32 ix) {
398 SI F gather(const float* p, U32 ix) { return _mm256_i32gather_ps (p, ix, 4); }
399 SI U32 gather(const uint32_t* p, U32 ix) { return _mm256_i32gather_epi32(p, ix, 4); }
400 SI U64 gather(const uint64_t* p, U32 ix) {
409 SI void load2(const uint16_t* ptr, size_t tail, U16* r, U16* g) {
440 SI void store2(uint16_t* ptr, size_t tail, U16 r, U16 g) {
471 SI void load3(const uint16_t* ptr, size_t tail, U16* r, U16* g, U16* b) {
512 SI void load4(const uint16_t* ptr, size_t tail, U16* r, U16* g, U16* b, U16* a) {
546 SI void store4(uint16_t* ptr, size_t tail, U16 r, U16 g, U16 b, U16 a) {
574 SI void load2(const float* ptr, size_t tail, F* r, F* g) {
607 SI void store2(float* ptr, size_t tail, F r, F g) {
638 SI void load4(const float* ptr, size_t tail, F* r, F* g, F* b, F* a) {
662 SI void store4(float* ptr, size_t tail, F r, F g, F b, F a) {
702 SI F mad(F f, F m, F a) { return f*m+a; }
703 SI F min(F a, F b) { return _mm_min_ps(a,b); }
704 SI F max(F a, F b) { return _mm_max_ps(a,b); }
705 SI F abs_(F v) { return _mm_and_ps(v, 0-v); }
706 SI F rcp_fast(F v) { return _mm_rcp_ps (v); }
707 SI F rcp_precise (F v) { F e = rcp_fast(v); return e * (2.0f - v * e); }
708 SI F rsqrt (F v) { return _mm_rsqrt_ps(v); }
709 SI F sqrt_(F v) { return _mm_sqrt_ps (v); }
711 SI U32 round(F v, F scale) { return _mm_cvtps_epi32(v*scale); }
713 SI U16 pack(U32 v) {
723 SI U8 pack(U16 v) {
729 SI F if_then_else(I32 c, F t, F e) {
733 SI F floor_(F v) {
743 SI V<T> gather(const T* p, U32 ix) {
747 SI void load2(const uint16_t* ptr, size_t tail, U16* r, U16* g) {
771 SI void store2(uint16_t* ptr, size_t tail, U16 r, U16 g) {
789 SI void load3(const uint16_t* ptr, size_t tail, U16* r, U16* g, U16* b) {
825 SI void load4(const uint16_t* ptr, size_t tail, U16* r, U16* g, U16* b, U16* a) {
850 SI void store4(uint16_t* ptr, size_t tail, U16 r, U16 g, U16 b, U16 a) {
865 SI void load2(const float* ptr, size_t tail, F* r, F* g) {
879 SI void store2(float* ptr, size_t tail, F r, F g) {
892 SI void load4(const float* ptr, size_t tail, F* r, F* g, F* b, F* a) {
912 SI void store4(float* ptr, size_t tail, F r, F g, F b, F a) {
931 SI F cast (U32 v) { return (F)v; }
932 SI F cast64(U64 v) { return (F)v; }
933 SI U32 trunc_(F v) { return (U32)v; }
934 SI U32 expand(U16 v) { return (U32)v; }
935 SI U32 expand(U8 v) { return (U32)v; }
937 SI F cast (U32 v) { return __builtin_convertvector((I32)v, F); }
938 SI F cast64(U64 v) { return __builtin_convertvector( v, F); }
939 SI U32 trunc_(F v) { return (U32)__builtin_convertvector( v, I32); }
940 SI U32 expand(U16 v) { return __builtin_convertvector( v, U32); }
941 SI U32 expand(U8 v) { return __builtin_convertvector( v, U32); }
945 SI V if_then_else(I32 c, V t, V e) {
949 SI U16 bswap(U16 x) {
961 SI F fract(F v) { return v - floor_(v); }
964 SI F approx_log2(F x) {
976 SI F approx_log(F x) {
981 SI F approx_pow2(F x) {
989 SI F approx_exp(F x) {
994 SI F approx_powf(F x, F y) {
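The approx_* family builds on a log2/pow2 pair that leans on the IEEE-754 bit layout: reinterpreting a positive float's bits as an integer and scaling by 2^-23 yields its biased exponent plus a linear approximation of log2 of the mantissa, and the pow2 direction inverts that. approx_powf(x, y) is then essentially pow2(y * log2(x)), while approx_log/approx_exp rescale by ln(2). A rough, uncorrected sketch of the idea (positive finite inputs only; the real helpers also apply small correction terms to tighten the error):

    #include <cstdint>
    #include <cstring>

    static inline float rough_log2(float x) {
        uint32_t bits; std::memcpy(&bits, &x, sizeof(bits));
        return bits * (1.0f / (1 << 23)) - 127.0f;   // biased exponent + mantissa term
    }

    static inline float rough_pow2(float x) {
        uint32_t bits = (uint32_t)((x + 127.0f) * (1 << 23));
        float f; std::memcpy(&f, &bits, sizeof(f));
        return f;
    }

    static inline float rough_powf(float x, float y) {
        return rough_pow2(rough_log2(x) * y);
    }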
999 SI F from_half(U16 h) {
1020 SI U16 to_half(F f) {
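from_half/to_half convert between 16-bit half floats and the pipeline's 32-bit lanes. Ignoring denormals, infinities, and NaN, the conversion is a 13-bit shift of the mantissa plus rebiasing the exponent between half's bias of 15 and single precision's bias of 127. A simplified scalar sketch, valid for normal in-range values only:

    #include <cstdint>
    #include <cstring>

    static inline float half_to_float(uint16_t h) {
        uint32_t sign = (uint32_t)(h & 0x8000) << 16;
        uint32_t em   = (uint32_t)(h & 0x7fff) << 13;          // exponent + mantissa
        uint32_t bits = sign | (em + ((127u - 15u) << 23));    // rebias 15 -> 127
        float f; std::memcpy(&f, &bits, sizeof(f));
        return f;
    }

    static inline uint16_t float_to_half(float f) {
        uint32_t bits; std::memcpy(&bits, &f, sizeof(bits));
        uint32_t sign = (bits & 0x80000000u) >> 16;
        uint32_t em   = (bits & 0x7fffffffu) - ((127u - 15u) << 23);  // rebias 127 -> 15
        return (uint16_t)(sign | (em >> 13));                  // truncate to 10 mantissa bits
    }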
1113 SI void name##_k(__VA_ARGS__, size_t dx, size_t dy, size_t tail, \
1122 SI void name##_k(__VA_ARGS__, size_t dx, size_t dy, size_t tail, \
1126 SI void name##_k(__VA_ARGS__, size_t dx, size_t dy, size_t tail, \
1134 SI void name##_k(__VA_ARGS__, size_t dx, size_t dy, size_t tail, \
1154 SI V load(const T* src, size_t tail) {
1175 SI void store(T* dst, V v, size_t tail) {
1194 SI F from_byte(U8 b) {
1197 SI F from_short(U16 s) {
1200 SI void from_565(U16 _565, F* r, F* g, F* b) {
1206 SI void from_4444(U16 _4444, F* r, F* g, F* b, F* a) {
1213 SI void from_8888(U32 _8888, F* r, F* g, F* b, F* a) {
1219 SI void from_88(U16 _88, F* r, F* g) {
1224 SI void from_1010102(U32 rgba, F* r, F* g, F* b, F* a) {
1230 SI void from_1616(U32 _1616, F* r, F* g) {
1234 SI void from_16161616(U64 _16161616, F* r, F* g, F* b, F* a) {
1243 SI T* ptr_at_xy(const SkRasterPipeline_MemoryCtx* ctx, size_t dx, size_t dy) {
1248 SI F clamp(F v, F limit) {
1255 SI U32 ix_and_ptr(T** ptr, const SkRasterPipeline_GatherCtx* ctx, F x, F y) {
1270 SI U32 to_unorm(F v, F scale, F bias = 1.0f) {
1275 SI I32 cond_to_mask(I32 cond) { return if_then_else(cond, I32(~0), I32(0)); }
1394 SI F name##_channel(F s, F d, F sa, F da); \
1401 SI F name##_channel(F s, F d, F sa, F da)
1403 SI F inv(F x) { return 1.0f - x; }
1404 SI F two(F x) { return x + x; }
1426 SI F name##_channel(F s, F d, F sa, F da); \
1433 SI F name##_channel(F s, F d, F sa, F da)
1488 SI F sat(F r, F g, F b) { return max(r, max(g,b)) - min(r, min(g,b)); }
1489 SI F lum(F r, F g, F b) { return r*0.30f + g*0.59f + b*0.11f; }
1491 SI void set_sat(F* r, F* g, F* b, F s) {
1504 SI void set_lum(F* r, F* g, F* b, F l) {
1510 SI void clip_color(F* r, F* g, F* b, F a) {
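sat, lum, set_sat, set_lum and clip_color are the helpers behind the non-separable blend modes (hue, saturation, color, luminosity); lum uses the 0.30/0.59/0.11 weights the compositing spec prescribes. A scalar sketch of the SetLum step as the W3C compositing-and-blending spec defines it, which is the recipe these helpers apply per lane:

    // SetLum: shift r,g,b so their luminosity matches l.
    static inline float lum3(float r, float g, float b) {
        return 0.30f*r + 0.59f*g + 0.11f*b;
    }

    static inline void set_lum3(float* r, float* g, float* b, float l) {
        float d = l - lum3(*r, *g, *b);
        *r += d;
        *g += d;
        *b += d;
        // clip_color() then scales any out-of-range channel back toward the
        // luminosity so the result stays in gamut.
    }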
1698 SI F clamp_01(F v) { return min(max(0, v), 1); }
1739 SI F alpha_coverage_from_rgb_coverage(F a, F da, F cr, F cg, F cb) {
1775 SI F lerp(F from, F to, F t) {
1846 SI F strip_sign(F x, U32* sign) {
1852 SI F apply_sign(F x, U32 sign) {
2303 SI F exclusive_repeat(F v, const SkRasterPipeline_TileCtx* ctx) {
2306 SI F exclusive_mirror(F v, const SkRasterPipeline_TileCtx* ctx) {
2425 SI void gradient_lookup(const SkRasterPipeline_GradientCtx* c, U32 idx, F t,
2610 SI void bilinear_x(SkRasterPipeline_SamplerCtx* ctx, F* x) {
2620 SI void bilinear_y(SkRasterPipeline_SamplerCtx* ctx, F* y) {
2642 SI F bicubic_near(F t) {
2646 SI F bicubic_far(F t) {
2652 SI void bicubic_x(SkRasterPipeline_SamplerCtx* ctx, F* x) {
2664 SI void bicubic_y(SkRasterPipeline_SamplerCtx* ctx, F* y) {
2708 SI F tile(F v, SkTileMode mode, float limit, float invLimit) {
2720 SI void sample(const SkRasterPipeline_SamplerCtx2* ctx, F x, F y,
2742 SI void sampler(const SkRasterPipeline_SamplerCtx2* ctx,
2987 SI void name##_k(__VA_ARGS__, size_t dx, size_t dy, size_t tail, F& x, F& y); \
2997 SI void name##_k(__VA_ARGS__, size_t dx, size_t dy, size_t tail, F& x, F& y)
3000 SI void name##_k(__VA_ARGS__, size_t dx, size_t dy, size_t tail, F x, F y, \
3011 SI void name##_k(__VA_ARGS__, size_t dx, size_t dy, size_t tail, F x, F y, \
3016 SI void name##_k(__VA_ARGS__, size_t dx, size_t dy, size_t tail, \
3025 SI void name##_k(__VA_ARGS__, size_t dx, size_t dy, size_t tail, \
3030 SI void name##_k(__VA_ARGS__, size_t dx, size_t dy, size_t tail, F& x, F& y); \
3042 SI void name##_k(__VA_ARGS__, size_t dx, size_t dy, size_t tail, F& x, F& y)
3045 SI void name##_k(__VA_ARGS__, size_t dx, size_t dy, size_t tail, F x, F y, \
3057 SI void name##_k(__VA_ARGS__, size_t dx, size_t dy, size_t tail, F x, F y, \
3062 SI void name##_k(__VA_ARGS__, size_t dx, size_t dy, size_t tail, \
3072 SI void name##_k(__VA_ARGS__, size_t dx, size_t dy, size_t tail, \
3097 SI U16 div255(U16 v) {
3111 SI U16 div255_accurate(U16 v) {
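In the lowp (16-bit fixed point) half of the file, a product of two 8-bit unorm values lands in [0, 255*255] and must be divided by 255 to return to unorm range; div255 is the cheap approximation and div255_accurate the exact one. One standard exact rounding divide-by-255, shown here as a sketch rather than the file's exact formulation:

    #include <cstdint>

    // Exact round(v / 255.0) for v in [0, 255*255], using only adds and shifts.
    static inline uint16_t div255_exact(uint16_t v) {
        uint32_t x = v + 128u;                 // +0.5 in 1/255 units, for rounding
        return (uint16_t)((x + (x >> 8)) >> 8);
    }

    // Examples: div255_exact(255*255) == 255, div255_exact(128*255) == 128,
    // so lerp(from, to, t) = div255(from*(255-t) + to*t) behaves as expected.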
3122 SI U16 inv(U16 v) { return 255-v; }
3124 SI U16 if_then_else(I16 c, U16 t, U16 e) { return (t & c) | (e & ~c); }
3125 SI U32 if_then_else(I32 c, U32 t, U32 e) { return (t & c) | (e & ~c); }
3127 SI U16 max(U16 x, U16 y) { return if_then_else(x < y, y, x); }
3128 SI U16 min(U16 x, U16 y) { return if_then_else(x < y, x, y); }
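These lowp if_then_else helpers are plain bitwise selects: they assume c is a per-lane mask of all ones or all zeros (exactly what vector comparisons produce), so (t & c) | (e & ~c) picks t in true lanes and e in false lanes, and max/min fall straight out of it. A small sketch using GCC/Clang vector extensions (the vector typedefs here are illustrative, not the file's own):

    #include <cstdint>

    typedef uint16_t U16x8 __attribute__((vector_size(16)));
    typedef int16_t  I16x8 __attribute__((vector_size(16)));

    static inline U16x8 select16(I16x8 c, U16x8 t, U16x8 e) {
        // c must be ~0 (true) or 0 (false) in each lane.
        return (t & (U16x8)c) | (e & ~(U16x8)c);
    }

    static inline U16x8 max16(U16x8 x, U16x8 y) {
        return select16(x < y, y, x);   // lane-wise compare yields the mask
    }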
3130 SI U16 from_float(float f) { return f * 255.0f + 0.5f; }
3132 SI U16 lerp(U16 from, U16 to, U16 t) { return div255( from*inv(t) + to*t ); }
3135 SI D cast(S src) {
3140 SI void split(S v, D* lo, D* hi) {
3146 SI D join(S lo, S hi) {
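cast/split/join are the glue that lets the lowp pipeline treat one wide register as two half-width ones and put them back together, so 8- and 16-bit math can run at double the lane count. A sketch of the idea using memcpy-based type punning (the template shape is illustrative, not copied from the file):

    #include <cstring>

    template <typename D, typename S>
    static inline void split_sketch(S v, D* lo, D* hi) {
        static_assert(2*sizeof(D) == sizeof(S), "S must be exactly two D's wide");
        std::memcpy(lo, (const char*)&v,             sizeof(D));
        std::memcpy(hi, (const char*)&v + sizeof(D), sizeof(D));
    }

    template <typename D, typename S>
    static inline D join_sketch(S lo, S hi) {
        static_assert(2*sizeof(S) == sizeof(D), "D must be exactly two S's wide");
        D v;
        std::memcpy((char*)&v,             &lo, sizeof(S));
        std::memcpy((char*)&v + sizeof(S), &hi, sizeof(S));
        return v;
    }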
3154 SI F if_then_else(I32 c, F t, F e) {
3157 SI F max(F x, F y) { return if_then_else(x < y, y, x); }
3158 SI F min(F x, F y) { return if_then_else(x < y, x, y); }
3160 SI I32 if_then_else(I32 c, I32 t, I32 e) {
3163 SI I32 max(I32 x, I32 y) { return if_then_else(x < y, y, x); }
3164 SI I32 min(I32 x, I32 y) { return if_then_else(x < y, x, y); }
3166 SI F mad(F f, F m, F a) { return f*m+a; }
3167 SI U32 trunc_(F x) { return (U32)cast<I32>(x); }
3170 SI F rcp_precise(F x) {
3187 SI F sqrt_(F x) {
3218 SI F floor_(F x) {
3242 SI I16 scaled_mult(I16 a, I16 b) {
3260 SI U16 constrained_add(I16 a, U16 b) {
3275 SI F fract(F x) { return x - floor_(x); }
3276 SI F abs_(F x) { return sk_bit_cast<F>( sk_bit_cast<I32>(x) & 0x7fffffff ); }
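Two branchless float-abs idioms show up in this listing: the lowp one above masks off the IEEE sign bit directly (& 0x7fffffff), while the SSE/AVX versions earlier compute v & (0-v), which works because v and 0-v agree on every bit except the sign bit, so the AND clears exactly that bit. The same mask trick in scalar form:

    #include <cstdint>
    #include <cstring>

    static inline float abs_via_mask(float v) {
        uint32_t bits; std::memcpy(&bits, &v, sizeof(bits));
        bits &= 0x7fffffffu;                 // clear the sign bit
        std::memcpy(&v, &bits, sizeof(v));
        return v;
    }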
3397 SI U16 name##_channel(U16 s, U16 d, U16 sa, U16 da); \
3404 SI U16 name##_channel(U16 s, U16 d, U16 sa, U16 da)
3424 SI U16 name##_channel(U16 s, U16 d, U16 sa, U16 da); \
3431 SI U16 name##_channel(U16 s, U16 d, U16 sa, U16 da)
3451 SI T* ptr_at_xy(const SkRasterPipeline_MemoryCtx* ctx, size_t dx, size_t dy) {
3456 SI U32 ix_and_ptr(T** ptr, const SkRasterPipeline_GatherCtx* ctx, F x, F y) {
3469 SI U32 ix_and_ptr(T** ptr, const SkRasterPipeline_GatherCtx* ctx, I32 x, I32 y) {
3482 SI V load(const T* ptr, size_t tail) {
3507 SI void store(T* ptr, size_t tail, V v) {
3532 SI V gather(const T* ptr, U32 ix) {
3558 SI V gather(const T* ptr, U32 ix) {
3567 SI void from_8888(U32 rgba, U16* r, U16* g, U16* b, U16* a) {
3592 SI void load_8888_(const uint32_t* ptr, size_t tail, U16* r, U16* g, U16* b, U16* a) {
3613 SI void store_8888_(uint32_t* ptr, size_t tail, U16 r, U16 g, U16 b, U16 a) {
3654 SI void from_565(U16 rgb, U16* r, U16* g, U16* b) {
3665 SI void load_565_(const uint16_t* ptr, size_t tail, U16* r, U16* g, U16* b) {
3668 SI void store_565_(uint16_t* ptr, size_t tail, U16 r, U16 g, U16 b) {
3699 SI void from_4444(U16 rgba, U16* r, U16* g, U16* b, U16* a) {
3712 SI void load_4444_(const uint16_t* ptr, size_t tail, U16* r, U16* g, U16* b, U16* a) {
3715 SI void store_4444_(uint16_t* ptr, size_t tail, U16 r, U16 g, U16 b, U16 a) {
3743 SI void from_88(U16 rg, U16* r, U16* g) {
3748 SI void load_88_(const uint16_t* ptr, size_t tail, U16* r, U16* g) {
3768 SI void store_88_(uint16_t* ptr, size_t tail, U16 r, U16 g) {
3812 SI U16 load_8(const uint8_t* ptr, size_t tail) {
3815 SI void store_8(uint8_t* ptr, size_t tail, U16 v) {
3931 SI U16 alpha_coverage_from_rgb_coverage(U16 a, U16 da, U16 cr, U16 cg, U16 cb) {
3970 SI F clamp_01(F v) { return min(max(0, v), 1); }
3979 SI I16 cond_to_mask_16(I32 cond) { return cast<I16>(cond); }
4002 SI void round_F_to_U16(F R, F G, F B, F A, bool interpolatedInPremul,
4014 SI void gradient_lookup(const SkRasterPipeline_GradientCtx* c, U32 idx, F t,
4087 SI F cast (U32 v) { return __builtin_convertvector((I32)v, F); }
4352 #undef SI