/*
 * Copyright 2019 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
7cb93a386Sopenharmony_ci
8cb93a386Sopenharmony_ci#include "include/private/SkVx.h"
9cb93a386Sopenharmony_ci#include "tests/Test.h"
10cb93a386Sopenharmony_ci#include <numeric>
11cb93a386Sopenharmony_ci
12cb93a386Sopenharmony_ciusing float2 = skvx::Vec<2,float>;
13cb93a386Sopenharmony_ciusing float4 = skvx::Vec<4,float>;
14cb93a386Sopenharmony_ciusing float8 = skvx::Vec<8,float>;
15cb93a386Sopenharmony_ci
16cb93a386Sopenharmony_ciusing double2 = skvx::Vec<2,double>;
17cb93a386Sopenharmony_ciusing double4 = skvx::Vec<4,double>;
18cb93a386Sopenharmony_ciusing double8 = skvx::Vec<8,double>;
19cb93a386Sopenharmony_ci
20cb93a386Sopenharmony_ciusing byte2  = skvx::Vec< 2,uint8_t>;
21cb93a386Sopenharmony_ciusing byte4  = skvx::Vec< 4,uint8_t>;
22cb93a386Sopenharmony_ciusing byte8  = skvx::Vec< 8,uint8_t>;
23cb93a386Sopenharmony_ciusing byte16 = skvx::Vec<16,uint8_t>;
24cb93a386Sopenharmony_ci
25cb93a386Sopenharmony_ciusing int2 = skvx::Vec<2,int32_t>;
26cb93a386Sopenharmony_ciusing int4 = skvx::Vec<4,int32_t>;
27cb93a386Sopenharmony_ciusing int8 = skvx::Vec<8,int32_t>;
28cb93a386Sopenharmony_ci
29cb93a386Sopenharmony_ciusing uint2 = skvx::Vec<2,uint32_t>;
30cb93a386Sopenharmony_ciusing uint4 = skvx::Vec<4,uint32_t>;
31cb93a386Sopenharmony_ciusing uint8 = skvx::Vec<8,uint32_t>;
32cb93a386Sopenharmony_ci
33cb93a386Sopenharmony_ciusing long2 = skvx::Vec<2,int64_t>;
34cb93a386Sopenharmony_ciusing long4 = skvx::Vec<4,int64_t>;
35cb93a386Sopenharmony_ciusing long8 = skvx::Vec<8,int64_t>;
36cb93a386Sopenharmony_ci
37cb93a386Sopenharmony_ciDEF_TEST(SkVx, r) {
38cb93a386Sopenharmony_ci    static_assert(sizeof(float2) ==  8, "");
39cb93a386Sopenharmony_ci    static_assert(sizeof(float4) == 16, "");
40cb93a386Sopenharmony_ci    static_assert(sizeof(float8) == 32, "");
41cb93a386Sopenharmony_ci
42cb93a386Sopenharmony_ci    static_assert(sizeof(byte2) == 2, "");
43cb93a386Sopenharmony_ci    static_assert(sizeof(byte4) == 4, "");
44cb93a386Sopenharmony_ci    static_assert(sizeof(byte8) == 8, "");
45cb93a386Sopenharmony_ci
46cb93a386Sopenharmony_ci    {
47cb93a386Sopenharmony_ci        int4 mask = float4{1,2,3,4} < float4{1,2,4,8};
48cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, mask[0] == int32_t( 0));
49cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, mask[1] == int32_t( 0));
50cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, mask[2] == int32_t(-1));
51cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, mask[3] == int32_t(-1));
52cb93a386Sopenharmony_ci
53cb93a386Sopenharmony_ci        REPORTER_ASSERT(r,  any(mask));
54cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, !all(mask));
55cb93a386Sopenharmony_ci    }
56cb93a386Sopenharmony_ci
57cb93a386Sopenharmony_ci    {
58cb93a386Sopenharmony_ci        long4 mask = double4{1,2,3,4} < double4{1,2,4,8};
59cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, mask[0] == int64_t( 0));
60cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, mask[1] == int64_t( 0));
61cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, mask[2] == int64_t(-1));
62cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, mask[3] == int64_t(-1));
63cb93a386Sopenharmony_ci
64cb93a386Sopenharmony_ci        REPORTER_ASSERT(r,  any(mask));
65cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, !all(mask));
66cb93a386Sopenharmony_ci    }
67cb93a386Sopenharmony_ci
68cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, min(float4{1,2,3,4}) == 1);
69cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, max(float4{1,2,3,4}) == 4);
70cb93a386Sopenharmony_ci
71cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(int4{1,2,3,4,5} == int4{1,2,3,4}));
72cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(int4{1,2,3,4}   == int4{1,2,3,4}));
73cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(int4{1,2,3}     == int4{1,2,3,0}));
74cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(int4{1,2}       == int4{1,2,0,0}));
75cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(int4{1}         == int4{1,0,0,0}));
76cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(int4(1)         == int4{1,1,1,1}));
77cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(int4{}          == int4{0,0,0,0}));
78cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(int4()          == int4{0,0,0,0}));
79cb93a386Sopenharmony_ci
80cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(int4{1,2,2,1} == min(int4{1,2,3,4}, int4{4,3,2,1})));
81cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(int4{4,3,3,4} == max(int4{1,2,3,4}, int4{4,3,2,1})));
82cb93a386Sopenharmony_ci
83cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(if_then_else(float4{1,2,3,2} <= float4{2,2,2,2}, float4(42), float4(47))
84cb93a386Sopenharmony_ci                           == float4{42,42,47,42}));
85cb93a386Sopenharmony_ci
86cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(floor(float4{-1.5f,1.5f,1.0f,-1.0f}) == float4{-2.0f,1.0f,1.0f,-1.0f}));
87cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all( ceil(float4{-1.5f,1.5f,1.0f,-1.0f}) == float4{-1.0f,2.0f,1.0f,-1.0f}));
88cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(trunc(float4{-1.5f,1.5f,1.0f,-1.0f}) == float4{-1.0f,1.0f,1.0f,-1.0f}));
89cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(round(float4{-1.5f,1.5f,1.0f,-1.0f}) == float4{-2.0f,2.0f,1.0f,-1.0f}));
90cb93a386Sopenharmony_ci
91cb93a386Sopenharmony_ci
92cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(abs(float4{-2,-1,0,1}) == float4{2,1,0,1}));
93cb93a386Sopenharmony_ci
94cb93a386Sopenharmony_ci    // TODO(mtklein): these tests could be made less loose.
95cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all( sqrt(float4{2,3,4,5}) < float4{2,2,3,3}));
96cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all( sqrt(float2{2,3}) < float2{2,2}));
97cb93a386Sopenharmony_ci
98cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(skvx::cast<int>(float4{-1.5f,0.5f,1.0f,1.5f}) == int4{-1,0,1,1}));
99cb93a386Sopenharmony_ci
100cb93a386Sopenharmony_ci    float buf[] = {1,2,3,4,5,6};
101cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(float4::Load(buf) == float4{1,2,3,4}));
102cb93a386Sopenharmony_ci    float4{2,3,4,5}.store(buf);
103cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, buf[0] == 2
104cb93a386Sopenharmony_ci                    && buf[1] == 3
105cb93a386Sopenharmony_ci                    && buf[2] == 4
106cb93a386Sopenharmony_ci                    && buf[3] == 5
107cb93a386Sopenharmony_ci                    && buf[4] == 5
108cb93a386Sopenharmony_ci                    && buf[5] == 6);
109cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(float4::Load(buf+0) == float4{2,3,4,5}));
110cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(float4::Load(buf+2) == float4{4,5,5,6}));
111cb93a386Sopenharmony_ci
112cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(skvx::shuffle<2,1,0,3>        (float4{1,2,3,4}) == float4{3,2,1,4}));
113cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(skvx::shuffle<2,1>            (float4{1,2,3,4}) == float2{3,2}));
114cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(skvx::shuffle<3,3,3,3>        (float4{1,2,3,4}) == float4{4,4,4,4}));
115cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(skvx::shuffle<2,1,2,1,2,1,2,1>(float4{1,2,3,4})
116cb93a386Sopenharmony_ci                           == float8{3,2,3,2,3,2,3,2}));
117cb93a386Sopenharmony_ci
118cb93a386Sopenharmony_ci    // Test that mixed types can be used where they make sense.  Mostly about ergonomics.
119cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(float4{1,2,3,4} < 5));
120cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all( byte4{1,2,3,4} < 5));
121cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(  int4{1,2,3,4} < 5.0f));
122cb93a386Sopenharmony_ci    float4 five = 5;
123cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(five == 5.0f));
124cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(five == 5));
125cb93a386Sopenharmony_ci
126cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(max(2, min(float4{1,2,3,4}, 3)) == float4{2,2,3,3}));
127cb93a386Sopenharmony_ci
128cb93a386Sopenharmony_ci    for (int x = 0; x < 256; x++)
129cb93a386Sopenharmony_ci    for (int y = 0; y < 256; y++) {
130cb93a386Sopenharmony_ci        uint8_t want = (uint8_t)( 255*(x/255.0 * y/255.0) + 0.5 );
131cb93a386Sopenharmony_ci
132cb93a386Sopenharmony_ci        {
133cb93a386Sopenharmony_ci            uint8_t got = skvx::div255(skvx::Vec<8, uint16_t>(x) *
134cb93a386Sopenharmony_ci                                       skvx::Vec<8, uint16_t>(y) )[0];
135cb93a386Sopenharmony_ci            REPORTER_ASSERT(r, got == want);
136cb93a386Sopenharmony_ci        }
137cb93a386Sopenharmony_ci
138cb93a386Sopenharmony_ci        {
139cb93a386Sopenharmony_ci            uint8_t got = skvx::approx_scale(skvx::Vec<8,uint8_t>(x),
140cb93a386Sopenharmony_ci                                             skvx::Vec<8,uint8_t>(y))[0];
141cb93a386Sopenharmony_ci
142cb93a386Sopenharmony_ci            REPORTER_ASSERT(r, got == want-1 ||
143cb93a386Sopenharmony_ci                               got == want   ||
144cb93a386Sopenharmony_ci                               got == want+1);
145cb93a386Sopenharmony_ci            if (x == 0 || y == 0 || x == 255 || y == 255) {
146cb93a386Sopenharmony_ci                REPORTER_ASSERT(r, got == want);
147cb93a386Sopenharmony_ci            }
148cb93a386Sopenharmony_ci        }
149cb93a386Sopenharmony_ci    }
150cb93a386Sopenharmony_ci
151cb93a386Sopenharmony_ci    for (int x = 0; x < 256; x++)
152cb93a386Sopenharmony_ci    for (int y = 0; y < 256; y++) {
153cb93a386Sopenharmony_ci        uint16_t xy = x*y;
154cb93a386Sopenharmony_ci
155cb93a386Sopenharmony_ci        // Make sure to cover implementation cases N=8, N<8, and N>8.
156cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, all(mull(byte2 (x), byte2 (y)) == xy));
157cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, all(mull(byte4 (x), byte4 (y)) == xy));
158cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, all(mull(byte8 (x), byte8 (y)) == xy));
159cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, all(mull(byte16(x), byte16(y)) == xy));
160cb93a386Sopenharmony_ci    }
161cb93a386Sopenharmony_ci
162cb93a386Sopenharmony_ci    {
163cb93a386Sopenharmony_ci        // Intentionally not testing -0, as we don't care if it's 0x0000 or 0x8000.
164cb93a386Sopenharmony_ci        float8 fs = {+0.0f,+0.5f,+1.0f,+2.0f,
165cb93a386Sopenharmony_ci                     -4.0f,-0.5f,-1.0f,-2.0f};
166cb93a386Sopenharmony_ci        skvx::Vec<8,uint16_t> hs = {0x0000,0x3800,0x3c00,0x4000,
167cb93a386Sopenharmony_ci                                    0xc400,0xb800,0xbc00,0xc000};
168cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, all(skvx::  to_half(fs) == hs));
169cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, all(skvx::from_half(hs) == fs));
170cb93a386Sopenharmony_ci    }
171cb93a386Sopenharmony_ci}
172cb93a386Sopenharmony_ci
173cb93a386Sopenharmony_ciDEF_TEST(SkVx_xy, r) {
174cb93a386Sopenharmony_ci    float2 f = float2(1,2);
175cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(f == float2{1,2}));
176cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, f.x() == 1);
177cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, f.y() == 2);
178cb93a386Sopenharmony_ci    f.y() = 9;
179cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(f == float2{1,9}));
180cb93a386Sopenharmony_ci    f.x() = 0;
181cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(f == float2(0,9)));
182cb93a386Sopenharmony_ci    f[0] = 8;
183cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, f.x() == 8);
184cb93a386Sopenharmony_ci    f[1] = 6;
185cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, f.y() == 6);
186cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(f == float2(8,6)));
187cb93a386Sopenharmony_ci    f = f.yx();
188cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(f == float2(6,8)));
189cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, skvx::bit_pun<SkPoint>(f) == SkPoint::Make(6,8));
190cb93a386Sopenharmony_ci    SkPoint p;
191cb93a386Sopenharmony_ci    f.store(&p);
192cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, p == SkPoint::Make(6,8));
193cb93a386Sopenharmony_ci    f.yx().store(&p);
194cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, p == SkPoint::Make(8,6));
195cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(f.xyxy() == float4(6,8,6,8)));
196cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(f.xyxy() == float4(f,f)));
197cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(skvx::join(f,f) == f.xyxy()));
198cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(skvx::join(f.yx(),f) == float4(f.y(),f.x(),f)));
199cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(skvx::join(f.yx(),f) == float4(f.yx(),f.x(),f.y())));
200cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(skvx::join(f,f.yx()) == float4(f.x(),f.y(),f.yx())));
201cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(skvx::join(f.yx(),f.yx()) == float4(f.yx(),f.yx())));
202cb93a386Sopenharmony_ci}
203cb93a386Sopenharmony_ci
204cb93a386Sopenharmony_ciDEF_TEST(SkVx_xyzw, r) {
205cb93a386Sopenharmony_ci    float4 f = float4{1,2,3,4};
206cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(f == float4(1,2,3,4)));
207cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(f == float4(1,2,float2(3,4))));
208cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(f == float4(float2(1,2),3,4)));
209cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(f == float4(float2(1,2),float2(3,4))));
210cb93a386Sopenharmony_ci    f.xy() = float2(9,8);
211cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(f == float4(9,8,3,4)));
212cb93a386Sopenharmony_ci    f.zw().x() = 7;
213cb93a386Sopenharmony_ci    f.zw().y() = 6;
214cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(f == float4(9,8,7,6)));
215cb93a386Sopenharmony_ci    f.x() = 5;
216cb93a386Sopenharmony_ci    f.y() = 4;
217cb93a386Sopenharmony_ci    f.z() = 3;
218cb93a386Sopenharmony_ci    f.w() = 2;
219cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(f == float4(5,4,3,2)));
220cb93a386Sopenharmony_ci    f[0] = 0;
221cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, f.x() == 0);
222cb93a386Sopenharmony_ci    f[1] = 1;
223cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, f.y() == 1);
224cb93a386Sopenharmony_ci    f[2] = 2;
225cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, f.z() == 2);
226cb93a386Sopenharmony_ci    f[3] = 3;
227cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, f.w() == 3);
228cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, skvx::all(f.xy() == float2(0,1)));
229cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, skvx::all(f.zw() == float2{2,3}));
230cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(f == float4(0,1,2,3)));
231cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(f.yxwz().lo == skvx::shuffle<1,0>(f)));
232cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(f.yxwz().hi == skvx::shuffle<3,2>(f)));
233cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(f.zwxy().lo.lo == f.z()));
234cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(f.zwxy().lo.hi == f.w()));
235cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(f.zwxy().hi.lo == f.x()));
236cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(f.zwxy().hi.hi == f.y()));
237cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, f.yxwz().lo.lo.val == f.y());
238cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, f.yxwz().lo.hi.val == f.x());
239cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, f.yxwz().hi.lo.val == f.w());
240cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, f.yxwz().hi.hi.val == f.z());
241cb93a386Sopenharmony_ci
242cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(skvx::naive_if_then_else(int2(0,~0),
243cb93a386Sopenharmony_ci                                                    skvx::shuffle<3,2>(float4(0,1,2,3)),
244cb93a386Sopenharmony_ci                                                    float4(4,5,6,7).xy()) == float2(4,2)));
245cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(skvx::if_then_else(int2(0,~0),
246cb93a386Sopenharmony_ci                                              skvx::shuffle<3,2>(float4(0,1,2,3)),
247cb93a386Sopenharmony_ci                                              float4(4,5,6,7).xy()) == float2(4,2)));
248cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(skvx::naive_if_then_else(int2(0,~0).xyxy(),
249cb93a386Sopenharmony_ci                                                    float4(0,1,2,3).zwxy(),
250cb93a386Sopenharmony_ci                                                    float4(4,5,6,7)) == float4(4,3,6,1)));
251cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(skvx::if_then_else(int2(0,~0).xyxy(),
252cb93a386Sopenharmony_ci                                              float4(0,1,2,3).zwxy(),
253cb93a386Sopenharmony_ci                                              float4(4,5,6,7)) == float4(4,3,6,1)));
254cb93a386Sopenharmony_ci
255cb93a386Sopenharmony_ci    REPORTER_ASSERT(r, all(skvx::pin(float4(0,1,2,3).yxwz(),
256cb93a386Sopenharmony_ci                                     float2(1).xyxy(),
257cb93a386Sopenharmony_ci                                     float2(2).xyxy()) == float4(1,1,2,2)));
258cb93a386Sopenharmony_ci}
259cb93a386Sopenharmony_ci
260cb93a386Sopenharmony_cistatic bool check_approx_acos(skiatest::Reporter* r, float x, float approx_acos_x) {
261cb93a386Sopenharmony_ci    float acosf_x = acosf(x);
262cb93a386Sopenharmony_ci    float error = acosf_x - approx_acos_x;
263cb93a386Sopenharmony_ci    if (!(fabsf(error) <= SKVX_APPROX_ACOS_MAX_ERROR)) {
264cb93a386Sopenharmony_ci        ERRORF(r, "Larger-than-expected error from skvx::approx_acos\n"
265cb93a386Sopenharmony_ci                  "  x=              %f\n"
266cb93a386Sopenharmony_ci                  "  approx_acos_x=  %f  (%f degrees\n"
267cb93a386Sopenharmony_ci                  "  acosf_x=        %f  (%f degrees\n"
268cb93a386Sopenharmony_ci                  "  error=          %f  (%f degrees)\n"
269cb93a386Sopenharmony_ci                  "  tolerance=      %f  (%f degrees)\n\n",
270cb93a386Sopenharmony_ci                  x, approx_acos_x, SkRadiansToDegrees(approx_acos_x), acosf_x,
271cb93a386Sopenharmony_ci                  SkRadiansToDegrees(acosf_x), error, SkRadiansToDegrees(error),
272cb93a386Sopenharmony_ci                  SKVX_APPROX_ACOS_MAX_ERROR, SkRadiansToDegrees(SKVX_APPROX_ACOS_MAX_ERROR));
273cb93a386Sopenharmony_ci        return false;
274cb93a386Sopenharmony_ci    }
275cb93a386Sopenharmony_ci    return true;
276cb93a386Sopenharmony_ci}
277cb93a386Sopenharmony_ci
278cb93a386Sopenharmony_ciDEF_TEST(SkVx_approx_acos, r) {
279cb93a386Sopenharmony_ci    float4 boundaries = skvx::approx_acos(float4{-1, 0, 1, 0});
280cb93a386Sopenharmony_ci    check_approx_acos(r, -1, boundaries[0]);
281cb93a386Sopenharmony_ci    check_approx_acos(r, 0, boundaries[1]);
282cb93a386Sopenharmony_ci    check_approx_acos(r, +1, boundaries[2]);
283cb93a386Sopenharmony_ci
284cb93a386Sopenharmony_ci    // Select a distribution of starting points around which to begin testing approx_acos. These
285cb93a386Sopenharmony_ci    // fall roughly around the known minimum and maximum errors. No need to include -1, 0, or 1
286cb93a386Sopenharmony_ci    // since those were just tested above. (Those are tricky because 0 is an inflection and the
287cb93a386Sopenharmony_ci    // derivative is infinite at 1 and -1.)
288cb93a386Sopenharmony_ci    float8 x = {-.99f, -.8f, -.4f, -.2f, .2f, .4f, .8f, .99f};
289cb93a386Sopenharmony_ci
290cb93a386Sopenharmony_ci    // Converge at the various local minima and maxima of "approx_acos(x) - cosf(x)" and verify that
291cb93a386Sopenharmony_ci    // approx_acos is always within "kTolerance" degrees of the expected answer.
292cb93a386Sopenharmony_ci    float8 err_;
293cb93a386Sopenharmony_ci    for (int iter = 0; iter < 10; ++iter) {
294cb93a386Sopenharmony_ci        // Run our approximate inverse cosine approximation.
295cb93a386Sopenharmony_ci        auto approx_acos_x = skvx::approx_acos(x);
296cb93a386Sopenharmony_ci
297cb93a386Sopenharmony_ci        // Find d/dx(error)
298cb93a386Sopenharmony_ci        //    = d/dx(approx_acos(x) - acos(x))
299cb93a386Sopenharmony_ci        //    = (f'g - fg')/gg + 1/sqrt(1 - x^2), [where f = bx^3 + ax, g = dx^4 + cx^2 + 1]
300cb93a386Sopenharmony_ci        float8 xx = x*x;
301cb93a386Sopenharmony_ci        float8 a = -0.939115566365855f;
302cb93a386Sopenharmony_ci        float8 b =  0.9217841528914573f;
303cb93a386Sopenharmony_ci        float8 c = -1.2845906244690837f;
304cb93a386Sopenharmony_ci        float8 d =  0.295624144969963174f;
305cb93a386Sopenharmony_ci        float8 f = (b*xx + a)*x;
306cb93a386Sopenharmony_ci        float8 f_ = 3*b*xx + a;
307cb93a386Sopenharmony_ci        float8 g = (d*xx + c)*xx + 1;
308cb93a386Sopenharmony_ci        float8 g_ = (4*d*xx + 2*c)*x;
309cb93a386Sopenharmony_ci        float8 gg = g*g;
310cb93a386Sopenharmony_ci        float8 q = skvx::sqrt(1 - xx);
311cb93a386Sopenharmony_ci        err_ = (f_*g - f*g_)/gg + 1/q;
312cb93a386Sopenharmony_ci
313cb93a386Sopenharmony_ci        // Find d^2/dx^2(error)
314cb93a386Sopenharmony_ci        //    = ((f''g - fg'')g^2 - (f'g - fg')2gg') / g^4 + x(1 - x^2)^(-3/2)
315cb93a386Sopenharmony_ci        //    = ((f''g - fg'')g - (f'g - fg')2g') / g^3 + x(1 - x^2)^(-3/2)
316cb93a386Sopenharmony_ci        float8 f__ = 6*b*x;
317cb93a386Sopenharmony_ci        float8 g__ = 12*d*xx + 2*c;
318cb93a386Sopenharmony_ci        float8 err__ = ((f__*g - f*g__)*g - (f_*g - f*g_)*2*g_) / (gg*g) + x/((1 - xx)*q);
319cb93a386Sopenharmony_ci
320cb93a386Sopenharmony_ci#if 0
321cb93a386Sopenharmony_ci        SkDebugf("\n\niter %i\n", iter);
322cb93a386Sopenharmony_ci#endif
323cb93a386Sopenharmony_ci        // Ensure each lane's approximation is within maximum error.
324cb93a386Sopenharmony_ci        for (int j = 0; j < 8; ++j) {
325cb93a386Sopenharmony_ci#if 0
326cb93a386Sopenharmony_ci            SkDebugf("x=%f  err=%f  err'=%f  err''=%f\n",
327cb93a386Sopenharmony_ci                     x[j], SkRadiansToDegrees(skvx::approx_acos_x[j] - acosf(x[j])),
328cb93a386Sopenharmony_ci                     SkRadiansToDegrees(err_[j]), SkRadiansToDegrees(err__[j]));
329cb93a386Sopenharmony_ci#endif
330cb93a386Sopenharmony_ci            if (!check_approx_acos(r, x[j], approx_acos_x[j])) {
331cb93a386Sopenharmony_ci                return;
332cb93a386Sopenharmony_ci            }
333cb93a386Sopenharmony_ci        }
334cb93a386Sopenharmony_ci
335cb93a386Sopenharmony_ci        // Use Newton's method to update the x values to locations closer to their local minimum or
336cb93a386Sopenharmony_ci        // maximum. (This is where d/dx(error) == 0.)
337cb93a386Sopenharmony_ci        x -= err_/err__;
338cb93a386Sopenharmony_ci        x = skvx::pin<8,float>(x, -.99f, .99f);
339cb93a386Sopenharmony_ci    }
340cb93a386Sopenharmony_ci
341cb93a386Sopenharmony_ci    // Ensure each lane converged to a local minimum or maximum.
342cb93a386Sopenharmony_ci    for (int j = 0; j < 8; ++j) {
343cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, SkScalarNearlyZero(err_[j]));
344cb93a386Sopenharmony_ci    }
345cb93a386Sopenharmony_ci
346cb93a386Sopenharmony_ci    // Make sure we found all the actual known locations of local min/max error.
347cb93a386Sopenharmony_ci    for (float knownRoot : {-0.983536f, -0.867381f, -0.410923f, 0.410923f, 0.867381f, 0.983536f}) {
348cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, skvx::any(skvx::abs(x - knownRoot) < SK_ScalarNearlyZero));
349cb93a386Sopenharmony_ci    }
350cb93a386Sopenharmony_ci}
351cb93a386Sopenharmony_ci
352cb93a386Sopenharmony_citemplate<int N, typename T> void check_strided_loads(skiatest::Reporter* r) {
353cb93a386Sopenharmony_ci    using Vec = skvx::Vec<N,T>;
354cb93a386Sopenharmony_ci    T values[N*4];
355cb93a386Sopenharmony_ci    std::iota(values, values + N*4, 0);
356cb93a386Sopenharmony_ci    Vec a, b, c, d;
357cb93a386Sopenharmony_ci    skvx::strided_load2(values, a, b);
358cb93a386Sopenharmony_ci    for (int i = 0; i < N; ++i) {
359cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, a[i] == values[i*2]);
360cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, b[i] == values[i*2 + 1]);
361cb93a386Sopenharmony_ci    }
362cb93a386Sopenharmony_ci    skvx::strided_load4(values, a, b, c, d);
363cb93a386Sopenharmony_ci    for (int i = 0; i < N; ++i) {
364cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, a[i] == values[i*4]);
365cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, b[i] == values[i*4 + 1]);
366cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, c[i] == values[i*4 + 2]);
367cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, d[i] == values[i*4 + 3]);
368cb93a386Sopenharmony_ci    }
369cb93a386Sopenharmony_ci}
370cb93a386Sopenharmony_ci
371cb93a386Sopenharmony_citemplate<typename T> void check_strided_loads(skiatest::Reporter* r) {
372cb93a386Sopenharmony_ci    check_strided_loads<1,T>(r);
373cb93a386Sopenharmony_ci    check_strided_loads<2,T>(r);
374cb93a386Sopenharmony_ci    check_strided_loads<4,T>(r);
375cb93a386Sopenharmony_ci    check_strided_loads<8,T>(r);
376cb93a386Sopenharmony_ci    check_strided_loads<16,T>(r);
377cb93a386Sopenharmony_ci    check_strided_loads<32,T>(r);
378cb93a386Sopenharmony_ci}
379cb93a386Sopenharmony_ci
380cb93a386Sopenharmony_ciDEF_TEST(SkVx_strided_loads, r) {
381cb93a386Sopenharmony_ci    check_strided_loads<uint32_t>(r);
382cb93a386Sopenharmony_ci    check_strided_loads<uint16_t>(r);
383cb93a386Sopenharmony_ci    check_strided_loads<uint8_t>(r);
384cb93a386Sopenharmony_ci    check_strided_loads<int32_t>(r);
385cb93a386Sopenharmony_ci    check_strided_loads<int16_t>(r);
386cb93a386Sopenharmony_ci    check_strided_loads<int8_t>(r);
387cb93a386Sopenharmony_ci    check_strided_loads<float>(r);
388cb93a386Sopenharmony_ci}
389cb93a386Sopenharmony_ci
390cb93a386Sopenharmony_ciDEF_TEST(SkVM_ScaledDividerU32, r) {
391cb93a386Sopenharmony_ci    static constexpr uint32_t kMax = std::numeric_limits<uint32_t>::max();
392cb93a386Sopenharmony_ci
393cb93a386Sopenharmony_ci    auto errorBounds = [&](uint32_t actual, uint32_t expected) {
394cb93a386Sopenharmony_ci        uint32_t lowerLimit = expected == 0 ? 0 : expected - 1,
395cb93a386Sopenharmony_ci                 upperLimit = expected == kMax ? kMax : expected + 1;
396cb93a386Sopenharmony_ci        return lowerLimit <= actual && actual <= upperLimit;
397cb93a386Sopenharmony_ci    };
398cb93a386Sopenharmony_ci
399cb93a386Sopenharmony_ci    auto test = [&](uint32_t denom) {
400cb93a386Sopenharmony_ci        // half == 1 so, the max to check is kMax-1
401cb93a386Sopenharmony_ci        skvx::ScaledDividerU32 d(denom);
402cb93a386Sopenharmony_ci        uint32_t maxCheck = static_cast<uint32_t>(floor((double)(kMax - d.half()) / denom + 0.5));
403cb93a386Sopenharmony_ci        REPORTER_ASSERT(r, errorBounds(d.divide((kMax))[0], maxCheck));
404cb93a386Sopenharmony_ci        for (uint32_t i = 0; i < kMax - d.half(); i += 65535) {
405cb93a386Sopenharmony_ci            uint32_t expected = static_cast<uint32_t>(floor((double)i / denom + 0.5));
406cb93a386Sopenharmony_ci            auto actual = d.divide(i + d.half());
407cb93a386Sopenharmony_ci            if (!errorBounds(actual[0], expected)) {
408cb93a386Sopenharmony_ci                SkDebugf("i: %u expected: %u actual: %u\n", i, expected, actual[0]);
409cb93a386Sopenharmony_ci            }
410cb93a386Sopenharmony_ci            // Make sure all the lanes are the same.
411cb93a386Sopenharmony_ci            for (int e = 1; e < 4; e++) {
412cb93a386Sopenharmony_ci                SkASSERT(actual[0] == actual[e]);
413cb93a386Sopenharmony_ci            }
414cb93a386Sopenharmony_ci        }
415cb93a386Sopenharmony_ci    };
416cb93a386Sopenharmony_ci
417cb93a386Sopenharmony_ci    test(2);
418cb93a386Sopenharmony_ci    test(3);
419cb93a386Sopenharmony_ci    test(5);
420cb93a386Sopenharmony_ci    test(7);
421cb93a386Sopenharmony_ci    test(27);
422cb93a386Sopenharmony_ci    test(65'535);
423cb93a386Sopenharmony_ci    test(15'485'863);
424cb93a386Sopenharmony_ci    test(512'927'377);
425cb93a386Sopenharmony_ci}
426