1cb93a386Sopenharmony_ci/* 2cb93a386Sopenharmony_ci * Copyright 2019 Google Inc. 3cb93a386Sopenharmony_ci * 4cb93a386Sopenharmony_ci * Use of this source code is governed by a BSD-style license that can be 5cb93a386Sopenharmony_ci * found in the LICENSE file. 6cb93a386Sopenharmony_ci */ 7cb93a386Sopenharmony_ci 8cb93a386Sopenharmony_ci#include "include/private/SkVx.h" 9cb93a386Sopenharmony_ci#include "tests/Test.h" 10cb93a386Sopenharmony_ci#include <numeric> 11cb93a386Sopenharmony_ci 12cb93a386Sopenharmony_ciusing float2 = skvx::Vec<2,float>; 13cb93a386Sopenharmony_ciusing float4 = skvx::Vec<4,float>; 14cb93a386Sopenharmony_ciusing float8 = skvx::Vec<8,float>; 15cb93a386Sopenharmony_ci 16cb93a386Sopenharmony_ciusing double2 = skvx::Vec<2,double>; 17cb93a386Sopenharmony_ciusing double4 = skvx::Vec<4,double>; 18cb93a386Sopenharmony_ciusing double8 = skvx::Vec<8,double>; 19cb93a386Sopenharmony_ci 20cb93a386Sopenharmony_ciusing byte2 = skvx::Vec< 2,uint8_t>; 21cb93a386Sopenharmony_ciusing byte4 = skvx::Vec< 4,uint8_t>; 22cb93a386Sopenharmony_ciusing byte8 = skvx::Vec< 8,uint8_t>; 23cb93a386Sopenharmony_ciusing byte16 = skvx::Vec<16,uint8_t>; 24cb93a386Sopenharmony_ci 25cb93a386Sopenharmony_ciusing int2 = skvx::Vec<2,int32_t>; 26cb93a386Sopenharmony_ciusing int4 = skvx::Vec<4,int32_t>; 27cb93a386Sopenharmony_ciusing int8 = skvx::Vec<8,int32_t>; 28cb93a386Sopenharmony_ci 29cb93a386Sopenharmony_ciusing uint2 = skvx::Vec<2,uint32_t>; 30cb93a386Sopenharmony_ciusing uint4 = skvx::Vec<4,uint32_t>; 31cb93a386Sopenharmony_ciusing uint8 = skvx::Vec<8,uint32_t>; 32cb93a386Sopenharmony_ci 33cb93a386Sopenharmony_ciusing long2 = skvx::Vec<2,int64_t>; 34cb93a386Sopenharmony_ciusing long4 = skvx::Vec<4,int64_t>; 35cb93a386Sopenharmony_ciusing long8 = skvx::Vec<8,int64_t>; 36cb93a386Sopenharmony_ci 37cb93a386Sopenharmony_ciDEF_TEST(SkVx, r) { 38cb93a386Sopenharmony_ci static_assert(sizeof(float2) == 8, ""); 39cb93a386Sopenharmony_ci static_assert(sizeof(float4) == 16, ""); 40cb93a386Sopenharmony_ci static_assert(sizeof(float8) == 32, ""); 41cb93a386Sopenharmony_ci 42cb93a386Sopenharmony_ci static_assert(sizeof(byte2) == 2, ""); 43cb93a386Sopenharmony_ci static_assert(sizeof(byte4) == 4, ""); 44cb93a386Sopenharmony_ci static_assert(sizeof(byte8) == 8, ""); 45cb93a386Sopenharmony_ci 46cb93a386Sopenharmony_ci { 47cb93a386Sopenharmony_ci int4 mask = float4{1,2,3,4} < float4{1,2,4,8}; 48cb93a386Sopenharmony_ci REPORTER_ASSERT(r, mask[0] == int32_t( 0)); 49cb93a386Sopenharmony_ci REPORTER_ASSERT(r, mask[1] == int32_t( 0)); 50cb93a386Sopenharmony_ci REPORTER_ASSERT(r, mask[2] == int32_t(-1)); 51cb93a386Sopenharmony_ci REPORTER_ASSERT(r, mask[3] == int32_t(-1)); 52cb93a386Sopenharmony_ci 53cb93a386Sopenharmony_ci REPORTER_ASSERT(r, any(mask)); 54cb93a386Sopenharmony_ci REPORTER_ASSERT(r, !all(mask)); 55cb93a386Sopenharmony_ci } 56cb93a386Sopenharmony_ci 57cb93a386Sopenharmony_ci { 58cb93a386Sopenharmony_ci long4 mask = double4{1,2,3,4} < double4{1,2,4,8}; 59cb93a386Sopenharmony_ci REPORTER_ASSERT(r, mask[0] == int64_t( 0)); 60cb93a386Sopenharmony_ci REPORTER_ASSERT(r, mask[1] == int64_t( 0)); 61cb93a386Sopenharmony_ci REPORTER_ASSERT(r, mask[2] == int64_t(-1)); 62cb93a386Sopenharmony_ci REPORTER_ASSERT(r, mask[3] == int64_t(-1)); 63cb93a386Sopenharmony_ci 64cb93a386Sopenharmony_ci REPORTER_ASSERT(r, any(mask)); 65cb93a386Sopenharmony_ci REPORTER_ASSERT(r, !all(mask)); 66cb93a386Sopenharmony_ci } 67cb93a386Sopenharmony_ci 68cb93a386Sopenharmony_ci REPORTER_ASSERT(r, min(float4{1,2,3,4}) == 1); 69cb93a386Sopenharmony_ci REPORTER_ASSERT(r, max(float4{1,2,3,4}) == 4); 70cb93a386Sopenharmony_ci 71cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(int4{1,2,3,4,5} == int4{1,2,3,4})); 72cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(int4{1,2,3,4} == int4{1,2,3,4})); 73cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(int4{1,2,3} == int4{1,2,3,0})); 74cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(int4{1,2} == int4{1,2,0,0})); 75cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(int4{1} == int4{1,0,0,0})); 76cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(int4(1) == int4{1,1,1,1})); 77cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(int4{} == int4{0,0,0,0})); 78cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(int4() == int4{0,0,0,0})); 79cb93a386Sopenharmony_ci 80cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(int4{1,2,2,1} == min(int4{1,2,3,4}, int4{4,3,2,1}))); 81cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(int4{4,3,3,4} == max(int4{1,2,3,4}, int4{4,3,2,1}))); 82cb93a386Sopenharmony_ci 83cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(if_then_else(float4{1,2,3,2} <= float4{2,2,2,2}, float4(42), float4(47)) 84cb93a386Sopenharmony_ci == float4{42,42,47,42})); 85cb93a386Sopenharmony_ci 86cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(floor(float4{-1.5f,1.5f,1.0f,-1.0f}) == float4{-2.0f,1.0f,1.0f,-1.0f})); 87cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all( ceil(float4{-1.5f,1.5f,1.0f,-1.0f}) == float4{-1.0f,2.0f,1.0f,-1.0f})); 88cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(trunc(float4{-1.5f,1.5f,1.0f,-1.0f}) == float4{-1.0f,1.0f,1.0f,-1.0f})); 89cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(round(float4{-1.5f,1.5f,1.0f,-1.0f}) == float4{-2.0f,2.0f,1.0f,-1.0f})); 90cb93a386Sopenharmony_ci 91cb93a386Sopenharmony_ci 92cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(abs(float4{-2,-1,0,1}) == float4{2,1,0,1})); 93cb93a386Sopenharmony_ci 94cb93a386Sopenharmony_ci // TODO(mtklein): these tests could be made less loose. 95cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all( sqrt(float4{2,3,4,5}) < float4{2,2,3,3})); 96cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all( sqrt(float2{2,3}) < float2{2,2})); 97cb93a386Sopenharmony_ci 98cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(skvx::cast<int>(float4{-1.5f,0.5f,1.0f,1.5f}) == int4{-1,0,1,1})); 99cb93a386Sopenharmony_ci 100cb93a386Sopenharmony_ci float buf[] = {1,2,3,4,5,6}; 101cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(float4::Load(buf) == float4{1,2,3,4})); 102cb93a386Sopenharmony_ci float4{2,3,4,5}.store(buf); 103cb93a386Sopenharmony_ci REPORTER_ASSERT(r, buf[0] == 2 104cb93a386Sopenharmony_ci && buf[1] == 3 105cb93a386Sopenharmony_ci && buf[2] == 4 106cb93a386Sopenharmony_ci && buf[3] == 5 107cb93a386Sopenharmony_ci && buf[4] == 5 108cb93a386Sopenharmony_ci && buf[5] == 6); 109cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(float4::Load(buf+0) == float4{2,3,4,5})); 110cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(float4::Load(buf+2) == float4{4,5,5,6})); 111cb93a386Sopenharmony_ci 112cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(skvx::shuffle<2,1,0,3> (float4{1,2,3,4}) == float4{3,2,1,4})); 113cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(skvx::shuffle<2,1> (float4{1,2,3,4}) == float2{3,2})); 114cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(skvx::shuffle<3,3,3,3> (float4{1,2,3,4}) == float4{4,4,4,4})); 115cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(skvx::shuffle<2,1,2,1,2,1,2,1>(float4{1,2,3,4}) 116cb93a386Sopenharmony_ci == float8{3,2,3,2,3,2,3,2})); 117cb93a386Sopenharmony_ci 118cb93a386Sopenharmony_ci // Test that mixed types can be used where they make sense. Mostly about ergonomics. 119cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(float4{1,2,3,4} < 5)); 120cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all( byte4{1,2,3,4} < 5)); 121cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all( int4{1,2,3,4} < 5.0f)); 122cb93a386Sopenharmony_ci float4 five = 5; 123cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(five == 5.0f)); 124cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(five == 5)); 125cb93a386Sopenharmony_ci 126cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(max(2, min(float4{1,2,3,4}, 3)) == float4{2,2,3,3})); 127cb93a386Sopenharmony_ci 128cb93a386Sopenharmony_ci for (int x = 0; x < 256; x++) 129cb93a386Sopenharmony_ci for (int y = 0; y < 256; y++) { 130cb93a386Sopenharmony_ci uint8_t want = (uint8_t)( 255*(x/255.0 * y/255.0) + 0.5 ); 131cb93a386Sopenharmony_ci 132cb93a386Sopenharmony_ci { 133cb93a386Sopenharmony_ci uint8_t got = skvx::div255(skvx::Vec<8, uint16_t>(x) * 134cb93a386Sopenharmony_ci skvx::Vec<8, uint16_t>(y) )[0]; 135cb93a386Sopenharmony_ci REPORTER_ASSERT(r, got == want); 136cb93a386Sopenharmony_ci } 137cb93a386Sopenharmony_ci 138cb93a386Sopenharmony_ci { 139cb93a386Sopenharmony_ci uint8_t got = skvx::approx_scale(skvx::Vec<8,uint8_t>(x), 140cb93a386Sopenharmony_ci skvx::Vec<8,uint8_t>(y))[0]; 141cb93a386Sopenharmony_ci 142cb93a386Sopenharmony_ci REPORTER_ASSERT(r, got == want-1 || 143cb93a386Sopenharmony_ci got == want || 144cb93a386Sopenharmony_ci got == want+1); 145cb93a386Sopenharmony_ci if (x == 0 || y == 0 || x == 255 || y == 255) { 146cb93a386Sopenharmony_ci REPORTER_ASSERT(r, got == want); 147cb93a386Sopenharmony_ci } 148cb93a386Sopenharmony_ci } 149cb93a386Sopenharmony_ci } 150cb93a386Sopenharmony_ci 151cb93a386Sopenharmony_ci for (int x = 0; x < 256; x++) 152cb93a386Sopenharmony_ci for (int y = 0; y < 256; y++) { 153cb93a386Sopenharmony_ci uint16_t xy = x*y; 154cb93a386Sopenharmony_ci 155cb93a386Sopenharmony_ci // Make sure to cover implementation cases N=8, N<8, and N>8. 156cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(mull(byte2 (x), byte2 (y)) == xy)); 157cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(mull(byte4 (x), byte4 (y)) == xy)); 158cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(mull(byte8 (x), byte8 (y)) == xy)); 159cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(mull(byte16(x), byte16(y)) == xy)); 160cb93a386Sopenharmony_ci } 161cb93a386Sopenharmony_ci 162cb93a386Sopenharmony_ci { 163cb93a386Sopenharmony_ci // Intentionally not testing -0, as we don't care if it's 0x0000 or 0x8000. 164cb93a386Sopenharmony_ci float8 fs = {+0.0f,+0.5f,+1.0f,+2.0f, 165cb93a386Sopenharmony_ci -4.0f,-0.5f,-1.0f,-2.0f}; 166cb93a386Sopenharmony_ci skvx::Vec<8,uint16_t> hs = {0x0000,0x3800,0x3c00,0x4000, 167cb93a386Sopenharmony_ci 0xc400,0xb800,0xbc00,0xc000}; 168cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(skvx:: to_half(fs) == hs)); 169cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(skvx::from_half(hs) == fs)); 170cb93a386Sopenharmony_ci } 171cb93a386Sopenharmony_ci} 172cb93a386Sopenharmony_ci 173cb93a386Sopenharmony_ciDEF_TEST(SkVx_xy, r) { 174cb93a386Sopenharmony_ci float2 f = float2(1,2); 175cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(f == float2{1,2})); 176cb93a386Sopenharmony_ci REPORTER_ASSERT(r, f.x() == 1); 177cb93a386Sopenharmony_ci REPORTER_ASSERT(r, f.y() == 2); 178cb93a386Sopenharmony_ci f.y() = 9; 179cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(f == float2{1,9})); 180cb93a386Sopenharmony_ci f.x() = 0; 181cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(f == float2(0,9))); 182cb93a386Sopenharmony_ci f[0] = 8; 183cb93a386Sopenharmony_ci REPORTER_ASSERT(r, f.x() == 8); 184cb93a386Sopenharmony_ci f[1] = 6; 185cb93a386Sopenharmony_ci REPORTER_ASSERT(r, f.y() == 6); 186cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(f == float2(8,6))); 187cb93a386Sopenharmony_ci f = f.yx(); 188cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(f == float2(6,8))); 189cb93a386Sopenharmony_ci REPORTER_ASSERT(r, skvx::bit_pun<SkPoint>(f) == SkPoint::Make(6,8)); 190cb93a386Sopenharmony_ci SkPoint p; 191cb93a386Sopenharmony_ci f.store(&p); 192cb93a386Sopenharmony_ci REPORTER_ASSERT(r, p == SkPoint::Make(6,8)); 193cb93a386Sopenharmony_ci f.yx().store(&p); 194cb93a386Sopenharmony_ci REPORTER_ASSERT(r, p == SkPoint::Make(8,6)); 195cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(f.xyxy() == float4(6,8,6,8))); 196cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(f.xyxy() == float4(f,f))); 197cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(skvx::join(f,f) == f.xyxy())); 198cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(skvx::join(f.yx(),f) == float4(f.y(),f.x(),f))); 199cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(skvx::join(f.yx(),f) == float4(f.yx(),f.x(),f.y()))); 200cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(skvx::join(f,f.yx()) == float4(f.x(),f.y(),f.yx()))); 201cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(skvx::join(f.yx(),f.yx()) == float4(f.yx(),f.yx()))); 202cb93a386Sopenharmony_ci} 203cb93a386Sopenharmony_ci 204cb93a386Sopenharmony_ciDEF_TEST(SkVx_xyzw, r) { 205cb93a386Sopenharmony_ci float4 f = float4{1,2,3,4}; 206cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(f == float4(1,2,3,4))); 207cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(f == float4(1,2,float2(3,4)))); 208cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(f == float4(float2(1,2),3,4))); 209cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(f == float4(float2(1,2),float2(3,4)))); 210cb93a386Sopenharmony_ci f.xy() = float2(9,8); 211cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(f == float4(9,8,3,4))); 212cb93a386Sopenharmony_ci f.zw().x() = 7; 213cb93a386Sopenharmony_ci f.zw().y() = 6; 214cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(f == float4(9,8,7,6))); 215cb93a386Sopenharmony_ci f.x() = 5; 216cb93a386Sopenharmony_ci f.y() = 4; 217cb93a386Sopenharmony_ci f.z() = 3; 218cb93a386Sopenharmony_ci f.w() = 2; 219cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(f == float4(5,4,3,2))); 220cb93a386Sopenharmony_ci f[0] = 0; 221cb93a386Sopenharmony_ci REPORTER_ASSERT(r, f.x() == 0); 222cb93a386Sopenharmony_ci f[1] = 1; 223cb93a386Sopenharmony_ci REPORTER_ASSERT(r, f.y() == 1); 224cb93a386Sopenharmony_ci f[2] = 2; 225cb93a386Sopenharmony_ci REPORTER_ASSERT(r, f.z() == 2); 226cb93a386Sopenharmony_ci f[3] = 3; 227cb93a386Sopenharmony_ci REPORTER_ASSERT(r, f.w() == 3); 228cb93a386Sopenharmony_ci REPORTER_ASSERT(r, skvx::all(f.xy() == float2(0,1))); 229cb93a386Sopenharmony_ci REPORTER_ASSERT(r, skvx::all(f.zw() == float2{2,3})); 230cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(f == float4(0,1,2,3))); 231cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(f.yxwz().lo == skvx::shuffle<1,0>(f))); 232cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(f.yxwz().hi == skvx::shuffle<3,2>(f))); 233cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(f.zwxy().lo.lo == f.z())); 234cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(f.zwxy().lo.hi == f.w())); 235cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(f.zwxy().hi.lo == f.x())); 236cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(f.zwxy().hi.hi == f.y())); 237cb93a386Sopenharmony_ci REPORTER_ASSERT(r, f.yxwz().lo.lo.val == f.y()); 238cb93a386Sopenharmony_ci REPORTER_ASSERT(r, f.yxwz().lo.hi.val == f.x()); 239cb93a386Sopenharmony_ci REPORTER_ASSERT(r, f.yxwz().hi.lo.val == f.w()); 240cb93a386Sopenharmony_ci REPORTER_ASSERT(r, f.yxwz().hi.hi.val == f.z()); 241cb93a386Sopenharmony_ci 242cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(skvx::naive_if_then_else(int2(0,~0), 243cb93a386Sopenharmony_ci skvx::shuffle<3,2>(float4(0,1,2,3)), 244cb93a386Sopenharmony_ci float4(4,5,6,7).xy()) == float2(4,2))); 245cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(skvx::if_then_else(int2(0,~0), 246cb93a386Sopenharmony_ci skvx::shuffle<3,2>(float4(0,1,2,3)), 247cb93a386Sopenharmony_ci float4(4,5,6,7).xy()) == float2(4,2))); 248cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(skvx::naive_if_then_else(int2(0,~0).xyxy(), 249cb93a386Sopenharmony_ci float4(0,1,2,3).zwxy(), 250cb93a386Sopenharmony_ci float4(4,5,6,7)) == float4(4,3,6,1))); 251cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(skvx::if_then_else(int2(0,~0).xyxy(), 252cb93a386Sopenharmony_ci float4(0,1,2,3).zwxy(), 253cb93a386Sopenharmony_ci float4(4,5,6,7)) == float4(4,3,6,1))); 254cb93a386Sopenharmony_ci 255cb93a386Sopenharmony_ci REPORTER_ASSERT(r, all(skvx::pin(float4(0,1,2,3).yxwz(), 256cb93a386Sopenharmony_ci float2(1).xyxy(), 257cb93a386Sopenharmony_ci float2(2).xyxy()) == float4(1,1,2,2))); 258cb93a386Sopenharmony_ci} 259cb93a386Sopenharmony_ci 260cb93a386Sopenharmony_cistatic bool check_approx_acos(skiatest::Reporter* r, float x, float approx_acos_x) { 261cb93a386Sopenharmony_ci float acosf_x = acosf(x); 262cb93a386Sopenharmony_ci float error = acosf_x - approx_acos_x; 263cb93a386Sopenharmony_ci if (!(fabsf(error) <= SKVX_APPROX_ACOS_MAX_ERROR)) { 264cb93a386Sopenharmony_ci ERRORF(r, "Larger-than-expected error from skvx::approx_acos\n" 265cb93a386Sopenharmony_ci " x= %f\n" 266cb93a386Sopenharmony_ci " approx_acos_x= %f (%f degrees\n" 267cb93a386Sopenharmony_ci " acosf_x= %f (%f degrees\n" 268cb93a386Sopenharmony_ci " error= %f (%f degrees)\n" 269cb93a386Sopenharmony_ci " tolerance= %f (%f degrees)\n\n", 270cb93a386Sopenharmony_ci x, approx_acos_x, SkRadiansToDegrees(approx_acos_x), acosf_x, 271cb93a386Sopenharmony_ci SkRadiansToDegrees(acosf_x), error, SkRadiansToDegrees(error), 272cb93a386Sopenharmony_ci SKVX_APPROX_ACOS_MAX_ERROR, SkRadiansToDegrees(SKVX_APPROX_ACOS_MAX_ERROR)); 273cb93a386Sopenharmony_ci return false; 274cb93a386Sopenharmony_ci } 275cb93a386Sopenharmony_ci return true; 276cb93a386Sopenharmony_ci} 277cb93a386Sopenharmony_ci 278cb93a386Sopenharmony_ciDEF_TEST(SkVx_approx_acos, r) { 279cb93a386Sopenharmony_ci float4 boundaries = skvx::approx_acos(float4{-1, 0, 1, 0}); 280cb93a386Sopenharmony_ci check_approx_acos(r, -1, boundaries[0]); 281cb93a386Sopenharmony_ci check_approx_acos(r, 0, boundaries[1]); 282cb93a386Sopenharmony_ci check_approx_acos(r, +1, boundaries[2]); 283cb93a386Sopenharmony_ci 284cb93a386Sopenharmony_ci // Select a distribution of starting points around which to begin testing approx_acos. These 285cb93a386Sopenharmony_ci // fall roughly around the known minimum and maximum errors. No need to include -1, 0, or 1 286cb93a386Sopenharmony_ci // since those were just tested above. (Those are tricky because 0 is an inflection and the 287cb93a386Sopenharmony_ci // derivative is infinite at 1 and -1.) 288cb93a386Sopenharmony_ci float8 x = {-.99f, -.8f, -.4f, -.2f, .2f, .4f, .8f, .99f}; 289cb93a386Sopenharmony_ci 290cb93a386Sopenharmony_ci // Converge at the various local minima and maxima of "approx_acos(x) - cosf(x)" and verify that 291cb93a386Sopenharmony_ci // approx_acos is always within "kTolerance" degrees of the expected answer. 292cb93a386Sopenharmony_ci float8 err_; 293cb93a386Sopenharmony_ci for (int iter = 0; iter < 10; ++iter) { 294cb93a386Sopenharmony_ci // Run our approximate inverse cosine approximation. 295cb93a386Sopenharmony_ci auto approx_acos_x = skvx::approx_acos(x); 296cb93a386Sopenharmony_ci 297cb93a386Sopenharmony_ci // Find d/dx(error) 298cb93a386Sopenharmony_ci // = d/dx(approx_acos(x) - acos(x)) 299cb93a386Sopenharmony_ci // = (f'g - fg')/gg + 1/sqrt(1 - x^2), [where f = bx^3 + ax, g = dx^4 + cx^2 + 1] 300cb93a386Sopenharmony_ci float8 xx = x*x; 301cb93a386Sopenharmony_ci float8 a = -0.939115566365855f; 302cb93a386Sopenharmony_ci float8 b = 0.9217841528914573f; 303cb93a386Sopenharmony_ci float8 c = -1.2845906244690837f; 304cb93a386Sopenharmony_ci float8 d = 0.295624144969963174f; 305cb93a386Sopenharmony_ci float8 f = (b*xx + a)*x; 306cb93a386Sopenharmony_ci float8 f_ = 3*b*xx + a; 307cb93a386Sopenharmony_ci float8 g = (d*xx + c)*xx + 1; 308cb93a386Sopenharmony_ci float8 g_ = (4*d*xx + 2*c)*x; 309cb93a386Sopenharmony_ci float8 gg = g*g; 310cb93a386Sopenharmony_ci float8 q = skvx::sqrt(1 - xx); 311cb93a386Sopenharmony_ci err_ = (f_*g - f*g_)/gg + 1/q; 312cb93a386Sopenharmony_ci 313cb93a386Sopenharmony_ci // Find d^2/dx^2(error) 314cb93a386Sopenharmony_ci // = ((f''g - fg'')g^2 - (f'g - fg')2gg') / g^4 + x(1 - x^2)^(-3/2) 315cb93a386Sopenharmony_ci // = ((f''g - fg'')g - (f'g - fg')2g') / g^3 + x(1 - x^2)^(-3/2) 316cb93a386Sopenharmony_ci float8 f__ = 6*b*x; 317cb93a386Sopenharmony_ci float8 g__ = 12*d*xx + 2*c; 318cb93a386Sopenharmony_ci float8 err__ = ((f__*g - f*g__)*g - (f_*g - f*g_)*2*g_) / (gg*g) + x/((1 - xx)*q); 319cb93a386Sopenharmony_ci 320cb93a386Sopenharmony_ci#if 0 321cb93a386Sopenharmony_ci SkDebugf("\n\niter %i\n", iter); 322cb93a386Sopenharmony_ci#endif 323cb93a386Sopenharmony_ci // Ensure each lane's approximation is within maximum error. 324cb93a386Sopenharmony_ci for (int j = 0; j < 8; ++j) { 325cb93a386Sopenharmony_ci#if 0 326cb93a386Sopenharmony_ci SkDebugf("x=%f err=%f err'=%f err''=%f\n", 327cb93a386Sopenharmony_ci x[j], SkRadiansToDegrees(skvx::approx_acos_x[j] - acosf(x[j])), 328cb93a386Sopenharmony_ci SkRadiansToDegrees(err_[j]), SkRadiansToDegrees(err__[j])); 329cb93a386Sopenharmony_ci#endif 330cb93a386Sopenharmony_ci if (!check_approx_acos(r, x[j], approx_acos_x[j])) { 331cb93a386Sopenharmony_ci return; 332cb93a386Sopenharmony_ci } 333cb93a386Sopenharmony_ci } 334cb93a386Sopenharmony_ci 335cb93a386Sopenharmony_ci // Use Newton's method to update the x values to locations closer to their local minimum or 336cb93a386Sopenharmony_ci // maximum. (This is where d/dx(error) == 0.) 337cb93a386Sopenharmony_ci x -= err_/err__; 338cb93a386Sopenharmony_ci x = skvx::pin<8,float>(x, -.99f, .99f); 339cb93a386Sopenharmony_ci } 340cb93a386Sopenharmony_ci 341cb93a386Sopenharmony_ci // Ensure each lane converged to a local minimum or maximum. 342cb93a386Sopenharmony_ci for (int j = 0; j < 8; ++j) { 343cb93a386Sopenharmony_ci REPORTER_ASSERT(r, SkScalarNearlyZero(err_[j])); 344cb93a386Sopenharmony_ci } 345cb93a386Sopenharmony_ci 346cb93a386Sopenharmony_ci // Make sure we found all the actual known locations of local min/max error. 347cb93a386Sopenharmony_ci for (float knownRoot : {-0.983536f, -0.867381f, -0.410923f, 0.410923f, 0.867381f, 0.983536f}) { 348cb93a386Sopenharmony_ci REPORTER_ASSERT(r, skvx::any(skvx::abs(x - knownRoot) < SK_ScalarNearlyZero)); 349cb93a386Sopenharmony_ci } 350cb93a386Sopenharmony_ci} 351cb93a386Sopenharmony_ci 352cb93a386Sopenharmony_citemplate<int N, typename T> void check_strided_loads(skiatest::Reporter* r) { 353cb93a386Sopenharmony_ci using Vec = skvx::Vec<N,T>; 354cb93a386Sopenharmony_ci T values[N*4]; 355cb93a386Sopenharmony_ci std::iota(values, values + N*4, 0); 356cb93a386Sopenharmony_ci Vec a, b, c, d; 357cb93a386Sopenharmony_ci skvx::strided_load2(values, a, b); 358cb93a386Sopenharmony_ci for (int i = 0; i < N; ++i) { 359cb93a386Sopenharmony_ci REPORTER_ASSERT(r, a[i] == values[i*2]); 360cb93a386Sopenharmony_ci REPORTER_ASSERT(r, b[i] == values[i*2 + 1]); 361cb93a386Sopenharmony_ci } 362cb93a386Sopenharmony_ci skvx::strided_load4(values, a, b, c, d); 363cb93a386Sopenharmony_ci for (int i = 0; i < N; ++i) { 364cb93a386Sopenharmony_ci REPORTER_ASSERT(r, a[i] == values[i*4]); 365cb93a386Sopenharmony_ci REPORTER_ASSERT(r, b[i] == values[i*4 + 1]); 366cb93a386Sopenharmony_ci REPORTER_ASSERT(r, c[i] == values[i*4 + 2]); 367cb93a386Sopenharmony_ci REPORTER_ASSERT(r, d[i] == values[i*4 + 3]); 368cb93a386Sopenharmony_ci } 369cb93a386Sopenharmony_ci} 370cb93a386Sopenharmony_ci 371cb93a386Sopenharmony_citemplate<typename T> void check_strided_loads(skiatest::Reporter* r) { 372cb93a386Sopenharmony_ci check_strided_loads<1,T>(r); 373cb93a386Sopenharmony_ci check_strided_loads<2,T>(r); 374cb93a386Sopenharmony_ci check_strided_loads<4,T>(r); 375cb93a386Sopenharmony_ci check_strided_loads<8,T>(r); 376cb93a386Sopenharmony_ci check_strided_loads<16,T>(r); 377cb93a386Sopenharmony_ci check_strided_loads<32,T>(r); 378cb93a386Sopenharmony_ci} 379cb93a386Sopenharmony_ci 380cb93a386Sopenharmony_ciDEF_TEST(SkVx_strided_loads, r) { 381cb93a386Sopenharmony_ci check_strided_loads<uint32_t>(r); 382cb93a386Sopenharmony_ci check_strided_loads<uint16_t>(r); 383cb93a386Sopenharmony_ci check_strided_loads<uint8_t>(r); 384cb93a386Sopenharmony_ci check_strided_loads<int32_t>(r); 385cb93a386Sopenharmony_ci check_strided_loads<int16_t>(r); 386cb93a386Sopenharmony_ci check_strided_loads<int8_t>(r); 387cb93a386Sopenharmony_ci check_strided_loads<float>(r); 388cb93a386Sopenharmony_ci} 389cb93a386Sopenharmony_ci 390cb93a386Sopenharmony_ciDEF_TEST(SkVM_ScaledDividerU32, r) { 391cb93a386Sopenharmony_ci static constexpr uint32_t kMax = std::numeric_limits<uint32_t>::max(); 392cb93a386Sopenharmony_ci 393cb93a386Sopenharmony_ci auto errorBounds = [&](uint32_t actual, uint32_t expected) { 394cb93a386Sopenharmony_ci uint32_t lowerLimit = expected == 0 ? 0 : expected - 1, 395cb93a386Sopenharmony_ci upperLimit = expected == kMax ? kMax : expected + 1; 396cb93a386Sopenharmony_ci return lowerLimit <= actual && actual <= upperLimit; 397cb93a386Sopenharmony_ci }; 398cb93a386Sopenharmony_ci 399cb93a386Sopenharmony_ci auto test = [&](uint32_t denom) { 400cb93a386Sopenharmony_ci // half == 1 so, the max to check is kMax-1 401cb93a386Sopenharmony_ci skvx::ScaledDividerU32 d(denom); 402cb93a386Sopenharmony_ci uint32_t maxCheck = static_cast<uint32_t>(floor((double)(kMax - d.half()) / denom + 0.5)); 403cb93a386Sopenharmony_ci REPORTER_ASSERT(r, errorBounds(d.divide((kMax))[0], maxCheck)); 404cb93a386Sopenharmony_ci for (uint32_t i = 0; i < kMax - d.half(); i += 65535) { 405cb93a386Sopenharmony_ci uint32_t expected = static_cast<uint32_t>(floor((double)i / denom + 0.5)); 406cb93a386Sopenharmony_ci auto actual = d.divide(i + d.half()); 407cb93a386Sopenharmony_ci if (!errorBounds(actual[0], expected)) { 408cb93a386Sopenharmony_ci SkDebugf("i: %u expected: %u actual: %u\n", i, expected, actual[0]); 409cb93a386Sopenharmony_ci } 410cb93a386Sopenharmony_ci // Make sure all the lanes are the same. 411cb93a386Sopenharmony_ci for (int e = 1; e < 4; e++) { 412cb93a386Sopenharmony_ci SkASSERT(actual[0] == actual[e]); 413cb93a386Sopenharmony_ci } 414cb93a386Sopenharmony_ci } 415cb93a386Sopenharmony_ci }; 416cb93a386Sopenharmony_ci 417cb93a386Sopenharmony_ci test(2); 418cb93a386Sopenharmony_ci test(3); 419cb93a386Sopenharmony_ci test(5); 420cb93a386Sopenharmony_ci test(7); 421cb93a386Sopenharmony_ci test(27); 422cb93a386Sopenharmony_ci test(65'535); 423cb93a386Sopenharmony_ci test(15'485'863); 424cb93a386Sopenharmony_ci test(512'927'377); 425cb93a386Sopenharmony_ci} 426