1bbbf1280Sopenharmony_ci/* 2bbbf1280Sopenharmony_ci * Double-precision vector sin function. 3bbbf1280Sopenharmony_ci * 4bbbf1280Sopenharmony_ci * Copyright (c) 2019, Arm Limited. 5bbbf1280Sopenharmony_ci * SPDX-License-Identifier: MIT 6bbbf1280Sopenharmony_ci */ 7bbbf1280Sopenharmony_ci 8bbbf1280Sopenharmony_ci#include "mathlib.h" 9bbbf1280Sopenharmony_ci#include "v_math.h" 10bbbf1280Sopenharmony_ci#if V_SUPPORTED 11bbbf1280Sopenharmony_ci 12bbbf1280Sopenharmony_cistatic const double Poly[] = { 13bbbf1280Sopenharmony_ci/* worst-case error is 3.5 ulp. 14bbbf1280Sopenharmony_ci abs error: 0x1.be222a58p-53 in [-pi/2, pi/2]. */ 15bbbf1280Sopenharmony_ci-0x1.9f4a9c8b21dc9p-41, 16bbbf1280Sopenharmony_ci 0x1.60e88a10163f2p-33, 17bbbf1280Sopenharmony_ci-0x1.ae6361b7254e7p-26, 18bbbf1280Sopenharmony_ci 0x1.71de382e8d62bp-19, 19bbbf1280Sopenharmony_ci-0x1.a01a019aeb4ffp-13, 20bbbf1280Sopenharmony_ci 0x1.111111110b25ep-7, 21bbbf1280Sopenharmony_ci-0x1.55555555554c3p-3, 22bbbf1280Sopenharmony_ci}; 23bbbf1280Sopenharmony_ci 24bbbf1280Sopenharmony_ci#define C7 v_f64 (Poly[0]) 25bbbf1280Sopenharmony_ci#define C6 v_f64 (Poly[1]) 26bbbf1280Sopenharmony_ci#define C5 v_f64 (Poly[2]) 27bbbf1280Sopenharmony_ci#define C4 v_f64 (Poly[3]) 28bbbf1280Sopenharmony_ci#define C3 v_f64 (Poly[4]) 29bbbf1280Sopenharmony_ci#define C2 v_f64 (Poly[5]) 30bbbf1280Sopenharmony_ci#define C1 v_f64 (Poly[6]) 31bbbf1280Sopenharmony_ci 32bbbf1280Sopenharmony_ci#define InvPi v_f64 (0x1.45f306dc9c883p-2) 33bbbf1280Sopenharmony_ci#define Pi1 v_f64 (0x1.921fb54442d18p+1) 34bbbf1280Sopenharmony_ci#define Pi2 v_f64 (0x1.1a62633145c06p-53) 35bbbf1280Sopenharmony_ci#define Pi3 v_f64 (0x1.c1cd129024e09p-106) 36bbbf1280Sopenharmony_ci#define Shift v_f64 (0x1.8p52) 37bbbf1280Sopenharmony_ci#define RangeVal v_f64 (0x1p23) 38bbbf1280Sopenharmony_ci#define AbsMask v_u64 (0x7fffffffffffffff) 39bbbf1280Sopenharmony_ci 40bbbf1280Sopenharmony_ciVPCS_ATTR 41bbbf1280Sopenharmony_ci__attribute__ ((noinline)) static v_f64_t 42bbbf1280Sopenharmony_cispecialcase (v_f64_t x, v_f64_t y, v_u64_t cmp) 43bbbf1280Sopenharmony_ci{ 44bbbf1280Sopenharmony_ci return v_call_f64 (sin, x, y, cmp); 45bbbf1280Sopenharmony_ci} 46bbbf1280Sopenharmony_ci 47bbbf1280Sopenharmony_ciVPCS_ATTR 48bbbf1280Sopenharmony_civ_f64_t 49bbbf1280Sopenharmony_ciV_NAME(sin) (v_f64_t x) 50bbbf1280Sopenharmony_ci{ 51bbbf1280Sopenharmony_ci v_f64_t n, r, r2, y; 52bbbf1280Sopenharmony_ci v_u64_t sign, odd, cmp; 53bbbf1280Sopenharmony_ci 54bbbf1280Sopenharmony_ci r = v_as_f64_u64 (v_as_u64_f64 (x) & AbsMask); 55bbbf1280Sopenharmony_ci sign = v_as_u64_f64 (x) & ~AbsMask; 56bbbf1280Sopenharmony_ci cmp = v_cond_u64 (v_as_u64_f64 (r) >= v_as_u64_f64 (RangeVal)); 57bbbf1280Sopenharmony_ci 58bbbf1280Sopenharmony_ci /* n = rint(|x|/pi). */ 59bbbf1280Sopenharmony_ci n = v_fma_f64 (InvPi, r, Shift); 60bbbf1280Sopenharmony_ci odd = v_as_u64_f64 (n) << 63; 61bbbf1280Sopenharmony_ci n -= Shift; 62bbbf1280Sopenharmony_ci 63bbbf1280Sopenharmony_ci /* r = |x| - n*pi (range reduction into -pi/2 .. pi/2). */ 64bbbf1280Sopenharmony_ci r = v_fma_f64 (-Pi1, n, r); 65bbbf1280Sopenharmony_ci r = v_fma_f64 (-Pi2, n, r); 66bbbf1280Sopenharmony_ci r = v_fma_f64 (-Pi3, n, r); 67bbbf1280Sopenharmony_ci 68bbbf1280Sopenharmony_ci /* sin(r) poly approx. */ 69bbbf1280Sopenharmony_ci r2 = r * r; 70bbbf1280Sopenharmony_ci y = v_fma_f64 (C7, r2, C6); 71bbbf1280Sopenharmony_ci y = v_fma_f64 (y, r2, C5); 72bbbf1280Sopenharmony_ci y = v_fma_f64 (y, r2, C4); 73bbbf1280Sopenharmony_ci y = v_fma_f64 (y, r2, C3); 74bbbf1280Sopenharmony_ci y = v_fma_f64 (y, r2, C2); 75bbbf1280Sopenharmony_ci y = v_fma_f64 (y, r2, C1); 76bbbf1280Sopenharmony_ci y = v_fma_f64 (y * r2, r, r); 77bbbf1280Sopenharmony_ci 78bbbf1280Sopenharmony_ci /* sign. */ 79bbbf1280Sopenharmony_ci y = v_as_f64_u64 (v_as_u64_f64 (y) ^ sign ^ odd); 80bbbf1280Sopenharmony_ci 81bbbf1280Sopenharmony_ci if (unlikely (v_any_u64 (cmp))) 82bbbf1280Sopenharmony_ci return specialcase (x, y, cmp); 83bbbf1280Sopenharmony_ci return y; 84bbbf1280Sopenharmony_ci} 85bbbf1280Sopenharmony_ciVPCS_ALIAS 86bbbf1280Sopenharmony_ci#endif 87