1bbbf1280Sopenharmony_ci/* 2bbbf1280Sopenharmony_ci * Single-precision vector sin function. 3bbbf1280Sopenharmony_ci * 4bbbf1280Sopenharmony_ci * Copyright (c) 2019, Arm Limited. 5bbbf1280Sopenharmony_ci * SPDX-License-Identifier: MIT 6bbbf1280Sopenharmony_ci */ 7bbbf1280Sopenharmony_ci 8bbbf1280Sopenharmony_ci#include "mathlib.h" 9bbbf1280Sopenharmony_ci#include "v_math.h" 10bbbf1280Sopenharmony_ci#if V_SUPPORTED 11bbbf1280Sopenharmony_ci 12bbbf1280Sopenharmony_cistatic const float Poly[] = { 13bbbf1280Sopenharmony_ci /* 1.886 ulp error */ 14bbbf1280Sopenharmony_ci 0x1.5b2e76p-19f, 15bbbf1280Sopenharmony_ci -0x1.9f42eap-13f, 16bbbf1280Sopenharmony_ci 0x1.110df4p-7f, 17bbbf1280Sopenharmony_ci -0x1.555548p-3f, 18bbbf1280Sopenharmony_ci}; 19bbbf1280Sopenharmony_ci#define Pi1 v_f32 (0x1.921fb6p+1f) 20bbbf1280Sopenharmony_ci#define Pi2 v_f32 (-0x1.777a5cp-24f) 21bbbf1280Sopenharmony_ci#define Pi3 v_f32 (-0x1.ee59dap-49f) 22bbbf1280Sopenharmony_ci#define A3 v_f32 (Poly[3]) 23bbbf1280Sopenharmony_ci#define A5 v_f32 (Poly[2]) 24bbbf1280Sopenharmony_ci#define A7 v_f32 (Poly[1]) 25bbbf1280Sopenharmony_ci#define A9 v_f32 (Poly[0]) 26bbbf1280Sopenharmony_ci#define RangeVal v_f32 (0x1p20f) 27bbbf1280Sopenharmony_ci#define InvPi v_f32 (0x1.45f306p-2f) 28bbbf1280Sopenharmony_ci#define Shift v_f32 (0x1.8p+23f) 29bbbf1280Sopenharmony_ci#define AbsMask v_u32 (0x7fffffff) 30bbbf1280Sopenharmony_ci 31bbbf1280Sopenharmony_ciVPCS_ATTR 32bbbf1280Sopenharmony_cistatic v_f32_t 33bbbf1280Sopenharmony_cispecialcase (v_f32_t x, v_f32_t y, v_u32_t cmp) 34bbbf1280Sopenharmony_ci{ 35bbbf1280Sopenharmony_ci /* Fall back to scalar code. */ 36bbbf1280Sopenharmony_ci return v_call_f32 (sinf, x, y, cmp); 37bbbf1280Sopenharmony_ci} 38bbbf1280Sopenharmony_ci 39bbbf1280Sopenharmony_ciVPCS_ATTR 40bbbf1280Sopenharmony_civ_f32_t 41bbbf1280Sopenharmony_ciV_NAME(sinf) (v_f32_t x) 42bbbf1280Sopenharmony_ci{ 43bbbf1280Sopenharmony_ci v_f32_t n, r, r2, y; 44bbbf1280Sopenharmony_ci v_u32_t sign, odd, cmp; 45bbbf1280Sopenharmony_ci 46bbbf1280Sopenharmony_ci r = v_as_f32_u32 (v_as_u32_f32 (x) & AbsMask); 47bbbf1280Sopenharmony_ci sign = v_as_u32_f32 (x) & ~AbsMask; 48bbbf1280Sopenharmony_ci cmp = v_cond_u32 (v_as_u32_f32 (r) >= v_as_u32_f32 (RangeVal)); 49bbbf1280Sopenharmony_ci 50bbbf1280Sopenharmony_ci /* n = rint(|x|/pi) */ 51bbbf1280Sopenharmony_ci n = v_fma_f32 (InvPi, r, Shift); 52bbbf1280Sopenharmony_ci odd = v_as_u32_f32 (n) << 31; 53bbbf1280Sopenharmony_ci n -= Shift; 54bbbf1280Sopenharmony_ci 55bbbf1280Sopenharmony_ci /* r = |x| - n*pi (range reduction into -pi/2 .. pi/2) */ 56bbbf1280Sopenharmony_ci r = v_fma_f32 (-Pi1, n, r); 57bbbf1280Sopenharmony_ci r = v_fma_f32 (-Pi2, n, r); 58bbbf1280Sopenharmony_ci r = v_fma_f32 (-Pi3, n, r); 59bbbf1280Sopenharmony_ci 60bbbf1280Sopenharmony_ci /* y = sin(r) */ 61bbbf1280Sopenharmony_ci r2 = r * r; 62bbbf1280Sopenharmony_ci y = v_fma_f32 (A9, r2, A7); 63bbbf1280Sopenharmony_ci y = v_fma_f32 (y, r2, A5); 64bbbf1280Sopenharmony_ci y = v_fma_f32 (y, r2, A3); 65bbbf1280Sopenharmony_ci y = v_fma_f32 (y * r2, r, r); 66bbbf1280Sopenharmony_ci 67bbbf1280Sopenharmony_ci /* sign fix */ 68bbbf1280Sopenharmony_ci y = v_as_f32_u32 (v_as_u32_f32 (y) ^ sign ^ odd); 69bbbf1280Sopenharmony_ci 70bbbf1280Sopenharmony_ci if (unlikely (v_any_u32 (cmp))) 71bbbf1280Sopenharmony_ci return specialcase (x, y, cmp); 72bbbf1280Sopenharmony_ci return y; 73bbbf1280Sopenharmony_ci} 74bbbf1280Sopenharmony_ciVPCS_ALIAS 75bbbf1280Sopenharmony_ci#endif 76