1bbbf1280Sopenharmony_ci/*
2bbbf1280Sopenharmony_ci * Single-precision vector sin function.
3bbbf1280Sopenharmony_ci *
4bbbf1280Sopenharmony_ci * Copyright (c) 2019, Arm Limited.
5bbbf1280Sopenharmony_ci * SPDX-License-Identifier: MIT
6bbbf1280Sopenharmony_ci */
7bbbf1280Sopenharmony_ci
8bbbf1280Sopenharmony_ci#include "mathlib.h"
9bbbf1280Sopenharmony_ci#include "v_math.h"
10bbbf1280Sopenharmony_ci#if V_SUPPORTED
11bbbf1280Sopenharmony_ci
/* Odd polynomial coefficients used to approximate sin(r) on the reduced
   interval.  They are stored from the highest-degree term down: the A9,
   A7, A5 and A3 macros read entries 0, 1, 2 and 3 respectively.  */
static const float Poly[] = {
  /* 1.886 ulp error */
  [0] = 0x1.5b2e76p-19f,  /* r^9 term */
  [1] = -0x1.9f42eap-13f, /* r^7 term */
  [2] = 0x1.110df4p-7f,   /* r^5 term */
  [3] = -0x1.555548p-3f,  /* r^3 term */
};
/* Input classification.  AbsMask clears the sign bit of a float's bit
   pattern; lanes whose magnitude bits are >= those of RangeVal (2^20)
   are routed to the scalar fallback.  */
#define AbsMask  v_u32 (0x7fffffff)
#define RangeVal v_f32 (0x1p20f)

/* Range reduction.  InvPi approximates 1/pi.  Adding Shift (1.5 * 2^23)
   leaves the rounded quotient in the low mantissa bits of the sum, so
   bit 0 of the sum's bit pattern is the quotient's parity.  */
#define InvPi v_f32 (0x1.45f306p-2f)
#define Shift v_f32 (0x1.8p+23f)

/* pi split into three single-precision pieces, most significant first;
   each piece is multiplied by n and subtracted in turn.  */
#define Pi1 v_f32 (0x1.921fb6p+1f)
#define Pi2 v_f32 (-0x1.777a5cp-24f)
#define Pi3 v_f32 (-0x1.ee59dap-49f)

/* Polynomial coefficients, named by the power of r they multiply.  */
#define A3 v_f32 (Poly[3])
#define A5 v_f32 (Poly[2])
#define A7 v_f32 (Poly[1])
#define A9 v_f32 (Poly[0])
30bbbf1280Sopenharmony_ci
31bbbf1280Sopenharmony_ciVPCS_ATTR
32bbbf1280Sopenharmony_cistatic v_f32_t
33bbbf1280Sopenharmony_cispecialcase (v_f32_t x, v_f32_t y, v_u32_t cmp)
34bbbf1280Sopenharmony_ci{
35bbbf1280Sopenharmony_ci  /* Fall back to scalar code.  */
36bbbf1280Sopenharmony_ci  return v_call_f32 (sinf, x, y, cmp);
37bbbf1280Sopenharmony_ci}
38bbbf1280Sopenharmony_ci
39bbbf1280Sopenharmony_ciVPCS_ATTR
40bbbf1280Sopenharmony_civ_f32_t
41bbbf1280Sopenharmony_ciV_NAME(sinf) (v_f32_t x)
42bbbf1280Sopenharmony_ci{
43bbbf1280Sopenharmony_ci  v_f32_t n, r, r2, y;
44bbbf1280Sopenharmony_ci  v_u32_t sign, odd, cmp;
45bbbf1280Sopenharmony_ci
46bbbf1280Sopenharmony_ci  r = v_as_f32_u32 (v_as_u32_f32 (x) & AbsMask);
47bbbf1280Sopenharmony_ci  sign = v_as_u32_f32 (x) & ~AbsMask;
48bbbf1280Sopenharmony_ci  cmp = v_cond_u32 (v_as_u32_f32 (r) >= v_as_u32_f32 (RangeVal));
49bbbf1280Sopenharmony_ci
50bbbf1280Sopenharmony_ci  /* n = rint(|x|/pi) */
51bbbf1280Sopenharmony_ci  n = v_fma_f32 (InvPi, r, Shift);
52bbbf1280Sopenharmony_ci  odd = v_as_u32_f32 (n) << 31;
53bbbf1280Sopenharmony_ci  n -= Shift;
54bbbf1280Sopenharmony_ci
55bbbf1280Sopenharmony_ci  /* r = |x| - n*pi  (range reduction into -pi/2 .. pi/2) */
56bbbf1280Sopenharmony_ci  r = v_fma_f32 (-Pi1, n, r);
57bbbf1280Sopenharmony_ci  r = v_fma_f32 (-Pi2, n, r);
58bbbf1280Sopenharmony_ci  r = v_fma_f32 (-Pi3, n, r);
59bbbf1280Sopenharmony_ci
60bbbf1280Sopenharmony_ci  /* y = sin(r) */
61bbbf1280Sopenharmony_ci  r2 = r * r;
62bbbf1280Sopenharmony_ci  y = v_fma_f32 (A9, r2, A7);
63bbbf1280Sopenharmony_ci  y = v_fma_f32 (y, r2, A5);
64bbbf1280Sopenharmony_ci  y = v_fma_f32 (y, r2, A3);
65bbbf1280Sopenharmony_ci  y = v_fma_f32 (y * r2, r, r);
66bbbf1280Sopenharmony_ci
67bbbf1280Sopenharmony_ci  /* sign fix */
68bbbf1280Sopenharmony_ci  y = v_as_f32_u32 (v_as_u32_f32 (y) ^ sign ^ odd);
69bbbf1280Sopenharmony_ci
70bbbf1280Sopenharmony_ci  if (unlikely (v_any_u32 (cmp)))
71bbbf1280Sopenharmony_ci    return specialcase (x, y, cmp);
72bbbf1280Sopenharmony_ci  return y;
73bbbf1280Sopenharmony_ci}
74bbbf1280Sopenharmony_ciVPCS_ALIAS
75bbbf1280Sopenharmony_ci#endif
76