1/*
2 * Single-precision vector cos function.
3 *
4 * Copyright (c) 2019, Arm Limited.
5 * SPDX-License-Identifier: MIT
6 */
7
8#include "mathlib.h"
9#include "v_math.h"
10#if V_SUPPORTED
11
/* Coefficients of the odd polynomial used to approximate sin(r) on the
   reduced interval, stored from the highest power down to the lowest.
   Stated error of the approximation: 1.886 ulp.  */
static const float Poly[] = {
  0x1.5b2e76p-19f,  /* multiplies r^9 */
  -0x1.9f42eap-13f, /* multiplies r^7 */
  0x1.110df4p-7f,   /* multiplies r^5 */
  -0x1.555548p-3f,  /* multiplies r^3 */
};

/* Polynomial coefficients broadcast to every lane; A<k> belongs to r^k.  */
#define A9 v_f32 (Poly[0])
#define A7 v_f32 (Poly[1])
#define A5 v_f32 (Poly[2])
#define A3 v_f32 (Poly[3])

/* pi split into three parts of decreasing magnitude (Pi1 + Pi2 + Pi3), so
   that n*pi can be subtracted from the argument in successive fma steps.  */
#define Pi1 v_f32 (0x1.921fb6p+1f)
#define Pi2 v_f32 (-0x1.777a5cp-24f)
#define Pi3 v_f32 (-0x1.ee59dap-49f)

/* 1/pi and pi/2 rounded to single precision.  */
#define InvPi v_f32 (0x1.45f306p-2f)
#define HalfPi v_f32 (0x1.921fb6p0f)

/* 1.5 * 2^23: adding it to a small enough value rounds that value to an
   integer and leaves the integer in the low bits of the sum.  */
#define Shift v_f32 (0x1.8p+23f)

/* Inputs whose magnitude is 2^20 or larger take the scalar fallback.  */
#define RangeVal v_f32 (0x1p20f)

/* Clears the sign bit of a single-precision bit pattern.  */
#define AbsMask v_u32 (0x7fffffff)
31
32VPCS_ATTR
33static v_f32_t
34specialcase (v_f32_t x, v_f32_t y, v_u32_t cmp)
35{
36  /* Fall back to scalar code.  */
37  return v_call_f32 (cosf, x, y, cmp);
38}
39
40VPCS_ATTR
41v_f32_t
42V_NAME(cosf) (v_f32_t x)
43{
44  v_f32_t n, r, r2, y;
45  v_u32_t odd, cmp;
46
47  r = v_as_f32_u32 (v_as_u32_f32 (x) & AbsMask);
48  cmp = v_cond_u32 (v_as_u32_f32 (r) >= v_as_u32_f32 (RangeVal));
49
50  /* n = rint((|x|+pi/2)/pi) - 0.5 */
51  n = v_fma_f32 (InvPi, r + HalfPi, Shift);
52  odd = v_as_u32_f32 (n) << 31;
53  n -= Shift;
54  n -= v_f32 (0.5f);
55
56  /* r = |x| - n*pi  (range reduction into -pi/2 .. pi/2) */
57  r = v_fma_f32 (-Pi1, n, r);
58  r = v_fma_f32 (-Pi2, n, r);
59  r = v_fma_f32 (-Pi3, n, r);
60
61  /* y = sin(r) */
62  r2 = r * r;
63  y = v_fma_f32 (A9, r2, A7);
64  y = v_fma_f32 (y, r2, A5);
65  y = v_fma_f32 (y, r2, A3);
66  y = v_fma_f32 (y * r2, r, r);
67
68  /* sign fix */
69  y = v_as_f32_u32 (v_as_u32_f32 (y) ^ odd);
70
71  if (unlikely (v_any_u32 (cmp)))
72    return specialcase (x, y, cmp);
73  return y;
74}
75VPCS_ALIAS
76#endif
77