1bbbf1280Sopenharmony_ci/*
2bbbf1280Sopenharmony_ci * Double-precision vector log(x) function.
3bbbf1280Sopenharmony_ci *
4bbbf1280Sopenharmony_ci * Copyright (c) 2019, Arm Limited.
5bbbf1280Sopenharmony_ci * SPDX-License-Identifier: MIT
6bbbf1280Sopenharmony_ci */
7bbbf1280Sopenharmony_ci
8bbbf1280Sopenharmony_ci#include "mathlib.h"
9bbbf1280Sopenharmony_ci#include "v_math.h"
10bbbf1280Sopenharmony_ci#include "v_log.h"
11bbbf1280Sopenharmony_ci#if V_SUPPORTED
12bbbf1280Sopenharmony_ci
13bbbf1280Sopenharmony_ci/* Worst-case error: 1.17 + 0.5 ulp.  */
14bbbf1280Sopenharmony_ci
15bbbf1280Sopenharmony_cistatic const f64_t Poly[] = {
16bbbf1280Sopenharmony_ci  /* rel error: 0x1.6272e588p-56 in [ -0x1.fc1p-9 0x1.009p-8 ].  */
17bbbf1280Sopenharmony_ci  -0x1.ffffffffffff7p-2,
18bbbf1280Sopenharmony_ci   0x1.55555555170d4p-2,
19bbbf1280Sopenharmony_ci  -0x1.0000000399c27p-2,
20bbbf1280Sopenharmony_ci   0x1.999b2e90e94cap-3,
21bbbf1280Sopenharmony_ci  -0x1.554e550bd501ep-3,
22bbbf1280Sopenharmony_ci};
23bbbf1280Sopenharmony_ci
24bbbf1280Sopenharmony_ci#define A0 v_f64 (Poly[0])
25bbbf1280Sopenharmony_ci#define A1 v_f64 (Poly[1])
26bbbf1280Sopenharmony_ci#define A2 v_f64 (Poly[2])
27bbbf1280Sopenharmony_ci#define A3 v_f64 (Poly[3])
28bbbf1280Sopenharmony_ci#define A4 v_f64 (Poly[4])
29bbbf1280Sopenharmony_ci#define Ln2 v_f64 (0x1.62e42fefa39efp-1)
30bbbf1280Sopenharmony_ci#define N (1 << V_LOG_TABLE_BITS)
31bbbf1280Sopenharmony_ci#define OFF v_u64 (0x3fe6900900000000)
32bbbf1280Sopenharmony_ci
33bbbf1280Sopenharmony_cistruct entry
34bbbf1280Sopenharmony_ci{
35bbbf1280Sopenharmony_ci  v_f64_t invc;
36bbbf1280Sopenharmony_ci  v_f64_t logc;
37bbbf1280Sopenharmony_ci};
38bbbf1280Sopenharmony_ci
39bbbf1280Sopenharmony_cistatic inline struct entry
40bbbf1280Sopenharmony_cilookup (v_u64_t i)
41bbbf1280Sopenharmony_ci{
42bbbf1280Sopenharmony_ci  struct entry e;
43bbbf1280Sopenharmony_ci#ifdef SCALAR
44bbbf1280Sopenharmony_ci  e.invc = __v_log_data[i].invc;
45bbbf1280Sopenharmony_ci  e.logc = __v_log_data[i].logc;
46bbbf1280Sopenharmony_ci#else
47bbbf1280Sopenharmony_ci  e.invc[0] = __v_log_data[i[0]].invc;
48bbbf1280Sopenharmony_ci  e.logc[0] = __v_log_data[i[0]].logc;
49bbbf1280Sopenharmony_ci  e.invc[1] = __v_log_data[i[1]].invc;
50bbbf1280Sopenharmony_ci  e.logc[1] = __v_log_data[i[1]].logc;
51bbbf1280Sopenharmony_ci#endif
52bbbf1280Sopenharmony_ci  return e;
53bbbf1280Sopenharmony_ci}
54bbbf1280Sopenharmony_ci
55bbbf1280Sopenharmony_ciVPCS_ATTR
56bbbf1280Sopenharmony_ci__attribute__ ((noinline)) static v_f64_t
57bbbf1280Sopenharmony_cispecialcase (v_f64_t x, v_f64_t y, v_u64_t cmp)
58bbbf1280Sopenharmony_ci{
59bbbf1280Sopenharmony_ci  return v_call_f64 (log, x, y, cmp);
60bbbf1280Sopenharmony_ci}
61bbbf1280Sopenharmony_ci
62bbbf1280Sopenharmony_ciVPCS_ATTR
63bbbf1280Sopenharmony_civ_f64_t
64bbbf1280Sopenharmony_ciV_NAME(log) (v_f64_t x)
65bbbf1280Sopenharmony_ci{
66bbbf1280Sopenharmony_ci  v_f64_t z, r, r2, p, y, kd, hi;
67bbbf1280Sopenharmony_ci  v_u64_t ix, iz, tmp, top, i, cmp;
68bbbf1280Sopenharmony_ci  v_s64_t k;
69bbbf1280Sopenharmony_ci  struct entry e;
70bbbf1280Sopenharmony_ci
71bbbf1280Sopenharmony_ci  ix = v_as_u64_f64 (x);
72bbbf1280Sopenharmony_ci  top = ix >> 48;
73bbbf1280Sopenharmony_ci  cmp = v_cond_u64 (top - v_u64 (0x0010) >= v_u64 (0x7ff0 - 0x0010));
74bbbf1280Sopenharmony_ci
75bbbf1280Sopenharmony_ci  /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
76bbbf1280Sopenharmony_ci     The range is split into N subintervals.
77bbbf1280Sopenharmony_ci     The ith subinterval contains z and c is near its center.  */
78bbbf1280Sopenharmony_ci  tmp = ix - OFF;
79bbbf1280Sopenharmony_ci  i = (tmp >> (52 - V_LOG_TABLE_BITS)) % N;
80bbbf1280Sopenharmony_ci  k = v_as_s64_u64 (tmp) >> 52; /* arithmetic shift */
81bbbf1280Sopenharmony_ci  iz = ix - (tmp & v_u64 (0xfffULL << 52));
82bbbf1280Sopenharmony_ci  z = v_as_f64_u64 (iz);
83bbbf1280Sopenharmony_ci  e = lookup (i);
84bbbf1280Sopenharmony_ci
85bbbf1280Sopenharmony_ci  /* log(x) = log1p(z/c-1) + log(c) + k*Ln2.  */
86bbbf1280Sopenharmony_ci  r = v_fma_f64 (z, e.invc, v_f64 (-1.0));
87bbbf1280Sopenharmony_ci  kd = v_to_f64_s64 (k);
88bbbf1280Sopenharmony_ci
89bbbf1280Sopenharmony_ci  /* hi = r + log(c) + k*Ln2.  */
90bbbf1280Sopenharmony_ci  hi = v_fma_f64 (kd, Ln2, e.logc + r);
91bbbf1280Sopenharmony_ci  /* y = r2*(A0 + r*A1 + r2*(A2 + r*A3 + r2*A4)) + hi.  */
92bbbf1280Sopenharmony_ci  r2 = r * r;
93bbbf1280Sopenharmony_ci  y = v_fma_f64 (A3, r, A2);
94bbbf1280Sopenharmony_ci  p = v_fma_f64 (A1, r, A0);
95bbbf1280Sopenharmony_ci  y = v_fma_f64 (A4, r2, y);
96bbbf1280Sopenharmony_ci  y = v_fma_f64 (y, r2, p);
97bbbf1280Sopenharmony_ci  y = v_fma_f64 (y, r2, hi);
98bbbf1280Sopenharmony_ci
99bbbf1280Sopenharmony_ci  if (unlikely (v_any_u64 (cmp)))
100bbbf1280Sopenharmony_ci    return specialcase (x, y, cmp);
101bbbf1280Sopenharmony_ci  return y;
102bbbf1280Sopenharmony_ci}
103bbbf1280Sopenharmony_ciVPCS_ALIAS
104bbbf1280Sopenharmony_ci#endif
105