1bbbf1280Sopenharmony_ci/* 2bbbf1280Sopenharmony_ci * Double-precision vector log(x) function. 3bbbf1280Sopenharmony_ci * 4bbbf1280Sopenharmony_ci * Copyright (c) 2019, Arm Limited. 5bbbf1280Sopenharmony_ci * SPDX-License-Identifier: MIT 6bbbf1280Sopenharmony_ci */ 7bbbf1280Sopenharmony_ci 8bbbf1280Sopenharmony_ci#include "mathlib.h" 9bbbf1280Sopenharmony_ci#include "v_math.h" 10bbbf1280Sopenharmony_ci#include "v_log.h" 11bbbf1280Sopenharmony_ci#if V_SUPPORTED 12bbbf1280Sopenharmony_ci 13bbbf1280Sopenharmony_ci/* Worst-case error: 1.17 + 0.5 ulp. */ 14bbbf1280Sopenharmony_ci 15bbbf1280Sopenharmony_cistatic const f64_t Poly[] = { 16bbbf1280Sopenharmony_ci /* rel error: 0x1.6272e588p-56 in [ -0x1.fc1p-9 0x1.009p-8 ]. */ 17bbbf1280Sopenharmony_ci -0x1.ffffffffffff7p-2, 18bbbf1280Sopenharmony_ci 0x1.55555555170d4p-2, 19bbbf1280Sopenharmony_ci -0x1.0000000399c27p-2, 20bbbf1280Sopenharmony_ci 0x1.999b2e90e94cap-3, 21bbbf1280Sopenharmony_ci -0x1.554e550bd501ep-3, 22bbbf1280Sopenharmony_ci}; 23bbbf1280Sopenharmony_ci 24bbbf1280Sopenharmony_ci#define A0 v_f64 (Poly[0]) 25bbbf1280Sopenharmony_ci#define A1 v_f64 (Poly[1]) 26bbbf1280Sopenharmony_ci#define A2 v_f64 (Poly[2]) 27bbbf1280Sopenharmony_ci#define A3 v_f64 (Poly[3]) 28bbbf1280Sopenharmony_ci#define A4 v_f64 (Poly[4]) 29bbbf1280Sopenharmony_ci#define Ln2 v_f64 (0x1.62e42fefa39efp-1) 30bbbf1280Sopenharmony_ci#define N (1 << V_LOG_TABLE_BITS) 31bbbf1280Sopenharmony_ci#define OFF v_u64 (0x3fe6900900000000) 32bbbf1280Sopenharmony_ci 33bbbf1280Sopenharmony_cistruct entry 34bbbf1280Sopenharmony_ci{ 35bbbf1280Sopenharmony_ci v_f64_t invc; 36bbbf1280Sopenharmony_ci v_f64_t logc; 37bbbf1280Sopenharmony_ci}; 38bbbf1280Sopenharmony_ci 39bbbf1280Sopenharmony_cistatic inline struct entry 40bbbf1280Sopenharmony_cilookup (v_u64_t i) 41bbbf1280Sopenharmony_ci{ 42bbbf1280Sopenharmony_ci struct entry e; 43bbbf1280Sopenharmony_ci#ifdef SCALAR 44bbbf1280Sopenharmony_ci e.invc = __v_log_data[i].invc; 45bbbf1280Sopenharmony_ci e.logc = __v_log_data[i].logc; 46bbbf1280Sopenharmony_ci#else 47bbbf1280Sopenharmony_ci e.invc[0] = __v_log_data[i[0]].invc; 48bbbf1280Sopenharmony_ci e.logc[0] = __v_log_data[i[0]].logc; 49bbbf1280Sopenharmony_ci e.invc[1] = __v_log_data[i[1]].invc; 50bbbf1280Sopenharmony_ci e.logc[1] = __v_log_data[i[1]].logc; 51bbbf1280Sopenharmony_ci#endif 52bbbf1280Sopenharmony_ci return e; 53bbbf1280Sopenharmony_ci} 54bbbf1280Sopenharmony_ci 55bbbf1280Sopenharmony_ciVPCS_ATTR 56bbbf1280Sopenharmony_ci__attribute__ ((noinline)) static v_f64_t 57bbbf1280Sopenharmony_cispecialcase (v_f64_t x, v_f64_t y, v_u64_t cmp) 58bbbf1280Sopenharmony_ci{ 59bbbf1280Sopenharmony_ci return v_call_f64 (log, x, y, cmp); 60bbbf1280Sopenharmony_ci} 61bbbf1280Sopenharmony_ci 62bbbf1280Sopenharmony_ciVPCS_ATTR 63bbbf1280Sopenharmony_civ_f64_t 64bbbf1280Sopenharmony_ciV_NAME(log) (v_f64_t x) 65bbbf1280Sopenharmony_ci{ 66bbbf1280Sopenharmony_ci v_f64_t z, r, r2, p, y, kd, hi; 67bbbf1280Sopenharmony_ci v_u64_t ix, iz, tmp, top, i, cmp; 68bbbf1280Sopenharmony_ci v_s64_t k; 69bbbf1280Sopenharmony_ci struct entry e; 70bbbf1280Sopenharmony_ci 71bbbf1280Sopenharmony_ci ix = v_as_u64_f64 (x); 72bbbf1280Sopenharmony_ci top = ix >> 48; 73bbbf1280Sopenharmony_ci cmp = v_cond_u64 (top - v_u64 (0x0010) >= v_u64 (0x7ff0 - 0x0010)); 74bbbf1280Sopenharmony_ci 75bbbf1280Sopenharmony_ci /* x = 2^k z; where z is in range [OFF,2*OFF) and exact. 76bbbf1280Sopenharmony_ci The range is split into N subintervals. 77bbbf1280Sopenharmony_ci The ith subinterval contains z and c is near its center. */ 78bbbf1280Sopenharmony_ci tmp = ix - OFF; 79bbbf1280Sopenharmony_ci i = (tmp >> (52 - V_LOG_TABLE_BITS)) % N; 80bbbf1280Sopenharmony_ci k = v_as_s64_u64 (tmp) >> 52; /* arithmetic shift */ 81bbbf1280Sopenharmony_ci iz = ix - (tmp & v_u64 (0xfffULL << 52)); 82bbbf1280Sopenharmony_ci z = v_as_f64_u64 (iz); 83bbbf1280Sopenharmony_ci e = lookup (i); 84bbbf1280Sopenharmony_ci 85bbbf1280Sopenharmony_ci /* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */ 86bbbf1280Sopenharmony_ci r = v_fma_f64 (z, e.invc, v_f64 (-1.0)); 87bbbf1280Sopenharmony_ci kd = v_to_f64_s64 (k); 88bbbf1280Sopenharmony_ci 89bbbf1280Sopenharmony_ci /* hi = r + log(c) + k*Ln2. */ 90bbbf1280Sopenharmony_ci hi = v_fma_f64 (kd, Ln2, e.logc + r); 91bbbf1280Sopenharmony_ci /* y = r2*(A0 + r*A1 + r2*(A2 + r*A3 + r2*A4)) + hi. */ 92bbbf1280Sopenharmony_ci r2 = r * r; 93bbbf1280Sopenharmony_ci y = v_fma_f64 (A3, r, A2); 94bbbf1280Sopenharmony_ci p = v_fma_f64 (A1, r, A0); 95bbbf1280Sopenharmony_ci y = v_fma_f64 (A4, r2, y); 96bbbf1280Sopenharmony_ci y = v_fma_f64 (y, r2, p); 97bbbf1280Sopenharmony_ci y = v_fma_f64 (y, r2, hi); 98bbbf1280Sopenharmony_ci 99bbbf1280Sopenharmony_ci if (unlikely (v_any_u64 (cmp))) 100bbbf1280Sopenharmony_ci return specialcase (x, y, cmp); 101bbbf1280Sopenharmony_ci return y; 102bbbf1280Sopenharmony_ci} 103bbbf1280Sopenharmony_ciVPCS_ALIAS 104bbbf1280Sopenharmony_ci#endif 105