1bbbf1280Sopenharmony_ci/* 2bbbf1280Sopenharmony_ci * Single-precision vector 2^x function. 3bbbf1280Sopenharmony_ci * 4bbbf1280Sopenharmony_ci * Copyright (c) 2019, Arm Limited. 5bbbf1280Sopenharmony_ci * SPDX-License-Identifier: MIT 6bbbf1280Sopenharmony_ci */ 7bbbf1280Sopenharmony_ci 8bbbf1280Sopenharmony_ci#include "mathlib.h" 9bbbf1280Sopenharmony_ci#include "v_math.h" 10bbbf1280Sopenharmony_ci#if V_SUPPORTED 11bbbf1280Sopenharmony_ci 12bbbf1280Sopenharmony_cistatic const float Poly[] = { 13bbbf1280Sopenharmony_ci /* maxerr: 0.878 ulp. */ 14bbbf1280Sopenharmony_ci 0x1.416b5ep-13f, 0x1.5f082ep-10f, 0x1.3b2dep-7f, 0x1.c6af7cp-5f, 0x1.ebfbdcp-3f, 0x1.62e43p-1f 15bbbf1280Sopenharmony_ci}; 16bbbf1280Sopenharmony_ci#define C0 v_f32 (Poly[0]) 17bbbf1280Sopenharmony_ci#define C1 v_f32 (Poly[1]) 18bbbf1280Sopenharmony_ci#define C2 v_f32 (Poly[2]) 19bbbf1280Sopenharmony_ci#define C3 v_f32 (Poly[3]) 20bbbf1280Sopenharmony_ci#define C4 v_f32 (Poly[4]) 21bbbf1280Sopenharmony_ci#define C5 v_f32 (Poly[5]) 22bbbf1280Sopenharmony_ci 23bbbf1280Sopenharmony_ci#define Shift v_f32 (0x1.8p23f) 24bbbf1280Sopenharmony_ci#define InvLn2 v_f32 (0x1.715476p+0f) 25bbbf1280Sopenharmony_ci#define Ln2hi v_f32 (0x1.62e4p-1f) 26bbbf1280Sopenharmony_ci#define Ln2lo v_f32 (0x1.7f7d1cp-20f) 27bbbf1280Sopenharmony_ci 28bbbf1280Sopenharmony_ciVPCS_ATTR 29bbbf1280Sopenharmony_cistatic v_f32_t 30bbbf1280Sopenharmony_cispecialcase (v_f32_t poly, v_f32_t n, v_u32_t e, v_f32_t absn) 31bbbf1280Sopenharmony_ci{ 32bbbf1280Sopenharmony_ci /* 2^n may overflow, break it up into s1*s2. */ 33bbbf1280Sopenharmony_ci v_u32_t b = v_cond_u32 (n <= v_f32 (0.0f)) & v_u32 (0x83000000); 34bbbf1280Sopenharmony_ci v_f32_t s1 = v_as_f32_u32 (v_u32 (0x7f000000) + b); 35bbbf1280Sopenharmony_ci v_f32_t s2 = v_as_f32_u32 (e - b); 36bbbf1280Sopenharmony_ci v_u32_t cmp = v_cond_u32 (absn > v_f32 (192.0f)); 37bbbf1280Sopenharmony_ci v_f32_t r1 = s1 * s1; 38bbbf1280Sopenharmony_ci v_f32_t r0 = poly * s1 * s2; 39bbbf1280Sopenharmony_ci return v_as_f32_u32 ((cmp & v_as_u32_f32 (r1)) | (~cmp & v_as_u32_f32 (r0))); 40bbbf1280Sopenharmony_ci} 41bbbf1280Sopenharmony_ci 42bbbf1280Sopenharmony_ciVPCS_ATTR 43bbbf1280Sopenharmony_civ_f32_t 44bbbf1280Sopenharmony_ciV_NAME(exp2f_1u) (v_f32_t x) 45bbbf1280Sopenharmony_ci{ 46bbbf1280Sopenharmony_ci v_f32_t n, r, scale, poly, absn; 47bbbf1280Sopenharmony_ci v_u32_t cmp, e; 48bbbf1280Sopenharmony_ci 49bbbf1280Sopenharmony_ci /* exp2(x) = 2^n * poly(r), with poly(r) in [1/sqrt(2),sqrt(2)] 50bbbf1280Sopenharmony_ci x = n + r, with r in [-1/2, 1/2]. */ 51bbbf1280Sopenharmony_ci#if 0 52bbbf1280Sopenharmony_ci v_f32_t z; 53bbbf1280Sopenharmony_ci z = x + Shift; 54bbbf1280Sopenharmony_ci n = z - Shift; 55bbbf1280Sopenharmony_ci r = x - n; 56bbbf1280Sopenharmony_ci e = v_as_u32_f32 (z) << 23; 57bbbf1280Sopenharmony_ci#else 58bbbf1280Sopenharmony_ci n = v_round_f32 (x); 59bbbf1280Sopenharmony_ci r = x - n; 60bbbf1280Sopenharmony_ci e = v_as_u32_s32 (v_round_s32 (x)) << 23; 61bbbf1280Sopenharmony_ci#endif 62bbbf1280Sopenharmony_ci scale = v_as_f32_u32 (e + v_u32 (0x3f800000)); 63bbbf1280Sopenharmony_ci absn = v_abs_f32 (n); 64bbbf1280Sopenharmony_ci cmp = v_cond_u32 (absn > v_f32 (126.0f)); 65bbbf1280Sopenharmony_ci poly = v_fma_f32 (C0, r, C1); 66bbbf1280Sopenharmony_ci poly = v_fma_f32 (poly, r, C2); 67bbbf1280Sopenharmony_ci poly = v_fma_f32 (poly, r, C3); 68bbbf1280Sopenharmony_ci poly = v_fma_f32 (poly, r, C4); 69bbbf1280Sopenharmony_ci poly = v_fma_f32 (poly, r, C5); 70bbbf1280Sopenharmony_ci poly = v_fma_f32 (poly, r, v_f32 (1.0f)); 71bbbf1280Sopenharmony_ci if (unlikely (v_any_u32 (cmp))) 72bbbf1280Sopenharmony_ci return specialcase (poly, n, e, absn); 73bbbf1280Sopenharmony_ci return scale * poly; 74bbbf1280Sopenharmony_ci} 75bbbf1280Sopenharmony_ci#endif 76