xref: /third_party/jerryscript/jerry-libm/trig.c (revision 425bb815)
1/* Copyright JS Foundation and other contributors, http://js.foundation
2 *
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 *     http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 *
15 * This file is based on work under the following copyright and permission
16 * notice:
17 *
18 *     Copyright (C) 1993, 2004 by Sun Microsystems, Inc. All rights reserved.
19 *
20 *     Developed at SunSoft, a Sun Microsystems, Inc. business.
21 *     Permission to use, copy, modify, and distribute this
22 *     software is freely granted, provided that this notice
23 *     is preserved.
24 *
25 *     @(#)k_rem_pio2.c 1.3 95/01/18
26 *     @(#)e_rem_pio2.c 1.4 95/01/18
27 *     @(#)k_sin.c 1.3 95/01/18
28 *     @(#)k_cos.c 1.3 95/01/18
29 *     @(#)k_tan.c 1.5 04/04/22
30 *     @(#)s_sin.c 1.3 95/01/18
31 *     @(#)s_cos.c 1.3 95/01/18
32 *     @(#)s_tan.c 1.3 95/01/18
33 */
34
35#include "jerry-libm-internal.h"
36
37#define zero    0.00000000000000000000e+00 /* 0x00000000, 0x00000000 */
38#define half    5.00000000000000000000e-01 /* 0x3FE00000, 0x00000000 */
39#define one     1.00000000000000000000e+00 /* 0x3FF00000, 0x00000000 */
40#define two24   1.67772160000000000000e+07 /* 0x41700000, 0x00000000 */
41#define twon24  5.96046447753906250000e-08 /* 0x3E700000, 0x00000000 */
42
43/* __kernel_rem_pio2(x,y,e0,nx,prec)
44 * double x[],y[]; int e0,nx,prec;
45 *
46 * __kernel_rem_pio2 return the last three digits of N with
47 *              y = x - N*pi/2
48 * so that |y| < pi/2.
49 *
50 * The method is to compute the integer (mod 8) and fraction parts of
51 * (2/pi)*x without doing the full multiplication. In general we
52 * skip the part of the product that are known to be a huge integer (
53 * more accurately, = 0 mod 8 ). Thus the number of operations are
54 * independent of the exponent of the input.
55 *
56 * (2/pi) is represented by an array of 24-bit integers in ipio2[].
57 *
58 * Input parameters:
59 *      x[]     The input value (must be positive) is broken into nx
60 *              pieces of 24-bit integers in double precision format.
61 *              x[i] will be the i-th 24 bit of x. The scaled exponent
62 *              of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
63 *              match x's up to 24 bits.
64 *
65 *              Example of breaking a double positive z into x[0]+x[1]+x[2]:
66 *                      e0 = ilogb(z)-23
67 *                      z  = scalbn(z,-e0)
68 *              for i = 0,1,2
69 *                      x[i] = floor(z)
70 *                      z    = (z-x[i])*2**24
71 *
72 *      y[]     ouput result in an array of double precision numbers.
73 *              The dimension of y[] is:
74 *                      24-bit  precision       1
75 *                      53-bit  precision       2
76 *                      64-bit  precision       2
77 *                      113-bit precision       3
78 *              The actual value is the sum of them. Thus for 113-bit
79 *              precison, one may have to do something like:
80 *
81 *              long double t,w,r_head, r_tail;
82 *              t = (long double)y[2] + (long double)y[1];
83 *              w = (long double)y[0];
84 *              r_head = t+w;
85 *              r_tail = w - (r_head - t);
86 *
87 *      e0      The exponent of x[0]
88 *
89 *      nx      dimension of x[]
90 *
91 *      prec    an integer indicating the precision:
92 *                      0       24  bits (single)
93 *                      1       53  bits (double)
94 *                      2       64  bits (extended)
95 *                      3       113 bits (quad)
96 *
97 * External function:
98 *      double scalbn(), floor();
99 *
100 * Here is the description of some local variables:
101 *
102 *      ipio2[] integer array, contains the (24*i)-th to (24*i+23)-th
103 *              bit of 2/pi after binary point. The corresponding
104 *              floating value is
105 *
106 *                      ipio2[i] * 2^(-24(i+1)).
107 *
108 *      jk      jk+1 is the initial number of terms of ipio2[] needed
109 *              in the computation. The recommended value is 2,3,4,
110 *              6 for single, double, extended,and quad.
111 *
112 *      jz      local integer variable indicating the number of
113 *              terms of ipio2[] used.
114 *
115 *      jx      nx - 1
116 *
117 *      jv      index for pointing to the suitable ipio2[] for the
118 *              computation. In general, we want
119 *                      ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
120 *              is an integer. Thus
121 *                      e0-3-24*jv >= 0 or (e0-3)/24 >= jv
122 *              Hence jv = max(0,(e0-3)/24).
123 *
124 *      jp      jp+1 is the number of terms in PIo2[] needed, jp = jk.
125 *
126 *      q[]     double array with integral value, representing the
127 *              24-bits chunk of the product of x and 2/pi.
128 *
129 *      q0      the corresponding exponent of q[0]. Note that the
130 *              exponent for q[i] would be q0-24*i.
131 *
132 *      PIo2[]  double precision array, obtained by cutting pi/2
133 *              into 24 bits chunks.
134 *
135 *      f[]     ipio2[] in floating point
136 *
137 *      iq[]    integer array by breaking up q[] in 24-bits chunk.
138 *
139 *      fq[]    final product of x*(2/pi) in fq[0],..,fq[jk]
140 *
141 *      ih      integer. If >0 it indicates q[] is >= 0.5, hence
142 *              it also indicates the *sign* of the result.
143 */
144
145/*
146 * Constants:
147 * The hexadecimal values are the intended ones for the following
148 * constants. The decimal values may be used, provided that the
149 * compiler will convert from decimal to binary accurately enough
150 * to produce the hexadecimal values shown.
151 */
152
153/* initial value for jk */
154static const int init_jk[] =
155{
156  2, 3, 4, 6
157};
158
159static const double PIo2[] =
160{
161  1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */
162  7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */
163  5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */
164  3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */
165  1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */
166  1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */
167  2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */
168  2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
169};
170
171/*
172 * Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
173 */
174static const int ipio2[] =
175{
176  0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,
177  0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,
178  0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129,
179  0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41,
180  0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8,
181  0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF,
182  0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5,
183  0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08,
184  0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3,
185  0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880,
186  0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B,
187};
188
189static int
190__kernel_rem_pio2 (double *x, double *y, int e0, int nx, int prec)
191{
192  int jz, jx, jv, jp, jk, carry, n, iq[20], i, j, k, m, q0, ih;
193  double z, fw, f[20], fq[20], q[20];
194
195  /* initialize jk */
196  jk = init_jk[prec];
197  jp = jk;
198
199  /* determine jx, jv, q0, note that 3 > q0 */
200  jx = nx - 1;
201  jv = (e0 - 3) / 24;
202  if (jv < 0)
203  {
204    jv = 0;
205  }
206  q0 = e0 - 24 * (jv + 1);
207
208  /* set up f[0] to f[jx + jk] where f[jx + jk] = ipio2[jv + jk] */
209  j = jv - jx;
210  m = jx + jk;
211  for (i = 0; i <= m; i++, j++)
212  {
213    f[i] = (j < 0) ? zero : (double) ipio2[j];
214  }
215
216  /* compute q[0], q[1], ... q[jk] */
217  for (i = 0; i <= jk; i++)
218  {
219    for (j = 0, fw = 0.0; j <= jx; j++)
220    {
221      fw += x[j] * f[jx + i - j];
222    }
223    q[i] = fw;
224  }
225
226  jz = jk;
227recompute:
228  /* distill q[] into iq[] reversingly */
229  for (i = 0, j = jz, z = q[jz]; j > 0; i++, j--)
230  {
231    fw = (double) ((int) (twon24 * z));
232    iq[i] = (int) (z - two24 * fw);
233    z = q[j - 1] + fw;
234  }
235
236  /* compute n */
237  z = scalbn (z, q0); /* actual value of z */
238  z -= 8.0 * floor (z * 0.125); /* trim off integer >= 8 */
239  n = (int) z;
240  z -= (double) n;
241  ih = 0;
242  if (q0 > 0) /* need iq[jz - 1] to determine n */
243  {
244    i = (iq[jz - 1] >> (24 - q0));
245    n += i;
246    iq[jz - 1] -= i << (24 - q0);
247    ih = iq[jz - 1] >> (23 - q0);
248  }
249  else if (q0 == 0)
250  {
251    ih = iq[jz - 1] >> 23;
252  }
253  else if (z >= 0.5)
254  {
255    ih = 2;
256  }
257
258  if (ih > 0) /* q > 0.5 */
259  {
260    n += 1;
261    carry = 0;
262    for (i = 0; i < jz; i++) /* compute 1 - q */
263    {
264      j = iq[i];
265      if (carry == 0)
266      {
267        if (j != 0)
268        {
269          carry = 1;
270          iq[i] = 0x1000000 - j;
271        }
272      }
273      else
274      {
275        iq[i] = 0xffffff - j;
276      }
277    }
278    if (q0 > 0) /* rare case: chance is 1 in 12 */
279    {
280      switch (q0)
281      {
282        case 1:
283        {
284          iq[jz - 1] &= 0x7fffff;
285          break;
286        }
287        case 2:
288        {
289          iq[jz - 1] &= 0x3fffff;
290          break;
291        }
292      }
293    }
294    if (ih == 2)
295    {
296      z = one - z;
297      if (carry != 0)
298      {
299        z -= scalbn (one, q0);
300      }
301    }
302  }
303
304  /* check if recomputation is needed */
305  if (z == zero)
306  {
307    j = 0;
308    for (i = jz - 1; i >= jk; i--)
309    {
310      j |= iq[i];
311    }
312    if (j == 0) /* need recomputation */
313    {
314      for (k = 1; iq[jk - k] == 0; k++) /* k = no. of terms needed */
315      {
316      }
317
318      for (i = jz + 1; i <= jz + k; i++) /* add q[jz + 1] to q[jz + k] */
319      {
320        f[jx + i] = (double) ipio2[jv + i];
321        for (j = 0, fw = 0.0; j <= jx; j++)
322        {
323          fw += x[j] * f[jx + i - j];
324        }
325        q[i] = fw;
326      }
327      jz += k;
328      goto recompute;
329    }
330  }
331
332  /* chop off zero terms */
333  if (z == 0.0)
334  {
335    jz -= 1;
336    q0 -= 24;
337    while (iq[jz] == 0)
338    {
339      jz--;
340      q0 -= 24;
341    }
342  }
343  else
344  { /* break z into 24-bit if necessary */
345    z = scalbn (z, -q0);
346    if (z >= two24)
347    {
348      fw = (double) ((int) (twon24 * z));
349      iq[jz] = (int) (z - two24 * fw);
350      jz += 1;
351      q0 += 24;
352      iq[jz] = (int) fw;
353    }
354    else
355    {
356      iq[jz] = (int) z;
357    }
358  }
359
360  /* convert integer "bit" chunk to floating-point value */
361  fw = scalbn (one, q0);
362  for (i = jz; i >= 0; i--)
363  {
364    q[i] = fw * (double) iq[i];
365    fw *= twon24;
366  }
367
368  /* compute PIo2[0, ..., jp] * q[jz, ..., 0] */
369  for (i = jz; i >= 0; i--)
370  {
371    for (fw = 0.0, k = 0; k <= jp && k <= jz - i; k++)
372    {
373      fw += PIo2[k] * q[i + k];
374    }
375    fq[jz - i] = fw;
376  }
377
378  /* compress fq[] into y[] */
379  switch (prec)
380  {
381    case 0:
382    {
383      fw = 0.0;
384      for (i = jz; i >= 0; i--)
385      {
386        fw += fq[i];
387      }
388      y[0] = (ih == 0) ? fw : -fw;
389      break;
390    }
391    case 1:
392    case 2:
393    {
394      fw = 0.0;
395      for (i = jz; i >= 0; i--)
396      {
397        fw += fq[i];
398      }
399      y[0] = (ih == 0) ? fw : -fw;
400      fw = fq[0] - fw;
401      for (i = 1; i <= jz; i++)
402      {
403        fw += fq[i];
404      }
405      y[1] = (ih == 0) ? fw : -fw;
406      break;
407    }
408    case 3: /* painful */
409    {
410      for (i = jz; i > 0; i--)
411      {
412        fw = fq[i - 1] + fq[i];
413        fq[i] += fq[i - 1] - fw;
414        fq[i - 1] = fw;
415      }
416      for (i = jz; i > 1; i--)
417      {
418        fw = fq[i - 1] + fq[i];
419        fq[i] += fq[i - 1] - fw;
420        fq[i - 1] = fw;
421      }
422      for (fw = 0.0, i = jz; i >= 2; i--)
423      {
424        fw += fq[i];
425      }
426      if (ih == 0)
427      {
428        y[0] = fq[0];
429        y[1] = fq[1];
430        y[2] = fw;
431      }
432      else
433      {
434        y[0] = -fq[0];
435        y[1] = -fq[1];
436        y[2] = -fw;
437      }
438    }
439  }
440  return n & 7;
441} /* __kernel_rem_pio2 */
442
443/* __ieee754_rem_pio2(x,y)
444 * return the remainder of x rem pi/2 in y[0]+y[1]
445 * use __kernel_rem_pio2()
446 */
447
448static const int npio2_hw[] =
449{
450  0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB, 0x401F6A7A, 0x4022D97C,
451  0x4025FDBB, 0x402921FB, 0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C,
452  0x40346B9C, 0x4035FDBB, 0x40378FDB, 0x403921FB, 0x403AB41B, 0x403C463A,
453  0x403DD85A, 0x403F6A7A, 0x40407E4C, 0x4041475C, 0x4042106C, 0x4042D97C,
454  0x4043A28C, 0x40446B9C, 0x404534AC, 0x4045FDBB, 0x4046C6CB, 0x40478FDB,
455  0x404858EB, 0x404921FB,
456};
457
458/*
459 * invpio2:  53 bits of 2/pi
460 * pio2_1:   first  33 bit of pi/2
461 * pio2_1t:  pi/2 - pio2_1
462 * pio2_2:   second 33 bit of pi/2
463 * pio2_2t:  pi/2 - (pio2_1 + pio2_2)
464 * pio2_3:   third  33 bit of pi/2
465 * pio2_3t:  pi/2 - (pio2_1 + pio2_2 + pio2_3)
466 */
467#define invpio2 6.36619772367581382433e-01 /* 0x3FE45F30, 0x6DC9C883 */
468#define pio2_1  1.57079632673412561417e+00 /* 0x3FF921FB, 0x54400000 */
469#define pio2_1t 6.07710050650619224932e-11 /* 0x3DD0B461, 0x1A626331 */
470#define pio2_2  6.07710050630396597660e-11 /* 0x3DD0B461, 0x1A600000 */
471#define pio2_2t 2.02226624879595063154e-21 /* 0x3BA3198A, 0x2E037073 */
472#define pio2_3  2.02226624871116645580e-21 /* 0x3BA3198A, 0x2E000000 */
473#define pio2_3t 8.47842766036889956997e-32 /* 0x397B839A, 0x252049C1 */
474
475static int
476__ieee754_rem_pio2 (double x, double *y)
477{
478  double_accessor z;
479  double w, t, r, fn;
480  double tx[3];
481  int e0, i, j, nx, n, ix, hx;
482
483  hx = __HI (x); /* high word of x */
484  ix = hx & 0x7fffffff;
485  if (ix <= 0x3fe921fb) /* |x| ~<= pi/4 , no need for reduction */
486  {
487    y[0] = x;
488    y[1] = 0;
489    return 0;
490  }
491  if (ix < 0x4002d97c) /* |x| < 3pi/4, special case with n = +-1 */
492  {
493    if (hx > 0)
494    {
495      z.dbl = x - pio2_1;
496      if (ix != 0x3ff921fb) /* 33 + 53 bit pi is good enough */
497      {
498        y[0] = z.dbl - pio2_1t;
499        y[1] = (z.dbl - y[0]) - pio2_1t;
500      }
501      else /* near pi/2, use 33 + 33 + 53 bit pi */
502      {
503        z.dbl -= pio2_2;
504        y[0] = z.dbl - pio2_2t;
505        y[1] = (z.dbl - y[0]) - pio2_2t;
506      }
507      return 1;
508    }
509    else /* negative x */
510    {
511      z.dbl = x + pio2_1;
512      if (ix != 0x3ff921fb) /* 33 + 53 bit pi is good enough */
513      {
514        y[0] = z.dbl + pio2_1t;
515        y[1] = (z.dbl - y[0]) + pio2_1t;
516      }
517      else /* near pi/2, use 33 + 33 + 53 bit pi */
518      {
519        z.dbl += pio2_2;
520        y[0] = z.dbl + pio2_2t;
521        y[1] = (z.dbl - y[0]) + pio2_2t;
522      }
523      return -1;
524    }
525  }
526  if (ix <= 0x413921fb) /* |x| ~<= 2^19 * (pi/2), medium size */
527  {
528    t = fabs (x);
529    n = (int) (t * invpio2 + half);
530    fn = (double) n;
531    r = t - fn * pio2_1;
532    w = fn * pio2_1t; /* 1st round good to 85 bit */
533    if (n < 32 && ix != npio2_hw[n - 1])
534    {
535      y[0] = r - w; /* quick check no cancellation */
536    }
537    else
538    {
539      j = ix >> 20;
540      y[0] = r - w;
541      i = j - (((__HI (y[0])) >> 20) & 0x7ff);
542      if (i > 16) /* 2nd iteration needed, good to 118 */
543      {
544        t = r;
545        w = fn * pio2_2;
546        r = t - w;
547        w = fn * pio2_2t - ((t - r) - w);
548        y[0] = r - w;
549        i = j - (((__HI (y[0])) >> 20) & 0x7ff);
550        if (i > 49) /* 3rd iteration need, 151 bits acc, will cover all possible cases */
551        {
552          t = r;
553          w = fn * pio2_3;
554          r = t - w;
555          w = fn * pio2_3t - ((t - r) - w);
556          y[0] = r - w;
557        }
558      }
559    }
560    y[1] = (r - y[0]) - w;
561    if (hx < 0)
562    {
563      y[0] = -y[0];
564      y[1] = -y[1];
565      return -n;
566    }
567    else
568    {
569      return n;
570    }
571  }
572  /*
573   * all other (large) arguments
574   */
575  if (ix >= 0x7ff00000) /* x is inf or NaN */
576  {
577    y[0] = y[1] = x - x;
578    return 0;
579  }
580  /* set z = scalbn(|x|, ilogb(x) - 23) */
581  z.as_int.lo = __LO (x);
582  e0 = (ix >> 20) - 1046; /* e0 = ilogb(z) - 23; */
583  z.as_int.hi = ix - (e0 << 20);
584  for (i = 0; i < 2; i++)
585  {
586    tx[i] = (double) ((int) (z.dbl));
587    z.dbl = (z.dbl - tx[i]) * two24;
588  }
589  tx[2] = z.dbl;
590  nx = 3;
591  while (tx[nx - 1] == zero) /* skip zero term */
592  {
593    nx--;
594  }
595  n = __kernel_rem_pio2 (tx, y, e0, nx, 2);
596  if (hx < 0)
597  {
598    y[0] = -y[0];
599    y[1] = -y[1];
600    return -n;
601  }
602  return n;
603} /* __ieee754_rem_pio2 */
604
605/* __kernel_sin( x, y, iy)
606 * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854
607 * Input x is assumed to be bounded by ~pi/4 in magnitude.
608 * Input y is the tail of x.
609 * Input iy indicates whether y is 0. (if iy=0, y assume to be 0).
610 *
611 * Algorithm
612 *      1. Since sin(-x) = -sin(x), we need only to consider positive x.
613 *      2. if x < 2^-27 (hx<0x3e400000 0), return x with inexact if x!=0.
614 *      3. sin(x) is approximated by a polynomial of degree 13 on
615 *         [0,pi/4]
616 *                               3            13
617 *              sin(x) ~ x + S1*x + ... + S6*x
618 *         where
619 *
620 *      |sin(x)         2     4     6     8     10     12  |     -58
621 *      |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x  +S6*x   )| <= 2
622 *      |  x                                               |
623 *
624 *      4. sin(x+y) = sin(x) + sin'(x')*y
625 *                  ~ sin(x) + (1-x*x/2)*y
626 *         For better accuracy, let
627 *                   3      2      2      2      2
628 *              r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6))))
629 *         then                   3    2
630 *              sin(x) = x + (S1*x + (x *(r-y/2)+y))
631 */
632
633#define S1   -1.66666666666666324348e-01 /* 0xBFC55555, 0x55555549 */
634#define S2    8.33333333332248946124e-03 /* 0x3F811111, 0x1110F8A6 */
635#define S3   -1.98412698298579493134e-04 /* 0xBF2A01A0, 0x19C161D5 */
636#define S4    2.75573137070700676789e-06 /* 0x3EC71DE3, 0x57B1FE7D */
637#define S5   -2.50507602534068634195e-08 /* 0xBE5AE5E6, 0x8A2B9CEB */
638#define S6    1.58969099521155010221e-10 /* 0x3DE5D93A, 0x5ACFD57C */
639
640static double
641__kernel_sin (double x, double y, int iy)
642{
643  double z, r, v;
644  int ix;
645
646  ix = __HI (x) & 0x7fffffff; /* high word of x */
647  if (ix < 0x3e400000) /* |x| < 2**-27 */
648  {
649    if ((int) x == 0)
650    {
651      return x; /* generate inexact */
652    }
653  }
654  z = x * x;
655  v = z * x;
656  r = S2 + z * (S3 + z * (S4 + z * (S5 + z * S6)));
657  if (iy == 0)
658  {
659    return x + v * (S1 + z * r);
660  }
661  else
662  {
663    return x - ((z * (half * y - v * r) - y) - v * S1);
664  }
665} /* __kernel_sin */
666
667/*
668 * __kernel_cos( x,  y )
669 * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164
670 * Input x is assumed to be bounded by ~pi/4 in magnitude.
671 * Input y is the tail of x.
672 *
673 * Algorithm
674 *      1. Since cos(-x) = cos(x), we need only to consider positive x.
675 *      2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0.
676 *      3. cos(x) is approximated by a polynomial of degree 14 on
677 *         [0,pi/4]
678 *                                       4            14
679 *              cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x
680 *         where the remez error is
681 *
682 *      |              2     4     6     8     10    12     14 |     -58
683 *      |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x  +C6*x  )| <= 2
684 *      |                                                      |
685 *
686 *                     4     6     8     10    12     14
687 *      4. let r = C1*x +C2*x +C3*x +C4*x +C5*x  +C6*x  , then
688 *             cos(x) = 1 - x*x/2 + r
689 *         since cos(x+y) ~ cos(x) - sin(x)*y
690 *                        ~ cos(x) - x*y,
691 *         a correction term is necessary in cos(x) and hence
692 *              cos(x+y) = 1 - (x*x/2 - (r - x*y))
693 *         For better accuracy when x > 0.3, let qx = |x|/4 with
694 *         the last 32 bits mask off, and if x > 0.78125, let qx = 0.28125.
695 *         Then
696 *              cos(x+y) = (1-qx) - ((x*x/2-qx) - (r-x*y)).
697 *         Note that 1-qx and (x*x/2-qx) is EXACT here, and the
698 *         magnitude of the latter is at least a quarter of x*x/2,
699 *         thus, reducing the rounding error in the subtraction.
700 */
701
702#define C1   4.16666666666666019037e-02 /* 0x3FA55555, 0x5555554C */
703#define C2  -1.38888888888741095749e-03 /* 0xBF56C16C, 0x16C15177 */
704#define C3   2.48015872894767294178e-05 /* 0x3EFA01A0, 0x19CB1590 */
705#define C4  -2.75573143513906633035e-07 /* 0xBE927E4F, 0x809C52AD */
706#define C5   2.08757232129817482790e-09 /* 0x3E21EE9E, 0xBDB4B1C4 */
707#define C6  -1.13596475577881948265e-11 /* 0xBDA8FAE9, 0xBE8838D4 */
708
709static double
710__kernel_cos (double x, double y)
711{
712  double a, hz, z, r;
713  int ix;
714
715  ix = __HI (x) & 0x7fffffff; /* ix = |x|'s high word */
716  if (ix < 0x3e400000) /* if x < 2**27 */
717  {
718    if (((int) x) == 0)
719    {
720      return one; /* generate inexact */
721    }
722  }
723  z = x * x;
724  r = z * (C1 + z * (C2 + z * (C3 + z * (C4 + z * (C5 + z * C6)))));
725  if (ix < 0x3FD33333) /* if |x| < 0.3 */
726  {
727    return one - (0.5 * z - (z * r - x * y));
728  }
729  else
730  {
731    double_accessor qx;
732    if (ix > 0x3fe90000) /* x > 0.78125 */
733    {
734      qx.dbl = 0.28125;
735    }
736    else
737    {
738      qx.as_int.hi = ix - 0x00200000; /* x / 4 */
739      qx.as_int.lo = 0;
740    }
741    hz = 0.5 * z - qx.dbl;
742    a = one - qx.dbl;
743    return a - (hz - (z * r - x * y));
744  }
745} /* __kernel_cos */
746
747/* __kernel_tan( x, y, k )
748 * kernel tan function on [-pi/4, pi/4], pi/4 ~ 0.7854
749 * Input x is assumed to be bounded by ~pi/4 in magnitude.
750 * Input y is the tail of x.
751 * Input k indicates whether tan (if k = 1) or -1/tan (if k = -1) is returned.
752 *
753 * Algorithm
754 *      1. Since tan(-x) = -tan(x), we need only to consider positive x.
755 *      2. if x < 2^-28 (hx<0x3e300000 0), return x with inexact if x!=0.
756 *      3. tan(x) is approximated by a odd polynomial of degree 27 on
757 *         [0,0.67434]
758 *                               3             27
759 *              tan(x) ~ x + T1*x + ... + T13*x
760 *         where
761 *
762 *              |tan(x)         2     4            26   |     -59.2
763 *              |----- - (1+T1*x +T2*x +.... +T13*x    )| <= 2
764 *              |  x                                    |
765 *
766 *         Note: tan(x+y) = tan(x) + tan'(x)*y
767 *                        ~ tan(x) + (1+x*x)*y
768 *         Therefore, for better accuracy in computing tan(x+y), let
769 *                   3      2      2       2       2
770 *              r = x *(T2+x *(T3+x *(...+x *(T12+x *T13))))
771 *         then
772 *                                  3    2
773 *              tan(x+y) = x + (T1*x + (x *(r+y)+y))
774 *
775 *      4. For x in [0.67434,pi/4],  let y = pi/4 - x, then
776 *              tan(x) = tan(pi/4-y) = (1-tan(y))/(1+tan(y))
777 *                     = 1 - 2*(tan(y) - (tan(y)^2)/(1+tan(y)))
778 */
779
780#define T0      3.33333333333334091986e-01 /* 3FD55555, 55555563 */
781#define T1      1.33333333333201242699e-01 /* 3FC11111, 1110FE7A */
782#define T2      5.39682539762260521377e-02 /* 3FABA1BA, 1BB341FE */
783#define T3      2.18694882948595424599e-02 /* 3F9664F4, 8406D637 */
784#define T4      8.86323982359930005737e-03 /* 3F8226E3, E96E8493 */
785#define T5      3.59207910759131235356e-03 /* 3F6D6D22, C9560328 */
786#define T6      1.45620945432529025516e-03 /* 3F57DBC8, FEE08315 */
787#define T7      5.88041240820264096874e-04 /* 3F4344D8, F2F26501 */
788#define T8      2.46463134818469906812e-04 /* 3F3026F7, 1A8D1068 */
789#define T9      7.81794442939557092300e-05 /* 3F147E88, A03792A6 */
790#define T10     7.14072491382608190305e-05 /* 3F12B80F, 32F0A7E9 */
791#define T11    -1.85586374855275456654e-05 /* BEF375CB, DB605373 */
792#define T12     2.59073051863633712884e-05 /* 3EFB2A70, 74BF7AD4 */
793#define pio4    7.85398163397448278999e-01 /* 3FE921FB, 54442D18 */
794#define pio4lo  3.06161699786838301793e-17 /* 3C81A626, 33145C07 */
795
796static double
797__kernel_tan (double x, double y, int iy)
798{
799  double_accessor z;
800  double r, v, w, s;
801  int ix, hx;
802
803  hx = __HI (x); /* high word of x */
804  ix = hx & 0x7fffffff; /* high word of |x| */
805  if (ix < 0x3e300000) /* x < 2**-28 */
806  {
807    if ((int) x == 0) /* generate inexact */
808    {
809      if (((ix | __LO (x)) | (iy + 1)) == 0)
810      {
811        return one / fabs (x);
812      }
813      else
814      {
815        if (iy == 1)
816        {
817          return x;
818        }
819        else /* compute -1 / (x + y) carefully */
820        {
821          double a;
822          double_accessor t;
823
824          z.dbl = w = x + y;
825          z.as_int.lo = 0;
826          v = y - (z.dbl - x);
827          t.dbl = a = -one / w;
828          t.as_int.lo = 0;
829          s = one + t.dbl * z.dbl;
830          return t.dbl + a * (s + t.dbl * v);
831        }
832      }
833    }
834  }
835  if (ix >= 0x3FE59428) /* |x| >= 0.6744 */
836  {
837    if (hx < 0)
838    {
839      x = -x;
840      y = -y;
841    }
842    z.dbl = pio4 - x;
843    w = pio4lo - y;
844    x = z.dbl + w;
845    y = 0.0;
846  }
847  z.dbl = x * x;
848  w = z.dbl * z.dbl;
849  /*
850   * Break x^5 * (T[1] + x^2 * T[2] + ...) into
851   * x^5 (T[1] + x^4 * T[3] + ... + x^20 * T[11]) +
852   * x^5 (x^2 * (T[2] + x^4 * T[4] + ... + x^22 * [T12]))
853   */
854  r = T1 + w * (T3 + w * (T5 + w * (T7 + w * (T9 + w * T11))));
855  v = z.dbl * (T2 + w * (T4 + w * (T6 + w * (T8 + w * (T10 + w * T12)))));
856  s = z.dbl * x;
857  r = y + z.dbl * (s * (r + v) + y);
858  r += T0 * s;
859  w = x + r;
860  if (ix >= 0x3FE59428)
861  {
862    v = (double) iy;
863    return (double) (1 - ((hx >> 30) & 2)) * (v - 2.0 * (x - (w * w / (w + v) - r)));
864  }
865  if (iy == 1)
866  {
867    return w;
868  }
869  else
870  {
871    /*
872     * if allow error up to 2 ulp, simply return
873     * -1.0 / (x + r) here
874     */
875    /* compute -1.0 / (x + r) accurately */
876    double a;
877    double_accessor t;
878
879    z.dbl = w;
880    z.as_int.lo = 0;
881    v = r - (z.dbl - x); /* z + v = r + x */
882    t.dbl = a = -1.0 / w; /* a = -1.0 / w */
883    t.as_int.lo = 0;
884    s = 1.0 + t.dbl * z.dbl;
885    return t.dbl + a * (s + t.dbl * v);
886  }
887} /* __kernel_tan */
888
889/* Method:
890 *      Let S,C and T denote the sin, cos and tan respectively on
891 *      [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
892 *      in [-pi/4 , +pi/4], and let n = k mod 4.
893 *      We have
894 *
895 *          n        sin(x)      cos(x)        tan(x)
896 *     ----------------------------------------------------------
897 *          0          S           C             T
898 *          1          C          -S            -1/T
899 *          2         -S          -C             T
900 *          3         -C           S            -1/T
901 *     ----------------------------------------------------------
902 *
903 * Special cases:
904 *      Let trig be any of sin, cos, or tan.
905 *      trig(+-INF)  is NaN, with signals;
906 *      trig(NaN)    is that NaN;
907 *
908 * Accuracy:
909 *      TRIG(x) returns trig(x) nearly rounded
910 */
911
912/* sin(x)
913 * Return sine function of x.
914 *
915 * kernel function:
916 *      __kernel_sin            ... sine function on [-pi/4,pi/4]
917 *      __kernel_cos            ... cose function on [-pi/4,pi/4]
918 *      __ieee754_rem_pio2      ... argument reduction routine
919 */
920double
921sin (double x)
922{
923  double y[2], z = 0.0;
924  int n, ix;
925
926  /* High word of x. */
927  ix = __HI (x);
928
929  /* |x| ~< pi/4 */
930  ix &= 0x7fffffff;
931  if (ix <= 0x3fe921fb)
932  {
933    return __kernel_sin (x, z, 0);
934  }
935
936  /* sin(Inf or NaN) is NaN */
937  else if (ix >= 0x7ff00000)
938  {
939    return x - x;
940  }
941
942  /* argument reduction needed */
943  else
944  {
945    n = __ieee754_rem_pio2 (x, y);
946    switch (n & 3)
947    {
948      case 0:
949      {
950        return __kernel_sin (y[0], y[1], 1);
951      }
952      case 1:
953      {
954        return __kernel_cos (y[0], y[1]);
955      }
956      case 2:
957      {
958        return -__kernel_sin (y[0], y[1], 1);
959      }
960      default:
961      {
962        return -__kernel_cos (y[0], y[1]);
963      }
964    }
965  }
966} /* sin */
967
968/* cos(x)
969 * Return cosine function of x.
970 *
971 * kernel function:
972 *      __kernel_sin            ... sine function on [-pi/4,pi/4]
973 *      __kernel_cos            ... cosine function on [-pi/4,pi/4]
974 *      __ieee754_rem_pio2      ... argument reduction routine
975 */
976
977double
978cos (double x)
979{
980  double y[2], z = 0.0;
981  int n, ix;
982
983  /* High word of x. */
984  ix = __HI (x);
985
986  /* |x| ~< pi/4 */
987  ix &= 0x7fffffff;
988  if (ix <= 0x3fe921fb)
989  {
990    return __kernel_cos (x, z);
991  }
992
993  /* cos(Inf or NaN) is NaN */
994  else if (ix >= 0x7ff00000)
995  {
996    return x - x;
997  }
998
999  /* argument reduction needed */
1000  else
1001  {
1002    n = __ieee754_rem_pio2 (x, y);
1003    switch (n & 3)
1004    {
1005      case 0:
1006      {
1007        return __kernel_cos (y[0], y[1]);
1008      }
1009      case 1:
1010      {
1011        return -__kernel_sin (y[0], y[1], 1);
1012      }
1013      case 2:
1014      {
1015        return -__kernel_cos (y[0], y[1]);
1016      }
1017      default:
1018      {
1019        return __kernel_sin (y[0], y[1], 1);
1020      }
1021    }
1022  }
1023} /* cos */
1024
1025/* tan(x)
1026 * Return tangent function of x.
1027 *
1028 * kernel function:
1029 *      __kernel_tan            ... tangent function on [-pi/4,pi/4]
1030 *      __ieee754_rem_pio2      ... argument reduction routine
1031 */
1032
1033double
1034tan (double x)
1035{
1036  double y[2], z = 0.0;
1037  int n, ix;
1038
1039  /* High word of x. */
1040  ix = __HI (x);
1041
1042  /* |x| ~< pi/4 */
1043  ix &= 0x7fffffff;
1044  if (ix <= 0x3fe921fb)
1045  {
1046    return __kernel_tan (x, z, 1);
1047  }
1048
1049  /* tan(Inf or NaN) is NaN */
1050  else if (ix >= 0x7ff00000)
1051  {
1052    return x - x; /* NaN */
1053  }
1054
1055  /* argument reduction needed */
1056  else
1057  {
1058    n = __ieee754_rem_pio2 (x, y);
1059    return __kernel_tan (y[0], y[1], 1 - ((n & 1) << 1)); /*   1 -- n even, -1 -- n odd */
1060  }
1061} /* tan */
1062
1063#undef zero
1064#undef half
1065#undef one
1066#undef two24
1067#undef twon24
1068#undef invpio2
1069#undef pio2_1
1070#undef pio2_1t
1071#undef pio2_2
1072#undef pio2_2t
1073#undef pio2_3
1074#undef pio2_3t
1075#undef S1
1076#undef S2
1077#undef S3
1078#undef S4
1079#undef S5
1080#undef S6
1081#undef C1
1082#undef C2
1083#undef C3
1084#undef C4
1085#undef C5
1086#undef C6
1087#undef T0
1088#undef T1
1089#undef T2
1090#undef T3
1091#undef T4
1092#undef T5
1093#undef T6
1094#undef T7
1095#undef T8
1096#undef T9
1097#undef T10
1098#undef T11
1099#undef T12
1100#undef pio4
1101#undef pio4lo
1102