1/* Copyright JS Foundation and other contributors, http://js.foundation 2 * 3 * Licensed under the Apache License, Version 2.0 (the "License"); 4 * you may not use this file except in compliance with the License. 5 * You may obtain a copy of the License at 6 * 7 * http://www.apache.org/licenses/LICENSE-2.0 8 * 9 * Unless required by applicable law or agreed to in writing, software 10 * distributed under the License is distributed on an "AS IS" BASIS 11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 * See the License for the specific language governing permissions and 13 * limitations under the License. 14 * 15 * This file is based on work under the following copyright and permission 16 * notice: 17 * 18 * Copyright (C) 1993, 2004 by Sun Microsystems, Inc. All rights reserved. 19 * 20 * Developed at SunSoft, a Sun Microsystems, Inc. business. 21 * Permission to use, copy, modify, and distribute this 22 * software is freely granted, provided that this notice 23 * is preserved. 24 * 25 * @(#)k_rem_pio2.c 1.3 95/01/18 26 * @(#)e_rem_pio2.c 1.4 95/01/18 27 * @(#)k_sin.c 1.3 95/01/18 28 * @(#)k_cos.c 1.3 95/01/18 29 * @(#)k_tan.c 1.5 04/04/22 30 * @(#)s_sin.c 1.3 95/01/18 31 * @(#)s_cos.c 1.3 95/01/18 32 * @(#)s_tan.c 1.3 95/01/18 33 */ 34 35#include "jerry-libm-internal.h" 36 37#define zero 0.00000000000000000000e+00 /* 0x00000000, 0x00000000 */ 38#define half 5.00000000000000000000e-01 /* 0x3FE00000, 0x00000000 */ 39#define one 1.00000000000000000000e+00 /* 0x3FF00000, 0x00000000 */ 40#define two24 1.67772160000000000000e+07 /* 0x41700000, 0x00000000 */ 41#define twon24 5.96046447753906250000e-08 /* 0x3E700000, 0x00000000 */ 42 43/* __kernel_rem_pio2(x,y,e0,nx,prec) 44 * double x[],y[]; int e0,nx,prec; 45 * 46 * __kernel_rem_pio2 return the last three digits of N with 47 * y = x - N*pi/2 48 * so that |y| < pi/2. 49 * 50 * The method is to compute the integer (mod 8) and fraction parts of 51 * (2/pi)*x without doing the full multiplication. In general we 52 * skip the part of the product that are known to be a huge integer ( 53 * more accurately, = 0 mod 8 ). Thus the number of operations are 54 * independent of the exponent of the input. 55 * 56 * (2/pi) is represented by an array of 24-bit integers in ipio2[]. 57 * 58 * Input parameters: 59 * x[] The input value (must be positive) is broken into nx 60 * pieces of 24-bit integers in double precision format. 61 * x[i] will be the i-th 24 bit of x. The scaled exponent 62 * of x[0] is given in input parameter e0 (i.e., x[0]*2^e0 63 * match x's up to 24 bits. 64 * 65 * Example of breaking a double positive z into x[0]+x[1]+x[2]: 66 * e0 = ilogb(z)-23 67 * z = scalbn(z,-e0) 68 * for i = 0,1,2 69 * x[i] = floor(z) 70 * z = (z-x[i])*2**24 71 * 72 * y[] ouput result in an array of double precision numbers. 73 * The dimension of y[] is: 74 * 24-bit precision 1 75 * 53-bit precision 2 76 * 64-bit precision 2 77 * 113-bit precision 3 78 * The actual value is the sum of them. Thus for 113-bit 79 * precison, one may have to do something like: 80 * 81 * long double t,w,r_head, r_tail; 82 * t = (long double)y[2] + (long double)y[1]; 83 * w = (long double)y[0]; 84 * r_head = t+w; 85 * r_tail = w - (r_head - t); 86 * 87 * e0 The exponent of x[0] 88 * 89 * nx dimension of x[] 90 * 91 * prec an integer indicating the precision: 92 * 0 24 bits (single) 93 * 1 53 bits (double) 94 * 2 64 bits (extended) 95 * 3 113 bits (quad) 96 * 97 * External function: 98 * double scalbn(), floor(); 99 * 100 * Here is the description of some local variables: 101 * 102 * ipio2[] integer array, contains the (24*i)-th to (24*i+23)-th 103 * bit of 2/pi after binary point. The corresponding 104 * floating value is 105 * 106 * ipio2[i] * 2^(-24(i+1)). 107 * 108 * jk jk+1 is the initial number of terms of ipio2[] needed 109 * in the computation. The recommended value is 2,3,4, 110 * 6 for single, double, extended,and quad. 111 * 112 * jz local integer variable indicating the number of 113 * terms of ipio2[] used. 114 * 115 * jx nx - 1 116 * 117 * jv index for pointing to the suitable ipio2[] for the 118 * computation. In general, we want 119 * ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8 120 * is an integer. Thus 121 * e0-3-24*jv >= 0 or (e0-3)/24 >= jv 122 * Hence jv = max(0,(e0-3)/24). 123 * 124 * jp jp+1 is the number of terms in PIo2[] needed, jp = jk. 125 * 126 * q[] double array with integral value, representing the 127 * 24-bits chunk of the product of x and 2/pi. 128 * 129 * q0 the corresponding exponent of q[0]. Note that the 130 * exponent for q[i] would be q0-24*i. 131 * 132 * PIo2[] double precision array, obtained by cutting pi/2 133 * into 24 bits chunks. 134 * 135 * f[] ipio2[] in floating point 136 * 137 * iq[] integer array by breaking up q[] in 24-bits chunk. 138 * 139 * fq[] final product of x*(2/pi) in fq[0],..,fq[jk] 140 * 141 * ih integer. If >0 it indicates q[] is >= 0.5, hence 142 * it also indicates the *sign* of the result. 143 */ 144 145/* 146 * Constants: 147 * The hexadecimal values are the intended ones for the following 148 * constants. The decimal values may be used, provided that the 149 * compiler will convert from decimal to binary accurately enough 150 * to produce the hexadecimal values shown. 151 */ 152 153/* initial value for jk */ 154static const int init_jk[] = 155{ 156 2, 3, 4, 6 157}; 158 159static const double PIo2[] = 160{ 161 1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */ 162 7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */ 163 5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */ 164 3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */ 165 1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */ 166 1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */ 167 2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */ 168 2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */ 169}; 170 171/* 172 * Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi 173 */ 174static const int ipio2[] = 175{ 176 0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62, 177 0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A, 178 0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129, 179 0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41, 180 0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8, 181 0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF, 182 0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5, 183 0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08, 184 0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3, 185 0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880, 186 0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B, 187}; 188 189static int 190__kernel_rem_pio2 (double *x, double *y, int e0, int nx, int prec) 191{ 192 int jz, jx, jv, jp, jk, carry, n, iq[20], i, j, k, m, q0, ih; 193 double z, fw, f[20], fq[20], q[20]; 194 195 /* initialize jk */ 196 jk = init_jk[prec]; 197 jp = jk; 198 199 /* determine jx, jv, q0, note that 3 > q0 */ 200 jx = nx - 1; 201 jv = (e0 - 3) / 24; 202 if (jv < 0) 203 { 204 jv = 0; 205 } 206 q0 = e0 - 24 * (jv + 1); 207 208 /* set up f[0] to f[jx + jk] where f[jx + jk] = ipio2[jv + jk] */ 209 j = jv - jx; 210 m = jx + jk; 211 for (i = 0; i <= m; i++, j++) 212 { 213 f[i] = (j < 0) ? zero : (double) ipio2[j]; 214 } 215 216 /* compute q[0], q[1], ... q[jk] */ 217 for (i = 0; i <= jk; i++) 218 { 219 for (j = 0, fw = 0.0; j <= jx; j++) 220 { 221 fw += x[j] * f[jx + i - j]; 222 } 223 q[i] = fw; 224 } 225 226 jz = jk; 227recompute: 228 /* distill q[] into iq[] reversingly */ 229 for (i = 0, j = jz, z = q[jz]; j > 0; i++, j--) 230 { 231 fw = (double) ((int) (twon24 * z)); 232 iq[i] = (int) (z - two24 * fw); 233 z = q[j - 1] + fw; 234 } 235 236 /* compute n */ 237 z = scalbn (z, q0); /* actual value of z */ 238 z -= 8.0 * floor (z * 0.125); /* trim off integer >= 8 */ 239 n = (int) z; 240 z -= (double) n; 241 ih = 0; 242 if (q0 > 0) /* need iq[jz - 1] to determine n */ 243 { 244 i = (iq[jz - 1] >> (24 - q0)); 245 n += i; 246 iq[jz - 1] -= i << (24 - q0); 247 ih = iq[jz - 1] >> (23 - q0); 248 } 249 else if (q0 == 0) 250 { 251 ih = iq[jz - 1] >> 23; 252 } 253 else if (z >= 0.5) 254 { 255 ih = 2; 256 } 257 258 if (ih > 0) /* q > 0.5 */ 259 { 260 n += 1; 261 carry = 0; 262 for (i = 0; i < jz; i++) /* compute 1 - q */ 263 { 264 j = iq[i]; 265 if (carry == 0) 266 { 267 if (j != 0) 268 { 269 carry = 1; 270 iq[i] = 0x1000000 - j; 271 } 272 } 273 else 274 { 275 iq[i] = 0xffffff - j; 276 } 277 } 278 if (q0 > 0) /* rare case: chance is 1 in 12 */ 279 { 280 switch (q0) 281 { 282 case 1: 283 { 284 iq[jz - 1] &= 0x7fffff; 285 break; 286 } 287 case 2: 288 { 289 iq[jz - 1] &= 0x3fffff; 290 break; 291 } 292 } 293 } 294 if (ih == 2) 295 { 296 z = one - z; 297 if (carry != 0) 298 { 299 z -= scalbn (one, q0); 300 } 301 } 302 } 303 304 /* check if recomputation is needed */ 305 if (z == zero) 306 { 307 j = 0; 308 for (i = jz - 1; i >= jk; i--) 309 { 310 j |= iq[i]; 311 } 312 if (j == 0) /* need recomputation */ 313 { 314 for (k = 1; iq[jk - k] == 0; k++) /* k = no. of terms needed */ 315 { 316 } 317 318 for (i = jz + 1; i <= jz + k; i++) /* add q[jz + 1] to q[jz + k] */ 319 { 320 f[jx + i] = (double) ipio2[jv + i]; 321 for (j = 0, fw = 0.0; j <= jx; j++) 322 { 323 fw += x[j] * f[jx + i - j]; 324 } 325 q[i] = fw; 326 } 327 jz += k; 328 goto recompute; 329 } 330 } 331 332 /* chop off zero terms */ 333 if (z == 0.0) 334 { 335 jz -= 1; 336 q0 -= 24; 337 while (iq[jz] == 0) 338 { 339 jz--; 340 q0 -= 24; 341 } 342 } 343 else 344 { /* break z into 24-bit if necessary */ 345 z = scalbn (z, -q0); 346 if (z >= two24) 347 { 348 fw = (double) ((int) (twon24 * z)); 349 iq[jz] = (int) (z - two24 * fw); 350 jz += 1; 351 q0 += 24; 352 iq[jz] = (int) fw; 353 } 354 else 355 { 356 iq[jz] = (int) z; 357 } 358 } 359 360 /* convert integer "bit" chunk to floating-point value */ 361 fw = scalbn (one, q0); 362 for (i = jz; i >= 0; i--) 363 { 364 q[i] = fw * (double) iq[i]; 365 fw *= twon24; 366 } 367 368 /* compute PIo2[0, ..., jp] * q[jz, ..., 0] */ 369 for (i = jz; i >= 0; i--) 370 { 371 for (fw = 0.0, k = 0; k <= jp && k <= jz - i; k++) 372 { 373 fw += PIo2[k] * q[i + k]; 374 } 375 fq[jz - i] = fw; 376 } 377 378 /* compress fq[] into y[] */ 379 switch (prec) 380 { 381 case 0: 382 { 383 fw = 0.0; 384 for (i = jz; i >= 0; i--) 385 { 386 fw += fq[i]; 387 } 388 y[0] = (ih == 0) ? fw : -fw; 389 break; 390 } 391 case 1: 392 case 2: 393 { 394 fw = 0.0; 395 for (i = jz; i >= 0; i--) 396 { 397 fw += fq[i]; 398 } 399 y[0] = (ih == 0) ? fw : -fw; 400 fw = fq[0] - fw; 401 for (i = 1; i <= jz; i++) 402 { 403 fw += fq[i]; 404 } 405 y[1] = (ih == 0) ? fw : -fw; 406 break; 407 } 408 case 3: /* painful */ 409 { 410 for (i = jz; i > 0; i--) 411 { 412 fw = fq[i - 1] + fq[i]; 413 fq[i] += fq[i - 1] - fw; 414 fq[i - 1] = fw; 415 } 416 for (i = jz; i > 1; i--) 417 { 418 fw = fq[i - 1] + fq[i]; 419 fq[i] += fq[i - 1] - fw; 420 fq[i - 1] = fw; 421 } 422 for (fw = 0.0, i = jz; i >= 2; i--) 423 { 424 fw += fq[i]; 425 } 426 if (ih == 0) 427 { 428 y[0] = fq[0]; 429 y[1] = fq[1]; 430 y[2] = fw; 431 } 432 else 433 { 434 y[0] = -fq[0]; 435 y[1] = -fq[1]; 436 y[2] = -fw; 437 } 438 } 439 } 440 return n & 7; 441} /* __kernel_rem_pio2 */ 442 443/* __ieee754_rem_pio2(x,y) 444 * return the remainder of x rem pi/2 in y[0]+y[1] 445 * use __kernel_rem_pio2() 446 */ 447 448static const int npio2_hw[] = 449{ 450 0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB, 0x401F6A7A, 0x4022D97C, 451 0x4025FDBB, 0x402921FB, 0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C, 452 0x40346B9C, 0x4035FDBB, 0x40378FDB, 0x403921FB, 0x403AB41B, 0x403C463A, 453 0x403DD85A, 0x403F6A7A, 0x40407E4C, 0x4041475C, 0x4042106C, 0x4042D97C, 454 0x4043A28C, 0x40446B9C, 0x404534AC, 0x4045FDBB, 0x4046C6CB, 0x40478FDB, 455 0x404858EB, 0x404921FB, 456}; 457 458/* 459 * invpio2: 53 bits of 2/pi 460 * pio2_1: first 33 bit of pi/2 461 * pio2_1t: pi/2 - pio2_1 462 * pio2_2: second 33 bit of pi/2 463 * pio2_2t: pi/2 - (pio2_1 + pio2_2) 464 * pio2_3: third 33 bit of pi/2 465 * pio2_3t: pi/2 - (pio2_1 + pio2_2 + pio2_3) 466 */ 467#define invpio2 6.36619772367581382433e-01 /* 0x3FE45F30, 0x6DC9C883 */ 468#define pio2_1 1.57079632673412561417e+00 /* 0x3FF921FB, 0x54400000 */ 469#define pio2_1t 6.07710050650619224932e-11 /* 0x3DD0B461, 0x1A626331 */ 470#define pio2_2 6.07710050630396597660e-11 /* 0x3DD0B461, 0x1A600000 */ 471#define pio2_2t 2.02226624879595063154e-21 /* 0x3BA3198A, 0x2E037073 */ 472#define pio2_3 2.02226624871116645580e-21 /* 0x3BA3198A, 0x2E000000 */ 473#define pio2_3t 8.47842766036889956997e-32 /* 0x397B839A, 0x252049C1 */ 474 475static int 476__ieee754_rem_pio2 (double x, double *y) 477{ 478 double_accessor z; 479 double w, t, r, fn; 480 double tx[3]; 481 int e0, i, j, nx, n, ix, hx; 482 483 hx = __HI (x); /* high word of x */ 484 ix = hx & 0x7fffffff; 485 if (ix <= 0x3fe921fb) /* |x| ~<= pi/4 , no need for reduction */ 486 { 487 y[0] = x; 488 y[1] = 0; 489 return 0; 490 } 491 if (ix < 0x4002d97c) /* |x| < 3pi/4, special case with n = +-1 */ 492 { 493 if (hx > 0) 494 { 495 z.dbl = x - pio2_1; 496 if (ix != 0x3ff921fb) /* 33 + 53 bit pi is good enough */ 497 { 498 y[0] = z.dbl - pio2_1t; 499 y[1] = (z.dbl - y[0]) - pio2_1t; 500 } 501 else /* near pi/2, use 33 + 33 + 53 bit pi */ 502 { 503 z.dbl -= pio2_2; 504 y[0] = z.dbl - pio2_2t; 505 y[1] = (z.dbl - y[0]) - pio2_2t; 506 } 507 return 1; 508 } 509 else /* negative x */ 510 { 511 z.dbl = x + pio2_1; 512 if (ix != 0x3ff921fb) /* 33 + 53 bit pi is good enough */ 513 { 514 y[0] = z.dbl + pio2_1t; 515 y[1] = (z.dbl - y[0]) + pio2_1t; 516 } 517 else /* near pi/2, use 33 + 33 + 53 bit pi */ 518 { 519 z.dbl += pio2_2; 520 y[0] = z.dbl + pio2_2t; 521 y[1] = (z.dbl - y[0]) + pio2_2t; 522 } 523 return -1; 524 } 525 } 526 if (ix <= 0x413921fb) /* |x| ~<= 2^19 * (pi/2), medium size */ 527 { 528 t = fabs (x); 529 n = (int) (t * invpio2 + half); 530 fn = (double) n; 531 r = t - fn * pio2_1; 532 w = fn * pio2_1t; /* 1st round good to 85 bit */ 533 if (n < 32 && ix != npio2_hw[n - 1]) 534 { 535 y[0] = r - w; /* quick check no cancellation */ 536 } 537 else 538 { 539 j = ix >> 20; 540 y[0] = r - w; 541 i = j - (((__HI (y[0])) >> 20) & 0x7ff); 542 if (i > 16) /* 2nd iteration needed, good to 118 */ 543 { 544 t = r; 545 w = fn * pio2_2; 546 r = t - w; 547 w = fn * pio2_2t - ((t - r) - w); 548 y[0] = r - w; 549 i = j - (((__HI (y[0])) >> 20) & 0x7ff); 550 if (i > 49) /* 3rd iteration need, 151 bits acc, will cover all possible cases */ 551 { 552 t = r; 553 w = fn * pio2_3; 554 r = t - w; 555 w = fn * pio2_3t - ((t - r) - w); 556 y[0] = r - w; 557 } 558 } 559 } 560 y[1] = (r - y[0]) - w; 561 if (hx < 0) 562 { 563 y[0] = -y[0]; 564 y[1] = -y[1]; 565 return -n; 566 } 567 else 568 { 569 return n; 570 } 571 } 572 /* 573 * all other (large) arguments 574 */ 575 if (ix >= 0x7ff00000) /* x is inf or NaN */ 576 { 577 y[0] = y[1] = x - x; 578 return 0; 579 } 580 /* set z = scalbn(|x|, ilogb(x) - 23) */ 581 z.as_int.lo = __LO (x); 582 e0 = (ix >> 20) - 1046; /* e0 = ilogb(z) - 23; */ 583 z.as_int.hi = ix - (e0 << 20); 584 for (i = 0; i < 2; i++) 585 { 586 tx[i] = (double) ((int) (z.dbl)); 587 z.dbl = (z.dbl - tx[i]) * two24; 588 } 589 tx[2] = z.dbl; 590 nx = 3; 591 while (tx[nx - 1] == zero) /* skip zero term */ 592 { 593 nx--; 594 } 595 n = __kernel_rem_pio2 (tx, y, e0, nx, 2); 596 if (hx < 0) 597 { 598 y[0] = -y[0]; 599 y[1] = -y[1]; 600 return -n; 601 } 602 return n; 603} /* __ieee754_rem_pio2 */ 604 605/* __kernel_sin( x, y, iy) 606 * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854 607 * Input x is assumed to be bounded by ~pi/4 in magnitude. 608 * Input y is the tail of x. 609 * Input iy indicates whether y is 0. (if iy=0, y assume to be 0). 610 * 611 * Algorithm 612 * 1. Since sin(-x) = -sin(x), we need only to consider positive x. 613 * 2. if x < 2^-27 (hx<0x3e400000 0), return x with inexact if x!=0. 614 * 3. sin(x) is approximated by a polynomial of degree 13 on 615 * [0,pi/4] 616 * 3 13 617 * sin(x) ~ x + S1*x + ... + S6*x 618 * where 619 * 620 * |sin(x) 2 4 6 8 10 12 | -58 621 * |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x +S6*x )| <= 2 622 * | x | 623 * 624 * 4. sin(x+y) = sin(x) + sin'(x')*y 625 * ~ sin(x) + (1-x*x/2)*y 626 * For better accuracy, let 627 * 3 2 2 2 2 628 * r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6)))) 629 * then 3 2 630 * sin(x) = x + (S1*x + (x *(r-y/2)+y)) 631 */ 632 633#define S1 -1.66666666666666324348e-01 /* 0xBFC55555, 0x55555549 */ 634#define S2 8.33333333332248946124e-03 /* 0x3F811111, 0x1110F8A6 */ 635#define S3 -1.98412698298579493134e-04 /* 0xBF2A01A0, 0x19C161D5 */ 636#define S4 2.75573137070700676789e-06 /* 0x3EC71DE3, 0x57B1FE7D */ 637#define S5 -2.50507602534068634195e-08 /* 0xBE5AE5E6, 0x8A2B9CEB */ 638#define S6 1.58969099521155010221e-10 /* 0x3DE5D93A, 0x5ACFD57C */ 639 640static double 641__kernel_sin (double x, double y, int iy) 642{ 643 double z, r, v; 644 int ix; 645 646 ix = __HI (x) & 0x7fffffff; /* high word of x */ 647 if (ix < 0x3e400000) /* |x| < 2**-27 */ 648 { 649 if ((int) x == 0) 650 { 651 return x; /* generate inexact */ 652 } 653 } 654 z = x * x; 655 v = z * x; 656 r = S2 + z * (S3 + z * (S4 + z * (S5 + z * S6))); 657 if (iy == 0) 658 { 659 return x + v * (S1 + z * r); 660 } 661 else 662 { 663 return x - ((z * (half * y - v * r) - y) - v * S1); 664 } 665} /* __kernel_sin */ 666 667/* 668 * __kernel_cos( x, y ) 669 * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164 670 * Input x is assumed to be bounded by ~pi/4 in magnitude. 671 * Input y is the tail of x. 672 * 673 * Algorithm 674 * 1. Since cos(-x) = cos(x), we need only to consider positive x. 675 * 2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0. 676 * 3. cos(x) is approximated by a polynomial of degree 14 on 677 * [0,pi/4] 678 * 4 14 679 * cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x 680 * where the remez error is 681 * 682 * | 2 4 6 8 10 12 14 | -58 683 * |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x +C6*x )| <= 2 684 * | | 685 * 686 * 4 6 8 10 12 14 687 * 4. let r = C1*x +C2*x +C3*x +C4*x +C5*x +C6*x , then 688 * cos(x) = 1 - x*x/2 + r 689 * since cos(x+y) ~ cos(x) - sin(x)*y 690 * ~ cos(x) - x*y, 691 * a correction term is necessary in cos(x) and hence 692 * cos(x+y) = 1 - (x*x/2 - (r - x*y)) 693 * For better accuracy when x > 0.3, let qx = |x|/4 with 694 * the last 32 bits mask off, and if x > 0.78125, let qx = 0.28125. 695 * Then 696 * cos(x+y) = (1-qx) - ((x*x/2-qx) - (r-x*y)). 697 * Note that 1-qx and (x*x/2-qx) is EXACT here, and the 698 * magnitude of the latter is at least a quarter of x*x/2, 699 * thus, reducing the rounding error in the subtraction. 700 */ 701 702#define C1 4.16666666666666019037e-02 /* 0x3FA55555, 0x5555554C */ 703#define C2 -1.38888888888741095749e-03 /* 0xBF56C16C, 0x16C15177 */ 704#define C3 2.48015872894767294178e-05 /* 0x3EFA01A0, 0x19CB1590 */ 705#define C4 -2.75573143513906633035e-07 /* 0xBE927E4F, 0x809C52AD */ 706#define C5 2.08757232129817482790e-09 /* 0x3E21EE9E, 0xBDB4B1C4 */ 707#define C6 -1.13596475577881948265e-11 /* 0xBDA8FAE9, 0xBE8838D4 */ 708 709static double 710__kernel_cos (double x, double y) 711{ 712 double a, hz, z, r; 713 int ix; 714 715 ix = __HI (x) & 0x7fffffff; /* ix = |x|'s high word */ 716 if (ix < 0x3e400000) /* if x < 2**27 */ 717 { 718 if (((int) x) == 0) 719 { 720 return one; /* generate inexact */ 721 } 722 } 723 z = x * x; 724 r = z * (C1 + z * (C2 + z * (C3 + z * (C4 + z * (C5 + z * C6))))); 725 if (ix < 0x3FD33333) /* if |x| < 0.3 */ 726 { 727 return one - (0.5 * z - (z * r - x * y)); 728 } 729 else 730 { 731 double_accessor qx; 732 if (ix > 0x3fe90000) /* x > 0.78125 */ 733 { 734 qx.dbl = 0.28125; 735 } 736 else 737 { 738 qx.as_int.hi = ix - 0x00200000; /* x / 4 */ 739 qx.as_int.lo = 0; 740 } 741 hz = 0.5 * z - qx.dbl; 742 a = one - qx.dbl; 743 return a - (hz - (z * r - x * y)); 744 } 745} /* __kernel_cos */ 746 747/* __kernel_tan( x, y, k ) 748 * kernel tan function on [-pi/4, pi/4], pi/4 ~ 0.7854 749 * Input x is assumed to be bounded by ~pi/4 in magnitude. 750 * Input y is the tail of x. 751 * Input k indicates whether tan (if k = 1) or -1/tan (if k = -1) is returned. 752 * 753 * Algorithm 754 * 1. Since tan(-x) = -tan(x), we need only to consider positive x. 755 * 2. if x < 2^-28 (hx<0x3e300000 0), return x with inexact if x!=0. 756 * 3. tan(x) is approximated by a odd polynomial of degree 27 on 757 * [0,0.67434] 758 * 3 27 759 * tan(x) ~ x + T1*x + ... + T13*x 760 * where 761 * 762 * |tan(x) 2 4 26 | -59.2 763 * |----- - (1+T1*x +T2*x +.... +T13*x )| <= 2 764 * | x | 765 * 766 * Note: tan(x+y) = tan(x) + tan'(x)*y 767 * ~ tan(x) + (1+x*x)*y 768 * Therefore, for better accuracy in computing tan(x+y), let 769 * 3 2 2 2 2 770 * r = x *(T2+x *(T3+x *(...+x *(T12+x *T13)))) 771 * then 772 * 3 2 773 * tan(x+y) = x + (T1*x + (x *(r+y)+y)) 774 * 775 * 4. For x in [0.67434,pi/4], let y = pi/4 - x, then 776 * tan(x) = tan(pi/4-y) = (1-tan(y))/(1+tan(y)) 777 * = 1 - 2*(tan(y) - (tan(y)^2)/(1+tan(y))) 778 */ 779 780#define T0 3.33333333333334091986e-01 /* 3FD55555, 55555563 */ 781#define T1 1.33333333333201242699e-01 /* 3FC11111, 1110FE7A */ 782#define T2 5.39682539762260521377e-02 /* 3FABA1BA, 1BB341FE */ 783#define T3 2.18694882948595424599e-02 /* 3F9664F4, 8406D637 */ 784#define T4 8.86323982359930005737e-03 /* 3F8226E3, E96E8493 */ 785#define T5 3.59207910759131235356e-03 /* 3F6D6D22, C9560328 */ 786#define T6 1.45620945432529025516e-03 /* 3F57DBC8, FEE08315 */ 787#define T7 5.88041240820264096874e-04 /* 3F4344D8, F2F26501 */ 788#define T8 2.46463134818469906812e-04 /* 3F3026F7, 1A8D1068 */ 789#define T9 7.81794442939557092300e-05 /* 3F147E88, A03792A6 */ 790#define T10 7.14072491382608190305e-05 /* 3F12B80F, 32F0A7E9 */ 791#define T11 -1.85586374855275456654e-05 /* BEF375CB, DB605373 */ 792#define T12 2.59073051863633712884e-05 /* 3EFB2A70, 74BF7AD4 */ 793#define pio4 7.85398163397448278999e-01 /* 3FE921FB, 54442D18 */ 794#define pio4lo 3.06161699786838301793e-17 /* 3C81A626, 33145C07 */ 795 796static double 797__kernel_tan (double x, double y, int iy) 798{ 799 double_accessor z; 800 double r, v, w, s; 801 int ix, hx; 802 803 hx = __HI (x); /* high word of x */ 804 ix = hx & 0x7fffffff; /* high word of |x| */ 805 if (ix < 0x3e300000) /* x < 2**-28 */ 806 { 807 if ((int) x == 0) /* generate inexact */ 808 { 809 if (((ix | __LO (x)) | (iy + 1)) == 0) 810 { 811 return one / fabs (x); 812 } 813 else 814 { 815 if (iy == 1) 816 { 817 return x; 818 } 819 else /* compute -1 / (x + y) carefully */ 820 { 821 double a; 822 double_accessor t; 823 824 z.dbl = w = x + y; 825 z.as_int.lo = 0; 826 v = y - (z.dbl - x); 827 t.dbl = a = -one / w; 828 t.as_int.lo = 0; 829 s = one + t.dbl * z.dbl; 830 return t.dbl + a * (s + t.dbl * v); 831 } 832 } 833 } 834 } 835 if (ix >= 0x3FE59428) /* |x| >= 0.6744 */ 836 { 837 if (hx < 0) 838 { 839 x = -x; 840 y = -y; 841 } 842 z.dbl = pio4 - x; 843 w = pio4lo - y; 844 x = z.dbl + w; 845 y = 0.0; 846 } 847 z.dbl = x * x; 848 w = z.dbl * z.dbl; 849 /* 850 * Break x^5 * (T[1] + x^2 * T[2] + ...) into 851 * x^5 (T[1] + x^4 * T[3] + ... + x^20 * T[11]) + 852 * x^5 (x^2 * (T[2] + x^4 * T[4] + ... + x^22 * [T12])) 853 */ 854 r = T1 + w * (T3 + w * (T5 + w * (T7 + w * (T9 + w * T11)))); 855 v = z.dbl * (T2 + w * (T4 + w * (T6 + w * (T8 + w * (T10 + w * T12))))); 856 s = z.dbl * x; 857 r = y + z.dbl * (s * (r + v) + y); 858 r += T0 * s; 859 w = x + r; 860 if (ix >= 0x3FE59428) 861 { 862 v = (double) iy; 863 return (double) (1 - ((hx >> 30) & 2)) * (v - 2.0 * (x - (w * w / (w + v) - r))); 864 } 865 if (iy == 1) 866 { 867 return w; 868 } 869 else 870 { 871 /* 872 * if allow error up to 2 ulp, simply return 873 * -1.0 / (x + r) here 874 */ 875 /* compute -1.0 / (x + r) accurately */ 876 double a; 877 double_accessor t; 878 879 z.dbl = w; 880 z.as_int.lo = 0; 881 v = r - (z.dbl - x); /* z + v = r + x */ 882 t.dbl = a = -1.0 / w; /* a = -1.0 / w */ 883 t.as_int.lo = 0; 884 s = 1.0 + t.dbl * z.dbl; 885 return t.dbl + a * (s + t.dbl * v); 886 } 887} /* __kernel_tan */ 888 889/* Method: 890 * Let S,C and T denote the sin, cos and tan respectively on 891 * [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2 892 * in [-pi/4 , +pi/4], and let n = k mod 4. 893 * We have 894 * 895 * n sin(x) cos(x) tan(x) 896 * ---------------------------------------------------------- 897 * 0 S C T 898 * 1 C -S -1/T 899 * 2 -S -C T 900 * 3 -C S -1/T 901 * ---------------------------------------------------------- 902 * 903 * Special cases: 904 * Let trig be any of sin, cos, or tan. 905 * trig(+-INF) is NaN, with signals; 906 * trig(NaN) is that NaN; 907 * 908 * Accuracy: 909 * TRIG(x) returns trig(x) nearly rounded 910 */ 911 912/* sin(x) 913 * Return sine function of x. 914 * 915 * kernel function: 916 * __kernel_sin ... sine function on [-pi/4,pi/4] 917 * __kernel_cos ... cose function on [-pi/4,pi/4] 918 * __ieee754_rem_pio2 ... argument reduction routine 919 */ 920double 921sin (double x) 922{ 923 double y[2], z = 0.0; 924 int n, ix; 925 926 /* High word of x. */ 927 ix = __HI (x); 928 929 /* |x| ~< pi/4 */ 930 ix &= 0x7fffffff; 931 if (ix <= 0x3fe921fb) 932 { 933 return __kernel_sin (x, z, 0); 934 } 935 936 /* sin(Inf or NaN) is NaN */ 937 else if (ix >= 0x7ff00000) 938 { 939 return x - x; 940 } 941 942 /* argument reduction needed */ 943 else 944 { 945 n = __ieee754_rem_pio2 (x, y); 946 switch (n & 3) 947 { 948 case 0: 949 { 950 return __kernel_sin (y[0], y[1], 1); 951 } 952 case 1: 953 { 954 return __kernel_cos (y[0], y[1]); 955 } 956 case 2: 957 { 958 return -__kernel_sin (y[0], y[1], 1); 959 } 960 default: 961 { 962 return -__kernel_cos (y[0], y[1]); 963 } 964 } 965 } 966} /* sin */ 967 968/* cos(x) 969 * Return cosine function of x. 970 * 971 * kernel function: 972 * __kernel_sin ... sine function on [-pi/4,pi/4] 973 * __kernel_cos ... cosine function on [-pi/4,pi/4] 974 * __ieee754_rem_pio2 ... argument reduction routine 975 */ 976 977double 978cos (double x) 979{ 980 double y[2], z = 0.0; 981 int n, ix; 982 983 /* High word of x. */ 984 ix = __HI (x); 985 986 /* |x| ~< pi/4 */ 987 ix &= 0x7fffffff; 988 if (ix <= 0x3fe921fb) 989 { 990 return __kernel_cos (x, z); 991 } 992 993 /* cos(Inf or NaN) is NaN */ 994 else if (ix >= 0x7ff00000) 995 { 996 return x - x; 997 } 998 999 /* argument reduction needed */ 1000 else 1001 { 1002 n = __ieee754_rem_pio2 (x, y); 1003 switch (n & 3) 1004 { 1005 case 0: 1006 { 1007 return __kernel_cos (y[0], y[1]); 1008 } 1009 case 1: 1010 { 1011 return -__kernel_sin (y[0], y[1], 1); 1012 } 1013 case 2: 1014 { 1015 return -__kernel_cos (y[0], y[1]); 1016 } 1017 default: 1018 { 1019 return __kernel_sin (y[0], y[1], 1); 1020 } 1021 } 1022 } 1023} /* cos */ 1024 1025/* tan(x) 1026 * Return tangent function of x. 1027 * 1028 * kernel function: 1029 * __kernel_tan ... tangent function on [-pi/4,pi/4] 1030 * __ieee754_rem_pio2 ... argument reduction routine 1031 */ 1032 1033double 1034tan (double x) 1035{ 1036 double y[2], z = 0.0; 1037 int n, ix; 1038 1039 /* High word of x. */ 1040 ix = __HI (x); 1041 1042 /* |x| ~< pi/4 */ 1043 ix &= 0x7fffffff; 1044 if (ix <= 0x3fe921fb) 1045 { 1046 return __kernel_tan (x, z, 1); 1047 } 1048 1049 /* tan(Inf or NaN) is NaN */ 1050 else if (ix >= 0x7ff00000) 1051 { 1052 return x - x; /* NaN */ 1053 } 1054 1055 /* argument reduction needed */ 1056 else 1057 { 1058 n = __ieee754_rem_pio2 (x, y); 1059 return __kernel_tan (y[0], y[1], 1 - ((n & 1) << 1)); /* 1 -- n even, -1 -- n odd */ 1060 } 1061} /* tan */ 1062 1063#undef zero 1064#undef half 1065#undef one 1066#undef two24 1067#undef twon24 1068#undef invpio2 1069#undef pio2_1 1070#undef pio2_1t 1071#undef pio2_2 1072#undef pio2_2t 1073#undef pio2_3 1074#undef pio2_3t 1075#undef S1 1076#undef S2 1077#undef S3 1078#undef S4 1079#undef S5 1080#undef S6 1081#undef C1 1082#undef C2 1083#undef C3 1084#undef C4 1085#undef C5 1086#undef C6 1087#undef T0 1088#undef T1 1089#undef T2 1090#undef T3 1091#undef T4 1092#undef T5 1093#undef T6 1094#undef T7 1095#undef T8 1096#undef T9 1097#undef T10 1098#undef T11 1099#undef T12 1100#undef pio4 1101#undef pio4lo 1102