162306a36Sopenharmony_ci/* 262306a36Sopenharmony_ci=============================================================================== 362306a36Sopenharmony_ci 462306a36Sopenharmony_ciThis C source file is part of the SoftFloat IEC/IEEE Floating-point 562306a36Sopenharmony_ciArithmetic Package, Release 2. 662306a36Sopenharmony_ci 762306a36Sopenharmony_ciWritten by John R. Hauser. This work was made possible in part by the 862306a36Sopenharmony_ciInternational Computer Science Institute, located at Suite 600, 1947 Center 962306a36Sopenharmony_ciStreet, Berkeley, California 94704. Funding was partially provided by the 1062306a36Sopenharmony_ciNational Science Foundation under grant MIP-9311980. The original version 1162306a36Sopenharmony_ciof this code was written as part of a project to build a fixed-point vector 1262306a36Sopenharmony_ciprocessor in collaboration with the University of California at Berkeley, 1362306a36Sopenharmony_cioverseen by Profs. Nelson Morgan and John Wawrzynek. More information 1462306a36Sopenharmony_ciis available through the web page 1562306a36Sopenharmony_cihttp://www.jhauser.us/arithmetic/SoftFloat-2b/SoftFloat-source.txt 1662306a36Sopenharmony_ci 1762306a36Sopenharmony_ciTHIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort 1862306a36Sopenharmony_cihas been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT 1962306a36Sopenharmony_ciTIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO 2062306a36Sopenharmony_ciPERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY 2162306a36Sopenharmony_ciAND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. 2262306a36Sopenharmony_ci 2362306a36Sopenharmony_ciDerivative works are acceptable, even for commercial purposes, so long as 2462306a36Sopenharmony_ci(1) they include prominent notice that the work is derivative, and (2) they 2562306a36Sopenharmony_ciinclude prominent notice akin to these three paragraphs for those parts of 2662306a36Sopenharmony_cithis code that are retained. 2762306a36Sopenharmony_ci 2862306a36Sopenharmony_ci=============================================================================== 2962306a36Sopenharmony_ci*/ 3062306a36Sopenharmony_ci 3162306a36Sopenharmony_ci#include <asm/div64.h> 3262306a36Sopenharmony_ci 3362306a36Sopenharmony_ci#include "fpa11.h" 3462306a36Sopenharmony_ci//#include "milieu.h" 3562306a36Sopenharmony_ci//#include "softfloat.h" 3662306a36Sopenharmony_ci 3762306a36Sopenharmony_ci/* 3862306a36Sopenharmony_ci------------------------------------------------------------------------------- 3962306a36Sopenharmony_ciPrimitive arithmetic functions, including multi-word arithmetic, and 4062306a36Sopenharmony_cidivision and square root approximations. (Can be specialized to target if 4162306a36Sopenharmony_cidesired.) 4262306a36Sopenharmony_ci------------------------------------------------------------------------------- 4362306a36Sopenharmony_ci*/ 4462306a36Sopenharmony_ci#include "softfloat-macros" 4562306a36Sopenharmony_ci 4662306a36Sopenharmony_ci/* 4762306a36Sopenharmony_ci------------------------------------------------------------------------------- 4862306a36Sopenharmony_ciFunctions and definitions to determine: (1) whether tininess for underflow 4962306a36Sopenharmony_ciis detected before or after rounding by default, (2) what (if anything) 5062306a36Sopenharmony_cihappens when exceptions are raised, (3) how signaling NaNs are distinguished 5162306a36Sopenharmony_cifrom quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs 5262306a36Sopenharmony_ciare propagated from function inputs to output. These details are target- 5362306a36Sopenharmony_cispecific. 5462306a36Sopenharmony_ci------------------------------------------------------------------------------- 5562306a36Sopenharmony_ci*/ 5662306a36Sopenharmony_ci#include "softfloat-specialize" 5762306a36Sopenharmony_ci 5862306a36Sopenharmony_ci/* 5962306a36Sopenharmony_ci------------------------------------------------------------------------------- 6062306a36Sopenharmony_ciTakes a 64-bit fixed-point value `absZ' with binary point between bits 6 6162306a36Sopenharmony_ciand 7, and returns the properly rounded 32-bit integer corresponding to the 6262306a36Sopenharmony_ciinput. If `zSign' is nonzero, the input is negated before being converted 6362306a36Sopenharmony_cito an integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point 6462306a36Sopenharmony_ciinput is simply rounded to an integer, with the inexact exception raised if 6562306a36Sopenharmony_cithe input cannot be represented exactly as an integer. If the fixed-point 6662306a36Sopenharmony_ciinput is too large, however, the invalid exception is raised and the largest 6762306a36Sopenharmony_cipositive or negative integer is returned. 6862306a36Sopenharmony_ci------------------------------------------------------------------------------- 6962306a36Sopenharmony_ci*/ 7062306a36Sopenharmony_cistatic int32 roundAndPackInt32( struct roundingData *roundData, flag zSign, bits64 absZ ) 7162306a36Sopenharmony_ci{ 7262306a36Sopenharmony_ci int8 roundingMode; 7362306a36Sopenharmony_ci flag roundNearestEven; 7462306a36Sopenharmony_ci int8 roundIncrement, roundBits; 7562306a36Sopenharmony_ci int32 z; 7662306a36Sopenharmony_ci 7762306a36Sopenharmony_ci roundingMode = roundData->mode; 7862306a36Sopenharmony_ci roundNearestEven = ( roundingMode == float_round_nearest_even ); 7962306a36Sopenharmony_ci roundIncrement = 0x40; 8062306a36Sopenharmony_ci if ( ! roundNearestEven ) { 8162306a36Sopenharmony_ci if ( roundingMode == float_round_to_zero ) { 8262306a36Sopenharmony_ci roundIncrement = 0; 8362306a36Sopenharmony_ci } 8462306a36Sopenharmony_ci else { 8562306a36Sopenharmony_ci roundIncrement = 0x7F; 8662306a36Sopenharmony_ci if ( zSign ) { 8762306a36Sopenharmony_ci if ( roundingMode == float_round_up ) roundIncrement = 0; 8862306a36Sopenharmony_ci } 8962306a36Sopenharmony_ci else { 9062306a36Sopenharmony_ci if ( roundingMode == float_round_down ) roundIncrement = 0; 9162306a36Sopenharmony_ci } 9262306a36Sopenharmony_ci } 9362306a36Sopenharmony_ci } 9462306a36Sopenharmony_ci roundBits = absZ & 0x7F; 9562306a36Sopenharmony_ci absZ = ( absZ + roundIncrement )>>7; 9662306a36Sopenharmony_ci absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); 9762306a36Sopenharmony_ci z = absZ; 9862306a36Sopenharmony_ci if ( zSign ) z = - z; 9962306a36Sopenharmony_ci if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) { 10062306a36Sopenharmony_ci roundData->exception |= float_flag_invalid; 10162306a36Sopenharmony_ci return zSign ? 0x80000000 : 0x7FFFFFFF; 10262306a36Sopenharmony_ci } 10362306a36Sopenharmony_ci if ( roundBits ) roundData->exception |= float_flag_inexact; 10462306a36Sopenharmony_ci return z; 10562306a36Sopenharmony_ci 10662306a36Sopenharmony_ci} 10762306a36Sopenharmony_ci 10862306a36Sopenharmony_ci/* 10962306a36Sopenharmony_ci------------------------------------------------------------------------------- 11062306a36Sopenharmony_ciReturns the fraction bits of the single-precision floating-point value `a'. 11162306a36Sopenharmony_ci------------------------------------------------------------------------------- 11262306a36Sopenharmony_ci*/ 11362306a36Sopenharmony_ciINLINE bits32 extractFloat32Frac( float32 a ) 11462306a36Sopenharmony_ci{ 11562306a36Sopenharmony_ci 11662306a36Sopenharmony_ci return a & 0x007FFFFF; 11762306a36Sopenharmony_ci 11862306a36Sopenharmony_ci} 11962306a36Sopenharmony_ci 12062306a36Sopenharmony_ci/* 12162306a36Sopenharmony_ci------------------------------------------------------------------------------- 12262306a36Sopenharmony_ciReturns the exponent bits of the single-precision floating-point value `a'. 12362306a36Sopenharmony_ci------------------------------------------------------------------------------- 12462306a36Sopenharmony_ci*/ 12562306a36Sopenharmony_ciINLINE int16 extractFloat32Exp( float32 a ) 12662306a36Sopenharmony_ci{ 12762306a36Sopenharmony_ci 12862306a36Sopenharmony_ci return ( a>>23 ) & 0xFF; 12962306a36Sopenharmony_ci 13062306a36Sopenharmony_ci} 13162306a36Sopenharmony_ci 13262306a36Sopenharmony_ci/* 13362306a36Sopenharmony_ci------------------------------------------------------------------------------- 13462306a36Sopenharmony_ciReturns the sign bit of the single-precision floating-point value `a'. 13562306a36Sopenharmony_ci------------------------------------------------------------------------------- 13662306a36Sopenharmony_ci*/ 13762306a36Sopenharmony_ci#if 0 /* in softfloat.h */ 13862306a36Sopenharmony_ciINLINE flag extractFloat32Sign( float32 a ) 13962306a36Sopenharmony_ci{ 14062306a36Sopenharmony_ci 14162306a36Sopenharmony_ci return a>>31; 14262306a36Sopenharmony_ci 14362306a36Sopenharmony_ci} 14462306a36Sopenharmony_ci#endif 14562306a36Sopenharmony_ci 14662306a36Sopenharmony_ci/* 14762306a36Sopenharmony_ci------------------------------------------------------------------------------- 14862306a36Sopenharmony_ciNormalizes the subnormal single-precision floating-point value represented 14962306a36Sopenharmony_ciby the denormalized significand `aSig'. The normalized exponent and 15062306a36Sopenharmony_cisignificand are stored at the locations pointed to by `zExpPtr' and 15162306a36Sopenharmony_ci`zSigPtr', respectively. 15262306a36Sopenharmony_ci------------------------------------------------------------------------------- 15362306a36Sopenharmony_ci*/ 15462306a36Sopenharmony_cistatic void 15562306a36Sopenharmony_ci normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr ) 15662306a36Sopenharmony_ci{ 15762306a36Sopenharmony_ci int8 shiftCount; 15862306a36Sopenharmony_ci 15962306a36Sopenharmony_ci shiftCount = countLeadingZeros32( aSig ) - 8; 16062306a36Sopenharmony_ci *zSigPtr = aSig<<shiftCount; 16162306a36Sopenharmony_ci *zExpPtr = 1 - shiftCount; 16262306a36Sopenharmony_ci 16362306a36Sopenharmony_ci} 16462306a36Sopenharmony_ci 16562306a36Sopenharmony_ci/* 16662306a36Sopenharmony_ci------------------------------------------------------------------------------- 16762306a36Sopenharmony_ciPacks the sign `zSign', exponent `zExp', and significand `zSig' into a 16862306a36Sopenharmony_cisingle-precision floating-point value, returning the result. After being 16962306a36Sopenharmony_cishifted into the proper positions, the three fields are simply added 17062306a36Sopenharmony_citogether to form the result. This means that any integer portion of `zSig' 17162306a36Sopenharmony_ciwill be added into the exponent. Since a properly normalized significand 17262306a36Sopenharmony_ciwill have an integer portion equal to 1, the `zExp' input should be 1 less 17362306a36Sopenharmony_cithan the desired result exponent whenever `zSig' is a complete, normalized 17462306a36Sopenharmony_cisignificand. 17562306a36Sopenharmony_ci------------------------------------------------------------------------------- 17662306a36Sopenharmony_ci*/ 17762306a36Sopenharmony_ciINLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig ) 17862306a36Sopenharmony_ci{ 17962306a36Sopenharmony_ci#if 0 18062306a36Sopenharmony_ci float32 f; 18162306a36Sopenharmony_ci __asm__("@ packFloat32 \n\ 18262306a36Sopenharmony_ci mov %0, %1, asl #31 \n\ 18362306a36Sopenharmony_ci orr %0, %2, asl #23 \n\ 18462306a36Sopenharmony_ci orr %0, %3" 18562306a36Sopenharmony_ci : /* no outputs */ 18662306a36Sopenharmony_ci : "g" (f), "g" (zSign), "g" (zExp), "g" (zSig) 18762306a36Sopenharmony_ci : "cc"); 18862306a36Sopenharmony_ci return f; 18962306a36Sopenharmony_ci#else 19062306a36Sopenharmony_ci return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig; 19162306a36Sopenharmony_ci#endif 19262306a36Sopenharmony_ci} 19362306a36Sopenharmony_ci 19462306a36Sopenharmony_ci/* 19562306a36Sopenharmony_ci------------------------------------------------------------------------------- 19662306a36Sopenharmony_ciTakes an abstract floating-point value having sign `zSign', exponent `zExp', 19762306a36Sopenharmony_ciand significand `zSig', and returns the proper single-precision floating- 19862306a36Sopenharmony_cipoint value corresponding to the abstract input. Ordinarily, the abstract 19962306a36Sopenharmony_civalue is simply rounded and packed into the single-precision format, with 20062306a36Sopenharmony_cithe inexact exception raised if the abstract input cannot be represented 20162306a36Sopenharmony_ciexactly. If the abstract value is too large, however, the overflow and 20262306a36Sopenharmony_ciinexact exceptions are raised and an infinity or maximal finite value is 20362306a36Sopenharmony_cireturned. If the abstract value is too small, the input value is rounded to 20462306a36Sopenharmony_cia subnormal number, and the underflow and inexact exceptions are raised if 20562306a36Sopenharmony_cithe abstract input cannot be represented exactly as a subnormal single- 20662306a36Sopenharmony_ciprecision floating-point number. 20762306a36Sopenharmony_ci The input significand `zSig' has its binary point between bits 30 20862306a36Sopenharmony_ciand 29, which is 7 bits to the left of the usual location. This shifted 20962306a36Sopenharmony_cisignificand must be normalized or smaller. If `zSig' is not normalized, 21062306a36Sopenharmony_ci`zExp' must be 0; in that case, the result returned is a subnormal number, 21162306a36Sopenharmony_ciand it must not require rounding. In the usual case that `zSig' is 21262306a36Sopenharmony_cinormalized, `zExp' must be 1 less than the ``true'' floating-point exponent. 21362306a36Sopenharmony_ciThe handling of underflow and overflow follows the IEC/IEEE Standard for 21462306a36Sopenharmony_ciBinary Floating-point Arithmetic. 21562306a36Sopenharmony_ci------------------------------------------------------------------------------- 21662306a36Sopenharmony_ci*/ 21762306a36Sopenharmony_cistatic float32 roundAndPackFloat32( struct roundingData *roundData, flag zSign, int16 zExp, bits32 zSig ) 21862306a36Sopenharmony_ci{ 21962306a36Sopenharmony_ci int8 roundingMode; 22062306a36Sopenharmony_ci flag roundNearestEven; 22162306a36Sopenharmony_ci int8 roundIncrement, roundBits; 22262306a36Sopenharmony_ci flag isTiny; 22362306a36Sopenharmony_ci 22462306a36Sopenharmony_ci roundingMode = roundData->mode; 22562306a36Sopenharmony_ci roundNearestEven = ( roundingMode == float_round_nearest_even ); 22662306a36Sopenharmony_ci roundIncrement = 0x40; 22762306a36Sopenharmony_ci if ( ! roundNearestEven ) { 22862306a36Sopenharmony_ci if ( roundingMode == float_round_to_zero ) { 22962306a36Sopenharmony_ci roundIncrement = 0; 23062306a36Sopenharmony_ci } 23162306a36Sopenharmony_ci else { 23262306a36Sopenharmony_ci roundIncrement = 0x7F; 23362306a36Sopenharmony_ci if ( zSign ) { 23462306a36Sopenharmony_ci if ( roundingMode == float_round_up ) roundIncrement = 0; 23562306a36Sopenharmony_ci } 23662306a36Sopenharmony_ci else { 23762306a36Sopenharmony_ci if ( roundingMode == float_round_down ) roundIncrement = 0; 23862306a36Sopenharmony_ci } 23962306a36Sopenharmony_ci } 24062306a36Sopenharmony_ci } 24162306a36Sopenharmony_ci roundBits = zSig & 0x7F; 24262306a36Sopenharmony_ci if ( 0xFD <= (bits16) zExp ) { 24362306a36Sopenharmony_ci if ( ( 0xFD < zExp ) 24462306a36Sopenharmony_ci || ( ( zExp == 0xFD ) 24562306a36Sopenharmony_ci && ( (sbits32) ( zSig + roundIncrement ) < 0 ) ) 24662306a36Sopenharmony_ci ) { 24762306a36Sopenharmony_ci roundData->exception |= float_flag_overflow | float_flag_inexact; 24862306a36Sopenharmony_ci return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 ); 24962306a36Sopenharmony_ci } 25062306a36Sopenharmony_ci if ( zExp < 0 ) { 25162306a36Sopenharmony_ci isTiny = 25262306a36Sopenharmony_ci ( float_detect_tininess == float_tininess_before_rounding ) 25362306a36Sopenharmony_ci || ( zExp < -1 ) 25462306a36Sopenharmony_ci || ( zSig + roundIncrement < 0x80000000 ); 25562306a36Sopenharmony_ci shift32RightJamming( zSig, - zExp, &zSig ); 25662306a36Sopenharmony_ci zExp = 0; 25762306a36Sopenharmony_ci roundBits = zSig & 0x7F; 25862306a36Sopenharmony_ci if ( isTiny && roundBits ) roundData->exception |= float_flag_underflow; 25962306a36Sopenharmony_ci } 26062306a36Sopenharmony_ci } 26162306a36Sopenharmony_ci if ( roundBits ) roundData->exception |= float_flag_inexact; 26262306a36Sopenharmony_ci zSig = ( zSig + roundIncrement )>>7; 26362306a36Sopenharmony_ci zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); 26462306a36Sopenharmony_ci if ( zSig == 0 ) zExp = 0; 26562306a36Sopenharmony_ci return packFloat32( zSign, zExp, zSig ); 26662306a36Sopenharmony_ci 26762306a36Sopenharmony_ci} 26862306a36Sopenharmony_ci 26962306a36Sopenharmony_ci/* 27062306a36Sopenharmony_ci------------------------------------------------------------------------------- 27162306a36Sopenharmony_ciTakes an abstract floating-point value having sign `zSign', exponent `zExp', 27262306a36Sopenharmony_ciand significand `zSig', and returns the proper single-precision floating- 27362306a36Sopenharmony_cipoint value corresponding to the abstract input. This routine is just like 27462306a36Sopenharmony_ci`roundAndPackFloat32' except that `zSig' does not have to be normalized in 27562306a36Sopenharmony_ciany way. In all cases, `zExp' must be 1 less than the ``true'' floating- 27662306a36Sopenharmony_cipoint exponent. 27762306a36Sopenharmony_ci------------------------------------------------------------------------------- 27862306a36Sopenharmony_ci*/ 27962306a36Sopenharmony_cistatic float32 28062306a36Sopenharmony_ci normalizeRoundAndPackFloat32( struct roundingData *roundData, flag zSign, int16 zExp, bits32 zSig ) 28162306a36Sopenharmony_ci{ 28262306a36Sopenharmony_ci int8 shiftCount; 28362306a36Sopenharmony_ci 28462306a36Sopenharmony_ci shiftCount = countLeadingZeros32( zSig ) - 1; 28562306a36Sopenharmony_ci return roundAndPackFloat32( roundData, zSign, zExp - shiftCount, zSig<<shiftCount ); 28662306a36Sopenharmony_ci 28762306a36Sopenharmony_ci} 28862306a36Sopenharmony_ci 28962306a36Sopenharmony_ci/* 29062306a36Sopenharmony_ci------------------------------------------------------------------------------- 29162306a36Sopenharmony_ciReturns the fraction bits of the double-precision floating-point value `a'. 29262306a36Sopenharmony_ci------------------------------------------------------------------------------- 29362306a36Sopenharmony_ci*/ 29462306a36Sopenharmony_ciINLINE bits64 extractFloat64Frac( float64 a ) 29562306a36Sopenharmony_ci{ 29662306a36Sopenharmony_ci 29762306a36Sopenharmony_ci return a & LIT64( 0x000FFFFFFFFFFFFF ); 29862306a36Sopenharmony_ci 29962306a36Sopenharmony_ci} 30062306a36Sopenharmony_ci 30162306a36Sopenharmony_ci/* 30262306a36Sopenharmony_ci------------------------------------------------------------------------------- 30362306a36Sopenharmony_ciReturns the exponent bits of the double-precision floating-point value `a'. 30462306a36Sopenharmony_ci------------------------------------------------------------------------------- 30562306a36Sopenharmony_ci*/ 30662306a36Sopenharmony_ciINLINE int16 extractFloat64Exp( float64 a ) 30762306a36Sopenharmony_ci{ 30862306a36Sopenharmony_ci 30962306a36Sopenharmony_ci return ( a>>52 ) & 0x7FF; 31062306a36Sopenharmony_ci 31162306a36Sopenharmony_ci} 31262306a36Sopenharmony_ci 31362306a36Sopenharmony_ci/* 31462306a36Sopenharmony_ci------------------------------------------------------------------------------- 31562306a36Sopenharmony_ciReturns the sign bit of the double-precision floating-point value `a'. 31662306a36Sopenharmony_ci------------------------------------------------------------------------------- 31762306a36Sopenharmony_ci*/ 31862306a36Sopenharmony_ci#if 0 /* in softfloat.h */ 31962306a36Sopenharmony_ciINLINE flag extractFloat64Sign( float64 a ) 32062306a36Sopenharmony_ci{ 32162306a36Sopenharmony_ci 32262306a36Sopenharmony_ci return a>>63; 32362306a36Sopenharmony_ci 32462306a36Sopenharmony_ci} 32562306a36Sopenharmony_ci#endif 32662306a36Sopenharmony_ci 32762306a36Sopenharmony_ci/* 32862306a36Sopenharmony_ci------------------------------------------------------------------------------- 32962306a36Sopenharmony_ciNormalizes the subnormal double-precision floating-point value represented 33062306a36Sopenharmony_ciby the denormalized significand `aSig'. The normalized exponent and 33162306a36Sopenharmony_cisignificand are stored at the locations pointed to by `zExpPtr' and 33262306a36Sopenharmony_ci`zSigPtr', respectively. 33362306a36Sopenharmony_ci------------------------------------------------------------------------------- 33462306a36Sopenharmony_ci*/ 33562306a36Sopenharmony_cistatic void 33662306a36Sopenharmony_ci normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr ) 33762306a36Sopenharmony_ci{ 33862306a36Sopenharmony_ci int8 shiftCount; 33962306a36Sopenharmony_ci 34062306a36Sopenharmony_ci shiftCount = countLeadingZeros64( aSig ) - 11; 34162306a36Sopenharmony_ci *zSigPtr = aSig<<shiftCount; 34262306a36Sopenharmony_ci *zExpPtr = 1 - shiftCount; 34362306a36Sopenharmony_ci 34462306a36Sopenharmony_ci} 34562306a36Sopenharmony_ci 34662306a36Sopenharmony_ci/* 34762306a36Sopenharmony_ci------------------------------------------------------------------------------- 34862306a36Sopenharmony_ciPacks the sign `zSign', exponent `zExp', and significand `zSig' into a 34962306a36Sopenharmony_cidouble-precision floating-point value, returning the result. After being 35062306a36Sopenharmony_cishifted into the proper positions, the three fields are simply added 35162306a36Sopenharmony_citogether to form the result. This means that any integer portion of `zSig' 35262306a36Sopenharmony_ciwill be added into the exponent. Since a properly normalized significand 35362306a36Sopenharmony_ciwill have an integer portion equal to 1, the `zExp' input should be 1 less 35462306a36Sopenharmony_cithan the desired result exponent whenever `zSig' is a complete, normalized 35562306a36Sopenharmony_cisignificand. 35662306a36Sopenharmony_ci------------------------------------------------------------------------------- 35762306a36Sopenharmony_ci*/ 35862306a36Sopenharmony_ciINLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig ) 35962306a36Sopenharmony_ci{ 36062306a36Sopenharmony_ci 36162306a36Sopenharmony_ci return ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<52 ) + zSig; 36262306a36Sopenharmony_ci 36362306a36Sopenharmony_ci} 36462306a36Sopenharmony_ci 36562306a36Sopenharmony_ci/* 36662306a36Sopenharmony_ci------------------------------------------------------------------------------- 36762306a36Sopenharmony_ciTakes an abstract floating-point value having sign `zSign', exponent `zExp', 36862306a36Sopenharmony_ciand significand `zSig', and returns the proper double-precision floating- 36962306a36Sopenharmony_cipoint value corresponding to the abstract input. Ordinarily, the abstract 37062306a36Sopenharmony_civalue is simply rounded and packed into the double-precision format, with 37162306a36Sopenharmony_cithe inexact exception raised if the abstract input cannot be represented 37262306a36Sopenharmony_ciexactly. If the abstract value is too large, however, the overflow and 37362306a36Sopenharmony_ciinexact exceptions are raised and an infinity or maximal finite value is 37462306a36Sopenharmony_cireturned. If the abstract value is too small, the input value is rounded to 37562306a36Sopenharmony_cia subnormal number, and the underflow and inexact exceptions are raised if 37662306a36Sopenharmony_cithe abstract input cannot be represented exactly as a subnormal double- 37762306a36Sopenharmony_ciprecision floating-point number. 37862306a36Sopenharmony_ci The input significand `zSig' has its binary point between bits 62 37962306a36Sopenharmony_ciand 61, which is 10 bits to the left of the usual location. This shifted 38062306a36Sopenharmony_cisignificand must be normalized or smaller. If `zSig' is not normalized, 38162306a36Sopenharmony_ci`zExp' must be 0; in that case, the result returned is a subnormal number, 38262306a36Sopenharmony_ciand it must not require rounding. In the usual case that `zSig' is 38362306a36Sopenharmony_cinormalized, `zExp' must be 1 less than the ``true'' floating-point exponent. 38462306a36Sopenharmony_ciThe handling of underflow and overflow follows the IEC/IEEE Standard for 38562306a36Sopenharmony_ciBinary Floating-point Arithmetic. 38662306a36Sopenharmony_ci------------------------------------------------------------------------------- 38762306a36Sopenharmony_ci*/ 38862306a36Sopenharmony_cistatic float64 roundAndPackFloat64( struct roundingData *roundData, flag zSign, int16 zExp, bits64 zSig ) 38962306a36Sopenharmony_ci{ 39062306a36Sopenharmony_ci int8 roundingMode; 39162306a36Sopenharmony_ci flag roundNearestEven; 39262306a36Sopenharmony_ci int16 roundIncrement, roundBits; 39362306a36Sopenharmony_ci flag isTiny; 39462306a36Sopenharmony_ci 39562306a36Sopenharmony_ci roundingMode = roundData->mode; 39662306a36Sopenharmony_ci roundNearestEven = ( roundingMode == float_round_nearest_even ); 39762306a36Sopenharmony_ci roundIncrement = 0x200; 39862306a36Sopenharmony_ci if ( ! roundNearestEven ) { 39962306a36Sopenharmony_ci if ( roundingMode == float_round_to_zero ) { 40062306a36Sopenharmony_ci roundIncrement = 0; 40162306a36Sopenharmony_ci } 40262306a36Sopenharmony_ci else { 40362306a36Sopenharmony_ci roundIncrement = 0x3FF; 40462306a36Sopenharmony_ci if ( zSign ) { 40562306a36Sopenharmony_ci if ( roundingMode == float_round_up ) roundIncrement = 0; 40662306a36Sopenharmony_ci } 40762306a36Sopenharmony_ci else { 40862306a36Sopenharmony_ci if ( roundingMode == float_round_down ) roundIncrement = 0; 40962306a36Sopenharmony_ci } 41062306a36Sopenharmony_ci } 41162306a36Sopenharmony_ci } 41262306a36Sopenharmony_ci roundBits = zSig & 0x3FF; 41362306a36Sopenharmony_ci if ( 0x7FD <= (bits16) zExp ) { 41462306a36Sopenharmony_ci if ( ( 0x7FD < zExp ) 41562306a36Sopenharmony_ci || ( ( zExp == 0x7FD ) 41662306a36Sopenharmony_ci && ( (sbits64) ( zSig + roundIncrement ) < 0 ) ) 41762306a36Sopenharmony_ci ) { 41862306a36Sopenharmony_ci //register int lr = __builtin_return_address(0); 41962306a36Sopenharmony_ci //printk("roundAndPackFloat64 called from 0x%08x\n",lr); 42062306a36Sopenharmony_ci roundData->exception |= float_flag_overflow | float_flag_inexact; 42162306a36Sopenharmony_ci return packFloat64( zSign, 0x7FF, 0 ) - ( roundIncrement == 0 ); 42262306a36Sopenharmony_ci } 42362306a36Sopenharmony_ci if ( zExp < 0 ) { 42462306a36Sopenharmony_ci isTiny = 42562306a36Sopenharmony_ci ( float_detect_tininess == float_tininess_before_rounding ) 42662306a36Sopenharmony_ci || ( zExp < -1 ) 42762306a36Sopenharmony_ci || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) ); 42862306a36Sopenharmony_ci shift64RightJamming( zSig, - zExp, &zSig ); 42962306a36Sopenharmony_ci zExp = 0; 43062306a36Sopenharmony_ci roundBits = zSig & 0x3FF; 43162306a36Sopenharmony_ci if ( isTiny && roundBits ) roundData->exception |= float_flag_underflow; 43262306a36Sopenharmony_ci } 43362306a36Sopenharmony_ci } 43462306a36Sopenharmony_ci if ( roundBits ) roundData->exception |= float_flag_inexact; 43562306a36Sopenharmony_ci zSig = ( zSig + roundIncrement )>>10; 43662306a36Sopenharmony_ci zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven ); 43762306a36Sopenharmony_ci if ( zSig == 0 ) zExp = 0; 43862306a36Sopenharmony_ci return packFloat64( zSign, zExp, zSig ); 43962306a36Sopenharmony_ci 44062306a36Sopenharmony_ci} 44162306a36Sopenharmony_ci 44262306a36Sopenharmony_ci/* 44362306a36Sopenharmony_ci------------------------------------------------------------------------------- 44462306a36Sopenharmony_ciTakes an abstract floating-point value having sign `zSign', exponent `zExp', 44562306a36Sopenharmony_ciand significand `zSig', and returns the proper double-precision floating- 44662306a36Sopenharmony_cipoint value corresponding to the abstract input. This routine is just like 44762306a36Sopenharmony_ci`roundAndPackFloat64' except that `zSig' does not have to be normalized in 44862306a36Sopenharmony_ciany way. In all cases, `zExp' must be 1 less than the ``true'' floating- 44962306a36Sopenharmony_cipoint exponent. 45062306a36Sopenharmony_ci------------------------------------------------------------------------------- 45162306a36Sopenharmony_ci*/ 45262306a36Sopenharmony_cistatic float64 45362306a36Sopenharmony_ci normalizeRoundAndPackFloat64( struct roundingData *roundData, flag zSign, int16 zExp, bits64 zSig ) 45462306a36Sopenharmony_ci{ 45562306a36Sopenharmony_ci int8 shiftCount; 45662306a36Sopenharmony_ci 45762306a36Sopenharmony_ci shiftCount = countLeadingZeros64( zSig ) - 1; 45862306a36Sopenharmony_ci return roundAndPackFloat64( roundData, zSign, zExp - shiftCount, zSig<<shiftCount ); 45962306a36Sopenharmony_ci 46062306a36Sopenharmony_ci} 46162306a36Sopenharmony_ci 46262306a36Sopenharmony_ci#ifdef FLOATX80 46362306a36Sopenharmony_ci 46462306a36Sopenharmony_ci/* 46562306a36Sopenharmony_ci------------------------------------------------------------------------------- 46662306a36Sopenharmony_ciReturns the fraction bits of the extended double-precision floating-point 46762306a36Sopenharmony_civalue `a'. 46862306a36Sopenharmony_ci------------------------------------------------------------------------------- 46962306a36Sopenharmony_ci*/ 47062306a36Sopenharmony_ciINLINE bits64 extractFloatx80Frac( floatx80 a ) 47162306a36Sopenharmony_ci{ 47262306a36Sopenharmony_ci 47362306a36Sopenharmony_ci return a.low; 47462306a36Sopenharmony_ci 47562306a36Sopenharmony_ci} 47662306a36Sopenharmony_ci 47762306a36Sopenharmony_ci/* 47862306a36Sopenharmony_ci------------------------------------------------------------------------------- 47962306a36Sopenharmony_ciReturns the exponent bits of the extended double-precision floating-point 48062306a36Sopenharmony_civalue `a'. 48162306a36Sopenharmony_ci------------------------------------------------------------------------------- 48262306a36Sopenharmony_ci*/ 48362306a36Sopenharmony_ciINLINE int32 extractFloatx80Exp( floatx80 a ) 48462306a36Sopenharmony_ci{ 48562306a36Sopenharmony_ci 48662306a36Sopenharmony_ci return a.high & 0x7FFF; 48762306a36Sopenharmony_ci 48862306a36Sopenharmony_ci} 48962306a36Sopenharmony_ci 49062306a36Sopenharmony_ci/* 49162306a36Sopenharmony_ci------------------------------------------------------------------------------- 49262306a36Sopenharmony_ciReturns the sign bit of the extended double-precision floating-point value 49362306a36Sopenharmony_ci`a'. 49462306a36Sopenharmony_ci------------------------------------------------------------------------------- 49562306a36Sopenharmony_ci*/ 49662306a36Sopenharmony_ciINLINE flag extractFloatx80Sign( floatx80 a ) 49762306a36Sopenharmony_ci{ 49862306a36Sopenharmony_ci 49962306a36Sopenharmony_ci return a.high>>15; 50062306a36Sopenharmony_ci 50162306a36Sopenharmony_ci} 50262306a36Sopenharmony_ci 50362306a36Sopenharmony_ci/* 50462306a36Sopenharmony_ci------------------------------------------------------------------------------- 50562306a36Sopenharmony_ciNormalizes the subnormal extended double-precision floating-point value 50662306a36Sopenharmony_cirepresented by the denormalized significand `aSig'. The normalized exponent 50762306a36Sopenharmony_ciand significand are stored at the locations pointed to by `zExpPtr' and 50862306a36Sopenharmony_ci`zSigPtr', respectively. 50962306a36Sopenharmony_ci------------------------------------------------------------------------------- 51062306a36Sopenharmony_ci*/ 51162306a36Sopenharmony_cistatic void 51262306a36Sopenharmony_ci normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr ) 51362306a36Sopenharmony_ci{ 51462306a36Sopenharmony_ci int8 shiftCount; 51562306a36Sopenharmony_ci 51662306a36Sopenharmony_ci shiftCount = countLeadingZeros64( aSig ); 51762306a36Sopenharmony_ci *zSigPtr = aSig<<shiftCount; 51862306a36Sopenharmony_ci *zExpPtr = 1 - shiftCount; 51962306a36Sopenharmony_ci 52062306a36Sopenharmony_ci} 52162306a36Sopenharmony_ci 52262306a36Sopenharmony_ci/* 52362306a36Sopenharmony_ci------------------------------------------------------------------------------- 52462306a36Sopenharmony_ciPacks the sign `zSign', exponent `zExp', and significand `zSig' into an 52562306a36Sopenharmony_ciextended double-precision floating-point value, returning the result. 52662306a36Sopenharmony_ci------------------------------------------------------------------------------- 52762306a36Sopenharmony_ci*/ 52862306a36Sopenharmony_ciINLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig ) 52962306a36Sopenharmony_ci{ 53062306a36Sopenharmony_ci floatx80 z; 53162306a36Sopenharmony_ci 53262306a36Sopenharmony_ci z.low = zSig; 53362306a36Sopenharmony_ci z.high = ( ( (bits16) zSign )<<15 ) + zExp; 53462306a36Sopenharmony_ci z.__padding = 0; 53562306a36Sopenharmony_ci return z; 53662306a36Sopenharmony_ci 53762306a36Sopenharmony_ci} 53862306a36Sopenharmony_ci 53962306a36Sopenharmony_ci/* 54062306a36Sopenharmony_ci------------------------------------------------------------------------------- 54162306a36Sopenharmony_ciTakes an abstract floating-point value having sign `zSign', exponent `zExp', 54262306a36Sopenharmony_ciand extended significand formed by the concatenation of `zSig0' and `zSig1', 54362306a36Sopenharmony_ciand returns the proper extended double-precision floating-point value 54462306a36Sopenharmony_cicorresponding to the abstract input. Ordinarily, the abstract value is 54562306a36Sopenharmony_cirounded and packed into the extended double-precision format, with the 54662306a36Sopenharmony_ciinexact exception raised if the abstract input cannot be represented 54762306a36Sopenharmony_ciexactly. If the abstract value is too large, however, the overflow and 54862306a36Sopenharmony_ciinexact exceptions are raised and an infinity or maximal finite value is 54962306a36Sopenharmony_cireturned. If the abstract value is too small, the input value is rounded to 55062306a36Sopenharmony_cia subnormal number, and the underflow and inexact exceptions are raised if 55162306a36Sopenharmony_cithe abstract input cannot be represented exactly as a subnormal extended 55262306a36Sopenharmony_cidouble-precision floating-point number. 55362306a36Sopenharmony_ci If `roundingPrecision' is 32 or 64, the result is rounded to the same 55462306a36Sopenharmony_cinumber of bits as single or double precision, respectively. Otherwise, the 55562306a36Sopenharmony_ciresult is rounded to the full precision of the extended double-precision 55662306a36Sopenharmony_ciformat. 55762306a36Sopenharmony_ci The input significand must be normalized or smaller. If the input 55862306a36Sopenharmony_cisignificand is not normalized, `zExp' must be 0; in that case, the result 55962306a36Sopenharmony_cireturned is a subnormal number, and it must not require rounding. The 56062306a36Sopenharmony_cihandling of underflow and overflow follows the IEC/IEEE Standard for Binary 56162306a36Sopenharmony_ciFloating-point Arithmetic. 56262306a36Sopenharmony_ci------------------------------------------------------------------------------- 56362306a36Sopenharmony_ci*/ 56462306a36Sopenharmony_cistatic floatx80 56562306a36Sopenharmony_ci roundAndPackFloatx80( 56662306a36Sopenharmony_ci struct roundingData *roundData, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 56762306a36Sopenharmony_ci ) 56862306a36Sopenharmony_ci{ 56962306a36Sopenharmony_ci int8 roundingMode, roundingPrecision; 57062306a36Sopenharmony_ci flag roundNearestEven, increment, isTiny; 57162306a36Sopenharmony_ci int64 roundIncrement, roundMask, roundBits; 57262306a36Sopenharmony_ci 57362306a36Sopenharmony_ci roundingMode = roundData->mode; 57462306a36Sopenharmony_ci roundingPrecision = roundData->precision; 57562306a36Sopenharmony_ci roundNearestEven = ( roundingMode == float_round_nearest_even ); 57662306a36Sopenharmony_ci if ( roundingPrecision == 80 ) goto precision80; 57762306a36Sopenharmony_ci if ( roundingPrecision == 64 ) { 57862306a36Sopenharmony_ci roundIncrement = LIT64( 0x0000000000000400 ); 57962306a36Sopenharmony_ci roundMask = LIT64( 0x00000000000007FF ); 58062306a36Sopenharmony_ci } 58162306a36Sopenharmony_ci else if ( roundingPrecision == 32 ) { 58262306a36Sopenharmony_ci roundIncrement = LIT64( 0x0000008000000000 ); 58362306a36Sopenharmony_ci roundMask = LIT64( 0x000000FFFFFFFFFF ); 58462306a36Sopenharmony_ci } 58562306a36Sopenharmony_ci else { 58662306a36Sopenharmony_ci goto precision80; 58762306a36Sopenharmony_ci } 58862306a36Sopenharmony_ci zSig0 |= ( zSig1 != 0 ); 58962306a36Sopenharmony_ci if ( ! roundNearestEven ) { 59062306a36Sopenharmony_ci if ( roundingMode == float_round_to_zero ) { 59162306a36Sopenharmony_ci roundIncrement = 0; 59262306a36Sopenharmony_ci } 59362306a36Sopenharmony_ci else { 59462306a36Sopenharmony_ci roundIncrement = roundMask; 59562306a36Sopenharmony_ci if ( zSign ) { 59662306a36Sopenharmony_ci if ( roundingMode == float_round_up ) roundIncrement = 0; 59762306a36Sopenharmony_ci } 59862306a36Sopenharmony_ci else { 59962306a36Sopenharmony_ci if ( roundingMode == float_round_down ) roundIncrement = 0; 60062306a36Sopenharmony_ci } 60162306a36Sopenharmony_ci } 60262306a36Sopenharmony_ci } 60362306a36Sopenharmony_ci roundBits = zSig0 & roundMask; 60462306a36Sopenharmony_ci if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) { 60562306a36Sopenharmony_ci if ( ( 0x7FFE < zExp ) 60662306a36Sopenharmony_ci || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) ) 60762306a36Sopenharmony_ci ) { 60862306a36Sopenharmony_ci goto overflow; 60962306a36Sopenharmony_ci } 61062306a36Sopenharmony_ci if ( zExp <= 0 ) { 61162306a36Sopenharmony_ci isTiny = 61262306a36Sopenharmony_ci ( float_detect_tininess == float_tininess_before_rounding ) 61362306a36Sopenharmony_ci || ( zExp < 0 ) 61462306a36Sopenharmony_ci || ( zSig0 <= zSig0 + roundIncrement ); 61562306a36Sopenharmony_ci shift64RightJamming( zSig0, 1 - zExp, &zSig0 ); 61662306a36Sopenharmony_ci zExp = 0; 61762306a36Sopenharmony_ci roundBits = zSig0 & roundMask; 61862306a36Sopenharmony_ci if ( isTiny && roundBits ) roundData->exception |= float_flag_underflow; 61962306a36Sopenharmony_ci if ( roundBits ) roundData->exception |= float_flag_inexact; 62062306a36Sopenharmony_ci zSig0 += roundIncrement; 62162306a36Sopenharmony_ci if ( (sbits64) zSig0 < 0 ) zExp = 1; 62262306a36Sopenharmony_ci roundIncrement = roundMask + 1; 62362306a36Sopenharmony_ci if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) { 62462306a36Sopenharmony_ci roundMask |= roundIncrement; 62562306a36Sopenharmony_ci } 62662306a36Sopenharmony_ci zSig0 &= ~ roundMask; 62762306a36Sopenharmony_ci return packFloatx80( zSign, zExp, zSig0 ); 62862306a36Sopenharmony_ci } 62962306a36Sopenharmony_ci } 63062306a36Sopenharmony_ci if ( roundBits ) roundData->exception |= float_flag_inexact; 63162306a36Sopenharmony_ci zSig0 += roundIncrement; 63262306a36Sopenharmony_ci if ( zSig0 < roundIncrement ) { 63362306a36Sopenharmony_ci ++zExp; 63462306a36Sopenharmony_ci zSig0 = LIT64( 0x8000000000000000 ); 63562306a36Sopenharmony_ci } 63662306a36Sopenharmony_ci roundIncrement = roundMask + 1; 63762306a36Sopenharmony_ci if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) { 63862306a36Sopenharmony_ci roundMask |= roundIncrement; 63962306a36Sopenharmony_ci } 64062306a36Sopenharmony_ci zSig0 &= ~ roundMask; 64162306a36Sopenharmony_ci if ( zSig0 == 0 ) zExp = 0; 64262306a36Sopenharmony_ci return packFloatx80( zSign, zExp, zSig0 ); 64362306a36Sopenharmony_ci precision80: 64462306a36Sopenharmony_ci increment = ( (sbits64) zSig1 < 0 ); 64562306a36Sopenharmony_ci if ( ! roundNearestEven ) { 64662306a36Sopenharmony_ci if ( roundingMode == float_round_to_zero ) { 64762306a36Sopenharmony_ci increment = 0; 64862306a36Sopenharmony_ci } 64962306a36Sopenharmony_ci else { 65062306a36Sopenharmony_ci if ( zSign ) { 65162306a36Sopenharmony_ci increment = ( roundingMode == float_round_down ) && zSig1; 65262306a36Sopenharmony_ci } 65362306a36Sopenharmony_ci else { 65462306a36Sopenharmony_ci increment = ( roundingMode == float_round_up ) && zSig1; 65562306a36Sopenharmony_ci } 65662306a36Sopenharmony_ci } 65762306a36Sopenharmony_ci } 65862306a36Sopenharmony_ci if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) { 65962306a36Sopenharmony_ci if ( ( 0x7FFE < zExp ) 66062306a36Sopenharmony_ci || ( ( zExp == 0x7FFE ) 66162306a36Sopenharmony_ci && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) ) 66262306a36Sopenharmony_ci && increment 66362306a36Sopenharmony_ci ) 66462306a36Sopenharmony_ci ) { 66562306a36Sopenharmony_ci roundMask = 0; 66662306a36Sopenharmony_ci overflow: 66762306a36Sopenharmony_ci roundData->exception |= float_flag_overflow | float_flag_inexact; 66862306a36Sopenharmony_ci if ( ( roundingMode == float_round_to_zero ) 66962306a36Sopenharmony_ci || ( zSign && ( roundingMode == float_round_up ) ) 67062306a36Sopenharmony_ci || ( ! zSign && ( roundingMode == float_round_down ) ) 67162306a36Sopenharmony_ci ) { 67262306a36Sopenharmony_ci return packFloatx80( zSign, 0x7FFE, ~ roundMask ); 67362306a36Sopenharmony_ci } 67462306a36Sopenharmony_ci return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); 67562306a36Sopenharmony_ci } 67662306a36Sopenharmony_ci if ( zExp <= 0 ) { 67762306a36Sopenharmony_ci isTiny = 67862306a36Sopenharmony_ci ( float_detect_tininess == float_tininess_before_rounding ) 67962306a36Sopenharmony_ci || ( zExp < 0 ) 68062306a36Sopenharmony_ci || ! increment 68162306a36Sopenharmony_ci || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) ); 68262306a36Sopenharmony_ci shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 ); 68362306a36Sopenharmony_ci zExp = 0; 68462306a36Sopenharmony_ci if ( isTiny && zSig1 ) roundData->exception |= float_flag_underflow; 68562306a36Sopenharmony_ci if ( zSig1 ) roundData->exception |= float_flag_inexact; 68662306a36Sopenharmony_ci if ( roundNearestEven ) { 68762306a36Sopenharmony_ci increment = ( (sbits64) zSig1 < 0 ); 68862306a36Sopenharmony_ci } 68962306a36Sopenharmony_ci else { 69062306a36Sopenharmony_ci if ( zSign ) { 69162306a36Sopenharmony_ci increment = ( roundingMode == float_round_down ) && zSig1; 69262306a36Sopenharmony_ci } 69362306a36Sopenharmony_ci else { 69462306a36Sopenharmony_ci increment = ( roundingMode == float_round_up ) && zSig1; 69562306a36Sopenharmony_ci } 69662306a36Sopenharmony_ci } 69762306a36Sopenharmony_ci if ( increment ) { 69862306a36Sopenharmony_ci ++zSig0; 69962306a36Sopenharmony_ci zSig0 &= ~ ( ( zSig1 + zSig1 == 0 ) & roundNearestEven ); 70062306a36Sopenharmony_ci if ( (sbits64) zSig0 < 0 ) zExp = 1; 70162306a36Sopenharmony_ci } 70262306a36Sopenharmony_ci return packFloatx80( zSign, zExp, zSig0 ); 70362306a36Sopenharmony_ci } 70462306a36Sopenharmony_ci } 70562306a36Sopenharmony_ci if ( zSig1 ) roundData->exception |= float_flag_inexact; 70662306a36Sopenharmony_ci if ( increment ) { 70762306a36Sopenharmony_ci ++zSig0; 70862306a36Sopenharmony_ci if ( zSig0 == 0 ) { 70962306a36Sopenharmony_ci ++zExp; 71062306a36Sopenharmony_ci zSig0 = LIT64( 0x8000000000000000 ); 71162306a36Sopenharmony_ci } 71262306a36Sopenharmony_ci else { 71362306a36Sopenharmony_ci zSig0 &= ~ ( ( zSig1 + zSig1 == 0 ) & roundNearestEven ); 71462306a36Sopenharmony_ci } 71562306a36Sopenharmony_ci } 71662306a36Sopenharmony_ci else { 71762306a36Sopenharmony_ci if ( zSig0 == 0 ) zExp = 0; 71862306a36Sopenharmony_ci } 71962306a36Sopenharmony_ci 72062306a36Sopenharmony_ci return packFloatx80( zSign, zExp, zSig0 ); 72162306a36Sopenharmony_ci} 72262306a36Sopenharmony_ci 72362306a36Sopenharmony_ci/* 72462306a36Sopenharmony_ci------------------------------------------------------------------------------- 72562306a36Sopenharmony_ciTakes an abstract floating-point value having sign `zSign', exponent 72662306a36Sopenharmony_ci`zExp', and significand formed by the concatenation of `zSig0' and `zSig1', 72762306a36Sopenharmony_ciand returns the proper extended double-precision floating-point value 72862306a36Sopenharmony_cicorresponding to the abstract input. This routine is just like 72962306a36Sopenharmony_ci`roundAndPackFloatx80' except that the input significand does not have to be 73062306a36Sopenharmony_cinormalized. 73162306a36Sopenharmony_ci------------------------------------------------------------------------------- 73262306a36Sopenharmony_ci*/ 73362306a36Sopenharmony_cistatic floatx80 73462306a36Sopenharmony_ci normalizeRoundAndPackFloatx80( 73562306a36Sopenharmony_ci struct roundingData *roundData, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 73662306a36Sopenharmony_ci ) 73762306a36Sopenharmony_ci{ 73862306a36Sopenharmony_ci int8 shiftCount; 73962306a36Sopenharmony_ci 74062306a36Sopenharmony_ci if ( zSig0 == 0 ) { 74162306a36Sopenharmony_ci zSig0 = zSig1; 74262306a36Sopenharmony_ci zSig1 = 0; 74362306a36Sopenharmony_ci zExp -= 64; 74462306a36Sopenharmony_ci } 74562306a36Sopenharmony_ci shiftCount = countLeadingZeros64( zSig0 ); 74662306a36Sopenharmony_ci shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); 74762306a36Sopenharmony_ci zExp -= shiftCount; 74862306a36Sopenharmony_ci return 74962306a36Sopenharmony_ci roundAndPackFloatx80( roundData, zSign, zExp, zSig0, zSig1 ); 75062306a36Sopenharmony_ci 75162306a36Sopenharmony_ci} 75262306a36Sopenharmony_ci 75362306a36Sopenharmony_ci#endif 75462306a36Sopenharmony_ci 75562306a36Sopenharmony_ci/* 75662306a36Sopenharmony_ci------------------------------------------------------------------------------- 75762306a36Sopenharmony_ciReturns the result of converting the 32-bit two's complement integer `a' to 75862306a36Sopenharmony_cithe single-precision floating-point format. The conversion is performed 75962306a36Sopenharmony_ciaccording to the IEC/IEEE Standard for Binary Floating-point Arithmetic. 76062306a36Sopenharmony_ci------------------------------------------------------------------------------- 76162306a36Sopenharmony_ci*/ 76262306a36Sopenharmony_cifloat32 int32_to_float32(struct roundingData *roundData, int32 a) 76362306a36Sopenharmony_ci{ 76462306a36Sopenharmony_ci flag zSign; 76562306a36Sopenharmony_ci 76662306a36Sopenharmony_ci if ( a == 0 ) return 0; 76762306a36Sopenharmony_ci if ( a == 0x80000000 ) return packFloat32( 1, 0x9E, 0 ); 76862306a36Sopenharmony_ci zSign = ( a < 0 ); 76962306a36Sopenharmony_ci return normalizeRoundAndPackFloat32( roundData, zSign, 0x9C, zSign ? - a : a ); 77062306a36Sopenharmony_ci 77162306a36Sopenharmony_ci} 77262306a36Sopenharmony_ci 77362306a36Sopenharmony_ci/* 77462306a36Sopenharmony_ci------------------------------------------------------------------------------- 77562306a36Sopenharmony_ciReturns the result of converting the 32-bit two's complement integer `a' to 77662306a36Sopenharmony_cithe double-precision floating-point format. The conversion is performed 77762306a36Sopenharmony_ciaccording to the IEC/IEEE Standard for Binary Floating-point Arithmetic. 77862306a36Sopenharmony_ci------------------------------------------------------------------------------- 77962306a36Sopenharmony_ci*/ 78062306a36Sopenharmony_cifloat64 int32_to_float64( int32 a ) 78162306a36Sopenharmony_ci{ 78262306a36Sopenharmony_ci flag aSign; 78362306a36Sopenharmony_ci uint32 absA; 78462306a36Sopenharmony_ci int8 shiftCount; 78562306a36Sopenharmony_ci bits64 zSig; 78662306a36Sopenharmony_ci 78762306a36Sopenharmony_ci if ( a == 0 ) return 0; 78862306a36Sopenharmony_ci aSign = ( a < 0 ); 78962306a36Sopenharmony_ci absA = aSign ? - a : a; 79062306a36Sopenharmony_ci shiftCount = countLeadingZeros32( absA ) + 21; 79162306a36Sopenharmony_ci zSig = absA; 79262306a36Sopenharmony_ci return packFloat64( aSign, 0x432 - shiftCount, zSig<<shiftCount ); 79362306a36Sopenharmony_ci 79462306a36Sopenharmony_ci} 79562306a36Sopenharmony_ci 79662306a36Sopenharmony_ci#ifdef FLOATX80 79762306a36Sopenharmony_ci 79862306a36Sopenharmony_ci/* 79962306a36Sopenharmony_ci------------------------------------------------------------------------------- 80062306a36Sopenharmony_ciReturns the result of converting the 32-bit two's complement integer `a' 80162306a36Sopenharmony_cito the extended double-precision floating-point format. The conversion 80262306a36Sopenharmony_ciis performed according to the IEC/IEEE Standard for Binary Floating-point 80362306a36Sopenharmony_ciArithmetic. 80462306a36Sopenharmony_ci------------------------------------------------------------------------------- 80562306a36Sopenharmony_ci*/ 80662306a36Sopenharmony_cifloatx80 int32_to_floatx80( int32 a ) 80762306a36Sopenharmony_ci{ 80862306a36Sopenharmony_ci flag zSign; 80962306a36Sopenharmony_ci uint32 absA; 81062306a36Sopenharmony_ci int8 shiftCount; 81162306a36Sopenharmony_ci bits64 zSig; 81262306a36Sopenharmony_ci 81362306a36Sopenharmony_ci if ( a == 0 ) return packFloatx80( 0, 0, 0 ); 81462306a36Sopenharmony_ci zSign = ( a < 0 ); 81562306a36Sopenharmony_ci absA = zSign ? - a : a; 81662306a36Sopenharmony_ci shiftCount = countLeadingZeros32( absA ) + 32; 81762306a36Sopenharmony_ci zSig = absA; 81862306a36Sopenharmony_ci return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount ); 81962306a36Sopenharmony_ci 82062306a36Sopenharmony_ci} 82162306a36Sopenharmony_ci 82262306a36Sopenharmony_ci#endif 82362306a36Sopenharmony_ci 82462306a36Sopenharmony_ci/* 82562306a36Sopenharmony_ci------------------------------------------------------------------------------- 82662306a36Sopenharmony_ciReturns the result of converting the single-precision floating-point value 82762306a36Sopenharmony_ci`a' to the 32-bit two's complement integer format. The conversion is 82862306a36Sopenharmony_ciperformed according to the IEC/IEEE Standard for Binary Floating-point 82962306a36Sopenharmony_ciArithmetic---which means in particular that the conversion is rounded 83062306a36Sopenharmony_ciaccording to the current rounding mode. If `a' is a NaN, the largest 83162306a36Sopenharmony_cipositive integer is returned. Otherwise, if the conversion overflows, the 83262306a36Sopenharmony_cilargest integer with the same sign as `a' is returned. 83362306a36Sopenharmony_ci------------------------------------------------------------------------------- 83462306a36Sopenharmony_ci*/ 83562306a36Sopenharmony_ciint32 float32_to_int32( struct roundingData *roundData, float32 a ) 83662306a36Sopenharmony_ci{ 83762306a36Sopenharmony_ci flag aSign; 83862306a36Sopenharmony_ci int16 aExp, shiftCount; 83962306a36Sopenharmony_ci bits32 aSig; 84062306a36Sopenharmony_ci bits64 zSig; 84162306a36Sopenharmony_ci 84262306a36Sopenharmony_ci aSig = extractFloat32Frac( a ); 84362306a36Sopenharmony_ci aExp = extractFloat32Exp( a ); 84462306a36Sopenharmony_ci aSign = extractFloat32Sign( a ); 84562306a36Sopenharmony_ci if ( ( aExp == 0x7FF ) && aSig ) aSign = 0; 84662306a36Sopenharmony_ci if ( aExp ) aSig |= 0x00800000; 84762306a36Sopenharmony_ci shiftCount = 0xAF - aExp; 84862306a36Sopenharmony_ci zSig = aSig; 84962306a36Sopenharmony_ci zSig <<= 32; 85062306a36Sopenharmony_ci if ( 0 < shiftCount ) shift64RightJamming( zSig, shiftCount, &zSig ); 85162306a36Sopenharmony_ci return roundAndPackInt32( roundData, aSign, zSig ); 85262306a36Sopenharmony_ci 85362306a36Sopenharmony_ci} 85462306a36Sopenharmony_ci 85562306a36Sopenharmony_ci/* 85662306a36Sopenharmony_ci------------------------------------------------------------------------------- 85762306a36Sopenharmony_ciReturns the result of converting the single-precision floating-point value 85862306a36Sopenharmony_ci`a' to the 32-bit two's complement integer format. The conversion is 85962306a36Sopenharmony_ciperformed according to the IEC/IEEE Standard for Binary Floating-point 86062306a36Sopenharmony_ciArithmetic, except that the conversion is always rounded toward zero. If 86162306a36Sopenharmony_ci`a' is a NaN, the largest positive integer is returned. Otherwise, if the 86262306a36Sopenharmony_ciconversion overflows, the largest integer with the same sign as `a' is 86362306a36Sopenharmony_cireturned. 86462306a36Sopenharmony_ci------------------------------------------------------------------------------- 86562306a36Sopenharmony_ci*/ 86662306a36Sopenharmony_ciint32 float32_to_int32_round_to_zero( float32 a ) 86762306a36Sopenharmony_ci{ 86862306a36Sopenharmony_ci flag aSign; 86962306a36Sopenharmony_ci int16 aExp, shiftCount; 87062306a36Sopenharmony_ci bits32 aSig; 87162306a36Sopenharmony_ci int32 z; 87262306a36Sopenharmony_ci 87362306a36Sopenharmony_ci aSig = extractFloat32Frac( a ); 87462306a36Sopenharmony_ci aExp = extractFloat32Exp( a ); 87562306a36Sopenharmony_ci aSign = extractFloat32Sign( a ); 87662306a36Sopenharmony_ci shiftCount = aExp - 0x9E; 87762306a36Sopenharmony_ci if ( 0 <= shiftCount ) { 87862306a36Sopenharmony_ci if ( a == 0xCF000000 ) return 0x80000000; 87962306a36Sopenharmony_ci float_raise( float_flag_invalid ); 88062306a36Sopenharmony_ci if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF; 88162306a36Sopenharmony_ci return 0x80000000; 88262306a36Sopenharmony_ci } 88362306a36Sopenharmony_ci else if ( aExp <= 0x7E ) { 88462306a36Sopenharmony_ci if ( aExp | aSig ) float_raise( float_flag_inexact ); 88562306a36Sopenharmony_ci return 0; 88662306a36Sopenharmony_ci } 88762306a36Sopenharmony_ci aSig = ( aSig | 0x00800000 )<<8; 88862306a36Sopenharmony_ci z = aSig>>( - shiftCount ); 88962306a36Sopenharmony_ci if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) { 89062306a36Sopenharmony_ci float_raise( float_flag_inexact ); 89162306a36Sopenharmony_ci } 89262306a36Sopenharmony_ci return aSign ? - z : z; 89362306a36Sopenharmony_ci 89462306a36Sopenharmony_ci} 89562306a36Sopenharmony_ci 89662306a36Sopenharmony_ci/* 89762306a36Sopenharmony_ci------------------------------------------------------------------------------- 89862306a36Sopenharmony_ciReturns the result of converting the single-precision floating-point value 89962306a36Sopenharmony_ci`a' to the double-precision floating-point format. The conversion is 90062306a36Sopenharmony_ciperformed according to the IEC/IEEE Standard for Binary Floating-point 90162306a36Sopenharmony_ciArithmetic. 90262306a36Sopenharmony_ci------------------------------------------------------------------------------- 90362306a36Sopenharmony_ci*/ 90462306a36Sopenharmony_cifloat64 float32_to_float64( float32 a ) 90562306a36Sopenharmony_ci{ 90662306a36Sopenharmony_ci flag aSign; 90762306a36Sopenharmony_ci int16 aExp; 90862306a36Sopenharmony_ci bits32 aSig; 90962306a36Sopenharmony_ci 91062306a36Sopenharmony_ci aSig = extractFloat32Frac( a ); 91162306a36Sopenharmony_ci aExp = extractFloat32Exp( a ); 91262306a36Sopenharmony_ci aSign = extractFloat32Sign( a ); 91362306a36Sopenharmony_ci if ( aExp == 0xFF ) { 91462306a36Sopenharmony_ci if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) ); 91562306a36Sopenharmony_ci return packFloat64( aSign, 0x7FF, 0 ); 91662306a36Sopenharmony_ci } 91762306a36Sopenharmony_ci if ( aExp == 0 ) { 91862306a36Sopenharmony_ci if ( aSig == 0 ) return packFloat64( aSign, 0, 0 ); 91962306a36Sopenharmony_ci normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 92062306a36Sopenharmony_ci --aExp; 92162306a36Sopenharmony_ci } 92262306a36Sopenharmony_ci return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 ); 92362306a36Sopenharmony_ci 92462306a36Sopenharmony_ci} 92562306a36Sopenharmony_ci 92662306a36Sopenharmony_ci#ifdef FLOATX80 92762306a36Sopenharmony_ci 92862306a36Sopenharmony_ci/* 92962306a36Sopenharmony_ci------------------------------------------------------------------------------- 93062306a36Sopenharmony_ciReturns the result of converting the single-precision floating-point value 93162306a36Sopenharmony_ci`a' to the extended double-precision floating-point format. The conversion 93262306a36Sopenharmony_ciis performed according to the IEC/IEEE Standard for Binary Floating-point 93362306a36Sopenharmony_ciArithmetic. 93462306a36Sopenharmony_ci------------------------------------------------------------------------------- 93562306a36Sopenharmony_ci*/ 93662306a36Sopenharmony_cifloatx80 float32_to_floatx80( float32 a ) 93762306a36Sopenharmony_ci{ 93862306a36Sopenharmony_ci flag aSign; 93962306a36Sopenharmony_ci int16 aExp; 94062306a36Sopenharmony_ci bits32 aSig; 94162306a36Sopenharmony_ci 94262306a36Sopenharmony_ci aSig = extractFloat32Frac( a ); 94362306a36Sopenharmony_ci aExp = extractFloat32Exp( a ); 94462306a36Sopenharmony_ci aSign = extractFloat32Sign( a ); 94562306a36Sopenharmony_ci if ( aExp == 0xFF ) { 94662306a36Sopenharmony_ci if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a ) ); 94762306a36Sopenharmony_ci return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); 94862306a36Sopenharmony_ci } 94962306a36Sopenharmony_ci if ( aExp == 0 ) { 95062306a36Sopenharmony_ci if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 ); 95162306a36Sopenharmony_ci normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 95262306a36Sopenharmony_ci } 95362306a36Sopenharmony_ci aSig |= 0x00800000; 95462306a36Sopenharmony_ci return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 ); 95562306a36Sopenharmony_ci 95662306a36Sopenharmony_ci} 95762306a36Sopenharmony_ci 95862306a36Sopenharmony_ci#endif 95962306a36Sopenharmony_ci 96062306a36Sopenharmony_ci/* 96162306a36Sopenharmony_ci------------------------------------------------------------------------------- 96262306a36Sopenharmony_ciRounds the single-precision floating-point value `a' to an integer, and 96362306a36Sopenharmony_cireturns the result as a single-precision floating-point value. The 96462306a36Sopenharmony_cioperation is performed according to the IEC/IEEE Standard for Binary 96562306a36Sopenharmony_ciFloating-point Arithmetic. 96662306a36Sopenharmony_ci------------------------------------------------------------------------------- 96762306a36Sopenharmony_ci*/ 96862306a36Sopenharmony_cifloat32 float32_round_to_int( struct roundingData *roundData, float32 a ) 96962306a36Sopenharmony_ci{ 97062306a36Sopenharmony_ci flag aSign; 97162306a36Sopenharmony_ci int16 aExp; 97262306a36Sopenharmony_ci bits32 lastBitMask, roundBitsMask; 97362306a36Sopenharmony_ci int8 roundingMode; 97462306a36Sopenharmony_ci float32 z; 97562306a36Sopenharmony_ci 97662306a36Sopenharmony_ci aExp = extractFloat32Exp( a ); 97762306a36Sopenharmony_ci if ( 0x96 <= aExp ) { 97862306a36Sopenharmony_ci if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) { 97962306a36Sopenharmony_ci return propagateFloat32NaN( a, a ); 98062306a36Sopenharmony_ci } 98162306a36Sopenharmony_ci return a; 98262306a36Sopenharmony_ci } 98362306a36Sopenharmony_ci roundingMode = roundData->mode; 98462306a36Sopenharmony_ci if ( aExp <= 0x7E ) { 98562306a36Sopenharmony_ci if ( (bits32) ( a<<1 ) == 0 ) return a; 98662306a36Sopenharmony_ci roundData->exception |= float_flag_inexact; 98762306a36Sopenharmony_ci aSign = extractFloat32Sign( a ); 98862306a36Sopenharmony_ci switch ( roundingMode ) { 98962306a36Sopenharmony_ci case float_round_nearest_even: 99062306a36Sopenharmony_ci if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) { 99162306a36Sopenharmony_ci return packFloat32( aSign, 0x7F, 0 ); 99262306a36Sopenharmony_ci } 99362306a36Sopenharmony_ci break; 99462306a36Sopenharmony_ci case float_round_down: 99562306a36Sopenharmony_ci return aSign ? 0xBF800000 : 0; 99662306a36Sopenharmony_ci case float_round_up: 99762306a36Sopenharmony_ci return aSign ? 0x80000000 : 0x3F800000; 99862306a36Sopenharmony_ci } 99962306a36Sopenharmony_ci return packFloat32( aSign, 0, 0 ); 100062306a36Sopenharmony_ci } 100162306a36Sopenharmony_ci lastBitMask = 1; 100262306a36Sopenharmony_ci lastBitMask <<= 0x96 - aExp; 100362306a36Sopenharmony_ci roundBitsMask = lastBitMask - 1; 100462306a36Sopenharmony_ci z = a; 100562306a36Sopenharmony_ci if ( roundingMode == float_round_nearest_even ) { 100662306a36Sopenharmony_ci z += lastBitMask>>1; 100762306a36Sopenharmony_ci if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask; 100862306a36Sopenharmony_ci } 100962306a36Sopenharmony_ci else if ( roundingMode != float_round_to_zero ) { 101062306a36Sopenharmony_ci if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) { 101162306a36Sopenharmony_ci z += roundBitsMask; 101262306a36Sopenharmony_ci } 101362306a36Sopenharmony_ci } 101462306a36Sopenharmony_ci z &= ~ roundBitsMask; 101562306a36Sopenharmony_ci if ( z != a ) roundData->exception |= float_flag_inexact; 101662306a36Sopenharmony_ci return z; 101762306a36Sopenharmony_ci 101862306a36Sopenharmony_ci} 101962306a36Sopenharmony_ci 102062306a36Sopenharmony_ci/* 102162306a36Sopenharmony_ci------------------------------------------------------------------------------- 102262306a36Sopenharmony_ciReturns the result of adding the absolute values of the single-precision 102362306a36Sopenharmony_cifloating-point values `a' and `b'. If `zSign' is true, the sum is negated 102462306a36Sopenharmony_cibefore being returned. `zSign' is ignored if the result is a NaN. The 102562306a36Sopenharmony_ciaddition is performed according to the IEC/IEEE Standard for Binary 102662306a36Sopenharmony_ciFloating-point Arithmetic. 102762306a36Sopenharmony_ci------------------------------------------------------------------------------- 102862306a36Sopenharmony_ci*/ 102962306a36Sopenharmony_cistatic float32 addFloat32Sigs( struct roundingData *roundData, float32 a, float32 b, flag zSign ) 103062306a36Sopenharmony_ci{ 103162306a36Sopenharmony_ci int16 aExp, bExp, zExp; 103262306a36Sopenharmony_ci bits32 aSig, bSig, zSig; 103362306a36Sopenharmony_ci int16 expDiff; 103462306a36Sopenharmony_ci 103562306a36Sopenharmony_ci aSig = extractFloat32Frac( a ); 103662306a36Sopenharmony_ci aExp = extractFloat32Exp( a ); 103762306a36Sopenharmony_ci bSig = extractFloat32Frac( b ); 103862306a36Sopenharmony_ci bExp = extractFloat32Exp( b ); 103962306a36Sopenharmony_ci expDiff = aExp - bExp; 104062306a36Sopenharmony_ci aSig <<= 6; 104162306a36Sopenharmony_ci bSig <<= 6; 104262306a36Sopenharmony_ci if ( 0 < expDiff ) { 104362306a36Sopenharmony_ci if ( aExp == 0xFF ) { 104462306a36Sopenharmony_ci if ( aSig ) return propagateFloat32NaN( a, b ); 104562306a36Sopenharmony_ci return a; 104662306a36Sopenharmony_ci } 104762306a36Sopenharmony_ci if ( bExp == 0 ) { 104862306a36Sopenharmony_ci --expDiff; 104962306a36Sopenharmony_ci } 105062306a36Sopenharmony_ci else { 105162306a36Sopenharmony_ci bSig |= 0x20000000; 105262306a36Sopenharmony_ci } 105362306a36Sopenharmony_ci shift32RightJamming( bSig, expDiff, &bSig ); 105462306a36Sopenharmony_ci zExp = aExp; 105562306a36Sopenharmony_ci } 105662306a36Sopenharmony_ci else if ( expDiff < 0 ) { 105762306a36Sopenharmony_ci if ( bExp == 0xFF ) { 105862306a36Sopenharmony_ci if ( bSig ) return propagateFloat32NaN( a, b ); 105962306a36Sopenharmony_ci return packFloat32( zSign, 0xFF, 0 ); 106062306a36Sopenharmony_ci } 106162306a36Sopenharmony_ci if ( aExp == 0 ) { 106262306a36Sopenharmony_ci ++expDiff; 106362306a36Sopenharmony_ci } 106462306a36Sopenharmony_ci else { 106562306a36Sopenharmony_ci aSig |= 0x20000000; 106662306a36Sopenharmony_ci } 106762306a36Sopenharmony_ci shift32RightJamming( aSig, - expDiff, &aSig ); 106862306a36Sopenharmony_ci zExp = bExp; 106962306a36Sopenharmony_ci } 107062306a36Sopenharmony_ci else { 107162306a36Sopenharmony_ci if ( aExp == 0xFF ) { 107262306a36Sopenharmony_ci if ( aSig | bSig ) return propagateFloat32NaN( a, b ); 107362306a36Sopenharmony_ci return a; 107462306a36Sopenharmony_ci } 107562306a36Sopenharmony_ci if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 ); 107662306a36Sopenharmony_ci zSig = 0x40000000 + aSig + bSig; 107762306a36Sopenharmony_ci zExp = aExp; 107862306a36Sopenharmony_ci goto roundAndPack; 107962306a36Sopenharmony_ci } 108062306a36Sopenharmony_ci aSig |= 0x20000000; 108162306a36Sopenharmony_ci zSig = ( aSig + bSig )<<1; 108262306a36Sopenharmony_ci --zExp; 108362306a36Sopenharmony_ci if ( (sbits32) zSig < 0 ) { 108462306a36Sopenharmony_ci zSig = aSig + bSig; 108562306a36Sopenharmony_ci ++zExp; 108662306a36Sopenharmony_ci } 108762306a36Sopenharmony_ci roundAndPack: 108862306a36Sopenharmony_ci return roundAndPackFloat32( roundData, zSign, zExp, zSig ); 108962306a36Sopenharmony_ci 109062306a36Sopenharmony_ci} 109162306a36Sopenharmony_ci 109262306a36Sopenharmony_ci/* 109362306a36Sopenharmony_ci------------------------------------------------------------------------------- 109462306a36Sopenharmony_ciReturns the result of subtracting the absolute values of the single- 109562306a36Sopenharmony_ciprecision floating-point values `a' and `b'. If `zSign' is true, the 109662306a36Sopenharmony_cidifference is negated before being returned. `zSign' is ignored if the 109762306a36Sopenharmony_ciresult is a NaN. The subtraction is performed according to the IEC/IEEE 109862306a36Sopenharmony_ciStandard for Binary Floating-point Arithmetic. 109962306a36Sopenharmony_ci------------------------------------------------------------------------------- 110062306a36Sopenharmony_ci*/ 110162306a36Sopenharmony_cistatic float32 subFloat32Sigs( struct roundingData *roundData, float32 a, float32 b, flag zSign ) 110262306a36Sopenharmony_ci{ 110362306a36Sopenharmony_ci int16 aExp, bExp, zExp; 110462306a36Sopenharmony_ci bits32 aSig, bSig, zSig; 110562306a36Sopenharmony_ci int16 expDiff; 110662306a36Sopenharmony_ci 110762306a36Sopenharmony_ci aSig = extractFloat32Frac( a ); 110862306a36Sopenharmony_ci aExp = extractFloat32Exp( a ); 110962306a36Sopenharmony_ci bSig = extractFloat32Frac( b ); 111062306a36Sopenharmony_ci bExp = extractFloat32Exp( b ); 111162306a36Sopenharmony_ci expDiff = aExp - bExp; 111262306a36Sopenharmony_ci aSig <<= 7; 111362306a36Sopenharmony_ci bSig <<= 7; 111462306a36Sopenharmony_ci if ( 0 < expDiff ) goto aExpBigger; 111562306a36Sopenharmony_ci if ( expDiff < 0 ) goto bExpBigger; 111662306a36Sopenharmony_ci if ( aExp == 0xFF ) { 111762306a36Sopenharmony_ci if ( aSig | bSig ) return propagateFloat32NaN( a, b ); 111862306a36Sopenharmony_ci roundData->exception |= float_flag_invalid; 111962306a36Sopenharmony_ci return float32_default_nan; 112062306a36Sopenharmony_ci } 112162306a36Sopenharmony_ci if ( aExp == 0 ) { 112262306a36Sopenharmony_ci aExp = 1; 112362306a36Sopenharmony_ci bExp = 1; 112462306a36Sopenharmony_ci } 112562306a36Sopenharmony_ci if ( bSig < aSig ) goto aBigger; 112662306a36Sopenharmony_ci if ( aSig < bSig ) goto bBigger; 112762306a36Sopenharmony_ci return packFloat32( roundData->mode == float_round_down, 0, 0 ); 112862306a36Sopenharmony_ci bExpBigger: 112962306a36Sopenharmony_ci if ( bExp == 0xFF ) { 113062306a36Sopenharmony_ci if ( bSig ) return propagateFloat32NaN( a, b ); 113162306a36Sopenharmony_ci return packFloat32( zSign ^ 1, 0xFF, 0 ); 113262306a36Sopenharmony_ci } 113362306a36Sopenharmony_ci if ( aExp == 0 ) { 113462306a36Sopenharmony_ci ++expDiff; 113562306a36Sopenharmony_ci } 113662306a36Sopenharmony_ci else { 113762306a36Sopenharmony_ci aSig |= 0x40000000; 113862306a36Sopenharmony_ci } 113962306a36Sopenharmony_ci shift32RightJamming( aSig, - expDiff, &aSig ); 114062306a36Sopenharmony_ci bSig |= 0x40000000; 114162306a36Sopenharmony_ci bBigger: 114262306a36Sopenharmony_ci zSig = bSig - aSig; 114362306a36Sopenharmony_ci zExp = bExp; 114462306a36Sopenharmony_ci zSign ^= 1; 114562306a36Sopenharmony_ci goto normalizeRoundAndPack; 114662306a36Sopenharmony_ci aExpBigger: 114762306a36Sopenharmony_ci if ( aExp == 0xFF ) { 114862306a36Sopenharmony_ci if ( aSig ) return propagateFloat32NaN( a, b ); 114962306a36Sopenharmony_ci return a; 115062306a36Sopenharmony_ci } 115162306a36Sopenharmony_ci if ( bExp == 0 ) { 115262306a36Sopenharmony_ci --expDiff; 115362306a36Sopenharmony_ci } 115462306a36Sopenharmony_ci else { 115562306a36Sopenharmony_ci bSig |= 0x40000000; 115662306a36Sopenharmony_ci } 115762306a36Sopenharmony_ci shift32RightJamming( bSig, expDiff, &bSig ); 115862306a36Sopenharmony_ci aSig |= 0x40000000; 115962306a36Sopenharmony_ci aBigger: 116062306a36Sopenharmony_ci zSig = aSig - bSig; 116162306a36Sopenharmony_ci zExp = aExp; 116262306a36Sopenharmony_ci normalizeRoundAndPack: 116362306a36Sopenharmony_ci --zExp; 116462306a36Sopenharmony_ci return normalizeRoundAndPackFloat32( roundData, zSign, zExp, zSig ); 116562306a36Sopenharmony_ci 116662306a36Sopenharmony_ci} 116762306a36Sopenharmony_ci 116862306a36Sopenharmony_ci/* 116962306a36Sopenharmony_ci------------------------------------------------------------------------------- 117062306a36Sopenharmony_ciReturns the result of adding the single-precision floating-point values `a' 117162306a36Sopenharmony_ciand `b'. The operation is performed according to the IEC/IEEE Standard for 117262306a36Sopenharmony_ciBinary Floating-point Arithmetic. 117362306a36Sopenharmony_ci------------------------------------------------------------------------------- 117462306a36Sopenharmony_ci*/ 117562306a36Sopenharmony_cifloat32 float32_add( struct roundingData *roundData, float32 a, float32 b ) 117662306a36Sopenharmony_ci{ 117762306a36Sopenharmony_ci flag aSign, bSign; 117862306a36Sopenharmony_ci 117962306a36Sopenharmony_ci aSign = extractFloat32Sign( a ); 118062306a36Sopenharmony_ci bSign = extractFloat32Sign( b ); 118162306a36Sopenharmony_ci if ( aSign == bSign ) { 118262306a36Sopenharmony_ci return addFloat32Sigs( roundData, a, b, aSign ); 118362306a36Sopenharmony_ci } 118462306a36Sopenharmony_ci else { 118562306a36Sopenharmony_ci return subFloat32Sigs( roundData, a, b, aSign ); 118662306a36Sopenharmony_ci } 118762306a36Sopenharmony_ci 118862306a36Sopenharmony_ci} 118962306a36Sopenharmony_ci 119062306a36Sopenharmony_ci/* 119162306a36Sopenharmony_ci------------------------------------------------------------------------------- 119262306a36Sopenharmony_ciReturns the result of subtracting the single-precision floating-point values 119362306a36Sopenharmony_ci`a' and `b'. The operation is performed according to the IEC/IEEE Standard 119462306a36Sopenharmony_cifor Binary Floating-point Arithmetic. 119562306a36Sopenharmony_ci------------------------------------------------------------------------------- 119662306a36Sopenharmony_ci*/ 119762306a36Sopenharmony_cifloat32 float32_sub( struct roundingData *roundData, float32 a, float32 b ) 119862306a36Sopenharmony_ci{ 119962306a36Sopenharmony_ci flag aSign, bSign; 120062306a36Sopenharmony_ci 120162306a36Sopenharmony_ci aSign = extractFloat32Sign( a ); 120262306a36Sopenharmony_ci bSign = extractFloat32Sign( b ); 120362306a36Sopenharmony_ci if ( aSign == bSign ) { 120462306a36Sopenharmony_ci return subFloat32Sigs( roundData, a, b, aSign ); 120562306a36Sopenharmony_ci } 120662306a36Sopenharmony_ci else { 120762306a36Sopenharmony_ci return addFloat32Sigs( roundData, a, b, aSign ); 120862306a36Sopenharmony_ci } 120962306a36Sopenharmony_ci 121062306a36Sopenharmony_ci} 121162306a36Sopenharmony_ci 121262306a36Sopenharmony_ci/* 121362306a36Sopenharmony_ci------------------------------------------------------------------------------- 121462306a36Sopenharmony_ciReturns the result of multiplying the single-precision floating-point values 121562306a36Sopenharmony_ci`a' and `b'. The operation is performed according to the IEC/IEEE Standard 121662306a36Sopenharmony_cifor Binary Floating-point Arithmetic. 121762306a36Sopenharmony_ci------------------------------------------------------------------------------- 121862306a36Sopenharmony_ci*/ 121962306a36Sopenharmony_cifloat32 float32_mul( struct roundingData *roundData, float32 a, float32 b ) 122062306a36Sopenharmony_ci{ 122162306a36Sopenharmony_ci flag aSign, bSign, zSign; 122262306a36Sopenharmony_ci int16 aExp, bExp, zExp; 122362306a36Sopenharmony_ci bits32 aSig, bSig; 122462306a36Sopenharmony_ci bits64 zSig64; 122562306a36Sopenharmony_ci bits32 zSig; 122662306a36Sopenharmony_ci 122762306a36Sopenharmony_ci aSig = extractFloat32Frac( a ); 122862306a36Sopenharmony_ci aExp = extractFloat32Exp( a ); 122962306a36Sopenharmony_ci aSign = extractFloat32Sign( a ); 123062306a36Sopenharmony_ci bSig = extractFloat32Frac( b ); 123162306a36Sopenharmony_ci bExp = extractFloat32Exp( b ); 123262306a36Sopenharmony_ci bSign = extractFloat32Sign( b ); 123362306a36Sopenharmony_ci zSign = aSign ^ bSign; 123462306a36Sopenharmony_ci if ( aExp == 0xFF ) { 123562306a36Sopenharmony_ci if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) { 123662306a36Sopenharmony_ci return propagateFloat32NaN( a, b ); 123762306a36Sopenharmony_ci } 123862306a36Sopenharmony_ci if ( ( bExp | bSig ) == 0 ) { 123962306a36Sopenharmony_ci roundData->exception |= float_flag_invalid; 124062306a36Sopenharmony_ci return float32_default_nan; 124162306a36Sopenharmony_ci } 124262306a36Sopenharmony_ci return packFloat32( zSign, 0xFF, 0 ); 124362306a36Sopenharmony_ci } 124462306a36Sopenharmony_ci if ( bExp == 0xFF ) { 124562306a36Sopenharmony_ci if ( bSig ) return propagateFloat32NaN( a, b ); 124662306a36Sopenharmony_ci if ( ( aExp | aSig ) == 0 ) { 124762306a36Sopenharmony_ci roundData->exception |= float_flag_invalid; 124862306a36Sopenharmony_ci return float32_default_nan; 124962306a36Sopenharmony_ci } 125062306a36Sopenharmony_ci return packFloat32( zSign, 0xFF, 0 ); 125162306a36Sopenharmony_ci } 125262306a36Sopenharmony_ci if ( aExp == 0 ) { 125362306a36Sopenharmony_ci if ( aSig == 0 ) return packFloat32( zSign, 0, 0 ); 125462306a36Sopenharmony_ci normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 125562306a36Sopenharmony_ci } 125662306a36Sopenharmony_ci if ( bExp == 0 ) { 125762306a36Sopenharmony_ci if ( bSig == 0 ) return packFloat32( zSign, 0, 0 ); 125862306a36Sopenharmony_ci normalizeFloat32Subnormal( bSig, &bExp, &bSig ); 125962306a36Sopenharmony_ci } 126062306a36Sopenharmony_ci zExp = aExp + bExp - 0x7F; 126162306a36Sopenharmony_ci aSig = ( aSig | 0x00800000 )<<7; 126262306a36Sopenharmony_ci bSig = ( bSig | 0x00800000 )<<8; 126362306a36Sopenharmony_ci shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 ); 126462306a36Sopenharmony_ci zSig = zSig64; 126562306a36Sopenharmony_ci if ( 0 <= (sbits32) ( zSig<<1 ) ) { 126662306a36Sopenharmony_ci zSig <<= 1; 126762306a36Sopenharmony_ci --zExp; 126862306a36Sopenharmony_ci } 126962306a36Sopenharmony_ci return roundAndPackFloat32( roundData, zSign, zExp, zSig ); 127062306a36Sopenharmony_ci 127162306a36Sopenharmony_ci} 127262306a36Sopenharmony_ci 127362306a36Sopenharmony_ci/* 127462306a36Sopenharmony_ci------------------------------------------------------------------------------- 127562306a36Sopenharmony_ciReturns the result of dividing the single-precision floating-point value `a' 127662306a36Sopenharmony_ciby the corresponding value `b'. The operation is performed according to the 127762306a36Sopenharmony_ciIEC/IEEE Standard for Binary Floating-point Arithmetic. 127862306a36Sopenharmony_ci------------------------------------------------------------------------------- 127962306a36Sopenharmony_ci*/ 128062306a36Sopenharmony_cifloat32 float32_div( struct roundingData *roundData, float32 a, float32 b ) 128162306a36Sopenharmony_ci{ 128262306a36Sopenharmony_ci flag aSign, bSign, zSign; 128362306a36Sopenharmony_ci int16 aExp, bExp, zExp; 128462306a36Sopenharmony_ci bits32 aSig, bSig, zSig; 128562306a36Sopenharmony_ci 128662306a36Sopenharmony_ci aSig = extractFloat32Frac( a ); 128762306a36Sopenharmony_ci aExp = extractFloat32Exp( a ); 128862306a36Sopenharmony_ci aSign = extractFloat32Sign( a ); 128962306a36Sopenharmony_ci bSig = extractFloat32Frac( b ); 129062306a36Sopenharmony_ci bExp = extractFloat32Exp( b ); 129162306a36Sopenharmony_ci bSign = extractFloat32Sign( b ); 129262306a36Sopenharmony_ci zSign = aSign ^ bSign; 129362306a36Sopenharmony_ci if ( aExp == 0xFF ) { 129462306a36Sopenharmony_ci if ( aSig ) return propagateFloat32NaN( a, b ); 129562306a36Sopenharmony_ci if ( bExp == 0xFF ) { 129662306a36Sopenharmony_ci if ( bSig ) return propagateFloat32NaN( a, b ); 129762306a36Sopenharmony_ci roundData->exception |= float_flag_invalid; 129862306a36Sopenharmony_ci return float32_default_nan; 129962306a36Sopenharmony_ci } 130062306a36Sopenharmony_ci return packFloat32( zSign, 0xFF, 0 ); 130162306a36Sopenharmony_ci } 130262306a36Sopenharmony_ci if ( bExp == 0xFF ) { 130362306a36Sopenharmony_ci if ( bSig ) return propagateFloat32NaN( a, b ); 130462306a36Sopenharmony_ci return packFloat32( zSign, 0, 0 ); 130562306a36Sopenharmony_ci } 130662306a36Sopenharmony_ci if ( bExp == 0 ) { 130762306a36Sopenharmony_ci if ( bSig == 0 ) { 130862306a36Sopenharmony_ci if ( ( aExp | aSig ) == 0 ) { 130962306a36Sopenharmony_ci roundData->exception |= float_flag_invalid; 131062306a36Sopenharmony_ci return float32_default_nan; 131162306a36Sopenharmony_ci } 131262306a36Sopenharmony_ci roundData->exception |= float_flag_divbyzero; 131362306a36Sopenharmony_ci return packFloat32( zSign, 0xFF, 0 ); 131462306a36Sopenharmony_ci } 131562306a36Sopenharmony_ci normalizeFloat32Subnormal( bSig, &bExp, &bSig ); 131662306a36Sopenharmony_ci } 131762306a36Sopenharmony_ci if ( aExp == 0 ) { 131862306a36Sopenharmony_ci if ( aSig == 0 ) return packFloat32( zSign, 0, 0 ); 131962306a36Sopenharmony_ci normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 132062306a36Sopenharmony_ci } 132162306a36Sopenharmony_ci zExp = aExp - bExp + 0x7D; 132262306a36Sopenharmony_ci aSig = ( aSig | 0x00800000 )<<7; 132362306a36Sopenharmony_ci bSig = ( bSig | 0x00800000 )<<8; 132462306a36Sopenharmony_ci if ( bSig <= ( aSig + aSig ) ) { 132562306a36Sopenharmony_ci aSig >>= 1; 132662306a36Sopenharmony_ci ++zExp; 132762306a36Sopenharmony_ci } 132862306a36Sopenharmony_ci { 132962306a36Sopenharmony_ci bits64 tmp = ( (bits64) aSig )<<32; 133062306a36Sopenharmony_ci do_div( tmp, bSig ); 133162306a36Sopenharmony_ci zSig = tmp; 133262306a36Sopenharmony_ci } 133362306a36Sopenharmony_ci if ( ( zSig & 0x3F ) == 0 ) { 133462306a36Sopenharmony_ci zSig |= ( ( (bits64) bSig ) * zSig != ( (bits64) aSig )<<32 ); 133562306a36Sopenharmony_ci } 133662306a36Sopenharmony_ci return roundAndPackFloat32( roundData, zSign, zExp, zSig ); 133762306a36Sopenharmony_ci 133862306a36Sopenharmony_ci} 133962306a36Sopenharmony_ci 134062306a36Sopenharmony_ci/* 134162306a36Sopenharmony_ci------------------------------------------------------------------------------- 134262306a36Sopenharmony_ciReturns the remainder of the single-precision floating-point value `a' 134362306a36Sopenharmony_ciwith respect to the corresponding value `b'. The operation is performed 134462306a36Sopenharmony_ciaccording to the IEC/IEEE Standard for Binary Floating-point Arithmetic. 134562306a36Sopenharmony_ci------------------------------------------------------------------------------- 134662306a36Sopenharmony_ci*/ 134762306a36Sopenharmony_cifloat32 float32_rem( struct roundingData *roundData, float32 a, float32 b ) 134862306a36Sopenharmony_ci{ 134962306a36Sopenharmony_ci flag aSign, bSign, zSign; 135062306a36Sopenharmony_ci int16 aExp, bExp, expDiff; 135162306a36Sopenharmony_ci bits32 aSig, bSig; 135262306a36Sopenharmony_ci bits32 q; 135362306a36Sopenharmony_ci bits64 aSig64, bSig64, q64; 135462306a36Sopenharmony_ci bits32 alternateASig; 135562306a36Sopenharmony_ci sbits32 sigMean; 135662306a36Sopenharmony_ci 135762306a36Sopenharmony_ci aSig = extractFloat32Frac( a ); 135862306a36Sopenharmony_ci aExp = extractFloat32Exp( a ); 135962306a36Sopenharmony_ci aSign = extractFloat32Sign( a ); 136062306a36Sopenharmony_ci bSig = extractFloat32Frac( b ); 136162306a36Sopenharmony_ci bExp = extractFloat32Exp( b ); 136262306a36Sopenharmony_ci bSign = extractFloat32Sign( b ); 136362306a36Sopenharmony_ci if ( aExp == 0xFF ) { 136462306a36Sopenharmony_ci if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) { 136562306a36Sopenharmony_ci return propagateFloat32NaN( a, b ); 136662306a36Sopenharmony_ci } 136762306a36Sopenharmony_ci roundData->exception |= float_flag_invalid; 136862306a36Sopenharmony_ci return float32_default_nan; 136962306a36Sopenharmony_ci } 137062306a36Sopenharmony_ci if ( bExp == 0xFF ) { 137162306a36Sopenharmony_ci if ( bSig ) return propagateFloat32NaN( a, b ); 137262306a36Sopenharmony_ci return a; 137362306a36Sopenharmony_ci } 137462306a36Sopenharmony_ci if ( bExp == 0 ) { 137562306a36Sopenharmony_ci if ( bSig == 0 ) { 137662306a36Sopenharmony_ci roundData->exception |= float_flag_invalid; 137762306a36Sopenharmony_ci return float32_default_nan; 137862306a36Sopenharmony_ci } 137962306a36Sopenharmony_ci normalizeFloat32Subnormal( bSig, &bExp, &bSig ); 138062306a36Sopenharmony_ci } 138162306a36Sopenharmony_ci if ( aExp == 0 ) { 138262306a36Sopenharmony_ci if ( aSig == 0 ) return a; 138362306a36Sopenharmony_ci normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 138462306a36Sopenharmony_ci } 138562306a36Sopenharmony_ci expDiff = aExp - bExp; 138662306a36Sopenharmony_ci aSig |= 0x00800000; 138762306a36Sopenharmony_ci bSig |= 0x00800000; 138862306a36Sopenharmony_ci if ( expDiff < 32 ) { 138962306a36Sopenharmony_ci aSig <<= 8; 139062306a36Sopenharmony_ci bSig <<= 8; 139162306a36Sopenharmony_ci if ( expDiff < 0 ) { 139262306a36Sopenharmony_ci if ( expDiff < -1 ) return a; 139362306a36Sopenharmony_ci aSig >>= 1; 139462306a36Sopenharmony_ci } 139562306a36Sopenharmony_ci q = ( bSig <= aSig ); 139662306a36Sopenharmony_ci if ( q ) aSig -= bSig; 139762306a36Sopenharmony_ci if ( 0 < expDiff ) { 139862306a36Sopenharmony_ci bits64 tmp = ( (bits64) aSig )<<32; 139962306a36Sopenharmony_ci do_div( tmp, bSig ); 140062306a36Sopenharmony_ci q = tmp; 140162306a36Sopenharmony_ci q >>= 32 - expDiff; 140262306a36Sopenharmony_ci bSig >>= 2; 140362306a36Sopenharmony_ci aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q; 140462306a36Sopenharmony_ci } 140562306a36Sopenharmony_ci else { 140662306a36Sopenharmony_ci aSig >>= 2; 140762306a36Sopenharmony_ci bSig >>= 2; 140862306a36Sopenharmony_ci } 140962306a36Sopenharmony_ci } 141062306a36Sopenharmony_ci else { 141162306a36Sopenharmony_ci if ( bSig <= aSig ) aSig -= bSig; 141262306a36Sopenharmony_ci aSig64 = ( (bits64) aSig )<<40; 141362306a36Sopenharmony_ci bSig64 = ( (bits64) bSig )<<40; 141462306a36Sopenharmony_ci expDiff -= 64; 141562306a36Sopenharmony_ci while ( 0 < expDiff ) { 141662306a36Sopenharmony_ci q64 = estimateDiv128To64( aSig64, 0, bSig64 ); 141762306a36Sopenharmony_ci q64 = ( 2 < q64 ) ? q64 - 2 : 0; 141862306a36Sopenharmony_ci aSig64 = - ( ( bSig * q64 )<<38 ); 141962306a36Sopenharmony_ci expDiff -= 62; 142062306a36Sopenharmony_ci } 142162306a36Sopenharmony_ci expDiff += 64; 142262306a36Sopenharmony_ci q64 = estimateDiv128To64( aSig64, 0, bSig64 ); 142362306a36Sopenharmony_ci q64 = ( 2 < q64 ) ? q64 - 2 : 0; 142462306a36Sopenharmony_ci q = q64>>( 64 - expDiff ); 142562306a36Sopenharmony_ci bSig <<= 6; 142662306a36Sopenharmony_ci aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q; 142762306a36Sopenharmony_ci } 142862306a36Sopenharmony_ci do { 142962306a36Sopenharmony_ci alternateASig = aSig; 143062306a36Sopenharmony_ci ++q; 143162306a36Sopenharmony_ci aSig -= bSig; 143262306a36Sopenharmony_ci } while ( 0 <= (sbits32) aSig ); 143362306a36Sopenharmony_ci sigMean = aSig + alternateASig; 143462306a36Sopenharmony_ci if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) { 143562306a36Sopenharmony_ci aSig = alternateASig; 143662306a36Sopenharmony_ci } 143762306a36Sopenharmony_ci zSign = ( (sbits32) aSig < 0 ); 143862306a36Sopenharmony_ci if ( zSign ) aSig = - aSig; 143962306a36Sopenharmony_ci return normalizeRoundAndPackFloat32( roundData, aSign ^ zSign, bExp, aSig ); 144062306a36Sopenharmony_ci 144162306a36Sopenharmony_ci} 144262306a36Sopenharmony_ci 144362306a36Sopenharmony_ci/* 144462306a36Sopenharmony_ci------------------------------------------------------------------------------- 144562306a36Sopenharmony_ciReturns the square root of the single-precision floating-point value `a'. 144662306a36Sopenharmony_ciThe operation is performed according to the IEC/IEEE Standard for Binary 144762306a36Sopenharmony_ciFloating-point Arithmetic. 144862306a36Sopenharmony_ci------------------------------------------------------------------------------- 144962306a36Sopenharmony_ci*/ 145062306a36Sopenharmony_cifloat32 float32_sqrt( struct roundingData *roundData, float32 a ) 145162306a36Sopenharmony_ci{ 145262306a36Sopenharmony_ci flag aSign; 145362306a36Sopenharmony_ci int16 aExp, zExp; 145462306a36Sopenharmony_ci bits32 aSig, zSig; 145562306a36Sopenharmony_ci bits64 rem, term; 145662306a36Sopenharmony_ci 145762306a36Sopenharmony_ci aSig = extractFloat32Frac( a ); 145862306a36Sopenharmony_ci aExp = extractFloat32Exp( a ); 145962306a36Sopenharmony_ci aSign = extractFloat32Sign( a ); 146062306a36Sopenharmony_ci if ( aExp == 0xFF ) { 146162306a36Sopenharmony_ci if ( aSig ) return propagateFloat32NaN( a, 0 ); 146262306a36Sopenharmony_ci if ( ! aSign ) return a; 146362306a36Sopenharmony_ci roundData->exception |= float_flag_invalid; 146462306a36Sopenharmony_ci return float32_default_nan; 146562306a36Sopenharmony_ci } 146662306a36Sopenharmony_ci if ( aSign ) { 146762306a36Sopenharmony_ci if ( ( aExp | aSig ) == 0 ) return a; 146862306a36Sopenharmony_ci roundData->exception |= float_flag_invalid; 146962306a36Sopenharmony_ci return float32_default_nan; 147062306a36Sopenharmony_ci } 147162306a36Sopenharmony_ci if ( aExp == 0 ) { 147262306a36Sopenharmony_ci if ( aSig == 0 ) return 0; 147362306a36Sopenharmony_ci normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 147462306a36Sopenharmony_ci } 147562306a36Sopenharmony_ci zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E; 147662306a36Sopenharmony_ci aSig = ( aSig | 0x00800000 )<<8; 147762306a36Sopenharmony_ci zSig = estimateSqrt32( aExp, aSig ) + 2; 147862306a36Sopenharmony_ci if ( ( zSig & 0x7F ) <= 5 ) { 147962306a36Sopenharmony_ci if ( zSig < 2 ) { 148062306a36Sopenharmony_ci zSig = 0xFFFFFFFF; 148162306a36Sopenharmony_ci } 148262306a36Sopenharmony_ci else { 148362306a36Sopenharmony_ci aSig >>= aExp & 1; 148462306a36Sopenharmony_ci term = ( (bits64) zSig ) * zSig; 148562306a36Sopenharmony_ci rem = ( ( (bits64) aSig )<<32 ) - term; 148662306a36Sopenharmony_ci while ( (sbits64) rem < 0 ) { 148762306a36Sopenharmony_ci --zSig; 148862306a36Sopenharmony_ci rem += ( ( (bits64) zSig )<<1 ) | 1; 148962306a36Sopenharmony_ci } 149062306a36Sopenharmony_ci zSig |= ( rem != 0 ); 149162306a36Sopenharmony_ci } 149262306a36Sopenharmony_ci } 149362306a36Sopenharmony_ci shift32RightJamming( zSig, 1, &zSig ); 149462306a36Sopenharmony_ci return roundAndPackFloat32( roundData, 0, zExp, zSig ); 149562306a36Sopenharmony_ci 149662306a36Sopenharmony_ci} 149762306a36Sopenharmony_ci 149862306a36Sopenharmony_ci/* 149962306a36Sopenharmony_ci------------------------------------------------------------------------------- 150062306a36Sopenharmony_ciReturns 1 if the single-precision floating-point value `a' is equal to the 150162306a36Sopenharmony_cicorresponding value `b', and 0 otherwise. The comparison is performed 150262306a36Sopenharmony_ciaccording to the IEC/IEEE Standard for Binary Floating-point Arithmetic. 150362306a36Sopenharmony_ci------------------------------------------------------------------------------- 150462306a36Sopenharmony_ci*/ 150562306a36Sopenharmony_ciflag float32_eq( float32 a, float32 b ) 150662306a36Sopenharmony_ci{ 150762306a36Sopenharmony_ci 150862306a36Sopenharmony_ci if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) 150962306a36Sopenharmony_ci || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) 151062306a36Sopenharmony_ci ) { 151162306a36Sopenharmony_ci if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { 151262306a36Sopenharmony_ci float_raise( float_flag_invalid ); 151362306a36Sopenharmony_ci } 151462306a36Sopenharmony_ci return 0; 151562306a36Sopenharmony_ci } 151662306a36Sopenharmony_ci return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 ); 151762306a36Sopenharmony_ci 151862306a36Sopenharmony_ci} 151962306a36Sopenharmony_ci 152062306a36Sopenharmony_ci/* 152162306a36Sopenharmony_ci------------------------------------------------------------------------------- 152262306a36Sopenharmony_ciReturns 1 if the single-precision floating-point value `a' is less than or 152362306a36Sopenharmony_ciequal to the corresponding value `b', and 0 otherwise. The comparison is 152462306a36Sopenharmony_ciperformed according to the IEC/IEEE Standard for Binary Floating-point 152562306a36Sopenharmony_ciArithmetic. 152662306a36Sopenharmony_ci------------------------------------------------------------------------------- 152762306a36Sopenharmony_ci*/ 152862306a36Sopenharmony_ciflag float32_le( float32 a, float32 b ) 152962306a36Sopenharmony_ci{ 153062306a36Sopenharmony_ci flag aSign, bSign; 153162306a36Sopenharmony_ci 153262306a36Sopenharmony_ci if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) 153362306a36Sopenharmony_ci || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) 153462306a36Sopenharmony_ci ) { 153562306a36Sopenharmony_ci float_raise( float_flag_invalid ); 153662306a36Sopenharmony_ci return 0; 153762306a36Sopenharmony_ci } 153862306a36Sopenharmony_ci aSign = extractFloat32Sign( a ); 153962306a36Sopenharmony_ci bSign = extractFloat32Sign( b ); 154062306a36Sopenharmony_ci if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 ); 154162306a36Sopenharmony_ci return ( a == b ) || ( aSign ^ ( a < b ) ); 154262306a36Sopenharmony_ci 154362306a36Sopenharmony_ci} 154462306a36Sopenharmony_ci 154562306a36Sopenharmony_ci/* 154662306a36Sopenharmony_ci------------------------------------------------------------------------------- 154762306a36Sopenharmony_ciReturns 1 if the single-precision floating-point value `a' is less than 154862306a36Sopenharmony_cithe corresponding value `b', and 0 otherwise. The comparison is performed 154962306a36Sopenharmony_ciaccording to the IEC/IEEE Standard for Binary Floating-point Arithmetic. 155062306a36Sopenharmony_ci------------------------------------------------------------------------------- 155162306a36Sopenharmony_ci*/ 155262306a36Sopenharmony_ciflag float32_lt( float32 a, float32 b ) 155362306a36Sopenharmony_ci{ 155462306a36Sopenharmony_ci flag aSign, bSign; 155562306a36Sopenharmony_ci 155662306a36Sopenharmony_ci if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) 155762306a36Sopenharmony_ci || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) 155862306a36Sopenharmony_ci ) { 155962306a36Sopenharmony_ci float_raise( float_flag_invalid ); 156062306a36Sopenharmony_ci return 0; 156162306a36Sopenharmony_ci } 156262306a36Sopenharmony_ci aSign = extractFloat32Sign( a ); 156362306a36Sopenharmony_ci bSign = extractFloat32Sign( b ); 156462306a36Sopenharmony_ci if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 ); 156562306a36Sopenharmony_ci return ( a != b ) && ( aSign ^ ( a < b ) ); 156662306a36Sopenharmony_ci 156762306a36Sopenharmony_ci} 156862306a36Sopenharmony_ci 156962306a36Sopenharmony_ci/* 157062306a36Sopenharmony_ci------------------------------------------------------------------------------- 157162306a36Sopenharmony_ciReturns 1 if the single-precision floating-point value `a' is equal to the 157262306a36Sopenharmony_cicorresponding value `b', and 0 otherwise. The invalid exception is raised 157362306a36Sopenharmony_ciif either operand is a NaN. Otherwise, the comparison is performed 157462306a36Sopenharmony_ciaccording to the IEC/IEEE Standard for Binary Floating-point Arithmetic. 157562306a36Sopenharmony_ci------------------------------------------------------------------------------- 157662306a36Sopenharmony_ci*/ 157762306a36Sopenharmony_ciflag float32_eq_signaling( float32 a, float32 b ) 157862306a36Sopenharmony_ci{ 157962306a36Sopenharmony_ci 158062306a36Sopenharmony_ci if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) 158162306a36Sopenharmony_ci || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) 158262306a36Sopenharmony_ci ) { 158362306a36Sopenharmony_ci float_raise( float_flag_invalid ); 158462306a36Sopenharmony_ci return 0; 158562306a36Sopenharmony_ci } 158662306a36Sopenharmony_ci return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 ); 158762306a36Sopenharmony_ci 158862306a36Sopenharmony_ci} 158962306a36Sopenharmony_ci 159062306a36Sopenharmony_ci/* 159162306a36Sopenharmony_ci------------------------------------------------------------------------------- 159262306a36Sopenharmony_ciReturns 1 if the single-precision floating-point value `a' is less than or 159362306a36Sopenharmony_ciequal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not 159462306a36Sopenharmony_cicause an exception. Otherwise, the comparison is performed according to the 159562306a36Sopenharmony_ciIEC/IEEE Standard for Binary Floating-point Arithmetic. 159662306a36Sopenharmony_ci------------------------------------------------------------------------------- 159762306a36Sopenharmony_ci*/ 159862306a36Sopenharmony_ciflag float32_le_quiet( float32 a, float32 b ) 159962306a36Sopenharmony_ci{ 160062306a36Sopenharmony_ci flag aSign, bSign; 160162306a36Sopenharmony_ci //int16 aExp, bExp; 160262306a36Sopenharmony_ci 160362306a36Sopenharmony_ci if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) 160462306a36Sopenharmony_ci || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) 160562306a36Sopenharmony_ci ) { 160662306a36Sopenharmony_ci /* Do nothing, even if NaN as we're quiet */ 160762306a36Sopenharmony_ci return 0; 160862306a36Sopenharmony_ci } 160962306a36Sopenharmony_ci aSign = extractFloat32Sign( a ); 161062306a36Sopenharmony_ci bSign = extractFloat32Sign( b ); 161162306a36Sopenharmony_ci if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 ); 161262306a36Sopenharmony_ci return ( a == b ) || ( aSign ^ ( a < b ) ); 161362306a36Sopenharmony_ci 161462306a36Sopenharmony_ci} 161562306a36Sopenharmony_ci 161662306a36Sopenharmony_ci/* 161762306a36Sopenharmony_ci------------------------------------------------------------------------------- 161862306a36Sopenharmony_ciReturns 1 if the single-precision floating-point value `a' is less than 161962306a36Sopenharmony_cithe corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an 162062306a36Sopenharmony_ciexception. Otherwise, the comparison is performed according to the IEC/IEEE 162162306a36Sopenharmony_ciStandard for Binary Floating-point Arithmetic. 162262306a36Sopenharmony_ci------------------------------------------------------------------------------- 162362306a36Sopenharmony_ci*/ 162462306a36Sopenharmony_ciflag float32_lt_quiet( float32 a, float32 b ) 162562306a36Sopenharmony_ci{ 162662306a36Sopenharmony_ci flag aSign, bSign; 162762306a36Sopenharmony_ci 162862306a36Sopenharmony_ci if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) 162962306a36Sopenharmony_ci || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) 163062306a36Sopenharmony_ci ) { 163162306a36Sopenharmony_ci /* Do nothing, even if NaN as we're quiet */ 163262306a36Sopenharmony_ci return 0; 163362306a36Sopenharmony_ci } 163462306a36Sopenharmony_ci aSign = extractFloat32Sign( a ); 163562306a36Sopenharmony_ci bSign = extractFloat32Sign( b ); 163662306a36Sopenharmony_ci if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 ); 163762306a36Sopenharmony_ci return ( a != b ) && ( aSign ^ ( a < b ) ); 163862306a36Sopenharmony_ci 163962306a36Sopenharmony_ci} 164062306a36Sopenharmony_ci 164162306a36Sopenharmony_ci/* 164262306a36Sopenharmony_ci------------------------------------------------------------------------------- 164362306a36Sopenharmony_ciReturns the result of converting the double-precision floating-point value 164462306a36Sopenharmony_ci`a' to the 32-bit two's complement integer format. The conversion is 164562306a36Sopenharmony_ciperformed according to the IEC/IEEE Standard for Binary Floating-point 164662306a36Sopenharmony_ciArithmetic---which means in particular that the conversion is rounded 164762306a36Sopenharmony_ciaccording to the current rounding mode. If `a' is a NaN, the largest 164862306a36Sopenharmony_cipositive integer is returned. Otherwise, if the conversion overflows, the 164962306a36Sopenharmony_cilargest integer with the same sign as `a' is returned. 165062306a36Sopenharmony_ci------------------------------------------------------------------------------- 165162306a36Sopenharmony_ci*/ 165262306a36Sopenharmony_ciint32 float64_to_int32( struct roundingData *roundData, float64 a ) 165362306a36Sopenharmony_ci{ 165462306a36Sopenharmony_ci flag aSign; 165562306a36Sopenharmony_ci int16 aExp, shiftCount; 165662306a36Sopenharmony_ci bits64 aSig; 165762306a36Sopenharmony_ci 165862306a36Sopenharmony_ci aSig = extractFloat64Frac( a ); 165962306a36Sopenharmony_ci aExp = extractFloat64Exp( a ); 166062306a36Sopenharmony_ci aSign = extractFloat64Sign( a ); 166162306a36Sopenharmony_ci if ( ( aExp == 0x7FF ) && aSig ) aSign = 0; 166262306a36Sopenharmony_ci if ( aExp ) aSig |= LIT64( 0x0010000000000000 ); 166362306a36Sopenharmony_ci shiftCount = 0x42C - aExp; 166462306a36Sopenharmony_ci if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig ); 166562306a36Sopenharmony_ci return roundAndPackInt32( roundData, aSign, aSig ); 166662306a36Sopenharmony_ci 166762306a36Sopenharmony_ci} 166862306a36Sopenharmony_ci 166962306a36Sopenharmony_ci/* 167062306a36Sopenharmony_ci------------------------------------------------------------------------------- 167162306a36Sopenharmony_ciReturns the result of converting the double-precision floating-point value 167262306a36Sopenharmony_ci`a' to the 32-bit two's complement integer format. The conversion is 167362306a36Sopenharmony_ciperformed according to the IEC/IEEE Standard for Binary Floating-point 167462306a36Sopenharmony_ciArithmetic, except that the conversion is always rounded toward zero. If 167562306a36Sopenharmony_ci`a' is a NaN, the largest positive integer is returned. Otherwise, if the 167662306a36Sopenharmony_ciconversion overflows, the largest integer with the same sign as `a' is 167762306a36Sopenharmony_cireturned. 167862306a36Sopenharmony_ci------------------------------------------------------------------------------- 167962306a36Sopenharmony_ci*/ 168062306a36Sopenharmony_ciint32 float64_to_int32_round_to_zero( float64 a ) 168162306a36Sopenharmony_ci{ 168262306a36Sopenharmony_ci flag aSign; 168362306a36Sopenharmony_ci int16 aExp, shiftCount; 168462306a36Sopenharmony_ci bits64 aSig, savedASig; 168562306a36Sopenharmony_ci int32 z; 168662306a36Sopenharmony_ci 168762306a36Sopenharmony_ci aSig = extractFloat64Frac( a ); 168862306a36Sopenharmony_ci aExp = extractFloat64Exp( a ); 168962306a36Sopenharmony_ci aSign = extractFloat64Sign( a ); 169062306a36Sopenharmony_ci shiftCount = 0x433 - aExp; 169162306a36Sopenharmony_ci if ( shiftCount < 21 ) { 169262306a36Sopenharmony_ci if ( ( aExp == 0x7FF ) && aSig ) aSign = 0; 169362306a36Sopenharmony_ci goto invalid; 169462306a36Sopenharmony_ci } 169562306a36Sopenharmony_ci else if ( 52 < shiftCount ) { 169662306a36Sopenharmony_ci if ( aExp || aSig ) float_raise( float_flag_inexact ); 169762306a36Sopenharmony_ci return 0; 169862306a36Sopenharmony_ci } 169962306a36Sopenharmony_ci aSig |= LIT64( 0x0010000000000000 ); 170062306a36Sopenharmony_ci savedASig = aSig; 170162306a36Sopenharmony_ci aSig >>= shiftCount; 170262306a36Sopenharmony_ci z = aSig; 170362306a36Sopenharmony_ci if ( aSign ) z = - z; 170462306a36Sopenharmony_ci if ( ( z < 0 ) ^ aSign ) { 170562306a36Sopenharmony_ci invalid: 170662306a36Sopenharmony_ci float_raise( float_flag_invalid ); 170762306a36Sopenharmony_ci return aSign ? 0x80000000 : 0x7FFFFFFF; 170862306a36Sopenharmony_ci } 170962306a36Sopenharmony_ci if ( ( aSig<<shiftCount ) != savedASig ) { 171062306a36Sopenharmony_ci float_raise( float_flag_inexact ); 171162306a36Sopenharmony_ci } 171262306a36Sopenharmony_ci return z; 171362306a36Sopenharmony_ci 171462306a36Sopenharmony_ci} 171562306a36Sopenharmony_ci 171662306a36Sopenharmony_ci/* 171762306a36Sopenharmony_ci------------------------------------------------------------------------------- 171862306a36Sopenharmony_ciReturns the result of converting the double-precision floating-point value 171962306a36Sopenharmony_ci`a' to the 32-bit two's complement unsigned integer format. The conversion 172062306a36Sopenharmony_ciis performed according to the IEC/IEEE Standard for Binary Floating-point 172162306a36Sopenharmony_ciArithmetic---which means in particular that the conversion is rounded 172262306a36Sopenharmony_ciaccording to the current rounding mode. If `a' is a NaN, the largest 172362306a36Sopenharmony_cipositive integer is returned. Otherwise, if the conversion overflows, the 172462306a36Sopenharmony_cilargest positive integer is returned. 172562306a36Sopenharmony_ci------------------------------------------------------------------------------- 172662306a36Sopenharmony_ci*/ 172762306a36Sopenharmony_ciint32 float64_to_uint32( struct roundingData *roundData, float64 a ) 172862306a36Sopenharmony_ci{ 172962306a36Sopenharmony_ci flag aSign; 173062306a36Sopenharmony_ci int16 aExp, shiftCount; 173162306a36Sopenharmony_ci bits64 aSig; 173262306a36Sopenharmony_ci 173362306a36Sopenharmony_ci aSig = extractFloat64Frac( a ); 173462306a36Sopenharmony_ci aExp = extractFloat64Exp( a ); 173562306a36Sopenharmony_ci aSign = 0; //extractFloat64Sign( a ); 173662306a36Sopenharmony_ci //if ( ( aExp == 0x7FF ) && aSig ) aSign = 0; 173762306a36Sopenharmony_ci if ( aExp ) aSig |= LIT64( 0x0010000000000000 ); 173862306a36Sopenharmony_ci shiftCount = 0x42C - aExp; 173962306a36Sopenharmony_ci if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig ); 174062306a36Sopenharmony_ci return roundAndPackInt32( roundData, aSign, aSig ); 174162306a36Sopenharmony_ci} 174262306a36Sopenharmony_ci 174362306a36Sopenharmony_ci/* 174462306a36Sopenharmony_ci------------------------------------------------------------------------------- 174562306a36Sopenharmony_ciReturns the result of converting the double-precision floating-point value 174662306a36Sopenharmony_ci`a' to the 32-bit two's complement integer format. The conversion is 174762306a36Sopenharmony_ciperformed according to the IEC/IEEE Standard for Binary Floating-point 174862306a36Sopenharmony_ciArithmetic, except that the conversion is always rounded toward zero. If 174962306a36Sopenharmony_ci`a' is a NaN, the largest positive integer is returned. Otherwise, if the 175062306a36Sopenharmony_ciconversion overflows, the largest positive integer is returned. 175162306a36Sopenharmony_ci------------------------------------------------------------------------------- 175262306a36Sopenharmony_ci*/ 175362306a36Sopenharmony_ciint32 float64_to_uint32_round_to_zero( float64 a ) 175462306a36Sopenharmony_ci{ 175562306a36Sopenharmony_ci flag aSign; 175662306a36Sopenharmony_ci int16 aExp, shiftCount; 175762306a36Sopenharmony_ci bits64 aSig, savedASig; 175862306a36Sopenharmony_ci int32 z; 175962306a36Sopenharmony_ci 176062306a36Sopenharmony_ci aSig = extractFloat64Frac( a ); 176162306a36Sopenharmony_ci aExp = extractFloat64Exp( a ); 176262306a36Sopenharmony_ci aSign = extractFloat64Sign( a ); 176362306a36Sopenharmony_ci shiftCount = 0x433 - aExp; 176462306a36Sopenharmony_ci if ( shiftCount < 21 ) { 176562306a36Sopenharmony_ci if ( ( aExp == 0x7FF ) && aSig ) aSign = 0; 176662306a36Sopenharmony_ci goto invalid; 176762306a36Sopenharmony_ci } 176862306a36Sopenharmony_ci else if ( 52 < shiftCount ) { 176962306a36Sopenharmony_ci if ( aExp || aSig ) float_raise( float_flag_inexact ); 177062306a36Sopenharmony_ci return 0; 177162306a36Sopenharmony_ci } 177262306a36Sopenharmony_ci aSig |= LIT64( 0x0010000000000000 ); 177362306a36Sopenharmony_ci savedASig = aSig; 177462306a36Sopenharmony_ci aSig >>= shiftCount; 177562306a36Sopenharmony_ci z = aSig; 177662306a36Sopenharmony_ci if ( aSign ) z = - z; 177762306a36Sopenharmony_ci if ( ( z < 0 ) ^ aSign ) { 177862306a36Sopenharmony_ci invalid: 177962306a36Sopenharmony_ci float_raise( float_flag_invalid ); 178062306a36Sopenharmony_ci return aSign ? 0x80000000 : 0x7FFFFFFF; 178162306a36Sopenharmony_ci } 178262306a36Sopenharmony_ci if ( ( aSig<<shiftCount ) != savedASig ) { 178362306a36Sopenharmony_ci float_raise( float_flag_inexact ); 178462306a36Sopenharmony_ci } 178562306a36Sopenharmony_ci return z; 178662306a36Sopenharmony_ci} 178762306a36Sopenharmony_ci 178862306a36Sopenharmony_ci/* 178962306a36Sopenharmony_ci------------------------------------------------------------------------------- 179062306a36Sopenharmony_ciReturns the result of converting the double-precision floating-point value 179162306a36Sopenharmony_ci`a' to the single-precision floating-point format. The conversion is 179262306a36Sopenharmony_ciperformed according to the IEC/IEEE Standard for Binary Floating-point 179362306a36Sopenharmony_ciArithmetic. 179462306a36Sopenharmony_ci------------------------------------------------------------------------------- 179562306a36Sopenharmony_ci*/ 179662306a36Sopenharmony_cifloat32 float64_to_float32( struct roundingData *roundData, float64 a ) 179762306a36Sopenharmony_ci{ 179862306a36Sopenharmony_ci flag aSign; 179962306a36Sopenharmony_ci int16 aExp; 180062306a36Sopenharmony_ci bits64 aSig; 180162306a36Sopenharmony_ci bits32 zSig; 180262306a36Sopenharmony_ci 180362306a36Sopenharmony_ci aSig = extractFloat64Frac( a ); 180462306a36Sopenharmony_ci aExp = extractFloat64Exp( a ); 180562306a36Sopenharmony_ci aSign = extractFloat64Sign( a ); 180662306a36Sopenharmony_ci if ( aExp == 0x7FF ) { 180762306a36Sopenharmony_ci if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a ) ); 180862306a36Sopenharmony_ci return packFloat32( aSign, 0xFF, 0 ); 180962306a36Sopenharmony_ci } 181062306a36Sopenharmony_ci shift64RightJamming( aSig, 22, &aSig ); 181162306a36Sopenharmony_ci zSig = aSig; 181262306a36Sopenharmony_ci if ( aExp || zSig ) { 181362306a36Sopenharmony_ci zSig |= 0x40000000; 181462306a36Sopenharmony_ci aExp -= 0x381; 181562306a36Sopenharmony_ci } 181662306a36Sopenharmony_ci return roundAndPackFloat32( roundData, aSign, aExp, zSig ); 181762306a36Sopenharmony_ci 181862306a36Sopenharmony_ci} 181962306a36Sopenharmony_ci 182062306a36Sopenharmony_ci#ifdef FLOATX80 182162306a36Sopenharmony_ci 182262306a36Sopenharmony_ci/* 182362306a36Sopenharmony_ci------------------------------------------------------------------------------- 182462306a36Sopenharmony_ciReturns the result of converting the double-precision floating-point value 182562306a36Sopenharmony_ci`a' to the extended double-precision floating-point format. The conversion 182662306a36Sopenharmony_ciis performed according to the IEC/IEEE Standard for Binary Floating-point 182762306a36Sopenharmony_ciArithmetic. 182862306a36Sopenharmony_ci------------------------------------------------------------------------------- 182962306a36Sopenharmony_ci*/ 183062306a36Sopenharmony_cifloatx80 float64_to_floatx80( float64 a ) 183162306a36Sopenharmony_ci{ 183262306a36Sopenharmony_ci flag aSign; 183362306a36Sopenharmony_ci int16 aExp; 183462306a36Sopenharmony_ci bits64 aSig; 183562306a36Sopenharmony_ci 183662306a36Sopenharmony_ci aSig = extractFloat64Frac( a ); 183762306a36Sopenharmony_ci aExp = extractFloat64Exp( a ); 183862306a36Sopenharmony_ci aSign = extractFloat64Sign( a ); 183962306a36Sopenharmony_ci if ( aExp == 0x7FF ) { 184062306a36Sopenharmony_ci if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a ) ); 184162306a36Sopenharmony_ci return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); 184262306a36Sopenharmony_ci } 184362306a36Sopenharmony_ci if ( aExp == 0 ) { 184462306a36Sopenharmony_ci if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 ); 184562306a36Sopenharmony_ci normalizeFloat64Subnormal( aSig, &aExp, &aSig ); 184662306a36Sopenharmony_ci } 184762306a36Sopenharmony_ci return 184862306a36Sopenharmony_ci packFloatx80( 184962306a36Sopenharmony_ci aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 ); 185062306a36Sopenharmony_ci 185162306a36Sopenharmony_ci} 185262306a36Sopenharmony_ci 185362306a36Sopenharmony_ci#endif 185462306a36Sopenharmony_ci 185562306a36Sopenharmony_ci/* 185662306a36Sopenharmony_ci------------------------------------------------------------------------------- 185762306a36Sopenharmony_ciRounds the double-precision floating-point value `a' to an integer, and 185862306a36Sopenharmony_cireturns the result as a double-precision floating-point value. The 185962306a36Sopenharmony_cioperation is performed according to the IEC/IEEE Standard for Binary 186062306a36Sopenharmony_ciFloating-point Arithmetic. 186162306a36Sopenharmony_ci------------------------------------------------------------------------------- 186262306a36Sopenharmony_ci*/ 186362306a36Sopenharmony_cifloat64 float64_round_to_int( struct roundingData *roundData, float64 a ) 186462306a36Sopenharmony_ci{ 186562306a36Sopenharmony_ci flag aSign; 186662306a36Sopenharmony_ci int16 aExp; 186762306a36Sopenharmony_ci bits64 lastBitMask, roundBitsMask; 186862306a36Sopenharmony_ci int8 roundingMode; 186962306a36Sopenharmony_ci float64 z; 187062306a36Sopenharmony_ci 187162306a36Sopenharmony_ci aExp = extractFloat64Exp( a ); 187262306a36Sopenharmony_ci if ( 0x433 <= aExp ) { 187362306a36Sopenharmony_ci if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) { 187462306a36Sopenharmony_ci return propagateFloat64NaN( a, a ); 187562306a36Sopenharmony_ci } 187662306a36Sopenharmony_ci return a; 187762306a36Sopenharmony_ci } 187862306a36Sopenharmony_ci if ( aExp <= 0x3FE ) { 187962306a36Sopenharmony_ci if ( (bits64) ( a<<1 ) == 0 ) return a; 188062306a36Sopenharmony_ci roundData->exception |= float_flag_inexact; 188162306a36Sopenharmony_ci aSign = extractFloat64Sign( a ); 188262306a36Sopenharmony_ci switch ( roundData->mode ) { 188362306a36Sopenharmony_ci case float_round_nearest_even: 188462306a36Sopenharmony_ci if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) { 188562306a36Sopenharmony_ci return packFloat64( aSign, 0x3FF, 0 ); 188662306a36Sopenharmony_ci } 188762306a36Sopenharmony_ci break; 188862306a36Sopenharmony_ci case float_round_down: 188962306a36Sopenharmony_ci return aSign ? LIT64( 0xBFF0000000000000 ) : 0; 189062306a36Sopenharmony_ci case float_round_up: 189162306a36Sopenharmony_ci return 189262306a36Sopenharmony_ci aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 ); 189362306a36Sopenharmony_ci } 189462306a36Sopenharmony_ci return packFloat64( aSign, 0, 0 ); 189562306a36Sopenharmony_ci } 189662306a36Sopenharmony_ci lastBitMask = 1; 189762306a36Sopenharmony_ci lastBitMask <<= 0x433 - aExp; 189862306a36Sopenharmony_ci roundBitsMask = lastBitMask - 1; 189962306a36Sopenharmony_ci z = a; 190062306a36Sopenharmony_ci roundingMode = roundData->mode; 190162306a36Sopenharmony_ci if ( roundingMode == float_round_nearest_even ) { 190262306a36Sopenharmony_ci z += lastBitMask>>1; 190362306a36Sopenharmony_ci if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask; 190462306a36Sopenharmony_ci } 190562306a36Sopenharmony_ci else if ( roundingMode != float_round_to_zero ) { 190662306a36Sopenharmony_ci if ( extractFloat64Sign( z ) ^ ( roundingMode == float_round_up ) ) { 190762306a36Sopenharmony_ci z += roundBitsMask; 190862306a36Sopenharmony_ci } 190962306a36Sopenharmony_ci } 191062306a36Sopenharmony_ci z &= ~ roundBitsMask; 191162306a36Sopenharmony_ci if ( z != a ) roundData->exception |= float_flag_inexact; 191262306a36Sopenharmony_ci return z; 191362306a36Sopenharmony_ci 191462306a36Sopenharmony_ci} 191562306a36Sopenharmony_ci 191662306a36Sopenharmony_ci/* 191762306a36Sopenharmony_ci------------------------------------------------------------------------------- 191862306a36Sopenharmony_ciReturns the result of adding the absolute values of the double-precision 191962306a36Sopenharmony_cifloating-point values `a' and `b'. If `zSign' is true, the sum is negated 192062306a36Sopenharmony_cibefore being returned. `zSign' is ignored if the result is a NaN. The 192162306a36Sopenharmony_ciaddition is performed according to the IEC/IEEE Standard for Binary 192262306a36Sopenharmony_ciFloating-point Arithmetic. 192362306a36Sopenharmony_ci------------------------------------------------------------------------------- 192462306a36Sopenharmony_ci*/ 192562306a36Sopenharmony_cistatic float64 addFloat64Sigs( struct roundingData *roundData, float64 a, float64 b, flag zSign ) 192662306a36Sopenharmony_ci{ 192762306a36Sopenharmony_ci int16 aExp, bExp, zExp; 192862306a36Sopenharmony_ci bits64 aSig, bSig, zSig; 192962306a36Sopenharmony_ci int16 expDiff; 193062306a36Sopenharmony_ci 193162306a36Sopenharmony_ci aSig = extractFloat64Frac( a ); 193262306a36Sopenharmony_ci aExp = extractFloat64Exp( a ); 193362306a36Sopenharmony_ci bSig = extractFloat64Frac( b ); 193462306a36Sopenharmony_ci bExp = extractFloat64Exp( b ); 193562306a36Sopenharmony_ci expDiff = aExp - bExp; 193662306a36Sopenharmony_ci aSig <<= 9; 193762306a36Sopenharmony_ci bSig <<= 9; 193862306a36Sopenharmony_ci if ( 0 < expDiff ) { 193962306a36Sopenharmony_ci if ( aExp == 0x7FF ) { 194062306a36Sopenharmony_ci if ( aSig ) return propagateFloat64NaN( a, b ); 194162306a36Sopenharmony_ci return a; 194262306a36Sopenharmony_ci } 194362306a36Sopenharmony_ci if ( bExp == 0 ) { 194462306a36Sopenharmony_ci --expDiff; 194562306a36Sopenharmony_ci } 194662306a36Sopenharmony_ci else { 194762306a36Sopenharmony_ci bSig |= LIT64( 0x2000000000000000 ); 194862306a36Sopenharmony_ci } 194962306a36Sopenharmony_ci shift64RightJamming( bSig, expDiff, &bSig ); 195062306a36Sopenharmony_ci zExp = aExp; 195162306a36Sopenharmony_ci } 195262306a36Sopenharmony_ci else if ( expDiff < 0 ) { 195362306a36Sopenharmony_ci if ( bExp == 0x7FF ) { 195462306a36Sopenharmony_ci if ( bSig ) return propagateFloat64NaN( a, b ); 195562306a36Sopenharmony_ci return packFloat64( zSign, 0x7FF, 0 ); 195662306a36Sopenharmony_ci } 195762306a36Sopenharmony_ci if ( aExp == 0 ) { 195862306a36Sopenharmony_ci ++expDiff; 195962306a36Sopenharmony_ci } 196062306a36Sopenharmony_ci else { 196162306a36Sopenharmony_ci aSig |= LIT64( 0x2000000000000000 ); 196262306a36Sopenharmony_ci } 196362306a36Sopenharmony_ci shift64RightJamming( aSig, - expDiff, &aSig ); 196462306a36Sopenharmony_ci zExp = bExp; 196562306a36Sopenharmony_ci } 196662306a36Sopenharmony_ci else { 196762306a36Sopenharmony_ci if ( aExp == 0x7FF ) { 196862306a36Sopenharmony_ci if ( aSig | bSig ) return propagateFloat64NaN( a, b ); 196962306a36Sopenharmony_ci return a; 197062306a36Sopenharmony_ci } 197162306a36Sopenharmony_ci if ( aExp == 0 ) return packFloat64( zSign, 0, ( aSig + bSig )>>9 ); 197262306a36Sopenharmony_ci zSig = LIT64( 0x4000000000000000 ) + aSig + bSig; 197362306a36Sopenharmony_ci zExp = aExp; 197462306a36Sopenharmony_ci goto roundAndPack; 197562306a36Sopenharmony_ci } 197662306a36Sopenharmony_ci aSig |= LIT64( 0x2000000000000000 ); 197762306a36Sopenharmony_ci zSig = ( aSig + bSig )<<1; 197862306a36Sopenharmony_ci --zExp; 197962306a36Sopenharmony_ci if ( (sbits64) zSig < 0 ) { 198062306a36Sopenharmony_ci zSig = aSig + bSig; 198162306a36Sopenharmony_ci ++zExp; 198262306a36Sopenharmony_ci } 198362306a36Sopenharmony_ci roundAndPack: 198462306a36Sopenharmony_ci return roundAndPackFloat64( roundData, zSign, zExp, zSig ); 198562306a36Sopenharmony_ci 198662306a36Sopenharmony_ci} 198762306a36Sopenharmony_ci 198862306a36Sopenharmony_ci/* 198962306a36Sopenharmony_ci------------------------------------------------------------------------------- 199062306a36Sopenharmony_ciReturns the result of subtracting the absolute values of the double- 199162306a36Sopenharmony_ciprecision floating-point values `a' and `b'. If `zSign' is true, the 199262306a36Sopenharmony_cidifference is negated before being returned. `zSign' is ignored if the 199362306a36Sopenharmony_ciresult is a NaN. The subtraction is performed according to the IEC/IEEE 199462306a36Sopenharmony_ciStandard for Binary Floating-point Arithmetic. 199562306a36Sopenharmony_ci------------------------------------------------------------------------------- 199662306a36Sopenharmony_ci*/ 199762306a36Sopenharmony_cistatic float64 subFloat64Sigs( struct roundingData *roundData, float64 a, float64 b, flag zSign ) 199862306a36Sopenharmony_ci{ 199962306a36Sopenharmony_ci int16 aExp, bExp, zExp; 200062306a36Sopenharmony_ci bits64 aSig, bSig, zSig; 200162306a36Sopenharmony_ci int16 expDiff; 200262306a36Sopenharmony_ci 200362306a36Sopenharmony_ci aSig = extractFloat64Frac( a ); 200462306a36Sopenharmony_ci aExp = extractFloat64Exp( a ); 200562306a36Sopenharmony_ci bSig = extractFloat64Frac( b ); 200662306a36Sopenharmony_ci bExp = extractFloat64Exp( b ); 200762306a36Sopenharmony_ci expDiff = aExp - bExp; 200862306a36Sopenharmony_ci aSig <<= 10; 200962306a36Sopenharmony_ci bSig <<= 10; 201062306a36Sopenharmony_ci if ( 0 < expDiff ) goto aExpBigger; 201162306a36Sopenharmony_ci if ( expDiff < 0 ) goto bExpBigger; 201262306a36Sopenharmony_ci if ( aExp == 0x7FF ) { 201362306a36Sopenharmony_ci if ( aSig | bSig ) return propagateFloat64NaN( a, b ); 201462306a36Sopenharmony_ci roundData->exception |= float_flag_invalid; 201562306a36Sopenharmony_ci return float64_default_nan; 201662306a36Sopenharmony_ci } 201762306a36Sopenharmony_ci if ( aExp == 0 ) { 201862306a36Sopenharmony_ci aExp = 1; 201962306a36Sopenharmony_ci bExp = 1; 202062306a36Sopenharmony_ci } 202162306a36Sopenharmony_ci if ( bSig < aSig ) goto aBigger; 202262306a36Sopenharmony_ci if ( aSig < bSig ) goto bBigger; 202362306a36Sopenharmony_ci return packFloat64( roundData->mode == float_round_down, 0, 0 ); 202462306a36Sopenharmony_ci bExpBigger: 202562306a36Sopenharmony_ci if ( bExp == 0x7FF ) { 202662306a36Sopenharmony_ci if ( bSig ) return propagateFloat64NaN( a, b ); 202762306a36Sopenharmony_ci return packFloat64( zSign ^ 1, 0x7FF, 0 ); 202862306a36Sopenharmony_ci } 202962306a36Sopenharmony_ci if ( aExp == 0 ) { 203062306a36Sopenharmony_ci ++expDiff; 203162306a36Sopenharmony_ci } 203262306a36Sopenharmony_ci else { 203362306a36Sopenharmony_ci aSig |= LIT64( 0x4000000000000000 ); 203462306a36Sopenharmony_ci } 203562306a36Sopenharmony_ci shift64RightJamming( aSig, - expDiff, &aSig ); 203662306a36Sopenharmony_ci bSig |= LIT64( 0x4000000000000000 ); 203762306a36Sopenharmony_ci bBigger: 203862306a36Sopenharmony_ci zSig = bSig - aSig; 203962306a36Sopenharmony_ci zExp = bExp; 204062306a36Sopenharmony_ci zSign ^= 1; 204162306a36Sopenharmony_ci goto normalizeRoundAndPack; 204262306a36Sopenharmony_ci aExpBigger: 204362306a36Sopenharmony_ci if ( aExp == 0x7FF ) { 204462306a36Sopenharmony_ci if ( aSig ) return propagateFloat64NaN( a, b ); 204562306a36Sopenharmony_ci return a; 204662306a36Sopenharmony_ci } 204762306a36Sopenharmony_ci if ( bExp == 0 ) { 204862306a36Sopenharmony_ci --expDiff; 204962306a36Sopenharmony_ci } 205062306a36Sopenharmony_ci else { 205162306a36Sopenharmony_ci bSig |= LIT64( 0x4000000000000000 ); 205262306a36Sopenharmony_ci } 205362306a36Sopenharmony_ci shift64RightJamming( bSig, expDiff, &bSig ); 205462306a36Sopenharmony_ci aSig |= LIT64( 0x4000000000000000 ); 205562306a36Sopenharmony_ci aBigger: 205662306a36Sopenharmony_ci zSig = aSig - bSig; 205762306a36Sopenharmony_ci zExp = aExp; 205862306a36Sopenharmony_ci normalizeRoundAndPack: 205962306a36Sopenharmony_ci --zExp; 206062306a36Sopenharmony_ci return normalizeRoundAndPackFloat64( roundData, zSign, zExp, zSig ); 206162306a36Sopenharmony_ci 206262306a36Sopenharmony_ci} 206362306a36Sopenharmony_ci 206462306a36Sopenharmony_ci/* 206562306a36Sopenharmony_ci------------------------------------------------------------------------------- 206662306a36Sopenharmony_ciReturns the result of adding the double-precision floating-point values `a' 206762306a36Sopenharmony_ciand `b'. The operation is performed according to the IEC/IEEE Standard for 206862306a36Sopenharmony_ciBinary Floating-point Arithmetic. 206962306a36Sopenharmony_ci------------------------------------------------------------------------------- 207062306a36Sopenharmony_ci*/ 207162306a36Sopenharmony_cifloat64 float64_add( struct roundingData *roundData, float64 a, float64 b ) 207262306a36Sopenharmony_ci{ 207362306a36Sopenharmony_ci flag aSign, bSign; 207462306a36Sopenharmony_ci 207562306a36Sopenharmony_ci aSign = extractFloat64Sign( a ); 207662306a36Sopenharmony_ci bSign = extractFloat64Sign( b ); 207762306a36Sopenharmony_ci if ( aSign == bSign ) { 207862306a36Sopenharmony_ci return addFloat64Sigs( roundData, a, b, aSign ); 207962306a36Sopenharmony_ci } 208062306a36Sopenharmony_ci else { 208162306a36Sopenharmony_ci return subFloat64Sigs( roundData, a, b, aSign ); 208262306a36Sopenharmony_ci } 208362306a36Sopenharmony_ci 208462306a36Sopenharmony_ci} 208562306a36Sopenharmony_ci 208662306a36Sopenharmony_ci/* 208762306a36Sopenharmony_ci------------------------------------------------------------------------------- 208862306a36Sopenharmony_ciReturns the result of subtracting the double-precision floating-point values 208962306a36Sopenharmony_ci`a' and `b'. The operation is performed according to the IEC/IEEE Standard 209062306a36Sopenharmony_cifor Binary Floating-point Arithmetic. 209162306a36Sopenharmony_ci------------------------------------------------------------------------------- 209262306a36Sopenharmony_ci*/ 209362306a36Sopenharmony_cifloat64 float64_sub( struct roundingData *roundData, float64 a, float64 b ) 209462306a36Sopenharmony_ci{ 209562306a36Sopenharmony_ci flag aSign, bSign; 209662306a36Sopenharmony_ci 209762306a36Sopenharmony_ci aSign = extractFloat64Sign( a ); 209862306a36Sopenharmony_ci bSign = extractFloat64Sign( b ); 209962306a36Sopenharmony_ci if ( aSign == bSign ) { 210062306a36Sopenharmony_ci return subFloat64Sigs( roundData, a, b, aSign ); 210162306a36Sopenharmony_ci } 210262306a36Sopenharmony_ci else { 210362306a36Sopenharmony_ci return addFloat64Sigs( roundData, a, b, aSign ); 210462306a36Sopenharmony_ci } 210562306a36Sopenharmony_ci 210662306a36Sopenharmony_ci} 210762306a36Sopenharmony_ci 210862306a36Sopenharmony_ci/* 210962306a36Sopenharmony_ci------------------------------------------------------------------------------- 211062306a36Sopenharmony_ciReturns the result of multiplying the double-precision floating-point values 211162306a36Sopenharmony_ci`a' and `b'. The operation is performed according to the IEC/IEEE Standard 211262306a36Sopenharmony_cifor Binary Floating-point Arithmetic. 211362306a36Sopenharmony_ci------------------------------------------------------------------------------- 211462306a36Sopenharmony_ci*/ 211562306a36Sopenharmony_cifloat64 float64_mul( struct roundingData *roundData, float64 a, float64 b ) 211662306a36Sopenharmony_ci{ 211762306a36Sopenharmony_ci flag aSign, bSign, zSign; 211862306a36Sopenharmony_ci int16 aExp, bExp, zExp; 211962306a36Sopenharmony_ci bits64 aSig, bSig, zSig0, zSig1; 212062306a36Sopenharmony_ci 212162306a36Sopenharmony_ci aSig = extractFloat64Frac( a ); 212262306a36Sopenharmony_ci aExp = extractFloat64Exp( a ); 212362306a36Sopenharmony_ci aSign = extractFloat64Sign( a ); 212462306a36Sopenharmony_ci bSig = extractFloat64Frac( b ); 212562306a36Sopenharmony_ci bExp = extractFloat64Exp( b ); 212662306a36Sopenharmony_ci bSign = extractFloat64Sign( b ); 212762306a36Sopenharmony_ci zSign = aSign ^ bSign; 212862306a36Sopenharmony_ci if ( aExp == 0x7FF ) { 212962306a36Sopenharmony_ci if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) { 213062306a36Sopenharmony_ci return propagateFloat64NaN( a, b ); 213162306a36Sopenharmony_ci } 213262306a36Sopenharmony_ci if ( ( bExp | bSig ) == 0 ) { 213362306a36Sopenharmony_ci roundData->exception |= float_flag_invalid; 213462306a36Sopenharmony_ci return float64_default_nan; 213562306a36Sopenharmony_ci } 213662306a36Sopenharmony_ci return packFloat64( zSign, 0x7FF, 0 ); 213762306a36Sopenharmony_ci } 213862306a36Sopenharmony_ci if ( bExp == 0x7FF ) { 213962306a36Sopenharmony_ci if ( bSig ) return propagateFloat64NaN( a, b ); 214062306a36Sopenharmony_ci if ( ( aExp | aSig ) == 0 ) { 214162306a36Sopenharmony_ci roundData->exception |= float_flag_invalid; 214262306a36Sopenharmony_ci return float64_default_nan; 214362306a36Sopenharmony_ci } 214462306a36Sopenharmony_ci return packFloat64( zSign, 0x7FF, 0 ); 214562306a36Sopenharmony_ci } 214662306a36Sopenharmony_ci if ( aExp == 0 ) { 214762306a36Sopenharmony_ci if ( aSig == 0 ) return packFloat64( zSign, 0, 0 ); 214862306a36Sopenharmony_ci normalizeFloat64Subnormal( aSig, &aExp, &aSig ); 214962306a36Sopenharmony_ci } 215062306a36Sopenharmony_ci if ( bExp == 0 ) { 215162306a36Sopenharmony_ci if ( bSig == 0 ) return packFloat64( zSign, 0, 0 ); 215262306a36Sopenharmony_ci normalizeFloat64Subnormal( bSig, &bExp, &bSig ); 215362306a36Sopenharmony_ci } 215462306a36Sopenharmony_ci zExp = aExp + bExp - 0x3FF; 215562306a36Sopenharmony_ci aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10; 215662306a36Sopenharmony_ci bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11; 215762306a36Sopenharmony_ci mul64To128( aSig, bSig, &zSig0, &zSig1 ); 215862306a36Sopenharmony_ci zSig0 |= ( zSig1 != 0 ); 215962306a36Sopenharmony_ci if ( 0 <= (sbits64) ( zSig0<<1 ) ) { 216062306a36Sopenharmony_ci zSig0 <<= 1; 216162306a36Sopenharmony_ci --zExp; 216262306a36Sopenharmony_ci } 216362306a36Sopenharmony_ci return roundAndPackFloat64( roundData, zSign, zExp, zSig0 ); 216462306a36Sopenharmony_ci 216562306a36Sopenharmony_ci} 216662306a36Sopenharmony_ci 216762306a36Sopenharmony_ci/* 216862306a36Sopenharmony_ci------------------------------------------------------------------------------- 216962306a36Sopenharmony_ciReturns the result of dividing the double-precision floating-point value `a' 217062306a36Sopenharmony_ciby the corresponding value `b'. The operation is performed according to 217162306a36Sopenharmony_cithe IEC/IEEE Standard for Binary Floating-point Arithmetic. 217262306a36Sopenharmony_ci------------------------------------------------------------------------------- 217362306a36Sopenharmony_ci*/ 217462306a36Sopenharmony_cifloat64 float64_div( struct roundingData *roundData, float64 a, float64 b ) 217562306a36Sopenharmony_ci{ 217662306a36Sopenharmony_ci flag aSign, bSign, zSign; 217762306a36Sopenharmony_ci int16 aExp, bExp, zExp; 217862306a36Sopenharmony_ci bits64 aSig, bSig, zSig; 217962306a36Sopenharmony_ci bits64 rem0, rem1; 218062306a36Sopenharmony_ci bits64 term0, term1; 218162306a36Sopenharmony_ci 218262306a36Sopenharmony_ci aSig = extractFloat64Frac( a ); 218362306a36Sopenharmony_ci aExp = extractFloat64Exp( a ); 218462306a36Sopenharmony_ci aSign = extractFloat64Sign( a ); 218562306a36Sopenharmony_ci bSig = extractFloat64Frac( b ); 218662306a36Sopenharmony_ci bExp = extractFloat64Exp( b ); 218762306a36Sopenharmony_ci bSign = extractFloat64Sign( b ); 218862306a36Sopenharmony_ci zSign = aSign ^ bSign; 218962306a36Sopenharmony_ci if ( aExp == 0x7FF ) { 219062306a36Sopenharmony_ci if ( aSig ) return propagateFloat64NaN( a, b ); 219162306a36Sopenharmony_ci if ( bExp == 0x7FF ) { 219262306a36Sopenharmony_ci if ( bSig ) return propagateFloat64NaN( a, b ); 219362306a36Sopenharmony_ci roundData->exception |= float_flag_invalid; 219462306a36Sopenharmony_ci return float64_default_nan; 219562306a36Sopenharmony_ci } 219662306a36Sopenharmony_ci return packFloat64( zSign, 0x7FF, 0 ); 219762306a36Sopenharmony_ci } 219862306a36Sopenharmony_ci if ( bExp == 0x7FF ) { 219962306a36Sopenharmony_ci if ( bSig ) return propagateFloat64NaN( a, b ); 220062306a36Sopenharmony_ci return packFloat64( zSign, 0, 0 ); 220162306a36Sopenharmony_ci } 220262306a36Sopenharmony_ci if ( bExp == 0 ) { 220362306a36Sopenharmony_ci if ( bSig == 0 ) { 220462306a36Sopenharmony_ci if ( ( aExp | aSig ) == 0 ) { 220562306a36Sopenharmony_ci roundData->exception |= float_flag_invalid; 220662306a36Sopenharmony_ci return float64_default_nan; 220762306a36Sopenharmony_ci } 220862306a36Sopenharmony_ci roundData->exception |= float_flag_divbyzero; 220962306a36Sopenharmony_ci return packFloat64( zSign, 0x7FF, 0 ); 221062306a36Sopenharmony_ci } 221162306a36Sopenharmony_ci normalizeFloat64Subnormal( bSig, &bExp, &bSig ); 221262306a36Sopenharmony_ci } 221362306a36Sopenharmony_ci if ( aExp == 0 ) { 221462306a36Sopenharmony_ci if ( aSig == 0 ) return packFloat64( zSign, 0, 0 ); 221562306a36Sopenharmony_ci normalizeFloat64Subnormal( aSig, &aExp, &aSig ); 221662306a36Sopenharmony_ci } 221762306a36Sopenharmony_ci zExp = aExp - bExp + 0x3FD; 221862306a36Sopenharmony_ci aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10; 221962306a36Sopenharmony_ci bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11; 222062306a36Sopenharmony_ci if ( bSig <= ( aSig + aSig ) ) { 222162306a36Sopenharmony_ci aSig >>= 1; 222262306a36Sopenharmony_ci ++zExp; 222362306a36Sopenharmony_ci } 222462306a36Sopenharmony_ci zSig = estimateDiv128To64( aSig, 0, bSig ); 222562306a36Sopenharmony_ci if ( ( zSig & 0x1FF ) <= 2 ) { 222662306a36Sopenharmony_ci mul64To128( bSig, zSig, &term0, &term1 ); 222762306a36Sopenharmony_ci sub128( aSig, 0, term0, term1, &rem0, &rem1 ); 222862306a36Sopenharmony_ci while ( (sbits64) rem0 < 0 ) { 222962306a36Sopenharmony_ci --zSig; 223062306a36Sopenharmony_ci add128( rem0, rem1, 0, bSig, &rem0, &rem1 ); 223162306a36Sopenharmony_ci } 223262306a36Sopenharmony_ci zSig |= ( rem1 != 0 ); 223362306a36Sopenharmony_ci } 223462306a36Sopenharmony_ci return roundAndPackFloat64( roundData, zSign, zExp, zSig ); 223562306a36Sopenharmony_ci 223662306a36Sopenharmony_ci} 223762306a36Sopenharmony_ci 223862306a36Sopenharmony_ci/* 223962306a36Sopenharmony_ci------------------------------------------------------------------------------- 224062306a36Sopenharmony_ciReturns the remainder of the double-precision floating-point value `a' 224162306a36Sopenharmony_ciwith respect to the corresponding value `b'. The operation is performed 224262306a36Sopenharmony_ciaccording to the IEC/IEEE Standard for Binary Floating-point Arithmetic. 224362306a36Sopenharmony_ci------------------------------------------------------------------------------- 224462306a36Sopenharmony_ci*/ 224562306a36Sopenharmony_cifloat64 float64_rem( struct roundingData *roundData, float64 a, float64 b ) 224662306a36Sopenharmony_ci{ 224762306a36Sopenharmony_ci flag aSign, bSign, zSign; 224862306a36Sopenharmony_ci int16 aExp, bExp, expDiff; 224962306a36Sopenharmony_ci bits64 aSig, bSig; 225062306a36Sopenharmony_ci bits64 q, alternateASig; 225162306a36Sopenharmony_ci sbits64 sigMean; 225262306a36Sopenharmony_ci 225362306a36Sopenharmony_ci aSig = extractFloat64Frac( a ); 225462306a36Sopenharmony_ci aExp = extractFloat64Exp( a ); 225562306a36Sopenharmony_ci aSign = extractFloat64Sign( a ); 225662306a36Sopenharmony_ci bSig = extractFloat64Frac( b ); 225762306a36Sopenharmony_ci bExp = extractFloat64Exp( b ); 225862306a36Sopenharmony_ci bSign = extractFloat64Sign( b ); 225962306a36Sopenharmony_ci if ( aExp == 0x7FF ) { 226062306a36Sopenharmony_ci if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) { 226162306a36Sopenharmony_ci return propagateFloat64NaN( a, b ); 226262306a36Sopenharmony_ci } 226362306a36Sopenharmony_ci roundData->exception |= float_flag_invalid; 226462306a36Sopenharmony_ci return float64_default_nan; 226562306a36Sopenharmony_ci } 226662306a36Sopenharmony_ci if ( bExp == 0x7FF ) { 226762306a36Sopenharmony_ci if ( bSig ) return propagateFloat64NaN( a, b ); 226862306a36Sopenharmony_ci return a; 226962306a36Sopenharmony_ci } 227062306a36Sopenharmony_ci if ( bExp == 0 ) { 227162306a36Sopenharmony_ci if ( bSig == 0 ) { 227262306a36Sopenharmony_ci roundData->exception |= float_flag_invalid; 227362306a36Sopenharmony_ci return float64_default_nan; 227462306a36Sopenharmony_ci } 227562306a36Sopenharmony_ci normalizeFloat64Subnormal( bSig, &bExp, &bSig ); 227662306a36Sopenharmony_ci } 227762306a36Sopenharmony_ci if ( aExp == 0 ) { 227862306a36Sopenharmony_ci if ( aSig == 0 ) return a; 227962306a36Sopenharmony_ci normalizeFloat64Subnormal( aSig, &aExp, &aSig ); 228062306a36Sopenharmony_ci } 228162306a36Sopenharmony_ci expDiff = aExp - bExp; 228262306a36Sopenharmony_ci aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11; 228362306a36Sopenharmony_ci bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11; 228462306a36Sopenharmony_ci if ( expDiff < 0 ) { 228562306a36Sopenharmony_ci if ( expDiff < -1 ) return a; 228662306a36Sopenharmony_ci aSig >>= 1; 228762306a36Sopenharmony_ci } 228862306a36Sopenharmony_ci q = ( bSig <= aSig ); 228962306a36Sopenharmony_ci if ( q ) aSig -= bSig; 229062306a36Sopenharmony_ci expDiff -= 64; 229162306a36Sopenharmony_ci while ( 0 < expDiff ) { 229262306a36Sopenharmony_ci q = estimateDiv128To64( aSig, 0, bSig ); 229362306a36Sopenharmony_ci q = ( 2 < q ) ? q - 2 : 0; 229462306a36Sopenharmony_ci aSig = - ( ( bSig>>2 ) * q ); 229562306a36Sopenharmony_ci expDiff -= 62; 229662306a36Sopenharmony_ci } 229762306a36Sopenharmony_ci expDiff += 64; 229862306a36Sopenharmony_ci if ( 0 < expDiff ) { 229962306a36Sopenharmony_ci q = estimateDiv128To64( aSig, 0, bSig ); 230062306a36Sopenharmony_ci q = ( 2 < q ) ? q - 2 : 0; 230162306a36Sopenharmony_ci q >>= 64 - expDiff; 230262306a36Sopenharmony_ci bSig >>= 2; 230362306a36Sopenharmony_ci aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q; 230462306a36Sopenharmony_ci } 230562306a36Sopenharmony_ci else { 230662306a36Sopenharmony_ci aSig >>= 2; 230762306a36Sopenharmony_ci bSig >>= 2; 230862306a36Sopenharmony_ci } 230962306a36Sopenharmony_ci do { 231062306a36Sopenharmony_ci alternateASig = aSig; 231162306a36Sopenharmony_ci ++q; 231262306a36Sopenharmony_ci aSig -= bSig; 231362306a36Sopenharmony_ci } while ( 0 <= (sbits64) aSig ); 231462306a36Sopenharmony_ci sigMean = aSig + alternateASig; 231562306a36Sopenharmony_ci if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) { 231662306a36Sopenharmony_ci aSig = alternateASig; 231762306a36Sopenharmony_ci } 231862306a36Sopenharmony_ci zSign = ( (sbits64) aSig < 0 ); 231962306a36Sopenharmony_ci if ( zSign ) aSig = - aSig; 232062306a36Sopenharmony_ci return normalizeRoundAndPackFloat64( roundData, aSign ^ zSign, bExp, aSig ); 232162306a36Sopenharmony_ci 232262306a36Sopenharmony_ci} 232362306a36Sopenharmony_ci 232462306a36Sopenharmony_ci/* 232562306a36Sopenharmony_ci------------------------------------------------------------------------------- 232662306a36Sopenharmony_ciReturns the square root of the double-precision floating-point value `a'. 232762306a36Sopenharmony_ciThe operation is performed according to the IEC/IEEE Standard for Binary 232862306a36Sopenharmony_ciFloating-point Arithmetic. 232962306a36Sopenharmony_ci------------------------------------------------------------------------------- 233062306a36Sopenharmony_ci*/ 233162306a36Sopenharmony_cifloat64 float64_sqrt( struct roundingData *roundData, float64 a ) 233262306a36Sopenharmony_ci{ 233362306a36Sopenharmony_ci flag aSign; 233462306a36Sopenharmony_ci int16 aExp, zExp; 233562306a36Sopenharmony_ci bits64 aSig, zSig; 233662306a36Sopenharmony_ci bits64 rem0, rem1, term0, term1; //, shiftedRem; 233762306a36Sopenharmony_ci //float64 z; 233862306a36Sopenharmony_ci 233962306a36Sopenharmony_ci aSig = extractFloat64Frac( a ); 234062306a36Sopenharmony_ci aExp = extractFloat64Exp( a ); 234162306a36Sopenharmony_ci aSign = extractFloat64Sign( a ); 234262306a36Sopenharmony_ci if ( aExp == 0x7FF ) { 234362306a36Sopenharmony_ci if ( aSig ) return propagateFloat64NaN( a, a ); 234462306a36Sopenharmony_ci if ( ! aSign ) return a; 234562306a36Sopenharmony_ci roundData->exception |= float_flag_invalid; 234662306a36Sopenharmony_ci return float64_default_nan; 234762306a36Sopenharmony_ci } 234862306a36Sopenharmony_ci if ( aSign ) { 234962306a36Sopenharmony_ci if ( ( aExp | aSig ) == 0 ) return a; 235062306a36Sopenharmony_ci roundData->exception |= float_flag_invalid; 235162306a36Sopenharmony_ci return float64_default_nan; 235262306a36Sopenharmony_ci } 235362306a36Sopenharmony_ci if ( aExp == 0 ) { 235462306a36Sopenharmony_ci if ( aSig == 0 ) return 0; 235562306a36Sopenharmony_ci normalizeFloat64Subnormal( aSig, &aExp, &aSig ); 235662306a36Sopenharmony_ci } 235762306a36Sopenharmony_ci zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE; 235862306a36Sopenharmony_ci aSig |= LIT64( 0x0010000000000000 ); 235962306a36Sopenharmony_ci zSig = estimateSqrt32( aExp, aSig>>21 ); 236062306a36Sopenharmony_ci zSig <<= 31; 236162306a36Sopenharmony_ci aSig <<= 9 - ( aExp & 1 ); 236262306a36Sopenharmony_ci zSig = estimateDiv128To64( aSig, 0, zSig ) + zSig + 2; 236362306a36Sopenharmony_ci if ( ( zSig & 0x3FF ) <= 5 ) { 236462306a36Sopenharmony_ci if ( zSig < 2 ) { 236562306a36Sopenharmony_ci zSig = LIT64( 0xFFFFFFFFFFFFFFFF ); 236662306a36Sopenharmony_ci } 236762306a36Sopenharmony_ci else { 236862306a36Sopenharmony_ci aSig <<= 2; 236962306a36Sopenharmony_ci mul64To128( zSig, zSig, &term0, &term1 ); 237062306a36Sopenharmony_ci sub128( aSig, 0, term0, term1, &rem0, &rem1 ); 237162306a36Sopenharmony_ci while ( (sbits64) rem0 < 0 ) { 237262306a36Sopenharmony_ci --zSig; 237362306a36Sopenharmony_ci shortShift128Left( 0, zSig, 1, &term0, &term1 ); 237462306a36Sopenharmony_ci term1 |= 1; 237562306a36Sopenharmony_ci add128( rem0, rem1, term0, term1, &rem0, &rem1 ); 237662306a36Sopenharmony_ci } 237762306a36Sopenharmony_ci zSig |= ( ( rem0 | rem1 ) != 0 ); 237862306a36Sopenharmony_ci } 237962306a36Sopenharmony_ci } 238062306a36Sopenharmony_ci shift64RightJamming( zSig, 1, &zSig ); 238162306a36Sopenharmony_ci return roundAndPackFloat64( roundData, 0, zExp, zSig ); 238262306a36Sopenharmony_ci 238362306a36Sopenharmony_ci} 238462306a36Sopenharmony_ci 238562306a36Sopenharmony_ci/* 238662306a36Sopenharmony_ci------------------------------------------------------------------------------- 238762306a36Sopenharmony_ciReturns 1 if the double-precision floating-point value `a' is equal to the 238862306a36Sopenharmony_cicorresponding value `b', and 0 otherwise. The comparison is performed 238962306a36Sopenharmony_ciaccording to the IEC/IEEE Standard for Binary Floating-point Arithmetic. 239062306a36Sopenharmony_ci------------------------------------------------------------------------------- 239162306a36Sopenharmony_ci*/ 239262306a36Sopenharmony_ciflag float64_eq( float64 a, float64 b ) 239362306a36Sopenharmony_ci{ 239462306a36Sopenharmony_ci 239562306a36Sopenharmony_ci if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) 239662306a36Sopenharmony_ci || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) 239762306a36Sopenharmony_ci ) { 239862306a36Sopenharmony_ci if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { 239962306a36Sopenharmony_ci float_raise( float_flag_invalid ); 240062306a36Sopenharmony_ci } 240162306a36Sopenharmony_ci return 0; 240262306a36Sopenharmony_ci } 240362306a36Sopenharmony_ci return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 ); 240462306a36Sopenharmony_ci 240562306a36Sopenharmony_ci} 240662306a36Sopenharmony_ci 240762306a36Sopenharmony_ci/* 240862306a36Sopenharmony_ci------------------------------------------------------------------------------- 240962306a36Sopenharmony_ciReturns 1 if the double-precision floating-point value `a' is less than or 241062306a36Sopenharmony_ciequal to the corresponding value `b', and 0 otherwise. The comparison is 241162306a36Sopenharmony_ciperformed according to the IEC/IEEE Standard for Binary Floating-point 241262306a36Sopenharmony_ciArithmetic. 241362306a36Sopenharmony_ci------------------------------------------------------------------------------- 241462306a36Sopenharmony_ci*/ 241562306a36Sopenharmony_ciflag float64_le( float64 a, float64 b ) 241662306a36Sopenharmony_ci{ 241762306a36Sopenharmony_ci flag aSign, bSign; 241862306a36Sopenharmony_ci 241962306a36Sopenharmony_ci if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) 242062306a36Sopenharmony_ci || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) 242162306a36Sopenharmony_ci ) { 242262306a36Sopenharmony_ci float_raise( float_flag_invalid ); 242362306a36Sopenharmony_ci return 0; 242462306a36Sopenharmony_ci } 242562306a36Sopenharmony_ci aSign = extractFloat64Sign( a ); 242662306a36Sopenharmony_ci bSign = extractFloat64Sign( b ); 242762306a36Sopenharmony_ci if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 ); 242862306a36Sopenharmony_ci return ( a == b ) || ( aSign ^ ( a < b ) ); 242962306a36Sopenharmony_ci 243062306a36Sopenharmony_ci} 243162306a36Sopenharmony_ci 243262306a36Sopenharmony_ci/* 243362306a36Sopenharmony_ci------------------------------------------------------------------------------- 243462306a36Sopenharmony_ciReturns 1 if the double-precision floating-point value `a' is less than 243562306a36Sopenharmony_cithe corresponding value `b', and 0 otherwise. The comparison is performed 243662306a36Sopenharmony_ciaccording to the IEC/IEEE Standard for Binary Floating-point Arithmetic. 243762306a36Sopenharmony_ci------------------------------------------------------------------------------- 243862306a36Sopenharmony_ci*/ 243962306a36Sopenharmony_ciflag float64_lt( float64 a, float64 b ) 244062306a36Sopenharmony_ci{ 244162306a36Sopenharmony_ci flag aSign, bSign; 244262306a36Sopenharmony_ci 244362306a36Sopenharmony_ci if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) 244462306a36Sopenharmony_ci || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) 244562306a36Sopenharmony_ci ) { 244662306a36Sopenharmony_ci float_raise( float_flag_invalid ); 244762306a36Sopenharmony_ci return 0; 244862306a36Sopenharmony_ci } 244962306a36Sopenharmony_ci aSign = extractFloat64Sign( a ); 245062306a36Sopenharmony_ci bSign = extractFloat64Sign( b ); 245162306a36Sopenharmony_ci if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 ); 245262306a36Sopenharmony_ci return ( a != b ) && ( aSign ^ ( a < b ) ); 245362306a36Sopenharmony_ci 245462306a36Sopenharmony_ci} 245562306a36Sopenharmony_ci 245662306a36Sopenharmony_ci/* 245762306a36Sopenharmony_ci------------------------------------------------------------------------------- 245862306a36Sopenharmony_ciReturns 1 if the double-precision floating-point value `a' is equal to the 245962306a36Sopenharmony_cicorresponding value `b', and 0 otherwise. The invalid exception is raised 246062306a36Sopenharmony_ciif either operand is a NaN. Otherwise, the comparison is performed 246162306a36Sopenharmony_ciaccording to the IEC/IEEE Standard for Binary Floating-point Arithmetic. 246262306a36Sopenharmony_ci------------------------------------------------------------------------------- 246362306a36Sopenharmony_ci*/ 246462306a36Sopenharmony_ciflag float64_eq_signaling( float64 a, float64 b ) 246562306a36Sopenharmony_ci{ 246662306a36Sopenharmony_ci 246762306a36Sopenharmony_ci if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) 246862306a36Sopenharmony_ci || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) 246962306a36Sopenharmony_ci ) { 247062306a36Sopenharmony_ci float_raise( float_flag_invalid ); 247162306a36Sopenharmony_ci return 0; 247262306a36Sopenharmony_ci } 247362306a36Sopenharmony_ci return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 ); 247462306a36Sopenharmony_ci 247562306a36Sopenharmony_ci} 247662306a36Sopenharmony_ci 247762306a36Sopenharmony_ci/* 247862306a36Sopenharmony_ci------------------------------------------------------------------------------- 247962306a36Sopenharmony_ciReturns 1 if the double-precision floating-point value `a' is less than or 248062306a36Sopenharmony_ciequal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not 248162306a36Sopenharmony_cicause an exception. Otherwise, the comparison is performed according to the 248262306a36Sopenharmony_ciIEC/IEEE Standard for Binary Floating-point Arithmetic. 248362306a36Sopenharmony_ci------------------------------------------------------------------------------- 248462306a36Sopenharmony_ci*/ 248562306a36Sopenharmony_ciflag float64_le_quiet( float64 a, float64 b ) 248662306a36Sopenharmony_ci{ 248762306a36Sopenharmony_ci flag aSign, bSign; 248862306a36Sopenharmony_ci //int16 aExp, bExp; 248962306a36Sopenharmony_ci 249062306a36Sopenharmony_ci if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) 249162306a36Sopenharmony_ci || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) 249262306a36Sopenharmony_ci ) { 249362306a36Sopenharmony_ci /* Do nothing, even if NaN as we're quiet */ 249462306a36Sopenharmony_ci return 0; 249562306a36Sopenharmony_ci } 249662306a36Sopenharmony_ci aSign = extractFloat64Sign( a ); 249762306a36Sopenharmony_ci bSign = extractFloat64Sign( b ); 249862306a36Sopenharmony_ci if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 ); 249962306a36Sopenharmony_ci return ( a == b ) || ( aSign ^ ( a < b ) ); 250062306a36Sopenharmony_ci 250162306a36Sopenharmony_ci} 250262306a36Sopenharmony_ci 250362306a36Sopenharmony_ci/* 250462306a36Sopenharmony_ci------------------------------------------------------------------------------- 250562306a36Sopenharmony_ciReturns 1 if the double-precision floating-point value `a' is less than 250662306a36Sopenharmony_cithe corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an 250762306a36Sopenharmony_ciexception. Otherwise, the comparison is performed according to the IEC/IEEE 250862306a36Sopenharmony_ciStandard for Binary Floating-point Arithmetic. 250962306a36Sopenharmony_ci------------------------------------------------------------------------------- 251062306a36Sopenharmony_ci*/ 251162306a36Sopenharmony_ciflag float64_lt_quiet( float64 a, float64 b ) 251262306a36Sopenharmony_ci{ 251362306a36Sopenharmony_ci flag aSign, bSign; 251462306a36Sopenharmony_ci 251562306a36Sopenharmony_ci if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) 251662306a36Sopenharmony_ci || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) 251762306a36Sopenharmony_ci ) { 251862306a36Sopenharmony_ci /* Do nothing, even if NaN as we're quiet */ 251962306a36Sopenharmony_ci return 0; 252062306a36Sopenharmony_ci } 252162306a36Sopenharmony_ci aSign = extractFloat64Sign( a ); 252262306a36Sopenharmony_ci bSign = extractFloat64Sign( b ); 252362306a36Sopenharmony_ci if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 ); 252462306a36Sopenharmony_ci return ( a != b ) && ( aSign ^ ( a < b ) ); 252562306a36Sopenharmony_ci 252662306a36Sopenharmony_ci} 252762306a36Sopenharmony_ci 252862306a36Sopenharmony_ci#ifdef FLOATX80 252962306a36Sopenharmony_ci 253062306a36Sopenharmony_ci/* 253162306a36Sopenharmony_ci------------------------------------------------------------------------------- 253262306a36Sopenharmony_ciReturns the result of converting the extended double-precision floating- 253362306a36Sopenharmony_cipoint value `a' to the 32-bit two's complement integer format. The 253462306a36Sopenharmony_ciconversion is performed according to the IEC/IEEE Standard for Binary 253562306a36Sopenharmony_ciFloating-point Arithmetic---which means in particular that the conversion 253662306a36Sopenharmony_ciis rounded according to the current rounding mode. If `a' is a NaN, the 253762306a36Sopenharmony_cilargest positive integer is returned. Otherwise, if the conversion 253862306a36Sopenharmony_cioverflows, the largest integer with the same sign as `a' is returned. 253962306a36Sopenharmony_ci------------------------------------------------------------------------------- 254062306a36Sopenharmony_ci*/ 254162306a36Sopenharmony_ciint32 floatx80_to_int32( struct roundingData *roundData, floatx80 a ) 254262306a36Sopenharmony_ci{ 254362306a36Sopenharmony_ci flag aSign; 254462306a36Sopenharmony_ci int32 aExp, shiftCount; 254562306a36Sopenharmony_ci bits64 aSig; 254662306a36Sopenharmony_ci 254762306a36Sopenharmony_ci aSig = extractFloatx80Frac( a ); 254862306a36Sopenharmony_ci aExp = extractFloatx80Exp( a ); 254962306a36Sopenharmony_ci aSign = extractFloatx80Sign( a ); 255062306a36Sopenharmony_ci if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0; 255162306a36Sopenharmony_ci shiftCount = 0x4037 - aExp; 255262306a36Sopenharmony_ci if ( shiftCount <= 0 ) shiftCount = 1; 255362306a36Sopenharmony_ci shift64RightJamming( aSig, shiftCount, &aSig ); 255462306a36Sopenharmony_ci return roundAndPackInt32( roundData, aSign, aSig ); 255562306a36Sopenharmony_ci 255662306a36Sopenharmony_ci} 255762306a36Sopenharmony_ci 255862306a36Sopenharmony_ci/* 255962306a36Sopenharmony_ci------------------------------------------------------------------------------- 256062306a36Sopenharmony_ciReturns the result of converting the extended double-precision floating- 256162306a36Sopenharmony_cipoint value `a' to the 32-bit two's complement integer format. The 256262306a36Sopenharmony_ciconversion is performed according to the IEC/IEEE Standard for Binary 256362306a36Sopenharmony_ciFloating-point Arithmetic, except that the conversion is always rounded 256462306a36Sopenharmony_citoward zero. If `a' is a NaN, the largest positive integer is returned. 256562306a36Sopenharmony_ciOtherwise, if the conversion overflows, the largest integer with the same 256662306a36Sopenharmony_cisign as `a' is returned. 256762306a36Sopenharmony_ci------------------------------------------------------------------------------- 256862306a36Sopenharmony_ci*/ 256962306a36Sopenharmony_ciint32 floatx80_to_int32_round_to_zero( floatx80 a ) 257062306a36Sopenharmony_ci{ 257162306a36Sopenharmony_ci flag aSign; 257262306a36Sopenharmony_ci int32 aExp, shiftCount; 257362306a36Sopenharmony_ci bits64 aSig, savedASig; 257462306a36Sopenharmony_ci int32 z; 257562306a36Sopenharmony_ci 257662306a36Sopenharmony_ci aSig = extractFloatx80Frac( a ); 257762306a36Sopenharmony_ci aExp = extractFloatx80Exp( a ); 257862306a36Sopenharmony_ci aSign = extractFloatx80Sign( a ); 257962306a36Sopenharmony_ci shiftCount = 0x403E - aExp; 258062306a36Sopenharmony_ci if ( shiftCount < 32 ) { 258162306a36Sopenharmony_ci if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0; 258262306a36Sopenharmony_ci goto invalid; 258362306a36Sopenharmony_ci } 258462306a36Sopenharmony_ci else if ( 63 < shiftCount ) { 258562306a36Sopenharmony_ci if ( aExp || aSig ) float_raise( float_flag_inexact ); 258662306a36Sopenharmony_ci return 0; 258762306a36Sopenharmony_ci } 258862306a36Sopenharmony_ci savedASig = aSig; 258962306a36Sopenharmony_ci aSig >>= shiftCount; 259062306a36Sopenharmony_ci z = aSig; 259162306a36Sopenharmony_ci if ( aSign ) z = - z; 259262306a36Sopenharmony_ci if ( ( z < 0 ) ^ aSign ) { 259362306a36Sopenharmony_ci invalid: 259462306a36Sopenharmony_ci float_raise( float_flag_invalid ); 259562306a36Sopenharmony_ci return aSign ? 0x80000000 : 0x7FFFFFFF; 259662306a36Sopenharmony_ci } 259762306a36Sopenharmony_ci if ( ( aSig<<shiftCount ) != savedASig ) { 259862306a36Sopenharmony_ci float_raise( float_flag_inexact ); 259962306a36Sopenharmony_ci } 260062306a36Sopenharmony_ci return z; 260162306a36Sopenharmony_ci 260262306a36Sopenharmony_ci} 260362306a36Sopenharmony_ci 260462306a36Sopenharmony_ci/* 260562306a36Sopenharmony_ci------------------------------------------------------------------------------- 260662306a36Sopenharmony_ciReturns the result of converting the extended double-precision floating- 260762306a36Sopenharmony_cipoint value `a' to the single-precision floating-point format. The 260862306a36Sopenharmony_ciconversion is performed according to the IEC/IEEE Standard for Binary 260962306a36Sopenharmony_ciFloating-point Arithmetic. 261062306a36Sopenharmony_ci------------------------------------------------------------------------------- 261162306a36Sopenharmony_ci*/ 261262306a36Sopenharmony_cifloat32 floatx80_to_float32( struct roundingData *roundData, floatx80 a ) 261362306a36Sopenharmony_ci{ 261462306a36Sopenharmony_ci flag aSign; 261562306a36Sopenharmony_ci int32 aExp; 261662306a36Sopenharmony_ci bits64 aSig; 261762306a36Sopenharmony_ci 261862306a36Sopenharmony_ci aSig = extractFloatx80Frac( a ); 261962306a36Sopenharmony_ci aExp = extractFloatx80Exp( a ); 262062306a36Sopenharmony_ci aSign = extractFloatx80Sign( a ); 262162306a36Sopenharmony_ci if ( aExp == 0x7FFF ) { 262262306a36Sopenharmony_ci if ( (bits64) ( aSig<<1 ) ) { 262362306a36Sopenharmony_ci return commonNaNToFloat32( floatx80ToCommonNaN( a ) ); 262462306a36Sopenharmony_ci } 262562306a36Sopenharmony_ci return packFloat32( aSign, 0xFF, 0 ); 262662306a36Sopenharmony_ci } 262762306a36Sopenharmony_ci shift64RightJamming( aSig, 33, &aSig ); 262862306a36Sopenharmony_ci if ( aExp || aSig ) aExp -= 0x3F81; 262962306a36Sopenharmony_ci return roundAndPackFloat32( roundData, aSign, aExp, aSig ); 263062306a36Sopenharmony_ci 263162306a36Sopenharmony_ci} 263262306a36Sopenharmony_ci 263362306a36Sopenharmony_ci/* 263462306a36Sopenharmony_ci------------------------------------------------------------------------------- 263562306a36Sopenharmony_ciReturns the result of converting the extended double-precision floating- 263662306a36Sopenharmony_cipoint value `a' to the double-precision floating-point format. The 263762306a36Sopenharmony_ciconversion is performed according to the IEC/IEEE Standard for Binary 263862306a36Sopenharmony_ciFloating-point Arithmetic. 263962306a36Sopenharmony_ci------------------------------------------------------------------------------- 264062306a36Sopenharmony_ci*/ 264162306a36Sopenharmony_cifloat64 floatx80_to_float64( struct roundingData *roundData, floatx80 a ) 264262306a36Sopenharmony_ci{ 264362306a36Sopenharmony_ci flag aSign; 264462306a36Sopenharmony_ci int32 aExp; 264562306a36Sopenharmony_ci bits64 aSig, zSig; 264662306a36Sopenharmony_ci 264762306a36Sopenharmony_ci aSig = extractFloatx80Frac( a ); 264862306a36Sopenharmony_ci aExp = extractFloatx80Exp( a ); 264962306a36Sopenharmony_ci aSign = extractFloatx80Sign( a ); 265062306a36Sopenharmony_ci if ( aExp == 0x7FFF ) { 265162306a36Sopenharmony_ci if ( (bits64) ( aSig<<1 ) ) { 265262306a36Sopenharmony_ci return commonNaNToFloat64( floatx80ToCommonNaN( a ) ); 265362306a36Sopenharmony_ci } 265462306a36Sopenharmony_ci return packFloat64( aSign, 0x7FF, 0 ); 265562306a36Sopenharmony_ci } 265662306a36Sopenharmony_ci shift64RightJamming( aSig, 1, &zSig ); 265762306a36Sopenharmony_ci if ( aExp || aSig ) aExp -= 0x3C01; 265862306a36Sopenharmony_ci return roundAndPackFloat64( roundData, aSign, aExp, zSig ); 265962306a36Sopenharmony_ci 266062306a36Sopenharmony_ci} 266162306a36Sopenharmony_ci 266262306a36Sopenharmony_ci/* 266362306a36Sopenharmony_ci------------------------------------------------------------------------------- 266462306a36Sopenharmony_ciRounds the extended double-precision floating-point value `a' to an integer, 266562306a36Sopenharmony_ciand returns the result as an extended quadruple-precision floating-point 266662306a36Sopenharmony_civalue. The operation is performed according to the IEC/IEEE Standard for 266762306a36Sopenharmony_ciBinary Floating-point Arithmetic. 266862306a36Sopenharmony_ci------------------------------------------------------------------------------- 266962306a36Sopenharmony_ci*/ 267062306a36Sopenharmony_cifloatx80 floatx80_round_to_int( struct roundingData *roundData, floatx80 a ) 267162306a36Sopenharmony_ci{ 267262306a36Sopenharmony_ci flag aSign; 267362306a36Sopenharmony_ci int32 aExp; 267462306a36Sopenharmony_ci bits64 lastBitMask, roundBitsMask; 267562306a36Sopenharmony_ci int8 roundingMode; 267662306a36Sopenharmony_ci floatx80 z; 267762306a36Sopenharmony_ci 267862306a36Sopenharmony_ci aExp = extractFloatx80Exp( a ); 267962306a36Sopenharmony_ci if ( 0x403E <= aExp ) { 268062306a36Sopenharmony_ci if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) { 268162306a36Sopenharmony_ci return propagateFloatx80NaN( a, a ); 268262306a36Sopenharmony_ci } 268362306a36Sopenharmony_ci return a; 268462306a36Sopenharmony_ci } 268562306a36Sopenharmony_ci if ( aExp <= 0x3FFE ) { 268662306a36Sopenharmony_ci if ( ( aExp == 0 ) 268762306a36Sopenharmony_ci && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) { 268862306a36Sopenharmony_ci return a; 268962306a36Sopenharmony_ci } 269062306a36Sopenharmony_ci roundData->exception |= float_flag_inexact; 269162306a36Sopenharmony_ci aSign = extractFloatx80Sign( a ); 269262306a36Sopenharmony_ci switch ( roundData->mode ) { 269362306a36Sopenharmony_ci case float_round_nearest_even: 269462306a36Sopenharmony_ci if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 ) 269562306a36Sopenharmony_ci ) { 269662306a36Sopenharmony_ci return 269762306a36Sopenharmony_ci packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) ); 269862306a36Sopenharmony_ci } 269962306a36Sopenharmony_ci break; 270062306a36Sopenharmony_ci case float_round_down: 270162306a36Sopenharmony_ci return 270262306a36Sopenharmony_ci aSign ? 270362306a36Sopenharmony_ci packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) ) 270462306a36Sopenharmony_ci : packFloatx80( 0, 0, 0 ); 270562306a36Sopenharmony_ci case float_round_up: 270662306a36Sopenharmony_ci return 270762306a36Sopenharmony_ci aSign ? packFloatx80( 1, 0, 0 ) 270862306a36Sopenharmony_ci : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) ); 270962306a36Sopenharmony_ci } 271062306a36Sopenharmony_ci return packFloatx80( aSign, 0, 0 ); 271162306a36Sopenharmony_ci } 271262306a36Sopenharmony_ci lastBitMask = 1; 271362306a36Sopenharmony_ci lastBitMask <<= 0x403E - aExp; 271462306a36Sopenharmony_ci roundBitsMask = lastBitMask - 1; 271562306a36Sopenharmony_ci z = a; 271662306a36Sopenharmony_ci roundingMode = roundData->mode; 271762306a36Sopenharmony_ci if ( roundingMode == float_round_nearest_even ) { 271862306a36Sopenharmony_ci z.low += lastBitMask>>1; 271962306a36Sopenharmony_ci if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask; 272062306a36Sopenharmony_ci } 272162306a36Sopenharmony_ci else if ( roundingMode != float_round_to_zero ) { 272262306a36Sopenharmony_ci if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) { 272362306a36Sopenharmony_ci z.low += roundBitsMask; 272462306a36Sopenharmony_ci } 272562306a36Sopenharmony_ci } 272662306a36Sopenharmony_ci z.low &= ~ roundBitsMask; 272762306a36Sopenharmony_ci if ( z.low == 0 ) { 272862306a36Sopenharmony_ci ++z.high; 272962306a36Sopenharmony_ci z.low = LIT64( 0x8000000000000000 ); 273062306a36Sopenharmony_ci } 273162306a36Sopenharmony_ci if ( z.low != a.low ) roundData->exception |= float_flag_inexact; 273262306a36Sopenharmony_ci return z; 273362306a36Sopenharmony_ci 273462306a36Sopenharmony_ci} 273562306a36Sopenharmony_ci 273662306a36Sopenharmony_ci/* 273762306a36Sopenharmony_ci------------------------------------------------------------------------------- 273862306a36Sopenharmony_ciReturns the result of adding the absolute values of the extended double- 273962306a36Sopenharmony_ciprecision floating-point values `a' and `b'. If `zSign' is true, the sum is 274062306a36Sopenharmony_cinegated before being returned. `zSign' is ignored if the result is a NaN. 274162306a36Sopenharmony_ciThe addition is performed according to the IEC/IEEE Standard for Binary 274262306a36Sopenharmony_ciFloating-point Arithmetic. 274362306a36Sopenharmony_ci------------------------------------------------------------------------------- 274462306a36Sopenharmony_ci*/ 274562306a36Sopenharmony_cistatic floatx80 addFloatx80Sigs( struct roundingData *roundData, floatx80 a, floatx80 b, flag zSign ) 274662306a36Sopenharmony_ci{ 274762306a36Sopenharmony_ci int32 aExp, bExp, zExp; 274862306a36Sopenharmony_ci bits64 aSig, bSig, zSig0, zSig1; 274962306a36Sopenharmony_ci int32 expDiff; 275062306a36Sopenharmony_ci 275162306a36Sopenharmony_ci aSig = extractFloatx80Frac( a ); 275262306a36Sopenharmony_ci aExp = extractFloatx80Exp( a ); 275362306a36Sopenharmony_ci bSig = extractFloatx80Frac( b ); 275462306a36Sopenharmony_ci bExp = extractFloatx80Exp( b ); 275562306a36Sopenharmony_ci expDiff = aExp - bExp; 275662306a36Sopenharmony_ci if ( 0 < expDiff ) { 275762306a36Sopenharmony_ci if ( aExp == 0x7FFF ) { 275862306a36Sopenharmony_ci if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b ); 275962306a36Sopenharmony_ci return a; 276062306a36Sopenharmony_ci } 276162306a36Sopenharmony_ci if ( bExp == 0 ) --expDiff; 276262306a36Sopenharmony_ci shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 ); 276362306a36Sopenharmony_ci zExp = aExp; 276462306a36Sopenharmony_ci } 276562306a36Sopenharmony_ci else if ( expDiff < 0 ) { 276662306a36Sopenharmony_ci if ( bExp == 0x7FFF ) { 276762306a36Sopenharmony_ci if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); 276862306a36Sopenharmony_ci return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); 276962306a36Sopenharmony_ci } 277062306a36Sopenharmony_ci if ( aExp == 0 ) ++expDiff; 277162306a36Sopenharmony_ci shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 ); 277262306a36Sopenharmony_ci zExp = bExp; 277362306a36Sopenharmony_ci } 277462306a36Sopenharmony_ci else { 277562306a36Sopenharmony_ci if ( aExp == 0x7FFF ) { 277662306a36Sopenharmony_ci if ( (bits64) ( ( aSig | bSig )<<1 ) ) { 277762306a36Sopenharmony_ci return propagateFloatx80NaN( a, b ); 277862306a36Sopenharmony_ci } 277962306a36Sopenharmony_ci return a; 278062306a36Sopenharmony_ci } 278162306a36Sopenharmony_ci zSig1 = 0; 278262306a36Sopenharmony_ci zSig0 = aSig + bSig; 278362306a36Sopenharmony_ci if ( aExp == 0 ) { 278462306a36Sopenharmony_ci normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 ); 278562306a36Sopenharmony_ci goto roundAndPack; 278662306a36Sopenharmony_ci } 278762306a36Sopenharmony_ci zExp = aExp; 278862306a36Sopenharmony_ci goto shiftRight1; 278962306a36Sopenharmony_ci } 279062306a36Sopenharmony_ci 279162306a36Sopenharmony_ci zSig0 = aSig + bSig; 279262306a36Sopenharmony_ci 279362306a36Sopenharmony_ci if ( (sbits64) zSig0 < 0 ) goto roundAndPack; 279462306a36Sopenharmony_ci shiftRight1: 279562306a36Sopenharmony_ci shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 ); 279662306a36Sopenharmony_ci zSig0 |= LIT64( 0x8000000000000000 ); 279762306a36Sopenharmony_ci ++zExp; 279862306a36Sopenharmony_ci roundAndPack: 279962306a36Sopenharmony_ci return 280062306a36Sopenharmony_ci roundAndPackFloatx80( 280162306a36Sopenharmony_ci roundData, zSign, zExp, zSig0, zSig1 ); 280262306a36Sopenharmony_ci 280362306a36Sopenharmony_ci} 280462306a36Sopenharmony_ci 280562306a36Sopenharmony_ci/* 280662306a36Sopenharmony_ci------------------------------------------------------------------------------- 280762306a36Sopenharmony_ciReturns the result of subtracting the absolute values of the extended 280862306a36Sopenharmony_cidouble-precision floating-point values `a' and `b'. If `zSign' is true, 280962306a36Sopenharmony_cithe difference is negated before being returned. `zSign' is ignored if the 281062306a36Sopenharmony_ciresult is a NaN. The subtraction is performed according to the IEC/IEEE 281162306a36Sopenharmony_ciStandard for Binary Floating-point Arithmetic. 281262306a36Sopenharmony_ci------------------------------------------------------------------------------- 281362306a36Sopenharmony_ci*/ 281462306a36Sopenharmony_cistatic floatx80 subFloatx80Sigs( struct roundingData *roundData, floatx80 a, floatx80 b, flag zSign ) 281562306a36Sopenharmony_ci{ 281662306a36Sopenharmony_ci int32 aExp, bExp, zExp; 281762306a36Sopenharmony_ci bits64 aSig, bSig, zSig0, zSig1; 281862306a36Sopenharmony_ci int32 expDiff; 281962306a36Sopenharmony_ci floatx80 z; 282062306a36Sopenharmony_ci 282162306a36Sopenharmony_ci aSig = extractFloatx80Frac( a ); 282262306a36Sopenharmony_ci aExp = extractFloatx80Exp( a ); 282362306a36Sopenharmony_ci bSig = extractFloatx80Frac( b ); 282462306a36Sopenharmony_ci bExp = extractFloatx80Exp( b ); 282562306a36Sopenharmony_ci expDiff = aExp - bExp; 282662306a36Sopenharmony_ci if ( 0 < expDiff ) goto aExpBigger; 282762306a36Sopenharmony_ci if ( expDiff < 0 ) goto bExpBigger; 282862306a36Sopenharmony_ci if ( aExp == 0x7FFF ) { 282962306a36Sopenharmony_ci if ( (bits64) ( ( aSig | bSig )<<1 ) ) { 283062306a36Sopenharmony_ci return propagateFloatx80NaN( a, b ); 283162306a36Sopenharmony_ci } 283262306a36Sopenharmony_ci roundData->exception |= float_flag_invalid; 283362306a36Sopenharmony_ci z.low = floatx80_default_nan_low; 283462306a36Sopenharmony_ci z.high = floatx80_default_nan_high; 283562306a36Sopenharmony_ci z.__padding = 0; 283662306a36Sopenharmony_ci return z; 283762306a36Sopenharmony_ci } 283862306a36Sopenharmony_ci if ( aExp == 0 ) { 283962306a36Sopenharmony_ci aExp = 1; 284062306a36Sopenharmony_ci bExp = 1; 284162306a36Sopenharmony_ci } 284262306a36Sopenharmony_ci zSig1 = 0; 284362306a36Sopenharmony_ci if ( bSig < aSig ) goto aBigger; 284462306a36Sopenharmony_ci if ( aSig < bSig ) goto bBigger; 284562306a36Sopenharmony_ci return packFloatx80( roundData->mode == float_round_down, 0, 0 ); 284662306a36Sopenharmony_ci bExpBigger: 284762306a36Sopenharmony_ci if ( bExp == 0x7FFF ) { 284862306a36Sopenharmony_ci if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); 284962306a36Sopenharmony_ci return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) ); 285062306a36Sopenharmony_ci } 285162306a36Sopenharmony_ci if ( aExp == 0 ) ++expDiff; 285262306a36Sopenharmony_ci shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 ); 285362306a36Sopenharmony_ci bBigger: 285462306a36Sopenharmony_ci sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 ); 285562306a36Sopenharmony_ci zExp = bExp; 285662306a36Sopenharmony_ci zSign ^= 1; 285762306a36Sopenharmony_ci goto normalizeRoundAndPack; 285862306a36Sopenharmony_ci aExpBigger: 285962306a36Sopenharmony_ci if ( aExp == 0x7FFF ) { 286062306a36Sopenharmony_ci if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b ); 286162306a36Sopenharmony_ci return a; 286262306a36Sopenharmony_ci } 286362306a36Sopenharmony_ci if ( bExp == 0 ) --expDiff; 286462306a36Sopenharmony_ci shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 ); 286562306a36Sopenharmony_ci aBigger: 286662306a36Sopenharmony_ci sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 ); 286762306a36Sopenharmony_ci zExp = aExp; 286862306a36Sopenharmony_ci normalizeRoundAndPack: 286962306a36Sopenharmony_ci return 287062306a36Sopenharmony_ci normalizeRoundAndPackFloatx80( 287162306a36Sopenharmony_ci roundData, zSign, zExp, zSig0, zSig1 ); 287262306a36Sopenharmony_ci 287362306a36Sopenharmony_ci} 287462306a36Sopenharmony_ci 287562306a36Sopenharmony_ci/* 287662306a36Sopenharmony_ci------------------------------------------------------------------------------- 287762306a36Sopenharmony_ciReturns the result of adding the extended double-precision floating-point 287862306a36Sopenharmony_civalues `a' and `b'. The operation is performed according to the IEC/IEEE 287962306a36Sopenharmony_ciStandard for Binary Floating-point Arithmetic. 288062306a36Sopenharmony_ci------------------------------------------------------------------------------- 288162306a36Sopenharmony_ci*/ 288262306a36Sopenharmony_cifloatx80 floatx80_add( struct roundingData *roundData, floatx80 a, floatx80 b ) 288362306a36Sopenharmony_ci{ 288462306a36Sopenharmony_ci flag aSign, bSign; 288562306a36Sopenharmony_ci 288662306a36Sopenharmony_ci aSign = extractFloatx80Sign( a ); 288762306a36Sopenharmony_ci bSign = extractFloatx80Sign( b ); 288862306a36Sopenharmony_ci if ( aSign == bSign ) { 288962306a36Sopenharmony_ci return addFloatx80Sigs( roundData, a, b, aSign ); 289062306a36Sopenharmony_ci } 289162306a36Sopenharmony_ci else { 289262306a36Sopenharmony_ci return subFloatx80Sigs( roundData, a, b, aSign ); 289362306a36Sopenharmony_ci } 289462306a36Sopenharmony_ci 289562306a36Sopenharmony_ci} 289662306a36Sopenharmony_ci 289762306a36Sopenharmony_ci/* 289862306a36Sopenharmony_ci------------------------------------------------------------------------------- 289962306a36Sopenharmony_ciReturns the result of subtracting the extended double-precision floating- 290062306a36Sopenharmony_cipoint values `a' and `b'. The operation is performed according to the 290162306a36Sopenharmony_ciIEC/IEEE Standard for Binary Floating-point Arithmetic. 290262306a36Sopenharmony_ci------------------------------------------------------------------------------- 290362306a36Sopenharmony_ci*/ 290462306a36Sopenharmony_cifloatx80 floatx80_sub( struct roundingData *roundData, floatx80 a, floatx80 b ) 290562306a36Sopenharmony_ci{ 290662306a36Sopenharmony_ci flag aSign, bSign; 290762306a36Sopenharmony_ci 290862306a36Sopenharmony_ci aSign = extractFloatx80Sign( a ); 290962306a36Sopenharmony_ci bSign = extractFloatx80Sign( b ); 291062306a36Sopenharmony_ci if ( aSign == bSign ) { 291162306a36Sopenharmony_ci return subFloatx80Sigs( roundData, a, b, aSign ); 291262306a36Sopenharmony_ci } 291362306a36Sopenharmony_ci else { 291462306a36Sopenharmony_ci return addFloatx80Sigs( roundData, a, b, aSign ); 291562306a36Sopenharmony_ci } 291662306a36Sopenharmony_ci 291762306a36Sopenharmony_ci} 291862306a36Sopenharmony_ci 291962306a36Sopenharmony_ci/* 292062306a36Sopenharmony_ci------------------------------------------------------------------------------- 292162306a36Sopenharmony_ciReturns the result of multiplying the extended double-precision floating- 292262306a36Sopenharmony_cipoint values `a' and `b'. The operation is performed according to the 292362306a36Sopenharmony_ciIEC/IEEE Standard for Binary Floating-point Arithmetic. 292462306a36Sopenharmony_ci------------------------------------------------------------------------------- 292562306a36Sopenharmony_ci*/ 292662306a36Sopenharmony_cifloatx80 floatx80_mul( struct roundingData *roundData, floatx80 a, floatx80 b ) 292762306a36Sopenharmony_ci{ 292862306a36Sopenharmony_ci flag aSign, bSign, zSign; 292962306a36Sopenharmony_ci int32 aExp, bExp, zExp; 293062306a36Sopenharmony_ci bits64 aSig, bSig, zSig0, zSig1; 293162306a36Sopenharmony_ci floatx80 z; 293262306a36Sopenharmony_ci 293362306a36Sopenharmony_ci aSig = extractFloatx80Frac( a ); 293462306a36Sopenharmony_ci aExp = extractFloatx80Exp( a ); 293562306a36Sopenharmony_ci aSign = extractFloatx80Sign( a ); 293662306a36Sopenharmony_ci bSig = extractFloatx80Frac( b ); 293762306a36Sopenharmony_ci bExp = extractFloatx80Exp( b ); 293862306a36Sopenharmony_ci bSign = extractFloatx80Sign( b ); 293962306a36Sopenharmony_ci zSign = aSign ^ bSign; 294062306a36Sopenharmony_ci if ( aExp == 0x7FFF ) { 294162306a36Sopenharmony_ci if ( (bits64) ( aSig<<1 ) 294262306a36Sopenharmony_ci || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) { 294362306a36Sopenharmony_ci return propagateFloatx80NaN( a, b ); 294462306a36Sopenharmony_ci } 294562306a36Sopenharmony_ci if ( ( bExp | bSig ) == 0 ) goto invalid; 294662306a36Sopenharmony_ci return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); 294762306a36Sopenharmony_ci } 294862306a36Sopenharmony_ci if ( bExp == 0x7FFF ) { 294962306a36Sopenharmony_ci if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); 295062306a36Sopenharmony_ci if ( ( aExp | aSig ) == 0 ) { 295162306a36Sopenharmony_ci invalid: 295262306a36Sopenharmony_ci roundData->exception |= float_flag_invalid; 295362306a36Sopenharmony_ci z.low = floatx80_default_nan_low; 295462306a36Sopenharmony_ci z.high = floatx80_default_nan_high; 295562306a36Sopenharmony_ci z.__padding = 0; 295662306a36Sopenharmony_ci return z; 295762306a36Sopenharmony_ci } 295862306a36Sopenharmony_ci return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); 295962306a36Sopenharmony_ci } 296062306a36Sopenharmony_ci if ( aExp == 0 ) { 296162306a36Sopenharmony_ci if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 ); 296262306a36Sopenharmony_ci normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); 296362306a36Sopenharmony_ci } 296462306a36Sopenharmony_ci if ( bExp == 0 ) { 296562306a36Sopenharmony_ci if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 ); 296662306a36Sopenharmony_ci normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); 296762306a36Sopenharmony_ci } 296862306a36Sopenharmony_ci zExp = aExp + bExp - 0x3FFE; 296962306a36Sopenharmony_ci mul64To128( aSig, bSig, &zSig0, &zSig1 ); 297062306a36Sopenharmony_ci if ( 0 < (sbits64) zSig0 ) { 297162306a36Sopenharmony_ci shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 ); 297262306a36Sopenharmony_ci --zExp; 297362306a36Sopenharmony_ci } 297462306a36Sopenharmony_ci return 297562306a36Sopenharmony_ci roundAndPackFloatx80( 297662306a36Sopenharmony_ci roundData, zSign, zExp, zSig0, zSig1 ); 297762306a36Sopenharmony_ci 297862306a36Sopenharmony_ci} 297962306a36Sopenharmony_ci 298062306a36Sopenharmony_ci/* 298162306a36Sopenharmony_ci------------------------------------------------------------------------------- 298262306a36Sopenharmony_ciReturns the result of dividing the extended double-precision floating-point 298362306a36Sopenharmony_civalue `a' by the corresponding value `b'. The operation is performed 298462306a36Sopenharmony_ciaccording to the IEC/IEEE Standard for Binary Floating-point Arithmetic. 298562306a36Sopenharmony_ci------------------------------------------------------------------------------- 298662306a36Sopenharmony_ci*/ 298762306a36Sopenharmony_cifloatx80 floatx80_div( struct roundingData *roundData, floatx80 a, floatx80 b ) 298862306a36Sopenharmony_ci{ 298962306a36Sopenharmony_ci flag aSign, bSign, zSign; 299062306a36Sopenharmony_ci int32 aExp, bExp, zExp; 299162306a36Sopenharmony_ci bits64 aSig, bSig, zSig0, zSig1; 299262306a36Sopenharmony_ci bits64 rem0, rem1, rem2, term0, term1, term2; 299362306a36Sopenharmony_ci floatx80 z; 299462306a36Sopenharmony_ci 299562306a36Sopenharmony_ci aSig = extractFloatx80Frac( a ); 299662306a36Sopenharmony_ci aExp = extractFloatx80Exp( a ); 299762306a36Sopenharmony_ci aSign = extractFloatx80Sign( a ); 299862306a36Sopenharmony_ci bSig = extractFloatx80Frac( b ); 299962306a36Sopenharmony_ci bExp = extractFloatx80Exp( b ); 300062306a36Sopenharmony_ci bSign = extractFloatx80Sign( b ); 300162306a36Sopenharmony_ci zSign = aSign ^ bSign; 300262306a36Sopenharmony_ci if ( aExp == 0x7FFF ) { 300362306a36Sopenharmony_ci if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b ); 300462306a36Sopenharmony_ci if ( bExp == 0x7FFF ) { 300562306a36Sopenharmony_ci if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); 300662306a36Sopenharmony_ci goto invalid; 300762306a36Sopenharmony_ci } 300862306a36Sopenharmony_ci return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); 300962306a36Sopenharmony_ci } 301062306a36Sopenharmony_ci if ( bExp == 0x7FFF ) { 301162306a36Sopenharmony_ci if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); 301262306a36Sopenharmony_ci return packFloatx80( zSign, 0, 0 ); 301362306a36Sopenharmony_ci } 301462306a36Sopenharmony_ci if ( bExp == 0 ) { 301562306a36Sopenharmony_ci if ( bSig == 0 ) { 301662306a36Sopenharmony_ci if ( ( aExp | aSig ) == 0 ) { 301762306a36Sopenharmony_ci invalid: 301862306a36Sopenharmony_ci roundData->exception |= float_flag_invalid; 301962306a36Sopenharmony_ci z.low = floatx80_default_nan_low; 302062306a36Sopenharmony_ci z.high = floatx80_default_nan_high; 302162306a36Sopenharmony_ci z.__padding = 0; 302262306a36Sopenharmony_ci return z; 302362306a36Sopenharmony_ci } 302462306a36Sopenharmony_ci roundData->exception |= float_flag_divbyzero; 302562306a36Sopenharmony_ci return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); 302662306a36Sopenharmony_ci } 302762306a36Sopenharmony_ci normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); 302862306a36Sopenharmony_ci } 302962306a36Sopenharmony_ci if ( aExp == 0 ) { 303062306a36Sopenharmony_ci if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 ); 303162306a36Sopenharmony_ci normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); 303262306a36Sopenharmony_ci } 303362306a36Sopenharmony_ci zExp = aExp - bExp + 0x3FFE; 303462306a36Sopenharmony_ci rem1 = 0; 303562306a36Sopenharmony_ci if ( bSig <= aSig ) { 303662306a36Sopenharmony_ci shift128Right( aSig, 0, 1, &aSig, &rem1 ); 303762306a36Sopenharmony_ci ++zExp; 303862306a36Sopenharmony_ci } 303962306a36Sopenharmony_ci zSig0 = estimateDiv128To64( aSig, rem1, bSig ); 304062306a36Sopenharmony_ci mul64To128( bSig, zSig0, &term0, &term1 ); 304162306a36Sopenharmony_ci sub128( aSig, rem1, term0, term1, &rem0, &rem1 ); 304262306a36Sopenharmony_ci while ( (sbits64) rem0 < 0 ) { 304362306a36Sopenharmony_ci --zSig0; 304462306a36Sopenharmony_ci add128( rem0, rem1, 0, bSig, &rem0, &rem1 ); 304562306a36Sopenharmony_ci } 304662306a36Sopenharmony_ci zSig1 = estimateDiv128To64( rem1, 0, bSig ); 304762306a36Sopenharmony_ci if ( (bits64) ( zSig1<<1 ) <= 8 ) { 304862306a36Sopenharmony_ci mul64To128( bSig, zSig1, &term1, &term2 ); 304962306a36Sopenharmony_ci sub128( rem1, 0, term1, term2, &rem1, &rem2 ); 305062306a36Sopenharmony_ci while ( (sbits64) rem1 < 0 ) { 305162306a36Sopenharmony_ci --zSig1; 305262306a36Sopenharmony_ci add128( rem1, rem2, 0, bSig, &rem1, &rem2 ); 305362306a36Sopenharmony_ci } 305462306a36Sopenharmony_ci zSig1 |= ( ( rem1 | rem2 ) != 0 ); 305562306a36Sopenharmony_ci } 305662306a36Sopenharmony_ci return 305762306a36Sopenharmony_ci roundAndPackFloatx80( 305862306a36Sopenharmony_ci roundData, zSign, zExp, zSig0, zSig1 ); 305962306a36Sopenharmony_ci 306062306a36Sopenharmony_ci} 306162306a36Sopenharmony_ci 306262306a36Sopenharmony_ci/* 306362306a36Sopenharmony_ci------------------------------------------------------------------------------- 306462306a36Sopenharmony_ciReturns the remainder of the extended double-precision floating-point value 306562306a36Sopenharmony_ci`a' with respect to the corresponding value `b'. The operation is performed 306662306a36Sopenharmony_ciaccording to the IEC/IEEE Standard for Binary Floating-point Arithmetic. 306762306a36Sopenharmony_ci------------------------------------------------------------------------------- 306862306a36Sopenharmony_ci*/ 306962306a36Sopenharmony_cifloatx80 floatx80_rem( struct roundingData *roundData, floatx80 a, floatx80 b ) 307062306a36Sopenharmony_ci{ 307162306a36Sopenharmony_ci flag aSign, bSign, zSign; 307262306a36Sopenharmony_ci int32 aExp, bExp, expDiff; 307362306a36Sopenharmony_ci bits64 aSig0, aSig1, bSig; 307462306a36Sopenharmony_ci bits64 q, term0, term1, alternateASig0, alternateASig1; 307562306a36Sopenharmony_ci floatx80 z; 307662306a36Sopenharmony_ci 307762306a36Sopenharmony_ci aSig0 = extractFloatx80Frac( a ); 307862306a36Sopenharmony_ci aExp = extractFloatx80Exp( a ); 307962306a36Sopenharmony_ci aSign = extractFloatx80Sign( a ); 308062306a36Sopenharmony_ci bSig = extractFloatx80Frac( b ); 308162306a36Sopenharmony_ci bExp = extractFloatx80Exp( b ); 308262306a36Sopenharmony_ci bSign = extractFloatx80Sign( b ); 308362306a36Sopenharmony_ci if ( aExp == 0x7FFF ) { 308462306a36Sopenharmony_ci if ( (bits64) ( aSig0<<1 ) 308562306a36Sopenharmony_ci || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) { 308662306a36Sopenharmony_ci return propagateFloatx80NaN( a, b ); 308762306a36Sopenharmony_ci } 308862306a36Sopenharmony_ci goto invalid; 308962306a36Sopenharmony_ci } 309062306a36Sopenharmony_ci if ( bExp == 0x7FFF ) { 309162306a36Sopenharmony_ci if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); 309262306a36Sopenharmony_ci return a; 309362306a36Sopenharmony_ci } 309462306a36Sopenharmony_ci if ( bExp == 0 ) { 309562306a36Sopenharmony_ci if ( bSig == 0 ) { 309662306a36Sopenharmony_ci invalid: 309762306a36Sopenharmony_ci roundData->exception |= float_flag_invalid; 309862306a36Sopenharmony_ci z.low = floatx80_default_nan_low; 309962306a36Sopenharmony_ci z.high = floatx80_default_nan_high; 310062306a36Sopenharmony_ci z.__padding = 0; 310162306a36Sopenharmony_ci return z; 310262306a36Sopenharmony_ci } 310362306a36Sopenharmony_ci normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); 310462306a36Sopenharmony_ci } 310562306a36Sopenharmony_ci if ( aExp == 0 ) { 310662306a36Sopenharmony_ci if ( (bits64) ( aSig0<<1 ) == 0 ) return a; 310762306a36Sopenharmony_ci normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 ); 310862306a36Sopenharmony_ci } 310962306a36Sopenharmony_ci bSig |= LIT64( 0x8000000000000000 ); 311062306a36Sopenharmony_ci zSign = aSign; 311162306a36Sopenharmony_ci expDiff = aExp - bExp; 311262306a36Sopenharmony_ci aSig1 = 0; 311362306a36Sopenharmony_ci if ( expDiff < 0 ) { 311462306a36Sopenharmony_ci if ( expDiff < -1 ) return a; 311562306a36Sopenharmony_ci shift128Right( aSig0, 0, 1, &aSig0, &aSig1 ); 311662306a36Sopenharmony_ci expDiff = 0; 311762306a36Sopenharmony_ci } 311862306a36Sopenharmony_ci q = ( bSig <= aSig0 ); 311962306a36Sopenharmony_ci if ( q ) aSig0 -= bSig; 312062306a36Sopenharmony_ci expDiff -= 64; 312162306a36Sopenharmony_ci while ( 0 < expDiff ) { 312262306a36Sopenharmony_ci q = estimateDiv128To64( aSig0, aSig1, bSig ); 312362306a36Sopenharmony_ci q = ( 2 < q ) ? q - 2 : 0; 312462306a36Sopenharmony_ci mul64To128( bSig, q, &term0, &term1 ); 312562306a36Sopenharmony_ci sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); 312662306a36Sopenharmony_ci shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 ); 312762306a36Sopenharmony_ci expDiff -= 62; 312862306a36Sopenharmony_ci } 312962306a36Sopenharmony_ci expDiff += 64; 313062306a36Sopenharmony_ci if ( 0 < expDiff ) { 313162306a36Sopenharmony_ci q = estimateDiv128To64( aSig0, aSig1, bSig ); 313262306a36Sopenharmony_ci q = ( 2 < q ) ? q - 2 : 0; 313362306a36Sopenharmony_ci q >>= 64 - expDiff; 313462306a36Sopenharmony_ci mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 ); 313562306a36Sopenharmony_ci sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); 313662306a36Sopenharmony_ci shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 ); 313762306a36Sopenharmony_ci while ( le128( term0, term1, aSig0, aSig1 ) ) { 313862306a36Sopenharmony_ci ++q; 313962306a36Sopenharmony_ci sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); 314062306a36Sopenharmony_ci } 314162306a36Sopenharmony_ci } 314262306a36Sopenharmony_ci else { 314362306a36Sopenharmony_ci term1 = 0; 314462306a36Sopenharmony_ci term0 = bSig; 314562306a36Sopenharmony_ci } 314662306a36Sopenharmony_ci sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 ); 314762306a36Sopenharmony_ci if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 ) 314862306a36Sopenharmony_ci || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 ) 314962306a36Sopenharmony_ci && ( q & 1 ) ) 315062306a36Sopenharmony_ci ) { 315162306a36Sopenharmony_ci aSig0 = alternateASig0; 315262306a36Sopenharmony_ci aSig1 = alternateASig1; 315362306a36Sopenharmony_ci zSign = ! zSign; 315462306a36Sopenharmony_ci } 315562306a36Sopenharmony_ci 315662306a36Sopenharmony_ci return 315762306a36Sopenharmony_ci normalizeRoundAndPackFloatx80( 315862306a36Sopenharmony_ci roundData, zSign, bExp + expDiff, aSig0, aSig1 ); 315962306a36Sopenharmony_ci 316062306a36Sopenharmony_ci} 316162306a36Sopenharmony_ci 316262306a36Sopenharmony_ci/* 316362306a36Sopenharmony_ci------------------------------------------------------------------------------- 316462306a36Sopenharmony_ciReturns the square root of the extended double-precision floating-point 316562306a36Sopenharmony_civalue `a'. The operation is performed according to the IEC/IEEE Standard 316662306a36Sopenharmony_cifor Binary Floating-point Arithmetic. 316762306a36Sopenharmony_ci------------------------------------------------------------------------------- 316862306a36Sopenharmony_ci*/ 316962306a36Sopenharmony_cifloatx80 floatx80_sqrt( struct roundingData *roundData, floatx80 a ) 317062306a36Sopenharmony_ci{ 317162306a36Sopenharmony_ci flag aSign; 317262306a36Sopenharmony_ci int32 aExp, zExp; 317362306a36Sopenharmony_ci bits64 aSig0, aSig1, zSig0, zSig1; 317462306a36Sopenharmony_ci bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3; 317562306a36Sopenharmony_ci bits64 shiftedRem0, shiftedRem1; 317662306a36Sopenharmony_ci floatx80 z; 317762306a36Sopenharmony_ci 317862306a36Sopenharmony_ci aSig0 = extractFloatx80Frac( a ); 317962306a36Sopenharmony_ci aExp = extractFloatx80Exp( a ); 318062306a36Sopenharmony_ci aSign = extractFloatx80Sign( a ); 318162306a36Sopenharmony_ci if ( aExp == 0x7FFF ) { 318262306a36Sopenharmony_ci if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a ); 318362306a36Sopenharmony_ci if ( ! aSign ) return a; 318462306a36Sopenharmony_ci goto invalid; 318562306a36Sopenharmony_ci } 318662306a36Sopenharmony_ci if ( aSign ) { 318762306a36Sopenharmony_ci if ( ( aExp | aSig0 ) == 0 ) return a; 318862306a36Sopenharmony_ci invalid: 318962306a36Sopenharmony_ci roundData->exception |= float_flag_invalid; 319062306a36Sopenharmony_ci z.low = floatx80_default_nan_low; 319162306a36Sopenharmony_ci z.high = floatx80_default_nan_high; 319262306a36Sopenharmony_ci z.__padding = 0; 319362306a36Sopenharmony_ci return z; 319462306a36Sopenharmony_ci } 319562306a36Sopenharmony_ci if ( aExp == 0 ) { 319662306a36Sopenharmony_ci if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 ); 319762306a36Sopenharmony_ci normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 ); 319862306a36Sopenharmony_ci } 319962306a36Sopenharmony_ci zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF; 320062306a36Sopenharmony_ci zSig0 = estimateSqrt32( aExp, aSig0>>32 ); 320162306a36Sopenharmony_ci zSig0 <<= 31; 320262306a36Sopenharmony_ci aSig1 = 0; 320362306a36Sopenharmony_ci shift128Right( aSig0, 0, ( aExp & 1 ) + 2, &aSig0, &aSig1 ); 320462306a36Sopenharmony_ci zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0 ) + zSig0 + 4; 320562306a36Sopenharmony_ci if ( 0 <= (sbits64) zSig0 ) zSig0 = LIT64( 0xFFFFFFFFFFFFFFFF ); 320662306a36Sopenharmony_ci shortShift128Left( aSig0, aSig1, 2, &aSig0, &aSig1 ); 320762306a36Sopenharmony_ci mul64To128( zSig0, zSig0, &term0, &term1 ); 320862306a36Sopenharmony_ci sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 ); 320962306a36Sopenharmony_ci while ( (sbits64) rem0 < 0 ) { 321062306a36Sopenharmony_ci --zSig0; 321162306a36Sopenharmony_ci shortShift128Left( 0, zSig0, 1, &term0, &term1 ); 321262306a36Sopenharmony_ci term1 |= 1; 321362306a36Sopenharmony_ci add128( rem0, rem1, term0, term1, &rem0, &rem1 ); 321462306a36Sopenharmony_ci } 321562306a36Sopenharmony_ci shortShift128Left( rem0, rem1, 63, &shiftedRem0, &shiftedRem1 ); 321662306a36Sopenharmony_ci zSig1 = estimateDiv128To64( shiftedRem0, shiftedRem1, zSig0 ); 321762306a36Sopenharmony_ci if ( (bits64) ( zSig1<<1 ) <= 10 ) { 321862306a36Sopenharmony_ci if ( zSig1 == 0 ) zSig1 = 1; 321962306a36Sopenharmony_ci mul64To128( zSig0, zSig1, &term1, &term2 ); 322062306a36Sopenharmony_ci shortShift128Left( term1, term2, 1, &term1, &term2 ); 322162306a36Sopenharmony_ci sub128( rem1, 0, term1, term2, &rem1, &rem2 ); 322262306a36Sopenharmony_ci mul64To128( zSig1, zSig1, &term2, &term3 ); 322362306a36Sopenharmony_ci sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 ); 322462306a36Sopenharmony_ci while ( (sbits64) rem1 < 0 ) { 322562306a36Sopenharmony_ci --zSig1; 322662306a36Sopenharmony_ci shortShift192Left( 0, zSig0, zSig1, 1, &term1, &term2, &term3 ); 322762306a36Sopenharmony_ci term3 |= 1; 322862306a36Sopenharmony_ci add192( 322962306a36Sopenharmony_ci rem1, rem2, rem3, term1, term2, term3, &rem1, &rem2, &rem3 ); 323062306a36Sopenharmony_ci } 323162306a36Sopenharmony_ci zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); 323262306a36Sopenharmony_ci } 323362306a36Sopenharmony_ci return 323462306a36Sopenharmony_ci roundAndPackFloatx80( 323562306a36Sopenharmony_ci roundData, 0, zExp, zSig0, zSig1 ); 323662306a36Sopenharmony_ci 323762306a36Sopenharmony_ci} 323862306a36Sopenharmony_ci 323962306a36Sopenharmony_ci/* 324062306a36Sopenharmony_ci------------------------------------------------------------------------------- 324162306a36Sopenharmony_ciReturns 1 if the extended double-precision floating-point value `a' is 324262306a36Sopenharmony_ciequal to the corresponding value `b', and 0 otherwise. The comparison is 324362306a36Sopenharmony_ciperformed according to the IEC/IEEE Standard for Binary Floating-point 324462306a36Sopenharmony_ciArithmetic. 324562306a36Sopenharmony_ci------------------------------------------------------------------------------- 324662306a36Sopenharmony_ci*/ 324762306a36Sopenharmony_ciflag floatx80_eq( floatx80 a, floatx80 b ) 324862306a36Sopenharmony_ci{ 324962306a36Sopenharmony_ci 325062306a36Sopenharmony_ci if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) 325162306a36Sopenharmony_ci && (bits64) ( extractFloatx80Frac( a )<<1 ) ) 325262306a36Sopenharmony_ci || ( ( extractFloatx80Exp( b ) == 0x7FFF ) 325362306a36Sopenharmony_ci && (bits64) ( extractFloatx80Frac( b )<<1 ) ) 325462306a36Sopenharmony_ci ) { 325562306a36Sopenharmony_ci if ( floatx80_is_signaling_nan( a ) 325662306a36Sopenharmony_ci || floatx80_is_signaling_nan( b ) ) { 325762306a36Sopenharmony_ci float_raise( float_flag_invalid ); 325862306a36Sopenharmony_ci } 325962306a36Sopenharmony_ci return 0; 326062306a36Sopenharmony_ci } 326162306a36Sopenharmony_ci return 326262306a36Sopenharmony_ci ( a.low == b.low ) 326362306a36Sopenharmony_ci && ( ( a.high == b.high ) 326462306a36Sopenharmony_ci || ( ( a.low == 0 ) 326562306a36Sopenharmony_ci && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) ) 326662306a36Sopenharmony_ci ); 326762306a36Sopenharmony_ci 326862306a36Sopenharmony_ci} 326962306a36Sopenharmony_ci 327062306a36Sopenharmony_ci/* 327162306a36Sopenharmony_ci------------------------------------------------------------------------------- 327262306a36Sopenharmony_ciReturns 1 if the extended double-precision floating-point value `a' is 327362306a36Sopenharmony_ciless than or equal to the corresponding value `b', and 0 otherwise. The 327462306a36Sopenharmony_cicomparison is performed according to the IEC/IEEE Standard for Binary 327562306a36Sopenharmony_ciFloating-point Arithmetic. 327662306a36Sopenharmony_ci------------------------------------------------------------------------------- 327762306a36Sopenharmony_ci*/ 327862306a36Sopenharmony_ciflag floatx80_le( floatx80 a, floatx80 b ) 327962306a36Sopenharmony_ci{ 328062306a36Sopenharmony_ci flag aSign, bSign; 328162306a36Sopenharmony_ci 328262306a36Sopenharmony_ci if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) 328362306a36Sopenharmony_ci && (bits64) ( extractFloatx80Frac( a )<<1 ) ) 328462306a36Sopenharmony_ci || ( ( extractFloatx80Exp( b ) == 0x7FFF ) 328562306a36Sopenharmony_ci && (bits64) ( extractFloatx80Frac( b )<<1 ) ) 328662306a36Sopenharmony_ci ) { 328762306a36Sopenharmony_ci float_raise( float_flag_invalid ); 328862306a36Sopenharmony_ci return 0; 328962306a36Sopenharmony_ci } 329062306a36Sopenharmony_ci aSign = extractFloatx80Sign( a ); 329162306a36Sopenharmony_ci bSign = extractFloatx80Sign( b ); 329262306a36Sopenharmony_ci if ( aSign != bSign ) { 329362306a36Sopenharmony_ci return 329462306a36Sopenharmony_ci aSign 329562306a36Sopenharmony_ci || ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) 329662306a36Sopenharmony_ci == 0 ); 329762306a36Sopenharmony_ci } 329862306a36Sopenharmony_ci return 329962306a36Sopenharmony_ci aSign ? le128( b.high, b.low, a.high, a.low ) 330062306a36Sopenharmony_ci : le128( a.high, a.low, b.high, b.low ); 330162306a36Sopenharmony_ci 330262306a36Sopenharmony_ci} 330362306a36Sopenharmony_ci 330462306a36Sopenharmony_ci/* 330562306a36Sopenharmony_ci------------------------------------------------------------------------------- 330662306a36Sopenharmony_ciReturns 1 if the extended double-precision floating-point value `a' is 330762306a36Sopenharmony_ciless than the corresponding value `b', and 0 otherwise. The comparison 330862306a36Sopenharmony_ciis performed according to the IEC/IEEE Standard for Binary Floating-point 330962306a36Sopenharmony_ciArithmetic. 331062306a36Sopenharmony_ci------------------------------------------------------------------------------- 331162306a36Sopenharmony_ci*/ 331262306a36Sopenharmony_ciflag floatx80_lt( floatx80 a, floatx80 b ) 331362306a36Sopenharmony_ci{ 331462306a36Sopenharmony_ci flag aSign, bSign; 331562306a36Sopenharmony_ci 331662306a36Sopenharmony_ci if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) 331762306a36Sopenharmony_ci && (bits64) ( extractFloatx80Frac( a )<<1 ) ) 331862306a36Sopenharmony_ci || ( ( extractFloatx80Exp( b ) == 0x7FFF ) 331962306a36Sopenharmony_ci && (bits64) ( extractFloatx80Frac( b )<<1 ) ) 332062306a36Sopenharmony_ci ) { 332162306a36Sopenharmony_ci float_raise( float_flag_invalid ); 332262306a36Sopenharmony_ci return 0; 332362306a36Sopenharmony_ci } 332462306a36Sopenharmony_ci aSign = extractFloatx80Sign( a ); 332562306a36Sopenharmony_ci bSign = extractFloatx80Sign( b ); 332662306a36Sopenharmony_ci if ( aSign != bSign ) { 332762306a36Sopenharmony_ci return 332862306a36Sopenharmony_ci aSign 332962306a36Sopenharmony_ci && ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) 333062306a36Sopenharmony_ci != 0 ); 333162306a36Sopenharmony_ci } 333262306a36Sopenharmony_ci return 333362306a36Sopenharmony_ci aSign ? lt128( b.high, b.low, a.high, a.low ) 333462306a36Sopenharmony_ci : lt128( a.high, a.low, b.high, b.low ); 333562306a36Sopenharmony_ci 333662306a36Sopenharmony_ci} 333762306a36Sopenharmony_ci 333862306a36Sopenharmony_ci/* 333962306a36Sopenharmony_ci------------------------------------------------------------------------------- 334062306a36Sopenharmony_ciReturns 1 if the extended double-precision floating-point value `a' is equal 334162306a36Sopenharmony_cito the corresponding value `b', and 0 otherwise. The invalid exception is 334262306a36Sopenharmony_ciraised if either operand is a NaN. Otherwise, the comparison is performed 334362306a36Sopenharmony_ciaccording to the IEC/IEEE Standard for Binary Floating-point Arithmetic. 334462306a36Sopenharmony_ci------------------------------------------------------------------------------- 334562306a36Sopenharmony_ci*/ 334662306a36Sopenharmony_ciflag floatx80_eq_signaling( floatx80 a, floatx80 b ) 334762306a36Sopenharmony_ci{ 334862306a36Sopenharmony_ci 334962306a36Sopenharmony_ci if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) 335062306a36Sopenharmony_ci && (bits64) ( extractFloatx80Frac( a )<<1 ) ) 335162306a36Sopenharmony_ci || ( ( extractFloatx80Exp( b ) == 0x7FFF ) 335262306a36Sopenharmony_ci && (bits64) ( extractFloatx80Frac( b )<<1 ) ) 335362306a36Sopenharmony_ci ) { 335462306a36Sopenharmony_ci float_raise( float_flag_invalid ); 335562306a36Sopenharmony_ci return 0; 335662306a36Sopenharmony_ci } 335762306a36Sopenharmony_ci return 335862306a36Sopenharmony_ci ( a.low == b.low ) 335962306a36Sopenharmony_ci && ( ( a.high == b.high ) 336062306a36Sopenharmony_ci || ( ( a.low == 0 ) 336162306a36Sopenharmony_ci && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) ) 336262306a36Sopenharmony_ci ); 336362306a36Sopenharmony_ci 336462306a36Sopenharmony_ci} 336562306a36Sopenharmony_ci 336662306a36Sopenharmony_ci/* 336762306a36Sopenharmony_ci------------------------------------------------------------------------------- 336862306a36Sopenharmony_ciReturns 1 if the extended double-precision floating-point value `a' is less 336962306a36Sopenharmony_cithan or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs 337062306a36Sopenharmony_cido not cause an exception. Otherwise, the comparison is performed according 337162306a36Sopenharmony_cito the IEC/IEEE Standard for Binary Floating-point Arithmetic. 337262306a36Sopenharmony_ci------------------------------------------------------------------------------- 337362306a36Sopenharmony_ci*/ 337462306a36Sopenharmony_ciflag floatx80_le_quiet( floatx80 a, floatx80 b ) 337562306a36Sopenharmony_ci{ 337662306a36Sopenharmony_ci flag aSign, bSign; 337762306a36Sopenharmony_ci 337862306a36Sopenharmony_ci if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) 337962306a36Sopenharmony_ci && (bits64) ( extractFloatx80Frac( a )<<1 ) ) 338062306a36Sopenharmony_ci || ( ( extractFloatx80Exp( b ) == 0x7FFF ) 338162306a36Sopenharmony_ci && (bits64) ( extractFloatx80Frac( b )<<1 ) ) 338262306a36Sopenharmony_ci ) { 338362306a36Sopenharmony_ci /* Do nothing, even if NaN as we're quiet */ 338462306a36Sopenharmony_ci return 0; 338562306a36Sopenharmony_ci } 338662306a36Sopenharmony_ci aSign = extractFloatx80Sign( a ); 338762306a36Sopenharmony_ci bSign = extractFloatx80Sign( b ); 338862306a36Sopenharmony_ci if ( aSign != bSign ) { 338962306a36Sopenharmony_ci return 339062306a36Sopenharmony_ci aSign 339162306a36Sopenharmony_ci || ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) 339262306a36Sopenharmony_ci == 0 ); 339362306a36Sopenharmony_ci } 339462306a36Sopenharmony_ci return 339562306a36Sopenharmony_ci aSign ? le128( b.high, b.low, a.high, a.low ) 339662306a36Sopenharmony_ci : le128( a.high, a.low, b.high, b.low ); 339762306a36Sopenharmony_ci 339862306a36Sopenharmony_ci} 339962306a36Sopenharmony_ci 340062306a36Sopenharmony_ci/* 340162306a36Sopenharmony_ci------------------------------------------------------------------------------- 340262306a36Sopenharmony_ciReturns 1 if the extended double-precision floating-point value `a' is less 340362306a36Sopenharmony_cithan the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause 340462306a36Sopenharmony_cian exception. Otherwise, the comparison is performed according to the 340562306a36Sopenharmony_ciIEC/IEEE Standard for Binary Floating-point Arithmetic. 340662306a36Sopenharmony_ci------------------------------------------------------------------------------- 340762306a36Sopenharmony_ci*/ 340862306a36Sopenharmony_ciflag floatx80_lt_quiet( floatx80 a, floatx80 b ) 340962306a36Sopenharmony_ci{ 341062306a36Sopenharmony_ci flag aSign, bSign; 341162306a36Sopenharmony_ci 341262306a36Sopenharmony_ci if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) 341362306a36Sopenharmony_ci && (bits64) ( extractFloatx80Frac( a )<<1 ) ) 341462306a36Sopenharmony_ci || ( ( extractFloatx80Exp( b ) == 0x7FFF ) 341562306a36Sopenharmony_ci && (bits64) ( extractFloatx80Frac( b )<<1 ) ) 341662306a36Sopenharmony_ci ) { 341762306a36Sopenharmony_ci /* Do nothing, even if NaN as we're quiet */ 341862306a36Sopenharmony_ci return 0; 341962306a36Sopenharmony_ci } 342062306a36Sopenharmony_ci aSign = extractFloatx80Sign( a ); 342162306a36Sopenharmony_ci bSign = extractFloatx80Sign( b ); 342262306a36Sopenharmony_ci if ( aSign != bSign ) { 342362306a36Sopenharmony_ci return 342462306a36Sopenharmony_ci aSign 342562306a36Sopenharmony_ci && ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) 342662306a36Sopenharmony_ci != 0 ); 342762306a36Sopenharmony_ci } 342862306a36Sopenharmony_ci return 342962306a36Sopenharmony_ci aSign ? lt128( b.high, b.low, a.high, a.low ) 343062306a36Sopenharmony_ci : lt128( a.high, a.low, b.high, b.low ); 343162306a36Sopenharmony_ci 343262306a36Sopenharmony_ci} 343362306a36Sopenharmony_ci 343462306a36Sopenharmony_ci#endif 343562306a36Sopenharmony_ci 3436