162306a36Sopenharmony_ci/*
262306a36Sopenharmony_ci===============================================================================
362306a36Sopenharmony_ci
462306a36Sopenharmony_ciThis C source file is part of the SoftFloat IEC/IEEE Floating-point
562306a36Sopenharmony_ciArithmetic Package, Release 2.
662306a36Sopenharmony_ci
762306a36Sopenharmony_ciWritten by John R. Hauser.  This work was made possible in part by the
862306a36Sopenharmony_ciInternational Computer Science Institute, located at Suite 600, 1947 Center
962306a36Sopenharmony_ciStreet, Berkeley, California 94704.  Funding was partially provided by the
1062306a36Sopenharmony_ciNational Science Foundation under grant MIP-9311980.  The original version
1162306a36Sopenharmony_ciof this code was written as part of a project to build a fixed-point vector
1262306a36Sopenharmony_ciprocessor in collaboration with the University of California at Berkeley,
1362306a36Sopenharmony_cioverseen by Profs. Nelson Morgan and John Wawrzynek.  More information
1462306a36Sopenharmony_ciis available through the web page
1562306a36Sopenharmony_cihttp://www.jhauser.us/arithmetic/SoftFloat-2b/SoftFloat-source.txt
1662306a36Sopenharmony_ci
1762306a36Sopenharmony_ciTHIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
1862306a36Sopenharmony_cihas been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
1962306a36Sopenharmony_ciTIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
2062306a36Sopenharmony_ciPERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
2162306a36Sopenharmony_ciAND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
2262306a36Sopenharmony_ci
2362306a36Sopenharmony_ciDerivative works are acceptable, even for commercial purposes, so long as
2462306a36Sopenharmony_ci(1) they include prominent notice that the work is derivative, and (2) they
2562306a36Sopenharmony_ciinclude prominent notice akin to these three paragraphs for those parts of
2662306a36Sopenharmony_cithis code that are retained.
2762306a36Sopenharmony_ci
2862306a36Sopenharmony_ci===============================================================================
2962306a36Sopenharmony_ci*/
3062306a36Sopenharmony_ci
3162306a36Sopenharmony_ci#include <asm/div64.h>
3262306a36Sopenharmony_ci
3362306a36Sopenharmony_ci#include "fpa11.h"
3462306a36Sopenharmony_ci//#include "milieu.h"
3562306a36Sopenharmony_ci//#include "softfloat.h"
3662306a36Sopenharmony_ci
3762306a36Sopenharmony_ci/*
3862306a36Sopenharmony_ci-------------------------------------------------------------------------------
3962306a36Sopenharmony_ciPrimitive arithmetic functions, including multi-word arithmetic, and
4062306a36Sopenharmony_cidivision and square root approximations.  (Can be specialized to target if
4162306a36Sopenharmony_cidesired.)
4262306a36Sopenharmony_ci-------------------------------------------------------------------------------
4362306a36Sopenharmony_ci*/
4462306a36Sopenharmony_ci#include "softfloat-macros"
4562306a36Sopenharmony_ci
4662306a36Sopenharmony_ci/*
4762306a36Sopenharmony_ci-------------------------------------------------------------------------------
4862306a36Sopenharmony_ciFunctions and definitions to determine:  (1) whether tininess for underflow
4962306a36Sopenharmony_ciis detected before or after rounding by default, (2) what (if anything)
5062306a36Sopenharmony_cihappens when exceptions are raised, (3) how signaling NaNs are distinguished
5162306a36Sopenharmony_cifrom quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
5262306a36Sopenharmony_ciare propagated from function inputs to output.  These details are target-
5362306a36Sopenharmony_cispecific.
5462306a36Sopenharmony_ci-------------------------------------------------------------------------------
5562306a36Sopenharmony_ci*/
5662306a36Sopenharmony_ci#include "softfloat-specialize"
5762306a36Sopenharmony_ci
5862306a36Sopenharmony_ci/*
5962306a36Sopenharmony_ci-------------------------------------------------------------------------------
6062306a36Sopenharmony_ciTakes a 64-bit fixed-point value `absZ' with binary point between bits 6
6162306a36Sopenharmony_ciand 7, and returns the properly rounded 32-bit integer corresponding to the
6262306a36Sopenharmony_ciinput.  If `zSign' is nonzero, the input is negated before being converted
6362306a36Sopenharmony_cito an integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point
6462306a36Sopenharmony_ciinput is simply rounded to an integer, with the inexact exception raised if
6562306a36Sopenharmony_cithe input cannot be represented exactly as an integer.  If the fixed-point
6662306a36Sopenharmony_ciinput is too large, however, the invalid exception is raised and the largest
6762306a36Sopenharmony_cipositive or negative integer is returned.
6862306a36Sopenharmony_ci-------------------------------------------------------------------------------
6962306a36Sopenharmony_ci*/
7062306a36Sopenharmony_cistatic int32 roundAndPackInt32( struct roundingData *roundData, flag zSign, bits64 absZ )
7162306a36Sopenharmony_ci{
7262306a36Sopenharmony_ci    int8 roundingMode;
7362306a36Sopenharmony_ci    flag roundNearestEven;
7462306a36Sopenharmony_ci    int8 roundIncrement, roundBits;
7562306a36Sopenharmony_ci    int32 z;
7662306a36Sopenharmony_ci
7762306a36Sopenharmony_ci    roundingMode = roundData->mode;
7862306a36Sopenharmony_ci    roundNearestEven = ( roundingMode == float_round_nearest_even );
7962306a36Sopenharmony_ci    roundIncrement = 0x40;
8062306a36Sopenharmony_ci    if ( ! roundNearestEven ) {
8162306a36Sopenharmony_ci        if ( roundingMode == float_round_to_zero ) {
8262306a36Sopenharmony_ci            roundIncrement = 0;
8362306a36Sopenharmony_ci        }
8462306a36Sopenharmony_ci        else {
8562306a36Sopenharmony_ci            roundIncrement = 0x7F;
8662306a36Sopenharmony_ci            if ( zSign ) {
8762306a36Sopenharmony_ci                if ( roundingMode == float_round_up ) roundIncrement = 0;
8862306a36Sopenharmony_ci            }
8962306a36Sopenharmony_ci            else {
9062306a36Sopenharmony_ci                if ( roundingMode == float_round_down ) roundIncrement = 0;
9162306a36Sopenharmony_ci            }
9262306a36Sopenharmony_ci        }
9362306a36Sopenharmony_ci    }
9462306a36Sopenharmony_ci    roundBits = absZ & 0x7F;
9562306a36Sopenharmony_ci    absZ = ( absZ + roundIncrement )>>7;
9662306a36Sopenharmony_ci    absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
9762306a36Sopenharmony_ci    z = absZ;
9862306a36Sopenharmony_ci    if ( zSign ) z = - z;
9962306a36Sopenharmony_ci    if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
10062306a36Sopenharmony_ci        roundData->exception |= float_flag_invalid;
10162306a36Sopenharmony_ci        return zSign ? 0x80000000 : 0x7FFFFFFF;
10262306a36Sopenharmony_ci    }
10362306a36Sopenharmony_ci    if ( roundBits ) roundData->exception |= float_flag_inexact;
10462306a36Sopenharmony_ci    return z;
10562306a36Sopenharmony_ci
10662306a36Sopenharmony_ci}
10762306a36Sopenharmony_ci
10862306a36Sopenharmony_ci/*
10962306a36Sopenharmony_ci-------------------------------------------------------------------------------
11062306a36Sopenharmony_ciReturns the fraction bits of the single-precision floating-point value `a'.
11162306a36Sopenharmony_ci-------------------------------------------------------------------------------
11262306a36Sopenharmony_ci*/
11362306a36Sopenharmony_ciINLINE bits32 extractFloat32Frac( float32 a )
11462306a36Sopenharmony_ci{
11562306a36Sopenharmony_ci
11662306a36Sopenharmony_ci    return a & 0x007FFFFF;
11762306a36Sopenharmony_ci
11862306a36Sopenharmony_ci}
11962306a36Sopenharmony_ci
12062306a36Sopenharmony_ci/*
12162306a36Sopenharmony_ci-------------------------------------------------------------------------------
12262306a36Sopenharmony_ciReturns the exponent bits of the single-precision floating-point value `a'.
12362306a36Sopenharmony_ci-------------------------------------------------------------------------------
12462306a36Sopenharmony_ci*/
12562306a36Sopenharmony_ciINLINE int16 extractFloat32Exp( float32 a )
12662306a36Sopenharmony_ci{
12762306a36Sopenharmony_ci
12862306a36Sopenharmony_ci    return ( a>>23 ) & 0xFF;
12962306a36Sopenharmony_ci
13062306a36Sopenharmony_ci}
13162306a36Sopenharmony_ci
13262306a36Sopenharmony_ci/*
13362306a36Sopenharmony_ci-------------------------------------------------------------------------------
13462306a36Sopenharmony_ciReturns the sign bit of the single-precision floating-point value `a'.
13562306a36Sopenharmony_ci-------------------------------------------------------------------------------
13662306a36Sopenharmony_ci*/
13762306a36Sopenharmony_ci#if 0	/* in softfloat.h */
13862306a36Sopenharmony_ciINLINE flag extractFloat32Sign( float32 a )
13962306a36Sopenharmony_ci{
14062306a36Sopenharmony_ci
14162306a36Sopenharmony_ci    return a>>31;
14262306a36Sopenharmony_ci
14362306a36Sopenharmony_ci}
14462306a36Sopenharmony_ci#endif
14562306a36Sopenharmony_ci
14662306a36Sopenharmony_ci/*
14762306a36Sopenharmony_ci-------------------------------------------------------------------------------
14862306a36Sopenharmony_ciNormalizes the subnormal single-precision floating-point value represented
14962306a36Sopenharmony_ciby the denormalized significand `aSig'.  The normalized exponent and
15062306a36Sopenharmony_cisignificand are stored at the locations pointed to by `zExpPtr' and
15162306a36Sopenharmony_ci`zSigPtr', respectively.
15262306a36Sopenharmony_ci-------------------------------------------------------------------------------
15362306a36Sopenharmony_ci*/
15462306a36Sopenharmony_cistatic void
15562306a36Sopenharmony_ci normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
15662306a36Sopenharmony_ci{
15762306a36Sopenharmony_ci    int8 shiftCount;
15862306a36Sopenharmony_ci
15962306a36Sopenharmony_ci    shiftCount = countLeadingZeros32( aSig ) - 8;
16062306a36Sopenharmony_ci    *zSigPtr = aSig<<shiftCount;
16162306a36Sopenharmony_ci    *zExpPtr = 1 - shiftCount;
16262306a36Sopenharmony_ci
16362306a36Sopenharmony_ci}
16462306a36Sopenharmony_ci
16562306a36Sopenharmony_ci/*
16662306a36Sopenharmony_ci-------------------------------------------------------------------------------
16762306a36Sopenharmony_ciPacks the sign `zSign', exponent `zExp', and significand `zSig' into a
16862306a36Sopenharmony_cisingle-precision floating-point value, returning the result.  After being
16962306a36Sopenharmony_cishifted into the proper positions, the three fields are simply added
17062306a36Sopenharmony_citogether to form the result.  This means that any integer portion of `zSig'
17162306a36Sopenharmony_ciwill be added into the exponent.  Since a properly normalized significand
17262306a36Sopenharmony_ciwill have an integer portion equal to 1, the `zExp' input should be 1 less
17362306a36Sopenharmony_cithan the desired result exponent whenever `zSig' is a complete, normalized
17462306a36Sopenharmony_cisignificand.
17562306a36Sopenharmony_ci-------------------------------------------------------------------------------
17662306a36Sopenharmony_ci*/
17762306a36Sopenharmony_ciINLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
17862306a36Sopenharmony_ci{
17962306a36Sopenharmony_ci#if 0
18062306a36Sopenharmony_ci   float32 f;
18162306a36Sopenharmony_ci   __asm__("@ packFloat32				\n\
18262306a36Sopenharmony_ci   	    mov %0, %1, asl #31				\n\
18362306a36Sopenharmony_ci   	    orr %0, %2, asl #23				\n\
18462306a36Sopenharmony_ci   	    orr %0, %3"
18562306a36Sopenharmony_ci   	    : /* no outputs */
18662306a36Sopenharmony_ci   	    : "g" (f), "g" (zSign), "g" (zExp), "g" (zSig)
18762306a36Sopenharmony_ci   	    : "cc");
18862306a36Sopenharmony_ci   return f;
18962306a36Sopenharmony_ci#else
19062306a36Sopenharmony_ci    return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
19162306a36Sopenharmony_ci#endif
19262306a36Sopenharmony_ci}
19362306a36Sopenharmony_ci
19462306a36Sopenharmony_ci/*
19562306a36Sopenharmony_ci-------------------------------------------------------------------------------
19662306a36Sopenharmony_ciTakes an abstract floating-point value having sign `zSign', exponent `zExp',
19762306a36Sopenharmony_ciand significand `zSig', and returns the proper single-precision floating-
19862306a36Sopenharmony_cipoint value corresponding to the abstract input.  Ordinarily, the abstract
19962306a36Sopenharmony_civalue is simply rounded and packed into the single-precision format, with
20062306a36Sopenharmony_cithe inexact exception raised if the abstract input cannot be represented
20162306a36Sopenharmony_ciexactly.  If the abstract value is too large, however, the overflow and
20262306a36Sopenharmony_ciinexact exceptions are raised and an infinity or maximal finite value is
20362306a36Sopenharmony_cireturned.  If the abstract value is too small, the input value is rounded to
20462306a36Sopenharmony_cia subnormal number, and the underflow and inexact exceptions are raised if
20562306a36Sopenharmony_cithe abstract input cannot be represented exactly as a subnormal single-
20662306a36Sopenharmony_ciprecision floating-point number.
20762306a36Sopenharmony_ci    The input significand `zSig' has its binary point between bits 30
20862306a36Sopenharmony_ciand 29, which is 7 bits to the left of the usual location.  This shifted
20962306a36Sopenharmony_cisignificand must be normalized or smaller.  If `zSig' is not normalized,
21062306a36Sopenharmony_ci`zExp' must be 0; in that case, the result returned is a subnormal number,
21162306a36Sopenharmony_ciand it must not require rounding.  In the usual case that `zSig' is
21262306a36Sopenharmony_cinormalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
21362306a36Sopenharmony_ciThe handling of underflow and overflow follows the IEC/IEEE Standard for
21462306a36Sopenharmony_ciBinary Floating-point Arithmetic.
21562306a36Sopenharmony_ci-------------------------------------------------------------------------------
21662306a36Sopenharmony_ci*/
21762306a36Sopenharmony_cistatic float32 roundAndPackFloat32( struct roundingData *roundData, flag zSign, int16 zExp, bits32 zSig )
21862306a36Sopenharmony_ci{
21962306a36Sopenharmony_ci    int8 roundingMode;
22062306a36Sopenharmony_ci    flag roundNearestEven;
22162306a36Sopenharmony_ci    int8 roundIncrement, roundBits;
22262306a36Sopenharmony_ci    flag isTiny;
22362306a36Sopenharmony_ci
22462306a36Sopenharmony_ci    roundingMode = roundData->mode;
22562306a36Sopenharmony_ci    roundNearestEven = ( roundingMode == float_round_nearest_even );
22662306a36Sopenharmony_ci    roundIncrement = 0x40;
22762306a36Sopenharmony_ci    if ( ! roundNearestEven ) {
22862306a36Sopenharmony_ci        if ( roundingMode == float_round_to_zero ) {
22962306a36Sopenharmony_ci            roundIncrement = 0;
23062306a36Sopenharmony_ci        }
23162306a36Sopenharmony_ci        else {
23262306a36Sopenharmony_ci            roundIncrement = 0x7F;
23362306a36Sopenharmony_ci            if ( zSign ) {
23462306a36Sopenharmony_ci                if ( roundingMode == float_round_up ) roundIncrement = 0;
23562306a36Sopenharmony_ci            }
23662306a36Sopenharmony_ci            else {
23762306a36Sopenharmony_ci                if ( roundingMode == float_round_down ) roundIncrement = 0;
23862306a36Sopenharmony_ci            }
23962306a36Sopenharmony_ci        }
24062306a36Sopenharmony_ci    }
24162306a36Sopenharmony_ci    roundBits = zSig & 0x7F;
24262306a36Sopenharmony_ci    if ( 0xFD <= (bits16) zExp ) {
24362306a36Sopenharmony_ci        if (    ( 0xFD < zExp )
24462306a36Sopenharmony_ci             || (    ( zExp == 0xFD )
24562306a36Sopenharmony_ci                  && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
24662306a36Sopenharmony_ci           ) {
24762306a36Sopenharmony_ci            roundData->exception |= float_flag_overflow | float_flag_inexact;
24862306a36Sopenharmony_ci            return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
24962306a36Sopenharmony_ci        }
25062306a36Sopenharmony_ci        if ( zExp < 0 ) {
25162306a36Sopenharmony_ci            isTiny =
25262306a36Sopenharmony_ci                   ( float_detect_tininess == float_tininess_before_rounding )
25362306a36Sopenharmony_ci                || ( zExp < -1 )
25462306a36Sopenharmony_ci                || ( zSig + roundIncrement < 0x80000000 );
25562306a36Sopenharmony_ci            shift32RightJamming( zSig, - zExp, &zSig );
25662306a36Sopenharmony_ci            zExp = 0;
25762306a36Sopenharmony_ci            roundBits = zSig & 0x7F;
25862306a36Sopenharmony_ci            if ( isTiny && roundBits ) roundData->exception |= float_flag_underflow;
25962306a36Sopenharmony_ci        }
26062306a36Sopenharmony_ci    }
26162306a36Sopenharmony_ci    if ( roundBits ) roundData->exception |= float_flag_inexact;
26262306a36Sopenharmony_ci    zSig = ( zSig + roundIncrement )>>7;
26362306a36Sopenharmony_ci    zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
26462306a36Sopenharmony_ci    if ( zSig == 0 ) zExp = 0;
26562306a36Sopenharmony_ci    return packFloat32( zSign, zExp, zSig );
26662306a36Sopenharmony_ci
26762306a36Sopenharmony_ci}
26862306a36Sopenharmony_ci
26962306a36Sopenharmony_ci/*
27062306a36Sopenharmony_ci-------------------------------------------------------------------------------
27162306a36Sopenharmony_ciTakes an abstract floating-point value having sign `zSign', exponent `zExp',
27262306a36Sopenharmony_ciand significand `zSig', and returns the proper single-precision floating-
27362306a36Sopenharmony_cipoint value corresponding to the abstract input.  This routine is just like
27462306a36Sopenharmony_ci`roundAndPackFloat32' except that `zSig' does not have to be normalized in
27562306a36Sopenharmony_ciany way.  In all cases, `zExp' must be 1 less than the ``true'' floating-
27662306a36Sopenharmony_cipoint exponent.
27762306a36Sopenharmony_ci-------------------------------------------------------------------------------
27862306a36Sopenharmony_ci*/
27962306a36Sopenharmony_cistatic float32
28062306a36Sopenharmony_ci normalizeRoundAndPackFloat32( struct roundingData *roundData, flag zSign, int16 zExp, bits32 zSig )
28162306a36Sopenharmony_ci{
28262306a36Sopenharmony_ci    int8 shiftCount;
28362306a36Sopenharmony_ci
28462306a36Sopenharmony_ci    shiftCount = countLeadingZeros32( zSig ) - 1;
28562306a36Sopenharmony_ci    return roundAndPackFloat32( roundData, zSign, zExp - shiftCount, zSig<<shiftCount );
28662306a36Sopenharmony_ci
28762306a36Sopenharmony_ci}
28862306a36Sopenharmony_ci
28962306a36Sopenharmony_ci/*
29062306a36Sopenharmony_ci-------------------------------------------------------------------------------
29162306a36Sopenharmony_ciReturns the fraction bits of the double-precision floating-point value `a'.
29262306a36Sopenharmony_ci-------------------------------------------------------------------------------
29362306a36Sopenharmony_ci*/
29462306a36Sopenharmony_ciINLINE bits64 extractFloat64Frac( float64 a )
29562306a36Sopenharmony_ci{
29662306a36Sopenharmony_ci
29762306a36Sopenharmony_ci    return a & LIT64( 0x000FFFFFFFFFFFFF );
29862306a36Sopenharmony_ci
29962306a36Sopenharmony_ci}
30062306a36Sopenharmony_ci
30162306a36Sopenharmony_ci/*
30262306a36Sopenharmony_ci-------------------------------------------------------------------------------
30362306a36Sopenharmony_ciReturns the exponent bits of the double-precision floating-point value `a'.
30462306a36Sopenharmony_ci-------------------------------------------------------------------------------
30562306a36Sopenharmony_ci*/
30662306a36Sopenharmony_ciINLINE int16 extractFloat64Exp( float64 a )
30762306a36Sopenharmony_ci{
30862306a36Sopenharmony_ci
30962306a36Sopenharmony_ci    return ( a>>52 ) & 0x7FF;
31062306a36Sopenharmony_ci
31162306a36Sopenharmony_ci}
31262306a36Sopenharmony_ci
31362306a36Sopenharmony_ci/*
31462306a36Sopenharmony_ci-------------------------------------------------------------------------------
31562306a36Sopenharmony_ciReturns the sign bit of the double-precision floating-point value `a'.
31662306a36Sopenharmony_ci-------------------------------------------------------------------------------
31762306a36Sopenharmony_ci*/
31862306a36Sopenharmony_ci#if 0	/* in softfloat.h */
31962306a36Sopenharmony_ciINLINE flag extractFloat64Sign( float64 a )
32062306a36Sopenharmony_ci{
32162306a36Sopenharmony_ci
32262306a36Sopenharmony_ci    return a>>63;
32362306a36Sopenharmony_ci
32462306a36Sopenharmony_ci}
32562306a36Sopenharmony_ci#endif
32662306a36Sopenharmony_ci
32762306a36Sopenharmony_ci/*
32862306a36Sopenharmony_ci-------------------------------------------------------------------------------
32962306a36Sopenharmony_ciNormalizes the subnormal double-precision floating-point value represented
33062306a36Sopenharmony_ciby the denormalized significand `aSig'.  The normalized exponent and
33162306a36Sopenharmony_cisignificand are stored at the locations pointed to by `zExpPtr' and
33262306a36Sopenharmony_ci`zSigPtr', respectively.
33362306a36Sopenharmony_ci-------------------------------------------------------------------------------
33462306a36Sopenharmony_ci*/
33562306a36Sopenharmony_cistatic void
33662306a36Sopenharmony_ci normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr )
33762306a36Sopenharmony_ci{
33862306a36Sopenharmony_ci    int8 shiftCount;
33962306a36Sopenharmony_ci
34062306a36Sopenharmony_ci    shiftCount = countLeadingZeros64( aSig ) - 11;
34162306a36Sopenharmony_ci    *zSigPtr = aSig<<shiftCount;
34262306a36Sopenharmony_ci    *zExpPtr = 1 - shiftCount;
34362306a36Sopenharmony_ci
34462306a36Sopenharmony_ci}
34562306a36Sopenharmony_ci
34662306a36Sopenharmony_ci/*
34762306a36Sopenharmony_ci-------------------------------------------------------------------------------
34862306a36Sopenharmony_ciPacks the sign `zSign', exponent `zExp', and significand `zSig' into a
34962306a36Sopenharmony_cidouble-precision floating-point value, returning the result.  After being
35062306a36Sopenharmony_cishifted into the proper positions, the three fields are simply added
35162306a36Sopenharmony_citogether to form the result.  This means that any integer portion of `zSig'
35262306a36Sopenharmony_ciwill be added into the exponent.  Since a properly normalized significand
35362306a36Sopenharmony_ciwill have an integer portion equal to 1, the `zExp' input should be 1 less
35462306a36Sopenharmony_cithan the desired result exponent whenever `zSig' is a complete, normalized
35562306a36Sopenharmony_cisignificand.
35662306a36Sopenharmony_ci-------------------------------------------------------------------------------
35762306a36Sopenharmony_ci*/
35862306a36Sopenharmony_ciINLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig )
35962306a36Sopenharmony_ci{
36062306a36Sopenharmony_ci
36162306a36Sopenharmony_ci    return ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<52 ) + zSig;
36262306a36Sopenharmony_ci
36362306a36Sopenharmony_ci}
36462306a36Sopenharmony_ci
36562306a36Sopenharmony_ci/*
36662306a36Sopenharmony_ci-------------------------------------------------------------------------------
36762306a36Sopenharmony_ciTakes an abstract floating-point value having sign `zSign', exponent `zExp',
36862306a36Sopenharmony_ciand significand `zSig', and returns the proper double-precision floating-
36962306a36Sopenharmony_cipoint value corresponding to the abstract input.  Ordinarily, the abstract
37062306a36Sopenharmony_civalue is simply rounded and packed into the double-precision format, with
37162306a36Sopenharmony_cithe inexact exception raised if the abstract input cannot be represented
37262306a36Sopenharmony_ciexactly.  If the abstract value is too large, however, the overflow and
37362306a36Sopenharmony_ciinexact exceptions are raised and an infinity or maximal finite value is
37462306a36Sopenharmony_cireturned.  If the abstract value is too small, the input value is rounded to
37562306a36Sopenharmony_cia subnormal number, and the underflow and inexact exceptions are raised if
37662306a36Sopenharmony_cithe abstract input cannot be represented exactly as a subnormal double-
37762306a36Sopenharmony_ciprecision floating-point number.
37862306a36Sopenharmony_ci    The input significand `zSig' has its binary point between bits 62
37962306a36Sopenharmony_ciand 61, which is 10 bits to the left of the usual location.  This shifted
38062306a36Sopenharmony_cisignificand must be normalized or smaller.  If `zSig' is not normalized,
38162306a36Sopenharmony_ci`zExp' must be 0; in that case, the result returned is a subnormal number,
38262306a36Sopenharmony_ciand it must not require rounding.  In the usual case that `zSig' is
38362306a36Sopenharmony_cinormalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
38462306a36Sopenharmony_ciThe handling of underflow and overflow follows the IEC/IEEE Standard for
38562306a36Sopenharmony_ciBinary Floating-point Arithmetic.
38662306a36Sopenharmony_ci-------------------------------------------------------------------------------
38762306a36Sopenharmony_ci*/
38862306a36Sopenharmony_cistatic float64 roundAndPackFloat64( struct roundingData *roundData, flag zSign, int16 zExp, bits64 zSig )
38962306a36Sopenharmony_ci{
39062306a36Sopenharmony_ci    int8 roundingMode;
39162306a36Sopenharmony_ci    flag roundNearestEven;
39262306a36Sopenharmony_ci    int16 roundIncrement, roundBits;
39362306a36Sopenharmony_ci    flag isTiny;
39462306a36Sopenharmony_ci
39562306a36Sopenharmony_ci    roundingMode = roundData->mode;
39662306a36Sopenharmony_ci    roundNearestEven = ( roundingMode == float_round_nearest_even );
39762306a36Sopenharmony_ci    roundIncrement = 0x200;
39862306a36Sopenharmony_ci    if ( ! roundNearestEven ) {
39962306a36Sopenharmony_ci        if ( roundingMode == float_round_to_zero ) {
40062306a36Sopenharmony_ci            roundIncrement = 0;
40162306a36Sopenharmony_ci        }
40262306a36Sopenharmony_ci        else {
40362306a36Sopenharmony_ci            roundIncrement = 0x3FF;
40462306a36Sopenharmony_ci            if ( zSign ) {
40562306a36Sopenharmony_ci                if ( roundingMode == float_round_up ) roundIncrement = 0;
40662306a36Sopenharmony_ci            }
40762306a36Sopenharmony_ci            else {
40862306a36Sopenharmony_ci                if ( roundingMode == float_round_down ) roundIncrement = 0;
40962306a36Sopenharmony_ci            }
41062306a36Sopenharmony_ci        }
41162306a36Sopenharmony_ci    }
41262306a36Sopenharmony_ci    roundBits = zSig & 0x3FF;
41362306a36Sopenharmony_ci    if ( 0x7FD <= (bits16) zExp ) {
41462306a36Sopenharmony_ci        if (    ( 0x7FD < zExp )
41562306a36Sopenharmony_ci             || (    ( zExp == 0x7FD )
41662306a36Sopenharmony_ci                  && ( (sbits64) ( zSig + roundIncrement ) < 0 ) )
41762306a36Sopenharmony_ci           ) {
41862306a36Sopenharmony_ci            //register int lr = __builtin_return_address(0);
41962306a36Sopenharmony_ci            //printk("roundAndPackFloat64 called from 0x%08x\n",lr);
42062306a36Sopenharmony_ci            roundData->exception |= float_flag_overflow | float_flag_inexact;
42162306a36Sopenharmony_ci            return packFloat64( zSign, 0x7FF, 0 ) - ( roundIncrement == 0 );
42262306a36Sopenharmony_ci        }
42362306a36Sopenharmony_ci        if ( zExp < 0 ) {
42462306a36Sopenharmony_ci            isTiny =
42562306a36Sopenharmony_ci                   ( float_detect_tininess == float_tininess_before_rounding )
42662306a36Sopenharmony_ci                || ( zExp < -1 )
42762306a36Sopenharmony_ci                || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
42862306a36Sopenharmony_ci            shift64RightJamming( zSig, - zExp, &zSig );
42962306a36Sopenharmony_ci            zExp = 0;
43062306a36Sopenharmony_ci            roundBits = zSig & 0x3FF;
43162306a36Sopenharmony_ci            if ( isTiny && roundBits ) roundData->exception |= float_flag_underflow;
43262306a36Sopenharmony_ci        }
43362306a36Sopenharmony_ci    }
43462306a36Sopenharmony_ci    if ( roundBits ) roundData->exception |= float_flag_inexact;
43562306a36Sopenharmony_ci    zSig = ( zSig + roundIncrement )>>10;
43662306a36Sopenharmony_ci    zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
43762306a36Sopenharmony_ci    if ( zSig == 0 ) zExp = 0;
43862306a36Sopenharmony_ci    return packFloat64( zSign, zExp, zSig );
43962306a36Sopenharmony_ci
44062306a36Sopenharmony_ci}
44162306a36Sopenharmony_ci
44262306a36Sopenharmony_ci/*
44362306a36Sopenharmony_ci-------------------------------------------------------------------------------
44462306a36Sopenharmony_ciTakes an abstract floating-point value having sign `zSign', exponent `zExp',
44562306a36Sopenharmony_ciand significand `zSig', and returns the proper double-precision floating-
44662306a36Sopenharmony_cipoint value corresponding to the abstract input.  This routine is just like
44762306a36Sopenharmony_ci`roundAndPackFloat64' except that `zSig' does not have to be normalized in
44862306a36Sopenharmony_ciany way.  In all cases, `zExp' must be 1 less than the ``true'' floating-
44962306a36Sopenharmony_cipoint exponent.
45062306a36Sopenharmony_ci-------------------------------------------------------------------------------
45162306a36Sopenharmony_ci*/
45262306a36Sopenharmony_cistatic float64
45362306a36Sopenharmony_ci normalizeRoundAndPackFloat64( struct roundingData *roundData, flag zSign, int16 zExp, bits64 zSig )
45462306a36Sopenharmony_ci{
45562306a36Sopenharmony_ci    int8 shiftCount;
45662306a36Sopenharmony_ci
45762306a36Sopenharmony_ci    shiftCount = countLeadingZeros64( zSig ) - 1;
45862306a36Sopenharmony_ci    return roundAndPackFloat64( roundData, zSign, zExp - shiftCount, zSig<<shiftCount );
45962306a36Sopenharmony_ci
46062306a36Sopenharmony_ci}
46162306a36Sopenharmony_ci
46262306a36Sopenharmony_ci#ifdef FLOATX80
46362306a36Sopenharmony_ci
46462306a36Sopenharmony_ci/*
46562306a36Sopenharmony_ci-------------------------------------------------------------------------------
46662306a36Sopenharmony_ciReturns the fraction bits of the extended double-precision floating-point
46762306a36Sopenharmony_civalue `a'.
46862306a36Sopenharmony_ci-------------------------------------------------------------------------------
46962306a36Sopenharmony_ci*/
47062306a36Sopenharmony_ciINLINE bits64 extractFloatx80Frac( floatx80 a )
47162306a36Sopenharmony_ci{
47262306a36Sopenharmony_ci
47362306a36Sopenharmony_ci    return a.low;
47462306a36Sopenharmony_ci
47562306a36Sopenharmony_ci}
47662306a36Sopenharmony_ci
47762306a36Sopenharmony_ci/*
47862306a36Sopenharmony_ci-------------------------------------------------------------------------------
47962306a36Sopenharmony_ciReturns the exponent bits of the extended double-precision floating-point
48062306a36Sopenharmony_civalue `a'.
48162306a36Sopenharmony_ci-------------------------------------------------------------------------------
48262306a36Sopenharmony_ci*/
48362306a36Sopenharmony_ciINLINE int32 extractFloatx80Exp( floatx80 a )
48462306a36Sopenharmony_ci{
48562306a36Sopenharmony_ci
48662306a36Sopenharmony_ci    return a.high & 0x7FFF;
48762306a36Sopenharmony_ci
48862306a36Sopenharmony_ci}
48962306a36Sopenharmony_ci
49062306a36Sopenharmony_ci/*
49162306a36Sopenharmony_ci-------------------------------------------------------------------------------
49262306a36Sopenharmony_ciReturns the sign bit of the extended double-precision floating-point value
49362306a36Sopenharmony_ci`a'.
49462306a36Sopenharmony_ci-------------------------------------------------------------------------------
49562306a36Sopenharmony_ci*/
49662306a36Sopenharmony_ciINLINE flag extractFloatx80Sign( floatx80 a )
49762306a36Sopenharmony_ci{
49862306a36Sopenharmony_ci
49962306a36Sopenharmony_ci    return a.high>>15;
50062306a36Sopenharmony_ci
50162306a36Sopenharmony_ci}
50262306a36Sopenharmony_ci
50362306a36Sopenharmony_ci/*
50462306a36Sopenharmony_ci-------------------------------------------------------------------------------
50562306a36Sopenharmony_ciNormalizes the subnormal extended double-precision floating-point value
50662306a36Sopenharmony_cirepresented by the denormalized significand `aSig'.  The normalized exponent
50762306a36Sopenharmony_ciand significand are stored at the locations pointed to by `zExpPtr' and
50862306a36Sopenharmony_ci`zSigPtr', respectively.
50962306a36Sopenharmony_ci-------------------------------------------------------------------------------
51062306a36Sopenharmony_ci*/
51162306a36Sopenharmony_cistatic void
51262306a36Sopenharmony_ci normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr )
51362306a36Sopenharmony_ci{
51462306a36Sopenharmony_ci    int8 shiftCount;
51562306a36Sopenharmony_ci
51662306a36Sopenharmony_ci    shiftCount = countLeadingZeros64( aSig );
51762306a36Sopenharmony_ci    *zSigPtr = aSig<<shiftCount;
51862306a36Sopenharmony_ci    *zExpPtr = 1 - shiftCount;
51962306a36Sopenharmony_ci
52062306a36Sopenharmony_ci}
52162306a36Sopenharmony_ci
52262306a36Sopenharmony_ci/*
52362306a36Sopenharmony_ci-------------------------------------------------------------------------------
52462306a36Sopenharmony_ciPacks the sign `zSign', exponent `zExp', and significand `zSig' into an
52562306a36Sopenharmony_ciextended double-precision floating-point value, returning the result.
52662306a36Sopenharmony_ci-------------------------------------------------------------------------------
52762306a36Sopenharmony_ci*/
52862306a36Sopenharmony_ciINLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig )
52962306a36Sopenharmony_ci{
53062306a36Sopenharmony_ci    floatx80 z;
53162306a36Sopenharmony_ci
53262306a36Sopenharmony_ci    z.low = zSig;
53362306a36Sopenharmony_ci    z.high = ( ( (bits16) zSign )<<15 ) + zExp;
53462306a36Sopenharmony_ci    z.__padding = 0;
53562306a36Sopenharmony_ci    return z;
53662306a36Sopenharmony_ci
53762306a36Sopenharmony_ci}
53862306a36Sopenharmony_ci
53962306a36Sopenharmony_ci/*
54062306a36Sopenharmony_ci-------------------------------------------------------------------------------
54162306a36Sopenharmony_ciTakes an abstract floating-point value having sign `zSign', exponent `zExp',
54262306a36Sopenharmony_ciand extended significand formed by the concatenation of `zSig0' and `zSig1',
54362306a36Sopenharmony_ciand returns the proper extended double-precision floating-point value
54462306a36Sopenharmony_cicorresponding to the abstract input.  Ordinarily, the abstract value is
54562306a36Sopenharmony_cirounded and packed into the extended double-precision format, with the
54662306a36Sopenharmony_ciinexact exception raised if the abstract input cannot be represented
54762306a36Sopenharmony_ciexactly.  If the abstract value is too large, however, the overflow and
54862306a36Sopenharmony_ciinexact exceptions are raised and an infinity or maximal finite value is
54962306a36Sopenharmony_cireturned.  If the abstract value is too small, the input value is rounded to
55062306a36Sopenharmony_cia subnormal number, and the underflow and inexact exceptions are raised if
55162306a36Sopenharmony_cithe abstract input cannot be represented exactly as a subnormal extended
55262306a36Sopenharmony_cidouble-precision floating-point number.
55362306a36Sopenharmony_ci    If `roundingPrecision' is 32 or 64, the result is rounded to the same
55462306a36Sopenharmony_cinumber of bits as single or double precision, respectively.  Otherwise, the
55562306a36Sopenharmony_ciresult is rounded to the full precision of the extended double-precision
55662306a36Sopenharmony_ciformat.
55762306a36Sopenharmony_ci    The input significand must be normalized or smaller.  If the input
55862306a36Sopenharmony_cisignificand is not normalized, `zExp' must be 0; in that case, the result
55962306a36Sopenharmony_cireturned is a subnormal number, and it must not require rounding.  The
56062306a36Sopenharmony_cihandling of underflow and overflow follows the IEC/IEEE Standard for Binary
56162306a36Sopenharmony_ciFloating-point Arithmetic.
56262306a36Sopenharmony_ci-------------------------------------------------------------------------------
56362306a36Sopenharmony_ci*/
56462306a36Sopenharmony_cistatic floatx80
56562306a36Sopenharmony_ci roundAndPackFloatx80(
56662306a36Sopenharmony_ci     struct roundingData *roundData, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
56762306a36Sopenharmony_ci )
56862306a36Sopenharmony_ci{
56962306a36Sopenharmony_ci    int8 roundingMode, roundingPrecision;
57062306a36Sopenharmony_ci    flag roundNearestEven, increment, isTiny;
57162306a36Sopenharmony_ci    int64 roundIncrement, roundMask, roundBits;
57262306a36Sopenharmony_ci
57362306a36Sopenharmony_ci    roundingMode = roundData->mode;
57462306a36Sopenharmony_ci    roundingPrecision = roundData->precision;
57562306a36Sopenharmony_ci    roundNearestEven = ( roundingMode == float_round_nearest_even );
57662306a36Sopenharmony_ci    if ( roundingPrecision == 80 ) goto precision80;
57762306a36Sopenharmony_ci    if ( roundingPrecision == 64 ) {
57862306a36Sopenharmony_ci        roundIncrement = LIT64( 0x0000000000000400 );
57962306a36Sopenharmony_ci        roundMask = LIT64( 0x00000000000007FF );
58062306a36Sopenharmony_ci    }
58162306a36Sopenharmony_ci    else if ( roundingPrecision == 32 ) {
58262306a36Sopenharmony_ci        roundIncrement = LIT64( 0x0000008000000000 );
58362306a36Sopenharmony_ci        roundMask = LIT64( 0x000000FFFFFFFFFF );
58462306a36Sopenharmony_ci    }
58562306a36Sopenharmony_ci    else {
58662306a36Sopenharmony_ci        goto precision80;
58762306a36Sopenharmony_ci    }
58862306a36Sopenharmony_ci    zSig0 |= ( zSig1 != 0 );
58962306a36Sopenharmony_ci    if ( ! roundNearestEven ) {
59062306a36Sopenharmony_ci        if ( roundingMode == float_round_to_zero ) {
59162306a36Sopenharmony_ci            roundIncrement = 0;
59262306a36Sopenharmony_ci        }
59362306a36Sopenharmony_ci        else {
59462306a36Sopenharmony_ci            roundIncrement = roundMask;
59562306a36Sopenharmony_ci            if ( zSign ) {
59662306a36Sopenharmony_ci                if ( roundingMode == float_round_up ) roundIncrement = 0;
59762306a36Sopenharmony_ci            }
59862306a36Sopenharmony_ci            else {
59962306a36Sopenharmony_ci                if ( roundingMode == float_round_down ) roundIncrement = 0;
60062306a36Sopenharmony_ci            }
60162306a36Sopenharmony_ci        }
60262306a36Sopenharmony_ci    }
60362306a36Sopenharmony_ci    roundBits = zSig0 & roundMask;
60462306a36Sopenharmony_ci    if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
60562306a36Sopenharmony_ci        if (    ( 0x7FFE < zExp )
60662306a36Sopenharmony_ci             || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
60762306a36Sopenharmony_ci           ) {
60862306a36Sopenharmony_ci            goto overflow;
60962306a36Sopenharmony_ci        }
61062306a36Sopenharmony_ci        if ( zExp <= 0 ) {
61162306a36Sopenharmony_ci            isTiny =
61262306a36Sopenharmony_ci                   ( float_detect_tininess == float_tininess_before_rounding )
61362306a36Sopenharmony_ci                || ( zExp < 0 )
61462306a36Sopenharmony_ci                || ( zSig0 <= zSig0 + roundIncrement );
61562306a36Sopenharmony_ci            shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
61662306a36Sopenharmony_ci            zExp = 0;
61762306a36Sopenharmony_ci            roundBits = zSig0 & roundMask;
61862306a36Sopenharmony_ci            if ( isTiny && roundBits ) roundData->exception |= float_flag_underflow;
61962306a36Sopenharmony_ci            if ( roundBits ) roundData->exception |= float_flag_inexact;
62062306a36Sopenharmony_ci            zSig0 += roundIncrement;
62162306a36Sopenharmony_ci            if ( (sbits64) zSig0 < 0 ) zExp = 1;
62262306a36Sopenharmony_ci            roundIncrement = roundMask + 1;
62362306a36Sopenharmony_ci            if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
62462306a36Sopenharmony_ci                roundMask |= roundIncrement;
62562306a36Sopenharmony_ci            }
62662306a36Sopenharmony_ci            zSig0 &= ~ roundMask;
62762306a36Sopenharmony_ci            return packFloatx80( zSign, zExp, zSig0 );
62862306a36Sopenharmony_ci        }
62962306a36Sopenharmony_ci    }
63062306a36Sopenharmony_ci    if ( roundBits ) roundData->exception |= float_flag_inexact;
63162306a36Sopenharmony_ci    zSig0 += roundIncrement;
63262306a36Sopenharmony_ci    if ( zSig0 < roundIncrement ) {
63362306a36Sopenharmony_ci        ++zExp;
63462306a36Sopenharmony_ci        zSig0 = LIT64( 0x8000000000000000 );
63562306a36Sopenharmony_ci    }
63662306a36Sopenharmony_ci    roundIncrement = roundMask + 1;
63762306a36Sopenharmony_ci    if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
63862306a36Sopenharmony_ci        roundMask |= roundIncrement;
63962306a36Sopenharmony_ci    }
64062306a36Sopenharmony_ci    zSig0 &= ~ roundMask;
64162306a36Sopenharmony_ci    if ( zSig0 == 0 ) zExp = 0;
64262306a36Sopenharmony_ci    return packFloatx80( zSign, zExp, zSig0 );
64362306a36Sopenharmony_ci precision80:
64462306a36Sopenharmony_ci    increment = ( (sbits64) zSig1 < 0 );
64562306a36Sopenharmony_ci    if ( ! roundNearestEven ) {
64662306a36Sopenharmony_ci        if ( roundingMode == float_round_to_zero ) {
64762306a36Sopenharmony_ci            increment = 0;
64862306a36Sopenharmony_ci        }
64962306a36Sopenharmony_ci        else {
65062306a36Sopenharmony_ci            if ( zSign ) {
65162306a36Sopenharmony_ci                increment = ( roundingMode == float_round_down ) && zSig1;
65262306a36Sopenharmony_ci            }
65362306a36Sopenharmony_ci            else {
65462306a36Sopenharmony_ci                increment = ( roundingMode == float_round_up ) && zSig1;
65562306a36Sopenharmony_ci            }
65662306a36Sopenharmony_ci        }
65762306a36Sopenharmony_ci    }
65862306a36Sopenharmony_ci    if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
65962306a36Sopenharmony_ci        if (    ( 0x7FFE < zExp )
66062306a36Sopenharmony_ci             || (    ( zExp == 0x7FFE )
66162306a36Sopenharmony_ci                  && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
66262306a36Sopenharmony_ci                  && increment
66362306a36Sopenharmony_ci                )
66462306a36Sopenharmony_ci           ) {
66562306a36Sopenharmony_ci            roundMask = 0;
66662306a36Sopenharmony_ci overflow:
66762306a36Sopenharmony_ci            roundData->exception |= float_flag_overflow | float_flag_inexact;
66862306a36Sopenharmony_ci            if (    ( roundingMode == float_round_to_zero )
66962306a36Sopenharmony_ci                 || ( zSign && ( roundingMode == float_round_up ) )
67062306a36Sopenharmony_ci                 || ( ! zSign && ( roundingMode == float_round_down ) )
67162306a36Sopenharmony_ci               ) {
67262306a36Sopenharmony_ci                return packFloatx80( zSign, 0x7FFE, ~ roundMask );
67362306a36Sopenharmony_ci            }
67462306a36Sopenharmony_ci            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
67562306a36Sopenharmony_ci        }
67662306a36Sopenharmony_ci        if ( zExp <= 0 ) {
67762306a36Sopenharmony_ci            isTiny =
67862306a36Sopenharmony_ci                   ( float_detect_tininess == float_tininess_before_rounding )
67962306a36Sopenharmony_ci                || ( zExp < 0 )
68062306a36Sopenharmony_ci                || ! increment
68162306a36Sopenharmony_ci                || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
68262306a36Sopenharmony_ci            shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
68362306a36Sopenharmony_ci            zExp = 0;
68462306a36Sopenharmony_ci            if ( isTiny && zSig1 ) roundData->exception |= float_flag_underflow;
68562306a36Sopenharmony_ci            if ( zSig1 ) roundData->exception |= float_flag_inexact;
68662306a36Sopenharmony_ci            if ( roundNearestEven ) {
68762306a36Sopenharmony_ci                increment = ( (sbits64) zSig1 < 0 );
68862306a36Sopenharmony_ci            }
68962306a36Sopenharmony_ci            else {
69062306a36Sopenharmony_ci                if ( zSign ) {
69162306a36Sopenharmony_ci                    increment = ( roundingMode == float_round_down ) && zSig1;
69262306a36Sopenharmony_ci                }
69362306a36Sopenharmony_ci                else {
69462306a36Sopenharmony_ci                    increment = ( roundingMode == float_round_up ) && zSig1;
69562306a36Sopenharmony_ci                }
69662306a36Sopenharmony_ci            }
69762306a36Sopenharmony_ci            if ( increment ) {
69862306a36Sopenharmony_ci                ++zSig0;
69962306a36Sopenharmony_ci                zSig0 &= ~ ( ( zSig1 + zSig1 == 0 ) & roundNearestEven );
70062306a36Sopenharmony_ci                if ( (sbits64) zSig0 < 0 ) zExp = 1;
70162306a36Sopenharmony_ci            }
70262306a36Sopenharmony_ci            return packFloatx80( zSign, zExp, zSig0 );
70362306a36Sopenharmony_ci        }
70462306a36Sopenharmony_ci    }
70562306a36Sopenharmony_ci    if ( zSig1 ) roundData->exception |= float_flag_inexact;
70662306a36Sopenharmony_ci    if ( increment ) {
70762306a36Sopenharmony_ci        ++zSig0;
70862306a36Sopenharmony_ci        if ( zSig0 == 0 ) {
70962306a36Sopenharmony_ci            ++zExp;
71062306a36Sopenharmony_ci            zSig0 = LIT64( 0x8000000000000000 );
71162306a36Sopenharmony_ci        }
71262306a36Sopenharmony_ci        else {
71362306a36Sopenharmony_ci            zSig0 &= ~ ( ( zSig1 + zSig1 == 0 ) & roundNearestEven );
71462306a36Sopenharmony_ci        }
71562306a36Sopenharmony_ci    }
71662306a36Sopenharmony_ci    else {
71762306a36Sopenharmony_ci        if ( zSig0 == 0 ) zExp = 0;
71862306a36Sopenharmony_ci    }
71962306a36Sopenharmony_ci
72062306a36Sopenharmony_ci    return packFloatx80( zSign, zExp, zSig0 );
72162306a36Sopenharmony_ci}
72262306a36Sopenharmony_ci
72362306a36Sopenharmony_ci/*
72462306a36Sopenharmony_ci-------------------------------------------------------------------------------
72562306a36Sopenharmony_ciTakes an abstract floating-point value having sign `zSign', exponent
72662306a36Sopenharmony_ci`zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
72762306a36Sopenharmony_ciand returns the proper extended double-precision floating-point value
72862306a36Sopenharmony_cicorresponding to the abstract input.  This routine is just like
72962306a36Sopenharmony_ci`roundAndPackFloatx80' except that the input significand does not have to be
73062306a36Sopenharmony_cinormalized.
73162306a36Sopenharmony_ci-------------------------------------------------------------------------------
73262306a36Sopenharmony_ci*/
73362306a36Sopenharmony_cistatic floatx80
73462306a36Sopenharmony_ci normalizeRoundAndPackFloatx80(
73562306a36Sopenharmony_ci     struct roundingData *roundData, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
73662306a36Sopenharmony_ci )
73762306a36Sopenharmony_ci{
73862306a36Sopenharmony_ci    int8 shiftCount;
73962306a36Sopenharmony_ci
74062306a36Sopenharmony_ci    if ( zSig0 == 0 ) {
74162306a36Sopenharmony_ci        zSig0 = zSig1;
74262306a36Sopenharmony_ci        zSig1 = 0;
74362306a36Sopenharmony_ci        zExp -= 64;
74462306a36Sopenharmony_ci    }
74562306a36Sopenharmony_ci    shiftCount = countLeadingZeros64( zSig0 );
74662306a36Sopenharmony_ci    shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
74762306a36Sopenharmony_ci    zExp -= shiftCount;
74862306a36Sopenharmony_ci    return
74962306a36Sopenharmony_ci        roundAndPackFloatx80( roundData, zSign, zExp, zSig0, zSig1 );
75062306a36Sopenharmony_ci
75162306a36Sopenharmony_ci}
75262306a36Sopenharmony_ci
75362306a36Sopenharmony_ci#endif
75462306a36Sopenharmony_ci
75562306a36Sopenharmony_ci/*
75662306a36Sopenharmony_ci-------------------------------------------------------------------------------
75762306a36Sopenharmony_ciReturns the result of converting the 32-bit two's complement integer `a' to
75862306a36Sopenharmony_cithe single-precision floating-point format.  The conversion is performed
75962306a36Sopenharmony_ciaccording to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
76062306a36Sopenharmony_ci-------------------------------------------------------------------------------
76162306a36Sopenharmony_ci*/
76262306a36Sopenharmony_cifloat32 int32_to_float32(struct roundingData *roundData, int32 a)
76362306a36Sopenharmony_ci{
76462306a36Sopenharmony_ci    flag zSign;
76562306a36Sopenharmony_ci
76662306a36Sopenharmony_ci    if ( a == 0 ) return 0;
76762306a36Sopenharmony_ci    if ( a == 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
76862306a36Sopenharmony_ci    zSign = ( a < 0 );
76962306a36Sopenharmony_ci    return normalizeRoundAndPackFloat32( roundData, zSign, 0x9C, zSign ? - a : a );
77062306a36Sopenharmony_ci
77162306a36Sopenharmony_ci}
77262306a36Sopenharmony_ci
77362306a36Sopenharmony_ci/*
77462306a36Sopenharmony_ci-------------------------------------------------------------------------------
77562306a36Sopenharmony_ciReturns the result of converting the 32-bit two's complement integer `a' to
77662306a36Sopenharmony_cithe double-precision floating-point format.  The conversion is performed
77762306a36Sopenharmony_ciaccording to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
77862306a36Sopenharmony_ci-------------------------------------------------------------------------------
77962306a36Sopenharmony_ci*/
78062306a36Sopenharmony_cifloat64 int32_to_float64( int32 a )
78162306a36Sopenharmony_ci{
78262306a36Sopenharmony_ci    flag aSign;
78362306a36Sopenharmony_ci    uint32 absA;
78462306a36Sopenharmony_ci    int8 shiftCount;
78562306a36Sopenharmony_ci    bits64 zSig;
78662306a36Sopenharmony_ci
78762306a36Sopenharmony_ci    if ( a == 0 ) return 0;
78862306a36Sopenharmony_ci    aSign = ( a < 0 );
78962306a36Sopenharmony_ci    absA = aSign ? - a : a;
79062306a36Sopenharmony_ci    shiftCount = countLeadingZeros32( absA ) + 21;
79162306a36Sopenharmony_ci    zSig = absA;
79262306a36Sopenharmony_ci    return packFloat64( aSign, 0x432 - shiftCount, zSig<<shiftCount );
79362306a36Sopenharmony_ci
79462306a36Sopenharmony_ci}
79562306a36Sopenharmony_ci
79662306a36Sopenharmony_ci#ifdef FLOATX80
79762306a36Sopenharmony_ci
79862306a36Sopenharmony_ci/*
79962306a36Sopenharmony_ci-------------------------------------------------------------------------------
80062306a36Sopenharmony_ciReturns the result of converting the 32-bit two's complement integer `a'
80162306a36Sopenharmony_cito the extended double-precision floating-point format.  The conversion
80262306a36Sopenharmony_ciis performed according to the IEC/IEEE Standard for Binary Floating-point
80362306a36Sopenharmony_ciArithmetic.
80462306a36Sopenharmony_ci-------------------------------------------------------------------------------
80562306a36Sopenharmony_ci*/
80662306a36Sopenharmony_cifloatx80 int32_to_floatx80( int32 a )
80762306a36Sopenharmony_ci{
80862306a36Sopenharmony_ci    flag zSign;
80962306a36Sopenharmony_ci    uint32 absA;
81062306a36Sopenharmony_ci    int8 shiftCount;
81162306a36Sopenharmony_ci    bits64 zSig;
81262306a36Sopenharmony_ci
81362306a36Sopenharmony_ci    if ( a == 0 ) return packFloatx80( 0, 0, 0 );
81462306a36Sopenharmony_ci    zSign = ( a < 0 );
81562306a36Sopenharmony_ci    absA = zSign ? - a : a;
81662306a36Sopenharmony_ci    shiftCount = countLeadingZeros32( absA ) + 32;
81762306a36Sopenharmony_ci    zSig = absA;
81862306a36Sopenharmony_ci    return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
81962306a36Sopenharmony_ci
82062306a36Sopenharmony_ci}
82162306a36Sopenharmony_ci
82262306a36Sopenharmony_ci#endif
82362306a36Sopenharmony_ci
82462306a36Sopenharmony_ci/*
82562306a36Sopenharmony_ci-------------------------------------------------------------------------------
82662306a36Sopenharmony_ciReturns the result of converting the single-precision floating-point value
82762306a36Sopenharmony_ci`a' to the 32-bit two's complement integer format.  The conversion is
82862306a36Sopenharmony_ciperformed according to the IEC/IEEE Standard for Binary Floating-point
82962306a36Sopenharmony_ciArithmetic---which means in particular that the conversion is rounded
83062306a36Sopenharmony_ciaccording to the current rounding mode.  If `a' is a NaN, the largest
83162306a36Sopenharmony_cipositive integer is returned.  Otherwise, if the conversion overflows, the
83262306a36Sopenharmony_cilargest integer with the same sign as `a' is returned.
83362306a36Sopenharmony_ci-------------------------------------------------------------------------------
83462306a36Sopenharmony_ci*/
83562306a36Sopenharmony_ciint32 float32_to_int32( struct roundingData *roundData, float32 a )
83662306a36Sopenharmony_ci{
83762306a36Sopenharmony_ci    flag aSign;
83862306a36Sopenharmony_ci    int16 aExp, shiftCount;
83962306a36Sopenharmony_ci    bits32 aSig;
84062306a36Sopenharmony_ci    bits64 zSig;
84162306a36Sopenharmony_ci
84262306a36Sopenharmony_ci    aSig = extractFloat32Frac( a );
84362306a36Sopenharmony_ci    aExp = extractFloat32Exp( a );
84462306a36Sopenharmony_ci    aSign = extractFloat32Sign( a );
84562306a36Sopenharmony_ci    if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
84662306a36Sopenharmony_ci    if ( aExp ) aSig |= 0x00800000;
84762306a36Sopenharmony_ci    shiftCount = 0xAF - aExp;
84862306a36Sopenharmony_ci    zSig = aSig;
84962306a36Sopenharmony_ci    zSig <<= 32;
85062306a36Sopenharmony_ci    if ( 0 < shiftCount ) shift64RightJamming( zSig, shiftCount, &zSig );
85162306a36Sopenharmony_ci    return roundAndPackInt32( roundData, aSign, zSig );
85262306a36Sopenharmony_ci
85362306a36Sopenharmony_ci}
85462306a36Sopenharmony_ci
85562306a36Sopenharmony_ci/*
85662306a36Sopenharmony_ci-------------------------------------------------------------------------------
85762306a36Sopenharmony_ciReturns the result of converting the single-precision floating-point value
85862306a36Sopenharmony_ci`a' to the 32-bit two's complement integer format.  The conversion is
85962306a36Sopenharmony_ciperformed according to the IEC/IEEE Standard for Binary Floating-point
86062306a36Sopenharmony_ciArithmetic, except that the conversion is always rounded toward zero.  If
86162306a36Sopenharmony_ci`a' is a NaN, the largest positive integer is returned.  Otherwise, if the
86262306a36Sopenharmony_ciconversion overflows, the largest integer with the same sign as `a' is
86362306a36Sopenharmony_cireturned.
86462306a36Sopenharmony_ci-------------------------------------------------------------------------------
86562306a36Sopenharmony_ci*/
86662306a36Sopenharmony_ciint32 float32_to_int32_round_to_zero( float32 a )
86762306a36Sopenharmony_ci{
86862306a36Sopenharmony_ci    flag aSign;
86962306a36Sopenharmony_ci    int16 aExp, shiftCount;
87062306a36Sopenharmony_ci    bits32 aSig;
87162306a36Sopenharmony_ci    int32 z;
87262306a36Sopenharmony_ci
87362306a36Sopenharmony_ci    aSig = extractFloat32Frac( a );
87462306a36Sopenharmony_ci    aExp = extractFloat32Exp( a );
87562306a36Sopenharmony_ci    aSign = extractFloat32Sign( a );
87662306a36Sopenharmony_ci    shiftCount = aExp - 0x9E;
87762306a36Sopenharmony_ci    if ( 0 <= shiftCount ) {
87862306a36Sopenharmony_ci        if ( a == 0xCF000000 ) return 0x80000000;
87962306a36Sopenharmony_ci        float_raise( float_flag_invalid );
88062306a36Sopenharmony_ci        if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
88162306a36Sopenharmony_ci        return 0x80000000;
88262306a36Sopenharmony_ci    }
88362306a36Sopenharmony_ci    else if ( aExp <= 0x7E ) {
88462306a36Sopenharmony_ci        if ( aExp | aSig ) float_raise( float_flag_inexact );
88562306a36Sopenharmony_ci        return 0;
88662306a36Sopenharmony_ci    }
88762306a36Sopenharmony_ci    aSig = ( aSig | 0x00800000 )<<8;
88862306a36Sopenharmony_ci    z = aSig>>( - shiftCount );
88962306a36Sopenharmony_ci    if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
89062306a36Sopenharmony_ci        float_raise( float_flag_inexact );
89162306a36Sopenharmony_ci    }
89262306a36Sopenharmony_ci    return aSign ? - z : z;
89362306a36Sopenharmony_ci
89462306a36Sopenharmony_ci}
89562306a36Sopenharmony_ci
89662306a36Sopenharmony_ci/*
89762306a36Sopenharmony_ci-------------------------------------------------------------------------------
89862306a36Sopenharmony_ciReturns the result of converting the single-precision floating-point value
89962306a36Sopenharmony_ci`a' to the double-precision floating-point format.  The conversion is
90062306a36Sopenharmony_ciperformed according to the IEC/IEEE Standard for Binary Floating-point
90162306a36Sopenharmony_ciArithmetic.
90262306a36Sopenharmony_ci-------------------------------------------------------------------------------
90362306a36Sopenharmony_ci*/
90462306a36Sopenharmony_cifloat64 float32_to_float64( float32 a )
90562306a36Sopenharmony_ci{
90662306a36Sopenharmony_ci    flag aSign;
90762306a36Sopenharmony_ci    int16 aExp;
90862306a36Sopenharmony_ci    bits32 aSig;
90962306a36Sopenharmony_ci
91062306a36Sopenharmony_ci    aSig = extractFloat32Frac( a );
91162306a36Sopenharmony_ci    aExp = extractFloat32Exp( a );
91262306a36Sopenharmony_ci    aSign = extractFloat32Sign( a );
91362306a36Sopenharmony_ci    if ( aExp == 0xFF ) {
91462306a36Sopenharmony_ci        if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );
91562306a36Sopenharmony_ci        return packFloat64( aSign, 0x7FF, 0 );
91662306a36Sopenharmony_ci    }
91762306a36Sopenharmony_ci    if ( aExp == 0 ) {
91862306a36Sopenharmony_ci        if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
91962306a36Sopenharmony_ci        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
92062306a36Sopenharmony_ci        --aExp;
92162306a36Sopenharmony_ci    }
92262306a36Sopenharmony_ci    return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 );
92362306a36Sopenharmony_ci
92462306a36Sopenharmony_ci}
92562306a36Sopenharmony_ci
92662306a36Sopenharmony_ci#ifdef FLOATX80
92762306a36Sopenharmony_ci
92862306a36Sopenharmony_ci/*
92962306a36Sopenharmony_ci-------------------------------------------------------------------------------
93062306a36Sopenharmony_ciReturns the result of converting the single-precision floating-point value
93162306a36Sopenharmony_ci`a' to the extended double-precision floating-point format.  The conversion
93262306a36Sopenharmony_ciis performed according to the IEC/IEEE Standard for Binary Floating-point
93362306a36Sopenharmony_ciArithmetic.
93462306a36Sopenharmony_ci-------------------------------------------------------------------------------
93562306a36Sopenharmony_ci*/
93662306a36Sopenharmony_cifloatx80 float32_to_floatx80( float32 a )
93762306a36Sopenharmony_ci{
93862306a36Sopenharmony_ci    flag aSign;
93962306a36Sopenharmony_ci    int16 aExp;
94062306a36Sopenharmony_ci    bits32 aSig;
94162306a36Sopenharmony_ci
94262306a36Sopenharmony_ci    aSig = extractFloat32Frac( a );
94362306a36Sopenharmony_ci    aExp = extractFloat32Exp( a );
94462306a36Sopenharmony_ci    aSign = extractFloat32Sign( a );
94562306a36Sopenharmony_ci    if ( aExp == 0xFF ) {
94662306a36Sopenharmony_ci        if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a ) );
94762306a36Sopenharmony_ci        return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
94862306a36Sopenharmony_ci    }
94962306a36Sopenharmony_ci    if ( aExp == 0 ) {
95062306a36Sopenharmony_ci        if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
95162306a36Sopenharmony_ci        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
95262306a36Sopenharmony_ci    }
95362306a36Sopenharmony_ci    aSig |= 0x00800000;
95462306a36Sopenharmony_ci    return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 );
95562306a36Sopenharmony_ci
95662306a36Sopenharmony_ci}
95762306a36Sopenharmony_ci
95862306a36Sopenharmony_ci#endif
95962306a36Sopenharmony_ci
96062306a36Sopenharmony_ci/*
96162306a36Sopenharmony_ci-------------------------------------------------------------------------------
96262306a36Sopenharmony_ciRounds the single-precision floating-point value `a' to an integer, and
96362306a36Sopenharmony_cireturns the result as a single-precision floating-point value.  The
96462306a36Sopenharmony_cioperation is performed according to the IEC/IEEE Standard for Binary
96562306a36Sopenharmony_ciFloating-point Arithmetic.
96662306a36Sopenharmony_ci-------------------------------------------------------------------------------
96762306a36Sopenharmony_ci*/
96862306a36Sopenharmony_cifloat32 float32_round_to_int( struct roundingData *roundData, float32 a )
96962306a36Sopenharmony_ci{
97062306a36Sopenharmony_ci    flag aSign;
97162306a36Sopenharmony_ci    int16 aExp;
97262306a36Sopenharmony_ci    bits32 lastBitMask, roundBitsMask;
97362306a36Sopenharmony_ci    int8 roundingMode;
97462306a36Sopenharmony_ci    float32 z;
97562306a36Sopenharmony_ci
97662306a36Sopenharmony_ci    aExp = extractFloat32Exp( a );
97762306a36Sopenharmony_ci    if ( 0x96 <= aExp ) {
97862306a36Sopenharmony_ci        if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
97962306a36Sopenharmony_ci            return propagateFloat32NaN( a, a );
98062306a36Sopenharmony_ci        }
98162306a36Sopenharmony_ci        return a;
98262306a36Sopenharmony_ci    }
98362306a36Sopenharmony_ci    roundingMode = roundData->mode;
98462306a36Sopenharmony_ci    if ( aExp <= 0x7E ) {
98562306a36Sopenharmony_ci        if ( (bits32) ( a<<1 ) == 0 ) return a;
98662306a36Sopenharmony_ci        roundData->exception |= float_flag_inexact;
98762306a36Sopenharmony_ci        aSign = extractFloat32Sign( a );
98862306a36Sopenharmony_ci        switch ( roundingMode ) {
98962306a36Sopenharmony_ci         case float_round_nearest_even:
99062306a36Sopenharmony_ci            if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
99162306a36Sopenharmony_ci                return packFloat32( aSign, 0x7F, 0 );
99262306a36Sopenharmony_ci            }
99362306a36Sopenharmony_ci            break;
99462306a36Sopenharmony_ci         case float_round_down:
99562306a36Sopenharmony_ci            return aSign ? 0xBF800000 : 0;
99662306a36Sopenharmony_ci         case float_round_up:
99762306a36Sopenharmony_ci            return aSign ? 0x80000000 : 0x3F800000;
99862306a36Sopenharmony_ci        }
99962306a36Sopenharmony_ci        return packFloat32( aSign, 0, 0 );
100062306a36Sopenharmony_ci    }
100162306a36Sopenharmony_ci    lastBitMask = 1;
100262306a36Sopenharmony_ci    lastBitMask <<= 0x96 - aExp;
100362306a36Sopenharmony_ci    roundBitsMask = lastBitMask - 1;
100462306a36Sopenharmony_ci    z = a;
100562306a36Sopenharmony_ci    if ( roundingMode == float_round_nearest_even ) {
100662306a36Sopenharmony_ci        z += lastBitMask>>1;
100762306a36Sopenharmony_ci        if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
100862306a36Sopenharmony_ci    }
100962306a36Sopenharmony_ci    else if ( roundingMode != float_round_to_zero ) {
101062306a36Sopenharmony_ci        if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
101162306a36Sopenharmony_ci            z += roundBitsMask;
101262306a36Sopenharmony_ci        }
101362306a36Sopenharmony_ci    }
101462306a36Sopenharmony_ci    z &= ~ roundBitsMask;
101562306a36Sopenharmony_ci    if ( z != a ) roundData->exception |= float_flag_inexact;
101662306a36Sopenharmony_ci    return z;
101762306a36Sopenharmony_ci
101862306a36Sopenharmony_ci}
101962306a36Sopenharmony_ci
102062306a36Sopenharmony_ci/*
102162306a36Sopenharmony_ci-------------------------------------------------------------------------------
102262306a36Sopenharmony_ciReturns the result of adding the absolute values of the single-precision
102362306a36Sopenharmony_cifloating-point values `a' and `b'.  If `zSign' is true, the sum is negated
102462306a36Sopenharmony_cibefore being returned.  `zSign' is ignored if the result is a NaN.  The
102562306a36Sopenharmony_ciaddition is performed according to the IEC/IEEE Standard for Binary
102662306a36Sopenharmony_ciFloating-point Arithmetic.
102762306a36Sopenharmony_ci-------------------------------------------------------------------------------
102862306a36Sopenharmony_ci*/
102962306a36Sopenharmony_cistatic float32 addFloat32Sigs( struct roundingData *roundData, float32 a, float32 b, flag zSign )
103062306a36Sopenharmony_ci{
103162306a36Sopenharmony_ci    int16 aExp, bExp, zExp;
103262306a36Sopenharmony_ci    bits32 aSig, bSig, zSig;
103362306a36Sopenharmony_ci    int16 expDiff;
103462306a36Sopenharmony_ci
103562306a36Sopenharmony_ci    aSig = extractFloat32Frac( a );
103662306a36Sopenharmony_ci    aExp = extractFloat32Exp( a );
103762306a36Sopenharmony_ci    bSig = extractFloat32Frac( b );
103862306a36Sopenharmony_ci    bExp = extractFloat32Exp( b );
103962306a36Sopenharmony_ci    expDiff = aExp - bExp;
104062306a36Sopenharmony_ci    aSig <<= 6;
104162306a36Sopenharmony_ci    bSig <<= 6;
104262306a36Sopenharmony_ci    if ( 0 < expDiff ) {
104362306a36Sopenharmony_ci        if ( aExp == 0xFF ) {
104462306a36Sopenharmony_ci            if ( aSig ) return propagateFloat32NaN( a, b );
104562306a36Sopenharmony_ci            return a;
104662306a36Sopenharmony_ci        }
104762306a36Sopenharmony_ci        if ( bExp == 0 ) {
104862306a36Sopenharmony_ci            --expDiff;
104962306a36Sopenharmony_ci        }
105062306a36Sopenharmony_ci        else {
105162306a36Sopenharmony_ci            bSig |= 0x20000000;
105262306a36Sopenharmony_ci        }
105362306a36Sopenharmony_ci        shift32RightJamming( bSig, expDiff, &bSig );
105462306a36Sopenharmony_ci        zExp = aExp;
105562306a36Sopenharmony_ci    }
105662306a36Sopenharmony_ci    else if ( expDiff < 0 ) {
105762306a36Sopenharmony_ci        if ( bExp == 0xFF ) {
105862306a36Sopenharmony_ci            if ( bSig ) return propagateFloat32NaN( a, b );
105962306a36Sopenharmony_ci            return packFloat32( zSign, 0xFF, 0 );
106062306a36Sopenharmony_ci        }
106162306a36Sopenharmony_ci        if ( aExp == 0 ) {
106262306a36Sopenharmony_ci            ++expDiff;
106362306a36Sopenharmony_ci        }
106462306a36Sopenharmony_ci        else {
106562306a36Sopenharmony_ci            aSig |= 0x20000000;
106662306a36Sopenharmony_ci        }
106762306a36Sopenharmony_ci        shift32RightJamming( aSig, - expDiff, &aSig );
106862306a36Sopenharmony_ci        zExp = bExp;
106962306a36Sopenharmony_ci    }
107062306a36Sopenharmony_ci    else {
107162306a36Sopenharmony_ci        if ( aExp == 0xFF ) {
107262306a36Sopenharmony_ci            if ( aSig | bSig ) return propagateFloat32NaN( a, b );
107362306a36Sopenharmony_ci            return a;
107462306a36Sopenharmony_ci        }
107562306a36Sopenharmony_ci        if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
107662306a36Sopenharmony_ci        zSig = 0x40000000 + aSig + bSig;
107762306a36Sopenharmony_ci        zExp = aExp;
107862306a36Sopenharmony_ci        goto roundAndPack;
107962306a36Sopenharmony_ci    }
108062306a36Sopenharmony_ci    aSig |= 0x20000000;
108162306a36Sopenharmony_ci    zSig = ( aSig + bSig )<<1;
108262306a36Sopenharmony_ci    --zExp;
108362306a36Sopenharmony_ci    if ( (sbits32) zSig < 0 ) {
108462306a36Sopenharmony_ci        zSig = aSig + bSig;
108562306a36Sopenharmony_ci        ++zExp;
108662306a36Sopenharmony_ci    }
108762306a36Sopenharmony_ci roundAndPack:
108862306a36Sopenharmony_ci    return roundAndPackFloat32( roundData, zSign, zExp, zSig );
108962306a36Sopenharmony_ci
109062306a36Sopenharmony_ci}
109162306a36Sopenharmony_ci
109262306a36Sopenharmony_ci/*
109362306a36Sopenharmony_ci-------------------------------------------------------------------------------
109462306a36Sopenharmony_ciReturns the result of subtracting the absolute values of the single-
109562306a36Sopenharmony_ciprecision floating-point values `a' and `b'.  If `zSign' is true, the
109662306a36Sopenharmony_cidifference is negated before being returned.  `zSign' is ignored if the
109762306a36Sopenharmony_ciresult is a NaN.  The subtraction is performed according to the IEC/IEEE
109862306a36Sopenharmony_ciStandard for Binary Floating-point Arithmetic.
109962306a36Sopenharmony_ci-------------------------------------------------------------------------------
110062306a36Sopenharmony_ci*/
110162306a36Sopenharmony_cistatic float32 subFloat32Sigs( struct roundingData *roundData, float32 a, float32 b, flag zSign )
110262306a36Sopenharmony_ci{
110362306a36Sopenharmony_ci    int16 aExp, bExp, zExp;
110462306a36Sopenharmony_ci    bits32 aSig, bSig, zSig;
110562306a36Sopenharmony_ci    int16 expDiff;
110662306a36Sopenharmony_ci
110762306a36Sopenharmony_ci    aSig = extractFloat32Frac( a );
110862306a36Sopenharmony_ci    aExp = extractFloat32Exp( a );
110962306a36Sopenharmony_ci    bSig = extractFloat32Frac( b );
111062306a36Sopenharmony_ci    bExp = extractFloat32Exp( b );
111162306a36Sopenharmony_ci    expDiff = aExp - bExp;
111262306a36Sopenharmony_ci    aSig <<= 7;
111362306a36Sopenharmony_ci    bSig <<= 7;
111462306a36Sopenharmony_ci    if ( 0 < expDiff ) goto aExpBigger;
111562306a36Sopenharmony_ci    if ( expDiff < 0 ) goto bExpBigger;
111662306a36Sopenharmony_ci    if ( aExp == 0xFF ) {
111762306a36Sopenharmony_ci        if ( aSig | bSig ) return propagateFloat32NaN( a, b );
111862306a36Sopenharmony_ci        roundData->exception |= float_flag_invalid;
111962306a36Sopenharmony_ci        return float32_default_nan;
112062306a36Sopenharmony_ci    }
112162306a36Sopenharmony_ci    if ( aExp == 0 ) {
112262306a36Sopenharmony_ci        aExp = 1;
112362306a36Sopenharmony_ci        bExp = 1;
112462306a36Sopenharmony_ci    }
112562306a36Sopenharmony_ci    if ( bSig < aSig ) goto aBigger;
112662306a36Sopenharmony_ci    if ( aSig < bSig ) goto bBigger;
112762306a36Sopenharmony_ci    return packFloat32( roundData->mode == float_round_down, 0, 0 );
112862306a36Sopenharmony_ci bExpBigger:
112962306a36Sopenharmony_ci    if ( bExp == 0xFF ) {
113062306a36Sopenharmony_ci        if ( bSig ) return propagateFloat32NaN( a, b );
113162306a36Sopenharmony_ci        return packFloat32( zSign ^ 1, 0xFF, 0 );
113262306a36Sopenharmony_ci    }
113362306a36Sopenharmony_ci    if ( aExp == 0 ) {
113462306a36Sopenharmony_ci        ++expDiff;
113562306a36Sopenharmony_ci    }
113662306a36Sopenharmony_ci    else {
113762306a36Sopenharmony_ci        aSig |= 0x40000000;
113862306a36Sopenharmony_ci    }
113962306a36Sopenharmony_ci    shift32RightJamming( aSig, - expDiff, &aSig );
114062306a36Sopenharmony_ci    bSig |= 0x40000000;
114162306a36Sopenharmony_ci bBigger:
114262306a36Sopenharmony_ci    zSig = bSig - aSig;
114362306a36Sopenharmony_ci    zExp = bExp;
114462306a36Sopenharmony_ci    zSign ^= 1;
114562306a36Sopenharmony_ci    goto normalizeRoundAndPack;
114662306a36Sopenharmony_ci aExpBigger:
114762306a36Sopenharmony_ci    if ( aExp == 0xFF ) {
114862306a36Sopenharmony_ci        if ( aSig ) return propagateFloat32NaN( a, b );
114962306a36Sopenharmony_ci        return a;
115062306a36Sopenharmony_ci    }
115162306a36Sopenharmony_ci    if ( bExp == 0 ) {
115262306a36Sopenharmony_ci        --expDiff;
115362306a36Sopenharmony_ci    }
115462306a36Sopenharmony_ci    else {
115562306a36Sopenharmony_ci        bSig |= 0x40000000;
115662306a36Sopenharmony_ci    }
115762306a36Sopenharmony_ci    shift32RightJamming( bSig, expDiff, &bSig );
115862306a36Sopenharmony_ci    aSig |= 0x40000000;
115962306a36Sopenharmony_ci aBigger:
116062306a36Sopenharmony_ci    zSig = aSig - bSig;
116162306a36Sopenharmony_ci    zExp = aExp;
116262306a36Sopenharmony_ci normalizeRoundAndPack:
116362306a36Sopenharmony_ci    --zExp;
116462306a36Sopenharmony_ci    return normalizeRoundAndPackFloat32( roundData, zSign, zExp, zSig );
116562306a36Sopenharmony_ci
116662306a36Sopenharmony_ci}
116762306a36Sopenharmony_ci
116862306a36Sopenharmony_ci/*
116962306a36Sopenharmony_ci-------------------------------------------------------------------------------
117062306a36Sopenharmony_ciReturns the result of adding the single-precision floating-point values `a'
117162306a36Sopenharmony_ciand `b'.  The operation is performed according to the IEC/IEEE Standard for
117262306a36Sopenharmony_ciBinary Floating-point Arithmetic.
117362306a36Sopenharmony_ci-------------------------------------------------------------------------------
117462306a36Sopenharmony_ci*/
117562306a36Sopenharmony_cifloat32 float32_add( struct roundingData *roundData, float32 a, float32 b )
117662306a36Sopenharmony_ci{
117762306a36Sopenharmony_ci    flag aSign, bSign;
117862306a36Sopenharmony_ci
117962306a36Sopenharmony_ci    aSign = extractFloat32Sign( a );
118062306a36Sopenharmony_ci    bSign = extractFloat32Sign( b );
118162306a36Sopenharmony_ci    if ( aSign == bSign ) {
118262306a36Sopenharmony_ci        return addFloat32Sigs( roundData, a, b, aSign );
118362306a36Sopenharmony_ci    }
118462306a36Sopenharmony_ci    else {
118562306a36Sopenharmony_ci        return subFloat32Sigs( roundData, a, b, aSign );
118662306a36Sopenharmony_ci    }
118762306a36Sopenharmony_ci
118862306a36Sopenharmony_ci}
118962306a36Sopenharmony_ci
119062306a36Sopenharmony_ci/*
119162306a36Sopenharmony_ci-------------------------------------------------------------------------------
119262306a36Sopenharmony_ciReturns the result of subtracting the single-precision floating-point values
119362306a36Sopenharmony_ci`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
119462306a36Sopenharmony_cifor Binary Floating-point Arithmetic.
119562306a36Sopenharmony_ci-------------------------------------------------------------------------------
119662306a36Sopenharmony_ci*/
119762306a36Sopenharmony_cifloat32 float32_sub( struct roundingData *roundData, float32 a, float32 b )
119862306a36Sopenharmony_ci{
119962306a36Sopenharmony_ci    flag aSign, bSign;
120062306a36Sopenharmony_ci
120162306a36Sopenharmony_ci    aSign = extractFloat32Sign( a );
120262306a36Sopenharmony_ci    bSign = extractFloat32Sign( b );
120362306a36Sopenharmony_ci    if ( aSign == bSign ) {
120462306a36Sopenharmony_ci        return subFloat32Sigs( roundData, a, b, aSign );
120562306a36Sopenharmony_ci    }
120662306a36Sopenharmony_ci    else {
120762306a36Sopenharmony_ci        return addFloat32Sigs( roundData, a, b, aSign );
120862306a36Sopenharmony_ci    }
120962306a36Sopenharmony_ci
121062306a36Sopenharmony_ci}
121162306a36Sopenharmony_ci
121262306a36Sopenharmony_ci/*
121362306a36Sopenharmony_ci-------------------------------------------------------------------------------
121462306a36Sopenharmony_ciReturns the result of multiplying the single-precision floating-point values
121562306a36Sopenharmony_ci`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
121662306a36Sopenharmony_cifor Binary Floating-point Arithmetic.
121762306a36Sopenharmony_ci-------------------------------------------------------------------------------
121862306a36Sopenharmony_ci*/
121962306a36Sopenharmony_cifloat32 float32_mul( struct roundingData *roundData, float32 a, float32 b )
122062306a36Sopenharmony_ci{
122162306a36Sopenharmony_ci    flag aSign, bSign, zSign;
122262306a36Sopenharmony_ci    int16 aExp, bExp, zExp;
122362306a36Sopenharmony_ci    bits32 aSig, bSig;
122462306a36Sopenharmony_ci    bits64 zSig64;
122562306a36Sopenharmony_ci    bits32 zSig;
122662306a36Sopenharmony_ci
122762306a36Sopenharmony_ci    aSig = extractFloat32Frac( a );
122862306a36Sopenharmony_ci    aExp = extractFloat32Exp( a );
122962306a36Sopenharmony_ci    aSign = extractFloat32Sign( a );
123062306a36Sopenharmony_ci    bSig = extractFloat32Frac( b );
123162306a36Sopenharmony_ci    bExp = extractFloat32Exp( b );
123262306a36Sopenharmony_ci    bSign = extractFloat32Sign( b );
123362306a36Sopenharmony_ci    zSign = aSign ^ bSign;
123462306a36Sopenharmony_ci    if ( aExp == 0xFF ) {
123562306a36Sopenharmony_ci        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
123662306a36Sopenharmony_ci            return propagateFloat32NaN( a, b );
123762306a36Sopenharmony_ci        }
123862306a36Sopenharmony_ci        if ( ( bExp | bSig ) == 0 ) {
123962306a36Sopenharmony_ci            roundData->exception |= float_flag_invalid;
124062306a36Sopenharmony_ci            return float32_default_nan;
124162306a36Sopenharmony_ci        }
124262306a36Sopenharmony_ci        return packFloat32( zSign, 0xFF, 0 );
124362306a36Sopenharmony_ci    }
124462306a36Sopenharmony_ci    if ( bExp == 0xFF ) {
124562306a36Sopenharmony_ci        if ( bSig ) return propagateFloat32NaN( a, b );
124662306a36Sopenharmony_ci        if ( ( aExp | aSig ) == 0 ) {
124762306a36Sopenharmony_ci            roundData->exception |= float_flag_invalid;
124862306a36Sopenharmony_ci            return float32_default_nan;
124962306a36Sopenharmony_ci        }
125062306a36Sopenharmony_ci        return packFloat32( zSign, 0xFF, 0 );
125162306a36Sopenharmony_ci    }
125262306a36Sopenharmony_ci    if ( aExp == 0 ) {
125362306a36Sopenharmony_ci        if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
125462306a36Sopenharmony_ci        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
125562306a36Sopenharmony_ci    }
125662306a36Sopenharmony_ci    if ( bExp == 0 ) {
125762306a36Sopenharmony_ci        if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
125862306a36Sopenharmony_ci        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
125962306a36Sopenharmony_ci    }
126062306a36Sopenharmony_ci    zExp = aExp + bExp - 0x7F;
126162306a36Sopenharmony_ci    aSig = ( aSig | 0x00800000 )<<7;
126262306a36Sopenharmony_ci    bSig = ( bSig | 0x00800000 )<<8;
126362306a36Sopenharmony_ci    shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 );
126462306a36Sopenharmony_ci    zSig = zSig64;
126562306a36Sopenharmony_ci    if ( 0 <= (sbits32) ( zSig<<1 ) ) {
126662306a36Sopenharmony_ci        zSig <<= 1;
126762306a36Sopenharmony_ci        --zExp;
126862306a36Sopenharmony_ci    }
126962306a36Sopenharmony_ci    return roundAndPackFloat32( roundData, zSign, zExp, zSig );
127062306a36Sopenharmony_ci
127162306a36Sopenharmony_ci}
127262306a36Sopenharmony_ci
127362306a36Sopenharmony_ci/*
127462306a36Sopenharmony_ci-------------------------------------------------------------------------------
127562306a36Sopenharmony_ciReturns the result of dividing the single-precision floating-point value `a'
127662306a36Sopenharmony_ciby the corresponding value `b'.  The operation is performed according to the
127762306a36Sopenharmony_ciIEC/IEEE Standard for Binary Floating-point Arithmetic.
127862306a36Sopenharmony_ci-------------------------------------------------------------------------------
127962306a36Sopenharmony_ci*/
128062306a36Sopenharmony_cifloat32 float32_div( struct roundingData *roundData, float32 a, float32 b )
128162306a36Sopenharmony_ci{
128262306a36Sopenharmony_ci    flag aSign, bSign, zSign;
128362306a36Sopenharmony_ci    int16 aExp, bExp, zExp;
128462306a36Sopenharmony_ci    bits32 aSig, bSig, zSig;
128562306a36Sopenharmony_ci
128662306a36Sopenharmony_ci    aSig = extractFloat32Frac( a );
128762306a36Sopenharmony_ci    aExp = extractFloat32Exp( a );
128862306a36Sopenharmony_ci    aSign = extractFloat32Sign( a );
128962306a36Sopenharmony_ci    bSig = extractFloat32Frac( b );
129062306a36Sopenharmony_ci    bExp = extractFloat32Exp( b );
129162306a36Sopenharmony_ci    bSign = extractFloat32Sign( b );
129262306a36Sopenharmony_ci    zSign = aSign ^ bSign;
129362306a36Sopenharmony_ci    if ( aExp == 0xFF ) {
129462306a36Sopenharmony_ci        if ( aSig ) return propagateFloat32NaN( a, b );
129562306a36Sopenharmony_ci        if ( bExp == 0xFF ) {
129662306a36Sopenharmony_ci            if ( bSig ) return propagateFloat32NaN( a, b );
129762306a36Sopenharmony_ci            roundData->exception |= float_flag_invalid;
129862306a36Sopenharmony_ci            return float32_default_nan;
129962306a36Sopenharmony_ci        }
130062306a36Sopenharmony_ci        return packFloat32( zSign, 0xFF, 0 );
130162306a36Sopenharmony_ci    }
130262306a36Sopenharmony_ci    if ( bExp == 0xFF ) {
130362306a36Sopenharmony_ci        if ( bSig ) return propagateFloat32NaN( a, b );
130462306a36Sopenharmony_ci        return packFloat32( zSign, 0, 0 );
130562306a36Sopenharmony_ci    }
130662306a36Sopenharmony_ci    if ( bExp == 0 ) {
130762306a36Sopenharmony_ci        if ( bSig == 0 ) {
130862306a36Sopenharmony_ci            if ( ( aExp | aSig ) == 0 ) {
130962306a36Sopenharmony_ci                roundData->exception |= float_flag_invalid;
131062306a36Sopenharmony_ci                return float32_default_nan;
131162306a36Sopenharmony_ci            }
131262306a36Sopenharmony_ci            roundData->exception |= float_flag_divbyzero;
131362306a36Sopenharmony_ci            return packFloat32( zSign, 0xFF, 0 );
131462306a36Sopenharmony_ci        }
131562306a36Sopenharmony_ci        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
131662306a36Sopenharmony_ci    }
131762306a36Sopenharmony_ci    if ( aExp == 0 ) {
131862306a36Sopenharmony_ci        if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
131962306a36Sopenharmony_ci        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
132062306a36Sopenharmony_ci    }
132162306a36Sopenharmony_ci    zExp = aExp - bExp + 0x7D;
132262306a36Sopenharmony_ci    aSig = ( aSig | 0x00800000 )<<7;
132362306a36Sopenharmony_ci    bSig = ( bSig | 0x00800000 )<<8;
132462306a36Sopenharmony_ci    if ( bSig <= ( aSig + aSig ) ) {
132562306a36Sopenharmony_ci        aSig >>= 1;
132662306a36Sopenharmony_ci        ++zExp;
132762306a36Sopenharmony_ci    }
132862306a36Sopenharmony_ci    {
132962306a36Sopenharmony_ci        bits64 tmp = ( (bits64) aSig )<<32;
133062306a36Sopenharmony_ci        do_div( tmp, bSig );
133162306a36Sopenharmony_ci        zSig = tmp;
133262306a36Sopenharmony_ci    }
133362306a36Sopenharmony_ci    if ( ( zSig & 0x3F ) == 0 ) {
133462306a36Sopenharmony_ci        zSig |= ( ( (bits64) bSig ) * zSig != ( (bits64) aSig )<<32 );
133562306a36Sopenharmony_ci    }
133662306a36Sopenharmony_ci    return roundAndPackFloat32( roundData, zSign, zExp, zSig );
133762306a36Sopenharmony_ci
133862306a36Sopenharmony_ci}
133962306a36Sopenharmony_ci
134062306a36Sopenharmony_ci/*
134162306a36Sopenharmony_ci-------------------------------------------------------------------------------
134262306a36Sopenharmony_ciReturns the remainder of the single-precision floating-point value `a'
134362306a36Sopenharmony_ciwith respect to the corresponding value `b'.  The operation is performed
134462306a36Sopenharmony_ciaccording to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
134562306a36Sopenharmony_ci-------------------------------------------------------------------------------
134662306a36Sopenharmony_ci*/
134762306a36Sopenharmony_cifloat32 float32_rem( struct roundingData *roundData, float32 a, float32 b )
134862306a36Sopenharmony_ci{
134962306a36Sopenharmony_ci    flag aSign, bSign, zSign;
135062306a36Sopenharmony_ci    int16 aExp, bExp, expDiff;
135162306a36Sopenharmony_ci    bits32 aSig, bSig;
135262306a36Sopenharmony_ci    bits32 q;
135362306a36Sopenharmony_ci    bits64 aSig64, bSig64, q64;
135462306a36Sopenharmony_ci    bits32 alternateASig;
135562306a36Sopenharmony_ci    sbits32 sigMean;
135662306a36Sopenharmony_ci
135762306a36Sopenharmony_ci    aSig = extractFloat32Frac( a );
135862306a36Sopenharmony_ci    aExp = extractFloat32Exp( a );
135962306a36Sopenharmony_ci    aSign = extractFloat32Sign( a );
136062306a36Sopenharmony_ci    bSig = extractFloat32Frac( b );
136162306a36Sopenharmony_ci    bExp = extractFloat32Exp( b );
136262306a36Sopenharmony_ci    bSign = extractFloat32Sign( b );
136362306a36Sopenharmony_ci    if ( aExp == 0xFF ) {
136462306a36Sopenharmony_ci        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
136562306a36Sopenharmony_ci            return propagateFloat32NaN( a, b );
136662306a36Sopenharmony_ci        }
136762306a36Sopenharmony_ci        roundData->exception |= float_flag_invalid;
136862306a36Sopenharmony_ci        return float32_default_nan;
136962306a36Sopenharmony_ci    }
137062306a36Sopenharmony_ci    if ( bExp == 0xFF ) {
137162306a36Sopenharmony_ci        if ( bSig ) return propagateFloat32NaN( a, b );
137262306a36Sopenharmony_ci        return a;
137362306a36Sopenharmony_ci    }
137462306a36Sopenharmony_ci    if ( bExp == 0 ) {
137562306a36Sopenharmony_ci        if ( bSig == 0 ) {
137662306a36Sopenharmony_ci            roundData->exception |= float_flag_invalid;
137762306a36Sopenharmony_ci            return float32_default_nan;
137862306a36Sopenharmony_ci        }
137962306a36Sopenharmony_ci        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
138062306a36Sopenharmony_ci    }
138162306a36Sopenharmony_ci    if ( aExp == 0 ) {
138262306a36Sopenharmony_ci        if ( aSig == 0 ) return a;
138362306a36Sopenharmony_ci        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
138462306a36Sopenharmony_ci    }
138562306a36Sopenharmony_ci    expDiff = aExp - bExp;
138662306a36Sopenharmony_ci    aSig |= 0x00800000;
138762306a36Sopenharmony_ci    bSig |= 0x00800000;
138862306a36Sopenharmony_ci    if ( expDiff < 32 ) {
138962306a36Sopenharmony_ci        aSig <<= 8;
139062306a36Sopenharmony_ci        bSig <<= 8;
139162306a36Sopenharmony_ci        if ( expDiff < 0 ) {
139262306a36Sopenharmony_ci            if ( expDiff < -1 ) return a;
139362306a36Sopenharmony_ci            aSig >>= 1;
139462306a36Sopenharmony_ci        }
139562306a36Sopenharmony_ci        q = ( bSig <= aSig );
139662306a36Sopenharmony_ci        if ( q ) aSig -= bSig;
139762306a36Sopenharmony_ci        if ( 0 < expDiff ) {
139862306a36Sopenharmony_ci            bits64 tmp = ( (bits64) aSig )<<32;
139962306a36Sopenharmony_ci            do_div( tmp, bSig );
140062306a36Sopenharmony_ci            q = tmp;
140162306a36Sopenharmony_ci            q >>= 32 - expDiff;
140262306a36Sopenharmony_ci            bSig >>= 2;
140362306a36Sopenharmony_ci            aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
140462306a36Sopenharmony_ci        }
140562306a36Sopenharmony_ci        else {
140662306a36Sopenharmony_ci            aSig >>= 2;
140762306a36Sopenharmony_ci            bSig >>= 2;
140862306a36Sopenharmony_ci        }
140962306a36Sopenharmony_ci    }
141062306a36Sopenharmony_ci    else {
141162306a36Sopenharmony_ci        if ( bSig <= aSig ) aSig -= bSig;
141262306a36Sopenharmony_ci        aSig64 = ( (bits64) aSig )<<40;
141362306a36Sopenharmony_ci        bSig64 = ( (bits64) bSig )<<40;
141462306a36Sopenharmony_ci        expDiff -= 64;
141562306a36Sopenharmony_ci        while ( 0 < expDiff ) {
141662306a36Sopenharmony_ci            q64 = estimateDiv128To64( aSig64, 0, bSig64 );
141762306a36Sopenharmony_ci            q64 = ( 2 < q64 ) ? q64 - 2 : 0;
141862306a36Sopenharmony_ci            aSig64 = - ( ( bSig * q64 )<<38 );
141962306a36Sopenharmony_ci            expDiff -= 62;
142062306a36Sopenharmony_ci        }
142162306a36Sopenharmony_ci        expDiff += 64;
142262306a36Sopenharmony_ci        q64 = estimateDiv128To64( aSig64, 0, bSig64 );
142362306a36Sopenharmony_ci        q64 = ( 2 < q64 ) ? q64 - 2 : 0;
142462306a36Sopenharmony_ci        q = q64>>( 64 - expDiff );
142562306a36Sopenharmony_ci        bSig <<= 6;
142662306a36Sopenharmony_ci        aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
142762306a36Sopenharmony_ci    }
142862306a36Sopenharmony_ci    do {
142962306a36Sopenharmony_ci        alternateASig = aSig;
143062306a36Sopenharmony_ci        ++q;
143162306a36Sopenharmony_ci        aSig -= bSig;
143262306a36Sopenharmony_ci    } while ( 0 <= (sbits32) aSig );
143362306a36Sopenharmony_ci    sigMean = aSig + alternateASig;
143462306a36Sopenharmony_ci    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
143562306a36Sopenharmony_ci        aSig = alternateASig;
143662306a36Sopenharmony_ci    }
143762306a36Sopenharmony_ci    zSign = ( (sbits32) aSig < 0 );
143862306a36Sopenharmony_ci    if ( zSign ) aSig = - aSig;
143962306a36Sopenharmony_ci    return normalizeRoundAndPackFloat32( roundData, aSign ^ zSign, bExp, aSig );
144062306a36Sopenharmony_ci
144162306a36Sopenharmony_ci}
144262306a36Sopenharmony_ci
144362306a36Sopenharmony_ci/*
144462306a36Sopenharmony_ci-------------------------------------------------------------------------------
144562306a36Sopenharmony_ciReturns the square root of the single-precision floating-point value `a'.
144662306a36Sopenharmony_ciThe operation is performed according to the IEC/IEEE Standard for Binary
144762306a36Sopenharmony_ciFloating-point Arithmetic.
144862306a36Sopenharmony_ci-------------------------------------------------------------------------------
144962306a36Sopenharmony_ci*/
145062306a36Sopenharmony_cifloat32 float32_sqrt( struct roundingData *roundData, float32 a )
145162306a36Sopenharmony_ci{
145262306a36Sopenharmony_ci    flag aSign;
145362306a36Sopenharmony_ci    int16 aExp, zExp;
145462306a36Sopenharmony_ci    bits32 aSig, zSig;
145562306a36Sopenharmony_ci    bits64 rem, term;
145662306a36Sopenharmony_ci
145762306a36Sopenharmony_ci    aSig = extractFloat32Frac( a );
145862306a36Sopenharmony_ci    aExp = extractFloat32Exp( a );
145962306a36Sopenharmony_ci    aSign = extractFloat32Sign( a );
146062306a36Sopenharmony_ci    if ( aExp == 0xFF ) {
146162306a36Sopenharmony_ci        if ( aSig ) return propagateFloat32NaN( a, 0 );
146262306a36Sopenharmony_ci        if ( ! aSign ) return a;
146362306a36Sopenharmony_ci        roundData->exception |= float_flag_invalid;
146462306a36Sopenharmony_ci        return float32_default_nan;
146562306a36Sopenharmony_ci    }
146662306a36Sopenharmony_ci    if ( aSign ) {
146762306a36Sopenharmony_ci        if ( ( aExp | aSig ) == 0 ) return a;
146862306a36Sopenharmony_ci        roundData->exception |= float_flag_invalid;
146962306a36Sopenharmony_ci        return float32_default_nan;
147062306a36Sopenharmony_ci    }
147162306a36Sopenharmony_ci    if ( aExp == 0 ) {
147262306a36Sopenharmony_ci        if ( aSig == 0 ) return 0;
147362306a36Sopenharmony_ci        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
147462306a36Sopenharmony_ci    }
147562306a36Sopenharmony_ci    zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
147662306a36Sopenharmony_ci    aSig = ( aSig | 0x00800000 )<<8;
147762306a36Sopenharmony_ci    zSig = estimateSqrt32( aExp, aSig ) + 2;
147862306a36Sopenharmony_ci    if ( ( zSig & 0x7F ) <= 5 ) {
147962306a36Sopenharmony_ci        if ( zSig < 2 ) {
148062306a36Sopenharmony_ci            zSig = 0xFFFFFFFF;
148162306a36Sopenharmony_ci        }
148262306a36Sopenharmony_ci        else {
148362306a36Sopenharmony_ci            aSig >>= aExp & 1;
148462306a36Sopenharmony_ci            term = ( (bits64) zSig ) * zSig;
148562306a36Sopenharmony_ci            rem = ( ( (bits64) aSig )<<32 ) - term;
148662306a36Sopenharmony_ci            while ( (sbits64) rem < 0 ) {
148762306a36Sopenharmony_ci                --zSig;
148862306a36Sopenharmony_ci                rem += ( ( (bits64) zSig )<<1 ) | 1;
148962306a36Sopenharmony_ci            }
149062306a36Sopenharmony_ci            zSig |= ( rem != 0 );
149162306a36Sopenharmony_ci        }
149262306a36Sopenharmony_ci    }
149362306a36Sopenharmony_ci    shift32RightJamming( zSig, 1, &zSig );
149462306a36Sopenharmony_ci    return roundAndPackFloat32( roundData, 0, zExp, zSig );
149562306a36Sopenharmony_ci
149662306a36Sopenharmony_ci}
149762306a36Sopenharmony_ci
149862306a36Sopenharmony_ci/*
149962306a36Sopenharmony_ci-------------------------------------------------------------------------------
150062306a36Sopenharmony_ciReturns 1 if the single-precision floating-point value `a' is equal to the
150162306a36Sopenharmony_cicorresponding value `b', and 0 otherwise.  The comparison is performed
150262306a36Sopenharmony_ciaccording to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
150362306a36Sopenharmony_ci-------------------------------------------------------------------------------
150462306a36Sopenharmony_ci*/
150562306a36Sopenharmony_ciflag float32_eq( float32 a, float32 b )
150662306a36Sopenharmony_ci{
150762306a36Sopenharmony_ci
150862306a36Sopenharmony_ci    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
150962306a36Sopenharmony_ci         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
151062306a36Sopenharmony_ci       ) {
151162306a36Sopenharmony_ci        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
151262306a36Sopenharmony_ci            float_raise( float_flag_invalid );
151362306a36Sopenharmony_ci        }
151462306a36Sopenharmony_ci        return 0;
151562306a36Sopenharmony_ci    }
151662306a36Sopenharmony_ci    return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
151762306a36Sopenharmony_ci
151862306a36Sopenharmony_ci}
151962306a36Sopenharmony_ci
152062306a36Sopenharmony_ci/*
152162306a36Sopenharmony_ci-------------------------------------------------------------------------------
152262306a36Sopenharmony_ciReturns 1 if the single-precision floating-point value `a' is less than or
152362306a36Sopenharmony_ciequal to the corresponding value `b', and 0 otherwise.  The comparison is
152462306a36Sopenharmony_ciperformed according to the IEC/IEEE Standard for Binary Floating-point
152562306a36Sopenharmony_ciArithmetic.
152662306a36Sopenharmony_ci-------------------------------------------------------------------------------
152762306a36Sopenharmony_ci*/
152862306a36Sopenharmony_ciflag float32_le( float32 a, float32 b )
152962306a36Sopenharmony_ci{
153062306a36Sopenharmony_ci    flag aSign, bSign;
153162306a36Sopenharmony_ci
153262306a36Sopenharmony_ci    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
153362306a36Sopenharmony_ci         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
153462306a36Sopenharmony_ci       ) {
153562306a36Sopenharmony_ci        float_raise( float_flag_invalid );
153662306a36Sopenharmony_ci        return 0;
153762306a36Sopenharmony_ci    }
153862306a36Sopenharmony_ci    aSign = extractFloat32Sign( a );
153962306a36Sopenharmony_ci    bSign = extractFloat32Sign( b );
154062306a36Sopenharmony_ci    if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
154162306a36Sopenharmony_ci    return ( a == b ) || ( aSign ^ ( a < b ) );
154262306a36Sopenharmony_ci
154362306a36Sopenharmony_ci}
154462306a36Sopenharmony_ci
154562306a36Sopenharmony_ci/*
154662306a36Sopenharmony_ci-------------------------------------------------------------------------------
154762306a36Sopenharmony_ciReturns 1 if the single-precision floating-point value `a' is less than
154862306a36Sopenharmony_cithe corresponding value `b', and 0 otherwise.  The comparison is performed
154962306a36Sopenharmony_ciaccording to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
155062306a36Sopenharmony_ci-------------------------------------------------------------------------------
155162306a36Sopenharmony_ci*/
155262306a36Sopenharmony_ciflag float32_lt( float32 a, float32 b )
155362306a36Sopenharmony_ci{
155462306a36Sopenharmony_ci    flag aSign, bSign;
155562306a36Sopenharmony_ci
155662306a36Sopenharmony_ci    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
155762306a36Sopenharmony_ci         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
155862306a36Sopenharmony_ci       ) {
155962306a36Sopenharmony_ci        float_raise( float_flag_invalid );
156062306a36Sopenharmony_ci        return 0;
156162306a36Sopenharmony_ci    }
156262306a36Sopenharmony_ci    aSign = extractFloat32Sign( a );
156362306a36Sopenharmony_ci    bSign = extractFloat32Sign( b );
156462306a36Sopenharmony_ci    if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
156562306a36Sopenharmony_ci    return ( a != b ) && ( aSign ^ ( a < b ) );
156662306a36Sopenharmony_ci
156762306a36Sopenharmony_ci}
156862306a36Sopenharmony_ci
156962306a36Sopenharmony_ci/*
157062306a36Sopenharmony_ci-------------------------------------------------------------------------------
157162306a36Sopenharmony_ciReturns 1 if the single-precision floating-point value `a' is equal to the
157262306a36Sopenharmony_cicorresponding value `b', and 0 otherwise.  The invalid exception is raised
157362306a36Sopenharmony_ciif either operand is a NaN.  Otherwise, the comparison is performed
157462306a36Sopenharmony_ciaccording to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
157562306a36Sopenharmony_ci-------------------------------------------------------------------------------
157662306a36Sopenharmony_ci*/
157762306a36Sopenharmony_ciflag float32_eq_signaling( float32 a, float32 b )
157862306a36Sopenharmony_ci{
157962306a36Sopenharmony_ci
158062306a36Sopenharmony_ci    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
158162306a36Sopenharmony_ci         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
158262306a36Sopenharmony_ci       ) {
158362306a36Sopenharmony_ci        float_raise( float_flag_invalid );
158462306a36Sopenharmony_ci        return 0;
158562306a36Sopenharmony_ci    }
158662306a36Sopenharmony_ci    return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
158762306a36Sopenharmony_ci
158862306a36Sopenharmony_ci}
158962306a36Sopenharmony_ci
159062306a36Sopenharmony_ci/*
159162306a36Sopenharmony_ci-------------------------------------------------------------------------------
159262306a36Sopenharmony_ciReturns 1 if the single-precision floating-point value `a' is less than or
159362306a36Sopenharmony_ciequal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
159462306a36Sopenharmony_cicause an exception.  Otherwise, the comparison is performed according to the
159562306a36Sopenharmony_ciIEC/IEEE Standard for Binary Floating-point Arithmetic.
159662306a36Sopenharmony_ci-------------------------------------------------------------------------------
159762306a36Sopenharmony_ci*/
159862306a36Sopenharmony_ciflag float32_le_quiet( float32 a, float32 b )
159962306a36Sopenharmony_ci{
160062306a36Sopenharmony_ci    flag aSign, bSign;
160162306a36Sopenharmony_ci    //int16 aExp, bExp;
160262306a36Sopenharmony_ci
160362306a36Sopenharmony_ci    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
160462306a36Sopenharmony_ci         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
160562306a36Sopenharmony_ci       ) {
160662306a36Sopenharmony_ci        /* Do nothing, even if NaN as we're quiet */
160762306a36Sopenharmony_ci        return 0;
160862306a36Sopenharmony_ci    }
160962306a36Sopenharmony_ci    aSign = extractFloat32Sign( a );
161062306a36Sopenharmony_ci    bSign = extractFloat32Sign( b );
161162306a36Sopenharmony_ci    if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
161262306a36Sopenharmony_ci    return ( a == b ) || ( aSign ^ ( a < b ) );
161362306a36Sopenharmony_ci
161462306a36Sopenharmony_ci}
161562306a36Sopenharmony_ci
161662306a36Sopenharmony_ci/*
161762306a36Sopenharmony_ci-------------------------------------------------------------------------------
161862306a36Sopenharmony_ciReturns 1 if the single-precision floating-point value `a' is less than
161962306a36Sopenharmony_cithe corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
162062306a36Sopenharmony_ciexception.  Otherwise, the comparison is performed according to the IEC/IEEE
162162306a36Sopenharmony_ciStandard for Binary Floating-point Arithmetic.
162262306a36Sopenharmony_ci-------------------------------------------------------------------------------
162362306a36Sopenharmony_ci*/
162462306a36Sopenharmony_ciflag float32_lt_quiet( float32 a, float32 b )
162562306a36Sopenharmony_ci{
162662306a36Sopenharmony_ci    flag aSign, bSign;
162762306a36Sopenharmony_ci
162862306a36Sopenharmony_ci    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
162962306a36Sopenharmony_ci         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
163062306a36Sopenharmony_ci       ) {
163162306a36Sopenharmony_ci        /* Do nothing, even if NaN as we're quiet */
163262306a36Sopenharmony_ci        return 0;
163362306a36Sopenharmony_ci    }
163462306a36Sopenharmony_ci    aSign = extractFloat32Sign( a );
163562306a36Sopenharmony_ci    bSign = extractFloat32Sign( b );
163662306a36Sopenharmony_ci    if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
163762306a36Sopenharmony_ci    return ( a != b ) && ( aSign ^ ( a < b ) );
163862306a36Sopenharmony_ci
163962306a36Sopenharmony_ci}
164062306a36Sopenharmony_ci
164162306a36Sopenharmony_ci/*
164262306a36Sopenharmony_ci-------------------------------------------------------------------------------
164362306a36Sopenharmony_ciReturns the result of converting the double-precision floating-point value
164462306a36Sopenharmony_ci`a' to the 32-bit two's complement integer format.  The conversion is
164562306a36Sopenharmony_ciperformed according to the IEC/IEEE Standard for Binary Floating-point
164662306a36Sopenharmony_ciArithmetic---which means in particular that the conversion is rounded
164762306a36Sopenharmony_ciaccording to the current rounding mode.  If `a' is a NaN, the largest
164862306a36Sopenharmony_cipositive integer is returned.  Otherwise, if the conversion overflows, the
164962306a36Sopenharmony_cilargest integer with the same sign as `a' is returned.
165062306a36Sopenharmony_ci-------------------------------------------------------------------------------
165162306a36Sopenharmony_ci*/
165262306a36Sopenharmony_ciint32 float64_to_int32( struct roundingData *roundData, float64 a )
165362306a36Sopenharmony_ci{
165462306a36Sopenharmony_ci    flag aSign;
165562306a36Sopenharmony_ci    int16 aExp, shiftCount;
165662306a36Sopenharmony_ci    bits64 aSig;
165762306a36Sopenharmony_ci
165862306a36Sopenharmony_ci    aSig = extractFloat64Frac( a );
165962306a36Sopenharmony_ci    aExp = extractFloat64Exp( a );
166062306a36Sopenharmony_ci    aSign = extractFloat64Sign( a );
166162306a36Sopenharmony_ci    if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
166262306a36Sopenharmony_ci    if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
166362306a36Sopenharmony_ci    shiftCount = 0x42C - aExp;
166462306a36Sopenharmony_ci    if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
166562306a36Sopenharmony_ci    return roundAndPackInt32( roundData, aSign, aSig );
166662306a36Sopenharmony_ci
166762306a36Sopenharmony_ci}
166862306a36Sopenharmony_ci
166962306a36Sopenharmony_ci/*
167062306a36Sopenharmony_ci-------------------------------------------------------------------------------
167162306a36Sopenharmony_ciReturns the result of converting the double-precision floating-point value
167262306a36Sopenharmony_ci`a' to the 32-bit two's complement integer format.  The conversion is
167362306a36Sopenharmony_ciperformed according to the IEC/IEEE Standard for Binary Floating-point
167462306a36Sopenharmony_ciArithmetic, except that the conversion is always rounded toward zero.  If
167562306a36Sopenharmony_ci`a' is a NaN, the largest positive integer is returned.  Otherwise, if the
167662306a36Sopenharmony_ciconversion overflows, the largest integer with the same sign as `a' is
167762306a36Sopenharmony_cireturned.
167862306a36Sopenharmony_ci-------------------------------------------------------------------------------
167962306a36Sopenharmony_ci*/
168062306a36Sopenharmony_ciint32 float64_to_int32_round_to_zero( float64 a )
168162306a36Sopenharmony_ci{
168262306a36Sopenharmony_ci    flag aSign;
168362306a36Sopenharmony_ci    int16 aExp, shiftCount;
168462306a36Sopenharmony_ci    bits64 aSig, savedASig;
168562306a36Sopenharmony_ci    int32 z;
168662306a36Sopenharmony_ci
168762306a36Sopenharmony_ci    aSig = extractFloat64Frac( a );
168862306a36Sopenharmony_ci    aExp = extractFloat64Exp( a );
168962306a36Sopenharmony_ci    aSign = extractFloat64Sign( a );
169062306a36Sopenharmony_ci    shiftCount = 0x433 - aExp;
169162306a36Sopenharmony_ci    if ( shiftCount < 21 ) {
169262306a36Sopenharmony_ci        if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
169362306a36Sopenharmony_ci        goto invalid;
169462306a36Sopenharmony_ci    }
169562306a36Sopenharmony_ci    else if ( 52 < shiftCount ) {
169662306a36Sopenharmony_ci        if ( aExp || aSig ) float_raise( float_flag_inexact );
169762306a36Sopenharmony_ci        return 0;
169862306a36Sopenharmony_ci    }
169962306a36Sopenharmony_ci    aSig |= LIT64( 0x0010000000000000 );
170062306a36Sopenharmony_ci    savedASig = aSig;
170162306a36Sopenharmony_ci    aSig >>= shiftCount;
170262306a36Sopenharmony_ci    z = aSig;
170362306a36Sopenharmony_ci    if ( aSign ) z = - z;
170462306a36Sopenharmony_ci    if ( ( z < 0 ) ^ aSign ) {
170562306a36Sopenharmony_ci invalid:
170662306a36Sopenharmony_ci        float_raise( float_flag_invalid );
170762306a36Sopenharmony_ci        return aSign ? 0x80000000 : 0x7FFFFFFF;
170862306a36Sopenharmony_ci    }
170962306a36Sopenharmony_ci    if ( ( aSig<<shiftCount ) != savedASig ) {
171062306a36Sopenharmony_ci        float_raise( float_flag_inexact );
171162306a36Sopenharmony_ci    }
171262306a36Sopenharmony_ci    return z;
171362306a36Sopenharmony_ci
171462306a36Sopenharmony_ci}
171562306a36Sopenharmony_ci
171662306a36Sopenharmony_ci/*
171762306a36Sopenharmony_ci-------------------------------------------------------------------------------
171862306a36Sopenharmony_ciReturns the result of converting the double-precision floating-point value
171962306a36Sopenharmony_ci`a' to the 32-bit two's complement unsigned integer format.  The conversion
172062306a36Sopenharmony_ciis performed according to the IEC/IEEE Standard for Binary Floating-point
172162306a36Sopenharmony_ciArithmetic---which means in particular that the conversion is rounded
172262306a36Sopenharmony_ciaccording to the current rounding mode.  If `a' is a NaN, the largest
172362306a36Sopenharmony_cipositive integer is returned.  Otherwise, if the conversion overflows, the
172462306a36Sopenharmony_cilargest positive integer is returned.
172562306a36Sopenharmony_ci-------------------------------------------------------------------------------
172662306a36Sopenharmony_ci*/
172762306a36Sopenharmony_ciint32 float64_to_uint32( struct roundingData *roundData, float64 a )
172862306a36Sopenharmony_ci{
172962306a36Sopenharmony_ci    flag aSign;
173062306a36Sopenharmony_ci    int16 aExp, shiftCount;
173162306a36Sopenharmony_ci    bits64 aSig;
173262306a36Sopenharmony_ci
173362306a36Sopenharmony_ci    aSig = extractFloat64Frac( a );
173462306a36Sopenharmony_ci    aExp = extractFloat64Exp( a );
173562306a36Sopenharmony_ci    aSign = 0; //extractFloat64Sign( a );
173662306a36Sopenharmony_ci    //if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
173762306a36Sopenharmony_ci    if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
173862306a36Sopenharmony_ci    shiftCount = 0x42C - aExp;
173962306a36Sopenharmony_ci    if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
174062306a36Sopenharmony_ci    return roundAndPackInt32( roundData, aSign, aSig );
174162306a36Sopenharmony_ci}
174262306a36Sopenharmony_ci
174362306a36Sopenharmony_ci/*
174462306a36Sopenharmony_ci-------------------------------------------------------------------------------
174562306a36Sopenharmony_ciReturns the result of converting the double-precision floating-point value
174662306a36Sopenharmony_ci`a' to the 32-bit two's complement integer format.  The conversion is
174762306a36Sopenharmony_ciperformed according to the IEC/IEEE Standard for Binary Floating-point
174862306a36Sopenharmony_ciArithmetic, except that the conversion is always rounded toward zero.  If
174962306a36Sopenharmony_ci`a' is a NaN, the largest positive integer is returned.  Otherwise, if the
175062306a36Sopenharmony_ciconversion overflows, the largest positive integer is returned.
175162306a36Sopenharmony_ci-------------------------------------------------------------------------------
175262306a36Sopenharmony_ci*/
175362306a36Sopenharmony_ciint32 float64_to_uint32_round_to_zero( float64 a )
175462306a36Sopenharmony_ci{
175562306a36Sopenharmony_ci    flag aSign;
175662306a36Sopenharmony_ci    int16 aExp, shiftCount;
175762306a36Sopenharmony_ci    bits64 aSig, savedASig;
175862306a36Sopenharmony_ci    int32 z;
175962306a36Sopenharmony_ci
176062306a36Sopenharmony_ci    aSig = extractFloat64Frac( a );
176162306a36Sopenharmony_ci    aExp = extractFloat64Exp( a );
176262306a36Sopenharmony_ci    aSign = extractFloat64Sign( a );
176362306a36Sopenharmony_ci    shiftCount = 0x433 - aExp;
176462306a36Sopenharmony_ci    if ( shiftCount < 21 ) {
176562306a36Sopenharmony_ci        if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
176662306a36Sopenharmony_ci        goto invalid;
176762306a36Sopenharmony_ci    }
176862306a36Sopenharmony_ci    else if ( 52 < shiftCount ) {
176962306a36Sopenharmony_ci        if ( aExp || aSig ) float_raise( float_flag_inexact );
177062306a36Sopenharmony_ci        return 0;
177162306a36Sopenharmony_ci    }
177262306a36Sopenharmony_ci    aSig |= LIT64( 0x0010000000000000 );
177362306a36Sopenharmony_ci    savedASig = aSig;
177462306a36Sopenharmony_ci    aSig >>= shiftCount;
177562306a36Sopenharmony_ci    z = aSig;
177662306a36Sopenharmony_ci    if ( aSign ) z = - z;
177762306a36Sopenharmony_ci    if ( ( z < 0 ) ^ aSign ) {
177862306a36Sopenharmony_ci invalid:
177962306a36Sopenharmony_ci        float_raise( float_flag_invalid );
178062306a36Sopenharmony_ci        return aSign ? 0x80000000 : 0x7FFFFFFF;
178162306a36Sopenharmony_ci    }
178262306a36Sopenharmony_ci    if ( ( aSig<<shiftCount ) != savedASig ) {
178362306a36Sopenharmony_ci        float_raise( float_flag_inexact );
178462306a36Sopenharmony_ci    }
178562306a36Sopenharmony_ci    return z;
178662306a36Sopenharmony_ci}
178762306a36Sopenharmony_ci
178862306a36Sopenharmony_ci/*
178962306a36Sopenharmony_ci-------------------------------------------------------------------------------
179062306a36Sopenharmony_ciReturns the result of converting the double-precision floating-point value
179162306a36Sopenharmony_ci`a' to the single-precision floating-point format.  The conversion is
179262306a36Sopenharmony_ciperformed according to the IEC/IEEE Standard for Binary Floating-point
179362306a36Sopenharmony_ciArithmetic.
179462306a36Sopenharmony_ci-------------------------------------------------------------------------------
179562306a36Sopenharmony_ci*/
179662306a36Sopenharmony_cifloat32 float64_to_float32( struct roundingData *roundData, float64 a )
179762306a36Sopenharmony_ci{
179862306a36Sopenharmony_ci    flag aSign;
179962306a36Sopenharmony_ci    int16 aExp;
180062306a36Sopenharmony_ci    bits64 aSig;
180162306a36Sopenharmony_ci    bits32 zSig;
180262306a36Sopenharmony_ci
180362306a36Sopenharmony_ci    aSig = extractFloat64Frac( a );
180462306a36Sopenharmony_ci    aExp = extractFloat64Exp( a );
180562306a36Sopenharmony_ci    aSign = extractFloat64Sign( a );
180662306a36Sopenharmony_ci    if ( aExp == 0x7FF ) {
180762306a36Sopenharmony_ci        if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a ) );
180862306a36Sopenharmony_ci        return packFloat32( aSign, 0xFF, 0 );
180962306a36Sopenharmony_ci    }
181062306a36Sopenharmony_ci    shift64RightJamming( aSig, 22, &aSig );
181162306a36Sopenharmony_ci    zSig = aSig;
181262306a36Sopenharmony_ci    if ( aExp || zSig ) {
181362306a36Sopenharmony_ci        zSig |= 0x40000000;
181462306a36Sopenharmony_ci        aExp -= 0x381;
181562306a36Sopenharmony_ci    }
181662306a36Sopenharmony_ci    return roundAndPackFloat32( roundData, aSign, aExp, zSig );
181762306a36Sopenharmony_ci
181862306a36Sopenharmony_ci}
181962306a36Sopenharmony_ci
182062306a36Sopenharmony_ci#ifdef FLOATX80
182162306a36Sopenharmony_ci
182262306a36Sopenharmony_ci/*
182362306a36Sopenharmony_ci-------------------------------------------------------------------------------
182462306a36Sopenharmony_ciReturns the result of converting the double-precision floating-point value
182562306a36Sopenharmony_ci`a' to the extended double-precision floating-point format.  The conversion
182662306a36Sopenharmony_ciis performed according to the IEC/IEEE Standard for Binary Floating-point
182762306a36Sopenharmony_ciArithmetic.
182862306a36Sopenharmony_ci-------------------------------------------------------------------------------
182962306a36Sopenharmony_ci*/
183062306a36Sopenharmony_cifloatx80 float64_to_floatx80( float64 a )
183162306a36Sopenharmony_ci{
183262306a36Sopenharmony_ci    flag aSign;
183362306a36Sopenharmony_ci    int16 aExp;
183462306a36Sopenharmony_ci    bits64 aSig;
183562306a36Sopenharmony_ci
183662306a36Sopenharmony_ci    aSig = extractFloat64Frac( a );
183762306a36Sopenharmony_ci    aExp = extractFloat64Exp( a );
183862306a36Sopenharmony_ci    aSign = extractFloat64Sign( a );
183962306a36Sopenharmony_ci    if ( aExp == 0x7FF ) {
184062306a36Sopenharmony_ci        if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a ) );
184162306a36Sopenharmony_ci        return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
184262306a36Sopenharmony_ci    }
184362306a36Sopenharmony_ci    if ( aExp == 0 ) {
184462306a36Sopenharmony_ci        if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
184562306a36Sopenharmony_ci        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
184662306a36Sopenharmony_ci    }
184762306a36Sopenharmony_ci    return
184862306a36Sopenharmony_ci        packFloatx80(
184962306a36Sopenharmony_ci            aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
185062306a36Sopenharmony_ci
185162306a36Sopenharmony_ci}
185262306a36Sopenharmony_ci
185362306a36Sopenharmony_ci#endif
185462306a36Sopenharmony_ci
185562306a36Sopenharmony_ci/*
185662306a36Sopenharmony_ci-------------------------------------------------------------------------------
185762306a36Sopenharmony_ciRounds the double-precision floating-point value `a' to an integer, and
185862306a36Sopenharmony_cireturns the result as a double-precision floating-point value.  The
185962306a36Sopenharmony_cioperation is performed according to the IEC/IEEE Standard for Binary
186062306a36Sopenharmony_ciFloating-point Arithmetic.
186162306a36Sopenharmony_ci-------------------------------------------------------------------------------
186262306a36Sopenharmony_ci*/
186362306a36Sopenharmony_cifloat64 float64_round_to_int( struct roundingData *roundData, float64 a )
186462306a36Sopenharmony_ci{
186562306a36Sopenharmony_ci    flag aSign;
186662306a36Sopenharmony_ci    int16 aExp;
186762306a36Sopenharmony_ci    bits64 lastBitMask, roundBitsMask;
186862306a36Sopenharmony_ci    int8 roundingMode;
186962306a36Sopenharmony_ci    float64 z;
187062306a36Sopenharmony_ci
187162306a36Sopenharmony_ci    aExp = extractFloat64Exp( a );
187262306a36Sopenharmony_ci    if ( 0x433 <= aExp ) {
187362306a36Sopenharmony_ci        if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
187462306a36Sopenharmony_ci            return propagateFloat64NaN( a, a );
187562306a36Sopenharmony_ci        }
187662306a36Sopenharmony_ci        return a;
187762306a36Sopenharmony_ci    }
187862306a36Sopenharmony_ci    if ( aExp <= 0x3FE ) {
187962306a36Sopenharmony_ci        if ( (bits64) ( a<<1 ) == 0 ) return a;
188062306a36Sopenharmony_ci        roundData->exception |= float_flag_inexact;
188162306a36Sopenharmony_ci        aSign = extractFloat64Sign( a );
188262306a36Sopenharmony_ci        switch ( roundData->mode ) {
188362306a36Sopenharmony_ci         case float_round_nearest_even:
188462306a36Sopenharmony_ci            if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
188562306a36Sopenharmony_ci                return packFloat64( aSign, 0x3FF, 0 );
188662306a36Sopenharmony_ci            }
188762306a36Sopenharmony_ci            break;
188862306a36Sopenharmony_ci         case float_round_down:
188962306a36Sopenharmony_ci            return aSign ? LIT64( 0xBFF0000000000000 ) : 0;
189062306a36Sopenharmony_ci         case float_round_up:
189162306a36Sopenharmony_ci            return
189262306a36Sopenharmony_ci            aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 );
189362306a36Sopenharmony_ci        }
189462306a36Sopenharmony_ci        return packFloat64( aSign, 0, 0 );
189562306a36Sopenharmony_ci    }
189662306a36Sopenharmony_ci    lastBitMask = 1;
189762306a36Sopenharmony_ci    lastBitMask <<= 0x433 - aExp;
189862306a36Sopenharmony_ci    roundBitsMask = lastBitMask - 1;
189962306a36Sopenharmony_ci    z = a;
190062306a36Sopenharmony_ci    roundingMode = roundData->mode;
190162306a36Sopenharmony_ci    if ( roundingMode == float_round_nearest_even ) {
190262306a36Sopenharmony_ci        z += lastBitMask>>1;
190362306a36Sopenharmony_ci        if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
190462306a36Sopenharmony_ci    }
190562306a36Sopenharmony_ci    else if ( roundingMode != float_round_to_zero ) {
190662306a36Sopenharmony_ci        if ( extractFloat64Sign( z ) ^ ( roundingMode == float_round_up ) ) {
190762306a36Sopenharmony_ci            z += roundBitsMask;
190862306a36Sopenharmony_ci        }
190962306a36Sopenharmony_ci    }
191062306a36Sopenharmony_ci    z &= ~ roundBitsMask;
191162306a36Sopenharmony_ci    if ( z != a ) roundData->exception |= float_flag_inexact;
191262306a36Sopenharmony_ci    return z;
191362306a36Sopenharmony_ci
191462306a36Sopenharmony_ci}
191562306a36Sopenharmony_ci
191662306a36Sopenharmony_ci/*
191762306a36Sopenharmony_ci-------------------------------------------------------------------------------
191862306a36Sopenharmony_ciReturns the result of adding the absolute values of the double-precision
191962306a36Sopenharmony_cifloating-point values `a' and `b'.  If `zSign' is true, the sum is negated
192062306a36Sopenharmony_cibefore being returned.  `zSign' is ignored if the result is a NaN.  The
192162306a36Sopenharmony_ciaddition is performed according to the IEC/IEEE Standard for Binary
192262306a36Sopenharmony_ciFloating-point Arithmetic.
192362306a36Sopenharmony_ci-------------------------------------------------------------------------------
192462306a36Sopenharmony_ci*/
192562306a36Sopenharmony_cistatic float64 addFloat64Sigs( struct roundingData *roundData, float64 a, float64 b, flag zSign )
192662306a36Sopenharmony_ci{
192762306a36Sopenharmony_ci    int16 aExp, bExp, zExp;
192862306a36Sopenharmony_ci    bits64 aSig, bSig, zSig;
192962306a36Sopenharmony_ci    int16 expDiff;
193062306a36Sopenharmony_ci
193162306a36Sopenharmony_ci    aSig = extractFloat64Frac( a );
193262306a36Sopenharmony_ci    aExp = extractFloat64Exp( a );
193362306a36Sopenharmony_ci    bSig = extractFloat64Frac( b );
193462306a36Sopenharmony_ci    bExp = extractFloat64Exp( b );
193562306a36Sopenharmony_ci    expDiff = aExp - bExp;
193662306a36Sopenharmony_ci    aSig <<= 9;
193762306a36Sopenharmony_ci    bSig <<= 9;
193862306a36Sopenharmony_ci    if ( 0 < expDiff ) {
193962306a36Sopenharmony_ci        if ( aExp == 0x7FF ) {
194062306a36Sopenharmony_ci            if ( aSig ) return propagateFloat64NaN( a, b );
194162306a36Sopenharmony_ci            return a;
194262306a36Sopenharmony_ci        }
194362306a36Sopenharmony_ci        if ( bExp == 0 ) {
194462306a36Sopenharmony_ci            --expDiff;
194562306a36Sopenharmony_ci        }
194662306a36Sopenharmony_ci        else {
194762306a36Sopenharmony_ci            bSig |= LIT64( 0x2000000000000000 );
194862306a36Sopenharmony_ci        }
194962306a36Sopenharmony_ci        shift64RightJamming( bSig, expDiff, &bSig );
195062306a36Sopenharmony_ci        zExp = aExp;
195162306a36Sopenharmony_ci    }
195262306a36Sopenharmony_ci    else if ( expDiff < 0 ) {
195362306a36Sopenharmony_ci        if ( bExp == 0x7FF ) {
195462306a36Sopenharmony_ci            if ( bSig ) return propagateFloat64NaN( a, b );
195562306a36Sopenharmony_ci            return packFloat64( zSign, 0x7FF, 0 );
195662306a36Sopenharmony_ci        }
195762306a36Sopenharmony_ci        if ( aExp == 0 ) {
195862306a36Sopenharmony_ci            ++expDiff;
195962306a36Sopenharmony_ci        }
196062306a36Sopenharmony_ci        else {
196162306a36Sopenharmony_ci            aSig |= LIT64( 0x2000000000000000 );
196262306a36Sopenharmony_ci        }
196362306a36Sopenharmony_ci        shift64RightJamming( aSig, - expDiff, &aSig );
196462306a36Sopenharmony_ci        zExp = bExp;
196562306a36Sopenharmony_ci    }
196662306a36Sopenharmony_ci    else {
196762306a36Sopenharmony_ci        if ( aExp == 0x7FF ) {
196862306a36Sopenharmony_ci            if ( aSig | bSig ) return propagateFloat64NaN( a, b );
196962306a36Sopenharmony_ci            return a;
197062306a36Sopenharmony_ci        }
197162306a36Sopenharmony_ci        if ( aExp == 0 ) return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
197262306a36Sopenharmony_ci        zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
197362306a36Sopenharmony_ci        zExp = aExp;
197462306a36Sopenharmony_ci        goto roundAndPack;
197562306a36Sopenharmony_ci    }
197662306a36Sopenharmony_ci    aSig |= LIT64( 0x2000000000000000 );
197762306a36Sopenharmony_ci    zSig = ( aSig + bSig )<<1;
197862306a36Sopenharmony_ci    --zExp;
197962306a36Sopenharmony_ci    if ( (sbits64) zSig < 0 ) {
198062306a36Sopenharmony_ci        zSig = aSig + bSig;
198162306a36Sopenharmony_ci        ++zExp;
198262306a36Sopenharmony_ci    }
198362306a36Sopenharmony_ci roundAndPack:
198462306a36Sopenharmony_ci    return roundAndPackFloat64( roundData, zSign, zExp, zSig );
198562306a36Sopenharmony_ci
198662306a36Sopenharmony_ci}
198762306a36Sopenharmony_ci
198862306a36Sopenharmony_ci/*
198962306a36Sopenharmony_ci-------------------------------------------------------------------------------
199062306a36Sopenharmony_ciReturns the result of subtracting the absolute values of the double-
199162306a36Sopenharmony_ciprecision floating-point values `a' and `b'.  If `zSign' is true, the
199262306a36Sopenharmony_cidifference is negated before being returned.  `zSign' is ignored if the
199362306a36Sopenharmony_ciresult is a NaN.  The subtraction is performed according to the IEC/IEEE
199462306a36Sopenharmony_ciStandard for Binary Floating-point Arithmetic.
199562306a36Sopenharmony_ci-------------------------------------------------------------------------------
199662306a36Sopenharmony_ci*/
199762306a36Sopenharmony_cistatic float64 subFloat64Sigs( struct roundingData *roundData, float64 a, float64 b, flag zSign )
199862306a36Sopenharmony_ci{
199962306a36Sopenharmony_ci    int16 aExp, bExp, zExp;
200062306a36Sopenharmony_ci    bits64 aSig, bSig, zSig;
200162306a36Sopenharmony_ci    int16 expDiff;
200262306a36Sopenharmony_ci
200362306a36Sopenharmony_ci    aSig = extractFloat64Frac( a );
200462306a36Sopenharmony_ci    aExp = extractFloat64Exp( a );
200562306a36Sopenharmony_ci    bSig = extractFloat64Frac( b );
200662306a36Sopenharmony_ci    bExp = extractFloat64Exp( b );
200762306a36Sopenharmony_ci    expDiff = aExp - bExp;
200862306a36Sopenharmony_ci    aSig <<= 10;
200962306a36Sopenharmony_ci    bSig <<= 10;
201062306a36Sopenharmony_ci    if ( 0 < expDiff ) goto aExpBigger;
201162306a36Sopenharmony_ci    if ( expDiff < 0 ) goto bExpBigger;
201262306a36Sopenharmony_ci    if ( aExp == 0x7FF ) {
201362306a36Sopenharmony_ci        if ( aSig | bSig ) return propagateFloat64NaN( a, b );
201462306a36Sopenharmony_ci        roundData->exception |= float_flag_invalid;
201562306a36Sopenharmony_ci        return float64_default_nan;
201662306a36Sopenharmony_ci    }
201762306a36Sopenharmony_ci    if ( aExp == 0 ) {
201862306a36Sopenharmony_ci        aExp = 1;
201962306a36Sopenharmony_ci        bExp = 1;
202062306a36Sopenharmony_ci    }
202162306a36Sopenharmony_ci    if ( bSig < aSig ) goto aBigger;
202262306a36Sopenharmony_ci    if ( aSig < bSig ) goto bBigger;
202362306a36Sopenharmony_ci    return packFloat64( roundData->mode == float_round_down, 0, 0 );
202462306a36Sopenharmony_ci bExpBigger:
202562306a36Sopenharmony_ci    if ( bExp == 0x7FF ) {
202662306a36Sopenharmony_ci        if ( bSig ) return propagateFloat64NaN( a, b );
202762306a36Sopenharmony_ci        return packFloat64( zSign ^ 1, 0x7FF, 0 );
202862306a36Sopenharmony_ci    }
202962306a36Sopenharmony_ci    if ( aExp == 0 ) {
203062306a36Sopenharmony_ci        ++expDiff;
203162306a36Sopenharmony_ci    }
203262306a36Sopenharmony_ci    else {
203362306a36Sopenharmony_ci        aSig |= LIT64( 0x4000000000000000 );
203462306a36Sopenharmony_ci    }
203562306a36Sopenharmony_ci    shift64RightJamming( aSig, - expDiff, &aSig );
203662306a36Sopenharmony_ci    bSig |= LIT64( 0x4000000000000000 );
203762306a36Sopenharmony_ci bBigger:
203862306a36Sopenharmony_ci    zSig = bSig - aSig;
203962306a36Sopenharmony_ci    zExp = bExp;
204062306a36Sopenharmony_ci    zSign ^= 1;
204162306a36Sopenharmony_ci    goto normalizeRoundAndPack;
204262306a36Sopenharmony_ci aExpBigger:
204362306a36Sopenharmony_ci    if ( aExp == 0x7FF ) {
204462306a36Sopenharmony_ci        if ( aSig ) return propagateFloat64NaN( a, b );
204562306a36Sopenharmony_ci        return a;
204662306a36Sopenharmony_ci    }
204762306a36Sopenharmony_ci    if ( bExp == 0 ) {
204862306a36Sopenharmony_ci        --expDiff;
204962306a36Sopenharmony_ci    }
205062306a36Sopenharmony_ci    else {
205162306a36Sopenharmony_ci        bSig |= LIT64( 0x4000000000000000 );
205262306a36Sopenharmony_ci    }
205362306a36Sopenharmony_ci    shift64RightJamming( bSig, expDiff, &bSig );
205462306a36Sopenharmony_ci    aSig |= LIT64( 0x4000000000000000 );
205562306a36Sopenharmony_ci aBigger:
205662306a36Sopenharmony_ci    zSig = aSig - bSig;
205762306a36Sopenharmony_ci    zExp = aExp;
205862306a36Sopenharmony_ci normalizeRoundAndPack:
205962306a36Sopenharmony_ci    --zExp;
206062306a36Sopenharmony_ci    return normalizeRoundAndPackFloat64( roundData, zSign, zExp, zSig );
206162306a36Sopenharmony_ci
206262306a36Sopenharmony_ci}
206362306a36Sopenharmony_ci
206462306a36Sopenharmony_ci/*
206562306a36Sopenharmony_ci-------------------------------------------------------------------------------
206662306a36Sopenharmony_ciReturns the result of adding the double-precision floating-point values `a'
206762306a36Sopenharmony_ciand `b'.  The operation is performed according to the IEC/IEEE Standard for
206862306a36Sopenharmony_ciBinary Floating-point Arithmetic.
206962306a36Sopenharmony_ci-------------------------------------------------------------------------------
207062306a36Sopenharmony_ci*/
207162306a36Sopenharmony_cifloat64 float64_add( struct roundingData *roundData, float64 a, float64 b )
207262306a36Sopenharmony_ci{
207362306a36Sopenharmony_ci    flag aSign, bSign;
207462306a36Sopenharmony_ci
207562306a36Sopenharmony_ci    aSign = extractFloat64Sign( a );
207662306a36Sopenharmony_ci    bSign = extractFloat64Sign( b );
207762306a36Sopenharmony_ci    if ( aSign == bSign ) {
207862306a36Sopenharmony_ci        return addFloat64Sigs( roundData, a, b, aSign );
207962306a36Sopenharmony_ci    }
208062306a36Sopenharmony_ci    else {
208162306a36Sopenharmony_ci        return subFloat64Sigs( roundData, a, b, aSign );
208262306a36Sopenharmony_ci    }
208362306a36Sopenharmony_ci
208462306a36Sopenharmony_ci}
208562306a36Sopenharmony_ci
208662306a36Sopenharmony_ci/*
208762306a36Sopenharmony_ci-------------------------------------------------------------------------------
208862306a36Sopenharmony_ciReturns the result of subtracting the double-precision floating-point values
208962306a36Sopenharmony_ci`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
209062306a36Sopenharmony_cifor Binary Floating-point Arithmetic.
209162306a36Sopenharmony_ci-------------------------------------------------------------------------------
209262306a36Sopenharmony_ci*/
209362306a36Sopenharmony_cifloat64 float64_sub( struct roundingData *roundData, float64 a, float64 b )
209462306a36Sopenharmony_ci{
209562306a36Sopenharmony_ci    flag aSign, bSign;
209662306a36Sopenharmony_ci
209762306a36Sopenharmony_ci    aSign = extractFloat64Sign( a );
209862306a36Sopenharmony_ci    bSign = extractFloat64Sign( b );
209962306a36Sopenharmony_ci    if ( aSign == bSign ) {
210062306a36Sopenharmony_ci        return subFloat64Sigs( roundData, a, b, aSign );
210162306a36Sopenharmony_ci    }
210262306a36Sopenharmony_ci    else {
210362306a36Sopenharmony_ci        return addFloat64Sigs( roundData, a, b, aSign );
210462306a36Sopenharmony_ci    }
210562306a36Sopenharmony_ci
210662306a36Sopenharmony_ci}
210762306a36Sopenharmony_ci
210862306a36Sopenharmony_ci/*
210962306a36Sopenharmony_ci-------------------------------------------------------------------------------
211062306a36Sopenharmony_ciReturns the result of multiplying the double-precision floating-point values
211162306a36Sopenharmony_ci`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
211262306a36Sopenharmony_cifor Binary Floating-point Arithmetic.
211362306a36Sopenharmony_ci-------------------------------------------------------------------------------
211462306a36Sopenharmony_ci*/
211562306a36Sopenharmony_cifloat64 float64_mul( struct roundingData *roundData, float64 a, float64 b )
211662306a36Sopenharmony_ci{
211762306a36Sopenharmony_ci    flag aSign, bSign, zSign;
211862306a36Sopenharmony_ci    int16 aExp, bExp, zExp;
211962306a36Sopenharmony_ci    bits64 aSig, bSig, zSig0, zSig1;
212062306a36Sopenharmony_ci
212162306a36Sopenharmony_ci    aSig = extractFloat64Frac( a );
212262306a36Sopenharmony_ci    aExp = extractFloat64Exp( a );
212362306a36Sopenharmony_ci    aSign = extractFloat64Sign( a );
212462306a36Sopenharmony_ci    bSig = extractFloat64Frac( b );
212562306a36Sopenharmony_ci    bExp = extractFloat64Exp( b );
212662306a36Sopenharmony_ci    bSign = extractFloat64Sign( b );
212762306a36Sopenharmony_ci    zSign = aSign ^ bSign;
212862306a36Sopenharmony_ci    if ( aExp == 0x7FF ) {
212962306a36Sopenharmony_ci        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
213062306a36Sopenharmony_ci            return propagateFloat64NaN( a, b );
213162306a36Sopenharmony_ci        }
213262306a36Sopenharmony_ci        if ( ( bExp | bSig ) == 0 ) {
213362306a36Sopenharmony_ci            roundData->exception |= float_flag_invalid;
213462306a36Sopenharmony_ci            return float64_default_nan;
213562306a36Sopenharmony_ci        }
213662306a36Sopenharmony_ci        return packFloat64( zSign, 0x7FF, 0 );
213762306a36Sopenharmony_ci    }
213862306a36Sopenharmony_ci    if ( bExp == 0x7FF ) {
213962306a36Sopenharmony_ci        if ( bSig ) return propagateFloat64NaN( a, b );
214062306a36Sopenharmony_ci        if ( ( aExp | aSig ) == 0 ) {
214162306a36Sopenharmony_ci            roundData->exception |= float_flag_invalid;
214262306a36Sopenharmony_ci            return float64_default_nan;
214362306a36Sopenharmony_ci        }
214462306a36Sopenharmony_ci        return packFloat64( zSign, 0x7FF, 0 );
214562306a36Sopenharmony_ci    }
214662306a36Sopenharmony_ci    if ( aExp == 0 ) {
214762306a36Sopenharmony_ci        if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
214862306a36Sopenharmony_ci        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
214962306a36Sopenharmony_ci    }
215062306a36Sopenharmony_ci    if ( bExp == 0 ) {
215162306a36Sopenharmony_ci        if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
215262306a36Sopenharmony_ci        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
215362306a36Sopenharmony_ci    }
215462306a36Sopenharmony_ci    zExp = aExp + bExp - 0x3FF;
215562306a36Sopenharmony_ci    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
215662306a36Sopenharmony_ci    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
215762306a36Sopenharmony_ci    mul64To128( aSig, bSig, &zSig0, &zSig1 );
215862306a36Sopenharmony_ci    zSig0 |= ( zSig1 != 0 );
215962306a36Sopenharmony_ci    if ( 0 <= (sbits64) ( zSig0<<1 ) ) {
216062306a36Sopenharmony_ci        zSig0 <<= 1;
216162306a36Sopenharmony_ci        --zExp;
216262306a36Sopenharmony_ci    }
216362306a36Sopenharmony_ci    return roundAndPackFloat64( roundData, zSign, zExp, zSig0 );
216462306a36Sopenharmony_ci
216562306a36Sopenharmony_ci}
216662306a36Sopenharmony_ci
216762306a36Sopenharmony_ci/*
216862306a36Sopenharmony_ci-------------------------------------------------------------------------------
216962306a36Sopenharmony_ciReturns the result of dividing the double-precision floating-point value `a'
217062306a36Sopenharmony_ciby the corresponding value `b'.  The operation is performed according to
217162306a36Sopenharmony_cithe IEC/IEEE Standard for Binary Floating-point Arithmetic.
217262306a36Sopenharmony_ci-------------------------------------------------------------------------------
217362306a36Sopenharmony_ci*/
217462306a36Sopenharmony_cifloat64 float64_div( struct roundingData *roundData, float64 a, float64 b )
217562306a36Sopenharmony_ci{
217662306a36Sopenharmony_ci    flag aSign, bSign, zSign;
217762306a36Sopenharmony_ci    int16 aExp, bExp, zExp;
217862306a36Sopenharmony_ci    bits64 aSig, bSig, zSig;
217962306a36Sopenharmony_ci    bits64 rem0, rem1;
218062306a36Sopenharmony_ci    bits64 term0, term1;
218162306a36Sopenharmony_ci
218262306a36Sopenharmony_ci    aSig = extractFloat64Frac( a );
218362306a36Sopenharmony_ci    aExp = extractFloat64Exp( a );
218462306a36Sopenharmony_ci    aSign = extractFloat64Sign( a );
218562306a36Sopenharmony_ci    bSig = extractFloat64Frac( b );
218662306a36Sopenharmony_ci    bExp = extractFloat64Exp( b );
218762306a36Sopenharmony_ci    bSign = extractFloat64Sign( b );
218862306a36Sopenharmony_ci    zSign = aSign ^ bSign;
218962306a36Sopenharmony_ci    if ( aExp == 0x7FF ) {
219062306a36Sopenharmony_ci        if ( aSig ) return propagateFloat64NaN( a, b );
219162306a36Sopenharmony_ci        if ( bExp == 0x7FF ) {
219262306a36Sopenharmony_ci            if ( bSig ) return propagateFloat64NaN( a, b );
219362306a36Sopenharmony_ci            roundData->exception |= float_flag_invalid;
219462306a36Sopenharmony_ci            return float64_default_nan;
219562306a36Sopenharmony_ci        }
219662306a36Sopenharmony_ci        return packFloat64( zSign, 0x7FF, 0 );
219762306a36Sopenharmony_ci    }
219862306a36Sopenharmony_ci    if ( bExp == 0x7FF ) {
219962306a36Sopenharmony_ci        if ( bSig ) return propagateFloat64NaN( a, b );
220062306a36Sopenharmony_ci        return packFloat64( zSign, 0, 0 );
220162306a36Sopenharmony_ci    }
220262306a36Sopenharmony_ci    if ( bExp == 0 ) {
220362306a36Sopenharmony_ci        if ( bSig == 0 ) {
220462306a36Sopenharmony_ci            if ( ( aExp | aSig ) == 0 ) {
220562306a36Sopenharmony_ci                roundData->exception |= float_flag_invalid;
220662306a36Sopenharmony_ci                return float64_default_nan;
220762306a36Sopenharmony_ci            }
220862306a36Sopenharmony_ci            roundData->exception |= float_flag_divbyzero;
220962306a36Sopenharmony_ci            return packFloat64( zSign, 0x7FF, 0 );
221062306a36Sopenharmony_ci        }
221162306a36Sopenharmony_ci        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
221262306a36Sopenharmony_ci    }
221362306a36Sopenharmony_ci    if ( aExp == 0 ) {
221462306a36Sopenharmony_ci        if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
221562306a36Sopenharmony_ci        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
221662306a36Sopenharmony_ci    }
221762306a36Sopenharmony_ci    zExp = aExp - bExp + 0x3FD;
221862306a36Sopenharmony_ci    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
221962306a36Sopenharmony_ci    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
222062306a36Sopenharmony_ci    if ( bSig <= ( aSig + aSig ) ) {
222162306a36Sopenharmony_ci        aSig >>= 1;
222262306a36Sopenharmony_ci        ++zExp;
222362306a36Sopenharmony_ci    }
222462306a36Sopenharmony_ci    zSig = estimateDiv128To64( aSig, 0, bSig );
222562306a36Sopenharmony_ci    if ( ( zSig & 0x1FF ) <= 2 ) {
222662306a36Sopenharmony_ci        mul64To128( bSig, zSig, &term0, &term1 );
222762306a36Sopenharmony_ci        sub128( aSig, 0, term0, term1, &rem0, &rem1 );
222862306a36Sopenharmony_ci        while ( (sbits64) rem0 < 0 ) {
222962306a36Sopenharmony_ci            --zSig;
223062306a36Sopenharmony_ci            add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
223162306a36Sopenharmony_ci        }
223262306a36Sopenharmony_ci        zSig |= ( rem1 != 0 );
223362306a36Sopenharmony_ci    }
223462306a36Sopenharmony_ci    return roundAndPackFloat64( roundData, zSign, zExp, zSig );
223562306a36Sopenharmony_ci
223662306a36Sopenharmony_ci}
223762306a36Sopenharmony_ci
223862306a36Sopenharmony_ci/*
223962306a36Sopenharmony_ci-------------------------------------------------------------------------------
224062306a36Sopenharmony_ciReturns the remainder of the double-precision floating-point value `a'
224162306a36Sopenharmony_ciwith respect to the corresponding value `b'.  The operation is performed
224262306a36Sopenharmony_ciaccording to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
224362306a36Sopenharmony_ci-------------------------------------------------------------------------------
224462306a36Sopenharmony_ci*/
224562306a36Sopenharmony_cifloat64 float64_rem( struct roundingData *roundData, float64 a, float64 b )
224662306a36Sopenharmony_ci{
224762306a36Sopenharmony_ci    flag aSign, bSign, zSign;
224862306a36Sopenharmony_ci    int16 aExp, bExp, expDiff;
224962306a36Sopenharmony_ci    bits64 aSig, bSig;
225062306a36Sopenharmony_ci    bits64 q, alternateASig;
225162306a36Sopenharmony_ci    sbits64 sigMean;
225262306a36Sopenharmony_ci
225362306a36Sopenharmony_ci    aSig = extractFloat64Frac( a );
225462306a36Sopenharmony_ci    aExp = extractFloat64Exp( a );
225562306a36Sopenharmony_ci    aSign = extractFloat64Sign( a );
225662306a36Sopenharmony_ci    bSig = extractFloat64Frac( b );
225762306a36Sopenharmony_ci    bExp = extractFloat64Exp( b );
225862306a36Sopenharmony_ci    bSign = extractFloat64Sign( b );
225962306a36Sopenharmony_ci    if ( aExp == 0x7FF ) {
226062306a36Sopenharmony_ci        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
226162306a36Sopenharmony_ci            return propagateFloat64NaN( a, b );
226262306a36Sopenharmony_ci        }
226362306a36Sopenharmony_ci        roundData->exception |= float_flag_invalid;
226462306a36Sopenharmony_ci        return float64_default_nan;
226562306a36Sopenharmony_ci    }
226662306a36Sopenharmony_ci    if ( bExp == 0x7FF ) {
226762306a36Sopenharmony_ci        if ( bSig ) return propagateFloat64NaN( a, b );
226862306a36Sopenharmony_ci        return a;
226962306a36Sopenharmony_ci    }
227062306a36Sopenharmony_ci    if ( bExp == 0 ) {
227162306a36Sopenharmony_ci        if ( bSig == 0 ) {
227262306a36Sopenharmony_ci            roundData->exception |= float_flag_invalid;
227362306a36Sopenharmony_ci            return float64_default_nan;
227462306a36Sopenharmony_ci        }
227562306a36Sopenharmony_ci        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
227662306a36Sopenharmony_ci    }
227762306a36Sopenharmony_ci    if ( aExp == 0 ) {
227862306a36Sopenharmony_ci        if ( aSig == 0 ) return a;
227962306a36Sopenharmony_ci        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
228062306a36Sopenharmony_ci    }
228162306a36Sopenharmony_ci    expDiff = aExp - bExp;
228262306a36Sopenharmony_ci    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
228362306a36Sopenharmony_ci    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
228462306a36Sopenharmony_ci    if ( expDiff < 0 ) {
228562306a36Sopenharmony_ci        if ( expDiff < -1 ) return a;
228662306a36Sopenharmony_ci        aSig >>= 1;
228762306a36Sopenharmony_ci    }
228862306a36Sopenharmony_ci    q = ( bSig <= aSig );
228962306a36Sopenharmony_ci    if ( q ) aSig -= bSig;
229062306a36Sopenharmony_ci    expDiff -= 64;
229162306a36Sopenharmony_ci    while ( 0 < expDiff ) {
229262306a36Sopenharmony_ci        q = estimateDiv128To64( aSig, 0, bSig );
229362306a36Sopenharmony_ci        q = ( 2 < q ) ? q - 2 : 0;
229462306a36Sopenharmony_ci        aSig = - ( ( bSig>>2 ) * q );
229562306a36Sopenharmony_ci        expDiff -= 62;
229662306a36Sopenharmony_ci    }
229762306a36Sopenharmony_ci    expDiff += 64;
229862306a36Sopenharmony_ci    if ( 0 < expDiff ) {
229962306a36Sopenharmony_ci        q = estimateDiv128To64( aSig, 0, bSig );
230062306a36Sopenharmony_ci        q = ( 2 < q ) ? q - 2 : 0;
230162306a36Sopenharmony_ci        q >>= 64 - expDiff;
230262306a36Sopenharmony_ci        bSig >>= 2;
230362306a36Sopenharmony_ci        aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
230462306a36Sopenharmony_ci    }
230562306a36Sopenharmony_ci    else {
230662306a36Sopenharmony_ci        aSig >>= 2;
230762306a36Sopenharmony_ci        bSig >>= 2;
230862306a36Sopenharmony_ci    }
230962306a36Sopenharmony_ci    do {
231062306a36Sopenharmony_ci        alternateASig = aSig;
231162306a36Sopenharmony_ci        ++q;
231262306a36Sopenharmony_ci        aSig -= bSig;
231362306a36Sopenharmony_ci    } while ( 0 <= (sbits64) aSig );
231462306a36Sopenharmony_ci    sigMean = aSig + alternateASig;
231562306a36Sopenharmony_ci    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
231662306a36Sopenharmony_ci        aSig = alternateASig;
231762306a36Sopenharmony_ci    }
231862306a36Sopenharmony_ci    zSign = ( (sbits64) aSig < 0 );
231962306a36Sopenharmony_ci    if ( zSign ) aSig = - aSig;
232062306a36Sopenharmony_ci    return normalizeRoundAndPackFloat64( roundData, aSign ^ zSign, bExp, aSig );
232162306a36Sopenharmony_ci
232262306a36Sopenharmony_ci}
232362306a36Sopenharmony_ci
232462306a36Sopenharmony_ci/*
232562306a36Sopenharmony_ci-------------------------------------------------------------------------------
232662306a36Sopenharmony_ciReturns the square root of the double-precision floating-point value `a'.
232762306a36Sopenharmony_ciThe operation is performed according to the IEC/IEEE Standard for Binary
232862306a36Sopenharmony_ciFloating-point Arithmetic.
232962306a36Sopenharmony_ci-------------------------------------------------------------------------------
233062306a36Sopenharmony_ci*/
233162306a36Sopenharmony_cifloat64 float64_sqrt( struct roundingData *roundData, float64 a )
233262306a36Sopenharmony_ci{
233362306a36Sopenharmony_ci    flag aSign;
233462306a36Sopenharmony_ci    int16 aExp, zExp;
233562306a36Sopenharmony_ci    bits64 aSig, zSig;
233662306a36Sopenharmony_ci    bits64 rem0, rem1, term0, term1; //, shiftedRem;
233762306a36Sopenharmony_ci    //float64 z;
233862306a36Sopenharmony_ci
233962306a36Sopenharmony_ci    aSig = extractFloat64Frac( a );
234062306a36Sopenharmony_ci    aExp = extractFloat64Exp( a );
234162306a36Sopenharmony_ci    aSign = extractFloat64Sign( a );
234262306a36Sopenharmony_ci    if ( aExp == 0x7FF ) {
234362306a36Sopenharmony_ci        if ( aSig ) return propagateFloat64NaN( a, a );
234462306a36Sopenharmony_ci        if ( ! aSign ) return a;
234562306a36Sopenharmony_ci        roundData->exception |= float_flag_invalid;
234662306a36Sopenharmony_ci        return float64_default_nan;
234762306a36Sopenharmony_ci    }
234862306a36Sopenharmony_ci    if ( aSign ) {
234962306a36Sopenharmony_ci        if ( ( aExp | aSig ) == 0 ) return a;
235062306a36Sopenharmony_ci        roundData->exception |= float_flag_invalid;
235162306a36Sopenharmony_ci        return float64_default_nan;
235262306a36Sopenharmony_ci    }
235362306a36Sopenharmony_ci    if ( aExp == 0 ) {
235462306a36Sopenharmony_ci        if ( aSig == 0 ) return 0;
235562306a36Sopenharmony_ci        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
235662306a36Sopenharmony_ci    }
235762306a36Sopenharmony_ci    zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
235862306a36Sopenharmony_ci    aSig |= LIT64( 0x0010000000000000 );
235962306a36Sopenharmony_ci    zSig = estimateSqrt32( aExp, aSig>>21 );
236062306a36Sopenharmony_ci    zSig <<= 31;
236162306a36Sopenharmony_ci    aSig <<= 9 - ( aExp & 1 );
236262306a36Sopenharmony_ci    zSig = estimateDiv128To64( aSig, 0, zSig ) + zSig + 2;
236362306a36Sopenharmony_ci    if ( ( zSig & 0x3FF ) <= 5 ) {
236462306a36Sopenharmony_ci        if ( zSig < 2 ) {
236562306a36Sopenharmony_ci            zSig = LIT64( 0xFFFFFFFFFFFFFFFF );
236662306a36Sopenharmony_ci        }
236762306a36Sopenharmony_ci        else {
236862306a36Sopenharmony_ci            aSig <<= 2;
236962306a36Sopenharmony_ci            mul64To128( zSig, zSig, &term0, &term1 );
237062306a36Sopenharmony_ci            sub128( aSig, 0, term0, term1, &rem0, &rem1 );
237162306a36Sopenharmony_ci            while ( (sbits64) rem0 < 0 ) {
237262306a36Sopenharmony_ci                --zSig;
237362306a36Sopenharmony_ci                shortShift128Left( 0, zSig, 1, &term0, &term1 );
237462306a36Sopenharmony_ci                term1 |= 1;
237562306a36Sopenharmony_ci                add128( rem0, rem1, term0, term1, &rem0, &rem1 );
237662306a36Sopenharmony_ci            }
237762306a36Sopenharmony_ci            zSig |= ( ( rem0 | rem1 ) != 0 );
237862306a36Sopenharmony_ci        }
237962306a36Sopenharmony_ci    }
238062306a36Sopenharmony_ci    shift64RightJamming( zSig, 1, &zSig );
238162306a36Sopenharmony_ci    return roundAndPackFloat64( roundData, 0, zExp, zSig );
238262306a36Sopenharmony_ci
238362306a36Sopenharmony_ci}
238462306a36Sopenharmony_ci
238562306a36Sopenharmony_ci/*
238662306a36Sopenharmony_ci-------------------------------------------------------------------------------
238762306a36Sopenharmony_ciReturns 1 if the double-precision floating-point value `a' is equal to the
238862306a36Sopenharmony_cicorresponding value `b', and 0 otherwise.  The comparison is performed
238962306a36Sopenharmony_ciaccording to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
239062306a36Sopenharmony_ci-------------------------------------------------------------------------------
239162306a36Sopenharmony_ci*/
239262306a36Sopenharmony_ciflag float64_eq( float64 a, float64 b )
239362306a36Sopenharmony_ci{
239462306a36Sopenharmony_ci
239562306a36Sopenharmony_ci    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
239662306a36Sopenharmony_ci         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
239762306a36Sopenharmony_ci       ) {
239862306a36Sopenharmony_ci        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
239962306a36Sopenharmony_ci            float_raise( float_flag_invalid );
240062306a36Sopenharmony_ci        }
240162306a36Sopenharmony_ci        return 0;
240262306a36Sopenharmony_ci    }
240362306a36Sopenharmony_ci    return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
240462306a36Sopenharmony_ci
240562306a36Sopenharmony_ci}
240662306a36Sopenharmony_ci
240762306a36Sopenharmony_ci/*
240862306a36Sopenharmony_ci-------------------------------------------------------------------------------
240962306a36Sopenharmony_ciReturns 1 if the double-precision floating-point value `a' is less than or
241062306a36Sopenharmony_ciequal to the corresponding value `b', and 0 otherwise.  The comparison is
241162306a36Sopenharmony_ciperformed according to the IEC/IEEE Standard for Binary Floating-point
241262306a36Sopenharmony_ciArithmetic.
241362306a36Sopenharmony_ci-------------------------------------------------------------------------------
241462306a36Sopenharmony_ci*/
241562306a36Sopenharmony_ciflag float64_le( float64 a, float64 b )
241662306a36Sopenharmony_ci{
241762306a36Sopenharmony_ci    flag aSign, bSign;
241862306a36Sopenharmony_ci
241962306a36Sopenharmony_ci    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
242062306a36Sopenharmony_ci         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
242162306a36Sopenharmony_ci       ) {
242262306a36Sopenharmony_ci        float_raise( float_flag_invalid );
242362306a36Sopenharmony_ci        return 0;
242462306a36Sopenharmony_ci    }
242562306a36Sopenharmony_ci    aSign = extractFloat64Sign( a );
242662306a36Sopenharmony_ci    bSign = extractFloat64Sign( b );
242762306a36Sopenharmony_ci    if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
242862306a36Sopenharmony_ci    return ( a == b ) || ( aSign ^ ( a < b ) );
242962306a36Sopenharmony_ci
243062306a36Sopenharmony_ci}
243162306a36Sopenharmony_ci
243262306a36Sopenharmony_ci/*
243362306a36Sopenharmony_ci-------------------------------------------------------------------------------
243462306a36Sopenharmony_ciReturns 1 if the double-precision floating-point value `a' is less than
243562306a36Sopenharmony_cithe corresponding value `b', and 0 otherwise.  The comparison is performed
243662306a36Sopenharmony_ciaccording to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
243762306a36Sopenharmony_ci-------------------------------------------------------------------------------
243862306a36Sopenharmony_ci*/
243962306a36Sopenharmony_ciflag float64_lt( float64 a, float64 b )
244062306a36Sopenharmony_ci{
244162306a36Sopenharmony_ci    flag aSign, bSign;
244262306a36Sopenharmony_ci
244362306a36Sopenharmony_ci    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
244462306a36Sopenharmony_ci         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
244562306a36Sopenharmony_ci       ) {
244662306a36Sopenharmony_ci        float_raise( float_flag_invalid );
244762306a36Sopenharmony_ci        return 0;
244862306a36Sopenharmony_ci    }
244962306a36Sopenharmony_ci    aSign = extractFloat64Sign( a );
245062306a36Sopenharmony_ci    bSign = extractFloat64Sign( b );
245162306a36Sopenharmony_ci    if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
245262306a36Sopenharmony_ci    return ( a != b ) && ( aSign ^ ( a < b ) );
245362306a36Sopenharmony_ci
245462306a36Sopenharmony_ci}
245562306a36Sopenharmony_ci
245662306a36Sopenharmony_ci/*
245762306a36Sopenharmony_ci-------------------------------------------------------------------------------
245862306a36Sopenharmony_ciReturns 1 if the double-precision floating-point value `a' is equal to the
245962306a36Sopenharmony_cicorresponding value `b', and 0 otherwise.  The invalid exception is raised
246062306a36Sopenharmony_ciif either operand is a NaN.  Otherwise, the comparison is performed
246162306a36Sopenharmony_ciaccording to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
246262306a36Sopenharmony_ci-------------------------------------------------------------------------------
246362306a36Sopenharmony_ci*/
246462306a36Sopenharmony_ciflag float64_eq_signaling( float64 a, float64 b )
246562306a36Sopenharmony_ci{
246662306a36Sopenharmony_ci
246762306a36Sopenharmony_ci    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
246862306a36Sopenharmony_ci         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
246962306a36Sopenharmony_ci       ) {
247062306a36Sopenharmony_ci        float_raise( float_flag_invalid );
247162306a36Sopenharmony_ci        return 0;
247262306a36Sopenharmony_ci    }
247362306a36Sopenharmony_ci    return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
247462306a36Sopenharmony_ci
247562306a36Sopenharmony_ci}
247662306a36Sopenharmony_ci
247762306a36Sopenharmony_ci/*
247862306a36Sopenharmony_ci-------------------------------------------------------------------------------
247962306a36Sopenharmony_ciReturns 1 if the double-precision floating-point value `a' is less than or
248062306a36Sopenharmony_ciequal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
248162306a36Sopenharmony_cicause an exception.  Otherwise, the comparison is performed according to the
248262306a36Sopenharmony_ciIEC/IEEE Standard for Binary Floating-point Arithmetic.
248362306a36Sopenharmony_ci-------------------------------------------------------------------------------
248462306a36Sopenharmony_ci*/
248562306a36Sopenharmony_ciflag float64_le_quiet( float64 a, float64 b )
248662306a36Sopenharmony_ci{
248762306a36Sopenharmony_ci    flag aSign, bSign;
248862306a36Sopenharmony_ci    //int16 aExp, bExp;
248962306a36Sopenharmony_ci
249062306a36Sopenharmony_ci    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
249162306a36Sopenharmony_ci         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
249262306a36Sopenharmony_ci       ) {
249362306a36Sopenharmony_ci        /* Do nothing, even if NaN as we're quiet */
249462306a36Sopenharmony_ci        return 0;
249562306a36Sopenharmony_ci    }
249662306a36Sopenharmony_ci    aSign = extractFloat64Sign( a );
249762306a36Sopenharmony_ci    bSign = extractFloat64Sign( b );
249862306a36Sopenharmony_ci    if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
249962306a36Sopenharmony_ci    return ( a == b ) || ( aSign ^ ( a < b ) );
250062306a36Sopenharmony_ci
250162306a36Sopenharmony_ci}
250262306a36Sopenharmony_ci
250362306a36Sopenharmony_ci/*
250462306a36Sopenharmony_ci-------------------------------------------------------------------------------
250562306a36Sopenharmony_ciReturns 1 if the double-precision floating-point value `a' is less than
250662306a36Sopenharmony_cithe corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
250762306a36Sopenharmony_ciexception.  Otherwise, the comparison is performed according to the IEC/IEEE
250862306a36Sopenharmony_ciStandard for Binary Floating-point Arithmetic.
250962306a36Sopenharmony_ci-------------------------------------------------------------------------------
251062306a36Sopenharmony_ci*/
251162306a36Sopenharmony_ciflag float64_lt_quiet( float64 a, float64 b )
251262306a36Sopenharmony_ci{
251362306a36Sopenharmony_ci    flag aSign, bSign;
251462306a36Sopenharmony_ci
251562306a36Sopenharmony_ci    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
251662306a36Sopenharmony_ci         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
251762306a36Sopenharmony_ci       ) {
251862306a36Sopenharmony_ci        /* Do nothing, even if NaN as we're quiet */
251962306a36Sopenharmony_ci        return 0;
252062306a36Sopenharmony_ci    }
252162306a36Sopenharmony_ci    aSign = extractFloat64Sign( a );
252262306a36Sopenharmony_ci    bSign = extractFloat64Sign( b );
252362306a36Sopenharmony_ci    if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
252462306a36Sopenharmony_ci    return ( a != b ) && ( aSign ^ ( a < b ) );
252562306a36Sopenharmony_ci
252662306a36Sopenharmony_ci}
252762306a36Sopenharmony_ci
252862306a36Sopenharmony_ci#ifdef FLOATX80
252962306a36Sopenharmony_ci
253062306a36Sopenharmony_ci/*
253162306a36Sopenharmony_ci-------------------------------------------------------------------------------
253262306a36Sopenharmony_ciReturns the result of converting the extended double-precision floating-
253362306a36Sopenharmony_cipoint value `a' to the 32-bit two's complement integer format.  The
253462306a36Sopenharmony_ciconversion is performed according to the IEC/IEEE Standard for Binary
253562306a36Sopenharmony_ciFloating-point Arithmetic---which means in particular that the conversion
253662306a36Sopenharmony_ciis rounded according to the current rounding mode.  If `a' is a NaN, the
253762306a36Sopenharmony_cilargest positive integer is returned.  Otherwise, if the conversion
253862306a36Sopenharmony_cioverflows, the largest integer with the same sign as `a' is returned.
253962306a36Sopenharmony_ci-------------------------------------------------------------------------------
254062306a36Sopenharmony_ci*/
254162306a36Sopenharmony_ciint32 floatx80_to_int32( struct roundingData *roundData, floatx80 a )
254262306a36Sopenharmony_ci{
254362306a36Sopenharmony_ci    flag aSign;
254462306a36Sopenharmony_ci    int32 aExp, shiftCount;
254562306a36Sopenharmony_ci    bits64 aSig;
254662306a36Sopenharmony_ci
254762306a36Sopenharmony_ci    aSig = extractFloatx80Frac( a );
254862306a36Sopenharmony_ci    aExp = extractFloatx80Exp( a );
254962306a36Sopenharmony_ci    aSign = extractFloatx80Sign( a );
255062306a36Sopenharmony_ci    if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
255162306a36Sopenharmony_ci    shiftCount = 0x4037 - aExp;
255262306a36Sopenharmony_ci    if ( shiftCount <= 0 ) shiftCount = 1;
255362306a36Sopenharmony_ci    shift64RightJamming( aSig, shiftCount, &aSig );
255462306a36Sopenharmony_ci    return roundAndPackInt32( roundData, aSign, aSig );
255562306a36Sopenharmony_ci
255662306a36Sopenharmony_ci}
255762306a36Sopenharmony_ci
255862306a36Sopenharmony_ci/*
255962306a36Sopenharmony_ci-------------------------------------------------------------------------------
256062306a36Sopenharmony_ciReturns the result of converting the extended double-precision floating-
256162306a36Sopenharmony_cipoint value `a' to the 32-bit two's complement integer format.  The
256262306a36Sopenharmony_ciconversion is performed according to the IEC/IEEE Standard for Binary
256362306a36Sopenharmony_ciFloating-point Arithmetic, except that the conversion is always rounded
256462306a36Sopenharmony_citoward zero.  If `a' is a NaN, the largest positive integer is returned.
256562306a36Sopenharmony_ciOtherwise, if the conversion overflows, the largest integer with the same
256662306a36Sopenharmony_cisign as `a' is returned.
256762306a36Sopenharmony_ci-------------------------------------------------------------------------------
256862306a36Sopenharmony_ci*/
256962306a36Sopenharmony_ciint32 floatx80_to_int32_round_to_zero( floatx80 a )
257062306a36Sopenharmony_ci{
257162306a36Sopenharmony_ci    flag aSign;
257262306a36Sopenharmony_ci    int32 aExp, shiftCount;
257362306a36Sopenharmony_ci    bits64 aSig, savedASig;
257462306a36Sopenharmony_ci    int32 z;
257562306a36Sopenharmony_ci
257662306a36Sopenharmony_ci    aSig = extractFloatx80Frac( a );
257762306a36Sopenharmony_ci    aExp = extractFloatx80Exp( a );
257862306a36Sopenharmony_ci    aSign = extractFloatx80Sign( a );
257962306a36Sopenharmony_ci    shiftCount = 0x403E - aExp;
258062306a36Sopenharmony_ci    if ( shiftCount < 32 ) {
258162306a36Sopenharmony_ci        if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
258262306a36Sopenharmony_ci        goto invalid;
258362306a36Sopenharmony_ci    }
258462306a36Sopenharmony_ci    else if ( 63 < shiftCount ) {
258562306a36Sopenharmony_ci        if ( aExp || aSig ) float_raise( float_flag_inexact );
258662306a36Sopenharmony_ci        return 0;
258762306a36Sopenharmony_ci    }
258862306a36Sopenharmony_ci    savedASig = aSig;
258962306a36Sopenharmony_ci    aSig >>= shiftCount;
259062306a36Sopenharmony_ci    z = aSig;
259162306a36Sopenharmony_ci    if ( aSign ) z = - z;
259262306a36Sopenharmony_ci    if ( ( z < 0 ) ^ aSign ) {
259362306a36Sopenharmony_ci invalid:
259462306a36Sopenharmony_ci        float_raise( float_flag_invalid );
259562306a36Sopenharmony_ci        return aSign ? 0x80000000 : 0x7FFFFFFF;
259662306a36Sopenharmony_ci    }
259762306a36Sopenharmony_ci    if ( ( aSig<<shiftCount ) != savedASig ) {
259862306a36Sopenharmony_ci        float_raise( float_flag_inexact );
259962306a36Sopenharmony_ci    }
260062306a36Sopenharmony_ci    return z;
260162306a36Sopenharmony_ci
260262306a36Sopenharmony_ci}
260362306a36Sopenharmony_ci
260462306a36Sopenharmony_ci/*
260562306a36Sopenharmony_ci-------------------------------------------------------------------------------
260662306a36Sopenharmony_ciReturns the result of converting the extended double-precision floating-
260762306a36Sopenharmony_cipoint value `a' to the single-precision floating-point format.  The
260862306a36Sopenharmony_ciconversion is performed according to the IEC/IEEE Standard for Binary
260962306a36Sopenharmony_ciFloating-point Arithmetic.
261062306a36Sopenharmony_ci-------------------------------------------------------------------------------
261162306a36Sopenharmony_ci*/
261262306a36Sopenharmony_cifloat32 floatx80_to_float32( struct roundingData *roundData, floatx80 a )
261362306a36Sopenharmony_ci{
261462306a36Sopenharmony_ci    flag aSign;
261562306a36Sopenharmony_ci    int32 aExp;
261662306a36Sopenharmony_ci    bits64 aSig;
261762306a36Sopenharmony_ci
261862306a36Sopenharmony_ci    aSig = extractFloatx80Frac( a );
261962306a36Sopenharmony_ci    aExp = extractFloatx80Exp( a );
262062306a36Sopenharmony_ci    aSign = extractFloatx80Sign( a );
262162306a36Sopenharmony_ci    if ( aExp == 0x7FFF ) {
262262306a36Sopenharmony_ci        if ( (bits64) ( aSig<<1 ) ) {
262362306a36Sopenharmony_ci            return commonNaNToFloat32( floatx80ToCommonNaN( a ) );
262462306a36Sopenharmony_ci        }
262562306a36Sopenharmony_ci        return packFloat32( aSign, 0xFF, 0 );
262662306a36Sopenharmony_ci    }
262762306a36Sopenharmony_ci    shift64RightJamming( aSig, 33, &aSig );
262862306a36Sopenharmony_ci    if ( aExp || aSig ) aExp -= 0x3F81;
262962306a36Sopenharmony_ci    return roundAndPackFloat32( roundData, aSign, aExp, aSig );
263062306a36Sopenharmony_ci
263162306a36Sopenharmony_ci}
263262306a36Sopenharmony_ci
263362306a36Sopenharmony_ci/*
263462306a36Sopenharmony_ci-------------------------------------------------------------------------------
263562306a36Sopenharmony_ciReturns the result of converting the extended double-precision floating-
263662306a36Sopenharmony_cipoint value `a' to the double-precision floating-point format.  The
263762306a36Sopenharmony_ciconversion is performed according to the IEC/IEEE Standard for Binary
263862306a36Sopenharmony_ciFloating-point Arithmetic.
263962306a36Sopenharmony_ci-------------------------------------------------------------------------------
264062306a36Sopenharmony_ci*/
264162306a36Sopenharmony_cifloat64 floatx80_to_float64( struct roundingData *roundData, floatx80 a )
264262306a36Sopenharmony_ci{
264362306a36Sopenharmony_ci    flag aSign;
264462306a36Sopenharmony_ci    int32 aExp;
264562306a36Sopenharmony_ci    bits64 aSig, zSig;
264662306a36Sopenharmony_ci
264762306a36Sopenharmony_ci    aSig = extractFloatx80Frac( a );
264862306a36Sopenharmony_ci    aExp = extractFloatx80Exp( a );
264962306a36Sopenharmony_ci    aSign = extractFloatx80Sign( a );
265062306a36Sopenharmony_ci    if ( aExp == 0x7FFF ) {
265162306a36Sopenharmony_ci        if ( (bits64) ( aSig<<1 ) ) {
265262306a36Sopenharmony_ci            return commonNaNToFloat64( floatx80ToCommonNaN( a ) );
265362306a36Sopenharmony_ci        }
265462306a36Sopenharmony_ci        return packFloat64( aSign, 0x7FF, 0 );
265562306a36Sopenharmony_ci    }
265662306a36Sopenharmony_ci    shift64RightJamming( aSig, 1, &zSig );
265762306a36Sopenharmony_ci    if ( aExp || aSig ) aExp -= 0x3C01;
265862306a36Sopenharmony_ci    return roundAndPackFloat64( roundData, aSign, aExp, zSig );
265962306a36Sopenharmony_ci
266062306a36Sopenharmony_ci}
266162306a36Sopenharmony_ci
266262306a36Sopenharmony_ci/*
266362306a36Sopenharmony_ci-------------------------------------------------------------------------------
266462306a36Sopenharmony_ciRounds the extended double-precision floating-point value `a' to an integer,
266562306a36Sopenharmony_ciand returns the result as an extended quadruple-precision floating-point
266662306a36Sopenharmony_civalue.  The operation is performed according to the IEC/IEEE Standard for
266762306a36Sopenharmony_ciBinary Floating-point Arithmetic.
266862306a36Sopenharmony_ci-------------------------------------------------------------------------------
266962306a36Sopenharmony_ci*/
267062306a36Sopenharmony_cifloatx80 floatx80_round_to_int( struct roundingData *roundData, floatx80 a )
267162306a36Sopenharmony_ci{
267262306a36Sopenharmony_ci    flag aSign;
267362306a36Sopenharmony_ci    int32 aExp;
267462306a36Sopenharmony_ci    bits64 lastBitMask, roundBitsMask;
267562306a36Sopenharmony_ci    int8 roundingMode;
267662306a36Sopenharmony_ci    floatx80 z;
267762306a36Sopenharmony_ci
267862306a36Sopenharmony_ci    aExp = extractFloatx80Exp( a );
267962306a36Sopenharmony_ci    if ( 0x403E <= aExp ) {
268062306a36Sopenharmony_ci        if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) {
268162306a36Sopenharmony_ci            return propagateFloatx80NaN( a, a );
268262306a36Sopenharmony_ci        }
268362306a36Sopenharmony_ci        return a;
268462306a36Sopenharmony_ci    }
268562306a36Sopenharmony_ci    if ( aExp <= 0x3FFE ) {
268662306a36Sopenharmony_ci        if (    ( aExp == 0 )
268762306a36Sopenharmony_ci             && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
268862306a36Sopenharmony_ci            return a;
268962306a36Sopenharmony_ci        }
269062306a36Sopenharmony_ci        roundData->exception |= float_flag_inexact;
269162306a36Sopenharmony_ci        aSign = extractFloatx80Sign( a );
269262306a36Sopenharmony_ci        switch ( roundData->mode ) {
269362306a36Sopenharmony_ci         case float_round_nearest_even:
269462306a36Sopenharmony_ci            if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 )
269562306a36Sopenharmony_ci               ) {
269662306a36Sopenharmony_ci                return
269762306a36Sopenharmony_ci                    packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
269862306a36Sopenharmony_ci            }
269962306a36Sopenharmony_ci            break;
270062306a36Sopenharmony_ci         case float_round_down:
270162306a36Sopenharmony_ci            return
270262306a36Sopenharmony_ci                  aSign ?
270362306a36Sopenharmony_ci                      packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
270462306a36Sopenharmony_ci                : packFloatx80( 0, 0, 0 );
270562306a36Sopenharmony_ci         case float_round_up:
270662306a36Sopenharmony_ci            return
270762306a36Sopenharmony_ci                  aSign ? packFloatx80( 1, 0, 0 )
270862306a36Sopenharmony_ci                : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
270962306a36Sopenharmony_ci        }
271062306a36Sopenharmony_ci        return packFloatx80( aSign, 0, 0 );
271162306a36Sopenharmony_ci    }
271262306a36Sopenharmony_ci    lastBitMask = 1;
271362306a36Sopenharmony_ci    lastBitMask <<= 0x403E - aExp;
271462306a36Sopenharmony_ci    roundBitsMask = lastBitMask - 1;
271562306a36Sopenharmony_ci    z = a;
271662306a36Sopenharmony_ci    roundingMode = roundData->mode;
271762306a36Sopenharmony_ci    if ( roundingMode == float_round_nearest_even ) {
271862306a36Sopenharmony_ci        z.low += lastBitMask>>1;
271962306a36Sopenharmony_ci        if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
272062306a36Sopenharmony_ci    }
272162306a36Sopenharmony_ci    else if ( roundingMode != float_round_to_zero ) {
272262306a36Sopenharmony_ci        if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
272362306a36Sopenharmony_ci            z.low += roundBitsMask;
272462306a36Sopenharmony_ci        }
272562306a36Sopenharmony_ci    }
272662306a36Sopenharmony_ci    z.low &= ~ roundBitsMask;
272762306a36Sopenharmony_ci    if ( z.low == 0 ) {
272862306a36Sopenharmony_ci        ++z.high;
272962306a36Sopenharmony_ci        z.low = LIT64( 0x8000000000000000 );
273062306a36Sopenharmony_ci    }
273162306a36Sopenharmony_ci    if ( z.low != a.low ) roundData->exception |= float_flag_inexact;
273262306a36Sopenharmony_ci    return z;
273362306a36Sopenharmony_ci
273462306a36Sopenharmony_ci}
273562306a36Sopenharmony_ci
273662306a36Sopenharmony_ci/*
273762306a36Sopenharmony_ci-------------------------------------------------------------------------------
273862306a36Sopenharmony_ciReturns the result of adding the absolute values of the extended double-
273962306a36Sopenharmony_ciprecision floating-point values `a' and `b'.  If `zSign' is true, the sum is
274062306a36Sopenharmony_cinegated before being returned.  `zSign' is ignored if the result is a NaN.
274162306a36Sopenharmony_ciThe addition is performed according to the IEC/IEEE Standard for Binary
274262306a36Sopenharmony_ciFloating-point Arithmetic.
274362306a36Sopenharmony_ci-------------------------------------------------------------------------------
274462306a36Sopenharmony_ci*/
274562306a36Sopenharmony_cistatic floatx80 addFloatx80Sigs( struct roundingData *roundData, floatx80 a, floatx80 b, flag zSign )
274662306a36Sopenharmony_ci{
274762306a36Sopenharmony_ci    int32 aExp, bExp, zExp;
274862306a36Sopenharmony_ci    bits64 aSig, bSig, zSig0, zSig1;
274962306a36Sopenharmony_ci    int32 expDiff;
275062306a36Sopenharmony_ci
275162306a36Sopenharmony_ci    aSig = extractFloatx80Frac( a );
275262306a36Sopenharmony_ci    aExp = extractFloatx80Exp( a );
275362306a36Sopenharmony_ci    bSig = extractFloatx80Frac( b );
275462306a36Sopenharmony_ci    bExp = extractFloatx80Exp( b );
275562306a36Sopenharmony_ci    expDiff = aExp - bExp;
275662306a36Sopenharmony_ci    if ( 0 < expDiff ) {
275762306a36Sopenharmony_ci        if ( aExp == 0x7FFF ) {
275862306a36Sopenharmony_ci            if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
275962306a36Sopenharmony_ci            return a;
276062306a36Sopenharmony_ci        }
276162306a36Sopenharmony_ci        if ( bExp == 0 ) --expDiff;
276262306a36Sopenharmony_ci        shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
276362306a36Sopenharmony_ci        zExp = aExp;
276462306a36Sopenharmony_ci    }
276562306a36Sopenharmony_ci    else if ( expDiff < 0 ) {
276662306a36Sopenharmony_ci        if ( bExp == 0x7FFF ) {
276762306a36Sopenharmony_ci            if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
276862306a36Sopenharmony_ci            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
276962306a36Sopenharmony_ci        }
277062306a36Sopenharmony_ci        if ( aExp == 0 ) ++expDiff;
277162306a36Sopenharmony_ci        shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
277262306a36Sopenharmony_ci        zExp = bExp;
277362306a36Sopenharmony_ci    }
277462306a36Sopenharmony_ci    else {
277562306a36Sopenharmony_ci        if ( aExp == 0x7FFF ) {
277662306a36Sopenharmony_ci            if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
277762306a36Sopenharmony_ci                return propagateFloatx80NaN( a, b );
277862306a36Sopenharmony_ci            }
277962306a36Sopenharmony_ci            return a;
278062306a36Sopenharmony_ci        }
278162306a36Sopenharmony_ci        zSig1 = 0;
278262306a36Sopenharmony_ci        zSig0 = aSig + bSig;
278362306a36Sopenharmony_ci        if ( aExp == 0 ) {
278462306a36Sopenharmony_ci            normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
278562306a36Sopenharmony_ci            goto roundAndPack;
278662306a36Sopenharmony_ci        }
278762306a36Sopenharmony_ci        zExp = aExp;
278862306a36Sopenharmony_ci        goto shiftRight1;
278962306a36Sopenharmony_ci    }
279062306a36Sopenharmony_ci
279162306a36Sopenharmony_ci    zSig0 = aSig + bSig;
279262306a36Sopenharmony_ci
279362306a36Sopenharmony_ci    if ( (sbits64) zSig0 < 0 ) goto roundAndPack;
279462306a36Sopenharmony_ci shiftRight1:
279562306a36Sopenharmony_ci    shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
279662306a36Sopenharmony_ci    zSig0 |= LIT64( 0x8000000000000000 );
279762306a36Sopenharmony_ci    ++zExp;
279862306a36Sopenharmony_ci roundAndPack:
279962306a36Sopenharmony_ci    return
280062306a36Sopenharmony_ci        roundAndPackFloatx80(
280162306a36Sopenharmony_ci            roundData, zSign, zExp, zSig0, zSig1 );
280262306a36Sopenharmony_ci
280362306a36Sopenharmony_ci}
280462306a36Sopenharmony_ci
280562306a36Sopenharmony_ci/*
280662306a36Sopenharmony_ci-------------------------------------------------------------------------------
280762306a36Sopenharmony_ciReturns the result of subtracting the absolute values of the extended
280862306a36Sopenharmony_cidouble-precision floating-point values `a' and `b'.  If `zSign' is true,
280962306a36Sopenharmony_cithe difference is negated before being returned.  `zSign' is ignored if the
281062306a36Sopenharmony_ciresult is a NaN.  The subtraction is performed according to the IEC/IEEE
281162306a36Sopenharmony_ciStandard for Binary Floating-point Arithmetic.
281262306a36Sopenharmony_ci-------------------------------------------------------------------------------
281362306a36Sopenharmony_ci*/
281462306a36Sopenharmony_cistatic floatx80 subFloatx80Sigs( struct roundingData *roundData, floatx80 a, floatx80 b, flag zSign )
281562306a36Sopenharmony_ci{
281662306a36Sopenharmony_ci    int32 aExp, bExp, zExp;
281762306a36Sopenharmony_ci    bits64 aSig, bSig, zSig0, zSig1;
281862306a36Sopenharmony_ci    int32 expDiff;
281962306a36Sopenharmony_ci    floatx80 z;
282062306a36Sopenharmony_ci
282162306a36Sopenharmony_ci    aSig = extractFloatx80Frac( a );
282262306a36Sopenharmony_ci    aExp = extractFloatx80Exp( a );
282362306a36Sopenharmony_ci    bSig = extractFloatx80Frac( b );
282462306a36Sopenharmony_ci    bExp = extractFloatx80Exp( b );
282562306a36Sopenharmony_ci    expDiff = aExp - bExp;
282662306a36Sopenharmony_ci    if ( 0 < expDiff ) goto aExpBigger;
282762306a36Sopenharmony_ci    if ( expDiff < 0 ) goto bExpBigger;
282862306a36Sopenharmony_ci    if ( aExp == 0x7FFF ) {
282962306a36Sopenharmony_ci        if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
283062306a36Sopenharmony_ci            return propagateFloatx80NaN( a, b );
283162306a36Sopenharmony_ci        }
283262306a36Sopenharmony_ci        roundData->exception |= float_flag_invalid;
283362306a36Sopenharmony_ci        z.low = floatx80_default_nan_low;
283462306a36Sopenharmony_ci        z.high = floatx80_default_nan_high;
283562306a36Sopenharmony_ci        z.__padding = 0;
283662306a36Sopenharmony_ci        return z;
283762306a36Sopenharmony_ci    }
283862306a36Sopenharmony_ci    if ( aExp == 0 ) {
283962306a36Sopenharmony_ci        aExp = 1;
284062306a36Sopenharmony_ci        bExp = 1;
284162306a36Sopenharmony_ci    }
284262306a36Sopenharmony_ci    zSig1 = 0;
284362306a36Sopenharmony_ci    if ( bSig < aSig ) goto aBigger;
284462306a36Sopenharmony_ci    if ( aSig < bSig ) goto bBigger;
284562306a36Sopenharmony_ci    return packFloatx80( roundData->mode == float_round_down, 0, 0 );
284662306a36Sopenharmony_ci bExpBigger:
284762306a36Sopenharmony_ci    if ( bExp == 0x7FFF ) {
284862306a36Sopenharmony_ci        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
284962306a36Sopenharmony_ci        return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
285062306a36Sopenharmony_ci    }
285162306a36Sopenharmony_ci    if ( aExp == 0 ) ++expDiff;
285262306a36Sopenharmony_ci    shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
285362306a36Sopenharmony_ci bBigger:
285462306a36Sopenharmony_ci    sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
285562306a36Sopenharmony_ci    zExp = bExp;
285662306a36Sopenharmony_ci    zSign ^= 1;
285762306a36Sopenharmony_ci    goto normalizeRoundAndPack;
285862306a36Sopenharmony_ci aExpBigger:
285962306a36Sopenharmony_ci    if ( aExp == 0x7FFF ) {
286062306a36Sopenharmony_ci        if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
286162306a36Sopenharmony_ci        return a;
286262306a36Sopenharmony_ci    }
286362306a36Sopenharmony_ci    if ( bExp == 0 ) --expDiff;
286462306a36Sopenharmony_ci    shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
286562306a36Sopenharmony_ci aBigger:
286662306a36Sopenharmony_ci    sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
286762306a36Sopenharmony_ci    zExp = aExp;
286862306a36Sopenharmony_ci normalizeRoundAndPack:
286962306a36Sopenharmony_ci    return
287062306a36Sopenharmony_ci        normalizeRoundAndPackFloatx80(
287162306a36Sopenharmony_ci            roundData, zSign, zExp, zSig0, zSig1 );
287262306a36Sopenharmony_ci
287362306a36Sopenharmony_ci}
287462306a36Sopenharmony_ci
287562306a36Sopenharmony_ci/*
287662306a36Sopenharmony_ci-------------------------------------------------------------------------------
287762306a36Sopenharmony_ciReturns the result of adding the extended double-precision floating-point
287862306a36Sopenharmony_civalues `a' and `b'.  The operation is performed according to the IEC/IEEE
287962306a36Sopenharmony_ciStandard for Binary Floating-point Arithmetic.
288062306a36Sopenharmony_ci-------------------------------------------------------------------------------
288162306a36Sopenharmony_ci*/
288262306a36Sopenharmony_cifloatx80 floatx80_add( struct roundingData *roundData, floatx80 a, floatx80 b )
288362306a36Sopenharmony_ci{
288462306a36Sopenharmony_ci    flag aSign, bSign;
288562306a36Sopenharmony_ci
288662306a36Sopenharmony_ci    aSign = extractFloatx80Sign( a );
288762306a36Sopenharmony_ci    bSign = extractFloatx80Sign( b );
288862306a36Sopenharmony_ci    if ( aSign == bSign ) {
288962306a36Sopenharmony_ci        return addFloatx80Sigs( roundData, a, b, aSign );
289062306a36Sopenharmony_ci    }
289162306a36Sopenharmony_ci    else {
289262306a36Sopenharmony_ci        return subFloatx80Sigs( roundData, a, b, aSign );
289362306a36Sopenharmony_ci    }
289462306a36Sopenharmony_ci
289562306a36Sopenharmony_ci}
289662306a36Sopenharmony_ci
289762306a36Sopenharmony_ci/*
289862306a36Sopenharmony_ci-------------------------------------------------------------------------------
289962306a36Sopenharmony_ciReturns the result of subtracting the extended double-precision floating-
290062306a36Sopenharmony_cipoint values `a' and `b'.  The operation is performed according to the
290162306a36Sopenharmony_ciIEC/IEEE Standard for Binary Floating-point Arithmetic.
290262306a36Sopenharmony_ci-------------------------------------------------------------------------------
290362306a36Sopenharmony_ci*/
290462306a36Sopenharmony_cifloatx80 floatx80_sub( struct roundingData *roundData, floatx80 a, floatx80 b )
290562306a36Sopenharmony_ci{
290662306a36Sopenharmony_ci    flag aSign, bSign;
290762306a36Sopenharmony_ci
290862306a36Sopenharmony_ci    aSign = extractFloatx80Sign( a );
290962306a36Sopenharmony_ci    bSign = extractFloatx80Sign( b );
291062306a36Sopenharmony_ci    if ( aSign == bSign ) {
291162306a36Sopenharmony_ci        return subFloatx80Sigs( roundData, a, b, aSign );
291262306a36Sopenharmony_ci    }
291362306a36Sopenharmony_ci    else {
291462306a36Sopenharmony_ci        return addFloatx80Sigs( roundData, a, b, aSign );
291562306a36Sopenharmony_ci    }
291662306a36Sopenharmony_ci
291762306a36Sopenharmony_ci}
291862306a36Sopenharmony_ci
291962306a36Sopenharmony_ci/*
292062306a36Sopenharmony_ci-------------------------------------------------------------------------------
292162306a36Sopenharmony_ciReturns the result of multiplying the extended double-precision floating-
292262306a36Sopenharmony_cipoint values `a' and `b'.  The operation is performed according to the
292362306a36Sopenharmony_ciIEC/IEEE Standard for Binary Floating-point Arithmetic.
292462306a36Sopenharmony_ci-------------------------------------------------------------------------------
292562306a36Sopenharmony_ci*/
292662306a36Sopenharmony_cifloatx80 floatx80_mul( struct roundingData *roundData, floatx80 a, floatx80 b )
292762306a36Sopenharmony_ci{
292862306a36Sopenharmony_ci    flag aSign, bSign, zSign;
292962306a36Sopenharmony_ci    int32 aExp, bExp, zExp;
293062306a36Sopenharmony_ci    bits64 aSig, bSig, zSig0, zSig1;
293162306a36Sopenharmony_ci    floatx80 z;
293262306a36Sopenharmony_ci
293362306a36Sopenharmony_ci    aSig = extractFloatx80Frac( a );
293462306a36Sopenharmony_ci    aExp = extractFloatx80Exp( a );
293562306a36Sopenharmony_ci    aSign = extractFloatx80Sign( a );
293662306a36Sopenharmony_ci    bSig = extractFloatx80Frac( b );
293762306a36Sopenharmony_ci    bExp = extractFloatx80Exp( b );
293862306a36Sopenharmony_ci    bSign = extractFloatx80Sign( b );
293962306a36Sopenharmony_ci    zSign = aSign ^ bSign;
294062306a36Sopenharmony_ci    if ( aExp == 0x7FFF ) {
294162306a36Sopenharmony_ci        if (    (bits64) ( aSig<<1 )
294262306a36Sopenharmony_ci             || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
294362306a36Sopenharmony_ci            return propagateFloatx80NaN( a, b );
294462306a36Sopenharmony_ci        }
294562306a36Sopenharmony_ci        if ( ( bExp | bSig ) == 0 ) goto invalid;
294662306a36Sopenharmony_ci        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
294762306a36Sopenharmony_ci    }
294862306a36Sopenharmony_ci    if ( bExp == 0x7FFF ) {
294962306a36Sopenharmony_ci        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
295062306a36Sopenharmony_ci        if ( ( aExp | aSig ) == 0 ) {
295162306a36Sopenharmony_ci invalid:
295262306a36Sopenharmony_ci            roundData->exception |= float_flag_invalid;
295362306a36Sopenharmony_ci            z.low = floatx80_default_nan_low;
295462306a36Sopenharmony_ci            z.high = floatx80_default_nan_high;
295562306a36Sopenharmony_ci            z.__padding = 0;
295662306a36Sopenharmony_ci            return z;
295762306a36Sopenharmony_ci        }
295862306a36Sopenharmony_ci        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
295962306a36Sopenharmony_ci    }
296062306a36Sopenharmony_ci    if ( aExp == 0 ) {
296162306a36Sopenharmony_ci        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
296262306a36Sopenharmony_ci        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
296362306a36Sopenharmony_ci    }
296462306a36Sopenharmony_ci    if ( bExp == 0 ) {
296562306a36Sopenharmony_ci        if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
296662306a36Sopenharmony_ci        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
296762306a36Sopenharmony_ci    }
296862306a36Sopenharmony_ci    zExp = aExp + bExp - 0x3FFE;
296962306a36Sopenharmony_ci    mul64To128( aSig, bSig, &zSig0, &zSig1 );
297062306a36Sopenharmony_ci    if ( 0 < (sbits64) zSig0 ) {
297162306a36Sopenharmony_ci        shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
297262306a36Sopenharmony_ci        --zExp;
297362306a36Sopenharmony_ci    }
297462306a36Sopenharmony_ci    return
297562306a36Sopenharmony_ci        roundAndPackFloatx80(
297662306a36Sopenharmony_ci            roundData, zSign, zExp, zSig0, zSig1 );
297762306a36Sopenharmony_ci
297862306a36Sopenharmony_ci}
297962306a36Sopenharmony_ci
298062306a36Sopenharmony_ci/*
298162306a36Sopenharmony_ci-------------------------------------------------------------------------------
298262306a36Sopenharmony_ciReturns the result of dividing the extended double-precision floating-point
298362306a36Sopenharmony_civalue `a' by the corresponding value `b'.  The operation is performed
298462306a36Sopenharmony_ciaccording to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
298562306a36Sopenharmony_ci-------------------------------------------------------------------------------
298662306a36Sopenharmony_ci*/
298762306a36Sopenharmony_cifloatx80 floatx80_div( struct roundingData *roundData, floatx80 a, floatx80 b )
298862306a36Sopenharmony_ci{
298962306a36Sopenharmony_ci    flag aSign, bSign, zSign;
299062306a36Sopenharmony_ci    int32 aExp, bExp, zExp;
299162306a36Sopenharmony_ci    bits64 aSig, bSig, zSig0, zSig1;
299262306a36Sopenharmony_ci    bits64 rem0, rem1, rem2, term0, term1, term2;
299362306a36Sopenharmony_ci    floatx80 z;
299462306a36Sopenharmony_ci
299562306a36Sopenharmony_ci    aSig = extractFloatx80Frac( a );
299662306a36Sopenharmony_ci    aExp = extractFloatx80Exp( a );
299762306a36Sopenharmony_ci    aSign = extractFloatx80Sign( a );
299862306a36Sopenharmony_ci    bSig = extractFloatx80Frac( b );
299962306a36Sopenharmony_ci    bExp = extractFloatx80Exp( b );
300062306a36Sopenharmony_ci    bSign = extractFloatx80Sign( b );
300162306a36Sopenharmony_ci    zSign = aSign ^ bSign;
300262306a36Sopenharmony_ci    if ( aExp == 0x7FFF ) {
300362306a36Sopenharmony_ci        if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
300462306a36Sopenharmony_ci        if ( bExp == 0x7FFF ) {
300562306a36Sopenharmony_ci            if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
300662306a36Sopenharmony_ci            goto invalid;
300762306a36Sopenharmony_ci        }
300862306a36Sopenharmony_ci        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
300962306a36Sopenharmony_ci    }
301062306a36Sopenharmony_ci    if ( bExp == 0x7FFF ) {
301162306a36Sopenharmony_ci        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
301262306a36Sopenharmony_ci        return packFloatx80( zSign, 0, 0 );
301362306a36Sopenharmony_ci    }
301462306a36Sopenharmony_ci    if ( bExp == 0 ) {
301562306a36Sopenharmony_ci        if ( bSig == 0 ) {
301662306a36Sopenharmony_ci            if ( ( aExp | aSig ) == 0 ) {
301762306a36Sopenharmony_ci invalid:
301862306a36Sopenharmony_ci                roundData->exception |= float_flag_invalid;
301962306a36Sopenharmony_ci                z.low = floatx80_default_nan_low;
302062306a36Sopenharmony_ci                z.high = floatx80_default_nan_high;
302162306a36Sopenharmony_ci                z.__padding = 0;
302262306a36Sopenharmony_ci                return z;
302362306a36Sopenharmony_ci            }
302462306a36Sopenharmony_ci            roundData->exception |= float_flag_divbyzero;
302562306a36Sopenharmony_ci            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
302662306a36Sopenharmony_ci        }
302762306a36Sopenharmony_ci        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
302862306a36Sopenharmony_ci    }
302962306a36Sopenharmony_ci    if ( aExp == 0 ) {
303062306a36Sopenharmony_ci        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
303162306a36Sopenharmony_ci        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
303262306a36Sopenharmony_ci    }
303362306a36Sopenharmony_ci    zExp = aExp - bExp + 0x3FFE;
303462306a36Sopenharmony_ci    rem1 = 0;
303562306a36Sopenharmony_ci    if ( bSig <= aSig ) {
303662306a36Sopenharmony_ci        shift128Right( aSig, 0, 1, &aSig, &rem1 );
303762306a36Sopenharmony_ci        ++zExp;
303862306a36Sopenharmony_ci    }
303962306a36Sopenharmony_ci    zSig0 = estimateDiv128To64( aSig, rem1, bSig );
304062306a36Sopenharmony_ci    mul64To128( bSig, zSig0, &term0, &term1 );
304162306a36Sopenharmony_ci    sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
304262306a36Sopenharmony_ci    while ( (sbits64) rem0 < 0 ) {
304362306a36Sopenharmony_ci        --zSig0;
304462306a36Sopenharmony_ci        add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
304562306a36Sopenharmony_ci    }
304662306a36Sopenharmony_ci    zSig1 = estimateDiv128To64( rem1, 0, bSig );
304762306a36Sopenharmony_ci    if ( (bits64) ( zSig1<<1 ) <= 8 ) {
304862306a36Sopenharmony_ci        mul64To128( bSig, zSig1, &term1, &term2 );
304962306a36Sopenharmony_ci        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
305062306a36Sopenharmony_ci        while ( (sbits64) rem1 < 0 ) {
305162306a36Sopenharmony_ci            --zSig1;
305262306a36Sopenharmony_ci            add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
305362306a36Sopenharmony_ci        }
305462306a36Sopenharmony_ci        zSig1 |= ( ( rem1 | rem2 ) != 0 );
305562306a36Sopenharmony_ci    }
305662306a36Sopenharmony_ci    return
305762306a36Sopenharmony_ci        roundAndPackFloatx80(
305862306a36Sopenharmony_ci            roundData, zSign, zExp, zSig0, zSig1 );
305962306a36Sopenharmony_ci
306062306a36Sopenharmony_ci}
306162306a36Sopenharmony_ci
306262306a36Sopenharmony_ci/*
306362306a36Sopenharmony_ci-------------------------------------------------------------------------------
306462306a36Sopenharmony_ciReturns the remainder of the extended double-precision floating-point value
306562306a36Sopenharmony_ci`a' with respect to the corresponding value `b'.  The operation is performed
306662306a36Sopenharmony_ciaccording to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
306762306a36Sopenharmony_ci-------------------------------------------------------------------------------
306862306a36Sopenharmony_ci*/
306962306a36Sopenharmony_cifloatx80 floatx80_rem( struct roundingData *roundData, floatx80 a, floatx80 b )
307062306a36Sopenharmony_ci{
307162306a36Sopenharmony_ci    flag aSign, bSign, zSign;
307262306a36Sopenharmony_ci    int32 aExp, bExp, expDiff;
307362306a36Sopenharmony_ci    bits64 aSig0, aSig1, bSig;
307462306a36Sopenharmony_ci    bits64 q, term0, term1, alternateASig0, alternateASig1;
307562306a36Sopenharmony_ci    floatx80 z;
307662306a36Sopenharmony_ci
307762306a36Sopenharmony_ci    aSig0 = extractFloatx80Frac( a );
307862306a36Sopenharmony_ci    aExp = extractFloatx80Exp( a );
307962306a36Sopenharmony_ci    aSign = extractFloatx80Sign( a );
308062306a36Sopenharmony_ci    bSig = extractFloatx80Frac( b );
308162306a36Sopenharmony_ci    bExp = extractFloatx80Exp( b );
308262306a36Sopenharmony_ci    bSign = extractFloatx80Sign( b );
308362306a36Sopenharmony_ci    if ( aExp == 0x7FFF ) {
308462306a36Sopenharmony_ci        if (    (bits64) ( aSig0<<1 )
308562306a36Sopenharmony_ci             || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
308662306a36Sopenharmony_ci            return propagateFloatx80NaN( a, b );
308762306a36Sopenharmony_ci        }
308862306a36Sopenharmony_ci        goto invalid;
308962306a36Sopenharmony_ci    }
309062306a36Sopenharmony_ci    if ( bExp == 0x7FFF ) {
309162306a36Sopenharmony_ci        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
309262306a36Sopenharmony_ci        return a;
309362306a36Sopenharmony_ci    }
309462306a36Sopenharmony_ci    if ( bExp == 0 ) {
309562306a36Sopenharmony_ci        if ( bSig == 0 ) {
309662306a36Sopenharmony_ci invalid:
309762306a36Sopenharmony_ci            roundData->exception |= float_flag_invalid;
309862306a36Sopenharmony_ci            z.low = floatx80_default_nan_low;
309962306a36Sopenharmony_ci            z.high = floatx80_default_nan_high;
310062306a36Sopenharmony_ci            z.__padding = 0;
310162306a36Sopenharmony_ci            return z;
310262306a36Sopenharmony_ci        }
310362306a36Sopenharmony_ci        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
310462306a36Sopenharmony_ci    }
310562306a36Sopenharmony_ci    if ( aExp == 0 ) {
310662306a36Sopenharmony_ci        if ( (bits64) ( aSig0<<1 ) == 0 ) return a;
310762306a36Sopenharmony_ci        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
310862306a36Sopenharmony_ci    }
310962306a36Sopenharmony_ci    bSig |= LIT64( 0x8000000000000000 );
311062306a36Sopenharmony_ci    zSign = aSign;
311162306a36Sopenharmony_ci    expDiff = aExp - bExp;
311262306a36Sopenharmony_ci    aSig1 = 0;
311362306a36Sopenharmony_ci    if ( expDiff < 0 ) {
311462306a36Sopenharmony_ci        if ( expDiff < -1 ) return a;
311562306a36Sopenharmony_ci        shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
311662306a36Sopenharmony_ci        expDiff = 0;
311762306a36Sopenharmony_ci    }
311862306a36Sopenharmony_ci    q = ( bSig <= aSig0 );
311962306a36Sopenharmony_ci    if ( q ) aSig0 -= bSig;
312062306a36Sopenharmony_ci    expDiff -= 64;
312162306a36Sopenharmony_ci    while ( 0 < expDiff ) {
312262306a36Sopenharmony_ci        q = estimateDiv128To64( aSig0, aSig1, bSig );
312362306a36Sopenharmony_ci        q = ( 2 < q ) ? q - 2 : 0;
312462306a36Sopenharmony_ci        mul64To128( bSig, q, &term0, &term1 );
312562306a36Sopenharmony_ci        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
312662306a36Sopenharmony_ci        shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
312762306a36Sopenharmony_ci        expDiff -= 62;
312862306a36Sopenharmony_ci    }
312962306a36Sopenharmony_ci    expDiff += 64;
313062306a36Sopenharmony_ci    if ( 0 < expDiff ) {
313162306a36Sopenharmony_ci        q = estimateDiv128To64( aSig0, aSig1, bSig );
313262306a36Sopenharmony_ci        q = ( 2 < q ) ? q - 2 : 0;
313362306a36Sopenharmony_ci        q >>= 64 - expDiff;
313462306a36Sopenharmony_ci        mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
313562306a36Sopenharmony_ci        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
313662306a36Sopenharmony_ci        shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
313762306a36Sopenharmony_ci        while ( le128( term0, term1, aSig0, aSig1 ) ) {
313862306a36Sopenharmony_ci            ++q;
313962306a36Sopenharmony_ci            sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
314062306a36Sopenharmony_ci        }
314162306a36Sopenharmony_ci    }
314262306a36Sopenharmony_ci    else {
314362306a36Sopenharmony_ci        term1 = 0;
314462306a36Sopenharmony_ci        term0 = bSig;
314562306a36Sopenharmony_ci    }
314662306a36Sopenharmony_ci    sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
314762306a36Sopenharmony_ci    if (    lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
314862306a36Sopenharmony_ci         || (    eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
314962306a36Sopenharmony_ci              && ( q & 1 ) )
315062306a36Sopenharmony_ci       ) {
315162306a36Sopenharmony_ci        aSig0 = alternateASig0;
315262306a36Sopenharmony_ci        aSig1 = alternateASig1;
315362306a36Sopenharmony_ci        zSign = ! zSign;
315462306a36Sopenharmony_ci    }
315562306a36Sopenharmony_ci
315662306a36Sopenharmony_ci    return
315762306a36Sopenharmony_ci        normalizeRoundAndPackFloatx80(
315862306a36Sopenharmony_ci            roundData, zSign, bExp + expDiff, aSig0, aSig1 );
315962306a36Sopenharmony_ci
316062306a36Sopenharmony_ci}
316162306a36Sopenharmony_ci
316262306a36Sopenharmony_ci/*
316362306a36Sopenharmony_ci-------------------------------------------------------------------------------
316462306a36Sopenharmony_ciReturns the square root of the extended double-precision floating-point
316562306a36Sopenharmony_civalue `a'.  The operation is performed according to the IEC/IEEE Standard
316662306a36Sopenharmony_cifor Binary Floating-point Arithmetic.
316762306a36Sopenharmony_ci-------------------------------------------------------------------------------
316862306a36Sopenharmony_ci*/
316962306a36Sopenharmony_cifloatx80 floatx80_sqrt( struct roundingData *roundData, floatx80 a )
317062306a36Sopenharmony_ci{
317162306a36Sopenharmony_ci    flag aSign;
317262306a36Sopenharmony_ci    int32 aExp, zExp;
317362306a36Sopenharmony_ci    bits64 aSig0, aSig1, zSig0, zSig1;
317462306a36Sopenharmony_ci    bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
317562306a36Sopenharmony_ci    bits64 shiftedRem0, shiftedRem1;
317662306a36Sopenharmony_ci    floatx80 z;
317762306a36Sopenharmony_ci
317862306a36Sopenharmony_ci    aSig0 = extractFloatx80Frac( a );
317962306a36Sopenharmony_ci    aExp = extractFloatx80Exp( a );
318062306a36Sopenharmony_ci    aSign = extractFloatx80Sign( a );
318162306a36Sopenharmony_ci    if ( aExp == 0x7FFF ) {
318262306a36Sopenharmony_ci        if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a );
318362306a36Sopenharmony_ci        if ( ! aSign ) return a;
318462306a36Sopenharmony_ci        goto invalid;
318562306a36Sopenharmony_ci    }
318662306a36Sopenharmony_ci    if ( aSign ) {
318762306a36Sopenharmony_ci        if ( ( aExp | aSig0 ) == 0 ) return a;
318862306a36Sopenharmony_ci invalid:
318962306a36Sopenharmony_ci        roundData->exception |= float_flag_invalid;
319062306a36Sopenharmony_ci        z.low = floatx80_default_nan_low;
319162306a36Sopenharmony_ci        z.high = floatx80_default_nan_high;
319262306a36Sopenharmony_ci        z.__padding = 0;
319362306a36Sopenharmony_ci        return z;
319462306a36Sopenharmony_ci    }
319562306a36Sopenharmony_ci    if ( aExp == 0 ) {
319662306a36Sopenharmony_ci        if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
319762306a36Sopenharmony_ci        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
319862306a36Sopenharmony_ci    }
319962306a36Sopenharmony_ci    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
320062306a36Sopenharmony_ci    zSig0 = estimateSqrt32( aExp, aSig0>>32 );
320162306a36Sopenharmony_ci    zSig0 <<= 31;
320262306a36Sopenharmony_ci    aSig1 = 0;
320362306a36Sopenharmony_ci    shift128Right( aSig0, 0, ( aExp & 1 ) + 2, &aSig0, &aSig1 );
320462306a36Sopenharmony_ci    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0 ) + zSig0 + 4;
320562306a36Sopenharmony_ci    if ( 0 <= (sbits64) zSig0 ) zSig0 = LIT64( 0xFFFFFFFFFFFFFFFF );
320662306a36Sopenharmony_ci    shortShift128Left( aSig0, aSig1, 2, &aSig0, &aSig1 );
320762306a36Sopenharmony_ci    mul64To128( zSig0, zSig0, &term0, &term1 );
320862306a36Sopenharmony_ci    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
320962306a36Sopenharmony_ci    while ( (sbits64) rem0 < 0 ) {
321062306a36Sopenharmony_ci        --zSig0;
321162306a36Sopenharmony_ci        shortShift128Left( 0, zSig0, 1, &term0, &term1 );
321262306a36Sopenharmony_ci        term1 |= 1;
321362306a36Sopenharmony_ci        add128( rem0, rem1, term0, term1, &rem0, &rem1 );
321462306a36Sopenharmony_ci    }
321562306a36Sopenharmony_ci    shortShift128Left( rem0, rem1, 63, &shiftedRem0, &shiftedRem1 );
321662306a36Sopenharmony_ci    zSig1 = estimateDiv128To64( shiftedRem0, shiftedRem1, zSig0 );
321762306a36Sopenharmony_ci    if ( (bits64) ( zSig1<<1 ) <= 10 ) {
321862306a36Sopenharmony_ci        if ( zSig1 == 0 ) zSig1 = 1;
321962306a36Sopenharmony_ci        mul64To128( zSig0, zSig1, &term1, &term2 );
322062306a36Sopenharmony_ci        shortShift128Left( term1, term2, 1, &term1, &term2 );
322162306a36Sopenharmony_ci        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
322262306a36Sopenharmony_ci        mul64To128( zSig1, zSig1, &term2, &term3 );
322362306a36Sopenharmony_ci        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
322462306a36Sopenharmony_ci        while ( (sbits64) rem1 < 0 ) {
322562306a36Sopenharmony_ci            --zSig1;
322662306a36Sopenharmony_ci            shortShift192Left( 0, zSig0, zSig1, 1, &term1, &term2, &term3 );
322762306a36Sopenharmony_ci            term3 |= 1;
322862306a36Sopenharmony_ci            add192(
322962306a36Sopenharmony_ci                rem1, rem2, rem3, term1, term2, term3, &rem1, &rem2, &rem3 );
323062306a36Sopenharmony_ci        }
323162306a36Sopenharmony_ci        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
323262306a36Sopenharmony_ci    }
323362306a36Sopenharmony_ci    return
323462306a36Sopenharmony_ci        roundAndPackFloatx80(
323562306a36Sopenharmony_ci            roundData, 0, zExp, zSig0, zSig1 );
323662306a36Sopenharmony_ci
323762306a36Sopenharmony_ci}
323862306a36Sopenharmony_ci
323962306a36Sopenharmony_ci/*
324062306a36Sopenharmony_ci-------------------------------------------------------------------------------
324162306a36Sopenharmony_ciReturns 1 if the extended double-precision floating-point value `a' is
324262306a36Sopenharmony_ciequal to the corresponding value `b', and 0 otherwise.  The comparison is
324362306a36Sopenharmony_ciperformed according to the IEC/IEEE Standard for Binary Floating-point
324462306a36Sopenharmony_ciArithmetic.
324562306a36Sopenharmony_ci-------------------------------------------------------------------------------
324662306a36Sopenharmony_ci*/
324762306a36Sopenharmony_ciflag floatx80_eq( floatx80 a, floatx80 b )
324862306a36Sopenharmony_ci{
324962306a36Sopenharmony_ci
325062306a36Sopenharmony_ci    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
325162306a36Sopenharmony_ci              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
325262306a36Sopenharmony_ci         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
325362306a36Sopenharmony_ci              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
325462306a36Sopenharmony_ci       ) {
325562306a36Sopenharmony_ci        if (    floatx80_is_signaling_nan( a )
325662306a36Sopenharmony_ci             || floatx80_is_signaling_nan( b ) ) {
325762306a36Sopenharmony_ci            float_raise( float_flag_invalid );
325862306a36Sopenharmony_ci        }
325962306a36Sopenharmony_ci        return 0;
326062306a36Sopenharmony_ci    }
326162306a36Sopenharmony_ci    return
326262306a36Sopenharmony_ci           ( a.low == b.low )
326362306a36Sopenharmony_ci        && (    ( a.high == b.high )
326462306a36Sopenharmony_ci             || (    ( a.low == 0 )
326562306a36Sopenharmony_ci                  && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
326662306a36Sopenharmony_ci           );
326762306a36Sopenharmony_ci
326862306a36Sopenharmony_ci}
326962306a36Sopenharmony_ci
327062306a36Sopenharmony_ci/*
327162306a36Sopenharmony_ci-------------------------------------------------------------------------------
327262306a36Sopenharmony_ciReturns 1 if the extended double-precision floating-point value `a' is
327362306a36Sopenharmony_ciless than or equal to the corresponding value `b', and 0 otherwise.  The
327462306a36Sopenharmony_cicomparison is performed according to the IEC/IEEE Standard for Binary
327562306a36Sopenharmony_ciFloating-point Arithmetic.
327662306a36Sopenharmony_ci-------------------------------------------------------------------------------
327762306a36Sopenharmony_ci*/
327862306a36Sopenharmony_ciflag floatx80_le( floatx80 a, floatx80 b )
327962306a36Sopenharmony_ci{
328062306a36Sopenharmony_ci    flag aSign, bSign;
328162306a36Sopenharmony_ci
328262306a36Sopenharmony_ci    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
328362306a36Sopenharmony_ci              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
328462306a36Sopenharmony_ci         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
328562306a36Sopenharmony_ci              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
328662306a36Sopenharmony_ci       ) {
328762306a36Sopenharmony_ci        float_raise( float_flag_invalid );
328862306a36Sopenharmony_ci        return 0;
328962306a36Sopenharmony_ci    }
329062306a36Sopenharmony_ci    aSign = extractFloatx80Sign( a );
329162306a36Sopenharmony_ci    bSign = extractFloatx80Sign( b );
329262306a36Sopenharmony_ci    if ( aSign != bSign ) {
329362306a36Sopenharmony_ci        return
329462306a36Sopenharmony_ci               aSign
329562306a36Sopenharmony_ci            || (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
329662306a36Sopenharmony_ci                 == 0 );
329762306a36Sopenharmony_ci    }
329862306a36Sopenharmony_ci    return
329962306a36Sopenharmony_ci          aSign ? le128( b.high, b.low, a.high, a.low )
330062306a36Sopenharmony_ci        : le128( a.high, a.low, b.high, b.low );
330162306a36Sopenharmony_ci
330262306a36Sopenharmony_ci}
330362306a36Sopenharmony_ci
330462306a36Sopenharmony_ci/*
330562306a36Sopenharmony_ci-------------------------------------------------------------------------------
330662306a36Sopenharmony_ciReturns 1 if the extended double-precision floating-point value `a' is
330762306a36Sopenharmony_ciless than the corresponding value `b', and 0 otherwise.  The comparison
330862306a36Sopenharmony_ciis performed according to the IEC/IEEE Standard for Binary Floating-point
330962306a36Sopenharmony_ciArithmetic.
331062306a36Sopenharmony_ci-------------------------------------------------------------------------------
331162306a36Sopenharmony_ci*/
331262306a36Sopenharmony_ciflag floatx80_lt( floatx80 a, floatx80 b )
331362306a36Sopenharmony_ci{
331462306a36Sopenharmony_ci    flag aSign, bSign;
331562306a36Sopenharmony_ci
331662306a36Sopenharmony_ci    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
331762306a36Sopenharmony_ci              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
331862306a36Sopenharmony_ci         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
331962306a36Sopenharmony_ci              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
332062306a36Sopenharmony_ci       ) {
332162306a36Sopenharmony_ci        float_raise( float_flag_invalid );
332262306a36Sopenharmony_ci        return 0;
332362306a36Sopenharmony_ci    }
332462306a36Sopenharmony_ci    aSign = extractFloatx80Sign( a );
332562306a36Sopenharmony_ci    bSign = extractFloatx80Sign( b );
332662306a36Sopenharmony_ci    if ( aSign != bSign ) {
332762306a36Sopenharmony_ci        return
332862306a36Sopenharmony_ci               aSign
332962306a36Sopenharmony_ci            && (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
333062306a36Sopenharmony_ci                 != 0 );
333162306a36Sopenharmony_ci    }
333262306a36Sopenharmony_ci    return
333362306a36Sopenharmony_ci          aSign ? lt128( b.high, b.low, a.high, a.low )
333462306a36Sopenharmony_ci        : lt128( a.high, a.low, b.high, b.low );
333562306a36Sopenharmony_ci
333662306a36Sopenharmony_ci}
333762306a36Sopenharmony_ci
333862306a36Sopenharmony_ci/*
333962306a36Sopenharmony_ci-------------------------------------------------------------------------------
334062306a36Sopenharmony_ciReturns 1 if the extended double-precision floating-point value `a' is equal
334162306a36Sopenharmony_cito the corresponding value `b', and 0 otherwise.  The invalid exception is
334262306a36Sopenharmony_ciraised if either operand is a NaN.  Otherwise, the comparison is performed
334362306a36Sopenharmony_ciaccording to the IEC/IEEE Standard for Binary Floating-point Arithmetic.
334462306a36Sopenharmony_ci-------------------------------------------------------------------------------
334562306a36Sopenharmony_ci*/
334662306a36Sopenharmony_ciflag floatx80_eq_signaling( floatx80 a, floatx80 b )
334762306a36Sopenharmony_ci{
334862306a36Sopenharmony_ci
334962306a36Sopenharmony_ci    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
335062306a36Sopenharmony_ci              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
335162306a36Sopenharmony_ci         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
335262306a36Sopenharmony_ci              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
335362306a36Sopenharmony_ci       ) {
335462306a36Sopenharmony_ci        float_raise( float_flag_invalid );
335562306a36Sopenharmony_ci        return 0;
335662306a36Sopenharmony_ci    }
335762306a36Sopenharmony_ci    return
335862306a36Sopenharmony_ci           ( a.low == b.low )
335962306a36Sopenharmony_ci        && (    ( a.high == b.high )
336062306a36Sopenharmony_ci             || (    ( a.low == 0 )
336162306a36Sopenharmony_ci                  && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
336262306a36Sopenharmony_ci           );
336362306a36Sopenharmony_ci
336462306a36Sopenharmony_ci}
336562306a36Sopenharmony_ci
336662306a36Sopenharmony_ci/*
336762306a36Sopenharmony_ci-------------------------------------------------------------------------------
336862306a36Sopenharmony_ciReturns 1 if the extended double-precision floating-point value `a' is less
336962306a36Sopenharmony_cithan or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs
337062306a36Sopenharmony_cido not cause an exception.  Otherwise, the comparison is performed according
337162306a36Sopenharmony_cito the IEC/IEEE Standard for Binary Floating-point Arithmetic.
337262306a36Sopenharmony_ci-------------------------------------------------------------------------------
337362306a36Sopenharmony_ci*/
337462306a36Sopenharmony_ciflag floatx80_le_quiet( floatx80 a, floatx80 b )
337562306a36Sopenharmony_ci{
337662306a36Sopenharmony_ci    flag aSign, bSign;
337762306a36Sopenharmony_ci
337862306a36Sopenharmony_ci    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
337962306a36Sopenharmony_ci              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
338062306a36Sopenharmony_ci         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
338162306a36Sopenharmony_ci              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
338262306a36Sopenharmony_ci       ) {
338362306a36Sopenharmony_ci        /* Do nothing, even if NaN as we're quiet */
338462306a36Sopenharmony_ci        return 0;
338562306a36Sopenharmony_ci    }
338662306a36Sopenharmony_ci    aSign = extractFloatx80Sign( a );
338762306a36Sopenharmony_ci    bSign = extractFloatx80Sign( b );
338862306a36Sopenharmony_ci    if ( aSign != bSign ) {
338962306a36Sopenharmony_ci        return
339062306a36Sopenharmony_ci               aSign
339162306a36Sopenharmony_ci            || (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
339262306a36Sopenharmony_ci                 == 0 );
339362306a36Sopenharmony_ci    }
339462306a36Sopenharmony_ci    return
339562306a36Sopenharmony_ci          aSign ? le128( b.high, b.low, a.high, a.low )
339662306a36Sopenharmony_ci        : le128( a.high, a.low, b.high, b.low );
339762306a36Sopenharmony_ci
339862306a36Sopenharmony_ci}
339962306a36Sopenharmony_ci
340062306a36Sopenharmony_ci/*
340162306a36Sopenharmony_ci-------------------------------------------------------------------------------
340262306a36Sopenharmony_ciReturns 1 if the extended double-precision floating-point value `a' is less
340362306a36Sopenharmony_cithan the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause
340462306a36Sopenharmony_cian exception.  Otherwise, the comparison is performed according to the
340562306a36Sopenharmony_ciIEC/IEEE Standard for Binary Floating-point Arithmetic.
340662306a36Sopenharmony_ci-------------------------------------------------------------------------------
340762306a36Sopenharmony_ci*/
340862306a36Sopenharmony_ciflag floatx80_lt_quiet( floatx80 a, floatx80 b )
340962306a36Sopenharmony_ci{
341062306a36Sopenharmony_ci    flag aSign, bSign;
341162306a36Sopenharmony_ci
341262306a36Sopenharmony_ci    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
341362306a36Sopenharmony_ci              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
341462306a36Sopenharmony_ci         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
341562306a36Sopenharmony_ci              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
341662306a36Sopenharmony_ci       ) {
341762306a36Sopenharmony_ci        /* Do nothing, even if NaN as we're quiet */
341862306a36Sopenharmony_ci        return 0;
341962306a36Sopenharmony_ci    }
342062306a36Sopenharmony_ci    aSign = extractFloatx80Sign( a );
342162306a36Sopenharmony_ci    bSign = extractFloatx80Sign( b );
342262306a36Sopenharmony_ci    if ( aSign != bSign ) {
342362306a36Sopenharmony_ci        return
342462306a36Sopenharmony_ci               aSign
342562306a36Sopenharmony_ci            && (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
342662306a36Sopenharmony_ci                 != 0 );
342762306a36Sopenharmony_ci    }
342862306a36Sopenharmony_ci    return
342962306a36Sopenharmony_ci          aSign ? lt128( b.high, b.low, a.high, a.low )
343062306a36Sopenharmony_ci        : lt128( a.high, a.low, b.high, b.low );
343162306a36Sopenharmony_ci
343262306a36Sopenharmony_ci}
343362306a36Sopenharmony_ci
343462306a36Sopenharmony_ci#endif
343562306a36Sopenharmony_ci
3436