//! Defines rounding schemes for floating-point numbers.
2cbd624adSopenharmony_ci
3cbd624adSopenharmony_ci#![doc(hidden)]
4cbd624adSopenharmony_ci
5cbd624adSopenharmony_ciuse crate::extended_float::ExtendedFloat;
6cbd624adSopenharmony_ciuse crate::mask::{lower_n_halfway, lower_n_mask};
7cbd624adSopenharmony_ciuse crate::num::Float;
8cbd624adSopenharmony_ci
// ROUNDING
// --------
11cbd624adSopenharmony_ci
12cbd624adSopenharmony_ci/// Round an extended-precision float to the nearest machine float.
13cbd624adSopenharmony_ci///
14cbd624adSopenharmony_ci/// Shifts the significant digits into place, adjusts the exponent,
15cbd624adSopenharmony_ci/// so it can be easily converted to a native float.
16cbd624adSopenharmony_ci#[cfg_attr(not(feature = "compact"), inline)]
17cbd624adSopenharmony_cipub fn round<F, Cb>(fp: &mut ExtendedFloat, cb: Cb)
18cbd624adSopenharmony_ciwhere
19cbd624adSopenharmony_ci    F: Float,
20cbd624adSopenharmony_ci    Cb: Fn(&mut ExtendedFloat, i32),
21cbd624adSopenharmony_ci{
22cbd624adSopenharmony_ci    let fp_inf = ExtendedFloat {
23cbd624adSopenharmony_ci        mant: 0,
24cbd624adSopenharmony_ci        exp: F::INFINITE_POWER,
25cbd624adSopenharmony_ci    };
26cbd624adSopenharmony_ci
27cbd624adSopenharmony_ci    // Calculate our shift in significant digits.
28cbd624adSopenharmony_ci    let mantissa_shift = 64 - F::MANTISSA_SIZE - 1;
29cbd624adSopenharmony_ci
30cbd624adSopenharmony_ci    // Check for a denormal float, if after the shift the exponent is negative.
31cbd624adSopenharmony_ci    if -fp.exp >= mantissa_shift {
32cbd624adSopenharmony_ci        // Have a denormal float that isn't a literal 0.
33cbd624adSopenharmony_ci        // The extra 1 is to adjust for the denormal float, which is
34cbd624adSopenharmony_ci        // `1 - F::EXPONENT_BIAS`. This works as before, because our
35cbd624adSopenharmony_ci        // old logic rounded to `F::DENORMAL_EXPONENT` (now 1), and then
36cbd624adSopenharmony_ci        // checked if `exp == F::DENORMAL_EXPONENT` and no hidden mask
37cbd624adSopenharmony_ci        // bit was set. Here, we handle that here, rather than later.
38cbd624adSopenharmony_ci        //
39cbd624adSopenharmony_ci        // This might round-down to 0, but shift will be at **max** 65,
40cbd624adSopenharmony_ci        // for halfway cases rounding towards 0.
41cbd624adSopenharmony_ci        let shift = -fp.exp + 1;
42cbd624adSopenharmony_ci        debug_assert!(shift <= 65);
43cbd624adSopenharmony_ci        cb(fp, shift.min(64));
44cbd624adSopenharmony_ci        // Check for round-up: if rounding-nearest carried us to the hidden bit.
45cbd624adSopenharmony_ci        fp.exp = (fp.mant >= F::HIDDEN_BIT_MASK) as i32;
46cbd624adSopenharmony_ci        return;
47cbd624adSopenharmony_ci    }
48cbd624adSopenharmony_ci
49cbd624adSopenharmony_ci    // The float is normal, round to the hidden bit.
50cbd624adSopenharmony_ci    cb(fp, mantissa_shift);
51cbd624adSopenharmony_ci
52cbd624adSopenharmony_ci    // Check if we carried, and if so, shift the bit to the hidden bit.
53cbd624adSopenharmony_ci    let carry_mask = F::CARRY_MASK;
54cbd624adSopenharmony_ci    if fp.mant & carry_mask == carry_mask {
55cbd624adSopenharmony_ci        fp.mant >>= 1;
56cbd624adSopenharmony_ci        fp.exp += 1;
57cbd624adSopenharmony_ci    }
58cbd624adSopenharmony_ci
59cbd624adSopenharmony_ci    // Handle if we carried and check for overflow again.
60cbd624adSopenharmony_ci    if fp.exp >= F::INFINITE_POWER {
61cbd624adSopenharmony_ci        // Exponent is above largest normal value, must be infinite.
62cbd624adSopenharmony_ci        *fp = fp_inf;
63cbd624adSopenharmony_ci        return;
64cbd624adSopenharmony_ci    }
65cbd624adSopenharmony_ci
66cbd624adSopenharmony_ci    // Remove the hidden bit.
67cbd624adSopenharmony_ci    fp.mant &= F::MANTISSA_MASK;
68cbd624adSopenharmony_ci}
69cbd624adSopenharmony_ci
70cbd624adSopenharmony_ci/// Shift right N-bytes and round towards a direction.
71cbd624adSopenharmony_ci///
72cbd624adSopenharmony_ci/// Callback should take the following parameters:
73cbd624adSopenharmony_ci///     1. is_odd
74cbd624adSopenharmony_ci///     1. is_halfway
75cbd624adSopenharmony_ci///     1. is_above
76cbd624adSopenharmony_ci#[cfg_attr(not(feature = "compact"), inline)]
77cbd624adSopenharmony_cipub fn round_nearest_tie_even<Cb>(fp: &mut ExtendedFloat, shift: i32, cb: Cb)
78cbd624adSopenharmony_ciwhere
79cbd624adSopenharmony_ci    // is_odd, is_halfway, is_above
80cbd624adSopenharmony_ci    Cb: Fn(bool, bool, bool) -> bool,
81cbd624adSopenharmony_ci{
82cbd624adSopenharmony_ci    // Ensure we've already handled denormal values that underflow.
83cbd624adSopenharmony_ci    debug_assert!(shift <= 64);
84cbd624adSopenharmony_ci
85cbd624adSopenharmony_ci    // Extract the truncated bits using mask.
86cbd624adSopenharmony_ci    // Calculate if the value of the truncated bits are either above
87cbd624adSopenharmony_ci    // the mid-way point, or equal to it.
88cbd624adSopenharmony_ci    //
89cbd624adSopenharmony_ci    // For example, for 4 truncated bytes, the mask would be 0b1111
90cbd624adSopenharmony_ci    // and the midway point would be 0b1000.
91cbd624adSopenharmony_ci    let mask = lower_n_mask(shift as u64);
92cbd624adSopenharmony_ci    let halfway = lower_n_halfway(shift as u64);
93cbd624adSopenharmony_ci    let truncated_bits = fp.mant & mask;
94cbd624adSopenharmony_ci    let is_above = truncated_bits > halfway;
95cbd624adSopenharmony_ci    let is_halfway = truncated_bits == halfway;
96cbd624adSopenharmony_ci
97cbd624adSopenharmony_ci    // Bit shift so the leading bit is in the hidden bit.
98cbd624adSopenharmony_ci    // This optimixes pretty well:
99cbd624adSopenharmony_ci    //  ```text
100cbd624adSopenharmony_ci    //   mov     ecx, esi
101cbd624adSopenharmony_ci    //   shr     rdi, cl
102cbd624adSopenharmony_ci    //   xor     eax, eax
103cbd624adSopenharmony_ci    //   cmp     esi, 64
104cbd624adSopenharmony_ci    //   cmovne  rax, rdi
105cbd624adSopenharmony_ci    //   ret
106cbd624adSopenharmony_ci    //  ```
107cbd624adSopenharmony_ci    fp.mant = match shift == 64 {
108cbd624adSopenharmony_ci        true => 0,
109cbd624adSopenharmony_ci        false => fp.mant >> shift,
110cbd624adSopenharmony_ci    };
111cbd624adSopenharmony_ci    fp.exp += shift;
112cbd624adSopenharmony_ci
113cbd624adSopenharmony_ci    // Extract the last bit after shifting (and determine if it is odd).
114cbd624adSopenharmony_ci    let is_odd = fp.mant & 1 == 1;
115cbd624adSopenharmony_ci
116cbd624adSopenharmony_ci    // Calculate if we need to roundup.
117cbd624adSopenharmony_ci    // We need to roundup if we are above halfway, or if we are odd
118cbd624adSopenharmony_ci    // and at half-way (need to tie-to-even). Avoid the branch here.
119cbd624adSopenharmony_ci    fp.mant += cb(is_odd, is_halfway, is_above) as u64;
120cbd624adSopenharmony_ci}
121cbd624adSopenharmony_ci
122cbd624adSopenharmony_ci/// Round our significant digits into place, truncating them.
123cbd624adSopenharmony_ci#[cfg_attr(not(feature = "compact"), inline)]
124cbd624adSopenharmony_cipub fn round_down(fp: &mut ExtendedFloat, shift: i32) {
125cbd624adSopenharmony_ci    // Might have a shift greater than 64 if we have an error.
126cbd624adSopenharmony_ci    fp.mant = match shift == 64 {
127cbd624adSopenharmony_ci        true => 0,
128cbd624adSopenharmony_ci        false => fp.mant >> shift,
129cbd624adSopenharmony_ci    };
130cbd624adSopenharmony_ci    fp.exp += shift;
131cbd624adSopenharmony_ci}
132