1cbd624adSopenharmony_ci//! Defines rounding schemes for floating-point numbers. 2cbd624adSopenharmony_ci 3cbd624adSopenharmony_ci#![doc(hidden)] 4cbd624adSopenharmony_ci 5cbd624adSopenharmony_ciuse crate::extended_float::ExtendedFloat; 6cbd624adSopenharmony_ciuse crate::mask::{lower_n_halfway, lower_n_mask}; 7cbd624adSopenharmony_ciuse crate::num::Float; 8cbd624adSopenharmony_ci 9cbd624adSopenharmony_ci// ROUNDING 10cbd624adSopenharmony_ci// -------- 11cbd624adSopenharmony_ci 12cbd624adSopenharmony_ci/// Round an extended-precision float to the nearest machine float. 13cbd624adSopenharmony_ci/// 14cbd624adSopenharmony_ci/// Shifts the significant digits into place, adjusts the exponent, 15cbd624adSopenharmony_ci/// so it can be easily converted to a native float. 16cbd624adSopenharmony_ci#[cfg_attr(not(feature = "compact"), inline)] 17cbd624adSopenharmony_cipub fn round<F, Cb>(fp: &mut ExtendedFloat, cb: Cb) 18cbd624adSopenharmony_ciwhere 19cbd624adSopenharmony_ci F: Float, 20cbd624adSopenharmony_ci Cb: Fn(&mut ExtendedFloat, i32), 21cbd624adSopenharmony_ci{ 22cbd624adSopenharmony_ci let fp_inf = ExtendedFloat { 23cbd624adSopenharmony_ci mant: 0, 24cbd624adSopenharmony_ci exp: F::INFINITE_POWER, 25cbd624adSopenharmony_ci }; 26cbd624adSopenharmony_ci 27cbd624adSopenharmony_ci // Calculate our shift in significant digits. 28cbd624adSopenharmony_ci let mantissa_shift = 64 - F::MANTISSA_SIZE - 1; 29cbd624adSopenharmony_ci 30cbd624adSopenharmony_ci // Check for a denormal float, if after the shift the exponent is negative. 31cbd624adSopenharmony_ci if -fp.exp >= mantissa_shift { 32cbd624adSopenharmony_ci // Have a denormal float that isn't a literal 0. 33cbd624adSopenharmony_ci // The extra 1 is to adjust for the denormal float, which is 34cbd624adSopenharmony_ci // `1 - F::EXPONENT_BIAS`. This works as before, because our 35cbd624adSopenharmony_ci // old logic rounded to `F::DENORMAL_EXPONENT` (now 1), and then 36cbd624adSopenharmony_ci // checked if `exp == F::DENORMAL_EXPONENT` and no hidden mask 37cbd624adSopenharmony_ci // bit was set. Here, we handle that here, rather than later. 38cbd624adSopenharmony_ci // 39cbd624adSopenharmony_ci // This might round-down to 0, but shift will be at **max** 65, 40cbd624adSopenharmony_ci // for halfway cases rounding towards 0. 41cbd624adSopenharmony_ci let shift = -fp.exp + 1; 42cbd624adSopenharmony_ci debug_assert!(shift <= 65); 43cbd624adSopenharmony_ci cb(fp, shift.min(64)); 44cbd624adSopenharmony_ci // Check for round-up: if rounding-nearest carried us to the hidden bit. 45cbd624adSopenharmony_ci fp.exp = (fp.mant >= F::HIDDEN_BIT_MASK) as i32; 46cbd624adSopenharmony_ci return; 47cbd624adSopenharmony_ci } 48cbd624adSopenharmony_ci 49cbd624adSopenharmony_ci // The float is normal, round to the hidden bit. 50cbd624adSopenharmony_ci cb(fp, mantissa_shift); 51cbd624adSopenharmony_ci 52cbd624adSopenharmony_ci // Check if we carried, and if so, shift the bit to the hidden bit. 53cbd624adSopenharmony_ci let carry_mask = F::CARRY_MASK; 54cbd624adSopenharmony_ci if fp.mant & carry_mask == carry_mask { 55cbd624adSopenharmony_ci fp.mant >>= 1; 56cbd624adSopenharmony_ci fp.exp += 1; 57cbd624adSopenharmony_ci } 58cbd624adSopenharmony_ci 59cbd624adSopenharmony_ci // Handle if we carried and check for overflow again. 60cbd624adSopenharmony_ci if fp.exp >= F::INFINITE_POWER { 61cbd624adSopenharmony_ci // Exponent is above largest normal value, must be infinite. 62cbd624adSopenharmony_ci *fp = fp_inf; 63cbd624adSopenharmony_ci return; 64cbd624adSopenharmony_ci } 65cbd624adSopenharmony_ci 66cbd624adSopenharmony_ci // Remove the hidden bit. 67cbd624adSopenharmony_ci fp.mant &= F::MANTISSA_MASK; 68cbd624adSopenharmony_ci} 69cbd624adSopenharmony_ci 70cbd624adSopenharmony_ci/// Shift right N-bytes and round towards a direction. 71cbd624adSopenharmony_ci/// 72cbd624adSopenharmony_ci/// Callback should take the following parameters: 73cbd624adSopenharmony_ci/// 1. is_odd 74cbd624adSopenharmony_ci/// 1. is_halfway 75cbd624adSopenharmony_ci/// 1. is_above 76cbd624adSopenharmony_ci#[cfg_attr(not(feature = "compact"), inline)] 77cbd624adSopenharmony_cipub fn round_nearest_tie_even<Cb>(fp: &mut ExtendedFloat, shift: i32, cb: Cb) 78cbd624adSopenharmony_ciwhere 79cbd624adSopenharmony_ci // is_odd, is_halfway, is_above 80cbd624adSopenharmony_ci Cb: Fn(bool, bool, bool) -> bool, 81cbd624adSopenharmony_ci{ 82cbd624adSopenharmony_ci // Ensure we've already handled denormal values that underflow. 83cbd624adSopenharmony_ci debug_assert!(shift <= 64); 84cbd624adSopenharmony_ci 85cbd624adSopenharmony_ci // Extract the truncated bits using mask. 86cbd624adSopenharmony_ci // Calculate if the value of the truncated bits are either above 87cbd624adSopenharmony_ci // the mid-way point, or equal to it. 88cbd624adSopenharmony_ci // 89cbd624adSopenharmony_ci // For example, for 4 truncated bytes, the mask would be 0b1111 90cbd624adSopenharmony_ci // and the midway point would be 0b1000. 91cbd624adSopenharmony_ci let mask = lower_n_mask(shift as u64); 92cbd624adSopenharmony_ci let halfway = lower_n_halfway(shift as u64); 93cbd624adSopenharmony_ci let truncated_bits = fp.mant & mask; 94cbd624adSopenharmony_ci let is_above = truncated_bits > halfway; 95cbd624adSopenharmony_ci let is_halfway = truncated_bits == halfway; 96cbd624adSopenharmony_ci 97cbd624adSopenharmony_ci // Bit shift so the leading bit is in the hidden bit. 98cbd624adSopenharmony_ci // This optimixes pretty well: 99cbd624adSopenharmony_ci // ```text 100cbd624adSopenharmony_ci // mov ecx, esi 101cbd624adSopenharmony_ci // shr rdi, cl 102cbd624adSopenharmony_ci // xor eax, eax 103cbd624adSopenharmony_ci // cmp esi, 64 104cbd624adSopenharmony_ci // cmovne rax, rdi 105cbd624adSopenharmony_ci // ret 106cbd624adSopenharmony_ci // ``` 107cbd624adSopenharmony_ci fp.mant = match shift == 64 { 108cbd624adSopenharmony_ci true => 0, 109cbd624adSopenharmony_ci false => fp.mant >> shift, 110cbd624adSopenharmony_ci }; 111cbd624adSopenharmony_ci fp.exp += shift; 112cbd624adSopenharmony_ci 113cbd624adSopenharmony_ci // Extract the last bit after shifting (and determine if it is odd). 114cbd624adSopenharmony_ci let is_odd = fp.mant & 1 == 1; 115cbd624adSopenharmony_ci 116cbd624adSopenharmony_ci // Calculate if we need to roundup. 117cbd624adSopenharmony_ci // We need to roundup if we are above halfway, or if we are odd 118cbd624adSopenharmony_ci // and at half-way (need to tie-to-even). Avoid the branch here. 119cbd624adSopenharmony_ci fp.mant += cb(is_odd, is_halfway, is_above) as u64; 120cbd624adSopenharmony_ci} 121cbd624adSopenharmony_ci 122cbd624adSopenharmony_ci/// Round our significant digits into place, truncating them. 123cbd624adSopenharmony_ci#[cfg_attr(not(feature = "compact"), inline)] 124cbd624adSopenharmony_cipub fn round_down(fp: &mut ExtendedFloat, shift: i32) { 125cbd624adSopenharmony_ci // Might have a shift greater than 64 if we have an error. 126cbd624adSopenharmony_ci fp.mant = match shift == 64 { 127cbd624adSopenharmony_ci true => 0, 128cbd624adSopenharmony_ci false => fp.mant >> shift, 129cbd624adSopenharmony_ci }; 130cbd624adSopenharmony_ci fp.exp += shift; 131cbd624adSopenharmony_ci} 132