//! A simple big-integer type for slow path algorithms.
//!
//! This includes a minimal stack vector for use in big-integer arithmetic.

#![doc(hidden)]

#[cfg(feature = "alloc")]
use crate::heapvec::HeapVec;
use crate::num::Float;
#[cfg(not(feature = "alloc"))]
use crate::stackvec::StackVec;
#[cfg(not(feature = "compact"))]
use crate::table::{LARGE_POW5, LARGE_POW5_STEP};
use core::{cmp, ops, ptr};

/// Number of bits in a Bigint.
///
/// This needs to be at least the number of bits required to store
/// a Bigint, which is `log2(radix**digits)`:
/// ≅ 3600 for base-10, rounded up to 4000.
pub const BIGINT_BITS: usize = 4000;

/// The number of limbs for the bigint.
pub const BIGINT_LIMBS: usize = BIGINT_BITS / LIMB_BITS;

#[cfg(feature = "alloc")]
pub type VecType = HeapVec;

#[cfg(not(feature = "alloc"))]
pub type VecType = StackVec;

/// Storage for a big integer type.
///
/// This is used for algorithms when we have a finite number of digits.
/// Specifically, it stores all the significant digits scaled to the
/// proper exponent, as an integral type, and then directly compares
/// these digits.
///
/// This requires us to store the number of significant bits, plus the
/// number of exponent bits (required since we scale everything
/// to the same exponent).
#[derive(Clone, PartialEq, Eq)]
pub struct Bigint {
    /// Significant digits for the float, stored in a big integer in LE order.
    ///
    /// This is pretty much the same number of digits for any radix, since the
    /// significant digits balance out the zeros from the exponent:
    /// 1. Decimal is 1091 digits, 767 mantissa digits + 324 exponent zeros.
    /// 2. Base 6 is 1097 digits, or 680 mantissa digits + 417 exponent zeros.
    /// 3. Base 36 is 1086 digits, or 877 mantissa digits + 209 exponent zeros.
    ///
    /// However, the number of bytes required is larger for large radixes:
    /// for decimal, we need `log2(10**1091) ≅ 3600`, while for base 36
    /// we need `log2(36**1086) ≅ 5600`. Since we use uninitialized data,
    /// we avoid a major performance hit from the large buffer size.
    pub data: VecType,
}

#[allow(clippy::new_without_default)]
impl Bigint {
    /// Construct a bigint representing 0.
    #[inline(always)]
    pub fn new() -> Self {
        Self {
            data: VecType::new(),
        }
    }

    /// Construct a bigint from an integer.
    #[inline(always)]
    pub fn from_u64(value: u64) -> Self {
        Self {
            data: VecType::from_u64(value),
        }
    }

    /// Get the high 64 bits from the big integer, and if any lower bits were truncated.
    #[inline(always)]
    pub fn hi64(&self) -> (u64, bool) {
        self.data.hi64()
    }

    /// Multiply and assign as if by exponentiation by a power.
    #[inline]
    pub fn pow(&mut self, base: u32, exp: u32) -> Option<()> {
        debug_assert!(base == 2 || base == 5 || base == 10);
        if base % 5 == 0 {
            pow(&mut self.data, exp)?;
        }
        if base % 2 == 0 {
            shl(&mut self.data, exp as usize)?;
        }
        Some(())
    }

    /// Calculate the bit-length of the big-integer.
    #[inline]
    pub fn bit_length(&self) -> u32 {
        bit_length(&self.data)
    }
}

impl ops::MulAssign<&Bigint> for Bigint {
    fn mul_assign(&mut self, rhs: &Bigint) {
        self.data *= &rhs.data;
    }
}
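
// A minimal usage sketch of `Bigint`, assuming only the API declared above
// (`from_u64`, `pow`, `MulAssign`, and the derived `PartialEq`); it is purely
// illustrative and makes no claims beyond those helpers.
#[cfg(test)]
mod bigint_usage_sketch {
    use super::*;

    #[test]
    fn pow_then_mul_assign() {
        // 3 * 10^2 == 300, built via `pow`...
        let mut x = Bigint::from_u64(3);
        x.pow(10, 2).unwrap();
        // ...should match 300 built directly from an integer.
        assert!(x == Bigint::from_u64(300));

        // Multiplying by 2 via `MulAssign` gives 600.
        let two = Bigint::from_u64(2);
        x *= &two;
        assert!(x == Bigint::from_u64(600));
    }
}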
// REVERSE VIEW
// ------------

/// Reverse, immutable view of a sequence.
pub struct ReverseView<'a, T: 'a> {
    inner: &'a [T],
}

impl<'a, T> ops::Index<usize> for ReverseView<'a, T> {
    type Output = T;

    #[inline]
    fn index(&self, index: usize) -> &T {
        let len = self.inner.len();
        &(*self.inner)[len - index - 1]
    }
}

/// Create a reverse view of the vector for indexing.
#[inline]
pub fn rview(x: &[Limb]) -> ReverseView<Limb> {
    ReverseView {
        inner: x,
    }
}

// COMPARE
// -------

/// Compare `x` to `y`, in little-endian order.
#[inline]
pub fn compare(x: &[Limb], y: &[Limb]) -> cmp::Ordering {
    match x.len().cmp(&y.len()) {
        cmp::Ordering::Equal => {
            let iter = x.iter().rev().zip(y.iter().rev());
            for (&xi, yi) in iter {
                match xi.cmp(yi) {
                    cmp::Ordering::Equal => (),
                    ord => return ord,
                }
            }
            // Equal case.
            cmp::Ordering::Equal
        },
        ord => ord,
    }
}

// NORMALIZE
// ---------

/// Normalize the integer, so any leading zero values are removed.
#[inline]
pub fn normalize(x: &mut VecType) {
    // We don't care if this wraps: the index is bounds-checked.
    while let Some(&value) = x.get(x.len().wrapping_sub(1)) {
        if value == 0 {
            unsafe { x.set_len(x.len() - 1) };
        } else {
            break;
        }
    }
}

/// Get if the big integer is normalized.
#[inline]
#[allow(clippy::match_like_matches_macro)]
pub fn is_normalized(x: &[Limb]) -> bool {
    // We don't care if this wraps: the index is bounds-checked.
    match x.get(x.len().wrapping_sub(1)) {
        Some(&0) => false,
        _ => true,
    }
}

// FROM
// ----

/// Create a limb vector from a `u64` value.
#[inline(always)]
#[allow(clippy::branches_sharing_code)]
pub fn from_u64(x: u64) -> VecType {
    let mut vec = VecType::new();
    debug_assert!(vec.capacity() >= 2);
    if LIMB_BITS == 32 {
        vec.try_push(x as Limb).unwrap();
        vec.try_push((x >> 32) as Limb).unwrap();
    } else {
        vec.try_push(x as Limb).unwrap();
    }
    vec.normalize();
    vec
}

// HI
// --

/// Check if any of the remaining bits are non-zero.
///
/// # Panics
///
/// Panics if `rindex > x.len()`.
#[inline]
pub fn nonzero(x: &[Limb], rindex: usize) -> bool {
    debug_assert!(rindex <= x.len());

    let len = x.len();
    let slc = &x[..len - rindex];
    slc.iter().rev().any(|&x| x != 0)
}

// These functions return the high 64 bits and whether any lower bits were truncated.

/// Shift 32-bit integer to high 64-bits.
#[inline]
pub fn u32_to_hi64_1(r0: u32) -> (u64, bool) {
    u64_to_hi64_1(r0 as u64)
}

/// Shift 2 32-bit integers to high 64-bits.
#[inline]
pub fn u32_to_hi64_2(r0: u32, r1: u32) -> (u64, bool) {
    let r0 = (r0 as u64) << 32;
    let r1 = r1 as u64;
    u64_to_hi64_1(r0 | r1)
}

/// Shift 3 32-bit integers to high 64-bits.
#[inline]
pub fn u32_to_hi64_3(r0: u32, r1: u32, r2: u32) -> (u64, bool) {
    let r0 = r0 as u64;
    let r1 = (r1 as u64) << 32;
    let r2 = r2 as u64;
    u64_to_hi64_2(r0, r1 | r2)
}

/// Shift 64-bit integer to high 64-bits.
#[inline]
pub fn u64_to_hi64_1(r0: u64) -> (u64, bool) {
    let ls = r0.leading_zeros();
    (r0 << ls, false)
}
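
// An illustrative sketch of the reverse view and little-endian comparison
// above; the limb values are small enough to be valid for either limb width.
#[cfg(test)]
mod view_and_compare_sketch {
    use super::*;

    #[test]
    fn rview_indexes_from_the_back() {
        let limbs: [Limb; 3] = [1, 2, 3];
        let view = rview(&limbs);
        // Index 0 is the most-significant (last) limb.
        assert!(view[0] == 3 && view[2] == 1);
    }

    #[test]
    fn compare_checks_length_then_high_limbs() {
        // `[0, 1]` encodes `1 << LIMB_BITS`, which is larger than `[5]`.
        assert!(compare(&[0, 1], &[5]) == cmp::Ordering::Greater);
        // Equal lengths are compared from the most-significant limb down.
        assert!(compare(&[7, 2], &[9, 1]) == cmp::Ordering::Greater);
    }
}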
/// Shift 2 64-bit integers to high 64-bits.
#[inline]
pub fn u64_to_hi64_2(r0: u64, r1: u64) -> (u64, bool) {
    let ls = r0.leading_zeros();
    let rs = 64 - ls;
    let v = match ls {
        0 => r0,
        _ => (r0 << ls) | (r1 >> rs),
    };
    let n = r1 << ls != 0;
    (v, n)
}

/// Extract the hi bits from the buffer.
macro_rules! hi {
    // # Safety
    //
    // Safe as long as `stackvec.len() >= 1`.
    (@1 $self:ident, $rview:ident, $t:ident, $fn:ident) => {{
        $fn($rview[0] as $t)
    }};

    // # Safety
    //
    // Safe as long as `stackvec.len() >= 2`.
    (@2 $self:ident, $rview:ident, $t:ident, $fn:ident) => {{
        let r0 = $rview[0] as $t;
        let r1 = $rview[1] as $t;
        $fn(r0, r1)
    }};

    // # Safety
    //
    // Safe as long as `stackvec.len() >= 2`.
    (@nonzero2 $self:ident, $rview:ident, $t:ident, $fn:ident) => {{
        let (v, n) = hi!(@2 $self, $rview, $t, $fn);
        (v, n || nonzero($self, 2))
    }};

    // # Safety
    //
    // Safe as long as `stackvec.len() >= 3`.
    (@3 $self:ident, $rview:ident, $t:ident, $fn:ident) => {{
        let r0 = $rview[0] as $t;
        let r1 = $rview[1] as $t;
        let r2 = $rview[2] as $t;
        $fn(r0, r1, r2)
    }};

    // # Safety
    //
    // Safe as long as `stackvec.len() >= 3`.
    (@nonzero3 $self:ident, $rview:ident, $t:ident, $fn:ident) => {{
        let (v, n) = hi!(@3 $self, $rview, $t, $fn);
        (v, n || nonzero($self, 3))
    }};
}

/// Get the high 64 bits from the vector.
#[inline(always)]
pub fn hi64(x: &[Limb]) -> (u64, bool) {
    let rslc = rview(x);
    // SAFETY: the match on `x.len()` guarantees each arm only reads in-bounds limbs.
    match x.len() {
        0 => (0, false),
        1 if LIMB_BITS == 32 => hi!(@1 x, rslc, u32, u32_to_hi64_1),
        1 => hi!(@1 x, rslc, u64, u64_to_hi64_1),
        2 if LIMB_BITS == 32 => hi!(@2 x, rslc, u32, u32_to_hi64_2),
        2 => hi!(@2 x, rslc, u64, u64_to_hi64_2),
        _ if LIMB_BITS == 32 => hi!(@nonzero3 x, rslc, u32, u32_to_hi64_3),
        _ => hi!(@nonzero2 x, rslc, u64, u64_to_hi64_2),
    }
}
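
// An illustrative sketch of `hi64`'s contract: the returned value has its
// most-significant bit set (for non-zero input), and the flag reports whether
// any bits below the top 64 were non-zero. Only the helpers above are assumed.
#[cfg(test)]
mod hi64_sketch {
    use super::*;

    #[test]
    fn hi64_normalizes_and_reports_truncation() {
        // A single small limb is shifted up so the leading bit is set.
        assert!(hi64(&[1]) == (1u64 << 63, false));

        // Non-zero bits below the top 64 set the truncation flag.
        let (_, truncated) = hi64(&[1, 1, 1]);
        assert!(truncated);
    }
}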
// POWERS
// ------

/// MulAssign by a power of 5.
///
/// Theoretically...
///
/// Use an exponentiation by squaring method, since it reduces the time
/// complexity of the multiplication to ~`O(log(n))` for the squaring,
/// and `O(n*m)` for the result. Since `m` is typically a lower-order
/// factor, this significantly reduces the number of multiplications
/// we need to do. Iteratively multiplying by small powers follows
/// the nth triangular number series, which scales as `O(p^2)`, but
/// where `p` is `n+m`. In short, it scales very poorly.
///
/// Practically...
///
/// ```text
/// Exponentiation by Squaring:
/// running 2 tests
/// test bigcomp_f32_lexical ... bench: 1,018 ns/iter (+/- 78)
/// test bigcomp_f64_lexical ... bench: 3,639 ns/iter (+/- 1,007)
///
/// Exponentiation by Iterative Small Powers:
/// running 2 tests
/// test bigcomp_f32_lexical ... bench: 518 ns/iter (+/- 31)
/// test bigcomp_f64_lexical ... bench: 583 ns/iter (+/- 47)
///
/// Exponentiation by Iterative Large Powers (of 2):
/// running 2 tests
/// test bigcomp_f32_lexical ... bench: 671 ns/iter (+/- 31)
/// test bigcomp_f64_lexical ... bench: 1,394 ns/iter (+/- 47)
/// ```
///
/// The following benchmarks were run on `1 * 5^300`, using native `pow`,
/// a version with only small powers, and one with pre-computed powers
/// of `5^(3 * max_exp)`, rather than `5^(5 * max_exp)`:
///
/// ```text
/// pow/default    time: [426.20 ns 427.96 ns 429.89 ns]
/// pow/small      time: [2.9270 us 2.9411 us 2.9565 us]
/// pow/large:3    time: [838.51 ns 842.21 ns 846.27 ns]
/// ```
///
/// However, using large powers is crucial for good performance at higher
/// powers.
///
/// Even using worst-case scenarios, exponentiation by squaring is
/// significantly slower for our workloads. Just multiply by small powers,
/// in simple cases, and use precalculated large powers in other cases.
///
/// Furthermore, using sufficiently big large powers is also crucial for
/// performance. This is a tradeoff of binary size and performance, and
/// using a single value at ~`5^(5 * max_exp)` seems optimal.
pub fn pow(x: &mut VecType, mut exp: u32) -> Option<()> {
    // Minimize the number of iterations for large exponents: just
    // do a few steps with large powers.
    #[cfg(not(feature = "compact"))]
    {
        while exp >= LARGE_POW5_STEP {
            large_mul(x, &LARGE_POW5)?;
            exp -= LARGE_POW5_STEP;
        }
    }

    // Now use our pre-computed small powers iteratively.
    // The step is calculated as `⌊log5(2^LIMB_BITS - 1)⌋`.
    let small_step = if LIMB_BITS == 32 {
        13
    } else {
        27
    };
    let max_native = (5 as Limb).pow(small_step);
    while exp >= small_step {
        small_mul(x, max_native)?;
        exp -= small_step;
    }
    if exp != 0 {
        // SAFETY: safe, since `exp < small_step`.
        let small_power = unsafe { f64::int_pow_fast_path(exp as usize, 5) };
        small_mul(x, small_power as Limb)?;
    }
    Some(())
}

// SCALAR
// ------

/// Add two small integers and return the resulting value and whether overflow occurred.
#[inline(always)]
pub fn scalar_add(x: Limb, y: Limb) -> (Limb, bool) {
    x.overflowing_add(y)
}

/// Multiply two small integers (with carry), returning the overflow contribution.
///
/// Returns the (low, high) components.
#[inline(always)]
pub fn scalar_mul(x: Limb, y: Limb, carry: Limb) -> (Limb, Limb) {
    // Cannot overflow, as long as the wide type is twice as wide as the
    // narrow type, because the following is always true:
    // `Wide::MAX - (Narrow::MAX * Narrow::MAX) >= Narrow::MAX`
    let z: Wide = (x as Wide) * (y as Wide) + (carry as Wide);
    (z as Limb, (z >> LIMB_BITS) as Limb)
}

// SMALL
// -----

/// Add a small integer to the bigint, starting from the given offset.
#[inline]
pub fn small_add_from(x: &mut VecType, y: Limb, start: usize) -> Option<()> {
    let mut index = start;
    let mut carry = y;
    while carry != 0 && index < x.len() {
        let result = scalar_add(x[index], carry);
        x[index] = result.0;
        carry = result.1 as Limb;
        index += 1;
    }
    // If we carried past all the elements, add to the end of the buffer.
    if carry != 0 {
        x.try_push(carry)?;
    }
    Some(())
}

/// Add a small integer to the bigint.
#[inline(always)]
pub fn small_add(x: &mut VecType, y: Limb) -> Option<()> {
    small_add_from(x, y, 0)
}

/// Multiply the bigint by a small integer.
#[inline]
pub fn small_mul(x: &mut VecType, y: Limb) -> Option<()> {
    let mut carry = 0;
    for xi in x.iter_mut() {
        let result = scalar_mul(*xi, y, carry);
        *xi = result.0;
        carry = result.1;
    }
    // If we carried past all the elements, add to the end of the buffer.
    if carry != 0 {
        x.try_push(carry)?;
    }
    Some(())
}
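
// An illustrative sketch of carry propagation in the small-integer helpers
// above; the assertions use `bit_length` so they hold for both 32-bit and
// 64-bit limbs.
#[cfg(test)]
mod small_ops_sketch {
    use super::*;

    #[test]
    fn small_add_carries_into_a_new_limb() {
        // `Limb::MAX + 1 == 2^LIMB_BITS`, which needs one extra bit.
        let mut x = from_u64(Limb::MAX as u64);
        small_add(&mut x, 1).unwrap();
        assert!(bit_length(&x) == LIMB_BITS as u32 + 1);
    }

    #[test]
    fn small_mul_pushes_the_overflow() {
        // `u64::MAX * 2` needs 65 bits, one more than the input.
        let mut x = from_u64(u64::MAX);
        small_mul(&mut x, 2).unwrap();
        assert!(bit_length(&x) == 65);
    }
}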
// LARGE
// -----

/// Add a bigint to a bigint, starting from the given offset.
pub fn large_add_from(x: &mut VecType, y: &[Limb], start: usize) -> Option<()> {
    // The effective x buffer is from `start..x.len()`, so we need to treat
    // that as the current range. If the effective y buffer is longer, we need
    // to resize x to that length, plus the start index.
    if y.len() > x.len().saturating_sub(start) {
        // Bail out (return `None`) if we can't extend the buffer: this
        // avoids any out-of-bounds accesses afterwards.
        x.try_resize(y.len() + start, 0)?;
    }

    // Iteratively add elements from y to x.
    let mut carry = false;
    for (index, &yi) in y.iter().enumerate() {
        // The resize above guarantees this index is in bounds, so the
        // `unwrap` cannot fail.
        let xi = x.get_mut(start + index).unwrap();

        // Only one of the two adds can overflow, since we add at most
        // `Limb::MAX + Limb::MAX`. Add the previous carry, and store
        // the current carry for the next iteration.
        let result = scalar_add(*xi, yi);
        *xi = result.0;
        let mut tmp = result.1;
        if carry {
            let result = scalar_add(*xi, 1);
            *xi = result.0;
            tmp |= result.1;
        }
        carry = tmp;
    }

    // Handle overflow.
    if carry {
        small_add_from(x, 1, y.len() + start)?;
    }
    Some(())
}

/// Add a bigint to a bigint.
#[inline(always)]
pub fn large_add(x: &mut VecType, y: &[Limb]) -> Option<()> {
    large_add_from(x, y, 0)
}
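
// An illustrative sketch of multi-limb addition: the carry ripples through and
// grows the buffer when the sum needs an extra limb. Only the helpers defined
// above are assumed.
#[cfg(test)]
mod large_add_sketch {
    use super::*;

    #[test]
    fn large_add_carries_across_limbs() {
        // x = 2^(2 * LIMB_BITS) - 1, i.e. two limbs of all-ones.
        let mut x = VecType::new();
        x.try_push(Limb::MAX).unwrap();
        x.try_push(Limb::MAX).unwrap();

        // Adding 1 ripples the carry into a third limb.
        large_add(&mut x, &[1]).unwrap();
        assert!(bit_length(&x) == 2 * LIMB_BITS as u32 + 1);
    }
}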
/// Grade-school multiplication algorithm.
///
/// Slow, naive algorithm, using limb-bit bases and just shifting left for
/// each iteration. This could be optimized with numerous other algorithms,
/// but it's extremely simple, and works in O(n*m) time, which is fine
/// by me. Each iteration, of which there are `m` iterations, requires
/// `n` multiplications, and `n` additions, or grade-school multiplication.
///
/// Don't use Karatsuba multiplication, since our implementation turns out
/// to be slower in practice, which is likely just due to the small sizes
/// we deal with here. For example, running on the following data:
///
/// ```text
/// const SMALL_X: &[u32] = &[
///     766857581, 3588187092, 1583923090, 2204542082, 1564708913, 2695310100, 3676050286,
///     1022770393, 468044626, 446028186
/// ];
/// const SMALL_Y: &[u32] = &[
///     3945492125, 3250752032, 1282554898, 1708742809, 1131807209, 3171663979, 1353276095,
///     1678845844, 2373924447, 3640713171
/// ];
/// const LARGE_X: &[u32] = &[
///     3647536243, 2836434412, 2154401029, 1297917894, 137240595, 790694805, 2260404854,
///     3872698172, 690585094, 99641546, 3510774932, 1672049983, 2313458559, 2017623719,
///     638180197, 1140936565, 1787190494, 1797420655, 14113450, 2350476485, 3052941684,
///     1993594787, 2901001571, 4156930025, 1248016552, 848099908, 2660577483, 4030871206,
///     692169593, 2835966319, 1781364505, 4266390061, 1813581655, 4210899844, 2137005290,
///     2346701569, 3715571980, 3386325356, 1251725092, 2267270902, 474686922, 2712200426,
///     197581715, 3087636290, 1379224439, 1258285015, 3230794403, 2759309199, 1494932094,
///     326310242
/// ];
/// const LARGE_Y: &[u32] = &[
///     1574249566, 868970575, 76716509, 3198027972, 1541766986, 1095120699, 3891610505,
///     2322545818, 1677345138, 865101357, 2650232883, 2831881215, 3985005565, 2294283760,
///     3468161605, 393539559, 3665153349, 1494067812, 106699483, 2596454134, 797235106,
///     705031740, 1209732933, 2732145769, 4122429072, 141002534, 790195010, 4014829800,
///     1303930792, 3649568494, 308065964, 1233648836, 2807326116, 79326486, 1262500691,
///     621809229, 2258109428, 3819258501, 171115668, 1139491184, 2979680603, 1333372297,
///     1657496603, 2790845317, 4090236532, 4220374789, 601876604, 1828177209, 2372228171,
///     2247372529
/// ];
/// ```
///
/// We get the following results:
///
/// ```text
/// mul/small:long          time: [220.23 ns 221.47 ns 222.81 ns]
/// Found 4 outliers among 100 measurements (4.00%)
///   2 (2.00%) high mild
///   2 (2.00%) high severe
/// mul/small:karatsuba     time: [233.88 ns 234.63 ns 235.44 ns]
/// Found 11 outliers among 100 measurements (11.00%)
///   8 (8.00%) high mild
///   3 (3.00%) high severe
/// mul/large:long          time: [1.9365 us 1.9455 us 1.9558 us]
/// Found 12 outliers among 100 measurements (12.00%)
///   7 (7.00%) high mild
///   5 (5.00%) high severe
/// mul/large:karatsuba     time: [4.4250 us 4.4515 us 4.4812 us]
/// ```
///
/// In short, Karatsuba multiplication is never worthwhile for our use-case.
pub fn long_mul(x: &[Limb], y: &[Limb]) -> Option<VecType> {
    // Using the immutable value, multiply by all the scalars in y, using
    // the algorithm defined above. Use a single buffer to avoid
    // frequent reallocations. Handle the first case separately to avoid
    // a redundant addition when y is non-empty.
    let mut z = VecType::try_from(x)?;
    if !y.is_empty() {
        let y0 = y[0];
        small_mul(&mut z, y0)?;

        for (index, &yi) in y.iter().enumerate().skip(1) {
            if yi != 0 {
                let mut zi = VecType::try_from(x)?;
                small_mul(&mut zi, yi)?;
                large_add_from(&mut z, &zi, index)?;
            }
        }
    }

    z.normalize();
    Some(z)
}
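
// An illustrative sketch of `long_mul` on a product that is easy to verify by
// hand: `0xFFFF_FFFF * 0x1_0000_0001 == u64::MAX`. The check goes through
// `hi64`, which is exact here because the result fits in 64 bits.
#[cfg(test)]
mod long_mul_sketch {
    use super::*;

    #[test]
    fn long_mul_matches_a_known_u64_product() {
        let x = from_u64(0xFFFF_FFFF);
        let y = from_u64(0x1_0000_0001);
        let z = long_mul(&x, &y).unwrap();
        assert!(hi64(&z) == (u64::MAX, false));
    }
}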
/// Multiply bigint by bigint using the grade-school multiplication algorithm.
#[inline(always)]
pub fn large_mul(x: &mut VecType, y: &[Limb]) -> Option<()> {
    // Karatsuba multiplication never makes sense, so just use grade-school
    // multiplication.
    if y.len() == 1 {
        // The index is always in bounds, since `y.len() == 1`.
        small_mul(x, y[0])?;
    } else {
        *x = long_mul(y, x)?;
    }
    Some(())
}

// SHIFT
// -----

/// Shift-left `n` bits inside a buffer.
#[inline]
pub fn shl_bits(x: &mut VecType, n: usize) -> Option<()> {
    debug_assert!(n != 0);

    // Internally, for each item, we shift left by n, and add the previous
    // right-shifted limb bits.
    // For example, shifting a u8 buffer left by 2 transforms:
    //      b10100100 b01000010
    // into:
    //      b10 b10010001 b00001000
    debug_assert!(n < LIMB_BITS);
    let rshift = LIMB_BITS - n;
    let lshift = n;
    let mut prev: Limb = 0;
    for xi in x.iter_mut() {
        let tmp = *xi;
        *xi <<= lshift;
        *xi |= prev >> rshift;
        prev = tmp;
    }

    // Push the carry if any bits were shifted out of the top limb.
    let carry = prev >> rshift;
    if carry != 0 {
        x.try_push(carry)?;
    }

    Some(())
}

/// Shift-left `n` limbs inside a buffer.
#[inline]
pub fn shl_limbs(x: &mut VecType, n: usize) -> Option<()> {
    debug_assert!(n != 0);
    if n + x.len() > x.capacity() {
        None
    } else if !x.is_empty() {
        let len = n + x.len();
        // SAFETY: safe since x is not empty, and `x.len() + n <= x.capacity()`.
        unsafe {
            // Move the elements.
            let src = x.as_ptr();
            let dst = x.as_mut_ptr().add(n);
            ptr::copy(src, dst, x.len());
            // Write our 0s.
            ptr::write_bytes(x.as_mut_ptr(), 0, n);
            x.set_len(len);
        }
        Some(())
    } else {
        Some(())
    }
}

/// Shift-left the buffer by `n` bits.
#[inline]
pub fn shl(x: &mut VecType, n: usize) -> Option<()> {
    let rem = n % LIMB_BITS;
    let div = n / LIMB_BITS;
    if rem != 0 {
        shl_bits(x, rem)?;
    }
    if div != 0 {
        shl_limbs(x, div)?;
    }
    Some(())
}

/// Get the number of leading zero bits in the storage.
#[inline]
pub fn leading_zeros(x: &[Limb]) -> u32 {
    let length = x.len();
    // wrapping_sub is fine: on an empty buffer, `get` just returns None.
    if let Some(&value) = x.get(length.wrapping_sub(1)) {
        value.leading_zeros()
    } else {
        0
    }
}

/// Calculate the bit-length of the big-integer.
#[inline]
pub fn bit_length(x: &[Limb]) -> u32 {
    let nlz = leading_zeros(x);
    LIMB_BITS as u32 * x.len() as u32 - nlz
}
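
// An illustrative sketch of the shift helpers: shifting left by a whole limb
// plus a few extra bits multiplies the value by exactly `2^n`, so the bit
// length grows by `n`.
#[cfg(test)]
mod shl_sketch {
    use super::*;

    #[test]
    fn shl_grows_bit_length_by_n() {
        let mut x = from_u64(1);
        // One full limb plus three extra bits.
        shl(&mut x, LIMB_BITS + 3).unwrap();
        assert!(bit_length(&x) == LIMB_BITS as u32 + 4);
    }
}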
// LIMB
// ----

// Type for a single limb of the big integer.
//
// A limb is analogous to a digit in base10, except it stores 32-bit
// or 64-bit numbers instead. We want types where 64-bit multiplication
// is well-supported by the architecture, rather than emulated in 3
// instructions. The quickest way to check this support is using a
// cross-compiler for numerous architectures, along with the following
// source file and command:
//
// Compile with `gcc main.c -c -S -O3 -masm=intel`
//
// And the source code is:
// ```text
// #include <stdint.h>
//
// struct i128 {
//     uint64_t hi;
//     uint64_t lo;
// };
//
// struct i128 square(uint64_t x, uint64_t y) {
//     __int128 prod = (__int128)x * (__int128)y;
//     struct i128 z;
//     z.hi = (uint64_t)(prod >> 64);
//     z.lo = (uint64_t)prod;
//     return z;
// }
// ```
//
// If the result contains `call __multi3`, then the multiplication
// is emulated by the compiler. Otherwise, it's natively supported.
//
// This should cover all known 64-bit platforms supported by Rust:
//      https://forge.rust-lang.org/platform-support.html
//
// # Supported
//
// Platforms where native 128-bit multiplication is explicitly supported:
// - x86_64 (Supported via `MUL`).
// - mips64 (Supported via `DMULTU`, from which `HI` and `LO` can be read).
// - s390x (Supported via `MLGR`).
//
// # Efficient
//
// Platforms where native 64-bit multiplication is supported and
// you can extract hi-lo for 64-bit multiplications:
// - aarch64 (Requires `UMULH` and `MUL` to capture high and low bits).
// - powerpc64 (Requires `MULHDU` and `MULLD` to capture high and low bits).
// - riscv64 (Requires `MUL` and `MULH` to capture high and low bits).
//
// # Unsupported
//
// Platforms where native 128-bit multiplication is not supported,
// requiring software emulation:
// - sparc64 (`UMUL` only supports double-word arguments).
// - sparcv9 (Same as sparc64).
//
// These tests are run via `xcross`, my own library for C cross-compiling,
// which supports numerous targets (far in excess of Rust's tier 1 support,
// or rust-embedded/cross's list). xcross may be found here:
//      https://github.com/Alexhuszagh/xcross
//
// To compile for the given target, run:
//      `xcross gcc main.c -c -S -O3 --target $target`
//
// All 32-bit architectures inherently do not have support. That means
// we can essentially look for 64-bit architectures that are not SPARC.

#[cfg(all(target_pointer_width = "64", not(target_arch = "sparc")))]
pub type Limb = u64;
#[cfg(all(target_pointer_width = "64", not(target_arch = "sparc")))]
pub type Wide = u128;
#[cfg(all(target_pointer_width = "64", not(target_arch = "sparc")))]
pub const LIMB_BITS: usize = 64;

#[cfg(not(all(target_pointer_width = "64", not(target_arch = "sparc"))))]
pub type Limb = u32;
#[cfg(not(all(target_pointer_width = "64", not(target_arch = "sparc"))))]
pub type Wide = u64;
#[cfg(not(all(target_pointer_width = "64", not(target_arch = "sparc"))))]
pub const LIMB_BITS: usize = 32;
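
// An illustrative sketch of the invariant the limb/wide pairing above relies
// on: a full limb-by-limb product plus a carry always fits in the wide type,
// since `Limb::MAX * Limb::MAX + Limb::MAX == Limb::MAX << LIMB_BITS`.
#[cfg(test)]
mod limb_wide_sketch {
    use super::*;

    #[test]
    fn wide_holds_product_plus_carry() {
        let (lo, hi) = scalar_mul(Limb::MAX, Limb::MAX, Limb::MAX);
        assert!(lo == 0 && hi == Limb::MAX);
    }
}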