1cbd624adSopenharmony_ci//! A simple big-integer type for slow path algorithms.
2cbd624adSopenharmony_ci//!
3cbd624adSopenharmony_ci//! This includes minimal stackvector for use in big-integer arithmetic.
4cbd624adSopenharmony_ci
5cbd624adSopenharmony_ci#![doc(hidden)]
6cbd624adSopenharmony_ci
7cbd624adSopenharmony_ci#[cfg(feature = "alloc")]
8cbd624adSopenharmony_ciuse crate::heapvec::HeapVec;
9cbd624adSopenharmony_ciuse crate::num::Float;
10cbd624adSopenharmony_ci#[cfg(not(feature = "alloc"))]
11cbd624adSopenharmony_ciuse crate::stackvec::StackVec;
12cbd624adSopenharmony_ci#[cfg(not(feature = "compact"))]
13cbd624adSopenharmony_ciuse crate::table::{LARGE_POW5, LARGE_POW5_STEP};
14cbd624adSopenharmony_ciuse core::{cmp, ops, ptr};
15cbd624adSopenharmony_ci
16cbd624adSopenharmony_ci/// Number of bits in a Bigint.
17cbd624adSopenharmony_ci///
18cbd624adSopenharmony_ci/// This needs to be at least the number of bits required to store
19cbd624adSopenharmony_ci/// a Bigint, which is `log2(radix**digits)`.
20cbd624adSopenharmony_ci/// ≅ 3600 for base-10, rounded-up.
21cbd624adSopenharmony_cipub const BIGINT_BITS: usize = 4000;
22cbd624adSopenharmony_ci
23cbd624adSopenharmony_ci/// The number of limbs for the bigint.
24cbd624adSopenharmony_cipub const BIGINT_LIMBS: usize = BIGINT_BITS / LIMB_BITS;
25cbd624adSopenharmony_ci
26cbd624adSopenharmony_ci#[cfg(feature = "alloc")]
27cbd624adSopenharmony_cipub type VecType = HeapVec;
28cbd624adSopenharmony_ci
29cbd624adSopenharmony_ci#[cfg(not(feature = "alloc"))]
30cbd624adSopenharmony_cipub type VecType = StackVec;
31cbd624adSopenharmony_ci
32cbd624adSopenharmony_ci/// Storage for a big integer type.
33cbd624adSopenharmony_ci///
34cbd624adSopenharmony_ci/// This is used for algorithms when we have a finite number of digits.
35cbd624adSopenharmony_ci/// Specifically, it stores all the significant digits scaled to the
36cbd624adSopenharmony_ci/// proper exponent, as an integral type, and then directly compares
37cbd624adSopenharmony_ci/// these digits.
38cbd624adSopenharmony_ci///
39cbd624adSopenharmony_ci/// This requires us to store the number of significant bits, plus the
40cbd624adSopenharmony_ci/// number of exponent bits (required) since we scale everything
41cbd624adSopenharmony_ci/// to the same exponent.
42cbd624adSopenharmony_ci#[derive(Clone, PartialEq, Eq)]
43cbd624adSopenharmony_cipub struct Bigint {
44cbd624adSopenharmony_ci    /// Significant digits for the float, stored in a big integer in LE order.
45cbd624adSopenharmony_ci    ///
46cbd624adSopenharmony_ci    /// This is pretty much the same number of digits for any radix, since the
47cbd624adSopenharmony_ci    ///  significant digits balances out the zeros from the exponent:
48cbd624adSopenharmony_ci    ///     1. Decimal is 1091 digits, 767 mantissa digits + 324 exponent zeros.
49cbd624adSopenharmony_ci    ///     2. Base 6 is 1097 digits, or 680 mantissa digits + 417 exponent zeros.
50cbd624adSopenharmony_ci    ///     3. Base 36 is 1086 digits, or 877 mantissa digits + 209 exponent zeros.
51cbd624adSopenharmony_ci    ///
52cbd624adSopenharmony_ci    /// However, the number of bytes required is larger for large radixes:
53cbd624adSopenharmony_ci    /// for decimal, we need `log2(10**1091) ≅ 3600`, while for base 36
54cbd624adSopenharmony_ci    /// we need `log2(36**1086) ≅ 5600`. Since we use uninitialized data,
55cbd624adSopenharmony_ci    /// we avoid a major performance hit from the large buffer size.
56cbd624adSopenharmony_ci    pub data: VecType,
57cbd624adSopenharmony_ci}
58cbd624adSopenharmony_ci
59cbd624adSopenharmony_ci#[allow(clippy::new_without_default)]
60cbd624adSopenharmony_ciimpl Bigint {
61cbd624adSopenharmony_ci    /// Construct a bigint representing 0.
62cbd624adSopenharmony_ci    #[inline(always)]
63cbd624adSopenharmony_ci    pub fn new() -> Self {
64cbd624adSopenharmony_ci        Self {
65cbd624adSopenharmony_ci            data: VecType::new(),
66cbd624adSopenharmony_ci        }
67cbd624adSopenharmony_ci    }
68cbd624adSopenharmony_ci
69cbd624adSopenharmony_ci    /// Construct a bigint from an integer.
70cbd624adSopenharmony_ci    #[inline(always)]
71cbd624adSopenharmony_ci    pub fn from_u64(value: u64) -> Self {
72cbd624adSopenharmony_ci        Self {
73cbd624adSopenharmony_ci            data: VecType::from_u64(value),
74cbd624adSopenharmony_ci        }
75cbd624adSopenharmony_ci    }
76cbd624adSopenharmony_ci
77cbd624adSopenharmony_ci    #[inline(always)]
78cbd624adSopenharmony_ci    pub fn hi64(&self) -> (u64, bool) {
79cbd624adSopenharmony_ci        self.data.hi64()
80cbd624adSopenharmony_ci    }
81cbd624adSopenharmony_ci
82cbd624adSopenharmony_ci    /// Multiply and assign as if by exponentiation by a power.
83cbd624adSopenharmony_ci    #[inline]
84cbd624adSopenharmony_ci    pub fn pow(&mut self, base: u32, exp: u32) -> Option<()> {
85cbd624adSopenharmony_ci        debug_assert!(base == 2 || base == 5 || base == 10);
86cbd624adSopenharmony_ci        if base % 5 == 0 {
87cbd624adSopenharmony_ci            pow(&mut self.data, exp)?;
88cbd624adSopenharmony_ci        }
89cbd624adSopenharmony_ci        if base % 2 == 0 {
90cbd624adSopenharmony_ci            shl(&mut self.data, exp as usize)?;
91cbd624adSopenharmony_ci        }
92cbd624adSopenharmony_ci        Some(())
93cbd624adSopenharmony_ci    }
94cbd624adSopenharmony_ci
95cbd624adSopenharmony_ci    /// Calculate the bit-length of the big-integer.
96cbd624adSopenharmony_ci    #[inline]
97cbd624adSopenharmony_ci    pub fn bit_length(&self) -> u32 {
98cbd624adSopenharmony_ci        bit_length(&self.data)
99cbd624adSopenharmony_ci    }
100cbd624adSopenharmony_ci}
101cbd624adSopenharmony_ci
102cbd624adSopenharmony_ciimpl ops::MulAssign<&Bigint> for Bigint {
103cbd624adSopenharmony_ci    fn mul_assign(&mut self, rhs: &Bigint) {
104cbd624adSopenharmony_ci        self.data *= &rhs.data;
105cbd624adSopenharmony_ci    }
106cbd624adSopenharmony_ci}
107cbd624adSopenharmony_ci
108cbd624adSopenharmony_ci/// REVERSE VIEW
109cbd624adSopenharmony_ci
110cbd624adSopenharmony_ci/// Reverse, immutable view of a sequence.
111cbd624adSopenharmony_cipub struct ReverseView<'a, T: 'a> {
112cbd624adSopenharmony_ci    inner: &'a [T],
113cbd624adSopenharmony_ci}
114cbd624adSopenharmony_ci
115cbd624adSopenharmony_ciimpl<'a, T> ops::Index<usize> for ReverseView<'a, T> {
116cbd624adSopenharmony_ci    type Output = T;
117cbd624adSopenharmony_ci
118cbd624adSopenharmony_ci    #[inline]
119cbd624adSopenharmony_ci    fn index(&self, index: usize) -> &T {
120cbd624adSopenharmony_ci        let len = self.inner.len();
121cbd624adSopenharmony_ci        &(*self.inner)[len - index - 1]
122cbd624adSopenharmony_ci    }
123cbd624adSopenharmony_ci}
124cbd624adSopenharmony_ci
125cbd624adSopenharmony_ci/// Create a reverse view of the vector for indexing.
126cbd624adSopenharmony_ci#[inline]
127cbd624adSopenharmony_cipub fn rview(x: &[Limb]) -> ReverseView<Limb> {
128cbd624adSopenharmony_ci    ReverseView {
129cbd624adSopenharmony_ci        inner: x,
130cbd624adSopenharmony_ci    }
131cbd624adSopenharmony_ci}
132cbd624adSopenharmony_ci
133cbd624adSopenharmony_ci// COMPARE
134cbd624adSopenharmony_ci// -------
135cbd624adSopenharmony_ci
136cbd624adSopenharmony_ci/// Compare `x` to `y`, in little-endian order.
137cbd624adSopenharmony_ci#[inline]
138cbd624adSopenharmony_cipub fn compare(x: &[Limb], y: &[Limb]) -> cmp::Ordering {
139cbd624adSopenharmony_ci    match x.len().cmp(&y.len()) {
140cbd624adSopenharmony_ci        cmp::Ordering::Equal => {
141cbd624adSopenharmony_ci            let iter = x.iter().rev().zip(y.iter().rev());
142cbd624adSopenharmony_ci            for (&xi, yi) in iter {
143cbd624adSopenharmony_ci                match xi.cmp(yi) {
144cbd624adSopenharmony_ci                    cmp::Ordering::Equal => (),
145cbd624adSopenharmony_ci                    ord => return ord,
146cbd624adSopenharmony_ci                }
147cbd624adSopenharmony_ci            }
148cbd624adSopenharmony_ci            // Equal case.
149cbd624adSopenharmony_ci            cmp::Ordering::Equal
150cbd624adSopenharmony_ci        },
151cbd624adSopenharmony_ci        ord => ord,
152cbd624adSopenharmony_ci    }
153cbd624adSopenharmony_ci}
154cbd624adSopenharmony_ci
155cbd624adSopenharmony_ci// NORMALIZE
156cbd624adSopenharmony_ci// ---------
157cbd624adSopenharmony_ci
158cbd624adSopenharmony_ci/// Normalize the integer, so any leading zero values are removed.
159cbd624adSopenharmony_ci#[inline]
160cbd624adSopenharmony_cipub fn normalize(x: &mut VecType) {
161cbd624adSopenharmony_ci    // We don't care if this wraps: the index is bounds-checked.
162cbd624adSopenharmony_ci    while let Some(&value) = x.get(x.len().wrapping_sub(1)) {
163cbd624adSopenharmony_ci        if value == 0 {
164cbd624adSopenharmony_ci            unsafe { x.set_len(x.len() - 1) };
165cbd624adSopenharmony_ci        } else {
166cbd624adSopenharmony_ci            break;
167cbd624adSopenharmony_ci        }
168cbd624adSopenharmony_ci    }
169cbd624adSopenharmony_ci}
170cbd624adSopenharmony_ci
171cbd624adSopenharmony_ci/// Get if the big integer is normalized.
172cbd624adSopenharmony_ci#[inline]
173cbd624adSopenharmony_ci#[allow(clippy::match_like_matches_macro)]
174cbd624adSopenharmony_cipub fn is_normalized(x: &[Limb]) -> bool {
175cbd624adSopenharmony_ci    // We don't care if this wraps: the index is bounds-checked.
176cbd624adSopenharmony_ci    match x.get(x.len().wrapping_sub(1)) {
177cbd624adSopenharmony_ci        Some(&0) => false,
178cbd624adSopenharmony_ci        _ => true,
179cbd624adSopenharmony_ci    }
180cbd624adSopenharmony_ci}
181cbd624adSopenharmony_ci
182cbd624adSopenharmony_ci// FROM
183cbd624adSopenharmony_ci// ----
184cbd624adSopenharmony_ci
185cbd624adSopenharmony_ci/// Create StackVec from u64 value.
186cbd624adSopenharmony_ci#[inline(always)]
187cbd624adSopenharmony_ci#[allow(clippy::branches_sharing_code)]
188cbd624adSopenharmony_cipub fn from_u64(x: u64) -> VecType {
189cbd624adSopenharmony_ci    let mut vec = VecType::new();
190cbd624adSopenharmony_ci    debug_assert!(vec.capacity() >= 2);
191cbd624adSopenharmony_ci    if LIMB_BITS == 32 {
192cbd624adSopenharmony_ci        vec.try_push(x as Limb).unwrap();
193cbd624adSopenharmony_ci        vec.try_push((x >> 32) as Limb).unwrap();
194cbd624adSopenharmony_ci    } else {
195cbd624adSopenharmony_ci        vec.try_push(x as Limb).unwrap();
196cbd624adSopenharmony_ci    }
197cbd624adSopenharmony_ci    vec.normalize();
198cbd624adSopenharmony_ci    vec
199cbd624adSopenharmony_ci}
200cbd624adSopenharmony_ci
201cbd624adSopenharmony_ci// HI
202cbd624adSopenharmony_ci// --
203cbd624adSopenharmony_ci
204cbd624adSopenharmony_ci/// Check if any of the remaining bits are non-zero.
205cbd624adSopenharmony_ci///
206cbd624adSopenharmony_ci/// # Safety
207cbd624adSopenharmony_ci///
208cbd624adSopenharmony_ci/// Safe as long as `rindex <= x.len()`.
209cbd624adSopenharmony_ci#[inline]
210cbd624adSopenharmony_cipub fn nonzero(x: &[Limb], rindex: usize) -> bool {
211cbd624adSopenharmony_ci    debug_assert!(rindex <= x.len());
212cbd624adSopenharmony_ci
213cbd624adSopenharmony_ci    let len = x.len();
214cbd624adSopenharmony_ci    let slc = &x[..len - rindex];
215cbd624adSopenharmony_ci    slc.iter().rev().any(|&x| x != 0)
216cbd624adSopenharmony_ci}
217cbd624adSopenharmony_ci
218cbd624adSopenharmony_ci// These return the high X bits and if the bits were truncated.
219cbd624adSopenharmony_ci
220cbd624adSopenharmony_ci/// Shift 32-bit integer to high 64-bits.
221cbd624adSopenharmony_ci#[inline]
222cbd624adSopenharmony_cipub fn u32_to_hi64_1(r0: u32) -> (u64, bool) {
223cbd624adSopenharmony_ci    u64_to_hi64_1(r0 as u64)
224cbd624adSopenharmony_ci}
225cbd624adSopenharmony_ci
226cbd624adSopenharmony_ci/// Shift 2 32-bit integers to high 64-bits.
227cbd624adSopenharmony_ci#[inline]
228cbd624adSopenharmony_cipub fn u32_to_hi64_2(r0: u32, r1: u32) -> (u64, bool) {
229cbd624adSopenharmony_ci    let r0 = (r0 as u64) << 32;
230cbd624adSopenharmony_ci    let r1 = r1 as u64;
231cbd624adSopenharmony_ci    u64_to_hi64_1(r0 | r1)
232cbd624adSopenharmony_ci}
233cbd624adSopenharmony_ci
234cbd624adSopenharmony_ci/// Shift 3 32-bit integers to high 64-bits.
235cbd624adSopenharmony_ci#[inline]
236cbd624adSopenharmony_cipub fn u32_to_hi64_3(r0: u32, r1: u32, r2: u32) -> (u64, bool) {
237cbd624adSopenharmony_ci    let r0 = r0 as u64;
238cbd624adSopenharmony_ci    let r1 = (r1 as u64) << 32;
239cbd624adSopenharmony_ci    let r2 = r2 as u64;
240cbd624adSopenharmony_ci    u64_to_hi64_2(r0, r1 | r2)
241cbd624adSopenharmony_ci}
242cbd624adSopenharmony_ci
243cbd624adSopenharmony_ci/// Shift 64-bit integer to high 64-bits.
244cbd624adSopenharmony_ci#[inline]
245cbd624adSopenharmony_cipub fn u64_to_hi64_1(r0: u64) -> (u64, bool) {
246cbd624adSopenharmony_ci    let ls = r0.leading_zeros();
247cbd624adSopenharmony_ci    (r0 << ls, false)
248cbd624adSopenharmony_ci}
249cbd624adSopenharmony_ci
250cbd624adSopenharmony_ci/// Shift 2 64-bit integers to high 64-bits.
251cbd624adSopenharmony_ci#[inline]
252cbd624adSopenharmony_cipub fn u64_to_hi64_2(r0: u64, r1: u64) -> (u64, bool) {
253cbd624adSopenharmony_ci    let ls = r0.leading_zeros();
254cbd624adSopenharmony_ci    let rs = 64 - ls;
255cbd624adSopenharmony_ci    let v = match ls {
256cbd624adSopenharmony_ci        0 => r0,
257cbd624adSopenharmony_ci        _ => (r0 << ls) | (r1 >> rs),
258cbd624adSopenharmony_ci    };
259cbd624adSopenharmony_ci    let n = r1 << ls != 0;
260cbd624adSopenharmony_ci    (v, n)
261cbd624adSopenharmony_ci}
262cbd624adSopenharmony_ci
263cbd624adSopenharmony_ci/// Extract the hi bits from the buffer.
264cbd624adSopenharmony_cimacro_rules! hi {
265cbd624adSopenharmony_ci    // # Safety
266cbd624adSopenharmony_ci    //
267cbd624adSopenharmony_ci    // Safe as long as the `stackvec.len() >= 1`.
268cbd624adSopenharmony_ci    (@1 $self:ident, $rview:ident, $t:ident, $fn:ident) => {{
269cbd624adSopenharmony_ci        $fn($rview[0] as $t)
270cbd624adSopenharmony_ci    }};
271cbd624adSopenharmony_ci
272cbd624adSopenharmony_ci    // # Safety
273cbd624adSopenharmony_ci    //
274cbd624adSopenharmony_ci    // Safe as long as the `stackvec.len() >= 2`.
275cbd624adSopenharmony_ci    (@2 $self:ident, $rview:ident, $t:ident, $fn:ident) => {{
276cbd624adSopenharmony_ci        let r0 = $rview[0] as $t;
277cbd624adSopenharmony_ci        let r1 = $rview[1] as $t;
278cbd624adSopenharmony_ci        $fn(r0, r1)
279cbd624adSopenharmony_ci    }};
280cbd624adSopenharmony_ci
281cbd624adSopenharmony_ci    // # Safety
282cbd624adSopenharmony_ci    //
283cbd624adSopenharmony_ci    // Safe as long as the `stackvec.len() >= 2`.
284cbd624adSopenharmony_ci    (@nonzero2 $self:ident, $rview:ident, $t:ident, $fn:ident) => {{
285cbd624adSopenharmony_ci        let (v, n) = hi!(@2 $self, $rview, $t, $fn);
286cbd624adSopenharmony_ci        (v, n || nonzero($self, 2 ))
287cbd624adSopenharmony_ci    }};
288cbd624adSopenharmony_ci
289cbd624adSopenharmony_ci    // # Safety
290cbd624adSopenharmony_ci    //
291cbd624adSopenharmony_ci    // Safe as long as the `stackvec.len() >= 3`.
292cbd624adSopenharmony_ci    (@3 $self:ident, $rview:ident, $t:ident, $fn:ident) => {{
293cbd624adSopenharmony_ci        let r0 = $rview[0] as $t;
294cbd624adSopenharmony_ci        let r1 = $rview[1] as $t;
295cbd624adSopenharmony_ci        let r2 = $rview[2] as $t;
296cbd624adSopenharmony_ci        $fn(r0, r1, r2)
297cbd624adSopenharmony_ci    }};
298cbd624adSopenharmony_ci
299cbd624adSopenharmony_ci    // # Safety
300cbd624adSopenharmony_ci    //
301cbd624adSopenharmony_ci    // Safe as long as the `stackvec.len() >= 3`.
302cbd624adSopenharmony_ci    (@nonzero3 $self:ident, $rview:ident, $t:ident, $fn:ident) => {{
303cbd624adSopenharmony_ci        let (v, n) = hi!(@3 $self, $rview, $t, $fn);
304cbd624adSopenharmony_ci        (v, n || nonzero($self, 3))
305cbd624adSopenharmony_ci    }};
306cbd624adSopenharmony_ci}
307cbd624adSopenharmony_ci
308cbd624adSopenharmony_ci/// Get the high 64 bits from the vector.
309cbd624adSopenharmony_ci#[inline(always)]
310cbd624adSopenharmony_cipub fn hi64(x: &[Limb]) -> (u64, bool) {
311cbd624adSopenharmony_ci    let rslc = rview(x);
312cbd624adSopenharmony_ci    // SAFETY: the buffer must be at least length bytes long.
313cbd624adSopenharmony_ci    match x.len() {
314cbd624adSopenharmony_ci        0 => (0, false),
315cbd624adSopenharmony_ci        1 if LIMB_BITS == 32 => hi!(@1 x, rslc, u32, u32_to_hi64_1),
316cbd624adSopenharmony_ci        1 => hi!(@1 x, rslc, u64, u64_to_hi64_1),
317cbd624adSopenharmony_ci        2 if LIMB_BITS == 32 => hi!(@2 x, rslc, u32, u32_to_hi64_2),
318cbd624adSopenharmony_ci        2 => hi!(@2 x, rslc, u64, u64_to_hi64_2),
319cbd624adSopenharmony_ci        _ if LIMB_BITS == 32 => hi!(@nonzero3 x, rslc, u32, u32_to_hi64_3),
320cbd624adSopenharmony_ci        _ => hi!(@nonzero2 x, rslc, u64, u64_to_hi64_2),
321cbd624adSopenharmony_ci    }
322cbd624adSopenharmony_ci}
323cbd624adSopenharmony_ci
324cbd624adSopenharmony_ci// POWERS
325cbd624adSopenharmony_ci// ------
326cbd624adSopenharmony_ci
327cbd624adSopenharmony_ci/// MulAssign by a power of 5.
328cbd624adSopenharmony_ci///
329cbd624adSopenharmony_ci/// Theoretically...
330cbd624adSopenharmony_ci///
331cbd624adSopenharmony_ci/// Use an exponentiation by squaring method, since it reduces the time
332cbd624adSopenharmony_ci/// complexity of the multiplication to ~`O(log(n))` for the squaring,
333cbd624adSopenharmony_ci/// and `O(n*m)` for the result. Since `m` is typically a lower-order
334cbd624adSopenharmony_ci/// factor, this significantly reduces the number of multiplications
335cbd624adSopenharmony_ci/// we need to do. Iteratively multiplying by small powers follows
336cbd624adSopenharmony_ci/// the nth triangular number series, which scales as `O(p^2)`, but
337cbd624adSopenharmony_ci/// where `p` is `n+m`. In short, it scales very poorly.
338cbd624adSopenharmony_ci///
339cbd624adSopenharmony_ci/// Practically....
340cbd624adSopenharmony_ci///
341cbd624adSopenharmony_ci/// Exponentiation by Squaring:
342cbd624adSopenharmony_ci///     running 2 tests
343cbd624adSopenharmony_ci///     test bigcomp_f32_lexical ... bench:       1,018 ns/iter (+/- 78)
344cbd624adSopenharmony_ci///     test bigcomp_f64_lexical ... bench:       3,639 ns/iter (+/- 1,007)
345cbd624adSopenharmony_ci///
346cbd624adSopenharmony_ci/// Exponentiation by Iterative Small Powers:
347cbd624adSopenharmony_ci///     running 2 tests
348cbd624adSopenharmony_ci///     test bigcomp_f32_lexical ... bench:         518 ns/iter (+/- 31)
349cbd624adSopenharmony_ci///     test bigcomp_f64_lexical ... bench:         583 ns/iter (+/- 47)
350cbd624adSopenharmony_ci///
351cbd624adSopenharmony_ci/// Exponentiation by Iterative Large Powers (of 2):
352cbd624adSopenharmony_ci///     running 2 tests
353cbd624adSopenharmony_ci///     test bigcomp_f32_lexical ... bench:         671 ns/iter (+/- 31)
354cbd624adSopenharmony_ci///     test bigcomp_f64_lexical ... bench:       1,394 ns/iter (+/- 47)
355cbd624adSopenharmony_ci///
356cbd624adSopenharmony_ci/// The following benchmarks were run on `1 * 5^300`, using native `pow`,
357cbd624adSopenharmony_ci/// a version with only small powers, and one with pre-computed powers
358cbd624adSopenharmony_ci/// of `5^(3 * max_exp)`, rather than `5^(5 * max_exp)`.
359cbd624adSopenharmony_ci///
360cbd624adSopenharmony_ci/// However, using large powers is crucial for good performance for higher
361cbd624adSopenharmony_ci/// powers.
362cbd624adSopenharmony_ci///     pow/default             time:   [426.20 ns 427.96 ns 429.89 ns]
363cbd624adSopenharmony_ci///     pow/small               time:   [2.9270 us 2.9411 us 2.9565 us]
364cbd624adSopenharmony_ci///     pow/large:3             time:   [838.51 ns 842.21 ns 846.27 ns]
365cbd624adSopenharmony_ci///
366cbd624adSopenharmony_ci/// Even using worst-case scenarios, exponentiation by squaring is
367cbd624adSopenharmony_ci/// significantly slower for our workloads. Just multiply by small powers,
368cbd624adSopenharmony_ci/// in simple cases, and use precalculated large powers in other cases.
369cbd624adSopenharmony_ci///
370cbd624adSopenharmony_ci/// Furthermore, using sufficiently big large powers is also crucial for
371cbd624adSopenharmony_ci/// performance. This is a tradeoff of binary size and performance, and
372cbd624adSopenharmony_ci/// using a single value at ~`5^(5 * max_exp)` seems optimal.
373cbd624adSopenharmony_cipub fn pow(x: &mut VecType, mut exp: u32) -> Option<()> {
374cbd624adSopenharmony_ci    // Minimize the number of iterations for large exponents: just
375cbd624adSopenharmony_ci    // do a few steps with a large powers.
376cbd624adSopenharmony_ci    #[cfg(not(feature = "compact"))]
377cbd624adSopenharmony_ci    {
378cbd624adSopenharmony_ci        while exp >= LARGE_POW5_STEP {
379cbd624adSopenharmony_ci            large_mul(x, &LARGE_POW5)?;
380cbd624adSopenharmony_ci            exp -= LARGE_POW5_STEP;
381cbd624adSopenharmony_ci        }
382cbd624adSopenharmony_ci    }
383cbd624adSopenharmony_ci
384cbd624adSopenharmony_ci    // Now use our pre-computed small powers iteratively.
385cbd624adSopenharmony_ci    // This is calculated as `⌊log(2^BITS - 1, 5)⌋`.
386cbd624adSopenharmony_ci    let small_step = if LIMB_BITS == 32 {
387cbd624adSopenharmony_ci        13
388cbd624adSopenharmony_ci    } else {
389cbd624adSopenharmony_ci        27
390cbd624adSopenharmony_ci    };
391cbd624adSopenharmony_ci    let max_native = (5 as Limb).pow(small_step);
392cbd624adSopenharmony_ci    while exp >= small_step {
393cbd624adSopenharmony_ci        small_mul(x, max_native)?;
394cbd624adSopenharmony_ci        exp -= small_step;
395cbd624adSopenharmony_ci    }
396cbd624adSopenharmony_ci    if exp != 0 {
397cbd624adSopenharmony_ci        // SAFETY: safe, since `exp < small_step`.
398cbd624adSopenharmony_ci        let small_power = unsafe { f64::int_pow_fast_path(exp as usize, 5) };
399cbd624adSopenharmony_ci        small_mul(x, small_power as Limb)?;
400cbd624adSopenharmony_ci    }
401cbd624adSopenharmony_ci    Some(())
402cbd624adSopenharmony_ci}
403cbd624adSopenharmony_ci
404cbd624adSopenharmony_ci// SCALAR
405cbd624adSopenharmony_ci// ------
406cbd624adSopenharmony_ci
407cbd624adSopenharmony_ci/// Add two small integers and return the resulting value and if overflow happens.
408cbd624adSopenharmony_ci#[inline(always)]
409cbd624adSopenharmony_cipub fn scalar_add(x: Limb, y: Limb) -> (Limb, bool) {
410cbd624adSopenharmony_ci    x.overflowing_add(y)
411cbd624adSopenharmony_ci}
412cbd624adSopenharmony_ci
413cbd624adSopenharmony_ci/// Multiply two small integers (with carry) (and return the overflow contribution).
414cbd624adSopenharmony_ci///
415cbd624adSopenharmony_ci/// Returns the (low, high) components.
416cbd624adSopenharmony_ci#[inline(always)]
417cbd624adSopenharmony_cipub fn scalar_mul(x: Limb, y: Limb, carry: Limb) -> (Limb, Limb) {
418cbd624adSopenharmony_ci    // Cannot overflow, as long as wide is 2x as wide. This is because
419cbd624adSopenharmony_ci    // the following is always true:
420cbd624adSopenharmony_ci    // `Wide::MAX - (Narrow::MAX * Narrow::MAX) >= Narrow::MAX`
421cbd624adSopenharmony_ci    let z: Wide = (x as Wide) * (y as Wide) + (carry as Wide);
422cbd624adSopenharmony_ci    (z as Limb, (z >> LIMB_BITS) as Limb)
423cbd624adSopenharmony_ci}
424cbd624adSopenharmony_ci
425cbd624adSopenharmony_ci// SMALL
426cbd624adSopenharmony_ci// -----
427cbd624adSopenharmony_ci
428cbd624adSopenharmony_ci/// Add small integer to bigint starting from offset.
429cbd624adSopenharmony_ci#[inline]
430cbd624adSopenharmony_cipub fn small_add_from(x: &mut VecType, y: Limb, start: usize) -> Option<()> {
431cbd624adSopenharmony_ci    let mut index = start;
432cbd624adSopenharmony_ci    let mut carry = y;
433cbd624adSopenharmony_ci    while carry != 0 && index < x.len() {
434cbd624adSopenharmony_ci        let result = scalar_add(x[index], carry);
435cbd624adSopenharmony_ci        x[index] = result.0;
436cbd624adSopenharmony_ci        carry = result.1 as Limb;
437cbd624adSopenharmony_ci        index += 1;
438cbd624adSopenharmony_ci    }
439cbd624adSopenharmony_ci    // If we carried past all the elements, add to the end of the buffer.
440cbd624adSopenharmony_ci    if carry != 0 {
441cbd624adSopenharmony_ci        x.try_push(carry)?;
442cbd624adSopenharmony_ci    }
443cbd624adSopenharmony_ci    Some(())
444cbd624adSopenharmony_ci}
445cbd624adSopenharmony_ci
446cbd624adSopenharmony_ci/// Add small integer to bigint.
447cbd624adSopenharmony_ci#[inline(always)]
448cbd624adSopenharmony_cipub fn small_add(x: &mut VecType, y: Limb) -> Option<()> {
449cbd624adSopenharmony_ci    small_add_from(x, y, 0)
450cbd624adSopenharmony_ci}
451cbd624adSopenharmony_ci
452cbd624adSopenharmony_ci/// Multiply bigint by small integer.
453cbd624adSopenharmony_ci#[inline]
454cbd624adSopenharmony_cipub fn small_mul(x: &mut VecType, y: Limb) -> Option<()> {
455cbd624adSopenharmony_ci    let mut carry = 0;
456cbd624adSopenharmony_ci    for xi in x.iter_mut() {
457cbd624adSopenharmony_ci        let result = scalar_mul(*xi, y, carry);
458cbd624adSopenharmony_ci        *xi = result.0;
459cbd624adSopenharmony_ci        carry = result.1;
460cbd624adSopenharmony_ci    }
461cbd624adSopenharmony_ci    // If we carried past all the elements, add to the end of the buffer.
462cbd624adSopenharmony_ci    if carry != 0 {
463cbd624adSopenharmony_ci        x.try_push(carry)?;
464cbd624adSopenharmony_ci    }
465cbd624adSopenharmony_ci    Some(())
466cbd624adSopenharmony_ci}
467cbd624adSopenharmony_ci
468cbd624adSopenharmony_ci// LARGE
469cbd624adSopenharmony_ci// -----
470cbd624adSopenharmony_ci
471cbd624adSopenharmony_ci/// Add bigint to bigint starting from offset.
472cbd624adSopenharmony_cipub fn large_add_from(x: &mut VecType, y: &[Limb], start: usize) -> Option<()> {
473cbd624adSopenharmony_ci    // The effective x buffer is from `xstart..x.len()`, so we need to treat
474cbd624adSopenharmony_ci    // that as the current range. If the effective y buffer is longer, need
475cbd624adSopenharmony_ci    // to resize to that, + the start index.
476cbd624adSopenharmony_ci    if y.len() > x.len().saturating_sub(start) {
477cbd624adSopenharmony_ci        // Ensure we panic if we can't extend the buffer.
478cbd624adSopenharmony_ci        // This avoids any unsafe behavior afterwards.
479cbd624adSopenharmony_ci        x.try_resize(y.len() + start, 0)?;
480cbd624adSopenharmony_ci    }
481cbd624adSopenharmony_ci
482cbd624adSopenharmony_ci    // Iteratively add elements from y to x.
483cbd624adSopenharmony_ci    let mut carry = false;
484cbd624adSopenharmony_ci    for (index, &yi) in y.iter().enumerate() {
485cbd624adSopenharmony_ci        // We panicked in `try_resize` if this wasn't true.
486cbd624adSopenharmony_ci        let xi = x.get_mut(start + index).unwrap();
487cbd624adSopenharmony_ci
488cbd624adSopenharmony_ci        // Only one op of the two ops can overflow, since we added at max
489cbd624adSopenharmony_ci        // Limb::max_value() + Limb::max_value(). Add the previous carry,
490cbd624adSopenharmony_ci        // and store the current carry for the next.
491cbd624adSopenharmony_ci        let result = scalar_add(*xi, yi);
492cbd624adSopenharmony_ci        *xi = result.0;
493cbd624adSopenharmony_ci        let mut tmp = result.1;
494cbd624adSopenharmony_ci        if carry {
495cbd624adSopenharmony_ci            let result = scalar_add(*xi, 1);
496cbd624adSopenharmony_ci            *xi = result.0;
497cbd624adSopenharmony_ci            tmp |= result.1;
498cbd624adSopenharmony_ci        }
499cbd624adSopenharmony_ci        carry = tmp;
500cbd624adSopenharmony_ci    }
501cbd624adSopenharmony_ci
502cbd624adSopenharmony_ci    // Handle overflow.
503cbd624adSopenharmony_ci    if carry {
504cbd624adSopenharmony_ci        small_add_from(x, 1, y.len() + start)?;
505cbd624adSopenharmony_ci    }
506cbd624adSopenharmony_ci    Some(())
507cbd624adSopenharmony_ci}
508cbd624adSopenharmony_ci
509cbd624adSopenharmony_ci/// Add bigint to bigint.
510cbd624adSopenharmony_ci#[inline(always)]
511cbd624adSopenharmony_cipub fn large_add(x: &mut VecType, y: &[Limb]) -> Option<()> {
512cbd624adSopenharmony_ci    large_add_from(x, y, 0)
513cbd624adSopenharmony_ci}
514cbd624adSopenharmony_ci
515cbd624adSopenharmony_ci/// Grade-school multiplication algorithm.
516cbd624adSopenharmony_ci///
517cbd624adSopenharmony_ci/// Slow, naive algorithm, using limb-bit bases and just shifting left for
518cbd624adSopenharmony_ci/// each iteration. This could be optimized with numerous other algorithms,
519cbd624adSopenharmony_ci/// but it's extremely simple, and works in O(n*m) time, which is fine
520cbd624adSopenharmony_ci/// by me. Each iteration, of which there are `m` iterations, requires
521cbd624adSopenharmony_ci/// `n` multiplications, and `n` additions, or grade-school multiplication.
522cbd624adSopenharmony_ci///
523cbd624adSopenharmony_ci/// Don't use Karatsuba multiplication, since out implementation seems to
524cbd624adSopenharmony_ci/// be slower asymptotically, which is likely just due to the small sizes
525cbd624adSopenharmony_ci/// we deal with here. For example, running on the following data:
526cbd624adSopenharmony_ci///
527cbd624adSopenharmony_ci/// ```text
528cbd624adSopenharmony_ci/// const SMALL_X: &[u32] = &[
529cbd624adSopenharmony_ci///     766857581, 3588187092, 1583923090, 2204542082, 1564708913, 2695310100, 3676050286,
530cbd624adSopenharmony_ci///     1022770393, 468044626, 446028186
531cbd624adSopenharmony_ci/// ];
532cbd624adSopenharmony_ci/// const SMALL_Y: &[u32] = &[
533cbd624adSopenharmony_ci///     3945492125, 3250752032, 1282554898, 1708742809, 1131807209, 3171663979, 1353276095,
534cbd624adSopenharmony_ci///     1678845844, 2373924447, 3640713171
535cbd624adSopenharmony_ci/// ];
536cbd624adSopenharmony_ci/// const LARGE_X: &[u32] = &[
537cbd624adSopenharmony_ci///     3647536243, 2836434412, 2154401029, 1297917894, 137240595, 790694805, 2260404854,
538cbd624adSopenharmony_ci///     3872698172, 690585094, 99641546, 3510774932, 1672049983, 2313458559, 2017623719,
539cbd624adSopenharmony_ci///     638180197, 1140936565, 1787190494, 1797420655, 14113450, 2350476485, 3052941684,
540cbd624adSopenharmony_ci///     1993594787, 2901001571, 4156930025, 1248016552, 848099908, 2660577483, 4030871206,
541cbd624adSopenharmony_ci///     692169593, 2835966319, 1781364505, 4266390061, 1813581655, 4210899844, 2137005290,
542cbd624adSopenharmony_ci///     2346701569, 3715571980, 3386325356, 1251725092, 2267270902, 474686922, 2712200426,
543cbd624adSopenharmony_ci///     197581715, 3087636290, 1379224439, 1258285015, 3230794403, 2759309199, 1494932094,
544cbd624adSopenharmony_ci///     326310242
545cbd624adSopenharmony_ci/// ];
546cbd624adSopenharmony_ci/// const LARGE_Y: &[u32] = &[
547cbd624adSopenharmony_ci///     1574249566, 868970575, 76716509, 3198027972, 1541766986, 1095120699, 3891610505,
548cbd624adSopenharmony_ci///     2322545818, 1677345138, 865101357, 2650232883, 2831881215, 3985005565, 2294283760,
549cbd624adSopenharmony_ci///     3468161605, 393539559, 3665153349, 1494067812, 106699483, 2596454134, 797235106,
550cbd624adSopenharmony_ci///     705031740, 1209732933, 2732145769, 4122429072, 141002534, 790195010, 4014829800,
551cbd624adSopenharmony_ci///     1303930792, 3649568494, 308065964, 1233648836, 2807326116, 79326486, 1262500691,
552cbd624adSopenharmony_ci///     621809229, 2258109428, 3819258501, 171115668, 1139491184, 2979680603, 1333372297,
553cbd624adSopenharmony_ci///     1657496603, 2790845317, 4090236532, 4220374789, 601876604, 1828177209, 2372228171,
554cbd624adSopenharmony_ci///     2247372529
555cbd624adSopenharmony_ci/// ];
556cbd624adSopenharmony_ci/// ```
557cbd624adSopenharmony_ci///
558cbd624adSopenharmony_ci/// We get the following results:
559cbd624adSopenharmony_ci
560cbd624adSopenharmony_ci/// ```text
561cbd624adSopenharmony_ci/// mul/small:long          time:   [220.23 ns 221.47 ns 222.81 ns]
562cbd624adSopenharmony_ci/// Found 4 outliers among 100 measurements (4.00%)
563cbd624adSopenharmony_ci///   2 (2.00%) high mild
564cbd624adSopenharmony_ci///   2 (2.00%) high severe
565cbd624adSopenharmony_ci/// mul/small:karatsuba     time:   [233.88 ns 234.63 ns 235.44 ns]
566cbd624adSopenharmony_ci/// Found 11 outliers among 100 measurements (11.00%)
567cbd624adSopenharmony_ci///   8 (8.00%) high mild
568cbd624adSopenharmony_ci///   3 (3.00%) high severe
569cbd624adSopenharmony_ci/// mul/large:long          time:   [1.9365 us 1.9455 us 1.9558 us]
570cbd624adSopenharmony_ci/// Found 12 outliers among 100 measurements (12.00%)
571cbd624adSopenharmony_ci///   7 (7.00%) high mild
572cbd624adSopenharmony_ci///   5 (5.00%) high severe
573cbd624adSopenharmony_ci/// mul/large:karatsuba     time:   [4.4250 us 4.4515 us 4.4812 us]
574cbd624adSopenharmony_ci/// ```
575cbd624adSopenharmony_ci///
576cbd624adSopenharmony_ci/// In short, Karatsuba multiplication is never worthwhile for out use-case.
577cbd624adSopenharmony_cipub fn long_mul(x: &[Limb], y: &[Limb]) -> Option<VecType> {
578cbd624adSopenharmony_ci    // Using the immutable value, multiply by all the scalars in y, using
579cbd624adSopenharmony_ci    // the algorithm defined above. Use a single buffer to avoid
580cbd624adSopenharmony_ci    // frequent reallocations. Handle the first case to avoid a redundant
581cbd624adSopenharmony_ci    // addition, since we know y.len() >= 1.
582cbd624adSopenharmony_ci    let mut z = VecType::try_from(x)?;
583cbd624adSopenharmony_ci    if !y.is_empty() {
584cbd624adSopenharmony_ci        let y0 = y[0];
585cbd624adSopenharmony_ci        small_mul(&mut z, y0)?;
586cbd624adSopenharmony_ci
587cbd624adSopenharmony_ci        for (index, &yi) in y.iter().enumerate().skip(1) {
588cbd624adSopenharmony_ci            if yi != 0 {
589cbd624adSopenharmony_ci                let mut zi = VecType::try_from(x)?;
590cbd624adSopenharmony_ci                small_mul(&mut zi, yi)?;
591cbd624adSopenharmony_ci                large_add_from(&mut z, &zi, index)?;
592cbd624adSopenharmony_ci            }
593cbd624adSopenharmony_ci        }
594cbd624adSopenharmony_ci    }
595cbd624adSopenharmony_ci
596cbd624adSopenharmony_ci    z.normalize();
597cbd624adSopenharmony_ci    Some(z)
598cbd624adSopenharmony_ci}
599cbd624adSopenharmony_ci
600cbd624adSopenharmony_ci/// Multiply bigint by bigint using grade-school multiplication algorithm.
601cbd624adSopenharmony_ci#[inline(always)]
602cbd624adSopenharmony_cipub fn large_mul(x: &mut VecType, y: &[Limb]) -> Option<()> {
603cbd624adSopenharmony_ci    // Karatsuba multiplication never makes sense, so just use grade school
604cbd624adSopenharmony_ci    // multiplication.
605cbd624adSopenharmony_ci    if y.len() == 1 {
606cbd624adSopenharmony_ci        // SAFETY: safe since `y.len() == 1`.
607cbd624adSopenharmony_ci        small_mul(x, y[0])?;
608cbd624adSopenharmony_ci    } else {
609cbd624adSopenharmony_ci        *x = long_mul(y, x)?;
610cbd624adSopenharmony_ci    }
611cbd624adSopenharmony_ci    Some(())
612cbd624adSopenharmony_ci}
613cbd624adSopenharmony_ci
614cbd624adSopenharmony_ci// SHIFT
615cbd624adSopenharmony_ci// -----
616cbd624adSopenharmony_ci
617cbd624adSopenharmony_ci/// Shift-left `n` bits inside a buffer.
618cbd624adSopenharmony_ci#[inline]
619cbd624adSopenharmony_cipub fn shl_bits(x: &mut VecType, n: usize) -> Option<()> {
620cbd624adSopenharmony_ci    debug_assert!(n != 0);
621cbd624adSopenharmony_ci
622cbd624adSopenharmony_ci    // Internally, for each item, we shift left by n, and add the previous
623cbd624adSopenharmony_ci    // right shifted limb-bits.
624cbd624adSopenharmony_ci    // For example, we transform (for u8) shifted left 2, to:
625cbd624adSopenharmony_ci    //      b10100100 b01000010
626cbd624adSopenharmony_ci    //      b10 b10010001 b00001000
627cbd624adSopenharmony_ci    debug_assert!(n < LIMB_BITS);
628cbd624adSopenharmony_ci    let rshift = LIMB_BITS - n;
629cbd624adSopenharmony_ci    let lshift = n;
630cbd624adSopenharmony_ci    let mut prev: Limb = 0;
631cbd624adSopenharmony_ci    for xi in x.iter_mut() {
632cbd624adSopenharmony_ci        let tmp = *xi;
633cbd624adSopenharmony_ci        *xi <<= lshift;
634cbd624adSopenharmony_ci        *xi |= prev >> rshift;
635cbd624adSopenharmony_ci        prev = tmp;
636cbd624adSopenharmony_ci    }
637cbd624adSopenharmony_ci
638cbd624adSopenharmony_ci    // Always push the carry, even if it creates a non-normal result.
639cbd624adSopenharmony_ci    let carry = prev >> rshift;
640cbd624adSopenharmony_ci    if carry != 0 {
641cbd624adSopenharmony_ci        x.try_push(carry)?;
642cbd624adSopenharmony_ci    }
643cbd624adSopenharmony_ci
644cbd624adSopenharmony_ci    Some(())
645cbd624adSopenharmony_ci}
646cbd624adSopenharmony_ci
647cbd624adSopenharmony_ci/// Shift-left `n` limbs inside a buffer.
648cbd624adSopenharmony_ci#[inline]
649cbd624adSopenharmony_cipub fn shl_limbs(x: &mut VecType, n: usize) -> Option<()> {
650cbd624adSopenharmony_ci    debug_assert!(n != 0);
651cbd624adSopenharmony_ci    if n + x.len() > x.capacity() {
652cbd624adSopenharmony_ci        None
653cbd624adSopenharmony_ci    } else if !x.is_empty() {
654cbd624adSopenharmony_ci        let len = n + x.len();
655cbd624adSopenharmony_ci        // SAFE: since x is not empty, and `x.len() + n <= x.capacity()`.
656cbd624adSopenharmony_ci        unsafe {
657cbd624adSopenharmony_ci            // Move the elements.
658cbd624adSopenharmony_ci            let src = x.as_ptr();
659cbd624adSopenharmony_ci            let dst = x.as_mut_ptr().add(n);
660cbd624adSopenharmony_ci            ptr::copy(src, dst, x.len());
661cbd624adSopenharmony_ci            // Write our 0s.
662cbd624adSopenharmony_ci            ptr::write_bytes(x.as_mut_ptr(), 0, n);
663cbd624adSopenharmony_ci            x.set_len(len);
664cbd624adSopenharmony_ci        }
665cbd624adSopenharmony_ci        Some(())
666cbd624adSopenharmony_ci    } else {
667cbd624adSopenharmony_ci        Some(())
668cbd624adSopenharmony_ci    }
669cbd624adSopenharmony_ci}
670cbd624adSopenharmony_ci
671cbd624adSopenharmony_ci/// Shift-left buffer by n bits.
672cbd624adSopenharmony_ci#[inline]
673cbd624adSopenharmony_cipub fn shl(x: &mut VecType, n: usize) -> Option<()> {
674cbd624adSopenharmony_ci    let rem = n % LIMB_BITS;
675cbd624adSopenharmony_ci    let div = n / LIMB_BITS;
676cbd624adSopenharmony_ci    if rem != 0 {
677cbd624adSopenharmony_ci        shl_bits(x, rem)?;
678cbd624adSopenharmony_ci    }
679cbd624adSopenharmony_ci    if div != 0 {
680cbd624adSopenharmony_ci        shl_limbs(x, div)?;
681cbd624adSopenharmony_ci    }
682cbd624adSopenharmony_ci    Some(())
683cbd624adSopenharmony_ci}
684cbd624adSopenharmony_ci
685cbd624adSopenharmony_ci/// Get number of leading zero bits in the storage.
686cbd624adSopenharmony_ci#[inline]
687cbd624adSopenharmony_cipub fn leading_zeros(x: &[Limb]) -> u32 {
688cbd624adSopenharmony_ci    let length = x.len();
689cbd624adSopenharmony_ci    // wrapping_sub is fine, since it'll just return None.
690cbd624adSopenharmony_ci    if let Some(&value) = x.get(length.wrapping_sub(1)) {
691cbd624adSopenharmony_ci        value.leading_zeros()
692cbd624adSopenharmony_ci    } else {
693cbd624adSopenharmony_ci        0
694cbd624adSopenharmony_ci    }
695cbd624adSopenharmony_ci}
696cbd624adSopenharmony_ci
697cbd624adSopenharmony_ci/// Calculate the bit-length of the big-integer.
698cbd624adSopenharmony_ci#[inline]
699cbd624adSopenharmony_cipub fn bit_length(x: &[Limb]) -> u32 {
700cbd624adSopenharmony_ci    let nlz = leading_zeros(x);
701cbd624adSopenharmony_ci    LIMB_BITS as u32 * x.len() as u32 - nlz
702cbd624adSopenharmony_ci}
703cbd624adSopenharmony_ci
704cbd624adSopenharmony_ci// LIMB
705cbd624adSopenharmony_ci// ----
706cbd624adSopenharmony_ci
707cbd624adSopenharmony_ci//  Type for a single limb of the big integer.
708cbd624adSopenharmony_ci//
709cbd624adSopenharmony_ci//  A limb is analogous to a digit in base10, except, it stores 32-bit
710cbd624adSopenharmony_ci//  or 64-bit numbers instead. We want types where 64-bit multiplication
711cbd624adSopenharmony_ci//  is well-supported by the architecture, rather than emulated in 3
712cbd624adSopenharmony_ci//  instructions. The quickest way to check this support is using a
713cbd624adSopenharmony_ci//  cross-compiler for numerous architectures, along with the following
714cbd624adSopenharmony_ci//  source file and command:
715cbd624adSopenharmony_ci//
716cbd624adSopenharmony_ci//  Compile with `gcc main.c -c -S -O3 -masm=intel`
717cbd624adSopenharmony_ci//
718cbd624adSopenharmony_ci//  And the source code is:
719cbd624adSopenharmony_ci//  ```text
720cbd624adSopenharmony_ci//  #include <stdint.h>
721cbd624adSopenharmony_ci//
722cbd624adSopenharmony_ci//  struct i128 {
723cbd624adSopenharmony_ci//      uint64_t hi;
724cbd624adSopenharmony_ci//      uint64_t lo;
725cbd624adSopenharmony_ci//  };
726cbd624adSopenharmony_ci//
727cbd624adSopenharmony_ci//  // Type your code here, or load an example.
728cbd624adSopenharmony_ci//  struct i128 square(uint64_t x, uint64_t y) {
729cbd624adSopenharmony_ci//      __int128 prod = (__int128)x * (__int128)y;
730cbd624adSopenharmony_ci//      struct i128 z;
731cbd624adSopenharmony_ci//      z.hi = (uint64_t)(prod >> 64);
732cbd624adSopenharmony_ci//      z.lo = (uint64_t)prod;
733cbd624adSopenharmony_ci//      return z;
734cbd624adSopenharmony_ci//  }
735cbd624adSopenharmony_ci//  ```
736cbd624adSopenharmony_ci//
737cbd624adSopenharmony_ci//  If the result contains `call __multi3`, then the multiplication
738cbd624adSopenharmony_ci//  is emulated by the compiler. Otherwise, it's natively supported.
739cbd624adSopenharmony_ci//
740cbd624adSopenharmony_ci//  This should be all-known 64-bit platforms supported by Rust.
741cbd624adSopenharmony_ci//      https://forge.rust-lang.org/platform-support.html
742cbd624adSopenharmony_ci//
743cbd624adSopenharmony_ci//  # Supported
744cbd624adSopenharmony_ci//
745cbd624adSopenharmony_ci//  Platforms where native 128-bit multiplication is explicitly supported:
746cbd624adSopenharmony_ci//      - x86_64 (Supported via `MUL`).
747cbd624adSopenharmony_ci//      - mips64 (Supported via `DMULTU`, which `HI` and `LO` can be read-from).
748cbd624adSopenharmony_ci//      - s390x (Supported via `MLGR`).
749cbd624adSopenharmony_ci//
750cbd624adSopenharmony_ci//  # Efficient
751cbd624adSopenharmony_ci//
752cbd624adSopenharmony_ci//  Platforms where native 64-bit multiplication is supported and
753cbd624adSopenharmony_ci//  you can extract hi-lo for 64-bit multiplications.
754cbd624adSopenharmony_ci//      - aarch64 (Requires `UMULH` and `MUL` to capture high and low bits).
755cbd624adSopenharmony_ci//      - powerpc64 (Requires `MULHDU` and `MULLD` to capture high and low bits).
756cbd624adSopenharmony_ci//      - riscv64 (Requires `MUL` and `MULH` to capture high and low bits).
757cbd624adSopenharmony_ci//
758cbd624adSopenharmony_ci//  # Unsupported
759cbd624adSopenharmony_ci//
760cbd624adSopenharmony_ci//  Platforms where native 128-bit multiplication is not supported,
761cbd624adSopenharmony_ci//  requiring software emulation.
762cbd624adSopenharmony_ci//      sparc64 (`UMUL` only supports double-word arguments).
763cbd624adSopenharmony_ci//      sparcv9 (Same as sparc64).
764cbd624adSopenharmony_ci//
765cbd624adSopenharmony_ci//  These tests are run via `xcross`, my own library for C cross-compiling,
766cbd624adSopenharmony_ci//  which supports numerous targets (far in excess of Rust's tier 1 support,
767cbd624adSopenharmony_ci//  or rust-embedded/cross's list). xcross may be found here:
768cbd624adSopenharmony_ci//      https://github.com/Alexhuszagh/xcross
769cbd624adSopenharmony_ci//
770cbd624adSopenharmony_ci//  To compile for the given target, run:
771cbd624adSopenharmony_ci//      `xcross gcc main.c -c -S -O3 --target $target`
772cbd624adSopenharmony_ci//
773cbd624adSopenharmony_ci//  All 32-bit architectures inherently do not have support. That means
774cbd624adSopenharmony_ci//  we can essentially look for 64-bit architectures that are not SPARC.
775cbd624adSopenharmony_ci
776cbd624adSopenharmony_ci#[cfg(all(target_pointer_width = "64", not(target_arch = "sparc")))]
777cbd624adSopenharmony_cipub type Limb = u64;
778cbd624adSopenharmony_ci#[cfg(all(target_pointer_width = "64", not(target_arch = "sparc")))]
779cbd624adSopenharmony_cipub type Wide = u128;
780cbd624adSopenharmony_ci#[cfg(all(target_pointer_width = "64", not(target_arch = "sparc")))]
781cbd624adSopenharmony_cipub const LIMB_BITS: usize = 64;
782cbd624adSopenharmony_ci
783cbd624adSopenharmony_ci#[cfg(not(all(target_pointer_width = "64", not(target_arch = "sparc"))))]
784cbd624adSopenharmony_cipub type Limb = u32;
785cbd624adSopenharmony_ci#[cfg(not(all(target_pointer_width = "64", not(target_arch = "sparc"))))]
786cbd624adSopenharmony_cipub type Wide = u64;
787cbd624adSopenharmony_ci#[cfg(not(all(target_pointer_width = "64", not(target_arch = "sparc"))))]
788cbd624adSopenharmony_cipub const LIMB_BITS: usize = 32;
789