11cb0ef41Sopenharmony_ci// Copyright 2021 the V8 project authors. All rights reserved. 21cb0ef41Sopenharmony_ci// Use of this source code is governed by a BSD-style license that can be 31cb0ef41Sopenharmony_ci// found in the LICENSE file. 41cb0ef41Sopenharmony_ci 51cb0ef41Sopenharmony_ci// Burnikel-Ziegler division. 61cb0ef41Sopenharmony_ci// Reference: "Fast Recursive Division" by Christoph Burnikel and Joachim 71cb0ef41Sopenharmony_ci// Ziegler, found at http://cr.yp.to/bib/1998/burnikel.ps 81cb0ef41Sopenharmony_ci 91cb0ef41Sopenharmony_ci#include <string.h> 101cb0ef41Sopenharmony_ci 111cb0ef41Sopenharmony_ci#include "src/bigint/bigint-internal.h" 121cb0ef41Sopenharmony_ci#include "src/bigint/digit-arithmetic.h" 131cb0ef41Sopenharmony_ci#include "src/bigint/div-helpers.h" 141cb0ef41Sopenharmony_ci#include "src/bigint/util.h" 151cb0ef41Sopenharmony_ci#include "src/bigint/vector-arithmetic.h" 161cb0ef41Sopenharmony_ci 171cb0ef41Sopenharmony_cinamespace v8 { 181cb0ef41Sopenharmony_cinamespace bigint { 191cb0ef41Sopenharmony_ci 201cb0ef41Sopenharmony_cinamespace { 211cb0ef41Sopenharmony_ci 221cb0ef41Sopenharmony_ci// Compares [a_high, A] with B. 231cb0ef41Sopenharmony_ci// Returns: 241cb0ef41Sopenharmony_ci// - a value < 0 if [a_high, A] < B 251cb0ef41Sopenharmony_ci// - 0 if [a_high, A] == B 261cb0ef41Sopenharmony_ci// - a value > 0 if [a_high, A] > B. 271cb0ef41Sopenharmony_ciint SpecialCompare(digit_t a_high, Digits A, Digits B) { 281cb0ef41Sopenharmony_ci B.Normalize(); 291cb0ef41Sopenharmony_ci int a_len; 301cb0ef41Sopenharmony_ci if (a_high == 0) { 311cb0ef41Sopenharmony_ci A.Normalize(); 321cb0ef41Sopenharmony_ci a_len = A.len(); 331cb0ef41Sopenharmony_ci } else { 341cb0ef41Sopenharmony_ci a_len = A.len() + 1; 351cb0ef41Sopenharmony_ci } 361cb0ef41Sopenharmony_ci int diff = a_len - B.len(); 371cb0ef41Sopenharmony_ci if (diff != 0) return diff; 381cb0ef41Sopenharmony_ci int i = a_len - 1; 391cb0ef41Sopenharmony_ci if (a_high != 0) { 401cb0ef41Sopenharmony_ci if (a_high > B[i]) return 1; 411cb0ef41Sopenharmony_ci if (a_high < B[i]) return -1; 421cb0ef41Sopenharmony_ci i--; 431cb0ef41Sopenharmony_ci } 441cb0ef41Sopenharmony_ci while (i >= 0 && A[i] == B[i]) i--; 451cb0ef41Sopenharmony_ci if (i < 0) return 0; 461cb0ef41Sopenharmony_ci return A[i] > B[i] ? 1 : -1; 471cb0ef41Sopenharmony_ci} 481cb0ef41Sopenharmony_ci 491cb0ef41Sopenharmony_civoid SetOnes(RWDigits X) { 501cb0ef41Sopenharmony_ci memset(X.digits(), 0xFF, X.len() * sizeof(digit_t)); 511cb0ef41Sopenharmony_ci} 521cb0ef41Sopenharmony_ci 531cb0ef41Sopenharmony_ci// Since the Burnikel-Ziegler method is inherently recursive, we put 541cb0ef41Sopenharmony_ci// non-changing data into a container object. 551cb0ef41Sopenharmony_ciclass BZ { 561cb0ef41Sopenharmony_ci public: 571cb0ef41Sopenharmony_ci BZ(ProcessorImpl* proc, int scratch_space) 581cb0ef41Sopenharmony_ci : proc_(proc), 591cb0ef41Sopenharmony_ci scratch_mem_(scratch_space >= kBurnikelThreshold ? scratch_space : 0) {} 601cb0ef41Sopenharmony_ci 611cb0ef41Sopenharmony_ci void DivideBasecase(RWDigits Q, RWDigits R, Digits A, Digits B); 621cb0ef41Sopenharmony_ci void D3n2n(RWDigits Q, RWDigits R, Digits A1A2, Digits A3, Digits B); 631cb0ef41Sopenharmony_ci void D2n1n(RWDigits Q, RWDigits R, Digits A, Digits B); 641cb0ef41Sopenharmony_ci 651cb0ef41Sopenharmony_ci private: 661cb0ef41Sopenharmony_ci ProcessorImpl* proc_; 671cb0ef41Sopenharmony_ci Storage scratch_mem_; 681cb0ef41Sopenharmony_ci}; 691cb0ef41Sopenharmony_ci 701cb0ef41Sopenharmony_civoid BZ::DivideBasecase(RWDigits Q, RWDigits R, Digits A, Digits B) { 711cb0ef41Sopenharmony_ci A.Normalize(); 721cb0ef41Sopenharmony_ci B.Normalize(); 731cb0ef41Sopenharmony_ci DCHECK(B.len() > 0); 741cb0ef41Sopenharmony_ci int cmp = Compare(A, B); 751cb0ef41Sopenharmony_ci if (cmp <= 0) { 761cb0ef41Sopenharmony_ci Q.Clear(); 771cb0ef41Sopenharmony_ci if (cmp == 0) { 781cb0ef41Sopenharmony_ci // If A == B, then Q=1, R=0. 791cb0ef41Sopenharmony_ci R.Clear(); 801cb0ef41Sopenharmony_ci Q[0] = 1; 811cb0ef41Sopenharmony_ci } else { 821cb0ef41Sopenharmony_ci // If A < B, then Q=0, R=A. 831cb0ef41Sopenharmony_ci PutAt(R, A, R.len()); 841cb0ef41Sopenharmony_ci } 851cb0ef41Sopenharmony_ci return; 861cb0ef41Sopenharmony_ci } 871cb0ef41Sopenharmony_ci if (B.len() == 1) { 881cb0ef41Sopenharmony_ci return proc_->DivideSingle(Q, R.digits(), A, B[0]); 891cb0ef41Sopenharmony_ci } 901cb0ef41Sopenharmony_ci return proc_->DivideSchoolbook(Q, R, A, B); 911cb0ef41Sopenharmony_ci} 921cb0ef41Sopenharmony_ci 931cb0ef41Sopenharmony_ci// Algorithm 2 from the paper. Variable names same as there. 941cb0ef41Sopenharmony_ci// Returns Q(uotient) and R(emainder) for A/B, with B having two thirds 951cb0ef41Sopenharmony_ci// the size of A = [A1, A2, A3]. 961cb0ef41Sopenharmony_civoid BZ::D3n2n(RWDigits Q, RWDigits R, Digits A1A2, Digits A3, Digits B) { 971cb0ef41Sopenharmony_ci DCHECK((B.len() & 1) == 0); 981cb0ef41Sopenharmony_ci int n = B.len() / 2; 991cb0ef41Sopenharmony_ci DCHECK(A1A2.len() == 2 * n); 1001cb0ef41Sopenharmony_ci // Actual condition is stricter than length: A < B * 2^(kDigitBits * n) 1011cb0ef41Sopenharmony_ci DCHECK(Compare(A1A2, B) < 0); 1021cb0ef41Sopenharmony_ci DCHECK(A3.len() == n); 1031cb0ef41Sopenharmony_ci DCHECK(Q.len() == n); 1041cb0ef41Sopenharmony_ci DCHECK(R.len() == 2 * n); 1051cb0ef41Sopenharmony_ci // 1. Split A into three parts A = [A1, A2, A3] with Ai < 2^(kDigitBits * n). 1061cb0ef41Sopenharmony_ci Digits A1(A1A2, n, n); 1071cb0ef41Sopenharmony_ci // 2. Split B into two parts B = [B1, B2] with Bi < 2^(kDigitBits * n). 1081cb0ef41Sopenharmony_ci Digits B1(B, n, n); 1091cb0ef41Sopenharmony_ci Digits B2(B, 0, n); 1101cb0ef41Sopenharmony_ci // 3. Distinguish the cases A1 < B1 or A1 >= B1. 1111cb0ef41Sopenharmony_ci RWDigits Qhat = Q; 1121cb0ef41Sopenharmony_ci RWDigits R1(R, n, n); 1131cb0ef41Sopenharmony_ci digit_t r1_high = 0; 1141cb0ef41Sopenharmony_ci if (Compare(A1, B1) < 0) { 1151cb0ef41Sopenharmony_ci // 3a. If A1 < B1, compute Qhat = floor([A1, A2] / B1) with remainder R1 1161cb0ef41Sopenharmony_ci // using algorithm D2n1n. 1171cb0ef41Sopenharmony_ci D2n1n(Qhat, R1, A1A2, B1); 1181cb0ef41Sopenharmony_ci if (proc_->should_terminate()) return; 1191cb0ef41Sopenharmony_ci } else { 1201cb0ef41Sopenharmony_ci // 3b. If A1 >= B1, set Qhat = 2^(kDigitBits * n) - 1 and set 1211cb0ef41Sopenharmony_ci // R1 = [A1, A2] - [B1, 0] + [0, B1] 1221cb0ef41Sopenharmony_ci SetOnes(Qhat); 1231cb0ef41Sopenharmony_ci // Step 1: compute A1 - B1, which can't underflow because of the comparison 1241cb0ef41Sopenharmony_ci // guarding this else-branch, and always has a one-digit result because 1251cb0ef41Sopenharmony_ci // of this function's preconditions. 1261cb0ef41Sopenharmony_ci RWDigits temp = R1; 1271cb0ef41Sopenharmony_ci Subtract(temp, A1, B1); 1281cb0ef41Sopenharmony_ci temp.Normalize(); 1291cb0ef41Sopenharmony_ci DCHECK(temp.len() <= 1); 1301cb0ef41Sopenharmony_ci if (temp.len() > 0) r1_high = temp[0]; 1311cb0ef41Sopenharmony_ci // Step 2: compute A2 + B1. 1321cb0ef41Sopenharmony_ci Digits A2(A1A2, 0, n); 1331cb0ef41Sopenharmony_ci r1_high += AddAndReturnCarry(R1, A2, B1); 1341cb0ef41Sopenharmony_ci } 1351cb0ef41Sopenharmony_ci // 4. Compute D = Qhat * B2 using (Karatsuba) multiplication. 1361cb0ef41Sopenharmony_ci RWDigits D(scratch_mem_.get(), 2 * n); 1371cb0ef41Sopenharmony_ci proc_->Multiply(D, Qhat, B2); 1381cb0ef41Sopenharmony_ci if (proc_->should_terminate()) return; 1391cb0ef41Sopenharmony_ci 1401cb0ef41Sopenharmony_ci // 5. Compute Rhat = R1*2^(kDigitBits * n) + A3 - D = [R1, A3] - D. 1411cb0ef41Sopenharmony_ci PutAt(R, A3, n); 1421cb0ef41Sopenharmony_ci // 6. As long as Rhat < 0, repeat: 1431cb0ef41Sopenharmony_ci while (SpecialCompare(r1_high, R, D) < 0) { 1441cb0ef41Sopenharmony_ci // 6a. Rhat = Rhat + B 1451cb0ef41Sopenharmony_ci r1_high += AddAndReturnCarry(R, R, B); 1461cb0ef41Sopenharmony_ci // 6b. Qhat = Qhat - 1 1471cb0ef41Sopenharmony_ci Subtract(Qhat, 1); 1481cb0ef41Sopenharmony_ci } 1491cb0ef41Sopenharmony_ci // 5. Compute Rhat = R1*2^(kDigitBits * n) + A3 - D = [R1, A3] - D. 1501cb0ef41Sopenharmony_ci digit_t borrow = SubtractAndReturnBorrow(R, R, D); 1511cb0ef41Sopenharmony_ci DCHECK(borrow == r1_high); 1521cb0ef41Sopenharmony_ci DCHECK(Compare(R, B) < 0); 1531cb0ef41Sopenharmony_ci (void)borrow; 1541cb0ef41Sopenharmony_ci // 7. Return R = Rhat, Q = Qhat. 1551cb0ef41Sopenharmony_ci} 1561cb0ef41Sopenharmony_ci 1571cb0ef41Sopenharmony_ci// Algorithm 1 from the paper. Variable names same as there. 1581cb0ef41Sopenharmony_ci// Returns Q(uotient) and (R)emainder for A/B, with A twice the size of B. 1591cb0ef41Sopenharmony_civoid BZ::D2n1n(RWDigits Q, RWDigits R, Digits A, Digits B) { 1601cb0ef41Sopenharmony_ci int n = B.len(); 1611cb0ef41Sopenharmony_ci DCHECK(A.len() <= 2 * n); 1621cb0ef41Sopenharmony_ci // A < B * 2^(kDigitsBits * n) 1631cb0ef41Sopenharmony_ci DCHECK(Compare(Digits(A, n, n), B) < 0); 1641cb0ef41Sopenharmony_ci DCHECK(Q.len() == n); 1651cb0ef41Sopenharmony_ci DCHECK(R.len() == n); 1661cb0ef41Sopenharmony_ci // 1. If n is odd or smaller than some convenient constant, compute Q and R 1671cb0ef41Sopenharmony_ci // by school division and return. 1681cb0ef41Sopenharmony_ci if ((n & 1) == 1 || n < kBurnikelThreshold) { 1691cb0ef41Sopenharmony_ci return DivideBasecase(Q, R, A, B); 1701cb0ef41Sopenharmony_ci } 1711cb0ef41Sopenharmony_ci // 2. Split A into four parts A = [A1, ..., A4] with 1721cb0ef41Sopenharmony_ci // Ai < 2^(kDigitBits * n / 2). Split B into two parts [B2, B1] with 1731cb0ef41Sopenharmony_ci // Bi < 2^(kDigitBits * n / 2). 1741cb0ef41Sopenharmony_ci Digits A1A2(A, n, n); 1751cb0ef41Sopenharmony_ci Digits A3(A, n / 2, n / 2); 1761cb0ef41Sopenharmony_ci Digits A4(A, 0, n / 2); 1771cb0ef41Sopenharmony_ci // 3. Compute the high part Q1 of floor(A/B) as 1781cb0ef41Sopenharmony_ci // Q1 = floor([A1, A2, A3] / [B1, B2]) with remainder R1 = [R11, R12], 1791cb0ef41Sopenharmony_ci // using algorithm D3n2n. 1801cb0ef41Sopenharmony_ci RWDigits Q1(Q, n / 2, n / 2); 1811cb0ef41Sopenharmony_ci ScratchDigits R1(n); 1821cb0ef41Sopenharmony_ci D3n2n(Q1, R1, A1A2, A3, B); 1831cb0ef41Sopenharmony_ci if (proc_->should_terminate()) return; 1841cb0ef41Sopenharmony_ci // 4. Compute the low part Q2 of floor(A/B) as 1851cb0ef41Sopenharmony_ci // Q2 = floor([R11, R12, A4] / [B1, B2]) with remainder R, using 1861cb0ef41Sopenharmony_ci // algorithm D3n2n. 1871cb0ef41Sopenharmony_ci RWDigits Q2(Q, 0, n / 2); 1881cb0ef41Sopenharmony_ci D3n2n(Q2, R, R1, A4, B); 1891cb0ef41Sopenharmony_ci // 5. Return Q = [Q1, Q2] and R. 1901cb0ef41Sopenharmony_ci} 1911cb0ef41Sopenharmony_ci 1921cb0ef41Sopenharmony_ci} // namespace 1931cb0ef41Sopenharmony_ci 1941cb0ef41Sopenharmony_ci// Algorithm 3 from the paper. Variables names same as there. 1951cb0ef41Sopenharmony_ci// Returns Q(uotient) and R(emainder) for A/B (no size restrictions). 1961cb0ef41Sopenharmony_ci// R is optional, Q is not. 1971cb0ef41Sopenharmony_civoid ProcessorImpl::DivideBurnikelZiegler(RWDigits Q, RWDigits R, Digits A, 1981cb0ef41Sopenharmony_ci Digits B) { 1991cb0ef41Sopenharmony_ci DCHECK(A.len() >= B.len()); 2001cb0ef41Sopenharmony_ci DCHECK(R.len() == 0 || R.len() >= B.len()); 2011cb0ef41Sopenharmony_ci DCHECK(Q.len() > A.len() - B.len()); 2021cb0ef41Sopenharmony_ci int r = A.len(); 2031cb0ef41Sopenharmony_ci int s = B.len(); 2041cb0ef41Sopenharmony_ci // The requirements are: 2051cb0ef41Sopenharmony_ci // - n >= s, n as small as possible. 2061cb0ef41Sopenharmony_ci // - m must be a power of two. 2071cb0ef41Sopenharmony_ci // 1. Set m = min {2^k | 2^k * kBurnikelThreshold > s}. 2081cb0ef41Sopenharmony_ci int m = 1 << BitLength(s / kBurnikelThreshold); 2091cb0ef41Sopenharmony_ci // 2. Set j = roundup(s/m) and n = j * m. 2101cb0ef41Sopenharmony_ci int j = DIV_CEIL(s, m); 2111cb0ef41Sopenharmony_ci int n = j * m; 2121cb0ef41Sopenharmony_ci // 3. Set sigma = max{tao | 2^tao * B < 2^(kDigitBits * n)}. 2131cb0ef41Sopenharmony_ci int sigma = CountLeadingZeros(B[s - 1]); 2141cb0ef41Sopenharmony_ci int digit_shift = n - s; 2151cb0ef41Sopenharmony_ci // 4. Set B = B * 2^sigma to normalize B. Shift A by the same amount. 2161cb0ef41Sopenharmony_ci ScratchDigits B_shifted(n); 2171cb0ef41Sopenharmony_ci LeftShift(B_shifted + digit_shift, B, sigma); 2181cb0ef41Sopenharmony_ci for (int i = 0; i < digit_shift; i++) B_shifted[i] = 0; 2191cb0ef41Sopenharmony_ci B = B_shifted; 2201cb0ef41Sopenharmony_ci // We need an extra digit if A's top digit does not have enough space for 2211cb0ef41Sopenharmony_ci // the left-shift by {sigma}. Additionally, the top bit of A must be 0 2221cb0ef41Sopenharmony_ci // (see "-1" in step 5 below), which combined with B being normalized (i.e. 2231cb0ef41Sopenharmony_ci // B's top bit is 1) ensures the preconditions of the helper functions. 2241cb0ef41Sopenharmony_ci int extra_digit = CountLeadingZeros(A[r - 1]) < (sigma + 1) ? 1 : 0; 2251cb0ef41Sopenharmony_ci r = A.len() + digit_shift + extra_digit; 2261cb0ef41Sopenharmony_ci ScratchDigits A_shifted(r); 2271cb0ef41Sopenharmony_ci LeftShift(A_shifted + digit_shift, A, sigma); 2281cb0ef41Sopenharmony_ci for (int i = 0; i < digit_shift; i++) A_shifted[i] = 0; 2291cb0ef41Sopenharmony_ci A = A_shifted; 2301cb0ef41Sopenharmony_ci // 5. Set t = min{t >= 2 | A < 2^(kDigitBits * t * n - 1)}. 2311cb0ef41Sopenharmony_ci int t = std::max(DIV_CEIL(r, n), 2); 2321cb0ef41Sopenharmony_ci // 6. Split A conceptually into t blocks. 2331cb0ef41Sopenharmony_ci // 7. Set Z_(t-2) = [A_(t-1), A_(t-2)]. 2341cb0ef41Sopenharmony_ci int z_len = n * 2; 2351cb0ef41Sopenharmony_ci ScratchDigits Z(z_len); 2361cb0ef41Sopenharmony_ci PutAt(Z, A + n * (t - 2), z_len); 2371cb0ef41Sopenharmony_ci // 8. For i from t-2 downto 0 do: 2381cb0ef41Sopenharmony_ci BZ bz(this, n); 2391cb0ef41Sopenharmony_ci ScratchDigits Ri(n); 2401cb0ef41Sopenharmony_ci { 2411cb0ef41Sopenharmony_ci // First iteration unrolled and specialized. 2421cb0ef41Sopenharmony_ci // We might not have n digits at the top of Q, so use temporary storage 2431cb0ef41Sopenharmony_ci // for Qi... 2441cb0ef41Sopenharmony_ci ScratchDigits Qi(n); 2451cb0ef41Sopenharmony_ci bz.D2n1n(Qi, Ri, Z, B); 2461cb0ef41Sopenharmony_ci if (should_terminate()) return; 2471cb0ef41Sopenharmony_ci // ...but there *will* be enough space for any non-zero result digits! 2481cb0ef41Sopenharmony_ci Qi.Normalize(); 2491cb0ef41Sopenharmony_ci RWDigits target = Q + n * (t - 2); 2501cb0ef41Sopenharmony_ci DCHECK(Qi.len() <= target.len()); 2511cb0ef41Sopenharmony_ci PutAt(target, Qi, target.len()); 2521cb0ef41Sopenharmony_ci } 2531cb0ef41Sopenharmony_ci // Now loop over any remaining iterations. 2541cb0ef41Sopenharmony_ci for (int i = t - 3; i >= 0; i--) { 2551cb0ef41Sopenharmony_ci // 8b. If i > 0, set Z_(i-1) = [Ri, A_(i-1)]. 2561cb0ef41Sopenharmony_ci // (De-duped with unrolled first iteration, hence reading A_(i).) 2571cb0ef41Sopenharmony_ci PutAt(Z + n, Ri, n); 2581cb0ef41Sopenharmony_ci PutAt(Z, A + n * i, n); 2591cb0ef41Sopenharmony_ci // 8a. Using algorithm D2n1n compute Qi, Ri such that Zi = B*Qi + Ri. 2601cb0ef41Sopenharmony_ci RWDigits Qi(Q, i * n, n); 2611cb0ef41Sopenharmony_ci bz.D2n1n(Qi, Ri, Z, B); 2621cb0ef41Sopenharmony_ci if (should_terminate()) return; 2631cb0ef41Sopenharmony_ci } 2641cb0ef41Sopenharmony_ci // 9. Return Q = [Q_(t-2), ..., Q_0] and R = R_0 * 2^(-sigma). 2651cb0ef41Sopenharmony_ci#if DEBUG 2661cb0ef41Sopenharmony_ci for (int i = 0; i < digit_shift; i++) { 2671cb0ef41Sopenharmony_ci DCHECK(Ri[i] == 0); 2681cb0ef41Sopenharmony_ci } 2691cb0ef41Sopenharmony_ci#endif 2701cb0ef41Sopenharmony_ci if (R.len() != 0) { 2711cb0ef41Sopenharmony_ci Digits Ri_part(Ri, digit_shift, Ri.len()); 2721cb0ef41Sopenharmony_ci Ri_part.Normalize(); 2731cb0ef41Sopenharmony_ci DCHECK(Ri_part.len() <= R.len()); 2741cb0ef41Sopenharmony_ci RightShift(R, Ri_part, sigma); 2751cb0ef41Sopenharmony_ci } 2761cb0ef41Sopenharmony_ci} 2771cb0ef41Sopenharmony_ci 2781cb0ef41Sopenharmony_ci} // namespace bigint 2791cb0ef41Sopenharmony_ci} // namespace v8 280