11cb0ef41Sopenharmony_ci// Copyright 2021 the V8 project authors. All rights reserved.
21cb0ef41Sopenharmony_ci// Use of this source code is governed by a BSD-style license that can be
31cb0ef41Sopenharmony_ci// found in the LICENSE file.
41cb0ef41Sopenharmony_ci
51cb0ef41Sopenharmony_ci// Burnikel-Ziegler division.
61cb0ef41Sopenharmony_ci// Reference: "Fast Recursive Division" by Christoph Burnikel and Joachim
71cb0ef41Sopenharmony_ci// Ziegler, found at http://cr.yp.to/bib/1998/burnikel.ps
81cb0ef41Sopenharmony_ci
91cb0ef41Sopenharmony_ci#include <string.h>
101cb0ef41Sopenharmony_ci
111cb0ef41Sopenharmony_ci#include "src/bigint/bigint-internal.h"
121cb0ef41Sopenharmony_ci#include "src/bigint/digit-arithmetic.h"
131cb0ef41Sopenharmony_ci#include "src/bigint/div-helpers.h"
141cb0ef41Sopenharmony_ci#include "src/bigint/util.h"
151cb0ef41Sopenharmony_ci#include "src/bigint/vector-arithmetic.h"
161cb0ef41Sopenharmony_ci
171cb0ef41Sopenharmony_cinamespace v8 {
181cb0ef41Sopenharmony_cinamespace bigint {
191cb0ef41Sopenharmony_ci
201cb0ef41Sopenharmony_cinamespace {
211cb0ef41Sopenharmony_ci
221cb0ef41Sopenharmony_ci// Compares [a_high, A] with B.
231cb0ef41Sopenharmony_ci// Returns:
241cb0ef41Sopenharmony_ci// - a value < 0 if [a_high, A] < B
251cb0ef41Sopenharmony_ci// - 0           if [a_high, A] == B
261cb0ef41Sopenharmony_ci// - a value > 0 if [a_high, A] > B.
271cb0ef41Sopenharmony_ciint SpecialCompare(digit_t a_high, Digits A, Digits B) {
281cb0ef41Sopenharmony_ci  B.Normalize();
291cb0ef41Sopenharmony_ci  int a_len;
301cb0ef41Sopenharmony_ci  if (a_high == 0) {
311cb0ef41Sopenharmony_ci    A.Normalize();
321cb0ef41Sopenharmony_ci    a_len = A.len();
331cb0ef41Sopenharmony_ci  } else {
341cb0ef41Sopenharmony_ci    a_len = A.len() + 1;
351cb0ef41Sopenharmony_ci  }
361cb0ef41Sopenharmony_ci  int diff = a_len - B.len();
371cb0ef41Sopenharmony_ci  if (diff != 0) return diff;
381cb0ef41Sopenharmony_ci  int i = a_len - 1;
391cb0ef41Sopenharmony_ci  if (a_high != 0) {
401cb0ef41Sopenharmony_ci    if (a_high > B[i]) return 1;
411cb0ef41Sopenharmony_ci    if (a_high < B[i]) return -1;
421cb0ef41Sopenharmony_ci    i--;
431cb0ef41Sopenharmony_ci  }
441cb0ef41Sopenharmony_ci  while (i >= 0 && A[i] == B[i]) i--;
451cb0ef41Sopenharmony_ci  if (i < 0) return 0;
461cb0ef41Sopenharmony_ci  return A[i] > B[i] ? 1 : -1;
471cb0ef41Sopenharmony_ci}
481cb0ef41Sopenharmony_ci
491cb0ef41Sopenharmony_civoid SetOnes(RWDigits X) {
501cb0ef41Sopenharmony_ci  memset(X.digits(), 0xFF, X.len() * sizeof(digit_t));
511cb0ef41Sopenharmony_ci}
521cb0ef41Sopenharmony_ci
531cb0ef41Sopenharmony_ci// Since the Burnikel-Ziegler method is inherently recursive, we put
541cb0ef41Sopenharmony_ci// non-changing data into a container object.
551cb0ef41Sopenharmony_ciclass BZ {
561cb0ef41Sopenharmony_ci public:
571cb0ef41Sopenharmony_ci  BZ(ProcessorImpl* proc, int scratch_space)
581cb0ef41Sopenharmony_ci      : proc_(proc),
591cb0ef41Sopenharmony_ci        scratch_mem_(scratch_space >= kBurnikelThreshold ? scratch_space : 0) {}
601cb0ef41Sopenharmony_ci
611cb0ef41Sopenharmony_ci  void DivideBasecase(RWDigits Q, RWDigits R, Digits A, Digits B);
621cb0ef41Sopenharmony_ci  void D3n2n(RWDigits Q, RWDigits R, Digits A1A2, Digits A3, Digits B);
631cb0ef41Sopenharmony_ci  void D2n1n(RWDigits Q, RWDigits R, Digits A, Digits B);
641cb0ef41Sopenharmony_ci
651cb0ef41Sopenharmony_ci private:
661cb0ef41Sopenharmony_ci  ProcessorImpl* proc_;
671cb0ef41Sopenharmony_ci  Storage scratch_mem_;
681cb0ef41Sopenharmony_ci};
691cb0ef41Sopenharmony_ci
701cb0ef41Sopenharmony_civoid BZ::DivideBasecase(RWDigits Q, RWDigits R, Digits A, Digits B) {
711cb0ef41Sopenharmony_ci  A.Normalize();
721cb0ef41Sopenharmony_ci  B.Normalize();
731cb0ef41Sopenharmony_ci  DCHECK(B.len() > 0);
741cb0ef41Sopenharmony_ci  int cmp = Compare(A, B);
751cb0ef41Sopenharmony_ci  if (cmp <= 0) {
761cb0ef41Sopenharmony_ci    Q.Clear();
771cb0ef41Sopenharmony_ci    if (cmp == 0) {
781cb0ef41Sopenharmony_ci      // If A == B, then Q=1, R=0.
791cb0ef41Sopenharmony_ci      R.Clear();
801cb0ef41Sopenharmony_ci      Q[0] = 1;
811cb0ef41Sopenharmony_ci    } else {
821cb0ef41Sopenharmony_ci      // If A < B, then Q=0, R=A.
831cb0ef41Sopenharmony_ci      PutAt(R, A, R.len());
841cb0ef41Sopenharmony_ci    }
851cb0ef41Sopenharmony_ci    return;
861cb0ef41Sopenharmony_ci  }
871cb0ef41Sopenharmony_ci  if (B.len() == 1) {
881cb0ef41Sopenharmony_ci    return proc_->DivideSingle(Q, R.digits(), A, B[0]);
891cb0ef41Sopenharmony_ci  }
901cb0ef41Sopenharmony_ci  return proc_->DivideSchoolbook(Q, R, A, B);
911cb0ef41Sopenharmony_ci}
921cb0ef41Sopenharmony_ci
931cb0ef41Sopenharmony_ci// Algorithm 2 from the paper. Variable names same as there.
941cb0ef41Sopenharmony_ci// Returns Q(uotient) and R(emainder) for A/B, with B having two thirds
951cb0ef41Sopenharmony_ci// the size of A = [A1, A2, A3].
961cb0ef41Sopenharmony_civoid BZ::D3n2n(RWDigits Q, RWDigits R, Digits A1A2, Digits A3, Digits B) {
971cb0ef41Sopenharmony_ci  DCHECK((B.len() & 1) == 0);
981cb0ef41Sopenharmony_ci  int n = B.len() / 2;
991cb0ef41Sopenharmony_ci  DCHECK(A1A2.len() == 2 * n);
1001cb0ef41Sopenharmony_ci  // Actual condition is stricter than length: A < B * 2^(kDigitBits * n)
1011cb0ef41Sopenharmony_ci  DCHECK(Compare(A1A2, B) < 0);
1021cb0ef41Sopenharmony_ci  DCHECK(A3.len() == n);
1031cb0ef41Sopenharmony_ci  DCHECK(Q.len() == n);
1041cb0ef41Sopenharmony_ci  DCHECK(R.len() == 2 * n);
1051cb0ef41Sopenharmony_ci  // 1. Split A into three parts A = [A1, A2, A3] with Ai < 2^(kDigitBits * n).
1061cb0ef41Sopenharmony_ci  Digits A1(A1A2, n, n);
1071cb0ef41Sopenharmony_ci  // 2. Split B into two parts B = [B1, B2] with Bi < 2^(kDigitBits * n).
1081cb0ef41Sopenharmony_ci  Digits B1(B, n, n);
1091cb0ef41Sopenharmony_ci  Digits B2(B, 0, n);
1101cb0ef41Sopenharmony_ci  // 3. Distinguish the cases A1 < B1 or A1 >= B1.
1111cb0ef41Sopenharmony_ci  RWDigits Qhat = Q;
1121cb0ef41Sopenharmony_ci  RWDigits R1(R, n, n);
1131cb0ef41Sopenharmony_ci  digit_t r1_high = 0;
1141cb0ef41Sopenharmony_ci  if (Compare(A1, B1) < 0) {
1151cb0ef41Sopenharmony_ci    // 3a. If A1 < B1, compute Qhat = floor([A1, A2] / B1) with remainder R1
1161cb0ef41Sopenharmony_ci    //     using algorithm D2n1n.
1171cb0ef41Sopenharmony_ci    D2n1n(Qhat, R1, A1A2, B1);
1181cb0ef41Sopenharmony_ci    if (proc_->should_terminate()) return;
1191cb0ef41Sopenharmony_ci  } else {
1201cb0ef41Sopenharmony_ci    // 3b. If A1 >= B1, set Qhat = 2^(kDigitBits * n) - 1 and set
1211cb0ef41Sopenharmony_ci    //     R1 = [A1, A2] - [B1, 0] + [0, B1]
1221cb0ef41Sopenharmony_ci    SetOnes(Qhat);
1231cb0ef41Sopenharmony_ci    // Step 1: compute A1 - B1, which can't underflow because of the comparison
1241cb0ef41Sopenharmony_ci    // guarding this else-branch, and always has a one-digit result because
1251cb0ef41Sopenharmony_ci    // of this function's preconditions.
1261cb0ef41Sopenharmony_ci    RWDigits temp = R1;
1271cb0ef41Sopenharmony_ci    Subtract(temp, A1, B1);
1281cb0ef41Sopenharmony_ci    temp.Normalize();
1291cb0ef41Sopenharmony_ci    DCHECK(temp.len() <= 1);
1301cb0ef41Sopenharmony_ci    if (temp.len() > 0) r1_high = temp[0];
1311cb0ef41Sopenharmony_ci    // Step 2: compute A2 + B1.
1321cb0ef41Sopenharmony_ci    Digits A2(A1A2, 0, n);
1331cb0ef41Sopenharmony_ci    r1_high += AddAndReturnCarry(R1, A2, B1);
1341cb0ef41Sopenharmony_ci  }
1351cb0ef41Sopenharmony_ci  // 4. Compute D = Qhat * B2 using (Karatsuba) multiplication.
1361cb0ef41Sopenharmony_ci  RWDigits D(scratch_mem_.get(), 2 * n);
1371cb0ef41Sopenharmony_ci  proc_->Multiply(D, Qhat, B2);
1381cb0ef41Sopenharmony_ci  if (proc_->should_terminate()) return;
1391cb0ef41Sopenharmony_ci
1401cb0ef41Sopenharmony_ci  // 5. Compute Rhat = R1*2^(kDigitBits * n) + A3 - D = [R1, A3] - D.
1411cb0ef41Sopenharmony_ci  PutAt(R, A3, n);
1421cb0ef41Sopenharmony_ci  // 6. As long as Rhat < 0, repeat:
1431cb0ef41Sopenharmony_ci  while (SpecialCompare(r1_high, R, D) < 0) {
1441cb0ef41Sopenharmony_ci    // 6a. Rhat = Rhat + B
1451cb0ef41Sopenharmony_ci    r1_high += AddAndReturnCarry(R, R, B);
1461cb0ef41Sopenharmony_ci    // 6b. Qhat = Qhat - 1
1471cb0ef41Sopenharmony_ci    Subtract(Qhat, 1);
1481cb0ef41Sopenharmony_ci  }
1491cb0ef41Sopenharmony_ci  // 5. Compute Rhat = R1*2^(kDigitBits * n) + A3 - D = [R1, A3] - D.
1501cb0ef41Sopenharmony_ci  digit_t borrow = SubtractAndReturnBorrow(R, R, D);
1511cb0ef41Sopenharmony_ci  DCHECK(borrow == r1_high);
1521cb0ef41Sopenharmony_ci  DCHECK(Compare(R, B) < 0);
1531cb0ef41Sopenharmony_ci  (void)borrow;
1541cb0ef41Sopenharmony_ci  // 7. Return R = Rhat, Q = Qhat.
1551cb0ef41Sopenharmony_ci}
1561cb0ef41Sopenharmony_ci
1571cb0ef41Sopenharmony_ci// Algorithm 1 from the paper. Variable names same as there.
1581cb0ef41Sopenharmony_ci// Returns Q(uotient) and (R)emainder for A/B, with A twice the size of B.
1591cb0ef41Sopenharmony_civoid BZ::D2n1n(RWDigits Q, RWDigits R, Digits A, Digits B) {
1601cb0ef41Sopenharmony_ci  int n = B.len();
1611cb0ef41Sopenharmony_ci  DCHECK(A.len() <= 2 * n);
1621cb0ef41Sopenharmony_ci  // A < B * 2^(kDigitsBits * n)
1631cb0ef41Sopenharmony_ci  DCHECK(Compare(Digits(A, n, n), B) < 0);
1641cb0ef41Sopenharmony_ci  DCHECK(Q.len() == n);
1651cb0ef41Sopenharmony_ci  DCHECK(R.len() == n);
1661cb0ef41Sopenharmony_ci  // 1. If n is odd or smaller than some convenient constant, compute Q and R
1671cb0ef41Sopenharmony_ci  //    by school division and return.
1681cb0ef41Sopenharmony_ci  if ((n & 1) == 1 || n < kBurnikelThreshold) {
1691cb0ef41Sopenharmony_ci    return DivideBasecase(Q, R, A, B);
1701cb0ef41Sopenharmony_ci  }
1711cb0ef41Sopenharmony_ci  // 2. Split A into four parts A = [A1, ..., A4] with
1721cb0ef41Sopenharmony_ci  //    Ai < 2^(kDigitBits * n / 2). Split B into two parts [B2, B1] with
1731cb0ef41Sopenharmony_ci  //    Bi < 2^(kDigitBits * n / 2).
1741cb0ef41Sopenharmony_ci  Digits A1A2(A, n, n);
1751cb0ef41Sopenharmony_ci  Digits A3(A, n / 2, n / 2);
1761cb0ef41Sopenharmony_ci  Digits A4(A, 0, n / 2);
1771cb0ef41Sopenharmony_ci  // 3. Compute the high part Q1 of floor(A/B) as
1781cb0ef41Sopenharmony_ci  //    Q1 = floor([A1, A2, A3] / [B1, B2]) with remainder R1 = [R11, R12],
1791cb0ef41Sopenharmony_ci  //    using algorithm D3n2n.
1801cb0ef41Sopenharmony_ci  RWDigits Q1(Q, n / 2, n / 2);
1811cb0ef41Sopenharmony_ci  ScratchDigits R1(n);
1821cb0ef41Sopenharmony_ci  D3n2n(Q1, R1, A1A2, A3, B);
1831cb0ef41Sopenharmony_ci  if (proc_->should_terminate()) return;
1841cb0ef41Sopenharmony_ci  // 4. Compute the low part Q2 of floor(A/B) as
1851cb0ef41Sopenharmony_ci  //    Q2 = floor([R11, R12, A4] / [B1, B2]) with remainder R, using
1861cb0ef41Sopenharmony_ci  //    algorithm D3n2n.
1871cb0ef41Sopenharmony_ci  RWDigits Q2(Q, 0, n / 2);
1881cb0ef41Sopenharmony_ci  D3n2n(Q2, R, R1, A4, B);
1891cb0ef41Sopenharmony_ci  // 5. Return Q = [Q1, Q2] and R.
1901cb0ef41Sopenharmony_ci}
1911cb0ef41Sopenharmony_ci
1921cb0ef41Sopenharmony_ci}  // namespace
1931cb0ef41Sopenharmony_ci
1941cb0ef41Sopenharmony_ci// Algorithm 3 from the paper. Variables names same as there.
1951cb0ef41Sopenharmony_ci// Returns Q(uotient) and R(emainder) for A/B (no size restrictions).
1961cb0ef41Sopenharmony_ci// R is optional, Q is not.
1971cb0ef41Sopenharmony_civoid ProcessorImpl::DivideBurnikelZiegler(RWDigits Q, RWDigits R, Digits A,
1981cb0ef41Sopenharmony_ci                                          Digits B) {
1991cb0ef41Sopenharmony_ci  DCHECK(A.len() >= B.len());
2001cb0ef41Sopenharmony_ci  DCHECK(R.len() == 0 || R.len() >= B.len());
2011cb0ef41Sopenharmony_ci  DCHECK(Q.len() > A.len() - B.len());
2021cb0ef41Sopenharmony_ci  int r = A.len();
2031cb0ef41Sopenharmony_ci  int s = B.len();
2041cb0ef41Sopenharmony_ci  // The requirements are:
2051cb0ef41Sopenharmony_ci  // - n >= s, n as small as possible.
2061cb0ef41Sopenharmony_ci  // - m must be a power of two.
2071cb0ef41Sopenharmony_ci  // 1. Set m = min {2^k | 2^k * kBurnikelThreshold > s}.
2081cb0ef41Sopenharmony_ci  int m = 1 << BitLength(s / kBurnikelThreshold);
2091cb0ef41Sopenharmony_ci  // 2. Set j = roundup(s/m) and n = j * m.
2101cb0ef41Sopenharmony_ci  int j = DIV_CEIL(s, m);
2111cb0ef41Sopenharmony_ci  int n = j * m;
2121cb0ef41Sopenharmony_ci  // 3. Set sigma = max{tao | 2^tao * B < 2^(kDigitBits * n)}.
2131cb0ef41Sopenharmony_ci  int sigma = CountLeadingZeros(B[s - 1]);
2141cb0ef41Sopenharmony_ci  int digit_shift = n - s;
2151cb0ef41Sopenharmony_ci  // 4. Set B = B * 2^sigma to normalize B. Shift A by the same amount.
2161cb0ef41Sopenharmony_ci  ScratchDigits B_shifted(n);
2171cb0ef41Sopenharmony_ci  LeftShift(B_shifted + digit_shift, B, sigma);
2181cb0ef41Sopenharmony_ci  for (int i = 0; i < digit_shift; i++) B_shifted[i] = 0;
2191cb0ef41Sopenharmony_ci  B = B_shifted;
2201cb0ef41Sopenharmony_ci  // We need an extra digit if A's top digit does not have enough space for
2211cb0ef41Sopenharmony_ci  // the left-shift by {sigma}. Additionally, the top bit of A must be 0
2221cb0ef41Sopenharmony_ci  // (see "-1" in step 5 below), which combined with B being normalized (i.e.
2231cb0ef41Sopenharmony_ci  // B's top bit is 1) ensures the preconditions of the helper functions.
2241cb0ef41Sopenharmony_ci  int extra_digit = CountLeadingZeros(A[r - 1]) < (sigma + 1) ? 1 : 0;
2251cb0ef41Sopenharmony_ci  r = A.len() + digit_shift + extra_digit;
2261cb0ef41Sopenharmony_ci  ScratchDigits A_shifted(r);
2271cb0ef41Sopenharmony_ci  LeftShift(A_shifted + digit_shift, A, sigma);
2281cb0ef41Sopenharmony_ci  for (int i = 0; i < digit_shift; i++) A_shifted[i] = 0;
2291cb0ef41Sopenharmony_ci  A = A_shifted;
2301cb0ef41Sopenharmony_ci  // 5. Set t = min{t >= 2 | A < 2^(kDigitBits * t * n - 1)}.
2311cb0ef41Sopenharmony_ci  int t = std::max(DIV_CEIL(r, n), 2);
2321cb0ef41Sopenharmony_ci  // 6. Split A conceptually into t blocks.
2331cb0ef41Sopenharmony_ci  // 7. Set Z_(t-2) = [A_(t-1), A_(t-2)].
2341cb0ef41Sopenharmony_ci  int z_len = n * 2;
2351cb0ef41Sopenharmony_ci  ScratchDigits Z(z_len);
2361cb0ef41Sopenharmony_ci  PutAt(Z, A + n * (t - 2), z_len);
2371cb0ef41Sopenharmony_ci  // 8. For i from t-2 downto 0 do:
2381cb0ef41Sopenharmony_ci  BZ bz(this, n);
2391cb0ef41Sopenharmony_ci  ScratchDigits Ri(n);
2401cb0ef41Sopenharmony_ci  {
2411cb0ef41Sopenharmony_ci    // First iteration unrolled and specialized.
2421cb0ef41Sopenharmony_ci    // We might not have n digits at the top of Q, so use temporary storage
2431cb0ef41Sopenharmony_ci    // for Qi...
2441cb0ef41Sopenharmony_ci    ScratchDigits Qi(n);
2451cb0ef41Sopenharmony_ci    bz.D2n1n(Qi, Ri, Z, B);
2461cb0ef41Sopenharmony_ci    if (should_terminate()) return;
2471cb0ef41Sopenharmony_ci    // ...but there *will* be enough space for any non-zero result digits!
2481cb0ef41Sopenharmony_ci    Qi.Normalize();
2491cb0ef41Sopenharmony_ci    RWDigits target = Q + n * (t - 2);
2501cb0ef41Sopenharmony_ci    DCHECK(Qi.len() <= target.len());
2511cb0ef41Sopenharmony_ci    PutAt(target, Qi, target.len());
2521cb0ef41Sopenharmony_ci  }
2531cb0ef41Sopenharmony_ci  // Now loop over any remaining iterations.
2541cb0ef41Sopenharmony_ci  for (int i = t - 3; i >= 0; i--) {
2551cb0ef41Sopenharmony_ci    // 8b. If i > 0, set Z_(i-1) = [Ri, A_(i-1)].
2561cb0ef41Sopenharmony_ci    // (De-duped with unrolled first iteration, hence reading A_(i).)
2571cb0ef41Sopenharmony_ci    PutAt(Z + n, Ri, n);
2581cb0ef41Sopenharmony_ci    PutAt(Z, A + n * i, n);
2591cb0ef41Sopenharmony_ci    // 8a. Using algorithm D2n1n compute Qi, Ri such that Zi = B*Qi + Ri.
2601cb0ef41Sopenharmony_ci    RWDigits Qi(Q, i * n, n);
2611cb0ef41Sopenharmony_ci    bz.D2n1n(Qi, Ri, Z, B);
2621cb0ef41Sopenharmony_ci    if (should_terminate()) return;
2631cb0ef41Sopenharmony_ci  }
2641cb0ef41Sopenharmony_ci  // 9. Return Q = [Q_(t-2), ..., Q_0] and R = R_0 * 2^(-sigma).
2651cb0ef41Sopenharmony_ci#if DEBUG
2661cb0ef41Sopenharmony_ci  for (int i = 0; i < digit_shift; i++) {
2671cb0ef41Sopenharmony_ci    DCHECK(Ri[i] == 0);
2681cb0ef41Sopenharmony_ci  }
2691cb0ef41Sopenharmony_ci#endif
2701cb0ef41Sopenharmony_ci  if (R.len() != 0) {
2711cb0ef41Sopenharmony_ci    Digits Ri_part(Ri, digit_shift, Ri.len());
2721cb0ef41Sopenharmony_ci    Ri_part.Normalize();
2731cb0ef41Sopenharmony_ci    DCHECK(Ri_part.len() <= R.len());
2741cb0ef41Sopenharmony_ci    RightShift(R, Ri_part, sigma);
2751cb0ef41Sopenharmony_ci  }
2761cb0ef41Sopenharmony_ci}
2771cb0ef41Sopenharmony_ci
2781cb0ef41Sopenharmony_ci}  // namespace bigint
2791cb0ef41Sopenharmony_ci}  // namespace v8
280