11cb0ef41Sopenharmony_ci// Copyright 2021 the V8 project authors. All rights reserved.
21cb0ef41Sopenharmony_ci// Use of this source code is governed by a BSD-style license that can be
31cb0ef41Sopenharmony_ci// found in the LICENSE file.
41cb0ef41Sopenharmony_ci
51cb0ef41Sopenharmony_ci// Barrett division, finding the inverse with Newton's method.
61cb0ef41Sopenharmony_ci// Reference: "Fast Division of Large Integers" by Karl Hasselström,
71cb0ef41Sopenharmony_ci// found at https://treskal.com/s/masters-thesis.pdf
81cb0ef41Sopenharmony_ci
91cb0ef41Sopenharmony_ci// Many thanks to Karl Wiberg, k@w5.se, for both writing up an
101cb0ef41Sopenharmony_ci// understandable theoretical description of the algorithm and privately
111cb0ef41Sopenharmony_ci// providing a demo implementation, on which the implementation in this file is
121cb0ef41Sopenharmony_ci// based.
131cb0ef41Sopenharmony_ci
141cb0ef41Sopenharmony_ci#include <algorithm>
151cb0ef41Sopenharmony_ci
161cb0ef41Sopenharmony_ci#include "src/bigint/bigint-internal.h"
171cb0ef41Sopenharmony_ci#include "src/bigint/digit-arithmetic.h"
181cb0ef41Sopenharmony_ci#include "src/bigint/div-helpers.h"
191cb0ef41Sopenharmony_ci#include "src/bigint/vector-arithmetic.h"
201cb0ef41Sopenharmony_ci
211cb0ef41Sopenharmony_cinamespace v8 {
221cb0ef41Sopenharmony_cinamespace bigint {
231cb0ef41Sopenharmony_ci
241cb0ef41Sopenharmony_cinamespace {
251cb0ef41Sopenharmony_ci
261cb0ef41Sopenharmony_civoid DcheckIntegerPartRange(Digits X, digit_t min, digit_t max) {
271cb0ef41Sopenharmony_ci#if DEBUG
281cb0ef41Sopenharmony_ci  digit_t integer_part = X.msd();
291cb0ef41Sopenharmony_ci  DCHECK(integer_part >= min);
301cb0ef41Sopenharmony_ci  DCHECK(integer_part <= max);
311cb0ef41Sopenharmony_ci#else
321cb0ef41Sopenharmony_ci  USE(X);
331cb0ef41Sopenharmony_ci  USE(min);
341cb0ef41Sopenharmony_ci  USE(max);
351cb0ef41Sopenharmony_ci#endif
361cb0ef41Sopenharmony_ci}
371cb0ef41Sopenharmony_ci
381cb0ef41Sopenharmony_ci}  // namespace
391cb0ef41Sopenharmony_ci
401cb0ef41Sopenharmony_ci// Z := (the fractional part of) 1/V, via naive division.
411cb0ef41Sopenharmony_ci// See comments at {Invert} and {InvertNewton} below for details.
421cb0ef41Sopenharmony_civoid ProcessorImpl::InvertBasecase(RWDigits Z, Digits V, RWDigits scratch) {
431cb0ef41Sopenharmony_ci  DCHECK(Z.len() > V.len());
441cb0ef41Sopenharmony_ci  DCHECK(V.len() > 0);
451cb0ef41Sopenharmony_ci  DCHECK(scratch.len() >= 2 * V.len());
461cb0ef41Sopenharmony_ci  int n = V.len();
471cb0ef41Sopenharmony_ci  RWDigits X(scratch, 0, 2 * n);
481cb0ef41Sopenharmony_ci  digit_t borrow = 0;
491cb0ef41Sopenharmony_ci  int i = 0;
501cb0ef41Sopenharmony_ci  for (; i < n; i++) X[i] = 0;
511cb0ef41Sopenharmony_ci  for (; i < 2 * n; i++) X[i] = digit_sub2(0, V[i - n], borrow, &borrow);
521cb0ef41Sopenharmony_ci  DCHECK(borrow == 1);
531cb0ef41Sopenharmony_ci  RWDigits R(nullptr, 0);  // We don't need the remainder.
541cb0ef41Sopenharmony_ci  if (n < kBurnikelThreshold) {
551cb0ef41Sopenharmony_ci    DivideSchoolbook(Z, R, X, V);
561cb0ef41Sopenharmony_ci  } else {
571cb0ef41Sopenharmony_ci    DivideBurnikelZiegler(Z, R, X, V);
581cb0ef41Sopenharmony_ci  }
591cb0ef41Sopenharmony_ci}
601cb0ef41Sopenharmony_ci
611cb0ef41Sopenharmony_ci// This is Algorithm 4.2 from the paper.
621cb0ef41Sopenharmony_ci// Computes the inverse of V, shifted by kDigitBits * 2 * V.len, accurate to
631cb0ef41Sopenharmony_ci// V.len+1 digits. The V.len low digits of the result digits will be written
641cb0ef41Sopenharmony_ci// to Z, plus there is an implicit top digit with value 1.
651cb0ef41Sopenharmony_ci// Needs InvertNewtonScratchSpace(V.len) of scratch space.
661cb0ef41Sopenharmony_ci// The result is either correct or off by one (about half the time it is
671cb0ef41Sopenharmony_ci// correct, half the time it is one too much, and in the corner case where V is
681cb0ef41Sopenharmony_ci// minimal and the implicit top digit would have to be 2 it is one too little).
691cb0ef41Sopenharmony_ci// Barrett's division algorithm can handle that, so we don't care.
701cb0ef41Sopenharmony_civoid ProcessorImpl::InvertNewton(RWDigits Z, Digits V, RWDigits scratch) {
711cb0ef41Sopenharmony_ci  const int vn = V.len();
721cb0ef41Sopenharmony_ci  DCHECK(Z.len() >= vn);
731cb0ef41Sopenharmony_ci  DCHECK(scratch.len() >= InvertNewtonScratchSpace(vn));
741cb0ef41Sopenharmony_ci  const int kSOffset = 0;
751cb0ef41Sopenharmony_ci  const int kWOffset = 0;  // S and W can share their scratch space.
761cb0ef41Sopenharmony_ci  const int kUOffset = vn + kInvertNewtonExtraSpace;
771cb0ef41Sopenharmony_ci
781cb0ef41Sopenharmony_ci  // The base case won't work otherwise.
791cb0ef41Sopenharmony_ci  DCHECK(V.len() >= 3);
801cb0ef41Sopenharmony_ci
811cb0ef41Sopenharmony_ci  constexpr int kBasecasePrecision = kNewtonInversionThreshold - 1;
821cb0ef41Sopenharmony_ci  // V must have more digits than the basecase.
831cb0ef41Sopenharmony_ci  DCHECK(V.len() > kBasecasePrecision);
841cb0ef41Sopenharmony_ci  DCHECK(IsBitNormalized(V));
851cb0ef41Sopenharmony_ci
861cb0ef41Sopenharmony_ci  // Step (1): Setup.
871cb0ef41Sopenharmony_ci  // Calculate precision required at each step.
881cb0ef41Sopenharmony_ci  // {k} is the number of fraction bits for the current iteration.
891cb0ef41Sopenharmony_ci  int k = vn * kDigitBits;
901cb0ef41Sopenharmony_ci  int target_fraction_bits[8 * sizeof(vn)];  // "k_i" in the paper.
911cb0ef41Sopenharmony_ci  int iteration = -1;  // "i" in the paper, except inverted to run downwards.
921cb0ef41Sopenharmony_ci  while (k > kBasecasePrecision * kDigitBits) {
931cb0ef41Sopenharmony_ci    iteration++;
941cb0ef41Sopenharmony_ci    target_fraction_bits[iteration] = k;
951cb0ef41Sopenharmony_ci    k = DIV_CEIL(k, 2);
961cb0ef41Sopenharmony_ci  }
971cb0ef41Sopenharmony_ci  // At this point, k <= kBasecasePrecision*kDigitBits is the number of
981cb0ef41Sopenharmony_ci  // fraction bits to use in the base case. {iteration} is the highest index
991cb0ef41Sopenharmony_ci  // in use for f[].
1001cb0ef41Sopenharmony_ci
1011cb0ef41Sopenharmony_ci  // Step (2): Initial approximation.
1021cb0ef41Sopenharmony_ci  int initial_digits = DIV_CEIL(k + 1, kDigitBits);
1031cb0ef41Sopenharmony_ci  Digits top_part_of_v(V, vn - initial_digits, initial_digits);
1041cb0ef41Sopenharmony_ci  InvertBasecase(Z, top_part_of_v, scratch);
1051cb0ef41Sopenharmony_ci  Z[initial_digits] = Z[initial_digits] + 1;  // Implicit top digit.
1061cb0ef41Sopenharmony_ci  // From now on, we'll keep Z.len updated to the part that's already computed.
1071cb0ef41Sopenharmony_ci  Z.set_len(initial_digits + 1);
1081cb0ef41Sopenharmony_ci
1091cb0ef41Sopenharmony_ci  // Step (3): Precision doubling loop.
1101cb0ef41Sopenharmony_ci  while (true) {
1111cb0ef41Sopenharmony_ci    DcheckIntegerPartRange(Z, 1, 2);
1121cb0ef41Sopenharmony_ci
1131cb0ef41Sopenharmony_ci    // (3b): S = Z^2
1141cb0ef41Sopenharmony_ci    RWDigits S(scratch, kSOffset, 2 * Z.len());
1151cb0ef41Sopenharmony_ci    Multiply(S, Z, Z);
1161cb0ef41Sopenharmony_ci    if (should_terminate()) return;
1171cb0ef41Sopenharmony_ci    S.TrimOne();  // Top digit of S is unused.
1181cb0ef41Sopenharmony_ci    DcheckIntegerPartRange(S, 1, 4);
1191cb0ef41Sopenharmony_ci
1201cb0ef41Sopenharmony_ci    // (3c): T = V, truncated so that at least 2k+3 fraction bits remain.
1211cb0ef41Sopenharmony_ci    int fraction_digits = DIV_CEIL(2 * k + 3, kDigitBits);
1221cb0ef41Sopenharmony_ci    int t_len = std::min(V.len(), fraction_digits);
1231cb0ef41Sopenharmony_ci    Digits T(V, V.len() - t_len, t_len);
1241cb0ef41Sopenharmony_ci
1251cb0ef41Sopenharmony_ci    // (3d): U = T * S, truncated so that at least 2k+1 fraction bits remain
1261cb0ef41Sopenharmony_ci    // (U has one integer digit, which might be zero).
1271cb0ef41Sopenharmony_ci    fraction_digits = DIV_CEIL(2 * k + 1, kDigitBits);
1281cb0ef41Sopenharmony_ci    RWDigits U(scratch, kUOffset, S.len() + T.len());
1291cb0ef41Sopenharmony_ci    DCHECK(U.len() > fraction_digits);
1301cb0ef41Sopenharmony_ci    Multiply(U, S, T);
1311cb0ef41Sopenharmony_ci    if (should_terminate()) return;
1321cb0ef41Sopenharmony_ci    U = U + (U.len() - (1 + fraction_digits));
1331cb0ef41Sopenharmony_ci    DcheckIntegerPartRange(U, 0, 3);
1341cb0ef41Sopenharmony_ci
1351cb0ef41Sopenharmony_ci    // (3e): W = 2 * Z, padded with "0" fraction bits so that it has the
1361cb0ef41Sopenharmony_ci    // same number of fraction bits as U.
1371cb0ef41Sopenharmony_ci    DCHECK(U.len() >= Z.len());
1381cb0ef41Sopenharmony_ci    RWDigits W(scratch, kWOffset, U.len());
1391cb0ef41Sopenharmony_ci    int padding_digits = U.len() - Z.len();
1401cb0ef41Sopenharmony_ci    for (int i = 0; i < padding_digits; i++) W[i] = 0;
1411cb0ef41Sopenharmony_ci    LeftShift(W + padding_digits, Z, 1);
1421cb0ef41Sopenharmony_ci    DcheckIntegerPartRange(W, 2, 4);
1431cb0ef41Sopenharmony_ci
1441cb0ef41Sopenharmony_ci    // (3f): Z = W - U.
1451cb0ef41Sopenharmony_ci    // This check is '<=' instead of '<' because U's top digit is its
1461cb0ef41Sopenharmony_ci    // integer part, and we want vn fraction digits.
1471cb0ef41Sopenharmony_ci    if (U.len() <= vn) {
1481cb0ef41Sopenharmony_ci      // Normal subtraction.
1491cb0ef41Sopenharmony_ci      // This is not the last iteration.
1501cb0ef41Sopenharmony_ci      DCHECK(iteration > 0);
1511cb0ef41Sopenharmony_ci      Z.set_len(U.len());
1521cb0ef41Sopenharmony_ci      digit_t borrow = SubtractAndReturnBorrow(Z, W, U);
1531cb0ef41Sopenharmony_ci      DCHECK(borrow == 0);
1541cb0ef41Sopenharmony_ci      USE(borrow);
1551cb0ef41Sopenharmony_ci      DcheckIntegerPartRange(Z, 1, 2);
1561cb0ef41Sopenharmony_ci    } else {
1571cb0ef41Sopenharmony_ci      // Truncate some least significant digits so that we get vn
1581cb0ef41Sopenharmony_ci      // fraction digits, and compute the integer digit separately.
1591cb0ef41Sopenharmony_ci      // This is the last iteration.
1601cb0ef41Sopenharmony_ci      DCHECK(iteration == 0);
1611cb0ef41Sopenharmony_ci      Z.set_len(vn);
1621cb0ef41Sopenharmony_ci      Digits W_part(W, W.len() - vn - 1, vn);
1631cb0ef41Sopenharmony_ci      Digits U_part(U, U.len() - vn - 1, vn);
1641cb0ef41Sopenharmony_ci      digit_t borrow = SubtractAndReturnBorrow(Z, W_part, U_part);
1651cb0ef41Sopenharmony_ci      digit_t integer_part = W.msd() - U.msd() - borrow;
1661cb0ef41Sopenharmony_ci      DCHECK(integer_part == 1 || integer_part == 2);
1671cb0ef41Sopenharmony_ci      if (integer_part == 2) {
1681cb0ef41Sopenharmony_ci        // This is the rare case where the correct result would be 2.0, but
1691cb0ef41Sopenharmony_ci        // since we can't express that by returning only the fractional part
1701cb0ef41Sopenharmony_ci        // with an implicit 1-digit, we have to return [1.]9999... instead.
1711cb0ef41Sopenharmony_ci        for (int i = 0; i < Z.len(); i++) Z[i] = ~digit_t{0};
1721cb0ef41Sopenharmony_ci      }
1731cb0ef41Sopenharmony_ci      break;
1741cb0ef41Sopenharmony_ci    }
1751cb0ef41Sopenharmony_ci    // (3g, 3h): Update local variables and loop.
1761cb0ef41Sopenharmony_ci    k = target_fraction_bits[iteration];
1771cb0ef41Sopenharmony_ci    iteration--;
1781cb0ef41Sopenharmony_ci  }
1791cb0ef41Sopenharmony_ci}
1801cb0ef41Sopenharmony_ci
1811cb0ef41Sopenharmony_ci// Computes the inverse of V, shifted by kDigitBits * 2 * V.len, accurate to
1821cb0ef41Sopenharmony_ci// V.len+1 digits. The V.len low digits of the result digits will be written
1831cb0ef41Sopenharmony_ci// to Z, plus there is an implicit top digit with value 1.
1841cb0ef41Sopenharmony_ci// (Corner case: if V is minimal, the implicit digit should be 2; in that case
1851cb0ef41Sopenharmony_ci// we return one less than the correct answer. DivideBarrett can handle that.)
1861cb0ef41Sopenharmony_ci// Needs InvertScratchSpace(V.len) digits of scratch space.
1871cb0ef41Sopenharmony_civoid ProcessorImpl::Invert(RWDigits Z, Digits V, RWDigits scratch) {
1881cb0ef41Sopenharmony_ci  DCHECK(Z.len() > V.len());
1891cb0ef41Sopenharmony_ci  DCHECK(V.len() >= 1);
1901cb0ef41Sopenharmony_ci  DCHECK(IsBitNormalized(V));
1911cb0ef41Sopenharmony_ci  DCHECK(scratch.len() >= InvertScratchSpace(V.len()));
1921cb0ef41Sopenharmony_ci
1931cb0ef41Sopenharmony_ci  int vn = V.len();
1941cb0ef41Sopenharmony_ci  if (vn >= kNewtonInversionThreshold) {
1951cb0ef41Sopenharmony_ci    return InvertNewton(Z, V, scratch);
1961cb0ef41Sopenharmony_ci  }
1971cb0ef41Sopenharmony_ci  if (vn == 1) {
1981cb0ef41Sopenharmony_ci    digit_t d = V[0];
1991cb0ef41Sopenharmony_ci    digit_t dummy_remainder;
2001cb0ef41Sopenharmony_ci    Z[0] = digit_div(~d, ~digit_t{0}, d, &dummy_remainder);
2011cb0ef41Sopenharmony_ci    Z[1] = 0;
2021cb0ef41Sopenharmony_ci  } else {
2031cb0ef41Sopenharmony_ci    InvertBasecase(Z, V, scratch);
2041cb0ef41Sopenharmony_ci    if (Z[vn] == 1) {
2051cb0ef41Sopenharmony_ci      for (int i = 0; i < vn; i++) Z[i] = ~digit_t{0};
2061cb0ef41Sopenharmony_ci      Z[vn] = 0;
2071cb0ef41Sopenharmony_ci    }
2081cb0ef41Sopenharmony_ci  }
2091cb0ef41Sopenharmony_ci}
2101cb0ef41Sopenharmony_ci
2111cb0ef41Sopenharmony_ci// This is algorithm 3.5 from the paper.
2121cb0ef41Sopenharmony_ci// Computes Q(uotient) and R(emainder) for A/B using I, which is a
2131cb0ef41Sopenharmony_ci// precomputed approximation of 1/B (e.g. with Invert() above).
2141cb0ef41Sopenharmony_ci// Needs DivideBarrettScratchSpace(A.len) scratch space.
2151cb0ef41Sopenharmony_civoid ProcessorImpl::DivideBarrett(RWDigits Q, RWDigits R, Digits A, Digits B,
2161cb0ef41Sopenharmony_ci                                  Digits I, RWDigits scratch) {
2171cb0ef41Sopenharmony_ci  DCHECK(Q.len() > A.len() - B.len());
2181cb0ef41Sopenharmony_ci  DCHECK(R.len() >= B.len());
2191cb0ef41Sopenharmony_ci  DCHECK(A.len() > B.len());  // Careful: This is *not* '>=' !
2201cb0ef41Sopenharmony_ci  DCHECK(A.len() <= 2 * B.len());
2211cb0ef41Sopenharmony_ci  DCHECK(B.len() > 0);
2221cb0ef41Sopenharmony_ci  DCHECK(IsBitNormalized(B));
2231cb0ef41Sopenharmony_ci  DCHECK(I.len() == A.len() - B.len());
2241cb0ef41Sopenharmony_ci  DCHECK(scratch.len() >= DivideBarrettScratchSpace(A.len()));
2251cb0ef41Sopenharmony_ci
2261cb0ef41Sopenharmony_ci  int orig_q_len = Q.len();
2271cb0ef41Sopenharmony_ci
2281cb0ef41Sopenharmony_ci  // (1): A1 = A with B.len fewer digits.
2291cb0ef41Sopenharmony_ci  Digits A1 = A + B.len();
2301cb0ef41Sopenharmony_ci  DCHECK(A1.len() == I.len());
2311cb0ef41Sopenharmony_ci
2321cb0ef41Sopenharmony_ci  // (2): Q = A1*I with I.len fewer digits.
2331cb0ef41Sopenharmony_ci  // {I} has an implicit high digit with value 1, so we add {A1} to the high
2341cb0ef41Sopenharmony_ci  // part of the multiplication result.
2351cb0ef41Sopenharmony_ci  RWDigits K(scratch, 0, 2 * I.len());
2361cb0ef41Sopenharmony_ci  Multiply(K, A1, I);
2371cb0ef41Sopenharmony_ci  if (should_terminate()) return;
2381cb0ef41Sopenharmony_ci  Q.set_len(I.len() + 1);
2391cb0ef41Sopenharmony_ci  Add(Q, K + I.len(), A1);
2401cb0ef41Sopenharmony_ci  // K is no longer used, can re-use {scratch} for P.
2411cb0ef41Sopenharmony_ci
2421cb0ef41Sopenharmony_ci  // (3): R = A - B*Q (approximate remainder).
2431cb0ef41Sopenharmony_ci  RWDigits P(scratch, 0, A.len() + 1);
2441cb0ef41Sopenharmony_ci  Multiply(P, B, Q);
2451cb0ef41Sopenharmony_ci  if (should_terminate()) return;
2461cb0ef41Sopenharmony_ci  digit_t borrow = SubtractAndReturnBorrow(R, A, Digits(P, 0, B.len()));
2471cb0ef41Sopenharmony_ci  // R may be allocated wider than B, zero out any extra digits if so.
2481cb0ef41Sopenharmony_ci  for (int i = B.len(); i < R.len(); i++) R[i] = 0;
2491cb0ef41Sopenharmony_ci  digit_t r_high = A[B.len()] - P[B.len()] - borrow;
2501cb0ef41Sopenharmony_ci
2511cb0ef41Sopenharmony_ci  // Adjust R and Q so that they become the correct remainder and quotient.
2521cb0ef41Sopenharmony_ci  // The number of iterations is guaranteed to be at most some very small
2531cb0ef41Sopenharmony_ci  // constant, unless the caller gave us a bad approximate quotient.
2541cb0ef41Sopenharmony_ci  if (r_high >> (kDigitBits - 1) == 1) {
2551cb0ef41Sopenharmony_ci    // (5b): R < 0, so R += B
2561cb0ef41Sopenharmony_ci    digit_t q_sub = 0;
2571cb0ef41Sopenharmony_ci    do {
2581cb0ef41Sopenharmony_ci      r_high += AddAndReturnCarry(R, R, B);
2591cb0ef41Sopenharmony_ci      q_sub++;
2601cb0ef41Sopenharmony_ci      DCHECK(q_sub <= 5);
2611cb0ef41Sopenharmony_ci    } while (r_high != 0);
2621cb0ef41Sopenharmony_ci    Subtract(Q, q_sub);
2631cb0ef41Sopenharmony_ci  } else {
2641cb0ef41Sopenharmony_ci    digit_t q_add = 0;
2651cb0ef41Sopenharmony_ci    while (r_high != 0 || GreaterThanOrEqual(R, B)) {
2661cb0ef41Sopenharmony_ci      // (5c): R >= B, so R -= B
2671cb0ef41Sopenharmony_ci      r_high -= SubtractAndReturnBorrow(R, R, B);
2681cb0ef41Sopenharmony_ci      q_add++;
2691cb0ef41Sopenharmony_ci      DCHECK(q_add <= 5);
2701cb0ef41Sopenharmony_ci    }
2711cb0ef41Sopenharmony_ci    Add(Q, q_add);
2721cb0ef41Sopenharmony_ci  }
2731cb0ef41Sopenharmony_ci  // (5a): Return.
2741cb0ef41Sopenharmony_ci  int final_q_len = Q.len();
2751cb0ef41Sopenharmony_ci  Q.set_len(orig_q_len);
2761cb0ef41Sopenharmony_ci  for (int i = final_q_len; i < orig_q_len; i++) Q[i] = 0;
2771cb0ef41Sopenharmony_ci}
2781cb0ef41Sopenharmony_ci
2791cb0ef41Sopenharmony_ci// Computes Q(uotient) and R(emainder) for A/B, using Barrett division.
2801cb0ef41Sopenharmony_civoid ProcessorImpl::DivideBarrett(RWDigits Q, RWDigits R, Digits A, Digits B) {
2811cb0ef41Sopenharmony_ci  DCHECK(Q.len() > A.len() - B.len());
2821cb0ef41Sopenharmony_ci  DCHECK(R.len() >= B.len());
2831cb0ef41Sopenharmony_ci  DCHECK(A.len() > B.len());  // Careful: This is *not* '>=' !
2841cb0ef41Sopenharmony_ci  DCHECK(B.len() > 0);
2851cb0ef41Sopenharmony_ci
2861cb0ef41Sopenharmony_ci  // Normalize B, and shift A by the same amount.
2871cb0ef41Sopenharmony_ci  ShiftedDigits b_normalized(B);
2881cb0ef41Sopenharmony_ci  ShiftedDigits a_normalized(A, b_normalized.shift());
2891cb0ef41Sopenharmony_ci  // Keep the code below more concise.
2901cb0ef41Sopenharmony_ci  B = b_normalized;
2911cb0ef41Sopenharmony_ci  A = a_normalized;
2921cb0ef41Sopenharmony_ci
2931cb0ef41Sopenharmony_ci  // The core DivideBarrett function above only supports A having at most
2941cb0ef41Sopenharmony_ci  // twice as many digits as B. We generalize this to arbitrary inputs
2951cb0ef41Sopenharmony_ci  // similar to Burnikel-Ziegler division by performing a t-by-1 division
2961cb0ef41Sopenharmony_ci  // of B-sized chunks. It's easy to special-case the situation where we
2971cb0ef41Sopenharmony_ci  // don't need to bother.
2981cb0ef41Sopenharmony_ci  int barrett_dividend_length = A.len() <= 2 * B.len() ? A.len() : 2 * B.len();
2991cb0ef41Sopenharmony_ci  int i_len = barrett_dividend_length - B.len();
3001cb0ef41Sopenharmony_ci  ScratchDigits I(i_len + 1);  // +1 is for temporary use by Invert().
3011cb0ef41Sopenharmony_ci  int scratch_len =
3021cb0ef41Sopenharmony_ci      std::max(InvertScratchSpace(i_len),
3031cb0ef41Sopenharmony_ci               DivideBarrettScratchSpace(barrett_dividend_length));
3041cb0ef41Sopenharmony_ci  ScratchDigits scratch(scratch_len);
3051cb0ef41Sopenharmony_ci  Invert(I, Digits(B, B.len() - i_len, i_len), scratch);
3061cb0ef41Sopenharmony_ci  if (should_terminate()) return;
3071cb0ef41Sopenharmony_ci  I.TrimOne();
3081cb0ef41Sopenharmony_ci  DCHECK(I.len() == i_len);
3091cb0ef41Sopenharmony_ci  if (A.len() > 2 * B.len()) {
3101cb0ef41Sopenharmony_ci    // This follows the variable names and and algorithmic steps of
3111cb0ef41Sopenharmony_ci    // DivideBurnikelZiegler().
3121cb0ef41Sopenharmony_ci    int n = B.len();  // Chunk length.
3131cb0ef41Sopenharmony_ci    // (5): {t} is the number of B-sized chunks of A.
3141cb0ef41Sopenharmony_ci    int t = DIV_CEIL(A.len(), n);
3151cb0ef41Sopenharmony_ci    DCHECK(t >= 3);
3161cb0ef41Sopenharmony_ci    // (6)/(7): Z is used for the current 2-chunk block to be divided by B,
3171cb0ef41Sopenharmony_ci    // initialized to the two topmost chunks of A.
3181cb0ef41Sopenharmony_ci    int z_len = n * 2;
3191cb0ef41Sopenharmony_ci    ScratchDigits Z(z_len);
3201cb0ef41Sopenharmony_ci    PutAt(Z, A + n * (t - 2), z_len);
3211cb0ef41Sopenharmony_ci    // (8): For i from t-2 downto 0 do
3221cb0ef41Sopenharmony_ci    int qi_len = n + 1;
3231cb0ef41Sopenharmony_ci    ScratchDigits Qi(qi_len);
3241cb0ef41Sopenharmony_ci    ScratchDigits Ri(n);
3251cb0ef41Sopenharmony_ci    // First iteration unrolled and specialized.
3261cb0ef41Sopenharmony_ci    {
3271cb0ef41Sopenharmony_ci      int i = t - 2;
3281cb0ef41Sopenharmony_ci      DivideBarrett(Qi, Ri, Z, B, I, scratch);
3291cb0ef41Sopenharmony_ci      if (should_terminate()) return;
3301cb0ef41Sopenharmony_ci      RWDigits target = Q + n * i;
3311cb0ef41Sopenharmony_ci      // In the first iteration, all qi_len = n + 1 digits may be used.
3321cb0ef41Sopenharmony_ci      int to_copy = std::min(qi_len, target.len());
3331cb0ef41Sopenharmony_ci      for (int j = 0; j < to_copy; j++) target[j] = Qi[j];
3341cb0ef41Sopenharmony_ci      for (int j = to_copy; j < target.len(); j++) target[j] = 0;
3351cb0ef41Sopenharmony_ci#if DEBUG
3361cb0ef41Sopenharmony_ci      for (int j = to_copy; j < Qi.len(); j++) {
3371cb0ef41Sopenharmony_ci        DCHECK(Qi[j] == 0);
3381cb0ef41Sopenharmony_ci      }
3391cb0ef41Sopenharmony_ci#endif
3401cb0ef41Sopenharmony_ci    }
3411cb0ef41Sopenharmony_ci    // Now loop over any remaining iterations.
3421cb0ef41Sopenharmony_ci    for (int i = t - 3; i >= 0; i--) {
3431cb0ef41Sopenharmony_ci      // (8b): If i > 0, set Z_(i-1) = [Ri, A_(i-1)].
3441cb0ef41Sopenharmony_ci      // (De-duped with unrolled first iteration, hence reading A_(i).)
3451cb0ef41Sopenharmony_ci      PutAt(Z + n, Ri, n);
3461cb0ef41Sopenharmony_ci      PutAt(Z, A + n * i, n);
3471cb0ef41Sopenharmony_ci      // (8a): Compute Qi, Ri such that Zi = B*Qi + Ri.
3481cb0ef41Sopenharmony_ci      DivideBarrett(Qi, Ri, Z, B, I, scratch);
3491cb0ef41Sopenharmony_ci      DCHECK(Qi[qi_len - 1] == 0);
3501cb0ef41Sopenharmony_ci      if (should_terminate()) return;
3511cb0ef41Sopenharmony_ci      // (9): Return Q = [Q_(t-2), ..., Q_0]...
3521cb0ef41Sopenharmony_ci      PutAt(Q + n * i, Qi, n);
3531cb0ef41Sopenharmony_ci    }
3541cb0ef41Sopenharmony_ci    Ri.Normalize();
3551cb0ef41Sopenharmony_ci    DCHECK(Ri.len() <= R.len());
3561cb0ef41Sopenharmony_ci    // (9): ...and R = R_0 * 2^(-leading_zeros).
3571cb0ef41Sopenharmony_ci    RightShift(R, Ri, b_normalized.shift());
3581cb0ef41Sopenharmony_ci  } else {
3591cb0ef41Sopenharmony_ci    DivideBarrett(Q, R, A, B, I, scratch);
3601cb0ef41Sopenharmony_ci    if (should_terminate()) return;
3611cb0ef41Sopenharmony_ci    RightShift(R, R, b_normalized.shift());
3621cb0ef41Sopenharmony_ci  }
3631cb0ef41Sopenharmony_ci}
3641cb0ef41Sopenharmony_ci
3651cb0ef41Sopenharmony_ci}  // namespace bigint
3661cb0ef41Sopenharmony_ci}  // namespace v8
367