11cb0ef41Sopenharmony_ci// Copyright 2021 the V8 project authors. All rights reserved. 21cb0ef41Sopenharmony_ci// Use of this source code is governed by a BSD-style license that can be 31cb0ef41Sopenharmony_ci// found in the LICENSE file. 41cb0ef41Sopenharmony_ci 51cb0ef41Sopenharmony_ci// Barrett division, finding the inverse with Newton's method. 61cb0ef41Sopenharmony_ci// Reference: "Fast Division of Large Integers" by Karl Hasselström, 71cb0ef41Sopenharmony_ci// found at https://treskal.com/s/masters-thesis.pdf 81cb0ef41Sopenharmony_ci 91cb0ef41Sopenharmony_ci// Many thanks to Karl Wiberg, k@w5.se, for both writing up an 101cb0ef41Sopenharmony_ci// understandable theoretical description of the algorithm and privately 111cb0ef41Sopenharmony_ci// providing a demo implementation, on which the implementation in this file is 121cb0ef41Sopenharmony_ci// based. 131cb0ef41Sopenharmony_ci 141cb0ef41Sopenharmony_ci#include <algorithm> 151cb0ef41Sopenharmony_ci 161cb0ef41Sopenharmony_ci#include "src/bigint/bigint-internal.h" 171cb0ef41Sopenharmony_ci#include "src/bigint/digit-arithmetic.h" 181cb0ef41Sopenharmony_ci#include "src/bigint/div-helpers.h" 191cb0ef41Sopenharmony_ci#include "src/bigint/vector-arithmetic.h" 201cb0ef41Sopenharmony_ci 211cb0ef41Sopenharmony_cinamespace v8 { 221cb0ef41Sopenharmony_cinamespace bigint { 231cb0ef41Sopenharmony_ci 241cb0ef41Sopenharmony_cinamespace { 251cb0ef41Sopenharmony_ci 261cb0ef41Sopenharmony_civoid DcheckIntegerPartRange(Digits X, digit_t min, digit_t max) { 271cb0ef41Sopenharmony_ci#if DEBUG 281cb0ef41Sopenharmony_ci digit_t integer_part = X.msd(); 291cb0ef41Sopenharmony_ci DCHECK(integer_part >= min); 301cb0ef41Sopenharmony_ci DCHECK(integer_part <= max); 311cb0ef41Sopenharmony_ci#else 321cb0ef41Sopenharmony_ci USE(X); 331cb0ef41Sopenharmony_ci USE(min); 341cb0ef41Sopenharmony_ci USE(max); 351cb0ef41Sopenharmony_ci#endif 361cb0ef41Sopenharmony_ci} 371cb0ef41Sopenharmony_ci 381cb0ef41Sopenharmony_ci} // namespace 391cb0ef41Sopenharmony_ci 401cb0ef41Sopenharmony_ci// Z := (the fractional part of) 1/V, via naive division. 411cb0ef41Sopenharmony_ci// See comments at {Invert} and {InvertNewton} below for details. 421cb0ef41Sopenharmony_civoid ProcessorImpl::InvertBasecase(RWDigits Z, Digits V, RWDigits scratch) { 431cb0ef41Sopenharmony_ci DCHECK(Z.len() > V.len()); 441cb0ef41Sopenharmony_ci DCHECK(V.len() > 0); 451cb0ef41Sopenharmony_ci DCHECK(scratch.len() >= 2 * V.len()); 461cb0ef41Sopenharmony_ci int n = V.len(); 471cb0ef41Sopenharmony_ci RWDigits X(scratch, 0, 2 * n); 481cb0ef41Sopenharmony_ci digit_t borrow = 0; 491cb0ef41Sopenharmony_ci int i = 0; 501cb0ef41Sopenharmony_ci for (; i < n; i++) X[i] = 0; 511cb0ef41Sopenharmony_ci for (; i < 2 * n; i++) X[i] = digit_sub2(0, V[i - n], borrow, &borrow); 521cb0ef41Sopenharmony_ci DCHECK(borrow == 1); 531cb0ef41Sopenharmony_ci RWDigits R(nullptr, 0); // We don't need the remainder. 541cb0ef41Sopenharmony_ci if (n < kBurnikelThreshold) { 551cb0ef41Sopenharmony_ci DivideSchoolbook(Z, R, X, V); 561cb0ef41Sopenharmony_ci } else { 571cb0ef41Sopenharmony_ci DivideBurnikelZiegler(Z, R, X, V); 581cb0ef41Sopenharmony_ci } 591cb0ef41Sopenharmony_ci} 601cb0ef41Sopenharmony_ci 611cb0ef41Sopenharmony_ci// This is Algorithm 4.2 from the paper. 621cb0ef41Sopenharmony_ci// Computes the inverse of V, shifted by kDigitBits * 2 * V.len, accurate to 631cb0ef41Sopenharmony_ci// V.len+1 digits. The V.len low digits of the result digits will be written 641cb0ef41Sopenharmony_ci// to Z, plus there is an implicit top digit with value 1. 651cb0ef41Sopenharmony_ci// Needs InvertNewtonScratchSpace(V.len) of scratch space. 661cb0ef41Sopenharmony_ci// The result is either correct or off by one (about half the time it is 671cb0ef41Sopenharmony_ci// correct, half the time it is one too much, and in the corner case where V is 681cb0ef41Sopenharmony_ci// minimal and the implicit top digit would have to be 2 it is one too little). 691cb0ef41Sopenharmony_ci// Barrett's division algorithm can handle that, so we don't care. 701cb0ef41Sopenharmony_civoid ProcessorImpl::InvertNewton(RWDigits Z, Digits V, RWDigits scratch) { 711cb0ef41Sopenharmony_ci const int vn = V.len(); 721cb0ef41Sopenharmony_ci DCHECK(Z.len() >= vn); 731cb0ef41Sopenharmony_ci DCHECK(scratch.len() >= InvertNewtonScratchSpace(vn)); 741cb0ef41Sopenharmony_ci const int kSOffset = 0; 751cb0ef41Sopenharmony_ci const int kWOffset = 0; // S and W can share their scratch space. 761cb0ef41Sopenharmony_ci const int kUOffset = vn + kInvertNewtonExtraSpace; 771cb0ef41Sopenharmony_ci 781cb0ef41Sopenharmony_ci // The base case won't work otherwise. 791cb0ef41Sopenharmony_ci DCHECK(V.len() >= 3); 801cb0ef41Sopenharmony_ci 811cb0ef41Sopenharmony_ci constexpr int kBasecasePrecision = kNewtonInversionThreshold - 1; 821cb0ef41Sopenharmony_ci // V must have more digits than the basecase. 831cb0ef41Sopenharmony_ci DCHECK(V.len() > kBasecasePrecision); 841cb0ef41Sopenharmony_ci DCHECK(IsBitNormalized(V)); 851cb0ef41Sopenharmony_ci 861cb0ef41Sopenharmony_ci // Step (1): Setup. 871cb0ef41Sopenharmony_ci // Calculate precision required at each step. 881cb0ef41Sopenharmony_ci // {k} is the number of fraction bits for the current iteration. 891cb0ef41Sopenharmony_ci int k = vn * kDigitBits; 901cb0ef41Sopenharmony_ci int target_fraction_bits[8 * sizeof(vn)]; // "k_i" in the paper. 911cb0ef41Sopenharmony_ci int iteration = -1; // "i" in the paper, except inverted to run downwards. 921cb0ef41Sopenharmony_ci while (k > kBasecasePrecision * kDigitBits) { 931cb0ef41Sopenharmony_ci iteration++; 941cb0ef41Sopenharmony_ci target_fraction_bits[iteration] = k; 951cb0ef41Sopenharmony_ci k = DIV_CEIL(k, 2); 961cb0ef41Sopenharmony_ci } 971cb0ef41Sopenharmony_ci // At this point, k <= kBasecasePrecision*kDigitBits is the number of 981cb0ef41Sopenharmony_ci // fraction bits to use in the base case. {iteration} is the highest index 991cb0ef41Sopenharmony_ci // in use for f[]. 1001cb0ef41Sopenharmony_ci 1011cb0ef41Sopenharmony_ci // Step (2): Initial approximation. 1021cb0ef41Sopenharmony_ci int initial_digits = DIV_CEIL(k + 1, kDigitBits); 1031cb0ef41Sopenharmony_ci Digits top_part_of_v(V, vn - initial_digits, initial_digits); 1041cb0ef41Sopenharmony_ci InvertBasecase(Z, top_part_of_v, scratch); 1051cb0ef41Sopenharmony_ci Z[initial_digits] = Z[initial_digits] + 1; // Implicit top digit. 1061cb0ef41Sopenharmony_ci // From now on, we'll keep Z.len updated to the part that's already computed. 1071cb0ef41Sopenharmony_ci Z.set_len(initial_digits + 1); 1081cb0ef41Sopenharmony_ci 1091cb0ef41Sopenharmony_ci // Step (3): Precision doubling loop. 1101cb0ef41Sopenharmony_ci while (true) { 1111cb0ef41Sopenharmony_ci DcheckIntegerPartRange(Z, 1, 2); 1121cb0ef41Sopenharmony_ci 1131cb0ef41Sopenharmony_ci // (3b): S = Z^2 1141cb0ef41Sopenharmony_ci RWDigits S(scratch, kSOffset, 2 * Z.len()); 1151cb0ef41Sopenharmony_ci Multiply(S, Z, Z); 1161cb0ef41Sopenharmony_ci if (should_terminate()) return; 1171cb0ef41Sopenharmony_ci S.TrimOne(); // Top digit of S is unused. 1181cb0ef41Sopenharmony_ci DcheckIntegerPartRange(S, 1, 4); 1191cb0ef41Sopenharmony_ci 1201cb0ef41Sopenharmony_ci // (3c): T = V, truncated so that at least 2k+3 fraction bits remain. 1211cb0ef41Sopenharmony_ci int fraction_digits = DIV_CEIL(2 * k + 3, kDigitBits); 1221cb0ef41Sopenharmony_ci int t_len = std::min(V.len(), fraction_digits); 1231cb0ef41Sopenharmony_ci Digits T(V, V.len() - t_len, t_len); 1241cb0ef41Sopenharmony_ci 1251cb0ef41Sopenharmony_ci // (3d): U = T * S, truncated so that at least 2k+1 fraction bits remain 1261cb0ef41Sopenharmony_ci // (U has one integer digit, which might be zero). 1271cb0ef41Sopenharmony_ci fraction_digits = DIV_CEIL(2 * k + 1, kDigitBits); 1281cb0ef41Sopenharmony_ci RWDigits U(scratch, kUOffset, S.len() + T.len()); 1291cb0ef41Sopenharmony_ci DCHECK(U.len() > fraction_digits); 1301cb0ef41Sopenharmony_ci Multiply(U, S, T); 1311cb0ef41Sopenharmony_ci if (should_terminate()) return; 1321cb0ef41Sopenharmony_ci U = U + (U.len() - (1 + fraction_digits)); 1331cb0ef41Sopenharmony_ci DcheckIntegerPartRange(U, 0, 3); 1341cb0ef41Sopenharmony_ci 1351cb0ef41Sopenharmony_ci // (3e): W = 2 * Z, padded with "0" fraction bits so that it has the 1361cb0ef41Sopenharmony_ci // same number of fraction bits as U. 1371cb0ef41Sopenharmony_ci DCHECK(U.len() >= Z.len()); 1381cb0ef41Sopenharmony_ci RWDigits W(scratch, kWOffset, U.len()); 1391cb0ef41Sopenharmony_ci int padding_digits = U.len() - Z.len(); 1401cb0ef41Sopenharmony_ci for (int i = 0; i < padding_digits; i++) W[i] = 0; 1411cb0ef41Sopenharmony_ci LeftShift(W + padding_digits, Z, 1); 1421cb0ef41Sopenharmony_ci DcheckIntegerPartRange(W, 2, 4); 1431cb0ef41Sopenharmony_ci 1441cb0ef41Sopenharmony_ci // (3f): Z = W - U. 1451cb0ef41Sopenharmony_ci // This check is '<=' instead of '<' because U's top digit is its 1461cb0ef41Sopenharmony_ci // integer part, and we want vn fraction digits. 1471cb0ef41Sopenharmony_ci if (U.len() <= vn) { 1481cb0ef41Sopenharmony_ci // Normal subtraction. 1491cb0ef41Sopenharmony_ci // This is not the last iteration. 1501cb0ef41Sopenharmony_ci DCHECK(iteration > 0); 1511cb0ef41Sopenharmony_ci Z.set_len(U.len()); 1521cb0ef41Sopenharmony_ci digit_t borrow = SubtractAndReturnBorrow(Z, W, U); 1531cb0ef41Sopenharmony_ci DCHECK(borrow == 0); 1541cb0ef41Sopenharmony_ci USE(borrow); 1551cb0ef41Sopenharmony_ci DcheckIntegerPartRange(Z, 1, 2); 1561cb0ef41Sopenharmony_ci } else { 1571cb0ef41Sopenharmony_ci // Truncate some least significant digits so that we get vn 1581cb0ef41Sopenharmony_ci // fraction digits, and compute the integer digit separately. 1591cb0ef41Sopenharmony_ci // This is the last iteration. 1601cb0ef41Sopenharmony_ci DCHECK(iteration == 0); 1611cb0ef41Sopenharmony_ci Z.set_len(vn); 1621cb0ef41Sopenharmony_ci Digits W_part(W, W.len() - vn - 1, vn); 1631cb0ef41Sopenharmony_ci Digits U_part(U, U.len() - vn - 1, vn); 1641cb0ef41Sopenharmony_ci digit_t borrow = SubtractAndReturnBorrow(Z, W_part, U_part); 1651cb0ef41Sopenharmony_ci digit_t integer_part = W.msd() - U.msd() - borrow; 1661cb0ef41Sopenharmony_ci DCHECK(integer_part == 1 || integer_part == 2); 1671cb0ef41Sopenharmony_ci if (integer_part == 2) { 1681cb0ef41Sopenharmony_ci // This is the rare case where the correct result would be 2.0, but 1691cb0ef41Sopenharmony_ci // since we can't express that by returning only the fractional part 1701cb0ef41Sopenharmony_ci // with an implicit 1-digit, we have to return [1.]9999... instead. 1711cb0ef41Sopenharmony_ci for (int i = 0; i < Z.len(); i++) Z[i] = ~digit_t{0}; 1721cb0ef41Sopenharmony_ci } 1731cb0ef41Sopenharmony_ci break; 1741cb0ef41Sopenharmony_ci } 1751cb0ef41Sopenharmony_ci // (3g, 3h): Update local variables and loop. 1761cb0ef41Sopenharmony_ci k = target_fraction_bits[iteration]; 1771cb0ef41Sopenharmony_ci iteration--; 1781cb0ef41Sopenharmony_ci } 1791cb0ef41Sopenharmony_ci} 1801cb0ef41Sopenharmony_ci 1811cb0ef41Sopenharmony_ci// Computes the inverse of V, shifted by kDigitBits * 2 * V.len, accurate to 1821cb0ef41Sopenharmony_ci// V.len+1 digits. The V.len low digits of the result digits will be written 1831cb0ef41Sopenharmony_ci// to Z, plus there is an implicit top digit with value 1. 1841cb0ef41Sopenharmony_ci// (Corner case: if V is minimal, the implicit digit should be 2; in that case 1851cb0ef41Sopenharmony_ci// we return one less than the correct answer. DivideBarrett can handle that.) 1861cb0ef41Sopenharmony_ci// Needs InvertScratchSpace(V.len) digits of scratch space. 1871cb0ef41Sopenharmony_civoid ProcessorImpl::Invert(RWDigits Z, Digits V, RWDigits scratch) { 1881cb0ef41Sopenharmony_ci DCHECK(Z.len() > V.len()); 1891cb0ef41Sopenharmony_ci DCHECK(V.len() >= 1); 1901cb0ef41Sopenharmony_ci DCHECK(IsBitNormalized(V)); 1911cb0ef41Sopenharmony_ci DCHECK(scratch.len() >= InvertScratchSpace(V.len())); 1921cb0ef41Sopenharmony_ci 1931cb0ef41Sopenharmony_ci int vn = V.len(); 1941cb0ef41Sopenharmony_ci if (vn >= kNewtonInversionThreshold) { 1951cb0ef41Sopenharmony_ci return InvertNewton(Z, V, scratch); 1961cb0ef41Sopenharmony_ci } 1971cb0ef41Sopenharmony_ci if (vn == 1) { 1981cb0ef41Sopenharmony_ci digit_t d = V[0]; 1991cb0ef41Sopenharmony_ci digit_t dummy_remainder; 2001cb0ef41Sopenharmony_ci Z[0] = digit_div(~d, ~digit_t{0}, d, &dummy_remainder); 2011cb0ef41Sopenharmony_ci Z[1] = 0; 2021cb0ef41Sopenharmony_ci } else { 2031cb0ef41Sopenharmony_ci InvertBasecase(Z, V, scratch); 2041cb0ef41Sopenharmony_ci if (Z[vn] == 1) { 2051cb0ef41Sopenharmony_ci for (int i = 0; i < vn; i++) Z[i] = ~digit_t{0}; 2061cb0ef41Sopenharmony_ci Z[vn] = 0; 2071cb0ef41Sopenharmony_ci } 2081cb0ef41Sopenharmony_ci } 2091cb0ef41Sopenharmony_ci} 2101cb0ef41Sopenharmony_ci 2111cb0ef41Sopenharmony_ci// This is algorithm 3.5 from the paper. 2121cb0ef41Sopenharmony_ci// Computes Q(uotient) and R(emainder) for A/B using I, which is a 2131cb0ef41Sopenharmony_ci// precomputed approximation of 1/B (e.g. with Invert() above). 2141cb0ef41Sopenharmony_ci// Needs DivideBarrettScratchSpace(A.len) scratch space. 2151cb0ef41Sopenharmony_civoid ProcessorImpl::DivideBarrett(RWDigits Q, RWDigits R, Digits A, Digits B, 2161cb0ef41Sopenharmony_ci Digits I, RWDigits scratch) { 2171cb0ef41Sopenharmony_ci DCHECK(Q.len() > A.len() - B.len()); 2181cb0ef41Sopenharmony_ci DCHECK(R.len() >= B.len()); 2191cb0ef41Sopenharmony_ci DCHECK(A.len() > B.len()); // Careful: This is *not* '>=' ! 2201cb0ef41Sopenharmony_ci DCHECK(A.len() <= 2 * B.len()); 2211cb0ef41Sopenharmony_ci DCHECK(B.len() > 0); 2221cb0ef41Sopenharmony_ci DCHECK(IsBitNormalized(B)); 2231cb0ef41Sopenharmony_ci DCHECK(I.len() == A.len() - B.len()); 2241cb0ef41Sopenharmony_ci DCHECK(scratch.len() >= DivideBarrettScratchSpace(A.len())); 2251cb0ef41Sopenharmony_ci 2261cb0ef41Sopenharmony_ci int orig_q_len = Q.len(); 2271cb0ef41Sopenharmony_ci 2281cb0ef41Sopenharmony_ci // (1): A1 = A with B.len fewer digits. 2291cb0ef41Sopenharmony_ci Digits A1 = A + B.len(); 2301cb0ef41Sopenharmony_ci DCHECK(A1.len() == I.len()); 2311cb0ef41Sopenharmony_ci 2321cb0ef41Sopenharmony_ci // (2): Q = A1*I with I.len fewer digits. 2331cb0ef41Sopenharmony_ci // {I} has an implicit high digit with value 1, so we add {A1} to the high 2341cb0ef41Sopenharmony_ci // part of the multiplication result. 2351cb0ef41Sopenharmony_ci RWDigits K(scratch, 0, 2 * I.len()); 2361cb0ef41Sopenharmony_ci Multiply(K, A1, I); 2371cb0ef41Sopenharmony_ci if (should_terminate()) return; 2381cb0ef41Sopenharmony_ci Q.set_len(I.len() + 1); 2391cb0ef41Sopenharmony_ci Add(Q, K + I.len(), A1); 2401cb0ef41Sopenharmony_ci // K is no longer used, can re-use {scratch} for P. 2411cb0ef41Sopenharmony_ci 2421cb0ef41Sopenharmony_ci // (3): R = A - B*Q (approximate remainder). 2431cb0ef41Sopenharmony_ci RWDigits P(scratch, 0, A.len() + 1); 2441cb0ef41Sopenharmony_ci Multiply(P, B, Q); 2451cb0ef41Sopenharmony_ci if (should_terminate()) return; 2461cb0ef41Sopenharmony_ci digit_t borrow = SubtractAndReturnBorrow(R, A, Digits(P, 0, B.len())); 2471cb0ef41Sopenharmony_ci // R may be allocated wider than B, zero out any extra digits if so. 2481cb0ef41Sopenharmony_ci for (int i = B.len(); i < R.len(); i++) R[i] = 0; 2491cb0ef41Sopenharmony_ci digit_t r_high = A[B.len()] - P[B.len()] - borrow; 2501cb0ef41Sopenharmony_ci 2511cb0ef41Sopenharmony_ci // Adjust R and Q so that they become the correct remainder and quotient. 2521cb0ef41Sopenharmony_ci // The number of iterations is guaranteed to be at most some very small 2531cb0ef41Sopenharmony_ci // constant, unless the caller gave us a bad approximate quotient. 2541cb0ef41Sopenharmony_ci if (r_high >> (kDigitBits - 1) == 1) { 2551cb0ef41Sopenharmony_ci // (5b): R < 0, so R += B 2561cb0ef41Sopenharmony_ci digit_t q_sub = 0; 2571cb0ef41Sopenharmony_ci do { 2581cb0ef41Sopenharmony_ci r_high += AddAndReturnCarry(R, R, B); 2591cb0ef41Sopenharmony_ci q_sub++; 2601cb0ef41Sopenharmony_ci DCHECK(q_sub <= 5); 2611cb0ef41Sopenharmony_ci } while (r_high != 0); 2621cb0ef41Sopenharmony_ci Subtract(Q, q_sub); 2631cb0ef41Sopenharmony_ci } else { 2641cb0ef41Sopenharmony_ci digit_t q_add = 0; 2651cb0ef41Sopenharmony_ci while (r_high != 0 || GreaterThanOrEqual(R, B)) { 2661cb0ef41Sopenharmony_ci // (5c): R >= B, so R -= B 2671cb0ef41Sopenharmony_ci r_high -= SubtractAndReturnBorrow(R, R, B); 2681cb0ef41Sopenharmony_ci q_add++; 2691cb0ef41Sopenharmony_ci DCHECK(q_add <= 5); 2701cb0ef41Sopenharmony_ci } 2711cb0ef41Sopenharmony_ci Add(Q, q_add); 2721cb0ef41Sopenharmony_ci } 2731cb0ef41Sopenharmony_ci // (5a): Return. 2741cb0ef41Sopenharmony_ci int final_q_len = Q.len(); 2751cb0ef41Sopenharmony_ci Q.set_len(orig_q_len); 2761cb0ef41Sopenharmony_ci for (int i = final_q_len; i < orig_q_len; i++) Q[i] = 0; 2771cb0ef41Sopenharmony_ci} 2781cb0ef41Sopenharmony_ci 2791cb0ef41Sopenharmony_ci// Computes Q(uotient) and R(emainder) for A/B, using Barrett division. 2801cb0ef41Sopenharmony_civoid ProcessorImpl::DivideBarrett(RWDigits Q, RWDigits R, Digits A, Digits B) { 2811cb0ef41Sopenharmony_ci DCHECK(Q.len() > A.len() - B.len()); 2821cb0ef41Sopenharmony_ci DCHECK(R.len() >= B.len()); 2831cb0ef41Sopenharmony_ci DCHECK(A.len() > B.len()); // Careful: This is *not* '>=' ! 2841cb0ef41Sopenharmony_ci DCHECK(B.len() > 0); 2851cb0ef41Sopenharmony_ci 2861cb0ef41Sopenharmony_ci // Normalize B, and shift A by the same amount. 2871cb0ef41Sopenharmony_ci ShiftedDigits b_normalized(B); 2881cb0ef41Sopenharmony_ci ShiftedDigits a_normalized(A, b_normalized.shift()); 2891cb0ef41Sopenharmony_ci // Keep the code below more concise. 2901cb0ef41Sopenharmony_ci B = b_normalized; 2911cb0ef41Sopenharmony_ci A = a_normalized; 2921cb0ef41Sopenharmony_ci 2931cb0ef41Sopenharmony_ci // The core DivideBarrett function above only supports A having at most 2941cb0ef41Sopenharmony_ci // twice as many digits as B. We generalize this to arbitrary inputs 2951cb0ef41Sopenharmony_ci // similar to Burnikel-Ziegler division by performing a t-by-1 division 2961cb0ef41Sopenharmony_ci // of B-sized chunks. It's easy to special-case the situation where we 2971cb0ef41Sopenharmony_ci // don't need to bother. 2981cb0ef41Sopenharmony_ci int barrett_dividend_length = A.len() <= 2 * B.len() ? A.len() : 2 * B.len(); 2991cb0ef41Sopenharmony_ci int i_len = barrett_dividend_length - B.len(); 3001cb0ef41Sopenharmony_ci ScratchDigits I(i_len + 1); // +1 is for temporary use by Invert(). 3011cb0ef41Sopenharmony_ci int scratch_len = 3021cb0ef41Sopenharmony_ci std::max(InvertScratchSpace(i_len), 3031cb0ef41Sopenharmony_ci DivideBarrettScratchSpace(barrett_dividend_length)); 3041cb0ef41Sopenharmony_ci ScratchDigits scratch(scratch_len); 3051cb0ef41Sopenharmony_ci Invert(I, Digits(B, B.len() - i_len, i_len), scratch); 3061cb0ef41Sopenharmony_ci if (should_terminate()) return; 3071cb0ef41Sopenharmony_ci I.TrimOne(); 3081cb0ef41Sopenharmony_ci DCHECK(I.len() == i_len); 3091cb0ef41Sopenharmony_ci if (A.len() > 2 * B.len()) { 3101cb0ef41Sopenharmony_ci // This follows the variable names and and algorithmic steps of 3111cb0ef41Sopenharmony_ci // DivideBurnikelZiegler(). 3121cb0ef41Sopenharmony_ci int n = B.len(); // Chunk length. 3131cb0ef41Sopenharmony_ci // (5): {t} is the number of B-sized chunks of A. 3141cb0ef41Sopenharmony_ci int t = DIV_CEIL(A.len(), n); 3151cb0ef41Sopenharmony_ci DCHECK(t >= 3); 3161cb0ef41Sopenharmony_ci // (6)/(7): Z is used for the current 2-chunk block to be divided by B, 3171cb0ef41Sopenharmony_ci // initialized to the two topmost chunks of A. 3181cb0ef41Sopenharmony_ci int z_len = n * 2; 3191cb0ef41Sopenharmony_ci ScratchDigits Z(z_len); 3201cb0ef41Sopenharmony_ci PutAt(Z, A + n * (t - 2), z_len); 3211cb0ef41Sopenharmony_ci // (8): For i from t-2 downto 0 do 3221cb0ef41Sopenharmony_ci int qi_len = n + 1; 3231cb0ef41Sopenharmony_ci ScratchDigits Qi(qi_len); 3241cb0ef41Sopenharmony_ci ScratchDigits Ri(n); 3251cb0ef41Sopenharmony_ci // First iteration unrolled and specialized. 3261cb0ef41Sopenharmony_ci { 3271cb0ef41Sopenharmony_ci int i = t - 2; 3281cb0ef41Sopenharmony_ci DivideBarrett(Qi, Ri, Z, B, I, scratch); 3291cb0ef41Sopenharmony_ci if (should_terminate()) return; 3301cb0ef41Sopenharmony_ci RWDigits target = Q + n * i; 3311cb0ef41Sopenharmony_ci // In the first iteration, all qi_len = n + 1 digits may be used. 3321cb0ef41Sopenharmony_ci int to_copy = std::min(qi_len, target.len()); 3331cb0ef41Sopenharmony_ci for (int j = 0; j < to_copy; j++) target[j] = Qi[j]; 3341cb0ef41Sopenharmony_ci for (int j = to_copy; j < target.len(); j++) target[j] = 0; 3351cb0ef41Sopenharmony_ci#if DEBUG 3361cb0ef41Sopenharmony_ci for (int j = to_copy; j < Qi.len(); j++) { 3371cb0ef41Sopenharmony_ci DCHECK(Qi[j] == 0); 3381cb0ef41Sopenharmony_ci } 3391cb0ef41Sopenharmony_ci#endif 3401cb0ef41Sopenharmony_ci } 3411cb0ef41Sopenharmony_ci // Now loop over any remaining iterations. 3421cb0ef41Sopenharmony_ci for (int i = t - 3; i >= 0; i--) { 3431cb0ef41Sopenharmony_ci // (8b): If i > 0, set Z_(i-1) = [Ri, A_(i-1)]. 3441cb0ef41Sopenharmony_ci // (De-duped with unrolled first iteration, hence reading A_(i).) 3451cb0ef41Sopenharmony_ci PutAt(Z + n, Ri, n); 3461cb0ef41Sopenharmony_ci PutAt(Z, A + n * i, n); 3471cb0ef41Sopenharmony_ci // (8a): Compute Qi, Ri such that Zi = B*Qi + Ri. 3481cb0ef41Sopenharmony_ci DivideBarrett(Qi, Ri, Z, B, I, scratch); 3491cb0ef41Sopenharmony_ci DCHECK(Qi[qi_len - 1] == 0); 3501cb0ef41Sopenharmony_ci if (should_terminate()) return; 3511cb0ef41Sopenharmony_ci // (9): Return Q = [Q_(t-2), ..., Q_0]... 3521cb0ef41Sopenharmony_ci PutAt(Q + n * i, Qi, n); 3531cb0ef41Sopenharmony_ci } 3541cb0ef41Sopenharmony_ci Ri.Normalize(); 3551cb0ef41Sopenharmony_ci DCHECK(Ri.len() <= R.len()); 3561cb0ef41Sopenharmony_ci // (9): ...and R = R_0 * 2^(-leading_zeros). 3571cb0ef41Sopenharmony_ci RightShift(R, Ri, b_normalized.shift()); 3581cb0ef41Sopenharmony_ci } else { 3591cb0ef41Sopenharmony_ci DivideBarrett(Q, R, A, B, I, scratch); 3601cb0ef41Sopenharmony_ci if (should_terminate()) return; 3611cb0ef41Sopenharmony_ci RightShift(R, R, b_normalized.shift()); 3621cb0ef41Sopenharmony_ci } 3631cb0ef41Sopenharmony_ci} 3641cb0ef41Sopenharmony_ci 3651cb0ef41Sopenharmony_ci} // namespace bigint 3661cb0ef41Sopenharmony_ci} // namespace v8 367