11cb0ef41Sopenharmony_ci// Copyright 2021 the V8 project authors. All rights reserved.
21cb0ef41Sopenharmony_ci// Use of this source code is governed by a BSD-style license that can be
31cb0ef41Sopenharmony_ci// found in the LICENSE file.
41cb0ef41Sopenharmony_ci
51cb0ef41Sopenharmony_ci// Karatsuba multiplication. This is loosely based on Go's implementation
61cb0ef41Sopenharmony_ci// found at https://golang.org/src/math/big/nat.go, licensed as follows:
71cb0ef41Sopenharmony_ci//
81cb0ef41Sopenharmony_ci// Copyright 2009 The Go Authors. All rights reserved.
91cb0ef41Sopenharmony_ci// Use of this source code is governed by a BSD-style
101cb0ef41Sopenharmony_ci// license that can be found in the LICENSE file [1].
111cb0ef41Sopenharmony_ci//
121cb0ef41Sopenharmony_ci// [1] https://golang.org/LICENSE
131cb0ef41Sopenharmony_ci
141cb0ef41Sopenharmony_ci#include <algorithm>
151cb0ef41Sopenharmony_ci#include <utility>
161cb0ef41Sopenharmony_ci
171cb0ef41Sopenharmony_ci#include "src/bigint/bigint-internal.h"
181cb0ef41Sopenharmony_ci#include "src/bigint/digit-arithmetic.h"
191cb0ef41Sopenharmony_ci#include "src/bigint/util.h"
201cb0ef41Sopenharmony_ci#include "src/bigint/vector-arithmetic.h"
211cb0ef41Sopenharmony_ci
221cb0ef41Sopenharmony_cinamespace v8 {
231cb0ef41Sopenharmony_cinamespace bigint {
241cb0ef41Sopenharmony_ci
251cb0ef41Sopenharmony_ci// If Karatsuba is the best supported algorithm, then it must check for
261cb0ef41Sopenharmony_ci// termination requests. If there are more advanced algorithms available
271cb0ef41Sopenharmony_ci// for larger inputs, then Karatsuba will only be used for sufficiently
281cb0ef41Sopenharmony_ci// small chunks that checking for termination requests is not necessary.
291cb0ef41Sopenharmony_ci#if V8_ADVANCED_BIGINT_ALGORITHMS
301cb0ef41Sopenharmony_ci#define MAYBE_TERMINATE
311cb0ef41Sopenharmony_ci#else
321cb0ef41Sopenharmony_ci#define MAYBE_TERMINATE \
331cb0ef41Sopenharmony_ci  if (should_terminate()) return;
341cb0ef41Sopenharmony_ci#endif
351cb0ef41Sopenharmony_ci
361cb0ef41Sopenharmony_cinamespace {
371cb0ef41Sopenharmony_ci
381cb0ef41Sopenharmony_ci// The Karatsuba algorithm sometimes finishes more quickly when the
391cb0ef41Sopenharmony_ci// input length is rounded up a bit. This method encodes some heuristics
401cb0ef41Sopenharmony_ci// to accomplish this. The details have been determined experimentally.
411cb0ef41Sopenharmony_ciint RoundUpLen(int len) {
421cb0ef41Sopenharmony_ci  if (len <= 36) return RoundUp(len, 2);
431cb0ef41Sopenharmony_ci  // Keep the 4 or 5 most significant non-zero bits.
441cb0ef41Sopenharmony_ci  int shift = BitLength(len) - 5;
451cb0ef41Sopenharmony_ci  if ((len >> shift) >= 0x18) {
461cb0ef41Sopenharmony_ci    shift++;
471cb0ef41Sopenharmony_ci  }
481cb0ef41Sopenharmony_ci  // Round up, unless we're only just above the threshold. This smoothes
491cb0ef41Sopenharmony_ci  // the steps by which time goes up as input size increases.
501cb0ef41Sopenharmony_ci  int additive = ((1 << shift) - 1);
511cb0ef41Sopenharmony_ci  if (shift >= 2 && (len & additive) < (1 << (shift - 2))) {
521cb0ef41Sopenharmony_ci    return len;
531cb0ef41Sopenharmony_ci  }
541cb0ef41Sopenharmony_ci  return ((len + additive) >> shift) << shift;
551cb0ef41Sopenharmony_ci}
561cb0ef41Sopenharmony_ci
571cb0ef41Sopenharmony_ci// This method makes the final decision how much to bump up the input size.
581cb0ef41Sopenharmony_ciint KaratsubaLength(int n) {
591cb0ef41Sopenharmony_ci  n = RoundUpLen(n);
601cb0ef41Sopenharmony_ci  int i = 0;
611cb0ef41Sopenharmony_ci  while (n > kKaratsubaThreshold) {
621cb0ef41Sopenharmony_ci    n >>= 1;
631cb0ef41Sopenharmony_ci    i++;
641cb0ef41Sopenharmony_ci  }
651cb0ef41Sopenharmony_ci  return n << i;
661cb0ef41Sopenharmony_ci}
671cb0ef41Sopenharmony_ci
681cb0ef41Sopenharmony_ci// Performs the specific subtraction required by {KaratsubaMain} below.
691cb0ef41Sopenharmony_civoid KaratsubaSubtractionHelper(RWDigits result, Digits X, Digits Y,
701cb0ef41Sopenharmony_ci                                int* sign) {
711cb0ef41Sopenharmony_ci  X.Normalize();
721cb0ef41Sopenharmony_ci  Y.Normalize();
731cb0ef41Sopenharmony_ci  digit_t borrow = 0;
741cb0ef41Sopenharmony_ci  int i = 0;
751cb0ef41Sopenharmony_ci  if (!GreaterThanOrEqual(X, Y)) {
761cb0ef41Sopenharmony_ci    *sign = -(*sign);
771cb0ef41Sopenharmony_ci    std::swap(X, Y);
781cb0ef41Sopenharmony_ci  }
791cb0ef41Sopenharmony_ci  for (; i < Y.len(); i++) {
801cb0ef41Sopenharmony_ci    result[i] = digit_sub2(X[i], Y[i], borrow, &borrow);
811cb0ef41Sopenharmony_ci  }
821cb0ef41Sopenharmony_ci  for (; i < X.len(); i++) {
831cb0ef41Sopenharmony_ci    result[i] = digit_sub(X[i], borrow, &borrow);
841cb0ef41Sopenharmony_ci  }
851cb0ef41Sopenharmony_ci  DCHECK(borrow == 0);
861cb0ef41Sopenharmony_ci  for (; i < result.len(); i++) result[i] = 0;
871cb0ef41Sopenharmony_ci}
881cb0ef41Sopenharmony_ci
891cb0ef41Sopenharmony_ci}  // namespace
901cb0ef41Sopenharmony_ci
911cb0ef41Sopenharmony_civoid ProcessorImpl::MultiplyKaratsuba(RWDigits Z, Digits X, Digits Y) {
921cb0ef41Sopenharmony_ci  DCHECK(X.len() >= Y.len());
931cb0ef41Sopenharmony_ci  DCHECK(Y.len() >= kKaratsubaThreshold);
941cb0ef41Sopenharmony_ci  DCHECK(Z.len() >= X.len() + Y.len());
951cb0ef41Sopenharmony_ci  int k = KaratsubaLength(Y.len());
961cb0ef41Sopenharmony_ci  int scratch_len = 4 * k;
971cb0ef41Sopenharmony_ci  ScratchDigits scratch(scratch_len);
981cb0ef41Sopenharmony_ci  KaratsubaStart(Z, X, Y, scratch, k);
991cb0ef41Sopenharmony_ci}
1001cb0ef41Sopenharmony_ci
1011cb0ef41Sopenharmony_ci// Entry point for Karatsuba-based multiplication, takes care of inputs
1021cb0ef41Sopenharmony_ci// with unequal lengths by chopping the larger into chunks.
1031cb0ef41Sopenharmony_civoid ProcessorImpl::KaratsubaStart(RWDigits Z, Digits X, Digits Y,
1041cb0ef41Sopenharmony_ci                                   RWDigits scratch, int k) {
1051cb0ef41Sopenharmony_ci  KaratsubaMain(Z, X, Y, scratch, k);
1061cb0ef41Sopenharmony_ci  MAYBE_TERMINATE
1071cb0ef41Sopenharmony_ci  for (int i = 2 * k; i < Z.len(); i++) Z[i] = 0;
1081cb0ef41Sopenharmony_ci  if (k < Y.len() || X.len() != Y.len()) {
1091cb0ef41Sopenharmony_ci    ScratchDigits T(2 * k);
1101cb0ef41Sopenharmony_ci    // Add X0 * Y1 * b.
1111cb0ef41Sopenharmony_ci    Digits X0(X, 0, k);
1121cb0ef41Sopenharmony_ci    Digits Y1 = Y + std::min(k, Y.len());
1131cb0ef41Sopenharmony_ci    if (Y1.len() > 0) {
1141cb0ef41Sopenharmony_ci      KaratsubaChunk(T, X0, Y1, scratch);
1151cb0ef41Sopenharmony_ci      MAYBE_TERMINATE
1161cb0ef41Sopenharmony_ci      AddAndReturnOverflow(Z + k, T);  // Can't overflow.
1171cb0ef41Sopenharmony_ci    }
1181cb0ef41Sopenharmony_ci
1191cb0ef41Sopenharmony_ci    // Add Xi * Y0 << i and Xi * Y1 * b << (i + k).
1201cb0ef41Sopenharmony_ci    Digits Y0(Y, 0, k);
1211cb0ef41Sopenharmony_ci    for (int i = k; i < X.len(); i += k) {
1221cb0ef41Sopenharmony_ci      Digits Xi(X, i, k);
1231cb0ef41Sopenharmony_ci      KaratsubaChunk(T, Xi, Y0, scratch);
1241cb0ef41Sopenharmony_ci      MAYBE_TERMINATE
1251cb0ef41Sopenharmony_ci      AddAndReturnOverflow(Z + i, T);  // Can't overflow.
1261cb0ef41Sopenharmony_ci      if (Y1.len() > 0) {
1271cb0ef41Sopenharmony_ci        KaratsubaChunk(T, Xi, Y1, scratch);
1281cb0ef41Sopenharmony_ci        MAYBE_TERMINATE
1291cb0ef41Sopenharmony_ci        AddAndReturnOverflow(Z + (i + k), T);  // Can't overflow.
1301cb0ef41Sopenharmony_ci      }
1311cb0ef41Sopenharmony_ci    }
1321cb0ef41Sopenharmony_ci  }
1331cb0ef41Sopenharmony_ci}
1341cb0ef41Sopenharmony_ci
1351cb0ef41Sopenharmony_ci// Entry point for chunk-wise multiplications, selects an appropriate
1361cb0ef41Sopenharmony_ci// algorithm for the inputs based on their sizes.
1371cb0ef41Sopenharmony_civoid ProcessorImpl::KaratsubaChunk(RWDigits Z, Digits X, Digits Y,
1381cb0ef41Sopenharmony_ci                                   RWDigits scratch) {
1391cb0ef41Sopenharmony_ci  X.Normalize();
1401cb0ef41Sopenharmony_ci  Y.Normalize();
1411cb0ef41Sopenharmony_ci  if (X.len() == 0 || Y.len() == 0) return Z.Clear();
1421cb0ef41Sopenharmony_ci  if (X.len() < Y.len()) std::swap(X, Y);
1431cb0ef41Sopenharmony_ci  if (Y.len() == 1) return MultiplySingle(Z, X, Y[0]);
1441cb0ef41Sopenharmony_ci  if (Y.len() < kKaratsubaThreshold) return MultiplySchoolbook(Z, X, Y);
1451cb0ef41Sopenharmony_ci  int k = KaratsubaLength(Y.len());
1461cb0ef41Sopenharmony_ci  DCHECK(scratch.len() >= 4 * k);
1471cb0ef41Sopenharmony_ci  return KaratsubaStart(Z, X, Y, scratch, k);
1481cb0ef41Sopenharmony_ci}
1491cb0ef41Sopenharmony_ci
1501cb0ef41Sopenharmony_ci// The main recursive Karatsuba method.
1511cb0ef41Sopenharmony_civoid ProcessorImpl::KaratsubaMain(RWDigits Z, Digits X, Digits Y,
1521cb0ef41Sopenharmony_ci                                  RWDigits scratch, int n) {
1531cb0ef41Sopenharmony_ci  if (n < kKaratsubaThreshold) {
1541cb0ef41Sopenharmony_ci    X.Normalize();
1551cb0ef41Sopenharmony_ci    Y.Normalize();
1561cb0ef41Sopenharmony_ci    if (X.len() >= Y.len()) {
1571cb0ef41Sopenharmony_ci      return MultiplySchoolbook(RWDigits(Z, 0, 2 * n), X, Y);
1581cb0ef41Sopenharmony_ci    } else {
1591cb0ef41Sopenharmony_ci      return MultiplySchoolbook(RWDigits(Z, 0, 2 * n), Y, X);
1601cb0ef41Sopenharmony_ci    }
1611cb0ef41Sopenharmony_ci  }
1621cb0ef41Sopenharmony_ci  DCHECK(scratch.len() >= 4 * n);
1631cb0ef41Sopenharmony_ci  DCHECK((n & 1) == 0);
1641cb0ef41Sopenharmony_ci  int n2 = n >> 1;
1651cb0ef41Sopenharmony_ci  Digits X0(X, 0, n2);
1661cb0ef41Sopenharmony_ci  Digits X1(X, n2, n2);
1671cb0ef41Sopenharmony_ci  Digits Y0(Y, 0, n2);
1681cb0ef41Sopenharmony_ci  Digits Y1(Y, n2, n2);
1691cb0ef41Sopenharmony_ci  RWDigits scratch_for_recursion(scratch, 2 * n, 2 * n);
1701cb0ef41Sopenharmony_ci  RWDigits P0(scratch, 0, n);
1711cb0ef41Sopenharmony_ci  KaratsubaMain(P0, X0, Y0, scratch_for_recursion, n2);
1721cb0ef41Sopenharmony_ci  MAYBE_TERMINATE
1731cb0ef41Sopenharmony_ci  for (int i = 0; i < n; i++) Z[i] = P0[i];
1741cb0ef41Sopenharmony_ci  RWDigits P2(scratch, n, n);
1751cb0ef41Sopenharmony_ci  KaratsubaMain(P2, X1, Y1, scratch_for_recursion, n2);
1761cb0ef41Sopenharmony_ci  MAYBE_TERMINATE
1771cb0ef41Sopenharmony_ci  RWDigits Z2 = Z + n;
1781cb0ef41Sopenharmony_ci  int end = std::min(Z2.len(), P2.len());
1791cb0ef41Sopenharmony_ci  for (int i = 0; i < end; i++) Z2[i] = P2[i];
1801cb0ef41Sopenharmony_ci  for (int i = end; i < n; i++) {
1811cb0ef41Sopenharmony_ci    DCHECK(P2[i] == 0);
1821cb0ef41Sopenharmony_ci  }
1831cb0ef41Sopenharmony_ci  // The intermediate result can be one digit too large; the subtraction
1841cb0ef41Sopenharmony_ci  // below will fix this.
1851cb0ef41Sopenharmony_ci  digit_t overflow = AddAndReturnOverflow(Z + n2, P0);
1861cb0ef41Sopenharmony_ci  overflow += AddAndReturnOverflow(Z + n2, P2);
1871cb0ef41Sopenharmony_ci  RWDigits X_diff(scratch, 0, n2);
1881cb0ef41Sopenharmony_ci  RWDigits Y_diff(scratch, n2, n2);
1891cb0ef41Sopenharmony_ci  int sign = 1;
1901cb0ef41Sopenharmony_ci  KaratsubaSubtractionHelper(X_diff, X1, X0, &sign);
1911cb0ef41Sopenharmony_ci  KaratsubaSubtractionHelper(Y_diff, Y0, Y1, &sign);
1921cb0ef41Sopenharmony_ci  RWDigits P1(scratch, n, n);
1931cb0ef41Sopenharmony_ci  KaratsubaMain(P1, X_diff, Y_diff, scratch_for_recursion, n2);
1941cb0ef41Sopenharmony_ci  if (sign > 0) {
1951cb0ef41Sopenharmony_ci    overflow += AddAndReturnOverflow(Z + n2, P1);
1961cb0ef41Sopenharmony_ci  } else {
1971cb0ef41Sopenharmony_ci    overflow -= SubAndReturnBorrow(Z + n2, P1);
1981cb0ef41Sopenharmony_ci  }
1991cb0ef41Sopenharmony_ci  // The intermediate result may have been bigger, but the final result fits.
2001cb0ef41Sopenharmony_ci  DCHECK(overflow == 0);
2011cb0ef41Sopenharmony_ci  USE(overflow);
2021cb0ef41Sopenharmony_ci}
2031cb0ef41Sopenharmony_ci
2041cb0ef41Sopenharmony_ci#undef MAYBE_TERMINATE
2051cb0ef41Sopenharmony_ci
2061cb0ef41Sopenharmony_ci}  // namespace bigint
2071cb0ef41Sopenharmony_ci}  // namespace v8
208