11cb0ef41Sopenharmony_ci// Copyright 2021 the V8 project authors. All rights reserved. 21cb0ef41Sopenharmony_ci// Use of this source code is governed by a BSD-style license that can be 31cb0ef41Sopenharmony_ci// found in the LICENSE file. 41cb0ef41Sopenharmony_ci 51cb0ef41Sopenharmony_ci// Karatsuba multiplication. This is loosely based on Go's implementation 61cb0ef41Sopenharmony_ci// found at https://golang.org/src/math/big/nat.go, licensed as follows: 71cb0ef41Sopenharmony_ci// 81cb0ef41Sopenharmony_ci// Copyright 2009 The Go Authors. All rights reserved. 91cb0ef41Sopenharmony_ci// Use of this source code is governed by a BSD-style 101cb0ef41Sopenharmony_ci// license that can be found in the LICENSE file [1]. 111cb0ef41Sopenharmony_ci// 121cb0ef41Sopenharmony_ci// [1] https://golang.org/LICENSE 131cb0ef41Sopenharmony_ci 141cb0ef41Sopenharmony_ci#include <algorithm> 151cb0ef41Sopenharmony_ci#include <utility> 161cb0ef41Sopenharmony_ci 171cb0ef41Sopenharmony_ci#include "src/bigint/bigint-internal.h" 181cb0ef41Sopenharmony_ci#include "src/bigint/digit-arithmetic.h" 191cb0ef41Sopenharmony_ci#include "src/bigint/util.h" 201cb0ef41Sopenharmony_ci#include "src/bigint/vector-arithmetic.h" 211cb0ef41Sopenharmony_ci 221cb0ef41Sopenharmony_cinamespace v8 { 231cb0ef41Sopenharmony_cinamespace bigint { 241cb0ef41Sopenharmony_ci 251cb0ef41Sopenharmony_ci// If Karatsuba is the best supported algorithm, then it must check for 261cb0ef41Sopenharmony_ci// termination requests. If there are more advanced algorithms available 271cb0ef41Sopenharmony_ci// for larger inputs, then Karatsuba will only be used for sufficiently 281cb0ef41Sopenharmony_ci// small chunks that checking for termination requests is not necessary. 291cb0ef41Sopenharmony_ci#if V8_ADVANCED_BIGINT_ALGORITHMS 301cb0ef41Sopenharmony_ci#define MAYBE_TERMINATE 311cb0ef41Sopenharmony_ci#else 321cb0ef41Sopenharmony_ci#define MAYBE_TERMINATE \ 331cb0ef41Sopenharmony_ci if (should_terminate()) return; 341cb0ef41Sopenharmony_ci#endif 351cb0ef41Sopenharmony_ci 361cb0ef41Sopenharmony_cinamespace { 371cb0ef41Sopenharmony_ci 381cb0ef41Sopenharmony_ci// The Karatsuba algorithm sometimes finishes more quickly when the 391cb0ef41Sopenharmony_ci// input length is rounded up a bit. This method encodes some heuristics 401cb0ef41Sopenharmony_ci// to accomplish this. The details have been determined experimentally. 411cb0ef41Sopenharmony_ciint RoundUpLen(int len) { 421cb0ef41Sopenharmony_ci if (len <= 36) return RoundUp(len, 2); 431cb0ef41Sopenharmony_ci // Keep the 4 or 5 most significant non-zero bits. 441cb0ef41Sopenharmony_ci int shift = BitLength(len) - 5; 451cb0ef41Sopenharmony_ci if ((len >> shift) >= 0x18) { 461cb0ef41Sopenharmony_ci shift++; 471cb0ef41Sopenharmony_ci } 481cb0ef41Sopenharmony_ci // Round up, unless we're only just above the threshold. This smoothes 491cb0ef41Sopenharmony_ci // the steps by which time goes up as input size increases. 501cb0ef41Sopenharmony_ci int additive = ((1 << shift) - 1); 511cb0ef41Sopenharmony_ci if (shift >= 2 && (len & additive) < (1 << (shift - 2))) { 521cb0ef41Sopenharmony_ci return len; 531cb0ef41Sopenharmony_ci } 541cb0ef41Sopenharmony_ci return ((len + additive) >> shift) << shift; 551cb0ef41Sopenharmony_ci} 561cb0ef41Sopenharmony_ci 571cb0ef41Sopenharmony_ci// This method makes the final decision how much to bump up the input size. 581cb0ef41Sopenharmony_ciint KaratsubaLength(int n) { 591cb0ef41Sopenharmony_ci n = RoundUpLen(n); 601cb0ef41Sopenharmony_ci int i = 0; 611cb0ef41Sopenharmony_ci while (n > kKaratsubaThreshold) { 621cb0ef41Sopenharmony_ci n >>= 1; 631cb0ef41Sopenharmony_ci i++; 641cb0ef41Sopenharmony_ci } 651cb0ef41Sopenharmony_ci return n << i; 661cb0ef41Sopenharmony_ci} 671cb0ef41Sopenharmony_ci 681cb0ef41Sopenharmony_ci// Performs the specific subtraction required by {KaratsubaMain} below. 691cb0ef41Sopenharmony_civoid KaratsubaSubtractionHelper(RWDigits result, Digits X, Digits Y, 701cb0ef41Sopenharmony_ci int* sign) { 711cb0ef41Sopenharmony_ci X.Normalize(); 721cb0ef41Sopenharmony_ci Y.Normalize(); 731cb0ef41Sopenharmony_ci digit_t borrow = 0; 741cb0ef41Sopenharmony_ci int i = 0; 751cb0ef41Sopenharmony_ci if (!GreaterThanOrEqual(X, Y)) { 761cb0ef41Sopenharmony_ci *sign = -(*sign); 771cb0ef41Sopenharmony_ci std::swap(X, Y); 781cb0ef41Sopenharmony_ci } 791cb0ef41Sopenharmony_ci for (; i < Y.len(); i++) { 801cb0ef41Sopenharmony_ci result[i] = digit_sub2(X[i], Y[i], borrow, &borrow); 811cb0ef41Sopenharmony_ci } 821cb0ef41Sopenharmony_ci for (; i < X.len(); i++) { 831cb0ef41Sopenharmony_ci result[i] = digit_sub(X[i], borrow, &borrow); 841cb0ef41Sopenharmony_ci } 851cb0ef41Sopenharmony_ci DCHECK(borrow == 0); 861cb0ef41Sopenharmony_ci for (; i < result.len(); i++) result[i] = 0; 871cb0ef41Sopenharmony_ci} 881cb0ef41Sopenharmony_ci 891cb0ef41Sopenharmony_ci} // namespace 901cb0ef41Sopenharmony_ci 911cb0ef41Sopenharmony_civoid ProcessorImpl::MultiplyKaratsuba(RWDigits Z, Digits X, Digits Y) { 921cb0ef41Sopenharmony_ci DCHECK(X.len() >= Y.len()); 931cb0ef41Sopenharmony_ci DCHECK(Y.len() >= kKaratsubaThreshold); 941cb0ef41Sopenharmony_ci DCHECK(Z.len() >= X.len() + Y.len()); 951cb0ef41Sopenharmony_ci int k = KaratsubaLength(Y.len()); 961cb0ef41Sopenharmony_ci int scratch_len = 4 * k; 971cb0ef41Sopenharmony_ci ScratchDigits scratch(scratch_len); 981cb0ef41Sopenharmony_ci KaratsubaStart(Z, X, Y, scratch, k); 991cb0ef41Sopenharmony_ci} 1001cb0ef41Sopenharmony_ci 1011cb0ef41Sopenharmony_ci// Entry point for Karatsuba-based multiplication, takes care of inputs 1021cb0ef41Sopenharmony_ci// with unequal lengths by chopping the larger into chunks. 1031cb0ef41Sopenharmony_civoid ProcessorImpl::KaratsubaStart(RWDigits Z, Digits X, Digits Y, 1041cb0ef41Sopenharmony_ci RWDigits scratch, int k) { 1051cb0ef41Sopenharmony_ci KaratsubaMain(Z, X, Y, scratch, k); 1061cb0ef41Sopenharmony_ci MAYBE_TERMINATE 1071cb0ef41Sopenharmony_ci for (int i = 2 * k; i < Z.len(); i++) Z[i] = 0; 1081cb0ef41Sopenharmony_ci if (k < Y.len() || X.len() != Y.len()) { 1091cb0ef41Sopenharmony_ci ScratchDigits T(2 * k); 1101cb0ef41Sopenharmony_ci // Add X0 * Y1 * b. 1111cb0ef41Sopenharmony_ci Digits X0(X, 0, k); 1121cb0ef41Sopenharmony_ci Digits Y1 = Y + std::min(k, Y.len()); 1131cb0ef41Sopenharmony_ci if (Y1.len() > 0) { 1141cb0ef41Sopenharmony_ci KaratsubaChunk(T, X0, Y1, scratch); 1151cb0ef41Sopenharmony_ci MAYBE_TERMINATE 1161cb0ef41Sopenharmony_ci AddAndReturnOverflow(Z + k, T); // Can't overflow. 1171cb0ef41Sopenharmony_ci } 1181cb0ef41Sopenharmony_ci 1191cb0ef41Sopenharmony_ci // Add Xi * Y0 << i and Xi * Y1 * b << (i + k). 1201cb0ef41Sopenharmony_ci Digits Y0(Y, 0, k); 1211cb0ef41Sopenharmony_ci for (int i = k; i < X.len(); i += k) { 1221cb0ef41Sopenharmony_ci Digits Xi(X, i, k); 1231cb0ef41Sopenharmony_ci KaratsubaChunk(T, Xi, Y0, scratch); 1241cb0ef41Sopenharmony_ci MAYBE_TERMINATE 1251cb0ef41Sopenharmony_ci AddAndReturnOverflow(Z + i, T); // Can't overflow. 1261cb0ef41Sopenharmony_ci if (Y1.len() > 0) { 1271cb0ef41Sopenharmony_ci KaratsubaChunk(T, Xi, Y1, scratch); 1281cb0ef41Sopenharmony_ci MAYBE_TERMINATE 1291cb0ef41Sopenharmony_ci AddAndReturnOverflow(Z + (i + k), T); // Can't overflow. 1301cb0ef41Sopenharmony_ci } 1311cb0ef41Sopenharmony_ci } 1321cb0ef41Sopenharmony_ci } 1331cb0ef41Sopenharmony_ci} 1341cb0ef41Sopenharmony_ci 1351cb0ef41Sopenharmony_ci// Entry point for chunk-wise multiplications, selects an appropriate 1361cb0ef41Sopenharmony_ci// algorithm for the inputs based on their sizes. 1371cb0ef41Sopenharmony_civoid ProcessorImpl::KaratsubaChunk(RWDigits Z, Digits X, Digits Y, 1381cb0ef41Sopenharmony_ci RWDigits scratch) { 1391cb0ef41Sopenharmony_ci X.Normalize(); 1401cb0ef41Sopenharmony_ci Y.Normalize(); 1411cb0ef41Sopenharmony_ci if (X.len() == 0 || Y.len() == 0) return Z.Clear(); 1421cb0ef41Sopenharmony_ci if (X.len() < Y.len()) std::swap(X, Y); 1431cb0ef41Sopenharmony_ci if (Y.len() == 1) return MultiplySingle(Z, X, Y[0]); 1441cb0ef41Sopenharmony_ci if (Y.len() < kKaratsubaThreshold) return MultiplySchoolbook(Z, X, Y); 1451cb0ef41Sopenharmony_ci int k = KaratsubaLength(Y.len()); 1461cb0ef41Sopenharmony_ci DCHECK(scratch.len() >= 4 * k); 1471cb0ef41Sopenharmony_ci return KaratsubaStart(Z, X, Y, scratch, k); 1481cb0ef41Sopenharmony_ci} 1491cb0ef41Sopenharmony_ci 1501cb0ef41Sopenharmony_ci// The main recursive Karatsuba method. 1511cb0ef41Sopenharmony_civoid ProcessorImpl::KaratsubaMain(RWDigits Z, Digits X, Digits Y, 1521cb0ef41Sopenharmony_ci RWDigits scratch, int n) { 1531cb0ef41Sopenharmony_ci if (n < kKaratsubaThreshold) { 1541cb0ef41Sopenharmony_ci X.Normalize(); 1551cb0ef41Sopenharmony_ci Y.Normalize(); 1561cb0ef41Sopenharmony_ci if (X.len() >= Y.len()) { 1571cb0ef41Sopenharmony_ci return MultiplySchoolbook(RWDigits(Z, 0, 2 * n), X, Y); 1581cb0ef41Sopenharmony_ci } else { 1591cb0ef41Sopenharmony_ci return MultiplySchoolbook(RWDigits(Z, 0, 2 * n), Y, X); 1601cb0ef41Sopenharmony_ci } 1611cb0ef41Sopenharmony_ci } 1621cb0ef41Sopenharmony_ci DCHECK(scratch.len() >= 4 * n); 1631cb0ef41Sopenharmony_ci DCHECK((n & 1) == 0); 1641cb0ef41Sopenharmony_ci int n2 = n >> 1; 1651cb0ef41Sopenharmony_ci Digits X0(X, 0, n2); 1661cb0ef41Sopenharmony_ci Digits X1(X, n2, n2); 1671cb0ef41Sopenharmony_ci Digits Y0(Y, 0, n2); 1681cb0ef41Sopenharmony_ci Digits Y1(Y, n2, n2); 1691cb0ef41Sopenharmony_ci RWDigits scratch_for_recursion(scratch, 2 * n, 2 * n); 1701cb0ef41Sopenharmony_ci RWDigits P0(scratch, 0, n); 1711cb0ef41Sopenharmony_ci KaratsubaMain(P0, X0, Y0, scratch_for_recursion, n2); 1721cb0ef41Sopenharmony_ci MAYBE_TERMINATE 1731cb0ef41Sopenharmony_ci for (int i = 0; i < n; i++) Z[i] = P0[i]; 1741cb0ef41Sopenharmony_ci RWDigits P2(scratch, n, n); 1751cb0ef41Sopenharmony_ci KaratsubaMain(P2, X1, Y1, scratch_for_recursion, n2); 1761cb0ef41Sopenharmony_ci MAYBE_TERMINATE 1771cb0ef41Sopenharmony_ci RWDigits Z2 = Z + n; 1781cb0ef41Sopenharmony_ci int end = std::min(Z2.len(), P2.len()); 1791cb0ef41Sopenharmony_ci for (int i = 0; i < end; i++) Z2[i] = P2[i]; 1801cb0ef41Sopenharmony_ci for (int i = end; i < n; i++) { 1811cb0ef41Sopenharmony_ci DCHECK(P2[i] == 0); 1821cb0ef41Sopenharmony_ci } 1831cb0ef41Sopenharmony_ci // The intermediate result can be one digit too large; the subtraction 1841cb0ef41Sopenharmony_ci // below will fix this. 1851cb0ef41Sopenharmony_ci digit_t overflow = AddAndReturnOverflow(Z + n2, P0); 1861cb0ef41Sopenharmony_ci overflow += AddAndReturnOverflow(Z + n2, P2); 1871cb0ef41Sopenharmony_ci RWDigits X_diff(scratch, 0, n2); 1881cb0ef41Sopenharmony_ci RWDigits Y_diff(scratch, n2, n2); 1891cb0ef41Sopenharmony_ci int sign = 1; 1901cb0ef41Sopenharmony_ci KaratsubaSubtractionHelper(X_diff, X1, X0, &sign); 1911cb0ef41Sopenharmony_ci KaratsubaSubtractionHelper(Y_diff, Y0, Y1, &sign); 1921cb0ef41Sopenharmony_ci RWDigits P1(scratch, n, n); 1931cb0ef41Sopenharmony_ci KaratsubaMain(P1, X_diff, Y_diff, scratch_for_recursion, n2); 1941cb0ef41Sopenharmony_ci if (sign > 0) { 1951cb0ef41Sopenharmony_ci overflow += AddAndReturnOverflow(Z + n2, P1); 1961cb0ef41Sopenharmony_ci } else { 1971cb0ef41Sopenharmony_ci overflow -= SubAndReturnBorrow(Z + n2, P1); 1981cb0ef41Sopenharmony_ci } 1991cb0ef41Sopenharmony_ci // The intermediate result may have been bigger, but the final result fits. 2001cb0ef41Sopenharmony_ci DCHECK(overflow == 0); 2011cb0ef41Sopenharmony_ci USE(overflow); 2021cb0ef41Sopenharmony_ci} 2031cb0ef41Sopenharmony_ci 2041cb0ef41Sopenharmony_ci#undef MAYBE_TERMINATE 2051cb0ef41Sopenharmony_ci 2061cb0ef41Sopenharmony_ci} // namespace bigint 2071cb0ef41Sopenharmony_ci} // namespace v8 208