17db96d56Sopenharmony_ci/*
27db96d56Sopenharmony_ci * Copyright (c) 2008-2020 Stefan Krah. All rights reserved.
37db96d56Sopenharmony_ci *
47db96d56Sopenharmony_ci * Redistribution and use in source and binary forms, with or without
57db96d56Sopenharmony_ci * modification, are permitted provided that the following conditions
67db96d56Sopenharmony_ci * are met:
77db96d56Sopenharmony_ci *
87db96d56Sopenharmony_ci * 1. Redistributions of source code must retain the above copyright
97db96d56Sopenharmony_ci *    notice, this list of conditions and the following disclaimer.
107db96d56Sopenharmony_ci *
117db96d56Sopenharmony_ci * 2. Redistributions in binary form must reproduce the above copyright
127db96d56Sopenharmony_ci *    notice, this list of conditions and the following disclaimer in the
137db96d56Sopenharmony_ci *    documentation and/or other materials provided with the distribution.
147db96d56Sopenharmony_ci *
157db96d56Sopenharmony_ci * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
167db96d56Sopenharmony_ci * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
177db96d56Sopenharmony_ci * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
187db96d56Sopenharmony_ci * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
197db96d56Sopenharmony_ci * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
207db96d56Sopenharmony_ci * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
217db96d56Sopenharmony_ci * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
227db96d56Sopenharmony_ci * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
237db96d56Sopenharmony_ci * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
247db96d56Sopenharmony_ci * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
257db96d56Sopenharmony_ci * SUCH DAMAGE.
267db96d56Sopenharmony_ci */
277db96d56Sopenharmony_ci
287db96d56Sopenharmony_ci
297db96d56Sopenharmony_ci#include "mpdecimal.h"
307db96d56Sopenharmony_ci
317db96d56Sopenharmony_ci#include <assert.h>
327db96d56Sopenharmony_ci
337db96d56Sopenharmony_ci#include "constants.h"
347db96d56Sopenharmony_ci#include "crt.h"
357db96d56Sopenharmony_ci#include "numbertheory.h"
367db96d56Sopenharmony_ci#include "typearith.h"
377db96d56Sopenharmony_ci#include "umodarith.h"
387db96d56Sopenharmony_ci
397db96d56Sopenharmony_ci
407db96d56Sopenharmony_ci/* Bignum: Chinese Remainder Theorem, extends the maximum transform length. */
417db96d56Sopenharmony_ci
427db96d56Sopenharmony_ci
437db96d56Sopenharmony_ci/* Multiply P1P2 by v, store result in w. */
447db96d56Sopenharmony_cistatic inline void
457db96d56Sopenharmony_ci_crt_mulP1P2_3(mpd_uint_t w[3], mpd_uint_t v)
467db96d56Sopenharmony_ci{
477db96d56Sopenharmony_ci    mpd_uint_t hi1, hi2, lo;
487db96d56Sopenharmony_ci
497db96d56Sopenharmony_ci    _mpd_mul_words(&hi1, &lo, LH_P1P2, v);
507db96d56Sopenharmony_ci    w[0] = lo;
517db96d56Sopenharmony_ci
527db96d56Sopenharmony_ci    _mpd_mul_words(&hi2, &lo, UH_P1P2, v);
537db96d56Sopenharmony_ci    lo = hi1 + lo;
547db96d56Sopenharmony_ci    if (lo < hi1) hi2++;
557db96d56Sopenharmony_ci
567db96d56Sopenharmony_ci    w[1] = lo;
577db96d56Sopenharmony_ci    w[2] = hi2;
587db96d56Sopenharmony_ci}
597db96d56Sopenharmony_ci
607db96d56Sopenharmony_ci/* Add 3 words from v to w. The result is known to fit in w. */
617db96d56Sopenharmony_cistatic inline void
627db96d56Sopenharmony_ci_crt_add3(mpd_uint_t w[3], mpd_uint_t v[3])
637db96d56Sopenharmony_ci{
647db96d56Sopenharmony_ci    mpd_uint_t carry;
657db96d56Sopenharmony_ci
667db96d56Sopenharmony_ci    w[0] = w[0] + v[0];
677db96d56Sopenharmony_ci    carry = (w[0] < v[0]);
687db96d56Sopenharmony_ci
697db96d56Sopenharmony_ci    w[1] = w[1] + v[1];
707db96d56Sopenharmony_ci    if (w[1] < v[1]) w[2]++;
717db96d56Sopenharmony_ci
727db96d56Sopenharmony_ci    w[1] = w[1] + carry;
737db96d56Sopenharmony_ci    if (w[1] < carry) w[2]++;
747db96d56Sopenharmony_ci
757db96d56Sopenharmony_ci    w[2] += v[2];
767db96d56Sopenharmony_ci}
777db96d56Sopenharmony_ci
787db96d56Sopenharmony_ci/* Divide 3 words in u by v, store result in w, return remainder. */
797db96d56Sopenharmony_cistatic inline mpd_uint_t
807db96d56Sopenharmony_ci_crt_div3(mpd_uint_t *w, const mpd_uint_t *u, mpd_uint_t v)
817db96d56Sopenharmony_ci{
827db96d56Sopenharmony_ci    mpd_uint_t r1 = u[2];
837db96d56Sopenharmony_ci    mpd_uint_t r2;
847db96d56Sopenharmony_ci
857db96d56Sopenharmony_ci    if (r1 < v) {
867db96d56Sopenharmony_ci        w[2] = 0;
877db96d56Sopenharmony_ci    }
887db96d56Sopenharmony_ci    else {
897db96d56Sopenharmony_ci        _mpd_div_word(&w[2], &r1, u[2], v); /* GCOV_NOT_REACHED */
907db96d56Sopenharmony_ci    }
917db96d56Sopenharmony_ci
927db96d56Sopenharmony_ci    _mpd_div_words(&w[1], &r2, r1, u[1], v);
937db96d56Sopenharmony_ci    _mpd_div_words(&w[0], &r1, r2, u[0], v);
947db96d56Sopenharmony_ci
957db96d56Sopenharmony_ci    return r1;
967db96d56Sopenharmony_ci}
977db96d56Sopenharmony_ci
987db96d56Sopenharmony_ci
997db96d56Sopenharmony_ci/*
1007db96d56Sopenharmony_ci * Chinese Remainder Theorem:
1017db96d56Sopenharmony_ci * Algorithm from Joerg Arndt, "Matters Computational",
1027db96d56Sopenharmony_ci * Chapter 37.4.1 [http://www.jjj.de/fxt/]
1037db96d56Sopenharmony_ci *
1047db96d56Sopenharmony_ci * See also Knuth, TAOCP, Volume 2, 4.3.2, exercise 7.
1057db96d56Sopenharmony_ci */
1067db96d56Sopenharmony_ci
1077db96d56Sopenharmony_ci/*
1087db96d56Sopenharmony_ci * CRT with carry: x1, x2, x3 contain numbers modulo p1, p2, p3. For each
1097db96d56Sopenharmony_ci * triple of members of the arrays, find the unique z modulo p1*p2*p3, with
1107db96d56Sopenharmony_ci * zmax = p1*p2*p3 - 1.
1117db96d56Sopenharmony_ci *
1127db96d56Sopenharmony_ci * In each iteration of the loop, split z into result[i] = z % MPD_RADIX
1137db96d56Sopenharmony_ci * and carry = z / MPD_RADIX. Let N be the size of carry[] and cmax the
1147db96d56Sopenharmony_ci * maximum carry.
1157db96d56Sopenharmony_ci *
1167db96d56Sopenharmony_ci * Limits for the 32-bit build:
1177db96d56Sopenharmony_ci *
1187db96d56Sopenharmony_ci *   N    = 2**96
1197db96d56Sopenharmony_ci *   cmax = 7711435591312380274
1207db96d56Sopenharmony_ci *
1217db96d56Sopenharmony_ci * Limits for the 64 bit build:
1227db96d56Sopenharmony_ci *
1237db96d56Sopenharmony_ci *   N    = 2**192
1247db96d56Sopenharmony_ci *   cmax = 627710135393475385904124401220046371710
1257db96d56Sopenharmony_ci *
1267db96d56Sopenharmony_ci * The following statements hold for both versions:
1277db96d56Sopenharmony_ci *
1287db96d56Sopenharmony_ci *   1) cmax + zmax < N, so the addition does not overflow.
1297db96d56Sopenharmony_ci *
1307db96d56Sopenharmony_ci *   2) (cmax + zmax) / MPD_RADIX == cmax.
1317db96d56Sopenharmony_ci *
1327db96d56Sopenharmony_ci *   3) If c <= cmax, then c_next = (c + zmax) / MPD_RADIX <= cmax.
1337db96d56Sopenharmony_ci */
1347db96d56Sopenharmony_civoid
1357db96d56Sopenharmony_cicrt3(mpd_uint_t *x1, mpd_uint_t *x2, mpd_uint_t *x3, mpd_size_t rsize)
1367db96d56Sopenharmony_ci{
1377db96d56Sopenharmony_ci    mpd_uint_t p1 = mpd_moduli[P1];
1387db96d56Sopenharmony_ci    mpd_uint_t umod;
1397db96d56Sopenharmony_ci#ifdef PPRO
1407db96d56Sopenharmony_ci    double dmod;
1417db96d56Sopenharmony_ci    uint32_t dinvmod[3];
1427db96d56Sopenharmony_ci#endif
1437db96d56Sopenharmony_ci    mpd_uint_t a1, a2, a3;
1447db96d56Sopenharmony_ci    mpd_uint_t s;
1457db96d56Sopenharmony_ci    mpd_uint_t z[3], t[3];
1467db96d56Sopenharmony_ci    mpd_uint_t carry[3] = {0,0,0};
1477db96d56Sopenharmony_ci    mpd_uint_t hi, lo;
1487db96d56Sopenharmony_ci    mpd_size_t i;
1497db96d56Sopenharmony_ci
1507db96d56Sopenharmony_ci    for (i = 0; i < rsize; i++) {
1517db96d56Sopenharmony_ci
1527db96d56Sopenharmony_ci        a1 = x1[i];
1537db96d56Sopenharmony_ci        a2 = x2[i];
1547db96d56Sopenharmony_ci        a3 = x3[i];
1557db96d56Sopenharmony_ci
1567db96d56Sopenharmony_ci        SETMODULUS(P2);
1577db96d56Sopenharmony_ci        s = ext_submod(a2, a1, umod);
1587db96d56Sopenharmony_ci        s = MULMOD(s, INV_P1_MOD_P2);
1597db96d56Sopenharmony_ci
1607db96d56Sopenharmony_ci        _mpd_mul_words(&hi, &lo, s, p1);
1617db96d56Sopenharmony_ci        lo = lo + a1;
1627db96d56Sopenharmony_ci        if (lo < a1) hi++;
1637db96d56Sopenharmony_ci
1647db96d56Sopenharmony_ci        SETMODULUS(P3);
1657db96d56Sopenharmony_ci        s = dw_submod(a3, hi, lo, umod);
1667db96d56Sopenharmony_ci        s = MULMOD(s, INV_P1P2_MOD_P3);
1677db96d56Sopenharmony_ci
1687db96d56Sopenharmony_ci        z[0] = lo;
1697db96d56Sopenharmony_ci        z[1] = hi;
1707db96d56Sopenharmony_ci        z[2] = 0;
1717db96d56Sopenharmony_ci
1727db96d56Sopenharmony_ci        _crt_mulP1P2_3(t, s);
1737db96d56Sopenharmony_ci        _crt_add3(z, t);
1747db96d56Sopenharmony_ci        _crt_add3(carry, z);
1757db96d56Sopenharmony_ci
1767db96d56Sopenharmony_ci        x1[i] = _crt_div3(carry, carry, MPD_RADIX);
1777db96d56Sopenharmony_ci    }
1787db96d56Sopenharmony_ci
1797db96d56Sopenharmony_ci    assert(carry[0] == 0 && carry[1] == 0 && carry[2] == 0);
1807db96d56Sopenharmony_ci}
181