17db96d56Sopenharmony_ci/* 27db96d56Sopenharmony_ci * Copyright (c) 2008-2020 Stefan Krah. All rights reserved. 37db96d56Sopenharmony_ci * 47db96d56Sopenharmony_ci * Redistribution and use in source and binary forms, with or without 57db96d56Sopenharmony_ci * modification, are permitted provided that the following conditions 67db96d56Sopenharmony_ci * are met: 77db96d56Sopenharmony_ci * 87db96d56Sopenharmony_ci * 1. Redistributions of source code must retain the above copyright 97db96d56Sopenharmony_ci * notice, this list of conditions and the following disclaimer. 107db96d56Sopenharmony_ci * 117db96d56Sopenharmony_ci * 2. Redistributions in binary form must reproduce the above copyright 127db96d56Sopenharmony_ci * notice, this list of conditions and the following disclaimer in the 137db96d56Sopenharmony_ci * documentation and/or other materials provided with the distribution. 147db96d56Sopenharmony_ci * 157db96d56Sopenharmony_ci * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND 167db96d56Sopenharmony_ci * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 177db96d56Sopenharmony_ci * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 187db96d56Sopenharmony_ci * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 197db96d56Sopenharmony_ci * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 207db96d56Sopenharmony_ci * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 217db96d56Sopenharmony_ci * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 227db96d56Sopenharmony_ci * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 237db96d56Sopenharmony_ci * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 247db96d56Sopenharmony_ci * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 257db96d56Sopenharmony_ci * SUCH DAMAGE. 267db96d56Sopenharmony_ci */ 277db96d56Sopenharmony_ci 287db96d56Sopenharmony_ci 297db96d56Sopenharmony_ci#include "mpdecimal.h" 307db96d56Sopenharmony_ci#include "bits.h" 317db96d56Sopenharmony_ci#include "constants.h" 327db96d56Sopenharmony_ci#include "convolute.h" 337db96d56Sopenharmony_ci#include "fnt.h" 347db96d56Sopenharmony_ci#include "fourstep.h" 357db96d56Sopenharmony_ci#include "numbertheory.h" 367db96d56Sopenharmony_ci#include "sixstep.h" 377db96d56Sopenharmony_ci#include "umodarith.h" 387db96d56Sopenharmony_ci 397db96d56Sopenharmony_ci 407db96d56Sopenharmony_ci/* Bignum: Fast convolution using the Number Theoretic Transform. Used for 417db96d56Sopenharmony_ci the multiplication of very large coefficients. */ 427db96d56Sopenharmony_ci 437db96d56Sopenharmony_ci 447db96d56Sopenharmony_ci/* Convolute the data in c1 and c2. Result is in c1. */ 457db96d56Sopenharmony_ciint 467db96d56Sopenharmony_cifnt_convolute(mpd_uint_t *c1, mpd_uint_t *c2, mpd_size_t n, int modnum) 477db96d56Sopenharmony_ci{ 487db96d56Sopenharmony_ci int (*fnt)(mpd_uint_t *, mpd_size_t, int); 497db96d56Sopenharmony_ci int (*inv_fnt)(mpd_uint_t *, mpd_size_t, int); 507db96d56Sopenharmony_ci#ifdef PPRO 517db96d56Sopenharmony_ci double dmod; 527db96d56Sopenharmony_ci uint32_t dinvmod[3]; 537db96d56Sopenharmony_ci#endif 547db96d56Sopenharmony_ci mpd_uint_t n_inv, umod; 557db96d56Sopenharmony_ci mpd_size_t i; 567db96d56Sopenharmony_ci 577db96d56Sopenharmony_ci 587db96d56Sopenharmony_ci SETMODULUS(modnum); 597db96d56Sopenharmony_ci n_inv = POWMOD(n, (umod-2)); 607db96d56Sopenharmony_ci 617db96d56Sopenharmony_ci if (ispower2(n)) { 627db96d56Sopenharmony_ci if (n > SIX_STEP_THRESHOLD) { 637db96d56Sopenharmony_ci fnt = six_step_fnt; 647db96d56Sopenharmony_ci inv_fnt = inv_six_step_fnt; 657db96d56Sopenharmony_ci } 667db96d56Sopenharmony_ci else { 677db96d56Sopenharmony_ci fnt = std_fnt; 687db96d56Sopenharmony_ci inv_fnt = std_inv_fnt; 697db96d56Sopenharmony_ci } 707db96d56Sopenharmony_ci } 717db96d56Sopenharmony_ci else { 727db96d56Sopenharmony_ci fnt = four_step_fnt; 737db96d56Sopenharmony_ci inv_fnt = inv_four_step_fnt; 747db96d56Sopenharmony_ci } 757db96d56Sopenharmony_ci 767db96d56Sopenharmony_ci if (!fnt(c1, n, modnum)) { 777db96d56Sopenharmony_ci return 0; 787db96d56Sopenharmony_ci } 797db96d56Sopenharmony_ci if (!fnt(c2, n, modnum)) { 807db96d56Sopenharmony_ci return 0; 817db96d56Sopenharmony_ci } 827db96d56Sopenharmony_ci for (i = 0; i < n-1; i += 2) { 837db96d56Sopenharmony_ci mpd_uint_t x0 = c1[i]; 847db96d56Sopenharmony_ci mpd_uint_t y0 = c2[i]; 857db96d56Sopenharmony_ci mpd_uint_t x1 = c1[i+1]; 867db96d56Sopenharmony_ci mpd_uint_t y1 = c2[i+1]; 877db96d56Sopenharmony_ci MULMOD2(&x0, y0, &x1, y1); 887db96d56Sopenharmony_ci c1[i] = x0; 897db96d56Sopenharmony_ci c1[i+1] = x1; 907db96d56Sopenharmony_ci } 917db96d56Sopenharmony_ci 927db96d56Sopenharmony_ci if (!inv_fnt(c1, n, modnum)) { 937db96d56Sopenharmony_ci return 0; 947db96d56Sopenharmony_ci } 957db96d56Sopenharmony_ci for (i = 0; i < n-3; i += 4) { 967db96d56Sopenharmony_ci mpd_uint_t x0 = c1[i]; 977db96d56Sopenharmony_ci mpd_uint_t x1 = c1[i+1]; 987db96d56Sopenharmony_ci mpd_uint_t x2 = c1[i+2]; 997db96d56Sopenharmony_ci mpd_uint_t x3 = c1[i+3]; 1007db96d56Sopenharmony_ci MULMOD2C(&x0, &x1, n_inv); 1017db96d56Sopenharmony_ci MULMOD2C(&x2, &x3, n_inv); 1027db96d56Sopenharmony_ci c1[i] = x0; 1037db96d56Sopenharmony_ci c1[i+1] = x1; 1047db96d56Sopenharmony_ci c1[i+2] = x2; 1057db96d56Sopenharmony_ci c1[i+3] = x3; 1067db96d56Sopenharmony_ci } 1077db96d56Sopenharmony_ci 1087db96d56Sopenharmony_ci return 1; 1097db96d56Sopenharmony_ci} 1107db96d56Sopenharmony_ci 1117db96d56Sopenharmony_ci/* Autoconvolute the data in c1. Result is in c1. */ 1127db96d56Sopenharmony_ciint 1137db96d56Sopenharmony_cifnt_autoconvolute(mpd_uint_t *c1, mpd_size_t n, int modnum) 1147db96d56Sopenharmony_ci{ 1157db96d56Sopenharmony_ci int (*fnt)(mpd_uint_t *, mpd_size_t, int); 1167db96d56Sopenharmony_ci int (*inv_fnt)(mpd_uint_t *, mpd_size_t, int); 1177db96d56Sopenharmony_ci#ifdef PPRO 1187db96d56Sopenharmony_ci double dmod; 1197db96d56Sopenharmony_ci uint32_t dinvmod[3]; 1207db96d56Sopenharmony_ci#endif 1217db96d56Sopenharmony_ci mpd_uint_t n_inv, umod; 1227db96d56Sopenharmony_ci mpd_size_t i; 1237db96d56Sopenharmony_ci 1247db96d56Sopenharmony_ci 1257db96d56Sopenharmony_ci SETMODULUS(modnum); 1267db96d56Sopenharmony_ci n_inv = POWMOD(n, (umod-2)); 1277db96d56Sopenharmony_ci 1287db96d56Sopenharmony_ci if (ispower2(n)) { 1297db96d56Sopenharmony_ci if (n > SIX_STEP_THRESHOLD) { 1307db96d56Sopenharmony_ci fnt = six_step_fnt; 1317db96d56Sopenharmony_ci inv_fnt = inv_six_step_fnt; 1327db96d56Sopenharmony_ci } 1337db96d56Sopenharmony_ci else { 1347db96d56Sopenharmony_ci fnt = std_fnt; 1357db96d56Sopenharmony_ci inv_fnt = std_inv_fnt; 1367db96d56Sopenharmony_ci } 1377db96d56Sopenharmony_ci } 1387db96d56Sopenharmony_ci else { 1397db96d56Sopenharmony_ci fnt = four_step_fnt; 1407db96d56Sopenharmony_ci inv_fnt = inv_four_step_fnt; 1417db96d56Sopenharmony_ci } 1427db96d56Sopenharmony_ci 1437db96d56Sopenharmony_ci if (!fnt(c1, n, modnum)) { 1447db96d56Sopenharmony_ci return 0; 1457db96d56Sopenharmony_ci } 1467db96d56Sopenharmony_ci for (i = 0; i < n-1; i += 2) { 1477db96d56Sopenharmony_ci mpd_uint_t x0 = c1[i]; 1487db96d56Sopenharmony_ci mpd_uint_t x1 = c1[i+1]; 1497db96d56Sopenharmony_ci MULMOD2(&x0, x0, &x1, x1); 1507db96d56Sopenharmony_ci c1[i] = x0; 1517db96d56Sopenharmony_ci c1[i+1] = x1; 1527db96d56Sopenharmony_ci } 1537db96d56Sopenharmony_ci 1547db96d56Sopenharmony_ci if (!inv_fnt(c1, n, modnum)) { 1557db96d56Sopenharmony_ci return 0; 1567db96d56Sopenharmony_ci } 1577db96d56Sopenharmony_ci for (i = 0; i < n-3; i += 4) { 1587db96d56Sopenharmony_ci mpd_uint_t x0 = c1[i]; 1597db96d56Sopenharmony_ci mpd_uint_t x1 = c1[i+1]; 1607db96d56Sopenharmony_ci mpd_uint_t x2 = c1[i+2]; 1617db96d56Sopenharmony_ci mpd_uint_t x3 = c1[i+3]; 1627db96d56Sopenharmony_ci MULMOD2C(&x0, &x1, n_inv); 1637db96d56Sopenharmony_ci MULMOD2C(&x2, &x3, n_inv); 1647db96d56Sopenharmony_ci c1[i] = x0; 1657db96d56Sopenharmony_ci c1[i+1] = x1; 1667db96d56Sopenharmony_ci c1[i+2] = x2; 1677db96d56Sopenharmony_ci c1[i+3] = x3; 1687db96d56Sopenharmony_ci } 1697db96d56Sopenharmony_ci 1707db96d56Sopenharmony_ci return 1; 1717db96d56Sopenharmony_ci} 172