17db96d56Sopenharmony_ci/* 27db96d56Sopenharmony_ci * Copyright (c) 2008-2020 Stefan Krah. All rights reserved. 37db96d56Sopenharmony_ci * 47db96d56Sopenharmony_ci * Redistribution and use in source and binary forms, with or without 57db96d56Sopenharmony_ci * modification, are permitted provided that the following conditions 67db96d56Sopenharmony_ci * are met: 77db96d56Sopenharmony_ci * 87db96d56Sopenharmony_ci * 1. Redistributions of source code must retain the above copyright 97db96d56Sopenharmony_ci * notice, this list of conditions and the following disclaimer. 107db96d56Sopenharmony_ci * 117db96d56Sopenharmony_ci * 2. Redistributions in binary form must reproduce the above copyright 127db96d56Sopenharmony_ci * notice, this list of conditions and the following disclaimer in the 137db96d56Sopenharmony_ci * documentation and/or other materials provided with the distribution. 147db96d56Sopenharmony_ci * 157db96d56Sopenharmony_ci * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND 167db96d56Sopenharmony_ci * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 177db96d56Sopenharmony_ci * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 187db96d56Sopenharmony_ci * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 197db96d56Sopenharmony_ci * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 207db96d56Sopenharmony_ci * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 217db96d56Sopenharmony_ci * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 227db96d56Sopenharmony_ci * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 237db96d56Sopenharmony_ci * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 247db96d56Sopenharmony_ci * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 257db96d56Sopenharmony_ci * SUCH DAMAGE. 267db96d56Sopenharmony_ci */ 277db96d56Sopenharmony_ci 287db96d56Sopenharmony_ci 297db96d56Sopenharmony_ci#include "mpdecimal.h" 307db96d56Sopenharmony_ci 317db96d56Sopenharmony_ci#include <assert.h> 327db96d56Sopenharmony_ci 337db96d56Sopenharmony_ci#include "bits.h" 347db96d56Sopenharmony_ci#include "constants.h" 357db96d56Sopenharmony_ci#include "difradix2.h" 367db96d56Sopenharmony_ci#include "numbertheory.h" 377db96d56Sopenharmony_ci#include "umodarith.h" 387db96d56Sopenharmony_ci 397db96d56Sopenharmony_ci 407db96d56Sopenharmony_ci/* Bignum: The actual transform routine (decimation in frequency). */ 417db96d56Sopenharmony_ci 427db96d56Sopenharmony_ci 437db96d56Sopenharmony_ci/* 447db96d56Sopenharmony_ci * Generate index pairs (x, bitreverse(x)) and carry out the permutation. 457db96d56Sopenharmony_ci * n must be a power of two. 467db96d56Sopenharmony_ci * Algorithm due to Brent/Lehmann, see Joerg Arndt, "Matters Computational", 477db96d56Sopenharmony_ci * Chapter 1.14.4. [http://www.jjj.de/fxt/] 487db96d56Sopenharmony_ci */ 497db96d56Sopenharmony_cistatic inline void 507db96d56Sopenharmony_cibitreverse_permute(mpd_uint_t a[], mpd_size_t n) 517db96d56Sopenharmony_ci{ 527db96d56Sopenharmony_ci mpd_size_t x = 0; 537db96d56Sopenharmony_ci mpd_size_t r = 0; 547db96d56Sopenharmony_ci mpd_uint_t t; 557db96d56Sopenharmony_ci 567db96d56Sopenharmony_ci do { /* Invariant: r = bitreverse(x) */ 577db96d56Sopenharmony_ci if (r > x) { 587db96d56Sopenharmony_ci t = a[x]; 597db96d56Sopenharmony_ci a[x] = a[r]; 607db96d56Sopenharmony_ci a[r] = t; 617db96d56Sopenharmony_ci } 627db96d56Sopenharmony_ci /* Flip trailing consecutive 1 bits and the first zero bit 637db96d56Sopenharmony_ci * that absorbs a possible carry. */ 647db96d56Sopenharmony_ci x += 1; 657db96d56Sopenharmony_ci /* Mirror the operation on r: Flip n_trailing_zeros(x)+1 667db96d56Sopenharmony_ci high bits of r. */ 677db96d56Sopenharmony_ci r ^= (n - (n >> (mpd_bsf(x)+1))); 687db96d56Sopenharmony_ci /* The loop invariant is preserved. */ 697db96d56Sopenharmony_ci } while (x < n); 707db96d56Sopenharmony_ci} 717db96d56Sopenharmony_ci 727db96d56Sopenharmony_ci 737db96d56Sopenharmony_ci/* Fast Number Theoretic Transform, decimation in frequency. */ 747db96d56Sopenharmony_civoid 757db96d56Sopenharmony_cifnt_dif2(mpd_uint_t a[], mpd_size_t n, struct fnt_params *tparams) 767db96d56Sopenharmony_ci{ 777db96d56Sopenharmony_ci mpd_uint_t *wtable = tparams->wtable; 787db96d56Sopenharmony_ci mpd_uint_t umod; 797db96d56Sopenharmony_ci#ifdef PPRO 807db96d56Sopenharmony_ci double dmod; 817db96d56Sopenharmony_ci uint32_t dinvmod[3]; 827db96d56Sopenharmony_ci#endif 837db96d56Sopenharmony_ci mpd_uint_t u0, u1, v0, v1; 847db96d56Sopenharmony_ci mpd_uint_t w, w0, w1, wstep; 857db96d56Sopenharmony_ci mpd_size_t m, mhalf; 867db96d56Sopenharmony_ci mpd_size_t j, r; 877db96d56Sopenharmony_ci 887db96d56Sopenharmony_ci 897db96d56Sopenharmony_ci assert(ispower2(n)); 907db96d56Sopenharmony_ci assert(n >= 4); 917db96d56Sopenharmony_ci 927db96d56Sopenharmony_ci SETMODULUS(tparams->modnum); 937db96d56Sopenharmony_ci 947db96d56Sopenharmony_ci /* m == n */ 957db96d56Sopenharmony_ci mhalf = n / 2; 967db96d56Sopenharmony_ci for (j = 0; j < mhalf; j += 2) { 977db96d56Sopenharmony_ci 987db96d56Sopenharmony_ci w0 = wtable[j]; 997db96d56Sopenharmony_ci w1 = wtable[j+1]; 1007db96d56Sopenharmony_ci 1017db96d56Sopenharmony_ci u0 = a[j]; 1027db96d56Sopenharmony_ci v0 = a[j+mhalf]; 1037db96d56Sopenharmony_ci 1047db96d56Sopenharmony_ci u1 = a[j+1]; 1057db96d56Sopenharmony_ci v1 = a[j+1+mhalf]; 1067db96d56Sopenharmony_ci 1077db96d56Sopenharmony_ci a[j] = addmod(u0, v0, umod); 1087db96d56Sopenharmony_ci v0 = submod(u0, v0, umod); 1097db96d56Sopenharmony_ci 1107db96d56Sopenharmony_ci a[j+1] = addmod(u1, v1, umod); 1117db96d56Sopenharmony_ci v1 = submod(u1, v1, umod); 1127db96d56Sopenharmony_ci 1137db96d56Sopenharmony_ci MULMOD2(&v0, w0, &v1, w1); 1147db96d56Sopenharmony_ci 1157db96d56Sopenharmony_ci a[j+mhalf] = v0; 1167db96d56Sopenharmony_ci a[j+1+mhalf] = v1; 1177db96d56Sopenharmony_ci 1187db96d56Sopenharmony_ci } 1197db96d56Sopenharmony_ci 1207db96d56Sopenharmony_ci wstep = 2; 1217db96d56Sopenharmony_ci for (m = n/2; m >= 2; m>>=1, wstep<<=1) { 1227db96d56Sopenharmony_ci 1237db96d56Sopenharmony_ci mhalf = m / 2; 1247db96d56Sopenharmony_ci 1257db96d56Sopenharmony_ci /* j == 0 */ 1267db96d56Sopenharmony_ci for (r = 0; r < n; r += 2*m) { 1277db96d56Sopenharmony_ci 1287db96d56Sopenharmony_ci u0 = a[r]; 1297db96d56Sopenharmony_ci v0 = a[r+mhalf]; 1307db96d56Sopenharmony_ci 1317db96d56Sopenharmony_ci u1 = a[m+r]; 1327db96d56Sopenharmony_ci v1 = a[m+r+mhalf]; 1337db96d56Sopenharmony_ci 1347db96d56Sopenharmony_ci a[r] = addmod(u0, v0, umod); 1357db96d56Sopenharmony_ci v0 = submod(u0, v0, umod); 1367db96d56Sopenharmony_ci 1377db96d56Sopenharmony_ci a[m+r] = addmod(u1, v1, umod); 1387db96d56Sopenharmony_ci v1 = submod(u1, v1, umod); 1397db96d56Sopenharmony_ci 1407db96d56Sopenharmony_ci a[r+mhalf] = v0; 1417db96d56Sopenharmony_ci a[m+r+mhalf] = v1; 1427db96d56Sopenharmony_ci } 1437db96d56Sopenharmony_ci 1447db96d56Sopenharmony_ci for (j = 1; j < mhalf; j++) { 1457db96d56Sopenharmony_ci 1467db96d56Sopenharmony_ci w = wtable[j*wstep]; 1477db96d56Sopenharmony_ci 1487db96d56Sopenharmony_ci for (r = 0; r < n; r += 2*m) { 1497db96d56Sopenharmony_ci 1507db96d56Sopenharmony_ci u0 = a[r+j]; 1517db96d56Sopenharmony_ci v0 = a[r+j+mhalf]; 1527db96d56Sopenharmony_ci 1537db96d56Sopenharmony_ci u1 = a[m+r+j]; 1547db96d56Sopenharmony_ci v1 = a[m+r+j+mhalf]; 1557db96d56Sopenharmony_ci 1567db96d56Sopenharmony_ci a[r+j] = addmod(u0, v0, umod); 1577db96d56Sopenharmony_ci v0 = submod(u0, v0, umod); 1587db96d56Sopenharmony_ci 1597db96d56Sopenharmony_ci a[m+r+j] = addmod(u1, v1, umod); 1607db96d56Sopenharmony_ci v1 = submod(u1, v1, umod); 1617db96d56Sopenharmony_ci 1627db96d56Sopenharmony_ci MULMOD2C(&v0, &v1, w); 1637db96d56Sopenharmony_ci 1647db96d56Sopenharmony_ci a[r+j+mhalf] = v0; 1657db96d56Sopenharmony_ci a[m+r+j+mhalf] = v1; 1667db96d56Sopenharmony_ci } 1677db96d56Sopenharmony_ci 1687db96d56Sopenharmony_ci } 1697db96d56Sopenharmony_ci 1707db96d56Sopenharmony_ci } 1717db96d56Sopenharmony_ci 1727db96d56Sopenharmony_ci bitreverse_permute(a, n); 1737db96d56Sopenharmony_ci} 174