17db96d56Sopenharmony_ci/*
27db96d56Sopenharmony_ci * Copyright (c) 2008-2020 Stefan Krah. All rights reserved.
37db96d56Sopenharmony_ci *
47db96d56Sopenharmony_ci * Redistribution and use in source and binary forms, with or without
57db96d56Sopenharmony_ci * modification, are permitted provided that the following conditions
67db96d56Sopenharmony_ci * are met:
77db96d56Sopenharmony_ci *
87db96d56Sopenharmony_ci * 1. Redistributions of source code must retain the above copyright
97db96d56Sopenharmony_ci *    notice, this list of conditions and the following disclaimer.
107db96d56Sopenharmony_ci *
117db96d56Sopenharmony_ci * 2. Redistributions in binary form must reproduce the above copyright
127db96d56Sopenharmony_ci *    notice, this list of conditions and the following disclaimer in the
137db96d56Sopenharmony_ci *    documentation and/or other materials provided with the distribution.
147db96d56Sopenharmony_ci *
157db96d56Sopenharmony_ci * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
167db96d56Sopenharmony_ci * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
177db96d56Sopenharmony_ci * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
187db96d56Sopenharmony_ci * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
197db96d56Sopenharmony_ci * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
207db96d56Sopenharmony_ci * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
217db96d56Sopenharmony_ci * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
227db96d56Sopenharmony_ci * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
237db96d56Sopenharmony_ci * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
247db96d56Sopenharmony_ci * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
257db96d56Sopenharmony_ci * SUCH DAMAGE.
267db96d56Sopenharmony_ci */
277db96d56Sopenharmony_ci
287db96d56Sopenharmony_ci
297db96d56Sopenharmony_ci#include "mpdecimal.h"
307db96d56Sopenharmony_ci
317db96d56Sopenharmony_ci#include <assert.h>
327db96d56Sopenharmony_ci
337db96d56Sopenharmony_ci#include "bits.h"
347db96d56Sopenharmony_ci#include "constants.h"
357db96d56Sopenharmony_ci#include "difradix2.h"
367db96d56Sopenharmony_ci#include "numbertheory.h"
377db96d56Sopenharmony_ci#include "umodarith.h"
387db96d56Sopenharmony_ci
397db96d56Sopenharmony_ci
407db96d56Sopenharmony_ci/* Bignum: The actual transform routine (decimation in frequency). */
417db96d56Sopenharmony_ci
427db96d56Sopenharmony_ci
437db96d56Sopenharmony_ci/*
447db96d56Sopenharmony_ci * Generate index pairs (x, bitreverse(x)) and carry out the permutation.
457db96d56Sopenharmony_ci * n must be a power of two.
467db96d56Sopenharmony_ci * Algorithm due to Brent/Lehmann, see Joerg Arndt, "Matters Computational",
477db96d56Sopenharmony_ci * Chapter 1.14.4. [http://www.jjj.de/fxt/]
487db96d56Sopenharmony_ci */
497db96d56Sopenharmony_cistatic inline void
507db96d56Sopenharmony_cibitreverse_permute(mpd_uint_t a[], mpd_size_t n)
517db96d56Sopenharmony_ci{
527db96d56Sopenharmony_ci    mpd_size_t x = 0;
537db96d56Sopenharmony_ci    mpd_size_t r = 0;
547db96d56Sopenharmony_ci    mpd_uint_t t;
557db96d56Sopenharmony_ci
567db96d56Sopenharmony_ci    do { /* Invariant: r = bitreverse(x) */
577db96d56Sopenharmony_ci        if (r > x) {
587db96d56Sopenharmony_ci            t = a[x];
597db96d56Sopenharmony_ci            a[x] = a[r];
607db96d56Sopenharmony_ci            a[r] = t;
617db96d56Sopenharmony_ci        }
627db96d56Sopenharmony_ci        /* Flip trailing consecutive 1 bits and the first zero bit
637db96d56Sopenharmony_ci         * that absorbs a possible carry. */
647db96d56Sopenharmony_ci        x += 1;
657db96d56Sopenharmony_ci        /* Mirror the operation on r: Flip n_trailing_zeros(x)+1
667db96d56Sopenharmony_ci           high bits of r. */
677db96d56Sopenharmony_ci        r ^= (n - (n >> (mpd_bsf(x)+1)));
687db96d56Sopenharmony_ci        /* The loop invariant is preserved. */
697db96d56Sopenharmony_ci    } while (x < n);
707db96d56Sopenharmony_ci}
717db96d56Sopenharmony_ci
727db96d56Sopenharmony_ci
737db96d56Sopenharmony_ci/* Fast Number Theoretic Transform, decimation in frequency. */
747db96d56Sopenharmony_civoid
757db96d56Sopenharmony_cifnt_dif2(mpd_uint_t a[], mpd_size_t n, struct fnt_params *tparams)
767db96d56Sopenharmony_ci{
777db96d56Sopenharmony_ci    mpd_uint_t *wtable = tparams->wtable;
787db96d56Sopenharmony_ci    mpd_uint_t umod;
797db96d56Sopenharmony_ci#ifdef PPRO
807db96d56Sopenharmony_ci    double dmod;
817db96d56Sopenharmony_ci    uint32_t dinvmod[3];
827db96d56Sopenharmony_ci#endif
837db96d56Sopenharmony_ci    mpd_uint_t u0, u1, v0, v1;
847db96d56Sopenharmony_ci    mpd_uint_t w, w0, w1, wstep;
857db96d56Sopenharmony_ci    mpd_size_t m, mhalf;
867db96d56Sopenharmony_ci    mpd_size_t j, r;
877db96d56Sopenharmony_ci
887db96d56Sopenharmony_ci
897db96d56Sopenharmony_ci    assert(ispower2(n));
907db96d56Sopenharmony_ci    assert(n >= 4);
917db96d56Sopenharmony_ci
927db96d56Sopenharmony_ci    SETMODULUS(tparams->modnum);
937db96d56Sopenharmony_ci
947db96d56Sopenharmony_ci    /* m == n */
957db96d56Sopenharmony_ci    mhalf = n / 2;
967db96d56Sopenharmony_ci    for (j = 0; j < mhalf; j += 2) {
977db96d56Sopenharmony_ci
987db96d56Sopenharmony_ci        w0 = wtable[j];
997db96d56Sopenharmony_ci        w1 = wtable[j+1];
1007db96d56Sopenharmony_ci
1017db96d56Sopenharmony_ci        u0 = a[j];
1027db96d56Sopenharmony_ci        v0 = a[j+mhalf];
1037db96d56Sopenharmony_ci
1047db96d56Sopenharmony_ci        u1 = a[j+1];
1057db96d56Sopenharmony_ci        v1 = a[j+1+mhalf];
1067db96d56Sopenharmony_ci
1077db96d56Sopenharmony_ci        a[j] = addmod(u0, v0, umod);
1087db96d56Sopenharmony_ci        v0 = submod(u0, v0, umod);
1097db96d56Sopenharmony_ci
1107db96d56Sopenharmony_ci        a[j+1] = addmod(u1, v1, umod);
1117db96d56Sopenharmony_ci        v1 = submod(u1, v1, umod);
1127db96d56Sopenharmony_ci
1137db96d56Sopenharmony_ci        MULMOD2(&v0, w0, &v1, w1);
1147db96d56Sopenharmony_ci
1157db96d56Sopenharmony_ci        a[j+mhalf] = v0;
1167db96d56Sopenharmony_ci        a[j+1+mhalf] = v1;
1177db96d56Sopenharmony_ci
1187db96d56Sopenharmony_ci    }
1197db96d56Sopenharmony_ci
1207db96d56Sopenharmony_ci    wstep = 2;
1217db96d56Sopenharmony_ci    for (m = n/2; m >= 2; m>>=1, wstep<<=1) {
1227db96d56Sopenharmony_ci
1237db96d56Sopenharmony_ci        mhalf = m / 2;
1247db96d56Sopenharmony_ci
1257db96d56Sopenharmony_ci        /* j == 0 */
1267db96d56Sopenharmony_ci        for (r = 0; r < n; r += 2*m) {
1277db96d56Sopenharmony_ci
1287db96d56Sopenharmony_ci            u0 = a[r];
1297db96d56Sopenharmony_ci            v0 = a[r+mhalf];
1307db96d56Sopenharmony_ci
1317db96d56Sopenharmony_ci            u1 = a[m+r];
1327db96d56Sopenharmony_ci            v1 = a[m+r+mhalf];
1337db96d56Sopenharmony_ci
1347db96d56Sopenharmony_ci            a[r] = addmod(u0, v0, umod);
1357db96d56Sopenharmony_ci            v0 = submod(u0, v0, umod);
1367db96d56Sopenharmony_ci
1377db96d56Sopenharmony_ci            a[m+r] = addmod(u1, v1, umod);
1387db96d56Sopenharmony_ci            v1 = submod(u1, v1, umod);
1397db96d56Sopenharmony_ci
1407db96d56Sopenharmony_ci            a[r+mhalf] = v0;
1417db96d56Sopenharmony_ci            a[m+r+mhalf] = v1;
1427db96d56Sopenharmony_ci        }
1437db96d56Sopenharmony_ci
1447db96d56Sopenharmony_ci        for (j = 1; j < mhalf; j++) {
1457db96d56Sopenharmony_ci
1467db96d56Sopenharmony_ci            w = wtable[j*wstep];
1477db96d56Sopenharmony_ci
1487db96d56Sopenharmony_ci            for (r = 0; r < n; r += 2*m) {
1497db96d56Sopenharmony_ci
1507db96d56Sopenharmony_ci                u0 = a[r+j];
1517db96d56Sopenharmony_ci                v0 = a[r+j+mhalf];
1527db96d56Sopenharmony_ci
1537db96d56Sopenharmony_ci                u1 = a[m+r+j];
1547db96d56Sopenharmony_ci                v1 = a[m+r+j+mhalf];
1557db96d56Sopenharmony_ci
1567db96d56Sopenharmony_ci                a[r+j] = addmod(u0, v0, umod);
1577db96d56Sopenharmony_ci                v0 = submod(u0, v0, umod);
1587db96d56Sopenharmony_ci
1597db96d56Sopenharmony_ci                a[m+r+j] = addmod(u1, v1, umod);
1607db96d56Sopenharmony_ci                v1 = submod(u1, v1, umod);
1617db96d56Sopenharmony_ci
1627db96d56Sopenharmony_ci                MULMOD2C(&v0, &v1, w);
1637db96d56Sopenharmony_ci
1647db96d56Sopenharmony_ci                a[r+j+mhalf] = v0;
1657db96d56Sopenharmony_ci                a[m+r+j+mhalf] = v1;
1667db96d56Sopenharmony_ci            }
1677db96d56Sopenharmony_ci
1687db96d56Sopenharmony_ci        }
1697db96d56Sopenharmony_ci
1707db96d56Sopenharmony_ci    }
1717db96d56Sopenharmony_ci
1727db96d56Sopenharmony_ci    bitreverse_permute(a, n);
1737db96d56Sopenharmony_ci}
174