17db96d56Sopenharmony_ci/* 27db96d56Sopenharmony_ci * Copyright (c) 2008-2020 Stefan Krah. All rights reserved. 37db96d56Sopenharmony_ci * 47db96d56Sopenharmony_ci * Redistribution and use in source and binary forms, with or without 57db96d56Sopenharmony_ci * modification, are permitted provided that the following conditions 67db96d56Sopenharmony_ci * are met: 77db96d56Sopenharmony_ci * 87db96d56Sopenharmony_ci * 1. Redistributions of source code must retain the above copyright 97db96d56Sopenharmony_ci * notice, this list of conditions and the following disclaimer. 107db96d56Sopenharmony_ci * 117db96d56Sopenharmony_ci * 2. Redistributions in binary form must reproduce the above copyright 127db96d56Sopenharmony_ci * notice, this list of conditions and the following disclaimer in the 137db96d56Sopenharmony_ci * documentation and/or other materials provided with the distribution. 147db96d56Sopenharmony_ci * 157db96d56Sopenharmony_ci * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND 167db96d56Sopenharmony_ci * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 177db96d56Sopenharmony_ci * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 187db96d56Sopenharmony_ci * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 197db96d56Sopenharmony_ci * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 207db96d56Sopenharmony_ci * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 217db96d56Sopenharmony_ci * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 227db96d56Sopenharmony_ci * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 237db96d56Sopenharmony_ci * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 247db96d56Sopenharmony_ci * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 257db96d56Sopenharmony_ci * SUCH DAMAGE. 267db96d56Sopenharmony_ci */ 277db96d56Sopenharmony_ci 287db96d56Sopenharmony_ci 297db96d56Sopenharmony_ci#include "mpdecimal.h" 307db96d56Sopenharmony_ci 317db96d56Sopenharmony_ci#include <assert.h> 327db96d56Sopenharmony_ci#include <stdio.h> 337db96d56Sopenharmony_ci 347db96d56Sopenharmony_ci#include "bits.h" 357db96d56Sopenharmony_ci#include "constants.h" 367db96d56Sopenharmony_ci#include "difradix2.h" 377db96d56Sopenharmony_ci#include "numbertheory.h" 387db96d56Sopenharmony_ci#include "sixstep.h" 397db96d56Sopenharmony_ci#include "transpose.h" 407db96d56Sopenharmony_ci#include "umodarith.h" 417db96d56Sopenharmony_ci 427db96d56Sopenharmony_ci 437db96d56Sopenharmony_ci/* Bignum: Cache efficient Matrix Fourier Transform for arrays of the 447db96d56Sopenharmony_ci form 2**n (See literature/six-step.txt). */ 457db96d56Sopenharmony_ci 467db96d56Sopenharmony_ci 477db96d56Sopenharmony_ci/* forward transform with sign = -1 */ 487db96d56Sopenharmony_ciint 497db96d56Sopenharmony_cisix_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum) 507db96d56Sopenharmony_ci{ 517db96d56Sopenharmony_ci struct fnt_params *tparams; 527db96d56Sopenharmony_ci mpd_size_t log2n, C, R; 537db96d56Sopenharmony_ci mpd_uint_t kernel; 547db96d56Sopenharmony_ci mpd_uint_t umod; 557db96d56Sopenharmony_ci#ifdef PPRO 567db96d56Sopenharmony_ci double dmod; 577db96d56Sopenharmony_ci uint32_t dinvmod[3]; 587db96d56Sopenharmony_ci#endif 597db96d56Sopenharmony_ci mpd_uint_t *x, w0, w1, wstep; 607db96d56Sopenharmony_ci mpd_size_t i, k; 617db96d56Sopenharmony_ci 627db96d56Sopenharmony_ci 637db96d56Sopenharmony_ci assert(ispower2(n)); 647db96d56Sopenharmony_ci assert(n >= 16); 657db96d56Sopenharmony_ci assert(n <= MPD_MAXTRANSFORM_2N); 667db96d56Sopenharmony_ci 677db96d56Sopenharmony_ci log2n = mpd_bsr(n); 687db96d56Sopenharmony_ci C = ((mpd_size_t)1) << (log2n / 2); /* number of columns */ 697db96d56Sopenharmony_ci R = ((mpd_size_t)1) << (log2n - (log2n / 2)); /* number of rows */ 707db96d56Sopenharmony_ci 717db96d56Sopenharmony_ci 727db96d56Sopenharmony_ci /* Transpose the matrix. */ 737db96d56Sopenharmony_ci if (!transpose_pow2(a, R, C)) { 747db96d56Sopenharmony_ci return 0; 757db96d56Sopenharmony_ci } 767db96d56Sopenharmony_ci 777db96d56Sopenharmony_ci /* Length R transform on the rows. */ 787db96d56Sopenharmony_ci if ((tparams = _mpd_init_fnt_params(R, -1, modnum)) == NULL) { 797db96d56Sopenharmony_ci return 0; 807db96d56Sopenharmony_ci } 817db96d56Sopenharmony_ci for (x = a; x < a+n; x += R) { 827db96d56Sopenharmony_ci fnt_dif2(x, R, tparams); 837db96d56Sopenharmony_ci } 847db96d56Sopenharmony_ci 857db96d56Sopenharmony_ci /* Transpose the matrix. */ 867db96d56Sopenharmony_ci if (!transpose_pow2(a, C, R)) { 877db96d56Sopenharmony_ci mpd_free(tparams); 887db96d56Sopenharmony_ci return 0; 897db96d56Sopenharmony_ci } 907db96d56Sopenharmony_ci 917db96d56Sopenharmony_ci /* Multiply each matrix element (addressed by i*C+k) by r**(i*k). */ 927db96d56Sopenharmony_ci SETMODULUS(modnum); 937db96d56Sopenharmony_ci kernel = _mpd_getkernel(n, -1, modnum); 947db96d56Sopenharmony_ci for (i = 1; i < R; i++) { 957db96d56Sopenharmony_ci w0 = 1; /* r**(i*0): initial value for k=0 */ 967db96d56Sopenharmony_ci w1 = POWMOD(kernel, i); /* r**(i*1): initial value for k=1 */ 977db96d56Sopenharmony_ci wstep = MULMOD(w1, w1); /* r**(2*i) */ 987db96d56Sopenharmony_ci for (k = 0; k < C; k += 2) { 997db96d56Sopenharmony_ci mpd_uint_t x0 = a[i*C+k]; 1007db96d56Sopenharmony_ci mpd_uint_t x1 = a[i*C+k+1]; 1017db96d56Sopenharmony_ci MULMOD2(&x0, w0, &x1, w1); 1027db96d56Sopenharmony_ci MULMOD2C(&w0, &w1, wstep); /* r**(i*(k+2)) = r**(i*k) * r**(2*i) */ 1037db96d56Sopenharmony_ci a[i*C+k] = x0; 1047db96d56Sopenharmony_ci a[i*C+k+1] = x1; 1057db96d56Sopenharmony_ci } 1067db96d56Sopenharmony_ci } 1077db96d56Sopenharmony_ci 1087db96d56Sopenharmony_ci /* Length C transform on the rows. */ 1097db96d56Sopenharmony_ci if (C != R) { 1107db96d56Sopenharmony_ci mpd_free(tparams); 1117db96d56Sopenharmony_ci if ((tparams = _mpd_init_fnt_params(C, -1, modnum)) == NULL) { 1127db96d56Sopenharmony_ci return 0; 1137db96d56Sopenharmony_ci } 1147db96d56Sopenharmony_ci } 1157db96d56Sopenharmony_ci for (x = a; x < a+n; x += C) { 1167db96d56Sopenharmony_ci fnt_dif2(x, C, tparams); 1177db96d56Sopenharmony_ci } 1187db96d56Sopenharmony_ci mpd_free(tparams); 1197db96d56Sopenharmony_ci 1207db96d56Sopenharmony_ci#if 0 1217db96d56Sopenharmony_ci /* An unordered transform is sufficient for convolution. */ 1227db96d56Sopenharmony_ci /* Transpose the matrix. */ 1237db96d56Sopenharmony_ci if (!transpose_pow2(a, R, C)) { 1247db96d56Sopenharmony_ci return 0; 1257db96d56Sopenharmony_ci } 1267db96d56Sopenharmony_ci#endif 1277db96d56Sopenharmony_ci 1287db96d56Sopenharmony_ci return 1; 1297db96d56Sopenharmony_ci} 1307db96d56Sopenharmony_ci 1317db96d56Sopenharmony_ci 1327db96d56Sopenharmony_ci/* reverse transform, sign = 1 */ 1337db96d56Sopenharmony_ciint 1347db96d56Sopenharmony_ciinv_six_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum) 1357db96d56Sopenharmony_ci{ 1367db96d56Sopenharmony_ci struct fnt_params *tparams; 1377db96d56Sopenharmony_ci mpd_size_t log2n, C, R; 1387db96d56Sopenharmony_ci mpd_uint_t kernel; 1397db96d56Sopenharmony_ci mpd_uint_t umod; 1407db96d56Sopenharmony_ci#ifdef PPRO 1417db96d56Sopenharmony_ci double dmod; 1427db96d56Sopenharmony_ci uint32_t dinvmod[3]; 1437db96d56Sopenharmony_ci#endif 1447db96d56Sopenharmony_ci mpd_uint_t *x, w0, w1, wstep; 1457db96d56Sopenharmony_ci mpd_size_t i, k; 1467db96d56Sopenharmony_ci 1477db96d56Sopenharmony_ci 1487db96d56Sopenharmony_ci assert(ispower2(n)); 1497db96d56Sopenharmony_ci assert(n >= 16); 1507db96d56Sopenharmony_ci assert(n <= MPD_MAXTRANSFORM_2N); 1517db96d56Sopenharmony_ci 1527db96d56Sopenharmony_ci log2n = mpd_bsr(n); 1537db96d56Sopenharmony_ci C = ((mpd_size_t)1) << (log2n / 2); /* number of columns */ 1547db96d56Sopenharmony_ci R = ((mpd_size_t)1) << (log2n - (log2n / 2)); /* number of rows */ 1557db96d56Sopenharmony_ci 1567db96d56Sopenharmony_ci 1577db96d56Sopenharmony_ci#if 0 1587db96d56Sopenharmony_ci /* An unordered transform is sufficient for convolution. */ 1597db96d56Sopenharmony_ci /* Transpose the matrix, producing an R*C matrix. */ 1607db96d56Sopenharmony_ci if (!transpose_pow2(a, C, R)) { 1617db96d56Sopenharmony_ci return 0; 1627db96d56Sopenharmony_ci } 1637db96d56Sopenharmony_ci#endif 1647db96d56Sopenharmony_ci 1657db96d56Sopenharmony_ci /* Length C transform on the rows. */ 1667db96d56Sopenharmony_ci if ((tparams = _mpd_init_fnt_params(C, 1, modnum)) == NULL) { 1677db96d56Sopenharmony_ci return 0; 1687db96d56Sopenharmony_ci } 1697db96d56Sopenharmony_ci for (x = a; x < a+n; x += C) { 1707db96d56Sopenharmony_ci fnt_dif2(x, C, tparams); 1717db96d56Sopenharmony_ci } 1727db96d56Sopenharmony_ci 1737db96d56Sopenharmony_ci /* Multiply each matrix element (addressed by i*C+k) by r**(i*k). */ 1747db96d56Sopenharmony_ci SETMODULUS(modnum); 1757db96d56Sopenharmony_ci kernel = _mpd_getkernel(n, 1, modnum); 1767db96d56Sopenharmony_ci for (i = 1; i < R; i++) { 1777db96d56Sopenharmony_ci w0 = 1; 1787db96d56Sopenharmony_ci w1 = POWMOD(kernel, i); 1797db96d56Sopenharmony_ci wstep = MULMOD(w1, w1); 1807db96d56Sopenharmony_ci for (k = 0; k < C; k += 2) { 1817db96d56Sopenharmony_ci mpd_uint_t x0 = a[i*C+k]; 1827db96d56Sopenharmony_ci mpd_uint_t x1 = a[i*C+k+1]; 1837db96d56Sopenharmony_ci MULMOD2(&x0, w0, &x1, w1); 1847db96d56Sopenharmony_ci MULMOD2C(&w0, &w1, wstep); 1857db96d56Sopenharmony_ci a[i*C+k] = x0; 1867db96d56Sopenharmony_ci a[i*C+k+1] = x1; 1877db96d56Sopenharmony_ci } 1887db96d56Sopenharmony_ci } 1897db96d56Sopenharmony_ci 1907db96d56Sopenharmony_ci /* Transpose the matrix. */ 1917db96d56Sopenharmony_ci if (!transpose_pow2(a, R, C)) { 1927db96d56Sopenharmony_ci mpd_free(tparams); 1937db96d56Sopenharmony_ci return 0; 1947db96d56Sopenharmony_ci } 1957db96d56Sopenharmony_ci 1967db96d56Sopenharmony_ci /* Length R transform on the rows. */ 1977db96d56Sopenharmony_ci if (R != C) { 1987db96d56Sopenharmony_ci mpd_free(tparams); 1997db96d56Sopenharmony_ci if ((tparams = _mpd_init_fnt_params(R, 1, modnum)) == NULL) { 2007db96d56Sopenharmony_ci return 0; 2017db96d56Sopenharmony_ci } 2027db96d56Sopenharmony_ci } 2037db96d56Sopenharmony_ci for (x = a; x < a+n; x += R) { 2047db96d56Sopenharmony_ci fnt_dif2(x, R, tparams); 2057db96d56Sopenharmony_ci } 2067db96d56Sopenharmony_ci mpd_free(tparams); 2077db96d56Sopenharmony_ci 2087db96d56Sopenharmony_ci /* Transpose the matrix. */ 2097db96d56Sopenharmony_ci if (!transpose_pow2(a, C, R)) { 2107db96d56Sopenharmony_ci return 0; 2117db96d56Sopenharmony_ci } 2127db96d56Sopenharmony_ci 2137db96d56Sopenharmony_ci return 1; 2147db96d56Sopenharmony_ci} 215