1/* 2 * Copyright (c) 2008-2020 Stefan Krah. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 29#include "mpdecimal.h" 30 31#include <assert.h> 32#include <stdio.h> 33 34#include "bits.h" 35#include "constants.h" 36#include "difradix2.h" 37#include "numbertheory.h" 38#include "sixstep.h" 39#include "transpose.h" 40#include "umodarith.h" 41 42 43/* Bignum: Cache efficient Matrix Fourier Transform for arrays of the 44 form 2**n (See literature/six-step.txt). */ 45 46 47/* forward transform with sign = -1 */ 48int 49six_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum) 50{ 51 struct fnt_params *tparams; 52 mpd_size_t log2n, C, R; 53 mpd_uint_t kernel; 54 mpd_uint_t umod; 55#ifdef PPRO 56 double dmod; 57 uint32_t dinvmod[3]; 58#endif 59 mpd_uint_t *x, w0, w1, wstep; 60 mpd_size_t i, k; 61 62 63 assert(ispower2(n)); 64 assert(n >= 16); 65 assert(n <= MPD_MAXTRANSFORM_2N); 66 67 log2n = mpd_bsr(n); 68 C = ((mpd_size_t)1) << (log2n / 2); /* number of columns */ 69 R = ((mpd_size_t)1) << (log2n - (log2n / 2)); /* number of rows */ 70 71 72 /* Transpose the matrix. */ 73 if (!transpose_pow2(a, R, C)) { 74 return 0; 75 } 76 77 /* Length R transform on the rows. */ 78 if ((tparams = _mpd_init_fnt_params(R, -1, modnum)) == NULL) { 79 return 0; 80 } 81 for (x = a; x < a+n; x += R) { 82 fnt_dif2(x, R, tparams); 83 } 84 85 /* Transpose the matrix. */ 86 if (!transpose_pow2(a, C, R)) { 87 mpd_free(tparams); 88 return 0; 89 } 90 91 /* Multiply each matrix element (addressed by i*C+k) by r**(i*k). */ 92 SETMODULUS(modnum); 93 kernel = _mpd_getkernel(n, -1, modnum); 94 for (i = 1; i < R; i++) { 95 w0 = 1; /* r**(i*0): initial value for k=0 */ 96 w1 = POWMOD(kernel, i); /* r**(i*1): initial value for k=1 */ 97 wstep = MULMOD(w1, w1); /* r**(2*i) */ 98 for (k = 0; k < C; k += 2) { 99 mpd_uint_t x0 = a[i*C+k]; 100 mpd_uint_t x1 = a[i*C+k+1]; 101 MULMOD2(&x0, w0, &x1, w1); 102 MULMOD2C(&w0, &w1, wstep); /* r**(i*(k+2)) = r**(i*k) * r**(2*i) */ 103 a[i*C+k] = x0; 104 a[i*C+k+1] = x1; 105 } 106 } 107 108 /* Length C transform on the rows. */ 109 if (C != R) { 110 mpd_free(tparams); 111 if ((tparams = _mpd_init_fnt_params(C, -1, modnum)) == NULL) { 112 return 0; 113 } 114 } 115 for (x = a; x < a+n; x += C) { 116 fnt_dif2(x, C, tparams); 117 } 118 mpd_free(tparams); 119 120#if 0 121 /* An unordered transform is sufficient for convolution. */ 122 /* Transpose the matrix. */ 123 if (!transpose_pow2(a, R, C)) { 124 return 0; 125 } 126#endif 127 128 return 1; 129} 130 131 132/* reverse transform, sign = 1 */ 133int 134inv_six_step_fnt(mpd_uint_t *a, mpd_size_t n, int modnum) 135{ 136 struct fnt_params *tparams; 137 mpd_size_t log2n, C, R; 138 mpd_uint_t kernel; 139 mpd_uint_t umod; 140#ifdef PPRO 141 double dmod; 142 uint32_t dinvmod[3]; 143#endif 144 mpd_uint_t *x, w0, w1, wstep; 145 mpd_size_t i, k; 146 147 148 assert(ispower2(n)); 149 assert(n >= 16); 150 assert(n <= MPD_MAXTRANSFORM_2N); 151 152 log2n = mpd_bsr(n); 153 C = ((mpd_size_t)1) << (log2n / 2); /* number of columns */ 154 R = ((mpd_size_t)1) << (log2n - (log2n / 2)); /* number of rows */ 155 156 157#if 0 158 /* An unordered transform is sufficient for convolution. */ 159 /* Transpose the matrix, producing an R*C matrix. */ 160 if (!transpose_pow2(a, C, R)) { 161 return 0; 162 } 163#endif 164 165 /* Length C transform on the rows. */ 166 if ((tparams = _mpd_init_fnt_params(C, 1, modnum)) == NULL) { 167 return 0; 168 } 169 for (x = a; x < a+n; x += C) { 170 fnt_dif2(x, C, tparams); 171 } 172 173 /* Multiply each matrix element (addressed by i*C+k) by r**(i*k). */ 174 SETMODULUS(modnum); 175 kernel = _mpd_getkernel(n, 1, modnum); 176 for (i = 1; i < R; i++) { 177 w0 = 1; 178 w1 = POWMOD(kernel, i); 179 wstep = MULMOD(w1, w1); 180 for (k = 0; k < C; k += 2) { 181 mpd_uint_t x0 = a[i*C+k]; 182 mpd_uint_t x1 = a[i*C+k+1]; 183 MULMOD2(&x0, w0, &x1, w1); 184 MULMOD2C(&w0, &w1, wstep); 185 a[i*C+k] = x0; 186 a[i*C+k+1] = x1; 187 } 188 } 189 190 /* Transpose the matrix. */ 191 if (!transpose_pow2(a, R, C)) { 192 mpd_free(tparams); 193 return 0; 194 } 195 196 /* Length R transform on the rows. */ 197 if (R != C) { 198 mpd_free(tparams); 199 if ((tparams = _mpd_init_fnt_params(R, 1, modnum)) == NULL) { 200 return 0; 201 } 202 } 203 for (x = a; x < a+n; x += R) { 204 fnt_dif2(x, R, tparams); 205 } 206 mpd_free(tparams); 207 208 /* Transpose the matrix. */ 209 if (!transpose_pow2(a, C, R)) { 210 return 0; 211 } 212 213 return 1; 214} 215