1/* 2 * Copyright (c) 2008-2020 Stefan Krah. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 29#include "mpdecimal.h" 30#include "bits.h" 31#include "constants.h" 32#include "convolute.h" 33#include "fnt.h" 34#include "fourstep.h" 35#include "numbertheory.h" 36#include "sixstep.h" 37#include "umodarith.h" 38 39 40/* Bignum: Fast convolution using the Number Theoretic Transform. Used for 41 the multiplication of very large coefficients. */ 42 43 44/* Convolute the data in c1 and c2. Result is in c1. */ 45int 46fnt_convolute(mpd_uint_t *c1, mpd_uint_t *c2, mpd_size_t n, int modnum) 47{ 48 int (*fnt)(mpd_uint_t *, mpd_size_t, int); 49 int (*inv_fnt)(mpd_uint_t *, mpd_size_t, int); 50#ifdef PPRO 51 double dmod; 52 uint32_t dinvmod[3]; 53#endif 54 mpd_uint_t n_inv, umod; 55 mpd_size_t i; 56 57 58 SETMODULUS(modnum); 59 n_inv = POWMOD(n, (umod-2)); 60 61 if (ispower2(n)) { 62 if (n > SIX_STEP_THRESHOLD) { 63 fnt = six_step_fnt; 64 inv_fnt = inv_six_step_fnt; 65 } 66 else { 67 fnt = std_fnt; 68 inv_fnt = std_inv_fnt; 69 } 70 } 71 else { 72 fnt = four_step_fnt; 73 inv_fnt = inv_four_step_fnt; 74 } 75 76 if (!fnt(c1, n, modnum)) { 77 return 0; 78 } 79 if (!fnt(c2, n, modnum)) { 80 return 0; 81 } 82 for (i = 0; i < n-1; i += 2) { 83 mpd_uint_t x0 = c1[i]; 84 mpd_uint_t y0 = c2[i]; 85 mpd_uint_t x1 = c1[i+1]; 86 mpd_uint_t y1 = c2[i+1]; 87 MULMOD2(&x0, y0, &x1, y1); 88 c1[i] = x0; 89 c1[i+1] = x1; 90 } 91 92 if (!inv_fnt(c1, n, modnum)) { 93 return 0; 94 } 95 for (i = 0; i < n-3; i += 4) { 96 mpd_uint_t x0 = c1[i]; 97 mpd_uint_t x1 = c1[i+1]; 98 mpd_uint_t x2 = c1[i+2]; 99 mpd_uint_t x3 = c1[i+3]; 100 MULMOD2C(&x0, &x1, n_inv); 101 MULMOD2C(&x2, &x3, n_inv); 102 c1[i] = x0; 103 c1[i+1] = x1; 104 c1[i+2] = x2; 105 c1[i+3] = x3; 106 } 107 108 return 1; 109} 110 111/* Autoconvolute the data in c1. Result is in c1. */ 112int 113fnt_autoconvolute(mpd_uint_t *c1, mpd_size_t n, int modnum) 114{ 115 int (*fnt)(mpd_uint_t *, mpd_size_t, int); 116 int (*inv_fnt)(mpd_uint_t *, mpd_size_t, int); 117#ifdef PPRO 118 double dmod; 119 uint32_t dinvmod[3]; 120#endif 121 mpd_uint_t n_inv, umod; 122 mpd_size_t i; 123 124 125 SETMODULUS(modnum); 126 n_inv = POWMOD(n, (umod-2)); 127 128 if (ispower2(n)) { 129 if (n > SIX_STEP_THRESHOLD) { 130 fnt = six_step_fnt; 131 inv_fnt = inv_six_step_fnt; 132 } 133 else { 134 fnt = std_fnt; 135 inv_fnt = std_inv_fnt; 136 } 137 } 138 else { 139 fnt = four_step_fnt; 140 inv_fnt = inv_four_step_fnt; 141 } 142 143 if (!fnt(c1, n, modnum)) { 144 return 0; 145 } 146 for (i = 0; i < n-1; i += 2) { 147 mpd_uint_t x0 = c1[i]; 148 mpd_uint_t x1 = c1[i+1]; 149 MULMOD2(&x0, x0, &x1, x1); 150 c1[i] = x0; 151 c1[i+1] = x1; 152 } 153 154 if (!inv_fnt(c1, n, modnum)) { 155 return 0; 156 } 157 for (i = 0; i < n-3; i += 4) { 158 mpd_uint_t x0 = c1[i]; 159 mpd_uint_t x1 = c1[i+1]; 160 mpd_uint_t x2 = c1[i+2]; 161 mpd_uint_t x3 = c1[i+3]; 162 MULMOD2C(&x0, &x1, n_inv); 163 MULMOD2C(&x2, &x3, n_inv); 164 c1[i] = x0; 165 c1[i+1] = x1; 166 c1[i+2] = x2; 167 c1[i+3] = x3; 168 } 169 170 return 1; 171} 172