162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later 262306a36Sopenharmony_ci/* mpihelp-mul.c - MPI helper functions 362306a36Sopenharmony_ci * Copyright (C) 1994, 1996, 1998, 1999, 462306a36Sopenharmony_ci * 2000 Free Software Foundation, Inc. 562306a36Sopenharmony_ci * 662306a36Sopenharmony_ci * This file is part of GnuPG. 762306a36Sopenharmony_ci * 862306a36Sopenharmony_ci * Note: This code is heavily based on the GNU MP Library. 962306a36Sopenharmony_ci * Actually it's the same code with only minor changes in the 1062306a36Sopenharmony_ci * way the data is stored; this is to support the abstraction 1162306a36Sopenharmony_ci * of an optional secure memory allocation which may be used 1262306a36Sopenharmony_ci * to avoid revealing of sensitive data due to paging etc. 1362306a36Sopenharmony_ci * The GNU MP Library itself is published under the LGPL; 1462306a36Sopenharmony_ci * however I decided to publish this code under the plain GPL. 1562306a36Sopenharmony_ci */ 1662306a36Sopenharmony_ci 1762306a36Sopenharmony_ci#include <linux/string.h> 1862306a36Sopenharmony_ci#include "mpi-internal.h" 1962306a36Sopenharmony_ci#include "longlong.h" 2062306a36Sopenharmony_ci 2162306a36Sopenharmony_ci#define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace) \ 2262306a36Sopenharmony_ci do { \ 2362306a36Sopenharmony_ci if ((size) < KARATSUBA_THRESHOLD) \ 2462306a36Sopenharmony_ci mul_n_basecase(prodp, up, vp, size); \ 2562306a36Sopenharmony_ci else \ 2662306a36Sopenharmony_ci mul_n(prodp, up, vp, size, tspace); \ 2762306a36Sopenharmony_ci } while (0); 2862306a36Sopenharmony_ci 2962306a36Sopenharmony_ci#define MPN_SQR_N_RECURSE(prodp, up, size, tspace) \ 3062306a36Sopenharmony_ci do { \ 3162306a36Sopenharmony_ci if ((size) < KARATSUBA_THRESHOLD) \ 3262306a36Sopenharmony_ci mpih_sqr_n_basecase(prodp, up, size); \ 3362306a36Sopenharmony_ci else \ 3462306a36Sopenharmony_ci mpih_sqr_n(prodp, up, size, tspace); \ 3562306a36Sopenharmony_ci } while (0); 3662306a36Sopenharmony_ci 3762306a36Sopenharmony_ci/* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP), 3862306a36Sopenharmony_ci * both with SIZE limbs, and store the result at PRODP. 2 * SIZE limbs are 3962306a36Sopenharmony_ci * always stored. Return the most significant limb. 4062306a36Sopenharmony_ci * 4162306a36Sopenharmony_ci * Argument constraints: 4262306a36Sopenharmony_ci * 1. PRODP != UP and PRODP != VP, i.e. the destination 4362306a36Sopenharmony_ci * must be distinct from the multiplier and the multiplicand. 4462306a36Sopenharmony_ci * 4562306a36Sopenharmony_ci * 4662306a36Sopenharmony_ci * Handle simple cases with traditional multiplication. 4762306a36Sopenharmony_ci * 4862306a36Sopenharmony_ci * This is the most critical code of multiplication. All multiplies rely 4962306a36Sopenharmony_ci * on this, both small and huge. Small ones arrive here immediately. Huge 5062306a36Sopenharmony_ci * ones arrive here as this is the base case for Karatsuba's recursive 5162306a36Sopenharmony_ci * algorithm below. 5262306a36Sopenharmony_ci */ 5362306a36Sopenharmony_ci 5462306a36Sopenharmony_cistatic mpi_limb_t 5562306a36Sopenharmony_cimul_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size) 5662306a36Sopenharmony_ci{ 5762306a36Sopenharmony_ci mpi_size_t i; 5862306a36Sopenharmony_ci mpi_limb_t cy; 5962306a36Sopenharmony_ci mpi_limb_t v_limb; 6062306a36Sopenharmony_ci 6162306a36Sopenharmony_ci /* Multiply by the first limb in V separately, as the result can be 6262306a36Sopenharmony_ci * stored (not added) to PROD. We also avoid a loop for zeroing. */ 6362306a36Sopenharmony_ci v_limb = vp[0]; 6462306a36Sopenharmony_ci if (v_limb <= 1) { 6562306a36Sopenharmony_ci if (v_limb == 1) 6662306a36Sopenharmony_ci MPN_COPY(prodp, up, size); 6762306a36Sopenharmony_ci else 6862306a36Sopenharmony_ci MPN_ZERO(prodp, size); 6962306a36Sopenharmony_ci cy = 0; 7062306a36Sopenharmony_ci } else 7162306a36Sopenharmony_ci cy = mpihelp_mul_1(prodp, up, size, v_limb); 7262306a36Sopenharmony_ci 7362306a36Sopenharmony_ci prodp[size] = cy; 7462306a36Sopenharmony_ci prodp++; 7562306a36Sopenharmony_ci 7662306a36Sopenharmony_ci /* For each iteration in the outer loop, multiply one limb from 7762306a36Sopenharmony_ci * U with one limb from V, and add it to PROD. */ 7862306a36Sopenharmony_ci for (i = 1; i < size; i++) { 7962306a36Sopenharmony_ci v_limb = vp[i]; 8062306a36Sopenharmony_ci if (v_limb <= 1) { 8162306a36Sopenharmony_ci cy = 0; 8262306a36Sopenharmony_ci if (v_limb == 1) 8362306a36Sopenharmony_ci cy = mpihelp_add_n(prodp, prodp, up, size); 8462306a36Sopenharmony_ci } else 8562306a36Sopenharmony_ci cy = mpihelp_addmul_1(prodp, up, size, v_limb); 8662306a36Sopenharmony_ci 8762306a36Sopenharmony_ci prodp[size] = cy; 8862306a36Sopenharmony_ci prodp++; 8962306a36Sopenharmony_ci } 9062306a36Sopenharmony_ci 9162306a36Sopenharmony_ci return cy; 9262306a36Sopenharmony_ci} 9362306a36Sopenharmony_ci 9462306a36Sopenharmony_cistatic void 9562306a36Sopenharmony_cimul_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, 9662306a36Sopenharmony_ci mpi_size_t size, mpi_ptr_t tspace) 9762306a36Sopenharmony_ci{ 9862306a36Sopenharmony_ci if (size & 1) { 9962306a36Sopenharmony_ci /* The size is odd, and the code below doesn't handle that. 10062306a36Sopenharmony_ci * Multiply the least significant (size - 1) limbs with a recursive 10162306a36Sopenharmony_ci * call, and handle the most significant limb of S1 and S2 10262306a36Sopenharmony_ci * separately. 10362306a36Sopenharmony_ci * A slightly faster way to do this would be to make the Karatsuba 10462306a36Sopenharmony_ci * code below behave as if the size were even, and let it check for 10562306a36Sopenharmony_ci * odd size in the end. I.e., in essence move this code to the end. 10662306a36Sopenharmony_ci * Doing so would save us a recursive call, and potentially make the 10762306a36Sopenharmony_ci * stack grow a lot less. 10862306a36Sopenharmony_ci */ 10962306a36Sopenharmony_ci mpi_size_t esize = size - 1; /* even size */ 11062306a36Sopenharmony_ci mpi_limb_t cy_limb; 11162306a36Sopenharmony_ci 11262306a36Sopenharmony_ci MPN_MUL_N_RECURSE(prodp, up, vp, esize, tspace); 11362306a36Sopenharmony_ci cy_limb = mpihelp_addmul_1(prodp + esize, up, esize, vp[esize]); 11462306a36Sopenharmony_ci prodp[esize + esize] = cy_limb; 11562306a36Sopenharmony_ci cy_limb = mpihelp_addmul_1(prodp + esize, vp, size, up[esize]); 11662306a36Sopenharmony_ci prodp[esize + size] = cy_limb; 11762306a36Sopenharmony_ci } else { 11862306a36Sopenharmony_ci /* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm. 11962306a36Sopenharmony_ci * 12062306a36Sopenharmony_ci * Split U in two pieces, U1 and U0, such that 12162306a36Sopenharmony_ci * U = U0 + U1*(B**n), 12262306a36Sopenharmony_ci * and V in V1 and V0, such that 12362306a36Sopenharmony_ci * V = V0 + V1*(B**n). 12462306a36Sopenharmony_ci * 12562306a36Sopenharmony_ci * UV is then computed recursively using the identity 12662306a36Sopenharmony_ci * 12762306a36Sopenharmony_ci * 2n n n n 12862306a36Sopenharmony_ci * UV = (B + B )U V + B (U -U )(V -V ) + (B + 1)U V 12962306a36Sopenharmony_ci * 1 1 1 0 0 1 0 0 13062306a36Sopenharmony_ci * 13162306a36Sopenharmony_ci * Where B = 2**BITS_PER_MP_LIMB. 13262306a36Sopenharmony_ci */ 13362306a36Sopenharmony_ci mpi_size_t hsize = size >> 1; 13462306a36Sopenharmony_ci mpi_limb_t cy; 13562306a36Sopenharmony_ci int negflg; 13662306a36Sopenharmony_ci 13762306a36Sopenharmony_ci /* Product H. ________________ ________________ 13862306a36Sopenharmony_ci * |_____U1 x V1____||____U0 x V0_____| 13962306a36Sopenharmony_ci * Put result in upper part of PROD and pass low part of TSPACE 14062306a36Sopenharmony_ci * as new TSPACE. 14162306a36Sopenharmony_ci */ 14262306a36Sopenharmony_ci MPN_MUL_N_RECURSE(prodp + size, up + hsize, vp + hsize, hsize, 14362306a36Sopenharmony_ci tspace); 14462306a36Sopenharmony_ci 14562306a36Sopenharmony_ci /* Product M. ________________ 14662306a36Sopenharmony_ci * |_(U1-U0)(V0-V1)_| 14762306a36Sopenharmony_ci */ 14862306a36Sopenharmony_ci if (mpihelp_cmp(up + hsize, up, hsize) >= 0) { 14962306a36Sopenharmony_ci mpihelp_sub_n(prodp, up + hsize, up, hsize); 15062306a36Sopenharmony_ci negflg = 0; 15162306a36Sopenharmony_ci } else { 15262306a36Sopenharmony_ci mpihelp_sub_n(prodp, up, up + hsize, hsize); 15362306a36Sopenharmony_ci negflg = 1; 15462306a36Sopenharmony_ci } 15562306a36Sopenharmony_ci if (mpihelp_cmp(vp + hsize, vp, hsize) >= 0) { 15662306a36Sopenharmony_ci mpihelp_sub_n(prodp + hsize, vp + hsize, vp, hsize); 15762306a36Sopenharmony_ci negflg ^= 1; 15862306a36Sopenharmony_ci } else { 15962306a36Sopenharmony_ci mpihelp_sub_n(prodp + hsize, vp, vp + hsize, hsize); 16062306a36Sopenharmony_ci /* No change of NEGFLG. */ 16162306a36Sopenharmony_ci } 16262306a36Sopenharmony_ci /* Read temporary operands from low part of PROD. 16362306a36Sopenharmony_ci * Put result in low part of TSPACE using upper part of TSPACE 16462306a36Sopenharmony_ci * as new TSPACE. 16562306a36Sopenharmony_ci */ 16662306a36Sopenharmony_ci MPN_MUL_N_RECURSE(tspace, prodp, prodp + hsize, hsize, 16762306a36Sopenharmony_ci tspace + size); 16862306a36Sopenharmony_ci 16962306a36Sopenharmony_ci /* Add/copy product H. */ 17062306a36Sopenharmony_ci MPN_COPY(prodp + hsize, prodp + size, hsize); 17162306a36Sopenharmony_ci cy = mpihelp_add_n(prodp + size, prodp + size, 17262306a36Sopenharmony_ci prodp + size + hsize, hsize); 17362306a36Sopenharmony_ci 17462306a36Sopenharmony_ci /* Add product M (if NEGFLG M is a negative number) */ 17562306a36Sopenharmony_ci if (negflg) 17662306a36Sopenharmony_ci cy -= 17762306a36Sopenharmony_ci mpihelp_sub_n(prodp + hsize, prodp + hsize, tspace, 17862306a36Sopenharmony_ci size); 17962306a36Sopenharmony_ci else 18062306a36Sopenharmony_ci cy += 18162306a36Sopenharmony_ci mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, 18262306a36Sopenharmony_ci size); 18362306a36Sopenharmony_ci 18462306a36Sopenharmony_ci /* Product L. ________________ ________________ 18562306a36Sopenharmony_ci * |________________||____U0 x V0_____| 18662306a36Sopenharmony_ci * Read temporary operands from low part of PROD. 18762306a36Sopenharmony_ci * Put result in low part of TSPACE using upper part of TSPACE 18862306a36Sopenharmony_ci * as new TSPACE. 18962306a36Sopenharmony_ci */ 19062306a36Sopenharmony_ci MPN_MUL_N_RECURSE(tspace, up, vp, hsize, tspace + size); 19162306a36Sopenharmony_ci 19262306a36Sopenharmony_ci /* Add/copy Product L (twice) */ 19362306a36Sopenharmony_ci 19462306a36Sopenharmony_ci cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size); 19562306a36Sopenharmony_ci if (cy) 19662306a36Sopenharmony_ci mpihelp_add_1(prodp + hsize + size, 19762306a36Sopenharmony_ci prodp + hsize + size, hsize, cy); 19862306a36Sopenharmony_ci 19962306a36Sopenharmony_ci MPN_COPY(prodp, tspace, hsize); 20062306a36Sopenharmony_ci cy = mpihelp_add_n(prodp + hsize, prodp + hsize, tspace + hsize, 20162306a36Sopenharmony_ci hsize); 20262306a36Sopenharmony_ci if (cy) 20362306a36Sopenharmony_ci mpihelp_add_1(prodp + size, prodp + size, size, 1); 20462306a36Sopenharmony_ci } 20562306a36Sopenharmony_ci} 20662306a36Sopenharmony_ci 20762306a36Sopenharmony_civoid mpih_sqr_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size) 20862306a36Sopenharmony_ci{ 20962306a36Sopenharmony_ci mpi_size_t i; 21062306a36Sopenharmony_ci mpi_limb_t cy_limb; 21162306a36Sopenharmony_ci mpi_limb_t v_limb; 21262306a36Sopenharmony_ci 21362306a36Sopenharmony_ci /* Multiply by the first limb in V separately, as the result can be 21462306a36Sopenharmony_ci * stored (not added) to PROD. We also avoid a loop for zeroing. */ 21562306a36Sopenharmony_ci v_limb = up[0]; 21662306a36Sopenharmony_ci if (v_limb <= 1) { 21762306a36Sopenharmony_ci if (v_limb == 1) 21862306a36Sopenharmony_ci MPN_COPY(prodp, up, size); 21962306a36Sopenharmony_ci else 22062306a36Sopenharmony_ci MPN_ZERO(prodp, size); 22162306a36Sopenharmony_ci cy_limb = 0; 22262306a36Sopenharmony_ci } else 22362306a36Sopenharmony_ci cy_limb = mpihelp_mul_1(prodp, up, size, v_limb); 22462306a36Sopenharmony_ci 22562306a36Sopenharmony_ci prodp[size] = cy_limb; 22662306a36Sopenharmony_ci prodp++; 22762306a36Sopenharmony_ci 22862306a36Sopenharmony_ci /* For each iteration in the outer loop, multiply one limb from 22962306a36Sopenharmony_ci * U with one limb from V, and add it to PROD. */ 23062306a36Sopenharmony_ci for (i = 1; i < size; i++) { 23162306a36Sopenharmony_ci v_limb = up[i]; 23262306a36Sopenharmony_ci if (v_limb <= 1) { 23362306a36Sopenharmony_ci cy_limb = 0; 23462306a36Sopenharmony_ci if (v_limb == 1) 23562306a36Sopenharmony_ci cy_limb = mpihelp_add_n(prodp, prodp, up, size); 23662306a36Sopenharmony_ci } else 23762306a36Sopenharmony_ci cy_limb = mpihelp_addmul_1(prodp, up, size, v_limb); 23862306a36Sopenharmony_ci 23962306a36Sopenharmony_ci prodp[size] = cy_limb; 24062306a36Sopenharmony_ci prodp++; 24162306a36Sopenharmony_ci } 24262306a36Sopenharmony_ci} 24362306a36Sopenharmony_ci 24462306a36Sopenharmony_civoid 24562306a36Sopenharmony_cimpih_sqr_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size, mpi_ptr_t tspace) 24662306a36Sopenharmony_ci{ 24762306a36Sopenharmony_ci if (size & 1) { 24862306a36Sopenharmony_ci /* The size is odd, and the code below doesn't handle that. 24962306a36Sopenharmony_ci * Multiply the least significant (size - 1) limbs with a recursive 25062306a36Sopenharmony_ci * call, and handle the most significant limb of S1 and S2 25162306a36Sopenharmony_ci * separately. 25262306a36Sopenharmony_ci * A slightly faster way to do this would be to make the Karatsuba 25362306a36Sopenharmony_ci * code below behave as if the size were even, and let it check for 25462306a36Sopenharmony_ci * odd size in the end. I.e., in essence move this code to the end. 25562306a36Sopenharmony_ci * Doing so would save us a recursive call, and potentially make the 25662306a36Sopenharmony_ci * stack grow a lot less. 25762306a36Sopenharmony_ci */ 25862306a36Sopenharmony_ci mpi_size_t esize = size - 1; /* even size */ 25962306a36Sopenharmony_ci mpi_limb_t cy_limb; 26062306a36Sopenharmony_ci 26162306a36Sopenharmony_ci MPN_SQR_N_RECURSE(prodp, up, esize, tspace); 26262306a36Sopenharmony_ci cy_limb = mpihelp_addmul_1(prodp + esize, up, esize, up[esize]); 26362306a36Sopenharmony_ci prodp[esize + esize] = cy_limb; 26462306a36Sopenharmony_ci cy_limb = mpihelp_addmul_1(prodp + esize, up, size, up[esize]); 26562306a36Sopenharmony_ci 26662306a36Sopenharmony_ci prodp[esize + size] = cy_limb; 26762306a36Sopenharmony_ci } else { 26862306a36Sopenharmony_ci mpi_size_t hsize = size >> 1; 26962306a36Sopenharmony_ci mpi_limb_t cy; 27062306a36Sopenharmony_ci 27162306a36Sopenharmony_ci /* Product H. ________________ ________________ 27262306a36Sopenharmony_ci * |_____U1 x U1____||____U0 x U0_____| 27362306a36Sopenharmony_ci * Put result in upper part of PROD and pass low part of TSPACE 27462306a36Sopenharmony_ci * as new TSPACE. 27562306a36Sopenharmony_ci */ 27662306a36Sopenharmony_ci MPN_SQR_N_RECURSE(prodp + size, up + hsize, hsize, tspace); 27762306a36Sopenharmony_ci 27862306a36Sopenharmony_ci /* Product M. ________________ 27962306a36Sopenharmony_ci * |_(U1-U0)(U0-U1)_| 28062306a36Sopenharmony_ci */ 28162306a36Sopenharmony_ci if (mpihelp_cmp(up + hsize, up, hsize) >= 0) 28262306a36Sopenharmony_ci mpihelp_sub_n(prodp, up + hsize, up, hsize); 28362306a36Sopenharmony_ci else 28462306a36Sopenharmony_ci mpihelp_sub_n(prodp, up, up + hsize, hsize); 28562306a36Sopenharmony_ci 28662306a36Sopenharmony_ci /* Read temporary operands from low part of PROD. 28762306a36Sopenharmony_ci * Put result in low part of TSPACE using upper part of TSPACE 28862306a36Sopenharmony_ci * as new TSPACE. */ 28962306a36Sopenharmony_ci MPN_SQR_N_RECURSE(tspace, prodp, hsize, tspace + size); 29062306a36Sopenharmony_ci 29162306a36Sopenharmony_ci /* Add/copy product H */ 29262306a36Sopenharmony_ci MPN_COPY(prodp + hsize, prodp + size, hsize); 29362306a36Sopenharmony_ci cy = mpihelp_add_n(prodp + size, prodp + size, 29462306a36Sopenharmony_ci prodp + size + hsize, hsize); 29562306a36Sopenharmony_ci 29662306a36Sopenharmony_ci /* Add product M (if NEGFLG M is a negative number). */ 29762306a36Sopenharmony_ci cy -= mpihelp_sub_n(prodp + hsize, prodp + hsize, tspace, size); 29862306a36Sopenharmony_ci 29962306a36Sopenharmony_ci /* Product L. ________________ ________________ 30062306a36Sopenharmony_ci * |________________||____U0 x U0_____| 30162306a36Sopenharmony_ci * Read temporary operands from low part of PROD. 30262306a36Sopenharmony_ci * Put result in low part of TSPACE using upper part of TSPACE 30362306a36Sopenharmony_ci * as new TSPACE. */ 30462306a36Sopenharmony_ci MPN_SQR_N_RECURSE(tspace, up, hsize, tspace + size); 30562306a36Sopenharmony_ci 30662306a36Sopenharmony_ci /* Add/copy Product L (twice). */ 30762306a36Sopenharmony_ci cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size); 30862306a36Sopenharmony_ci if (cy) 30962306a36Sopenharmony_ci mpihelp_add_1(prodp + hsize + size, 31062306a36Sopenharmony_ci prodp + hsize + size, hsize, cy); 31162306a36Sopenharmony_ci 31262306a36Sopenharmony_ci MPN_COPY(prodp, tspace, hsize); 31362306a36Sopenharmony_ci cy = mpihelp_add_n(prodp + hsize, prodp + hsize, tspace + hsize, 31462306a36Sopenharmony_ci hsize); 31562306a36Sopenharmony_ci if (cy) 31662306a36Sopenharmony_ci mpihelp_add_1(prodp + size, prodp + size, size, 1); 31762306a36Sopenharmony_ci } 31862306a36Sopenharmony_ci} 31962306a36Sopenharmony_ci 32062306a36Sopenharmony_ci 32162306a36Sopenharmony_civoid mpihelp_mul_n(mpi_ptr_t prodp, 32262306a36Sopenharmony_ci mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size) 32362306a36Sopenharmony_ci{ 32462306a36Sopenharmony_ci if (up == vp) { 32562306a36Sopenharmony_ci if (size < KARATSUBA_THRESHOLD) 32662306a36Sopenharmony_ci mpih_sqr_n_basecase(prodp, up, size); 32762306a36Sopenharmony_ci else { 32862306a36Sopenharmony_ci mpi_ptr_t tspace; 32962306a36Sopenharmony_ci tspace = mpi_alloc_limb_space(2 * size); 33062306a36Sopenharmony_ci mpih_sqr_n(prodp, up, size, tspace); 33162306a36Sopenharmony_ci mpi_free_limb_space(tspace); 33262306a36Sopenharmony_ci } 33362306a36Sopenharmony_ci } else { 33462306a36Sopenharmony_ci if (size < KARATSUBA_THRESHOLD) 33562306a36Sopenharmony_ci mul_n_basecase(prodp, up, vp, size); 33662306a36Sopenharmony_ci else { 33762306a36Sopenharmony_ci mpi_ptr_t tspace; 33862306a36Sopenharmony_ci tspace = mpi_alloc_limb_space(2 * size); 33962306a36Sopenharmony_ci mul_n(prodp, up, vp, size, tspace); 34062306a36Sopenharmony_ci mpi_free_limb_space(tspace); 34162306a36Sopenharmony_ci } 34262306a36Sopenharmony_ci } 34362306a36Sopenharmony_ci} 34462306a36Sopenharmony_ci 34562306a36Sopenharmony_ciint 34662306a36Sopenharmony_cimpihelp_mul_karatsuba_case(mpi_ptr_t prodp, 34762306a36Sopenharmony_ci mpi_ptr_t up, mpi_size_t usize, 34862306a36Sopenharmony_ci mpi_ptr_t vp, mpi_size_t vsize, 34962306a36Sopenharmony_ci struct karatsuba_ctx *ctx) 35062306a36Sopenharmony_ci{ 35162306a36Sopenharmony_ci mpi_limb_t cy; 35262306a36Sopenharmony_ci 35362306a36Sopenharmony_ci if (!ctx->tspace || ctx->tspace_size < vsize) { 35462306a36Sopenharmony_ci if (ctx->tspace) 35562306a36Sopenharmony_ci mpi_free_limb_space(ctx->tspace); 35662306a36Sopenharmony_ci ctx->tspace = mpi_alloc_limb_space(2 * vsize); 35762306a36Sopenharmony_ci if (!ctx->tspace) 35862306a36Sopenharmony_ci return -ENOMEM; 35962306a36Sopenharmony_ci ctx->tspace_size = vsize; 36062306a36Sopenharmony_ci } 36162306a36Sopenharmony_ci 36262306a36Sopenharmony_ci MPN_MUL_N_RECURSE(prodp, up, vp, vsize, ctx->tspace); 36362306a36Sopenharmony_ci 36462306a36Sopenharmony_ci prodp += vsize; 36562306a36Sopenharmony_ci up += vsize; 36662306a36Sopenharmony_ci usize -= vsize; 36762306a36Sopenharmony_ci if (usize >= vsize) { 36862306a36Sopenharmony_ci if (!ctx->tp || ctx->tp_size < vsize) { 36962306a36Sopenharmony_ci if (ctx->tp) 37062306a36Sopenharmony_ci mpi_free_limb_space(ctx->tp); 37162306a36Sopenharmony_ci ctx->tp = mpi_alloc_limb_space(2 * vsize); 37262306a36Sopenharmony_ci if (!ctx->tp) { 37362306a36Sopenharmony_ci if (ctx->tspace) 37462306a36Sopenharmony_ci mpi_free_limb_space(ctx->tspace); 37562306a36Sopenharmony_ci ctx->tspace = NULL; 37662306a36Sopenharmony_ci return -ENOMEM; 37762306a36Sopenharmony_ci } 37862306a36Sopenharmony_ci ctx->tp_size = vsize; 37962306a36Sopenharmony_ci } 38062306a36Sopenharmony_ci 38162306a36Sopenharmony_ci do { 38262306a36Sopenharmony_ci MPN_MUL_N_RECURSE(ctx->tp, up, vp, vsize, ctx->tspace); 38362306a36Sopenharmony_ci cy = mpihelp_add_n(prodp, prodp, ctx->tp, vsize); 38462306a36Sopenharmony_ci mpihelp_add_1(prodp + vsize, ctx->tp + vsize, vsize, 38562306a36Sopenharmony_ci cy); 38662306a36Sopenharmony_ci prodp += vsize; 38762306a36Sopenharmony_ci up += vsize; 38862306a36Sopenharmony_ci usize -= vsize; 38962306a36Sopenharmony_ci } while (usize >= vsize); 39062306a36Sopenharmony_ci } 39162306a36Sopenharmony_ci 39262306a36Sopenharmony_ci if (usize) { 39362306a36Sopenharmony_ci if (usize < KARATSUBA_THRESHOLD) { 39462306a36Sopenharmony_ci mpi_limb_t tmp; 39562306a36Sopenharmony_ci if (mpihelp_mul(ctx->tspace, vp, vsize, up, usize, &tmp) 39662306a36Sopenharmony_ci < 0) 39762306a36Sopenharmony_ci return -ENOMEM; 39862306a36Sopenharmony_ci } else { 39962306a36Sopenharmony_ci if (!ctx->next) { 40062306a36Sopenharmony_ci ctx->next = kzalloc(sizeof *ctx, GFP_KERNEL); 40162306a36Sopenharmony_ci if (!ctx->next) 40262306a36Sopenharmony_ci return -ENOMEM; 40362306a36Sopenharmony_ci } 40462306a36Sopenharmony_ci if (mpihelp_mul_karatsuba_case(ctx->tspace, 40562306a36Sopenharmony_ci vp, vsize, 40662306a36Sopenharmony_ci up, usize, 40762306a36Sopenharmony_ci ctx->next) < 0) 40862306a36Sopenharmony_ci return -ENOMEM; 40962306a36Sopenharmony_ci } 41062306a36Sopenharmony_ci 41162306a36Sopenharmony_ci cy = mpihelp_add_n(prodp, prodp, ctx->tspace, vsize); 41262306a36Sopenharmony_ci mpihelp_add_1(prodp + vsize, ctx->tspace + vsize, usize, cy); 41362306a36Sopenharmony_ci } 41462306a36Sopenharmony_ci 41562306a36Sopenharmony_ci return 0; 41662306a36Sopenharmony_ci} 41762306a36Sopenharmony_ci 41862306a36Sopenharmony_civoid mpihelp_release_karatsuba_ctx(struct karatsuba_ctx *ctx) 41962306a36Sopenharmony_ci{ 42062306a36Sopenharmony_ci struct karatsuba_ctx *ctx2; 42162306a36Sopenharmony_ci 42262306a36Sopenharmony_ci if (ctx->tp) 42362306a36Sopenharmony_ci mpi_free_limb_space(ctx->tp); 42462306a36Sopenharmony_ci if (ctx->tspace) 42562306a36Sopenharmony_ci mpi_free_limb_space(ctx->tspace); 42662306a36Sopenharmony_ci for (ctx = ctx->next; ctx; ctx = ctx2) { 42762306a36Sopenharmony_ci ctx2 = ctx->next; 42862306a36Sopenharmony_ci if (ctx->tp) 42962306a36Sopenharmony_ci mpi_free_limb_space(ctx->tp); 43062306a36Sopenharmony_ci if (ctx->tspace) 43162306a36Sopenharmony_ci mpi_free_limb_space(ctx->tspace); 43262306a36Sopenharmony_ci kfree(ctx); 43362306a36Sopenharmony_ci } 43462306a36Sopenharmony_ci} 43562306a36Sopenharmony_ci 43662306a36Sopenharmony_ci/* Multiply the natural numbers u (pointed to by UP, with USIZE limbs) 43762306a36Sopenharmony_ci * and v (pointed to by VP, with VSIZE limbs), and store the result at 43862306a36Sopenharmony_ci * PRODP. USIZE + VSIZE limbs are always stored, but if the input 43962306a36Sopenharmony_ci * operands are normalized. Return the most significant limb of the 44062306a36Sopenharmony_ci * result. 44162306a36Sopenharmony_ci * 44262306a36Sopenharmony_ci * NOTE: The space pointed to by PRODP is overwritten before finished 44362306a36Sopenharmony_ci * with U and V, so overlap is an error. 44462306a36Sopenharmony_ci * 44562306a36Sopenharmony_ci * Argument constraints: 44662306a36Sopenharmony_ci * 1. USIZE >= VSIZE. 44762306a36Sopenharmony_ci * 2. PRODP != UP and PRODP != VP, i.e. the destination 44862306a36Sopenharmony_ci * must be distinct from the multiplier and the multiplicand. 44962306a36Sopenharmony_ci */ 45062306a36Sopenharmony_ci 45162306a36Sopenharmony_ciint 45262306a36Sopenharmony_cimpihelp_mul(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize, 45362306a36Sopenharmony_ci mpi_ptr_t vp, mpi_size_t vsize, mpi_limb_t *_result) 45462306a36Sopenharmony_ci{ 45562306a36Sopenharmony_ci mpi_ptr_t prod_endp = prodp + usize + vsize - 1; 45662306a36Sopenharmony_ci mpi_limb_t cy; 45762306a36Sopenharmony_ci struct karatsuba_ctx ctx; 45862306a36Sopenharmony_ci 45962306a36Sopenharmony_ci if (vsize < KARATSUBA_THRESHOLD) { 46062306a36Sopenharmony_ci mpi_size_t i; 46162306a36Sopenharmony_ci mpi_limb_t v_limb; 46262306a36Sopenharmony_ci 46362306a36Sopenharmony_ci if (!vsize) { 46462306a36Sopenharmony_ci *_result = 0; 46562306a36Sopenharmony_ci return 0; 46662306a36Sopenharmony_ci } 46762306a36Sopenharmony_ci 46862306a36Sopenharmony_ci /* Multiply by the first limb in V separately, as the result can be 46962306a36Sopenharmony_ci * stored (not added) to PROD. We also avoid a loop for zeroing. */ 47062306a36Sopenharmony_ci v_limb = vp[0]; 47162306a36Sopenharmony_ci if (v_limb <= 1) { 47262306a36Sopenharmony_ci if (v_limb == 1) 47362306a36Sopenharmony_ci MPN_COPY(prodp, up, usize); 47462306a36Sopenharmony_ci else 47562306a36Sopenharmony_ci MPN_ZERO(prodp, usize); 47662306a36Sopenharmony_ci cy = 0; 47762306a36Sopenharmony_ci } else 47862306a36Sopenharmony_ci cy = mpihelp_mul_1(prodp, up, usize, v_limb); 47962306a36Sopenharmony_ci 48062306a36Sopenharmony_ci prodp[usize] = cy; 48162306a36Sopenharmony_ci prodp++; 48262306a36Sopenharmony_ci 48362306a36Sopenharmony_ci /* For each iteration in the outer loop, multiply one limb from 48462306a36Sopenharmony_ci * U with one limb from V, and add it to PROD. */ 48562306a36Sopenharmony_ci for (i = 1; i < vsize; i++) { 48662306a36Sopenharmony_ci v_limb = vp[i]; 48762306a36Sopenharmony_ci if (v_limb <= 1) { 48862306a36Sopenharmony_ci cy = 0; 48962306a36Sopenharmony_ci if (v_limb == 1) 49062306a36Sopenharmony_ci cy = mpihelp_add_n(prodp, prodp, up, 49162306a36Sopenharmony_ci usize); 49262306a36Sopenharmony_ci } else 49362306a36Sopenharmony_ci cy = mpihelp_addmul_1(prodp, up, usize, v_limb); 49462306a36Sopenharmony_ci 49562306a36Sopenharmony_ci prodp[usize] = cy; 49662306a36Sopenharmony_ci prodp++; 49762306a36Sopenharmony_ci } 49862306a36Sopenharmony_ci 49962306a36Sopenharmony_ci *_result = cy; 50062306a36Sopenharmony_ci return 0; 50162306a36Sopenharmony_ci } 50262306a36Sopenharmony_ci 50362306a36Sopenharmony_ci memset(&ctx, 0, sizeof ctx); 50462306a36Sopenharmony_ci if (mpihelp_mul_karatsuba_case(prodp, up, usize, vp, vsize, &ctx) < 0) 50562306a36Sopenharmony_ci return -ENOMEM; 50662306a36Sopenharmony_ci mpihelp_release_karatsuba_ctx(&ctx); 50762306a36Sopenharmony_ci *_result = *prod_endp; 50862306a36Sopenharmony_ci return 0; 50962306a36Sopenharmony_ci} 510