18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later 28c2ecf20Sopenharmony_ci/* mpihelp-mul.c - MPI helper functions 38c2ecf20Sopenharmony_ci * Copyright (C) 1994, 1996, 1998, 1999, 48c2ecf20Sopenharmony_ci * 2000 Free Software Foundation, Inc. 58c2ecf20Sopenharmony_ci * 68c2ecf20Sopenharmony_ci * This file is part of GnuPG. 78c2ecf20Sopenharmony_ci * 88c2ecf20Sopenharmony_ci * Note: This code is heavily based on the GNU MP Library. 98c2ecf20Sopenharmony_ci * Actually it's the same code with only minor changes in the 108c2ecf20Sopenharmony_ci * way the data is stored; this is to support the abstraction 118c2ecf20Sopenharmony_ci * of an optional secure memory allocation which may be used 128c2ecf20Sopenharmony_ci * to avoid revealing of sensitive data due to paging etc. 138c2ecf20Sopenharmony_ci * The GNU MP Library itself is published under the LGPL; 148c2ecf20Sopenharmony_ci * however I decided to publish this code under the plain GPL. 158c2ecf20Sopenharmony_ci */ 168c2ecf20Sopenharmony_ci 178c2ecf20Sopenharmony_ci#include <linux/string.h> 188c2ecf20Sopenharmony_ci#include "mpi-internal.h" 198c2ecf20Sopenharmony_ci#include "longlong.h" 208c2ecf20Sopenharmony_ci 218c2ecf20Sopenharmony_ci#define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace) \ 228c2ecf20Sopenharmony_ci do { \ 238c2ecf20Sopenharmony_ci if ((size) < KARATSUBA_THRESHOLD) \ 248c2ecf20Sopenharmony_ci mul_n_basecase(prodp, up, vp, size); \ 258c2ecf20Sopenharmony_ci else \ 268c2ecf20Sopenharmony_ci mul_n(prodp, up, vp, size, tspace); \ 278c2ecf20Sopenharmony_ci } while (0); 288c2ecf20Sopenharmony_ci 298c2ecf20Sopenharmony_ci#define MPN_SQR_N_RECURSE(prodp, up, size, tspace) \ 308c2ecf20Sopenharmony_ci do { \ 318c2ecf20Sopenharmony_ci if ((size) < KARATSUBA_THRESHOLD) \ 328c2ecf20Sopenharmony_ci mpih_sqr_n_basecase(prodp, up, size); \ 338c2ecf20Sopenharmony_ci else \ 348c2ecf20Sopenharmony_ci mpih_sqr_n(prodp, up, size, tspace); \ 358c2ecf20Sopenharmony_ci } while (0); 368c2ecf20Sopenharmony_ci 378c2ecf20Sopenharmony_ci/* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP), 388c2ecf20Sopenharmony_ci * both with SIZE limbs, and store the result at PRODP. 2 * SIZE limbs are 398c2ecf20Sopenharmony_ci * always stored. Return the most significant limb. 408c2ecf20Sopenharmony_ci * 418c2ecf20Sopenharmony_ci * Argument constraints: 428c2ecf20Sopenharmony_ci * 1. PRODP != UP and PRODP != VP, i.e. the destination 438c2ecf20Sopenharmony_ci * must be distinct from the multiplier and the multiplicand. 448c2ecf20Sopenharmony_ci * 458c2ecf20Sopenharmony_ci * 468c2ecf20Sopenharmony_ci * Handle simple cases with traditional multiplication. 478c2ecf20Sopenharmony_ci * 488c2ecf20Sopenharmony_ci * This is the most critical code of multiplication. All multiplies rely 498c2ecf20Sopenharmony_ci * on this, both small and huge. Small ones arrive here immediately. Huge 508c2ecf20Sopenharmony_ci * ones arrive here as this is the base case for Karatsuba's recursive 518c2ecf20Sopenharmony_ci * algorithm below. 528c2ecf20Sopenharmony_ci */ 538c2ecf20Sopenharmony_ci 548c2ecf20Sopenharmony_cistatic mpi_limb_t 558c2ecf20Sopenharmony_cimul_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size) 568c2ecf20Sopenharmony_ci{ 578c2ecf20Sopenharmony_ci mpi_size_t i; 588c2ecf20Sopenharmony_ci mpi_limb_t cy; 598c2ecf20Sopenharmony_ci mpi_limb_t v_limb; 608c2ecf20Sopenharmony_ci 618c2ecf20Sopenharmony_ci /* Multiply by the first limb in V separately, as the result can be 628c2ecf20Sopenharmony_ci * stored (not added) to PROD. We also avoid a loop for zeroing. */ 638c2ecf20Sopenharmony_ci v_limb = vp[0]; 648c2ecf20Sopenharmony_ci if (v_limb <= 1) { 658c2ecf20Sopenharmony_ci if (v_limb == 1) 668c2ecf20Sopenharmony_ci MPN_COPY(prodp, up, size); 678c2ecf20Sopenharmony_ci else 688c2ecf20Sopenharmony_ci MPN_ZERO(prodp, size); 698c2ecf20Sopenharmony_ci cy = 0; 708c2ecf20Sopenharmony_ci } else 718c2ecf20Sopenharmony_ci cy = mpihelp_mul_1(prodp, up, size, v_limb); 728c2ecf20Sopenharmony_ci 738c2ecf20Sopenharmony_ci prodp[size] = cy; 748c2ecf20Sopenharmony_ci prodp++; 758c2ecf20Sopenharmony_ci 768c2ecf20Sopenharmony_ci /* For each iteration in the outer loop, multiply one limb from 778c2ecf20Sopenharmony_ci * U with one limb from V, and add it to PROD. */ 788c2ecf20Sopenharmony_ci for (i = 1; i < size; i++) { 798c2ecf20Sopenharmony_ci v_limb = vp[i]; 808c2ecf20Sopenharmony_ci if (v_limb <= 1) { 818c2ecf20Sopenharmony_ci cy = 0; 828c2ecf20Sopenharmony_ci if (v_limb == 1) 838c2ecf20Sopenharmony_ci cy = mpihelp_add_n(prodp, prodp, up, size); 848c2ecf20Sopenharmony_ci } else 858c2ecf20Sopenharmony_ci cy = mpihelp_addmul_1(prodp, up, size, v_limb); 868c2ecf20Sopenharmony_ci 878c2ecf20Sopenharmony_ci prodp[size] = cy; 888c2ecf20Sopenharmony_ci prodp++; 898c2ecf20Sopenharmony_ci } 908c2ecf20Sopenharmony_ci 918c2ecf20Sopenharmony_ci return cy; 928c2ecf20Sopenharmony_ci} 938c2ecf20Sopenharmony_ci 948c2ecf20Sopenharmony_cistatic void 958c2ecf20Sopenharmony_cimul_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, 968c2ecf20Sopenharmony_ci mpi_size_t size, mpi_ptr_t tspace) 978c2ecf20Sopenharmony_ci{ 988c2ecf20Sopenharmony_ci if (size & 1) { 998c2ecf20Sopenharmony_ci /* The size is odd, and the code below doesn't handle that. 1008c2ecf20Sopenharmony_ci * Multiply the least significant (size - 1) limbs with a recursive 1018c2ecf20Sopenharmony_ci * call, and handle the most significant limb of S1 and S2 1028c2ecf20Sopenharmony_ci * separately. 1038c2ecf20Sopenharmony_ci * A slightly faster way to do this would be to make the Karatsuba 1048c2ecf20Sopenharmony_ci * code below behave as if the size were even, and let it check for 1058c2ecf20Sopenharmony_ci * odd size in the end. I.e., in essence move this code to the end. 1068c2ecf20Sopenharmony_ci * Doing so would save us a recursive call, and potentially make the 1078c2ecf20Sopenharmony_ci * stack grow a lot less. 1088c2ecf20Sopenharmony_ci */ 1098c2ecf20Sopenharmony_ci mpi_size_t esize = size - 1; /* even size */ 1108c2ecf20Sopenharmony_ci mpi_limb_t cy_limb; 1118c2ecf20Sopenharmony_ci 1128c2ecf20Sopenharmony_ci MPN_MUL_N_RECURSE(prodp, up, vp, esize, tspace); 1138c2ecf20Sopenharmony_ci cy_limb = mpihelp_addmul_1(prodp + esize, up, esize, vp[esize]); 1148c2ecf20Sopenharmony_ci prodp[esize + esize] = cy_limb; 1158c2ecf20Sopenharmony_ci cy_limb = mpihelp_addmul_1(prodp + esize, vp, size, up[esize]); 1168c2ecf20Sopenharmony_ci prodp[esize + size] = cy_limb; 1178c2ecf20Sopenharmony_ci } else { 1188c2ecf20Sopenharmony_ci /* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm. 1198c2ecf20Sopenharmony_ci * 1208c2ecf20Sopenharmony_ci * Split U in two pieces, U1 and U0, such that 1218c2ecf20Sopenharmony_ci * U = U0 + U1*(B**n), 1228c2ecf20Sopenharmony_ci * and V in V1 and V0, such that 1238c2ecf20Sopenharmony_ci * V = V0 + V1*(B**n). 1248c2ecf20Sopenharmony_ci * 1258c2ecf20Sopenharmony_ci * UV is then computed recursively using the identity 1268c2ecf20Sopenharmony_ci * 1278c2ecf20Sopenharmony_ci * 2n n n n 1288c2ecf20Sopenharmony_ci * UV = (B + B )U V + B (U -U )(V -V ) + (B + 1)U V 1298c2ecf20Sopenharmony_ci * 1 1 1 0 0 1 0 0 1308c2ecf20Sopenharmony_ci * 1318c2ecf20Sopenharmony_ci * Where B = 2**BITS_PER_MP_LIMB. 1328c2ecf20Sopenharmony_ci */ 1338c2ecf20Sopenharmony_ci mpi_size_t hsize = size >> 1; 1348c2ecf20Sopenharmony_ci mpi_limb_t cy; 1358c2ecf20Sopenharmony_ci int negflg; 1368c2ecf20Sopenharmony_ci 1378c2ecf20Sopenharmony_ci /* Product H. ________________ ________________ 1388c2ecf20Sopenharmony_ci * |_____U1 x V1____||____U0 x V0_____| 1398c2ecf20Sopenharmony_ci * Put result in upper part of PROD and pass low part of TSPACE 1408c2ecf20Sopenharmony_ci * as new TSPACE. 1418c2ecf20Sopenharmony_ci */ 1428c2ecf20Sopenharmony_ci MPN_MUL_N_RECURSE(prodp + size, up + hsize, vp + hsize, hsize, 1438c2ecf20Sopenharmony_ci tspace); 1448c2ecf20Sopenharmony_ci 1458c2ecf20Sopenharmony_ci /* Product M. ________________ 1468c2ecf20Sopenharmony_ci * |_(U1-U0)(V0-V1)_| 1478c2ecf20Sopenharmony_ci */ 1488c2ecf20Sopenharmony_ci if (mpihelp_cmp(up + hsize, up, hsize) >= 0) { 1498c2ecf20Sopenharmony_ci mpihelp_sub_n(prodp, up + hsize, up, hsize); 1508c2ecf20Sopenharmony_ci negflg = 0; 1518c2ecf20Sopenharmony_ci } else { 1528c2ecf20Sopenharmony_ci mpihelp_sub_n(prodp, up, up + hsize, hsize); 1538c2ecf20Sopenharmony_ci negflg = 1; 1548c2ecf20Sopenharmony_ci } 1558c2ecf20Sopenharmony_ci if (mpihelp_cmp(vp + hsize, vp, hsize) >= 0) { 1568c2ecf20Sopenharmony_ci mpihelp_sub_n(prodp + hsize, vp + hsize, vp, hsize); 1578c2ecf20Sopenharmony_ci negflg ^= 1; 1588c2ecf20Sopenharmony_ci } else { 1598c2ecf20Sopenharmony_ci mpihelp_sub_n(prodp + hsize, vp, vp + hsize, hsize); 1608c2ecf20Sopenharmony_ci /* No change of NEGFLG. */ 1618c2ecf20Sopenharmony_ci } 1628c2ecf20Sopenharmony_ci /* Read temporary operands from low part of PROD. 1638c2ecf20Sopenharmony_ci * Put result in low part of TSPACE using upper part of TSPACE 1648c2ecf20Sopenharmony_ci * as new TSPACE. 1658c2ecf20Sopenharmony_ci */ 1668c2ecf20Sopenharmony_ci MPN_MUL_N_RECURSE(tspace, prodp, prodp + hsize, hsize, 1678c2ecf20Sopenharmony_ci tspace + size); 1688c2ecf20Sopenharmony_ci 1698c2ecf20Sopenharmony_ci /* Add/copy product H. */ 1708c2ecf20Sopenharmony_ci MPN_COPY(prodp + hsize, prodp + size, hsize); 1718c2ecf20Sopenharmony_ci cy = mpihelp_add_n(prodp + size, prodp + size, 1728c2ecf20Sopenharmony_ci prodp + size + hsize, hsize); 1738c2ecf20Sopenharmony_ci 1748c2ecf20Sopenharmony_ci /* Add product M (if NEGFLG M is a negative number) */ 1758c2ecf20Sopenharmony_ci if (negflg) 1768c2ecf20Sopenharmony_ci cy -= 1778c2ecf20Sopenharmony_ci mpihelp_sub_n(prodp + hsize, prodp + hsize, tspace, 1788c2ecf20Sopenharmony_ci size); 1798c2ecf20Sopenharmony_ci else 1808c2ecf20Sopenharmony_ci cy += 1818c2ecf20Sopenharmony_ci mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, 1828c2ecf20Sopenharmony_ci size); 1838c2ecf20Sopenharmony_ci 1848c2ecf20Sopenharmony_ci /* Product L. ________________ ________________ 1858c2ecf20Sopenharmony_ci * |________________||____U0 x V0_____| 1868c2ecf20Sopenharmony_ci * Read temporary operands from low part of PROD. 1878c2ecf20Sopenharmony_ci * Put result in low part of TSPACE using upper part of TSPACE 1888c2ecf20Sopenharmony_ci * as new TSPACE. 1898c2ecf20Sopenharmony_ci */ 1908c2ecf20Sopenharmony_ci MPN_MUL_N_RECURSE(tspace, up, vp, hsize, tspace + size); 1918c2ecf20Sopenharmony_ci 1928c2ecf20Sopenharmony_ci /* Add/copy Product L (twice) */ 1938c2ecf20Sopenharmony_ci 1948c2ecf20Sopenharmony_ci cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size); 1958c2ecf20Sopenharmony_ci if (cy) 1968c2ecf20Sopenharmony_ci mpihelp_add_1(prodp + hsize + size, 1978c2ecf20Sopenharmony_ci prodp + hsize + size, hsize, cy); 1988c2ecf20Sopenharmony_ci 1998c2ecf20Sopenharmony_ci MPN_COPY(prodp, tspace, hsize); 2008c2ecf20Sopenharmony_ci cy = mpihelp_add_n(prodp + hsize, prodp + hsize, tspace + hsize, 2018c2ecf20Sopenharmony_ci hsize); 2028c2ecf20Sopenharmony_ci if (cy) 2038c2ecf20Sopenharmony_ci mpihelp_add_1(prodp + size, prodp + size, size, 1); 2048c2ecf20Sopenharmony_ci } 2058c2ecf20Sopenharmony_ci} 2068c2ecf20Sopenharmony_ci 2078c2ecf20Sopenharmony_civoid mpih_sqr_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size) 2088c2ecf20Sopenharmony_ci{ 2098c2ecf20Sopenharmony_ci mpi_size_t i; 2108c2ecf20Sopenharmony_ci mpi_limb_t cy_limb; 2118c2ecf20Sopenharmony_ci mpi_limb_t v_limb; 2128c2ecf20Sopenharmony_ci 2138c2ecf20Sopenharmony_ci /* Multiply by the first limb in V separately, as the result can be 2148c2ecf20Sopenharmony_ci * stored (not added) to PROD. We also avoid a loop for zeroing. */ 2158c2ecf20Sopenharmony_ci v_limb = up[0]; 2168c2ecf20Sopenharmony_ci if (v_limb <= 1) { 2178c2ecf20Sopenharmony_ci if (v_limb == 1) 2188c2ecf20Sopenharmony_ci MPN_COPY(prodp, up, size); 2198c2ecf20Sopenharmony_ci else 2208c2ecf20Sopenharmony_ci MPN_ZERO(prodp, size); 2218c2ecf20Sopenharmony_ci cy_limb = 0; 2228c2ecf20Sopenharmony_ci } else 2238c2ecf20Sopenharmony_ci cy_limb = mpihelp_mul_1(prodp, up, size, v_limb); 2248c2ecf20Sopenharmony_ci 2258c2ecf20Sopenharmony_ci prodp[size] = cy_limb; 2268c2ecf20Sopenharmony_ci prodp++; 2278c2ecf20Sopenharmony_ci 2288c2ecf20Sopenharmony_ci /* For each iteration in the outer loop, multiply one limb from 2298c2ecf20Sopenharmony_ci * U with one limb from V, and add it to PROD. */ 2308c2ecf20Sopenharmony_ci for (i = 1; i < size; i++) { 2318c2ecf20Sopenharmony_ci v_limb = up[i]; 2328c2ecf20Sopenharmony_ci if (v_limb <= 1) { 2338c2ecf20Sopenharmony_ci cy_limb = 0; 2348c2ecf20Sopenharmony_ci if (v_limb == 1) 2358c2ecf20Sopenharmony_ci cy_limb = mpihelp_add_n(prodp, prodp, up, size); 2368c2ecf20Sopenharmony_ci } else 2378c2ecf20Sopenharmony_ci cy_limb = mpihelp_addmul_1(prodp, up, size, v_limb); 2388c2ecf20Sopenharmony_ci 2398c2ecf20Sopenharmony_ci prodp[size] = cy_limb; 2408c2ecf20Sopenharmony_ci prodp++; 2418c2ecf20Sopenharmony_ci } 2428c2ecf20Sopenharmony_ci} 2438c2ecf20Sopenharmony_ci 2448c2ecf20Sopenharmony_civoid 2458c2ecf20Sopenharmony_cimpih_sqr_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size, mpi_ptr_t tspace) 2468c2ecf20Sopenharmony_ci{ 2478c2ecf20Sopenharmony_ci if (size & 1) { 2488c2ecf20Sopenharmony_ci /* The size is odd, and the code below doesn't handle that. 2498c2ecf20Sopenharmony_ci * Multiply the least significant (size - 1) limbs with a recursive 2508c2ecf20Sopenharmony_ci * call, and handle the most significant limb of S1 and S2 2518c2ecf20Sopenharmony_ci * separately. 2528c2ecf20Sopenharmony_ci * A slightly faster way to do this would be to make the Karatsuba 2538c2ecf20Sopenharmony_ci * code below behave as if the size were even, and let it check for 2548c2ecf20Sopenharmony_ci * odd size in the end. I.e., in essence move this code to the end. 2558c2ecf20Sopenharmony_ci * Doing so would save us a recursive call, and potentially make the 2568c2ecf20Sopenharmony_ci * stack grow a lot less. 2578c2ecf20Sopenharmony_ci */ 2588c2ecf20Sopenharmony_ci mpi_size_t esize = size - 1; /* even size */ 2598c2ecf20Sopenharmony_ci mpi_limb_t cy_limb; 2608c2ecf20Sopenharmony_ci 2618c2ecf20Sopenharmony_ci MPN_SQR_N_RECURSE(prodp, up, esize, tspace); 2628c2ecf20Sopenharmony_ci cy_limb = mpihelp_addmul_1(prodp + esize, up, esize, up[esize]); 2638c2ecf20Sopenharmony_ci prodp[esize + esize] = cy_limb; 2648c2ecf20Sopenharmony_ci cy_limb = mpihelp_addmul_1(prodp + esize, up, size, up[esize]); 2658c2ecf20Sopenharmony_ci 2668c2ecf20Sopenharmony_ci prodp[esize + size] = cy_limb; 2678c2ecf20Sopenharmony_ci } else { 2688c2ecf20Sopenharmony_ci mpi_size_t hsize = size >> 1; 2698c2ecf20Sopenharmony_ci mpi_limb_t cy; 2708c2ecf20Sopenharmony_ci 2718c2ecf20Sopenharmony_ci /* Product H. ________________ ________________ 2728c2ecf20Sopenharmony_ci * |_____U1 x U1____||____U0 x U0_____| 2738c2ecf20Sopenharmony_ci * Put result in upper part of PROD and pass low part of TSPACE 2748c2ecf20Sopenharmony_ci * as new TSPACE. 2758c2ecf20Sopenharmony_ci */ 2768c2ecf20Sopenharmony_ci MPN_SQR_N_RECURSE(prodp + size, up + hsize, hsize, tspace); 2778c2ecf20Sopenharmony_ci 2788c2ecf20Sopenharmony_ci /* Product M. ________________ 2798c2ecf20Sopenharmony_ci * |_(U1-U0)(U0-U1)_| 2808c2ecf20Sopenharmony_ci */ 2818c2ecf20Sopenharmony_ci if (mpihelp_cmp(up + hsize, up, hsize) >= 0) 2828c2ecf20Sopenharmony_ci mpihelp_sub_n(prodp, up + hsize, up, hsize); 2838c2ecf20Sopenharmony_ci else 2848c2ecf20Sopenharmony_ci mpihelp_sub_n(prodp, up, up + hsize, hsize); 2858c2ecf20Sopenharmony_ci 2868c2ecf20Sopenharmony_ci /* Read temporary operands from low part of PROD. 2878c2ecf20Sopenharmony_ci * Put result in low part of TSPACE using upper part of TSPACE 2888c2ecf20Sopenharmony_ci * as new TSPACE. */ 2898c2ecf20Sopenharmony_ci MPN_SQR_N_RECURSE(tspace, prodp, hsize, tspace + size); 2908c2ecf20Sopenharmony_ci 2918c2ecf20Sopenharmony_ci /* Add/copy product H */ 2928c2ecf20Sopenharmony_ci MPN_COPY(prodp + hsize, prodp + size, hsize); 2938c2ecf20Sopenharmony_ci cy = mpihelp_add_n(prodp + size, prodp + size, 2948c2ecf20Sopenharmony_ci prodp + size + hsize, hsize); 2958c2ecf20Sopenharmony_ci 2968c2ecf20Sopenharmony_ci /* Add product M (if NEGFLG M is a negative number). */ 2978c2ecf20Sopenharmony_ci cy -= mpihelp_sub_n(prodp + hsize, prodp + hsize, tspace, size); 2988c2ecf20Sopenharmony_ci 2998c2ecf20Sopenharmony_ci /* Product L. ________________ ________________ 3008c2ecf20Sopenharmony_ci * |________________||____U0 x U0_____| 3018c2ecf20Sopenharmony_ci * Read temporary operands from low part of PROD. 3028c2ecf20Sopenharmony_ci * Put result in low part of TSPACE using upper part of TSPACE 3038c2ecf20Sopenharmony_ci * as new TSPACE. */ 3048c2ecf20Sopenharmony_ci MPN_SQR_N_RECURSE(tspace, up, hsize, tspace + size); 3058c2ecf20Sopenharmony_ci 3068c2ecf20Sopenharmony_ci /* Add/copy Product L (twice). */ 3078c2ecf20Sopenharmony_ci cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size); 3088c2ecf20Sopenharmony_ci if (cy) 3098c2ecf20Sopenharmony_ci mpihelp_add_1(prodp + hsize + size, 3108c2ecf20Sopenharmony_ci prodp + hsize + size, hsize, cy); 3118c2ecf20Sopenharmony_ci 3128c2ecf20Sopenharmony_ci MPN_COPY(prodp, tspace, hsize); 3138c2ecf20Sopenharmony_ci cy = mpihelp_add_n(prodp + hsize, prodp + hsize, tspace + hsize, 3148c2ecf20Sopenharmony_ci hsize); 3158c2ecf20Sopenharmony_ci if (cy) 3168c2ecf20Sopenharmony_ci mpihelp_add_1(prodp + size, prodp + size, size, 1); 3178c2ecf20Sopenharmony_ci } 3188c2ecf20Sopenharmony_ci} 3198c2ecf20Sopenharmony_ci 3208c2ecf20Sopenharmony_ci 3218c2ecf20Sopenharmony_civoid mpihelp_mul_n(mpi_ptr_t prodp, 3228c2ecf20Sopenharmony_ci mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size) 3238c2ecf20Sopenharmony_ci{ 3248c2ecf20Sopenharmony_ci if (up == vp) { 3258c2ecf20Sopenharmony_ci if (size < KARATSUBA_THRESHOLD) 3268c2ecf20Sopenharmony_ci mpih_sqr_n_basecase(prodp, up, size); 3278c2ecf20Sopenharmony_ci else { 3288c2ecf20Sopenharmony_ci mpi_ptr_t tspace; 3298c2ecf20Sopenharmony_ci tspace = mpi_alloc_limb_space(2 * size); 3308c2ecf20Sopenharmony_ci mpih_sqr_n(prodp, up, size, tspace); 3318c2ecf20Sopenharmony_ci mpi_free_limb_space(tspace); 3328c2ecf20Sopenharmony_ci } 3338c2ecf20Sopenharmony_ci } else { 3348c2ecf20Sopenharmony_ci if (size < KARATSUBA_THRESHOLD) 3358c2ecf20Sopenharmony_ci mul_n_basecase(prodp, up, vp, size); 3368c2ecf20Sopenharmony_ci else { 3378c2ecf20Sopenharmony_ci mpi_ptr_t tspace; 3388c2ecf20Sopenharmony_ci tspace = mpi_alloc_limb_space(2 * size); 3398c2ecf20Sopenharmony_ci mul_n(prodp, up, vp, size, tspace); 3408c2ecf20Sopenharmony_ci mpi_free_limb_space(tspace); 3418c2ecf20Sopenharmony_ci } 3428c2ecf20Sopenharmony_ci } 3438c2ecf20Sopenharmony_ci} 3448c2ecf20Sopenharmony_ci 3458c2ecf20Sopenharmony_ciint 3468c2ecf20Sopenharmony_cimpihelp_mul_karatsuba_case(mpi_ptr_t prodp, 3478c2ecf20Sopenharmony_ci mpi_ptr_t up, mpi_size_t usize, 3488c2ecf20Sopenharmony_ci mpi_ptr_t vp, mpi_size_t vsize, 3498c2ecf20Sopenharmony_ci struct karatsuba_ctx *ctx) 3508c2ecf20Sopenharmony_ci{ 3518c2ecf20Sopenharmony_ci mpi_limb_t cy; 3528c2ecf20Sopenharmony_ci 3538c2ecf20Sopenharmony_ci if (!ctx->tspace || ctx->tspace_size < vsize) { 3548c2ecf20Sopenharmony_ci if (ctx->tspace) 3558c2ecf20Sopenharmony_ci mpi_free_limb_space(ctx->tspace); 3568c2ecf20Sopenharmony_ci ctx->tspace = mpi_alloc_limb_space(2 * vsize); 3578c2ecf20Sopenharmony_ci if (!ctx->tspace) 3588c2ecf20Sopenharmony_ci return -ENOMEM; 3598c2ecf20Sopenharmony_ci ctx->tspace_size = vsize; 3608c2ecf20Sopenharmony_ci } 3618c2ecf20Sopenharmony_ci 3628c2ecf20Sopenharmony_ci MPN_MUL_N_RECURSE(prodp, up, vp, vsize, ctx->tspace); 3638c2ecf20Sopenharmony_ci 3648c2ecf20Sopenharmony_ci prodp += vsize; 3658c2ecf20Sopenharmony_ci up += vsize; 3668c2ecf20Sopenharmony_ci usize -= vsize; 3678c2ecf20Sopenharmony_ci if (usize >= vsize) { 3688c2ecf20Sopenharmony_ci if (!ctx->tp || ctx->tp_size < vsize) { 3698c2ecf20Sopenharmony_ci if (ctx->tp) 3708c2ecf20Sopenharmony_ci mpi_free_limb_space(ctx->tp); 3718c2ecf20Sopenharmony_ci ctx->tp = mpi_alloc_limb_space(2 * vsize); 3728c2ecf20Sopenharmony_ci if (!ctx->tp) { 3738c2ecf20Sopenharmony_ci if (ctx->tspace) 3748c2ecf20Sopenharmony_ci mpi_free_limb_space(ctx->tspace); 3758c2ecf20Sopenharmony_ci ctx->tspace = NULL; 3768c2ecf20Sopenharmony_ci return -ENOMEM; 3778c2ecf20Sopenharmony_ci } 3788c2ecf20Sopenharmony_ci ctx->tp_size = vsize; 3798c2ecf20Sopenharmony_ci } 3808c2ecf20Sopenharmony_ci 3818c2ecf20Sopenharmony_ci do { 3828c2ecf20Sopenharmony_ci MPN_MUL_N_RECURSE(ctx->tp, up, vp, vsize, ctx->tspace); 3838c2ecf20Sopenharmony_ci cy = mpihelp_add_n(prodp, prodp, ctx->tp, vsize); 3848c2ecf20Sopenharmony_ci mpihelp_add_1(prodp + vsize, ctx->tp + vsize, vsize, 3858c2ecf20Sopenharmony_ci cy); 3868c2ecf20Sopenharmony_ci prodp += vsize; 3878c2ecf20Sopenharmony_ci up += vsize; 3888c2ecf20Sopenharmony_ci usize -= vsize; 3898c2ecf20Sopenharmony_ci } while (usize >= vsize); 3908c2ecf20Sopenharmony_ci } 3918c2ecf20Sopenharmony_ci 3928c2ecf20Sopenharmony_ci if (usize) { 3938c2ecf20Sopenharmony_ci if (usize < KARATSUBA_THRESHOLD) { 3948c2ecf20Sopenharmony_ci mpi_limb_t tmp; 3958c2ecf20Sopenharmony_ci if (mpihelp_mul(ctx->tspace, vp, vsize, up, usize, &tmp) 3968c2ecf20Sopenharmony_ci < 0) 3978c2ecf20Sopenharmony_ci return -ENOMEM; 3988c2ecf20Sopenharmony_ci } else { 3998c2ecf20Sopenharmony_ci if (!ctx->next) { 4008c2ecf20Sopenharmony_ci ctx->next = kzalloc(sizeof *ctx, GFP_KERNEL); 4018c2ecf20Sopenharmony_ci if (!ctx->next) 4028c2ecf20Sopenharmony_ci return -ENOMEM; 4038c2ecf20Sopenharmony_ci } 4048c2ecf20Sopenharmony_ci if (mpihelp_mul_karatsuba_case(ctx->tspace, 4058c2ecf20Sopenharmony_ci vp, vsize, 4068c2ecf20Sopenharmony_ci up, usize, 4078c2ecf20Sopenharmony_ci ctx->next) < 0) 4088c2ecf20Sopenharmony_ci return -ENOMEM; 4098c2ecf20Sopenharmony_ci } 4108c2ecf20Sopenharmony_ci 4118c2ecf20Sopenharmony_ci cy = mpihelp_add_n(prodp, prodp, ctx->tspace, vsize); 4128c2ecf20Sopenharmony_ci mpihelp_add_1(prodp + vsize, ctx->tspace + vsize, usize, cy); 4138c2ecf20Sopenharmony_ci } 4148c2ecf20Sopenharmony_ci 4158c2ecf20Sopenharmony_ci return 0; 4168c2ecf20Sopenharmony_ci} 4178c2ecf20Sopenharmony_ci 4188c2ecf20Sopenharmony_civoid mpihelp_release_karatsuba_ctx(struct karatsuba_ctx *ctx) 4198c2ecf20Sopenharmony_ci{ 4208c2ecf20Sopenharmony_ci struct karatsuba_ctx *ctx2; 4218c2ecf20Sopenharmony_ci 4228c2ecf20Sopenharmony_ci if (ctx->tp) 4238c2ecf20Sopenharmony_ci mpi_free_limb_space(ctx->tp); 4248c2ecf20Sopenharmony_ci if (ctx->tspace) 4258c2ecf20Sopenharmony_ci mpi_free_limb_space(ctx->tspace); 4268c2ecf20Sopenharmony_ci for (ctx = ctx->next; ctx; ctx = ctx2) { 4278c2ecf20Sopenharmony_ci ctx2 = ctx->next; 4288c2ecf20Sopenharmony_ci if (ctx->tp) 4298c2ecf20Sopenharmony_ci mpi_free_limb_space(ctx->tp); 4308c2ecf20Sopenharmony_ci if (ctx->tspace) 4318c2ecf20Sopenharmony_ci mpi_free_limb_space(ctx->tspace); 4328c2ecf20Sopenharmony_ci kfree(ctx); 4338c2ecf20Sopenharmony_ci } 4348c2ecf20Sopenharmony_ci} 4358c2ecf20Sopenharmony_ci 4368c2ecf20Sopenharmony_ci/* Multiply the natural numbers u (pointed to by UP, with USIZE limbs) 4378c2ecf20Sopenharmony_ci * and v (pointed to by VP, with VSIZE limbs), and store the result at 4388c2ecf20Sopenharmony_ci * PRODP. USIZE + VSIZE limbs are always stored, but if the input 4398c2ecf20Sopenharmony_ci * operands are normalized. Return the most significant limb of the 4408c2ecf20Sopenharmony_ci * result. 4418c2ecf20Sopenharmony_ci * 4428c2ecf20Sopenharmony_ci * NOTE: The space pointed to by PRODP is overwritten before finished 4438c2ecf20Sopenharmony_ci * with U and V, so overlap is an error. 4448c2ecf20Sopenharmony_ci * 4458c2ecf20Sopenharmony_ci * Argument constraints: 4468c2ecf20Sopenharmony_ci * 1. USIZE >= VSIZE. 4478c2ecf20Sopenharmony_ci * 2. PRODP != UP and PRODP != VP, i.e. the destination 4488c2ecf20Sopenharmony_ci * must be distinct from the multiplier and the multiplicand. 4498c2ecf20Sopenharmony_ci */ 4508c2ecf20Sopenharmony_ci 4518c2ecf20Sopenharmony_ciint 4528c2ecf20Sopenharmony_cimpihelp_mul(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize, 4538c2ecf20Sopenharmony_ci mpi_ptr_t vp, mpi_size_t vsize, mpi_limb_t *_result) 4548c2ecf20Sopenharmony_ci{ 4558c2ecf20Sopenharmony_ci mpi_ptr_t prod_endp = prodp + usize + vsize - 1; 4568c2ecf20Sopenharmony_ci mpi_limb_t cy; 4578c2ecf20Sopenharmony_ci struct karatsuba_ctx ctx; 4588c2ecf20Sopenharmony_ci 4598c2ecf20Sopenharmony_ci if (vsize < KARATSUBA_THRESHOLD) { 4608c2ecf20Sopenharmony_ci mpi_size_t i; 4618c2ecf20Sopenharmony_ci mpi_limb_t v_limb; 4628c2ecf20Sopenharmony_ci 4638c2ecf20Sopenharmony_ci if (!vsize) { 4648c2ecf20Sopenharmony_ci *_result = 0; 4658c2ecf20Sopenharmony_ci return 0; 4668c2ecf20Sopenharmony_ci } 4678c2ecf20Sopenharmony_ci 4688c2ecf20Sopenharmony_ci /* Multiply by the first limb in V separately, as the result can be 4698c2ecf20Sopenharmony_ci * stored (not added) to PROD. We also avoid a loop for zeroing. */ 4708c2ecf20Sopenharmony_ci v_limb = vp[0]; 4718c2ecf20Sopenharmony_ci if (v_limb <= 1) { 4728c2ecf20Sopenharmony_ci if (v_limb == 1) 4738c2ecf20Sopenharmony_ci MPN_COPY(prodp, up, usize); 4748c2ecf20Sopenharmony_ci else 4758c2ecf20Sopenharmony_ci MPN_ZERO(prodp, usize); 4768c2ecf20Sopenharmony_ci cy = 0; 4778c2ecf20Sopenharmony_ci } else 4788c2ecf20Sopenharmony_ci cy = mpihelp_mul_1(prodp, up, usize, v_limb); 4798c2ecf20Sopenharmony_ci 4808c2ecf20Sopenharmony_ci prodp[usize] = cy; 4818c2ecf20Sopenharmony_ci prodp++; 4828c2ecf20Sopenharmony_ci 4838c2ecf20Sopenharmony_ci /* For each iteration in the outer loop, multiply one limb from 4848c2ecf20Sopenharmony_ci * U with one limb from V, and add it to PROD. */ 4858c2ecf20Sopenharmony_ci for (i = 1; i < vsize; i++) { 4868c2ecf20Sopenharmony_ci v_limb = vp[i]; 4878c2ecf20Sopenharmony_ci if (v_limb <= 1) { 4888c2ecf20Sopenharmony_ci cy = 0; 4898c2ecf20Sopenharmony_ci if (v_limb == 1) 4908c2ecf20Sopenharmony_ci cy = mpihelp_add_n(prodp, prodp, up, 4918c2ecf20Sopenharmony_ci usize); 4928c2ecf20Sopenharmony_ci } else 4938c2ecf20Sopenharmony_ci cy = mpihelp_addmul_1(prodp, up, usize, v_limb); 4948c2ecf20Sopenharmony_ci 4958c2ecf20Sopenharmony_ci prodp[usize] = cy; 4968c2ecf20Sopenharmony_ci prodp++; 4978c2ecf20Sopenharmony_ci } 4988c2ecf20Sopenharmony_ci 4998c2ecf20Sopenharmony_ci *_result = cy; 5008c2ecf20Sopenharmony_ci return 0; 5018c2ecf20Sopenharmony_ci } 5028c2ecf20Sopenharmony_ci 5038c2ecf20Sopenharmony_ci memset(&ctx, 0, sizeof ctx); 5048c2ecf20Sopenharmony_ci if (mpihelp_mul_karatsuba_case(prodp, up, usize, vp, vsize, &ctx) < 0) 5058c2ecf20Sopenharmony_ci return -ENOMEM; 5068c2ecf20Sopenharmony_ci mpihelp_release_karatsuba_ctx(&ctx); 5078c2ecf20Sopenharmony_ci *_result = *prod_endp; 5088c2ecf20Sopenharmony_ci return 0; 5098c2ecf20Sopenharmony_ci} 510