xref: /kernel/linux/linux-5.10/lib/mpi/mpih-mul.c (revision 8c2ecf20)
18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later
28c2ecf20Sopenharmony_ci/* mpihelp-mul.c  -  MPI helper functions
38c2ecf20Sopenharmony_ci * Copyright (C) 1994, 1996, 1998, 1999,
48c2ecf20Sopenharmony_ci *               2000 Free Software Foundation, Inc.
58c2ecf20Sopenharmony_ci *
68c2ecf20Sopenharmony_ci * This file is part of GnuPG.
78c2ecf20Sopenharmony_ci *
88c2ecf20Sopenharmony_ci * Note: This code is heavily based on the GNU MP Library.
98c2ecf20Sopenharmony_ci *	 Actually it's the same code with only minor changes in the
108c2ecf20Sopenharmony_ci *	 way the data is stored; this is to support the abstraction
118c2ecf20Sopenharmony_ci *	 of an optional secure memory allocation which may be used
128c2ecf20Sopenharmony_ci *	 to avoid revealing of sensitive data due to paging etc.
138c2ecf20Sopenharmony_ci *	 The GNU MP Library itself is published under the LGPL;
148c2ecf20Sopenharmony_ci *	 however I decided to publish this code under the plain GPL.
158c2ecf20Sopenharmony_ci */
168c2ecf20Sopenharmony_ci
178c2ecf20Sopenharmony_ci#include <linux/string.h>
188c2ecf20Sopenharmony_ci#include "mpi-internal.h"
198c2ecf20Sopenharmony_ci#include "longlong.h"
208c2ecf20Sopenharmony_ci
218c2ecf20Sopenharmony_ci#define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace)		\
228c2ecf20Sopenharmony_ci	do {							\
238c2ecf20Sopenharmony_ci		if ((size) < KARATSUBA_THRESHOLD)		\
248c2ecf20Sopenharmony_ci			mul_n_basecase(prodp, up, vp, size);	\
258c2ecf20Sopenharmony_ci		else						\
268c2ecf20Sopenharmony_ci			mul_n(prodp, up, vp, size, tspace);	\
278c2ecf20Sopenharmony_ci	} while (0);
288c2ecf20Sopenharmony_ci
298c2ecf20Sopenharmony_ci#define MPN_SQR_N_RECURSE(prodp, up, size, tspace)		\
308c2ecf20Sopenharmony_ci	do {							\
318c2ecf20Sopenharmony_ci		if ((size) < KARATSUBA_THRESHOLD)		\
328c2ecf20Sopenharmony_ci			mpih_sqr_n_basecase(prodp, up, size);	\
338c2ecf20Sopenharmony_ci		else						\
348c2ecf20Sopenharmony_ci			mpih_sqr_n(prodp, up, size, tspace);	\
358c2ecf20Sopenharmony_ci	} while (0);
368c2ecf20Sopenharmony_ci
378c2ecf20Sopenharmony_ci/* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP),
388c2ecf20Sopenharmony_ci * both with SIZE limbs, and store the result at PRODP.  2 * SIZE limbs are
398c2ecf20Sopenharmony_ci * always stored.  Return the most significant limb.
408c2ecf20Sopenharmony_ci *
418c2ecf20Sopenharmony_ci * Argument constraints:
428c2ecf20Sopenharmony_ci * 1. PRODP != UP and PRODP != VP, i.e. the destination
438c2ecf20Sopenharmony_ci *    must be distinct from the multiplier and the multiplicand.
448c2ecf20Sopenharmony_ci *
458c2ecf20Sopenharmony_ci *
468c2ecf20Sopenharmony_ci * Handle simple cases with traditional multiplication.
478c2ecf20Sopenharmony_ci *
488c2ecf20Sopenharmony_ci * This is the most critical code of multiplication.  All multiplies rely
498c2ecf20Sopenharmony_ci * on this, both small and huge.  Small ones arrive here immediately.  Huge
508c2ecf20Sopenharmony_ci * ones arrive here as this is the base case for Karatsuba's recursive
518c2ecf20Sopenharmony_ci * algorithm below.
528c2ecf20Sopenharmony_ci */
538c2ecf20Sopenharmony_ci
548c2ecf20Sopenharmony_cistatic mpi_limb_t
558c2ecf20Sopenharmony_cimul_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size)
568c2ecf20Sopenharmony_ci{
578c2ecf20Sopenharmony_ci	mpi_size_t i;
588c2ecf20Sopenharmony_ci	mpi_limb_t cy;
598c2ecf20Sopenharmony_ci	mpi_limb_t v_limb;
608c2ecf20Sopenharmony_ci
618c2ecf20Sopenharmony_ci	/* Multiply by the first limb in V separately, as the result can be
628c2ecf20Sopenharmony_ci	 * stored (not added) to PROD.  We also avoid a loop for zeroing.  */
638c2ecf20Sopenharmony_ci	v_limb = vp[0];
648c2ecf20Sopenharmony_ci	if (v_limb <= 1) {
658c2ecf20Sopenharmony_ci		if (v_limb == 1)
668c2ecf20Sopenharmony_ci			MPN_COPY(prodp, up, size);
678c2ecf20Sopenharmony_ci		else
688c2ecf20Sopenharmony_ci			MPN_ZERO(prodp, size);
698c2ecf20Sopenharmony_ci		cy = 0;
708c2ecf20Sopenharmony_ci	} else
718c2ecf20Sopenharmony_ci		cy = mpihelp_mul_1(prodp, up, size, v_limb);
728c2ecf20Sopenharmony_ci
738c2ecf20Sopenharmony_ci	prodp[size] = cy;
748c2ecf20Sopenharmony_ci	prodp++;
758c2ecf20Sopenharmony_ci
768c2ecf20Sopenharmony_ci	/* For each iteration in the outer loop, multiply one limb from
778c2ecf20Sopenharmony_ci	 * U with one limb from V, and add it to PROD.  */
788c2ecf20Sopenharmony_ci	for (i = 1; i < size; i++) {
798c2ecf20Sopenharmony_ci		v_limb = vp[i];
808c2ecf20Sopenharmony_ci		if (v_limb <= 1) {
818c2ecf20Sopenharmony_ci			cy = 0;
828c2ecf20Sopenharmony_ci			if (v_limb == 1)
838c2ecf20Sopenharmony_ci				cy = mpihelp_add_n(prodp, prodp, up, size);
848c2ecf20Sopenharmony_ci		} else
858c2ecf20Sopenharmony_ci			cy = mpihelp_addmul_1(prodp, up, size, v_limb);
868c2ecf20Sopenharmony_ci
878c2ecf20Sopenharmony_ci		prodp[size] = cy;
888c2ecf20Sopenharmony_ci		prodp++;
898c2ecf20Sopenharmony_ci	}
908c2ecf20Sopenharmony_ci
918c2ecf20Sopenharmony_ci	return cy;
928c2ecf20Sopenharmony_ci}
938c2ecf20Sopenharmony_ci
948c2ecf20Sopenharmony_cistatic void
958c2ecf20Sopenharmony_cimul_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp,
968c2ecf20Sopenharmony_ci		mpi_size_t size, mpi_ptr_t tspace)
978c2ecf20Sopenharmony_ci{
988c2ecf20Sopenharmony_ci	if (size & 1) {
998c2ecf20Sopenharmony_ci		/* The size is odd, and the code below doesn't handle that.
1008c2ecf20Sopenharmony_ci		 * Multiply the least significant (size - 1) limbs with a recursive
1018c2ecf20Sopenharmony_ci		 * call, and handle the most significant limb of S1 and S2
1028c2ecf20Sopenharmony_ci		 * separately.
1038c2ecf20Sopenharmony_ci		 * A slightly faster way to do this would be to make the Karatsuba
1048c2ecf20Sopenharmony_ci		 * code below behave as if the size were even, and let it check for
1058c2ecf20Sopenharmony_ci		 * odd size in the end.  I.e., in essence move this code to the end.
1068c2ecf20Sopenharmony_ci		 * Doing so would save us a recursive call, and potentially make the
1078c2ecf20Sopenharmony_ci		 * stack grow a lot less.
1088c2ecf20Sopenharmony_ci		 */
1098c2ecf20Sopenharmony_ci		mpi_size_t esize = size - 1;	/* even size */
1108c2ecf20Sopenharmony_ci		mpi_limb_t cy_limb;
1118c2ecf20Sopenharmony_ci
1128c2ecf20Sopenharmony_ci		MPN_MUL_N_RECURSE(prodp, up, vp, esize, tspace);
1138c2ecf20Sopenharmony_ci		cy_limb = mpihelp_addmul_1(prodp + esize, up, esize, vp[esize]);
1148c2ecf20Sopenharmony_ci		prodp[esize + esize] = cy_limb;
1158c2ecf20Sopenharmony_ci		cy_limb = mpihelp_addmul_1(prodp + esize, vp, size, up[esize]);
1168c2ecf20Sopenharmony_ci		prodp[esize + size] = cy_limb;
1178c2ecf20Sopenharmony_ci	} else {
1188c2ecf20Sopenharmony_ci		/* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm.
1198c2ecf20Sopenharmony_ci		 *
1208c2ecf20Sopenharmony_ci		 * Split U in two pieces, U1 and U0, such that
1218c2ecf20Sopenharmony_ci		 * U = U0 + U1*(B**n),
1228c2ecf20Sopenharmony_ci		 * and V in V1 and V0, such that
1238c2ecf20Sopenharmony_ci		 * V = V0 + V1*(B**n).
1248c2ecf20Sopenharmony_ci		 *
1258c2ecf20Sopenharmony_ci		 * UV is then computed recursively using the identity
1268c2ecf20Sopenharmony_ci		 *
1278c2ecf20Sopenharmony_ci		 *        2n   n          n                     n
1288c2ecf20Sopenharmony_ci		 * UV = (B  + B )U V  +  B (U -U )(V -V )  +  (B + 1)U V
1298c2ecf20Sopenharmony_ci		 *                1 1        1  0   0  1              0 0
1308c2ecf20Sopenharmony_ci		 *
1318c2ecf20Sopenharmony_ci		 * Where B = 2**BITS_PER_MP_LIMB.
1328c2ecf20Sopenharmony_ci		 */
1338c2ecf20Sopenharmony_ci		mpi_size_t hsize = size >> 1;
1348c2ecf20Sopenharmony_ci		mpi_limb_t cy;
1358c2ecf20Sopenharmony_ci		int negflg;
1368c2ecf20Sopenharmony_ci
1378c2ecf20Sopenharmony_ci		/* Product H.      ________________  ________________
1388c2ecf20Sopenharmony_ci		 *                |_____U1 x V1____||____U0 x V0_____|
1398c2ecf20Sopenharmony_ci		 * Put result in upper part of PROD and pass low part of TSPACE
1408c2ecf20Sopenharmony_ci		 * as new TSPACE.
1418c2ecf20Sopenharmony_ci		 */
1428c2ecf20Sopenharmony_ci		MPN_MUL_N_RECURSE(prodp + size, up + hsize, vp + hsize, hsize,
1438c2ecf20Sopenharmony_ci				  tspace);
1448c2ecf20Sopenharmony_ci
1458c2ecf20Sopenharmony_ci		/* Product M.      ________________
1468c2ecf20Sopenharmony_ci		 *                |_(U1-U0)(V0-V1)_|
1478c2ecf20Sopenharmony_ci		 */
1488c2ecf20Sopenharmony_ci		if (mpihelp_cmp(up + hsize, up, hsize) >= 0) {
1498c2ecf20Sopenharmony_ci			mpihelp_sub_n(prodp, up + hsize, up, hsize);
1508c2ecf20Sopenharmony_ci			negflg = 0;
1518c2ecf20Sopenharmony_ci		} else {
1528c2ecf20Sopenharmony_ci			mpihelp_sub_n(prodp, up, up + hsize, hsize);
1538c2ecf20Sopenharmony_ci			negflg = 1;
1548c2ecf20Sopenharmony_ci		}
1558c2ecf20Sopenharmony_ci		if (mpihelp_cmp(vp + hsize, vp, hsize) >= 0) {
1568c2ecf20Sopenharmony_ci			mpihelp_sub_n(prodp + hsize, vp + hsize, vp, hsize);
1578c2ecf20Sopenharmony_ci			negflg ^= 1;
1588c2ecf20Sopenharmony_ci		} else {
1598c2ecf20Sopenharmony_ci			mpihelp_sub_n(prodp + hsize, vp, vp + hsize, hsize);
1608c2ecf20Sopenharmony_ci			/* No change of NEGFLG.  */
1618c2ecf20Sopenharmony_ci		}
1628c2ecf20Sopenharmony_ci		/* Read temporary operands from low part of PROD.
1638c2ecf20Sopenharmony_ci		 * Put result in low part of TSPACE using upper part of TSPACE
1648c2ecf20Sopenharmony_ci		 * as new TSPACE.
1658c2ecf20Sopenharmony_ci		 */
1668c2ecf20Sopenharmony_ci		MPN_MUL_N_RECURSE(tspace, prodp, prodp + hsize, hsize,
1678c2ecf20Sopenharmony_ci				  tspace + size);
1688c2ecf20Sopenharmony_ci
1698c2ecf20Sopenharmony_ci		/* Add/copy product H. */
1708c2ecf20Sopenharmony_ci		MPN_COPY(prodp + hsize, prodp + size, hsize);
1718c2ecf20Sopenharmony_ci		cy = mpihelp_add_n(prodp + size, prodp + size,
1728c2ecf20Sopenharmony_ci				   prodp + size + hsize, hsize);
1738c2ecf20Sopenharmony_ci
1748c2ecf20Sopenharmony_ci		/* Add product M (if NEGFLG M is a negative number) */
1758c2ecf20Sopenharmony_ci		if (negflg)
1768c2ecf20Sopenharmony_ci			cy -=
1778c2ecf20Sopenharmony_ci			    mpihelp_sub_n(prodp + hsize, prodp + hsize, tspace,
1788c2ecf20Sopenharmony_ci					  size);
1798c2ecf20Sopenharmony_ci		else
1808c2ecf20Sopenharmony_ci			cy +=
1818c2ecf20Sopenharmony_ci			    mpihelp_add_n(prodp + hsize, prodp + hsize, tspace,
1828c2ecf20Sopenharmony_ci					  size);
1838c2ecf20Sopenharmony_ci
1848c2ecf20Sopenharmony_ci		/* Product L.      ________________  ________________
1858c2ecf20Sopenharmony_ci		 *                |________________||____U0 x V0_____|
1868c2ecf20Sopenharmony_ci		 * Read temporary operands from low part of PROD.
1878c2ecf20Sopenharmony_ci		 * Put result in low part of TSPACE using upper part of TSPACE
1888c2ecf20Sopenharmony_ci		 * as new TSPACE.
1898c2ecf20Sopenharmony_ci		 */
1908c2ecf20Sopenharmony_ci		MPN_MUL_N_RECURSE(tspace, up, vp, hsize, tspace + size);
1918c2ecf20Sopenharmony_ci
1928c2ecf20Sopenharmony_ci		/* Add/copy Product L (twice) */
1938c2ecf20Sopenharmony_ci
1948c2ecf20Sopenharmony_ci		cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size);
1958c2ecf20Sopenharmony_ci		if (cy)
1968c2ecf20Sopenharmony_ci			mpihelp_add_1(prodp + hsize + size,
1978c2ecf20Sopenharmony_ci				      prodp + hsize + size, hsize, cy);
1988c2ecf20Sopenharmony_ci
1998c2ecf20Sopenharmony_ci		MPN_COPY(prodp, tspace, hsize);
2008c2ecf20Sopenharmony_ci		cy = mpihelp_add_n(prodp + hsize, prodp + hsize, tspace + hsize,
2018c2ecf20Sopenharmony_ci				   hsize);
2028c2ecf20Sopenharmony_ci		if (cy)
2038c2ecf20Sopenharmony_ci			mpihelp_add_1(prodp + size, prodp + size, size, 1);
2048c2ecf20Sopenharmony_ci	}
2058c2ecf20Sopenharmony_ci}
2068c2ecf20Sopenharmony_ci
2078c2ecf20Sopenharmony_civoid mpih_sqr_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size)
2088c2ecf20Sopenharmony_ci{
2098c2ecf20Sopenharmony_ci	mpi_size_t i;
2108c2ecf20Sopenharmony_ci	mpi_limb_t cy_limb;
2118c2ecf20Sopenharmony_ci	mpi_limb_t v_limb;
2128c2ecf20Sopenharmony_ci
2138c2ecf20Sopenharmony_ci	/* Multiply by the first limb in V separately, as the result can be
2148c2ecf20Sopenharmony_ci	 * stored (not added) to PROD.  We also avoid a loop for zeroing.  */
2158c2ecf20Sopenharmony_ci	v_limb = up[0];
2168c2ecf20Sopenharmony_ci	if (v_limb <= 1) {
2178c2ecf20Sopenharmony_ci		if (v_limb == 1)
2188c2ecf20Sopenharmony_ci			MPN_COPY(prodp, up, size);
2198c2ecf20Sopenharmony_ci		else
2208c2ecf20Sopenharmony_ci			MPN_ZERO(prodp, size);
2218c2ecf20Sopenharmony_ci		cy_limb = 0;
2228c2ecf20Sopenharmony_ci	} else
2238c2ecf20Sopenharmony_ci		cy_limb = mpihelp_mul_1(prodp, up, size, v_limb);
2248c2ecf20Sopenharmony_ci
2258c2ecf20Sopenharmony_ci	prodp[size] = cy_limb;
2268c2ecf20Sopenharmony_ci	prodp++;
2278c2ecf20Sopenharmony_ci
2288c2ecf20Sopenharmony_ci	/* For each iteration in the outer loop, multiply one limb from
2298c2ecf20Sopenharmony_ci	 * U with one limb from V, and add it to PROD.  */
2308c2ecf20Sopenharmony_ci	for (i = 1; i < size; i++) {
2318c2ecf20Sopenharmony_ci		v_limb = up[i];
2328c2ecf20Sopenharmony_ci		if (v_limb <= 1) {
2338c2ecf20Sopenharmony_ci			cy_limb = 0;
2348c2ecf20Sopenharmony_ci			if (v_limb == 1)
2358c2ecf20Sopenharmony_ci				cy_limb = mpihelp_add_n(prodp, prodp, up, size);
2368c2ecf20Sopenharmony_ci		} else
2378c2ecf20Sopenharmony_ci			cy_limb = mpihelp_addmul_1(prodp, up, size, v_limb);
2388c2ecf20Sopenharmony_ci
2398c2ecf20Sopenharmony_ci		prodp[size] = cy_limb;
2408c2ecf20Sopenharmony_ci		prodp++;
2418c2ecf20Sopenharmony_ci	}
2428c2ecf20Sopenharmony_ci}
2438c2ecf20Sopenharmony_ci
2448c2ecf20Sopenharmony_civoid
2458c2ecf20Sopenharmony_cimpih_sqr_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size, mpi_ptr_t tspace)
2468c2ecf20Sopenharmony_ci{
2478c2ecf20Sopenharmony_ci	if (size & 1) {
2488c2ecf20Sopenharmony_ci		/* The size is odd, and the code below doesn't handle that.
2498c2ecf20Sopenharmony_ci		 * Multiply the least significant (size - 1) limbs with a recursive
2508c2ecf20Sopenharmony_ci		 * call, and handle the most significant limb of S1 and S2
2518c2ecf20Sopenharmony_ci		 * separately.
2528c2ecf20Sopenharmony_ci		 * A slightly faster way to do this would be to make the Karatsuba
2538c2ecf20Sopenharmony_ci		 * code below behave as if the size were even, and let it check for
2548c2ecf20Sopenharmony_ci		 * odd size in the end.  I.e., in essence move this code to the end.
2558c2ecf20Sopenharmony_ci		 * Doing so would save us a recursive call, and potentially make the
2568c2ecf20Sopenharmony_ci		 * stack grow a lot less.
2578c2ecf20Sopenharmony_ci		 */
2588c2ecf20Sopenharmony_ci		mpi_size_t esize = size - 1;	/* even size */
2598c2ecf20Sopenharmony_ci		mpi_limb_t cy_limb;
2608c2ecf20Sopenharmony_ci
2618c2ecf20Sopenharmony_ci		MPN_SQR_N_RECURSE(prodp, up, esize, tspace);
2628c2ecf20Sopenharmony_ci		cy_limb = mpihelp_addmul_1(prodp + esize, up, esize, up[esize]);
2638c2ecf20Sopenharmony_ci		prodp[esize + esize] = cy_limb;
2648c2ecf20Sopenharmony_ci		cy_limb = mpihelp_addmul_1(prodp + esize, up, size, up[esize]);
2658c2ecf20Sopenharmony_ci
2668c2ecf20Sopenharmony_ci		prodp[esize + size] = cy_limb;
2678c2ecf20Sopenharmony_ci	} else {
2688c2ecf20Sopenharmony_ci		mpi_size_t hsize = size >> 1;
2698c2ecf20Sopenharmony_ci		mpi_limb_t cy;
2708c2ecf20Sopenharmony_ci
2718c2ecf20Sopenharmony_ci		/* Product H.      ________________  ________________
2728c2ecf20Sopenharmony_ci		 *                |_____U1 x U1____||____U0 x U0_____|
2738c2ecf20Sopenharmony_ci		 * Put result in upper part of PROD and pass low part of TSPACE
2748c2ecf20Sopenharmony_ci		 * as new TSPACE.
2758c2ecf20Sopenharmony_ci		 */
2768c2ecf20Sopenharmony_ci		MPN_SQR_N_RECURSE(prodp + size, up + hsize, hsize, tspace);
2778c2ecf20Sopenharmony_ci
2788c2ecf20Sopenharmony_ci		/* Product M.      ________________
2798c2ecf20Sopenharmony_ci		 *                |_(U1-U0)(U0-U1)_|
2808c2ecf20Sopenharmony_ci		 */
2818c2ecf20Sopenharmony_ci		if (mpihelp_cmp(up + hsize, up, hsize) >= 0)
2828c2ecf20Sopenharmony_ci			mpihelp_sub_n(prodp, up + hsize, up, hsize);
2838c2ecf20Sopenharmony_ci		else
2848c2ecf20Sopenharmony_ci			mpihelp_sub_n(prodp, up, up + hsize, hsize);
2858c2ecf20Sopenharmony_ci
2868c2ecf20Sopenharmony_ci		/* Read temporary operands from low part of PROD.
2878c2ecf20Sopenharmony_ci		 * Put result in low part of TSPACE using upper part of TSPACE
2888c2ecf20Sopenharmony_ci		 * as new TSPACE.  */
2898c2ecf20Sopenharmony_ci		MPN_SQR_N_RECURSE(tspace, prodp, hsize, tspace + size);
2908c2ecf20Sopenharmony_ci
2918c2ecf20Sopenharmony_ci		/* Add/copy product H  */
2928c2ecf20Sopenharmony_ci		MPN_COPY(prodp + hsize, prodp + size, hsize);
2938c2ecf20Sopenharmony_ci		cy = mpihelp_add_n(prodp + size, prodp + size,
2948c2ecf20Sopenharmony_ci				   prodp + size + hsize, hsize);
2958c2ecf20Sopenharmony_ci
2968c2ecf20Sopenharmony_ci		/* Add product M (if NEGFLG M is a negative number).  */
2978c2ecf20Sopenharmony_ci		cy -= mpihelp_sub_n(prodp + hsize, prodp + hsize, tspace, size);
2988c2ecf20Sopenharmony_ci
2998c2ecf20Sopenharmony_ci		/* Product L.      ________________  ________________
3008c2ecf20Sopenharmony_ci		 *                |________________||____U0 x U0_____|
3018c2ecf20Sopenharmony_ci		 * Read temporary operands from low part of PROD.
3028c2ecf20Sopenharmony_ci		 * Put result in low part of TSPACE using upper part of TSPACE
3038c2ecf20Sopenharmony_ci		 * as new TSPACE.  */
3048c2ecf20Sopenharmony_ci		MPN_SQR_N_RECURSE(tspace, up, hsize, tspace + size);
3058c2ecf20Sopenharmony_ci
3068c2ecf20Sopenharmony_ci		/* Add/copy Product L (twice).  */
3078c2ecf20Sopenharmony_ci		cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size);
3088c2ecf20Sopenharmony_ci		if (cy)
3098c2ecf20Sopenharmony_ci			mpihelp_add_1(prodp + hsize + size,
3108c2ecf20Sopenharmony_ci				      prodp + hsize + size, hsize, cy);
3118c2ecf20Sopenharmony_ci
3128c2ecf20Sopenharmony_ci		MPN_COPY(prodp, tspace, hsize);
3138c2ecf20Sopenharmony_ci		cy = mpihelp_add_n(prodp + hsize, prodp + hsize, tspace + hsize,
3148c2ecf20Sopenharmony_ci				   hsize);
3158c2ecf20Sopenharmony_ci		if (cy)
3168c2ecf20Sopenharmony_ci			mpihelp_add_1(prodp + size, prodp + size, size, 1);
3178c2ecf20Sopenharmony_ci	}
3188c2ecf20Sopenharmony_ci}
3198c2ecf20Sopenharmony_ci
3208c2ecf20Sopenharmony_ci
3218c2ecf20Sopenharmony_civoid mpihelp_mul_n(mpi_ptr_t prodp,
3228c2ecf20Sopenharmony_ci		mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size)
3238c2ecf20Sopenharmony_ci{
3248c2ecf20Sopenharmony_ci	if (up == vp) {
3258c2ecf20Sopenharmony_ci		if (size < KARATSUBA_THRESHOLD)
3268c2ecf20Sopenharmony_ci			mpih_sqr_n_basecase(prodp, up, size);
3278c2ecf20Sopenharmony_ci		else {
3288c2ecf20Sopenharmony_ci			mpi_ptr_t tspace;
3298c2ecf20Sopenharmony_ci			tspace = mpi_alloc_limb_space(2 * size);
3308c2ecf20Sopenharmony_ci			mpih_sqr_n(prodp, up, size, tspace);
3318c2ecf20Sopenharmony_ci			mpi_free_limb_space(tspace);
3328c2ecf20Sopenharmony_ci		}
3338c2ecf20Sopenharmony_ci	} else {
3348c2ecf20Sopenharmony_ci		if (size < KARATSUBA_THRESHOLD)
3358c2ecf20Sopenharmony_ci			mul_n_basecase(prodp, up, vp, size);
3368c2ecf20Sopenharmony_ci		else {
3378c2ecf20Sopenharmony_ci			mpi_ptr_t tspace;
3388c2ecf20Sopenharmony_ci			tspace = mpi_alloc_limb_space(2 * size);
3398c2ecf20Sopenharmony_ci			mul_n(prodp, up, vp, size, tspace);
3408c2ecf20Sopenharmony_ci			mpi_free_limb_space(tspace);
3418c2ecf20Sopenharmony_ci		}
3428c2ecf20Sopenharmony_ci	}
3438c2ecf20Sopenharmony_ci}
3448c2ecf20Sopenharmony_ci
3458c2ecf20Sopenharmony_ciint
3468c2ecf20Sopenharmony_cimpihelp_mul_karatsuba_case(mpi_ptr_t prodp,
3478c2ecf20Sopenharmony_ci			   mpi_ptr_t up, mpi_size_t usize,
3488c2ecf20Sopenharmony_ci			   mpi_ptr_t vp, mpi_size_t vsize,
3498c2ecf20Sopenharmony_ci			   struct karatsuba_ctx *ctx)
3508c2ecf20Sopenharmony_ci{
3518c2ecf20Sopenharmony_ci	mpi_limb_t cy;
3528c2ecf20Sopenharmony_ci
3538c2ecf20Sopenharmony_ci	if (!ctx->tspace || ctx->tspace_size < vsize) {
3548c2ecf20Sopenharmony_ci		if (ctx->tspace)
3558c2ecf20Sopenharmony_ci			mpi_free_limb_space(ctx->tspace);
3568c2ecf20Sopenharmony_ci		ctx->tspace = mpi_alloc_limb_space(2 * vsize);
3578c2ecf20Sopenharmony_ci		if (!ctx->tspace)
3588c2ecf20Sopenharmony_ci			return -ENOMEM;
3598c2ecf20Sopenharmony_ci		ctx->tspace_size = vsize;
3608c2ecf20Sopenharmony_ci	}
3618c2ecf20Sopenharmony_ci
3628c2ecf20Sopenharmony_ci	MPN_MUL_N_RECURSE(prodp, up, vp, vsize, ctx->tspace);
3638c2ecf20Sopenharmony_ci
3648c2ecf20Sopenharmony_ci	prodp += vsize;
3658c2ecf20Sopenharmony_ci	up += vsize;
3668c2ecf20Sopenharmony_ci	usize -= vsize;
3678c2ecf20Sopenharmony_ci	if (usize >= vsize) {
3688c2ecf20Sopenharmony_ci		if (!ctx->tp || ctx->tp_size < vsize) {
3698c2ecf20Sopenharmony_ci			if (ctx->tp)
3708c2ecf20Sopenharmony_ci				mpi_free_limb_space(ctx->tp);
3718c2ecf20Sopenharmony_ci			ctx->tp = mpi_alloc_limb_space(2 * vsize);
3728c2ecf20Sopenharmony_ci			if (!ctx->tp) {
3738c2ecf20Sopenharmony_ci				if (ctx->tspace)
3748c2ecf20Sopenharmony_ci					mpi_free_limb_space(ctx->tspace);
3758c2ecf20Sopenharmony_ci				ctx->tspace = NULL;
3768c2ecf20Sopenharmony_ci				return -ENOMEM;
3778c2ecf20Sopenharmony_ci			}
3788c2ecf20Sopenharmony_ci			ctx->tp_size = vsize;
3798c2ecf20Sopenharmony_ci		}
3808c2ecf20Sopenharmony_ci
3818c2ecf20Sopenharmony_ci		do {
3828c2ecf20Sopenharmony_ci			MPN_MUL_N_RECURSE(ctx->tp, up, vp, vsize, ctx->tspace);
3838c2ecf20Sopenharmony_ci			cy = mpihelp_add_n(prodp, prodp, ctx->tp, vsize);
3848c2ecf20Sopenharmony_ci			mpihelp_add_1(prodp + vsize, ctx->tp + vsize, vsize,
3858c2ecf20Sopenharmony_ci				      cy);
3868c2ecf20Sopenharmony_ci			prodp += vsize;
3878c2ecf20Sopenharmony_ci			up += vsize;
3888c2ecf20Sopenharmony_ci			usize -= vsize;
3898c2ecf20Sopenharmony_ci		} while (usize >= vsize);
3908c2ecf20Sopenharmony_ci	}
3918c2ecf20Sopenharmony_ci
3928c2ecf20Sopenharmony_ci	if (usize) {
3938c2ecf20Sopenharmony_ci		if (usize < KARATSUBA_THRESHOLD) {
3948c2ecf20Sopenharmony_ci			mpi_limb_t tmp;
3958c2ecf20Sopenharmony_ci			if (mpihelp_mul(ctx->tspace, vp, vsize, up, usize, &tmp)
3968c2ecf20Sopenharmony_ci			    < 0)
3978c2ecf20Sopenharmony_ci				return -ENOMEM;
3988c2ecf20Sopenharmony_ci		} else {
3998c2ecf20Sopenharmony_ci			if (!ctx->next) {
4008c2ecf20Sopenharmony_ci				ctx->next = kzalloc(sizeof *ctx, GFP_KERNEL);
4018c2ecf20Sopenharmony_ci				if (!ctx->next)
4028c2ecf20Sopenharmony_ci					return -ENOMEM;
4038c2ecf20Sopenharmony_ci			}
4048c2ecf20Sopenharmony_ci			if (mpihelp_mul_karatsuba_case(ctx->tspace,
4058c2ecf20Sopenharmony_ci						       vp, vsize,
4068c2ecf20Sopenharmony_ci						       up, usize,
4078c2ecf20Sopenharmony_ci						       ctx->next) < 0)
4088c2ecf20Sopenharmony_ci				return -ENOMEM;
4098c2ecf20Sopenharmony_ci		}
4108c2ecf20Sopenharmony_ci
4118c2ecf20Sopenharmony_ci		cy = mpihelp_add_n(prodp, prodp, ctx->tspace, vsize);
4128c2ecf20Sopenharmony_ci		mpihelp_add_1(prodp + vsize, ctx->tspace + vsize, usize, cy);
4138c2ecf20Sopenharmony_ci	}
4148c2ecf20Sopenharmony_ci
4158c2ecf20Sopenharmony_ci	return 0;
4168c2ecf20Sopenharmony_ci}
4178c2ecf20Sopenharmony_ci
4188c2ecf20Sopenharmony_civoid mpihelp_release_karatsuba_ctx(struct karatsuba_ctx *ctx)
4198c2ecf20Sopenharmony_ci{
4208c2ecf20Sopenharmony_ci	struct karatsuba_ctx *ctx2;
4218c2ecf20Sopenharmony_ci
4228c2ecf20Sopenharmony_ci	if (ctx->tp)
4238c2ecf20Sopenharmony_ci		mpi_free_limb_space(ctx->tp);
4248c2ecf20Sopenharmony_ci	if (ctx->tspace)
4258c2ecf20Sopenharmony_ci		mpi_free_limb_space(ctx->tspace);
4268c2ecf20Sopenharmony_ci	for (ctx = ctx->next; ctx; ctx = ctx2) {
4278c2ecf20Sopenharmony_ci		ctx2 = ctx->next;
4288c2ecf20Sopenharmony_ci		if (ctx->tp)
4298c2ecf20Sopenharmony_ci			mpi_free_limb_space(ctx->tp);
4308c2ecf20Sopenharmony_ci		if (ctx->tspace)
4318c2ecf20Sopenharmony_ci			mpi_free_limb_space(ctx->tspace);
4328c2ecf20Sopenharmony_ci		kfree(ctx);
4338c2ecf20Sopenharmony_ci	}
4348c2ecf20Sopenharmony_ci}
4358c2ecf20Sopenharmony_ci
4368c2ecf20Sopenharmony_ci/* Multiply the natural numbers u (pointed to by UP, with USIZE limbs)
4378c2ecf20Sopenharmony_ci * and v (pointed to by VP, with VSIZE limbs), and store the result at
4388c2ecf20Sopenharmony_ci * PRODP.  USIZE + VSIZE limbs are always stored, but if the input
4398c2ecf20Sopenharmony_ci * operands are normalized.  Return the most significant limb of the
4408c2ecf20Sopenharmony_ci * result.
4418c2ecf20Sopenharmony_ci *
4428c2ecf20Sopenharmony_ci * NOTE: The space pointed to by PRODP is overwritten before finished
4438c2ecf20Sopenharmony_ci * with U and V, so overlap is an error.
4448c2ecf20Sopenharmony_ci *
4458c2ecf20Sopenharmony_ci * Argument constraints:
4468c2ecf20Sopenharmony_ci * 1. USIZE >= VSIZE.
4478c2ecf20Sopenharmony_ci * 2. PRODP != UP and PRODP != VP, i.e. the destination
4488c2ecf20Sopenharmony_ci *    must be distinct from the multiplier and the multiplicand.
4498c2ecf20Sopenharmony_ci */
4508c2ecf20Sopenharmony_ci
4518c2ecf20Sopenharmony_ciint
4528c2ecf20Sopenharmony_cimpihelp_mul(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize,
4538c2ecf20Sopenharmony_ci	    mpi_ptr_t vp, mpi_size_t vsize, mpi_limb_t *_result)
4548c2ecf20Sopenharmony_ci{
4558c2ecf20Sopenharmony_ci	mpi_ptr_t prod_endp = prodp + usize + vsize - 1;
4568c2ecf20Sopenharmony_ci	mpi_limb_t cy;
4578c2ecf20Sopenharmony_ci	struct karatsuba_ctx ctx;
4588c2ecf20Sopenharmony_ci
4598c2ecf20Sopenharmony_ci	if (vsize < KARATSUBA_THRESHOLD) {
4608c2ecf20Sopenharmony_ci		mpi_size_t i;
4618c2ecf20Sopenharmony_ci		mpi_limb_t v_limb;
4628c2ecf20Sopenharmony_ci
4638c2ecf20Sopenharmony_ci		if (!vsize) {
4648c2ecf20Sopenharmony_ci			*_result = 0;
4658c2ecf20Sopenharmony_ci			return 0;
4668c2ecf20Sopenharmony_ci		}
4678c2ecf20Sopenharmony_ci
4688c2ecf20Sopenharmony_ci		/* Multiply by the first limb in V separately, as the result can be
4698c2ecf20Sopenharmony_ci		 * stored (not added) to PROD.  We also avoid a loop for zeroing.  */
4708c2ecf20Sopenharmony_ci		v_limb = vp[0];
4718c2ecf20Sopenharmony_ci		if (v_limb <= 1) {
4728c2ecf20Sopenharmony_ci			if (v_limb == 1)
4738c2ecf20Sopenharmony_ci				MPN_COPY(prodp, up, usize);
4748c2ecf20Sopenharmony_ci			else
4758c2ecf20Sopenharmony_ci				MPN_ZERO(prodp, usize);
4768c2ecf20Sopenharmony_ci			cy = 0;
4778c2ecf20Sopenharmony_ci		} else
4788c2ecf20Sopenharmony_ci			cy = mpihelp_mul_1(prodp, up, usize, v_limb);
4798c2ecf20Sopenharmony_ci
4808c2ecf20Sopenharmony_ci		prodp[usize] = cy;
4818c2ecf20Sopenharmony_ci		prodp++;
4828c2ecf20Sopenharmony_ci
4838c2ecf20Sopenharmony_ci		/* For each iteration in the outer loop, multiply one limb from
4848c2ecf20Sopenharmony_ci		 * U with one limb from V, and add it to PROD.  */
4858c2ecf20Sopenharmony_ci		for (i = 1; i < vsize; i++) {
4868c2ecf20Sopenharmony_ci			v_limb = vp[i];
4878c2ecf20Sopenharmony_ci			if (v_limb <= 1) {
4888c2ecf20Sopenharmony_ci				cy = 0;
4898c2ecf20Sopenharmony_ci				if (v_limb == 1)
4908c2ecf20Sopenharmony_ci					cy = mpihelp_add_n(prodp, prodp, up,
4918c2ecf20Sopenharmony_ci							   usize);
4928c2ecf20Sopenharmony_ci			} else
4938c2ecf20Sopenharmony_ci				cy = mpihelp_addmul_1(prodp, up, usize, v_limb);
4948c2ecf20Sopenharmony_ci
4958c2ecf20Sopenharmony_ci			prodp[usize] = cy;
4968c2ecf20Sopenharmony_ci			prodp++;
4978c2ecf20Sopenharmony_ci		}
4988c2ecf20Sopenharmony_ci
4998c2ecf20Sopenharmony_ci		*_result = cy;
5008c2ecf20Sopenharmony_ci		return 0;
5018c2ecf20Sopenharmony_ci	}
5028c2ecf20Sopenharmony_ci
5038c2ecf20Sopenharmony_ci	memset(&ctx, 0, sizeof ctx);
5048c2ecf20Sopenharmony_ci	if (mpihelp_mul_karatsuba_case(prodp, up, usize, vp, vsize, &ctx) < 0)
5058c2ecf20Sopenharmony_ci		return -ENOMEM;
5068c2ecf20Sopenharmony_ci	mpihelp_release_karatsuba_ctx(&ctx);
5078c2ecf20Sopenharmony_ci	*_result = *prod_endp;
5088c2ecf20Sopenharmony_ci	return 0;
5098c2ecf20Sopenharmony_ci}
510