162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later
262306a36Sopenharmony_ci/* mpihelp-mul.c  -  MPI helper functions
362306a36Sopenharmony_ci * Copyright (C) 1994, 1996, 1998, 1999,
462306a36Sopenharmony_ci *               2000 Free Software Foundation, Inc.
562306a36Sopenharmony_ci *
662306a36Sopenharmony_ci * This file is part of GnuPG.
762306a36Sopenharmony_ci *
862306a36Sopenharmony_ci * Note: This code is heavily based on the GNU MP Library.
962306a36Sopenharmony_ci *	 Actually it's the same code with only minor changes in the
1062306a36Sopenharmony_ci *	 way the data is stored; this is to support the abstraction
1162306a36Sopenharmony_ci *	 of an optional secure memory allocation which may be used
1262306a36Sopenharmony_ci *	 to avoid revealing of sensitive data due to paging etc.
1362306a36Sopenharmony_ci *	 The GNU MP Library itself is published under the LGPL;
1462306a36Sopenharmony_ci *	 however I decided to publish this code under the plain GPL.
1562306a36Sopenharmony_ci */
1662306a36Sopenharmony_ci
1762306a36Sopenharmony_ci#include <linux/string.h>
1862306a36Sopenharmony_ci#include "mpi-internal.h"
1962306a36Sopenharmony_ci#include "longlong.h"
2062306a36Sopenharmony_ci
2162306a36Sopenharmony_ci#define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace)		\
2262306a36Sopenharmony_ci	do {							\
2362306a36Sopenharmony_ci		if ((size) < KARATSUBA_THRESHOLD)		\
2462306a36Sopenharmony_ci			mul_n_basecase(prodp, up, vp, size);	\
2562306a36Sopenharmony_ci		else						\
2662306a36Sopenharmony_ci			mul_n(prodp, up, vp, size, tspace);	\
2762306a36Sopenharmony_ci	} while (0);
2862306a36Sopenharmony_ci
2962306a36Sopenharmony_ci#define MPN_SQR_N_RECURSE(prodp, up, size, tspace)		\
3062306a36Sopenharmony_ci	do {							\
3162306a36Sopenharmony_ci		if ((size) < KARATSUBA_THRESHOLD)		\
3262306a36Sopenharmony_ci			mpih_sqr_n_basecase(prodp, up, size);	\
3362306a36Sopenharmony_ci		else						\
3462306a36Sopenharmony_ci			mpih_sqr_n(prodp, up, size, tspace);	\
3562306a36Sopenharmony_ci	} while (0);
3662306a36Sopenharmony_ci
3762306a36Sopenharmony_ci/* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP),
3862306a36Sopenharmony_ci * both with SIZE limbs, and store the result at PRODP.  2 * SIZE limbs are
3962306a36Sopenharmony_ci * always stored.  Return the most significant limb.
4062306a36Sopenharmony_ci *
4162306a36Sopenharmony_ci * Argument constraints:
4262306a36Sopenharmony_ci * 1. PRODP != UP and PRODP != VP, i.e. the destination
4362306a36Sopenharmony_ci *    must be distinct from the multiplier and the multiplicand.
4462306a36Sopenharmony_ci *
4562306a36Sopenharmony_ci *
4662306a36Sopenharmony_ci * Handle simple cases with traditional multiplication.
4762306a36Sopenharmony_ci *
4862306a36Sopenharmony_ci * This is the most critical code of multiplication.  All multiplies rely
4962306a36Sopenharmony_ci * on this, both small and huge.  Small ones arrive here immediately.  Huge
5062306a36Sopenharmony_ci * ones arrive here as this is the base case for Karatsuba's recursive
5162306a36Sopenharmony_ci * algorithm below.
5262306a36Sopenharmony_ci */
5362306a36Sopenharmony_ci
5462306a36Sopenharmony_cistatic mpi_limb_t
5562306a36Sopenharmony_cimul_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size)
5662306a36Sopenharmony_ci{
5762306a36Sopenharmony_ci	mpi_size_t i;
5862306a36Sopenharmony_ci	mpi_limb_t cy;
5962306a36Sopenharmony_ci	mpi_limb_t v_limb;
6062306a36Sopenharmony_ci
6162306a36Sopenharmony_ci	/* Multiply by the first limb in V separately, as the result can be
6262306a36Sopenharmony_ci	 * stored (not added) to PROD.  We also avoid a loop for zeroing.  */
6362306a36Sopenharmony_ci	v_limb = vp[0];
6462306a36Sopenharmony_ci	if (v_limb <= 1) {
6562306a36Sopenharmony_ci		if (v_limb == 1)
6662306a36Sopenharmony_ci			MPN_COPY(prodp, up, size);
6762306a36Sopenharmony_ci		else
6862306a36Sopenharmony_ci			MPN_ZERO(prodp, size);
6962306a36Sopenharmony_ci		cy = 0;
7062306a36Sopenharmony_ci	} else
7162306a36Sopenharmony_ci		cy = mpihelp_mul_1(prodp, up, size, v_limb);
7262306a36Sopenharmony_ci
7362306a36Sopenharmony_ci	prodp[size] = cy;
7462306a36Sopenharmony_ci	prodp++;
7562306a36Sopenharmony_ci
7662306a36Sopenharmony_ci	/* For each iteration in the outer loop, multiply one limb from
7762306a36Sopenharmony_ci	 * U with one limb from V, and add it to PROD.  */
7862306a36Sopenharmony_ci	for (i = 1; i < size; i++) {
7962306a36Sopenharmony_ci		v_limb = vp[i];
8062306a36Sopenharmony_ci		if (v_limb <= 1) {
8162306a36Sopenharmony_ci			cy = 0;
8262306a36Sopenharmony_ci			if (v_limb == 1)
8362306a36Sopenharmony_ci				cy = mpihelp_add_n(prodp, prodp, up, size);
8462306a36Sopenharmony_ci		} else
8562306a36Sopenharmony_ci			cy = mpihelp_addmul_1(prodp, up, size, v_limb);
8662306a36Sopenharmony_ci
8762306a36Sopenharmony_ci		prodp[size] = cy;
8862306a36Sopenharmony_ci		prodp++;
8962306a36Sopenharmony_ci	}
9062306a36Sopenharmony_ci
9162306a36Sopenharmony_ci	return cy;
9262306a36Sopenharmony_ci}
9362306a36Sopenharmony_ci
9462306a36Sopenharmony_cistatic void
9562306a36Sopenharmony_cimul_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp,
9662306a36Sopenharmony_ci		mpi_size_t size, mpi_ptr_t tspace)
9762306a36Sopenharmony_ci{
9862306a36Sopenharmony_ci	if (size & 1) {
9962306a36Sopenharmony_ci		/* The size is odd, and the code below doesn't handle that.
10062306a36Sopenharmony_ci		 * Multiply the least significant (size - 1) limbs with a recursive
10162306a36Sopenharmony_ci		 * call, and handle the most significant limb of S1 and S2
10262306a36Sopenharmony_ci		 * separately.
10362306a36Sopenharmony_ci		 * A slightly faster way to do this would be to make the Karatsuba
10462306a36Sopenharmony_ci		 * code below behave as if the size were even, and let it check for
10562306a36Sopenharmony_ci		 * odd size in the end.  I.e., in essence move this code to the end.
10662306a36Sopenharmony_ci		 * Doing so would save us a recursive call, and potentially make the
10762306a36Sopenharmony_ci		 * stack grow a lot less.
10862306a36Sopenharmony_ci		 */
10962306a36Sopenharmony_ci		mpi_size_t esize = size - 1;	/* even size */
11062306a36Sopenharmony_ci		mpi_limb_t cy_limb;
11162306a36Sopenharmony_ci
11262306a36Sopenharmony_ci		MPN_MUL_N_RECURSE(prodp, up, vp, esize, tspace);
11362306a36Sopenharmony_ci		cy_limb = mpihelp_addmul_1(prodp + esize, up, esize, vp[esize]);
11462306a36Sopenharmony_ci		prodp[esize + esize] = cy_limb;
11562306a36Sopenharmony_ci		cy_limb = mpihelp_addmul_1(prodp + esize, vp, size, up[esize]);
11662306a36Sopenharmony_ci		prodp[esize + size] = cy_limb;
11762306a36Sopenharmony_ci	} else {
11862306a36Sopenharmony_ci		/* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm.
11962306a36Sopenharmony_ci		 *
12062306a36Sopenharmony_ci		 * Split U in two pieces, U1 and U0, such that
12162306a36Sopenharmony_ci		 * U = U0 + U1*(B**n),
12262306a36Sopenharmony_ci		 * and V in V1 and V0, such that
12362306a36Sopenharmony_ci		 * V = V0 + V1*(B**n).
12462306a36Sopenharmony_ci		 *
12562306a36Sopenharmony_ci		 * UV is then computed recursively using the identity
12662306a36Sopenharmony_ci		 *
12762306a36Sopenharmony_ci		 *        2n   n          n                     n
12862306a36Sopenharmony_ci		 * UV = (B  + B )U V  +  B (U -U )(V -V )  +  (B + 1)U V
12962306a36Sopenharmony_ci		 *                1 1        1  0   0  1              0 0
13062306a36Sopenharmony_ci		 *
13162306a36Sopenharmony_ci		 * Where B = 2**BITS_PER_MP_LIMB.
13262306a36Sopenharmony_ci		 */
13362306a36Sopenharmony_ci		mpi_size_t hsize = size >> 1;
13462306a36Sopenharmony_ci		mpi_limb_t cy;
13562306a36Sopenharmony_ci		int negflg;
13662306a36Sopenharmony_ci
13762306a36Sopenharmony_ci		/* Product H.      ________________  ________________
13862306a36Sopenharmony_ci		 *                |_____U1 x V1____||____U0 x V0_____|
13962306a36Sopenharmony_ci		 * Put result in upper part of PROD and pass low part of TSPACE
14062306a36Sopenharmony_ci		 * as new TSPACE.
14162306a36Sopenharmony_ci		 */
14262306a36Sopenharmony_ci		MPN_MUL_N_RECURSE(prodp + size, up + hsize, vp + hsize, hsize,
14362306a36Sopenharmony_ci				  tspace);
14462306a36Sopenharmony_ci
14562306a36Sopenharmony_ci		/* Product M.      ________________
14662306a36Sopenharmony_ci		 *                |_(U1-U0)(V0-V1)_|
14762306a36Sopenharmony_ci		 */
14862306a36Sopenharmony_ci		if (mpihelp_cmp(up + hsize, up, hsize) >= 0) {
14962306a36Sopenharmony_ci			mpihelp_sub_n(prodp, up + hsize, up, hsize);
15062306a36Sopenharmony_ci			negflg = 0;
15162306a36Sopenharmony_ci		} else {
15262306a36Sopenharmony_ci			mpihelp_sub_n(prodp, up, up + hsize, hsize);
15362306a36Sopenharmony_ci			negflg = 1;
15462306a36Sopenharmony_ci		}
15562306a36Sopenharmony_ci		if (mpihelp_cmp(vp + hsize, vp, hsize) >= 0) {
15662306a36Sopenharmony_ci			mpihelp_sub_n(prodp + hsize, vp + hsize, vp, hsize);
15762306a36Sopenharmony_ci			negflg ^= 1;
15862306a36Sopenharmony_ci		} else {
15962306a36Sopenharmony_ci			mpihelp_sub_n(prodp + hsize, vp, vp + hsize, hsize);
16062306a36Sopenharmony_ci			/* No change of NEGFLG.  */
16162306a36Sopenharmony_ci		}
16262306a36Sopenharmony_ci		/* Read temporary operands from low part of PROD.
16362306a36Sopenharmony_ci		 * Put result in low part of TSPACE using upper part of TSPACE
16462306a36Sopenharmony_ci		 * as new TSPACE.
16562306a36Sopenharmony_ci		 */
16662306a36Sopenharmony_ci		MPN_MUL_N_RECURSE(tspace, prodp, prodp + hsize, hsize,
16762306a36Sopenharmony_ci				  tspace + size);
16862306a36Sopenharmony_ci
16962306a36Sopenharmony_ci		/* Add/copy product H. */
17062306a36Sopenharmony_ci		MPN_COPY(prodp + hsize, prodp + size, hsize);
17162306a36Sopenharmony_ci		cy = mpihelp_add_n(prodp + size, prodp + size,
17262306a36Sopenharmony_ci				   prodp + size + hsize, hsize);
17362306a36Sopenharmony_ci
17462306a36Sopenharmony_ci		/* Add product M (if NEGFLG M is a negative number) */
17562306a36Sopenharmony_ci		if (negflg)
17662306a36Sopenharmony_ci			cy -=
17762306a36Sopenharmony_ci			    mpihelp_sub_n(prodp + hsize, prodp + hsize, tspace,
17862306a36Sopenharmony_ci					  size);
17962306a36Sopenharmony_ci		else
18062306a36Sopenharmony_ci			cy +=
18162306a36Sopenharmony_ci			    mpihelp_add_n(prodp + hsize, prodp + hsize, tspace,
18262306a36Sopenharmony_ci					  size);
18362306a36Sopenharmony_ci
18462306a36Sopenharmony_ci		/* Product L.      ________________  ________________
18562306a36Sopenharmony_ci		 *                |________________||____U0 x V0_____|
18662306a36Sopenharmony_ci		 * Read temporary operands from low part of PROD.
18762306a36Sopenharmony_ci		 * Put result in low part of TSPACE using upper part of TSPACE
18862306a36Sopenharmony_ci		 * as new TSPACE.
18962306a36Sopenharmony_ci		 */
19062306a36Sopenharmony_ci		MPN_MUL_N_RECURSE(tspace, up, vp, hsize, tspace + size);
19162306a36Sopenharmony_ci
19262306a36Sopenharmony_ci		/* Add/copy Product L (twice) */
19362306a36Sopenharmony_ci
19462306a36Sopenharmony_ci		cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size);
19562306a36Sopenharmony_ci		if (cy)
19662306a36Sopenharmony_ci			mpihelp_add_1(prodp + hsize + size,
19762306a36Sopenharmony_ci				      prodp + hsize + size, hsize, cy);
19862306a36Sopenharmony_ci
19962306a36Sopenharmony_ci		MPN_COPY(prodp, tspace, hsize);
20062306a36Sopenharmony_ci		cy = mpihelp_add_n(prodp + hsize, prodp + hsize, tspace + hsize,
20162306a36Sopenharmony_ci				   hsize);
20262306a36Sopenharmony_ci		if (cy)
20362306a36Sopenharmony_ci			mpihelp_add_1(prodp + size, prodp + size, size, 1);
20462306a36Sopenharmony_ci	}
20562306a36Sopenharmony_ci}
20662306a36Sopenharmony_ci
20762306a36Sopenharmony_civoid mpih_sqr_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size)
20862306a36Sopenharmony_ci{
20962306a36Sopenharmony_ci	mpi_size_t i;
21062306a36Sopenharmony_ci	mpi_limb_t cy_limb;
21162306a36Sopenharmony_ci	mpi_limb_t v_limb;
21262306a36Sopenharmony_ci
21362306a36Sopenharmony_ci	/* Multiply by the first limb in V separately, as the result can be
21462306a36Sopenharmony_ci	 * stored (not added) to PROD.  We also avoid a loop for zeroing.  */
21562306a36Sopenharmony_ci	v_limb = up[0];
21662306a36Sopenharmony_ci	if (v_limb <= 1) {
21762306a36Sopenharmony_ci		if (v_limb == 1)
21862306a36Sopenharmony_ci			MPN_COPY(prodp, up, size);
21962306a36Sopenharmony_ci		else
22062306a36Sopenharmony_ci			MPN_ZERO(prodp, size);
22162306a36Sopenharmony_ci		cy_limb = 0;
22262306a36Sopenharmony_ci	} else
22362306a36Sopenharmony_ci		cy_limb = mpihelp_mul_1(prodp, up, size, v_limb);
22462306a36Sopenharmony_ci
22562306a36Sopenharmony_ci	prodp[size] = cy_limb;
22662306a36Sopenharmony_ci	prodp++;
22762306a36Sopenharmony_ci
22862306a36Sopenharmony_ci	/* For each iteration in the outer loop, multiply one limb from
22962306a36Sopenharmony_ci	 * U with one limb from V, and add it to PROD.  */
23062306a36Sopenharmony_ci	for (i = 1; i < size; i++) {
23162306a36Sopenharmony_ci		v_limb = up[i];
23262306a36Sopenharmony_ci		if (v_limb <= 1) {
23362306a36Sopenharmony_ci			cy_limb = 0;
23462306a36Sopenharmony_ci			if (v_limb == 1)
23562306a36Sopenharmony_ci				cy_limb = mpihelp_add_n(prodp, prodp, up, size);
23662306a36Sopenharmony_ci		} else
23762306a36Sopenharmony_ci			cy_limb = mpihelp_addmul_1(prodp, up, size, v_limb);
23862306a36Sopenharmony_ci
23962306a36Sopenharmony_ci		prodp[size] = cy_limb;
24062306a36Sopenharmony_ci		prodp++;
24162306a36Sopenharmony_ci	}
24262306a36Sopenharmony_ci}
24362306a36Sopenharmony_ci
24462306a36Sopenharmony_civoid
24562306a36Sopenharmony_cimpih_sqr_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size, mpi_ptr_t tspace)
24662306a36Sopenharmony_ci{
24762306a36Sopenharmony_ci	if (size & 1) {
24862306a36Sopenharmony_ci		/* The size is odd, and the code below doesn't handle that.
24962306a36Sopenharmony_ci		 * Multiply the least significant (size - 1) limbs with a recursive
25062306a36Sopenharmony_ci		 * call, and handle the most significant limb of S1 and S2
25162306a36Sopenharmony_ci		 * separately.
25262306a36Sopenharmony_ci		 * A slightly faster way to do this would be to make the Karatsuba
25362306a36Sopenharmony_ci		 * code below behave as if the size were even, and let it check for
25462306a36Sopenharmony_ci		 * odd size in the end.  I.e., in essence move this code to the end.
25562306a36Sopenharmony_ci		 * Doing so would save us a recursive call, and potentially make the
25662306a36Sopenharmony_ci		 * stack grow a lot less.
25762306a36Sopenharmony_ci		 */
25862306a36Sopenharmony_ci		mpi_size_t esize = size - 1;	/* even size */
25962306a36Sopenharmony_ci		mpi_limb_t cy_limb;
26062306a36Sopenharmony_ci
26162306a36Sopenharmony_ci		MPN_SQR_N_RECURSE(prodp, up, esize, tspace);
26262306a36Sopenharmony_ci		cy_limb = mpihelp_addmul_1(prodp + esize, up, esize, up[esize]);
26362306a36Sopenharmony_ci		prodp[esize + esize] = cy_limb;
26462306a36Sopenharmony_ci		cy_limb = mpihelp_addmul_1(prodp + esize, up, size, up[esize]);
26562306a36Sopenharmony_ci
26662306a36Sopenharmony_ci		prodp[esize + size] = cy_limb;
26762306a36Sopenharmony_ci	} else {
26862306a36Sopenharmony_ci		mpi_size_t hsize = size >> 1;
26962306a36Sopenharmony_ci		mpi_limb_t cy;
27062306a36Sopenharmony_ci
27162306a36Sopenharmony_ci		/* Product H.      ________________  ________________
27262306a36Sopenharmony_ci		 *                |_____U1 x U1____||____U0 x U0_____|
27362306a36Sopenharmony_ci		 * Put result in upper part of PROD and pass low part of TSPACE
27462306a36Sopenharmony_ci		 * as new TSPACE.
27562306a36Sopenharmony_ci		 */
27662306a36Sopenharmony_ci		MPN_SQR_N_RECURSE(prodp + size, up + hsize, hsize, tspace);
27762306a36Sopenharmony_ci
27862306a36Sopenharmony_ci		/* Product M.      ________________
27962306a36Sopenharmony_ci		 *                |_(U1-U0)(U0-U1)_|
28062306a36Sopenharmony_ci		 */
28162306a36Sopenharmony_ci		if (mpihelp_cmp(up + hsize, up, hsize) >= 0)
28262306a36Sopenharmony_ci			mpihelp_sub_n(prodp, up + hsize, up, hsize);
28362306a36Sopenharmony_ci		else
28462306a36Sopenharmony_ci			mpihelp_sub_n(prodp, up, up + hsize, hsize);
28562306a36Sopenharmony_ci
28662306a36Sopenharmony_ci		/* Read temporary operands from low part of PROD.
28762306a36Sopenharmony_ci		 * Put result in low part of TSPACE using upper part of TSPACE
28862306a36Sopenharmony_ci		 * as new TSPACE.  */
28962306a36Sopenharmony_ci		MPN_SQR_N_RECURSE(tspace, prodp, hsize, tspace + size);
29062306a36Sopenharmony_ci
29162306a36Sopenharmony_ci		/* Add/copy product H  */
29262306a36Sopenharmony_ci		MPN_COPY(prodp + hsize, prodp + size, hsize);
29362306a36Sopenharmony_ci		cy = mpihelp_add_n(prodp + size, prodp + size,
29462306a36Sopenharmony_ci				   prodp + size + hsize, hsize);
29562306a36Sopenharmony_ci
29662306a36Sopenharmony_ci		/* Add product M (if NEGFLG M is a negative number).  */
29762306a36Sopenharmony_ci		cy -= mpihelp_sub_n(prodp + hsize, prodp + hsize, tspace, size);
29862306a36Sopenharmony_ci
29962306a36Sopenharmony_ci		/* Product L.      ________________  ________________
30062306a36Sopenharmony_ci		 *                |________________||____U0 x U0_____|
30162306a36Sopenharmony_ci		 * Read temporary operands from low part of PROD.
30262306a36Sopenharmony_ci		 * Put result in low part of TSPACE using upper part of TSPACE
30362306a36Sopenharmony_ci		 * as new TSPACE.  */
30462306a36Sopenharmony_ci		MPN_SQR_N_RECURSE(tspace, up, hsize, tspace + size);
30562306a36Sopenharmony_ci
30662306a36Sopenharmony_ci		/* Add/copy Product L (twice).  */
30762306a36Sopenharmony_ci		cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size);
30862306a36Sopenharmony_ci		if (cy)
30962306a36Sopenharmony_ci			mpihelp_add_1(prodp + hsize + size,
31062306a36Sopenharmony_ci				      prodp + hsize + size, hsize, cy);
31162306a36Sopenharmony_ci
31262306a36Sopenharmony_ci		MPN_COPY(prodp, tspace, hsize);
31362306a36Sopenharmony_ci		cy = mpihelp_add_n(prodp + hsize, prodp + hsize, tspace + hsize,
31462306a36Sopenharmony_ci				   hsize);
31562306a36Sopenharmony_ci		if (cy)
31662306a36Sopenharmony_ci			mpihelp_add_1(prodp + size, prodp + size, size, 1);
31762306a36Sopenharmony_ci	}
31862306a36Sopenharmony_ci}
31962306a36Sopenharmony_ci
32062306a36Sopenharmony_ci
32162306a36Sopenharmony_civoid mpihelp_mul_n(mpi_ptr_t prodp,
32262306a36Sopenharmony_ci		mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size)
32362306a36Sopenharmony_ci{
32462306a36Sopenharmony_ci	if (up == vp) {
32562306a36Sopenharmony_ci		if (size < KARATSUBA_THRESHOLD)
32662306a36Sopenharmony_ci			mpih_sqr_n_basecase(prodp, up, size);
32762306a36Sopenharmony_ci		else {
32862306a36Sopenharmony_ci			mpi_ptr_t tspace;
32962306a36Sopenharmony_ci			tspace = mpi_alloc_limb_space(2 * size);
33062306a36Sopenharmony_ci			mpih_sqr_n(prodp, up, size, tspace);
33162306a36Sopenharmony_ci			mpi_free_limb_space(tspace);
33262306a36Sopenharmony_ci		}
33362306a36Sopenharmony_ci	} else {
33462306a36Sopenharmony_ci		if (size < KARATSUBA_THRESHOLD)
33562306a36Sopenharmony_ci			mul_n_basecase(prodp, up, vp, size);
33662306a36Sopenharmony_ci		else {
33762306a36Sopenharmony_ci			mpi_ptr_t tspace;
33862306a36Sopenharmony_ci			tspace = mpi_alloc_limb_space(2 * size);
33962306a36Sopenharmony_ci			mul_n(prodp, up, vp, size, tspace);
34062306a36Sopenharmony_ci			mpi_free_limb_space(tspace);
34162306a36Sopenharmony_ci		}
34262306a36Sopenharmony_ci	}
34362306a36Sopenharmony_ci}
34462306a36Sopenharmony_ci
34562306a36Sopenharmony_ciint
34662306a36Sopenharmony_cimpihelp_mul_karatsuba_case(mpi_ptr_t prodp,
34762306a36Sopenharmony_ci			   mpi_ptr_t up, mpi_size_t usize,
34862306a36Sopenharmony_ci			   mpi_ptr_t vp, mpi_size_t vsize,
34962306a36Sopenharmony_ci			   struct karatsuba_ctx *ctx)
35062306a36Sopenharmony_ci{
35162306a36Sopenharmony_ci	mpi_limb_t cy;
35262306a36Sopenharmony_ci
35362306a36Sopenharmony_ci	if (!ctx->tspace || ctx->tspace_size < vsize) {
35462306a36Sopenharmony_ci		if (ctx->tspace)
35562306a36Sopenharmony_ci			mpi_free_limb_space(ctx->tspace);
35662306a36Sopenharmony_ci		ctx->tspace = mpi_alloc_limb_space(2 * vsize);
35762306a36Sopenharmony_ci		if (!ctx->tspace)
35862306a36Sopenharmony_ci			return -ENOMEM;
35962306a36Sopenharmony_ci		ctx->tspace_size = vsize;
36062306a36Sopenharmony_ci	}
36162306a36Sopenharmony_ci
36262306a36Sopenharmony_ci	MPN_MUL_N_RECURSE(prodp, up, vp, vsize, ctx->tspace);
36362306a36Sopenharmony_ci
36462306a36Sopenharmony_ci	prodp += vsize;
36562306a36Sopenharmony_ci	up += vsize;
36662306a36Sopenharmony_ci	usize -= vsize;
36762306a36Sopenharmony_ci	if (usize >= vsize) {
36862306a36Sopenharmony_ci		if (!ctx->tp || ctx->tp_size < vsize) {
36962306a36Sopenharmony_ci			if (ctx->tp)
37062306a36Sopenharmony_ci				mpi_free_limb_space(ctx->tp);
37162306a36Sopenharmony_ci			ctx->tp = mpi_alloc_limb_space(2 * vsize);
37262306a36Sopenharmony_ci			if (!ctx->tp) {
37362306a36Sopenharmony_ci				if (ctx->tspace)
37462306a36Sopenharmony_ci					mpi_free_limb_space(ctx->tspace);
37562306a36Sopenharmony_ci				ctx->tspace = NULL;
37662306a36Sopenharmony_ci				return -ENOMEM;
37762306a36Sopenharmony_ci			}
37862306a36Sopenharmony_ci			ctx->tp_size = vsize;
37962306a36Sopenharmony_ci		}
38062306a36Sopenharmony_ci
38162306a36Sopenharmony_ci		do {
38262306a36Sopenharmony_ci			MPN_MUL_N_RECURSE(ctx->tp, up, vp, vsize, ctx->tspace);
38362306a36Sopenharmony_ci			cy = mpihelp_add_n(prodp, prodp, ctx->tp, vsize);
38462306a36Sopenharmony_ci			mpihelp_add_1(prodp + vsize, ctx->tp + vsize, vsize,
38562306a36Sopenharmony_ci				      cy);
38662306a36Sopenharmony_ci			prodp += vsize;
38762306a36Sopenharmony_ci			up += vsize;
38862306a36Sopenharmony_ci			usize -= vsize;
38962306a36Sopenharmony_ci		} while (usize >= vsize);
39062306a36Sopenharmony_ci	}
39162306a36Sopenharmony_ci
39262306a36Sopenharmony_ci	if (usize) {
39362306a36Sopenharmony_ci		if (usize < KARATSUBA_THRESHOLD) {
39462306a36Sopenharmony_ci			mpi_limb_t tmp;
39562306a36Sopenharmony_ci			if (mpihelp_mul(ctx->tspace, vp, vsize, up, usize, &tmp)
39662306a36Sopenharmony_ci			    < 0)
39762306a36Sopenharmony_ci				return -ENOMEM;
39862306a36Sopenharmony_ci		} else {
39962306a36Sopenharmony_ci			if (!ctx->next) {
40062306a36Sopenharmony_ci				ctx->next = kzalloc(sizeof *ctx, GFP_KERNEL);
40162306a36Sopenharmony_ci				if (!ctx->next)
40262306a36Sopenharmony_ci					return -ENOMEM;
40362306a36Sopenharmony_ci			}
40462306a36Sopenharmony_ci			if (mpihelp_mul_karatsuba_case(ctx->tspace,
40562306a36Sopenharmony_ci						       vp, vsize,
40662306a36Sopenharmony_ci						       up, usize,
40762306a36Sopenharmony_ci						       ctx->next) < 0)
40862306a36Sopenharmony_ci				return -ENOMEM;
40962306a36Sopenharmony_ci		}
41062306a36Sopenharmony_ci
41162306a36Sopenharmony_ci		cy = mpihelp_add_n(prodp, prodp, ctx->tspace, vsize);
41262306a36Sopenharmony_ci		mpihelp_add_1(prodp + vsize, ctx->tspace + vsize, usize, cy);
41362306a36Sopenharmony_ci	}
41462306a36Sopenharmony_ci
41562306a36Sopenharmony_ci	return 0;
41662306a36Sopenharmony_ci}
41762306a36Sopenharmony_ci
41862306a36Sopenharmony_civoid mpihelp_release_karatsuba_ctx(struct karatsuba_ctx *ctx)
41962306a36Sopenharmony_ci{
42062306a36Sopenharmony_ci	struct karatsuba_ctx *ctx2;
42162306a36Sopenharmony_ci
42262306a36Sopenharmony_ci	if (ctx->tp)
42362306a36Sopenharmony_ci		mpi_free_limb_space(ctx->tp);
42462306a36Sopenharmony_ci	if (ctx->tspace)
42562306a36Sopenharmony_ci		mpi_free_limb_space(ctx->tspace);
42662306a36Sopenharmony_ci	for (ctx = ctx->next; ctx; ctx = ctx2) {
42762306a36Sopenharmony_ci		ctx2 = ctx->next;
42862306a36Sopenharmony_ci		if (ctx->tp)
42962306a36Sopenharmony_ci			mpi_free_limb_space(ctx->tp);
43062306a36Sopenharmony_ci		if (ctx->tspace)
43162306a36Sopenharmony_ci			mpi_free_limb_space(ctx->tspace);
43262306a36Sopenharmony_ci		kfree(ctx);
43362306a36Sopenharmony_ci	}
43462306a36Sopenharmony_ci}
43562306a36Sopenharmony_ci
43662306a36Sopenharmony_ci/* Multiply the natural numbers u (pointed to by UP, with USIZE limbs)
43762306a36Sopenharmony_ci * and v (pointed to by VP, with VSIZE limbs), and store the result at
43862306a36Sopenharmony_ci * PRODP.  USIZE + VSIZE limbs are always stored, but if the input
43962306a36Sopenharmony_ci * operands are normalized.  Return the most significant limb of the
44062306a36Sopenharmony_ci * result.
44162306a36Sopenharmony_ci *
44262306a36Sopenharmony_ci * NOTE: The space pointed to by PRODP is overwritten before finished
44362306a36Sopenharmony_ci * with U and V, so overlap is an error.
44462306a36Sopenharmony_ci *
44562306a36Sopenharmony_ci * Argument constraints:
44662306a36Sopenharmony_ci * 1. USIZE >= VSIZE.
44762306a36Sopenharmony_ci * 2. PRODP != UP and PRODP != VP, i.e. the destination
44862306a36Sopenharmony_ci *    must be distinct from the multiplier and the multiplicand.
44962306a36Sopenharmony_ci */
45062306a36Sopenharmony_ci
45162306a36Sopenharmony_ciint
45262306a36Sopenharmony_cimpihelp_mul(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize,
45362306a36Sopenharmony_ci	    mpi_ptr_t vp, mpi_size_t vsize, mpi_limb_t *_result)
45462306a36Sopenharmony_ci{
45562306a36Sopenharmony_ci	mpi_ptr_t prod_endp = prodp + usize + vsize - 1;
45662306a36Sopenharmony_ci	mpi_limb_t cy;
45762306a36Sopenharmony_ci	struct karatsuba_ctx ctx;
45862306a36Sopenharmony_ci
45962306a36Sopenharmony_ci	if (vsize < KARATSUBA_THRESHOLD) {
46062306a36Sopenharmony_ci		mpi_size_t i;
46162306a36Sopenharmony_ci		mpi_limb_t v_limb;
46262306a36Sopenharmony_ci
46362306a36Sopenharmony_ci		if (!vsize) {
46462306a36Sopenharmony_ci			*_result = 0;
46562306a36Sopenharmony_ci			return 0;
46662306a36Sopenharmony_ci		}
46762306a36Sopenharmony_ci
46862306a36Sopenharmony_ci		/* Multiply by the first limb in V separately, as the result can be
46962306a36Sopenharmony_ci		 * stored (not added) to PROD.  We also avoid a loop for zeroing.  */
47062306a36Sopenharmony_ci		v_limb = vp[0];
47162306a36Sopenharmony_ci		if (v_limb <= 1) {
47262306a36Sopenharmony_ci			if (v_limb == 1)
47362306a36Sopenharmony_ci				MPN_COPY(prodp, up, usize);
47462306a36Sopenharmony_ci			else
47562306a36Sopenharmony_ci				MPN_ZERO(prodp, usize);
47662306a36Sopenharmony_ci			cy = 0;
47762306a36Sopenharmony_ci		} else
47862306a36Sopenharmony_ci			cy = mpihelp_mul_1(prodp, up, usize, v_limb);
47962306a36Sopenharmony_ci
48062306a36Sopenharmony_ci		prodp[usize] = cy;
48162306a36Sopenharmony_ci		prodp++;
48262306a36Sopenharmony_ci
48362306a36Sopenharmony_ci		/* For each iteration in the outer loop, multiply one limb from
48462306a36Sopenharmony_ci		 * U with one limb from V, and add it to PROD.  */
48562306a36Sopenharmony_ci		for (i = 1; i < vsize; i++) {
48662306a36Sopenharmony_ci			v_limb = vp[i];
48762306a36Sopenharmony_ci			if (v_limb <= 1) {
48862306a36Sopenharmony_ci				cy = 0;
48962306a36Sopenharmony_ci				if (v_limb == 1)
49062306a36Sopenharmony_ci					cy = mpihelp_add_n(prodp, prodp, up,
49162306a36Sopenharmony_ci							   usize);
49262306a36Sopenharmony_ci			} else
49362306a36Sopenharmony_ci				cy = mpihelp_addmul_1(prodp, up, usize, v_limb);
49462306a36Sopenharmony_ci
49562306a36Sopenharmony_ci			prodp[usize] = cy;
49662306a36Sopenharmony_ci			prodp++;
49762306a36Sopenharmony_ci		}
49862306a36Sopenharmony_ci
49962306a36Sopenharmony_ci		*_result = cy;
50062306a36Sopenharmony_ci		return 0;
50162306a36Sopenharmony_ci	}
50262306a36Sopenharmony_ci
50362306a36Sopenharmony_ci	memset(&ctx, 0, sizeof ctx);
50462306a36Sopenharmony_ci	if (mpihelp_mul_karatsuba_case(prodp, up, usize, vp, vsize, &ctx) < 0)
50562306a36Sopenharmony_ci		return -ENOMEM;
50662306a36Sopenharmony_ci	mpihelp_release_karatsuba_ctx(&ctx);
50762306a36Sopenharmony_ci	*_result = *prod_endp;
50862306a36Sopenharmony_ci	return 0;
50962306a36Sopenharmony_ci}
510