18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later
28c2ecf20Sopenharmony_ci/* mpihelp-div.c  -  MPI helper functions
38c2ecf20Sopenharmony_ci *	Copyright (C) 1994, 1996 Free Software Foundation, Inc.
48c2ecf20Sopenharmony_ci *	Copyright (C) 1998, 1999 Free Software Foundation, Inc.
58c2ecf20Sopenharmony_ci *
68c2ecf20Sopenharmony_ci * This file is part of GnuPG.
78c2ecf20Sopenharmony_ci *
88c2ecf20Sopenharmony_ci * Note: This code is heavily based on the GNU MP Library.
98c2ecf20Sopenharmony_ci *	 Actually it's the same code with only minor changes in the
108c2ecf20Sopenharmony_ci *	 way the data is stored; this is to support the abstraction
118c2ecf20Sopenharmony_ci *	 of an optional secure memory allocation which may be used
128c2ecf20Sopenharmony_ci *	 to avoid revealing of sensitive data due to paging etc.
138c2ecf20Sopenharmony_ci *	 The GNU MP Library itself is published under the LGPL;
148c2ecf20Sopenharmony_ci *	 however I decided to publish this code under the plain GPL.
158c2ecf20Sopenharmony_ci */
168c2ecf20Sopenharmony_ci
178c2ecf20Sopenharmony_ci#include "mpi-internal.h"
188c2ecf20Sopenharmony_ci#include "longlong.h"
198c2ecf20Sopenharmony_ci
208c2ecf20Sopenharmony_ci#ifndef UMUL_TIME
218c2ecf20Sopenharmony_ci#define UMUL_TIME 1
228c2ecf20Sopenharmony_ci#endif
238c2ecf20Sopenharmony_ci#ifndef UDIV_TIME
248c2ecf20Sopenharmony_ci#define UDIV_TIME UMUL_TIME
258c2ecf20Sopenharmony_ci#endif
268c2ecf20Sopenharmony_ci
278c2ecf20Sopenharmony_ci
288c2ecf20Sopenharmony_cimpi_limb_t
298c2ecf20Sopenharmony_cimpihelp_mod_1(mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
308c2ecf20Sopenharmony_ci			mpi_limb_t divisor_limb)
318c2ecf20Sopenharmony_ci{
328c2ecf20Sopenharmony_ci	mpi_size_t i;
338c2ecf20Sopenharmony_ci	mpi_limb_t n1, n0, r;
348c2ecf20Sopenharmony_ci	mpi_limb_t dummy __maybe_unused;
358c2ecf20Sopenharmony_ci
368c2ecf20Sopenharmony_ci	/* Botch: Should this be handled at all?  Rely on callers?	*/
378c2ecf20Sopenharmony_ci	if (!dividend_size)
388c2ecf20Sopenharmony_ci		return 0;
398c2ecf20Sopenharmony_ci
408c2ecf20Sopenharmony_ci	/* If multiplication is much faster than division, and the
418c2ecf20Sopenharmony_ci	 * dividend is large, pre-invert the divisor, and use
428c2ecf20Sopenharmony_ci	 * only multiplications in the inner loop.
438c2ecf20Sopenharmony_ci	 *
448c2ecf20Sopenharmony_ci	 * This test should be read:
458c2ecf20Sopenharmony_ci	 *	 Does it ever help to use udiv_qrnnd_preinv?
468c2ecf20Sopenharmony_ci	 *	   && Does what we save compensate for the inversion overhead?
478c2ecf20Sopenharmony_ci	 */
488c2ecf20Sopenharmony_ci	if (UDIV_TIME > (2 * UMUL_TIME + 6)
498c2ecf20Sopenharmony_ci			&& (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME) {
508c2ecf20Sopenharmony_ci		int normalization_steps;
518c2ecf20Sopenharmony_ci
528c2ecf20Sopenharmony_ci		normalization_steps = count_leading_zeros(divisor_limb);
538c2ecf20Sopenharmony_ci		if (normalization_steps) {
548c2ecf20Sopenharmony_ci			mpi_limb_t divisor_limb_inverted;
558c2ecf20Sopenharmony_ci
568c2ecf20Sopenharmony_ci			divisor_limb <<= normalization_steps;
578c2ecf20Sopenharmony_ci
588c2ecf20Sopenharmony_ci			/* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB.  The
598c2ecf20Sopenharmony_ci			 * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
608c2ecf20Sopenharmony_ci			 * most significant bit (with weight 2**N) implicit.
618c2ecf20Sopenharmony_ci			 *
628c2ecf20Sopenharmony_ci			 * Special case for DIVISOR_LIMB == 100...000.
638c2ecf20Sopenharmony_ci			 */
648c2ecf20Sopenharmony_ci			if (!(divisor_limb << 1))
658c2ecf20Sopenharmony_ci				divisor_limb_inverted = ~(mpi_limb_t)0;
668c2ecf20Sopenharmony_ci			else
678c2ecf20Sopenharmony_ci				udiv_qrnnd(divisor_limb_inverted, dummy,
688c2ecf20Sopenharmony_ci						-divisor_limb, 0, divisor_limb);
698c2ecf20Sopenharmony_ci
708c2ecf20Sopenharmony_ci			n1 = dividend_ptr[dividend_size - 1];
718c2ecf20Sopenharmony_ci			r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);
728c2ecf20Sopenharmony_ci
738c2ecf20Sopenharmony_ci			/* Possible optimization:
748c2ecf20Sopenharmony_ci			 * if (r == 0
758c2ecf20Sopenharmony_ci			 * && divisor_limb > ((n1 << normalization_steps)
768c2ecf20Sopenharmony_ci			 *		       | (dividend_ptr[dividend_size - 2] >> ...)))
778c2ecf20Sopenharmony_ci			 * ...one division less...
788c2ecf20Sopenharmony_ci			 */
798c2ecf20Sopenharmony_ci			for (i = dividend_size - 2; i >= 0; i--) {
808c2ecf20Sopenharmony_ci				n0 = dividend_ptr[i];
818c2ecf20Sopenharmony_ci				UDIV_QRNND_PREINV(dummy, r, r,
828c2ecf20Sopenharmony_ci						((n1 << normalization_steps)
838c2ecf20Sopenharmony_ci						 | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))),
848c2ecf20Sopenharmony_ci						divisor_limb, divisor_limb_inverted);
858c2ecf20Sopenharmony_ci				n1 = n0;
868c2ecf20Sopenharmony_ci			}
878c2ecf20Sopenharmony_ci			UDIV_QRNND_PREINV(dummy, r, r,
888c2ecf20Sopenharmony_ci					n1 << normalization_steps,
898c2ecf20Sopenharmony_ci					divisor_limb, divisor_limb_inverted);
908c2ecf20Sopenharmony_ci			return r >> normalization_steps;
918c2ecf20Sopenharmony_ci		} else {
928c2ecf20Sopenharmony_ci			mpi_limb_t divisor_limb_inverted;
938c2ecf20Sopenharmony_ci
948c2ecf20Sopenharmony_ci			/* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB.  The
958c2ecf20Sopenharmony_ci			 * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
968c2ecf20Sopenharmony_ci			 * most significant bit (with weight 2**N) implicit.
978c2ecf20Sopenharmony_ci			 *
988c2ecf20Sopenharmony_ci			 * Special case for DIVISOR_LIMB == 100...000.
998c2ecf20Sopenharmony_ci			 */
1008c2ecf20Sopenharmony_ci			if (!(divisor_limb << 1))
1018c2ecf20Sopenharmony_ci				divisor_limb_inverted = ~(mpi_limb_t)0;
1028c2ecf20Sopenharmony_ci			else
1038c2ecf20Sopenharmony_ci				udiv_qrnnd(divisor_limb_inverted, dummy,
1048c2ecf20Sopenharmony_ci						-divisor_limb, 0, divisor_limb);
1058c2ecf20Sopenharmony_ci
1068c2ecf20Sopenharmony_ci			i = dividend_size - 1;
1078c2ecf20Sopenharmony_ci			r = dividend_ptr[i];
1088c2ecf20Sopenharmony_ci
1098c2ecf20Sopenharmony_ci			if (r >= divisor_limb)
1108c2ecf20Sopenharmony_ci				r = 0;
1118c2ecf20Sopenharmony_ci			else
1128c2ecf20Sopenharmony_ci				i--;
1138c2ecf20Sopenharmony_ci
1148c2ecf20Sopenharmony_ci			for ( ; i >= 0; i--) {
1158c2ecf20Sopenharmony_ci				n0 = dividend_ptr[i];
1168c2ecf20Sopenharmony_ci				UDIV_QRNND_PREINV(dummy, r, r,
1178c2ecf20Sopenharmony_ci						n0, divisor_limb, divisor_limb_inverted);
1188c2ecf20Sopenharmony_ci			}
1198c2ecf20Sopenharmony_ci			return r;
1208c2ecf20Sopenharmony_ci		}
1218c2ecf20Sopenharmony_ci	} else {
1228c2ecf20Sopenharmony_ci		if (UDIV_NEEDS_NORMALIZATION) {
1238c2ecf20Sopenharmony_ci			int normalization_steps;
1248c2ecf20Sopenharmony_ci
1258c2ecf20Sopenharmony_ci			normalization_steps = count_leading_zeros(divisor_limb);
1268c2ecf20Sopenharmony_ci			if (normalization_steps) {
1278c2ecf20Sopenharmony_ci				divisor_limb <<= normalization_steps;
1288c2ecf20Sopenharmony_ci
1298c2ecf20Sopenharmony_ci				n1 = dividend_ptr[dividend_size - 1];
1308c2ecf20Sopenharmony_ci				r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);
1318c2ecf20Sopenharmony_ci
1328c2ecf20Sopenharmony_ci				/* Possible optimization:
1338c2ecf20Sopenharmony_ci				 * if (r == 0
1348c2ecf20Sopenharmony_ci				 * && divisor_limb > ((n1 << normalization_steps)
1358c2ecf20Sopenharmony_ci				 *		   | (dividend_ptr[dividend_size - 2] >> ...)))
1368c2ecf20Sopenharmony_ci				 * ...one division less...
1378c2ecf20Sopenharmony_ci				 */
1388c2ecf20Sopenharmony_ci				for (i = dividend_size - 2; i >= 0; i--) {
1398c2ecf20Sopenharmony_ci					n0 = dividend_ptr[i];
1408c2ecf20Sopenharmony_ci					udiv_qrnnd(dummy, r, r,
1418c2ecf20Sopenharmony_ci						((n1 << normalization_steps)
1428c2ecf20Sopenharmony_ci						 | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))),
1438c2ecf20Sopenharmony_ci						divisor_limb);
1448c2ecf20Sopenharmony_ci					n1 = n0;
1458c2ecf20Sopenharmony_ci				}
1468c2ecf20Sopenharmony_ci				udiv_qrnnd(dummy, r, r,
1478c2ecf20Sopenharmony_ci						n1 << normalization_steps,
1488c2ecf20Sopenharmony_ci						divisor_limb);
1498c2ecf20Sopenharmony_ci				return r >> normalization_steps;
1508c2ecf20Sopenharmony_ci			}
1518c2ecf20Sopenharmony_ci		}
1528c2ecf20Sopenharmony_ci		/* No normalization needed, either because udiv_qrnnd doesn't require
1538c2ecf20Sopenharmony_ci		 * it, or because DIVISOR_LIMB is already normalized.
1548c2ecf20Sopenharmony_ci		 */
1558c2ecf20Sopenharmony_ci		i = dividend_size - 1;
1568c2ecf20Sopenharmony_ci		r = dividend_ptr[i];
1578c2ecf20Sopenharmony_ci
1588c2ecf20Sopenharmony_ci		if (r >= divisor_limb)
1598c2ecf20Sopenharmony_ci			r = 0;
1608c2ecf20Sopenharmony_ci		else
1618c2ecf20Sopenharmony_ci			i--;
1628c2ecf20Sopenharmony_ci
1638c2ecf20Sopenharmony_ci		for (; i >= 0; i--) {
1648c2ecf20Sopenharmony_ci			n0 = dividend_ptr[i];
1658c2ecf20Sopenharmony_ci			udiv_qrnnd(dummy, r, r, n0, divisor_limb);
1668c2ecf20Sopenharmony_ci		}
1678c2ecf20Sopenharmony_ci		return r;
1688c2ecf20Sopenharmony_ci	}
1698c2ecf20Sopenharmony_ci}
1708c2ecf20Sopenharmony_ci
1718c2ecf20Sopenharmony_ci/* Divide num (NP/NSIZE) by den (DP/DSIZE) and write
1728c2ecf20Sopenharmony_ci * the NSIZE-DSIZE least significant quotient limbs at QP
1738c2ecf20Sopenharmony_ci * and the DSIZE long remainder at NP.	If QEXTRA_LIMBS is
1748c2ecf20Sopenharmony_ci * non-zero, generate that many fraction bits and append them after the
1758c2ecf20Sopenharmony_ci * other quotient limbs.
1768c2ecf20Sopenharmony_ci * Return the most significant limb of the quotient, this is always 0 or 1.
1778c2ecf20Sopenharmony_ci *
1788c2ecf20Sopenharmony_ci * Preconditions:
1798c2ecf20Sopenharmony_ci * 0. NSIZE >= DSIZE.
1808c2ecf20Sopenharmony_ci * 1. The most significant bit of the divisor must be set.
1818c2ecf20Sopenharmony_ci * 2. QP must either not overlap with the input operands at all, or
1828c2ecf20Sopenharmony_ci *    QP + DSIZE >= NP must hold true.	(This means that it's
1838c2ecf20Sopenharmony_ci *    possible to put the quotient in the high part of NUM, right after the
1848c2ecf20Sopenharmony_ci *    remainder in NUM.
1858c2ecf20Sopenharmony_ci * 3. NSIZE >= DSIZE, even if QEXTRA_LIMBS is non-zero.
1868c2ecf20Sopenharmony_ci */
1878c2ecf20Sopenharmony_ci
1888c2ecf20Sopenharmony_cimpi_limb_t
1898c2ecf20Sopenharmony_cimpihelp_divrem(mpi_ptr_t qp, mpi_size_t qextra_limbs,
1908c2ecf20Sopenharmony_ci	       mpi_ptr_t np, mpi_size_t nsize, mpi_ptr_t dp, mpi_size_t dsize)
1918c2ecf20Sopenharmony_ci{
1928c2ecf20Sopenharmony_ci	mpi_limb_t most_significant_q_limb = 0;
1938c2ecf20Sopenharmony_ci
1948c2ecf20Sopenharmony_ci	switch (dsize) {
1958c2ecf20Sopenharmony_ci	case 0:
1968c2ecf20Sopenharmony_ci		/* We are asked to divide by zero, so go ahead and do it!  (To make
1978c2ecf20Sopenharmony_ci		   the compiler not remove this statement, return the value.)  */
1988c2ecf20Sopenharmony_ci		/*
1998c2ecf20Sopenharmony_ci		 * existing clients of this function have been modified
2008c2ecf20Sopenharmony_ci		 * not to call it with dsize == 0, so this should not happen
2018c2ecf20Sopenharmony_ci		 */
2028c2ecf20Sopenharmony_ci		return 1 / dsize;
2038c2ecf20Sopenharmony_ci
2048c2ecf20Sopenharmony_ci	case 1:
2058c2ecf20Sopenharmony_ci		{
2068c2ecf20Sopenharmony_ci			mpi_size_t i;
2078c2ecf20Sopenharmony_ci			mpi_limb_t n1;
2088c2ecf20Sopenharmony_ci			mpi_limb_t d;
2098c2ecf20Sopenharmony_ci
2108c2ecf20Sopenharmony_ci			d = dp[0];
2118c2ecf20Sopenharmony_ci			n1 = np[nsize - 1];
2128c2ecf20Sopenharmony_ci
2138c2ecf20Sopenharmony_ci			if (n1 >= d) {
2148c2ecf20Sopenharmony_ci				n1 -= d;
2158c2ecf20Sopenharmony_ci				most_significant_q_limb = 1;
2168c2ecf20Sopenharmony_ci			}
2178c2ecf20Sopenharmony_ci
2188c2ecf20Sopenharmony_ci			qp += qextra_limbs;
2198c2ecf20Sopenharmony_ci			for (i = nsize - 2; i >= 0; i--)
2208c2ecf20Sopenharmony_ci				udiv_qrnnd(qp[i], n1, n1, np[i], d);
2218c2ecf20Sopenharmony_ci			qp -= qextra_limbs;
2228c2ecf20Sopenharmony_ci
2238c2ecf20Sopenharmony_ci			for (i = qextra_limbs - 1; i >= 0; i--)
2248c2ecf20Sopenharmony_ci				udiv_qrnnd(qp[i], n1, n1, 0, d);
2258c2ecf20Sopenharmony_ci
2268c2ecf20Sopenharmony_ci			np[0] = n1;
2278c2ecf20Sopenharmony_ci		}
2288c2ecf20Sopenharmony_ci		break;
2298c2ecf20Sopenharmony_ci
2308c2ecf20Sopenharmony_ci	case 2:
2318c2ecf20Sopenharmony_ci		{
2328c2ecf20Sopenharmony_ci			mpi_size_t i;
2338c2ecf20Sopenharmony_ci			mpi_limb_t n1, n0, n2;
2348c2ecf20Sopenharmony_ci			mpi_limb_t d1, d0;
2358c2ecf20Sopenharmony_ci
2368c2ecf20Sopenharmony_ci			np += nsize - 2;
2378c2ecf20Sopenharmony_ci			d1 = dp[1];
2388c2ecf20Sopenharmony_ci			d0 = dp[0];
2398c2ecf20Sopenharmony_ci			n1 = np[1];
2408c2ecf20Sopenharmony_ci			n0 = np[0];
2418c2ecf20Sopenharmony_ci
2428c2ecf20Sopenharmony_ci			if (n1 >= d1 && (n1 > d1 || n0 >= d0)) {
2438c2ecf20Sopenharmony_ci				sub_ddmmss(n1, n0, n1, n0, d1, d0);
2448c2ecf20Sopenharmony_ci				most_significant_q_limb = 1;
2458c2ecf20Sopenharmony_ci			}
2468c2ecf20Sopenharmony_ci
2478c2ecf20Sopenharmony_ci			for (i = qextra_limbs + nsize - 2 - 1; i >= 0; i--) {
2488c2ecf20Sopenharmony_ci				mpi_limb_t q;
2498c2ecf20Sopenharmony_ci				mpi_limb_t r;
2508c2ecf20Sopenharmony_ci
2518c2ecf20Sopenharmony_ci				if (i >= qextra_limbs)
2528c2ecf20Sopenharmony_ci					np--;
2538c2ecf20Sopenharmony_ci				else
2548c2ecf20Sopenharmony_ci					np[0] = 0;
2558c2ecf20Sopenharmony_ci
2568c2ecf20Sopenharmony_ci				if (n1 == d1) {
2578c2ecf20Sopenharmony_ci					/* Q should be either 111..111 or 111..110.  Need special
2588c2ecf20Sopenharmony_ci					 * treatment of this rare case as normal division would
2598c2ecf20Sopenharmony_ci					 * give overflow.  */
2608c2ecf20Sopenharmony_ci					q = ~(mpi_limb_t) 0;
2618c2ecf20Sopenharmony_ci
2628c2ecf20Sopenharmony_ci					r = n0 + d1;
2638c2ecf20Sopenharmony_ci					if (r < d1) {	/* Carry in the addition? */
2648c2ecf20Sopenharmony_ci						add_ssaaaa(n1, n0, r - d0,
2658c2ecf20Sopenharmony_ci							   np[0], 0, d0);
2668c2ecf20Sopenharmony_ci						qp[i] = q;
2678c2ecf20Sopenharmony_ci						continue;
2688c2ecf20Sopenharmony_ci					}
2698c2ecf20Sopenharmony_ci					n1 = d0 - (d0 != 0 ? 1 : 0);
2708c2ecf20Sopenharmony_ci					n0 = -d0;
2718c2ecf20Sopenharmony_ci				} else {
2728c2ecf20Sopenharmony_ci					udiv_qrnnd(q, r, n1, n0, d1);
2738c2ecf20Sopenharmony_ci					umul_ppmm(n1, n0, d0, q);
2748c2ecf20Sopenharmony_ci				}
2758c2ecf20Sopenharmony_ci
2768c2ecf20Sopenharmony_ci				n2 = np[0];
2778c2ecf20Sopenharmony_ciq_test:
2788c2ecf20Sopenharmony_ci				if (n1 > r || (n1 == r && n0 > n2)) {
2798c2ecf20Sopenharmony_ci					/* The estimated Q was too large.  */
2808c2ecf20Sopenharmony_ci					q--;
2818c2ecf20Sopenharmony_ci					sub_ddmmss(n1, n0, n1, n0, 0, d0);
2828c2ecf20Sopenharmony_ci					r += d1;
2838c2ecf20Sopenharmony_ci					if (r >= d1)	/* If not carry, test Q again.  */
2848c2ecf20Sopenharmony_ci						goto q_test;
2858c2ecf20Sopenharmony_ci				}
2868c2ecf20Sopenharmony_ci
2878c2ecf20Sopenharmony_ci				qp[i] = q;
2888c2ecf20Sopenharmony_ci				sub_ddmmss(n1, n0, r, n2, n1, n0);
2898c2ecf20Sopenharmony_ci			}
2908c2ecf20Sopenharmony_ci			np[1] = n1;
2918c2ecf20Sopenharmony_ci			np[0] = n0;
2928c2ecf20Sopenharmony_ci		}
2938c2ecf20Sopenharmony_ci		break;
2948c2ecf20Sopenharmony_ci
2958c2ecf20Sopenharmony_ci	default:
2968c2ecf20Sopenharmony_ci		{
2978c2ecf20Sopenharmony_ci			mpi_size_t i;
2988c2ecf20Sopenharmony_ci			mpi_limb_t dX, d1, n0;
2998c2ecf20Sopenharmony_ci
3008c2ecf20Sopenharmony_ci			np += nsize - dsize;
3018c2ecf20Sopenharmony_ci			dX = dp[dsize - 1];
3028c2ecf20Sopenharmony_ci			d1 = dp[dsize - 2];
3038c2ecf20Sopenharmony_ci			n0 = np[dsize - 1];
3048c2ecf20Sopenharmony_ci
3058c2ecf20Sopenharmony_ci			if (n0 >= dX) {
3068c2ecf20Sopenharmony_ci				if (n0 > dX
3078c2ecf20Sopenharmony_ci				    || mpihelp_cmp(np, dp, dsize - 1) >= 0) {
3088c2ecf20Sopenharmony_ci					mpihelp_sub_n(np, np, dp, dsize);
3098c2ecf20Sopenharmony_ci					n0 = np[dsize - 1];
3108c2ecf20Sopenharmony_ci					most_significant_q_limb = 1;
3118c2ecf20Sopenharmony_ci				}
3128c2ecf20Sopenharmony_ci			}
3138c2ecf20Sopenharmony_ci
3148c2ecf20Sopenharmony_ci			for (i = qextra_limbs + nsize - dsize - 1; i >= 0; i--) {
3158c2ecf20Sopenharmony_ci				mpi_limb_t q;
3168c2ecf20Sopenharmony_ci				mpi_limb_t n1, n2;
3178c2ecf20Sopenharmony_ci				mpi_limb_t cy_limb;
3188c2ecf20Sopenharmony_ci
3198c2ecf20Sopenharmony_ci				if (i >= qextra_limbs) {
3208c2ecf20Sopenharmony_ci					np--;
3218c2ecf20Sopenharmony_ci					n2 = np[dsize];
3228c2ecf20Sopenharmony_ci				} else {
3238c2ecf20Sopenharmony_ci					n2 = np[dsize - 1];
3248c2ecf20Sopenharmony_ci					MPN_COPY_DECR(np + 1, np, dsize - 1);
3258c2ecf20Sopenharmony_ci					np[0] = 0;
3268c2ecf20Sopenharmony_ci				}
3278c2ecf20Sopenharmony_ci
3288c2ecf20Sopenharmony_ci				if (n0 == dX) {
3298c2ecf20Sopenharmony_ci					/* This might over-estimate q, but it's probably not worth
3308c2ecf20Sopenharmony_ci					 * the extra code here to find out.  */
3318c2ecf20Sopenharmony_ci					q = ~(mpi_limb_t) 0;
3328c2ecf20Sopenharmony_ci				} else {
3338c2ecf20Sopenharmony_ci					mpi_limb_t r;
3348c2ecf20Sopenharmony_ci
3358c2ecf20Sopenharmony_ci					udiv_qrnnd(q, r, n0, np[dsize - 1], dX);
3368c2ecf20Sopenharmony_ci					umul_ppmm(n1, n0, d1, q);
3378c2ecf20Sopenharmony_ci
3388c2ecf20Sopenharmony_ci					while (n1 > r
3398c2ecf20Sopenharmony_ci					       || (n1 == r
3408c2ecf20Sopenharmony_ci						   && n0 > np[dsize - 2])) {
3418c2ecf20Sopenharmony_ci						q--;
3428c2ecf20Sopenharmony_ci						r += dX;
3438c2ecf20Sopenharmony_ci						if (r < dX)	/* I.e. "carry in previous addition?" */
3448c2ecf20Sopenharmony_ci							break;
3458c2ecf20Sopenharmony_ci						n1 -= n0 < d1;
3468c2ecf20Sopenharmony_ci						n0 -= d1;
3478c2ecf20Sopenharmony_ci					}
3488c2ecf20Sopenharmony_ci				}
3498c2ecf20Sopenharmony_ci
3508c2ecf20Sopenharmony_ci				/* Possible optimization: We already have (q * n0) and (1 * n1)
3518c2ecf20Sopenharmony_ci				 * after the calculation of q.  Taking advantage of that, we
3528c2ecf20Sopenharmony_ci				 * could make this loop make two iterations less.  */
3538c2ecf20Sopenharmony_ci				cy_limb = mpihelp_submul_1(np, dp, dsize, q);
3548c2ecf20Sopenharmony_ci
3558c2ecf20Sopenharmony_ci				if (n2 != cy_limb) {
3568c2ecf20Sopenharmony_ci					mpihelp_add_n(np, np, dp, dsize);
3578c2ecf20Sopenharmony_ci					q--;
3588c2ecf20Sopenharmony_ci				}
3598c2ecf20Sopenharmony_ci
3608c2ecf20Sopenharmony_ci				qp[i] = q;
3618c2ecf20Sopenharmony_ci				n0 = np[dsize - 1];
3628c2ecf20Sopenharmony_ci			}
3638c2ecf20Sopenharmony_ci		}
3648c2ecf20Sopenharmony_ci	}
3658c2ecf20Sopenharmony_ci
3668c2ecf20Sopenharmony_ci	return most_significant_q_limb;
3678c2ecf20Sopenharmony_ci}
3688c2ecf20Sopenharmony_ci
3698c2ecf20Sopenharmony_ci/****************
3708c2ecf20Sopenharmony_ci * Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB.
3718c2ecf20Sopenharmony_ci * Write DIVIDEND_SIZE limbs of quotient at QUOT_PTR.
3728c2ecf20Sopenharmony_ci * Return the single-limb remainder.
3738c2ecf20Sopenharmony_ci * There are no constraints on the value of the divisor.
3748c2ecf20Sopenharmony_ci *
3758c2ecf20Sopenharmony_ci * QUOT_PTR and DIVIDEND_PTR might point to the same limb.
3768c2ecf20Sopenharmony_ci */
3778c2ecf20Sopenharmony_ci
3788c2ecf20Sopenharmony_cimpi_limb_t
3798c2ecf20Sopenharmony_cimpihelp_divmod_1(mpi_ptr_t quot_ptr,
3808c2ecf20Sopenharmony_ci		mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
3818c2ecf20Sopenharmony_ci		mpi_limb_t divisor_limb)
3828c2ecf20Sopenharmony_ci{
3838c2ecf20Sopenharmony_ci	mpi_size_t i;
3848c2ecf20Sopenharmony_ci	mpi_limb_t n1, n0, r;
3858c2ecf20Sopenharmony_ci	mpi_limb_t dummy __maybe_unused;
3868c2ecf20Sopenharmony_ci
3878c2ecf20Sopenharmony_ci	if (!dividend_size)
3888c2ecf20Sopenharmony_ci		return 0;
3898c2ecf20Sopenharmony_ci
3908c2ecf20Sopenharmony_ci	/* If multiplication is much faster than division, and the
3918c2ecf20Sopenharmony_ci	 * dividend is large, pre-invert the divisor, and use
3928c2ecf20Sopenharmony_ci	 * only multiplications in the inner loop.
3938c2ecf20Sopenharmony_ci	 *
3948c2ecf20Sopenharmony_ci	 * This test should be read:
3958c2ecf20Sopenharmony_ci	 * Does it ever help to use udiv_qrnnd_preinv?
3968c2ecf20Sopenharmony_ci	 * && Does what we save compensate for the inversion overhead?
3978c2ecf20Sopenharmony_ci	 */
3988c2ecf20Sopenharmony_ci	if (UDIV_TIME > (2 * UMUL_TIME + 6)
3998c2ecf20Sopenharmony_ci			&& (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME) {
4008c2ecf20Sopenharmony_ci		int normalization_steps;
4018c2ecf20Sopenharmony_ci
4028c2ecf20Sopenharmony_ci		normalization_steps = count_leading_zeros(divisor_limb);
4038c2ecf20Sopenharmony_ci		if (normalization_steps) {
4048c2ecf20Sopenharmony_ci			mpi_limb_t divisor_limb_inverted;
4058c2ecf20Sopenharmony_ci
4068c2ecf20Sopenharmony_ci			divisor_limb <<= normalization_steps;
4078c2ecf20Sopenharmony_ci
4088c2ecf20Sopenharmony_ci			/* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB.  The
4098c2ecf20Sopenharmony_ci			 * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
4108c2ecf20Sopenharmony_ci			 * most significant bit (with weight 2**N) implicit.
4118c2ecf20Sopenharmony_ci			 */
4128c2ecf20Sopenharmony_ci			/* Special case for DIVISOR_LIMB == 100...000.  */
4138c2ecf20Sopenharmony_ci			if (!(divisor_limb << 1))
4148c2ecf20Sopenharmony_ci				divisor_limb_inverted = ~(mpi_limb_t)0;
4158c2ecf20Sopenharmony_ci			else
4168c2ecf20Sopenharmony_ci				udiv_qrnnd(divisor_limb_inverted, dummy,
4178c2ecf20Sopenharmony_ci						-divisor_limb, 0, divisor_limb);
4188c2ecf20Sopenharmony_ci
4198c2ecf20Sopenharmony_ci			n1 = dividend_ptr[dividend_size - 1];
4208c2ecf20Sopenharmony_ci			r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);
4218c2ecf20Sopenharmony_ci
4228c2ecf20Sopenharmony_ci			/* Possible optimization:
4238c2ecf20Sopenharmony_ci			 * if (r == 0
4248c2ecf20Sopenharmony_ci			 * && divisor_limb > ((n1 << normalization_steps)
4258c2ecf20Sopenharmony_ci			 *		       | (dividend_ptr[dividend_size - 2] >> ...)))
4268c2ecf20Sopenharmony_ci			 * ...one division less...
4278c2ecf20Sopenharmony_ci			 */
4288c2ecf20Sopenharmony_ci			for (i = dividend_size - 2; i >= 0; i--) {
4298c2ecf20Sopenharmony_ci				n0 = dividend_ptr[i];
4308c2ecf20Sopenharmony_ci				UDIV_QRNND_PREINV(quot_ptr[i + 1], r, r,
4318c2ecf20Sopenharmony_ci						((n1 << normalization_steps)
4328c2ecf20Sopenharmony_ci						 | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))),
4338c2ecf20Sopenharmony_ci						divisor_limb, divisor_limb_inverted);
4348c2ecf20Sopenharmony_ci				n1 = n0;
4358c2ecf20Sopenharmony_ci			}
4368c2ecf20Sopenharmony_ci			UDIV_QRNND_PREINV(quot_ptr[0], r, r,
4378c2ecf20Sopenharmony_ci					n1 << normalization_steps,
4388c2ecf20Sopenharmony_ci					divisor_limb, divisor_limb_inverted);
4398c2ecf20Sopenharmony_ci			return r >> normalization_steps;
4408c2ecf20Sopenharmony_ci		} else {
4418c2ecf20Sopenharmony_ci			mpi_limb_t divisor_limb_inverted;
4428c2ecf20Sopenharmony_ci
4438c2ecf20Sopenharmony_ci			/* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB.  The
4448c2ecf20Sopenharmony_ci			 * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
4458c2ecf20Sopenharmony_ci			 * most significant bit (with weight 2**N) implicit.
4468c2ecf20Sopenharmony_ci			 */
4478c2ecf20Sopenharmony_ci			/* Special case for DIVISOR_LIMB == 100...000.  */
4488c2ecf20Sopenharmony_ci			if (!(divisor_limb << 1))
4498c2ecf20Sopenharmony_ci				divisor_limb_inverted = ~(mpi_limb_t) 0;
4508c2ecf20Sopenharmony_ci			else
4518c2ecf20Sopenharmony_ci				udiv_qrnnd(divisor_limb_inverted, dummy,
4528c2ecf20Sopenharmony_ci						-divisor_limb, 0, divisor_limb);
4538c2ecf20Sopenharmony_ci
4548c2ecf20Sopenharmony_ci			i = dividend_size - 1;
4558c2ecf20Sopenharmony_ci			r = dividend_ptr[i];
4568c2ecf20Sopenharmony_ci
4578c2ecf20Sopenharmony_ci			if (r >= divisor_limb)
4588c2ecf20Sopenharmony_ci				r = 0;
4598c2ecf20Sopenharmony_ci			else
4608c2ecf20Sopenharmony_ci				quot_ptr[i--] = 0;
4618c2ecf20Sopenharmony_ci
4628c2ecf20Sopenharmony_ci			for ( ; i >= 0; i--) {
4638c2ecf20Sopenharmony_ci				n0 = dividend_ptr[i];
4648c2ecf20Sopenharmony_ci				UDIV_QRNND_PREINV(quot_ptr[i], r, r,
4658c2ecf20Sopenharmony_ci						n0, divisor_limb, divisor_limb_inverted);
4668c2ecf20Sopenharmony_ci			}
4678c2ecf20Sopenharmony_ci			return r;
4688c2ecf20Sopenharmony_ci		}
4698c2ecf20Sopenharmony_ci	} else {
4708c2ecf20Sopenharmony_ci		if (UDIV_NEEDS_NORMALIZATION) {
4718c2ecf20Sopenharmony_ci			int normalization_steps;
4728c2ecf20Sopenharmony_ci
4738c2ecf20Sopenharmony_ci			normalization_steps = count_leading_zeros(divisor_limb);
4748c2ecf20Sopenharmony_ci			if (normalization_steps) {
4758c2ecf20Sopenharmony_ci				divisor_limb <<= normalization_steps;
4768c2ecf20Sopenharmony_ci
4778c2ecf20Sopenharmony_ci				n1 = dividend_ptr[dividend_size - 1];
4788c2ecf20Sopenharmony_ci				r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);
4798c2ecf20Sopenharmony_ci
4808c2ecf20Sopenharmony_ci				/* Possible optimization:
4818c2ecf20Sopenharmony_ci				 * if (r == 0
4828c2ecf20Sopenharmony_ci				 * && divisor_limb > ((n1 << normalization_steps)
4838c2ecf20Sopenharmony_ci				 *		   | (dividend_ptr[dividend_size - 2] >> ...)))
4848c2ecf20Sopenharmony_ci				 * ...one division less...
4858c2ecf20Sopenharmony_ci				 */
4868c2ecf20Sopenharmony_ci				for (i = dividend_size - 2; i >= 0; i--) {
4878c2ecf20Sopenharmony_ci					n0 = dividend_ptr[i];
4888c2ecf20Sopenharmony_ci					udiv_qrnnd(quot_ptr[i + 1], r, r,
4898c2ecf20Sopenharmony_ci						((n1 << normalization_steps)
4908c2ecf20Sopenharmony_ci						 | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))),
4918c2ecf20Sopenharmony_ci						divisor_limb);
4928c2ecf20Sopenharmony_ci					n1 = n0;
4938c2ecf20Sopenharmony_ci				}
4948c2ecf20Sopenharmony_ci				udiv_qrnnd(quot_ptr[0], r, r,
4958c2ecf20Sopenharmony_ci						n1 << normalization_steps,
4968c2ecf20Sopenharmony_ci						divisor_limb);
4978c2ecf20Sopenharmony_ci				return r >> normalization_steps;
4988c2ecf20Sopenharmony_ci			}
4998c2ecf20Sopenharmony_ci		}
5008c2ecf20Sopenharmony_ci		/* No normalization needed, either because udiv_qrnnd doesn't require
5018c2ecf20Sopenharmony_ci		 * it, or because DIVISOR_LIMB is already normalized.
5028c2ecf20Sopenharmony_ci		 */
5038c2ecf20Sopenharmony_ci		i = dividend_size - 1;
5048c2ecf20Sopenharmony_ci		r = dividend_ptr[i];
5058c2ecf20Sopenharmony_ci
5068c2ecf20Sopenharmony_ci		if (r >= divisor_limb)
5078c2ecf20Sopenharmony_ci			r = 0;
5088c2ecf20Sopenharmony_ci		else
5098c2ecf20Sopenharmony_ci			quot_ptr[i--] = 0;
5108c2ecf20Sopenharmony_ci
5118c2ecf20Sopenharmony_ci		for (; i >= 0; i--) {
5128c2ecf20Sopenharmony_ci			n0 = dividend_ptr[i];
5138c2ecf20Sopenharmony_ci			udiv_qrnnd(quot_ptr[i], r, r, n0, divisor_limb);
5148c2ecf20Sopenharmony_ci		}
5158c2ecf20Sopenharmony_ci		return r;
5168c2ecf20Sopenharmony_ci	}
5178c2ecf20Sopenharmony_ci}
518