1e1051a39Sopenharmony_ci/*
2e1051a39Sopenharmony_ci * Copyright 1995-2022 The OpenSSL Project Authors. All Rights Reserved.
3e1051a39Sopenharmony_ci *
4e1051a39Sopenharmony_ci * Licensed under the Apache License 2.0 (the "License").  You may not use
5e1051a39Sopenharmony_ci * this file except in compliance with the License.  You can obtain a copy
6e1051a39Sopenharmony_ci * in the file LICENSE in the source distribution or at
7e1051a39Sopenharmony_ci * https://www.openssl.org/source/license.html
8e1051a39Sopenharmony_ci */
9e1051a39Sopenharmony_ci
10e1051a39Sopenharmony_ci#include <assert.h>
11e1051a39Sopenharmony_ci#include <openssl/bn.h>
12e1051a39Sopenharmony_ci#include "internal/cryptlib.h"
13e1051a39Sopenharmony_ci#include "bn_local.h"
14e1051a39Sopenharmony_ci
15e1051a39Sopenharmony_ci/* The old slow way */
16e1051a39Sopenharmony_ci#if 0
17e1051a39Sopenharmony_ciint BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d,
18e1051a39Sopenharmony_ci           BN_CTX *ctx)
19e1051a39Sopenharmony_ci{
20e1051a39Sopenharmony_ci    int i, nm, nd;
21e1051a39Sopenharmony_ci    int ret = 0;
22e1051a39Sopenharmony_ci    BIGNUM *D;
23e1051a39Sopenharmony_ci
24e1051a39Sopenharmony_ci    bn_check_top(m);
25e1051a39Sopenharmony_ci    bn_check_top(d);
26e1051a39Sopenharmony_ci    if (BN_is_zero(d)) {
27e1051a39Sopenharmony_ci        ERR_raise(ERR_LIB_BN, BN_R_DIV_BY_ZERO);
28e1051a39Sopenharmony_ci        return 0;
29e1051a39Sopenharmony_ci    }
30e1051a39Sopenharmony_ci
31e1051a39Sopenharmony_ci    if (BN_ucmp(m, d) < 0) {
32e1051a39Sopenharmony_ci        if (rem != NULL) {
33e1051a39Sopenharmony_ci            if (BN_copy(rem, m) == NULL)
34e1051a39Sopenharmony_ci                return 0;
35e1051a39Sopenharmony_ci        }
36e1051a39Sopenharmony_ci        if (dv != NULL)
37e1051a39Sopenharmony_ci            BN_zero(dv);
38e1051a39Sopenharmony_ci        return 1;
39e1051a39Sopenharmony_ci    }
40e1051a39Sopenharmony_ci
41e1051a39Sopenharmony_ci    BN_CTX_start(ctx);
42e1051a39Sopenharmony_ci    D = BN_CTX_get(ctx);
43e1051a39Sopenharmony_ci    if (dv == NULL)
44e1051a39Sopenharmony_ci        dv = BN_CTX_get(ctx);
45e1051a39Sopenharmony_ci    if (rem == NULL)
46e1051a39Sopenharmony_ci        rem = BN_CTX_get(ctx);
47e1051a39Sopenharmony_ci    if (D == NULL || dv == NULL || rem == NULL)
48e1051a39Sopenharmony_ci        goto end;
49e1051a39Sopenharmony_ci
50e1051a39Sopenharmony_ci    nd = BN_num_bits(d);
51e1051a39Sopenharmony_ci    nm = BN_num_bits(m);
52e1051a39Sopenharmony_ci    if (BN_copy(D, d) == NULL)
53e1051a39Sopenharmony_ci        goto end;
54e1051a39Sopenharmony_ci    if (BN_copy(rem, m) == NULL)
55e1051a39Sopenharmony_ci        goto end;
56e1051a39Sopenharmony_ci
57e1051a39Sopenharmony_ci    /*
58e1051a39Sopenharmony_ci     * The next 2 are needed so we can do a dv->d[0]|=1 later since
59e1051a39Sopenharmony_ci     * BN_lshift1 will only work once there is a value :-)
60e1051a39Sopenharmony_ci     */
61e1051a39Sopenharmony_ci    BN_zero(dv);
62e1051a39Sopenharmony_ci    if (bn_wexpand(dv, 1) == NULL)
63e1051a39Sopenharmony_ci        goto end;
64e1051a39Sopenharmony_ci    dv->top = 1;
65e1051a39Sopenharmony_ci
66e1051a39Sopenharmony_ci    if (!BN_lshift(D, D, nm - nd))
67e1051a39Sopenharmony_ci        goto end;
68e1051a39Sopenharmony_ci    for (i = nm - nd; i >= 0; i--) {
69e1051a39Sopenharmony_ci        if (!BN_lshift1(dv, dv))
70e1051a39Sopenharmony_ci            goto end;
71e1051a39Sopenharmony_ci        if (BN_ucmp(rem, D) >= 0) {
72e1051a39Sopenharmony_ci            dv->d[0] |= 1;
73e1051a39Sopenharmony_ci            if (!BN_usub(rem, rem, D))
74e1051a39Sopenharmony_ci                goto end;
75e1051a39Sopenharmony_ci        }
76e1051a39Sopenharmony_ci/* CAN IMPROVE (and have now :=) */
77e1051a39Sopenharmony_ci        if (!BN_rshift1(D, D))
78e1051a39Sopenharmony_ci            goto end;
79e1051a39Sopenharmony_ci    }
80e1051a39Sopenharmony_ci    rem->neg = BN_is_zero(rem) ? 0 : m->neg;
81e1051a39Sopenharmony_ci    dv->neg = m->neg ^ d->neg;
82e1051a39Sopenharmony_ci    ret = 1;
83e1051a39Sopenharmony_ci end:
84e1051a39Sopenharmony_ci    BN_CTX_end(ctx);
85e1051a39Sopenharmony_ci    return ret;
86e1051a39Sopenharmony_ci}
87e1051a39Sopenharmony_ci
88e1051a39Sopenharmony_ci#else
89e1051a39Sopenharmony_ci
90e1051a39Sopenharmony_ci# if defined(BN_DIV3W)
91e1051a39Sopenharmony_ciBN_ULONG bn_div_3_words(const BN_ULONG *m, BN_ULONG d1, BN_ULONG d0);
92e1051a39Sopenharmony_ci# elif 0
93e1051a39Sopenharmony_ci/*
94e1051a39Sopenharmony_ci * This is #if-ed away, because it's a reference for assembly implementations,
95e1051a39Sopenharmony_ci * where it can and should be made constant-time. But if you want to test it,
96e1051a39Sopenharmony_ci * just replace 0 with 1.
97e1051a39Sopenharmony_ci */
98e1051a39Sopenharmony_ci#  if BN_BITS2 == 64 && defined(__SIZEOF_INT128__) && __SIZEOF_INT128__==16
99e1051a39Sopenharmony_ci#   undef BN_ULLONG
100e1051a39Sopenharmony_ci#   define BN_ULLONG uint128_t
101e1051a39Sopenharmony_ci#   define BN_LLONG
102e1051a39Sopenharmony_ci#  endif
103e1051a39Sopenharmony_ci
104e1051a39Sopenharmony_ci#  ifdef BN_LLONG
105e1051a39Sopenharmony_ci#   define BN_DIV3W
106e1051a39Sopenharmony_ci/*
107e1051a39Sopenharmony_ci * Interface is somewhat quirky, |m| is pointer to most significant limb,
108e1051a39Sopenharmony_ci * and less significant limb is referred at |m[-1]|. This means that caller
109e1051a39Sopenharmony_ci * is responsible for ensuring that |m[-1]| is valid. Second condition that
110e1051a39Sopenharmony_ci * has to be met is that |d0|'s most significant bit has to be set. Or in
111e1051a39Sopenharmony_ci * other words divisor has to be "bit-aligned to the left." bn_div_fixed_top
112e1051a39Sopenharmony_ci * does all this. The subroutine considers four limbs, two of which are
113e1051a39Sopenharmony_ci * "overlapping," hence the name...
114e1051a39Sopenharmony_ci */
115e1051a39Sopenharmony_cistatic BN_ULONG bn_div_3_words(const BN_ULONG *m, BN_ULONG d1, BN_ULONG d0)
116e1051a39Sopenharmony_ci{
117e1051a39Sopenharmony_ci    BN_ULLONG R = ((BN_ULLONG)m[0] << BN_BITS2) | m[-1];
118e1051a39Sopenharmony_ci    BN_ULLONG D = ((BN_ULLONG)d0 << BN_BITS2) | d1;
119e1051a39Sopenharmony_ci    BN_ULONG Q = 0, mask;
120e1051a39Sopenharmony_ci    int i;
121e1051a39Sopenharmony_ci
122e1051a39Sopenharmony_ci    for (i = 0; i < BN_BITS2; i++) {
123e1051a39Sopenharmony_ci        Q <<= 1;
124e1051a39Sopenharmony_ci        if (R >= D) {
125e1051a39Sopenharmony_ci            Q |= 1;
126e1051a39Sopenharmony_ci            R -= D;
127e1051a39Sopenharmony_ci        }
128e1051a39Sopenharmony_ci        D >>= 1;
129e1051a39Sopenharmony_ci    }
130e1051a39Sopenharmony_ci
131e1051a39Sopenharmony_ci    mask = 0 - (Q >> (BN_BITS2 - 1));   /* does it overflow? */
132e1051a39Sopenharmony_ci
133e1051a39Sopenharmony_ci    Q <<= 1;
134e1051a39Sopenharmony_ci    Q |= (R >= D);
135e1051a39Sopenharmony_ci
136e1051a39Sopenharmony_ci    return (Q | mask) & BN_MASK2;
137e1051a39Sopenharmony_ci}
138e1051a39Sopenharmony_ci#  endif
139e1051a39Sopenharmony_ci# endif
140e1051a39Sopenharmony_ci
141e1051a39Sopenharmony_cistatic int bn_left_align(BIGNUM *num)
142e1051a39Sopenharmony_ci{
143e1051a39Sopenharmony_ci    BN_ULONG *d = num->d, n, m, rmask;
144e1051a39Sopenharmony_ci    int top = num->top;
145e1051a39Sopenharmony_ci    int rshift = BN_num_bits_word(d[top - 1]), lshift, i;
146e1051a39Sopenharmony_ci
147e1051a39Sopenharmony_ci    lshift = BN_BITS2 - rshift;
148e1051a39Sopenharmony_ci    rshift %= BN_BITS2;            /* say no to undefined behaviour */
149e1051a39Sopenharmony_ci    rmask = (BN_ULONG)0 - rshift;  /* rmask = 0 - (rshift != 0) */
150e1051a39Sopenharmony_ci    rmask |= rmask >> 8;
151e1051a39Sopenharmony_ci
152e1051a39Sopenharmony_ci    for (i = 0, m = 0; i < top; i++) {
153e1051a39Sopenharmony_ci        n = d[i];
154e1051a39Sopenharmony_ci        d[i] = ((n << lshift) | m) & BN_MASK2;
155e1051a39Sopenharmony_ci        m = (n >> rshift) & rmask;
156e1051a39Sopenharmony_ci    }
157e1051a39Sopenharmony_ci
158e1051a39Sopenharmony_ci    return lshift;
159e1051a39Sopenharmony_ci}
160e1051a39Sopenharmony_ci
161e1051a39Sopenharmony_ci# if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) \
162e1051a39Sopenharmony_ci    && !defined(PEDANTIC) && !defined(BN_DIV3W)
163e1051a39Sopenharmony_ci#  if defined(__GNUC__) && __GNUC__>=2
164e1051a39Sopenharmony_ci#   if defined(__i386) || defined (__i386__)
165e1051a39Sopenharmony_ci   /*-
166e1051a39Sopenharmony_ci    * There were two reasons for implementing this template:
167e1051a39Sopenharmony_ci    * - GNU C generates a call to a function (__udivdi3 to be exact)
168e1051a39Sopenharmony_ci    *   in reply to ((((BN_ULLONG)n0)<<BN_BITS2)|n1)/d0 (I fail to
169e1051a39Sopenharmony_ci    *   understand why...);
170e1051a39Sopenharmony_ci    * - divl doesn't only calculate quotient, but also leaves
171e1051a39Sopenharmony_ci    *   remainder in %edx which we can definitely use here:-)
172e1051a39Sopenharmony_ci    */
173e1051a39Sopenharmony_ci#    undef bn_div_words
174e1051a39Sopenharmony_ci#    define bn_div_words(n0,n1,d0)                \
175e1051a39Sopenharmony_ci        ({  asm volatile (                      \
176e1051a39Sopenharmony_ci                "divl   %4"                     \
177e1051a39Sopenharmony_ci                : "=a"(q), "=d"(rem)            \
178e1051a39Sopenharmony_ci                : "a"(n1), "d"(n0), "r"(d0)     \
179e1051a39Sopenharmony_ci                : "cc");                        \
180e1051a39Sopenharmony_ci            q;                                  \
181e1051a39Sopenharmony_ci        })
182e1051a39Sopenharmony_ci#    define REMAINDER_IS_ALREADY_CALCULATED
183e1051a39Sopenharmony_ci#   elif defined(__x86_64) && defined(SIXTY_FOUR_BIT_LONG)
184e1051a39Sopenharmony_ci   /*
185e1051a39Sopenharmony_ci    * Same story here, but it's 128-bit by 64-bit division. Wow!
186e1051a39Sopenharmony_ci    */
187e1051a39Sopenharmony_ci#    undef bn_div_words
188e1051a39Sopenharmony_ci#    define bn_div_words(n0,n1,d0)                \
189e1051a39Sopenharmony_ci        ({  asm volatile (                      \
190e1051a39Sopenharmony_ci                "divq   %4"                     \
191e1051a39Sopenharmony_ci                : "=a"(q), "=d"(rem)            \
192e1051a39Sopenharmony_ci                : "a"(n1), "d"(n0), "r"(d0)     \
193e1051a39Sopenharmony_ci                : "cc");                        \
194e1051a39Sopenharmony_ci            q;                                  \
195e1051a39Sopenharmony_ci        })
196e1051a39Sopenharmony_ci#    define REMAINDER_IS_ALREADY_CALCULATED
197e1051a39Sopenharmony_ci#   endif                       /* __<cpu> */
198e1051a39Sopenharmony_ci#  endif                        /* __GNUC__ */
199e1051a39Sopenharmony_ci# endif                         /* OPENSSL_NO_ASM */
200e1051a39Sopenharmony_ci
201e1051a39Sopenharmony_ci/*-
202e1051a39Sopenharmony_ci * BN_div computes  dv := num / divisor, rounding towards
203e1051a39Sopenharmony_ci * zero, and sets up rm  such that  dv*divisor + rm = num  holds.
204e1051a39Sopenharmony_ci * Thus:
205e1051a39Sopenharmony_ci *     dv->neg == num->neg ^ divisor->neg  (unless the result is zero)
206e1051a39Sopenharmony_ci *     rm->neg == num->neg                 (unless the remainder is zero)
207e1051a39Sopenharmony_ci * If 'dv' or 'rm' is NULL, the respective value is not returned.
208e1051a39Sopenharmony_ci */
209e1051a39Sopenharmony_ciint BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
210e1051a39Sopenharmony_ci           BN_CTX *ctx)
211e1051a39Sopenharmony_ci{
212e1051a39Sopenharmony_ci    int ret;
213e1051a39Sopenharmony_ci
214e1051a39Sopenharmony_ci    if (BN_is_zero(divisor)) {
215e1051a39Sopenharmony_ci        ERR_raise(ERR_LIB_BN, BN_R_DIV_BY_ZERO);
216e1051a39Sopenharmony_ci        return 0;
217e1051a39Sopenharmony_ci    }
218e1051a39Sopenharmony_ci
219e1051a39Sopenharmony_ci    /*
220e1051a39Sopenharmony_ci     * Invalid zero-padding would have particularly bad consequences so don't
221e1051a39Sopenharmony_ci     * just rely on bn_check_top() here (bn_check_top() works only for
222e1051a39Sopenharmony_ci     * BN_DEBUG builds)
223e1051a39Sopenharmony_ci     */
224e1051a39Sopenharmony_ci    if (divisor->d[divisor->top - 1] == 0) {
225e1051a39Sopenharmony_ci        ERR_raise(ERR_LIB_BN, BN_R_NOT_INITIALIZED);
226e1051a39Sopenharmony_ci        return 0;
227e1051a39Sopenharmony_ci    }
228e1051a39Sopenharmony_ci
229e1051a39Sopenharmony_ci    ret = bn_div_fixed_top(dv, rm, num, divisor, ctx);
230e1051a39Sopenharmony_ci
231e1051a39Sopenharmony_ci    if (ret) {
232e1051a39Sopenharmony_ci        if (dv != NULL)
233e1051a39Sopenharmony_ci            bn_correct_top(dv);
234e1051a39Sopenharmony_ci        if (rm != NULL)
235e1051a39Sopenharmony_ci            bn_correct_top(rm);
236e1051a39Sopenharmony_ci    }
237e1051a39Sopenharmony_ci
238e1051a39Sopenharmony_ci    return ret;
239e1051a39Sopenharmony_ci}
240e1051a39Sopenharmony_ci
241e1051a39Sopenharmony_ci/*
242e1051a39Sopenharmony_ci * It's argued that *length* of *significant* part of divisor is public.
243e1051a39Sopenharmony_ci * Even if it's private modulus that is. Again, *length* is assumed
244e1051a39Sopenharmony_ci * public, but not *value*. Former is likely to be pre-defined by
245e1051a39Sopenharmony_ci * algorithm with bit granularity, though below subroutine is invariant
246e1051a39Sopenharmony_ci * of limb length. Thanks to this assumption we can require that |divisor|
247e1051a39Sopenharmony_ci * may not be zero-padded, yet claim this subroutine "constant-time"(*).
248e1051a39Sopenharmony_ci * This is because zero-padded dividend, |num|, is tolerated, so that
249e1051a39Sopenharmony_ci * caller can pass dividend of public length(*), but with smaller amount
250e1051a39Sopenharmony_ci * of significant limbs. This naturally means that quotient, |dv|, would
251e1051a39Sopenharmony_ci * contain correspongly less significant limbs as well, and will be zero-
252e1051a39Sopenharmony_ci * padded accordingly. Returned remainder, |rm|, will have same bit length
253e1051a39Sopenharmony_ci * as divisor, also zero-padded if needed. These actually leave sign bits
254e1051a39Sopenharmony_ci * in ambiguous state. In sense that we try to avoid negative zeros, while
255e1051a39Sopenharmony_ci * zero-padded zeros would retain sign.
256e1051a39Sopenharmony_ci *
257e1051a39Sopenharmony_ci * (*) "Constant-time-ness" has two pre-conditions:
258e1051a39Sopenharmony_ci *
259e1051a39Sopenharmony_ci *     - availability of constant-time bn_div_3_words;
260e1051a39Sopenharmony_ci *     - dividend is at least as "wide" as divisor, limb-wise, zero-padded
261e1051a39Sopenharmony_ci *       if so required, which shouldn't be a privacy problem, because
262e1051a39Sopenharmony_ci *       divisor's length is considered public;
263e1051a39Sopenharmony_ci */
264e1051a39Sopenharmony_ciint bn_div_fixed_top(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num,
265e1051a39Sopenharmony_ci                     const BIGNUM *divisor, BN_CTX *ctx)
266e1051a39Sopenharmony_ci{
267e1051a39Sopenharmony_ci    int norm_shift, i, j, loop;
268e1051a39Sopenharmony_ci    BIGNUM *tmp, *snum, *sdiv, *res;
269e1051a39Sopenharmony_ci    BN_ULONG *resp, *wnum, *wnumtop;
270e1051a39Sopenharmony_ci    BN_ULONG d0, d1;
271e1051a39Sopenharmony_ci    int num_n, div_n, num_neg;
272e1051a39Sopenharmony_ci
273e1051a39Sopenharmony_ci    assert(divisor->top > 0 && divisor->d[divisor->top - 1] != 0);
274e1051a39Sopenharmony_ci
275e1051a39Sopenharmony_ci    bn_check_top(num);
276e1051a39Sopenharmony_ci    bn_check_top(divisor);
277e1051a39Sopenharmony_ci    bn_check_top(dv);
278e1051a39Sopenharmony_ci    bn_check_top(rm);
279e1051a39Sopenharmony_ci
280e1051a39Sopenharmony_ci    BN_CTX_start(ctx);
281e1051a39Sopenharmony_ci    res = (dv == NULL) ? BN_CTX_get(ctx) : dv;
282e1051a39Sopenharmony_ci    tmp = BN_CTX_get(ctx);
283e1051a39Sopenharmony_ci    snum = BN_CTX_get(ctx);
284e1051a39Sopenharmony_ci    sdiv = BN_CTX_get(ctx);
285e1051a39Sopenharmony_ci    if (sdiv == NULL)
286e1051a39Sopenharmony_ci        goto err;
287e1051a39Sopenharmony_ci
288e1051a39Sopenharmony_ci    /* First we normalise the numbers */
289e1051a39Sopenharmony_ci    if (!BN_copy(sdiv, divisor))
290e1051a39Sopenharmony_ci        goto err;
291e1051a39Sopenharmony_ci    norm_shift = bn_left_align(sdiv);
292e1051a39Sopenharmony_ci    sdiv->neg = 0;
293e1051a39Sopenharmony_ci    /*
294e1051a39Sopenharmony_ci     * Note that bn_lshift_fixed_top's output is always one limb longer
295e1051a39Sopenharmony_ci     * than input, even when norm_shift is zero. This means that amount of
296e1051a39Sopenharmony_ci     * inner loop iterations is invariant of dividend value, and that one
297e1051a39Sopenharmony_ci     * doesn't need to compare dividend and divisor if they were originally
298e1051a39Sopenharmony_ci     * of the same bit length.
299e1051a39Sopenharmony_ci     */
300e1051a39Sopenharmony_ci    if (!(bn_lshift_fixed_top(snum, num, norm_shift)))
301e1051a39Sopenharmony_ci        goto err;
302e1051a39Sopenharmony_ci
303e1051a39Sopenharmony_ci    div_n = sdiv->top;
304e1051a39Sopenharmony_ci    num_n = snum->top;
305e1051a39Sopenharmony_ci
306e1051a39Sopenharmony_ci    if (num_n <= div_n) {
307e1051a39Sopenharmony_ci        /* caller didn't pad dividend -> no constant-time guarantee... */
308e1051a39Sopenharmony_ci        if (bn_wexpand(snum, div_n + 1) == NULL)
309e1051a39Sopenharmony_ci            goto err;
310e1051a39Sopenharmony_ci        memset(&(snum->d[num_n]), 0, (div_n - num_n + 1) * sizeof(BN_ULONG));
311e1051a39Sopenharmony_ci        snum->top = num_n = div_n + 1;
312e1051a39Sopenharmony_ci    }
313e1051a39Sopenharmony_ci
314e1051a39Sopenharmony_ci    loop = num_n - div_n;
315e1051a39Sopenharmony_ci    /*
316e1051a39Sopenharmony_ci     * Lets setup a 'window' into snum This is the part that corresponds to
317e1051a39Sopenharmony_ci     * the current 'area' being divided
318e1051a39Sopenharmony_ci     */
319e1051a39Sopenharmony_ci    wnum = &(snum->d[loop]);
320e1051a39Sopenharmony_ci    wnumtop = &(snum->d[num_n - 1]);
321e1051a39Sopenharmony_ci
322e1051a39Sopenharmony_ci    /* Get the top 2 words of sdiv */
323e1051a39Sopenharmony_ci    d0 = sdiv->d[div_n - 1];
324e1051a39Sopenharmony_ci    d1 = (div_n == 1) ? 0 : sdiv->d[div_n - 2];
325e1051a39Sopenharmony_ci
326e1051a39Sopenharmony_ci    /* Setup quotient */
327e1051a39Sopenharmony_ci    if (!bn_wexpand(res, loop))
328e1051a39Sopenharmony_ci        goto err;
329e1051a39Sopenharmony_ci    num_neg = num->neg;
330e1051a39Sopenharmony_ci    res->neg = (num_neg ^ divisor->neg);
331e1051a39Sopenharmony_ci    res->top = loop;
332e1051a39Sopenharmony_ci    res->flags |= BN_FLG_FIXED_TOP;
333e1051a39Sopenharmony_ci    resp = &(res->d[loop]);
334e1051a39Sopenharmony_ci
335e1051a39Sopenharmony_ci    /* space for temp */
336e1051a39Sopenharmony_ci    if (!bn_wexpand(tmp, (div_n + 1)))
337e1051a39Sopenharmony_ci        goto err;
338e1051a39Sopenharmony_ci
339e1051a39Sopenharmony_ci    for (i = 0; i < loop; i++, wnumtop--) {
340e1051a39Sopenharmony_ci        BN_ULONG q, l0;
341e1051a39Sopenharmony_ci        /*
342e1051a39Sopenharmony_ci         * the first part of the loop uses the top two words of snum and sdiv
343e1051a39Sopenharmony_ci         * to calculate a BN_ULONG q such that | wnum - sdiv * q | < sdiv
344e1051a39Sopenharmony_ci         */
345e1051a39Sopenharmony_ci# if defined(BN_DIV3W)
346e1051a39Sopenharmony_ci        q = bn_div_3_words(wnumtop, d1, d0);
347e1051a39Sopenharmony_ci# else
348e1051a39Sopenharmony_ci        BN_ULONG n0, n1, rem = 0;
349e1051a39Sopenharmony_ci
350e1051a39Sopenharmony_ci        n0 = wnumtop[0];
351e1051a39Sopenharmony_ci        n1 = wnumtop[-1];
352e1051a39Sopenharmony_ci        if (n0 == d0)
353e1051a39Sopenharmony_ci            q = BN_MASK2;
354e1051a39Sopenharmony_ci        else {                  /* n0 < d0 */
355e1051a39Sopenharmony_ci            BN_ULONG n2 = (wnumtop == wnum) ? 0 : wnumtop[-2];
356e1051a39Sopenharmony_ci#  ifdef BN_LLONG
357e1051a39Sopenharmony_ci            BN_ULLONG t2;
358e1051a39Sopenharmony_ci
359e1051a39Sopenharmony_ci#   if defined(BN_LLONG) && defined(BN_DIV2W) && !defined(bn_div_words)
360e1051a39Sopenharmony_ci            q = (BN_ULONG)(((((BN_ULLONG) n0) << BN_BITS2) | n1) / d0);
361e1051a39Sopenharmony_ci#   else
362e1051a39Sopenharmony_ci            q = bn_div_words(n0, n1, d0);
363e1051a39Sopenharmony_ci#   endif
364e1051a39Sopenharmony_ci
365e1051a39Sopenharmony_ci#   ifndef REMAINDER_IS_ALREADY_CALCULATED
366e1051a39Sopenharmony_ci            /*
367e1051a39Sopenharmony_ci             * rem doesn't have to be BN_ULLONG. The least we
368e1051a39Sopenharmony_ci             * know it's less that d0, isn't it?
369e1051a39Sopenharmony_ci             */
370e1051a39Sopenharmony_ci            rem = (n1 - q * d0) & BN_MASK2;
371e1051a39Sopenharmony_ci#   endif
372e1051a39Sopenharmony_ci            t2 = (BN_ULLONG) d1 *q;
373e1051a39Sopenharmony_ci
374e1051a39Sopenharmony_ci            for (;;) {
375e1051a39Sopenharmony_ci                if (t2 <= ((((BN_ULLONG) rem) << BN_BITS2) | n2))
376e1051a39Sopenharmony_ci                    break;
377e1051a39Sopenharmony_ci                q--;
378e1051a39Sopenharmony_ci                rem += d0;
379e1051a39Sopenharmony_ci                if (rem < d0)
380e1051a39Sopenharmony_ci                    break;      /* don't let rem overflow */
381e1051a39Sopenharmony_ci                t2 -= d1;
382e1051a39Sopenharmony_ci            }
383e1051a39Sopenharmony_ci#  else                         /* !BN_LLONG */
384e1051a39Sopenharmony_ci            BN_ULONG t2l, t2h;
385e1051a39Sopenharmony_ci
386e1051a39Sopenharmony_ci            q = bn_div_words(n0, n1, d0);
387e1051a39Sopenharmony_ci#   ifndef REMAINDER_IS_ALREADY_CALCULATED
388e1051a39Sopenharmony_ci            rem = (n1 - q * d0) & BN_MASK2;
389e1051a39Sopenharmony_ci#   endif
390e1051a39Sopenharmony_ci
391e1051a39Sopenharmony_ci#   if defined(BN_UMULT_LOHI)
392e1051a39Sopenharmony_ci            BN_UMULT_LOHI(t2l, t2h, d1, q);
393e1051a39Sopenharmony_ci#   elif defined(BN_UMULT_HIGH)
394e1051a39Sopenharmony_ci            t2l = d1 * q;
395e1051a39Sopenharmony_ci            t2h = BN_UMULT_HIGH(d1, q);
396e1051a39Sopenharmony_ci#   else
397e1051a39Sopenharmony_ci            {
398e1051a39Sopenharmony_ci                BN_ULONG ql, qh;
399e1051a39Sopenharmony_ci                t2l = LBITS(d1);
400e1051a39Sopenharmony_ci                t2h = HBITS(d1);
401e1051a39Sopenharmony_ci                ql = LBITS(q);
402e1051a39Sopenharmony_ci                qh = HBITS(q);
403e1051a39Sopenharmony_ci                mul64(t2l, t2h, ql, qh); /* t2=(BN_ULLONG)d1*q; */
404e1051a39Sopenharmony_ci            }
405e1051a39Sopenharmony_ci#   endif
406e1051a39Sopenharmony_ci
407e1051a39Sopenharmony_ci            for (;;) {
408e1051a39Sopenharmony_ci                if ((t2h < rem) || ((t2h == rem) && (t2l <= n2)))
409e1051a39Sopenharmony_ci                    break;
410e1051a39Sopenharmony_ci                q--;
411e1051a39Sopenharmony_ci                rem += d0;
412e1051a39Sopenharmony_ci                if (rem < d0)
413e1051a39Sopenharmony_ci                    break;      /* don't let rem overflow */
414e1051a39Sopenharmony_ci                if (t2l < d1)
415e1051a39Sopenharmony_ci                    t2h--;
416e1051a39Sopenharmony_ci                t2l -= d1;
417e1051a39Sopenharmony_ci            }
418e1051a39Sopenharmony_ci#  endif                        /* !BN_LLONG */
419e1051a39Sopenharmony_ci        }
420e1051a39Sopenharmony_ci# endif                         /* !BN_DIV3W */
421e1051a39Sopenharmony_ci
422e1051a39Sopenharmony_ci        l0 = bn_mul_words(tmp->d, sdiv->d, div_n, q);
423e1051a39Sopenharmony_ci        tmp->d[div_n] = l0;
424e1051a39Sopenharmony_ci        wnum--;
425e1051a39Sopenharmony_ci        /*
426e1051a39Sopenharmony_ci         * ignore top values of the bignums just sub the two BN_ULONG arrays
427e1051a39Sopenharmony_ci         * with bn_sub_words
428e1051a39Sopenharmony_ci         */
429e1051a39Sopenharmony_ci        l0 = bn_sub_words(wnum, wnum, tmp->d, div_n + 1);
430e1051a39Sopenharmony_ci        q -= l0;
431e1051a39Sopenharmony_ci        /*
432e1051a39Sopenharmony_ci         * Note: As we have considered only the leading two BN_ULONGs in
433e1051a39Sopenharmony_ci         * the calculation of q, sdiv * q might be greater than wnum (but
434e1051a39Sopenharmony_ci         * then (q-1) * sdiv is less or equal than wnum)
435e1051a39Sopenharmony_ci         */
436e1051a39Sopenharmony_ci        for (l0 = 0 - l0, j = 0; j < div_n; j++)
437e1051a39Sopenharmony_ci            tmp->d[j] = sdiv->d[j] & l0;
438e1051a39Sopenharmony_ci        l0 = bn_add_words(wnum, wnum, tmp->d, div_n);
439e1051a39Sopenharmony_ci        (*wnumtop) += l0;
440e1051a39Sopenharmony_ci        assert((*wnumtop) == 0);
441e1051a39Sopenharmony_ci
442e1051a39Sopenharmony_ci        /* store part of the result */
443e1051a39Sopenharmony_ci        *--resp = q;
444e1051a39Sopenharmony_ci    }
445e1051a39Sopenharmony_ci    /* snum holds remainder, it's as wide as divisor */
446e1051a39Sopenharmony_ci    snum->neg = num_neg;
447e1051a39Sopenharmony_ci    snum->top = div_n;
448e1051a39Sopenharmony_ci    snum->flags |= BN_FLG_FIXED_TOP;
449e1051a39Sopenharmony_ci
450e1051a39Sopenharmony_ci    if (rm != NULL && bn_rshift_fixed_top(rm, snum, norm_shift) == 0)
451e1051a39Sopenharmony_ci        goto err;
452e1051a39Sopenharmony_ci
453e1051a39Sopenharmony_ci    BN_CTX_end(ctx);
454e1051a39Sopenharmony_ci    return 1;
455e1051a39Sopenharmony_ci err:
456e1051a39Sopenharmony_ci    bn_check_top(rm);
457e1051a39Sopenharmony_ci    BN_CTX_end(ctx);
458e1051a39Sopenharmony_ci    return 0;
459e1051a39Sopenharmony_ci}
460e1051a39Sopenharmony_ci#endif
461