1e1051a39Sopenharmony_ci/* 2e1051a39Sopenharmony_ci * Copyright 1995-2022 The OpenSSL Project Authors. All Rights Reserved. 3e1051a39Sopenharmony_ci * 4e1051a39Sopenharmony_ci * Licensed under the Apache License 2.0 (the "License"). You may not use 5e1051a39Sopenharmony_ci * this file except in compliance with the License. You can obtain a copy 6e1051a39Sopenharmony_ci * in the file LICENSE in the source distribution or at 7e1051a39Sopenharmony_ci * https://www.openssl.org/source/license.html 8e1051a39Sopenharmony_ci */ 9e1051a39Sopenharmony_ci 10e1051a39Sopenharmony_ci#include <assert.h> 11e1051a39Sopenharmony_ci#include <openssl/bn.h> 12e1051a39Sopenharmony_ci#include "internal/cryptlib.h" 13e1051a39Sopenharmony_ci#include "bn_local.h" 14e1051a39Sopenharmony_ci 15e1051a39Sopenharmony_ci/* The old slow way */ 16e1051a39Sopenharmony_ci#if 0 17e1051a39Sopenharmony_ciint BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d, 18e1051a39Sopenharmony_ci BN_CTX *ctx) 19e1051a39Sopenharmony_ci{ 20e1051a39Sopenharmony_ci int i, nm, nd; 21e1051a39Sopenharmony_ci int ret = 0; 22e1051a39Sopenharmony_ci BIGNUM *D; 23e1051a39Sopenharmony_ci 24e1051a39Sopenharmony_ci bn_check_top(m); 25e1051a39Sopenharmony_ci bn_check_top(d); 26e1051a39Sopenharmony_ci if (BN_is_zero(d)) { 27e1051a39Sopenharmony_ci ERR_raise(ERR_LIB_BN, BN_R_DIV_BY_ZERO); 28e1051a39Sopenharmony_ci return 0; 29e1051a39Sopenharmony_ci } 30e1051a39Sopenharmony_ci 31e1051a39Sopenharmony_ci if (BN_ucmp(m, d) < 0) { 32e1051a39Sopenharmony_ci if (rem != NULL) { 33e1051a39Sopenharmony_ci if (BN_copy(rem, m) == NULL) 34e1051a39Sopenharmony_ci return 0; 35e1051a39Sopenharmony_ci } 36e1051a39Sopenharmony_ci if (dv != NULL) 37e1051a39Sopenharmony_ci BN_zero(dv); 38e1051a39Sopenharmony_ci return 1; 39e1051a39Sopenharmony_ci } 40e1051a39Sopenharmony_ci 41e1051a39Sopenharmony_ci BN_CTX_start(ctx); 42e1051a39Sopenharmony_ci D = BN_CTX_get(ctx); 43e1051a39Sopenharmony_ci if (dv == NULL) 44e1051a39Sopenharmony_ci dv = BN_CTX_get(ctx); 45e1051a39Sopenharmony_ci if (rem == NULL) 46e1051a39Sopenharmony_ci rem = BN_CTX_get(ctx); 47e1051a39Sopenharmony_ci if (D == NULL || dv == NULL || rem == NULL) 48e1051a39Sopenharmony_ci goto end; 49e1051a39Sopenharmony_ci 50e1051a39Sopenharmony_ci nd = BN_num_bits(d); 51e1051a39Sopenharmony_ci nm = BN_num_bits(m); 52e1051a39Sopenharmony_ci if (BN_copy(D, d) == NULL) 53e1051a39Sopenharmony_ci goto end; 54e1051a39Sopenharmony_ci if (BN_copy(rem, m) == NULL) 55e1051a39Sopenharmony_ci goto end; 56e1051a39Sopenharmony_ci 57e1051a39Sopenharmony_ci /* 58e1051a39Sopenharmony_ci * The next 2 are needed so we can do a dv->d[0]|=1 later since 59e1051a39Sopenharmony_ci * BN_lshift1 will only work once there is a value :-) 60e1051a39Sopenharmony_ci */ 61e1051a39Sopenharmony_ci BN_zero(dv); 62e1051a39Sopenharmony_ci if (bn_wexpand(dv, 1) == NULL) 63e1051a39Sopenharmony_ci goto end; 64e1051a39Sopenharmony_ci dv->top = 1; 65e1051a39Sopenharmony_ci 66e1051a39Sopenharmony_ci if (!BN_lshift(D, D, nm - nd)) 67e1051a39Sopenharmony_ci goto end; 68e1051a39Sopenharmony_ci for (i = nm - nd; i >= 0; i--) { 69e1051a39Sopenharmony_ci if (!BN_lshift1(dv, dv)) 70e1051a39Sopenharmony_ci goto end; 71e1051a39Sopenharmony_ci if (BN_ucmp(rem, D) >= 0) { 72e1051a39Sopenharmony_ci dv->d[0] |= 1; 73e1051a39Sopenharmony_ci if (!BN_usub(rem, rem, D)) 74e1051a39Sopenharmony_ci goto end; 75e1051a39Sopenharmony_ci } 76e1051a39Sopenharmony_ci/* CAN IMPROVE (and have now :=) */ 77e1051a39Sopenharmony_ci if (!BN_rshift1(D, D)) 78e1051a39Sopenharmony_ci goto end; 79e1051a39Sopenharmony_ci } 80e1051a39Sopenharmony_ci rem->neg = BN_is_zero(rem) ? 0 : m->neg; 81e1051a39Sopenharmony_ci dv->neg = m->neg ^ d->neg; 82e1051a39Sopenharmony_ci ret = 1; 83e1051a39Sopenharmony_ci end: 84e1051a39Sopenharmony_ci BN_CTX_end(ctx); 85e1051a39Sopenharmony_ci return ret; 86e1051a39Sopenharmony_ci} 87e1051a39Sopenharmony_ci 88e1051a39Sopenharmony_ci#else 89e1051a39Sopenharmony_ci 90e1051a39Sopenharmony_ci# if defined(BN_DIV3W) 91e1051a39Sopenharmony_ciBN_ULONG bn_div_3_words(const BN_ULONG *m, BN_ULONG d1, BN_ULONG d0); 92e1051a39Sopenharmony_ci# elif 0 93e1051a39Sopenharmony_ci/* 94e1051a39Sopenharmony_ci * This is #if-ed away, because it's a reference for assembly implementations, 95e1051a39Sopenharmony_ci * where it can and should be made constant-time. But if you want to test it, 96e1051a39Sopenharmony_ci * just replace 0 with 1. 97e1051a39Sopenharmony_ci */ 98e1051a39Sopenharmony_ci# if BN_BITS2 == 64 && defined(__SIZEOF_INT128__) && __SIZEOF_INT128__==16 99e1051a39Sopenharmony_ci# undef BN_ULLONG 100e1051a39Sopenharmony_ci# define BN_ULLONG uint128_t 101e1051a39Sopenharmony_ci# define BN_LLONG 102e1051a39Sopenharmony_ci# endif 103e1051a39Sopenharmony_ci 104e1051a39Sopenharmony_ci# ifdef BN_LLONG 105e1051a39Sopenharmony_ci# define BN_DIV3W 106e1051a39Sopenharmony_ci/* 107e1051a39Sopenharmony_ci * Interface is somewhat quirky, |m| is pointer to most significant limb, 108e1051a39Sopenharmony_ci * and less significant limb is referred at |m[-1]|. This means that caller 109e1051a39Sopenharmony_ci * is responsible for ensuring that |m[-1]| is valid. Second condition that 110e1051a39Sopenharmony_ci * has to be met is that |d0|'s most significant bit has to be set. Or in 111e1051a39Sopenharmony_ci * other words divisor has to be "bit-aligned to the left." bn_div_fixed_top 112e1051a39Sopenharmony_ci * does all this. The subroutine considers four limbs, two of which are 113e1051a39Sopenharmony_ci * "overlapping," hence the name... 114e1051a39Sopenharmony_ci */ 115e1051a39Sopenharmony_cistatic BN_ULONG bn_div_3_words(const BN_ULONG *m, BN_ULONG d1, BN_ULONG d0) 116e1051a39Sopenharmony_ci{ 117e1051a39Sopenharmony_ci BN_ULLONG R = ((BN_ULLONG)m[0] << BN_BITS2) | m[-1]; 118e1051a39Sopenharmony_ci BN_ULLONG D = ((BN_ULLONG)d0 << BN_BITS2) | d1; 119e1051a39Sopenharmony_ci BN_ULONG Q = 0, mask; 120e1051a39Sopenharmony_ci int i; 121e1051a39Sopenharmony_ci 122e1051a39Sopenharmony_ci for (i = 0; i < BN_BITS2; i++) { 123e1051a39Sopenharmony_ci Q <<= 1; 124e1051a39Sopenharmony_ci if (R >= D) { 125e1051a39Sopenharmony_ci Q |= 1; 126e1051a39Sopenharmony_ci R -= D; 127e1051a39Sopenharmony_ci } 128e1051a39Sopenharmony_ci D >>= 1; 129e1051a39Sopenharmony_ci } 130e1051a39Sopenharmony_ci 131e1051a39Sopenharmony_ci mask = 0 - (Q >> (BN_BITS2 - 1)); /* does it overflow? */ 132e1051a39Sopenharmony_ci 133e1051a39Sopenharmony_ci Q <<= 1; 134e1051a39Sopenharmony_ci Q |= (R >= D); 135e1051a39Sopenharmony_ci 136e1051a39Sopenharmony_ci return (Q | mask) & BN_MASK2; 137e1051a39Sopenharmony_ci} 138e1051a39Sopenharmony_ci# endif 139e1051a39Sopenharmony_ci# endif 140e1051a39Sopenharmony_ci 141e1051a39Sopenharmony_cistatic int bn_left_align(BIGNUM *num) 142e1051a39Sopenharmony_ci{ 143e1051a39Sopenharmony_ci BN_ULONG *d = num->d, n, m, rmask; 144e1051a39Sopenharmony_ci int top = num->top; 145e1051a39Sopenharmony_ci int rshift = BN_num_bits_word(d[top - 1]), lshift, i; 146e1051a39Sopenharmony_ci 147e1051a39Sopenharmony_ci lshift = BN_BITS2 - rshift; 148e1051a39Sopenharmony_ci rshift %= BN_BITS2; /* say no to undefined behaviour */ 149e1051a39Sopenharmony_ci rmask = (BN_ULONG)0 - rshift; /* rmask = 0 - (rshift != 0) */ 150e1051a39Sopenharmony_ci rmask |= rmask >> 8; 151e1051a39Sopenharmony_ci 152e1051a39Sopenharmony_ci for (i = 0, m = 0; i < top; i++) { 153e1051a39Sopenharmony_ci n = d[i]; 154e1051a39Sopenharmony_ci d[i] = ((n << lshift) | m) & BN_MASK2; 155e1051a39Sopenharmony_ci m = (n >> rshift) & rmask; 156e1051a39Sopenharmony_ci } 157e1051a39Sopenharmony_ci 158e1051a39Sopenharmony_ci return lshift; 159e1051a39Sopenharmony_ci} 160e1051a39Sopenharmony_ci 161e1051a39Sopenharmony_ci# if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) \ 162e1051a39Sopenharmony_ci && !defined(PEDANTIC) && !defined(BN_DIV3W) 163e1051a39Sopenharmony_ci# if defined(__GNUC__) && __GNUC__>=2 164e1051a39Sopenharmony_ci# if defined(__i386) || defined (__i386__) 165e1051a39Sopenharmony_ci /*- 166e1051a39Sopenharmony_ci * There were two reasons for implementing this template: 167e1051a39Sopenharmony_ci * - GNU C generates a call to a function (__udivdi3 to be exact) 168e1051a39Sopenharmony_ci * in reply to ((((BN_ULLONG)n0)<<BN_BITS2)|n1)/d0 (I fail to 169e1051a39Sopenharmony_ci * understand why...); 170e1051a39Sopenharmony_ci * - divl doesn't only calculate quotient, but also leaves 171e1051a39Sopenharmony_ci * remainder in %edx which we can definitely use here:-) 172e1051a39Sopenharmony_ci */ 173e1051a39Sopenharmony_ci# undef bn_div_words 174e1051a39Sopenharmony_ci# define bn_div_words(n0,n1,d0) \ 175e1051a39Sopenharmony_ci ({ asm volatile ( \ 176e1051a39Sopenharmony_ci "divl %4" \ 177e1051a39Sopenharmony_ci : "=a"(q), "=d"(rem) \ 178e1051a39Sopenharmony_ci : "a"(n1), "d"(n0), "r"(d0) \ 179e1051a39Sopenharmony_ci : "cc"); \ 180e1051a39Sopenharmony_ci q; \ 181e1051a39Sopenharmony_ci }) 182e1051a39Sopenharmony_ci# define REMAINDER_IS_ALREADY_CALCULATED 183e1051a39Sopenharmony_ci# elif defined(__x86_64) && defined(SIXTY_FOUR_BIT_LONG) 184e1051a39Sopenharmony_ci /* 185e1051a39Sopenharmony_ci * Same story here, but it's 128-bit by 64-bit division. Wow! 186e1051a39Sopenharmony_ci */ 187e1051a39Sopenharmony_ci# undef bn_div_words 188e1051a39Sopenharmony_ci# define bn_div_words(n0,n1,d0) \ 189e1051a39Sopenharmony_ci ({ asm volatile ( \ 190e1051a39Sopenharmony_ci "divq %4" \ 191e1051a39Sopenharmony_ci : "=a"(q), "=d"(rem) \ 192e1051a39Sopenharmony_ci : "a"(n1), "d"(n0), "r"(d0) \ 193e1051a39Sopenharmony_ci : "cc"); \ 194e1051a39Sopenharmony_ci q; \ 195e1051a39Sopenharmony_ci }) 196e1051a39Sopenharmony_ci# define REMAINDER_IS_ALREADY_CALCULATED 197e1051a39Sopenharmony_ci# endif /* __<cpu> */ 198e1051a39Sopenharmony_ci# endif /* __GNUC__ */ 199e1051a39Sopenharmony_ci# endif /* OPENSSL_NO_ASM */ 200e1051a39Sopenharmony_ci 201e1051a39Sopenharmony_ci/*- 202e1051a39Sopenharmony_ci * BN_div computes dv := num / divisor, rounding towards 203e1051a39Sopenharmony_ci * zero, and sets up rm such that dv*divisor + rm = num holds. 204e1051a39Sopenharmony_ci * Thus: 205e1051a39Sopenharmony_ci * dv->neg == num->neg ^ divisor->neg (unless the result is zero) 206e1051a39Sopenharmony_ci * rm->neg == num->neg (unless the remainder is zero) 207e1051a39Sopenharmony_ci * If 'dv' or 'rm' is NULL, the respective value is not returned. 208e1051a39Sopenharmony_ci */ 209e1051a39Sopenharmony_ciint BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor, 210e1051a39Sopenharmony_ci BN_CTX *ctx) 211e1051a39Sopenharmony_ci{ 212e1051a39Sopenharmony_ci int ret; 213e1051a39Sopenharmony_ci 214e1051a39Sopenharmony_ci if (BN_is_zero(divisor)) { 215e1051a39Sopenharmony_ci ERR_raise(ERR_LIB_BN, BN_R_DIV_BY_ZERO); 216e1051a39Sopenharmony_ci return 0; 217e1051a39Sopenharmony_ci } 218e1051a39Sopenharmony_ci 219e1051a39Sopenharmony_ci /* 220e1051a39Sopenharmony_ci * Invalid zero-padding would have particularly bad consequences so don't 221e1051a39Sopenharmony_ci * just rely on bn_check_top() here (bn_check_top() works only for 222e1051a39Sopenharmony_ci * BN_DEBUG builds) 223e1051a39Sopenharmony_ci */ 224e1051a39Sopenharmony_ci if (divisor->d[divisor->top - 1] == 0) { 225e1051a39Sopenharmony_ci ERR_raise(ERR_LIB_BN, BN_R_NOT_INITIALIZED); 226e1051a39Sopenharmony_ci return 0; 227e1051a39Sopenharmony_ci } 228e1051a39Sopenharmony_ci 229e1051a39Sopenharmony_ci ret = bn_div_fixed_top(dv, rm, num, divisor, ctx); 230e1051a39Sopenharmony_ci 231e1051a39Sopenharmony_ci if (ret) { 232e1051a39Sopenharmony_ci if (dv != NULL) 233e1051a39Sopenharmony_ci bn_correct_top(dv); 234e1051a39Sopenharmony_ci if (rm != NULL) 235e1051a39Sopenharmony_ci bn_correct_top(rm); 236e1051a39Sopenharmony_ci } 237e1051a39Sopenharmony_ci 238e1051a39Sopenharmony_ci return ret; 239e1051a39Sopenharmony_ci} 240e1051a39Sopenharmony_ci 241e1051a39Sopenharmony_ci/* 242e1051a39Sopenharmony_ci * It's argued that *length* of *significant* part of divisor is public. 243e1051a39Sopenharmony_ci * Even if it's private modulus that is. Again, *length* is assumed 244e1051a39Sopenharmony_ci * public, but not *value*. Former is likely to be pre-defined by 245e1051a39Sopenharmony_ci * algorithm with bit granularity, though below subroutine is invariant 246e1051a39Sopenharmony_ci * of limb length. Thanks to this assumption we can require that |divisor| 247e1051a39Sopenharmony_ci * may not be zero-padded, yet claim this subroutine "constant-time"(*). 248e1051a39Sopenharmony_ci * This is because zero-padded dividend, |num|, is tolerated, so that 249e1051a39Sopenharmony_ci * caller can pass dividend of public length(*), but with smaller amount 250e1051a39Sopenharmony_ci * of significant limbs. This naturally means that quotient, |dv|, would 251e1051a39Sopenharmony_ci * contain correspongly less significant limbs as well, and will be zero- 252e1051a39Sopenharmony_ci * padded accordingly. Returned remainder, |rm|, will have same bit length 253e1051a39Sopenharmony_ci * as divisor, also zero-padded if needed. These actually leave sign bits 254e1051a39Sopenharmony_ci * in ambiguous state. In sense that we try to avoid negative zeros, while 255e1051a39Sopenharmony_ci * zero-padded zeros would retain sign. 256e1051a39Sopenharmony_ci * 257e1051a39Sopenharmony_ci * (*) "Constant-time-ness" has two pre-conditions: 258e1051a39Sopenharmony_ci * 259e1051a39Sopenharmony_ci * - availability of constant-time bn_div_3_words; 260e1051a39Sopenharmony_ci * - dividend is at least as "wide" as divisor, limb-wise, zero-padded 261e1051a39Sopenharmony_ci * if so required, which shouldn't be a privacy problem, because 262e1051a39Sopenharmony_ci * divisor's length is considered public; 263e1051a39Sopenharmony_ci */ 264e1051a39Sopenharmony_ciint bn_div_fixed_top(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, 265e1051a39Sopenharmony_ci const BIGNUM *divisor, BN_CTX *ctx) 266e1051a39Sopenharmony_ci{ 267e1051a39Sopenharmony_ci int norm_shift, i, j, loop; 268e1051a39Sopenharmony_ci BIGNUM *tmp, *snum, *sdiv, *res; 269e1051a39Sopenharmony_ci BN_ULONG *resp, *wnum, *wnumtop; 270e1051a39Sopenharmony_ci BN_ULONG d0, d1; 271e1051a39Sopenharmony_ci int num_n, div_n, num_neg; 272e1051a39Sopenharmony_ci 273e1051a39Sopenharmony_ci assert(divisor->top > 0 && divisor->d[divisor->top - 1] != 0); 274e1051a39Sopenharmony_ci 275e1051a39Sopenharmony_ci bn_check_top(num); 276e1051a39Sopenharmony_ci bn_check_top(divisor); 277e1051a39Sopenharmony_ci bn_check_top(dv); 278e1051a39Sopenharmony_ci bn_check_top(rm); 279e1051a39Sopenharmony_ci 280e1051a39Sopenharmony_ci BN_CTX_start(ctx); 281e1051a39Sopenharmony_ci res = (dv == NULL) ? BN_CTX_get(ctx) : dv; 282e1051a39Sopenharmony_ci tmp = BN_CTX_get(ctx); 283e1051a39Sopenharmony_ci snum = BN_CTX_get(ctx); 284e1051a39Sopenharmony_ci sdiv = BN_CTX_get(ctx); 285e1051a39Sopenharmony_ci if (sdiv == NULL) 286e1051a39Sopenharmony_ci goto err; 287e1051a39Sopenharmony_ci 288e1051a39Sopenharmony_ci /* First we normalise the numbers */ 289e1051a39Sopenharmony_ci if (!BN_copy(sdiv, divisor)) 290e1051a39Sopenharmony_ci goto err; 291e1051a39Sopenharmony_ci norm_shift = bn_left_align(sdiv); 292e1051a39Sopenharmony_ci sdiv->neg = 0; 293e1051a39Sopenharmony_ci /* 294e1051a39Sopenharmony_ci * Note that bn_lshift_fixed_top's output is always one limb longer 295e1051a39Sopenharmony_ci * than input, even when norm_shift is zero. This means that amount of 296e1051a39Sopenharmony_ci * inner loop iterations is invariant of dividend value, and that one 297e1051a39Sopenharmony_ci * doesn't need to compare dividend and divisor if they were originally 298e1051a39Sopenharmony_ci * of the same bit length. 299e1051a39Sopenharmony_ci */ 300e1051a39Sopenharmony_ci if (!(bn_lshift_fixed_top(snum, num, norm_shift))) 301e1051a39Sopenharmony_ci goto err; 302e1051a39Sopenharmony_ci 303e1051a39Sopenharmony_ci div_n = sdiv->top; 304e1051a39Sopenharmony_ci num_n = snum->top; 305e1051a39Sopenharmony_ci 306e1051a39Sopenharmony_ci if (num_n <= div_n) { 307e1051a39Sopenharmony_ci /* caller didn't pad dividend -> no constant-time guarantee... */ 308e1051a39Sopenharmony_ci if (bn_wexpand(snum, div_n + 1) == NULL) 309e1051a39Sopenharmony_ci goto err; 310e1051a39Sopenharmony_ci memset(&(snum->d[num_n]), 0, (div_n - num_n + 1) * sizeof(BN_ULONG)); 311e1051a39Sopenharmony_ci snum->top = num_n = div_n + 1; 312e1051a39Sopenharmony_ci } 313e1051a39Sopenharmony_ci 314e1051a39Sopenharmony_ci loop = num_n - div_n; 315e1051a39Sopenharmony_ci /* 316e1051a39Sopenharmony_ci * Lets setup a 'window' into snum This is the part that corresponds to 317e1051a39Sopenharmony_ci * the current 'area' being divided 318e1051a39Sopenharmony_ci */ 319e1051a39Sopenharmony_ci wnum = &(snum->d[loop]); 320e1051a39Sopenharmony_ci wnumtop = &(snum->d[num_n - 1]); 321e1051a39Sopenharmony_ci 322e1051a39Sopenharmony_ci /* Get the top 2 words of sdiv */ 323e1051a39Sopenharmony_ci d0 = sdiv->d[div_n - 1]; 324e1051a39Sopenharmony_ci d1 = (div_n == 1) ? 0 : sdiv->d[div_n - 2]; 325e1051a39Sopenharmony_ci 326e1051a39Sopenharmony_ci /* Setup quotient */ 327e1051a39Sopenharmony_ci if (!bn_wexpand(res, loop)) 328e1051a39Sopenharmony_ci goto err; 329e1051a39Sopenharmony_ci num_neg = num->neg; 330e1051a39Sopenharmony_ci res->neg = (num_neg ^ divisor->neg); 331e1051a39Sopenharmony_ci res->top = loop; 332e1051a39Sopenharmony_ci res->flags |= BN_FLG_FIXED_TOP; 333e1051a39Sopenharmony_ci resp = &(res->d[loop]); 334e1051a39Sopenharmony_ci 335e1051a39Sopenharmony_ci /* space for temp */ 336e1051a39Sopenharmony_ci if (!bn_wexpand(tmp, (div_n + 1))) 337e1051a39Sopenharmony_ci goto err; 338e1051a39Sopenharmony_ci 339e1051a39Sopenharmony_ci for (i = 0; i < loop; i++, wnumtop--) { 340e1051a39Sopenharmony_ci BN_ULONG q, l0; 341e1051a39Sopenharmony_ci /* 342e1051a39Sopenharmony_ci * the first part of the loop uses the top two words of snum and sdiv 343e1051a39Sopenharmony_ci * to calculate a BN_ULONG q such that | wnum - sdiv * q | < sdiv 344e1051a39Sopenharmony_ci */ 345e1051a39Sopenharmony_ci# if defined(BN_DIV3W) 346e1051a39Sopenharmony_ci q = bn_div_3_words(wnumtop, d1, d0); 347e1051a39Sopenharmony_ci# else 348e1051a39Sopenharmony_ci BN_ULONG n0, n1, rem = 0; 349e1051a39Sopenharmony_ci 350e1051a39Sopenharmony_ci n0 = wnumtop[0]; 351e1051a39Sopenharmony_ci n1 = wnumtop[-1]; 352e1051a39Sopenharmony_ci if (n0 == d0) 353e1051a39Sopenharmony_ci q = BN_MASK2; 354e1051a39Sopenharmony_ci else { /* n0 < d0 */ 355e1051a39Sopenharmony_ci BN_ULONG n2 = (wnumtop == wnum) ? 0 : wnumtop[-2]; 356e1051a39Sopenharmony_ci# ifdef BN_LLONG 357e1051a39Sopenharmony_ci BN_ULLONG t2; 358e1051a39Sopenharmony_ci 359e1051a39Sopenharmony_ci# if defined(BN_LLONG) && defined(BN_DIV2W) && !defined(bn_div_words) 360e1051a39Sopenharmony_ci q = (BN_ULONG)(((((BN_ULLONG) n0) << BN_BITS2) | n1) / d0); 361e1051a39Sopenharmony_ci# else 362e1051a39Sopenharmony_ci q = bn_div_words(n0, n1, d0); 363e1051a39Sopenharmony_ci# endif 364e1051a39Sopenharmony_ci 365e1051a39Sopenharmony_ci# ifndef REMAINDER_IS_ALREADY_CALCULATED 366e1051a39Sopenharmony_ci /* 367e1051a39Sopenharmony_ci * rem doesn't have to be BN_ULLONG. The least we 368e1051a39Sopenharmony_ci * know it's less that d0, isn't it? 369e1051a39Sopenharmony_ci */ 370e1051a39Sopenharmony_ci rem = (n1 - q * d0) & BN_MASK2; 371e1051a39Sopenharmony_ci# endif 372e1051a39Sopenharmony_ci t2 = (BN_ULLONG) d1 *q; 373e1051a39Sopenharmony_ci 374e1051a39Sopenharmony_ci for (;;) { 375e1051a39Sopenharmony_ci if (t2 <= ((((BN_ULLONG) rem) << BN_BITS2) | n2)) 376e1051a39Sopenharmony_ci break; 377e1051a39Sopenharmony_ci q--; 378e1051a39Sopenharmony_ci rem += d0; 379e1051a39Sopenharmony_ci if (rem < d0) 380e1051a39Sopenharmony_ci break; /* don't let rem overflow */ 381e1051a39Sopenharmony_ci t2 -= d1; 382e1051a39Sopenharmony_ci } 383e1051a39Sopenharmony_ci# else /* !BN_LLONG */ 384e1051a39Sopenharmony_ci BN_ULONG t2l, t2h; 385e1051a39Sopenharmony_ci 386e1051a39Sopenharmony_ci q = bn_div_words(n0, n1, d0); 387e1051a39Sopenharmony_ci# ifndef REMAINDER_IS_ALREADY_CALCULATED 388e1051a39Sopenharmony_ci rem = (n1 - q * d0) & BN_MASK2; 389e1051a39Sopenharmony_ci# endif 390e1051a39Sopenharmony_ci 391e1051a39Sopenharmony_ci# if defined(BN_UMULT_LOHI) 392e1051a39Sopenharmony_ci BN_UMULT_LOHI(t2l, t2h, d1, q); 393e1051a39Sopenharmony_ci# elif defined(BN_UMULT_HIGH) 394e1051a39Sopenharmony_ci t2l = d1 * q; 395e1051a39Sopenharmony_ci t2h = BN_UMULT_HIGH(d1, q); 396e1051a39Sopenharmony_ci# else 397e1051a39Sopenharmony_ci { 398e1051a39Sopenharmony_ci BN_ULONG ql, qh; 399e1051a39Sopenharmony_ci t2l = LBITS(d1); 400e1051a39Sopenharmony_ci t2h = HBITS(d1); 401e1051a39Sopenharmony_ci ql = LBITS(q); 402e1051a39Sopenharmony_ci qh = HBITS(q); 403e1051a39Sopenharmony_ci mul64(t2l, t2h, ql, qh); /* t2=(BN_ULLONG)d1*q; */ 404e1051a39Sopenharmony_ci } 405e1051a39Sopenharmony_ci# endif 406e1051a39Sopenharmony_ci 407e1051a39Sopenharmony_ci for (;;) { 408e1051a39Sopenharmony_ci if ((t2h < rem) || ((t2h == rem) && (t2l <= n2))) 409e1051a39Sopenharmony_ci break; 410e1051a39Sopenharmony_ci q--; 411e1051a39Sopenharmony_ci rem += d0; 412e1051a39Sopenharmony_ci if (rem < d0) 413e1051a39Sopenharmony_ci break; /* don't let rem overflow */ 414e1051a39Sopenharmony_ci if (t2l < d1) 415e1051a39Sopenharmony_ci t2h--; 416e1051a39Sopenharmony_ci t2l -= d1; 417e1051a39Sopenharmony_ci } 418e1051a39Sopenharmony_ci# endif /* !BN_LLONG */ 419e1051a39Sopenharmony_ci } 420e1051a39Sopenharmony_ci# endif /* !BN_DIV3W */ 421e1051a39Sopenharmony_ci 422e1051a39Sopenharmony_ci l0 = bn_mul_words(tmp->d, sdiv->d, div_n, q); 423e1051a39Sopenharmony_ci tmp->d[div_n] = l0; 424e1051a39Sopenharmony_ci wnum--; 425e1051a39Sopenharmony_ci /* 426e1051a39Sopenharmony_ci * ignore top values of the bignums just sub the two BN_ULONG arrays 427e1051a39Sopenharmony_ci * with bn_sub_words 428e1051a39Sopenharmony_ci */ 429e1051a39Sopenharmony_ci l0 = bn_sub_words(wnum, wnum, tmp->d, div_n + 1); 430e1051a39Sopenharmony_ci q -= l0; 431e1051a39Sopenharmony_ci /* 432e1051a39Sopenharmony_ci * Note: As we have considered only the leading two BN_ULONGs in 433e1051a39Sopenharmony_ci * the calculation of q, sdiv * q might be greater than wnum (but 434e1051a39Sopenharmony_ci * then (q-1) * sdiv is less or equal than wnum) 435e1051a39Sopenharmony_ci */ 436e1051a39Sopenharmony_ci for (l0 = 0 - l0, j = 0; j < div_n; j++) 437e1051a39Sopenharmony_ci tmp->d[j] = sdiv->d[j] & l0; 438e1051a39Sopenharmony_ci l0 = bn_add_words(wnum, wnum, tmp->d, div_n); 439e1051a39Sopenharmony_ci (*wnumtop) += l0; 440e1051a39Sopenharmony_ci assert((*wnumtop) == 0); 441e1051a39Sopenharmony_ci 442e1051a39Sopenharmony_ci /* store part of the result */ 443e1051a39Sopenharmony_ci *--resp = q; 444e1051a39Sopenharmony_ci } 445e1051a39Sopenharmony_ci /* snum holds remainder, it's as wide as divisor */ 446e1051a39Sopenharmony_ci snum->neg = num_neg; 447e1051a39Sopenharmony_ci snum->top = div_n; 448e1051a39Sopenharmony_ci snum->flags |= BN_FLG_FIXED_TOP; 449e1051a39Sopenharmony_ci 450e1051a39Sopenharmony_ci if (rm != NULL && bn_rshift_fixed_top(rm, snum, norm_shift) == 0) 451e1051a39Sopenharmony_ci goto err; 452e1051a39Sopenharmony_ci 453e1051a39Sopenharmony_ci BN_CTX_end(ctx); 454e1051a39Sopenharmony_ci return 1; 455e1051a39Sopenharmony_ci err: 456e1051a39Sopenharmony_ci bn_check_top(rm); 457e1051a39Sopenharmony_ci BN_CTX_end(ctx); 458e1051a39Sopenharmony_ci return 0; 459e1051a39Sopenharmony_ci} 460e1051a39Sopenharmony_ci#endif 461