1/* 2 * Core bignum functions 3 * 4 * Copyright The Mbed TLS Contributors 5 * SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later 6 */ 7 8#include "common.h" 9 10#if defined(MBEDTLS_BIGNUM_C) 11 12#include <string.h> 13 14#include "mbedtls/error.h" 15#include "mbedtls/platform_util.h" 16#include "constant_time_internal.h" 17 18#include "mbedtls/platform.h" 19 20#include "bignum_core.h" 21#include "bn_mul.h" 22#include "constant_time_internal.h" 23 24size_t mbedtls_mpi_core_clz(mbedtls_mpi_uint a) 25{ 26#if defined(__has_builtin) 27#if (MBEDTLS_MPI_UINT_MAX == UINT_MAX) && __has_builtin(__builtin_clz) 28 #define core_clz __builtin_clz 29#elif (MBEDTLS_MPI_UINT_MAX == ULONG_MAX) && __has_builtin(__builtin_clzl) 30 #define core_clz __builtin_clzl 31#elif (MBEDTLS_MPI_UINT_MAX == ULLONG_MAX) && __has_builtin(__builtin_clzll) 32 #define core_clz __builtin_clzll 33#endif 34#endif 35#if defined(core_clz) 36 return (size_t) core_clz(a); 37#else 38 size_t j; 39 mbedtls_mpi_uint mask = (mbedtls_mpi_uint) 1 << (biL - 1); 40 41 for (j = 0; j < biL; j++) { 42 if (a & mask) { 43 break; 44 } 45 46 mask >>= 1; 47 } 48 49 return j; 50#endif 51} 52 53size_t mbedtls_mpi_core_bitlen(const mbedtls_mpi_uint *A, size_t A_limbs) 54{ 55 int i; 56 size_t j; 57 58 for (i = ((int) A_limbs) - 1; i >= 0; i--) { 59 if (A[i] != 0) { 60 j = biL - mbedtls_mpi_core_clz(A[i]); 61 return (i * biL) + j; 62 } 63 } 64 65 return 0; 66} 67 68static mbedtls_mpi_uint mpi_bigendian_to_host(mbedtls_mpi_uint a) 69{ 70 if (MBEDTLS_IS_BIG_ENDIAN) { 71 /* Nothing to do on bigendian systems. */ 72 return a; 73 } else { 74#if defined(MBEDTLS_HAVE_INT32) 75 return (mbedtls_mpi_uint) MBEDTLS_BSWAP32(a); 76#elif defined(MBEDTLS_HAVE_INT64) 77 return (mbedtls_mpi_uint) MBEDTLS_BSWAP64(a); 78#endif 79 } 80} 81 82void mbedtls_mpi_core_bigendian_to_host(mbedtls_mpi_uint *A, 83 size_t A_limbs) 84{ 85 mbedtls_mpi_uint *cur_limb_left; 86 mbedtls_mpi_uint *cur_limb_right; 87 if (A_limbs == 0) { 88 return; 89 } 90 91 /* 92 * Traverse limbs and 93 * - adapt byte-order in each limb 94 * - swap the limbs themselves. 95 * For that, simultaneously traverse the limbs from left to right 96 * and from right to left, as long as the left index is not bigger 97 * than the right index (it's not a problem if limbs is odd and the 98 * indices coincide in the last iteration). 99 */ 100 for (cur_limb_left = A, cur_limb_right = A + (A_limbs - 1); 101 cur_limb_left <= cur_limb_right; 102 cur_limb_left++, cur_limb_right--) { 103 mbedtls_mpi_uint tmp; 104 /* Note that if cur_limb_left == cur_limb_right, 105 * this code effectively swaps the bytes only once. */ 106 tmp = mpi_bigendian_to_host(*cur_limb_left); 107 *cur_limb_left = mpi_bigendian_to_host(*cur_limb_right); 108 *cur_limb_right = tmp; 109 } 110} 111 112/* Whether min <= A, in constant time. 113 * A_limbs must be at least 1. */ 114mbedtls_ct_condition_t mbedtls_mpi_core_uint_le_mpi(mbedtls_mpi_uint min, 115 const mbedtls_mpi_uint *A, 116 size_t A_limbs) 117{ 118 /* min <= least significant limb? */ 119 mbedtls_ct_condition_t min_le_lsl = mbedtls_ct_uint_ge(A[0], min); 120 121 /* limbs other than the least significant one are all zero? */ 122 mbedtls_ct_condition_t msll_mask = MBEDTLS_CT_FALSE; 123 for (size_t i = 1; i < A_limbs; i++) { 124 msll_mask = mbedtls_ct_bool_or(msll_mask, mbedtls_ct_bool(A[i])); 125 } 126 127 /* min <= A iff the lowest limb of A is >= min or the other limbs 128 * are not all zero. */ 129 return mbedtls_ct_bool_or(msll_mask, min_le_lsl); 130} 131 132mbedtls_ct_condition_t mbedtls_mpi_core_lt_ct(const mbedtls_mpi_uint *A, 133 const mbedtls_mpi_uint *B, 134 size_t limbs) 135{ 136 mbedtls_ct_condition_t ret = MBEDTLS_CT_FALSE, cond = MBEDTLS_CT_FALSE, done = MBEDTLS_CT_FALSE; 137 138 for (size_t i = limbs; i > 0; i--) { 139 /* 140 * If B[i - 1] < A[i - 1] then A < B is false and the result must 141 * remain 0. 142 * 143 * Again even if we can make a decision, we just mark the result and 144 * the fact that we are done and continue looping. 145 */ 146 cond = mbedtls_ct_uint_lt(B[i - 1], A[i - 1]); 147 done = mbedtls_ct_bool_or(done, cond); 148 149 /* 150 * If A[i - 1] < B[i - 1] then A < B is true. 151 * 152 * Again even if we can make a decision, we just mark the result and 153 * the fact that we are done and continue looping. 154 */ 155 cond = mbedtls_ct_uint_lt(A[i - 1], B[i - 1]); 156 ret = mbedtls_ct_bool_or(ret, mbedtls_ct_bool_and(cond, mbedtls_ct_bool_not(done))); 157 done = mbedtls_ct_bool_or(done, cond); 158 } 159 160 /* 161 * If all the limbs were equal, then the numbers are equal, A < B is false 162 * and leaving the result 0 is correct. 163 */ 164 165 return ret; 166} 167 168void mbedtls_mpi_core_cond_assign(mbedtls_mpi_uint *X, 169 const mbedtls_mpi_uint *A, 170 size_t limbs, 171 mbedtls_ct_condition_t assign) 172{ 173 if (X == A) { 174 return; 175 } 176 177 /* This function is very performance-sensitive for RSA. For this reason 178 * we have the loop below, instead of calling mbedtls_ct_memcpy_if 179 * (this is more optimal since here we don't have to handle the case where 180 * we copy awkwardly sized data). 181 */ 182 for (size_t i = 0; i < limbs; i++) { 183 X[i] = mbedtls_ct_mpi_uint_if(assign, A[i], X[i]); 184 } 185} 186 187void mbedtls_mpi_core_cond_swap(mbedtls_mpi_uint *X, 188 mbedtls_mpi_uint *Y, 189 size_t limbs, 190 mbedtls_ct_condition_t swap) 191{ 192 if (X == Y) { 193 return; 194 } 195 196 for (size_t i = 0; i < limbs; i++) { 197 mbedtls_mpi_uint tmp = X[i]; 198 X[i] = mbedtls_ct_mpi_uint_if(swap, Y[i], X[i]); 199 Y[i] = mbedtls_ct_mpi_uint_if(swap, tmp, Y[i]); 200 } 201} 202 203int mbedtls_mpi_core_read_le(mbedtls_mpi_uint *X, 204 size_t X_limbs, 205 const unsigned char *input, 206 size_t input_length) 207{ 208 const size_t limbs = CHARS_TO_LIMBS(input_length); 209 210 if (X_limbs < limbs) { 211 return MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL; 212 } 213 214 if (X != NULL) { 215 memset(X, 0, X_limbs * ciL); 216 217 for (size_t i = 0; i < input_length; i++) { 218 size_t offset = ((i % ciL) << 3); 219 X[i / ciL] |= ((mbedtls_mpi_uint) input[i]) << offset; 220 } 221 } 222 223 return 0; 224} 225 226int mbedtls_mpi_core_read_be(mbedtls_mpi_uint *X, 227 size_t X_limbs, 228 const unsigned char *input, 229 size_t input_length) 230{ 231 const size_t limbs = CHARS_TO_LIMBS(input_length); 232 233 if (X_limbs < limbs) { 234 return MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL; 235 } 236 237 /* If X_limbs is 0, input_length must also be 0 (from previous test). 238 * Nothing to do. */ 239 if (X_limbs == 0) { 240 return 0; 241 } 242 243 memset(X, 0, X_limbs * ciL); 244 245 /* memcpy() with (NULL, 0) is undefined behaviour */ 246 if (input_length != 0) { 247 size_t overhead = (X_limbs * ciL) - input_length; 248 unsigned char *Xp = (unsigned char *) X; 249 memcpy(Xp + overhead, input, input_length); 250 } 251 252 mbedtls_mpi_core_bigendian_to_host(X, X_limbs); 253 254 return 0; 255} 256 257int mbedtls_mpi_core_write_le(const mbedtls_mpi_uint *A, 258 size_t A_limbs, 259 unsigned char *output, 260 size_t output_length) 261{ 262 size_t stored_bytes = A_limbs * ciL; 263 size_t bytes_to_copy; 264 265 if (stored_bytes < output_length) { 266 bytes_to_copy = stored_bytes; 267 } else { 268 bytes_to_copy = output_length; 269 270 /* The output buffer is smaller than the allocated size of A. 271 * However A may fit if its leading bytes are zero. */ 272 for (size_t i = bytes_to_copy; i < stored_bytes; i++) { 273 if (GET_BYTE(A, i) != 0) { 274 return MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL; 275 } 276 } 277 } 278 279 for (size_t i = 0; i < bytes_to_copy; i++) { 280 output[i] = GET_BYTE(A, i); 281 } 282 283 if (stored_bytes < output_length) { 284 /* Write trailing 0 bytes */ 285 memset(output + stored_bytes, 0, output_length - stored_bytes); 286 } 287 288 return 0; 289} 290 291int mbedtls_mpi_core_write_be(const mbedtls_mpi_uint *X, 292 size_t X_limbs, 293 unsigned char *output, 294 size_t output_length) 295{ 296 size_t stored_bytes; 297 size_t bytes_to_copy; 298 unsigned char *p; 299 300 stored_bytes = X_limbs * ciL; 301 302 if (stored_bytes < output_length) { 303 /* There is enough space in the output buffer. Write initial 304 * null bytes and record the position at which to start 305 * writing the significant bytes. In this case, the execution 306 * trace of this function does not depend on the value of the 307 * number. */ 308 bytes_to_copy = stored_bytes; 309 p = output + output_length - stored_bytes; 310 memset(output, 0, output_length - stored_bytes); 311 } else { 312 /* The output buffer is smaller than the allocated size of X. 313 * However X may fit if its leading bytes are zero. */ 314 bytes_to_copy = output_length; 315 p = output; 316 for (size_t i = bytes_to_copy; i < stored_bytes; i++) { 317 if (GET_BYTE(X, i) != 0) { 318 return MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL; 319 } 320 } 321 } 322 323 for (size_t i = 0; i < bytes_to_copy; i++) { 324 p[bytes_to_copy - i - 1] = GET_BYTE(X, i); 325 } 326 327 return 0; 328} 329 330void mbedtls_mpi_core_shift_r(mbedtls_mpi_uint *X, size_t limbs, 331 size_t count) 332{ 333 size_t i, v0, v1; 334 mbedtls_mpi_uint r0 = 0, r1; 335 336 v0 = count / biL; 337 v1 = count & (biL - 1); 338 339 if (v0 > limbs || (v0 == limbs && v1 > 0)) { 340 memset(X, 0, limbs * ciL); 341 return; 342 } 343 344 /* 345 * shift by count / limb_size 346 */ 347 if (v0 > 0) { 348 for (i = 0; i < limbs - v0; i++) { 349 X[i] = X[i + v0]; 350 } 351 352 for (; i < limbs; i++) { 353 X[i] = 0; 354 } 355 } 356 357 /* 358 * shift by count % limb_size 359 */ 360 if (v1 > 0) { 361 for (i = limbs; i > 0; i--) { 362 r1 = X[i - 1] << (biL - v1); 363 X[i - 1] >>= v1; 364 X[i - 1] |= r0; 365 r0 = r1; 366 } 367 } 368} 369 370void mbedtls_mpi_core_shift_l(mbedtls_mpi_uint *X, size_t limbs, 371 size_t count) 372{ 373 size_t i, v0, v1; 374 mbedtls_mpi_uint r0 = 0, r1; 375 376 v0 = count / (biL); 377 v1 = count & (biL - 1); 378 379 /* 380 * shift by count / limb_size 381 */ 382 if (v0 > 0) { 383 for (i = limbs; i > v0; i--) { 384 X[i - 1] = X[i - v0 - 1]; 385 } 386 387 for (; i > 0; i--) { 388 X[i - 1] = 0; 389 } 390 } 391 392 /* 393 * shift by count % limb_size 394 */ 395 if (v1 > 0) { 396 for (i = v0; i < limbs; i++) { 397 r1 = X[i] >> (biL - v1); 398 X[i] <<= v1; 399 X[i] |= r0; 400 r0 = r1; 401 } 402 } 403} 404 405mbedtls_mpi_uint mbedtls_mpi_core_add(mbedtls_mpi_uint *X, 406 const mbedtls_mpi_uint *A, 407 const mbedtls_mpi_uint *B, 408 size_t limbs) 409{ 410 mbedtls_mpi_uint c = 0; 411 412 for (size_t i = 0; i < limbs; i++) { 413 mbedtls_mpi_uint t = c + A[i]; 414 c = (t < A[i]); 415 t += B[i]; 416 c += (t < B[i]); 417 X[i] = t; 418 } 419 420 return c; 421} 422 423mbedtls_mpi_uint mbedtls_mpi_core_add_if(mbedtls_mpi_uint *X, 424 const mbedtls_mpi_uint *A, 425 size_t limbs, 426 unsigned cond) 427{ 428 mbedtls_mpi_uint c = 0; 429 430 mbedtls_ct_condition_t do_add = mbedtls_ct_bool(cond); 431 432 for (size_t i = 0; i < limbs; i++) { 433 mbedtls_mpi_uint add = mbedtls_ct_mpi_uint_if_else_0(do_add, A[i]); 434 mbedtls_mpi_uint t = c + X[i]; 435 c = (t < X[i]); 436 t += add; 437 c += (t < add); 438 X[i] = t; 439 } 440 441 return c; 442} 443 444mbedtls_mpi_uint mbedtls_mpi_core_sub(mbedtls_mpi_uint *X, 445 const mbedtls_mpi_uint *A, 446 const mbedtls_mpi_uint *B, 447 size_t limbs) 448{ 449 mbedtls_mpi_uint c = 0; 450 451 for (size_t i = 0; i < limbs; i++) { 452 mbedtls_mpi_uint z = (A[i] < c); 453 mbedtls_mpi_uint t = A[i] - c; 454 c = (t < B[i]) + z; 455 X[i] = t - B[i]; 456 } 457 458 return c; 459} 460 461mbedtls_mpi_uint mbedtls_mpi_core_mla(mbedtls_mpi_uint *d, size_t d_len, 462 const mbedtls_mpi_uint *s, size_t s_len, 463 mbedtls_mpi_uint b) 464{ 465 mbedtls_mpi_uint c = 0; /* carry */ 466 /* 467 * It is a documented precondition of this function that d_len >= s_len. 468 * If that's not the case, we swap these round: this turns what would be 469 * a buffer overflow into an incorrect result. 470 */ 471 if (d_len < s_len) { 472 s_len = d_len; 473 } 474 size_t excess_len = d_len - s_len; 475 size_t steps_x8 = s_len / 8; 476 size_t steps_x1 = s_len & 7; 477 478 while (steps_x8--) { 479 MULADDC_X8_INIT 480 MULADDC_X8_CORE 481 MULADDC_X8_STOP 482 } 483 484 while (steps_x1--) { 485 MULADDC_X1_INIT 486 MULADDC_X1_CORE 487 MULADDC_X1_STOP 488 } 489 490 while (excess_len--) { 491 *d += c; 492 c = (*d < c); 493 d++; 494 } 495 496 return c; 497} 498 499void mbedtls_mpi_core_mul(mbedtls_mpi_uint *X, 500 const mbedtls_mpi_uint *A, size_t A_limbs, 501 const mbedtls_mpi_uint *B, size_t B_limbs) 502{ 503 memset(X, 0, (A_limbs + B_limbs) * ciL); 504 505 for (size_t i = 0; i < B_limbs; i++) { 506 (void) mbedtls_mpi_core_mla(X + i, A_limbs + 1, A, A_limbs, B[i]); 507 } 508} 509 510/* 511 * Fast Montgomery initialization (thanks to Tom St Denis). 512 */ 513mbedtls_mpi_uint mbedtls_mpi_core_montmul_init(const mbedtls_mpi_uint *N) 514{ 515 mbedtls_mpi_uint x = N[0]; 516 517 x += ((N[0] + 2) & 4) << 1; 518 519 for (unsigned int i = biL; i >= 8; i /= 2) { 520 x *= (2 - (N[0] * x)); 521 } 522 523 return ~x + 1; 524} 525 526void mbedtls_mpi_core_montmul(mbedtls_mpi_uint *X, 527 const mbedtls_mpi_uint *A, 528 const mbedtls_mpi_uint *B, 529 size_t B_limbs, 530 const mbedtls_mpi_uint *N, 531 size_t AN_limbs, 532 mbedtls_mpi_uint mm, 533 mbedtls_mpi_uint *T) 534{ 535 memset(T, 0, (2 * AN_limbs + 1) * ciL); 536 537 for (size_t i = 0; i < AN_limbs; i++) { 538 /* T = (T + u0*B + u1*N) / 2^biL */ 539 mbedtls_mpi_uint u0 = A[i]; 540 mbedtls_mpi_uint u1 = (T[0] + u0 * B[0]) * mm; 541 542 (void) mbedtls_mpi_core_mla(T, AN_limbs + 2, B, B_limbs, u0); 543 (void) mbedtls_mpi_core_mla(T, AN_limbs + 2, N, AN_limbs, u1); 544 545 T++; 546 } 547 548 /* 549 * The result we want is (T >= N) ? T - N : T. 550 * 551 * For better constant-time properties in this function, we always do the 552 * subtraction, with the result in X. 553 * 554 * We also look to see if there was any carry in the final additions in the 555 * loop above. 556 */ 557 558 mbedtls_mpi_uint carry = T[AN_limbs]; 559 mbedtls_mpi_uint borrow = mbedtls_mpi_core_sub(X, T, N, AN_limbs); 560 561 /* 562 * Using R as the Montgomery radix (auxiliary modulus) i.e. 2^(biL*AN_limbs): 563 * 564 * T can be in one of 3 ranges: 565 * 566 * 1) T < N : (carry, borrow) = (0, 1): we want T 567 * 2) N <= T < R : (carry, borrow) = (0, 0): we want X 568 * 3) T >= R : (carry, borrow) = (1, 1): we want X 569 * 570 * and (carry, borrow) = (1, 0) can't happen. 571 * 572 * So the correct return value is already in X if (carry ^ borrow) = 0, 573 * but is in (the lower AN_limbs limbs of) T if (carry ^ borrow) = 1. 574 */ 575 mbedtls_ct_memcpy_if(mbedtls_ct_bool(carry ^ borrow), 576 (unsigned char *) X, 577 (unsigned char *) T, 578 NULL, 579 AN_limbs * sizeof(mbedtls_mpi_uint)); 580} 581 582int mbedtls_mpi_core_get_mont_r2_unsafe(mbedtls_mpi *X, 583 const mbedtls_mpi *N) 584{ 585 int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED; 586 587 MBEDTLS_MPI_CHK(mbedtls_mpi_lset(X, 1)); 588 MBEDTLS_MPI_CHK(mbedtls_mpi_shift_l(X, N->n * 2 * biL)); 589 MBEDTLS_MPI_CHK(mbedtls_mpi_mod_mpi(X, X, N)); 590 MBEDTLS_MPI_CHK(mbedtls_mpi_shrink(X, N->n)); 591 592cleanup: 593 return ret; 594} 595 596MBEDTLS_STATIC_TESTABLE 597void mbedtls_mpi_core_ct_uint_table_lookup(mbedtls_mpi_uint *dest, 598 const mbedtls_mpi_uint *table, 599 size_t limbs, 600 size_t count, 601 size_t index) 602{ 603 for (size_t i = 0; i < count; i++, table += limbs) { 604 mbedtls_ct_condition_t assign = mbedtls_ct_uint_eq(i, index); 605 mbedtls_mpi_core_cond_assign(dest, table, limbs, assign); 606 } 607} 608 609/* Fill X with n_bytes random bytes. 610 * X must already have room for those bytes. 611 * The ordering of the bytes returned from the RNG is suitable for 612 * deterministic ECDSA (see RFC 6979 §3.3 and the specification of 613 * mbedtls_mpi_core_random()). 614 */ 615int mbedtls_mpi_core_fill_random( 616 mbedtls_mpi_uint *X, size_t X_limbs, 617 size_t n_bytes, 618 int (*f_rng)(void *, unsigned char *, size_t), void *p_rng) 619{ 620 int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED; 621 const size_t limbs = CHARS_TO_LIMBS(n_bytes); 622 const size_t overhead = (limbs * ciL) - n_bytes; 623 624 if (X_limbs < limbs) { 625 return MBEDTLS_ERR_MPI_BAD_INPUT_DATA; 626 } 627 628 memset(X, 0, overhead); 629 memset((unsigned char *) X + limbs * ciL, 0, (X_limbs - limbs) * ciL); 630 MBEDTLS_MPI_CHK(f_rng(p_rng, (unsigned char *) X + overhead, n_bytes)); 631 mbedtls_mpi_core_bigendian_to_host(X, limbs); 632 633cleanup: 634 return ret; 635} 636 637int mbedtls_mpi_core_random(mbedtls_mpi_uint *X, 638 mbedtls_mpi_uint min, 639 const mbedtls_mpi_uint *N, 640 size_t limbs, 641 int (*f_rng)(void *, unsigned char *, size_t), 642 void *p_rng) 643{ 644 mbedtls_ct_condition_t ge_lower = MBEDTLS_CT_TRUE, lt_upper = MBEDTLS_CT_FALSE; 645 size_t n_bits = mbedtls_mpi_core_bitlen(N, limbs); 646 size_t n_bytes = (n_bits + 7) / 8; 647 int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED; 648 649 /* 650 * When min == 0, each try has at worst a probability 1/2 of failing 651 * (the msb has a probability 1/2 of being 0, and then the result will 652 * be < N), so after 30 tries failure probability is a most 2**(-30). 653 * 654 * When N is just below a power of 2, as is the case when generating 655 * a random scalar on most elliptic curves, 1 try is enough with 656 * overwhelming probability. When N is just above a power of 2, 657 * as when generating a random scalar on secp224k1, each try has 658 * a probability of failing that is almost 1/2. 659 * 660 * The probabilities are almost the same if min is nonzero but negligible 661 * compared to N. This is always the case when N is crypto-sized, but 662 * it's convenient to support small N for testing purposes. When N 663 * is small, use a higher repeat count, otherwise the probability of 664 * failure is macroscopic. 665 */ 666 int count = (n_bytes > 4 ? 30 : 250); 667 668 /* 669 * Match the procedure given in RFC 6979 §3.3 (deterministic ECDSA) 670 * when f_rng is a suitably parametrized instance of HMAC_DRBG: 671 * - use the same byte ordering; 672 * - keep the leftmost n_bits bits of the generated octet string; 673 * - try until result is in the desired range. 674 * This also avoids any bias, which is especially important for ECDSA. 675 */ 676 do { 677 MBEDTLS_MPI_CHK(mbedtls_mpi_core_fill_random(X, limbs, 678 n_bytes, 679 f_rng, p_rng)); 680 mbedtls_mpi_core_shift_r(X, limbs, 8 * n_bytes - n_bits); 681 682 if (--count == 0) { 683 ret = MBEDTLS_ERR_MPI_NOT_ACCEPTABLE; 684 goto cleanup; 685 } 686 687 ge_lower = mbedtls_mpi_core_uint_le_mpi(min, X, limbs); 688 lt_upper = mbedtls_mpi_core_lt_ct(X, N, limbs); 689 } while (mbedtls_ct_bool_and(ge_lower, lt_upper) == MBEDTLS_CT_FALSE); 690 691cleanup: 692 return ret; 693} 694 695static size_t exp_mod_get_window_size(size_t Ebits) 696{ 697#if MBEDTLS_MPI_WINDOW_SIZE >= 6 698 return (Ebits > 671) ? 6 : (Ebits > 239) ? 5 : (Ebits > 79) ? 4 : 1; 699#elif MBEDTLS_MPI_WINDOW_SIZE == 5 700 return (Ebits > 239) ? 5 : (Ebits > 79) ? 4 : 1; 701#elif MBEDTLS_MPI_WINDOW_SIZE > 1 702 return (Ebits > 79) ? MBEDTLS_MPI_WINDOW_SIZE : 1; 703#else 704 (void) Ebits; 705 return 1; 706#endif 707} 708 709size_t mbedtls_mpi_core_exp_mod_working_limbs(size_t AN_limbs, size_t E_limbs) 710{ 711 const size_t wsize = exp_mod_get_window_size(E_limbs * biL); 712 const size_t welem = ((size_t) 1) << wsize; 713 714 /* How big does each part of the working memory pool need to be? */ 715 const size_t table_limbs = welem * AN_limbs; 716 const size_t select_limbs = AN_limbs; 717 const size_t temp_limbs = 2 * AN_limbs + 1; 718 719 return table_limbs + select_limbs + temp_limbs; 720} 721 722static void exp_mod_precompute_window(const mbedtls_mpi_uint *A, 723 const mbedtls_mpi_uint *N, 724 size_t AN_limbs, 725 mbedtls_mpi_uint mm, 726 const mbedtls_mpi_uint *RR, 727 size_t welem, 728 mbedtls_mpi_uint *Wtable, 729 mbedtls_mpi_uint *temp) 730{ 731 /* W[0] = 1 (in Montgomery presentation) */ 732 memset(Wtable, 0, AN_limbs * ciL); 733 Wtable[0] = 1; 734 mbedtls_mpi_core_montmul(Wtable, Wtable, RR, AN_limbs, N, AN_limbs, mm, temp); 735 736 /* W[1] = A (already in Montgomery presentation) */ 737 mbedtls_mpi_uint *W1 = Wtable + AN_limbs; 738 memcpy(W1, A, AN_limbs * ciL); 739 740 /* W[i+1] = W[i] * W[1], i >= 2 */ 741 mbedtls_mpi_uint *Wprev = W1; 742 for (size_t i = 2; i < welem; i++) { 743 mbedtls_mpi_uint *Wcur = Wprev + AN_limbs; 744 mbedtls_mpi_core_montmul(Wcur, Wprev, W1, AN_limbs, N, AN_limbs, mm, temp); 745 Wprev = Wcur; 746 } 747} 748 749/* Exponentiation: X := A^E mod N. 750 * 751 * A must already be in Montgomery form. 752 * 753 * As in other bignum functions, assume that AN_limbs and E_limbs are nonzero. 754 * 755 * RR must contain 2^{2*biL} mod N. 756 * 757 * The algorithm is a variant of Left-to-right k-ary exponentiation: HAC 14.82 758 * (The difference is that the body in our loop processes a single bit instead 759 * of a full window.) 760 */ 761void mbedtls_mpi_core_exp_mod(mbedtls_mpi_uint *X, 762 const mbedtls_mpi_uint *A, 763 const mbedtls_mpi_uint *N, 764 size_t AN_limbs, 765 const mbedtls_mpi_uint *E, 766 size_t E_limbs, 767 const mbedtls_mpi_uint *RR, 768 mbedtls_mpi_uint *T) 769{ 770 const size_t wsize = exp_mod_get_window_size(E_limbs * biL); 771 const size_t welem = ((size_t) 1) << wsize; 772 773 /* This is how we will use the temporary storage T, which must have space 774 * for table_limbs, select_limbs and (2 * AN_limbs + 1) for montmul. */ 775 const size_t table_limbs = welem * AN_limbs; 776 const size_t select_limbs = AN_limbs; 777 778 /* Pointers to specific parts of the temporary working memory pool */ 779 mbedtls_mpi_uint *const Wtable = T; 780 mbedtls_mpi_uint *const Wselect = Wtable + table_limbs; 781 mbedtls_mpi_uint *const temp = Wselect + select_limbs; 782 783 /* 784 * Window precomputation 785 */ 786 787 const mbedtls_mpi_uint mm = mbedtls_mpi_core_montmul_init(N); 788 789 /* Set Wtable[i] = A^(2^i) (in Montgomery representation) */ 790 exp_mod_precompute_window(A, N, AN_limbs, 791 mm, RR, 792 welem, Wtable, temp); 793 794 /* 795 * Fixed window exponentiation 796 */ 797 798 /* X = 1 (in Montgomery presentation) initially */ 799 memcpy(X, Wtable, AN_limbs * ciL); 800 801 /* We'll process the bits of E from most significant 802 * (limb_index=E_limbs-1, E_bit_index=biL-1) to least significant 803 * (limb_index=0, E_bit_index=0). */ 804 size_t E_limb_index = E_limbs; 805 size_t E_bit_index = 0; 806 /* At any given time, window contains window_bits bits from E. 807 * window_bits can go up to wsize. */ 808 size_t window_bits = 0; 809 mbedtls_mpi_uint window = 0; 810 811 do { 812 /* Square */ 813 mbedtls_mpi_core_montmul(X, X, X, AN_limbs, N, AN_limbs, mm, temp); 814 815 /* Move to the next bit of the exponent */ 816 if (E_bit_index == 0) { 817 --E_limb_index; 818 E_bit_index = biL - 1; 819 } else { 820 --E_bit_index; 821 } 822 /* Insert next exponent bit into window */ 823 ++window_bits; 824 window <<= 1; 825 window |= (E[E_limb_index] >> E_bit_index) & 1; 826 827 /* Clear window if it's full. Also clear the window at the end, 828 * when we've finished processing the exponent. */ 829 if (window_bits == wsize || 830 (E_bit_index == 0 && E_limb_index == 0)) { 831 /* Select Wtable[window] without leaking window through 832 * memory access patterns. */ 833 mbedtls_mpi_core_ct_uint_table_lookup(Wselect, Wtable, 834 AN_limbs, welem, window); 835 /* Multiply X by the selected element. */ 836 mbedtls_mpi_core_montmul(X, X, Wselect, AN_limbs, N, AN_limbs, mm, 837 temp); 838 window = 0; 839 window_bits = 0; 840 } 841 } while (!(E_bit_index == 0 && E_limb_index == 0)); 842} 843 844mbedtls_mpi_uint mbedtls_mpi_core_sub_int(mbedtls_mpi_uint *X, 845 const mbedtls_mpi_uint *A, 846 mbedtls_mpi_uint c, /* doubles as carry */ 847 size_t limbs) 848{ 849 for (size_t i = 0; i < limbs; i++) { 850 mbedtls_mpi_uint s = A[i]; 851 mbedtls_mpi_uint t = s - c; 852 c = (t > s); 853 X[i] = t; 854 } 855 856 return c; 857} 858 859mbedtls_ct_condition_t mbedtls_mpi_core_check_zero_ct(const mbedtls_mpi_uint *A, 860 size_t limbs) 861{ 862 volatile const mbedtls_mpi_uint *force_read_A = A; 863 mbedtls_mpi_uint bits = 0; 864 865 for (size_t i = 0; i < limbs; i++) { 866 bits |= force_read_A[i]; 867 } 868 869 return mbedtls_ct_bool(bits); 870} 871 872void mbedtls_mpi_core_to_mont_rep(mbedtls_mpi_uint *X, 873 const mbedtls_mpi_uint *A, 874 const mbedtls_mpi_uint *N, 875 size_t AN_limbs, 876 mbedtls_mpi_uint mm, 877 const mbedtls_mpi_uint *rr, 878 mbedtls_mpi_uint *T) 879{ 880 mbedtls_mpi_core_montmul(X, A, rr, AN_limbs, N, AN_limbs, mm, T); 881} 882 883void mbedtls_mpi_core_from_mont_rep(mbedtls_mpi_uint *X, 884 const mbedtls_mpi_uint *A, 885 const mbedtls_mpi_uint *N, 886 size_t AN_limbs, 887 mbedtls_mpi_uint mm, 888 mbedtls_mpi_uint *T) 889{ 890 const mbedtls_mpi_uint Rinv = 1; /* 1/R in Mont. rep => 1 */ 891 892 mbedtls_mpi_core_montmul(X, A, &Rinv, 1, N, AN_limbs, mm, T); 893} 894 895#endif /* MBEDTLS_BIGNUM_C */ 896