1a8e1175bSopenharmony_ci/*
2a8e1175bSopenharmony_ci *  Core bignum functions
3a8e1175bSopenharmony_ci *
4a8e1175bSopenharmony_ci *  Copyright The Mbed TLS Contributors
5a8e1175bSopenharmony_ci *  SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
6a8e1175bSopenharmony_ci */
7a8e1175bSopenharmony_ci
8a8e1175bSopenharmony_ci#include "common.h"
9a8e1175bSopenharmony_ci
10a8e1175bSopenharmony_ci#if defined(MBEDTLS_BIGNUM_C)
11a8e1175bSopenharmony_ci
12a8e1175bSopenharmony_ci#include <string.h>
13a8e1175bSopenharmony_ci
14a8e1175bSopenharmony_ci#include "mbedtls/error.h"
15a8e1175bSopenharmony_ci#include "mbedtls/platform_util.h"
16a8e1175bSopenharmony_ci#include "constant_time_internal.h"
17a8e1175bSopenharmony_ci
18a8e1175bSopenharmony_ci#include "mbedtls/platform.h"
19a8e1175bSopenharmony_ci
20a8e1175bSopenharmony_ci#include "bignum_core.h"
21a8e1175bSopenharmony_ci#include "bn_mul.h"
22a8e1175bSopenharmony_ci#include "constant_time_internal.h"
23a8e1175bSopenharmony_ci
24a8e1175bSopenharmony_cisize_t mbedtls_mpi_core_clz(mbedtls_mpi_uint a)
25a8e1175bSopenharmony_ci{
26a8e1175bSopenharmony_ci#if defined(__has_builtin)
27a8e1175bSopenharmony_ci#if (MBEDTLS_MPI_UINT_MAX == UINT_MAX) && __has_builtin(__builtin_clz)
28a8e1175bSopenharmony_ci    #define core_clz __builtin_clz
29a8e1175bSopenharmony_ci#elif (MBEDTLS_MPI_UINT_MAX == ULONG_MAX) && __has_builtin(__builtin_clzl)
30a8e1175bSopenharmony_ci    #define core_clz __builtin_clzl
31a8e1175bSopenharmony_ci#elif (MBEDTLS_MPI_UINT_MAX == ULLONG_MAX) && __has_builtin(__builtin_clzll)
32a8e1175bSopenharmony_ci    #define core_clz __builtin_clzll
33a8e1175bSopenharmony_ci#endif
34a8e1175bSopenharmony_ci#endif
35a8e1175bSopenharmony_ci#if defined(core_clz)
36a8e1175bSopenharmony_ci    return (size_t) core_clz(a);
37a8e1175bSopenharmony_ci#else
38a8e1175bSopenharmony_ci    size_t j;
39a8e1175bSopenharmony_ci    mbedtls_mpi_uint mask = (mbedtls_mpi_uint) 1 << (biL - 1);
40a8e1175bSopenharmony_ci
41a8e1175bSopenharmony_ci    for (j = 0; j < biL; j++) {
42a8e1175bSopenharmony_ci        if (a & mask) {
43a8e1175bSopenharmony_ci            break;
44a8e1175bSopenharmony_ci        }
45a8e1175bSopenharmony_ci
46a8e1175bSopenharmony_ci        mask >>= 1;
47a8e1175bSopenharmony_ci    }
48a8e1175bSopenharmony_ci
49a8e1175bSopenharmony_ci    return j;
50a8e1175bSopenharmony_ci#endif
51a8e1175bSopenharmony_ci}
52a8e1175bSopenharmony_ci
53a8e1175bSopenharmony_cisize_t mbedtls_mpi_core_bitlen(const mbedtls_mpi_uint *A, size_t A_limbs)
54a8e1175bSopenharmony_ci{
55a8e1175bSopenharmony_ci    int i;
56a8e1175bSopenharmony_ci    size_t j;
57a8e1175bSopenharmony_ci
58a8e1175bSopenharmony_ci    for (i = ((int) A_limbs) - 1; i >= 0; i--) {
59a8e1175bSopenharmony_ci        if (A[i] != 0) {
60a8e1175bSopenharmony_ci            j = biL - mbedtls_mpi_core_clz(A[i]);
61a8e1175bSopenharmony_ci            return (i * biL) + j;
62a8e1175bSopenharmony_ci        }
63a8e1175bSopenharmony_ci    }
64a8e1175bSopenharmony_ci
65a8e1175bSopenharmony_ci    return 0;
66a8e1175bSopenharmony_ci}
67a8e1175bSopenharmony_ci
68a8e1175bSopenharmony_cistatic mbedtls_mpi_uint mpi_bigendian_to_host(mbedtls_mpi_uint a)
69a8e1175bSopenharmony_ci{
70a8e1175bSopenharmony_ci    if (MBEDTLS_IS_BIG_ENDIAN) {
71a8e1175bSopenharmony_ci        /* Nothing to do on bigendian systems. */
72a8e1175bSopenharmony_ci        return a;
73a8e1175bSopenharmony_ci    } else {
74a8e1175bSopenharmony_ci#if defined(MBEDTLS_HAVE_INT32)
75a8e1175bSopenharmony_ci        return (mbedtls_mpi_uint) MBEDTLS_BSWAP32(a);
76a8e1175bSopenharmony_ci#elif defined(MBEDTLS_HAVE_INT64)
77a8e1175bSopenharmony_ci        return (mbedtls_mpi_uint) MBEDTLS_BSWAP64(a);
78a8e1175bSopenharmony_ci#endif
79a8e1175bSopenharmony_ci    }
80a8e1175bSopenharmony_ci}
81a8e1175bSopenharmony_ci
82a8e1175bSopenharmony_civoid mbedtls_mpi_core_bigendian_to_host(mbedtls_mpi_uint *A,
83a8e1175bSopenharmony_ci                                        size_t A_limbs)
84a8e1175bSopenharmony_ci{
85a8e1175bSopenharmony_ci    mbedtls_mpi_uint *cur_limb_left;
86a8e1175bSopenharmony_ci    mbedtls_mpi_uint *cur_limb_right;
87a8e1175bSopenharmony_ci    if (A_limbs == 0) {
88a8e1175bSopenharmony_ci        return;
89a8e1175bSopenharmony_ci    }
90a8e1175bSopenharmony_ci
91a8e1175bSopenharmony_ci    /*
92a8e1175bSopenharmony_ci     * Traverse limbs and
93a8e1175bSopenharmony_ci     * - adapt byte-order in each limb
94a8e1175bSopenharmony_ci     * - swap the limbs themselves.
95a8e1175bSopenharmony_ci     * For that, simultaneously traverse the limbs from left to right
96a8e1175bSopenharmony_ci     * and from right to left, as long as the left index is not bigger
97a8e1175bSopenharmony_ci     * than the right index (it's not a problem if limbs is odd and the
98a8e1175bSopenharmony_ci     * indices coincide in the last iteration).
99a8e1175bSopenharmony_ci     */
100a8e1175bSopenharmony_ci    for (cur_limb_left = A, cur_limb_right = A + (A_limbs - 1);
101a8e1175bSopenharmony_ci         cur_limb_left <= cur_limb_right;
102a8e1175bSopenharmony_ci         cur_limb_left++, cur_limb_right--) {
103a8e1175bSopenharmony_ci        mbedtls_mpi_uint tmp;
104a8e1175bSopenharmony_ci        /* Note that if cur_limb_left == cur_limb_right,
105a8e1175bSopenharmony_ci         * this code effectively swaps the bytes only once. */
106a8e1175bSopenharmony_ci        tmp             = mpi_bigendian_to_host(*cur_limb_left);
107a8e1175bSopenharmony_ci        *cur_limb_left  = mpi_bigendian_to_host(*cur_limb_right);
108a8e1175bSopenharmony_ci        *cur_limb_right = tmp;
109a8e1175bSopenharmony_ci    }
110a8e1175bSopenharmony_ci}
111a8e1175bSopenharmony_ci
112a8e1175bSopenharmony_ci/* Whether min <= A, in constant time.
113a8e1175bSopenharmony_ci * A_limbs must be at least 1. */
114a8e1175bSopenharmony_cimbedtls_ct_condition_t mbedtls_mpi_core_uint_le_mpi(mbedtls_mpi_uint min,
115a8e1175bSopenharmony_ci                                                    const mbedtls_mpi_uint *A,
116a8e1175bSopenharmony_ci                                                    size_t A_limbs)
117a8e1175bSopenharmony_ci{
118a8e1175bSopenharmony_ci    /* min <= least significant limb? */
119a8e1175bSopenharmony_ci    mbedtls_ct_condition_t min_le_lsl = mbedtls_ct_uint_ge(A[0], min);
120a8e1175bSopenharmony_ci
121a8e1175bSopenharmony_ci    /* limbs other than the least significant one are all zero? */
122a8e1175bSopenharmony_ci    mbedtls_ct_condition_t msll_mask = MBEDTLS_CT_FALSE;
123a8e1175bSopenharmony_ci    for (size_t i = 1; i < A_limbs; i++) {
124a8e1175bSopenharmony_ci        msll_mask = mbedtls_ct_bool_or(msll_mask, mbedtls_ct_bool(A[i]));
125a8e1175bSopenharmony_ci    }
126a8e1175bSopenharmony_ci
127a8e1175bSopenharmony_ci    /* min <= A iff the lowest limb of A is >= min or the other limbs
128a8e1175bSopenharmony_ci     * are not all zero. */
129a8e1175bSopenharmony_ci    return mbedtls_ct_bool_or(msll_mask, min_le_lsl);
130a8e1175bSopenharmony_ci}
131a8e1175bSopenharmony_ci
132a8e1175bSopenharmony_cimbedtls_ct_condition_t mbedtls_mpi_core_lt_ct(const mbedtls_mpi_uint *A,
133a8e1175bSopenharmony_ci                                              const mbedtls_mpi_uint *B,
134a8e1175bSopenharmony_ci                                              size_t limbs)
135a8e1175bSopenharmony_ci{
136a8e1175bSopenharmony_ci    mbedtls_ct_condition_t ret = MBEDTLS_CT_FALSE, cond = MBEDTLS_CT_FALSE, done = MBEDTLS_CT_FALSE;
137a8e1175bSopenharmony_ci
138a8e1175bSopenharmony_ci    for (size_t i = limbs; i > 0; i--) {
139a8e1175bSopenharmony_ci        /*
140a8e1175bSopenharmony_ci         * If B[i - 1] < A[i - 1] then A < B is false and the result must
141a8e1175bSopenharmony_ci         * remain 0.
142a8e1175bSopenharmony_ci         *
143a8e1175bSopenharmony_ci         * Again even if we can make a decision, we just mark the result and
144a8e1175bSopenharmony_ci         * the fact that we are done and continue looping.
145a8e1175bSopenharmony_ci         */
146a8e1175bSopenharmony_ci        cond = mbedtls_ct_uint_lt(B[i - 1], A[i - 1]);
147a8e1175bSopenharmony_ci        done = mbedtls_ct_bool_or(done, cond);
148a8e1175bSopenharmony_ci
149a8e1175bSopenharmony_ci        /*
150a8e1175bSopenharmony_ci         * If A[i - 1] < B[i - 1] then A < B is true.
151a8e1175bSopenharmony_ci         *
152a8e1175bSopenharmony_ci         * Again even if we can make a decision, we just mark the result and
153a8e1175bSopenharmony_ci         * the fact that we are done and continue looping.
154a8e1175bSopenharmony_ci         */
155a8e1175bSopenharmony_ci        cond = mbedtls_ct_uint_lt(A[i - 1], B[i - 1]);
156a8e1175bSopenharmony_ci        ret  = mbedtls_ct_bool_or(ret, mbedtls_ct_bool_and(cond, mbedtls_ct_bool_not(done)));
157a8e1175bSopenharmony_ci        done = mbedtls_ct_bool_or(done, cond);
158a8e1175bSopenharmony_ci    }
159a8e1175bSopenharmony_ci
160a8e1175bSopenharmony_ci    /*
161a8e1175bSopenharmony_ci     * If all the limbs were equal, then the numbers are equal, A < B is false
162a8e1175bSopenharmony_ci     * and leaving the result 0 is correct.
163a8e1175bSopenharmony_ci     */
164a8e1175bSopenharmony_ci
165a8e1175bSopenharmony_ci    return ret;
166a8e1175bSopenharmony_ci}
167a8e1175bSopenharmony_ci
168a8e1175bSopenharmony_civoid mbedtls_mpi_core_cond_assign(mbedtls_mpi_uint *X,
169a8e1175bSopenharmony_ci                                  const mbedtls_mpi_uint *A,
170a8e1175bSopenharmony_ci                                  size_t limbs,
171a8e1175bSopenharmony_ci                                  mbedtls_ct_condition_t assign)
172a8e1175bSopenharmony_ci{
173a8e1175bSopenharmony_ci    if (X == A) {
174a8e1175bSopenharmony_ci        return;
175a8e1175bSopenharmony_ci    }
176a8e1175bSopenharmony_ci
177a8e1175bSopenharmony_ci    /* This function is very performance-sensitive for RSA. For this reason
178a8e1175bSopenharmony_ci     * we have the loop below, instead of calling mbedtls_ct_memcpy_if
179a8e1175bSopenharmony_ci     * (this is more optimal since here we don't have to handle the case where
180a8e1175bSopenharmony_ci     * we copy awkwardly sized data).
181a8e1175bSopenharmony_ci     */
182a8e1175bSopenharmony_ci    for (size_t i = 0; i < limbs; i++) {
183a8e1175bSopenharmony_ci        X[i] = mbedtls_ct_mpi_uint_if(assign, A[i], X[i]);
184a8e1175bSopenharmony_ci    }
185a8e1175bSopenharmony_ci}
186a8e1175bSopenharmony_ci
187a8e1175bSopenharmony_civoid mbedtls_mpi_core_cond_swap(mbedtls_mpi_uint *X,
188a8e1175bSopenharmony_ci                                mbedtls_mpi_uint *Y,
189a8e1175bSopenharmony_ci                                size_t limbs,
190a8e1175bSopenharmony_ci                                mbedtls_ct_condition_t swap)
191a8e1175bSopenharmony_ci{
192a8e1175bSopenharmony_ci    if (X == Y) {
193a8e1175bSopenharmony_ci        return;
194a8e1175bSopenharmony_ci    }
195a8e1175bSopenharmony_ci
196a8e1175bSopenharmony_ci    for (size_t i = 0; i < limbs; i++) {
197a8e1175bSopenharmony_ci        mbedtls_mpi_uint tmp = X[i];
198a8e1175bSopenharmony_ci        X[i] = mbedtls_ct_mpi_uint_if(swap, Y[i], X[i]);
199a8e1175bSopenharmony_ci        Y[i] = mbedtls_ct_mpi_uint_if(swap, tmp, Y[i]);
200a8e1175bSopenharmony_ci    }
201a8e1175bSopenharmony_ci}
202a8e1175bSopenharmony_ci
203a8e1175bSopenharmony_ciint mbedtls_mpi_core_read_le(mbedtls_mpi_uint *X,
204a8e1175bSopenharmony_ci                             size_t X_limbs,
205a8e1175bSopenharmony_ci                             const unsigned char *input,
206a8e1175bSopenharmony_ci                             size_t input_length)
207a8e1175bSopenharmony_ci{
208a8e1175bSopenharmony_ci    const size_t limbs = CHARS_TO_LIMBS(input_length);
209a8e1175bSopenharmony_ci
210a8e1175bSopenharmony_ci    if (X_limbs < limbs) {
211a8e1175bSopenharmony_ci        return MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL;
212a8e1175bSopenharmony_ci    }
213a8e1175bSopenharmony_ci
214a8e1175bSopenharmony_ci    if (X != NULL) {
215a8e1175bSopenharmony_ci        memset(X, 0, X_limbs * ciL);
216a8e1175bSopenharmony_ci
217a8e1175bSopenharmony_ci        for (size_t i = 0; i < input_length; i++) {
218a8e1175bSopenharmony_ci            size_t offset = ((i % ciL) << 3);
219a8e1175bSopenharmony_ci            X[i / ciL] |= ((mbedtls_mpi_uint) input[i]) << offset;
220a8e1175bSopenharmony_ci        }
221a8e1175bSopenharmony_ci    }
222a8e1175bSopenharmony_ci
223a8e1175bSopenharmony_ci    return 0;
224a8e1175bSopenharmony_ci}
225a8e1175bSopenharmony_ci
226a8e1175bSopenharmony_ciint mbedtls_mpi_core_read_be(mbedtls_mpi_uint *X,
227a8e1175bSopenharmony_ci                             size_t X_limbs,
228a8e1175bSopenharmony_ci                             const unsigned char *input,
229a8e1175bSopenharmony_ci                             size_t input_length)
230a8e1175bSopenharmony_ci{
231a8e1175bSopenharmony_ci    const size_t limbs = CHARS_TO_LIMBS(input_length);
232a8e1175bSopenharmony_ci
233a8e1175bSopenharmony_ci    if (X_limbs < limbs) {
234a8e1175bSopenharmony_ci        return MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL;
235a8e1175bSopenharmony_ci    }
236a8e1175bSopenharmony_ci
237a8e1175bSopenharmony_ci    /* If X_limbs is 0, input_length must also be 0 (from previous test).
238a8e1175bSopenharmony_ci     * Nothing to do. */
239a8e1175bSopenharmony_ci    if (X_limbs == 0) {
240a8e1175bSopenharmony_ci        return 0;
241a8e1175bSopenharmony_ci    }
242a8e1175bSopenharmony_ci
243a8e1175bSopenharmony_ci    memset(X, 0, X_limbs * ciL);
244a8e1175bSopenharmony_ci
245a8e1175bSopenharmony_ci    /* memcpy() with (NULL, 0) is undefined behaviour */
246a8e1175bSopenharmony_ci    if (input_length != 0) {
247a8e1175bSopenharmony_ci        size_t overhead = (X_limbs * ciL) - input_length;
248a8e1175bSopenharmony_ci        unsigned char *Xp = (unsigned char *) X;
249a8e1175bSopenharmony_ci        memcpy(Xp + overhead, input, input_length);
250a8e1175bSopenharmony_ci    }
251a8e1175bSopenharmony_ci
252a8e1175bSopenharmony_ci    mbedtls_mpi_core_bigendian_to_host(X, X_limbs);
253a8e1175bSopenharmony_ci
254a8e1175bSopenharmony_ci    return 0;
255a8e1175bSopenharmony_ci}
256a8e1175bSopenharmony_ci
257a8e1175bSopenharmony_ciint mbedtls_mpi_core_write_le(const mbedtls_mpi_uint *A,
258a8e1175bSopenharmony_ci                              size_t A_limbs,
259a8e1175bSopenharmony_ci                              unsigned char *output,
260a8e1175bSopenharmony_ci                              size_t output_length)
261a8e1175bSopenharmony_ci{
262a8e1175bSopenharmony_ci    size_t stored_bytes = A_limbs * ciL;
263a8e1175bSopenharmony_ci    size_t bytes_to_copy;
264a8e1175bSopenharmony_ci
265a8e1175bSopenharmony_ci    if (stored_bytes < output_length) {
266a8e1175bSopenharmony_ci        bytes_to_copy = stored_bytes;
267a8e1175bSopenharmony_ci    } else {
268a8e1175bSopenharmony_ci        bytes_to_copy = output_length;
269a8e1175bSopenharmony_ci
270a8e1175bSopenharmony_ci        /* The output buffer is smaller than the allocated size of A.
271a8e1175bSopenharmony_ci         * However A may fit if its leading bytes are zero. */
272a8e1175bSopenharmony_ci        for (size_t i = bytes_to_copy; i < stored_bytes; i++) {
273a8e1175bSopenharmony_ci            if (GET_BYTE(A, i) != 0) {
274a8e1175bSopenharmony_ci                return MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL;
275a8e1175bSopenharmony_ci            }
276a8e1175bSopenharmony_ci        }
277a8e1175bSopenharmony_ci    }
278a8e1175bSopenharmony_ci
279a8e1175bSopenharmony_ci    for (size_t i = 0; i < bytes_to_copy; i++) {
280a8e1175bSopenharmony_ci        output[i] = GET_BYTE(A, i);
281a8e1175bSopenharmony_ci    }
282a8e1175bSopenharmony_ci
283a8e1175bSopenharmony_ci    if (stored_bytes < output_length) {
284a8e1175bSopenharmony_ci        /* Write trailing 0 bytes */
285a8e1175bSopenharmony_ci        memset(output + stored_bytes, 0, output_length - stored_bytes);
286a8e1175bSopenharmony_ci    }
287a8e1175bSopenharmony_ci
288a8e1175bSopenharmony_ci    return 0;
289a8e1175bSopenharmony_ci}
290a8e1175bSopenharmony_ci
291a8e1175bSopenharmony_ciint mbedtls_mpi_core_write_be(const mbedtls_mpi_uint *X,
292a8e1175bSopenharmony_ci                              size_t X_limbs,
293a8e1175bSopenharmony_ci                              unsigned char *output,
294a8e1175bSopenharmony_ci                              size_t output_length)
295a8e1175bSopenharmony_ci{
296a8e1175bSopenharmony_ci    size_t stored_bytes;
297a8e1175bSopenharmony_ci    size_t bytes_to_copy;
298a8e1175bSopenharmony_ci    unsigned char *p;
299a8e1175bSopenharmony_ci
300a8e1175bSopenharmony_ci    stored_bytes = X_limbs * ciL;
301a8e1175bSopenharmony_ci
302a8e1175bSopenharmony_ci    if (stored_bytes < output_length) {
303a8e1175bSopenharmony_ci        /* There is enough space in the output buffer. Write initial
304a8e1175bSopenharmony_ci         * null bytes and record the position at which to start
305a8e1175bSopenharmony_ci         * writing the significant bytes. In this case, the execution
306a8e1175bSopenharmony_ci         * trace of this function does not depend on the value of the
307a8e1175bSopenharmony_ci         * number. */
308a8e1175bSopenharmony_ci        bytes_to_copy = stored_bytes;
309a8e1175bSopenharmony_ci        p = output + output_length - stored_bytes;
310a8e1175bSopenharmony_ci        memset(output, 0, output_length - stored_bytes);
311a8e1175bSopenharmony_ci    } else {
312a8e1175bSopenharmony_ci        /* The output buffer is smaller than the allocated size of X.
313a8e1175bSopenharmony_ci         * However X may fit if its leading bytes are zero. */
314a8e1175bSopenharmony_ci        bytes_to_copy = output_length;
315a8e1175bSopenharmony_ci        p = output;
316a8e1175bSopenharmony_ci        for (size_t i = bytes_to_copy; i < stored_bytes; i++) {
317a8e1175bSopenharmony_ci            if (GET_BYTE(X, i) != 0) {
318a8e1175bSopenharmony_ci                return MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL;
319a8e1175bSopenharmony_ci            }
320a8e1175bSopenharmony_ci        }
321a8e1175bSopenharmony_ci    }
322a8e1175bSopenharmony_ci
323a8e1175bSopenharmony_ci    for (size_t i = 0; i < bytes_to_copy; i++) {
324a8e1175bSopenharmony_ci        p[bytes_to_copy - i - 1] = GET_BYTE(X, i);
325a8e1175bSopenharmony_ci    }
326a8e1175bSopenharmony_ci
327a8e1175bSopenharmony_ci    return 0;
328a8e1175bSopenharmony_ci}
329a8e1175bSopenharmony_ci
330a8e1175bSopenharmony_civoid mbedtls_mpi_core_shift_r(mbedtls_mpi_uint *X, size_t limbs,
331a8e1175bSopenharmony_ci                              size_t count)
332a8e1175bSopenharmony_ci{
333a8e1175bSopenharmony_ci    size_t i, v0, v1;
334a8e1175bSopenharmony_ci    mbedtls_mpi_uint r0 = 0, r1;
335a8e1175bSopenharmony_ci
336a8e1175bSopenharmony_ci    v0 = count /  biL;
337a8e1175bSopenharmony_ci    v1 = count & (biL - 1);
338a8e1175bSopenharmony_ci
339a8e1175bSopenharmony_ci    if (v0 > limbs || (v0 == limbs && v1 > 0)) {
340a8e1175bSopenharmony_ci        memset(X, 0, limbs * ciL);
341a8e1175bSopenharmony_ci        return;
342a8e1175bSopenharmony_ci    }
343a8e1175bSopenharmony_ci
344a8e1175bSopenharmony_ci    /*
345a8e1175bSopenharmony_ci     * shift by count / limb_size
346a8e1175bSopenharmony_ci     */
347a8e1175bSopenharmony_ci    if (v0 > 0) {
348a8e1175bSopenharmony_ci        for (i = 0; i < limbs - v0; i++) {
349a8e1175bSopenharmony_ci            X[i] = X[i + v0];
350a8e1175bSopenharmony_ci        }
351a8e1175bSopenharmony_ci
352a8e1175bSopenharmony_ci        for (; i < limbs; i++) {
353a8e1175bSopenharmony_ci            X[i] = 0;
354a8e1175bSopenharmony_ci        }
355a8e1175bSopenharmony_ci    }
356a8e1175bSopenharmony_ci
357a8e1175bSopenharmony_ci    /*
358a8e1175bSopenharmony_ci     * shift by count % limb_size
359a8e1175bSopenharmony_ci     */
360a8e1175bSopenharmony_ci    if (v1 > 0) {
361a8e1175bSopenharmony_ci        for (i = limbs; i > 0; i--) {
362a8e1175bSopenharmony_ci            r1 = X[i - 1] << (biL - v1);
363a8e1175bSopenharmony_ci            X[i - 1] >>= v1;
364a8e1175bSopenharmony_ci            X[i - 1] |= r0;
365a8e1175bSopenharmony_ci            r0 = r1;
366a8e1175bSopenharmony_ci        }
367a8e1175bSopenharmony_ci    }
368a8e1175bSopenharmony_ci}
369a8e1175bSopenharmony_ci
370a8e1175bSopenharmony_civoid mbedtls_mpi_core_shift_l(mbedtls_mpi_uint *X, size_t limbs,
371a8e1175bSopenharmony_ci                              size_t count)
372a8e1175bSopenharmony_ci{
373a8e1175bSopenharmony_ci    size_t i, v0, v1;
374a8e1175bSopenharmony_ci    mbedtls_mpi_uint r0 = 0, r1;
375a8e1175bSopenharmony_ci
376a8e1175bSopenharmony_ci    v0 = count / (biL);
377a8e1175bSopenharmony_ci    v1 = count & (biL - 1);
378a8e1175bSopenharmony_ci
379a8e1175bSopenharmony_ci    /*
380a8e1175bSopenharmony_ci     * shift by count / limb_size
381a8e1175bSopenharmony_ci     */
382a8e1175bSopenharmony_ci    if (v0 > 0) {
383a8e1175bSopenharmony_ci        for (i = limbs; i > v0; i--) {
384a8e1175bSopenharmony_ci            X[i - 1] = X[i - v0 - 1];
385a8e1175bSopenharmony_ci        }
386a8e1175bSopenharmony_ci
387a8e1175bSopenharmony_ci        for (; i > 0; i--) {
388a8e1175bSopenharmony_ci            X[i - 1] = 0;
389a8e1175bSopenharmony_ci        }
390a8e1175bSopenharmony_ci    }
391a8e1175bSopenharmony_ci
392a8e1175bSopenharmony_ci    /*
393a8e1175bSopenharmony_ci     * shift by count % limb_size
394a8e1175bSopenharmony_ci     */
395a8e1175bSopenharmony_ci    if (v1 > 0) {
396a8e1175bSopenharmony_ci        for (i = v0; i < limbs; i++) {
397a8e1175bSopenharmony_ci            r1 = X[i] >> (biL - v1);
398a8e1175bSopenharmony_ci            X[i] <<= v1;
399a8e1175bSopenharmony_ci            X[i] |= r0;
400a8e1175bSopenharmony_ci            r0 = r1;
401a8e1175bSopenharmony_ci        }
402a8e1175bSopenharmony_ci    }
403a8e1175bSopenharmony_ci}
404a8e1175bSopenharmony_ci
405a8e1175bSopenharmony_cimbedtls_mpi_uint mbedtls_mpi_core_add(mbedtls_mpi_uint *X,
406a8e1175bSopenharmony_ci                                      const mbedtls_mpi_uint *A,
407a8e1175bSopenharmony_ci                                      const mbedtls_mpi_uint *B,
408a8e1175bSopenharmony_ci                                      size_t limbs)
409a8e1175bSopenharmony_ci{
410a8e1175bSopenharmony_ci    mbedtls_mpi_uint c = 0;
411a8e1175bSopenharmony_ci
412a8e1175bSopenharmony_ci    for (size_t i = 0; i < limbs; i++) {
413a8e1175bSopenharmony_ci        mbedtls_mpi_uint t = c + A[i];
414a8e1175bSopenharmony_ci        c = (t < A[i]);
415a8e1175bSopenharmony_ci        t += B[i];
416a8e1175bSopenharmony_ci        c += (t < B[i]);
417a8e1175bSopenharmony_ci        X[i] = t;
418a8e1175bSopenharmony_ci    }
419a8e1175bSopenharmony_ci
420a8e1175bSopenharmony_ci    return c;
421a8e1175bSopenharmony_ci}
422a8e1175bSopenharmony_ci
423a8e1175bSopenharmony_cimbedtls_mpi_uint mbedtls_mpi_core_add_if(mbedtls_mpi_uint *X,
424a8e1175bSopenharmony_ci                                         const mbedtls_mpi_uint *A,
425a8e1175bSopenharmony_ci                                         size_t limbs,
426a8e1175bSopenharmony_ci                                         unsigned cond)
427a8e1175bSopenharmony_ci{
428a8e1175bSopenharmony_ci    mbedtls_mpi_uint c = 0;
429a8e1175bSopenharmony_ci
430a8e1175bSopenharmony_ci    mbedtls_ct_condition_t do_add = mbedtls_ct_bool(cond);
431a8e1175bSopenharmony_ci
432a8e1175bSopenharmony_ci    for (size_t i = 0; i < limbs; i++) {
433a8e1175bSopenharmony_ci        mbedtls_mpi_uint add = mbedtls_ct_mpi_uint_if_else_0(do_add, A[i]);
434a8e1175bSopenharmony_ci        mbedtls_mpi_uint t = c + X[i];
435a8e1175bSopenharmony_ci        c = (t < X[i]);
436a8e1175bSopenharmony_ci        t += add;
437a8e1175bSopenharmony_ci        c += (t < add);
438a8e1175bSopenharmony_ci        X[i] = t;
439a8e1175bSopenharmony_ci    }
440a8e1175bSopenharmony_ci
441a8e1175bSopenharmony_ci    return c;
442a8e1175bSopenharmony_ci}
443a8e1175bSopenharmony_ci
444a8e1175bSopenharmony_cimbedtls_mpi_uint mbedtls_mpi_core_sub(mbedtls_mpi_uint *X,
445a8e1175bSopenharmony_ci                                      const mbedtls_mpi_uint *A,
446a8e1175bSopenharmony_ci                                      const mbedtls_mpi_uint *B,
447a8e1175bSopenharmony_ci                                      size_t limbs)
448a8e1175bSopenharmony_ci{
449a8e1175bSopenharmony_ci    mbedtls_mpi_uint c = 0;
450a8e1175bSopenharmony_ci
451a8e1175bSopenharmony_ci    for (size_t i = 0; i < limbs; i++) {
452a8e1175bSopenharmony_ci        mbedtls_mpi_uint z = (A[i] < c);
453a8e1175bSopenharmony_ci        mbedtls_mpi_uint t = A[i] - c;
454a8e1175bSopenharmony_ci        c = (t < B[i]) + z;
455a8e1175bSopenharmony_ci        X[i] = t - B[i];
456a8e1175bSopenharmony_ci    }
457a8e1175bSopenharmony_ci
458a8e1175bSopenharmony_ci    return c;
459a8e1175bSopenharmony_ci}
460a8e1175bSopenharmony_ci
461a8e1175bSopenharmony_cimbedtls_mpi_uint mbedtls_mpi_core_mla(mbedtls_mpi_uint *d, size_t d_len,
462a8e1175bSopenharmony_ci                                      const mbedtls_mpi_uint *s, size_t s_len,
463a8e1175bSopenharmony_ci                                      mbedtls_mpi_uint b)
464a8e1175bSopenharmony_ci{
465a8e1175bSopenharmony_ci    mbedtls_mpi_uint c = 0; /* carry */
466a8e1175bSopenharmony_ci    /*
467a8e1175bSopenharmony_ci     * It is a documented precondition of this function that d_len >= s_len.
468a8e1175bSopenharmony_ci     * If that's not the case, we swap these round: this turns what would be
469a8e1175bSopenharmony_ci     * a buffer overflow into an incorrect result.
470a8e1175bSopenharmony_ci     */
471a8e1175bSopenharmony_ci    if (d_len < s_len) {
472a8e1175bSopenharmony_ci        s_len = d_len;
473a8e1175bSopenharmony_ci    }
474a8e1175bSopenharmony_ci    size_t excess_len = d_len - s_len;
475a8e1175bSopenharmony_ci    size_t steps_x8 = s_len / 8;
476a8e1175bSopenharmony_ci    size_t steps_x1 = s_len & 7;
477a8e1175bSopenharmony_ci
478a8e1175bSopenharmony_ci    while (steps_x8--) {
479a8e1175bSopenharmony_ci        MULADDC_X8_INIT
480a8e1175bSopenharmony_ci        MULADDC_X8_CORE
481a8e1175bSopenharmony_ci            MULADDC_X8_STOP
482a8e1175bSopenharmony_ci    }
483a8e1175bSopenharmony_ci
484a8e1175bSopenharmony_ci    while (steps_x1--) {
485a8e1175bSopenharmony_ci        MULADDC_X1_INIT
486a8e1175bSopenharmony_ci        MULADDC_X1_CORE
487a8e1175bSopenharmony_ci            MULADDC_X1_STOP
488a8e1175bSopenharmony_ci    }
489a8e1175bSopenharmony_ci
490a8e1175bSopenharmony_ci    while (excess_len--) {
491a8e1175bSopenharmony_ci        *d += c;
492a8e1175bSopenharmony_ci        c = (*d < c);
493a8e1175bSopenharmony_ci        d++;
494a8e1175bSopenharmony_ci    }
495a8e1175bSopenharmony_ci
496a8e1175bSopenharmony_ci    return c;
497a8e1175bSopenharmony_ci}
498a8e1175bSopenharmony_ci
499a8e1175bSopenharmony_civoid mbedtls_mpi_core_mul(mbedtls_mpi_uint *X,
500a8e1175bSopenharmony_ci                          const mbedtls_mpi_uint *A, size_t A_limbs,
501a8e1175bSopenharmony_ci                          const mbedtls_mpi_uint *B, size_t B_limbs)
502a8e1175bSopenharmony_ci{
503a8e1175bSopenharmony_ci    memset(X, 0, (A_limbs + B_limbs) * ciL);
504a8e1175bSopenharmony_ci
505a8e1175bSopenharmony_ci    for (size_t i = 0; i < B_limbs; i++) {
506a8e1175bSopenharmony_ci        (void) mbedtls_mpi_core_mla(X + i, A_limbs + 1, A, A_limbs, B[i]);
507a8e1175bSopenharmony_ci    }
508a8e1175bSopenharmony_ci}
509a8e1175bSopenharmony_ci
510a8e1175bSopenharmony_ci/*
511a8e1175bSopenharmony_ci * Fast Montgomery initialization (thanks to Tom St Denis).
512a8e1175bSopenharmony_ci */
513a8e1175bSopenharmony_cimbedtls_mpi_uint mbedtls_mpi_core_montmul_init(const mbedtls_mpi_uint *N)
514a8e1175bSopenharmony_ci{
515a8e1175bSopenharmony_ci    mbedtls_mpi_uint x = N[0];
516a8e1175bSopenharmony_ci
517a8e1175bSopenharmony_ci    x += ((N[0] + 2) & 4) << 1;
518a8e1175bSopenharmony_ci
519a8e1175bSopenharmony_ci    for (unsigned int i = biL; i >= 8; i /= 2) {
520a8e1175bSopenharmony_ci        x *= (2 - (N[0] * x));
521a8e1175bSopenharmony_ci    }
522a8e1175bSopenharmony_ci
523a8e1175bSopenharmony_ci    return ~x + 1;
524a8e1175bSopenharmony_ci}
525a8e1175bSopenharmony_ci
526a8e1175bSopenharmony_civoid mbedtls_mpi_core_montmul(mbedtls_mpi_uint *X,
527a8e1175bSopenharmony_ci                              const mbedtls_mpi_uint *A,
528a8e1175bSopenharmony_ci                              const mbedtls_mpi_uint *B,
529a8e1175bSopenharmony_ci                              size_t B_limbs,
530a8e1175bSopenharmony_ci                              const mbedtls_mpi_uint *N,
531a8e1175bSopenharmony_ci                              size_t AN_limbs,
532a8e1175bSopenharmony_ci                              mbedtls_mpi_uint mm,
533a8e1175bSopenharmony_ci                              mbedtls_mpi_uint *T)
534a8e1175bSopenharmony_ci{
535a8e1175bSopenharmony_ci    memset(T, 0, (2 * AN_limbs + 1) * ciL);
536a8e1175bSopenharmony_ci
537a8e1175bSopenharmony_ci    for (size_t i = 0; i < AN_limbs; i++) {
538a8e1175bSopenharmony_ci        /* T = (T + u0*B + u1*N) / 2^biL */
539a8e1175bSopenharmony_ci        mbedtls_mpi_uint u0 = A[i];
540a8e1175bSopenharmony_ci        mbedtls_mpi_uint u1 = (T[0] + u0 * B[0]) * mm;
541a8e1175bSopenharmony_ci
542a8e1175bSopenharmony_ci        (void) mbedtls_mpi_core_mla(T, AN_limbs + 2, B, B_limbs, u0);
543a8e1175bSopenharmony_ci        (void) mbedtls_mpi_core_mla(T, AN_limbs + 2, N, AN_limbs, u1);
544a8e1175bSopenharmony_ci
545a8e1175bSopenharmony_ci        T++;
546a8e1175bSopenharmony_ci    }
547a8e1175bSopenharmony_ci
548a8e1175bSopenharmony_ci    /*
549a8e1175bSopenharmony_ci     * The result we want is (T >= N) ? T - N : T.
550a8e1175bSopenharmony_ci     *
551a8e1175bSopenharmony_ci     * For better constant-time properties in this function, we always do the
552a8e1175bSopenharmony_ci     * subtraction, with the result in X.
553a8e1175bSopenharmony_ci     *
554a8e1175bSopenharmony_ci     * We also look to see if there was any carry in the final additions in the
555a8e1175bSopenharmony_ci     * loop above.
556a8e1175bSopenharmony_ci     */
557a8e1175bSopenharmony_ci
558a8e1175bSopenharmony_ci    mbedtls_mpi_uint carry  = T[AN_limbs];
559a8e1175bSopenharmony_ci    mbedtls_mpi_uint borrow = mbedtls_mpi_core_sub(X, T, N, AN_limbs);
560a8e1175bSopenharmony_ci
561a8e1175bSopenharmony_ci    /*
562a8e1175bSopenharmony_ci     * Using R as the Montgomery radix (auxiliary modulus) i.e. 2^(biL*AN_limbs):
563a8e1175bSopenharmony_ci     *
564a8e1175bSopenharmony_ci     * T can be in one of 3 ranges:
565a8e1175bSopenharmony_ci     *
566a8e1175bSopenharmony_ci     * 1) T < N      : (carry, borrow) = (0, 1): we want T
567a8e1175bSopenharmony_ci     * 2) N <= T < R : (carry, borrow) = (0, 0): we want X
568a8e1175bSopenharmony_ci     * 3) T >= R     : (carry, borrow) = (1, 1): we want X
569a8e1175bSopenharmony_ci     *
570a8e1175bSopenharmony_ci     * and (carry, borrow) = (1, 0) can't happen.
571a8e1175bSopenharmony_ci     *
572a8e1175bSopenharmony_ci     * So the correct return value is already in X if (carry ^ borrow) = 0,
573a8e1175bSopenharmony_ci     * but is in (the lower AN_limbs limbs of) T if (carry ^ borrow) = 1.
574a8e1175bSopenharmony_ci     */
575a8e1175bSopenharmony_ci    mbedtls_ct_memcpy_if(mbedtls_ct_bool(carry ^ borrow),
576a8e1175bSopenharmony_ci                         (unsigned char *) X,
577a8e1175bSopenharmony_ci                         (unsigned char *) T,
578a8e1175bSopenharmony_ci                         NULL,
579a8e1175bSopenharmony_ci                         AN_limbs * sizeof(mbedtls_mpi_uint));
580a8e1175bSopenharmony_ci}
581a8e1175bSopenharmony_ci
582a8e1175bSopenharmony_ciint mbedtls_mpi_core_get_mont_r2_unsafe(mbedtls_mpi *X,
583a8e1175bSopenharmony_ci                                        const mbedtls_mpi *N)
584a8e1175bSopenharmony_ci{
585a8e1175bSopenharmony_ci    int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
586a8e1175bSopenharmony_ci
587a8e1175bSopenharmony_ci    MBEDTLS_MPI_CHK(mbedtls_mpi_lset(X, 1));
588a8e1175bSopenharmony_ci    MBEDTLS_MPI_CHK(mbedtls_mpi_shift_l(X, N->n * 2 * biL));
589a8e1175bSopenharmony_ci    MBEDTLS_MPI_CHK(mbedtls_mpi_mod_mpi(X, X, N));
590a8e1175bSopenharmony_ci    MBEDTLS_MPI_CHK(mbedtls_mpi_shrink(X, N->n));
591a8e1175bSopenharmony_ci
592a8e1175bSopenharmony_cicleanup:
593a8e1175bSopenharmony_ci    return ret;
594a8e1175bSopenharmony_ci}
595a8e1175bSopenharmony_ci
596a8e1175bSopenharmony_ciMBEDTLS_STATIC_TESTABLE
597a8e1175bSopenharmony_civoid mbedtls_mpi_core_ct_uint_table_lookup(mbedtls_mpi_uint *dest,
598a8e1175bSopenharmony_ci                                           const mbedtls_mpi_uint *table,
599a8e1175bSopenharmony_ci                                           size_t limbs,
600a8e1175bSopenharmony_ci                                           size_t count,
601a8e1175bSopenharmony_ci                                           size_t index)
602a8e1175bSopenharmony_ci{
603a8e1175bSopenharmony_ci    for (size_t i = 0; i < count; i++, table += limbs) {
604a8e1175bSopenharmony_ci        mbedtls_ct_condition_t assign = mbedtls_ct_uint_eq(i, index);
605a8e1175bSopenharmony_ci        mbedtls_mpi_core_cond_assign(dest, table, limbs, assign);
606a8e1175bSopenharmony_ci    }
607a8e1175bSopenharmony_ci}
608a8e1175bSopenharmony_ci
609a8e1175bSopenharmony_ci/* Fill X with n_bytes random bytes.
610a8e1175bSopenharmony_ci * X must already have room for those bytes.
611a8e1175bSopenharmony_ci * The ordering of the bytes returned from the RNG is suitable for
612a8e1175bSopenharmony_ci * deterministic ECDSA (see RFC 6979 §3.3 and the specification of
613a8e1175bSopenharmony_ci * mbedtls_mpi_core_random()).
614a8e1175bSopenharmony_ci */
615a8e1175bSopenharmony_ciint mbedtls_mpi_core_fill_random(
616a8e1175bSopenharmony_ci    mbedtls_mpi_uint *X, size_t X_limbs,
617a8e1175bSopenharmony_ci    size_t n_bytes,
618a8e1175bSopenharmony_ci    int (*f_rng)(void *, unsigned char *, size_t), void *p_rng)
619a8e1175bSopenharmony_ci{
620a8e1175bSopenharmony_ci    int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
621a8e1175bSopenharmony_ci    const size_t limbs = CHARS_TO_LIMBS(n_bytes);
622a8e1175bSopenharmony_ci    const size_t overhead = (limbs * ciL) - n_bytes;
623a8e1175bSopenharmony_ci
624a8e1175bSopenharmony_ci    if (X_limbs < limbs) {
625a8e1175bSopenharmony_ci        return MBEDTLS_ERR_MPI_BAD_INPUT_DATA;
626a8e1175bSopenharmony_ci    }
627a8e1175bSopenharmony_ci
628a8e1175bSopenharmony_ci    memset(X, 0, overhead);
629a8e1175bSopenharmony_ci    memset((unsigned char *) X + limbs * ciL, 0, (X_limbs - limbs) * ciL);
630a8e1175bSopenharmony_ci    MBEDTLS_MPI_CHK(f_rng(p_rng, (unsigned char *) X + overhead, n_bytes));
631a8e1175bSopenharmony_ci    mbedtls_mpi_core_bigendian_to_host(X, limbs);
632a8e1175bSopenharmony_ci
633a8e1175bSopenharmony_cicleanup:
634a8e1175bSopenharmony_ci    return ret;
635a8e1175bSopenharmony_ci}
636a8e1175bSopenharmony_ci
637a8e1175bSopenharmony_ciint mbedtls_mpi_core_random(mbedtls_mpi_uint *X,
638a8e1175bSopenharmony_ci                            mbedtls_mpi_uint min,
639a8e1175bSopenharmony_ci                            const mbedtls_mpi_uint *N,
640a8e1175bSopenharmony_ci                            size_t limbs,
641a8e1175bSopenharmony_ci                            int (*f_rng)(void *, unsigned char *, size_t),
642a8e1175bSopenharmony_ci                            void *p_rng)
643a8e1175bSopenharmony_ci{
644a8e1175bSopenharmony_ci    mbedtls_ct_condition_t ge_lower = MBEDTLS_CT_TRUE, lt_upper = MBEDTLS_CT_FALSE;
645a8e1175bSopenharmony_ci    size_t n_bits = mbedtls_mpi_core_bitlen(N, limbs);
646a8e1175bSopenharmony_ci    size_t n_bytes = (n_bits + 7) / 8;
647a8e1175bSopenharmony_ci    int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
648a8e1175bSopenharmony_ci
649a8e1175bSopenharmony_ci    /*
650a8e1175bSopenharmony_ci     * When min == 0, each try has at worst a probability 1/2 of failing
651a8e1175bSopenharmony_ci     * (the msb has a probability 1/2 of being 0, and then the result will
652a8e1175bSopenharmony_ci     * be < N), so after 30 tries failure probability is a most 2**(-30).
653a8e1175bSopenharmony_ci     *
654a8e1175bSopenharmony_ci     * When N is just below a power of 2, as is the case when generating
655a8e1175bSopenharmony_ci     * a random scalar on most elliptic curves, 1 try is enough with
656a8e1175bSopenharmony_ci     * overwhelming probability. When N is just above a power of 2,
657a8e1175bSopenharmony_ci     * as when generating a random scalar on secp224k1, each try has
658a8e1175bSopenharmony_ci     * a probability of failing that is almost 1/2.
659a8e1175bSopenharmony_ci     *
660a8e1175bSopenharmony_ci     * The probabilities are almost the same if min is nonzero but negligible
661a8e1175bSopenharmony_ci     * compared to N. This is always the case when N is crypto-sized, but
662a8e1175bSopenharmony_ci     * it's convenient to support small N for testing purposes. When N
663a8e1175bSopenharmony_ci     * is small, use a higher repeat count, otherwise the probability of
664a8e1175bSopenharmony_ci     * failure is macroscopic.
665a8e1175bSopenharmony_ci     */
666a8e1175bSopenharmony_ci    int count = (n_bytes > 4 ? 30 : 250);
667a8e1175bSopenharmony_ci
668a8e1175bSopenharmony_ci    /*
669a8e1175bSopenharmony_ci     * Match the procedure given in RFC 6979 §3.3 (deterministic ECDSA)
670a8e1175bSopenharmony_ci     * when f_rng is a suitably parametrized instance of HMAC_DRBG:
671a8e1175bSopenharmony_ci     * - use the same byte ordering;
672a8e1175bSopenharmony_ci     * - keep the leftmost n_bits bits of the generated octet string;
673a8e1175bSopenharmony_ci     * - try until result is in the desired range.
674a8e1175bSopenharmony_ci     * This also avoids any bias, which is especially important for ECDSA.
675a8e1175bSopenharmony_ci     */
676a8e1175bSopenharmony_ci    do {
677a8e1175bSopenharmony_ci        MBEDTLS_MPI_CHK(mbedtls_mpi_core_fill_random(X, limbs,
678a8e1175bSopenharmony_ci                                                     n_bytes,
679a8e1175bSopenharmony_ci                                                     f_rng, p_rng));
680a8e1175bSopenharmony_ci        mbedtls_mpi_core_shift_r(X, limbs, 8 * n_bytes - n_bits);
681a8e1175bSopenharmony_ci
682a8e1175bSopenharmony_ci        if (--count == 0) {
683a8e1175bSopenharmony_ci            ret = MBEDTLS_ERR_MPI_NOT_ACCEPTABLE;
684a8e1175bSopenharmony_ci            goto cleanup;
685a8e1175bSopenharmony_ci        }
686a8e1175bSopenharmony_ci
687a8e1175bSopenharmony_ci        ge_lower = mbedtls_mpi_core_uint_le_mpi(min, X, limbs);
688a8e1175bSopenharmony_ci        lt_upper = mbedtls_mpi_core_lt_ct(X, N, limbs);
689a8e1175bSopenharmony_ci    } while (mbedtls_ct_bool_and(ge_lower, lt_upper) == MBEDTLS_CT_FALSE);
690a8e1175bSopenharmony_ci
691a8e1175bSopenharmony_cicleanup:
692a8e1175bSopenharmony_ci    return ret;
693a8e1175bSopenharmony_ci}
694a8e1175bSopenharmony_ci
695a8e1175bSopenharmony_cistatic size_t exp_mod_get_window_size(size_t Ebits)
696a8e1175bSopenharmony_ci{
697a8e1175bSopenharmony_ci#if MBEDTLS_MPI_WINDOW_SIZE >= 6
698a8e1175bSopenharmony_ci    return (Ebits > 671) ? 6 : (Ebits > 239) ? 5 : (Ebits >  79) ? 4 : 1;
699a8e1175bSopenharmony_ci#elif MBEDTLS_MPI_WINDOW_SIZE == 5
700a8e1175bSopenharmony_ci    return (Ebits > 239) ? 5 : (Ebits >  79) ? 4 : 1;
701a8e1175bSopenharmony_ci#elif MBEDTLS_MPI_WINDOW_SIZE > 1
702a8e1175bSopenharmony_ci    return (Ebits >  79) ? MBEDTLS_MPI_WINDOW_SIZE : 1;
703a8e1175bSopenharmony_ci#else
704a8e1175bSopenharmony_ci    (void) Ebits;
705a8e1175bSopenharmony_ci    return 1;
706a8e1175bSopenharmony_ci#endif
707a8e1175bSopenharmony_ci}
708a8e1175bSopenharmony_ci
709a8e1175bSopenharmony_cisize_t mbedtls_mpi_core_exp_mod_working_limbs(size_t AN_limbs, size_t E_limbs)
710a8e1175bSopenharmony_ci{
711a8e1175bSopenharmony_ci    const size_t wsize = exp_mod_get_window_size(E_limbs * biL);
712a8e1175bSopenharmony_ci    const size_t welem = ((size_t) 1) << wsize;
713a8e1175bSopenharmony_ci
714a8e1175bSopenharmony_ci    /* How big does each part of the working memory pool need to be? */
715a8e1175bSopenharmony_ci    const size_t table_limbs   = welem * AN_limbs;
716a8e1175bSopenharmony_ci    const size_t select_limbs  = AN_limbs;
717a8e1175bSopenharmony_ci    const size_t temp_limbs    = 2 * AN_limbs + 1;
718a8e1175bSopenharmony_ci
719a8e1175bSopenharmony_ci    return table_limbs + select_limbs + temp_limbs;
720a8e1175bSopenharmony_ci}
721a8e1175bSopenharmony_ci
722a8e1175bSopenharmony_cistatic void exp_mod_precompute_window(const mbedtls_mpi_uint *A,
723a8e1175bSopenharmony_ci                                      const mbedtls_mpi_uint *N,
724a8e1175bSopenharmony_ci                                      size_t AN_limbs,
725a8e1175bSopenharmony_ci                                      mbedtls_mpi_uint mm,
726a8e1175bSopenharmony_ci                                      const mbedtls_mpi_uint *RR,
727a8e1175bSopenharmony_ci                                      size_t welem,
728a8e1175bSopenharmony_ci                                      mbedtls_mpi_uint *Wtable,
729a8e1175bSopenharmony_ci                                      mbedtls_mpi_uint *temp)
730a8e1175bSopenharmony_ci{
731a8e1175bSopenharmony_ci    /* W[0] = 1 (in Montgomery presentation) */
732a8e1175bSopenharmony_ci    memset(Wtable, 0, AN_limbs * ciL);
733a8e1175bSopenharmony_ci    Wtable[0] = 1;
734a8e1175bSopenharmony_ci    mbedtls_mpi_core_montmul(Wtable, Wtable, RR, AN_limbs, N, AN_limbs, mm, temp);
735a8e1175bSopenharmony_ci
736a8e1175bSopenharmony_ci    /* W[1] = A (already in Montgomery presentation) */
737a8e1175bSopenharmony_ci    mbedtls_mpi_uint *W1 = Wtable + AN_limbs;
738a8e1175bSopenharmony_ci    memcpy(W1, A, AN_limbs * ciL);
739a8e1175bSopenharmony_ci
740a8e1175bSopenharmony_ci    /* W[i+1] = W[i] * W[1], i >= 2 */
741a8e1175bSopenharmony_ci    mbedtls_mpi_uint *Wprev = W1;
742a8e1175bSopenharmony_ci    for (size_t i = 2; i < welem; i++) {
743a8e1175bSopenharmony_ci        mbedtls_mpi_uint *Wcur = Wprev + AN_limbs;
744a8e1175bSopenharmony_ci        mbedtls_mpi_core_montmul(Wcur, Wprev, W1, AN_limbs, N, AN_limbs, mm, temp);
745a8e1175bSopenharmony_ci        Wprev = Wcur;
746a8e1175bSopenharmony_ci    }
747a8e1175bSopenharmony_ci}
748a8e1175bSopenharmony_ci
749a8e1175bSopenharmony_ci/* Exponentiation: X := A^E mod N.
750a8e1175bSopenharmony_ci *
751a8e1175bSopenharmony_ci * A must already be in Montgomery form.
752a8e1175bSopenharmony_ci *
753a8e1175bSopenharmony_ci * As in other bignum functions, assume that AN_limbs and E_limbs are nonzero.
754a8e1175bSopenharmony_ci *
755a8e1175bSopenharmony_ci * RR must contain 2^{2*biL} mod N.
756a8e1175bSopenharmony_ci *
757a8e1175bSopenharmony_ci * The algorithm is a variant of Left-to-right k-ary exponentiation: HAC 14.82
758a8e1175bSopenharmony_ci * (The difference is that the body in our loop processes a single bit instead
759a8e1175bSopenharmony_ci * of a full window.)
760a8e1175bSopenharmony_ci */
761a8e1175bSopenharmony_civoid mbedtls_mpi_core_exp_mod(mbedtls_mpi_uint *X,
762a8e1175bSopenharmony_ci                              const mbedtls_mpi_uint *A,
763a8e1175bSopenharmony_ci                              const mbedtls_mpi_uint *N,
764a8e1175bSopenharmony_ci                              size_t AN_limbs,
765a8e1175bSopenharmony_ci                              const mbedtls_mpi_uint *E,
766a8e1175bSopenharmony_ci                              size_t E_limbs,
767a8e1175bSopenharmony_ci                              const mbedtls_mpi_uint *RR,
768a8e1175bSopenharmony_ci                              mbedtls_mpi_uint *T)
769a8e1175bSopenharmony_ci{
770a8e1175bSopenharmony_ci    const size_t wsize = exp_mod_get_window_size(E_limbs * biL);
771a8e1175bSopenharmony_ci    const size_t welem = ((size_t) 1) << wsize;
772a8e1175bSopenharmony_ci
773a8e1175bSopenharmony_ci    /* This is how we will use the temporary storage T, which must have space
774a8e1175bSopenharmony_ci     * for table_limbs, select_limbs and (2 * AN_limbs + 1) for montmul. */
775a8e1175bSopenharmony_ci    const size_t table_limbs  = welem * AN_limbs;
776a8e1175bSopenharmony_ci    const size_t select_limbs = AN_limbs;
777a8e1175bSopenharmony_ci
778a8e1175bSopenharmony_ci    /* Pointers to specific parts of the temporary working memory pool */
779a8e1175bSopenharmony_ci    mbedtls_mpi_uint *const Wtable  = T;
780a8e1175bSopenharmony_ci    mbedtls_mpi_uint *const Wselect = Wtable  +  table_limbs;
781a8e1175bSopenharmony_ci    mbedtls_mpi_uint *const temp    = Wselect + select_limbs;
782a8e1175bSopenharmony_ci
783a8e1175bSopenharmony_ci    /*
784a8e1175bSopenharmony_ci     * Window precomputation
785a8e1175bSopenharmony_ci     */
786a8e1175bSopenharmony_ci
787a8e1175bSopenharmony_ci    const mbedtls_mpi_uint mm = mbedtls_mpi_core_montmul_init(N);
788a8e1175bSopenharmony_ci
789a8e1175bSopenharmony_ci    /* Set Wtable[i] = A^(2^i) (in Montgomery representation) */
790a8e1175bSopenharmony_ci    exp_mod_precompute_window(A, N, AN_limbs,
791a8e1175bSopenharmony_ci                              mm, RR,
792a8e1175bSopenharmony_ci                              welem, Wtable, temp);
793a8e1175bSopenharmony_ci
794a8e1175bSopenharmony_ci    /*
795a8e1175bSopenharmony_ci     * Fixed window exponentiation
796a8e1175bSopenharmony_ci     */
797a8e1175bSopenharmony_ci
798a8e1175bSopenharmony_ci    /* X = 1 (in Montgomery presentation) initially */
799a8e1175bSopenharmony_ci    memcpy(X, Wtable, AN_limbs * ciL);
800a8e1175bSopenharmony_ci
801a8e1175bSopenharmony_ci    /* We'll process the bits of E from most significant
802a8e1175bSopenharmony_ci     * (limb_index=E_limbs-1, E_bit_index=biL-1) to least significant
803a8e1175bSopenharmony_ci     * (limb_index=0, E_bit_index=0). */
804a8e1175bSopenharmony_ci    size_t E_limb_index = E_limbs;
805a8e1175bSopenharmony_ci    size_t E_bit_index = 0;
806a8e1175bSopenharmony_ci    /* At any given time, window contains window_bits bits from E.
807a8e1175bSopenharmony_ci     * window_bits can go up to wsize. */
808a8e1175bSopenharmony_ci    size_t window_bits = 0;
809a8e1175bSopenharmony_ci    mbedtls_mpi_uint window = 0;
810a8e1175bSopenharmony_ci
811a8e1175bSopenharmony_ci    do {
812a8e1175bSopenharmony_ci        /* Square */
813a8e1175bSopenharmony_ci        mbedtls_mpi_core_montmul(X, X, X, AN_limbs, N, AN_limbs, mm, temp);
814a8e1175bSopenharmony_ci
815a8e1175bSopenharmony_ci        /* Move to the next bit of the exponent */
816a8e1175bSopenharmony_ci        if (E_bit_index == 0) {
817a8e1175bSopenharmony_ci            --E_limb_index;
818a8e1175bSopenharmony_ci            E_bit_index = biL - 1;
819a8e1175bSopenharmony_ci        } else {
820a8e1175bSopenharmony_ci            --E_bit_index;
821a8e1175bSopenharmony_ci        }
822a8e1175bSopenharmony_ci        /* Insert next exponent bit into window */
823a8e1175bSopenharmony_ci        ++window_bits;
824a8e1175bSopenharmony_ci        window <<= 1;
825a8e1175bSopenharmony_ci        window |= (E[E_limb_index] >> E_bit_index) & 1;
826a8e1175bSopenharmony_ci
827a8e1175bSopenharmony_ci        /* Clear window if it's full. Also clear the window at the end,
828a8e1175bSopenharmony_ci         * when we've finished processing the exponent. */
829a8e1175bSopenharmony_ci        if (window_bits == wsize ||
830a8e1175bSopenharmony_ci            (E_bit_index == 0 && E_limb_index == 0)) {
831a8e1175bSopenharmony_ci            /* Select Wtable[window] without leaking window through
832a8e1175bSopenharmony_ci             * memory access patterns. */
833a8e1175bSopenharmony_ci            mbedtls_mpi_core_ct_uint_table_lookup(Wselect, Wtable,
834a8e1175bSopenharmony_ci                                                  AN_limbs, welem, window);
835a8e1175bSopenharmony_ci            /* Multiply X by the selected element. */
836a8e1175bSopenharmony_ci            mbedtls_mpi_core_montmul(X, X, Wselect, AN_limbs, N, AN_limbs, mm,
837a8e1175bSopenharmony_ci                                     temp);
838a8e1175bSopenharmony_ci            window = 0;
839a8e1175bSopenharmony_ci            window_bits = 0;
840a8e1175bSopenharmony_ci        }
841a8e1175bSopenharmony_ci    } while (!(E_bit_index == 0 && E_limb_index == 0));
842a8e1175bSopenharmony_ci}
843a8e1175bSopenharmony_ci
844a8e1175bSopenharmony_cimbedtls_mpi_uint mbedtls_mpi_core_sub_int(mbedtls_mpi_uint *X,
845a8e1175bSopenharmony_ci                                          const mbedtls_mpi_uint *A,
846a8e1175bSopenharmony_ci                                          mbedtls_mpi_uint c,  /* doubles as carry */
847a8e1175bSopenharmony_ci                                          size_t limbs)
848a8e1175bSopenharmony_ci{
849a8e1175bSopenharmony_ci    for (size_t i = 0; i < limbs; i++) {
850a8e1175bSopenharmony_ci        mbedtls_mpi_uint s = A[i];
851a8e1175bSopenharmony_ci        mbedtls_mpi_uint t = s - c;
852a8e1175bSopenharmony_ci        c = (t > s);
853a8e1175bSopenharmony_ci        X[i] = t;
854a8e1175bSopenharmony_ci    }
855a8e1175bSopenharmony_ci
856a8e1175bSopenharmony_ci    return c;
857a8e1175bSopenharmony_ci}
858a8e1175bSopenharmony_ci
859a8e1175bSopenharmony_cimbedtls_ct_condition_t mbedtls_mpi_core_check_zero_ct(const mbedtls_mpi_uint *A,
860a8e1175bSopenharmony_ci                                                      size_t limbs)
861a8e1175bSopenharmony_ci{
862a8e1175bSopenharmony_ci    volatile const mbedtls_mpi_uint *force_read_A = A;
863a8e1175bSopenharmony_ci    mbedtls_mpi_uint bits = 0;
864a8e1175bSopenharmony_ci
865a8e1175bSopenharmony_ci    for (size_t i = 0; i < limbs; i++) {
866a8e1175bSopenharmony_ci        bits |= force_read_A[i];
867a8e1175bSopenharmony_ci    }
868a8e1175bSopenharmony_ci
869a8e1175bSopenharmony_ci    return mbedtls_ct_bool(bits);
870a8e1175bSopenharmony_ci}
871a8e1175bSopenharmony_ci
872a8e1175bSopenharmony_civoid mbedtls_mpi_core_to_mont_rep(mbedtls_mpi_uint *X,
873a8e1175bSopenharmony_ci                                  const mbedtls_mpi_uint *A,
874a8e1175bSopenharmony_ci                                  const mbedtls_mpi_uint *N,
875a8e1175bSopenharmony_ci                                  size_t AN_limbs,
876a8e1175bSopenharmony_ci                                  mbedtls_mpi_uint mm,
877a8e1175bSopenharmony_ci                                  const mbedtls_mpi_uint *rr,
878a8e1175bSopenharmony_ci                                  mbedtls_mpi_uint *T)
879a8e1175bSopenharmony_ci{
880a8e1175bSopenharmony_ci    mbedtls_mpi_core_montmul(X, A, rr, AN_limbs, N, AN_limbs, mm, T);
881a8e1175bSopenharmony_ci}
882a8e1175bSopenharmony_ci
883a8e1175bSopenharmony_civoid mbedtls_mpi_core_from_mont_rep(mbedtls_mpi_uint *X,
884a8e1175bSopenharmony_ci                                    const mbedtls_mpi_uint *A,
885a8e1175bSopenharmony_ci                                    const mbedtls_mpi_uint *N,
886a8e1175bSopenharmony_ci                                    size_t AN_limbs,
887a8e1175bSopenharmony_ci                                    mbedtls_mpi_uint mm,
888a8e1175bSopenharmony_ci                                    mbedtls_mpi_uint *T)
889a8e1175bSopenharmony_ci{
890a8e1175bSopenharmony_ci    const mbedtls_mpi_uint Rinv = 1;    /* 1/R in Mont. rep => 1 */
891a8e1175bSopenharmony_ci
892a8e1175bSopenharmony_ci    mbedtls_mpi_core_montmul(X, A, &Rinv, 1, N, AN_limbs, mm, T);
893a8e1175bSopenharmony_ci}
894a8e1175bSopenharmony_ci
895a8e1175bSopenharmony_ci#endif /* MBEDTLS_BIGNUM_C */
896