1e1051a39Sopenharmony_ci/* 2e1051a39Sopenharmony_ci * Copyright 2014-2022 The OpenSSL Project Authors. All Rights Reserved. 3e1051a39Sopenharmony_ci * Copyright (c) 2014, Intel Corporation. All Rights Reserved. 4e1051a39Sopenharmony_ci * Copyright (c) 2015, CloudFlare, Inc. 5e1051a39Sopenharmony_ci * 6e1051a39Sopenharmony_ci * Licensed under the Apache License 2.0 (the "License"). You may not use 7e1051a39Sopenharmony_ci * this file except in compliance with the License. You can obtain a copy 8e1051a39Sopenharmony_ci * in the file LICENSE in the source distribution or at 9e1051a39Sopenharmony_ci * https://www.openssl.org/source/license.html 10e1051a39Sopenharmony_ci * 11e1051a39Sopenharmony_ci * Originally written by Shay Gueron (1, 2), and Vlad Krasnov (1, 3) 12e1051a39Sopenharmony_ci * (1) Intel Corporation, Israel Development Center, Haifa, Israel 13e1051a39Sopenharmony_ci * (2) University of Haifa, Israel 14e1051a39Sopenharmony_ci * (3) CloudFlare, Inc. 15e1051a39Sopenharmony_ci * 16e1051a39Sopenharmony_ci * Reference: 17e1051a39Sopenharmony_ci * S.Gueron and V.Krasnov, "Fast Prime Field Elliptic Curve Cryptography with 18e1051a39Sopenharmony_ci * 256 Bit Primes" 19e1051a39Sopenharmony_ci */ 20e1051a39Sopenharmony_ci 21e1051a39Sopenharmony_ci/* 22e1051a39Sopenharmony_ci * ECDSA low level APIs are deprecated for public use, but still ok for 23e1051a39Sopenharmony_ci * internal use. 24e1051a39Sopenharmony_ci */ 25e1051a39Sopenharmony_ci#include "internal/deprecated.h" 26e1051a39Sopenharmony_ci 27e1051a39Sopenharmony_ci#include <string.h> 28e1051a39Sopenharmony_ci 29e1051a39Sopenharmony_ci#include "internal/cryptlib.h" 30e1051a39Sopenharmony_ci#include "crypto/bn.h" 31e1051a39Sopenharmony_ci#include "ec_local.h" 32e1051a39Sopenharmony_ci#include "internal/refcount.h" 33e1051a39Sopenharmony_ci 34e1051a39Sopenharmony_ci#if BN_BITS2 != 64 35e1051a39Sopenharmony_ci# define TOBN(hi,lo) lo,hi 36e1051a39Sopenharmony_ci#else 37e1051a39Sopenharmony_ci# define TOBN(hi,lo) ((BN_ULONG)hi<<32|lo) 38e1051a39Sopenharmony_ci#endif 39e1051a39Sopenharmony_ci 40e1051a39Sopenharmony_ci#if defined(__GNUC__) 41e1051a39Sopenharmony_ci# define ALIGN32 __attribute((aligned(32))) 42e1051a39Sopenharmony_ci#elif defined(_MSC_VER) 43e1051a39Sopenharmony_ci# define ALIGN32 __declspec(align(32)) 44e1051a39Sopenharmony_ci#else 45e1051a39Sopenharmony_ci# define ALIGN32 46e1051a39Sopenharmony_ci#endif 47e1051a39Sopenharmony_ci 48e1051a39Sopenharmony_ci#define ALIGNPTR(p,N) ((unsigned char *)p+N-(size_t)p%N) 49e1051a39Sopenharmony_ci#define P256_LIMBS (256/BN_BITS2) 50e1051a39Sopenharmony_ci 51e1051a39Sopenharmony_citypedef unsigned short u16; 52e1051a39Sopenharmony_ci 53e1051a39Sopenharmony_citypedef struct { 54e1051a39Sopenharmony_ci BN_ULONG X[P256_LIMBS]; 55e1051a39Sopenharmony_ci BN_ULONG Y[P256_LIMBS]; 56e1051a39Sopenharmony_ci BN_ULONG Z[P256_LIMBS]; 57e1051a39Sopenharmony_ci} P256_POINT; 58e1051a39Sopenharmony_ci 59e1051a39Sopenharmony_citypedef struct { 60e1051a39Sopenharmony_ci BN_ULONG X[P256_LIMBS]; 61e1051a39Sopenharmony_ci BN_ULONG Y[P256_LIMBS]; 62e1051a39Sopenharmony_ci} P256_POINT_AFFINE; 63e1051a39Sopenharmony_ci 64e1051a39Sopenharmony_citypedef P256_POINT_AFFINE PRECOMP256_ROW[64]; 65e1051a39Sopenharmony_ci 66e1051a39Sopenharmony_ci/* structure for precomputed multiples of the generator */ 67e1051a39Sopenharmony_cistruct nistz256_pre_comp_st { 68e1051a39Sopenharmony_ci const EC_GROUP *group; /* Parent EC_GROUP object */ 69e1051a39Sopenharmony_ci size_t w; /* Window size */ 70e1051a39Sopenharmony_ci /* 71e1051a39Sopenharmony_ci * Constant time access to the X and Y coordinates of the pre-computed, 72e1051a39Sopenharmony_ci * generator multiplies, in the Montgomery domain. Pre-calculated 73e1051a39Sopenharmony_ci * multiplies are stored in affine form. 74e1051a39Sopenharmony_ci */ 75e1051a39Sopenharmony_ci PRECOMP256_ROW *precomp; 76e1051a39Sopenharmony_ci void *precomp_storage; 77e1051a39Sopenharmony_ci CRYPTO_REF_COUNT references; 78e1051a39Sopenharmony_ci CRYPTO_RWLOCK *lock; 79e1051a39Sopenharmony_ci}; 80e1051a39Sopenharmony_ci 81e1051a39Sopenharmony_ci/* Functions implemented in assembly */ 82e1051a39Sopenharmony_ci/* 83e1051a39Sopenharmony_ci * Most of below mentioned functions *preserve* the property of inputs 84e1051a39Sopenharmony_ci * being fully reduced, i.e. being in [0, modulus) range. Simply put if 85e1051a39Sopenharmony_ci * inputs are fully reduced, then output is too. Note that reverse is 86e1051a39Sopenharmony_ci * not true, in sense that given partially reduced inputs output can be 87e1051a39Sopenharmony_ci * either, not unlikely reduced. And "most" in first sentence refers to 88e1051a39Sopenharmony_ci * the fact that given the calculations flow one can tolerate that 89e1051a39Sopenharmony_ci * addition, 1st function below, produces partially reduced result *if* 90e1051a39Sopenharmony_ci * multiplications by 2 and 3, which customarily use addition, fully 91e1051a39Sopenharmony_ci * reduce it. This effectively gives two options: a) addition produces 92e1051a39Sopenharmony_ci * fully reduced result [as long as inputs are, just like remaining 93e1051a39Sopenharmony_ci * functions]; b) addition is allowed to produce partially reduced 94e1051a39Sopenharmony_ci * result, but multiplications by 2 and 3 perform additional reduction 95e1051a39Sopenharmony_ci * step. Choice between the two can be platform-specific, but it was a) 96e1051a39Sopenharmony_ci * in all cases so far... 97e1051a39Sopenharmony_ci */ 98e1051a39Sopenharmony_ci/* Modular add: res = a+b mod P */ 99e1051a39Sopenharmony_civoid ecp_nistz256_add(BN_ULONG res[P256_LIMBS], 100e1051a39Sopenharmony_ci const BN_ULONG a[P256_LIMBS], 101e1051a39Sopenharmony_ci const BN_ULONG b[P256_LIMBS]); 102e1051a39Sopenharmony_ci/* Modular mul by 2: res = 2*a mod P */ 103e1051a39Sopenharmony_civoid ecp_nistz256_mul_by_2(BN_ULONG res[P256_LIMBS], 104e1051a39Sopenharmony_ci const BN_ULONG a[P256_LIMBS]); 105e1051a39Sopenharmony_ci/* Modular mul by 3: res = 3*a mod P */ 106e1051a39Sopenharmony_civoid ecp_nistz256_mul_by_3(BN_ULONG res[P256_LIMBS], 107e1051a39Sopenharmony_ci const BN_ULONG a[P256_LIMBS]); 108e1051a39Sopenharmony_ci 109e1051a39Sopenharmony_ci/* Modular div by 2: res = a/2 mod P */ 110e1051a39Sopenharmony_civoid ecp_nistz256_div_by_2(BN_ULONG res[P256_LIMBS], 111e1051a39Sopenharmony_ci const BN_ULONG a[P256_LIMBS]); 112e1051a39Sopenharmony_ci/* Modular sub: res = a-b mod P */ 113e1051a39Sopenharmony_civoid ecp_nistz256_sub(BN_ULONG res[P256_LIMBS], 114e1051a39Sopenharmony_ci const BN_ULONG a[P256_LIMBS], 115e1051a39Sopenharmony_ci const BN_ULONG b[P256_LIMBS]); 116e1051a39Sopenharmony_ci/* Modular neg: res = -a mod P */ 117e1051a39Sopenharmony_civoid ecp_nistz256_neg(BN_ULONG res[P256_LIMBS], const BN_ULONG a[P256_LIMBS]); 118e1051a39Sopenharmony_ci/* Montgomery mul: res = a*b*2^-256 mod P */ 119e1051a39Sopenharmony_civoid ecp_nistz256_mul_mont(BN_ULONG res[P256_LIMBS], 120e1051a39Sopenharmony_ci const BN_ULONG a[P256_LIMBS], 121e1051a39Sopenharmony_ci const BN_ULONG b[P256_LIMBS]); 122e1051a39Sopenharmony_ci/* Montgomery sqr: res = a*a*2^-256 mod P */ 123e1051a39Sopenharmony_civoid ecp_nistz256_sqr_mont(BN_ULONG res[P256_LIMBS], 124e1051a39Sopenharmony_ci const BN_ULONG a[P256_LIMBS]); 125e1051a39Sopenharmony_ci/* Convert a number from Montgomery domain, by multiplying with 1 */ 126e1051a39Sopenharmony_civoid ecp_nistz256_from_mont(BN_ULONG res[P256_LIMBS], 127e1051a39Sopenharmony_ci const BN_ULONG in[P256_LIMBS]); 128e1051a39Sopenharmony_ci/* Convert a number to Montgomery domain, by multiplying with 2^512 mod P*/ 129e1051a39Sopenharmony_civoid ecp_nistz256_to_mont(BN_ULONG res[P256_LIMBS], 130e1051a39Sopenharmony_ci const BN_ULONG in[P256_LIMBS]); 131e1051a39Sopenharmony_ci/* Functions that perform constant time access to the precomputed tables */ 132e1051a39Sopenharmony_civoid ecp_nistz256_scatter_w5(P256_POINT *val, 133e1051a39Sopenharmony_ci const P256_POINT *in_t, int idx); 134e1051a39Sopenharmony_civoid ecp_nistz256_gather_w5(P256_POINT *val, 135e1051a39Sopenharmony_ci const P256_POINT *in_t, int idx); 136e1051a39Sopenharmony_civoid ecp_nistz256_scatter_w7(P256_POINT_AFFINE *val, 137e1051a39Sopenharmony_ci const P256_POINT_AFFINE *in_t, int idx); 138e1051a39Sopenharmony_civoid ecp_nistz256_gather_w7(P256_POINT_AFFINE *val, 139e1051a39Sopenharmony_ci const P256_POINT_AFFINE *in_t, int idx); 140e1051a39Sopenharmony_ci 141e1051a39Sopenharmony_ci/* One converted into the Montgomery domain */ 142e1051a39Sopenharmony_cistatic const BN_ULONG ONE[P256_LIMBS] = { 143e1051a39Sopenharmony_ci TOBN(0x00000000, 0x00000001), TOBN(0xffffffff, 0x00000000), 144e1051a39Sopenharmony_ci TOBN(0xffffffff, 0xffffffff), TOBN(0x00000000, 0xfffffffe) 145e1051a39Sopenharmony_ci}; 146e1051a39Sopenharmony_ci 147e1051a39Sopenharmony_cistatic NISTZ256_PRE_COMP *ecp_nistz256_pre_comp_new(const EC_GROUP *group); 148e1051a39Sopenharmony_ci 149e1051a39Sopenharmony_ci/* Precomputed tables for the default generator */ 150e1051a39Sopenharmony_ciextern const PRECOMP256_ROW ecp_nistz256_precomputed[37]; 151e1051a39Sopenharmony_ci 152e1051a39Sopenharmony_ci/* Recode window to a signed digit, see ecp_nistputil.c for details */ 153e1051a39Sopenharmony_cistatic unsigned int _booth_recode_w5(unsigned int in) 154e1051a39Sopenharmony_ci{ 155e1051a39Sopenharmony_ci unsigned int s, d; 156e1051a39Sopenharmony_ci 157e1051a39Sopenharmony_ci s = ~((in >> 5) - 1); 158e1051a39Sopenharmony_ci d = (1 << 6) - in - 1; 159e1051a39Sopenharmony_ci d = (d & s) | (in & ~s); 160e1051a39Sopenharmony_ci d = (d >> 1) + (d & 1); 161e1051a39Sopenharmony_ci 162e1051a39Sopenharmony_ci return (d << 1) + (s & 1); 163e1051a39Sopenharmony_ci} 164e1051a39Sopenharmony_ci 165e1051a39Sopenharmony_cistatic unsigned int _booth_recode_w7(unsigned int in) 166e1051a39Sopenharmony_ci{ 167e1051a39Sopenharmony_ci unsigned int s, d; 168e1051a39Sopenharmony_ci 169e1051a39Sopenharmony_ci s = ~((in >> 7) - 1); 170e1051a39Sopenharmony_ci d = (1 << 8) - in - 1; 171e1051a39Sopenharmony_ci d = (d & s) | (in & ~s); 172e1051a39Sopenharmony_ci d = (d >> 1) + (d & 1); 173e1051a39Sopenharmony_ci 174e1051a39Sopenharmony_ci return (d << 1) + (s & 1); 175e1051a39Sopenharmony_ci} 176e1051a39Sopenharmony_ci 177e1051a39Sopenharmony_cistatic void copy_conditional(BN_ULONG dst[P256_LIMBS], 178e1051a39Sopenharmony_ci const BN_ULONG src[P256_LIMBS], BN_ULONG move) 179e1051a39Sopenharmony_ci{ 180e1051a39Sopenharmony_ci BN_ULONG mask1 = 0-move; 181e1051a39Sopenharmony_ci BN_ULONG mask2 = ~mask1; 182e1051a39Sopenharmony_ci 183e1051a39Sopenharmony_ci dst[0] = (src[0] & mask1) ^ (dst[0] & mask2); 184e1051a39Sopenharmony_ci dst[1] = (src[1] & mask1) ^ (dst[1] & mask2); 185e1051a39Sopenharmony_ci dst[2] = (src[2] & mask1) ^ (dst[2] & mask2); 186e1051a39Sopenharmony_ci dst[3] = (src[3] & mask1) ^ (dst[3] & mask2); 187e1051a39Sopenharmony_ci if (P256_LIMBS == 8) { 188e1051a39Sopenharmony_ci dst[4] = (src[4] & mask1) ^ (dst[4] & mask2); 189e1051a39Sopenharmony_ci dst[5] = (src[5] & mask1) ^ (dst[5] & mask2); 190e1051a39Sopenharmony_ci dst[6] = (src[6] & mask1) ^ (dst[6] & mask2); 191e1051a39Sopenharmony_ci dst[7] = (src[7] & mask1) ^ (dst[7] & mask2); 192e1051a39Sopenharmony_ci } 193e1051a39Sopenharmony_ci} 194e1051a39Sopenharmony_ci 195e1051a39Sopenharmony_cistatic BN_ULONG is_zero(BN_ULONG in) 196e1051a39Sopenharmony_ci{ 197e1051a39Sopenharmony_ci in |= (0 - in); 198e1051a39Sopenharmony_ci in = ~in; 199e1051a39Sopenharmony_ci in >>= BN_BITS2 - 1; 200e1051a39Sopenharmony_ci return in; 201e1051a39Sopenharmony_ci} 202e1051a39Sopenharmony_ci 203e1051a39Sopenharmony_cistatic BN_ULONG is_equal(const BN_ULONG a[P256_LIMBS], 204e1051a39Sopenharmony_ci const BN_ULONG b[P256_LIMBS]) 205e1051a39Sopenharmony_ci{ 206e1051a39Sopenharmony_ci BN_ULONG res; 207e1051a39Sopenharmony_ci 208e1051a39Sopenharmony_ci res = a[0] ^ b[0]; 209e1051a39Sopenharmony_ci res |= a[1] ^ b[1]; 210e1051a39Sopenharmony_ci res |= a[2] ^ b[2]; 211e1051a39Sopenharmony_ci res |= a[3] ^ b[3]; 212e1051a39Sopenharmony_ci if (P256_LIMBS == 8) { 213e1051a39Sopenharmony_ci res |= a[4] ^ b[4]; 214e1051a39Sopenharmony_ci res |= a[5] ^ b[5]; 215e1051a39Sopenharmony_ci res |= a[6] ^ b[6]; 216e1051a39Sopenharmony_ci res |= a[7] ^ b[7]; 217e1051a39Sopenharmony_ci } 218e1051a39Sopenharmony_ci 219e1051a39Sopenharmony_ci return is_zero(res); 220e1051a39Sopenharmony_ci} 221e1051a39Sopenharmony_ci 222e1051a39Sopenharmony_cistatic BN_ULONG is_one(const BIGNUM *z) 223e1051a39Sopenharmony_ci{ 224e1051a39Sopenharmony_ci BN_ULONG res = 0; 225e1051a39Sopenharmony_ci BN_ULONG *a = bn_get_words(z); 226e1051a39Sopenharmony_ci 227e1051a39Sopenharmony_ci if (bn_get_top(z) == (P256_LIMBS - P256_LIMBS / 8)) { 228e1051a39Sopenharmony_ci res = a[0] ^ ONE[0]; 229e1051a39Sopenharmony_ci res |= a[1] ^ ONE[1]; 230e1051a39Sopenharmony_ci res |= a[2] ^ ONE[2]; 231e1051a39Sopenharmony_ci res |= a[3] ^ ONE[3]; 232e1051a39Sopenharmony_ci if (P256_LIMBS == 8) { 233e1051a39Sopenharmony_ci res |= a[4] ^ ONE[4]; 234e1051a39Sopenharmony_ci res |= a[5] ^ ONE[5]; 235e1051a39Sopenharmony_ci res |= a[6] ^ ONE[6]; 236e1051a39Sopenharmony_ci /* 237e1051a39Sopenharmony_ci * no check for a[7] (being zero) on 32-bit platforms, 238e1051a39Sopenharmony_ci * because value of "one" takes only 7 limbs. 239e1051a39Sopenharmony_ci */ 240e1051a39Sopenharmony_ci } 241e1051a39Sopenharmony_ci res = is_zero(res); 242e1051a39Sopenharmony_ci } 243e1051a39Sopenharmony_ci 244e1051a39Sopenharmony_ci return res; 245e1051a39Sopenharmony_ci} 246e1051a39Sopenharmony_ci 247e1051a39Sopenharmony_ci/* 248e1051a39Sopenharmony_ci * For reference, this macro is used only when new ecp_nistz256 assembly 249e1051a39Sopenharmony_ci * module is being developed. For example, configure with 250e1051a39Sopenharmony_ci * -DECP_NISTZ256_REFERENCE_IMPLEMENTATION and implement only functions 251e1051a39Sopenharmony_ci * performing simplest arithmetic operations on 256-bit vectors. Then 252e1051a39Sopenharmony_ci * work on implementation of higher-level functions performing point 253e1051a39Sopenharmony_ci * operations. Then remove ECP_NISTZ256_REFERENCE_IMPLEMENTATION 254e1051a39Sopenharmony_ci * and never define it again. (The correct macro denoting presence of 255e1051a39Sopenharmony_ci * ecp_nistz256 module is ECP_NISTZ256_ASM.) 256e1051a39Sopenharmony_ci */ 257e1051a39Sopenharmony_ci#ifndef ECP_NISTZ256_REFERENCE_IMPLEMENTATION 258e1051a39Sopenharmony_civoid ecp_nistz256_point_double(P256_POINT *r, const P256_POINT *a); 259e1051a39Sopenharmony_civoid ecp_nistz256_point_add(P256_POINT *r, 260e1051a39Sopenharmony_ci const P256_POINT *a, const P256_POINT *b); 261e1051a39Sopenharmony_civoid ecp_nistz256_point_add_affine(P256_POINT *r, 262e1051a39Sopenharmony_ci const P256_POINT *a, 263e1051a39Sopenharmony_ci const P256_POINT_AFFINE *b); 264e1051a39Sopenharmony_ci#else 265e1051a39Sopenharmony_ci/* Point double: r = 2*a */ 266e1051a39Sopenharmony_cistatic void ecp_nistz256_point_double(P256_POINT *r, const P256_POINT *a) 267e1051a39Sopenharmony_ci{ 268e1051a39Sopenharmony_ci BN_ULONG S[P256_LIMBS]; 269e1051a39Sopenharmony_ci BN_ULONG M[P256_LIMBS]; 270e1051a39Sopenharmony_ci BN_ULONG Zsqr[P256_LIMBS]; 271e1051a39Sopenharmony_ci BN_ULONG tmp0[P256_LIMBS]; 272e1051a39Sopenharmony_ci 273e1051a39Sopenharmony_ci const BN_ULONG *in_x = a->X; 274e1051a39Sopenharmony_ci const BN_ULONG *in_y = a->Y; 275e1051a39Sopenharmony_ci const BN_ULONG *in_z = a->Z; 276e1051a39Sopenharmony_ci 277e1051a39Sopenharmony_ci BN_ULONG *res_x = r->X; 278e1051a39Sopenharmony_ci BN_ULONG *res_y = r->Y; 279e1051a39Sopenharmony_ci BN_ULONG *res_z = r->Z; 280e1051a39Sopenharmony_ci 281e1051a39Sopenharmony_ci ecp_nistz256_mul_by_2(S, in_y); 282e1051a39Sopenharmony_ci 283e1051a39Sopenharmony_ci ecp_nistz256_sqr_mont(Zsqr, in_z); 284e1051a39Sopenharmony_ci 285e1051a39Sopenharmony_ci ecp_nistz256_sqr_mont(S, S); 286e1051a39Sopenharmony_ci 287e1051a39Sopenharmony_ci ecp_nistz256_mul_mont(res_z, in_z, in_y); 288e1051a39Sopenharmony_ci ecp_nistz256_mul_by_2(res_z, res_z); 289e1051a39Sopenharmony_ci 290e1051a39Sopenharmony_ci ecp_nistz256_add(M, in_x, Zsqr); 291e1051a39Sopenharmony_ci ecp_nistz256_sub(Zsqr, in_x, Zsqr); 292e1051a39Sopenharmony_ci 293e1051a39Sopenharmony_ci ecp_nistz256_sqr_mont(res_y, S); 294e1051a39Sopenharmony_ci ecp_nistz256_div_by_2(res_y, res_y); 295e1051a39Sopenharmony_ci 296e1051a39Sopenharmony_ci ecp_nistz256_mul_mont(M, M, Zsqr); 297e1051a39Sopenharmony_ci ecp_nistz256_mul_by_3(M, M); 298e1051a39Sopenharmony_ci 299e1051a39Sopenharmony_ci ecp_nistz256_mul_mont(S, S, in_x); 300e1051a39Sopenharmony_ci ecp_nistz256_mul_by_2(tmp0, S); 301e1051a39Sopenharmony_ci 302e1051a39Sopenharmony_ci ecp_nistz256_sqr_mont(res_x, M); 303e1051a39Sopenharmony_ci 304e1051a39Sopenharmony_ci ecp_nistz256_sub(res_x, res_x, tmp0); 305e1051a39Sopenharmony_ci ecp_nistz256_sub(S, S, res_x); 306e1051a39Sopenharmony_ci 307e1051a39Sopenharmony_ci ecp_nistz256_mul_mont(S, S, M); 308e1051a39Sopenharmony_ci ecp_nistz256_sub(res_y, S, res_y); 309e1051a39Sopenharmony_ci} 310e1051a39Sopenharmony_ci 311e1051a39Sopenharmony_ci/* Point addition: r = a+b */ 312e1051a39Sopenharmony_cistatic void ecp_nistz256_point_add(P256_POINT *r, 313e1051a39Sopenharmony_ci const P256_POINT *a, const P256_POINT *b) 314e1051a39Sopenharmony_ci{ 315e1051a39Sopenharmony_ci BN_ULONG U2[P256_LIMBS], S2[P256_LIMBS]; 316e1051a39Sopenharmony_ci BN_ULONG U1[P256_LIMBS], S1[P256_LIMBS]; 317e1051a39Sopenharmony_ci BN_ULONG Z1sqr[P256_LIMBS]; 318e1051a39Sopenharmony_ci BN_ULONG Z2sqr[P256_LIMBS]; 319e1051a39Sopenharmony_ci BN_ULONG H[P256_LIMBS], R[P256_LIMBS]; 320e1051a39Sopenharmony_ci BN_ULONG Hsqr[P256_LIMBS]; 321e1051a39Sopenharmony_ci BN_ULONG Rsqr[P256_LIMBS]; 322e1051a39Sopenharmony_ci BN_ULONG Hcub[P256_LIMBS]; 323e1051a39Sopenharmony_ci 324e1051a39Sopenharmony_ci BN_ULONG res_x[P256_LIMBS]; 325e1051a39Sopenharmony_ci BN_ULONG res_y[P256_LIMBS]; 326e1051a39Sopenharmony_ci BN_ULONG res_z[P256_LIMBS]; 327e1051a39Sopenharmony_ci 328e1051a39Sopenharmony_ci BN_ULONG in1infty, in2infty; 329e1051a39Sopenharmony_ci 330e1051a39Sopenharmony_ci const BN_ULONG *in1_x = a->X; 331e1051a39Sopenharmony_ci const BN_ULONG *in1_y = a->Y; 332e1051a39Sopenharmony_ci const BN_ULONG *in1_z = a->Z; 333e1051a39Sopenharmony_ci 334e1051a39Sopenharmony_ci const BN_ULONG *in2_x = b->X; 335e1051a39Sopenharmony_ci const BN_ULONG *in2_y = b->Y; 336e1051a39Sopenharmony_ci const BN_ULONG *in2_z = b->Z; 337e1051a39Sopenharmony_ci 338e1051a39Sopenharmony_ci /* 339e1051a39Sopenharmony_ci * Infinity in encoded as (,,0) 340e1051a39Sopenharmony_ci */ 341e1051a39Sopenharmony_ci in1infty = (in1_z[0] | in1_z[1] | in1_z[2] | in1_z[3]); 342e1051a39Sopenharmony_ci if (P256_LIMBS == 8) 343e1051a39Sopenharmony_ci in1infty |= (in1_z[4] | in1_z[5] | in1_z[6] | in1_z[7]); 344e1051a39Sopenharmony_ci 345e1051a39Sopenharmony_ci in2infty = (in2_z[0] | in2_z[1] | in2_z[2] | in2_z[3]); 346e1051a39Sopenharmony_ci if (P256_LIMBS == 8) 347e1051a39Sopenharmony_ci in2infty |= (in2_z[4] | in2_z[5] | in2_z[6] | in2_z[7]); 348e1051a39Sopenharmony_ci 349e1051a39Sopenharmony_ci in1infty = is_zero(in1infty); 350e1051a39Sopenharmony_ci in2infty = is_zero(in2infty); 351e1051a39Sopenharmony_ci 352e1051a39Sopenharmony_ci ecp_nistz256_sqr_mont(Z2sqr, in2_z); /* Z2^2 */ 353e1051a39Sopenharmony_ci ecp_nistz256_sqr_mont(Z1sqr, in1_z); /* Z1^2 */ 354e1051a39Sopenharmony_ci 355e1051a39Sopenharmony_ci ecp_nistz256_mul_mont(S1, Z2sqr, in2_z); /* S1 = Z2^3 */ 356e1051a39Sopenharmony_ci ecp_nistz256_mul_mont(S2, Z1sqr, in1_z); /* S2 = Z1^3 */ 357e1051a39Sopenharmony_ci 358e1051a39Sopenharmony_ci ecp_nistz256_mul_mont(S1, S1, in1_y); /* S1 = Y1*Z2^3 */ 359e1051a39Sopenharmony_ci ecp_nistz256_mul_mont(S2, S2, in2_y); /* S2 = Y2*Z1^3 */ 360e1051a39Sopenharmony_ci ecp_nistz256_sub(R, S2, S1); /* R = S2 - S1 */ 361e1051a39Sopenharmony_ci 362e1051a39Sopenharmony_ci ecp_nistz256_mul_mont(U1, in1_x, Z2sqr); /* U1 = X1*Z2^2 */ 363e1051a39Sopenharmony_ci ecp_nistz256_mul_mont(U2, in2_x, Z1sqr); /* U2 = X2*Z1^2 */ 364e1051a39Sopenharmony_ci ecp_nistz256_sub(H, U2, U1); /* H = U2 - U1 */ 365e1051a39Sopenharmony_ci 366e1051a39Sopenharmony_ci /* 367e1051a39Sopenharmony_ci * The formulae are incorrect if the points are equal so we check for 368e1051a39Sopenharmony_ci * this and do doubling if this happens. 369e1051a39Sopenharmony_ci * 370e1051a39Sopenharmony_ci * Points here are in Jacobian projective coordinates (Xi, Yi, Zi) 371e1051a39Sopenharmony_ci * that are bound to the affine coordinates (xi, yi) by the following 372e1051a39Sopenharmony_ci * equations: 373e1051a39Sopenharmony_ci * - xi = Xi / (Zi)^2 374e1051a39Sopenharmony_ci * - y1 = Yi / (Zi)^3 375e1051a39Sopenharmony_ci * 376e1051a39Sopenharmony_ci * For the sake of optimization, the algorithm operates over 377e1051a39Sopenharmony_ci * intermediate variables U1, U2 and S1, S2 that are derived from 378e1051a39Sopenharmony_ci * the projective coordinates: 379e1051a39Sopenharmony_ci * - U1 = X1 * (Z2)^2 ; U2 = X2 * (Z1)^2 380e1051a39Sopenharmony_ci * - S1 = Y1 * (Z2)^3 ; S2 = Y2 * (Z1)^3 381e1051a39Sopenharmony_ci * 382e1051a39Sopenharmony_ci * It is easy to prove that is_equal(U1, U2) implies that the affine 383e1051a39Sopenharmony_ci * x-coordinates are equal, or either point is at infinity. 384e1051a39Sopenharmony_ci * Likewise is_equal(S1, S2) implies that the affine y-coordinates are 385e1051a39Sopenharmony_ci * equal, or either point is at infinity. 386e1051a39Sopenharmony_ci * 387e1051a39Sopenharmony_ci * The special case of either point being the point at infinity (Z1 or Z2 388e1051a39Sopenharmony_ci * is zero), is handled separately later on in this function, so we avoid 389e1051a39Sopenharmony_ci * jumping to point_double here in those special cases. 390e1051a39Sopenharmony_ci * 391e1051a39Sopenharmony_ci * When both points are inverse of each other, we know that the affine 392e1051a39Sopenharmony_ci * x-coordinates are equal, and the y-coordinates have different sign. 393e1051a39Sopenharmony_ci * Therefore since U1 = U2, we know H = 0, and therefore Z3 = H*Z1*Z2 394e1051a39Sopenharmony_ci * will equal 0, thus the result is infinity, if we simply let this 395e1051a39Sopenharmony_ci * function continue normally. 396e1051a39Sopenharmony_ci * 397e1051a39Sopenharmony_ci * We use bitwise operations to avoid potential side-channels introduced by 398e1051a39Sopenharmony_ci * the short-circuiting behaviour of boolean operators. 399e1051a39Sopenharmony_ci */ 400e1051a39Sopenharmony_ci if (is_equal(U1, U2) & ~in1infty & ~in2infty & is_equal(S1, S2)) { 401e1051a39Sopenharmony_ci /* 402e1051a39Sopenharmony_ci * This is obviously not constant-time but it should never happen during 403e1051a39Sopenharmony_ci * single point multiplication, so there is no timing leak for ECDH or 404e1051a39Sopenharmony_ci * ECDSA signing. 405e1051a39Sopenharmony_ci */ 406e1051a39Sopenharmony_ci ecp_nistz256_point_double(r, a); 407e1051a39Sopenharmony_ci return; 408e1051a39Sopenharmony_ci } 409e1051a39Sopenharmony_ci 410e1051a39Sopenharmony_ci ecp_nistz256_sqr_mont(Rsqr, R); /* R^2 */ 411e1051a39Sopenharmony_ci ecp_nistz256_mul_mont(res_z, H, in1_z); /* Z3 = H*Z1*Z2 */ 412e1051a39Sopenharmony_ci ecp_nistz256_sqr_mont(Hsqr, H); /* H^2 */ 413e1051a39Sopenharmony_ci ecp_nistz256_mul_mont(res_z, res_z, in2_z); /* Z3 = H*Z1*Z2 */ 414e1051a39Sopenharmony_ci ecp_nistz256_mul_mont(Hcub, Hsqr, H); /* H^3 */ 415e1051a39Sopenharmony_ci 416e1051a39Sopenharmony_ci ecp_nistz256_mul_mont(U2, U1, Hsqr); /* U1*H^2 */ 417e1051a39Sopenharmony_ci ecp_nistz256_mul_by_2(Hsqr, U2); /* 2*U1*H^2 */ 418e1051a39Sopenharmony_ci 419e1051a39Sopenharmony_ci ecp_nistz256_sub(res_x, Rsqr, Hsqr); 420e1051a39Sopenharmony_ci ecp_nistz256_sub(res_x, res_x, Hcub); 421e1051a39Sopenharmony_ci 422e1051a39Sopenharmony_ci ecp_nistz256_sub(res_y, U2, res_x); 423e1051a39Sopenharmony_ci 424e1051a39Sopenharmony_ci ecp_nistz256_mul_mont(S2, S1, Hcub); 425e1051a39Sopenharmony_ci ecp_nistz256_mul_mont(res_y, R, res_y); 426e1051a39Sopenharmony_ci ecp_nistz256_sub(res_y, res_y, S2); 427e1051a39Sopenharmony_ci 428e1051a39Sopenharmony_ci copy_conditional(res_x, in2_x, in1infty); 429e1051a39Sopenharmony_ci copy_conditional(res_y, in2_y, in1infty); 430e1051a39Sopenharmony_ci copy_conditional(res_z, in2_z, in1infty); 431e1051a39Sopenharmony_ci 432e1051a39Sopenharmony_ci copy_conditional(res_x, in1_x, in2infty); 433e1051a39Sopenharmony_ci copy_conditional(res_y, in1_y, in2infty); 434e1051a39Sopenharmony_ci copy_conditional(res_z, in1_z, in2infty); 435e1051a39Sopenharmony_ci 436e1051a39Sopenharmony_ci memcpy(r->X, res_x, sizeof(res_x)); 437e1051a39Sopenharmony_ci memcpy(r->Y, res_y, sizeof(res_y)); 438e1051a39Sopenharmony_ci memcpy(r->Z, res_z, sizeof(res_z)); 439e1051a39Sopenharmony_ci} 440e1051a39Sopenharmony_ci 441e1051a39Sopenharmony_ci/* Point addition when b is known to be affine: r = a+b */ 442e1051a39Sopenharmony_cistatic void ecp_nistz256_point_add_affine(P256_POINT *r, 443e1051a39Sopenharmony_ci const P256_POINT *a, 444e1051a39Sopenharmony_ci const P256_POINT_AFFINE *b) 445e1051a39Sopenharmony_ci{ 446e1051a39Sopenharmony_ci BN_ULONG U2[P256_LIMBS], S2[P256_LIMBS]; 447e1051a39Sopenharmony_ci BN_ULONG Z1sqr[P256_LIMBS]; 448e1051a39Sopenharmony_ci BN_ULONG H[P256_LIMBS], R[P256_LIMBS]; 449e1051a39Sopenharmony_ci BN_ULONG Hsqr[P256_LIMBS]; 450e1051a39Sopenharmony_ci BN_ULONG Rsqr[P256_LIMBS]; 451e1051a39Sopenharmony_ci BN_ULONG Hcub[P256_LIMBS]; 452e1051a39Sopenharmony_ci 453e1051a39Sopenharmony_ci BN_ULONG res_x[P256_LIMBS]; 454e1051a39Sopenharmony_ci BN_ULONG res_y[P256_LIMBS]; 455e1051a39Sopenharmony_ci BN_ULONG res_z[P256_LIMBS]; 456e1051a39Sopenharmony_ci 457e1051a39Sopenharmony_ci BN_ULONG in1infty, in2infty; 458e1051a39Sopenharmony_ci 459e1051a39Sopenharmony_ci const BN_ULONG *in1_x = a->X; 460e1051a39Sopenharmony_ci const BN_ULONG *in1_y = a->Y; 461e1051a39Sopenharmony_ci const BN_ULONG *in1_z = a->Z; 462e1051a39Sopenharmony_ci 463e1051a39Sopenharmony_ci const BN_ULONG *in2_x = b->X; 464e1051a39Sopenharmony_ci const BN_ULONG *in2_y = b->Y; 465e1051a39Sopenharmony_ci 466e1051a39Sopenharmony_ci /* 467e1051a39Sopenharmony_ci * Infinity in encoded as (,,0) 468e1051a39Sopenharmony_ci */ 469e1051a39Sopenharmony_ci in1infty = (in1_z[0] | in1_z[1] | in1_z[2] | in1_z[3]); 470e1051a39Sopenharmony_ci if (P256_LIMBS == 8) 471e1051a39Sopenharmony_ci in1infty |= (in1_z[4] | in1_z[5] | in1_z[6] | in1_z[7]); 472e1051a39Sopenharmony_ci 473e1051a39Sopenharmony_ci /* 474e1051a39Sopenharmony_ci * In affine representation we encode infinity as (0,0), which is 475e1051a39Sopenharmony_ci * not on the curve, so it is OK 476e1051a39Sopenharmony_ci */ 477e1051a39Sopenharmony_ci in2infty = (in2_x[0] | in2_x[1] | in2_x[2] | in2_x[3] | 478e1051a39Sopenharmony_ci in2_y[0] | in2_y[1] | in2_y[2] | in2_y[3]); 479e1051a39Sopenharmony_ci if (P256_LIMBS == 8) 480e1051a39Sopenharmony_ci in2infty |= (in2_x[4] | in2_x[5] | in2_x[6] | in2_x[7] | 481e1051a39Sopenharmony_ci in2_y[4] | in2_y[5] | in2_y[6] | in2_y[7]); 482e1051a39Sopenharmony_ci 483e1051a39Sopenharmony_ci in1infty = is_zero(in1infty); 484e1051a39Sopenharmony_ci in2infty = is_zero(in2infty); 485e1051a39Sopenharmony_ci 486e1051a39Sopenharmony_ci ecp_nistz256_sqr_mont(Z1sqr, in1_z); /* Z1^2 */ 487e1051a39Sopenharmony_ci 488e1051a39Sopenharmony_ci ecp_nistz256_mul_mont(U2, in2_x, Z1sqr); /* U2 = X2*Z1^2 */ 489e1051a39Sopenharmony_ci ecp_nistz256_sub(H, U2, in1_x); /* H = U2 - U1 */ 490e1051a39Sopenharmony_ci 491e1051a39Sopenharmony_ci ecp_nistz256_mul_mont(S2, Z1sqr, in1_z); /* S2 = Z1^3 */ 492e1051a39Sopenharmony_ci 493e1051a39Sopenharmony_ci ecp_nistz256_mul_mont(res_z, H, in1_z); /* Z3 = H*Z1*Z2 */ 494e1051a39Sopenharmony_ci 495e1051a39Sopenharmony_ci ecp_nistz256_mul_mont(S2, S2, in2_y); /* S2 = Y2*Z1^3 */ 496e1051a39Sopenharmony_ci ecp_nistz256_sub(R, S2, in1_y); /* R = S2 - S1 */ 497e1051a39Sopenharmony_ci 498e1051a39Sopenharmony_ci ecp_nistz256_sqr_mont(Hsqr, H); /* H^2 */ 499e1051a39Sopenharmony_ci ecp_nistz256_sqr_mont(Rsqr, R); /* R^2 */ 500e1051a39Sopenharmony_ci ecp_nistz256_mul_mont(Hcub, Hsqr, H); /* H^3 */ 501e1051a39Sopenharmony_ci 502e1051a39Sopenharmony_ci ecp_nistz256_mul_mont(U2, in1_x, Hsqr); /* U1*H^2 */ 503e1051a39Sopenharmony_ci ecp_nistz256_mul_by_2(Hsqr, U2); /* 2*U1*H^2 */ 504e1051a39Sopenharmony_ci 505e1051a39Sopenharmony_ci ecp_nistz256_sub(res_x, Rsqr, Hsqr); 506e1051a39Sopenharmony_ci ecp_nistz256_sub(res_x, res_x, Hcub); 507e1051a39Sopenharmony_ci ecp_nistz256_sub(H, U2, res_x); 508e1051a39Sopenharmony_ci 509e1051a39Sopenharmony_ci ecp_nistz256_mul_mont(S2, in1_y, Hcub); 510e1051a39Sopenharmony_ci ecp_nistz256_mul_mont(H, H, R); 511e1051a39Sopenharmony_ci ecp_nistz256_sub(res_y, H, S2); 512e1051a39Sopenharmony_ci 513e1051a39Sopenharmony_ci copy_conditional(res_x, in2_x, in1infty); 514e1051a39Sopenharmony_ci copy_conditional(res_x, in1_x, in2infty); 515e1051a39Sopenharmony_ci 516e1051a39Sopenharmony_ci copy_conditional(res_y, in2_y, in1infty); 517e1051a39Sopenharmony_ci copy_conditional(res_y, in1_y, in2infty); 518e1051a39Sopenharmony_ci 519e1051a39Sopenharmony_ci copy_conditional(res_z, ONE, in1infty); 520e1051a39Sopenharmony_ci copy_conditional(res_z, in1_z, in2infty); 521e1051a39Sopenharmony_ci 522e1051a39Sopenharmony_ci memcpy(r->X, res_x, sizeof(res_x)); 523e1051a39Sopenharmony_ci memcpy(r->Y, res_y, sizeof(res_y)); 524e1051a39Sopenharmony_ci memcpy(r->Z, res_z, sizeof(res_z)); 525e1051a39Sopenharmony_ci} 526e1051a39Sopenharmony_ci#endif 527e1051a39Sopenharmony_ci 528e1051a39Sopenharmony_ci/* r = in^-1 mod p */ 529e1051a39Sopenharmony_cistatic void ecp_nistz256_mod_inverse(BN_ULONG r[P256_LIMBS], 530e1051a39Sopenharmony_ci const BN_ULONG in[P256_LIMBS]) 531e1051a39Sopenharmony_ci{ 532e1051a39Sopenharmony_ci /* 533e1051a39Sopenharmony_ci * The poly is ffffffff 00000001 00000000 00000000 00000000 ffffffff 534e1051a39Sopenharmony_ci * ffffffff ffffffff We use FLT and used poly-2 as exponent 535e1051a39Sopenharmony_ci */ 536e1051a39Sopenharmony_ci BN_ULONG p2[P256_LIMBS]; 537e1051a39Sopenharmony_ci BN_ULONG p4[P256_LIMBS]; 538e1051a39Sopenharmony_ci BN_ULONG p8[P256_LIMBS]; 539e1051a39Sopenharmony_ci BN_ULONG p16[P256_LIMBS]; 540e1051a39Sopenharmony_ci BN_ULONG p32[P256_LIMBS]; 541e1051a39Sopenharmony_ci BN_ULONG res[P256_LIMBS]; 542e1051a39Sopenharmony_ci int i; 543e1051a39Sopenharmony_ci 544e1051a39Sopenharmony_ci ecp_nistz256_sqr_mont(res, in); 545e1051a39Sopenharmony_ci ecp_nistz256_mul_mont(p2, res, in); /* 3*p */ 546e1051a39Sopenharmony_ci 547e1051a39Sopenharmony_ci ecp_nistz256_sqr_mont(res, p2); 548e1051a39Sopenharmony_ci ecp_nistz256_sqr_mont(res, res); 549e1051a39Sopenharmony_ci ecp_nistz256_mul_mont(p4, res, p2); /* f*p */ 550e1051a39Sopenharmony_ci 551e1051a39Sopenharmony_ci ecp_nistz256_sqr_mont(res, p4); 552e1051a39Sopenharmony_ci ecp_nistz256_sqr_mont(res, res); 553e1051a39Sopenharmony_ci ecp_nistz256_sqr_mont(res, res); 554e1051a39Sopenharmony_ci ecp_nistz256_sqr_mont(res, res); 555e1051a39Sopenharmony_ci ecp_nistz256_mul_mont(p8, res, p4); /* ff*p */ 556e1051a39Sopenharmony_ci 557e1051a39Sopenharmony_ci ecp_nistz256_sqr_mont(res, p8); 558e1051a39Sopenharmony_ci for (i = 0; i < 7; i++) 559e1051a39Sopenharmony_ci ecp_nistz256_sqr_mont(res, res); 560e1051a39Sopenharmony_ci ecp_nistz256_mul_mont(p16, res, p8); /* ffff*p */ 561e1051a39Sopenharmony_ci 562e1051a39Sopenharmony_ci ecp_nistz256_sqr_mont(res, p16); 563e1051a39Sopenharmony_ci for (i = 0; i < 15; i++) 564e1051a39Sopenharmony_ci ecp_nistz256_sqr_mont(res, res); 565e1051a39Sopenharmony_ci ecp_nistz256_mul_mont(p32, res, p16); /* ffffffff*p */ 566e1051a39Sopenharmony_ci 567e1051a39Sopenharmony_ci ecp_nistz256_sqr_mont(res, p32); 568e1051a39Sopenharmony_ci for (i = 0; i < 31; i++) 569e1051a39Sopenharmony_ci ecp_nistz256_sqr_mont(res, res); 570e1051a39Sopenharmony_ci ecp_nistz256_mul_mont(res, res, in); 571e1051a39Sopenharmony_ci 572e1051a39Sopenharmony_ci for (i = 0; i < 32 * 4; i++) 573e1051a39Sopenharmony_ci ecp_nistz256_sqr_mont(res, res); 574e1051a39Sopenharmony_ci ecp_nistz256_mul_mont(res, res, p32); 575e1051a39Sopenharmony_ci 576e1051a39Sopenharmony_ci for (i = 0; i < 32; i++) 577e1051a39Sopenharmony_ci ecp_nistz256_sqr_mont(res, res); 578e1051a39Sopenharmony_ci ecp_nistz256_mul_mont(res, res, p32); 579e1051a39Sopenharmony_ci 580e1051a39Sopenharmony_ci for (i = 0; i < 16; i++) 581e1051a39Sopenharmony_ci ecp_nistz256_sqr_mont(res, res); 582e1051a39Sopenharmony_ci ecp_nistz256_mul_mont(res, res, p16); 583e1051a39Sopenharmony_ci 584e1051a39Sopenharmony_ci for (i = 0; i < 8; i++) 585e1051a39Sopenharmony_ci ecp_nistz256_sqr_mont(res, res); 586e1051a39Sopenharmony_ci ecp_nistz256_mul_mont(res, res, p8); 587e1051a39Sopenharmony_ci 588e1051a39Sopenharmony_ci ecp_nistz256_sqr_mont(res, res); 589e1051a39Sopenharmony_ci ecp_nistz256_sqr_mont(res, res); 590e1051a39Sopenharmony_ci ecp_nistz256_sqr_mont(res, res); 591e1051a39Sopenharmony_ci ecp_nistz256_sqr_mont(res, res); 592e1051a39Sopenharmony_ci ecp_nistz256_mul_mont(res, res, p4); 593e1051a39Sopenharmony_ci 594e1051a39Sopenharmony_ci ecp_nistz256_sqr_mont(res, res); 595e1051a39Sopenharmony_ci ecp_nistz256_sqr_mont(res, res); 596e1051a39Sopenharmony_ci ecp_nistz256_mul_mont(res, res, p2); 597e1051a39Sopenharmony_ci 598e1051a39Sopenharmony_ci ecp_nistz256_sqr_mont(res, res); 599e1051a39Sopenharmony_ci ecp_nistz256_sqr_mont(res, res); 600e1051a39Sopenharmony_ci ecp_nistz256_mul_mont(res, res, in); 601e1051a39Sopenharmony_ci 602e1051a39Sopenharmony_ci memcpy(r, res, sizeof(res)); 603e1051a39Sopenharmony_ci} 604e1051a39Sopenharmony_ci 605e1051a39Sopenharmony_ci/* 606e1051a39Sopenharmony_ci * ecp_nistz256_bignum_to_field_elem copies the contents of |in| to |out| and 607e1051a39Sopenharmony_ci * returns one if it fits. Otherwise it returns zero. 608e1051a39Sopenharmony_ci */ 609e1051a39Sopenharmony_ci__owur static int ecp_nistz256_bignum_to_field_elem(BN_ULONG out[P256_LIMBS], 610e1051a39Sopenharmony_ci const BIGNUM *in) 611e1051a39Sopenharmony_ci{ 612e1051a39Sopenharmony_ci return bn_copy_words(out, in, P256_LIMBS); 613e1051a39Sopenharmony_ci} 614e1051a39Sopenharmony_ci 615e1051a39Sopenharmony_ci/* r = sum(scalar[i]*point[i]) */ 616e1051a39Sopenharmony_ci__owur static int ecp_nistz256_windowed_mul(const EC_GROUP *group, 617e1051a39Sopenharmony_ci P256_POINT *r, 618e1051a39Sopenharmony_ci const BIGNUM **scalar, 619e1051a39Sopenharmony_ci const EC_POINT **point, 620e1051a39Sopenharmony_ci size_t num, BN_CTX *ctx) 621e1051a39Sopenharmony_ci{ 622e1051a39Sopenharmony_ci size_t i; 623e1051a39Sopenharmony_ci int j, ret = 0; 624e1051a39Sopenharmony_ci unsigned int idx; 625e1051a39Sopenharmony_ci unsigned char (*p_str)[33] = NULL; 626e1051a39Sopenharmony_ci const unsigned int window_size = 5; 627e1051a39Sopenharmony_ci const unsigned int mask = (1 << (window_size + 1)) - 1; 628e1051a39Sopenharmony_ci unsigned int wvalue; 629e1051a39Sopenharmony_ci P256_POINT *temp; /* place for 5 temporary points */ 630e1051a39Sopenharmony_ci const BIGNUM **scalars = NULL; 631e1051a39Sopenharmony_ci P256_POINT (*table)[16] = NULL; 632e1051a39Sopenharmony_ci void *table_storage = NULL; 633e1051a39Sopenharmony_ci 634e1051a39Sopenharmony_ci if ((num * 16 + 6) > OPENSSL_MALLOC_MAX_NELEMS(P256_POINT) 635e1051a39Sopenharmony_ci || (table_storage = 636e1051a39Sopenharmony_ci OPENSSL_malloc((num * 16 + 5) * sizeof(P256_POINT) + 64)) == NULL 637e1051a39Sopenharmony_ci || (p_str = 638e1051a39Sopenharmony_ci OPENSSL_malloc(num * 33 * sizeof(unsigned char))) == NULL 639e1051a39Sopenharmony_ci || (scalars = OPENSSL_malloc(num * sizeof(BIGNUM *))) == NULL) { 640e1051a39Sopenharmony_ci ERR_raise(ERR_LIB_EC, ERR_R_MALLOC_FAILURE); 641e1051a39Sopenharmony_ci goto err; 642e1051a39Sopenharmony_ci } 643e1051a39Sopenharmony_ci 644e1051a39Sopenharmony_ci table = (void *)ALIGNPTR(table_storage, 64); 645e1051a39Sopenharmony_ci temp = (P256_POINT *)(table + num); 646e1051a39Sopenharmony_ci 647e1051a39Sopenharmony_ci for (i = 0; i < num; i++) { 648e1051a39Sopenharmony_ci P256_POINT *row = table[i]; 649e1051a39Sopenharmony_ci 650e1051a39Sopenharmony_ci /* This is an unusual input, we don't guarantee constant-timeness. */ 651e1051a39Sopenharmony_ci if ((BN_num_bits(scalar[i]) > 256) || BN_is_negative(scalar[i])) { 652e1051a39Sopenharmony_ci BIGNUM *mod; 653e1051a39Sopenharmony_ci 654e1051a39Sopenharmony_ci if ((mod = BN_CTX_get(ctx)) == NULL) 655e1051a39Sopenharmony_ci goto err; 656e1051a39Sopenharmony_ci if (!BN_nnmod(mod, scalar[i], group->order, ctx)) { 657e1051a39Sopenharmony_ci ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB); 658e1051a39Sopenharmony_ci goto err; 659e1051a39Sopenharmony_ci } 660e1051a39Sopenharmony_ci scalars[i] = mod; 661e1051a39Sopenharmony_ci } else 662e1051a39Sopenharmony_ci scalars[i] = scalar[i]; 663e1051a39Sopenharmony_ci 664e1051a39Sopenharmony_ci for (j = 0; j < bn_get_top(scalars[i]) * BN_BYTES; j += BN_BYTES) { 665e1051a39Sopenharmony_ci BN_ULONG d = bn_get_words(scalars[i])[j / BN_BYTES]; 666e1051a39Sopenharmony_ci 667e1051a39Sopenharmony_ci p_str[i][j + 0] = (unsigned char)d; 668e1051a39Sopenharmony_ci p_str[i][j + 1] = (unsigned char)(d >> 8); 669e1051a39Sopenharmony_ci p_str[i][j + 2] = (unsigned char)(d >> 16); 670e1051a39Sopenharmony_ci p_str[i][j + 3] = (unsigned char)(d >>= 24); 671e1051a39Sopenharmony_ci if (BN_BYTES == 8) { 672e1051a39Sopenharmony_ci d >>= 8; 673e1051a39Sopenharmony_ci p_str[i][j + 4] = (unsigned char)d; 674e1051a39Sopenharmony_ci p_str[i][j + 5] = (unsigned char)(d >> 8); 675e1051a39Sopenharmony_ci p_str[i][j + 6] = (unsigned char)(d >> 16); 676e1051a39Sopenharmony_ci p_str[i][j + 7] = (unsigned char)(d >> 24); 677e1051a39Sopenharmony_ci } 678e1051a39Sopenharmony_ci } 679e1051a39Sopenharmony_ci for (; j < 33; j++) 680e1051a39Sopenharmony_ci p_str[i][j] = 0; 681e1051a39Sopenharmony_ci 682e1051a39Sopenharmony_ci if (!ecp_nistz256_bignum_to_field_elem(temp[0].X, point[i]->X) 683e1051a39Sopenharmony_ci || !ecp_nistz256_bignum_to_field_elem(temp[0].Y, point[i]->Y) 684e1051a39Sopenharmony_ci || !ecp_nistz256_bignum_to_field_elem(temp[0].Z, point[i]->Z)) { 685e1051a39Sopenharmony_ci ERR_raise(ERR_LIB_EC, EC_R_COORDINATES_OUT_OF_RANGE); 686e1051a39Sopenharmony_ci goto err; 687e1051a39Sopenharmony_ci } 688e1051a39Sopenharmony_ci 689e1051a39Sopenharmony_ci /* 690e1051a39Sopenharmony_ci * row[0] is implicitly (0,0,0) (the point at infinity), therefore it 691e1051a39Sopenharmony_ci * is not stored. All other values are actually stored with an offset 692e1051a39Sopenharmony_ci * of -1 in table. 693e1051a39Sopenharmony_ci */ 694e1051a39Sopenharmony_ci 695e1051a39Sopenharmony_ci ecp_nistz256_scatter_w5 (row, &temp[0], 1); 696e1051a39Sopenharmony_ci ecp_nistz256_point_double(&temp[1], &temp[0]); /*1+1=2 */ 697e1051a39Sopenharmony_ci ecp_nistz256_scatter_w5 (row, &temp[1], 2); 698e1051a39Sopenharmony_ci ecp_nistz256_point_add (&temp[2], &temp[1], &temp[0]); /*2+1=3 */ 699e1051a39Sopenharmony_ci ecp_nistz256_scatter_w5 (row, &temp[2], 3); 700e1051a39Sopenharmony_ci ecp_nistz256_point_double(&temp[1], &temp[1]); /*2*2=4 */ 701e1051a39Sopenharmony_ci ecp_nistz256_scatter_w5 (row, &temp[1], 4); 702e1051a39Sopenharmony_ci ecp_nistz256_point_double(&temp[2], &temp[2]); /*2*3=6 */ 703e1051a39Sopenharmony_ci ecp_nistz256_scatter_w5 (row, &temp[2], 6); 704e1051a39Sopenharmony_ci ecp_nistz256_point_add (&temp[3], &temp[1], &temp[0]); /*4+1=5 */ 705e1051a39Sopenharmony_ci ecp_nistz256_scatter_w5 (row, &temp[3], 5); 706e1051a39Sopenharmony_ci ecp_nistz256_point_add (&temp[4], &temp[2], &temp[0]); /*6+1=7 */ 707e1051a39Sopenharmony_ci ecp_nistz256_scatter_w5 (row, &temp[4], 7); 708e1051a39Sopenharmony_ci ecp_nistz256_point_double(&temp[1], &temp[1]); /*2*4=8 */ 709e1051a39Sopenharmony_ci ecp_nistz256_scatter_w5 (row, &temp[1], 8); 710e1051a39Sopenharmony_ci ecp_nistz256_point_double(&temp[2], &temp[2]); /*2*6=12 */ 711e1051a39Sopenharmony_ci ecp_nistz256_scatter_w5 (row, &temp[2], 12); 712e1051a39Sopenharmony_ci ecp_nistz256_point_double(&temp[3], &temp[3]); /*2*5=10 */ 713e1051a39Sopenharmony_ci ecp_nistz256_scatter_w5 (row, &temp[3], 10); 714e1051a39Sopenharmony_ci ecp_nistz256_point_double(&temp[4], &temp[4]); /*2*7=14 */ 715e1051a39Sopenharmony_ci ecp_nistz256_scatter_w5 (row, &temp[4], 14); 716e1051a39Sopenharmony_ci ecp_nistz256_point_add (&temp[2], &temp[2], &temp[0]); /*12+1=13*/ 717e1051a39Sopenharmony_ci ecp_nistz256_scatter_w5 (row, &temp[2], 13); 718e1051a39Sopenharmony_ci ecp_nistz256_point_add (&temp[3], &temp[3], &temp[0]); /*10+1=11*/ 719e1051a39Sopenharmony_ci ecp_nistz256_scatter_w5 (row, &temp[3], 11); 720e1051a39Sopenharmony_ci ecp_nistz256_point_add (&temp[4], &temp[4], &temp[0]); /*14+1=15*/ 721e1051a39Sopenharmony_ci ecp_nistz256_scatter_w5 (row, &temp[4], 15); 722e1051a39Sopenharmony_ci ecp_nistz256_point_add (&temp[2], &temp[1], &temp[0]); /*8+1=9 */ 723e1051a39Sopenharmony_ci ecp_nistz256_scatter_w5 (row, &temp[2], 9); 724e1051a39Sopenharmony_ci ecp_nistz256_point_double(&temp[1], &temp[1]); /*2*8=16 */ 725e1051a39Sopenharmony_ci ecp_nistz256_scatter_w5 (row, &temp[1], 16); 726e1051a39Sopenharmony_ci } 727e1051a39Sopenharmony_ci 728e1051a39Sopenharmony_ci idx = 255; 729e1051a39Sopenharmony_ci 730e1051a39Sopenharmony_ci wvalue = p_str[0][(idx - 1) / 8]; 731e1051a39Sopenharmony_ci wvalue = (wvalue >> ((idx - 1) % 8)) & mask; 732e1051a39Sopenharmony_ci 733e1051a39Sopenharmony_ci /* 734e1051a39Sopenharmony_ci * We gather to temp[0], because we know it's position relative 735e1051a39Sopenharmony_ci * to table 736e1051a39Sopenharmony_ci */ 737e1051a39Sopenharmony_ci ecp_nistz256_gather_w5(&temp[0], table[0], _booth_recode_w5(wvalue) >> 1); 738e1051a39Sopenharmony_ci memcpy(r, &temp[0], sizeof(temp[0])); 739e1051a39Sopenharmony_ci 740e1051a39Sopenharmony_ci while (idx >= 5) { 741e1051a39Sopenharmony_ci for (i = (idx == 255 ? 1 : 0); i < num; i++) { 742e1051a39Sopenharmony_ci unsigned int off = (idx - 1) / 8; 743e1051a39Sopenharmony_ci 744e1051a39Sopenharmony_ci wvalue = p_str[i][off] | p_str[i][off + 1] << 8; 745e1051a39Sopenharmony_ci wvalue = (wvalue >> ((idx - 1) % 8)) & mask; 746e1051a39Sopenharmony_ci 747e1051a39Sopenharmony_ci wvalue = _booth_recode_w5(wvalue); 748e1051a39Sopenharmony_ci 749e1051a39Sopenharmony_ci ecp_nistz256_gather_w5(&temp[0], table[i], wvalue >> 1); 750e1051a39Sopenharmony_ci 751e1051a39Sopenharmony_ci ecp_nistz256_neg(temp[1].Y, temp[0].Y); 752e1051a39Sopenharmony_ci copy_conditional(temp[0].Y, temp[1].Y, (wvalue & 1)); 753e1051a39Sopenharmony_ci 754e1051a39Sopenharmony_ci ecp_nistz256_point_add(r, r, &temp[0]); 755e1051a39Sopenharmony_ci } 756e1051a39Sopenharmony_ci 757e1051a39Sopenharmony_ci idx -= window_size; 758e1051a39Sopenharmony_ci 759e1051a39Sopenharmony_ci ecp_nistz256_point_double(r, r); 760e1051a39Sopenharmony_ci ecp_nistz256_point_double(r, r); 761e1051a39Sopenharmony_ci ecp_nistz256_point_double(r, r); 762e1051a39Sopenharmony_ci ecp_nistz256_point_double(r, r); 763e1051a39Sopenharmony_ci ecp_nistz256_point_double(r, r); 764e1051a39Sopenharmony_ci } 765e1051a39Sopenharmony_ci 766e1051a39Sopenharmony_ci /* Final window */ 767e1051a39Sopenharmony_ci for (i = 0; i < num; i++) { 768e1051a39Sopenharmony_ci wvalue = p_str[i][0]; 769e1051a39Sopenharmony_ci wvalue = (wvalue << 1) & mask; 770e1051a39Sopenharmony_ci 771e1051a39Sopenharmony_ci wvalue = _booth_recode_w5(wvalue); 772e1051a39Sopenharmony_ci 773e1051a39Sopenharmony_ci ecp_nistz256_gather_w5(&temp[0], table[i], wvalue >> 1); 774e1051a39Sopenharmony_ci 775e1051a39Sopenharmony_ci ecp_nistz256_neg(temp[1].Y, temp[0].Y); 776e1051a39Sopenharmony_ci copy_conditional(temp[0].Y, temp[1].Y, wvalue & 1); 777e1051a39Sopenharmony_ci 778e1051a39Sopenharmony_ci ecp_nistz256_point_add(r, r, &temp[0]); 779e1051a39Sopenharmony_ci } 780e1051a39Sopenharmony_ci 781e1051a39Sopenharmony_ci ret = 1; 782e1051a39Sopenharmony_ci err: 783e1051a39Sopenharmony_ci OPENSSL_free(table_storage); 784e1051a39Sopenharmony_ci OPENSSL_free(p_str); 785e1051a39Sopenharmony_ci OPENSSL_free(scalars); 786e1051a39Sopenharmony_ci return ret; 787e1051a39Sopenharmony_ci} 788e1051a39Sopenharmony_ci 789e1051a39Sopenharmony_ci/* Coordinates of G, for which we have precomputed tables */ 790e1051a39Sopenharmony_cistatic const BN_ULONG def_xG[P256_LIMBS] = { 791e1051a39Sopenharmony_ci TOBN(0x79e730d4, 0x18a9143c), TOBN(0x75ba95fc, 0x5fedb601), 792e1051a39Sopenharmony_ci TOBN(0x79fb732b, 0x77622510), TOBN(0x18905f76, 0xa53755c6) 793e1051a39Sopenharmony_ci}; 794e1051a39Sopenharmony_ci 795e1051a39Sopenharmony_cistatic const BN_ULONG def_yG[P256_LIMBS] = { 796e1051a39Sopenharmony_ci TOBN(0xddf25357, 0xce95560a), TOBN(0x8b4ab8e4, 0xba19e45c), 797e1051a39Sopenharmony_ci TOBN(0xd2e88688, 0xdd21f325), TOBN(0x8571ff18, 0x25885d85) 798e1051a39Sopenharmony_ci}; 799e1051a39Sopenharmony_ci 800e1051a39Sopenharmony_ci/* 801e1051a39Sopenharmony_ci * ecp_nistz256_is_affine_G returns one if |generator| is the standard, P-256 802e1051a39Sopenharmony_ci * generator. 803e1051a39Sopenharmony_ci */ 804e1051a39Sopenharmony_cistatic int ecp_nistz256_is_affine_G(const EC_POINT *generator) 805e1051a39Sopenharmony_ci{ 806e1051a39Sopenharmony_ci return (bn_get_top(generator->X) == P256_LIMBS) && 807e1051a39Sopenharmony_ci (bn_get_top(generator->Y) == P256_LIMBS) && 808e1051a39Sopenharmony_ci is_equal(bn_get_words(generator->X), def_xG) && 809e1051a39Sopenharmony_ci is_equal(bn_get_words(generator->Y), def_yG) && 810e1051a39Sopenharmony_ci is_one(generator->Z); 811e1051a39Sopenharmony_ci} 812e1051a39Sopenharmony_ci 813e1051a39Sopenharmony_ci__owur static int ecp_nistz256_mult_precompute(EC_GROUP *group, BN_CTX *ctx) 814e1051a39Sopenharmony_ci{ 815e1051a39Sopenharmony_ci /* 816e1051a39Sopenharmony_ci * We precompute a table for a Booth encoded exponent (wNAF) based 817e1051a39Sopenharmony_ci * computation. Each table holds 64 values for safe access, with an 818e1051a39Sopenharmony_ci * implicit value of infinity at index zero. We use window of size 7, and 819e1051a39Sopenharmony_ci * therefore require ceil(256/7) = 37 tables. 820e1051a39Sopenharmony_ci */ 821e1051a39Sopenharmony_ci const BIGNUM *order; 822e1051a39Sopenharmony_ci EC_POINT *P = NULL, *T = NULL; 823e1051a39Sopenharmony_ci const EC_POINT *generator; 824e1051a39Sopenharmony_ci NISTZ256_PRE_COMP *pre_comp; 825e1051a39Sopenharmony_ci BN_CTX *new_ctx = NULL; 826e1051a39Sopenharmony_ci int i, j, k, ret = 0; 827e1051a39Sopenharmony_ci size_t w; 828e1051a39Sopenharmony_ci 829e1051a39Sopenharmony_ci PRECOMP256_ROW *preComputedTable = NULL; 830e1051a39Sopenharmony_ci unsigned char *precomp_storage = NULL; 831e1051a39Sopenharmony_ci 832e1051a39Sopenharmony_ci /* if there is an old NISTZ256_PRE_COMP object, throw it away */ 833e1051a39Sopenharmony_ci EC_pre_comp_free(group); 834e1051a39Sopenharmony_ci generator = EC_GROUP_get0_generator(group); 835e1051a39Sopenharmony_ci if (generator == NULL) { 836e1051a39Sopenharmony_ci ERR_raise(ERR_LIB_EC, EC_R_UNDEFINED_GENERATOR); 837e1051a39Sopenharmony_ci return 0; 838e1051a39Sopenharmony_ci } 839e1051a39Sopenharmony_ci 840e1051a39Sopenharmony_ci if (ecp_nistz256_is_affine_G(generator)) { 841e1051a39Sopenharmony_ci /* 842e1051a39Sopenharmony_ci * No need to calculate tables for the standard generator because we 843e1051a39Sopenharmony_ci * have them statically. 844e1051a39Sopenharmony_ci */ 845e1051a39Sopenharmony_ci return 1; 846e1051a39Sopenharmony_ci } 847e1051a39Sopenharmony_ci 848e1051a39Sopenharmony_ci if ((pre_comp = ecp_nistz256_pre_comp_new(group)) == NULL) 849e1051a39Sopenharmony_ci return 0; 850e1051a39Sopenharmony_ci 851e1051a39Sopenharmony_ci if (ctx == NULL) { 852e1051a39Sopenharmony_ci ctx = new_ctx = BN_CTX_new_ex(group->libctx); 853e1051a39Sopenharmony_ci if (ctx == NULL) 854e1051a39Sopenharmony_ci goto err; 855e1051a39Sopenharmony_ci } 856e1051a39Sopenharmony_ci 857e1051a39Sopenharmony_ci BN_CTX_start(ctx); 858e1051a39Sopenharmony_ci 859e1051a39Sopenharmony_ci order = EC_GROUP_get0_order(group); 860e1051a39Sopenharmony_ci if (order == NULL) 861e1051a39Sopenharmony_ci goto err; 862e1051a39Sopenharmony_ci 863e1051a39Sopenharmony_ci if (BN_is_zero(order)) { 864e1051a39Sopenharmony_ci ERR_raise(ERR_LIB_EC, EC_R_UNKNOWN_ORDER); 865e1051a39Sopenharmony_ci goto err; 866e1051a39Sopenharmony_ci } 867e1051a39Sopenharmony_ci 868e1051a39Sopenharmony_ci w = 7; 869e1051a39Sopenharmony_ci 870e1051a39Sopenharmony_ci if ((precomp_storage = 871e1051a39Sopenharmony_ci OPENSSL_malloc(37 * 64 * sizeof(P256_POINT_AFFINE) + 64)) == NULL) { 872e1051a39Sopenharmony_ci ERR_raise(ERR_LIB_EC, ERR_R_MALLOC_FAILURE); 873e1051a39Sopenharmony_ci goto err; 874e1051a39Sopenharmony_ci } 875e1051a39Sopenharmony_ci 876e1051a39Sopenharmony_ci preComputedTable = (void *)ALIGNPTR(precomp_storage, 64); 877e1051a39Sopenharmony_ci 878e1051a39Sopenharmony_ci P = EC_POINT_new(group); 879e1051a39Sopenharmony_ci T = EC_POINT_new(group); 880e1051a39Sopenharmony_ci if (P == NULL || T == NULL) 881e1051a39Sopenharmony_ci goto err; 882e1051a39Sopenharmony_ci 883e1051a39Sopenharmony_ci /* 884e1051a39Sopenharmony_ci * The zero entry is implicitly infinity, and we skip it, storing other 885e1051a39Sopenharmony_ci * values with -1 offset. 886e1051a39Sopenharmony_ci */ 887e1051a39Sopenharmony_ci if (!EC_POINT_copy(T, generator)) 888e1051a39Sopenharmony_ci goto err; 889e1051a39Sopenharmony_ci 890e1051a39Sopenharmony_ci for (k = 0; k < 64; k++) { 891e1051a39Sopenharmony_ci if (!EC_POINT_copy(P, T)) 892e1051a39Sopenharmony_ci goto err; 893e1051a39Sopenharmony_ci for (j = 0; j < 37; j++) { 894e1051a39Sopenharmony_ci P256_POINT_AFFINE temp; 895e1051a39Sopenharmony_ci /* 896e1051a39Sopenharmony_ci * It would be faster to use EC_POINTs_make_affine and 897e1051a39Sopenharmony_ci * make multiple points affine at the same time. 898e1051a39Sopenharmony_ci */ 899e1051a39Sopenharmony_ci if (group->meth->make_affine == NULL 900e1051a39Sopenharmony_ci || !group->meth->make_affine(group, P, ctx)) 901e1051a39Sopenharmony_ci goto err; 902e1051a39Sopenharmony_ci if (!ecp_nistz256_bignum_to_field_elem(temp.X, P->X) || 903e1051a39Sopenharmony_ci !ecp_nistz256_bignum_to_field_elem(temp.Y, P->Y)) { 904e1051a39Sopenharmony_ci ERR_raise(ERR_LIB_EC, EC_R_COORDINATES_OUT_OF_RANGE); 905e1051a39Sopenharmony_ci goto err; 906e1051a39Sopenharmony_ci } 907e1051a39Sopenharmony_ci ecp_nistz256_scatter_w7(preComputedTable[j], &temp, k); 908e1051a39Sopenharmony_ci for (i = 0; i < 7; i++) { 909e1051a39Sopenharmony_ci if (!EC_POINT_dbl(group, P, P, ctx)) 910e1051a39Sopenharmony_ci goto err; 911e1051a39Sopenharmony_ci } 912e1051a39Sopenharmony_ci } 913e1051a39Sopenharmony_ci if (!EC_POINT_add(group, T, T, generator, ctx)) 914e1051a39Sopenharmony_ci goto err; 915e1051a39Sopenharmony_ci } 916e1051a39Sopenharmony_ci 917e1051a39Sopenharmony_ci pre_comp->group = group; 918e1051a39Sopenharmony_ci pre_comp->w = w; 919e1051a39Sopenharmony_ci pre_comp->precomp = preComputedTable; 920e1051a39Sopenharmony_ci pre_comp->precomp_storage = precomp_storage; 921e1051a39Sopenharmony_ci precomp_storage = NULL; 922e1051a39Sopenharmony_ci SETPRECOMP(group, nistz256, pre_comp); 923e1051a39Sopenharmony_ci pre_comp = NULL; 924e1051a39Sopenharmony_ci ret = 1; 925e1051a39Sopenharmony_ci 926e1051a39Sopenharmony_ci err: 927e1051a39Sopenharmony_ci BN_CTX_end(ctx); 928e1051a39Sopenharmony_ci BN_CTX_free(new_ctx); 929e1051a39Sopenharmony_ci 930e1051a39Sopenharmony_ci EC_nistz256_pre_comp_free(pre_comp); 931e1051a39Sopenharmony_ci OPENSSL_free(precomp_storage); 932e1051a39Sopenharmony_ci EC_POINT_free(P); 933e1051a39Sopenharmony_ci EC_POINT_free(T); 934e1051a39Sopenharmony_ci return ret; 935e1051a39Sopenharmony_ci} 936e1051a39Sopenharmony_ci 937e1051a39Sopenharmony_ci__owur static int ecp_nistz256_set_from_affine(EC_POINT *out, const EC_GROUP *group, 938e1051a39Sopenharmony_ci const P256_POINT_AFFINE *in, 939e1051a39Sopenharmony_ci BN_CTX *ctx) 940e1051a39Sopenharmony_ci{ 941e1051a39Sopenharmony_ci int ret = 0; 942e1051a39Sopenharmony_ci 943e1051a39Sopenharmony_ci if ((ret = bn_set_words(out->X, in->X, P256_LIMBS)) 944e1051a39Sopenharmony_ci && (ret = bn_set_words(out->Y, in->Y, P256_LIMBS)) 945e1051a39Sopenharmony_ci && (ret = bn_set_words(out->Z, ONE, P256_LIMBS))) 946e1051a39Sopenharmony_ci out->Z_is_one = 1; 947e1051a39Sopenharmony_ci 948e1051a39Sopenharmony_ci return ret; 949e1051a39Sopenharmony_ci} 950e1051a39Sopenharmony_ci 951e1051a39Sopenharmony_ci/* r = scalar*G + sum(scalars[i]*points[i]) */ 952e1051a39Sopenharmony_ci__owur static int ecp_nistz256_points_mul(const EC_GROUP *group, 953e1051a39Sopenharmony_ci EC_POINT *r, 954e1051a39Sopenharmony_ci const BIGNUM *scalar, 955e1051a39Sopenharmony_ci size_t num, 956e1051a39Sopenharmony_ci const EC_POINT *points[], 957e1051a39Sopenharmony_ci const BIGNUM *scalars[], BN_CTX *ctx) 958e1051a39Sopenharmony_ci{ 959e1051a39Sopenharmony_ci int i = 0, ret = 0, no_precomp_for_generator = 0, p_is_infinity = 0; 960e1051a39Sopenharmony_ci unsigned char p_str[33] = { 0 }; 961e1051a39Sopenharmony_ci const PRECOMP256_ROW *preComputedTable = NULL; 962e1051a39Sopenharmony_ci const NISTZ256_PRE_COMP *pre_comp = NULL; 963e1051a39Sopenharmony_ci const EC_POINT *generator = NULL; 964e1051a39Sopenharmony_ci const BIGNUM **new_scalars = NULL; 965e1051a39Sopenharmony_ci const EC_POINT **new_points = NULL; 966e1051a39Sopenharmony_ci unsigned int idx = 0; 967e1051a39Sopenharmony_ci const unsigned int window_size = 7; 968e1051a39Sopenharmony_ci const unsigned int mask = (1 << (window_size + 1)) - 1; 969e1051a39Sopenharmony_ci unsigned int wvalue; 970e1051a39Sopenharmony_ci ALIGN32 union { 971e1051a39Sopenharmony_ci P256_POINT p; 972e1051a39Sopenharmony_ci P256_POINT_AFFINE a; 973e1051a39Sopenharmony_ci } t, p; 974e1051a39Sopenharmony_ci BIGNUM *tmp_scalar; 975e1051a39Sopenharmony_ci 976e1051a39Sopenharmony_ci if ((num + 1) == 0 || (num + 1) > OPENSSL_MALLOC_MAX_NELEMS(void *)) { 977e1051a39Sopenharmony_ci ERR_raise(ERR_LIB_EC, ERR_R_MALLOC_FAILURE); 978e1051a39Sopenharmony_ci return 0; 979e1051a39Sopenharmony_ci } 980e1051a39Sopenharmony_ci 981e1051a39Sopenharmony_ci memset(&p, 0, sizeof(p)); 982e1051a39Sopenharmony_ci BN_CTX_start(ctx); 983e1051a39Sopenharmony_ci 984e1051a39Sopenharmony_ci if (scalar) { 985e1051a39Sopenharmony_ci generator = EC_GROUP_get0_generator(group); 986e1051a39Sopenharmony_ci if (generator == NULL) { 987e1051a39Sopenharmony_ci ERR_raise(ERR_LIB_EC, EC_R_UNDEFINED_GENERATOR); 988e1051a39Sopenharmony_ci goto err; 989e1051a39Sopenharmony_ci } 990e1051a39Sopenharmony_ci 991e1051a39Sopenharmony_ci /* look if we can use precomputed multiples of generator */ 992e1051a39Sopenharmony_ci pre_comp = group->pre_comp.nistz256; 993e1051a39Sopenharmony_ci 994e1051a39Sopenharmony_ci if (pre_comp) { 995e1051a39Sopenharmony_ci /* 996e1051a39Sopenharmony_ci * If there is a precomputed table for the generator, check that 997e1051a39Sopenharmony_ci * it was generated with the same generator. 998e1051a39Sopenharmony_ci */ 999e1051a39Sopenharmony_ci EC_POINT *pre_comp_generator = EC_POINT_new(group); 1000e1051a39Sopenharmony_ci if (pre_comp_generator == NULL) 1001e1051a39Sopenharmony_ci goto err; 1002e1051a39Sopenharmony_ci 1003e1051a39Sopenharmony_ci ecp_nistz256_gather_w7(&p.a, pre_comp->precomp[0], 1); 1004e1051a39Sopenharmony_ci if (!ecp_nistz256_set_from_affine(pre_comp_generator, 1005e1051a39Sopenharmony_ci group, &p.a, ctx)) { 1006e1051a39Sopenharmony_ci EC_POINT_free(pre_comp_generator); 1007e1051a39Sopenharmony_ci goto err; 1008e1051a39Sopenharmony_ci } 1009e1051a39Sopenharmony_ci 1010e1051a39Sopenharmony_ci if (0 == EC_POINT_cmp(group, generator, pre_comp_generator, ctx)) 1011e1051a39Sopenharmony_ci preComputedTable = (const PRECOMP256_ROW *)pre_comp->precomp; 1012e1051a39Sopenharmony_ci 1013e1051a39Sopenharmony_ci EC_POINT_free(pre_comp_generator); 1014e1051a39Sopenharmony_ci } 1015e1051a39Sopenharmony_ci 1016e1051a39Sopenharmony_ci if (preComputedTable == NULL && ecp_nistz256_is_affine_G(generator)) { 1017e1051a39Sopenharmony_ci /* 1018e1051a39Sopenharmony_ci * If there is no precomputed data, but the generator is the 1019e1051a39Sopenharmony_ci * default, a hardcoded table of precomputed data is used. This 1020e1051a39Sopenharmony_ci * is because applications, such as Apache, do not use 1021e1051a39Sopenharmony_ci * EC_KEY_precompute_mult. 1022e1051a39Sopenharmony_ci */ 1023e1051a39Sopenharmony_ci preComputedTable = ecp_nistz256_precomputed; 1024e1051a39Sopenharmony_ci } 1025e1051a39Sopenharmony_ci 1026e1051a39Sopenharmony_ci if (preComputedTable) { 1027e1051a39Sopenharmony_ci BN_ULONG infty; 1028e1051a39Sopenharmony_ci 1029e1051a39Sopenharmony_ci if ((BN_num_bits(scalar) > 256) 1030e1051a39Sopenharmony_ci || BN_is_negative(scalar)) { 1031e1051a39Sopenharmony_ci if ((tmp_scalar = BN_CTX_get(ctx)) == NULL) 1032e1051a39Sopenharmony_ci goto err; 1033e1051a39Sopenharmony_ci 1034e1051a39Sopenharmony_ci if (!BN_nnmod(tmp_scalar, scalar, group->order, ctx)) { 1035e1051a39Sopenharmony_ci ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB); 1036e1051a39Sopenharmony_ci goto err; 1037e1051a39Sopenharmony_ci } 1038e1051a39Sopenharmony_ci scalar = tmp_scalar; 1039e1051a39Sopenharmony_ci } 1040e1051a39Sopenharmony_ci 1041e1051a39Sopenharmony_ci for (i = 0; i < bn_get_top(scalar) * BN_BYTES; i += BN_BYTES) { 1042e1051a39Sopenharmony_ci BN_ULONG d = bn_get_words(scalar)[i / BN_BYTES]; 1043e1051a39Sopenharmony_ci 1044e1051a39Sopenharmony_ci p_str[i + 0] = (unsigned char)d; 1045e1051a39Sopenharmony_ci p_str[i + 1] = (unsigned char)(d >> 8); 1046e1051a39Sopenharmony_ci p_str[i + 2] = (unsigned char)(d >> 16); 1047e1051a39Sopenharmony_ci p_str[i + 3] = (unsigned char)(d >>= 24); 1048e1051a39Sopenharmony_ci if (BN_BYTES == 8) { 1049e1051a39Sopenharmony_ci d >>= 8; 1050e1051a39Sopenharmony_ci p_str[i + 4] = (unsigned char)d; 1051e1051a39Sopenharmony_ci p_str[i + 5] = (unsigned char)(d >> 8); 1052e1051a39Sopenharmony_ci p_str[i + 6] = (unsigned char)(d >> 16); 1053e1051a39Sopenharmony_ci p_str[i + 7] = (unsigned char)(d >> 24); 1054e1051a39Sopenharmony_ci } 1055e1051a39Sopenharmony_ci } 1056e1051a39Sopenharmony_ci 1057e1051a39Sopenharmony_ci for (; i < 33; i++) 1058e1051a39Sopenharmony_ci p_str[i] = 0; 1059e1051a39Sopenharmony_ci 1060e1051a39Sopenharmony_ci /* First window */ 1061e1051a39Sopenharmony_ci wvalue = (p_str[0] << 1) & mask; 1062e1051a39Sopenharmony_ci idx += window_size; 1063e1051a39Sopenharmony_ci 1064e1051a39Sopenharmony_ci wvalue = _booth_recode_w7(wvalue); 1065e1051a39Sopenharmony_ci 1066e1051a39Sopenharmony_ci ecp_nistz256_gather_w7(&p.a, preComputedTable[0], 1067e1051a39Sopenharmony_ci wvalue >> 1); 1068e1051a39Sopenharmony_ci 1069e1051a39Sopenharmony_ci ecp_nistz256_neg(p.p.Z, p.p.Y); 1070e1051a39Sopenharmony_ci copy_conditional(p.p.Y, p.p.Z, wvalue & 1); 1071e1051a39Sopenharmony_ci 1072e1051a39Sopenharmony_ci /* 1073e1051a39Sopenharmony_ci * Since affine infinity is encoded as (0,0) and 1074e1051a39Sopenharmony_ci * Jacobian is (,,0), we need to harmonize them 1075e1051a39Sopenharmony_ci * by assigning "one" or zero to Z. 1076e1051a39Sopenharmony_ci */ 1077e1051a39Sopenharmony_ci infty = (p.p.X[0] | p.p.X[1] | p.p.X[2] | p.p.X[3] | 1078e1051a39Sopenharmony_ci p.p.Y[0] | p.p.Y[1] | p.p.Y[2] | p.p.Y[3]); 1079e1051a39Sopenharmony_ci if (P256_LIMBS == 8) 1080e1051a39Sopenharmony_ci infty |= (p.p.X[4] | p.p.X[5] | p.p.X[6] | p.p.X[7] | 1081e1051a39Sopenharmony_ci p.p.Y[4] | p.p.Y[5] | p.p.Y[6] | p.p.Y[7]); 1082e1051a39Sopenharmony_ci 1083e1051a39Sopenharmony_ci infty = 0 - is_zero(infty); 1084e1051a39Sopenharmony_ci infty = ~infty; 1085e1051a39Sopenharmony_ci 1086e1051a39Sopenharmony_ci p.p.Z[0] = ONE[0] & infty; 1087e1051a39Sopenharmony_ci p.p.Z[1] = ONE[1] & infty; 1088e1051a39Sopenharmony_ci p.p.Z[2] = ONE[2] & infty; 1089e1051a39Sopenharmony_ci p.p.Z[3] = ONE[3] & infty; 1090e1051a39Sopenharmony_ci if (P256_LIMBS == 8) { 1091e1051a39Sopenharmony_ci p.p.Z[4] = ONE[4] & infty; 1092e1051a39Sopenharmony_ci p.p.Z[5] = ONE[5] & infty; 1093e1051a39Sopenharmony_ci p.p.Z[6] = ONE[6] & infty; 1094e1051a39Sopenharmony_ci p.p.Z[7] = ONE[7] & infty; 1095e1051a39Sopenharmony_ci } 1096e1051a39Sopenharmony_ci 1097e1051a39Sopenharmony_ci for (i = 1; i < 37; i++) { 1098e1051a39Sopenharmony_ci unsigned int off = (idx - 1) / 8; 1099e1051a39Sopenharmony_ci wvalue = p_str[off] | p_str[off + 1] << 8; 1100e1051a39Sopenharmony_ci wvalue = (wvalue >> ((idx - 1) % 8)) & mask; 1101e1051a39Sopenharmony_ci idx += window_size; 1102e1051a39Sopenharmony_ci 1103e1051a39Sopenharmony_ci wvalue = _booth_recode_w7(wvalue); 1104e1051a39Sopenharmony_ci 1105e1051a39Sopenharmony_ci ecp_nistz256_gather_w7(&t.a, 1106e1051a39Sopenharmony_ci preComputedTable[i], wvalue >> 1); 1107e1051a39Sopenharmony_ci 1108e1051a39Sopenharmony_ci ecp_nistz256_neg(t.p.Z, t.a.Y); 1109e1051a39Sopenharmony_ci copy_conditional(t.a.Y, t.p.Z, wvalue & 1); 1110e1051a39Sopenharmony_ci 1111e1051a39Sopenharmony_ci ecp_nistz256_point_add_affine(&p.p, &p.p, &t.a); 1112e1051a39Sopenharmony_ci } 1113e1051a39Sopenharmony_ci } else { 1114e1051a39Sopenharmony_ci p_is_infinity = 1; 1115e1051a39Sopenharmony_ci no_precomp_for_generator = 1; 1116e1051a39Sopenharmony_ci } 1117e1051a39Sopenharmony_ci } else 1118e1051a39Sopenharmony_ci p_is_infinity = 1; 1119e1051a39Sopenharmony_ci 1120e1051a39Sopenharmony_ci if (no_precomp_for_generator) { 1121e1051a39Sopenharmony_ci /* 1122e1051a39Sopenharmony_ci * Without a precomputed table for the generator, it has to be 1123e1051a39Sopenharmony_ci * handled like a normal point. 1124e1051a39Sopenharmony_ci */ 1125e1051a39Sopenharmony_ci new_scalars = OPENSSL_malloc((num + 1) * sizeof(BIGNUM *)); 1126e1051a39Sopenharmony_ci if (new_scalars == NULL) { 1127e1051a39Sopenharmony_ci ERR_raise(ERR_LIB_EC, ERR_R_MALLOC_FAILURE); 1128e1051a39Sopenharmony_ci goto err; 1129e1051a39Sopenharmony_ci } 1130e1051a39Sopenharmony_ci 1131e1051a39Sopenharmony_ci new_points = OPENSSL_malloc((num + 1) * sizeof(EC_POINT *)); 1132e1051a39Sopenharmony_ci if (new_points == NULL) { 1133e1051a39Sopenharmony_ci ERR_raise(ERR_LIB_EC, ERR_R_MALLOC_FAILURE); 1134e1051a39Sopenharmony_ci goto err; 1135e1051a39Sopenharmony_ci } 1136e1051a39Sopenharmony_ci 1137e1051a39Sopenharmony_ci memcpy(new_scalars, scalars, num * sizeof(BIGNUM *)); 1138e1051a39Sopenharmony_ci new_scalars[num] = scalar; 1139e1051a39Sopenharmony_ci memcpy(new_points, points, num * sizeof(EC_POINT *)); 1140e1051a39Sopenharmony_ci new_points[num] = generator; 1141e1051a39Sopenharmony_ci 1142e1051a39Sopenharmony_ci scalars = new_scalars; 1143e1051a39Sopenharmony_ci points = new_points; 1144e1051a39Sopenharmony_ci num++; 1145e1051a39Sopenharmony_ci } 1146e1051a39Sopenharmony_ci 1147e1051a39Sopenharmony_ci if (num) { 1148e1051a39Sopenharmony_ci P256_POINT *out = &t.p; 1149e1051a39Sopenharmony_ci if (p_is_infinity) 1150e1051a39Sopenharmony_ci out = &p.p; 1151e1051a39Sopenharmony_ci 1152e1051a39Sopenharmony_ci if (!ecp_nistz256_windowed_mul(group, out, scalars, points, num, ctx)) 1153e1051a39Sopenharmony_ci goto err; 1154e1051a39Sopenharmony_ci 1155e1051a39Sopenharmony_ci if (!p_is_infinity) 1156e1051a39Sopenharmony_ci ecp_nistz256_point_add(&p.p, &p.p, out); 1157e1051a39Sopenharmony_ci } 1158e1051a39Sopenharmony_ci 1159e1051a39Sopenharmony_ci /* Not constant-time, but we're only operating on the public output. */ 1160e1051a39Sopenharmony_ci if (!bn_set_words(r->X, p.p.X, P256_LIMBS) || 1161e1051a39Sopenharmony_ci !bn_set_words(r->Y, p.p.Y, P256_LIMBS) || 1162e1051a39Sopenharmony_ci !bn_set_words(r->Z, p.p.Z, P256_LIMBS)) { 1163e1051a39Sopenharmony_ci goto err; 1164e1051a39Sopenharmony_ci } 1165e1051a39Sopenharmony_ci r->Z_is_one = is_one(r->Z) & 1; 1166e1051a39Sopenharmony_ci 1167e1051a39Sopenharmony_ci ret = 1; 1168e1051a39Sopenharmony_ci 1169e1051a39Sopenharmony_cierr: 1170e1051a39Sopenharmony_ci BN_CTX_end(ctx); 1171e1051a39Sopenharmony_ci OPENSSL_free(new_points); 1172e1051a39Sopenharmony_ci OPENSSL_free(new_scalars); 1173e1051a39Sopenharmony_ci return ret; 1174e1051a39Sopenharmony_ci} 1175e1051a39Sopenharmony_ci 1176e1051a39Sopenharmony_ci__owur static int ecp_nistz256_get_affine(const EC_GROUP *group, 1177e1051a39Sopenharmony_ci const EC_POINT *point, 1178e1051a39Sopenharmony_ci BIGNUM *x, BIGNUM *y, BN_CTX *ctx) 1179e1051a39Sopenharmony_ci{ 1180e1051a39Sopenharmony_ci BN_ULONG z_inv2[P256_LIMBS]; 1181e1051a39Sopenharmony_ci BN_ULONG z_inv3[P256_LIMBS]; 1182e1051a39Sopenharmony_ci BN_ULONG x_aff[P256_LIMBS]; 1183e1051a39Sopenharmony_ci BN_ULONG y_aff[P256_LIMBS]; 1184e1051a39Sopenharmony_ci BN_ULONG point_x[P256_LIMBS], point_y[P256_LIMBS], point_z[P256_LIMBS]; 1185e1051a39Sopenharmony_ci BN_ULONG x_ret[P256_LIMBS], y_ret[P256_LIMBS]; 1186e1051a39Sopenharmony_ci 1187e1051a39Sopenharmony_ci if (EC_POINT_is_at_infinity(group, point)) { 1188e1051a39Sopenharmony_ci ERR_raise(ERR_LIB_EC, EC_R_POINT_AT_INFINITY); 1189e1051a39Sopenharmony_ci return 0; 1190e1051a39Sopenharmony_ci } 1191e1051a39Sopenharmony_ci 1192e1051a39Sopenharmony_ci if (!ecp_nistz256_bignum_to_field_elem(point_x, point->X) || 1193e1051a39Sopenharmony_ci !ecp_nistz256_bignum_to_field_elem(point_y, point->Y) || 1194e1051a39Sopenharmony_ci !ecp_nistz256_bignum_to_field_elem(point_z, point->Z)) { 1195e1051a39Sopenharmony_ci ERR_raise(ERR_LIB_EC, EC_R_COORDINATES_OUT_OF_RANGE); 1196e1051a39Sopenharmony_ci return 0; 1197e1051a39Sopenharmony_ci } 1198e1051a39Sopenharmony_ci 1199e1051a39Sopenharmony_ci ecp_nistz256_mod_inverse(z_inv3, point_z); 1200e1051a39Sopenharmony_ci ecp_nistz256_sqr_mont(z_inv2, z_inv3); 1201e1051a39Sopenharmony_ci ecp_nistz256_mul_mont(x_aff, z_inv2, point_x); 1202e1051a39Sopenharmony_ci 1203e1051a39Sopenharmony_ci if (x != NULL) { 1204e1051a39Sopenharmony_ci ecp_nistz256_from_mont(x_ret, x_aff); 1205e1051a39Sopenharmony_ci if (!bn_set_words(x, x_ret, P256_LIMBS)) 1206e1051a39Sopenharmony_ci return 0; 1207e1051a39Sopenharmony_ci } 1208e1051a39Sopenharmony_ci 1209e1051a39Sopenharmony_ci if (y != NULL) { 1210e1051a39Sopenharmony_ci ecp_nistz256_mul_mont(z_inv3, z_inv3, z_inv2); 1211e1051a39Sopenharmony_ci ecp_nistz256_mul_mont(y_aff, z_inv3, point_y); 1212e1051a39Sopenharmony_ci ecp_nistz256_from_mont(y_ret, y_aff); 1213e1051a39Sopenharmony_ci if (!bn_set_words(y, y_ret, P256_LIMBS)) 1214e1051a39Sopenharmony_ci return 0; 1215e1051a39Sopenharmony_ci } 1216e1051a39Sopenharmony_ci 1217e1051a39Sopenharmony_ci return 1; 1218e1051a39Sopenharmony_ci} 1219e1051a39Sopenharmony_ci 1220e1051a39Sopenharmony_cistatic NISTZ256_PRE_COMP *ecp_nistz256_pre_comp_new(const EC_GROUP *group) 1221e1051a39Sopenharmony_ci{ 1222e1051a39Sopenharmony_ci NISTZ256_PRE_COMP *ret = NULL; 1223e1051a39Sopenharmony_ci 1224e1051a39Sopenharmony_ci if (!group) 1225e1051a39Sopenharmony_ci return NULL; 1226e1051a39Sopenharmony_ci 1227e1051a39Sopenharmony_ci ret = OPENSSL_zalloc(sizeof(*ret)); 1228e1051a39Sopenharmony_ci 1229e1051a39Sopenharmony_ci if (ret == NULL) { 1230e1051a39Sopenharmony_ci ERR_raise(ERR_LIB_EC, ERR_R_MALLOC_FAILURE); 1231e1051a39Sopenharmony_ci return ret; 1232e1051a39Sopenharmony_ci } 1233e1051a39Sopenharmony_ci 1234e1051a39Sopenharmony_ci ret->group = group; 1235e1051a39Sopenharmony_ci ret->w = 6; /* default */ 1236e1051a39Sopenharmony_ci ret->references = 1; 1237e1051a39Sopenharmony_ci 1238e1051a39Sopenharmony_ci ret->lock = CRYPTO_THREAD_lock_new(); 1239e1051a39Sopenharmony_ci if (ret->lock == NULL) { 1240e1051a39Sopenharmony_ci ERR_raise(ERR_LIB_EC, ERR_R_MALLOC_FAILURE); 1241e1051a39Sopenharmony_ci OPENSSL_free(ret); 1242e1051a39Sopenharmony_ci return NULL; 1243e1051a39Sopenharmony_ci } 1244e1051a39Sopenharmony_ci return ret; 1245e1051a39Sopenharmony_ci} 1246e1051a39Sopenharmony_ci 1247e1051a39Sopenharmony_ciNISTZ256_PRE_COMP *EC_nistz256_pre_comp_dup(NISTZ256_PRE_COMP *p) 1248e1051a39Sopenharmony_ci{ 1249e1051a39Sopenharmony_ci int i; 1250e1051a39Sopenharmony_ci if (p != NULL) 1251e1051a39Sopenharmony_ci CRYPTO_UP_REF(&p->references, &i, p->lock); 1252e1051a39Sopenharmony_ci return p; 1253e1051a39Sopenharmony_ci} 1254e1051a39Sopenharmony_ci 1255e1051a39Sopenharmony_civoid EC_nistz256_pre_comp_free(NISTZ256_PRE_COMP *pre) 1256e1051a39Sopenharmony_ci{ 1257e1051a39Sopenharmony_ci int i; 1258e1051a39Sopenharmony_ci 1259e1051a39Sopenharmony_ci if (pre == NULL) 1260e1051a39Sopenharmony_ci return; 1261e1051a39Sopenharmony_ci 1262e1051a39Sopenharmony_ci CRYPTO_DOWN_REF(&pre->references, &i, pre->lock); 1263e1051a39Sopenharmony_ci REF_PRINT_COUNT("EC_nistz256", pre); 1264e1051a39Sopenharmony_ci if (i > 0) 1265e1051a39Sopenharmony_ci return; 1266e1051a39Sopenharmony_ci REF_ASSERT_ISNT(i < 0); 1267e1051a39Sopenharmony_ci 1268e1051a39Sopenharmony_ci OPENSSL_free(pre->precomp_storage); 1269e1051a39Sopenharmony_ci CRYPTO_THREAD_lock_free(pre->lock); 1270e1051a39Sopenharmony_ci OPENSSL_free(pre); 1271e1051a39Sopenharmony_ci} 1272e1051a39Sopenharmony_ci 1273e1051a39Sopenharmony_ci 1274e1051a39Sopenharmony_cistatic int ecp_nistz256_window_have_precompute_mult(const EC_GROUP *group) 1275e1051a39Sopenharmony_ci{ 1276e1051a39Sopenharmony_ci /* There is a hard-coded table for the default generator. */ 1277e1051a39Sopenharmony_ci const EC_POINT *generator = EC_GROUP_get0_generator(group); 1278e1051a39Sopenharmony_ci 1279e1051a39Sopenharmony_ci if (generator != NULL && ecp_nistz256_is_affine_G(generator)) { 1280e1051a39Sopenharmony_ci /* There is a hard-coded table for the default generator. */ 1281e1051a39Sopenharmony_ci return 1; 1282e1051a39Sopenharmony_ci } 1283e1051a39Sopenharmony_ci 1284e1051a39Sopenharmony_ci return HAVEPRECOMP(group, nistz256); 1285e1051a39Sopenharmony_ci} 1286e1051a39Sopenharmony_ci 1287e1051a39Sopenharmony_ci#if defined(__x86_64) || defined(__x86_64__) || \ 1288e1051a39Sopenharmony_ci defined(_M_AMD64) || defined(_M_X64) || \ 1289e1051a39Sopenharmony_ci defined(__powerpc64__) || defined(_ARCH_PP64) || \ 1290e1051a39Sopenharmony_ci defined(__aarch64__) 1291e1051a39Sopenharmony_ci/* 1292e1051a39Sopenharmony_ci * Montgomery mul modulo Order(P): res = a*b*2^-256 mod Order(P) 1293e1051a39Sopenharmony_ci */ 1294e1051a39Sopenharmony_civoid ecp_nistz256_ord_mul_mont(BN_ULONG res[P256_LIMBS], 1295e1051a39Sopenharmony_ci const BN_ULONG a[P256_LIMBS], 1296e1051a39Sopenharmony_ci const BN_ULONG b[P256_LIMBS]); 1297e1051a39Sopenharmony_civoid ecp_nistz256_ord_sqr_mont(BN_ULONG res[P256_LIMBS], 1298e1051a39Sopenharmony_ci const BN_ULONG a[P256_LIMBS], 1299e1051a39Sopenharmony_ci BN_ULONG rep); 1300e1051a39Sopenharmony_ci 1301e1051a39Sopenharmony_cistatic int ecp_nistz256_inv_mod_ord(const EC_GROUP *group, BIGNUM *r, 1302e1051a39Sopenharmony_ci const BIGNUM *x, BN_CTX *ctx) 1303e1051a39Sopenharmony_ci{ 1304e1051a39Sopenharmony_ci /* RR = 2^512 mod ord(p256) */ 1305e1051a39Sopenharmony_ci static const BN_ULONG RR[P256_LIMBS] = { 1306e1051a39Sopenharmony_ci TOBN(0x83244c95,0xbe79eea2), TOBN(0x4699799c,0x49bd6fa6), 1307e1051a39Sopenharmony_ci TOBN(0x2845b239,0x2b6bec59), TOBN(0x66e12d94,0xf3d95620) 1308e1051a39Sopenharmony_ci }; 1309e1051a39Sopenharmony_ci /* The constant 1 (unlike ONE that is one in Montgomery representation) */ 1310e1051a39Sopenharmony_ci static const BN_ULONG one[P256_LIMBS] = { 1311e1051a39Sopenharmony_ci TOBN(0,1), TOBN(0,0), TOBN(0,0), TOBN(0,0) 1312e1051a39Sopenharmony_ci }; 1313e1051a39Sopenharmony_ci /* 1314e1051a39Sopenharmony_ci * We don't use entry 0 in the table, so we omit it and address 1315e1051a39Sopenharmony_ci * with -1 offset. 1316e1051a39Sopenharmony_ci */ 1317e1051a39Sopenharmony_ci BN_ULONG table[15][P256_LIMBS]; 1318e1051a39Sopenharmony_ci BN_ULONG out[P256_LIMBS], t[P256_LIMBS]; 1319e1051a39Sopenharmony_ci int i, ret = 0; 1320e1051a39Sopenharmony_ci enum { 1321e1051a39Sopenharmony_ci i_1 = 0, i_10, i_11, i_101, i_111, i_1010, i_1111, 1322e1051a39Sopenharmony_ci i_10101, i_101010, i_101111, i_x6, i_x8, i_x16, i_x32 1323e1051a39Sopenharmony_ci }; 1324e1051a39Sopenharmony_ci 1325e1051a39Sopenharmony_ci /* 1326e1051a39Sopenharmony_ci * Catch allocation failure early. 1327e1051a39Sopenharmony_ci */ 1328e1051a39Sopenharmony_ci if (bn_wexpand(r, P256_LIMBS) == NULL) { 1329e1051a39Sopenharmony_ci ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB); 1330e1051a39Sopenharmony_ci goto err; 1331e1051a39Sopenharmony_ci } 1332e1051a39Sopenharmony_ci 1333e1051a39Sopenharmony_ci if ((BN_num_bits(x) > 256) || BN_is_negative(x)) { 1334e1051a39Sopenharmony_ci BIGNUM *tmp; 1335e1051a39Sopenharmony_ci 1336e1051a39Sopenharmony_ci if ((tmp = BN_CTX_get(ctx)) == NULL 1337e1051a39Sopenharmony_ci || !BN_nnmod(tmp, x, group->order, ctx)) { 1338e1051a39Sopenharmony_ci ERR_raise(ERR_LIB_EC, ERR_R_BN_LIB); 1339e1051a39Sopenharmony_ci goto err; 1340e1051a39Sopenharmony_ci } 1341e1051a39Sopenharmony_ci x = tmp; 1342e1051a39Sopenharmony_ci } 1343e1051a39Sopenharmony_ci 1344e1051a39Sopenharmony_ci if (!ecp_nistz256_bignum_to_field_elem(t, x)) { 1345e1051a39Sopenharmony_ci ERR_raise(ERR_LIB_EC, EC_R_COORDINATES_OUT_OF_RANGE); 1346e1051a39Sopenharmony_ci goto err; 1347e1051a39Sopenharmony_ci } 1348e1051a39Sopenharmony_ci 1349e1051a39Sopenharmony_ci ecp_nistz256_ord_mul_mont(table[0], t, RR); 1350e1051a39Sopenharmony_ci#if 0 1351e1051a39Sopenharmony_ci /* 1352e1051a39Sopenharmony_ci * Original sparse-then-fixed-window algorithm, retained for reference. 1353e1051a39Sopenharmony_ci */ 1354e1051a39Sopenharmony_ci for (i = 2; i < 16; i += 2) { 1355e1051a39Sopenharmony_ci ecp_nistz256_ord_sqr_mont(table[i-1], table[i/2-1], 1); 1356e1051a39Sopenharmony_ci ecp_nistz256_ord_mul_mont(table[i], table[i-1], table[0]); 1357e1051a39Sopenharmony_ci } 1358e1051a39Sopenharmony_ci 1359e1051a39Sopenharmony_ci /* 1360e1051a39Sopenharmony_ci * The top 128bit of the exponent are highly redudndant, so we 1361e1051a39Sopenharmony_ci * perform an optimized flow 1362e1051a39Sopenharmony_ci */ 1363e1051a39Sopenharmony_ci ecp_nistz256_ord_sqr_mont(t, table[15-1], 4); /* f0 */ 1364e1051a39Sopenharmony_ci ecp_nistz256_ord_mul_mont(t, t, table[15-1]); /* ff */ 1365e1051a39Sopenharmony_ci 1366e1051a39Sopenharmony_ci ecp_nistz256_ord_sqr_mont(out, t, 8); /* ff00 */ 1367e1051a39Sopenharmony_ci ecp_nistz256_ord_mul_mont(out, out, t); /* ffff */ 1368e1051a39Sopenharmony_ci 1369e1051a39Sopenharmony_ci ecp_nistz256_ord_sqr_mont(t, out, 16); /* ffff0000 */ 1370e1051a39Sopenharmony_ci ecp_nistz256_ord_mul_mont(t, t, out); /* ffffffff */ 1371e1051a39Sopenharmony_ci 1372e1051a39Sopenharmony_ci ecp_nistz256_ord_sqr_mont(out, t, 64); /* ffffffff0000000000000000 */ 1373e1051a39Sopenharmony_ci ecp_nistz256_ord_mul_mont(out, out, t); /* ffffffff00000000ffffffff */ 1374e1051a39Sopenharmony_ci 1375e1051a39Sopenharmony_ci ecp_nistz256_ord_sqr_mont(out, out, 32); /* ffffffff00000000ffffffff00000000 */ 1376e1051a39Sopenharmony_ci ecp_nistz256_ord_mul_mont(out, out, t); /* ffffffff00000000ffffffffffffffff */ 1377e1051a39Sopenharmony_ci 1378e1051a39Sopenharmony_ci /* 1379e1051a39Sopenharmony_ci * The bottom 128 bit of the exponent are processed with fixed 4-bit window 1380e1051a39Sopenharmony_ci */ 1381e1051a39Sopenharmony_ci for(i = 0; i < 32; i++) { 1382e1051a39Sopenharmony_ci /* expLo - the low 128 bits of the exponent we use (ord(p256) - 2), 1383e1051a39Sopenharmony_ci * split into nibbles */ 1384e1051a39Sopenharmony_ci static const unsigned char expLo[32] = { 1385e1051a39Sopenharmony_ci 0xb,0xc,0xe,0x6,0xf,0xa,0xa,0xd,0xa,0x7,0x1,0x7,0x9,0xe,0x8,0x4, 1386e1051a39Sopenharmony_ci 0xf,0x3,0xb,0x9,0xc,0xa,0xc,0x2,0xf,0xc,0x6,0x3,0x2,0x5,0x4,0xf 1387e1051a39Sopenharmony_ci }; 1388e1051a39Sopenharmony_ci 1389e1051a39Sopenharmony_ci ecp_nistz256_ord_sqr_mont(out, out, 4); 1390e1051a39Sopenharmony_ci /* The exponent is public, no need in constant-time access */ 1391e1051a39Sopenharmony_ci ecp_nistz256_ord_mul_mont(out, out, table[expLo[i]-1]); 1392e1051a39Sopenharmony_ci } 1393e1051a39Sopenharmony_ci#else 1394e1051a39Sopenharmony_ci /* 1395e1051a39Sopenharmony_ci * https://briansmith.org/ecc-inversion-addition-chains-01#p256_scalar_inversion 1396e1051a39Sopenharmony_ci * 1397e1051a39Sopenharmony_ci * Even though this code path spares 12 squarings, 4.5%, and 13 1398e1051a39Sopenharmony_ci * multiplications, 25%, on grand scale sign operation is not that 1399e1051a39Sopenharmony_ci * much faster, not more that 2%... 1400e1051a39Sopenharmony_ci */ 1401e1051a39Sopenharmony_ci 1402e1051a39Sopenharmony_ci /* pre-calculate powers */ 1403e1051a39Sopenharmony_ci ecp_nistz256_ord_sqr_mont(table[i_10], table[i_1], 1); 1404e1051a39Sopenharmony_ci 1405e1051a39Sopenharmony_ci ecp_nistz256_ord_mul_mont(table[i_11], table[i_1], table[i_10]); 1406e1051a39Sopenharmony_ci 1407e1051a39Sopenharmony_ci ecp_nistz256_ord_mul_mont(table[i_101], table[i_11], table[i_10]); 1408e1051a39Sopenharmony_ci 1409e1051a39Sopenharmony_ci ecp_nistz256_ord_mul_mont(table[i_111], table[i_101], table[i_10]); 1410e1051a39Sopenharmony_ci 1411e1051a39Sopenharmony_ci ecp_nistz256_ord_sqr_mont(table[i_1010], table[i_101], 1); 1412e1051a39Sopenharmony_ci 1413e1051a39Sopenharmony_ci ecp_nistz256_ord_mul_mont(table[i_1111], table[i_1010], table[i_101]); 1414e1051a39Sopenharmony_ci 1415e1051a39Sopenharmony_ci ecp_nistz256_ord_sqr_mont(table[i_10101], table[i_1010], 1); 1416e1051a39Sopenharmony_ci ecp_nistz256_ord_mul_mont(table[i_10101], table[i_10101], table[i_1]); 1417e1051a39Sopenharmony_ci 1418e1051a39Sopenharmony_ci ecp_nistz256_ord_sqr_mont(table[i_101010], table[i_10101], 1); 1419e1051a39Sopenharmony_ci 1420e1051a39Sopenharmony_ci ecp_nistz256_ord_mul_mont(table[i_101111], table[i_101010], table[i_101]); 1421e1051a39Sopenharmony_ci 1422e1051a39Sopenharmony_ci ecp_nistz256_ord_mul_mont(table[i_x6], table[i_101010], table[i_10101]); 1423e1051a39Sopenharmony_ci 1424e1051a39Sopenharmony_ci ecp_nistz256_ord_sqr_mont(table[i_x8], table[i_x6], 2); 1425e1051a39Sopenharmony_ci ecp_nistz256_ord_mul_mont(table[i_x8], table[i_x8], table[i_11]); 1426e1051a39Sopenharmony_ci 1427e1051a39Sopenharmony_ci ecp_nistz256_ord_sqr_mont(table[i_x16], table[i_x8], 8); 1428e1051a39Sopenharmony_ci ecp_nistz256_ord_mul_mont(table[i_x16], table[i_x16], table[i_x8]); 1429e1051a39Sopenharmony_ci 1430e1051a39Sopenharmony_ci ecp_nistz256_ord_sqr_mont(table[i_x32], table[i_x16], 16); 1431e1051a39Sopenharmony_ci ecp_nistz256_ord_mul_mont(table[i_x32], table[i_x32], table[i_x16]); 1432e1051a39Sopenharmony_ci 1433e1051a39Sopenharmony_ci /* calculations */ 1434e1051a39Sopenharmony_ci ecp_nistz256_ord_sqr_mont(out, table[i_x32], 64); 1435e1051a39Sopenharmony_ci ecp_nistz256_ord_mul_mont(out, out, table[i_x32]); 1436e1051a39Sopenharmony_ci 1437e1051a39Sopenharmony_ci for (i = 0; i < 27; i++) { 1438e1051a39Sopenharmony_ci static const struct { unsigned char p, i; } chain[27] = { 1439e1051a39Sopenharmony_ci { 32, i_x32 }, { 6, i_101111 }, { 5, i_111 }, 1440e1051a39Sopenharmony_ci { 4, i_11 }, { 5, i_1111 }, { 5, i_10101 }, 1441e1051a39Sopenharmony_ci { 4, i_101 }, { 3, i_101 }, { 3, i_101 }, 1442e1051a39Sopenharmony_ci { 5, i_111 }, { 9, i_101111 }, { 6, i_1111 }, 1443e1051a39Sopenharmony_ci { 2, i_1 }, { 5, i_1 }, { 6, i_1111 }, 1444e1051a39Sopenharmony_ci { 5, i_111 }, { 4, i_111 }, { 5, i_111 }, 1445e1051a39Sopenharmony_ci { 5, i_101 }, { 3, i_11 }, { 10, i_101111 }, 1446e1051a39Sopenharmony_ci { 2, i_11 }, { 5, i_11 }, { 5, i_11 }, 1447e1051a39Sopenharmony_ci { 3, i_1 }, { 7, i_10101 }, { 6, i_1111 } 1448e1051a39Sopenharmony_ci }; 1449e1051a39Sopenharmony_ci 1450e1051a39Sopenharmony_ci ecp_nistz256_ord_sqr_mont(out, out, chain[i].p); 1451e1051a39Sopenharmony_ci ecp_nistz256_ord_mul_mont(out, out, table[chain[i].i]); 1452e1051a39Sopenharmony_ci } 1453e1051a39Sopenharmony_ci#endif 1454e1051a39Sopenharmony_ci ecp_nistz256_ord_mul_mont(out, out, one); 1455e1051a39Sopenharmony_ci 1456e1051a39Sopenharmony_ci /* 1457e1051a39Sopenharmony_ci * Can't fail, but check return code to be consistent anyway. 1458e1051a39Sopenharmony_ci */ 1459e1051a39Sopenharmony_ci if (!bn_set_words(r, out, P256_LIMBS)) 1460e1051a39Sopenharmony_ci goto err; 1461e1051a39Sopenharmony_ci 1462e1051a39Sopenharmony_ci ret = 1; 1463e1051a39Sopenharmony_cierr: 1464e1051a39Sopenharmony_ci return ret; 1465e1051a39Sopenharmony_ci} 1466e1051a39Sopenharmony_ci#else 1467e1051a39Sopenharmony_ci# define ecp_nistz256_inv_mod_ord NULL 1468e1051a39Sopenharmony_ci#endif 1469e1051a39Sopenharmony_ci 1470e1051a39Sopenharmony_ciconst EC_METHOD *EC_GFp_nistz256_method(void) 1471e1051a39Sopenharmony_ci{ 1472e1051a39Sopenharmony_ci static const EC_METHOD ret = { 1473e1051a39Sopenharmony_ci EC_FLAGS_DEFAULT_OCT, 1474e1051a39Sopenharmony_ci NID_X9_62_prime_field, 1475e1051a39Sopenharmony_ci ossl_ec_GFp_mont_group_init, 1476e1051a39Sopenharmony_ci ossl_ec_GFp_mont_group_finish, 1477e1051a39Sopenharmony_ci ossl_ec_GFp_mont_group_clear_finish, 1478e1051a39Sopenharmony_ci ossl_ec_GFp_mont_group_copy, 1479e1051a39Sopenharmony_ci ossl_ec_GFp_mont_group_set_curve, 1480e1051a39Sopenharmony_ci ossl_ec_GFp_simple_group_get_curve, 1481e1051a39Sopenharmony_ci ossl_ec_GFp_simple_group_get_degree, 1482e1051a39Sopenharmony_ci ossl_ec_group_simple_order_bits, 1483e1051a39Sopenharmony_ci ossl_ec_GFp_simple_group_check_discriminant, 1484e1051a39Sopenharmony_ci ossl_ec_GFp_simple_point_init, 1485e1051a39Sopenharmony_ci ossl_ec_GFp_simple_point_finish, 1486e1051a39Sopenharmony_ci ossl_ec_GFp_simple_point_clear_finish, 1487e1051a39Sopenharmony_ci ossl_ec_GFp_simple_point_copy, 1488e1051a39Sopenharmony_ci ossl_ec_GFp_simple_point_set_to_infinity, 1489e1051a39Sopenharmony_ci ossl_ec_GFp_simple_point_set_affine_coordinates, 1490e1051a39Sopenharmony_ci ecp_nistz256_get_affine, 1491e1051a39Sopenharmony_ci 0, 0, 0, 1492e1051a39Sopenharmony_ci ossl_ec_GFp_simple_add, 1493e1051a39Sopenharmony_ci ossl_ec_GFp_simple_dbl, 1494e1051a39Sopenharmony_ci ossl_ec_GFp_simple_invert, 1495e1051a39Sopenharmony_ci ossl_ec_GFp_simple_is_at_infinity, 1496e1051a39Sopenharmony_ci ossl_ec_GFp_simple_is_on_curve, 1497e1051a39Sopenharmony_ci ossl_ec_GFp_simple_cmp, 1498e1051a39Sopenharmony_ci ossl_ec_GFp_simple_make_affine, 1499e1051a39Sopenharmony_ci ossl_ec_GFp_simple_points_make_affine, 1500e1051a39Sopenharmony_ci ecp_nistz256_points_mul, /* mul */ 1501e1051a39Sopenharmony_ci ecp_nistz256_mult_precompute, /* precompute_mult */ 1502e1051a39Sopenharmony_ci ecp_nistz256_window_have_precompute_mult, /* have_precompute_mult */ 1503e1051a39Sopenharmony_ci ossl_ec_GFp_mont_field_mul, 1504e1051a39Sopenharmony_ci ossl_ec_GFp_mont_field_sqr, 1505e1051a39Sopenharmony_ci 0, /* field_div */ 1506e1051a39Sopenharmony_ci ossl_ec_GFp_mont_field_inv, 1507e1051a39Sopenharmony_ci ossl_ec_GFp_mont_field_encode, 1508e1051a39Sopenharmony_ci ossl_ec_GFp_mont_field_decode, 1509e1051a39Sopenharmony_ci ossl_ec_GFp_mont_field_set_to_one, 1510e1051a39Sopenharmony_ci ossl_ec_key_simple_priv2oct, 1511e1051a39Sopenharmony_ci ossl_ec_key_simple_oct2priv, 1512e1051a39Sopenharmony_ci 0, /* set private */ 1513e1051a39Sopenharmony_ci ossl_ec_key_simple_generate_key, 1514e1051a39Sopenharmony_ci ossl_ec_key_simple_check_key, 1515e1051a39Sopenharmony_ci ossl_ec_key_simple_generate_public_key, 1516e1051a39Sopenharmony_ci 0, /* keycopy */ 1517e1051a39Sopenharmony_ci 0, /* keyfinish */ 1518e1051a39Sopenharmony_ci ossl_ecdh_simple_compute_key, 1519e1051a39Sopenharmony_ci ossl_ecdsa_simple_sign_setup, 1520e1051a39Sopenharmony_ci ossl_ecdsa_simple_sign_sig, 1521e1051a39Sopenharmony_ci ossl_ecdsa_simple_verify_sig, 1522e1051a39Sopenharmony_ci ecp_nistz256_inv_mod_ord, /* can be #define-d NULL */ 1523e1051a39Sopenharmony_ci 0, /* blind_coordinates */ 1524e1051a39Sopenharmony_ci 0, /* ladder_pre */ 1525e1051a39Sopenharmony_ci 0, /* ladder_step */ 1526e1051a39Sopenharmony_ci 0 /* ladder_post */ 1527e1051a39Sopenharmony_ci }; 1528e1051a39Sopenharmony_ci 1529e1051a39Sopenharmony_ci return &ret; 1530e1051a39Sopenharmony_ci} 1531