1e1051a39Sopenharmony_ci/* 2e1051a39Sopenharmony_ci * Copyright 2020-2023 The OpenSSL Project Authors. All Rights Reserved. 3e1051a39Sopenharmony_ci * Copyright (c) 2020, Intel Corporation. All Rights Reserved. 4e1051a39Sopenharmony_ci * 5e1051a39Sopenharmony_ci * Licensed under the Apache License 2.0 (the "License"). You may not use 6e1051a39Sopenharmony_ci * this file except in compliance with the License. You can obtain a copy 7e1051a39Sopenharmony_ci * in the file LICENSE in the source distribution or at 8e1051a39Sopenharmony_ci * https://www.openssl.org/source/license.html 9e1051a39Sopenharmony_ci * 10e1051a39Sopenharmony_ci * 11e1051a39Sopenharmony_ci * Originally written by Ilya Albrekht, Sergey Kirillov and Andrey Matyukov 12e1051a39Sopenharmony_ci * Intel Corporation 13e1051a39Sopenharmony_ci * 14e1051a39Sopenharmony_ci */ 15e1051a39Sopenharmony_ci 16e1051a39Sopenharmony_ci#include <openssl/opensslconf.h> 17e1051a39Sopenharmony_ci#include <openssl/crypto.h> 18e1051a39Sopenharmony_ci#include "rsaz_exp.h" 19e1051a39Sopenharmony_ci 20e1051a39Sopenharmony_ci#ifndef RSAZ_ENABLED 21e1051a39Sopenharmony_ciNON_EMPTY_TRANSLATION_UNIT 22e1051a39Sopenharmony_ci#else 23e1051a39Sopenharmony_ci# include <assert.h> 24e1051a39Sopenharmony_ci# include <string.h> 25e1051a39Sopenharmony_ci 26e1051a39Sopenharmony_ci# if defined(__GNUC__) 27e1051a39Sopenharmony_ci# define ALIGN64 __attribute__((aligned(64))) 28e1051a39Sopenharmony_ci# elif defined(_MSC_VER) 29e1051a39Sopenharmony_ci# define ALIGN64 __declspec(align(64)) 30e1051a39Sopenharmony_ci# else 31e1051a39Sopenharmony_ci# define ALIGN64 32e1051a39Sopenharmony_ci# endif 33e1051a39Sopenharmony_ci 34e1051a39Sopenharmony_ci# define ALIGN_OF(ptr, boundary) \ 35e1051a39Sopenharmony_ci ((unsigned char *)(ptr) + (boundary - (((size_t)(ptr)) & (boundary - 1)))) 36e1051a39Sopenharmony_ci 37e1051a39Sopenharmony_ci/* Internal radix */ 38e1051a39Sopenharmony_ci# define DIGIT_SIZE (52) 39e1051a39Sopenharmony_ci/* 52-bit mask */ 40e1051a39Sopenharmony_ci# define DIGIT_MASK ((uint64_t)0xFFFFFFFFFFFFF) 41e1051a39Sopenharmony_ci 42e1051a39Sopenharmony_ci# define BITS2WORD8_SIZE(x) (((x) + 7) >> 3) 43e1051a39Sopenharmony_ci# define BITS2WORD64_SIZE(x) (((x) + 63) >> 6) 44e1051a39Sopenharmony_ci 45e1051a39Sopenharmony_cistatic ossl_inline uint64_t get_digit52(const uint8_t *in, int in_len); 46e1051a39Sopenharmony_cistatic ossl_inline void put_digit52(uint8_t *out, int out_len, uint64_t digit); 47e1051a39Sopenharmony_cistatic void to_words52(BN_ULONG *out, int out_len, const BN_ULONG *in, 48e1051a39Sopenharmony_ci int in_bitsize); 49e1051a39Sopenharmony_cistatic void from_words52(BN_ULONG *bn_out, int out_bitsize, const BN_ULONG *in); 50e1051a39Sopenharmony_cistatic ossl_inline void set_bit(BN_ULONG *a, int idx); 51e1051a39Sopenharmony_ci 52e1051a39Sopenharmony_ci/* Number of |digit_size|-bit digits in |bitsize|-bit value */ 53e1051a39Sopenharmony_cistatic ossl_inline int number_of_digits(int bitsize, int digit_size) 54e1051a39Sopenharmony_ci{ 55e1051a39Sopenharmony_ci return (bitsize + digit_size - 1) / digit_size; 56e1051a39Sopenharmony_ci} 57e1051a39Sopenharmony_ci 58e1051a39Sopenharmony_citypedef void (*AMM52)(BN_ULONG *res, const BN_ULONG *base, 59e1051a39Sopenharmony_ci const BN_ULONG *exp, const BN_ULONG *m, BN_ULONG k0); 60e1051a39Sopenharmony_citypedef void (*EXP52_x2)(BN_ULONG *res, const BN_ULONG *base, 61e1051a39Sopenharmony_ci const BN_ULONG *exp[2], const BN_ULONG *m, 62e1051a39Sopenharmony_ci const BN_ULONG *rr, const BN_ULONG k0[2]); 63e1051a39Sopenharmony_ci 64e1051a39Sopenharmony_ci/* 65e1051a39Sopenharmony_ci * For details of the methods declared below please refer to 66e1051a39Sopenharmony_ci * crypto/bn/asm/rsaz-avx512.pl 67e1051a39Sopenharmony_ci * 68e1051a39Sopenharmony_ci * Naming notes: 69e1051a39Sopenharmony_ci * amm = Almost Montgomery Multiplication 70e1051a39Sopenharmony_ci * ams = Almost Montgomery Squaring 71e1051a39Sopenharmony_ci * 52x20 - data represented as array of 20 digits in 52-bit radix 72e1051a39Sopenharmony_ci * _x1_/_x2_ - 1 or 2 independent inputs/outputs 73e1051a39Sopenharmony_ci * _256 suffix - uses 256-bit (AVX512VL) registers 74e1051a39Sopenharmony_ci */ 75e1051a39Sopenharmony_ci 76e1051a39Sopenharmony_ci/*AMM = Almost Montgomery Multiplication. */ 77e1051a39Sopenharmony_civoid ossl_rsaz_amm52x20_x1_256(BN_ULONG *res, const BN_ULONG *base, 78e1051a39Sopenharmony_ci const BN_ULONG *exp, const BN_ULONG *m, 79e1051a39Sopenharmony_ci BN_ULONG k0); 80e1051a39Sopenharmony_cistatic void RSAZ_exp52x20_x2_256(BN_ULONG *res, const BN_ULONG *base, 81e1051a39Sopenharmony_ci const BN_ULONG *exp[2], const BN_ULONG *m, 82e1051a39Sopenharmony_ci const BN_ULONG *rr, const BN_ULONG k0[2]); 83e1051a39Sopenharmony_civoid ossl_rsaz_amm52x20_x2_256(BN_ULONG *out, const BN_ULONG *a, 84e1051a39Sopenharmony_ci const BN_ULONG *b, const BN_ULONG *m, 85e1051a39Sopenharmony_ci const BN_ULONG k0[2]); 86e1051a39Sopenharmony_civoid ossl_extract_multiplier_2x20_win5(BN_ULONG *red_Y, 87e1051a39Sopenharmony_ci const BN_ULONG *red_table, 88e1051a39Sopenharmony_ci int red_table_idx, int tbl_idx); 89e1051a39Sopenharmony_ci 90e1051a39Sopenharmony_ci/* 91e1051a39Sopenharmony_ci * Dual Montgomery modular exponentiation using prime moduli of the 92e1051a39Sopenharmony_ci * same bit size, optimized with AVX512 ISA. 93e1051a39Sopenharmony_ci * 94e1051a39Sopenharmony_ci * Input and output parameters for each exponentiation are independent and 95e1051a39Sopenharmony_ci * denoted here by index |i|, i = 1..2. 96e1051a39Sopenharmony_ci * 97e1051a39Sopenharmony_ci * Input and output are all in regular 2^64 radix. 98e1051a39Sopenharmony_ci * 99e1051a39Sopenharmony_ci * Each moduli shall be |factor_size| bit size. 100e1051a39Sopenharmony_ci * 101e1051a39Sopenharmony_ci * NOTE: currently only 2x1024 case is supported. 102e1051a39Sopenharmony_ci * 103e1051a39Sopenharmony_ci * [out] res|i| - result of modular exponentiation: array of qword values 104e1051a39Sopenharmony_ci * in regular (2^64) radix. Size of array shall be enough 105e1051a39Sopenharmony_ci * to hold |factor_size| bits. 106e1051a39Sopenharmony_ci * [in] base|i| - base 107e1051a39Sopenharmony_ci * [in] exp|i| - exponent 108e1051a39Sopenharmony_ci * [in] m|i| - moduli 109e1051a39Sopenharmony_ci * [in] rr|i| - Montgomery parameter RR = R^2 mod m|i| 110e1051a39Sopenharmony_ci * [in] k0_|i| - Montgomery parameter k0 = -1/m|i| mod 2^64 111e1051a39Sopenharmony_ci * [in] factor_size - moduli bit size 112e1051a39Sopenharmony_ci * 113e1051a39Sopenharmony_ci * \return 0 in case of failure, 114e1051a39Sopenharmony_ci * 1 in case of success. 115e1051a39Sopenharmony_ci */ 116e1051a39Sopenharmony_ciint ossl_rsaz_mod_exp_avx512_x2(BN_ULONG *res1, 117e1051a39Sopenharmony_ci const BN_ULONG *base1, 118e1051a39Sopenharmony_ci const BN_ULONG *exp1, 119e1051a39Sopenharmony_ci const BN_ULONG *m1, 120e1051a39Sopenharmony_ci const BN_ULONG *rr1, 121e1051a39Sopenharmony_ci BN_ULONG k0_1, 122e1051a39Sopenharmony_ci BN_ULONG *res2, 123e1051a39Sopenharmony_ci const BN_ULONG *base2, 124e1051a39Sopenharmony_ci const BN_ULONG *exp2, 125e1051a39Sopenharmony_ci const BN_ULONG *m2, 126e1051a39Sopenharmony_ci const BN_ULONG *rr2, 127e1051a39Sopenharmony_ci BN_ULONG k0_2, 128e1051a39Sopenharmony_ci int factor_size) 129e1051a39Sopenharmony_ci{ 130e1051a39Sopenharmony_ci int ret = 0; 131e1051a39Sopenharmony_ci 132e1051a39Sopenharmony_ci /* 133e1051a39Sopenharmony_ci * Number of word-size (BN_ULONG) digits to store exponent in redundant 134e1051a39Sopenharmony_ci * representation. 135e1051a39Sopenharmony_ci */ 136e1051a39Sopenharmony_ci int exp_digits = number_of_digits(factor_size + 2, DIGIT_SIZE); 137e1051a39Sopenharmony_ci int coeff_pow = 4 * (DIGIT_SIZE * exp_digits - factor_size); 138e1051a39Sopenharmony_ci BN_ULONG *base1_red, *m1_red, *rr1_red; 139e1051a39Sopenharmony_ci BN_ULONG *base2_red, *m2_red, *rr2_red; 140e1051a39Sopenharmony_ci BN_ULONG *coeff_red; 141e1051a39Sopenharmony_ci BN_ULONG *storage = NULL; 142e1051a39Sopenharmony_ci BN_ULONG *storage_aligned = NULL; 143e1051a39Sopenharmony_ci BN_ULONG storage_len_bytes = 7 * exp_digits * sizeof(BN_ULONG); 144e1051a39Sopenharmony_ci 145e1051a39Sopenharmony_ci /* AMM = Almost Montgomery Multiplication */ 146e1051a39Sopenharmony_ci AMM52 amm = NULL; 147e1051a39Sopenharmony_ci /* Dual (2-exps in parallel) exponentiation */ 148e1051a39Sopenharmony_ci EXP52_x2 exp_x2 = NULL; 149e1051a39Sopenharmony_ci 150e1051a39Sopenharmony_ci const BN_ULONG *exp[2] = {0}; 151e1051a39Sopenharmony_ci BN_ULONG k0[2] = {0}; 152e1051a39Sopenharmony_ci 153e1051a39Sopenharmony_ci /* Only 1024-bit factor size is supported now */ 154e1051a39Sopenharmony_ci switch (factor_size) { 155e1051a39Sopenharmony_ci case 1024: 156e1051a39Sopenharmony_ci amm = ossl_rsaz_amm52x20_x1_256; 157e1051a39Sopenharmony_ci exp_x2 = RSAZ_exp52x20_x2_256; 158e1051a39Sopenharmony_ci break; 159e1051a39Sopenharmony_ci default: 160e1051a39Sopenharmony_ci goto err; 161e1051a39Sopenharmony_ci } 162e1051a39Sopenharmony_ci 163e1051a39Sopenharmony_ci storage = (BN_ULONG *)OPENSSL_malloc(storage_len_bytes + 64); 164e1051a39Sopenharmony_ci if (storage == NULL) 165e1051a39Sopenharmony_ci goto err; 166e1051a39Sopenharmony_ci storage_aligned = (BN_ULONG *)ALIGN_OF(storage, 64); 167e1051a39Sopenharmony_ci 168e1051a39Sopenharmony_ci /* Memory layout for red(undant) representations */ 169e1051a39Sopenharmony_ci base1_red = storage_aligned; 170e1051a39Sopenharmony_ci base2_red = storage_aligned + 1 * exp_digits; 171e1051a39Sopenharmony_ci m1_red = storage_aligned + 2 * exp_digits; 172e1051a39Sopenharmony_ci m2_red = storage_aligned + 3 * exp_digits; 173e1051a39Sopenharmony_ci rr1_red = storage_aligned + 4 * exp_digits; 174e1051a39Sopenharmony_ci rr2_red = storage_aligned + 5 * exp_digits; 175e1051a39Sopenharmony_ci coeff_red = storage_aligned + 6 * exp_digits; 176e1051a39Sopenharmony_ci 177e1051a39Sopenharmony_ci /* Convert base_i, m_i, rr_i, from regular to 52-bit radix */ 178e1051a39Sopenharmony_ci to_words52(base1_red, exp_digits, base1, factor_size); 179e1051a39Sopenharmony_ci to_words52(base2_red, exp_digits, base2, factor_size); 180e1051a39Sopenharmony_ci to_words52(m1_red, exp_digits, m1, factor_size); 181e1051a39Sopenharmony_ci to_words52(m2_red, exp_digits, m2, factor_size); 182e1051a39Sopenharmony_ci to_words52(rr1_red, exp_digits, rr1, factor_size); 183e1051a39Sopenharmony_ci to_words52(rr2_red, exp_digits, rr2, factor_size); 184e1051a39Sopenharmony_ci 185e1051a39Sopenharmony_ci /* 186e1051a39Sopenharmony_ci * Compute target domain Montgomery converters RR' for each modulus 187e1051a39Sopenharmony_ci * based on precomputed original domain's RR. 188e1051a39Sopenharmony_ci * 189e1051a39Sopenharmony_ci * RR -> RR' transformation steps: 190e1051a39Sopenharmony_ci * (1) coeff = 2^k 191e1051a39Sopenharmony_ci * (2) t = AMM(RR,RR) = RR^2 / R' mod m 192e1051a39Sopenharmony_ci * (3) RR' = AMM(t, coeff) = RR^2 * 2^k / R'^2 mod m 193e1051a39Sopenharmony_ci * where 194e1051a39Sopenharmony_ci * k = 4 * (52 * digits52 - modlen) 195e1051a39Sopenharmony_ci * R = 2^(64 * ceil(modlen/64)) mod m 196e1051a39Sopenharmony_ci * RR = R^2 mod M 197e1051a39Sopenharmony_ci * R' = 2^(52 * ceil(modlen/52)) mod m 198e1051a39Sopenharmony_ci * 199e1051a39Sopenharmony_ci * modlen = 1024: k = 64, RR = 2^2048 mod m, RR' = 2^2080 mod m 200e1051a39Sopenharmony_ci */ 201e1051a39Sopenharmony_ci memset(coeff_red, 0, exp_digits * sizeof(BN_ULONG)); 202e1051a39Sopenharmony_ci /* (1) in reduced domain representation */ 203e1051a39Sopenharmony_ci set_bit(coeff_red, 64 * (int)(coeff_pow / 52) + coeff_pow % 52); 204e1051a39Sopenharmony_ci 205e1051a39Sopenharmony_ci amm(rr1_red, rr1_red, rr1_red, m1_red, k0_1); /* (2) for m1 */ 206e1051a39Sopenharmony_ci amm(rr1_red, rr1_red, coeff_red, m1_red, k0_1); /* (3) for m1 */ 207e1051a39Sopenharmony_ci 208e1051a39Sopenharmony_ci amm(rr2_red, rr2_red, rr2_red, m2_red, k0_2); /* (2) for m2 */ 209e1051a39Sopenharmony_ci amm(rr2_red, rr2_red, coeff_red, m2_red, k0_2); /* (3) for m2 */ 210e1051a39Sopenharmony_ci 211e1051a39Sopenharmony_ci exp[0] = exp1; 212e1051a39Sopenharmony_ci exp[1] = exp2; 213e1051a39Sopenharmony_ci 214e1051a39Sopenharmony_ci k0[0] = k0_1; 215e1051a39Sopenharmony_ci k0[1] = k0_2; 216e1051a39Sopenharmony_ci 217e1051a39Sopenharmony_ci exp_x2(rr1_red, base1_red, exp, m1_red, rr1_red, k0); 218e1051a39Sopenharmony_ci 219e1051a39Sopenharmony_ci /* Convert rr_i back to regular radix */ 220e1051a39Sopenharmony_ci from_words52(res1, factor_size, rr1_red); 221e1051a39Sopenharmony_ci from_words52(res2, factor_size, rr2_red); 222e1051a39Sopenharmony_ci 223e1051a39Sopenharmony_ci /* bn_reduce_once_in_place expects number of BN_ULONG, not bit size */ 224e1051a39Sopenharmony_ci factor_size /= sizeof(BN_ULONG) * 8; 225e1051a39Sopenharmony_ci 226e1051a39Sopenharmony_ci bn_reduce_once_in_place(res1, /*carry=*/0, m1, storage, factor_size); 227e1051a39Sopenharmony_ci bn_reduce_once_in_place(res2, /*carry=*/0, m2, storage, factor_size); 228e1051a39Sopenharmony_ci 229e1051a39Sopenharmony_ci ret = 1; 230e1051a39Sopenharmony_cierr: 231e1051a39Sopenharmony_ci if (storage != NULL) { 232e1051a39Sopenharmony_ci OPENSSL_cleanse(storage, storage_len_bytes); 233e1051a39Sopenharmony_ci OPENSSL_free(storage); 234e1051a39Sopenharmony_ci } 235e1051a39Sopenharmony_ci return ret; 236e1051a39Sopenharmony_ci} 237e1051a39Sopenharmony_ci 238e1051a39Sopenharmony_ci/* 239e1051a39Sopenharmony_ci * Dual 1024-bit w-ary modular exponentiation using prime moduli of the same 240e1051a39Sopenharmony_ci * bit size using Almost Montgomery Multiplication, optimized with AVX512_IFMA 241e1051a39Sopenharmony_ci * ISA. 242e1051a39Sopenharmony_ci * 243e1051a39Sopenharmony_ci * The parameter w (window size) = 5. 244e1051a39Sopenharmony_ci * 245e1051a39Sopenharmony_ci * [out] res - result of modular exponentiation: 2x20 qword 246e1051a39Sopenharmony_ci * values in 2^52 radix. 247e1051a39Sopenharmony_ci * [in] base - base (2x20 qword values in 2^52 radix) 248e1051a39Sopenharmony_ci * [in] exp - array of 2 pointers to 16 qword values in 2^64 radix. 249e1051a39Sopenharmony_ci * Exponent is not converted to redundant representation. 250e1051a39Sopenharmony_ci * [in] m - moduli (2x20 qword values in 2^52 radix) 251e1051a39Sopenharmony_ci * [in] rr - Montgomery parameter for 2 moduli: RR = 2^2080 mod m. 252e1051a39Sopenharmony_ci * (2x20 qword values in 2^52 radix) 253e1051a39Sopenharmony_ci * [in] k0 - Montgomery parameter for 2 moduli: k0 = -1/m mod 2^64 254e1051a39Sopenharmony_ci * 255e1051a39Sopenharmony_ci * \return (void). 256e1051a39Sopenharmony_ci */ 257e1051a39Sopenharmony_cistatic void RSAZ_exp52x20_x2_256(BN_ULONG *out, /* [2][20] */ 258e1051a39Sopenharmony_ci const BN_ULONG *base, /* [2][20] */ 259e1051a39Sopenharmony_ci const BN_ULONG *exp[2], /* 2x16 */ 260e1051a39Sopenharmony_ci const BN_ULONG *m, /* [2][20] */ 261e1051a39Sopenharmony_ci const BN_ULONG *rr, /* [2][20] */ 262e1051a39Sopenharmony_ci const BN_ULONG k0[2]) 263e1051a39Sopenharmony_ci{ 264e1051a39Sopenharmony_ci# define BITSIZE_MODULUS (1024) 265e1051a39Sopenharmony_ci# define EXP_WIN_SIZE (5) 266e1051a39Sopenharmony_ci# define EXP_WIN_MASK ((1U << EXP_WIN_SIZE) - 1) 267e1051a39Sopenharmony_ci/* 268e1051a39Sopenharmony_ci * Number of digits (64-bit words) in redundant representation to handle 269e1051a39Sopenharmony_ci * modulus bits 270e1051a39Sopenharmony_ci */ 271e1051a39Sopenharmony_ci# define RED_DIGITS (20) 272e1051a39Sopenharmony_ci# define EXP_DIGITS (16) 273e1051a39Sopenharmony_ci# define DAMM ossl_rsaz_amm52x20_x2_256 274e1051a39Sopenharmony_ci/* 275e1051a39Sopenharmony_ci * Squaring is done using multiplication now. That can be a subject of 276e1051a39Sopenharmony_ci * optimization in future. 277e1051a39Sopenharmony_ci */ 278e1051a39Sopenharmony_ci# define DAMS(r,a,m,k0) \ 279e1051a39Sopenharmony_ci ossl_rsaz_amm52x20_x2_256((r),(a),(a),(m),(k0)) 280e1051a39Sopenharmony_ci 281e1051a39Sopenharmony_ci /* Allocate stack for red(undant) result Y and multiplier X */ 282e1051a39Sopenharmony_ci ALIGN64 BN_ULONG red_Y[2][RED_DIGITS]; 283e1051a39Sopenharmony_ci ALIGN64 BN_ULONG red_X[2][RED_DIGITS]; 284e1051a39Sopenharmony_ci 285e1051a39Sopenharmony_ci /* Allocate expanded exponent */ 286e1051a39Sopenharmony_ci ALIGN64 BN_ULONG expz[2][EXP_DIGITS + 1]; 287e1051a39Sopenharmony_ci 288e1051a39Sopenharmony_ci /* Pre-computed table of base powers */ 289e1051a39Sopenharmony_ci ALIGN64 BN_ULONG red_table[1U << EXP_WIN_SIZE][2][RED_DIGITS]; 290e1051a39Sopenharmony_ci 291e1051a39Sopenharmony_ci int idx; 292e1051a39Sopenharmony_ci 293e1051a39Sopenharmony_ci memset(red_Y, 0, sizeof(red_Y)); 294e1051a39Sopenharmony_ci memset(red_table, 0, sizeof(red_table)); 295e1051a39Sopenharmony_ci memset(red_X, 0, sizeof(red_X)); 296e1051a39Sopenharmony_ci 297e1051a39Sopenharmony_ci /* 298e1051a39Sopenharmony_ci * Compute table of powers base^i, i = 0, ..., (2^EXP_WIN_SIZE) - 1 299e1051a39Sopenharmony_ci * table[0] = mont(x^0) = mont(1) 300e1051a39Sopenharmony_ci * table[1] = mont(x^1) = mont(x) 301e1051a39Sopenharmony_ci */ 302e1051a39Sopenharmony_ci red_X[0][0] = 1; 303e1051a39Sopenharmony_ci red_X[1][0] = 1; 304e1051a39Sopenharmony_ci DAMM(red_table[0][0], (const BN_ULONG*)red_X, rr, m, k0); 305e1051a39Sopenharmony_ci DAMM(red_table[1][0], base, rr, m, k0); 306e1051a39Sopenharmony_ci 307e1051a39Sopenharmony_ci for (idx = 1; idx < (int)((1U << EXP_WIN_SIZE) / 2); idx++) { 308e1051a39Sopenharmony_ci DAMS(red_table[2 * idx + 0][0], red_table[1 * idx][0], m, k0); 309e1051a39Sopenharmony_ci DAMM(red_table[2 * idx + 1][0], red_table[2 * idx][0], red_table[1][0], m, k0); 310e1051a39Sopenharmony_ci } 311e1051a39Sopenharmony_ci 312e1051a39Sopenharmony_ci /* Copy and expand exponents */ 313e1051a39Sopenharmony_ci memcpy(expz[0], exp[0], EXP_DIGITS * sizeof(BN_ULONG)); 314e1051a39Sopenharmony_ci expz[0][EXP_DIGITS] = 0; 315e1051a39Sopenharmony_ci memcpy(expz[1], exp[1], EXP_DIGITS * sizeof(BN_ULONG)); 316e1051a39Sopenharmony_ci expz[1][EXP_DIGITS] = 0; 317e1051a39Sopenharmony_ci 318e1051a39Sopenharmony_ci /* Exponentiation */ 319e1051a39Sopenharmony_ci { 320e1051a39Sopenharmony_ci const int rem = BITSIZE_MODULUS % EXP_WIN_SIZE; 321e1051a39Sopenharmony_ci BN_ULONG table_idx_mask = EXP_WIN_MASK; 322e1051a39Sopenharmony_ci 323e1051a39Sopenharmony_ci int exp_bit_no = BITSIZE_MODULUS - rem; 324e1051a39Sopenharmony_ci int exp_chunk_no = exp_bit_no / 64; 325e1051a39Sopenharmony_ci int exp_chunk_shift = exp_bit_no % 64; 326e1051a39Sopenharmony_ci 327e1051a39Sopenharmony_ci BN_ULONG red_table_idx_0, red_table_idx_1; 328e1051a39Sopenharmony_ci 329e1051a39Sopenharmony_ci /* 330e1051a39Sopenharmony_ci * If rem == 0, then 331e1051a39Sopenharmony_ci * exp_bit_no = modulus_bitsize - exp_win_size 332e1051a39Sopenharmony_ci * However, this isn't possible because rem is { 1024, 1536, 2048 } % 5 333e1051a39Sopenharmony_ci * which is { 4, 1, 3 } respectively. 334e1051a39Sopenharmony_ci * 335e1051a39Sopenharmony_ci * If this assertion ever fails the fix above is easy. 336e1051a39Sopenharmony_ci */ 337e1051a39Sopenharmony_ci OPENSSL_assert(rem != 0); 338e1051a39Sopenharmony_ci 339e1051a39Sopenharmony_ci /* Process 1-st exp window - just init result */ 340e1051a39Sopenharmony_ci red_table_idx_0 = expz[0][exp_chunk_no]; 341e1051a39Sopenharmony_ci red_table_idx_1 = expz[1][exp_chunk_no]; 342e1051a39Sopenharmony_ci /* 343e1051a39Sopenharmony_ci * The function operates with fixed moduli sizes divisible by 64, 344e1051a39Sopenharmony_ci * thus table index here is always in supported range [0, EXP_WIN_SIZE). 345e1051a39Sopenharmony_ci */ 346e1051a39Sopenharmony_ci red_table_idx_0 >>= exp_chunk_shift; 347e1051a39Sopenharmony_ci red_table_idx_1 >>= exp_chunk_shift; 348e1051a39Sopenharmony_ci 349e1051a39Sopenharmony_ci ossl_extract_multiplier_2x20_win5(red_Y[0], (const BN_ULONG*)red_table, 350e1051a39Sopenharmony_ci (int)red_table_idx_0, 0); 351e1051a39Sopenharmony_ci ossl_extract_multiplier_2x20_win5(red_Y[1], (const BN_ULONG*)red_table, 352e1051a39Sopenharmony_ci (int)red_table_idx_1, 1); 353e1051a39Sopenharmony_ci 354e1051a39Sopenharmony_ci /* Process other exp windows */ 355e1051a39Sopenharmony_ci for (exp_bit_no -= EXP_WIN_SIZE; exp_bit_no >= 0; exp_bit_no -= EXP_WIN_SIZE) { 356e1051a39Sopenharmony_ci /* Extract pre-computed multiplier from the table */ 357e1051a39Sopenharmony_ci { 358e1051a39Sopenharmony_ci BN_ULONG T; 359e1051a39Sopenharmony_ci 360e1051a39Sopenharmony_ci exp_chunk_no = exp_bit_no / 64; 361e1051a39Sopenharmony_ci exp_chunk_shift = exp_bit_no % 64; 362e1051a39Sopenharmony_ci { 363e1051a39Sopenharmony_ci red_table_idx_0 = expz[0][exp_chunk_no]; 364e1051a39Sopenharmony_ci T = expz[0][exp_chunk_no + 1]; 365e1051a39Sopenharmony_ci 366e1051a39Sopenharmony_ci red_table_idx_0 >>= exp_chunk_shift; 367e1051a39Sopenharmony_ci /* 368e1051a39Sopenharmony_ci * Get additional bits from then next quadword 369e1051a39Sopenharmony_ci * when 64-bit boundaries are crossed. 370e1051a39Sopenharmony_ci */ 371e1051a39Sopenharmony_ci if (exp_chunk_shift > 64 - EXP_WIN_SIZE) { 372e1051a39Sopenharmony_ci T <<= (64 - exp_chunk_shift); 373e1051a39Sopenharmony_ci red_table_idx_0 ^= T; 374e1051a39Sopenharmony_ci } 375e1051a39Sopenharmony_ci red_table_idx_0 &= table_idx_mask; 376e1051a39Sopenharmony_ci 377e1051a39Sopenharmony_ci ossl_extract_multiplier_2x20_win5(red_X[0], 378e1051a39Sopenharmony_ci (const BN_ULONG*)red_table, 379e1051a39Sopenharmony_ci (int)red_table_idx_0, 0); 380e1051a39Sopenharmony_ci } 381e1051a39Sopenharmony_ci { 382e1051a39Sopenharmony_ci red_table_idx_1 = expz[1][exp_chunk_no]; 383e1051a39Sopenharmony_ci T = expz[1][exp_chunk_no + 1]; 384e1051a39Sopenharmony_ci 385e1051a39Sopenharmony_ci red_table_idx_1 >>= exp_chunk_shift; 386e1051a39Sopenharmony_ci /* 387e1051a39Sopenharmony_ci * Get additional bits from then next quadword 388e1051a39Sopenharmony_ci * when 64-bit boundaries are crossed. 389e1051a39Sopenharmony_ci */ 390e1051a39Sopenharmony_ci if (exp_chunk_shift > 64 - EXP_WIN_SIZE) { 391e1051a39Sopenharmony_ci T <<= (64 - exp_chunk_shift); 392e1051a39Sopenharmony_ci red_table_idx_1 ^= T; 393e1051a39Sopenharmony_ci } 394e1051a39Sopenharmony_ci red_table_idx_1 &= table_idx_mask; 395e1051a39Sopenharmony_ci 396e1051a39Sopenharmony_ci ossl_extract_multiplier_2x20_win5(red_X[1], 397e1051a39Sopenharmony_ci (const BN_ULONG*)red_table, 398e1051a39Sopenharmony_ci (int)red_table_idx_1, 1); 399e1051a39Sopenharmony_ci } 400e1051a39Sopenharmony_ci } 401e1051a39Sopenharmony_ci 402e1051a39Sopenharmony_ci /* Series of squaring */ 403e1051a39Sopenharmony_ci DAMS((BN_ULONG*)red_Y, (const BN_ULONG*)red_Y, m, k0); 404e1051a39Sopenharmony_ci DAMS((BN_ULONG*)red_Y, (const BN_ULONG*)red_Y, m, k0); 405e1051a39Sopenharmony_ci DAMS((BN_ULONG*)red_Y, (const BN_ULONG*)red_Y, m, k0); 406e1051a39Sopenharmony_ci DAMS((BN_ULONG*)red_Y, (const BN_ULONG*)red_Y, m, k0); 407e1051a39Sopenharmony_ci DAMS((BN_ULONG*)red_Y, (const BN_ULONG*)red_Y, m, k0); 408e1051a39Sopenharmony_ci 409e1051a39Sopenharmony_ci DAMM((BN_ULONG*)red_Y, (const BN_ULONG*)red_Y, (const BN_ULONG*)red_X, m, k0); 410e1051a39Sopenharmony_ci } 411e1051a39Sopenharmony_ci } 412e1051a39Sopenharmony_ci 413e1051a39Sopenharmony_ci /* 414e1051a39Sopenharmony_ci * 415e1051a39Sopenharmony_ci * NB: After the last AMM of exponentiation in Montgomery domain, the result 416e1051a39Sopenharmony_ci * may be 1025-bit, but the conversion out of Montgomery domain performs an 417e1051a39Sopenharmony_ci * AMM(x,1) which guarantees that the final result is less than |m|, so no 418e1051a39Sopenharmony_ci * conditional subtraction is needed here. See "Efficient Software 419e1051a39Sopenharmony_ci * Implementations of Modular Exponentiation" (by Shay Gueron) paper for details. 420e1051a39Sopenharmony_ci */ 421e1051a39Sopenharmony_ci 422e1051a39Sopenharmony_ci /* Convert result back in regular 2^52 domain */ 423e1051a39Sopenharmony_ci memset(red_X, 0, sizeof(red_X)); 424e1051a39Sopenharmony_ci red_X[0][0] = 1; 425e1051a39Sopenharmony_ci red_X[1][0] = 1; 426e1051a39Sopenharmony_ci DAMM(out, (const BN_ULONG*)red_Y, (const BN_ULONG*)red_X, m, k0); 427e1051a39Sopenharmony_ci 428e1051a39Sopenharmony_ci /* Clear exponents */ 429e1051a39Sopenharmony_ci OPENSSL_cleanse(expz, sizeof(expz)); 430e1051a39Sopenharmony_ci OPENSSL_cleanse(red_Y, sizeof(red_Y)); 431e1051a39Sopenharmony_ci 432e1051a39Sopenharmony_ci# undef DAMS 433e1051a39Sopenharmony_ci# undef DAMM 434e1051a39Sopenharmony_ci# undef EXP_DIGITS 435e1051a39Sopenharmony_ci# undef RED_DIGITS 436e1051a39Sopenharmony_ci# undef EXP_WIN_MASK 437e1051a39Sopenharmony_ci# undef EXP_WIN_SIZE 438e1051a39Sopenharmony_ci# undef BITSIZE_MODULUS 439e1051a39Sopenharmony_ci} 440e1051a39Sopenharmony_ci 441e1051a39Sopenharmony_cistatic ossl_inline uint64_t get_digit52(const uint8_t *in, int in_len) 442e1051a39Sopenharmony_ci{ 443e1051a39Sopenharmony_ci uint64_t digit = 0; 444e1051a39Sopenharmony_ci 445e1051a39Sopenharmony_ci assert(in != NULL); 446e1051a39Sopenharmony_ci 447e1051a39Sopenharmony_ci for (; in_len > 0; in_len--) { 448e1051a39Sopenharmony_ci digit <<= 8; 449e1051a39Sopenharmony_ci digit += (uint64_t)(in[in_len - 1]); 450e1051a39Sopenharmony_ci } 451e1051a39Sopenharmony_ci return digit; 452e1051a39Sopenharmony_ci} 453e1051a39Sopenharmony_ci 454e1051a39Sopenharmony_ci/* 455e1051a39Sopenharmony_ci * Convert array of words in regular (base=2^64) representation to array of 456e1051a39Sopenharmony_ci * words in redundant (base=2^52) one. 457e1051a39Sopenharmony_ci */ 458e1051a39Sopenharmony_cistatic void to_words52(BN_ULONG *out, int out_len, 459e1051a39Sopenharmony_ci const BN_ULONG *in, int in_bitsize) 460e1051a39Sopenharmony_ci{ 461e1051a39Sopenharmony_ci uint8_t *in_str = NULL; 462e1051a39Sopenharmony_ci 463e1051a39Sopenharmony_ci assert(out != NULL); 464e1051a39Sopenharmony_ci assert(in != NULL); 465e1051a39Sopenharmony_ci /* Check destination buffer capacity */ 466e1051a39Sopenharmony_ci assert(out_len >= number_of_digits(in_bitsize, DIGIT_SIZE)); 467e1051a39Sopenharmony_ci 468e1051a39Sopenharmony_ci in_str = (uint8_t *)in; 469e1051a39Sopenharmony_ci 470e1051a39Sopenharmony_ci for (; in_bitsize >= (2 * DIGIT_SIZE); in_bitsize -= (2 * DIGIT_SIZE), out += 2) { 471e1051a39Sopenharmony_ci uint64_t digit; 472e1051a39Sopenharmony_ci 473e1051a39Sopenharmony_ci memcpy(&digit, in_str, sizeof(digit)); 474e1051a39Sopenharmony_ci out[0] = digit & DIGIT_MASK; 475e1051a39Sopenharmony_ci in_str += 6; 476e1051a39Sopenharmony_ci memcpy(&digit, in_str, sizeof(digit)); 477e1051a39Sopenharmony_ci out[1] = (digit >> 4) & DIGIT_MASK; 478e1051a39Sopenharmony_ci in_str += 7; 479e1051a39Sopenharmony_ci out_len -= 2; 480e1051a39Sopenharmony_ci } 481e1051a39Sopenharmony_ci 482e1051a39Sopenharmony_ci if (in_bitsize > DIGIT_SIZE) { 483e1051a39Sopenharmony_ci uint64_t digit = get_digit52(in_str, 7); 484e1051a39Sopenharmony_ci 485e1051a39Sopenharmony_ci out[0] = digit & DIGIT_MASK; 486e1051a39Sopenharmony_ci in_str += 6; 487e1051a39Sopenharmony_ci in_bitsize -= DIGIT_SIZE; 488e1051a39Sopenharmony_ci digit = get_digit52(in_str, BITS2WORD8_SIZE(in_bitsize)); 489e1051a39Sopenharmony_ci out[1] = digit >> 4; 490e1051a39Sopenharmony_ci out += 2; 491e1051a39Sopenharmony_ci out_len -= 2; 492e1051a39Sopenharmony_ci } else if (in_bitsize > 0) { 493e1051a39Sopenharmony_ci out[0] = get_digit52(in_str, BITS2WORD8_SIZE(in_bitsize)); 494e1051a39Sopenharmony_ci out++; 495e1051a39Sopenharmony_ci out_len--; 496e1051a39Sopenharmony_ci } 497e1051a39Sopenharmony_ci 498e1051a39Sopenharmony_ci while (out_len > 0) { 499e1051a39Sopenharmony_ci *out = 0; 500e1051a39Sopenharmony_ci out_len--; 501e1051a39Sopenharmony_ci out++; 502e1051a39Sopenharmony_ci } 503e1051a39Sopenharmony_ci} 504e1051a39Sopenharmony_ci 505e1051a39Sopenharmony_cistatic ossl_inline void put_digit52(uint8_t *pStr, int strLen, uint64_t digit) 506e1051a39Sopenharmony_ci{ 507e1051a39Sopenharmony_ci assert(pStr != NULL); 508e1051a39Sopenharmony_ci 509e1051a39Sopenharmony_ci for (; strLen > 0; strLen--) { 510e1051a39Sopenharmony_ci *pStr++ = (uint8_t)(digit & 0xFF); 511e1051a39Sopenharmony_ci digit >>= 8; 512e1051a39Sopenharmony_ci } 513e1051a39Sopenharmony_ci} 514e1051a39Sopenharmony_ci 515e1051a39Sopenharmony_ci/* 516e1051a39Sopenharmony_ci * Convert array of words in redundant (base=2^52) representation to array of 517e1051a39Sopenharmony_ci * words in regular (base=2^64) one. 518e1051a39Sopenharmony_ci */ 519e1051a39Sopenharmony_cistatic void from_words52(BN_ULONG *out, int out_bitsize, const BN_ULONG *in) 520e1051a39Sopenharmony_ci{ 521e1051a39Sopenharmony_ci int i; 522e1051a39Sopenharmony_ci int out_len = BITS2WORD64_SIZE(out_bitsize); 523e1051a39Sopenharmony_ci 524e1051a39Sopenharmony_ci assert(out != NULL); 525e1051a39Sopenharmony_ci assert(in != NULL); 526e1051a39Sopenharmony_ci 527e1051a39Sopenharmony_ci for (i = 0; i < out_len; i++) 528e1051a39Sopenharmony_ci out[i] = 0; 529e1051a39Sopenharmony_ci 530e1051a39Sopenharmony_ci { 531e1051a39Sopenharmony_ci uint8_t *out_str = (uint8_t *)out; 532e1051a39Sopenharmony_ci 533e1051a39Sopenharmony_ci for (; out_bitsize >= (2 * DIGIT_SIZE); 534e1051a39Sopenharmony_ci out_bitsize -= (2 * DIGIT_SIZE), in += 2) { 535e1051a39Sopenharmony_ci uint64_t digit; 536e1051a39Sopenharmony_ci 537e1051a39Sopenharmony_ci digit = in[0]; 538e1051a39Sopenharmony_ci memcpy(out_str, &digit, sizeof(digit)); 539e1051a39Sopenharmony_ci out_str += 6; 540e1051a39Sopenharmony_ci digit = digit >> 48 | in[1] << 4; 541e1051a39Sopenharmony_ci memcpy(out_str, &digit, sizeof(digit)); 542e1051a39Sopenharmony_ci out_str += 7; 543e1051a39Sopenharmony_ci } 544e1051a39Sopenharmony_ci 545e1051a39Sopenharmony_ci if (out_bitsize > DIGIT_SIZE) { 546e1051a39Sopenharmony_ci put_digit52(out_str, 7, in[0]); 547e1051a39Sopenharmony_ci out_str += 6; 548e1051a39Sopenharmony_ci out_bitsize -= DIGIT_SIZE; 549e1051a39Sopenharmony_ci put_digit52(out_str, BITS2WORD8_SIZE(out_bitsize), 550e1051a39Sopenharmony_ci (in[1] << 4 | in[0] >> 48)); 551e1051a39Sopenharmony_ci } else if (out_bitsize) { 552e1051a39Sopenharmony_ci put_digit52(out_str, BITS2WORD8_SIZE(out_bitsize), in[0]); 553e1051a39Sopenharmony_ci } 554e1051a39Sopenharmony_ci } 555e1051a39Sopenharmony_ci} 556e1051a39Sopenharmony_ci 557e1051a39Sopenharmony_ci/* 558e1051a39Sopenharmony_ci * Set bit at index |idx| in the words array |a|. 559e1051a39Sopenharmony_ci * It does not do any boundaries checks, make sure the index is valid before 560e1051a39Sopenharmony_ci * calling the function. 561e1051a39Sopenharmony_ci */ 562e1051a39Sopenharmony_cistatic ossl_inline void set_bit(BN_ULONG *a, int idx) 563e1051a39Sopenharmony_ci{ 564e1051a39Sopenharmony_ci assert(a != NULL); 565e1051a39Sopenharmony_ci 566e1051a39Sopenharmony_ci { 567e1051a39Sopenharmony_ci int i, j; 568e1051a39Sopenharmony_ci 569e1051a39Sopenharmony_ci i = idx / BN_BITS2; 570e1051a39Sopenharmony_ci j = idx % BN_BITS2; 571e1051a39Sopenharmony_ci a[i] |= (((BN_ULONG)1) << j); 572e1051a39Sopenharmony_ci } 573e1051a39Sopenharmony_ci} 574e1051a39Sopenharmony_ci 575e1051a39Sopenharmony_ci#endif 576