1e1051a39Sopenharmony_ci/* 2e1051a39Sopenharmony_ci * Copyright 2013-2022 The OpenSSL Project Authors. All Rights Reserved. 3e1051a39Sopenharmony_ci * Copyright (c) 2012, Intel Corporation. All Rights Reserved. 4e1051a39Sopenharmony_ci * 5e1051a39Sopenharmony_ci * Licensed under the Apache License 2.0 (the "License"). You may not use 6e1051a39Sopenharmony_ci * this file except in compliance with the License. You can obtain a copy 7e1051a39Sopenharmony_ci * in the file LICENSE in the source distribution or at 8e1051a39Sopenharmony_ci * https://www.openssl.org/source/license.html 9e1051a39Sopenharmony_ci * 10e1051a39Sopenharmony_ci * Originally written by Shay Gueron (1, 2), and Vlad Krasnov (1) 11e1051a39Sopenharmony_ci * (1) Intel Corporation, Israel Development Center, Haifa, Israel 12e1051a39Sopenharmony_ci * (2) University of Haifa, Israel 13e1051a39Sopenharmony_ci */ 14e1051a39Sopenharmony_ci 15e1051a39Sopenharmony_ci#include <openssl/opensslconf.h> 16e1051a39Sopenharmony_ci#include "rsaz_exp.h" 17e1051a39Sopenharmony_ci 18e1051a39Sopenharmony_ci#ifndef RSAZ_ENABLED 19e1051a39Sopenharmony_ciNON_EMPTY_TRANSLATION_UNIT 20e1051a39Sopenharmony_ci#else 21e1051a39Sopenharmony_ci 22e1051a39Sopenharmony_ci/* 23e1051a39Sopenharmony_ci * See crypto/bn/asm/rsaz-avx2.pl for further details. 24e1051a39Sopenharmony_ci */ 25e1051a39Sopenharmony_civoid rsaz_1024_norm2red_avx2(void *red, const void *norm); 26e1051a39Sopenharmony_civoid rsaz_1024_mul_avx2(void *ret, const void *a, const void *b, 27e1051a39Sopenharmony_ci const void *n, BN_ULONG k); 28e1051a39Sopenharmony_civoid rsaz_1024_sqr_avx2(void *ret, const void *a, const void *n, BN_ULONG k, 29e1051a39Sopenharmony_ci int cnt); 30e1051a39Sopenharmony_civoid rsaz_1024_scatter5_avx2(void *tbl, const void *val, int i); 31e1051a39Sopenharmony_civoid rsaz_1024_gather5_avx2(void *val, const void *tbl, int i); 32e1051a39Sopenharmony_civoid rsaz_1024_red2norm_avx2(void *norm, const void *red); 33e1051a39Sopenharmony_ci 34e1051a39Sopenharmony_ci#if defined(__GNUC__) 35e1051a39Sopenharmony_ci# define ALIGN64 __attribute__((aligned(64))) 36e1051a39Sopenharmony_ci#elif defined(_MSC_VER) 37e1051a39Sopenharmony_ci# define ALIGN64 __declspec(align(64)) 38e1051a39Sopenharmony_ci#elif defined(__SUNPRO_C) 39e1051a39Sopenharmony_ci# define ALIGN64 40e1051a39Sopenharmony_ci# pragma align 64(one,two80) 41e1051a39Sopenharmony_ci#else 42e1051a39Sopenharmony_ci/* not fatal, might hurt performance a little */ 43e1051a39Sopenharmony_ci# define ALIGN64 44e1051a39Sopenharmony_ci#endif 45e1051a39Sopenharmony_ci 46e1051a39Sopenharmony_ciALIGN64 static const BN_ULONG one[40] = { 47e1051a39Sopenharmony_ci 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48e1051a39Sopenharmony_ci 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 49e1051a39Sopenharmony_ci}; 50e1051a39Sopenharmony_ci 51e1051a39Sopenharmony_ciALIGN64 static const BN_ULONG two80[40] = { 52e1051a39Sopenharmony_ci 0, 0, 1 << 22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 53e1051a39Sopenharmony_ci 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 54e1051a39Sopenharmony_ci}; 55e1051a39Sopenharmony_ci 56e1051a39Sopenharmony_civoid RSAZ_1024_mod_exp_avx2(BN_ULONG result_norm[16], 57e1051a39Sopenharmony_ci const BN_ULONG base_norm[16], 58e1051a39Sopenharmony_ci const BN_ULONG exponent[16], 59e1051a39Sopenharmony_ci const BN_ULONG m_norm[16], const BN_ULONG RR[16], 60e1051a39Sopenharmony_ci BN_ULONG k0) 61e1051a39Sopenharmony_ci{ 62e1051a39Sopenharmony_ci unsigned char storage[320 * 3 + 32 * 9 * 16 + 64]; /* 5.5KB */ 63e1051a39Sopenharmony_ci unsigned char *p_str = storage + (64 - ((size_t)storage % 64)); 64e1051a39Sopenharmony_ci unsigned char *a_inv, *m, *result; 65e1051a39Sopenharmony_ci unsigned char *table_s = p_str + 320 * 3; 66e1051a39Sopenharmony_ci unsigned char *R2 = table_s; /* borrow */ 67e1051a39Sopenharmony_ci int index; 68e1051a39Sopenharmony_ci int wvalue; 69e1051a39Sopenharmony_ci BN_ULONG tmp[16]; 70e1051a39Sopenharmony_ci 71e1051a39Sopenharmony_ci if ((((size_t)p_str & 4095) + 320) >> 12) { 72e1051a39Sopenharmony_ci result = p_str; 73e1051a39Sopenharmony_ci a_inv = p_str + 320; 74e1051a39Sopenharmony_ci m = p_str + 320 * 2; /* should not cross page */ 75e1051a39Sopenharmony_ci } else { 76e1051a39Sopenharmony_ci m = p_str; /* should not cross page */ 77e1051a39Sopenharmony_ci result = p_str + 320; 78e1051a39Sopenharmony_ci a_inv = p_str + 320 * 2; 79e1051a39Sopenharmony_ci } 80e1051a39Sopenharmony_ci 81e1051a39Sopenharmony_ci rsaz_1024_norm2red_avx2(m, m_norm); 82e1051a39Sopenharmony_ci rsaz_1024_norm2red_avx2(a_inv, base_norm); 83e1051a39Sopenharmony_ci rsaz_1024_norm2red_avx2(R2, RR); 84e1051a39Sopenharmony_ci 85e1051a39Sopenharmony_ci rsaz_1024_mul_avx2(R2, R2, R2, m, k0); 86e1051a39Sopenharmony_ci rsaz_1024_mul_avx2(R2, R2, two80, m, k0); 87e1051a39Sopenharmony_ci 88e1051a39Sopenharmony_ci /* table[0] = 1 */ 89e1051a39Sopenharmony_ci rsaz_1024_mul_avx2(result, R2, one, m, k0); 90e1051a39Sopenharmony_ci /* table[1] = a_inv^1 */ 91e1051a39Sopenharmony_ci rsaz_1024_mul_avx2(a_inv, a_inv, R2, m, k0); 92e1051a39Sopenharmony_ci 93e1051a39Sopenharmony_ci rsaz_1024_scatter5_avx2(table_s, result, 0); 94e1051a39Sopenharmony_ci rsaz_1024_scatter5_avx2(table_s, a_inv, 1); 95e1051a39Sopenharmony_ci 96e1051a39Sopenharmony_ci /* table[2] = a_inv^2 */ 97e1051a39Sopenharmony_ci rsaz_1024_sqr_avx2(result, a_inv, m, k0, 1); 98e1051a39Sopenharmony_ci rsaz_1024_scatter5_avx2(table_s, result, 2); 99e1051a39Sopenharmony_ci#if 0 100e1051a39Sopenharmony_ci /* this is almost 2x smaller and less than 1% slower */ 101e1051a39Sopenharmony_ci for (index = 3; index < 32; index++) { 102e1051a39Sopenharmony_ci rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 103e1051a39Sopenharmony_ci rsaz_1024_scatter5_avx2(table_s, result, index); 104e1051a39Sopenharmony_ci } 105e1051a39Sopenharmony_ci#else 106e1051a39Sopenharmony_ci /* table[4] = a_inv^4 */ 107e1051a39Sopenharmony_ci rsaz_1024_sqr_avx2(result, result, m, k0, 1); 108e1051a39Sopenharmony_ci rsaz_1024_scatter5_avx2(table_s, result, 4); 109e1051a39Sopenharmony_ci /* table[8] = a_inv^8 */ 110e1051a39Sopenharmony_ci rsaz_1024_sqr_avx2(result, result, m, k0, 1); 111e1051a39Sopenharmony_ci rsaz_1024_scatter5_avx2(table_s, result, 8); 112e1051a39Sopenharmony_ci /* table[16] = a_inv^16 */ 113e1051a39Sopenharmony_ci rsaz_1024_sqr_avx2(result, result, m, k0, 1); 114e1051a39Sopenharmony_ci rsaz_1024_scatter5_avx2(table_s, result, 16); 115e1051a39Sopenharmony_ci /* table[17] = a_inv^17 */ 116e1051a39Sopenharmony_ci rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 117e1051a39Sopenharmony_ci rsaz_1024_scatter5_avx2(table_s, result, 17); 118e1051a39Sopenharmony_ci 119e1051a39Sopenharmony_ci /* table[3] */ 120e1051a39Sopenharmony_ci rsaz_1024_gather5_avx2(result, table_s, 2); 121e1051a39Sopenharmony_ci rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 122e1051a39Sopenharmony_ci rsaz_1024_scatter5_avx2(table_s, result, 3); 123e1051a39Sopenharmony_ci /* table[6] */ 124e1051a39Sopenharmony_ci rsaz_1024_sqr_avx2(result, result, m, k0, 1); 125e1051a39Sopenharmony_ci rsaz_1024_scatter5_avx2(table_s, result, 6); 126e1051a39Sopenharmony_ci /* table[12] */ 127e1051a39Sopenharmony_ci rsaz_1024_sqr_avx2(result, result, m, k0, 1); 128e1051a39Sopenharmony_ci rsaz_1024_scatter5_avx2(table_s, result, 12); 129e1051a39Sopenharmony_ci /* table[24] */ 130e1051a39Sopenharmony_ci rsaz_1024_sqr_avx2(result, result, m, k0, 1); 131e1051a39Sopenharmony_ci rsaz_1024_scatter5_avx2(table_s, result, 24); 132e1051a39Sopenharmony_ci /* table[25] */ 133e1051a39Sopenharmony_ci rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 134e1051a39Sopenharmony_ci rsaz_1024_scatter5_avx2(table_s, result, 25); 135e1051a39Sopenharmony_ci 136e1051a39Sopenharmony_ci /* table[5] */ 137e1051a39Sopenharmony_ci rsaz_1024_gather5_avx2(result, table_s, 4); 138e1051a39Sopenharmony_ci rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 139e1051a39Sopenharmony_ci rsaz_1024_scatter5_avx2(table_s, result, 5); 140e1051a39Sopenharmony_ci /* table[10] */ 141e1051a39Sopenharmony_ci rsaz_1024_sqr_avx2(result, result, m, k0, 1); 142e1051a39Sopenharmony_ci rsaz_1024_scatter5_avx2(table_s, result, 10); 143e1051a39Sopenharmony_ci /* table[20] */ 144e1051a39Sopenharmony_ci rsaz_1024_sqr_avx2(result, result, m, k0, 1); 145e1051a39Sopenharmony_ci rsaz_1024_scatter5_avx2(table_s, result, 20); 146e1051a39Sopenharmony_ci /* table[21] */ 147e1051a39Sopenharmony_ci rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 148e1051a39Sopenharmony_ci rsaz_1024_scatter5_avx2(table_s, result, 21); 149e1051a39Sopenharmony_ci 150e1051a39Sopenharmony_ci /* table[7] */ 151e1051a39Sopenharmony_ci rsaz_1024_gather5_avx2(result, table_s, 6); 152e1051a39Sopenharmony_ci rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 153e1051a39Sopenharmony_ci rsaz_1024_scatter5_avx2(table_s, result, 7); 154e1051a39Sopenharmony_ci /* table[14] */ 155e1051a39Sopenharmony_ci rsaz_1024_sqr_avx2(result, result, m, k0, 1); 156e1051a39Sopenharmony_ci rsaz_1024_scatter5_avx2(table_s, result, 14); 157e1051a39Sopenharmony_ci /* table[28] */ 158e1051a39Sopenharmony_ci rsaz_1024_sqr_avx2(result, result, m, k0, 1); 159e1051a39Sopenharmony_ci rsaz_1024_scatter5_avx2(table_s, result, 28); 160e1051a39Sopenharmony_ci /* table[29] */ 161e1051a39Sopenharmony_ci rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 162e1051a39Sopenharmony_ci rsaz_1024_scatter5_avx2(table_s, result, 29); 163e1051a39Sopenharmony_ci 164e1051a39Sopenharmony_ci /* table[9] */ 165e1051a39Sopenharmony_ci rsaz_1024_gather5_avx2(result, table_s, 8); 166e1051a39Sopenharmony_ci rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 167e1051a39Sopenharmony_ci rsaz_1024_scatter5_avx2(table_s, result, 9); 168e1051a39Sopenharmony_ci /* table[18] */ 169e1051a39Sopenharmony_ci rsaz_1024_sqr_avx2(result, result, m, k0, 1); 170e1051a39Sopenharmony_ci rsaz_1024_scatter5_avx2(table_s, result, 18); 171e1051a39Sopenharmony_ci /* table[19] */ 172e1051a39Sopenharmony_ci rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 173e1051a39Sopenharmony_ci rsaz_1024_scatter5_avx2(table_s, result, 19); 174e1051a39Sopenharmony_ci 175e1051a39Sopenharmony_ci /* table[11] */ 176e1051a39Sopenharmony_ci rsaz_1024_gather5_avx2(result, table_s, 10); 177e1051a39Sopenharmony_ci rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 178e1051a39Sopenharmony_ci rsaz_1024_scatter5_avx2(table_s, result, 11); 179e1051a39Sopenharmony_ci /* table[22] */ 180e1051a39Sopenharmony_ci rsaz_1024_sqr_avx2(result, result, m, k0, 1); 181e1051a39Sopenharmony_ci rsaz_1024_scatter5_avx2(table_s, result, 22); 182e1051a39Sopenharmony_ci /* table[23] */ 183e1051a39Sopenharmony_ci rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 184e1051a39Sopenharmony_ci rsaz_1024_scatter5_avx2(table_s, result, 23); 185e1051a39Sopenharmony_ci 186e1051a39Sopenharmony_ci /* table[13] */ 187e1051a39Sopenharmony_ci rsaz_1024_gather5_avx2(result, table_s, 12); 188e1051a39Sopenharmony_ci rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 189e1051a39Sopenharmony_ci rsaz_1024_scatter5_avx2(table_s, result, 13); 190e1051a39Sopenharmony_ci /* table[26] */ 191e1051a39Sopenharmony_ci rsaz_1024_sqr_avx2(result, result, m, k0, 1); 192e1051a39Sopenharmony_ci rsaz_1024_scatter5_avx2(table_s, result, 26); 193e1051a39Sopenharmony_ci /* table[27] */ 194e1051a39Sopenharmony_ci rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 195e1051a39Sopenharmony_ci rsaz_1024_scatter5_avx2(table_s, result, 27); 196e1051a39Sopenharmony_ci 197e1051a39Sopenharmony_ci /* table[15] */ 198e1051a39Sopenharmony_ci rsaz_1024_gather5_avx2(result, table_s, 14); 199e1051a39Sopenharmony_ci rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 200e1051a39Sopenharmony_ci rsaz_1024_scatter5_avx2(table_s, result, 15); 201e1051a39Sopenharmony_ci /* table[30] */ 202e1051a39Sopenharmony_ci rsaz_1024_sqr_avx2(result, result, m, k0, 1); 203e1051a39Sopenharmony_ci rsaz_1024_scatter5_avx2(table_s, result, 30); 204e1051a39Sopenharmony_ci /* table[31] */ 205e1051a39Sopenharmony_ci rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 206e1051a39Sopenharmony_ci rsaz_1024_scatter5_avx2(table_s, result, 31); 207e1051a39Sopenharmony_ci#endif 208e1051a39Sopenharmony_ci 209e1051a39Sopenharmony_ci /* load first window */ 210e1051a39Sopenharmony_ci p_str = (unsigned char *)exponent; 211e1051a39Sopenharmony_ci wvalue = p_str[127] >> 3; 212e1051a39Sopenharmony_ci rsaz_1024_gather5_avx2(result, table_s, wvalue); 213e1051a39Sopenharmony_ci 214e1051a39Sopenharmony_ci index = 1014; 215e1051a39Sopenharmony_ci 216e1051a39Sopenharmony_ci while (index > -1) { /* loop for the remaining 127 windows */ 217e1051a39Sopenharmony_ci 218e1051a39Sopenharmony_ci rsaz_1024_sqr_avx2(result, result, m, k0, 5); 219e1051a39Sopenharmony_ci 220e1051a39Sopenharmony_ci wvalue = (p_str[(index / 8) + 1] << 8) | p_str[index / 8]; 221e1051a39Sopenharmony_ci wvalue = (wvalue >> (index % 8)) & 31; 222e1051a39Sopenharmony_ci index -= 5; 223e1051a39Sopenharmony_ci 224e1051a39Sopenharmony_ci rsaz_1024_gather5_avx2(a_inv, table_s, wvalue); /* borrow a_inv */ 225e1051a39Sopenharmony_ci rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 226e1051a39Sopenharmony_ci } 227e1051a39Sopenharmony_ci 228e1051a39Sopenharmony_ci /* square four times */ 229e1051a39Sopenharmony_ci rsaz_1024_sqr_avx2(result, result, m, k0, 4); 230e1051a39Sopenharmony_ci 231e1051a39Sopenharmony_ci wvalue = p_str[0] & 15; 232e1051a39Sopenharmony_ci 233e1051a39Sopenharmony_ci rsaz_1024_gather5_avx2(a_inv, table_s, wvalue); /* borrow a_inv */ 234e1051a39Sopenharmony_ci rsaz_1024_mul_avx2(result, result, a_inv, m, k0); 235e1051a39Sopenharmony_ci 236e1051a39Sopenharmony_ci /* from Montgomery */ 237e1051a39Sopenharmony_ci rsaz_1024_mul_avx2(result, result, one, m, k0); 238e1051a39Sopenharmony_ci 239e1051a39Sopenharmony_ci rsaz_1024_red2norm_avx2(result_norm, result); 240e1051a39Sopenharmony_ci 241e1051a39Sopenharmony_ci bn_reduce_once_in_place(result_norm, /*carry=*/0, m_norm, tmp, 16); 242e1051a39Sopenharmony_ci 243e1051a39Sopenharmony_ci OPENSSL_cleanse(storage, sizeof(storage)); 244e1051a39Sopenharmony_ci OPENSSL_cleanse(tmp, sizeof(tmp)); 245e1051a39Sopenharmony_ci} 246e1051a39Sopenharmony_ci 247e1051a39Sopenharmony_ci/* 248e1051a39Sopenharmony_ci * See crypto/bn/rsaz-x86_64.pl for further details. 249e1051a39Sopenharmony_ci */ 250e1051a39Sopenharmony_civoid rsaz_512_mul(void *ret, const void *a, const void *b, const void *n, 251e1051a39Sopenharmony_ci BN_ULONG k); 252e1051a39Sopenharmony_civoid rsaz_512_mul_scatter4(void *ret, const void *a, const void *n, 253e1051a39Sopenharmony_ci BN_ULONG k, const void *tbl, unsigned int power); 254e1051a39Sopenharmony_civoid rsaz_512_mul_gather4(void *ret, const void *a, const void *tbl, 255e1051a39Sopenharmony_ci const void *n, BN_ULONG k, unsigned int power); 256e1051a39Sopenharmony_civoid rsaz_512_mul_by_one(void *ret, const void *a, const void *n, BN_ULONG k); 257e1051a39Sopenharmony_civoid rsaz_512_sqr(void *ret, const void *a, const void *n, BN_ULONG k, 258e1051a39Sopenharmony_ci int cnt); 259e1051a39Sopenharmony_civoid rsaz_512_scatter4(void *tbl, const BN_ULONG *val, int power); 260e1051a39Sopenharmony_civoid rsaz_512_gather4(BN_ULONG *val, const void *tbl, int power); 261e1051a39Sopenharmony_ci 262e1051a39Sopenharmony_civoid RSAZ_512_mod_exp(BN_ULONG result[8], 263e1051a39Sopenharmony_ci const BN_ULONG base[8], const BN_ULONG exponent[8], 264e1051a39Sopenharmony_ci const BN_ULONG m[8], BN_ULONG k0, const BN_ULONG RR[8]) 265e1051a39Sopenharmony_ci{ 266e1051a39Sopenharmony_ci unsigned char storage[16 * 8 * 8 + 64 * 2 + 64]; /* 1.2KB */ 267e1051a39Sopenharmony_ci unsigned char *table = storage + (64 - ((size_t)storage % 64)); 268e1051a39Sopenharmony_ci BN_ULONG *a_inv = (BN_ULONG *)(table + 16 * 8 * 8); 269e1051a39Sopenharmony_ci BN_ULONG *temp = (BN_ULONG *)(table + 16 * 8 * 8 + 8 * 8); 270e1051a39Sopenharmony_ci unsigned char *p_str = (unsigned char *)exponent; 271e1051a39Sopenharmony_ci int index; 272e1051a39Sopenharmony_ci unsigned int wvalue; 273e1051a39Sopenharmony_ci BN_ULONG tmp[8]; 274e1051a39Sopenharmony_ci 275e1051a39Sopenharmony_ci /* table[0] = 1_inv */ 276e1051a39Sopenharmony_ci temp[0] = 0 - m[0]; 277e1051a39Sopenharmony_ci temp[1] = ~m[1]; 278e1051a39Sopenharmony_ci temp[2] = ~m[2]; 279e1051a39Sopenharmony_ci temp[3] = ~m[3]; 280e1051a39Sopenharmony_ci temp[4] = ~m[4]; 281e1051a39Sopenharmony_ci temp[5] = ~m[5]; 282e1051a39Sopenharmony_ci temp[6] = ~m[6]; 283e1051a39Sopenharmony_ci temp[7] = ~m[7]; 284e1051a39Sopenharmony_ci rsaz_512_scatter4(table, temp, 0); 285e1051a39Sopenharmony_ci 286e1051a39Sopenharmony_ci /* table [1] = a_inv^1 */ 287e1051a39Sopenharmony_ci rsaz_512_mul(a_inv, base, RR, m, k0); 288e1051a39Sopenharmony_ci rsaz_512_scatter4(table, a_inv, 1); 289e1051a39Sopenharmony_ci 290e1051a39Sopenharmony_ci /* table [2] = a_inv^2 */ 291e1051a39Sopenharmony_ci rsaz_512_sqr(temp, a_inv, m, k0, 1); 292e1051a39Sopenharmony_ci rsaz_512_scatter4(table, temp, 2); 293e1051a39Sopenharmony_ci 294e1051a39Sopenharmony_ci for (index = 3; index < 16; index++) 295e1051a39Sopenharmony_ci rsaz_512_mul_scatter4(temp, a_inv, m, k0, table, index); 296e1051a39Sopenharmony_ci 297e1051a39Sopenharmony_ci /* load first window */ 298e1051a39Sopenharmony_ci wvalue = p_str[63]; 299e1051a39Sopenharmony_ci 300e1051a39Sopenharmony_ci rsaz_512_gather4(temp, table, wvalue >> 4); 301e1051a39Sopenharmony_ci rsaz_512_sqr(temp, temp, m, k0, 4); 302e1051a39Sopenharmony_ci rsaz_512_mul_gather4(temp, temp, table, m, k0, wvalue & 0xf); 303e1051a39Sopenharmony_ci 304e1051a39Sopenharmony_ci for (index = 62; index >= 0; index--) { 305e1051a39Sopenharmony_ci wvalue = p_str[index]; 306e1051a39Sopenharmony_ci 307e1051a39Sopenharmony_ci rsaz_512_sqr(temp, temp, m, k0, 4); 308e1051a39Sopenharmony_ci rsaz_512_mul_gather4(temp, temp, table, m, k0, wvalue >> 4); 309e1051a39Sopenharmony_ci 310e1051a39Sopenharmony_ci rsaz_512_sqr(temp, temp, m, k0, 4); 311e1051a39Sopenharmony_ci rsaz_512_mul_gather4(temp, temp, table, m, k0, wvalue & 0x0f); 312e1051a39Sopenharmony_ci } 313e1051a39Sopenharmony_ci 314e1051a39Sopenharmony_ci /* from Montgomery */ 315e1051a39Sopenharmony_ci rsaz_512_mul_by_one(result, temp, m, k0); 316e1051a39Sopenharmony_ci 317e1051a39Sopenharmony_ci bn_reduce_once_in_place(result, /*carry=*/0, m, tmp, 8); 318e1051a39Sopenharmony_ci 319e1051a39Sopenharmony_ci OPENSSL_cleanse(storage, sizeof(storage)); 320e1051a39Sopenharmony_ci OPENSSL_cleanse(tmp, sizeof(tmp)); 321e1051a39Sopenharmony_ci} 322e1051a39Sopenharmony_ci 323e1051a39Sopenharmony_ci#endif 324