18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 OR MIT 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * Copyright (C) 2016-2017 INRIA and Microsoft Corporation. 48c2ecf20Sopenharmony_ci * Copyright (C) 2018-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. 58c2ecf20Sopenharmony_ci * 68c2ecf20Sopenharmony_ci * This is a machine-generated formally verified implementation of Curve25519 78c2ecf20Sopenharmony_ci * ECDH from: <https://github.com/mitls/hacl-star>. Though originally machine 88c2ecf20Sopenharmony_ci * generated, it has been tweaked to be suitable for use in the kernel. It is 98c2ecf20Sopenharmony_ci * optimized for 64-bit machines that can efficiently work with 128-bit 108c2ecf20Sopenharmony_ci * integer types. 118c2ecf20Sopenharmony_ci */ 128c2ecf20Sopenharmony_ci 138c2ecf20Sopenharmony_ci#include <asm/unaligned.h> 148c2ecf20Sopenharmony_ci#include <crypto/curve25519.h> 158c2ecf20Sopenharmony_ci#include <linux/string.h> 168c2ecf20Sopenharmony_ci 178c2ecf20Sopenharmony_citypedef __uint128_t u128; 188c2ecf20Sopenharmony_ci 198c2ecf20Sopenharmony_cistatic __always_inline u64 u64_eq_mask(u64 a, u64 b) 208c2ecf20Sopenharmony_ci{ 218c2ecf20Sopenharmony_ci u64 x = a ^ b; 228c2ecf20Sopenharmony_ci u64 minus_x = ~x + (u64)1U; 238c2ecf20Sopenharmony_ci u64 x_or_minus_x = x | minus_x; 248c2ecf20Sopenharmony_ci u64 xnx = x_or_minus_x >> (u32)63U; 258c2ecf20Sopenharmony_ci u64 c = xnx - (u64)1U; 268c2ecf20Sopenharmony_ci return c; 278c2ecf20Sopenharmony_ci} 288c2ecf20Sopenharmony_ci 298c2ecf20Sopenharmony_cistatic __always_inline u64 u64_gte_mask(u64 a, u64 b) 308c2ecf20Sopenharmony_ci{ 318c2ecf20Sopenharmony_ci u64 x = a; 328c2ecf20Sopenharmony_ci u64 y = b; 338c2ecf20Sopenharmony_ci u64 x_xor_y = x ^ y; 348c2ecf20Sopenharmony_ci u64 x_sub_y = x - y; 358c2ecf20Sopenharmony_ci u64 x_sub_y_xor_y = x_sub_y ^ y; 368c2ecf20Sopenharmony_ci u64 q = x_xor_y | x_sub_y_xor_y; 378c2ecf20Sopenharmony_ci u64 x_xor_q = x ^ q; 388c2ecf20Sopenharmony_ci u64 x_xor_q_ = x_xor_q >> (u32)63U; 398c2ecf20Sopenharmony_ci u64 c = x_xor_q_ - (u64)1U; 408c2ecf20Sopenharmony_ci return c; 418c2ecf20Sopenharmony_ci} 428c2ecf20Sopenharmony_ci 438c2ecf20Sopenharmony_cistatic __always_inline void modulo_carry_top(u64 *b) 448c2ecf20Sopenharmony_ci{ 458c2ecf20Sopenharmony_ci u64 b4 = b[4]; 468c2ecf20Sopenharmony_ci u64 b0 = b[0]; 478c2ecf20Sopenharmony_ci u64 b4_ = b4 & 0x7ffffffffffffLLU; 488c2ecf20Sopenharmony_ci u64 b0_ = b0 + 19 * (b4 >> 51); 498c2ecf20Sopenharmony_ci b[4] = b4_; 508c2ecf20Sopenharmony_ci b[0] = b0_; 518c2ecf20Sopenharmony_ci} 528c2ecf20Sopenharmony_ci 538c2ecf20Sopenharmony_cistatic __always_inline void fproduct_copy_from_wide_(u64 *output, u128 *input) 548c2ecf20Sopenharmony_ci{ 558c2ecf20Sopenharmony_ci { 568c2ecf20Sopenharmony_ci u128 xi = input[0]; 578c2ecf20Sopenharmony_ci output[0] = ((u64)(xi)); 588c2ecf20Sopenharmony_ci } 598c2ecf20Sopenharmony_ci { 608c2ecf20Sopenharmony_ci u128 xi = input[1]; 618c2ecf20Sopenharmony_ci output[1] = ((u64)(xi)); 628c2ecf20Sopenharmony_ci } 638c2ecf20Sopenharmony_ci { 648c2ecf20Sopenharmony_ci u128 xi = input[2]; 658c2ecf20Sopenharmony_ci output[2] = ((u64)(xi)); 668c2ecf20Sopenharmony_ci } 678c2ecf20Sopenharmony_ci { 688c2ecf20Sopenharmony_ci u128 xi = input[3]; 698c2ecf20Sopenharmony_ci output[3] = ((u64)(xi)); 708c2ecf20Sopenharmony_ci } 718c2ecf20Sopenharmony_ci { 728c2ecf20Sopenharmony_ci u128 xi = input[4]; 738c2ecf20Sopenharmony_ci output[4] = ((u64)(xi)); 748c2ecf20Sopenharmony_ci } 758c2ecf20Sopenharmony_ci} 768c2ecf20Sopenharmony_ci 778c2ecf20Sopenharmony_cistatic __always_inline void 788c2ecf20Sopenharmony_cifproduct_sum_scalar_multiplication_(u128 *output, u64 *input, u64 s) 798c2ecf20Sopenharmony_ci{ 808c2ecf20Sopenharmony_ci output[0] += (u128)input[0] * s; 818c2ecf20Sopenharmony_ci output[1] += (u128)input[1] * s; 828c2ecf20Sopenharmony_ci output[2] += (u128)input[2] * s; 838c2ecf20Sopenharmony_ci output[3] += (u128)input[3] * s; 848c2ecf20Sopenharmony_ci output[4] += (u128)input[4] * s; 858c2ecf20Sopenharmony_ci} 868c2ecf20Sopenharmony_ci 878c2ecf20Sopenharmony_cistatic __always_inline void fproduct_carry_wide_(u128 *tmp) 888c2ecf20Sopenharmony_ci{ 898c2ecf20Sopenharmony_ci { 908c2ecf20Sopenharmony_ci u32 ctr = 0; 918c2ecf20Sopenharmony_ci u128 tctr = tmp[ctr]; 928c2ecf20Sopenharmony_ci u128 tctrp1 = tmp[ctr + 1]; 938c2ecf20Sopenharmony_ci u64 r0 = ((u64)(tctr)) & 0x7ffffffffffffLLU; 948c2ecf20Sopenharmony_ci u128 c = ((tctr) >> (51)); 958c2ecf20Sopenharmony_ci tmp[ctr] = ((u128)(r0)); 968c2ecf20Sopenharmony_ci tmp[ctr + 1] = ((tctrp1) + (c)); 978c2ecf20Sopenharmony_ci } 988c2ecf20Sopenharmony_ci { 998c2ecf20Sopenharmony_ci u32 ctr = 1; 1008c2ecf20Sopenharmony_ci u128 tctr = tmp[ctr]; 1018c2ecf20Sopenharmony_ci u128 tctrp1 = tmp[ctr + 1]; 1028c2ecf20Sopenharmony_ci u64 r0 = ((u64)(tctr)) & 0x7ffffffffffffLLU; 1038c2ecf20Sopenharmony_ci u128 c = ((tctr) >> (51)); 1048c2ecf20Sopenharmony_ci tmp[ctr] = ((u128)(r0)); 1058c2ecf20Sopenharmony_ci tmp[ctr + 1] = ((tctrp1) + (c)); 1068c2ecf20Sopenharmony_ci } 1078c2ecf20Sopenharmony_ci 1088c2ecf20Sopenharmony_ci { 1098c2ecf20Sopenharmony_ci u32 ctr = 2; 1108c2ecf20Sopenharmony_ci u128 tctr = tmp[ctr]; 1118c2ecf20Sopenharmony_ci u128 tctrp1 = tmp[ctr + 1]; 1128c2ecf20Sopenharmony_ci u64 r0 = ((u64)(tctr)) & 0x7ffffffffffffLLU; 1138c2ecf20Sopenharmony_ci u128 c = ((tctr) >> (51)); 1148c2ecf20Sopenharmony_ci tmp[ctr] = ((u128)(r0)); 1158c2ecf20Sopenharmony_ci tmp[ctr + 1] = ((tctrp1) + (c)); 1168c2ecf20Sopenharmony_ci } 1178c2ecf20Sopenharmony_ci { 1188c2ecf20Sopenharmony_ci u32 ctr = 3; 1198c2ecf20Sopenharmony_ci u128 tctr = tmp[ctr]; 1208c2ecf20Sopenharmony_ci u128 tctrp1 = tmp[ctr + 1]; 1218c2ecf20Sopenharmony_ci u64 r0 = ((u64)(tctr)) & 0x7ffffffffffffLLU; 1228c2ecf20Sopenharmony_ci u128 c = ((tctr) >> (51)); 1238c2ecf20Sopenharmony_ci tmp[ctr] = ((u128)(r0)); 1248c2ecf20Sopenharmony_ci tmp[ctr + 1] = ((tctrp1) + (c)); 1258c2ecf20Sopenharmony_ci } 1268c2ecf20Sopenharmony_ci} 1278c2ecf20Sopenharmony_ci 1288c2ecf20Sopenharmony_cistatic __always_inline void fmul_shift_reduce(u64 *output) 1298c2ecf20Sopenharmony_ci{ 1308c2ecf20Sopenharmony_ci u64 tmp = output[4]; 1318c2ecf20Sopenharmony_ci u64 b0; 1328c2ecf20Sopenharmony_ci { 1338c2ecf20Sopenharmony_ci u32 ctr = 5 - 0 - 1; 1348c2ecf20Sopenharmony_ci u64 z = output[ctr - 1]; 1358c2ecf20Sopenharmony_ci output[ctr] = z; 1368c2ecf20Sopenharmony_ci } 1378c2ecf20Sopenharmony_ci { 1388c2ecf20Sopenharmony_ci u32 ctr = 5 - 1 - 1; 1398c2ecf20Sopenharmony_ci u64 z = output[ctr - 1]; 1408c2ecf20Sopenharmony_ci output[ctr] = z; 1418c2ecf20Sopenharmony_ci } 1428c2ecf20Sopenharmony_ci { 1438c2ecf20Sopenharmony_ci u32 ctr = 5 - 2 - 1; 1448c2ecf20Sopenharmony_ci u64 z = output[ctr - 1]; 1458c2ecf20Sopenharmony_ci output[ctr] = z; 1468c2ecf20Sopenharmony_ci } 1478c2ecf20Sopenharmony_ci { 1488c2ecf20Sopenharmony_ci u32 ctr = 5 - 3 - 1; 1498c2ecf20Sopenharmony_ci u64 z = output[ctr - 1]; 1508c2ecf20Sopenharmony_ci output[ctr] = z; 1518c2ecf20Sopenharmony_ci } 1528c2ecf20Sopenharmony_ci output[0] = tmp; 1538c2ecf20Sopenharmony_ci b0 = output[0]; 1548c2ecf20Sopenharmony_ci output[0] = 19 * b0; 1558c2ecf20Sopenharmony_ci} 1568c2ecf20Sopenharmony_ci 1578c2ecf20Sopenharmony_cistatic __always_inline void fmul_mul_shift_reduce_(u128 *output, u64 *input, 1588c2ecf20Sopenharmony_ci u64 *input21) 1598c2ecf20Sopenharmony_ci{ 1608c2ecf20Sopenharmony_ci u32 i; 1618c2ecf20Sopenharmony_ci u64 input2i; 1628c2ecf20Sopenharmony_ci { 1638c2ecf20Sopenharmony_ci u64 input2i = input21[0]; 1648c2ecf20Sopenharmony_ci fproduct_sum_scalar_multiplication_(output, input, input2i); 1658c2ecf20Sopenharmony_ci fmul_shift_reduce(input); 1668c2ecf20Sopenharmony_ci } 1678c2ecf20Sopenharmony_ci { 1688c2ecf20Sopenharmony_ci u64 input2i = input21[1]; 1698c2ecf20Sopenharmony_ci fproduct_sum_scalar_multiplication_(output, input, input2i); 1708c2ecf20Sopenharmony_ci fmul_shift_reduce(input); 1718c2ecf20Sopenharmony_ci } 1728c2ecf20Sopenharmony_ci { 1738c2ecf20Sopenharmony_ci u64 input2i = input21[2]; 1748c2ecf20Sopenharmony_ci fproduct_sum_scalar_multiplication_(output, input, input2i); 1758c2ecf20Sopenharmony_ci fmul_shift_reduce(input); 1768c2ecf20Sopenharmony_ci } 1778c2ecf20Sopenharmony_ci { 1788c2ecf20Sopenharmony_ci u64 input2i = input21[3]; 1798c2ecf20Sopenharmony_ci fproduct_sum_scalar_multiplication_(output, input, input2i); 1808c2ecf20Sopenharmony_ci fmul_shift_reduce(input); 1818c2ecf20Sopenharmony_ci } 1828c2ecf20Sopenharmony_ci i = 4; 1838c2ecf20Sopenharmony_ci input2i = input21[i]; 1848c2ecf20Sopenharmony_ci fproduct_sum_scalar_multiplication_(output, input, input2i); 1858c2ecf20Sopenharmony_ci} 1868c2ecf20Sopenharmony_ci 1878c2ecf20Sopenharmony_cistatic __always_inline void fmul_fmul(u64 *output, u64 *input, u64 *input21) 1888c2ecf20Sopenharmony_ci{ 1898c2ecf20Sopenharmony_ci u64 tmp[5] = { input[0], input[1], input[2], input[3], input[4] }; 1908c2ecf20Sopenharmony_ci { 1918c2ecf20Sopenharmony_ci u128 b4; 1928c2ecf20Sopenharmony_ci u128 b0; 1938c2ecf20Sopenharmony_ci u128 b4_; 1948c2ecf20Sopenharmony_ci u128 b0_; 1958c2ecf20Sopenharmony_ci u64 i0; 1968c2ecf20Sopenharmony_ci u64 i1; 1978c2ecf20Sopenharmony_ci u64 i0_; 1988c2ecf20Sopenharmony_ci u64 i1_; 1998c2ecf20Sopenharmony_ci u128 t[5] = { 0 }; 2008c2ecf20Sopenharmony_ci fmul_mul_shift_reduce_(t, tmp, input21); 2018c2ecf20Sopenharmony_ci fproduct_carry_wide_(t); 2028c2ecf20Sopenharmony_ci b4 = t[4]; 2038c2ecf20Sopenharmony_ci b0 = t[0]; 2048c2ecf20Sopenharmony_ci b4_ = ((b4) & (((u128)(0x7ffffffffffffLLU)))); 2058c2ecf20Sopenharmony_ci b0_ = ((b0) + (((u128)(19) * (((u64)(((b4) >> (51)))))))); 2068c2ecf20Sopenharmony_ci t[4] = b4_; 2078c2ecf20Sopenharmony_ci t[0] = b0_; 2088c2ecf20Sopenharmony_ci fproduct_copy_from_wide_(output, t); 2098c2ecf20Sopenharmony_ci i0 = output[0]; 2108c2ecf20Sopenharmony_ci i1 = output[1]; 2118c2ecf20Sopenharmony_ci i0_ = i0 & 0x7ffffffffffffLLU; 2128c2ecf20Sopenharmony_ci i1_ = i1 + (i0 >> 51); 2138c2ecf20Sopenharmony_ci output[0] = i0_; 2148c2ecf20Sopenharmony_ci output[1] = i1_; 2158c2ecf20Sopenharmony_ci } 2168c2ecf20Sopenharmony_ci} 2178c2ecf20Sopenharmony_ci 2188c2ecf20Sopenharmony_cistatic __always_inline void fsquare_fsquare__(u128 *tmp, u64 *output) 2198c2ecf20Sopenharmony_ci{ 2208c2ecf20Sopenharmony_ci u64 r0 = output[0]; 2218c2ecf20Sopenharmony_ci u64 r1 = output[1]; 2228c2ecf20Sopenharmony_ci u64 r2 = output[2]; 2238c2ecf20Sopenharmony_ci u64 r3 = output[3]; 2248c2ecf20Sopenharmony_ci u64 r4 = output[4]; 2258c2ecf20Sopenharmony_ci u64 d0 = r0 * 2; 2268c2ecf20Sopenharmony_ci u64 d1 = r1 * 2; 2278c2ecf20Sopenharmony_ci u64 d2 = r2 * 2 * 19; 2288c2ecf20Sopenharmony_ci u64 d419 = r4 * 19; 2298c2ecf20Sopenharmony_ci u64 d4 = d419 * 2; 2308c2ecf20Sopenharmony_ci u128 s0 = ((((((u128)(r0) * (r0))) + (((u128)(d4) * (r1))))) + 2318c2ecf20Sopenharmony_ci (((u128)(d2) * (r3)))); 2328c2ecf20Sopenharmony_ci u128 s1 = ((((((u128)(d0) * (r1))) + (((u128)(d4) * (r2))))) + 2338c2ecf20Sopenharmony_ci (((u128)(r3 * 19) * (r3)))); 2348c2ecf20Sopenharmony_ci u128 s2 = ((((((u128)(d0) * (r2))) + (((u128)(r1) * (r1))))) + 2358c2ecf20Sopenharmony_ci (((u128)(d4) * (r3)))); 2368c2ecf20Sopenharmony_ci u128 s3 = ((((((u128)(d0) * (r3))) + (((u128)(d1) * (r2))))) + 2378c2ecf20Sopenharmony_ci (((u128)(r4) * (d419)))); 2388c2ecf20Sopenharmony_ci u128 s4 = ((((((u128)(d0) * (r4))) + (((u128)(d1) * (r3))))) + 2398c2ecf20Sopenharmony_ci (((u128)(r2) * (r2)))); 2408c2ecf20Sopenharmony_ci tmp[0] = s0; 2418c2ecf20Sopenharmony_ci tmp[1] = s1; 2428c2ecf20Sopenharmony_ci tmp[2] = s2; 2438c2ecf20Sopenharmony_ci tmp[3] = s3; 2448c2ecf20Sopenharmony_ci tmp[4] = s4; 2458c2ecf20Sopenharmony_ci} 2468c2ecf20Sopenharmony_ci 2478c2ecf20Sopenharmony_cistatic __always_inline void fsquare_fsquare_(u128 *tmp, u64 *output) 2488c2ecf20Sopenharmony_ci{ 2498c2ecf20Sopenharmony_ci u128 b4; 2508c2ecf20Sopenharmony_ci u128 b0; 2518c2ecf20Sopenharmony_ci u128 b4_; 2528c2ecf20Sopenharmony_ci u128 b0_; 2538c2ecf20Sopenharmony_ci u64 i0; 2548c2ecf20Sopenharmony_ci u64 i1; 2558c2ecf20Sopenharmony_ci u64 i0_; 2568c2ecf20Sopenharmony_ci u64 i1_; 2578c2ecf20Sopenharmony_ci fsquare_fsquare__(tmp, output); 2588c2ecf20Sopenharmony_ci fproduct_carry_wide_(tmp); 2598c2ecf20Sopenharmony_ci b4 = tmp[4]; 2608c2ecf20Sopenharmony_ci b0 = tmp[0]; 2618c2ecf20Sopenharmony_ci b4_ = ((b4) & (((u128)(0x7ffffffffffffLLU)))); 2628c2ecf20Sopenharmony_ci b0_ = ((b0) + (((u128)(19) * (((u64)(((b4) >> (51)))))))); 2638c2ecf20Sopenharmony_ci tmp[4] = b4_; 2648c2ecf20Sopenharmony_ci tmp[0] = b0_; 2658c2ecf20Sopenharmony_ci fproduct_copy_from_wide_(output, tmp); 2668c2ecf20Sopenharmony_ci i0 = output[0]; 2678c2ecf20Sopenharmony_ci i1 = output[1]; 2688c2ecf20Sopenharmony_ci i0_ = i0 & 0x7ffffffffffffLLU; 2698c2ecf20Sopenharmony_ci i1_ = i1 + (i0 >> 51); 2708c2ecf20Sopenharmony_ci output[0] = i0_; 2718c2ecf20Sopenharmony_ci output[1] = i1_; 2728c2ecf20Sopenharmony_ci} 2738c2ecf20Sopenharmony_ci 2748c2ecf20Sopenharmony_cistatic __always_inline void fsquare_fsquare_times_(u64 *output, u128 *tmp, 2758c2ecf20Sopenharmony_ci u32 count1) 2768c2ecf20Sopenharmony_ci{ 2778c2ecf20Sopenharmony_ci u32 i; 2788c2ecf20Sopenharmony_ci fsquare_fsquare_(tmp, output); 2798c2ecf20Sopenharmony_ci for (i = 1; i < count1; ++i) 2808c2ecf20Sopenharmony_ci fsquare_fsquare_(tmp, output); 2818c2ecf20Sopenharmony_ci} 2828c2ecf20Sopenharmony_ci 2838c2ecf20Sopenharmony_cistatic __always_inline void fsquare_fsquare_times(u64 *output, u64 *input, 2848c2ecf20Sopenharmony_ci u32 count1) 2858c2ecf20Sopenharmony_ci{ 2868c2ecf20Sopenharmony_ci u128 t[5]; 2878c2ecf20Sopenharmony_ci memcpy(output, input, 5 * sizeof(*input)); 2888c2ecf20Sopenharmony_ci fsquare_fsquare_times_(output, t, count1); 2898c2ecf20Sopenharmony_ci} 2908c2ecf20Sopenharmony_ci 2918c2ecf20Sopenharmony_cistatic __always_inline void fsquare_fsquare_times_inplace(u64 *output, 2928c2ecf20Sopenharmony_ci u32 count1) 2938c2ecf20Sopenharmony_ci{ 2948c2ecf20Sopenharmony_ci u128 t[5]; 2958c2ecf20Sopenharmony_ci fsquare_fsquare_times_(output, t, count1); 2968c2ecf20Sopenharmony_ci} 2978c2ecf20Sopenharmony_ci 2988c2ecf20Sopenharmony_cistatic __always_inline void crecip_crecip(u64 *out, u64 *z) 2998c2ecf20Sopenharmony_ci{ 3008c2ecf20Sopenharmony_ci u64 buf[20] = { 0 }; 3018c2ecf20Sopenharmony_ci u64 *a0 = buf; 3028c2ecf20Sopenharmony_ci u64 *t00 = buf + 5; 3038c2ecf20Sopenharmony_ci u64 *b0 = buf + 10; 3048c2ecf20Sopenharmony_ci u64 *t01; 3058c2ecf20Sopenharmony_ci u64 *b1; 3068c2ecf20Sopenharmony_ci u64 *c0; 3078c2ecf20Sopenharmony_ci u64 *a; 3088c2ecf20Sopenharmony_ci u64 *t0; 3098c2ecf20Sopenharmony_ci u64 *b; 3108c2ecf20Sopenharmony_ci u64 *c; 3118c2ecf20Sopenharmony_ci fsquare_fsquare_times(a0, z, 1); 3128c2ecf20Sopenharmony_ci fsquare_fsquare_times(t00, a0, 2); 3138c2ecf20Sopenharmony_ci fmul_fmul(b0, t00, z); 3148c2ecf20Sopenharmony_ci fmul_fmul(a0, b0, a0); 3158c2ecf20Sopenharmony_ci fsquare_fsquare_times(t00, a0, 1); 3168c2ecf20Sopenharmony_ci fmul_fmul(b0, t00, b0); 3178c2ecf20Sopenharmony_ci fsquare_fsquare_times(t00, b0, 5); 3188c2ecf20Sopenharmony_ci t01 = buf + 5; 3198c2ecf20Sopenharmony_ci b1 = buf + 10; 3208c2ecf20Sopenharmony_ci c0 = buf + 15; 3218c2ecf20Sopenharmony_ci fmul_fmul(b1, t01, b1); 3228c2ecf20Sopenharmony_ci fsquare_fsquare_times(t01, b1, 10); 3238c2ecf20Sopenharmony_ci fmul_fmul(c0, t01, b1); 3248c2ecf20Sopenharmony_ci fsquare_fsquare_times(t01, c0, 20); 3258c2ecf20Sopenharmony_ci fmul_fmul(t01, t01, c0); 3268c2ecf20Sopenharmony_ci fsquare_fsquare_times_inplace(t01, 10); 3278c2ecf20Sopenharmony_ci fmul_fmul(b1, t01, b1); 3288c2ecf20Sopenharmony_ci fsquare_fsquare_times(t01, b1, 50); 3298c2ecf20Sopenharmony_ci a = buf; 3308c2ecf20Sopenharmony_ci t0 = buf + 5; 3318c2ecf20Sopenharmony_ci b = buf + 10; 3328c2ecf20Sopenharmony_ci c = buf + 15; 3338c2ecf20Sopenharmony_ci fmul_fmul(c, t0, b); 3348c2ecf20Sopenharmony_ci fsquare_fsquare_times(t0, c, 100); 3358c2ecf20Sopenharmony_ci fmul_fmul(t0, t0, c); 3368c2ecf20Sopenharmony_ci fsquare_fsquare_times_inplace(t0, 50); 3378c2ecf20Sopenharmony_ci fmul_fmul(t0, t0, b); 3388c2ecf20Sopenharmony_ci fsquare_fsquare_times_inplace(t0, 5); 3398c2ecf20Sopenharmony_ci fmul_fmul(out, t0, a); 3408c2ecf20Sopenharmony_ci} 3418c2ecf20Sopenharmony_ci 3428c2ecf20Sopenharmony_cistatic __always_inline void fsum(u64 *a, u64 *b) 3438c2ecf20Sopenharmony_ci{ 3448c2ecf20Sopenharmony_ci a[0] += b[0]; 3458c2ecf20Sopenharmony_ci a[1] += b[1]; 3468c2ecf20Sopenharmony_ci a[2] += b[2]; 3478c2ecf20Sopenharmony_ci a[3] += b[3]; 3488c2ecf20Sopenharmony_ci a[4] += b[4]; 3498c2ecf20Sopenharmony_ci} 3508c2ecf20Sopenharmony_ci 3518c2ecf20Sopenharmony_cistatic __always_inline void fdifference(u64 *a, u64 *b) 3528c2ecf20Sopenharmony_ci{ 3538c2ecf20Sopenharmony_ci u64 tmp[5] = { 0 }; 3548c2ecf20Sopenharmony_ci u64 b0; 3558c2ecf20Sopenharmony_ci u64 b1; 3568c2ecf20Sopenharmony_ci u64 b2; 3578c2ecf20Sopenharmony_ci u64 b3; 3588c2ecf20Sopenharmony_ci u64 b4; 3598c2ecf20Sopenharmony_ci memcpy(tmp, b, 5 * sizeof(*b)); 3608c2ecf20Sopenharmony_ci b0 = tmp[0]; 3618c2ecf20Sopenharmony_ci b1 = tmp[1]; 3628c2ecf20Sopenharmony_ci b2 = tmp[2]; 3638c2ecf20Sopenharmony_ci b3 = tmp[3]; 3648c2ecf20Sopenharmony_ci b4 = tmp[4]; 3658c2ecf20Sopenharmony_ci tmp[0] = b0 + 0x3fffffffffff68LLU; 3668c2ecf20Sopenharmony_ci tmp[1] = b1 + 0x3ffffffffffff8LLU; 3678c2ecf20Sopenharmony_ci tmp[2] = b2 + 0x3ffffffffffff8LLU; 3688c2ecf20Sopenharmony_ci tmp[3] = b3 + 0x3ffffffffffff8LLU; 3698c2ecf20Sopenharmony_ci tmp[4] = b4 + 0x3ffffffffffff8LLU; 3708c2ecf20Sopenharmony_ci { 3718c2ecf20Sopenharmony_ci u64 xi = a[0]; 3728c2ecf20Sopenharmony_ci u64 yi = tmp[0]; 3738c2ecf20Sopenharmony_ci a[0] = yi - xi; 3748c2ecf20Sopenharmony_ci } 3758c2ecf20Sopenharmony_ci { 3768c2ecf20Sopenharmony_ci u64 xi = a[1]; 3778c2ecf20Sopenharmony_ci u64 yi = tmp[1]; 3788c2ecf20Sopenharmony_ci a[1] = yi - xi; 3798c2ecf20Sopenharmony_ci } 3808c2ecf20Sopenharmony_ci { 3818c2ecf20Sopenharmony_ci u64 xi = a[2]; 3828c2ecf20Sopenharmony_ci u64 yi = tmp[2]; 3838c2ecf20Sopenharmony_ci a[2] = yi - xi; 3848c2ecf20Sopenharmony_ci } 3858c2ecf20Sopenharmony_ci { 3868c2ecf20Sopenharmony_ci u64 xi = a[3]; 3878c2ecf20Sopenharmony_ci u64 yi = tmp[3]; 3888c2ecf20Sopenharmony_ci a[3] = yi - xi; 3898c2ecf20Sopenharmony_ci } 3908c2ecf20Sopenharmony_ci { 3918c2ecf20Sopenharmony_ci u64 xi = a[4]; 3928c2ecf20Sopenharmony_ci u64 yi = tmp[4]; 3938c2ecf20Sopenharmony_ci a[4] = yi - xi; 3948c2ecf20Sopenharmony_ci } 3958c2ecf20Sopenharmony_ci} 3968c2ecf20Sopenharmony_ci 3978c2ecf20Sopenharmony_cistatic __always_inline void fscalar(u64 *output, u64 *b, u64 s) 3988c2ecf20Sopenharmony_ci{ 3998c2ecf20Sopenharmony_ci u128 tmp[5]; 4008c2ecf20Sopenharmony_ci u128 b4; 4018c2ecf20Sopenharmony_ci u128 b0; 4028c2ecf20Sopenharmony_ci u128 b4_; 4038c2ecf20Sopenharmony_ci u128 b0_; 4048c2ecf20Sopenharmony_ci { 4058c2ecf20Sopenharmony_ci u64 xi = b[0]; 4068c2ecf20Sopenharmony_ci tmp[0] = ((u128)(xi) * (s)); 4078c2ecf20Sopenharmony_ci } 4088c2ecf20Sopenharmony_ci { 4098c2ecf20Sopenharmony_ci u64 xi = b[1]; 4108c2ecf20Sopenharmony_ci tmp[1] = ((u128)(xi) * (s)); 4118c2ecf20Sopenharmony_ci } 4128c2ecf20Sopenharmony_ci { 4138c2ecf20Sopenharmony_ci u64 xi = b[2]; 4148c2ecf20Sopenharmony_ci tmp[2] = ((u128)(xi) * (s)); 4158c2ecf20Sopenharmony_ci } 4168c2ecf20Sopenharmony_ci { 4178c2ecf20Sopenharmony_ci u64 xi = b[3]; 4188c2ecf20Sopenharmony_ci tmp[3] = ((u128)(xi) * (s)); 4198c2ecf20Sopenharmony_ci } 4208c2ecf20Sopenharmony_ci { 4218c2ecf20Sopenharmony_ci u64 xi = b[4]; 4228c2ecf20Sopenharmony_ci tmp[4] = ((u128)(xi) * (s)); 4238c2ecf20Sopenharmony_ci } 4248c2ecf20Sopenharmony_ci fproduct_carry_wide_(tmp); 4258c2ecf20Sopenharmony_ci b4 = tmp[4]; 4268c2ecf20Sopenharmony_ci b0 = tmp[0]; 4278c2ecf20Sopenharmony_ci b4_ = ((b4) & (((u128)(0x7ffffffffffffLLU)))); 4288c2ecf20Sopenharmony_ci b0_ = ((b0) + (((u128)(19) * (((u64)(((b4) >> (51)))))))); 4298c2ecf20Sopenharmony_ci tmp[4] = b4_; 4308c2ecf20Sopenharmony_ci tmp[0] = b0_; 4318c2ecf20Sopenharmony_ci fproduct_copy_from_wide_(output, tmp); 4328c2ecf20Sopenharmony_ci} 4338c2ecf20Sopenharmony_ci 4348c2ecf20Sopenharmony_cistatic __always_inline void fmul(u64 *output, u64 *a, u64 *b) 4358c2ecf20Sopenharmony_ci{ 4368c2ecf20Sopenharmony_ci fmul_fmul(output, a, b); 4378c2ecf20Sopenharmony_ci} 4388c2ecf20Sopenharmony_ci 4398c2ecf20Sopenharmony_cistatic __always_inline void crecip(u64 *output, u64 *input) 4408c2ecf20Sopenharmony_ci{ 4418c2ecf20Sopenharmony_ci crecip_crecip(output, input); 4428c2ecf20Sopenharmony_ci} 4438c2ecf20Sopenharmony_ci 4448c2ecf20Sopenharmony_cistatic __always_inline void point_swap_conditional_step(u64 *a, u64 *b, 4458c2ecf20Sopenharmony_ci u64 swap1, u32 ctr) 4468c2ecf20Sopenharmony_ci{ 4478c2ecf20Sopenharmony_ci u32 i = ctr - 1; 4488c2ecf20Sopenharmony_ci u64 ai = a[i]; 4498c2ecf20Sopenharmony_ci u64 bi = b[i]; 4508c2ecf20Sopenharmony_ci u64 x = swap1 & (ai ^ bi); 4518c2ecf20Sopenharmony_ci u64 ai1 = ai ^ x; 4528c2ecf20Sopenharmony_ci u64 bi1 = bi ^ x; 4538c2ecf20Sopenharmony_ci a[i] = ai1; 4548c2ecf20Sopenharmony_ci b[i] = bi1; 4558c2ecf20Sopenharmony_ci} 4568c2ecf20Sopenharmony_ci 4578c2ecf20Sopenharmony_cistatic __always_inline void point_swap_conditional5(u64 *a, u64 *b, u64 swap1) 4588c2ecf20Sopenharmony_ci{ 4598c2ecf20Sopenharmony_ci point_swap_conditional_step(a, b, swap1, 5); 4608c2ecf20Sopenharmony_ci point_swap_conditional_step(a, b, swap1, 4); 4618c2ecf20Sopenharmony_ci point_swap_conditional_step(a, b, swap1, 3); 4628c2ecf20Sopenharmony_ci point_swap_conditional_step(a, b, swap1, 2); 4638c2ecf20Sopenharmony_ci point_swap_conditional_step(a, b, swap1, 1); 4648c2ecf20Sopenharmony_ci} 4658c2ecf20Sopenharmony_ci 4668c2ecf20Sopenharmony_cistatic __always_inline void point_swap_conditional(u64 *a, u64 *b, u64 iswap) 4678c2ecf20Sopenharmony_ci{ 4688c2ecf20Sopenharmony_ci u64 swap1 = 0 - iswap; 4698c2ecf20Sopenharmony_ci point_swap_conditional5(a, b, swap1); 4708c2ecf20Sopenharmony_ci point_swap_conditional5(a + 5, b + 5, swap1); 4718c2ecf20Sopenharmony_ci} 4728c2ecf20Sopenharmony_ci 4738c2ecf20Sopenharmony_cistatic __always_inline void point_copy(u64 *output, u64 *input) 4748c2ecf20Sopenharmony_ci{ 4758c2ecf20Sopenharmony_ci memcpy(output, input, 5 * sizeof(*input)); 4768c2ecf20Sopenharmony_ci memcpy(output + 5, input + 5, 5 * sizeof(*input)); 4778c2ecf20Sopenharmony_ci} 4788c2ecf20Sopenharmony_ci 4798c2ecf20Sopenharmony_cistatic __always_inline void addanddouble_fmonty(u64 *pp, u64 *ppq, u64 *p, 4808c2ecf20Sopenharmony_ci u64 *pq, u64 *qmqp) 4818c2ecf20Sopenharmony_ci{ 4828c2ecf20Sopenharmony_ci u64 *qx = qmqp; 4838c2ecf20Sopenharmony_ci u64 *x2 = pp; 4848c2ecf20Sopenharmony_ci u64 *z2 = pp + 5; 4858c2ecf20Sopenharmony_ci u64 *x3 = ppq; 4868c2ecf20Sopenharmony_ci u64 *z3 = ppq + 5; 4878c2ecf20Sopenharmony_ci u64 *x = p; 4888c2ecf20Sopenharmony_ci u64 *z = p + 5; 4898c2ecf20Sopenharmony_ci u64 *xprime = pq; 4908c2ecf20Sopenharmony_ci u64 *zprime = pq + 5; 4918c2ecf20Sopenharmony_ci u64 buf[40] = { 0 }; 4928c2ecf20Sopenharmony_ci u64 *origx = buf; 4938c2ecf20Sopenharmony_ci u64 *origxprime0 = buf + 5; 4948c2ecf20Sopenharmony_ci u64 *xxprime0; 4958c2ecf20Sopenharmony_ci u64 *zzprime0; 4968c2ecf20Sopenharmony_ci u64 *origxprime; 4978c2ecf20Sopenharmony_ci xxprime0 = buf + 25; 4988c2ecf20Sopenharmony_ci zzprime0 = buf + 30; 4998c2ecf20Sopenharmony_ci memcpy(origx, x, 5 * sizeof(*x)); 5008c2ecf20Sopenharmony_ci fsum(x, z); 5018c2ecf20Sopenharmony_ci fdifference(z, origx); 5028c2ecf20Sopenharmony_ci memcpy(origxprime0, xprime, 5 * sizeof(*xprime)); 5038c2ecf20Sopenharmony_ci fsum(xprime, zprime); 5048c2ecf20Sopenharmony_ci fdifference(zprime, origxprime0); 5058c2ecf20Sopenharmony_ci fmul(xxprime0, xprime, z); 5068c2ecf20Sopenharmony_ci fmul(zzprime0, x, zprime); 5078c2ecf20Sopenharmony_ci origxprime = buf + 5; 5088c2ecf20Sopenharmony_ci { 5098c2ecf20Sopenharmony_ci u64 *xx0; 5108c2ecf20Sopenharmony_ci u64 *zz0; 5118c2ecf20Sopenharmony_ci u64 *xxprime; 5128c2ecf20Sopenharmony_ci u64 *zzprime; 5138c2ecf20Sopenharmony_ci u64 *zzzprime; 5148c2ecf20Sopenharmony_ci xx0 = buf + 15; 5158c2ecf20Sopenharmony_ci zz0 = buf + 20; 5168c2ecf20Sopenharmony_ci xxprime = buf + 25; 5178c2ecf20Sopenharmony_ci zzprime = buf + 30; 5188c2ecf20Sopenharmony_ci zzzprime = buf + 35; 5198c2ecf20Sopenharmony_ci memcpy(origxprime, xxprime, 5 * sizeof(*xxprime)); 5208c2ecf20Sopenharmony_ci fsum(xxprime, zzprime); 5218c2ecf20Sopenharmony_ci fdifference(zzprime, origxprime); 5228c2ecf20Sopenharmony_ci fsquare_fsquare_times(x3, xxprime, 1); 5238c2ecf20Sopenharmony_ci fsquare_fsquare_times(zzzprime, zzprime, 1); 5248c2ecf20Sopenharmony_ci fmul(z3, zzzprime, qx); 5258c2ecf20Sopenharmony_ci fsquare_fsquare_times(xx0, x, 1); 5268c2ecf20Sopenharmony_ci fsquare_fsquare_times(zz0, z, 1); 5278c2ecf20Sopenharmony_ci { 5288c2ecf20Sopenharmony_ci u64 *zzz; 5298c2ecf20Sopenharmony_ci u64 *xx; 5308c2ecf20Sopenharmony_ci u64 *zz; 5318c2ecf20Sopenharmony_ci u64 scalar; 5328c2ecf20Sopenharmony_ci zzz = buf + 10; 5338c2ecf20Sopenharmony_ci xx = buf + 15; 5348c2ecf20Sopenharmony_ci zz = buf + 20; 5358c2ecf20Sopenharmony_ci fmul(x2, xx, zz); 5368c2ecf20Sopenharmony_ci fdifference(zz, xx); 5378c2ecf20Sopenharmony_ci scalar = 121665; 5388c2ecf20Sopenharmony_ci fscalar(zzz, zz, scalar); 5398c2ecf20Sopenharmony_ci fsum(zzz, xx); 5408c2ecf20Sopenharmony_ci fmul(z2, zzz, zz); 5418c2ecf20Sopenharmony_ci } 5428c2ecf20Sopenharmony_ci } 5438c2ecf20Sopenharmony_ci} 5448c2ecf20Sopenharmony_ci 5458c2ecf20Sopenharmony_cistatic __always_inline void 5468c2ecf20Sopenharmony_ciladder_smallloop_cmult_small_loop_step(u64 *nq, u64 *nqpq, u64 *nq2, u64 *nqpq2, 5478c2ecf20Sopenharmony_ci u64 *q, u8 byt) 5488c2ecf20Sopenharmony_ci{ 5498c2ecf20Sopenharmony_ci u64 bit0 = (u64)(byt >> 7); 5508c2ecf20Sopenharmony_ci u64 bit; 5518c2ecf20Sopenharmony_ci point_swap_conditional(nq, nqpq, bit0); 5528c2ecf20Sopenharmony_ci addanddouble_fmonty(nq2, nqpq2, nq, nqpq, q); 5538c2ecf20Sopenharmony_ci bit = (u64)(byt >> 7); 5548c2ecf20Sopenharmony_ci point_swap_conditional(nq2, nqpq2, bit); 5558c2ecf20Sopenharmony_ci} 5568c2ecf20Sopenharmony_ci 5578c2ecf20Sopenharmony_cistatic __always_inline void 5588c2ecf20Sopenharmony_ciladder_smallloop_cmult_small_loop_double_step(u64 *nq, u64 *nqpq, u64 *nq2, 5598c2ecf20Sopenharmony_ci u64 *nqpq2, u64 *q, u8 byt) 5608c2ecf20Sopenharmony_ci{ 5618c2ecf20Sopenharmony_ci u8 byt1; 5628c2ecf20Sopenharmony_ci ladder_smallloop_cmult_small_loop_step(nq, nqpq, nq2, nqpq2, q, byt); 5638c2ecf20Sopenharmony_ci byt1 = byt << 1; 5648c2ecf20Sopenharmony_ci ladder_smallloop_cmult_small_loop_step(nq2, nqpq2, nq, nqpq, q, byt1); 5658c2ecf20Sopenharmony_ci} 5668c2ecf20Sopenharmony_ci 5678c2ecf20Sopenharmony_cistatic __always_inline void 5688c2ecf20Sopenharmony_ciladder_smallloop_cmult_small_loop(u64 *nq, u64 *nqpq, u64 *nq2, u64 *nqpq2, 5698c2ecf20Sopenharmony_ci u64 *q, u8 byt, u32 i) 5708c2ecf20Sopenharmony_ci{ 5718c2ecf20Sopenharmony_ci while (i--) { 5728c2ecf20Sopenharmony_ci ladder_smallloop_cmult_small_loop_double_step(nq, nqpq, nq2, 5738c2ecf20Sopenharmony_ci nqpq2, q, byt); 5748c2ecf20Sopenharmony_ci byt <<= 2; 5758c2ecf20Sopenharmony_ci } 5768c2ecf20Sopenharmony_ci} 5778c2ecf20Sopenharmony_ci 5788c2ecf20Sopenharmony_cistatic __always_inline void ladder_bigloop_cmult_big_loop(u8 *n1, u64 *nq, 5798c2ecf20Sopenharmony_ci u64 *nqpq, u64 *nq2, 5808c2ecf20Sopenharmony_ci u64 *nqpq2, u64 *q, 5818c2ecf20Sopenharmony_ci u32 i) 5828c2ecf20Sopenharmony_ci{ 5838c2ecf20Sopenharmony_ci while (i--) { 5848c2ecf20Sopenharmony_ci u8 byte = n1[i]; 5858c2ecf20Sopenharmony_ci ladder_smallloop_cmult_small_loop(nq, nqpq, nq2, nqpq2, q, 5868c2ecf20Sopenharmony_ci byte, 4); 5878c2ecf20Sopenharmony_ci } 5888c2ecf20Sopenharmony_ci} 5898c2ecf20Sopenharmony_ci 5908c2ecf20Sopenharmony_cistatic void ladder_cmult(u64 *result, u8 *n1, u64 *q) 5918c2ecf20Sopenharmony_ci{ 5928c2ecf20Sopenharmony_ci u64 point_buf[40] = { 0 }; 5938c2ecf20Sopenharmony_ci u64 *nq = point_buf; 5948c2ecf20Sopenharmony_ci u64 *nqpq = point_buf + 10; 5958c2ecf20Sopenharmony_ci u64 *nq2 = point_buf + 20; 5968c2ecf20Sopenharmony_ci u64 *nqpq2 = point_buf + 30; 5978c2ecf20Sopenharmony_ci point_copy(nqpq, q); 5988c2ecf20Sopenharmony_ci nq[0] = 1; 5998c2ecf20Sopenharmony_ci ladder_bigloop_cmult_big_loop(n1, nq, nqpq, nq2, nqpq2, q, 32); 6008c2ecf20Sopenharmony_ci point_copy(result, nq); 6018c2ecf20Sopenharmony_ci} 6028c2ecf20Sopenharmony_ci 6038c2ecf20Sopenharmony_cistatic __always_inline void format_fexpand(u64 *output, const u8 *input) 6048c2ecf20Sopenharmony_ci{ 6058c2ecf20Sopenharmony_ci const u8 *x00 = input + 6; 6068c2ecf20Sopenharmony_ci const u8 *x01 = input + 12; 6078c2ecf20Sopenharmony_ci const u8 *x02 = input + 19; 6088c2ecf20Sopenharmony_ci const u8 *x0 = input + 24; 6098c2ecf20Sopenharmony_ci u64 i0, i1, i2, i3, i4, output0, output1, output2, output3, output4; 6108c2ecf20Sopenharmony_ci i0 = get_unaligned_le64(input); 6118c2ecf20Sopenharmony_ci i1 = get_unaligned_le64(x00); 6128c2ecf20Sopenharmony_ci i2 = get_unaligned_le64(x01); 6138c2ecf20Sopenharmony_ci i3 = get_unaligned_le64(x02); 6148c2ecf20Sopenharmony_ci i4 = get_unaligned_le64(x0); 6158c2ecf20Sopenharmony_ci output0 = i0 & 0x7ffffffffffffLLU; 6168c2ecf20Sopenharmony_ci output1 = i1 >> 3 & 0x7ffffffffffffLLU; 6178c2ecf20Sopenharmony_ci output2 = i2 >> 6 & 0x7ffffffffffffLLU; 6188c2ecf20Sopenharmony_ci output3 = i3 >> 1 & 0x7ffffffffffffLLU; 6198c2ecf20Sopenharmony_ci output4 = i4 >> 12 & 0x7ffffffffffffLLU; 6208c2ecf20Sopenharmony_ci output[0] = output0; 6218c2ecf20Sopenharmony_ci output[1] = output1; 6228c2ecf20Sopenharmony_ci output[2] = output2; 6238c2ecf20Sopenharmony_ci output[3] = output3; 6248c2ecf20Sopenharmony_ci output[4] = output4; 6258c2ecf20Sopenharmony_ci} 6268c2ecf20Sopenharmony_ci 6278c2ecf20Sopenharmony_cistatic __always_inline void format_fcontract_first_carry_pass(u64 *input) 6288c2ecf20Sopenharmony_ci{ 6298c2ecf20Sopenharmony_ci u64 t0 = input[0]; 6308c2ecf20Sopenharmony_ci u64 t1 = input[1]; 6318c2ecf20Sopenharmony_ci u64 t2 = input[2]; 6328c2ecf20Sopenharmony_ci u64 t3 = input[3]; 6338c2ecf20Sopenharmony_ci u64 t4 = input[4]; 6348c2ecf20Sopenharmony_ci u64 t1_ = t1 + (t0 >> 51); 6358c2ecf20Sopenharmony_ci u64 t0_ = t0 & 0x7ffffffffffffLLU; 6368c2ecf20Sopenharmony_ci u64 t2_ = t2 + (t1_ >> 51); 6378c2ecf20Sopenharmony_ci u64 t1__ = t1_ & 0x7ffffffffffffLLU; 6388c2ecf20Sopenharmony_ci u64 t3_ = t3 + (t2_ >> 51); 6398c2ecf20Sopenharmony_ci u64 t2__ = t2_ & 0x7ffffffffffffLLU; 6408c2ecf20Sopenharmony_ci u64 t4_ = t4 + (t3_ >> 51); 6418c2ecf20Sopenharmony_ci u64 t3__ = t3_ & 0x7ffffffffffffLLU; 6428c2ecf20Sopenharmony_ci input[0] = t0_; 6438c2ecf20Sopenharmony_ci input[1] = t1__; 6448c2ecf20Sopenharmony_ci input[2] = t2__; 6458c2ecf20Sopenharmony_ci input[3] = t3__; 6468c2ecf20Sopenharmony_ci input[4] = t4_; 6478c2ecf20Sopenharmony_ci} 6488c2ecf20Sopenharmony_ci 6498c2ecf20Sopenharmony_cistatic __always_inline void format_fcontract_first_carry_full(u64 *input) 6508c2ecf20Sopenharmony_ci{ 6518c2ecf20Sopenharmony_ci format_fcontract_first_carry_pass(input); 6528c2ecf20Sopenharmony_ci modulo_carry_top(input); 6538c2ecf20Sopenharmony_ci} 6548c2ecf20Sopenharmony_ci 6558c2ecf20Sopenharmony_cistatic __always_inline void format_fcontract_second_carry_pass(u64 *input) 6568c2ecf20Sopenharmony_ci{ 6578c2ecf20Sopenharmony_ci u64 t0 = input[0]; 6588c2ecf20Sopenharmony_ci u64 t1 = input[1]; 6598c2ecf20Sopenharmony_ci u64 t2 = input[2]; 6608c2ecf20Sopenharmony_ci u64 t3 = input[3]; 6618c2ecf20Sopenharmony_ci u64 t4 = input[4]; 6628c2ecf20Sopenharmony_ci u64 t1_ = t1 + (t0 >> 51); 6638c2ecf20Sopenharmony_ci u64 t0_ = t0 & 0x7ffffffffffffLLU; 6648c2ecf20Sopenharmony_ci u64 t2_ = t2 + (t1_ >> 51); 6658c2ecf20Sopenharmony_ci u64 t1__ = t1_ & 0x7ffffffffffffLLU; 6668c2ecf20Sopenharmony_ci u64 t3_ = t3 + (t2_ >> 51); 6678c2ecf20Sopenharmony_ci u64 t2__ = t2_ & 0x7ffffffffffffLLU; 6688c2ecf20Sopenharmony_ci u64 t4_ = t4 + (t3_ >> 51); 6698c2ecf20Sopenharmony_ci u64 t3__ = t3_ & 0x7ffffffffffffLLU; 6708c2ecf20Sopenharmony_ci input[0] = t0_; 6718c2ecf20Sopenharmony_ci input[1] = t1__; 6728c2ecf20Sopenharmony_ci input[2] = t2__; 6738c2ecf20Sopenharmony_ci input[3] = t3__; 6748c2ecf20Sopenharmony_ci input[4] = t4_; 6758c2ecf20Sopenharmony_ci} 6768c2ecf20Sopenharmony_ci 6778c2ecf20Sopenharmony_cistatic __always_inline void format_fcontract_second_carry_full(u64 *input) 6788c2ecf20Sopenharmony_ci{ 6798c2ecf20Sopenharmony_ci u64 i0; 6808c2ecf20Sopenharmony_ci u64 i1; 6818c2ecf20Sopenharmony_ci u64 i0_; 6828c2ecf20Sopenharmony_ci u64 i1_; 6838c2ecf20Sopenharmony_ci format_fcontract_second_carry_pass(input); 6848c2ecf20Sopenharmony_ci modulo_carry_top(input); 6858c2ecf20Sopenharmony_ci i0 = input[0]; 6868c2ecf20Sopenharmony_ci i1 = input[1]; 6878c2ecf20Sopenharmony_ci i0_ = i0 & 0x7ffffffffffffLLU; 6888c2ecf20Sopenharmony_ci i1_ = i1 + (i0 >> 51); 6898c2ecf20Sopenharmony_ci input[0] = i0_; 6908c2ecf20Sopenharmony_ci input[1] = i1_; 6918c2ecf20Sopenharmony_ci} 6928c2ecf20Sopenharmony_ci 6938c2ecf20Sopenharmony_cistatic __always_inline void format_fcontract_trim(u64 *input) 6948c2ecf20Sopenharmony_ci{ 6958c2ecf20Sopenharmony_ci u64 a0 = input[0]; 6968c2ecf20Sopenharmony_ci u64 a1 = input[1]; 6978c2ecf20Sopenharmony_ci u64 a2 = input[2]; 6988c2ecf20Sopenharmony_ci u64 a3 = input[3]; 6998c2ecf20Sopenharmony_ci u64 a4 = input[4]; 7008c2ecf20Sopenharmony_ci u64 mask0 = u64_gte_mask(a0, 0x7ffffffffffedLLU); 7018c2ecf20Sopenharmony_ci u64 mask1 = u64_eq_mask(a1, 0x7ffffffffffffLLU); 7028c2ecf20Sopenharmony_ci u64 mask2 = u64_eq_mask(a2, 0x7ffffffffffffLLU); 7038c2ecf20Sopenharmony_ci u64 mask3 = u64_eq_mask(a3, 0x7ffffffffffffLLU); 7048c2ecf20Sopenharmony_ci u64 mask4 = u64_eq_mask(a4, 0x7ffffffffffffLLU); 7058c2ecf20Sopenharmony_ci u64 mask = (((mask0 & mask1) & mask2) & mask3) & mask4; 7068c2ecf20Sopenharmony_ci u64 a0_ = a0 - (0x7ffffffffffedLLU & mask); 7078c2ecf20Sopenharmony_ci u64 a1_ = a1 - (0x7ffffffffffffLLU & mask); 7088c2ecf20Sopenharmony_ci u64 a2_ = a2 - (0x7ffffffffffffLLU & mask); 7098c2ecf20Sopenharmony_ci u64 a3_ = a3 - (0x7ffffffffffffLLU & mask); 7108c2ecf20Sopenharmony_ci u64 a4_ = a4 - (0x7ffffffffffffLLU & mask); 7118c2ecf20Sopenharmony_ci input[0] = a0_; 7128c2ecf20Sopenharmony_ci input[1] = a1_; 7138c2ecf20Sopenharmony_ci input[2] = a2_; 7148c2ecf20Sopenharmony_ci input[3] = a3_; 7158c2ecf20Sopenharmony_ci input[4] = a4_; 7168c2ecf20Sopenharmony_ci} 7178c2ecf20Sopenharmony_ci 7188c2ecf20Sopenharmony_cistatic __always_inline void format_fcontract_store(u8 *output, u64 *input) 7198c2ecf20Sopenharmony_ci{ 7208c2ecf20Sopenharmony_ci u64 t0 = input[0]; 7218c2ecf20Sopenharmony_ci u64 t1 = input[1]; 7228c2ecf20Sopenharmony_ci u64 t2 = input[2]; 7238c2ecf20Sopenharmony_ci u64 t3 = input[3]; 7248c2ecf20Sopenharmony_ci u64 t4 = input[4]; 7258c2ecf20Sopenharmony_ci u64 o0 = t1 << 51 | t0; 7268c2ecf20Sopenharmony_ci u64 o1 = t2 << 38 | t1 >> 13; 7278c2ecf20Sopenharmony_ci u64 o2 = t3 << 25 | t2 >> 26; 7288c2ecf20Sopenharmony_ci u64 o3 = t4 << 12 | t3 >> 39; 7298c2ecf20Sopenharmony_ci u8 *b0 = output; 7308c2ecf20Sopenharmony_ci u8 *b1 = output + 8; 7318c2ecf20Sopenharmony_ci u8 *b2 = output + 16; 7328c2ecf20Sopenharmony_ci u8 *b3 = output + 24; 7338c2ecf20Sopenharmony_ci put_unaligned_le64(o0, b0); 7348c2ecf20Sopenharmony_ci put_unaligned_le64(o1, b1); 7358c2ecf20Sopenharmony_ci put_unaligned_le64(o2, b2); 7368c2ecf20Sopenharmony_ci put_unaligned_le64(o3, b3); 7378c2ecf20Sopenharmony_ci} 7388c2ecf20Sopenharmony_ci 7398c2ecf20Sopenharmony_cistatic __always_inline void format_fcontract(u8 *output, u64 *input) 7408c2ecf20Sopenharmony_ci{ 7418c2ecf20Sopenharmony_ci format_fcontract_first_carry_full(input); 7428c2ecf20Sopenharmony_ci format_fcontract_second_carry_full(input); 7438c2ecf20Sopenharmony_ci format_fcontract_trim(input); 7448c2ecf20Sopenharmony_ci format_fcontract_store(output, input); 7458c2ecf20Sopenharmony_ci} 7468c2ecf20Sopenharmony_ci 7478c2ecf20Sopenharmony_cistatic __always_inline void format_scalar_of_point(u8 *scalar, u64 *point) 7488c2ecf20Sopenharmony_ci{ 7498c2ecf20Sopenharmony_ci u64 *x = point; 7508c2ecf20Sopenharmony_ci u64 *z = point + 5; 7518c2ecf20Sopenharmony_ci u64 buf[10] __aligned(32) = { 0 }; 7528c2ecf20Sopenharmony_ci u64 *zmone = buf; 7538c2ecf20Sopenharmony_ci u64 *sc = buf + 5; 7548c2ecf20Sopenharmony_ci crecip(zmone, z); 7558c2ecf20Sopenharmony_ci fmul(sc, x, zmone); 7568c2ecf20Sopenharmony_ci format_fcontract(scalar, sc); 7578c2ecf20Sopenharmony_ci} 7588c2ecf20Sopenharmony_ci 7598c2ecf20Sopenharmony_civoid curve25519_generic(u8 mypublic[CURVE25519_KEY_SIZE], 7608c2ecf20Sopenharmony_ci const u8 secret[CURVE25519_KEY_SIZE], 7618c2ecf20Sopenharmony_ci const u8 basepoint[CURVE25519_KEY_SIZE]) 7628c2ecf20Sopenharmony_ci{ 7638c2ecf20Sopenharmony_ci u64 buf0[10] __aligned(32) = { 0 }; 7648c2ecf20Sopenharmony_ci u64 *x0 = buf0; 7658c2ecf20Sopenharmony_ci u64 *z = buf0 + 5; 7668c2ecf20Sopenharmony_ci u64 *q; 7678c2ecf20Sopenharmony_ci format_fexpand(x0, basepoint); 7688c2ecf20Sopenharmony_ci z[0] = 1; 7698c2ecf20Sopenharmony_ci q = buf0; 7708c2ecf20Sopenharmony_ci { 7718c2ecf20Sopenharmony_ci u8 e[32] __aligned(32) = { 0 }; 7728c2ecf20Sopenharmony_ci u8 *scalar; 7738c2ecf20Sopenharmony_ci memcpy(e, secret, 32); 7748c2ecf20Sopenharmony_ci curve25519_clamp_secret(e); 7758c2ecf20Sopenharmony_ci scalar = e; 7768c2ecf20Sopenharmony_ci { 7778c2ecf20Sopenharmony_ci u64 buf[15] = { 0 }; 7788c2ecf20Sopenharmony_ci u64 *nq = buf; 7798c2ecf20Sopenharmony_ci u64 *x = nq; 7808c2ecf20Sopenharmony_ci x[0] = 1; 7818c2ecf20Sopenharmony_ci ladder_cmult(nq, scalar, q); 7828c2ecf20Sopenharmony_ci format_scalar_of_point(mypublic, nq); 7838c2ecf20Sopenharmony_ci memzero_explicit(buf, sizeof(buf)); 7848c2ecf20Sopenharmony_ci } 7858c2ecf20Sopenharmony_ci memzero_explicit(e, sizeof(e)); 7868c2ecf20Sopenharmony_ci } 7878c2ecf20Sopenharmony_ci memzero_explicit(buf0, sizeof(buf0)); 7888c2ecf20Sopenharmony_ci} 789