162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 OR MIT 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Copyright (C) 2016-2017 INRIA and Microsoft Corporation. 462306a36Sopenharmony_ci * Copyright (C) 2018-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. 562306a36Sopenharmony_ci * 662306a36Sopenharmony_ci * This is a machine-generated formally verified implementation of Curve25519 762306a36Sopenharmony_ci * ECDH from: <https://github.com/mitls/hacl-star>. Though originally machine 862306a36Sopenharmony_ci * generated, it has been tweaked to be suitable for use in the kernel. It is 962306a36Sopenharmony_ci * optimized for 64-bit machines that can efficiently work with 128-bit 1062306a36Sopenharmony_ci * integer types. 1162306a36Sopenharmony_ci */ 1262306a36Sopenharmony_ci 1362306a36Sopenharmony_ci#include <asm/unaligned.h> 1462306a36Sopenharmony_ci#include <crypto/curve25519.h> 1562306a36Sopenharmony_ci#include <linux/string.h> 1662306a36Sopenharmony_ci 1762306a36Sopenharmony_cistatic __always_inline u64 u64_eq_mask(u64 a, u64 b) 1862306a36Sopenharmony_ci{ 1962306a36Sopenharmony_ci u64 x = a ^ b; 2062306a36Sopenharmony_ci u64 minus_x = ~x + (u64)1U; 2162306a36Sopenharmony_ci u64 x_or_minus_x = x | minus_x; 2262306a36Sopenharmony_ci u64 xnx = x_or_minus_x >> (u32)63U; 2362306a36Sopenharmony_ci u64 c = xnx - (u64)1U; 2462306a36Sopenharmony_ci return c; 2562306a36Sopenharmony_ci} 2662306a36Sopenharmony_ci 2762306a36Sopenharmony_cistatic __always_inline u64 u64_gte_mask(u64 a, u64 b) 2862306a36Sopenharmony_ci{ 2962306a36Sopenharmony_ci u64 x = a; 3062306a36Sopenharmony_ci u64 y = b; 3162306a36Sopenharmony_ci u64 x_xor_y = x ^ y; 3262306a36Sopenharmony_ci u64 x_sub_y = x - y; 3362306a36Sopenharmony_ci u64 x_sub_y_xor_y = x_sub_y ^ y; 3462306a36Sopenharmony_ci u64 q = x_xor_y | x_sub_y_xor_y; 3562306a36Sopenharmony_ci u64 x_xor_q = x ^ q; 3662306a36Sopenharmony_ci u64 x_xor_q_ = x_xor_q >> (u32)63U; 3762306a36Sopenharmony_ci u64 c = x_xor_q_ - (u64)1U; 3862306a36Sopenharmony_ci return c; 3962306a36Sopenharmony_ci} 4062306a36Sopenharmony_ci 4162306a36Sopenharmony_cistatic __always_inline void modulo_carry_top(u64 *b) 4262306a36Sopenharmony_ci{ 4362306a36Sopenharmony_ci u64 b4 = b[4]; 4462306a36Sopenharmony_ci u64 b0 = b[0]; 4562306a36Sopenharmony_ci u64 b4_ = b4 & 0x7ffffffffffffLLU; 4662306a36Sopenharmony_ci u64 b0_ = b0 + 19 * (b4 >> 51); 4762306a36Sopenharmony_ci b[4] = b4_; 4862306a36Sopenharmony_ci b[0] = b0_; 4962306a36Sopenharmony_ci} 5062306a36Sopenharmony_ci 5162306a36Sopenharmony_cistatic __always_inline void fproduct_copy_from_wide_(u64 *output, u128 *input) 5262306a36Sopenharmony_ci{ 5362306a36Sopenharmony_ci { 5462306a36Sopenharmony_ci u128 xi = input[0]; 5562306a36Sopenharmony_ci output[0] = ((u64)(xi)); 5662306a36Sopenharmony_ci } 5762306a36Sopenharmony_ci { 5862306a36Sopenharmony_ci u128 xi = input[1]; 5962306a36Sopenharmony_ci output[1] = ((u64)(xi)); 6062306a36Sopenharmony_ci } 6162306a36Sopenharmony_ci { 6262306a36Sopenharmony_ci u128 xi = input[2]; 6362306a36Sopenharmony_ci output[2] = ((u64)(xi)); 6462306a36Sopenharmony_ci } 6562306a36Sopenharmony_ci { 6662306a36Sopenharmony_ci u128 xi = input[3]; 6762306a36Sopenharmony_ci output[3] = ((u64)(xi)); 6862306a36Sopenharmony_ci } 6962306a36Sopenharmony_ci { 7062306a36Sopenharmony_ci u128 xi = input[4]; 7162306a36Sopenharmony_ci output[4] = ((u64)(xi)); 7262306a36Sopenharmony_ci } 7362306a36Sopenharmony_ci} 7462306a36Sopenharmony_ci 7562306a36Sopenharmony_cistatic __always_inline void 7662306a36Sopenharmony_cifproduct_sum_scalar_multiplication_(u128 *output, u64 *input, u64 s) 7762306a36Sopenharmony_ci{ 7862306a36Sopenharmony_ci output[0] += (u128)input[0] * s; 7962306a36Sopenharmony_ci output[1] += (u128)input[1] * s; 8062306a36Sopenharmony_ci output[2] += (u128)input[2] * s; 8162306a36Sopenharmony_ci output[3] += (u128)input[3] * s; 8262306a36Sopenharmony_ci output[4] += (u128)input[4] * s; 8362306a36Sopenharmony_ci} 8462306a36Sopenharmony_ci 8562306a36Sopenharmony_cistatic __always_inline void fproduct_carry_wide_(u128 *tmp) 8662306a36Sopenharmony_ci{ 8762306a36Sopenharmony_ci { 8862306a36Sopenharmony_ci u32 ctr = 0; 8962306a36Sopenharmony_ci u128 tctr = tmp[ctr]; 9062306a36Sopenharmony_ci u128 tctrp1 = tmp[ctr + 1]; 9162306a36Sopenharmony_ci u64 r0 = ((u64)(tctr)) & 0x7ffffffffffffLLU; 9262306a36Sopenharmony_ci u128 c = ((tctr) >> (51)); 9362306a36Sopenharmony_ci tmp[ctr] = ((u128)(r0)); 9462306a36Sopenharmony_ci tmp[ctr + 1] = ((tctrp1) + (c)); 9562306a36Sopenharmony_ci } 9662306a36Sopenharmony_ci { 9762306a36Sopenharmony_ci u32 ctr = 1; 9862306a36Sopenharmony_ci u128 tctr = tmp[ctr]; 9962306a36Sopenharmony_ci u128 tctrp1 = tmp[ctr + 1]; 10062306a36Sopenharmony_ci u64 r0 = ((u64)(tctr)) & 0x7ffffffffffffLLU; 10162306a36Sopenharmony_ci u128 c = ((tctr) >> (51)); 10262306a36Sopenharmony_ci tmp[ctr] = ((u128)(r0)); 10362306a36Sopenharmony_ci tmp[ctr + 1] = ((tctrp1) + (c)); 10462306a36Sopenharmony_ci } 10562306a36Sopenharmony_ci 10662306a36Sopenharmony_ci { 10762306a36Sopenharmony_ci u32 ctr = 2; 10862306a36Sopenharmony_ci u128 tctr = tmp[ctr]; 10962306a36Sopenharmony_ci u128 tctrp1 = tmp[ctr + 1]; 11062306a36Sopenharmony_ci u64 r0 = ((u64)(tctr)) & 0x7ffffffffffffLLU; 11162306a36Sopenharmony_ci u128 c = ((tctr) >> (51)); 11262306a36Sopenharmony_ci tmp[ctr] = ((u128)(r0)); 11362306a36Sopenharmony_ci tmp[ctr + 1] = ((tctrp1) + (c)); 11462306a36Sopenharmony_ci } 11562306a36Sopenharmony_ci { 11662306a36Sopenharmony_ci u32 ctr = 3; 11762306a36Sopenharmony_ci u128 tctr = tmp[ctr]; 11862306a36Sopenharmony_ci u128 tctrp1 = tmp[ctr + 1]; 11962306a36Sopenharmony_ci u64 r0 = ((u64)(tctr)) & 0x7ffffffffffffLLU; 12062306a36Sopenharmony_ci u128 c = ((tctr) >> (51)); 12162306a36Sopenharmony_ci tmp[ctr] = ((u128)(r0)); 12262306a36Sopenharmony_ci tmp[ctr + 1] = ((tctrp1) + (c)); 12362306a36Sopenharmony_ci } 12462306a36Sopenharmony_ci} 12562306a36Sopenharmony_ci 12662306a36Sopenharmony_cistatic __always_inline void fmul_shift_reduce(u64 *output) 12762306a36Sopenharmony_ci{ 12862306a36Sopenharmony_ci u64 tmp = output[4]; 12962306a36Sopenharmony_ci u64 b0; 13062306a36Sopenharmony_ci { 13162306a36Sopenharmony_ci u32 ctr = 5 - 0 - 1; 13262306a36Sopenharmony_ci u64 z = output[ctr - 1]; 13362306a36Sopenharmony_ci output[ctr] = z; 13462306a36Sopenharmony_ci } 13562306a36Sopenharmony_ci { 13662306a36Sopenharmony_ci u32 ctr = 5 - 1 - 1; 13762306a36Sopenharmony_ci u64 z = output[ctr - 1]; 13862306a36Sopenharmony_ci output[ctr] = z; 13962306a36Sopenharmony_ci } 14062306a36Sopenharmony_ci { 14162306a36Sopenharmony_ci u32 ctr = 5 - 2 - 1; 14262306a36Sopenharmony_ci u64 z = output[ctr - 1]; 14362306a36Sopenharmony_ci output[ctr] = z; 14462306a36Sopenharmony_ci } 14562306a36Sopenharmony_ci { 14662306a36Sopenharmony_ci u32 ctr = 5 - 3 - 1; 14762306a36Sopenharmony_ci u64 z = output[ctr - 1]; 14862306a36Sopenharmony_ci output[ctr] = z; 14962306a36Sopenharmony_ci } 15062306a36Sopenharmony_ci output[0] = tmp; 15162306a36Sopenharmony_ci b0 = output[0]; 15262306a36Sopenharmony_ci output[0] = 19 * b0; 15362306a36Sopenharmony_ci} 15462306a36Sopenharmony_ci 15562306a36Sopenharmony_cistatic __always_inline void fmul_mul_shift_reduce_(u128 *output, u64 *input, 15662306a36Sopenharmony_ci u64 *input21) 15762306a36Sopenharmony_ci{ 15862306a36Sopenharmony_ci u32 i; 15962306a36Sopenharmony_ci u64 input2i; 16062306a36Sopenharmony_ci { 16162306a36Sopenharmony_ci u64 input2i = input21[0]; 16262306a36Sopenharmony_ci fproduct_sum_scalar_multiplication_(output, input, input2i); 16362306a36Sopenharmony_ci fmul_shift_reduce(input); 16462306a36Sopenharmony_ci } 16562306a36Sopenharmony_ci { 16662306a36Sopenharmony_ci u64 input2i = input21[1]; 16762306a36Sopenharmony_ci fproduct_sum_scalar_multiplication_(output, input, input2i); 16862306a36Sopenharmony_ci fmul_shift_reduce(input); 16962306a36Sopenharmony_ci } 17062306a36Sopenharmony_ci { 17162306a36Sopenharmony_ci u64 input2i = input21[2]; 17262306a36Sopenharmony_ci fproduct_sum_scalar_multiplication_(output, input, input2i); 17362306a36Sopenharmony_ci fmul_shift_reduce(input); 17462306a36Sopenharmony_ci } 17562306a36Sopenharmony_ci { 17662306a36Sopenharmony_ci u64 input2i = input21[3]; 17762306a36Sopenharmony_ci fproduct_sum_scalar_multiplication_(output, input, input2i); 17862306a36Sopenharmony_ci fmul_shift_reduce(input); 17962306a36Sopenharmony_ci } 18062306a36Sopenharmony_ci i = 4; 18162306a36Sopenharmony_ci input2i = input21[i]; 18262306a36Sopenharmony_ci fproduct_sum_scalar_multiplication_(output, input, input2i); 18362306a36Sopenharmony_ci} 18462306a36Sopenharmony_ci 18562306a36Sopenharmony_cistatic __always_inline void fmul_fmul(u64 *output, u64 *input, u64 *input21) 18662306a36Sopenharmony_ci{ 18762306a36Sopenharmony_ci u64 tmp[5] = { input[0], input[1], input[2], input[3], input[4] }; 18862306a36Sopenharmony_ci { 18962306a36Sopenharmony_ci u128 b4; 19062306a36Sopenharmony_ci u128 b0; 19162306a36Sopenharmony_ci u128 b4_; 19262306a36Sopenharmony_ci u128 b0_; 19362306a36Sopenharmony_ci u64 i0; 19462306a36Sopenharmony_ci u64 i1; 19562306a36Sopenharmony_ci u64 i0_; 19662306a36Sopenharmony_ci u64 i1_; 19762306a36Sopenharmony_ci u128 t[5] = { 0 }; 19862306a36Sopenharmony_ci fmul_mul_shift_reduce_(t, tmp, input21); 19962306a36Sopenharmony_ci fproduct_carry_wide_(t); 20062306a36Sopenharmony_ci b4 = t[4]; 20162306a36Sopenharmony_ci b0 = t[0]; 20262306a36Sopenharmony_ci b4_ = ((b4) & (((u128)(0x7ffffffffffffLLU)))); 20362306a36Sopenharmony_ci b0_ = ((b0) + (((u128)(19) * (((u64)(((b4) >> (51)))))))); 20462306a36Sopenharmony_ci t[4] = b4_; 20562306a36Sopenharmony_ci t[0] = b0_; 20662306a36Sopenharmony_ci fproduct_copy_from_wide_(output, t); 20762306a36Sopenharmony_ci i0 = output[0]; 20862306a36Sopenharmony_ci i1 = output[1]; 20962306a36Sopenharmony_ci i0_ = i0 & 0x7ffffffffffffLLU; 21062306a36Sopenharmony_ci i1_ = i1 + (i0 >> 51); 21162306a36Sopenharmony_ci output[0] = i0_; 21262306a36Sopenharmony_ci output[1] = i1_; 21362306a36Sopenharmony_ci } 21462306a36Sopenharmony_ci} 21562306a36Sopenharmony_ci 21662306a36Sopenharmony_cistatic __always_inline void fsquare_fsquare__(u128 *tmp, u64 *output) 21762306a36Sopenharmony_ci{ 21862306a36Sopenharmony_ci u64 r0 = output[0]; 21962306a36Sopenharmony_ci u64 r1 = output[1]; 22062306a36Sopenharmony_ci u64 r2 = output[2]; 22162306a36Sopenharmony_ci u64 r3 = output[3]; 22262306a36Sopenharmony_ci u64 r4 = output[4]; 22362306a36Sopenharmony_ci u64 d0 = r0 * 2; 22462306a36Sopenharmony_ci u64 d1 = r1 * 2; 22562306a36Sopenharmony_ci u64 d2 = r2 * 2 * 19; 22662306a36Sopenharmony_ci u64 d419 = r4 * 19; 22762306a36Sopenharmony_ci u64 d4 = d419 * 2; 22862306a36Sopenharmony_ci u128 s0 = ((((((u128)(r0) * (r0))) + (((u128)(d4) * (r1))))) + 22962306a36Sopenharmony_ci (((u128)(d2) * (r3)))); 23062306a36Sopenharmony_ci u128 s1 = ((((((u128)(d0) * (r1))) + (((u128)(d4) * (r2))))) + 23162306a36Sopenharmony_ci (((u128)(r3 * 19) * (r3)))); 23262306a36Sopenharmony_ci u128 s2 = ((((((u128)(d0) * (r2))) + (((u128)(r1) * (r1))))) + 23362306a36Sopenharmony_ci (((u128)(d4) * (r3)))); 23462306a36Sopenharmony_ci u128 s3 = ((((((u128)(d0) * (r3))) + (((u128)(d1) * (r2))))) + 23562306a36Sopenharmony_ci (((u128)(r4) * (d419)))); 23662306a36Sopenharmony_ci u128 s4 = ((((((u128)(d0) * (r4))) + (((u128)(d1) * (r3))))) + 23762306a36Sopenharmony_ci (((u128)(r2) * (r2)))); 23862306a36Sopenharmony_ci tmp[0] = s0; 23962306a36Sopenharmony_ci tmp[1] = s1; 24062306a36Sopenharmony_ci tmp[2] = s2; 24162306a36Sopenharmony_ci tmp[3] = s3; 24262306a36Sopenharmony_ci tmp[4] = s4; 24362306a36Sopenharmony_ci} 24462306a36Sopenharmony_ci 24562306a36Sopenharmony_cistatic __always_inline void fsquare_fsquare_(u128 *tmp, u64 *output) 24662306a36Sopenharmony_ci{ 24762306a36Sopenharmony_ci u128 b4; 24862306a36Sopenharmony_ci u128 b0; 24962306a36Sopenharmony_ci u128 b4_; 25062306a36Sopenharmony_ci u128 b0_; 25162306a36Sopenharmony_ci u64 i0; 25262306a36Sopenharmony_ci u64 i1; 25362306a36Sopenharmony_ci u64 i0_; 25462306a36Sopenharmony_ci u64 i1_; 25562306a36Sopenharmony_ci fsquare_fsquare__(tmp, output); 25662306a36Sopenharmony_ci fproduct_carry_wide_(tmp); 25762306a36Sopenharmony_ci b4 = tmp[4]; 25862306a36Sopenharmony_ci b0 = tmp[0]; 25962306a36Sopenharmony_ci b4_ = ((b4) & (((u128)(0x7ffffffffffffLLU)))); 26062306a36Sopenharmony_ci b0_ = ((b0) + (((u128)(19) * (((u64)(((b4) >> (51)))))))); 26162306a36Sopenharmony_ci tmp[4] = b4_; 26262306a36Sopenharmony_ci tmp[0] = b0_; 26362306a36Sopenharmony_ci fproduct_copy_from_wide_(output, tmp); 26462306a36Sopenharmony_ci i0 = output[0]; 26562306a36Sopenharmony_ci i1 = output[1]; 26662306a36Sopenharmony_ci i0_ = i0 & 0x7ffffffffffffLLU; 26762306a36Sopenharmony_ci i1_ = i1 + (i0 >> 51); 26862306a36Sopenharmony_ci output[0] = i0_; 26962306a36Sopenharmony_ci output[1] = i1_; 27062306a36Sopenharmony_ci} 27162306a36Sopenharmony_ci 27262306a36Sopenharmony_cistatic __always_inline void fsquare_fsquare_times_(u64 *output, u128 *tmp, 27362306a36Sopenharmony_ci u32 count1) 27462306a36Sopenharmony_ci{ 27562306a36Sopenharmony_ci u32 i; 27662306a36Sopenharmony_ci fsquare_fsquare_(tmp, output); 27762306a36Sopenharmony_ci for (i = 1; i < count1; ++i) 27862306a36Sopenharmony_ci fsquare_fsquare_(tmp, output); 27962306a36Sopenharmony_ci} 28062306a36Sopenharmony_ci 28162306a36Sopenharmony_cistatic __always_inline void fsquare_fsquare_times(u64 *output, u64 *input, 28262306a36Sopenharmony_ci u32 count1) 28362306a36Sopenharmony_ci{ 28462306a36Sopenharmony_ci u128 t[5]; 28562306a36Sopenharmony_ci memcpy(output, input, 5 * sizeof(*input)); 28662306a36Sopenharmony_ci fsquare_fsquare_times_(output, t, count1); 28762306a36Sopenharmony_ci} 28862306a36Sopenharmony_ci 28962306a36Sopenharmony_cistatic __always_inline void fsquare_fsquare_times_inplace(u64 *output, 29062306a36Sopenharmony_ci u32 count1) 29162306a36Sopenharmony_ci{ 29262306a36Sopenharmony_ci u128 t[5]; 29362306a36Sopenharmony_ci fsquare_fsquare_times_(output, t, count1); 29462306a36Sopenharmony_ci} 29562306a36Sopenharmony_ci 29662306a36Sopenharmony_cistatic __always_inline void crecip_crecip(u64 *out, u64 *z) 29762306a36Sopenharmony_ci{ 29862306a36Sopenharmony_ci u64 buf[20] = { 0 }; 29962306a36Sopenharmony_ci u64 *a0 = buf; 30062306a36Sopenharmony_ci u64 *t00 = buf + 5; 30162306a36Sopenharmony_ci u64 *b0 = buf + 10; 30262306a36Sopenharmony_ci u64 *t01; 30362306a36Sopenharmony_ci u64 *b1; 30462306a36Sopenharmony_ci u64 *c0; 30562306a36Sopenharmony_ci u64 *a; 30662306a36Sopenharmony_ci u64 *t0; 30762306a36Sopenharmony_ci u64 *b; 30862306a36Sopenharmony_ci u64 *c; 30962306a36Sopenharmony_ci fsquare_fsquare_times(a0, z, 1); 31062306a36Sopenharmony_ci fsquare_fsquare_times(t00, a0, 2); 31162306a36Sopenharmony_ci fmul_fmul(b0, t00, z); 31262306a36Sopenharmony_ci fmul_fmul(a0, b0, a0); 31362306a36Sopenharmony_ci fsquare_fsquare_times(t00, a0, 1); 31462306a36Sopenharmony_ci fmul_fmul(b0, t00, b0); 31562306a36Sopenharmony_ci fsquare_fsquare_times(t00, b0, 5); 31662306a36Sopenharmony_ci t01 = buf + 5; 31762306a36Sopenharmony_ci b1 = buf + 10; 31862306a36Sopenharmony_ci c0 = buf + 15; 31962306a36Sopenharmony_ci fmul_fmul(b1, t01, b1); 32062306a36Sopenharmony_ci fsquare_fsquare_times(t01, b1, 10); 32162306a36Sopenharmony_ci fmul_fmul(c0, t01, b1); 32262306a36Sopenharmony_ci fsquare_fsquare_times(t01, c0, 20); 32362306a36Sopenharmony_ci fmul_fmul(t01, t01, c0); 32462306a36Sopenharmony_ci fsquare_fsquare_times_inplace(t01, 10); 32562306a36Sopenharmony_ci fmul_fmul(b1, t01, b1); 32662306a36Sopenharmony_ci fsquare_fsquare_times(t01, b1, 50); 32762306a36Sopenharmony_ci a = buf; 32862306a36Sopenharmony_ci t0 = buf + 5; 32962306a36Sopenharmony_ci b = buf + 10; 33062306a36Sopenharmony_ci c = buf + 15; 33162306a36Sopenharmony_ci fmul_fmul(c, t0, b); 33262306a36Sopenharmony_ci fsquare_fsquare_times(t0, c, 100); 33362306a36Sopenharmony_ci fmul_fmul(t0, t0, c); 33462306a36Sopenharmony_ci fsquare_fsquare_times_inplace(t0, 50); 33562306a36Sopenharmony_ci fmul_fmul(t0, t0, b); 33662306a36Sopenharmony_ci fsquare_fsquare_times_inplace(t0, 5); 33762306a36Sopenharmony_ci fmul_fmul(out, t0, a); 33862306a36Sopenharmony_ci} 33962306a36Sopenharmony_ci 34062306a36Sopenharmony_cistatic __always_inline void fsum(u64 *a, u64 *b) 34162306a36Sopenharmony_ci{ 34262306a36Sopenharmony_ci a[0] += b[0]; 34362306a36Sopenharmony_ci a[1] += b[1]; 34462306a36Sopenharmony_ci a[2] += b[2]; 34562306a36Sopenharmony_ci a[3] += b[3]; 34662306a36Sopenharmony_ci a[4] += b[4]; 34762306a36Sopenharmony_ci} 34862306a36Sopenharmony_ci 34962306a36Sopenharmony_cistatic __always_inline void fdifference(u64 *a, u64 *b) 35062306a36Sopenharmony_ci{ 35162306a36Sopenharmony_ci u64 tmp[5] = { 0 }; 35262306a36Sopenharmony_ci u64 b0; 35362306a36Sopenharmony_ci u64 b1; 35462306a36Sopenharmony_ci u64 b2; 35562306a36Sopenharmony_ci u64 b3; 35662306a36Sopenharmony_ci u64 b4; 35762306a36Sopenharmony_ci memcpy(tmp, b, 5 * sizeof(*b)); 35862306a36Sopenharmony_ci b0 = tmp[0]; 35962306a36Sopenharmony_ci b1 = tmp[1]; 36062306a36Sopenharmony_ci b2 = tmp[2]; 36162306a36Sopenharmony_ci b3 = tmp[3]; 36262306a36Sopenharmony_ci b4 = tmp[4]; 36362306a36Sopenharmony_ci tmp[0] = b0 + 0x3fffffffffff68LLU; 36462306a36Sopenharmony_ci tmp[1] = b1 + 0x3ffffffffffff8LLU; 36562306a36Sopenharmony_ci tmp[2] = b2 + 0x3ffffffffffff8LLU; 36662306a36Sopenharmony_ci tmp[3] = b3 + 0x3ffffffffffff8LLU; 36762306a36Sopenharmony_ci tmp[4] = b4 + 0x3ffffffffffff8LLU; 36862306a36Sopenharmony_ci { 36962306a36Sopenharmony_ci u64 xi = a[0]; 37062306a36Sopenharmony_ci u64 yi = tmp[0]; 37162306a36Sopenharmony_ci a[0] = yi - xi; 37262306a36Sopenharmony_ci } 37362306a36Sopenharmony_ci { 37462306a36Sopenharmony_ci u64 xi = a[1]; 37562306a36Sopenharmony_ci u64 yi = tmp[1]; 37662306a36Sopenharmony_ci a[1] = yi - xi; 37762306a36Sopenharmony_ci } 37862306a36Sopenharmony_ci { 37962306a36Sopenharmony_ci u64 xi = a[2]; 38062306a36Sopenharmony_ci u64 yi = tmp[2]; 38162306a36Sopenharmony_ci a[2] = yi - xi; 38262306a36Sopenharmony_ci } 38362306a36Sopenharmony_ci { 38462306a36Sopenharmony_ci u64 xi = a[3]; 38562306a36Sopenharmony_ci u64 yi = tmp[3]; 38662306a36Sopenharmony_ci a[3] = yi - xi; 38762306a36Sopenharmony_ci } 38862306a36Sopenharmony_ci { 38962306a36Sopenharmony_ci u64 xi = a[4]; 39062306a36Sopenharmony_ci u64 yi = tmp[4]; 39162306a36Sopenharmony_ci a[4] = yi - xi; 39262306a36Sopenharmony_ci } 39362306a36Sopenharmony_ci} 39462306a36Sopenharmony_ci 39562306a36Sopenharmony_cistatic __always_inline void fscalar(u64 *output, u64 *b, u64 s) 39662306a36Sopenharmony_ci{ 39762306a36Sopenharmony_ci u128 tmp[5]; 39862306a36Sopenharmony_ci u128 b4; 39962306a36Sopenharmony_ci u128 b0; 40062306a36Sopenharmony_ci u128 b4_; 40162306a36Sopenharmony_ci u128 b0_; 40262306a36Sopenharmony_ci { 40362306a36Sopenharmony_ci u64 xi = b[0]; 40462306a36Sopenharmony_ci tmp[0] = ((u128)(xi) * (s)); 40562306a36Sopenharmony_ci } 40662306a36Sopenharmony_ci { 40762306a36Sopenharmony_ci u64 xi = b[1]; 40862306a36Sopenharmony_ci tmp[1] = ((u128)(xi) * (s)); 40962306a36Sopenharmony_ci } 41062306a36Sopenharmony_ci { 41162306a36Sopenharmony_ci u64 xi = b[2]; 41262306a36Sopenharmony_ci tmp[2] = ((u128)(xi) * (s)); 41362306a36Sopenharmony_ci } 41462306a36Sopenharmony_ci { 41562306a36Sopenharmony_ci u64 xi = b[3]; 41662306a36Sopenharmony_ci tmp[3] = ((u128)(xi) * (s)); 41762306a36Sopenharmony_ci } 41862306a36Sopenharmony_ci { 41962306a36Sopenharmony_ci u64 xi = b[4]; 42062306a36Sopenharmony_ci tmp[4] = ((u128)(xi) * (s)); 42162306a36Sopenharmony_ci } 42262306a36Sopenharmony_ci fproduct_carry_wide_(tmp); 42362306a36Sopenharmony_ci b4 = tmp[4]; 42462306a36Sopenharmony_ci b0 = tmp[0]; 42562306a36Sopenharmony_ci b4_ = ((b4) & (((u128)(0x7ffffffffffffLLU)))); 42662306a36Sopenharmony_ci b0_ = ((b0) + (((u128)(19) * (((u64)(((b4) >> (51)))))))); 42762306a36Sopenharmony_ci tmp[4] = b4_; 42862306a36Sopenharmony_ci tmp[0] = b0_; 42962306a36Sopenharmony_ci fproduct_copy_from_wide_(output, tmp); 43062306a36Sopenharmony_ci} 43162306a36Sopenharmony_ci 43262306a36Sopenharmony_cistatic __always_inline void fmul(u64 *output, u64 *a, u64 *b) 43362306a36Sopenharmony_ci{ 43462306a36Sopenharmony_ci fmul_fmul(output, a, b); 43562306a36Sopenharmony_ci} 43662306a36Sopenharmony_ci 43762306a36Sopenharmony_cistatic __always_inline void crecip(u64 *output, u64 *input) 43862306a36Sopenharmony_ci{ 43962306a36Sopenharmony_ci crecip_crecip(output, input); 44062306a36Sopenharmony_ci} 44162306a36Sopenharmony_ci 44262306a36Sopenharmony_cistatic __always_inline void point_swap_conditional_step(u64 *a, u64 *b, 44362306a36Sopenharmony_ci u64 swap1, u32 ctr) 44462306a36Sopenharmony_ci{ 44562306a36Sopenharmony_ci u32 i = ctr - 1; 44662306a36Sopenharmony_ci u64 ai = a[i]; 44762306a36Sopenharmony_ci u64 bi = b[i]; 44862306a36Sopenharmony_ci u64 x = swap1 & (ai ^ bi); 44962306a36Sopenharmony_ci u64 ai1 = ai ^ x; 45062306a36Sopenharmony_ci u64 bi1 = bi ^ x; 45162306a36Sopenharmony_ci a[i] = ai1; 45262306a36Sopenharmony_ci b[i] = bi1; 45362306a36Sopenharmony_ci} 45462306a36Sopenharmony_ci 45562306a36Sopenharmony_cistatic __always_inline void point_swap_conditional5(u64 *a, u64 *b, u64 swap1) 45662306a36Sopenharmony_ci{ 45762306a36Sopenharmony_ci point_swap_conditional_step(a, b, swap1, 5); 45862306a36Sopenharmony_ci point_swap_conditional_step(a, b, swap1, 4); 45962306a36Sopenharmony_ci point_swap_conditional_step(a, b, swap1, 3); 46062306a36Sopenharmony_ci point_swap_conditional_step(a, b, swap1, 2); 46162306a36Sopenharmony_ci point_swap_conditional_step(a, b, swap1, 1); 46262306a36Sopenharmony_ci} 46362306a36Sopenharmony_ci 46462306a36Sopenharmony_cistatic __always_inline void point_swap_conditional(u64 *a, u64 *b, u64 iswap) 46562306a36Sopenharmony_ci{ 46662306a36Sopenharmony_ci u64 swap1 = 0 - iswap; 46762306a36Sopenharmony_ci point_swap_conditional5(a, b, swap1); 46862306a36Sopenharmony_ci point_swap_conditional5(a + 5, b + 5, swap1); 46962306a36Sopenharmony_ci} 47062306a36Sopenharmony_ci 47162306a36Sopenharmony_cistatic __always_inline void point_copy(u64 *output, u64 *input) 47262306a36Sopenharmony_ci{ 47362306a36Sopenharmony_ci memcpy(output, input, 5 * sizeof(*input)); 47462306a36Sopenharmony_ci memcpy(output + 5, input + 5, 5 * sizeof(*input)); 47562306a36Sopenharmony_ci} 47662306a36Sopenharmony_ci 47762306a36Sopenharmony_cistatic __always_inline void addanddouble_fmonty(u64 *pp, u64 *ppq, u64 *p, 47862306a36Sopenharmony_ci u64 *pq, u64 *qmqp) 47962306a36Sopenharmony_ci{ 48062306a36Sopenharmony_ci u64 *qx = qmqp; 48162306a36Sopenharmony_ci u64 *x2 = pp; 48262306a36Sopenharmony_ci u64 *z2 = pp + 5; 48362306a36Sopenharmony_ci u64 *x3 = ppq; 48462306a36Sopenharmony_ci u64 *z3 = ppq + 5; 48562306a36Sopenharmony_ci u64 *x = p; 48662306a36Sopenharmony_ci u64 *z = p + 5; 48762306a36Sopenharmony_ci u64 *xprime = pq; 48862306a36Sopenharmony_ci u64 *zprime = pq + 5; 48962306a36Sopenharmony_ci u64 buf[40] = { 0 }; 49062306a36Sopenharmony_ci u64 *origx = buf; 49162306a36Sopenharmony_ci u64 *origxprime0 = buf + 5; 49262306a36Sopenharmony_ci u64 *xxprime0; 49362306a36Sopenharmony_ci u64 *zzprime0; 49462306a36Sopenharmony_ci u64 *origxprime; 49562306a36Sopenharmony_ci xxprime0 = buf + 25; 49662306a36Sopenharmony_ci zzprime0 = buf + 30; 49762306a36Sopenharmony_ci memcpy(origx, x, 5 * sizeof(*x)); 49862306a36Sopenharmony_ci fsum(x, z); 49962306a36Sopenharmony_ci fdifference(z, origx); 50062306a36Sopenharmony_ci memcpy(origxprime0, xprime, 5 * sizeof(*xprime)); 50162306a36Sopenharmony_ci fsum(xprime, zprime); 50262306a36Sopenharmony_ci fdifference(zprime, origxprime0); 50362306a36Sopenharmony_ci fmul(xxprime0, xprime, z); 50462306a36Sopenharmony_ci fmul(zzprime0, x, zprime); 50562306a36Sopenharmony_ci origxprime = buf + 5; 50662306a36Sopenharmony_ci { 50762306a36Sopenharmony_ci u64 *xx0; 50862306a36Sopenharmony_ci u64 *zz0; 50962306a36Sopenharmony_ci u64 *xxprime; 51062306a36Sopenharmony_ci u64 *zzprime; 51162306a36Sopenharmony_ci u64 *zzzprime; 51262306a36Sopenharmony_ci xx0 = buf + 15; 51362306a36Sopenharmony_ci zz0 = buf + 20; 51462306a36Sopenharmony_ci xxprime = buf + 25; 51562306a36Sopenharmony_ci zzprime = buf + 30; 51662306a36Sopenharmony_ci zzzprime = buf + 35; 51762306a36Sopenharmony_ci memcpy(origxprime, xxprime, 5 * sizeof(*xxprime)); 51862306a36Sopenharmony_ci fsum(xxprime, zzprime); 51962306a36Sopenharmony_ci fdifference(zzprime, origxprime); 52062306a36Sopenharmony_ci fsquare_fsquare_times(x3, xxprime, 1); 52162306a36Sopenharmony_ci fsquare_fsquare_times(zzzprime, zzprime, 1); 52262306a36Sopenharmony_ci fmul(z3, zzzprime, qx); 52362306a36Sopenharmony_ci fsquare_fsquare_times(xx0, x, 1); 52462306a36Sopenharmony_ci fsquare_fsquare_times(zz0, z, 1); 52562306a36Sopenharmony_ci { 52662306a36Sopenharmony_ci u64 *zzz; 52762306a36Sopenharmony_ci u64 *xx; 52862306a36Sopenharmony_ci u64 *zz; 52962306a36Sopenharmony_ci u64 scalar; 53062306a36Sopenharmony_ci zzz = buf + 10; 53162306a36Sopenharmony_ci xx = buf + 15; 53262306a36Sopenharmony_ci zz = buf + 20; 53362306a36Sopenharmony_ci fmul(x2, xx, zz); 53462306a36Sopenharmony_ci fdifference(zz, xx); 53562306a36Sopenharmony_ci scalar = 121665; 53662306a36Sopenharmony_ci fscalar(zzz, zz, scalar); 53762306a36Sopenharmony_ci fsum(zzz, xx); 53862306a36Sopenharmony_ci fmul(z2, zzz, zz); 53962306a36Sopenharmony_ci } 54062306a36Sopenharmony_ci } 54162306a36Sopenharmony_ci} 54262306a36Sopenharmony_ci 54362306a36Sopenharmony_cistatic __always_inline void 54462306a36Sopenharmony_ciladder_smallloop_cmult_small_loop_step(u64 *nq, u64 *nqpq, u64 *nq2, u64 *nqpq2, 54562306a36Sopenharmony_ci u64 *q, u8 byt) 54662306a36Sopenharmony_ci{ 54762306a36Sopenharmony_ci u64 bit0 = (u64)(byt >> 7); 54862306a36Sopenharmony_ci u64 bit; 54962306a36Sopenharmony_ci point_swap_conditional(nq, nqpq, bit0); 55062306a36Sopenharmony_ci addanddouble_fmonty(nq2, nqpq2, nq, nqpq, q); 55162306a36Sopenharmony_ci bit = (u64)(byt >> 7); 55262306a36Sopenharmony_ci point_swap_conditional(nq2, nqpq2, bit); 55362306a36Sopenharmony_ci} 55462306a36Sopenharmony_ci 55562306a36Sopenharmony_cistatic __always_inline void 55662306a36Sopenharmony_ciladder_smallloop_cmult_small_loop_double_step(u64 *nq, u64 *nqpq, u64 *nq2, 55762306a36Sopenharmony_ci u64 *nqpq2, u64 *q, u8 byt) 55862306a36Sopenharmony_ci{ 55962306a36Sopenharmony_ci u8 byt1; 56062306a36Sopenharmony_ci ladder_smallloop_cmult_small_loop_step(nq, nqpq, nq2, nqpq2, q, byt); 56162306a36Sopenharmony_ci byt1 = byt << 1; 56262306a36Sopenharmony_ci ladder_smallloop_cmult_small_loop_step(nq2, nqpq2, nq, nqpq, q, byt1); 56362306a36Sopenharmony_ci} 56462306a36Sopenharmony_ci 56562306a36Sopenharmony_cistatic __always_inline void 56662306a36Sopenharmony_ciladder_smallloop_cmult_small_loop(u64 *nq, u64 *nqpq, u64 *nq2, u64 *nqpq2, 56762306a36Sopenharmony_ci u64 *q, u8 byt, u32 i) 56862306a36Sopenharmony_ci{ 56962306a36Sopenharmony_ci while (i--) { 57062306a36Sopenharmony_ci ladder_smallloop_cmult_small_loop_double_step(nq, nqpq, nq2, 57162306a36Sopenharmony_ci nqpq2, q, byt); 57262306a36Sopenharmony_ci byt <<= 2; 57362306a36Sopenharmony_ci } 57462306a36Sopenharmony_ci} 57562306a36Sopenharmony_ci 57662306a36Sopenharmony_cistatic __always_inline void ladder_bigloop_cmult_big_loop(u8 *n1, u64 *nq, 57762306a36Sopenharmony_ci u64 *nqpq, u64 *nq2, 57862306a36Sopenharmony_ci u64 *nqpq2, u64 *q, 57962306a36Sopenharmony_ci u32 i) 58062306a36Sopenharmony_ci{ 58162306a36Sopenharmony_ci while (i--) { 58262306a36Sopenharmony_ci u8 byte = n1[i]; 58362306a36Sopenharmony_ci ladder_smallloop_cmult_small_loop(nq, nqpq, nq2, nqpq2, q, 58462306a36Sopenharmony_ci byte, 4); 58562306a36Sopenharmony_ci } 58662306a36Sopenharmony_ci} 58762306a36Sopenharmony_ci 58862306a36Sopenharmony_cistatic void ladder_cmult(u64 *result, u8 *n1, u64 *q) 58962306a36Sopenharmony_ci{ 59062306a36Sopenharmony_ci u64 point_buf[40] = { 0 }; 59162306a36Sopenharmony_ci u64 *nq = point_buf; 59262306a36Sopenharmony_ci u64 *nqpq = point_buf + 10; 59362306a36Sopenharmony_ci u64 *nq2 = point_buf + 20; 59462306a36Sopenharmony_ci u64 *nqpq2 = point_buf + 30; 59562306a36Sopenharmony_ci point_copy(nqpq, q); 59662306a36Sopenharmony_ci nq[0] = 1; 59762306a36Sopenharmony_ci ladder_bigloop_cmult_big_loop(n1, nq, nqpq, nq2, nqpq2, q, 32); 59862306a36Sopenharmony_ci point_copy(result, nq); 59962306a36Sopenharmony_ci} 60062306a36Sopenharmony_ci 60162306a36Sopenharmony_cistatic __always_inline void format_fexpand(u64 *output, const u8 *input) 60262306a36Sopenharmony_ci{ 60362306a36Sopenharmony_ci const u8 *x00 = input + 6; 60462306a36Sopenharmony_ci const u8 *x01 = input + 12; 60562306a36Sopenharmony_ci const u8 *x02 = input + 19; 60662306a36Sopenharmony_ci const u8 *x0 = input + 24; 60762306a36Sopenharmony_ci u64 i0, i1, i2, i3, i4, output0, output1, output2, output3, output4; 60862306a36Sopenharmony_ci i0 = get_unaligned_le64(input); 60962306a36Sopenharmony_ci i1 = get_unaligned_le64(x00); 61062306a36Sopenharmony_ci i2 = get_unaligned_le64(x01); 61162306a36Sopenharmony_ci i3 = get_unaligned_le64(x02); 61262306a36Sopenharmony_ci i4 = get_unaligned_le64(x0); 61362306a36Sopenharmony_ci output0 = i0 & 0x7ffffffffffffLLU; 61462306a36Sopenharmony_ci output1 = i1 >> 3 & 0x7ffffffffffffLLU; 61562306a36Sopenharmony_ci output2 = i2 >> 6 & 0x7ffffffffffffLLU; 61662306a36Sopenharmony_ci output3 = i3 >> 1 & 0x7ffffffffffffLLU; 61762306a36Sopenharmony_ci output4 = i4 >> 12 & 0x7ffffffffffffLLU; 61862306a36Sopenharmony_ci output[0] = output0; 61962306a36Sopenharmony_ci output[1] = output1; 62062306a36Sopenharmony_ci output[2] = output2; 62162306a36Sopenharmony_ci output[3] = output3; 62262306a36Sopenharmony_ci output[4] = output4; 62362306a36Sopenharmony_ci} 62462306a36Sopenharmony_ci 62562306a36Sopenharmony_cistatic __always_inline void format_fcontract_first_carry_pass(u64 *input) 62662306a36Sopenharmony_ci{ 62762306a36Sopenharmony_ci u64 t0 = input[0]; 62862306a36Sopenharmony_ci u64 t1 = input[1]; 62962306a36Sopenharmony_ci u64 t2 = input[2]; 63062306a36Sopenharmony_ci u64 t3 = input[3]; 63162306a36Sopenharmony_ci u64 t4 = input[4]; 63262306a36Sopenharmony_ci u64 t1_ = t1 + (t0 >> 51); 63362306a36Sopenharmony_ci u64 t0_ = t0 & 0x7ffffffffffffLLU; 63462306a36Sopenharmony_ci u64 t2_ = t2 + (t1_ >> 51); 63562306a36Sopenharmony_ci u64 t1__ = t1_ & 0x7ffffffffffffLLU; 63662306a36Sopenharmony_ci u64 t3_ = t3 + (t2_ >> 51); 63762306a36Sopenharmony_ci u64 t2__ = t2_ & 0x7ffffffffffffLLU; 63862306a36Sopenharmony_ci u64 t4_ = t4 + (t3_ >> 51); 63962306a36Sopenharmony_ci u64 t3__ = t3_ & 0x7ffffffffffffLLU; 64062306a36Sopenharmony_ci input[0] = t0_; 64162306a36Sopenharmony_ci input[1] = t1__; 64262306a36Sopenharmony_ci input[2] = t2__; 64362306a36Sopenharmony_ci input[3] = t3__; 64462306a36Sopenharmony_ci input[4] = t4_; 64562306a36Sopenharmony_ci} 64662306a36Sopenharmony_ci 64762306a36Sopenharmony_cistatic __always_inline void format_fcontract_first_carry_full(u64 *input) 64862306a36Sopenharmony_ci{ 64962306a36Sopenharmony_ci format_fcontract_first_carry_pass(input); 65062306a36Sopenharmony_ci modulo_carry_top(input); 65162306a36Sopenharmony_ci} 65262306a36Sopenharmony_ci 65362306a36Sopenharmony_cistatic __always_inline void format_fcontract_second_carry_pass(u64 *input) 65462306a36Sopenharmony_ci{ 65562306a36Sopenharmony_ci u64 t0 = input[0]; 65662306a36Sopenharmony_ci u64 t1 = input[1]; 65762306a36Sopenharmony_ci u64 t2 = input[2]; 65862306a36Sopenharmony_ci u64 t3 = input[3]; 65962306a36Sopenharmony_ci u64 t4 = input[4]; 66062306a36Sopenharmony_ci u64 t1_ = t1 + (t0 >> 51); 66162306a36Sopenharmony_ci u64 t0_ = t0 & 0x7ffffffffffffLLU; 66262306a36Sopenharmony_ci u64 t2_ = t2 + (t1_ >> 51); 66362306a36Sopenharmony_ci u64 t1__ = t1_ & 0x7ffffffffffffLLU; 66462306a36Sopenharmony_ci u64 t3_ = t3 + (t2_ >> 51); 66562306a36Sopenharmony_ci u64 t2__ = t2_ & 0x7ffffffffffffLLU; 66662306a36Sopenharmony_ci u64 t4_ = t4 + (t3_ >> 51); 66762306a36Sopenharmony_ci u64 t3__ = t3_ & 0x7ffffffffffffLLU; 66862306a36Sopenharmony_ci input[0] = t0_; 66962306a36Sopenharmony_ci input[1] = t1__; 67062306a36Sopenharmony_ci input[2] = t2__; 67162306a36Sopenharmony_ci input[3] = t3__; 67262306a36Sopenharmony_ci input[4] = t4_; 67362306a36Sopenharmony_ci} 67462306a36Sopenharmony_ci 67562306a36Sopenharmony_cistatic __always_inline void format_fcontract_second_carry_full(u64 *input) 67662306a36Sopenharmony_ci{ 67762306a36Sopenharmony_ci u64 i0; 67862306a36Sopenharmony_ci u64 i1; 67962306a36Sopenharmony_ci u64 i0_; 68062306a36Sopenharmony_ci u64 i1_; 68162306a36Sopenharmony_ci format_fcontract_second_carry_pass(input); 68262306a36Sopenharmony_ci modulo_carry_top(input); 68362306a36Sopenharmony_ci i0 = input[0]; 68462306a36Sopenharmony_ci i1 = input[1]; 68562306a36Sopenharmony_ci i0_ = i0 & 0x7ffffffffffffLLU; 68662306a36Sopenharmony_ci i1_ = i1 + (i0 >> 51); 68762306a36Sopenharmony_ci input[0] = i0_; 68862306a36Sopenharmony_ci input[1] = i1_; 68962306a36Sopenharmony_ci} 69062306a36Sopenharmony_ci 69162306a36Sopenharmony_cistatic __always_inline void format_fcontract_trim(u64 *input) 69262306a36Sopenharmony_ci{ 69362306a36Sopenharmony_ci u64 a0 = input[0]; 69462306a36Sopenharmony_ci u64 a1 = input[1]; 69562306a36Sopenharmony_ci u64 a2 = input[2]; 69662306a36Sopenharmony_ci u64 a3 = input[3]; 69762306a36Sopenharmony_ci u64 a4 = input[4]; 69862306a36Sopenharmony_ci u64 mask0 = u64_gte_mask(a0, 0x7ffffffffffedLLU); 69962306a36Sopenharmony_ci u64 mask1 = u64_eq_mask(a1, 0x7ffffffffffffLLU); 70062306a36Sopenharmony_ci u64 mask2 = u64_eq_mask(a2, 0x7ffffffffffffLLU); 70162306a36Sopenharmony_ci u64 mask3 = u64_eq_mask(a3, 0x7ffffffffffffLLU); 70262306a36Sopenharmony_ci u64 mask4 = u64_eq_mask(a4, 0x7ffffffffffffLLU); 70362306a36Sopenharmony_ci u64 mask = (((mask0 & mask1) & mask2) & mask3) & mask4; 70462306a36Sopenharmony_ci u64 a0_ = a0 - (0x7ffffffffffedLLU & mask); 70562306a36Sopenharmony_ci u64 a1_ = a1 - (0x7ffffffffffffLLU & mask); 70662306a36Sopenharmony_ci u64 a2_ = a2 - (0x7ffffffffffffLLU & mask); 70762306a36Sopenharmony_ci u64 a3_ = a3 - (0x7ffffffffffffLLU & mask); 70862306a36Sopenharmony_ci u64 a4_ = a4 - (0x7ffffffffffffLLU & mask); 70962306a36Sopenharmony_ci input[0] = a0_; 71062306a36Sopenharmony_ci input[1] = a1_; 71162306a36Sopenharmony_ci input[2] = a2_; 71262306a36Sopenharmony_ci input[3] = a3_; 71362306a36Sopenharmony_ci input[4] = a4_; 71462306a36Sopenharmony_ci} 71562306a36Sopenharmony_ci 71662306a36Sopenharmony_cistatic __always_inline void format_fcontract_store(u8 *output, u64 *input) 71762306a36Sopenharmony_ci{ 71862306a36Sopenharmony_ci u64 t0 = input[0]; 71962306a36Sopenharmony_ci u64 t1 = input[1]; 72062306a36Sopenharmony_ci u64 t2 = input[2]; 72162306a36Sopenharmony_ci u64 t3 = input[3]; 72262306a36Sopenharmony_ci u64 t4 = input[4]; 72362306a36Sopenharmony_ci u64 o0 = t1 << 51 | t0; 72462306a36Sopenharmony_ci u64 o1 = t2 << 38 | t1 >> 13; 72562306a36Sopenharmony_ci u64 o2 = t3 << 25 | t2 >> 26; 72662306a36Sopenharmony_ci u64 o3 = t4 << 12 | t3 >> 39; 72762306a36Sopenharmony_ci u8 *b0 = output; 72862306a36Sopenharmony_ci u8 *b1 = output + 8; 72962306a36Sopenharmony_ci u8 *b2 = output + 16; 73062306a36Sopenharmony_ci u8 *b3 = output + 24; 73162306a36Sopenharmony_ci put_unaligned_le64(o0, b0); 73262306a36Sopenharmony_ci put_unaligned_le64(o1, b1); 73362306a36Sopenharmony_ci put_unaligned_le64(o2, b2); 73462306a36Sopenharmony_ci put_unaligned_le64(o3, b3); 73562306a36Sopenharmony_ci} 73662306a36Sopenharmony_ci 73762306a36Sopenharmony_cistatic __always_inline void format_fcontract(u8 *output, u64 *input) 73862306a36Sopenharmony_ci{ 73962306a36Sopenharmony_ci format_fcontract_first_carry_full(input); 74062306a36Sopenharmony_ci format_fcontract_second_carry_full(input); 74162306a36Sopenharmony_ci format_fcontract_trim(input); 74262306a36Sopenharmony_ci format_fcontract_store(output, input); 74362306a36Sopenharmony_ci} 74462306a36Sopenharmony_ci 74562306a36Sopenharmony_cistatic __always_inline void format_scalar_of_point(u8 *scalar, u64 *point) 74662306a36Sopenharmony_ci{ 74762306a36Sopenharmony_ci u64 *x = point; 74862306a36Sopenharmony_ci u64 *z = point + 5; 74962306a36Sopenharmony_ci u64 buf[10] __aligned(32) = { 0 }; 75062306a36Sopenharmony_ci u64 *zmone = buf; 75162306a36Sopenharmony_ci u64 *sc = buf + 5; 75262306a36Sopenharmony_ci crecip(zmone, z); 75362306a36Sopenharmony_ci fmul(sc, x, zmone); 75462306a36Sopenharmony_ci format_fcontract(scalar, sc); 75562306a36Sopenharmony_ci} 75662306a36Sopenharmony_ci 75762306a36Sopenharmony_civoid curve25519_generic(u8 mypublic[CURVE25519_KEY_SIZE], 75862306a36Sopenharmony_ci const u8 secret[CURVE25519_KEY_SIZE], 75962306a36Sopenharmony_ci const u8 basepoint[CURVE25519_KEY_SIZE]) 76062306a36Sopenharmony_ci{ 76162306a36Sopenharmony_ci u64 buf0[10] __aligned(32) = { 0 }; 76262306a36Sopenharmony_ci u64 *x0 = buf0; 76362306a36Sopenharmony_ci u64 *z = buf0 + 5; 76462306a36Sopenharmony_ci u64 *q; 76562306a36Sopenharmony_ci format_fexpand(x0, basepoint); 76662306a36Sopenharmony_ci z[0] = 1; 76762306a36Sopenharmony_ci q = buf0; 76862306a36Sopenharmony_ci { 76962306a36Sopenharmony_ci u8 e[32] __aligned(32) = { 0 }; 77062306a36Sopenharmony_ci u8 *scalar; 77162306a36Sopenharmony_ci memcpy(e, secret, 32); 77262306a36Sopenharmony_ci curve25519_clamp_secret(e); 77362306a36Sopenharmony_ci scalar = e; 77462306a36Sopenharmony_ci { 77562306a36Sopenharmony_ci u64 buf[15] = { 0 }; 77662306a36Sopenharmony_ci u64 *nq = buf; 77762306a36Sopenharmony_ci u64 *x = nq; 77862306a36Sopenharmony_ci x[0] = 1; 77962306a36Sopenharmony_ci ladder_cmult(nq, scalar, q); 78062306a36Sopenharmony_ci format_scalar_of_point(mypublic, nq); 78162306a36Sopenharmony_ci memzero_explicit(buf, sizeof(buf)); 78262306a36Sopenharmony_ci } 78362306a36Sopenharmony_ci memzero_explicit(e, sizeof(e)); 78462306a36Sopenharmony_ci } 78562306a36Sopenharmony_ci memzero_explicit(buf0, sizeof(buf0)); 78662306a36Sopenharmony_ci} 787