/*
 * Copyright (c) 2013, 2014 Kenneth MacKay. All rights reserved.
 * Copyright (c) 2019 Vitaly Chikunov <vt@altlinux.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *  * Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
2662306a36Sopenharmony_ci
2762306a36Sopenharmony_ci#include <crypto/ecc_curve.h>
2862306a36Sopenharmony_ci#include <linux/module.h>
2962306a36Sopenharmony_ci#include <linux/random.h>
3062306a36Sopenharmony_ci#include <linux/slab.h>
3162306a36Sopenharmony_ci#include <linux/swab.h>
3262306a36Sopenharmony_ci#include <linux/fips.h>
3362306a36Sopenharmony_ci#include <crypto/ecdh.h>
3462306a36Sopenharmony_ci#include <crypto/rng.h>
3562306a36Sopenharmony_ci#include <crypto/internal/ecc.h>
3662306a36Sopenharmony_ci#include <asm/unaligned.h>
3762306a36Sopenharmony_ci#include <linux/ratelimit.h>
3862306a36Sopenharmony_ci
3962306a36Sopenharmony_ci#include "ecc_curve_defs.h"
4062306a36Sopenharmony_ci
/*
 * 128-bit unsigned value kept as two 64-bit halves. Used to carry the
 * full result of a 64x64-bit multiplication (see mul_64_64()).
 */
typedef struct {
	u64 m_low;	/* least significant 64 bits */
	u64 m_high;	/* most significant 64 bits */
} uint128_t;
4562306a36Sopenharmony_ci
/*
 * Returns the curve25519 curve parameters.
 *
 * The returned pointer refers to the ecc_25519 object and is never NULL.
 */
const struct ecc_curve *ecc_get_curve25519(void)
{
	return &ecc_25519;
}
EXPORT_SYMBOL(ecc_get_curve25519);
5262306a36Sopenharmony_ci
5362306a36Sopenharmony_ciconst struct ecc_curve *ecc_get_curve(unsigned int curve_id)
5462306a36Sopenharmony_ci{
5562306a36Sopenharmony_ci	switch (curve_id) {
5662306a36Sopenharmony_ci	/* In FIPS mode only allow P256 and higher */
5762306a36Sopenharmony_ci	case ECC_CURVE_NIST_P192:
5862306a36Sopenharmony_ci		return fips_enabled ? NULL : &nist_p192;
5962306a36Sopenharmony_ci	case ECC_CURVE_NIST_P256:
6062306a36Sopenharmony_ci		return &nist_p256;
6162306a36Sopenharmony_ci	case ECC_CURVE_NIST_P384:
6262306a36Sopenharmony_ci		return &nist_p384;
6362306a36Sopenharmony_ci	default:
6462306a36Sopenharmony_ci		return NULL;
6562306a36Sopenharmony_ci	}
6662306a36Sopenharmony_ci}
6762306a36Sopenharmony_ciEXPORT_SYMBOL(ecc_get_curve);
6862306a36Sopenharmony_ci
6962306a36Sopenharmony_cistatic u64 *ecc_alloc_digits_space(unsigned int ndigits)
7062306a36Sopenharmony_ci{
7162306a36Sopenharmony_ci	size_t len = ndigits * sizeof(u64);
7262306a36Sopenharmony_ci
7362306a36Sopenharmony_ci	if (!len)
7462306a36Sopenharmony_ci		return NULL;
7562306a36Sopenharmony_ci
7662306a36Sopenharmony_ci	return kmalloc(len, GFP_KERNEL);
7762306a36Sopenharmony_ci}
7862306a36Sopenharmony_ci
/*
 * Release a digit buffer by handing it to kfree_sensitive().
 * Counterpart of ecc_alloc_digits_space().
 */
static void ecc_free_digits_space(u64 *space)
{
	kfree_sensitive(space);
}
8362306a36Sopenharmony_ci
8462306a36Sopenharmony_cistruct ecc_point *ecc_alloc_point(unsigned int ndigits)
8562306a36Sopenharmony_ci{
8662306a36Sopenharmony_ci	struct ecc_point *p = kmalloc(sizeof(*p), GFP_KERNEL);
8762306a36Sopenharmony_ci
8862306a36Sopenharmony_ci	if (!p)
8962306a36Sopenharmony_ci		return NULL;
9062306a36Sopenharmony_ci
9162306a36Sopenharmony_ci	p->x = ecc_alloc_digits_space(ndigits);
9262306a36Sopenharmony_ci	if (!p->x)
9362306a36Sopenharmony_ci		goto err_alloc_x;
9462306a36Sopenharmony_ci
9562306a36Sopenharmony_ci	p->y = ecc_alloc_digits_space(ndigits);
9662306a36Sopenharmony_ci	if (!p->y)
9762306a36Sopenharmony_ci		goto err_alloc_y;
9862306a36Sopenharmony_ci
9962306a36Sopenharmony_ci	p->ndigits = ndigits;
10062306a36Sopenharmony_ci
10162306a36Sopenharmony_ci	return p;
10262306a36Sopenharmony_ci
10362306a36Sopenharmony_cierr_alloc_y:
10462306a36Sopenharmony_ci	ecc_free_digits_space(p->x);
10562306a36Sopenharmony_cierr_alloc_x:
10662306a36Sopenharmony_ci	kfree(p);
10762306a36Sopenharmony_ci	return NULL;
10862306a36Sopenharmony_ci}
10962306a36Sopenharmony_ciEXPORT_SYMBOL(ecc_alloc_point);
11062306a36Sopenharmony_ci
11162306a36Sopenharmony_civoid ecc_free_point(struct ecc_point *p)
11262306a36Sopenharmony_ci{
11362306a36Sopenharmony_ci	if (!p)
11462306a36Sopenharmony_ci		return;
11562306a36Sopenharmony_ci
11662306a36Sopenharmony_ci	kfree_sensitive(p->x);
11762306a36Sopenharmony_ci	kfree_sensitive(p->y);
11862306a36Sopenharmony_ci	kfree_sensitive(p);
11962306a36Sopenharmony_ci}
12062306a36Sopenharmony_ciEXPORT_SYMBOL(ecc_free_point);
12162306a36Sopenharmony_ci
12262306a36Sopenharmony_cistatic void vli_clear(u64 *vli, unsigned int ndigits)
12362306a36Sopenharmony_ci{
12462306a36Sopenharmony_ci	int i;
12562306a36Sopenharmony_ci
12662306a36Sopenharmony_ci	for (i = 0; i < ndigits; i++)
12762306a36Sopenharmony_ci		vli[i] = 0;
12862306a36Sopenharmony_ci}
12962306a36Sopenharmony_ci
13062306a36Sopenharmony_ci/* Returns true if vli == 0, false otherwise. */
13162306a36Sopenharmony_cibool vli_is_zero(const u64 *vli, unsigned int ndigits)
13262306a36Sopenharmony_ci{
13362306a36Sopenharmony_ci	int i;
13462306a36Sopenharmony_ci
13562306a36Sopenharmony_ci	for (i = 0; i < ndigits; i++) {
13662306a36Sopenharmony_ci		if (vli[i])
13762306a36Sopenharmony_ci			return false;
13862306a36Sopenharmony_ci	}
13962306a36Sopenharmony_ci
14062306a36Sopenharmony_ci	return true;
14162306a36Sopenharmony_ci}
14262306a36Sopenharmony_ciEXPORT_SYMBOL(vli_is_zero);
14362306a36Sopenharmony_ci
14462306a36Sopenharmony_ci/* Returns nonzero if bit of vli is set. */
14562306a36Sopenharmony_cistatic u64 vli_test_bit(const u64 *vli, unsigned int bit)
14662306a36Sopenharmony_ci{
14762306a36Sopenharmony_ci	return (vli[bit / 64] & ((u64)1 << (bit % 64)));
14862306a36Sopenharmony_ci}
14962306a36Sopenharmony_ci
15062306a36Sopenharmony_cistatic bool vli_is_negative(const u64 *vli, unsigned int ndigits)
15162306a36Sopenharmony_ci{
15262306a36Sopenharmony_ci	return vli_test_bit(vli, ndigits * 64 - 1);
15362306a36Sopenharmony_ci}
15462306a36Sopenharmony_ci
15562306a36Sopenharmony_ci/* Counts the number of 64-bit "digits" in vli. */
15662306a36Sopenharmony_cistatic unsigned int vli_num_digits(const u64 *vli, unsigned int ndigits)
15762306a36Sopenharmony_ci{
15862306a36Sopenharmony_ci	int i;
15962306a36Sopenharmony_ci
16062306a36Sopenharmony_ci	/* Search from the end until we find a non-zero digit.
16162306a36Sopenharmony_ci	 * We do it in reverse because we expect that most digits will
16262306a36Sopenharmony_ci	 * be nonzero.
16362306a36Sopenharmony_ci	 */
16462306a36Sopenharmony_ci	for (i = ndigits - 1; i >= 0 && vli[i] == 0; i--);
16562306a36Sopenharmony_ci
16662306a36Sopenharmony_ci	return (i + 1);
16762306a36Sopenharmony_ci}
16862306a36Sopenharmony_ci
16962306a36Sopenharmony_ci/* Counts the number of bits required for vli. */
17062306a36Sopenharmony_ciunsigned int vli_num_bits(const u64 *vli, unsigned int ndigits)
17162306a36Sopenharmony_ci{
17262306a36Sopenharmony_ci	unsigned int i, num_digits;
17362306a36Sopenharmony_ci	u64 digit;
17462306a36Sopenharmony_ci
17562306a36Sopenharmony_ci	num_digits = vli_num_digits(vli, ndigits);
17662306a36Sopenharmony_ci	if (num_digits == 0)
17762306a36Sopenharmony_ci		return 0;
17862306a36Sopenharmony_ci
17962306a36Sopenharmony_ci	digit = vli[num_digits - 1];
18062306a36Sopenharmony_ci	for (i = 0; digit; i++)
18162306a36Sopenharmony_ci		digit >>= 1;
18262306a36Sopenharmony_ci
18362306a36Sopenharmony_ci	return ((num_digits - 1) * 64 + i);
18462306a36Sopenharmony_ci}
18562306a36Sopenharmony_ciEXPORT_SYMBOL(vli_num_bits);
18662306a36Sopenharmony_ci
18762306a36Sopenharmony_ci/* Set dest from unaligned bit string src. */
18862306a36Sopenharmony_civoid vli_from_be64(u64 *dest, const void *src, unsigned int ndigits)
18962306a36Sopenharmony_ci{
19062306a36Sopenharmony_ci	int i;
19162306a36Sopenharmony_ci	const u64 *from = src;
19262306a36Sopenharmony_ci
19362306a36Sopenharmony_ci	for (i = 0; i < ndigits; i++)
19462306a36Sopenharmony_ci		dest[i] = get_unaligned_be64(&from[ndigits - 1 - i]);
19562306a36Sopenharmony_ci}
19662306a36Sopenharmony_ciEXPORT_SYMBOL(vli_from_be64);
19762306a36Sopenharmony_ci
19862306a36Sopenharmony_civoid vli_from_le64(u64 *dest, const void *src, unsigned int ndigits)
19962306a36Sopenharmony_ci{
20062306a36Sopenharmony_ci	int i;
20162306a36Sopenharmony_ci	const u64 *from = src;
20262306a36Sopenharmony_ci
20362306a36Sopenharmony_ci	for (i = 0; i < ndigits; i++)
20462306a36Sopenharmony_ci		dest[i] = get_unaligned_le64(&from[i]);
20562306a36Sopenharmony_ci}
20662306a36Sopenharmony_ciEXPORT_SYMBOL(vli_from_le64);
20762306a36Sopenharmony_ci
20862306a36Sopenharmony_ci/* Sets dest = src. */
20962306a36Sopenharmony_cistatic void vli_set(u64 *dest, const u64 *src, unsigned int ndigits)
21062306a36Sopenharmony_ci{
21162306a36Sopenharmony_ci	int i;
21262306a36Sopenharmony_ci
21362306a36Sopenharmony_ci	for (i = 0; i < ndigits; i++)
21462306a36Sopenharmony_ci		dest[i] = src[i];
21562306a36Sopenharmony_ci}
21662306a36Sopenharmony_ci
21762306a36Sopenharmony_ci/* Returns sign of left - right. */
21862306a36Sopenharmony_ciint vli_cmp(const u64 *left, const u64 *right, unsigned int ndigits)
21962306a36Sopenharmony_ci{
22062306a36Sopenharmony_ci	int i;
22162306a36Sopenharmony_ci
22262306a36Sopenharmony_ci	for (i = ndigits - 1; i >= 0; i--) {
22362306a36Sopenharmony_ci		if (left[i] > right[i])
22462306a36Sopenharmony_ci			return 1;
22562306a36Sopenharmony_ci		else if (left[i] < right[i])
22662306a36Sopenharmony_ci			return -1;
22762306a36Sopenharmony_ci	}
22862306a36Sopenharmony_ci
22962306a36Sopenharmony_ci	return 0;
23062306a36Sopenharmony_ci}
23162306a36Sopenharmony_ciEXPORT_SYMBOL(vli_cmp);
23262306a36Sopenharmony_ci
23362306a36Sopenharmony_ci/* Computes result = in << c, returning carry. Can modify in place
23462306a36Sopenharmony_ci * (if result == in). 0 < shift < 64.
23562306a36Sopenharmony_ci */
23662306a36Sopenharmony_cistatic u64 vli_lshift(u64 *result, const u64 *in, unsigned int shift,
23762306a36Sopenharmony_ci		      unsigned int ndigits)
23862306a36Sopenharmony_ci{
23962306a36Sopenharmony_ci	u64 carry = 0;
24062306a36Sopenharmony_ci	int i;
24162306a36Sopenharmony_ci
24262306a36Sopenharmony_ci	for (i = 0; i < ndigits; i++) {
24362306a36Sopenharmony_ci		u64 temp = in[i];
24462306a36Sopenharmony_ci
24562306a36Sopenharmony_ci		result[i] = (temp << shift) | carry;
24662306a36Sopenharmony_ci		carry = temp >> (64 - shift);
24762306a36Sopenharmony_ci	}
24862306a36Sopenharmony_ci
24962306a36Sopenharmony_ci	return carry;
25062306a36Sopenharmony_ci}
25162306a36Sopenharmony_ci
25262306a36Sopenharmony_ci/* Computes vli = vli >> 1. */
25362306a36Sopenharmony_cistatic void vli_rshift1(u64 *vli, unsigned int ndigits)
25462306a36Sopenharmony_ci{
25562306a36Sopenharmony_ci	u64 *end = vli;
25662306a36Sopenharmony_ci	u64 carry = 0;
25762306a36Sopenharmony_ci
25862306a36Sopenharmony_ci	vli += ndigits;
25962306a36Sopenharmony_ci
26062306a36Sopenharmony_ci	while (vli-- > end) {
26162306a36Sopenharmony_ci		u64 temp = *vli;
26262306a36Sopenharmony_ci		*vli = (temp >> 1) | carry;
26362306a36Sopenharmony_ci		carry = temp << 63;
26462306a36Sopenharmony_ci	}
26562306a36Sopenharmony_ci}
26662306a36Sopenharmony_ci
26762306a36Sopenharmony_ci/* Computes result = left + right, returning carry. Can modify in place. */
26862306a36Sopenharmony_cistatic u64 vli_add(u64 *result, const u64 *left, const u64 *right,
26962306a36Sopenharmony_ci		   unsigned int ndigits)
27062306a36Sopenharmony_ci{
27162306a36Sopenharmony_ci	u64 carry = 0;
27262306a36Sopenharmony_ci	int i;
27362306a36Sopenharmony_ci
27462306a36Sopenharmony_ci	for (i = 0; i < ndigits; i++) {
27562306a36Sopenharmony_ci		u64 sum;
27662306a36Sopenharmony_ci
27762306a36Sopenharmony_ci		sum = left[i] + right[i] + carry;
27862306a36Sopenharmony_ci		if (sum != left[i])
27962306a36Sopenharmony_ci			carry = (sum < left[i]);
28062306a36Sopenharmony_ci
28162306a36Sopenharmony_ci		result[i] = sum;
28262306a36Sopenharmony_ci	}
28362306a36Sopenharmony_ci
28462306a36Sopenharmony_ci	return carry;
28562306a36Sopenharmony_ci}
28662306a36Sopenharmony_ci
28762306a36Sopenharmony_ci/* Computes result = left + right, returning carry. Can modify in place. */
28862306a36Sopenharmony_cistatic u64 vli_uadd(u64 *result, const u64 *left, u64 right,
28962306a36Sopenharmony_ci		    unsigned int ndigits)
29062306a36Sopenharmony_ci{
29162306a36Sopenharmony_ci	u64 carry = right;
29262306a36Sopenharmony_ci	int i;
29362306a36Sopenharmony_ci
29462306a36Sopenharmony_ci	for (i = 0; i < ndigits; i++) {
29562306a36Sopenharmony_ci		u64 sum;
29662306a36Sopenharmony_ci
29762306a36Sopenharmony_ci		sum = left[i] + carry;
29862306a36Sopenharmony_ci		if (sum != left[i])
29962306a36Sopenharmony_ci			carry = (sum < left[i]);
30062306a36Sopenharmony_ci		else
30162306a36Sopenharmony_ci			carry = !!carry;
30262306a36Sopenharmony_ci
30362306a36Sopenharmony_ci		result[i] = sum;
30462306a36Sopenharmony_ci	}
30562306a36Sopenharmony_ci
30662306a36Sopenharmony_ci	return carry;
30762306a36Sopenharmony_ci}
30862306a36Sopenharmony_ci
30962306a36Sopenharmony_ci/* Computes result = left - right, returning borrow. Can modify in place. */
31062306a36Sopenharmony_ciu64 vli_sub(u64 *result, const u64 *left, const u64 *right,
31162306a36Sopenharmony_ci		   unsigned int ndigits)
31262306a36Sopenharmony_ci{
31362306a36Sopenharmony_ci	u64 borrow = 0;
31462306a36Sopenharmony_ci	int i;
31562306a36Sopenharmony_ci
31662306a36Sopenharmony_ci	for (i = 0; i < ndigits; i++) {
31762306a36Sopenharmony_ci		u64 diff;
31862306a36Sopenharmony_ci
31962306a36Sopenharmony_ci		diff = left[i] - right[i] - borrow;
32062306a36Sopenharmony_ci		if (diff != left[i])
32162306a36Sopenharmony_ci			borrow = (diff > left[i]);
32262306a36Sopenharmony_ci
32362306a36Sopenharmony_ci		result[i] = diff;
32462306a36Sopenharmony_ci	}
32562306a36Sopenharmony_ci
32662306a36Sopenharmony_ci	return borrow;
32762306a36Sopenharmony_ci}
32862306a36Sopenharmony_ciEXPORT_SYMBOL(vli_sub);
32962306a36Sopenharmony_ci
33062306a36Sopenharmony_ci/* Computes result = left - right, returning borrow. Can modify in place. */
33162306a36Sopenharmony_cistatic u64 vli_usub(u64 *result, const u64 *left, u64 right,
33262306a36Sopenharmony_ci	     unsigned int ndigits)
33362306a36Sopenharmony_ci{
33462306a36Sopenharmony_ci	u64 borrow = right;
33562306a36Sopenharmony_ci	int i;
33662306a36Sopenharmony_ci
33762306a36Sopenharmony_ci	for (i = 0; i < ndigits; i++) {
33862306a36Sopenharmony_ci		u64 diff;
33962306a36Sopenharmony_ci
34062306a36Sopenharmony_ci		diff = left[i] - borrow;
34162306a36Sopenharmony_ci		if (diff != left[i])
34262306a36Sopenharmony_ci			borrow = (diff > left[i]);
34362306a36Sopenharmony_ci
34462306a36Sopenharmony_ci		result[i] = diff;
34562306a36Sopenharmony_ci	}
34662306a36Sopenharmony_ci
34762306a36Sopenharmony_ci	return borrow;
34862306a36Sopenharmony_ci}
34962306a36Sopenharmony_ci
35062306a36Sopenharmony_cistatic uint128_t mul_64_64(u64 left, u64 right)
35162306a36Sopenharmony_ci{
35262306a36Sopenharmony_ci	uint128_t result;
35362306a36Sopenharmony_ci#if defined(CONFIG_ARCH_SUPPORTS_INT128)
35462306a36Sopenharmony_ci	unsigned __int128 m = (unsigned __int128)left * right;
35562306a36Sopenharmony_ci
35662306a36Sopenharmony_ci	result.m_low  = m;
35762306a36Sopenharmony_ci	result.m_high = m >> 64;
35862306a36Sopenharmony_ci#else
35962306a36Sopenharmony_ci	u64 a0 = left & 0xffffffffull;
36062306a36Sopenharmony_ci	u64 a1 = left >> 32;
36162306a36Sopenharmony_ci	u64 b0 = right & 0xffffffffull;
36262306a36Sopenharmony_ci	u64 b1 = right >> 32;
36362306a36Sopenharmony_ci	u64 m0 = a0 * b0;
36462306a36Sopenharmony_ci	u64 m1 = a0 * b1;
36562306a36Sopenharmony_ci	u64 m2 = a1 * b0;
36662306a36Sopenharmony_ci	u64 m3 = a1 * b1;
36762306a36Sopenharmony_ci
36862306a36Sopenharmony_ci	m2 += (m0 >> 32);
36962306a36Sopenharmony_ci	m2 += m1;
37062306a36Sopenharmony_ci
37162306a36Sopenharmony_ci	/* Overflow */
37262306a36Sopenharmony_ci	if (m2 < m1)
37362306a36Sopenharmony_ci		m3 += 0x100000000ull;
37462306a36Sopenharmony_ci
37562306a36Sopenharmony_ci	result.m_low = (m0 & 0xffffffffull) | (m2 << 32);
37662306a36Sopenharmony_ci	result.m_high = m3 + (m2 >> 32);
37762306a36Sopenharmony_ci#endif
37862306a36Sopenharmony_ci	return result;
37962306a36Sopenharmony_ci}
38062306a36Sopenharmony_ci
38162306a36Sopenharmony_cistatic uint128_t add_128_128(uint128_t a, uint128_t b)
38262306a36Sopenharmony_ci{
38362306a36Sopenharmony_ci	uint128_t result;
38462306a36Sopenharmony_ci
38562306a36Sopenharmony_ci	result.m_low = a.m_low + b.m_low;
38662306a36Sopenharmony_ci	result.m_high = a.m_high + b.m_high + (result.m_low < a.m_low);
38762306a36Sopenharmony_ci
38862306a36Sopenharmony_ci	return result;
38962306a36Sopenharmony_ci}
39062306a36Sopenharmony_ci
39162306a36Sopenharmony_cistatic void vli_mult(u64 *result, const u64 *left, const u64 *right,
39262306a36Sopenharmony_ci		     unsigned int ndigits)
39362306a36Sopenharmony_ci{
39462306a36Sopenharmony_ci	uint128_t r01 = { 0, 0 };
39562306a36Sopenharmony_ci	u64 r2 = 0;
39662306a36Sopenharmony_ci	unsigned int i, k;
39762306a36Sopenharmony_ci
39862306a36Sopenharmony_ci	/* Compute each digit of result in sequence, maintaining the
39962306a36Sopenharmony_ci	 * carries.
40062306a36Sopenharmony_ci	 */
40162306a36Sopenharmony_ci	for (k = 0; k < ndigits * 2 - 1; k++) {
40262306a36Sopenharmony_ci		unsigned int min;
40362306a36Sopenharmony_ci
40462306a36Sopenharmony_ci		if (k < ndigits)
40562306a36Sopenharmony_ci			min = 0;
40662306a36Sopenharmony_ci		else
40762306a36Sopenharmony_ci			min = (k + 1) - ndigits;
40862306a36Sopenharmony_ci
40962306a36Sopenharmony_ci		for (i = min; i <= k && i < ndigits; i++) {
41062306a36Sopenharmony_ci			uint128_t product;
41162306a36Sopenharmony_ci
41262306a36Sopenharmony_ci			product = mul_64_64(left[i], right[k - i]);
41362306a36Sopenharmony_ci
41462306a36Sopenharmony_ci			r01 = add_128_128(r01, product);
41562306a36Sopenharmony_ci			r2 += (r01.m_high < product.m_high);
41662306a36Sopenharmony_ci		}
41762306a36Sopenharmony_ci
41862306a36Sopenharmony_ci		result[k] = r01.m_low;
41962306a36Sopenharmony_ci		r01.m_low = r01.m_high;
42062306a36Sopenharmony_ci		r01.m_high = r2;
42162306a36Sopenharmony_ci		r2 = 0;
42262306a36Sopenharmony_ci	}
42362306a36Sopenharmony_ci
42462306a36Sopenharmony_ci	result[ndigits * 2 - 1] = r01.m_low;
42562306a36Sopenharmony_ci}
42662306a36Sopenharmony_ci
42762306a36Sopenharmony_ci/* Compute product = left * right, for a small right value. */
42862306a36Sopenharmony_cistatic void vli_umult(u64 *result, const u64 *left, u32 right,
42962306a36Sopenharmony_ci		      unsigned int ndigits)
43062306a36Sopenharmony_ci{
43162306a36Sopenharmony_ci	uint128_t r01 = { 0 };
43262306a36Sopenharmony_ci	unsigned int k;
43362306a36Sopenharmony_ci
43462306a36Sopenharmony_ci	for (k = 0; k < ndigits; k++) {
43562306a36Sopenharmony_ci		uint128_t product;
43662306a36Sopenharmony_ci
43762306a36Sopenharmony_ci		product = mul_64_64(left[k], right);
43862306a36Sopenharmony_ci		r01 = add_128_128(r01, product);
43962306a36Sopenharmony_ci		/* no carry */
44062306a36Sopenharmony_ci		result[k] = r01.m_low;
44162306a36Sopenharmony_ci		r01.m_low = r01.m_high;
44262306a36Sopenharmony_ci		r01.m_high = 0;
44362306a36Sopenharmony_ci	}
44462306a36Sopenharmony_ci	result[k] = r01.m_low;
44562306a36Sopenharmony_ci	for (++k; k < ndigits * 2; k++)
44662306a36Sopenharmony_ci		result[k] = 0;
44762306a36Sopenharmony_ci}
44862306a36Sopenharmony_ci
44962306a36Sopenharmony_cistatic void vli_square(u64 *result, const u64 *left, unsigned int ndigits)
45062306a36Sopenharmony_ci{
45162306a36Sopenharmony_ci	uint128_t r01 = { 0, 0 };
45262306a36Sopenharmony_ci	u64 r2 = 0;
45362306a36Sopenharmony_ci	int i, k;
45462306a36Sopenharmony_ci
45562306a36Sopenharmony_ci	for (k = 0; k < ndigits * 2 - 1; k++) {
45662306a36Sopenharmony_ci		unsigned int min;
45762306a36Sopenharmony_ci
45862306a36Sopenharmony_ci		if (k < ndigits)
45962306a36Sopenharmony_ci			min = 0;
46062306a36Sopenharmony_ci		else
46162306a36Sopenharmony_ci			min = (k + 1) - ndigits;
46262306a36Sopenharmony_ci
46362306a36Sopenharmony_ci		for (i = min; i <= k && i <= k - i; i++) {
46462306a36Sopenharmony_ci			uint128_t product;
46562306a36Sopenharmony_ci
46662306a36Sopenharmony_ci			product = mul_64_64(left[i], left[k - i]);
46762306a36Sopenharmony_ci
46862306a36Sopenharmony_ci			if (i < k - i) {
46962306a36Sopenharmony_ci				r2 += product.m_high >> 63;
47062306a36Sopenharmony_ci				product.m_high = (product.m_high << 1) |
47162306a36Sopenharmony_ci						 (product.m_low >> 63);
47262306a36Sopenharmony_ci				product.m_low <<= 1;
47362306a36Sopenharmony_ci			}
47462306a36Sopenharmony_ci
47562306a36Sopenharmony_ci			r01 = add_128_128(r01, product);
47662306a36Sopenharmony_ci			r2 += (r01.m_high < product.m_high);
47762306a36Sopenharmony_ci		}
47862306a36Sopenharmony_ci
47962306a36Sopenharmony_ci		result[k] = r01.m_low;
48062306a36Sopenharmony_ci		r01.m_low = r01.m_high;
48162306a36Sopenharmony_ci		r01.m_high = r2;
48262306a36Sopenharmony_ci		r2 = 0;
48362306a36Sopenharmony_ci	}
48462306a36Sopenharmony_ci
48562306a36Sopenharmony_ci	result[ndigits * 2 - 1] = r01.m_low;
48662306a36Sopenharmony_ci}
48762306a36Sopenharmony_ci
48862306a36Sopenharmony_ci/* Computes result = (left + right) % mod.
48962306a36Sopenharmony_ci * Assumes that left < mod and right < mod, result != mod.
49062306a36Sopenharmony_ci */
49162306a36Sopenharmony_cistatic void vli_mod_add(u64 *result, const u64 *left, const u64 *right,
49262306a36Sopenharmony_ci			const u64 *mod, unsigned int ndigits)
49362306a36Sopenharmony_ci{
49462306a36Sopenharmony_ci	u64 carry;
49562306a36Sopenharmony_ci
49662306a36Sopenharmony_ci	carry = vli_add(result, left, right, ndigits);
49762306a36Sopenharmony_ci
49862306a36Sopenharmony_ci	/* result > mod (result = mod + remainder), so subtract mod to
49962306a36Sopenharmony_ci	 * get remainder.
50062306a36Sopenharmony_ci	 */
50162306a36Sopenharmony_ci	if (carry || vli_cmp(result, mod, ndigits) >= 0)
50262306a36Sopenharmony_ci		vli_sub(result, result, mod, ndigits);
50362306a36Sopenharmony_ci}
50462306a36Sopenharmony_ci
50562306a36Sopenharmony_ci/* Computes result = (left - right) % mod.
50662306a36Sopenharmony_ci * Assumes that left < mod and right < mod, result != mod.
50762306a36Sopenharmony_ci */
50862306a36Sopenharmony_cistatic void vli_mod_sub(u64 *result, const u64 *left, const u64 *right,
50962306a36Sopenharmony_ci			const u64 *mod, unsigned int ndigits)
51062306a36Sopenharmony_ci{
51162306a36Sopenharmony_ci	u64 borrow = vli_sub(result, left, right, ndigits);
51262306a36Sopenharmony_ci
51362306a36Sopenharmony_ci	/* In this case, p_result == -diff == (max int) - diff.
51462306a36Sopenharmony_ci	 * Since -x % d == d - x, we can get the correct result from
51562306a36Sopenharmony_ci	 * result + mod (with overflow).
51662306a36Sopenharmony_ci	 */
51762306a36Sopenharmony_ci	if (borrow)
51862306a36Sopenharmony_ci		vli_add(result, result, mod, ndigits);
51962306a36Sopenharmony_ci}
52062306a36Sopenharmony_ci
/*
 * Computes result = product % mod
 * for special form moduli: p = 2^k-c, for small c (note the minus sign)
 *
 * product holds 2 * ndigits digits; mod and result hold ndigits digits.
 *
 * References:
 * R. Crandall, C. Pomerance. Prime Numbers: A Computational Perspective.
 * 9 Fast Algorithms for Large-Integer Arithmetic. 9.2.3 Moduli of special form
 * Algorithm 9.2.13 (Fast mod operation for special-form moduli).
 */
static void vli_mmod_special(u64 *result, const u64 *product,
			      const u64 *mod, unsigned int ndigits)
{
	/*
	 * Negating the lowest digit of mod recovers c, presumably because
	 * k == 64 * ndigits for the moduli used here - TODO confirm.
	 * NOTE: vli_umult() takes a u32 multiplier, so only the low 32 bits
	 * of c are actually used below.
	 */
	u64 c = -mod[0];
	u64 t[ECC_MAX_DIGITS * 2];
	u64 r[ECC_MAX_DIGITS * 2];

	vli_set(r, product, ndigits * 2);
	/* Fold the high half into the low half until it is zero: r = lo + hi * c */
	while (!vli_is_zero(r + ndigits, ndigits)) {
		vli_umult(t, r + ndigits, c, ndigits);
		vli_clear(r + ndigits, ndigits);
		vli_add(r, r, t, ndigits * 2);
	}
	/* t = mod zero-extended to 2 * ndigits digits */
	vli_set(t, mod, ndigits);
	vli_clear(t + ndigits, ndigits);
	/* Final correction: bring r below mod. */
	while (vli_cmp(r, t, ndigits * 2) >= 0)
		vli_sub(r, r, t, ndigits * 2);
	vli_set(result, r, ndigits);
}
54962306a36Sopenharmony_ci
/*
 * Computes result = product % mod
 * for special form moduli: p = 2^{k-1}+c, for small c (note the plus sign)
 * where k-1 does not fit into qword boundary by -1 bit (such as 255).
 *
 * product holds 2 * ndigits digits; mod and result hold ndigits digits.
 *
 * References (loosely based on):
 * A. Menezes, P. van Oorschot, S. Vanstone. Handbook of Applied Cryptography.
 * 14.3.4 Reduction methods for moduli of special form. Algorithm 14.47.
 * URL: http://cacr.uwaterloo.ca/hac/about/chap14.pdf
 *
 * H. Cohen, G. Frey, R. Avanzi, C. Doche, T. Lange, K. Nguyen, F. Vercauteren.
 * Handbook of Elliptic and Hyperelliptic Curve Cryptography.
 * Algorithm 10.25 Fast reduction for special form moduli
 */
static void vli_mmod_special2(u64 *result, const u64 *product,
			       const u64 *mod, unsigned int ndigits)
{
	/*
	 * Twice the lowest digit of mod; multiplier used when folding q.
	 * NOTE: vli_umult() takes a u32 multiplier, so only the low 32 bits
	 * of c2 are actually used below.
	 */
	u64 c2 = mod[0] * 2;
	u64 q[ECC_MAX_DIGITS];
	u64 r[ECC_MAX_DIGITS * 2];
	u64 m[ECC_MAX_DIGITS * 2]; /* expanded mod */
	int carry; /* last bit that doesn't fit into q */
	int i;

	/* m = mod zero-extended to 2 * ndigits digits */
	vli_set(m, mod, ndigits);
	vli_clear(m + ndigits, ndigits);

	/* r = low half of product, zero-extended */
	vli_set(r, product, ndigits);
	/* q and carry are top bits */
	vli_set(q, product + ndigits, ndigits);
	vli_clear(r + ndigits, ndigits);
	/* Move the top bit of the low half out of r and into carry. */
	carry = vli_is_negative(r, ndigits);
	if (carry)
		r[ndigits - 1] &= (1ull << 63) - 1;
	/* Fold the remaining high part until nothing is left of it. */
	for (i = 1; carry || !vli_is_zero(q, ndigits); i++) {
		u64 qc[ECC_MAX_DIGITS * 2];

		/* qc = q * c2, plus mod[0] for the carry bit */
		vli_umult(qc, q, c2, ndigits);
		if (carry)
			vli_uadd(qc, qc, mod[0], ndigits * 2);
		/* Split qc the same way: high half -> q, top low bit -> carry */
		vli_set(q, qc + ndigits, ndigits);
		vli_clear(qc + ndigits, ndigits);
		carry = vli_is_negative(qc, ndigits);
		if (carry)
			qc[ndigits - 1] &= (1ull << 63) - 1;
		/* Terms alternate in sign: subtract on odd rounds, add on even. */
		if (i & 1)
			vli_sub(r, r, qc, ndigits * 2);
		else
			vli_add(r, r, qc, ndigits * 2);
	}
	/* r may have gone negative (top bit of the wide value set): add m back */
	while (vli_is_negative(r, ndigits * 2))
		vli_add(r, r, m, ndigits * 2);
	/* ... or may still be >= m: subtract it off */
	while (vli_cmp(r, m, ndigits * 2) >= 0)
		vli_sub(r, r, m, ndigits * 2);

	vli_set(result, r, ndigits);
}
60762306a36Sopenharmony_ci
/*
 * Computes result = product % mod, where product is 2N words long.
 * Reference: Ken MacKay's micro-ecc.
 * Currently only designed to work for curve_p or curve_n.
 *
 * Bit-by-bit long division: mod is shifted up as far as it goes and then
 * repeatedly trial-subtracted while being shifted back down one bit per
 * round.
 *
 * NOTE: product is used as scratch space and is overwritten.
 * NOTE(review): mod_m is written at indices word_shift .. word_shift +
 * ndigits - 1 with no bounds check; this stays inside the 2 * ndigits
 * digits only when the top set bit of mod lies in its highest digit -
 * presumably true for curve_p / curve_n, confirm for other callers.
 */
static void vli_mmod_slow(u64 *result, u64 *product, const u64 *mod,
			  unsigned int ndigits)
{
	u64 mod_m[2 * ECC_MAX_DIGITS];
	u64 tmp[2 * ECC_MAX_DIGITS];
	/* Two buffers; index i (below) selects the current remainder. */
	u64 *v[2] = { tmp, product };
	u64 carry = 0;
	unsigned int i;
	/* Shift mod so its highest set bit is at the maximum position. */
	int shift = (ndigits * 2 * 64) - vli_num_bits(mod, ndigits);
	int word_shift = shift / 64;
	int bit_shift = shift % 64;

	/* mod_m = mod << shift, as a 2 * ndigits digit value */
	vli_clear(mod_m, word_shift);
	if (bit_shift > 0) {
		for (i = 0; i < ndigits; ++i) {
			mod_m[word_shift + i] = (mod[i] << bit_shift) | carry;
			carry = mod[i] >> (64 - bit_shift);
		}
	} else
		vli_set(mod_m + word_shift, mod, ndigits);

	/* i starts at 1 because the remainder initially lives in product. */
	for (i = 1; shift >= 0; --shift) {
		u64 borrow = 0;
		unsigned int j;

		/* Trial subtraction into the other buffer: v[1 - i] = v[i] - mod_m */
		for (j = 0; j < ndigits * 2; ++j) {
			u64 diff = v[i][j] - mod_m[j] - borrow;

			if (diff != v[i][j])
				borrow = (diff > v[i][j]);
			v[1 - i][j] = diff;
		}
		i = !(i ^ borrow); /* Swap the index if there was no borrow */
		/* mod_m >>= 1 across all 2 * ndigits digits, done in two halves */
		vli_rshift1(mod_m, ndigits);
		mod_m[ndigits - 1] |= mod_m[ndigits] << (64 - 1);
		vli_rshift1(mod_m + ndigits, ndigits);
	}
	vli_set(result, v[i], ndigits);
}
65362306a36Sopenharmony_ci
65462306a36Sopenharmony_ci/* Computes result = product % mod using Barrett's reduction with precomputed
65562306a36Sopenharmony_ci * value mu appended to the mod after ndigits, mu = (2^{2w} / mod) and have
65662306a36Sopenharmony_ci * length ndigits + 1, where mu * (2^w - 1) should not overflow ndigits
65762306a36Sopenharmony_ci * boundary.
65862306a36Sopenharmony_ci *
65962306a36Sopenharmony_ci * Reference:
66062306a36Sopenharmony_ci * R. Brent, P. Zimmermann. Modern Computer Arithmetic. 2010.
66162306a36Sopenharmony_ci * 2.4.1 Barrett's algorithm. Algorithm 2.5.
66262306a36Sopenharmony_ci */
66362306a36Sopenharmony_cistatic void vli_mmod_barrett(u64 *result, u64 *product, const u64 *mod,
66462306a36Sopenharmony_ci			     unsigned int ndigits)
66562306a36Sopenharmony_ci{
66662306a36Sopenharmony_ci	u64 q[ECC_MAX_DIGITS * 2];
66762306a36Sopenharmony_ci	u64 r[ECC_MAX_DIGITS * 2];
66862306a36Sopenharmony_ci	const u64 *mu = mod + ndigits;
66962306a36Sopenharmony_ci
67062306a36Sopenharmony_ci	vli_mult(q, product + ndigits, mu, ndigits);
67162306a36Sopenharmony_ci	if (mu[ndigits])
67262306a36Sopenharmony_ci		vli_add(q + ndigits, q + ndigits, product + ndigits, ndigits);
67362306a36Sopenharmony_ci	vli_mult(r, mod, q + ndigits, ndigits);
67462306a36Sopenharmony_ci	vli_sub(r, product, r, ndigits * 2);
67562306a36Sopenharmony_ci	while (!vli_is_zero(r + ndigits, ndigits) ||
67662306a36Sopenharmony_ci	       vli_cmp(r, mod, ndigits) != -1) {
67762306a36Sopenharmony_ci		u64 carry;
67862306a36Sopenharmony_ci
67962306a36Sopenharmony_ci		carry = vli_sub(r, r, mod, ndigits);
68062306a36Sopenharmony_ci		vli_usub(r + ndigits, r + ndigits, carry, ndigits);
68162306a36Sopenharmony_ci	}
68262306a36Sopenharmony_ci	vli_set(result, r, ndigits);
68362306a36Sopenharmony_ci}
68462306a36Sopenharmony_ci
68562306a36Sopenharmony_ci/* Computes p_result = p_product % curve_p.
68662306a36Sopenharmony_ci * See algorithm 5 and 6 from
68762306a36Sopenharmony_ci * http://www.isys.uni-klu.ac.at/PDF/2001-0126-MT.pdf
68862306a36Sopenharmony_ci */
68962306a36Sopenharmony_cistatic void vli_mmod_fast_192(u64 *result, const u64 *product,
69062306a36Sopenharmony_ci			      const u64 *curve_prime, u64 *tmp)
69162306a36Sopenharmony_ci{
69262306a36Sopenharmony_ci	const unsigned int ndigits = 3;
69362306a36Sopenharmony_ci	int carry;
69462306a36Sopenharmony_ci
69562306a36Sopenharmony_ci	vli_set(result, product, ndigits);
69662306a36Sopenharmony_ci
69762306a36Sopenharmony_ci	vli_set(tmp, &product[3], ndigits);
69862306a36Sopenharmony_ci	carry = vli_add(result, result, tmp, ndigits);
69962306a36Sopenharmony_ci
70062306a36Sopenharmony_ci	tmp[0] = 0;
70162306a36Sopenharmony_ci	tmp[1] = product[3];
70262306a36Sopenharmony_ci	tmp[2] = product[4];
70362306a36Sopenharmony_ci	carry += vli_add(result, result, tmp, ndigits);
70462306a36Sopenharmony_ci
70562306a36Sopenharmony_ci	tmp[0] = tmp[1] = product[5];
70662306a36Sopenharmony_ci	tmp[2] = 0;
70762306a36Sopenharmony_ci	carry += vli_add(result, result, tmp, ndigits);
70862306a36Sopenharmony_ci
70962306a36Sopenharmony_ci	while (carry || vli_cmp(curve_prime, result, ndigits) != 1)
71062306a36Sopenharmony_ci		carry -= vli_sub(result, result, curve_prime, ndigits);
71162306a36Sopenharmony_ci}
71262306a36Sopenharmony_ci
71362306a36Sopenharmony_ci/* Computes result = product % curve_prime
71462306a36Sopenharmony_ci * from http://www.nsa.gov/ia/_files/nist-routines.pdf
71562306a36Sopenharmony_ci */
71662306a36Sopenharmony_cistatic void vli_mmod_fast_256(u64 *result, const u64 *product,
71762306a36Sopenharmony_ci			      const u64 *curve_prime, u64 *tmp)
71862306a36Sopenharmony_ci{
71962306a36Sopenharmony_ci	int carry;
72062306a36Sopenharmony_ci	const unsigned int ndigits = 4;
72162306a36Sopenharmony_ci
72262306a36Sopenharmony_ci	/* t */
72362306a36Sopenharmony_ci	vli_set(result, product, ndigits);
72462306a36Sopenharmony_ci
72562306a36Sopenharmony_ci	/* s1 */
72662306a36Sopenharmony_ci	tmp[0] = 0;
72762306a36Sopenharmony_ci	tmp[1] = product[5] & 0xffffffff00000000ull;
72862306a36Sopenharmony_ci	tmp[2] = product[6];
72962306a36Sopenharmony_ci	tmp[3] = product[7];
73062306a36Sopenharmony_ci	carry = vli_lshift(tmp, tmp, 1, ndigits);
73162306a36Sopenharmony_ci	carry += vli_add(result, result, tmp, ndigits);
73262306a36Sopenharmony_ci
73362306a36Sopenharmony_ci	/* s2 */
73462306a36Sopenharmony_ci	tmp[1] = product[6] << 32;
73562306a36Sopenharmony_ci	tmp[2] = (product[6] >> 32) | (product[7] << 32);
73662306a36Sopenharmony_ci	tmp[3] = product[7] >> 32;
73762306a36Sopenharmony_ci	carry += vli_lshift(tmp, tmp, 1, ndigits);
73862306a36Sopenharmony_ci	carry += vli_add(result, result, tmp, ndigits);
73962306a36Sopenharmony_ci
74062306a36Sopenharmony_ci	/* s3 */
74162306a36Sopenharmony_ci	tmp[0] = product[4];
74262306a36Sopenharmony_ci	tmp[1] = product[5] & 0xffffffff;
74362306a36Sopenharmony_ci	tmp[2] = 0;
74462306a36Sopenharmony_ci	tmp[3] = product[7];
74562306a36Sopenharmony_ci	carry += vli_add(result, result, tmp, ndigits);
74662306a36Sopenharmony_ci
74762306a36Sopenharmony_ci	/* s4 */
74862306a36Sopenharmony_ci	tmp[0] = (product[4] >> 32) | (product[5] << 32);
74962306a36Sopenharmony_ci	tmp[1] = (product[5] >> 32) | (product[6] & 0xffffffff00000000ull);
75062306a36Sopenharmony_ci	tmp[2] = product[7];
75162306a36Sopenharmony_ci	tmp[3] = (product[6] >> 32) | (product[4] << 32);
75262306a36Sopenharmony_ci	carry += vli_add(result, result, tmp, ndigits);
75362306a36Sopenharmony_ci
75462306a36Sopenharmony_ci	/* d1 */
75562306a36Sopenharmony_ci	tmp[0] = (product[5] >> 32) | (product[6] << 32);
75662306a36Sopenharmony_ci	tmp[1] = (product[6] >> 32);
75762306a36Sopenharmony_ci	tmp[2] = 0;
75862306a36Sopenharmony_ci	tmp[3] = (product[4] & 0xffffffff) | (product[5] << 32);
75962306a36Sopenharmony_ci	carry -= vli_sub(result, result, tmp, ndigits);
76062306a36Sopenharmony_ci
76162306a36Sopenharmony_ci	/* d2 */
76262306a36Sopenharmony_ci	tmp[0] = product[6];
76362306a36Sopenharmony_ci	tmp[1] = product[7];
76462306a36Sopenharmony_ci	tmp[2] = 0;
76562306a36Sopenharmony_ci	tmp[3] = (product[4] >> 32) | (product[5] & 0xffffffff00000000ull);
76662306a36Sopenharmony_ci	carry -= vli_sub(result, result, tmp, ndigits);
76762306a36Sopenharmony_ci
76862306a36Sopenharmony_ci	/* d3 */
76962306a36Sopenharmony_ci	tmp[0] = (product[6] >> 32) | (product[7] << 32);
77062306a36Sopenharmony_ci	tmp[1] = (product[7] >> 32) | (product[4] << 32);
77162306a36Sopenharmony_ci	tmp[2] = (product[4] >> 32) | (product[5] << 32);
77262306a36Sopenharmony_ci	tmp[3] = (product[6] << 32);
77362306a36Sopenharmony_ci	carry -= vli_sub(result, result, tmp, ndigits);
77462306a36Sopenharmony_ci
77562306a36Sopenharmony_ci	/* d4 */
77662306a36Sopenharmony_ci	tmp[0] = product[7];
77762306a36Sopenharmony_ci	tmp[1] = product[4] & 0xffffffff00000000ull;
77862306a36Sopenharmony_ci	tmp[2] = product[5];
77962306a36Sopenharmony_ci	tmp[3] = product[6] & 0xffffffff00000000ull;
78062306a36Sopenharmony_ci	carry -= vli_sub(result, result, tmp, ndigits);
78162306a36Sopenharmony_ci
78262306a36Sopenharmony_ci	if (carry < 0) {
78362306a36Sopenharmony_ci		do {
78462306a36Sopenharmony_ci			carry += vli_add(result, result, curve_prime, ndigits);
78562306a36Sopenharmony_ci		} while (carry < 0);
78662306a36Sopenharmony_ci	} else {
78762306a36Sopenharmony_ci		while (carry || vli_cmp(curve_prime, result, ndigits) != 1)
78862306a36Sopenharmony_ci			carry -= vli_sub(result, result, curve_prime, ndigits);
78962306a36Sopenharmony_ci	}
79062306a36Sopenharmony_ci}
79162306a36Sopenharmony_ci
79262306a36Sopenharmony_ci#define SL32OR32(x32, y32) (((u64)x32 << 32) | y32)
79362306a36Sopenharmony_ci#define AND64H(x64)  (x64 & 0xffFFffFF00000000ull)
79462306a36Sopenharmony_ci#define AND64L(x64)  (x64 & 0x00000000ffFFffFFull)
79562306a36Sopenharmony_ci
79662306a36Sopenharmony_ci/* Computes result = product % curve_prime
79762306a36Sopenharmony_ci * from "Mathematical routines for the NIST prime elliptic curves"
79862306a36Sopenharmony_ci */
79962306a36Sopenharmony_cistatic void vli_mmod_fast_384(u64 *result, const u64 *product,
80062306a36Sopenharmony_ci				const u64 *curve_prime, u64 *tmp)
80162306a36Sopenharmony_ci{
80262306a36Sopenharmony_ci	int carry;
80362306a36Sopenharmony_ci	const unsigned int ndigits = 6;
80462306a36Sopenharmony_ci
80562306a36Sopenharmony_ci	/* t */
80662306a36Sopenharmony_ci	vli_set(result, product, ndigits);
80762306a36Sopenharmony_ci
80862306a36Sopenharmony_ci	/* s1 */
80962306a36Sopenharmony_ci	tmp[0] = 0;		// 0 || 0
81062306a36Sopenharmony_ci	tmp[1] = 0;		// 0 || 0
81162306a36Sopenharmony_ci	tmp[2] = SL32OR32(product[11], (product[10]>>32));	//a22||a21
81262306a36Sopenharmony_ci	tmp[3] = product[11]>>32;	// 0 ||a23
81362306a36Sopenharmony_ci	tmp[4] = 0;		// 0 || 0
81462306a36Sopenharmony_ci	tmp[5] = 0;		// 0 || 0
81562306a36Sopenharmony_ci	carry = vli_lshift(tmp, tmp, 1, ndigits);
81662306a36Sopenharmony_ci	carry += vli_add(result, result, tmp, ndigits);
81762306a36Sopenharmony_ci
81862306a36Sopenharmony_ci	/* s2 */
81962306a36Sopenharmony_ci	tmp[0] = product[6];	//a13||a12
82062306a36Sopenharmony_ci	tmp[1] = product[7];	//a15||a14
82162306a36Sopenharmony_ci	tmp[2] = product[8];	//a17||a16
82262306a36Sopenharmony_ci	tmp[3] = product[9];	//a19||a18
82362306a36Sopenharmony_ci	tmp[4] = product[10];	//a21||a20
82462306a36Sopenharmony_ci	tmp[5] = product[11];	//a23||a22
82562306a36Sopenharmony_ci	carry += vli_add(result, result, tmp, ndigits);
82662306a36Sopenharmony_ci
82762306a36Sopenharmony_ci	/* s3 */
82862306a36Sopenharmony_ci	tmp[0] = SL32OR32(product[11], (product[10]>>32));	//a22||a21
82962306a36Sopenharmony_ci	tmp[1] = SL32OR32(product[6], (product[11]>>32));	//a12||a23
83062306a36Sopenharmony_ci	tmp[2] = SL32OR32(product[7], (product[6])>>32);	//a14||a13
83162306a36Sopenharmony_ci	tmp[3] = SL32OR32(product[8], (product[7]>>32));	//a16||a15
83262306a36Sopenharmony_ci	tmp[4] = SL32OR32(product[9], (product[8]>>32));	//a18||a17
83362306a36Sopenharmony_ci	tmp[5] = SL32OR32(product[10], (product[9]>>32));	//a20||a19
83462306a36Sopenharmony_ci	carry += vli_add(result, result, tmp, ndigits);
83562306a36Sopenharmony_ci
83662306a36Sopenharmony_ci	/* s4 */
83762306a36Sopenharmony_ci	tmp[0] = AND64H(product[11]);	//a23|| 0
83862306a36Sopenharmony_ci	tmp[1] = (product[10]<<32);	//a20|| 0
83962306a36Sopenharmony_ci	tmp[2] = product[6];	//a13||a12
84062306a36Sopenharmony_ci	tmp[3] = product[7];	//a15||a14
84162306a36Sopenharmony_ci	tmp[4] = product[8];	//a17||a16
84262306a36Sopenharmony_ci	tmp[5] = product[9];	//a19||a18
84362306a36Sopenharmony_ci	carry += vli_add(result, result, tmp, ndigits);
84462306a36Sopenharmony_ci
84562306a36Sopenharmony_ci	/* s5 */
84662306a36Sopenharmony_ci	tmp[0] = 0;		//  0|| 0
84762306a36Sopenharmony_ci	tmp[1] = 0;		//  0|| 0
84862306a36Sopenharmony_ci	tmp[2] = product[10];	//a21||a20
84962306a36Sopenharmony_ci	tmp[3] = product[11];	//a23||a22
85062306a36Sopenharmony_ci	tmp[4] = 0;		//  0|| 0
85162306a36Sopenharmony_ci	tmp[5] = 0;		//  0|| 0
85262306a36Sopenharmony_ci	carry += vli_add(result, result, tmp, ndigits);
85362306a36Sopenharmony_ci
85462306a36Sopenharmony_ci	/* s6 */
85562306a36Sopenharmony_ci	tmp[0] = AND64L(product[10]);	// 0 ||a20
85662306a36Sopenharmony_ci	tmp[1] = AND64H(product[10]);	//a21|| 0
85762306a36Sopenharmony_ci	tmp[2] = product[11];	//a23||a22
85862306a36Sopenharmony_ci	tmp[3] = 0;		// 0 || 0
85962306a36Sopenharmony_ci	tmp[4] = 0;		// 0 || 0
86062306a36Sopenharmony_ci	tmp[5] = 0;		// 0 || 0
86162306a36Sopenharmony_ci	carry += vli_add(result, result, tmp, ndigits);
86262306a36Sopenharmony_ci
86362306a36Sopenharmony_ci	/* d1 */
86462306a36Sopenharmony_ci	tmp[0] = SL32OR32(product[6], (product[11]>>32));	//a12||a23
86562306a36Sopenharmony_ci	tmp[1] = SL32OR32(product[7], (product[6]>>32));	//a14||a13
86662306a36Sopenharmony_ci	tmp[2] = SL32OR32(product[8], (product[7]>>32));	//a16||a15
86762306a36Sopenharmony_ci	tmp[3] = SL32OR32(product[9], (product[8]>>32));	//a18||a17
86862306a36Sopenharmony_ci	tmp[4] = SL32OR32(product[10], (product[9]>>32));	//a20||a19
86962306a36Sopenharmony_ci	tmp[5] = SL32OR32(product[11], (product[10]>>32));	//a22||a21
87062306a36Sopenharmony_ci	carry -= vli_sub(result, result, tmp, ndigits);
87162306a36Sopenharmony_ci
87262306a36Sopenharmony_ci	/* d2 */
87362306a36Sopenharmony_ci	tmp[0] = (product[10]<<32);	//a20|| 0
87462306a36Sopenharmony_ci	tmp[1] = SL32OR32(product[11], (product[10]>>32));	//a22||a21
87562306a36Sopenharmony_ci	tmp[2] = (product[11]>>32);	// 0 ||a23
87662306a36Sopenharmony_ci	tmp[3] = 0;		// 0 || 0
87762306a36Sopenharmony_ci	tmp[4] = 0;		// 0 || 0
87862306a36Sopenharmony_ci	tmp[5] = 0;		// 0 || 0
87962306a36Sopenharmony_ci	carry -= vli_sub(result, result, tmp, ndigits);
88062306a36Sopenharmony_ci
88162306a36Sopenharmony_ci	/* d3 */
88262306a36Sopenharmony_ci	tmp[0] = 0;		// 0 || 0
88362306a36Sopenharmony_ci	tmp[1] = AND64H(product[11]);	//a23|| 0
88462306a36Sopenharmony_ci	tmp[2] = product[11]>>32;	// 0 ||a23
88562306a36Sopenharmony_ci	tmp[3] = 0;		// 0 || 0
88662306a36Sopenharmony_ci	tmp[4] = 0;		// 0 || 0
88762306a36Sopenharmony_ci	tmp[5] = 0;		// 0 || 0
88862306a36Sopenharmony_ci	carry -= vli_sub(result, result, tmp, ndigits);
88962306a36Sopenharmony_ci
89062306a36Sopenharmony_ci	if (carry < 0) {
89162306a36Sopenharmony_ci		do {
89262306a36Sopenharmony_ci			carry += vli_add(result, result, curve_prime, ndigits);
89362306a36Sopenharmony_ci		} while (carry < 0);
89462306a36Sopenharmony_ci	} else {
89562306a36Sopenharmony_ci		while (carry || vli_cmp(curve_prime, result, ndigits) != 1)
89662306a36Sopenharmony_ci			carry -= vli_sub(result, result, curve_prime, ndigits);
89762306a36Sopenharmony_ci	}
89862306a36Sopenharmony_ci
89962306a36Sopenharmony_ci}
90062306a36Sopenharmony_ci
90162306a36Sopenharmony_ci#undef SL32OR32
90262306a36Sopenharmony_ci#undef AND64H
90362306a36Sopenharmony_ci#undef AND64L
90462306a36Sopenharmony_ci
90562306a36Sopenharmony_ci/* Computes result = product % curve_prime for different curve_primes.
90662306a36Sopenharmony_ci *
90762306a36Sopenharmony_ci * Note that curve_primes are distinguished just by heuristic check and
90862306a36Sopenharmony_ci * not by complete conformance check.
90962306a36Sopenharmony_ci */
91062306a36Sopenharmony_cistatic bool vli_mmod_fast(u64 *result, u64 *product,
91162306a36Sopenharmony_ci			  const struct ecc_curve *curve)
91262306a36Sopenharmony_ci{
91362306a36Sopenharmony_ci	u64 tmp[2 * ECC_MAX_DIGITS];
91462306a36Sopenharmony_ci	const u64 *curve_prime = curve->p;
91562306a36Sopenharmony_ci	const unsigned int ndigits = curve->g.ndigits;
91662306a36Sopenharmony_ci
91762306a36Sopenharmony_ci	/* All NIST curves have name prefix 'nist_' */
91862306a36Sopenharmony_ci	if (strncmp(curve->name, "nist_", 5) != 0) {
91962306a36Sopenharmony_ci		/* Try to handle Pseudo-Marsenne primes. */
92062306a36Sopenharmony_ci		if (curve_prime[ndigits - 1] == -1ull) {
92162306a36Sopenharmony_ci			vli_mmod_special(result, product, curve_prime,
92262306a36Sopenharmony_ci					 ndigits);
92362306a36Sopenharmony_ci			return true;
92462306a36Sopenharmony_ci		} else if (curve_prime[ndigits - 1] == 1ull << 63 &&
92562306a36Sopenharmony_ci			   curve_prime[ndigits - 2] == 0) {
92662306a36Sopenharmony_ci			vli_mmod_special2(result, product, curve_prime,
92762306a36Sopenharmony_ci					  ndigits);
92862306a36Sopenharmony_ci			return true;
92962306a36Sopenharmony_ci		}
93062306a36Sopenharmony_ci		vli_mmod_barrett(result, product, curve_prime, ndigits);
93162306a36Sopenharmony_ci		return true;
93262306a36Sopenharmony_ci	}
93362306a36Sopenharmony_ci
93462306a36Sopenharmony_ci	switch (ndigits) {
93562306a36Sopenharmony_ci	case 3:
93662306a36Sopenharmony_ci		vli_mmod_fast_192(result, product, curve_prime, tmp);
93762306a36Sopenharmony_ci		break;
93862306a36Sopenharmony_ci	case 4:
93962306a36Sopenharmony_ci		vli_mmod_fast_256(result, product, curve_prime, tmp);
94062306a36Sopenharmony_ci		break;
94162306a36Sopenharmony_ci	case 6:
94262306a36Sopenharmony_ci		vli_mmod_fast_384(result, product, curve_prime, tmp);
94362306a36Sopenharmony_ci		break;
94462306a36Sopenharmony_ci	default:
94562306a36Sopenharmony_ci		pr_err_ratelimited("ecc: unsupported digits size!\n");
94662306a36Sopenharmony_ci		return false;
94762306a36Sopenharmony_ci	}
94862306a36Sopenharmony_ci
94962306a36Sopenharmony_ci	return true;
95062306a36Sopenharmony_ci}
95162306a36Sopenharmony_ci
95262306a36Sopenharmony_ci/* Computes result = (left * right) % mod.
95362306a36Sopenharmony_ci * Assumes that mod is big enough curve order.
95462306a36Sopenharmony_ci */
95562306a36Sopenharmony_civoid vli_mod_mult_slow(u64 *result, const u64 *left, const u64 *right,
95662306a36Sopenharmony_ci		       const u64 *mod, unsigned int ndigits)
95762306a36Sopenharmony_ci{
95862306a36Sopenharmony_ci	u64 product[ECC_MAX_DIGITS * 2];
95962306a36Sopenharmony_ci
96062306a36Sopenharmony_ci	vli_mult(product, left, right, ndigits);
96162306a36Sopenharmony_ci	vli_mmod_slow(result, product, mod, ndigits);
96262306a36Sopenharmony_ci}
96362306a36Sopenharmony_ciEXPORT_SYMBOL(vli_mod_mult_slow);
96462306a36Sopenharmony_ci
96562306a36Sopenharmony_ci/* Computes result = (left * right) % curve_prime. */
96662306a36Sopenharmony_cistatic void vli_mod_mult_fast(u64 *result, const u64 *left, const u64 *right,
96762306a36Sopenharmony_ci			      const struct ecc_curve *curve)
96862306a36Sopenharmony_ci{
96962306a36Sopenharmony_ci	u64 product[2 * ECC_MAX_DIGITS];
97062306a36Sopenharmony_ci
97162306a36Sopenharmony_ci	vli_mult(product, left, right, curve->g.ndigits);
97262306a36Sopenharmony_ci	vli_mmod_fast(result, product, curve);
97362306a36Sopenharmony_ci}
97462306a36Sopenharmony_ci
97562306a36Sopenharmony_ci/* Computes result = left^2 % curve_prime. */
97662306a36Sopenharmony_cistatic void vli_mod_square_fast(u64 *result, const u64 *left,
97762306a36Sopenharmony_ci				const struct ecc_curve *curve)
97862306a36Sopenharmony_ci{
97962306a36Sopenharmony_ci	u64 product[2 * ECC_MAX_DIGITS];
98062306a36Sopenharmony_ci
98162306a36Sopenharmony_ci	vli_square(product, left, curve->g.ndigits);
98262306a36Sopenharmony_ci	vli_mmod_fast(result, product, curve);
98362306a36Sopenharmony_ci}
98462306a36Sopenharmony_ci
98562306a36Sopenharmony_ci#define EVEN(vli) (!(vli[0] & 1))
98662306a36Sopenharmony_ci/* Computes result = (1 / p_input) % mod. All VLIs are the same size.
98762306a36Sopenharmony_ci * See "From Euclid's GCD to Montgomery Multiplication to the Great Divide"
98862306a36Sopenharmony_ci * https://labs.oracle.com/techrep/2001/smli_tr-2001-95.pdf
98962306a36Sopenharmony_ci */
99062306a36Sopenharmony_civoid vli_mod_inv(u64 *result, const u64 *input, const u64 *mod,
99162306a36Sopenharmony_ci			unsigned int ndigits)
99262306a36Sopenharmony_ci{
99362306a36Sopenharmony_ci	u64 a[ECC_MAX_DIGITS], b[ECC_MAX_DIGITS];
99462306a36Sopenharmony_ci	u64 u[ECC_MAX_DIGITS], v[ECC_MAX_DIGITS];
99562306a36Sopenharmony_ci	u64 carry;
99662306a36Sopenharmony_ci	int cmp_result;
99762306a36Sopenharmony_ci
99862306a36Sopenharmony_ci	if (vli_is_zero(input, ndigits)) {
99962306a36Sopenharmony_ci		vli_clear(result, ndigits);
100062306a36Sopenharmony_ci		return;
100162306a36Sopenharmony_ci	}
100262306a36Sopenharmony_ci
100362306a36Sopenharmony_ci	vli_set(a, input, ndigits);
100462306a36Sopenharmony_ci	vli_set(b, mod, ndigits);
100562306a36Sopenharmony_ci	vli_clear(u, ndigits);
100662306a36Sopenharmony_ci	u[0] = 1;
100762306a36Sopenharmony_ci	vli_clear(v, ndigits);
100862306a36Sopenharmony_ci
100962306a36Sopenharmony_ci	while ((cmp_result = vli_cmp(a, b, ndigits)) != 0) {
101062306a36Sopenharmony_ci		carry = 0;
101162306a36Sopenharmony_ci
101262306a36Sopenharmony_ci		if (EVEN(a)) {
101362306a36Sopenharmony_ci			vli_rshift1(a, ndigits);
101462306a36Sopenharmony_ci
101562306a36Sopenharmony_ci			if (!EVEN(u))
101662306a36Sopenharmony_ci				carry = vli_add(u, u, mod, ndigits);
101762306a36Sopenharmony_ci
101862306a36Sopenharmony_ci			vli_rshift1(u, ndigits);
101962306a36Sopenharmony_ci			if (carry)
102062306a36Sopenharmony_ci				u[ndigits - 1] |= 0x8000000000000000ull;
102162306a36Sopenharmony_ci		} else if (EVEN(b)) {
102262306a36Sopenharmony_ci			vli_rshift1(b, ndigits);
102362306a36Sopenharmony_ci
102462306a36Sopenharmony_ci			if (!EVEN(v))
102562306a36Sopenharmony_ci				carry = vli_add(v, v, mod, ndigits);
102662306a36Sopenharmony_ci
102762306a36Sopenharmony_ci			vli_rshift1(v, ndigits);
102862306a36Sopenharmony_ci			if (carry)
102962306a36Sopenharmony_ci				v[ndigits - 1] |= 0x8000000000000000ull;
103062306a36Sopenharmony_ci		} else if (cmp_result > 0) {
103162306a36Sopenharmony_ci			vli_sub(a, a, b, ndigits);
103262306a36Sopenharmony_ci			vli_rshift1(a, ndigits);
103362306a36Sopenharmony_ci
103462306a36Sopenharmony_ci			if (vli_cmp(u, v, ndigits) < 0)
103562306a36Sopenharmony_ci				vli_add(u, u, mod, ndigits);
103662306a36Sopenharmony_ci
103762306a36Sopenharmony_ci			vli_sub(u, u, v, ndigits);
103862306a36Sopenharmony_ci			if (!EVEN(u))
103962306a36Sopenharmony_ci				carry = vli_add(u, u, mod, ndigits);
104062306a36Sopenharmony_ci
104162306a36Sopenharmony_ci			vli_rshift1(u, ndigits);
104262306a36Sopenharmony_ci			if (carry)
104362306a36Sopenharmony_ci				u[ndigits - 1] |= 0x8000000000000000ull;
104462306a36Sopenharmony_ci		} else {
104562306a36Sopenharmony_ci			vli_sub(b, b, a, ndigits);
104662306a36Sopenharmony_ci			vli_rshift1(b, ndigits);
104762306a36Sopenharmony_ci
104862306a36Sopenharmony_ci			if (vli_cmp(v, u, ndigits) < 0)
104962306a36Sopenharmony_ci				vli_add(v, v, mod, ndigits);
105062306a36Sopenharmony_ci
105162306a36Sopenharmony_ci			vli_sub(v, v, u, ndigits);
105262306a36Sopenharmony_ci			if (!EVEN(v))
105362306a36Sopenharmony_ci				carry = vli_add(v, v, mod, ndigits);
105462306a36Sopenharmony_ci
105562306a36Sopenharmony_ci			vli_rshift1(v, ndigits);
105662306a36Sopenharmony_ci			if (carry)
105762306a36Sopenharmony_ci				v[ndigits - 1] |= 0x8000000000000000ull;
105862306a36Sopenharmony_ci		}
105962306a36Sopenharmony_ci	}
106062306a36Sopenharmony_ci
106162306a36Sopenharmony_ci	vli_set(result, u, ndigits);
106262306a36Sopenharmony_ci}
106362306a36Sopenharmony_ciEXPORT_SYMBOL(vli_mod_inv);
106462306a36Sopenharmony_ci
106562306a36Sopenharmony_ci/* ------ Point operations ------ */
106662306a36Sopenharmony_ci
106762306a36Sopenharmony_ci/* Returns true if p_point is the point at infinity, false otherwise. */
106862306a36Sopenharmony_cibool ecc_point_is_zero(const struct ecc_point *point)
106962306a36Sopenharmony_ci{
107062306a36Sopenharmony_ci	return (vli_is_zero(point->x, point->ndigits) &&
107162306a36Sopenharmony_ci		vli_is_zero(point->y, point->ndigits));
107262306a36Sopenharmony_ci}
107362306a36Sopenharmony_ciEXPORT_SYMBOL(ecc_point_is_zero);
107462306a36Sopenharmony_ci
107562306a36Sopenharmony_ci/* Point multiplication algorithm using Montgomery's ladder with co-Z
107662306a36Sopenharmony_ci * coordinates. From https://eprint.iacr.org/2011/338.pdf
107762306a36Sopenharmony_ci */
107862306a36Sopenharmony_ci
107962306a36Sopenharmony_ci/* Double in place */
108062306a36Sopenharmony_cistatic void ecc_point_double_jacobian(u64 *x1, u64 *y1, u64 *z1,
108162306a36Sopenharmony_ci					const struct ecc_curve *curve)
108262306a36Sopenharmony_ci{
108362306a36Sopenharmony_ci	/* t1 = x, t2 = y, t3 = z */
108462306a36Sopenharmony_ci	u64 t4[ECC_MAX_DIGITS];
108562306a36Sopenharmony_ci	u64 t5[ECC_MAX_DIGITS];
108662306a36Sopenharmony_ci	const u64 *curve_prime = curve->p;
108762306a36Sopenharmony_ci	const unsigned int ndigits = curve->g.ndigits;
108862306a36Sopenharmony_ci
108962306a36Sopenharmony_ci	if (vli_is_zero(z1, ndigits))
109062306a36Sopenharmony_ci		return;
109162306a36Sopenharmony_ci
109262306a36Sopenharmony_ci	/* t4 = y1^2 */
109362306a36Sopenharmony_ci	vli_mod_square_fast(t4, y1, curve);
109462306a36Sopenharmony_ci	/* t5 = x1*y1^2 = A */
109562306a36Sopenharmony_ci	vli_mod_mult_fast(t5, x1, t4, curve);
109662306a36Sopenharmony_ci	/* t4 = y1^4 */
109762306a36Sopenharmony_ci	vli_mod_square_fast(t4, t4, curve);
109862306a36Sopenharmony_ci	/* t2 = y1*z1 = z3 */
109962306a36Sopenharmony_ci	vli_mod_mult_fast(y1, y1, z1, curve);
110062306a36Sopenharmony_ci	/* t3 = z1^2 */
110162306a36Sopenharmony_ci	vli_mod_square_fast(z1, z1, curve);
110262306a36Sopenharmony_ci
110362306a36Sopenharmony_ci	/* t1 = x1 + z1^2 */
110462306a36Sopenharmony_ci	vli_mod_add(x1, x1, z1, curve_prime, ndigits);
110562306a36Sopenharmony_ci	/* t3 = 2*z1^2 */
110662306a36Sopenharmony_ci	vli_mod_add(z1, z1, z1, curve_prime, ndigits);
110762306a36Sopenharmony_ci	/* t3 = x1 - z1^2 */
110862306a36Sopenharmony_ci	vli_mod_sub(z1, x1, z1, curve_prime, ndigits);
110962306a36Sopenharmony_ci	/* t1 = x1^2 - z1^4 */
111062306a36Sopenharmony_ci	vli_mod_mult_fast(x1, x1, z1, curve);
111162306a36Sopenharmony_ci
111262306a36Sopenharmony_ci	/* t3 = 2*(x1^2 - z1^4) */
111362306a36Sopenharmony_ci	vli_mod_add(z1, x1, x1, curve_prime, ndigits);
111462306a36Sopenharmony_ci	/* t1 = 3*(x1^2 - z1^4) */
111562306a36Sopenharmony_ci	vli_mod_add(x1, x1, z1, curve_prime, ndigits);
111662306a36Sopenharmony_ci	if (vli_test_bit(x1, 0)) {
111762306a36Sopenharmony_ci		u64 carry = vli_add(x1, x1, curve_prime, ndigits);
111862306a36Sopenharmony_ci
111962306a36Sopenharmony_ci		vli_rshift1(x1, ndigits);
112062306a36Sopenharmony_ci		x1[ndigits - 1] |= carry << 63;
112162306a36Sopenharmony_ci	} else {
112262306a36Sopenharmony_ci		vli_rshift1(x1, ndigits);
112362306a36Sopenharmony_ci	}
112462306a36Sopenharmony_ci	/* t1 = 3/2*(x1^2 - z1^4) = B */
112562306a36Sopenharmony_ci
112662306a36Sopenharmony_ci	/* t3 = B^2 */
112762306a36Sopenharmony_ci	vli_mod_square_fast(z1, x1, curve);
112862306a36Sopenharmony_ci	/* t3 = B^2 - A */
112962306a36Sopenharmony_ci	vli_mod_sub(z1, z1, t5, curve_prime, ndigits);
113062306a36Sopenharmony_ci	/* t3 = B^2 - 2A = x3 */
113162306a36Sopenharmony_ci	vli_mod_sub(z1, z1, t5, curve_prime, ndigits);
113262306a36Sopenharmony_ci	/* t5 = A - x3 */
113362306a36Sopenharmony_ci	vli_mod_sub(t5, t5, z1, curve_prime, ndigits);
113462306a36Sopenharmony_ci	/* t1 = B * (A - x3) */
113562306a36Sopenharmony_ci	vli_mod_mult_fast(x1, x1, t5, curve);
113662306a36Sopenharmony_ci	/* t4 = B * (A - x3) - y1^4 = y3 */
113762306a36Sopenharmony_ci	vli_mod_sub(t4, x1, t4, curve_prime, ndigits);
113862306a36Sopenharmony_ci
113962306a36Sopenharmony_ci	vli_set(x1, z1, ndigits);
114062306a36Sopenharmony_ci	vli_set(z1, y1, ndigits);
114162306a36Sopenharmony_ci	vli_set(y1, t4, ndigits);
114262306a36Sopenharmony_ci}
114362306a36Sopenharmony_ci
114462306a36Sopenharmony_ci/* Modify (x1, y1) => (x1 * z^2, y1 * z^3) */
114562306a36Sopenharmony_cistatic void apply_z(u64 *x1, u64 *y1, u64 *z, const struct ecc_curve *curve)
114662306a36Sopenharmony_ci{
114762306a36Sopenharmony_ci	u64 t1[ECC_MAX_DIGITS];
114862306a36Sopenharmony_ci
114962306a36Sopenharmony_ci	vli_mod_square_fast(t1, z, curve);		/* z^2 */
115062306a36Sopenharmony_ci	vli_mod_mult_fast(x1, x1, t1, curve);	/* x1 * z^2 */
115162306a36Sopenharmony_ci	vli_mod_mult_fast(t1, t1, z, curve);	/* z^3 */
115262306a36Sopenharmony_ci	vli_mod_mult_fast(y1, y1, t1, curve);	/* y1 * z^3 */
115362306a36Sopenharmony_ci}
115462306a36Sopenharmony_ci
115562306a36Sopenharmony_ci/* P = (x1, y1) => 2P, (x2, y2) => P' */
115662306a36Sopenharmony_cistatic void xycz_initial_double(u64 *x1, u64 *y1, u64 *x2, u64 *y2,
115762306a36Sopenharmony_ci				u64 *p_initial_z, const struct ecc_curve *curve)
115862306a36Sopenharmony_ci{
115962306a36Sopenharmony_ci	u64 z[ECC_MAX_DIGITS];
116062306a36Sopenharmony_ci	const unsigned int ndigits = curve->g.ndigits;
116162306a36Sopenharmony_ci
116262306a36Sopenharmony_ci	vli_set(x2, x1, ndigits);
116362306a36Sopenharmony_ci	vli_set(y2, y1, ndigits);
116462306a36Sopenharmony_ci
116562306a36Sopenharmony_ci	vli_clear(z, ndigits);
116662306a36Sopenharmony_ci	z[0] = 1;
116762306a36Sopenharmony_ci
116862306a36Sopenharmony_ci	if (p_initial_z)
116962306a36Sopenharmony_ci		vli_set(z, p_initial_z, ndigits);
117062306a36Sopenharmony_ci
117162306a36Sopenharmony_ci	apply_z(x1, y1, z, curve);
117262306a36Sopenharmony_ci
117362306a36Sopenharmony_ci	ecc_point_double_jacobian(x1, y1, z, curve);
117462306a36Sopenharmony_ci
117562306a36Sopenharmony_ci	apply_z(x2, y2, z, curve);
117662306a36Sopenharmony_ci}
117762306a36Sopenharmony_ci
117862306a36Sopenharmony_ci/* Input P = (x1, y1, Z), Q = (x2, y2, Z)
117962306a36Sopenharmony_ci * Output P' = (x1', y1', Z3), P + Q = (x3, y3, Z3)
118062306a36Sopenharmony_ci * or P => P', Q => P + Q
118162306a36Sopenharmony_ci */
118262306a36Sopenharmony_cistatic void xycz_add(u64 *x1, u64 *y1, u64 *x2, u64 *y2,
118362306a36Sopenharmony_ci			const struct ecc_curve *curve)
118462306a36Sopenharmony_ci{
118562306a36Sopenharmony_ci	/* t1 = X1, t2 = Y1, t3 = X2, t4 = Y2 */
118662306a36Sopenharmony_ci	u64 t5[ECC_MAX_DIGITS];
118762306a36Sopenharmony_ci	const u64 *curve_prime = curve->p;
118862306a36Sopenharmony_ci	const unsigned int ndigits = curve->g.ndigits;
118962306a36Sopenharmony_ci
119062306a36Sopenharmony_ci	/* t5 = x2 - x1 */
119162306a36Sopenharmony_ci	vli_mod_sub(t5, x2, x1, curve_prime, ndigits);
119262306a36Sopenharmony_ci	/* t5 = (x2 - x1)^2 = A */
119362306a36Sopenharmony_ci	vli_mod_square_fast(t5, t5, curve);
119462306a36Sopenharmony_ci	/* t1 = x1*A = B */
119562306a36Sopenharmony_ci	vli_mod_mult_fast(x1, x1, t5, curve);
119662306a36Sopenharmony_ci	/* t3 = x2*A = C */
119762306a36Sopenharmony_ci	vli_mod_mult_fast(x2, x2, t5, curve);
119862306a36Sopenharmony_ci	/* t4 = y2 - y1 */
119962306a36Sopenharmony_ci	vli_mod_sub(y2, y2, y1, curve_prime, ndigits);
120062306a36Sopenharmony_ci	/* t5 = (y2 - y1)^2 = D */
120162306a36Sopenharmony_ci	vli_mod_square_fast(t5, y2, curve);
120262306a36Sopenharmony_ci
120362306a36Sopenharmony_ci	/* t5 = D - B */
120462306a36Sopenharmony_ci	vli_mod_sub(t5, t5, x1, curve_prime, ndigits);
120562306a36Sopenharmony_ci	/* t5 = D - B - C = x3 */
120662306a36Sopenharmony_ci	vli_mod_sub(t5, t5, x2, curve_prime, ndigits);
120762306a36Sopenharmony_ci	/* t3 = C - B */
120862306a36Sopenharmony_ci	vli_mod_sub(x2, x2, x1, curve_prime, ndigits);
120962306a36Sopenharmony_ci	/* t2 = y1*(C - B) */
121062306a36Sopenharmony_ci	vli_mod_mult_fast(y1, y1, x2, curve);
121162306a36Sopenharmony_ci	/* t3 = B - x3 */
121262306a36Sopenharmony_ci	vli_mod_sub(x2, x1, t5, curve_prime, ndigits);
121362306a36Sopenharmony_ci	/* t4 = (y2 - y1)*(B - x3) */
121462306a36Sopenharmony_ci	vli_mod_mult_fast(y2, y2, x2, curve);
121562306a36Sopenharmony_ci	/* t4 = y3 */
121662306a36Sopenharmony_ci	vli_mod_sub(y2, y2, y1, curve_prime, ndigits);
121762306a36Sopenharmony_ci
121862306a36Sopenharmony_ci	vli_set(x2, t5, ndigits);
121962306a36Sopenharmony_ci}
122062306a36Sopenharmony_ci
122162306a36Sopenharmony_ci/* Input P = (x1, y1, Z), Q = (x2, y2, Z)
122262306a36Sopenharmony_ci * Output P + Q = (x3, y3, Z3), P - Q = (x3', y3', Z3)
122362306a36Sopenharmony_ci * or P => P - Q, Q => P + Q
122462306a36Sopenharmony_ci */
122562306a36Sopenharmony_cistatic void xycz_add_c(u64 *x1, u64 *y1, u64 *x2, u64 *y2,
122662306a36Sopenharmony_ci			const struct ecc_curve *curve)
122762306a36Sopenharmony_ci{
122862306a36Sopenharmony_ci	/* t1 = X1, t2 = Y1, t3 = X2, t4 = Y2 */
122962306a36Sopenharmony_ci	u64 t5[ECC_MAX_DIGITS];
123062306a36Sopenharmony_ci	u64 t6[ECC_MAX_DIGITS];
123162306a36Sopenharmony_ci	u64 t7[ECC_MAX_DIGITS];
123262306a36Sopenharmony_ci	const u64 *curve_prime = curve->p;
123362306a36Sopenharmony_ci	const unsigned int ndigits = curve->g.ndigits;
123462306a36Sopenharmony_ci
123562306a36Sopenharmony_ci	/* t5 = x2 - x1 */
123662306a36Sopenharmony_ci	vli_mod_sub(t5, x2, x1, curve_prime, ndigits);
123762306a36Sopenharmony_ci	/* t5 = (x2 - x1)^2 = A */
123862306a36Sopenharmony_ci	vli_mod_square_fast(t5, t5, curve);
123962306a36Sopenharmony_ci	/* t1 = x1*A = B */
124062306a36Sopenharmony_ci	vli_mod_mult_fast(x1, x1, t5, curve);
124162306a36Sopenharmony_ci	/* t3 = x2*A = C */
124262306a36Sopenharmony_ci	vli_mod_mult_fast(x2, x2, t5, curve);
124362306a36Sopenharmony_ci	/* t4 = y2 + y1 */
124462306a36Sopenharmony_ci	vli_mod_add(t5, y2, y1, curve_prime, ndigits);
124562306a36Sopenharmony_ci	/* t4 = y2 - y1 */
124662306a36Sopenharmony_ci	vli_mod_sub(y2, y2, y1, curve_prime, ndigits);
124762306a36Sopenharmony_ci
124862306a36Sopenharmony_ci	/* t6 = C - B */
124962306a36Sopenharmony_ci	vli_mod_sub(t6, x2, x1, curve_prime, ndigits);
125062306a36Sopenharmony_ci	/* t2 = y1 * (C - B) */
125162306a36Sopenharmony_ci	vli_mod_mult_fast(y1, y1, t6, curve);
125262306a36Sopenharmony_ci	/* t6 = B + C */
125362306a36Sopenharmony_ci	vli_mod_add(t6, x1, x2, curve_prime, ndigits);
125462306a36Sopenharmony_ci	/* t3 = (y2 - y1)^2 */
125562306a36Sopenharmony_ci	vli_mod_square_fast(x2, y2, curve);
125662306a36Sopenharmony_ci	/* t3 = x3 */
125762306a36Sopenharmony_ci	vli_mod_sub(x2, x2, t6, curve_prime, ndigits);
125862306a36Sopenharmony_ci
125962306a36Sopenharmony_ci	/* t7 = B - x3 */
126062306a36Sopenharmony_ci	vli_mod_sub(t7, x1, x2, curve_prime, ndigits);
126162306a36Sopenharmony_ci	/* t4 = (y2 - y1)*(B - x3) */
126262306a36Sopenharmony_ci	vli_mod_mult_fast(y2, y2, t7, curve);
126362306a36Sopenharmony_ci	/* t4 = y3 */
126462306a36Sopenharmony_ci	vli_mod_sub(y2, y2, y1, curve_prime, ndigits);
126562306a36Sopenharmony_ci
126662306a36Sopenharmony_ci	/* t7 = (y2 + y1)^2 = F */
126762306a36Sopenharmony_ci	vli_mod_square_fast(t7, t5, curve);
126862306a36Sopenharmony_ci	/* t7 = x3' */
126962306a36Sopenharmony_ci	vli_mod_sub(t7, t7, t6, curve_prime, ndigits);
127062306a36Sopenharmony_ci	/* t6 = x3' - B */
127162306a36Sopenharmony_ci	vli_mod_sub(t6, t7, x1, curve_prime, ndigits);
127262306a36Sopenharmony_ci	/* t6 = (y2 + y1)*(x3' - B) */
127362306a36Sopenharmony_ci	vli_mod_mult_fast(t6, t6, t5, curve);
127462306a36Sopenharmony_ci	/* t2 = y3' */
127562306a36Sopenharmony_ci	vli_mod_sub(y1, t6, y1, curve_prime, ndigits);
127662306a36Sopenharmony_ci
127762306a36Sopenharmony_ci	vli_set(x1, t7, ndigits);
127862306a36Sopenharmony_ci}
127962306a36Sopenharmony_ci
128062306a36Sopenharmony_cistatic void ecc_point_mult(struct ecc_point *result,
128162306a36Sopenharmony_ci			   const struct ecc_point *point, const u64 *scalar,
128262306a36Sopenharmony_ci			   u64 *initial_z, const struct ecc_curve *curve,
128362306a36Sopenharmony_ci			   unsigned int ndigits)
128462306a36Sopenharmony_ci{
128562306a36Sopenharmony_ci	/* R0 and R1 */
128662306a36Sopenharmony_ci	u64 rx[2][ECC_MAX_DIGITS];
128762306a36Sopenharmony_ci	u64 ry[2][ECC_MAX_DIGITS];
128862306a36Sopenharmony_ci	u64 z[ECC_MAX_DIGITS];
128962306a36Sopenharmony_ci	u64 sk[2][ECC_MAX_DIGITS];
129062306a36Sopenharmony_ci	u64 *curve_prime = curve->p;
129162306a36Sopenharmony_ci	int i, nb;
129262306a36Sopenharmony_ci	int num_bits;
129362306a36Sopenharmony_ci	int carry;
129462306a36Sopenharmony_ci
129562306a36Sopenharmony_ci	carry = vli_add(sk[0], scalar, curve->n, ndigits);
129662306a36Sopenharmony_ci	vli_add(sk[1], sk[0], curve->n, ndigits);
129762306a36Sopenharmony_ci	scalar = sk[!carry];
129862306a36Sopenharmony_ci	num_bits = sizeof(u64) * ndigits * 8 + 1;
129962306a36Sopenharmony_ci
130062306a36Sopenharmony_ci	vli_set(rx[1], point->x, ndigits);
130162306a36Sopenharmony_ci	vli_set(ry[1], point->y, ndigits);
130262306a36Sopenharmony_ci
130362306a36Sopenharmony_ci	xycz_initial_double(rx[1], ry[1], rx[0], ry[0], initial_z, curve);
130462306a36Sopenharmony_ci
130562306a36Sopenharmony_ci	for (i = num_bits - 2; i > 0; i--) {
130662306a36Sopenharmony_ci		nb = !vli_test_bit(scalar, i);
130762306a36Sopenharmony_ci		xycz_add_c(rx[1 - nb], ry[1 - nb], rx[nb], ry[nb], curve);
130862306a36Sopenharmony_ci		xycz_add(rx[nb], ry[nb], rx[1 - nb], ry[1 - nb], curve);
130962306a36Sopenharmony_ci	}
131062306a36Sopenharmony_ci
131162306a36Sopenharmony_ci	nb = !vli_test_bit(scalar, 0);
131262306a36Sopenharmony_ci	xycz_add_c(rx[1 - nb], ry[1 - nb], rx[nb], ry[nb], curve);
131362306a36Sopenharmony_ci
131462306a36Sopenharmony_ci	/* Find final 1/Z value. */
131562306a36Sopenharmony_ci	/* X1 - X0 */
131662306a36Sopenharmony_ci	vli_mod_sub(z, rx[1], rx[0], curve_prime, ndigits);
131762306a36Sopenharmony_ci	/* Yb * (X1 - X0) */
131862306a36Sopenharmony_ci	vli_mod_mult_fast(z, z, ry[1 - nb], curve);
131962306a36Sopenharmony_ci	/* xP * Yb * (X1 - X0) */
132062306a36Sopenharmony_ci	vli_mod_mult_fast(z, z, point->x, curve);
132162306a36Sopenharmony_ci
132262306a36Sopenharmony_ci	/* 1 / (xP * Yb * (X1 - X0)) */
132362306a36Sopenharmony_ci	vli_mod_inv(z, z, curve_prime, point->ndigits);
132462306a36Sopenharmony_ci
132562306a36Sopenharmony_ci	/* yP / (xP * Yb * (X1 - X0)) */
132662306a36Sopenharmony_ci	vli_mod_mult_fast(z, z, point->y, curve);
132762306a36Sopenharmony_ci	/* Xb * yP / (xP * Yb * (X1 - X0)) */
132862306a36Sopenharmony_ci	vli_mod_mult_fast(z, z, rx[1 - nb], curve);
132962306a36Sopenharmony_ci	/* End 1/Z calculation */
133062306a36Sopenharmony_ci
133162306a36Sopenharmony_ci	xycz_add(rx[nb], ry[nb], rx[1 - nb], ry[1 - nb], curve);
133262306a36Sopenharmony_ci
133362306a36Sopenharmony_ci	apply_z(rx[0], ry[0], z, curve);
133462306a36Sopenharmony_ci
133562306a36Sopenharmony_ci	vli_set(result->x, rx[0], ndigits);
133662306a36Sopenharmony_ci	vli_set(result->y, ry[0], ndigits);
133762306a36Sopenharmony_ci}
133862306a36Sopenharmony_ci
133962306a36Sopenharmony_ci/* Computes R = P + Q mod p */
134062306a36Sopenharmony_cistatic void ecc_point_add(const struct ecc_point *result,
134162306a36Sopenharmony_ci		   const struct ecc_point *p, const struct ecc_point *q,
134262306a36Sopenharmony_ci		   const struct ecc_curve *curve)
134362306a36Sopenharmony_ci{
134462306a36Sopenharmony_ci	u64 z[ECC_MAX_DIGITS];
134562306a36Sopenharmony_ci	u64 px[ECC_MAX_DIGITS];
134662306a36Sopenharmony_ci	u64 py[ECC_MAX_DIGITS];
134762306a36Sopenharmony_ci	unsigned int ndigits = curve->g.ndigits;
134862306a36Sopenharmony_ci
134962306a36Sopenharmony_ci	vli_set(result->x, q->x, ndigits);
135062306a36Sopenharmony_ci	vli_set(result->y, q->y, ndigits);
135162306a36Sopenharmony_ci	vli_mod_sub(z, result->x, p->x, curve->p, ndigits);
135262306a36Sopenharmony_ci	vli_set(px, p->x, ndigits);
135362306a36Sopenharmony_ci	vli_set(py, p->y, ndigits);
135462306a36Sopenharmony_ci	xycz_add(px, py, result->x, result->y, curve);
135562306a36Sopenharmony_ci	vli_mod_inv(z, z, curve->p, ndigits);
135662306a36Sopenharmony_ci	apply_z(result->x, result->y, z, curve);
135762306a36Sopenharmony_ci}
135862306a36Sopenharmony_ci
135962306a36Sopenharmony_ci/* Computes R = u1P + u2Q mod p using Shamir's trick.
136062306a36Sopenharmony_ci * Based on: Kenneth MacKay's micro-ecc (2014).
136162306a36Sopenharmony_ci */
136262306a36Sopenharmony_civoid ecc_point_mult_shamir(const struct ecc_point *result,
136362306a36Sopenharmony_ci			   const u64 *u1, const struct ecc_point *p,
136462306a36Sopenharmony_ci			   const u64 *u2, const struct ecc_point *q,
136562306a36Sopenharmony_ci			   const struct ecc_curve *curve)
136662306a36Sopenharmony_ci{
136762306a36Sopenharmony_ci	u64 z[ECC_MAX_DIGITS];
136862306a36Sopenharmony_ci	u64 sump[2][ECC_MAX_DIGITS];
136962306a36Sopenharmony_ci	u64 *rx = result->x;
137062306a36Sopenharmony_ci	u64 *ry = result->y;
137162306a36Sopenharmony_ci	unsigned int ndigits = curve->g.ndigits;
137262306a36Sopenharmony_ci	unsigned int num_bits;
137362306a36Sopenharmony_ci	struct ecc_point sum = ECC_POINT_INIT(sump[0], sump[1], ndigits);
137462306a36Sopenharmony_ci	const struct ecc_point *points[4];
137562306a36Sopenharmony_ci	const struct ecc_point *point;
137662306a36Sopenharmony_ci	unsigned int idx;
137762306a36Sopenharmony_ci	int i;
137862306a36Sopenharmony_ci
137962306a36Sopenharmony_ci	ecc_point_add(&sum, p, q, curve);
138062306a36Sopenharmony_ci	points[0] = NULL;
138162306a36Sopenharmony_ci	points[1] = p;
138262306a36Sopenharmony_ci	points[2] = q;
138362306a36Sopenharmony_ci	points[3] = &sum;
138462306a36Sopenharmony_ci
138562306a36Sopenharmony_ci	num_bits = max(vli_num_bits(u1, ndigits), vli_num_bits(u2, ndigits));
138662306a36Sopenharmony_ci	i = num_bits - 1;
138762306a36Sopenharmony_ci	idx = !!vli_test_bit(u1, i);
138862306a36Sopenharmony_ci	idx |= (!!vli_test_bit(u2, i)) << 1;
138962306a36Sopenharmony_ci	point = points[idx];
139062306a36Sopenharmony_ci
139162306a36Sopenharmony_ci	vli_set(rx, point->x, ndigits);
139262306a36Sopenharmony_ci	vli_set(ry, point->y, ndigits);
139362306a36Sopenharmony_ci	vli_clear(z + 1, ndigits - 1);
139462306a36Sopenharmony_ci	z[0] = 1;
139562306a36Sopenharmony_ci
139662306a36Sopenharmony_ci	for (--i; i >= 0; i--) {
139762306a36Sopenharmony_ci		ecc_point_double_jacobian(rx, ry, z, curve);
139862306a36Sopenharmony_ci		idx = !!vli_test_bit(u1, i);
139962306a36Sopenharmony_ci		idx |= (!!vli_test_bit(u2, i)) << 1;
140062306a36Sopenharmony_ci		point = points[idx];
140162306a36Sopenharmony_ci		if (point) {
140262306a36Sopenharmony_ci			u64 tx[ECC_MAX_DIGITS];
140362306a36Sopenharmony_ci			u64 ty[ECC_MAX_DIGITS];
140462306a36Sopenharmony_ci			u64 tz[ECC_MAX_DIGITS];
140562306a36Sopenharmony_ci
140662306a36Sopenharmony_ci			vli_set(tx, point->x, ndigits);
140762306a36Sopenharmony_ci			vli_set(ty, point->y, ndigits);
140862306a36Sopenharmony_ci			apply_z(tx, ty, z, curve);
140962306a36Sopenharmony_ci			vli_mod_sub(tz, rx, tx, curve->p, ndigits);
141062306a36Sopenharmony_ci			xycz_add(tx, ty, rx, ry, curve);
141162306a36Sopenharmony_ci			vli_mod_mult_fast(z, z, tz, curve);
141262306a36Sopenharmony_ci		}
141362306a36Sopenharmony_ci	}
141462306a36Sopenharmony_ci	vli_mod_inv(z, z, curve->p, ndigits);
141562306a36Sopenharmony_ci	apply_z(rx, ry, z, curve);
141662306a36Sopenharmony_ci}
141762306a36Sopenharmony_ciEXPORT_SYMBOL(ecc_point_mult_shamir);
141862306a36Sopenharmony_ci
141962306a36Sopenharmony_cistatic int __ecc_is_key_valid(const struct ecc_curve *curve,
142062306a36Sopenharmony_ci			      const u64 *private_key, unsigned int ndigits)
142162306a36Sopenharmony_ci{
142262306a36Sopenharmony_ci	u64 one[ECC_MAX_DIGITS] = { 1, };
142362306a36Sopenharmony_ci	u64 res[ECC_MAX_DIGITS];
142462306a36Sopenharmony_ci
142562306a36Sopenharmony_ci	if (!private_key)
142662306a36Sopenharmony_ci		return -EINVAL;
142762306a36Sopenharmony_ci
142862306a36Sopenharmony_ci	if (curve->g.ndigits != ndigits)
142962306a36Sopenharmony_ci		return -EINVAL;
143062306a36Sopenharmony_ci
143162306a36Sopenharmony_ci	/* Make sure the private key is in the range [2, n-3]. */
143262306a36Sopenharmony_ci	if (vli_cmp(one, private_key, ndigits) != -1)
143362306a36Sopenharmony_ci		return -EINVAL;
143462306a36Sopenharmony_ci	vli_sub(res, curve->n, one, ndigits);
143562306a36Sopenharmony_ci	vli_sub(res, res, one, ndigits);
143662306a36Sopenharmony_ci	if (vli_cmp(res, private_key, ndigits) != 1)
143762306a36Sopenharmony_ci		return -EINVAL;
143862306a36Sopenharmony_ci
143962306a36Sopenharmony_ci	return 0;
144062306a36Sopenharmony_ci}
144162306a36Sopenharmony_ci
144262306a36Sopenharmony_ciint ecc_is_key_valid(unsigned int curve_id, unsigned int ndigits,
144362306a36Sopenharmony_ci		     const u64 *private_key, unsigned int private_key_len)
144462306a36Sopenharmony_ci{
144562306a36Sopenharmony_ci	int nbytes;
144662306a36Sopenharmony_ci	const struct ecc_curve *curve = ecc_get_curve(curve_id);
144762306a36Sopenharmony_ci
144862306a36Sopenharmony_ci	nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
144962306a36Sopenharmony_ci
145062306a36Sopenharmony_ci	if (private_key_len != nbytes)
145162306a36Sopenharmony_ci		return -EINVAL;
145262306a36Sopenharmony_ci
145362306a36Sopenharmony_ci	return __ecc_is_key_valid(curve, private_key, ndigits);
145462306a36Sopenharmony_ci}
145562306a36Sopenharmony_ciEXPORT_SYMBOL(ecc_is_key_valid);
145662306a36Sopenharmony_ci
145762306a36Sopenharmony_ci/*
145862306a36Sopenharmony_ci * ECC private keys are generated using the method of extra random bits,
145962306a36Sopenharmony_ci * equivalent to that described in FIPS 186-4, Appendix B.4.1.
146062306a36Sopenharmony_ci *
146162306a36Sopenharmony_ci * d = (c mod(n–1)) + 1    where c is a string of random bits, 64 bits longer
146262306a36Sopenharmony_ci *                         than requested
146362306a36Sopenharmony_ci * 0 <= c mod(n-1) <= n-2  and implies that
146462306a36Sopenharmony_ci * 1 <= d <= n-1
146562306a36Sopenharmony_ci *
146662306a36Sopenharmony_ci * This method generates a private key uniformly distributed in the range
146762306a36Sopenharmony_ci * [1, n-1].
146862306a36Sopenharmony_ci */
146962306a36Sopenharmony_ciint ecc_gen_privkey(unsigned int curve_id, unsigned int ndigits, u64 *privkey)
147062306a36Sopenharmony_ci{
147162306a36Sopenharmony_ci	const struct ecc_curve *curve = ecc_get_curve(curve_id);
147262306a36Sopenharmony_ci	u64 priv[ECC_MAX_DIGITS];
147362306a36Sopenharmony_ci	unsigned int nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
147462306a36Sopenharmony_ci	unsigned int nbits = vli_num_bits(curve->n, ndigits);
147562306a36Sopenharmony_ci	int err;
147662306a36Sopenharmony_ci
147762306a36Sopenharmony_ci	/* Check that N is included in Table 1 of FIPS 186-4, section 6.1.1 */
147862306a36Sopenharmony_ci	if (nbits < 160 || ndigits > ARRAY_SIZE(priv))
147962306a36Sopenharmony_ci		return -EINVAL;
148062306a36Sopenharmony_ci
148162306a36Sopenharmony_ci	/*
148262306a36Sopenharmony_ci	 * FIPS 186-4 recommends that the private key should be obtained from a
148362306a36Sopenharmony_ci	 * RBG with a security strength equal to or greater than the security
148462306a36Sopenharmony_ci	 * strength associated with N.
148562306a36Sopenharmony_ci	 *
148662306a36Sopenharmony_ci	 * The maximum security strength identified by NIST SP800-57pt1r4 for
148762306a36Sopenharmony_ci	 * ECC is 256 (N >= 512).
148862306a36Sopenharmony_ci	 *
148962306a36Sopenharmony_ci	 * This condition is met by the default RNG because it selects a favored
149062306a36Sopenharmony_ci	 * DRBG with a security strength of 256.
149162306a36Sopenharmony_ci	 */
149262306a36Sopenharmony_ci	if (crypto_get_default_rng())
149362306a36Sopenharmony_ci		return -EFAULT;
149462306a36Sopenharmony_ci
149562306a36Sopenharmony_ci	err = crypto_rng_get_bytes(crypto_default_rng, (u8 *)priv, nbytes);
149662306a36Sopenharmony_ci	crypto_put_default_rng();
149762306a36Sopenharmony_ci	if (err)
149862306a36Sopenharmony_ci		return err;
149962306a36Sopenharmony_ci
150062306a36Sopenharmony_ci	/* Make sure the private key is in the valid range. */
150162306a36Sopenharmony_ci	if (__ecc_is_key_valid(curve, priv, ndigits))
150262306a36Sopenharmony_ci		return -EINVAL;
150362306a36Sopenharmony_ci
150462306a36Sopenharmony_ci	ecc_swap_digits(priv, privkey, ndigits);
150562306a36Sopenharmony_ci
150662306a36Sopenharmony_ci	return 0;
150762306a36Sopenharmony_ci}
150862306a36Sopenharmony_ciEXPORT_SYMBOL(ecc_gen_privkey);
150962306a36Sopenharmony_ci
151062306a36Sopenharmony_ciint ecc_make_pub_key(unsigned int curve_id, unsigned int ndigits,
151162306a36Sopenharmony_ci		     const u64 *private_key, u64 *public_key)
151262306a36Sopenharmony_ci{
151362306a36Sopenharmony_ci	int ret = 0;
151462306a36Sopenharmony_ci	struct ecc_point *pk;
151562306a36Sopenharmony_ci	u64 priv[ECC_MAX_DIGITS];
151662306a36Sopenharmony_ci	const struct ecc_curve *curve = ecc_get_curve(curve_id);
151762306a36Sopenharmony_ci
151862306a36Sopenharmony_ci	if (!private_key || !curve || ndigits > ARRAY_SIZE(priv)) {
151962306a36Sopenharmony_ci		ret = -EINVAL;
152062306a36Sopenharmony_ci		goto out;
152162306a36Sopenharmony_ci	}
152262306a36Sopenharmony_ci
152362306a36Sopenharmony_ci	ecc_swap_digits(private_key, priv, ndigits);
152462306a36Sopenharmony_ci
152562306a36Sopenharmony_ci	pk = ecc_alloc_point(ndigits);
152662306a36Sopenharmony_ci	if (!pk) {
152762306a36Sopenharmony_ci		ret = -ENOMEM;
152862306a36Sopenharmony_ci		goto out;
152962306a36Sopenharmony_ci	}
153062306a36Sopenharmony_ci
153162306a36Sopenharmony_ci	ecc_point_mult(pk, &curve->g, priv, NULL, curve, ndigits);
153262306a36Sopenharmony_ci
153362306a36Sopenharmony_ci	/* SP800-56A rev 3 5.6.2.1.3 key check */
153462306a36Sopenharmony_ci	if (ecc_is_pubkey_valid_full(curve, pk)) {
153562306a36Sopenharmony_ci		ret = -EAGAIN;
153662306a36Sopenharmony_ci		goto err_free_point;
153762306a36Sopenharmony_ci	}
153862306a36Sopenharmony_ci
153962306a36Sopenharmony_ci	ecc_swap_digits(pk->x, public_key, ndigits);
154062306a36Sopenharmony_ci	ecc_swap_digits(pk->y, &public_key[ndigits], ndigits);
154162306a36Sopenharmony_ci
154262306a36Sopenharmony_cierr_free_point:
154362306a36Sopenharmony_ci	ecc_free_point(pk);
154462306a36Sopenharmony_ciout:
154562306a36Sopenharmony_ci	return ret;
154662306a36Sopenharmony_ci}
154762306a36Sopenharmony_ciEXPORT_SYMBOL(ecc_make_pub_key);
154862306a36Sopenharmony_ci
154962306a36Sopenharmony_ci/* SP800-56A section 5.6.2.3.4 partial verification: ephemeral keys only */
155062306a36Sopenharmony_ciint ecc_is_pubkey_valid_partial(const struct ecc_curve *curve,
155162306a36Sopenharmony_ci				struct ecc_point *pk)
155262306a36Sopenharmony_ci{
155362306a36Sopenharmony_ci	u64 yy[ECC_MAX_DIGITS], xxx[ECC_MAX_DIGITS], w[ECC_MAX_DIGITS];
155462306a36Sopenharmony_ci
155562306a36Sopenharmony_ci	if (WARN_ON(pk->ndigits != curve->g.ndigits))
155662306a36Sopenharmony_ci		return -EINVAL;
155762306a36Sopenharmony_ci
155862306a36Sopenharmony_ci	/* Check 1: Verify key is not the zero point. */
155962306a36Sopenharmony_ci	if (ecc_point_is_zero(pk))
156062306a36Sopenharmony_ci		return -EINVAL;
156162306a36Sopenharmony_ci
156262306a36Sopenharmony_ci	/* Check 2: Verify key is in the range [1, p-1]. */
156362306a36Sopenharmony_ci	if (vli_cmp(curve->p, pk->x, pk->ndigits) != 1)
156462306a36Sopenharmony_ci		return -EINVAL;
156562306a36Sopenharmony_ci	if (vli_cmp(curve->p, pk->y, pk->ndigits) != 1)
156662306a36Sopenharmony_ci		return -EINVAL;
156762306a36Sopenharmony_ci
156862306a36Sopenharmony_ci	/* Check 3: Verify that y^2 == (x^3 + a·x + b) mod p */
156962306a36Sopenharmony_ci	vli_mod_square_fast(yy, pk->y, curve); /* y^2 */
157062306a36Sopenharmony_ci	vli_mod_square_fast(xxx, pk->x, curve); /* x^2 */
157162306a36Sopenharmony_ci	vli_mod_mult_fast(xxx, xxx, pk->x, curve); /* x^3 */
157262306a36Sopenharmony_ci	vli_mod_mult_fast(w, curve->a, pk->x, curve); /* a·x */
157362306a36Sopenharmony_ci	vli_mod_add(w, w, curve->b, curve->p, pk->ndigits); /* a·x + b */
157462306a36Sopenharmony_ci	vli_mod_add(w, w, xxx, curve->p, pk->ndigits); /* x^3 + a·x + b */
157562306a36Sopenharmony_ci	if (vli_cmp(yy, w, pk->ndigits) != 0) /* Equation */
157662306a36Sopenharmony_ci		return -EINVAL;
157762306a36Sopenharmony_ci
157862306a36Sopenharmony_ci	return 0;
157962306a36Sopenharmony_ci}
158062306a36Sopenharmony_ciEXPORT_SYMBOL(ecc_is_pubkey_valid_partial);
158162306a36Sopenharmony_ci
158262306a36Sopenharmony_ci/* SP800-56A section 5.6.2.3.3 full verification */
158362306a36Sopenharmony_ciint ecc_is_pubkey_valid_full(const struct ecc_curve *curve,
158462306a36Sopenharmony_ci			     struct ecc_point *pk)
158562306a36Sopenharmony_ci{
158662306a36Sopenharmony_ci	struct ecc_point *nQ;
158762306a36Sopenharmony_ci
158862306a36Sopenharmony_ci	/* Checks 1 through 3 */
158962306a36Sopenharmony_ci	int ret = ecc_is_pubkey_valid_partial(curve, pk);
159062306a36Sopenharmony_ci
159162306a36Sopenharmony_ci	if (ret)
159262306a36Sopenharmony_ci		return ret;
159362306a36Sopenharmony_ci
159462306a36Sopenharmony_ci	/* Check 4: Verify that nQ is the zero point. */
159562306a36Sopenharmony_ci	nQ = ecc_alloc_point(pk->ndigits);
159662306a36Sopenharmony_ci	if (!nQ)
159762306a36Sopenharmony_ci		return -ENOMEM;
159862306a36Sopenharmony_ci
159962306a36Sopenharmony_ci	ecc_point_mult(nQ, pk, curve->n, NULL, curve, pk->ndigits);
160062306a36Sopenharmony_ci	if (!ecc_point_is_zero(nQ))
160162306a36Sopenharmony_ci		ret = -EINVAL;
160262306a36Sopenharmony_ci
160362306a36Sopenharmony_ci	ecc_free_point(nQ);
160462306a36Sopenharmony_ci
160562306a36Sopenharmony_ci	return ret;
160662306a36Sopenharmony_ci}
160762306a36Sopenharmony_ciEXPORT_SYMBOL(ecc_is_pubkey_valid_full);
160862306a36Sopenharmony_ci
160962306a36Sopenharmony_ciint crypto_ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits,
161062306a36Sopenharmony_ci			      const u64 *private_key, const u64 *public_key,
161162306a36Sopenharmony_ci			      u64 *secret)
161262306a36Sopenharmony_ci{
161362306a36Sopenharmony_ci	int ret = 0;
161462306a36Sopenharmony_ci	struct ecc_point *product, *pk;
161562306a36Sopenharmony_ci	u64 priv[ECC_MAX_DIGITS];
161662306a36Sopenharmony_ci	u64 rand_z[ECC_MAX_DIGITS];
161762306a36Sopenharmony_ci	unsigned int nbytes;
161862306a36Sopenharmony_ci	const struct ecc_curve *curve = ecc_get_curve(curve_id);
161962306a36Sopenharmony_ci
162062306a36Sopenharmony_ci	if (!private_key || !public_key || !curve ||
162162306a36Sopenharmony_ci	    ndigits > ARRAY_SIZE(priv) || ndigits > ARRAY_SIZE(rand_z)) {
162262306a36Sopenharmony_ci		ret = -EINVAL;
162362306a36Sopenharmony_ci		goto out;
162462306a36Sopenharmony_ci	}
162562306a36Sopenharmony_ci
162662306a36Sopenharmony_ci	nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
162762306a36Sopenharmony_ci
162862306a36Sopenharmony_ci	get_random_bytes(rand_z, nbytes);
162962306a36Sopenharmony_ci
163062306a36Sopenharmony_ci	pk = ecc_alloc_point(ndigits);
163162306a36Sopenharmony_ci	if (!pk) {
163262306a36Sopenharmony_ci		ret = -ENOMEM;
163362306a36Sopenharmony_ci		goto out;
163462306a36Sopenharmony_ci	}
163562306a36Sopenharmony_ci
163662306a36Sopenharmony_ci	ecc_swap_digits(public_key, pk->x, ndigits);
163762306a36Sopenharmony_ci	ecc_swap_digits(&public_key[ndigits], pk->y, ndigits);
163862306a36Sopenharmony_ci	ret = ecc_is_pubkey_valid_partial(curve, pk);
163962306a36Sopenharmony_ci	if (ret)
164062306a36Sopenharmony_ci		goto err_alloc_product;
164162306a36Sopenharmony_ci
164262306a36Sopenharmony_ci	ecc_swap_digits(private_key, priv, ndigits);
164362306a36Sopenharmony_ci
164462306a36Sopenharmony_ci	product = ecc_alloc_point(ndigits);
164562306a36Sopenharmony_ci	if (!product) {
164662306a36Sopenharmony_ci		ret = -ENOMEM;
164762306a36Sopenharmony_ci		goto err_alloc_product;
164862306a36Sopenharmony_ci	}
164962306a36Sopenharmony_ci
165062306a36Sopenharmony_ci	ecc_point_mult(product, pk, priv, rand_z, curve, ndigits);
165162306a36Sopenharmony_ci
165262306a36Sopenharmony_ci	if (ecc_point_is_zero(product)) {
165362306a36Sopenharmony_ci		ret = -EFAULT;
165462306a36Sopenharmony_ci		goto err_validity;
165562306a36Sopenharmony_ci	}
165662306a36Sopenharmony_ci
165762306a36Sopenharmony_ci	ecc_swap_digits(product->x, secret, ndigits);
165862306a36Sopenharmony_ci
165962306a36Sopenharmony_cierr_validity:
166062306a36Sopenharmony_ci	memzero_explicit(priv, sizeof(priv));
166162306a36Sopenharmony_ci	memzero_explicit(rand_z, sizeof(rand_z));
166262306a36Sopenharmony_ci	ecc_free_point(product);
166362306a36Sopenharmony_cierr_alloc_product:
166462306a36Sopenharmony_ci	ecc_free_point(pk);
166562306a36Sopenharmony_ciout:
166662306a36Sopenharmony_ci	return ret;
166762306a36Sopenharmony_ci}
166862306a36Sopenharmony_ciEXPORT_SYMBOL(crypto_ecdh_shared_secret);
166962306a36Sopenharmony_ci
167062306a36Sopenharmony_ciMODULE_LICENSE("Dual BSD/GPL");
1671