// SPDX-License-Identifier: GPL-2.0
/*
 * OpenSSL/Cryptogams accelerated Poly1305 transform for arm64
 *
 * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org>
 */
762306a36Sopenharmony_ci
862306a36Sopenharmony_ci#include <asm/hwcap.h>
962306a36Sopenharmony_ci#include <asm/neon.h>
1062306a36Sopenharmony_ci#include <asm/simd.h>
1162306a36Sopenharmony_ci#include <asm/unaligned.h>
1262306a36Sopenharmony_ci#include <crypto/algapi.h>
1362306a36Sopenharmony_ci#include <crypto/internal/hash.h>
1462306a36Sopenharmony_ci#include <crypto/internal/poly1305.h>
1562306a36Sopenharmony_ci#include <crypto/internal/simd.h>
1662306a36Sopenharmony_ci#include <linux/cpufeature.h>
1762306a36Sopenharmony_ci#include <linux/crypto.h>
1862306a36Sopenharmony_ci#include <linux/jump_label.h>
1962306a36Sopenharmony_ci#include <linux/module.h>
2062306a36Sopenharmony_ci
2162306a36Sopenharmony_ciasmlinkage void poly1305_init_arm64(void *state, const u8 *key);
2262306a36Sopenharmony_ciasmlinkage void poly1305_blocks(void *state, const u8 *src, u32 len, u32 hibit);
2362306a36Sopenharmony_ciasmlinkage void poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit);
2462306a36Sopenharmony_ciasmlinkage void poly1305_emit(void *state, u8 *digest, const u32 *nonce);
2562306a36Sopenharmony_ci
2662306a36Sopenharmony_cistatic __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
2762306a36Sopenharmony_ci
2862306a36Sopenharmony_civoid poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE])
2962306a36Sopenharmony_ci{
3062306a36Sopenharmony_ci	poly1305_init_arm64(&dctx->h, key);
3162306a36Sopenharmony_ci	dctx->s[0] = get_unaligned_le32(key + 16);
3262306a36Sopenharmony_ci	dctx->s[1] = get_unaligned_le32(key + 20);
3362306a36Sopenharmony_ci	dctx->s[2] = get_unaligned_le32(key + 24);
3462306a36Sopenharmony_ci	dctx->s[3] = get_unaligned_le32(key + 28);
3562306a36Sopenharmony_ci	dctx->buflen = 0;
3662306a36Sopenharmony_ci}
3762306a36Sopenharmony_ciEXPORT_SYMBOL(poly1305_init_arch);
3862306a36Sopenharmony_ci
3962306a36Sopenharmony_cistatic int neon_poly1305_init(struct shash_desc *desc)
4062306a36Sopenharmony_ci{
4162306a36Sopenharmony_ci	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
4262306a36Sopenharmony_ci
4362306a36Sopenharmony_ci	dctx->buflen = 0;
4462306a36Sopenharmony_ci	dctx->rset = 0;
4562306a36Sopenharmony_ci	dctx->sset = false;
4662306a36Sopenharmony_ci
4762306a36Sopenharmony_ci	return 0;
4862306a36Sopenharmony_ci}
4962306a36Sopenharmony_ci
5062306a36Sopenharmony_cistatic void neon_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
5162306a36Sopenharmony_ci				 u32 len, u32 hibit, bool do_neon)
5262306a36Sopenharmony_ci{
5362306a36Sopenharmony_ci	if (unlikely(!dctx->sset)) {
5462306a36Sopenharmony_ci		if (!dctx->rset) {
5562306a36Sopenharmony_ci			poly1305_init_arm64(&dctx->h, src);
5662306a36Sopenharmony_ci			src += POLY1305_BLOCK_SIZE;
5762306a36Sopenharmony_ci			len -= POLY1305_BLOCK_SIZE;
5862306a36Sopenharmony_ci			dctx->rset = 1;
5962306a36Sopenharmony_ci		}
6062306a36Sopenharmony_ci		if (len >= POLY1305_BLOCK_SIZE) {
6162306a36Sopenharmony_ci			dctx->s[0] = get_unaligned_le32(src +  0);
6262306a36Sopenharmony_ci			dctx->s[1] = get_unaligned_le32(src +  4);
6362306a36Sopenharmony_ci			dctx->s[2] = get_unaligned_le32(src +  8);
6462306a36Sopenharmony_ci			dctx->s[3] = get_unaligned_le32(src + 12);
6562306a36Sopenharmony_ci			src += POLY1305_BLOCK_SIZE;
6662306a36Sopenharmony_ci			len -= POLY1305_BLOCK_SIZE;
6762306a36Sopenharmony_ci			dctx->sset = true;
6862306a36Sopenharmony_ci		}
6962306a36Sopenharmony_ci		if (len < POLY1305_BLOCK_SIZE)
7062306a36Sopenharmony_ci			return;
7162306a36Sopenharmony_ci	}
7262306a36Sopenharmony_ci
7362306a36Sopenharmony_ci	len &= ~(POLY1305_BLOCK_SIZE - 1);
7462306a36Sopenharmony_ci
7562306a36Sopenharmony_ci	if (static_branch_likely(&have_neon) && likely(do_neon))
7662306a36Sopenharmony_ci		poly1305_blocks_neon(&dctx->h, src, len, hibit);
7762306a36Sopenharmony_ci	else
7862306a36Sopenharmony_ci		poly1305_blocks(&dctx->h, src, len, hibit);
7962306a36Sopenharmony_ci}
8062306a36Sopenharmony_ci
8162306a36Sopenharmony_cistatic void neon_poly1305_do_update(struct poly1305_desc_ctx *dctx,
8262306a36Sopenharmony_ci				    const u8 *src, u32 len, bool do_neon)
8362306a36Sopenharmony_ci{
8462306a36Sopenharmony_ci	if (unlikely(dctx->buflen)) {
8562306a36Sopenharmony_ci		u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen);
8662306a36Sopenharmony_ci
8762306a36Sopenharmony_ci		memcpy(dctx->buf + dctx->buflen, src, bytes);
8862306a36Sopenharmony_ci		src += bytes;
8962306a36Sopenharmony_ci		len -= bytes;
9062306a36Sopenharmony_ci		dctx->buflen += bytes;
9162306a36Sopenharmony_ci
9262306a36Sopenharmony_ci		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
9362306a36Sopenharmony_ci			neon_poly1305_blocks(dctx, dctx->buf,
9462306a36Sopenharmony_ci					     POLY1305_BLOCK_SIZE, 1, false);
9562306a36Sopenharmony_ci			dctx->buflen = 0;
9662306a36Sopenharmony_ci		}
9762306a36Sopenharmony_ci	}
9862306a36Sopenharmony_ci
9962306a36Sopenharmony_ci	if (likely(len >= POLY1305_BLOCK_SIZE)) {
10062306a36Sopenharmony_ci		neon_poly1305_blocks(dctx, src, len, 1, do_neon);
10162306a36Sopenharmony_ci		src += round_down(len, POLY1305_BLOCK_SIZE);
10262306a36Sopenharmony_ci		len %= POLY1305_BLOCK_SIZE;
10362306a36Sopenharmony_ci	}
10462306a36Sopenharmony_ci
10562306a36Sopenharmony_ci	if (unlikely(len)) {
10662306a36Sopenharmony_ci		dctx->buflen = len;
10762306a36Sopenharmony_ci		memcpy(dctx->buf, src, len);
10862306a36Sopenharmony_ci	}
10962306a36Sopenharmony_ci}
11062306a36Sopenharmony_ci
11162306a36Sopenharmony_cistatic int neon_poly1305_update(struct shash_desc *desc,
11262306a36Sopenharmony_ci				const u8 *src, unsigned int srclen)
11362306a36Sopenharmony_ci{
11462306a36Sopenharmony_ci	bool do_neon = crypto_simd_usable() && srclen > 128;
11562306a36Sopenharmony_ci	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
11662306a36Sopenharmony_ci
11762306a36Sopenharmony_ci	if (static_branch_likely(&have_neon) && do_neon)
11862306a36Sopenharmony_ci		kernel_neon_begin();
11962306a36Sopenharmony_ci	neon_poly1305_do_update(dctx, src, srclen, do_neon);
12062306a36Sopenharmony_ci	if (static_branch_likely(&have_neon) && do_neon)
12162306a36Sopenharmony_ci		kernel_neon_end();
12262306a36Sopenharmony_ci	return 0;
12362306a36Sopenharmony_ci}
12462306a36Sopenharmony_ci
12562306a36Sopenharmony_civoid poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
12662306a36Sopenharmony_ci			  unsigned int nbytes)
12762306a36Sopenharmony_ci{
12862306a36Sopenharmony_ci	if (unlikely(dctx->buflen)) {
12962306a36Sopenharmony_ci		u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen);
13062306a36Sopenharmony_ci
13162306a36Sopenharmony_ci		memcpy(dctx->buf + dctx->buflen, src, bytes);
13262306a36Sopenharmony_ci		src += bytes;
13362306a36Sopenharmony_ci		nbytes -= bytes;
13462306a36Sopenharmony_ci		dctx->buflen += bytes;
13562306a36Sopenharmony_ci
13662306a36Sopenharmony_ci		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
13762306a36Sopenharmony_ci			poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 1);
13862306a36Sopenharmony_ci			dctx->buflen = 0;
13962306a36Sopenharmony_ci		}
14062306a36Sopenharmony_ci	}
14162306a36Sopenharmony_ci
14262306a36Sopenharmony_ci	if (likely(nbytes >= POLY1305_BLOCK_SIZE)) {
14362306a36Sopenharmony_ci		unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);
14462306a36Sopenharmony_ci
14562306a36Sopenharmony_ci		if (static_branch_likely(&have_neon) && crypto_simd_usable()) {
14662306a36Sopenharmony_ci			do {
14762306a36Sopenharmony_ci				unsigned int todo = min_t(unsigned int, len, SZ_4K);
14862306a36Sopenharmony_ci
14962306a36Sopenharmony_ci				kernel_neon_begin();
15062306a36Sopenharmony_ci				poly1305_blocks_neon(&dctx->h, src, todo, 1);
15162306a36Sopenharmony_ci				kernel_neon_end();
15262306a36Sopenharmony_ci
15362306a36Sopenharmony_ci				len -= todo;
15462306a36Sopenharmony_ci				src += todo;
15562306a36Sopenharmony_ci			} while (len);
15662306a36Sopenharmony_ci		} else {
15762306a36Sopenharmony_ci			poly1305_blocks(&dctx->h, src, len, 1);
15862306a36Sopenharmony_ci			src += len;
15962306a36Sopenharmony_ci		}
16062306a36Sopenharmony_ci		nbytes %= POLY1305_BLOCK_SIZE;
16162306a36Sopenharmony_ci	}
16262306a36Sopenharmony_ci
16362306a36Sopenharmony_ci	if (unlikely(nbytes)) {
16462306a36Sopenharmony_ci		dctx->buflen = nbytes;
16562306a36Sopenharmony_ci		memcpy(dctx->buf, src, nbytes);
16662306a36Sopenharmony_ci	}
16762306a36Sopenharmony_ci}
16862306a36Sopenharmony_ciEXPORT_SYMBOL(poly1305_update_arch);
16962306a36Sopenharmony_ci
17062306a36Sopenharmony_civoid poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
17162306a36Sopenharmony_ci{
17262306a36Sopenharmony_ci	if (unlikely(dctx->buflen)) {
17362306a36Sopenharmony_ci		dctx->buf[dctx->buflen++] = 1;
17462306a36Sopenharmony_ci		memset(dctx->buf + dctx->buflen, 0,
17562306a36Sopenharmony_ci		       POLY1305_BLOCK_SIZE - dctx->buflen);
17662306a36Sopenharmony_ci		poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
17762306a36Sopenharmony_ci	}
17862306a36Sopenharmony_ci
17962306a36Sopenharmony_ci	poly1305_emit(&dctx->h, dst, dctx->s);
18062306a36Sopenharmony_ci	memzero_explicit(dctx, sizeof(*dctx));
18162306a36Sopenharmony_ci}
18262306a36Sopenharmony_ciEXPORT_SYMBOL(poly1305_final_arch);
18362306a36Sopenharmony_ci
18462306a36Sopenharmony_cistatic int neon_poly1305_final(struct shash_desc *desc, u8 *dst)
18562306a36Sopenharmony_ci{
18662306a36Sopenharmony_ci	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
18762306a36Sopenharmony_ci
18862306a36Sopenharmony_ci	if (unlikely(!dctx->sset))
18962306a36Sopenharmony_ci		return -ENOKEY;
19062306a36Sopenharmony_ci
19162306a36Sopenharmony_ci	poly1305_final_arch(dctx, dst);
19262306a36Sopenharmony_ci	return 0;
19362306a36Sopenharmony_ci}
19462306a36Sopenharmony_ci
19562306a36Sopenharmony_cistatic struct shash_alg neon_poly1305_alg = {
19662306a36Sopenharmony_ci	.init			= neon_poly1305_init,
19762306a36Sopenharmony_ci	.update			= neon_poly1305_update,
19862306a36Sopenharmony_ci	.final			= neon_poly1305_final,
19962306a36Sopenharmony_ci	.digestsize		= POLY1305_DIGEST_SIZE,
20062306a36Sopenharmony_ci	.descsize		= sizeof(struct poly1305_desc_ctx),
20162306a36Sopenharmony_ci
20262306a36Sopenharmony_ci	.base.cra_name		= "poly1305",
20362306a36Sopenharmony_ci	.base.cra_driver_name	= "poly1305-neon",
20462306a36Sopenharmony_ci	.base.cra_priority	= 200,
20562306a36Sopenharmony_ci	.base.cra_blocksize	= POLY1305_BLOCK_SIZE,
20662306a36Sopenharmony_ci	.base.cra_module	= THIS_MODULE,
20762306a36Sopenharmony_ci};
20862306a36Sopenharmony_ci
20962306a36Sopenharmony_cistatic int __init neon_poly1305_mod_init(void)
21062306a36Sopenharmony_ci{
21162306a36Sopenharmony_ci	if (!cpu_have_named_feature(ASIMD))
21262306a36Sopenharmony_ci		return 0;
21362306a36Sopenharmony_ci
21462306a36Sopenharmony_ci	static_branch_enable(&have_neon);
21562306a36Sopenharmony_ci
21662306a36Sopenharmony_ci	return IS_REACHABLE(CONFIG_CRYPTO_HASH) ?
21762306a36Sopenharmony_ci		crypto_register_shash(&neon_poly1305_alg) : 0;
21862306a36Sopenharmony_ci}
21962306a36Sopenharmony_ci
22062306a36Sopenharmony_cistatic void __exit neon_poly1305_mod_exit(void)
22162306a36Sopenharmony_ci{
22262306a36Sopenharmony_ci	if (IS_REACHABLE(CONFIG_CRYPTO_HASH) && cpu_have_named_feature(ASIMD))
22362306a36Sopenharmony_ci		crypto_unregister_shash(&neon_poly1305_alg);
22462306a36Sopenharmony_ci}
22562306a36Sopenharmony_ci
22662306a36Sopenharmony_cimodule_init(neon_poly1305_mod_init);
22762306a36Sopenharmony_cimodule_exit(neon_poly1305_mod_exit);
22862306a36Sopenharmony_ci
22962306a36Sopenharmony_ciMODULE_LICENSE("GPL v2");
23062306a36Sopenharmony_ciMODULE_ALIAS_CRYPTO("poly1305");
23162306a36Sopenharmony_ciMODULE_ALIAS_CRYPTO("poly1305-neon");
232