162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Poly1305 authenticator algorithm, RFC7539.
462306a36Sopenharmony_ci *
562306a36Sopenharmony_ci * Copyright 2023- IBM Corp. All rights reserved.
662306a36Sopenharmony_ci */
762306a36Sopenharmony_ci
862306a36Sopenharmony_ci#include <crypto/algapi.h>
962306a36Sopenharmony_ci#include <linux/crypto.h>
1062306a36Sopenharmony_ci#include <linux/kernel.h>
1162306a36Sopenharmony_ci#include <linux/module.h>
1262306a36Sopenharmony_ci#include <linux/jump_label.h>
1362306a36Sopenharmony_ci#include <crypto/internal/hash.h>
1462306a36Sopenharmony_ci#include <crypto/internal/poly1305.h>
1562306a36Sopenharmony_ci#include <crypto/internal/simd.h>
1662306a36Sopenharmony_ci#include <linux/cpufeature.h>
1762306a36Sopenharmony_ci#include <asm/unaligned.h>
1862306a36Sopenharmony_ci#include <asm/simd.h>
1962306a36Sopenharmony_ci#include <asm/switch_to.h>
2062306a36Sopenharmony_ci
/*
 * Low-level Poly1305 primitives used by this glue code. They are declared
 * asmlinkage and not defined in this file (presumably provided by the
 * accompanying assembly source -- confirm against the Makefile).
 *
 * poly1305_p10le_4blocks: bulk routine; this file only calls it with at
 *	least 4 * POLY1305_BLOCK_SIZE bytes and when crypto_simd_usable().
 * poly1305_64s: block routine; this file passes highbit = 1 for complete
 *	message blocks and highbit = 0 for the already padded final block.
 * poly1305_emit_64: called once from the final step with the accumulator,
 *	the 's' key words and the destination buffer for the digest.
 */
asmlinkage void poly1305_p10le_4blocks(void *h, const u8 *m, u32 mlen);
asmlinkage void poly1305_64s(void *h, const u8 *m, u32 mlen, int highbit);
asmlinkage void poly1305_emit_64(void *h, void *s, u8 *dst);
2462306a36Sopenharmony_ci
/*
 * Open a section in which the kernel may use VSX.
 *
 * Preemption is switched off before VSX is enabled and stays off until the
 * matching vsx_end() call, so every vsx_begin() must be paired with exactly
 * one vsx_end().
 */
static void vsx_begin(void)
{
	preempt_disable();
	enable_kernel_vsx();
}
3062306a36Sopenharmony_ci
/*
 * Close a section opened by vsx_begin(): VSX is disabled first, then
 * preemption is re-enabled (the reverse of the order used on entry).
 */
static void vsx_end(void)
{
	disable_kernel_vsx();
	preempt_enable();
}
3662306a36Sopenharmony_ci
3762306a36Sopenharmony_cistatic int crypto_poly1305_p10_init(struct shash_desc *desc)
3862306a36Sopenharmony_ci{
3962306a36Sopenharmony_ci	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
4062306a36Sopenharmony_ci
4162306a36Sopenharmony_ci	poly1305_core_init(&dctx->h);
4262306a36Sopenharmony_ci	dctx->buflen = 0;
4362306a36Sopenharmony_ci	dctx->rset = 0;
4462306a36Sopenharmony_ci	dctx->sset = false;
4562306a36Sopenharmony_ci
4662306a36Sopenharmony_ci	return 0;
4762306a36Sopenharmony_ci}
4862306a36Sopenharmony_ci
4962306a36Sopenharmony_cistatic unsigned int crypto_poly1305_setdctxkey(struct poly1305_desc_ctx *dctx,
5062306a36Sopenharmony_ci					       const u8 *inp, unsigned int len)
5162306a36Sopenharmony_ci{
5262306a36Sopenharmony_ci	unsigned int acc = 0;
5362306a36Sopenharmony_ci
5462306a36Sopenharmony_ci	if (unlikely(!dctx->sset)) {
5562306a36Sopenharmony_ci		if (!dctx->rset && len >= POLY1305_BLOCK_SIZE) {
5662306a36Sopenharmony_ci			struct poly1305_core_key *key = &dctx->core_r;
5762306a36Sopenharmony_ci
5862306a36Sopenharmony_ci			key->key.r64[0] = get_unaligned_le64(&inp[0]);
5962306a36Sopenharmony_ci			key->key.r64[1] = get_unaligned_le64(&inp[8]);
6062306a36Sopenharmony_ci			inp += POLY1305_BLOCK_SIZE;
6162306a36Sopenharmony_ci			len -= POLY1305_BLOCK_SIZE;
6262306a36Sopenharmony_ci			acc += POLY1305_BLOCK_SIZE;
6362306a36Sopenharmony_ci			dctx->rset = 1;
6462306a36Sopenharmony_ci		}
6562306a36Sopenharmony_ci		if (len >= POLY1305_BLOCK_SIZE) {
6662306a36Sopenharmony_ci			dctx->s[0] = get_unaligned_le32(&inp[0]);
6762306a36Sopenharmony_ci			dctx->s[1] = get_unaligned_le32(&inp[4]);
6862306a36Sopenharmony_ci			dctx->s[2] = get_unaligned_le32(&inp[8]);
6962306a36Sopenharmony_ci			dctx->s[3] = get_unaligned_le32(&inp[12]);
7062306a36Sopenharmony_ci			acc += POLY1305_BLOCK_SIZE;
7162306a36Sopenharmony_ci			dctx->sset = true;
7262306a36Sopenharmony_ci		}
7362306a36Sopenharmony_ci	}
7462306a36Sopenharmony_ci	return acc;
7562306a36Sopenharmony_ci}
7662306a36Sopenharmony_ci
7762306a36Sopenharmony_cistatic int crypto_poly1305_p10_update(struct shash_desc *desc,
7862306a36Sopenharmony_ci				      const u8 *src, unsigned int srclen)
7962306a36Sopenharmony_ci{
8062306a36Sopenharmony_ci	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
8162306a36Sopenharmony_ci	unsigned int bytes, used;
8262306a36Sopenharmony_ci
8362306a36Sopenharmony_ci	if (unlikely(dctx->buflen)) {
8462306a36Sopenharmony_ci		bytes = min(srclen, POLY1305_BLOCK_SIZE - dctx->buflen);
8562306a36Sopenharmony_ci		memcpy(dctx->buf + dctx->buflen, src, bytes);
8662306a36Sopenharmony_ci		src += bytes;
8762306a36Sopenharmony_ci		srclen -= bytes;
8862306a36Sopenharmony_ci		dctx->buflen += bytes;
8962306a36Sopenharmony_ci
9062306a36Sopenharmony_ci		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
9162306a36Sopenharmony_ci			if (likely(!crypto_poly1305_setdctxkey(dctx, dctx->buf,
9262306a36Sopenharmony_ci							       POLY1305_BLOCK_SIZE))) {
9362306a36Sopenharmony_ci				vsx_begin();
9462306a36Sopenharmony_ci				poly1305_64s(&dctx->h, dctx->buf,
9562306a36Sopenharmony_ci						  POLY1305_BLOCK_SIZE, 1);
9662306a36Sopenharmony_ci				vsx_end();
9762306a36Sopenharmony_ci			}
9862306a36Sopenharmony_ci			dctx->buflen = 0;
9962306a36Sopenharmony_ci		}
10062306a36Sopenharmony_ci	}
10162306a36Sopenharmony_ci
10262306a36Sopenharmony_ci	if (likely(srclen >= POLY1305_BLOCK_SIZE)) {
10362306a36Sopenharmony_ci		bytes = round_down(srclen, POLY1305_BLOCK_SIZE);
10462306a36Sopenharmony_ci		used = crypto_poly1305_setdctxkey(dctx, src, bytes);
10562306a36Sopenharmony_ci		if (likely(used)) {
10662306a36Sopenharmony_ci			srclen -= used;
10762306a36Sopenharmony_ci			src += used;
10862306a36Sopenharmony_ci		}
10962306a36Sopenharmony_ci		if (crypto_simd_usable() && (srclen >= POLY1305_BLOCK_SIZE*4)) {
11062306a36Sopenharmony_ci			vsx_begin();
11162306a36Sopenharmony_ci			poly1305_p10le_4blocks(&dctx->h, src, srclen);
11262306a36Sopenharmony_ci			vsx_end();
11362306a36Sopenharmony_ci			src += srclen - (srclen % (POLY1305_BLOCK_SIZE * 4));
11462306a36Sopenharmony_ci			srclen %= POLY1305_BLOCK_SIZE * 4;
11562306a36Sopenharmony_ci		}
11662306a36Sopenharmony_ci		while (srclen >= POLY1305_BLOCK_SIZE) {
11762306a36Sopenharmony_ci			vsx_begin();
11862306a36Sopenharmony_ci			poly1305_64s(&dctx->h, src, POLY1305_BLOCK_SIZE, 1);
11962306a36Sopenharmony_ci			vsx_end();
12062306a36Sopenharmony_ci			srclen -= POLY1305_BLOCK_SIZE;
12162306a36Sopenharmony_ci			src += POLY1305_BLOCK_SIZE;
12262306a36Sopenharmony_ci		}
12362306a36Sopenharmony_ci	}
12462306a36Sopenharmony_ci
12562306a36Sopenharmony_ci	if (unlikely(srclen)) {
12662306a36Sopenharmony_ci		dctx->buflen = srclen;
12762306a36Sopenharmony_ci		memcpy(dctx->buf, src, srclen);
12862306a36Sopenharmony_ci	}
12962306a36Sopenharmony_ci
13062306a36Sopenharmony_ci	return 0;
13162306a36Sopenharmony_ci}
13262306a36Sopenharmony_ci
13362306a36Sopenharmony_cistatic int crypto_poly1305_p10_final(struct shash_desc *desc, u8 *dst)
13462306a36Sopenharmony_ci{
13562306a36Sopenharmony_ci	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
13662306a36Sopenharmony_ci
13762306a36Sopenharmony_ci	if (unlikely(!dctx->sset))
13862306a36Sopenharmony_ci		return -ENOKEY;
13962306a36Sopenharmony_ci
14062306a36Sopenharmony_ci	if ((dctx->buflen)) {
14162306a36Sopenharmony_ci		dctx->buf[dctx->buflen++] = 1;
14262306a36Sopenharmony_ci		memset(dctx->buf + dctx->buflen, 0,
14362306a36Sopenharmony_ci		       POLY1305_BLOCK_SIZE - dctx->buflen);
14462306a36Sopenharmony_ci		vsx_begin();
14562306a36Sopenharmony_ci		poly1305_64s(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
14662306a36Sopenharmony_ci		vsx_end();
14762306a36Sopenharmony_ci		dctx->buflen = 0;
14862306a36Sopenharmony_ci	}
14962306a36Sopenharmony_ci
15062306a36Sopenharmony_ci	poly1305_emit_64(&dctx->h, &dctx->s, dst);
15162306a36Sopenharmony_ci	return 0;
15262306a36Sopenharmony_ci}
15362306a36Sopenharmony_ci
15462306a36Sopenharmony_cistatic struct shash_alg poly1305_alg = {
15562306a36Sopenharmony_ci	.digestsize	= POLY1305_DIGEST_SIZE,
15662306a36Sopenharmony_ci	.init		= crypto_poly1305_p10_init,
15762306a36Sopenharmony_ci	.update		= crypto_poly1305_p10_update,
15862306a36Sopenharmony_ci	.final		= crypto_poly1305_p10_final,
15962306a36Sopenharmony_ci	.descsize	= sizeof(struct poly1305_desc_ctx),
16062306a36Sopenharmony_ci	.base		= {
16162306a36Sopenharmony_ci		.cra_name		= "poly1305",
16262306a36Sopenharmony_ci		.cra_driver_name	= "poly1305-p10",
16362306a36Sopenharmony_ci		.cra_priority		= 300,
16462306a36Sopenharmony_ci		.cra_blocksize		= POLY1305_BLOCK_SIZE,
16562306a36Sopenharmony_ci		.cra_module		= THIS_MODULE,
16662306a36Sopenharmony_ci	},
16762306a36Sopenharmony_ci};
16862306a36Sopenharmony_ci
16962306a36Sopenharmony_cistatic int __init poly1305_p10_init(void)
17062306a36Sopenharmony_ci{
17162306a36Sopenharmony_ci	return crypto_register_shash(&poly1305_alg);
17262306a36Sopenharmony_ci}
17362306a36Sopenharmony_ci
17462306a36Sopenharmony_cistatic void __exit poly1305_p10_exit(void)
17562306a36Sopenharmony_ci{
17662306a36Sopenharmony_ci	crypto_unregister_shash(&poly1305_alg);
17762306a36Sopenharmony_ci}
17862306a36Sopenharmony_ci
/*
 * Module wiring. poly1305_p10_init is installed through
 * module_cpu_feature_match() keyed on PPC_MODULE_FEATURE_P10 rather than a
 * plain module_init() (presumably so the module only initialises on CPUs
 * advertising that feature -- see <linux/cpufeature.h>).
 */
module_cpu_feature_match(PPC_MODULE_FEATURE_P10, poly1305_p10_init);
module_exit(poly1305_p10_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Danny Tsen <dtsen@linux.ibm.com>");
MODULE_DESCRIPTION("Optimized Poly1305 for P10");
/* Aliases matching both the generic algorithm name and the driver name. */
MODULE_ALIAS_CRYPTO("poly1305");
MODULE_ALIAS_CRYPTO("poly1305-p10");
187