162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * PowerPC P10 (ppc64le) accelerated ChaCha and XChaCha stream ciphers,
462306a36Sopenharmony_ci * including ChaCha20 (RFC7539)
562306a36Sopenharmony_ci *
662306a36Sopenharmony_ci * Copyright 2023- IBM Corp. All rights reserved.
762306a36Sopenharmony_ci */
862306a36Sopenharmony_ci
962306a36Sopenharmony_ci#include <crypto/algapi.h>
1062306a36Sopenharmony_ci#include <crypto/internal/chacha.h>
1162306a36Sopenharmony_ci#include <crypto/internal/simd.h>
1262306a36Sopenharmony_ci#include <crypto/internal/skcipher.h>
1362306a36Sopenharmony_ci#include <linux/kernel.h>
1462306a36Sopenharmony_ci#include <linux/module.h>
1562306a36Sopenharmony_ci#include <linux/cpufeature.h>
1662306a36Sopenharmony_ci#include <linux/sizes.h>
1762306a36Sopenharmony_ci#include <asm/simd.h>
1862306a36Sopenharmony_ci#include <asm/switch_to.h>
1962306a36Sopenharmony_ci
/*
 * Bulk ChaCha routine; only declared here, its definition is not in this
 * file.  Within this file it is called solely from chacha_p10_do_8x(), which
 * passes a non-zero @len that is a multiple of 256 bytes and advances the
 * block counter in state[12] itself after the call returns.
 */
asmlinkage void chacha_p10le_8x(u32 *state, u8 *dst, const u8 *src,
				unsigned int len, int nrounds);
2262306a36Sopenharmony_ci
/*
 * Static key, initially false.  chacha_p10_init() enables it, and
 * chacha_crypt_arch() tests it to decide whether the accelerated path may
 * be taken.
 */
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_p10);
2462306a36Sopenharmony_ci
/*
 * Open a section in which kernel code may use VSX: preemption is disabled
 * first, then kernel VSX use is enabled.  Every call must be balanced by a
 * call to vsx_end().
 */
static void vsx_begin(void)
{
	preempt_disable();
	enable_kernel_vsx();
}
3062306a36Sopenharmony_ci
/*
 * Close a section opened by vsx_begin(), undoing its two steps in reverse
 * order: kernel VSX use is disabled, then preemption is re-enabled.
 */
static void vsx_end(void)
{
	disable_kernel_vsx();
	preempt_enable();
}
3662306a36Sopenharmony_ci
3762306a36Sopenharmony_cistatic void chacha_p10_do_8x(u32 *state, u8 *dst, const u8 *src,
3862306a36Sopenharmony_ci			     unsigned int bytes, int nrounds)
3962306a36Sopenharmony_ci{
4062306a36Sopenharmony_ci	unsigned int l = bytes & ~0x0FF;
4162306a36Sopenharmony_ci
4262306a36Sopenharmony_ci	if (l > 0) {
4362306a36Sopenharmony_ci		chacha_p10le_8x(state, dst, src, l, nrounds);
4462306a36Sopenharmony_ci		bytes -= l;
4562306a36Sopenharmony_ci		src += l;
4662306a36Sopenharmony_ci		dst += l;
4762306a36Sopenharmony_ci		state[12] += l / CHACHA_BLOCK_SIZE;
4862306a36Sopenharmony_ci	}
4962306a36Sopenharmony_ci
5062306a36Sopenharmony_ci	if (bytes > 0)
5162306a36Sopenharmony_ci		chacha_crypt_generic(state, dst, src, bytes, nrounds);
5262306a36Sopenharmony_ci}
5362306a36Sopenharmony_ci
/*
 * Exported HChaCha entry point.  This file provides no accelerated variant:
 * the call is forwarded unchanged to hchacha_block_generic().
 */
void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
{
	hchacha_block_generic(state, stream, nrounds);
}
EXPORT_SYMBOL(hchacha_block_arch);
5962306a36Sopenharmony_ci
/*
 * Exported state-initialisation entry point.  Forwards unchanged to
 * chacha_init_generic(), which fills @state from @key and @iv.
 */
void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
{
	chacha_init_generic(state, key, iv);
}
EXPORT_SYMBOL(chacha_init_arch);
6562306a36Sopenharmony_ci
6662306a36Sopenharmony_civoid chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
6762306a36Sopenharmony_ci		       int nrounds)
6862306a36Sopenharmony_ci{
6962306a36Sopenharmony_ci	if (!static_branch_likely(&have_p10) || bytes <= CHACHA_BLOCK_SIZE ||
7062306a36Sopenharmony_ci	    !crypto_simd_usable())
7162306a36Sopenharmony_ci		return chacha_crypt_generic(state, dst, src, bytes, nrounds);
7262306a36Sopenharmony_ci
7362306a36Sopenharmony_ci	do {
7462306a36Sopenharmony_ci		unsigned int todo = min_t(unsigned int, bytes, SZ_4K);
7562306a36Sopenharmony_ci
7662306a36Sopenharmony_ci		vsx_begin();
7762306a36Sopenharmony_ci		chacha_p10_do_8x(state, dst, src, todo, nrounds);
7862306a36Sopenharmony_ci		vsx_end();
7962306a36Sopenharmony_ci
8062306a36Sopenharmony_ci		bytes -= todo;
8162306a36Sopenharmony_ci		src += todo;
8262306a36Sopenharmony_ci		dst += todo;
8362306a36Sopenharmony_ci	} while (bytes);
8462306a36Sopenharmony_ci}
8562306a36Sopenharmony_ciEXPORT_SYMBOL(chacha_crypt_arch);
8662306a36Sopenharmony_ci
8762306a36Sopenharmony_cistatic int chacha_p10_stream_xor(struct skcipher_request *req,
8862306a36Sopenharmony_ci				 const struct chacha_ctx *ctx, const u8 *iv)
8962306a36Sopenharmony_ci{
9062306a36Sopenharmony_ci	struct skcipher_walk walk;
9162306a36Sopenharmony_ci	u32 state[16];
9262306a36Sopenharmony_ci	int err;
9362306a36Sopenharmony_ci
9462306a36Sopenharmony_ci	err = skcipher_walk_virt(&walk, req, false);
9562306a36Sopenharmony_ci	if (err)
9662306a36Sopenharmony_ci		return err;
9762306a36Sopenharmony_ci
9862306a36Sopenharmony_ci	chacha_init_generic(state, ctx->key, iv);
9962306a36Sopenharmony_ci
10062306a36Sopenharmony_ci	while (walk.nbytes > 0) {
10162306a36Sopenharmony_ci		unsigned int nbytes = walk.nbytes;
10262306a36Sopenharmony_ci
10362306a36Sopenharmony_ci		if (nbytes < walk.total)
10462306a36Sopenharmony_ci			nbytes = rounddown(nbytes, walk.stride);
10562306a36Sopenharmony_ci
10662306a36Sopenharmony_ci		if (!crypto_simd_usable()) {
10762306a36Sopenharmony_ci			chacha_crypt_generic(state, walk.dst.virt.addr,
10862306a36Sopenharmony_ci					     walk.src.virt.addr, nbytes,
10962306a36Sopenharmony_ci					     ctx->nrounds);
11062306a36Sopenharmony_ci		} else {
11162306a36Sopenharmony_ci			vsx_begin();
11262306a36Sopenharmony_ci			chacha_p10_do_8x(state, walk.dst.virt.addr,
11362306a36Sopenharmony_ci				      walk.src.virt.addr, nbytes, ctx->nrounds);
11462306a36Sopenharmony_ci			vsx_end();
11562306a36Sopenharmony_ci		}
11662306a36Sopenharmony_ci		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
11762306a36Sopenharmony_ci		if (err)
11862306a36Sopenharmony_ci			break;
11962306a36Sopenharmony_ci	}
12062306a36Sopenharmony_ci
12162306a36Sopenharmony_ci	return err;
12262306a36Sopenharmony_ci}
12362306a36Sopenharmony_ci
12462306a36Sopenharmony_cistatic int chacha_p10(struct skcipher_request *req)
12562306a36Sopenharmony_ci{
12662306a36Sopenharmony_ci	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
12762306a36Sopenharmony_ci	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
12862306a36Sopenharmony_ci
12962306a36Sopenharmony_ci	return chacha_p10_stream_xor(req, ctx, req->iv);
13062306a36Sopenharmony_ci}
13162306a36Sopenharmony_ci
13262306a36Sopenharmony_cistatic int xchacha_p10(struct skcipher_request *req)
13362306a36Sopenharmony_ci{
13462306a36Sopenharmony_ci	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
13562306a36Sopenharmony_ci	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
13662306a36Sopenharmony_ci	struct chacha_ctx subctx;
13762306a36Sopenharmony_ci	u32 state[16];
13862306a36Sopenharmony_ci	u8 real_iv[16];
13962306a36Sopenharmony_ci
14062306a36Sopenharmony_ci	chacha_init_generic(state, ctx->key, req->iv);
14162306a36Sopenharmony_ci	hchacha_block_arch(state, subctx.key, ctx->nrounds);
14262306a36Sopenharmony_ci	subctx.nrounds = ctx->nrounds;
14362306a36Sopenharmony_ci
14462306a36Sopenharmony_ci	memcpy(&real_iv[0], req->iv + 24, 8);
14562306a36Sopenharmony_ci	memcpy(&real_iv[8], req->iv + 16, 8);
14662306a36Sopenharmony_ci	return chacha_p10_stream_xor(req, &subctx, real_iv);
14762306a36Sopenharmony_ci}
14862306a36Sopenharmony_ci
/*
 * skcipher algorithms provided by this module.  The whole array is registered
 * in chacha_p10_init() and unregistered in chacha_p10_exit().  Each entry
 * uses the same handler for .encrypt and .decrypt.
 */
static struct skcipher_alg algs[] = {
	{
		/* ChaCha20: CHACHA_IV_SIZE IV, handled by chacha_p10(). */
		.base.cra_name		= "chacha20",
		.base.cra_driver_name	= "chacha20-p10",
		.base.cra_priority	= 300,
		.base.cra_blocksize	= 1,
		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
		.base.cra_module	= THIS_MODULE,

		.min_keysize		= CHACHA_KEY_SIZE,
		.max_keysize		= CHACHA_KEY_SIZE,
		.ivsize			= CHACHA_IV_SIZE,
		.chunksize		= CHACHA_BLOCK_SIZE,
		.setkey			= chacha20_setkey,
		.encrypt		= chacha_p10,
		.decrypt		= chacha_p10,
	}, {
		/* XChaCha20: XCHACHA_IV_SIZE IV, handled by xchacha_p10(). */
		.base.cra_name		= "xchacha20",
		.base.cra_driver_name	= "xchacha20-p10",
		.base.cra_priority	= 300,
		.base.cra_blocksize	= 1,
		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
		.base.cra_module	= THIS_MODULE,

		.min_keysize		= CHACHA_KEY_SIZE,
		.max_keysize		= CHACHA_KEY_SIZE,
		.ivsize			= XCHACHA_IV_SIZE,
		.chunksize		= CHACHA_BLOCK_SIZE,
		.setkey			= chacha20_setkey,
		.encrypt		= xchacha_p10,
		.decrypt		= xchacha_p10,
	}, {
		/*
		 * XChaCha12: same handler as XChaCha20; differs only in name
		 * and in using chacha12_setkey.
		 */
		.base.cra_name		= "xchacha12",
		.base.cra_driver_name	= "xchacha12-p10",
		.base.cra_priority	= 300,
		.base.cra_blocksize	= 1,
		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
		.base.cra_module	= THIS_MODULE,

		.min_keysize		= CHACHA_KEY_SIZE,
		.max_keysize		= CHACHA_KEY_SIZE,
		.ivsize			= XCHACHA_IV_SIZE,
		.chunksize		= CHACHA_BLOCK_SIZE,
		.setkey			= chacha12_setkey,
		.encrypt		= xchacha_p10,
		.decrypt		= xchacha_p10,
	}
};
19762306a36Sopenharmony_ci
/*
 * Module init: turn on the have_p10 key so chacha_crypt_arch() may take the
 * accelerated path, then register every entry of algs[].
 *
 * Returns the result of crypto_register_skciphers().
 */
static int __init chacha_p10_init(void)
{
	static_branch_enable(&have_p10);

	return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
}
20462306a36Sopenharmony_ci
/* Module exit: unregister every algorithm registered by chacha_p10_init(). */
static void __exit chacha_p10_exit(void)
{
	crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
}
20962306a36Sopenharmony_ci
/*
 * chacha_p10_init is installed through module_cpu_feature_match() keyed on
 * PPC_MODULE_FEATURE_P10 rather than through plain module_init().
 * NOTE(review): presumably this restricts loading/initialisation to CPUs
 * advertising that feature -- confirm against <linux/cpufeature.h>.
 */
module_cpu_feature_match(PPC_MODULE_FEATURE_P10, chacha_p10_init);
module_exit(chacha_p10_exit);

MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (P10 accelerated)");
MODULE_AUTHOR("Danny Tsen <dtsen@linux.ibm.com>");
MODULE_LICENSE("GPL v2");
/* One alias per cra_name and per cra_driver_name listed in algs[]. */
MODULE_ALIAS_CRYPTO("chacha20");
MODULE_ALIAS_CRYPTO("chacha20-p10");
MODULE_ALIAS_CRYPTO("xchacha20");
MODULE_ALIAS_CRYPTO("xchacha20-p10");
MODULE_ALIAS_CRYPTO("xchacha12");
MODULE_ALIAS_CRYPTO("xchacha12-p10");
222