/*
 * ARM NEON and scalar accelerated ChaCha and XChaCha stream ciphers,
 * including ChaCha20 (RFC7539)
 *
 * Copyright (C) 2016 - 2017 Linaro, Ltd. <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Based on:
 * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
 *
 * Copyright (C) 2015 Martin Willi
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */
2162306a36Sopenharmony_ci
2262306a36Sopenharmony_ci#include <crypto/algapi.h>
2362306a36Sopenharmony_ci#include <crypto/internal/chacha.h>
2462306a36Sopenharmony_ci#include <crypto/internal/simd.h>
2562306a36Sopenharmony_ci#include <crypto/internal/skcipher.h>
2662306a36Sopenharmony_ci#include <linux/jump_label.h>
2762306a36Sopenharmony_ci#include <linux/kernel.h>
2862306a36Sopenharmony_ci#include <linux/module.h>
2962306a36Sopenharmony_ci
3062306a36Sopenharmony_ci#include <asm/hwcap.h>
3162306a36Sopenharmony_ci#include <asm/neon.h>
3262306a36Sopenharmony_ci#include <asm/simd.h>
3362306a36Sopenharmony_ci
3462306a36Sopenharmony_ciasmlinkage void chacha_block_xor_neon(u32 *state, u8 *dst, const u8 *src,
3562306a36Sopenharmony_ci				      int nrounds);
3662306a36Sopenharmony_ciasmlinkage void chacha_4block_xor_neon(u32 *state, u8 *dst, const u8 *src,
3762306a36Sopenharmony_ci				       int nrounds, int bytes);
3862306a36Sopenharmony_ciasmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
3962306a36Sopenharmony_ci
4062306a36Sopenharmony_cistatic __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
4162306a36Sopenharmony_ci
4262306a36Sopenharmony_cistatic void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
4362306a36Sopenharmony_ci			  int bytes, int nrounds)
4462306a36Sopenharmony_ci{
4562306a36Sopenharmony_ci	while (bytes > 0) {
4662306a36Sopenharmony_ci		int l = min(bytes, CHACHA_BLOCK_SIZE * 5);
4762306a36Sopenharmony_ci
4862306a36Sopenharmony_ci		if (l <= CHACHA_BLOCK_SIZE) {
4962306a36Sopenharmony_ci			u8 buf[CHACHA_BLOCK_SIZE];
5062306a36Sopenharmony_ci
5162306a36Sopenharmony_ci			memcpy(buf, src, l);
5262306a36Sopenharmony_ci			chacha_block_xor_neon(state, buf, buf, nrounds);
5362306a36Sopenharmony_ci			memcpy(dst, buf, l);
5462306a36Sopenharmony_ci			state[12] += 1;
5562306a36Sopenharmony_ci			break;
5662306a36Sopenharmony_ci		}
5762306a36Sopenharmony_ci		chacha_4block_xor_neon(state, dst, src, nrounds, l);
5862306a36Sopenharmony_ci		bytes -= l;
5962306a36Sopenharmony_ci		src += l;
6062306a36Sopenharmony_ci		dst += l;
6162306a36Sopenharmony_ci		state[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE);
6262306a36Sopenharmony_ci	}
6362306a36Sopenharmony_ci}
6462306a36Sopenharmony_ci
6562306a36Sopenharmony_civoid hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
6662306a36Sopenharmony_ci{
6762306a36Sopenharmony_ci	if (!static_branch_likely(&have_neon) || !crypto_simd_usable()) {
6862306a36Sopenharmony_ci		hchacha_block_generic(state, stream, nrounds);
6962306a36Sopenharmony_ci	} else {
7062306a36Sopenharmony_ci		kernel_neon_begin();
7162306a36Sopenharmony_ci		hchacha_block_neon(state, stream, nrounds);
7262306a36Sopenharmony_ci		kernel_neon_end();
7362306a36Sopenharmony_ci	}
7462306a36Sopenharmony_ci}
7562306a36Sopenharmony_ciEXPORT_SYMBOL(hchacha_block_arch);
7662306a36Sopenharmony_ci
7762306a36Sopenharmony_civoid chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
7862306a36Sopenharmony_ci{
7962306a36Sopenharmony_ci	chacha_init_generic(state, key, iv);
8062306a36Sopenharmony_ci}
8162306a36Sopenharmony_ciEXPORT_SYMBOL(chacha_init_arch);
8262306a36Sopenharmony_ci
8362306a36Sopenharmony_civoid chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
8462306a36Sopenharmony_ci		       int nrounds)
8562306a36Sopenharmony_ci{
8662306a36Sopenharmony_ci	if (!static_branch_likely(&have_neon) || bytes <= CHACHA_BLOCK_SIZE ||
8762306a36Sopenharmony_ci	    !crypto_simd_usable())
8862306a36Sopenharmony_ci		return chacha_crypt_generic(state, dst, src, bytes, nrounds);
8962306a36Sopenharmony_ci
9062306a36Sopenharmony_ci	do {
9162306a36Sopenharmony_ci		unsigned int todo = min_t(unsigned int, bytes, SZ_4K);
9262306a36Sopenharmony_ci
9362306a36Sopenharmony_ci		kernel_neon_begin();
9462306a36Sopenharmony_ci		chacha_doneon(state, dst, src, todo, nrounds);
9562306a36Sopenharmony_ci		kernel_neon_end();
9662306a36Sopenharmony_ci
9762306a36Sopenharmony_ci		bytes -= todo;
9862306a36Sopenharmony_ci		src += todo;
9962306a36Sopenharmony_ci		dst += todo;
10062306a36Sopenharmony_ci	} while (bytes);
10162306a36Sopenharmony_ci}
10262306a36Sopenharmony_ciEXPORT_SYMBOL(chacha_crypt_arch);
10362306a36Sopenharmony_ci
10462306a36Sopenharmony_cistatic int chacha_neon_stream_xor(struct skcipher_request *req,
10562306a36Sopenharmony_ci				  const struct chacha_ctx *ctx, const u8 *iv)
10662306a36Sopenharmony_ci{
10762306a36Sopenharmony_ci	struct skcipher_walk walk;
10862306a36Sopenharmony_ci	u32 state[16];
10962306a36Sopenharmony_ci	int err;
11062306a36Sopenharmony_ci
11162306a36Sopenharmony_ci	err = skcipher_walk_virt(&walk, req, false);
11262306a36Sopenharmony_ci
11362306a36Sopenharmony_ci	chacha_init_generic(state, ctx->key, iv);
11462306a36Sopenharmony_ci
11562306a36Sopenharmony_ci	while (walk.nbytes > 0) {
11662306a36Sopenharmony_ci		unsigned int nbytes = walk.nbytes;
11762306a36Sopenharmony_ci
11862306a36Sopenharmony_ci		if (nbytes < walk.total)
11962306a36Sopenharmony_ci			nbytes = rounddown(nbytes, walk.stride);
12062306a36Sopenharmony_ci
12162306a36Sopenharmony_ci		if (!static_branch_likely(&have_neon) ||
12262306a36Sopenharmony_ci		    !crypto_simd_usable()) {
12362306a36Sopenharmony_ci			chacha_crypt_generic(state, walk.dst.virt.addr,
12462306a36Sopenharmony_ci					     walk.src.virt.addr, nbytes,
12562306a36Sopenharmony_ci					     ctx->nrounds);
12662306a36Sopenharmony_ci		} else {
12762306a36Sopenharmony_ci			kernel_neon_begin();
12862306a36Sopenharmony_ci			chacha_doneon(state, walk.dst.virt.addr,
12962306a36Sopenharmony_ci				      walk.src.virt.addr, nbytes, ctx->nrounds);
13062306a36Sopenharmony_ci			kernel_neon_end();
13162306a36Sopenharmony_ci		}
13262306a36Sopenharmony_ci		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
13362306a36Sopenharmony_ci	}
13462306a36Sopenharmony_ci
13562306a36Sopenharmony_ci	return err;
13662306a36Sopenharmony_ci}
13762306a36Sopenharmony_ci
13862306a36Sopenharmony_cistatic int chacha_neon(struct skcipher_request *req)
13962306a36Sopenharmony_ci{
14062306a36Sopenharmony_ci	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
14162306a36Sopenharmony_ci	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
14262306a36Sopenharmony_ci
14362306a36Sopenharmony_ci	return chacha_neon_stream_xor(req, ctx, req->iv);
14462306a36Sopenharmony_ci}
14562306a36Sopenharmony_ci
14662306a36Sopenharmony_cistatic int xchacha_neon(struct skcipher_request *req)
14762306a36Sopenharmony_ci{
14862306a36Sopenharmony_ci	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
14962306a36Sopenharmony_ci	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
15062306a36Sopenharmony_ci	struct chacha_ctx subctx;
15162306a36Sopenharmony_ci	u32 state[16];
15262306a36Sopenharmony_ci	u8 real_iv[16];
15362306a36Sopenharmony_ci
15462306a36Sopenharmony_ci	chacha_init_generic(state, ctx->key, req->iv);
15562306a36Sopenharmony_ci	hchacha_block_arch(state, subctx.key, ctx->nrounds);
15662306a36Sopenharmony_ci	subctx.nrounds = ctx->nrounds;
15762306a36Sopenharmony_ci
15862306a36Sopenharmony_ci	memcpy(&real_iv[0], req->iv + 24, 8);
15962306a36Sopenharmony_ci	memcpy(&real_iv[8], req->iv + 16, 8);
16062306a36Sopenharmony_ci	return chacha_neon_stream_xor(req, &subctx, real_iv);
16162306a36Sopenharmony_ci}
16262306a36Sopenharmony_ci
16362306a36Sopenharmony_cistatic struct skcipher_alg algs[] = {
16462306a36Sopenharmony_ci	{
16562306a36Sopenharmony_ci		.base.cra_name		= "chacha20",
16662306a36Sopenharmony_ci		.base.cra_driver_name	= "chacha20-neon",
16762306a36Sopenharmony_ci		.base.cra_priority	= 300,
16862306a36Sopenharmony_ci		.base.cra_blocksize	= 1,
16962306a36Sopenharmony_ci		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
17062306a36Sopenharmony_ci		.base.cra_module	= THIS_MODULE,
17162306a36Sopenharmony_ci
17262306a36Sopenharmony_ci		.min_keysize		= CHACHA_KEY_SIZE,
17362306a36Sopenharmony_ci		.max_keysize		= CHACHA_KEY_SIZE,
17462306a36Sopenharmony_ci		.ivsize			= CHACHA_IV_SIZE,
17562306a36Sopenharmony_ci		.chunksize		= CHACHA_BLOCK_SIZE,
17662306a36Sopenharmony_ci		.walksize		= 5 * CHACHA_BLOCK_SIZE,
17762306a36Sopenharmony_ci		.setkey			= chacha20_setkey,
17862306a36Sopenharmony_ci		.encrypt		= chacha_neon,
17962306a36Sopenharmony_ci		.decrypt		= chacha_neon,
18062306a36Sopenharmony_ci	}, {
18162306a36Sopenharmony_ci		.base.cra_name		= "xchacha20",
18262306a36Sopenharmony_ci		.base.cra_driver_name	= "xchacha20-neon",
18362306a36Sopenharmony_ci		.base.cra_priority	= 300,
18462306a36Sopenharmony_ci		.base.cra_blocksize	= 1,
18562306a36Sopenharmony_ci		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
18662306a36Sopenharmony_ci		.base.cra_module	= THIS_MODULE,
18762306a36Sopenharmony_ci
18862306a36Sopenharmony_ci		.min_keysize		= CHACHA_KEY_SIZE,
18962306a36Sopenharmony_ci		.max_keysize		= CHACHA_KEY_SIZE,
19062306a36Sopenharmony_ci		.ivsize			= XCHACHA_IV_SIZE,
19162306a36Sopenharmony_ci		.chunksize		= CHACHA_BLOCK_SIZE,
19262306a36Sopenharmony_ci		.walksize		= 5 * CHACHA_BLOCK_SIZE,
19362306a36Sopenharmony_ci		.setkey			= chacha20_setkey,
19462306a36Sopenharmony_ci		.encrypt		= xchacha_neon,
19562306a36Sopenharmony_ci		.decrypt		= xchacha_neon,
19662306a36Sopenharmony_ci	}, {
19762306a36Sopenharmony_ci		.base.cra_name		= "xchacha12",
19862306a36Sopenharmony_ci		.base.cra_driver_name	= "xchacha12-neon",
19962306a36Sopenharmony_ci		.base.cra_priority	= 300,
20062306a36Sopenharmony_ci		.base.cra_blocksize	= 1,
20162306a36Sopenharmony_ci		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
20262306a36Sopenharmony_ci		.base.cra_module	= THIS_MODULE,
20362306a36Sopenharmony_ci
20462306a36Sopenharmony_ci		.min_keysize		= CHACHA_KEY_SIZE,
20562306a36Sopenharmony_ci		.max_keysize		= CHACHA_KEY_SIZE,
20662306a36Sopenharmony_ci		.ivsize			= XCHACHA_IV_SIZE,
20762306a36Sopenharmony_ci		.chunksize		= CHACHA_BLOCK_SIZE,
20862306a36Sopenharmony_ci		.walksize		= 5 * CHACHA_BLOCK_SIZE,
20962306a36Sopenharmony_ci		.setkey			= chacha12_setkey,
21062306a36Sopenharmony_ci		.encrypt		= xchacha_neon,
21162306a36Sopenharmony_ci		.decrypt		= xchacha_neon,
21262306a36Sopenharmony_ci	}
21362306a36Sopenharmony_ci};
21462306a36Sopenharmony_ci
21562306a36Sopenharmony_cistatic int __init chacha_simd_mod_init(void)
21662306a36Sopenharmony_ci{
21762306a36Sopenharmony_ci	if (!cpu_have_named_feature(ASIMD))
21862306a36Sopenharmony_ci		return 0;
21962306a36Sopenharmony_ci
22062306a36Sopenharmony_ci	static_branch_enable(&have_neon);
22162306a36Sopenharmony_ci
22262306a36Sopenharmony_ci	return IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER) ?
22362306a36Sopenharmony_ci		crypto_register_skciphers(algs, ARRAY_SIZE(algs)) : 0;
22462306a36Sopenharmony_ci}
22562306a36Sopenharmony_ci
22662306a36Sopenharmony_cistatic void __exit chacha_simd_mod_fini(void)
22762306a36Sopenharmony_ci{
22862306a36Sopenharmony_ci	if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER) && cpu_have_named_feature(ASIMD))
22962306a36Sopenharmony_ci		crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
23062306a36Sopenharmony_ci}
23162306a36Sopenharmony_ci
23262306a36Sopenharmony_cimodule_init(chacha_simd_mod_init);
23362306a36Sopenharmony_cimodule_exit(chacha_simd_mod_fini);
23462306a36Sopenharmony_ci
23562306a36Sopenharmony_ciMODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (NEON accelerated)");
23662306a36Sopenharmony_ciMODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
23762306a36Sopenharmony_ciMODULE_LICENSE("GPL v2");
23862306a36Sopenharmony_ciMODULE_ALIAS_CRYPTO("chacha20");
23962306a36Sopenharmony_ciMODULE_ALIAS_CRYPTO("chacha20-neon");
24062306a36Sopenharmony_ciMODULE_ALIAS_CRYPTO("xchacha20");
24162306a36Sopenharmony_ciMODULE_ALIAS_CRYPTO("xchacha20-neon");
24262306a36Sopenharmony_ciMODULE_ALIAS_CRYPTO("xchacha12");
24362306a36Sopenharmony_ciMODULE_ALIAS_CRYPTO("xchacha12-neon");
244