/*
 * ARM NEON and scalar accelerated ChaCha and XChaCha stream ciphers,
 * including ChaCha20 (RFC7539)
 *
 * Copyright (C) 2016 - 2017 Linaro, Ltd. <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Based on:
 * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
 *
 * Copyright (C) 2015 Martin Willi
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */
218c2ecf20Sopenharmony_ci
228c2ecf20Sopenharmony_ci#include <crypto/algapi.h>
238c2ecf20Sopenharmony_ci#include <crypto/internal/chacha.h>
248c2ecf20Sopenharmony_ci#include <crypto/internal/simd.h>
258c2ecf20Sopenharmony_ci#include <crypto/internal/skcipher.h>
268c2ecf20Sopenharmony_ci#include <linux/jump_label.h>
278c2ecf20Sopenharmony_ci#include <linux/kernel.h>
288c2ecf20Sopenharmony_ci#include <linux/module.h>
298c2ecf20Sopenharmony_ci
308c2ecf20Sopenharmony_ci#include <asm/hwcap.h>
318c2ecf20Sopenharmony_ci#include <asm/neon.h>
328c2ecf20Sopenharmony_ci#include <asm/simd.h>
338c2ecf20Sopenharmony_ci
/*
 * NEON primitives. Only the prototypes live here; the implementations are
 * provided elsewhere (declared asmlinkage, so presumably in assembly --
 * TODO confirm against the accompanying core file).
 *
 * chacha_block_xor_neon():  called below with a CHACHA_BLOCK_SIZE buffer.
 * chacha_4block_xor_neon(): called below with between one and five blocks'
 *                           worth of data; the length is passed in @bytes.
 * hchacha_block_neon():     NEON counterpart of hchacha_block_generic().
 */
asmlinkage void chacha_block_xor_neon(u32 *state, u8 *dst, const u8 *src,
				      int nrounds);
asmlinkage void chacha_4block_xor_neon(u32 *state, u8 *dst, const u8 *src,
				       int nrounds, int bytes);
asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);

/*
 * Static key gating every NEON code path in this file. It starts out false
 * and is switched on by chacha_simd_mod_init() when the CPU reports ASIMD.
 */
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
418c2ecf20Sopenharmony_ci
428c2ecf20Sopenharmony_cistatic void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
438c2ecf20Sopenharmony_ci			  int bytes, int nrounds)
448c2ecf20Sopenharmony_ci{
458c2ecf20Sopenharmony_ci	while (bytes > 0) {
468c2ecf20Sopenharmony_ci		int l = min(bytes, CHACHA_BLOCK_SIZE * 5);
478c2ecf20Sopenharmony_ci
488c2ecf20Sopenharmony_ci		if (l <= CHACHA_BLOCK_SIZE) {
498c2ecf20Sopenharmony_ci			u8 buf[CHACHA_BLOCK_SIZE];
508c2ecf20Sopenharmony_ci
518c2ecf20Sopenharmony_ci			memcpy(buf, src, l);
528c2ecf20Sopenharmony_ci			chacha_block_xor_neon(state, buf, buf, nrounds);
538c2ecf20Sopenharmony_ci			memcpy(dst, buf, l);
548c2ecf20Sopenharmony_ci			state[12] += 1;
558c2ecf20Sopenharmony_ci			break;
568c2ecf20Sopenharmony_ci		}
578c2ecf20Sopenharmony_ci		chacha_4block_xor_neon(state, dst, src, nrounds, l);
588c2ecf20Sopenharmony_ci		bytes -= l;
598c2ecf20Sopenharmony_ci		src += l;
608c2ecf20Sopenharmony_ci		dst += l;
618c2ecf20Sopenharmony_ci		state[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE);
628c2ecf20Sopenharmony_ci	}
638c2ecf20Sopenharmony_ci}
648c2ecf20Sopenharmony_ci
658c2ecf20Sopenharmony_civoid hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
668c2ecf20Sopenharmony_ci{
678c2ecf20Sopenharmony_ci	if (!static_branch_likely(&have_neon) || !crypto_simd_usable()) {
688c2ecf20Sopenharmony_ci		hchacha_block_generic(state, stream, nrounds);
698c2ecf20Sopenharmony_ci	} else {
708c2ecf20Sopenharmony_ci		kernel_neon_begin();
718c2ecf20Sopenharmony_ci		hchacha_block_neon(state, stream, nrounds);
728c2ecf20Sopenharmony_ci		kernel_neon_end();
738c2ecf20Sopenharmony_ci	}
748c2ecf20Sopenharmony_ci}
758c2ecf20Sopenharmony_ciEXPORT_SYMBOL(hchacha_block_arch);
768c2ecf20Sopenharmony_ci
/*
 * Architecture entry point for ChaCha state initialisation. There is no
 * accelerated variant here: the call is forwarded unchanged to
 * chacha_init_generic().
 */
void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
{
	chacha_init_generic(state, key, iv);
}
EXPORT_SYMBOL(chacha_init_arch);
828c2ecf20Sopenharmony_ci
838c2ecf20Sopenharmony_civoid chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
848c2ecf20Sopenharmony_ci		       int nrounds)
858c2ecf20Sopenharmony_ci{
868c2ecf20Sopenharmony_ci	if (!static_branch_likely(&have_neon) || bytes <= CHACHA_BLOCK_SIZE ||
878c2ecf20Sopenharmony_ci	    !crypto_simd_usable())
888c2ecf20Sopenharmony_ci		return chacha_crypt_generic(state, dst, src, bytes, nrounds);
898c2ecf20Sopenharmony_ci
908c2ecf20Sopenharmony_ci	do {
918c2ecf20Sopenharmony_ci		unsigned int todo = min_t(unsigned int, bytes, SZ_4K);
928c2ecf20Sopenharmony_ci
938c2ecf20Sopenharmony_ci		kernel_neon_begin();
948c2ecf20Sopenharmony_ci		chacha_doneon(state, dst, src, todo, nrounds);
958c2ecf20Sopenharmony_ci		kernel_neon_end();
968c2ecf20Sopenharmony_ci
978c2ecf20Sopenharmony_ci		bytes -= todo;
988c2ecf20Sopenharmony_ci		src += todo;
998c2ecf20Sopenharmony_ci		dst += todo;
1008c2ecf20Sopenharmony_ci	} while (bytes);
1018c2ecf20Sopenharmony_ci}
1028c2ecf20Sopenharmony_ciEXPORT_SYMBOL(chacha_crypt_arch);
1038c2ecf20Sopenharmony_ci
1048c2ecf20Sopenharmony_cistatic int chacha_neon_stream_xor(struct skcipher_request *req,
1058c2ecf20Sopenharmony_ci				  const struct chacha_ctx *ctx, const u8 *iv)
1068c2ecf20Sopenharmony_ci{
1078c2ecf20Sopenharmony_ci	struct skcipher_walk walk;
1088c2ecf20Sopenharmony_ci	u32 state[16];
1098c2ecf20Sopenharmony_ci	int err;
1108c2ecf20Sopenharmony_ci
1118c2ecf20Sopenharmony_ci	err = skcipher_walk_virt(&walk, req, false);
1128c2ecf20Sopenharmony_ci
1138c2ecf20Sopenharmony_ci	chacha_init_generic(state, ctx->key, iv);
1148c2ecf20Sopenharmony_ci
1158c2ecf20Sopenharmony_ci	while (walk.nbytes > 0) {
1168c2ecf20Sopenharmony_ci		unsigned int nbytes = walk.nbytes;
1178c2ecf20Sopenharmony_ci
1188c2ecf20Sopenharmony_ci		if (nbytes < walk.total)
1198c2ecf20Sopenharmony_ci			nbytes = rounddown(nbytes, walk.stride);
1208c2ecf20Sopenharmony_ci
1218c2ecf20Sopenharmony_ci		if (!static_branch_likely(&have_neon) ||
1228c2ecf20Sopenharmony_ci		    !crypto_simd_usable()) {
1238c2ecf20Sopenharmony_ci			chacha_crypt_generic(state, walk.dst.virt.addr,
1248c2ecf20Sopenharmony_ci					     walk.src.virt.addr, nbytes,
1258c2ecf20Sopenharmony_ci					     ctx->nrounds);
1268c2ecf20Sopenharmony_ci		} else {
1278c2ecf20Sopenharmony_ci			kernel_neon_begin();
1288c2ecf20Sopenharmony_ci			chacha_doneon(state, walk.dst.virt.addr,
1298c2ecf20Sopenharmony_ci				      walk.src.virt.addr, nbytes, ctx->nrounds);
1308c2ecf20Sopenharmony_ci			kernel_neon_end();
1318c2ecf20Sopenharmony_ci		}
1328c2ecf20Sopenharmony_ci		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
1338c2ecf20Sopenharmony_ci	}
1348c2ecf20Sopenharmony_ci
1358c2ecf20Sopenharmony_ci	return err;
1368c2ecf20Sopenharmony_ci}
1378c2ecf20Sopenharmony_ci
1388c2ecf20Sopenharmony_cistatic int chacha_neon(struct skcipher_request *req)
1398c2ecf20Sopenharmony_ci{
1408c2ecf20Sopenharmony_ci	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
1418c2ecf20Sopenharmony_ci	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
1428c2ecf20Sopenharmony_ci
1438c2ecf20Sopenharmony_ci	return chacha_neon_stream_xor(req, ctx, req->iv);
1448c2ecf20Sopenharmony_ci}
1458c2ecf20Sopenharmony_ci
1468c2ecf20Sopenharmony_cistatic int xchacha_neon(struct skcipher_request *req)
1478c2ecf20Sopenharmony_ci{
1488c2ecf20Sopenharmony_ci	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
1498c2ecf20Sopenharmony_ci	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
1508c2ecf20Sopenharmony_ci	struct chacha_ctx subctx;
1518c2ecf20Sopenharmony_ci	u32 state[16];
1528c2ecf20Sopenharmony_ci	u8 real_iv[16];
1538c2ecf20Sopenharmony_ci
1548c2ecf20Sopenharmony_ci	chacha_init_generic(state, ctx->key, req->iv);
1558c2ecf20Sopenharmony_ci	hchacha_block_arch(state, subctx.key, ctx->nrounds);
1568c2ecf20Sopenharmony_ci	subctx.nrounds = ctx->nrounds;
1578c2ecf20Sopenharmony_ci
1588c2ecf20Sopenharmony_ci	memcpy(&real_iv[0], req->iv + 24, 8);
1598c2ecf20Sopenharmony_ci	memcpy(&real_iv[8], req->iv + 16, 8);
1608c2ecf20Sopenharmony_ci	return chacha_neon_stream_xor(req, &subctx, real_iv);
1618c2ecf20Sopenharmony_ci}
1628c2ecf20Sopenharmony_ci
/*
 * skcipher algorithms registered by this module: chacha20, xchacha20 and
 * xchacha12. All three share the same shape:
 *  - priority 300 and a block size of 1;
 *  - a fixed key size of CHACHA_KEY_SIZE;
 *  - a chunk size of one ChaCha block and a walk size of five blocks, which
 *    matches the largest piece chacha_doneon() processes per NEON call;
 *  - the same handler for encryption and decryption.
 * The XChaCha variants differ from chacha20 in their IV size
 * (XCHACHA_IV_SIZE) and handler (xchacha_neon); xchacha12 additionally uses
 * chacha12_setkey.
 */
static struct skcipher_alg algs[] = {
	{
		.base.cra_name		= "chacha20",
		.base.cra_driver_name	= "chacha20-neon",
		.base.cra_priority	= 300,
		.base.cra_blocksize	= 1,
		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
		.base.cra_module	= THIS_MODULE,

		.min_keysize		= CHACHA_KEY_SIZE,
		.max_keysize		= CHACHA_KEY_SIZE,
		.ivsize			= CHACHA_IV_SIZE,
		.chunksize		= CHACHA_BLOCK_SIZE,
		.walksize		= 5 * CHACHA_BLOCK_SIZE,
		.setkey			= chacha20_setkey,
		.encrypt		= chacha_neon,
		.decrypt		= chacha_neon,
	}, {
		.base.cra_name		= "xchacha20",
		.base.cra_driver_name	= "xchacha20-neon",
		.base.cra_priority	= 300,
		.base.cra_blocksize	= 1,
		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
		.base.cra_module	= THIS_MODULE,

		.min_keysize		= CHACHA_KEY_SIZE,
		.max_keysize		= CHACHA_KEY_SIZE,
		.ivsize			= XCHACHA_IV_SIZE,
		.chunksize		= CHACHA_BLOCK_SIZE,
		.walksize		= 5 * CHACHA_BLOCK_SIZE,
		.setkey			= chacha20_setkey,
		.encrypt		= xchacha_neon,
		.decrypt		= xchacha_neon,
	}, {
		.base.cra_name		= "xchacha12",
		.base.cra_driver_name	= "xchacha12-neon",
		.base.cra_priority	= 300,
		.base.cra_blocksize	= 1,
		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
		.base.cra_module	= THIS_MODULE,

		.min_keysize		= CHACHA_KEY_SIZE,
		.max_keysize		= CHACHA_KEY_SIZE,
		.ivsize			= XCHACHA_IV_SIZE,
		.chunksize		= CHACHA_BLOCK_SIZE,
		.walksize		= 5 * CHACHA_BLOCK_SIZE,
		.setkey			= chacha12_setkey,
		.encrypt		= xchacha_neon,
		.decrypt		= xchacha_neon,
	}
};
2148c2ecf20Sopenharmony_ci
2158c2ecf20Sopenharmony_cistatic int __init chacha_simd_mod_init(void)
2168c2ecf20Sopenharmony_ci{
2178c2ecf20Sopenharmony_ci	if (!cpu_have_named_feature(ASIMD))
2188c2ecf20Sopenharmony_ci		return 0;
2198c2ecf20Sopenharmony_ci
2208c2ecf20Sopenharmony_ci	static_branch_enable(&have_neon);
2218c2ecf20Sopenharmony_ci
2228c2ecf20Sopenharmony_ci	return IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER) ?
2238c2ecf20Sopenharmony_ci		crypto_register_skciphers(algs, ARRAY_SIZE(algs)) : 0;
2248c2ecf20Sopenharmony_ci}
2258c2ecf20Sopenharmony_ci
2268c2ecf20Sopenharmony_cistatic void __exit chacha_simd_mod_fini(void)
2278c2ecf20Sopenharmony_ci{
2288c2ecf20Sopenharmony_ci	if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER) && cpu_have_named_feature(ASIMD))
2298c2ecf20Sopenharmony_ci		crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
2308c2ecf20Sopenharmony_ci}
2318c2ecf20Sopenharmony_ci
/* Module entry/exit hooks. */
module_init(chacha_simd_mod_init);
module_exit(chacha_simd_mod_fini);

MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (NEON accelerated)");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
/* One alias per generic algorithm name and per driver name listed in algs[]. */
MODULE_ALIAS_CRYPTO("chacha20");
MODULE_ALIAS_CRYPTO("chacha20-neon");
MODULE_ALIAS_CRYPTO("xchacha20");
MODULE_ALIAS_CRYPTO("xchacha20-neon");
MODULE_ALIAS_CRYPTO("xchacha12");
MODULE_ALIAS_CRYPTO("xchacha12-neon");
244