18c2ecf20Sopenharmony_ci/* 28c2ecf20Sopenharmony_ci * ARM NEON and scalar accelerated ChaCha and XChaCha stream ciphers, 38c2ecf20Sopenharmony_ci * including ChaCha20 (RFC7539) 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * Copyright (C) 2016 - 2017 Linaro, Ltd. <ard.biesheuvel@linaro.org> 68c2ecf20Sopenharmony_ci * 78c2ecf20Sopenharmony_ci * This program is free software; you can redistribute it and/or modify 88c2ecf20Sopenharmony_ci * it under the terms of the GNU General Public License version 2 as 98c2ecf20Sopenharmony_ci * published by the Free Software Foundation. 108c2ecf20Sopenharmony_ci * 118c2ecf20Sopenharmony_ci * Based on: 128c2ecf20Sopenharmony_ci * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code 138c2ecf20Sopenharmony_ci * 148c2ecf20Sopenharmony_ci * Copyright (C) 2015 Martin Willi 158c2ecf20Sopenharmony_ci * 168c2ecf20Sopenharmony_ci * This program is free software; you can redistribute it and/or modify 178c2ecf20Sopenharmony_ci * it under the terms of the GNU General Public License as published by 188c2ecf20Sopenharmony_ci * the Free Software Foundation; either version 2 of the License, or 198c2ecf20Sopenharmony_ci * (at your option) any later version. 208c2ecf20Sopenharmony_ci */ 218c2ecf20Sopenharmony_ci 228c2ecf20Sopenharmony_ci#include <crypto/algapi.h> 238c2ecf20Sopenharmony_ci#include <crypto/internal/chacha.h> 248c2ecf20Sopenharmony_ci#include <crypto/internal/simd.h> 258c2ecf20Sopenharmony_ci#include <crypto/internal/skcipher.h> 268c2ecf20Sopenharmony_ci#include <linux/jump_label.h> 278c2ecf20Sopenharmony_ci#include <linux/kernel.h> 288c2ecf20Sopenharmony_ci#include <linux/module.h> 298c2ecf20Sopenharmony_ci 308c2ecf20Sopenharmony_ci#include <asm/hwcap.h> 318c2ecf20Sopenharmony_ci#include <asm/neon.h> 328c2ecf20Sopenharmony_ci#include <asm/simd.h> 338c2ecf20Sopenharmony_ci 348c2ecf20Sopenharmony_ciasmlinkage void chacha_block_xor_neon(u32 *state, u8 *dst, const u8 *src, 358c2ecf20Sopenharmony_ci int nrounds); 368c2ecf20Sopenharmony_ciasmlinkage void chacha_4block_xor_neon(u32 *state, u8 *dst, const u8 *src, 378c2ecf20Sopenharmony_ci int nrounds, int bytes); 388c2ecf20Sopenharmony_ciasmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds); 398c2ecf20Sopenharmony_ci 408c2ecf20Sopenharmony_cistatic __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); 418c2ecf20Sopenharmony_ci 428c2ecf20Sopenharmony_cistatic void chacha_doneon(u32 *state, u8 *dst, const u8 *src, 438c2ecf20Sopenharmony_ci int bytes, int nrounds) 448c2ecf20Sopenharmony_ci{ 458c2ecf20Sopenharmony_ci while (bytes > 0) { 468c2ecf20Sopenharmony_ci int l = min(bytes, CHACHA_BLOCK_SIZE * 5); 478c2ecf20Sopenharmony_ci 488c2ecf20Sopenharmony_ci if (l <= CHACHA_BLOCK_SIZE) { 498c2ecf20Sopenharmony_ci u8 buf[CHACHA_BLOCK_SIZE]; 508c2ecf20Sopenharmony_ci 518c2ecf20Sopenharmony_ci memcpy(buf, src, l); 528c2ecf20Sopenharmony_ci chacha_block_xor_neon(state, buf, buf, nrounds); 538c2ecf20Sopenharmony_ci memcpy(dst, buf, l); 548c2ecf20Sopenharmony_ci state[12] += 1; 558c2ecf20Sopenharmony_ci break; 568c2ecf20Sopenharmony_ci } 578c2ecf20Sopenharmony_ci chacha_4block_xor_neon(state, dst, src, nrounds, l); 588c2ecf20Sopenharmony_ci bytes -= l; 598c2ecf20Sopenharmony_ci src += l; 608c2ecf20Sopenharmony_ci dst += l; 618c2ecf20Sopenharmony_ci state[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE); 628c2ecf20Sopenharmony_ci } 638c2ecf20Sopenharmony_ci} 648c2ecf20Sopenharmony_ci 658c2ecf20Sopenharmony_civoid hchacha_block_arch(const u32 *state, u32 *stream, int nrounds) 668c2ecf20Sopenharmony_ci{ 678c2ecf20Sopenharmony_ci if (!static_branch_likely(&have_neon) || !crypto_simd_usable()) { 688c2ecf20Sopenharmony_ci hchacha_block_generic(state, stream, nrounds); 698c2ecf20Sopenharmony_ci } else { 708c2ecf20Sopenharmony_ci kernel_neon_begin(); 718c2ecf20Sopenharmony_ci hchacha_block_neon(state, stream, nrounds); 728c2ecf20Sopenharmony_ci kernel_neon_end(); 738c2ecf20Sopenharmony_ci } 748c2ecf20Sopenharmony_ci} 758c2ecf20Sopenharmony_ciEXPORT_SYMBOL(hchacha_block_arch); 768c2ecf20Sopenharmony_ci 778c2ecf20Sopenharmony_civoid chacha_init_arch(u32 *state, const u32 *key, const u8 *iv) 788c2ecf20Sopenharmony_ci{ 798c2ecf20Sopenharmony_ci chacha_init_generic(state, key, iv); 808c2ecf20Sopenharmony_ci} 818c2ecf20Sopenharmony_ciEXPORT_SYMBOL(chacha_init_arch); 828c2ecf20Sopenharmony_ci 838c2ecf20Sopenharmony_civoid chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes, 848c2ecf20Sopenharmony_ci int nrounds) 858c2ecf20Sopenharmony_ci{ 868c2ecf20Sopenharmony_ci if (!static_branch_likely(&have_neon) || bytes <= CHACHA_BLOCK_SIZE || 878c2ecf20Sopenharmony_ci !crypto_simd_usable()) 888c2ecf20Sopenharmony_ci return chacha_crypt_generic(state, dst, src, bytes, nrounds); 898c2ecf20Sopenharmony_ci 908c2ecf20Sopenharmony_ci do { 918c2ecf20Sopenharmony_ci unsigned int todo = min_t(unsigned int, bytes, SZ_4K); 928c2ecf20Sopenharmony_ci 938c2ecf20Sopenharmony_ci kernel_neon_begin(); 948c2ecf20Sopenharmony_ci chacha_doneon(state, dst, src, todo, nrounds); 958c2ecf20Sopenharmony_ci kernel_neon_end(); 968c2ecf20Sopenharmony_ci 978c2ecf20Sopenharmony_ci bytes -= todo; 988c2ecf20Sopenharmony_ci src += todo; 998c2ecf20Sopenharmony_ci dst += todo; 1008c2ecf20Sopenharmony_ci } while (bytes); 1018c2ecf20Sopenharmony_ci} 1028c2ecf20Sopenharmony_ciEXPORT_SYMBOL(chacha_crypt_arch); 1038c2ecf20Sopenharmony_ci 1048c2ecf20Sopenharmony_cistatic int chacha_neon_stream_xor(struct skcipher_request *req, 1058c2ecf20Sopenharmony_ci const struct chacha_ctx *ctx, const u8 *iv) 1068c2ecf20Sopenharmony_ci{ 1078c2ecf20Sopenharmony_ci struct skcipher_walk walk; 1088c2ecf20Sopenharmony_ci u32 state[16]; 1098c2ecf20Sopenharmony_ci int err; 1108c2ecf20Sopenharmony_ci 1118c2ecf20Sopenharmony_ci err = skcipher_walk_virt(&walk, req, false); 1128c2ecf20Sopenharmony_ci 1138c2ecf20Sopenharmony_ci chacha_init_generic(state, ctx->key, iv); 1148c2ecf20Sopenharmony_ci 1158c2ecf20Sopenharmony_ci while (walk.nbytes > 0) { 1168c2ecf20Sopenharmony_ci unsigned int nbytes = walk.nbytes; 1178c2ecf20Sopenharmony_ci 1188c2ecf20Sopenharmony_ci if (nbytes < walk.total) 1198c2ecf20Sopenharmony_ci nbytes = rounddown(nbytes, walk.stride); 1208c2ecf20Sopenharmony_ci 1218c2ecf20Sopenharmony_ci if (!static_branch_likely(&have_neon) || 1228c2ecf20Sopenharmony_ci !crypto_simd_usable()) { 1238c2ecf20Sopenharmony_ci chacha_crypt_generic(state, walk.dst.virt.addr, 1248c2ecf20Sopenharmony_ci walk.src.virt.addr, nbytes, 1258c2ecf20Sopenharmony_ci ctx->nrounds); 1268c2ecf20Sopenharmony_ci } else { 1278c2ecf20Sopenharmony_ci kernel_neon_begin(); 1288c2ecf20Sopenharmony_ci chacha_doneon(state, walk.dst.virt.addr, 1298c2ecf20Sopenharmony_ci walk.src.virt.addr, nbytes, ctx->nrounds); 1308c2ecf20Sopenharmony_ci kernel_neon_end(); 1318c2ecf20Sopenharmony_ci } 1328c2ecf20Sopenharmony_ci err = skcipher_walk_done(&walk, walk.nbytes - nbytes); 1338c2ecf20Sopenharmony_ci } 1348c2ecf20Sopenharmony_ci 1358c2ecf20Sopenharmony_ci return err; 1368c2ecf20Sopenharmony_ci} 1378c2ecf20Sopenharmony_ci 1388c2ecf20Sopenharmony_cistatic int chacha_neon(struct skcipher_request *req) 1398c2ecf20Sopenharmony_ci{ 1408c2ecf20Sopenharmony_ci struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); 1418c2ecf20Sopenharmony_ci struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); 1428c2ecf20Sopenharmony_ci 1438c2ecf20Sopenharmony_ci return chacha_neon_stream_xor(req, ctx, req->iv); 1448c2ecf20Sopenharmony_ci} 1458c2ecf20Sopenharmony_ci 1468c2ecf20Sopenharmony_cistatic int xchacha_neon(struct skcipher_request *req) 1478c2ecf20Sopenharmony_ci{ 1488c2ecf20Sopenharmony_ci struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); 1498c2ecf20Sopenharmony_ci struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); 1508c2ecf20Sopenharmony_ci struct chacha_ctx subctx; 1518c2ecf20Sopenharmony_ci u32 state[16]; 1528c2ecf20Sopenharmony_ci u8 real_iv[16]; 1538c2ecf20Sopenharmony_ci 1548c2ecf20Sopenharmony_ci chacha_init_generic(state, ctx->key, req->iv); 1558c2ecf20Sopenharmony_ci hchacha_block_arch(state, subctx.key, ctx->nrounds); 1568c2ecf20Sopenharmony_ci subctx.nrounds = ctx->nrounds; 1578c2ecf20Sopenharmony_ci 1588c2ecf20Sopenharmony_ci memcpy(&real_iv[0], req->iv + 24, 8); 1598c2ecf20Sopenharmony_ci memcpy(&real_iv[8], req->iv + 16, 8); 1608c2ecf20Sopenharmony_ci return chacha_neon_stream_xor(req, &subctx, real_iv); 1618c2ecf20Sopenharmony_ci} 1628c2ecf20Sopenharmony_ci 1638c2ecf20Sopenharmony_cistatic struct skcipher_alg algs[] = { 1648c2ecf20Sopenharmony_ci { 1658c2ecf20Sopenharmony_ci .base.cra_name = "chacha20", 1668c2ecf20Sopenharmony_ci .base.cra_driver_name = "chacha20-neon", 1678c2ecf20Sopenharmony_ci .base.cra_priority = 300, 1688c2ecf20Sopenharmony_ci .base.cra_blocksize = 1, 1698c2ecf20Sopenharmony_ci .base.cra_ctxsize = sizeof(struct chacha_ctx), 1708c2ecf20Sopenharmony_ci .base.cra_module = THIS_MODULE, 1718c2ecf20Sopenharmony_ci 1728c2ecf20Sopenharmony_ci .min_keysize = CHACHA_KEY_SIZE, 1738c2ecf20Sopenharmony_ci .max_keysize = CHACHA_KEY_SIZE, 1748c2ecf20Sopenharmony_ci .ivsize = CHACHA_IV_SIZE, 1758c2ecf20Sopenharmony_ci .chunksize = CHACHA_BLOCK_SIZE, 1768c2ecf20Sopenharmony_ci .walksize = 5 * CHACHA_BLOCK_SIZE, 1778c2ecf20Sopenharmony_ci .setkey = chacha20_setkey, 1788c2ecf20Sopenharmony_ci .encrypt = chacha_neon, 1798c2ecf20Sopenharmony_ci .decrypt = chacha_neon, 1808c2ecf20Sopenharmony_ci }, { 1818c2ecf20Sopenharmony_ci .base.cra_name = "xchacha20", 1828c2ecf20Sopenharmony_ci .base.cra_driver_name = "xchacha20-neon", 1838c2ecf20Sopenharmony_ci .base.cra_priority = 300, 1848c2ecf20Sopenharmony_ci .base.cra_blocksize = 1, 1858c2ecf20Sopenharmony_ci .base.cra_ctxsize = sizeof(struct chacha_ctx), 1868c2ecf20Sopenharmony_ci .base.cra_module = THIS_MODULE, 1878c2ecf20Sopenharmony_ci 1888c2ecf20Sopenharmony_ci .min_keysize = CHACHA_KEY_SIZE, 1898c2ecf20Sopenharmony_ci .max_keysize = CHACHA_KEY_SIZE, 1908c2ecf20Sopenharmony_ci .ivsize = XCHACHA_IV_SIZE, 1918c2ecf20Sopenharmony_ci .chunksize = CHACHA_BLOCK_SIZE, 1928c2ecf20Sopenharmony_ci .walksize = 5 * CHACHA_BLOCK_SIZE, 1938c2ecf20Sopenharmony_ci .setkey = chacha20_setkey, 1948c2ecf20Sopenharmony_ci .encrypt = xchacha_neon, 1958c2ecf20Sopenharmony_ci .decrypt = xchacha_neon, 1968c2ecf20Sopenharmony_ci }, { 1978c2ecf20Sopenharmony_ci .base.cra_name = "xchacha12", 1988c2ecf20Sopenharmony_ci .base.cra_driver_name = "xchacha12-neon", 1998c2ecf20Sopenharmony_ci .base.cra_priority = 300, 2008c2ecf20Sopenharmony_ci .base.cra_blocksize = 1, 2018c2ecf20Sopenharmony_ci .base.cra_ctxsize = sizeof(struct chacha_ctx), 2028c2ecf20Sopenharmony_ci .base.cra_module = THIS_MODULE, 2038c2ecf20Sopenharmony_ci 2048c2ecf20Sopenharmony_ci .min_keysize = CHACHA_KEY_SIZE, 2058c2ecf20Sopenharmony_ci .max_keysize = CHACHA_KEY_SIZE, 2068c2ecf20Sopenharmony_ci .ivsize = XCHACHA_IV_SIZE, 2078c2ecf20Sopenharmony_ci .chunksize = CHACHA_BLOCK_SIZE, 2088c2ecf20Sopenharmony_ci .walksize = 5 * CHACHA_BLOCK_SIZE, 2098c2ecf20Sopenharmony_ci .setkey = chacha12_setkey, 2108c2ecf20Sopenharmony_ci .encrypt = xchacha_neon, 2118c2ecf20Sopenharmony_ci .decrypt = xchacha_neon, 2128c2ecf20Sopenharmony_ci } 2138c2ecf20Sopenharmony_ci}; 2148c2ecf20Sopenharmony_ci 2158c2ecf20Sopenharmony_cistatic int __init chacha_simd_mod_init(void) 2168c2ecf20Sopenharmony_ci{ 2178c2ecf20Sopenharmony_ci if (!cpu_have_named_feature(ASIMD)) 2188c2ecf20Sopenharmony_ci return 0; 2198c2ecf20Sopenharmony_ci 2208c2ecf20Sopenharmony_ci static_branch_enable(&have_neon); 2218c2ecf20Sopenharmony_ci 2228c2ecf20Sopenharmony_ci return IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER) ? 2238c2ecf20Sopenharmony_ci crypto_register_skciphers(algs, ARRAY_SIZE(algs)) : 0; 2248c2ecf20Sopenharmony_ci} 2258c2ecf20Sopenharmony_ci 2268c2ecf20Sopenharmony_cistatic void __exit chacha_simd_mod_fini(void) 2278c2ecf20Sopenharmony_ci{ 2288c2ecf20Sopenharmony_ci if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER) && cpu_have_named_feature(ASIMD)) 2298c2ecf20Sopenharmony_ci crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); 2308c2ecf20Sopenharmony_ci} 2318c2ecf20Sopenharmony_ci 2328c2ecf20Sopenharmony_cimodule_init(chacha_simd_mod_init); 2338c2ecf20Sopenharmony_cimodule_exit(chacha_simd_mod_fini); 2348c2ecf20Sopenharmony_ci 2358c2ecf20Sopenharmony_ciMODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (NEON accelerated)"); 2368c2ecf20Sopenharmony_ciMODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); 2378c2ecf20Sopenharmony_ciMODULE_LICENSE("GPL v2"); 2388c2ecf20Sopenharmony_ciMODULE_ALIAS_CRYPTO("chacha20"); 2398c2ecf20Sopenharmony_ciMODULE_ALIAS_CRYPTO("chacha20-neon"); 2408c2ecf20Sopenharmony_ciMODULE_ALIAS_CRYPTO("xchacha20"); 2418c2ecf20Sopenharmony_ciMODULE_ALIAS_CRYPTO("xchacha20-neon"); 2428c2ecf20Sopenharmony_ciMODULE_ALIAS_CRYPTO("xchacha12"); 2438c2ecf20Sopenharmony_ciMODULE_ALIAS_CRYPTO("xchacha12-neon"); 244