1// SPDX-License-Identifier: GPL-2.0
2/*
3 * ARM NEON accelerated ChaCha and XChaCha stream ciphers,
4 * including ChaCha20 (RFC7539)
5 *
6 * Copyright (C) 2016-2019 Linaro, Ltd. <ard.biesheuvel@linaro.org>
7 * Copyright (C) 2015 Martin Willi
8 */
9
10#include <crypto/algapi.h>
11#include <crypto/internal/chacha.h>
12#include <crypto/internal/simd.h>
13#include <crypto/internal/skcipher.h>
14#include <linux/jump_label.h>
15#include <linux/kernel.h>
16#include <linux/module.h>
17
18#include <asm/cputype.h>
19#include <asm/hwcap.h>
20#include <asm/neon.h>
21#include <asm/simd.h>
22
23asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
24				      int nrounds);
25asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
26				       int nrounds);
27asmlinkage void hchacha_block_arm(const u32 *state, u32 *out, int nrounds);
28asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
29
30asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes,
31			     const u32 *state, int nrounds);
32
33static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_neon);
34
35static inline bool neon_usable(void)
36{
37	return static_branch_likely(&use_neon) && crypto_simd_usable();
38}
39
40static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
41			  unsigned int bytes, int nrounds)
42{
43	u8 buf[CHACHA_BLOCK_SIZE];
44
45	while (bytes >= CHACHA_BLOCK_SIZE * 4) {
46		chacha_4block_xor_neon(state, dst, src, nrounds);
47		bytes -= CHACHA_BLOCK_SIZE * 4;
48		src += CHACHA_BLOCK_SIZE * 4;
49		dst += CHACHA_BLOCK_SIZE * 4;
50		state[12] += 4;
51	}
52	while (bytes >= CHACHA_BLOCK_SIZE) {
53		chacha_block_xor_neon(state, dst, src, nrounds);
54		bytes -= CHACHA_BLOCK_SIZE;
55		src += CHACHA_BLOCK_SIZE;
56		dst += CHACHA_BLOCK_SIZE;
57		state[12]++;
58	}
59	if (bytes) {
60		memcpy(buf, src, bytes);
61		chacha_block_xor_neon(state, buf, buf, nrounds);
62		memcpy(dst, buf, bytes);
63	}
64}
65
66void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
67{
68	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable()) {
69		hchacha_block_arm(state, stream, nrounds);
70	} else {
71		kernel_neon_begin();
72		hchacha_block_neon(state, stream, nrounds);
73		kernel_neon_end();
74	}
75}
76EXPORT_SYMBOL(hchacha_block_arch);
77
78void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
79{
80	chacha_init_generic(state, key, iv);
81}
82EXPORT_SYMBOL(chacha_init_arch);
83
84void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
85		       int nrounds)
86{
87	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable() ||
88	    bytes <= CHACHA_BLOCK_SIZE) {
89		chacha_doarm(dst, src, bytes, state, nrounds);
90		state[12] += DIV_ROUND_UP(bytes, CHACHA_BLOCK_SIZE);
91		return;
92	}
93
94	do {
95		unsigned int todo = min_t(unsigned int, bytes, SZ_4K);
96
97		kernel_neon_begin();
98		chacha_doneon(state, dst, src, todo, nrounds);
99		kernel_neon_end();
100
101		bytes -= todo;
102		src += todo;
103		dst += todo;
104	} while (bytes);
105}
106EXPORT_SYMBOL(chacha_crypt_arch);
107
108static int chacha_stream_xor(struct skcipher_request *req,
109			     const struct chacha_ctx *ctx, const u8 *iv,
110			     bool neon)
111{
112	struct skcipher_walk walk;
113	u32 state[16];
114	int err;
115
116	err = skcipher_walk_virt(&walk, req, false);
117
118	chacha_init_generic(state, ctx->key, iv);
119
120	while (walk.nbytes > 0) {
121		unsigned int nbytes = walk.nbytes;
122
123		if (nbytes < walk.total)
124			nbytes = round_down(nbytes, walk.stride);
125
126		if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon) {
127			chacha_doarm(walk.dst.virt.addr, walk.src.virt.addr,
128				     nbytes, state, ctx->nrounds);
129			state[12] += DIV_ROUND_UP(nbytes, CHACHA_BLOCK_SIZE);
130		} else {
131			kernel_neon_begin();
132			chacha_doneon(state, walk.dst.virt.addr,
133				      walk.src.virt.addr, nbytes, ctx->nrounds);
134			kernel_neon_end();
135		}
136		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
137	}
138
139	return err;
140}
141
142static int do_chacha(struct skcipher_request *req, bool neon)
143{
144	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
145	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
146
147	return chacha_stream_xor(req, ctx, req->iv, neon);
148}
149
150static int chacha_arm(struct skcipher_request *req)
151{
152	return do_chacha(req, false);
153}
154
155static int chacha_neon(struct skcipher_request *req)
156{
157	return do_chacha(req, neon_usable());
158}
159
160static int do_xchacha(struct skcipher_request *req, bool neon)
161{
162	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
163	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
164	struct chacha_ctx subctx;
165	u32 state[16];
166	u8 real_iv[16];
167
168	chacha_init_generic(state, ctx->key, req->iv);
169
170	if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon) {
171		hchacha_block_arm(state, subctx.key, ctx->nrounds);
172	} else {
173		kernel_neon_begin();
174		hchacha_block_neon(state, subctx.key, ctx->nrounds);
175		kernel_neon_end();
176	}
177	subctx.nrounds = ctx->nrounds;
178
179	memcpy(&real_iv[0], req->iv + 24, 8);
180	memcpy(&real_iv[8], req->iv + 16, 8);
181	return chacha_stream_xor(req, &subctx, real_iv, neon);
182}
183
184static int xchacha_arm(struct skcipher_request *req)
185{
186	return do_xchacha(req, false);
187}
188
189static int xchacha_neon(struct skcipher_request *req)
190{
191	return do_xchacha(req, neon_usable());
192}
193
194static struct skcipher_alg arm_algs[] = {
195	{
196		.base.cra_name		= "chacha20",
197		.base.cra_driver_name	= "chacha20-arm",
198		.base.cra_priority	= 200,
199		.base.cra_blocksize	= 1,
200		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
201		.base.cra_module	= THIS_MODULE,
202
203		.min_keysize		= CHACHA_KEY_SIZE,
204		.max_keysize		= CHACHA_KEY_SIZE,
205		.ivsize			= CHACHA_IV_SIZE,
206		.chunksize		= CHACHA_BLOCK_SIZE,
207		.setkey			= chacha20_setkey,
208		.encrypt		= chacha_arm,
209		.decrypt		= chacha_arm,
210	}, {
211		.base.cra_name		= "xchacha20",
212		.base.cra_driver_name	= "xchacha20-arm",
213		.base.cra_priority	= 200,
214		.base.cra_blocksize	= 1,
215		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
216		.base.cra_module	= THIS_MODULE,
217
218		.min_keysize		= CHACHA_KEY_SIZE,
219		.max_keysize		= CHACHA_KEY_SIZE,
220		.ivsize			= XCHACHA_IV_SIZE,
221		.chunksize		= CHACHA_BLOCK_SIZE,
222		.setkey			= chacha20_setkey,
223		.encrypt		= xchacha_arm,
224		.decrypt		= xchacha_arm,
225	}, {
226		.base.cra_name		= "xchacha12",
227		.base.cra_driver_name	= "xchacha12-arm",
228		.base.cra_priority	= 200,
229		.base.cra_blocksize	= 1,
230		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
231		.base.cra_module	= THIS_MODULE,
232
233		.min_keysize		= CHACHA_KEY_SIZE,
234		.max_keysize		= CHACHA_KEY_SIZE,
235		.ivsize			= XCHACHA_IV_SIZE,
236		.chunksize		= CHACHA_BLOCK_SIZE,
237		.setkey			= chacha12_setkey,
238		.encrypt		= xchacha_arm,
239		.decrypt		= xchacha_arm,
240	},
241};
242
243static struct skcipher_alg neon_algs[] = {
244	{
245		.base.cra_name		= "chacha20",
246		.base.cra_driver_name	= "chacha20-neon",
247		.base.cra_priority	= 300,
248		.base.cra_blocksize	= 1,
249		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
250		.base.cra_module	= THIS_MODULE,
251
252		.min_keysize		= CHACHA_KEY_SIZE,
253		.max_keysize		= CHACHA_KEY_SIZE,
254		.ivsize			= CHACHA_IV_SIZE,
255		.chunksize		= CHACHA_BLOCK_SIZE,
256		.walksize		= 4 * CHACHA_BLOCK_SIZE,
257		.setkey			= chacha20_setkey,
258		.encrypt		= chacha_neon,
259		.decrypt		= chacha_neon,
260	}, {
261		.base.cra_name		= "xchacha20",
262		.base.cra_driver_name	= "xchacha20-neon",
263		.base.cra_priority	= 300,
264		.base.cra_blocksize	= 1,
265		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
266		.base.cra_module	= THIS_MODULE,
267
268		.min_keysize		= CHACHA_KEY_SIZE,
269		.max_keysize		= CHACHA_KEY_SIZE,
270		.ivsize			= XCHACHA_IV_SIZE,
271		.chunksize		= CHACHA_BLOCK_SIZE,
272		.walksize		= 4 * CHACHA_BLOCK_SIZE,
273		.setkey			= chacha20_setkey,
274		.encrypt		= xchacha_neon,
275		.decrypt		= xchacha_neon,
276	}, {
277		.base.cra_name		= "xchacha12",
278		.base.cra_driver_name	= "xchacha12-neon",
279		.base.cra_priority	= 300,
280		.base.cra_blocksize	= 1,
281		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
282		.base.cra_module	= THIS_MODULE,
283
284		.min_keysize		= CHACHA_KEY_SIZE,
285		.max_keysize		= CHACHA_KEY_SIZE,
286		.ivsize			= XCHACHA_IV_SIZE,
287		.chunksize		= CHACHA_BLOCK_SIZE,
288		.walksize		= 4 * CHACHA_BLOCK_SIZE,
289		.setkey			= chacha12_setkey,
290		.encrypt		= xchacha_neon,
291		.decrypt		= xchacha_neon,
292	}
293};
294
295static int __init chacha_simd_mod_init(void)
296{
297	int err = 0;
298
299	if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER)) {
300		err = crypto_register_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
301		if (err)
302			return err;
303	}
304
305	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) {
306		int i;
307
308		switch (read_cpuid_part()) {
309		case ARM_CPU_PART_CORTEX_A7:
310		case ARM_CPU_PART_CORTEX_A5:
311			/*
312			 * The Cortex-A7 and Cortex-A5 do not perform well with
313			 * the NEON implementation but do incredibly with the
314			 * scalar one and use less power.
315			 */
316			for (i = 0; i < ARRAY_SIZE(neon_algs); i++)
317				neon_algs[i].base.cra_priority = 0;
318			break;
319		default:
320			static_branch_enable(&use_neon);
321		}
322
323		if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER)) {
324			err = crypto_register_skciphers(neon_algs, ARRAY_SIZE(neon_algs));
325			if (err)
326				crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
327		}
328	}
329	return err;
330}
331
332static void __exit chacha_simd_mod_fini(void)
333{
334	if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER)) {
335		crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
336		if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON))
337			crypto_unregister_skciphers(neon_algs, ARRAY_SIZE(neon_algs));
338	}
339}
340
341module_init(chacha_simd_mod_init);
342module_exit(chacha_simd_mod_fini);
343
344MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (scalar and NEON accelerated)");
345MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
346MODULE_LICENSE("GPL v2");
347MODULE_ALIAS_CRYPTO("chacha20");
348MODULE_ALIAS_CRYPTO("chacha20-arm");
349MODULE_ALIAS_CRYPTO("xchacha20");
350MODULE_ALIAS_CRYPTO("xchacha20-arm");
351MODULE_ALIAS_CRYPTO("xchacha12");
352MODULE_ALIAS_CRYPTO("xchacha12-arm");
353#ifdef CONFIG_KERNEL_MODE_NEON
354MODULE_ALIAS_CRYPTO("chacha20-neon");
355MODULE_ALIAS_CRYPTO("xchacha20-neon");
356MODULE_ALIAS_CRYPTO("xchacha12-neon");
357#endif
358