1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * Glue Code for 3-way parallel assembler optimized version of Twofish
4 *
5 * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
6 */
7
8#include <asm/crypto/glue_helper.h>
9#include <asm/crypto/twofish.h>
10#include <crypto/algapi.h>
11#include <crypto/b128ops.h>
12#include <crypto/internal/skcipher.h>
13#include <crypto/twofish.h>
14#include <linux/crypto.h>
15#include <linux/init.h>
16#include <linux/module.h>
17#include <linux/types.h>
18
/*
 * Export the 3-way block primitives to other GPL modules. Neither symbol is
 * defined in this file.
 */
EXPORT_SYMBOL_GPL(twofish_dec_blk_3way);
EXPORT_SYMBOL_GPL(__twofish_enc_blk_3way);
21
22static int twofish_setkey_skcipher(struct crypto_skcipher *tfm,
23				   const u8 *key, unsigned int keylen)
24{
25	return twofish_setkey(&tfm->base, key, keylen);
26}
27
28static inline void twofish_enc_blk_3way(const void *ctx, u8 *dst, const u8 *src)
29{
30	__twofish_enc_blk_3way(ctx, dst, src, false);
31}
32
33static inline void twofish_enc_blk_xor_3way(const void *ctx, u8 *dst,
34					    const u8 *src)
35{
36	__twofish_enc_blk_3way(ctx, dst, src, true);
37}
38
39void twofish_dec_blk_cbc_3way(const void *ctx, u8 *d, const u8 *s)
40{
41	u128 ivs[2];
42	u128 *dst = (u128 *)d;
43	const u128 *src = (const u128 *)s;
44
45	ivs[0] = src[0];
46	ivs[1] = src[1];
47
48	twofish_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src);
49
50	u128_xor(&dst[1], &dst[1], &ivs[0]);
51	u128_xor(&dst[2], &dst[2], &ivs[1]);
52}
53EXPORT_SYMBOL_GPL(twofish_dec_blk_cbc_3way);
54
55void twofish_enc_blk_ctr(const void *ctx, u8 *d, const u8 *s, le128 *iv)
56{
57	be128 ctrblk;
58	u128 *dst = (u128 *)d;
59	const u128 *src = (const u128 *)s;
60
61	if (dst != src)
62		*dst = *src;
63
64	le128_to_be128(&ctrblk, iv);
65	le128_inc(iv);
66
67	twofish_enc_blk(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
68	u128_xor(dst, dst, (u128 *)&ctrblk);
69}
70EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr);
71
72void twofish_enc_blk_ctr_3way(const void *ctx, u8 *d, const u8 *s, le128 *iv)
73{
74	be128 ctrblks[3];
75	u128 *dst = (u128 *)d;
76	const u128 *src = (const u128 *)s;
77
78	if (dst != src) {
79		dst[0] = src[0];
80		dst[1] = src[1];
81		dst[2] = src[2];
82	}
83
84	le128_to_be128(&ctrblks[0], iv);
85	le128_inc(iv);
86	le128_to_be128(&ctrblks[1], iv);
87	le128_inc(iv);
88	le128_to_be128(&ctrblks[2], iv);
89	le128_inc(iv);
90
91	twofish_enc_blk_xor_3way(ctx, (u8 *)dst, (u8 *)ctrblks);
92}
93EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr_3way);
94
95static const struct common_glue_ctx twofish_enc = {
96	.num_funcs = 2,
97	.fpu_blocks_limit = -1,
98
99	.funcs = { {
100		.num_blocks = 3,
101		.fn_u = { .ecb = twofish_enc_blk_3way }
102	}, {
103		.num_blocks = 1,
104		.fn_u = { .ecb = twofish_enc_blk }
105	} }
106};
107
108static const struct common_glue_ctx twofish_ctr = {
109	.num_funcs = 2,
110	.fpu_blocks_limit = -1,
111
112	.funcs = { {
113		.num_blocks = 3,
114		.fn_u = { .ctr = twofish_enc_blk_ctr_3way }
115	}, {
116		.num_blocks = 1,
117		.fn_u = { .ctr = twofish_enc_blk_ctr }
118	} }
119};
120
121static const struct common_glue_ctx twofish_dec = {
122	.num_funcs = 2,
123	.fpu_blocks_limit = -1,
124
125	.funcs = { {
126		.num_blocks = 3,
127		.fn_u = { .ecb = twofish_dec_blk_3way }
128	}, {
129		.num_blocks = 1,
130		.fn_u = { .ecb = twofish_dec_blk }
131	} }
132};
133
134static const struct common_glue_ctx twofish_dec_cbc = {
135	.num_funcs = 2,
136	.fpu_blocks_limit = -1,
137
138	.funcs = { {
139		.num_blocks = 3,
140		.fn_u = { .cbc = twofish_dec_blk_cbc_3way }
141	}, {
142		.num_blocks = 1,
143		.fn_u = { .cbc = twofish_dec_blk }
144	} }
145};
146
147static int ecb_encrypt(struct skcipher_request *req)
148{
149	return glue_ecb_req_128bit(&twofish_enc, req);
150}
151
152static int ecb_decrypt(struct skcipher_request *req)
153{
154	return glue_ecb_req_128bit(&twofish_dec, req);
155}
156
157static int cbc_encrypt(struct skcipher_request *req)
158{
159	return glue_cbc_encrypt_req_128bit(twofish_enc_blk, req);
160}
161
162static int cbc_decrypt(struct skcipher_request *req)
163{
164	return glue_cbc_decrypt_req_128bit(&twofish_dec_cbc, req);
165}
166
167static int ctr_crypt(struct skcipher_request *req)
168{
169	return glue_ctr_req_128bit(&twofish_ctr, req);
170}
171
172static struct skcipher_alg tf_skciphers[] = {
173	{
174		.base.cra_name		= "ecb(twofish)",
175		.base.cra_driver_name	= "ecb-twofish-3way",
176		.base.cra_priority	= 300,
177		.base.cra_blocksize	= TF_BLOCK_SIZE,
178		.base.cra_ctxsize	= sizeof(struct twofish_ctx),
179		.base.cra_module	= THIS_MODULE,
180		.min_keysize		= TF_MIN_KEY_SIZE,
181		.max_keysize		= TF_MAX_KEY_SIZE,
182		.setkey			= twofish_setkey_skcipher,
183		.encrypt		= ecb_encrypt,
184		.decrypt		= ecb_decrypt,
185	}, {
186		.base.cra_name		= "cbc(twofish)",
187		.base.cra_driver_name	= "cbc-twofish-3way",
188		.base.cra_priority	= 300,
189		.base.cra_blocksize	= TF_BLOCK_SIZE,
190		.base.cra_ctxsize	= sizeof(struct twofish_ctx),
191		.base.cra_module	= THIS_MODULE,
192		.min_keysize		= TF_MIN_KEY_SIZE,
193		.max_keysize		= TF_MAX_KEY_SIZE,
194		.ivsize			= TF_BLOCK_SIZE,
195		.setkey			= twofish_setkey_skcipher,
196		.encrypt		= cbc_encrypt,
197		.decrypt		= cbc_decrypt,
198	}, {
199		.base.cra_name		= "ctr(twofish)",
200		.base.cra_driver_name	= "ctr-twofish-3way",
201		.base.cra_priority	= 300,
202		.base.cra_blocksize	= 1,
203		.base.cra_ctxsize	= sizeof(struct twofish_ctx),
204		.base.cra_module	= THIS_MODULE,
205		.min_keysize		= TF_MIN_KEY_SIZE,
206		.max_keysize		= TF_MAX_KEY_SIZE,
207		.ivsize			= TF_BLOCK_SIZE,
208		.chunksize		= TF_BLOCK_SIZE,
209		.setkey			= twofish_setkey_skcipher,
210		.encrypt		= ctr_crypt,
211		.decrypt		= ctr_crypt,
212	},
213};
214
215static bool is_blacklisted_cpu(void)
216{
217	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
218		return false;
219
220	if (boot_cpu_data.x86 == 0x06 &&
221		(boot_cpu_data.x86_model == 0x1c ||
222		 boot_cpu_data.x86_model == 0x26 ||
223		 boot_cpu_data.x86_model == 0x36)) {
224		/*
225		 * On Atom, twofish-3way is slower than original assembler
226		 * implementation. Twofish-3way trades off some performance in
227		 * storing blocks in 64bit registers to allow three blocks to
228		 * be processed parallel. Parallel operation then allows gaining
229		 * more performance than was trade off, on out-of-order CPUs.
230		 * However Atom does not benefit from this parallellism and
231		 * should be blacklisted.
232		 */
233		return true;
234	}
235
236	if (boot_cpu_data.x86 == 0x0f) {
237		/*
238		 * On Pentium 4, twofish-3way is slower than original assembler
239		 * implementation because excessive uses of 64bit rotate and
240		 * left-shifts (which are really slow on P4) needed to store and
241		 * handle 128bit block in two 64bit registers.
242		 */
243		return true;
244	}
245
246	return false;
247}
248
249static int force;
250module_param(force, int, 0);
251MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
252
253static int __init init(void)
254{
255	if (!force && is_blacklisted_cpu()) {
256		printk(KERN_INFO
257			"twofish-x86_64-3way: performance on this CPU "
258			"would be suboptimal: disabling "
259			"twofish-x86_64-3way.\n");
260		return -ENODEV;
261	}
262
263	return crypto_register_skciphers(tf_skciphers,
264					 ARRAY_SIZE(tf_skciphers));
265}
266
267static void __exit fini(void)
268{
269	crypto_unregister_skciphers(tf_skciphers, ARRAY_SIZE(tf_skciphers));
270}
271
272module_init(init);
273module_exit(fini);
274
275MODULE_LICENSE("GPL");
276MODULE_DESCRIPTION("Twofish Cipher Algorithm, 3-way parallel asm optimized");
277MODULE_ALIAS_CRYPTO("twofish");
278MODULE_ALIAS_CRYPTO("twofish-asm");
279