// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Glue Code for 3-way parallel assembler optimized version of Twofish
 *
 * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
 */
78c2ecf20Sopenharmony_ci
88c2ecf20Sopenharmony_ci#include <asm/crypto/glue_helper.h>
98c2ecf20Sopenharmony_ci#include <asm/crypto/twofish.h>
108c2ecf20Sopenharmony_ci#include <crypto/algapi.h>
118c2ecf20Sopenharmony_ci#include <crypto/b128ops.h>
128c2ecf20Sopenharmony_ci#include <crypto/internal/skcipher.h>
138c2ecf20Sopenharmony_ci#include <crypto/twofish.h>
148c2ecf20Sopenharmony_ci#include <linux/crypto.h>
158c2ecf20Sopenharmony_ci#include <linux/init.h>
168c2ecf20Sopenharmony_ci#include <linux/module.h>
178c2ecf20Sopenharmony_ci#include <linux/types.h>
188c2ecf20Sopenharmony_ci
/*
 * Make the raw 3-way primitives available to other GPL modules.  Neither
 * function is defined in this file.
 * NOTE(review): presumably both come from the assembler implementation
 * that this glue code wraps - confirm.
 */
EXPORT_SYMBOL_GPL(twofish_dec_blk_3way);
EXPORT_SYMBOL_GPL(__twofish_enc_blk_3way);
218c2ecf20Sopenharmony_ci
228c2ecf20Sopenharmony_cistatic int twofish_setkey_skcipher(struct crypto_skcipher *tfm,
238c2ecf20Sopenharmony_ci				   const u8 *key, unsigned int keylen)
248c2ecf20Sopenharmony_ci{
258c2ecf20Sopenharmony_ci	return twofish_setkey(&tfm->base, key, keylen);
268c2ecf20Sopenharmony_ci}
278c2ecf20Sopenharmony_ci
288c2ecf20Sopenharmony_cistatic inline void twofish_enc_blk_3way(const void *ctx, u8 *dst, const u8 *src)
298c2ecf20Sopenharmony_ci{
308c2ecf20Sopenharmony_ci	__twofish_enc_blk_3way(ctx, dst, src, false);
318c2ecf20Sopenharmony_ci}
328c2ecf20Sopenharmony_ci
338c2ecf20Sopenharmony_cistatic inline void twofish_enc_blk_xor_3way(const void *ctx, u8 *dst,
348c2ecf20Sopenharmony_ci					    const u8 *src)
358c2ecf20Sopenharmony_ci{
368c2ecf20Sopenharmony_ci	__twofish_enc_blk_3way(ctx, dst, src, true);
378c2ecf20Sopenharmony_ci}
388c2ecf20Sopenharmony_ci
398c2ecf20Sopenharmony_civoid twofish_dec_blk_cbc_3way(const void *ctx, u8 *d, const u8 *s)
408c2ecf20Sopenharmony_ci{
418c2ecf20Sopenharmony_ci	u128 ivs[2];
428c2ecf20Sopenharmony_ci	u128 *dst = (u128 *)d;
438c2ecf20Sopenharmony_ci	const u128 *src = (const u128 *)s;
448c2ecf20Sopenharmony_ci
458c2ecf20Sopenharmony_ci	ivs[0] = src[0];
468c2ecf20Sopenharmony_ci	ivs[1] = src[1];
478c2ecf20Sopenharmony_ci
488c2ecf20Sopenharmony_ci	twofish_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src);
498c2ecf20Sopenharmony_ci
508c2ecf20Sopenharmony_ci	u128_xor(&dst[1], &dst[1], &ivs[0]);
518c2ecf20Sopenharmony_ci	u128_xor(&dst[2], &dst[2], &ivs[1]);
528c2ecf20Sopenharmony_ci}
538c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(twofish_dec_blk_cbc_3way);
548c2ecf20Sopenharmony_ci
558c2ecf20Sopenharmony_civoid twofish_enc_blk_ctr(const void *ctx, u8 *d, const u8 *s, le128 *iv)
568c2ecf20Sopenharmony_ci{
578c2ecf20Sopenharmony_ci	be128 ctrblk;
588c2ecf20Sopenharmony_ci	u128 *dst = (u128 *)d;
598c2ecf20Sopenharmony_ci	const u128 *src = (const u128 *)s;
608c2ecf20Sopenharmony_ci
618c2ecf20Sopenharmony_ci	if (dst != src)
628c2ecf20Sopenharmony_ci		*dst = *src;
638c2ecf20Sopenharmony_ci
648c2ecf20Sopenharmony_ci	le128_to_be128(&ctrblk, iv);
658c2ecf20Sopenharmony_ci	le128_inc(iv);
668c2ecf20Sopenharmony_ci
678c2ecf20Sopenharmony_ci	twofish_enc_blk(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
688c2ecf20Sopenharmony_ci	u128_xor(dst, dst, (u128 *)&ctrblk);
698c2ecf20Sopenharmony_ci}
708c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(twofish_enc_blk_ctr);
718c2ecf20Sopenharmony_ci
728c2ecf20Sopenharmony_civoid twofish_enc_blk_ctr_3way(const void *ctx, u8 *d, const u8 *s, le128 *iv)
738c2ecf20Sopenharmony_ci{
748c2ecf20Sopenharmony_ci	be128 ctrblks[3];
758c2ecf20Sopenharmony_ci	u128 *dst = (u128 *)d;
768c2ecf20Sopenharmony_ci	const u128 *src = (const u128 *)s;
778c2ecf20Sopenharmony_ci
788c2ecf20Sopenharmony_ci	if (dst != src) {
798c2ecf20Sopenharmony_ci		dst[0] = src[0];
808c2ecf20Sopenharmony_ci		dst[1] = src[1];
818c2ecf20Sopenharmony_ci		dst[2] = src[2];
828c2ecf20Sopenharmony_ci	}
838c2ecf20Sopenharmony_ci
848c2ecf20Sopenharmony_ci	le128_to_be128(&ctrblks[0], iv);
858c2ecf20Sopenharmony_ci	le128_inc(iv);
868c2ecf20Sopenharmony_ci	le128_to_be128(&ctrblks[1], iv);
878c2ecf20Sopenharmony_ci	le128_inc(iv);
888c2ecf20Sopenharmony_ci	le128_to_be128(&ctrblks[2], iv);
898c2ecf20Sopenharmony_ci	le128_inc(iv);
908c2ecf20Sopenharmony_ci
918c2ecf20Sopenharmony_ci	twofish_enc_blk_xor_3way(ctx, (u8 *)dst, (u8 *)ctrblks);
928c2ecf20Sopenharmony_ci}
938c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(twofish_enc_blk_ctr_3way);
948c2ecf20Sopenharmony_ci
958c2ecf20Sopenharmony_cistatic const struct common_glue_ctx twofish_enc = {
968c2ecf20Sopenharmony_ci	.num_funcs = 2,
978c2ecf20Sopenharmony_ci	.fpu_blocks_limit = -1,
988c2ecf20Sopenharmony_ci
998c2ecf20Sopenharmony_ci	.funcs = { {
1008c2ecf20Sopenharmony_ci		.num_blocks = 3,
1018c2ecf20Sopenharmony_ci		.fn_u = { .ecb = twofish_enc_blk_3way }
1028c2ecf20Sopenharmony_ci	}, {
1038c2ecf20Sopenharmony_ci		.num_blocks = 1,
1048c2ecf20Sopenharmony_ci		.fn_u = { .ecb = twofish_enc_blk }
1058c2ecf20Sopenharmony_ci	} }
1068c2ecf20Sopenharmony_ci};
1078c2ecf20Sopenharmony_ci
1088c2ecf20Sopenharmony_cistatic const struct common_glue_ctx twofish_ctr = {
1098c2ecf20Sopenharmony_ci	.num_funcs = 2,
1108c2ecf20Sopenharmony_ci	.fpu_blocks_limit = -1,
1118c2ecf20Sopenharmony_ci
1128c2ecf20Sopenharmony_ci	.funcs = { {
1138c2ecf20Sopenharmony_ci		.num_blocks = 3,
1148c2ecf20Sopenharmony_ci		.fn_u = { .ctr = twofish_enc_blk_ctr_3way }
1158c2ecf20Sopenharmony_ci	}, {
1168c2ecf20Sopenharmony_ci		.num_blocks = 1,
1178c2ecf20Sopenharmony_ci		.fn_u = { .ctr = twofish_enc_blk_ctr }
1188c2ecf20Sopenharmony_ci	} }
1198c2ecf20Sopenharmony_ci};
1208c2ecf20Sopenharmony_ci
1218c2ecf20Sopenharmony_cistatic const struct common_glue_ctx twofish_dec = {
1228c2ecf20Sopenharmony_ci	.num_funcs = 2,
1238c2ecf20Sopenharmony_ci	.fpu_blocks_limit = -1,
1248c2ecf20Sopenharmony_ci
1258c2ecf20Sopenharmony_ci	.funcs = { {
1268c2ecf20Sopenharmony_ci		.num_blocks = 3,
1278c2ecf20Sopenharmony_ci		.fn_u = { .ecb = twofish_dec_blk_3way }
1288c2ecf20Sopenharmony_ci	}, {
1298c2ecf20Sopenharmony_ci		.num_blocks = 1,
1308c2ecf20Sopenharmony_ci		.fn_u = { .ecb = twofish_dec_blk }
1318c2ecf20Sopenharmony_ci	} }
1328c2ecf20Sopenharmony_ci};
1338c2ecf20Sopenharmony_ci
1348c2ecf20Sopenharmony_cistatic const struct common_glue_ctx twofish_dec_cbc = {
1358c2ecf20Sopenharmony_ci	.num_funcs = 2,
1368c2ecf20Sopenharmony_ci	.fpu_blocks_limit = -1,
1378c2ecf20Sopenharmony_ci
1388c2ecf20Sopenharmony_ci	.funcs = { {
1398c2ecf20Sopenharmony_ci		.num_blocks = 3,
1408c2ecf20Sopenharmony_ci		.fn_u = { .cbc = twofish_dec_blk_cbc_3way }
1418c2ecf20Sopenharmony_ci	}, {
1428c2ecf20Sopenharmony_ci		.num_blocks = 1,
1438c2ecf20Sopenharmony_ci		.fn_u = { .cbc = twofish_dec_blk }
1448c2ecf20Sopenharmony_ci	} }
1458c2ecf20Sopenharmony_ci};
1468c2ecf20Sopenharmony_ci
1478c2ecf20Sopenharmony_cistatic int ecb_encrypt(struct skcipher_request *req)
1488c2ecf20Sopenharmony_ci{
1498c2ecf20Sopenharmony_ci	return glue_ecb_req_128bit(&twofish_enc, req);
1508c2ecf20Sopenharmony_ci}
1518c2ecf20Sopenharmony_ci
1528c2ecf20Sopenharmony_cistatic int ecb_decrypt(struct skcipher_request *req)
1538c2ecf20Sopenharmony_ci{
1548c2ecf20Sopenharmony_ci	return glue_ecb_req_128bit(&twofish_dec, req);
1558c2ecf20Sopenharmony_ci}
1568c2ecf20Sopenharmony_ci
1578c2ecf20Sopenharmony_cistatic int cbc_encrypt(struct skcipher_request *req)
1588c2ecf20Sopenharmony_ci{
1598c2ecf20Sopenharmony_ci	return glue_cbc_encrypt_req_128bit(twofish_enc_blk, req);
1608c2ecf20Sopenharmony_ci}
1618c2ecf20Sopenharmony_ci
1628c2ecf20Sopenharmony_cistatic int cbc_decrypt(struct skcipher_request *req)
1638c2ecf20Sopenharmony_ci{
1648c2ecf20Sopenharmony_ci	return glue_cbc_decrypt_req_128bit(&twofish_dec_cbc, req);
1658c2ecf20Sopenharmony_ci}
1668c2ecf20Sopenharmony_ci
1678c2ecf20Sopenharmony_cistatic int ctr_crypt(struct skcipher_request *req)
1688c2ecf20Sopenharmony_ci{
1698c2ecf20Sopenharmony_ci	return glue_ctr_req_128bit(&twofish_ctr, req);
1708c2ecf20Sopenharmony_ci}
1718c2ecf20Sopenharmony_ci
1728c2ecf20Sopenharmony_cistatic struct skcipher_alg tf_skciphers[] = {
1738c2ecf20Sopenharmony_ci	{
1748c2ecf20Sopenharmony_ci		.base.cra_name		= "ecb(twofish)",
1758c2ecf20Sopenharmony_ci		.base.cra_driver_name	= "ecb-twofish-3way",
1768c2ecf20Sopenharmony_ci		.base.cra_priority	= 300,
1778c2ecf20Sopenharmony_ci		.base.cra_blocksize	= TF_BLOCK_SIZE,
1788c2ecf20Sopenharmony_ci		.base.cra_ctxsize	= sizeof(struct twofish_ctx),
1798c2ecf20Sopenharmony_ci		.base.cra_module	= THIS_MODULE,
1808c2ecf20Sopenharmony_ci		.min_keysize		= TF_MIN_KEY_SIZE,
1818c2ecf20Sopenharmony_ci		.max_keysize		= TF_MAX_KEY_SIZE,
1828c2ecf20Sopenharmony_ci		.setkey			= twofish_setkey_skcipher,
1838c2ecf20Sopenharmony_ci		.encrypt		= ecb_encrypt,
1848c2ecf20Sopenharmony_ci		.decrypt		= ecb_decrypt,
1858c2ecf20Sopenharmony_ci	}, {
1868c2ecf20Sopenharmony_ci		.base.cra_name		= "cbc(twofish)",
1878c2ecf20Sopenharmony_ci		.base.cra_driver_name	= "cbc-twofish-3way",
1888c2ecf20Sopenharmony_ci		.base.cra_priority	= 300,
1898c2ecf20Sopenharmony_ci		.base.cra_blocksize	= TF_BLOCK_SIZE,
1908c2ecf20Sopenharmony_ci		.base.cra_ctxsize	= sizeof(struct twofish_ctx),
1918c2ecf20Sopenharmony_ci		.base.cra_module	= THIS_MODULE,
1928c2ecf20Sopenharmony_ci		.min_keysize		= TF_MIN_KEY_SIZE,
1938c2ecf20Sopenharmony_ci		.max_keysize		= TF_MAX_KEY_SIZE,
1948c2ecf20Sopenharmony_ci		.ivsize			= TF_BLOCK_SIZE,
1958c2ecf20Sopenharmony_ci		.setkey			= twofish_setkey_skcipher,
1968c2ecf20Sopenharmony_ci		.encrypt		= cbc_encrypt,
1978c2ecf20Sopenharmony_ci		.decrypt		= cbc_decrypt,
1988c2ecf20Sopenharmony_ci	}, {
1998c2ecf20Sopenharmony_ci		.base.cra_name		= "ctr(twofish)",
2008c2ecf20Sopenharmony_ci		.base.cra_driver_name	= "ctr-twofish-3way",
2018c2ecf20Sopenharmony_ci		.base.cra_priority	= 300,
2028c2ecf20Sopenharmony_ci		.base.cra_blocksize	= 1,
2038c2ecf20Sopenharmony_ci		.base.cra_ctxsize	= sizeof(struct twofish_ctx),
2048c2ecf20Sopenharmony_ci		.base.cra_module	= THIS_MODULE,
2058c2ecf20Sopenharmony_ci		.min_keysize		= TF_MIN_KEY_SIZE,
2068c2ecf20Sopenharmony_ci		.max_keysize		= TF_MAX_KEY_SIZE,
2078c2ecf20Sopenharmony_ci		.ivsize			= TF_BLOCK_SIZE,
2088c2ecf20Sopenharmony_ci		.chunksize		= TF_BLOCK_SIZE,
2098c2ecf20Sopenharmony_ci		.setkey			= twofish_setkey_skcipher,
2108c2ecf20Sopenharmony_ci		.encrypt		= ctr_crypt,
2118c2ecf20Sopenharmony_ci		.decrypt		= ctr_crypt,
2128c2ecf20Sopenharmony_ci	},
2138c2ecf20Sopenharmony_ci};
2148c2ecf20Sopenharmony_ci
2158c2ecf20Sopenharmony_cistatic bool is_blacklisted_cpu(void)
2168c2ecf20Sopenharmony_ci{
2178c2ecf20Sopenharmony_ci	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
2188c2ecf20Sopenharmony_ci		return false;
2198c2ecf20Sopenharmony_ci
2208c2ecf20Sopenharmony_ci	if (boot_cpu_data.x86 == 0x06 &&
2218c2ecf20Sopenharmony_ci		(boot_cpu_data.x86_model == 0x1c ||
2228c2ecf20Sopenharmony_ci		 boot_cpu_data.x86_model == 0x26 ||
2238c2ecf20Sopenharmony_ci		 boot_cpu_data.x86_model == 0x36)) {
2248c2ecf20Sopenharmony_ci		/*
2258c2ecf20Sopenharmony_ci		 * On Atom, twofish-3way is slower than original assembler
2268c2ecf20Sopenharmony_ci		 * implementation. Twofish-3way trades off some performance in
2278c2ecf20Sopenharmony_ci		 * storing blocks in 64bit registers to allow three blocks to
2288c2ecf20Sopenharmony_ci		 * be processed parallel. Parallel operation then allows gaining
2298c2ecf20Sopenharmony_ci		 * more performance than was trade off, on out-of-order CPUs.
2308c2ecf20Sopenharmony_ci		 * However Atom does not benefit from this parallellism and
2318c2ecf20Sopenharmony_ci		 * should be blacklisted.
2328c2ecf20Sopenharmony_ci		 */
2338c2ecf20Sopenharmony_ci		return true;
2348c2ecf20Sopenharmony_ci	}
2358c2ecf20Sopenharmony_ci
2368c2ecf20Sopenharmony_ci	if (boot_cpu_data.x86 == 0x0f) {
2378c2ecf20Sopenharmony_ci		/*
2388c2ecf20Sopenharmony_ci		 * On Pentium 4, twofish-3way is slower than original assembler
2398c2ecf20Sopenharmony_ci		 * implementation because excessive uses of 64bit rotate and
2408c2ecf20Sopenharmony_ci		 * left-shifts (which are really slow on P4) needed to store and
2418c2ecf20Sopenharmony_ci		 * handle 128bit block in two 64bit registers.
2428c2ecf20Sopenharmony_ci		 */
2438c2ecf20Sopenharmony_ci		return true;
2448c2ecf20Sopenharmony_ci	}
2458c2ecf20Sopenharmony_ci
2468c2ecf20Sopenharmony_ci	return false;
2478c2ecf20Sopenharmony_ci}
2488c2ecf20Sopenharmony_ci
2498c2ecf20Sopenharmony_cistatic int force;
2508c2ecf20Sopenharmony_cimodule_param(force, int, 0);
2518c2ecf20Sopenharmony_ciMODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
2528c2ecf20Sopenharmony_ci
2538c2ecf20Sopenharmony_cistatic int __init init(void)
2548c2ecf20Sopenharmony_ci{
2558c2ecf20Sopenharmony_ci	if (!force && is_blacklisted_cpu()) {
2568c2ecf20Sopenharmony_ci		printk(KERN_INFO
2578c2ecf20Sopenharmony_ci			"twofish-x86_64-3way: performance on this CPU "
2588c2ecf20Sopenharmony_ci			"would be suboptimal: disabling "
2598c2ecf20Sopenharmony_ci			"twofish-x86_64-3way.\n");
2608c2ecf20Sopenharmony_ci		return -ENODEV;
2618c2ecf20Sopenharmony_ci	}
2628c2ecf20Sopenharmony_ci
2638c2ecf20Sopenharmony_ci	return crypto_register_skciphers(tf_skciphers,
2648c2ecf20Sopenharmony_ci					 ARRAY_SIZE(tf_skciphers));
2658c2ecf20Sopenharmony_ci}
2668c2ecf20Sopenharmony_ci
2678c2ecf20Sopenharmony_cistatic void __exit fini(void)
2688c2ecf20Sopenharmony_ci{
2698c2ecf20Sopenharmony_ci	crypto_unregister_skciphers(tf_skciphers, ARRAY_SIZE(tf_skciphers));
2708c2ecf20Sopenharmony_ci}
2718c2ecf20Sopenharmony_ci
2728c2ecf20Sopenharmony_cimodule_init(init);
2738c2ecf20Sopenharmony_cimodule_exit(fini);
2748c2ecf20Sopenharmony_ci
2758c2ecf20Sopenharmony_ciMODULE_LICENSE("GPL");
2768c2ecf20Sopenharmony_ciMODULE_DESCRIPTION("Twofish Cipher Algorithm, 3-way parallel asm optimized");
2778c2ecf20Sopenharmony_ciMODULE_ALIAS_CRYPTO("twofish");
2788c2ecf20Sopenharmony_ciMODULE_ALIAS_CRYPTO("twofish-asm");
279