18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * Glue Code for 3-way parallel assembler optimized version of Twofish 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> 68c2ecf20Sopenharmony_ci */ 78c2ecf20Sopenharmony_ci 88c2ecf20Sopenharmony_ci#include <asm/crypto/glue_helper.h> 98c2ecf20Sopenharmony_ci#include <asm/crypto/twofish.h> 108c2ecf20Sopenharmony_ci#include <crypto/algapi.h> 118c2ecf20Sopenharmony_ci#include <crypto/b128ops.h> 128c2ecf20Sopenharmony_ci#include <crypto/internal/skcipher.h> 138c2ecf20Sopenharmony_ci#include <crypto/twofish.h> 148c2ecf20Sopenharmony_ci#include <linux/crypto.h> 158c2ecf20Sopenharmony_ci#include <linux/init.h> 168c2ecf20Sopenharmony_ci#include <linux/module.h> 178c2ecf20Sopenharmony_ci#include <linux/types.h> 188c2ecf20Sopenharmony_ci 198c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(__twofish_enc_blk_3way); 208c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(twofish_dec_blk_3way); 218c2ecf20Sopenharmony_ci 228c2ecf20Sopenharmony_cistatic int twofish_setkey_skcipher(struct crypto_skcipher *tfm, 238c2ecf20Sopenharmony_ci const u8 *key, unsigned int keylen) 248c2ecf20Sopenharmony_ci{ 258c2ecf20Sopenharmony_ci return twofish_setkey(&tfm->base, key, keylen); 268c2ecf20Sopenharmony_ci} 278c2ecf20Sopenharmony_ci 288c2ecf20Sopenharmony_cistatic inline void twofish_enc_blk_3way(const void *ctx, u8 *dst, const u8 *src) 298c2ecf20Sopenharmony_ci{ 308c2ecf20Sopenharmony_ci __twofish_enc_blk_3way(ctx, dst, src, false); 318c2ecf20Sopenharmony_ci} 328c2ecf20Sopenharmony_ci 338c2ecf20Sopenharmony_cistatic inline void twofish_enc_blk_xor_3way(const void *ctx, u8 *dst, 348c2ecf20Sopenharmony_ci const u8 *src) 358c2ecf20Sopenharmony_ci{ 368c2ecf20Sopenharmony_ci __twofish_enc_blk_3way(ctx, dst, src, true); 378c2ecf20Sopenharmony_ci} 388c2ecf20Sopenharmony_ci 398c2ecf20Sopenharmony_civoid twofish_dec_blk_cbc_3way(const void *ctx, u8 *d, const u8 *s) 408c2ecf20Sopenharmony_ci{ 418c2ecf20Sopenharmony_ci u128 ivs[2]; 428c2ecf20Sopenharmony_ci u128 *dst = (u128 *)d; 438c2ecf20Sopenharmony_ci const u128 *src = (const u128 *)s; 448c2ecf20Sopenharmony_ci 458c2ecf20Sopenharmony_ci ivs[0] = src[0]; 468c2ecf20Sopenharmony_ci ivs[1] = src[1]; 478c2ecf20Sopenharmony_ci 488c2ecf20Sopenharmony_ci twofish_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src); 498c2ecf20Sopenharmony_ci 508c2ecf20Sopenharmony_ci u128_xor(&dst[1], &dst[1], &ivs[0]); 518c2ecf20Sopenharmony_ci u128_xor(&dst[2], &dst[2], &ivs[1]); 528c2ecf20Sopenharmony_ci} 538c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(twofish_dec_blk_cbc_3way); 548c2ecf20Sopenharmony_ci 558c2ecf20Sopenharmony_civoid twofish_enc_blk_ctr(const void *ctx, u8 *d, const u8 *s, le128 *iv) 568c2ecf20Sopenharmony_ci{ 578c2ecf20Sopenharmony_ci be128 ctrblk; 588c2ecf20Sopenharmony_ci u128 *dst = (u128 *)d; 598c2ecf20Sopenharmony_ci const u128 *src = (const u128 *)s; 608c2ecf20Sopenharmony_ci 618c2ecf20Sopenharmony_ci if (dst != src) 628c2ecf20Sopenharmony_ci *dst = *src; 638c2ecf20Sopenharmony_ci 648c2ecf20Sopenharmony_ci le128_to_be128(&ctrblk, iv); 658c2ecf20Sopenharmony_ci le128_inc(iv); 668c2ecf20Sopenharmony_ci 678c2ecf20Sopenharmony_ci twofish_enc_blk(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk); 688c2ecf20Sopenharmony_ci u128_xor(dst, dst, (u128 *)&ctrblk); 698c2ecf20Sopenharmony_ci} 708c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(twofish_enc_blk_ctr); 718c2ecf20Sopenharmony_ci 728c2ecf20Sopenharmony_civoid twofish_enc_blk_ctr_3way(const void *ctx, u8 *d, const u8 *s, le128 *iv) 738c2ecf20Sopenharmony_ci{ 748c2ecf20Sopenharmony_ci be128 ctrblks[3]; 758c2ecf20Sopenharmony_ci u128 *dst = (u128 *)d; 768c2ecf20Sopenharmony_ci const u128 *src = (const u128 *)s; 778c2ecf20Sopenharmony_ci 788c2ecf20Sopenharmony_ci if (dst != src) { 798c2ecf20Sopenharmony_ci dst[0] = src[0]; 808c2ecf20Sopenharmony_ci dst[1] = src[1]; 818c2ecf20Sopenharmony_ci dst[2] = src[2]; 828c2ecf20Sopenharmony_ci } 838c2ecf20Sopenharmony_ci 848c2ecf20Sopenharmony_ci le128_to_be128(&ctrblks[0], iv); 858c2ecf20Sopenharmony_ci le128_inc(iv); 868c2ecf20Sopenharmony_ci le128_to_be128(&ctrblks[1], iv); 878c2ecf20Sopenharmony_ci le128_inc(iv); 888c2ecf20Sopenharmony_ci le128_to_be128(&ctrblks[2], iv); 898c2ecf20Sopenharmony_ci le128_inc(iv); 908c2ecf20Sopenharmony_ci 918c2ecf20Sopenharmony_ci twofish_enc_blk_xor_3way(ctx, (u8 *)dst, (u8 *)ctrblks); 928c2ecf20Sopenharmony_ci} 938c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(twofish_enc_blk_ctr_3way); 948c2ecf20Sopenharmony_ci 958c2ecf20Sopenharmony_cistatic const struct common_glue_ctx twofish_enc = { 968c2ecf20Sopenharmony_ci .num_funcs = 2, 978c2ecf20Sopenharmony_ci .fpu_blocks_limit = -1, 988c2ecf20Sopenharmony_ci 998c2ecf20Sopenharmony_ci .funcs = { { 1008c2ecf20Sopenharmony_ci .num_blocks = 3, 1018c2ecf20Sopenharmony_ci .fn_u = { .ecb = twofish_enc_blk_3way } 1028c2ecf20Sopenharmony_ci }, { 1038c2ecf20Sopenharmony_ci .num_blocks = 1, 1048c2ecf20Sopenharmony_ci .fn_u = { .ecb = twofish_enc_blk } 1058c2ecf20Sopenharmony_ci } } 1068c2ecf20Sopenharmony_ci}; 1078c2ecf20Sopenharmony_ci 1088c2ecf20Sopenharmony_cistatic const struct common_glue_ctx twofish_ctr = { 1098c2ecf20Sopenharmony_ci .num_funcs = 2, 1108c2ecf20Sopenharmony_ci .fpu_blocks_limit = -1, 1118c2ecf20Sopenharmony_ci 1128c2ecf20Sopenharmony_ci .funcs = { { 1138c2ecf20Sopenharmony_ci .num_blocks = 3, 1148c2ecf20Sopenharmony_ci .fn_u = { .ctr = twofish_enc_blk_ctr_3way } 1158c2ecf20Sopenharmony_ci }, { 1168c2ecf20Sopenharmony_ci .num_blocks = 1, 1178c2ecf20Sopenharmony_ci .fn_u = { .ctr = twofish_enc_blk_ctr } 1188c2ecf20Sopenharmony_ci } } 1198c2ecf20Sopenharmony_ci}; 1208c2ecf20Sopenharmony_ci 1218c2ecf20Sopenharmony_cistatic const struct common_glue_ctx twofish_dec = { 1228c2ecf20Sopenharmony_ci .num_funcs = 2, 1238c2ecf20Sopenharmony_ci .fpu_blocks_limit = -1, 1248c2ecf20Sopenharmony_ci 1258c2ecf20Sopenharmony_ci .funcs = { { 1268c2ecf20Sopenharmony_ci .num_blocks = 3, 1278c2ecf20Sopenharmony_ci .fn_u = { .ecb = twofish_dec_blk_3way } 1288c2ecf20Sopenharmony_ci }, { 1298c2ecf20Sopenharmony_ci .num_blocks = 1, 1308c2ecf20Sopenharmony_ci .fn_u = { .ecb = twofish_dec_blk } 1318c2ecf20Sopenharmony_ci } } 1328c2ecf20Sopenharmony_ci}; 1338c2ecf20Sopenharmony_ci 1348c2ecf20Sopenharmony_cistatic const struct common_glue_ctx twofish_dec_cbc = { 1358c2ecf20Sopenharmony_ci .num_funcs = 2, 1368c2ecf20Sopenharmony_ci .fpu_blocks_limit = -1, 1378c2ecf20Sopenharmony_ci 1388c2ecf20Sopenharmony_ci .funcs = { { 1398c2ecf20Sopenharmony_ci .num_blocks = 3, 1408c2ecf20Sopenharmony_ci .fn_u = { .cbc = twofish_dec_blk_cbc_3way } 1418c2ecf20Sopenharmony_ci }, { 1428c2ecf20Sopenharmony_ci .num_blocks = 1, 1438c2ecf20Sopenharmony_ci .fn_u = { .cbc = twofish_dec_blk } 1448c2ecf20Sopenharmony_ci } } 1458c2ecf20Sopenharmony_ci}; 1468c2ecf20Sopenharmony_ci 1478c2ecf20Sopenharmony_cistatic int ecb_encrypt(struct skcipher_request *req) 1488c2ecf20Sopenharmony_ci{ 1498c2ecf20Sopenharmony_ci return glue_ecb_req_128bit(&twofish_enc, req); 1508c2ecf20Sopenharmony_ci} 1518c2ecf20Sopenharmony_ci 1528c2ecf20Sopenharmony_cistatic int ecb_decrypt(struct skcipher_request *req) 1538c2ecf20Sopenharmony_ci{ 1548c2ecf20Sopenharmony_ci return glue_ecb_req_128bit(&twofish_dec, req); 1558c2ecf20Sopenharmony_ci} 1568c2ecf20Sopenharmony_ci 1578c2ecf20Sopenharmony_cistatic int cbc_encrypt(struct skcipher_request *req) 1588c2ecf20Sopenharmony_ci{ 1598c2ecf20Sopenharmony_ci return glue_cbc_encrypt_req_128bit(twofish_enc_blk, req); 1608c2ecf20Sopenharmony_ci} 1618c2ecf20Sopenharmony_ci 1628c2ecf20Sopenharmony_cistatic int cbc_decrypt(struct skcipher_request *req) 1638c2ecf20Sopenharmony_ci{ 1648c2ecf20Sopenharmony_ci return glue_cbc_decrypt_req_128bit(&twofish_dec_cbc, req); 1658c2ecf20Sopenharmony_ci} 1668c2ecf20Sopenharmony_ci 1678c2ecf20Sopenharmony_cistatic int ctr_crypt(struct skcipher_request *req) 1688c2ecf20Sopenharmony_ci{ 1698c2ecf20Sopenharmony_ci return glue_ctr_req_128bit(&twofish_ctr, req); 1708c2ecf20Sopenharmony_ci} 1718c2ecf20Sopenharmony_ci 1728c2ecf20Sopenharmony_cistatic struct skcipher_alg tf_skciphers[] = { 1738c2ecf20Sopenharmony_ci { 1748c2ecf20Sopenharmony_ci .base.cra_name = "ecb(twofish)", 1758c2ecf20Sopenharmony_ci .base.cra_driver_name = "ecb-twofish-3way", 1768c2ecf20Sopenharmony_ci .base.cra_priority = 300, 1778c2ecf20Sopenharmony_ci .base.cra_blocksize = TF_BLOCK_SIZE, 1788c2ecf20Sopenharmony_ci .base.cra_ctxsize = sizeof(struct twofish_ctx), 1798c2ecf20Sopenharmony_ci .base.cra_module = THIS_MODULE, 1808c2ecf20Sopenharmony_ci .min_keysize = TF_MIN_KEY_SIZE, 1818c2ecf20Sopenharmony_ci .max_keysize = TF_MAX_KEY_SIZE, 1828c2ecf20Sopenharmony_ci .setkey = twofish_setkey_skcipher, 1838c2ecf20Sopenharmony_ci .encrypt = ecb_encrypt, 1848c2ecf20Sopenharmony_ci .decrypt = ecb_decrypt, 1858c2ecf20Sopenharmony_ci }, { 1868c2ecf20Sopenharmony_ci .base.cra_name = "cbc(twofish)", 1878c2ecf20Sopenharmony_ci .base.cra_driver_name = "cbc-twofish-3way", 1888c2ecf20Sopenharmony_ci .base.cra_priority = 300, 1898c2ecf20Sopenharmony_ci .base.cra_blocksize = TF_BLOCK_SIZE, 1908c2ecf20Sopenharmony_ci .base.cra_ctxsize = sizeof(struct twofish_ctx), 1918c2ecf20Sopenharmony_ci .base.cra_module = THIS_MODULE, 1928c2ecf20Sopenharmony_ci .min_keysize = TF_MIN_KEY_SIZE, 1938c2ecf20Sopenharmony_ci .max_keysize = TF_MAX_KEY_SIZE, 1948c2ecf20Sopenharmony_ci .ivsize = TF_BLOCK_SIZE, 1958c2ecf20Sopenharmony_ci .setkey = twofish_setkey_skcipher, 1968c2ecf20Sopenharmony_ci .encrypt = cbc_encrypt, 1978c2ecf20Sopenharmony_ci .decrypt = cbc_decrypt, 1988c2ecf20Sopenharmony_ci }, { 1998c2ecf20Sopenharmony_ci .base.cra_name = "ctr(twofish)", 2008c2ecf20Sopenharmony_ci .base.cra_driver_name = "ctr-twofish-3way", 2018c2ecf20Sopenharmony_ci .base.cra_priority = 300, 2028c2ecf20Sopenharmony_ci .base.cra_blocksize = 1, 2038c2ecf20Sopenharmony_ci .base.cra_ctxsize = sizeof(struct twofish_ctx), 2048c2ecf20Sopenharmony_ci .base.cra_module = THIS_MODULE, 2058c2ecf20Sopenharmony_ci .min_keysize = TF_MIN_KEY_SIZE, 2068c2ecf20Sopenharmony_ci .max_keysize = TF_MAX_KEY_SIZE, 2078c2ecf20Sopenharmony_ci .ivsize = TF_BLOCK_SIZE, 2088c2ecf20Sopenharmony_ci .chunksize = TF_BLOCK_SIZE, 2098c2ecf20Sopenharmony_ci .setkey = twofish_setkey_skcipher, 2108c2ecf20Sopenharmony_ci .encrypt = ctr_crypt, 2118c2ecf20Sopenharmony_ci .decrypt = ctr_crypt, 2128c2ecf20Sopenharmony_ci }, 2138c2ecf20Sopenharmony_ci}; 2148c2ecf20Sopenharmony_ci 2158c2ecf20Sopenharmony_cistatic bool is_blacklisted_cpu(void) 2168c2ecf20Sopenharmony_ci{ 2178c2ecf20Sopenharmony_ci if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) 2188c2ecf20Sopenharmony_ci return false; 2198c2ecf20Sopenharmony_ci 2208c2ecf20Sopenharmony_ci if (boot_cpu_data.x86 == 0x06 && 2218c2ecf20Sopenharmony_ci (boot_cpu_data.x86_model == 0x1c || 2228c2ecf20Sopenharmony_ci boot_cpu_data.x86_model == 0x26 || 2238c2ecf20Sopenharmony_ci boot_cpu_data.x86_model == 0x36)) { 2248c2ecf20Sopenharmony_ci /* 2258c2ecf20Sopenharmony_ci * On Atom, twofish-3way is slower than original assembler 2268c2ecf20Sopenharmony_ci * implementation. Twofish-3way trades off some performance in 2278c2ecf20Sopenharmony_ci * storing blocks in 64bit registers to allow three blocks to 2288c2ecf20Sopenharmony_ci * be processed parallel. Parallel operation then allows gaining 2298c2ecf20Sopenharmony_ci * more performance than was trade off, on out-of-order CPUs. 2308c2ecf20Sopenharmony_ci * However Atom does not benefit from this parallellism and 2318c2ecf20Sopenharmony_ci * should be blacklisted. 2328c2ecf20Sopenharmony_ci */ 2338c2ecf20Sopenharmony_ci return true; 2348c2ecf20Sopenharmony_ci } 2358c2ecf20Sopenharmony_ci 2368c2ecf20Sopenharmony_ci if (boot_cpu_data.x86 == 0x0f) { 2378c2ecf20Sopenharmony_ci /* 2388c2ecf20Sopenharmony_ci * On Pentium 4, twofish-3way is slower than original assembler 2398c2ecf20Sopenharmony_ci * implementation because excessive uses of 64bit rotate and 2408c2ecf20Sopenharmony_ci * left-shifts (which are really slow on P4) needed to store and 2418c2ecf20Sopenharmony_ci * handle 128bit block in two 64bit registers. 2428c2ecf20Sopenharmony_ci */ 2438c2ecf20Sopenharmony_ci return true; 2448c2ecf20Sopenharmony_ci } 2458c2ecf20Sopenharmony_ci 2468c2ecf20Sopenharmony_ci return false; 2478c2ecf20Sopenharmony_ci} 2488c2ecf20Sopenharmony_ci 2498c2ecf20Sopenharmony_cistatic int force; 2508c2ecf20Sopenharmony_cimodule_param(force, int, 0); 2518c2ecf20Sopenharmony_ciMODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist"); 2528c2ecf20Sopenharmony_ci 2538c2ecf20Sopenharmony_cistatic int __init init(void) 2548c2ecf20Sopenharmony_ci{ 2558c2ecf20Sopenharmony_ci if (!force && is_blacklisted_cpu()) { 2568c2ecf20Sopenharmony_ci printk(KERN_INFO 2578c2ecf20Sopenharmony_ci "twofish-x86_64-3way: performance on this CPU " 2588c2ecf20Sopenharmony_ci "would be suboptimal: disabling " 2598c2ecf20Sopenharmony_ci "twofish-x86_64-3way.\n"); 2608c2ecf20Sopenharmony_ci return -ENODEV; 2618c2ecf20Sopenharmony_ci } 2628c2ecf20Sopenharmony_ci 2638c2ecf20Sopenharmony_ci return crypto_register_skciphers(tf_skciphers, 2648c2ecf20Sopenharmony_ci ARRAY_SIZE(tf_skciphers)); 2658c2ecf20Sopenharmony_ci} 2668c2ecf20Sopenharmony_ci 2678c2ecf20Sopenharmony_cistatic void __exit fini(void) 2688c2ecf20Sopenharmony_ci{ 2698c2ecf20Sopenharmony_ci crypto_unregister_skciphers(tf_skciphers, ARRAY_SIZE(tf_skciphers)); 2708c2ecf20Sopenharmony_ci} 2718c2ecf20Sopenharmony_ci 2728c2ecf20Sopenharmony_cimodule_init(init); 2738c2ecf20Sopenharmony_cimodule_exit(fini); 2748c2ecf20Sopenharmony_ci 2758c2ecf20Sopenharmony_ciMODULE_LICENSE("GPL"); 2768c2ecf20Sopenharmony_ciMODULE_DESCRIPTION("Twofish Cipher Algorithm, 3-way parallel asm optimized"); 2778c2ecf20Sopenharmony_ciMODULE_ALIAS_CRYPTO("twofish"); 2788c2ecf20Sopenharmony_ciMODULE_ALIAS_CRYPTO("twofish-asm"); 279