18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * Glue Code for x86_64/AVX2 assembler optimized version of Serpent
48c2ecf20Sopenharmony_ci *
58c2ecf20Sopenharmony_ci * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
68c2ecf20Sopenharmony_ci */
78c2ecf20Sopenharmony_ci
88c2ecf20Sopenharmony_ci#include <linux/module.h>
98c2ecf20Sopenharmony_ci#include <linux/types.h>
108c2ecf20Sopenharmony_ci#include <linux/crypto.h>
118c2ecf20Sopenharmony_ci#include <linux/err.h>
128c2ecf20Sopenharmony_ci#include <crypto/algapi.h>
138c2ecf20Sopenharmony_ci#include <crypto/internal/simd.h>
148c2ecf20Sopenharmony_ci#include <crypto/serpent.h>
158c2ecf20Sopenharmony_ci#include <crypto/xts.h>
168c2ecf20Sopenharmony_ci#include <asm/crypto/glue_helper.h>
178c2ecf20Sopenharmony_ci#include <asm/crypto/serpent-avx.h>
188c2ecf20Sopenharmony_ci
198c2ecf20Sopenharmony_ci#define SERPENT_AVX2_PARALLEL_BLOCKS 16
208c2ecf20Sopenharmony_ci
218c2ecf20Sopenharmony_ci/* 16-way AVX2 parallel cipher functions */
228c2ecf20Sopenharmony_ciasmlinkage void serpent_ecb_enc_16way(const void *ctx, u8 *dst, const u8 *src);
238c2ecf20Sopenharmony_ciasmlinkage void serpent_ecb_dec_16way(const void *ctx, u8 *dst, const u8 *src);
248c2ecf20Sopenharmony_ciasmlinkage void serpent_cbc_dec_16way(const void *ctx, u8 *dst, const u8 *src);
258c2ecf20Sopenharmony_ci
268c2ecf20Sopenharmony_ciasmlinkage void serpent_ctr_16way(const void *ctx, u8 *dst, const u8 *src,
278c2ecf20Sopenharmony_ci				  le128 *iv);
288c2ecf20Sopenharmony_ciasmlinkage void serpent_xts_enc_16way(const void *ctx, u8 *dst, const u8 *src,
298c2ecf20Sopenharmony_ci				      le128 *iv);
308c2ecf20Sopenharmony_ciasmlinkage void serpent_xts_dec_16way(const void *ctx, u8 *dst, const u8 *src,
318c2ecf20Sopenharmony_ci				      le128 *iv);
328c2ecf20Sopenharmony_ci
338c2ecf20Sopenharmony_cistatic int serpent_setkey_skcipher(struct crypto_skcipher *tfm,
348c2ecf20Sopenharmony_ci				   const u8 *key, unsigned int keylen)
358c2ecf20Sopenharmony_ci{
368c2ecf20Sopenharmony_ci	return __serpent_setkey(crypto_skcipher_ctx(tfm), key, keylen);
378c2ecf20Sopenharmony_ci}
388c2ecf20Sopenharmony_ci
398c2ecf20Sopenharmony_cistatic const struct common_glue_ctx serpent_enc = {
408c2ecf20Sopenharmony_ci	.num_funcs = 3,
418c2ecf20Sopenharmony_ci	.fpu_blocks_limit = 8,
428c2ecf20Sopenharmony_ci
438c2ecf20Sopenharmony_ci	.funcs = { {
448c2ecf20Sopenharmony_ci		.num_blocks = 16,
458c2ecf20Sopenharmony_ci		.fn_u = { .ecb = serpent_ecb_enc_16way }
468c2ecf20Sopenharmony_ci	}, {
478c2ecf20Sopenharmony_ci		.num_blocks = 8,
488c2ecf20Sopenharmony_ci		.fn_u = { .ecb = serpent_ecb_enc_8way_avx }
498c2ecf20Sopenharmony_ci	}, {
508c2ecf20Sopenharmony_ci		.num_blocks = 1,
518c2ecf20Sopenharmony_ci		.fn_u = { .ecb = __serpent_encrypt }
528c2ecf20Sopenharmony_ci	} }
538c2ecf20Sopenharmony_ci};
548c2ecf20Sopenharmony_ci
558c2ecf20Sopenharmony_cistatic const struct common_glue_ctx serpent_ctr = {
568c2ecf20Sopenharmony_ci	.num_funcs = 3,
578c2ecf20Sopenharmony_ci	.fpu_blocks_limit = 8,
588c2ecf20Sopenharmony_ci
598c2ecf20Sopenharmony_ci	.funcs = { {
608c2ecf20Sopenharmony_ci		.num_blocks = 16,
618c2ecf20Sopenharmony_ci		.fn_u = { .ctr = serpent_ctr_16way }
628c2ecf20Sopenharmony_ci	},  {
638c2ecf20Sopenharmony_ci		.num_blocks = 8,
648c2ecf20Sopenharmony_ci		.fn_u = { .ctr = serpent_ctr_8way_avx }
658c2ecf20Sopenharmony_ci	}, {
668c2ecf20Sopenharmony_ci		.num_blocks = 1,
678c2ecf20Sopenharmony_ci		.fn_u = { .ctr = __serpent_crypt_ctr }
688c2ecf20Sopenharmony_ci	} }
698c2ecf20Sopenharmony_ci};
708c2ecf20Sopenharmony_ci
718c2ecf20Sopenharmony_cistatic const struct common_glue_ctx serpent_enc_xts = {
728c2ecf20Sopenharmony_ci	.num_funcs = 3,
738c2ecf20Sopenharmony_ci	.fpu_blocks_limit = 8,
748c2ecf20Sopenharmony_ci
758c2ecf20Sopenharmony_ci	.funcs = { {
768c2ecf20Sopenharmony_ci		.num_blocks = 16,
778c2ecf20Sopenharmony_ci		.fn_u = { .xts = serpent_xts_enc_16way }
788c2ecf20Sopenharmony_ci	}, {
798c2ecf20Sopenharmony_ci		.num_blocks = 8,
808c2ecf20Sopenharmony_ci		.fn_u = { .xts = serpent_xts_enc_8way_avx }
818c2ecf20Sopenharmony_ci	}, {
828c2ecf20Sopenharmony_ci		.num_blocks = 1,
838c2ecf20Sopenharmony_ci		.fn_u = { .xts = serpent_xts_enc }
848c2ecf20Sopenharmony_ci	} }
858c2ecf20Sopenharmony_ci};
868c2ecf20Sopenharmony_ci
878c2ecf20Sopenharmony_cistatic const struct common_glue_ctx serpent_dec = {
888c2ecf20Sopenharmony_ci	.num_funcs = 3,
898c2ecf20Sopenharmony_ci	.fpu_blocks_limit = 8,
908c2ecf20Sopenharmony_ci
918c2ecf20Sopenharmony_ci	.funcs = { {
928c2ecf20Sopenharmony_ci		.num_blocks = 16,
938c2ecf20Sopenharmony_ci		.fn_u = { .ecb = serpent_ecb_dec_16way }
948c2ecf20Sopenharmony_ci	}, {
958c2ecf20Sopenharmony_ci		.num_blocks = 8,
968c2ecf20Sopenharmony_ci		.fn_u = { .ecb = serpent_ecb_dec_8way_avx }
978c2ecf20Sopenharmony_ci	}, {
988c2ecf20Sopenharmony_ci		.num_blocks = 1,
998c2ecf20Sopenharmony_ci		.fn_u = { .ecb = __serpent_decrypt }
1008c2ecf20Sopenharmony_ci	} }
1018c2ecf20Sopenharmony_ci};
1028c2ecf20Sopenharmony_ci
1038c2ecf20Sopenharmony_cistatic const struct common_glue_ctx serpent_dec_cbc = {
1048c2ecf20Sopenharmony_ci	.num_funcs = 3,
1058c2ecf20Sopenharmony_ci	.fpu_blocks_limit = 8,
1068c2ecf20Sopenharmony_ci
1078c2ecf20Sopenharmony_ci	.funcs = { {
1088c2ecf20Sopenharmony_ci		.num_blocks = 16,
1098c2ecf20Sopenharmony_ci		.fn_u = { .cbc = serpent_cbc_dec_16way }
1108c2ecf20Sopenharmony_ci	}, {
1118c2ecf20Sopenharmony_ci		.num_blocks = 8,
1128c2ecf20Sopenharmony_ci		.fn_u = { .cbc = serpent_cbc_dec_8way_avx }
1138c2ecf20Sopenharmony_ci	}, {
1148c2ecf20Sopenharmony_ci		.num_blocks = 1,
1158c2ecf20Sopenharmony_ci		.fn_u = { .cbc = __serpent_decrypt }
1168c2ecf20Sopenharmony_ci	} }
1178c2ecf20Sopenharmony_ci};
1188c2ecf20Sopenharmony_ci
1198c2ecf20Sopenharmony_cistatic const struct common_glue_ctx serpent_dec_xts = {
1208c2ecf20Sopenharmony_ci	.num_funcs = 3,
1218c2ecf20Sopenharmony_ci	.fpu_blocks_limit = 8,
1228c2ecf20Sopenharmony_ci
1238c2ecf20Sopenharmony_ci	.funcs = { {
1248c2ecf20Sopenharmony_ci		.num_blocks = 16,
1258c2ecf20Sopenharmony_ci		.fn_u = { .xts = serpent_xts_dec_16way }
1268c2ecf20Sopenharmony_ci	}, {
1278c2ecf20Sopenharmony_ci		.num_blocks = 8,
1288c2ecf20Sopenharmony_ci		.fn_u = { .xts = serpent_xts_dec_8way_avx }
1298c2ecf20Sopenharmony_ci	}, {
1308c2ecf20Sopenharmony_ci		.num_blocks = 1,
1318c2ecf20Sopenharmony_ci		.fn_u = { .xts = serpent_xts_dec }
1328c2ecf20Sopenharmony_ci	} }
1338c2ecf20Sopenharmony_ci};
1348c2ecf20Sopenharmony_ci
1358c2ecf20Sopenharmony_cistatic int ecb_encrypt(struct skcipher_request *req)
1368c2ecf20Sopenharmony_ci{
1378c2ecf20Sopenharmony_ci	return glue_ecb_req_128bit(&serpent_enc, req);
1388c2ecf20Sopenharmony_ci}
1398c2ecf20Sopenharmony_ci
1408c2ecf20Sopenharmony_cistatic int ecb_decrypt(struct skcipher_request *req)
1418c2ecf20Sopenharmony_ci{
1428c2ecf20Sopenharmony_ci	return glue_ecb_req_128bit(&serpent_dec, req);
1438c2ecf20Sopenharmony_ci}
1448c2ecf20Sopenharmony_ci
1458c2ecf20Sopenharmony_cistatic int cbc_encrypt(struct skcipher_request *req)
1468c2ecf20Sopenharmony_ci{
1478c2ecf20Sopenharmony_ci	return glue_cbc_encrypt_req_128bit(__serpent_encrypt, req);
1488c2ecf20Sopenharmony_ci}
1498c2ecf20Sopenharmony_ci
1508c2ecf20Sopenharmony_cistatic int cbc_decrypt(struct skcipher_request *req)
1518c2ecf20Sopenharmony_ci{
1528c2ecf20Sopenharmony_ci	return glue_cbc_decrypt_req_128bit(&serpent_dec_cbc, req);
1538c2ecf20Sopenharmony_ci}
1548c2ecf20Sopenharmony_ci
1558c2ecf20Sopenharmony_cistatic int ctr_crypt(struct skcipher_request *req)
1568c2ecf20Sopenharmony_ci{
1578c2ecf20Sopenharmony_ci	return glue_ctr_req_128bit(&serpent_ctr, req);
1588c2ecf20Sopenharmony_ci}
1598c2ecf20Sopenharmony_ci
1608c2ecf20Sopenharmony_cistatic int xts_encrypt(struct skcipher_request *req)
1618c2ecf20Sopenharmony_ci{
1628c2ecf20Sopenharmony_ci	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
1638c2ecf20Sopenharmony_ci	struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
1648c2ecf20Sopenharmony_ci
1658c2ecf20Sopenharmony_ci	return glue_xts_req_128bit(&serpent_enc_xts, req,
1668c2ecf20Sopenharmony_ci				   __serpent_encrypt, &ctx->tweak_ctx,
1678c2ecf20Sopenharmony_ci				   &ctx->crypt_ctx, false);
1688c2ecf20Sopenharmony_ci}
1698c2ecf20Sopenharmony_ci
1708c2ecf20Sopenharmony_cistatic int xts_decrypt(struct skcipher_request *req)
1718c2ecf20Sopenharmony_ci{
1728c2ecf20Sopenharmony_ci	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
1738c2ecf20Sopenharmony_ci	struct serpent_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
1748c2ecf20Sopenharmony_ci
1758c2ecf20Sopenharmony_ci	return glue_xts_req_128bit(&serpent_dec_xts, req,
1768c2ecf20Sopenharmony_ci				   __serpent_encrypt, &ctx->tweak_ctx,
1778c2ecf20Sopenharmony_ci				   &ctx->crypt_ctx, true);
1788c2ecf20Sopenharmony_ci}
1798c2ecf20Sopenharmony_ci
1808c2ecf20Sopenharmony_cistatic struct skcipher_alg serpent_algs[] = {
1818c2ecf20Sopenharmony_ci	{
1828c2ecf20Sopenharmony_ci		.base.cra_name		= "__ecb(serpent)",
1838c2ecf20Sopenharmony_ci		.base.cra_driver_name	= "__ecb-serpent-avx2",
1848c2ecf20Sopenharmony_ci		.base.cra_priority	= 600,
1858c2ecf20Sopenharmony_ci		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
1868c2ecf20Sopenharmony_ci		.base.cra_blocksize	= SERPENT_BLOCK_SIZE,
1878c2ecf20Sopenharmony_ci		.base.cra_ctxsize	= sizeof(struct serpent_ctx),
1888c2ecf20Sopenharmony_ci		.base.cra_module	= THIS_MODULE,
1898c2ecf20Sopenharmony_ci		.min_keysize		= SERPENT_MIN_KEY_SIZE,
1908c2ecf20Sopenharmony_ci		.max_keysize		= SERPENT_MAX_KEY_SIZE,
1918c2ecf20Sopenharmony_ci		.setkey			= serpent_setkey_skcipher,
1928c2ecf20Sopenharmony_ci		.encrypt		= ecb_encrypt,
1938c2ecf20Sopenharmony_ci		.decrypt		= ecb_decrypt,
1948c2ecf20Sopenharmony_ci	}, {
1958c2ecf20Sopenharmony_ci		.base.cra_name		= "__cbc(serpent)",
1968c2ecf20Sopenharmony_ci		.base.cra_driver_name	= "__cbc-serpent-avx2",
1978c2ecf20Sopenharmony_ci		.base.cra_priority	= 600,
1988c2ecf20Sopenharmony_ci		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
1998c2ecf20Sopenharmony_ci		.base.cra_blocksize	= SERPENT_BLOCK_SIZE,
2008c2ecf20Sopenharmony_ci		.base.cra_ctxsize	= sizeof(struct serpent_ctx),
2018c2ecf20Sopenharmony_ci		.base.cra_module	= THIS_MODULE,
2028c2ecf20Sopenharmony_ci		.min_keysize		= SERPENT_MIN_KEY_SIZE,
2038c2ecf20Sopenharmony_ci		.max_keysize		= SERPENT_MAX_KEY_SIZE,
2048c2ecf20Sopenharmony_ci		.ivsize			= SERPENT_BLOCK_SIZE,
2058c2ecf20Sopenharmony_ci		.setkey			= serpent_setkey_skcipher,
2068c2ecf20Sopenharmony_ci		.encrypt		= cbc_encrypt,
2078c2ecf20Sopenharmony_ci		.decrypt		= cbc_decrypt,
2088c2ecf20Sopenharmony_ci	}, {
2098c2ecf20Sopenharmony_ci		.base.cra_name		= "__ctr(serpent)",
2108c2ecf20Sopenharmony_ci		.base.cra_driver_name	= "__ctr-serpent-avx2",
2118c2ecf20Sopenharmony_ci		.base.cra_priority	= 600,
2128c2ecf20Sopenharmony_ci		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
2138c2ecf20Sopenharmony_ci		.base.cra_blocksize	= 1,
2148c2ecf20Sopenharmony_ci		.base.cra_ctxsize	= sizeof(struct serpent_ctx),
2158c2ecf20Sopenharmony_ci		.base.cra_module	= THIS_MODULE,
2168c2ecf20Sopenharmony_ci		.min_keysize		= SERPENT_MIN_KEY_SIZE,
2178c2ecf20Sopenharmony_ci		.max_keysize		= SERPENT_MAX_KEY_SIZE,
2188c2ecf20Sopenharmony_ci		.ivsize			= SERPENT_BLOCK_SIZE,
2198c2ecf20Sopenharmony_ci		.chunksize		= SERPENT_BLOCK_SIZE,
2208c2ecf20Sopenharmony_ci		.setkey			= serpent_setkey_skcipher,
2218c2ecf20Sopenharmony_ci		.encrypt		= ctr_crypt,
2228c2ecf20Sopenharmony_ci		.decrypt		= ctr_crypt,
2238c2ecf20Sopenharmony_ci	}, {
2248c2ecf20Sopenharmony_ci		.base.cra_name		= "__xts(serpent)",
2258c2ecf20Sopenharmony_ci		.base.cra_driver_name	= "__xts-serpent-avx2",
2268c2ecf20Sopenharmony_ci		.base.cra_priority	= 600,
2278c2ecf20Sopenharmony_ci		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
2288c2ecf20Sopenharmony_ci		.base.cra_blocksize	= SERPENT_BLOCK_SIZE,
2298c2ecf20Sopenharmony_ci		.base.cra_ctxsize	= sizeof(struct serpent_xts_ctx),
2308c2ecf20Sopenharmony_ci		.base.cra_module	= THIS_MODULE,
2318c2ecf20Sopenharmony_ci		.min_keysize		= 2 * SERPENT_MIN_KEY_SIZE,
2328c2ecf20Sopenharmony_ci		.max_keysize		= 2 * SERPENT_MAX_KEY_SIZE,
2338c2ecf20Sopenharmony_ci		.ivsize			= SERPENT_BLOCK_SIZE,
2348c2ecf20Sopenharmony_ci		.setkey			= xts_serpent_setkey,
2358c2ecf20Sopenharmony_ci		.encrypt		= xts_encrypt,
2368c2ecf20Sopenharmony_ci		.decrypt		= xts_decrypt,
2378c2ecf20Sopenharmony_ci	},
2388c2ecf20Sopenharmony_ci};
2398c2ecf20Sopenharmony_ci
2408c2ecf20Sopenharmony_cistatic struct simd_skcipher_alg *serpent_simd_algs[ARRAY_SIZE(serpent_algs)];
2418c2ecf20Sopenharmony_ci
2428c2ecf20Sopenharmony_cistatic int __init init(void)
2438c2ecf20Sopenharmony_ci{
2448c2ecf20Sopenharmony_ci	const char *feature_name;
2458c2ecf20Sopenharmony_ci
2468c2ecf20Sopenharmony_ci	if (!boot_cpu_has(X86_FEATURE_AVX2) || !boot_cpu_has(X86_FEATURE_OSXSAVE)) {
2478c2ecf20Sopenharmony_ci		pr_info("AVX2 instructions are not detected.\n");
2488c2ecf20Sopenharmony_ci		return -ENODEV;
2498c2ecf20Sopenharmony_ci	}
2508c2ecf20Sopenharmony_ci	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
2518c2ecf20Sopenharmony_ci				&feature_name)) {
2528c2ecf20Sopenharmony_ci		pr_info("CPU feature '%s' is not supported.\n", feature_name);
2538c2ecf20Sopenharmony_ci		return -ENODEV;
2548c2ecf20Sopenharmony_ci	}
2558c2ecf20Sopenharmony_ci
2568c2ecf20Sopenharmony_ci	return simd_register_skciphers_compat(serpent_algs,
2578c2ecf20Sopenharmony_ci					      ARRAY_SIZE(serpent_algs),
2588c2ecf20Sopenharmony_ci					      serpent_simd_algs);
2598c2ecf20Sopenharmony_ci}
2608c2ecf20Sopenharmony_ci
2618c2ecf20Sopenharmony_cistatic void __exit fini(void)
2628c2ecf20Sopenharmony_ci{
2638c2ecf20Sopenharmony_ci	simd_unregister_skciphers(serpent_algs, ARRAY_SIZE(serpent_algs),
2648c2ecf20Sopenharmony_ci				  serpent_simd_algs);
2658c2ecf20Sopenharmony_ci}
2668c2ecf20Sopenharmony_ci
2678c2ecf20Sopenharmony_cimodule_init(init);
2688c2ecf20Sopenharmony_cimodule_exit(fini);
2698c2ecf20Sopenharmony_ci
2708c2ecf20Sopenharmony_ciMODULE_LICENSE("GPL");
2718c2ecf20Sopenharmony_ciMODULE_DESCRIPTION("Serpent Cipher Algorithm, AVX2 optimized");
2728c2ecf20Sopenharmony_ciMODULE_ALIAS_CRYPTO("serpent");
2738c2ecf20Sopenharmony_ciMODULE_ALIAS_CRYPTO("serpent-asm");
274