// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Glue Code for x86_64/AVX2/AES-NI assembler optimized version of Camellia
 *
 * Copyright © 2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
 */
78c2ecf20Sopenharmony_ci
88c2ecf20Sopenharmony_ci#include <asm/crypto/camellia.h>
98c2ecf20Sopenharmony_ci#include <asm/crypto/glue_helper.h>
108c2ecf20Sopenharmony_ci#include <crypto/algapi.h>
118c2ecf20Sopenharmony_ci#include <crypto/internal/simd.h>
128c2ecf20Sopenharmony_ci#include <crypto/xts.h>
138c2ecf20Sopenharmony_ci#include <linux/crypto.h>
148c2ecf20Sopenharmony_ci#include <linux/err.h>
158c2ecf20Sopenharmony_ci#include <linux/module.h>
168c2ecf20Sopenharmony_ci#include <linux/types.h>
178c2ecf20Sopenharmony_ci
/*
 * Batch widths, in Camellia blocks, used by the dispatch tables below:
 * 16 for the *_16way routines, 32 for the *_32way routines.
 */
#define CAMELLIA_AESNI_PARALLEL_BLOCKS		16
#define CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS	32
208c2ecf20Sopenharmony_ci
218c2ecf20Sopenharmony_ci/* 32-way AVX2/AES-NI parallel cipher functions */
228c2ecf20Sopenharmony_ciasmlinkage void camellia_ecb_enc_32way(const void *ctx, u8 *dst, const u8 *src);
238c2ecf20Sopenharmony_ciasmlinkage void camellia_ecb_dec_32way(const void *ctx, u8 *dst, const u8 *src);
248c2ecf20Sopenharmony_ci
258c2ecf20Sopenharmony_ciasmlinkage void camellia_cbc_dec_32way(const void *ctx, u8 *dst, const u8 *src);
268c2ecf20Sopenharmony_ciasmlinkage void camellia_ctr_32way(const void *ctx, u8 *dst, const u8 *src,
278c2ecf20Sopenharmony_ci				   le128 *iv);
288c2ecf20Sopenharmony_ci
298c2ecf20Sopenharmony_ciasmlinkage void camellia_xts_enc_32way(const void *ctx, u8 *dst, const u8 *src,
308c2ecf20Sopenharmony_ci				       le128 *iv);
318c2ecf20Sopenharmony_ciasmlinkage void camellia_xts_dec_32way(const void *ctx, u8 *dst, const u8 *src,
328c2ecf20Sopenharmony_ci				       le128 *iv);
338c2ecf20Sopenharmony_ci
348c2ecf20Sopenharmony_cistatic const struct common_glue_ctx camellia_enc = {
358c2ecf20Sopenharmony_ci	.num_funcs = 4,
368c2ecf20Sopenharmony_ci	.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
378c2ecf20Sopenharmony_ci
388c2ecf20Sopenharmony_ci	.funcs = { {
398c2ecf20Sopenharmony_ci		.num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
408c2ecf20Sopenharmony_ci		.fn_u = { .ecb = camellia_ecb_enc_32way }
418c2ecf20Sopenharmony_ci	}, {
428c2ecf20Sopenharmony_ci		.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
438c2ecf20Sopenharmony_ci		.fn_u = { .ecb = camellia_ecb_enc_16way }
448c2ecf20Sopenharmony_ci	}, {
458c2ecf20Sopenharmony_ci		.num_blocks = 2,
468c2ecf20Sopenharmony_ci		.fn_u = { .ecb = camellia_enc_blk_2way }
478c2ecf20Sopenharmony_ci	}, {
488c2ecf20Sopenharmony_ci		.num_blocks = 1,
498c2ecf20Sopenharmony_ci		.fn_u = { .ecb = camellia_enc_blk }
508c2ecf20Sopenharmony_ci	} }
518c2ecf20Sopenharmony_ci};
528c2ecf20Sopenharmony_ci
538c2ecf20Sopenharmony_cistatic const struct common_glue_ctx camellia_ctr = {
548c2ecf20Sopenharmony_ci	.num_funcs = 4,
558c2ecf20Sopenharmony_ci	.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
568c2ecf20Sopenharmony_ci
578c2ecf20Sopenharmony_ci	.funcs = { {
588c2ecf20Sopenharmony_ci		.num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
598c2ecf20Sopenharmony_ci		.fn_u = { .ctr = camellia_ctr_32way }
608c2ecf20Sopenharmony_ci	}, {
618c2ecf20Sopenharmony_ci		.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
628c2ecf20Sopenharmony_ci		.fn_u = { .ctr = camellia_ctr_16way }
638c2ecf20Sopenharmony_ci	}, {
648c2ecf20Sopenharmony_ci		.num_blocks = 2,
658c2ecf20Sopenharmony_ci		.fn_u = { .ctr = camellia_crypt_ctr_2way }
668c2ecf20Sopenharmony_ci	}, {
678c2ecf20Sopenharmony_ci		.num_blocks = 1,
688c2ecf20Sopenharmony_ci		.fn_u = { .ctr = camellia_crypt_ctr }
698c2ecf20Sopenharmony_ci	} }
708c2ecf20Sopenharmony_ci};
718c2ecf20Sopenharmony_ci
728c2ecf20Sopenharmony_cistatic const struct common_glue_ctx camellia_enc_xts = {
738c2ecf20Sopenharmony_ci	.num_funcs = 3,
748c2ecf20Sopenharmony_ci	.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
758c2ecf20Sopenharmony_ci
768c2ecf20Sopenharmony_ci	.funcs = { {
778c2ecf20Sopenharmony_ci		.num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
788c2ecf20Sopenharmony_ci		.fn_u = { .xts = camellia_xts_enc_32way }
798c2ecf20Sopenharmony_ci	}, {
808c2ecf20Sopenharmony_ci		.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
818c2ecf20Sopenharmony_ci		.fn_u = { .xts = camellia_xts_enc_16way }
828c2ecf20Sopenharmony_ci	}, {
838c2ecf20Sopenharmony_ci		.num_blocks = 1,
848c2ecf20Sopenharmony_ci		.fn_u = { .xts = camellia_xts_enc }
858c2ecf20Sopenharmony_ci	} }
868c2ecf20Sopenharmony_ci};
878c2ecf20Sopenharmony_ci
888c2ecf20Sopenharmony_cistatic const struct common_glue_ctx camellia_dec = {
898c2ecf20Sopenharmony_ci	.num_funcs = 4,
908c2ecf20Sopenharmony_ci	.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
918c2ecf20Sopenharmony_ci
928c2ecf20Sopenharmony_ci	.funcs = { {
938c2ecf20Sopenharmony_ci		.num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
948c2ecf20Sopenharmony_ci		.fn_u = { .ecb = camellia_ecb_dec_32way }
958c2ecf20Sopenharmony_ci	}, {
968c2ecf20Sopenharmony_ci		.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
978c2ecf20Sopenharmony_ci		.fn_u = { .ecb = camellia_ecb_dec_16way }
988c2ecf20Sopenharmony_ci	}, {
998c2ecf20Sopenharmony_ci		.num_blocks = 2,
1008c2ecf20Sopenharmony_ci		.fn_u = { .ecb = camellia_dec_blk_2way }
1018c2ecf20Sopenharmony_ci	}, {
1028c2ecf20Sopenharmony_ci		.num_blocks = 1,
1038c2ecf20Sopenharmony_ci		.fn_u = { .ecb = camellia_dec_blk }
1048c2ecf20Sopenharmony_ci	} }
1058c2ecf20Sopenharmony_ci};
1068c2ecf20Sopenharmony_ci
1078c2ecf20Sopenharmony_cistatic const struct common_glue_ctx camellia_dec_cbc = {
1088c2ecf20Sopenharmony_ci	.num_funcs = 4,
1098c2ecf20Sopenharmony_ci	.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
1108c2ecf20Sopenharmony_ci
1118c2ecf20Sopenharmony_ci	.funcs = { {
1128c2ecf20Sopenharmony_ci		.num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
1138c2ecf20Sopenharmony_ci		.fn_u = { .cbc = camellia_cbc_dec_32way }
1148c2ecf20Sopenharmony_ci	}, {
1158c2ecf20Sopenharmony_ci		.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
1168c2ecf20Sopenharmony_ci		.fn_u = { .cbc = camellia_cbc_dec_16way }
1178c2ecf20Sopenharmony_ci	}, {
1188c2ecf20Sopenharmony_ci		.num_blocks = 2,
1198c2ecf20Sopenharmony_ci		.fn_u = { .cbc = camellia_decrypt_cbc_2way }
1208c2ecf20Sopenharmony_ci	}, {
1218c2ecf20Sopenharmony_ci		.num_blocks = 1,
1228c2ecf20Sopenharmony_ci		.fn_u = { .cbc = camellia_dec_blk }
1238c2ecf20Sopenharmony_ci	} }
1248c2ecf20Sopenharmony_ci};
1258c2ecf20Sopenharmony_ci
1268c2ecf20Sopenharmony_cistatic const struct common_glue_ctx camellia_dec_xts = {
1278c2ecf20Sopenharmony_ci	.num_funcs = 3,
1288c2ecf20Sopenharmony_ci	.fpu_blocks_limit = CAMELLIA_AESNI_PARALLEL_BLOCKS,
1298c2ecf20Sopenharmony_ci
1308c2ecf20Sopenharmony_ci	.funcs = { {
1318c2ecf20Sopenharmony_ci		.num_blocks = CAMELLIA_AESNI_AVX2_PARALLEL_BLOCKS,
1328c2ecf20Sopenharmony_ci		.fn_u = { .xts = camellia_xts_dec_32way }
1338c2ecf20Sopenharmony_ci	}, {
1348c2ecf20Sopenharmony_ci		.num_blocks = CAMELLIA_AESNI_PARALLEL_BLOCKS,
1358c2ecf20Sopenharmony_ci		.fn_u = { .xts = camellia_xts_dec_16way }
1368c2ecf20Sopenharmony_ci	}, {
1378c2ecf20Sopenharmony_ci		.num_blocks = 1,
1388c2ecf20Sopenharmony_ci		.fn_u = { .xts = camellia_xts_dec }
1398c2ecf20Sopenharmony_ci	} }
1408c2ecf20Sopenharmony_ci};
1418c2ecf20Sopenharmony_ci
1428c2ecf20Sopenharmony_cistatic int camellia_setkey(struct crypto_skcipher *tfm, const u8 *key,
1438c2ecf20Sopenharmony_ci			   unsigned int keylen)
1448c2ecf20Sopenharmony_ci{
1458c2ecf20Sopenharmony_ci	return __camellia_setkey(crypto_skcipher_ctx(tfm), key, keylen);
1468c2ecf20Sopenharmony_ci}
1478c2ecf20Sopenharmony_ci
1488c2ecf20Sopenharmony_cistatic int ecb_encrypt(struct skcipher_request *req)
1498c2ecf20Sopenharmony_ci{
1508c2ecf20Sopenharmony_ci	return glue_ecb_req_128bit(&camellia_enc, req);
1518c2ecf20Sopenharmony_ci}
1528c2ecf20Sopenharmony_ci
1538c2ecf20Sopenharmony_cistatic int ecb_decrypt(struct skcipher_request *req)
1548c2ecf20Sopenharmony_ci{
1558c2ecf20Sopenharmony_ci	return glue_ecb_req_128bit(&camellia_dec, req);
1568c2ecf20Sopenharmony_ci}
1578c2ecf20Sopenharmony_ci
1588c2ecf20Sopenharmony_cistatic int cbc_encrypt(struct skcipher_request *req)
1598c2ecf20Sopenharmony_ci{
1608c2ecf20Sopenharmony_ci	return glue_cbc_encrypt_req_128bit(camellia_enc_blk, req);
1618c2ecf20Sopenharmony_ci}
1628c2ecf20Sopenharmony_ci
1638c2ecf20Sopenharmony_cistatic int cbc_decrypt(struct skcipher_request *req)
1648c2ecf20Sopenharmony_ci{
1658c2ecf20Sopenharmony_ci	return glue_cbc_decrypt_req_128bit(&camellia_dec_cbc, req);
1668c2ecf20Sopenharmony_ci}
1678c2ecf20Sopenharmony_ci
1688c2ecf20Sopenharmony_cistatic int ctr_crypt(struct skcipher_request *req)
1698c2ecf20Sopenharmony_ci{
1708c2ecf20Sopenharmony_ci	return glue_ctr_req_128bit(&camellia_ctr, req);
1718c2ecf20Sopenharmony_ci}
1728c2ecf20Sopenharmony_ci
1738c2ecf20Sopenharmony_cistatic int xts_encrypt(struct skcipher_request *req)
1748c2ecf20Sopenharmony_ci{
1758c2ecf20Sopenharmony_ci	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
1768c2ecf20Sopenharmony_ci	struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
1778c2ecf20Sopenharmony_ci
1788c2ecf20Sopenharmony_ci	return glue_xts_req_128bit(&camellia_enc_xts, req, camellia_enc_blk,
1798c2ecf20Sopenharmony_ci				   &ctx->tweak_ctx, &ctx->crypt_ctx, false);
1808c2ecf20Sopenharmony_ci}
1818c2ecf20Sopenharmony_ci
1828c2ecf20Sopenharmony_cistatic int xts_decrypt(struct skcipher_request *req)
1838c2ecf20Sopenharmony_ci{
1848c2ecf20Sopenharmony_ci	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
1858c2ecf20Sopenharmony_ci	struct camellia_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
1868c2ecf20Sopenharmony_ci
1878c2ecf20Sopenharmony_ci	return glue_xts_req_128bit(&camellia_dec_xts, req, camellia_enc_blk,
1888c2ecf20Sopenharmony_ci				   &ctx->tweak_ctx, &ctx->crypt_ctx, true);
1898c2ecf20Sopenharmony_ci}
1908c2ecf20Sopenharmony_ci
1918c2ecf20Sopenharmony_cistatic struct skcipher_alg camellia_algs[] = {
1928c2ecf20Sopenharmony_ci	{
1938c2ecf20Sopenharmony_ci		.base.cra_name		= "__ecb(camellia)",
1948c2ecf20Sopenharmony_ci		.base.cra_driver_name	= "__ecb-camellia-aesni-avx2",
1958c2ecf20Sopenharmony_ci		.base.cra_priority	= 500,
1968c2ecf20Sopenharmony_ci		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
1978c2ecf20Sopenharmony_ci		.base.cra_blocksize	= CAMELLIA_BLOCK_SIZE,
1988c2ecf20Sopenharmony_ci		.base.cra_ctxsize	= sizeof(struct camellia_ctx),
1998c2ecf20Sopenharmony_ci		.base.cra_module	= THIS_MODULE,
2008c2ecf20Sopenharmony_ci		.min_keysize		= CAMELLIA_MIN_KEY_SIZE,
2018c2ecf20Sopenharmony_ci		.max_keysize		= CAMELLIA_MAX_KEY_SIZE,
2028c2ecf20Sopenharmony_ci		.setkey			= camellia_setkey,
2038c2ecf20Sopenharmony_ci		.encrypt		= ecb_encrypt,
2048c2ecf20Sopenharmony_ci		.decrypt		= ecb_decrypt,
2058c2ecf20Sopenharmony_ci	}, {
2068c2ecf20Sopenharmony_ci		.base.cra_name		= "__cbc(camellia)",
2078c2ecf20Sopenharmony_ci		.base.cra_driver_name	= "__cbc-camellia-aesni-avx2",
2088c2ecf20Sopenharmony_ci		.base.cra_priority	= 500,
2098c2ecf20Sopenharmony_ci		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
2108c2ecf20Sopenharmony_ci		.base.cra_blocksize	= CAMELLIA_BLOCK_SIZE,
2118c2ecf20Sopenharmony_ci		.base.cra_ctxsize	= sizeof(struct camellia_ctx),
2128c2ecf20Sopenharmony_ci		.base.cra_module	= THIS_MODULE,
2138c2ecf20Sopenharmony_ci		.min_keysize		= CAMELLIA_MIN_KEY_SIZE,
2148c2ecf20Sopenharmony_ci		.max_keysize		= CAMELLIA_MAX_KEY_SIZE,
2158c2ecf20Sopenharmony_ci		.ivsize			= CAMELLIA_BLOCK_SIZE,
2168c2ecf20Sopenharmony_ci		.setkey			= camellia_setkey,
2178c2ecf20Sopenharmony_ci		.encrypt		= cbc_encrypt,
2188c2ecf20Sopenharmony_ci		.decrypt		= cbc_decrypt,
2198c2ecf20Sopenharmony_ci	}, {
2208c2ecf20Sopenharmony_ci		.base.cra_name		= "__ctr(camellia)",
2218c2ecf20Sopenharmony_ci		.base.cra_driver_name	= "__ctr-camellia-aesni-avx2",
2228c2ecf20Sopenharmony_ci		.base.cra_priority	= 500,
2238c2ecf20Sopenharmony_ci		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
2248c2ecf20Sopenharmony_ci		.base.cra_blocksize	= 1,
2258c2ecf20Sopenharmony_ci		.base.cra_ctxsize	= sizeof(struct camellia_ctx),
2268c2ecf20Sopenharmony_ci		.base.cra_module	= THIS_MODULE,
2278c2ecf20Sopenharmony_ci		.min_keysize		= CAMELLIA_MIN_KEY_SIZE,
2288c2ecf20Sopenharmony_ci		.max_keysize		= CAMELLIA_MAX_KEY_SIZE,
2298c2ecf20Sopenharmony_ci		.ivsize			= CAMELLIA_BLOCK_SIZE,
2308c2ecf20Sopenharmony_ci		.chunksize		= CAMELLIA_BLOCK_SIZE,
2318c2ecf20Sopenharmony_ci		.setkey			= camellia_setkey,
2328c2ecf20Sopenharmony_ci		.encrypt		= ctr_crypt,
2338c2ecf20Sopenharmony_ci		.decrypt		= ctr_crypt,
2348c2ecf20Sopenharmony_ci	}, {
2358c2ecf20Sopenharmony_ci		.base.cra_name		= "__xts(camellia)",
2368c2ecf20Sopenharmony_ci		.base.cra_driver_name	= "__xts-camellia-aesni-avx2",
2378c2ecf20Sopenharmony_ci		.base.cra_priority	= 500,
2388c2ecf20Sopenharmony_ci		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
2398c2ecf20Sopenharmony_ci		.base.cra_blocksize	= CAMELLIA_BLOCK_SIZE,
2408c2ecf20Sopenharmony_ci		.base.cra_ctxsize	= sizeof(struct camellia_xts_ctx),
2418c2ecf20Sopenharmony_ci		.base.cra_module	= THIS_MODULE,
2428c2ecf20Sopenharmony_ci		.min_keysize		= 2 * CAMELLIA_MIN_KEY_SIZE,
2438c2ecf20Sopenharmony_ci		.max_keysize		= 2 * CAMELLIA_MAX_KEY_SIZE,
2448c2ecf20Sopenharmony_ci		.ivsize			= CAMELLIA_BLOCK_SIZE,
2458c2ecf20Sopenharmony_ci		.setkey			= xts_camellia_setkey,
2468c2ecf20Sopenharmony_ci		.encrypt		= xts_encrypt,
2478c2ecf20Sopenharmony_ci		.decrypt		= xts_decrypt,
2488c2ecf20Sopenharmony_ci	},
2498c2ecf20Sopenharmony_ci};
2508c2ecf20Sopenharmony_ci
2518c2ecf20Sopenharmony_cistatic struct simd_skcipher_alg *camellia_simd_algs[ARRAY_SIZE(camellia_algs)];
2528c2ecf20Sopenharmony_ci
2538c2ecf20Sopenharmony_cistatic int __init camellia_aesni_init(void)
2548c2ecf20Sopenharmony_ci{
2558c2ecf20Sopenharmony_ci	const char *feature_name;
2568c2ecf20Sopenharmony_ci
2578c2ecf20Sopenharmony_ci	if (!boot_cpu_has(X86_FEATURE_AVX) ||
2588c2ecf20Sopenharmony_ci	    !boot_cpu_has(X86_FEATURE_AVX2) ||
2598c2ecf20Sopenharmony_ci	    !boot_cpu_has(X86_FEATURE_AES) ||
2608c2ecf20Sopenharmony_ci	    !boot_cpu_has(X86_FEATURE_OSXSAVE)) {
2618c2ecf20Sopenharmony_ci		pr_info("AVX2 or AES-NI instructions are not detected.\n");
2628c2ecf20Sopenharmony_ci		return -ENODEV;
2638c2ecf20Sopenharmony_ci	}
2648c2ecf20Sopenharmony_ci
2658c2ecf20Sopenharmony_ci	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
2668c2ecf20Sopenharmony_ci				&feature_name)) {
2678c2ecf20Sopenharmony_ci		pr_info("CPU feature '%s' is not supported.\n", feature_name);
2688c2ecf20Sopenharmony_ci		return -ENODEV;
2698c2ecf20Sopenharmony_ci	}
2708c2ecf20Sopenharmony_ci
2718c2ecf20Sopenharmony_ci	return simd_register_skciphers_compat(camellia_algs,
2728c2ecf20Sopenharmony_ci					      ARRAY_SIZE(camellia_algs),
2738c2ecf20Sopenharmony_ci					      camellia_simd_algs);
2748c2ecf20Sopenharmony_ci}
2758c2ecf20Sopenharmony_ci
2768c2ecf20Sopenharmony_cistatic void __exit camellia_aesni_fini(void)
2778c2ecf20Sopenharmony_ci{
2788c2ecf20Sopenharmony_ci	simd_unregister_skciphers(camellia_algs, ARRAY_SIZE(camellia_algs),
2798c2ecf20Sopenharmony_ci				  camellia_simd_algs);
2808c2ecf20Sopenharmony_ci}
2818c2ecf20Sopenharmony_ci
2828c2ecf20Sopenharmony_cimodule_init(camellia_aesni_init);
2838c2ecf20Sopenharmony_cimodule_exit(camellia_aesni_fini);
2848c2ecf20Sopenharmony_ci
2858c2ecf20Sopenharmony_ciMODULE_LICENSE("GPL");
2868c2ecf20Sopenharmony_ciMODULE_DESCRIPTION("Camellia Cipher Algorithm, AES-NI/AVX2 optimized");
2878c2ecf20Sopenharmony_ciMODULE_ALIAS_CRYPTO("camellia");
2888c2ecf20Sopenharmony_ciMODULE_ALIAS_CRYPTO("camellia-asm");
289