/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * SM4 Cipher Algorithm, AES-NI/AVX optimized.
 * as specified in
 * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html
 *
 * Copyright (c) 2021, Alibaba Group.
 * Copyright (c) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
 */
1062306a36Sopenharmony_ci
1162306a36Sopenharmony_ci#include <linux/module.h>
1262306a36Sopenharmony_ci#include <linux/crypto.h>
1362306a36Sopenharmony_ci#include <linux/kernel.h>
1462306a36Sopenharmony_ci#include <asm/simd.h>
1562306a36Sopenharmony_ci#include <crypto/internal/simd.h>
1662306a36Sopenharmony_ci#include <crypto/internal/skcipher.h>
1762306a36Sopenharmony_ci#include <crypto/sm4.h>
1862306a36Sopenharmony_ci#include "sm4-avx.h"
1962306a36Sopenharmony_ci
/* Number of bytes covered by one eight-block batch. */
#define SM4_CRYPT8_BLOCK_SIZE	(8 * SM4_BLOCK_SIZE)
2162306a36Sopenharmony_ci
2262306a36Sopenharmony_ciasmlinkage void sm4_aesni_avx_crypt4(const u32 *rk, u8 *dst,
2362306a36Sopenharmony_ci				const u8 *src, int nblocks);
2462306a36Sopenharmony_ciasmlinkage void sm4_aesni_avx_crypt8(const u32 *rk, u8 *dst,
2562306a36Sopenharmony_ci				const u8 *src, int nblocks);
2662306a36Sopenharmony_ciasmlinkage void sm4_aesni_avx_ctr_enc_blk8(const u32 *rk, u8 *dst,
2762306a36Sopenharmony_ci				const u8 *src, u8 *iv);
2862306a36Sopenharmony_ciasmlinkage void sm4_aesni_avx_cbc_dec_blk8(const u32 *rk, u8 *dst,
2962306a36Sopenharmony_ci				const u8 *src, u8 *iv);
3062306a36Sopenharmony_ciasmlinkage void sm4_aesni_avx_cfb_dec_blk8(const u32 *rk, u8 *dst,
3162306a36Sopenharmony_ci				const u8 *src, u8 *iv);
3262306a36Sopenharmony_ci
3362306a36Sopenharmony_cistatic int sm4_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
3462306a36Sopenharmony_ci			unsigned int key_len)
3562306a36Sopenharmony_ci{
3662306a36Sopenharmony_ci	struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
3762306a36Sopenharmony_ci
3862306a36Sopenharmony_ci	return sm4_expandkey(ctx, key, key_len);
3962306a36Sopenharmony_ci}
4062306a36Sopenharmony_ci
4162306a36Sopenharmony_cistatic int ecb_do_crypt(struct skcipher_request *req, const u32 *rkey)
4262306a36Sopenharmony_ci{
4362306a36Sopenharmony_ci	struct skcipher_walk walk;
4462306a36Sopenharmony_ci	unsigned int nbytes;
4562306a36Sopenharmony_ci	int err;
4662306a36Sopenharmony_ci
4762306a36Sopenharmony_ci	err = skcipher_walk_virt(&walk, req, false);
4862306a36Sopenharmony_ci
4962306a36Sopenharmony_ci	while ((nbytes = walk.nbytes) > 0) {
5062306a36Sopenharmony_ci		const u8 *src = walk.src.virt.addr;
5162306a36Sopenharmony_ci		u8 *dst = walk.dst.virt.addr;
5262306a36Sopenharmony_ci
5362306a36Sopenharmony_ci		kernel_fpu_begin();
5462306a36Sopenharmony_ci		while (nbytes >= SM4_CRYPT8_BLOCK_SIZE) {
5562306a36Sopenharmony_ci			sm4_aesni_avx_crypt8(rkey, dst, src, 8);
5662306a36Sopenharmony_ci			dst += SM4_CRYPT8_BLOCK_SIZE;
5762306a36Sopenharmony_ci			src += SM4_CRYPT8_BLOCK_SIZE;
5862306a36Sopenharmony_ci			nbytes -= SM4_CRYPT8_BLOCK_SIZE;
5962306a36Sopenharmony_ci		}
6062306a36Sopenharmony_ci		while (nbytes >= SM4_BLOCK_SIZE) {
6162306a36Sopenharmony_ci			unsigned int nblocks = min(nbytes >> 4, 4u);
6262306a36Sopenharmony_ci			sm4_aesni_avx_crypt4(rkey, dst, src, nblocks);
6362306a36Sopenharmony_ci			dst += nblocks * SM4_BLOCK_SIZE;
6462306a36Sopenharmony_ci			src += nblocks * SM4_BLOCK_SIZE;
6562306a36Sopenharmony_ci			nbytes -= nblocks * SM4_BLOCK_SIZE;
6662306a36Sopenharmony_ci		}
6762306a36Sopenharmony_ci		kernel_fpu_end();
6862306a36Sopenharmony_ci
6962306a36Sopenharmony_ci		err = skcipher_walk_done(&walk, nbytes);
7062306a36Sopenharmony_ci	}
7162306a36Sopenharmony_ci
7262306a36Sopenharmony_ci	return err;
7362306a36Sopenharmony_ci}
7462306a36Sopenharmony_ci
7562306a36Sopenharmony_ciint sm4_avx_ecb_encrypt(struct skcipher_request *req)
7662306a36Sopenharmony_ci{
7762306a36Sopenharmony_ci	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
7862306a36Sopenharmony_ci	struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
7962306a36Sopenharmony_ci
8062306a36Sopenharmony_ci	return ecb_do_crypt(req, ctx->rkey_enc);
8162306a36Sopenharmony_ci}
8262306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(sm4_avx_ecb_encrypt);
8362306a36Sopenharmony_ci
8462306a36Sopenharmony_ciint sm4_avx_ecb_decrypt(struct skcipher_request *req)
8562306a36Sopenharmony_ci{
8662306a36Sopenharmony_ci	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
8762306a36Sopenharmony_ci	struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
8862306a36Sopenharmony_ci
8962306a36Sopenharmony_ci	return ecb_do_crypt(req, ctx->rkey_dec);
9062306a36Sopenharmony_ci}
9162306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(sm4_avx_ecb_decrypt);
9262306a36Sopenharmony_ci
9362306a36Sopenharmony_ciint sm4_cbc_encrypt(struct skcipher_request *req)
9462306a36Sopenharmony_ci{
9562306a36Sopenharmony_ci	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
9662306a36Sopenharmony_ci	struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
9762306a36Sopenharmony_ci	struct skcipher_walk walk;
9862306a36Sopenharmony_ci	unsigned int nbytes;
9962306a36Sopenharmony_ci	int err;
10062306a36Sopenharmony_ci
10162306a36Sopenharmony_ci	err = skcipher_walk_virt(&walk, req, false);
10262306a36Sopenharmony_ci
10362306a36Sopenharmony_ci	while ((nbytes = walk.nbytes) > 0) {
10462306a36Sopenharmony_ci		const u8 *iv = walk.iv;
10562306a36Sopenharmony_ci		const u8 *src = walk.src.virt.addr;
10662306a36Sopenharmony_ci		u8 *dst = walk.dst.virt.addr;
10762306a36Sopenharmony_ci
10862306a36Sopenharmony_ci		while (nbytes >= SM4_BLOCK_SIZE) {
10962306a36Sopenharmony_ci			crypto_xor_cpy(dst, src, iv, SM4_BLOCK_SIZE);
11062306a36Sopenharmony_ci			sm4_crypt_block(ctx->rkey_enc, dst, dst);
11162306a36Sopenharmony_ci			iv = dst;
11262306a36Sopenharmony_ci			src += SM4_BLOCK_SIZE;
11362306a36Sopenharmony_ci			dst += SM4_BLOCK_SIZE;
11462306a36Sopenharmony_ci			nbytes -= SM4_BLOCK_SIZE;
11562306a36Sopenharmony_ci		}
11662306a36Sopenharmony_ci		if (iv != walk.iv)
11762306a36Sopenharmony_ci			memcpy(walk.iv, iv, SM4_BLOCK_SIZE);
11862306a36Sopenharmony_ci
11962306a36Sopenharmony_ci		err = skcipher_walk_done(&walk, nbytes);
12062306a36Sopenharmony_ci	}
12162306a36Sopenharmony_ci
12262306a36Sopenharmony_ci	return err;
12362306a36Sopenharmony_ci}
12462306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(sm4_cbc_encrypt);
12562306a36Sopenharmony_ci
12662306a36Sopenharmony_ciint sm4_avx_cbc_decrypt(struct skcipher_request *req,
12762306a36Sopenharmony_ci			unsigned int bsize, sm4_crypt_func func)
12862306a36Sopenharmony_ci{
12962306a36Sopenharmony_ci	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
13062306a36Sopenharmony_ci	struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
13162306a36Sopenharmony_ci	struct skcipher_walk walk;
13262306a36Sopenharmony_ci	unsigned int nbytes;
13362306a36Sopenharmony_ci	int err;
13462306a36Sopenharmony_ci
13562306a36Sopenharmony_ci	err = skcipher_walk_virt(&walk, req, false);
13662306a36Sopenharmony_ci
13762306a36Sopenharmony_ci	while ((nbytes = walk.nbytes) > 0) {
13862306a36Sopenharmony_ci		const u8 *src = walk.src.virt.addr;
13962306a36Sopenharmony_ci		u8 *dst = walk.dst.virt.addr;
14062306a36Sopenharmony_ci
14162306a36Sopenharmony_ci		kernel_fpu_begin();
14262306a36Sopenharmony_ci
14362306a36Sopenharmony_ci		while (nbytes >= bsize) {
14462306a36Sopenharmony_ci			func(ctx->rkey_dec, dst, src, walk.iv);
14562306a36Sopenharmony_ci			dst += bsize;
14662306a36Sopenharmony_ci			src += bsize;
14762306a36Sopenharmony_ci			nbytes -= bsize;
14862306a36Sopenharmony_ci		}
14962306a36Sopenharmony_ci
15062306a36Sopenharmony_ci		while (nbytes >= SM4_BLOCK_SIZE) {
15162306a36Sopenharmony_ci			u8 keystream[SM4_BLOCK_SIZE * 8];
15262306a36Sopenharmony_ci			u8 iv[SM4_BLOCK_SIZE];
15362306a36Sopenharmony_ci			unsigned int nblocks = min(nbytes >> 4, 8u);
15462306a36Sopenharmony_ci			int i;
15562306a36Sopenharmony_ci
15662306a36Sopenharmony_ci			sm4_aesni_avx_crypt8(ctx->rkey_dec, keystream,
15762306a36Sopenharmony_ci						src, nblocks);
15862306a36Sopenharmony_ci
15962306a36Sopenharmony_ci			src += ((int)nblocks - 2) * SM4_BLOCK_SIZE;
16062306a36Sopenharmony_ci			dst += (nblocks - 1) * SM4_BLOCK_SIZE;
16162306a36Sopenharmony_ci			memcpy(iv, src + SM4_BLOCK_SIZE, SM4_BLOCK_SIZE);
16262306a36Sopenharmony_ci
16362306a36Sopenharmony_ci			for (i = nblocks - 1; i > 0; i--) {
16462306a36Sopenharmony_ci				crypto_xor_cpy(dst, src,
16562306a36Sopenharmony_ci					&keystream[i * SM4_BLOCK_SIZE],
16662306a36Sopenharmony_ci					SM4_BLOCK_SIZE);
16762306a36Sopenharmony_ci				src -= SM4_BLOCK_SIZE;
16862306a36Sopenharmony_ci				dst -= SM4_BLOCK_SIZE;
16962306a36Sopenharmony_ci			}
17062306a36Sopenharmony_ci			crypto_xor_cpy(dst, walk.iv, keystream, SM4_BLOCK_SIZE);
17162306a36Sopenharmony_ci			memcpy(walk.iv, iv, SM4_BLOCK_SIZE);
17262306a36Sopenharmony_ci			dst += nblocks * SM4_BLOCK_SIZE;
17362306a36Sopenharmony_ci			src += (nblocks + 1) * SM4_BLOCK_SIZE;
17462306a36Sopenharmony_ci			nbytes -= nblocks * SM4_BLOCK_SIZE;
17562306a36Sopenharmony_ci		}
17662306a36Sopenharmony_ci
17762306a36Sopenharmony_ci		kernel_fpu_end();
17862306a36Sopenharmony_ci		err = skcipher_walk_done(&walk, nbytes);
17962306a36Sopenharmony_ci	}
18062306a36Sopenharmony_ci
18162306a36Sopenharmony_ci	return err;
18262306a36Sopenharmony_ci}
18362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(sm4_avx_cbc_decrypt);
18462306a36Sopenharmony_ci
18562306a36Sopenharmony_cistatic int cbc_decrypt(struct skcipher_request *req)
18662306a36Sopenharmony_ci{
18762306a36Sopenharmony_ci	return sm4_avx_cbc_decrypt(req, SM4_CRYPT8_BLOCK_SIZE,
18862306a36Sopenharmony_ci				sm4_aesni_avx_cbc_dec_blk8);
18962306a36Sopenharmony_ci}
19062306a36Sopenharmony_ci
19162306a36Sopenharmony_ciint sm4_cfb_encrypt(struct skcipher_request *req)
19262306a36Sopenharmony_ci{
19362306a36Sopenharmony_ci	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
19462306a36Sopenharmony_ci	struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
19562306a36Sopenharmony_ci	struct skcipher_walk walk;
19662306a36Sopenharmony_ci	unsigned int nbytes;
19762306a36Sopenharmony_ci	int err;
19862306a36Sopenharmony_ci
19962306a36Sopenharmony_ci	err = skcipher_walk_virt(&walk, req, false);
20062306a36Sopenharmony_ci
20162306a36Sopenharmony_ci	while ((nbytes = walk.nbytes) > 0) {
20262306a36Sopenharmony_ci		u8 keystream[SM4_BLOCK_SIZE];
20362306a36Sopenharmony_ci		const u8 *iv = walk.iv;
20462306a36Sopenharmony_ci		const u8 *src = walk.src.virt.addr;
20562306a36Sopenharmony_ci		u8 *dst = walk.dst.virt.addr;
20662306a36Sopenharmony_ci
20762306a36Sopenharmony_ci		while (nbytes >= SM4_BLOCK_SIZE) {
20862306a36Sopenharmony_ci			sm4_crypt_block(ctx->rkey_enc, keystream, iv);
20962306a36Sopenharmony_ci			crypto_xor_cpy(dst, src, keystream, SM4_BLOCK_SIZE);
21062306a36Sopenharmony_ci			iv = dst;
21162306a36Sopenharmony_ci			src += SM4_BLOCK_SIZE;
21262306a36Sopenharmony_ci			dst += SM4_BLOCK_SIZE;
21362306a36Sopenharmony_ci			nbytes -= SM4_BLOCK_SIZE;
21462306a36Sopenharmony_ci		}
21562306a36Sopenharmony_ci		if (iv != walk.iv)
21662306a36Sopenharmony_ci			memcpy(walk.iv, iv, SM4_BLOCK_SIZE);
21762306a36Sopenharmony_ci
21862306a36Sopenharmony_ci		/* tail */
21962306a36Sopenharmony_ci		if (walk.nbytes == walk.total && nbytes > 0) {
22062306a36Sopenharmony_ci			sm4_crypt_block(ctx->rkey_enc, keystream, walk.iv);
22162306a36Sopenharmony_ci			crypto_xor_cpy(dst, src, keystream, nbytes);
22262306a36Sopenharmony_ci			nbytes = 0;
22362306a36Sopenharmony_ci		}
22462306a36Sopenharmony_ci
22562306a36Sopenharmony_ci		err = skcipher_walk_done(&walk, nbytes);
22662306a36Sopenharmony_ci	}
22762306a36Sopenharmony_ci
22862306a36Sopenharmony_ci	return err;
22962306a36Sopenharmony_ci}
23062306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(sm4_cfb_encrypt);
23162306a36Sopenharmony_ci
23262306a36Sopenharmony_ciint sm4_avx_cfb_decrypt(struct skcipher_request *req,
23362306a36Sopenharmony_ci			unsigned int bsize, sm4_crypt_func func)
23462306a36Sopenharmony_ci{
23562306a36Sopenharmony_ci	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
23662306a36Sopenharmony_ci	struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
23762306a36Sopenharmony_ci	struct skcipher_walk walk;
23862306a36Sopenharmony_ci	unsigned int nbytes;
23962306a36Sopenharmony_ci	int err;
24062306a36Sopenharmony_ci
24162306a36Sopenharmony_ci	err = skcipher_walk_virt(&walk, req, false);
24262306a36Sopenharmony_ci
24362306a36Sopenharmony_ci	while ((nbytes = walk.nbytes) > 0) {
24462306a36Sopenharmony_ci		const u8 *src = walk.src.virt.addr;
24562306a36Sopenharmony_ci		u8 *dst = walk.dst.virt.addr;
24662306a36Sopenharmony_ci
24762306a36Sopenharmony_ci		kernel_fpu_begin();
24862306a36Sopenharmony_ci
24962306a36Sopenharmony_ci		while (nbytes >= bsize) {
25062306a36Sopenharmony_ci			func(ctx->rkey_enc, dst, src, walk.iv);
25162306a36Sopenharmony_ci			dst += bsize;
25262306a36Sopenharmony_ci			src += bsize;
25362306a36Sopenharmony_ci			nbytes -= bsize;
25462306a36Sopenharmony_ci		}
25562306a36Sopenharmony_ci
25662306a36Sopenharmony_ci		while (nbytes >= SM4_BLOCK_SIZE) {
25762306a36Sopenharmony_ci			u8 keystream[SM4_BLOCK_SIZE * 8];
25862306a36Sopenharmony_ci			unsigned int nblocks = min(nbytes >> 4, 8u);
25962306a36Sopenharmony_ci
26062306a36Sopenharmony_ci			memcpy(keystream, walk.iv, SM4_BLOCK_SIZE);
26162306a36Sopenharmony_ci			if (nblocks > 1)
26262306a36Sopenharmony_ci				memcpy(&keystream[SM4_BLOCK_SIZE], src,
26362306a36Sopenharmony_ci					(nblocks - 1) * SM4_BLOCK_SIZE);
26462306a36Sopenharmony_ci			memcpy(walk.iv, src + (nblocks - 1) * SM4_BLOCK_SIZE,
26562306a36Sopenharmony_ci				SM4_BLOCK_SIZE);
26662306a36Sopenharmony_ci
26762306a36Sopenharmony_ci			sm4_aesni_avx_crypt8(ctx->rkey_enc, keystream,
26862306a36Sopenharmony_ci						keystream, nblocks);
26962306a36Sopenharmony_ci
27062306a36Sopenharmony_ci			crypto_xor_cpy(dst, src, keystream,
27162306a36Sopenharmony_ci					nblocks * SM4_BLOCK_SIZE);
27262306a36Sopenharmony_ci			dst += nblocks * SM4_BLOCK_SIZE;
27362306a36Sopenharmony_ci			src += nblocks * SM4_BLOCK_SIZE;
27462306a36Sopenharmony_ci			nbytes -= nblocks * SM4_BLOCK_SIZE;
27562306a36Sopenharmony_ci		}
27662306a36Sopenharmony_ci
27762306a36Sopenharmony_ci		kernel_fpu_end();
27862306a36Sopenharmony_ci
27962306a36Sopenharmony_ci		/* tail */
28062306a36Sopenharmony_ci		if (walk.nbytes == walk.total && nbytes > 0) {
28162306a36Sopenharmony_ci			u8 keystream[SM4_BLOCK_SIZE];
28262306a36Sopenharmony_ci
28362306a36Sopenharmony_ci			sm4_crypt_block(ctx->rkey_enc, keystream, walk.iv);
28462306a36Sopenharmony_ci			crypto_xor_cpy(dst, src, keystream, nbytes);
28562306a36Sopenharmony_ci			nbytes = 0;
28662306a36Sopenharmony_ci		}
28762306a36Sopenharmony_ci
28862306a36Sopenharmony_ci		err = skcipher_walk_done(&walk, nbytes);
28962306a36Sopenharmony_ci	}
29062306a36Sopenharmony_ci
29162306a36Sopenharmony_ci	return err;
29262306a36Sopenharmony_ci}
29362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(sm4_avx_cfb_decrypt);
29462306a36Sopenharmony_ci
29562306a36Sopenharmony_cistatic int cfb_decrypt(struct skcipher_request *req)
29662306a36Sopenharmony_ci{
29762306a36Sopenharmony_ci	return sm4_avx_cfb_decrypt(req, SM4_CRYPT8_BLOCK_SIZE,
29862306a36Sopenharmony_ci				sm4_aesni_avx_cfb_dec_blk8);
29962306a36Sopenharmony_ci}
30062306a36Sopenharmony_ci
30162306a36Sopenharmony_ciint sm4_avx_ctr_crypt(struct skcipher_request *req,
30262306a36Sopenharmony_ci			unsigned int bsize, sm4_crypt_func func)
30362306a36Sopenharmony_ci{
30462306a36Sopenharmony_ci	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
30562306a36Sopenharmony_ci	struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
30662306a36Sopenharmony_ci	struct skcipher_walk walk;
30762306a36Sopenharmony_ci	unsigned int nbytes;
30862306a36Sopenharmony_ci	int err;
30962306a36Sopenharmony_ci
31062306a36Sopenharmony_ci	err = skcipher_walk_virt(&walk, req, false);
31162306a36Sopenharmony_ci
31262306a36Sopenharmony_ci	while ((nbytes = walk.nbytes) > 0) {
31362306a36Sopenharmony_ci		const u8 *src = walk.src.virt.addr;
31462306a36Sopenharmony_ci		u8 *dst = walk.dst.virt.addr;
31562306a36Sopenharmony_ci
31662306a36Sopenharmony_ci		kernel_fpu_begin();
31762306a36Sopenharmony_ci
31862306a36Sopenharmony_ci		while (nbytes >= bsize) {
31962306a36Sopenharmony_ci			func(ctx->rkey_enc, dst, src, walk.iv);
32062306a36Sopenharmony_ci			dst += bsize;
32162306a36Sopenharmony_ci			src += bsize;
32262306a36Sopenharmony_ci			nbytes -= bsize;
32362306a36Sopenharmony_ci		}
32462306a36Sopenharmony_ci
32562306a36Sopenharmony_ci		while (nbytes >= SM4_BLOCK_SIZE) {
32662306a36Sopenharmony_ci			u8 keystream[SM4_BLOCK_SIZE * 8];
32762306a36Sopenharmony_ci			unsigned int nblocks = min(nbytes >> 4, 8u);
32862306a36Sopenharmony_ci			int i;
32962306a36Sopenharmony_ci
33062306a36Sopenharmony_ci			for (i = 0; i < nblocks; i++) {
33162306a36Sopenharmony_ci				memcpy(&keystream[i * SM4_BLOCK_SIZE],
33262306a36Sopenharmony_ci					walk.iv, SM4_BLOCK_SIZE);
33362306a36Sopenharmony_ci				crypto_inc(walk.iv, SM4_BLOCK_SIZE);
33462306a36Sopenharmony_ci			}
33562306a36Sopenharmony_ci			sm4_aesni_avx_crypt8(ctx->rkey_enc, keystream,
33662306a36Sopenharmony_ci					keystream, nblocks);
33762306a36Sopenharmony_ci
33862306a36Sopenharmony_ci			crypto_xor_cpy(dst, src, keystream,
33962306a36Sopenharmony_ci					nblocks * SM4_BLOCK_SIZE);
34062306a36Sopenharmony_ci			dst += nblocks * SM4_BLOCK_SIZE;
34162306a36Sopenharmony_ci			src += nblocks * SM4_BLOCK_SIZE;
34262306a36Sopenharmony_ci			nbytes -= nblocks * SM4_BLOCK_SIZE;
34362306a36Sopenharmony_ci		}
34462306a36Sopenharmony_ci
34562306a36Sopenharmony_ci		kernel_fpu_end();
34662306a36Sopenharmony_ci
34762306a36Sopenharmony_ci		/* tail */
34862306a36Sopenharmony_ci		if (walk.nbytes == walk.total && nbytes > 0) {
34962306a36Sopenharmony_ci			u8 keystream[SM4_BLOCK_SIZE];
35062306a36Sopenharmony_ci
35162306a36Sopenharmony_ci			memcpy(keystream, walk.iv, SM4_BLOCK_SIZE);
35262306a36Sopenharmony_ci			crypto_inc(walk.iv, SM4_BLOCK_SIZE);
35362306a36Sopenharmony_ci
35462306a36Sopenharmony_ci			sm4_crypt_block(ctx->rkey_enc, keystream, keystream);
35562306a36Sopenharmony_ci
35662306a36Sopenharmony_ci			crypto_xor_cpy(dst, src, keystream, nbytes);
35762306a36Sopenharmony_ci			dst += nbytes;
35862306a36Sopenharmony_ci			src += nbytes;
35962306a36Sopenharmony_ci			nbytes = 0;
36062306a36Sopenharmony_ci		}
36162306a36Sopenharmony_ci
36262306a36Sopenharmony_ci		err = skcipher_walk_done(&walk, nbytes);
36362306a36Sopenharmony_ci	}
36462306a36Sopenharmony_ci
36562306a36Sopenharmony_ci	return err;
36662306a36Sopenharmony_ci}
36762306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(sm4_avx_ctr_crypt);
36862306a36Sopenharmony_ci
36962306a36Sopenharmony_cistatic int ctr_crypt(struct skcipher_request *req)
37062306a36Sopenharmony_ci{
37162306a36Sopenharmony_ci	return sm4_avx_ctr_crypt(req, SM4_CRYPT8_BLOCK_SIZE,
37262306a36Sopenharmony_ci				sm4_aesni_avx_ctr_enc_blk8);
37362306a36Sopenharmony_ci}
37462306a36Sopenharmony_ci
37562306a36Sopenharmony_cistatic struct skcipher_alg sm4_aesni_avx_skciphers[] = {
37662306a36Sopenharmony_ci	{
37762306a36Sopenharmony_ci		.base = {
37862306a36Sopenharmony_ci			.cra_name		= "__ecb(sm4)",
37962306a36Sopenharmony_ci			.cra_driver_name	= "__ecb-sm4-aesni-avx",
38062306a36Sopenharmony_ci			.cra_priority		= 400,
38162306a36Sopenharmony_ci			.cra_flags		= CRYPTO_ALG_INTERNAL,
38262306a36Sopenharmony_ci			.cra_blocksize		= SM4_BLOCK_SIZE,
38362306a36Sopenharmony_ci			.cra_ctxsize		= sizeof(struct sm4_ctx),
38462306a36Sopenharmony_ci			.cra_module		= THIS_MODULE,
38562306a36Sopenharmony_ci		},
38662306a36Sopenharmony_ci		.min_keysize	= SM4_KEY_SIZE,
38762306a36Sopenharmony_ci		.max_keysize	= SM4_KEY_SIZE,
38862306a36Sopenharmony_ci		.walksize	= 8 * SM4_BLOCK_SIZE,
38962306a36Sopenharmony_ci		.setkey		= sm4_skcipher_setkey,
39062306a36Sopenharmony_ci		.encrypt	= sm4_avx_ecb_encrypt,
39162306a36Sopenharmony_ci		.decrypt	= sm4_avx_ecb_decrypt,
39262306a36Sopenharmony_ci	}, {
39362306a36Sopenharmony_ci		.base = {
39462306a36Sopenharmony_ci			.cra_name		= "__cbc(sm4)",
39562306a36Sopenharmony_ci			.cra_driver_name	= "__cbc-sm4-aesni-avx",
39662306a36Sopenharmony_ci			.cra_priority		= 400,
39762306a36Sopenharmony_ci			.cra_flags		= CRYPTO_ALG_INTERNAL,
39862306a36Sopenharmony_ci			.cra_blocksize		= SM4_BLOCK_SIZE,
39962306a36Sopenharmony_ci			.cra_ctxsize		= sizeof(struct sm4_ctx),
40062306a36Sopenharmony_ci			.cra_module		= THIS_MODULE,
40162306a36Sopenharmony_ci		},
40262306a36Sopenharmony_ci		.min_keysize	= SM4_KEY_SIZE,
40362306a36Sopenharmony_ci		.max_keysize	= SM4_KEY_SIZE,
40462306a36Sopenharmony_ci		.ivsize		= SM4_BLOCK_SIZE,
40562306a36Sopenharmony_ci		.walksize	= 8 * SM4_BLOCK_SIZE,
40662306a36Sopenharmony_ci		.setkey		= sm4_skcipher_setkey,
40762306a36Sopenharmony_ci		.encrypt	= sm4_cbc_encrypt,
40862306a36Sopenharmony_ci		.decrypt	= cbc_decrypt,
40962306a36Sopenharmony_ci	}, {
41062306a36Sopenharmony_ci		.base = {
41162306a36Sopenharmony_ci			.cra_name		= "__cfb(sm4)",
41262306a36Sopenharmony_ci			.cra_driver_name	= "__cfb-sm4-aesni-avx",
41362306a36Sopenharmony_ci			.cra_priority		= 400,
41462306a36Sopenharmony_ci			.cra_flags		= CRYPTO_ALG_INTERNAL,
41562306a36Sopenharmony_ci			.cra_blocksize		= 1,
41662306a36Sopenharmony_ci			.cra_ctxsize		= sizeof(struct sm4_ctx),
41762306a36Sopenharmony_ci			.cra_module		= THIS_MODULE,
41862306a36Sopenharmony_ci		},
41962306a36Sopenharmony_ci		.min_keysize	= SM4_KEY_SIZE,
42062306a36Sopenharmony_ci		.max_keysize	= SM4_KEY_SIZE,
42162306a36Sopenharmony_ci		.ivsize		= SM4_BLOCK_SIZE,
42262306a36Sopenharmony_ci		.chunksize	= SM4_BLOCK_SIZE,
42362306a36Sopenharmony_ci		.walksize	= 8 * SM4_BLOCK_SIZE,
42462306a36Sopenharmony_ci		.setkey		= sm4_skcipher_setkey,
42562306a36Sopenharmony_ci		.encrypt	= sm4_cfb_encrypt,
42662306a36Sopenharmony_ci		.decrypt	= cfb_decrypt,
42762306a36Sopenharmony_ci	}, {
42862306a36Sopenharmony_ci		.base = {
42962306a36Sopenharmony_ci			.cra_name		= "__ctr(sm4)",
43062306a36Sopenharmony_ci			.cra_driver_name	= "__ctr-sm4-aesni-avx",
43162306a36Sopenharmony_ci			.cra_priority		= 400,
43262306a36Sopenharmony_ci			.cra_flags		= CRYPTO_ALG_INTERNAL,
43362306a36Sopenharmony_ci			.cra_blocksize		= 1,
43462306a36Sopenharmony_ci			.cra_ctxsize		= sizeof(struct sm4_ctx),
43562306a36Sopenharmony_ci			.cra_module		= THIS_MODULE,
43662306a36Sopenharmony_ci		},
43762306a36Sopenharmony_ci		.min_keysize	= SM4_KEY_SIZE,
43862306a36Sopenharmony_ci		.max_keysize	= SM4_KEY_SIZE,
43962306a36Sopenharmony_ci		.ivsize		= SM4_BLOCK_SIZE,
44062306a36Sopenharmony_ci		.chunksize	= SM4_BLOCK_SIZE,
44162306a36Sopenharmony_ci		.walksize	= 8 * SM4_BLOCK_SIZE,
44262306a36Sopenharmony_ci		.setkey		= sm4_skcipher_setkey,
44362306a36Sopenharmony_ci		.encrypt	= ctr_crypt,
44462306a36Sopenharmony_ci		.decrypt	= ctr_crypt,
44562306a36Sopenharmony_ci	}
44662306a36Sopenharmony_ci};
44762306a36Sopenharmony_ci
44862306a36Sopenharmony_cistatic struct simd_skcipher_alg *
44962306a36Sopenharmony_cisimd_sm4_aesni_avx_skciphers[ARRAY_SIZE(sm4_aesni_avx_skciphers)];
45062306a36Sopenharmony_ci
45162306a36Sopenharmony_cistatic int __init sm4_init(void)
45262306a36Sopenharmony_ci{
45362306a36Sopenharmony_ci	const char *feature_name;
45462306a36Sopenharmony_ci
45562306a36Sopenharmony_ci	if (!boot_cpu_has(X86_FEATURE_AVX) ||
45662306a36Sopenharmony_ci	    !boot_cpu_has(X86_FEATURE_AES) ||
45762306a36Sopenharmony_ci	    !boot_cpu_has(X86_FEATURE_OSXSAVE)) {
45862306a36Sopenharmony_ci		pr_info("AVX or AES-NI instructions are not detected.\n");
45962306a36Sopenharmony_ci		return -ENODEV;
46062306a36Sopenharmony_ci	}
46162306a36Sopenharmony_ci
46262306a36Sopenharmony_ci	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
46362306a36Sopenharmony_ci				&feature_name)) {
46462306a36Sopenharmony_ci		pr_info("CPU feature '%s' is not supported.\n", feature_name);
46562306a36Sopenharmony_ci		return -ENODEV;
46662306a36Sopenharmony_ci	}
46762306a36Sopenharmony_ci
46862306a36Sopenharmony_ci	return simd_register_skciphers_compat(sm4_aesni_avx_skciphers,
46962306a36Sopenharmony_ci					ARRAY_SIZE(sm4_aesni_avx_skciphers),
47062306a36Sopenharmony_ci					simd_sm4_aesni_avx_skciphers);
47162306a36Sopenharmony_ci}
47262306a36Sopenharmony_ci
47362306a36Sopenharmony_cistatic void __exit sm4_exit(void)
47462306a36Sopenharmony_ci{
47562306a36Sopenharmony_ci	simd_unregister_skciphers(sm4_aesni_avx_skciphers,
47662306a36Sopenharmony_ci					ARRAY_SIZE(sm4_aesni_avx_skciphers),
47762306a36Sopenharmony_ci					simd_sm4_aesni_avx_skciphers);
47862306a36Sopenharmony_ci}
47962306a36Sopenharmony_ci
48062306a36Sopenharmony_cimodule_init(sm4_init);
48162306a36Sopenharmony_cimodule_exit(sm4_exit);
48262306a36Sopenharmony_ci
48362306a36Sopenharmony_ciMODULE_LICENSE("GPL v2");
48462306a36Sopenharmony_ciMODULE_AUTHOR("Tianjia Zhang <tianjia.zhang@linux.alibaba.com>");
48562306a36Sopenharmony_ciMODULE_DESCRIPTION("SM4 Cipher Algorithm, AES-NI/AVX optimized");
48662306a36Sopenharmony_ciMODULE_ALIAS_CRYPTO("sm4");
48762306a36Sopenharmony_ciMODULE_ALIAS_CRYPTO("sm4-aesni-avx");
488