18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * Glue code for SHA-1 implementation for SPE instructions (PPC)
48c2ecf20Sopenharmony_ci *
58c2ecf20Sopenharmony_ci * Based on generic implementation.
68c2ecf20Sopenharmony_ci *
78c2ecf20Sopenharmony_ci * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
88c2ecf20Sopenharmony_ci */
98c2ecf20Sopenharmony_ci
108c2ecf20Sopenharmony_ci#include <crypto/internal/hash.h>
118c2ecf20Sopenharmony_ci#include <linux/init.h>
128c2ecf20Sopenharmony_ci#include <linux/module.h>
138c2ecf20Sopenharmony_ci#include <linux/mm.h>
148c2ecf20Sopenharmony_ci#include <linux/types.h>
158c2ecf20Sopenharmony_ci#include <crypto/sha.h>
168c2ecf20Sopenharmony_ci#include <asm/byteorder.h>
178c2ecf20Sopenharmony_ci#include <asm/switch_to.h>
188c2ecf20Sopenharmony_ci#include <linux/hardirq.h>
198c2ecf20Sopenharmony_ci
/*
 * MAX_BYTES is the upper limit on the number of bytes that are processed
 * between preempt_disable() and preempt_enable(). SHA1 takes ~1000
 * operations per 64 bytes. e500 cores can issue two arithmetic instructions
 * per clock cycle using one 32/64 bit unit (SU1) and one 32 bit unit (SU2).
 * Thus 2KB of input data will need an estimated maximum of 18,000 cycles,
 * headroom for cache misses included. Even with the low end model clocked
 * at 667 MHz this amounts to a critical time window of less than 27us.
 */
#define MAX_BYTES 2048
318c2ecf20Sopenharmony_ci
328c2ecf20Sopenharmony_ciextern void ppc_spe_sha1_transform(u32 *state, const u8 *src, u32 blocks);
338c2ecf20Sopenharmony_ci
/*
 * Open a section in which the SPE transform may be called: preemption is
 * switched off first, then SPE use by the kernel is enabled.
 */
static void spe_begin(void)
{
	/* Only start SPE operations here; SPE registers are saved later. */
	preempt_disable();
	enable_kernel_spe();
}
408c2ecf20Sopenharmony_ci
/*
 * Close a section opened by spe_begin(): kernel SPE use is disabled first,
 * then preemption is allowed again.
 */
static void spe_end(void)
{
	disable_kernel_spe();
	/* preemption was turned off in spe_begin() */
	preempt_enable();
}
478c2ecf20Sopenharmony_ci
488c2ecf20Sopenharmony_cistatic inline void ppc_sha1_clear_context(struct sha1_state *sctx)
498c2ecf20Sopenharmony_ci{
508c2ecf20Sopenharmony_ci	int count = sizeof(struct sha1_state) >> 2;
518c2ecf20Sopenharmony_ci	u32 *ptr = (u32 *)sctx;
528c2ecf20Sopenharmony_ci
538c2ecf20Sopenharmony_ci	/* make sure we can clear the fast way */
548c2ecf20Sopenharmony_ci	BUILD_BUG_ON(sizeof(struct sha1_state) % 4);
558c2ecf20Sopenharmony_ci	do { *ptr++ = 0; } while (--count);
568c2ecf20Sopenharmony_ci}
578c2ecf20Sopenharmony_ci
588c2ecf20Sopenharmony_cistatic int ppc_spe_sha1_init(struct shash_desc *desc)
598c2ecf20Sopenharmony_ci{
608c2ecf20Sopenharmony_ci	struct sha1_state *sctx = shash_desc_ctx(desc);
618c2ecf20Sopenharmony_ci
628c2ecf20Sopenharmony_ci	sctx->state[0] = SHA1_H0;
638c2ecf20Sopenharmony_ci	sctx->state[1] = SHA1_H1;
648c2ecf20Sopenharmony_ci	sctx->state[2] = SHA1_H2;
658c2ecf20Sopenharmony_ci	sctx->state[3] = SHA1_H3;
668c2ecf20Sopenharmony_ci	sctx->state[4] = SHA1_H4;
678c2ecf20Sopenharmony_ci	sctx->count = 0;
688c2ecf20Sopenharmony_ci
698c2ecf20Sopenharmony_ci	return 0;
708c2ecf20Sopenharmony_ci}
718c2ecf20Sopenharmony_ci
728c2ecf20Sopenharmony_cistatic int ppc_spe_sha1_update(struct shash_desc *desc, const u8 *data,
738c2ecf20Sopenharmony_ci			unsigned int len)
748c2ecf20Sopenharmony_ci{
758c2ecf20Sopenharmony_ci	struct sha1_state *sctx = shash_desc_ctx(desc);
768c2ecf20Sopenharmony_ci	const unsigned int offset = sctx->count & 0x3f;
778c2ecf20Sopenharmony_ci	const unsigned int avail = 64 - offset;
788c2ecf20Sopenharmony_ci	unsigned int bytes;
798c2ecf20Sopenharmony_ci	const u8 *src = data;
808c2ecf20Sopenharmony_ci
818c2ecf20Sopenharmony_ci	if (avail > len) {
828c2ecf20Sopenharmony_ci		sctx->count += len;
838c2ecf20Sopenharmony_ci		memcpy((char *)sctx->buffer + offset, src, len);
848c2ecf20Sopenharmony_ci		return 0;
858c2ecf20Sopenharmony_ci	}
868c2ecf20Sopenharmony_ci
878c2ecf20Sopenharmony_ci	sctx->count += len;
888c2ecf20Sopenharmony_ci
898c2ecf20Sopenharmony_ci	if (offset) {
908c2ecf20Sopenharmony_ci		memcpy((char *)sctx->buffer + offset, src, avail);
918c2ecf20Sopenharmony_ci
928c2ecf20Sopenharmony_ci		spe_begin();
938c2ecf20Sopenharmony_ci		ppc_spe_sha1_transform(sctx->state, (const u8 *)sctx->buffer, 1);
948c2ecf20Sopenharmony_ci		spe_end();
958c2ecf20Sopenharmony_ci
968c2ecf20Sopenharmony_ci		len -= avail;
978c2ecf20Sopenharmony_ci		src += avail;
988c2ecf20Sopenharmony_ci	}
998c2ecf20Sopenharmony_ci
1008c2ecf20Sopenharmony_ci	while (len > 63) {
1018c2ecf20Sopenharmony_ci		bytes = (len > MAX_BYTES) ? MAX_BYTES : len;
1028c2ecf20Sopenharmony_ci		bytes = bytes & ~0x3f;
1038c2ecf20Sopenharmony_ci
1048c2ecf20Sopenharmony_ci		spe_begin();
1058c2ecf20Sopenharmony_ci		ppc_spe_sha1_transform(sctx->state, src, bytes >> 6);
1068c2ecf20Sopenharmony_ci		spe_end();
1078c2ecf20Sopenharmony_ci
1088c2ecf20Sopenharmony_ci		src += bytes;
1098c2ecf20Sopenharmony_ci		len -= bytes;
1108c2ecf20Sopenharmony_ci	};
1118c2ecf20Sopenharmony_ci
1128c2ecf20Sopenharmony_ci	memcpy((char *)sctx->buffer, src, len);
1138c2ecf20Sopenharmony_ci	return 0;
1148c2ecf20Sopenharmony_ci}
1158c2ecf20Sopenharmony_ci
1168c2ecf20Sopenharmony_cistatic int ppc_spe_sha1_final(struct shash_desc *desc, u8 *out)
1178c2ecf20Sopenharmony_ci{
1188c2ecf20Sopenharmony_ci	struct sha1_state *sctx = shash_desc_ctx(desc);
1198c2ecf20Sopenharmony_ci	const unsigned int offset = sctx->count & 0x3f;
1208c2ecf20Sopenharmony_ci	char *p = (char *)sctx->buffer + offset;
1218c2ecf20Sopenharmony_ci	int padlen;
1228c2ecf20Sopenharmony_ci	__be64 *pbits = (__be64 *)(((char *)&sctx->buffer) + 56);
1238c2ecf20Sopenharmony_ci	__be32 *dst = (__be32 *)out;
1248c2ecf20Sopenharmony_ci
1258c2ecf20Sopenharmony_ci	padlen = 55 - offset;
1268c2ecf20Sopenharmony_ci	*p++ = 0x80;
1278c2ecf20Sopenharmony_ci
1288c2ecf20Sopenharmony_ci	spe_begin();
1298c2ecf20Sopenharmony_ci
1308c2ecf20Sopenharmony_ci	if (padlen < 0) {
1318c2ecf20Sopenharmony_ci		memset(p, 0x00, padlen + sizeof (u64));
1328c2ecf20Sopenharmony_ci		ppc_spe_sha1_transform(sctx->state, sctx->buffer, 1);
1338c2ecf20Sopenharmony_ci		p = (char *)sctx->buffer;
1348c2ecf20Sopenharmony_ci		padlen = 56;
1358c2ecf20Sopenharmony_ci	}
1368c2ecf20Sopenharmony_ci
1378c2ecf20Sopenharmony_ci	memset(p, 0, padlen);
1388c2ecf20Sopenharmony_ci	*pbits = cpu_to_be64(sctx->count << 3);
1398c2ecf20Sopenharmony_ci	ppc_spe_sha1_transform(sctx->state, sctx->buffer, 1);
1408c2ecf20Sopenharmony_ci
1418c2ecf20Sopenharmony_ci	spe_end();
1428c2ecf20Sopenharmony_ci
1438c2ecf20Sopenharmony_ci	dst[0] = cpu_to_be32(sctx->state[0]);
1448c2ecf20Sopenharmony_ci	dst[1] = cpu_to_be32(sctx->state[1]);
1458c2ecf20Sopenharmony_ci	dst[2] = cpu_to_be32(sctx->state[2]);
1468c2ecf20Sopenharmony_ci	dst[3] = cpu_to_be32(sctx->state[3]);
1478c2ecf20Sopenharmony_ci	dst[4] = cpu_to_be32(sctx->state[4]);
1488c2ecf20Sopenharmony_ci
1498c2ecf20Sopenharmony_ci	ppc_sha1_clear_context(sctx);
1508c2ecf20Sopenharmony_ci	return 0;
1518c2ecf20Sopenharmony_ci}
1528c2ecf20Sopenharmony_ci
1538c2ecf20Sopenharmony_cistatic int ppc_spe_sha1_export(struct shash_desc *desc, void *out)
1548c2ecf20Sopenharmony_ci{
1558c2ecf20Sopenharmony_ci	struct sha1_state *sctx = shash_desc_ctx(desc);
1568c2ecf20Sopenharmony_ci
1578c2ecf20Sopenharmony_ci	memcpy(out, sctx, sizeof(*sctx));
1588c2ecf20Sopenharmony_ci	return 0;
1598c2ecf20Sopenharmony_ci}
1608c2ecf20Sopenharmony_ci
1618c2ecf20Sopenharmony_cistatic int ppc_spe_sha1_import(struct shash_desc *desc, const void *in)
1628c2ecf20Sopenharmony_ci{
1638c2ecf20Sopenharmony_ci	struct sha1_state *sctx = shash_desc_ctx(desc);
1648c2ecf20Sopenharmony_ci
1658c2ecf20Sopenharmony_ci	memcpy(sctx, in, sizeof(*sctx));
1668c2ecf20Sopenharmony_ci	return 0;
1678c2ecf20Sopenharmony_ci}
1688c2ecf20Sopenharmony_ci
1698c2ecf20Sopenharmony_cistatic struct shash_alg alg = {
1708c2ecf20Sopenharmony_ci	.digestsize	=	SHA1_DIGEST_SIZE,
1718c2ecf20Sopenharmony_ci	.init		=	ppc_spe_sha1_init,
1728c2ecf20Sopenharmony_ci	.update		=	ppc_spe_sha1_update,
1738c2ecf20Sopenharmony_ci	.final		=	ppc_spe_sha1_final,
1748c2ecf20Sopenharmony_ci	.export		=	ppc_spe_sha1_export,
1758c2ecf20Sopenharmony_ci	.import		=	ppc_spe_sha1_import,
1768c2ecf20Sopenharmony_ci	.descsize	=	sizeof(struct sha1_state),
1778c2ecf20Sopenharmony_ci	.statesize	=	sizeof(struct sha1_state),
1788c2ecf20Sopenharmony_ci	.base		=	{
1798c2ecf20Sopenharmony_ci		.cra_name	=	"sha1",
1808c2ecf20Sopenharmony_ci		.cra_driver_name=	"sha1-ppc-spe",
1818c2ecf20Sopenharmony_ci		.cra_priority	=	300,
1828c2ecf20Sopenharmony_ci		.cra_blocksize	=	SHA1_BLOCK_SIZE,
1838c2ecf20Sopenharmony_ci		.cra_module	=	THIS_MODULE,
1848c2ecf20Sopenharmony_ci	}
1858c2ecf20Sopenharmony_ci};
1868c2ecf20Sopenharmony_ci
1878c2ecf20Sopenharmony_cistatic int __init ppc_spe_sha1_mod_init(void)
1888c2ecf20Sopenharmony_ci{
1898c2ecf20Sopenharmony_ci	return crypto_register_shash(&alg);
1908c2ecf20Sopenharmony_ci}
1918c2ecf20Sopenharmony_ci
1928c2ecf20Sopenharmony_cistatic void __exit ppc_spe_sha1_mod_fini(void)
1938c2ecf20Sopenharmony_ci{
1948c2ecf20Sopenharmony_ci	crypto_unregister_shash(&alg);
1958c2ecf20Sopenharmony_ci}
1968c2ecf20Sopenharmony_ci
1978c2ecf20Sopenharmony_cimodule_init(ppc_spe_sha1_mod_init);
1988c2ecf20Sopenharmony_cimodule_exit(ppc_spe_sha1_mod_fini);
1998c2ecf20Sopenharmony_ci
2008c2ecf20Sopenharmony_ciMODULE_LICENSE("GPL");
2018c2ecf20Sopenharmony_ciMODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, SPE optimized");
2028c2ecf20Sopenharmony_ci
2038c2ecf20Sopenharmony_ciMODULE_ALIAS_CRYPTO("sha1");
2048c2ecf20Sopenharmony_ciMODULE_ALIAS_CRYPTO("sha1-ppc-spe");
205