18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * OpenSSL/Cryptogams accelerated Poly1305 transform for arm64 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org> 68c2ecf20Sopenharmony_ci */ 78c2ecf20Sopenharmony_ci 88c2ecf20Sopenharmony_ci#include <asm/hwcap.h> 98c2ecf20Sopenharmony_ci#include <asm/neon.h> 108c2ecf20Sopenharmony_ci#include <asm/simd.h> 118c2ecf20Sopenharmony_ci#include <asm/unaligned.h> 128c2ecf20Sopenharmony_ci#include <crypto/algapi.h> 138c2ecf20Sopenharmony_ci#include <crypto/internal/hash.h> 148c2ecf20Sopenharmony_ci#include <crypto/internal/poly1305.h> 158c2ecf20Sopenharmony_ci#include <crypto/internal/simd.h> 168c2ecf20Sopenharmony_ci#include <linux/cpufeature.h> 178c2ecf20Sopenharmony_ci#include <linux/crypto.h> 188c2ecf20Sopenharmony_ci#include <linux/jump_label.h> 198c2ecf20Sopenharmony_ci#include <linux/module.h> 208c2ecf20Sopenharmony_ci 218c2ecf20Sopenharmony_ciasmlinkage void poly1305_init_arm64(void *state, const u8 *key); 228c2ecf20Sopenharmony_ciasmlinkage void poly1305_blocks(void *state, const u8 *src, u32 len, u32 hibit); 238c2ecf20Sopenharmony_ciasmlinkage void poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit); 248c2ecf20Sopenharmony_ciasmlinkage void poly1305_emit(void *state, u8 *digest, const u32 *nonce); 258c2ecf20Sopenharmony_ci 268c2ecf20Sopenharmony_cistatic __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); 278c2ecf20Sopenharmony_ci 288c2ecf20Sopenharmony_civoid poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE]) 298c2ecf20Sopenharmony_ci{ 308c2ecf20Sopenharmony_ci poly1305_init_arm64(&dctx->h, key); 318c2ecf20Sopenharmony_ci dctx->s[0] = get_unaligned_le32(key + 16); 328c2ecf20Sopenharmony_ci dctx->s[1] = get_unaligned_le32(key + 20); 338c2ecf20Sopenharmony_ci dctx->s[2] = get_unaligned_le32(key + 24); 348c2ecf20Sopenharmony_ci dctx->s[3] = get_unaligned_le32(key + 28); 358c2ecf20Sopenharmony_ci dctx->buflen = 0; 368c2ecf20Sopenharmony_ci} 378c2ecf20Sopenharmony_ciEXPORT_SYMBOL(poly1305_init_arch); 388c2ecf20Sopenharmony_ci 398c2ecf20Sopenharmony_cistatic int neon_poly1305_init(struct shash_desc *desc) 408c2ecf20Sopenharmony_ci{ 418c2ecf20Sopenharmony_ci struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); 428c2ecf20Sopenharmony_ci 438c2ecf20Sopenharmony_ci dctx->buflen = 0; 448c2ecf20Sopenharmony_ci dctx->rset = 0; 458c2ecf20Sopenharmony_ci dctx->sset = false; 468c2ecf20Sopenharmony_ci 478c2ecf20Sopenharmony_ci return 0; 488c2ecf20Sopenharmony_ci} 498c2ecf20Sopenharmony_ci 508c2ecf20Sopenharmony_cistatic void neon_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src, 518c2ecf20Sopenharmony_ci u32 len, u32 hibit, bool do_neon) 528c2ecf20Sopenharmony_ci{ 538c2ecf20Sopenharmony_ci if (unlikely(!dctx->sset)) { 548c2ecf20Sopenharmony_ci if (!dctx->rset) { 558c2ecf20Sopenharmony_ci poly1305_init_arm64(&dctx->h, src); 568c2ecf20Sopenharmony_ci src += POLY1305_BLOCK_SIZE; 578c2ecf20Sopenharmony_ci len -= POLY1305_BLOCK_SIZE; 588c2ecf20Sopenharmony_ci dctx->rset = 1; 598c2ecf20Sopenharmony_ci } 608c2ecf20Sopenharmony_ci if (len >= POLY1305_BLOCK_SIZE) { 618c2ecf20Sopenharmony_ci dctx->s[0] = get_unaligned_le32(src + 0); 628c2ecf20Sopenharmony_ci dctx->s[1] = get_unaligned_le32(src + 4); 638c2ecf20Sopenharmony_ci dctx->s[2] = get_unaligned_le32(src + 8); 648c2ecf20Sopenharmony_ci dctx->s[3] = get_unaligned_le32(src + 12); 658c2ecf20Sopenharmony_ci src += POLY1305_BLOCK_SIZE; 668c2ecf20Sopenharmony_ci len -= POLY1305_BLOCK_SIZE; 678c2ecf20Sopenharmony_ci dctx->sset = true; 688c2ecf20Sopenharmony_ci } 698c2ecf20Sopenharmony_ci if (len < POLY1305_BLOCK_SIZE) 708c2ecf20Sopenharmony_ci return; 718c2ecf20Sopenharmony_ci } 728c2ecf20Sopenharmony_ci 738c2ecf20Sopenharmony_ci len &= ~(POLY1305_BLOCK_SIZE - 1); 748c2ecf20Sopenharmony_ci 758c2ecf20Sopenharmony_ci if (static_branch_likely(&have_neon) && likely(do_neon)) 768c2ecf20Sopenharmony_ci poly1305_blocks_neon(&dctx->h, src, len, hibit); 778c2ecf20Sopenharmony_ci else 788c2ecf20Sopenharmony_ci poly1305_blocks(&dctx->h, src, len, hibit); 798c2ecf20Sopenharmony_ci} 808c2ecf20Sopenharmony_ci 818c2ecf20Sopenharmony_cistatic void neon_poly1305_do_update(struct poly1305_desc_ctx *dctx, 828c2ecf20Sopenharmony_ci const u8 *src, u32 len, bool do_neon) 838c2ecf20Sopenharmony_ci{ 848c2ecf20Sopenharmony_ci if (unlikely(dctx->buflen)) { 858c2ecf20Sopenharmony_ci u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen); 868c2ecf20Sopenharmony_ci 878c2ecf20Sopenharmony_ci memcpy(dctx->buf + dctx->buflen, src, bytes); 888c2ecf20Sopenharmony_ci src += bytes; 898c2ecf20Sopenharmony_ci len -= bytes; 908c2ecf20Sopenharmony_ci dctx->buflen += bytes; 918c2ecf20Sopenharmony_ci 928c2ecf20Sopenharmony_ci if (dctx->buflen == POLY1305_BLOCK_SIZE) { 938c2ecf20Sopenharmony_ci neon_poly1305_blocks(dctx, dctx->buf, 948c2ecf20Sopenharmony_ci POLY1305_BLOCK_SIZE, 1, false); 958c2ecf20Sopenharmony_ci dctx->buflen = 0; 968c2ecf20Sopenharmony_ci } 978c2ecf20Sopenharmony_ci } 988c2ecf20Sopenharmony_ci 998c2ecf20Sopenharmony_ci if (likely(len >= POLY1305_BLOCK_SIZE)) { 1008c2ecf20Sopenharmony_ci neon_poly1305_blocks(dctx, src, len, 1, do_neon); 1018c2ecf20Sopenharmony_ci src += round_down(len, POLY1305_BLOCK_SIZE); 1028c2ecf20Sopenharmony_ci len %= POLY1305_BLOCK_SIZE; 1038c2ecf20Sopenharmony_ci } 1048c2ecf20Sopenharmony_ci 1058c2ecf20Sopenharmony_ci if (unlikely(len)) { 1068c2ecf20Sopenharmony_ci dctx->buflen = len; 1078c2ecf20Sopenharmony_ci memcpy(dctx->buf, src, len); 1088c2ecf20Sopenharmony_ci } 1098c2ecf20Sopenharmony_ci} 1108c2ecf20Sopenharmony_ci 1118c2ecf20Sopenharmony_cistatic int neon_poly1305_update(struct shash_desc *desc, 1128c2ecf20Sopenharmony_ci const u8 *src, unsigned int srclen) 1138c2ecf20Sopenharmony_ci{ 1148c2ecf20Sopenharmony_ci bool do_neon = crypto_simd_usable() && srclen > 128; 1158c2ecf20Sopenharmony_ci struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); 1168c2ecf20Sopenharmony_ci 1178c2ecf20Sopenharmony_ci if (static_branch_likely(&have_neon) && do_neon) 1188c2ecf20Sopenharmony_ci kernel_neon_begin(); 1198c2ecf20Sopenharmony_ci neon_poly1305_do_update(dctx, src, srclen, do_neon); 1208c2ecf20Sopenharmony_ci if (static_branch_likely(&have_neon) && do_neon) 1218c2ecf20Sopenharmony_ci kernel_neon_end(); 1228c2ecf20Sopenharmony_ci return 0; 1238c2ecf20Sopenharmony_ci} 1248c2ecf20Sopenharmony_ci 1258c2ecf20Sopenharmony_civoid poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, 1268c2ecf20Sopenharmony_ci unsigned int nbytes) 1278c2ecf20Sopenharmony_ci{ 1288c2ecf20Sopenharmony_ci if (unlikely(dctx->buflen)) { 1298c2ecf20Sopenharmony_ci u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen); 1308c2ecf20Sopenharmony_ci 1318c2ecf20Sopenharmony_ci memcpy(dctx->buf + dctx->buflen, src, bytes); 1328c2ecf20Sopenharmony_ci src += bytes; 1338c2ecf20Sopenharmony_ci nbytes -= bytes; 1348c2ecf20Sopenharmony_ci dctx->buflen += bytes; 1358c2ecf20Sopenharmony_ci 1368c2ecf20Sopenharmony_ci if (dctx->buflen == POLY1305_BLOCK_SIZE) { 1378c2ecf20Sopenharmony_ci poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 1); 1388c2ecf20Sopenharmony_ci dctx->buflen = 0; 1398c2ecf20Sopenharmony_ci } 1408c2ecf20Sopenharmony_ci } 1418c2ecf20Sopenharmony_ci 1428c2ecf20Sopenharmony_ci if (likely(nbytes >= POLY1305_BLOCK_SIZE)) { 1438c2ecf20Sopenharmony_ci unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE); 1448c2ecf20Sopenharmony_ci 1458c2ecf20Sopenharmony_ci if (static_branch_likely(&have_neon) && crypto_simd_usable()) { 1468c2ecf20Sopenharmony_ci do { 1478c2ecf20Sopenharmony_ci unsigned int todo = min_t(unsigned int, len, SZ_4K); 1488c2ecf20Sopenharmony_ci 1498c2ecf20Sopenharmony_ci kernel_neon_begin(); 1508c2ecf20Sopenharmony_ci poly1305_blocks_neon(&dctx->h, src, todo, 1); 1518c2ecf20Sopenharmony_ci kernel_neon_end(); 1528c2ecf20Sopenharmony_ci 1538c2ecf20Sopenharmony_ci len -= todo; 1548c2ecf20Sopenharmony_ci src += todo; 1558c2ecf20Sopenharmony_ci } while (len); 1568c2ecf20Sopenharmony_ci } else { 1578c2ecf20Sopenharmony_ci poly1305_blocks(&dctx->h, src, len, 1); 1588c2ecf20Sopenharmony_ci src += len; 1598c2ecf20Sopenharmony_ci } 1608c2ecf20Sopenharmony_ci nbytes %= POLY1305_BLOCK_SIZE; 1618c2ecf20Sopenharmony_ci } 1628c2ecf20Sopenharmony_ci 1638c2ecf20Sopenharmony_ci if (unlikely(nbytes)) { 1648c2ecf20Sopenharmony_ci dctx->buflen = nbytes; 1658c2ecf20Sopenharmony_ci memcpy(dctx->buf, src, nbytes); 1668c2ecf20Sopenharmony_ci } 1678c2ecf20Sopenharmony_ci} 1688c2ecf20Sopenharmony_ciEXPORT_SYMBOL(poly1305_update_arch); 1698c2ecf20Sopenharmony_ci 1708c2ecf20Sopenharmony_civoid poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) 1718c2ecf20Sopenharmony_ci{ 1728c2ecf20Sopenharmony_ci if (unlikely(dctx->buflen)) { 1738c2ecf20Sopenharmony_ci dctx->buf[dctx->buflen++] = 1; 1748c2ecf20Sopenharmony_ci memset(dctx->buf + dctx->buflen, 0, 1758c2ecf20Sopenharmony_ci POLY1305_BLOCK_SIZE - dctx->buflen); 1768c2ecf20Sopenharmony_ci poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0); 1778c2ecf20Sopenharmony_ci } 1788c2ecf20Sopenharmony_ci 1798c2ecf20Sopenharmony_ci poly1305_emit(&dctx->h, dst, dctx->s); 1808c2ecf20Sopenharmony_ci *dctx = (struct poly1305_desc_ctx){}; 1818c2ecf20Sopenharmony_ci} 1828c2ecf20Sopenharmony_ciEXPORT_SYMBOL(poly1305_final_arch); 1838c2ecf20Sopenharmony_ci 1848c2ecf20Sopenharmony_cistatic int neon_poly1305_final(struct shash_desc *desc, u8 *dst) 1858c2ecf20Sopenharmony_ci{ 1868c2ecf20Sopenharmony_ci struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); 1878c2ecf20Sopenharmony_ci 1888c2ecf20Sopenharmony_ci if (unlikely(!dctx->sset)) 1898c2ecf20Sopenharmony_ci return -ENOKEY; 1908c2ecf20Sopenharmony_ci 1918c2ecf20Sopenharmony_ci poly1305_final_arch(dctx, dst); 1928c2ecf20Sopenharmony_ci return 0; 1938c2ecf20Sopenharmony_ci} 1948c2ecf20Sopenharmony_ci 1958c2ecf20Sopenharmony_cistatic struct shash_alg neon_poly1305_alg = { 1968c2ecf20Sopenharmony_ci .init = neon_poly1305_init, 1978c2ecf20Sopenharmony_ci .update = neon_poly1305_update, 1988c2ecf20Sopenharmony_ci .final = neon_poly1305_final, 1998c2ecf20Sopenharmony_ci .digestsize = POLY1305_DIGEST_SIZE, 2008c2ecf20Sopenharmony_ci .descsize = sizeof(struct poly1305_desc_ctx), 2018c2ecf20Sopenharmony_ci 2028c2ecf20Sopenharmony_ci .base.cra_name = "poly1305", 2038c2ecf20Sopenharmony_ci .base.cra_driver_name = "poly1305-neon", 2048c2ecf20Sopenharmony_ci .base.cra_priority = 200, 2058c2ecf20Sopenharmony_ci .base.cra_blocksize = POLY1305_BLOCK_SIZE, 2068c2ecf20Sopenharmony_ci .base.cra_module = THIS_MODULE, 2078c2ecf20Sopenharmony_ci}; 2088c2ecf20Sopenharmony_ci 2098c2ecf20Sopenharmony_cistatic int __init neon_poly1305_mod_init(void) 2108c2ecf20Sopenharmony_ci{ 2118c2ecf20Sopenharmony_ci if (!cpu_have_named_feature(ASIMD)) 2128c2ecf20Sopenharmony_ci return 0; 2138c2ecf20Sopenharmony_ci 2148c2ecf20Sopenharmony_ci static_branch_enable(&have_neon); 2158c2ecf20Sopenharmony_ci 2168c2ecf20Sopenharmony_ci return IS_REACHABLE(CONFIG_CRYPTO_HASH) ? 2178c2ecf20Sopenharmony_ci crypto_register_shash(&neon_poly1305_alg) : 0; 2188c2ecf20Sopenharmony_ci} 2198c2ecf20Sopenharmony_ci 2208c2ecf20Sopenharmony_cistatic void __exit neon_poly1305_mod_exit(void) 2218c2ecf20Sopenharmony_ci{ 2228c2ecf20Sopenharmony_ci if (IS_REACHABLE(CONFIG_CRYPTO_HASH) && cpu_have_named_feature(ASIMD)) 2238c2ecf20Sopenharmony_ci crypto_unregister_shash(&neon_poly1305_alg); 2248c2ecf20Sopenharmony_ci} 2258c2ecf20Sopenharmony_ci 2268c2ecf20Sopenharmony_cimodule_init(neon_poly1305_mod_init); 2278c2ecf20Sopenharmony_cimodule_exit(neon_poly1305_mod_exit); 2288c2ecf20Sopenharmony_ci 2298c2ecf20Sopenharmony_ciMODULE_LICENSE("GPL v2"); 2308c2ecf20Sopenharmony_ciMODULE_ALIAS_CRYPTO("poly1305"); 2318c2ecf20Sopenharmony_ciMODULE_ALIAS_CRYPTO("poly1305-neon"); 232