18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 OR MIT 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. 48c2ecf20Sopenharmony_ci */ 58c2ecf20Sopenharmony_ci 68c2ecf20Sopenharmony_ci#include <crypto/internal/blake2s.h> 78c2ecf20Sopenharmony_ci#include <crypto/internal/simd.h> 88c2ecf20Sopenharmony_ci 98c2ecf20Sopenharmony_ci#include <linux/types.h> 108c2ecf20Sopenharmony_ci#include <linux/jump_label.h> 118c2ecf20Sopenharmony_ci#include <linux/kernel.h> 128c2ecf20Sopenharmony_ci#include <linux/module.h> 138c2ecf20Sopenharmony_ci#include <linux/sizes.h> 148c2ecf20Sopenharmony_ci 158c2ecf20Sopenharmony_ci#include <asm/cpufeature.h> 168c2ecf20Sopenharmony_ci#include <asm/fpu/api.h> 178c2ecf20Sopenharmony_ci#include <asm/processor.h> 188c2ecf20Sopenharmony_ci#include <asm/simd.h> 198c2ecf20Sopenharmony_ci 208c2ecf20Sopenharmony_ciasmlinkage void blake2s_compress_ssse3(struct blake2s_state *state, 218c2ecf20Sopenharmony_ci const u8 *block, const size_t nblocks, 228c2ecf20Sopenharmony_ci const u32 inc); 238c2ecf20Sopenharmony_ciasmlinkage void blake2s_compress_avx512(struct blake2s_state *state, 248c2ecf20Sopenharmony_ci const u8 *block, const size_t nblocks, 258c2ecf20Sopenharmony_ci const u32 inc); 268c2ecf20Sopenharmony_ci 278c2ecf20Sopenharmony_cistatic __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_ssse3); 288c2ecf20Sopenharmony_cistatic __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_avx512); 298c2ecf20Sopenharmony_ci 308c2ecf20Sopenharmony_civoid blake2s_compress(struct blake2s_state *state, const u8 *block, 318c2ecf20Sopenharmony_ci size_t nblocks, const u32 inc) 328c2ecf20Sopenharmony_ci{ 338c2ecf20Sopenharmony_ci /* SIMD disables preemption, so relax after processing each page. */ 348c2ecf20Sopenharmony_ci BUILD_BUG_ON(SZ_4K / BLAKE2S_BLOCK_SIZE < 8); 358c2ecf20Sopenharmony_ci 368c2ecf20Sopenharmony_ci if (!static_branch_likely(&blake2s_use_ssse3) || !crypto_simd_usable()) { 378c2ecf20Sopenharmony_ci blake2s_compress_generic(state, block, nblocks, inc); 388c2ecf20Sopenharmony_ci return; 398c2ecf20Sopenharmony_ci } 408c2ecf20Sopenharmony_ci 418c2ecf20Sopenharmony_ci do { 428c2ecf20Sopenharmony_ci const size_t blocks = min_t(size_t, nblocks, 438c2ecf20Sopenharmony_ci SZ_4K / BLAKE2S_BLOCK_SIZE); 448c2ecf20Sopenharmony_ci 458c2ecf20Sopenharmony_ci kernel_fpu_begin(); 468c2ecf20Sopenharmony_ci if (IS_ENABLED(CONFIG_AS_AVX512) && 478c2ecf20Sopenharmony_ci static_branch_likely(&blake2s_use_avx512)) 488c2ecf20Sopenharmony_ci blake2s_compress_avx512(state, block, blocks, inc); 498c2ecf20Sopenharmony_ci else 508c2ecf20Sopenharmony_ci blake2s_compress_ssse3(state, block, blocks, inc); 518c2ecf20Sopenharmony_ci kernel_fpu_end(); 528c2ecf20Sopenharmony_ci 538c2ecf20Sopenharmony_ci nblocks -= blocks; 548c2ecf20Sopenharmony_ci block += blocks * BLAKE2S_BLOCK_SIZE; 558c2ecf20Sopenharmony_ci } while (nblocks); 568c2ecf20Sopenharmony_ci} 578c2ecf20Sopenharmony_ciEXPORT_SYMBOL(blake2s_compress); 588c2ecf20Sopenharmony_ci 598c2ecf20Sopenharmony_cistatic int __init blake2s_mod_init(void) 608c2ecf20Sopenharmony_ci{ 618c2ecf20Sopenharmony_ci if (boot_cpu_has(X86_FEATURE_SSSE3)) 628c2ecf20Sopenharmony_ci static_branch_enable(&blake2s_use_ssse3); 638c2ecf20Sopenharmony_ci 648c2ecf20Sopenharmony_ci if (IS_ENABLED(CONFIG_AS_AVX512) && 658c2ecf20Sopenharmony_ci boot_cpu_has(X86_FEATURE_AVX) && 668c2ecf20Sopenharmony_ci boot_cpu_has(X86_FEATURE_AVX2) && 678c2ecf20Sopenharmony_ci boot_cpu_has(X86_FEATURE_AVX512F) && 688c2ecf20Sopenharmony_ci boot_cpu_has(X86_FEATURE_AVX512VL) && 698c2ecf20Sopenharmony_ci cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | 708c2ecf20Sopenharmony_ci XFEATURE_MASK_AVX512, NULL)) 718c2ecf20Sopenharmony_ci static_branch_enable(&blake2s_use_avx512); 728c2ecf20Sopenharmony_ci 738c2ecf20Sopenharmony_ci return 0; 748c2ecf20Sopenharmony_ci} 758c2ecf20Sopenharmony_ci 768c2ecf20Sopenharmony_cimodule_init(blake2s_mod_init); 778c2ecf20Sopenharmony_ci 788c2ecf20Sopenharmony_ciMODULE_LICENSE("GPL v2"); 79