18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 OR MIT
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
48c2ecf20Sopenharmony_ci */
58c2ecf20Sopenharmony_ci
68c2ecf20Sopenharmony_ci#include <crypto/internal/blake2s.h>
78c2ecf20Sopenharmony_ci#include <crypto/internal/simd.h>
88c2ecf20Sopenharmony_ci
98c2ecf20Sopenharmony_ci#include <linux/types.h>
108c2ecf20Sopenharmony_ci#include <linux/jump_label.h>
118c2ecf20Sopenharmony_ci#include <linux/kernel.h>
128c2ecf20Sopenharmony_ci#include <linux/module.h>
138c2ecf20Sopenharmony_ci#include <linux/sizes.h>
148c2ecf20Sopenharmony_ci
158c2ecf20Sopenharmony_ci#include <asm/cpufeature.h>
168c2ecf20Sopenharmony_ci#include <asm/fpu/api.h>
178c2ecf20Sopenharmony_ci#include <asm/processor.h>
188c2ecf20Sopenharmony_ci#include <asm/simd.h>
198c2ecf20Sopenharmony_ci
/*
 * Accelerated BLAKE2s compression routines. They are only declared here
 * (asmlinkage); their definitions live outside this file.
 *
 * Both take the same arguments as blake2s_compress() below, which calls them
 * between kernel_fpu_begin() and kernel_fpu_end():
 *   @state:   hash state handed through from the caller
 *   @block:   start of the input; the caller advances this pointer by
 *             BLAKE2S_BLOCK_SIZE bytes per block consumed
 *   @nblocks: number of blocks to process in this call
 *   @inc:     passed through unchanged from blake2s_compress()
 *             NOTE(review): presumably the per-block counter increment --
 *             confirm against the implementation.
 */
asmlinkage void blake2s_compress_ssse3(struct blake2s_state *state,
				       const u8 *block, const size_t nblocks,
				       const u32 inc);
asmlinkage void blake2s_compress_avx512(struct blake2s_state *state,
					const u8 *block, const size_t nblocks,
					const u32 inc);
268c2ecf20Sopenharmony_ci
/*
 * Static keys selecting the compression implementation. Both default to
 * false and are only ever switched on from blake2s_mod_init(), after the
 * matching CPU features have been detected. blake2s_compress() tests them
 * with static_branch_likely().
 */
static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_ssse3);
static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_avx512);
298c2ecf20Sopenharmony_ci
308c2ecf20Sopenharmony_civoid blake2s_compress(struct blake2s_state *state, const u8 *block,
318c2ecf20Sopenharmony_ci		      size_t nblocks, const u32 inc)
328c2ecf20Sopenharmony_ci{
338c2ecf20Sopenharmony_ci	/* SIMD disables preemption, so relax after processing each page. */
348c2ecf20Sopenharmony_ci	BUILD_BUG_ON(SZ_4K / BLAKE2S_BLOCK_SIZE < 8);
358c2ecf20Sopenharmony_ci
368c2ecf20Sopenharmony_ci	if (!static_branch_likely(&blake2s_use_ssse3) || !crypto_simd_usable()) {
378c2ecf20Sopenharmony_ci		blake2s_compress_generic(state, block, nblocks, inc);
388c2ecf20Sopenharmony_ci		return;
398c2ecf20Sopenharmony_ci	}
408c2ecf20Sopenharmony_ci
418c2ecf20Sopenharmony_ci	do {
428c2ecf20Sopenharmony_ci		const size_t blocks = min_t(size_t, nblocks,
438c2ecf20Sopenharmony_ci					    SZ_4K / BLAKE2S_BLOCK_SIZE);
448c2ecf20Sopenharmony_ci
458c2ecf20Sopenharmony_ci		kernel_fpu_begin();
468c2ecf20Sopenharmony_ci		if (IS_ENABLED(CONFIG_AS_AVX512) &&
478c2ecf20Sopenharmony_ci		    static_branch_likely(&blake2s_use_avx512))
488c2ecf20Sopenharmony_ci			blake2s_compress_avx512(state, block, blocks, inc);
498c2ecf20Sopenharmony_ci		else
508c2ecf20Sopenharmony_ci			blake2s_compress_ssse3(state, block, blocks, inc);
518c2ecf20Sopenharmony_ci		kernel_fpu_end();
528c2ecf20Sopenharmony_ci
538c2ecf20Sopenharmony_ci		nblocks -= blocks;
548c2ecf20Sopenharmony_ci		block += blocks * BLAKE2S_BLOCK_SIZE;
558c2ecf20Sopenharmony_ci	} while (nblocks);
568c2ecf20Sopenharmony_ci}
578c2ecf20Sopenharmony_ciEXPORT_SYMBOL(blake2s_compress);
588c2ecf20Sopenharmony_ci
598c2ecf20Sopenharmony_cistatic int __init blake2s_mod_init(void)
608c2ecf20Sopenharmony_ci{
618c2ecf20Sopenharmony_ci	if (boot_cpu_has(X86_FEATURE_SSSE3))
628c2ecf20Sopenharmony_ci		static_branch_enable(&blake2s_use_ssse3);
638c2ecf20Sopenharmony_ci
648c2ecf20Sopenharmony_ci	if (IS_ENABLED(CONFIG_AS_AVX512) &&
658c2ecf20Sopenharmony_ci	    boot_cpu_has(X86_FEATURE_AVX) &&
668c2ecf20Sopenharmony_ci	    boot_cpu_has(X86_FEATURE_AVX2) &&
678c2ecf20Sopenharmony_ci	    boot_cpu_has(X86_FEATURE_AVX512F) &&
688c2ecf20Sopenharmony_ci	    boot_cpu_has(X86_FEATURE_AVX512VL) &&
698c2ecf20Sopenharmony_ci	    cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM |
708c2ecf20Sopenharmony_ci			      XFEATURE_MASK_AVX512, NULL))
718c2ecf20Sopenharmony_ci		static_branch_enable(&blake2s_use_avx512);
728c2ecf20Sopenharmony_ci
738c2ecf20Sopenharmony_ci	return 0;
748c2ecf20Sopenharmony_ci}
758c2ecf20Sopenharmony_ci
/* Run blake2s_mod_init() at module initialisation to set up the static keys. */
module_init(blake2s_mod_init);

MODULE_LICENSE("GPL v2");
79