xref: /kernel/linux/linux-6.6/lib/zstd/compress/hist.h (revision 62306a36)
162306a36Sopenharmony_ci/* ******************************************************************
262306a36Sopenharmony_ci * hist : Histogram functions
362306a36Sopenharmony_ci * part of Finite State Entropy project
462306a36Sopenharmony_ci * Copyright (c) Yann Collet, Facebook, Inc.
562306a36Sopenharmony_ci *
662306a36Sopenharmony_ci *  You can contact the author at :
762306a36Sopenharmony_ci *  - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
862306a36Sopenharmony_ci *  - Public forum : https://groups.google.com/forum/#!forum/lz4c
962306a36Sopenharmony_ci *
1062306a36Sopenharmony_ci * This source code is licensed under both the BSD-style license (found in the
1162306a36Sopenharmony_ci * LICENSE file in the root directory of this source tree) and the GPLv2 (found
1262306a36Sopenharmony_ci * in the COPYING file in the root directory of this source tree).
1362306a36Sopenharmony_ci * You may select, at your option, one of the above-listed licenses.
1462306a36Sopenharmony_ci****************************************************************** */
1562306a36Sopenharmony_ci
1662306a36Sopenharmony_ci/* --- dependencies --- */
1762306a36Sopenharmony_ci#include "../common/zstd_deps.h"   /* size_t */
1862306a36Sopenharmony_ci
1962306a36Sopenharmony_ci
2062306a36Sopenharmony_ci/* --- simple histogram functions --- */
2162306a36Sopenharmony_ci
2262306a36Sopenharmony_ci/*! HIST_count():
2362306a36Sopenharmony_ci *  Provides the precise count of each byte within a table 'count'.
2462306a36Sopenharmony_ci * 'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1).
2562306a36Sopenharmony_ci *  Updates *maxSymbolValuePtr with actual largest symbol value detected.
2662306a36Sopenharmony_ci * @return : count of the most frequent symbol (which isn't identified).
2762306a36Sopenharmony_ci *           or an error code, which can be tested using HIST_isError().
2862306a36Sopenharmony_ci *           note : if return == srcSize, there is only one symbol.
2962306a36Sopenharmony_ci */
3062306a36Sopenharmony_cisize_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr,
3162306a36Sopenharmony_ci                  const void* src, size_t srcSize);
3262306a36Sopenharmony_ci
3362306a36Sopenharmony_ciunsigned HIST_isError(size_t code);  /*< tells if a return value is an error code */
3462306a36Sopenharmony_ci
3562306a36Sopenharmony_ci
3662306a36Sopenharmony_ci/* --- advanced histogram functions --- */
3762306a36Sopenharmony_ci
3862306a36Sopenharmony_ci#define HIST_WKSP_SIZE_U32 1024
3962306a36Sopenharmony_ci#define HIST_WKSP_SIZE    (HIST_WKSP_SIZE_U32 * sizeof(unsigned))
4062306a36Sopenharmony_ci/* HIST_count_wksp() :
4162306a36Sopenharmony_ci *  Same as HIST_count(), but using an externally provided scratch buffer.
4262306a36Sopenharmony_ci *  Benefit is this function will use very little stack space.
4362306a36Sopenharmony_ci * `workSpace` is a writable buffer which must be 4-bytes aligned,
4462306a36Sopenharmony_ci * `workSpaceSize` must be >= HIST_WKSP_SIZE
4562306a36Sopenharmony_ci */
4662306a36Sopenharmony_cisize_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
4762306a36Sopenharmony_ci                       const void* src, size_t srcSize,
4862306a36Sopenharmony_ci                       void* workSpace, size_t workSpaceSize);
4962306a36Sopenharmony_ci
5062306a36Sopenharmony_ci/* HIST_countFast() :
5162306a36Sopenharmony_ci *  same as HIST_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr.
5262306a36Sopenharmony_ci *  This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr`
5362306a36Sopenharmony_ci */
5462306a36Sopenharmony_cisize_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
5562306a36Sopenharmony_ci                      const void* src, size_t srcSize);
5662306a36Sopenharmony_ci
5762306a36Sopenharmony_ci/* HIST_countFast_wksp() :
5862306a36Sopenharmony_ci *  Same as HIST_countFast(), but using an externally provided scratch buffer.
5962306a36Sopenharmony_ci * `workSpace` is a writable buffer which must be 4-bytes aligned,
6062306a36Sopenharmony_ci * `workSpaceSize` must be >= HIST_WKSP_SIZE
6162306a36Sopenharmony_ci */
6262306a36Sopenharmony_cisize_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
6362306a36Sopenharmony_ci                           const void* src, size_t srcSize,
6462306a36Sopenharmony_ci                           void* workSpace, size_t workSpaceSize);
6562306a36Sopenharmony_ci
6662306a36Sopenharmony_ci/*! HIST_count_simple() :
6762306a36Sopenharmony_ci *  Same as HIST_countFast(), this function is unsafe,
6862306a36Sopenharmony_ci *  and will segfault if any value within `src` is `> *maxSymbolValuePtr`.
6962306a36Sopenharmony_ci *  It is also a bit slower for large inputs.
7062306a36Sopenharmony_ci *  However, it does not need any additional memory (not even on stack).
7162306a36Sopenharmony_ci * @return : count of the most frequent symbol.
7262306a36Sopenharmony_ci *  Note this function doesn't produce any error (i.e. it must succeed).
7362306a36Sopenharmony_ci */
7462306a36Sopenharmony_ciunsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
7562306a36Sopenharmony_ci                           const void* src, size_t srcSize);
76