18c2ecf20Sopenharmony_ci/*
28c2ecf20Sopenharmony_ci * FSE : Finite State Entropy encoder
38c2ecf20Sopenharmony_ci * Copyright (C) 2013-2015, Yann Collet.
48c2ecf20Sopenharmony_ci *
58c2ecf20Sopenharmony_ci * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
68c2ecf20Sopenharmony_ci *
78c2ecf20Sopenharmony_ci * Redistribution and use in source and binary forms, with or without
88c2ecf20Sopenharmony_ci * modification, are permitted provided that the following conditions are
98c2ecf20Sopenharmony_ci * met:
108c2ecf20Sopenharmony_ci *
118c2ecf20Sopenharmony_ci *   * Redistributions of source code must retain the above copyright
128c2ecf20Sopenharmony_ci * notice, this list of conditions and the following disclaimer.
138c2ecf20Sopenharmony_ci *   * Redistributions in binary form must reproduce the above
148c2ecf20Sopenharmony_ci * copyright notice, this list of conditions and the following disclaimer
158c2ecf20Sopenharmony_ci * in the documentation and/or other materials provided with the
168c2ecf20Sopenharmony_ci * distribution.
178c2ecf20Sopenharmony_ci *
188c2ecf20Sopenharmony_ci * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
198c2ecf20Sopenharmony_ci * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
208c2ecf20Sopenharmony_ci * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
218c2ecf20Sopenharmony_ci * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
228c2ecf20Sopenharmony_ci * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
238c2ecf20Sopenharmony_ci * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
248c2ecf20Sopenharmony_ci * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
258c2ecf20Sopenharmony_ci * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
268c2ecf20Sopenharmony_ci * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
278c2ecf20Sopenharmony_ci * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
288c2ecf20Sopenharmony_ci * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
298c2ecf20Sopenharmony_ci *
308c2ecf20Sopenharmony_ci * This program is free software; you can redistribute it and/or modify it under
318c2ecf20Sopenharmony_ci * the terms of the GNU General Public License version 2 as published by the
328c2ecf20Sopenharmony_ci * Free Software Foundation. This program is dual-licensed; you may select
338c2ecf20Sopenharmony_ci * either version 2 of the GNU General Public License ("GPL") or BSD license
348c2ecf20Sopenharmony_ci * ("BSD").
358c2ecf20Sopenharmony_ci *
368c2ecf20Sopenharmony_ci * You can contact the author at :
378c2ecf20Sopenharmony_ci * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
388c2ecf20Sopenharmony_ci */
398c2ecf20Sopenharmony_ci
408c2ecf20Sopenharmony_ci/* **************************************************************
418c2ecf20Sopenharmony_ci*  Compiler specifics
428c2ecf20Sopenharmony_ci****************************************************************/
438c2ecf20Sopenharmony_ci#define FORCE_INLINE static __always_inline
448c2ecf20Sopenharmony_ci
458c2ecf20Sopenharmony_ci/* **************************************************************
468c2ecf20Sopenharmony_ci*  Includes
478c2ecf20Sopenharmony_ci****************************************************************/
488c2ecf20Sopenharmony_ci#include "bitstream.h"
498c2ecf20Sopenharmony_ci#include "fse.h"
508c2ecf20Sopenharmony_ci#include <linux/compiler.h>
518c2ecf20Sopenharmony_ci#include <linux/kernel.h>
528c2ecf20Sopenharmony_ci#include <linux/math64.h>
538c2ecf20Sopenharmony_ci#include <linux/string.h> /* memcpy, memset */
548c2ecf20Sopenharmony_ci
558c2ecf20Sopenharmony_ci/* **************************************************************
568c2ecf20Sopenharmony_ci*  Error Management
578c2ecf20Sopenharmony_ci****************************************************************/
588c2ecf20Sopenharmony_ci#define FSE_STATIC_ASSERT(c)                                   \
598c2ecf20Sopenharmony_ci	{                                                      \
608c2ecf20Sopenharmony_ci		enum { FSE_static_assert = 1 / (int)(!!(c)) }; \
618c2ecf20Sopenharmony_ci	} /* use only *after* variable declarations */
628c2ecf20Sopenharmony_ci
638c2ecf20Sopenharmony_ci/* **************************************************************
648c2ecf20Sopenharmony_ci*  Templates
658c2ecf20Sopenharmony_ci****************************************************************/
668c2ecf20Sopenharmony_ci/*
678c2ecf20Sopenharmony_ci  designed to be included
688c2ecf20Sopenharmony_ci  for type-specific functions (template emulation in C)
698c2ecf20Sopenharmony_ci  Objective is to write these functions only once, for improved maintenance
708c2ecf20Sopenharmony_ci*/
718c2ecf20Sopenharmony_ci
728c2ecf20Sopenharmony_ci/* safety checks */
738c2ecf20Sopenharmony_ci#ifndef FSE_FUNCTION_EXTENSION
748c2ecf20Sopenharmony_ci#error "FSE_FUNCTION_EXTENSION must be defined"
758c2ecf20Sopenharmony_ci#endif
768c2ecf20Sopenharmony_ci#ifndef FSE_FUNCTION_TYPE
778c2ecf20Sopenharmony_ci#error "FSE_FUNCTION_TYPE must be defined"
788c2ecf20Sopenharmony_ci#endif
798c2ecf20Sopenharmony_ci
808c2ecf20Sopenharmony_ci/* Function names */
818c2ecf20Sopenharmony_ci#define FSE_CAT(X, Y) X##Y
828c2ecf20Sopenharmony_ci#define FSE_FUNCTION_NAME(X, Y) FSE_CAT(X, Y)
838c2ecf20Sopenharmony_ci#define FSE_TYPE_NAME(X, Y) FSE_CAT(X, Y)
848c2ecf20Sopenharmony_ci
858c2ecf20Sopenharmony_ci/* Function templates */
868c2ecf20Sopenharmony_ci
878c2ecf20Sopenharmony_ci/* FSE_buildCTable_wksp() :
888c2ecf20Sopenharmony_ci * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
898c2ecf20Sopenharmony_ci * wkspSize should be sized to handle worst case situation, which is `1<<max_tableLog * sizeof(FSE_FUNCTION_TYPE)`
908c2ecf20Sopenharmony_ci * workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements
918c2ecf20Sopenharmony_ci */
928c2ecf20Sopenharmony_cisize_t FSE_buildCTable_wksp(FSE_CTable *ct, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void *workspace, size_t workspaceSize)
938c2ecf20Sopenharmony_ci{
948c2ecf20Sopenharmony_ci	U32 const tableSize = 1 << tableLog;
958c2ecf20Sopenharmony_ci	U32 const tableMask = tableSize - 1;
968c2ecf20Sopenharmony_ci	void *const ptr = ct;
978c2ecf20Sopenharmony_ci	U16 *const tableU16 = ((U16 *)ptr) + 2;
988c2ecf20Sopenharmony_ci	void *const FSCT = ((U32 *)ptr) + 1 /* header */ + (tableLog ? tableSize >> 1 : 1);
998c2ecf20Sopenharmony_ci	FSE_symbolCompressionTransform *const symbolTT = (FSE_symbolCompressionTransform *)(FSCT);
1008c2ecf20Sopenharmony_ci	U32 const step = FSE_TABLESTEP(tableSize);
1018c2ecf20Sopenharmony_ci	U32 highThreshold = tableSize - 1;
1028c2ecf20Sopenharmony_ci
1038c2ecf20Sopenharmony_ci	U32 *cumul;
1048c2ecf20Sopenharmony_ci	FSE_FUNCTION_TYPE *tableSymbol;
1058c2ecf20Sopenharmony_ci	size_t spaceUsed32 = 0;
1068c2ecf20Sopenharmony_ci
1078c2ecf20Sopenharmony_ci	cumul = (U32 *)workspace + spaceUsed32;
1088c2ecf20Sopenharmony_ci	spaceUsed32 += FSE_MAX_SYMBOL_VALUE + 2;
1098c2ecf20Sopenharmony_ci	tableSymbol = (FSE_FUNCTION_TYPE *)((U32 *)workspace + spaceUsed32);
1108c2ecf20Sopenharmony_ci	spaceUsed32 += ALIGN(sizeof(FSE_FUNCTION_TYPE) * ((size_t)1 << tableLog), sizeof(U32)) >> 2;
1118c2ecf20Sopenharmony_ci
1128c2ecf20Sopenharmony_ci	if ((spaceUsed32 << 2) > workspaceSize)
1138c2ecf20Sopenharmony_ci		return ERROR(tableLog_tooLarge);
1148c2ecf20Sopenharmony_ci	workspace = (U32 *)workspace + spaceUsed32;
1158c2ecf20Sopenharmony_ci	workspaceSize -= (spaceUsed32 << 2);
1168c2ecf20Sopenharmony_ci
1178c2ecf20Sopenharmony_ci	/* CTable header */
1188c2ecf20Sopenharmony_ci	tableU16[-2] = (U16)tableLog;
1198c2ecf20Sopenharmony_ci	tableU16[-1] = (U16)maxSymbolValue;
1208c2ecf20Sopenharmony_ci
1218c2ecf20Sopenharmony_ci	/* For explanations on how to distribute symbol values over the table :
1228c2ecf20Sopenharmony_ci	*  http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
1238c2ecf20Sopenharmony_ci
1248c2ecf20Sopenharmony_ci	/* symbol start positions */
1258c2ecf20Sopenharmony_ci	{
1268c2ecf20Sopenharmony_ci		U32 u;
1278c2ecf20Sopenharmony_ci		cumul[0] = 0;
1288c2ecf20Sopenharmony_ci		for (u = 1; u <= maxSymbolValue + 1; u++) {
1298c2ecf20Sopenharmony_ci			if (normalizedCounter[u - 1] == -1) { /* Low proba symbol */
1308c2ecf20Sopenharmony_ci				cumul[u] = cumul[u - 1] + 1;
1318c2ecf20Sopenharmony_ci				tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u - 1);
1328c2ecf20Sopenharmony_ci			} else {
1338c2ecf20Sopenharmony_ci				cumul[u] = cumul[u - 1] + normalizedCounter[u - 1];
1348c2ecf20Sopenharmony_ci			}
1358c2ecf20Sopenharmony_ci		}
1368c2ecf20Sopenharmony_ci		cumul[maxSymbolValue + 1] = tableSize + 1;
1378c2ecf20Sopenharmony_ci	}
1388c2ecf20Sopenharmony_ci
1398c2ecf20Sopenharmony_ci	/* Spread symbols */
1408c2ecf20Sopenharmony_ci	{
1418c2ecf20Sopenharmony_ci		U32 position = 0;
1428c2ecf20Sopenharmony_ci		U32 symbol;
1438c2ecf20Sopenharmony_ci		for (symbol = 0; symbol <= maxSymbolValue; symbol++) {
1448c2ecf20Sopenharmony_ci			int nbOccurences;
1458c2ecf20Sopenharmony_ci			for (nbOccurences = 0; nbOccurences < normalizedCounter[symbol]; nbOccurences++) {
1468c2ecf20Sopenharmony_ci				tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
1478c2ecf20Sopenharmony_ci				position = (position + step) & tableMask;
1488c2ecf20Sopenharmony_ci				while (position > highThreshold)
1498c2ecf20Sopenharmony_ci					position = (position + step) & tableMask; /* Low proba area */
1508c2ecf20Sopenharmony_ci			}
1518c2ecf20Sopenharmony_ci		}
1528c2ecf20Sopenharmony_ci
1538c2ecf20Sopenharmony_ci		if (position != 0)
1548c2ecf20Sopenharmony_ci			return ERROR(GENERIC); /* Must have gone through all positions */
1558c2ecf20Sopenharmony_ci	}
1568c2ecf20Sopenharmony_ci
1578c2ecf20Sopenharmony_ci	/* Build table */
1588c2ecf20Sopenharmony_ci	{
1598c2ecf20Sopenharmony_ci		U32 u;
1608c2ecf20Sopenharmony_ci		for (u = 0; u < tableSize; u++) {
1618c2ecf20Sopenharmony_ci			FSE_FUNCTION_TYPE s = tableSymbol[u];	/* note : static analyzer may not understand tableSymbol is properly initialized */
1628c2ecf20Sopenharmony_ci			tableU16[cumul[s]++] = (U16)(tableSize + u); /* TableU16 : sorted by symbol order; gives next state value */
1638c2ecf20Sopenharmony_ci		}
1648c2ecf20Sopenharmony_ci	}
1658c2ecf20Sopenharmony_ci
1668c2ecf20Sopenharmony_ci	/* Build Symbol Transformation Table */
1678c2ecf20Sopenharmony_ci	{
1688c2ecf20Sopenharmony_ci		unsigned total = 0;
1698c2ecf20Sopenharmony_ci		unsigned s;
1708c2ecf20Sopenharmony_ci		for (s = 0; s <= maxSymbolValue; s++) {
1718c2ecf20Sopenharmony_ci			switch (normalizedCounter[s]) {
1728c2ecf20Sopenharmony_ci			case 0: break;
1738c2ecf20Sopenharmony_ci
1748c2ecf20Sopenharmony_ci			case -1:
1758c2ecf20Sopenharmony_ci			case 1:
1768c2ecf20Sopenharmony_ci				symbolTT[s].deltaNbBits = (tableLog << 16) - (1 << tableLog);
1778c2ecf20Sopenharmony_ci				symbolTT[s].deltaFindState = total - 1;
1788c2ecf20Sopenharmony_ci				total++;
1798c2ecf20Sopenharmony_ci				break;
1808c2ecf20Sopenharmony_ci			default: {
1818c2ecf20Sopenharmony_ci				U32 const maxBitsOut = tableLog - BIT_highbit32(normalizedCounter[s] - 1);
1828c2ecf20Sopenharmony_ci				U32 const minStatePlus = normalizedCounter[s] << maxBitsOut;
1838c2ecf20Sopenharmony_ci				symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
1848c2ecf20Sopenharmony_ci				symbolTT[s].deltaFindState = total - normalizedCounter[s];
1858c2ecf20Sopenharmony_ci				total += normalizedCounter[s];
1868c2ecf20Sopenharmony_ci			}
1878c2ecf20Sopenharmony_ci			}
1888c2ecf20Sopenharmony_ci		}
1898c2ecf20Sopenharmony_ci	}
1908c2ecf20Sopenharmony_ci
1918c2ecf20Sopenharmony_ci	return 0;
1928c2ecf20Sopenharmony_ci}
1938c2ecf20Sopenharmony_ci
1948c2ecf20Sopenharmony_ci/*-**************************************************************
1958c2ecf20Sopenharmony_ci*  FSE NCount encoding-decoding
1968c2ecf20Sopenharmony_ci****************************************************************/
1978c2ecf20Sopenharmony_cisize_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
1988c2ecf20Sopenharmony_ci{
1998c2ecf20Sopenharmony_ci	size_t const maxHeaderSize = (((maxSymbolValue + 1) * tableLog) >> 3) + 3;
2008c2ecf20Sopenharmony_ci	return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */
2018c2ecf20Sopenharmony_ci}
2028c2ecf20Sopenharmony_ci
2038c2ecf20Sopenharmony_cistatic size_t FSE_writeNCount_generic(void *header, size_t headerBufferSize, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
2048c2ecf20Sopenharmony_ci				      unsigned writeIsSafe)
2058c2ecf20Sopenharmony_ci{
2068c2ecf20Sopenharmony_ci	BYTE *const ostart = (BYTE *)header;
2078c2ecf20Sopenharmony_ci	BYTE *out = ostart;
2088c2ecf20Sopenharmony_ci	BYTE *const oend = ostart + headerBufferSize;
2098c2ecf20Sopenharmony_ci	int nbBits;
2108c2ecf20Sopenharmony_ci	const int tableSize = 1 << tableLog;
2118c2ecf20Sopenharmony_ci	int remaining;
2128c2ecf20Sopenharmony_ci	int threshold;
2138c2ecf20Sopenharmony_ci	U32 bitStream;
2148c2ecf20Sopenharmony_ci	int bitCount;
2158c2ecf20Sopenharmony_ci	unsigned charnum = 0;
2168c2ecf20Sopenharmony_ci	int previous0 = 0;
2178c2ecf20Sopenharmony_ci
2188c2ecf20Sopenharmony_ci	bitStream = 0;
2198c2ecf20Sopenharmony_ci	bitCount = 0;
2208c2ecf20Sopenharmony_ci	/* Table Size */
2218c2ecf20Sopenharmony_ci	bitStream += (tableLog - FSE_MIN_TABLELOG) << bitCount;
2228c2ecf20Sopenharmony_ci	bitCount += 4;
2238c2ecf20Sopenharmony_ci
2248c2ecf20Sopenharmony_ci	/* Init */
2258c2ecf20Sopenharmony_ci	remaining = tableSize + 1; /* +1 for extra accuracy */
2268c2ecf20Sopenharmony_ci	threshold = tableSize;
2278c2ecf20Sopenharmony_ci	nbBits = tableLog + 1;
2288c2ecf20Sopenharmony_ci
2298c2ecf20Sopenharmony_ci	while (remaining > 1) { /* stops at 1 */
2308c2ecf20Sopenharmony_ci		if (previous0) {
2318c2ecf20Sopenharmony_ci			unsigned start = charnum;
2328c2ecf20Sopenharmony_ci			while (!normalizedCounter[charnum])
2338c2ecf20Sopenharmony_ci				charnum++;
2348c2ecf20Sopenharmony_ci			while (charnum >= start + 24) {
2358c2ecf20Sopenharmony_ci				start += 24;
2368c2ecf20Sopenharmony_ci				bitStream += 0xFFFFU << bitCount;
2378c2ecf20Sopenharmony_ci				if ((!writeIsSafe) && (out > oend - 2))
2388c2ecf20Sopenharmony_ci					return ERROR(dstSize_tooSmall); /* Buffer overflow */
2398c2ecf20Sopenharmony_ci				out[0] = (BYTE)bitStream;
2408c2ecf20Sopenharmony_ci				out[1] = (BYTE)(bitStream >> 8);
2418c2ecf20Sopenharmony_ci				out += 2;
2428c2ecf20Sopenharmony_ci				bitStream >>= 16;
2438c2ecf20Sopenharmony_ci			}
2448c2ecf20Sopenharmony_ci			while (charnum >= start + 3) {
2458c2ecf20Sopenharmony_ci				start += 3;
2468c2ecf20Sopenharmony_ci				bitStream += 3 << bitCount;
2478c2ecf20Sopenharmony_ci				bitCount += 2;
2488c2ecf20Sopenharmony_ci			}
2498c2ecf20Sopenharmony_ci			bitStream += (charnum - start) << bitCount;
2508c2ecf20Sopenharmony_ci			bitCount += 2;
2518c2ecf20Sopenharmony_ci			if (bitCount > 16) {
2528c2ecf20Sopenharmony_ci				if ((!writeIsSafe) && (out > oend - 2))
2538c2ecf20Sopenharmony_ci					return ERROR(dstSize_tooSmall); /* Buffer overflow */
2548c2ecf20Sopenharmony_ci				out[0] = (BYTE)bitStream;
2558c2ecf20Sopenharmony_ci				out[1] = (BYTE)(bitStream >> 8);
2568c2ecf20Sopenharmony_ci				out += 2;
2578c2ecf20Sopenharmony_ci				bitStream >>= 16;
2588c2ecf20Sopenharmony_ci				bitCount -= 16;
2598c2ecf20Sopenharmony_ci			}
2608c2ecf20Sopenharmony_ci		}
2618c2ecf20Sopenharmony_ci		{
2628c2ecf20Sopenharmony_ci			int count = normalizedCounter[charnum++];
2638c2ecf20Sopenharmony_ci			int const max = (2 * threshold - 1) - remaining;
2648c2ecf20Sopenharmony_ci			remaining -= count < 0 ? -count : count;
2658c2ecf20Sopenharmony_ci			count++; /* +1 for extra accuracy */
2668c2ecf20Sopenharmony_ci			if (count >= threshold)
2678c2ecf20Sopenharmony_ci				count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
2688c2ecf20Sopenharmony_ci			bitStream += count << bitCount;
2698c2ecf20Sopenharmony_ci			bitCount += nbBits;
2708c2ecf20Sopenharmony_ci			bitCount -= (count < max);
2718c2ecf20Sopenharmony_ci			previous0 = (count == 1);
2728c2ecf20Sopenharmony_ci			if (remaining < 1)
2738c2ecf20Sopenharmony_ci				return ERROR(GENERIC);
2748c2ecf20Sopenharmony_ci			while (remaining < threshold)
2758c2ecf20Sopenharmony_ci				nbBits--, threshold >>= 1;
2768c2ecf20Sopenharmony_ci		}
2778c2ecf20Sopenharmony_ci		if (bitCount > 16) {
2788c2ecf20Sopenharmony_ci			if ((!writeIsSafe) && (out > oend - 2))
2798c2ecf20Sopenharmony_ci				return ERROR(dstSize_tooSmall); /* Buffer overflow */
2808c2ecf20Sopenharmony_ci			out[0] = (BYTE)bitStream;
2818c2ecf20Sopenharmony_ci			out[1] = (BYTE)(bitStream >> 8);
2828c2ecf20Sopenharmony_ci			out += 2;
2838c2ecf20Sopenharmony_ci			bitStream >>= 16;
2848c2ecf20Sopenharmony_ci			bitCount -= 16;
2858c2ecf20Sopenharmony_ci		}
2868c2ecf20Sopenharmony_ci	}
2878c2ecf20Sopenharmony_ci
2888c2ecf20Sopenharmony_ci	/* flush remaining bitStream */
2898c2ecf20Sopenharmony_ci	if ((!writeIsSafe) && (out > oend - 2))
2908c2ecf20Sopenharmony_ci		return ERROR(dstSize_tooSmall); /* Buffer overflow */
2918c2ecf20Sopenharmony_ci	out[0] = (BYTE)bitStream;
2928c2ecf20Sopenharmony_ci	out[1] = (BYTE)(bitStream >> 8);
2938c2ecf20Sopenharmony_ci	out += (bitCount + 7) / 8;
2948c2ecf20Sopenharmony_ci
2958c2ecf20Sopenharmony_ci	if (charnum > maxSymbolValue + 1)
2968c2ecf20Sopenharmony_ci		return ERROR(GENERIC);
2978c2ecf20Sopenharmony_ci
2988c2ecf20Sopenharmony_ci	return (out - ostart);
2998c2ecf20Sopenharmony_ci}
3008c2ecf20Sopenharmony_ci
3018c2ecf20Sopenharmony_cisize_t FSE_writeNCount(void *buffer, size_t bufferSize, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
3028c2ecf20Sopenharmony_ci{
3038c2ecf20Sopenharmony_ci	if (tableLog > FSE_MAX_TABLELOG)
3048c2ecf20Sopenharmony_ci		return ERROR(tableLog_tooLarge); /* Unsupported */
3058c2ecf20Sopenharmony_ci	if (tableLog < FSE_MIN_TABLELOG)
3068c2ecf20Sopenharmony_ci		return ERROR(GENERIC); /* Unsupported */
3078c2ecf20Sopenharmony_ci
3088c2ecf20Sopenharmony_ci	if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog))
3098c2ecf20Sopenharmony_ci		return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0);
3108c2ecf20Sopenharmony_ci
3118c2ecf20Sopenharmony_ci	return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1);
3128c2ecf20Sopenharmony_ci}
3138c2ecf20Sopenharmony_ci
3148c2ecf20Sopenharmony_ci/*-**************************************************************
3158c2ecf20Sopenharmony_ci*  Counting histogram
3168c2ecf20Sopenharmony_ci****************************************************************/
3178c2ecf20Sopenharmony_ci/*! FSE_count_simple
3188c2ecf20Sopenharmony_ci	This function counts byte values within `src`, and store the histogram into table `count`.
3198c2ecf20Sopenharmony_ci	It doesn't use any additional memory.
3208c2ecf20Sopenharmony_ci	But this function is unsafe : it doesn't check that all values within `src` can fit into `count`.
3218c2ecf20Sopenharmony_ci	For this reason, prefer using a table `count` with 256 elements.
3228c2ecf20Sopenharmony_ci	@return : count of most numerous element
3238c2ecf20Sopenharmony_ci*/
3248c2ecf20Sopenharmony_cisize_t FSE_count_simple(unsigned *count, unsigned *maxSymbolValuePtr, const void *src, size_t srcSize)
3258c2ecf20Sopenharmony_ci{
3268c2ecf20Sopenharmony_ci	const BYTE *ip = (const BYTE *)src;
3278c2ecf20Sopenharmony_ci	const BYTE *const end = ip + srcSize;
3288c2ecf20Sopenharmony_ci	unsigned maxSymbolValue = *maxSymbolValuePtr;
3298c2ecf20Sopenharmony_ci	unsigned max = 0;
3308c2ecf20Sopenharmony_ci
3318c2ecf20Sopenharmony_ci	memset(count, 0, (maxSymbolValue + 1) * sizeof(*count));
3328c2ecf20Sopenharmony_ci	if (srcSize == 0) {
3338c2ecf20Sopenharmony_ci		*maxSymbolValuePtr = 0;
3348c2ecf20Sopenharmony_ci		return 0;
3358c2ecf20Sopenharmony_ci	}
3368c2ecf20Sopenharmony_ci
3378c2ecf20Sopenharmony_ci	while (ip < end)
3388c2ecf20Sopenharmony_ci		count[*ip++]++;
3398c2ecf20Sopenharmony_ci
3408c2ecf20Sopenharmony_ci	while (!count[maxSymbolValue])
3418c2ecf20Sopenharmony_ci		maxSymbolValue--;
3428c2ecf20Sopenharmony_ci	*maxSymbolValuePtr = maxSymbolValue;
3438c2ecf20Sopenharmony_ci
3448c2ecf20Sopenharmony_ci	{
3458c2ecf20Sopenharmony_ci		U32 s;
3468c2ecf20Sopenharmony_ci		for (s = 0; s <= maxSymbolValue; s++)
3478c2ecf20Sopenharmony_ci			if (count[s] > max)
3488c2ecf20Sopenharmony_ci				max = count[s];
3498c2ecf20Sopenharmony_ci	}
3508c2ecf20Sopenharmony_ci
3518c2ecf20Sopenharmony_ci	return (size_t)max;
3528c2ecf20Sopenharmony_ci}
3538c2ecf20Sopenharmony_ci
3548c2ecf20Sopenharmony_ci/* FSE_count_parallel_wksp() :
3558c2ecf20Sopenharmony_ci * Same as FSE_count_parallel(), but using an externally provided scratch buffer.
3568c2ecf20Sopenharmony_ci * `workSpace` size must be a minimum of `1024 * sizeof(unsigned)`` */
3578c2ecf20Sopenharmony_cistatic size_t FSE_count_parallel_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *source, size_t sourceSize, unsigned checkMax,
3588c2ecf20Sopenharmony_ci				      unsigned *const workSpace)
3598c2ecf20Sopenharmony_ci{
3608c2ecf20Sopenharmony_ci	const BYTE *ip = (const BYTE *)source;
3618c2ecf20Sopenharmony_ci	const BYTE *const iend = ip + sourceSize;
3628c2ecf20Sopenharmony_ci	unsigned maxSymbolValue = *maxSymbolValuePtr;
3638c2ecf20Sopenharmony_ci	unsigned max = 0;
3648c2ecf20Sopenharmony_ci	U32 *const Counting1 = workSpace;
3658c2ecf20Sopenharmony_ci	U32 *const Counting2 = Counting1 + 256;
3668c2ecf20Sopenharmony_ci	U32 *const Counting3 = Counting2 + 256;
3678c2ecf20Sopenharmony_ci	U32 *const Counting4 = Counting3 + 256;
3688c2ecf20Sopenharmony_ci
3698c2ecf20Sopenharmony_ci	memset(Counting1, 0, 4 * 256 * sizeof(unsigned));
3708c2ecf20Sopenharmony_ci
3718c2ecf20Sopenharmony_ci	/* safety checks */
3728c2ecf20Sopenharmony_ci	if (!sourceSize) {
3738c2ecf20Sopenharmony_ci		memset(count, 0, maxSymbolValue + 1);
3748c2ecf20Sopenharmony_ci		*maxSymbolValuePtr = 0;
3758c2ecf20Sopenharmony_ci		return 0;
3768c2ecf20Sopenharmony_ci	}
3778c2ecf20Sopenharmony_ci	if (!maxSymbolValue)
3788c2ecf20Sopenharmony_ci		maxSymbolValue = 255; /* 0 == default */
3798c2ecf20Sopenharmony_ci
3808c2ecf20Sopenharmony_ci	/* by stripes of 16 bytes */
3818c2ecf20Sopenharmony_ci	{
3828c2ecf20Sopenharmony_ci		U32 cached = ZSTD_read32(ip);
3838c2ecf20Sopenharmony_ci		ip += 4;
3848c2ecf20Sopenharmony_ci		while (ip < iend - 15) {
3858c2ecf20Sopenharmony_ci			U32 c = cached;
3868c2ecf20Sopenharmony_ci			cached = ZSTD_read32(ip);
3878c2ecf20Sopenharmony_ci			ip += 4;
3888c2ecf20Sopenharmony_ci			Counting1[(BYTE)c]++;
3898c2ecf20Sopenharmony_ci			Counting2[(BYTE)(c >> 8)]++;
3908c2ecf20Sopenharmony_ci			Counting3[(BYTE)(c >> 16)]++;
3918c2ecf20Sopenharmony_ci			Counting4[c >> 24]++;
3928c2ecf20Sopenharmony_ci			c = cached;
3938c2ecf20Sopenharmony_ci			cached = ZSTD_read32(ip);
3948c2ecf20Sopenharmony_ci			ip += 4;
3958c2ecf20Sopenharmony_ci			Counting1[(BYTE)c]++;
3968c2ecf20Sopenharmony_ci			Counting2[(BYTE)(c >> 8)]++;
3978c2ecf20Sopenharmony_ci			Counting3[(BYTE)(c >> 16)]++;
3988c2ecf20Sopenharmony_ci			Counting4[c >> 24]++;
3998c2ecf20Sopenharmony_ci			c = cached;
4008c2ecf20Sopenharmony_ci			cached = ZSTD_read32(ip);
4018c2ecf20Sopenharmony_ci			ip += 4;
4028c2ecf20Sopenharmony_ci			Counting1[(BYTE)c]++;
4038c2ecf20Sopenharmony_ci			Counting2[(BYTE)(c >> 8)]++;
4048c2ecf20Sopenharmony_ci			Counting3[(BYTE)(c >> 16)]++;
4058c2ecf20Sopenharmony_ci			Counting4[c >> 24]++;
4068c2ecf20Sopenharmony_ci			c = cached;
4078c2ecf20Sopenharmony_ci			cached = ZSTD_read32(ip);
4088c2ecf20Sopenharmony_ci			ip += 4;
4098c2ecf20Sopenharmony_ci			Counting1[(BYTE)c]++;
4108c2ecf20Sopenharmony_ci			Counting2[(BYTE)(c >> 8)]++;
4118c2ecf20Sopenharmony_ci			Counting3[(BYTE)(c >> 16)]++;
4128c2ecf20Sopenharmony_ci			Counting4[c >> 24]++;
4138c2ecf20Sopenharmony_ci		}
4148c2ecf20Sopenharmony_ci		ip -= 4;
4158c2ecf20Sopenharmony_ci	}
4168c2ecf20Sopenharmony_ci
4178c2ecf20Sopenharmony_ci	/* finish last symbols */
4188c2ecf20Sopenharmony_ci	while (ip < iend)
4198c2ecf20Sopenharmony_ci		Counting1[*ip++]++;
4208c2ecf20Sopenharmony_ci
4218c2ecf20Sopenharmony_ci	if (checkMax) { /* verify stats will fit into destination table */
4228c2ecf20Sopenharmony_ci		U32 s;
4238c2ecf20Sopenharmony_ci		for (s = 255; s > maxSymbolValue; s--) {
4248c2ecf20Sopenharmony_ci			Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
4258c2ecf20Sopenharmony_ci			if (Counting1[s])
4268c2ecf20Sopenharmony_ci				return ERROR(maxSymbolValue_tooSmall);
4278c2ecf20Sopenharmony_ci		}
4288c2ecf20Sopenharmony_ci	}
4298c2ecf20Sopenharmony_ci
4308c2ecf20Sopenharmony_ci	{
4318c2ecf20Sopenharmony_ci		U32 s;
4328c2ecf20Sopenharmony_ci		for (s = 0; s <= maxSymbolValue; s++) {
4338c2ecf20Sopenharmony_ci			count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
4348c2ecf20Sopenharmony_ci			if (count[s] > max)
4358c2ecf20Sopenharmony_ci				max = count[s];
4368c2ecf20Sopenharmony_ci		}
4378c2ecf20Sopenharmony_ci	}
4388c2ecf20Sopenharmony_ci
4398c2ecf20Sopenharmony_ci	while (!count[maxSymbolValue])
4408c2ecf20Sopenharmony_ci		maxSymbolValue--;
4418c2ecf20Sopenharmony_ci	*maxSymbolValuePtr = maxSymbolValue;
4428c2ecf20Sopenharmony_ci	return (size_t)max;
4438c2ecf20Sopenharmony_ci}
4448c2ecf20Sopenharmony_ci
4458c2ecf20Sopenharmony_ci/* FSE_countFast_wksp() :
4468c2ecf20Sopenharmony_ci * Same as FSE_countFast(), but using an externally provided scratch buffer.
4478c2ecf20Sopenharmony_ci * `workSpace` size must be table of >= `1024` unsigned */
4488c2ecf20Sopenharmony_cisize_t FSE_countFast_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *source, size_t sourceSize, unsigned *workSpace)
4498c2ecf20Sopenharmony_ci{
4508c2ecf20Sopenharmony_ci	if (sourceSize < 1500)
4518c2ecf20Sopenharmony_ci		return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize);
4528c2ecf20Sopenharmony_ci	return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace);
4538c2ecf20Sopenharmony_ci}
4548c2ecf20Sopenharmony_ci
4558c2ecf20Sopenharmony_ci/* FSE_count_wksp() :
4568c2ecf20Sopenharmony_ci * Same as FSE_count(), but using an externally provided scratch buffer.
4578c2ecf20Sopenharmony_ci * `workSpace` size must be table of >= `1024` unsigned */
4588c2ecf20Sopenharmony_cisize_t FSE_count_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *source, size_t sourceSize, unsigned *workSpace)
4598c2ecf20Sopenharmony_ci{
4608c2ecf20Sopenharmony_ci	if (*maxSymbolValuePtr < 255)
4618c2ecf20Sopenharmony_ci		return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 1, workSpace);
4628c2ecf20Sopenharmony_ci	*maxSymbolValuePtr = 255;
4638c2ecf20Sopenharmony_ci	return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace);
4648c2ecf20Sopenharmony_ci}
4658c2ecf20Sopenharmony_ci
4668c2ecf20Sopenharmony_ci/*-**************************************************************
4678c2ecf20Sopenharmony_ci*  FSE Compression Code
4688c2ecf20Sopenharmony_ci****************************************************************/
4698c2ecf20Sopenharmony_ci/*! FSE_sizeof_CTable() :
4708c2ecf20Sopenharmony_ci	FSE_CTable is a variable size structure which contains :
4718c2ecf20Sopenharmony_ci	`U16 tableLog;`
4728c2ecf20Sopenharmony_ci	`U16 maxSymbolValue;`
4738c2ecf20Sopenharmony_ci	`U16 nextStateNumber[1 << tableLog];`                         // This size is variable
4748c2ecf20Sopenharmony_ci	`FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1];`  // This size is variable
4758c2ecf20Sopenharmony_ciAllocation is manual (C standard does not support variable-size structures).
4768c2ecf20Sopenharmony_ci*/
4778c2ecf20Sopenharmony_cisize_t FSE_sizeof_CTable(unsigned maxSymbolValue, unsigned tableLog)
4788c2ecf20Sopenharmony_ci{
4798c2ecf20Sopenharmony_ci	if (tableLog > FSE_MAX_TABLELOG)
4808c2ecf20Sopenharmony_ci		return ERROR(tableLog_tooLarge);
4818c2ecf20Sopenharmony_ci	return FSE_CTABLE_SIZE_U32(tableLog, maxSymbolValue) * sizeof(U32);
4828c2ecf20Sopenharmony_ci}
4838c2ecf20Sopenharmony_ci
4848c2ecf20Sopenharmony_ci/* provides the minimum logSize to safely represent a distribution */
4858c2ecf20Sopenharmony_cistatic unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
4868c2ecf20Sopenharmony_ci{
4878c2ecf20Sopenharmony_ci	U32 minBitsSrc = BIT_highbit32((U32)(srcSize - 1)) + 1;
4888c2ecf20Sopenharmony_ci	U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
4898c2ecf20Sopenharmony_ci	U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
4908c2ecf20Sopenharmony_ci	return minBits;
4918c2ecf20Sopenharmony_ci}
4928c2ecf20Sopenharmony_ci
4938c2ecf20Sopenharmony_ciunsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus)
4948c2ecf20Sopenharmony_ci{
4958c2ecf20Sopenharmony_ci	U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus;
4968c2ecf20Sopenharmony_ci	U32 tableLog = maxTableLog;
4978c2ecf20Sopenharmony_ci	U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue);
4988c2ecf20Sopenharmony_ci	if (tableLog == 0)
4998c2ecf20Sopenharmony_ci		tableLog = FSE_DEFAULT_TABLELOG;
5008c2ecf20Sopenharmony_ci	if (maxBitsSrc < tableLog)
5018c2ecf20Sopenharmony_ci		tableLog = maxBitsSrc; /* Accuracy can be reduced */
5028c2ecf20Sopenharmony_ci	if (minBits > tableLog)
5038c2ecf20Sopenharmony_ci		tableLog = minBits; /* Need a minimum to safely represent all symbol values */
5048c2ecf20Sopenharmony_ci	if (tableLog < FSE_MIN_TABLELOG)
5058c2ecf20Sopenharmony_ci		tableLog = FSE_MIN_TABLELOG;
5068c2ecf20Sopenharmony_ci	if (tableLog > FSE_MAX_TABLELOG)
5078c2ecf20Sopenharmony_ci		tableLog = FSE_MAX_TABLELOG;
5088c2ecf20Sopenharmony_ci	return tableLog;
5098c2ecf20Sopenharmony_ci}
5108c2ecf20Sopenharmony_ci
5118c2ecf20Sopenharmony_ciunsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
5128c2ecf20Sopenharmony_ci{
5138c2ecf20Sopenharmony_ci	return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2);
5148c2ecf20Sopenharmony_ci}
5158c2ecf20Sopenharmony_ci
5168c2ecf20Sopenharmony_ci/* Secondary normalization method.
5178c2ecf20Sopenharmony_ci   To be used when primary method fails. */
5188c2ecf20Sopenharmony_ci
5198c2ecf20Sopenharmony_cistatic size_t FSE_normalizeM2(short *norm, U32 tableLog, const unsigned *count, size_t total, U32 maxSymbolValue)
5208c2ecf20Sopenharmony_ci{
5218c2ecf20Sopenharmony_ci	short const NOT_YET_ASSIGNED = -2;
5228c2ecf20Sopenharmony_ci	U32 s;
5238c2ecf20Sopenharmony_ci	U32 distributed = 0;
5248c2ecf20Sopenharmony_ci	U32 ToDistribute;
5258c2ecf20Sopenharmony_ci
5268c2ecf20Sopenharmony_ci	/* Init */
5278c2ecf20Sopenharmony_ci	U32 const lowThreshold = (U32)(total >> tableLog);
5288c2ecf20Sopenharmony_ci	U32 lowOne = (U32)((total * 3) >> (tableLog + 1));
5298c2ecf20Sopenharmony_ci
5308c2ecf20Sopenharmony_ci	for (s = 0; s <= maxSymbolValue; s++) {
5318c2ecf20Sopenharmony_ci		if (count[s] == 0) {
5328c2ecf20Sopenharmony_ci			norm[s] = 0;
5338c2ecf20Sopenharmony_ci			continue;
5348c2ecf20Sopenharmony_ci		}
5358c2ecf20Sopenharmony_ci		if (count[s] <= lowThreshold) {
5368c2ecf20Sopenharmony_ci			norm[s] = -1;
5378c2ecf20Sopenharmony_ci			distributed++;
5388c2ecf20Sopenharmony_ci			total -= count[s];
5398c2ecf20Sopenharmony_ci			continue;
5408c2ecf20Sopenharmony_ci		}
5418c2ecf20Sopenharmony_ci		if (count[s] <= lowOne) {
5428c2ecf20Sopenharmony_ci			norm[s] = 1;
5438c2ecf20Sopenharmony_ci			distributed++;
5448c2ecf20Sopenharmony_ci			total -= count[s];
5458c2ecf20Sopenharmony_ci			continue;
5468c2ecf20Sopenharmony_ci		}
5478c2ecf20Sopenharmony_ci
5488c2ecf20Sopenharmony_ci		norm[s] = NOT_YET_ASSIGNED;
5498c2ecf20Sopenharmony_ci	}
5508c2ecf20Sopenharmony_ci	ToDistribute = (1 << tableLog) - distributed;
5518c2ecf20Sopenharmony_ci
5528c2ecf20Sopenharmony_ci	if ((total / ToDistribute) > lowOne) {
5538c2ecf20Sopenharmony_ci		/* risk of rounding to zero */
5548c2ecf20Sopenharmony_ci		lowOne = (U32)((total * 3) / (ToDistribute * 2));
5558c2ecf20Sopenharmony_ci		for (s = 0; s <= maxSymbolValue; s++) {
5568c2ecf20Sopenharmony_ci			if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) {
5578c2ecf20Sopenharmony_ci				norm[s] = 1;
5588c2ecf20Sopenharmony_ci				distributed++;
5598c2ecf20Sopenharmony_ci				total -= count[s];
5608c2ecf20Sopenharmony_ci				continue;
5618c2ecf20Sopenharmony_ci			}
5628c2ecf20Sopenharmony_ci		}
5638c2ecf20Sopenharmony_ci		ToDistribute = (1 << tableLog) - distributed;
5648c2ecf20Sopenharmony_ci	}
5658c2ecf20Sopenharmony_ci
5668c2ecf20Sopenharmony_ci	if (distributed == maxSymbolValue + 1) {
5678c2ecf20Sopenharmony_ci		/* all values are pretty poor;
5688c2ecf20Sopenharmony_ci		   probably incompressible data (should have already been detected);
5698c2ecf20Sopenharmony_ci		   find max, then give all remaining points to max */
5708c2ecf20Sopenharmony_ci		U32 maxV = 0, maxC = 0;
5718c2ecf20Sopenharmony_ci		for (s = 0; s <= maxSymbolValue; s++)
5728c2ecf20Sopenharmony_ci			if (count[s] > maxC)
5738c2ecf20Sopenharmony_ci				maxV = s, maxC = count[s];
5748c2ecf20Sopenharmony_ci		norm[maxV] += (short)ToDistribute;
5758c2ecf20Sopenharmony_ci		return 0;
5768c2ecf20Sopenharmony_ci	}
5778c2ecf20Sopenharmony_ci
5788c2ecf20Sopenharmony_ci	if (total == 0) {
5798c2ecf20Sopenharmony_ci		/* all of the symbols were low enough for the lowOne or lowThreshold */
5808c2ecf20Sopenharmony_ci		for (s = 0; ToDistribute > 0; s = (s + 1) % (maxSymbolValue + 1))
5818c2ecf20Sopenharmony_ci			if (norm[s] > 0)
5828c2ecf20Sopenharmony_ci				ToDistribute--, norm[s]++;
5838c2ecf20Sopenharmony_ci		return 0;
5848c2ecf20Sopenharmony_ci	}
5858c2ecf20Sopenharmony_ci
5868c2ecf20Sopenharmony_ci	{
5878c2ecf20Sopenharmony_ci		U64 const vStepLog = 62 - tableLog;
5888c2ecf20Sopenharmony_ci		U64 const mid = (1ULL << (vStepLog - 1)) - 1;
5898c2ecf20Sopenharmony_ci		U64 const rStep = div_u64((((U64)1 << vStepLog) * ToDistribute) + mid, (U32)total); /* scale on remaining */
5908c2ecf20Sopenharmony_ci		U64 tmpTotal = mid;
5918c2ecf20Sopenharmony_ci		for (s = 0; s <= maxSymbolValue; s++) {
5928c2ecf20Sopenharmony_ci			if (norm[s] == NOT_YET_ASSIGNED) {
5938c2ecf20Sopenharmony_ci				U64 const end = tmpTotal + (count[s] * rStep);
5948c2ecf20Sopenharmony_ci				U32 const sStart = (U32)(tmpTotal >> vStepLog);
5958c2ecf20Sopenharmony_ci				U32 const sEnd = (U32)(end >> vStepLog);
5968c2ecf20Sopenharmony_ci				U32 const weight = sEnd - sStart;
5978c2ecf20Sopenharmony_ci				if (weight < 1)
5988c2ecf20Sopenharmony_ci					return ERROR(GENERIC);
5998c2ecf20Sopenharmony_ci				norm[s] = (short)weight;
6008c2ecf20Sopenharmony_ci				tmpTotal = end;
6018c2ecf20Sopenharmony_ci			}
6028c2ecf20Sopenharmony_ci		}
6038c2ecf20Sopenharmony_ci	}
6048c2ecf20Sopenharmony_ci
6058c2ecf20Sopenharmony_ci	return 0;
6068c2ecf20Sopenharmony_ci}
6078c2ecf20Sopenharmony_ci
6088c2ecf20Sopenharmony_cisize_t FSE_normalizeCount(short *normalizedCounter, unsigned tableLog, const unsigned *count, size_t total, unsigned maxSymbolValue)
6098c2ecf20Sopenharmony_ci{
6108c2ecf20Sopenharmony_ci	/* Sanity checks */
6118c2ecf20Sopenharmony_ci	if (tableLog == 0)
6128c2ecf20Sopenharmony_ci		tableLog = FSE_DEFAULT_TABLELOG;
6138c2ecf20Sopenharmony_ci	if (tableLog < FSE_MIN_TABLELOG)
6148c2ecf20Sopenharmony_ci		return ERROR(GENERIC); /* Unsupported size */
6158c2ecf20Sopenharmony_ci	if (tableLog > FSE_MAX_TABLELOG)
6168c2ecf20Sopenharmony_ci		return ERROR(tableLog_tooLarge); /* Unsupported size */
6178c2ecf20Sopenharmony_ci	if (tableLog < FSE_minTableLog(total, maxSymbolValue))
6188c2ecf20Sopenharmony_ci		return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */
6198c2ecf20Sopenharmony_ci
6208c2ecf20Sopenharmony_ci	{
6218c2ecf20Sopenharmony_ci		U32 const rtbTable[] = {0, 473195, 504333, 520860, 550000, 700000, 750000, 830000};
6228c2ecf20Sopenharmony_ci		U64 const scale = 62 - tableLog;
6238c2ecf20Sopenharmony_ci		U64 const step = div_u64((U64)1 << 62, (U32)total); /* <== here, one division ! */
6248c2ecf20Sopenharmony_ci		U64 const vStep = 1ULL << (scale - 20);
6258c2ecf20Sopenharmony_ci		int stillToDistribute = 1 << tableLog;
6268c2ecf20Sopenharmony_ci		unsigned s;
6278c2ecf20Sopenharmony_ci		unsigned largest = 0;
6288c2ecf20Sopenharmony_ci		short largestP = 0;
6298c2ecf20Sopenharmony_ci		U32 lowThreshold = (U32)(total >> tableLog);
6308c2ecf20Sopenharmony_ci
6318c2ecf20Sopenharmony_ci		for (s = 0; s <= maxSymbolValue; s++) {
6328c2ecf20Sopenharmony_ci			if (count[s] == total)
6338c2ecf20Sopenharmony_ci				return 0; /* rle special case */
6348c2ecf20Sopenharmony_ci			if (count[s] == 0) {
6358c2ecf20Sopenharmony_ci				normalizedCounter[s] = 0;
6368c2ecf20Sopenharmony_ci				continue;
6378c2ecf20Sopenharmony_ci			}
6388c2ecf20Sopenharmony_ci			if (count[s] <= lowThreshold) {
6398c2ecf20Sopenharmony_ci				normalizedCounter[s] = -1;
6408c2ecf20Sopenharmony_ci				stillToDistribute--;
6418c2ecf20Sopenharmony_ci			} else {
6428c2ecf20Sopenharmony_ci				short proba = (short)((count[s] * step) >> scale);
6438c2ecf20Sopenharmony_ci				if (proba < 8) {
6448c2ecf20Sopenharmony_ci					U64 restToBeat = vStep * rtbTable[proba];
6458c2ecf20Sopenharmony_ci					proba += (count[s] * step) - ((U64)proba << scale) > restToBeat;
6468c2ecf20Sopenharmony_ci				}
6478c2ecf20Sopenharmony_ci				if (proba > largestP)
6488c2ecf20Sopenharmony_ci					largestP = proba, largest = s;
6498c2ecf20Sopenharmony_ci				normalizedCounter[s] = proba;
6508c2ecf20Sopenharmony_ci				stillToDistribute -= proba;
6518c2ecf20Sopenharmony_ci			}
6528c2ecf20Sopenharmony_ci		}
6538c2ecf20Sopenharmony_ci		if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) {
6548c2ecf20Sopenharmony_ci			/* corner case, need another normalization method */
6558c2ecf20Sopenharmony_ci			size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue);
6568c2ecf20Sopenharmony_ci			if (FSE_isError(errorCode))
6578c2ecf20Sopenharmony_ci				return errorCode;
6588c2ecf20Sopenharmony_ci		} else
6598c2ecf20Sopenharmony_ci			normalizedCounter[largest] += (short)stillToDistribute;
6608c2ecf20Sopenharmony_ci	}
6618c2ecf20Sopenharmony_ci
6628c2ecf20Sopenharmony_ci	return tableLog;
6638c2ecf20Sopenharmony_ci}
6648c2ecf20Sopenharmony_ci
6658c2ecf20Sopenharmony_ci/* fake FSE_CTable, for raw (uncompressed) input */
6668c2ecf20Sopenharmony_cisize_t FSE_buildCTable_raw(FSE_CTable *ct, unsigned nbBits)
6678c2ecf20Sopenharmony_ci{
6688c2ecf20Sopenharmony_ci	const unsigned tableSize = 1 << nbBits;
6698c2ecf20Sopenharmony_ci	const unsigned tableMask = tableSize - 1;
6708c2ecf20Sopenharmony_ci	const unsigned maxSymbolValue = tableMask;
6718c2ecf20Sopenharmony_ci	void *const ptr = ct;
6728c2ecf20Sopenharmony_ci	U16 *const tableU16 = ((U16 *)ptr) + 2;
6738c2ecf20Sopenharmony_ci	void *const FSCT = ((U32 *)ptr) + 1 /* header */ + (tableSize >> 1); /* assumption : tableLog >= 1 */
6748c2ecf20Sopenharmony_ci	FSE_symbolCompressionTransform *const symbolTT = (FSE_symbolCompressionTransform *)(FSCT);
6758c2ecf20Sopenharmony_ci	unsigned s;
6768c2ecf20Sopenharmony_ci
6778c2ecf20Sopenharmony_ci	/* Sanity checks */
6788c2ecf20Sopenharmony_ci	if (nbBits < 1)
6798c2ecf20Sopenharmony_ci		return ERROR(GENERIC); /* min size */
6808c2ecf20Sopenharmony_ci
6818c2ecf20Sopenharmony_ci	/* header */
6828c2ecf20Sopenharmony_ci	tableU16[-2] = (U16)nbBits;
6838c2ecf20Sopenharmony_ci	tableU16[-1] = (U16)maxSymbolValue;
6848c2ecf20Sopenharmony_ci
6858c2ecf20Sopenharmony_ci	/* Build table */
6868c2ecf20Sopenharmony_ci	for (s = 0; s < tableSize; s++)
6878c2ecf20Sopenharmony_ci		tableU16[s] = (U16)(tableSize + s);
6888c2ecf20Sopenharmony_ci
6898c2ecf20Sopenharmony_ci	/* Build Symbol Transformation Table */
6908c2ecf20Sopenharmony_ci	{
6918c2ecf20Sopenharmony_ci		const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits);
6928c2ecf20Sopenharmony_ci		for (s = 0; s <= maxSymbolValue; s++) {
6938c2ecf20Sopenharmony_ci			symbolTT[s].deltaNbBits = deltaNbBits;
6948c2ecf20Sopenharmony_ci			symbolTT[s].deltaFindState = s - 1;
6958c2ecf20Sopenharmony_ci		}
6968c2ecf20Sopenharmony_ci	}
6978c2ecf20Sopenharmony_ci
6988c2ecf20Sopenharmony_ci	return 0;
6998c2ecf20Sopenharmony_ci}
7008c2ecf20Sopenharmony_ci
7018c2ecf20Sopenharmony_ci/* fake FSE_CTable, for rle input (always same symbol) */
7028c2ecf20Sopenharmony_cisize_t FSE_buildCTable_rle(FSE_CTable *ct, BYTE symbolValue)
7038c2ecf20Sopenharmony_ci{
7048c2ecf20Sopenharmony_ci	void *ptr = ct;
7058c2ecf20Sopenharmony_ci	U16 *tableU16 = ((U16 *)ptr) + 2;
7068c2ecf20Sopenharmony_ci	void *FSCTptr = (U32 *)ptr + 2;
7078c2ecf20Sopenharmony_ci	FSE_symbolCompressionTransform *symbolTT = (FSE_symbolCompressionTransform *)FSCTptr;
7088c2ecf20Sopenharmony_ci
7098c2ecf20Sopenharmony_ci	/* header */
7108c2ecf20Sopenharmony_ci	tableU16[-2] = (U16)0;
7118c2ecf20Sopenharmony_ci	tableU16[-1] = (U16)symbolValue;
7128c2ecf20Sopenharmony_ci
7138c2ecf20Sopenharmony_ci	/* Build table */
7148c2ecf20Sopenharmony_ci	tableU16[0] = 0;
7158c2ecf20Sopenharmony_ci	tableU16[1] = 0; /* just in case */
7168c2ecf20Sopenharmony_ci
7178c2ecf20Sopenharmony_ci	/* Build Symbol Transformation Table */
7188c2ecf20Sopenharmony_ci	symbolTT[symbolValue].deltaNbBits = 0;
7198c2ecf20Sopenharmony_ci	symbolTT[symbolValue].deltaFindState = 0;
7208c2ecf20Sopenharmony_ci
7218c2ecf20Sopenharmony_ci	return 0;
7228c2ecf20Sopenharmony_ci}
7238c2ecf20Sopenharmony_ci
7248c2ecf20Sopenharmony_cistatic size_t FSE_compress_usingCTable_generic(void *dst, size_t dstSize, const void *src, size_t srcSize, const FSE_CTable *ct, const unsigned fast)
7258c2ecf20Sopenharmony_ci{
7268c2ecf20Sopenharmony_ci	const BYTE *const istart = (const BYTE *)src;
7278c2ecf20Sopenharmony_ci	const BYTE *const iend = istart + srcSize;
7288c2ecf20Sopenharmony_ci	const BYTE *ip = iend;
7298c2ecf20Sopenharmony_ci
7308c2ecf20Sopenharmony_ci	BIT_CStream_t bitC;
7318c2ecf20Sopenharmony_ci	FSE_CState_t CState1, CState2;
7328c2ecf20Sopenharmony_ci
7338c2ecf20Sopenharmony_ci	/* init */
7348c2ecf20Sopenharmony_ci	if (srcSize <= 2)
7358c2ecf20Sopenharmony_ci		return 0;
7368c2ecf20Sopenharmony_ci	{
7378c2ecf20Sopenharmony_ci		size_t const initError = BIT_initCStream(&bitC, dst, dstSize);
7388c2ecf20Sopenharmony_ci		if (FSE_isError(initError))
7398c2ecf20Sopenharmony_ci			return 0; /* not enough space available to write a bitstream */
7408c2ecf20Sopenharmony_ci	}
7418c2ecf20Sopenharmony_ci
7428c2ecf20Sopenharmony_ci#define FSE_FLUSHBITS(s) (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s))
7438c2ecf20Sopenharmony_ci
7448c2ecf20Sopenharmony_ci	if (srcSize & 1) {
7458c2ecf20Sopenharmony_ci		FSE_initCState2(&CState1, ct, *--ip);
7468c2ecf20Sopenharmony_ci		FSE_initCState2(&CState2, ct, *--ip);
7478c2ecf20Sopenharmony_ci		FSE_encodeSymbol(&bitC, &CState1, *--ip);
7488c2ecf20Sopenharmony_ci		FSE_FLUSHBITS(&bitC);
7498c2ecf20Sopenharmony_ci	} else {
7508c2ecf20Sopenharmony_ci		FSE_initCState2(&CState2, ct, *--ip);
7518c2ecf20Sopenharmony_ci		FSE_initCState2(&CState1, ct, *--ip);
7528c2ecf20Sopenharmony_ci	}
7538c2ecf20Sopenharmony_ci
7548c2ecf20Sopenharmony_ci	/* join to mod 4 */
7558c2ecf20Sopenharmony_ci	srcSize -= 2;
7568c2ecf20Sopenharmony_ci	if ((sizeof(bitC.bitContainer) * 8 > FSE_MAX_TABLELOG * 4 + 7) && (srcSize & 2)) { /* test bit 2 */
7578c2ecf20Sopenharmony_ci		FSE_encodeSymbol(&bitC, &CState2, *--ip);
7588c2ecf20Sopenharmony_ci		FSE_encodeSymbol(&bitC, &CState1, *--ip);
7598c2ecf20Sopenharmony_ci		FSE_FLUSHBITS(&bitC);
7608c2ecf20Sopenharmony_ci	}
7618c2ecf20Sopenharmony_ci
7628c2ecf20Sopenharmony_ci	/* 2 or 4 encoding per loop */
7638c2ecf20Sopenharmony_ci	while (ip > istart) {
7648c2ecf20Sopenharmony_ci
7658c2ecf20Sopenharmony_ci		FSE_encodeSymbol(&bitC, &CState2, *--ip);
7668c2ecf20Sopenharmony_ci
7678c2ecf20Sopenharmony_ci		if (sizeof(bitC.bitContainer) * 8 < FSE_MAX_TABLELOG * 2 + 7) /* this test must be static */
7688c2ecf20Sopenharmony_ci			FSE_FLUSHBITS(&bitC);
7698c2ecf20Sopenharmony_ci
7708c2ecf20Sopenharmony_ci		FSE_encodeSymbol(&bitC, &CState1, *--ip);
7718c2ecf20Sopenharmony_ci
7728c2ecf20Sopenharmony_ci		if (sizeof(bitC.bitContainer) * 8 > FSE_MAX_TABLELOG * 4 + 7) { /* this test must be static */
7738c2ecf20Sopenharmony_ci			FSE_encodeSymbol(&bitC, &CState2, *--ip);
7748c2ecf20Sopenharmony_ci			FSE_encodeSymbol(&bitC, &CState1, *--ip);
7758c2ecf20Sopenharmony_ci		}
7768c2ecf20Sopenharmony_ci
7778c2ecf20Sopenharmony_ci		FSE_FLUSHBITS(&bitC);
7788c2ecf20Sopenharmony_ci	}
7798c2ecf20Sopenharmony_ci
7808c2ecf20Sopenharmony_ci	FSE_flushCState(&bitC, &CState2);
7818c2ecf20Sopenharmony_ci	FSE_flushCState(&bitC, &CState1);
7828c2ecf20Sopenharmony_ci	return BIT_closeCStream(&bitC);
7838c2ecf20Sopenharmony_ci}
7848c2ecf20Sopenharmony_ci
7858c2ecf20Sopenharmony_cisize_t FSE_compress_usingCTable(void *dst, size_t dstSize, const void *src, size_t srcSize, const FSE_CTable *ct)
7868c2ecf20Sopenharmony_ci{
7878c2ecf20Sopenharmony_ci	unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize));
7888c2ecf20Sopenharmony_ci
7898c2ecf20Sopenharmony_ci	if (fast)
7908c2ecf20Sopenharmony_ci		return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1);
7918c2ecf20Sopenharmony_ci	else
7928c2ecf20Sopenharmony_ci		return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0);
7938c2ecf20Sopenharmony_ci}
7948c2ecf20Sopenharmony_ci
7958c2ecf20Sopenharmony_cisize_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
796