1/* 2 * Common functions of New Generation Entropy library 3 * Copyright (C) 2016, Yann Collet. 4 * 5 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are 9 * met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above 14 * copyright notice, this list of conditions and the following disclaimer 15 * in the documentation and/or other materials provided with the 16 * distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 * 30 * This program is free software; you can redistribute it and/or modify it under 31 * the terms of the GNU General Public License version 2 as published by the 32 * Free Software Foundation. This program is dual-licensed; you may select 33 * either version 2 of the GNU General Public License ("GPL") or BSD license 34 * ("BSD"). 35 * 36 * You can contact the author at : 37 * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy 38 */ 39 40/* ************************************* 41* Dependencies 42***************************************/ 43#include "error_private.h" /* ERR_*, ERROR */ 44#include "fse.h" 45#include "huf.h" 46#include "mem.h" 47 48/*=== Version ===*/ 49unsigned FSE_versionNumber(void) { return FSE_VERSION_NUMBER; } 50 51/*=== Error Management ===*/ 52unsigned FSE_isError(size_t code) { return ERR_isError(code); } 53 54unsigned HUF_isError(size_t code) { return ERR_isError(code); } 55 56/*-************************************************************** 57* FSE NCount encoding-decoding 58****************************************************************/ 59size_t FSE_readNCount(short *normalizedCounter, unsigned *maxSVPtr, unsigned *tableLogPtr, const void *headerBuffer, size_t hbSize) 60{ 61 const BYTE *const istart = (const BYTE *)headerBuffer; 62 const BYTE *const iend = istart + hbSize; 63 const BYTE *ip = istart; 64 int nbBits; 65 int remaining; 66 int threshold; 67 U32 bitStream; 68 int bitCount; 69 unsigned charnum = 0; 70 int previous0 = 0; 71 72 if (hbSize < 4) 73 return ERROR(srcSize_wrong); 74 bitStream = ZSTD_readLE32(ip); 75 nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */ 76 if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) 77 return ERROR(tableLog_tooLarge); 78 bitStream >>= 4; 79 bitCount = 4; 80 *tableLogPtr = nbBits; 81 remaining = (1 << nbBits) + 1; 82 threshold = 1 << nbBits; 83 nbBits++; 84 85 while ((remaining > 1) & (charnum <= *maxSVPtr)) { 86 if (previous0) { 87 unsigned n0 = charnum; 88 while ((bitStream & 0xFFFF) == 0xFFFF) { 89 n0 += 24; 90 if (ip < iend - 5) { 91 ip += 2; 92 bitStream = ZSTD_readLE32(ip) >> bitCount; 93 } else { 94 bitStream >>= 16; 95 bitCount += 16; 96 } 97 } 98 while ((bitStream & 3) == 3) { 99 n0 += 3; 100 bitStream >>= 2; 101 bitCount += 2; 102 } 103 n0 += bitStream & 3; 104 bitCount += 2; 105 if (n0 > *maxSVPtr) 106 return ERROR(maxSymbolValue_tooSmall); 107 while (charnum < n0) 108 normalizedCounter[charnum++] = 0; 109 if ((ip <= iend - 7) || (ip + (bitCount >> 3) <= iend - 4)) { 110 ip += bitCount >> 3; 111 bitCount &= 7; 112 bitStream = ZSTD_readLE32(ip) >> bitCount; 113 } else { 114 bitStream >>= 2; 115 } 116 } 117 { 118 int const max = (2 * threshold - 1) - remaining; 119 int count; 120 121 if ((bitStream & (threshold - 1)) < (U32)max) { 122 count = bitStream & (threshold - 1); 123 bitCount += nbBits - 1; 124 } else { 125 count = bitStream & (2 * threshold - 1); 126 if (count >= threshold) 127 count -= max; 128 bitCount += nbBits; 129 } 130 131 count--; /* extra accuracy */ 132 remaining -= count < 0 ? -count : count; /* -1 means +1 */ 133 normalizedCounter[charnum++] = (short)count; 134 previous0 = !count; 135 while (remaining < threshold) { 136 nbBits--; 137 threshold >>= 1; 138 } 139 140 if ((ip <= iend - 7) || (ip + (bitCount >> 3) <= iend - 4)) { 141 ip += bitCount >> 3; 142 bitCount &= 7; 143 } else { 144 bitCount -= (int)(8 * (iend - 4 - ip)); 145 ip = iend - 4; 146 } 147 bitStream = ZSTD_readLE32(ip) >> (bitCount & 31); 148 } 149 } /* while ((remaining>1) & (charnum<=*maxSVPtr)) */ 150 if (remaining != 1) 151 return ERROR(corruption_detected); 152 if (bitCount > 32) 153 return ERROR(corruption_detected); 154 *maxSVPtr = charnum - 1; 155 156 ip += (bitCount + 7) >> 3; 157 return ip - istart; 158} 159 160/*! HUF_readStats() : 161 Read compact Huffman tree, saved by HUF_writeCTable(). 162 `huffWeight` is destination buffer. 163 `rankStats` is assumed to be a table of at least HUF_TABLELOG_MAX U32. 164 @return : size read from `src` , or an error Code . 165 Note : Needed by HUF_readCTable() and HUF_readDTableX?() . 166*/ 167size_t HUF_readStats_wksp(BYTE *huffWeight, size_t hwSize, U32 *rankStats, U32 *nbSymbolsPtr, U32 *tableLogPtr, const void *src, size_t srcSize, void *workspace, size_t workspaceSize) 168{ 169 U32 weightTotal; 170 const BYTE *ip = (const BYTE *)src; 171 size_t iSize; 172 size_t oSize; 173 174 if (!srcSize) 175 return ERROR(srcSize_wrong); 176 iSize = ip[0]; 177 /* memset(huffWeight, 0, hwSize); */ /* is not necessary, even though some analyzer complain ... */ 178 179 if (iSize >= 128) { /* special header */ 180 oSize = iSize - 127; 181 iSize = ((oSize + 1) / 2); 182 if (iSize + 1 > srcSize) 183 return ERROR(srcSize_wrong); 184 if (oSize >= hwSize) 185 return ERROR(corruption_detected); 186 ip += 1; 187 { 188 U32 n; 189 for (n = 0; n < oSize; n += 2) { 190 huffWeight[n] = ip[n / 2] >> 4; 191 huffWeight[n + 1] = ip[n / 2] & 15; 192 } 193 } 194 } else { /* header compressed with FSE (normal case) */ 195 if (iSize + 1 > srcSize) 196 return ERROR(srcSize_wrong); 197 oSize = FSE_decompress_wksp(huffWeight, hwSize - 1, ip + 1, iSize, 6, workspace, workspaceSize); /* max (hwSize-1) values decoded, as last one is implied */ 198 if (FSE_isError(oSize)) 199 return oSize; 200 } 201 202 /* collect weight stats */ 203 memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32)); 204 weightTotal = 0; 205 { 206 U32 n; 207 for (n = 0; n < oSize; n++) { 208 if (huffWeight[n] >= HUF_TABLELOG_MAX) 209 return ERROR(corruption_detected); 210 rankStats[huffWeight[n]]++; 211 weightTotal += (1 << huffWeight[n]) >> 1; 212 } 213 } 214 if (weightTotal == 0) 215 return ERROR(corruption_detected); 216 217 /* get last non-null symbol weight (implied, total must be 2^n) */ 218 { 219 U32 const tableLog = BIT_highbit32(weightTotal) + 1; 220 if (tableLog > HUF_TABLELOG_MAX) 221 return ERROR(corruption_detected); 222 *tableLogPtr = tableLog; 223 /* determine last weight */ 224 { 225 U32 const total = 1 << tableLog; 226 U32 const rest = total - weightTotal; 227 U32 const verif = 1 << BIT_highbit32(rest); 228 U32 const lastWeight = BIT_highbit32(rest) + 1; 229 if (verif != rest) 230 return ERROR(corruption_detected); /* last value must be a clean power of 2 */ 231 huffWeight[oSize] = (BYTE)lastWeight; 232 rankStats[lastWeight]++; 233 } 234 } 235 236 /* check tree construction validity */ 237 if ((rankStats[1] < 2) || (rankStats[1] & 1)) 238 return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */ 239 240 /* results */ 241 *nbSymbolsPtr = (U32)(oSize + 1); 242 return iSize + 1; 243} 244