1/* 2 * FSE : Finite State Entropy encoder 3 * Copyright (C) 2013-2015, Yann Collet. 4 * 5 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are 9 * met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above 14 * copyright notice, this list of conditions and the following disclaimer 15 * in the documentation and/or other materials provided with the 16 * distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 * 30 * This program is free software; you can redistribute it and/or modify it under 31 * the terms of the GNU General Public License version 2 as published by the 32 * Free Software Foundation. This program is dual-licensed; you may select 33 * either version 2 of the GNU General Public License ("GPL") or BSD license 34 * ("BSD"). 35 * 36 * You can contact the author at : 37 * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy 38 */ 39 40/* ************************************************************** 41* Compiler specifics 42****************************************************************/ 43#define FORCE_INLINE static __always_inline 44 45/* ************************************************************** 46* Includes 47****************************************************************/ 48#include "bitstream.h" 49#include "fse.h" 50#include <linux/compiler.h> 51#include <linux/kernel.h> 52#include <linux/math64.h> 53#include <linux/string.h> /* memcpy, memset */ 54 55/* ************************************************************** 56* Error Management 57****************************************************************/ 58#define FSE_STATIC_ASSERT(c) \ 59 { \ 60 enum { FSE_static_assert = 1 / (int)(!!(c)) }; \ 61 } /* use only *after* variable declarations */ 62 63/* ************************************************************** 64* Templates 65****************************************************************/ 66/* 67 designed to be included 68 for type-specific functions (template emulation in C) 69 Objective is to write these functions only once, for improved maintenance 70*/ 71 72/* safety checks */ 73#ifndef FSE_FUNCTION_EXTENSION 74#error "FSE_FUNCTION_EXTENSION must be defined" 75#endif 76#ifndef FSE_FUNCTION_TYPE 77#error "FSE_FUNCTION_TYPE must be defined" 78#endif 79 80/* Function names */ 81#define FSE_CAT(X, Y) X##Y 82#define FSE_FUNCTION_NAME(X, Y) FSE_CAT(X, Y) 83#define FSE_TYPE_NAME(X, Y) FSE_CAT(X, Y) 84 85/* Function templates */ 86 87/* FSE_buildCTable_wksp() : 88 * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`). 89 * wkspSize should be sized to handle worst case situation, which is `1<<max_tableLog * sizeof(FSE_FUNCTION_TYPE)` 90 * workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements 91 */ 92size_t FSE_buildCTable_wksp(FSE_CTable *ct, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void *workspace, size_t workspaceSize) 93{ 94 U32 const tableSize = 1 << tableLog; 95 U32 const tableMask = tableSize - 1; 96 void *const ptr = ct; 97 U16 *const tableU16 = ((U16 *)ptr) + 2; 98 void *const FSCT = ((U32 *)ptr) + 1 /* header */ + (tableLog ? tableSize >> 1 : 1); 99 FSE_symbolCompressionTransform *const symbolTT = (FSE_symbolCompressionTransform *)(FSCT); 100 U32 const step = FSE_TABLESTEP(tableSize); 101 U32 highThreshold = tableSize - 1; 102 103 U32 *cumul; 104 FSE_FUNCTION_TYPE *tableSymbol; 105 size_t spaceUsed32 = 0; 106 107 cumul = (U32 *)workspace + spaceUsed32; 108 spaceUsed32 += FSE_MAX_SYMBOL_VALUE + 2; 109 tableSymbol = (FSE_FUNCTION_TYPE *)((U32 *)workspace + spaceUsed32); 110 spaceUsed32 += ALIGN(sizeof(FSE_FUNCTION_TYPE) * ((size_t)1 << tableLog), sizeof(U32)) >> 2; 111 112 if ((spaceUsed32 << 2) > workspaceSize) 113 return ERROR(tableLog_tooLarge); 114 workspace = (U32 *)workspace + spaceUsed32; 115 workspaceSize -= (spaceUsed32 << 2); 116 117 /* CTable header */ 118 tableU16[-2] = (U16)tableLog; 119 tableU16[-1] = (U16)maxSymbolValue; 120 121 /* For explanations on how to distribute symbol values over the table : 122 * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */ 123 124 /* symbol start positions */ 125 { 126 U32 u; 127 cumul[0] = 0; 128 for (u = 1; u <= maxSymbolValue + 1; u++) { 129 if (normalizedCounter[u - 1] == -1) { /* Low proba symbol */ 130 cumul[u] = cumul[u - 1] + 1; 131 tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u - 1); 132 } else { 133 cumul[u] = cumul[u - 1] + normalizedCounter[u - 1]; 134 } 135 } 136 cumul[maxSymbolValue + 1] = tableSize + 1; 137 } 138 139 /* Spread symbols */ 140 { 141 U32 position = 0; 142 U32 symbol; 143 for (symbol = 0; symbol <= maxSymbolValue; symbol++) { 144 int nbOccurences; 145 for (nbOccurences = 0; nbOccurences < normalizedCounter[symbol]; nbOccurences++) { 146 tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol; 147 position = (position + step) & tableMask; 148 while (position > highThreshold) 149 position = (position + step) & tableMask; /* Low proba area */ 150 } 151 } 152 153 if (position != 0) 154 return ERROR(GENERIC); /* Must have gone through all positions */ 155 } 156 157 /* Build table */ 158 { 159 U32 u; 160 for (u = 0; u < tableSize; u++) { 161 FSE_FUNCTION_TYPE s = tableSymbol[u]; /* note : static analyzer may not understand tableSymbol is properly initialized */ 162 tableU16[cumul[s]++] = (U16)(tableSize + u); /* TableU16 : sorted by symbol order; gives next state value */ 163 } 164 } 165 166 /* Build Symbol Transformation Table */ 167 { 168 unsigned total = 0; 169 unsigned s; 170 for (s = 0; s <= maxSymbolValue; s++) { 171 switch (normalizedCounter[s]) { 172 case 0: break; 173 174 case -1: 175 case 1: 176 symbolTT[s].deltaNbBits = (tableLog << 16) - (1 << tableLog); 177 symbolTT[s].deltaFindState = total - 1; 178 total++; 179 break; 180 default: { 181 U32 const maxBitsOut = tableLog - BIT_highbit32(normalizedCounter[s] - 1); 182 U32 const minStatePlus = normalizedCounter[s] << maxBitsOut; 183 symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus; 184 symbolTT[s].deltaFindState = total - normalizedCounter[s]; 185 total += normalizedCounter[s]; 186 } 187 } 188 } 189 } 190 191 return 0; 192} 193 194/*-************************************************************** 195* FSE NCount encoding-decoding 196****************************************************************/ 197size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog) 198{ 199 size_t const maxHeaderSize = (((maxSymbolValue + 1) * tableLog) >> 3) + 3; 200 return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */ 201} 202 203static size_t FSE_writeNCount_generic(void *header, size_t headerBufferSize, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, 204 unsigned writeIsSafe) 205{ 206 BYTE *const ostart = (BYTE *)header; 207 BYTE *out = ostart; 208 BYTE *const oend = ostart + headerBufferSize; 209 int nbBits; 210 const int tableSize = 1 << tableLog; 211 int remaining; 212 int threshold; 213 U32 bitStream; 214 int bitCount; 215 unsigned charnum = 0; 216 int previous0 = 0; 217 218 bitStream = 0; 219 bitCount = 0; 220 /* Table Size */ 221 bitStream += (tableLog - FSE_MIN_TABLELOG) << bitCount; 222 bitCount += 4; 223 224 /* Init */ 225 remaining = tableSize + 1; /* +1 for extra accuracy */ 226 threshold = tableSize; 227 nbBits = tableLog + 1; 228 229 while (remaining > 1) { /* stops at 1 */ 230 if (previous0) { 231 unsigned start = charnum; 232 while (!normalizedCounter[charnum]) 233 charnum++; 234 while (charnum >= start + 24) { 235 start += 24; 236 bitStream += 0xFFFFU << bitCount; 237 if ((!writeIsSafe) && (out > oend - 2)) 238 return ERROR(dstSize_tooSmall); /* Buffer overflow */ 239 out[0] = (BYTE)bitStream; 240 out[1] = (BYTE)(bitStream >> 8); 241 out += 2; 242 bitStream >>= 16; 243 } 244 while (charnum >= start + 3) { 245 start += 3; 246 bitStream += 3 << bitCount; 247 bitCount += 2; 248 } 249 bitStream += (charnum - start) << bitCount; 250 bitCount += 2; 251 if (bitCount > 16) { 252 if ((!writeIsSafe) && (out > oend - 2)) 253 return ERROR(dstSize_tooSmall); /* Buffer overflow */ 254 out[0] = (BYTE)bitStream; 255 out[1] = (BYTE)(bitStream >> 8); 256 out += 2; 257 bitStream >>= 16; 258 bitCount -= 16; 259 } 260 } 261 { 262 int count = normalizedCounter[charnum++]; 263 int const max = (2 * threshold - 1) - remaining; 264 remaining -= count < 0 ? -count : count; 265 count++; /* +1 for extra accuracy */ 266 if (count >= threshold) 267 count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */ 268 bitStream += count << bitCount; 269 bitCount += nbBits; 270 bitCount -= (count < max); 271 previous0 = (count == 1); 272 if (remaining < 1) 273 return ERROR(GENERIC); 274 while (remaining < threshold) 275 nbBits--, threshold >>= 1; 276 } 277 if (bitCount > 16) { 278 if ((!writeIsSafe) && (out > oend - 2)) 279 return ERROR(dstSize_tooSmall); /* Buffer overflow */ 280 out[0] = (BYTE)bitStream; 281 out[1] = (BYTE)(bitStream >> 8); 282 out += 2; 283 bitStream >>= 16; 284 bitCount -= 16; 285 } 286 } 287 288 /* flush remaining bitStream */ 289 if ((!writeIsSafe) && (out > oend - 2)) 290 return ERROR(dstSize_tooSmall); /* Buffer overflow */ 291 out[0] = (BYTE)bitStream; 292 out[1] = (BYTE)(bitStream >> 8); 293 out += (bitCount + 7) / 8; 294 295 if (charnum > maxSymbolValue + 1) 296 return ERROR(GENERIC); 297 298 return (out - ostart); 299} 300 301size_t FSE_writeNCount(void *buffer, size_t bufferSize, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) 302{ 303 if (tableLog > FSE_MAX_TABLELOG) 304 return ERROR(tableLog_tooLarge); /* Unsupported */ 305 if (tableLog < FSE_MIN_TABLELOG) 306 return ERROR(GENERIC); /* Unsupported */ 307 308 if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog)) 309 return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0); 310 311 return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1); 312} 313 314/*-************************************************************** 315* Counting histogram 316****************************************************************/ 317/*! FSE_count_simple 318 This function counts byte values within `src`, and store the histogram into table `count`. 319 It doesn't use any additional memory. 320 But this function is unsafe : it doesn't check that all values within `src` can fit into `count`. 321 For this reason, prefer using a table `count` with 256 elements. 322 @return : count of most numerous element 323*/ 324size_t FSE_count_simple(unsigned *count, unsigned *maxSymbolValuePtr, const void *src, size_t srcSize) 325{ 326 const BYTE *ip = (const BYTE *)src; 327 const BYTE *const end = ip + srcSize; 328 unsigned maxSymbolValue = *maxSymbolValuePtr; 329 unsigned max = 0; 330 331 memset(count, 0, (maxSymbolValue + 1) * sizeof(*count)); 332 if (srcSize == 0) { 333 *maxSymbolValuePtr = 0; 334 return 0; 335 } 336 337 while (ip < end) 338 count[*ip++]++; 339 340 while (!count[maxSymbolValue]) 341 maxSymbolValue--; 342 *maxSymbolValuePtr = maxSymbolValue; 343 344 { 345 U32 s; 346 for (s = 0; s <= maxSymbolValue; s++) 347 if (count[s] > max) 348 max = count[s]; 349 } 350 351 return (size_t)max; 352} 353 354/* FSE_count_parallel_wksp() : 355 * Same as FSE_count_parallel(), but using an externally provided scratch buffer. 356 * `workSpace` size must be a minimum of `1024 * sizeof(unsigned)`` */ 357static size_t FSE_count_parallel_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *source, size_t sourceSize, unsigned checkMax, 358 unsigned *const workSpace) 359{ 360 const BYTE *ip = (const BYTE *)source; 361 const BYTE *const iend = ip + sourceSize; 362 unsigned maxSymbolValue = *maxSymbolValuePtr; 363 unsigned max = 0; 364 U32 *const Counting1 = workSpace; 365 U32 *const Counting2 = Counting1 + 256; 366 U32 *const Counting3 = Counting2 + 256; 367 U32 *const Counting4 = Counting3 + 256; 368 369 memset(Counting1, 0, 4 * 256 * sizeof(unsigned)); 370 371 /* safety checks */ 372 if (!sourceSize) { 373 memset(count, 0, maxSymbolValue + 1); 374 *maxSymbolValuePtr = 0; 375 return 0; 376 } 377 if (!maxSymbolValue) 378 maxSymbolValue = 255; /* 0 == default */ 379 380 /* by stripes of 16 bytes */ 381 { 382 U32 cached = ZSTD_read32(ip); 383 ip += 4; 384 while (ip < iend - 15) { 385 U32 c = cached; 386 cached = ZSTD_read32(ip); 387 ip += 4; 388 Counting1[(BYTE)c]++; 389 Counting2[(BYTE)(c >> 8)]++; 390 Counting3[(BYTE)(c >> 16)]++; 391 Counting4[c >> 24]++; 392 c = cached; 393 cached = ZSTD_read32(ip); 394 ip += 4; 395 Counting1[(BYTE)c]++; 396 Counting2[(BYTE)(c >> 8)]++; 397 Counting3[(BYTE)(c >> 16)]++; 398 Counting4[c >> 24]++; 399 c = cached; 400 cached = ZSTD_read32(ip); 401 ip += 4; 402 Counting1[(BYTE)c]++; 403 Counting2[(BYTE)(c >> 8)]++; 404 Counting3[(BYTE)(c >> 16)]++; 405 Counting4[c >> 24]++; 406 c = cached; 407 cached = ZSTD_read32(ip); 408 ip += 4; 409 Counting1[(BYTE)c]++; 410 Counting2[(BYTE)(c >> 8)]++; 411 Counting3[(BYTE)(c >> 16)]++; 412 Counting4[c >> 24]++; 413 } 414 ip -= 4; 415 } 416 417 /* finish last symbols */ 418 while (ip < iend) 419 Counting1[*ip++]++; 420 421 if (checkMax) { /* verify stats will fit into destination table */ 422 U32 s; 423 for (s = 255; s > maxSymbolValue; s--) { 424 Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s]; 425 if (Counting1[s]) 426 return ERROR(maxSymbolValue_tooSmall); 427 } 428 } 429 430 { 431 U32 s; 432 for (s = 0; s <= maxSymbolValue; s++) { 433 count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s]; 434 if (count[s] > max) 435 max = count[s]; 436 } 437 } 438 439 while (!count[maxSymbolValue]) 440 maxSymbolValue--; 441 *maxSymbolValuePtr = maxSymbolValue; 442 return (size_t)max; 443} 444 445/* FSE_countFast_wksp() : 446 * Same as FSE_countFast(), but using an externally provided scratch buffer. 447 * `workSpace` size must be table of >= `1024` unsigned */ 448size_t FSE_countFast_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *source, size_t sourceSize, unsigned *workSpace) 449{ 450 if (sourceSize < 1500) 451 return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize); 452 return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace); 453} 454 455/* FSE_count_wksp() : 456 * Same as FSE_count(), but using an externally provided scratch buffer. 457 * `workSpace` size must be table of >= `1024` unsigned */ 458size_t FSE_count_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *source, size_t sourceSize, unsigned *workSpace) 459{ 460 if (*maxSymbolValuePtr < 255) 461 return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 1, workSpace); 462 *maxSymbolValuePtr = 255; 463 return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace); 464} 465 466/*-************************************************************** 467* FSE Compression Code 468****************************************************************/ 469/*! FSE_sizeof_CTable() : 470 FSE_CTable is a variable size structure which contains : 471 `U16 tableLog;` 472 `U16 maxSymbolValue;` 473 `U16 nextStateNumber[1 << tableLog];` // This size is variable 474 `FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1];` // This size is variable 475Allocation is manual (C standard does not support variable-size structures). 476*/ 477size_t FSE_sizeof_CTable(unsigned maxSymbolValue, unsigned tableLog) 478{ 479 if (tableLog > FSE_MAX_TABLELOG) 480 return ERROR(tableLog_tooLarge); 481 return FSE_CTABLE_SIZE_U32(tableLog, maxSymbolValue) * sizeof(U32); 482} 483 484/* provides the minimum logSize to safely represent a distribution */ 485static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue) 486{ 487 U32 minBitsSrc = BIT_highbit32((U32)(srcSize - 1)) + 1; 488 U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2; 489 U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols; 490 return minBits; 491} 492 493unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus) 494{ 495 U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus; 496 U32 tableLog = maxTableLog; 497 U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue); 498 if (tableLog == 0) 499 tableLog = FSE_DEFAULT_TABLELOG; 500 if (maxBitsSrc < tableLog) 501 tableLog = maxBitsSrc; /* Accuracy can be reduced */ 502 if (minBits > tableLog) 503 tableLog = minBits; /* Need a minimum to safely represent all symbol values */ 504 if (tableLog < FSE_MIN_TABLELOG) 505 tableLog = FSE_MIN_TABLELOG; 506 if (tableLog > FSE_MAX_TABLELOG) 507 tableLog = FSE_MAX_TABLELOG; 508 return tableLog; 509} 510 511unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue) 512{ 513 return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2); 514} 515 516/* Secondary normalization method. 517 To be used when primary method fails. */ 518 519static size_t FSE_normalizeM2(short *norm, U32 tableLog, const unsigned *count, size_t total, U32 maxSymbolValue) 520{ 521 short const NOT_YET_ASSIGNED = -2; 522 U32 s; 523 U32 distributed = 0; 524 U32 ToDistribute; 525 526 /* Init */ 527 U32 const lowThreshold = (U32)(total >> tableLog); 528 U32 lowOne = (U32)((total * 3) >> (tableLog + 1)); 529 530 for (s = 0; s <= maxSymbolValue; s++) { 531 if (count[s] == 0) { 532 norm[s] = 0; 533 continue; 534 } 535 if (count[s] <= lowThreshold) { 536 norm[s] = -1; 537 distributed++; 538 total -= count[s]; 539 continue; 540 } 541 if (count[s] <= lowOne) { 542 norm[s] = 1; 543 distributed++; 544 total -= count[s]; 545 continue; 546 } 547 548 norm[s] = NOT_YET_ASSIGNED; 549 } 550 ToDistribute = (1 << tableLog) - distributed; 551 552 if ((total / ToDistribute) > lowOne) { 553 /* risk of rounding to zero */ 554 lowOne = (U32)((total * 3) / (ToDistribute * 2)); 555 for (s = 0; s <= maxSymbolValue; s++) { 556 if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) { 557 norm[s] = 1; 558 distributed++; 559 total -= count[s]; 560 continue; 561 } 562 } 563 ToDistribute = (1 << tableLog) - distributed; 564 } 565 566 if (distributed == maxSymbolValue + 1) { 567 /* all values are pretty poor; 568 probably incompressible data (should have already been detected); 569 find max, then give all remaining points to max */ 570 U32 maxV = 0, maxC = 0; 571 for (s = 0; s <= maxSymbolValue; s++) 572 if (count[s] > maxC) 573 maxV = s, maxC = count[s]; 574 norm[maxV] += (short)ToDistribute; 575 return 0; 576 } 577 578 if (total == 0) { 579 /* all of the symbols were low enough for the lowOne or lowThreshold */ 580 for (s = 0; ToDistribute > 0; s = (s + 1) % (maxSymbolValue + 1)) 581 if (norm[s] > 0) 582 ToDistribute--, norm[s]++; 583 return 0; 584 } 585 586 { 587 U64 const vStepLog = 62 - tableLog; 588 U64 const mid = (1ULL << (vStepLog - 1)) - 1; 589 U64 const rStep = div_u64((((U64)1 << vStepLog) * ToDistribute) + mid, (U32)total); /* scale on remaining */ 590 U64 tmpTotal = mid; 591 for (s = 0; s <= maxSymbolValue; s++) { 592 if (norm[s] == NOT_YET_ASSIGNED) { 593 U64 const end = tmpTotal + (count[s] * rStep); 594 U32 const sStart = (U32)(tmpTotal >> vStepLog); 595 U32 const sEnd = (U32)(end >> vStepLog); 596 U32 const weight = sEnd - sStart; 597 if (weight < 1) 598 return ERROR(GENERIC); 599 norm[s] = (short)weight; 600 tmpTotal = end; 601 } 602 } 603 } 604 605 return 0; 606} 607 608size_t FSE_normalizeCount(short *normalizedCounter, unsigned tableLog, const unsigned *count, size_t total, unsigned maxSymbolValue) 609{ 610 /* Sanity checks */ 611 if (tableLog == 0) 612 tableLog = FSE_DEFAULT_TABLELOG; 613 if (tableLog < FSE_MIN_TABLELOG) 614 return ERROR(GENERIC); /* Unsupported size */ 615 if (tableLog > FSE_MAX_TABLELOG) 616 return ERROR(tableLog_tooLarge); /* Unsupported size */ 617 if (tableLog < FSE_minTableLog(total, maxSymbolValue)) 618 return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */ 619 620 { 621 U32 const rtbTable[] = {0, 473195, 504333, 520860, 550000, 700000, 750000, 830000}; 622 U64 const scale = 62 - tableLog; 623 U64 const step = div_u64((U64)1 << 62, (U32)total); /* <== here, one division ! */ 624 U64 const vStep = 1ULL << (scale - 20); 625 int stillToDistribute = 1 << tableLog; 626 unsigned s; 627 unsigned largest = 0; 628 short largestP = 0; 629 U32 lowThreshold = (U32)(total >> tableLog); 630 631 for (s = 0; s <= maxSymbolValue; s++) { 632 if (count[s] == total) 633 return 0; /* rle special case */ 634 if (count[s] == 0) { 635 normalizedCounter[s] = 0; 636 continue; 637 } 638 if (count[s] <= lowThreshold) { 639 normalizedCounter[s] = -1; 640 stillToDistribute--; 641 } else { 642 short proba = (short)((count[s] * step) >> scale); 643 if (proba < 8) { 644 U64 restToBeat = vStep * rtbTable[proba]; 645 proba += (count[s] * step) - ((U64)proba << scale) > restToBeat; 646 } 647 if (proba > largestP) 648 largestP = proba, largest = s; 649 normalizedCounter[s] = proba; 650 stillToDistribute -= proba; 651 } 652 } 653 if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) { 654 /* corner case, need another normalization method */ 655 size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue); 656 if (FSE_isError(errorCode)) 657 return errorCode; 658 } else 659 normalizedCounter[largest] += (short)stillToDistribute; 660 } 661 662 return tableLog; 663} 664 665/* fake FSE_CTable, for raw (uncompressed) input */ 666size_t FSE_buildCTable_raw(FSE_CTable *ct, unsigned nbBits) 667{ 668 const unsigned tableSize = 1 << nbBits; 669 const unsigned tableMask = tableSize - 1; 670 const unsigned maxSymbolValue = tableMask; 671 void *const ptr = ct; 672 U16 *const tableU16 = ((U16 *)ptr) + 2; 673 void *const FSCT = ((U32 *)ptr) + 1 /* header */ + (tableSize >> 1); /* assumption : tableLog >= 1 */ 674 FSE_symbolCompressionTransform *const symbolTT = (FSE_symbolCompressionTransform *)(FSCT); 675 unsigned s; 676 677 /* Sanity checks */ 678 if (nbBits < 1) 679 return ERROR(GENERIC); /* min size */ 680 681 /* header */ 682 tableU16[-2] = (U16)nbBits; 683 tableU16[-1] = (U16)maxSymbolValue; 684 685 /* Build table */ 686 for (s = 0; s < tableSize; s++) 687 tableU16[s] = (U16)(tableSize + s); 688 689 /* Build Symbol Transformation Table */ 690 { 691 const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits); 692 for (s = 0; s <= maxSymbolValue; s++) { 693 symbolTT[s].deltaNbBits = deltaNbBits; 694 symbolTT[s].deltaFindState = s - 1; 695 } 696 } 697 698 return 0; 699} 700 701/* fake FSE_CTable, for rle input (always same symbol) */ 702size_t FSE_buildCTable_rle(FSE_CTable *ct, BYTE symbolValue) 703{ 704 void *ptr = ct; 705 U16 *tableU16 = ((U16 *)ptr) + 2; 706 void *FSCTptr = (U32 *)ptr + 2; 707 FSE_symbolCompressionTransform *symbolTT = (FSE_symbolCompressionTransform *)FSCTptr; 708 709 /* header */ 710 tableU16[-2] = (U16)0; 711 tableU16[-1] = (U16)symbolValue; 712 713 /* Build table */ 714 tableU16[0] = 0; 715 tableU16[1] = 0; /* just in case */ 716 717 /* Build Symbol Transformation Table */ 718 symbolTT[symbolValue].deltaNbBits = 0; 719 symbolTT[symbolValue].deltaFindState = 0; 720 721 return 0; 722} 723 724static size_t FSE_compress_usingCTable_generic(void *dst, size_t dstSize, const void *src, size_t srcSize, const FSE_CTable *ct, const unsigned fast) 725{ 726 const BYTE *const istart = (const BYTE *)src; 727 const BYTE *const iend = istart + srcSize; 728 const BYTE *ip = iend; 729 730 BIT_CStream_t bitC; 731 FSE_CState_t CState1, CState2; 732 733 /* init */ 734 if (srcSize <= 2) 735 return 0; 736 { 737 size_t const initError = BIT_initCStream(&bitC, dst, dstSize); 738 if (FSE_isError(initError)) 739 return 0; /* not enough space available to write a bitstream */ 740 } 741 742#define FSE_FLUSHBITS(s) (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s)) 743 744 if (srcSize & 1) { 745 FSE_initCState2(&CState1, ct, *--ip); 746 FSE_initCState2(&CState2, ct, *--ip); 747 FSE_encodeSymbol(&bitC, &CState1, *--ip); 748 FSE_FLUSHBITS(&bitC); 749 } else { 750 FSE_initCState2(&CState2, ct, *--ip); 751 FSE_initCState2(&CState1, ct, *--ip); 752 } 753 754 /* join to mod 4 */ 755 srcSize -= 2; 756 if ((sizeof(bitC.bitContainer) * 8 > FSE_MAX_TABLELOG * 4 + 7) && (srcSize & 2)) { /* test bit 2 */ 757 FSE_encodeSymbol(&bitC, &CState2, *--ip); 758 FSE_encodeSymbol(&bitC, &CState1, *--ip); 759 FSE_FLUSHBITS(&bitC); 760 } 761 762 /* 2 or 4 encoding per loop */ 763 while (ip > istart) { 764 765 FSE_encodeSymbol(&bitC, &CState2, *--ip); 766 767 if (sizeof(bitC.bitContainer) * 8 < FSE_MAX_TABLELOG * 2 + 7) /* this test must be static */ 768 FSE_FLUSHBITS(&bitC); 769 770 FSE_encodeSymbol(&bitC, &CState1, *--ip); 771 772 if (sizeof(bitC.bitContainer) * 8 > FSE_MAX_TABLELOG * 4 + 7) { /* this test must be static */ 773 FSE_encodeSymbol(&bitC, &CState2, *--ip); 774 FSE_encodeSymbol(&bitC, &CState1, *--ip); 775 } 776 777 FSE_FLUSHBITS(&bitC); 778 } 779 780 FSE_flushCState(&bitC, &CState2); 781 FSE_flushCState(&bitC, &CState1); 782 return BIT_closeCStream(&bitC); 783} 784 785size_t FSE_compress_usingCTable(void *dst, size_t dstSize, const void *src, size_t srcSize, const FSE_CTable *ct) 786{ 787 unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize)); 788 789 if (fast) 790 return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1); 791 else 792 return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0); 793} 794 795size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); } 796