162306a36Sopenharmony_ci/* 262306a36Sopenharmony_ci * Copyright (c) Yann Collet, Facebook, Inc. 362306a36Sopenharmony_ci * All rights reserved. 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * This source code is licensed under both the BSD-style license (found in the 662306a36Sopenharmony_ci * LICENSE file in the root directory of this source tree) and the GPLv2 (found 762306a36Sopenharmony_ci * in the COPYING file in the root directory of this source tree). 862306a36Sopenharmony_ci * You may select, at your option, one of the above-listed licenses. 962306a36Sopenharmony_ci */ 1062306a36Sopenharmony_ci 1162306a36Sopenharmony_ci/* This header contains definitions 1262306a36Sopenharmony_ci * that shall **only** be used by modules within lib/compress. 1362306a36Sopenharmony_ci */ 1462306a36Sopenharmony_ci 1562306a36Sopenharmony_ci#ifndef ZSTD_COMPRESS_H 1662306a36Sopenharmony_ci#define ZSTD_COMPRESS_H 1762306a36Sopenharmony_ci 1862306a36Sopenharmony_ci/*-************************************* 1962306a36Sopenharmony_ci* Dependencies 2062306a36Sopenharmony_ci***************************************/ 2162306a36Sopenharmony_ci#include "../common/zstd_internal.h" 2262306a36Sopenharmony_ci#include "zstd_cwksp.h" 2362306a36Sopenharmony_ci 2462306a36Sopenharmony_ci 2562306a36Sopenharmony_ci/*-************************************* 2662306a36Sopenharmony_ci* Constants 2762306a36Sopenharmony_ci***************************************/ 2862306a36Sopenharmony_ci#define kSearchStrength 8 2962306a36Sopenharmony_ci#define HASH_READ_SIZE 8 3062306a36Sopenharmony_ci#define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted". 3162306a36Sopenharmony_ci It could be confused for a real successor at index "1", if sorted as larger than its predecessor. 3262306a36Sopenharmony_ci It's not a big deal though : candidate will just be sorted again. 3362306a36Sopenharmony_ci Additionally, candidate position 1 will be lost. 3462306a36Sopenharmony_ci But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss. 3562306a36Sopenharmony_ci The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy. 3662306a36Sopenharmony_ci This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */ 3762306a36Sopenharmony_ci 3862306a36Sopenharmony_ci 3962306a36Sopenharmony_ci/*-************************************* 4062306a36Sopenharmony_ci* Context memory management 4162306a36Sopenharmony_ci***************************************/ 4262306a36Sopenharmony_citypedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e; 4362306a36Sopenharmony_citypedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage; 4462306a36Sopenharmony_ci 4562306a36Sopenharmony_citypedef struct ZSTD_prefixDict_s { 4662306a36Sopenharmony_ci const void* dict; 4762306a36Sopenharmony_ci size_t dictSize; 4862306a36Sopenharmony_ci ZSTD_dictContentType_e dictContentType; 4962306a36Sopenharmony_ci} ZSTD_prefixDict; 5062306a36Sopenharmony_ci 5162306a36Sopenharmony_citypedef struct { 5262306a36Sopenharmony_ci void* dictBuffer; 5362306a36Sopenharmony_ci void const* dict; 5462306a36Sopenharmony_ci size_t dictSize; 5562306a36Sopenharmony_ci ZSTD_dictContentType_e dictContentType; 5662306a36Sopenharmony_ci ZSTD_CDict* cdict; 5762306a36Sopenharmony_ci} ZSTD_localDict; 5862306a36Sopenharmony_ci 5962306a36Sopenharmony_citypedef struct { 6062306a36Sopenharmony_ci HUF_CElt CTable[HUF_CTABLE_SIZE_ST(255)]; 6162306a36Sopenharmony_ci HUF_repeat repeatMode; 6262306a36Sopenharmony_ci} ZSTD_hufCTables_t; 6362306a36Sopenharmony_ci 6462306a36Sopenharmony_citypedef struct { 6562306a36Sopenharmony_ci FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)]; 6662306a36Sopenharmony_ci FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)]; 6762306a36Sopenharmony_ci FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)]; 6862306a36Sopenharmony_ci FSE_repeat offcode_repeatMode; 6962306a36Sopenharmony_ci FSE_repeat matchlength_repeatMode; 7062306a36Sopenharmony_ci FSE_repeat litlength_repeatMode; 7162306a36Sopenharmony_ci} ZSTD_fseCTables_t; 7262306a36Sopenharmony_ci 7362306a36Sopenharmony_citypedef struct { 7462306a36Sopenharmony_ci ZSTD_hufCTables_t huf; 7562306a36Sopenharmony_ci ZSTD_fseCTables_t fse; 7662306a36Sopenharmony_ci} ZSTD_entropyCTables_t; 7762306a36Sopenharmony_ci 7862306a36Sopenharmony_ci/* ********************************************* 7962306a36Sopenharmony_ci* Entropy buffer statistics structs and funcs * 8062306a36Sopenharmony_ci***********************************************/ 8162306a36Sopenharmony_ci/* ZSTD_hufCTablesMetadata_t : 8262306a36Sopenharmony_ci * Stores Literals Block Type for a super-block in hType, and 8362306a36Sopenharmony_ci * huffman tree description in hufDesBuffer. 8462306a36Sopenharmony_ci * hufDesSize refers to the size of huffman tree description in bytes. 8562306a36Sopenharmony_ci * This metadata is populated in ZSTD_buildBlockEntropyStats_literals() */ 8662306a36Sopenharmony_citypedef struct { 8762306a36Sopenharmony_ci symbolEncodingType_e hType; 8862306a36Sopenharmony_ci BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE]; 8962306a36Sopenharmony_ci size_t hufDesSize; 9062306a36Sopenharmony_ci} ZSTD_hufCTablesMetadata_t; 9162306a36Sopenharmony_ci 9262306a36Sopenharmony_ci/* ZSTD_fseCTablesMetadata_t : 9362306a36Sopenharmony_ci * Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and 9462306a36Sopenharmony_ci * fse tables in fseTablesBuffer. 9562306a36Sopenharmony_ci * fseTablesSize refers to the size of fse tables in bytes. 9662306a36Sopenharmony_ci * This metadata is populated in ZSTD_buildBlockEntropyStats_sequences() */ 9762306a36Sopenharmony_citypedef struct { 9862306a36Sopenharmony_ci symbolEncodingType_e llType; 9962306a36Sopenharmony_ci symbolEncodingType_e ofType; 10062306a36Sopenharmony_ci symbolEncodingType_e mlType; 10162306a36Sopenharmony_ci BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE]; 10262306a36Sopenharmony_ci size_t fseTablesSize; 10362306a36Sopenharmony_ci size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */ 10462306a36Sopenharmony_ci} ZSTD_fseCTablesMetadata_t; 10562306a36Sopenharmony_ci 10662306a36Sopenharmony_citypedef struct { 10762306a36Sopenharmony_ci ZSTD_hufCTablesMetadata_t hufMetadata; 10862306a36Sopenharmony_ci ZSTD_fseCTablesMetadata_t fseMetadata; 10962306a36Sopenharmony_ci} ZSTD_entropyCTablesMetadata_t; 11062306a36Sopenharmony_ci 11162306a36Sopenharmony_ci/* ZSTD_buildBlockEntropyStats() : 11262306a36Sopenharmony_ci * Builds entropy for the block. 11362306a36Sopenharmony_ci * @return : 0 on success or error code */ 11462306a36Sopenharmony_cisize_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr, 11562306a36Sopenharmony_ci const ZSTD_entropyCTables_t* prevEntropy, 11662306a36Sopenharmony_ci ZSTD_entropyCTables_t* nextEntropy, 11762306a36Sopenharmony_ci const ZSTD_CCtx_params* cctxParams, 11862306a36Sopenharmony_ci ZSTD_entropyCTablesMetadata_t* entropyMetadata, 11962306a36Sopenharmony_ci void* workspace, size_t wkspSize); 12062306a36Sopenharmony_ci 12162306a36Sopenharmony_ci/* ******************************* 12262306a36Sopenharmony_ci* Compression internals structs * 12362306a36Sopenharmony_ci*********************************/ 12462306a36Sopenharmony_ci 12562306a36Sopenharmony_citypedef struct { 12662306a36Sopenharmony_ci U32 off; /* Offset sumtype code for the match, using ZSTD_storeSeq() format */ 12762306a36Sopenharmony_ci U32 len; /* Raw length of match */ 12862306a36Sopenharmony_ci} ZSTD_match_t; 12962306a36Sopenharmony_ci 13062306a36Sopenharmony_citypedef struct { 13162306a36Sopenharmony_ci U32 offset; /* Offset of sequence */ 13262306a36Sopenharmony_ci U32 litLength; /* Length of literals prior to match */ 13362306a36Sopenharmony_ci U32 matchLength; /* Raw length of match */ 13462306a36Sopenharmony_ci} rawSeq; 13562306a36Sopenharmony_ci 13662306a36Sopenharmony_citypedef struct { 13762306a36Sopenharmony_ci rawSeq* seq; /* The start of the sequences */ 13862306a36Sopenharmony_ci size_t pos; /* The index in seq where reading stopped. pos <= size. */ 13962306a36Sopenharmony_ci size_t posInSequence; /* The position within the sequence at seq[pos] where reading 14062306a36Sopenharmony_ci stopped. posInSequence <= seq[pos].litLength + seq[pos].matchLength */ 14162306a36Sopenharmony_ci size_t size; /* The number of sequences. <= capacity. */ 14262306a36Sopenharmony_ci size_t capacity; /* The capacity starting from `seq` pointer */ 14362306a36Sopenharmony_ci} rawSeqStore_t; 14462306a36Sopenharmony_ci 14562306a36Sopenharmony_ciUNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0}; 14662306a36Sopenharmony_ci 14762306a36Sopenharmony_citypedef struct { 14862306a36Sopenharmony_ci int price; 14962306a36Sopenharmony_ci U32 off; 15062306a36Sopenharmony_ci U32 mlen; 15162306a36Sopenharmony_ci U32 litlen; 15262306a36Sopenharmony_ci U32 rep[ZSTD_REP_NUM]; 15362306a36Sopenharmony_ci} ZSTD_optimal_t; 15462306a36Sopenharmony_ci 15562306a36Sopenharmony_citypedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e; 15662306a36Sopenharmony_ci 15762306a36Sopenharmony_citypedef struct { 15862306a36Sopenharmony_ci /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */ 15962306a36Sopenharmony_ci unsigned* litFreq; /* table of literals statistics, of size 256 */ 16062306a36Sopenharmony_ci unsigned* litLengthFreq; /* table of litLength statistics, of size (MaxLL+1) */ 16162306a36Sopenharmony_ci unsigned* matchLengthFreq; /* table of matchLength statistics, of size (MaxML+1) */ 16262306a36Sopenharmony_ci unsigned* offCodeFreq; /* table of offCode statistics, of size (MaxOff+1) */ 16362306a36Sopenharmony_ci ZSTD_match_t* matchTable; /* list of found matches, of size ZSTD_OPT_NUM+1 */ 16462306a36Sopenharmony_ci ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */ 16562306a36Sopenharmony_ci 16662306a36Sopenharmony_ci U32 litSum; /* nb of literals */ 16762306a36Sopenharmony_ci U32 litLengthSum; /* nb of litLength codes */ 16862306a36Sopenharmony_ci U32 matchLengthSum; /* nb of matchLength codes */ 16962306a36Sopenharmony_ci U32 offCodeSum; /* nb of offset codes */ 17062306a36Sopenharmony_ci U32 litSumBasePrice; /* to compare to log2(litfreq) */ 17162306a36Sopenharmony_ci U32 litLengthSumBasePrice; /* to compare to log2(llfreq) */ 17262306a36Sopenharmony_ci U32 matchLengthSumBasePrice;/* to compare to log2(mlfreq) */ 17362306a36Sopenharmony_ci U32 offCodeSumBasePrice; /* to compare to log2(offreq) */ 17462306a36Sopenharmony_ci ZSTD_OptPrice_e priceType; /* prices can be determined dynamically, or follow a pre-defined cost structure */ 17562306a36Sopenharmony_ci const ZSTD_entropyCTables_t* symbolCosts; /* pre-calculated dictionary statistics */ 17662306a36Sopenharmony_ci ZSTD_paramSwitch_e literalCompressionMode; 17762306a36Sopenharmony_ci} optState_t; 17862306a36Sopenharmony_ci 17962306a36Sopenharmony_citypedef struct { 18062306a36Sopenharmony_ci ZSTD_entropyCTables_t entropy; 18162306a36Sopenharmony_ci U32 rep[ZSTD_REP_NUM]; 18262306a36Sopenharmony_ci} ZSTD_compressedBlockState_t; 18362306a36Sopenharmony_ci 18462306a36Sopenharmony_citypedef struct { 18562306a36Sopenharmony_ci BYTE const* nextSrc; /* next block here to continue on current prefix */ 18662306a36Sopenharmony_ci BYTE const* base; /* All regular indexes relative to this position */ 18762306a36Sopenharmony_ci BYTE const* dictBase; /* extDict indexes relative to this position */ 18862306a36Sopenharmony_ci U32 dictLimit; /* below that point, need extDict */ 18962306a36Sopenharmony_ci U32 lowLimit; /* below that point, no more valid data */ 19062306a36Sopenharmony_ci U32 nbOverflowCorrections; /* Number of times overflow correction has run since 19162306a36Sopenharmony_ci * ZSTD_window_init(). Useful for debugging coredumps 19262306a36Sopenharmony_ci * and for ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY. 19362306a36Sopenharmony_ci */ 19462306a36Sopenharmony_ci} ZSTD_window_t; 19562306a36Sopenharmony_ci 19662306a36Sopenharmony_ci#define ZSTD_WINDOW_START_INDEX 2 19762306a36Sopenharmony_ci 19862306a36Sopenharmony_citypedef struct ZSTD_matchState_t ZSTD_matchState_t; 19962306a36Sopenharmony_ci 20062306a36Sopenharmony_ci#define ZSTD_ROW_HASH_CACHE_SIZE 8 /* Size of prefetching hash cache for row-based matchfinder */ 20162306a36Sopenharmony_ci 20262306a36Sopenharmony_cistruct ZSTD_matchState_t { 20362306a36Sopenharmony_ci ZSTD_window_t window; /* State for window round buffer management */ 20462306a36Sopenharmony_ci U32 loadedDictEnd; /* index of end of dictionary, within context's referential. 20562306a36Sopenharmony_ci * When loadedDictEnd != 0, a dictionary is in use, and still valid. 20662306a36Sopenharmony_ci * This relies on a mechanism to set loadedDictEnd=0 when dictionary is no longer within distance. 20762306a36Sopenharmony_ci * Such mechanism is provided within ZSTD_window_enforceMaxDist() and ZSTD_checkDictValidity(). 20862306a36Sopenharmony_ci * When dict referential is copied into active context (i.e. not attached), 20962306a36Sopenharmony_ci * loadedDictEnd == dictSize, since referential starts from zero. 21062306a36Sopenharmony_ci */ 21162306a36Sopenharmony_ci U32 nextToUpdate; /* index from which to continue table update */ 21262306a36Sopenharmony_ci U32 hashLog3; /* dispatch table for matches of len==3 : larger == faster, more memory */ 21362306a36Sopenharmony_ci 21462306a36Sopenharmony_ci U32 rowHashLog; /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/ 21562306a36Sopenharmony_ci U16* tagTable; /* For row-based matchFinder: A row-based table containing the hashes and head index. */ 21662306a36Sopenharmony_ci U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */ 21762306a36Sopenharmony_ci 21862306a36Sopenharmony_ci U32* hashTable; 21962306a36Sopenharmony_ci U32* hashTable3; 22062306a36Sopenharmony_ci U32* chainTable; 22162306a36Sopenharmony_ci 22262306a36Sopenharmony_ci U32 forceNonContiguous; /* Non-zero if we should force non-contiguous load for the next window update. */ 22362306a36Sopenharmony_ci 22462306a36Sopenharmony_ci int dedicatedDictSearch; /* Indicates whether this matchState is using the 22562306a36Sopenharmony_ci * dedicated dictionary search structure. 22662306a36Sopenharmony_ci */ 22762306a36Sopenharmony_ci optState_t opt; /* optimal parser state */ 22862306a36Sopenharmony_ci const ZSTD_matchState_t* dictMatchState; 22962306a36Sopenharmony_ci ZSTD_compressionParameters cParams; 23062306a36Sopenharmony_ci const rawSeqStore_t* ldmSeqStore; 23162306a36Sopenharmony_ci}; 23262306a36Sopenharmony_ci 23362306a36Sopenharmony_citypedef struct { 23462306a36Sopenharmony_ci ZSTD_compressedBlockState_t* prevCBlock; 23562306a36Sopenharmony_ci ZSTD_compressedBlockState_t* nextCBlock; 23662306a36Sopenharmony_ci ZSTD_matchState_t matchState; 23762306a36Sopenharmony_ci} ZSTD_blockState_t; 23862306a36Sopenharmony_ci 23962306a36Sopenharmony_citypedef struct { 24062306a36Sopenharmony_ci U32 offset; 24162306a36Sopenharmony_ci U32 checksum; 24262306a36Sopenharmony_ci} ldmEntry_t; 24362306a36Sopenharmony_ci 24462306a36Sopenharmony_citypedef struct { 24562306a36Sopenharmony_ci BYTE const* split; 24662306a36Sopenharmony_ci U32 hash; 24762306a36Sopenharmony_ci U32 checksum; 24862306a36Sopenharmony_ci ldmEntry_t* bucket; 24962306a36Sopenharmony_ci} ldmMatchCandidate_t; 25062306a36Sopenharmony_ci 25162306a36Sopenharmony_ci#define LDM_BATCH_SIZE 64 25262306a36Sopenharmony_ci 25362306a36Sopenharmony_citypedef struct { 25462306a36Sopenharmony_ci ZSTD_window_t window; /* State for the window round buffer management */ 25562306a36Sopenharmony_ci ldmEntry_t* hashTable; 25662306a36Sopenharmony_ci U32 loadedDictEnd; 25762306a36Sopenharmony_ci BYTE* bucketOffsets; /* Next position in bucket to insert entry */ 25862306a36Sopenharmony_ci size_t splitIndices[LDM_BATCH_SIZE]; 25962306a36Sopenharmony_ci ldmMatchCandidate_t matchCandidates[LDM_BATCH_SIZE]; 26062306a36Sopenharmony_ci} ldmState_t; 26162306a36Sopenharmony_ci 26262306a36Sopenharmony_citypedef struct { 26362306a36Sopenharmony_ci ZSTD_paramSwitch_e enableLdm; /* ZSTD_ps_enable to enable LDM. ZSTD_ps_auto by default */ 26462306a36Sopenharmony_ci U32 hashLog; /* Log size of hashTable */ 26562306a36Sopenharmony_ci U32 bucketSizeLog; /* Log bucket size for collision resolution, at most 8 */ 26662306a36Sopenharmony_ci U32 minMatchLength; /* Minimum match length */ 26762306a36Sopenharmony_ci U32 hashRateLog; /* Log number of entries to skip */ 26862306a36Sopenharmony_ci U32 windowLog; /* Window log for the LDM */ 26962306a36Sopenharmony_ci} ldmParams_t; 27062306a36Sopenharmony_ci 27162306a36Sopenharmony_citypedef struct { 27262306a36Sopenharmony_ci int collectSequences; 27362306a36Sopenharmony_ci ZSTD_Sequence* seqStart; 27462306a36Sopenharmony_ci size_t seqIndex; 27562306a36Sopenharmony_ci size_t maxSequences; 27662306a36Sopenharmony_ci} SeqCollector; 27762306a36Sopenharmony_ci 27862306a36Sopenharmony_cistruct ZSTD_CCtx_params_s { 27962306a36Sopenharmony_ci ZSTD_format_e format; 28062306a36Sopenharmony_ci ZSTD_compressionParameters cParams; 28162306a36Sopenharmony_ci ZSTD_frameParameters fParams; 28262306a36Sopenharmony_ci 28362306a36Sopenharmony_ci int compressionLevel; 28462306a36Sopenharmony_ci int forceWindow; /* force back-references to respect limit of 28562306a36Sopenharmony_ci * 1<<wLog, even for dictionary */ 28662306a36Sopenharmony_ci size_t targetCBlockSize; /* Tries to fit compressed block size to be around targetCBlockSize. 28762306a36Sopenharmony_ci * No target when targetCBlockSize == 0. 28862306a36Sopenharmony_ci * There is no guarantee on compressed block size */ 28962306a36Sopenharmony_ci int srcSizeHint; /* User's best guess of source size. 29062306a36Sopenharmony_ci * Hint is not valid when srcSizeHint == 0. 29162306a36Sopenharmony_ci * There is no guarantee that hint is close to actual source size */ 29262306a36Sopenharmony_ci 29362306a36Sopenharmony_ci ZSTD_dictAttachPref_e attachDictPref; 29462306a36Sopenharmony_ci ZSTD_paramSwitch_e literalCompressionMode; 29562306a36Sopenharmony_ci 29662306a36Sopenharmony_ci /* Multithreading: used to pass parameters to mtctx */ 29762306a36Sopenharmony_ci int nbWorkers; 29862306a36Sopenharmony_ci size_t jobSize; 29962306a36Sopenharmony_ci int overlapLog; 30062306a36Sopenharmony_ci int rsyncable; 30162306a36Sopenharmony_ci 30262306a36Sopenharmony_ci /* Long distance matching parameters */ 30362306a36Sopenharmony_ci ldmParams_t ldmParams; 30462306a36Sopenharmony_ci 30562306a36Sopenharmony_ci /* Dedicated dict search algorithm trigger */ 30662306a36Sopenharmony_ci int enableDedicatedDictSearch; 30762306a36Sopenharmony_ci 30862306a36Sopenharmony_ci /* Input/output buffer modes */ 30962306a36Sopenharmony_ci ZSTD_bufferMode_e inBufferMode; 31062306a36Sopenharmony_ci ZSTD_bufferMode_e outBufferMode; 31162306a36Sopenharmony_ci 31262306a36Sopenharmony_ci /* Sequence compression API */ 31362306a36Sopenharmony_ci ZSTD_sequenceFormat_e blockDelimiters; 31462306a36Sopenharmony_ci int validateSequences; 31562306a36Sopenharmony_ci 31662306a36Sopenharmony_ci /* Block splitting */ 31762306a36Sopenharmony_ci ZSTD_paramSwitch_e useBlockSplitter; 31862306a36Sopenharmony_ci 31962306a36Sopenharmony_ci /* Param for deciding whether to use row-based matchfinder */ 32062306a36Sopenharmony_ci ZSTD_paramSwitch_e useRowMatchFinder; 32162306a36Sopenharmony_ci 32262306a36Sopenharmony_ci /* Always load a dictionary in ext-dict mode (not prefix mode)? */ 32362306a36Sopenharmony_ci int deterministicRefPrefix; 32462306a36Sopenharmony_ci 32562306a36Sopenharmony_ci /* Internal use, for createCCtxParams() and freeCCtxParams() only */ 32662306a36Sopenharmony_ci ZSTD_customMem customMem; 32762306a36Sopenharmony_ci}; /* typedef'd to ZSTD_CCtx_params within "zstd.h" */ 32862306a36Sopenharmony_ci 32962306a36Sopenharmony_ci#define COMPRESS_SEQUENCES_WORKSPACE_SIZE (sizeof(unsigned) * (MaxSeq + 2)) 33062306a36Sopenharmony_ci#define ENTROPY_WORKSPACE_SIZE (HUF_WORKSPACE_SIZE + COMPRESS_SEQUENCES_WORKSPACE_SIZE) 33162306a36Sopenharmony_ci 33262306a36Sopenharmony_ci/* 33362306a36Sopenharmony_ci * Indicates whether this compression proceeds directly from user-provided 33462306a36Sopenharmony_ci * source buffer to user-provided destination buffer (ZSTDb_not_buffered), or 33562306a36Sopenharmony_ci * whether the context needs to buffer the input/output (ZSTDb_buffered). 33662306a36Sopenharmony_ci */ 33762306a36Sopenharmony_citypedef enum { 33862306a36Sopenharmony_ci ZSTDb_not_buffered, 33962306a36Sopenharmony_ci ZSTDb_buffered 34062306a36Sopenharmony_ci} ZSTD_buffered_policy_e; 34162306a36Sopenharmony_ci 34262306a36Sopenharmony_ci/* 34362306a36Sopenharmony_ci * Struct that contains all elements of block splitter that should be allocated 34462306a36Sopenharmony_ci * in a wksp. 34562306a36Sopenharmony_ci */ 34662306a36Sopenharmony_ci#define ZSTD_MAX_NB_BLOCK_SPLITS 196 34762306a36Sopenharmony_citypedef struct { 34862306a36Sopenharmony_ci seqStore_t fullSeqStoreChunk; 34962306a36Sopenharmony_ci seqStore_t firstHalfSeqStore; 35062306a36Sopenharmony_ci seqStore_t secondHalfSeqStore; 35162306a36Sopenharmony_ci seqStore_t currSeqStore; 35262306a36Sopenharmony_ci seqStore_t nextSeqStore; 35362306a36Sopenharmony_ci 35462306a36Sopenharmony_ci U32 partitions[ZSTD_MAX_NB_BLOCK_SPLITS]; 35562306a36Sopenharmony_ci ZSTD_entropyCTablesMetadata_t entropyMetadata; 35662306a36Sopenharmony_ci} ZSTD_blockSplitCtx; 35762306a36Sopenharmony_ci 35862306a36Sopenharmony_cistruct ZSTD_CCtx_s { 35962306a36Sopenharmony_ci ZSTD_compressionStage_e stage; 36062306a36Sopenharmony_ci int cParamsChanged; /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */ 36162306a36Sopenharmony_ci int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */ 36262306a36Sopenharmony_ci ZSTD_CCtx_params requestedParams; 36362306a36Sopenharmony_ci ZSTD_CCtx_params appliedParams; 36462306a36Sopenharmony_ci ZSTD_CCtx_params simpleApiParams; /* Param storage used by the simple API - not sticky. Must only be used in top-level simple API functions for storage. */ 36562306a36Sopenharmony_ci U32 dictID; 36662306a36Sopenharmony_ci size_t dictContentSize; 36762306a36Sopenharmony_ci 36862306a36Sopenharmony_ci ZSTD_cwksp workspace; /* manages buffer for dynamic allocations */ 36962306a36Sopenharmony_ci size_t blockSize; 37062306a36Sopenharmony_ci unsigned long long pledgedSrcSizePlusOne; /* this way, 0 (default) == unknown */ 37162306a36Sopenharmony_ci unsigned long long consumedSrcSize; 37262306a36Sopenharmony_ci unsigned long long producedCSize; 37362306a36Sopenharmony_ci struct xxh64_state xxhState; 37462306a36Sopenharmony_ci ZSTD_customMem customMem; 37562306a36Sopenharmony_ci ZSTD_threadPool* pool; 37662306a36Sopenharmony_ci size_t staticSize; 37762306a36Sopenharmony_ci SeqCollector seqCollector; 37862306a36Sopenharmony_ci int isFirstBlock; 37962306a36Sopenharmony_ci int initialized; 38062306a36Sopenharmony_ci 38162306a36Sopenharmony_ci seqStore_t seqStore; /* sequences storage ptrs */ 38262306a36Sopenharmony_ci ldmState_t ldmState; /* long distance matching state */ 38362306a36Sopenharmony_ci rawSeq* ldmSequences; /* Storage for the ldm output sequences */ 38462306a36Sopenharmony_ci size_t maxNbLdmSequences; 38562306a36Sopenharmony_ci rawSeqStore_t externSeqStore; /* Mutable reference to external sequences */ 38662306a36Sopenharmony_ci ZSTD_blockState_t blockState; 38762306a36Sopenharmony_ci U32* entropyWorkspace; /* entropy workspace of ENTROPY_WORKSPACE_SIZE bytes */ 38862306a36Sopenharmony_ci 38962306a36Sopenharmony_ci /* Whether we are streaming or not */ 39062306a36Sopenharmony_ci ZSTD_buffered_policy_e bufferedPolicy; 39162306a36Sopenharmony_ci 39262306a36Sopenharmony_ci /* streaming */ 39362306a36Sopenharmony_ci char* inBuff; 39462306a36Sopenharmony_ci size_t inBuffSize; 39562306a36Sopenharmony_ci size_t inToCompress; 39662306a36Sopenharmony_ci size_t inBuffPos; 39762306a36Sopenharmony_ci size_t inBuffTarget; 39862306a36Sopenharmony_ci char* outBuff; 39962306a36Sopenharmony_ci size_t outBuffSize; 40062306a36Sopenharmony_ci size_t outBuffContentSize; 40162306a36Sopenharmony_ci size_t outBuffFlushedSize; 40262306a36Sopenharmony_ci ZSTD_cStreamStage streamStage; 40362306a36Sopenharmony_ci U32 frameEnded; 40462306a36Sopenharmony_ci 40562306a36Sopenharmony_ci /* Stable in/out buffer verification */ 40662306a36Sopenharmony_ci ZSTD_inBuffer expectedInBuffer; 40762306a36Sopenharmony_ci size_t expectedOutBufferSize; 40862306a36Sopenharmony_ci 40962306a36Sopenharmony_ci /* Dictionary */ 41062306a36Sopenharmony_ci ZSTD_localDict localDict; 41162306a36Sopenharmony_ci const ZSTD_CDict* cdict; 41262306a36Sopenharmony_ci ZSTD_prefixDict prefixDict; /* single-usage dictionary */ 41362306a36Sopenharmony_ci 41462306a36Sopenharmony_ci /* Multi-threading */ 41562306a36Sopenharmony_ci 41662306a36Sopenharmony_ci /* Tracing */ 41762306a36Sopenharmony_ci 41862306a36Sopenharmony_ci /* Workspace for block splitter */ 41962306a36Sopenharmony_ci ZSTD_blockSplitCtx blockSplitCtx; 42062306a36Sopenharmony_ci}; 42162306a36Sopenharmony_ci 42262306a36Sopenharmony_citypedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e; 42362306a36Sopenharmony_ci 42462306a36Sopenharmony_citypedef enum { 42562306a36Sopenharmony_ci ZSTD_noDict = 0, 42662306a36Sopenharmony_ci ZSTD_extDict = 1, 42762306a36Sopenharmony_ci ZSTD_dictMatchState = 2, 42862306a36Sopenharmony_ci ZSTD_dedicatedDictSearch = 3 42962306a36Sopenharmony_ci} ZSTD_dictMode_e; 43062306a36Sopenharmony_ci 43162306a36Sopenharmony_citypedef enum { 43262306a36Sopenharmony_ci ZSTD_cpm_noAttachDict = 0, /* Compression with ZSTD_noDict or ZSTD_extDict. 43362306a36Sopenharmony_ci * In this mode we use both the srcSize and the dictSize 43462306a36Sopenharmony_ci * when selecting and adjusting parameters. 43562306a36Sopenharmony_ci */ 43662306a36Sopenharmony_ci ZSTD_cpm_attachDict = 1, /* Compression with ZSTD_dictMatchState or ZSTD_dedicatedDictSearch. 43762306a36Sopenharmony_ci * In this mode we only take the srcSize into account when selecting 43862306a36Sopenharmony_ci * and adjusting parameters. 43962306a36Sopenharmony_ci */ 44062306a36Sopenharmony_ci ZSTD_cpm_createCDict = 2, /* Creating a CDict. 44162306a36Sopenharmony_ci * In this mode we take both the source size and the dictionary size 44262306a36Sopenharmony_ci * into account when selecting and adjusting the parameters. 44362306a36Sopenharmony_ci */ 44462306a36Sopenharmony_ci ZSTD_cpm_unknown = 3, /* ZSTD_getCParams, ZSTD_getParams, ZSTD_adjustParams. 44562306a36Sopenharmony_ci * We don't know what these parameters are for. We default to the legacy 44662306a36Sopenharmony_ci * behavior of taking both the source size and the dict size into account 44762306a36Sopenharmony_ci * when selecting and adjusting parameters. 44862306a36Sopenharmony_ci */ 44962306a36Sopenharmony_ci} ZSTD_cParamMode_e; 45062306a36Sopenharmony_ci 45162306a36Sopenharmony_citypedef size_t (*ZSTD_blockCompressor) ( 45262306a36Sopenharmony_ci ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], 45362306a36Sopenharmony_ci void const* src, size_t srcSize); 45462306a36Sopenharmony_ciZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramSwitch_e rowMatchfinderMode, ZSTD_dictMode_e dictMode); 45562306a36Sopenharmony_ci 45662306a36Sopenharmony_ci 45762306a36Sopenharmony_ciMEM_STATIC U32 ZSTD_LLcode(U32 litLength) 45862306a36Sopenharmony_ci{ 45962306a36Sopenharmony_ci static const BYTE LL_Code[64] = { 0, 1, 2, 3, 4, 5, 6, 7, 46062306a36Sopenharmony_ci 8, 9, 10, 11, 12, 13, 14, 15, 46162306a36Sopenharmony_ci 16, 16, 17, 17, 18, 18, 19, 19, 46262306a36Sopenharmony_ci 20, 20, 20, 20, 21, 21, 21, 21, 46362306a36Sopenharmony_ci 22, 22, 22, 22, 22, 22, 22, 22, 46462306a36Sopenharmony_ci 23, 23, 23, 23, 23, 23, 23, 23, 46562306a36Sopenharmony_ci 24, 24, 24, 24, 24, 24, 24, 24, 46662306a36Sopenharmony_ci 24, 24, 24, 24, 24, 24, 24, 24 }; 46762306a36Sopenharmony_ci static const U32 LL_deltaCode = 19; 46862306a36Sopenharmony_ci return (litLength > 63) ? ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength]; 46962306a36Sopenharmony_ci} 47062306a36Sopenharmony_ci 47162306a36Sopenharmony_ci/* ZSTD_MLcode() : 47262306a36Sopenharmony_ci * note : mlBase = matchLength - MINMATCH; 47362306a36Sopenharmony_ci * because it's the format it's stored in seqStore->sequences */ 47462306a36Sopenharmony_ciMEM_STATIC U32 ZSTD_MLcode(U32 mlBase) 47562306a36Sopenharmony_ci{ 47662306a36Sopenharmony_ci static const BYTE ML_Code[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 47762306a36Sopenharmony_ci 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 47862306a36Sopenharmony_ci 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37, 47962306a36Sopenharmony_ci 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39, 48062306a36Sopenharmony_ci 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 48162306a36Sopenharmony_ci 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 48262306a36Sopenharmony_ci 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 48362306a36Sopenharmony_ci 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 }; 48462306a36Sopenharmony_ci static const U32 ML_deltaCode = 36; 48562306a36Sopenharmony_ci return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase]; 48662306a36Sopenharmony_ci} 48762306a36Sopenharmony_ci 48862306a36Sopenharmony_ci/* ZSTD_cParam_withinBounds: 48962306a36Sopenharmony_ci * @return 1 if value is within cParam bounds, 49062306a36Sopenharmony_ci * 0 otherwise */ 49162306a36Sopenharmony_ciMEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value) 49262306a36Sopenharmony_ci{ 49362306a36Sopenharmony_ci ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); 49462306a36Sopenharmony_ci if (ZSTD_isError(bounds.error)) return 0; 49562306a36Sopenharmony_ci if (value < bounds.lowerBound) return 0; 49662306a36Sopenharmony_ci if (value > bounds.upperBound) return 0; 49762306a36Sopenharmony_ci return 1; 49862306a36Sopenharmony_ci} 49962306a36Sopenharmony_ci 50062306a36Sopenharmony_ci/* ZSTD_noCompressBlock() : 50162306a36Sopenharmony_ci * Writes uncompressed block to dst buffer from given src. 50262306a36Sopenharmony_ci * Returns the size of the block */ 50362306a36Sopenharmony_ciMEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock) 50462306a36Sopenharmony_ci{ 50562306a36Sopenharmony_ci U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3); 50662306a36Sopenharmony_ci RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity, 50762306a36Sopenharmony_ci dstSize_tooSmall, "dst buf too small for uncompressed block"); 50862306a36Sopenharmony_ci MEM_writeLE24(dst, cBlockHeader24); 50962306a36Sopenharmony_ci ZSTD_memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize); 51062306a36Sopenharmony_ci return ZSTD_blockHeaderSize + srcSize; 51162306a36Sopenharmony_ci} 51262306a36Sopenharmony_ci 51362306a36Sopenharmony_ciMEM_STATIC size_t ZSTD_rleCompressBlock (void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock) 51462306a36Sopenharmony_ci{ 51562306a36Sopenharmony_ci BYTE* const op = (BYTE*)dst; 51662306a36Sopenharmony_ci U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3); 51762306a36Sopenharmony_ci RETURN_ERROR_IF(dstCapacity < 4, dstSize_tooSmall, ""); 51862306a36Sopenharmony_ci MEM_writeLE24(op, cBlockHeader); 51962306a36Sopenharmony_ci op[3] = src; 52062306a36Sopenharmony_ci return 4; 52162306a36Sopenharmony_ci} 52262306a36Sopenharmony_ci 52362306a36Sopenharmony_ci 52462306a36Sopenharmony_ci/* ZSTD_minGain() : 52562306a36Sopenharmony_ci * minimum compression required 52662306a36Sopenharmony_ci * to generate a compress block or a compressed literals section. 52762306a36Sopenharmony_ci * note : use same formula for both situations */ 52862306a36Sopenharmony_ciMEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat) 52962306a36Sopenharmony_ci{ 53062306a36Sopenharmony_ci U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6; 53162306a36Sopenharmony_ci ZSTD_STATIC_ASSERT(ZSTD_btultra == 8); 53262306a36Sopenharmony_ci assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat)); 53362306a36Sopenharmony_ci return (srcSize >> minlog) + 2; 53462306a36Sopenharmony_ci} 53562306a36Sopenharmony_ci 53662306a36Sopenharmony_ciMEM_STATIC int ZSTD_literalsCompressionIsDisabled(const ZSTD_CCtx_params* cctxParams) 53762306a36Sopenharmony_ci{ 53862306a36Sopenharmony_ci switch (cctxParams->literalCompressionMode) { 53962306a36Sopenharmony_ci case ZSTD_ps_enable: 54062306a36Sopenharmony_ci return 0; 54162306a36Sopenharmony_ci case ZSTD_ps_disable: 54262306a36Sopenharmony_ci return 1; 54362306a36Sopenharmony_ci default: 54462306a36Sopenharmony_ci assert(0 /* impossible: pre-validated */); 54562306a36Sopenharmony_ci ZSTD_FALLTHROUGH; 54662306a36Sopenharmony_ci case ZSTD_ps_auto: 54762306a36Sopenharmony_ci return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0); 54862306a36Sopenharmony_ci } 54962306a36Sopenharmony_ci} 55062306a36Sopenharmony_ci 55162306a36Sopenharmony_ci/*! ZSTD_safecopyLiterals() : 55262306a36Sopenharmony_ci * memcpy() function that won't read beyond more than WILDCOPY_OVERLENGTH bytes past ilimit_w. 55362306a36Sopenharmony_ci * Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single 55462306a36Sopenharmony_ci * large copies. 55562306a36Sopenharmony_ci */ 55662306a36Sopenharmony_cistatic void 55762306a36Sopenharmony_ciZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w) 55862306a36Sopenharmony_ci{ 55962306a36Sopenharmony_ci assert(iend > ilimit_w); 56062306a36Sopenharmony_ci if (ip <= ilimit_w) { 56162306a36Sopenharmony_ci ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap); 56262306a36Sopenharmony_ci op += ilimit_w - ip; 56362306a36Sopenharmony_ci ip = ilimit_w; 56462306a36Sopenharmony_ci } 56562306a36Sopenharmony_ci while (ip < iend) *op++ = *ip++; 56662306a36Sopenharmony_ci} 56762306a36Sopenharmony_ci 56862306a36Sopenharmony_ci#define ZSTD_REP_MOVE (ZSTD_REP_NUM-1) 56962306a36Sopenharmony_ci#define STORE_REPCODE_1 STORE_REPCODE(1) 57062306a36Sopenharmony_ci#define STORE_REPCODE_2 STORE_REPCODE(2) 57162306a36Sopenharmony_ci#define STORE_REPCODE_3 STORE_REPCODE(3) 57262306a36Sopenharmony_ci#define STORE_REPCODE(r) (assert((r)>=1), assert((r)<=3), (r)-1) 57362306a36Sopenharmony_ci#define STORE_OFFSET(o) (assert((o)>0), o + ZSTD_REP_MOVE) 57462306a36Sopenharmony_ci#define STORED_IS_OFFSET(o) ((o) > ZSTD_REP_MOVE) 57562306a36Sopenharmony_ci#define STORED_IS_REPCODE(o) ((o) <= ZSTD_REP_MOVE) 57662306a36Sopenharmony_ci#define STORED_OFFSET(o) (assert(STORED_IS_OFFSET(o)), (o)-ZSTD_REP_MOVE) 57762306a36Sopenharmony_ci#define STORED_REPCODE(o) (assert(STORED_IS_REPCODE(o)), (o)+1) /* returns ID 1,2,3 */ 57862306a36Sopenharmony_ci#define STORED_TO_OFFBASE(o) ((o)+1) 57962306a36Sopenharmony_ci#define OFFBASE_TO_STORED(o) ((o)-1) 58062306a36Sopenharmony_ci 58162306a36Sopenharmony_ci/*! ZSTD_storeSeq() : 58262306a36Sopenharmony_ci * Store a sequence (litlen, litPtr, offCode and matchLength) into seqStore_t. 58362306a36Sopenharmony_ci * @offBase_minus1 : Users should use employ macros STORE_REPCODE_X and STORE_OFFSET(). 58462306a36Sopenharmony_ci * @matchLength : must be >= MINMATCH 58562306a36Sopenharmony_ci * Allowed to overread literals up to litLimit. 58662306a36Sopenharmony_ci*/ 58762306a36Sopenharmony_ciHINT_INLINE UNUSED_ATTR void 58862306a36Sopenharmony_ciZSTD_storeSeq(seqStore_t* seqStorePtr, 58962306a36Sopenharmony_ci size_t litLength, const BYTE* literals, const BYTE* litLimit, 59062306a36Sopenharmony_ci U32 offBase_minus1, 59162306a36Sopenharmony_ci size_t matchLength) 59262306a36Sopenharmony_ci{ 59362306a36Sopenharmony_ci BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH; 59462306a36Sopenharmony_ci BYTE const* const litEnd = literals + litLength; 59562306a36Sopenharmony_ci#if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6) 59662306a36Sopenharmony_ci static const BYTE* g_start = NULL; 59762306a36Sopenharmony_ci if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */ 59862306a36Sopenharmony_ci { U32 const pos = (U32)((const BYTE*)literals - g_start); 59962306a36Sopenharmony_ci DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u", 60062306a36Sopenharmony_ci pos, (U32)litLength, (U32)matchLength, (U32)offBase_minus1); 60162306a36Sopenharmony_ci } 60262306a36Sopenharmony_ci#endif 60362306a36Sopenharmony_ci assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq); 60462306a36Sopenharmony_ci /* copy Literals */ 60562306a36Sopenharmony_ci assert(seqStorePtr->maxNbLit <= 128 KB); 60662306a36Sopenharmony_ci assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit); 60762306a36Sopenharmony_ci assert(literals + litLength <= litLimit); 60862306a36Sopenharmony_ci if (litEnd <= litLimit_w) { 60962306a36Sopenharmony_ci /* Common case we can use wildcopy. 61062306a36Sopenharmony_ci * First copy 16 bytes, because literals are likely short. 61162306a36Sopenharmony_ci */ 61262306a36Sopenharmony_ci assert(WILDCOPY_OVERLENGTH >= 16); 61362306a36Sopenharmony_ci ZSTD_copy16(seqStorePtr->lit, literals); 61462306a36Sopenharmony_ci if (litLength > 16) { 61562306a36Sopenharmony_ci ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap); 61662306a36Sopenharmony_ci } 61762306a36Sopenharmony_ci } else { 61862306a36Sopenharmony_ci ZSTD_safecopyLiterals(seqStorePtr->lit, literals, litEnd, litLimit_w); 61962306a36Sopenharmony_ci } 62062306a36Sopenharmony_ci seqStorePtr->lit += litLength; 62162306a36Sopenharmony_ci 62262306a36Sopenharmony_ci /* literal Length */ 62362306a36Sopenharmony_ci if (litLength>0xFFFF) { 62462306a36Sopenharmony_ci assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */ 62562306a36Sopenharmony_ci seqStorePtr->longLengthType = ZSTD_llt_literalLength; 62662306a36Sopenharmony_ci seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); 62762306a36Sopenharmony_ci } 62862306a36Sopenharmony_ci seqStorePtr->sequences[0].litLength = (U16)litLength; 62962306a36Sopenharmony_ci 63062306a36Sopenharmony_ci /* match offset */ 63162306a36Sopenharmony_ci seqStorePtr->sequences[0].offBase = STORED_TO_OFFBASE(offBase_minus1); 63262306a36Sopenharmony_ci 63362306a36Sopenharmony_ci /* match Length */ 63462306a36Sopenharmony_ci assert(matchLength >= MINMATCH); 63562306a36Sopenharmony_ci { size_t const mlBase = matchLength - MINMATCH; 63662306a36Sopenharmony_ci if (mlBase>0xFFFF) { 63762306a36Sopenharmony_ci assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */ 63862306a36Sopenharmony_ci seqStorePtr->longLengthType = ZSTD_llt_matchLength; 63962306a36Sopenharmony_ci seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); 64062306a36Sopenharmony_ci } 64162306a36Sopenharmony_ci seqStorePtr->sequences[0].mlBase = (U16)mlBase; 64262306a36Sopenharmony_ci } 64362306a36Sopenharmony_ci 64462306a36Sopenharmony_ci seqStorePtr->sequences++; 64562306a36Sopenharmony_ci} 64662306a36Sopenharmony_ci 64762306a36Sopenharmony_ci/* ZSTD_updateRep() : 64862306a36Sopenharmony_ci * updates in-place @rep (array of repeat offsets) 64962306a36Sopenharmony_ci * @offBase_minus1 : sum-type, with same numeric representation as ZSTD_storeSeq() 65062306a36Sopenharmony_ci */ 65162306a36Sopenharmony_ciMEM_STATIC void 65262306a36Sopenharmony_ciZSTD_updateRep(U32 rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0) 65362306a36Sopenharmony_ci{ 65462306a36Sopenharmony_ci if (STORED_IS_OFFSET(offBase_minus1)) { /* full offset */ 65562306a36Sopenharmony_ci rep[2] = rep[1]; 65662306a36Sopenharmony_ci rep[1] = rep[0]; 65762306a36Sopenharmony_ci rep[0] = STORED_OFFSET(offBase_minus1); 65862306a36Sopenharmony_ci } else { /* repcode */ 65962306a36Sopenharmony_ci U32 const repCode = STORED_REPCODE(offBase_minus1) - 1 + ll0; 66062306a36Sopenharmony_ci if (repCode > 0) { /* note : if repCode==0, no change */ 66162306a36Sopenharmony_ci U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; 66262306a36Sopenharmony_ci rep[2] = (repCode >= 2) ? rep[1] : rep[2]; 66362306a36Sopenharmony_ci rep[1] = rep[0]; 66462306a36Sopenharmony_ci rep[0] = currentOffset; 66562306a36Sopenharmony_ci } else { /* repCode == 0 */ 66662306a36Sopenharmony_ci /* nothing to do */ 66762306a36Sopenharmony_ci } 66862306a36Sopenharmony_ci } 66962306a36Sopenharmony_ci} 67062306a36Sopenharmony_ci 67162306a36Sopenharmony_citypedef struct repcodes_s { 67262306a36Sopenharmony_ci U32 rep[3]; 67362306a36Sopenharmony_ci} repcodes_t; 67462306a36Sopenharmony_ci 67562306a36Sopenharmony_ciMEM_STATIC repcodes_t 67662306a36Sopenharmony_ciZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0) 67762306a36Sopenharmony_ci{ 67862306a36Sopenharmony_ci repcodes_t newReps; 67962306a36Sopenharmony_ci ZSTD_memcpy(&newReps, rep, sizeof(newReps)); 68062306a36Sopenharmony_ci ZSTD_updateRep(newReps.rep, offBase_minus1, ll0); 68162306a36Sopenharmony_ci return newReps; 68262306a36Sopenharmony_ci} 68362306a36Sopenharmony_ci 68462306a36Sopenharmony_ci 68562306a36Sopenharmony_ci/*-************************************* 68662306a36Sopenharmony_ci* Match length counter 68762306a36Sopenharmony_ci***************************************/ 68862306a36Sopenharmony_cistatic unsigned ZSTD_NbCommonBytes (size_t val) 68962306a36Sopenharmony_ci{ 69062306a36Sopenharmony_ci if (MEM_isLittleEndian()) { 69162306a36Sopenharmony_ci if (MEM_64bits()) { 69262306a36Sopenharmony_ci# if (__GNUC__ >= 4) 69362306a36Sopenharmony_ci return (__builtin_ctzll((U64)val) >> 3); 69462306a36Sopenharmony_ci# else 69562306a36Sopenharmony_ci static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 69662306a36Sopenharmony_ci 0, 3, 1, 3, 1, 4, 2, 7, 69762306a36Sopenharmony_ci 0, 2, 3, 6, 1, 5, 3, 5, 69862306a36Sopenharmony_ci 1, 3, 4, 4, 2, 5, 6, 7, 69962306a36Sopenharmony_ci 7, 0, 1, 2, 3, 3, 4, 6, 70062306a36Sopenharmony_ci 2, 6, 5, 5, 3, 4, 5, 6, 70162306a36Sopenharmony_ci 7, 1, 2, 4, 6, 4, 4, 5, 70262306a36Sopenharmony_ci 7, 2, 6, 5, 7, 6, 7, 7 }; 70362306a36Sopenharmony_ci return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; 70462306a36Sopenharmony_ci# endif 70562306a36Sopenharmony_ci } else { /* 32 bits */ 70662306a36Sopenharmony_ci# if (__GNUC__ >= 3) 70762306a36Sopenharmony_ci return (__builtin_ctz((U32)val) >> 3); 70862306a36Sopenharmony_ci# else 70962306a36Sopenharmony_ci static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 71062306a36Sopenharmony_ci 3, 2, 2, 1, 3, 2, 0, 1, 71162306a36Sopenharmony_ci 3, 3, 1, 2, 2, 2, 2, 0, 71262306a36Sopenharmony_ci 3, 1, 2, 0, 1, 0, 1, 1 }; 71362306a36Sopenharmony_ci return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; 71462306a36Sopenharmony_ci# endif 71562306a36Sopenharmony_ci } 71662306a36Sopenharmony_ci } else { /* Big Endian CPU */ 71762306a36Sopenharmony_ci if (MEM_64bits()) { 71862306a36Sopenharmony_ci# if (__GNUC__ >= 4) 71962306a36Sopenharmony_ci return (__builtin_clzll(val) >> 3); 72062306a36Sopenharmony_ci# else 72162306a36Sopenharmony_ci unsigned r; 72262306a36Sopenharmony_ci const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */ 72362306a36Sopenharmony_ci if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; } 72462306a36Sopenharmony_ci if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } 72562306a36Sopenharmony_ci r += (!val); 72662306a36Sopenharmony_ci return r; 72762306a36Sopenharmony_ci# endif 72862306a36Sopenharmony_ci } else { /* 32 bits */ 72962306a36Sopenharmony_ci# if (__GNUC__ >= 3) 73062306a36Sopenharmony_ci return (__builtin_clz((U32)val) >> 3); 73162306a36Sopenharmony_ci# else 73262306a36Sopenharmony_ci unsigned r; 73362306a36Sopenharmony_ci if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } 73462306a36Sopenharmony_ci r += (!val); 73562306a36Sopenharmony_ci return r; 73662306a36Sopenharmony_ci# endif 73762306a36Sopenharmony_ci } } 73862306a36Sopenharmony_ci} 73962306a36Sopenharmony_ci 74062306a36Sopenharmony_ci 74162306a36Sopenharmony_ciMEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit) 74262306a36Sopenharmony_ci{ 74362306a36Sopenharmony_ci const BYTE* const pStart = pIn; 74462306a36Sopenharmony_ci const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1); 74562306a36Sopenharmony_ci 74662306a36Sopenharmony_ci if (pIn < pInLoopLimit) { 74762306a36Sopenharmony_ci { size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn); 74862306a36Sopenharmony_ci if (diff) return ZSTD_NbCommonBytes(diff); } 74962306a36Sopenharmony_ci pIn+=sizeof(size_t); pMatch+=sizeof(size_t); 75062306a36Sopenharmony_ci while (pIn < pInLoopLimit) { 75162306a36Sopenharmony_ci size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn); 75262306a36Sopenharmony_ci if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; } 75362306a36Sopenharmony_ci pIn += ZSTD_NbCommonBytes(diff); 75462306a36Sopenharmony_ci return (size_t)(pIn - pStart); 75562306a36Sopenharmony_ci } } 75662306a36Sopenharmony_ci if (MEM_64bits() && (pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; } 75762306a36Sopenharmony_ci if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; } 75862306a36Sopenharmony_ci if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++; 75962306a36Sopenharmony_ci return (size_t)(pIn - pStart); 76062306a36Sopenharmony_ci} 76162306a36Sopenharmony_ci 76262306a36Sopenharmony_ci/* ZSTD_count_2segments() : 76362306a36Sopenharmony_ci * can count match length with `ip` & `match` in 2 different segments. 76462306a36Sopenharmony_ci * convention : on reaching mEnd, match count continue starting from iStart 76562306a36Sopenharmony_ci */ 76662306a36Sopenharmony_ciMEM_STATIC size_t 76762306a36Sopenharmony_ciZSTD_count_2segments(const BYTE* ip, const BYTE* match, 76862306a36Sopenharmony_ci const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart) 76962306a36Sopenharmony_ci{ 77062306a36Sopenharmony_ci const BYTE* const vEnd = MIN( ip + (mEnd - match), iEnd); 77162306a36Sopenharmony_ci size_t const matchLength = ZSTD_count(ip, match, vEnd); 77262306a36Sopenharmony_ci if (match + matchLength != mEnd) return matchLength; 77362306a36Sopenharmony_ci DEBUGLOG(7, "ZSTD_count_2segments: found a 2-parts match (current length==%zu)", matchLength); 77462306a36Sopenharmony_ci DEBUGLOG(7, "distance from match beginning to end dictionary = %zi", mEnd - match); 77562306a36Sopenharmony_ci DEBUGLOG(7, "distance from current pos to end buffer = %zi", iEnd - ip); 77662306a36Sopenharmony_ci DEBUGLOG(7, "next byte : ip==%02X, istart==%02X", ip[matchLength], *iStart); 77762306a36Sopenharmony_ci DEBUGLOG(7, "final match length = %zu", matchLength + ZSTD_count(ip+matchLength, iStart, iEnd)); 77862306a36Sopenharmony_ci return matchLength + ZSTD_count(ip+matchLength, iStart, iEnd); 77962306a36Sopenharmony_ci} 78062306a36Sopenharmony_ci 78162306a36Sopenharmony_ci 78262306a36Sopenharmony_ci/*-************************************* 78362306a36Sopenharmony_ci * Hashes 78462306a36Sopenharmony_ci ***************************************/ 78562306a36Sopenharmony_cistatic const U32 prime3bytes = 506832829U; 78662306a36Sopenharmony_cistatic U32 ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes) >> (32-h) ; } 78762306a36Sopenharmony_ciMEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */ 78862306a36Sopenharmony_ci 78962306a36Sopenharmony_cistatic const U32 prime4bytes = 2654435761U; 79062306a36Sopenharmony_cistatic U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; } 79162306a36Sopenharmony_cistatic size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); } 79262306a36Sopenharmony_ci 79362306a36Sopenharmony_cistatic const U64 prime5bytes = 889523592379ULL; 79462306a36Sopenharmony_cistatic size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u << (64-40)) * prime5bytes) >> (64-h)) ; } 79562306a36Sopenharmony_cistatic size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); } 79662306a36Sopenharmony_ci 79762306a36Sopenharmony_cistatic const U64 prime6bytes = 227718039650203ULL; 79862306a36Sopenharmony_cistatic size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; } 79962306a36Sopenharmony_cistatic size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); } 80062306a36Sopenharmony_ci 80162306a36Sopenharmony_cistatic const U64 prime7bytes = 58295818150454627ULL; 80262306a36Sopenharmony_cistatic size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u << (64-56)) * prime7bytes) >> (64-h)) ; } 80362306a36Sopenharmony_cistatic size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); } 80462306a36Sopenharmony_ci 80562306a36Sopenharmony_cistatic const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL; 80662306a36Sopenharmony_cistatic size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; } 80762306a36Sopenharmony_cistatic size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); } 80862306a36Sopenharmony_ci 80962306a36Sopenharmony_ciMEM_STATIC FORCE_INLINE_ATTR 81062306a36Sopenharmony_cisize_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls) 81162306a36Sopenharmony_ci{ 81262306a36Sopenharmony_ci switch(mls) 81362306a36Sopenharmony_ci { 81462306a36Sopenharmony_ci default: 81562306a36Sopenharmony_ci case 4: return ZSTD_hash4Ptr(p, hBits); 81662306a36Sopenharmony_ci case 5: return ZSTD_hash5Ptr(p, hBits); 81762306a36Sopenharmony_ci case 6: return ZSTD_hash6Ptr(p, hBits); 81862306a36Sopenharmony_ci case 7: return ZSTD_hash7Ptr(p, hBits); 81962306a36Sopenharmony_ci case 8: return ZSTD_hash8Ptr(p, hBits); 82062306a36Sopenharmony_ci } 82162306a36Sopenharmony_ci} 82262306a36Sopenharmony_ci 82362306a36Sopenharmony_ci/* ZSTD_ipow() : 82462306a36Sopenharmony_ci * Return base^exponent. 82562306a36Sopenharmony_ci */ 82662306a36Sopenharmony_cistatic U64 ZSTD_ipow(U64 base, U64 exponent) 82762306a36Sopenharmony_ci{ 82862306a36Sopenharmony_ci U64 power = 1; 82962306a36Sopenharmony_ci while (exponent) { 83062306a36Sopenharmony_ci if (exponent & 1) power *= base; 83162306a36Sopenharmony_ci exponent >>= 1; 83262306a36Sopenharmony_ci base *= base; 83362306a36Sopenharmony_ci } 83462306a36Sopenharmony_ci return power; 83562306a36Sopenharmony_ci} 83662306a36Sopenharmony_ci 83762306a36Sopenharmony_ci#define ZSTD_ROLL_HASH_CHAR_OFFSET 10 83862306a36Sopenharmony_ci 83962306a36Sopenharmony_ci/* ZSTD_rollingHash_append() : 84062306a36Sopenharmony_ci * Add the buffer to the hash value. 84162306a36Sopenharmony_ci */ 84262306a36Sopenharmony_cistatic U64 ZSTD_rollingHash_append(U64 hash, void const* buf, size_t size) 84362306a36Sopenharmony_ci{ 84462306a36Sopenharmony_ci BYTE const* istart = (BYTE const*)buf; 84562306a36Sopenharmony_ci size_t pos; 84662306a36Sopenharmony_ci for (pos = 0; pos < size; ++pos) { 84762306a36Sopenharmony_ci hash *= prime8bytes; 84862306a36Sopenharmony_ci hash += istart[pos] + ZSTD_ROLL_HASH_CHAR_OFFSET; 84962306a36Sopenharmony_ci } 85062306a36Sopenharmony_ci return hash; 85162306a36Sopenharmony_ci} 85262306a36Sopenharmony_ci 85362306a36Sopenharmony_ci/* ZSTD_rollingHash_compute() : 85462306a36Sopenharmony_ci * Compute the rolling hash value of the buffer. 85562306a36Sopenharmony_ci */ 85662306a36Sopenharmony_ciMEM_STATIC U64 ZSTD_rollingHash_compute(void const* buf, size_t size) 85762306a36Sopenharmony_ci{ 85862306a36Sopenharmony_ci return ZSTD_rollingHash_append(0, buf, size); 85962306a36Sopenharmony_ci} 86062306a36Sopenharmony_ci 86162306a36Sopenharmony_ci/* ZSTD_rollingHash_primePower() : 86262306a36Sopenharmony_ci * Compute the primePower to be passed to ZSTD_rollingHash_rotate() for a hash 86362306a36Sopenharmony_ci * over a window of length bytes. 86462306a36Sopenharmony_ci */ 86562306a36Sopenharmony_ciMEM_STATIC U64 ZSTD_rollingHash_primePower(U32 length) 86662306a36Sopenharmony_ci{ 86762306a36Sopenharmony_ci return ZSTD_ipow(prime8bytes, length - 1); 86862306a36Sopenharmony_ci} 86962306a36Sopenharmony_ci 87062306a36Sopenharmony_ci/* ZSTD_rollingHash_rotate() : 87162306a36Sopenharmony_ci * Rotate the rolling hash by one byte. 87262306a36Sopenharmony_ci */ 87362306a36Sopenharmony_ciMEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64 primePower) 87462306a36Sopenharmony_ci{ 87562306a36Sopenharmony_ci hash -= (toRemove + ZSTD_ROLL_HASH_CHAR_OFFSET) * primePower; 87662306a36Sopenharmony_ci hash *= prime8bytes; 87762306a36Sopenharmony_ci hash += toAdd + ZSTD_ROLL_HASH_CHAR_OFFSET; 87862306a36Sopenharmony_ci return hash; 87962306a36Sopenharmony_ci} 88062306a36Sopenharmony_ci 88162306a36Sopenharmony_ci/*-************************************* 88262306a36Sopenharmony_ci* Round buffer management 88362306a36Sopenharmony_ci***************************************/ 88462306a36Sopenharmony_ci#if (ZSTD_WINDOWLOG_MAX_64 > 31) 88562306a36Sopenharmony_ci# error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX" 88662306a36Sopenharmony_ci#endif 88762306a36Sopenharmony_ci/* Max current allowed */ 88862306a36Sopenharmony_ci#define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX)) 88962306a36Sopenharmony_ci/* Maximum chunk size before overflow correction needs to be called again */ 89062306a36Sopenharmony_ci#define ZSTD_CHUNKSIZE_MAX \ 89162306a36Sopenharmony_ci ( ((U32)-1) /* Maximum ending current index */ \ 89262306a36Sopenharmony_ci - ZSTD_CURRENT_MAX) /* Maximum beginning lowLimit */ 89362306a36Sopenharmony_ci 89462306a36Sopenharmony_ci/* 89562306a36Sopenharmony_ci * ZSTD_window_clear(): 89662306a36Sopenharmony_ci * Clears the window containing the history by simply setting it to empty. 89762306a36Sopenharmony_ci */ 89862306a36Sopenharmony_ciMEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window) 89962306a36Sopenharmony_ci{ 90062306a36Sopenharmony_ci size_t const endT = (size_t)(window->nextSrc - window->base); 90162306a36Sopenharmony_ci U32 const end = (U32)endT; 90262306a36Sopenharmony_ci 90362306a36Sopenharmony_ci window->lowLimit = end; 90462306a36Sopenharmony_ci window->dictLimit = end; 90562306a36Sopenharmony_ci} 90662306a36Sopenharmony_ci 90762306a36Sopenharmony_ciMEM_STATIC U32 ZSTD_window_isEmpty(ZSTD_window_t const window) 90862306a36Sopenharmony_ci{ 90962306a36Sopenharmony_ci return window.dictLimit == ZSTD_WINDOW_START_INDEX && 91062306a36Sopenharmony_ci window.lowLimit == ZSTD_WINDOW_START_INDEX && 91162306a36Sopenharmony_ci (window.nextSrc - window.base) == ZSTD_WINDOW_START_INDEX; 91262306a36Sopenharmony_ci} 91362306a36Sopenharmony_ci 91462306a36Sopenharmony_ci/* 91562306a36Sopenharmony_ci * ZSTD_window_hasExtDict(): 91662306a36Sopenharmony_ci * Returns non-zero if the window has a non-empty extDict. 91762306a36Sopenharmony_ci */ 91862306a36Sopenharmony_ciMEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window) 91962306a36Sopenharmony_ci{ 92062306a36Sopenharmony_ci return window.lowLimit < window.dictLimit; 92162306a36Sopenharmony_ci} 92262306a36Sopenharmony_ci 92362306a36Sopenharmony_ci/* 92462306a36Sopenharmony_ci * ZSTD_matchState_dictMode(): 92562306a36Sopenharmony_ci * Inspects the provided matchState and figures out what dictMode should be 92662306a36Sopenharmony_ci * passed to the compressor. 92762306a36Sopenharmony_ci */ 92862306a36Sopenharmony_ciMEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms) 92962306a36Sopenharmony_ci{ 93062306a36Sopenharmony_ci return ZSTD_window_hasExtDict(ms->window) ? 93162306a36Sopenharmony_ci ZSTD_extDict : 93262306a36Sopenharmony_ci ms->dictMatchState != NULL ? 93362306a36Sopenharmony_ci (ms->dictMatchState->dedicatedDictSearch ? ZSTD_dedicatedDictSearch : ZSTD_dictMatchState) : 93462306a36Sopenharmony_ci ZSTD_noDict; 93562306a36Sopenharmony_ci} 93662306a36Sopenharmony_ci 93762306a36Sopenharmony_ci/* Defining this macro to non-zero tells zstd to run the overflow correction 93862306a36Sopenharmony_ci * code much more frequently. This is very inefficient, and should only be 93962306a36Sopenharmony_ci * used for tests and fuzzers. 94062306a36Sopenharmony_ci */ 94162306a36Sopenharmony_ci#ifndef ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 94262306a36Sopenharmony_ci# ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION 94362306a36Sopenharmony_ci# define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 1 94462306a36Sopenharmony_ci# else 94562306a36Sopenharmony_ci# define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 0 94662306a36Sopenharmony_ci# endif 94762306a36Sopenharmony_ci#endif 94862306a36Sopenharmony_ci 94962306a36Sopenharmony_ci/* 95062306a36Sopenharmony_ci * ZSTD_window_canOverflowCorrect(): 95162306a36Sopenharmony_ci * Returns non-zero if the indices are large enough for overflow correction 95262306a36Sopenharmony_ci * to work correctly without impacting compression ratio. 95362306a36Sopenharmony_ci */ 95462306a36Sopenharmony_ciMEM_STATIC U32 ZSTD_window_canOverflowCorrect(ZSTD_window_t const window, 95562306a36Sopenharmony_ci U32 cycleLog, 95662306a36Sopenharmony_ci U32 maxDist, 95762306a36Sopenharmony_ci U32 loadedDictEnd, 95862306a36Sopenharmony_ci void const* src) 95962306a36Sopenharmony_ci{ 96062306a36Sopenharmony_ci U32 const cycleSize = 1u << cycleLog; 96162306a36Sopenharmony_ci U32 const curr = (U32)((BYTE const*)src - window.base); 96262306a36Sopenharmony_ci U32 const minIndexToOverflowCorrect = cycleSize 96362306a36Sopenharmony_ci + MAX(maxDist, cycleSize) 96462306a36Sopenharmony_ci + ZSTD_WINDOW_START_INDEX; 96562306a36Sopenharmony_ci 96662306a36Sopenharmony_ci /* Adjust the min index to backoff the overflow correction frequency, 96762306a36Sopenharmony_ci * so we don't waste too much CPU in overflow correction. If this 96862306a36Sopenharmony_ci * computation overflows we don't really care, we just need to make 96962306a36Sopenharmony_ci * sure it is at least minIndexToOverflowCorrect. 97062306a36Sopenharmony_ci */ 97162306a36Sopenharmony_ci U32 const adjustment = window.nbOverflowCorrections + 1; 97262306a36Sopenharmony_ci U32 const adjustedIndex = MAX(minIndexToOverflowCorrect * adjustment, 97362306a36Sopenharmony_ci minIndexToOverflowCorrect); 97462306a36Sopenharmony_ci U32 const indexLargeEnough = curr > adjustedIndex; 97562306a36Sopenharmony_ci 97662306a36Sopenharmony_ci /* Only overflow correct early if the dictionary is invalidated already, 97762306a36Sopenharmony_ci * so we don't hurt compression ratio. 97862306a36Sopenharmony_ci */ 97962306a36Sopenharmony_ci U32 const dictionaryInvalidated = curr > maxDist + loadedDictEnd; 98062306a36Sopenharmony_ci 98162306a36Sopenharmony_ci return indexLargeEnough && dictionaryInvalidated; 98262306a36Sopenharmony_ci} 98362306a36Sopenharmony_ci 98462306a36Sopenharmony_ci/* 98562306a36Sopenharmony_ci * ZSTD_window_needOverflowCorrection(): 98662306a36Sopenharmony_ci * Returns non-zero if the indices are getting too large and need overflow 98762306a36Sopenharmony_ci * protection. 98862306a36Sopenharmony_ci */ 98962306a36Sopenharmony_ciMEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window, 99062306a36Sopenharmony_ci U32 cycleLog, 99162306a36Sopenharmony_ci U32 maxDist, 99262306a36Sopenharmony_ci U32 loadedDictEnd, 99362306a36Sopenharmony_ci void const* src, 99462306a36Sopenharmony_ci void const* srcEnd) 99562306a36Sopenharmony_ci{ 99662306a36Sopenharmony_ci U32 const curr = (U32)((BYTE const*)srcEnd - window.base); 99762306a36Sopenharmony_ci if (ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) { 99862306a36Sopenharmony_ci if (ZSTD_window_canOverflowCorrect(window, cycleLog, maxDist, loadedDictEnd, src)) { 99962306a36Sopenharmony_ci return 1; 100062306a36Sopenharmony_ci } 100162306a36Sopenharmony_ci } 100262306a36Sopenharmony_ci return curr > ZSTD_CURRENT_MAX; 100362306a36Sopenharmony_ci} 100462306a36Sopenharmony_ci 100562306a36Sopenharmony_ci/* 100662306a36Sopenharmony_ci * ZSTD_window_correctOverflow(): 100762306a36Sopenharmony_ci * Reduces the indices to protect from index overflow. 100862306a36Sopenharmony_ci * Returns the correction made to the indices, which must be applied to every 100962306a36Sopenharmony_ci * stored index. 101062306a36Sopenharmony_ci * 101162306a36Sopenharmony_ci * The least significant cycleLog bits of the indices must remain the same, 101262306a36Sopenharmony_ci * which may be 0. Every index up to maxDist in the past must be valid. 101362306a36Sopenharmony_ci */ 101462306a36Sopenharmony_ciMEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog, 101562306a36Sopenharmony_ci U32 maxDist, void const* src) 101662306a36Sopenharmony_ci{ 101762306a36Sopenharmony_ci /* preemptive overflow correction: 101862306a36Sopenharmony_ci * 1. correction is large enough: 101962306a36Sopenharmony_ci * lowLimit > (3<<29) ==> current > 3<<29 + 1<<windowLog 102062306a36Sopenharmony_ci * 1<<windowLog <= newCurrent < 1<<chainLog + 1<<windowLog 102162306a36Sopenharmony_ci * 102262306a36Sopenharmony_ci * current - newCurrent 102362306a36Sopenharmony_ci * > (3<<29 + 1<<windowLog) - (1<<windowLog + 1<<chainLog) 102462306a36Sopenharmony_ci * > (3<<29) - (1<<chainLog) 102562306a36Sopenharmony_ci * > (3<<29) - (1<<30) (NOTE: chainLog <= 30) 102662306a36Sopenharmony_ci * > 1<<29 102762306a36Sopenharmony_ci * 102862306a36Sopenharmony_ci * 2. (ip+ZSTD_CHUNKSIZE_MAX - cctx->base) doesn't overflow: 102962306a36Sopenharmony_ci * After correction, current is less than (1<<chainLog + 1<<windowLog). 103062306a36Sopenharmony_ci * In 64-bit mode we are safe, because we have 64-bit ptrdiff_t. 103162306a36Sopenharmony_ci * In 32-bit mode we are safe, because (chainLog <= 29), so 103262306a36Sopenharmony_ci * ip+ZSTD_CHUNKSIZE_MAX - cctx->base < 1<<32. 103362306a36Sopenharmony_ci * 3. (cctx->lowLimit + 1<<windowLog) < 1<<32: 103462306a36Sopenharmony_ci * windowLog <= 31 ==> 3<<29 + 1<<windowLog < 7<<29 < 1<<32. 103562306a36Sopenharmony_ci */ 103662306a36Sopenharmony_ci U32 const cycleSize = 1u << cycleLog; 103762306a36Sopenharmony_ci U32 const cycleMask = cycleSize - 1; 103862306a36Sopenharmony_ci U32 const curr = (U32)((BYTE const*)src - window->base); 103962306a36Sopenharmony_ci U32 const currentCycle = curr & cycleMask; 104062306a36Sopenharmony_ci /* Ensure newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX. */ 104162306a36Sopenharmony_ci U32 const currentCycleCorrection = currentCycle < ZSTD_WINDOW_START_INDEX 104262306a36Sopenharmony_ci ? MAX(cycleSize, ZSTD_WINDOW_START_INDEX) 104362306a36Sopenharmony_ci : 0; 104462306a36Sopenharmony_ci U32 const newCurrent = currentCycle 104562306a36Sopenharmony_ci + currentCycleCorrection 104662306a36Sopenharmony_ci + MAX(maxDist, cycleSize); 104762306a36Sopenharmony_ci U32 const correction = curr - newCurrent; 104862306a36Sopenharmony_ci /* maxDist must be a power of two so that: 104962306a36Sopenharmony_ci * (newCurrent & cycleMask) == (curr & cycleMask) 105062306a36Sopenharmony_ci * This is required to not corrupt the chains / binary tree. 105162306a36Sopenharmony_ci */ 105262306a36Sopenharmony_ci assert((maxDist & (maxDist - 1)) == 0); 105362306a36Sopenharmony_ci assert((curr & cycleMask) == (newCurrent & cycleMask)); 105462306a36Sopenharmony_ci assert(curr > newCurrent); 105562306a36Sopenharmony_ci if (!ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) { 105662306a36Sopenharmony_ci /* Loose bound, should be around 1<<29 (see above) */ 105762306a36Sopenharmony_ci assert(correction > 1<<28); 105862306a36Sopenharmony_ci } 105962306a36Sopenharmony_ci 106062306a36Sopenharmony_ci window->base += correction; 106162306a36Sopenharmony_ci window->dictBase += correction; 106262306a36Sopenharmony_ci if (window->lowLimit < correction + ZSTD_WINDOW_START_INDEX) { 106362306a36Sopenharmony_ci window->lowLimit = ZSTD_WINDOW_START_INDEX; 106462306a36Sopenharmony_ci } else { 106562306a36Sopenharmony_ci window->lowLimit -= correction; 106662306a36Sopenharmony_ci } 106762306a36Sopenharmony_ci if (window->dictLimit < correction + ZSTD_WINDOW_START_INDEX) { 106862306a36Sopenharmony_ci window->dictLimit = ZSTD_WINDOW_START_INDEX; 106962306a36Sopenharmony_ci } else { 107062306a36Sopenharmony_ci window->dictLimit -= correction; 107162306a36Sopenharmony_ci } 107262306a36Sopenharmony_ci 107362306a36Sopenharmony_ci /* Ensure we can still reference the full window. */ 107462306a36Sopenharmony_ci assert(newCurrent >= maxDist); 107562306a36Sopenharmony_ci assert(newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX); 107662306a36Sopenharmony_ci /* Ensure that lowLimit and dictLimit didn't underflow. */ 107762306a36Sopenharmony_ci assert(window->lowLimit <= newCurrent); 107862306a36Sopenharmony_ci assert(window->dictLimit <= newCurrent); 107962306a36Sopenharmony_ci 108062306a36Sopenharmony_ci ++window->nbOverflowCorrections; 108162306a36Sopenharmony_ci 108262306a36Sopenharmony_ci DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction, 108362306a36Sopenharmony_ci window->lowLimit); 108462306a36Sopenharmony_ci return correction; 108562306a36Sopenharmony_ci} 108662306a36Sopenharmony_ci 108762306a36Sopenharmony_ci/* 108862306a36Sopenharmony_ci * ZSTD_window_enforceMaxDist(): 108962306a36Sopenharmony_ci * Updates lowLimit so that: 109062306a36Sopenharmony_ci * (srcEnd - base) - lowLimit == maxDist + loadedDictEnd 109162306a36Sopenharmony_ci * 109262306a36Sopenharmony_ci * It ensures index is valid as long as index >= lowLimit. 109362306a36Sopenharmony_ci * This must be called before a block compression call. 109462306a36Sopenharmony_ci * 109562306a36Sopenharmony_ci * loadedDictEnd is only defined if a dictionary is in use for current compression. 109662306a36Sopenharmony_ci * As the name implies, loadedDictEnd represents the index at end of dictionary. 109762306a36Sopenharmony_ci * The value lies within context's referential, it can be directly compared to blockEndIdx. 109862306a36Sopenharmony_ci * 109962306a36Sopenharmony_ci * If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0. 110062306a36Sopenharmony_ci * If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit. 110162306a36Sopenharmony_ci * This is because dictionaries are allowed to be referenced fully 110262306a36Sopenharmony_ci * as long as the last byte of the dictionary is in the window. 110362306a36Sopenharmony_ci * Once input has progressed beyond window size, dictionary cannot be referenced anymore. 110462306a36Sopenharmony_ci * 110562306a36Sopenharmony_ci * In normal dict mode, the dictionary lies between lowLimit and dictLimit. 110662306a36Sopenharmony_ci * In dictMatchState mode, lowLimit and dictLimit are the same, 110762306a36Sopenharmony_ci * and the dictionary is below them. 110862306a36Sopenharmony_ci * forceWindow and dictMatchState are therefore incompatible. 110962306a36Sopenharmony_ci */ 111062306a36Sopenharmony_ciMEM_STATIC void 111162306a36Sopenharmony_ciZSTD_window_enforceMaxDist(ZSTD_window_t* window, 111262306a36Sopenharmony_ci const void* blockEnd, 111362306a36Sopenharmony_ci U32 maxDist, 111462306a36Sopenharmony_ci U32* loadedDictEndPtr, 111562306a36Sopenharmony_ci const ZSTD_matchState_t** dictMatchStatePtr) 111662306a36Sopenharmony_ci{ 111762306a36Sopenharmony_ci U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base); 111862306a36Sopenharmony_ci U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0; 111962306a36Sopenharmony_ci DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u", 112062306a36Sopenharmony_ci (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd); 112162306a36Sopenharmony_ci 112262306a36Sopenharmony_ci /* - When there is no dictionary : loadedDictEnd == 0. 112362306a36Sopenharmony_ci In which case, the test (blockEndIdx > maxDist) is merely to avoid 112462306a36Sopenharmony_ci overflowing next operation `newLowLimit = blockEndIdx - maxDist`. 112562306a36Sopenharmony_ci - When there is a standard dictionary : 112662306a36Sopenharmony_ci Index referential is copied from the dictionary, 112762306a36Sopenharmony_ci which means it starts from 0. 112862306a36Sopenharmony_ci In which case, loadedDictEnd == dictSize, 112962306a36Sopenharmony_ci and it makes sense to compare `blockEndIdx > maxDist + dictSize` 113062306a36Sopenharmony_ci since `blockEndIdx` also starts from zero. 113162306a36Sopenharmony_ci - When there is an attached dictionary : 113262306a36Sopenharmony_ci loadedDictEnd is expressed within the referential of the context, 113362306a36Sopenharmony_ci so it can be directly compared against blockEndIdx. 113462306a36Sopenharmony_ci */ 113562306a36Sopenharmony_ci if (blockEndIdx > maxDist + loadedDictEnd) { 113662306a36Sopenharmony_ci U32 const newLowLimit = blockEndIdx - maxDist; 113762306a36Sopenharmony_ci if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit; 113862306a36Sopenharmony_ci if (window->dictLimit < window->lowLimit) { 113962306a36Sopenharmony_ci DEBUGLOG(5, "Update dictLimit to match lowLimit, from %u to %u", 114062306a36Sopenharmony_ci (unsigned)window->dictLimit, (unsigned)window->lowLimit); 114162306a36Sopenharmony_ci window->dictLimit = window->lowLimit; 114262306a36Sopenharmony_ci } 114362306a36Sopenharmony_ci /* On reaching window size, dictionaries are invalidated */ 114462306a36Sopenharmony_ci if (loadedDictEndPtr) *loadedDictEndPtr = 0; 114562306a36Sopenharmony_ci if (dictMatchStatePtr) *dictMatchStatePtr = NULL; 114662306a36Sopenharmony_ci } 114762306a36Sopenharmony_ci} 114862306a36Sopenharmony_ci 114962306a36Sopenharmony_ci/* Similar to ZSTD_window_enforceMaxDist(), 115062306a36Sopenharmony_ci * but only invalidates dictionary 115162306a36Sopenharmony_ci * when input progresses beyond window size. 115262306a36Sopenharmony_ci * assumption : loadedDictEndPtr and dictMatchStatePtr are valid (non NULL) 115362306a36Sopenharmony_ci * loadedDictEnd uses same referential as window->base 115462306a36Sopenharmony_ci * maxDist is the window size */ 115562306a36Sopenharmony_ciMEM_STATIC void 115662306a36Sopenharmony_ciZSTD_checkDictValidity(const ZSTD_window_t* window, 115762306a36Sopenharmony_ci const void* blockEnd, 115862306a36Sopenharmony_ci U32 maxDist, 115962306a36Sopenharmony_ci U32* loadedDictEndPtr, 116062306a36Sopenharmony_ci const ZSTD_matchState_t** dictMatchStatePtr) 116162306a36Sopenharmony_ci{ 116262306a36Sopenharmony_ci assert(loadedDictEndPtr != NULL); 116362306a36Sopenharmony_ci assert(dictMatchStatePtr != NULL); 116462306a36Sopenharmony_ci { U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base); 116562306a36Sopenharmony_ci U32 const loadedDictEnd = *loadedDictEndPtr; 116662306a36Sopenharmony_ci DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u", 116762306a36Sopenharmony_ci (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd); 116862306a36Sopenharmony_ci assert(blockEndIdx >= loadedDictEnd); 116962306a36Sopenharmony_ci 117062306a36Sopenharmony_ci if (blockEndIdx > loadedDictEnd + maxDist) { 117162306a36Sopenharmony_ci /* On reaching window size, dictionaries are invalidated. 117262306a36Sopenharmony_ci * For simplification, if window size is reached anywhere within next block, 117362306a36Sopenharmony_ci * the dictionary is invalidated for the full block. 117462306a36Sopenharmony_ci */ 117562306a36Sopenharmony_ci DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)"); 117662306a36Sopenharmony_ci *loadedDictEndPtr = 0; 117762306a36Sopenharmony_ci *dictMatchStatePtr = NULL; 117862306a36Sopenharmony_ci } else { 117962306a36Sopenharmony_ci if (*loadedDictEndPtr != 0) { 118062306a36Sopenharmony_ci DEBUGLOG(6, "dictionary considered valid for current block"); 118162306a36Sopenharmony_ci } } } 118262306a36Sopenharmony_ci} 118362306a36Sopenharmony_ci 118462306a36Sopenharmony_ciMEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) { 118562306a36Sopenharmony_ci ZSTD_memset(window, 0, sizeof(*window)); 118662306a36Sopenharmony_ci window->base = (BYTE const*)" "; 118762306a36Sopenharmony_ci window->dictBase = (BYTE const*)" "; 118862306a36Sopenharmony_ci ZSTD_STATIC_ASSERT(ZSTD_DUBT_UNSORTED_MARK < ZSTD_WINDOW_START_INDEX); /* Start above ZSTD_DUBT_UNSORTED_MARK */ 118962306a36Sopenharmony_ci window->dictLimit = ZSTD_WINDOW_START_INDEX; /* start from >0, so that 1st position is valid */ 119062306a36Sopenharmony_ci window->lowLimit = ZSTD_WINDOW_START_INDEX; /* it ensures first and later CCtx usages compress the same */ 119162306a36Sopenharmony_ci window->nextSrc = window->base + ZSTD_WINDOW_START_INDEX; /* see issue #1241 */ 119262306a36Sopenharmony_ci window->nbOverflowCorrections = 0; 119362306a36Sopenharmony_ci} 119462306a36Sopenharmony_ci 119562306a36Sopenharmony_ci/* 119662306a36Sopenharmony_ci * ZSTD_window_update(): 119762306a36Sopenharmony_ci * Updates the window by appending [src, src + srcSize) to the window. 119862306a36Sopenharmony_ci * If it is not contiguous, the current prefix becomes the extDict, and we 119962306a36Sopenharmony_ci * forget about the extDict. Handles overlap of the prefix and extDict. 120062306a36Sopenharmony_ci * Returns non-zero if the segment is contiguous. 120162306a36Sopenharmony_ci */ 120262306a36Sopenharmony_ciMEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window, 120362306a36Sopenharmony_ci void const* src, size_t srcSize, 120462306a36Sopenharmony_ci int forceNonContiguous) 120562306a36Sopenharmony_ci{ 120662306a36Sopenharmony_ci BYTE const* const ip = (BYTE const*)src; 120762306a36Sopenharmony_ci U32 contiguous = 1; 120862306a36Sopenharmony_ci DEBUGLOG(5, "ZSTD_window_update"); 120962306a36Sopenharmony_ci if (srcSize == 0) 121062306a36Sopenharmony_ci return contiguous; 121162306a36Sopenharmony_ci assert(window->base != NULL); 121262306a36Sopenharmony_ci assert(window->dictBase != NULL); 121362306a36Sopenharmony_ci /* Check if blocks follow each other */ 121462306a36Sopenharmony_ci if (src != window->nextSrc || forceNonContiguous) { 121562306a36Sopenharmony_ci /* not contiguous */ 121662306a36Sopenharmony_ci size_t const distanceFromBase = (size_t)(window->nextSrc - window->base); 121762306a36Sopenharmony_ci DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit); 121862306a36Sopenharmony_ci window->lowLimit = window->dictLimit; 121962306a36Sopenharmony_ci assert(distanceFromBase == (size_t)(U32)distanceFromBase); /* should never overflow */ 122062306a36Sopenharmony_ci window->dictLimit = (U32)distanceFromBase; 122162306a36Sopenharmony_ci window->dictBase = window->base; 122262306a36Sopenharmony_ci window->base = ip - distanceFromBase; 122362306a36Sopenharmony_ci /* ms->nextToUpdate = window->dictLimit; */ 122462306a36Sopenharmony_ci if (window->dictLimit - window->lowLimit < HASH_READ_SIZE) window->lowLimit = window->dictLimit; /* too small extDict */ 122562306a36Sopenharmony_ci contiguous = 0; 122662306a36Sopenharmony_ci } 122762306a36Sopenharmony_ci window->nextSrc = ip + srcSize; 122862306a36Sopenharmony_ci /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */ 122962306a36Sopenharmony_ci if ( (ip+srcSize > window->dictBase + window->lowLimit) 123062306a36Sopenharmony_ci & (ip < window->dictBase + window->dictLimit)) { 123162306a36Sopenharmony_ci ptrdiff_t const highInputIdx = (ip + srcSize) - window->dictBase; 123262306a36Sopenharmony_ci U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx; 123362306a36Sopenharmony_ci window->lowLimit = lowLimitMax; 123462306a36Sopenharmony_ci DEBUGLOG(5, "Overlapping extDict and input : new lowLimit = %u", window->lowLimit); 123562306a36Sopenharmony_ci } 123662306a36Sopenharmony_ci return contiguous; 123762306a36Sopenharmony_ci} 123862306a36Sopenharmony_ci 123962306a36Sopenharmony_ci/* 124062306a36Sopenharmony_ci * Returns the lowest allowed match index. It may either be in the ext-dict or the prefix. 124162306a36Sopenharmony_ci */ 124262306a36Sopenharmony_ciMEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog) 124362306a36Sopenharmony_ci{ 124462306a36Sopenharmony_ci U32 const maxDistance = 1U << windowLog; 124562306a36Sopenharmony_ci U32 const lowestValid = ms->window.lowLimit; 124662306a36Sopenharmony_ci U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid; 124762306a36Sopenharmony_ci U32 const isDictionary = (ms->loadedDictEnd != 0); 124862306a36Sopenharmony_ci /* When using a dictionary the entire dictionary is valid if a single byte of the dictionary 124962306a36Sopenharmony_ci * is within the window. We invalidate the dictionary (and set loadedDictEnd to 0) when it isn't 125062306a36Sopenharmony_ci * valid for the entire block. So this check is sufficient to find the lowest valid match index. 125162306a36Sopenharmony_ci */ 125262306a36Sopenharmony_ci U32 const matchLowest = isDictionary ? lowestValid : withinWindow; 125362306a36Sopenharmony_ci return matchLowest; 125462306a36Sopenharmony_ci} 125562306a36Sopenharmony_ci 125662306a36Sopenharmony_ci/* 125762306a36Sopenharmony_ci * Returns the lowest allowed match index in the prefix. 125862306a36Sopenharmony_ci */ 125962306a36Sopenharmony_ciMEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog) 126062306a36Sopenharmony_ci{ 126162306a36Sopenharmony_ci U32 const maxDistance = 1U << windowLog; 126262306a36Sopenharmony_ci U32 const lowestValid = ms->window.dictLimit; 126362306a36Sopenharmony_ci U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid; 126462306a36Sopenharmony_ci U32 const isDictionary = (ms->loadedDictEnd != 0); 126562306a36Sopenharmony_ci /* When computing the lowest prefix index we need to take the dictionary into account to handle 126662306a36Sopenharmony_ci * the edge case where the dictionary and the source are contiguous in memory. 126762306a36Sopenharmony_ci */ 126862306a36Sopenharmony_ci U32 const matchLowest = isDictionary ? lowestValid : withinWindow; 126962306a36Sopenharmony_ci return matchLowest; 127062306a36Sopenharmony_ci} 127162306a36Sopenharmony_ci 127262306a36Sopenharmony_ci 127362306a36Sopenharmony_ci 127462306a36Sopenharmony_ci/* debug functions */ 127562306a36Sopenharmony_ci#if (DEBUGLEVEL>=2) 127662306a36Sopenharmony_ci 127762306a36Sopenharmony_ciMEM_STATIC double ZSTD_fWeight(U32 rawStat) 127862306a36Sopenharmony_ci{ 127962306a36Sopenharmony_ci U32 const fp_accuracy = 8; 128062306a36Sopenharmony_ci U32 const fp_multiplier = (1 << fp_accuracy); 128162306a36Sopenharmony_ci U32 const newStat = rawStat + 1; 128262306a36Sopenharmony_ci U32 const hb = ZSTD_highbit32(newStat); 128362306a36Sopenharmony_ci U32 const BWeight = hb * fp_multiplier; 128462306a36Sopenharmony_ci U32 const FWeight = (newStat << fp_accuracy) >> hb; 128562306a36Sopenharmony_ci U32 const weight = BWeight + FWeight; 128662306a36Sopenharmony_ci assert(hb + fp_accuracy < 31); 128762306a36Sopenharmony_ci return (double)weight / fp_multiplier; 128862306a36Sopenharmony_ci} 128962306a36Sopenharmony_ci 129062306a36Sopenharmony_ci/* display a table content, 129162306a36Sopenharmony_ci * listing each element, its frequency, and its predicted bit cost */ 129262306a36Sopenharmony_ciMEM_STATIC void ZSTD_debugTable(const U32* table, U32 max) 129362306a36Sopenharmony_ci{ 129462306a36Sopenharmony_ci unsigned u, sum; 129562306a36Sopenharmony_ci for (u=0, sum=0; u<=max; u++) sum += table[u]; 129662306a36Sopenharmony_ci DEBUGLOG(2, "total nb elts: %u", sum); 129762306a36Sopenharmony_ci for (u=0; u<=max; u++) { 129862306a36Sopenharmony_ci DEBUGLOG(2, "%2u: %5u (%.2f)", 129962306a36Sopenharmony_ci u, table[u], ZSTD_fWeight(sum) - ZSTD_fWeight(table[u]) ); 130062306a36Sopenharmony_ci } 130162306a36Sopenharmony_ci} 130262306a36Sopenharmony_ci 130362306a36Sopenharmony_ci#endif 130462306a36Sopenharmony_ci 130562306a36Sopenharmony_ci 130662306a36Sopenharmony_ci 130762306a36Sopenharmony_ci/* =============================================================== 130862306a36Sopenharmony_ci * Shared internal declarations 130962306a36Sopenharmony_ci * These prototypes may be called from sources not in lib/compress 131062306a36Sopenharmony_ci * =============================================================== */ 131162306a36Sopenharmony_ci 131262306a36Sopenharmony_ci/* ZSTD_loadCEntropy() : 131362306a36Sopenharmony_ci * dict : must point at beginning of a valid zstd dictionary. 131462306a36Sopenharmony_ci * return : size of dictionary header (size of magic number + dict ID + entropy tables) 131562306a36Sopenharmony_ci * assumptions : magic number supposed already checked 131662306a36Sopenharmony_ci * and dictSize >= 8 */ 131762306a36Sopenharmony_cisize_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace, 131862306a36Sopenharmony_ci const void* const dict, size_t dictSize); 131962306a36Sopenharmony_ci 132062306a36Sopenharmony_civoid ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs); 132162306a36Sopenharmony_ci 132262306a36Sopenharmony_ci/* ============================================================== 132362306a36Sopenharmony_ci * Private declarations 132462306a36Sopenharmony_ci * These prototypes shall only be called from within lib/compress 132562306a36Sopenharmony_ci * ============================================================== */ 132662306a36Sopenharmony_ci 132762306a36Sopenharmony_ci/* ZSTD_getCParamsFromCCtxParams() : 132862306a36Sopenharmony_ci * cParams are built depending on compressionLevel, src size hints, 132962306a36Sopenharmony_ci * LDM and manually set compression parameters. 133062306a36Sopenharmony_ci * Note: srcSizeHint == 0 means 0! 133162306a36Sopenharmony_ci */ 133262306a36Sopenharmony_ciZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( 133362306a36Sopenharmony_ci const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode); 133462306a36Sopenharmony_ci 133562306a36Sopenharmony_ci/*! ZSTD_initCStream_internal() : 133662306a36Sopenharmony_ci * Private use only. Init streaming operation. 133762306a36Sopenharmony_ci * expects params to be valid. 133862306a36Sopenharmony_ci * must receive dict, or cdict, or none, but not both. 133962306a36Sopenharmony_ci * @return : 0, or an error code */ 134062306a36Sopenharmony_cisize_t ZSTD_initCStream_internal(ZSTD_CStream* zcs, 134162306a36Sopenharmony_ci const void* dict, size_t dictSize, 134262306a36Sopenharmony_ci const ZSTD_CDict* cdict, 134362306a36Sopenharmony_ci const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize); 134462306a36Sopenharmony_ci 134562306a36Sopenharmony_civoid ZSTD_resetSeqStore(seqStore_t* ssPtr); 134662306a36Sopenharmony_ci 134762306a36Sopenharmony_ci/*! ZSTD_getCParamsFromCDict() : 134862306a36Sopenharmony_ci * as the name implies */ 134962306a36Sopenharmony_ciZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict); 135062306a36Sopenharmony_ci 135162306a36Sopenharmony_ci/* ZSTD_compressBegin_advanced_internal() : 135262306a36Sopenharmony_ci * Private use only. To be called from zstdmt_compress.c. */ 135362306a36Sopenharmony_cisize_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx, 135462306a36Sopenharmony_ci const void* dict, size_t dictSize, 135562306a36Sopenharmony_ci ZSTD_dictContentType_e dictContentType, 135662306a36Sopenharmony_ci ZSTD_dictTableLoadMethod_e dtlm, 135762306a36Sopenharmony_ci const ZSTD_CDict* cdict, 135862306a36Sopenharmony_ci const ZSTD_CCtx_params* params, 135962306a36Sopenharmony_ci unsigned long long pledgedSrcSize); 136062306a36Sopenharmony_ci 136162306a36Sopenharmony_ci/* ZSTD_compress_advanced_internal() : 136262306a36Sopenharmony_ci * Private use only. To be called from zstdmt_compress.c. */ 136362306a36Sopenharmony_cisize_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx, 136462306a36Sopenharmony_ci void* dst, size_t dstCapacity, 136562306a36Sopenharmony_ci const void* src, size_t srcSize, 136662306a36Sopenharmony_ci const void* dict,size_t dictSize, 136762306a36Sopenharmony_ci const ZSTD_CCtx_params* params); 136862306a36Sopenharmony_ci 136962306a36Sopenharmony_ci 137062306a36Sopenharmony_ci/* ZSTD_writeLastEmptyBlock() : 137162306a36Sopenharmony_ci * output an empty Block with end-of-frame mark to complete a frame 137262306a36Sopenharmony_ci * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h)) 137362306a36Sopenharmony_ci * or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize) 137462306a36Sopenharmony_ci */ 137562306a36Sopenharmony_cisize_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity); 137662306a36Sopenharmony_ci 137762306a36Sopenharmony_ci 137862306a36Sopenharmony_ci/* ZSTD_referenceExternalSequences() : 137962306a36Sopenharmony_ci * Must be called before starting a compression operation. 138062306a36Sopenharmony_ci * seqs must parse a prefix of the source. 138162306a36Sopenharmony_ci * This cannot be used when long range matching is enabled. 138262306a36Sopenharmony_ci * Zstd will use these sequences, and pass the literals to a secondary block 138362306a36Sopenharmony_ci * compressor. 138462306a36Sopenharmony_ci * @return : An error code on failure. 138562306a36Sopenharmony_ci * NOTE: seqs are not verified! Invalid sequences can cause out-of-bounds memory 138662306a36Sopenharmony_ci * access and data corruption. 138762306a36Sopenharmony_ci */ 138862306a36Sopenharmony_cisize_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq); 138962306a36Sopenharmony_ci 139062306a36Sopenharmony_ci/* ZSTD_cycleLog() : 139162306a36Sopenharmony_ci * condition for correct operation : hashLog > 1 */ 139262306a36Sopenharmony_ciU32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat); 139362306a36Sopenharmony_ci 139462306a36Sopenharmony_ci/* ZSTD_CCtx_trace() : 139562306a36Sopenharmony_ci * Trace the end of a compression call. 139662306a36Sopenharmony_ci */ 139762306a36Sopenharmony_civoid ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize); 139862306a36Sopenharmony_ci 139962306a36Sopenharmony_ci#endif /* ZSTD_COMPRESS_H */ 1400