1370b324cSopenharmony_ci/* LzFindMt.c -- multithreaded Match finder for LZ algorithms 2370b324cSopenharmony_ci2023-04-02 : Igor Pavlov : Public domain */ 3370b324cSopenharmony_ci 4370b324cSopenharmony_ci#include "Precomp.h" 5370b324cSopenharmony_ci 6370b324cSopenharmony_ci// #include <stdio.h> 7370b324cSopenharmony_ci 8370b324cSopenharmony_ci#include "CpuArch.h" 9370b324cSopenharmony_ci 10370b324cSopenharmony_ci#include "LzHash.h" 11370b324cSopenharmony_ci#include "LzFindMt.h" 12370b324cSopenharmony_ci 13370b324cSopenharmony_ci// #define LOG_ITERS 14370b324cSopenharmony_ci 15370b324cSopenharmony_ci// #define LOG_THREAD 16370b324cSopenharmony_ci 17370b324cSopenharmony_ci#ifdef LOG_THREAD 18370b324cSopenharmony_ci#include <stdio.h> 19370b324cSopenharmony_ci#define PRF(x) x 20370b324cSopenharmony_ci#else 21370b324cSopenharmony_ci#define PRF(x) 22370b324cSopenharmony_ci#endif 23370b324cSopenharmony_ci 24370b324cSopenharmony_ci#ifdef LOG_ITERS 25370b324cSopenharmony_ci#include <stdio.h> 26370b324cSopenharmony_ciextern UInt64 g_NumIters_Tree; 27370b324cSopenharmony_ciextern UInt64 g_NumIters_Loop; 28370b324cSopenharmony_ciextern UInt64 g_NumIters_Bytes; 29370b324cSopenharmony_ci#define LOG_ITER(x) x 30370b324cSopenharmony_ci#else 31370b324cSopenharmony_ci#define LOG_ITER(x) 32370b324cSopenharmony_ci#endif 33370b324cSopenharmony_ci 34370b324cSopenharmony_ci#define kMtHashBlockSize ((UInt32)1 << 17) 35370b324cSopenharmony_ci#define kMtHashNumBlocks (1 << 1) 36370b324cSopenharmony_ci 37370b324cSopenharmony_ci#define GET_HASH_BLOCK_OFFSET(i) (((i) & (kMtHashNumBlocks - 1)) * kMtHashBlockSize) 38370b324cSopenharmony_ci 39370b324cSopenharmony_ci#define kMtBtBlockSize ((UInt32)1 << 16) 40370b324cSopenharmony_ci#define kMtBtNumBlocks (1 << 4) 41370b324cSopenharmony_ci 42370b324cSopenharmony_ci#define GET_BT_BLOCK_OFFSET(i) (((i) & (kMtBtNumBlocks - 1)) * (size_t)kMtBtBlockSize) 43370b324cSopenharmony_ci 44370b324cSopenharmony_ci/* 45370b324cSopenharmony_ci HASH functions: 46370b324cSopenharmony_ci We use raw 8/16 bits from a[1] and a[2], 47370b324cSopenharmony_ci xored with crc(a[0]) and crc(a[3]). 48370b324cSopenharmony_ci We check a[0], a[3] only. We don't need to compare a[1] and a[2] in matches. 49370b324cSopenharmony_ci our crc() function provides one-to-one correspondence for low 8-bit values: 50370b324cSopenharmony_ci (crc[0...0xFF] & 0xFF) <-> [0...0xFF] 51370b324cSopenharmony_ci*/ 52370b324cSopenharmony_ci 53370b324cSopenharmony_ci#define MF(mt) ((mt)->MatchFinder) 54370b324cSopenharmony_ci#define MF_CRC (p->crc) 55370b324cSopenharmony_ci 56370b324cSopenharmony_ci// #define MF(mt) (&(mt)->MatchFinder) 57370b324cSopenharmony_ci// #define MF_CRC (p->MatchFinder.crc) 58370b324cSopenharmony_ci 59370b324cSopenharmony_ci#define MT_HASH2_CALC \ 60370b324cSopenharmony_ci h2 = (MF_CRC[cur[0]] ^ cur[1]) & (kHash2Size - 1); 61370b324cSopenharmony_ci 62370b324cSopenharmony_ci#define MT_HASH3_CALC { \ 63370b324cSopenharmony_ci UInt32 temp = MF_CRC[cur[0]] ^ cur[1]; \ 64370b324cSopenharmony_ci h2 = temp & (kHash2Size - 1); \ 65370b324cSopenharmony_ci h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); } 66370b324cSopenharmony_ci 67370b324cSopenharmony_ci/* 68370b324cSopenharmony_ci#define MT_HASH3_CALC__NO_2 { \ 69370b324cSopenharmony_ci UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ 70370b324cSopenharmony_ci h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); } 71370b324cSopenharmony_ci 72370b324cSopenharmony_ci#define MT_HASH4_CALC { \ 73370b324cSopenharmony_ci UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ 74370b324cSopenharmony_ci h2 = temp & (kHash2Size - 1); \ 75370b324cSopenharmony_ci temp ^= ((UInt32)cur[2] << 8); \ 76370b324cSopenharmony_ci h3 = temp & (kHash3Size - 1); \ 77370b324cSopenharmony_ci h4 = (temp ^ (p->crc[cur[3]] << kLzHash_CrcShift_1)) & p->hash4Mask; } 78370b324cSopenharmony_ci // (kHash4Size - 1); 79370b324cSopenharmony_ci*/ 80370b324cSopenharmony_ci 81370b324cSopenharmony_ci 82370b324cSopenharmony_ciZ7_NO_INLINE 83370b324cSopenharmony_cistatic void MtSync_Construct(CMtSync *p) 84370b324cSopenharmony_ci{ 85370b324cSopenharmony_ci p->affinity = 0; 86370b324cSopenharmony_ci p->wasCreated = False; 87370b324cSopenharmony_ci p->csWasInitialized = False; 88370b324cSopenharmony_ci p->csWasEntered = False; 89370b324cSopenharmony_ci Thread_CONSTRUCT(&p->thread) 90370b324cSopenharmony_ci Event_Construct(&p->canStart); 91370b324cSopenharmony_ci Event_Construct(&p->wasStopped); 92370b324cSopenharmony_ci Semaphore_Construct(&p->freeSemaphore); 93370b324cSopenharmony_ci Semaphore_Construct(&p->filledSemaphore); 94370b324cSopenharmony_ci} 95370b324cSopenharmony_ci 96370b324cSopenharmony_ci 97370b324cSopenharmony_ci#define DEBUG_BUFFER_LOCK // define it to debug lock state 98370b324cSopenharmony_ci 99370b324cSopenharmony_ci#ifdef DEBUG_BUFFER_LOCK 100370b324cSopenharmony_ci#include <stdlib.h> 101370b324cSopenharmony_ci#define BUFFER_MUST_BE_LOCKED(p) if (!(p)->csWasEntered) exit(1); 102370b324cSopenharmony_ci#define BUFFER_MUST_BE_UNLOCKED(p) if ( (p)->csWasEntered) exit(1); 103370b324cSopenharmony_ci#else 104370b324cSopenharmony_ci#define BUFFER_MUST_BE_LOCKED(p) 105370b324cSopenharmony_ci#define BUFFER_MUST_BE_UNLOCKED(p) 106370b324cSopenharmony_ci#endif 107370b324cSopenharmony_ci 108370b324cSopenharmony_ci#define LOCK_BUFFER(p) { \ 109370b324cSopenharmony_ci BUFFER_MUST_BE_UNLOCKED(p); \ 110370b324cSopenharmony_ci CriticalSection_Enter(&(p)->cs); \ 111370b324cSopenharmony_ci (p)->csWasEntered = True; } 112370b324cSopenharmony_ci 113370b324cSopenharmony_ci#define UNLOCK_BUFFER(p) { \ 114370b324cSopenharmony_ci BUFFER_MUST_BE_LOCKED(p); \ 115370b324cSopenharmony_ci CriticalSection_Leave(&(p)->cs); \ 116370b324cSopenharmony_ci (p)->csWasEntered = False; } 117370b324cSopenharmony_ci 118370b324cSopenharmony_ci 119370b324cSopenharmony_ciZ7_NO_INLINE 120370b324cSopenharmony_cistatic UInt32 MtSync_GetNextBlock(CMtSync *p) 121370b324cSopenharmony_ci{ 122370b324cSopenharmony_ci UInt32 numBlocks = 0; 123370b324cSopenharmony_ci if (p->needStart) 124370b324cSopenharmony_ci { 125370b324cSopenharmony_ci BUFFER_MUST_BE_UNLOCKED(p) 126370b324cSopenharmony_ci p->numProcessedBlocks = 1; 127370b324cSopenharmony_ci p->needStart = False; 128370b324cSopenharmony_ci p->stopWriting = False; 129370b324cSopenharmony_ci p->exit = False; 130370b324cSopenharmony_ci Event_Reset(&p->wasStopped); 131370b324cSopenharmony_ci Event_Set(&p->canStart); 132370b324cSopenharmony_ci } 133370b324cSopenharmony_ci else 134370b324cSopenharmony_ci { 135370b324cSopenharmony_ci UNLOCK_BUFFER(p) 136370b324cSopenharmony_ci // we free current block 137370b324cSopenharmony_ci numBlocks = p->numProcessedBlocks++; 138370b324cSopenharmony_ci Semaphore_Release1(&p->freeSemaphore); 139370b324cSopenharmony_ci } 140370b324cSopenharmony_ci 141370b324cSopenharmony_ci // buffer is UNLOCKED here 142370b324cSopenharmony_ci Semaphore_Wait(&p->filledSemaphore); 143370b324cSopenharmony_ci LOCK_BUFFER(p) 144370b324cSopenharmony_ci return numBlocks; 145370b324cSopenharmony_ci} 146370b324cSopenharmony_ci 147370b324cSopenharmony_ci 148370b324cSopenharmony_ci/* if Writing (Processing) thread was started, we must call MtSync_StopWriting() */ 149370b324cSopenharmony_ci 150370b324cSopenharmony_ciZ7_NO_INLINE 151370b324cSopenharmony_cistatic void MtSync_StopWriting(CMtSync *p) 152370b324cSopenharmony_ci{ 153370b324cSopenharmony_ci if (!Thread_WasCreated(&p->thread) || p->needStart) 154370b324cSopenharmony_ci return; 155370b324cSopenharmony_ci 156370b324cSopenharmony_ci PRF(printf("\nMtSync_StopWriting %p\n", p)); 157370b324cSopenharmony_ci 158370b324cSopenharmony_ci if (p->csWasEntered) 159370b324cSopenharmony_ci { 160370b324cSopenharmony_ci /* we don't use buffer in this thread after StopWriting(). 161370b324cSopenharmony_ci So we UNLOCK buffer. 162370b324cSopenharmony_ci And we restore default UNLOCKED state for stopped thread */ 163370b324cSopenharmony_ci UNLOCK_BUFFER(p) 164370b324cSopenharmony_ci } 165370b324cSopenharmony_ci 166370b324cSopenharmony_ci /* We send (p->stopWriting) message and release freeSemaphore 167370b324cSopenharmony_ci to free current block. 168370b324cSopenharmony_ci So the thread will see (p->stopWriting) at some 169370b324cSopenharmony_ci iteration after Wait(freeSemaphore). 170370b324cSopenharmony_ci The thread doesn't need to fill all avail free blocks, 171370b324cSopenharmony_ci so we can get fast thread stop. 172370b324cSopenharmony_ci */ 173370b324cSopenharmony_ci 174370b324cSopenharmony_ci p->stopWriting = True; 175370b324cSopenharmony_ci Semaphore_Release1(&p->freeSemaphore); // check semaphore count !!! 176370b324cSopenharmony_ci 177370b324cSopenharmony_ci PRF(printf("\nMtSync_StopWriting %p : Event_Wait(&p->wasStopped)\n", p)); 178370b324cSopenharmony_ci Event_Wait(&p->wasStopped); 179370b324cSopenharmony_ci PRF(printf("\nMtSync_StopWriting %p : Event_Wait() finsihed\n", p)); 180370b324cSopenharmony_ci 181370b324cSopenharmony_ci /* 21.03 : we don't restore samaphore counters here. 182370b324cSopenharmony_ci We will recreate and reinit samaphores in next start */ 183370b324cSopenharmony_ci 184370b324cSopenharmony_ci p->needStart = True; 185370b324cSopenharmony_ci} 186370b324cSopenharmony_ci 187370b324cSopenharmony_ci 188370b324cSopenharmony_ciZ7_NO_INLINE 189370b324cSopenharmony_cistatic void MtSync_Destruct(CMtSync *p) 190370b324cSopenharmony_ci{ 191370b324cSopenharmony_ci PRF(printf("\nMtSync_Destruct %p\n", p)); 192370b324cSopenharmony_ci 193370b324cSopenharmony_ci if (Thread_WasCreated(&p->thread)) 194370b324cSopenharmony_ci { 195370b324cSopenharmony_ci /* we want thread to be in Stopped state before sending EXIT command. 196370b324cSopenharmony_ci note: stop(btSync) will stop (htSync) also */ 197370b324cSopenharmony_ci MtSync_StopWriting(p); 198370b324cSopenharmony_ci /* thread in Stopped state here : (p->needStart == true) */ 199370b324cSopenharmony_ci p->exit = True; 200370b324cSopenharmony_ci // if (p->needStart) // it's (true) 201370b324cSopenharmony_ci Event_Set(&p->canStart); // we send EXIT command to thread 202370b324cSopenharmony_ci Thread_Wait_Close(&p->thread); // we wait thread finishing 203370b324cSopenharmony_ci } 204370b324cSopenharmony_ci 205370b324cSopenharmony_ci if (p->csWasInitialized) 206370b324cSopenharmony_ci { 207370b324cSopenharmony_ci CriticalSection_Delete(&p->cs); 208370b324cSopenharmony_ci p->csWasInitialized = False; 209370b324cSopenharmony_ci } 210370b324cSopenharmony_ci p->csWasEntered = False; 211370b324cSopenharmony_ci 212370b324cSopenharmony_ci Event_Close(&p->canStart); 213370b324cSopenharmony_ci Event_Close(&p->wasStopped); 214370b324cSopenharmony_ci Semaphore_Close(&p->freeSemaphore); 215370b324cSopenharmony_ci Semaphore_Close(&p->filledSemaphore); 216370b324cSopenharmony_ci 217370b324cSopenharmony_ci p->wasCreated = False; 218370b324cSopenharmony_ci} 219370b324cSopenharmony_ci 220370b324cSopenharmony_ci 221370b324cSopenharmony_ci// #define RINOK_THREAD(x) { if ((x) != 0) return SZ_ERROR_THREAD; } 222370b324cSopenharmony_ci// we want to get real system error codes here instead of SZ_ERROR_THREAD 223370b324cSopenharmony_ci#define RINOK_THREAD(x) RINOK_WRes(x) 224370b324cSopenharmony_ci 225370b324cSopenharmony_ci 226370b324cSopenharmony_ci// call it before each new file (when new starting is required): 227370b324cSopenharmony_ciZ7_NO_INLINE 228370b324cSopenharmony_cistatic SRes MtSync_Init(CMtSync *p, UInt32 numBlocks) 229370b324cSopenharmony_ci{ 230370b324cSopenharmony_ci WRes wres; 231370b324cSopenharmony_ci // BUFFER_MUST_BE_UNLOCKED(p) 232370b324cSopenharmony_ci if (!p->needStart || p->csWasEntered) 233370b324cSopenharmony_ci return SZ_ERROR_FAIL; 234370b324cSopenharmony_ci wres = Semaphore_OptCreateInit(&p->freeSemaphore, numBlocks, numBlocks); 235370b324cSopenharmony_ci if (wres == 0) 236370b324cSopenharmony_ci wres = Semaphore_OptCreateInit(&p->filledSemaphore, 0, numBlocks); 237370b324cSopenharmony_ci return MY_SRes_HRESULT_FROM_WRes(wres); 238370b324cSopenharmony_ci} 239370b324cSopenharmony_ci 240370b324cSopenharmony_ci 241370b324cSopenharmony_cistatic WRes MtSync_Create_WRes(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj) 242370b324cSopenharmony_ci{ 243370b324cSopenharmony_ci WRes wres; 244370b324cSopenharmony_ci 245370b324cSopenharmony_ci if (p->wasCreated) 246370b324cSopenharmony_ci return SZ_OK; 247370b324cSopenharmony_ci 248370b324cSopenharmony_ci RINOK_THREAD(CriticalSection_Init(&p->cs)) 249370b324cSopenharmony_ci p->csWasInitialized = True; 250370b324cSopenharmony_ci p->csWasEntered = False; 251370b324cSopenharmony_ci 252370b324cSopenharmony_ci RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->canStart)) 253370b324cSopenharmony_ci RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStopped)) 254370b324cSopenharmony_ci 255370b324cSopenharmony_ci p->needStart = True; 256370b324cSopenharmony_ci p->exit = True; /* p->exit is unused before (canStart) Event. 257370b324cSopenharmony_ci But in case of some unexpected code failure we will get fast exit from thread */ 258370b324cSopenharmony_ci 259370b324cSopenharmony_ci // return ERROR_TOO_MANY_POSTS; // for debug 260370b324cSopenharmony_ci // return EINVAL; // for debug 261370b324cSopenharmony_ci 262370b324cSopenharmony_ci if (p->affinity != 0) 263370b324cSopenharmony_ci wres = Thread_Create_With_Affinity(&p->thread, startAddress, obj, (CAffinityMask)p->affinity); 264370b324cSopenharmony_ci else 265370b324cSopenharmony_ci wres = Thread_Create(&p->thread, startAddress, obj); 266370b324cSopenharmony_ci 267370b324cSopenharmony_ci RINOK_THREAD(wres) 268370b324cSopenharmony_ci p->wasCreated = True; 269370b324cSopenharmony_ci return SZ_OK; 270370b324cSopenharmony_ci} 271370b324cSopenharmony_ci 272370b324cSopenharmony_ci 273370b324cSopenharmony_ciZ7_NO_INLINE 274370b324cSopenharmony_cistatic SRes MtSync_Create(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj) 275370b324cSopenharmony_ci{ 276370b324cSopenharmony_ci const WRes wres = MtSync_Create_WRes(p, startAddress, obj); 277370b324cSopenharmony_ci if (wres == 0) 278370b324cSopenharmony_ci return 0; 279370b324cSopenharmony_ci MtSync_Destruct(p); 280370b324cSopenharmony_ci return MY_SRes_HRESULT_FROM_WRes(wres); 281370b324cSopenharmony_ci} 282370b324cSopenharmony_ci 283370b324cSopenharmony_ci 284370b324cSopenharmony_ci// ---------- HASH THREAD ---------- 285370b324cSopenharmony_ci 286370b324cSopenharmony_ci#define kMtMaxValForNormalize 0xFFFFFFFF 287370b324cSopenharmony_ci// #define kMtMaxValForNormalize ((1 << 21)) // for debug 288370b324cSopenharmony_ci// #define kNormalizeAlign (1 << 7) // alignment for speculated accesses 289370b324cSopenharmony_ci 290370b324cSopenharmony_ci#ifdef MY_CPU_LE_UNALIGN 291370b324cSopenharmony_ci #define GetUi24hi_from32(p) ((UInt32)GetUi32(p) >> 8) 292370b324cSopenharmony_ci#else 293370b324cSopenharmony_ci #define GetUi24hi_from32(p) ((p)[1] ^ ((UInt32)(p)[2] << 8) ^ ((UInt32)(p)[3] << 16)) 294370b324cSopenharmony_ci#endif 295370b324cSopenharmony_ci 296370b324cSopenharmony_ci#define GetHeads_DECL(name) \ 297370b324cSopenharmony_ci static void GetHeads ## name(const Byte *p, UInt32 pos, \ 298370b324cSopenharmony_ci UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc) 299370b324cSopenharmony_ci 300370b324cSopenharmony_ci#define GetHeads_LOOP(v) \ 301370b324cSopenharmony_ci for (; numHeads != 0; numHeads--) { \ 302370b324cSopenharmony_ci const UInt32 value = (v); \ 303370b324cSopenharmony_ci p++; \ 304370b324cSopenharmony_ci *heads++ = pos - hash[value]; \ 305370b324cSopenharmony_ci hash[value] = pos++; } 306370b324cSopenharmony_ci 307370b324cSopenharmony_ci#define DEF_GetHeads2(name, v, action) \ 308370b324cSopenharmony_ci GetHeads_DECL(name) { action \ 309370b324cSopenharmony_ci GetHeads_LOOP(v) } 310370b324cSopenharmony_ci 311370b324cSopenharmony_ci#define DEF_GetHeads(name, v) DEF_GetHeads2(name, v, ;) 312370b324cSopenharmony_ci 313370b324cSopenharmony_ciDEF_GetHeads2(2, GetUi16(p), UNUSED_VAR(hashMask); UNUSED_VAR(crc); ) 314370b324cSopenharmony_ciDEF_GetHeads(3, (crc[p[0]] ^ GetUi16(p + 1)) & hashMask) 315370b324cSopenharmony_ciDEF_GetHeads2(3b, GetUi16(p) ^ ((UInt32)(p)[2] << 16), UNUSED_VAR(hashMask); UNUSED_VAR(crc); ) 316370b324cSopenharmony_ci// BT3 is not good for crc collisions for big hashMask values. 317370b324cSopenharmony_ci 318370b324cSopenharmony_ci/* 319370b324cSopenharmony_ciGetHeads_DECL(3b) 320370b324cSopenharmony_ci{ 321370b324cSopenharmony_ci UNUSED_VAR(hashMask); 322370b324cSopenharmony_ci UNUSED_VAR(crc); 323370b324cSopenharmony_ci { 324370b324cSopenharmony_ci const Byte *pLim = p + numHeads; 325370b324cSopenharmony_ci if (numHeads == 0) 326370b324cSopenharmony_ci return; 327370b324cSopenharmony_ci pLim--; 328370b324cSopenharmony_ci while (p < pLim) 329370b324cSopenharmony_ci { 330370b324cSopenharmony_ci UInt32 v1 = GetUi32(p); 331370b324cSopenharmony_ci UInt32 v0 = v1 & 0xFFFFFF; 332370b324cSopenharmony_ci UInt32 h0, h1; 333370b324cSopenharmony_ci p += 2; 334370b324cSopenharmony_ci v1 >>= 8; 335370b324cSopenharmony_ci h0 = hash[v0]; hash[v0] = pos; heads[0] = pos - h0; pos++; 336370b324cSopenharmony_ci h1 = hash[v1]; hash[v1] = pos; heads[1] = pos - h1; pos++; 337370b324cSopenharmony_ci heads += 2; 338370b324cSopenharmony_ci } 339370b324cSopenharmony_ci if (p == pLim) 340370b324cSopenharmony_ci { 341370b324cSopenharmony_ci UInt32 v0 = GetUi16(p) ^ ((UInt32)(p)[2] << 16); 342370b324cSopenharmony_ci *heads = pos - hash[v0]; 343370b324cSopenharmony_ci hash[v0] = pos; 344370b324cSopenharmony_ci } 345370b324cSopenharmony_ci } 346370b324cSopenharmony_ci} 347370b324cSopenharmony_ci*/ 348370b324cSopenharmony_ci 349370b324cSopenharmony_ci/* 350370b324cSopenharmony_ciGetHeads_DECL(4) 351370b324cSopenharmony_ci{ 352370b324cSopenharmony_ci unsigned sh = 0; 353370b324cSopenharmony_ci UNUSED_VAR(crc) 354370b324cSopenharmony_ci while ((hashMask & 0x80000000) == 0) 355370b324cSopenharmony_ci { 356370b324cSopenharmony_ci hashMask <<= 1; 357370b324cSopenharmony_ci sh++; 358370b324cSopenharmony_ci } 359370b324cSopenharmony_ci GetHeads_LOOP((GetUi32(p) * 0xa54a1) >> sh) 360370b324cSopenharmony_ci} 361370b324cSopenharmony_ci#define GetHeads4b GetHeads4 362370b324cSopenharmony_ci*/ 363370b324cSopenharmony_ci 364370b324cSopenharmony_ci#define USE_GetHeads_LOCAL_CRC 365370b324cSopenharmony_ci 366370b324cSopenharmony_ci#ifdef USE_GetHeads_LOCAL_CRC 367370b324cSopenharmony_ci 368370b324cSopenharmony_ciGetHeads_DECL(4) 369370b324cSopenharmony_ci{ 370370b324cSopenharmony_ci UInt32 crc0[256]; 371370b324cSopenharmony_ci UInt32 crc1[256]; 372370b324cSopenharmony_ci { 373370b324cSopenharmony_ci unsigned i; 374370b324cSopenharmony_ci for (i = 0; i < 256; i++) 375370b324cSopenharmony_ci { 376370b324cSopenharmony_ci UInt32 v = crc[i]; 377370b324cSopenharmony_ci crc0[i] = v & hashMask; 378370b324cSopenharmony_ci crc1[i] = (v << kLzHash_CrcShift_1) & hashMask; 379370b324cSopenharmony_ci // crc1[i] = rotlFixed(v, 8) & hashMask; 380370b324cSopenharmony_ci } 381370b324cSopenharmony_ci } 382370b324cSopenharmony_ci GetHeads_LOOP(crc0[p[0]] ^ crc1[p[3]] ^ (UInt32)GetUi16(p+1)) 383370b324cSopenharmony_ci} 384370b324cSopenharmony_ci 385370b324cSopenharmony_ciGetHeads_DECL(4b) 386370b324cSopenharmony_ci{ 387370b324cSopenharmony_ci UInt32 crc0[256]; 388370b324cSopenharmony_ci { 389370b324cSopenharmony_ci unsigned i; 390370b324cSopenharmony_ci for (i = 0; i < 256; i++) 391370b324cSopenharmony_ci crc0[i] = crc[i] & hashMask; 392370b324cSopenharmony_ci } 393370b324cSopenharmony_ci GetHeads_LOOP(crc0[p[0]] ^ GetUi24hi_from32(p)) 394370b324cSopenharmony_ci} 395370b324cSopenharmony_ci 396370b324cSopenharmony_ciGetHeads_DECL(5) 397370b324cSopenharmony_ci{ 398370b324cSopenharmony_ci UInt32 crc0[256]; 399370b324cSopenharmony_ci UInt32 crc1[256]; 400370b324cSopenharmony_ci UInt32 crc2[256]; 401370b324cSopenharmony_ci { 402370b324cSopenharmony_ci unsigned i; 403370b324cSopenharmony_ci for (i = 0; i < 256; i++) 404370b324cSopenharmony_ci { 405370b324cSopenharmony_ci UInt32 v = crc[i]; 406370b324cSopenharmony_ci crc0[i] = v & hashMask; 407370b324cSopenharmony_ci crc1[i] = (v << kLzHash_CrcShift_1) & hashMask; 408370b324cSopenharmony_ci crc2[i] = (v << kLzHash_CrcShift_2) & hashMask; 409370b324cSopenharmony_ci } 410370b324cSopenharmony_ci } 411370b324cSopenharmony_ci GetHeads_LOOP(crc0[p[0]] ^ crc1[p[3]] ^ crc2[p[4]] ^ (UInt32)GetUi16(p+1)) 412370b324cSopenharmony_ci} 413370b324cSopenharmony_ci 414370b324cSopenharmony_ciGetHeads_DECL(5b) 415370b324cSopenharmony_ci{ 416370b324cSopenharmony_ci UInt32 crc0[256]; 417370b324cSopenharmony_ci UInt32 crc1[256]; 418370b324cSopenharmony_ci { 419370b324cSopenharmony_ci unsigned i; 420370b324cSopenharmony_ci for (i = 0; i < 256; i++) 421370b324cSopenharmony_ci { 422370b324cSopenharmony_ci UInt32 v = crc[i]; 423370b324cSopenharmony_ci crc0[i] = v & hashMask; 424370b324cSopenharmony_ci crc1[i] = (v << kLzHash_CrcShift_1) & hashMask; 425370b324cSopenharmony_ci } 426370b324cSopenharmony_ci } 427370b324cSopenharmony_ci GetHeads_LOOP(crc0[p[0]] ^ crc1[p[4]] ^ GetUi24hi_from32(p)) 428370b324cSopenharmony_ci} 429370b324cSopenharmony_ci 430370b324cSopenharmony_ci#else 431370b324cSopenharmony_ci 432370b324cSopenharmony_ciDEF_GetHeads(4, (crc[p[0]] ^ (crc[p[3]] << kLzHash_CrcShift_1) ^ (UInt32)GetUi16(p+1)) & hashMask) 433370b324cSopenharmony_ciDEF_GetHeads(4b, (crc[p[0]] ^ GetUi24hi_from32(p)) & hashMask) 434370b324cSopenharmony_ciDEF_GetHeads(5, (crc[p[0]] ^ (crc[p[3]] << kLzHash_CrcShift_1) ^ (crc[p[4]] << kLzHash_CrcShift_2) ^ (UInt32)GetUi16(p + 1)) & hashMask) 435370b324cSopenharmony_ciDEF_GetHeads(5b, (crc[p[0]] ^ (crc[p[4]] << kLzHash_CrcShift_1) ^ GetUi24hi_from32(p)) & hashMask) 436370b324cSopenharmony_ci 437370b324cSopenharmony_ci#endif 438370b324cSopenharmony_ci 439370b324cSopenharmony_ci 440370b324cSopenharmony_cistatic void HashThreadFunc(CMatchFinderMt *mt) 441370b324cSopenharmony_ci{ 442370b324cSopenharmony_ci CMtSync *p = &mt->hashSync; 443370b324cSopenharmony_ci PRF(printf("\nHashThreadFunc\n")); 444370b324cSopenharmony_ci 445370b324cSopenharmony_ci for (;;) 446370b324cSopenharmony_ci { 447370b324cSopenharmony_ci UInt32 blockIndex = 0; 448370b324cSopenharmony_ci PRF(printf("\nHashThreadFunc : Event_Wait(&p->canStart)\n")); 449370b324cSopenharmony_ci Event_Wait(&p->canStart); 450370b324cSopenharmony_ci PRF(printf("\nHashThreadFunc : Event_Wait(&p->canStart) : after \n")); 451370b324cSopenharmony_ci if (p->exit) 452370b324cSopenharmony_ci { 453370b324cSopenharmony_ci PRF(printf("\nHashThreadFunc : exit \n")); 454370b324cSopenharmony_ci return; 455370b324cSopenharmony_ci } 456370b324cSopenharmony_ci 457370b324cSopenharmony_ci MatchFinder_Init_HighHash(MF(mt)); 458370b324cSopenharmony_ci 459370b324cSopenharmony_ci for (;;) 460370b324cSopenharmony_ci { 461370b324cSopenharmony_ci PRF(printf("Hash thread block = %d pos = %d\n", (unsigned)blockIndex, mt->MatchFinder->pos)); 462370b324cSopenharmony_ci 463370b324cSopenharmony_ci { 464370b324cSopenharmony_ci CMatchFinder *mf = MF(mt); 465370b324cSopenharmony_ci if (MatchFinder_NeedMove(mf)) 466370b324cSopenharmony_ci { 467370b324cSopenharmony_ci CriticalSection_Enter(&mt->btSync.cs); 468370b324cSopenharmony_ci CriticalSection_Enter(&mt->hashSync.cs); 469370b324cSopenharmony_ci { 470370b324cSopenharmony_ci const Byte *beforePtr = Inline_MatchFinder_GetPointerToCurrentPos(mf); 471370b324cSopenharmony_ci ptrdiff_t offset; 472370b324cSopenharmony_ci MatchFinder_MoveBlock(mf); 473370b324cSopenharmony_ci offset = beforePtr - Inline_MatchFinder_GetPointerToCurrentPos(mf); 474370b324cSopenharmony_ci mt->pointerToCurPos -= offset; 475370b324cSopenharmony_ci mt->buffer -= offset; 476370b324cSopenharmony_ci } 477370b324cSopenharmony_ci CriticalSection_Leave(&mt->hashSync.cs); 478370b324cSopenharmony_ci CriticalSection_Leave(&mt->btSync.cs); 479370b324cSopenharmony_ci continue; 480370b324cSopenharmony_ci } 481370b324cSopenharmony_ci 482370b324cSopenharmony_ci Semaphore_Wait(&p->freeSemaphore); 483370b324cSopenharmony_ci 484370b324cSopenharmony_ci if (p->exit) // exit is unexpected here. But we check it here for some failure case 485370b324cSopenharmony_ci return; 486370b324cSopenharmony_ci 487370b324cSopenharmony_ci // for faster stop : we check (p->stopWriting) after Wait(freeSemaphore) 488370b324cSopenharmony_ci if (p->stopWriting) 489370b324cSopenharmony_ci break; 490370b324cSopenharmony_ci 491370b324cSopenharmony_ci MatchFinder_ReadIfRequired(mf); 492370b324cSopenharmony_ci { 493370b324cSopenharmony_ci UInt32 *heads = mt->hashBuf + GET_HASH_BLOCK_OFFSET(blockIndex++); 494370b324cSopenharmony_ci UInt32 num = Inline_MatchFinder_GetNumAvailableBytes(mf); 495370b324cSopenharmony_ci heads[0] = 2; 496370b324cSopenharmony_ci heads[1] = num; 497370b324cSopenharmony_ci 498370b324cSopenharmony_ci /* heads[1] contains the number of avail bytes: 499370b324cSopenharmony_ci if (avail < mf->numHashBytes) : 500370b324cSopenharmony_ci { 501370b324cSopenharmony_ci it means that stream was finished 502370b324cSopenharmony_ci HASH_THREAD and BT_TREAD must move position for heads[1] (avail) bytes. 503370b324cSopenharmony_ci HASH_THREAD doesn't stop, 504370b324cSopenharmony_ci HASH_THREAD fills only the header (2 numbers) for all next blocks: 505370b324cSopenharmony_ci {2, NumHashBytes - 1}, {2,0}, {2,0}, ... , {2,0} 506370b324cSopenharmony_ci } 507370b324cSopenharmony_ci else 508370b324cSopenharmony_ci { 509370b324cSopenharmony_ci HASH_THREAD and BT_TREAD must move position for (heads[0] - 2) bytes; 510370b324cSopenharmony_ci } 511370b324cSopenharmony_ci */ 512370b324cSopenharmony_ci 513370b324cSopenharmony_ci if (num >= mf->numHashBytes) 514370b324cSopenharmony_ci { 515370b324cSopenharmony_ci num = num - mf->numHashBytes + 1; 516370b324cSopenharmony_ci if (num > kMtHashBlockSize - 2) 517370b324cSopenharmony_ci num = kMtHashBlockSize - 2; 518370b324cSopenharmony_ci 519370b324cSopenharmony_ci if (mf->pos > (UInt32)kMtMaxValForNormalize - num) 520370b324cSopenharmony_ci { 521370b324cSopenharmony_ci const UInt32 subValue = (mf->pos - mf->historySize - 1); // & ~(UInt32)(kNormalizeAlign - 1); 522370b324cSopenharmony_ci MatchFinder_REDUCE_OFFSETS(mf, subValue) 523370b324cSopenharmony_ci MatchFinder_Normalize3(subValue, mf->hash + mf->fixedHashSize, (size_t)mf->hashMask + 1); 524370b324cSopenharmony_ci } 525370b324cSopenharmony_ci 526370b324cSopenharmony_ci heads[0] = 2 + num; 527370b324cSopenharmony_ci mt->GetHeadsFunc(mf->buffer, mf->pos, mf->hash + mf->fixedHashSize, mf->hashMask, heads + 2, num, mf->crc); 528370b324cSopenharmony_ci } 529370b324cSopenharmony_ci 530370b324cSopenharmony_ci mf->pos += num; // wrap over zero is allowed at the end of stream 531370b324cSopenharmony_ci mf->buffer += num; 532370b324cSopenharmony_ci } 533370b324cSopenharmony_ci } 534370b324cSopenharmony_ci 535370b324cSopenharmony_ci Semaphore_Release1(&p->filledSemaphore); 536370b324cSopenharmony_ci } // for() processing end 537370b324cSopenharmony_ci 538370b324cSopenharmony_ci // p->numBlocks_Sent = blockIndex; 539370b324cSopenharmony_ci Event_Set(&p->wasStopped); 540370b324cSopenharmony_ci } // for() thread end 541370b324cSopenharmony_ci} 542370b324cSopenharmony_ci 543370b324cSopenharmony_ci 544370b324cSopenharmony_ci 545370b324cSopenharmony_ci 546370b324cSopenharmony_ci// ---------- BT THREAD ---------- 547370b324cSopenharmony_ci 548370b324cSopenharmony_ci/* we use one variable instead of two (cyclicBufferPos == pos) before CyclicBuf wrap. 549370b324cSopenharmony_ci here we define fixed offset of (p->pos) from (p->cyclicBufferPos) */ 550370b324cSopenharmony_ci#define CYC_TO_POS_OFFSET 0 551370b324cSopenharmony_ci// #define CYC_TO_POS_OFFSET 1 // for debug 552370b324cSopenharmony_ci 553370b324cSopenharmony_ci#define MFMT_GM_INLINE 554370b324cSopenharmony_ci 555370b324cSopenharmony_ci#ifdef MFMT_GM_INLINE 556370b324cSopenharmony_ci 557370b324cSopenharmony_ci/* 558370b324cSopenharmony_ci we use size_t for (pos) instead of UInt32 559370b324cSopenharmony_ci to eliminate "movsx" BUG in old MSVC x64 compiler. 560370b324cSopenharmony_ci*/ 561370b324cSopenharmony_ci 562370b324cSopenharmony_ci 563370b324cSopenharmony_ciUInt32 * Z7_FASTCALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son, 564370b324cSopenharmony_ci UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size, 565370b324cSopenharmony_ci size_t _cyclicBufferPos, UInt32 _cyclicBufferSize, 566370b324cSopenharmony_ci UInt32 *posRes); 567370b324cSopenharmony_ci 568370b324cSopenharmony_ci#endif 569370b324cSopenharmony_ci 570370b324cSopenharmony_ci 571370b324cSopenharmony_cistatic void BtGetMatches(CMatchFinderMt *p, UInt32 *d) 572370b324cSopenharmony_ci{ 573370b324cSopenharmony_ci UInt32 numProcessed = 0; 574370b324cSopenharmony_ci UInt32 curPos = 2; 575370b324cSopenharmony_ci 576370b324cSopenharmony_ci /* GetMatchesSpec() functions don't create (len = 1) 577370b324cSopenharmony_ci in [len, dist] match pairs, if (p->numHashBytes >= 2) 578370b324cSopenharmony_ci Also we suppose here that (matchMaxLen >= 2). 579370b324cSopenharmony_ci So the following code for (reserve) is not required 580370b324cSopenharmony_ci UInt32 reserve = (p->matchMaxLen * 2); 581370b324cSopenharmony_ci const UInt32 kNumHashBytes_Max = 5; // BT_HASH_BYTES_MAX 582370b324cSopenharmony_ci if (reserve < kNumHashBytes_Max - 1) 583370b324cSopenharmony_ci reserve = kNumHashBytes_Max - 1; 584370b324cSopenharmony_ci const UInt32 limit = kMtBtBlockSize - (reserve); 585370b324cSopenharmony_ci */ 586370b324cSopenharmony_ci 587370b324cSopenharmony_ci const UInt32 limit = kMtBtBlockSize - (p->matchMaxLen * 2); 588370b324cSopenharmony_ci 589370b324cSopenharmony_ci d[1] = p->hashNumAvail; 590370b324cSopenharmony_ci 591370b324cSopenharmony_ci if (p->failure_BT) 592370b324cSopenharmony_ci { 593370b324cSopenharmony_ci // printf("\n == 1 BtGetMatches() p->failure_BT\n"); 594370b324cSopenharmony_ci d[0] = 0; 595370b324cSopenharmony_ci // d[1] = 0; 596370b324cSopenharmony_ci return; 597370b324cSopenharmony_ci } 598370b324cSopenharmony_ci 599370b324cSopenharmony_ci while (curPos < limit) 600370b324cSopenharmony_ci { 601370b324cSopenharmony_ci if (p->hashBufPos == p->hashBufPosLimit) 602370b324cSopenharmony_ci { 603370b324cSopenharmony_ci // MatchFinderMt_GetNextBlock_Hash(p); 604370b324cSopenharmony_ci UInt32 avail; 605370b324cSopenharmony_ci { 606370b324cSopenharmony_ci const UInt32 bi = MtSync_GetNextBlock(&p->hashSync); 607370b324cSopenharmony_ci const UInt32 k = GET_HASH_BLOCK_OFFSET(bi); 608370b324cSopenharmony_ci const UInt32 *h = p->hashBuf + k; 609370b324cSopenharmony_ci avail = h[1]; 610370b324cSopenharmony_ci p->hashBufPosLimit = k + h[0]; 611370b324cSopenharmony_ci p->hashNumAvail = avail; 612370b324cSopenharmony_ci p->hashBufPos = k + 2; 613370b324cSopenharmony_ci } 614370b324cSopenharmony_ci 615370b324cSopenharmony_ci { 616370b324cSopenharmony_ci /* we must prevent UInt32 overflow for avail total value, 617370b324cSopenharmony_ci if avail was increased with new hash block */ 618370b324cSopenharmony_ci UInt32 availSum = numProcessed + avail; 619370b324cSopenharmony_ci if (availSum < numProcessed) 620370b324cSopenharmony_ci availSum = (UInt32)(Int32)-1; 621370b324cSopenharmony_ci d[1] = availSum; 622370b324cSopenharmony_ci } 623370b324cSopenharmony_ci 624370b324cSopenharmony_ci if (avail >= p->numHashBytes) 625370b324cSopenharmony_ci continue; 626370b324cSopenharmony_ci 627370b324cSopenharmony_ci // if (p->hashBufPos != p->hashBufPosLimit) exit(1); 628370b324cSopenharmony_ci 629370b324cSopenharmony_ci /* (avail < p->numHashBytes) 630370b324cSopenharmony_ci It means that stream was finished. 631370b324cSopenharmony_ci And (avail) - is a number of remaining bytes, 632370b324cSopenharmony_ci we fill (d) for (avail) bytes for LZ_THREAD (receiver). 633370b324cSopenharmony_ci but we don't update (p->pos) and (p->cyclicBufferPos) here in BT_THREAD */ 634370b324cSopenharmony_ci 635370b324cSopenharmony_ci /* here we suppose that we have space enough: 636370b324cSopenharmony_ci (kMtBtBlockSize - curPos >= p->hashNumAvail) */ 637370b324cSopenharmony_ci p->hashNumAvail = 0; 638370b324cSopenharmony_ci d[0] = curPos + avail; 639370b324cSopenharmony_ci d += curPos; 640370b324cSopenharmony_ci for (; avail != 0; avail--) 641370b324cSopenharmony_ci *d++ = 0; 642370b324cSopenharmony_ci return; 643370b324cSopenharmony_ci } 644370b324cSopenharmony_ci { 645370b324cSopenharmony_ci UInt32 size = p->hashBufPosLimit - p->hashBufPos; 646370b324cSopenharmony_ci UInt32 pos = p->pos; 647370b324cSopenharmony_ci UInt32 cyclicBufferPos = p->cyclicBufferPos; 648370b324cSopenharmony_ci UInt32 lenLimit = p->matchMaxLen; 649370b324cSopenharmony_ci if (lenLimit >= p->hashNumAvail) 650370b324cSopenharmony_ci lenLimit = p->hashNumAvail; 651370b324cSopenharmony_ci { 652370b324cSopenharmony_ci UInt32 size2 = p->hashNumAvail - lenLimit + 1; 653370b324cSopenharmony_ci if (size2 < size) 654370b324cSopenharmony_ci size = size2; 655370b324cSopenharmony_ci size2 = p->cyclicBufferSize - cyclicBufferPos; 656370b324cSopenharmony_ci if (size2 < size) 657370b324cSopenharmony_ci size = size2; 658370b324cSopenharmony_ci } 659370b324cSopenharmony_ci 660370b324cSopenharmony_ci if (pos > (UInt32)kMtMaxValForNormalize - size) 661370b324cSopenharmony_ci { 662370b324cSopenharmony_ci const UInt32 subValue = (pos - p->cyclicBufferSize); // & ~(UInt32)(kNormalizeAlign - 1); 663370b324cSopenharmony_ci pos -= subValue; 664370b324cSopenharmony_ci p->pos = pos; 665370b324cSopenharmony_ci MatchFinder_Normalize3(subValue, p->son, (size_t)p->cyclicBufferSize * 2); 666370b324cSopenharmony_ci } 667370b324cSopenharmony_ci 668370b324cSopenharmony_ci #ifndef MFMT_GM_INLINE 669370b324cSopenharmony_ci while (curPos < limit && size-- != 0) 670370b324cSopenharmony_ci { 671370b324cSopenharmony_ci UInt32 *startDistances = d + curPos; 672370b324cSopenharmony_ci UInt32 num = (UInt32)(GetMatchesSpec1(lenLimit, pos - p->hashBuf[p->hashBufPos++], 673370b324cSopenharmony_ci pos, p->buffer, p->son, cyclicBufferPos, p->cyclicBufferSize, p->cutValue, 674370b324cSopenharmony_ci startDistances + 1, p->numHashBytes - 1) - startDistances); 675370b324cSopenharmony_ci *startDistances = num - 1; 676370b324cSopenharmony_ci curPos += num; 677370b324cSopenharmony_ci cyclicBufferPos++; 678370b324cSopenharmony_ci pos++; 679370b324cSopenharmony_ci p->buffer++; 680370b324cSopenharmony_ci } 681370b324cSopenharmony_ci #else 682370b324cSopenharmony_ci { 683370b324cSopenharmony_ci UInt32 posRes = pos; 684370b324cSopenharmony_ci const UInt32 *d_end; 685370b324cSopenharmony_ci { 686370b324cSopenharmony_ci d_end = GetMatchesSpecN_2( 687370b324cSopenharmony_ci p->buffer + lenLimit - 1, 688370b324cSopenharmony_ci pos, p->buffer, p->son, p->cutValue, d + curPos, 689370b324cSopenharmony_ci p->numHashBytes - 1, p->hashBuf + p->hashBufPos, 690370b324cSopenharmony_ci d + limit, p->hashBuf + p->hashBufPos + size, 691370b324cSopenharmony_ci cyclicBufferPos, p->cyclicBufferSize, 692370b324cSopenharmony_ci &posRes); 693370b324cSopenharmony_ci } 694370b324cSopenharmony_ci { 695370b324cSopenharmony_ci if (!d_end) 696370b324cSopenharmony_ci { 697370b324cSopenharmony_ci // printf("\n == 2 BtGetMatches() p->failure_BT\n"); 698370b324cSopenharmony_ci // internal data failure 699370b324cSopenharmony_ci p->failure_BT = True; 700370b324cSopenharmony_ci d[0] = 0; 701370b324cSopenharmony_ci // d[1] = 0; 702370b324cSopenharmony_ci return; 703370b324cSopenharmony_ci } 704370b324cSopenharmony_ci } 705370b324cSopenharmony_ci curPos = (UInt32)(d_end - d); 706370b324cSopenharmony_ci { 707370b324cSopenharmony_ci const UInt32 processed = posRes - pos; 708370b324cSopenharmony_ci pos = posRes; 709370b324cSopenharmony_ci p->hashBufPos += processed; 710370b324cSopenharmony_ci cyclicBufferPos += processed; 711370b324cSopenharmony_ci p->buffer += processed; 712370b324cSopenharmony_ci } 713370b324cSopenharmony_ci } 714370b324cSopenharmony_ci #endif 715370b324cSopenharmony_ci 716370b324cSopenharmony_ci { 717370b324cSopenharmony_ci const UInt32 processed = pos - p->pos; 718370b324cSopenharmony_ci numProcessed += processed; 719370b324cSopenharmony_ci p->hashNumAvail -= processed; 720370b324cSopenharmony_ci p->pos = pos; 721370b324cSopenharmony_ci } 722370b324cSopenharmony_ci if (cyclicBufferPos == p->cyclicBufferSize) 723370b324cSopenharmony_ci cyclicBufferPos = 0; 724370b324cSopenharmony_ci p->cyclicBufferPos = cyclicBufferPos; 725370b324cSopenharmony_ci } 726370b324cSopenharmony_ci } 727370b324cSopenharmony_ci 728370b324cSopenharmony_ci d[0] = curPos; 729370b324cSopenharmony_ci} 730370b324cSopenharmony_ci 731370b324cSopenharmony_ci 732370b324cSopenharmony_cistatic void BtFillBlock(CMatchFinderMt *p, UInt32 globalBlockIndex) 733370b324cSopenharmony_ci{ 734370b324cSopenharmony_ci CMtSync *sync = &p->hashSync; 735370b324cSopenharmony_ci 736370b324cSopenharmony_ci BUFFER_MUST_BE_UNLOCKED(sync) 737370b324cSopenharmony_ci 738370b324cSopenharmony_ci if (!sync->needStart) 739370b324cSopenharmony_ci { 740370b324cSopenharmony_ci LOCK_BUFFER(sync) 741370b324cSopenharmony_ci } 742370b324cSopenharmony_ci 743370b324cSopenharmony_ci BtGetMatches(p, p->btBuf + GET_BT_BLOCK_OFFSET(globalBlockIndex)); 744370b324cSopenharmony_ci 745370b324cSopenharmony_ci /* We suppose that we have called GetNextBlock() from start. 746370b324cSopenharmony_ci So buffer is LOCKED */ 747370b324cSopenharmony_ci 748370b324cSopenharmony_ci UNLOCK_BUFFER(sync) 749370b324cSopenharmony_ci} 750370b324cSopenharmony_ci 751370b324cSopenharmony_ci 752370b324cSopenharmony_ciZ7_NO_INLINE 753370b324cSopenharmony_cistatic void BtThreadFunc(CMatchFinderMt *mt) 754370b324cSopenharmony_ci{ 755370b324cSopenharmony_ci CMtSync *p = &mt->btSync; 756370b324cSopenharmony_ci for (;;) 757370b324cSopenharmony_ci { 758370b324cSopenharmony_ci UInt32 blockIndex = 0; 759370b324cSopenharmony_ci Event_Wait(&p->canStart); 760370b324cSopenharmony_ci 761370b324cSopenharmony_ci for (;;) 762370b324cSopenharmony_ci { 763370b324cSopenharmony_ci PRF(printf(" BT thread block = %d pos = %d\n", (unsigned)blockIndex, mt->pos)); 764370b324cSopenharmony_ci /* (p->exit == true) is possible after (p->canStart) at first loop iteration 765370b324cSopenharmony_ci and is unexpected after more Wait(freeSemaphore) iterations */ 766370b324cSopenharmony_ci if (p->exit) 767370b324cSopenharmony_ci return; 768370b324cSopenharmony_ci 769370b324cSopenharmony_ci Semaphore_Wait(&p->freeSemaphore); 770370b324cSopenharmony_ci 771370b324cSopenharmony_ci // for faster stop : we check (p->stopWriting) after Wait(freeSemaphore) 772370b324cSopenharmony_ci if (p->stopWriting) 773370b324cSopenharmony_ci break; 774370b324cSopenharmony_ci 775370b324cSopenharmony_ci BtFillBlock(mt, blockIndex++); 776370b324cSopenharmony_ci 777370b324cSopenharmony_ci Semaphore_Release1(&p->filledSemaphore); 778370b324cSopenharmony_ci } 779370b324cSopenharmony_ci 780370b324cSopenharmony_ci // we stop HASH_THREAD here 781370b324cSopenharmony_ci MtSync_StopWriting(&mt->hashSync); 782370b324cSopenharmony_ci 783370b324cSopenharmony_ci // p->numBlocks_Sent = blockIndex; 784370b324cSopenharmony_ci Event_Set(&p->wasStopped); 785370b324cSopenharmony_ci } 786370b324cSopenharmony_ci} 787370b324cSopenharmony_ci 788370b324cSopenharmony_ci 789370b324cSopenharmony_civoid MatchFinderMt_Construct(CMatchFinderMt *p) 790370b324cSopenharmony_ci{ 791370b324cSopenharmony_ci p->hashBuf = NULL; 792370b324cSopenharmony_ci MtSync_Construct(&p->hashSync); 793370b324cSopenharmony_ci MtSync_Construct(&p->btSync); 794370b324cSopenharmony_ci} 795370b324cSopenharmony_ci 796370b324cSopenharmony_cistatic void MatchFinderMt_FreeMem(CMatchFinderMt *p, ISzAllocPtr alloc) 797370b324cSopenharmony_ci{ 798370b324cSopenharmony_ci ISzAlloc_Free(alloc, p->hashBuf); 799370b324cSopenharmony_ci p->hashBuf = NULL; 800370b324cSopenharmony_ci} 801370b324cSopenharmony_ci 802370b324cSopenharmony_civoid MatchFinderMt_Destruct(CMatchFinderMt *p, ISzAllocPtr alloc) 803370b324cSopenharmony_ci{ 804370b324cSopenharmony_ci /* 805370b324cSopenharmony_ci HASH_THREAD can use CriticalSection(s) btSync.cs and hashSync.cs. 806370b324cSopenharmony_ci So we must be sure that HASH_THREAD will not use CriticalSection(s) 807370b324cSopenharmony_ci after deleting CriticalSection here. 808370b324cSopenharmony_ci 809370b324cSopenharmony_ci we call ReleaseStream(p) 810370b324cSopenharmony_ci that calls StopWriting(btSync) 811370b324cSopenharmony_ci that calls StopWriting(hashSync), if it's required to stop HASH_THREAD. 812370b324cSopenharmony_ci after StopWriting() it's safe to destruct MtSync(s) in any order */ 813370b324cSopenharmony_ci 814370b324cSopenharmony_ci MatchFinderMt_ReleaseStream(p); 815370b324cSopenharmony_ci 816370b324cSopenharmony_ci MtSync_Destruct(&p->btSync); 817370b324cSopenharmony_ci MtSync_Destruct(&p->hashSync); 818370b324cSopenharmony_ci 819370b324cSopenharmony_ci LOG_ITER( 820370b324cSopenharmony_ci printf("\nTree %9d * %7d iter = %9d = sum : bytes = %9d\n", 821370b324cSopenharmony_ci (UInt32)(g_NumIters_Tree / 1000), 822370b324cSopenharmony_ci (UInt32)(((UInt64)g_NumIters_Loop * 1000) / (g_NumIters_Tree + 1)), 823370b324cSopenharmony_ci (UInt32)(g_NumIters_Loop / 1000), 824370b324cSopenharmony_ci (UInt32)(g_NumIters_Bytes / 1000) 825370b324cSopenharmony_ci )); 826370b324cSopenharmony_ci 827370b324cSopenharmony_ci MatchFinderMt_FreeMem(p, alloc); 828370b324cSopenharmony_ci} 829370b324cSopenharmony_ci 830370b324cSopenharmony_ci 831370b324cSopenharmony_ci#define kHashBufferSize (kMtHashBlockSize * kMtHashNumBlocks) 832370b324cSopenharmony_ci#define kBtBufferSize (kMtBtBlockSize * kMtBtNumBlocks) 833370b324cSopenharmony_ci 834370b324cSopenharmony_ci 835370b324cSopenharmony_cistatic THREAD_FUNC_DECL HashThreadFunc2(void *p) { HashThreadFunc((CMatchFinderMt *)p); return 0; } 836370b324cSopenharmony_cistatic THREAD_FUNC_DECL BtThreadFunc2(void *p) 837370b324cSopenharmony_ci{ 838370b324cSopenharmony_ci Byte allocaDummy[0x180]; 839370b324cSopenharmony_ci unsigned i = 0; 840370b324cSopenharmony_ci for (i = 0; i < 16; i++) 841370b324cSopenharmony_ci allocaDummy[i] = (Byte)0; 842370b324cSopenharmony_ci if (allocaDummy[0] == 0) 843370b324cSopenharmony_ci BtThreadFunc((CMatchFinderMt *)p); 844370b324cSopenharmony_ci return 0; 845370b324cSopenharmony_ci} 846370b324cSopenharmony_ci 847370b324cSopenharmony_ci 848370b324cSopenharmony_ciSRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddBufferBefore, 849370b324cSopenharmony_ci UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAllocPtr alloc) 850370b324cSopenharmony_ci{ 851370b324cSopenharmony_ci CMatchFinder *mf = MF(p); 852370b324cSopenharmony_ci p->historySize = historySize; 853370b324cSopenharmony_ci if (kMtBtBlockSize <= matchMaxLen * 4) 854370b324cSopenharmony_ci return SZ_ERROR_PARAM; 855370b324cSopenharmony_ci if (!p->hashBuf) 856370b324cSopenharmony_ci { 857370b324cSopenharmony_ci p->hashBuf = (UInt32 *)ISzAlloc_Alloc(alloc, ((size_t)kHashBufferSize + (size_t)kBtBufferSize) * sizeof(UInt32)); 858370b324cSopenharmony_ci if (!p->hashBuf) 859370b324cSopenharmony_ci return SZ_ERROR_MEM; 860370b324cSopenharmony_ci p->btBuf = p->hashBuf + kHashBufferSize; 861370b324cSopenharmony_ci } 862370b324cSopenharmony_ci keepAddBufferBefore += (kHashBufferSize + kBtBufferSize); 863370b324cSopenharmony_ci keepAddBufferAfter += kMtHashBlockSize; 864370b324cSopenharmony_ci if (!MatchFinder_Create(mf, historySize, keepAddBufferBefore, matchMaxLen, keepAddBufferAfter, alloc)) 865370b324cSopenharmony_ci return SZ_ERROR_MEM; 866370b324cSopenharmony_ci 867370b324cSopenharmony_ci RINOK(MtSync_Create(&p->hashSync, HashThreadFunc2, p)) 868370b324cSopenharmony_ci RINOK(MtSync_Create(&p->btSync, BtThreadFunc2, p)) 869370b324cSopenharmony_ci return SZ_OK; 870370b324cSopenharmony_ci} 871370b324cSopenharmony_ci 872370b324cSopenharmony_ci 873370b324cSopenharmony_ciSRes MatchFinderMt_InitMt(CMatchFinderMt *p) 874370b324cSopenharmony_ci{ 875370b324cSopenharmony_ci RINOK(MtSync_Init(&p->hashSync, kMtHashNumBlocks)) 876370b324cSopenharmony_ci return MtSync_Init(&p->btSync, kMtBtNumBlocks); 877370b324cSopenharmony_ci} 878370b324cSopenharmony_ci 879370b324cSopenharmony_ci 880370b324cSopenharmony_cistatic void MatchFinderMt_Init(CMatchFinderMt *p) 881370b324cSopenharmony_ci{ 882370b324cSopenharmony_ci CMatchFinder *mf = MF(p); 883370b324cSopenharmony_ci 884370b324cSopenharmony_ci p->btBufPos = 885370b324cSopenharmony_ci p->btBufPosLimit = NULL; 886370b324cSopenharmony_ci p->hashBufPos = 887370b324cSopenharmony_ci p->hashBufPosLimit = 0; 888370b324cSopenharmony_ci p->hashNumAvail = 0; // 21.03 889370b324cSopenharmony_ci 890370b324cSopenharmony_ci p->failure_BT = False; 891370b324cSopenharmony_ci 892370b324cSopenharmony_ci /* Init without data reading. We don't want to read data in this thread */ 893370b324cSopenharmony_ci MatchFinder_Init_4(mf); 894370b324cSopenharmony_ci 895370b324cSopenharmony_ci MatchFinder_Init_LowHash(mf); 896370b324cSopenharmony_ci 897370b324cSopenharmony_ci p->pointerToCurPos = Inline_MatchFinder_GetPointerToCurrentPos(mf); 898370b324cSopenharmony_ci p->btNumAvailBytes = 0; 899370b324cSopenharmony_ci p->failure_LZ_BT = False; 900370b324cSopenharmony_ci // p->failure_LZ_LZ = False; 901370b324cSopenharmony_ci 902370b324cSopenharmony_ci p->lzPos = 903370b324cSopenharmony_ci 1; // optimal smallest value 904370b324cSopenharmony_ci // 0; // for debug: ignores match to start 905370b324cSopenharmony_ci // kNormalizeAlign; // for debug 906370b324cSopenharmony_ci 907370b324cSopenharmony_ci p->hash = mf->hash; 908370b324cSopenharmony_ci p->fixedHashSize = mf->fixedHashSize; 909370b324cSopenharmony_ci // p->hash4Mask = mf->hash4Mask; 910370b324cSopenharmony_ci p->crc = mf->crc; 911370b324cSopenharmony_ci // memcpy(p->crc, mf->crc, sizeof(mf->crc)); 912370b324cSopenharmony_ci 913370b324cSopenharmony_ci p->son = mf->son; 914370b324cSopenharmony_ci p->matchMaxLen = mf->matchMaxLen; 915370b324cSopenharmony_ci p->numHashBytes = mf->numHashBytes; 916370b324cSopenharmony_ci 917370b324cSopenharmony_ci /* (mf->pos) and (mf->streamPos) were already initialized to 1 in MatchFinder_Init_4() */ 918370b324cSopenharmony_ci // mf->streamPos = mf->pos = 1; // optimal smallest value 919370b324cSopenharmony_ci // 0; // for debug: ignores match to start 920370b324cSopenharmony_ci // kNormalizeAlign; // for debug 921370b324cSopenharmony_ci 922370b324cSopenharmony_ci /* we must init (p->pos = mf->pos) for BT, because 923370b324cSopenharmony_ci BT code needs (p->pos == delta_value_for_empty_hash_record == mf->pos) */ 924370b324cSopenharmony_ci p->pos = mf->pos; // do not change it 925370b324cSopenharmony_ci 926370b324cSopenharmony_ci p->cyclicBufferPos = (p->pos - CYC_TO_POS_OFFSET); 927370b324cSopenharmony_ci p->cyclicBufferSize = mf->cyclicBufferSize; 928370b324cSopenharmony_ci p->buffer = mf->buffer; 929370b324cSopenharmony_ci p->cutValue = mf->cutValue; 930370b324cSopenharmony_ci // p->son[0] = p->son[1] = 0; // unused: to init skipped record for speculated accesses. 931370b324cSopenharmony_ci} 932370b324cSopenharmony_ci 933370b324cSopenharmony_ci 934370b324cSopenharmony_ci/* ReleaseStream is required to finish multithreading */ 935370b324cSopenharmony_civoid MatchFinderMt_ReleaseStream(CMatchFinderMt *p) 936370b324cSopenharmony_ci{ 937370b324cSopenharmony_ci // Sleep(1); // for debug 938370b324cSopenharmony_ci MtSync_StopWriting(&p->btSync); 939370b324cSopenharmony_ci // Sleep(200); // for debug 940370b324cSopenharmony_ci /* p->MatchFinder->ReleaseStream(); */ 941370b324cSopenharmony_ci} 942370b324cSopenharmony_ci 943370b324cSopenharmony_ci 944370b324cSopenharmony_ciZ7_NO_INLINE 945370b324cSopenharmony_cistatic UInt32 MatchFinderMt_GetNextBlock_Bt(CMatchFinderMt *p) 946370b324cSopenharmony_ci{ 947370b324cSopenharmony_ci if (p->failure_LZ_BT) 948370b324cSopenharmony_ci p->btBufPos = p->failureBuf; 949370b324cSopenharmony_ci else 950370b324cSopenharmony_ci { 951370b324cSopenharmony_ci const UInt32 bi = MtSync_GetNextBlock(&p->btSync); 952370b324cSopenharmony_ci const UInt32 *bt = p->btBuf + GET_BT_BLOCK_OFFSET(bi); 953370b324cSopenharmony_ci { 954370b324cSopenharmony_ci const UInt32 numItems = bt[0]; 955370b324cSopenharmony_ci p->btBufPosLimit = bt + numItems; 956370b324cSopenharmony_ci p->btNumAvailBytes = bt[1]; 957370b324cSopenharmony_ci p->btBufPos = bt + 2; 958370b324cSopenharmony_ci if (numItems < 2 || numItems > kMtBtBlockSize) 959370b324cSopenharmony_ci { 960370b324cSopenharmony_ci p->failureBuf[0] = 0; 961370b324cSopenharmony_ci p->btBufPos = p->failureBuf; 962370b324cSopenharmony_ci p->btBufPosLimit = p->failureBuf + 1; 963370b324cSopenharmony_ci p->failure_LZ_BT = True; 964370b324cSopenharmony_ci // p->btNumAvailBytes = 0; 965370b324cSopenharmony_ci /* we don't want to decrease AvailBytes, that was load before. 966370b324cSopenharmony_ci that can be unxepected for the code that have loaded anopther value before */ 967370b324cSopenharmony_ci } 968370b324cSopenharmony_ci } 969370b324cSopenharmony_ci 970370b324cSopenharmony_ci if (p->lzPos >= (UInt32)kMtMaxValForNormalize - (UInt32)kMtBtBlockSize) 971370b324cSopenharmony_ci { 972370b324cSopenharmony_ci /* we don't check (lzPos) over exact avail bytes in (btBuf). 973370b324cSopenharmony_ci (fixedHashSize) is small, so normalization is fast */ 974370b324cSopenharmony_ci const UInt32 subValue = (p->lzPos - p->historySize - 1); // & ~(UInt32)(kNormalizeAlign - 1); 975370b324cSopenharmony_ci p->lzPos -= subValue; 976370b324cSopenharmony_ci MatchFinder_Normalize3(subValue, p->hash, p->fixedHashSize); 977370b324cSopenharmony_ci } 978370b324cSopenharmony_ci } 979370b324cSopenharmony_ci return p->btNumAvailBytes; 980370b324cSopenharmony_ci} 981370b324cSopenharmony_ci 982370b324cSopenharmony_ci 983370b324cSopenharmony_ci 984370b324cSopenharmony_cistatic const Byte * MatchFinderMt_GetPointerToCurrentPos(CMatchFinderMt *p) 985370b324cSopenharmony_ci{ 986370b324cSopenharmony_ci return p->pointerToCurPos; 987370b324cSopenharmony_ci} 988370b324cSopenharmony_ci 989370b324cSopenharmony_ci 990370b324cSopenharmony_ci#define GET_NEXT_BLOCK_IF_REQUIRED if (p->btBufPos == p->btBufPosLimit) MatchFinderMt_GetNextBlock_Bt(p); 991370b324cSopenharmony_ci 992370b324cSopenharmony_ci 993370b324cSopenharmony_cistatic UInt32 MatchFinderMt_GetNumAvailableBytes(CMatchFinderMt *p) 994370b324cSopenharmony_ci{ 995370b324cSopenharmony_ci if (p->btBufPos != p->btBufPosLimit) 996370b324cSopenharmony_ci return p->btNumAvailBytes; 997370b324cSopenharmony_ci return MatchFinderMt_GetNextBlock_Bt(p); 998370b324cSopenharmony_ci} 999370b324cSopenharmony_ci 1000370b324cSopenharmony_ci 1001370b324cSopenharmony_ci// #define CHECK_FAILURE_LZ(_match_, _pos_) if (_match_ >= _pos_) { p->failure_LZ_LZ = True; return d; } 1002370b324cSopenharmony_ci#define CHECK_FAILURE_LZ(_match_, _pos_) 1003370b324cSopenharmony_ci 1004370b324cSopenharmony_cistatic UInt32 * MixMatches2(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d) 1005370b324cSopenharmony_ci{ 1006370b324cSopenharmony_ci UInt32 h2, c2; 1007370b324cSopenharmony_ci UInt32 *hash = p->hash; 1008370b324cSopenharmony_ci const Byte *cur = p->pointerToCurPos; 1009370b324cSopenharmony_ci const UInt32 m = p->lzPos; 1010370b324cSopenharmony_ci MT_HASH2_CALC 1011370b324cSopenharmony_ci 1012370b324cSopenharmony_ci c2 = hash[h2]; 1013370b324cSopenharmony_ci hash[h2] = m; 1014370b324cSopenharmony_ci 1015370b324cSopenharmony_ci if (c2 >= matchMinPos) 1016370b324cSopenharmony_ci { 1017370b324cSopenharmony_ci CHECK_FAILURE_LZ(c2, m) 1018370b324cSopenharmony_ci if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0]) 1019370b324cSopenharmony_ci { 1020370b324cSopenharmony_ci *d++ = 2; 1021370b324cSopenharmony_ci *d++ = m - c2 - 1; 1022370b324cSopenharmony_ci } 1023370b324cSopenharmony_ci } 1024370b324cSopenharmony_ci 1025370b324cSopenharmony_ci return d; 1026370b324cSopenharmony_ci} 1027370b324cSopenharmony_ci 1028370b324cSopenharmony_cistatic UInt32 * MixMatches3(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d) 1029370b324cSopenharmony_ci{ 1030370b324cSopenharmony_ci UInt32 h2, h3, c2, c3; 1031370b324cSopenharmony_ci UInt32 *hash = p->hash; 1032370b324cSopenharmony_ci const Byte *cur = p->pointerToCurPos; 1033370b324cSopenharmony_ci const UInt32 m = p->lzPos; 1034370b324cSopenharmony_ci MT_HASH3_CALC 1035370b324cSopenharmony_ci 1036370b324cSopenharmony_ci c2 = hash[h2]; 1037370b324cSopenharmony_ci c3 = (hash + kFix3HashSize)[h3]; 1038370b324cSopenharmony_ci 1039370b324cSopenharmony_ci hash[h2] = m; 1040370b324cSopenharmony_ci (hash + kFix3HashSize)[h3] = m; 1041370b324cSopenharmony_ci 1042370b324cSopenharmony_ci if (c2 >= matchMinPos) 1043370b324cSopenharmony_ci { 1044370b324cSopenharmony_ci CHECK_FAILURE_LZ(c2, m) 1045370b324cSopenharmony_ci if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0]) 1046370b324cSopenharmony_ci { 1047370b324cSopenharmony_ci d[1] = m - c2 - 1; 1048370b324cSopenharmony_ci if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2]) 1049370b324cSopenharmony_ci { 1050370b324cSopenharmony_ci d[0] = 3; 1051370b324cSopenharmony_ci return d + 2; 1052370b324cSopenharmony_ci } 1053370b324cSopenharmony_ci d[0] = 2; 1054370b324cSopenharmony_ci d += 2; 1055370b324cSopenharmony_ci } 1056370b324cSopenharmony_ci } 1057370b324cSopenharmony_ci 1058370b324cSopenharmony_ci if (c3 >= matchMinPos) 1059370b324cSopenharmony_ci { 1060370b324cSopenharmony_ci CHECK_FAILURE_LZ(c3, m) 1061370b324cSopenharmony_ci if (cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0]) 1062370b324cSopenharmony_ci { 1063370b324cSopenharmony_ci *d++ = 3; 1064370b324cSopenharmony_ci *d++ = m - c3 - 1; 1065370b324cSopenharmony_ci } 1066370b324cSopenharmony_ci } 1067370b324cSopenharmony_ci 1068370b324cSopenharmony_ci return d; 1069370b324cSopenharmony_ci} 1070370b324cSopenharmony_ci 1071370b324cSopenharmony_ci 1072370b324cSopenharmony_ci#define INCREASE_LZ_POS p->lzPos++; p->pointerToCurPos++; 1073370b324cSopenharmony_ci 1074370b324cSopenharmony_ci/* 1075370b324cSopenharmony_cistatic 1076370b324cSopenharmony_ciUInt32* MatchFinderMt_GetMatches_Bt4(CMatchFinderMt *p, UInt32 *d) 1077370b324cSopenharmony_ci{ 1078370b324cSopenharmony_ci const UInt32 *bt = p->btBufPos; 1079370b324cSopenharmony_ci const UInt32 len = *bt++; 1080370b324cSopenharmony_ci const UInt32 *btLim = bt + len; 1081370b324cSopenharmony_ci UInt32 matchMinPos; 1082370b324cSopenharmony_ci UInt32 avail = p->btNumAvailBytes - 1; 1083370b324cSopenharmony_ci p->btBufPos = btLim; 1084370b324cSopenharmony_ci 1085370b324cSopenharmony_ci { 1086370b324cSopenharmony_ci p->btNumAvailBytes = avail; 1087370b324cSopenharmony_ci 1088370b324cSopenharmony_ci #define BT_HASH_BYTES_MAX 5 1089370b324cSopenharmony_ci 1090370b324cSopenharmony_ci matchMinPos = p->lzPos; 1091370b324cSopenharmony_ci 1092370b324cSopenharmony_ci if (len != 0) 1093370b324cSopenharmony_ci matchMinPos -= bt[1]; 1094370b324cSopenharmony_ci else if (avail < (BT_HASH_BYTES_MAX - 1) - 1) 1095370b324cSopenharmony_ci { 1096370b324cSopenharmony_ci INCREASE_LZ_POS 1097370b324cSopenharmony_ci return d; 1098370b324cSopenharmony_ci } 1099370b324cSopenharmony_ci else 1100370b324cSopenharmony_ci { 1101370b324cSopenharmony_ci const UInt32 hs = p->historySize; 1102370b324cSopenharmony_ci if (matchMinPos > hs) 1103370b324cSopenharmony_ci matchMinPos -= hs; 1104370b324cSopenharmony_ci else 1105370b324cSopenharmony_ci matchMinPos = 1; 1106370b324cSopenharmony_ci } 1107370b324cSopenharmony_ci } 1108370b324cSopenharmony_ci 1109370b324cSopenharmony_ci for (;;) 1110370b324cSopenharmony_ci { 1111370b324cSopenharmony_ci 1112370b324cSopenharmony_ci UInt32 h2, h3, c2, c3; 1113370b324cSopenharmony_ci UInt32 *hash = p->hash; 1114370b324cSopenharmony_ci const Byte *cur = p->pointerToCurPos; 1115370b324cSopenharmony_ci UInt32 m = p->lzPos; 1116370b324cSopenharmony_ci MT_HASH3_CALC 1117370b324cSopenharmony_ci 1118370b324cSopenharmony_ci c2 = hash[h2]; 1119370b324cSopenharmony_ci c3 = (hash + kFix3HashSize)[h3]; 1120370b324cSopenharmony_ci 1121370b324cSopenharmony_ci hash[h2] = m; 1122370b324cSopenharmony_ci (hash + kFix3HashSize)[h3] = m; 1123370b324cSopenharmony_ci 1124370b324cSopenharmony_ci if (c2 >= matchMinPos && cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0]) 1125370b324cSopenharmony_ci { 1126370b324cSopenharmony_ci d[1] = m - c2 - 1; 1127370b324cSopenharmony_ci if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2]) 1128370b324cSopenharmony_ci { 1129370b324cSopenharmony_ci d[0] = 3; 1130370b324cSopenharmony_ci d += 2; 1131370b324cSopenharmony_ci break; 1132370b324cSopenharmony_ci } 1133370b324cSopenharmony_ci // else 1134370b324cSopenharmony_ci { 1135370b324cSopenharmony_ci d[0] = 2; 1136370b324cSopenharmony_ci d += 2; 1137370b324cSopenharmony_ci } 1138370b324cSopenharmony_ci } 1139370b324cSopenharmony_ci if (c3 >= matchMinPos && cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0]) 1140370b324cSopenharmony_ci { 1141370b324cSopenharmony_ci *d++ = 3; 1142370b324cSopenharmony_ci *d++ = m - c3 - 1; 1143370b324cSopenharmony_ci } 1144370b324cSopenharmony_ci break; 1145370b324cSopenharmony_ci } 1146370b324cSopenharmony_ci 1147370b324cSopenharmony_ci if (len != 0) 1148370b324cSopenharmony_ci { 1149370b324cSopenharmony_ci do 1150370b324cSopenharmony_ci { 1151370b324cSopenharmony_ci const UInt32 v0 = bt[0]; 1152370b324cSopenharmony_ci const UInt32 v1 = bt[1]; 1153370b324cSopenharmony_ci bt += 2; 1154370b324cSopenharmony_ci d[0] = v0; 1155370b324cSopenharmony_ci d[1] = v1; 1156370b324cSopenharmony_ci d += 2; 1157370b324cSopenharmony_ci } 1158370b324cSopenharmony_ci while (bt != btLim); 1159370b324cSopenharmony_ci } 1160370b324cSopenharmony_ci INCREASE_LZ_POS 1161370b324cSopenharmony_ci return d; 1162370b324cSopenharmony_ci} 1163370b324cSopenharmony_ci*/ 1164370b324cSopenharmony_ci 1165370b324cSopenharmony_ci 1166370b324cSopenharmony_cistatic UInt32 * MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d) 1167370b324cSopenharmony_ci{ 1168370b324cSopenharmony_ci UInt32 h2, h3, /* h4, */ c2, c3 /* , c4 */; 1169370b324cSopenharmony_ci UInt32 *hash = p->hash; 1170370b324cSopenharmony_ci const Byte *cur = p->pointerToCurPos; 1171370b324cSopenharmony_ci const UInt32 m = p->lzPos; 1172370b324cSopenharmony_ci MT_HASH3_CALC 1173370b324cSopenharmony_ci // MT_HASH4_CALC 1174370b324cSopenharmony_ci c2 = hash[h2]; 1175370b324cSopenharmony_ci c3 = (hash + kFix3HashSize)[h3]; 1176370b324cSopenharmony_ci // c4 = (hash + kFix4HashSize)[h4]; 1177370b324cSopenharmony_ci 1178370b324cSopenharmony_ci hash[h2] = m; 1179370b324cSopenharmony_ci (hash + kFix3HashSize)[h3] = m; 1180370b324cSopenharmony_ci // (hash + kFix4HashSize)[h4] = m; 1181370b324cSopenharmony_ci 1182370b324cSopenharmony_ci // #define BT5_USE_H2 1183370b324cSopenharmony_ci // #ifdef BT5_USE_H2 1184370b324cSopenharmony_ci if (c2 >= matchMinPos && cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0]) 1185370b324cSopenharmony_ci { 1186370b324cSopenharmony_ci d[1] = m - c2 - 1; 1187370b324cSopenharmony_ci if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2]) 1188370b324cSopenharmony_ci { 1189370b324cSopenharmony_ci // d[0] = (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 3] == cur[3]) ? 4 : 3; 1190370b324cSopenharmony_ci // return d + 2; 1191370b324cSopenharmony_ci 1192370b324cSopenharmony_ci if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 3] == cur[3]) 1193370b324cSopenharmony_ci { 1194370b324cSopenharmony_ci d[0] = 4; 1195370b324cSopenharmony_ci return d + 2; 1196370b324cSopenharmony_ci } 1197370b324cSopenharmony_ci d[0] = 3; 1198370b324cSopenharmony_ci d += 2; 1199370b324cSopenharmony_ci 1200370b324cSopenharmony_ci #ifdef BT5_USE_H4 1201370b324cSopenharmony_ci if (c4 >= matchMinPos) 1202370b324cSopenharmony_ci if ( 1203370b324cSopenharmony_ci cur[(ptrdiff_t)c4 - (ptrdiff_t)m] == cur[0] && 1204370b324cSopenharmony_ci cur[(ptrdiff_t)c4 - (ptrdiff_t)m + 3] == cur[3] 1205370b324cSopenharmony_ci ) 1206370b324cSopenharmony_ci { 1207370b324cSopenharmony_ci *d++ = 4; 1208370b324cSopenharmony_ci *d++ = m - c4 - 1; 1209370b324cSopenharmony_ci } 1210370b324cSopenharmony_ci #endif 1211370b324cSopenharmony_ci return d; 1212370b324cSopenharmony_ci } 1213370b324cSopenharmony_ci d[0] = 2; 1214370b324cSopenharmony_ci d += 2; 1215370b324cSopenharmony_ci } 1216370b324cSopenharmony_ci // #endif 1217370b324cSopenharmony_ci 1218370b324cSopenharmony_ci if (c3 >= matchMinPos && cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0]) 1219370b324cSopenharmony_ci { 1220370b324cSopenharmony_ci d[1] = m - c3 - 1; 1221370b324cSopenharmony_ci if (cur[(ptrdiff_t)c3 - (ptrdiff_t)m + 3] == cur[3]) 1222370b324cSopenharmony_ci { 1223370b324cSopenharmony_ci d[0] = 4; 1224370b324cSopenharmony_ci return d + 2; 1225370b324cSopenharmony_ci } 1226370b324cSopenharmony_ci d[0] = 3; 1227370b324cSopenharmony_ci d += 2; 1228370b324cSopenharmony_ci } 1229370b324cSopenharmony_ci 1230370b324cSopenharmony_ci #ifdef BT5_USE_H4 1231370b324cSopenharmony_ci if (c4 >= matchMinPos) 1232370b324cSopenharmony_ci if ( 1233370b324cSopenharmony_ci cur[(ptrdiff_t)c4 - (ptrdiff_t)m] == cur[0] && 1234370b324cSopenharmony_ci cur[(ptrdiff_t)c4 - (ptrdiff_t)m + 3] == cur[3] 1235370b324cSopenharmony_ci ) 1236370b324cSopenharmony_ci { 1237370b324cSopenharmony_ci *d++ = 4; 1238370b324cSopenharmony_ci *d++ = m - c4 - 1; 1239370b324cSopenharmony_ci } 1240370b324cSopenharmony_ci #endif 1241370b324cSopenharmony_ci 1242370b324cSopenharmony_ci return d; 1243370b324cSopenharmony_ci} 1244370b324cSopenharmony_ci 1245370b324cSopenharmony_ci 1246370b324cSopenharmony_cistatic UInt32 * MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *d) 1247370b324cSopenharmony_ci{ 1248370b324cSopenharmony_ci const UInt32 *bt = p->btBufPos; 1249370b324cSopenharmony_ci const UInt32 len = *bt++; 1250370b324cSopenharmony_ci const UInt32 *btLim = bt + len; 1251370b324cSopenharmony_ci p->btBufPos = btLim; 1252370b324cSopenharmony_ci p->btNumAvailBytes--; 1253370b324cSopenharmony_ci INCREASE_LZ_POS 1254370b324cSopenharmony_ci { 1255370b324cSopenharmony_ci while (bt != btLim) 1256370b324cSopenharmony_ci { 1257370b324cSopenharmony_ci const UInt32 v0 = bt[0]; 1258370b324cSopenharmony_ci const UInt32 v1 = bt[1]; 1259370b324cSopenharmony_ci bt += 2; 1260370b324cSopenharmony_ci d[0] = v0; 1261370b324cSopenharmony_ci d[1] = v1; 1262370b324cSopenharmony_ci d += 2; 1263370b324cSopenharmony_ci } 1264370b324cSopenharmony_ci } 1265370b324cSopenharmony_ci return d; 1266370b324cSopenharmony_ci} 1267370b324cSopenharmony_ci 1268370b324cSopenharmony_ci 1269370b324cSopenharmony_ci 1270370b324cSopenharmony_cistatic UInt32 * MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *d) 1271370b324cSopenharmony_ci{ 1272370b324cSopenharmony_ci const UInt32 *bt = p->btBufPos; 1273370b324cSopenharmony_ci UInt32 len = *bt++; 1274370b324cSopenharmony_ci const UInt32 avail = p->btNumAvailBytes - 1; 1275370b324cSopenharmony_ci p->btNumAvailBytes = avail; 1276370b324cSopenharmony_ci p->btBufPos = bt + len; 1277370b324cSopenharmony_ci if (len == 0) 1278370b324cSopenharmony_ci { 1279370b324cSopenharmony_ci #define BT_HASH_BYTES_MAX 5 1280370b324cSopenharmony_ci if (avail >= (BT_HASH_BYTES_MAX - 1) - 1) 1281370b324cSopenharmony_ci { 1282370b324cSopenharmony_ci UInt32 m = p->lzPos; 1283370b324cSopenharmony_ci if (m > p->historySize) 1284370b324cSopenharmony_ci m -= p->historySize; 1285370b324cSopenharmony_ci else 1286370b324cSopenharmony_ci m = 1; 1287370b324cSopenharmony_ci d = p->MixMatchesFunc(p, m, d); 1288370b324cSopenharmony_ci } 1289370b324cSopenharmony_ci } 1290370b324cSopenharmony_ci else 1291370b324cSopenharmony_ci { 1292370b324cSopenharmony_ci /* 1293370b324cSopenharmony_ci first match pair from BinTree: (match_len, match_dist), 1294370b324cSopenharmony_ci (match_len >= numHashBytes). 1295370b324cSopenharmony_ci MixMatchesFunc() inserts only hash matches that are nearer than (match_dist) 1296370b324cSopenharmony_ci */ 1297370b324cSopenharmony_ci d = p->MixMatchesFunc(p, p->lzPos - bt[1], d); 1298370b324cSopenharmony_ci // if (d) // check for failure 1299370b324cSopenharmony_ci do 1300370b324cSopenharmony_ci { 1301370b324cSopenharmony_ci const UInt32 v0 = bt[0]; 1302370b324cSopenharmony_ci const UInt32 v1 = bt[1]; 1303370b324cSopenharmony_ci bt += 2; 1304370b324cSopenharmony_ci d[0] = v0; 1305370b324cSopenharmony_ci d[1] = v1; 1306370b324cSopenharmony_ci d += 2; 1307370b324cSopenharmony_ci } 1308370b324cSopenharmony_ci while (len -= 2); 1309370b324cSopenharmony_ci } 1310370b324cSopenharmony_ci INCREASE_LZ_POS 1311370b324cSopenharmony_ci return d; 1312370b324cSopenharmony_ci} 1313370b324cSopenharmony_ci 1314370b324cSopenharmony_ci#define SKIP_HEADER2_MT do { GET_NEXT_BLOCK_IF_REQUIRED 1315370b324cSopenharmony_ci#define SKIP_HEADER_MT(n) SKIP_HEADER2_MT if (p->btNumAvailBytes-- >= (n)) { const Byte *cur = p->pointerToCurPos; UInt32 *hash = p->hash; 1316370b324cSopenharmony_ci#define SKIP_FOOTER_MT } INCREASE_LZ_POS p->btBufPos += (size_t)*p->btBufPos + 1; } while (--num != 0); 1317370b324cSopenharmony_ci 1318370b324cSopenharmony_cistatic void MatchFinderMt0_Skip(CMatchFinderMt *p, UInt32 num) 1319370b324cSopenharmony_ci{ 1320370b324cSopenharmony_ci SKIP_HEADER2_MT { p->btNumAvailBytes--; 1321370b324cSopenharmony_ci SKIP_FOOTER_MT 1322370b324cSopenharmony_ci} 1323370b324cSopenharmony_ci 1324370b324cSopenharmony_cistatic void MatchFinderMt2_Skip(CMatchFinderMt *p, UInt32 num) 1325370b324cSopenharmony_ci{ 1326370b324cSopenharmony_ci SKIP_HEADER_MT(2) 1327370b324cSopenharmony_ci UInt32 h2; 1328370b324cSopenharmony_ci MT_HASH2_CALC 1329370b324cSopenharmony_ci hash[h2] = p->lzPos; 1330370b324cSopenharmony_ci SKIP_FOOTER_MT 1331370b324cSopenharmony_ci} 1332370b324cSopenharmony_ci 1333370b324cSopenharmony_cistatic void MatchFinderMt3_Skip(CMatchFinderMt *p, UInt32 num) 1334370b324cSopenharmony_ci{ 1335370b324cSopenharmony_ci SKIP_HEADER_MT(3) 1336370b324cSopenharmony_ci UInt32 h2, h3; 1337370b324cSopenharmony_ci MT_HASH3_CALC 1338370b324cSopenharmony_ci (hash + kFix3HashSize)[h3] = 1339370b324cSopenharmony_ci hash[ h2] = 1340370b324cSopenharmony_ci p->lzPos; 1341370b324cSopenharmony_ci SKIP_FOOTER_MT 1342370b324cSopenharmony_ci} 1343370b324cSopenharmony_ci 1344370b324cSopenharmony_ci/* 1345370b324cSopenharmony_ci// MatchFinderMt4_Skip() is similar to MatchFinderMt3_Skip(). 1346370b324cSopenharmony_ci// The difference is that MatchFinderMt3_Skip() updates hash for last 3 bytes of stream. 1347370b324cSopenharmony_ci 1348370b324cSopenharmony_cistatic void MatchFinderMt4_Skip(CMatchFinderMt *p, UInt32 num) 1349370b324cSopenharmony_ci{ 1350370b324cSopenharmony_ci SKIP_HEADER_MT(4) 1351370b324cSopenharmony_ci UInt32 h2, h3; // h4 1352370b324cSopenharmony_ci MT_HASH3_CALC 1353370b324cSopenharmony_ci // MT_HASH4_CALC 1354370b324cSopenharmony_ci // (hash + kFix4HashSize)[h4] = 1355370b324cSopenharmony_ci (hash + kFix3HashSize)[h3] = 1356370b324cSopenharmony_ci hash[ h2] = 1357370b324cSopenharmony_ci p->lzPos; 1358370b324cSopenharmony_ci SKIP_FOOTER_MT 1359370b324cSopenharmony_ci} 1360370b324cSopenharmony_ci*/ 1361370b324cSopenharmony_ci 1362370b324cSopenharmony_civoid MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder2 *vTable) 1363370b324cSopenharmony_ci{ 1364370b324cSopenharmony_ci vTable->Init = (Mf_Init_Func)MatchFinderMt_Init; 1365370b324cSopenharmony_ci vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinderMt_GetNumAvailableBytes; 1366370b324cSopenharmony_ci vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinderMt_GetPointerToCurrentPos; 1367370b324cSopenharmony_ci vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches; 1368370b324cSopenharmony_ci 1369370b324cSopenharmony_ci switch (MF(p)->numHashBytes) 1370370b324cSopenharmony_ci { 1371370b324cSopenharmony_ci case 2: 1372370b324cSopenharmony_ci p->GetHeadsFunc = GetHeads2; 1373370b324cSopenharmony_ci p->MixMatchesFunc = (Mf_Mix_Matches)NULL; 1374370b324cSopenharmony_ci vTable->Skip = (Mf_Skip_Func)MatchFinderMt0_Skip; 1375370b324cSopenharmony_ci vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt2_GetMatches; 1376370b324cSopenharmony_ci break; 1377370b324cSopenharmony_ci case 3: 1378370b324cSopenharmony_ci p->GetHeadsFunc = MF(p)->bigHash ? GetHeads3b : GetHeads3; 1379370b324cSopenharmony_ci p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches2; 1380370b324cSopenharmony_ci vTable->Skip = (Mf_Skip_Func)MatchFinderMt2_Skip; 1381370b324cSopenharmony_ci break; 1382370b324cSopenharmony_ci case 4: 1383370b324cSopenharmony_ci p->GetHeadsFunc = MF(p)->bigHash ? GetHeads4b : GetHeads4; 1384370b324cSopenharmony_ci 1385370b324cSopenharmony_ci // it's fast inline version of GetMatches() 1386370b324cSopenharmony_ci // vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches_Bt4; 1387370b324cSopenharmony_ci 1388370b324cSopenharmony_ci p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches3; 1389370b324cSopenharmony_ci vTable->Skip = (Mf_Skip_Func)MatchFinderMt3_Skip; 1390370b324cSopenharmony_ci break; 1391370b324cSopenharmony_ci default: 1392370b324cSopenharmony_ci p->GetHeadsFunc = MF(p)->bigHash ? GetHeads5b : GetHeads5; 1393370b324cSopenharmony_ci p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches4; 1394370b324cSopenharmony_ci vTable->Skip = 1395370b324cSopenharmony_ci (Mf_Skip_Func)MatchFinderMt3_Skip; 1396370b324cSopenharmony_ci // (Mf_Skip_Func)MatchFinderMt4_Skip; 1397370b324cSopenharmony_ci break; 1398370b324cSopenharmony_ci } 1399370b324cSopenharmony_ci} 1400370b324cSopenharmony_ci 1401370b324cSopenharmony_ci#undef RINOK_THREAD 1402370b324cSopenharmony_ci#undef PRF 1403370b324cSopenharmony_ci#undef MF 1404370b324cSopenharmony_ci#undef GetUi24hi_from32 1405370b324cSopenharmony_ci#undef LOCK_BUFFER 1406370b324cSopenharmony_ci#undef UNLOCK_BUFFER 1407