/* LzFindMt.c -- multithreaded Match finder for LZ algorithms
2023-04-02 : Igor Pavlov : Public domain */

#include "Precomp.h"

// #include <stdio.h>

#include "CpuArch.h"

#include "LzHash.h"
#include "LzFindMt.h"

// #define LOG_ITERS

// #define LOG_THREAD

#ifdef LOG_THREAD
#include <stdio.h>
#define PRF(x) x
#else
#define PRF(x)
#endif

#ifdef LOG_ITERS
#include <stdio.h>
extern UInt64 g_NumIters_Tree;
extern UInt64 g_NumIters_Loop;
extern UInt64 g_NumIters_Bytes;
#define LOG_ITER(x) x
#else
#define LOG_ITER(x)
#endif

#define kMtHashBlockSize ((UInt32)1 << 17)
#define kMtHashNumBlocks (1 << 1)

#define GET_HASH_BLOCK_OFFSET(i)  (((i) & (kMtHashNumBlocks - 1)) * kMtHashBlockSize)

#define kMtBtBlockSize ((UInt32)1 << 16)
#define kMtBtNumBlocks (1 << 4)

#define GET_BT_BLOCK_OFFSET(i)  (((i) & (kMtBtNumBlocks - 1)) * (size_t)kMtBtBlockSize)
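
/* Editor's note (illustration only, not in the original source): the two
   rings wrap by masking the block index, so offsets repeat after the ring size:
     GET_HASH_BLOCK_OFFSET(0) == GET_HASH_BLOCK_OFFSET(2) == 0
     GET_HASH_BLOCK_OFFSET(1) == GET_HASH_BLOCK_OFFSET(3) == kMtHashBlockSize
     GET_BT_BLOCK_OFFSET(17)  == (17 & 15) * (size_t)kMtBtBlockSize
   so a writer thread can run at most one ring (2 hash blocks / 16 BT blocks)
   ahead of its reader; the semaphores below enforce that bound. */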

/*
  HASH functions:
  We use raw 8/16 bits from a[1] and a[2],
  xored with crc(a[0]) and crc(a[3]).
  We check a[0], a[3] only. We don't need to compare a[1] and a[2] in matches.
  our crc() function provides one-to-one correspondence for low 8-bit values:
    (crc[0...0xFF] & 0xFF) <-> [0...0xFF]
*/
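
/* Editor's sketch (illustration only, not compiled): the one-to-one
   property means the low byte of crc[b] is a permutation of b, so
   xoring cur[1] into crc[cur[0]] loses no information about cur[1]:

     Byte seen[0x100] = { 0 };
     unsigned b;
     for (b = 0; b < 0x100; b++)
       seen[(Byte)crc[b]] = 1;   // mark the low byte of each table entry
     // after the loop every seen[i] == 1 : all 256 low bytes are distinct
*/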

#define MF(mt) ((mt)->MatchFinder)
#define MF_CRC (p->crc)

// #define MF(mt) (&(mt)->MatchFinder)
// #define MF_CRC (p->MatchFinder.crc)

#define MT_HASH2_CALC \
  h2 = (MF_CRC[cur[0]] ^ cur[1]) & (kHash2Size - 1);

#define MT_HASH3_CALC { \
  UInt32 temp = MF_CRC[cur[0]] ^ cur[1]; \
  h2 = temp & (kHash2Size - 1); \
  h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); }

/*
#define MT_HASH3_CALC__NO_2 { \
  UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
  h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); }

#define MT_HASH4_CALC { \
  UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
  h2 = temp & (kHash2Size - 1); \
  temp ^= ((UInt32)cur[2] << 8); \
  h3 = temp & (kHash3Size - 1); \
  h4 = (temp ^ (p->crc[cur[3]] << kLzHash_CrcShift_1)) & p->hash4Mask; }
  // (kHash4Size - 1);
*/


Z7_NO_INLINE
static void MtSync_Construct(CMtSync *p)
{
  p->affinity = 0;
  p->wasCreated = False;
  p->csWasInitialized = False;
  p->csWasEntered = False;
  Thread_CONSTRUCT(&p->thread)
  Event_Construct(&p->canStart);
  Event_Construct(&p->wasStopped);
  Semaphore_Construct(&p->freeSemaphore);
  Semaphore_Construct(&p->filledSemaphore);
}

// #define DEBUG_BUFFER_LOCK   // define it to debug lock state

#ifdef DEBUG_BUFFER_LOCK
#include <stdlib.h>
#define BUFFER_MUST_BE_LOCKED(p)    if (!(p)->csWasEntered) exit(1);
#define BUFFER_MUST_BE_UNLOCKED(p)  if ( (p)->csWasEntered) exit(1);
#else
#define BUFFER_MUST_BE_LOCKED(p)
#define BUFFER_MUST_BE_UNLOCKED(p)
#endif

#define LOCK_BUFFER(p) { \
    BUFFER_MUST_BE_UNLOCKED(p); \
    CriticalSection_Enter(&(p)->cs); \
    (p)->csWasEntered = True; }

#define UNLOCK_BUFFER(p) { \
    BUFFER_MUST_BE_LOCKED(p); \
    CriticalSection_Leave(&(p)->cs); \
    (p)->csWasEntered = False; }


Z7_NO_INLINE
static UInt32 MtSync_GetNextBlock(CMtSync *p)
{
  UInt32 numBlocks = 0;
  if (p->needStart)
  {
    BUFFER_MUST_BE_UNLOCKED(p)
    p->numProcessedBlocks = 1;
    p->needStart = False;
    p->stopWriting = False;
    p->exit = False;
    Event_Reset(&p->wasStopped);
    Event_Set(&p->canStart);
  }
  else
  {
    UNLOCK_BUFFER(p)
    // we free current block
    numBlocks = p->numProcessedBlocks++;
    Semaphore_Release1(&p->freeSemaphore);
  }

  // buffer is UNLOCKED here
  Semaphore_Wait(&p->filledSemaphore);
  LOCK_BUFFER(p)
  return numBlocks;
}
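
/* Editor's note (protocol sketch derived from this function and from
   HashThreadFunc()/BtThreadFunc() below; illustration only):

     reader (compressing thread):        writer (hash or BT thread):
       Release(freeSemaphore)  ------>     Wait(freeSemaphore)
                                           fill next ring block
       Wait(filledSemaphore)   <------     Release(filledSemaphore)

   freeSemaphore starts at the ring size (see MtSync_Init()), so the
   writer can be at most (numBlocks) blocks ahead of the reader. */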


/* if Writing (Processing) thread was started, we must call MtSync_StopWriting() */

Z7_NO_INLINE
static void MtSync_StopWriting(CMtSync *p)
{
  if (!Thread_WasCreated(&p->thread) || p->needStart)
    return;

    PRF(printf("\nMtSync_StopWriting %p\n", p));

  if (p->csWasEntered)
  {
    /* we don't use buffer in this thread after StopWriting().
       So we UNLOCK buffer.
       And we restore default UNLOCKED state for stopped thread */
    UNLOCK_BUFFER(p)
  }

  /* We send (p->stopWriting) message and release freeSemaphore
     to free current block.
     So the thread will see (p->stopWriting) at some
     iteration after Wait(freeSemaphore).
     The thread doesn't need to fill all avail free blocks,
     so we can get fast thread stop.
  */

  p->stopWriting = True;
  Semaphore_Release1(&p->freeSemaphore); // check semaphore count !!!

    PRF(printf("\nMtSync_StopWriting %p : Event_Wait(&p->wasStopped)\n", p));
  Event_Wait(&p->wasStopped);
    PRF(printf("\nMtSync_StopWriting %p : Event_Wait() finished\n", p));

  /* 21.03 : we don't restore semaphore counters here.
     We will recreate and reinit semaphores in next start */

  p->needStart = True;
}


Z7_NO_INLINE
static void MtSync_Destruct(CMtSync *p)
{
    PRF(printf("\nMtSync_Destruct %p\n", p));

  if (Thread_WasCreated(&p->thread))
  {
    /* we want thread to be in Stopped state before sending EXIT command.
       note: stop(btSync) will stop (htSync) also */
    MtSync_StopWriting(p);
    /* thread in Stopped state here : (p->needStart == true) */
    p->exit = True;
    // if (p->needStart)  // it's (true)
    Event_Set(&p->canStart);  // we send EXIT command to thread
    Thread_Wait_Close(&p->thread);  // wait for the thread to finish
  }

  if (p->csWasInitialized)
  {
    CriticalSection_Delete(&p->cs);
    p->csWasInitialized = False;
  }
  p->csWasEntered = False;

  Event_Close(&p->canStart);
  Event_Close(&p->wasStopped);
  Semaphore_Close(&p->freeSemaphore);
  Semaphore_Close(&p->filledSemaphore);

  p->wasCreated = False;
}


// #define RINOK_THREAD(x) { if ((x) != 0) return SZ_ERROR_THREAD; }
// we want to get real system error codes here instead of SZ_ERROR_THREAD
#define RINOK_THREAD(x)  RINOK_WRes(x)


// call it before each new file (when new starting is required):
Z7_NO_INLINE
static SRes MtSync_Init(CMtSync *p, UInt32 numBlocks)
{
  WRes wres;
  // BUFFER_MUST_BE_UNLOCKED(p)
  if (!p->needStart || p->csWasEntered)
    return SZ_ERROR_FAIL;
  wres = Semaphore_OptCreateInit(&p->freeSemaphore, numBlocks, numBlocks);
  if (wres == 0)
    wres = Semaphore_OptCreateInit(&p->filledSemaphore, 0, numBlocks);
  return MY_SRes_HRESULT_FROM_WRes(wres);
}


static WRes MtSync_Create_WRes(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj)
{
  WRes wres;

  if (p->wasCreated)
    return SZ_OK;

  RINOK_THREAD(CriticalSection_Init(&p->cs))
  p->csWasInitialized = True;
  p->csWasEntered = False;

  RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->canStart))
  RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStopped))

  p->needStart = True;
  p->exit = True;  /* p->exit is unused before (canStart) Event.
     But in case of some unexpected code failure we will get fast exit from thread */

  // return ERROR_TOO_MANY_POSTS; // for debug
  // return EINVAL; // for debug

  if (p->affinity != 0)
    wres = Thread_Create_With_Affinity(&p->thread, startAddress, obj, (CAffinityMask)p->affinity);
  else
    wres = Thread_Create(&p->thread, startAddress, obj);

  RINOK_THREAD(wres)
  p->wasCreated = True;
  return SZ_OK;
}


Z7_NO_INLINE
static SRes MtSync_Create(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj)
{
  const WRes wres = MtSync_Create_WRes(p, startAddress, obj);
  if (wres == 0)
    return 0;
  MtSync_Destruct(p);
  return MY_SRes_HRESULT_FROM_WRes(wres);
}


// ---------- HASH THREAD ----------

#define kMtMaxValForNormalize 0xFFFFFFFF
// #define kMtMaxValForNormalize ((1 << 21)) // for debug
// #define kNormalizeAlign (1 << 7) // alignment for speculated accesses

#ifdef MY_CPU_LE_UNALIGN
  #define GetUi24hi_from32(p) ((UInt32)GetUi32(p) >> 8)
#else
  #define GetUi24hi_from32(p) ((p)[1] ^ ((UInt32)(p)[2] << 8) ^ ((UInt32)(p)[3] << 16))
#endif

#define GetHeads_DECL(name) \
    static void GetHeads ## name(const Byte *p, UInt32 pos, \
      UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc)

#define GetHeads_LOOP(v) \
    for (; numHeads != 0; numHeads--) { \
      const UInt32 value = (v); \
      p++; \
      *heads++ = pos - hash[value]; \
      hash[value] = pos++; }

#define DEF_GetHeads2(name, v, action) \
    GetHeads_DECL(name) { action \
    GetHeads_LOOP(v) }

#define DEF_GetHeads(name, v) DEF_GetHeads2(name, v, ;)

DEF_GetHeads2(2, GetUi16(p), UNUSED_VAR(hashMask); UNUSED_VAR(crc); )
DEF_GetHeads(3,  (crc[p[0]] ^ GetUi16(p + 1)) & hashMask)
DEF_GetHeads2(3b, GetUi16(p) ^ ((UInt32)(p)[2] << 16), UNUSED_VAR(hashMask); UNUSED_VAR(crc); )
// BT3 is not good for crc collisions for big hashMask values.

/*
GetHeads_DECL(3b)
{
  UNUSED_VAR(hashMask);
  UNUSED_VAR(crc);
  {
  const Byte *pLim = p + numHeads;
  if (numHeads == 0)
    return;
  pLim--;
  while (p < pLim)
  {
    UInt32 v1 = GetUi32(p);
    UInt32 v0 = v1 & 0xFFFFFF;
    UInt32 h0, h1;
    p += 2;
    v1 >>= 8;
    h0 = hash[v0]; hash[v0] = pos; heads[0] = pos - h0; pos++;
    h1 = hash[v1]; hash[v1] = pos; heads[1] = pos - h1; pos++;
    heads += 2;
  }
  if (p == pLim)
  {
    UInt32 v0 = GetUi16(p) ^ ((UInt32)(p)[2] << 16);
    *heads = pos - hash[v0];
    hash[v0] = pos;
  }
  }
}
*/

/*
GetHeads_DECL(4)
{
  unsigned sh = 0;
  UNUSED_VAR(crc)
  while ((hashMask & 0x80000000) == 0)
  {
    hashMask <<= 1;
    sh++;
  }
  GetHeads_LOOP((GetUi32(p) * 0xa54a1) >> sh)
}
#define GetHeads4b GetHeads4
*/

#define USE_GetHeads_LOCAL_CRC

#ifdef USE_GetHeads_LOCAL_CRC

GetHeads_DECL(4)
{
  UInt32 crc0[256];
  UInt32 crc1[256];
  {
    unsigned i;
    for (i = 0; i < 256; i++)
    {
      UInt32 v = crc[i];
      crc0[i] = v & hashMask;
      crc1[i] = (v << kLzHash_CrcShift_1) & hashMask;
      // crc1[i] = rotlFixed(v, 8) & hashMask;
    }
  }
  GetHeads_LOOP(crc0[p[0]] ^ crc1[p[3]] ^ (UInt32)GetUi16(p+1))
}

GetHeads_DECL(4b)
{
  UInt32 crc0[256];
  {
    unsigned i;
    for (i = 0; i < 256; i++)
      crc0[i] = crc[i] & hashMask;
  }
  GetHeads_LOOP(crc0[p[0]] ^ GetUi24hi_from32(p))
}

GetHeads_DECL(5)
{
  UInt32 crc0[256];
  UInt32 crc1[256];
  UInt32 crc2[256];
  {
    unsigned i;
    for (i = 0; i < 256; i++)
    {
      UInt32 v = crc[i];
      crc0[i] = v & hashMask;
      crc1[i] = (v << kLzHash_CrcShift_1) & hashMask;
      crc2[i] = (v << kLzHash_CrcShift_2) & hashMask;
    }
  }
  GetHeads_LOOP(crc0[p[0]] ^ crc1[p[3]] ^ crc2[p[4]] ^ (UInt32)GetUi16(p+1))
}

GetHeads_DECL(5b)
{
  UInt32 crc0[256];
  UInt32 crc1[256];
  {
    unsigned i;
    for (i = 0; i < 256; i++)
    {
      UInt32 v = crc[i];
      crc0[i] = v & hashMask;
      crc1[i] = (v << kLzHash_CrcShift_1) & hashMask;
    }
  }
  GetHeads_LOOP(crc0[p[0]] ^ crc1[p[4]] ^ GetUi24hi_from32(p))
}

#else

DEF_GetHeads(4,  (crc[p[0]] ^ (crc[p[3]] << kLzHash_CrcShift_1) ^ (UInt32)GetUi16(p+1)) & hashMask)
DEF_GetHeads(4b, (crc[p[0]] ^ GetUi24hi_from32(p)) & hashMask)
DEF_GetHeads(5,  (crc[p[0]] ^ (crc[p[3]] << kLzHash_CrcShift_1) ^ (crc[p[4]] << kLzHash_CrcShift_2) ^ (UInt32)GetUi16(p + 1)) & hashMask)
DEF_GetHeads(5b, (crc[p[0]] ^ (crc[p[4]] << kLzHash_CrcShift_1) ^ GetUi24hi_from32(p)) & hashMask)

#endif


static void HashThreadFunc(CMatchFinderMt *mt)
{
  CMtSync *p = &mt->hashSync;
    PRF(printf("\nHashThreadFunc\n"));

  for (;;)
  {
    UInt32 blockIndex = 0;
      PRF(printf("\nHashThreadFunc : Event_Wait(&p->canStart)\n"));
    Event_Wait(&p->canStart);
      PRF(printf("\nHashThreadFunc : Event_Wait(&p->canStart) : after \n"));
    if (p->exit)
    {
      PRF(printf("\nHashThreadFunc : exit \n"));
      return;
    }

    MatchFinder_Init_HighHash(MF(mt));

    for (;;)
    {
      PRF(printf("Hash thread block = %d pos = %d\n", (unsigned)blockIndex, mt->MatchFinder->pos));

      {
        CMatchFinder *mf = MF(mt);
        if (MatchFinder_NeedMove(mf))
        {
          CriticalSection_Enter(&mt->btSync.cs);
          CriticalSection_Enter(&mt->hashSync.cs);
          {
            const Byte *beforePtr = Inline_MatchFinder_GetPointerToCurrentPos(mf);
            ptrdiff_t offset;
            MatchFinder_MoveBlock(mf);
            offset = beforePtr - Inline_MatchFinder_GetPointerToCurrentPos(mf);
            mt->pointerToCurPos -= offset;
            mt->buffer -= offset;
          }
          CriticalSection_Leave(&mt->hashSync.cs);
          CriticalSection_Leave(&mt->btSync.cs);
          continue;
        }

        Semaphore_Wait(&p->freeSemaphore);

        if (p->exit) // exit is unexpected here. But we check it here for some failure case
          return;

        // for faster stop : we check (p->stopWriting) after Wait(freeSemaphore)
        if (p->stopWriting)
          break;

        MatchFinder_ReadIfRequired(mf);
        {
          UInt32 *heads = mt->hashBuf + GET_HASH_BLOCK_OFFSET(blockIndex++);
          UInt32 num = Inline_MatchFinder_GetNumAvailableBytes(mf);
          heads[0] = 2;
          heads[1] = num;

          /* heads[1] contains the number of available bytes:
             if (avail < mf->numHashBytes) :
             {
               it means that the stream was finished.
               HASH_THREAD and BT_THREAD must move position by heads[1] (avail) bytes.
               HASH_THREAD doesn't stop,
               HASH_THREAD fills only the header (2 numbers) for all next blocks:
               {2, NumHashBytes - 1}, {2,0}, {2,0}, ... , {2,0}
             }
             else
             {
               HASH_THREAD and BT_THREAD must move position by (heads[0] - 2) bytes;
             }
          */
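
          /* Editor's note (layout implied by the code below; illustration):
               heads[0] = 2 + num  : count of UInt32 items used in this block
               heads[1] = avail    : bytes available at the block start
               heads[2 .. 2+num-1] : head values written by GetHeadsFunc() */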

          if (num >= mf->numHashBytes)
          {
            num = num - mf->numHashBytes + 1;
            if (num > kMtHashBlockSize - 2)
              num = kMtHashBlockSize - 2;

            if (mf->pos > (UInt32)kMtMaxValForNormalize - num)
            {
              const UInt32 subValue = (mf->pos - mf->historySize - 1); // & ~(UInt32)(kNormalizeAlign - 1);
              MatchFinder_REDUCE_OFFSETS(mf, subValue)
              MatchFinder_Normalize3(subValue, mf->hash + mf->fixedHashSize, (size_t)mf->hashMask + 1);
            }

            heads[0] = 2 + num;
            mt->GetHeadsFunc(mf->buffer, mf->pos, mf->hash + mf->fixedHashSize, mf->hashMask, heads + 2, num, mf->crc);
          }

          mf->pos += num;  // wrap over zero is allowed at the end of stream
          mf->buffer += num;
        }
      }

      Semaphore_Release1(&p->filledSemaphore);
    } // for() processing end

    // p->numBlocks_Sent = blockIndex;
    Event_Set(&p->wasStopped);
  } // for() thread end
}




// ---------- BT THREAD ----------

/* we use one variable instead of two (cyclicBufferPos == pos) before CyclicBuf wrap.
   here we define fixed offset of (p->pos) from (p->cyclicBufferPos) */
#define CYC_TO_POS_OFFSET 0
// #define CYC_TO_POS_OFFSET 1 // for debug

#define MFMT_GM_INLINE

#ifdef MFMT_GM_INLINE

/*
  we use size_t for (pos) instead of UInt32
  to eliminate "movsx" BUG in old MSVC x64 compiler.
*/


UInt32 * Z7_FASTCALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
    UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
    size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
    UInt32 *posRes);

#endif


static void BtGetMatches(CMatchFinderMt *p, UInt32 *d)
{
  UInt32 numProcessed = 0;
  UInt32 curPos = 2;

  /* GetMatchesSpec() functions don't create (len = 1)
     in [len, dist] match pairs, if (p->numHashBytes >= 2)
     Also we suppose here that (matchMaxLen >= 2).
     So the following code for (reserve) is not required
     UInt32 reserve = (p->matchMaxLen * 2);
     const UInt32 kNumHashBytes_Max = 5; // BT_HASH_BYTES_MAX
     if (reserve < kNumHashBytes_Max - 1)
        reserve = kNumHashBytes_Max - 1;
     const UInt32 limit = kMtBtBlockSize - (reserve);
  */

  const UInt32 limit = kMtBtBlockSize - (p->matchMaxLen * 2);

  d[1] = p->hashNumAvail;

  if (p->failure_BT)
  {
    // printf("\n == 1 BtGetMatches() p->failure_BT\n");
    d[0] = 0;
    // d[1] = 0;
    return;
  }

  while (curPos < limit)
  {
    if (p->hashBufPos == p->hashBufPosLimit)
    {
      // MatchFinderMt_GetNextBlock_Hash(p);
      UInt32 avail;
      {
        const UInt32 bi = MtSync_GetNextBlock(&p->hashSync);
        const UInt32 k = GET_HASH_BLOCK_OFFSET(bi);
        const UInt32 *h = p->hashBuf + k;
        avail = h[1];
        p->hashBufPosLimit = k + h[0];
        p->hashNumAvail = avail;
        p->hashBufPos = k + 2;
      }

      {
        /* we must prevent UInt32 overflow for avail total value,
           if avail was increased with new hash block */
        UInt32 availSum = numProcessed + avail;
        if (availSum < numProcessed)
          availSum = (UInt32)(Int32)-1;
        d[1] = availSum;
      }

      if (avail >= p->numHashBytes)
        continue;

      // if (p->hashBufPos != p->hashBufPosLimit) exit(1);

      /* (avail < p->numHashBytes)
         It means that stream was finished.
         And (avail) - is a number of remaining bytes,
         we fill (d) for (avail) bytes for LZ_THREAD (receiver).
         but we don't update (p->pos) and (p->cyclicBufferPos) here in BT_THREAD */

      /* here we suppose that we have space enough:
         (kMtBtBlockSize - curPos >= p->hashNumAvail) */
      p->hashNumAvail = 0;
      d[0] = curPos + avail;
      d += curPos;
      for (; avail != 0; avail--)
        *d++ = 0;
      return;
    }
    {
      UInt32 size = p->hashBufPosLimit - p->hashBufPos;
      UInt32 pos = p->pos;
      UInt32 cyclicBufferPos = p->cyclicBufferPos;
      UInt32 lenLimit = p->matchMaxLen;
      if (lenLimit >= p->hashNumAvail)
        lenLimit = p->hashNumAvail;
      {
        UInt32 size2 = p->hashNumAvail - lenLimit + 1;
        if (size2 < size)
          size = size2;
        size2 = p->cyclicBufferSize - cyclicBufferPos;
        if (size2 < size)
          size = size2;
      }

      if (pos > (UInt32)kMtMaxValForNormalize - size)
      {
        const UInt32 subValue = (pos - p->cyclicBufferSize); // & ~(UInt32)(kNormalizeAlign - 1);
        pos -= subValue;
        p->pos = pos;
        MatchFinder_Normalize3(subValue, p->son, (size_t)p->cyclicBufferSize * 2);
      }

      #ifndef MFMT_GM_INLINE
      while (curPos < limit && size-- != 0)
      {
        UInt32 *startDistances = d + curPos;
        UInt32 num = (UInt32)(GetMatchesSpec1(lenLimit, pos - p->hashBuf[p->hashBufPos++],
            pos, p->buffer, p->son, cyclicBufferPos, p->cyclicBufferSize, p->cutValue,
            startDistances + 1, p->numHashBytes - 1) - startDistances);
        *startDistances = num - 1;
        curPos += num;
        cyclicBufferPos++;
        pos++;
        p->buffer++;
      }
      #else
      {
        UInt32 posRes = pos;
        const UInt32 *d_end;
        {
          d_end = GetMatchesSpecN_2(
              p->buffer + lenLimit - 1,
              pos, p->buffer, p->son, p->cutValue, d + curPos,
              p->numHashBytes - 1, p->hashBuf + p->hashBufPos,
              d + limit, p->hashBuf + p->hashBufPos + size,
              cyclicBufferPos, p->cyclicBufferSize,
              &posRes);
        }
        {
          if (!d_end)
          {
            // printf("\n == 2 BtGetMatches() p->failure_BT\n");
            // internal data failure
            p->failure_BT = True;
            d[0] = 0;
            // d[1] = 0;
            return;
          }
        }
        curPos = (UInt32)(d_end - d);
        {
          const UInt32 processed = posRes - pos;
          pos = posRes;
          p->hashBufPos += processed;
          cyclicBufferPos += processed;
          p->buffer += processed;
        }
      }
      #endif

      {
        const UInt32 processed = pos - p->pos;
        numProcessed += processed;
        p->hashNumAvail -= processed;
        p->pos = pos;
      }
      if (cyclicBufferPos == p->cyclicBufferSize)
        cyclicBufferPos = 0;
      p->cyclicBufferPos = cyclicBufferPos;
    }
  }

  d[0] = curPos;
}


static void BtFillBlock(CMatchFinderMt *p, UInt32 globalBlockIndex)
{
  CMtSync *sync = &p->hashSync;

  BUFFER_MUST_BE_UNLOCKED(sync)

  if (!sync->needStart)
  {
    LOCK_BUFFER(sync)
  }

  BtGetMatches(p, p->btBuf + GET_BT_BLOCK_OFFSET(globalBlockIndex));

  /* We suppose that we have called GetNextBlock() from start.
     So buffer is LOCKED */

  UNLOCK_BUFFER(sync)
}


Z7_NO_INLINE
static void BtThreadFunc(CMatchFinderMt *mt)
{
  CMtSync *p = &mt->btSync;
  for (;;)
  {
    UInt32 blockIndex = 0;
    Event_Wait(&p->canStart);

    for (;;)
    {
        PRF(printf("  BT thread block = %d  pos = %d\n", (unsigned)blockIndex, mt->pos));
      /* (p->exit == true) is possible after (p->canStart) at first loop iteration
         and is unexpected after more Wait(freeSemaphore) iterations */
      if (p->exit)
        return;

      Semaphore_Wait(&p->freeSemaphore);

      // for faster stop : we check (p->stopWriting) after Wait(freeSemaphore)
      if (p->stopWriting)
        break;

      BtFillBlock(mt, blockIndex++);

      Semaphore_Release1(&p->filledSemaphore);
    }

    // we stop HASH_THREAD here
    MtSync_StopWriting(&mt->hashSync);

    // p->numBlocks_Sent = blockIndex;
    Event_Set(&p->wasStopped);
  }
}


void MatchFinderMt_Construct(CMatchFinderMt *p)
{
  p->hashBuf = NULL;
  MtSync_Construct(&p->hashSync);
  MtSync_Construct(&p->btSync);
}

static void MatchFinderMt_FreeMem(CMatchFinderMt *p, ISzAllocPtr alloc)
{
  ISzAlloc_Free(alloc, p->hashBuf);
  p->hashBuf = NULL;
}

void MatchFinderMt_Destruct(CMatchFinderMt *p, ISzAllocPtr alloc)
{
  /*
     HASH_THREAD can use CriticalSection(s) btSync.cs and hashSync.cs.
     So we must be sure that HASH_THREAD will not use CriticalSection(s)
     after deleting CriticalSection here.

     we call ReleaseStream(p)
       that calls StopWriting(btSync)
         that calls StopWriting(hashSync), if it's required to stop HASH_THREAD.
     after StopWriting() it's safe to destruct MtSync(s) in any order */

  MatchFinderMt_ReleaseStream(p);

  MtSync_Destruct(&p->btSync);
  MtSync_Destruct(&p->hashSync);

  LOG_ITER(
  printf("\nTree %9d * %7d iter = %9d = sum  :  bytes = %9d\n",
      (UInt32)(g_NumIters_Tree / 1000),
      (UInt32)(((UInt64)g_NumIters_Loop * 1000) / (g_NumIters_Tree + 1)),
      (UInt32)(g_NumIters_Loop / 1000),
      (UInt32)(g_NumIters_Bytes / 1000)
      ));

  MatchFinderMt_FreeMem(p, alloc);
}


#define kHashBufferSize (kMtHashBlockSize * kMtHashNumBlocks)
#define kBtBufferSize (kMtBtBlockSize * kMtBtNumBlocks)
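
/* Editor's note (arithmetic from the defines above):
     kHashBufferSize = (1 << 17) * 2  = 0x40000  UInt32 items (1 MiB)
     kBtBufferSize   = (1 << 16) * 16 = 0x100000 UInt32 items (4 MiB)
   Both rings live in the single allocation made in MatchFinderMt_Create(). */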


static THREAD_FUNC_DECL HashThreadFunc2(void *p) { HashThreadFunc((CMatchFinderMt *)p);  return 0; }
static THREAD_FUNC_DECL BtThreadFunc2(void *p)
{
  Byte allocaDummy[0x180];
  unsigned i = 0;
  for (i = 0; i < 16; i++)
    allocaDummy[i] = (Byte)0;
  if (allocaDummy[0] == 0)
    BtThreadFunc((CMatchFinderMt *)p);
  return 0;
}


SRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddBufferBefore,
    UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAllocPtr alloc)
{
  CMatchFinder *mf = MF(p);
  p->historySize = historySize;
  if (kMtBtBlockSize <= matchMaxLen * 4)
    return SZ_ERROR_PARAM;
  if (!p->hashBuf)
  {
    p->hashBuf = (UInt32 *)ISzAlloc_Alloc(alloc, ((size_t)kHashBufferSize + (size_t)kBtBufferSize) * sizeof(UInt32));
    if (!p->hashBuf)
      return SZ_ERROR_MEM;
    p->btBuf = p->hashBuf + kHashBufferSize;
  }
  keepAddBufferBefore += (kHashBufferSize + kBtBufferSize);
  keepAddBufferAfter += kMtHashBlockSize;
  if (!MatchFinder_Create(mf, historySize, keepAddBufferBefore, matchMaxLen, keepAddBufferAfter, alloc))
    return SZ_ERROR_MEM;

  RINOK(MtSync_Create(&p->hashSync, HashThreadFunc2, p))
  RINOK(MtSync_Create(&p->btSync, BtThreadFunc2, p))
  return SZ_OK;
}


SRes MatchFinderMt_InitMt(CMatchFinderMt *p)
{
  RINOK(MtSync_Init(&p->hashSync, kMtHashNumBlocks))
  return MtSync_Init(&p->btSync, kMtBtNumBlocks);
}


static void MatchFinderMt_Init(CMatchFinderMt *p)
{
  CMatchFinder *mf = MF(p);

  p->btBufPos =
  p->btBufPosLimit = NULL;
  p->hashBufPos =
  p->hashBufPosLimit = 0;
  p->hashNumAvail = 0; // 21.03

  p->failure_BT = False;

  /* Init without data reading. We don't want to read data in this thread */
  MatchFinder_Init_4(mf);

  MatchFinder_Init_LowHash(mf);

  p->pointerToCurPos = Inline_MatchFinder_GetPointerToCurrentPos(mf);
  p->btNumAvailBytes = 0;
  p->failure_LZ_BT = False;
  // p->failure_LZ_LZ = False;

  p->lzPos =
      1; // optimal smallest value
      // 0; // for debug: ignores match to start
      // kNormalizeAlign; // for debug

  p->hash = mf->hash;
  p->fixedHashSize = mf->fixedHashSize;
  // p->hash4Mask = mf->hash4Mask;
  p->crc = mf->crc;
  // memcpy(p->crc, mf->crc, sizeof(mf->crc));

  p->son = mf->son;
  p->matchMaxLen = mf->matchMaxLen;
  p->numHashBytes = mf->numHashBytes;

  /* (mf->pos) and (mf->streamPos) were already initialized to 1 in MatchFinder_Init_4() */
  // mf->streamPos = mf->pos = 1; // optimal smallest value
      // 0; // for debug: ignores match to start
      // kNormalizeAlign; // for debug

  /* we must init (p->pos = mf->pos) for BT, because
     BT code needs (p->pos == delta_value_for_empty_hash_record == mf->pos) */
  p->pos = mf->pos; // do not change it

  p->cyclicBufferPos = (p->pos - CYC_TO_POS_OFFSET);
  p->cyclicBufferSize = mf->cyclicBufferSize;
  p->buffer = mf->buffer;
  p->cutValue = mf->cutValue;
  // p->son[0] = p->son[1] = 0; // unused: to init skipped record for speculated accesses.
}


/* ReleaseStream is required to finish multithreading */
void MatchFinderMt_ReleaseStream(CMatchFinderMt *p)
{
  // Sleep(1); // for debug
  MtSync_StopWriting(&p->btSync);
  // Sleep(200); // for debug
  /* p->MatchFinder->ReleaseStream(); */
}


Z7_NO_INLINE
static UInt32 MatchFinderMt_GetNextBlock_Bt(CMatchFinderMt *p)
{
  if (p->failure_LZ_BT)
    p->btBufPos = p->failureBuf;
  else
  {
    const UInt32 bi = MtSync_GetNextBlock(&p->btSync);
    const UInt32 *bt = p->btBuf + GET_BT_BLOCK_OFFSET(bi);
    {
      const UInt32 numItems = bt[0];
      p->btBufPosLimit = bt + numItems;
      p->btNumAvailBytes = bt[1];
      p->btBufPos = bt + 2;
      if (numItems < 2 || numItems > kMtBtBlockSize)
      {
        p->failureBuf[0] = 0;
        p->btBufPos = p->failureBuf;
        p->btBufPosLimit = p->failureBuf + 1;
        p->failure_LZ_BT = True;
        // p->btNumAvailBytes = 0;
        /* we don't want to decrease AvailBytes that was loaded before.
           That can be unexpected for code that has loaded another value before */
      }
    }

    if (p->lzPos >= (UInt32)kMtMaxValForNormalize - (UInt32)kMtBtBlockSize)
    {
      /* we don't check (lzPos) over exact avail bytes in (btBuf).
         (fixedHashSize) is small, so normalization is fast */
      const UInt32 subValue = (p->lzPos - p->historySize - 1); // & ~(UInt32)(kNormalizeAlign - 1);
      p->lzPos -= subValue;
      MatchFinder_Normalize3(subValue, p->hash, p->fixedHashSize);
    }
  }
  return p->btNumAvailBytes;
}



static const Byte * MatchFinderMt_GetPointerToCurrentPos(CMatchFinderMt *p)
{
  return p->pointerToCurPos;
}


#define GET_NEXT_BLOCK_IF_REQUIRED if (p->btBufPos == p->btBufPosLimit) MatchFinderMt_GetNextBlock_Bt(p);


static UInt32 MatchFinderMt_GetNumAvailableBytes(CMatchFinderMt *p)
{
  if (p->btBufPos != p->btBufPosLimit)
    return p->btNumAvailBytes;
  return MatchFinderMt_GetNextBlock_Bt(p);
}


// #define CHECK_FAILURE_LZ(_match_, _pos_) if (_match_ >= _pos_) { p->failure_LZ_LZ = True;  return d; }
#define CHECK_FAILURE_LZ(_match_, _pos_)

static UInt32 * MixMatches2(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
{
  UInt32 h2, c2;
  UInt32 *hash = p->hash;
  const Byte *cur = p->pointerToCurPos;
  const UInt32 m = p->lzPos;
  MT_HASH2_CALC

  c2 = hash[h2];
  hash[h2] = m;

  if (c2 >= matchMinPos)
  {
    CHECK_FAILURE_LZ(c2, m)
    if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
    {
      *d++ = 2;
      *d++ = m - c2 - 1;
    }
  }

  return d;
}

static UInt32 * MixMatches3(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
{
  UInt32 h2, h3, c2, c3;
  UInt32 *hash = p->hash;
  const Byte *cur = p->pointerToCurPos;
  const UInt32 m = p->lzPos;
  MT_HASH3_CALC

  c2 = hash[h2];
  c3 = (hash + kFix3HashSize)[h3];

  hash[h2] = m;
  (hash + kFix3HashSize)[h3] = m;

  if (c2 >= matchMinPos)
  {
    CHECK_FAILURE_LZ(c2, m)
    if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
    {
      d[1] = m - c2 - 1;
      if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2])
      {
        d[0] = 3;
        return d + 2;
      }
      d[0] = 2;
      d += 2;
    }
  }

  if (c3 >= matchMinPos)
  {
    CHECK_FAILURE_LZ(c3, m)
    if (cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0])
    {
      *d++ = 3;
      *d++ = m - c3 - 1;
    }
  }

  return d;
}


#define INCREASE_LZ_POS p->lzPos++; p->pointerToCurPos++;

/*
static
UInt32* MatchFinderMt_GetMatches_Bt4(CMatchFinderMt *p, UInt32 *d)
{
  const UInt32 *bt = p->btBufPos;
  const UInt32 len = *bt++;
  const UInt32 *btLim = bt + len;
  UInt32 matchMinPos;
  UInt32 avail = p->btNumAvailBytes - 1;
  p->btBufPos = btLim;

  {
    p->btNumAvailBytes = avail;

    #define BT_HASH_BYTES_MAX 5

    matchMinPos = p->lzPos;

    if (len != 0)
      matchMinPos -= bt[1];
    else if (avail < (BT_HASH_BYTES_MAX - 1) - 1)
    {
      INCREASE_LZ_POS
      return d;
    }
    else
    {
      const UInt32 hs = p->historySize;
      if (matchMinPos > hs)
        matchMinPos -= hs;
      else
        matchMinPos = 1;
    }
  }

  for (;;)
  {

  UInt32 h2, h3, c2, c3;
  UInt32 *hash = p->hash;
  const Byte *cur = p->pointerToCurPos;
  UInt32 m = p->lzPos;
  MT_HASH3_CALC

  c2 = hash[h2];
  c3 = (hash + kFix3HashSize)[h3];

  hash[h2] = m;
  (hash + kFix3HashSize)[h3] = m;

  if (c2 >= matchMinPos && cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
  {
    d[1] = m - c2 - 1;
    if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2])
    {
      d[0] = 3;
      d += 2;
      break;
    }
    // else
    {
      d[0] = 2;
      d += 2;
    }
  }
  if (c3 >= matchMinPos && cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0])
  {
    *d++ = 3;
    *d++ = m - c3 - 1;
  }
  break;
  }

  if (len != 0)
  {
    do
    {
      const UInt32 v0 = bt[0];
      const UInt32 v1 = bt[1];
      bt += 2;
      d[0] = v0;
      d[1] = v1;
      d += 2;
    }
    while (bt != btLim);
  }
  INCREASE_LZ_POS
  return d;
}
*/


static UInt32 * MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
{
  UInt32 h2, h3, /* h4, */ c2, c3 /* , c4 */;
  UInt32 *hash = p->hash;
  const Byte *cur = p->pointerToCurPos;
  const UInt32 m = p->lzPos;
  MT_HASH3_CALC
  // MT_HASH4_CALC
  c2 = hash[h2];
  c3 = (hash + kFix3HashSize)[h3];
  // c4 = (hash + kFix4HashSize)[h4];

  hash[h2] = m;
  (hash + kFix3HashSize)[h3] = m;
  // (hash + kFix4HashSize)[h4] = m;

  // #define BT5_USE_H2
  // #ifdef BT5_USE_H2
  if (c2 >= matchMinPos && cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
  {
    d[1] = m - c2 - 1;
    if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2])
    {
      // d[0] = (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 3] == cur[3]) ? 4 : 3;
      // return d + 2;

      if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 3] == cur[3])
      {
        d[0] = 4;
        return d + 2;
      }
      d[0] = 3;
      d += 2;

      #ifdef BT5_USE_H4
      if (c4 >= matchMinPos)
        if (
          cur[(ptrdiff_t)c4 - (ptrdiff_t)m]     == cur[0] &&
          cur[(ptrdiff_t)c4 - (ptrdiff_t)m + 3] == cur[3]
          )
      {
        *d++ = 4;
        *d++ = m - c4 - 1;
      }
      #endif
      return d;
    }
    d[0] = 2;
    d += 2;
  }
  // #endif

  if (c3 >= matchMinPos && cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0])
  {
    d[1] = m - c3 - 1;
    if (cur[(ptrdiff_t)c3 - (ptrdiff_t)m + 3] == cur[3])
    {
      d[0] = 4;
      return d + 2;
    }
    d[0] = 3;
    d += 2;
  }

  #ifdef BT5_USE_H4
  if (c4 >= matchMinPos)
    if (
      cur[(ptrdiff_t)c4 - (ptrdiff_t)m]     == cur[0] &&
      cur[(ptrdiff_t)c4 - (ptrdiff_t)m + 3] == cur[3]
      )
    {
      *d++ = 4;
      *d++ = m - c4 - 1;
    }
  #endif

  return d;
}


static UInt32 * MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *d)
{
  const UInt32 *bt = p->btBufPos;
  const UInt32 len = *bt++;
  const UInt32 *btLim = bt + len;
  p->btBufPos = btLim;
  p->btNumAvailBytes--;
  INCREASE_LZ_POS
  {
    while (bt != btLim)
    {
      const UInt32 v0 = bt[0];
      const UInt32 v1 = bt[1];
      bt += 2;
      d[0] = v0;
      d[1] = v1;
      d += 2;
    }
  }
  return d;
}



static UInt32 * MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *d)
{
  const UInt32 *bt = p->btBufPos;
  UInt32 len = *bt++;
  const UInt32 avail = p->btNumAvailBytes - 1;
  p->btNumAvailBytes = avail;
  p->btBufPos = bt + len;
  if (len == 0)
  {
    #define BT_HASH_BYTES_MAX 5
    if (avail >= (BT_HASH_BYTES_MAX - 1) - 1)
    {
      UInt32 m = p->lzPos;
      if (m > p->historySize)
        m -= p->historySize;
      else
        m = 1;
      d = p->MixMatchesFunc(p, m, d);
    }
  }
  else
  {
    /*
      first match pair from BinTree: (match_len, match_dist),
      (match_len >= numHashBytes).
      MixMatchesFunc() inserts only hash matches that are nearer than (match_dist)
    */
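    /* Editor's note (layout implied by the loop below; illustration):
       (bt) holds (len) UInt32 items forming (len / 2) pairs:
         { len0, dist0, len1, dist1, ... }
       e.g. len == 4 encodes the two pairs (bt[0], bt[1]) and (bt[2], bt[3]). */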
    d = p->MixMatchesFunc(p, p->lzPos - bt[1], d);
    // if (d) // check for failure
    do
    {
      const UInt32 v0 = bt[0];
      const UInt32 v1 = bt[1];
      bt += 2;
      d[0] = v0;
      d[1] = v1;
      d += 2;
    }
    while (len -= 2);
  }
  INCREASE_LZ_POS
  return d;
}

#define SKIP_HEADER2_MT  do { GET_NEXT_BLOCK_IF_REQUIRED
#define SKIP_HEADER_MT(n) SKIP_HEADER2_MT if (p->btNumAvailBytes-- >= (n)) { const Byte *cur = p->pointerToCurPos; UInt32 *hash = p->hash;
#define SKIP_FOOTER_MT } INCREASE_LZ_POS p->btBufPos += (size_t)*p->btBufPos + 1; } while (--num != 0);

static void MatchFinderMt0_Skip(CMatchFinderMt *p, UInt32 num)
{
  SKIP_HEADER2_MT { p->btNumAvailBytes--;
  SKIP_FOOTER_MT
}
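
/* Editor's note (assumed expansion of the macros above; illustration):
   MatchFinderMt0_Skip(p, num) unfolds roughly to

     do {
       if (p->btBufPos == p->btBufPosLimit)
         MatchFinderMt_GetNextBlock_Bt(p);
       { p->btNumAvailBytes--; }
       p->lzPos++; p->pointerToCurPos++;          // INCREASE_LZ_POS
       p->btBufPos += (size_t)*p->btBufPos + 1;   // skip this position's match list
     } while (--num != 0);
*/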

static void MatchFinderMt2_Skip(CMatchFinderMt *p, UInt32 num)
{
  SKIP_HEADER_MT(2)
      UInt32 h2;
      MT_HASH2_CALC
      hash[h2] = p->lzPos;
  SKIP_FOOTER_MT
}

static void MatchFinderMt3_Skip(CMatchFinderMt *p, UInt32 num)
{
  SKIP_HEADER_MT(3)
      UInt32 h2, h3;
      MT_HASH3_CALC
      (hash + kFix3HashSize)[h3] =
      hash[                h2] =
        p->lzPos;
  SKIP_FOOTER_MT
}

/*
// MatchFinderMt4_Skip() is similar to MatchFinderMt3_Skip().
// The difference is that MatchFinderMt3_Skip() updates hash for last 3 bytes of stream.

static void MatchFinderMt4_Skip(CMatchFinderMt *p, UInt32 num)
{
  SKIP_HEADER_MT(4)
      UInt32 h2, h3; // h4
      MT_HASH3_CALC
      // MT_HASH4_CALC
      // (hash + kFix4HashSize)[h4] =
      (hash + kFix3HashSize)[h3] =
      hash[                h2] =
        p->lzPos;
  SKIP_FOOTER_MT
}
*/

void MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder2 *vTable)
{
  vTable->Init = (Mf_Init_Func)MatchFinderMt_Init;
  vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinderMt_GetNumAvailableBytes;
  vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinderMt_GetPointerToCurrentPos;
  vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches;

  switch (MF(p)->numHashBytes)
  {
    case 2:
      p->GetHeadsFunc = GetHeads2;
      p->MixMatchesFunc = (Mf_Mix_Matches)NULL;
      vTable->Skip = (Mf_Skip_Func)MatchFinderMt0_Skip;
      vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt2_GetMatches;
      break;
    case 3:
      p->GetHeadsFunc = MF(p)->bigHash ? GetHeads3b : GetHeads3;
      p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches2;
      vTable->Skip = (Mf_Skip_Func)MatchFinderMt2_Skip;
      break;
    case 4:
      p->GetHeadsFunc = MF(p)->bigHash ? GetHeads4b : GetHeads4;

      // it's fast inline version of GetMatches()
      // vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches_Bt4;

      p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches3;
      vTable->Skip = (Mf_Skip_Func)MatchFinderMt3_Skip;
      break;
    default:
      p->GetHeadsFunc = MF(p)->bigHash ? GetHeads5b : GetHeads5;
      p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches4;
      vTable->Skip =
          (Mf_Skip_Func)MatchFinderMt3_Skip;
          // (Mf_Skip_Func)MatchFinderMt4_Skip;
      break;
  }
}

#undef RINOK_THREAD
#undef PRF
#undef MF
#undef GetUi24hi_from32
#undef LOCK_BUFFER
#undef UNLOCK_BUFFER
1407