1370b324cSopenharmony_ci/* LzFindMt.c -- multithreaded Match finder for LZ algorithms
2370b324cSopenharmony_ci2023-04-02 : Igor Pavlov : Public domain */
3370b324cSopenharmony_ci
4370b324cSopenharmony_ci#include "Precomp.h"
5370b324cSopenharmony_ci
6370b324cSopenharmony_ci// #include <stdio.h>
7370b324cSopenharmony_ci
8370b324cSopenharmony_ci#include "CpuArch.h"
9370b324cSopenharmony_ci
10370b324cSopenharmony_ci#include "LzHash.h"
11370b324cSopenharmony_ci#include "LzFindMt.h"
12370b324cSopenharmony_ci
13370b324cSopenharmony_ci// #define LOG_ITERS
14370b324cSopenharmony_ci
15370b324cSopenharmony_ci// #define LOG_THREAD
16370b324cSopenharmony_ci
17370b324cSopenharmony_ci#ifdef LOG_THREAD
18370b324cSopenharmony_ci#include <stdio.h>
19370b324cSopenharmony_ci#define PRF(x) x
20370b324cSopenharmony_ci#else
21370b324cSopenharmony_ci#define PRF(x)
22370b324cSopenharmony_ci#endif
23370b324cSopenharmony_ci
24370b324cSopenharmony_ci#ifdef LOG_ITERS
25370b324cSopenharmony_ci#include <stdio.h>
26370b324cSopenharmony_ciextern UInt64 g_NumIters_Tree;
27370b324cSopenharmony_ciextern UInt64 g_NumIters_Loop;
28370b324cSopenharmony_ciextern UInt64 g_NumIters_Bytes;
29370b324cSopenharmony_ci#define LOG_ITER(x) x
30370b324cSopenharmony_ci#else
31370b324cSopenharmony_ci#define LOG_ITER(x)
32370b324cSopenharmony_ci#endif
33370b324cSopenharmony_ci
34370b324cSopenharmony_ci#define kMtHashBlockSize ((UInt32)1 << 17)
35370b324cSopenharmony_ci#define kMtHashNumBlocks (1 << 1)
36370b324cSopenharmony_ci
37370b324cSopenharmony_ci#define GET_HASH_BLOCK_OFFSET(i)  (((i) & (kMtHashNumBlocks - 1)) * kMtHashBlockSize)
38370b324cSopenharmony_ci
39370b324cSopenharmony_ci#define kMtBtBlockSize ((UInt32)1 << 16)
40370b324cSopenharmony_ci#define kMtBtNumBlocks (1 << 4)
41370b324cSopenharmony_ci
42370b324cSopenharmony_ci#define GET_BT_BLOCK_OFFSET(i)  (((i) & (kMtBtNumBlocks - 1)) * (size_t)kMtBtBlockSize)
43370b324cSopenharmony_ci
44370b324cSopenharmony_ci/*
45370b324cSopenharmony_ci  HASH functions:
46370b324cSopenharmony_ci  We use raw 8/16 bits from a[1] and a[2],
47370b324cSopenharmony_ci  xored with crc(a[0]) and crc(a[3]).
48370b324cSopenharmony_ci  We check a[0], a[3] only. We don't need to compare a[1] and a[2] in matches.
49370b324cSopenharmony_ci  our crc() function provides one-to-one correspondence for low 8-bit values:
50370b324cSopenharmony_ci    (crc[0...0xFF] & 0xFF) <-> [0...0xFF]
51370b324cSopenharmony_ci*/
52370b324cSopenharmony_ci
53370b324cSopenharmony_ci#define MF(mt) ((mt)->MatchFinder)
54370b324cSopenharmony_ci#define MF_CRC (p->crc)
55370b324cSopenharmony_ci
56370b324cSopenharmony_ci// #define MF(mt) (&(mt)->MatchFinder)
57370b324cSopenharmony_ci// #define MF_CRC (p->MatchFinder.crc)
58370b324cSopenharmony_ci
59370b324cSopenharmony_ci#define MT_HASH2_CALC \
60370b324cSopenharmony_ci  h2 = (MF_CRC[cur[0]] ^ cur[1]) & (kHash2Size - 1);
61370b324cSopenharmony_ci
62370b324cSopenharmony_ci#define MT_HASH3_CALC { \
63370b324cSopenharmony_ci  UInt32 temp = MF_CRC[cur[0]] ^ cur[1]; \
64370b324cSopenharmony_ci  h2 = temp & (kHash2Size - 1); \
65370b324cSopenharmony_ci  h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); }
66370b324cSopenharmony_ci
67370b324cSopenharmony_ci/*
68370b324cSopenharmony_ci#define MT_HASH3_CALC__NO_2 { \
69370b324cSopenharmony_ci  UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
70370b324cSopenharmony_ci  h3 = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); }
71370b324cSopenharmony_ci
72370b324cSopenharmony_ci#define MT_HASH4_CALC { \
73370b324cSopenharmony_ci  UInt32 temp = p->crc[cur[0]] ^ cur[1]; \
74370b324cSopenharmony_ci  h2 = temp & (kHash2Size - 1); \
75370b324cSopenharmony_ci  temp ^= ((UInt32)cur[2] << 8); \
76370b324cSopenharmony_ci  h3 = temp & (kHash3Size - 1); \
77370b324cSopenharmony_ci  h4 = (temp ^ (p->crc[cur[3]] << kLzHash_CrcShift_1)) & p->hash4Mask; }
78370b324cSopenharmony_ci  // (kHash4Size - 1);
79370b324cSopenharmony_ci*/
80370b324cSopenharmony_ci
81370b324cSopenharmony_ci
82370b324cSopenharmony_ciZ7_NO_INLINE
83370b324cSopenharmony_cistatic void MtSync_Construct(CMtSync *p)
84370b324cSopenharmony_ci{
85370b324cSopenharmony_ci  p->affinity = 0;
86370b324cSopenharmony_ci  p->wasCreated = False;
87370b324cSopenharmony_ci  p->csWasInitialized = False;
88370b324cSopenharmony_ci  p->csWasEntered = False;
89370b324cSopenharmony_ci  Thread_CONSTRUCT(&p->thread)
90370b324cSopenharmony_ci  Event_Construct(&p->canStart);
91370b324cSopenharmony_ci  Event_Construct(&p->wasStopped);
92370b324cSopenharmony_ci  Semaphore_Construct(&p->freeSemaphore);
93370b324cSopenharmony_ci  Semaphore_Construct(&p->filledSemaphore);
94370b324cSopenharmony_ci}
95370b324cSopenharmony_ci
96370b324cSopenharmony_ci
97370b324cSopenharmony_ci#define DEBUG_BUFFER_LOCK   // define it to debug lock state
98370b324cSopenharmony_ci
99370b324cSopenharmony_ci#ifdef DEBUG_BUFFER_LOCK
100370b324cSopenharmony_ci#include <stdlib.h>
101370b324cSopenharmony_ci#define BUFFER_MUST_BE_LOCKED(p)    if (!(p)->csWasEntered) exit(1);
102370b324cSopenharmony_ci#define BUFFER_MUST_BE_UNLOCKED(p)  if ( (p)->csWasEntered) exit(1);
103370b324cSopenharmony_ci#else
104370b324cSopenharmony_ci#define BUFFER_MUST_BE_LOCKED(p)
105370b324cSopenharmony_ci#define BUFFER_MUST_BE_UNLOCKED(p)
106370b324cSopenharmony_ci#endif
107370b324cSopenharmony_ci
108370b324cSopenharmony_ci#define LOCK_BUFFER(p) { \
109370b324cSopenharmony_ci    BUFFER_MUST_BE_UNLOCKED(p); \
110370b324cSopenharmony_ci    CriticalSection_Enter(&(p)->cs); \
111370b324cSopenharmony_ci    (p)->csWasEntered = True; }
112370b324cSopenharmony_ci
113370b324cSopenharmony_ci#define UNLOCK_BUFFER(p) { \
114370b324cSopenharmony_ci    BUFFER_MUST_BE_LOCKED(p); \
115370b324cSopenharmony_ci    CriticalSection_Leave(&(p)->cs); \
116370b324cSopenharmony_ci    (p)->csWasEntered = False; }
117370b324cSopenharmony_ci
118370b324cSopenharmony_ci
119370b324cSopenharmony_ciZ7_NO_INLINE
120370b324cSopenharmony_cistatic UInt32 MtSync_GetNextBlock(CMtSync *p)
121370b324cSopenharmony_ci{
122370b324cSopenharmony_ci  UInt32 numBlocks = 0;
123370b324cSopenharmony_ci  if (p->needStart)
124370b324cSopenharmony_ci  {
125370b324cSopenharmony_ci    BUFFER_MUST_BE_UNLOCKED(p)
126370b324cSopenharmony_ci    p->numProcessedBlocks = 1;
127370b324cSopenharmony_ci    p->needStart = False;
128370b324cSopenharmony_ci    p->stopWriting = False;
129370b324cSopenharmony_ci    p->exit = False;
130370b324cSopenharmony_ci    Event_Reset(&p->wasStopped);
131370b324cSopenharmony_ci    Event_Set(&p->canStart);
132370b324cSopenharmony_ci  }
133370b324cSopenharmony_ci  else
134370b324cSopenharmony_ci  {
135370b324cSopenharmony_ci    UNLOCK_BUFFER(p)
136370b324cSopenharmony_ci    // we free current block
137370b324cSopenharmony_ci    numBlocks = p->numProcessedBlocks++;
138370b324cSopenharmony_ci    Semaphore_Release1(&p->freeSemaphore);
139370b324cSopenharmony_ci  }
140370b324cSopenharmony_ci
141370b324cSopenharmony_ci  // buffer is UNLOCKED here
142370b324cSopenharmony_ci  Semaphore_Wait(&p->filledSemaphore);
143370b324cSopenharmony_ci  LOCK_BUFFER(p)
144370b324cSopenharmony_ci  return numBlocks;
145370b324cSopenharmony_ci}
146370b324cSopenharmony_ci
147370b324cSopenharmony_ci
148370b324cSopenharmony_ci/* if Writing (Processing) thread was started, we must call MtSync_StopWriting() */
149370b324cSopenharmony_ci
150370b324cSopenharmony_ciZ7_NO_INLINE
151370b324cSopenharmony_cistatic void MtSync_StopWriting(CMtSync *p)
152370b324cSopenharmony_ci{
153370b324cSopenharmony_ci  if (!Thread_WasCreated(&p->thread) || p->needStart)
154370b324cSopenharmony_ci    return;
155370b324cSopenharmony_ci
156370b324cSopenharmony_ci    PRF(printf("\nMtSync_StopWriting %p\n", p));
157370b324cSopenharmony_ci
158370b324cSopenharmony_ci  if (p->csWasEntered)
159370b324cSopenharmony_ci  {
160370b324cSopenharmony_ci    /* we don't use buffer in this thread after StopWriting().
161370b324cSopenharmony_ci       So we UNLOCK buffer.
162370b324cSopenharmony_ci       And we restore default UNLOCKED state for stopped thread */
163370b324cSopenharmony_ci    UNLOCK_BUFFER(p)
164370b324cSopenharmony_ci  }
165370b324cSopenharmony_ci
166370b324cSopenharmony_ci  /* We send (p->stopWriting) message and release freeSemaphore
167370b324cSopenharmony_ci     to free current block.
168370b324cSopenharmony_ci     So the thread will see (p->stopWriting) at some
169370b324cSopenharmony_ci     iteration after Wait(freeSemaphore).
170370b324cSopenharmony_ci     The thread doesn't need to fill all avail free blocks,
171370b324cSopenharmony_ci     so we can get fast thread stop.
172370b324cSopenharmony_ci  */
173370b324cSopenharmony_ci
174370b324cSopenharmony_ci  p->stopWriting = True;
175370b324cSopenharmony_ci  Semaphore_Release1(&p->freeSemaphore); // check semaphore count !!!
176370b324cSopenharmony_ci
177370b324cSopenharmony_ci    PRF(printf("\nMtSync_StopWriting %p : Event_Wait(&p->wasStopped)\n", p));
178370b324cSopenharmony_ci  Event_Wait(&p->wasStopped);
179370b324cSopenharmony_ci    PRF(printf("\nMtSync_StopWriting %p : Event_Wait() finsihed\n", p));
180370b324cSopenharmony_ci
181370b324cSopenharmony_ci  /* 21.03 : we don't restore samaphore counters here.
182370b324cSopenharmony_ci     We will recreate and reinit samaphores in next start */
183370b324cSopenharmony_ci
184370b324cSopenharmony_ci  p->needStart = True;
185370b324cSopenharmony_ci}
186370b324cSopenharmony_ci
187370b324cSopenharmony_ci
188370b324cSopenharmony_ciZ7_NO_INLINE
189370b324cSopenharmony_cistatic void MtSync_Destruct(CMtSync *p)
190370b324cSopenharmony_ci{
191370b324cSopenharmony_ci    PRF(printf("\nMtSync_Destruct %p\n", p));
192370b324cSopenharmony_ci
193370b324cSopenharmony_ci  if (Thread_WasCreated(&p->thread))
194370b324cSopenharmony_ci  {
195370b324cSopenharmony_ci    /* we want thread to be in Stopped state before sending EXIT command.
196370b324cSopenharmony_ci       note: stop(btSync) will stop (htSync) also */
197370b324cSopenharmony_ci    MtSync_StopWriting(p);
198370b324cSopenharmony_ci    /* thread in Stopped state here : (p->needStart == true) */
199370b324cSopenharmony_ci    p->exit = True;
200370b324cSopenharmony_ci    // if (p->needStart)  // it's (true)
201370b324cSopenharmony_ci    Event_Set(&p->canStart);  // we send EXIT command to thread
202370b324cSopenharmony_ci    Thread_Wait_Close(&p->thread);  // we wait thread finishing
203370b324cSopenharmony_ci  }
204370b324cSopenharmony_ci
205370b324cSopenharmony_ci  if (p->csWasInitialized)
206370b324cSopenharmony_ci  {
207370b324cSopenharmony_ci    CriticalSection_Delete(&p->cs);
208370b324cSopenharmony_ci    p->csWasInitialized = False;
209370b324cSopenharmony_ci  }
210370b324cSopenharmony_ci  p->csWasEntered = False;
211370b324cSopenharmony_ci
212370b324cSopenharmony_ci  Event_Close(&p->canStart);
213370b324cSopenharmony_ci  Event_Close(&p->wasStopped);
214370b324cSopenharmony_ci  Semaphore_Close(&p->freeSemaphore);
215370b324cSopenharmony_ci  Semaphore_Close(&p->filledSemaphore);
216370b324cSopenharmony_ci
217370b324cSopenharmony_ci  p->wasCreated = False;
218370b324cSopenharmony_ci}
219370b324cSopenharmony_ci
220370b324cSopenharmony_ci
221370b324cSopenharmony_ci// #define RINOK_THREAD(x) { if ((x) != 0) return SZ_ERROR_THREAD; }
222370b324cSopenharmony_ci// we want to get real system error codes here instead of SZ_ERROR_THREAD
223370b324cSopenharmony_ci#define RINOK_THREAD(x)  RINOK_WRes(x)
224370b324cSopenharmony_ci
225370b324cSopenharmony_ci
226370b324cSopenharmony_ci// call it before each new file (when new starting is required):
227370b324cSopenharmony_ciZ7_NO_INLINE
228370b324cSopenharmony_cistatic SRes MtSync_Init(CMtSync *p, UInt32 numBlocks)
229370b324cSopenharmony_ci{
230370b324cSopenharmony_ci  WRes wres;
231370b324cSopenharmony_ci  // BUFFER_MUST_BE_UNLOCKED(p)
232370b324cSopenharmony_ci  if (!p->needStart || p->csWasEntered)
233370b324cSopenharmony_ci    return SZ_ERROR_FAIL;
234370b324cSopenharmony_ci  wres = Semaphore_OptCreateInit(&p->freeSemaphore, numBlocks, numBlocks);
235370b324cSopenharmony_ci  if (wres == 0)
236370b324cSopenharmony_ci    wres = Semaphore_OptCreateInit(&p->filledSemaphore, 0, numBlocks);
237370b324cSopenharmony_ci  return MY_SRes_HRESULT_FROM_WRes(wres);
238370b324cSopenharmony_ci}
239370b324cSopenharmony_ci
240370b324cSopenharmony_ci
241370b324cSopenharmony_cistatic WRes MtSync_Create_WRes(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj)
242370b324cSopenharmony_ci{
243370b324cSopenharmony_ci  WRes wres;
244370b324cSopenharmony_ci
245370b324cSopenharmony_ci  if (p->wasCreated)
246370b324cSopenharmony_ci    return SZ_OK;
247370b324cSopenharmony_ci
248370b324cSopenharmony_ci  RINOK_THREAD(CriticalSection_Init(&p->cs))
249370b324cSopenharmony_ci  p->csWasInitialized = True;
250370b324cSopenharmony_ci  p->csWasEntered = False;
251370b324cSopenharmony_ci
252370b324cSopenharmony_ci  RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->canStart))
253370b324cSopenharmony_ci  RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStopped))
254370b324cSopenharmony_ci
255370b324cSopenharmony_ci  p->needStart = True;
256370b324cSopenharmony_ci  p->exit = True;  /* p->exit is unused before (canStart) Event.
257370b324cSopenharmony_ci     But in case of some unexpected code failure we will get fast exit from thread */
258370b324cSopenharmony_ci
259370b324cSopenharmony_ci  // return ERROR_TOO_MANY_POSTS; // for debug
260370b324cSopenharmony_ci  // return EINVAL; // for debug
261370b324cSopenharmony_ci
262370b324cSopenharmony_ci  if (p->affinity != 0)
263370b324cSopenharmony_ci    wres = Thread_Create_With_Affinity(&p->thread, startAddress, obj, (CAffinityMask)p->affinity);
264370b324cSopenharmony_ci  else
265370b324cSopenharmony_ci    wres = Thread_Create(&p->thread, startAddress, obj);
266370b324cSopenharmony_ci
267370b324cSopenharmony_ci  RINOK_THREAD(wres)
268370b324cSopenharmony_ci  p->wasCreated = True;
269370b324cSopenharmony_ci  return SZ_OK;
270370b324cSopenharmony_ci}
271370b324cSopenharmony_ci
272370b324cSopenharmony_ci
273370b324cSopenharmony_ciZ7_NO_INLINE
274370b324cSopenharmony_cistatic SRes MtSync_Create(CMtSync *p, THREAD_FUNC_TYPE startAddress, void *obj)
275370b324cSopenharmony_ci{
276370b324cSopenharmony_ci  const WRes wres = MtSync_Create_WRes(p, startAddress, obj);
277370b324cSopenharmony_ci  if (wres == 0)
278370b324cSopenharmony_ci    return 0;
279370b324cSopenharmony_ci  MtSync_Destruct(p);
280370b324cSopenharmony_ci  return MY_SRes_HRESULT_FROM_WRes(wres);
281370b324cSopenharmony_ci}
282370b324cSopenharmony_ci
283370b324cSopenharmony_ci
284370b324cSopenharmony_ci// ---------- HASH THREAD ----------
285370b324cSopenharmony_ci
286370b324cSopenharmony_ci#define kMtMaxValForNormalize 0xFFFFFFFF
287370b324cSopenharmony_ci// #define kMtMaxValForNormalize ((1 << 21)) // for debug
288370b324cSopenharmony_ci// #define kNormalizeAlign (1 << 7) // alignment for speculated accesses
289370b324cSopenharmony_ci
290370b324cSopenharmony_ci#ifdef MY_CPU_LE_UNALIGN
291370b324cSopenharmony_ci  #define GetUi24hi_from32(p) ((UInt32)GetUi32(p) >> 8)
292370b324cSopenharmony_ci#else
293370b324cSopenharmony_ci  #define GetUi24hi_from32(p) ((p)[1] ^ ((UInt32)(p)[2] << 8) ^ ((UInt32)(p)[3] << 16))
294370b324cSopenharmony_ci#endif
295370b324cSopenharmony_ci
296370b324cSopenharmony_ci#define GetHeads_DECL(name) \
297370b324cSopenharmony_ci    static void GetHeads ## name(const Byte *p, UInt32 pos, \
298370b324cSopenharmony_ci      UInt32 *hash, UInt32 hashMask, UInt32 *heads, UInt32 numHeads, const UInt32 *crc)
299370b324cSopenharmony_ci
300370b324cSopenharmony_ci#define GetHeads_LOOP(v) \
301370b324cSopenharmony_ci    for (; numHeads != 0; numHeads--) { \
302370b324cSopenharmony_ci      const UInt32 value = (v); \
303370b324cSopenharmony_ci      p++; \
304370b324cSopenharmony_ci      *heads++ = pos - hash[value]; \
305370b324cSopenharmony_ci      hash[value] = pos++; }
306370b324cSopenharmony_ci
307370b324cSopenharmony_ci#define DEF_GetHeads2(name, v, action) \
308370b324cSopenharmony_ci    GetHeads_DECL(name) { action \
309370b324cSopenharmony_ci    GetHeads_LOOP(v) }
310370b324cSopenharmony_ci
311370b324cSopenharmony_ci#define DEF_GetHeads(name, v) DEF_GetHeads2(name, v, ;)
312370b324cSopenharmony_ci
313370b324cSopenharmony_ciDEF_GetHeads2(2, GetUi16(p), UNUSED_VAR(hashMask); UNUSED_VAR(crc); )
314370b324cSopenharmony_ciDEF_GetHeads(3,  (crc[p[0]] ^ GetUi16(p + 1)) & hashMask)
315370b324cSopenharmony_ciDEF_GetHeads2(3b, GetUi16(p) ^ ((UInt32)(p)[2] << 16), UNUSED_VAR(hashMask); UNUSED_VAR(crc); )
316370b324cSopenharmony_ci// BT3 is not good for crc collisions for big hashMask values.
317370b324cSopenharmony_ci
318370b324cSopenharmony_ci/*
319370b324cSopenharmony_ciGetHeads_DECL(3b)
320370b324cSopenharmony_ci{
321370b324cSopenharmony_ci  UNUSED_VAR(hashMask);
322370b324cSopenharmony_ci  UNUSED_VAR(crc);
323370b324cSopenharmony_ci  {
324370b324cSopenharmony_ci  const Byte *pLim = p + numHeads;
325370b324cSopenharmony_ci  if (numHeads == 0)
326370b324cSopenharmony_ci    return;
327370b324cSopenharmony_ci  pLim--;
328370b324cSopenharmony_ci  while (p < pLim)
329370b324cSopenharmony_ci  {
330370b324cSopenharmony_ci    UInt32 v1 = GetUi32(p);
331370b324cSopenharmony_ci    UInt32 v0 = v1 & 0xFFFFFF;
332370b324cSopenharmony_ci    UInt32 h0, h1;
333370b324cSopenharmony_ci    p += 2;
334370b324cSopenharmony_ci    v1 >>= 8;
335370b324cSopenharmony_ci    h0 = hash[v0]; hash[v0] = pos; heads[0] = pos - h0; pos++;
336370b324cSopenharmony_ci    h1 = hash[v1]; hash[v1] = pos; heads[1] = pos - h1; pos++;
337370b324cSopenharmony_ci    heads += 2;
338370b324cSopenharmony_ci  }
339370b324cSopenharmony_ci  if (p == pLim)
340370b324cSopenharmony_ci  {
341370b324cSopenharmony_ci    UInt32 v0 = GetUi16(p) ^ ((UInt32)(p)[2] << 16);
342370b324cSopenharmony_ci    *heads = pos - hash[v0];
343370b324cSopenharmony_ci    hash[v0] = pos;
344370b324cSopenharmony_ci  }
345370b324cSopenharmony_ci  }
346370b324cSopenharmony_ci}
347370b324cSopenharmony_ci*/
348370b324cSopenharmony_ci
349370b324cSopenharmony_ci/*
350370b324cSopenharmony_ciGetHeads_DECL(4)
351370b324cSopenharmony_ci{
352370b324cSopenharmony_ci  unsigned sh = 0;
353370b324cSopenharmony_ci  UNUSED_VAR(crc)
354370b324cSopenharmony_ci  while ((hashMask & 0x80000000) == 0)
355370b324cSopenharmony_ci  {
356370b324cSopenharmony_ci    hashMask <<= 1;
357370b324cSopenharmony_ci    sh++;
358370b324cSopenharmony_ci  }
359370b324cSopenharmony_ci  GetHeads_LOOP((GetUi32(p) * 0xa54a1) >> sh)
360370b324cSopenharmony_ci}
361370b324cSopenharmony_ci#define GetHeads4b GetHeads4
362370b324cSopenharmony_ci*/
363370b324cSopenharmony_ci
364370b324cSopenharmony_ci#define USE_GetHeads_LOCAL_CRC
365370b324cSopenharmony_ci
366370b324cSopenharmony_ci#ifdef USE_GetHeads_LOCAL_CRC
367370b324cSopenharmony_ci
368370b324cSopenharmony_ciGetHeads_DECL(4)
369370b324cSopenharmony_ci{
370370b324cSopenharmony_ci  UInt32 crc0[256];
371370b324cSopenharmony_ci  UInt32 crc1[256];
372370b324cSopenharmony_ci  {
373370b324cSopenharmony_ci    unsigned i;
374370b324cSopenharmony_ci    for (i = 0; i < 256; i++)
375370b324cSopenharmony_ci    {
376370b324cSopenharmony_ci      UInt32 v = crc[i];
377370b324cSopenharmony_ci      crc0[i] = v & hashMask;
378370b324cSopenharmony_ci      crc1[i] = (v << kLzHash_CrcShift_1) & hashMask;
379370b324cSopenharmony_ci      // crc1[i] = rotlFixed(v, 8) & hashMask;
380370b324cSopenharmony_ci    }
381370b324cSopenharmony_ci  }
382370b324cSopenharmony_ci  GetHeads_LOOP(crc0[p[0]] ^ crc1[p[3]] ^ (UInt32)GetUi16(p+1))
383370b324cSopenharmony_ci}
384370b324cSopenharmony_ci
385370b324cSopenharmony_ciGetHeads_DECL(4b)
386370b324cSopenharmony_ci{
387370b324cSopenharmony_ci  UInt32 crc0[256];
388370b324cSopenharmony_ci  {
389370b324cSopenharmony_ci    unsigned i;
390370b324cSopenharmony_ci    for (i = 0; i < 256; i++)
391370b324cSopenharmony_ci      crc0[i] = crc[i] & hashMask;
392370b324cSopenharmony_ci  }
393370b324cSopenharmony_ci  GetHeads_LOOP(crc0[p[0]] ^ GetUi24hi_from32(p))
394370b324cSopenharmony_ci}
395370b324cSopenharmony_ci
396370b324cSopenharmony_ciGetHeads_DECL(5)
397370b324cSopenharmony_ci{
398370b324cSopenharmony_ci  UInt32 crc0[256];
399370b324cSopenharmony_ci  UInt32 crc1[256];
400370b324cSopenharmony_ci  UInt32 crc2[256];
401370b324cSopenharmony_ci  {
402370b324cSopenharmony_ci    unsigned i;
403370b324cSopenharmony_ci    for (i = 0; i < 256; i++)
404370b324cSopenharmony_ci    {
405370b324cSopenharmony_ci      UInt32 v = crc[i];
406370b324cSopenharmony_ci      crc0[i] = v & hashMask;
407370b324cSopenharmony_ci      crc1[i] = (v << kLzHash_CrcShift_1) & hashMask;
408370b324cSopenharmony_ci      crc2[i] = (v << kLzHash_CrcShift_2) & hashMask;
409370b324cSopenharmony_ci    }
410370b324cSopenharmony_ci  }
411370b324cSopenharmony_ci  GetHeads_LOOP(crc0[p[0]] ^ crc1[p[3]] ^ crc2[p[4]] ^ (UInt32)GetUi16(p+1))
412370b324cSopenharmony_ci}
413370b324cSopenharmony_ci
414370b324cSopenharmony_ciGetHeads_DECL(5b)
415370b324cSopenharmony_ci{
416370b324cSopenharmony_ci  UInt32 crc0[256];
417370b324cSopenharmony_ci  UInt32 crc1[256];
418370b324cSopenharmony_ci  {
419370b324cSopenharmony_ci    unsigned i;
420370b324cSopenharmony_ci    for (i = 0; i < 256; i++)
421370b324cSopenharmony_ci    {
422370b324cSopenharmony_ci      UInt32 v = crc[i];
423370b324cSopenharmony_ci      crc0[i] = v & hashMask;
424370b324cSopenharmony_ci      crc1[i] = (v << kLzHash_CrcShift_1) & hashMask;
425370b324cSopenharmony_ci    }
426370b324cSopenharmony_ci  }
427370b324cSopenharmony_ci  GetHeads_LOOP(crc0[p[0]] ^ crc1[p[4]] ^ GetUi24hi_from32(p))
428370b324cSopenharmony_ci}
429370b324cSopenharmony_ci
430370b324cSopenharmony_ci#else
431370b324cSopenharmony_ci
432370b324cSopenharmony_ciDEF_GetHeads(4,  (crc[p[0]] ^ (crc[p[3]] << kLzHash_CrcShift_1) ^ (UInt32)GetUi16(p+1)) & hashMask)
433370b324cSopenharmony_ciDEF_GetHeads(4b, (crc[p[0]] ^ GetUi24hi_from32(p)) & hashMask)
434370b324cSopenharmony_ciDEF_GetHeads(5,  (crc[p[0]] ^ (crc[p[3]] << kLzHash_CrcShift_1) ^ (crc[p[4]] << kLzHash_CrcShift_2) ^ (UInt32)GetUi16(p + 1)) & hashMask)
435370b324cSopenharmony_ciDEF_GetHeads(5b, (crc[p[0]] ^ (crc[p[4]] << kLzHash_CrcShift_1) ^ GetUi24hi_from32(p)) & hashMask)
436370b324cSopenharmony_ci
437370b324cSopenharmony_ci#endif
438370b324cSopenharmony_ci
439370b324cSopenharmony_ci
440370b324cSopenharmony_cistatic void HashThreadFunc(CMatchFinderMt *mt)
441370b324cSopenharmony_ci{
442370b324cSopenharmony_ci  CMtSync *p = &mt->hashSync;
443370b324cSopenharmony_ci    PRF(printf("\nHashThreadFunc\n"));
444370b324cSopenharmony_ci
445370b324cSopenharmony_ci  for (;;)
446370b324cSopenharmony_ci  {
447370b324cSopenharmony_ci    UInt32 blockIndex = 0;
448370b324cSopenharmony_ci      PRF(printf("\nHashThreadFunc : Event_Wait(&p->canStart)\n"));
449370b324cSopenharmony_ci    Event_Wait(&p->canStart);
450370b324cSopenharmony_ci      PRF(printf("\nHashThreadFunc : Event_Wait(&p->canStart) : after \n"));
451370b324cSopenharmony_ci    if (p->exit)
452370b324cSopenharmony_ci    {
453370b324cSopenharmony_ci      PRF(printf("\nHashThreadFunc : exit \n"));
454370b324cSopenharmony_ci      return;
455370b324cSopenharmony_ci    }
456370b324cSopenharmony_ci
457370b324cSopenharmony_ci    MatchFinder_Init_HighHash(MF(mt));
458370b324cSopenharmony_ci
459370b324cSopenharmony_ci    for (;;)
460370b324cSopenharmony_ci    {
461370b324cSopenharmony_ci      PRF(printf("Hash thread block = %d pos = %d\n", (unsigned)blockIndex, mt->MatchFinder->pos));
462370b324cSopenharmony_ci
463370b324cSopenharmony_ci      {
464370b324cSopenharmony_ci        CMatchFinder *mf = MF(mt);
465370b324cSopenharmony_ci        if (MatchFinder_NeedMove(mf))
466370b324cSopenharmony_ci        {
467370b324cSopenharmony_ci          CriticalSection_Enter(&mt->btSync.cs);
468370b324cSopenharmony_ci          CriticalSection_Enter(&mt->hashSync.cs);
469370b324cSopenharmony_ci          {
470370b324cSopenharmony_ci            const Byte *beforePtr = Inline_MatchFinder_GetPointerToCurrentPos(mf);
471370b324cSopenharmony_ci            ptrdiff_t offset;
472370b324cSopenharmony_ci            MatchFinder_MoveBlock(mf);
473370b324cSopenharmony_ci            offset = beforePtr - Inline_MatchFinder_GetPointerToCurrentPos(mf);
474370b324cSopenharmony_ci            mt->pointerToCurPos -= offset;
475370b324cSopenharmony_ci            mt->buffer -= offset;
476370b324cSopenharmony_ci          }
477370b324cSopenharmony_ci          CriticalSection_Leave(&mt->hashSync.cs);
478370b324cSopenharmony_ci          CriticalSection_Leave(&mt->btSync.cs);
479370b324cSopenharmony_ci          continue;
480370b324cSopenharmony_ci        }
481370b324cSopenharmony_ci
482370b324cSopenharmony_ci        Semaphore_Wait(&p->freeSemaphore);
483370b324cSopenharmony_ci
484370b324cSopenharmony_ci        if (p->exit) // exit is unexpected here. But we check it here for some failure case
485370b324cSopenharmony_ci          return;
486370b324cSopenharmony_ci
487370b324cSopenharmony_ci        // for faster stop : we check (p->stopWriting) after Wait(freeSemaphore)
488370b324cSopenharmony_ci        if (p->stopWriting)
489370b324cSopenharmony_ci          break;
490370b324cSopenharmony_ci
491370b324cSopenharmony_ci        MatchFinder_ReadIfRequired(mf);
492370b324cSopenharmony_ci        {
493370b324cSopenharmony_ci          UInt32 *heads = mt->hashBuf + GET_HASH_BLOCK_OFFSET(blockIndex++);
494370b324cSopenharmony_ci          UInt32 num = Inline_MatchFinder_GetNumAvailableBytes(mf);
495370b324cSopenharmony_ci          heads[0] = 2;
496370b324cSopenharmony_ci          heads[1] = num;
497370b324cSopenharmony_ci
498370b324cSopenharmony_ci          /* heads[1] contains the number of avail bytes:
499370b324cSopenharmony_ci             if (avail < mf->numHashBytes) :
500370b324cSopenharmony_ci             {
501370b324cSopenharmony_ci               it means that stream was finished
502370b324cSopenharmony_ci               HASH_THREAD and BT_TREAD must move position for heads[1] (avail) bytes.
503370b324cSopenharmony_ci               HASH_THREAD doesn't stop,
504370b324cSopenharmony_ci               HASH_THREAD fills only the header (2 numbers) for all next blocks:
505370b324cSopenharmony_ci               {2, NumHashBytes - 1}, {2,0}, {2,0}, ... , {2,0}
506370b324cSopenharmony_ci             }
507370b324cSopenharmony_ci             else
508370b324cSopenharmony_ci             {
509370b324cSopenharmony_ci               HASH_THREAD and BT_TREAD must move position for (heads[0] - 2) bytes;
510370b324cSopenharmony_ci             }
511370b324cSopenharmony_ci          */
512370b324cSopenharmony_ci
513370b324cSopenharmony_ci          if (num >= mf->numHashBytes)
514370b324cSopenharmony_ci          {
515370b324cSopenharmony_ci            num = num - mf->numHashBytes + 1;
516370b324cSopenharmony_ci            if (num > kMtHashBlockSize - 2)
517370b324cSopenharmony_ci              num = kMtHashBlockSize - 2;
518370b324cSopenharmony_ci
519370b324cSopenharmony_ci            if (mf->pos > (UInt32)kMtMaxValForNormalize - num)
520370b324cSopenharmony_ci            {
521370b324cSopenharmony_ci              const UInt32 subValue = (mf->pos - mf->historySize - 1); // & ~(UInt32)(kNormalizeAlign - 1);
522370b324cSopenharmony_ci              MatchFinder_REDUCE_OFFSETS(mf, subValue)
523370b324cSopenharmony_ci              MatchFinder_Normalize3(subValue, mf->hash + mf->fixedHashSize, (size_t)mf->hashMask + 1);
524370b324cSopenharmony_ci            }
525370b324cSopenharmony_ci
526370b324cSopenharmony_ci            heads[0] = 2 + num;
527370b324cSopenharmony_ci            mt->GetHeadsFunc(mf->buffer, mf->pos, mf->hash + mf->fixedHashSize, mf->hashMask, heads + 2, num, mf->crc);
528370b324cSopenharmony_ci          }
529370b324cSopenharmony_ci
530370b324cSopenharmony_ci          mf->pos += num;  // wrap over zero is allowed at the end of stream
531370b324cSopenharmony_ci          mf->buffer += num;
532370b324cSopenharmony_ci        }
533370b324cSopenharmony_ci      }
534370b324cSopenharmony_ci
535370b324cSopenharmony_ci      Semaphore_Release1(&p->filledSemaphore);
536370b324cSopenharmony_ci    } // for() processing end
537370b324cSopenharmony_ci
538370b324cSopenharmony_ci    // p->numBlocks_Sent = blockIndex;
539370b324cSopenharmony_ci    Event_Set(&p->wasStopped);
540370b324cSopenharmony_ci  } // for() thread end
541370b324cSopenharmony_ci}
542370b324cSopenharmony_ci
543370b324cSopenharmony_ci
544370b324cSopenharmony_ci
545370b324cSopenharmony_ci
546370b324cSopenharmony_ci// ---------- BT THREAD ----------
547370b324cSopenharmony_ci
548370b324cSopenharmony_ci/* we use one variable instead of two (cyclicBufferPos == pos) before CyclicBuf wrap.
549370b324cSopenharmony_ci   here we define fixed offset of (p->pos) from (p->cyclicBufferPos) */
550370b324cSopenharmony_ci#define CYC_TO_POS_OFFSET 0
551370b324cSopenharmony_ci// #define CYC_TO_POS_OFFSET 1 // for debug
552370b324cSopenharmony_ci
553370b324cSopenharmony_ci#define MFMT_GM_INLINE
554370b324cSopenharmony_ci
555370b324cSopenharmony_ci#ifdef MFMT_GM_INLINE
556370b324cSopenharmony_ci
557370b324cSopenharmony_ci/*
558370b324cSopenharmony_ci  we use size_t for (pos) instead of UInt32
559370b324cSopenharmony_ci  to eliminate "movsx" BUG in old MSVC x64 compiler.
560370b324cSopenharmony_ci*/
561370b324cSopenharmony_ci
562370b324cSopenharmony_ci
563370b324cSopenharmony_ciUInt32 * Z7_FASTCALL GetMatchesSpecN_2(const Byte *lenLimit, size_t pos, const Byte *cur, CLzRef *son,
564370b324cSopenharmony_ci    UInt32 _cutValue, UInt32 *d, size_t _maxLen, const UInt32 *hash, const UInt32 *limit, const UInt32 *size,
565370b324cSopenharmony_ci    size_t _cyclicBufferPos, UInt32 _cyclicBufferSize,
566370b324cSopenharmony_ci    UInt32 *posRes);
567370b324cSopenharmony_ci
568370b324cSopenharmony_ci#endif
569370b324cSopenharmony_ci
570370b324cSopenharmony_ci
571370b324cSopenharmony_cistatic void BtGetMatches(CMatchFinderMt *p, UInt32 *d)
572370b324cSopenharmony_ci{
573370b324cSopenharmony_ci  UInt32 numProcessed = 0;
574370b324cSopenharmony_ci  UInt32 curPos = 2;
575370b324cSopenharmony_ci
576370b324cSopenharmony_ci  /* GetMatchesSpec() functions don't create (len = 1)
577370b324cSopenharmony_ci     in [len, dist] match pairs, if (p->numHashBytes >= 2)
578370b324cSopenharmony_ci     Also we suppose here that (matchMaxLen >= 2).
579370b324cSopenharmony_ci     So the following code for (reserve) is not required
580370b324cSopenharmony_ci     UInt32 reserve = (p->matchMaxLen * 2);
581370b324cSopenharmony_ci     const UInt32 kNumHashBytes_Max = 5; // BT_HASH_BYTES_MAX
582370b324cSopenharmony_ci     if (reserve < kNumHashBytes_Max - 1)
583370b324cSopenharmony_ci        reserve = kNumHashBytes_Max - 1;
584370b324cSopenharmony_ci     const UInt32 limit = kMtBtBlockSize - (reserve);
585370b324cSopenharmony_ci  */
586370b324cSopenharmony_ci
587370b324cSopenharmony_ci  const UInt32 limit = kMtBtBlockSize - (p->matchMaxLen * 2);
588370b324cSopenharmony_ci
589370b324cSopenharmony_ci  d[1] = p->hashNumAvail;
590370b324cSopenharmony_ci
591370b324cSopenharmony_ci  if (p->failure_BT)
592370b324cSopenharmony_ci  {
593370b324cSopenharmony_ci    // printf("\n == 1 BtGetMatches() p->failure_BT\n");
594370b324cSopenharmony_ci    d[0] = 0;
595370b324cSopenharmony_ci    // d[1] = 0;
596370b324cSopenharmony_ci    return;
597370b324cSopenharmony_ci  }
598370b324cSopenharmony_ci
599370b324cSopenharmony_ci  while (curPos < limit)
600370b324cSopenharmony_ci  {
601370b324cSopenharmony_ci    if (p->hashBufPos == p->hashBufPosLimit)
602370b324cSopenharmony_ci    {
603370b324cSopenharmony_ci      // MatchFinderMt_GetNextBlock_Hash(p);
604370b324cSopenharmony_ci      UInt32 avail;
605370b324cSopenharmony_ci      {
606370b324cSopenharmony_ci        const UInt32 bi = MtSync_GetNextBlock(&p->hashSync);
607370b324cSopenharmony_ci        const UInt32 k = GET_HASH_BLOCK_OFFSET(bi);
608370b324cSopenharmony_ci        const UInt32 *h = p->hashBuf + k;
609370b324cSopenharmony_ci        avail = h[1];
610370b324cSopenharmony_ci        p->hashBufPosLimit = k + h[0];
611370b324cSopenharmony_ci        p->hashNumAvail = avail;
612370b324cSopenharmony_ci        p->hashBufPos = k + 2;
613370b324cSopenharmony_ci      }
614370b324cSopenharmony_ci
615370b324cSopenharmony_ci      {
616370b324cSopenharmony_ci        /* we must prevent UInt32 overflow for avail total value,
617370b324cSopenharmony_ci           if avail was increased with new hash block */
618370b324cSopenharmony_ci        UInt32 availSum = numProcessed + avail;
619370b324cSopenharmony_ci        if (availSum < numProcessed)
620370b324cSopenharmony_ci          availSum = (UInt32)(Int32)-1;
621370b324cSopenharmony_ci        d[1] = availSum;
622370b324cSopenharmony_ci      }
623370b324cSopenharmony_ci
624370b324cSopenharmony_ci      if (avail >= p->numHashBytes)
625370b324cSopenharmony_ci        continue;
626370b324cSopenharmony_ci
627370b324cSopenharmony_ci      // if (p->hashBufPos != p->hashBufPosLimit) exit(1);
628370b324cSopenharmony_ci
629370b324cSopenharmony_ci      /* (avail < p->numHashBytes)
630370b324cSopenharmony_ci         It means that stream was finished.
631370b324cSopenharmony_ci         And (avail) - is a number of remaining bytes,
632370b324cSopenharmony_ci         we fill (d) for (avail) bytes for LZ_THREAD (receiver).
633370b324cSopenharmony_ci         but we don't update (p->pos) and (p->cyclicBufferPos) here in BT_THREAD */
634370b324cSopenharmony_ci
635370b324cSopenharmony_ci      /* here we suppose that we have space enough:
636370b324cSopenharmony_ci         (kMtBtBlockSize - curPos >= p->hashNumAvail) */
637370b324cSopenharmony_ci      p->hashNumAvail = 0;
638370b324cSopenharmony_ci      d[0] = curPos + avail;
639370b324cSopenharmony_ci      d += curPos;
640370b324cSopenharmony_ci      for (; avail != 0; avail--)
641370b324cSopenharmony_ci        *d++ = 0;
642370b324cSopenharmony_ci      return;
643370b324cSopenharmony_ci    }
644370b324cSopenharmony_ci    {
645370b324cSopenharmony_ci      UInt32 size = p->hashBufPosLimit - p->hashBufPos;
646370b324cSopenharmony_ci      UInt32 pos = p->pos;
647370b324cSopenharmony_ci      UInt32 cyclicBufferPos = p->cyclicBufferPos;
648370b324cSopenharmony_ci      UInt32 lenLimit = p->matchMaxLen;
649370b324cSopenharmony_ci      if (lenLimit >= p->hashNumAvail)
650370b324cSopenharmony_ci        lenLimit = p->hashNumAvail;
651370b324cSopenharmony_ci      {
652370b324cSopenharmony_ci        UInt32 size2 = p->hashNumAvail - lenLimit + 1;
653370b324cSopenharmony_ci        if (size2 < size)
654370b324cSopenharmony_ci          size = size2;
655370b324cSopenharmony_ci        size2 = p->cyclicBufferSize - cyclicBufferPos;
656370b324cSopenharmony_ci        if (size2 < size)
657370b324cSopenharmony_ci          size = size2;
658370b324cSopenharmony_ci      }
659370b324cSopenharmony_ci
660370b324cSopenharmony_ci      if (pos > (UInt32)kMtMaxValForNormalize - size)
661370b324cSopenharmony_ci      {
662370b324cSopenharmony_ci        const UInt32 subValue = (pos - p->cyclicBufferSize); // & ~(UInt32)(kNormalizeAlign - 1);
663370b324cSopenharmony_ci        pos -= subValue;
664370b324cSopenharmony_ci        p->pos = pos;
665370b324cSopenharmony_ci        MatchFinder_Normalize3(subValue, p->son, (size_t)p->cyclicBufferSize * 2);
666370b324cSopenharmony_ci      }
667370b324cSopenharmony_ci
668370b324cSopenharmony_ci      #ifndef MFMT_GM_INLINE
669370b324cSopenharmony_ci      while (curPos < limit && size-- != 0)
670370b324cSopenharmony_ci      {
671370b324cSopenharmony_ci        UInt32 *startDistances = d + curPos;
672370b324cSopenharmony_ci        UInt32 num = (UInt32)(GetMatchesSpec1(lenLimit, pos - p->hashBuf[p->hashBufPos++],
673370b324cSopenharmony_ci            pos, p->buffer, p->son, cyclicBufferPos, p->cyclicBufferSize, p->cutValue,
674370b324cSopenharmony_ci            startDistances + 1, p->numHashBytes - 1) - startDistances);
675370b324cSopenharmony_ci        *startDistances = num - 1;
676370b324cSopenharmony_ci        curPos += num;
677370b324cSopenharmony_ci        cyclicBufferPos++;
678370b324cSopenharmony_ci        pos++;
679370b324cSopenharmony_ci        p->buffer++;
680370b324cSopenharmony_ci      }
681370b324cSopenharmony_ci      #else
682370b324cSopenharmony_ci      {
683370b324cSopenharmony_ci        UInt32 posRes = pos;
684370b324cSopenharmony_ci        const UInt32 *d_end;
685370b324cSopenharmony_ci        {
686370b324cSopenharmony_ci          d_end = GetMatchesSpecN_2(
687370b324cSopenharmony_ci              p->buffer + lenLimit - 1,
688370b324cSopenharmony_ci              pos, p->buffer, p->son, p->cutValue, d + curPos,
689370b324cSopenharmony_ci              p->numHashBytes - 1, p->hashBuf + p->hashBufPos,
690370b324cSopenharmony_ci              d + limit, p->hashBuf + p->hashBufPos + size,
691370b324cSopenharmony_ci              cyclicBufferPos, p->cyclicBufferSize,
692370b324cSopenharmony_ci              &posRes);
693370b324cSopenharmony_ci        }
694370b324cSopenharmony_ci        {
695370b324cSopenharmony_ci          if (!d_end)
696370b324cSopenharmony_ci          {
697370b324cSopenharmony_ci            // printf("\n == 2 BtGetMatches() p->failure_BT\n");
698370b324cSopenharmony_ci            // internal data failure
699370b324cSopenharmony_ci            p->failure_BT = True;
700370b324cSopenharmony_ci            d[0] = 0;
701370b324cSopenharmony_ci            // d[1] = 0;
702370b324cSopenharmony_ci            return;
703370b324cSopenharmony_ci          }
704370b324cSopenharmony_ci        }
705370b324cSopenharmony_ci        curPos = (UInt32)(d_end - d);
706370b324cSopenharmony_ci        {
707370b324cSopenharmony_ci          const UInt32 processed = posRes - pos;
708370b324cSopenharmony_ci          pos = posRes;
709370b324cSopenharmony_ci          p->hashBufPos += processed;
710370b324cSopenharmony_ci          cyclicBufferPos += processed;
711370b324cSopenharmony_ci          p->buffer += processed;
712370b324cSopenharmony_ci        }
713370b324cSopenharmony_ci      }
714370b324cSopenharmony_ci      #endif
715370b324cSopenharmony_ci
716370b324cSopenharmony_ci      {
717370b324cSopenharmony_ci        const UInt32 processed = pos - p->pos;
718370b324cSopenharmony_ci        numProcessed += processed;
719370b324cSopenharmony_ci        p->hashNumAvail -= processed;
720370b324cSopenharmony_ci        p->pos = pos;
721370b324cSopenharmony_ci      }
722370b324cSopenharmony_ci      if (cyclicBufferPos == p->cyclicBufferSize)
723370b324cSopenharmony_ci        cyclicBufferPos = 0;
724370b324cSopenharmony_ci      p->cyclicBufferPos = cyclicBufferPos;
725370b324cSopenharmony_ci    }
726370b324cSopenharmony_ci  }
727370b324cSopenharmony_ci
728370b324cSopenharmony_ci  d[0] = curPos;
729370b324cSopenharmony_ci}
730370b324cSopenharmony_ci
731370b324cSopenharmony_ci
732370b324cSopenharmony_cistatic void BtFillBlock(CMatchFinderMt *p, UInt32 globalBlockIndex)
733370b324cSopenharmony_ci{
734370b324cSopenharmony_ci  CMtSync *sync = &p->hashSync;
735370b324cSopenharmony_ci
736370b324cSopenharmony_ci  BUFFER_MUST_BE_UNLOCKED(sync)
737370b324cSopenharmony_ci
738370b324cSopenharmony_ci  if (!sync->needStart)
739370b324cSopenharmony_ci  {
740370b324cSopenharmony_ci    LOCK_BUFFER(sync)
741370b324cSopenharmony_ci  }
742370b324cSopenharmony_ci
743370b324cSopenharmony_ci  BtGetMatches(p, p->btBuf + GET_BT_BLOCK_OFFSET(globalBlockIndex));
744370b324cSopenharmony_ci
745370b324cSopenharmony_ci  /* We suppose that we have called GetNextBlock() from start.
746370b324cSopenharmony_ci     So buffer is LOCKED */
747370b324cSopenharmony_ci
748370b324cSopenharmony_ci  UNLOCK_BUFFER(sync)
749370b324cSopenharmony_ci}
750370b324cSopenharmony_ci
751370b324cSopenharmony_ci
752370b324cSopenharmony_ciZ7_NO_INLINE
753370b324cSopenharmony_cistatic void BtThreadFunc(CMatchFinderMt *mt)
754370b324cSopenharmony_ci{
755370b324cSopenharmony_ci  CMtSync *p = &mt->btSync;
756370b324cSopenharmony_ci  for (;;)
757370b324cSopenharmony_ci  {
758370b324cSopenharmony_ci    UInt32 blockIndex = 0;
759370b324cSopenharmony_ci    Event_Wait(&p->canStart);
760370b324cSopenharmony_ci
761370b324cSopenharmony_ci    for (;;)
762370b324cSopenharmony_ci    {
763370b324cSopenharmony_ci        PRF(printf("  BT thread block = %d  pos = %d\n", (unsigned)blockIndex, mt->pos));
764370b324cSopenharmony_ci      /* (p->exit == true) is possible after (p->canStart) at first loop iteration
765370b324cSopenharmony_ci         and is unexpected after more Wait(freeSemaphore) iterations */
766370b324cSopenharmony_ci      if (p->exit)
767370b324cSopenharmony_ci        return;
768370b324cSopenharmony_ci
769370b324cSopenharmony_ci      Semaphore_Wait(&p->freeSemaphore);
770370b324cSopenharmony_ci
771370b324cSopenharmony_ci      // for faster stop : we check (p->stopWriting) after Wait(freeSemaphore)
772370b324cSopenharmony_ci      if (p->stopWriting)
773370b324cSopenharmony_ci        break;
774370b324cSopenharmony_ci
775370b324cSopenharmony_ci      BtFillBlock(mt, blockIndex++);
776370b324cSopenharmony_ci
777370b324cSopenharmony_ci      Semaphore_Release1(&p->filledSemaphore);
778370b324cSopenharmony_ci    }
779370b324cSopenharmony_ci
780370b324cSopenharmony_ci    // we stop HASH_THREAD here
781370b324cSopenharmony_ci    MtSync_StopWriting(&mt->hashSync);
782370b324cSopenharmony_ci
783370b324cSopenharmony_ci    // p->numBlocks_Sent = blockIndex;
784370b324cSopenharmony_ci    Event_Set(&p->wasStopped);
785370b324cSopenharmony_ci  }
786370b324cSopenharmony_ci}
787370b324cSopenharmony_ci
788370b324cSopenharmony_ci
789370b324cSopenharmony_civoid MatchFinderMt_Construct(CMatchFinderMt *p)
790370b324cSopenharmony_ci{
791370b324cSopenharmony_ci  p->hashBuf = NULL;
792370b324cSopenharmony_ci  MtSync_Construct(&p->hashSync);
793370b324cSopenharmony_ci  MtSync_Construct(&p->btSync);
794370b324cSopenharmony_ci}
795370b324cSopenharmony_ci
796370b324cSopenharmony_cistatic void MatchFinderMt_FreeMem(CMatchFinderMt *p, ISzAllocPtr alloc)
797370b324cSopenharmony_ci{
798370b324cSopenharmony_ci  ISzAlloc_Free(alloc, p->hashBuf);
799370b324cSopenharmony_ci  p->hashBuf = NULL;
800370b324cSopenharmony_ci}
801370b324cSopenharmony_ci
802370b324cSopenharmony_civoid MatchFinderMt_Destruct(CMatchFinderMt *p, ISzAllocPtr alloc)
803370b324cSopenharmony_ci{
804370b324cSopenharmony_ci  /*
805370b324cSopenharmony_ci     HASH_THREAD can use CriticalSection(s) btSync.cs and hashSync.cs.
806370b324cSopenharmony_ci     So we must be sure that HASH_THREAD will not use CriticalSection(s)
807370b324cSopenharmony_ci     after deleting CriticalSection here.
808370b324cSopenharmony_ci
809370b324cSopenharmony_ci     we call ReleaseStream(p)
810370b324cSopenharmony_ci       that calls StopWriting(btSync)
811370b324cSopenharmony_ci         that calls StopWriting(hashSync), if it's required to stop HASH_THREAD.
812370b324cSopenharmony_ci     after StopWriting() it's safe to destruct MtSync(s) in any order */
813370b324cSopenharmony_ci
814370b324cSopenharmony_ci  MatchFinderMt_ReleaseStream(p);
815370b324cSopenharmony_ci
816370b324cSopenharmony_ci  MtSync_Destruct(&p->btSync);
817370b324cSopenharmony_ci  MtSync_Destruct(&p->hashSync);
818370b324cSopenharmony_ci
819370b324cSopenharmony_ci  LOG_ITER(
820370b324cSopenharmony_ci  printf("\nTree %9d * %7d iter = %9d = sum  :  bytes = %9d\n",
821370b324cSopenharmony_ci      (UInt32)(g_NumIters_Tree / 1000),
822370b324cSopenharmony_ci      (UInt32)(((UInt64)g_NumIters_Loop * 1000) / (g_NumIters_Tree + 1)),
823370b324cSopenharmony_ci      (UInt32)(g_NumIters_Loop / 1000),
824370b324cSopenharmony_ci      (UInt32)(g_NumIters_Bytes / 1000)
825370b324cSopenharmony_ci      ));
826370b324cSopenharmony_ci
827370b324cSopenharmony_ci  MatchFinderMt_FreeMem(p, alloc);
828370b324cSopenharmony_ci}
829370b324cSopenharmony_ci
830370b324cSopenharmony_ci
831370b324cSopenharmony_ci#define kHashBufferSize (kMtHashBlockSize * kMtHashNumBlocks)
832370b324cSopenharmony_ci#define kBtBufferSize (kMtBtBlockSize * kMtBtNumBlocks)
833370b324cSopenharmony_ci
834370b324cSopenharmony_ci
835370b324cSopenharmony_cistatic THREAD_FUNC_DECL HashThreadFunc2(void *p) { HashThreadFunc((CMatchFinderMt *)p);  return 0; }
836370b324cSopenharmony_cistatic THREAD_FUNC_DECL BtThreadFunc2(void *p)
837370b324cSopenharmony_ci{
838370b324cSopenharmony_ci  Byte allocaDummy[0x180];
839370b324cSopenharmony_ci  unsigned i = 0;
840370b324cSopenharmony_ci  for (i = 0; i < 16; i++)
841370b324cSopenharmony_ci    allocaDummy[i] = (Byte)0;
842370b324cSopenharmony_ci  if (allocaDummy[0] == 0)
843370b324cSopenharmony_ci    BtThreadFunc((CMatchFinderMt *)p);
844370b324cSopenharmony_ci  return 0;
845370b324cSopenharmony_ci}
846370b324cSopenharmony_ci
847370b324cSopenharmony_ci
848370b324cSopenharmony_ciSRes MatchFinderMt_Create(CMatchFinderMt *p, UInt32 historySize, UInt32 keepAddBufferBefore,
849370b324cSopenharmony_ci    UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAllocPtr alloc)
850370b324cSopenharmony_ci{
851370b324cSopenharmony_ci  CMatchFinder *mf = MF(p);
852370b324cSopenharmony_ci  p->historySize = historySize;
853370b324cSopenharmony_ci  if (kMtBtBlockSize <= matchMaxLen * 4)
854370b324cSopenharmony_ci    return SZ_ERROR_PARAM;
855370b324cSopenharmony_ci  if (!p->hashBuf)
856370b324cSopenharmony_ci  {
857370b324cSopenharmony_ci    p->hashBuf = (UInt32 *)ISzAlloc_Alloc(alloc, ((size_t)kHashBufferSize + (size_t)kBtBufferSize) * sizeof(UInt32));
858370b324cSopenharmony_ci    if (!p->hashBuf)
859370b324cSopenharmony_ci      return SZ_ERROR_MEM;
860370b324cSopenharmony_ci    p->btBuf = p->hashBuf + kHashBufferSize;
861370b324cSopenharmony_ci  }
862370b324cSopenharmony_ci  keepAddBufferBefore += (kHashBufferSize + kBtBufferSize);
863370b324cSopenharmony_ci  keepAddBufferAfter += kMtHashBlockSize;
864370b324cSopenharmony_ci  if (!MatchFinder_Create(mf, historySize, keepAddBufferBefore, matchMaxLen, keepAddBufferAfter, alloc))
865370b324cSopenharmony_ci    return SZ_ERROR_MEM;
866370b324cSopenharmony_ci
867370b324cSopenharmony_ci  RINOK(MtSync_Create(&p->hashSync, HashThreadFunc2, p))
868370b324cSopenharmony_ci  RINOK(MtSync_Create(&p->btSync, BtThreadFunc2, p))
869370b324cSopenharmony_ci  return SZ_OK;
870370b324cSopenharmony_ci}
871370b324cSopenharmony_ci
872370b324cSopenharmony_ci
873370b324cSopenharmony_ciSRes MatchFinderMt_InitMt(CMatchFinderMt *p)
874370b324cSopenharmony_ci{
875370b324cSopenharmony_ci  RINOK(MtSync_Init(&p->hashSync, kMtHashNumBlocks))
876370b324cSopenharmony_ci  return MtSync_Init(&p->btSync, kMtBtNumBlocks);
877370b324cSopenharmony_ci}
878370b324cSopenharmony_ci
879370b324cSopenharmony_ci
880370b324cSopenharmony_cistatic void MatchFinderMt_Init(CMatchFinderMt *p)
881370b324cSopenharmony_ci{
882370b324cSopenharmony_ci  CMatchFinder *mf = MF(p);
883370b324cSopenharmony_ci
884370b324cSopenharmony_ci  p->btBufPos =
885370b324cSopenharmony_ci  p->btBufPosLimit = NULL;
886370b324cSopenharmony_ci  p->hashBufPos =
887370b324cSopenharmony_ci  p->hashBufPosLimit = 0;
888370b324cSopenharmony_ci  p->hashNumAvail = 0; // 21.03
889370b324cSopenharmony_ci
890370b324cSopenharmony_ci  p->failure_BT = False;
891370b324cSopenharmony_ci
892370b324cSopenharmony_ci  /* Init without data reading. We don't want to read data in this thread */
893370b324cSopenharmony_ci  MatchFinder_Init_4(mf);
894370b324cSopenharmony_ci
895370b324cSopenharmony_ci  MatchFinder_Init_LowHash(mf);
896370b324cSopenharmony_ci
897370b324cSopenharmony_ci  p->pointerToCurPos = Inline_MatchFinder_GetPointerToCurrentPos(mf);
898370b324cSopenharmony_ci  p->btNumAvailBytes = 0;
899370b324cSopenharmony_ci  p->failure_LZ_BT = False;
900370b324cSopenharmony_ci  // p->failure_LZ_LZ = False;
901370b324cSopenharmony_ci
902370b324cSopenharmony_ci  p->lzPos =
903370b324cSopenharmony_ci      1; // optimal smallest value
904370b324cSopenharmony_ci      // 0; // for debug: ignores match to start
905370b324cSopenharmony_ci      // kNormalizeAlign; // for debug
906370b324cSopenharmony_ci
907370b324cSopenharmony_ci  p->hash = mf->hash;
908370b324cSopenharmony_ci  p->fixedHashSize = mf->fixedHashSize;
909370b324cSopenharmony_ci  // p->hash4Mask = mf->hash4Mask;
910370b324cSopenharmony_ci  p->crc = mf->crc;
911370b324cSopenharmony_ci  // memcpy(p->crc, mf->crc, sizeof(mf->crc));
912370b324cSopenharmony_ci
913370b324cSopenharmony_ci  p->son = mf->son;
914370b324cSopenharmony_ci  p->matchMaxLen = mf->matchMaxLen;
915370b324cSopenharmony_ci  p->numHashBytes = mf->numHashBytes;
916370b324cSopenharmony_ci
917370b324cSopenharmony_ci  /* (mf->pos) and (mf->streamPos) were already initialized to 1 in MatchFinder_Init_4() */
918370b324cSopenharmony_ci  // mf->streamPos = mf->pos = 1; // optimal smallest value
919370b324cSopenharmony_ci      // 0; // for debug: ignores match to start
920370b324cSopenharmony_ci      // kNormalizeAlign; // for debug
921370b324cSopenharmony_ci
922370b324cSopenharmony_ci  /* we must init (p->pos = mf->pos) for BT, because
923370b324cSopenharmony_ci     BT code needs (p->pos == delta_value_for_empty_hash_record == mf->pos) */
924370b324cSopenharmony_ci  p->pos = mf->pos; // do not change it
925370b324cSopenharmony_ci
926370b324cSopenharmony_ci  p->cyclicBufferPos = (p->pos - CYC_TO_POS_OFFSET);
927370b324cSopenharmony_ci  p->cyclicBufferSize = mf->cyclicBufferSize;
928370b324cSopenharmony_ci  p->buffer = mf->buffer;
929370b324cSopenharmony_ci  p->cutValue = mf->cutValue;
930370b324cSopenharmony_ci  // p->son[0] = p->son[1] = 0; // unused: to init skipped record for speculated accesses.
931370b324cSopenharmony_ci}
932370b324cSopenharmony_ci
933370b324cSopenharmony_ci
934370b324cSopenharmony_ci/* ReleaseStream is required to finish multithreading */
935370b324cSopenharmony_civoid MatchFinderMt_ReleaseStream(CMatchFinderMt *p)
936370b324cSopenharmony_ci{
937370b324cSopenharmony_ci  // Sleep(1); // for debug
938370b324cSopenharmony_ci  MtSync_StopWriting(&p->btSync);
939370b324cSopenharmony_ci  // Sleep(200); // for debug
940370b324cSopenharmony_ci  /* p->MatchFinder->ReleaseStream(); */
941370b324cSopenharmony_ci}
942370b324cSopenharmony_ci
943370b324cSopenharmony_ci
944370b324cSopenharmony_ciZ7_NO_INLINE
945370b324cSopenharmony_cistatic UInt32 MatchFinderMt_GetNextBlock_Bt(CMatchFinderMt *p)
946370b324cSopenharmony_ci{
947370b324cSopenharmony_ci  if (p->failure_LZ_BT)
948370b324cSopenharmony_ci    p->btBufPos = p->failureBuf;
949370b324cSopenharmony_ci  else
950370b324cSopenharmony_ci  {
951370b324cSopenharmony_ci    const UInt32 bi = MtSync_GetNextBlock(&p->btSync);
952370b324cSopenharmony_ci    const UInt32 *bt = p->btBuf + GET_BT_BLOCK_OFFSET(bi);
953370b324cSopenharmony_ci    {
954370b324cSopenharmony_ci      const UInt32 numItems = bt[0];
955370b324cSopenharmony_ci      p->btBufPosLimit = bt + numItems;
956370b324cSopenharmony_ci      p->btNumAvailBytes = bt[1];
957370b324cSopenharmony_ci      p->btBufPos = bt + 2;
958370b324cSopenharmony_ci      if (numItems < 2 || numItems > kMtBtBlockSize)
959370b324cSopenharmony_ci      {
960370b324cSopenharmony_ci        p->failureBuf[0] = 0;
961370b324cSopenharmony_ci        p->btBufPos = p->failureBuf;
962370b324cSopenharmony_ci        p->btBufPosLimit = p->failureBuf + 1;
963370b324cSopenharmony_ci        p->failure_LZ_BT = True;
964370b324cSopenharmony_ci        // p->btNumAvailBytes = 0;
965370b324cSopenharmony_ci        /* we don't want to decrease AvailBytes, that was load before.
966370b324cSopenharmony_ci            that can be unxepected for the code that have loaded anopther value before */
967370b324cSopenharmony_ci      }
968370b324cSopenharmony_ci    }
969370b324cSopenharmony_ci
970370b324cSopenharmony_ci    if (p->lzPos >= (UInt32)kMtMaxValForNormalize - (UInt32)kMtBtBlockSize)
971370b324cSopenharmony_ci    {
972370b324cSopenharmony_ci      /* we don't check (lzPos) over exact avail bytes in (btBuf).
973370b324cSopenharmony_ci         (fixedHashSize) is small, so normalization is fast */
974370b324cSopenharmony_ci      const UInt32 subValue = (p->lzPos - p->historySize - 1); // & ~(UInt32)(kNormalizeAlign - 1);
975370b324cSopenharmony_ci      p->lzPos -= subValue;
976370b324cSopenharmony_ci      MatchFinder_Normalize3(subValue, p->hash, p->fixedHashSize);
977370b324cSopenharmony_ci    }
978370b324cSopenharmony_ci  }
979370b324cSopenharmony_ci  return p->btNumAvailBytes;
980370b324cSopenharmony_ci}
981370b324cSopenharmony_ci
982370b324cSopenharmony_ci
983370b324cSopenharmony_ci
984370b324cSopenharmony_cistatic const Byte * MatchFinderMt_GetPointerToCurrentPos(CMatchFinderMt *p)
985370b324cSopenharmony_ci{
986370b324cSopenharmony_ci  return p->pointerToCurPos;
987370b324cSopenharmony_ci}
988370b324cSopenharmony_ci
989370b324cSopenharmony_ci
990370b324cSopenharmony_ci#define GET_NEXT_BLOCK_IF_REQUIRED if (p->btBufPos == p->btBufPosLimit) MatchFinderMt_GetNextBlock_Bt(p);
991370b324cSopenharmony_ci
992370b324cSopenharmony_ci
993370b324cSopenharmony_cistatic UInt32 MatchFinderMt_GetNumAvailableBytes(CMatchFinderMt *p)
994370b324cSopenharmony_ci{
995370b324cSopenharmony_ci  if (p->btBufPos != p->btBufPosLimit)
996370b324cSopenharmony_ci    return p->btNumAvailBytes;
997370b324cSopenharmony_ci  return MatchFinderMt_GetNextBlock_Bt(p);
998370b324cSopenharmony_ci}
999370b324cSopenharmony_ci
1000370b324cSopenharmony_ci
1001370b324cSopenharmony_ci// #define CHECK_FAILURE_LZ(_match_, _pos_) if (_match_ >= _pos_) { p->failure_LZ_LZ = True;  return d; }
1002370b324cSopenharmony_ci#define CHECK_FAILURE_LZ(_match_, _pos_)
1003370b324cSopenharmony_ci
1004370b324cSopenharmony_cistatic UInt32 * MixMatches2(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
1005370b324cSopenharmony_ci{
1006370b324cSopenharmony_ci  UInt32 h2, c2;
1007370b324cSopenharmony_ci  UInt32 *hash = p->hash;
1008370b324cSopenharmony_ci  const Byte *cur = p->pointerToCurPos;
1009370b324cSopenharmony_ci  const UInt32 m = p->lzPos;
1010370b324cSopenharmony_ci  MT_HASH2_CALC
1011370b324cSopenharmony_ci
1012370b324cSopenharmony_ci  c2 = hash[h2];
1013370b324cSopenharmony_ci  hash[h2] = m;
1014370b324cSopenharmony_ci
1015370b324cSopenharmony_ci  if (c2 >= matchMinPos)
1016370b324cSopenharmony_ci  {
1017370b324cSopenharmony_ci    CHECK_FAILURE_LZ(c2, m)
1018370b324cSopenharmony_ci    if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
1019370b324cSopenharmony_ci    {
1020370b324cSopenharmony_ci      *d++ = 2;
1021370b324cSopenharmony_ci      *d++ = m - c2 - 1;
1022370b324cSopenharmony_ci    }
1023370b324cSopenharmony_ci  }
1024370b324cSopenharmony_ci
1025370b324cSopenharmony_ci  return d;
1026370b324cSopenharmony_ci}
1027370b324cSopenharmony_ci
1028370b324cSopenharmony_cistatic UInt32 * MixMatches3(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
1029370b324cSopenharmony_ci{
1030370b324cSopenharmony_ci  UInt32 h2, h3, c2, c3;
1031370b324cSopenharmony_ci  UInt32 *hash = p->hash;
1032370b324cSopenharmony_ci  const Byte *cur = p->pointerToCurPos;
1033370b324cSopenharmony_ci  const UInt32 m = p->lzPos;
1034370b324cSopenharmony_ci  MT_HASH3_CALC
1035370b324cSopenharmony_ci
1036370b324cSopenharmony_ci  c2 = hash[h2];
1037370b324cSopenharmony_ci  c3 = (hash + kFix3HashSize)[h3];
1038370b324cSopenharmony_ci
1039370b324cSopenharmony_ci  hash[h2] = m;
1040370b324cSopenharmony_ci  (hash + kFix3HashSize)[h3] = m;
1041370b324cSopenharmony_ci
1042370b324cSopenharmony_ci  if (c2 >= matchMinPos)
1043370b324cSopenharmony_ci  {
1044370b324cSopenharmony_ci    CHECK_FAILURE_LZ(c2, m)
1045370b324cSopenharmony_ci    if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
1046370b324cSopenharmony_ci    {
1047370b324cSopenharmony_ci      d[1] = m - c2 - 1;
1048370b324cSopenharmony_ci      if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2])
1049370b324cSopenharmony_ci      {
1050370b324cSopenharmony_ci        d[0] = 3;
1051370b324cSopenharmony_ci        return d + 2;
1052370b324cSopenharmony_ci      }
1053370b324cSopenharmony_ci      d[0] = 2;
1054370b324cSopenharmony_ci      d += 2;
1055370b324cSopenharmony_ci    }
1056370b324cSopenharmony_ci  }
1057370b324cSopenharmony_ci
1058370b324cSopenharmony_ci  if (c3 >= matchMinPos)
1059370b324cSopenharmony_ci  {
1060370b324cSopenharmony_ci    CHECK_FAILURE_LZ(c3, m)
1061370b324cSopenharmony_ci    if (cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0])
1062370b324cSopenharmony_ci    {
1063370b324cSopenharmony_ci      *d++ = 3;
1064370b324cSopenharmony_ci      *d++ = m - c3 - 1;
1065370b324cSopenharmony_ci    }
1066370b324cSopenharmony_ci  }
1067370b324cSopenharmony_ci
1068370b324cSopenharmony_ci  return d;
1069370b324cSopenharmony_ci}
1070370b324cSopenharmony_ci
1071370b324cSopenharmony_ci
1072370b324cSopenharmony_ci#define INCREASE_LZ_POS p->lzPos++; p->pointerToCurPos++;
1073370b324cSopenharmony_ci
1074370b324cSopenharmony_ci/*
1075370b324cSopenharmony_cistatic
1076370b324cSopenharmony_ciUInt32* MatchFinderMt_GetMatches_Bt4(CMatchFinderMt *p, UInt32 *d)
1077370b324cSopenharmony_ci{
1078370b324cSopenharmony_ci  const UInt32 *bt = p->btBufPos;
1079370b324cSopenharmony_ci  const UInt32 len = *bt++;
1080370b324cSopenharmony_ci  const UInt32 *btLim = bt + len;
1081370b324cSopenharmony_ci  UInt32 matchMinPos;
1082370b324cSopenharmony_ci  UInt32 avail = p->btNumAvailBytes - 1;
1083370b324cSopenharmony_ci  p->btBufPos = btLim;
1084370b324cSopenharmony_ci
1085370b324cSopenharmony_ci  {
1086370b324cSopenharmony_ci    p->btNumAvailBytes = avail;
1087370b324cSopenharmony_ci
1088370b324cSopenharmony_ci    #define BT_HASH_BYTES_MAX 5
1089370b324cSopenharmony_ci
1090370b324cSopenharmony_ci    matchMinPos = p->lzPos;
1091370b324cSopenharmony_ci
1092370b324cSopenharmony_ci    if (len != 0)
1093370b324cSopenharmony_ci      matchMinPos -= bt[1];
1094370b324cSopenharmony_ci    else if (avail < (BT_HASH_BYTES_MAX - 1) - 1)
1095370b324cSopenharmony_ci    {
1096370b324cSopenharmony_ci      INCREASE_LZ_POS
1097370b324cSopenharmony_ci      return d;
1098370b324cSopenharmony_ci    }
1099370b324cSopenharmony_ci    else
1100370b324cSopenharmony_ci    {
1101370b324cSopenharmony_ci      const UInt32 hs = p->historySize;
1102370b324cSopenharmony_ci      if (matchMinPos > hs)
1103370b324cSopenharmony_ci        matchMinPos -= hs;
1104370b324cSopenharmony_ci      else
1105370b324cSopenharmony_ci        matchMinPos = 1;
1106370b324cSopenharmony_ci    }
1107370b324cSopenharmony_ci  }
1108370b324cSopenharmony_ci
1109370b324cSopenharmony_ci  for (;;)
1110370b324cSopenharmony_ci  {
1111370b324cSopenharmony_ci
1112370b324cSopenharmony_ci  UInt32 h2, h3, c2, c3;
1113370b324cSopenharmony_ci  UInt32 *hash = p->hash;
1114370b324cSopenharmony_ci  const Byte *cur = p->pointerToCurPos;
1115370b324cSopenharmony_ci  UInt32 m = p->lzPos;
1116370b324cSopenharmony_ci  MT_HASH3_CALC
1117370b324cSopenharmony_ci
1118370b324cSopenharmony_ci  c2 = hash[h2];
1119370b324cSopenharmony_ci  c3 = (hash + kFix3HashSize)[h3];
1120370b324cSopenharmony_ci
1121370b324cSopenharmony_ci  hash[h2] = m;
1122370b324cSopenharmony_ci  (hash + kFix3HashSize)[h3] = m;
1123370b324cSopenharmony_ci
1124370b324cSopenharmony_ci  if (c2 >= matchMinPos && cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
1125370b324cSopenharmony_ci  {
1126370b324cSopenharmony_ci    d[1] = m - c2 - 1;
1127370b324cSopenharmony_ci    if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2])
1128370b324cSopenharmony_ci    {
1129370b324cSopenharmony_ci      d[0] = 3;
1130370b324cSopenharmony_ci      d += 2;
1131370b324cSopenharmony_ci      break;
1132370b324cSopenharmony_ci    }
1133370b324cSopenharmony_ci    // else
1134370b324cSopenharmony_ci    {
1135370b324cSopenharmony_ci      d[0] = 2;
1136370b324cSopenharmony_ci      d += 2;
1137370b324cSopenharmony_ci    }
1138370b324cSopenharmony_ci  }
1139370b324cSopenharmony_ci  if (c3 >= matchMinPos && cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0])
1140370b324cSopenharmony_ci  {
1141370b324cSopenharmony_ci    *d++ = 3;
1142370b324cSopenharmony_ci    *d++ = m - c3 - 1;
1143370b324cSopenharmony_ci  }
1144370b324cSopenharmony_ci  break;
1145370b324cSopenharmony_ci  }
1146370b324cSopenharmony_ci
1147370b324cSopenharmony_ci  if (len != 0)
1148370b324cSopenharmony_ci  {
1149370b324cSopenharmony_ci    do
1150370b324cSopenharmony_ci    {
1151370b324cSopenharmony_ci      const UInt32 v0 = bt[0];
1152370b324cSopenharmony_ci      const UInt32 v1 = bt[1];
1153370b324cSopenharmony_ci      bt += 2;
1154370b324cSopenharmony_ci      d[0] = v0;
1155370b324cSopenharmony_ci      d[1] = v1;
1156370b324cSopenharmony_ci      d += 2;
1157370b324cSopenharmony_ci    }
1158370b324cSopenharmony_ci    while (bt != btLim);
1159370b324cSopenharmony_ci  }
1160370b324cSopenharmony_ci  INCREASE_LZ_POS
1161370b324cSopenharmony_ci  return d;
1162370b324cSopenharmony_ci}
1163370b324cSopenharmony_ci*/
1164370b324cSopenharmony_ci
1165370b324cSopenharmony_ci
1166370b324cSopenharmony_cistatic UInt32 * MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *d)
1167370b324cSopenharmony_ci{
1168370b324cSopenharmony_ci  UInt32 h2, h3, /* h4, */ c2, c3 /* , c4 */;
1169370b324cSopenharmony_ci  UInt32 *hash = p->hash;
1170370b324cSopenharmony_ci  const Byte *cur = p->pointerToCurPos;
1171370b324cSopenharmony_ci  const UInt32 m = p->lzPos;
1172370b324cSopenharmony_ci  MT_HASH3_CALC
1173370b324cSopenharmony_ci  // MT_HASH4_CALC
1174370b324cSopenharmony_ci  c2 = hash[h2];
1175370b324cSopenharmony_ci  c3 = (hash + kFix3HashSize)[h3];
1176370b324cSopenharmony_ci  // c4 = (hash + kFix4HashSize)[h4];
1177370b324cSopenharmony_ci
1178370b324cSopenharmony_ci  hash[h2] = m;
1179370b324cSopenharmony_ci  (hash + kFix3HashSize)[h3] = m;
1180370b324cSopenharmony_ci  // (hash + kFix4HashSize)[h4] = m;
1181370b324cSopenharmony_ci
1182370b324cSopenharmony_ci  // #define BT5_USE_H2
1183370b324cSopenharmony_ci  // #ifdef BT5_USE_H2
1184370b324cSopenharmony_ci  if (c2 >= matchMinPos && cur[(ptrdiff_t)c2 - (ptrdiff_t)m] == cur[0])
1185370b324cSopenharmony_ci  {
1186370b324cSopenharmony_ci    d[1] = m - c2 - 1;
1187370b324cSopenharmony_ci    if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 2] == cur[2])
1188370b324cSopenharmony_ci    {
1189370b324cSopenharmony_ci      // d[0] = (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 3] == cur[3]) ? 4 : 3;
1190370b324cSopenharmony_ci      // return d + 2;
1191370b324cSopenharmony_ci
1192370b324cSopenharmony_ci      if (cur[(ptrdiff_t)c2 - (ptrdiff_t)m + 3] == cur[3])
1193370b324cSopenharmony_ci      {
1194370b324cSopenharmony_ci        d[0] = 4;
1195370b324cSopenharmony_ci        return d + 2;
1196370b324cSopenharmony_ci      }
1197370b324cSopenharmony_ci      d[0] = 3;
1198370b324cSopenharmony_ci      d += 2;
1199370b324cSopenharmony_ci
1200370b324cSopenharmony_ci      #ifdef BT5_USE_H4
1201370b324cSopenharmony_ci      if (c4 >= matchMinPos)
1202370b324cSopenharmony_ci        if (
1203370b324cSopenharmony_ci          cur[(ptrdiff_t)c4 - (ptrdiff_t)m]     == cur[0] &&
1204370b324cSopenharmony_ci          cur[(ptrdiff_t)c4 - (ptrdiff_t)m + 3] == cur[3]
1205370b324cSopenharmony_ci          )
1206370b324cSopenharmony_ci      {
1207370b324cSopenharmony_ci        *d++ = 4;
1208370b324cSopenharmony_ci        *d++ = m - c4 - 1;
1209370b324cSopenharmony_ci      }
1210370b324cSopenharmony_ci      #endif
1211370b324cSopenharmony_ci      return d;
1212370b324cSopenharmony_ci    }
1213370b324cSopenharmony_ci    d[0] = 2;
1214370b324cSopenharmony_ci    d += 2;
1215370b324cSopenharmony_ci  }
1216370b324cSopenharmony_ci  // #endif
1217370b324cSopenharmony_ci
1218370b324cSopenharmony_ci  if (c3 >= matchMinPos && cur[(ptrdiff_t)c3 - (ptrdiff_t)m] == cur[0])
1219370b324cSopenharmony_ci  {
1220370b324cSopenharmony_ci    d[1] = m - c3 - 1;
1221370b324cSopenharmony_ci    if (cur[(ptrdiff_t)c3 - (ptrdiff_t)m + 3] == cur[3])
1222370b324cSopenharmony_ci    {
1223370b324cSopenharmony_ci      d[0] = 4;
1224370b324cSopenharmony_ci      return d + 2;
1225370b324cSopenharmony_ci    }
1226370b324cSopenharmony_ci    d[0] = 3;
1227370b324cSopenharmony_ci    d += 2;
1228370b324cSopenharmony_ci  }
1229370b324cSopenharmony_ci
1230370b324cSopenharmony_ci  #ifdef BT5_USE_H4
1231370b324cSopenharmony_ci  if (c4 >= matchMinPos)
1232370b324cSopenharmony_ci    if (
1233370b324cSopenharmony_ci      cur[(ptrdiff_t)c4 - (ptrdiff_t)m]     == cur[0] &&
1234370b324cSopenharmony_ci      cur[(ptrdiff_t)c4 - (ptrdiff_t)m + 3] == cur[3]
1235370b324cSopenharmony_ci      )
1236370b324cSopenharmony_ci    {
1237370b324cSopenharmony_ci      *d++ = 4;
1238370b324cSopenharmony_ci      *d++ = m - c4 - 1;
1239370b324cSopenharmony_ci    }
1240370b324cSopenharmony_ci  #endif
1241370b324cSopenharmony_ci
1242370b324cSopenharmony_ci  return d;
1243370b324cSopenharmony_ci}
1244370b324cSopenharmony_ci
1245370b324cSopenharmony_ci
1246370b324cSopenharmony_cistatic UInt32 * MatchFinderMt2_GetMatches(CMatchFinderMt *p, UInt32 *d)
1247370b324cSopenharmony_ci{
1248370b324cSopenharmony_ci  const UInt32 *bt = p->btBufPos;
1249370b324cSopenharmony_ci  const UInt32 len = *bt++;
1250370b324cSopenharmony_ci  const UInt32 *btLim = bt + len;
1251370b324cSopenharmony_ci  p->btBufPos = btLim;
1252370b324cSopenharmony_ci  p->btNumAvailBytes--;
1253370b324cSopenharmony_ci  INCREASE_LZ_POS
1254370b324cSopenharmony_ci  {
1255370b324cSopenharmony_ci    while (bt != btLim)
1256370b324cSopenharmony_ci    {
1257370b324cSopenharmony_ci      const UInt32 v0 = bt[0];
1258370b324cSopenharmony_ci      const UInt32 v1 = bt[1];
1259370b324cSopenharmony_ci      bt += 2;
1260370b324cSopenharmony_ci      d[0] = v0;
1261370b324cSopenharmony_ci      d[1] = v1;
1262370b324cSopenharmony_ci      d += 2;
1263370b324cSopenharmony_ci    }
1264370b324cSopenharmony_ci  }
1265370b324cSopenharmony_ci  return d;
1266370b324cSopenharmony_ci}
1267370b324cSopenharmony_ci
1268370b324cSopenharmony_ci
1269370b324cSopenharmony_ci
1270370b324cSopenharmony_cistatic UInt32 * MatchFinderMt_GetMatches(CMatchFinderMt *p, UInt32 *d)
1271370b324cSopenharmony_ci{
1272370b324cSopenharmony_ci  const UInt32 *bt = p->btBufPos;
1273370b324cSopenharmony_ci  UInt32 len = *bt++;
1274370b324cSopenharmony_ci  const UInt32 avail = p->btNumAvailBytes - 1;
1275370b324cSopenharmony_ci  p->btNumAvailBytes = avail;
1276370b324cSopenharmony_ci  p->btBufPos = bt + len;
1277370b324cSopenharmony_ci  if (len == 0)
1278370b324cSopenharmony_ci  {
1279370b324cSopenharmony_ci    #define BT_HASH_BYTES_MAX 5
1280370b324cSopenharmony_ci    if (avail >= (BT_HASH_BYTES_MAX - 1) - 1)
1281370b324cSopenharmony_ci    {
1282370b324cSopenharmony_ci      UInt32 m = p->lzPos;
1283370b324cSopenharmony_ci      if (m > p->historySize)
1284370b324cSopenharmony_ci        m -= p->historySize;
1285370b324cSopenharmony_ci      else
1286370b324cSopenharmony_ci        m = 1;
1287370b324cSopenharmony_ci      d = p->MixMatchesFunc(p, m, d);
1288370b324cSopenharmony_ci    }
1289370b324cSopenharmony_ci  }
1290370b324cSopenharmony_ci  else
1291370b324cSopenharmony_ci  {
1292370b324cSopenharmony_ci    /*
1293370b324cSopenharmony_ci      first match pair from BinTree: (match_len, match_dist),
1294370b324cSopenharmony_ci      (match_len >= numHashBytes).
1295370b324cSopenharmony_ci      MixMatchesFunc() inserts only hash matches that are nearer than (match_dist)
1296370b324cSopenharmony_ci    */
1297370b324cSopenharmony_ci    d = p->MixMatchesFunc(p, p->lzPos - bt[1], d);
1298370b324cSopenharmony_ci    // if (d) // check for failure
1299370b324cSopenharmony_ci    do
1300370b324cSopenharmony_ci    {
1301370b324cSopenharmony_ci      const UInt32 v0 = bt[0];
1302370b324cSopenharmony_ci      const UInt32 v1 = bt[1];
1303370b324cSopenharmony_ci      bt += 2;
1304370b324cSopenharmony_ci      d[0] = v0;
1305370b324cSopenharmony_ci      d[1] = v1;
1306370b324cSopenharmony_ci      d += 2;
1307370b324cSopenharmony_ci    }
1308370b324cSopenharmony_ci    while (len -= 2);
1309370b324cSopenharmony_ci  }
1310370b324cSopenharmony_ci  INCREASE_LZ_POS
1311370b324cSopenharmony_ci  return d;
1312370b324cSopenharmony_ci}
1313370b324cSopenharmony_ci
1314370b324cSopenharmony_ci#define SKIP_HEADER2_MT  do { GET_NEXT_BLOCK_IF_REQUIRED
1315370b324cSopenharmony_ci#define SKIP_HEADER_MT(n) SKIP_HEADER2_MT if (p->btNumAvailBytes-- >= (n)) { const Byte *cur = p->pointerToCurPos; UInt32 *hash = p->hash;
1316370b324cSopenharmony_ci#define SKIP_FOOTER_MT } INCREASE_LZ_POS p->btBufPos += (size_t)*p->btBufPos + 1; } while (--num != 0);
1317370b324cSopenharmony_ci
1318370b324cSopenharmony_cistatic void MatchFinderMt0_Skip(CMatchFinderMt *p, UInt32 num)
1319370b324cSopenharmony_ci{
1320370b324cSopenharmony_ci  SKIP_HEADER2_MT { p->btNumAvailBytes--;
1321370b324cSopenharmony_ci  SKIP_FOOTER_MT
1322370b324cSopenharmony_ci}
1323370b324cSopenharmony_ci
1324370b324cSopenharmony_cistatic void MatchFinderMt2_Skip(CMatchFinderMt *p, UInt32 num)
1325370b324cSopenharmony_ci{
1326370b324cSopenharmony_ci  SKIP_HEADER_MT(2)
1327370b324cSopenharmony_ci      UInt32 h2;
1328370b324cSopenharmony_ci      MT_HASH2_CALC
1329370b324cSopenharmony_ci      hash[h2] = p->lzPos;
1330370b324cSopenharmony_ci  SKIP_FOOTER_MT
1331370b324cSopenharmony_ci}
1332370b324cSopenharmony_ci
1333370b324cSopenharmony_cistatic void MatchFinderMt3_Skip(CMatchFinderMt *p, UInt32 num)
1334370b324cSopenharmony_ci{
1335370b324cSopenharmony_ci  SKIP_HEADER_MT(3)
1336370b324cSopenharmony_ci      UInt32 h2, h3;
1337370b324cSopenharmony_ci      MT_HASH3_CALC
1338370b324cSopenharmony_ci      (hash + kFix3HashSize)[h3] =
1339370b324cSopenharmony_ci      hash[                h2] =
1340370b324cSopenharmony_ci        p->lzPos;
1341370b324cSopenharmony_ci  SKIP_FOOTER_MT
1342370b324cSopenharmony_ci}
1343370b324cSopenharmony_ci
1344370b324cSopenharmony_ci/*
1345370b324cSopenharmony_ci// MatchFinderMt4_Skip() is similar to MatchFinderMt3_Skip().
1346370b324cSopenharmony_ci// The difference is that MatchFinderMt3_Skip() updates hash for last 3 bytes of stream.
1347370b324cSopenharmony_ci
1348370b324cSopenharmony_cistatic void MatchFinderMt4_Skip(CMatchFinderMt *p, UInt32 num)
1349370b324cSopenharmony_ci{
1350370b324cSopenharmony_ci  SKIP_HEADER_MT(4)
1351370b324cSopenharmony_ci      UInt32 h2, h3; // h4
1352370b324cSopenharmony_ci      MT_HASH3_CALC
1353370b324cSopenharmony_ci      // MT_HASH4_CALC
1354370b324cSopenharmony_ci      // (hash + kFix4HashSize)[h4] =
1355370b324cSopenharmony_ci      (hash + kFix3HashSize)[h3] =
1356370b324cSopenharmony_ci      hash[                h2] =
1357370b324cSopenharmony_ci        p->lzPos;
1358370b324cSopenharmony_ci  SKIP_FOOTER_MT
1359370b324cSopenharmony_ci}
1360370b324cSopenharmony_ci*/
1361370b324cSopenharmony_ci
1362370b324cSopenharmony_civoid MatchFinderMt_CreateVTable(CMatchFinderMt *p, IMatchFinder2 *vTable)
1363370b324cSopenharmony_ci{
1364370b324cSopenharmony_ci  vTable->Init = (Mf_Init_Func)MatchFinderMt_Init;
1365370b324cSopenharmony_ci  vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinderMt_GetNumAvailableBytes;
1366370b324cSopenharmony_ci  vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinderMt_GetPointerToCurrentPos;
1367370b324cSopenharmony_ci  vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches;
1368370b324cSopenharmony_ci
1369370b324cSopenharmony_ci  switch (MF(p)->numHashBytes)
1370370b324cSopenharmony_ci  {
1371370b324cSopenharmony_ci    case 2:
1372370b324cSopenharmony_ci      p->GetHeadsFunc = GetHeads2;
1373370b324cSopenharmony_ci      p->MixMatchesFunc = (Mf_Mix_Matches)NULL;
1374370b324cSopenharmony_ci      vTable->Skip = (Mf_Skip_Func)MatchFinderMt0_Skip;
1375370b324cSopenharmony_ci      vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt2_GetMatches;
1376370b324cSopenharmony_ci      break;
1377370b324cSopenharmony_ci    case 3:
1378370b324cSopenharmony_ci      p->GetHeadsFunc = MF(p)->bigHash ? GetHeads3b : GetHeads3;
1379370b324cSopenharmony_ci      p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches2;
1380370b324cSopenharmony_ci      vTable->Skip = (Mf_Skip_Func)MatchFinderMt2_Skip;
1381370b324cSopenharmony_ci      break;
1382370b324cSopenharmony_ci    case 4:
1383370b324cSopenharmony_ci      p->GetHeadsFunc = MF(p)->bigHash ? GetHeads4b : GetHeads4;
1384370b324cSopenharmony_ci
1385370b324cSopenharmony_ci      // it's fast inline version of GetMatches()
1386370b324cSopenharmony_ci      // vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches_Bt4;
1387370b324cSopenharmony_ci
1388370b324cSopenharmony_ci      p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches3;
1389370b324cSopenharmony_ci      vTable->Skip = (Mf_Skip_Func)MatchFinderMt3_Skip;
1390370b324cSopenharmony_ci      break;
1391370b324cSopenharmony_ci    default:
1392370b324cSopenharmony_ci      p->GetHeadsFunc = MF(p)->bigHash ? GetHeads5b : GetHeads5;
1393370b324cSopenharmony_ci      p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches4;
1394370b324cSopenharmony_ci      vTable->Skip =
1395370b324cSopenharmony_ci          (Mf_Skip_Func)MatchFinderMt3_Skip;
1396370b324cSopenharmony_ci          // (Mf_Skip_Func)MatchFinderMt4_Skip;
1397370b324cSopenharmony_ci      break;
1398370b324cSopenharmony_ci  }
1399370b324cSopenharmony_ci}
1400370b324cSopenharmony_ci
1401370b324cSopenharmony_ci#undef RINOK_THREAD
1402370b324cSopenharmony_ci#undef PRF
1403370b324cSopenharmony_ci#undef MF
1404370b324cSopenharmony_ci#undef GetUi24hi_from32
1405370b324cSopenharmony_ci#undef LOCK_BUFFER
1406370b324cSopenharmony_ci#undef UNLOCK_BUFFER
1407