1 // Bench.cpp
2 
3 #include "StdAfx.h"
4 
5 #include "../../../../C/CpuArch.h"
6 
7 // #include <stdio.h>
8 
9 #ifndef _WIN32
10 
11 #define USE_POSIX_TIME
12 #define USE_POSIX_TIME2
13 #endif // _WIN32
14 
15 #ifdef USE_POSIX_TIME
16 #include <time.h>
17 #include <unistd.h>
18 #ifdef USE_POSIX_TIME2
19 #include <sys/time.h>
20 #include <sys/times.h>
21 #endif
22 #endif // USE_POSIX_TIME
23 
24 #ifdef _WIN32
25 #define USE_ALLOCA
26 #endif
27 
28 #ifdef USE_ALLOCA
29 #ifdef _WIN32
30 #include <malloc.h>
31 #else
32 #include <stdlib.h>
33 #endif
34 #endif
35 
36 #include "../../../../C/7zCrc.h"
37 #include "../../../../C/RotateDefs.h"
38 
39 #ifndef Z7_ST
40 #include "../../../Windows/Synchronization.h"
41 #include "../../../Windows/Thread.h"
42 #endif
43 
44 #include "../../../Windows/FileFind.h"
45 #include "../../../Windows/FileIO.h"
46 #include "../../../Windows/SystemInfo.h"
47 
48 #include "../../../Common/MyBuffer2.h"
49 #include "../../../Common/IntToString.h"
50 #include "../../../Common/StringConvert.h"
51 #include "../../../Common/StringToInt.h"
52 #include "../../../Common/Wildcard.h"
53 
54 #include "../../Common/MethodProps.h"
55 #include "../../Common/StreamObjects.h"
56 #include "../../Common/StreamUtils.h"
57 
58 #include "Bench.h"
59 
60 using namespace NWindows;
61 
#ifndef Z7_ST
// Numeric id associated with LZMA; defined only in multithreaded builds (Z7_ST is not defined).
// NOTE(review): presumably the 7-Zip method id of the LZMA codec - confirm at the use site.
static const UInt32 k_LZMA = 0x030101;
#endif

// Default work budget ("complexity in commands"): 2^31 for UNDER_CE builds, 2^34 otherwise.
// SetComplexCommandsMs() stores this value when it does not derive a budget from the CPU frequency.
static const UInt64 kComplexInCommands = (UInt64)1 <<
  #ifdef UNDER_CE
    31;
  #else
    34;
  #endif

// NOTE(review): looks like the default benchmark duration in milliseconds - confirm at the use site.
static const UInt32 kComplexInMs = 4000;
74 
SetComplexCommandsMs(UInt32 complexInMs, bool isSpecifiedFreq, UInt64 cpuFreq, UInt64 &complexInCommands)75 static void SetComplexCommandsMs(UInt32 complexInMs,
76     bool isSpecifiedFreq, UInt64 cpuFreq, UInt64 &complexInCommands)
77 {
78   complexInCommands = kComplexInCommands;
79   const UInt64 kMinFreq = (UInt64)1000000 * 4;
80   const UInt64 kMaxFreq = (UInt64)1000000 * 20000;
81   if (cpuFreq < kMinFreq && !isSpecifiedFreq)
82     cpuFreq = kMinFreq;
83   if (cpuFreq < kMaxFreq || isSpecifiedFreq)
84   {
85     if (complexInMs != 0)
86       complexInCommands = complexInMs * cpuFreq / 1000;
87     else
88       complexInCommands = cpuFreq >> 2;
89   }
90 }
91 
92 // const UInt64 kBenchmarkUsageMult = 1000000; // for debug
93 static const unsigned kBenchmarkUsageMultBits = 16;
94 static const UInt64 kBenchmarkUsageMult = 1 << kBenchmarkUsageMultBits;
95 
Benchmark_GetUsage_Percents(UInt64 usage)96 UInt64 Benchmark_GetUsage_Percents(UInt64 usage)
97 {
98   return (100 * usage + kBenchmarkUsageMult / 2) / kBenchmarkUsageMult;
99 }
100 
// NOTE(review): the next three constants are not used in this part of the file;
// judging by the names they describe hash / filter / old-LZMA benchmark sizes - confirm at the use sites.
static const unsigned kNumHashDictBits = 17;
static const UInt32 kFilterUnpackSize = (47 << 10); // + 5; // for test

static const unsigned kOldLzmaDictBits = 32;

// static const size_t kAdditionalSize = (size_t)1 << 32; // for debug
static const size_t kAdditionalSize = (size_t)1 << 16;
// fixed part of the reserve that GetBenchCompressedSize() adds to the buffer size
static const UInt32 kCompressedAdditionalSize = (1 << 10);

// capacity (in bytes) of CEncoderInfo::propsData
static const UInt32 kMaxMethodPropSize = (1 << 6);
111 
112 
/*
  Calls (_buffer_)->Alloc(_size_) and makes the enclosing function return E_OUTOFMEMORY,
  if a non-zero size was requested and the buffer is not allocated after the call.
  Notes:
    - (_size_) is evaluated twice, so pass an expression without side effects.
    - the macro expands to a complete { } block; the call sites in this file
      use it without a trailing semicolon.
*/
#define ALLOC_WITH_HRESULT(_buffer_, _size_) \
  { (_buffer_)->Alloc(_size_); \
  if (_size_ && !(_buffer_)->IsAllocated()) return E_OUTOFMEMORY; }
116 
117 
118 class CBaseRandomGenerator
119 {
120   UInt32 A1;
121   UInt32 A2;
122   UInt32 Salt;
123 public:
CBaseRandomGenerator(UInt32 salt = 0)124   CBaseRandomGenerator(UInt32 salt = 0): Salt(salt) { Init(); }
Init()125   void Init() { A1 = 362436069; A2 = 521288629;}
126   Z7_FORCE_INLINE
GetRnd()127   UInt32 GetRnd()
128   {
129     return Salt ^
130     (
131       ((A1 = 36969 * (A1 & 0xffff) + (A1 >> 16)) << 16) +
132       ((A2 = 18000 * (A2 & 0xffff) + (A2 >> 16)) )
133     );
134   }
135 };
136 
137 
138 Z7_NO_INLINE
RandGen(Byte *buf, size_t size)139 static void RandGen(Byte *buf, size_t size)
140 {
141   CBaseRandomGenerator RG;
142   const size_t size4 = size & ~((size_t)3);
143   size_t i;
144   for (i = 0; i < size4; i += 4)
145   {
146     const UInt32 v = RG.GetRnd();
147     SetUi32(buf + i, v)
148   }
149   UInt32 v = RG.GetRnd();
150   for (; i < size; i++)
151   {
152     buf[i] = (Byte)v;
153     v >>= 8;
154   }
155 }
156 
157 
/*
  Buffer (CMidAlignedBuffer) that can fill itself with deterministic test data.
  The buffer must be allocated by the caller before Generate*() is called:
  both generators fill exactly Size() bytes.
  The produced bytes depend on the exact sequence of rg.GetRnd() calls,
  so the statement order in GenerateLz() must not be changed.
*/
class CBenchRandomGenerator: public CMidAlignedBuffer
{
  // Extracts the (numBits) low bits of (res) and removes them from (res).
  static UInt32 GetVal(UInt32 &res, unsigned numBits)
  {
    UInt32 val = res & (((UInt32)1 << numBits) - 1);
    res >>= numBits;
    return val;
  }

  // Reads a 2-bit width selector (w), then returns the next (1 + w) bits of (r).
  // The result is in range [0, 15].
  static UInt32 GetLen(UInt32 &r)
  {
    UInt32 len = GetVal(r, 2);
    return GetVal(r, 1 + len);
  }

public:

  // Fills the buffer with the low bytes of consecutive random values (one GetRnd() call per byte).
  void GenerateSimpleRandom(UInt32 salt)
  {
    CBaseRandomGenerator rg(salt);
    const size_t bufSize = Size();
    Byte *buf = (Byte *)*this;
    for (size_t i = 0; i < bufSize; i++)
      buf[i] = (Byte)rg.GetRnd();
  }

  /*
    Fills the buffer with a mix of random literal bytes and copies of earlier data
    (matches), so that the data contains repetitions.
      dictBits : upper limit for the number of bits of a newly selected match distance
      salt     : salt for the random generator
  */
  void GenerateLz(unsigned dictBits, UInt32 salt)
  {
    CBaseRandomGenerator rg(salt);
    size_t pos = 0;
    size_t rep0 = 1;       // current match distance; it is reused, if no new distance is selected
    const size_t bufSize = Size();
    Byte *buf = (Byte *)*this;
    unsigned posBits = 1;  // grows so that ((size_t)1 << posBits) >= pos at the points where it is updated

    // printf("\n dictBits = %d\n", (UInt32)dictBits);
    // printf("\n bufSize = 0x%p\n", (const void *)bufSize);

    while (pos < bufSize)
    {
      /*
      if (pos >= ((UInt32)1 << 31))
        printf(" %x\n", pos);
      */
      UInt32 r = rg.GetRnd();
      // literal: selected by one random bit; the first 1024 bytes are always literals
      if (GetVal(r, 1) == 0 || pos < 1024)
        buf[pos++] = (Byte)(r & 0xFF);
      else
      {
        UInt32 len;
        len = 1 + GetLen(r);

        // a non-zero 3-bit value selects a new distance (and a longer match);
        // otherwise the previous rep0 is used again
        if (GetVal(r, 3) != 0)
        {
          len += GetLen(r);

          while (((size_t)1 << posBits) < pos)
            posBits++;

          // the distance can not use more bits than min(dictBits, posBits)
          unsigned numBitsMax = dictBits;
          if (numBitsMax > posBits)
            numBitsMax = posBits;

          const unsigned kAddBits = 6;
          unsigned numLogBits = 5;
          // 4 random bits are enough to select any allowed bit count, if the limit is small
          if (numBitsMax <= (1 << 4) - 1 + kAddBits)
            numLogBits = 4;

          // rejection loop: select a bit count (ppp), then a distance of (ppp) bits,
          // until the distance points inside the data generated so far
          for (;;)
          {
            const UInt32 ppp = GetVal(r, numLogBits) + kAddBits;
            r = rg.GetRnd();
            if (ppp > numBitsMax)
              continue;
            // rep0 = GetVal(r, ppp);
            rep0 = r & (((size_t)1 << ppp) - 1);
            if (rep0 < pos)
              break;
            r = rg.GetRnd();
          }
          rep0++;
        }

        // len *= 300; // for debug
        {
          // the match must not cross the end of the buffer
          const size_t rem = bufSize - pos;
          if (len > rem)
            len = (UInt32)rem;
        }
        Byte *dest = buf + pos;
        const Byte *src = dest - rep0;
        pos += len;
        // byte-by-byte copy: source and destination can overlap, if (rep0 < len)
        for (UInt32 i = 0; i < len; i++)
          *dest++ = *src++;
      }
    }
    // printf("\n CRC = %x\n", CrcCalc(buf, bufSize));
  }
};
257 
258 
259 Z7_CLASS_IMP_NOQIB_1(
260   CBenchmarkInStream
261   , ISequentialInStream
262 )
263   const Byte *Data;
264   size_t Pos;
265   size_t Size;
266 public:
Init(const Byte *data, size_t size)267   void Init(const Byte *data, size_t size)
268   {
269     Data = data;
270     Size = size;
271     Pos = 0;
272   }
WasFinished() const273   bool WasFinished() const { return Pos == Size; }
274 };
275 
276 Z7_COM7F_IMF(CBenchmarkInStream::Read(void *data, UInt32 size, UInt32 *processedSize))
277 {
278   const UInt32 kMaxBlockSize = (1 << 20);
279   if (size > kMaxBlockSize)
280     size = kMaxBlockSize;
281   const size_t remain = Size - Pos;
282   if (size > remain)
283     size = (UInt32)remain;
284 
285   if (size != 0)
286     memcpy(data, Data + Pos, size);
287 
288   Pos += size;
289   if (processedSize)
290     *processedSize = size;
291   return S_OK;
292 }
293 
294 
/*
  Sequential output stream with a fixed-size buffer (CMidAlignedBuffer base).
  Write() optionally copies the data to the buffer (RealCopy) and optionally
  updates the CRC of the written data (CalcCrc).
  The buffer must be allocated by the owner; Init() must be called before Write().
*/
class CBenchmarkOutStream Z7_final:
  public ISequentialOutStream,
  public CMyUnknownImp,
  public CMidAlignedBuffer
{
  Z7_COM_UNKNOWN_IMP_0
  Z7_IFACE_COM7_IMP(ISequentialOutStream)
  // bool _overflow;
public:
  size_t Pos;     // number of bytes accepted by Write() since Init()
  bool RealCopy;  // if true, Write() copies data to the buffer
  bool CalcCrc;   // if true, Write() updates (Crc)
  UInt32 Crc;     // running CRC state (starts from CRC_INIT_VAL)

  // CBenchmarkOutStream(): _overflow(false) {}
  // Resets the position and the CRC state, and selects the modes of Write().
  void Init(bool realCopy, bool calcCrc)
  {
    Crc = CRC_INIT_VAL;
    RealCopy = realCopy;
    CalcCrc = calcCrc;
    // _overflow = false;
    Pos = 0;
  }

  // Resets only the CRC state.
  void InitCrc()
  {
    Crc = CRC_INIT_VAL;
  }

  // Updates the CRC state with (size) bytes of (data).
  void Calc(const void *data, size_t size)
  {
    Crc = CrcUpdate(Crc, data, size);
  }

  size_t GetPos() const { return Pos; }

  // void Print() { printf("\n%8d %8d\n", (unsigned)BufferSize, (unsigned)Pos); }
};
333 
334 Z7_COM7F_IMF(CBenchmarkOutStream::Write(const void *data, UInt32 size, UInt32 *processedSize))
335 {
336   size_t curSize = Size() - Pos;
337   if (curSize > size)
338     curSize = size;
339   if (curSize != 0)
340   {
341     if (RealCopy)
342       memcpy(((Byte *)*this) + Pos, data, curSize);
343     if (CalcCrc)
344       Calc(data, curSize);
345     Pos += curSize;
346   }
347   if (processedSize)
348     *processedSize = (UInt32)curSize;
349   if (curSize != size)
350   {
351     // _overflow = true;
352     return E_FAIL;
353   }
354   return S_OK;
355 }
356 
357 
/*
  Output stream that does not store data: Write() only counts the bytes
  and (optionally) updates the CRC of the written data.
  Crc and Pos are not set by the constructor: call Init() before the first Write().
*/
Z7_CLASS_IMP_NOQIB_1(
  CCrcOutStream
  , ISequentialOutStream
)
public:
  bool CalcCrc;  // if true (default), Write() updates (Crc)
  UInt32 Crc;    // running CRC state (starts from CRC_INIT_VAL)
  UInt64 Pos;    // total number of bytes written since Init()

  CCrcOutStream(): CalcCrc(true) {}
  // Resets the CRC state and the byte counter.
  void Init() { Crc = CRC_INIT_VAL; Pos = 0; }
  // Updates the CRC state with (size) bytes of (data).
  void Calc(const void *data, size_t size)
  {
    Crc = CrcUpdate(Crc, data, size);
  }
};
374 
375 Z7_COM7F_IMF(CCrcOutStream::Write(const void *data, UInt32 size, UInt32 *processedSize))
376 {
377   if (CalcCrc)
378     Calc(data, size);
379   Pos += size;
380   if (processedSize)
381     *processedSize = size;
382   return S_OK;
383 }
384 
385 // #include "../../../../C/My_sys_time.h"
386 
387 static UInt64 GetTimeCount()
388 {
389   #ifdef USE_POSIX_TIME
390   #ifdef USE_POSIX_TIME2
391   timeval v;
392   if (gettimeofday(&v, NULL) == 0)
393     return (UInt64)(v.tv_sec) * 1000000 + (UInt64)v.tv_usec;
394   return (UInt64)time(NULL) * 1000000;
395   #else
396   return time(NULL);
397   #endif
398   #else
399   LARGE_INTEGER value;
400   if (::QueryPerformanceCounter(&value))
401     return (UInt64)value.QuadPart;
402   return GetTickCount();
403   #endif
404 }
405 
406 static UInt64 GetFreq()
407 {
408   #ifdef USE_POSIX_TIME
409   #ifdef USE_POSIX_TIME2
410   return 1000000;
411   #else
412   return 1;
413   #endif
414   #else
415   LARGE_INTEGER value;
416   if (::QueryPerformanceFrequency(&value))
417     return (UInt64)value.QuadPart;
418   return 1000;
419   #endif
420 }
421 
422 
423 #ifdef USE_POSIX_TIME
424 
425 struct CUserTime
426 {
427   UInt64 Sum;
428   clock_t Prev;
429 
430   void Init()
431   {
432     // Prev = clock();
433     Sum = 0;
434     Prev = 0;
435     Update();
436     Sum = 0;
437   }
438 
439   void Update()
440   {
441     tms t;
442     /* clock_t res = */ times(&t);
443     clock_t newVal = t.tms_utime + t.tms_stime;
444     Sum += (UInt64)(newVal - Prev);
445     Prev = newVal;
446 
447     /*
448     clock_t v = clock();
449     if (v != -1)
450     {
451       Sum += v - Prev;
452       Prev = v;
453     }
454     */
455   }
456   UInt64 GetUserTime()
457   {
458     Update();
459     return Sum;
460   }
461 };
462 
463 #else
464 
465 
/*
  Windows version: accumulates the CPU time (kernel + user) consumed since Init(),
  in 100-nanosecond units (see GetUserFreq()).
  If the system call for CPU times fails, Update() switches permanently (until Init())
  to GetTickCount() deltas converted to the same unit.
*/
struct CUserTime
{
  bool UseTick;     // true after the CPU-time query failed once
  DWORD Prev_Tick;  // GetTickCount() value seen by the previous Update()
  UInt64 Prev;      // CPU-time value seen by the previous successful query
  UInt64 Sum;       // time accumulated since Init()

  // Starts a new measurement from the current counter values.
  void Init()
  {
    UseTick = false;
    Prev_Tick = 0;
    Prev = 0;
    Sum = 0;
    Update();
    // drop the delta that was added by the Update() call above
    Sum = 0;
  }
  // Refreshes the measurement and returns the accumulated time.
  UInt64 GetUserTime()
  {
    Update();
    return Sum;
  }
  void Update();
};
489 
490 static inline UInt64 GetTime64(const FILETIME &t) { return ((UInt64)t.dwHighDateTime << 32) | t.dwLowDateTime; }
491 
492 void CUserTime::Update()
493 {
494   DWORD new_Tick = GetTickCount();
495   FILETIME creationTime, exitTime, kernelTime, userTime;
496   if (!UseTick &&
497       #ifdef UNDER_CE
498         ::GetThreadTimes(::GetCurrentThread()
499       #else
500         ::GetProcessTimes(::GetCurrentProcess()
501       #endif
502       , &creationTime, &exitTime, &kernelTime, &userTime))
503   {
504     UInt64 newVal = GetTime64(userTime) + GetTime64(kernelTime);
505     Sum += newVal - Prev;
506     Prev = newVal;
507   }
508   else
509   {
510     UseTick = true;
511     Sum += (UInt64)(new_Tick - (DWORD)Prev_Tick) * 10000;
512   }
513   Prev_Tick = new_Tick;
514 }
515 
516 
517 #endif
518 
519 static UInt64 GetUserFreq()
520 {
521   #ifdef USE_POSIX_TIME
522   // return CLOCKS_PER_SEC;
523   return (UInt64)sysconf(_SC_CLK_TCK);
524   #else
525   return 10000000;
526   #endif
527 }
528 
/*
  Status shared by the progress callbacks of a benchmark run.
  SetResult() / GetResult() access (Res) under a critical section in multithreaded builds.
  There is no constructor: (Res) and (EncodeMode) must be set by the owner before use.
*/
class CBenchProgressStatus Z7_final
{
  #ifndef Z7_ST
  NSynchronization::CCriticalSection CS;
  #endif
public:
  HRESULT Res;      // result code; CBenchProgressInfo::SetRatioInfo() stops reporting, if it is not S_OK
  bool EncodeMode;  // selects how SetRatioInfo() maps (inSize, outSize) to unpacked / packed sizes
  void SetResult(HRESULT res)
  {
    #ifndef Z7_ST
    NSynchronization::CCriticalSectionLock lock(CS);
    #endif
    Res = res;
  }
  HRESULT GetResult()
  {
    #ifndef Z7_ST
    NSynchronization::CCriticalSectionLock lock(CS);
    #endif
    return Res;
  }
};
552 
/*
  Time measurement helper.
  SetStartTime() stores the start values in (BenchInfo) and starts the CPU-time counter;
  SetFinishTime() returns a copy of (BenchInfo) with the elapsed times.
*/
struct CBenchInfoCalc
{
  CBenchInfo BenchInfo;  // frequencies and the start value of the global timer
  CUserTime UserTime;    // CPU-time accumulator

  void SetStartTime();
  void SetFinishTime(CBenchInfo &dest);
};
561 
562 void CBenchInfoCalc::SetStartTime()
563 {
564   BenchInfo.GlobalFreq = GetFreq();
565   BenchInfo.UserFreq = GetUserFreq();
566   BenchInfo.GlobalTime = ::GetTimeCount();
567   BenchInfo.UserTime = 0;
568   UserTime.Init();
569 }
570 
571 void CBenchInfoCalc::SetFinishTime(CBenchInfo &dest)
572 {
573   dest = BenchInfo;
574   dest.GlobalTime = ::GetTimeCount() - BenchInfo.GlobalTime;
575   dest.UserTime = UserTime.GetUserTime();
576 }
577 
/*
  Progress callback object that is passed to coders during the benchmark.
  SetRatioInfo() forwards intermediate results to (Callback).
  (Status) is not initialized by the constructor and is dereferenced by SetRatioInfo():
  the owner must set it before the object is used.
*/
class CBenchProgressInfo Z7_final:
  public ICompressProgressInfo,
  public CMyUnknownImp,
  public CBenchInfoCalc
{
  Z7_COM_UNKNOWN_IMP_0
  Z7_IFACE_COM7_IMP(ICompressProgressInfo)
public:
  CBenchProgressStatus *Status;  // shared status (not owned)
  IBenchCallback *Callback;      // can be NULL: then SetRatioInfo() only returns the status

  CBenchProgressInfo(): Callback(NULL) {}
};
591 
592 
593 Z7_COM7F_IMF(CBenchProgressInfo::SetRatioInfo(const UInt64 *inSize, const UInt64 *outSize))
594 {
595   HRESULT res = Status->GetResult();
596   if (res != S_OK)
597     return res;
598   if (!Callback)
599     return res;
600 
601   /*
602   static UInt64 inSizePrev = 0;
603   static UInt64 outSizePrev = 0;
604   UInt64 delta1 = 0, delta2 = 0, val1 = 0, val2 = 0;
605   if (inSize)   { val1 = *inSize;  delta1 = val1 - inSizePrev;  inSizePrev  = val1; }
606   if (outSize)  { val2 = *outSize; delta2 = val2 - outSizePrev; outSizePrev = val2;  }
607   UInt64 percents = delta2 * 1000;
608   if (delta1 != 0)
609     percents /= delta1;
610   printf("=== %7d %7d     %7d %7d  ratio = %4d\n",
611       (unsigned)(val1 >> 10), (unsigned)(delta1 >> 10),
612       (unsigned)(val2 >> 10), (unsigned)(delta2 >> 10),
613       (unsigned)percents);
614   */
615 
616   CBenchInfo info;
617   SetFinishTime(info);
618   if (Status->EncodeMode)
619   {
620     info.UnpackSize = BenchInfo.UnpackSize + *inSize;
621     info.PackSize = BenchInfo.PackSize + *outSize;
622     res = Callback->SetEncodeResult(info, false);
623   }
624   else
625   {
626     info.PackSize = BenchInfo.PackSize + *inSize;
627     info.UnpackSize = BenchInfo.UnpackSize + *outSize;
628     res = Callback->SetDecodeResult(info, false);
629   }
630   if (res != S_OK)
631     Status->SetResult(res);
632   return res;
633 }
634 
635 static const unsigned kSubBits = 8;
636 
637 static unsigned GetLogSize(UInt64 size)
638 {
639   unsigned i = 0;
640   for (;;)
641   {
642     i++;  size >>= 1;  if (size == 0) break;
643   }
644   return i;
645 }
646 
647 
648 static UInt32 GetLogSize_Sub(UInt64 size)
649 {
650   if (size <= 1)
651     return 0;
652   const unsigned i = GetLogSize(size) - 1;
653   UInt32 v;
654   if (i <= kSubBits)
655     v = (UInt32)(size) << (kSubBits - i);
656   else
657     v = (UInt32)(size >> (i - kSubBits));
658   return ((UInt32)i << kSubBits) + (v & (((UInt32)1 << kSubBits) - 1));
659 }
660 
661 
662 static UInt64 Get_UInt64_from_double(double v)
663 {
664   const UInt64 kMaxVal = (UInt64)1 << 62;
665   if (v > (double)(Int64)kMaxVal)
666     return kMaxVal;
667   return (UInt64)v;
668 }
669 
670 static UInt64 MyMultDiv64(UInt64 m1, UInt64 m2, UInt64 d)
671 {
672   if (d == 0)
673     d = 1;
674   const double v =
675       (double)(Int64)m1 *
676       (double)(Int64)m2 /
677       (double)(Int64)d;
678   return Get_UInt64_from_double(v);
679   /*
680   unsigned n1 = GetLogSize(m1);
681   unsigned n2 = GetLogSize(m2);
682   while (n1 + n2 > 64)
683   {
684     if (n1 >= n2)
685     {
686       m1 >>= 1;
687       n1--;
688     }
689     else
690     {
691       m2 >>= 1;
692       n2--;
693     }
694     d >>= 1;
695   }
696 
697   if (d == 0)
698     d = 1;
699   return m1 * m2 / d;
700   */
701 }
702 
703 
704 UInt64 CBenchInfo::GetUsage() const
705 {
706   UInt64 userTime = UserTime;
707   UInt64 userFreq = UserFreq;
708   UInt64 globalTime = GlobalTime;
709   UInt64 globalFreq = GlobalFreq;
710 
711   if (userFreq == 0)
712     userFreq = 1;
713   if (globalTime == 0)
714     globalTime = 1;
715 
716   const double v =
717         ((double)(Int64)userTime / (double)(Int64)userFreq)
718       * ((double)(Int64)globalFreq / (double)(Int64)globalTime)
719       * (double)(Int64)kBenchmarkUsageMult;
720   return Get_UInt64_from_double(v);
721   /*
722   return MyMultDiv64(
723         MyMultDiv64(kBenchmarkUsageMult, userTime, userFreq),
724         globalFreq, globalTime);
725   */
726 }
727 
728 
729 UInt64 CBenchInfo::GetRatingPerUsage(UInt64 rating) const
730 {
731   if (UserTime == 0)
732   {
733     return 0;
734     // userTime = 1;
735   }
736   UInt64 globalFreq = GlobalFreq;
737   if (globalFreq == 0)
738     globalFreq = 1;
739 
740   const double v =
741         ((double)(Int64)GlobalTime / (double)(Int64)globalFreq)
742       * ((double)(Int64)UserFreq  / (double)(Int64)UserTime)
743       * (double)(Int64)rating;
744   return Get_UInt64_from_double(v);
745   /*
746   return MyMultDiv64(
747         MyMultDiv64(rating, UserFreq, UserTime),
748         GlobalTime, globalFreq);
749   */
750 }
751 
752 
753 UInt64 CBenchInfo::GetSpeed(UInt64 numUnits) const
754 {
755   return MyMultDiv64(numUnits, GlobalFreq, GlobalTime);
756 }
757 
758 static UInt64 GetNumCommands_from_Size_and_Complexity(UInt64 size, Int32 complexity)
759 {
760   return complexity >= 0 ?
761       size * (UInt32)complexity :
762       size / (UInt32)(-complexity);
763 }
764 
765 struct CBenchProps
766 {
767   bool LzmaRatingMode;
768 
769   Int32 EncComplex;
770   Int32 DecComplexCompr;
771   Int32 DecComplexUnc;
772 
773   unsigned KeySize;
774 
775   CBenchProps():
776       LzmaRatingMode(false),
777       KeySize(0)
778     {}
779 
780   void SetLzmaCompexity();
781 
782   UInt64 GetNumCommands_Enc(UInt64 unpackSize) const
783   {
784     const UInt32 kMinSize = 100;
785     if (unpackSize < kMinSize)
786       unpackSize = kMinSize;
787     return GetNumCommands_from_Size_and_Complexity(unpackSize, EncComplex);
788   }
789 
790   UInt64 GetNumCommands_Dec(UInt64 packSize, UInt64 unpackSize) const
791   {
792     return
793         GetNumCommands_from_Size_and_Complexity(packSize, DecComplexCompr) +
794         GetNumCommands_from_Size_and_Complexity(unpackSize, DecComplexUnc);
795   }
796 
797   UInt64 GetRating_Enc(UInt64 dictSize, UInt64 elapsedTime, UInt64 freq, UInt64 size) const;
798   UInt64 GetRating_Dec(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt64 numIterations) const;
799 };
800 
801 void CBenchProps::SetLzmaCompexity()
802 {
803   EncComplex = 1200;
804   DecComplexUnc = 4;
805   DecComplexCompr = 190;
806   LzmaRatingMode = true;
807 }
808 
809 UInt64 CBenchProps::GetRating_Enc(UInt64 dictSize, UInt64 elapsedTime, UInt64 freq, UInt64 size) const
810 {
811   if (dictSize < (1 << kBenchMinDicLogSize))
812     dictSize = (1 << kBenchMinDicLogSize);
813   Int32 encComplex = EncComplex;
814   if (LzmaRatingMode)
815   {
816     /*
817     for (UInt64 uu = 0; uu < (UInt64)0xf << 60;)
818     {
819       unsigned rr = GetLogSize_Sub(uu);
820       printf("\n%16I64x , log = %4x", uu, rr);
821       uu += 1;
822       uu += uu / 50;
823     }
824     */
825     // throw 1;
826     const UInt32 t = GetLogSize_Sub(dictSize) - (kBenchMinDicLogSize << kSubBits);
827     encComplex = 870 + ((t * t * 5) >> (2 * kSubBits));
828   }
829   const UInt64 numCommands = GetNumCommands_from_Size_and_Complexity(size, encComplex);
830   return MyMultDiv64(numCommands, freq, elapsedTime);
831 }
832 
833 UInt64 CBenchProps::GetRating_Dec(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt64 numIterations) const
834 {
835   const UInt64 numCommands = GetNumCommands_Dec(inSize, outSize) * numIterations;
836   return MyMultDiv64(numCommands, freq, elapsedTime);
837 }
838 
839 
840 
841 UInt64 CBenchInfo::GetRating_LzmaEnc(UInt64 dictSize) const
842 {
843   CBenchProps props;
844   props.SetLzmaCompexity();
845   return props.GetRating_Enc(dictSize, GlobalTime, GlobalFreq, UnpackSize * NumIterations);
846 }
847 
848 UInt64 CBenchInfo::GetRating_LzmaDec() const
849 {
850   CBenchProps props;
851   props.SetLzmaCompexity();
852   return props.GetRating_Dec(GlobalTime, GlobalFreq, UnpackSize, PackSize, NumIterations);
853 }
854 
855 
856 #ifndef Z7_ST
857 
858 #define NUM_CPU_LEVELS_MAX 3
859 
860 struct CAffinityMode
861 {
862   unsigned NumBundleThreads;
863   unsigned NumLevels;
864   unsigned NumCoreThreads;
865   unsigned NumCores;
866   // unsigned DivideNum;
867   UInt32 Sizes[NUM_CPU_LEVELS_MAX];
868 
869   void SetLevels(unsigned numCores, unsigned numCoreThreads);
870   DWORD_PTR GetAffinityMask(UInt32 bundleIndex, CCpuSet *cpuSet) const;
871   bool NeedAffinity() const { return NumBundleThreads != 0; }
872 
873   WRes CreateThread_WithAffinity(NWindows::CThread &thread, THREAD_FUNC_TYPE startAddress, LPVOID parameter, UInt32 bundleIndex) const
874   {
875     if (NeedAffinity())
876     {
877       CCpuSet cpuSet;
878       GetAffinityMask(bundleIndex, &cpuSet);
879       return thread.Create_With_CpuSet(startAddress, parameter, &cpuSet);
880     }
881     return thread.Create(startAddress, parameter);
882   }
883 
884   CAffinityMode():
885     NumBundleThreads(0),
886     NumLevels(0),
887     NumCoreThreads(1)
888     // DivideNum(1)
889     {}
890 };
891 
892 void CAffinityMode::SetLevels(unsigned numCores, unsigned numCoreThreads)
893 {
894   NumCores = numCores;
895   NumCoreThreads = numCoreThreads;
896   NumLevels = 0;
897   if (numCoreThreads == 0 || numCores == 0 || numCores % numCoreThreads != 0)
898     return;
899   UInt32 c = numCores / numCoreThreads;
900   UInt32 c2 = 1;
901   while ((c & 1) == 0)
902   {
903     c >>= 1;
904     c2 <<= 1;
905   }
906   if (c2 != 1)
907     Sizes[NumLevels++] = c2;
908   if (c != 1)
909     Sizes[NumLevels++] = c;
910   if (numCoreThreads != 1)
911     Sizes[NumLevels++] = numCoreThreads;
912   if (NumLevels == 0)
913     Sizes[NumLevels++] = 1;
914 
915   /*
916   printf("\n Cores:");
917   for (unsigned i = 0; i < NumLevels; i++)
918   {
919     printf(" %d", Sizes[i]);
920   }
921   printf("\n");
922   */
923 }
924 
925 
926 DWORD_PTR CAffinityMode::GetAffinityMask(UInt32 bundleIndex, CCpuSet *cpuSet) const
927 {
928   CpuSet_Zero(cpuSet);
929 
930   if (NumLevels == 0)
931     return 0;
932 
933   // printf("\n%2d", bundleIndex);
934 
935   /*
936   UInt32 low = 0;
937   if (DivideNum != 1)
938   {
939     low = bundleIndex % DivideNum;
940     bundleIndex /= DivideNum;
941   }
942   */
943 
944   UInt32 numGroups = NumCores / NumBundleThreads;
945   UInt32 m = bundleIndex % numGroups;
946   UInt32 v = 0;
947   for (unsigned i = 0; i < NumLevels; i++)
948   {
949     UInt32 size = Sizes[i];
950     while ((size & 1) == 0)
951     {
952       v *= 2;
953       v |= (m & 1);
954       m >>= 1;
955       size >>= 1;
956     }
957     v *= size;
958     v += m % size;
959     m /= size;
960   }
961 
962   // UInt32 nb = NumBundleThreads / DivideNum;
963   UInt32 nb = NumBundleThreads;
964 
965   DWORD_PTR mask = ((DWORD_PTR)1 << nb) - 1;
966   // v += low;
967   mask <<= v;
968 
969   // printf(" %2d %8x \n ", v, (unsigned)mask);
970   #ifdef _WIN32
971     *cpuSet = mask;
972   #else
973   {
974     for (unsigned k = 0; k < nb; k++)
975       CpuSet_Set(cpuSet, v + k);
976   }
977   #endif
978 
979   return mask;
980 }
981 
982 
// State shared by benchmark threads (CEncoderInfo::Common points to it).
// NOTE(review): presumably threads wait for StartEvent and then check ExitMode - confirm in the thread code.
struct CBenchSyncCommon
{
  bool ExitMode;
  NSynchronization::CManualResetEvent StartEvent;

  CBenchSyncCommon(): ExitMode(false) {}
};
990 
991 #endif
992 
993 
994 
// Selects when the CRC is checked (used for CEncoderInfo::CheckCrcMode_Dec).
// NOTE(review): the meaning below is taken from the names - confirm in Decode().
enum E_CheckCrcMode
{
  k_CheckCrcMode_Never = 0,     // never check
  k_CheckCrcMode_Always = 1,    // check in every pass
  k_CheckCrcMode_FirstPass = 2  // check in the first pass only
};
1001 
1002 class CEncoderInfo;
1003 
1004 class CEncoderInfo Z7_final
1005 {
1006   Z7_CLASS_NO_COPY(CEncoderInfo)
1007 
1008 public:
1009 
1010   #ifndef Z7_ST
1011   NWindows::CThread thread[2];
1012   NSynchronization::CManualResetEvent ReadyEvent;
1013   UInt32 NumDecoderSubThreads;
1014   CBenchSyncCommon *Common;
1015   UInt32 EncoderIndex;
1016   UInt32 NumEncoderInternalThreads;
1017   CAffinityMode AffinityMode;
1018   bool IsGlobalMtMode; // if more than one benchmark encoder threads
1019   #endif
1020 
1021   CMyComPtr<ICompressCoder> _encoder;
1022   CMyComPtr<ICompressFilter> _encoderFilter;
1023   CBenchProgressInfo *progressInfoSpec[2];
1024   CMyComPtr<ICompressProgressInfo> progressInfo[2];
1025   UInt64 NumIterations;
1026 
1027   UInt32 Salt;
1028 
1029   #ifdef USE_ALLOCA
1030   size_t AllocaSize;
1031   #endif
1032 
1033   unsigned KeySize;
1034   Byte _key[32];
1035   Byte _iv[16];
1036 
1037   HRESULT Set_Key_and_IV(ICryptoProperties *cp)
1038   {
1039     RINOK(cp->SetKey(_key, KeySize))
1040     return cp->SetInitVector(_iv, sizeof(_iv));
1041   }
1042 
1043   Byte _psw[16];
1044 
1045   bool CheckCrc_Enc;    /* = 1, if we want to check packed data crcs after each pass
1046                                 used for filter and usual coders */
1047   bool UseRealData_Enc; /* = 1, if we want to use only original data for each pass
1048                                 used only for filter */
1049   E_CheckCrcMode CheckCrcMode_Dec;
1050 
1051   struct CDecoderInfo
1052   {
1053     CEncoderInfo *Encoder;
1054     UInt32 DecoderIndex;
1055     bool CallbackMode;
1056 
1057     #ifdef USE_ALLOCA
1058     size_t AllocaSize;
1059     #endif
1060   };
1061   CDecoderInfo decodersInfo[2];
1062 
1063   CMyComPtr<ICompressCoder> _decoders[2];
1064   CMyComPtr<ICompressFilter> _decoderFilter;
1065 
1066   HRESULT Results[2];
1067   CBenchmarkOutStream *outStreamSpec;
1068   CMyComPtr<ISequentialOutStream> outStream;
1069   IBenchCallback *callback;
1070   IBenchPrintCallback *printCallback;
1071   UInt32 crc;
1072   size_t kBufferSize;
1073   size_t compressedSize;
1074   const Byte *uncompressedDataPtr;
1075 
1076   const Byte *fileData;
1077   CBenchRandomGenerator rg;
1078 
1079   CMidAlignedBuffer rgCopy; // it must be 16-byte aligned !!!
1080 
1081   // CBenchmarkOutStream *propStreamSpec;
1082   Byte propsData[kMaxMethodPropSize];
1083   CBufPtrSeqOutStream *propStreamSpec;
1084   CMyComPtr<ISequentialOutStream> propStream;
1085 
1086   unsigned generateDictBits;
1087   COneMethodInfo _method;
1088 
1089   // for decode
1090   size_t _uncompressedDataSize;
1091 
1092   HRESULT Generate();
1093   HRESULT Encode();
1094   HRESULT Decode(UInt32 decoderIndex);
1095 
1096   CEncoderInfo():
1097     #ifndef Z7_ST
1098     Common(NULL),
1099     IsGlobalMtMode(true),
1100     #endif
1101     Salt(0),
1102     KeySize(0),
1103     CheckCrc_Enc(true),
1104     UseRealData_Enc(true),
1105     CheckCrcMode_Dec(k_CheckCrcMode_Always),
1106     outStreamSpec(NULL),
1107     callback(NULL),
1108     printCallback(NULL),
1109     fileData(NULL),
1110     propStreamSpec(NULL)
1111     {}
1112 
1113   #ifndef Z7_ST
1114 
1115   static THREAD_FUNC_DECL EncodeThreadFunction(void *param)
1116   {
1117     HRESULT res;
1118     CEncoderInfo *encoder = (CEncoderInfo *)param;
1119     try
1120     {
1121       #ifdef USE_ALLOCA
1122       alloca(encoder->AllocaSize);
1123       #endif
1124 
1125       res = encoder->Encode();
1126     }
1127     catch(...)
1128     {
1129       res = E_FAIL;
1130     }
1131     encoder->Results[0] = res;
1132     if (res != S_OK)
1133       encoder->progressInfoSpec[0]->Status->SetResult(res);
1134     encoder->ReadyEvent.Set();
1135     return THREAD_FUNC_RET_ZERO;
1136   }
1137 
1138   static THREAD_FUNC_DECL DecodeThreadFunction(void *param)
1139   {
1140     CDecoderInfo *decoder = (CDecoderInfo *)param;
1141 
1142     #ifdef USE_ALLOCA
1143     alloca(decoder->AllocaSize);
1144     #endif
1145 
1146     CEncoderInfo *encoder = decoder->Encoder;
1147     encoder->Results[decoder->DecoderIndex] = encoder->Decode(decoder->DecoderIndex);
1148     return THREAD_FUNC_RET_ZERO;
1149   }
1150 
1151   HRESULT CreateEncoderThread()
1152   {
1153     WRes res = 0;
1154     if (!ReadyEvent.IsCreated())
1155       res = ReadyEvent.Create();
1156     if (res == 0)
1157       res = AffinityMode.CreateThread_WithAffinity(thread[0], EncodeThreadFunction, this,
1158           EncoderIndex);
1159     return HRESULT_FROM_WIN32(res);
1160   }
1161 
1162   HRESULT CreateDecoderThread(unsigned index, bool callbackMode
1163       #ifdef USE_ALLOCA
1164       , size_t allocaSize
1165       #endif
1166       )
1167   {
1168     CDecoderInfo &decoder = decodersInfo[index];
1169     decoder.DecoderIndex = index;
1170     decoder.Encoder = this;
1171 
1172     #ifdef USE_ALLOCA
1173     decoder.AllocaSize = allocaSize;
1174     #endif
1175 
1176     decoder.CallbackMode = callbackMode;
1177 
1178     WRes res = AffinityMode.CreateThread_WithAffinity(thread[index], DecodeThreadFunction, &decoder,
1179         // EncoderIndex * NumEncoderInternalThreads + index
1180         EncoderIndex
1181         );
1182 
1183     return HRESULT_FROM_WIN32(res);
1184   }
1185 
1186   #endif
1187 };
1188 
1189 
1190 
1191 
1192 static size_t GetBenchCompressedSize(size_t bufferSize)
1193 {
1194   return kCompressedAdditionalSize + bufferSize + bufferSize / 16;
1195   // kBufferSize / 2;
1196 }
1197 
1198 
/* CEncoderInfo::Generate()
   Prepares buffers and the encoder object:
     1) selects the uncompressed input (uncompressedDataPtr):
          - (fileData) as is,
          - a private copy of (fileData) in (rg), if IsGlobalMtMode is set,
          - data generated into (rg), if there is no (fileData);
     2) creates (outStream) if required and allocates the output buffer;
        for filters it also copies the input to the output buffer and
        to (rgCopy);
     3) sends method properties to the coder (ICompressSetCoderProperties)
        and writes coder properties to (propsData) via (propStream);
     4) if the coder supports ICryptoSetPassword: sets (_psw) and runs
        the coder one time for 16 zero bytes.
   Returns S_OK, or the first error code. */
1199 HRESULT CEncoderInfo::Generate()
1200 {
1201   const COneMethodInfo &method = _method;
1202 
1203   // we need extra space, if input data is already compressed
1204   const size_t kCompressedBufferSize = _encoderFilter ?
1205       kBufferSize :
1206       GetBenchCompressedSize(kBufferSize);
1207 
       // presumably that is possible only if the size calculation wrapped around (size_t)
1208   if (kCompressedBufferSize < kBufferSize)
1209     return E_FAIL;
1210 
1211   uncompressedDataPtr = fileData;
1212   if (fileData)
1213   {
         // (crc) is not calculated here for (fileData)
1214     #if !defined(Z7_ST)
1215     if (IsGlobalMtMode)
1216     {
1217       /* we copy the data to local buffer of thread to eliminate
1218          using of shared buffer by different threads */
1219       ALLOC_WITH_HRESULT(&rg, kBufferSize)
1220       memcpy((Byte *)rg, fileData, kBufferSize);
1221       uncompressedDataPtr = (const Byte *)rg;
1222     }
1223     #endif
1224   }
1225   else
1226   {
1227     ALLOC_WITH_HRESULT(&rg, kBufferSize)
1228     // DWORD ttt = GetTickCount();
         // (generateDictBits == 0) selects GenerateSimpleRandom(), another value selects GenerateLz()
1229     if (generateDictBits == 0)
1230       rg.GenerateSimpleRandom(Salt);
1231     else
1232     {
1233       if (generateDictBits >= sizeof(size_t) * 8
1234           && kBufferSize > ((size_t)1 << (sizeof(size_t) * 8 - 1)))
1235         return E_INVALIDARG;
1236       rg.GenerateLz(generateDictBits, Salt);
1237       // return E_ABORT; // for debug
1238     }
1239     // printf("\n%d\n            ", GetTickCount() - ttt);
1240 
         // crc of generated data
1241     crc = CrcCalc((const Byte *)rg, rg.Size());
1242     uncompressedDataPtr = (const Byte *)rg;
1243   }
1244 
1245   if (!outStream)
1246   {
1247     outStreamSpec = new CBenchmarkOutStream;
1248     outStream = outStreamSpec;
1249   }
1250 
1251   ALLOC_WITH_HRESULT(outStreamSpec, kCompressedBufferSize)
1252 
1253   if (_encoderFilter)
1254   {
1255     /* we try to reduce the number of memcpy() in main encoding loop.
1256        so we copy data to temp buffers here */
1257     ALLOC_WITH_HRESULT(&rgCopy, kBufferSize)
1258     memcpy((Byte *)*outStreamSpec, uncompressedDataPtr, kBufferSize);
1259     memcpy((Byte *)rgCopy, uncompressedDataPtr, kBufferSize);
1260   }
1261 
1262   if (!propStream)
1263   {
1264     propStreamSpec = new CBufPtrSeqOutStream; // CBenchmarkOutStream;
1265     propStream = propStreamSpec;
1266   }
1267   // ALLOC_WITH_HRESULT_2(propStreamSpec, kMaxMethodPropSize);
1268   // propStreamSpec->Init(true, false);
       // coder properties will be written to fixed buffer (propsData)
1269   propStreamSpec->Init(propsData, sizeof(propsData));
1270 
1271 
       // (coder) is the filter object, if the filter exists, or the encoder object otherwise
1272   CMyComPtr<IUnknown> coder;
1273   if (_encoderFilter)
1274     coder = _encoderFilter;
1275   else
1276     coder = _encoder;
1277   {
1278     CMyComPtr<ICompressSetCoderProperties> scp;
1279     coder.QueryInterface(IID_ICompressSetCoderProperties, &scp);
1280     if (scp)
1281     {
1282       const UInt64 reduceSize = kBufferSize;
1283 
1284       /* in posix new thread uses same affinity as parent thread,
1285          so we don't need to send affinity to coder in posix */
1286       UInt64 affMask;
1287       #if !defined(Z7_ST) && defined(_WIN32)
1288       {
1289         CCpuSet cpuSet;
1290         affMask = AffinityMode.GetAffinityMask(EncoderIndex, &cpuSet);
1291       }
1292       #else
1293         affMask = 0;
1294       #endif
1295       // affMask <<= 3; // debug line: to test no affinity in coder;
1296       // affMask = 0;
1297 
           // (affMask == 0) : NULL is sent instead of the mask
1298       RINOK(method.SetCoderProps_DSReduce_Aff(scp, &reduceSize, (affMask != 0 ? &affMask : NULL)))
1299     }
1300     else
1301     {
           // the coder can't get properties: it's an error only if there are non-optional properties
1302       if (method.AreThereNonOptionalProps())
1303         return E_INVALIDARG;
1304     }
1305 
1306     CMyComPtr<ICompressWriteCoderProperties> writeCoderProps;
1307     coder.QueryInterface(IID_ICompressWriteCoderProperties, &writeCoderProps);
1308     if (writeCoderProps)
1309     {
1310       RINOK(writeCoderProps->WriteCoderProperties(propStream))
1311     }
1312 
1313     {
1314       CMyComPtr<ICryptoSetPassword> sp;
1315       coder.QueryInterface(IID_ICryptoSetPassword, &sp);
1316       if (sp)
1317       {
1318         RINOK(sp->CryptoSetPassword(_psw, sizeof(_psw)))
1319 
1320         // we must call encoding one time to calculate password key for key cache.
1321         // it must be after WriteCoderProperties!
1322         Byte temp[16];
1323         memset(temp, 0, sizeof(temp));
1324 
1325         if (_encoderFilter)
1326         {
1327           _encoderFilter->Init();
1328           _encoderFilter->Filter(temp, sizeof(temp));
1329         }
1330         else
1331         {
1332           CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
1333           CMyComPtr<ISequentialInStream> inStream = inStreamSpec;
1334           inStreamSpec->Init(temp, sizeof(temp));
1335 
1336           CCrcOutStream *crcStreamSpec = new CCrcOutStream;
1337           CMyComPtr<ISequentialOutStream> crcStream = crcStreamSpec;
1338           crcStreamSpec->Init();
1339 
1340           RINOK(_encoder->Code(inStream, crcStream, NULL, NULL, NULL))
1341         }
1342       }
1343     }
1344   }
1345 
1346   return S_OK;
1347 }
1348 
1349 
1350 static void My_FilterBench(ICompressFilter *filter, Byte *data, size_t size, UInt32 *crc)
1351 {
1352   while (size != 0)
1353   {
1354     UInt32 cur = crc ? 1 << 17 : 1 << 24;
1355     if (cur > size)
1356       cur = (UInt32)size;
1357     UInt32 processed = filter->Filter(data, cur);
1358     /* if (processed > size) (in AES filter), we must fill last block with zeros.
1359        but it is not important for benchmark. So we just copy that data without filtering.
1360        if (processed == 0) then filter can't process more  */
1361     if (processed > size || processed == 0)
1362       processed = (UInt32)size;
1363     if (crc)
1364       *crc = CrcUpdate(*crc, data, processed);
1365     data += processed;
1366     size -= processed;
1367   }
1368 }
1369 
1370 
/* CEncoderInfo::Encode()
   Calls Generate(), and then runs (NumIterations) encoding passes
   for the same input buffer.
   Start synchronization:
     - if (Common) is set (code exists only if Z7_ST is not defined):
       it sets ReadyEvent, waits for Common->StartEvent, and returns S_OK
       without encoding, if Common->ExitMode is set;
     - otherwise it calls SetStartTime() for progressInfoSpec[0].
   Each pass adds (kBufferSize) to BenchInfo.UnpackSize and (compressedSize)
   to BenchInfo.PackSize of progressInfoSpec[0].
   It returns E_FAIL, if the encoder didn't read all input data, if packed
   size is different in different passes, or if CRC of packed data is
   different in passes where CRC is calculated.
   (_encoder) and (_encoderFilter) are released, if all passes were finished. */
1371 HRESULT CEncoderInfo::Encode()
1372 {
1373   // printf("\nCEncoderInfo::Generate\n");
1374 
1375   RINOK(Generate())
1376 
1377   // printf("\n2222\n");
1378 
1379   #ifndef Z7_ST
1380   if (Common)
1381   {
1382     Results[0] = S_OK;
1383     WRes wres = ReadyEvent.Set();
1384     if (wres == 0)
1385       wres = Common->StartEvent.Lock();
1386     if (wres != 0)
1387       return HRESULT_FROM_WIN32(wres);
1388     if (Common->ExitMode)
1389       return S_OK;
1390   }
1391   else
1392   #endif
1393   {
1394     CBenchProgressInfo *bpi = progressInfoSpec[0];
1395     bpi->SetStartTime();
1396   }
1397 
1398 
1399   CBenchInfo &bi = progressInfoSpec[0]->BenchInfo;
1400   bi.UnpackSize = 0;
1401   bi.PackSize = 0;
1402   CMyComPtr<ICryptoProperties> cp;
1403   CMyComPtr<IUnknown> coder;
1404   if (_encoderFilter)
1405     coder = _encoderFilter;
1406   else
1407     coder = _encoder;
1408   coder.QueryInterface(IID_ICryptoProperties, &cp);
1409   CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
1410   CMyComPtr<ISequentialInStream> inStream = inStreamSpec;
1411 
1412   if (cp)
1413   {
1414     RINOK(Set_Key_and_IV(cp))
1415   }
1416 
       // a filter doesn't change the size of data
1417   compressedSize = 0;
1418   if (_encoderFilter)
1419     compressedSize = kBufferSize;
1420 
1421   // CBenchmarkOutStream *outStreamSpec = this->outStreamSpec;
1422   UInt64 prev = 0;
1423 
       /* (mask == 0)      : CRC is calculated in each pass.
          (mask == 0xFFFF) : CRC is calculated only in passes where low 16 bits
                             of (i) are zeros, and only if (NumIterations > 0xFFFF) */
1424   const UInt32 mask = (CheckCrc_Enc ? 0 : 0xFFFF);
1425   const bool useCrc = (mask < NumIterations);
1426   bool crcPrev_defined = false;
1427   UInt32 crcPrev = 0;
1428 
1429   bool useRealData_Enc = UseRealData_Enc;
1430   bool data_Was_Changed = false;
1431   if (useRealData_Enc)
1432   {
1433     /* we want memcpy() for each iteration including first iteration.
1434        So results will be equal for different number of iterations */
1435     data_Was_Changed = true;
1436   }
1437 
1438   const UInt64 numIterations = NumIterations;
1439   UInt64 i = numIterations;
1440     // printCallback->NewLine();
1441 
       // (i) counts down: (i == numIterations - 1) in first pass, (i == 0) in last pass
1442   while (i != 0)
1443   {
1444     i--;
         // CheckBreak() is called after each (1 << 26) bytes of processed input
1445     if (printCallback && bi.UnpackSize - prev >= (1 << 26))
1446     {
1447       prev = bi.UnpackSize;
1448       RINOK(printCallback->CheckBreak())
1449     }
1450 
1451     /*
1452     CBenchInfo info;
1453     progressInfoSpec[0]->SetStartTime();
1454     */
1455 
1456     bool calcCrc = false;
1457     if (useCrc)
1458       calcCrc = (((UInt32)i & mask) == 0);
1459 
1460     if (_encoderFilter)
1461     {
           /* (rgCopy) is used by default. The output buffer is used in first pass,
              in passes with CRC, and in real data mode. Generate() already
              copied input data to the output buffer. So memcpy() is required
              here only if the buffer was used before (or in real data mode). */
1462       Byte *filterData = rgCopy;
1463       if (i == numIterations - 1 || calcCrc || useRealData_Enc)
1464       {
1465         filterData = (Byte *)*outStreamSpec;
1466         if (data_Was_Changed)
1467           memcpy(filterData, uncompressedDataPtr, kBufferSize);
1468         data_Was_Changed = true;
1469       }
1470       _encoderFilter->Init();
1471       if (calcCrc)
1472         outStreamSpec->InitCrc();
1473       My_FilterBench(_encoderFilter, filterData, kBufferSize,
1474           calcCrc ? &outStreamSpec->Crc : NULL);
1475     }
1476     else
1477     {
1478       outStreamSpec->Init(true, calcCrc); // write real data for speed consistency at any number of iterations
1479       inStreamSpec->Init(uncompressedDataPtr, kBufferSize);
1480       RINOK(_encoder->Code(inStream, outStream, NULL, NULL, progressInfo[0]))
1481       if (!inStreamSpec->WasFinished())
1482         return E_FAIL;
           // packed size is stored in first pass, and it must be same in next passes
1483       if (compressedSize != outStreamSpec->Pos)
1484       {
1485         if (compressedSize != 0)
1486           return E_FAIL;
1487         compressedSize = outStreamSpec->Pos;
1488       }
1489     }
1490 
1491     // outStreamSpec->Print();
1492 
         // CRC of packed data must be same in all passes where it is calculated
1493     if (calcCrc)
1494     {
1495       const UInt32 crc2 = CRC_GET_DIGEST(outStreamSpec->Crc);
1496       if (crcPrev_defined && crcPrev != crc2)
1497         return E_FAIL;
1498       crcPrev = crc2;
1499       crcPrev_defined = true;
1500     }
1501 
1502     bi.UnpackSize += kBufferSize;
1503     bi.PackSize += compressedSize;
1504 
1505     /*
1506     {
1507       progressInfoSpec[0]->SetFinishTime(info);
1508       info.UnpackSize = 0;
1509       info.PackSize = 0;
1510       info.NumIterations = 1;
1511 
1512       info.UnpackSize = kBufferSize;
1513       info.PackSize = compressedSize;
1514       // printf("\n%7d\n", encoder.compressedSize);
1515 
1516       RINOK(callback->SetEncodeResult(info, true))
1517       printCallback->NewLine();
1518     }
1519     */
1520 
1521   }
1522 
1523   _encoder.Release();
1524   _encoderFilter.Release();
1525   return S_OK;
1526 }
1527 
1528 
/* CEncoderInfo::Decode(decoderIndex)
   Decodes (compressedSize) bytes from the output buffer of the encoder
   (NumIterations) times. It uses (_decoderFilter), if it exists,
   or _decoders[decoderIndex] otherwise.
   Each pass adds (kBufferSize) and (compressedSize) to
   progressInfoSpec[decoderIndex]->BenchInfo.
   Returns:
     E_FAIL  : (decoderIndex != 0) for filter,
               or there are coder properties, but the coder doesn't
                 support ICompressSetDecoderProperties2,
               or (compressedSize > rgCopy.Size()) for filter.
     S_FALSE : the decoder didn't read all input data (in finish mode),
               or the size of processed input data is not (compressedSize),
               or unpacked size is not (kBufferSize),
               or CRC of unpacked data is not (crc).
     another error code from coder or from printCallback.
   The decoder object and (_decoderFilter) are released,
   if all passes were finished. */
1529 HRESULT CEncoderInfo::Decode(UInt32 decoderIndex)
1530 {
1531   CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
1532   CMyComPtr<ISequentialInStream> inStream = inStreamSpec;
1533   CMyComPtr<ICompressCoder> &decoder = _decoders[decoderIndex];
1534   CMyComPtr<IUnknown> coder;
1535   if (_decoderFilter)
1536   {
1537     if (decoderIndex != 0)
1538       return E_FAIL;
1539     coder = _decoderFilter;
1540   }
1541   else
1542     coder = decoder;
1543 
1544   CMyComPtr<ICompressSetDecoderProperties2> setDecProps;
1545   coder.QueryInterface(IID_ICompressSetDecoderProperties2, &setDecProps);
       // there are properties in (propStreamSpec), but the decoder can't get them
1546   if (!setDecProps && propStreamSpec->GetPos() != 0)
1547     return E_FAIL;
1548 
1549   CCrcOutStream *crcOutStreamSpec = new CCrcOutStream;
1550   CMyComPtr<ISequentialOutStream> crcOutStream = crcOutStreamSpec;
1551 
1552   CBenchProgressInfo *pi = progressInfoSpec[decoderIndex];
1553   pi->BenchInfo.UnpackSize = 0;
1554   pi->BenchInfo.PackSize = 0;
1555 
1556   #ifndef Z7_ST
1557   {
1558     CMyComPtr<ICompressSetCoderMt> setCoderMt;
1559     coder.QueryInterface(IID_ICompressSetCoderMt, &setCoderMt);
1560     if (setCoderMt)
1561     {
1562       RINOK(setCoderMt->SetNumberOfThreads(NumDecoderSubThreads))
1563     }
1564   }
1565   #endif
1566 
1567   CMyComPtr<ICompressSetCoderProperties> scp;
1568   coder.QueryInterface(IID_ICompressSetCoderProperties, &scp);
1569   if (scp)
1570   {
1571     const UInt64 reduceSize = _uncompressedDataSize;
1572     RINOK(_method.SetCoderProps(scp, &reduceSize))
1573   }
1574 
1575   CMyComPtr<ICryptoProperties> cp;
1576   coder.QueryInterface(IID_ICryptoProperties, &cp);
1577 
1578   if (setDecProps)
1579   {
1580     RINOK(setDecProps->SetDecoderProperties2(
1581         /* (const Byte *)*propStreamSpec, */
1582         propsData,
1583         (UInt32)propStreamSpec->GetPos()))
1584   }
1585 
1586   {
1587     CMyComPtr<ICryptoSetPassword> sp;
1588     coder.QueryInterface(IID_ICryptoSetPassword, &sp);
1589     if (sp)
1590     {
1591       RINOK(sp->CryptoSetPassword(_psw, sizeof(_psw)))
1592     }
1593   }
1594 
1595   UInt64 prev = 0;
1596 
1597   if (cp)
1598   {
1599     RINOK(Set_Key_and_IV(cp))
1600   }
1601 
1602   CMyComPtr<ICompressSetFinishMode> setFinishMode;
1603 
1604   if (_decoderFilter)
1605   {
         // (rgCopy) can be used below as the buffer for a copy of encoded data
1606     if (compressedSize > rgCopy.Size())
1607       return E_FAIL;
1608   }
1609   else
1610   {
1611     decoder->QueryInterface(IID_ICompressSetFinishMode, (void **)&setFinishMode);
1612   }
1613 
1614   const UInt64 numIterations = NumIterations;
1615   const E_CheckCrcMode checkCrcMode = CheckCrcMode_Dec;
1616 
1617   for (UInt64 i = 0; i < numIterations; i++)
1618   {
         // CheckBreak() is called after each (1 << 26) bytes of unpacked data
1619     if (printCallback && pi->BenchInfo.UnpackSize - prev >= (1 << 26))
1620     {
1621       RINOK(printCallback->CheckBreak())
1622       prev = pi->BenchInfo.UnpackSize;
1623     }
1624 
1625     const UInt64 outSize = kBufferSize;
1626     bool calcCrc = (checkCrcMode != k_CheckCrcMode_Never);
1627 
1628     crcOutStreamSpec->Init();
1629 
1630     if (_decoderFilter)
1631     {
           /* k_CheckCrcMode_Always : each pass decodes new copy of encoded data
                in (rgCopy), and CRC is checked in each pass.
              another mode (except of k_CheckCrcMode_Never) : the data is decoded
                in the output buffer of the encoder, and CRC is checked
                only in first pass (i == 0). */
1632       Byte *filterData = (Byte *)*outStreamSpec;
1633       if (calcCrc)
1634       {
1635         calcCrc = (i == 0);
1636         if (checkCrcMode == k_CheckCrcMode_Always)
1637         {
1638           calcCrc = true;
1639           memcpy((Byte *)rgCopy, (const Byte *)*outStreamSpec, compressedSize);
1640           filterData = rgCopy;
1641         }
1642       }
1643       _decoderFilter->Init();
1644       My_FilterBench(_decoderFilter, filterData, compressedSize,
1645           calcCrc ? &crcOutStreamSpec->Crc : NULL);
1646     }
1647     else
1648     {
1649       crcOutStreamSpec->CalcCrc = calcCrc;
1650       inStreamSpec->Init((const Byte *)*outStreamSpec, compressedSize);
1651 
1652       if (setFinishMode)
1653       {
1654         RINOK(setFinishMode->SetFinishMode(BoolToUInt(true)))
1655       }
1656 
1657       RINOK(decoder->Code(inStream, crcOutStream, NULL, &outSize, progressInfo[decoderIndex]))
1658 
           // in finish mode the decoder must read exactly (compressedSize) bytes
1659       if (setFinishMode)
1660       {
1661         if (!inStreamSpec->WasFinished())
1662           return S_FALSE;
1663 
1664         CMyComPtr<ICompressGetInStreamProcessedSize> getInStreamProcessedSize;
1665         decoder.QueryInterface(IID_ICompressGetInStreamProcessedSize, (void **)&getInStreamProcessedSize);
1666 
1667         if (getInStreamProcessedSize)
1668         {
1669           UInt64 processed;
1670           RINOK(getInStreamProcessedSize->GetInStreamProcessedSize(&processed))
1671           if (processed != compressedSize)
1672             return S_FALSE;
1673         }
1674       }
1675 
1676       if (crcOutStreamSpec->Pos != outSize)
1677         return S_FALSE;
1678     }
1679 
1680     if (calcCrc && CRC_GET_DIGEST(crcOutStreamSpec->Crc) != crc)
1681       return S_FALSE;
1682 
1683     pi->BenchInfo.UnpackSize += kBufferSize;
1684     pi->BenchInfo.PackSize += compressedSize;
1685   }
1686 
1687   decoder.Release();
1688   _decoderFilter.Release();
1689   return S_OK;
1690 }
1691 
1692 
// (1 << 12) == 4096.
// NOTE(review): presumably it's the limit for the number of benchmark threads.
//   It's not referenced in the code near this definition - confirm at the use sites.
1693 static const UInt32 kNumThreadsMax = (1 << 12);
1694 
1695 struct CBenchEncoders
1696 {
1697   CEncoderInfo *encoders;
1698   CBenchEncoders(UInt32 num): encoders(NULL) { encoders = new CEncoderInfo[num]; }
1699   ~CBenchEncoders() { delete []encoders; }
1700 };
1701 
1702 
1703 static UInt64 GetNumIterations(UInt64 numCommands, UInt64 complexInCommands)
1704 {
1705   if (numCommands < (1 << 4))
1706     numCommands = (1 << 4);
1707   UInt64 res = complexInCommands / numCommands;
1708   return (res == 0 ? 1 : res);
1709 }
1710 
1711 
1712 
1713 #ifndef Z7_ST
1714 
1715 // ---------- CBenchThreadsFlusher ----------
1716 
1717 struct CBenchThreadsFlusher
1718 {
1719   CBenchEncoders *EncodersSpec;
1720   CBenchSyncCommon Common;
1721   unsigned NumThreads;
1722   bool NeedClose;
1723 
1724   CBenchThreadsFlusher(): NumThreads(0), NeedClose(false) {}
1725 
1726   ~CBenchThreadsFlusher()
1727   {
1728     StartAndWait(true);
1729   }
1730 
1731   WRes StartAndWait(bool exitMode = false);
1732 };
1733 
1734 
1735 WRes CBenchThreadsFlusher::StartAndWait(bool exitMode)
1736 {
1737   if (!NeedClose)
1738     return 0;
1739 
1740   Common.ExitMode = exitMode;
1741   WRes res = Common.StartEvent.Set();
1742 
1743   for (unsigned i = 0; i < NumThreads; i++)
1744   {
1745     NWindows::CThread &t = EncodersSpec->encoders[i].thread[0];
1746     if (t.IsCreated())
1747     {
1748       WRes res2 = t.Wait_Close();
1749       if (res == 0)
1750         res = res2;
1751     }
1752   }
1753   NeedClose = false;
1754   return res;
1755 }
1756 
1757 #endif // Z7_ST
1758 
1759 
1760 
1761 static void SetPseudoRand(Byte *data, size_t size, UInt32 startValue)
1762 {
1763   for (size_t i = 0; i < size; i++)
1764   {
1765     data[i] = (Byte)startValue;
1766     startValue++;
1767   }
1768 }
1769 
1770 
1771 
/* MethodBench()
   Runs the benchmark for one method: encoding stage and then decoding stage.

   Parameters:
     complexInCommands    : work budget. GetNumIterations() converts it to
                            the number of iterations for each stage.
     oldLzmaBenchMode,
     numThreads,
     affinityMode         : threading parameters (only if Z7_ST is not defined).
     method2              : method name and properties. The function works with a local copy.
     uncompressedDataSize : the size of input buffer of each encoder.
     fileData             : optional input data. If it's not NULL, CRC of that data
                            is calculated here and is sent to each encoder.
     generateDictBits     : it's sent to each encoder.
     printCallback        : optional. CheckBreak() is called for it.
     callback             : it gets SetEncodeResult() and SetDecodeResult().
                            It's called without NULL check.
     benchProps           : complexity values and KeySize. It's used without NULL check.

   Steps:
     1) find the method and create encoder / decoder objects for each encoder thread;
     2) set parameters of each CEncoderInfo;
     3) encoding: in encoder threads (mtEncMode), or with direct call of encoders[0].Encode();
     4) callback->SetEncodeResult();
     5) decoding: in decoder threads (mtDecoderMode), or with direct call of Decode(0);
     6) callback->SetDecodeResult().

   Returns E_NOTIMPL, if the method was not found or coder objects were not
   created, E_INVALIDARG, if the method has more than one stream, or the first
   error code from any step. */
1772 static HRESULT MethodBench(
1773     DECL_EXTERNAL_CODECS_LOC_VARS
1774     UInt64 complexInCommands,
1775     #ifndef Z7_ST
1776       bool oldLzmaBenchMode,
1777       UInt32 numThreads,
1778       const CAffinityMode *affinityMode,
1779     #endif
1780     const COneMethodInfo &method2,
1781     size_t uncompressedDataSize,
1782     const Byte *fileData,
1783     unsigned generateDictBits,
1784 
1785     IBenchPrintCallback *printCallback,
1786     IBenchCallback *callback,
1787     CBenchProps *benchProps)
1788 {
1789   COneMethodInfo method = method2;
1790   UInt64 methodId;
1791   UInt32 numStreams;
1792   bool isFilter;
1793   const int codecIndex = FindMethod_Index(
1794       EXTERNAL_CODECS_LOC_VARS
1795       method.MethodName, true,
1796       methodId, numStreams, isFilter);
1797   if (codecIndex < 0)
1798     return E_NOTIMPL;
1799   if (numStreams != 1)
1800     return E_INVALIDARG;
1801 
1802   UInt32 numEncoderThreads = 1;
1803   UInt32 numSubDecoderThreads = 1;
1804 
1805   #ifndef Z7_ST
1806     numEncoderThreads = numThreads;
1807 
         /* old LZMA benchmark mode: if LZMA encoder uses more than one thread,
            we use ((numThreads + 1) / 2) encoders and 2 decoder threads for each encoder */
1808     if (oldLzmaBenchMode)
1809     if (methodId == k_LZMA)
1810     {
1811       if (numThreads == 1 && method.Get_NumThreads() < 0)
1812         method.AddProp_NumThreads(1);
1813       const UInt32 numLzmaThreads = method.Get_Lzma_NumThreads();
1814       if (numThreads > 1 && numLzmaThreads > 1)
1815       {
1816         numEncoderThreads = (numThreads + 1) / 2; // 20.03
1817         numSubDecoderThreads = 2;
1818       }
1819     }
1820 
       // encoder threads are created also for single encoder, if affinity is required
1821   const bool mtEncMode = (numEncoderThreads > 1) || affinityMode->NeedAffinity();
1822 
1823   #endif
1824 
1825   CBenchEncoders encodersSpec(numEncoderThreads);
1826   CEncoderInfo *encoders = encodersSpec.encoders;
1827 
1828   UInt32 i;
1829 
       // ---------- create coder objects and set CRC check modes ----------
1830   for (i = 0; i < numEncoderThreads; i++)
1831   {
1832     CEncoderInfo &encoder = encoders[i];
1833     encoder.callback = (i == 0) ? callback : NULL;
1834     encoder.printCallback = printCallback;
1835 
1836     #ifndef Z7_ST
1837     encoder.EncoderIndex = i;
1838     encoder.NumEncoderInternalThreads = numSubDecoderThreads;
1839     encoder.AffinityMode = *affinityMode;
1840 
1841     /*
1842     if (numSubDecoderThreads > 1)
1843     if (encoder.AffinityMode.NeedAffinity()
1844         && encoder.AffinityMode.NumBundleThreads == 1)
1845     {
1846       // if old LZMA benchmark uses two threads in coder, we increase (NumBundleThreads) for old LZMA benchmark uses two threads instead of one
1847       if (encoder.AffinityMode.NumBundleThreads * 2 <= encoder.AffinityMode.NumCores)
1848         encoder.AffinityMode.NumBundleThreads *= 2;
1849     }
1850     */
1851 
1852     #endif
1853 
1854     {
1855       CCreatedCoder cod;
1856       RINOK(CreateCoder_Index(EXTERNAL_CODECS_LOC_VARS (unsigned)codecIndex, true, encoder._encoderFilter, cod))
1857       encoder._encoder = cod.Coder;
1858       if (!encoder._encoder && !encoder._encoderFilter)
1859         return E_NOTIMPL;
1860     }
1861 
         // fixed byte sequences for iv, key and password
1862     SetPseudoRand(encoder._iv,  sizeof(encoder._iv), 17);
1863     SetPseudoRand(encoder._key, sizeof(encoder._key), 51);
1864     SetPseudoRand(encoder._psw, sizeof(encoder._psw), 123);
1865 
1866     for (UInt32 j = 0; j < numSubDecoderThreads; j++)
1867     {
1868       CCreatedCoder cod;
1869       CMyComPtr<ICompressCoder> &decoder = encoder._decoders[j];
1870       RINOK(CreateCoder_Id(EXTERNAL_CODECS_LOC_VARS methodId, false, encoder._decoderFilter, cod))
1871       decoder = cod.Coder;
1872       if (!encoder._decoderFilter && !decoder)
1873         return E_NOTIMPL;
1874     }
1875 
         // (EncComplex > 30) : encoder uses real data mode and CRC check mode
1876     encoder.UseRealData_Enc =
1877     encoder.CheckCrc_Enc = (benchProps->EncComplex) > 30;
1878 
         // (DecComplexCompr + DecComplexUnc <= 30) : CRC is checked in first decoding pass only
1879     encoder.CheckCrcMode_Dec = k_CheckCrcMode_Always;
1880     if (benchProps->DecComplexCompr +
1881         benchProps->DecComplexUnc <= 30)
1882       encoder.CheckCrcMode_Dec =
1883           k_CheckCrcMode_FirstPass; // for filters
1884           // k_CheckCrcMode_Never; // for debug
1885           // k_CheckCrcMode_Always; // for debug
         // for (fileData) we always use real data mode and full CRC check for decoding
1886     if (fileData)
1887     {
1888       encoder.UseRealData_Enc = true;
1889       encoder.CheckCrcMode_Dec = k_CheckCrcMode_Always;
1890     }
1891   }
1892 
1893   UInt32 crc = 0;
1894   if (fileData)
1895     crc = CrcCalc(fileData, uncompressedDataSize);
1896 
1897   for (i = 0; i < numEncoderThreads; i++)
1898   {
1899     CEncoderInfo &encoder = encoders[i];
1900     encoder._method = method;
1901     encoder.generateDictBits = generateDictBits;
1902     encoder._uncompressedDataSize = uncompressedDataSize;
1903     encoder.kBufferSize = uncompressedDataSize;
1904     encoder.fileData = fileData;
1905     encoder.crc = crc;
1906   }
1907 
1908   CBenchProgressStatus status;
1909   status.Res = S_OK;
1910   status.EncodeMode = true;
1911 
1912   #ifndef Z7_ST
       /* (encoderFlusher) is declared after (encodersSpec). So the destructor of
          (encoderFlusher) is called before the encoders are deleted, if this
          function returns early. */
1913   CBenchThreadsFlusher encoderFlusher;
1914   if (mtEncMode)
1915   {
1916     WRes wres = encoderFlusher.Common.StartEvent.Create();
1917     if (wres != 0)
1918       return HRESULT_FROM_WIN32(wres);
1919     encoderFlusher.NumThreads = numEncoderThreads;
1920     encoderFlusher.EncodersSpec = &encodersSpec;
1921     encoderFlusher.NeedClose = true;
1922   }
1923   #endif
1924 
       // ---------- set encoding parameters and create encoder threads ----------
1925   for (i = 0; i < numEncoderThreads; i++)
1926   {
1927     CEncoderInfo &encoder = encoders[i];
1928     encoder.NumIterations = GetNumIterations(benchProps->GetNumCommands_Enc(uncompressedDataSize), complexInCommands);
1929     // encoder.NumIterations = 3;
1930     encoder.Salt = g_CrcTable[i & 0xFF];
1931     encoder.Salt ^= (g_CrcTable[(i >> 8) & 0xFF] << 3);
1932     // (g_CrcTable[0] == 0), and (encoder.Salt == 0) for first thread
1933     // printf(" %8x", encoder.Salt);
1934 
1935     encoder.KeySize = benchProps->KeySize;
1936 
1937     for (int j = 0; j < 2; j++)
1938     {
1939       CBenchProgressInfo *spec = new CBenchProgressInfo;
1940       encoder.progressInfoSpec[j] = spec;
1941       encoder.progressInfo[j] = spec;
1942       spec->Status = &status;
1943     }
1944 
         // only progress object of first encoder calls (callback)
1945     if (i == 0)
1946     {
1947       CBenchProgressInfo *bpi = encoder.progressInfoSpec[0];
1948       bpi->Callback = callback;
1949       bpi->BenchInfo.NumIterations = numEncoderThreads;
1950     }
1951 
1952     #ifndef Z7_ST
1953     if (mtEncMode)
1954     {
1955       #ifdef USE_ALLOCA
1956       encoder.AllocaSize = (i * 16 * 21) & 0x7FF;
1957       #endif
1958 
1959       encoder.Common = &encoderFlusher.Common;
1960       encoder.IsGlobalMtMode = numEncoderThreads > 1;
1961       RINOK(encoder.CreateEncoderThread())
1962     }
1963     #endif
1964   }
1965 
1966   if (printCallback)
1967   {
1968     RINOK(printCallback->CheckBreak())
1969   }
1970 
1971   #ifndef Z7_ST
1972   if (mtEncMode)
1973   {
         // we wait until each encoder thread sets ReadyEvent
1974     for (i = 0; i < numEncoderThreads; i++)
1975     {
1976       CEncoderInfo &encoder = encoders[i];
1977       const WRes wres = encoder.ReadyEvent.Lock();
1978       if (wres != 0)
1979         return HRESULT_FROM_WIN32(wres);
1980       RINOK(encoder.Results[0])
1981     }
1982 
         // start time is set after all encoders are ready
1983     CBenchProgressInfo *bpi = encoders[0].progressInfoSpec[0];
1984     bpi->SetStartTime();
1985 
         // StartAndWait() sets StartEvent and waits for encoder threads
1986     const WRes wres = encoderFlusher.StartAndWait();
1987     if (status.Res == 0 && wres != 0)
1988       return HRESULT_FROM_WIN32(wres);
1989   }
1990   else
1991   #endif
1992   {
1993     RINOK(encoders[0].Encode())
1994   }
1995 
1996   RINOK(status.Res)
1997 
1998   CBenchInfo info;
1999 
2000   encoders[0].progressInfoSpec[0]->SetFinishTime(info);
2001   info.UnpackSize = 0;
2002   info.PackSize = 0;
2003   info.NumIterations = encoders[0].NumIterations;
2004 
       // sizes of one iteration for all encoders
2005   for (i = 0; i < numEncoderThreads; i++)
2006   {
2007     const CEncoderInfo &encoder = encoders[i];
2008     info.UnpackSize += encoder.kBufferSize;
2009     info.PackSize += encoder.compressedSize;
2010     // printf("\n%7d\n", encoder.compressedSize);
2011   }
2012 
2013   RINOK(callback->SetEncodeResult(info, true))
2014 
2015 
2016 
2017 
2018   // ---------- Decode ----------
2019 
2020   status.Res = S_OK;
2021   status.EncodeMode = false;
2022 
2023   const UInt32 numDecoderThreads = numEncoderThreads * numSubDecoderThreads;
2024   #ifndef Z7_ST
2025   const bool mtDecoderMode = (numDecoderThreads > 1) || affinityMode->NeedAffinity();
2026   #endif
2027 
2028   for (i = 0; i < numEncoderThreads; i++)
2029   {
2030     CEncoderInfo &encoder = encoders[i];
2031 
2032     /*
2033     #ifndef Z7_ST
2034     // encoder.affinityMode = *affinityMode;
2035     if (encoder.NumEncoderInternalThreads != 1)
2036       encoder.AffinityMode.DivideNum = encoder.NumEncoderInternalThreads;
2037     #endif
2038     */
2039 
2040 
         // the number of decoding iterations is calculated for first encoder, and it's used for all encoders
2041     if (i == 0)
2042     {
2043       encoder.NumIterations = GetNumIterations(
2044           benchProps->GetNumCommands_Dec(
2045               encoder.compressedSize,
2046               encoder.kBufferSize),
2047           complexInCommands);
2048       CBenchProgressInfo *bpi = encoder.progressInfoSpec[0];
2049       bpi->Callback = callback;
2050       bpi->BenchInfo.NumIterations = numDecoderThreads;
2051       bpi->SetStartTime();
2052     }
2053     else
2054       encoder.NumIterations = encoders[0].NumIterations;
2055 
2056     #ifndef Z7_ST
2057     {
2058       int numSubThreads = method.Get_NumThreads();
2059       encoder.NumDecoderSubThreads = (numSubThreads <= 0) ? 1 : (unsigned)numSubThreads;
2060     }
2061     if (mtDecoderMode)
2062     {
2063       for (UInt32 j = 0; j < numSubDecoderThreads; j++)
2064       {
             // second argument (callbackMode) is true only for first thread of first encoder
2065         const HRESULT res = encoder.CreateDecoderThread(j, (i == 0 && j == 0)
2066             #ifdef USE_ALLOCA
2067             , ((i * numSubDecoderThreads + j) * 16 * 21) & 0x7FF
2068             #endif
2069             );
2070         RINOK(res)
2071       }
2072     }
2073     else
2074     #endif
2075     {
2076       RINOK(encoder.Decode(0))
2077     }
2078   }
2079 
2080   #ifndef Z7_ST
2081   if (mtDecoderMode)
2082   {
         // we wait for all decoder threads, and we keep first error codes
2083     WRes wres = 0;
2084     HRESULT res = S_OK;
2085     for (i = 0; i < numEncoderThreads; i++)
2086       for (UInt32 j = 0; j < numSubDecoderThreads; j++)
2087       {
2088         CEncoderInfo &encoder = encoders[i];
2089         const WRes wres2 = encoder.thread[j].
2090             // Wait(); // later we can get thread times from thread in UNDER_CE
2091             Wait_Close();
2092         if (wres == 0 && wres2 != 0)
2093           wres = wres2;
2094         const HRESULT res2 = encoder.Results[j];
2095         if (res == 0 && res2 != 0)
2096           res = res2;
2097       }
2098     if (wres != 0)
2099       return HRESULT_FROM_WIN32(wres);
2100     RINOK(res)
2101   }
2102   #endif // Z7_ST
2103 
2104   RINOK(status.Res)
2105   encoders[0].progressInfoSpec[0]->SetFinishTime(info);
2106 
2107   /*
2108   #ifndef Z7_ST
2109   #ifdef UNDER_CE
2110   if (mtDecoderMode)
2111     for (i = 0; i < numEncoderThreads; i++)
2112       for (UInt32 j = 0; j < numSubDecoderThreads; j++)
2113       {
2114         FILETIME creationTime, exitTime, kernelTime, userTime;
2115         if (::GetThreadTimes(encoders[i].thread[j], &creationTime, &exitTime, &kernelTime, &userTime) != 0)
2116           info.UserTime += GetTime64(userTime) + GetTime64(kernelTime);
2117       }
2118   #endif
2119   #endif
2120   */
2121 
2122   info.UnpackSize = 0;
2123   info.PackSize = 0;
       // each encoder data is decoded by (numSubDecoderThreads) decoders
2124   info.NumIterations = numSubDecoderThreads * encoders[0].NumIterations;
2125 
2126   for (i = 0; i < numEncoderThreads; i++)
2127   {
2128     const CEncoderInfo &encoder = encoders[i];
2129     info.UnpackSize += encoder.kBufferSize;
2130     info.PackSize += encoder.compressedSize;
2131   }
2132 
2133   // RINOK(callback->SetDecodeResult(info, false)) // why we called before 21.03 ??
2134   RINOK(callback->SetDecodeResult(info, true))
2135 
2136   return S_OK;
2137 }
2138 
2139 
2140 
2141 static inline UInt64 GetDictSizeFromLog(unsigned dictSizeLog)
2142 {
2143   /*
2144   if (dictSizeLog < 32)
2145     return (UInt32)1 << dictSizeLog;
2146   else
2147     return (UInt32)(Int32)-1;
2148   */
2149   return (UInt64)1 << dictSizeLog;
2150 }
2151 
2152 
2153 // it's limit of current LZMA implementation that can be changed later
2154 #define kLzmaMaxDictSize ((UInt32)15 << 28)
2155 
/* GetLZMAUsage()
   Returns the estimated memory usage (in bytes) of LZMA encoder:
     multiThread : adds (1 << 20) to block size and (6 << 20) to result
     btMode      : if it's not zero, (son) size is (dict * 2) items instead of (dict) items
     dict        : dictionary size. It's limited to range [1, kLzmaMaxDictSize].
   result = (hs + son) * 4 + blockSize + (1 << 20) + (multiThread ? (6 << 20) : 0)
   NOTE(review): presumably (hs), (son) and (blockSize) are sizes of hash table,
     tree / chain table and window buffer of LZMA match finder - confirm against encoder code. */
2156 static inline UInt64 GetLZMAUsage(bool multiThread, int btMode, UInt64 dict)
2157 {
2158   if (dict == 0)
2159     dict = 1;
2160   if (dict > kLzmaMaxDictSize)
2161     dict = kLzmaMaxDictSize;
       /* hs : high bits of (dict - 1) are propagated to lower bits (shifts 1, 2, 4, 8),
          then the value is halved and low 16 bits are set.
          if the value is larger than (1 << 24), it is halved again.
          then 1 and (1 << 16) are added. */
2162   UInt32 hs = (UInt32)dict - 1;
2163   hs |= (hs >> 1);
2164   hs |= (hs >> 2);
2165   hs |= (hs >> 4);
2166   hs |= (hs >> 8);
2167   hs >>= 1;
2168   hs |= 0xFFFF;
2169   if (hs > (1 << 24))
2170     hs >>= 1;
2171   hs++;
2172   hs += (1 << 16);
2173 
       // kBlockSizeMax = (2^32 - 2^16) in 32-bit unsigned arithmetic
2174   const UInt32 kBlockSizeMax = (UInt32)0 - (UInt32)(1 << 16);
2175   UInt64 blockSize = (UInt64)dict + (1 << 16)
2176       + (multiThread ? (1 << 20) : 0);
       // reserve: 1/2 of size, if size is smaller than (1 << 30), or 1/4 of size otherwise
2177   blockSize += (blockSize >> (blockSize < ((UInt32)1 << 30) ? 1 : 2));
2178   if (blockSize >= kBlockSizeMax)
2179     blockSize = kBlockSizeMax;
2180 
2181   UInt64 son = (UInt64)dict;
2182   if (btMode)
2183     son *= 2;
       // (hs + son) is the number of items. Each item is counted as 4 bytes
2184   const UInt64 v = (hs + son) * 4 + blockSize +
2185       (1 << 20) + (multiThread ? (6 << 20) : 0);
2186 
2187   // printf("\nGetLZMAUsage = %d\n", (UInt32)(v >> 20));
2188   // printf("\nblockSize = %d\n", (UInt32)(blockSize >> 20));
2189   return v;
2190 }
2191 
2192 
2193 UInt64 GetBenchMemoryUsage(UInt32 numThreads, int level, UInt64 dictionary, bool totalBench)
2194 {
2195   const size_t kBufferSize = (size_t)dictionary + kAdditionalSize;
2196   const UInt64 kCompressedBufferSize = GetBenchCompressedSize(kBufferSize); // / 2;
2197   if (level < 0)
2198     level = 5;
2199   const int algo = (level < 5 ? 0 : 1);
2200   const int btMode = (algo == 0 ? 0 : 1);
2201 
2202   UInt32 numBigThreads = numThreads;
2203   bool lzmaMt = (totalBench || (numThreads > 1 && btMode));
2204   if (btMode)
2205   {
2206     if (!totalBench && lzmaMt)
2207       numBigThreads /= 2;
2208   }
2209   return ((UInt64)kBufferSize + kCompressedBufferSize +
2210     GetLZMAUsage(lzmaMt, btMode, dictionary) + (2 << 20)) * numBigThreads;
2211 }
2212 
2213 static UInt64 GetBenchMemoryUsage_Hash(UInt32 numThreads, UInt64 dictionary)
2214 {
2215   // dictionary += (dictionary >> 9); // for page tables (virtual memory)
2216   return (UInt64)(dictionary + (1 << 15)) * numThreads + (2 << 20);
2217 }
2218 
2219 
2220 // ---------- CRC and HASH ----------
2221 
2222 struct CCrcInfo_Base
2223 {
2224   CMidAlignedBuffer Buffer;
2225   const Byte *Data;
2226   size_t Size;
2227   bool CreateLocalBuf;
2228   UInt32 CheckSum_Res;
2229 
2230   CCrcInfo_Base(): CreateLocalBuf(true), CheckSum_Res(0) {}
2231 
2232   HRESULT Generate(const Byte *data, size_t size);
2233   HRESULT CrcProcess(UInt64 numIterations,
2234       const UInt32 *checkSum, IHasher *hf,
2235       IBenchPrintCallback *callback);
2236 };
2237 
2238 
2239 HRESULT CCrcInfo_Base::Generate(const Byte *data, size_t size)
2240 {
2241   Size = size;
2242   Data = data;
2243   if (!data || CreateLocalBuf)
2244   {
2245     ALLOC_WITH_HRESULT(&Buffer, size)
2246     Data = Buffer;
2247   }
2248   if (!data)
2249     RandGen(Buffer, size);
2250   else if (CreateLocalBuf && size != 0)
2251     memcpy(Buffer, data, size);
2252   return S_OK;
2253 }
2254 
2255 
2256 HRESULT CCrcInfo_Base::CrcProcess(UInt64 numIterations,
2257     const UInt32 *checkSum, IHasher *hf,
2258     IBenchPrintCallback *callback)
2259 {
2260   MY_ALIGN(16)
2261   Byte hash[64];
2262   memset(hash, 0, sizeof(hash));
2263 
2264   CheckSum_Res = 0;
2265 
2266   const UInt32 hashSize = hf->GetDigestSize();
2267   if (hashSize > sizeof(hash))
2268     return S_FALSE;
2269 
2270   const Byte *buf = Data;
2271   const size_t size = Size;
2272   UInt32 checkSum_Prev = 0;
2273 
2274   UInt64 prev = 0;
2275   UInt64 cur = 0;
2276 
2277   for (UInt64 i = 0; i < numIterations; i++)
2278   {
2279     hf->Init();
2280     size_t pos = 0;
2281     do
2282     {
2283       const size_t rem = size - pos;
2284       const UInt32 kStep = ((UInt32)1 << 31);
2285       const UInt32 curSize = (rem < kStep) ? (UInt32)rem : kStep;
2286       hf->Update(buf + pos, curSize);
2287       pos += curSize;
2288     }
2289     while (pos != size);
2290 
2291     hf->Final(hash);
2292     UInt32 sum = 0;
2293     for (UInt32 j = 0; j < hashSize; j += 4)
2294     {
2295       sum = rotlFixed(sum, 11);
2296       sum += GetUi32(hash + j);
2297     }
2298     if (checkSum)
2299     {
2300       if (sum != *checkSum)
2301         return S_FALSE;
2302     }
2303     else
2304     {
2305       checkSum_Prev = sum;
2306       checkSum = &checkSum_Prev;
2307     }
2308     if (callback)
2309     {
2310       cur += size;
2311       if (cur - prev >= ((UInt32)1 << 30))
2312       {
2313         prev = cur;
2314         RINOK(callback->CheckBreak())
2315       }
2316     }
2317   }
2318   CheckSum_Res = checkSum_Prev;
2319   return S_OK;
2320 }
2321 
extern
UInt32 g_BenchCpuFreqTemp; // we need a non-static variable to disable compiler optimization
UInt32 g_BenchCpuFreqTemp = 1;
2325 
// Unrolled command sequence for CountCpuFreq():
// YY1 is 2 statements, and each next macro repeats the previous one 4 times.
// So YY7 expands to 64 * YY1 = 128 statements, that is equal to kNumFreqCommands.
#define YY1 sum += val; sum ^= val;
#define YY3 YY1 YY1 YY1 YY1
#define YY5 YY3 YY3 YY3 YY3
#define YY7 YY5 YY5 YY5 YY5
static const UInt32 kNumFreqCommands = 128;
2331 
2332 EXTERN_C_BEGIN
2333 
2334 static UInt32 CountCpuFreq(UInt32 sum, UInt32 num, UInt32 val)
2335 {
2336   for (UInt32 i = 0; i < num; i++)
2337   {
2338     YY7
2339   }
2340   return sum;
2341 }
2342 
2343 EXTERN_C_END
2344 
2345 
2346 #ifndef Z7_ST
2347 
2348 struct CBaseThreadInfo
2349 {
2350   NWindows::CThread Thread;
2351   IBenchPrintCallback *Callback;
2352   HRESULT CallbackRes;
2353 
2354   WRes Wait_If_Created()
2355   {
2356     if (!Thread.IsCreated())
2357       return 0;
2358     return Thread.Wait_Close();
2359   }
2360 };
2361 
// Per-thread state of the CPU frequency benchmark (used by FreqThreadFunction).
struct CFreqInfo: public CBaseThreadInfo
{
  UInt32 ValRes;         // final accumulated value, written by the thread function
  UInt32 Size;           // passed to CountCpuFreq() as the loop count
  UInt64 NumIterations;  // number of CountCpuFreq() calls made by the thread
};
2368 
2369 static THREAD_FUNC_DECL FreqThreadFunction(void *param)
2370 {
2371   CFreqInfo *p = (CFreqInfo *)param;
2372 
2373   UInt32 sum = g_BenchCpuFreqTemp;
2374   for (UInt64 k = p->NumIterations; k > 0; k--)
2375   {
2376     if (p->Callback)
2377     {
2378       p->CallbackRes = p->Callback->CheckBreak();
2379       if (p->CallbackRes != S_OK)
2380         break;
2381     }
2382     sum = CountCpuFreq(sum, p->Size, g_BenchCpuFreqTemp);
2383   }
2384   p->ValRes = sum;
2385   return THREAD_FUNC_RET_ZERO;
2386 }
2387 
2388 struct CFreqThreads
2389 {
2390   CFreqInfo *Items;
2391   UInt32 NumThreads;
2392 
2393   CFreqThreads(): Items(NULL), NumThreads(0) {}
2394 
2395   WRes WaitAll()
2396   {
2397     WRes wres = 0;
2398     for (UInt32 i = 0; i < NumThreads; i++)
2399     {
2400       WRes wres2 = Items[i].Wait_If_Created();
2401       if (wres == 0 && wres2 != 0)
2402         wres = wres2;
2403     }
2404     NumThreads = 0;
2405     return wres;
2406   }
2407 
2408   ~CFreqThreads()
2409   {
2410     WaitAll();
2411     delete []Items;
2412   }
2413 };
2414 
2415 
2416 static THREAD_FUNC_DECL CrcThreadFunction(void *param);
2417 
2418 struct CCrcInfo: public CBaseThreadInfo
2419 {
2420   const Byte *Data;
2421   size_t Size;
2422   UInt64 NumIterations;
2423   bool CheckSumDefined;
2424   UInt32 CheckSum;
2425   CMyComPtr<IHasher> Hasher;
2426   HRESULT Res;
2427   UInt32 CheckSum_Res;
2428 
2429   #ifndef Z7_ST
2430   NSynchronization::CManualResetEvent ReadyEvent;
2431   UInt32 ThreadIndex;
2432   CBenchSyncCommon *Common;
2433   CAffinityMode AffinityMode;
2434   #endif
2435 
2436   // we want to call CCrcInfo_Base::Buffer.Free() in main thread.
2437   // so we uses non-local CCrcInfo_Base.
2438   CCrcInfo_Base crcib;
2439 
2440   HRESULT CreateThread()
2441   {
2442     WRes res = 0;
2443     if (!ReadyEvent.IsCreated())
2444       res = ReadyEvent.Create();
2445     if (res == 0)
2446       res = AffinityMode.CreateThread_WithAffinity(Thread, CrcThreadFunction, this,
2447           ThreadIndex);
2448     return HRESULT_FROM_WIN32(res);
2449   }
2450 
2451   #ifdef USE_ALLOCA
2452   size_t AllocaSize;
2453   #endif
2454 
2455   void Process();
2456 
2457   CCrcInfo(): Res(E_FAIL) {}
2458 };
2459 
2460 static const bool k_Crc_CreateLocalBuf_For_File = true; // for total BW test
2461 // static const bool k_Crc_CreateLocalBuf_For_File = false; // for shared memory read test
2462 
2463 void CCrcInfo::Process()
2464 {
2465   crcib.CreateLocalBuf = k_Crc_CreateLocalBuf_For_File;
2466   // we can use additional Generate() passes to reduce some time effects for new page allocation
2467   // for (unsigned y = 0; y < 10; y++)
2468   Res = crcib.Generate(Data, Size);
2469 
2470   // if (Common)
2471   {
2472     WRes wres = ReadyEvent.Set();
2473     if (wres != 0)
2474     {
2475       if (Res == 0)
2476         Res = HRESULT_FROM_WIN32(wres);
2477       return;
2478     }
2479     if (Res != 0)
2480       return;
2481 
2482     wres = Common->StartEvent.Lock();
2483 
2484     if (wres != 0)
2485     {
2486       Res = HRESULT_FROM_WIN32(wres);
2487       return;
2488     }
2489     if (Common->ExitMode)
2490       return;
2491   }
2492 
2493   Res = crcib.CrcProcess(NumIterations,
2494       CheckSumDefined ? &CheckSum : NULL, Hasher,
2495       Callback);
2496   CheckSum_Res = crcib.CheckSum_Res;
2497   /*
2498   We don't want to include the time of slow CCrcInfo_Base::Buffer.Free()
2499   to time of benchmark. So we don't free Buffer here
2500   */
2501   // crcib.Buffer.Free();
2502 }
2503 
2504 
2505 static THREAD_FUNC_DECL CrcThreadFunction(void *param)
2506 {
2507   CCrcInfo *p = (CCrcInfo *)param;
2508 
2509   #ifdef USE_ALLOCA
2510   alloca(p->AllocaSize);
2511   #endif
2512   p->Process();
2513   return THREAD_FUNC_RET_ZERO;
2514 }
2515 
2516 
2517 struct CCrcThreads
2518 {
2519   CCrcInfo *Items;
2520   unsigned NumThreads;
2521   CBenchSyncCommon Common;
2522   bool NeedClose;
2523 
2524   CCrcThreads(): Items(NULL), NumThreads(0), NeedClose(false) {}
2525 
2526   WRes StartAndWait(bool exitMode = false);
2527 
2528   ~CCrcThreads()
2529   {
2530     StartAndWait(true);
2531     delete []Items;
2532   }
2533 };
2534 
2535 
2536 WRes CCrcThreads::StartAndWait(bool exitMode)
2537 {
2538   if (!NeedClose)
2539     return 0;
2540 
2541   Common.ExitMode = exitMode;
2542   WRes wres = Common.StartEvent.Set();
2543 
2544   for (unsigned i = 0; i < NumThreads; i++)
2545   {
2546     WRes wres2 = Items[i].Wait_If_Created();
2547     if (wres == 0 && wres2 != 0)
2548       wres = wres2;
2549   }
2550   NumThreads = 0;
2551   NeedClose = false;
2552   return wres;
2553 }
2554 
2555 #endif
2556 
2557 
2558 static UInt32 CrcCalc1(const Byte *buf, size_t size)
2559 {
2560   UInt32 crc = CRC_INIT_VAL;
2561   for (size_t i = 0; i < size; i++)
2562     crc = CRC_UPDATE_BYTE(crc, buf[i]);
2563   return CRC_GET_DIGEST(crc);
2564 }
2565 
2566 /*
2567 static UInt32 RandGenCrc(Byte *buf, size_t size, CBaseRandomGenerator &RG)
2568 {
2569   RandGen(buf, size, RG);
2570   return CrcCalc1(buf, size);
2571 }
2572 */
2573 
2574 static bool CrcInternalTest()
2575 {
2576   CAlignedBuffer buffer;
2577   const size_t kBufferSize0 = (1 << 8);
2578   const size_t kBufferSize1 = (1 << 10);
2579   const unsigned kCheckSize = (1 << 5);
2580   buffer.Alloc(kBufferSize0 + kBufferSize1);
2581   if (!buffer.IsAllocated())
2582     return false;
2583   Byte *buf = (Byte *)buffer;
2584   size_t i;
2585   for (i = 0; i < kBufferSize0; i++)
2586     buf[i] = (Byte)i;
2587   UInt32 crc1 = CrcCalc1(buf, kBufferSize0);
2588   if (crc1 != 0x29058C73)
2589     return false;
2590   RandGen(buf + kBufferSize0, kBufferSize1);
2591   for (i = 0; i < kBufferSize0 + kBufferSize1 - kCheckSize; i++)
2592     for (unsigned j = 0; j < kCheckSize; j++)
2593       if (CrcCalc1(buf + i, j) != CrcCalc(buf + i, j))
2594         return false;
2595   return true;
2596 }
2597 
// One row of g_Bench table: a method that TotalBench() can run.
struct CBenchMethod
{
  unsigned Weight;        // TotalBench() uses it as EncodeWeight and DecodeWeight of callback
  unsigned DictBits;      // passed to MethodBench(); 0 selects kFilterUnpackSize in TotalBench() (if size is not forced)
  Int32 EncComplex;       // copied to BenchProps.EncComplex
  Int32 DecComplexCompr;  // copied to BenchProps.DecComplexCompr
  Int32 DecComplexUnc;    // copied to BenchProps.DecComplexUnc
  const char *Name;       // method string: parsed by TotalBench() and matched with method mask
  // unsigned KeySize;
};
2608 
// CMPLX(x) is used for some complexity values in g_Bench and g_Hash tables.
// If USE_SW_CMPLX is defined, the value is multiplied by 1000.
// Otherwise the value is used as is.
// #define USE_SW_CMPLX

#ifdef USE_SW_CMPLX
#define CMPLX(x) ((x) * 1000)
#else
#define CMPLX(x) (x)
#endif
2616 
// Methods for TotalBench().
// Columns: Weight, DictBits, EncComplex, DecComplexCompr, DecComplexUnc, Name
// Commented rows are disabled variants.
static const CBenchMethod g_Bench[] =
{
  // { 40, 17,  357,  145,   20, "LZMA:x1" },
  // { 20, 18,  360,  145,   20, "LZMA2:x1:mt2" },

  { 20, 18,  360,  145,   20, "LZMA:x1" },
  { 20, 22,  600,  145,   20, "LZMA:x3" },

  { 80, 24, 1220,  145,   20, "LZMA:x5:mt1" },
  { 80, 24, 1220,  145,   20, "LZMA:x5:mt2" },

  { 10, 16,  124,   40,   14, "Deflate:x1" },
  { 20, 16,  376,   40,   14, "Deflate:x5" },
  { 10, 16, 1082,   40,   14, "Deflate:x7" },
  { 10, 17,  422,   40,   14, "Deflate64:x5" },

  { 10, 15,  590,   69,   69, "BZip2:x1" },
  { 20, 19,  815,  122,  122, "BZip2:x5" },
  { 10, 19,  815,  122,  122, "BZip2:x5:mt2" },
  { 10, 19, 2530,  122,  122, "BZip2:x7" },

  // { 10, 18, 1010,    0, 1150, "PPMDZip:x1" },
  { 10, 18, 1010,    0, 1150, "PPMD:x1" },
  // { 10, 22, 1655,    0, 1830, "PPMDZip:x5" },
  { 10, 22, 1655,    0, 1830, "PPMD:x5" },

  // {  2,  0,  -16,    0,  -16, "Swap2" },
  {  2,  0,  -16,    0,  -16, "Swap4" },

  // {  2,  0,    3,    0,    4, "Delta:1" },
  // {  2,  0,    3,    0,    4, "Delta:2" },
  // {  2,  0,    3,    0,    4, "Delta:3" },
  {  2,  0,    3,    0,    4, "Delta:4" },
  // {  2,  0,    3,    0,    4, "Delta:8" },
  // {  2,  0,    3,    0,    4, "Delta:32" },

  {  2,  0,    2,    0,    2, "BCJ" },
  {  2,  0,    1,    0,    1, "ARM64" },

  // { 10,  0,   18,    0,   18, "AES128CBC:1" },
  // { 10,  0,   21,    0,   21, "AES192CBC:1" },
  { 10,  0,   24,    0,   24, "AES256CBC:1" },

  // { 10,  0,   18,    0,   18, "AES128CTR:1" },
  // { 10,  0,   21,    0,   21, "AES192CTR:1" },
  // { 10,  0,   24,    0,   24, "AES256CTR:1" },
  // {  2,  0, CMPLX(6), 0, CMPLX(1), "AES128CBC:2" },
  // {  2,  0, CMPLX(7), 0, CMPLX(1), "AES192CBC:2" },
  {  2,  0, CMPLX(8), 0, CMPLX(1), "AES256CBC:2" },

  // {  2,  0, CMPLX(1), 0, CMPLX(1), "AES128CTR:2" },
  // {  2,  0, CMPLX(1), 0, CMPLX(1), "AES192CTR:2" },
  // {  2,  0, CMPLX(1), 0, CMPLX(1), "AES256CTR:2" },

  // {  1,  0, CMPLX(6), 0, CMPLX(1), "AES128CBC:3" },
  // {  1,  0, CMPLX(7), 0, CMPLX(1), "AES192CBC:3" },
  {  1,  0, CMPLX(8), 0, CMPLX(1), "AES256CBC:3" }

  // {  1,  0, CMPLX(1), 0, CMPLX(1), "AES128CTR:3" },
  // {  1,  0, CMPLX(1), 0, CMPLX(1), "AES192CTR:3" },
  // {  1,  0, CMPLX(1), 0, CMPLX(1), "AES256CTR:3" },
};
2679 
// One row of g_Hash table.
// NOTE(review): the code that reads these fields is not in this part of file;
// presumably Complex and CheckSum are passed to CrcBench() as (complexity)
// and (*checkSum) - confirm against the caller.
struct CBenchHash
{
  unsigned Weight;
  UInt32 Complex;
  UInt32 CheckSum;
  const char *Name;
};
2687 
// ARM_CRC_MUL is the multiplier for Complex values of "CRC32:32" and "CRC32:64" rows.
// #define ARM_CRC_MUL 100
#define ARM_CRC_MUL 1

// CrcBench() multiplies complexInCommands by k_Hash_Complex_Mult
// before the division by (complexity).
#define k_Hash_Complex_Mult 256

// Hash methods for benchmark.
// Columns: Weight, Complex, CheckSum, Name
static const CBenchHash g_Hash[] =
{
  // {  1,  1820, 0x21e207bb, "CRC32:1" },
  // { 10,   558, 0x21e207bb, "CRC32:4" },
  { 20,   339, 0x21e207bb, "CRC32:8" } ,
  {  2,   128 *ARM_CRC_MUL, 0x21e207bb, "CRC32:32" },
  {  2,    64 *ARM_CRC_MUL, 0x21e207bb, "CRC32:64" },
  { 10,   512, 0x41b901d1, "CRC64" },

  { 10, 5100,       0x7913ba03, "SHA256:1" },
  {  2, CMPLX((32 * 4 + 1) * 4 + 4), 0x7913ba03, "SHA256:2" },

  { 10, 2340,       0xff769021, "SHA1:1" },
  {  2, CMPLX((20 * 6 + 1) * 4 + 4), 0xff769021, "SHA1:2" },

  {  2,  5500, 0x85189d02, "BLAKE2sp" }
};
2710 
2711 static void PrintNumber(IBenchPrintCallback &f, UInt64 value, unsigned size)
2712 {
2713   char s[128];
2714   unsigned startPos = (unsigned)sizeof(s) - 32;
2715   memset(s, ' ', startPos);
2716   ConvertUInt64ToString(value, s + startPos);
2717   // if (withSpace)
2718   {
2719     startPos--;
2720     size++;
2721   }
2722   unsigned len = (unsigned)strlen(s + startPos);
2723   if (size > len)
2724   {
2725     size -= len;
2726     if (startPos < size)
2727       startPos = 0;
2728     else
2729       startPos -= size;
2730   }
2731   f.Print(s + startPos);
2732 }
2733 
// Widths of output columns (in characters).
// PrintNumber() prints one additional space before each number field.
static const unsigned kFieldSize_Name = 12;
static const unsigned kFieldSize_SmallName = 4;
static const unsigned kFieldSize_Speed = 9;
static const unsigned kFieldSize_Usage = 5;
static const unsigned kFieldSize_RU = 6;
static const unsigned kFieldSize_Rating = 6;
static const unsigned kFieldSize_EU = 5;
static const unsigned kFieldSize_Effec = 5;
static const unsigned kFieldSize_CrcSpeed = 8;


// 4 fields (Speed, Usage, RU, Rating) and 4 additional characters
// (presumably the spaces that PrintNumber() adds before each field).
static const unsigned kFieldSize_TotalSize = 4 + kFieldSize_Speed + kFieldSize_Usage + kFieldSize_RU + kFieldSize_Rating;
// 2 fields (EU, Effec) and 2 additional characters; PrintResults() prints
// this number of spaces instead of both fields, if frequency is unknown.
static const unsigned kFieldSize_EUAndEffec = 2 + kFieldSize_EU + kFieldSize_Effec;
2747 
2748 
2749 static void PrintRating(IBenchPrintCallback &f, UInt64 rating, unsigned size)
2750 {
2751   PrintNumber(f, (rating + 500000) / 1000000, size);
2752 }
2753 
2754 
2755 static void PrintPercents(IBenchPrintCallback &f, UInt64 val, UInt64 divider, unsigned size)
2756 {
2757   UInt64 v = 0;
2758   if (divider != 0)
2759     v = (val * 100 + divider / 2) / divider;
2760   PrintNumber(f, v, size);
2761 }
2762 
2763 static void PrintChars(IBenchPrintCallback &f, char c, unsigned size)
2764 {
2765   char s[256];
2766   memset(s, (Byte)c, size);
2767   s[size] = 0;
2768   f.Print(s);
2769 }
2770 
2771 static void PrintSpaces(IBenchPrintCallback &f, unsigned size)
2772 {
2773   PrintChars(f, ' ', size);
2774 }
2775 
2776 static void PrintUsage(IBenchPrintCallback &f, UInt64 usage, unsigned size)
2777 {
2778   PrintNumber(f, Benchmark_GetUsage_Percents(usage), size);
2779 }
2780 
2781 static void PrintResults(IBenchPrintCallback &f, UInt64 usage, UInt64 rpu, UInt64 rating, bool showFreq, UInt64 cpuFreq)
2782 {
2783   PrintUsage(f, usage, kFieldSize_Usage);
2784   PrintRating(f, rpu, kFieldSize_RU);
2785   PrintRating(f, rating, kFieldSize_Rating);
2786   if (showFreq)
2787   {
2788     if (cpuFreq == 0)
2789       PrintSpaces(f, kFieldSize_EUAndEffec);
2790     else
2791     {
2792       PrintPercents(f, rating, cpuFreq * usage / kBenchmarkUsageMult, kFieldSize_EU);
2793       PrintPercents(f, rating, cpuFreq, kFieldSize_Effec);
2794     }
2795   }
2796 }
2797 
2798 
2799 void CTotalBenchRes::Generate_From_BenchInfo(const CBenchInfo &info)
2800 {
2801   Speed = info.GetUnpackSizeSpeed();
2802   Usage = info.GetUsage();
2803   RPU = info.GetRatingPerUsage(Rating);
2804 }
2805 
2806 void CTotalBenchRes::Mult_For_Weight(unsigned weight)
2807 {
2808   NumIterations2 *= weight;
2809   RPU *= weight;
2810   Rating *= weight;
2811   Usage *= weight;
2812   Speed *= weight;
2813 }
2814 
2815 void CTotalBenchRes::Update_With_Res(const CTotalBenchRes &r)
2816 {
2817   Rating += r.Rating;
2818   Usage += r.Usage;
2819   RPU += r.RPU;
2820   Speed += r.Speed;
2821     // NumIterations1 = (r1.NumIterations1 + r2.NumIterations1);
2822   NumIterations2 += r.NumIterations2;
2823 }
2824 
2825 static void PrintResults(IBenchPrintCallback *f,
2826     const CBenchInfo &info,
2827     unsigned weight,
2828     UInt64 rating,
2829     bool showFreq, UInt64 cpuFreq,
2830     CTotalBenchRes *res)
2831 {
2832   CTotalBenchRes t;
2833   t.Rating = rating;
2834   t.NumIterations2 = 1;
2835   t.Generate_From_BenchInfo(info);
2836 
2837   if (f)
2838   {
2839     if (t.Speed != 0)
2840       PrintNumber(*f, t.Speed / 1024, kFieldSize_Speed);
2841     else
2842       PrintSpaces(*f, 1 + kFieldSize_Speed);
2843   }
2844   if (f)
2845   {
2846     PrintResults(*f, t.Usage, t.RPU, rating, showFreq, cpuFreq);
2847   }
2848 
2849   if (res)
2850   {
2851     // res->NumIterations1++;
2852     t.Mult_For_Weight(weight);
2853     res->Update_With_Res(t);
2854   }
2855 }
2856 
2857 static void PrintTotals(IBenchPrintCallback &f,
2858     bool showFreq, UInt64 cpuFreq, bool showSpeed, const CTotalBenchRes &res)
2859 {
2860   const UInt64 numIterations2 = res.NumIterations2 ? res.NumIterations2 : 1;
2861   const UInt64 speed = res.Speed / numIterations2;
2862   if (showSpeed && speed != 0)
2863     PrintNumber(f, speed / 1024, kFieldSize_Speed);
2864   else
2865     PrintSpaces(f, 1 + kFieldSize_Speed);
2866 
2867   // PrintSpaces(f, 1 + kFieldSize_Speed);
2868   // UInt64 numIterations1 = res.NumIterations1; if (numIterations1 == 0) numIterations1 = 1;
2869   PrintResults(f, res.Usage / numIterations2, res.RPU / numIterations2, res.Rating / numIterations2, showFreq, cpuFreq);
2870 }
2871 
2872 
2873 static void PrintHex(AString &s, UInt64 v)
2874 {
2875   char temp[32];
2876   ConvertUInt64ToHex(v, temp);
2877   s += temp;
2878 }
2879 
2880 AString GetProcessThreadsInfo(const NSystem::CProcessAffinity &ti)
2881 {
2882   AString s;
2883   // s.Add_UInt32(ti.numProcessThreads);
2884   unsigned numSysThreads = ti.GetNumSystemThreads();
2885   if (ti.GetNumProcessThreads() != numSysThreads)
2886   {
2887     // if (ti.numProcessThreads != ti.numSysThreads)
2888     {
2889       s += " / ";
2890       s.Add_UInt32(numSysThreads);
2891     }
2892     s += " : ";
2893     #ifdef _WIN32
2894     PrintHex(s, ti.processAffinityMask);
2895     s += " / ";
2896     PrintHex(s, ti.systemAffinityMask);
2897     #else
2898     unsigned i = (numSysThreads + 3) & ~(unsigned)3;
2899     if (i == 0)
2900       i = 4;
2901     for (; i >= 4; )
2902     {
2903       i -= 4;
2904       unsigned val = 0;
2905       for (unsigned k = 0; k < 4; k++)
2906       {
2907         const unsigned bit = (ti.IsCpuSet(i + k) ? 1 : 0);
2908         val += (bit << k);
2909       }
2910       PrintHex(s, val);
2911     }
2912     #endif
2913   }
2914   return s;
2915 }
2916 
2917 
#ifdef Z7_LARGE_PAGES

#ifdef _WIN32
extern bool g_LargePagesMode;
extern "C"
{
  extern SIZE_T g_LargePageSize;
}
#endif

// Appends large pages information to (s): "(LP-<size>[-1G][-NA])".
//   "-1G" is added, if CPU_IsSupported_PageGB() (x86 / x64 only),
//   "-NA" is added, if large pages mode is not active.
// Nothing is added, if the mode is not active and large page size is 0.
// In non-Windows builds the string is not changed.
void Add_LargePages_String(AString &s)
{
  #ifdef _WIN32
  if (!g_LargePagesMode && g_LargePageSize == 0)
    return;
  s.Add_OptSpaced("(LP-");
  PrintSize_KMGT_Or_Hex(s, g_LargePageSize);
  #ifdef MY_CPU_X86_OR_AMD64
  if (CPU_IsSupported_PageGB())
    s += "-1G";
  #endif
  if (!g_LargePagesMode)
    s += "-NA";
  s += ")";
  #else
    s += "";
  #endif
}

#endif
2949 
2950 
2951 
2952 static void PrintRequirements(IBenchPrintCallback &f, const char *sizeString,
2953     bool size_Defined, UInt64 size, const char *threadsString, UInt32 numThreads)
2954 {
2955   f.Print("RAM ");
2956   f.Print(sizeString);
2957   if (size_Defined)
2958     PrintNumber(f, (size >> 20), 6);
2959   else
2960     f.Print("      ?");
2961   f.Print(" MB");
2962 
2963   #ifdef Z7_LARGE_PAGES
2964   {
2965     AString s;
2966     Add_LargePages_String(s);
2967     f.Print(s);
2968   }
2969   #endif
2970 
2971   f.Print(",  # ");
2972   f.Print(threadsString);
2973   PrintNumber(f, numThreads, 3);
2974 }
2975 
2976 
2977 
2978 struct CBenchCallbackToPrint Z7_final: public IBenchCallback
2979 {
2980   bool NeedPrint;
2981   bool Use2Columns;
2982   bool ShowFreq;
2983   unsigned NameFieldSize;
2984 
2985   unsigned EncodeWeight;
2986   unsigned DecodeWeight;
2987 
2988   UInt64 CpuFreq;
2989   UInt64 DictSize;
2990 
2991   IBenchPrintCallback *_file;
2992   CBenchProps BenchProps;
2993   CTotalBenchRes EncodeRes;
2994   CTotalBenchRes DecodeRes;
2995 
2996   CBenchInfo BenchInfo_Results[2];
2997 
2998   CBenchCallbackToPrint():
2999       NeedPrint(true),
3000       Use2Columns(false),
3001       ShowFreq(false),
3002       NameFieldSize(0),
3003       EncodeWeight(1),
3004       DecodeWeight(1),
3005       CpuFreq(0)
3006       {}
3007 
3008   void Init() { EncodeRes.Init(); DecodeRes.Init(); }
3009   void Print(const char *s);
3010   void NewLine();
3011 
3012   HRESULT SetFreq(bool showFreq, UInt64 cpuFreq);
3013   HRESULT SetEncodeResult(const CBenchInfo &info, bool final) Z7_override;
3014   HRESULT SetDecodeResult(const CBenchInfo &info, bool final) Z7_override;
3015 };
3016 
3017 HRESULT CBenchCallbackToPrint::SetFreq(bool showFreq, UInt64 cpuFreq)
3018 {
3019   ShowFreq = showFreq;
3020   CpuFreq = cpuFreq;
3021   return S_OK;
3022 }
3023 
3024 HRESULT CBenchCallbackToPrint::SetEncodeResult(const CBenchInfo &info, bool final)
3025 {
3026   RINOK(_file->CheckBreak())
3027   if (final)
3028     BenchInfo_Results[0] = info;
3029   if (final)
3030   if (NeedPrint)
3031   {
3032     const UInt64 rating = BenchProps.GetRating_Enc(DictSize, info.GlobalTime, info.GlobalFreq, info.UnpackSize * info.NumIterations);
3033     PrintResults(_file, info,
3034         EncodeWeight, rating,
3035         ShowFreq, CpuFreq, &EncodeRes);
3036     if (!Use2Columns)
3037       _file->NewLine();
3038   }
3039   return S_OK;
3040 }
3041 
3042 static const char * const kSep = "  | ";
3043 
3044 HRESULT CBenchCallbackToPrint::SetDecodeResult(const CBenchInfo &info, bool final)
3045 {
3046   RINOK(_file->CheckBreak())
3047   if (final)
3048     BenchInfo_Results[1] = info;
3049   if (final)
3050   if (NeedPrint)
3051   {
3052     const UInt64 rating = BenchProps.GetRating_Dec(info.GlobalTime, info.GlobalFreq, info.UnpackSize, info.PackSize, info.NumIterations);
3053     if (Use2Columns)
3054       _file->Print(kSep);
3055     else
3056       PrintSpaces(*_file, NameFieldSize);
3057     CBenchInfo info2 = info;
3058     info2.UnpackSize *= info2.NumIterations;
3059     info2.PackSize *= info2.NumIterations;
3060     info2.NumIterations = 1;
3061     PrintResults(_file, info2,
3062         DecodeWeight, rating,
3063         ShowFreq, CpuFreq, &DecodeRes);
3064   }
3065   return S_OK;
3066 }
3067 
// Forwards the string to _file->Print().
void CBenchCallbackToPrint::Print(const char *s)
{
  _file->Print(s);
}

// Forwards the call to _file->NewLine().
void CBenchCallbackToPrint::NewLine()
{
  _file->NewLine();
}
3077 
3078 static void PrintLeft(IBenchPrintCallback &f, const char *s, unsigned size)
3079 {
3080   f.Print(s);
3081   int numSpaces = (int)size - (int)MyStringLen(s);
3082   if (numSpaces > 0)
3083     PrintSpaces(f, (unsigned)numSpaces);
3084 }
3085 
3086 static void PrintRight(IBenchPrintCallback &f, const char *s, unsigned size)
3087 {
3088   int numSpaces = (int)size - (int)MyStringLen(s);
3089   if (numSpaces > 0)
3090     PrintSpaces(f, (unsigned)numSpaces);
3091   f.Print(s);
3092 }
3093 
3094 
3095 static bool DoesWildcardMatchName_NoCase(const AString &mask, const char *name)
3096 {
3097   UString wildc = GetUnicodeString(mask);
3098   UString bname = GetUnicodeString(name);
3099   wildc.MakeLower_Ascii();
3100   bname.MakeLower_Ascii();
3101   return DoesWildcardMatchName(wildc, bname);
3102 }
3103 
3104 
3105 static HRESULT TotalBench(
3106     DECL_EXTERNAL_CODECS_LOC_VARS
3107     const COneMethodInfo &methodMask,
3108     UInt64 complexInCommands,
3109   #ifndef Z7_ST
3110     UInt32 numThreads,
3111     const CAffinityMode *affinityMode,
3112   #endif
3113     bool forceUnpackSize,
3114     size_t unpackSize,
3115     const Byte *fileData,
3116     IBenchPrintCallback *printCallback, CBenchCallbackToPrint *callback)
3117 {
3118   for (unsigned i = 0; i < Z7_ARRAY_SIZE(g_Bench); i++)
3119   {
3120     const CBenchMethod &bench = g_Bench[i];
3121     if (!DoesWildcardMatchName_NoCase(methodMask.MethodName, bench.Name))
3122       continue;
3123     PrintLeft(*callback->_file, bench.Name, kFieldSize_Name);
3124     {
3125       unsigned keySize = 32;
3126            if (IsString1PrefixedByString2(bench.Name, "AES128")) keySize = 16;
3127       else if (IsString1PrefixedByString2(bench.Name, "AES192")) keySize = 24;
3128       callback->BenchProps.KeySize = keySize;
3129     }
3130     callback->BenchProps.DecComplexUnc = bench.DecComplexUnc;
3131     callback->BenchProps.DecComplexCompr = bench.DecComplexCompr;
3132     callback->BenchProps.EncComplex = bench.EncComplex;
3133 
3134     COneMethodInfo method;
3135     NCOM::CPropVariant propVariant;
3136     propVariant = bench.Name;
3137     RINOK(method.ParseMethodFromPROPVARIANT(UString(), propVariant))
3138 
3139     size_t unpackSize2 = unpackSize;
3140     if (!forceUnpackSize && bench.DictBits == 0)
3141       unpackSize2 = kFilterUnpackSize;
3142 
3143     callback->EncodeWeight = bench.Weight;
3144     callback->DecodeWeight = bench.Weight;
3145 
3146     const HRESULT res = MethodBench(
3147         EXTERNAL_CODECS_LOC_VARS
3148         complexInCommands,
3149         #ifndef Z7_ST
3150         false, numThreads, affinityMode,
3151         #endif
3152         method,
3153         unpackSize2, fileData,
3154         bench.DictBits,
3155         printCallback, callback, &callback->BenchProps);
3156 
3157     if (res == E_NOTIMPL)
3158     {
3159       // callback->Print(" ---");
3160       // we need additional empty line as line for decompression results
3161       if (!callback->Use2Columns)
3162         callback->NewLine();
3163     }
3164     else
3165     {
3166       RINOK(res)
3167     }
3168 
3169     callback->NewLine();
3170   }
3171   return S_OK;
3172 }
3173 
3174 
3175 struct CFreqBench
3176 {
3177   // in:
3178   UInt64 complexInCommands;
3179   UInt32 numThreads;
3180   bool showFreq;
3181   UInt64 specifiedFreq;
3182 
3183   // out:
3184   UInt64 CpuFreqRes;
3185   UInt64 UsageRes;
3186   UInt32 res;
3187 
3188   CFreqBench()
3189     {}
3190 
3191   HRESULT FreqBench(IBenchPrintCallback *_file
3192       #ifndef Z7_ST
3193       , const CAffinityMode *affinityMode
3194       #endif
3195       );
3196 };
3197 
3198 
// Measures the speed of CountCpuFreq() command sequence.
// It runs the calculation in (numThreads) threads (or in current thread),
// stores the results to CpuFreqRes / UsageRes and prints them, if (_file) is defined.
// Returns the error of thread creation / waiting or of CheckBreak() callback.
HRESULT CFreqBench::FreqBench(IBenchPrintCallback *_file
    #ifndef Z7_ST
    , const CAffinityMode *affinityMode
    #endif
    )
{
  res = 0;
  CpuFreqRes = 0;
  UsageRes = 0;

  if (numThreads == 0)
    numThreads = 1;

  #ifdef Z7_ST
  numThreads = 1;
  #endif

  // The work is split to (numIterations) calls of CountCpuFreq(),
  // where each call makes (numIterations2) loop iterations (2^30 max).
  const UInt32 complexity = kNumFreqCommands;
  UInt64 numIterations = complexInCommands / complexity;
  UInt32 numIterations2 = 1 << 30;
  if (numIterations > numIterations2)
    numIterations /= numIterations2;
  else
  {
    numIterations2 = (UInt32)numIterations;
    numIterations = 1;
  }

  CBenchInfoCalc progressInfoSpec;

  #ifndef Z7_ST

  bool mtMode = (numThreads > 1) || affinityMode->NeedAffinity();

  if (mtMode)
  {
    // destructor of (threads) waits for created threads in all return paths
    CFreqThreads threads;
    threads.Items = new CFreqInfo[numThreads];
    UInt32 i;
    for (i = 0; i < numThreads; i++)
    {
      CFreqInfo &info = threads.Items[i];
      info.Callback = _file;
      info.CallbackRes = S_OK;
      info.NumIterations = numIterations;
      info.Size = numIterations2;
    }
    progressInfoSpec.SetStartTime();
    for (i = 0; i < numThreads; i++)
    {
      // Sleep(10);
      CFreqInfo &info = threads.Items[i];
      WRes wres = affinityMode->CreateThread_WithAffinity(info.Thread, FreqThreadFunction, &info, i);
      // only created threads are counted for WaitAll()
      if (info.Thread.IsCreated())
        threads.NumThreads++;
      if (wres != 0)
        return HRESULT_FROM_WIN32(wres);
    }
    WRes wres = threads.WaitAll();
    if (wres != 0)
      return HRESULT_FROM_WIN32(wres);
    for (i = 0; i < numThreads; i++)
    {
      RINOK(threads.Items[i].CallbackRes)
    }
  }
  else
  #endif
  {
    progressInfoSpec.SetStartTime();
    UInt32 sum = g_BenchCpuFreqTemp;
    for (UInt64 k = numIterations; k > 0; k--)
    {
      sum = CountCpuFreq(sum, numIterations2, g_BenchCpuFreqTemp);
      if (_file)
      {
        RINOK(_file->CheckBreak())
      }
    }
    res += sum;
  }

  // here the calculated value (res) is used in condition
  // (presumably to keep the calculation from being optimized away)
  if (res == 0x12345678)
  if (_file)
  {
    RINOK(_file->CheckBreak())
  }

  CBenchInfo info;
  progressInfoSpec.SetFinishTime(info);

  info.UnpackSize = 0;
  info.PackSize = 0;
  info.NumIterations = 1;

  // total number of executed commands in all threads
  const UInt64 numCommands = (UInt64)numIterations * numIterations2 * numThreads * complexity;
  const UInt64 rating = info.GetSpeed(numCommands);
  CpuFreqRes = rating / numThreads;
  UsageRes = info.GetUsage();

  if (_file)
  {
    PrintResults(_file, info,
          0, // weight
          rating,
          showFreq, showFreq ? (specifiedFreq != 0 ? specifiedFreq : CpuFreqRes) : 0, NULL);
    RINOK(_file->CheckBreak())
  }

  return S_OK;
}
3310 
3311 
3312 
3313 static HRESULT CrcBench(
3314     DECL_EXTERNAL_CODECS_LOC_VARS
3315     UInt64 complexInCommands,
3316     UInt32 numThreads,
3317     const size_t bufferSize,
3318     const Byte *fileData,
3319 
3320     UInt64 &speed,
3321     UInt64 &usage,
3322 
3323     UInt32 complexity, unsigned benchWeight,
3324     const UInt32 *checkSum,
3325     const COneMethodInfo &method,
3326     IBenchPrintCallback *_file,
3327     #ifndef Z7_ST
3328     const CAffinityMode *affinityMode,
3329     #endif
3330     bool showRating,
3331     CTotalBenchRes *encodeRes,
3332     bool showFreq, UInt64 cpuFreq)
3333 {
3334   if (numThreads == 0)
3335     numThreads = 1;
3336 
3337   #ifdef Z7_ST
3338   numThreads = 1;
3339   #endif
3340 
3341   const AString &methodName = method.MethodName;
3342   // methodName.RemoveChar(L'-');
3343   CMethodId hashID;
3344   if (!FindHashMethod(
3345       EXTERNAL_CODECS_LOC_VARS
3346       methodName, hashID))
3347     return E_NOTIMPL;
3348 
3349   /*
3350   // if will generate random data in each thread, instead of global data
3351   CMidAlignedBuffer buffer;
3352   if (!fileData)
3353   {
3354     ALLOC_WITH_HRESULT(&buffer, bufferSize)
3355     RandGen(buffer, bufferSize);
3356     fileData = buffer;
3357   }
3358   */
3359 
3360   const size_t bsize = (bufferSize == 0 ? 1 : bufferSize);
3361   UInt64 numIterations = complexInCommands * k_Hash_Complex_Mult / complexity / bsize;
3362   if (numIterations == 0)
3363     numIterations = 1;
3364 
3365   CBenchInfoCalc progressInfoSpec;
3366   CBenchInfo info;
3367 
3368   #ifndef Z7_ST
3369   bool mtEncMode = (numThreads > 1) || affinityMode->NeedAffinity();
3370 
3371   if (mtEncMode)
3372   {
3373     CCrcThreads threads;
3374     threads.Items = new CCrcInfo[numThreads];
3375     {
3376       WRes wres = threads.Common.StartEvent.Create();
3377       if (wres != 0)
3378         return HRESULT_FROM_WIN32(wres);
3379       threads.NeedClose = true;
3380     }
3381 
3382     UInt32 i;
3383     for (i = 0; i < numThreads; i++)
3384     {
3385       CCrcInfo &ci = threads.Items[i];
3386       AString name;
3387       RINOK(CreateHasher(EXTERNAL_CODECS_LOC_VARS hashID, name, ci.Hasher))
3388       if (!ci.Hasher)
3389         return E_NOTIMPL;
3390       CMyComPtr<ICompressSetCoderProperties> scp;
3391       ci.Hasher.QueryInterface(IID_ICompressSetCoderProperties, &scp);
3392       if (scp)
3393       {
3394         RINOK(method.SetCoderProps(scp))
3395       }
3396 
3397       ci.Callback = _file;
3398       ci.Data = fileData;
3399       ci.NumIterations = numIterations;
3400       ci.Size = bufferSize;
3401       ci.CheckSumDefined = false;
3402       if (checkSum)
3403       {
3404         ci.CheckSum = *checkSum;
3405         ci.CheckSumDefined = true;
3406       }
3407 
3408       #ifdef USE_ALLOCA
3409       ci.AllocaSize = (i * 16 * 21) & 0x7FF;
3410       #endif
3411     }
3412 
3413     for (i = 0; i < numThreads; i++)
3414     {
3415       CCrcInfo &ci = threads.Items[i];
3416       ci.ThreadIndex = i;
3417       ci.Common = &threads.Common;
3418       ci.AffinityMode = *affinityMode;
3419       HRESULT hres = ci.CreateThread();
3420       if (ci.Thread.IsCreated())
3421         threads.NumThreads++;
3422       if (hres != 0)
3423         return hres;
3424     }
3425 
3426     for (i = 0; i < numThreads; i++)
3427     {
3428       CCrcInfo &ci = threads.Items[i];
3429       WRes wres = ci.ReadyEvent.Lock();
3430       if (wres != 0)
3431         return HRESULT_FROM_WIN32(wres);
3432       RINOK(ci.Res)
3433     }
3434 
3435     progressInfoSpec.SetStartTime();
3436 
3437     WRes wres = threads.StartAndWait();
3438     if (wres != 0)
3439       return HRESULT_FROM_WIN32(wres);
3440 
3441     progressInfoSpec.SetFinishTime(info);
3442 
3443     for (i = 0; i < numThreads; i++)
3444     {
3445       RINOK(threads.Items[i].Res)
3446       if (i != 0)
3447         if (threads.Items[i].CheckSum_Res !=
3448             threads.Items[i - 1].CheckSum_Res)
3449           return S_FALSE;
3450     }
3451   }
3452   else
3453   #endif
3454   {
3455     CMyComPtr<IHasher> hasher;
3456     AString name;
3457     RINOK(CreateHasher(EXTERNAL_CODECS_LOC_VARS hashID, name, hasher))
3458     if (!hasher)
3459       return E_NOTIMPL;
3460     CMyComPtr<ICompressSetCoderProperties> scp;
3461     hasher.QueryInterface(IID_ICompressSetCoderProperties, &scp);
3462     if (scp)
3463     {
3464       RINOK(method.SetCoderProps(scp))
3465     }
3466     CCrcInfo_Base crcib;
3467     crcib.CreateLocalBuf = false;
3468     RINOK(crcib.Generate(fileData, bufferSize))
3469     progressInfoSpec.SetStartTime();
3470     RINOK(crcib.CrcProcess(numIterations, checkSum, hasher, _file))
3471     progressInfoSpec.SetFinishTime(info);
3472   }
3473 
3474 
3475   UInt64 unpSize = numIterations * bufferSize;
3476   UInt64 unpSizeThreads = unpSize * numThreads;
3477   info.UnpackSize = unpSizeThreads;
3478   info.PackSize = unpSizeThreads;
3479   info.NumIterations = 1;
3480 
3481   if (_file)
3482   {
3483     if (showRating)
3484     {
3485       UInt64 unpSizeThreads2 = unpSizeThreads;
3486       if (unpSizeThreads2 == 0)
3487         unpSizeThreads2 = numIterations * 1 * numThreads;
3488       const UInt64 numCommands = unpSizeThreads2 * complexity / 256;
3489       const UInt64 rating = info.GetSpeed(numCommands);
3490       PrintResults(_file, info,
3491           benchWeight, rating,
3492           showFreq, cpuFreq, encodeRes);
3493     }
3494     RINOK(_file->CheckBreak())
3495   }
3496 
3497   speed = info.GetSpeed(unpSizeThreads);
3498   usage = info.GetUsage();
3499 
3500   return S_OK;
3501 }
3502 
3503 
3504 
3505 static HRESULT TotalBench_Hash(
3506     DECL_EXTERNAL_CODECS_LOC_VARS
3507     const COneMethodInfo &methodMask,
3508     UInt64 complexInCommands,
3509     UInt32 numThreads,
3510     size_t bufSize,
3511     const Byte *fileData,
3512     IBenchPrintCallback *printCallback, CBenchCallbackToPrint *callback,
3513     #ifndef Z7_ST
3514     const CAffinityMode *affinityMode,
3515     #endif
3516     CTotalBenchRes *encodeRes,
3517     bool showFreq, UInt64 cpuFreq)
3518 {
3519   for (unsigned i = 0; i < Z7_ARRAY_SIZE(g_Hash); i++)
3520   {
3521     const CBenchHash &bench = g_Hash[i];
3522     if (!DoesWildcardMatchName_NoCase(methodMask.MethodName, bench.Name))
3523       continue;
3524     PrintLeft(*callback->_file, bench.Name, kFieldSize_Name);
3525     // callback->BenchProps.DecComplexUnc = bench.DecComplexUnc;
3526     // callback->BenchProps.DecComplexCompr = bench.DecComplexCompr;
3527     // callback->BenchProps.EncComplex = bench.EncComplex;
3528 
3529     COneMethodInfo method;
3530     NCOM::CPropVariant propVariant;
3531     propVariant = bench.Name;
3532     RINOK(method.ParseMethodFromPROPVARIANT(UString(), propVariant))
3533 
3534     UInt64 speed, usage;
3535 
3536     const HRESULT res = CrcBench(
3537         EXTERNAL_CODECS_LOC_VARS
3538         complexInCommands,
3539         numThreads, bufSize, fileData,
3540         speed, usage,
3541         bench.Complex, bench.Weight,
3542         (!fileData && bufSize == (1 << kNumHashDictBits)) ? &bench.CheckSum : NULL,
3543         method,
3544         printCallback,
3545      #ifndef Z7_ST
3546         affinityMode,
3547      #endif
3548         true, // showRating
3549         encodeRes, showFreq, cpuFreq);
3550     if (res == E_NOTIMPL)
3551     {
3552       // callback->Print(" ---");
3553     }
3554     else
3555     {
3556       RINOK(res)
3557     }
3558     callback->NewLine();
3559   }
3560   return S_OK;
3561 }
3562 
3563 struct CTempValues
3564 {
3565   UInt64 *Values;
3566   CTempValues(): Values(NULL) {}
3567   void Alloc(UInt32 num) { Values = new UInt64[num]; }
3568   ~CTempValues() { delete []Values; }
3569 };
3570 
3571 static void ParseNumberString(const UString &s, NCOM::CPropVariant &prop)
3572 {
3573   const wchar_t *end;
3574   UInt64 result = ConvertStringToUInt64(s, &end);
3575   if (*end != 0 || s.IsEmpty())
3576     prop = s;
3577   else if (result <= (UInt32)0xFFFFFFFF)
3578     prop = (UInt32)result;
3579   else
3580     prop = result;
3581 }
3582 
3583 
3584 static bool AreSameMethodNames(const char *fullName, const char *shortName)
3585 {
3586   return StringsAreEqualNoCase_Ascii(fullName, shortName);
3587 }
3588 
3589 
3590 
3591 
3592 static void Print_Usage_and_Threads(IBenchPrintCallback &f, UInt64 usage, UInt32 threads)
3593 {
3594   PrintRequirements(f, "usage:", true, usage, "Benchmark threads:   ", threads);
3595 }
3596 
3597 
3598 static void Print_Delimiter(IBenchPrintCallback &f)
3599 {
3600   f.Print(" |");
3601 }
3602 
3603 static void Print_Pow(IBenchPrintCallback &f, unsigned pow)
3604 {
3605   char s[16];
3606   ConvertUInt32ToString(pow, s);
3607   unsigned pos = MyStringLen(s);
3608   s[pos++] = ':';
3609   s[pos] = 0;
3610   PrintLeft(f, s, kFieldSize_SmallName); // 4
3611 }
3612 
3613 static void Bench_BW_Print_Usage_Speed(IBenchPrintCallback &f,
3614     UInt64 usage, UInt64 speed)
3615 {
3616   PrintUsage(f, usage, kFieldSize_Usage);
3617   PrintNumber(f, speed / 1000000, kFieldSize_CrcSpeed);
3618 }
3619 
3620 
3621 HRESULT Bench(
3622     DECL_EXTERNAL_CODECS_LOC_VARS
3623     IBenchPrintCallback *printCallback,
3624     IBenchCallback *benchCallback,
3625     const CObjectVector<CProperty> &props,
3626     UInt32 numIterations,
3627     bool multiDict,
3628     IBenchFreqCallback *freqCallback)
3629 {
3630   if (!CrcInternalTest())
3631     return E_FAIL;
3632 
3633   UInt32 numCPUs = 1;
3634   UInt64 ramSize = (UInt64)(sizeof(size_t)) << 29;
3635 
3636   NSystem::CProcessAffinity threadsInfo;
3637   threadsInfo.InitST();
3638 
3639   #ifndef Z7_ST
3640 
3641   if (threadsInfo.Get() && threadsInfo.GetNumProcessThreads() != 0)
3642     numCPUs = threadsInfo.GetNumProcessThreads();
3643   else
3644     numCPUs = NSystem::GetNumberOfProcessors();
3645 
3646   #endif
3647 
3648   // numCPUs = 24;
3649   /*
3650   {
3651     DWORD_PTR mask = (1 << 0);
3652     DWORD_PTR old = SetThreadAffinityMask(GetCurrentThread(), mask);
3653     old = old;
3654     DWORD_PTR old2 = SetThreadAffinityMask(GetCurrentThread(), mask);
3655     old2 = old2;
3656     return 0;
3657   }
3658   */
3659 
3660   bool ramSize_Defined = NSystem::GetRamSize(ramSize);
3661 
3662   UInt32 numThreadsSpecified = numCPUs;
3663   bool needSetComplexity = false;
3664   UInt32 testTimeMs = kComplexInMs;
3665   UInt32 startDicLog = 22;
3666   bool startDicLog_Defined = false;
3667   UInt64 specifiedFreq = 0;
3668   bool multiThreadTests = false;
3669   UInt64 complexInCommands = kComplexInCommands;
3670   UInt32 numThreads_Start = 1;
3671 
3672   #ifndef Z7_ST
3673   CAffinityMode affinityMode;
3674   #endif
3675 
3676 
3677   COneMethodInfo method;
3678 
3679   CMidAlignedBuffer fileDataBuffer;
3680   bool use_fileData = false;
3681   bool isFixedDict = false;
3682 
3683   {
3684   unsigned i;
3685 
3686   if (printCallback)
3687   {
3688     for (i = 0; i < props.Size(); i++)
3689     {
3690       const CProperty &property = props[i];
3691       printCallback->Print(" ");
3692       printCallback->Print(GetAnsiString(property.Name));
3693       if (!property.Value.IsEmpty())
3694       {
3695         printCallback->Print("=");
3696         printCallback->Print(GetAnsiString(property.Value));
3697       }
3698     }
3699     if (!props.IsEmpty())
3700       printCallback->NewLine();
3701   }
3702 
3703 
3704   for (i = 0; i < props.Size(); i++)
3705   {
3706     const CProperty &property = props[i];
3707     UString name (property.Name);
3708     name.MakeLower_Ascii();
3709 
3710     if (name.IsEqualTo("file"))
3711     {
3712       if (property.Value.IsEmpty())
3713         return E_INVALIDARG;
3714 
3715       NFile::NIO::CInFile file;
3716       if (!file.Open(us2fs(property.Value)))
3717         return GetLastError_noZero_HRESULT();
3718       size_t len;
3719       {
3720         UInt64 len64;
3721         if (!file.GetLength(len64))
3722           return GetLastError_noZero_HRESULT();
3723         if (printCallback)
3724         {
3725           printCallback->Print("file size =");
3726           PrintNumber(*printCallback, len64, 0);
3727           printCallback->NewLine();
3728         }
3729         len = (size_t)len64;
3730         if (len != len64)
3731           return E_INVALIDARG;
3732       }
3733 
3734       // (len == 0) is allowed. Also it's allowed if Alloc(0) returns NULL here
3735 
3736       ALLOC_WITH_HRESULT(&fileDataBuffer, len)
3737       use_fileData = true;
3738 
3739       {
3740         size_t processed;
3741         if (!file.ReadFull((Byte *)fileDataBuffer, len, processed))
3742           return GetLastError_noZero_HRESULT();
3743         if (processed != len)
3744           return E_FAIL;
3745       }
3746       continue;
3747     }
3748 
3749     NCOM::CPropVariant propVariant;
3750     if (!property.Value.IsEmpty())
3751       ParseNumberString(property.Value, propVariant);
3752 
3753     if (name.IsEqualTo("time"))
3754     {
3755       RINOK(ParsePropToUInt32(UString(), propVariant, testTimeMs))
3756       needSetComplexity = true;
3757       testTimeMs *= 1000;
3758       continue;
3759     }
3760 
3761     if (name.IsEqualTo("timems"))
3762     {
3763       RINOK(ParsePropToUInt32(UString(), propVariant, testTimeMs))
3764       needSetComplexity = true;
3765       continue;
3766     }
3767 
3768     if (name.IsEqualTo("tic"))
3769     {
3770       UInt32 v;
3771       RINOK(ParsePropToUInt32(UString(), propVariant, v))
3772       if (v >= 64)
3773         return E_INVALIDARG;
3774       complexInCommands = (UInt64)1 << v;
3775       continue;
3776     }
3777 
3778     const bool isCurrent_fixedDict = name.IsEqualTo("df");
3779     if (isCurrent_fixedDict)
3780       isFixedDict = true;
3781     if (isCurrent_fixedDict || name.IsEqualTo("ds"))
3782     {
3783       RINOK(ParsePropToUInt32(UString(), propVariant, startDicLog))
3784       if (startDicLog > 32)
3785         return E_INVALIDARG;
3786       startDicLog_Defined = true;
3787       continue;
3788     }
3789 
3790     if (name.IsEqualTo("mts"))
3791     {
3792       RINOK(ParsePropToUInt32(UString(), propVariant, numThreads_Start))
3793       continue;
3794     }
3795 
3796     if (name.IsEqualTo("af"))
3797     {
3798       UInt32 bundle;
3799       RINOK(ParsePropToUInt32(UString(), propVariant, bundle))
3800       if (bundle > 0 && bundle < numCPUs)
3801       {
3802         #ifndef Z7_ST
3803         affinityMode.SetLevels(numCPUs, 2);
3804         affinityMode.NumBundleThreads = bundle;
3805         #endif
3806       }
3807       continue;
3808     }
3809 
3810     if (name.IsEqualTo("freq"))
3811     {
3812       UInt32 freq32 = 0;
3813       RINOK(ParsePropToUInt32(UString(), propVariant, freq32))
3814       if (freq32 == 0)
3815         return E_INVALIDARG;
3816       specifiedFreq = (UInt64)freq32 * 1000000;
3817 
3818       if (printCallback)
3819       {
3820         printCallback->Print("freq=");
3821         PrintNumber(*printCallback, freq32, 0);
3822         printCallback->NewLine();
3823       }
3824 
3825       continue;
3826     }
3827 
3828     if (name.IsPrefixedBy_Ascii_NoCase("mt"))
3829     {
3830       const UString s = name.Ptr(2);
3831       if (s.IsEqualTo("*")
3832           || (s.IsEmpty()
3833             && propVariant.vt == VT_BSTR
3834             && StringsAreEqual_Ascii(propVariant.bstrVal, "*")))
3835       {
3836         multiThreadTests = true;
3837         continue;
3838       }
3839       #ifndef Z7_ST
3840       RINOK(ParseMtProp(s, propVariant, numCPUs, numThreadsSpecified))
3841       #endif
3842       continue;
3843     }
3844 
3845     RINOK(method.ParseMethodFromPROPVARIANT(name, propVariant))
3846   }
3847   }
3848 
3849   if (printCallback)
3850   {
3851     AString s;
3852 
3853    #ifndef _WIN32
3854     s += "Compiler: ";
3855     GetCompiler(s);
3856     printCallback->Print(s);
3857     printCallback->NewLine();
3858     s.Empty();
3859    #endif
3860 
3861     GetSystemInfoText(s);
3862     printCallback->Print(s);
3863     printCallback->NewLine();
3864   }
3865 
3866   if (printCallback)
3867   {
3868     printCallback->Print("1T CPU Freq (MHz):");
3869   }
3870 
3871   if (printCallback || freqCallback)
3872   {
3873     UInt64 numMilCommands = 1 << 6;
3874     if (specifiedFreq != 0)
3875     {
3876       while (numMilCommands > 1 && specifiedFreq < (numMilCommands * 1000000))
3877         numMilCommands >>= 1;
3878     }
3879 
3880     for (int jj = 0;; jj++)
3881     {
3882       if (printCallback)
3883         RINOK(printCallback->CheckBreak())
3884 
3885       UInt64 start = ::GetTimeCount();
3886       UInt32 sum = (UInt32)start;
3887       sum = CountCpuFreq(sum, (UInt32)(numMilCommands * 1000000 / kNumFreqCommands), g_BenchCpuFreqTemp);
3888       if (sum == 0xF1541213)
3889         if (printCallback)
3890           printCallback->Print("");
3891       const UInt64 realDelta = ::GetTimeCount() - start;
3892       start = realDelta;
3893       if (start == 0)
3894         start = 1;
3895       if (start > (UInt64)1 << 61)
3896         start = 1;
3897       const UInt64 freq = GetFreq();
3898       // mips is constant in some compilers
3899       const UInt64 hz = MyMultDiv64(numMilCommands * 1000000, freq, start);
3900       const UInt64 mipsVal = numMilCommands * freq / start;
3901       if (printCallback)
3902       {
3903         if (realDelta == 0)
3904         {
3905           printCallback->Print(" -");
3906         }
3907         else
3908         {
3909           // PrintNumber(*printCallback, start, 0);
3910           PrintNumber(*printCallback, mipsVal, 5);
3911         }
3912       }
3913       if (freqCallback)
3914       {
3915         RINOK(freqCallback->AddCpuFreq(1, hz, kBenchmarkUsageMult))
3916       }
3917 
3918       if (jj >= 1)
3919       {
3920         bool needStop = (numMilCommands >= (1 <<
3921           #ifdef _DEBUG
3922             7
3923           #else
3924             11
3925           #endif
3926           ));
3927         if (start >= freq * 16)
3928         {
3929           printCallback->Print(" (Cmplx)");
3930           if (!freqCallback) // we don't want complexity change for old gui lzma benchmark
3931           {
3932             needSetComplexity = true;
3933           }
3934           needStop = true;
3935         }
3936         if (needSetComplexity)
3937           SetComplexCommandsMs(testTimeMs, false, mipsVal * 1000000, complexInCommands);
3938         if (needStop)
3939           break;
3940         numMilCommands <<= 1;
3941       }
3942     }
3943     if (freqCallback)
3944     {
3945       RINOK(freqCallback->FreqsFinished(1))
3946     }
3947   }
3948 
3949   if (numThreadsSpecified >= 2)
3950   if (printCallback || freqCallback)
3951   {
3952     if (printCallback)
3953       printCallback->NewLine();
3954 
3955     /* it can show incorrect frequency for HT threads.
3956        so we reduce freq test to (numCPUs / 2) */
3957 
3958     UInt32 numThreads = numThreadsSpecified >= numCPUs / 2 ? numCPUs / 2: numThreadsSpecified;
3959     if (numThreads < 1)
3960       numThreads = 1;
3961 
3962     if (printCallback)
3963     {
3964       char s[128];
3965       ConvertUInt64ToString(numThreads, s);
3966       printCallback->Print(s);
3967       printCallback->Print("T CPU Freq (MHz):");
3968     }
3969     UInt64 numMilCommands = 1 <<
3970           #ifdef _DEBUG
3971             7;
3972           #else
3973             10;
3974           #endif
3975 
3976     if (specifiedFreq != 0)
3977     {
3978       while (numMilCommands > 1 && specifiedFreq < (numMilCommands * 1000000))
3979         numMilCommands >>= 1;
3980     }
3981 
3982     // for (int jj = 0;; jj++)
3983     for (;;)
3984     {
3985       if (printCallback)
3986         RINOK(printCallback->CheckBreak())
3987 
3988       {
3989         // PrintLeft(f, "CPU", kFieldSize_Name);
3990 
3991         // UInt32 resVal;
3992 
3993         CFreqBench fb;
3994         fb.complexInCommands = numMilCommands * 1000000;
3995         fb.numThreads = numThreads;
3996         // showFreq;
3997         // fb.showFreq = (freqTest == kNumCpuTests - 1 || specifiedFreq != 0);
3998         fb.showFreq = true;
3999         fb.specifiedFreq = 1;
4000 
4001         const HRESULT res = fb.FreqBench(NULL /* printCallback */
4002             #ifndef Z7_ST
4003               , &affinityMode
4004             #endif
4005             );
4006         RINOK(res)
4007 
4008         if (freqCallback)
4009         {
4010           RINOK(freqCallback->AddCpuFreq(numThreads, fb.CpuFreqRes, fb.UsageRes))
4011         }
4012 
4013         if (printCallback)
4014         {
4015           /*
4016           if (realDelta == 0)
4017           {
4018             printCallback->Print(" -");
4019           }
4020           else
4021           */
4022           {
4023             // PrintNumber(*printCallback, start, 0);
4024             PrintUsage(*printCallback, fb.UsageRes, 3);
4025             printCallback->Print("%");
4026             PrintNumber(*printCallback, fb.CpuFreqRes / 1000000, 0);
4027             printCallback->Print("  ");
4028 
4029             // PrintNumber(*printCallback, fb.UsageRes, 5);
4030           }
4031         }
4032       }
4033       // if (jj >= 1)
4034       {
4035         const bool needStop = (numMilCommands >= (1 <<
4036           #ifdef _DEBUG
4037             7
4038           #else
4039             11
4040           #endif
4041           ));
4042         if (needStop)
4043           break;
4044         numMilCommands <<= 1;
4045       }
4046     }
4047     if (freqCallback)
4048     {
4049       RINOK(freqCallback->FreqsFinished(numThreads))
4050     }
4051   }
4052 
4053 
4054   if (printCallback)
4055   {
4056     printCallback->NewLine();
4057     printCallback->NewLine();
4058     PrintRequirements(*printCallback, "size: ", ramSize_Defined, ramSize, "CPU hardware threads:", numCPUs);
4059     printCallback->Print(GetProcessThreadsInfo(threadsInfo));
4060     printCallback->NewLine();
4061   }
4062 
4063   if (numThreadsSpecified < 1 || numThreadsSpecified > kNumThreadsMax)
4064     return E_INVALIDARG;
4065 
4066   UInt64 dict = (UInt64)1 << startDicLog;
4067   const bool dictIsDefined = (isFixedDict || method.Get_DicSize(dict));
4068 
4069   const unsigned level = method.GetLevel();
4070 
4071   AString &methodName = method.MethodName;
4072   const AString original_MethodName = methodName;
4073   if (methodName.IsEmpty())
4074     methodName = "LZMA";
4075 
4076   if (benchCallback)
4077   {
4078     CBenchProps benchProps;
4079     benchProps.SetLzmaCompexity();
4080     const UInt64 dictSize = method.Get_Lzma_DicSize();
4081 
4082     size_t uncompressedDataSize;
4083     if (use_fileData)
4084     {
4085       uncompressedDataSize = fileDataBuffer.Size();
4086     }
4087     else
4088     {
4089       uncompressedDataSize = kAdditionalSize + (size_t)dictSize;
4090       if (uncompressedDataSize < dictSize)
4091         return E_INVALIDARG;
4092     }
4093 
4094     return MethodBench(
4095         EXTERNAL_CODECS_LOC_VARS
4096         complexInCommands,
4097       #ifndef Z7_ST
4098         true, numThreadsSpecified,
4099         &affinityMode,
4100       #endif
4101         method,
4102         uncompressedDataSize, (const Byte *)fileDataBuffer,
4103         kOldLzmaDictBits, printCallback, benchCallback, &benchProps);
4104   }
4105 
4106   if (methodName.IsEqualTo_Ascii_NoCase("CRC"))
4107     methodName = "crc32";
4108 
4109   CMethodId hashID;
4110   const bool isHashMethod = FindHashMethod(EXTERNAL_CODECS_LOC_VARS methodName, hashID);
4111   int codecIndex = -1;
4112   bool isFilter = false;
4113   if (!isHashMethod)
4114   {
4115     UInt32 numStreams;
4116     codecIndex = FindMethod_Index(EXTERNAL_CODECS_LOC_VARS original_MethodName,
4117         true,  // encode
4118         hashID, numStreams, isFilter);
4119     // we can allow non filter for BW tests
4120     if (!isFilter) codecIndex = -1;
4121   }
4122 
4123   CBenchCallbackToPrint callback;
4124   callback.Init();
4125   callback._file = printCallback;
4126 
4127   if (isHashMethod || codecIndex != -1)
4128   {
4129     if (!printCallback)
4130       return S_FALSE;
4131     IBenchPrintCallback &f = *printCallback;
4132 
4133     UInt64 dict64 = dict;
4134     if (!dictIsDefined)
4135       dict64 = (1 << 27);
4136     if (use_fileData)
4137     {
4138       if (!dictIsDefined)
4139         dict64 = fileDataBuffer.Size();
4140       else if (dict64 > fileDataBuffer.Size())
4141         dict64 = fileDataBuffer.Size();
4142     }
4143 
4144     for (;;)
4145     {
4146       const int index = method.FindProp(NCoderPropID::kDictionarySize);
4147       if (index < 0)
4148         break;
4149       method.Props.Delete((unsigned)index);
4150     }
4151 
4152     // methodName.RemoveChar(L'-');
4153     Int32 complexity = 16 * k_Hash_Complex_Mult; // for unknown hash method
4154     const UInt32 *checkSum = NULL;
4155     int benchIndex = -1;
4156 
4157     if (isHashMethod)
4158     {
4159       for (unsigned i = 0; i < Z7_ARRAY_SIZE(g_Hash); i++)
4160       {
4161         const CBenchHash &h = g_Hash[i];
4162         AString benchMethod (h.Name);
4163         AString benchProps;
4164         const int propPos = benchMethod.Find(':');
4165         if (propPos >= 0)
4166         {
4167           benchProps = benchMethod.Ptr((unsigned)(propPos + 1));
4168           benchMethod.DeleteFrom((unsigned)propPos);
4169         }
4170 
4171         if (AreSameMethodNames(benchMethod, methodName))
4172         {
4173           const bool sameProps = method.PropsString.IsEqualTo_Ascii_NoCase(benchProps);
4174           /*
4175           bool isMainMethod = method.PropsString.IsEmpty();
4176           if (isMainMethod)
4177             isMainMethod = !checkSum
4178                 || (benchMethod.IsEqualTo_Ascii_NoCase("crc32") && benchProps.IsEqualTo_Ascii_NoCase("8"));
4179           if (sameProps || isMainMethod)
4180           */
4181           {
4182             complexity = (Int32)h.Complex;
4183             checkSum = &h.CheckSum;
4184             if (sameProps)
4185               break;
4186             /*
4187             if property. is not specified, we use the complexity
4188             for latest fastest method (crc32:64)
4189             */
4190           }
4191         }
4192       }
4193       // if (!checkSum) return E_NOTIMPL;
4194     }
4195     else
4196     {
4197       for (unsigned i = 0; i < Z7_ARRAY_SIZE(g_Bench); i++)
4198       {
4199         const CBenchMethod &bench = g_Bench[i];
4200         AString benchMethod (bench.Name);
4201         AString benchProps;
4202         const int propPos = benchMethod.Find(':');
4203         if (propPos >= 0)
4204         {
4205           benchProps = benchMethod.Ptr((unsigned)(propPos + 1));
4206           benchMethod.DeleteFrom((unsigned)propPos);
4207         }
4208 
4209         if (AreSameMethodNames(benchMethod, methodName))
4210         {
4211           const bool sameProps = method.PropsString.IsEqualTo_Ascii_NoCase(benchProps);
4212           // bool isMainMethod = method.PropsString.IsEmpty();
4213           // if (sameProps || isMainMethod)
4214           {
4215             benchIndex = (int)i;
4216             if (sameProps)
4217               break;
4218           }
4219         }
4220       }
4221       // if (benchIndex < 0) return E_NOTIMPL;
4222     }
4223 
4224     {
4225       /* we count usage only for crc and filter. non-filters are not supported */
4226       UInt64 usage = (1 << 20);
4227       UInt64 bufSize = dict64;
4228       UInt32 numBlocks = isHashMethod ? 1 : 3;
4229       if (use_fileData)
4230       {
4231         usage += fileDataBuffer.Size();
4232         if (bufSize > fileDataBuffer.Size())
4233           bufSize = fileDataBuffer.Size();
4234         if (isHashMethod)
4235         {
4236           numBlocks = 0;
4237           #ifndef Z7_ST
4238           if (numThreadsSpecified != 1)
4239             numBlocks = (k_Crc_CreateLocalBuf_For_File ? 1 : 0);
4240           #endif
4241         }
4242       }
4243       usage += numThreadsSpecified * bufSize * numBlocks;
4244       Print_Usage_and_Threads(f, usage, numThreadsSpecified);
4245     }
4246 
4247     CUIntVector numThreadsVector;
4248     {
4249       unsigned nt = numThreads_Start;
4250       for (;;)
4251       {
4252         if (nt > numThreadsSpecified)
4253           break;
4254         numThreadsVector.Add(nt);
4255         const unsigned next = nt * 2;
4256         const UInt32 ntHalf= numThreadsSpecified / 2;
4257         if (ntHalf > nt && ntHalf < next)
4258           numThreadsVector.Add(ntHalf);
4259         if (numThreadsSpecified > nt && numThreadsSpecified < next)
4260           numThreadsVector.Add(numThreadsSpecified);
4261         nt = next;
4262       }
4263     }
4264 
4265     unsigned numColumns = isHashMethod ? 1 : 2;
4266     CTempValues speedTotals;
4267     CTempValues usageTotals;
4268     {
4269       const unsigned numItems = numThreadsVector.Size() * numColumns;
4270       speedTotals.Alloc(numItems);
4271       usageTotals.Alloc(numItems);
4272       for (unsigned i = 0; i < numItems; i++)
4273       {
4274         speedTotals.Values[i] = 0;
4275         usageTotals.Values[i] = 0;
4276       }
4277     }
4278 
4279     f.NewLine();
4280     for (unsigned line = 0; line < 3; line++)
4281     {
4282       f.NewLine();
4283       f.Print(line == 0 ? "THRD" : line == 1 ? "    " : "Size");
4284       FOR_VECTOR (ti, numThreadsVector)
4285       {
4286         if (ti != 0)
4287           Print_Delimiter(f);
4288         if (line == 0)
4289         {
4290           PrintSpaces(f, (kFieldSize_CrcSpeed + kFieldSize_Usage + 2) * (numColumns - 1));
4291           PrintNumber(f, numThreadsVector[ti], 1 + kFieldSize_Usage + kFieldSize_CrcSpeed);
4292         }
4293         else
4294         {
4295           for (unsigned c = 0; c < numColumns; c++)
4296           {
4297             PrintRight(f, line == 1 ? "Usage" : "%",    kFieldSize_Usage + 1);
4298             PrintRight(f, line == 1 ? "BW"    : "MB/s", kFieldSize_CrcSpeed + 1);
4299           }
4300         }
4301       }
4302     }
4303     f.NewLine();
4304 
4305     UInt64 numSteps = 0;
4306 
4307     // for (UInt32 iter = 0; iter < numIterations; iter++)
4308     // {
4309     unsigned pow = 10; // kNumHashDictBits
4310     if (startDicLog_Defined)
4311       pow = startDicLog;
4312 
4313     // #define NUM_SUB_BITS 2
4314     // pow <<= NUM_SUB_BITS;
4315     for (;; pow++)
4316     {
4317       const UInt64 bufSize = (UInt64)1 << pow;
4318       // UInt64 bufSize = (UInt64)1 << (pow >> NUM_SUB_BITS);
4319       // bufSize += ((UInt64)pow & ((1 << NUM_SUB_BITS) - 1)) << ((pow >> NUM_SUB_BITS) - NUM_SUB_BITS);
4320 
4321       size_t dataSize = fileDataBuffer.Size();
4322       if (dataSize > bufSize || !use_fileData)
4323         dataSize = (size_t)bufSize;
4324 
4325       for (UInt32 iter = 0; iter < numIterations; iter++)
4326       {
4327         Print_Pow(f, pow);
4328         // PrintNumber(f, bufSize >> 10, 4);
4329 
4330         FOR_VECTOR (ti, numThreadsVector)
4331         {
4332           RINOK(f.CheckBreak())
4333           const UInt32 numThreads = numThreadsVector[ti];
4334           if (isHashMethod)
4335           {
4336             UInt64 speed = 0;
4337             UInt64 usage = 0;
4338             const HRESULT res = CrcBench(EXTERNAL_CODECS_LOC_VARS complexInCommands,
4339               numThreads,
4340               dataSize, (const Byte *)fileDataBuffer,
4341               speed, usage,
4342               (UInt32)complexity,
4343               1, // benchWeight,
4344               (pow == kNumHashDictBits && !use_fileData) ? checkSum : NULL,
4345               method,
4346               &f,
4347             #ifndef Z7_ST
4348               &affinityMode,
4349             #endif
4350               false, // showRating
4351               NULL, false, 0);
4352             RINOK(res)
4353 
4354             if (ti != 0)
4355               Print_Delimiter(f);
4356 
4357             Bench_BW_Print_Usage_Speed(f, usage, speed);
4358             speedTotals.Values[ti] += speed;
4359             usageTotals.Values[ti] += usage;
4360           }
4361           else
4362           {
4363             {
4364               unsigned keySize = 32;
4365                    if (IsString1PrefixedByString2(methodName, "AES128")) keySize = 16;
4366               else if (IsString1PrefixedByString2(methodName, "AES192")) keySize = 24;
4367               callback.BenchProps.KeySize = keySize;
4368             }
4369 
4370             COneMethodInfo method2 = method;
4371             unsigned bench_DictBits;
4372 
4373             if (benchIndex >= 0)
4374             {
4375               const CBenchMethod &bench = g_Bench[benchIndex];
4376               callback.BenchProps.EncComplex = bench.EncComplex;
4377               callback.BenchProps.DecComplexUnc = bench.DecComplexUnc;
4378               callback.BenchProps.DecComplexCompr = bench.DecComplexCompr;
4379               bench_DictBits = bench.DictBits;
4380               // bench_DictBits = kOldLzmaDictBits; = 32 default : for debug
4381             }
4382             else
4383             {
4384               bench_DictBits = kOldLzmaDictBits; // = 32 default
4385               if (isFilter)
4386               {
4387                 const unsigned k_UnknownCoderComplexity = 4;
4388                 callback.BenchProps.EncComplex = k_UnknownCoderComplexity;
4389                 callback.BenchProps.DecComplexUnc = k_UnknownCoderComplexity;
4390               }
4391               else
4392               {
4393                 callback.BenchProps.EncComplex = 1 << 10;
4394                 callback.BenchProps.DecComplexUnc = 1 << 6;
4395               }
4396               callback.BenchProps.DecComplexCompr = 0;
4397             }
4398             callback.NeedPrint = false;
4399 
4400             if (StringsAreEqualNoCase_Ascii(method2.MethodName, "LZMA"))
4401             {
4402               const NCOM::CPropVariant propVariant = (UInt32)pow;
4403               RINOK(method2.ParseMethodFromPROPVARIANT((UString)"d", propVariant))
4404             }
4405 
4406             const HRESULT res = MethodBench(
4407                 EXTERNAL_CODECS_LOC_VARS
4408                 complexInCommands,
4409               #ifndef Z7_ST
4410                 false, // oldLzmaBenchMode
4411                 numThreadsVector[ti],
4412                 &affinityMode,
4413               #endif
4414                 method2,
4415                 dataSize, (const Byte *)fileDataBuffer,
4416                 bench_DictBits,
4417                 printCallback,
4418                 &callback,
4419                 &callback.BenchProps);
4420             RINOK(res)
4421 
4422             if (ti != 0)
4423               Print_Delimiter(f);
4424 
4425             for (unsigned i = 0; i < 2; i++)
4426             {
4427               const CBenchInfo &bi = callback.BenchInfo_Results[i];
4428               const UInt64 usage = bi.GetUsage();
4429               const UInt64 speed = bi.GetUnpackSizeSpeed();
4430               usageTotals.Values[ti * 2 + i] += usage;
4431               speedTotals.Values[ti * 2 + i] += speed;
4432               Bench_BW_Print_Usage_Speed(f, usage, speed);
4433             }
4434           }
4435         }
4436 
4437         f.NewLine();
4438         numSteps++;
4439       }
4440       if (dataSize >= dict64)
4441         break;
4442     }
4443 
4444     if (numSteps != 0)
4445     {
4446       f.Print("Avg:");
4447       for (unsigned ti = 0; ti < numThreadsVector.Size(); ti++)
4448       {
4449         if (ti != 0)
4450           Print_Delimiter(f);
4451         for (unsigned i = 0; i < numColumns; i++)
4452           Bench_BW_Print_Usage_Speed(f,
4453               usageTotals.Values[ti * numColumns + i] / numSteps,
4454               speedTotals.Values[ti * numColumns + i] / numSteps);
4455       }
4456       f.NewLine();
4457     }
4458 
4459     return S_OK;
4460   }
4461 
4462   bool use2Columns = false;
4463 
4464   bool totalBenchMode = false;
4465   bool onlyHashBench = false;
4466   if (methodName.IsEqualTo_Ascii_NoCase("hash"))
4467   {
4468     onlyHashBench = true;
4469     methodName = "*";
4470     totalBenchMode = true;
4471   }
4472   else if (methodName.Find('*') >= 0)
4473     totalBenchMode = true;
4474 
4475   // ---------- Threads loop ----------
4476   for (unsigned threadsPassIndex = 0; threadsPassIndex < 3; threadsPassIndex++)
4477   {
4478 
4479   UInt32 numThreads = numThreadsSpecified;
4480 
4481   if (!multiThreadTests)
4482   {
4483     if (threadsPassIndex != 0)
4484       break;
4485   }
4486   else
4487   {
4488     numThreads = 1;
4489     if (threadsPassIndex != 0)
4490     {
4491       if (numCPUs < 2)
4492         break;
4493       numThreads = numCPUs;
4494       if (threadsPassIndex == 1)
4495       {
4496         if (numCPUs >= 4)
4497           numThreads = numCPUs / 2;
4498       }
4499       else if (numCPUs < 4)
4500         break;
4501     }
4502   }
4503 
4504   IBenchPrintCallback &f = *printCallback;
4505 
4506   if (threadsPassIndex > 0)
4507   {
4508     f.NewLine();
4509     f.NewLine();
4510   }
4511 
4512   if (!dictIsDefined && !onlyHashBench)
4513   {
4514     const unsigned dicSizeLog_Main = (totalBenchMode ? 24 : 25);
4515     unsigned dicSizeLog = dicSizeLog_Main;
4516 
4517     #ifdef UNDER_CE
4518     dicSizeLog = (UInt64)1 << 20;
4519     #endif
4520 
4521     if (ramSize_Defined)
4522     for (; dicSizeLog > kBenchMinDicLogSize; dicSizeLog--)
4523       if (GetBenchMemoryUsage(numThreads, (int)level, ((UInt64)1 << dicSizeLog), totalBenchMode) + (8 << 20) <= ramSize)
4524         break;
4525 
4526     dict = (UInt64)1 << dicSizeLog;
4527 
4528     if (totalBenchMode && dicSizeLog != dicSizeLog_Main)
4529     {
4530       f.Print("Dictionary reduced to: ");
4531       PrintNumber(f, dicSizeLog, 1);
4532       f.NewLine();
4533     }
4534   }
4535 
4536   Print_Usage_and_Threads(f,
4537       onlyHashBench ?
4538         GetBenchMemoryUsage_Hash(numThreads, dict) :
4539         GetBenchMemoryUsage(numThreads, (int)level, dict, totalBenchMode),
4540       numThreads);
4541 
4542   f.NewLine();
4543 
4544   f.NewLine();
4545 
4546   if (totalBenchMode)
4547   {
4548     callback.NameFieldSize = kFieldSize_Name;
4549     use2Columns = false;
4550   }
4551   else
4552   {
4553     callback.NameFieldSize = kFieldSize_SmallName;
4554     use2Columns = true;
4555   }
4556   callback.Use2Columns = use2Columns;
4557 
4558   bool showFreq = false;
4559   UInt64 cpuFreq = 0;
4560 
4561   if (totalBenchMode)
4562   {
4563     showFreq = true;
4564   }
4565 
4566   unsigned fileldSize = kFieldSize_TotalSize;
4567   if (showFreq)
4568     fileldSize += kFieldSize_EUAndEffec;
4569 
4570   if (use2Columns)
4571   {
4572     PrintSpaces(f, callback.NameFieldSize);
4573     PrintRight(f, "Compressing", fileldSize);
4574     f.Print(kSep);
4575     PrintRight(f, "Decompressing", fileldSize);
4576   }
4577   f.NewLine();
4578   PrintLeft(f, totalBenchMode ? "Method" : "Dict", callback.NameFieldSize);
4579 
4580   int j;
4581 
4582   for (j = 0; j < 2; j++)
4583   {
4584     PrintRight(f, "Speed", kFieldSize_Speed + 1);
4585     PrintRight(f, "Usage", kFieldSize_Usage + 1);
4586     PrintRight(f, "R/U", kFieldSize_RU + 1);
4587     PrintRight(f, "Rating", kFieldSize_Rating + 1);
4588     if (showFreq)
4589     {
4590       PrintRight(f, "E/U", kFieldSize_EU + 1);
4591       PrintRight(f, "Effec", kFieldSize_Effec + 1);
4592     }
4593     if (!use2Columns)
4594       break;
4595     if (j == 0)
4596       f.Print(kSep);
4597   }
4598 
4599   f.NewLine();
4600   PrintSpaces(f, callback.NameFieldSize);
4601 
4602   for (j = 0; j < 2; j++)
4603   {
4604     PrintRight(f, "KiB/s", kFieldSize_Speed + 1);
4605     PrintRight(f, "%", kFieldSize_Usage + 1);
4606     PrintRight(f, "MIPS", kFieldSize_RU + 1);
4607     PrintRight(f, "MIPS", kFieldSize_Rating + 1);
4608     if (showFreq)
4609     {
4610       PrintRight(f, "%", kFieldSize_EU + 1);
4611       PrintRight(f, "%", kFieldSize_Effec + 1);
4612     }
4613     if (!use2Columns)
4614       break;
4615     if (j == 0)
4616       f.Print(kSep);
4617   }
4618 
4619   f.NewLine();
4620   f.NewLine();
4621 
4622   if (specifiedFreq != 0)
4623     cpuFreq = specifiedFreq;
4624 
4625   // bool showTotalSpeed = false;
4626 
4627   if (totalBenchMode)
4628   {
4629     for (UInt32 i = 0; i < numIterations; i++)
4630     {
4631       if (i != 0)
4632         printCallback->NewLine();
4633 
4634       const unsigned kNumCpuTests = 3;
4635       for (unsigned freqTest = 0; freqTest < kNumCpuTests; freqTest++)
4636       {
4637         PrintLeft(f, "CPU", kFieldSize_Name);
4638 
4639         // UInt32 resVal;
4640 
4641         CFreqBench fb;
4642         fb.complexInCommands = complexInCommands;
4643         fb.numThreads = numThreads;
4644         // showFreq;
4645         fb.showFreq = (freqTest == kNumCpuTests - 1 || specifiedFreq != 0);
4646         fb.specifiedFreq = specifiedFreq;
4647 
4648         const HRESULT res = fb.FreqBench(printCallback
4649             #ifndef Z7_ST
4650               , &affinityMode
4651             #endif
4652             );
4653         RINOK(res)
4654 
4655         cpuFreq = fb.CpuFreqRes;
4656         callback.NewLine();
4657 
4658         if (specifiedFreq != 0)
4659           cpuFreq = specifiedFreq;
4660 
4661         if (testTimeMs >= 1000)
4662         if (freqTest == kNumCpuTests - 1)
4663         {
4664           // SetComplexCommandsMs(testTimeMs, specifiedFreq != 0, cpuFreq, complexInCommands);
4665         }
4666       }
4667       callback.NewLine();
4668 
4669       // return S_OK; // change it
4670 
4671       callback.SetFreq(true, cpuFreq);
4672 
4673       if (!onlyHashBench)
4674       {
4675         size_t dataSize = (size_t)dict;
4676         if (use_fileData)
4677         {
4678           dataSize = fileDataBuffer.Size();
4679           if (dictIsDefined && dataSize > dict)
4680             dataSize = (size_t)dict;
4681         }
4682 
4683         const HRESULT res = TotalBench(EXTERNAL_CODECS_LOC_VARS
4684             method, complexInCommands,
4685           #ifndef Z7_ST
4686             numThreads,
4687             &affinityMode,
4688           #endif
4689             dictIsDefined || use_fileData, // forceUnpackSize
4690             dataSize,
4691             (const Byte *)fileDataBuffer,
4692             printCallback, &callback);
4693         RINOK(res)
4694       }
4695 
4696       {
4697         size_t dataSize = (size_t)1 << kNumHashDictBits;
4698         if (dictIsDefined)
4699         {
4700           dataSize = (size_t)dict;
4701           if (dataSize != dict)
4702             return E_OUTOFMEMORY;
4703         }
4704         if (use_fileData)
4705         {
4706           dataSize = fileDataBuffer.Size();
4707           if (dictIsDefined && dataSize > dict)
4708             dataSize = (size_t)dict;
4709         }
4710 
4711         const HRESULT res = TotalBench_Hash(EXTERNAL_CODECS_LOC_VARS
4712             method, complexInCommands,
4713             numThreads,
4714             dataSize, (const Byte *)fileDataBuffer,
4715             printCallback, &callback,
4716         #ifndef Z7_ST
4717           &affinityMode,
4718         #endif
4719           &callback.EncodeRes, true, cpuFreq);
4720         RINOK(res)
4721       }
4722 
4723       callback.NewLine();
4724       {
4725         PrintLeft(f, "CPU", kFieldSize_Name);
4726 
4727         CFreqBench fb;
4728         fb.complexInCommands = complexInCommands;
4729         fb.numThreads = numThreads;
4730         // showFreq;
4731         fb.showFreq = (specifiedFreq != 0);
4732         fb.specifiedFreq = specifiedFreq;
4733 
4734         const HRESULT res = fb.FreqBench(printCallback
4735           #ifndef Z7_ST
4736             , &affinityMode
4737           #endif
4738           );
4739         RINOK(res)
4740         callback.NewLine();
4741       }
4742     }
4743   }
4744   else
4745   {
4746     needSetComplexity = true;
4747     if (!methodName.IsEqualTo_Ascii_NoCase("LZMA"))
4748     {
4749       unsigned i;
4750       for (i = 0; i < Z7_ARRAY_SIZE(g_Bench); i++)
4751       {
4752         const CBenchMethod &h = g_Bench[i];
4753         AString benchMethod (h.Name);
4754         AString benchProps;
4755         const int propPos = benchMethod.Find(':');
4756         if (propPos >= 0)
4757         {
4758           benchProps = benchMethod.Ptr((unsigned)(propPos + 1));
4759           benchMethod.DeleteFrom((unsigned)propPos);
4760         }
4761 
4762         if (AreSameMethodNames(benchMethod, methodName))
4763         {
4764           if (benchProps.IsEmpty()
4765               || (benchProps == "x5" && method.PropsString.IsEmpty())
4766               || method.PropsString.IsPrefixedBy_Ascii_NoCase(benchProps))
4767           {
4768             callback.BenchProps.EncComplex = h.EncComplex;
4769             callback.BenchProps.DecComplexCompr = h.DecComplexCompr;
4770             callback.BenchProps.DecComplexUnc = h.DecComplexUnc;
4771             needSetComplexity = false;
4772             break;
4773           }
4774         }
4775       }
4776       /*
4777       if (i == Z7_ARRAY_SIZE(g_Bench))
4778         return E_NOTIMPL;
4779       */
4780     }
4781     if (needSetComplexity)
4782       callback.BenchProps.SetLzmaCompexity();
4783 
4784   if (startDicLog < kBenchMinDicLogSize)
4785     startDicLog = kBenchMinDicLogSize;
4786 
4787   for (unsigned i = 0; i < numIterations; i++)
4788   {
4789     unsigned pow = (dict < GetDictSizeFromLog(startDicLog)) ? kBenchMinDicLogSize : (unsigned)startDicLog;
4790     if (!multiDict)
4791       pow = 32;
4792     while (GetDictSizeFromLog(pow) > dict && pow > 0)
4793       pow--;
4794     for (; GetDictSizeFromLog(pow) <= dict; pow++)
4795     {
4796       Print_Pow(f, pow);
4797       callback.DictSize = (UInt64)1 << pow;
4798 
4799       COneMethodInfo method2 = method;
4800 
4801       if (StringsAreEqualNoCase_Ascii(method2.MethodName, "LZMA"))
4802       {
4803         // We add dictionary size property.
4804         // method2 can have two different dictionary size properties.
4805         // And last property is main.
4806         NCOM::CPropVariant propVariant = (UInt32)pow;
4807         RINOK(method2.ParseMethodFromPROPVARIANT((UString)"d", propVariant))
4808       }
4809 
4810       size_t uncompressedDataSize;
4811       if (use_fileData)
4812       {
4813         uncompressedDataSize = fileDataBuffer.Size();
4814       }
4815       else
4816       {
4817         uncompressedDataSize = (size_t)callback.DictSize;
4818         if (uncompressedDataSize != callback.DictSize)
4819           return E_OUTOFMEMORY;
4820         if (uncompressedDataSize >= (1 << 18))
4821           uncompressedDataSize += kAdditionalSize;
4822       }
4823 
4824       const HRESULT res = MethodBench(
4825           EXTERNAL_CODECS_LOC_VARS
4826           complexInCommands,
4827         #ifndef Z7_ST
4828           true, numThreads,
4829           &affinityMode,
4830         #endif
4831           method2,
4832           uncompressedDataSize, (const Byte *)fileDataBuffer,
4833           kOldLzmaDictBits, printCallback, &callback, &callback.BenchProps);
4834       f.NewLine();
4835       RINOK(res)
4836       if (!multiDict)
4837         break;
4838     }
4839   }
4840   }
4841 
4842   PrintChars(f, '-', callback.NameFieldSize + fileldSize);
4843 
4844   if (use2Columns)
4845   {
4846     f.Print(kSep);
4847     PrintChars(f, '-', fileldSize);
4848   }
4849 
4850   f.NewLine();
4851 
4852   if (use2Columns)
4853   {
4854     PrintLeft(f, "Avr:", callback.NameFieldSize);
4855     PrintTotals(f, showFreq, cpuFreq, !totalBenchMode, callback.EncodeRes);
4856     f.Print(kSep);
4857     PrintTotals(f, showFreq, cpuFreq, !totalBenchMode, callback.DecodeRes);
4858     f.NewLine();
4859   }
4860 
4861   PrintLeft(f, "Tot:", callback.NameFieldSize);
4862   CTotalBenchRes midRes;
4863   midRes = callback.EncodeRes;
4864   midRes.Update_With_Res(callback.DecodeRes);
4865 
4866   // midRes.SetSum(callback.EncodeRes, callback.DecodeRes);
4867   PrintTotals(f, showFreq, cpuFreq, false, midRes);
4868   f.NewLine();
4869 
4870   }
4871   return S_OK;
4872 }
4873