xref: /third_party/lzma/CPP/7zip/UI/Common/Bench.cpp (revision 370b324c)
1// Bench.cpp
2
3#include "StdAfx.h"
4
5#include "../../../../C/CpuArch.h"
6
7// #include <stdio.h>
8
9#ifndef _WIN32
10
11#define USE_POSIX_TIME
12#define USE_POSIX_TIME2
13#endif // _WIN32
14
15#ifdef USE_POSIX_TIME
16#include <time.h>
17#include <unistd.h>
18#ifdef USE_POSIX_TIME2
19#include <sys/time.h>
20#include <sys/times.h>
21#endif
22#endif // USE_POSIX_TIME
23
24#ifdef _WIN32
25#define USE_ALLOCA
26#endif
27
28#ifdef USE_ALLOCA
29#ifdef _WIN32
30#include <malloc.h>
31#else
32#include <stdlib.h>
33#endif
34#endif
35
36#include "../../../../C/7zCrc.h"
37#include "../../../../C/RotateDefs.h"
38
39#ifndef Z7_ST
40#include "../../../Windows/Synchronization.h"
41#include "../../../Windows/Thread.h"
42#endif
43
44#include "../../../Windows/FileFind.h"
45#include "../../../Windows/FileIO.h"
46#include "../../../Windows/SystemInfo.h"
47
48#include "../../../Common/MyBuffer2.h"
49#include "../../../Common/IntToString.h"
50#include "../../../Common/StringConvert.h"
51#include "../../../Common/StringToInt.h"
52#include "../../../Common/Wildcard.h"
53
54#include "../../Common/MethodProps.h"
55#include "../../Common/StreamObjects.h"
56#include "../../Common/StreamUtils.h"
57
58#include "Bench.h"
59
60using namespace NWindows;
61
62#ifndef Z7_ST
63static const UInt32 k_LZMA = 0x030101;
64#endif
65
66static const UInt64 kComplexInCommands = (UInt64)1 <<
67  #ifdef UNDER_CE
68    31;
69  #else
70    34;
71  #endif
72
73static const UInt32 kComplexInMs = 4000;
74
75static void SetComplexCommandsMs(UInt32 complexInMs,
76    bool isSpecifiedFreq, UInt64 cpuFreq, UInt64 &complexInCommands)
77{
78  complexInCommands = kComplexInCommands;
79  const UInt64 kMinFreq = (UInt64)1000000 * 4;
80  const UInt64 kMaxFreq = (UInt64)1000000 * 20000;
81  if (cpuFreq < kMinFreq && !isSpecifiedFreq)
82    cpuFreq = kMinFreq;
83  if (cpuFreq < kMaxFreq || isSpecifiedFreq)
84  {
85    if (complexInMs != 0)
86      complexInCommands = complexInMs * cpuFreq / 1000;
87    else
88      complexInCommands = cpuFreq >> 2;
89  }
90}
91
92// const UInt64 kBenchmarkUsageMult = 1000000; // for debug
93static const unsigned kBenchmarkUsageMultBits = 16;
94static const UInt64 kBenchmarkUsageMult = 1 << kBenchmarkUsageMultBits;
95
96UInt64 Benchmark_GetUsage_Percents(UInt64 usage)
97{
98  return (100 * usage + kBenchmarkUsageMult / 2) / kBenchmarkUsageMult;
99}
100
101static const unsigned kNumHashDictBits = 17;
102static const UInt32 kFilterUnpackSize = (47 << 10); // + 5; // for test
103
104static const unsigned kOldLzmaDictBits = 32;
105
106// static const size_t kAdditionalSize = (size_t)1 << 32; // for debug
107static const size_t kAdditionalSize = (size_t)1 << 16;
108static const UInt32 kCompressedAdditionalSize = (1 << 10);
109
110static const UInt32 kMaxMethodPropSize = (1 << 6);
111
112
/*
  ALLOC_WITH_HRESULT(buffer, size)
  Calls (buffer)->Alloc(size) and returns E_OUTOFMEMORY from the enclosing
  function, if non-zero (size) was requested and (buffer) is not allocated
  after the call.
  The macro expands to a braced block, so it's used without trailing semicolon.
  (_size_) is parenthesized in the condition, so that any expression can be
  passed as size. Note: both arguments are evaluated twice.
*/
#define ALLOC_WITH_HRESULT(_buffer_, _size_) \
  { (_buffer_)->Alloc(_size_); \
  if ((_size_) && !(_buffer_)->IsAllocated()) return E_OUTOFMEMORY; }
116
117
118class CBaseRandomGenerator
119{
120  UInt32 A1;
121  UInt32 A2;
122  UInt32 Salt;
123public:
124  CBaseRandomGenerator(UInt32 salt = 0): Salt(salt) { Init(); }
125  void Init() { A1 = 362436069; A2 = 521288629;}
126  Z7_FORCE_INLINE
127  UInt32 GetRnd()
128  {
129    return Salt ^
130    (
131      ((A1 = 36969 * (A1 & 0xffff) + (A1 >> 16)) << 16) +
132      ((A2 = 18000 * (A2 & 0xffff) + (A2 >> 16)) )
133    );
134  }
135};
136
137
138Z7_NO_INLINE
139static void RandGen(Byte *buf, size_t size)
140{
141  CBaseRandomGenerator RG;
142  const size_t size4 = size & ~((size_t)3);
143  size_t i;
144  for (i = 0; i < size4; i += 4)
145  {
146    const UInt32 v = RG.GetRnd();
147    SetUi32(buf + i, v)
148  }
149  UInt32 v = RG.GetRnd();
150  for (; i < size; i++)
151  {
152    buf[i] = (Byte)v;
153    v >>= 8;
154  }
155}
156
157
158class CBenchRandomGenerator: public CMidAlignedBuffer
159{
160  static UInt32 GetVal(UInt32 &res, unsigned numBits)
161  {
162    UInt32 val = res & (((UInt32)1 << numBits) - 1);
163    res >>= numBits;
164    return val;
165  }
166
167  static UInt32 GetLen(UInt32 &r)
168  {
169    UInt32 len = GetVal(r, 2);
170    return GetVal(r, 1 + len);
171  }
172
173public:
174
175  void GenerateSimpleRandom(UInt32 salt)
176  {
177    CBaseRandomGenerator rg(salt);
178    const size_t bufSize = Size();
179    Byte *buf = (Byte *)*this;
180    for (size_t i = 0; i < bufSize; i++)
181      buf[i] = (Byte)rg.GetRnd();
182  }
183
184  void GenerateLz(unsigned dictBits, UInt32 salt)
185  {
186    CBaseRandomGenerator rg(salt);
187    size_t pos = 0;
188    size_t rep0 = 1;
189    const size_t bufSize = Size();
190    Byte *buf = (Byte *)*this;
191    unsigned posBits = 1;
192
193    // printf("\n dictBits = %d\n", (UInt32)dictBits);
194    // printf("\n bufSize = 0x%p\n", (const void *)bufSize);
195
196    while (pos < bufSize)
197    {
198      /*
199      if (pos >= ((UInt32)1 << 31))
200        printf(" %x\n", pos);
201      */
202      UInt32 r = rg.GetRnd();
203      if (GetVal(r, 1) == 0 || pos < 1024)
204        buf[pos++] = (Byte)(r & 0xFF);
205      else
206      {
207        UInt32 len;
208        len = 1 + GetLen(r);
209
210        if (GetVal(r, 3) != 0)
211        {
212          len += GetLen(r);
213
214          while (((size_t)1 << posBits) < pos)
215            posBits++;
216
217          unsigned numBitsMax = dictBits;
218          if (numBitsMax > posBits)
219            numBitsMax = posBits;
220
221          const unsigned kAddBits = 6;
222          unsigned numLogBits = 5;
223          if (numBitsMax <= (1 << 4) - 1 + kAddBits)
224            numLogBits = 4;
225
226          for (;;)
227          {
228            const UInt32 ppp = GetVal(r, numLogBits) + kAddBits;
229            r = rg.GetRnd();
230            if (ppp > numBitsMax)
231              continue;
232            // rep0 = GetVal(r, ppp);
233            rep0 = r & (((size_t)1 << ppp) - 1);
234            if (rep0 < pos)
235              break;
236            r = rg.GetRnd();
237          }
238          rep0++;
239        }
240
241        // len *= 300; // for debug
242        {
243          const size_t rem = bufSize - pos;
244          if (len > rem)
245            len = (UInt32)rem;
246        }
247        Byte *dest = buf + pos;
248        const Byte *src = dest - rep0;
249        pos += len;
250        for (UInt32 i = 0; i < len; i++)
251          *dest++ = *src++;
252      }
253    }
254    // printf("\n CRC = %x\n", CrcCalc(buf, bufSize));
255  }
256};
257
258
259Z7_CLASS_IMP_NOQIB_1(
260  CBenchmarkInStream
261  , ISequentialInStream
262)
263  const Byte *Data;
264  size_t Pos;
265  size_t Size;
266public:
267  void Init(const Byte *data, size_t size)
268  {
269    Data = data;
270    Size = size;
271    Pos = 0;
272  }
273  bool WasFinished() const { return Pos == Size; }
274};
275
276Z7_COM7F_IMF(CBenchmarkInStream::Read(void *data, UInt32 size, UInt32 *processedSize))
277{
278  const UInt32 kMaxBlockSize = (1 << 20);
279  if (size > kMaxBlockSize)
280    size = kMaxBlockSize;
281  const size_t remain = Size - Pos;
282  if (size > remain)
283    size = (UInt32)remain;
284
285  if (size != 0)
286    memcpy(data, Data + Pos, size);
287
288  Pos += size;
289  if (processedSize)
290    *processedSize = size;
291  return S_OK;
292}
293
294
295class CBenchmarkOutStream Z7_final:
296  public ISequentialOutStream,
297  public CMyUnknownImp,
298  public CMidAlignedBuffer
299{
300  Z7_COM_UNKNOWN_IMP_0
301  Z7_IFACE_COM7_IMP(ISequentialOutStream)
302  // bool _overflow;
303public:
304  size_t Pos;
305  bool RealCopy;
306  bool CalcCrc;
307  UInt32 Crc;
308
309  // CBenchmarkOutStream(): _overflow(false) {}
310  void Init(bool realCopy, bool calcCrc)
311  {
312    Crc = CRC_INIT_VAL;
313    RealCopy = realCopy;
314    CalcCrc = calcCrc;
315    // _overflow = false;
316    Pos = 0;
317  }
318
319  void InitCrc()
320  {
321    Crc = CRC_INIT_VAL;
322  }
323
324  void Calc(const void *data, size_t size)
325  {
326    Crc = CrcUpdate(Crc, data, size);
327  }
328
329  size_t GetPos() const { return Pos; }
330
331  // void Print() { printf("\n%8d %8d\n", (unsigned)BufferSize, (unsigned)Pos); }
332};
333
334Z7_COM7F_IMF(CBenchmarkOutStream::Write(const void *data, UInt32 size, UInt32 *processedSize))
335{
336  size_t curSize = Size() - Pos;
337  if (curSize > size)
338    curSize = size;
339  if (curSize != 0)
340  {
341    if (RealCopy)
342      memcpy(((Byte *)*this) + Pos, data, curSize);
343    if (CalcCrc)
344      Calc(data, curSize);
345    Pos += curSize;
346  }
347  if (processedSize)
348    *processedSize = (UInt32)curSize;
349  if (curSize != size)
350  {
351    // _overflow = true;
352    return E_FAIL;
353  }
354  return S_OK;
355}
356
357
358Z7_CLASS_IMP_NOQIB_1(
359  CCrcOutStream
360  , ISequentialOutStream
361)
362public:
363  bool CalcCrc;
364  UInt32 Crc;
365  UInt64 Pos;
366
367  CCrcOutStream(): CalcCrc(true) {}
368  void Init() { Crc = CRC_INIT_VAL; Pos = 0; }
369  void Calc(const void *data, size_t size)
370  {
371    Crc = CrcUpdate(Crc, data, size);
372  }
373};
374
375Z7_COM7F_IMF(CCrcOutStream::Write(const void *data, UInt32 size, UInt32 *processedSize))
376{
377  if (CalcCrc)
378    Calc(data, size);
379  Pos += size;
380  if (processedSize)
381    *processedSize = size;
382  return S_OK;
383}
384
385// #include "../../../../C/My_sys_time.h"
386
387static UInt64 GetTimeCount()
388{
389  #ifdef USE_POSIX_TIME
390  #ifdef USE_POSIX_TIME2
391  timeval v;
392  if (gettimeofday(&v, NULL) == 0)
393    return (UInt64)(v.tv_sec) * 1000000 + (UInt64)v.tv_usec;
394  return (UInt64)time(NULL) * 1000000;
395  #else
396  return time(NULL);
397  #endif
398  #else
399  LARGE_INTEGER value;
400  if (::QueryPerformanceCounter(&value))
401    return (UInt64)value.QuadPart;
402  return GetTickCount();
403  #endif
404}
405
406static UInt64 GetFreq()
407{
408  #ifdef USE_POSIX_TIME
409  #ifdef USE_POSIX_TIME2
410  return 1000000;
411  #else
412  return 1;
413  #endif
414  #else
415  LARGE_INTEGER value;
416  if (::QueryPerformanceFrequency(&value))
417    return (UInt64)value.QuadPart;
418  return 1000;
419  #endif
420}
421
422
423#ifdef USE_POSIX_TIME
424
425struct CUserTime
426{
427  UInt64 Sum;
428  clock_t Prev;
429
430  void Init()
431  {
432    // Prev = clock();
433    Sum = 0;
434    Prev = 0;
435    Update();
436    Sum = 0;
437  }
438
439  void Update()
440  {
441    tms t;
442    /* clock_t res = */ times(&t);
443    clock_t newVal = t.tms_utime + t.tms_stime;
444    Sum += (UInt64)(newVal - Prev);
445    Prev = newVal;
446
447    /*
448    clock_t v = clock();
449    if (v != -1)
450    {
451      Sum += v - Prev;
452      Prev = v;
453    }
454    */
455  }
456  UInt64 GetUserTime()
457  {
458    Update();
459    return Sum;
460  }
461};
462
463#else
464
465
466struct CUserTime
467{
468  bool UseTick;
469  DWORD Prev_Tick;
470  UInt64 Prev;
471  UInt64 Sum;
472
473  void Init()
474  {
475    UseTick = false;
476    Prev_Tick = 0;
477    Prev = 0;
478    Sum = 0;
479    Update();
480    Sum = 0;
481  }
482  UInt64 GetUserTime()
483  {
484    Update();
485    return Sum;
486  }
487  void Update();
488};
489
490static inline UInt64 GetTime64(const FILETIME &t) { return ((UInt64)t.dwHighDateTime << 32) | t.dwLowDateTime; }
491
492void CUserTime::Update()
493{
494  DWORD new_Tick = GetTickCount();
495  FILETIME creationTime, exitTime, kernelTime, userTime;
496  if (!UseTick &&
497      #ifdef UNDER_CE
498        ::GetThreadTimes(::GetCurrentThread()
499      #else
500        ::GetProcessTimes(::GetCurrentProcess()
501      #endif
502      , &creationTime, &exitTime, &kernelTime, &userTime))
503  {
504    UInt64 newVal = GetTime64(userTime) + GetTime64(kernelTime);
505    Sum += newVal - Prev;
506    Prev = newVal;
507  }
508  else
509  {
510    UseTick = true;
511    Sum += (UInt64)(new_Tick - (DWORD)Prev_Tick) * 10000;
512  }
513  Prev_Tick = new_Tick;
514}
515
516
517#endif
518
519static UInt64 GetUserFreq()
520{
521  #ifdef USE_POSIX_TIME
522  // return CLOCKS_PER_SEC;
523  return (UInt64)sysconf(_SC_CLK_TCK);
524  #else
525  return 10000000;
526  #endif
527}
528
529class CBenchProgressStatus Z7_final
530{
531  #ifndef Z7_ST
532  NSynchronization::CCriticalSection CS;
533  #endif
534public:
535  HRESULT Res;
536  bool EncodeMode;
537  void SetResult(HRESULT res)
538  {
539    #ifndef Z7_ST
540    NSynchronization::CCriticalSectionLock lock(CS);
541    #endif
542    Res = res;
543  }
544  HRESULT GetResult()
545  {
546    #ifndef Z7_ST
547    NSynchronization::CCriticalSectionLock lock(CS);
548    #endif
549    return Res;
550  }
551};
552
553struct CBenchInfoCalc
554{
555  CBenchInfo BenchInfo;
556  CUserTime UserTime;
557
558  void SetStartTime();
559  void SetFinishTime(CBenchInfo &dest);
560};
561
562void CBenchInfoCalc::SetStartTime()
563{
564  BenchInfo.GlobalFreq = GetFreq();
565  BenchInfo.UserFreq = GetUserFreq();
566  BenchInfo.GlobalTime = ::GetTimeCount();
567  BenchInfo.UserTime = 0;
568  UserTime.Init();
569}
570
571void CBenchInfoCalc::SetFinishTime(CBenchInfo &dest)
572{
573  dest = BenchInfo;
574  dest.GlobalTime = ::GetTimeCount() - BenchInfo.GlobalTime;
575  dest.UserTime = UserTime.GetUserTime();
576}
577
578class CBenchProgressInfo Z7_final:
579  public ICompressProgressInfo,
580  public CMyUnknownImp,
581  public CBenchInfoCalc
582{
583  Z7_COM_UNKNOWN_IMP_0
584  Z7_IFACE_COM7_IMP(ICompressProgressInfo)
585public:
586  CBenchProgressStatus *Status;
587  IBenchCallback *Callback;
588
589  CBenchProgressInfo(): Callback(NULL) {}
590};
591
592
593Z7_COM7F_IMF(CBenchProgressInfo::SetRatioInfo(const UInt64 *inSize, const UInt64 *outSize))
594{
595  HRESULT res = Status->GetResult();
596  if (res != S_OK)
597    return res;
598  if (!Callback)
599    return res;
600
601  /*
602  static UInt64 inSizePrev = 0;
603  static UInt64 outSizePrev = 0;
604  UInt64 delta1 = 0, delta2 = 0, val1 = 0, val2 = 0;
605  if (inSize)   { val1 = *inSize;  delta1 = val1 - inSizePrev;  inSizePrev  = val1; }
606  if (outSize)  { val2 = *outSize; delta2 = val2 - outSizePrev; outSizePrev = val2;  }
607  UInt64 percents = delta2 * 1000;
608  if (delta1 != 0)
609    percents /= delta1;
610  printf("=== %7d %7d     %7d %7d  ratio = %4d\n",
611      (unsigned)(val1 >> 10), (unsigned)(delta1 >> 10),
612      (unsigned)(val2 >> 10), (unsigned)(delta2 >> 10),
613      (unsigned)percents);
614  */
615
616  CBenchInfo info;
617  SetFinishTime(info);
618  if (Status->EncodeMode)
619  {
620    info.UnpackSize = BenchInfo.UnpackSize + *inSize;
621    info.PackSize = BenchInfo.PackSize + *outSize;
622    res = Callback->SetEncodeResult(info, false);
623  }
624  else
625  {
626    info.PackSize = BenchInfo.PackSize + *inSize;
627    info.UnpackSize = BenchInfo.UnpackSize + *outSize;
628    res = Callback->SetDecodeResult(info, false);
629  }
630  if (res != S_OK)
631    Status->SetResult(res);
632  return res;
633}
634
635static const unsigned kSubBits = 8;
636
637static unsigned GetLogSize(UInt64 size)
638{
639  unsigned i = 0;
640  for (;;)
641  {
642    i++;  size >>= 1;  if (size == 0) break;
643  }
644  return i;
645}
646
647
648static UInt32 GetLogSize_Sub(UInt64 size)
649{
650  if (size <= 1)
651    return 0;
652  const unsigned i = GetLogSize(size) - 1;
653  UInt32 v;
654  if (i <= kSubBits)
655    v = (UInt32)(size) << (kSubBits - i);
656  else
657    v = (UInt32)(size >> (i - kSubBits));
658  return ((UInt32)i << kSubBits) + (v & (((UInt32)1 << kSubBits) - 1));
659}
660
661
662static UInt64 Get_UInt64_from_double(double v)
663{
664  const UInt64 kMaxVal = (UInt64)1 << 62;
665  if (v > (double)(Int64)kMaxVal)
666    return kMaxVal;
667  return (UInt64)v;
668}
669
670static UInt64 MyMultDiv64(UInt64 m1, UInt64 m2, UInt64 d)
671{
672  if (d == 0)
673    d = 1;
674  const double v =
675      (double)(Int64)m1 *
676      (double)(Int64)m2 /
677      (double)(Int64)d;
678  return Get_UInt64_from_double(v);
679  /*
680  unsigned n1 = GetLogSize(m1);
681  unsigned n2 = GetLogSize(m2);
682  while (n1 + n2 > 64)
683  {
684    if (n1 >= n2)
685    {
686      m1 >>= 1;
687      n1--;
688    }
689    else
690    {
691      m2 >>= 1;
692      n2--;
693    }
694    d >>= 1;
695  }
696
697  if (d == 0)
698    d = 1;
699  return m1 * m2 / d;
700  */
701}
702
703
704UInt64 CBenchInfo::GetUsage() const
705{
706  UInt64 userTime = UserTime;
707  UInt64 userFreq = UserFreq;
708  UInt64 globalTime = GlobalTime;
709  UInt64 globalFreq = GlobalFreq;
710
711  if (userFreq == 0)
712    userFreq = 1;
713  if (globalTime == 0)
714    globalTime = 1;
715
716  const double v =
717        ((double)(Int64)userTime / (double)(Int64)userFreq)
718      * ((double)(Int64)globalFreq / (double)(Int64)globalTime)
719      * (double)(Int64)kBenchmarkUsageMult;
720  return Get_UInt64_from_double(v);
721  /*
722  return MyMultDiv64(
723        MyMultDiv64(kBenchmarkUsageMult, userTime, userFreq),
724        globalFreq, globalTime);
725  */
726}
727
728
729UInt64 CBenchInfo::GetRatingPerUsage(UInt64 rating) const
730{
731  if (UserTime == 0)
732  {
733    return 0;
734    // userTime = 1;
735  }
736  UInt64 globalFreq = GlobalFreq;
737  if (globalFreq == 0)
738    globalFreq = 1;
739
740  const double v =
741        ((double)(Int64)GlobalTime / (double)(Int64)globalFreq)
742      * ((double)(Int64)UserFreq  / (double)(Int64)UserTime)
743      * (double)(Int64)rating;
744  return Get_UInt64_from_double(v);
745  /*
746  return MyMultDiv64(
747        MyMultDiv64(rating, UserFreq, UserTime),
748        GlobalTime, globalFreq);
749  */
750}
751
752
753UInt64 CBenchInfo::GetSpeed(UInt64 numUnits) const
754{
755  return MyMultDiv64(numUnits, GlobalFreq, GlobalTime);
756}
757
758static UInt64 GetNumCommands_from_Size_and_Complexity(UInt64 size, Int32 complexity)
759{
760  return complexity >= 0 ?
761      size * (UInt32)complexity :
762      size / (UInt32)(-complexity);
763}
764
765struct CBenchProps
766{
767  bool LzmaRatingMode;
768
769  Int32 EncComplex;
770  Int32 DecComplexCompr;
771  Int32 DecComplexUnc;
772
773  unsigned KeySize;
774
775  CBenchProps():
776      LzmaRatingMode(false),
777      KeySize(0)
778    {}
779
780  void SetLzmaCompexity();
781
782  UInt64 GetNumCommands_Enc(UInt64 unpackSize) const
783  {
784    const UInt32 kMinSize = 100;
785    if (unpackSize < kMinSize)
786      unpackSize = kMinSize;
787    return GetNumCommands_from_Size_and_Complexity(unpackSize, EncComplex);
788  }
789
790  UInt64 GetNumCommands_Dec(UInt64 packSize, UInt64 unpackSize) const
791  {
792    return
793        GetNumCommands_from_Size_and_Complexity(packSize, DecComplexCompr) +
794        GetNumCommands_from_Size_and_Complexity(unpackSize, DecComplexUnc);
795  }
796
797  UInt64 GetRating_Enc(UInt64 dictSize, UInt64 elapsedTime, UInt64 freq, UInt64 size) const;
798  UInt64 GetRating_Dec(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt64 numIterations) const;
799};
800
801void CBenchProps::SetLzmaCompexity()
802{
803  EncComplex = 1200;
804  DecComplexUnc = 4;
805  DecComplexCompr = 190;
806  LzmaRatingMode = true;
807}
808
809UInt64 CBenchProps::GetRating_Enc(UInt64 dictSize, UInt64 elapsedTime, UInt64 freq, UInt64 size) const
810{
811  if (dictSize < (1 << kBenchMinDicLogSize))
812    dictSize = (1 << kBenchMinDicLogSize);
813  Int32 encComplex = EncComplex;
814  if (LzmaRatingMode)
815  {
816    /*
817    for (UInt64 uu = 0; uu < (UInt64)0xf << 60;)
818    {
819      unsigned rr = GetLogSize_Sub(uu);
820      printf("\n%16I64x , log = %4x", uu, rr);
821      uu += 1;
822      uu += uu / 50;
823    }
824    */
825    // throw 1;
826    const UInt32 t = GetLogSize_Sub(dictSize) - (kBenchMinDicLogSize << kSubBits);
827    encComplex = 870 + ((t * t * 5) >> (2 * kSubBits));
828  }
829  const UInt64 numCommands = GetNumCommands_from_Size_and_Complexity(size, encComplex);
830  return MyMultDiv64(numCommands, freq, elapsedTime);
831}
832
833UInt64 CBenchProps::GetRating_Dec(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt64 numIterations) const
834{
835  const UInt64 numCommands = GetNumCommands_Dec(inSize, outSize) * numIterations;
836  return MyMultDiv64(numCommands, freq, elapsedTime);
837}
838
839
840
841UInt64 CBenchInfo::GetRating_LzmaEnc(UInt64 dictSize) const
842{
843  CBenchProps props;
844  props.SetLzmaCompexity();
845  return props.GetRating_Enc(dictSize, GlobalTime, GlobalFreq, UnpackSize * NumIterations);
846}
847
848UInt64 CBenchInfo::GetRating_LzmaDec() const
849{
850  CBenchProps props;
851  props.SetLzmaCompexity();
852  return props.GetRating_Dec(GlobalTime, GlobalFreq, UnpackSize, PackSize, NumIterations);
853}
854
855
856#ifndef Z7_ST
857
858#define NUM_CPU_LEVELS_MAX 3
859
860struct CAffinityMode
861{
862  unsigned NumBundleThreads;
863  unsigned NumLevels;
864  unsigned NumCoreThreads;
865  unsigned NumCores;
866  // unsigned DivideNum;
867  UInt32 Sizes[NUM_CPU_LEVELS_MAX];
868
869  void SetLevels(unsigned numCores, unsigned numCoreThreads);
870  DWORD_PTR GetAffinityMask(UInt32 bundleIndex, CCpuSet *cpuSet) const;
871  bool NeedAffinity() const { return NumBundleThreads != 0; }
872
873  WRes CreateThread_WithAffinity(NWindows::CThread &thread, THREAD_FUNC_TYPE startAddress, LPVOID parameter, UInt32 bundleIndex) const
874  {
875    if (NeedAffinity())
876    {
877      CCpuSet cpuSet;
878      GetAffinityMask(bundleIndex, &cpuSet);
879      return thread.Create_With_CpuSet(startAddress, parameter, &cpuSet);
880    }
881    return thread.Create(startAddress, parameter);
882  }
883
884  CAffinityMode():
885    NumBundleThreads(0),
886    NumLevels(0),
887    NumCoreThreads(1)
888    // DivideNum(1)
889    {}
890};
891
892void CAffinityMode::SetLevels(unsigned numCores, unsigned numCoreThreads)
893{
894  NumCores = numCores;
895  NumCoreThreads = numCoreThreads;
896  NumLevels = 0;
897  if (numCoreThreads == 0 || numCores == 0 || numCores % numCoreThreads != 0)
898    return;
899  UInt32 c = numCores / numCoreThreads;
900  UInt32 c2 = 1;
901  while ((c & 1) == 0)
902  {
903    c >>= 1;
904    c2 <<= 1;
905  }
906  if (c2 != 1)
907    Sizes[NumLevels++] = c2;
908  if (c != 1)
909    Sizes[NumLevels++] = c;
910  if (numCoreThreads != 1)
911    Sizes[NumLevels++] = numCoreThreads;
912  if (NumLevels == 0)
913    Sizes[NumLevels++] = 1;
914
915  /*
916  printf("\n Cores:");
917  for (unsigned i = 0; i < NumLevels; i++)
918  {
919    printf(" %d", Sizes[i]);
920  }
921  printf("\n");
922  */
923}
924
925
926DWORD_PTR CAffinityMode::GetAffinityMask(UInt32 bundleIndex, CCpuSet *cpuSet) const
927{
928  CpuSet_Zero(cpuSet);
929
930  if (NumLevels == 0)
931    return 0;
932
933  // printf("\n%2d", bundleIndex);
934
935  /*
936  UInt32 low = 0;
937  if (DivideNum != 1)
938  {
939    low = bundleIndex % DivideNum;
940    bundleIndex /= DivideNum;
941  }
942  */
943
944  UInt32 numGroups = NumCores / NumBundleThreads;
945  UInt32 m = bundleIndex % numGroups;
946  UInt32 v = 0;
947  for (unsigned i = 0; i < NumLevels; i++)
948  {
949    UInt32 size = Sizes[i];
950    while ((size & 1) == 0)
951    {
952      v *= 2;
953      v |= (m & 1);
954      m >>= 1;
955      size >>= 1;
956    }
957    v *= size;
958    v += m % size;
959    m /= size;
960  }
961
962  // UInt32 nb = NumBundleThreads / DivideNum;
963  UInt32 nb = NumBundleThreads;
964
965  DWORD_PTR mask = ((DWORD_PTR)1 << nb) - 1;
966  // v += low;
967  mask <<= v;
968
969  // printf(" %2d %8x \n ", v, (unsigned)mask);
970  #ifdef _WIN32
971    *cpuSet = mask;
972  #else
973  {
974    for (unsigned k = 0; k < nb; k++)
975      CpuSet_Set(cpuSet, v + k);
976  }
977  #endif
978
979  return mask;
980}
981
982
983struct CBenchSyncCommon
984{
985  bool ExitMode;
986  NSynchronization::CManualResetEvent StartEvent;
987
988  CBenchSyncCommon(): ExitMode(false) {}
989};
990
991#endif
992
993
994
// CRC check modes for decoding (type of CEncoderInfo::CheckCrcMode_Dec).
// NOTE(review): exact meaning of each mode is defined by decoding code
// outside of this part of file.
enum E_CheckCrcMode
{
  k_CheckCrcMode_Never = 0,
  k_CheckCrcMode_Always,    // = 1
  k_CheckCrcMode_FirstPass  // = 2
};
1001
1002class CEncoderInfo;
1003
1004class CEncoderInfo Z7_final
1005{
1006  Z7_CLASS_NO_COPY(CEncoderInfo)
1007
1008public:
1009
1010  #ifndef Z7_ST
1011  NWindows::CThread thread[2];
1012  NSynchronization::CManualResetEvent ReadyEvent;
1013  UInt32 NumDecoderSubThreads;
1014  CBenchSyncCommon *Common;
1015  UInt32 EncoderIndex;
1016  UInt32 NumEncoderInternalThreads;
1017  CAffinityMode AffinityMode;
1018  bool IsGlobalMtMode; // if more than one benchmark encoder threads
1019  #endif
1020
1021  CMyComPtr<ICompressCoder> _encoder;
1022  CMyComPtr<ICompressFilter> _encoderFilter;
1023  CBenchProgressInfo *progressInfoSpec[2];
1024  CMyComPtr<ICompressProgressInfo> progressInfo[2];
1025  UInt64 NumIterations;
1026
1027  UInt32 Salt;
1028
1029  #ifdef USE_ALLOCA
1030  size_t AllocaSize;
1031  #endif
1032
1033  unsigned KeySize;
1034  Byte _key[32];
1035  Byte _iv[16];
1036
1037  HRESULT Set_Key_and_IV(ICryptoProperties *cp)
1038  {
1039    RINOK(cp->SetKey(_key, KeySize))
1040    return cp->SetInitVector(_iv, sizeof(_iv));
1041  }
1042
1043  Byte _psw[16];
1044
1045  bool CheckCrc_Enc;    /* = 1, if we want to check packed data crcs after each pass
1046                                used for filter and usual coders */
1047  bool UseRealData_Enc; /* = 1, if we want to use only original data for each pass
1048                                used only for filter */
1049  E_CheckCrcMode CheckCrcMode_Dec;
1050
1051  struct CDecoderInfo
1052  {
1053    CEncoderInfo *Encoder;
1054    UInt32 DecoderIndex;
1055    bool CallbackMode;
1056
1057    #ifdef USE_ALLOCA
1058    size_t AllocaSize;
1059    #endif
1060  };
1061  CDecoderInfo decodersInfo[2];
1062
1063  CMyComPtr<ICompressCoder> _decoders[2];
1064  CMyComPtr<ICompressFilter> _decoderFilter;
1065
1066  HRESULT Results[2];
1067  CBenchmarkOutStream *outStreamSpec;
1068  CMyComPtr<ISequentialOutStream> outStream;
1069  IBenchCallback *callback;
1070  IBenchPrintCallback *printCallback;
1071  UInt32 crc;
1072  size_t kBufferSize;
1073  size_t compressedSize;
1074  const Byte *uncompressedDataPtr;
1075
1076  const Byte *fileData;
1077  CBenchRandomGenerator rg;
1078
1079  CMidAlignedBuffer rgCopy; // it must be 16-byte aligned !!!
1080
1081  // CBenchmarkOutStream *propStreamSpec;
1082  Byte propsData[kMaxMethodPropSize];
1083  CBufPtrSeqOutStream *propStreamSpec;
1084  CMyComPtr<ISequentialOutStream> propStream;
1085
1086  unsigned generateDictBits;
1087  COneMethodInfo _method;
1088
1089  // for decode
1090  size_t _uncompressedDataSize;
1091
1092  HRESULT Generate();
1093  HRESULT Encode();
1094  HRESULT Decode(UInt32 decoderIndex);
1095
1096  CEncoderInfo():
1097    #ifndef Z7_ST
1098    Common(NULL),
1099    IsGlobalMtMode(true),
1100    #endif
1101    Salt(0),
1102    KeySize(0),
1103    CheckCrc_Enc(true),
1104    UseRealData_Enc(true),
1105    CheckCrcMode_Dec(k_CheckCrcMode_Always),
1106    outStreamSpec(NULL),
1107    callback(NULL),
1108    printCallback(NULL),
1109    fileData(NULL),
1110    propStreamSpec(NULL)
1111    {}
1112
1113  #ifndef Z7_ST
1114
1115  static THREAD_FUNC_DECL EncodeThreadFunction(void *param)
1116  {
1117    HRESULT res;
1118    CEncoderInfo *encoder = (CEncoderInfo *)param;
1119    try
1120    {
1121      #ifdef USE_ALLOCA
1122      alloca(encoder->AllocaSize);
1123      #endif
1124
1125      res = encoder->Encode();
1126    }
1127    catch(...)
1128    {
1129      res = E_FAIL;
1130    }
1131    encoder->Results[0] = res;
1132    if (res != S_OK)
1133      encoder->progressInfoSpec[0]->Status->SetResult(res);
1134    encoder->ReadyEvent.Set();
1135    return THREAD_FUNC_RET_ZERO;
1136  }
1137
1138  static THREAD_FUNC_DECL DecodeThreadFunction(void *param)
1139  {
1140    CDecoderInfo *decoder = (CDecoderInfo *)param;
1141
1142    #ifdef USE_ALLOCA
1143    alloca(decoder->AllocaSize);
1144    #endif
1145
1146    CEncoderInfo *encoder = decoder->Encoder;
1147    encoder->Results[decoder->DecoderIndex] = encoder->Decode(decoder->DecoderIndex);
1148    return THREAD_FUNC_RET_ZERO;
1149  }
1150
1151  HRESULT CreateEncoderThread()
1152  {
1153    WRes res = 0;
1154    if (!ReadyEvent.IsCreated())
1155      res = ReadyEvent.Create();
1156    if (res == 0)
1157      res = AffinityMode.CreateThread_WithAffinity(thread[0], EncodeThreadFunction, this,
1158          EncoderIndex);
1159    return HRESULT_FROM_WIN32(res);
1160  }
1161
1162  HRESULT CreateDecoderThread(unsigned index, bool callbackMode
1163      #ifdef USE_ALLOCA
1164      , size_t allocaSize
1165      #endif
1166      )
1167  {
1168    CDecoderInfo &decoder = decodersInfo[index];
1169    decoder.DecoderIndex = index;
1170    decoder.Encoder = this;
1171
1172    #ifdef USE_ALLOCA
1173    decoder.AllocaSize = allocaSize;
1174    #endif
1175
1176    decoder.CallbackMode = callbackMode;
1177
1178    WRes res = AffinityMode.CreateThread_WithAffinity(thread[index], DecodeThreadFunction, &decoder,
1179        // EncoderIndex * NumEncoderInternalThreads + index
1180        EncoderIndex
1181        );
1182
1183    return HRESULT_FROM_WIN32(res);
1184  }
1185
1186  #endif
1187};
1188
1189
1190
1191
1192static size_t GetBenchCompressedSize(size_t bufferSize)
1193{
1194  return kCompressedAdditionalSize + bufferSize + bufferSize / 16;
1195  // kBufferSize / 2;
1196}
1197
1198
1199HRESULT CEncoderInfo::Generate()
1200{
1201  const COneMethodInfo &method = _method;
1202
1203  // we need extra space, if input data is already compressed
1204  const size_t kCompressedBufferSize = _encoderFilter ?
1205      kBufferSize :
1206      GetBenchCompressedSize(kBufferSize);
1207
1208  if (kCompressedBufferSize < kBufferSize)
1209    return E_FAIL;
1210
1211  uncompressedDataPtr = fileData;
1212  if (fileData)
1213  {
1214    #if !defined(Z7_ST)
1215    if (IsGlobalMtMode)
1216    {
1217      /* we copy the data to local buffer of thread to eliminate
1218         using of shared buffer by different threads */
1219      ALLOC_WITH_HRESULT(&rg, kBufferSize)
1220      memcpy((Byte *)rg, fileData, kBufferSize);
1221      uncompressedDataPtr = (const Byte *)rg;
1222    }
1223    #endif
1224  }
1225  else
1226  {
1227    ALLOC_WITH_HRESULT(&rg, kBufferSize)
1228    // DWORD ttt = GetTickCount();
1229    if (generateDictBits == 0)
1230      rg.GenerateSimpleRandom(Salt);
1231    else
1232    {
1233      if (generateDictBits >= sizeof(size_t) * 8
1234          && kBufferSize > ((size_t)1 << (sizeof(size_t) * 8 - 1)))
1235        return E_INVALIDARG;
1236      rg.GenerateLz(generateDictBits, Salt);
1237      // return E_ABORT; // for debug
1238    }
1239    // printf("\n%d\n            ", GetTickCount() - ttt);
1240
1241    crc = CrcCalc((const Byte *)rg, rg.Size());
1242    uncompressedDataPtr = (const Byte *)rg;
1243  }
1244
1245  if (!outStream)
1246  {
1247    outStreamSpec = new CBenchmarkOutStream;
1248    outStream = outStreamSpec;
1249  }
1250
1251  ALLOC_WITH_HRESULT(outStreamSpec, kCompressedBufferSize)
1252
1253  if (_encoderFilter)
1254  {
1255    /* we try to reduce the number of memcpy() in main encoding loop.
1256       so we copy data to temp buffers here */
1257    ALLOC_WITH_HRESULT(&rgCopy, kBufferSize)
1258    memcpy((Byte *)*outStreamSpec, uncompressedDataPtr, kBufferSize);
1259    memcpy((Byte *)rgCopy, uncompressedDataPtr, kBufferSize);
1260  }
1261
1262  if (!propStream)
1263  {
1264    propStreamSpec = new CBufPtrSeqOutStream; // CBenchmarkOutStream;
1265    propStream = propStreamSpec;
1266  }
1267  // ALLOC_WITH_HRESULT_2(propStreamSpec, kMaxMethodPropSize);
1268  // propStreamSpec->Init(true, false);
1269  propStreamSpec->Init(propsData, sizeof(propsData));
1270
1271
1272  CMyComPtr<IUnknown> coder;
1273  if (_encoderFilter)
1274    coder = _encoderFilter;
1275  else
1276    coder = _encoder;
1277  {
1278    CMyComPtr<ICompressSetCoderProperties> scp;
1279    coder.QueryInterface(IID_ICompressSetCoderProperties, &scp);
1280    if (scp)
1281    {
1282      const UInt64 reduceSize = kBufferSize;
1283
1284      /* in posix new thread uses same affinity as parent thread,
1285         so we don't need to send affinity to coder in posix */
1286      UInt64 affMask;
1287      #if !defined(Z7_ST) && defined(_WIN32)
1288      {
1289        CCpuSet cpuSet;
1290        affMask = AffinityMode.GetAffinityMask(EncoderIndex, &cpuSet);
1291      }
1292      #else
1293        affMask = 0;
1294      #endif
1295      // affMask <<= 3; // debug line: to test no affinity in coder;
1296      // affMask = 0;
1297
1298      RINOK(method.SetCoderProps_DSReduce_Aff(scp, &reduceSize, (affMask != 0 ? &affMask : NULL)))
1299    }
1300    else
1301    {
1302      if (method.AreThereNonOptionalProps())
1303        return E_INVALIDARG;
1304    }
1305
1306    CMyComPtr<ICompressWriteCoderProperties> writeCoderProps;
1307    coder.QueryInterface(IID_ICompressWriteCoderProperties, &writeCoderProps);
1308    if (writeCoderProps)
1309    {
1310      RINOK(writeCoderProps->WriteCoderProperties(propStream))
1311    }
1312
1313    {
1314      CMyComPtr<ICryptoSetPassword> sp;
1315      coder.QueryInterface(IID_ICryptoSetPassword, &sp);
1316      if (sp)
1317      {
1318        RINOK(sp->CryptoSetPassword(_psw, sizeof(_psw)))
1319
1320        // we must call encoding one time to calculate password key for key cache.
1321        // it must be after WriteCoderProperties!
1322        Byte temp[16];
1323        memset(temp, 0, sizeof(temp));
1324
1325        if (_encoderFilter)
1326        {
1327          _encoderFilter->Init();
1328          _encoderFilter->Filter(temp, sizeof(temp));
1329        }
1330        else
1331        {
1332          CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
1333          CMyComPtr<ISequentialInStream> inStream = inStreamSpec;
1334          inStreamSpec->Init(temp, sizeof(temp));
1335
1336          CCrcOutStream *crcStreamSpec = new CCrcOutStream;
1337          CMyComPtr<ISequentialOutStream> crcStream = crcStreamSpec;
1338          crcStreamSpec->Init();
1339
1340          RINOK(_encoder->Code(inStream, crcStream, NULL, NULL, NULL))
1341        }
1342      }
1343    }
1344  }
1345
1346  return S_OK;
1347}
1348
1349
1350static void My_FilterBench(ICompressFilter *filter, Byte *data, size_t size, UInt32 *crc)
1351{
1352  while (size != 0)
1353  {
1354    UInt32 cur = crc ? 1 << 17 : 1 << 24;
1355    if (cur > size)
1356      cur = (UInt32)size;
1357    UInt32 processed = filter->Filter(data, cur);
1358    /* if (processed > size) (in AES filter), we must fill last block with zeros.
1359       but it is not important for benchmark. So we just copy that data without filtering.
1360       if (processed == 0) then filter can't process more  */
1361    if (processed > size || processed == 0)
1362      processed = (UInt32)size;
1363    if (crc)
1364      *crc = CrcUpdate(*crc, data, processed);
1365    data += processed;
1366    size -= processed;
1367  }
1368}
1369
1370
1371HRESULT CEncoderInfo::Encode()
1372{
1373  // printf("\nCEncoderInfo::Generate\n");
1374
1375  RINOK(Generate())
1376
1377  // printf("\n2222\n");
1378
1379  #ifndef Z7_ST
1380  if (Common)
1381  {
1382    Results[0] = S_OK;
1383    WRes wres = ReadyEvent.Set();
1384    if (wres == 0)
1385      wres = Common->StartEvent.Lock();
1386    if (wres != 0)
1387      return HRESULT_FROM_WIN32(wres);
1388    if (Common->ExitMode)
1389      return S_OK;
1390  }
1391  else
1392  #endif
1393  {
1394    CBenchProgressInfo *bpi = progressInfoSpec[0];
1395    bpi->SetStartTime();
1396  }
1397
1398
1399  CBenchInfo &bi = progressInfoSpec[0]->BenchInfo;
1400  bi.UnpackSize = 0;
1401  bi.PackSize = 0;
1402  CMyComPtr<ICryptoProperties> cp;
1403  CMyComPtr<IUnknown> coder;
1404  if (_encoderFilter)
1405    coder = _encoderFilter;
1406  else
1407    coder = _encoder;
1408  coder.QueryInterface(IID_ICryptoProperties, &cp);
1409  CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
1410  CMyComPtr<ISequentialInStream> inStream = inStreamSpec;
1411
1412  if (cp)
1413  {
1414    RINOK(Set_Key_and_IV(cp))
1415  }
1416
1417  compressedSize = 0;
1418  if (_encoderFilter)
1419    compressedSize = kBufferSize;
1420
1421  // CBenchmarkOutStream *outStreamSpec = this->outStreamSpec;
1422  UInt64 prev = 0;
1423
1424  const UInt32 mask = (CheckCrc_Enc ? 0 : 0xFFFF);
1425  const bool useCrc = (mask < NumIterations);
1426  bool crcPrev_defined = false;
1427  UInt32 crcPrev = 0;
1428
1429  bool useRealData_Enc = UseRealData_Enc;
1430  bool data_Was_Changed = false;
1431  if (useRealData_Enc)
1432  {
1433    /* we want memcpy() for each iteration including first iteration.
1434       So results will be equal for different number of iterations */
1435    data_Was_Changed = true;
1436  }
1437
1438  const UInt64 numIterations = NumIterations;
1439  UInt64 i = numIterations;
1440    // printCallback->NewLine();
1441
1442  while (i != 0)
1443  {
1444    i--;
1445    if (printCallback && bi.UnpackSize - prev >= (1 << 26))
1446    {
1447      prev = bi.UnpackSize;
1448      RINOK(printCallback->CheckBreak())
1449    }
1450
1451    /*
1452    CBenchInfo info;
1453    progressInfoSpec[0]->SetStartTime();
1454    */
1455
1456    bool calcCrc = false;
1457    if (useCrc)
1458      calcCrc = (((UInt32)i & mask) == 0);
1459
1460    if (_encoderFilter)
1461    {
1462      Byte *filterData = rgCopy;
1463      if (i == numIterations - 1 || calcCrc || useRealData_Enc)
1464      {
1465        filterData = (Byte *)*outStreamSpec;
1466        if (data_Was_Changed)
1467          memcpy(filterData, uncompressedDataPtr, kBufferSize);
1468        data_Was_Changed = true;
1469      }
1470      _encoderFilter->Init();
1471      if (calcCrc)
1472        outStreamSpec->InitCrc();
1473      My_FilterBench(_encoderFilter, filterData, kBufferSize,
1474          calcCrc ? &outStreamSpec->Crc : NULL);
1475    }
1476    else
1477    {
1478      outStreamSpec->Init(true, calcCrc); // write real data for speed consistency at any number of iterations
1479      inStreamSpec->Init(uncompressedDataPtr, kBufferSize);
1480      RINOK(_encoder->Code(inStream, outStream, NULL, NULL, progressInfo[0]))
1481      if (!inStreamSpec->WasFinished())
1482        return E_FAIL;
1483      if (compressedSize != outStreamSpec->Pos)
1484      {
1485        if (compressedSize != 0)
1486          return E_FAIL;
1487        compressedSize = outStreamSpec->Pos;
1488      }
1489    }
1490
1491    // outStreamSpec->Print();
1492
1493    if (calcCrc)
1494    {
1495      const UInt32 crc2 = CRC_GET_DIGEST(outStreamSpec->Crc);
1496      if (crcPrev_defined && crcPrev != crc2)
1497        return E_FAIL;
1498      crcPrev = crc2;
1499      crcPrev_defined = true;
1500    }
1501
1502    bi.UnpackSize += kBufferSize;
1503    bi.PackSize += compressedSize;
1504
1505    /*
1506    {
1507      progressInfoSpec[0]->SetFinishTime(info);
1508      info.UnpackSize = 0;
1509      info.PackSize = 0;
1510      info.NumIterations = 1;
1511
1512      info.UnpackSize = kBufferSize;
1513      info.PackSize = compressedSize;
1514      // printf("\n%7d\n", encoder.compressedSize);
1515
1516      RINOK(callback->SetEncodeResult(info, true))
1517      printCallback->NewLine();
1518    }
1519    */
1520
1521  }
1522
1523  _encoder.Release();
1524  _encoderFilter.Release();
1525  return S_OK;
1526}
1527
1528
1529HRESULT CEncoderInfo::Decode(UInt32 decoderIndex)
1530{
1531  CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
1532  CMyComPtr<ISequentialInStream> inStream = inStreamSpec;
1533  CMyComPtr<ICompressCoder> &decoder = _decoders[decoderIndex];
1534  CMyComPtr<IUnknown> coder;
1535  if (_decoderFilter)
1536  {
1537    if (decoderIndex != 0)
1538      return E_FAIL;
1539    coder = _decoderFilter;
1540  }
1541  else
1542    coder = decoder;
1543
1544  CMyComPtr<ICompressSetDecoderProperties2> setDecProps;
1545  coder.QueryInterface(IID_ICompressSetDecoderProperties2, &setDecProps);
1546  if (!setDecProps && propStreamSpec->GetPos() != 0)
1547    return E_FAIL;
1548
1549  CCrcOutStream *crcOutStreamSpec = new CCrcOutStream;
1550  CMyComPtr<ISequentialOutStream> crcOutStream = crcOutStreamSpec;
1551
1552  CBenchProgressInfo *pi = progressInfoSpec[decoderIndex];
1553  pi->BenchInfo.UnpackSize = 0;
1554  pi->BenchInfo.PackSize = 0;
1555
1556  #ifndef Z7_ST
1557  {
1558    CMyComPtr<ICompressSetCoderMt> setCoderMt;
1559    coder.QueryInterface(IID_ICompressSetCoderMt, &setCoderMt);
1560    if (setCoderMt)
1561    {
1562      RINOK(setCoderMt->SetNumberOfThreads(NumDecoderSubThreads))
1563    }
1564  }
1565  #endif
1566
1567  CMyComPtr<ICompressSetCoderProperties> scp;
1568  coder.QueryInterface(IID_ICompressSetCoderProperties, &scp);
1569  if (scp)
1570  {
1571    const UInt64 reduceSize = _uncompressedDataSize;
1572    RINOK(_method.SetCoderProps(scp, &reduceSize))
1573  }
1574
1575  CMyComPtr<ICryptoProperties> cp;
1576  coder.QueryInterface(IID_ICryptoProperties, &cp);
1577
1578  if (setDecProps)
1579  {
1580    RINOK(setDecProps->SetDecoderProperties2(
1581        /* (const Byte *)*propStreamSpec, */
1582        propsData,
1583        (UInt32)propStreamSpec->GetPos()))
1584  }
1585
1586  {
1587    CMyComPtr<ICryptoSetPassword> sp;
1588    coder.QueryInterface(IID_ICryptoSetPassword, &sp);
1589    if (sp)
1590    {
1591      RINOK(sp->CryptoSetPassword(_psw, sizeof(_psw)))
1592    }
1593  }
1594
1595  UInt64 prev = 0;
1596
1597  if (cp)
1598  {
1599    RINOK(Set_Key_and_IV(cp))
1600  }
1601
1602  CMyComPtr<ICompressSetFinishMode> setFinishMode;
1603
1604  if (_decoderFilter)
1605  {
1606    if (compressedSize > rgCopy.Size())
1607      return E_FAIL;
1608  }
1609  else
1610  {
1611    decoder->QueryInterface(IID_ICompressSetFinishMode, (void **)&setFinishMode);
1612  }
1613
1614  const UInt64 numIterations = NumIterations;
1615  const E_CheckCrcMode checkCrcMode = CheckCrcMode_Dec;
1616
1617  for (UInt64 i = 0; i < numIterations; i++)
1618  {
1619    if (printCallback && pi->BenchInfo.UnpackSize - prev >= (1 << 26))
1620    {
1621      RINOK(printCallback->CheckBreak())
1622      prev = pi->BenchInfo.UnpackSize;
1623    }
1624
1625    const UInt64 outSize = kBufferSize;
1626    bool calcCrc = (checkCrcMode != k_CheckCrcMode_Never);
1627
1628    crcOutStreamSpec->Init();
1629
1630    if (_decoderFilter)
1631    {
1632      Byte *filterData = (Byte *)*outStreamSpec;
1633      if (calcCrc)
1634      {
1635        calcCrc = (i == 0);
1636        if (checkCrcMode == k_CheckCrcMode_Always)
1637        {
1638          calcCrc = true;
1639          memcpy((Byte *)rgCopy, (const Byte *)*outStreamSpec, compressedSize);
1640          filterData = rgCopy;
1641        }
1642      }
1643      _decoderFilter->Init();
1644      My_FilterBench(_decoderFilter, filterData, compressedSize,
1645          calcCrc ? &crcOutStreamSpec->Crc : NULL);
1646    }
1647    else
1648    {
1649      crcOutStreamSpec->CalcCrc = calcCrc;
1650      inStreamSpec->Init((const Byte *)*outStreamSpec, compressedSize);
1651
1652      if (setFinishMode)
1653      {
1654        RINOK(setFinishMode->SetFinishMode(BoolToUInt(true)))
1655      }
1656
1657      RINOK(decoder->Code(inStream, crcOutStream, NULL, &outSize, progressInfo[decoderIndex]))
1658
1659      if (setFinishMode)
1660      {
1661        if (!inStreamSpec->WasFinished())
1662          return S_FALSE;
1663
1664        CMyComPtr<ICompressGetInStreamProcessedSize> getInStreamProcessedSize;
1665        decoder.QueryInterface(IID_ICompressGetInStreamProcessedSize, (void **)&getInStreamProcessedSize);
1666
1667        if (getInStreamProcessedSize)
1668        {
1669          UInt64 processed;
1670          RINOK(getInStreamProcessedSize->GetInStreamProcessedSize(&processed))
1671          if (processed != compressedSize)
1672            return S_FALSE;
1673        }
1674      }
1675
1676      if (crcOutStreamSpec->Pos != outSize)
1677        return S_FALSE;
1678    }
1679
1680    if (calcCrc && CRC_GET_DIGEST(crcOutStreamSpec->Crc) != crc)
1681      return S_FALSE;
1682
1683    pi->BenchInfo.UnpackSize += kBufferSize;
1684    pi->BenchInfo.PackSize += compressedSize;
1685  }
1686
1687  decoder.Release();
1688  _decoderFilter.Release();
1689  return S_OK;
1690}
1691
1692
1693static const UInt32 kNumThreadsMax = (1 << 12);
1694
1695struct CBenchEncoders
1696{
1697  CEncoderInfo *encoders;
1698  CBenchEncoders(UInt32 num): encoders(NULL) { encoders = new CEncoderInfo[num]; }
1699  ~CBenchEncoders() { delete []encoders; }
1700};
1701
1702
1703static UInt64 GetNumIterations(UInt64 numCommands, UInt64 complexInCommands)
1704{
1705  if (numCommands < (1 << 4))
1706    numCommands = (1 << 4);
1707  UInt64 res = complexInCommands / numCommands;
1708  return (res == 0 ? 1 : res);
1709}
1710
1711
1712
1713#ifndef Z7_ST
1714
1715// ---------- CBenchThreadsFlusher ----------
1716
1717struct CBenchThreadsFlusher
1718{
1719  CBenchEncoders *EncodersSpec;
1720  CBenchSyncCommon Common;
1721  unsigned NumThreads;
1722  bool NeedClose;
1723
1724  CBenchThreadsFlusher(): NumThreads(0), NeedClose(false) {}
1725
1726  ~CBenchThreadsFlusher()
1727  {
1728    StartAndWait(true);
1729  }
1730
1731  WRes StartAndWait(bool exitMode = false);
1732};
1733
1734
1735WRes CBenchThreadsFlusher::StartAndWait(bool exitMode)
1736{
1737  if (!NeedClose)
1738    return 0;
1739
1740  Common.ExitMode = exitMode;
1741  WRes res = Common.StartEvent.Set();
1742
1743  for (unsigned i = 0; i < NumThreads; i++)
1744  {
1745    NWindows::CThread &t = EncodersSpec->encoders[i].thread[0];
1746    if (t.IsCreated())
1747    {
1748      WRes res2 = t.Wait_Close();
1749      if (res == 0)
1750        res = res2;
1751    }
1752  }
1753  NeedClose = false;
1754  return res;
1755}
1756
1757#endif // Z7_ST
1758
1759
1760
1761static void SetPseudoRand(Byte *data, size_t size, UInt32 startValue)
1762{
1763  for (size_t i = 0; i < size; i++)
1764  {
1765    data[i] = (Byte)startValue;
1766    startValue++;
1767  }
1768}
1769
1770
1771
1772static HRESULT MethodBench(
1773    DECL_EXTERNAL_CODECS_LOC_VARS
1774    UInt64 complexInCommands,
1775    #ifndef Z7_ST
1776      bool oldLzmaBenchMode,
1777      UInt32 numThreads,
1778      const CAffinityMode *affinityMode,
1779    #endif
1780    const COneMethodInfo &method2,
1781    size_t uncompressedDataSize,
1782    const Byte *fileData,
1783    unsigned generateDictBits,
1784
1785    IBenchPrintCallback *printCallback,
1786    IBenchCallback *callback,
1787    CBenchProps *benchProps)
1788{
1789  COneMethodInfo method = method2;
1790  UInt64 methodId;
1791  UInt32 numStreams;
1792  bool isFilter;
1793  const int codecIndex = FindMethod_Index(
1794      EXTERNAL_CODECS_LOC_VARS
1795      method.MethodName, true,
1796      methodId, numStreams, isFilter);
1797  if (codecIndex < 0)
1798    return E_NOTIMPL;
1799  if (numStreams != 1)
1800    return E_INVALIDARG;
1801
1802  UInt32 numEncoderThreads = 1;
1803  UInt32 numSubDecoderThreads = 1;
1804
1805  #ifndef Z7_ST
1806    numEncoderThreads = numThreads;
1807
1808    if (oldLzmaBenchMode)
1809    if (methodId == k_LZMA)
1810    {
1811      if (numThreads == 1 && method.Get_NumThreads() < 0)
1812        method.AddProp_NumThreads(1);
1813      const UInt32 numLzmaThreads = method.Get_Lzma_NumThreads();
1814      if (numThreads > 1 && numLzmaThreads > 1)
1815      {
1816        numEncoderThreads = (numThreads + 1) / 2; // 20.03
1817        numSubDecoderThreads = 2;
1818      }
1819    }
1820
1821  const bool mtEncMode = (numEncoderThreads > 1) || affinityMode->NeedAffinity();
1822
1823  #endif
1824
1825  CBenchEncoders encodersSpec(numEncoderThreads);
1826  CEncoderInfo *encoders = encodersSpec.encoders;
1827
1828  UInt32 i;
1829
1830  for (i = 0; i < numEncoderThreads; i++)
1831  {
1832    CEncoderInfo &encoder = encoders[i];
1833    encoder.callback = (i == 0) ? callback : NULL;
1834    encoder.printCallback = printCallback;
1835
1836    #ifndef Z7_ST
1837    encoder.EncoderIndex = i;
1838    encoder.NumEncoderInternalThreads = numSubDecoderThreads;
1839    encoder.AffinityMode = *affinityMode;
1840
1841    /*
1842    if (numSubDecoderThreads > 1)
1843    if (encoder.AffinityMode.NeedAffinity()
1844        && encoder.AffinityMode.NumBundleThreads == 1)
1845    {
1846      // if old LZMA benchmark uses two threads in coder, we increase (NumBundleThreads) for old LZMA benchmark uses two threads instead of one
1847      if (encoder.AffinityMode.NumBundleThreads * 2 <= encoder.AffinityMode.NumCores)
1848        encoder.AffinityMode.NumBundleThreads *= 2;
1849    }
1850    */
1851
1852    #endif
1853
1854    {
1855      CCreatedCoder cod;
1856      RINOK(CreateCoder_Index(EXTERNAL_CODECS_LOC_VARS (unsigned)codecIndex, true, encoder._encoderFilter, cod))
1857      encoder._encoder = cod.Coder;
1858      if (!encoder._encoder && !encoder._encoderFilter)
1859        return E_NOTIMPL;
1860    }
1861
1862    SetPseudoRand(encoder._iv,  sizeof(encoder._iv), 17);
1863    SetPseudoRand(encoder._key, sizeof(encoder._key), 51);
1864    SetPseudoRand(encoder._psw, sizeof(encoder._psw), 123);
1865
1866    for (UInt32 j = 0; j < numSubDecoderThreads; j++)
1867    {
1868      CCreatedCoder cod;
1869      CMyComPtr<ICompressCoder> &decoder = encoder._decoders[j];
1870      RINOK(CreateCoder_Id(EXTERNAL_CODECS_LOC_VARS methodId, false, encoder._decoderFilter, cod))
1871      decoder = cod.Coder;
1872      if (!encoder._decoderFilter && !decoder)
1873        return E_NOTIMPL;
1874    }
1875
1876    encoder.UseRealData_Enc =
1877    encoder.CheckCrc_Enc = (benchProps->EncComplex) > 30;
1878
1879    encoder.CheckCrcMode_Dec = k_CheckCrcMode_Always;
1880    if (benchProps->DecComplexCompr +
1881        benchProps->DecComplexUnc <= 30)
1882      encoder.CheckCrcMode_Dec =
1883          k_CheckCrcMode_FirstPass; // for filters
1884          // k_CheckCrcMode_Never; // for debug
1885          // k_CheckCrcMode_Always; // for debug
1886    if (fileData)
1887    {
1888      encoder.UseRealData_Enc = true;
1889      encoder.CheckCrcMode_Dec = k_CheckCrcMode_Always;
1890    }
1891  }
1892
1893  UInt32 crc = 0;
1894  if (fileData)
1895    crc = CrcCalc(fileData, uncompressedDataSize);
1896
1897  for (i = 0; i < numEncoderThreads; i++)
1898  {
1899    CEncoderInfo &encoder = encoders[i];
1900    encoder._method = method;
1901    encoder.generateDictBits = generateDictBits;
1902    encoder._uncompressedDataSize = uncompressedDataSize;
1903    encoder.kBufferSize = uncompressedDataSize;
1904    encoder.fileData = fileData;
1905    encoder.crc = crc;
1906  }
1907
1908  CBenchProgressStatus status;
1909  status.Res = S_OK;
1910  status.EncodeMode = true;
1911
1912  #ifndef Z7_ST
1913  CBenchThreadsFlusher encoderFlusher;
1914  if (mtEncMode)
1915  {
1916    WRes wres = encoderFlusher.Common.StartEvent.Create();
1917    if (wres != 0)
1918      return HRESULT_FROM_WIN32(wres);
1919    encoderFlusher.NumThreads = numEncoderThreads;
1920    encoderFlusher.EncodersSpec = &encodersSpec;
1921    encoderFlusher.NeedClose = true;
1922  }
1923  #endif
1924
1925  for (i = 0; i < numEncoderThreads; i++)
1926  {
1927    CEncoderInfo &encoder = encoders[i];
1928    encoder.NumIterations = GetNumIterations(benchProps->GetNumCommands_Enc(uncompressedDataSize), complexInCommands);
1929    // encoder.NumIterations = 3;
1930    encoder.Salt = g_CrcTable[i & 0xFF];
1931    encoder.Salt ^= (g_CrcTable[(i >> 8) & 0xFF] << 3);
1932    // (g_CrcTable[0] == 0), and (encoder.Salt == 0) for first thread
1933    // printf(" %8x", encoder.Salt);
1934
1935    encoder.KeySize = benchProps->KeySize;
1936
1937    for (int j = 0; j < 2; j++)
1938    {
1939      CBenchProgressInfo *spec = new CBenchProgressInfo;
1940      encoder.progressInfoSpec[j] = spec;
1941      encoder.progressInfo[j] = spec;
1942      spec->Status = &status;
1943    }
1944
1945    if (i == 0)
1946    {
1947      CBenchProgressInfo *bpi = encoder.progressInfoSpec[0];
1948      bpi->Callback = callback;
1949      bpi->BenchInfo.NumIterations = numEncoderThreads;
1950    }
1951
1952    #ifndef Z7_ST
1953    if (mtEncMode)
1954    {
1955      #ifdef USE_ALLOCA
1956      encoder.AllocaSize = (i * 16 * 21) & 0x7FF;
1957      #endif
1958
1959      encoder.Common = &encoderFlusher.Common;
1960      encoder.IsGlobalMtMode = numEncoderThreads > 1;
1961      RINOK(encoder.CreateEncoderThread())
1962    }
1963    #endif
1964  }
1965
1966  if (printCallback)
1967  {
1968    RINOK(printCallback->CheckBreak())
1969  }
1970
1971  #ifndef Z7_ST
1972  if (mtEncMode)
1973  {
1974    for (i = 0; i < numEncoderThreads; i++)
1975    {
1976      CEncoderInfo &encoder = encoders[i];
1977      const WRes wres = encoder.ReadyEvent.Lock();
1978      if (wres != 0)
1979        return HRESULT_FROM_WIN32(wres);
1980      RINOK(encoder.Results[0])
1981    }
1982
1983    CBenchProgressInfo *bpi = encoders[0].progressInfoSpec[0];
1984    bpi->SetStartTime();
1985
1986    const WRes wres = encoderFlusher.StartAndWait();
1987    if (status.Res == 0 && wres != 0)
1988      return HRESULT_FROM_WIN32(wres);
1989  }
1990  else
1991  #endif
1992  {
1993    RINOK(encoders[0].Encode())
1994  }
1995
1996  RINOK(status.Res)
1997
1998  CBenchInfo info;
1999
2000  encoders[0].progressInfoSpec[0]->SetFinishTime(info);
2001  info.UnpackSize = 0;
2002  info.PackSize = 0;
2003  info.NumIterations = encoders[0].NumIterations;
2004
2005  for (i = 0; i < numEncoderThreads; i++)
2006  {
2007    const CEncoderInfo &encoder = encoders[i];
2008    info.UnpackSize += encoder.kBufferSize;
2009    info.PackSize += encoder.compressedSize;
2010    // printf("\n%7d\n", encoder.compressedSize);
2011  }
2012
2013  RINOK(callback->SetEncodeResult(info, true))
2014
2015
2016
2017
2018  // ---------- Decode ----------
2019
2020  status.Res = S_OK;
2021  status.EncodeMode = false;
2022
2023  const UInt32 numDecoderThreads = numEncoderThreads * numSubDecoderThreads;
2024  #ifndef Z7_ST
2025  const bool mtDecoderMode = (numDecoderThreads > 1) || affinityMode->NeedAffinity();
2026  #endif
2027
2028  for (i = 0; i < numEncoderThreads; i++)
2029  {
2030    CEncoderInfo &encoder = encoders[i];
2031
2032    /*
2033    #ifndef Z7_ST
2034    // encoder.affinityMode = *affinityMode;
2035    if (encoder.NumEncoderInternalThreads != 1)
2036      encoder.AffinityMode.DivideNum = encoder.NumEncoderInternalThreads;
2037    #endif
2038    */
2039
2040
2041    if (i == 0)
2042    {
2043      encoder.NumIterations = GetNumIterations(
2044          benchProps->GetNumCommands_Dec(
2045              encoder.compressedSize,
2046              encoder.kBufferSize),
2047          complexInCommands);
2048      CBenchProgressInfo *bpi = encoder.progressInfoSpec[0];
2049      bpi->Callback = callback;
2050      bpi->BenchInfo.NumIterations = numDecoderThreads;
2051      bpi->SetStartTime();
2052    }
2053    else
2054      encoder.NumIterations = encoders[0].NumIterations;
2055
2056    #ifndef Z7_ST
2057    {
2058      int numSubThreads = method.Get_NumThreads();
2059      encoder.NumDecoderSubThreads = (numSubThreads <= 0) ? 1 : (unsigned)numSubThreads;
2060    }
2061    if (mtDecoderMode)
2062    {
2063      for (UInt32 j = 0; j < numSubDecoderThreads; j++)
2064      {
2065        const HRESULT res = encoder.CreateDecoderThread(j, (i == 0 && j == 0)
2066            #ifdef USE_ALLOCA
2067            , ((i * numSubDecoderThreads + j) * 16 * 21) & 0x7FF
2068            #endif
2069            );
2070        RINOK(res)
2071      }
2072    }
2073    else
2074    #endif
2075    {
2076      RINOK(encoder.Decode(0))
2077    }
2078  }
2079
2080  #ifndef Z7_ST
2081  if (mtDecoderMode)
2082  {
2083    WRes wres = 0;
2084    HRESULT res = S_OK;
2085    for (i = 0; i < numEncoderThreads; i++)
2086      for (UInt32 j = 0; j < numSubDecoderThreads; j++)
2087      {
2088        CEncoderInfo &encoder = encoders[i];
2089        const WRes wres2 = encoder.thread[j].
2090            // Wait(); // later we can get thread times from thread in UNDER_CE
2091            Wait_Close();
2092        if (wres == 0 && wres2 != 0)
2093          wres = wres2;
2094        const HRESULT res2 = encoder.Results[j];
2095        if (res == 0 && res2 != 0)
2096          res = res2;
2097      }
2098    if (wres != 0)
2099      return HRESULT_FROM_WIN32(wres);
2100    RINOK(res)
2101  }
2102  #endif // Z7_ST
2103
2104  RINOK(status.Res)
2105  encoders[0].progressInfoSpec[0]->SetFinishTime(info);
2106
2107  /*
2108  #ifndef Z7_ST
2109  #ifdef UNDER_CE
2110  if (mtDecoderMode)
2111    for (i = 0; i < numEncoderThreads; i++)
2112      for (UInt32 j = 0; j < numSubDecoderThreads; j++)
2113      {
2114        FILETIME creationTime, exitTime, kernelTime, userTime;
2115        if (::GetThreadTimes(encoders[i].thread[j], &creationTime, &exitTime, &kernelTime, &userTime) != 0)
2116          info.UserTime += GetTime64(userTime) + GetTime64(kernelTime);
2117      }
2118  #endif
2119  #endif
2120  */
2121
2122  info.UnpackSize = 0;
2123  info.PackSize = 0;
2124  info.NumIterations = numSubDecoderThreads * encoders[0].NumIterations;
2125
2126  for (i = 0; i < numEncoderThreads; i++)
2127  {
2128    const CEncoderInfo &encoder = encoders[i];
2129    info.UnpackSize += encoder.kBufferSize;
2130    info.PackSize += encoder.compressedSize;
2131  }
2132
2133  // RINOK(callback->SetDecodeResult(info, false)) // why we called before 21.03 ??
2134  RINOK(callback->SetDecodeResult(info, true))
2135
2136  return S_OK;
2137}
2138
2139
2140
2141static inline UInt64 GetDictSizeFromLog(unsigned dictSizeLog)
2142{
2143  /*
2144  if (dictSizeLog < 32)
2145    return (UInt32)1 << dictSizeLog;
2146  else
2147    return (UInt32)(Int32)-1;
2148  */
2149  return (UInt64)1 << dictSizeLog;
2150}
2151
2152
2153// it's limit of current LZMA implementation that can be changed later
2154#define kLzmaMaxDictSize ((UInt32)15 << 28)
2155
2156static inline UInt64 GetLZMAUsage(bool multiThread, int btMode, UInt64 dict)
2157{
2158  if (dict == 0)
2159    dict = 1;
2160  if (dict > kLzmaMaxDictSize)
2161    dict = kLzmaMaxDictSize;
2162  UInt32 hs = (UInt32)dict - 1;
2163  hs |= (hs >> 1);
2164  hs |= (hs >> 2);
2165  hs |= (hs >> 4);
2166  hs |= (hs >> 8);
2167  hs >>= 1;
2168  hs |= 0xFFFF;
2169  if (hs > (1 << 24))
2170    hs >>= 1;
2171  hs++;
2172  hs += (1 << 16);
2173
2174  const UInt32 kBlockSizeMax = (UInt32)0 - (UInt32)(1 << 16);
2175  UInt64 blockSize = (UInt64)dict + (1 << 16)
2176      + (multiThread ? (1 << 20) : 0);
2177  blockSize += (blockSize >> (blockSize < ((UInt32)1 << 30) ? 1 : 2));
2178  if (blockSize >= kBlockSizeMax)
2179    blockSize = kBlockSizeMax;
2180
2181  UInt64 son = (UInt64)dict;
2182  if (btMode)
2183    son *= 2;
2184  const UInt64 v = (hs + son) * 4 + blockSize +
2185      (1 << 20) + (multiThread ? (6 << 20) : 0);
2186
2187  // printf("\nGetLZMAUsage = %d\n", (UInt32)(v >> 20));
2188  // printf("\nblockSize = %d\n", (UInt32)(blockSize >> 20));
2189  return v;
2190}
2191
2192
2193UInt64 GetBenchMemoryUsage(UInt32 numThreads, int level, UInt64 dictionary, bool totalBench)
2194{
2195  const size_t kBufferSize = (size_t)dictionary + kAdditionalSize;
2196  const UInt64 kCompressedBufferSize = GetBenchCompressedSize(kBufferSize); // / 2;
2197  if (level < 0)
2198    level = 5;
2199  const int algo = (level < 5 ? 0 : 1);
2200  const int btMode = (algo == 0 ? 0 : 1);
2201
2202  UInt32 numBigThreads = numThreads;
2203  bool lzmaMt = (totalBench || (numThreads > 1 && btMode));
2204  if (btMode)
2205  {
2206    if (!totalBench && lzmaMt)
2207      numBigThreads /= 2;
2208  }
2209  return ((UInt64)kBufferSize + kCompressedBufferSize +
2210    GetLZMAUsage(lzmaMt, btMode, dictionary) + (2 << 20)) * numBigThreads;
2211}
2212
2213static UInt64 GetBenchMemoryUsage_Hash(UInt32 numThreads, UInt64 dictionary)
2214{
2215  // dictionary += (dictionary >> 9); // for page tables (virtual memory)
2216  return (UInt64)(dictionary + (1 << 15)) * numThreads + (2 << 20);
2217}
2218
2219
2220// ---------- CRC and HASH ----------
2221
2222struct CCrcInfo_Base
2223{
2224  CMidAlignedBuffer Buffer;
2225  const Byte *Data;
2226  size_t Size;
2227  bool CreateLocalBuf;
2228  UInt32 CheckSum_Res;
2229
2230  CCrcInfo_Base(): CreateLocalBuf(true), CheckSum_Res(0) {}
2231
2232  HRESULT Generate(const Byte *data, size_t size);
2233  HRESULT CrcProcess(UInt64 numIterations,
2234      const UInt32 *checkSum, IHasher *hf,
2235      IBenchPrintCallback *callback);
2236};
2237
2238
2239HRESULT CCrcInfo_Base::Generate(const Byte *data, size_t size)
2240{
2241  Size = size;
2242  Data = data;
2243  if (!data || CreateLocalBuf)
2244  {
2245    ALLOC_WITH_HRESULT(&Buffer, size)
2246    Data = Buffer;
2247  }
2248  if (!data)
2249    RandGen(Buffer, size);
2250  else if (CreateLocalBuf && size != 0)
2251    memcpy(Buffer, data, size);
2252  return S_OK;
2253}
2254
2255
2256HRESULT CCrcInfo_Base::CrcProcess(UInt64 numIterations,
2257    const UInt32 *checkSum, IHasher *hf,
2258    IBenchPrintCallback *callback)
2259{
2260  MY_ALIGN(16)
2261  Byte hash[64];
2262  memset(hash, 0, sizeof(hash));
2263
2264  CheckSum_Res = 0;
2265
2266  const UInt32 hashSize = hf->GetDigestSize();
2267  if (hashSize > sizeof(hash))
2268    return S_FALSE;
2269
2270  const Byte *buf = Data;
2271  const size_t size = Size;
2272  UInt32 checkSum_Prev = 0;
2273
2274  UInt64 prev = 0;
2275  UInt64 cur = 0;
2276
2277  for (UInt64 i = 0; i < numIterations; i++)
2278  {
2279    hf->Init();
2280    size_t pos = 0;
2281    do
2282    {
2283      const size_t rem = size - pos;
2284      const UInt32 kStep = ((UInt32)1 << 31);
2285      const UInt32 curSize = (rem < kStep) ? (UInt32)rem : kStep;
2286      hf->Update(buf + pos, curSize);
2287      pos += curSize;
2288    }
2289    while (pos != size);
2290
2291    hf->Final(hash);
2292    UInt32 sum = 0;
2293    for (UInt32 j = 0; j < hashSize; j += 4)
2294    {
2295      sum = rotlFixed(sum, 11);
2296      sum += GetUi32(hash + j);
2297    }
2298    if (checkSum)
2299    {
2300      if (sum != *checkSum)
2301        return S_FALSE;
2302    }
2303    else
2304    {
2305      checkSum_Prev = sum;
2306      checkSum = &checkSum_Prev;
2307    }
2308    if (callback)
2309    {
2310      cur += size;
2311      if (cur - prev >= ((UInt32)1 << 30))
2312      {
2313        prev = cur;
2314        RINOK(callback->CheckBreak())
2315      }
2316    }
2317  }
2318  CheckSum_Res = checkSum_Prev;
2319  return S_OK;
2320}
2321
2322extern
2323UInt32 g_BenchCpuFreqTemp; // we need non-static variavble to disable compiler optimization
2324UInt32 g_BenchCpuFreqTemp = 1;
2325
2326#define YY1 sum += val; sum ^= val;
2327#define YY3 YY1 YY1 YY1 YY1
2328#define YY5 YY3 YY3 YY3 YY3
2329#define YY7 YY5 YY5 YY5 YY5
2330static const UInt32 kNumFreqCommands = 128;
2331
2332EXTERN_C_BEGIN
2333
2334static UInt32 CountCpuFreq(UInt32 sum, UInt32 num, UInt32 val)
2335{
2336  for (UInt32 i = 0; i < num; i++)
2337  {
2338    YY7
2339  }
2340  return sum;
2341}
2342
2343EXTERN_C_END
2344
2345
2346#ifndef Z7_ST
2347
2348struct CBaseThreadInfo
2349{
2350  NWindows::CThread Thread;
2351  IBenchPrintCallback *Callback;
2352  HRESULT CallbackRes;
2353
2354  WRes Wait_If_Created()
2355  {
2356    if (!Thread.IsCreated())
2357      return 0;
2358    return Thread.Wait_Close();
2359  }
2360};
2361
2362struct CFreqInfo: public CBaseThreadInfo
2363{
2364  UInt32 ValRes;
2365  UInt32 Size;
2366  UInt64 NumIterations;
2367};
2368
2369static THREAD_FUNC_DECL FreqThreadFunction(void *param)
2370{
2371  CFreqInfo *p = (CFreqInfo *)param;
2372
2373  UInt32 sum = g_BenchCpuFreqTemp;
2374  for (UInt64 k = p->NumIterations; k > 0; k--)
2375  {
2376    if (p->Callback)
2377    {
2378      p->CallbackRes = p->Callback->CheckBreak();
2379      if (p->CallbackRes != S_OK)
2380        break;
2381    }
2382    sum = CountCpuFreq(sum, p->Size, g_BenchCpuFreqTemp);
2383  }
2384  p->ValRes = sum;
2385  return THREAD_FUNC_RET_ZERO;
2386}
2387
2388struct CFreqThreads
2389{
2390  CFreqInfo *Items;
2391  UInt32 NumThreads;
2392
2393  CFreqThreads(): Items(NULL), NumThreads(0) {}
2394
2395  WRes WaitAll()
2396  {
2397    WRes wres = 0;
2398    for (UInt32 i = 0; i < NumThreads; i++)
2399    {
2400      WRes wres2 = Items[i].Wait_If_Created();
2401      if (wres == 0 && wres2 != 0)
2402        wres = wres2;
2403    }
2404    NumThreads = 0;
2405    return wres;
2406  }
2407
2408  ~CFreqThreads()
2409  {
2410    WaitAll();
2411    delete []Items;
2412  }
2413};
2414
2415
2416static THREAD_FUNC_DECL CrcThreadFunction(void *param);
2417
2418struct CCrcInfo: public CBaseThreadInfo
2419{
2420  const Byte *Data;
2421  size_t Size;
2422  UInt64 NumIterations;
2423  bool CheckSumDefined;
2424  UInt32 CheckSum;
2425  CMyComPtr<IHasher> Hasher;
2426  HRESULT Res;
2427  UInt32 CheckSum_Res;
2428
2429  #ifndef Z7_ST
2430  NSynchronization::CManualResetEvent ReadyEvent;
2431  UInt32 ThreadIndex;
2432  CBenchSyncCommon *Common;
2433  CAffinityMode AffinityMode;
2434  #endif
2435
2436  // we want to call CCrcInfo_Base::Buffer.Free() in main thread.
2437  // so we uses non-local CCrcInfo_Base.
2438  CCrcInfo_Base crcib;
2439
2440  HRESULT CreateThread()
2441  {
2442    WRes res = 0;
2443    if (!ReadyEvent.IsCreated())
2444      res = ReadyEvent.Create();
2445    if (res == 0)
2446      res = AffinityMode.CreateThread_WithAffinity(Thread, CrcThreadFunction, this,
2447          ThreadIndex);
2448    return HRESULT_FROM_WIN32(res);
2449  }
2450
2451  #ifdef USE_ALLOCA
2452  size_t AllocaSize;
2453  #endif
2454
2455  void Process();
2456
2457  CCrcInfo(): Res(E_FAIL) {}
2458};
2459
2460static const bool k_Crc_CreateLocalBuf_For_File = true; // for total BW test
2461// static const bool k_Crc_CreateLocalBuf_For_File = false; // for shared memory read test
2462
2463void CCrcInfo::Process()
2464{
2465  crcib.CreateLocalBuf = k_Crc_CreateLocalBuf_For_File;
2466  // we can use additional Generate() passes to reduce some time effects for new page allocation
2467  // for (unsigned y = 0; y < 10; y++)
2468  Res = crcib.Generate(Data, Size);
2469
2470  // if (Common)
2471  {
2472    WRes wres = ReadyEvent.Set();
2473    if (wres != 0)
2474    {
2475      if (Res == 0)
2476        Res = HRESULT_FROM_WIN32(wres);
2477      return;
2478    }
2479    if (Res != 0)
2480      return;
2481
2482    wres = Common->StartEvent.Lock();
2483
2484    if (wres != 0)
2485    {
2486      Res = HRESULT_FROM_WIN32(wres);
2487      return;
2488    }
2489    if (Common->ExitMode)
2490      return;
2491  }
2492
2493  Res = crcib.CrcProcess(NumIterations,
2494      CheckSumDefined ? &CheckSum : NULL, Hasher,
2495      Callback);
2496  CheckSum_Res = crcib.CheckSum_Res;
2497  /*
2498  We don't want to include the time of slow CCrcInfo_Base::Buffer.Free()
2499  to time of benchmark. So we don't free Buffer here
2500  */
2501  // crcib.Buffer.Free();
2502}
2503
2504
2505static THREAD_FUNC_DECL CrcThreadFunction(void *param)
2506{
2507  CCrcInfo *p = (CCrcInfo *)param;
2508
2509  #ifdef USE_ALLOCA
2510  alloca(p->AllocaSize);
2511  #endif
2512  p->Process();
2513  return THREAD_FUNC_RET_ZERO;
2514}
2515
2516
2517struct CCrcThreads
2518{
2519  CCrcInfo *Items;
2520  unsigned NumThreads;
2521  CBenchSyncCommon Common;
2522  bool NeedClose;
2523
2524  CCrcThreads(): Items(NULL), NumThreads(0), NeedClose(false) {}
2525
2526  WRes StartAndWait(bool exitMode = false);
2527
2528  ~CCrcThreads()
2529  {
2530    StartAndWait(true);
2531    delete []Items;
2532  }
2533};
2534
2535
2536WRes CCrcThreads::StartAndWait(bool exitMode)
2537{
2538  if (!NeedClose)
2539    return 0;
2540
2541  Common.ExitMode = exitMode;
2542  WRes wres = Common.StartEvent.Set();
2543
2544  for (unsigned i = 0; i < NumThreads; i++)
2545  {
2546    WRes wres2 = Items[i].Wait_If_Created();
2547    if (wres == 0 && wres2 != 0)
2548      wres = wres2;
2549  }
2550  NumThreads = 0;
2551  NeedClose = false;
2552  return wres;
2553}
2554
2555#endif
2556
2557
2558static UInt32 CrcCalc1(const Byte *buf, size_t size)
2559{
2560  UInt32 crc = CRC_INIT_VAL;
2561  for (size_t i = 0; i < size; i++)
2562    crc = CRC_UPDATE_BYTE(crc, buf[i]);
2563  return CRC_GET_DIGEST(crc);
2564}
2565
2566/*
2567static UInt32 RandGenCrc(Byte *buf, size_t size, CBaseRandomGenerator &RG)
2568{
2569  RandGen(buf, size, RG);
2570  return CrcCalc1(buf, size);
2571}
2572*/
2573
2574static bool CrcInternalTest()
2575{
2576  CAlignedBuffer buffer;
2577  const size_t kBufferSize0 = (1 << 8);
2578  const size_t kBufferSize1 = (1 << 10);
2579  const unsigned kCheckSize = (1 << 5);
2580  buffer.Alloc(kBufferSize0 + kBufferSize1);
2581  if (!buffer.IsAllocated())
2582    return false;
2583  Byte *buf = (Byte *)buffer;
2584  size_t i;
2585  for (i = 0; i < kBufferSize0; i++)
2586    buf[i] = (Byte)i;
2587  UInt32 crc1 = CrcCalc1(buf, kBufferSize0);
2588  if (crc1 != 0x29058C73)
2589    return false;
2590  RandGen(buf + kBufferSize0, kBufferSize1);
2591  for (i = 0; i < kBufferSize0 + kBufferSize1 - kCheckSize; i++)
2592    for (unsigned j = 0; j < kCheckSize; j++)
2593      if (CrcCalc1(buf + i, j) != CrcCalc(buf + i, j))
2594        return false;
2595  return true;
2596}
2597
2598struct CBenchMethod
2599{
2600  unsigned Weight;
2601  unsigned DictBits;
2602  Int32 EncComplex;
2603  Int32 DecComplexCompr;
2604  Int32 DecComplexUnc;
2605  const char *Name;
2606  // unsigned KeySize;
2607};
2608
2609// #define USE_SW_CMPLX
2610
2611#ifdef USE_SW_CMPLX
2612#define CMPLX(x) ((x) * 1000)
2613#else
2614#define CMPLX(x) (x)
2615#endif
2616
2617static const CBenchMethod g_Bench[] =
2618{
2619  // { 40, 17,  357,  145,   20, "LZMA:x1" },
2620  // { 20, 18,  360,  145,   20, "LZMA2:x1:mt2" },
2621
2622  { 20, 18,  360,  145,   20, "LZMA:x1" },
2623  { 20, 22,  600,  145,   20, "LZMA:x3" },
2624
2625  { 80, 24, 1220,  145,   20, "LZMA:x5:mt1" },
2626  { 80, 24, 1220,  145,   20, "LZMA:x5:mt2" },
2627
2628  { 10, 16,  124,   40,   14, "Deflate:x1" },
2629  { 20, 16,  376,   40,   14, "Deflate:x5" },
2630  { 10, 16, 1082,   40,   14, "Deflate:x7" },
2631  { 10, 17,  422,   40,   14, "Deflate64:x5" },
2632
2633  { 10, 15,  590,   69,   69, "BZip2:x1" },
2634  { 20, 19,  815,  122,  122, "BZip2:x5" },
2635  { 10, 19,  815,  122,  122, "BZip2:x5:mt2" },
2636  { 10, 19, 2530,  122,  122, "BZip2:x7" },
2637
2638  // { 10, 18, 1010,    0, 1150, "PPMDZip:x1" },
2639  { 10, 18, 1010,    0, 1150, "PPMD:x1" },
2640  // { 10, 22, 1655,    0, 1830, "PPMDZip:x5" },
2641  { 10, 22, 1655,    0, 1830, "PPMD:x5" },
2642
2643  // {  2,  0,  -16,    0,  -16, "Swap2" },
2644  {  2,  0,  -16,    0,  -16, "Swap4" },
2645
2646  // {  2,  0,    3,    0,    4, "Delta:1" },
2647  // {  2,  0,    3,    0,    4, "Delta:2" },
2648  // {  2,  0,    3,    0,    4, "Delta:3" },
2649  {  2,  0,    3,    0,    4, "Delta:4" },
2650  // {  2,  0,    3,    0,    4, "Delta:8" },
2651  // {  2,  0,    3,    0,    4, "Delta:32" },
2652
2653  {  2,  0,    2,    0,    2, "BCJ" },
2654  {  2,  0,    1,    0,    1, "ARM64" },
2655
2656  // { 10,  0,   18,    0,   18, "AES128CBC:1" },
2657  // { 10,  0,   21,    0,   21, "AES192CBC:1" },
2658  { 10,  0,   24,    0,   24, "AES256CBC:1" },
2659
2660  // { 10,  0,   18,    0,   18, "AES128CTR:1" },
2661  // { 10,  0,   21,    0,   21, "AES192CTR:1" },
2662  // { 10,  0,   24,    0,   24, "AES256CTR:1" },
2663  // {  2,  0, CMPLX(6), 0, CMPLX(1), "AES128CBC:2" },
2664  // {  2,  0, CMPLX(7), 0, CMPLX(1), "AES192CBC:2" },
2665  {  2,  0, CMPLX(8), 0, CMPLX(1), "AES256CBC:2" },
2666
2667  // {  2,  0, CMPLX(1), 0, CMPLX(1), "AES128CTR:2" },
2668  // {  2,  0, CMPLX(1), 0, CMPLX(1), "AES192CTR:2" },
2669  // {  2,  0, CMPLX(1), 0, CMPLX(1), "AES256CTR:2" },
2670
2671  // {  1,  0, CMPLX(6), 0, CMPLX(1), "AES128CBC:3" },
2672  // {  1,  0, CMPLX(7), 0, CMPLX(1), "AES192CBC:3" },
2673  {  1,  0, CMPLX(8), 0, CMPLX(1), "AES256CBC:3" }
2674
2675  // {  1,  0, CMPLX(1), 0, CMPLX(1), "AES128CTR:3" },
2676  // {  1,  0, CMPLX(1), 0, CMPLX(1), "AES192CTR:3" },
2677  // {  1,  0, CMPLX(1), 0, CMPLX(1), "AES256CTR:3" },
2678};
2679
2680struct CBenchHash
2681{
2682  unsigned Weight;
2683  UInt32 Complex;
2684  UInt32 CheckSum;
2685  const char *Name;
2686};
2687
2688// #define ARM_CRC_MUL 100
2689#define ARM_CRC_MUL 1
2690
2691#define k_Hash_Complex_Mult 256
2692
2693static const CBenchHash g_Hash[] =
2694{
2695  // {  1,  1820, 0x21e207bb, "CRC32:1" },
2696  // { 10,   558, 0x21e207bb, "CRC32:4" },
2697  { 20,   339, 0x21e207bb, "CRC32:8" } ,
2698  {  2,   128 *ARM_CRC_MUL, 0x21e207bb, "CRC32:32" },
2699  {  2,    64 *ARM_CRC_MUL, 0x21e207bb, "CRC32:64" },
2700  { 10,   512, 0x41b901d1, "CRC64" },
2701
2702  { 10, 5100,       0x7913ba03, "SHA256:1" },
2703  {  2, CMPLX((32 * 4 + 1) * 4 + 4), 0x7913ba03, "SHA256:2" },
2704
2705  { 10, 2340,       0xff769021, "SHA1:1" },
2706  {  2, CMPLX((20 * 6 + 1) * 4 + 4), 0xff769021, "SHA1:2" },
2707
2708  {  2,  5500, 0x85189d02, "BLAKE2sp" }
2709};
2710
2711static void PrintNumber(IBenchPrintCallback &f, UInt64 value, unsigned size)
2712{
2713  char s[128];
2714  unsigned startPos = (unsigned)sizeof(s) - 32;
2715  memset(s, ' ', startPos);
2716  ConvertUInt64ToString(value, s + startPos);
2717  // if (withSpace)
2718  {
2719    startPos--;
2720    size++;
2721  }
2722  unsigned len = (unsigned)strlen(s + startPos);
2723  if (size > len)
2724  {
2725    size -= len;
2726    if (startPos < size)
2727      startPos = 0;
2728    else
2729      startPos -= size;
2730  }
2731  f.Print(s + startPos);
2732}
2733
// Widths (in characters) of columns in benchmark output
static const unsigned kFieldSize_Name = 12;
static const unsigned kFieldSize_SmallName = 4;
static const unsigned kFieldSize_Speed = 9;
static const unsigned kFieldSize_Usage = 5;
static const unsigned kFieldSize_RU = 6;
static const unsigned kFieldSize_Rating = 6;
static const unsigned kFieldSize_EU = 5;
static const unsigned kFieldSize_Effec = 5;
static const unsigned kFieldSize_CrcSpeed = 8;


// Widths of groups of columns. The leading constant (4 or 2) is the number of
// columns in the group: PrintNumber() prints one additional space per column.
static const unsigned kFieldSize_TotalSize = 4 + kFieldSize_Speed + kFieldSize_Usage + kFieldSize_RU + kFieldSize_Rating;
static const unsigned kFieldSize_EUAndEffec = 2 + kFieldSize_EU + kFieldSize_Effec;
2748
2749static void PrintRating(IBenchPrintCallback &f, UInt64 rating, unsigned size)
2750{
2751  PrintNumber(f, (rating + 500000) / 1000000, size);
2752}
2753
2754
2755static void PrintPercents(IBenchPrintCallback &f, UInt64 val, UInt64 divider, unsigned size)
2756{
2757  UInt64 v = 0;
2758  if (divider != 0)
2759    v = (val * 100 + divider / 2) / divider;
2760  PrintNumber(f, v, size);
2761}
2762
2763static void PrintChars(IBenchPrintCallback &f, char c, unsigned size)
2764{
2765  char s[256];
2766  memset(s, (Byte)c, size);
2767  s[size] = 0;
2768  f.Print(s);
2769}
2770
2771static void PrintSpaces(IBenchPrintCallback &f, unsigned size)
2772{
2773  PrintChars(f, ' ', size);
2774}
2775
2776static void PrintUsage(IBenchPrintCallback &f, UInt64 usage, unsigned size)
2777{
2778  PrintNumber(f, Benchmark_GetUsage_Percents(usage), size);
2779}
2780
2781static void PrintResults(IBenchPrintCallback &f, UInt64 usage, UInt64 rpu, UInt64 rating, bool showFreq, UInt64 cpuFreq)
2782{
2783  PrintUsage(f, usage, kFieldSize_Usage);
2784  PrintRating(f, rpu, kFieldSize_RU);
2785  PrintRating(f, rating, kFieldSize_Rating);
2786  if (showFreq)
2787  {
2788    if (cpuFreq == 0)
2789      PrintSpaces(f, kFieldSize_EUAndEffec);
2790    else
2791    {
2792      PrintPercents(f, rating, cpuFreq * usage / kBenchmarkUsageMult, kFieldSize_EU);
2793      PrintPercents(f, rating, cpuFreq, kFieldSize_Effec);
2794    }
2795  }
2796}
2797
2798
2799void CTotalBenchRes::Generate_From_BenchInfo(const CBenchInfo &info)
2800{
2801  Speed = info.GetUnpackSizeSpeed();
2802  Usage = info.GetUsage();
2803  RPU = info.GetRatingPerUsage(Rating);
2804}
2805
2806void CTotalBenchRes::Mult_For_Weight(unsigned weight)
2807{
2808  NumIterations2 *= weight;
2809  RPU *= weight;
2810  Rating *= weight;
2811  Usage *= weight;
2812  Speed *= weight;
2813}
2814
2815void CTotalBenchRes::Update_With_Res(const CTotalBenchRes &r)
2816{
2817  Rating += r.Rating;
2818  Usage += r.Usage;
2819  RPU += r.RPU;
2820  Speed += r.Speed;
2821    // NumIterations1 = (r1.NumIterations1 + r2.NumIterations1);
2822  NumIterations2 += r.NumIterations2;
2823}
2824
2825static void PrintResults(IBenchPrintCallback *f,
2826    const CBenchInfo &info,
2827    unsigned weight,
2828    UInt64 rating,
2829    bool showFreq, UInt64 cpuFreq,
2830    CTotalBenchRes *res)
2831{
2832  CTotalBenchRes t;
2833  t.Rating = rating;
2834  t.NumIterations2 = 1;
2835  t.Generate_From_BenchInfo(info);
2836
2837  if (f)
2838  {
2839    if (t.Speed != 0)
2840      PrintNumber(*f, t.Speed / 1024, kFieldSize_Speed);
2841    else
2842      PrintSpaces(*f, 1 + kFieldSize_Speed);
2843  }
2844  if (f)
2845  {
2846    PrintResults(*f, t.Usage, t.RPU, rating, showFreq, cpuFreq);
2847  }
2848
2849  if (res)
2850  {
2851    // res->NumIterations1++;
2852    t.Mult_For_Weight(weight);
2853    res->Update_With_Res(t);
2854  }
2855}
2856
2857static void PrintTotals(IBenchPrintCallback &f,
2858    bool showFreq, UInt64 cpuFreq, bool showSpeed, const CTotalBenchRes &res)
2859{
2860  const UInt64 numIterations2 = res.NumIterations2 ? res.NumIterations2 : 1;
2861  const UInt64 speed = res.Speed / numIterations2;
2862  if (showSpeed && speed != 0)
2863    PrintNumber(f, speed / 1024, kFieldSize_Speed);
2864  else
2865    PrintSpaces(f, 1 + kFieldSize_Speed);
2866
2867  // PrintSpaces(f, 1 + kFieldSize_Speed);
2868  // UInt64 numIterations1 = res.NumIterations1; if (numIterations1 == 0) numIterations1 = 1;
2869  PrintResults(f, res.Usage / numIterations2, res.RPU / numIterations2, res.Rating / numIterations2, showFreq, cpuFreq);
2870}
2871
2872
2873static void PrintHex(AString &s, UInt64 v)
2874{
2875  char temp[32];
2876  ConvertUInt64ToHex(v, temp);
2877  s += temp;
2878}
2879
2880AString GetProcessThreadsInfo(const NSystem::CProcessAffinity &ti)
2881{
2882  AString s;
2883  // s.Add_UInt32(ti.numProcessThreads);
2884  unsigned numSysThreads = ti.GetNumSystemThreads();
2885  if (ti.GetNumProcessThreads() != numSysThreads)
2886  {
2887    // if (ti.numProcessThreads != ti.numSysThreads)
2888    {
2889      s += " / ";
2890      s.Add_UInt32(numSysThreads);
2891    }
2892    s += " : ";
2893    #ifdef _WIN32
2894    PrintHex(s, ti.processAffinityMask);
2895    s += " / ";
2896    PrintHex(s, ti.systemAffinityMask);
2897    #else
2898    unsigned i = (numSysThreads + 3) & ~(unsigned)3;
2899    if (i == 0)
2900      i = 4;
2901    for (; i >= 4; )
2902    {
2903      i -= 4;
2904      unsigned val = 0;
2905      for (unsigned k = 0; k < 4; k++)
2906      {
2907        const unsigned bit = (ti.IsCpuSet(i + k) ? 1 : 0);
2908        val += (bit << k);
2909      }
2910      PrintHex(s, val);
2911    }
2912    #endif
2913  }
2914  return s;
2915}
2916
2917
#ifdef Z7_LARGE_PAGES

#ifdef _WIN32
extern bool g_LargePagesMode;
extern "C"
{
  extern SIZE_T g_LargePageSize;
}
#endif

// Appends the description of large pages to (s).
// _WIN32: nothing is appended, if large pages mode is off and the large page
//   size is zero. Otherwise the string is "(LP-" + page size,
//   optional "-1G" (x86/x64, if CPU_IsSupported_PageGB() is true),
//   optional "-NA" (if large pages mode is off), and ")".
// other systems: the string (s) is not changed.
void Add_LargePages_String(AString &s)
{
  #ifdef _WIN32
  if (!g_LargePagesMode && g_LargePageSize == 0)
    return;
  s.Add_OptSpaced("(LP-");
  PrintSize_KMGT_Or_Hex(s, g_LargePageSize);
  #ifdef MY_CPU_X86_OR_AMD64
  if (CPU_IsSupported_PageGB())
    s += "-1G";
  #endif
  if (!g_LargePagesMode)
    s += "-NA";
  s += ")";
  #else
    // empty string is appended: so (s) is used in this branch too
    s += "";
  #endif
}

#endif
2949
2950
2951
2952static void PrintRequirements(IBenchPrintCallback &f, const char *sizeString,
2953    bool size_Defined, UInt64 size, const char *threadsString, UInt32 numThreads)
2954{
2955  f.Print("RAM ");
2956  f.Print(sizeString);
2957  if (size_Defined)
2958    PrintNumber(f, (size >> 20), 6);
2959  else
2960    f.Print("      ?");
2961  f.Print(" MB");
2962
2963  #ifdef Z7_LARGE_PAGES
2964  {
2965    AString s;
2966    Add_LargePages_String(s);
2967    f.Print(s);
2968  }
2969  #endif
2970
2971  f.Print(",  # ");
2972  f.Print(threadsString);
2973  PrintNumber(f, numThreads, 3);
2974}
2975
2976
2977
2978struct CBenchCallbackToPrint Z7_final: public IBenchCallback
2979{
2980  bool NeedPrint;
2981  bool Use2Columns;
2982  bool ShowFreq;
2983  unsigned NameFieldSize;
2984
2985  unsigned EncodeWeight;
2986  unsigned DecodeWeight;
2987
2988  UInt64 CpuFreq;
2989  UInt64 DictSize;
2990
2991  IBenchPrintCallback *_file;
2992  CBenchProps BenchProps;
2993  CTotalBenchRes EncodeRes;
2994  CTotalBenchRes DecodeRes;
2995
2996  CBenchInfo BenchInfo_Results[2];
2997
2998  CBenchCallbackToPrint():
2999      NeedPrint(true),
3000      Use2Columns(false),
3001      ShowFreq(false),
3002      NameFieldSize(0),
3003      EncodeWeight(1),
3004      DecodeWeight(1),
3005      CpuFreq(0)
3006      {}
3007
3008  void Init() { EncodeRes.Init(); DecodeRes.Init(); }
3009  void Print(const char *s);
3010  void NewLine();
3011
3012  HRESULT SetFreq(bool showFreq, UInt64 cpuFreq);
3013  HRESULT SetEncodeResult(const CBenchInfo &info, bool final) Z7_override;
3014  HRESULT SetDecodeResult(const CBenchInfo &info, bool final) Z7_override;
3015};
3016
3017HRESULT CBenchCallbackToPrint::SetFreq(bool showFreq, UInt64 cpuFreq)
3018{
3019  ShowFreq = showFreq;
3020  CpuFreq = cpuFreq;
3021  return S_OK;
3022}
3023
3024HRESULT CBenchCallbackToPrint::SetEncodeResult(const CBenchInfo &info, bool final)
3025{
3026  RINOK(_file->CheckBreak())
3027  if (final)
3028    BenchInfo_Results[0] = info;
3029  if (final)
3030  if (NeedPrint)
3031  {
3032    const UInt64 rating = BenchProps.GetRating_Enc(DictSize, info.GlobalTime, info.GlobalFreq, info.UnpackSize * info.NumIterations);
3033    PrintResults(_file, info,
3034        EncodeWeight, rating,
3035        ShowFreq, CpuFreq, &EncodeRes);
3036    if (!Use2Columns)
3037      _file->NewLine();
3038  }
3039  return S_OK;
3040}
3041
3042static const char * const kSep = "  | ";
3043
3044HRESULT CBenchCallbackToPrint::SetDecodeResult(const CBenchInfo &info, bool final)
3045{
3046  RINOK(_file->CheckBreak())
3047  if (final)
3048    BenchInfo_Results[1] = info;
3049  if (final)
3050  if (NeedPrint)
3051  {
3052    const UInt64 rating = BenchProps.GetRating_Dec(info.GlobalTime, info.GlobalFreq, info.UnpackSize, info.PackSize, info.NumIterations);
3053    if (Use2Columns)
3054      _file->Print(kSep);
3055    else
3056      PrintSpaces(*_file, NameFieldSize);
3057    CBenchInfo info2 = info;
3058    info2.UnpackSize *= info2.NumIterations;
3059    info2.PackSize *= info2.NumIterations;
3060    info2.NumIterations = 1;
3061    PrintResults(_file, info2,
3062        DecodeWeight, rating,
3063        ShowFreq, CpuFreq, &DecodeRes);
3064  }
3065  return S_OK;
3066}
3067
3068void CBenchCallbackToPrint::Print(const char *s)
3069{
3070  _file->Print(s);
3071}
3072
3073void CBenchCallbackToPrint::NewLine()
3074{
3075  _file->NewLine();
3076}
3077
3078static void PrintLeft(IBenchPrintCallback &f, const char *s, unsigned size)
3079{
3080  f.Print(s);
3081  int numSpaces = (int)size - (int)MyStringLen(s);
3082  if (numSpaces > 0)
3083    PrintSpaces(f, (unsigned)numSpaces);
3084}
3085
3086static void PrintRight(IBenchPrintCallback &f, const char *s, unsigned size)
3087{
3088  int numSpaces = (int)size - (int)MyStringLen(s);
3089  if (numSpaces > 0)
3090    PrintSpaces(f, (unsigned)numSpaces);
3091  f.Print(s);
3092}
3093
3094
3095static bool DoesWildcardMatchName_NoCase(const AString &mask, const char *name)
3096{
3097  UString wildc = GetUnicodeString(mask);
3098  UString bname = GetUnicodeString(name);
3099  wildc.MakeLower_Ascii();
3100  bname.MakeLower_Ascii();
3101  return DoesWildcardMatchName(wildc, bname);
3102}
3103
3104
3105static HRESULT TotalBench(
3106    DECL_EXTERNAL_CODECS_LOC_VARS
3107    const COneMethodInfo &methodMask,
3108    UInt64 complexInCommands,
3109  #ifndef Z7_ST
3110    UInt32 numThreads,
3111    const CAffinityMode *affinityMode,
3112  #endif
3113    bool forceUnpackSize,
3114    size_t unpackSize,
3115    const Byte *fileData,
3116    IBenchPrintCallback *printCallback, CBenchCallbackToPrint *callback)
3117{
3118  for (unsigned i = 0; i < Z7_ARRAY_SIZE(g_Bench); i++)
3119  {
3120    const CBenchMethod &bench = g_Bench[i];
3121    if (!DoesWildcardMatchName_NoCase(methodMask.MethodName, bench.Name))
3122      continue;
3123    PrintLeft(*callback->_file, bench.Name, kFieldSize_Name);
3124    {
3125      unsigned keySize = 32;
3126           if (IsString1PrefixedByString2(bench.Name, "AES128")) keySize = 16;
3127      else if (IsString1PrefixedByString2(bench.Name, "AES192")) keySize = 24;
3128      callback->BenchProps.KeySize = keySize;
3129    }
3130    callback->BenchProps.DecComplexUnc = bench.DecComplexUnc;
3131    callback->BenchProps.DecComplexCompr = bench.DecComplexCompr;
3132    callback->BenchProps.EncComplex = bench.EncComplex;
3133
3134    COneMethodInfo method;
3135    NCOM::CPropVariant propVariant;
3136    propVariant = bench.Name;
3137    RINOK(method.ParseMethodFromPROPVARIANT(UString(), propVariant))
3138
3139    size_t unpackSize2 = unpackSize;
3140    if (!forceUnpackSize && bench.DictBits == 0)
3141      unpackSize2 = kFilterUnpackSize;
3142
3143    callback->EncodeWeight = bench.Weight;
3144    callback->DecodeWeight = bench.Weight;
3145
3146    const HRESULT res = MethodBench(
3147        EXTERNAL_CODECS_LOC_VARS
3148        complexInCommands,
3149        #ifndef Z7_ST
3150        false, numThreads, affinityMode,
3151        #endif
3152        method,
3153        unpackSize2, fileData,
3154        bench.DictBits,
3155        printCallback, callback, &callback->BenchProps);
3156
3157    if (res == E_NOTIMPL)
3158    {
3159      // callback->Print(" ---");
3160      // we need additional empty line as line for decompression results
3161      if (!callback->Use2Columns)
3162        callback->NewLine();
3163    }
3164    else
3165    {
3166      RINOK(res)
3167    }
3168
3169    callback->NewLine();
3170  }
3171  return S_OK;
3172}
3173
3174
3175struct CFreqBench
3176{
3177  // in:
3178  UInt64 complexInCommands;
3179  UInt32 numThreads;
3180  bool showFreq;
3181  UInt64 specifiedFreq;
3182
3183  // out:
3184  UInt64 CpuFreqRes;
3185  UInt64 UsageRes;
3186  UInt32 res;
3187
3188  CFreqBench()
3189    {}
3190
3191  HRESULT FreqBench(IBenchPrintCallback *_file
3192      #ifndef Z7_ST
3193      , const CAffinityMode *affinityMode
3194      #endif
3195      );
3196};
3197
3198
3199HRESULT CFreqBench::FreqBench(IBenchPrintCallback *_file
3200    #ifndef Z7_ST
3201    , const CAffinityMode *affinityMode
3202    #endif
3203    )
3204{
3205  res = 0;
3206  CpuFreqRes = 0;
3207  UsageRes = 0;
3208
3209  if (numThreads == 0)
3210    numThreads = 1;
3211
3212  #ifdef Z7_ST
3213  numThreads = 1;
3214  #endif
3215
3216  const UInt32 complexity = kNumFreqCommands;
3217  UInt64 numIterations = complexInCommands / complexity;
3218  UInt32 numIterations2 = 1 << 30;
3219  if (numIterations > numIterations2)
3220    numIterations /= numIterations2;
3221  else
3222  {
3223    numIterations2 = (UInt32)numIterations;
3224    numIterations = 1;
3225  }
3226
3227  CBenchInfoCalc progressInfoSpec;
3228
3229  #ifndef Z7_ST
3230
3231  bool mtMode = (numThreads > 1) || affinityMode->NeedAffinity();
3232
3233  if (mtMode)
3234  {
3235    CFreqThreads threads;
3236    threads.Items = new CFreqInfo[numThreads];
3237    UInt32 i;
3238    for (i = 0; i < numThreads; i++)
3239    {
3240      CFreqInfo &info = threads.Items[i];
3241      info.Callback = _file;
3242      info.CallbackRes = S_OK;
3243      info.NumIterations = numIterations;
3244      info.Size = numIterations2;
3245    }
3246    progressInfoSpec.SetStartTime();
3247    for (i = 0; i < numThreads; i++)
3248    {
3249      // Sleep(10);
3250      CFreqInfo &info = threads.Items[i];
3251      WRes wres = affinityMode->CreateThread_WithAffinity(info.Thread, FreqThreadFunction, &info, i);
3252      if (info.Thread.IsCreated())
3253        threads.NumThreads++;
3254      if (wres != 0)
3255        return HRESULT_FROM_WIN32(wres);
3256    }
3257    WRes wres = threads.WaitAll();
3258    if (wres != 0)
3259      return HRESULT_FROM_WIN32(wres);
3260    for (i = 0; i < numThreads; i++)
3261    {
3262      RINOK(threads.Items[i].CallbackRes)
3263    }
3264  }
3265  else
3266  #endif
3267  {
3268    progressInfoSpec.SetStartTime();
3269    UInt32 sum = g_BenchCpuFreqTemp;
3270    for (UInt64 k = numIterations; k > 0; k--)
3271    {
3272      sum = CountCpuFreq(sum, numIterations2, g_BenchCpuFreqTemp);
3273      if (_file)
3274      {
3275        RINOK(_file->CheckBreak())
3276      }
3277    }
3278    res += sum;
3279  }
3280
3281  if (res == 0x12345678)
3282  if (_file)
3283  {
3284    RINOK(_file->CheckBreak())
3285  }
3286
3287  CBenchInfo info;
3288  progressInfoSpec.SetFinishTime(info);
3289
3290  info.UnpackSize = 0;
3291  info.PackSize = 0;
3292  info.NumIterations = 1;
3293
3294  const UInt64 numCommands = (UInt64)numIterations * numIterations2 * numThreads * complexity;
3295  const UInt64 rating = info.GetSpeed(numCommands);
3296  CpuFreqRes = rating / numThreads;
3297  UsageRes = info.GetUsage();
3298
3299  if (_file)
3300  {
3301    PrintResults(_file, info,
3302          0, // weight
3303          rating,
3304          showFreq, showFreq ? (specifiedFreq != 0 ? specifiedFreq : CpuFreqRes) : 0, NULL);
3305    RINOK(_file->CheckBreak())
3306  }
3307
3308  return S_OK;
3309}
3310
3311
3312
3313static HRESULT CrcBench(
3314    DECL_EXTERNAL_CODECS_LOC_VARS
3315    UInt64 complexInCommands,
3316    UInt32 numThreads,
3317    const size_t bufferSize,
3318    const Byte *fileData,
3319
3320    UInt64 &speed,
3321    UInt64 &usage,
3322
3323    UInt32 complexity, unsigned benchWeight,
3324    const UInt32 *checkSum,
3325    const COneMethodInfo &method,
3326    IBenchPrintCallback *_file,
3327    #ifndef Z7_ST
3328    const CAffinityMode *affinityMode,
3329    #endif
3330    bool showRating,
3331    CTotalBenchRes *encodeRes,
3332    bool showFreq, UInt64 cpuFreq)
3333{
3334  if (numThreads == 0)
3335    numThreads = 1;
3336
3337  #ifdef Z7_ST
3338  numThreads = 1;
3339  #endif
3340
3341  const AString &methodName = method.MethodName;
3342  // methodName.RemoveChar(L'-');
3343  CMethodId hashID;
3344  if (!FindHashMethod(
3345      EXTERNAL_CODECS_LOC_VARS
3346      methodName, hashID))
3347    return E_NOTIMPL;
3348
3349  /*
3350  // if will generate random data in each thread, instead of global data
3351  CMidAlignedBuffer buffer;
3352  if (!fileData)
3353  {
3354    ALLOC_WITH_HRESULT(&buffer, bufferSize)
3355    RandGen(buffer, bufferSize);
3356    fileData = buffer;
3357  }
3358  */
3359
3360  const size_t bsize = (bufferSize == 0 ? 1 : bufferSize);
3361  UInt64 numIterations = complexInCommands * k_Hash_Complex_Mult / complexity / bsize;
3362  if (numIterations == 0)
3363    numIterations = 1;
3364
3365  CBenchInfoCalc progressInfoSpec;
3366  CBenchInfo info;
3367
3368  #ifndef Z7_ST
3369  bool mtEncMode = (numThreads > 1) || affinityMode->NeedAffinity();
3370
3371  if (mtEncMode)
3372  {
3373    CCrcThreads threads;
3374    threads.Items = new CCrcInfo[numThreads];
3375    {
3376      WRes wres = threads.Common.StartEvent.Create();
3377      if (wres != 0)
3378        return HRESULT_FROM_WIN32(wres);
3379      threads.NeedClose = true;
3380    }
3381
3382    UInt32 i;
3383    for (i = 0; i < numThreads; i++)
3384    {
3385      CCrcInfo &ci = threads.Items[i];
3386      AString name;
3387      RINOK(CreateHasher(EXTERNAL_CODECS_LOC_VARS hashID, name, ci.Hasher))
3388      if (!ci.Hasher)
3389        return E_NOTIMPL;
3390      CMyComPtr<ICompressSetCoderProperties> scp;
3391      ci.Hasher.QueryInterface(IID_ICompressSetCoderProperties, &scp);
3392      if (scp)
3393      {
3394        RINOK(method.SetCoderProps(scp))
3395      }
3396
3397      ci.Callback = _file;
3398      ci.Data = fileData;
3399      ci.NumIterations = numIterations;
3400      ci.Size = bufferSize;
3401      ci.CheckSumDefined = false;
3402      if (checkSum)
3403      {
3404        ci.CheckSum = *checkSum;
3405        ci.CheckSumDefined = true;
3406      }
3407
3408      #ifdef USE_ALLOCA
3409      ci.AllocaSize = (i * 16 * 21) & 0x7FF;
3410      #endif
3411    }
3412
3413    for (i = 0; i < numThreads; i++)
3414    {
3415      CCrcInfo &ci = threads.Items[i];
3416      ci.ThreadIndex = i;
3417      ci.Common = &threads.Common;
3418      ci.AffinityMode = *affinityMode;
3419      HRESULT hres = ci.CreateThread();
3420      if (ci.Thread.IsCreated())
3421        threads.NumThreads++;
3422      if (hres != 0)
3423        return hres;
3424    }
3425
3426    for (i = 0; i < numThreads; i++)
3427    {
3428      CCrcInfo &ci = threads.Items[i];
3429      WRes wres = ci.ReadyEvent.Lock();
3430      if (wres != 0)
3431        return HRESULT_FROM_WIN32(wres);
3432      RINOK(ci.Res)
3433    }
3434
3435    progressInfoSpec.SetStartTime();
3436
3437    WRes wres = threads.StartAndWait();
3438    if (wres != 0)
3439      return HRESULT_FROM_WIN32(wres);
3440
3441    progressInfoSpec.SetFinishTime(info);
3442
3443    for (i = 0; i < numThreads; i++)
3444    {
3445      RINOK(threads.Items[i].Res)
3446      if (i != 0)
3447        if (threads.Items[i].CheckSum_Res !=
3448            threads.Items[i - 1].CheckSum_Res)
3449          return S_FALSE;
3450    }
3451  }
3452  else
3453  #endif
3454  {
3455    CMyComPtr<IHasher> hasher;
3456    AString name;
3457    RINOK(CreateHasher(EXTERNAL_CODECS_LOC_VARS hashID, name, hasher))
3458    if (!hasher)
3459      return E_NOTIMPL;
3460    CMyComPtr<ICompressSetCoderProperties> scp;
3461    hasher.QueryInterface(IID_ICompressSetCoderProperties, &scp);
3462    if (scp)
3463    {
3464      RINOK(method.SetCoderProps(scp))
3465    }
3466    CCrcInfo_Base crcib;
3467    crcib.CreateLocalBuf = false;
3468    RINOK(crcib.Generate(fileData, bufferSize))
3469    progressInfoSpec.SetStartTime();
3470    RINOK(crcib.CrcProcess(numIterations, checkSum, hasher, _file))
3471    progressInfoSpec.SetFinishTime(info);
3472  }
3473
3474
3475  UInt64 unpSize = numIterations * bufferSize;
3476  UInt64 unpSizeThreads = unpSize * numThreads;
3477  info.UnpackSize = unpSizeThreads;
3478  info.PackSize = unpSizeThreads;
3479  info.NumIterations = 1;
3480
3481  if (_file)
3482  {
3483    if (showRating)
3484    {
3485      UInt64 unpSizeThreads2 = unpSizeThreads;
3486      if (unpSizeThreads2 == 0)
3487        unpSizeThreads2 = numIterations * 1 * numThreads;
3488      const UInt64 numCommands = unpSizeThreads2 * complexity / 256;
3489      const UInt64 rating = info.GetSpeed(numCommands);
3490      PrintResults(_file, info,
3491          benchWeight, rating,
3492          showFreq, cpuFreq, encodeRes);
3493    }
3494    RINOK(_file->CheckBreak())
3495  }
3496
3497  speed = info.GetSpeed(unpSizeThreads);
3498  usage = info.GetUsage();
3499
3500  return S_OK;
3501}
3502
3503
3504
3505static HRESULT TotalBench_Hash(
3506    DECL_EXTERNAL_CODECS_LOC_VARS
3507    const COneMethodInfo &methodMask,
3508    UInt64 complexInCommands,
3509    UInt32 numThreads,
3510    size_t bufSize,
3511    const Byte *fileData,
3512    IBenchPrintCallback *printCallback, CBenchCallbackToPrint *callback,
3513    #ifndef Z7_ST
3514    const CAffinityMode *affinityMode,
3515    #endif
3516    CTotalBenchRes *encodeRes,
3517    bool showFreq, UInt64 cpuFreq)
3518{
3519  for (unsigned i = 0; i < Z7_ARRAY_SIZE(g_Hash); i++)
3520  {
3521    const CBenchHash &bench = g_Hash[i];
3522    if (!DoesWildcardMatchName_NoCase(methodMask.MethodName, bench.Name))
3523      continue;
3524    PrintLeft(*callback->_file, bench.Name, kFieldSize_Name);
3525    // callback->BenchProps.DecComplexUnc = bench.DecComplexUnc;
3526    // callback->BenchProps.DecComplexCompr = bench.DecComplexCompr;
3527    // callback->BenchProps.EncComplex = bench.EncComplex;
3528
3529    COneMethodInfo method;
3530    NCOM::CPropVariant propVariant;
3531    propVariant = bench.Name;
3532    RINOK(method.ParseMethodFromPROPVARIANT(UString(), propVariant))
3533
3534    UInt64 speed, usage;
3535
3536    const HRESULT res = CrcBench(
3537        EXTERNAL_CODECS_LOC_VARS
3538        complexInCommands,
3539        numThreads, bufSize, fileData,
3540        speed, usage,
3541        bench.Complex, bench.Weight,
3542        (!fileData && bufSize == (1 << kNumHashDictBits)) ? &bench.CheckSum : NULL,
3543        method,
3544        printCallback,
3545     #ifndef Z7_ST
3546        affinityMode,
3547     #endif
3548        true, // showRating
3549        encodeRes, showFreq, cpuFreq);
3550    if (res == E_NOTIMPL)
3551    {
3552      // callback->Print(" ---");
3553    }
3554    else
3555    {
3556      RINOK(res)
3557    }
3558    callback->NewLine();
3559  }
3560  return S_OK;
3561}
3562
3563struct CTempValues
3564{
3565  UInt64 *Values;
3566  CTempValues(): Values(NULL) {}
3567  void Alloc(UInt32 num) { Values = new UInt64[num]; }
3568  ~CTempValues() { delete []Values; }
3569};
3570
3571static void ParseNumberString(const UString &s, NCOM::CPropVariant &prop)
3572{
3573  const wchar_t *end;
3574  UInt64 result = ConvertStringToUInt64(s, &end);
3575  if (*end != 0 || s.IsEmpty())
3576    prop = s;
3577  else if (result <= (UInt32)0xFFFFFFFF)
3578    prop = (UInt32)result;
3579  else
3580    prop = result;
3581}
3582
3583
3584static bool AreSameMethodNames(const char *fullName, const char *shortName)
3585{
3586  return StringsAreEqualNoCase_Ascii(fullName, shortName);
3587}
3588
3589
3590
3591
3592static void Print_Usage_and_Threads(IBenchPrintCallback &f, UInt64 usage, UInt32 threads)
3593{
3594  PrintRequirements(f, "usage:", true, usage, "Benchmark threads:   ", threads);
3595}
3596
3597
3598static void Print_Delimiter(IBenchPrintCallback &f)
3599{
3600  f.Print(" |");
3601}
3602
3603static void Print_Pow(IBenchPrintCallback &f, unsigned pow)
3604{
3605  char s[16];
3606  ConvertUInt32ToString(pow, s);
3607  unsigned pos = MyStringLen(s);
3608  s[pos++] = ':';
3609  s[pos] = 0;
3610  PrintLeft(f, s, kFieldSize_SmallName); // 4
3611}
3612
3613static void Bench_BW_Print_Usage_Speed(IBenchPrintCallback &f,
3614    UInt64 usage, UInt64 speed)
3615{
3616  PrintUsage(f, usage, kFieldSize_Usage);
3617  PrintNumber(f, speed / 1000000, kFieldSize_CrcSpeed);
3618}
3619
3620
3621HRESULT Bench(
3622    DECL_EXTERNAL_CODECS_LOC_VARS
3623    IBenchPrintCallback *printCallback,
3624    IBenchCallback *benchCallback,
3625    const CObjectVector<CProperty> &props,
3626    UInt32 numIterations,
3627    bool multiDict,
3628    IBenchFreqCallback *freqCallback)
3629{
3630  if (!CrcInternalTest())
3631    return E_FAIL;
3632
3633  UInt32 numCPUs = 1;
3634  UInt64 ramSize = (UInt64)(sizeof(size_t)) << 29;
3635
3636  NSystem::CProcessAffinity threadsInfo;
3637  threadsInfo.InitST();
3638
3639  #ifndef Z7_ST
3640
3641  if (threadsInfo.Get() && threadsInfo.GetNumProcessThreads() != 0)
3642    numCPUs = threadsInfo.GetNumProcessThreads();
3643  else
3644    numCPUs = NSystem::GetNumberOfProcessors();
3645
3646  #endif
3647
3648  // numCPUs = 24;
3649  /*
3650  {
3651    DWORD_PTR mask = (1 << 0);
3652    DWORD_PTR old = SetThreadAffinityMask(GetCurrentThread(), mask);
3653    old = old;
3654    DWORD_PTR old2 = SetThreadAffinityMask(GetCurrentThread(), mask);
3655    old2 = old2;
3656    return 0;
3657  }
3658  */
3659
3660  bool ramSize_Defined = NSystem::GetRamSize(ramSize);
3661
3662  UInt32 numThreadsSpecified = numCPUs;
3663  bool needSetComplexity = false;
3664  UInt32 testTimeMs = kComplexInMs;
3665  UInt32 startDicLog = 22;
3666  bool startDicLog_Defined = false;
3667  UInt64 specifiedFreq = 0;
3668  bool multiThreadTests = false;
3669  UInt64 complexInCommands = kComplexInCommands;
3670  UInt32 numThreads_Start = 1;
3671
3672  #ifndef Z7_ST
3673  CAffinityMode affinityMode;
3674  #endif
3675
3676
3677  COneMethodInfo method;
3678
3679  CMidAlignedBuffer fileDataBuffer;
3680  bool use_fileData = false;
3681  bool isFixedDict = false;
3682
3683  {
3684  unsigned i;
3685
3686  if (printCallback)
3687  {
3688    for (i = 0; i < props.Size(); i++)
3689    {
3690      const CProperty &property = props[i];
3691      printCallback->Print(" ");
3692      printCallback->Print(GetAnsiString(property.Name));
3693      if (!property.Value.IsEmpty())
3694      {
3695        printCallback->Print("=");
3696        printCallback->Print(GetAnsiString(property.Value));
3697      }
3698    }
3699    if (!props.IsEmpty())
3700      printCallback->NewLine();
3701  }
3702
3703
3704  for (i = 0; i < props.Size(); i++)
3705  {
3706    const CProperty &property = props[i];
3707    UString name (property.Name);
3708    name.MakeLower_Ascii();
3709
3710    if (name.IsEqualTo("file"))
3711    {
3712      if (property.Value.IsEmpty())
3713        return E_INVALIDARG;
3714
3715      NFile::NIO::CInFile file;
3716      if (!file.Open(us2fs(property.Value)))
3717        return GetLastError_noZero_HRESULT();
3718      size_t len;
3719      {
3720        UInt64 len64;
3721        if (!file.GetLength(len64))
3722          return GetLastError_noZero_HRESULT();
3723        if (printCallback)
3724        {
3725          printCallback->Print("file size =");
3726          PrintNumber(*printCallback, len64, 0);
3727          printCallback->NewLine();
3728        }
3729        len = (size_t)len64;
3730        if (len != len64)
3731          return E_INVALIDARG;
3732      }
3733
3734      // (len == 0) is allowed. Also it's allowed if Alloc(0) returns NULL here
3735
3736      ALLOC_WITH_HRESULT(&fileDataBuffer, len)
3737      use_fileData = true;
3738
3739      {
3740        size_t processed;
3741        if (!file.ReadFull((Byte *)fileDataBuffer, len, processed))
3742          return GetLastError_noZero_HRESULT();
3743        if (processed != len)
3744          return E_FAIL;
3745      }
3746      continue;
3747    }
3748
3749    NCOM::CPropVariant propVariant;
3750    if (!property.Value.IsEmpty())
3751      ParseNumberString(property.Value, propVariant);
3752
3753    if (name.IsEqualTo("time"))
3754    {
3755      RINOK(ParsePropToUInt32(UString(), propVariant, testTimeMs))
3756      needSetComplexity = true;
3757      testTimeMs *= 1000;
3758      continue;
3759    }
3760
3761    if (name.IsEqualTo("timems"))
3762    {
3763      RINOK(ParsePropToUInt32(UString(), propVariant, testTimeMs))
3764      needSetComplexity = true;
3765      continue;
3766    }
3767
3768    if (name.IsEqualTo("tic"))
3769    {
3770      UInt32 v;
3771      RINOK(ParsePropToUInt32(UString(), propVariant, v))
3772      if (v >= 64)
3773        return E_INVALIDARG;
3774      complexInCommands = (UInt64)1 << v;
3775      continue;
3776    }
3777
3778    const bool isCurrent_fixedDict = name.IsEqualTo("df");
3779    if (isCurrent_fixedDict)
3780      isFixedDict = true;
3781    if (isCurrent_fixedDict || name.IsEqualTo("ds"))
3782    {
3783      RINOK(ParsePropToUInt32(UString(), propVariant, startDicLog))
3784      if (startDicLog > 32)
3785        return E_INVALIDARG;
3786      startDicLog_Defined = true;
3787      continue;
3788    }
3789
3790    if (name.IsEqualTo("mts"))
3791    {
3792      RINOK(ParsePropToUInt32(UString(), propVariant, numThreads_Start))
3793      continue;
3794    }
3795
3796    if (name.IsEqualTo("af"))
3797    {
3798      UInt32 bundle;
3799      RINOK(ParsePropToUInt32(UString(), propVariant, bundle))
3800      if (bundle > 0 && bundle < numCPUs)
3801      {
3802        #ifndef Z7_ST
3803        affinityMode.SetLevels(numCPUs, 2);
3804        affinityMode.NumBundleThreads = bundle;
3805        #endif
3806      }
3807      continue;
3808    }
3809
3810    if (name.IsEqualTo("freq"))
3811    {
3812      UInt32 freq32 = 0;
3813      RINOK(ParsePropToUInt32(UString(), propVariant, freq32))
3814      if (freq32 == 0)
3815        return E_INVALIDARG;
3816      specifiedFreq = (UInt64)freq32 * 1000000;
3817
3818      if (printCallback)
3819      {
3820        printCallback->Print("freq=");
3821        PrintNumber(*printCallback, freq32, 0);
3822        printCallback->NewLine();
3823      }
3824
3825      continue;
3826    }
3827
3828    if (name.IsPrefixedBy_Ascii_NoCase("mt"))
3829    {
3830      const UString s = name.Ptr(2);
3831      if (s.IsEqualTo("*")
3832          || (s.IsEmpty()
3833            && propVariant.vt == VT_BSTR
3834            && StringsAreEqual_Ascii(propVariant.bstrVal, "*")))
3835      {
3836        multiThreadTests = true;
3837        continue;
3838      }
3839      #ifndef Z7_ST
3840      RINOK(ParseMtProp(s, propVariant, numCPUs, numThreadsSpecified))
3841      #endif
3842      continue;
3843    }
3844
3845    RINOK(method.ParseMethodFromPROPVARIANT(name, propVariant))
3846  }
3847  }
3848
3849  if (printCallback)
3850  {
3851    AString s;
3852
3853   #ifndef _WIN32
3854    s += "Compiler: ";
3855    GetCompiler(s);
3856    printCallback->Print(s);
3857    printCallback->NewLine();
3858    s.Empty();
3859   #endif
3860
3861    GetSystemInfoText(s);
3862    printCallback->Print(s);
3863    printCallback->NewLine();
3864  }
3865
3866  if (printCallback)
3867  {
3868    printCallback->Print("1T CPU Freq (MHz):");
3869  }
3870
3871  if (printCallback || freqCallback)
3872  {
3873    UInt64 numMilCommands = 1 << 6;
3874    if (specifiedFreq != 0)
3875    {
3876      while (numMilCommands > 1 && specifiedFreq < (numMilCommands * 1000000))
3877        numMilCommands >>= 1;
3878    }
3879
3880    for (int jj = 0;; jj++)
3881    {
3882      if (printCallback)
3883        RINOK(printCallback->CheckBreak())
3884
3885      UInt64 start = ::GetTimeCount();
3886      UInt32 sum = (UInt32)start;
3887      sum = CountCpuFreq(sum, (UInt32)(numMilCommands * 1000000 / kNumFreqCommands), g_BenchCpuFreqTemp);
3888      if (sum == 0xF1541213)
3889        if (printCallback)
3890          printCallback->Print("");
3891      const UInt64 realDelta = ::GetTimeCount() - start;
3892      start = realDelta;
3893      if (start == 0)
3894        start = 1;
3895      if (start > (UInt64)1 << 61)
3896        start = 1;
3897      const UInt64 freq = GetFreq();
3898      // mips is constant in some compilers
3899      const UInt64 hz = MyMultDiv64(numMilCommands * 1000000, freq, start);
3900      const UInt64 mipsVal = numMilCommands * freq / start;
3901      if (printCallback)
3902      {
3903        if (realDelta == 0)
3904        {
3905          printCallback->Print(" -");
3906        }
3907        else
3908        {
3909          // PrintNumber(*printCallback, start, 0);
3910          PrintNumber(*printCallback, mipsVal, 5);
3911        }
3912      }
3913      if (freqCallback)
3914      {
3915        RINOK(freqCallback->AddCpuFreq(1, hz, kBenchmarkUsageMult))
3916      }
3917
3918      if (jj >= 1)
3919      {
3920        bool needStop = (numMilCommands >= (1 <<
3921          #ifdef _DEBUG
3922            7
3923          #else
3924            11
3925          #endif
3926          ));
3927        if (start >= freq * 16)
3928        {
3929          printCallback->Print(" (Cmplx)");
3930          if (!freqCallback) // we don't want complexity change for old gui lzma benchmark
3931          {
3932            needSetComplexity = true;
3933          }
3934          needStop = true;
3935        }
3936        if (needSetComplexity)
3937          SetComplexCommandsMs(testTimeMs, false, mipsVal * 1000000, complexInCommands);
3938        if (needStop)
3939          break;
3940        numMilCommands <<= 1;
3941      }
3942    }
3943    if (freqCallback)
3944    {
3945      RINOK(freqCallback->FreqsFinished(1))
3946    }
3947  }
3948
3949  if (numThreadsSpecified >= 2)
3950  if (printCallback || freqCallback)
3951  {
3952    if (printCallback)
3953      printCallback->NewLine();
3954
3955    /* it can show incorrect frequency for HT threads.
3956       so we reduce freq test to (numCPUs / 2) */
3957
3958    UInt32 numThreads = numThreadsSpecified >= numCPUs / 2 ? numCPUs / 2: numThreadsSpecified;
3959    if (numThreads < 1)
3960      numThreads = 1;
3961
3962    if (printCallback)
3963    {
3964      char s[128];
3965      ConvertUInt64ToString(numThreads, s);
3966      printCallback->Print(s);
3967      printCallback->Print("T CPU Freq (MHz):");
3968    }
3969    UInt64 numMilCommands = 1 <<
3970          #ifdef _DEBUG
3971            7;
3972          #else
3973            10;
3974          #endif
3975
3976    if (specifiedFreq != 0)
3977    {
3978      while (numMilCommands > 1 && specifiedFreq < (numMilCommands * 1000000))
3979        numMilCommands >>= 1;
3980    }
3981
3982    // for (int jj = 0;; jj++)
3983    for (;;)
3984    {
3985      if (printCallback)
3986        RINOK(printCallback->CheckBreak())
3987
3988      {
3989        // PrintLeft(f, "CPU", kFieldSize_Name);
3990
3991        // UInt32 resVal;
3992
3993        CFreqBench fb;
3994        fb.complexInCommands = numMilCommands * 1000000;
3995        fb.numThreads = numThreads;
3996        // showFreq;
3997        // fb.showFreq = (freqTest == kNumCpuTests - 1 || specifiedFreq != 0);
3998        fb.showFreq = true;
3999        fb.specifiedFreq = 1;
4000
4001        const HRESULT res = fb.FreqBench(NULL /* printCallback */
4002            #ifndef Z7_ST
4003              , &affinityMode
4004            #endif
4005            );
4006        RINOK(res)
4007
4008        if (freqCallback)
4009        {
4010          RINOK(freqCallback->AddCpuFreq(numThreads, fb.CpuFreqRes, fb.UsageRes))
4011        }
4012
4013        if (printCallback)
4014        {
4015          /*
4016          if (realDelta == 0)
4017          {
4018            printCallback->Print(" -");
4019          }
4020          else
4021          */
4022          {
4023            // PrintNumber(*printCallback, start, 0);
4024            PrintUsage(*printCallback, fb.UsageRes, 3);
4025            printCallback->Print("%");
4026            PrintNumber(*printCallback, fb.CpuFreqRes / 1000000, 0);
4027            printCallback->Print("  ");
4028
4029            // PrintNumber(*printCallback, fb.UsageRes, 5);
4030          }
4031        }
4032      }
4033      // if (jj >= 1)
4034      {
4035        const bool needStop = (numMilCommands >= (1 <<
4036          #ifdef _DEBUG
4037            7
4038          #else
4039            11
4040          #endif
4041          ));
4042        if (needStop)
4043          break;
4044        numMilCommands <<= 1;
4045      }
4046    }
4047    if (freqCallback)
4048    {
4049      RINOK(freqCallback->FreqsFinished(numThreads))
4050    }
4051  }
4052
4053
4054  if (printCallback)
4055  {
4056    printCallback->NewLine();
4057    printCallback->NewLine();
4058    PrintRequirements(*printCallback, "size: ", ramSize_Defined, ramSize, "CPU hardware threads:", numCPUs);
4059    printCallback->Print(GetProcessThreadsInfo(threadsInfo));
4060    printCallback->NewLine();
4061  }
4062
4063  if (numThreadsSpecified < 1 || numThreadsSpecified > kNumThreadsMax)
4064    return E_INVALIDARG;
4065
4066  UInt64 dict = (UInt64)1 << startDicLog;
4067  const bool dictIsDefined = (isFixedDict || method.Get_DicSize(dict));
4068
4069  const unsigned level = method.GetLevel();
4070
4071  AString &methodName = method.MethodName;
4072  const AString original_MethodName = methodName;
4073  if (methodName.IsEmpty())
4074    methodName = "LZMA";
4075
4076  if (benchCallback)
4077  {
4078    CBenchProps benchProps;
4079    benchProps.SetLzmaCompexity();
4080    const UInt64 dictSize = method.Get_Lzma_DicSize();
4081
4082    size_t uncompressedDataSize;
4083    if (use_fileData)
4084    {
4085      uncompressedDataSize = fileDataBuffer.Size();
4086    }
4087    else
4088    {
4089      uncompressedDataSize = kAdditionalSize + (size_t)dictSize;
4090      if (uncompressedDataSize < dictSize)
4091        return E_INVALIDARG;
4092    }
4093
4094    return MethodBench(
4095        EXTERNAL_CODECS_LOC_VARS
4096        complexInCommands,
4097      #ifndef Z7_ST
4098        true, numThreadsSpecified,
4099        &affinityMode,
4100      #endif
4101        method,
4102        uncompressedDataSize, (const Byte *)fileDataBuffer,
4103        kOldLzmaDictBits, printCallback, benchCallback, &benchProps);
4104  }
4105
4106  if (methodName.IsEqualTo_Ascii_NoCase("CRC"))
4107    methodName = "crc32";
4108
4109  CMethodId hashID;
4110  const bool isHashMethod = FindHashMethod(EXTERNAL_CODECS_LOC_VARS methodName, hashID);
4111  int codecIndex = -1;
4112  bool isFilter = false;
4113  if (!isHashMethod)
4114  {
4115    UInt32 numStreams;
4116    codecIndex = FindMethod_Index(EXTERNAL_CODECS_LOC_VARS original_MethodName,
4117        true,  // encode
4118        hashID, numStreams, isFilter);
4119    // we can allow non filter for BW tests
4120    if (!isFilter) codecIndex = -1;
4121  }
4122
4123  CBenchCallbackToPrint callback;
4124  callback.Init();
4125  callback._file = printCallback;
4126
4127  if (isHashMethod || codecIndex != -1)
4128  {
4129    if (!printCallback)
4130      return S_FALSE;
4131    IBenchPrintCallback &f = *printCallback;
4132
4133    UInt64 dict64 = dict;
4134    if (!dictIsDefined)
4135      dict64 = (1 << 27);
4136    if (use_fileData)
4137    {
4138      if (!dictIsDefined)
4139        dict64 = fileDataBuffer.Size();
4140      else if (dict64 > fileDataBuffer.Size())
4141        dict64 = fileDataBuffer.Size();
4142    }
4143
4144    for (;;)
4145    {
4146      const int index = method.FindProp(NCoderPropID::kDictionarySize);
4147      if (index < 0)
4148        break;
4149      method.Props.Delete((unsigned)index);
4150    }
4151
4152    // methodName.RemoveChar(L'-');
4153    Int32 complexity = 16 * k_Hash_Complex_Mult; // for unknown hash method
4154    const UInt32 *checkSum = NULL;
4155    int benchIndex = -1;
4156
4157    if (isHashMethod)
4158    {
4159      for (unsigned i = 0; i < Z7_ARRAY_SIZE(g_Hash); i++)
4160      {
4161        const CBenchHash &h = g_Hash[i];
4162        AString benchMethod (h.Name);
4163        AString benchProps;
4164        const int propPos = benchMethod.Find(':');
4165        if (propPos >= 0)
4166        {
4167          benchProps = benchMethod.Ptr((unsigned)(propPos + 1));
4168          benchMethod.DeleteFrom((unsigned)propPos);
4169        }
4170
4171        if (AreSameMethodNames(benchMethod, methodName))
4172        {
4173          const bool sameProps = method.PropsString.IsEqualTo_Ascii_NoCase(benchProps);
4174          /*
4175          bool isMainMethod = method.PropsString.IsEmpty();
4176          if (isMainMethod)
4177            isMainMethod = !checkSum
4178                || (benchMethod.IsEqualTo_Ascii_NoCase("crc32") && benchProps.IsEqualTo_Ascii_NoCase("8"));
4179          if (sameProps || isMainMethod)
4180          */
4181          {
4182            complexity = (Int32)h.Complex;
4183            checkSum = &h.CheckSum;
4184            if (sameProps)
4185              break;
4186            /*
4187            if property. is not specified, we use the complexity
4188            for latest fastest method (crc32:64)
4189            */
4190          }
4191        }
4192      }
4193      // if (!checkSum) return E_NOTIMPL;
4194    }
4195    else
4196    {
4197      for (unsigned i = 0; i < Z7_ARRAY_SIZE(g_Bench); i++)
4198      {
4199        const CBenchMethod &bench = g_Bench[i];
4200        AString benchMethod (bench.Name);
4201        AString benchProps;
4202        const int propPos = benchMethod.Find(':');
4203        if (propPos >= 0)
4204        {
4205          benchProps = benchMethod.Ptr((unsigned)(propPos + 1));
4206          benchMethod.DeleteFrom((unsigned)propPos);
4207        }
4208
4209        if (AreSameMethodNames(benchMethod, methodName))
4210        {
4211          const bool sameProps = method.PropsString.IsEqualTo_Ascii_NoCase(benchProps);
4212          // bool isMainMethod = method.PropsString.IsEmpty();
4213          // if (sameProps || isMainMethod)
4214          {
4215            benchIndex = (int)i;
4216            if (sameProps)
4217              break;
4218          }
4219        }
4220      }
4221      // if (benchIndex < 0) return E_NOTIMPL;
4222    }
4223
4224    {
4225      /* we count usage only for crc and filter. non-filters are not supported */
4226      UInt64 usage = (1 << 20);
4227      UInt64 bufSize = dict64;
4228      UInt32 numBlocks = isHashMethod ? 1 : 3;
4229      if (use_fileData)
4230      {
4231        usage += fileDataBuffer.Size();
4232        if (bufSize > fileDataBuffer.Size())
4233          bufSize = fileDataBuffer.Size();
4234        if (isHashMethod)
4235        {
4236          numBlocks = 0;
4237          #ifndef Z7_ST
4238          if (numThreadsSpecified != 1)
4239            numBlocks = (k_Crc_CreateLocalBuf_For_File ? 1 : 0);
4240          #endif
4241        }
4242      }
4243      usage += numThreadsSpecified * bufSize * numBlocks;
4244      Print_Usage_and_Threads(f, usage, numThreadsSpecified);
4245    }
4246
4247    CUIntVector numThreadsVector;
4248    {
4249      unsigned nt = numThreads_Start;
4250      for (;;)
4251      {
4252        if (nt > numThreadsSpecified)
4253          break;
4254        numThreadsVector.Add(nt);
4255        const unsigned next = nt * 2;
4256        const UInt32 ntHalf= numThreadsSpecified / 2;
4257        if (ntHalf > nt && ntHalf < next)
4258          numThreadsVector.Add(ntHalf);
4259        if (numThreadsSpecified > nt && numThreadsSpecified < next)
4260          numThreadsVector.Add(numThreadsSpecified);
4261        nt = next;
4262      }
4263    }
4264
4265    unsigned numColumns = isHashMethod ? 1 : 2;
4266    CTempValues speedTotals;
4267    CTempValues usageTotals;
4268    {
4269      const unsigned numItems = numThreadsVector.Size() * numColumns;
4270      speedTotals.Alloc(numItems);
4271      usageTotals.Alloc(numItems);
4272      for (unsigned i = 0; i < numItems; i++)
4273      {
4274        speedTotals.Values[i] = 0;
4275        usageTotals.Values[i] = 0;
4276      }
4277    }
4278
4279    f.NewLine();
4280    for (unsigned line = 0; line < 3; line++)
4281    {
4282      f.NewLine();
4283      f.Print(line == 0 ? "THRD" : line == 1 ? "    " : "Size");
4284      FOR_VECTOR (ti, numThreadsVector)
4285      {
4286        if (ti != 0)
4287          Print_Delimiter(f);
4288        if (line == 0)
4289        {
4290          PrintSpaces(f, (kFieldSize_CrcSpeed + kFieldSize_Usage + 2) * (numColumns - 1));
4291          PrintNumber(f, numThreadsVector[ti], 1 + kFieldSize_Usage + kFieldSize_CrcSpeed);
4292        }
4293        else
4294        {
4295          for (unsigned c = 0; c < numColumns; c++)
4296          {
4297            PrintRight(f, line == 1 ? "Usage" : "%",    kFieldSize_Usage + 1);
4298            PrintRight(f, line == 1 ? "BW"    : "MB/s", kFieldSize_CrcSpeed + 1);
4299          }
4300        }
4301      }
4302    }
4303    f.NewLine();
4304
4305    UInt64 numSteps = 0;
4306
4307    // for (UInt32 iter = 0; iter < numIterations; iter++)
4308    // {
4309    unsigned pow = 10; // kNumHashDictBits
4310    if (startDicLog_Defined)
4311      pow = startDicLog;
4312
4313    // #define NUM_SUB_BITS 2
4314    // pow <<= NUM_SUB_BITS;
4315    for (;; pow++)
4316    {
4317      const UInt64 bufSize = (UInt64)1 << pow;
4318      // UInt64 bufSize = (UInt64)1 << (pow >> NUM_SUB_BITS);
4319      // bufSize += ((UInt64)pow & ((1 << NUM_SUB_BITS) - 1)) << ((pow >> NUM_SUB_BITS) - NUM_SUB_BITS);
4320
4321      size_t dataSize = fileDataBuffer.Size();
4322      if (dataSize > bufSize || !use_fileData)
4323        dataSize = (size_t)bufSize;
4324
4325      for (UInt32 iter = 0; iter < numIterations; iter++)
4326      {
4327        Print_Pow(f, pow);
4328        // PrintNumber(f, bufSize >> 10, 4);
4329
4330        FOR_VECTOR (ti, numThreadsVector)
4331        {
4332          RINOK(f.CheckBreak())
4333          const UInt32 numThreads = numThreadsVector[ti];
4334          if (isHashMethod)
4335          {
4336            UInt64 speed = 0;
4337            UInt64 usage = 0;
4338            const HRESULT res = CrcBench(EXTERNAL_CODECS_LOC_VARS complexInCommands,
4339              numThreads,
4340              dataSize, (const Byte *)fileDataBuffer,
4341              speed, usage,
4342              (UInt32)complexity,
4343              1, // benchWeight,
4344              (pow == kNumHashDictBits && !use_fileData) ? checkSum : NULL,
4345              method,
4346              &f,
4347            #ifndef Z7_ST
4348              &affinityMode,
4349            #endif
4350              false, // showRating
4351              NULL, false, 0);
4352            RINOK(res)
4353
4354            if (ti != 0)
4355              Print_Delimiter(f);
4356
4357            Bench_BW_Print_Usage_Speed(f, usage, speed);
4358            speedTotals.Values[ti] += speed;
4359            usageTotals.Values[ti] += usage;
4360          }
4361          else
4362          {
4363            {
4364              unsigned keySize = 32;
4365                   if (IsString1PrefixedByString2(methodName, "AES128")) keySize = 16;
4366              else if (IsString1PrefixedByString2(methodName, "AES192")) keySize = 24;
4367              callback.BenchProps.KeySize = keySize;
4368            }
4369
4370            COneMethodInfo method2 = method;
4371            unsigned bench_DictBits;
4372
4373            if (benchIndex >= 0)
4374            {
4375              const CBenchMethod &bench = g_Bench[benchIndex];
4376              callback.BenchProps.EncComplex = bench.EncComplex;
4377              callback.BenchProps.DecComplexUnc = bench.DecComplexUnc;
4378              callback.BenchProps.DecComplexCompr = bench.DecComplexCompr;
4379              bench_DictBits = bench.DictBits;
4380              // bench_DictBits = kOldLzmaDictBits; = 32 default : for debug
4381            }
4382            else
4383            {
4384              bench_DictBits = kOldLzmaDictBits; // = 32 default
4385              if (isFilter)
4386              {
4387                const unsigned k_UnknownCoderComplexity = 4;
4388                callback.BenchProps.EncComplex = k_UnknownCoderComplexity;
4389                callback.BenchProps.DecComplexUnc = k_UnknownCoderComplexity;
4390              }
4391              else
4392              {
4393                callback.BenchProps.EncComplex = 1 << 10;
4394                callback.BenchProps.DecComplexUnc = 1 << 6;
4395              }
4396              callback.BenchProps.DecComplexCompr = 0;
4397            }
4398            callback.NeedPrint = false;
4399
4400            if (StringsAreEqualNoCase_Ascii(method2.MethodName, "LZMA"))
4401            {
4402              const NCOM::CPropVariant propVariant = (UInt32)pow;
4403              RINOK(method2.ParseMethodFromPROPVARIANT((UString)"d", propVariant))
4404            }
4405
4406            const HRESULT res = MethodBench(
4407                EXTERNAL_CODECS_LOC_VARS
4408                complexInCommands,
4409              #ifndef Z7_ST
4410                false, // oldLzmaBenchMode
4411                numThreadsVector[ti],
4412                &affinityMode,
4413              #endif
4414                method2,
4415                dataSize, (const Byte *)fileDataBuffer,
4416                bench_DictBits,
4417                printCallback,
4418                &callback,
4419                &callback.BenchProps);
4420            RINOK(res)
4421
4422            if (ti != 0)
4423              Print_Delimiter(f);
4424
4425            for (unsigned i = 0; i < 2; i++)
4426            {
4427              const CBenchInfo &bi = callback.BenchInfo_Results[i];
4428              const UInt64 usage = bi.GetUsage();
4429              const UInt64 speed = bi.GetUnpackSizeSpeed();
4430              usageTotals.Values[ti * 2 + i] += usage;
4431              speedTotals.Values[ti * 2 + i] += speed;
4432              Bench_BW_Print_Usage_Speed(f, usage, speed);
4433            }
4434          }
4435        }
4436
4437        f.NewLine();
4438        numSteps++;
4439      }
4440      if (dataSize >= dict64)
4441        break;
4442    }
4443
4444    if (numSteps != 0)
4445    {
4446      f.Print("Avg:");
4447      for (unsigned ti = 0; ti < numThreadsVector.Size(); ti++)
4448      {
4449        if (ti != 0)
4450          Print_Delimiter(f);
4451        for (unsigned i = 0; i < numColumns; i++)
4452          Bench_BW_Print_Usage_Speed(f,
4453              usageTotals.Values[ti * numColumns + i] / numSteps,
4454              speedTotals.Values[ti * numColumns + i] / numSteps);
4455      }
4456      f.NewLine();
4457    }
4458
4459    return S_OK;
4460  }
4461
4462  bool use2Columns = false;
4463
4464  bool totalBenchMode = false;
4465  bool onlyHashBench = false;
4466  if (methodName.IsEqualTo_Ascii_NoCase("hash"))
4467  {
4468    onlyHashBench = true;
4469    methodName = "*";
4470    totalBenchMode = true;
4471  }
4472  else if (methodName.Find('*') >= 0)
4473    totalBenchMode = true;
4474
4475  // ---------- Threads loop ----------
4476  for (unsigned threadsPassIndex = 0; threadsPassIndex < 3; threadsPassIndex++)
4477  {
4478
4479  UInt32 numThreads = numThreadsSpecified;
4480
4481  if (!multiThreadTests)
4482  {
4483    if (threadsPassIndex != 0)
4484      break;
4485  }
4486  else
4487  {
4488    numThreads = 1;
4489    if (threadsPassIndex != 0)
4490    {
4491      if (numCPUs < 2)
4492        break;
4493      numThreads = numCPUs;
4494      if (threadsPassIndex == 1)
4495      {
4496        if (numCPUs >= 4)
4497          numThreads = numCPUs / 2;
4498      }
4499      else if (numCPUs < 4)
4500        break;
4501    }
4502  }
4503
4504  IBenchPrintCallback &f = *printCallback;
4505
4506  if (threadsPassIndex > 0)
4507  {
4508    f.NewLine();
4509    f.NewLine();
4510  }
4511
4512  if (!dictIsDefined && !onlyHashBench)
4513  {
4514    const unsigned dicSizeLog_Main = (totalBenchMode ? 24 : 25);
4515    unsigned dicSizeLog = dicSizeLog_Main;
4516
4517    #ifdef UNDER_CE
4518    dicSizeLog = (UInt64)1 << 20;
4519    #endif
4520
4521    if (ramSize_Defined)
4522    for (; dicSizeLog > kBenchMinDicLogSize; dicSizeLog--)
4523      if (GetBenchMemoryUsage(numThreads, (int)level, ((UInt64)1 << dicSizeLog), totalBenchMode) + (8 << 20) <= ramSize)
4524        break;
4525
4526    dict = (UInt64)1 << dicSizeLog;
4527
4528    if (totalBenchMode && dicSizeLog != dicSizeLog_Main)
4529    {
4530      f.Print("Dictionary reduced to: ");
4531      PrintNumber(f, dicSizeLog, 1);
4532      f.NewLine();
4533    }
4534  }
4535
4536  Print_Usage_and_Threads(f,
4537      onlyHashBench ?
4538        GetBenchMemoryUsage_Hash(numThreads, dict) :
4539        GetBenchMemoryUsage(numThreads, (int)level, dict, totalBenchMode),
4540      numThreads);
4541
4542  f.NewLine();
4543
4544  f.NewLine();
4545
4546  if (totalBenchMode)
4547  {
4548    callback.NameFieldSize = kFieldSize_Name;
4549    use2Columns = false;
4550  }
4551  else
4552  {
4553    callback.NameFieldSize = kFieldSize_SmallName;
4554    use2Columns = true;
4555  }
4556  callback.Use2Columns = use2Columns;
4557
4558  bool showFreq = false;
4559  UInt64 cpuFreq = 0;
4560
4561  if (totalBenchMode)
4562  {
4563    showFreq = true;
4564  }
4565
4566  unsigned fileldSize = kFieldSize_TotalSize;
4567  if (showFreq)
4568    fileldSize += kFieldSize_EUAndEffec;
4569
4570  if (use2Columns)
4571  {
4572    PrintSpaces(f, callback.NameFieldSize);
4573    PrintRight(f, "Compressing", fileldSize);
4574    f.Print(kSep);
4575    PrintRight(f, "Decompressing", fileldSize);
4576  }
4577  f.NewLine();
4578  PrintLeft(f, totalBenchMode ? "Method" : "Dict", callback.NameFieldSize);
4579
4580  int j;
4581
4582  for (j = 0; j < 2; j++)
4583  {
4584    PrintRight(f, "Speed", kFieldSize_Speed + 1);
4585    PrintRight(f, "Usage", kFieldSize_Usage + 1);
4586    PrintRight(f, "R/U", kFieldSize_RU + 1);
4587    PrintRight(f, "Rating", kFieldSize_Rating + 1);
4588    if (showFreq)
4589    {
4590      PrintRight(f, "E/U", kFieldSize_EU + 1);
4591      PrintRight(f, "Effec", kFieldSize_Effec + 1);
4592    }
4593    if (!use2Columns)
4594      break;
4595    if (j == 0)
4596      f.Print(kSep);
4597  }
4598
4599  f.NewLine();
4600  PrintSpaces(f, callback.NameFieldSize);
4601
4602  for (j = 0; j < 2; j++)
4603  {
4604    PrintRight(f, "KiB/s", kFieldSize_Speed + 1);
4605    PrintRight(f, "%", kFieldSize_Usage + 1);
4606    PrintRight(f, "MIPS", kFieldSize_RU + 1);
4607    PrintRight(f, "MIPS", kFieldSize_Rating + 1);
4608    if (showFreq)
4609    {
4610      PrintRight(f, "%", kFieldSize_EU + 1);
4611      PrintRight(f, "%", kFieldSize_Effec + 1);
4612    }
4613    if (!use2Columns)
4614      break;
4615    if (j == 0)
4616      f.Print(kSep);
4617  }
4618
4619  f.NewLine();
4620  f.NewLine();
4621
4622  if (specifiedFreq != 0)
4623    cpuFreq = specifiedFreq;
4624
4625  // bool showTotalSpeed = false;
4626
4627  if (totalBenchMode)
4628  {
4629    for (UInt32 i = 0; i < numIterations; i++)
4630    {
4631      if (i != 0)
4632        printCallback->NewLine();
4633
4634      const unsigned kNumCpuTests = 3;
4635      for (unsigned freqTest = 0; freqTest < kNumCpuTests; freqTest++)
4636      {
4637        PrintLeft(f, "CPU", kFieldSize_Name);
4638
4639        // UInt32 resVal;
4640
4641        CFreqBench fb;
4642        fb.complexInCommands = complexInCommands;
4643        fb.numThreads = numThreads;
4644        // showFreq;
4645        fb.showFreq = (freqTest == kNumCpuTests - 1 || specifiedFreq != 0);
4646        fb.specifiedFreq = specifiedFreq;
4647
4648        const HRESULT res = fb.FreqBench(printCallback
4649            #ifndef Z7_ST
4650              , &affinityMode
4651            #endif
4652            );
4653        RINOK(res)
4654
4655        cpuFreq = fb.CpuFreqRes;
4656        callback.NewLine();
4657
4658        if (specifiedFreq != 0)
4659          cpuFreq = specifiedFreq;
4660
4661        if (testTimeMs >= 1000)
4662        if (freqTest == kNumCpuTests - 1)
4663        {
4664          // SetComplexCommandsMs(testTimeMs, specifiedFreq != 0, cpuFreq, complexInCommands);
4665        }
4666      }
4667      callback.NewLine();
4668
4669      // return S_OK; // change it
4670
4671      callback.SetFreq(true, cpuFreq);
4672
4673      if (!onlyHashBench)
4674      {
4675        size_t dataSize = (size_t)dict;
4676        if (use_fileData)
4677        {
4678          dataSize = fileDataBuffer.Size();
4679          if (dictIsDefined && dataSize > dict)
4680            dataSize = (size_t)dict;
4681        }
4682
4683        const HRESULT res = TotalBench(EXTERNAL_CODECS_LOC_VARS
4684            method, complexInCommands,
4685          #ifndef Z7_ST
4686            numThreads,
4687            &affinityMode,
4688          #endif
4689            dictIsDefined || use_fileData, // forceUnpackSize
4690            dataSize,
4691            (const Byte *)fileDataBuffer,
4692            printCallback, &callback);
4693        RINOK(res)
4694      }
4695
4696      {
4697        size_t dataSize = (size_t)1 << kNumHashDictBits;
4698        if (dictIsDefined)
4699        {
4700          dataSize = (size_t)dict;
4701          if (dataSize != dict)
4702            return E_OUTOFMEMORY;
4703        }
4704        if (use_fileData)
4705        {
4706          dataSize = fileDataBuffer.Size();
4707          if (dictIsDefined && dataSize > dict)
4708            dataSize = (size_t)dict;
4709        }
4710
4711        const HRESULT res = TotalBench_Hash(EXTERNAL_CODECS_LOC_VARS
4712            method, complexInCommands,
4713            numThreads,
4714            dataSize, (const Byte *)fileDataBuffer,
4715            printCallback, &callback,
4716        #ifndef Z7_ST
4717          &affinityMode,
4718        #endif
4719          &callback.EncodeRes, true, cpuFreq);
4720        RINOK(res)
4721      }
4722
4723      callback.NewLine();
4724      {
4725        PrintLeft(f, "CPU", kFieldSize_Name);
4726
4727        CFreqBench fb;
4728        fb.complexInCommands = complexInCommands;
4729        fb.numThreads = numThreads;
4730        // showFreq;
4731        fb.showFreq = (specifiedFreq != 0);
4732        fb.specifiedFreq = specifiedFreq;
4733
4734        const HRESULT res = fb.FreqBench(printCallback
4735          #ifndef Z7_ST
4736            , &affinityMode
4737          #endif
4738          );
4739        RINOK(res)
4740        callback.NewLine();
4741      }
4742    }
4743  }
4744  else
4745  {
4746    needSetComplexity = true;
4747    if (!methodName.IsEqualTo_Ascii_NoCase("LZMA"))
4748    {
4749      unsigned i;
4750      for (i = 0; i < Z7_ARRAY_SIZE(g_Bench); i++)
4751      {
4752        const CBenchMethod &h = g_Bench[i];
4753        AString benchMethod (h.Name);
4754        AString benchProps;
4755        const int propPos = benchMethod.Find(':');
4756        if (propPos >= 0)
4757        {
4758          benchProps = benchMethod.Ptr((unsigned)(propPos + 1));
4759          benchMethod.DeleteFrom((unsigned)propPos);
4760        }
4761
4762        if (AreSameMethodNames(benchMethod, methodName))
4763        {
4764          if (benchProps.IsEmpty()
4765              || (benchProps == "x5" && method.PropsString.IsEmpty())
4766              || method.PropsString.IsPrefixedBy_Ascii_NoCase(benchProps))
4767          {
4768            callback.BenchProps.EncComplex = h.EncComplex;
4769            callback.BenchProps.DecComplexCompr = h.DecComplexCompr;
4770            callback.BenchProps.DecComplexUnc = h.DecComplexUnc;
4771            needSetComplexity = false;
4772            break;
4773          }
4774        }
4775      }
4776      /*
4777      if (i == Z7_ARRAY_SIZE(g_Bench))
4778        return E_NOTIMPL;
4779      */
4780    }
4781    if (needSetComplexity)
4782      callback.BenchProps.SetLzmaCompexity();
4783
4784  if (startDicLog < kBenchMinDicLogSize)
4785    startDicLog = kBenchMinDicLogSize;
4786
4787  for (unsigned i = 0; i < numIterations; i++)
4788  {
4789    unsigned pow = (dict < GetDictSizeFromLog(startDicLog)) ? kBenchMinDicLogSize : (unsigned)startDicLog;
4790    if (!multiDict)
4791      pow = 32;
4792    while (GetDictSizeFromLog(pow) > dict && pow > 0)
4793      pow--;
4794    for (; GetDictSizeFromLog(pow) <= dict; pow++)
4795    {
4796      Print_Pow(f, pow);
4797      callback.DictSize = (UInt64)1 << pow;
4798
4799      COneMethodInfo method2 = method;
4800
4801      if (StringsAreEqualNoCase_Ascii(method2.MethodName, "LZMA"))
4802      {
4803        // We add dictionary size property.
4804        // method2 can have two different dictionary size properties.
4805        // And last property is main.
4806        NCOM::CPropVariant propVariant = (UInt32)pow;
4807        RINOK(method2.ParseMethodFromPROPVARIANT((UString)"d", propVariant))
4808      }
4809
4810      size_t uncompressedDataSize;
4811      if (use_fileData)
4812      {
4813        uncompressedDataSize = fileDataBuffer.Size();
4814      }
4815      else
4816      {
4817        uncompressedDataSize = (size_t)callback.DictSize;
4818        if (uncompressedDataSize != callback.DictSize)
4819          return E_OUTOFMEMORY;
4820        if (uncompressedDataSize >= (1 << 18))
4821          uncompressedDataSize += kAdditionalSize;
4822      }
4823
4824      const HRESULT res = MethodBench(
4825          EXTERNAL_CODECS_LOC_VARS
4826          complexInCommands,
4827        #ifndef Z7_ST
4828          true, numThreads,
4829          &affinityMode,
4830        #endif
4831          method2,
4832          uncompressedDataSize, (const Byte *)fileDataBuffer,
4833          kOldLzmaDictBits, printCallback, &callback, &callback.BenchProps);
4834      f.NewLine();
4835      RINOK(res)
4836      if (!multiDict)
4837        break;
4838    }
4839  }
4840  }
4841
4842  PrintChars(f, '-', callback.NameFieldSize + fileldSize);
4843
4844  if (use2Columns)
4845  {
4846    f.Print(kSep);
4847    PrintChars(f, '-', fileldSize);
4848  }
4849
4850  f.NewLine();
4851
4852  if (use2Columns)
4853  {
4854    PrintLeft(f, "Avr:", callback.NameFieldSize);
4855    PrintTotals(f, showFreq, cpuFreq, !totalBenchMode, callback.EncodeRes);
4856    f.Print(kSep);
4857    PrintTotals(f, showFreq, cpuFreq, !totalBenchMode, callback.DecodeRes);
4858    f.NewLine();
4859  }
4860
4861  PrintLeft(f, "Tot:", callback.NameFieldSize);
4862  CTotalBenchRes midRes;
4863  midRes = callback.EncodeRes;
4864  midRes.Update_With_Res(callback.DecodeRes);
4865
4866  // midRes.SetSum(callback.EncodeRes, callback.DecodeRes);
4867  PrintTotals(f, showFreq, cpuFreq, false, midRes);
4868  f.NewLine();
4869
4870  }
4871  return S_OK;
4872}
4873