1 // Bench.cpp
2
3 #include "StdAfx.h"
4
5 #include "../../../../C/CpuArch.h"
6
7 // #include <stdio.h>
8
9 #ifndef _WIN32
10
11 #define USE_POSIX_TIME
12 #define USE_POSIX_TIME2
13 #endif // _WIN32
14
15 #ifdef USE_POSIX_TIME
16 #include <time.h>
17 #include <unistd.h>
18 #ifdef USE_POSIX_TIME2
19 #include <sys/time.h>
20 #include <sys/times.h>
21 #endif
22 #endif // USE_POSIX_TIME
23
24 #ifdef _WIN32
25 #define USE_ALLOCA
26 #endif
27
28 #ifdef USE_ALLOCA
29 #ifdef _WIN32
30 #include <malloc.h>
31 #else
32 #include <stdlib.h>
33 #endif
34 #endif
35
36 #include "../../../../C/7zCrc.h"
37 #include "../../../../C/RotateDefs.h"
38
39 #ifndef Z7_ST
40 #include "../../../Windows/Synchronization.h"
41 #include "../../../Windows/Thread.h"
42 #endif
43
44 #include "../../../Windows/FileFind.h"
45 #include "../../../Windows/FileIO.h"
46 #include "../../../Windows/SystemInfo.h"
47
48 #include "../../../Common/MyBuffer2.h"
49 #include "../../../Common/IntToString.h"
50 #include "../../../Common/StringConvert.h"
51 #include "../../../Common/StringToInt.h"
52 #include "../../../Common/Wildcard.h"
53
54 #include "../../Common/MethodProps.h"
55 #include "../../Common/StreamObjects.h"
56 #include "../../Common/StreamUtils.h"
57
58 #include "Bench.h"
59
60 using namespace NWindows;
61
62 #ifndef Z7_ST
63 static const UInt32 k_LZMA = 0x030101;
64 #endif
65
66 static const UInt64 kComplexInCommands = (UInt64)1 <<
67 #ifdef UNDER_CE
68 31;
69 #else
70 34;
71 #endif
72
73 static const UInt32 kComplexInMs = 4000;
74
SetComplexCommandsMs(UInt32 complexInMs, bool isSpecifiedFreq, UInt64 cpuFreq, UInt64 &complexInCommands)75 static void SetComplexCommandsMs(UInt32 complexInMs,
76 bool isSpecifiedFreq, UInt64 cpuFreq, UInt64 &complexInCommands)
77 {
78 complexInCommands = kComplexInCommands;
79 const UInt64 kMinFreq = (UInt64)1000000 * 4;
80 const UInt64 kMaxFreq = (UInt64)1000000 * 20000;
81 if (cpuFreq < kMinFreq && !isSpecifiedFreq)
82 cpuFreq = kMinFreq;
83 if (cpuFreq < kMaxFreq || isSpecifiedFreq)
84 {
85 if (complexInMs != 0)
86 complexInCommands = complexInMs * cpuFreq / 1000;
87 else
88 complexInCommands = cpuFreq >> 2;
89 }
90 }
91
92 // const UInt64 kBenchmarkUsageMult = 1000000; // for debug
93 static const unsigned kBenchmarkUsageMultBits = 16;
94 static const UInt64 kBenchmarkUsageMult = 1 << kBenchmarkUsageMultBits;
95
Benchmark_GetUsage_Percents(UInt64 usage)96 UInt64 Benchmark_GetUsage_Percents(UInt64 usage)
97 {
98 return (100 * usage + kBenchmarkUsageMult / 2) / kBenchmarkUsageMult;
99 }
100
101 static const unsigned kNumHashDictBits = 17;
102 static const UInt32 kFilterUnpackSize = (47 << 10); // + 5; // for test
103
104 static const unsigned kOldLzmaDictBits = 32;
105
106 // static const size_t kAdditionalSize = (size_t)1 << 32; // for debug
107 static const size_t kAdditionalSize = (size_t)1 << 16;
108 static const UInt32 kCompressedAdditionalSize = (1 << 10);
109
110 static const UInt32 kMaxMethodPropSize = (1 << 6);
111
112
/* Calls Alloc(_size_) on the buffer object that (_buffer_) points to, and
   returns E_OUTOFMEMORY from the enclosing function if a non-zero size was
   requested but the buffer reports that it is not allocated.
   The macro expands to a braced block; call sites use it without a
   trailing semicolon. */
#define ALLOC_WITH_HRESULT(_buffer_, _size_) \
  { \
    (_buffer_)->Alloc(_size_); \
    if (_size_ && !(_buffer_)->IsAllocated()) \
      return E_OUTOFMEMORY; \
  }
116
117
118 class CBaseRandomGenerator
119 {
120 UInt32 A1;
121 UInt32 A2;
122 UInt32 Salt;
123 public:
CBaseRandomGenerator(UInt32 salt = 0)124 CBaseRandomGenerator(UInt32 salt = 0): Salt(salt) { Init(); }
Init()125 void Init() { A1 = 362436069; A2 = 521288629;}
126 Z7_FORCE_INLINE
GetRnd()127 UInt32 GetRnd()
128 {
129 return Salt ^
130 (
131 ((A1 = 36969 * (A1 & 0xffff) + (A1 >> 16)) << 16) +
132 ((A2 = 18000 * (A2 & 0xffff) + (A2 >> 16)) )
133 );
134 }
135 };
136
137
138 Z7_NO_INLINE
RandGen(Byte *buf, size_t size)139 static void RandGen(Byte *buf, size_t size)
140 {
141 CBaseRandomGenerator RG;
142 const size_t size4 = size & ~((size_t)3);
143 size_t i;
144 for (i = 0; i < size4; i += 4)
145 {
146 const UInt32 v = RG.GetRnd();
147 SetUi32(buf + i, v)
148 }
149 UInt32 v = RG.GetRnd();
150 for (; i < size; i++)
151 {
152 buf[i] = (Byte)v;
153 v >>= 8;
154 }
155 }
156
157
158 class CBenchRandomGenerator: public CMidAlignedBuffer
159 {
GetVal(UInt32 &res, unsigned numBits)160 static UInt32 GetVal(UInt32 &res, unsigned numBits)
161 {
162 UInt32 val = res & (((UInt32)1 << numBits) - 1);
163 res >>= numBits;
164 return val;
165 }
166
GetLen(UInt32 &r)167 static UInt32 GetLen(UInt32 &r)
168 {
169 UInt32 len = GetVal(r, 2);
170 return GetVal(r, 1 + len);
171 }
172
173 public:
174
GenerateSimpleRandom(UInt32 salt)175 void GenerateSimpleRandom(UInt32 salt)
176 {
177 CBaseRandomGenerator rg(salt);
178 const size_t bufSize = Size();
179 Byte *buf = (Byte *)*this;
180 for (size_t i = 0; i < bufSize; i++)
181 buf[i] = (Byte)rg.GetRnd();
182 }
183
GenerateLz(unsigned dictBits, UInt32 salt)184 void GenerateLz(unsigned dictBits, UInt32 salt)
185 {
186 CBaseRandomGenerator rg(salt);
187 size_t pos = 0;
188 size_t rep0 = 1;
189 const size_t bufSize = Size();
190 Byte *buf = (Byte *)*this;
191 unsigned posBits = 1;
192
193 // printf("\n dictBits = %d\n", (UInt32)dictBits);
194 // printf("\n bufSize = 0x%p\n", (const void *)bufSize);
195
196 while (pos < bufSize)
197 {
198 /*
199 if (pos >= ((UInt32)1 << 31))
200 printf(" %x\n", pos);
201 */
202 UInt32 r = rg.GetRnd();
203 if (GetVal(r, 1) == 0 || pos < 1024)
204 buf[pos++] = (Byte)(r & 0xFF);
205 else
206 {
207 UInt32 len;
208 len = 1 + GetLen(r);
209
210 if (GetVal(r, 3) != 0)
211 {
212 len += GetLen(r);
213
214 while (((size_t)1 << posBits) < pos)
215 posBits++;
216
217 unsigned numBitsMax = dictBits;
218 if (numBitsMax > posBits)
219 numBitsMax = posBits;
220
221 const unsigned kAddBits = 6;
222 unsigned numLogBits = 5;
223 if (numBitsMax <= (1 << 4) - 1 + kAddBits)
224 numLogBits = 4;
225
226 for (;;)
227 {
228 const UInt32 ppp = GetVal(r, numLogBits) + kAddBits;
229 r = rg.GetRnd();
230 if (ppp > numBitsMax)
231 continue;
232 // rep0 = GetVal(r, ppp);
233 rep0 = r & (((size_t)1 << ppp) - 1);
234 if (rep0 < pos)
235 break;
236 r = rg.GetRnd();
237 }
238 rep0++;
239 }
240
241 // len *= 300; // for debug
242 {
243 const size_t rem = bufSize - pos;
244 if (len > rem)
245 len = (UInt32)rem;
246 }
247 Byte *dest = buf + pos;
248 const Byte *src = dest - rep0;
249 pos += len;
250 for (UInt32 i = 0; i < len; i++)
251 *dest++ = *src++;
252 }
253 }
254 // printf("\n CRC = %x\n", CrcCalc(buf, bufSize));
255 }
256 };
257
258
259 Z7_CLASS_IMP_NOQIB_1(
260 CBenchmarkInStream
261 , ISequentialInStream
262 )
263 const Byte *Data;
264 size_t Pos;
265 size_t Size;
266 public:
Init(const Byte *data, size_t size)267 void Init(const Byte *data, size_t size)
268 {
269 Data = data;
270 Size = size;
271 Pos = 0;
272 }
WasFinished() const273 bool WasFinished() const { return Pos == Size; }
274 };
275
276 Z7_COM7F_IMF(CBenchmarkInStream::Read(void *data, UInt32 size, UInt32 *processedSize))
277 {
278 const UInt32 kMaxBlockSize = (1 << 20);
279 if (size > kMaxBlockSize)
280 size = kMaxBlockSize;
281 const size_t remain = Size - Pos;
282 if (size > remain)
283 size = (UInt32)remain;
284
285 if (size != 0)
286 memcpy(data, Data + Pos, size);
287
288 Pos += size;
289 if (processedSize)
290 *processedSize = size;
291 return S_OK;
292 }
293
294
295 class CBenchmarkOutStream Z7_final:
296 public ISequentialOutStream,
297 public CMyUnknownImp,
298 public CMidAlignedBuffer
299 {
300 Z7_COM_UNKNOWN_IMP_0
301 Z7_IFACE_COM7_IMP(ISequentialOutStream)
302 // bool _overflow;
303 public:
304 size_t Pos;
305 bool RealCopy;
306 bool CalcCrc;
307 UInt32 Crc;
308
309 // CBenchmarkOutStream(): _overflow(false) {}
310 void Init(bool realCopy, bool calcCrc)
311 {
312 Crc = CRC_INIT_VAL;
313 RealCopy = realCopy;
314 CalcCrc = calcCrc;
315 // _overflow = false;
316 Pos = 0;
317 }
318
319 void InitCrc()
320 {
321 Crc = CRC_INIT_VAL;
322 }
323
324 void Calc(const void *data, size_t size)
325 {
326 Crc = CrcUpdate(Crc, data, size);
327 }
328
329 size_t GetPos() const { return Pos; }
330
331 // void Print() { printf("\n%8d %8d\n", (unsigned)BufferSize, (unsigned)Pos); }
332 };
333
334 Z7_COM7F_IMF(CBenchmarkOutStream::Write(const void *data, UInt32 size, UInt32 *processedSize))
335 {
336 size_t curSize = Size() - Pos;
337 if (curSize > size)
338 curSize = size;
339 if (curSize != 0)
340 {
341 if (RealCopy)
342 memcpy(((Byte *)*this) + Pos, data, curSize);
343 if (CalcCrc)
344 Calc(data, curSize);
345 Pos += curSize;
346 }
347 if (processedSize)
348 *processedSize = (UInt32)curSize;
349 if (curSize != size)
350 {
351 // _overflow = true;
352 return E_FAIL;
353 }
354 return S_OK;
355 }
356
357
358 Z7_CLASS_IMP_NOQIB_1(
359 CCrcOutStream
360 , ISequentialOutStream
361 )
362 public:
363 bool CalcCrc;
364 UInt32 Crc;
365 UInt64 Pos;
366
367 CCrcOutStream(): CalcCrc(true) {}
368 void Init() { Crc = CRC_INIT_VAL; Pos = 0; }
369 void Calc(const void *data, size_t size)
370 {
371 Crc = CrcUpdate(Crc, data, size);
372 }
373 };
374
375 Z7_COM7F_IMF(CCrcOutStream::Write(const void *data, UInt32 size, UInt32 *processedSize))
376 {
377 if (CalcCrc)
378 Calc(data, size);
379 Pos += size;
380 if (processedSize)
381 *processedSize = size;
382 return S_OK;
383 }
384
385 // #include "../../../../C/My_sys_time.h"
386
387 static UInt64 GetTimeCount()
388 {
389 #ifdef USE_POSIX_TIME
390 #ifdef USE_POSIX_TIME2
391 timeval v;
392 if (gettimeofday(&v, NULL) == 0)
393 return (UInt64)(v.tv_sec) * 1000000 + (UInt64)v.tv_usec;
394 return (UInt64)time(NULL) * 1000000;
395 #else
396 return time(NULL);
397 #endif
398 #else
399 LARGE_INTEGER value;
400 if (::QueryPerformanceCounter(&value))
401 return (UInt64)value.QuadPart;
402 return GetTickCount();
403 #endif
404 }
405
406 static UInt64 GetFreq()
407 {
408 #ifdef USE_POSIX_TIME
409 #ifdef USE_POSIX_TIME2
410 return 1000000;
411 #else
412 return 1;
413 #endif
414 #else
415 LARGE_INTEGER value;
416 if (::QueryPerformanceFrequency(&value))
417 return (UInt64)value.QuadPart;
418 return 1000;
419 #endif
420 }
421
422
423 #ifdef USE_POSIX_TIME
424
425 struct CUserTime
426 {
427 UInt64 Sum;
428 clock_t Prev;
429
430 void Init()
431 {
432 // Prev = clock();
433 Sum = 0;
434 Prev = 0;
435 Update();
436 Sum = 0;
437 }
438
439 void Update()
440 {
441 tms t;
442 /* clock_t res = */ times(&t);
443 clock_t newVal = t.tms_utime + t.tms_stime;
444 Sum += (UInt64)(newVal - Prev);
445 Prev = newVal;
446
447 /*
448 clock_t v = clock();
449 if (v != -1)
450 {
451 Sum += v - Prev;
452 Prev = v;
453 }
454 */
455 }
456 UInt64 GetUserTime()
457 {
458 Update();
459 return Sum;
460 }
461 };
462
463 #else
464
465
466 struct CUserTime
467 {
468 bool UseTick;
469 DWORD Prev_Tick;
470 UInt64 Prev;
471 UInt64 Sum;
472
473 void Init()
474 {
475 UseTick = false;
476 Prev_Tick = 0;
477 Prev = 0;
478 Sum = 0;
479 Update();
480 Sum = 0;
481 }
482 UInt64 GetUserTime()
483 {
484 Update();
485 return Sum;
486 }
487 void Update();
488 };
489
490 static inline UInt64 GetTime64(const FILETIME &t) { return ((UInt64)t.dwHighDateTime << 32) | t.dwLowDateTime; }
491
492 void CUserTime::Update()
493 {
494 DWORD new_Tick = GetTickCount();
495 FILETIME creationTime, exitTime, kernelTime, userTime;
496 if (!UseTick &&
497 #ifdef UNDER_CE
498 ::GetThreadTimes(::GetCurrentThread()
499 #else
500 ::GetProcessTimes(::GetCurrentProcess()
501 #endif
502 , &creationTime, &exitTime, &kernelTime, &userTime))
503 {
504 UInt64 newVal = GetTime64(userTime) + GetTime64(kernelTime);
505 Sum += newVal - Prev;
506 Prev = newVal;
507 }
508 else
509 {
510 UseTick = true;
511 Sum += (UInt64)(new_Tick - (DWORD)Prev_Tick) * 10000;
512 }
513 Prev_Tick = new_Tick;
514 }
515
516
517 #endif
518
519 static UInt64 GetUserFreq()
520 {
521 #ifdef USE_POSIX_TIME
522 // return CLOCKS_PER_SEC;
523 return (UInt64)sysconf(_SC_CLK_TCK);
524 #else
525 return 10000000;
526 #endif
527 }
528
529 class CBenchProgressStatus Z7_final
530 {
531 #ifndef Z7_ST
532 NSynchronization::CCriticalSection CS;
533 #endif
534 public:
535 HRESULT Res;
536 bool EncodeMode;
537 void SetResult(HRESULT res)
538 {
539 #ifndef Z7_ST
540 NSynchronization::CCriticalSectionLock lock(CS);
541 #endif
542 Res = res;
543 }
544 HRESULT GetResult()
545 {
546 #ifndef Z7_ST
547 NSynchronization::CCriticalSectionLock lock(CS);
548 #endif
549 return Res;
550 }
551 };
552
553 struct CBenchInfoCalc
554 {
555 CBenchInfo BenchInfo;
556 CUserTime UserTime;
557
558 void SetStartTime();
559 void SetFinishTime(CBenchInfo &dest);
560 };
561
562 void CBenchInfoCalc::SetStartTime()
563 {
564 BenchInfo.GlobalFreq = GetFreq();
565 BenchInfo.UserFreq = GetUserFreq();
566 BenchInfo.GlobalTime = ::GetTimeCount();
567 BenchInfo.UserTime = 0;
568 UserTime.Init();
569 }
570
571 void CBenchInfoCalc::SetFinishTime(CBenchInfo &dest)
572 {
573 dest = BenchInfo;
574 dest.GlobalTime = ::GetTimeCount() - BenchInfo.GlobalTime;
575 dest.UserTime = UserTime.GetUserTime();
576 }
577
578 class CBenchProgressInfo Z7_final:
579 public ICompressProgressInfo,
580 public CMyUnknownImp,
581 public CBenchInfoCalc
582 {
583 Z7_COM_UNKNOWN_IMP_0
584 Z7_IFACE_COM7_IMP(ICompressProgressInfo)
585 public:
586 CBenchProgressStatus *Status;
587 IBenchCallback *Callback;
588
589 CBenchProgressInfo(): Callback(NULL) {}
590 };
591
592
593 Z7_COM7F_IMF(CBenchProgressInfo::SetRatioInfo(const UInt64 *inSize, const UInt64 *outSize))
594 {
595 HRESULT res = Status->GetResult();
596 if (res != S_OK)
597 return res;
598 if (!Callback)
599 return res;
600
601 /*
602 static UInt64 inSizePrev = 0;
603 static UInt64 outSizePrev = 0;
604 UInt64 delta1 = 0, delta2 = 0, val1 = 0, val2 = 0;
605 if (inSize) { val1 = *inSize; delta1 = val1 - inSizePrev; inSizePrev = val1; }
606 if (outSize) { val2 = *outSize; delta2 = val2 - outSizePrev; outSizePrev = val2; }
607 UInt64 percents = delta2 * 1000;
608 if (delta1 != 0)
609 percents /= delta1;
610 printf("=== %7d %7d %7d %7d ratio = %4d\n",
611 (unsigned)(val1 >> 10), (unsigned)(delta1 >> 10),
612 (unsigned)(val2 >> 10), (unsigned)(delta2 >> 10),
613 (unsigned)percents);
614 */
615
616 CBenchInfo info;
617 SetFinishTime(info);
618 if (Status->EncodeMode)
619 {
620 info.UnpackSize = BenchInfo.UnpackSize + *inSize;
621 info.PackSize = BenchInfo.PackSize + *outSize;
622 res = Callback->SetEncodeResult(info, false);
623 }
624 else
625 {
626 info.PackSize = BenchInfo.PackSize + *inSize;
627 info.UnpackSize = BenchInfo.UnpackSize + *outSize;
628 res = Callback->SetDecodeResult(info, false);
629 }
630 if (res != S_OK)
631 Status->SetResult(res);
632 return res;
633 }
634
// Number of fractional bits in the fixed-point result of GetLogSize_Sub().
static const unsigned kSubBits = 8;
636
637 static unsigned GetLogSize(UInt64 size)
638 {
639 unsigned i = 0;
640 for (;;)
641 {
642 i++; size >>= 1; if (size == 0) break;
643 }
644 return i;
645 }
646
647
648 static UInt32 GetLogSize_Sub(UInt64 size)
649 {
650 if (size <= 1)
651 return 0;
652 const unsigned i = GetLogSize(size) - 1;
653 UInt32 v;
654 if (i <= kSubBits)
655 v = (UInt32)(size) << (kSubBits - i);
656 else
657 v = (UInt32)(size >> (i - kSubBits));
658 return ((UInt32)i << kSubBits) + (v & (((UInt32)1 << kSubBits) - 1));
659 }
660
661
662 static UInt64 Get_UInt64_from_double(double v)
663 {
664 const UInt64 kMaxVal = (UInt64)1 << 62;
665 if (v > (double)(Int64)kMaxVal)
666 return kMaxVal;
667 return (UInt64)v;
668 }
669
670 static UInt64 MyMultDiv64(UInt64 m1, UInt64 m2, UInt64 d)
671 {
672 if (d == 0)
673 d = 1;
674 const double v =
675 (double)(Int64)m1 *
676 (double)(Int64)m2 /
677 (double)(Int64)d;
678 return Get_UInt64_from_double(v);
679 /*
680 unsigned n1 = GetLogSize(m1);
681 unsigned n2 = GetLogSize(m2);
682 while (n1 + n2 > 64)
683 {
684 if (n1 >= n2)
685 {
686 m1 >>= 1;
687 n1--;
688 }
689 else
690 {
691 m2 >>= 1;
692 n2--;
693 }
694 d >>= 1;
695 }
696
697 if (d == 0)
698 d = 1;
699 return m1 * m2 / d;
700 */
701 }
702
703
704 UInt64 CBenchInfo::GetUsage() const
705 {
706 UInt64 userTime = UserTime;
707 UInt64 userFreq = UserFreq;
708 UInt64 globalTime = GlobalTime;
709 UInt64 globalFreq = GlobalFreq;
710
711 if (userFreq == 0)
712 userFreq = 1;
713 if (globalTime == 0)
714 globalTime = 1;
715
716 const double v =
717 ((double)(Int64)userTime / (double)(Int64)userFreq)
718 * ((double)(Int64)globalFreq / (double)(Int64)globalTime)
719 * (double)(Int64)kBenchmarkUsageMult;
720 return Get_UInt64_from_double(v);
721 /*
722 return MyMultDiv64(
723 MyMultDiv64(kBenchmarkUsageMult, userTime, userFreq),
724 globalFreq, globalTime);
725 */
726 }
727
728
729 UInt64 CBenchInfo::GetRatingPerUsage(UInt64 rating) const
730 {
731 if (UserTime == 0)
732 {
733 return 0;
734 // userTime = 1;
735 }
736 UInt64 globalFreq = GlobalFreq;
737 if (globalFreq == 0)
738 globalFreq = 1;
739
740 const double v =
741 ((double)(Int64)GlobalTime / (double)(Int64)globalFreq)
742 * ((double)(Int64)UserFreq / (double)(Int64)UserTime)
743 * (double)(Int64)rating;
744 return Get_UInt64_from_double(v);
745 /*
746 return MyMultDiv64(
747 MyMultDiv64(rating, UserFreq, UserTime),
748 GlobalTime, globalFreq);
749 */
750 }
751
752
753 UInt64 CBenchInfo::GetSpeed(UInt64 numUnits) const
754 {
755 return MyMultDiv64(numUnits, GlobalFreq, GlobalTime);
756 }
757
758 static UInt64 GetNumCommands_from_Size_and_Complexity(UInt64 size, Int32 complexity)
759 {
760 return complexity >= 0 ?
761 size * (UInt32)complexity :
762 size / (UInt32)(-complexity);
763 }
764
765 struct CBenchProps
766 {
767 bool LzmaRatingMode;
768
769 Int32 EncComplex;
770 Int32 DecComplexCompr;
771 Int32 DecComplexUnc;
772
773 unsigned KeySize;
774
775 CBenchProps():
776 LzmaRatingMode(false),
777 KeySize(0)
778 {}
779
780 void SetLzmaCompexity();
781
782 UInt64 GetNumCommands_Enc(UInt64 unpackSize) const
783 {
784 const UInt32 kMinSize = 100;
785 if (unpackSize < kMinSize)
786 unpackSize = kMinSize;
787 return GetNumCommands_from_Size_and_Complexity(unpackSize, EncComplex);
788 }
789
790 UInt64 GetNumCommands_Dec(UInt64 packSize, UInt64 unpackSize) const
791 {
792 return
793 GetNumCommands_from_Size_and_Complexity(packSize, DecComplexCompr) +
794 GetNumCommands_from_Size_and_Complexity(unpackSize, DecComplexUnc);
795 }
796
797 UInt64 GetRating_Enc(UInt64 dictSize, UInt64 elapsedTime, UInt64 freq, UInt64 size) const;
798 UInt64 GetRating_Dec(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt64 numIterations) const;
799 };
800
801 void CBenchProps::SetLzmaCompexity()
802 {
803 EncComplex = 1200;
804 DecComplexUnc = 4;
805 DecComplexCompr = 190;
806 LzmaRatingMode = true;
807 }
808
809 UInt64 CBenchProps::GetRating_Enc(UInt64 dictSize, UInt64 elapsedTime, UInt64 freq, UInt64 size) const
810 {
811 if (dictSize < (1 << kBenchMinDicLogSize))
812 dictSize = (1 << kBenchMinDicLogSize);
813 Int32 encComplex = EncComplex;
814 if (LzmaRatingMode)
815 {
816 /*
817 for (UInt64 uu = 0; uu < (UInt64)0xf << 60;)
818 {
819 unsigned rr = GetLogSize_Sub(uu);
820 printf("\n%16I64x , log = %4x", uu, rr);
821 uu += 1;
822 uu += uu / 50;
823 }
824 */
825 // throw 1;
826 const UInt32 t = GetLogSize_Sub(dictSize) - (kBenchMinDicLogSize << kSubBits);
827 encComplex = 870 + ((t * t * 5) >> (2 * kSubBits));
828 }
829 const UInt64 numCommands = GetNumCommands_from_Size_and_Complexity(size, encComplex);
830 return MyMultDiv64(numCommands, freq, elapsedTime);
831 }
832
833 UInt64 CBenchProps::GetRating_Dec(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt64 numIterations) const
834 {
835 const UInt64 numCommands = GetNumCommands_Dec(inSize, outSize) * numIterations;
836 return MyMultDiv64(numCommands, freq, elapsedTime);
837 }
838
839
840
841 UInt64 CBenchInfo::GetRating_LzmaEnc(UInt64 dictSize) const
842 {
843 CBenchProps props;
844 props.SetLzmaCompexity();
845 return props.GetRating_Enc(dictSize, GlobalTime, GlobalFreq, UnpackSize * NumIterations);
846 }
847
848 UInt64 CBenchInfo::GetRating_LzmaDec() const
849 {
850 CBenchProps props;
851 props.SetLzmaCompexity();
852 return props.GetRating_Dec(GlobalTime, GlobalFreq, UnpackSize, PackSize, NumIterations);
853 }
854
855
856 #ifndef Z7_ST
857
858 #define NUM_CPU_LEVELS_MAX 3
859
860 struct CAffinityMode
861 {
862 unsigned NumBundleThreads;
863 unsigned NumLevels;
864 unsigned NumCoreThreads;
865 unsigned NumCores;
866 // unsigned DivideNum;
867 UInt32 Sizes[NUM_CPU_LEVELS_MAX];
868
869 void SetLevels(unsigned numCores, unsigned numCoreThreads);
870 DWORD_PTR GetAffinityMask(UInt32 bundleIndex, CCpuSet *cpuSet) const;
871 bool NeedAffinity() const { return NumBundleThreads != 0; }
872
873 WRes CreateThread_WithAffinity(NWindows::CThread &thread, THREAD_FUNC_TYPE startAddress, LPVOID parameter, UInt32 bundleIndex) const
874 {
875 if (NeedAffinity())
876 {
877 CCpuSet cpuSet;
878 GetAffinityMask(bundleIndex, &cpuSet);
879 return thread.Create_With_CpuSet(startAddress, parameter, &cpuSet);
880 }
881 return thread.Create(startAddress, parameter);
882 }
883
884 CAffinityMode():
885 NumBundleThreads(0),
886 NumLevels(0),
887 NumCoreThreads(1)
888 // DivideNum(1)
889 {}
890 };
891
892 void CAffinityMode::SetLevels(unsigned numCores, unsigned numCoreThreads)
893 {
894 NumCores = numCores;
895 NumCoreThreads = numCoreThreads;
896 NumLevels = 0;
897 if (numCoreThreads == 0 || numCores == 0 || numCores % numCoreThreads != 0)
898 return;
899 UInt32 c = numCores / numCoreThreads;
900 UInt32 c2 = 1;
901 while ((c & 1) == 0)
902 {
903 c >>= 1;
904 c2 <<= 1;
905 }
906 if (c2 != 1)
907 Sizes[NumLevels++] = c2;
908 if (c != 1)
909 Sizes[NumLevels++] = c;
910 if (numCoreThreads != 1)
911 Sizes[NumLevels++] = numCoreThreads;
912 if (NumLevels == 0)
913 Sizes[NumLevels++] = 1;
914
915 /*
916 printf("\n Cores:");
917 for (unsigned i = 0; i < NumLevels; i++)
918 {
919 printf(" %d", Sizes[i]);
920 }
921 printf("\n");
922 */
923 }
924
925
926 DWORD_PTR CAffinityMode::GetAffinityMask(UInt32 bundleIndex, CCpuSet *cpuSet) const
927 {
928 CpuSet_Zero(cpuSet);
929
930 if (NumLevels == 0)
931 return 0;
932
933 // printf("\n%2d", bundleIndex);
934
935 /*
936 UInt32 low = 0;
937 if (DivideNum != 1)
938 {
939 low = bundleIndex % DivideNum;
940 bundleIndex /= DivideNum;
941 }
942 */
943
944 UInt32 numGroups = NumCores / NumBundleThreads;
945 UInt32 m = bundleIndex % numGroups;
946 UInt32 v = 0;
947 for (unsigned i = 0; i < NumLevels; i++)
948 {
949 UInt32 size = Sizes[i];
950 while ((size & 1) == 0)
951 {
952 v *= 2;
953 v |= (m & 1);
954 m >>= 1;
955 size >>= 1;
956 }
957 v *= size;
958 v += m % size;
959 m /= size;
960 }
961
962 // UInt32 nb = NumBundleThreads / DivideNum;
963 UInt32 nb = NumBundleThreads;
964
965 DWORD_PTR mask = ((DWORD_PTR)1 << nb) - 1;
966 // v += low;
967 mask <<= v;
968
969 // printf(" %2d %8x \n ", v, (unsigned)mask);
970 #ifdef _WIN32
971 *cpuSet = mask;
972 #else
973 {
974 for (unsigned k = 0; k < nb; k++)
975 CpuSet_Set(cpuSet, v + k);
976 }
977 #endif
978
979 return mask;
980 }
981
982
983 struct CBenchSyncCommon
984 {
985 bool ExitMode;
986 NSynchronization::CManualResetEvent StartEvent;
987
988 CBenchSyncCommon(): ExitMode(false) {}
989 };
990
991 #endif
992
993
994
// Selects when a CRC check is performed (used as CEncoderInfo::CheckCrcMode_Dec).
// NOTE(review): the exact meaning of each mode is defined by the decoding
// code, which is outside this part of the file.
enum E_CheckCrcMode
{
  k_CheckCrcMode_Never     = 0,
  k_CheckCrcMode_Always    = 1,
  k_CheckCrcMode_FirstPass = 2
};
1001
1002 class CEncoderInfo;
1003
1004 class CEncoderInfo Z7_final
1005 {
1006 Z7_CLASS_NO_COPY(CEncoderInfo)
1007
1008 public:
1009
1010 #ifndef Z7_ST
1011 NWindows::CThread thread[2];
1012 NSynchronization::CManualResetEvent ReadyEvent;
1013 UInt32 NumDecoderSubThreads;
1014 CBenchSyncCommon *Common;
1015 UInt32 EncoderIndex;
1016 UInt32 NumEncoderInternalThreads;
1017 CAffinityMode AffinityMode;
1018 bool IsGlobalMtMode; // if more than one benchmark encoder threads
1019 #endif
1020
1021 CMyComPtr<ICompressCoder> _encoder;
1022 CMyComPtr<ICompressFilter> _encoderFilter;
1023 CBenchProgressInfo *progressInfoSpec[2];
1024 CMyComPtr<ICompressProgressInfo> progressInfo[2];
1025 UInt64 NumIterations;
1026
1027 UInt32 Salt;
1028
1029 #ifdef USE_ALLOCA
1030 size_t AllocaSize;
1031 #endif
1032
1033 unsigned KeySize;
1034 Byte _key[32];
1035 Byte _iv[16];
1036
1037 HRESULT Set_Key_and_IV(ICryptoProperties *cp)
1038 {
1039 RINOK(cp->SetKey(_key, KeySize))
1040 return cp->SetInitVector(_iv, sizeof(_iv));
1041 }
1042
1043 Byte _psw[16];
1044
1045 bool CheckCrc_Enc; /* = 1, if we want to check packed data crcs after each pass
1046 used for filter and usual coders */
1047 bool UseRealData_Enc; /* = 1, if we want to use only original data for each pass
1048 used only for filter */
1049 E_CheckCrcMode CheckCrcMode_Dec;
1050
1051 struct CDecoderInfo
1052 {
1053 CEncoderInfo *Encoder;
1054 UInt32 DecoderIndex;
1055 bool CallbackMode;
1056
1057 #ifdef USE_ALLOCA
1058 size_t AllocaSize;
1059 #endif
1060 };
1061 CDecoderInfo decodersInfo[2];
1062
1063 CMyComPtr<ICompressCoder> _decoders[2];
1064 CMyComPtr<ICompressFilter> _decoderFilter;
1065
1066 HRESULT Results[2];
1067 CBenchmarkOutStream *outStreamSpec;
1068 CMyComPtr<ISequentialOutStream> outStream;
1069 IBenchCallback *callback;
1070 IBenchPrintCallback *printCallback;
1071 UInt32 crc;
1072 size_t kBufferSize;
1073 size_t compressedSize;
1074 const Byte *uncompressedDataPtr;
1075
1076 const Byte *fileData;
1077 CBenchRandomGenerator rg;
1078
1079 CMidAlignedBuffer rgCopy; // it must be 16-byte aligned !!!
1080
1081 // CBenchmarkOutStream *propStreamSpec;
1082 Byte propsData[kMaxMethodPropSize];
1083 CBufPtrSeqOutStream *propStreamSpec;
1084 CMyComPtr<ISequentialOutStream> propStream;
1085
1086 unsigned generateDictBits;
1087 COneMethodInfo _method;
1088
1089 // for decode
1090 size_t _uncompressedDataSize;
1091
1092 HRESULT Generate();
1093 HRESULT Encode();
1094 HRESULT Decode(UInt32 decoderIndex);
1095
1096 CEncoderInfo():
1097 #ifndef Z7_ST
1098 Common(NULL),
1099 IsGlobalMtMode(true),
1100 #endif
1101 Salt(0),
1102 KeySize(0),
1103 CheckCrc_Enc(true),
1104 UseRealData_Enc(true),
1105 CheckCrcMode_Dec(k_CheckCrcMode_Always),
1106 outStreamSpec(NULL),
1107 callback(NULL),
1108 printCallback(NULL),
1109 fileData(NULL),
1110 propStreamSpec(NULL)
1111 {}
1112
1113 #ifndef Z7_ST
1114
1115 static THREAD_FUNC_DECL EncodeThreadFunction(void *param)
1116 {
1117 HRESULT res;
1118 CEncoderInfo *encoder = (CEncoderInfo *)param;
1119 try
1120 {
1121 #ifdef USE_ALLOCA
1122 alloca(encoder->AllocaSize);
1123 #endif
1124
1125 res = encoder->Encode();
1126 }
1127 catch(...)
1128 {
1129 res = E_FAIL;
1130 }
1131 encoder->Results[0] = res;
1132 if (res != S_OK)
1133 encoder->progressInfoSpec[0]->Status->SetResult(res);
1134 encoder->ReadyEvent.Set();
1135 return THREAD_FUNC_RET_ZERO;
1136 }
1137
1138 static THREAD_FUNC_DECL DecodeThreadFunction(void *param)
1139 {
1140 CDecoderInfo *decoder = (CDecoderInfo *)param;
1141
1142 #ifdef USE_ALLOCA
1143 alloca(decoder->AllocaSize);
1144 #endif
1145
1146 CEncoderInfo *encoder = decoder->Encoder;
1147 encoder->Results[decoder->DecoderIndex] = encoder->Decode(decoder->DecoderIndex);
1148 return THREAD_FUNC_RET_ZERO;
1149 }
1150
1151 HRESULT CreateEncoderThread()
1152 {
1153 WRes res = 0;
1154 if (!ReadyEvent.IsCreated())
1155 res = ReadyEvent.Create();
1156 if (res == 0)
1157 res = AffinityMode.CreateThread_WithAffinity(thread[0], EncodeThreadFunction, this,
1158 EncoderIndex);
1159 return HRESULT_FROM_WIN32(res);
1160 }
1161
1162 HRESULT CreateDecoderThread(unsigned index, bool callbackMode
1163 #ifdef USE_ALLOCA
1164 , size_t allocaSize
1165 #endif
1166 )
1167 {
1168 CDecoderInfo &decoder = decodersInfo[index];
1169 decoder.DecoderIndex = index;
1170 decoder.Encoder = this;
1171
1172 #ifdef USE_ALLOCA
1173 decoder.AllocaSize = allocaSize;
1174 #endif
1175
1176 decoder.CallbackMode = callbackMode;
1177
1178 WRes res = AffinityMode.CreateThread_WithAffinity(thread[index], DecodeThreadFunction, &decoder,
1179 // EncoderIndex * NumEncoderInternalThreads + index
1180 EncoderIndex
1181 );
1182
1183 return HRESULT_FROM_WIN32(res);
1184 }
1185
1186 #endif
1187 };
1188
1189
1190
1191
1192 static size_t GetBenchCompressedSize(size_t bufferSize)
1193 {
1194 return kCompressedAdditionalSize + bufferSize + bufferSize / 16;
1195 // kBufferSize / 2;
1196 }
1197
1198
1199 HRESULT CEncoderInfo::Generate()
1200 {
1201 const COneMethodInfo &method = _method;
1202
1203 // we need extra space, if input data is already compressed
1204 const size_t kCompressedBufferSize = _encoderFilter ?
1205 kBufferSize :
1206 GetBenchCompressedSize(kBufferSize);
1207
1208 if (kCompressedBufferSize < kBufferSize)
1209 return E_FAIL;
1210
1211 uncompressedDataPtr = fileData;
1212 if (fileData)
1213 {
1214 #if !defined(Z7_ST)
1215 if (IsGlobalMtMode)
1216 {
1217 /* we copy the data to local buffer of thread to eliminate
1218 using of shared buffer by different threads */
1219 ALLOC_WITH_HRESULT(&rg, kBufferSize)
1220 memcpy((Byte *)rg, fileData, kBufferSize);
1221 uncompressedDataPtr = (const Byte *)rg;
1222 }
1223 #endif
1224 }
1225 else
1226 {
1227 ALLOC_WITH_HRESULT(&rg, kBufferSize)
1228 // DWORD ttt = GetTickCount();
1229 if (generateDictBits == 0)
1230 rg.GenerateSimpleRandom(Salt);
1231 else
1232 {
1233 if (generateDictBits >= sizeof(size_t) * 8
1234 && kBufferSize > ((size_t)1 << (sizeof(size_t) * 8 - 1)))
1235 return E_INVALIDARG;
1236 rg.GenerateLz(generateDictBits, Salt);
1237 // return E_ABORT; // for debug
1238 }
1239 // printf("\n%d\n ", GetTickCount() - ttt);
1240
1241 crc = CrcCalc((const Byte *)rg, rg.Size());
1242 uncompressedDataPtr = (const Byte *)rg;
1243 }
1244
1245 if (!outStream)
1246 {
1247 outStreamSpec = new CBenchmarkOutStream;
1248 outStream = outStreamSpec;
1249 }
1250
1251 ALLOC_WITH_HRESULT(outStreamSpec, kCompressedBufferSize)
1252
1253 if (_encoderFilter)
1254 {
1255 /* we try to reduce the number of memcpy() in main encoding loop.
1256 so we copy data to temp buffers here */
1257 ALLOC_WITH_HRESULT(&rgCopy, kBufferSize)
1258 memcpy((Byte *)*outStreamSpec, uncompressedDataPtr, kBufferSize);
1259 memcpy((Byte *)rgCopy, uncompressedDataPtr, kBufferSize);
1260 }
1261
1262 if (!propStream)
1263 {
1264 propStreamSpec = new CBufPtrSeqOutStream; // CBenchmarkOutStream;
1265 propStream = propStreamSpec;
1266 }
1267 // ALLOC_WITH_HRESULT_2(propStreamSpec, kMaxMethodPropSize);
1268 // propStreamSpec->Init(true, false);
1269 propStreamSpec->Init(propsData, sizeof(propsData));
1270
1271
1272 CMyComPtr<IUnknown> coder;
1273 if (_encoderFilter)
1274 coder = _encoderFilter;
1275 else
1276 coder = _encoder;
1277 {
1278 CMyComPtr<ICompressSetCoderProperties> scp;
1279 coder.QueryInterface(IID_ICompressSetCoderProperties, &scp);
1280 if (scp)
1281 {
1282 const UInt64 reduceSize = kBufferSize;
1283
1284 /* in posix new thread uses same affinity as parent thread,
1285 so we don't need to send affinity to coder in posix */
1286 UInt64 affMask;
1287 #if !defined(Z7_ST) && defined(_WIN32)
1288 {
1289 CCpuSet cpuSet;
1290 affMask = AffinityMode.GetAffinityMask(EncoderIndex, &cpuSet);
1291 }
1292 #else
1293 affMask = 0;
1294 #endif
1295 // affMask <<= 3; // debug line: to test no affinity in coder;
1296 // affMask = 0;
1297
1298 RINOK(method.SetCoderProps_DSReduce_Aff(scp, &reduceSize, (affMask != 0 ? &affMask : NULL)))
1299 }
1300 else
1301 {
1302 if (method.AreThereNonOptionalProps())
1303 return E_INVALIDARG;
1304 }
1305
1306 CMyComPtr<ICompressWriteCoderProperties> writeCoderProps;
1307 coder.QueryInterface(IID_ICompressWriteCoderProperties, &writeCoderProps);
1308 if (writeCoderProps)
1309 {
1310 RINOK(writeCoderProps->WriteCoderProperties(propStream))
1311 }
1312
1313 {
1314 CMyComPtr<ICryptoSetPassword> sp;
1315 coder.QueryInterface(IID_ICryptoSetPassword, &sp);
1316 if (sp)
1317 {
1318 RINOK(sp->CryptoSetPassword(_psw, sizeof(_psw)))
1319
1320 // we must call encoding one time to calculate password key for key cache.
1321 // it must be after WriteCoderProperties!
1322 Byte temp[16];
1323 memset(temp, 0, sizeof(temp));
1324
1325 if (_encoderFilter)
1326 {
1327 _encoderFilter->Init();
1328 _encoderFilter->Filter(temp, sizeof(temp));
1329 }
1330 else
1331 {
1332 CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
1333 CMyComPtr<ISequentialInStream> inStream = inStreamSpec;
1334 inStreamSpec->Init(temp, sizeof(temp));
1335
1336 CCrcOutStream *crcStreamSpec = new CCrcOutStream;
1337 CMyComPtr<ISequentialOutStream> crcStream = crcStreamSpec;
1338 crcStreamSpec->Init();
1339
1340 RINOK(_encoder->Code(inStream, crcStream, NULL, NULL, NULL))
1341 }
1342 }
1343 }
1344 }
1345
1346 return S_OK;
1347 }
1348
1349
1350 static void My_FilterBench(ICompressFilter *filter, Byte *data, size_t size, UInt32 *crc)
1351 {
1352 while (size != 0)
1353 {
1354 UInt32 cur = crc ? 1 << 17 : 1 << 24;
1355 if (cur > size)
1356 cur = (UInt32)size;
1357 UInt32 processed = filter->Filter(data, cur);
1358 /* if (processed > size) (in AES filter), we must fill last block with zeros.
1359 but it is not important for benchmark. So we just copy that data without filtering.
1360 if (processed == 0) then filter can't process more */
1361 if (processed > size || processed == 0)
1362 processed = (UInt32)size;
1363 if (crc)
1364 *crc = CrcUpdate(*crc, data, processed);
1365 data += processed;
1366 size -= processed;
1367 }
1368 }
1369
1370
/*
  Encoding pass of the benchmark for one encoder object.
  Steps:
    1) Generate() is called to prepare the data.
    2) If (Common) is set (multithreading mode), ReadyEvent is signaled and the function
       waits for Common->StartEvent. If Common->ExitMode is set after wakeup, it returns S_OK
       without encoding. Without (Common), the start time is set here.
    3) The data is encoded (or filtered) NumIterations times. Sizes are accumulated
       in progressInfoSpec[0]->BenchInfo.
    4) _encoder and _encoderFilter are released.
  Returns E_FAIL, if the input was not fully consumed, if the packed size differs between
  iterations, or if CRC of output differs between iterations where CRC was calculated.
*/
HRESULT CEncoderInfo::Encode()
{
  // printf("\nCEncoderInfo::Generate\n");

  RINOK(Generate())

  // printf("\n2222\n");

  #ifndef Z7_ST
  if (Common)
  {
    // report that this thread is ready, and wait for the common start signal
    Results[0] = S_OK;
    WRes wres = ReadyEvent.Set();
    if (wres == 0)
      wres = Common->StartEvent.Lock();
    if (wres != 0)
      return HRESULT_FROM_WIN32(wres);
    if (Common->ExitMode)
      return S_OK;
  }
  else
  #endif
  {
    CBenchProgressInfo *bpi = progressInfoSpec[0];
    bpi->SetStartTime();
  }


  CBenchInfo &bi = progressInfoSpec[0]->BenchInfo;
  bi.UnpackSize = 0;
  bi.PackSize = 0;
  CMyComPtr<ICryptoProperties> cp;
  CMyComPtr<IUnknown> coder;
  if (_encoderFilter)
    coder = _encoderFilter;
  else
    coder = _encoder;
  coder.QueryInterface(IID_ICryptoProperties, &cp);
  CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
  CMyComPtr<ISequentialInStream> inStream = inStreamSpec;

  if (cp)
  {
    RINOK(Set_Key_and_IV(cp))
  }

  // filter doesn't change the size of data, so the packed size is known before encoding
  compressedSize = 0;
  if (_encoderFilter)
    compressedSize = kBufferSize;

  // CBenchmarkOutStream *outStreamSpec = this->outStreamSpec;
  UInt64 prev = 0;

  /* CRC of output is calculated in iterations where ((i & mask) == 0):
       CheckCrc_Enc : (mask == 0), so CRC is calculated in each iteration.
       otherwise    : (mask == 0xFFFF), and CRC is used only if (NumIterations > 0xFFFF). */
  const UInt32 mask = (CheckCrc_Enc ? 0 : 0xFFFF);
  const bool useCrc = (mask < NumIterations);
  bool crcPrev_defined = false;
  UInt32 crcPrev = 0;

  bool useRealData_Enc = UseRealData_Enc;
  bool data_Was_Changed = false;
  if (useRealData_Enc)
  {
    /* we want memcpy() for each iteration including first iteration.
       So results will be equal for different number of iterations */
    data_Was_Changed = true;
  }

  const UInt64 numIterations = NumIterations;
  UInt64 i = numIterations;
  // printCallback->NewLine();

  // (i) counts down: the first iteration runs with (i == numIterations - 1), the last one with (i == 0)
  while (i != 0)
  {
    i--;
    // break request is checked after each (1 << 26) bytes of processed unpacked data
    if (printCallback && bi.UnpackSize - prev >= (1 << 26))
    {
      prev = bi.UnpackSize;
      RINOK(printCallback->CheckBreak())
    }

    /*
    CBenchInfo info;
    progressInfoSpec[0]->SetStartTime();
    */

    bool calcCrc = false;
    if (useCrc)
      calcCrc = (((UInt32)i & mask) == 0);

    if (_encoderFilter)
    {
      /* filter works in place. By default it processes (rgCopy).
         In the first iteration, in CRC iterations and in real data mode
         it processes the buffer of (outStreamSpec), that is refilled from
         (uncompressedDataPtr), if that buffer was changed by a previous pass. */
      Byte *filterData = rgCopy;
      if (i == numIterations - 1 || calcCrc || useRealData_Enc)
      {
        filterData = (Byte *)*outStreamSpec;
        if (data_Was_Changed)
          memcpy(filterData, uncompressedDataPtr, kBufferSize);
        data_Was_Changed = true;
      }
      _encoderFilter->Init();
      if (calcCrc)
        outStreamSpec->InitCrc();
      My_FilterBench(_encoderFilter, filterData, kBufferSize,
          calcCrc ? &outStreamSpec->Crc : NULL);
    }
    else
    {
      outStreamSpec->Init(true, calcCrc); // write real data for speed consistency at any number of iterations
      inStreamSpec->Init(uncompressedDataPtr, kBufferSize);
      RINOK(_encoder->Code(inStream, outStream, NULL, NULL, progressInfo[0]))
      if (!inStreamSpec->WasFinished())
        return E_FAIL;
      // packed size is stored in the first iteration, and it must be the same in all next iterations
      if (compressedSize != outStreamSpec->Pos)
      {
        if (compressedSize != 0)
          return E_FAIL;
        compressedSize = outStreamSpec->Pos;
      }
    }

    // outStreamSpec->Print();

    if (calcCrc)
    {
      // CRC of output must be the same in all iterations where it was calculated
      const UInt32 crc2 = CRC_GET_DIGEST(outStreamSpec->Crc);
      if (crcPrev_defined && crcPrev != crc2)
        return E_FAIL;
      crcPrev = crc2;
      crcPrev_defined = true;
    }

    bi.UnpackSize += kBufferSize;
    bi.PackSize += compressedSize;

    /*
    {
      progressInfoSpec[0]->SetFinishTime(info);
      info.UnpackSize = 0;
      info.PackSize = 0;
      info.NumIterations = 1;

      info.UnpackSize = kBufferSize;
      info.PackSize = compressedSize;
      // printf("\n%7d\n", encoder.compressedSize);

      RINOK(callback->SetEncodeResult(info, true))
      printCallback->NewLine();
    }
    */

  }

  _encoder.Release();
  _encoderFilter.Release();
  return S_OK;
}
1527
1528
/*
  Decoding pass of the benchmark for decoder with index (decoderIndex).
  The function sets up the decoder (or decoder filter): number of threads, coder properties,
  decoder properties written at encoding stage, password, key and IV.
  Then it decodes the data from the buffer of (outStreamSpec) NumIterations times.
  Sizes are accumulated in progressInfoSpec[decoderIndex]->BenchInfo.
  The decoder and _decoderFilter are released at the end.
  Returns:
    E_FAIL  : filter is used with (decoderIndex != 0), or properties can't be sent to the decoder,
              or (compressedSize) is larger than (rgCopy) in filter mode.
    S_FALSE : decoded data is wrong: bad size of unpacked data, input was not fully consumed, or bad CRC.
*/
HRESULT CEncoderInfo::Decode(UInt32 decoderIndex)
{
  CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
  CMyComPtr<ISequentialInStream> inStream = inStreamSpec;
  CMyComPtr<ICompressCoder> &decoder = _decoders[decoderIndex];
  CMyComPtr<IUnknown> coder;
  if (_decoderFilter)
  {
    // only one decoder (index 0) is supported in filter mode
    if (decoderIndex != 0)
      return E_FAIL;
    coder = _decoderFilter;
  }
  else
    coder = decoder;

  // if encoder wrote some properties, the decoder must be able to receive them
  CMyComPtr<ICompressSetDecoderProperties2> setDecProps;
  coder.QueryInterface(IID_ICompressSetDecoderProperties2, &setDecProps);
  if (!setDecProps && propStreamSpec->GetPos() != 0)
    return E_FAIL;

  CCrcOutStream *crcOutStreamSpec = new CCrcOutStream;
  CMyComPtr<ISequentialOutStream> crcOutStream = crcOutStreamSpec;

  CBenchProgressInfo *pi = progressInfoSpec[decoderIndex];
  pi->BenchInfo.UnpackSize = 0;
  pi->BenchInfo.PackSize = 0;

  #ifndef Z7_ST
  {
    CMyComPtr<ICompressSetCoderMt> setCoderMt;
    coder.QueryInterface(IID_ICompressSetCoderMt, &setCoderMt);
    if (setCoderMt)
    {
      RINOK(setCoderMt->SetNumberOfThreads(NumDecoderSubThreads))
    }
  }
  #endif

  CMyComPtr<ICompressSetCoderProperties> scp;
  coder.QueryInterface(IID_ICompressSetCoderProperties, &scp);
  if (scp)
  {
    const UInt64 reduceSize = _uncompressedDataSize;
    RINOK(_method.SetCoderProps(scp, &reduceSize))
  }

  CMyComPtr<ICryptoProperties> cp;
  coder.QueryInterface(IID_ICryptoProperties, &cp);

  if (setDecProps)
  {
    RINOK(setDecProps->SetDecoderProperties2(
        /* (const Byte *)*propStreamSpec, */
        propsData,
        (UInt32)propStreamSpec->GetPos()))
  }

  {
    CMyComPtr<ICryptoSetPassword> sp;
    coder.QueryInterface(IID_ICryptoSetPassword, &sp);
    if (sp)
    {
      RINOK(sp->CryptoSetPassword(_psw, sizeof(_psw)))
    }
  }

  UInt64 prev = 0;

  if (cp)
  {
    RINOK(Set_Key_and_IV(cp))
  }

  CMyComPtr<ICompressSetFinishMode> setFinishMode;

  if (_decoderFilter)
  {
    // (rgCopy) can be used as working copy of packed data in the loop below
    if (compressedSize > rgCopy.Size())
      return E_FAIL;
  }
  else
  {
    decoder->QueryInterface(IID_ICompressSetFinishMode, (void **)&setFinishMode);
  }

  const UInt64 numIterations = NumIterations;
  const E_CheckCrcMode checkCrcMode = CheckCrcMode_Dec;

  for (UInt64 i = 0; i < numIterations; i++)
  {
    // break request is checked after each (1 << 26) bytes of unpacked data
    if (printCallback && pi->BenchInfo.UnpackSize - prev >= (1 << 26))
    {
      RINOK(printCallback->CheckBreak())
      prev = pi->BenchInfo.UnpackSize;
    }

    const UInt64 outSize = kBufferSize;
    bool calcCrc = (checkCrcMode != k_CheckCrcMode_Never);

    crcOutStreamSpec->Init();

    if (_decoderFilter)
    {
      /* filter works in place:
           k_CheckCrcMode_Always : packed data is copied to (rgCopy) in each iteration,
                                   and CRC is checked in each iteration.
           other modes with CRC  : the buffer of (outStreamSpec) is filtered directly,
                                   and CRC is checked only in the first iteration (i == 0). */
      Byte *filterData = (Byte *)*outStreamSpec;
      if (calcCrc)
      {
        calcCrc = (i == 0);
        if (checkCrcMode == k_CheckCrcMode_Always)
        {
          calcCrc = true;
          memcpy((Byte *)rgCopy, (const Byte *)*outStreamSpec, compressedSize);
          filterData = rgCopy;
        }
      }
      _decoderFilter->Init();
      My_FilterBench(_decoderFilter, filterData, compressedSize,
          calcCrc ? &crcOutStreamSpec->Crc : NULL);
    }
    else
    {
      crcOutStreamSpec->CalcCrc = calcCrc;
      inStreamSpec->Init((const Byte *)*outStreamSpec, compressedSize);

      if (setFinishMode)
      {
        RINOK(setFinishMode->SetFinishMode(BoolToUInt(true)))
      }

      RINOK(decoder->Code(inStream, crcOutStream, NULL, &outSize, progressInfo[decoderIndex]))

      if (setFinishMode)
      {
        // in finish mode the decoder must consume the whole packed stream
        if (!inStreamSpec->WasFinished())
          return S_FALSE;

        CMyComPtr<ICompressGetInStreamProcessedSize> getInStreamProcessedSize;
        decoder.QueryInterface(IID_ICompressGetInStreamProcessedSize, (void **)&getInStreamProcessedSize);

        if (getInStreamProcessedSize)
        {
          UInt64 processed;
          RINOK(getInStreamProcessedSize->GetInStreamProcessedSize(&processed))
          if (processed != compressedSize)
            return S_FALSE;
        }
      }

      if (crcOutStreamSpec->Pos != outSize)
        return S_FALSE;
    }

    // (crc) is the expected CRC of unpacked data
    if (calcCrc && CRC_GET_DIGEST(crcOutStreamSpec->Crc) != crc)
      return S_FALSE;

    pi->BenchInfo.UnpackSize += kBufferSize;
    pi->BenchInfo.PackSize += compressedSize;
  }

  decoder.Release();
  _decoderFilter.Release();
  return S_OK;
}
1691
1692
// (1 << 12) == 4096.
// NOTE(review): presumably the upper limit for the number of benchmark threads - confirm at the places of use.
static const UInt32 kNumThreadsMax = (1 << 12);
1694
1695 struct CBenchEncoders
1696 {
1697 CEncoderInfo *encoders;
1698 CBenchEncoders(UInt32 num): encoders(NULL) { encoders = new CEncoderInfo[num]; }
1699 ~CBenchEncoders() { delete []encoders; }
1700 };
1701
1702
1703 static UInt64 GetNumIterations(UInt64 numCommands, UInt64 complexInCommands)
1704 {
1705 if (numCommands < (1 << 4))
1706 numCommands = (1 << 4);
1707 UInt64 res = complexInCommands / numCommands;
1708 return (res == 0 ? 1 : res);
1709 }
1710
1711
1712
1713 #ifndef Z7_ST
1714
1715 // ---------- CBenchThreadsFlusher ----------
1716
1717 struct CBenchThreadsFlusher
1718 {
1719 CBenchEncoders *EncodersSpec;
1720 CBenchSyncCommon Common;
1721 unsigned NumThreads;
1722 bool NeedClose;
1723
1724 CBenchThreadsFlusher(): NumThreads(0), NeedClose(false) {}
1725
1726 ~CBenchThreadsFlusher()
1727 {
1728 StartAndWait(true);
1729 }
1730
1731 WRes StartAndWait(bool exitMode = false);
1732 };
1733
1734
1735 WRes CBenchThreadsFlusher::StartAndWait(bool exitMode)
1736 {
1737 if (!NeedClose)
1738 return 0;
1739
1740 Common.ExitMode = exitMode;
1741 WRes res = Common.StartEvent.Set();
1742
1743 for (unsigned i = 0; i < NumThreads; i++)
1744 {
1745 NWindows::CThread &t = EncodersSpec->encoders[i].thread[0];
1746 if (t.IsCreated())
1747 {
1748 WRes res2 = t.Wait_Close();
1749 if (res == 0)
1750 res = res2;
1751 }
1752 }
1753 NeedClose = false;
1754 return res;
1755 }
1756
1757 #endif // Z7_ST
1758
1759
1760
1761 static void SetPseudoRand(Byte *data, size_t size, UInt32 startValue)
1762 {
1763 for (size_t i = 0; i < size; i++)
1764 {
1765 data[i] = (Byte)startValue;
1766 startValue++;
1767 }
1768 }
1769
1770
1771
/*
  Benchmark of one method: encoding stage and then decoding stage.
    complexInCommands : it's used with (benchProps) to calculate the number of iterations.
    oldLzmaBenchMode, numThreads, affinityMode : threading parameters (only if Z7_ST is not defined).
    method2           : method and properties. The function works with its own copy.
    uncompressedDataSize, fileData, generateDictBits : they are sent to each encoder object.
                        If (fileData) is not NULL, real data mode and CRC check
                        in each decoding iteration are used.
    printCallback     : optional callback for break check.
    callback          : it receives the results: SetEncodeResult() and SetDecodeResult().
    benchProps        : complexity parameters of the method.
  Returns:
    E_NOTIMPL    : the method was not found, or coder was not created.
    E_INVALIDARG : the method uses number of streams other than 1.
    or the first error from threads / coders / callbacks.
*/
static HRESULT MethodBench(
    DECL_EXTERNAL_CODECS_LOC_VARS
    UInt64 complexInCommands,
    #ifndef Z7_ST
      bool oldLzmaBenchMode,
      UInt32 numThreads,
      const CAffinityMode *affinityMode,
    #endif
    const COneMethodInfo &method2,
    size_t uncompressedDataSize,
    const Byte *fileData,
    unsigned generateDictBits,

    IBenchPrintCallback *printCallback,
    IBenchCallback *callback,
    CBenchProps *benchProps)
{
  COneMethodInfo method = method2;
  UInt64 methodId;
  UInt32 numStreams;
  bool isFilter;
  const int codecIndex = FindMethod_Index(
      EXTERNAL_CODECS_LOC_VARS
      method.MethodName, true,
      methodId, numStreams, isFilter);
  if (codecIndex < 0)
    return E_NOTIMPL;
  if (numStreams != 1)
    return E_INVALIDARG;

  UInt32 numEncoderThreads = 1;
  UInt32 numSubDecoderThreads = 1;

  #ifndef Z7_ST
  numEncoderThreads = numThreads;

  /* old LZMA benchmark mode: if LZMA encoder uses more than one thread,
     the number of encoder objects is reduced to ((numThreads + 1) / 2),
     and two decoders are used for each encoder object. */
  if (oldLzmaBenchMode)
  if (methodId == k_LZMA)
  {
    if (numThreads == 1 && method.Get_NumThreads() < 0)
      method.AddProp_NumThreads(1);
    const UInt32 numLzmaThreads = method.Get_Lzma_NumThreads();
    if (numThreads > 1 && numLzmaThreads > 1)
    {
      numEncoderThreads = (numThreads + 1) / 2; // 20.03
      numSubDecoderThreads = 2;
    }
  }

  // encoding runs in separate threads, if there are several encoders or if affinity is required
  const bool mtEncMode = (numEncoderThreads > 1) || affinityMode->NeedAffinity();

  #endif

  CBenchEncoders encodersSpec(numEncoderThreads);
  CEncoderInfo *encoders = encodersSpec.encoders;

  UInt32 i;

  // ---------- create coders and set check modes ----------

  for (i = 0; i < numEncoderThreads; i++)
  {
    CEncoderInfo &encoder = encoders[i];
    // only the first encoder gets the results callback
    encoder.callback = (i == 0) ? callback : NULL;
    encoder.printCallback = printCallback;

    #ifndef Z7_ST
    encoder.EncoderIndex = i;
    encoder.NumEncoderInternalThreads = numSubDecoderThreads;
    encoder.AffinityMode = *affinityMode;

    /*
    if (numSubDecoderThreads > 1)
    if (encoder.AffinityMode.NeedAffinity()
        && encoder.AffinityMode.NumBundleThreads == 1)
    {
      // if old LZMA benchmark uses two threads in coder, we increase (NumBundleThreads) for old LZMA benchmark uses two threads instead of one
      if (encoder.AffinityMode.NumBundleThreads * 2 <= encoder.AffinityMode.NumCores)
        encoder.AffinityMode.NumBundleThreads *= 2;
    }
    */

    #endif

    {
      CCreatedCoder cod;
      RINOK(CreateCoder_Index(EXTERNAL_CODECS_LOC_VARS (unsigned)codecIndex, true, encoder._encoderFilter, cod))
      encoder._encoder = cod.Coder;
      if (!encoder._encoder && !encoder._encoderFilter)
        return E_NOTIMPL;
    }

    // fixed byte sequences for IV, key and password
    SetPseudoRand(encoder._iv,  sizeof(encoder._iv), 17);
    SetPseudoRand(encoder._key, sizeof(encoder._key), 51);
    SetPseudoRand(encoder._psw, sizeof(encoder._psw), 123);

    for (UInt32 j = 0; j < numSubDecoderThreads; j++)
    {
      CCreatedCoder cod;
      CMyComPtr<ICompressCoder> &decoder = encoder._decoders[j];
      RINOK(CreateCoder_Id(EXTERNAL_CODECS_LOC_VARS methodId, false, encoder._decoderFilter, cod))
      decoder = cod.Coder;
      if (!encoder._decoderFilter && !decoder)
        return E_NOTIMPL;
    }

    // real data and CRC check in each encoding iteration are used only if (EncComplex > 30)
    encoder.UseRealData_Enc =
    encoder.CheckCrc_Enc = (benchProps->EncComplex) > 30;

    // CRC is checked in each decoding iteration, except of methods
    // with (DecComplexCompr + DecComplexUnc <= 30), where only the first pass is checked
    encoder.CheckCrcMode_Dec = k_CheckCrcMode_Always;
    if (benchProps->DecComplexCompr +
        benchProps->DecComplexUnc <= 30)
      encoder.CheckCrcMode_Dec =
          k_CheckCrcMode_FirstPass; // for filters
          // k_CheckCrcMode_Never; // for debug
          // k_CheckCrcMode_Always; // for debug
    if (fileData)
    {
      encoder.UseRealData_Enc = true;
      encoder.CheckCrcMode_Dec = k_CheckCrcMode_Always;
    }
  }

  // CRC of file data is calculated here. It stays 0, if there is no file data.
  UInt32 crc = 0;
  if (fileData)
    crc = CrcCalc(fileData, uncompressedDataSize);

  for (i = 0; i < numEncoderThreads; i++)
  {
    CEncoderInfo &encoder = encoders[i];
    encoder._method = method;
    encoder.generateDictBits = generateDictBits;
    encoder._uncompressedDataSize = uncompressedDataSize;
    encoder.kBufferSize = uncompressedDataSize;
    encoder.fileData = fileData;
    encoder.crc = crc;
  }

  // ---------- Encode ----------

  CBenchProgressStatus status;
  status.Res = S_OK;
  status.EncodeMode = true;

  #ifndef Z7_ST
  // (encoderFlusher) is declared after (encodersSpec), so its destructor
  // waits for encoder threads before the array of encoders is deleted
  CBenchThreadsFlusher encoderFlusher;
  if (mtEncMode)
  {
    WRes wres = encoderFlusher.Common.StartEvent.Create();
    if (wres != 0)
      return HRESULT_FROM_WIN32(wres);
    encoderFlusher.NumThreads = numEncoderThreads;
    encoderFlusher.EncodersSpec = &encodersSpec;
    encoderFlusher.NeedClose = true;
  }
  #endif

  for (i = 0; i < numEncoderThreads; i++)
  {
    CEncoderInfo &encoder = encoders[i];
    encoder.NumIterations = GetNumIterations(benchProps->GetNumCommands_Enc(uncompressedDataSize), complexInCommands);
    // encoder.NumIterations = 3;
    encoder.Salt = g_CrcTable[i & 0xFF];
    encoder.Salt ^= (g_CrcTable[(i >> 8) & 0xFF] << 3);
    // (g_CrcTable[0] == 0), and (encoder.Salt == 0) for first thread
    // printf(" %8x", encoder.Salt);

    encoder.KeySize = benchProps->KeySize;

    // two progress objects for each encoder. All of them share the same (status)
    for (int j = 0; j < 2; j++)
    {
      CBenchProgressInfo *spec = new CBenchProgressInfo;
      encoder.progressInfoSpec[j] = spec;
      encoder.progressInfo[j] = spec;
      spec->Status = &status;
    }

    if (i == 0)
    {
      CBenchProgressInfo *bpi = encoder.progressInfoSpec[0];
      bpi->Callback = callback;
      bpi->BenchInfo.NumIterations = numEncoderThreads;
    }

    #ifndef Z7_ST
    if (mtEncMode)
    {
      #ifdef USE_ALLOCA
      // different value for each thread
      encoder.AllocaSize = (i * 16 * 21) & 0x7FF;
      #endif

      encoder.Common = &encoderFlusher.Common;
      encoder.IsGlobalMtMode = numEncoderThreads > 1;
      RINOK(encoder.CreateEncoderThread())
    }
    #endif
  }

  if (printCallback)
  {
    RINOK(printCallback->CheckBreak())
  }

  #ifndef Z7_ST
  if (mtEncMode)
  {
    // we wait until all encoder threads are ready
    for (i = 0; i < numEncoderThreads; i++)
    {
      CEncoderInfo &encoder = encoders[i];
      const WRes wres = encoder.ReadyEvent.Lock();
      if (wres != 0)
        return HRESULT_FROM_WIN32(wres);
      RINOK(encoder.Results[0])
    }

    // the time is measured from this point
    CBenchProgressInfo *bpi = encoders[0].progressInfoSpec[0];
    bpi->SetStartTime();

    // start all encoder threads and wait for them
    const WRes wres = encoderFlusher.StartAndWait();
    if (status.Res == 0 && wres != 0)
      return HRESULT_FROM_WIN32(wres);
  }
  else
  #endif
  {
    RINOK(encoders[0].Encode())
  }

  RINOK(status.Res)

  CBenchInfo info;

  encoders[0].progressInfoSpec[0]->SetFinishTime(info);
  info.UnpackSize = 0;
  info.PackSize = 0;
  info.NumIterations = encoders[0].NumIterations;

  // sizes of one iteration are summed over all encoders
  for (i = 0; i < numEncoderThreads; i++)
  {
    const CEncoderInfo &encoder = encoders[i];
    info.UnpackSize += encoder.kBufferSize;
    info.PackSize += encoder.compressedSize;
    // printf("\n%7d\n", encoder.compressedSize);
  }

  RINOK(callback->SetEncodeResult(info, true))




  // ---------- Decode ----------

  status.Res = S_OK;
  status.EncodeMode = false;

  const UInt32 numDecoderThreads = numEncoderThreads * numSubDecoderThreads;
  #ifndef Z7_ST
  const bool mtDecoderMode = (numDecoderThreads > 1) || affinityMode->NeedAffinity();
  #endif

  for (i = 0; i < numEncoderThreads; i++)
  {
    CEncoderInfo &encoder = encoders[i];

    /*
    #ifndef Z7_ST
    // encoder.affinityMode = *affinityMode;
    if (encoder.NumEncoderInternalThreads != 1)
      encoder.AffinityMode.DivideNum = encoder.NumEncoderInternalThreads;
    #endif
    */


    // the number of iterations is calculated from the sizes of first encoder,
    // and it's used for all other encoders
    if (i == 0)
    {
      encoder.NumIterations = GetNumIterations(
          benchProps->GetNumCommands_Dec(
              encoder.compressedSize,
              encoder.kBufferSize),
          complexInCommands);
      CBenchProgressInfo *bpi = encoder.progressInfoSpec[0];
      bpi->Callback = callback;
      bpi->BenchInfo.NumIterations = numDecoderThreads;
      bpi->SetStartTime();
    }
    else
      encoder.NumIterations = encoders[0].NumIterations;

    #ifndef Z7_ST
    {
      int numSubThreads = method.Get_NumThreads();
      encoder.NumDecoderSubThreads = (numSubThreads <= 0) ? 1 : (unsigned)numSubThreads;
    }
    if (mtDecoderMode)
    {
      for (UInt32 j = 0; j < numSubDecoderThreads; j++)
      {
        const HRESULT res = encoder.CreateDecoderThread(j, (i == 0 && j == 0)
            #ifdef USE_ALLOCA
            , ((i * numSubDecoderThreads + j) * 16 * 21) & 0x7FF
            #endif
            );
        RINOK(res)
      }
    }
    else
    #endif
    {
      RINOK(encoder.Decode(0))
    }
  }

  #ifndef Z7_ST
  if (mtDecoderMode)
  {
    // we wait for all decoder threads, and we keep the first error
    WRes wres = 0;
    HRESULT res = S_OK;
    for (i = 0; i < numEncoderThreads; i++)
      for (UInt32 j = 0; j < numSubDecoderThreads; j++)
      {
        CEncoderInfo &encoder = encoders[i];
        const WRes wres2 = encoder.thread[j].
            // Wait(); // later we can get thread times from thread in UNDER_CE
            Wait_Close();
        if (wres == 0 && wres2 != 0)
          wres = wres2;
        const HRESULT res2 = encoder.Results[j];
        if (res == 0 && res2 != 0)
          res = res2;
      }
    if (wres != 0)
      return HRESULT_FROM_WIN32(wres);
    RINOK(res)
  }
  #endif // Z7_ST

  RINOK(status.Res)
  encoders[0].progressInfoSpec[0]->SetFinishTime(info);

  /*
  #ifndef Z7_ST
  #ifdef UNDER_CE
  if (mtDecoderMode)
    for (i = 0; i < numEncoderThreads; i++)
      for (UInt32 j = 0; j < numSubDecoderThreads; j++)
      {
        FILETIME creationTime, exitTime, kernelTime, userTime;
        if (::GetThreadTimes(encoders[i].thread[j], &creationTime, &exitTime, &kernelTime, &userTime) != 0)
          info.UserTime += GetTime64(userTime) + GetTime64(kernelTime);
      }
  #endif
  #endif
  */

  info.UnpackSize = 0;
  info.PackSize = 0;
  // each encoder object runs (numSubDecoderThreads) decoders
  info.NumIterations = numSubDecoderThreads * encoders[0].NumIterations;

  for (i = 0; i < numEncoderThreads; i++)
  {
    const CEncoderInfo &encoder = encoders[i];
    info.UnpackSize += encoder.kBufferSize;
    info.PackSize += encoder.compressedSize;
  }

  // RINOK(callback->SetDecodeResult(info, false)) // why we called before 21.03 ??
  RINOK(callback->SetDecodeResult(info, true))

  return S_OK;
}
2138
2139
2140
2141 static inline UInt64 GetDictSizeFromLog(unsigned dictSizeLog)
2142 {
2143 /*
2144 if (dictSizeLog < 32)
2145 return (UInt32)1 << dictSizeLog;
2146 else
2147 return (UInt32)(Int32)-1;
2148 */
2149 return (UInt64)1 << dictSizeLog;
2150 }
2151
2152
2153 // it's limit of current LZMA implementation that can be changed later
2154 #define kLzmaMaxDictSize ((UInt32)15 << 28)
2155
2156 static inline UInt64 GetLZMAUsage(bool multiThread, int btMode, UInt64 dict)
2157 {
2158 if (dict == 0)
2159 dict = 1;
2160 if (dict > kLzmaMaxDictSize)
2161 dict = kLzmaMaxDictSize;
2162 UInt32 hs = (UInt32)dict - 1;
2163 hs |= (hs >> 1);
2164 hs |= (hs >> 2);
2165 hs |= (hs >> 4);
2166 hs |= (hs >> 8);
2167 hs >>= 1;
2168 hs |= 0xFFFF;
2169 if (hs > (1 << 24))
2170 hs >>= 1;
2171 hs++;
2172 hs += (1 << 16);
2173
2174 const UInt32 kBlockSizeMax = (UInt32)0 - (UInt32)(1 << 16);
2175 UInt64 blockSize = (UInt64)dict + (1 << 16)
2176 + (multiThread ? (1 << 20) : 0);
2177 blockSize += (blockSize >> (blockSize < ((UInt32)1 << 30) ? 1 : 2));
2178 if (blockSize >= kBlockSizeMax)
2179 blockSize = kBlockSizeMax;
2180
2181 UInt64 son = (UInt64)dict;
2182 if (btMode)
2183 son *= 2;
2184 const UInt64 v = (hs + son) * 4 + blockSize +
2185 (1 << 20) + (multiThread ? (6 << 20) : 0);
2186
2187 // printf("\nGetLZMAUsage = %d\n", (UInt32)(v >> 20));
2188 // printf("\nblockSize = %d\n", (UInt32)(blockSize >> 20));
2189 return v;
2190 }
2191
2192
2193 UInt64 GetBenchMemoryUsage(UInt32 numThreads, int level, UInt64 dictionary, bool totalBench)
2194 {
2195 const size_t kBufferSize = (size_t)dictionary + kAdditionalSize;
2196 const UInt64 kCompressedBufferSize = GetBenchCompressedSize(kBufferSize); // / 2;
2197 if (level < 0)
2198 level = 5;
2199 const int algo = (level < 5 ? 0 : 1);
2200 const int btMode = (algo == 0 ? 0 : 1);
2201
2202 UInt32 numBigThreads = numThreads;
2203 bool lzmaMt = (totalBench || (numThreads > 1 && btMode));
2204 if (btMode)
2205 {
2206 if (!totalBench && lzmaMt)
2207 numBigThreads /= 2;
2208 }
2209 return ((UInt64)kBufferSize + kCompressedBufferSize +
2210 GetLZMAUsage(lzmaMt, btMode, dictionary) + (2 << 20)) * numBigThreads;
2211 }
2212
2213 static UInt64 GetBenchMemoryUsage_Hash(UInt32 numThreads, UInt64 dictionary)
2214 {
2215 // dictionary += (dictionary >> 9); // for page tables (virtual memory)
2216 return (UInt64)(dictionary + (1 << 15)) * numThreads + (2 << 20);
2217 }
2218
2219
2220 // ---------- CRC and HASH ----------
2221
2222 struct CCrcInfo_Base
2223 {
2224 CMidAlignedBuffer Buffer;
2225 const Byte *Data;
2226 size_t Size;
2227 bool CreateLocalBuf;
2228 UInt32 CheckSum_Res;
2229
2230 CCrcInfo_Base(): CreateLocalBuf(true), CheckSum_Res(0) {}
2231
2232 HRESULT Generate(const Byte *data, size_t size);
2233 HRESULT CrcProcess(UInt64 numIterations,
2234 const UInt32 *checkSum, IHasher *hf,
2235 IBenchPrintCallback *callback);
2236 };
2237
2238
2239 HRESULT CCrcInfo_Base::Generate(const Byte *data, size_t size)
2240 {
2241 Size = size;
2242 Data = data;
2243 if (!data || CreateLocalBuf)
2244 {
2245 ALLOC_WITH_HRESULT(&Buffer, size)
2246 Data = Buffer;
2247 }
2248 if (!data)
2249 RandGen(Buffer, size);
2250 else if (CreateLocalBuf && size != 0)
2251 memcpy(Buffer, data, size);
2252 return S_OK;
2253 }
2254
2255
2256 HRESULT CCrcInfo_Base::CrcProcess(UInt64 numIterations,
2257 const UInt32 *checkSum, IHasher *hf,
2258 IBenchPrintCallback *callback)
2259 {
2260 MY_ALIGN(16)
2261 Byte hash[64];
2262 memset(hash, 0, sizeof(hash));
2263
2264 CheckSum_Res = 0;
2265
2266 const UInt32 hashSize = hf->GetDigestSize();
2267 if (hashSize > sizeof(hash))
2268 return S_FALSE;
2269
2270 const Byte *buf = Data;
2271 const size_t size = Size;
2272 UInt32 checkSum_Prev = 0;
2273
2274 UInt64 prev = 0;
2275 UInt64 cur = 0;
2276
2277 for (UInt64 i = 0; i < numIterations; i++)
2278 {
2279 hf->Init();
2280 size_t pos = 0;
2281 do
2282 {
2283 const size_t rem = size - pos;
2284 const UInt32 kStep = ((UInt32)1 << 31);
2285 const UInt32 curSize = (rem < kStep) ? (UInt32)rem : kStep;
2286 hf->Update(buf + pos, curSize);
2287 pos += curSize;
2288 }
2289 while (pos != size);
2290
2291 hf->Final(hash);
2292 UInt32 sum = 0;
2293 for (UInt32 j = 0; j < hashSize; j += 4)
2294 {
2295 sum = rotlFixed(sum, 11);
2296 sum += GetUi32(hash + j);
2297 }
2298 if (checkSum)
2299 {
2300 if (sum != *checkSum)
2301 return S_FALSE;
2302 }
2303 else
2304 {
2305 checkSum_Prev = sum;
2306 checkSum = &checkSum_Prev;
2307 }
2308 if (callback)
2309 {
2310 cur += size;
2311 if (cur - prev >= ((UInt32)1 << 30))
2312 {
2313 prev = cur;
2314 RINOK(callback->CheckBreak())
2315 }
2316 }
2317 }
2318 CheckSum_Res = checkSum_Prev;
2319 return S_OK;
2320 }
2321
extern
UInt32 g_BenchCpuFreqTemp; // we need non-static variable to disable compiler optimization
UInt32 g_BenchCpuFreqTemp = 1;

/* YY1 is a pair of operations with (sum) and (val).
   Each next macro repeats the previous macro 4 times.
   So YY7 is expanded to 64 copies of YY1: 128 operations. */
#define YY1 sum += val; sum ^= val;
#define YY3 YY1 YY1 YY1 YY1
#define YY5 YY3 YY3 YY3 YY3
#define YY7 YY5 YY5 YY5 YY5
// NOTE(review): presumably it's the number of operations in one YY7 expansion (64 * 2) - confirm at the place of use.
static const UInt32 kNumFreqCommands = 128;
2331
2332 EXTERN_C_BEGIN
2333
/* Executes the YY7 sequence of operations (num) times.
   Each operation uses the result of previous operation in (sum).
   Returns the final value of (sum). */
static UInt32 CountCpuFreq(UInt32 sum, UInt32 num, UInt32 val)
{
  for (UInt32 i = 0; i < num; i++)
  {
    YY7
  }
  return sum;
}
2342
2343 EXTERN_C_END
2344
2345
2346 #ifndef Z7_ST
2347
2348 struct CBaseThreadInfo
2349 {
2350 NWindows::CThread Thread;
2351 IBenchPrintCallback *Callback;
2352 HRESULT CallbackRes;
2353
2354 WRes Wait_If_Created()
2355 {
2356 if (!Thread.IsCreated())
2357 return 0;
2358 return Thread.Wait_Close();
2359 }
2360 };
2361
// Parameters and result of one thread that runs FreqThreadFunction().
struct CFreqInfo: public CBaseThreadInfo
{
  UInt32 ValRes;          // final value of (sum). It's written by thread
  UInt32 Size;            // (num) argument for each CountCpuFreq() call
  UInt64 NumIterations;   // number of CountCpuFreq() calls
};
2368
2369 static THREAD_FUNC_DECL FreqThreadFunction(void *param)
2370 {
2371 CFreqInfo *p = (CFreqInfo *)param;
2372
2373 UInt32 sum = g_BenchCpuFreqTemp;
2374 for (UInt64 k = p->NumIterations; k > 0; k--)
2375 {
2376 if (p->Callback)
2377 {
2378 p->CallbackRes = p->Callback->CheckBreak();
2379 if (p->CallbackRes != S_OK)
2380 break;
2381 }
2382 sum = CountCpuFreq(sum, p->Size, g_BenchCpuFreqTemp);
2383 }
2384 p->ValRes = sum;
2385 return THREAD_FUNC_RET_ZERO;
2386 }
2387
2388 struct CFreqThreads
2389 {
2390 CFreqInfo *Items;
2391 UInt32 NumThreads;
2392
2393 CFreqThreads(): Items(NULL), NumThreads(0) {}
2394
2395 WRes WaitAll()
2396 {
2397 WRes wres = 0;
2398 for (UInt32 i = 0; i < NumThreads; i++)
2399 {
2400 WRes wres2 = Items[i].Wait_If_Created();
2401 if (wres == 0 && wres2 != 0)
2402 wres = wres2;
2403 }
2404 NumThreads = 0;
2405 return wres;
2406 }
2407
2408 ~CFreqThreads()
2409 {
2410 WaitAll();
2411 delete []Items;
2412 }
2413 };
2414
2415
2416 static THREAD_FUNC_DECL CrcThreadFunction(void *param);
2417
2418 struct CCrcInfo: public CBaseThreadInfo
2419 {
2420 const Byte *Data;
2421 size_t Size;
2422 UInt64 NumIterations;
2423 bool CheckSumDefined;
2424 UInt32 CheckSum;
2425 CMyComPtr<IHasher> Hasher;
2426 HRESULT Res;
2427 UInt32 CheckSum_Res;
2428
2429 #ifndef Z7_ST
2430 NSynchronization::CManualResetEvent ReadyEvent;
2431 UInt32 ThreadIndex;
2432 CBenchSyncCommon *Common;
2433 CAffinityMode AffinityMode;
2434 #endif
2435
2436 // we want to call CCrcInfo_Base::Buffer.Free() in main thread.
2437 // so we uses non-local CCrcInfo_Base.
2438 CCrcInfo_Base crcib;
2439
2440 HRESULT CreateThread()
2441 {
2442 WRes res = 0;
2443 if (!ReadyEvent.IsCreated())
2444 res = ReadyEvent.Create();
2445 if (res == 0)
2446 res = AffinityMode.CreateThread_WithAffinity(Thread, CrcThreadFunction, this,
2447 ThreadIndex);
2448 return HRESULT_FROM_WIN32(res);
2449 }
2450
2451 #ifdef USE_ALLOCA
2452 size_t AllocaSize;
2453 #endif
2454
2455 void Process();
2456
2457 CCrcInfo(): Res(E_FAIL) {}
2458 };
2459
2460 static const bool k_Crc_CreateLocalBuf_For_File = true; // for total BW test
2461 // static const bool k_Crc_CreateLocalBuf_For_File = false; // for shared memory read test
2462
2463 void CCrcInfo::Process()
2464 {
2465 crcib.CreateLocalBuf = k_Crc_CreateLocalBuf_For_File;
2466 // we can use additional Generate() passes to reduce some time effects for new page allocation
2467 // for (unsigned y = 0; y < 10; y++)
2468 Res = crcib.Generate(Data, Size);
2469
2470 // if (Common)
2471 {
2472 WRes wres = ReadyEvent.Set();
2473 if (wres != 0)
2474 {
2475 if (Res == 0)
2476 Res = HRESULT_FROM_WIN32(wres);
2477 return;
2478 }
2479 if (Res != 0)
2480 return;
2481
2482 wres = Common->StartEvent.Lock();
2483
2484 if (wres != 0)
2485 {
2486 Res = HRESULT_FROM_WIN32(wres);
2487 return;
2488 }
2489 if (Common->ExitMode)
2490 return;
2491 }
2492
2493 Res = crcib.CrcProcess(NumIterations,
2494 CheckSumDefined ? &CheckSum : NULL, Hasher,
2495 Callback);
2496 CheckSum_Res = crcib.CheckSum_Res;
2497 /*
2498 We don't want to include the time of slow CCrcInfo_Base::Buffer.Free()
2499 to time of benchmark. So we don't free Buffer here
2500 */
2501 // crcib.Buffer.Free();
2502 }
2503
2504
2505 static THREAD_FUNC_DECL CrcThreadFunction(void *param)
2506 {
2507 CCrcInfo *p = (CCrcInfo *)param;
2508
2509 #ifdef USE_ALLOCA
2510 alloca(p->AllocaSize);
2511 #endif
2512 p->Process();
2513 return THREAD_FUNC_RET_ZERO;
2514 }
2515
2516
/*
  Array of CCrcInfo items for threads of hash benchmark.
  The destructor calls StartAndWait(true) and then deletes (Items) with delete[].
*/
struct CCrcThreads
{
  CCrcInfo *Items;
  unsigned NumThreads;      // number of items that StartAndWait() waits for
  CBenchSyncCommon Common;  // StartEvent and ExitMode, that are set in StartAndWait()
  bool NeedClose;           // StartAndWait() does nothing, if it's false

  CCrcThreads(): Items(NULL), NumThreads(0), NeedClose(false) {}

  WRes StartAndWait(bool exitMode = false);

  ~CCrcThreads()
  {
    StartAndWait(true);
    delete []Items;
  }
};
2534
2535
2536 WRes CCrcThreads::StartAndWait(bool exitMode)
2537 {
2538 if (!NeedClose)
2539 return 0;
2540
2541 Common.ExitMode = exitMode;
2542 WRes wres = Common.StartEvent.Set();
2543
2544 for (unsigned i = 0; i < NumThreads; i++)
2545 {
2546 WRes wres2 = Items[i].Wait_If_Created();
2547 if (wres == 0 && wres2 != 0)
2548 wres = wres2;
2549 }
2550 NumThreads = 0;
2551 NeedClose = false;
2552 return wres;
2553 }
2554
2555 #endif
2556
2557
2558 static UInt32 CrcCalc1(const Byte *buf, size_t size)
2559 {
2560 UInt32 crc = CRC_INIT_VAL;
2561 for (size_t i = 0; i < size; i++)
2562 crc = CRC_UPDATE_BYTE(crc, buf[i]);
2563 return CRC_GET_DIGEST(crc);
2564 }
2565
2566 /*
2567 static UInt32 RandGenCrc(Byte *buf, size_t size, CBaseRandomGenerator &RG)
2568 {
2569 RandGen(buf, size, RG);
2570 return CrcCalc1(buf, size);
2571 }
2572 */
2573
2574 static bool CrcInternalTest()
2575 {
2576 CAlignedBuffer buffer;
2577 const size_t kBufferSize0 = (1 << 8);
2578 const size_t kBufferSize1 = (1 << 10);
2579 const unsigned kCheckSize = (1 << 5);
2580 buffer.Alloc(kBufferSize0 + kBufferSize1);
2581 if (!buffer.IsAllocated())
2582 return false;
2583 Byte *buf = (Byte *)buffer;
2584 size_t i;
2585 for (i = 0; i < kBufferSize0; i++)
2586 buf[i] = (Byte)i;
2587 UInt32 crc1 = CrcCalc1(buf, kBufferSize0);
2588 if (crc1 != 0x29058C73)
2589 return false;
2590 RandGen(buf + kBufferSize0, kBufferSize1);
2591 for (i = 0; i < kBufferSize0 + kBufferSize1 - kCheckSize; i++)
2592 for (unsigned j = 0; j < kCheckSize; j++)
2593 if (CrcCalc1(buf + i, j) != CrcCalc(buf + i, j))
2594 return false;
2595 return true;
2596 }
2597
// One row of the g_Bench table: a method that is tested by TotalBench().
// The order of members is the order of values in the table initializers.
struct CBenchMethod
{
  unsigned Weight;        // TotalBench() copies it to EncodeWeight and DecodeWeight of callback
  unsigned DictBits;      // passed to MethodBench(); 0 selects kFilterUnpackSize in TotalBench() (if size is not forced)
  Int32 EncComplex;       // copied to BenchProps.EncComplex
  Int32 DecComplexCompr;  // copied to BenchProps.DecComplexCompr
  Int32 DecComplexUnc;    // copied to BenchProps.DecComplexUnc
  const char *Name;       // method string that is parsed with ParseMethodFromPROPVARIANT()
  // unsigned KeySize;
};
2608
// CMPLX(x) is used for some complexity values in the g_Bench and g_Hash tables.
// It multiplies the value by 1000, if USE_SW_CMPLX is defined.
// Otherwise (default) the value is used as is.

// #define USE_SW_CMPLX

#ifdef USE_SW_CMPLX
#define CMPLX(x) ((x) * 1000)
#else
#define CMPLX(x) (x)
#endif
2616
// Table of methods for TotalBench().
// Columns: { Weight, DictBits, EncComplex, DecComplexCompr, DecComplexUnc, Name }
// The lines that are commented out are not tested.
static const CBenchMethod g_Bench[] =
{
  // { 40, 17, 357, 145, 20, "LZMA:x1" },
  // { 20, 18, 360, 145, 20, "LZMA2:x1:mt2" },

  { 20, 18, 360, 145, 20, "LZMA:x1" },
  { 20, 22, 600, 145, 20, "LZMA:x3" },

  { 80, 24, 1220, 145, 20, "LZMA:x5:mt1" },
  { 80, 24, 1220, 145, 20, "LZMA:x5:mt2" },

  { 10, 16, 124, 40, 14, "Deflate:x1" },
  { 20, 16, 376, 40, 14, "Deflate:x5" },
  { 10, 16, 1082, 40, 14, "Deflate:x7" },
  { 10, 17, 422, 40, 14, "Deflate64:x5" },

  { 10, 15, 590, 69, 69, "BZip2:x1" },
  { 20, 19, 815, 122, 122, "BZip2:x5" },
  { 10, 19, 815, 122, 122, "BZip2:x5:mt2" },
  { 10, 19, 2530, 122, 122, "BZip2:x7" },

  // { 10, 18, 1010, 0, 1150, "PPMDZip:x1" },
  { 10, 18, 1010, 0, 1150, "PPMD:x1" },
  // { 10, 22, 1655, 0, 1830, "PPMDZip:x5" },
  { 10, 22, 1655, 0, 1830, "PPMD:x5" },

  // { 2, 0, -16, 0, -16, "Swap2" },
  { 2, 0, -16, 0, -16, "Swap4" },

  // { 2, 0, 3, 0, 4, "Delta:1" },
  // { 2, 0, 3, 0, 4, "Delta:2" },
  // { 2, 0, 3, 0, 4, "Delta:3" },
  { 2, 0, 3, 0, 4, "Delta:4" },
  // { 2, 0, 3, 0, 4, "Delta:8" },
  // { 2, 0, 3, 0, 4, "Delta:32" },

  { 2, 0, 2, 0, 2, "BCJ" },
  { 2, 0, 1, 0, 1, "ARM64" },

  // { 10, 0, 18, 0, 18, "AES128CBC:1" },
  // { 10, 0, 21, 0, 21, "AES192CBC:1" },
  { 10, 0, 24, 0, 24, "AES256CBC:1" },

  // { 10, 0, 18, 0, 18, "AES128CTR:1" },
  // { 10, 0, 21, 0, 21, "AES192CTR:1" },
  // { 10, 0, 24, 0, 24, "AES256CTR:1" },
  // { 2, 0, CMPLX(6), 0, CMPLX(1), "AES128CBC:2" },
  // { 2, 0, CMPLX(7), 0, CMPLX(1), "AES192CBC:2" },
  { 2, 0, CMPLX(8), 0, CMPLX(1), "AES256CBC:2" },

  // { 2, 0, CMPLX(1), 0, CMPLX(1), "AES128CTR:2" },
  // { 2, 0, CMPLX(1), 0, CMPLX(1), "AES192CTR:2" },
  // { 2, 0, CMPLX(1), 0, CMPLX(1), "AES256CTR:2" },

  // { 1, 0, CMPLX(6), 0, CMPLX(1), "AES128CBC:3" },
  // { 1, 0, CMPLX(7), 0, CMPLX(1), "AES192CBC:3" },
  { 1, 0, CMPLX(8), 0, CMPLX(1), "AES256CBC:3" }

  // { 1, 0, CMPLX(1), 0, CMPLX(1), "AES128CTR:3" },
  // { 1, 0, CMPLX(1), 0, CMPLX(1), "AES192CTR:3" },
  // { 1, 0, CMPLX(1), 0, CMPLX(1), "AES256CTR:3" },
};
2679
// One row of the g_Hash table: a hash method that is tested by TotalBench_Hash().
// The order of members is the order of values in the table initializers.
struct CBenchHash
{
  unsigned Weight;   // passed to CrcBench() as (benchWeight)
  UInt32 Complex;    // passed to CrcBench() as (complexity)
  UInt32 CheckSum;   // expected checksum; TotalBench_Hash() passes it only for generated data of default size
  const char *Name;  // method string that is parsed with ParseMethodFromPROPVARIANT()
};
2687
// ARM_CRC_MUL is the multiplier for Complex values of "CRC32:32" and "CRC32:64" in g_Hash.
// #define ARM_CRC_MUL 100
#define ARM_CRC_MUL 1

// CrcBench() multiplies complexInCommands by this value, when it calculates the number of iterations.
#define k_Hash_Complex_Mult 256
2692
// Table of hash methods for TotalBench_Hash().
// Columns: { Weight, Complex, CheckSum, Name }
// The lines that are commented out are not tested.
static const CBenchHash g_Hash[] =
{
  // { 1, 1820, 0x21e207bb, "CRC32:1" },
  // { 10, 558, 0x21e207bb, "CRC32:4" },
  { 20, 339, 0x21e207bb, "CRC32:8" } ,
  { 2, 128 *ARM_CRC_MUL, 0x21e207bb, "CRC32:32" },
  { 2, 64 *ARM_CRC_MUL, 0x21e207bb, "CRC32:64" },
  { 10, 512, 0x41b901d1, "CRC64" },

  { 10, 5100, 0x7913ba03, "SHA256:1" },
  { 2, CMPLX((32 * 4 + 1) * 4 + 4), 0x7913ba03, "SHA256:2" },

  { 10, 2340, 0xff769021, "SHA1:1" },
  { 2, CMPLX((20 * 6 + 1) * 4 + 4), 0xff769021, "SHA1:2" },

  { 2, 5500, 0x85189d02, "BLAKE2sp" }
};
2710
2711 static void PrintNumber(IBenchPrintCallback &f, UInt64 value, unsigned size)
2712 {
2713 char s[128];
2714 unsigned startPos = (unsigned)sizeof(s) - 32;
2715 memset(s, ' ', startPos);
2716 ConvertUInt64ToString(value, s + startPos);
2717 // if (withSpace)
2718 {
2719 startPos--;
2720 size++;
2721 }
2722 unsigned len = (unsigned)strlen(s + startPos);
2723 if (size > len)
2724 {
2725 size -= len;
2726 if (startPos < size)
2727 startPos = 0;
2728 else
2729 startPos -= size;
2730 }
2731 f.Print(s + startPos);
2732 }
2733
// Widths (in characters) of the columns of benchmark output.
// Note: PrintNumber() prints one additional space before each number field.
static const unsigned kFieldSize_Name = 12;
static const unsigned kFieldSize_SmallName = 4;
static const unsigned kFieldSize_Speed = 9;
static const unsigned kFieldSize_Usage = 5;
static const unsigned kFieldSize_RU = 6;
static const unsigned kFieldSize_Rating = 6;
static const unsigned kFieldSize_EU = 5;
static const unsigned kFieldSize_Effec = 5;
static const unsigned kFieldSize_CrcSpeed = 8;


// combined widths: each field contributes (1 + field size) because of the leading space
static const unsigned kFieldSize_TotalSize = 4 + kFieldSize_Speed + kFieldSize_Usage + kFieldSize_RU + kFieldSize_Rating;
static const unsigned kFieldSize_EUAndEffec = 2 + kFieldSize_EU + kFieldSize_Effec;
2747
2748
2749 static void PrintRating(IBenchPrintCallback &f, UInt64 rating, unsigned size)
2750 {
2751 PrintNumber(f, (rating + 500000) / 1000000, size);
2752 }
2753
2754
2755 static void PrintPercents(IBenchPrintCallback &f, UInt64 val, UInt64 divider, unsigned size)
2756 {
2757 UInt64 v = 0;
2758 if (divider != 0)
2759 v = (val * 100 + divider / 2) / divider;
2760 PrintNumber(f, v, size);
2761 }
2762
2763 static void PrintChars(IBenchPrintCallback &f, char c, unsigned size)
2764 {
2765 char s[256];
2766 memset(s, (Byte)c, size);
2767 s[size] = 0;
2768 f.Print(s);
2769 }
2770
2771 static void PrintSpaces(IBenchPrintCallback &f, unsigned size)
2772 {
2773 PrintChars(f, ' ', size);
2774 }
2775
2776 static void PrintUsage(IBenchPrintCallback &f, UInt64 usage, unsigned size)
2777 {
2778 PrintNumber(f, Benchmark_GetUsage_Percents(usage), size);
2779 }
2780
2781 static void PrintResults(IBenchPrintCallback &f, UInt64 usage, UInt64 rpu, UInt64 rating, bool showFreq, UInt64 cpuFreq)
2782 {
2783 PrintUsage(f, usage, kFieldSize_Usage);
2784 PrintRating(f, rpu, kFieldSize_RU);
2785 PrintRating(f, rating, kFieldSize_Rating);
2786 if (showFreq)
2787 {
2788 if (cpuFreq == 0)
2789 PrintSpaces(f, kFieldSize_EUAndEffec);
2790 else
2791 {
2792 PrintPercents(f, rating, cpuFreq * usage / kBenchmarkUsageMult, kFieldSize_EU);
2793 PrintPercents(f, rating, cpuFreq, kFieldSize_Effec);
2794 }
2795 }
2796 }
2797
2798
2799 void CTotalBenchRes::Generate_From_BenchInfo(const CBenchInfo &info)
2800 {
2801 Speed = info.GetUnpackSizeSpeed();
2802 Usage = info.GetUsage();
2803 RPU = info.GetRatingPerUsage(Rating);
2804 }
2805
2806 void CTotalBenchRes::Mult_For_Weight(unsigned weight)
2807 {
2808 NumIterations2 *= weight;
2809 RPU *= weight;
2810 Rating *= weight;
2811 Usage *= weight;
2812 Speed *= weight;
2813 }
2814
2815 void CTotalBenchRes::Update_With_Res(const CTotalBenchRes &r)
2816 {
2817 Rating += r.Rating;
2818 Usage += r.Usage;
2819 RPU += r.RPU;
2820 Speed += r.Speed;
2821 // NumIterations1 = (r1.NumIterations1 + r2.NumIterations1);
2822 NumIterations2 += r.NumIterations2;
2823 }
2824
2825 static void PrintResults(IBenchPrintCallback *f,
2826 const CBenchInfo &info,
2827 unsigned weight,
2828 UInt64 rating,
2829 bool showFreq, UInt64 cpuFreq,
2830 CTotalBenchRes *res)
2831 {
2832 CTotalBenchRes t;
2833 t.Rating = rating;
2834 t.NumIterations2 = 1;
2835 t.Generate_From_BenchInfo(info);
2836
2837 if (f)
2838 {
2839 if (t.Speed != 0)
2840 PrintNumber(*f, t.Speed / 1024, kFieldSize_Speed);
2841 else
2842 PrintSpaces(*f, 1 + kFieldSize_Speed);
2843 }
2844 if (f)
2845 {
2846 PrintResults(*f, t.Usage, t.RPU, rating, showFreq, cpuFreq);
2847 }
2848
2849 if (res)
2850 {
2851 // res->NumIterations1++;
2852 t.Mult_For_Weight(weight);
2853 res->Update_With_Res(t);
2854 }
2855 }
2856
2857 static void PrintTotals(IBenchPrintCallback &f,
2858 bool showFreq, UInt64 cpuFreq, bool showSpeed, const CTotalBenchRes &res)
2859 {
2860 const UInt64 numIterations2 = res.NumIterations2 ? res.NumIterations2 : 1;
2861 const UInt64 speed = res.Speed / numIterations2;
2862 if (showSpeed && speed != 0)
2863 PrintNumber(f, speed / 1024, kFieldSize_Speed);
2864 else
2865 PrintSpaces(f, 1 + kFieldSize_Speed);
2866
2867 // PrintSpaces(f, 1 + kFieldSize_Speed);
2868 // UInt64 numIterations1 = res.NumIterations1; if (numIterations1 == 0) numIterations1 = 1;
2869 PrintResults(f, res.Usage / numIterations2, res.RPU / numIterations2, res.Rating / numIterations2, showFreq, cpuFreq);
2870 }
2871
2872
2873 static void PrintHex(AString &s, UInt64 v)
2874 {
2875 char temp[32];
2876 ConvertUInt64ToHex(v, temp);
2877 s += temp;
2878 }
2879
2880 AString GetProcessThreadsInfo(const NSystem::CProcessAffinity &ti)
2881 {
2882 AString s;
2883 // s.Add_UInt32(ti.numProcessThreads);
2884 unsigned numSysThreads = ti.GetNumSystemThreads();
2885 if (ti.GetNumProcessThreads() != numSysThreads)
2886 {
2887 // if (ti.numProcessThreads != ti.numSysThreads)
2888 {
2889 s += " / ";
2890 s.Add_UInt32(numSysThreads);
2891 }
2892 s += " : ";
2893 #ifdef _WIN32
2894 PrintHex(s, ti.processAffinityMask);
2895 s += " / ";
2896 PrintHex(s, ti.systemAffinityMask);
2897 #else
2898 unsigned i = (numSysThreads + 3) & ~(unsigned)3;
2899 if (i == 0)
2900 i = 4;
2901 for (; i >= 4; )
2902 {
2903 i -= 4;
2904 unsigned val = 0;
2905 for (unsigned k = 0; k < 4; k++)
2906 {
2907 const unsigned bit = (ti.IsCpuSet(i + k) ? 1 : 0);
2908 val += (bit << k);
2909 }
2910 PrintHex(s, val);
2911 }
2912 #endif
2913 }
2914 return s;
2915 }
2916
2917
#ifdef Z7_LARGE_PAGES

#ifdef _WIN32
extern bool g_LargePagesMode;
extern "C"
{
  extern SIZE_T g_LargePageSize;
}
#endif

// Appends the text about large pages to (s).
// _WIN32: nothing is appended, if large pages mode is off and g_LargePageSize is 0.
//   Otherwise the text is "(LP-" + size [+ "-1G"] [+ "-NA"] + ")", where
//     "-1G" is added, if CPU_IsSupported_PageGB() returns true (x86 / amd64 only),
//     "-NA" is added, if large pages mode is off.
// other systems: only empty string is appended.
void Add_LargePages_String(AString &s)
{
  #ifdef _WIN32

  if (!g_LargePagesMode && g_LargePageSize == 0)
    return;

  s.Add_OptSpaced("(LP-");
  PrintSize_KMGT_Or_Hex(s, g_LargePageSize);
  #ifdef MY_CPU_X86_OR_AMD64
  if (CPU_IsSupported_PageGB())
    s += "-1G";
  #endif
  if (!g_LargePagesMode)
    s += "-NA";
  s += ")";

  #else

  s += "";

  #endif
}

#endif
2949
2950
2951
2952 static void PrintRequirements(IBenchPrintCallback &f, const char *sizeString,
2953 bool size_Defined, UInt64 size, const char *threadsString, UInt32 numThreads)
2954 {
2955 f.Print("RAM ");
2956 f.Print(sizeString);
2957 if (size_Defined)
2958 PrintNumber(f, (size >> 20), 6);
2959 else
2960 f.Print(" ?");
2961 f.Print(" MB");
2962
2963 #ifdef Z7_LARGE_PAGES
2964 {
2965 AString s;
2966 Add_LargePages_String(s);
2967 f.Print(s);
2968 }
2969 #endif
2970
2971 f.Print(", # ");
2972 f.Print(threadsString);
2973 PrintNumber(f, numThreads, 3);
2974 }
2975
2976
2977
2978 struct CBenchCallbackToPrint Z7_final: public IBenchCallback
2979 {
2980 bool NeedPrint;
2981 bool Use2Columns;
2982 bool ShowFreq;
2983 unsigned NameFieldSize;
2984
2985 unsigned EncodeWeight;
2986 unsigned DecodeWeight;
2987
2988 UInt64 CpuFreq;
2989 UInt64 DictSize;
2990
2991 IBenchPrintCallback *_file;
2992 CBenchProps BenchProps;
2993 CTotalBenchRes EncodeRes;
2994 CTotalBenchRes DecodeRes;
2995
2996 CBenchInfo BenchInfo_Results[2];
2997
2998 CBenchCallbackToPrint():
2999 NeedPrint(true),
3000 Use2Columns(false),
3001 ShowFreq(false),
3002 NameFieldSize(0),
3003 EncodeWeight(1),
3004 DecodeWeight(1),
3005 CpuFreq(0)
3006 {}
3007
3008 void Init() { EncodeRes.Init(); DecodeRes.Init(); }
3009 void Print(const char *s);
3010 void NewLine();
3011
3012 HRESULT SetFreq(bool showFreq, UInt64 cpuFreq);
3013 HRESULT SetEncodeResult(const CBenchInfo &info, bool final) Z7_override;
3014 HRESULT SetDecodeResult(const CBenchInfo &info, bool final) Z7_override;
3015 };
3016
3017 HRESULT CBenchCallbackToPrint::SetFreq(bool showFreq, UInt64 cpuFreq)
3018 {
3019 ShowFreq = showFreq;
3020 CpuFreq = cpuFreq;
3021 return S_OK;
3022 }
3023
3024 HRESULT CBenchCallbackToPrint::SetEncodeResult(const CBenchInfo &info, bool final)
3025 {
3026 RINOK(_file->CheckBreak())
3027 if (final)
3028 BenchInfo_Results[0] = info;
3029 if (final)
3030 if (NeedPrint)
3031 {
3032 const UInt64 rating = BenchProps.GetRating_Enc(DictSize, info.GlobalTime, info.GlobalFreq, info.UnpackSize * info.NumIterations);
3033 PrintResults(_file, info,
3034 EncodeWeight, rating,
3035 ShowFreq, CpuFreq, &EncodeRes);
3036 if (!Use2Columns)
3037 _file->NewLine();
3038 }
3039 return S_OK;
3040 }
3041
// separator that SetDecodeResult() prints before decoding results in 2-columns mode
static const char * const kSep = " | ";
3043
3044 HRESULT CBenchCallbackToPrint::SetDecodeResult(const CBenchInfo &info, bool final)
3045 {
3046 RINOK(_file->CheckBreak())
3047 if (final)
3048 BenchInfo_Results[1] = info;
3049 if (final)
3050 if (NeedPrint)
3051 {
3052 const UInt64 rating = BenchProps.GetRating_Dec(info.GlobalTime, info.GlobalFreq, info.UnpackSize, info.PackSize, info.NumIterations);
3053 if (Use2Columns)
3054 _file->Print(kSep);
3055 else
3056 PrintSpaces(*_file, NameFieldSize);
3057 CBenchInfo info2 = info;
3058 info2.UnpackSize *= info2.NumIterations;
3059 info2.PackSize *= info2.NumIterations;
3060 info2.NumIterations = 1;
3061 PrintResults(_file, info2,
3062 DecodeWeight, rating,
3063 ShowFreq, CpuFreq, &DecodeRes);
3064 }
3065 return S_OK;
3066 }
3067
3068 void CBenchCallbackToPrint::Print(const char *s)
3069 {
3070 _file->Print(s);
3071 }
3072
3073 void CBenchCallbackToPrint::NewLine()
3074 {
3075 _file->NewLine();
3076 }
3077
3078 static void PrintLeft(IBenchPrintCallback &f, const char *s, unsigned size)
3079 {
3080 f.Print(s);
3081 int numSpaces = (int)size - (int)MyStringLen(s);
3082 if (numSpaces > 0)
3083 PrintSpaces(f, (unsigned)numSpaces);
3084 }
3085
3086 static void PrintRight(IBenchPrintCallback &f, const char *s, unsigned size)
3087 {
3088 int numSpaces = (int)size - (int)MyStringLen(s);
3089 if (numSpaces > 0)
3090 PrintSpaces(f, (unsigned)numSpaces);
3091 f.Print(s);
3092 }
3093
3094
3095 static bool DoesWildcardMatchName_NoCase(const AString &mask, const char *name)
3096 {
3097 UString wildc = GetUnicodeString(mask);
3098 UString bname = GetUnicodeString(name);
3099 wildc.MakeLower_Ascii();
3100 bname.MakeLower_Ascii();
3101 return DoesWildcardMatchName(wildc, bname);
3102 }
3103
3104
3105 static HRESULT TotalBench(
3106 DECL_EXTERNAL_CODECS_LOC_VARS
3107 const COneMethodInfo &methodMask,
3108 UInt64 complexInCommands,
3109 #ifndef Z7_ST
3110 UInt32 numThreads,
3111 const CAffinityMode *affinityMode,
3112 #endif
3113 bool forceUnpackSize,
3114 size_t unpackSize,
3115 const Byte *fileData,
3116 IBenchPrintCallback *printCallback, CBenchCallbackToPrint *callback)
3117 {
3118 for (unsigned i = 0; i < Z7_ARRAY_SIZE(g_Bench); i++)
3119 {
3120 const CBenchMethod &bench = g_Bench[i];
3121 if (!DoesWildcardMatchName_NoCase(methodMask.MethodName, bench.Name))
3122 continue;
3123 PrintLeft(*callback->_file, bench.Name, kFieldSize_Name);
3124 {
3125 unsigned keySize = 32;
3126 if (IsString1PrefixedByString2(bench.Name, "AES128")) keySize = 16;
3127 else if (IsString1PrefixedByString2(bench.Name, "AES192")) keySize = 24;
3128 callback->BenchProps.KeySize = keySize;
3129 }
3130 callback->BenchProps.DecComplexUnc = bench.DecComplexUnc;
3131 callback->BenchProps.DecComplexCompr = bench.DecComplexCompr;
3132 callback->BenchProps.EncComplex = bench.EncComplex;
3133
3134 COneMethodInfo method;
3135 NCOM::CPropVariant propVariant;
3136 propVariant = bench.Name;
3137 RINOK(method.ParseMethodFromPROPVARIANT(UString(), propVariant))
3138
3139 size_t unpackSize2 = unpackSize;
3140 if (!forceUnpackSize && bench.DictBits == 0)
3141 unpackSize2 = kFilterUnpackSize;
3142
3143 callback->EncodeWeight = bench.Weight;
3144 callback->DecodeWeight = bench.Weight;
3145
3146 const HRESULT res = MethodBench(
3147 EXTERNAL_CODECS_LOC_VARS
3148 complexInCommands,
3149 #ifndef Z7_ST
3150 false, numThreads, affinityMode,
3151 #endif
3152 method,
3153 unpackSize2, fileData,
3154 bench.DictBits,
3155 printCallback, callback, &callback->BenchProps);
3156
3157 if (res == E_NOTIMPL)
3158 {
3159 // callback->Print(" ---");
3160 // we need additional empty line as line for decompression results
3161 if (!callback->Use2Columns)
3162 callback->NewLine();
3163 }
3164 else
3165 {
3166 RINOK(res)
3167 }
3168
3169 callback->NewLine();
3170 }
3171 return S_OK;
3172 }
3173
3174
// Parameters and results of CPU frequency benchmark.
// The constructor doesn't initialize the members:
// the caller must set all "in" members before FreqBench() call.
struct CFreqBench
{
  // in:
  UInt64 complexInCommands;  // FreqBench() divides it by kNumFreqCommands to get the number of iterations
  UInt32 numThreads;         // FreqBench() changes 0 to 1
  bool showFreq;             // passed to PrintResults()
  UInt64 specifiedFreq;      // if non-zero, it's printed instead of measured frequency

  // out:
  UInt64 CpuFreqRes;         // rating divided by numThreads
  UInt64 UsageRes;
  UInt32 res;                // reset to 0; the single-thread path adds the sum of CountCpuFreq() to it

  CFreqBench()
  {}

  HRESULT FreqBench(IBenchPrintCallback *_file
      #ifndef Z7_ST
      , const CAffinityMode *affinityMode
      #endif
      );
};
3197
3198
// Runs CPU frequency benchmark.
//   _file : callback for printing and for break checks. It can be NULL.
//   affinityMode : (multithreading build only) it's used without NULL check.
// The function sets the "out" members (CpuFreqRes, UsageRes, res),
// and prints results, if (_file) is not NULL.
// Returns S_OK, or the error code of thread creation / waiting / callback.
HRESULT CFreqBench::FreqBench(IBenchPrintCallback *_file
    #ifndef Z7_ST
    , const CAffinityMode *affinityMode
    #endif
    )
{
  res = 0;
  CpuFreqRes = 0;
  UsageRes = 0;

  if (numThreads == 0)
    numThreads = 1;

  #ifdef Z7_ST
  numThreads = 1;
  #endif

  // The work is split to (numIterations) outer iterations of
  // (numIterations2) inner iterations, where (numIterations2 <= (1 << 30)).
  const UInt32 complexity = kNumFreqCommands;
  UInt64 numIterations = complexInCommands / complexity;
  UInt32 numIterations2 = 1 << 30;
  if (numIterations > numIterations2)
    numIterations /= numIterations2;
  else
  {
    numIterations2 = (UInt32)numIterations;
    numIterations = 1;
  }

  CBenchInfoCalc progressInfoSpec;

  #ifndef Z7_ST

  bool mtMode = (numThreads > 1) || affinityMode->NeedAffinity();

  if (mtMode)
  {
    CFreqThreads threads;
    threads.Items = new CFreqInfo[numThreads];
    UInt32 i;
    // all items are set up before the time measurement starts
    for (i = 0; i < numThreads; i++)
    {
      CFreqInfo &info = threads.Items[i];
      info.Callback = _file;
      info.CallbackRes = S_OK;
      info.NumIterations = numIterations;
      info.Size = numIterations2;
    }
    progressInfoSpec.SetStartTime();
    for (i = 0; i < numThreads; i++)
    {
      // Sleep(10);
      CFreqInfo &info = threads.Items[i];
      WRes wres = affinityMode->CreateThread_WithAffinity(info.Thread, FreqThreadFunction, &info, i);
      // the thread is counted before the error check
      if (info.Thread.IsCreated())
        threads.NumThreads++;
      if (wres != 0)
        return HRESULT_FROM_WIN32(wres);
    }
    WRes wres = threads.WaitAll();
    if (wres != 0)
      return HRESULT_FROM_WIN32(wres);
    // the first callback error of the threads is returned
    for (i = 0; i < numThreads; i++)
    {
      RINOK(threads.Items[i].CallbackRes)
    }
  }
  else
  #endif
  {
    // single-thread mode: the loop is executed in the calling thread
    progressInfoSpec.SetStartTime();
    UInt32 sum = g_BenchCpuFreqTemp;
    for (UInt64 k = numIterations; k > 0; k--)
    {
      sum = CountCpuFreq(sum, numIterations2, g_BenchCpuFreqTemp);
      if (_file)
      {
        RINOK(_file->CheckBreak())
      }
    }
    res += sum;
  }

  // NOTE(review): presumably this check is here only to keep (res) in use,
  // so the compiler can't remove the calculation of (sum) - confirm
  if (res == 0x12345678)
    if (_file)
    {
      RINOK(_file->CheckBreak())
    }

  CBenchInfo info;
  progressInfoSpec.SetFinishTime(info);

  info.UnpackSize = 0;
  info.PackSize = 0;
  info.NumIterations = 1;

  // total number of commands in all threads
  const UInt64 numCommands = (UInt64)numIterations * numIterations2 * numThreads * complexity;
  const UInt64 rating = info.GetSpeed(numCommands);
  CpuFreqRes = rating / numThreads;
  UsageRes = info.GetUsage();

  if (_file)
  {
    // frequency for the percents columns: specifiedFreq (if non-zero) or measured frequency
    PrintResults(_file, info,
        0, // weight
        rating,
        showFreq, showFreq ? (specifiedFreq != 0 ? specifiedFreq : CpuFreqRes) : 0, NULL);
    RINOK(_file->CheckBreak())
  }

  return S_OK;
}
3310
3311
3312
// Runs the benchmark of one hash method.
//   complexInCommands, complexity, bufferSize : define the number of iterations
//   numThreads : 0 is changed to 1 (it's always 1 in Z7_ST build)
//   fileData   : data for hashing; it's passed to the workers as is (it can be NULL)
//   speed, usage : (out) they are set only if the function returns S_OK
//   checkSum   : expected checksum for check in workers, or NULL for no check
//   _file      : callback for printing and break checks. It can be NULL
//   affinityMode : (multithreading build only) it's used without NULL check
//   showRating : if true (and (_file) is not NULL), the results are printed,
//                and they are added to (encodeRes) by PrintResults()
// Returns:
//   E_NOTIMPL : the hash method was not found, or hasher object was not created
//   S_FALSE   : checksum results of threads are different
//   another error code of hasher / thread / event / callback functions
static HRESULT CrcBench(
    DECL_EXTERNAL_CODECS_LOC_VARS
    UInt64 complexInCommands,
    UInt32 numThreads,
    const size_t bufferSize,
    const Byte *fileData,

    UInt64 &speed,
    UInt64 &usage,

    UInt32 complexity, unsigned benchWeight,
    const UInt32 *checkSum,
    const COneMethodInfo &method,
    IBenchPrintCallback *_file,
    #ifndef Z7_ST
    const CAffinityMode *affinityMode,
    #endif
    bool showRating,
    CTotalBenchRes *encodeRes,
    bool showFreq, UInt64 cpuFreq)
{
  if (numThreads == 0)
    numThreads = 1;

  #ifdef Z7_ST
  numThreads = 1;
  #endif

  const AString &methodName = method.MethodName;
  // methodName.RemoveChar(L'-');
  CMethodId hashID;
  if (!FindHashMethod(
      EXTERNAL_CODECS_LOC_VARS
      methodName, hashID))
    return E_NOTIMPL;

  /*
  // if will generate random data in each thread, instead of global data
  CMidAlignedBuffer buffer;
  if (!fileData)
  {
    ALLOC_WITH_HRESULT(&buffer, bufferSize)
    RandGen(buffer, bufferSize);
    fileData = buffer;
  }
  */

  // (bsize) is used instead of (bufferSize) to avoid the division by zero.
  // At least one iteration is executed.
  const size_t bsize = (bufferSize == 0 ? 1 : bufferSize);
  UInt64 numIterations = complexInCommands * k_Hash_Complex_Mult / complexity / bsize;
  if (numIterations == 0)
    numIterations = 1;

  CBenchInfoCalc progressInfoSpec;
  CBenchInfo info;

  #ifndef Z7_ST
  bool mtEncMode = (numThreads > 1) || affinityMode->NeedAffinity();

  if (mtEncMode)
  {
    CCrcThreads threads;
    threads.Items = new CCrcInfo[numThreads];
    {
      WRes wres = threads.Common.StartEvent.Create();
      if (wres != 0)
        return HRESULT_FROM_WIN32(wres);
      // from this point the destructor of (threads) calls StartAndWait() in exit mode
      threads.NeedClose = true;
    }

    UInt32 i;
    // stage 1: the hasher and parameters are set for each item
    for (i = 0; i < numThreads; i++)
    {
      CCrcInfo &ci = threads.Items[i];
      AString name;
      RINOK(CreateHasher(EXTERNAL_CODECS_LOC_VARS hashID, name, ci.Hasher))
      if (!ci.Hasher)
        return E_NOTIMPL;
      CMyComPtr<ICompressSetCoderProperties> scp;
      ci.Hasher.QueryInterface(IID_ICompressSetCoderProperties, &scp);
      if (scp)
      {
        RINOK(method.SetCoderProps(scp))
      }

      ci.Callback = _file;
      ci.Data = fileData;
      ci.NumIterations = numIterations;
      ci.Size = bufferSize;
      ci.CheckSumDefined = false;
      if (checkSum)
      {
        ci.CheckSum = *checkSum;
        ci.CheckSumDefined = true;
      }

      #ifdef USE_ALLOCA
      // each thread gets different alloca size (smaller than 0x800)
      ci.AllocaSize = (i * 16 * 21) & 0x7FF;
      #endif
    }

    // stage 2: the threads are created
    for (i = 0; i < numThreads; i++)
    {
      CCrcInfo &ci = threads.Items[i];
      ci.ThreadIndex = i;
      ci.Common = &threads.Common;
      ci.AffinityMode = *affinityMode;
      HRESULT hres = ci.CreateThread();
      // the thread is counted before the error check
      if (ci.Thread.IsCreated())
        threads.NumThreads++;
      if (hres != 0)
        return hres;
    }

    // stage 3: we wait for ReadyEvent of each thread and check its result
    for (i = 0; i < numThreads; i++)
    {
      CCrcInfo &ci = threads.Items[i];
      WRes wres = ci.ReadyEvent.Lock();
      if (wres != 0)
        return HRESULT_FROM_WIN32(wres);
      RINOK(ci.Res)
    }

    // stage 4: the measured part: from the start event to the finish of all threads
    progressInfoSpec.SetStartTime();

    WRes wres = threads.StartAndWait();
    if (wres != 0)
      return HRESULT_FROM_WIN32(wres);

    progressInfoSpec.SetFinishTime(info);

    // all threads must return the same checksum
    for (i = 0; i < numThreads; i++)
    {
      RINOK(threads.Items[i].Res)
      if (i != 0)
        if (threads.Items[i].CheckSum_Res !=
            threads.Items[i - 1].CheckSum_Res)
          return S_FALSE;
    }
  }
  else
  #endif
  {
    // single-thread mode: the hashing is executed in the calling thread
    CMyComPtr<IHasher> hasher;
    AString name;
    RINOK(CreateHasher(EXTERNAL_CODECS_LOC_VARS hashID, name, hasher))
    if (!hasher)
      return E_NOTIMPL;
    CMyComPtr<ICompressSetCoderProperties> scp;
    hasher.QueryInterface(IID_ICompressSetCoderProperties, &scp);
    if (scp)
    {
      RINOK(method.SetCoderProps(scp))
    }
    CCrcInfo_Base crcib;
    crcib.CreateLocalBuf = false;
    // the data is prepared before the time measurement starts
    RINOK(crcib.Generate(fileData, bufferSize))
    progressInfoSpec.SetStartTime();
    RINOK(crcib.CrcProcess(numIterations, checkSum, hasher, _file))
    progressInfoSpec.SetFinishTime(info);
  }


  // total size of data processed by all threads
  UInt64 unpSize = numIterations * bufferSize;
  UInt64 unpSizeThreads = unpSize * numThreads;
  info.UnpackSize = unpSizeThreads;
  info.PackSize = unpSizeThreads;
  info.NumIterations = 1;

  if (_file)
  {
    if (showRating)
    {
      // for empty buffer: one byte per iteration is used for the rating
      UInt64 unpSizeThreads2 = unpSizeThreads;
      if (unpSizeThreads2 == 0)
        unpSizeThreads2 = numIterations * 1 * numThreads;
      // 256 here is equal to the value of k_Hash_Complex_Mult
      const UInt64 numCommands = unpSizeThreads2 * complexity / 256;
      const UInt64 rating = info.GetSpeed(numCommands);
      PrintResults(_file, info,
          benchWeight, rating,
          showFreq, cpuFreq, encodeRes);
    }
    RINOK(_file->CheckBreak())
  }

  speed = info.GetSpeed(unpSizeThreads);
  usage = info.GetUsage();

  return S_OK;
}
3502
3503
3504
3505 static HRESULT TotalBench_Hash(
3506 DECL_EXTERNAL_CODECS_LOC_VARS
3507 const COneMethodInfo &methodMask,
3508 UInt64 complexInCommands,
3509 UInt32 numThreads,
3510 size_t bufSize,
3511 const Byte *fileData,
3512 IBenchPrintCallback *printCallback, CBenchCallbackToPrint *callback,
3513 #ifndef Z7_ST
3514 const CAffinityMode *affinityMode,
3515 #endif
3516 CTotalBenchRes *encodeRes,
3517 bool showFreq, UInt64 cpuFreq)
3518 {
3519 for (unsigned i = 0; i < Z7_ARRAY_SIZE(g_Hash); i++)
3520 {
3521 const CBenchHash &bench = g_Hash[i];
3522 if (!DoesWildcardMatchName_NoCase(methodMask.MethodName, bench.Name))
3523 continue;
3524 PrintLeft(*callback->_file, bench.Name, kFieldSize_Name);
3525 // callback->BenchProps.DecComplexUnc = bench.DecComplexUnc;
3526 // callback->BenchProps.DecComplexCompr = bench.DecComplexCompr;
3527 // callback->BenchProps.EncComplex = bench.EncComplex;
3528
3529 COneMethodInfo method;
3530 NCOM::CPropVariant propVariant;
3531 propVariant = bench.Name;
3532 RINOK(method.ParseMethodFromPROPVARIANT(UString(), propVariant))
3533
3534 UInt64 speed, usage;
3535
3536 const HRESULT res = CrcBench(
3537 EXTERNAL_CODECS_LOC_VARS
3538 complexInCommands,
3539 numThreads, bufSize, fileData,
3540 speed, usage,
3541 bench.Complex, bench.Weight,
3542 (!fileData && bufSize == (1 << kNumHashDictBits)) ? &bench.CheckSum : NULL,
3543 method,
3544 printCallback,
3545 #ifndef Z7_ST
3546 affinityMode,
3547 #endif
3548 true, // showRating
3549 encodeRes, showFreq, cpuFreq);
3550 if (res == E_NOTIMPL)
3551 {
3552 // callback->Print(" ---");
3553 }
3554 else
3555 {
3556 RINOK(res)
3557 }
3558 callback->NewLine();
3559 }
3560 return S_OK;
3561 }
3562
3563 struct CTempValues
3564 {
3565 UInt64 *Values;
3566 CTempValues(): Values(NULL) {}
3567 void Alloc(UInt32 num) { Values = new UInt64[num]; }
3568 ~CTempValues() { delete []Values; }
3569 };
3570
3571 static void ParseNumberString(const UString &s, NCOM::CPropVariant &prop)
3572 {
3573 const wchar_t *end;
3574 UInt64 result = ConvertStringToUInt64(s, &end);
3575 if (*end != 0 || s.IsEmpty())
3576 prop = s;
3577 else if (result <= (UInt32)0xFFFFFFFF)
3578 prop = (UInt32)result;
3579 else
3580 prop = result;
3581 }
3582
3583
3584 static bool AreSameMethodNames(const char *fullName, const char *shortName)
3585 {
3586 return StringsAreEqualNoCase_Ascii(fullName, shortName);
3587 }
3588
3589
3590
3591
3592 static void Print_Usage_and_Threads(IBenchPrintCallback &f, UInt64 usage, UInt32 threads)
3593 {
3594 PrintRequirements(f, "usage:", true, usage, "Benchmark threads: ", threads);
3595 }
3596
3597
3598 static void Print_Delimiter(IBenchPrintCallback &f)
3599 {
3600 f.Print(" |");
3601 }
3602
3603 static void Print_Pow(IBenchPrintCallback &f, unsigned pow)
3604 {
3605 char s[16];
3606 ConvertUInt32ToString(pow, s);
3607 unsigned pos = MyStringLen(s);
3608 s[pos++] = ':';
3609 s[pos] = 0;
3610 PrintLeft(f, s, kFieldSize_SmallName); // 4
3611 }
3612
3613 static void Bench_BW_Print_Usage_Speed(IBenchPrintCallback &f,
3614 UInt64 usage, UInt64 speed)
3615 {
3616 PrintUsage(f, usage, kFieldSize_Usage);
3617 PrintNumber(f, speed / 1000000, kFieldSize_CrcSpeed);
3618 }
3619
3620
3621 HRESULT Bench(
3622 DECL_EXTERNAL_CODECS_LOC_VARS
3623 IBenchPrintCallback *printCallback,
3624 IBenchCallback *benchCallback,
3625 const CObjectVector<CProperty> &props,
3626 UInt32 numIterations,
3627 bool multiDict,
3628 IBenchFreqCallback *freqCallback)
3629 {
3630 if (!CrcInternalTest())
3631 return E_FAIL;
3632
3633 UInt32 numCPUs = 1;
3634 UInt64 ramSize = (UInt64)(sizeof(size_t)) << 29;
3635
3636 NSystem::CProcessAffinity threadsInfo;
3637 threadsInfo.InitST();
3638
3639 #ifndef Z7_ST
3640
3641 if (threadsInfo.Get() && threadsInfo.GetNumProcessThreads() != 0)
3642 numCPUs = threadsInfo.GetNumProcessThreads();
3643 else
3644 numCPUs = NSystem::GetNumberOfProcessors();
3645
3646 #endif
3647
3648 // numCPUs = 24;
3649 /*
3650 {
3651 DWORD_PTR mask = (1 << 0);
3652 DWORD_PTR old = SetThreadAffinityMask(GetCurrentThread(), mask);
3653 old = old;
3654 DWORD_PTR old2 = SetThreadAffinityMask(GetCurrentThread(), mask);
3655 old2 = old2;
3656 return 0;
3657 }
3658 */
3659
3660 bool ramSize_Defined = NSystem::GetRamSize(ramSize);
3661
3662 UInt32 numThreadsSpecified = numCPUs;
3663 bool needSetComplexity = false;
3664 UInt32 testTimeMs = kComplexInMs;
3665 UInt32 startDicLog = 22;
3666 bool startDicLog_Defined = false;
3667 UInt64 specifiedFreq = 0;
3668 bool multiThreadTests = false;
3669 UInt64 complexInCommands = kComplexInCommands;
3670 UInt32 numThreads_Start = 1;
3671
3672 #ifndef Z7_ST
3673 CAffinityMode affinityMode;
3674 #endif
3675
3676
3677 COneMethodInfo method;
3678
3679 CMidAlignedBuffer fileDataBuffer;
3680 bool use_fileData = false;
3681 bool isFixedDict = false;
3682
3683 {
3684 unsigned i;
3685
3686 if (printCallback)
3687 {
3688 for (i = 0; i < props.Size(); i++)
3689 {
3690 const CProperty &property = props[i];
3691 printCallback->Print(" ");
3692 printCallback->Print(GetAnsiString(property.Name));
3693 if (!property.Value.IsEmpty())
3694 {
3695 printCallback->Print("=");
3696 printCallback->Print(GetAnsiString(property.Value));
3697 }
3698 }
3699 if (!props.IsEmpty())
3700 printCallback->NewLine();
3701 }
3702
3703
3704 for (i = 0; i < props.Size(); i++)
3705 {
3706 const CProperty &property = props[i];
3707 UString name (property.Name);
3708 name.MakeLower_Ascii();
3709
3710 if (name.IsEqualTo("file"))
3711 {
3712 if (property.Value.IsEmpty())
3713 return E_INVALIDARG;
3714
3715 NFile::NIO::CInFile file;
3716 if (!file.Open(us2fs(property.Value)))
3717 return GetLastError_noZero_HRESULT();
3718 size_t len;
3719 {
3720 UInt64 len64;
3721 if (!file.GetLength(len64))
3722 return GetLastError_noZero_HRESULT();
3723 if (printCallback)
3724 {
3725 printCallback->Print("file size =");
3726 PrintNumber(*printCallback, len64, 0);
3727 printCallback->NewLine();
3728 }
3729 len = (size_t)len64;
3730 if (len != len64)
3731 return E_INVALIDARG;
3732 }
3733
3734 // (len == 0) is allowed. Also it's allowed if Alloc(0) returns NULL here
3735
3736 ALLOC_WITH_HRESULT(&fileDataBuffer, len)
3737 use_fileData = true;
3738
3739 {
3740 size_t processed;
3741 if (!file.ReadFull((Byte *)fileDataBuffer, len, processed))
3742 return GetLastError_noZero_HRESULT();
3743 if (processed != len)
3744 return E_FAIL;
3745 }
3746 continue;
3747 }
3748
3749 NCOM::CPropVariant propVariant;
3750 if (!property.Value.IsEmpty())
3751 ParseNumberString(property.Value, propVariant);
3752
3753 if (name.IsEqualTo("time"))
3754 {
3755 RINOK(ParsePropToUInt32(UString(), propVariant, testTimeMs))
3756 needSetComplexity = true;
3757 testTimeMs *= 1000;
3758 continue;
3759 }
3760
3761 if (name.IsEqualTo("timems"))
3762 {
3763 RINOK(ParsePropToUInt32(UString(), propVariant, testTimeMs))
3764 needSetComplexity = true;
3765 continue;
3766 }
3767
3768 if (name.IsEqualTo("tic"))
3769 {
3770 UInt32 v;
3771 RINOK(ParsePropToUInt32(UString(), propVariant, v))
3772 if (v >= 64)
3773 return E_INVALIDARG;
3774 complexInCommands = (UInt64)1 << v;
3775 continue;
3776 }
3777
3778 const bool isCurrent_fixedDict = name.IsEqualTo("df");
3779 if (isCurrent_fixedDict)
3780 isFixedDict = true;
3781 if (isCurrent_fixedDict || name.IsEqualTo("ds"))
3782 {
3783 RINOK(ParsePropToUInt32(UString(), propVariant, startDicLog))
3784 if (startDicLog > 32)
3785 return E_INVALIDARG;
3786 startDicLog_Defined = true;
3787 continue;
3788 }
3789
3790 if (name.IsEqualTo("mts"))
3791 {
3792 RINOK(ParsePropToUInt32(UString(), propVariant, numThreads_Start))
3793 continue;
3794 }
3795
3796 if (name.IsEqualTo("af"))
3797 {
3798 UInt32 bundle;
3799 RINOK(ParsePropToUInt32(UString(), propVariant, bundle))
3800 if (bundle > 0 && bundle < numCPUs)
3801 {
3802 #ifndef Z7_ST
3803 affinityMode.SetLevels(numCPUs, 2);
3804 affinityMode.NumBundleThreads = bundle;
3805 #endif
3806 }
3807 continue;
3808 }
3809
3810 if (name.IsEqualTo("freq"))
3811 {
3812 UInt32 freq32 = 0;
3813 RINOK(ParsePropToUInt32(UString(), propVariant, freq32))
3814 if (freq32 == 0)
3815 return E_INVALIDARG;
3816 specifiedFreq = (UInt64)freq32 * 1000000;
3817
3818 if (printCallback)
3819 {
3820 printCallback->Print("freq=");
3821 PrintNumber(*printCallback, freq32, 0);
3822 printCallback->NewLine();
3823 }
3824
3825 continue;
3826 }
3827
3828 if (name.IsPrefixedBy_Ascii_NoCase("mt"))
3829 {
3830 const UString s = name.Ptr(2);
3831 if (s.IsEqualTo("*")
3832 || (s.IsEmpty()
3833 && propVariant.vt == VT_BSTR
3834 && StringsAreEqual_Ascii(propVariant.bstrVal, "*")))
3835 {
3836 multiThreadTests = true;
3837 continue;
3838 }
3839 #ifndef Z7_ST
3840 RINOK(ParseMtProp(s, propVariant, numCPUs, numThreadsSpecified))
3841 #endif
3842 continue;
3843 }
3844
3845 RINOK(method.ParseMethodFromPROPVARIANT(name, propVariant))
3846 }
3847 }
3848
3849 if (printCallback)
3850 {
3851 AString s;
3852
3853 #ifndef _WIN32
3854 s += "Compiler: ";
3855 GetCompiler(s);
3856 printCallback->Print(s);
3857 printCallback->NewLine();
3858 s.Empty();
3859 #endif
3860
3861 GetSystemInfoText(s);
3862 printCallback->Print(s);
3863 printCallback->NewLine();
3864 }
3865
3866 if (printCallback)
3867 {
3868 printCallback->Print("1T CPU Freq (MHz):");
3869 }
3870
3871 if (printCallback || freqCallback)
3872 {
3873 UInt64 numMilCommands = 1 << 6;
3874 if (specifiedFreq != 0)
3875 {
3876 while (numMilCommands > 1 && specifiedFreq < (numMilCommands * 1000000))
3877 numMilCommands >>= 1;
3878 }
3879
3880 for (int jj = 0;; jj++)
3881 {
3882 if (printCallback)
3883 RINOK(printCallback->CheckBreak())
3884
3885 UInt64 start = ::GetTimeCount();
3886 UInt32 sum = (UInt32)start;
3887 sum = CountCpuFreq(sum, (UInt32)(numMilCommands * 1000000 / kNumFreqCommands), g_BenchCpuFreqTemp);
3888 if (sum == 0xF1541213)
3889 if (printCallback)
3890 printCallback->Print("");
3891 const UInt64 realDelta = ::GetTimeCount() - start;
3892 start = realDelta;
3893 if (start == 0)
3894 start = 1;
3895 if (start > (UInt64)1 << 61)
3896 start = 1;
3897 const UInt64 freq = GetFreq();
3898 // mips is constant in some compilers
3899 const UInt64 hz = MyMultDiv64(numMilCommands * 1000000, freq, start);
3900 const UInt64 mipsVal = numMilCommands * freq / start;
3901 if (printCallback)
3902 {
3903 if (realDelta == 0)
3904 {
3905 printCallback->Print(" -");
3906 }
3907 else
3908 {
3909 // PrintNumber(*printCallback, start, 0);
3910 PrintNumber(*printCallback, mipsVal, 5);
3911 }
3912 }
3913 if (freqCallback)
3914 {
3915 RINOK(freqCallback->AddCpuFreq(1, hz, kBenchmarkUsageMult))
3916 }
3917
3918 if (jj >= 1)
3919 {
3920 bool needStop = (numMilCommands >= (1 <<
3921 #ifdef _DEBUG
3922 7
3923 #else
3924 11
3925 #endif
3926 ));
3927 if (start >= freq * 16)
3928 {
3929 printCallback->Print(" (Cmplx)");
3930 if (!freqCallback) // we don't want complexity change for old gui lzma benchmark
3931 {
3932 needSetComplexity = true;
3933 }
3934 needStop = true;
3935 }
3936 if (needSetComplexity)
3937 SetComplexCommandsMs(testTimeMs, false, mipsVal * 1000000, complexInCommands);
3938 if (needStop)
3939 break;
3940 numMilCommands <<= 1;
3941 }
3942 }
3943 if (freqCallback)
3944 {
3945 RINOK(freqCallback->FreqsFinished(1))
3946 }
3947 }
3948
3949 if (numThreadsSpecified >= 2)
3950 if (printCallback || freqCallback)
3951 {
3952 if (printCallback)
3953 printCallback->NewLine();
3954
3955 /* it can show incorrect frequency for HT threads.
3956 so we reduce freq test to (numCPUs / 2) */
3957
3958 UInt32 numThreads = numThreadsSpecified >= numCPUs / 2 ? numCPUs / 2: numThreadsSpecified;
3959 if (numThreads < 1)
3960 numThreads = 1;
3961
3962 if (printCallback)
3963 {
3964 char s[128];
3965 ConvertUInt64ToString(numThreads, s);
3966 printCallback->Print(s);
3967 printCallback->Print("T CPU Freq (MHz):");
3968 }
3969 UInt64 numMilCommands = 1 <<
3970 #ifdef _DEBUG
3971 7;
3972 #else
3973 10;
3974 #endif
3975
3976 if (specifiedFreq != 0)
3977 {
3978 while (numMilCommands > 1 && specifiedFreq < (numMilCommands * 1000000))
3979 numMilCommands >>= 1;
3980 }
3981
3982 // for (int jj = 0;; jj++)
3983 for (;;)
3984 {
3985 if (printCallback)
3986 RINOK(printCallback->CheckBreak())
3987
3988 {
3989 // PrintLeft(f, "CPU", kFieldSize_Name);
3990
3991 // UInt32 resVal;
3992
3993 CFreqBench fb;
3994 fb.complexInCommands = numMilCommands * 1000000;
3995 fb.numThreads = numThreads;
3996 // showFreq;
3997 // fb.showFreq = (freqTest == kNumCpuTests - 1 || specifiedFreq != 0);
3998 fb.showFreq = true;
3999 fb.specifiedFreq = 1;
4000
4001 const HRESULT res = fb.FreqBench(NULL /* printCallback */
4002 #ifndef Z7_ST
4003 , &affinityMode
4004 #endif
4005 );
4006 RINOK(res)
4007
4008 if (freqCallback)
4009 {
4010 RINOK(freqCallback->AddCpuFreq(numThreads, fb.CpuFreqRes, fb.UsageRes))
4011 }
4012
4013 if (printCallback)
4014 {
4015 /*
4016 if (realDelta == 0)
4017 {
4018 printCallback->Print(" -");
4019 }
4020 else
4021 */
4022 {
4023 // PrintNumber(*printCallback, start, 0);
4024 PrintUsage(*printCallback, fb.UsageRes, 3);
4025 printCallback->Print("%");
4026 PrintNumber(*printCallback, fb.CpuFreqRes / 1000000, 0);
4027 printCallback->Print(" ");
4028
4029 // PrintNumber(*printCallback, fb.UsageRes, 5);
4030 }
4031 }
4032 }
4033 // if (jj >= 1)
4034 {
4035 const bool needStop = (numMilCommands >= (1 <<
4036 #ifdef _DEBUG
4037 7
4038 #else
4039 11
4040 #endif
4041 ));
4042 if (needStop)
4043 break;
4044 numMilCommands <<= 1;
4045 }
4046 }
4047 if (freqCallback)
4048 {
4049 RINOK(freqCallback->FreqsFinished(numThreads))
4050 }
4051 }
4052
4053
4054 if (printCallback)
4055 {
4056 printCallback->NewLine();
4057 printCallback->NewLine();
4058 PrintRequirements(*printCallback, "size: ", ramSize_Defined, ramSize, "CPU hardware threads:", numCPUs);
4059 printCallback->Print(GetProcessThreadsInfo(threadsInfo));
4060 printCallback->NewLine();
4061 }
4062
4063 if (numThreadsSpecified < 1 || numThreadsSpecified > kNumThreadsMax)
4064 return E_INVALIDARG;
4065
4066 UInt64 dict = (UInt64)1 << startDicLog;
4067 const bool dictIsDefined = (isFixedDict || method.Get_DicSize(dict));
4068
4069 const unsigned level = method.GetLevel();
4070
4071 AString &methodName = method.MethodName;
4072 const AString original_MethodName = methodName;
4073 if (methodName.IsEmpty())
4074 methodName = "LZMA";
4075
4076 if (benchCallback)
4077 {
4078 CBenchProps benchProps;
4079 benchProps.SetLzmaCompexity();
4080 const UInt64 dictSize = method.Get_Lzma_DicSize();
4081
4082 size_t uncompressedDataSize;
4083 if (use_fileData)
4084 {
4085 uncompressedDataSize = fileDataBuffer.Size();
4086 }
4087 else
4088 {
4089 uncompressedDataSize = kAdditionalSize + (size_t)dictSize;
4090 if (uncompressedDataSize < dictSize)
4091 return E_INVALIDARG;
4092 }
4093
4094 return MethodBench(
4095 EXTERNAL_CODECS_LOC_VARS
4096 complexInCommands,
4097 #ifndef Z7_ST
4098 true, numThreadsSpecified,
4099 &affinityMode,
4100 #endif
4101 method,
4102 uncompressedDataSize, (const Byte *)fileDataBuffer,
4103 kOldLzmaDictBits, printCallback, benchCallback, &benchProps);
4104 }
4105
4106 if (methodName.IsEqualTo_Ascii_NoCase("CRC"))
4107 methodName = "crc32";
4108
4109 CMethodId hashID;
4110 const bool isHashMethod = FindHashMethod(EXTERNAL_CODECS_LOC_VARS methodName, hashID);
4111 int codecIndex = -1;
4112 bool isFilter = false;
4113 if (!isHashMethod)
4114 {
4115 UInt32 numStreams;
4116 codecIndex = FindMethod_Index(EXTERNAL_CODECS_LOC_VARS original_MethodName,
4117 true, // encode
4118 hashID, numStreams, isFilter);
4119 // we can allow non filter for BW tests
4120 if (!isFilter) codecIndex = -1;
4121 }
4122
4123 CBenchCallbackToPrint callback;
4124 callback.Init();
4125 callback._file = printCallback;
4126
4127 if (isHashMethod || codecIndex != -1)
4128 {
4129 if (!printCallback)
4130 return S_FALSE;
4131 IBenchPrintCallback &f = *printCallback;
4132
4133 UInt64 dict64 = dict;
4134 if (!dictIsDefined)
4135 dict64 = (1 << 27);
4136 if (use_fileData)
4137 {
4138 if (!dictIsDefined)
4139 dict64 = fileDataBuffer.Size();
4140 else if (dict64 > fileDataBuffer.Size())
4141 dict64 = fileDataBuffer.Size();
4142 }
4143
4144 for (;;)
4145 {
4146 const int index = method.FindProp(NCoderPropID::kDictionarySize);
4147 if (index < 0)
4148 break;
4149 method.Props.Delete((unsigned)index);
4150 }
4151
4152 // methodName.RemoveChar(L'-');
4153 Int32 complexity = 16 * k_Hash_Complex_Mult; // for unknown hash method
4154 const UInt32 *checkSum = NULL;
4155 int benchIndex = -1;
4156
4157 if (isHashMethod)
4158 {
4159 for (unsigned i = 0; i < Z7_ARRAY_SIZE(g_Hash); i++)
4160 {
4161 const CBenchHash &h = g_Hash[i];
4162 AString benchMethod (h.Name);
4163 AString benchProps;
4164 const int propPos = benchMethod.Find(':');
4165 if (propPos >= 0)
4166 {
4167 benchProps = benchMethod.Ptr((unsigned)(propPos + 1));
4168 benchMethod.DeleteFrom((unsigned)propPos);
4169 }
4170
4171 if (AreSameMethodNames(benchMethod, methodName))
4172 {
4173 const bool sameProps = method.PropsString.IsEqualTo_Ascii_NoCase(benchProps);
4174 /*
4175 bool isMainMethod = method.PropsString.IsEmpty();
4176 if (isMainMethod)
4177 isMainMethod = !checkSum
4178 || (benchMethod.IsEqualTo_Ascii_NoCase("crc32") && benchProps.IsEqualTo_Ascii_NoCase("8"));
4179 if (sameProps || isMainMethod)
4180 */
4181 {
4182 complexity = (Int32)h.Complex;
4183 checkSum = &h.CheckSum;
4184 if (sameProps)
4185 break;
4186 /*
              if the property is not specified, we use the complexity
              of the latest fastest method (crc32:64)
4189 */
4190 }
4191 }
4192 }
4193 // if (!checkSum) return E_NOTIMPL;
4194 }
4195 else
4196 {
4197 for (unsigned i = 0; i < Z7_ARRAY_SIZE(g_Bench); i++)
4198 {
4199 const CBenchMethod &bench = g_Bench[i];
4200 AString benchMethod (bench.Name);
4201 AString benchProps;
4202 const int propPos = benchMethod.Find(':');
4203 if (propPos >= 0)
4204 {
4205 benchProps = benchMethod.Ptr((unsigned)(propPos + 1));
4206 benchMethod.DeleteFrom((unsigned)propPos);
4207 }
4208
4209 if (AreSameMethodNames(benchMethod, methodName))
4210 {
4211 const bool sameProps = method.PropsString.IsEqualTo_Ascii_NoCase(benchProps);
4212 // bool isMainMethod = method.PropsString.IsEmpty();
4213 // if (sameProps || isMainMethod)
4214 {
4215 benchIndex = (int)i;
4216 if (sameProps)
4217 break;
4218 }
4219 }
4220 }
4221 // if (benchIndex < 0) return E_NOTIMPL;
4222 }
4223
4224 {
4225 /* we count usage only for crc and filter. non-filters are not supported */
4226 UInt64 usage = (1 << 20);
4227 UInt64 bufSize = dict64;
4228 UInt32 numBlocks = isHashMethod ? 1 : 3;
4229 if (use_fileData)
4230 {
4231 usage += fileDataBuffer.Size();
4232 if (bufSize > fileDataBuffer.Size())
4233 bufSize = fileDataBuffer.Size();
4234 if (isHashMethod)
4235 {
4236 numBlocks = 0;
4237 #ifndef Z7_ST
4238 if (numThreadsSpecified != 1)
4239 numBlocks = (k_Crc_CreateLocalBuf_For_File ? 1 : 0);
4240 #endif
4241 }
4242 }
4243 usage += numThreadsSpecified * bufSize * numBlocks;
4244 Print_Usage_and_Threads(f, usage, numThreadsSpecified);
4245 }
4246
4247 CUIntVector numThreadsVector;
4248 {
4249 unsigned nt = numThreads_Start;
4250 for (;;)
4251 {
4252 if (nt > numThreadsSpecified)
4253 break;
4254 numThreadsVector.Add(nt);
4255 const unsigned next = nt * 2;
4256 const UInt32 ntHalf= numThreadsSpecified / 2;
4257 if (ntHalf > nt && ntHalf < next)
4258 numThreadsVector.Add(ntHalf);
4259 if (numThreadsSpecified > nt && numThreadsSpecified < next)
4260 numThreadsVector.Add(numThreadsSpecified);
4261 nt = next;
4262 }
4263 }
4264
4265 unsigned numColumns = isHashMethod ? 1 : 2;
4266 CTempValues speedTotals;
4267 CTempValues usageTotals;
4268 {
4269 const unsigned numItems = numThreadsVector.Size() * numColumns;
4270 speedTotals.Alloc(numItems);
4271 usageTotals.Alloc(numItems);
4272 for (unsigned i = 0; i < numItems; i++)
4273 {
4274 speedTotals.Values[i] = 0;
4275 usageTotals.Values[i] = 0;
4276 }
4277 }
4278
4279 f.NewLine();
4280 for (unsigned line = 0; line < 3; line++)
4281 {
4282 f.NewLine();
4283 f.Print(line == 0 ? "THRD" : line == 1 ? " " : "Size");
4284 FOR_VECTOR (ti, numThreadsVector)
4285 {
4286 if (ti != 0)
4287 Print_Delimiter(f);
4288 if (line == 0)
4289 {
4290 PrintSpaces(f, (kFieldSize_CrcSpeed + kFieldSize_Usage + 2) * (numColumns - 1));
4291 PrintNumber(f, numThreadsVector[ti], 1 + kFieldSize_Usage + kFieldSize_CrcSpeed);
4292 }
4293 else
4294 {
4295 for (unsigned c = 0; c < numColumns; c++)
4296 {
4297 PrintRight(f, line == 1 ? "Usage" : "%", kFieldSize_Usage + 1);
4298 PrintRight(f, line == 1 ? "BW" : "MB/s", kFieldSize_CrcSpeed + 1);
4299 }
4300 }
4301 }
4302 }
4303 f.NewLine();
4304
4305 UInt64 numSteps = 0;
4306
4307 // for (UInt32 iter = 0; iter < numIterations; iter++)
4308 // {
4309 unsigned pow = 10; // kNumHashDictBits
4310 if (startDicLog_Defined)
4311 pow = startDicLog;
4312
4313 // #define NUM_SUB_BITS 2
4314 // pow <<= NUM_SUB_BITS;
4315 for (;; pow++)
4316 {
4317 const UInt64 bufSize = (UInt64)1 << pow;
4318 // UInt64 bufSize = (UInt64)1 << (pow >> NUM_SUB_BITS);
4319 // bufSize += ((UInt64)pow & ((1 << NUM_SUB_BITS) - 1)) << ((pow >> NUM_SUB_BITS) - NUM_SUB_BITS);
4320
4321 size_t dataSize = fileDataBuffer.Size();
4322 if (dataSize > bufSize || !use_fileData)
4323 dataSize = (size_t)bufSize;
4324
4325 for (UInt32 iter = 0; iter < numIterations; iter++)
4326 {
4327 Print_Pow(f, pow);
4328 // PrintNumber(f, bufSize >> 10, 4);
4329
4330 FOR_VECTOR (ti, numThreadsVector)
4331 {
4332 RINOK(f.CheckBreak())
4333 const UInt32 numThreads = numThreadsVector[ti];
4334 if (isHashMethod)
4335 {
4336 UInt64 speed = 0;
4337 UInt64 usage = 0;
4338 const HRESULT res = CrcBench(EXTERNAL_CODECS_LOC_VARS complexInCommands,
4339 numThreads,
4340 dataSize, (const Byte *)fileDataBuffer,
4341 speed, usage,
4342 (UInt32)complexity,
4343 1, // benchWeight,
4344 (pow == kNumHashDictBits && !use_fileData) ? checkSum : NULL,
4345 method,
4346 &f,
4347 #ifndef Z7_ST
4348 &affinityMode,
4349 #endif
4350 false, // showRating
4351 NULL, false, 0);
4352 RINOK(res)
4353
4354 if (ti != 0)
4355 Print_Delimiter(f);
4356
4357 Bench_BW_Print_Usage_Speed(f, usage, speed);
4358 speedTotals.Values[ti] += speed;
4359 usageTotals.Values[ti] += usage;
4360 }
4361 else
4362 {
4363 {
4364 unsigned keySize = 32;
4365 if (IsString1PrefixedByString2(methodName, "AES128")) keySize = 16;
4366 else if (IsString1PrefixedByString2(methodName, "AES192")) keySize = 24;
4367 callback.BenchProps.KeySize = keySize;
4368 }
4369
4370 COneMethodInfo method2 = method;
4371 unsigned bench_DictBits;
4372
4373 if (benchIndex >= 0)
4374 {
4375 const CBenchMethod &bench = g_Bench[benchIndex];
4376 callback.BenchProps.EncComplex = bench.EncComplex;
4377 callback.BenchProps.DecComplexUnc = bench.DecComplexUnc;
4378 callback.BenchProps.DecComplexCompr = bench.DecComplexCompr;
4379 bench_DictBits = bench.DictBits;
4380 // bench_DictBits = kOldLzmaDictBits; = 32 default : for debug
4381 }
4382 else
4383 {
4384 bench_DictBits = kOldLzmaDictBits; // = 32 default
4385 if (isFilter)
4386 {
4387 const unsigned k_UnknownCoderComplexity = 4;
4388 callback.BenchProps.EncComplex = k_UnknownCoderComplexity;
4389 callback.BenchProps.DecComplexUnc = k_UnknownCoderComplexity;
4390 }
4391 else
4392 {
4393 callback.BenchProps.EncComplex = 1 << 10;
4394 callback.BenchProps.DecComplexUnc = 1 << 6;
4395 }
4396 callback.BenchProps.DecComplexCompr = 0;
4397 }
4398 callback.NeedPrint = false;
4399
4400 if (StringsAreEqualNoCase_Ascii(method2.MethodName, "LZMA"))
4401 {
4402 const NCOM::CPropVariant propVariant = (UInt32)pow;
4403 RINOK(method2.ParseMethodFromPROPVARIANT((UString)"d", propVariant))
4404 }
4405
4406 const HRESULT res = MethodBench(
4407 EXTERNAL_CODECS_LOC_VARS
4408 complexInCommands,
4409 #ifndef Z7_ST
4410 false, // oldLzmaBenchMode
4411 numThreadsVector[ti],
4412 &affinityMode,
4413 #endif
4414 method2,
4415 dataSize, (const Byte *)fileDataBuffer,
4416 bench_DictBits,
4417 printCallback,
4418 &callback,
4419 &callback.BenchProps);
4420 RINOK(res)
4421
4422 if (ti != 0)
4423 Print_Delimiter(f);
4424
4425 for (unsigned i = 0; i < 2; i++)
4426 {
4427 const CBenchInfo &bi = callback.BenchInfo_Results[i];
4428 const UInt64 usage = bi.GetUsage();
4429 const UInt64 speed = bi.GetUnpackSizeSpeed();
4430 usageTotals.Values[ti * 2 + i] += usage;
4431 speedTotals.Values[ti * 2 + i] += speed;
4432 Bench_BW_Print_Usage_Speed(f, usage, speed);
4433 }
4434 }
4435 }
4436
4437 f.NewLine();
4438 numSteps++;
4439 }
4440 if (dataSize >= dict64)
4441 break;
4442 }
4443
4444 if (numSteps != 0)
4445 {
4446 f.Print("Avg:");
4447 for (unsigned ti = 0; ti < numThreadsVector.Size(); ti++)
4448 {
4449 if (ti != 0)
4450 Print_Delimiter(f);
4451 for (unsigned i = 0; i < numColumns; i++)
4452 Bench_BW_Print_Usage_Speed(f,
4453 usageTotals.Values[ti * numColumns + i] / numSteps,
4454 speedTotals.Values[ti * numColumns + i] / numSteps);
4455 }
4456 f.NewLine();
4457 }
4458
4459 return S_OK;
4460 }
4461
4462 bool use2Columns = false;
4463
4464 bool totalBenchMode = false;
4465 bool onlyHashBench = false;
4466 if (methodName.IsEqualTo_Ascii_NoCase("hash"))
4467 {
4468 onlyHashBench = true;
4469 methodName = "*";
4470 totalBenchMode = true;
4471 }
4472 else if (methodName.Find('*') >= 0)
4473 totalBenchMode = true;
4474
4475 // ---------- Threads loop ----------
4476 for (unsigned threadsPassIndex = 0; threadsPassIndex < 3; threadsPassIndex++)
4477 {
4478
4479 UInt32 numThreads = numThreadsSpecified;
4480
4481 if (!multiThreadTests)
4482 {
4483 if (threadsPassIndex != 0)
4484 break;
4485 }
4486 else
4487 {
4488 numThreads = 1;
4489 if (threadsPassIndex != 0)
4490 {
4491 if (numCPUs < 2)
4492 break;
4493 numThreads = numCPUs;
4494 if (threadsPassIndex == 1)
4495 {
4496 if (numCPUs >= 4)
4497 numThreads = numCPUs / 2;
4498 }
4499 else if (numCPUs < 4)
4500 break;
4501 }
4502 }
4503
4504 IBenchPrintCallback &f = *printCallback;
4505
4506 if (threadsPassIndex > 0)
4507 {
4508 f.NewLine();
4509 f.NewLine();
4510 }
4511
4512 if (!dictIsDefined && !onlyHashBench)
4513 {
4514 const unsigned dicSizeLog_Main = (totalBenchMode ? 24 : 25);
4515 unsigned dicSizeLog = dicSizeLog_Main;
4516
4517 #ifdef UNDER_CE
4518 dicSizeLog = (UInt64)1 << 20;
4519 #endif
4520
4521 if (ramSize_Defined)
4522 for (; dicSizeLog > kBenchMinDicLogSize; dicSizeLog--)
4523 if (GetBenchMemoryUsage(numThreads, (int)level, ((UInt64)1 << dicSizeLog), totalBenchMode) + (8 << 20) <= ramSize)
4524 break;
4525
4526 dict = (UInt64)1 << dicSizeLog;
4527
4528 if (totalBenchMode && dicSizeLog != dicSizeLog_Main)
4529 {
4530 f.Print("Dictionary reduced to: ");
4531 PrintNumber(f, dicSizeLog, 1);
4532 f.NewLine();
4533 }
4534 }
4535
4536 Print_Usage_and_Threads(f,
4537 onlyHashBench ?
4538 GetBenchMemoryUsage_Hash(numThreads, dict) :
4539 GetBenchMemoryUsage(numThreads, (int)level, dict, totalBenchMode),
4540 numThreads);
4541
4542 f.NewLine();
4543
4544 f.NewLine();
4545
4546 if (totalBenchMode)
4547 {
4548 callback.NameFieldSize = kFieldSize_Name;
4549 use2Columns = false;
4550 }
4551 else
4552 {
4553 callback.NameFieldSize = kFieldSize_SmallName;
4554 use2Columns = true;
4555 }
4556 callback.Use2Columns = use2Columns;
4557
4558 bool showFreq = false;
4559 UInt64 cpuFreq = 0;
4560
4561 if (totalBenchMode)
4562 {
4563 showFreq = true;
4564 }
4565
4566 unsigned fileldSize = kFieldSize_TotalSize;
4567 if (showFreq)
4568 fileldSize += kFieldSize_EUAndEffec;
4569
4570 if (use2Columns)
4571 {
4572 PrintSpaces(f, callback.NameFieldSize);
4573 PrintRight(f, "Compressing", fileldSize);
4574 f.Print(kSep);
4575 PrintRight(f, "Decompressing", fileldSize);
4576 }
4577 f.NewLine();
4578 PrintLeft(f, totalBenchMode ? "Method" : "Dict", callback.NameFieldSize);
4579
4580 int j;
4581
4582 for (j = 0; j < 2; j++)
4583 {
4584 PrintRight(f, "Speed", kFieldSize_Speed + 1);
4585 PrintRight(f, "Usage", kFieldSize_Usage + 1);
4586 PrintRight(f, "R/U", kFieldSize_RU + 1);
4587 PrintRight(f, "Rating", kFieldSize_Rating + 1);
4588 if (showFreq)
4589 {
4590 PrintRight(f, "E/U", kFieldSize_EU + 1);
4591 PrintRight(f, "Effec", kFieldSize_Effec + 1);
4592 }
4593 if (!use2Columns)
4594 break;
4595 if (j == 0)
4596 f.Print(kSep);
4597 }
4598
4599 f.NewLine();
4600 PrintSpaces(f, callback.NameFieldSize);
4601
4602 for (j = 0; j < 2; j++)
4603 {
4604 PrintRight(f, "KiB/s", kFieldSize_Speed + 1);
4605 PrintRight(f, "%", kFieldSize_Usage + 1);
4606 PrintRight(f, "MIPS", kFieldSize_RU + 1);
4607 PrintRight(f, "MIPS", kFieldSize_Rating + 1);
4608 if (showFreq)
4609 {
4610 PrintRight(f, "%", kFieldSize_EU + 1);
4611 PrintRight(f, "%", kFieldSize_Effec + 1);
4612 }
4613 if (!use2Columns)
4614 break;
4615 if (j == 0)
4616 f.Print(kSep);
4617 }
4618
4619 f.NewLine();
4620 f.NewLine();
4621
4622 if (specifiedFreq != 0)
4623 cpuFreq = specifiedFreq;
4624
4625 // bool showTotalSpeed = false;
4626
4627 if (totalBenchMode)
4628 {
4629 for (UInt32 i = 0; i < numIterations; i++)
4630 {
4631 if (i != 0)
4632 printCallback->NewLine();
4633
4634 const unsigned kNumCpuTests = 3;
4635 for (unsigned freqTest = 0; freqTest < kNumCpuTests; freqTest++)
4636 {
4637 PrintLeft(f, "CPU", kFieldSize_Name);
4638
4639 // UInt32 resVal;
4640
4641 CFreqBench fb;
4642 fb.complexInCommands = complexInCommands;
4643 fb.numThreads = numThreads;
4644 // showFreq;
4645 fb.showFreq = (freqTest == kNumCpuTests - 1 || specifiedFreq != 0);
4646 fb.specifiedFreq = specifiedFreq;
4647
4648 const HRESULT res = fb.FreqBench(printCallback
4649 #ifndef Z7_ST
4650 , &affinityMode
4651 #endif
4652 );
4653 RINOK(res)
4654
4655 cpuFreq = fb.CpuFreqRes;
4656 callback.NewLine();
4657
4658 if (specifiedFreq != 0)
4659 cpuFreq = specifiedFreq;
4660
4661 if (testTimeMs >= 1000)
4662 if (freqTest == kNumCpuTests - 1)
4663 {
4664 // SetComplexCommandsMs(testTimeMs, specifiedFreq != 0, cpuFreq, complexInCommands);
4665 }
4666 }
4667 callback.NewLine();
4668
4669 // return S_OK; // change it
4670
4671 callback.SetFreq(true, cpuFreq);
4672
4673 if (!onlyHashBench)
4674 {
4675 size_t dataSize = (size_t)dict;
4676 if (use_fileData)
4677 {
4678 dataSize = fileDataBuffer.Size();
4679 if (dictIsDefined && dataSize > dict)
4680 dataSize = (size_t)dict;
4681 }
4682
4683 const HRESULT res = TotalBench(EXTERNAL_CODECS_LOC_VARS
4684 method, complexInCommands,
4685 #ifndef Z7_ST
4686 numThreads,
4687 &affinityMode,
4688 #endif
4689 dictIsDefined || use_fileData, // forceUnpackSize
4690 dataSize,
4691 (const Byte *)fileDataBuffer,
4692 printCallback, &callback);
4693 RINOK(res)
4694 }
4695
4696 {
4697 size_t dataSize = (size_t)1 << kNumHashDictBits;
4698 if (dictIsDefined)
4699 {
4700 dataSize = (size_t)dict;
4701 if (dataSize != dict)
4702 return E_OUTOFMEMORY;
4703 }
4704 if (use_fileData)
4705 {
4706 dataSize = fileDataBuffer.Size();
4707 if (dictIsDefined && dataSize > dict)
4708 dataSize = (size_t)dict;
4709 }
4710
4711 const HRESULT res = TotalBench_Hash(EXTERNAL_CODECS_LOC_VARS
4712 method, complexInCommands,
4713 numThreads,
4714 dataSize, (const Byte *)fileDataBuffer,
4715 printCallback, &callback,
4716 #ifndef Z7_ST
4717 &affinityMode,
4718 #endif
4719 &callback.EncodeRes, true, cpuFreq);
4720 RINOK(res)
4721 }
4722
4723 callback.NewLine();
4724 {
4725 PrintLeft(f, "CPU", kFieldSize_Name);
4726
4727 CFreqBench fb;
4728 fb.complexInCommands = complexInCommands;
4729 fb.numThreads = numThreads;
4730 // showFreq;
4731 fb.showFreq = (specifiedFreq != 0);
4732 fb.specifiedFreq = specifiedFreq;
4733
4734 const HRESULT res = fb.FreqBench(printCallback
4735 #ifndef Z7_ST
4736 , &affinityMode
4737 #endif
4738 );
4739 RINOK(res)
4740 callback.NewLine();
4741 }
4742 }
4743 }
4744 else
4745 {
4746 needSetComplexity = true;
4747 if (!methodName.IsEqualTo_Ascii_NoCase("LZMA"))
4748 {
4749 unsigned i;
4750 for (i = 0; i < Z7_ARRAY_SIZE(g_Bench); i++)
4751 {
4752 const CBenchMethod &h = g_Bench[i];
4753 AString benchMethod (h.Name);
4754 AString benchProps;
4755 const int propPos = benchMethod.Find(':');
4756 if (propPos >= 0)
4757 {
4758 benchProps = benchMethod.Ptr((unsigned)(propPos + 1));
4759 benchMethod.DeleteFrom((unsigned)propPos);
4760 }
4761
4762 if (AreSameMethodNames(benchMethod, methodName))
4763 {
4764 if (benchProps.IsEmpty()
4765 || (benchProps == "x5" && method.PropsString.IsEmpty())
4766 || method.PropsString.IsPrefixedBy_Ascii_NoCase(benchProps))
4767 {
4768 callback.BenchProps.EncComplex = h.EncComplex;
4769 callback.BenchProps.DecComplexCompr = h.DecComplexCompr;
4770 callback.BenchProps.DecComplexUnc = h.DecComplexUnc;
4771 needSetComplexity = false;
4772 break;
4773 }
4774 }
4775 }
4776 /*
4777 if (i == Z7_ARRAY_SIZE(g_Bench))
4778 return E_NOTIMPL;
4779 */
4780 }
4781 if (needSetComplexity)
4782 callback.BenchProps.SetLzmaCompexity();
4783
4784 if (startDicLog < kBenchMinDicLogSize)
4785 startDicLog = kBenchMinDicLogSize;
4786
4787 for (unsigned i = 0; i < numIterations; i++)
4788 {
4789 unsigned pow = (dict < GetDictSizeFromLog(startDicLog)) ? kBenchMinDicLogSize : (unsigned)startDicLog;
4790 if (!multiDict)
4791 pow = 32;
4792 while (GetDictSizeFromLog(pow) > dict && pow > 0)
4793 pow--;
4794 for (; GetDictSizeFromLog(pow) <= dict; pow++)
4795 {
4796 Print_Pow(f, pow);
4797 callback.DictSize = (UInt64)1 << pow;
4798
4799 COneMethodInfo method2 = method;
4800
4801 if (StringsAreEqualNoCase_Ascii(method2.MethodName, "LZMA"))
4802 {
4803 // We add dictionary size property.
4804 // method2 can have two different dictionary size properties.
4805 // And last property is main.
4806 NCOM::CPropVariant propVariant = (UInt32)pow;
4807 RINOK(method2.ParseMethodFromPROPVARIANT((UString)"d", propVariant))
4808 }
4809
4810 size_t uncompressedDataSize;
4811 if (use_fileData)
4812 {
4813 uncompressedDataSize = fileDataBuffer.Size();
4814 }
4815 else
4816 {
4817 uncompressedDataSize = (size_t)callback.DictSize;
4818 if (uncompressedDataSize != callback.DictSize)
4819 return E_OUTOFMEMORY;
4820 if (uncompressedDataSize >= (1 << 18))
4821 uncompressedDataSize += kAdditionalSize;
4822 }
4823
4824 const HRESULT res = MethodBench(
4825 EXTERNAL_CODECS_LOC_VARS
4826 complexInCommands,
4827 #ifndef Z7_ST
4828 true, numThreads,
4829 &affinityMode,
4830 #endif
4831 method2,
4832 uncompressedDataSize, (const Byte *)fileDataBuffer,
4833 kOldLzmaDictBits, printCallback, &callback, &callback.BenchProps);
4834 f.NewLine();
4835 RINOK(res)
4836 if (!multiDict)
4837 break;
4838 }
4839 }
4840 }
4841
4842 PrintChars(f, '-', callback.NameFieldSize + fileldSize);
4843
4844 if (use2Columns)
4845 {
4846 f.Print(kSep);
4847 PrintChars(f, '-', fileldSize);
4848 }
4849
4850 f.NewLine();
4851
4852 if (use2Columns)
4853 {
4854 PrintLeft(f, "Avr:", callback.NameFieldSize);
4855 PrintTotals(f, showFreq, cpuFreq, !totalBenchMode, callback.EncodeRes);
4856 f.Print(kSep);
4857 PrintTotals(f, showFreq, cpuFreq, !totalBenchMode, callback.DecodeRes);
4858 f.NewLine();
4859 }
4860
4861 PrintLeft(f, "Tot:", callback.NameFieldSize);
4862 CTotalBenchRes midRes;
4863 midRes = callback.EncodeRes;
4864 midRes.Update_With_Res(callback.DecodeRes);
4865
4866 // midRes.SetSum(callback.EncodeRes, callback.DecodeRes);
4867 PrintTotals(f, showFreq, cpuFreq, false, midRes);
4868 f.NewLine();
4869
4870 }
4871 return S_OK;
4872 }
4873