1// Bench.cpp 2 3#include "StdAfx.h" 4 5#include "../../../../C/CpuArch.h" 6 7// #include <stdio.h> 8 9#ifndef _WIN32 10 11#define USE_POSIX_TIME 12#define USE_POSIX_TIME2 13#endif // _WIN32 14 15#ifdef USE_POSIX_TIME 16#include <time.h> 17#include <unistd.h> 18#ifdef USE_POSIX_TIME2 19#include <sys/time.h> 20#include <sys/times.h> 21#endif 22#endif // USE_POSIX_TIME 23 24#ifdef _WIN32 25#define USE_ALLOCA 26#endif 27 28#ifdef USE_ALLOCA 29#ifdef _WIN32 30#include <malloc.h> 31#else 32#include <stdlib.h> 33#endif 34#endif 35 36#include "../../../../C/7zCrc.h" 37#include "../../../../C/RotateDefs.h" 38 39#ifndef Z7_ST 40#include "../../../Windows/Synchronization.h" 41#include "../../../Windows/Thread.h" 42#endif 43 44#include "../../../Windows/FileFind.h" 45#include "../../../Windows/FileIO.h" 46#include "../../../Windows/SystemInfo.h" 47 48#include "../../../Common/MyBuffer2.h" 49#include "../../../Common/IntToString.h" 50#include "../../../Common/StringConvert.h" 51#include "../../../Common/StringToInt.h" 52#include "../../../Common/Wildcard.h" 53 54#include "../../Common/MethodProps.h" 55#include "../../Common/StreamObjects.h" 56#include "../../Common/StreamUtils.h" 57 58#include "Bench.h" 59 60using namespace NWindows; 61 62#ifndef Z7_ST 63static const UInt32 k_LZMA = 0x030101; 64#endif 65 66static const UInt64 kComplexInCommands = (UInt64)1 << 67 #ifdef UNDER_CE 68 31; 69 #else 70 34; 71 #endif 72 73static const UInt32 kComplexInMs = 4000; 74 75static void SetComplexCommandsMs(UInt32 complexInMs, 76 bool isSpecifiedFreq, UInt64 cpuFreq, UInt64 &complexInCommands) 77{ 78 complexInCommands = kComplexInCommands; 79 const UInt64 kMinFreq = (UInt64)1000000 * 4; 80 const UInt64 kMaxFreq = (UInt64)1000000 * 20000; 81 if (cpuFreq < kMinFreq && !isSpecifiedFreq) 82 cpuFreq = kMinFreq; 83 if (cpuFreq < kMaxFreq || isSpecifiedFreq) 84 { 85 if (complexInMs != 0) 86 complexInCommands = complexInMs * cpuFreq / 1000; 87 else 88 complexInCommands = cpuFreq >> 2; 89 } 90} 91 
92// const UInt64 kBenchmarkUsageMult = 1000000; // for debug 93static const unsigned kBenchmarkUsageMultBits = 16; 94static const UInt64 kBenchmarkUsageMult = 1 << kBenchmarkUsageMultBits; 95 96UInt64 Benchmark_GetUsage_Percents(UInt64 usage) 97{ 98 return (100 * usage + kBenchmarkUsageMult / 2) / kBenchmarkUsageMult; 99} 100 101static const unsigned kNumHashDictBits = 17; 102static const UInt32 kFilterUnpackSize = (47 << 10); // + 5; // for test 103 104static const unsigned kOldLzmaDictBits = 32; 105 106// static const size_t kAdditionalSize = (size_t)1 << 32; // for debug 107static const size_t kAdditionalSize = (size_t)1 << 16; 108static const UInt32 kCompressedAdditionalSize = (1 << 10); 109 110static const UInt32 kMaxMethodPropSize = (1 << 6); 111 112 113#define ALLOC_WITH_HRESULT(_buffer_, _size_) \ 114 { (_buffer_)->Alloc(_size_); \ 115 if (_size_ && !(_buffer_)->IsAllocated()) return E_OUTOFMEMORY; } 116 117 118class CBaseRandomGenerator 119{ 120 UInt32 A1; 121 UInt32 A2; 122 UInt32 Salt; 123public: 124 CBaseRandomGenerator(UInt32 salt = 0): Salt(salt) { Init(); } 125 void Init() { A1 = 362436069; A2 = 521288629;} 126 Z7_FORCE_INLINE 127 UInt32 GetRnd() 128 { 129 return Salt ^ 130 ( 131 ((A1 = 36969 * (A1 & 0xffff) + (A1 >> 16)) << 16) + 132 ((A2 = 18000 * (A2 & 0xffff) + (A2 >> 16)) ) 133 ); 134 } 135}; 136 137 138Z7_NO_INLINE 139static void RandGen(Byte *buf, size_t size) 140{ 141 CBaseRandomGenerator RG; 142 const size_t size4 = size & ~((size_t)3); 143 size_t i; 144 for (i = 0; i < size4; i += 4) 145 { 146 const UInt32 v = RG.GetRnd(); 147 SetUi32(buf + i, v) 148 } 149 UInt32 v = RG.GetRnd(); 150 for (; i < size; i++) 151 { 152 buf[i] = (Byte)v; 153 v >>= 8; 154 } 155} 156 157 158class CBenchRandomGenerator: public CMidAlignedBuffer 159{ 160 static UInt32 GetVal(UInt32 &res, unsigned numBits) 161 { 162 UInt32 val = res & (((UInt32)1 << numBits) - 1); 163 res >>= numBits; 164 return val; 165 } 166 167 static UInt32 GetLen(UInt32 &r) 168 { 169 UInt32 len 
= GetVal(r, 2); 170 return GetVal(r, 1 + len); 171 } 172 173public: 174 175 void GenerateSimpleRandom(UInt32 salt) 176 { 177 CBaseRandomGenerator rg(salt); 178 const size_t bufSize = Size(); 179 Byte *buf = (Byte *)*this; 180 for (size_t i = 0; i < bufSize; i++) 181 buf[i] = (Byte)rg.GetRnd(); 182 } 183 184 void GenerateLz(unsigned dictBits, UInt32 salt) 185 { 186 CBaseRandomGenerator rg(salt); 187 size_t pos = 0; 188 size_t rep0 = 1; 189 const size_t bufSize = Size(); 190 Byte *buf = (Byte *)*this; 191 unsigned posBits = 1; 192 193 // printf("\n dictBits = %d\n", (UInt32)dictBits); 194 // printf("\n bufSize = 0x%p\n", (const void *)bufSize); 195 196 while (pos < bufSize) 197 { 198 /* 199 if (pos >= ((UInt32)1 << 31)) 200 printf(" %x\n", pos); 201 */ 202 UInt32 r = rg.GetRnd(); 203 if (GetVal(r, 1) == 0 || pos < 1024) 204 buf[pos++] = (Byte)(r & 0xFF); 205 else 206 { 207 UInt32 len; 208 len = 1 + GetLen(r); 209 210 if (GetVal(r, 3) != 0) 211 { 212 len += GetLen(r); 213 214 while (((size_t)1 << posBits) < pos) 215 posBits++; 216 217 unsigned numBitsMax = dictBits; 218 if (numBitsMax > posBits) 219 numBitsMax = posBits; 220 221 const unsigned kAddBits = 6; 222 unsigned numLogBits = 5; 223 if (numBitsMax <= (1 << 4) - 1 + kAddBits) 224 numLogBits = 4; 225 226 for (;;) 227 { 228 const UInt32 ppp = GetVal(r, numLogBits) + kAddBits; 229 r = rg.GetRnd(); 230 if (ppp > numBitsMax) 231 continue; 232 // rep0 = GetVal(r, ppp); 233 rep0 = r & (((size_t)1 << ppp) - 1); 234 if (rep0 < pos) 235 break; 236 r = rg.GetRnd(); 237 } 238 rep0++; 239 } 240 241 // len *= 300; // for debug 242 { 243 const size_t rem = bufSize - pos; 244 if (len > rem) 245 len = (UInt32)rem; 246 } 247 Byte *dest = buf + pos; 248 const Byte *src = dest - rep0; 249 pos += len; 250 for (UInt32 i = 0; i < len; i++) 251 *dest++ = *src++; 252 } 253 } 254 // printf("\n CRC = %x\n", CrcCalc(buf, bufSize)); 255 } 256}; 257 258 259Z7_CLASS_IMP_NOQIB_1( 260 CBenchmarkInStream 261 , ISequentialInStream 262) 263 const 
Byte *Data; 264 size_t Pos; 265 size_t Size; 266public: 267 void Init(const Byte *data, size_t size) 268 { 269 Data = data; 270 Size = size; 271 Pos = 0; 272 } 273 bool WasFinished() const { return Pos == Size; } 274}; 275 276Z7_COM7F_IMF(CBenchmarkInStream::Read(void *data, UInt32 size, UInt32 *processedSize)) 277{ 278 const UInt32 kMaxBlockSize = (1 << 20); 279 if (size > kMaxBlockSize) 280 size = kMaxBlockSize; 281 const size_t remain = Size - Pos; 282 if (size > remain) 283 size = (UInt32)remain; 284 285 if (size != 0) 286 memcpy(data, Data + Pos, size); 287 288 Pos += size; 289 if (processedSize) 290 *processedSize = size; 291 return S_OK; 292} 293 294 295class CBenchmarkOutStream Z7_final: 296 public ISequentialOutStream, 297 public CMyUnknownImp, 298 public CMidAlignedBuffer 299{ 300 Z7_COM_UNKNOWN_IMP_0 301 Z7_IFACE_COM7_IMP(ISequentialOutStream) 302 // bool _overflow; 303public: 304 size_t Pos; 305 bool RealCopy; 306 bool CalcCrc; 307 UInt32 Crc; 308 309 // CBenchmarkOutStream(): _overflow(false) {} 310 void Init(bool realCopy, bool calcCrc) 311 { 312 Crc = CRC_INIT_VAL; 313 RealCopy = realCopy; 314 CalcCrc = calcCrc; 315 // _overflow = false; 316 Pos = 0; 317 } 318 319 void InitCrc() 320 { 321 Crc = CRC_INIT_VAL; 322 } 323 324 void Calc(const void *data, size_t size) 325 { 326 Crc = CrcUpdate(Crc, data, size); 327 } 328 329 size_t GetPos() const { return Pos; } 330 331 // void Print() { printf("\n%8d %8d\n", (unsigned)BufferSize, (unsigned)Pos); } 332}; 333 334Z7_COM7F_IMF(CBenchmarkOutStream::Write(const void *data, UInt32 size, UInt32 *processedSize)) 335{ 336 size_t curSize = Size() - Pos; 337 if (curSize > size) 338 curSize = size; 339 if (curSize != 0) 340 { 341 if (RealCopy) 342 memcpy(((Byte *)*this) + Pos, data, curSize); 343 if (CalcCrc) 344 Calc(data, curSize); 345 Pos += curSize; 346 } 347 if (processedSize) 348 *processedSize = (UInt32)curSize; 349 if (curSize != size) 350 { 351 // _overflow = true; 352 return E_FAIL; 353 } 354 return S_OK; 
355} 356 357 358Z7_CLASS_IMP_NOQIB_1( 359 CCrcOutStream 360 , ISequentialOutStream 361) 362public: 363 bool CalcCrc; 364 UInt32 Crc; 365 UInt64 Pos; 366 367 CCrcOutStream(): CalcCrc(true) {} 368 void Init() { Crc = CRC_INIT_VAL; Pos = 0; } 369 void Calc(const void *data, size_t size) 370 { 371 Crc = CrcUpdate(Crc, data, size); 372 } 373}; 374 375Z7_COM7F_IMF(CCrcOutStream::Write(const void *data, UInt32 size, UInt32 *processedSize)) 376{ 377 if (CalcCrc) 378 Calc(data, size); 379 Pos += size; 380 if (processedSize) 381 *processedSize = size; 382 return S_OK; 383} 384 385// #include "../../../../C/My_sys_time.h" 386 387static UInt64 GetTimeCount() 388{ 389 #ifdef USE_POSIX_TIME 390 #ifdef USE_POSIX_TIME2 391 timeval v; 392 if (gettimeofday(&v, NULL) == 0) 393 return (UInt64)(v.tv_sec) * 1000000 + (UInt64)v.tv_usec; 394 return (UInt64)time(NULL) * 1000000; 395 #else 396 return time(NULL); 397 #endif 398 #else 399 LARGE_INTEGER value; 400 if (::QueryPerformanceCounter(&value)) 401 return (UInt64)value.QuadPart; 402 return GetTickCount(); 403 #endif 404} 405 406static UInt64 GetFreq() 407{ 408 #ifdef USE_POSIX_TIME 409 #ifdef USE_POSIX_TIME2 410 return 1000000; 411 #else 412 return 1; 413 #endif 414 #else 415 LARGE_INTEGER value; 416 if (::QueryPerformanceFrequency(&value)) 417 return (UInt64)value.QuadPart; 418 return 1000; 419 #endif 420} 421 422 423#ifdef USE_POSIX_TIME 424 425struct CUserTime 426{ 427 UInt64 Sum; 428 clock_t Prev; 429 430 void Init() 431 { 432 // Prev = clock(); 433 Sum = 0; 434 Prev = 0; 435 Update(); 436 Sum = 0; 437 } 438 439 void Update() 440 { 441 tms t; 442 /* clock_t res = */ times(&t); 443 clock_t newVal = t.tms_utime + t.tms_stime; 444 Sum += (UInt64)(newVal - Prev); 445 Prev = newVal; 446 447 /* 448 clock_t v = clock(); 449 if (v != -1) 450 { 451 Sum += v - Prev; 452 Prev = v; 453 } 454 */ 455 } 456 UInt64 GetUserTime() 457 { 458 Update(); 459 return Sum; 460 } 461}; 462 463#else 464 465 466struct CUserTime 467{ 468 bool UseTick; 469 
DWORD Prev_Tick; 470 UInt64 Prev; 471 UInt64 Sum; 472 473 void Init() 474 { 475 UseTick = false; 476 Prev_Tick = 0; 477 Prev = 0; 478 Sum = 0; 479 Update(); 480 Sum = 0; 481 } 482 UInt64 GetUserTime() 483 { 484 Update(); 485 return Sum; 486 } 487 void Update(); 488}; 489 490static inline UInt64 GetTime64(const FILETIME &t) { return ((UInt64)t.dwHighDateTime << 32) | t.dwLowDateTime; } 491 492void CUserTime::Update() 493{ 494 DWORD new_Tick = GetTickCount(); 495 FILETIME creationTime, exitTime, kernelTime, userTime; 496 if (!UseTick && 497 #ifdef UNDER_CE 498 ::GetThreadTimes(::GetCurrentThread() 499 #else 500 ::GetProcessTimes(::GetCurrentProcess() 501 #endif 502 , &creationTime, &exitTime, &kernelTime, &userTime)) 503 { 504 UInt64 newVal = GetTime64(userTime) + GetTime64(kernelTime); 505 Sum += newVal - Prev; 506 Prev = newVal; 507 } 508 else 509 { 510 UseTick = true; 511 Sum += (UInt64)(new_Tick - (DWORD)Prev_Tick) * 10000; 512 } 513 Prev_Tick = new_Tick; 514} 515 516 517#endif 518 519static UInt64 GetUserFreq() 520{ 521 #ifdef USE_POSIX_TIME 522 // return CLOCKS_PER_SEC; 523 return (UInt64)sysconf(_SC_CLK_TCK); 524 #else 525 return 10000000; 526 #endif 527} 528 529class CBenchProgressStatus Z7_final 530{ 531 #ifndef Z7_ST 532 NSynchronization::CCriticalSection CS; 533 #endif 534public: 535 HRESULT Res; 536 bool EncodeMode; 537 void SetResult(HRESULT res) 538 { 539 #ifndef Z7_ST 540 NSynchronization::CCriticalSectionLock lock(CS); 541 #endif 542 Res = res; 543 } 544 HRESULT GetResult() 545 { 546 #ifndef Z7_ST 547 NSynchronization::CCriticalSectionLock lock(CS); 548 #endif 549 return Res; 550 } 551}; 552 553struct CBenchInfoCalc 554{ 555 CBenchInfo BenchInfo; 556 CUserTime UserTime; 557 558 void SetStartTime(); 559 void SetFinishTime(CBenchInfo &dest); 560}; 561 562void CBenchInfoCalc::SetStartTime() 563{ 564 BenchInfo.GlobalFreq = GetFreq(); 565 BenchInfo.UserFreq = GetUserFreq(); 566 BenchInfo.GlobalTime = ::GetTimeCount(); 567 BenchInfo.UserTime = 0; 568 
UserTime.Init(); 569} 570 571void CBenchInfoCalc::SetFinishTime(CBenchInfo &dest) 572{ 573 dest = BenchInfo; 574 dest.GlobalTime = ::GetTimeCount() - BenchInfo.GlobalTime; 575 dest.UserTime = UserTime.GetUserTime(); 576} 577 578class CBenchProgressInfo Z7_final: 579 public ICompressProgressInfo, 580 public CMyUnknownImp, 581 public CBenchInfoCalc 582{ 583 Z7_COM_UNKNOWN_IMP_0 584 Z7_IFACE_COM7_IMP(ICompressProgressInfo) 585public: 586 CBenchProgressStatus *Status; 587 IBenchCallback *Callback; 588 589 CBenchProgressInfo(): Callback(NULL) {} 590}; 591 592 593Z7_COM7F_IMF(CBenchProgressInfo::SetRatioInfo(const UInt64 *inSize, const UInt64 *outSize)) 594{ 595 HRESULT res = Status->GetResult(); 596 if (res != S_OK) 597 return res; 598 if (!Callback) 599 return res; 600 601 /* 602 static UInt64 inSizePrev = 0; 603 static UInt64 outSizePrev = 0; 604 UInt64 delta1 = 0, delta2 = 0, val1 = 0, val2 = 0; 605 if (inSize) { val1 = *inSize; delta1 = val1 - inSizePrev; inSizePrev = val1; } 606 if (outSize) { val2 = *outSize; delta2 = val2 - outSizePrev; outSizePrev = val2; } 607 UInt64 percents = delta2 * 1000; 608 if (delta1 != 0) 609 percents /= delta1; 610 printf("=== %7d %7d %7d %7d ratio = %4d\n", 611 (unsigned)(val1 >> 10), (unsigned)(delta1 >> 10), 612 (unsigned)(val2 >> 10), (unsigned)(delta2 >> 10), 613 (unsigned)percents); 614 */ 615 616 CBenchInfo info; 617 SetFinishTime(info); 618 if (Status->EncodeMode) 619 { 620 info.UnpackSize = BenchInfo.UnpackSize + *inSize; 621 info.PackSize = BenchInfo.PackSize + *outSize; 622 res = Callback->SetEncodeResult(info, false); 623 } 624 else 625 { 626 info.PackSize = BenchInfo.PackSize + *inSize; 627 info.UnpackSize = BenchInfo.UnpackSize + *outSize; 628 res = Callback->SetDecodeResult(info, false); 629 } 630 if (res != S_OK) 631 Status->SetResult(res); 632 return res; 633} 634 635static const unsigned kSubBits = 8; 636 637static unsigned GetLogSize(UInt64 size) 638{ 639 unsigned i = 0; 640 for (;;) 641 { 642 i++; size >>= 1; if 
(size == 0) break; 643 } 644 return i; 645} 646 647 648static UInt32 GetLogSize_Sub(UInt64 size) 649{ 650 if (size <= 1) 651 return 0; 652 const unsigned i = GetLogSize(size) - 1; 653 UInt32 v; 654 if (i <= kSubBits) 655 v = (UInt32)(size) << (kSubBits - i); 656 else 657 v = (UInt32)(size >> (i - kSubBits)); 658 return ((UInt32)i << kSubBits) + (v & (((UInt32)1 << kSubBits) - 1)); 659} 660 661 662static UInt64 Get_UInt64_from_double(double v) 663{ 664 const UInt64 kMaxVal = (UInt64)1 << 62; 665 if (v > (double)(Int64)kMaxVal) 666 return kMaxVal; 667 return (UInt64)v; 668} 669 670static UInt64 MyMultDiv64(UInt64 m1, UInt64 m2, UInt64 d) 671{ 672 if (d == 0) 673 d = 1; 674 const double v = 675 (double)(Int64)m1 * 676 (double)(Int64)m2 / 677 (double)(Int64)d; 678 return Get_UInt64_from_double(v); 679 /* 680 unsigned n1 = GetLogSize(m1); 681 unsigned n2 = GetLogSize(m2); 682 while (n1 + n2 > 64) 683 { 684 if (n1 >= n2) 685 { 686 m1 >>= 1; 687 n1--; 688 } 689 else 690 { 691 m2 >>= 1; 692 n2--; 693 } 694 d >>= 1; 695 } 696 697 if (d == 0) 698 d = 1; 699 return m1 * m2 / d; 700 */ 701} 702 703 704UInt64 CBenchInfo::GetUsage() const 705{ 706 UInt64 userTime = UserTime; 707 UInt64 userFreq = UserFreq; 708 UInt64 globalTime = GlobalTime; 709 UInt64 globalFreq = GlobalFreq; 710 711 if (userFreq == 0) 712 userFreq = 1; 713 if (globalTime == 0) 714 globalTime = 1; 715 716 const double v = 717 ((double)(Int64)userTime / (double)(Int64)userFreq) 718 * ((double)(Int64)globalFreq / (double)(Int64)globalTime) 719 * (double)(Int64)kBenchmarkUsageMult; 720 return Get_UInt64_from_double(v); 721 /* 722 return MyMultDiv64( 723 MyMultDiv64(kBenchmarkUsageMult, userTime, userFreq), 724 globalFreq, globalTime); 725 */ 726} 727 728 729UInt64 CBenchInfo::GetRatingPerUsage(UInt64 rating) const 730{ 731 if (UserTime == 0) 732 { 733 return 0; 734 // userTime = 1; 735 } 736 UInt64 globalFreq = GlobalFreq; 737 if (globalFreq == 0) 738 globalFreq = 1; 739 740 const double v = 741 
((double)(Int64)GlobalTime / (double)(Int64)globalFreq) 742 * ((double)(Int64)UserFreq / (double)(Int64)UserTime) 743 * (double)(Int64)rating; 744 return Get_UInt64_from_double(v); 745 /* 746 return MyMultDiv64( 747 MyMultDiv64(rating, UserFreq, UserTime), 748 GlobalTime, globalFreq); 749 */ 750} 751 752 753UInt64 CBenchInfo::GetSpeed(UInt64 numUnits) const 754{ 755 return MyMultDiv64(numUnits, GlobalFreq, GlobalTime); 756} 757 758static UInt64 GetNumCommands_from_Size_and_Complexity(UInt64 size, Int32 complexity) 759{ 760 return complexity >= 0 ? 761 size * (UInt32)complexity : 762 size / (UInt32)(-complexity); 763} 764 765struct CBenchProps 766{ 767 bool LzmaRatingMode; 768 769 Int32 EncComplex; 770 Int32 DecComplexCompr; 771 Int32 DecComplexUnc; 772 773 unsigned KeySize; 774 775 CBenchProps(): 776 LzmaRatingMode(false), 777 KeySize(0) 778 {} 779 780 void SetLzmaCompexity(); 781 782 UInt64 GetNumCommands_Enc(UInt64 unpackSize) const 783 { 784 const UInt32 kMinSize = 100; 785 if (unpackSize < kMinSize) 786 unpackSize = kMinSize; 787 return GetNumCommands_from_Size_and_Complexity(unpackSize, EncComplex); 788 } 789 790 UInt64 GetNumCommands_Dec(UInt64 packSize, UInt64 unpackSize) const 791 { 792 return 793 GetNumCommands_from_Size_and_Complexity(packSize, DecComplexCompr) + 794 GetNumCommands_from_Size_and_Complexity(unpackSize, DecComplexUnc); 795 } 796 797 UInt64 GetRating_Enc(UInt64 dictSize, UInt64 elapsedTime, UInt64 freq, UInt64 size) const; 798 UInt64 GetRating_Dec(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt64 numIterations) const; 799}; 800 801void CBenchProps::SetLzmaCompexity() 802{ 803 EncComplex = 1200; 804 DecComplexUnc = 4; 805 DecComplexCompr = 190; 806 LzmaRatingMode = true; 807} 808 809UInt64 CBenchProps::GetRating_Enc(UInt64 dictSize, UInt64 elapsedTime, UInt64 freq, UInt64 size) const 810{ 811 if (dictSize < (1 << kBenchMinDicLogSize)) 812 dictSize = (1 << kBenchMinDicLogSize); 813 Int32 encComplex = EncComplex; 814 if 
(LzmaRatingMode) 815 { 816 /* 817 for (UInt64 uu = 0; uu < (UInt64)0xf << 60;) 818 { 819 unsigned rr = GetLogSize_Sub(uu); 820 printf("\n%16I64x , log = %4x", uu, rr); 821 uu += 1; 822 uu += uu / 50; 823 } 824 */ 825 // throw 1; 826 const UInt32 t = GetLogSize_Sub(dictSize) - (kBenchMinDicLogSize << kSubBits); 827 encComplex = 870 + ((t * t * 5) >> (2 * kSubBits)); 828 } 829 const UInt64 numCommands = GetNumCommands_from_Size_and_Complexity(size, encComplex); 830 return MyMultDiv64(numCommands, freq, elapsedTime); 831} 832 833UInt64 CBenchProps::GetRating_Dec(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt64 numIterations) const 834{ 835 const UInt64 numCommands = GetNumCommands_Dec(inSize, outSize) * numIterations; 836 return MyMultDiv64(numCommands, freq, elapsedTime); 837} 838 839 840 841UInt64 CBenchInfo::GetRating_LzmaEnc(UInt64 dictSize) const 842{ 843 CBenchProps props; 844 props.SetLzmaCompexity(); 845 return props.GetRating_Enc(dictSize, GlobalTime, GlobalFreq, UnpackSize * NumIterations); 846} 847 848UInt64 CBenchInfo::GetRating_LzmaDec() const 849{ 850 CBenchProps props; 851 props.SetLzmaCompexity(); 852 return props.GetRating_Dec(GlobalTime, GlobalFreq, UnpackSize, PackSize, NumIterations); 853} 854 855 856#ifndef Z7_ST 857 858#define NUM_CPU_LEVELS_MAX 3 859 860struct CAffinityMode 861{ 862 unsigned NumBundleThreads; 863 unsigned NumLevels; 864 unsigned NumCoreThreads; 865 unsigned NumCores; 866 // unsigned DivideNum; 867 UInt32 Sizes[NUM_CPU_LEVELS_MAX]; 868 869 void SetLevels(unsigned numCores, unsigned numCoreThreads); 870 DWORD_PTR GetAffinityMask(UInt32 bundleIndex, CCpuSet *cpuSet) const; 871 bool NeedAffinity() const { return NumBundleThreads != 0; } 872 873 WRes CreateThread_WithAffinity(NWindows::CThread &thread, THREAD_FUNC_TYPE startAddress, LPVOID parameter, UInt32 bundleIndex) const 874 { 875 if (NeedAffinity()) 876 { 877 CCpuSet cpuSet; 878 GetAffinityMask(bundleIndex, &cpuSet); 879 return 
thread.Create_With_CpuSet(startAddress, parameter, &cpuSet); 880 } 881 return thread.Create(startAddress, parameter); 882 } 883 884 CAffinityMode(): 885 NumBundleThreads(0), 886 NumLevels(0), 887 NumCoreThreads(1) 888 // DivideNum(1) 889 {} 890}; 891 892void CAffinityMode::SetLevels(unsigned numCores, unsigned numCoreThreads) 893{ 894 NumCores = numCores; 895 NumCoreThreads = numCoreThreads; 896 NumLevels = 0; 897 if (numCoreThreads == 0 || numCores == 0 || numCores % numCoreThreads != 0) 898 return; 899 UInt32 c = numCores / numCoreThreads; 900 UInt32 c2 = 1; 901 while ((c & 1) == 0) 902 { 903 c >>= 1; 904 c2 <<= 1; 905 } 906 if (c2 != 1) 907 Sizes[NumLevels++] = c2; 908 if (c != 1) 909 Sizes[NumLevels++] = c; 910 if (numCoreThreads != 1) 911 Sizes[NumLevels++] = numCoreThreads; 912 if (NumLevels == 0) 913 Sizes[NumLevels++] = 1; 914 915 /* 916 printf("\n Cores:"); 917 for (unsigned i = 0; i < NumLevels; i++) 918 { 919 printf(" %d", Sizes[i]); 920 } 921 printf("\n"); 922 */ 923} 924 925 926DWORD_PTR CAffinityMode::GetAffinityMask(UInt32 bundleIndex, CCpuSet *cpuSet) const 927{ 928 CpuSet_Zero(cpuSet); 929 930 if (NumLevels == 0) 931 return 0; 932 933 // printf("\n%2d", bundleIndex); 934 935 /* 936 UInt32 low = 0; 937 if (DivideNum != 1) 938 { 939 low = bundleIndex % DivideNum; 940 bundleIndex /= DivideNum; 941 } 942 */ 943 944 UInt32 numGroups = NumCores / NumBundleThreads; 945 UInt32 m = bundleIndex % numGroups; 946 UInt32 v = 0; 947 for (unsigned i = 0; i < NumLevels; i++) 948 { 949 UInt32 size = Sizes[i]; 950 while ((size & 1) == 0) 951 { 952 v *= 2; 953 v |= (m & 1); 954 m >>= 1; 955 size >>= 1; 956 } 957 v *= size; 958 v += m % size; 959 m /= size; 960 } 961 962 // UInt32 nb = NumBundleThreads / DivideNum; 963 UInt32 nb = NumBundleThreads; 964 965 DWORD_PTR mask = ((DWORD_PTR)1 << nb) - 1; 966 // v += low; 967 mask <<= v; 968 969 // printf(" %2d %8x \n ", v, (unsigned)mask); 970 #ifdef _WIN32 971 *cpuSet = mask; 972 #else 973 { 974 for (unsigned k = 0; k < 
nb; k++) 975 CpuSet_Set(cpuSet, v + k); 976 } 977 #endif 978 979 return mask; 980} 981 982 983struct CBenchSyncCommon 984{ 985 bool ExitMode; 986 NSynchronization::CManualResetEvent StartEvent; 987 988 CBenchSyncCommon(): ExitMode(false) {} 989}; 990 991#endif 992 993 994 995enum E_CheckCrcMode 996{ 997 k_CheckCrcMode_Never = 0, 998 k_CheckCrcMode_Always = 1, 999 k_CheckCrcMode_FirstPass = 2 1000}; 1001 1002class CEncoderInfo; 1003 1004class CEncoderInfo Z7_final 1005{ 1006 Z7_CLASS_NO_COPY(CEncoderInfo) 1007 1008public: 1009 1010 #ifndef Z7_ST 1011 NWindows::CThread thread[2]; 1012 NSynchronization::CManualResetEvent ReadyEvent; 1013 UInt32 NumDecoderSubThreads; 1014 CBenchSyncCommon *Common; 1015 UInt32 EncoderIndex; 1016 UInt32 NumEncoderInternalThreads; 1017 CAffinityMode AffinityMode; 1018 bool IsGlobalMtMode; // if more than one benchmark encoder threads 1019 #endif 1020 1021 CMyComPtr<ICompressCoder> _encoder; 1022 CMyComPtr<ICompressFilter> _encoderFilter; 1023 CBenchProgressInfo *progressInfoSpec[2]; 1024 CMyComPtr<ICompressProgressInfo> progressInfo[2]; 1025 UInt64 NumIterations; 1026 1027 UInt32 Salt; 1028 1029 #ifdef USE_ALLOCA 1030 size_t AllocaSize; 1031 #endif 1032 1033 unsigned KeySize; 1034 Byte _key[32]; 1035 Byte _iv[16]; 1036 1037 HRESULT Set_Key_and_IV(ICryptoProperties *cp) 1038 { 1039 RINOK(cp->SetKey(_key, KeySize)) 1040 return cp->SetInitVector(_iv, sizeof(_iv)); 1041 } 1042 1043 Byte _psw[16]; 1044 1045 bool CheckCrc_Enc; /* = 1, if we want to check packed data crcs after each pass 1046 used for filter and usual coders */ 1047 bool UseRealData_Enc; /* = 1, if we want to use only original data for each pass 1048 used only for filter */ 1049 E_CheckCrcMode CheckCrcMode_Dec; 1050 1051 struct CDecoderInfo 1052 { 1053 CEncoderInfo *Encoder; 1054 UInt32 DecoderIndex; 1055 bool CallbackMode; 1056 1057 #ifdef USE_ALLOCA 1058 size_t AllocaSize; 1059 #endif 1060 }; 1061 CDecoderInfo decodersInfo[2]; 1062 1063 CMyComPtr<ICompressCoder> _decoders[2]; 
1064 CMyComPtr<ICompressFilter> _decoderFilter; 1065 1066 HRESULT Results[2]; 1067 CBenchmarkOutStream *outStreamSpec; 1068 CMyComPtr<ISequentialOutStream> outStream; 1069 IBenchCallback *callback; 1070 IBenchPrintCallback *printCallback; 1071 UInt32 crc; 1072 size_t kBufferSize; 1073 size_t compressedSize; 1074 const Byte *uncompressedDataPtr; 1075 1076 const Byte *fileData; 1077 CBenchRandomGenerator rg; 1078 1079 CMidAlignedBuffer rgCopy; // it must be 16-byte aligned !!! 1080 1081 // CBenchmarkOutStream *propStreamSpec; 1082 Byte propsData[kMaxMethodPropSize]; 1083 CBufPtrSeqOutStream *propStreamSpec; 1084 CMyComPtr<ISequentialOutStream> propStream; 1085 1086 unsigned generateDictBits; 1087 COneMethodInfo _method; 1088 1089 // for decode 1090 size_t _uncompressedDataSize; 1091 1092 HRESULT Generate(); 1093 HRESULT Encode(); 1094 HRESULT Decode(UInt32 decoderIndex); 1095 1096 CEncoderInfo(): 1097 #ifndef Z7_ST 1098 Common(NULL), 1099 IsGlobalMtMode(true), 1100 #endif 1101 Salt(0), 1102 KeySize(0), 1103 CheckCrc_Enc(true), 1104 UseRealData_Enc(true), 1105 CheckCrcMode_Dec(k_CheckCrcMode_Always), 1106 outStreamSpec(NULL), 1107 callback(NULL), 1108 printCallback(NULL), 1109 fileData(NULL), 1110 propStreamSpec(NULL) 1111 {} 1112 1113 #ifndef Z7_ST 1114 1115 static THREAD_FUNC_DECL EncodeThreadFunction(void *param) 1116 { 1117 HRESULT res; 1118 CEncoderInfo *encoder = (CEncoderInfo *)param; 1119 try 1120 { 1121 #ifdef USE_ALLOCA 1122 alloca(encoder->AllocaSize); 1123 #endif 1124 1125 res = encoder->Encode(); 1126 } 1127 catch(...) 
1128 { 1129 res = E_FAIL; 1130 } 1131 encoder->Results[0] = res; 1132 if (res != S_OK) 1133 encoder->progressInfoSpec[0]->Status->SetResult(res); 1134 encoder->ReadyEvent.Set(); 1135 return THREAD_FUNC_RET_ZERO; 1136 } 1137 1138 static THREAD_FUNC_DECL DecodeThreadFunction(void *param) 1139 { 1140 CDecoderInfo *decoder = (CDecoderInfo *)param; 1141 1142 #ifdef USE_ALLOCA 1143 alloca(decoder->AllocaSize); 1144 #endif 1145 1146 CEncoderInfo *encoder = decoder->Encoder; 1147 encoder->Results[decoder->DecoderIndex] = encoder->Decode(decoder->DecoderIndex); 1148 return THREAD_FUNC_RET_ZERO; 1149 } 1150 1151 HRESULT CreateEncoderThread() 1152 { 1153 WRes res = 0; 1154 if (!ReadyEvent.IsCreated()) 1155 res = ReadyEvent.Create(); 1156 if (res == 0) 1157 res = AffinityMode.CreateThread_WithAffinity(thread[0], EncodeThreadFunction, this, 1158 EncoderIndex); 1159 return HRESULT_FROM_WIN32(res); 1160 } 1161 1162 HRESULT CreateDecoderThread(unsigned index, bool callbackMode 1163 #ifdef USE_ALLOCA 1164 , size_t allocaSize 1165 #endif 1166 ) 1167 { 1168 CDecoderInfo &decoder = decodersInfo[index]; 1169 decoder.DecoderIndex = index; 1170 decoder.Encoder = this; 1171 1172 #ifdef USE_ALLOCA 1173 decoder.AllocaSize = allocaSize; 1174 #endif 1175 1176 decoder.CallbackMode = callbackMode; 1177 1178 WRes res = AffinityMode.CreateThread_WithAffinity(thread[index], DecodeThreadFunction, &decoder, 1179 // EncoderIndex * NumEncoderInternalThreads + index 1180 EncoderIndex 1181 ); 1182 1183 return HRESULT_FROM_WIN32(res); 1184 } 1185 1186 #endif 1187}; 1188 1189 1190 1191 1192static size_t GetBenchCompressedSize(size_t bufferSize) 1193{ 1194 return kCompressedAdditionalSize + bufferSize + bufferSize / 16; 1195 // kBufferSize / 2; 1196} 1197 1198 1199HRESULT CEncoderInfo::Generate() 1200{ 1201 const COneMethodInfo &method = _method; 1202 1203 // we need extra space, if input data is already compressed 1204 const size_t kCompressedBufferSize = _encoderFilter ? 
1205 kBufferSize : 1206 GetBenchCompressedSize(kBufferSize); 1207 1208 if (kCompressedBufferSize < kBufferSize) 1209 return E_FAIL; 1210 1211 uncompressedDataPtr = fileData; 1212 if (fileData) 1213 { 1214 #if !defined(Z7_ST) 1215 if (IsGlobalMtMode) 1216 { 1217 /* we copy the data to local buffer of thread to eliminate 1218 using of shared buffer by different threads */ 1219 ALLOC_WITH_HRESULT(&rg, kBufferSize) 1220 memcpy((Byte *)rg, fileData, kBufferSize); 1221 uncompressedDataPtr = (const Byte *)rg; 1222 } 1223 #endif 1224 } 1225 else 1226 { 1227 ALLOC_WITH_HRESULT(&rg, kBufferSize) 1228 // DWORD ttt = GetTickCount(); 1229 if (generateDictBits == 0) 1230 rg.GenerateSimpleRandom(Salt); 1231 else 1232 { 1233 if (generateDictBits >= sizeof(size_t) * 8 1234 && kBufferSize > ((size_t)1 << (sizeof(size_t) * 8 - 1))) 1235 return E_INVALIDARG; 1236 rg.GenerateLz(generateDictBits, Salt); 1237 // return E_ABORT; // for debug 1238 } 1239 // printf("\n%d\n ", GetTickCount() - ttt); 1240 1241 crc = CrcCalc((const Byte *)rg, rg.Size()); 1242 uncompressedDataPtr = (const Byte *)rg; 1243 } 1244 1245 if (!outStream) 1246 { 1247 outStreamSpec = new CBenchmarkOutStream; 1248 outStream = outStreamSpec; 1249 } 1250 1251 ALLOC_WITH_HRESULT(outStreamSpec, kCompressedBufferSize) 1252 1253 if (_encoderFilter) 1254 { 1255 /* we try to reduce the number of memcpy() in main encoding loop. 
1256 so we copy data to temp buffers here */ 1257 ALLOC_WITH_HRESULT(&rgCopy, kBufferSize) 1258 memcpy((Byte *)*outStreamSpec, uncompressedDataPtr, kBufferSize); 1259 memcpy((Byte *)rgCopy, uncompressedDataPtr, kBufferSize); 1260 } 1261 1262 if (!propStream) 1263 { 1264 propStreamSpec = new CBufPtrSeqOutStream; // CBenchmarkOutStream; 1265 propStream = propStreamSpec; 1266 } 1267 // ALLOC_WITH_HRESULT_2(propStreamSpec, kMaxMethodPropSize); 1268 // propStreamSpec->Init(true, false); 1269 propStreamSpec->Init(propsData, sizeof(propsData)); 1270 1271 1272 CMyComPtr<IUnknown> coder; 1273 if (_encoderFilter) 1274 coder = _encoderFilter; 1275 else 1276 coder = _encoder; 1277 { 1278 CMyComPtr<ICompressSetCoderProperties> scp; 1279 coder.QueryInterface(IID_ICompressSetCoderProperties, &scp); 1280 if (scp) 1281 { 1282 const UInt64 reduceSize = kBufferSize; 1283 1284 /* in posix new thread uses same affinity as parent thread, 1285 so we don't need to send affinity to coder in posix */ 1286 UInt64 affMask; 1287 #if !defined(Z7_ST) && defined(_WIN32) 1288 { 1289 CCpuSet cpuSet; 1290 affMask = AffinityMode.GetAffinityMask(EncoderIndex, &cpuSet); 1291 } 1292 #else 1293 affMask = 0; 1294 #endif 1295 // affMask <<= 3; // debug line: to test no affinity in coder; 1296 // affMask = 0; 1297 1298 RINOK(method.SetCoderProps_DSReduce_Aff(scp, &reduceSize, (affMask != 0 ? 
&affMask : NULL))) 1299 } 1300 else 1301 { 1302 if (method.AreThereNonOptionalProps()) 1303 return E_INVALIDARG; 1304 } 1305 1306 CMyComPtr<ICompressWriteCoderProperties> writeCoderProps; 1307 coder.QueryInterface(IID_ICompressWriteCoderProperties, &writeCoderProps); 1308 if (writeCoderProps) 1309 { 1310 RINOK(writeCoderProps->WriteCoderProperties(propStream)) 1311 } 1312 1313 { 1314 CMyComPtr<ICryptoSetPassword> sp; 1315 coder.QueryInterface(IID_ICryptoSetPassword, &sp); 1316 if (sp) 1317 { 1318 RINOK(sp->CryptoSetPassword(_psw, sizeof(_psw))) 1319 1320 // we must call encoding one time to calculate password key for key cache. 1321 // it must be after WriteCoderProperties! 1322 Byte temp[16]; 1323 memset(temp, 0, sizeof(temp)); 1324 1325 if (_encoderFilter) 1326 { 1327 _encoderFilter->Init(); 1328 _encoderFilter->Filter(temp, sizeof(temp)); 1329 } 1330 else 1331 { 1332 CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream; 1333 CMyComPtr<ISequentialInStream> inStream = inStreamSpec; 1334 inStreamSpec->Init(temp, sizeof(temp)); 1335 1336 CCrcOutStream *crcStreamSpec = new CCrcOutStream; 1337 CMyComPtr<ISequentialOutStream> crcStream = crcStreamSpec; 1338 crcStreamSpec->Init(); 1339 1340 RINOK(_encoder->Code(inStream, crcStream, NULL, NULL, NULL)) 1341 } 1342 } 1343 } 1344 } 1345 1346 return S_OK; 1347} 1348 1349 1350static void My_FilterBench(ICompressFilter *filter, Byte *data, size_t size, UInt32 *crc) 1351{ 1352 while (size != 0) 1353 { 1354 UInt32 cur = crc ? 1 << 17 : 1 << 24; 1355 if (cur > size) 1356 cur = (UInt32)size; 1357 UInt32 processed = filter->Filter(data, cur); 1358 /* if (processed > size) (in AES filter), we must fill last block with zeros. 1359 but it is not important for benchmark. So we just copy that data without filtering. 
1360 if (processed == 0) then filter can't process more */ 1361 if (processed > size || processed == 0) 1362 processed = (UInt32)size; 1363 if (crc) 1364 *crc = CrcUpdate(*crc, data, processed); 1365 data += processed; 1366 size -= processed; 1367 } 1368} 1369 1370 1371HRESULT CEncoderInfo::Encode() 1372{ 1373 // printf("\nCEncoderInfo::Generate\n"); 1374 1375 RINOK(Generate()) 1376 1377 // printf("\n2222\n"); 1378 1379 #ifndef Z7_ST 1380 if (Common) 1381 { 1382 Results[0] = S_OK; 1383 WRes wres = ReadyEvent.Set(); 1384 if (wres == 0) 1385 wres = Common->StartEvent.Lock(); 1386 if (wres != 0) 1387 return HRESULT_FROM_WIN32(wres); 1388 if (Common->ExitMode) 1389 return S_OK; 1390 } 1391 else 1392 #endif 1393 { 1394 CBenchProgressInfo *bpi = progressInfoSpec[0]; 1395 bpi->SetStartTime(); 1396 } 1397 1398 1399 CBenchInfo &bi = progressInfoSpec[0]->BenchInfo; 1400 bi.UnpackSize = 0; 1401 bi.PackSize = 0; 1402 CMyComPtr<ICryptoProperties> cp; 1403 CMyComPtr<IUnknown> coder; 1404 if (_encoderFilter) 1405 coder = _encoderFilter; 1406 else 1407 coder = _encoder; 1408 coder.QueryInterface(IID_ICryptoProperties, &cp); 1409 CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream; 1410 CMyComPtr<ISequentialInStream> inStream = inStreamSpec; 1411 1412 if (cp) 1413 { 1414 RINOK(Set_Key_and_IV(cp)) 1415 } 1416 1417 compressedSize = 0; 1418 if (_encoderFilter) 1419 compressedSize = kBufferSize; 1420 1421 // CBenchmarkOutStream *outStreamSpec = this->outStreamSpec; 1422 UInt64 prev = 0; 1423 1424 const UInt32 mask = (CheckCrc_Enc ? 0 : 0xFFFF); 1425 const bool useCrc = (mask < NumIterations); 1426 bool crcPrev_defined = false; 1427 UInt32 crcPrev = 0; 1428 1429 bool useRealData_Enc = UseRealData_Enc; 1430 bool data_Was_Changed = false; 1431 if (useRealData_Enc) 1432 { 1433 /* we want memcpy() for each iteration including first iteration. 
1434 So results will be equal for different number of iterations */ 1435 data_Was_Changed = true; 1436 } 1437 1438 const UInt64 numIterations = NumIterations; 1439 UInt64 i = numIterations; 1440 // printCallback->NewLine(); 1441 1442 while (i != 0) 1443 { 1444 i--; 1445 if (printCallback && bi.UnpackSize - prev >= (1 << 26)) 1446 { 1447 prev = bi.UnpackSize; 1448 RINOK(printCallback->CheckBreak()) 1449 } 1450 1451 /* 1452 CBenchInfo info; 1453 progressInfoSpec[0]->SetStartTime(); 1454 */ 1455 1456 bool calcCrc = false; 1457 if (useCrc) 1458 calcCrc = (((UInt32)i & mask) == 0); 1459 1460 if (_encoderFilter) 1461 { 1462 Byte *filterData = rgCopy; 1463 if (i == numIterations - 1 || calcCrc || useRealData_Enc) 1464 { 1465 filterData = (Byte *)*outStreamSpec; 1466 if (data_Was_Changed) 1467 memcpy(filterData, uncompressedDataPtr, kBufferSize); 1468 data_Was_Changed = true; 1469 } 1470 _encoderFilter->Init(); 1471 if (calcCrc) 1472 outStreamSpec->InitCrc(); 1473 My_FilterBench(_encoderFilter, filterData, kBufferSize, 1474 calcCrc ? 
/*
  Runs the decoding part of the benchmark for one decoder of this object.

  decoderIndex : index in _decoders[] / progressInfoSpec[] / progressInfo[].
                 Only index 0 is accepted when a decoder filter is used.

  The input is the data held by (outStreamSpec), (compressedSize) bytes.
  The function performs (NumIterations) passes and accumulates the processed
  sizes in progressInfoSpec[decoderIndex]->BenchInfo.

  Returns:
    S_OK    : all passes were finished.
    E_FAIL  : wrong configuration (filter with decoderIndex != 0, properties
              were written but the coder can't accept them, rgCopy is too small).
    S_FALSE : decoded data is wrong (size, CRC, or consumed input size mismatch).
    other   : error code returned by the coder or by printCallback->CheckBreak().
*/
HRESULT CEncoderInfo::Decode(UInt32 decoderIndex)
{
  CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
  CMyComPtr<ISequentialInStream> inStream = inStreamSpec;
  CMyComPtr<ICompressCoder> &decoder = _decoders[decoderIndex];
  // (coder) is the object that is asked for all optional interfaces below:
  // it's the filter, if the filter exists, or the decoder otherwise.
  CMyComPtr<IUnknown> coder;
  if (_decoderFilter)
  {
    if (decoderIndex != 0)
      return E_FAIL;
    coder = _decoderFilter;
  }
  else
    coder = decoder;

  CMyComPtr<ICompressSetDecoderProperties2> setDecProps;
  coder.QueryInterface(IID_ICompressSetDecoderProperties2, &setDecProps);
  // non-empty properties stream requires a coder that can accept properties
  if (!setDecProps && propStreamSpec->GetPos() != 0)
    return E_FAIL;

  CCrcOutStream *crcOutStreamSpec = new CCrcOutStream;
  CMyComPtr<ISequentialOutStream> crcOutStream = crcOutStreamSpec;
    
  CBenchProgressInfo *pi = progressInfoSpec[decoderIndex];
  pi->BenchInfo.UnpackSize = 0;
  pi->BenchInfo.PackSize = 0;

  #ifndef Z7_ST
  {
    CMyComPtr<ICompressSetCoderMt> setCoderMt;
    coder.QueryInterface(IID_ICompressSetCoderMt, &setCoderMt);
    if (setCoderMt)
    {
      RINOK(setCoderMt->SetNumberOfThreads(NumDecoderSubThreads))
    }
  }
  #endif

  CMyComPtr<ICompressSetCoderProperties> scp;
  coder.QueryInterface(IID_ICompressSetCoderProperties, &scp);
  if (scp)
  {
    const UInt64 reduceSize = _uncompressedDataSize;
    RINOK(_method.SetCoderProps(scp, &reduceSize))
  }

  CMyComPtr<ICryptoProperties> cp;
  coder.QueryInterface(IID_ICryptoProperties, &cp);
  
  if (setDecProps)
  {
    RINOK(setDecProps->SetDecoderProperties2(
        /* (const Byte *)*propStreamSpec, */
        propsData,
        (UInt32)propStreamSpec->GetPos()))
  }

  {
    CMyComPtr<ICryptoSetPassword> sp;
    coder.QueryInterface(IID_ICryptoSetPassword, &sp);
    if (sp)
    {
      RINOK(sp->CryptoSetPassword(_psw, sizeof(_psw)))
    }
  }

  // (prev) is the value of UnpackSize at the time of the latest CheckBreak() call
  UInt64 prev = 0;
  
  if (cp)
  {
    RINOK(Set_Key_and_IV(cp))
  }

  CMyComPtr<ICompressSetFinishMode> setFinishMode;

  if (_decoderFilter)
  {
    // the filter branch of the loop can copy (compressedSize) bytes to rgCopy
    if (compressedSize > rgCopy.Size())
      return E_FAIL;
  }
  else
  {
    decoder->QueryInterface(IID_ICompressSetFinishMode, (void **)&setFinishMode);
  }

  const UInt64 numIterations = NumIterations;
  const E_CheckCrcMode checkCrcMode = CheckCrcMode_Dec;

  for (UInt64 i = 0; i < numIterations; i++)
  {
    // we call CheckBreak() after each (1 << 26) bytes of unpacked data
    if (printCallback && pi->BenchInfo.UnpackSize - prev >= (1 << 26))
    {
      RINOK(printCallback->CheckBreak())
      prev = pi->BenchInfo.UnpackSize;
    }

    const UInt64 outSize = kBufferSize;
    bool calcCrc = (checkCrcMode != k_CheckCrcMode_Never);
    
    crcOutStreamSpec->Init();
    
    if (_decoderFilter)
    {
      // default: the filter works in place in the buffer of (outStreamSpec)
      Byte *filterData = (Byte *)*outStreamSpec;
      if (calcCrc)
      {
        // if the mode is not k_CheckCrcMode_Always, CRC is checked for first pass only
        calcCrc = (i == 0);
        if (checkCrcMode == k_CheckCrcMode_Always)
        {
          // each pass filters a fresh copy of the source data in rgCopy,
          // so CRC can be checked in each pass
          calcCrc = true;
          memcpy((Byte *)rgCopy, (const Byte *)*outStreamSpec, compressedSize);
          filterData = rgCopy;
        }
      }
      _decoderFilter->Init();
      My_FilterBench(_decoderFilter, filterData, compressedSize,
          calcCrc ? &crcOutStreamSpec->Crc : NULL);
    }
    else
    {
      crcOutStreamSpec->CalcCrc = calcCrc;
      inStreamSpec->Init((const Byte *)*outStreamSpec, compressedSize);
      
      if (setFinishMode)
      {
        RINOK(setFinishMode->SetFinishMode(BoolToUInt(true)))
      }
      
      RINOK(decoder->Code(inStream, crcOutStream, NULL, &outSize, progressInfo[decoderIndex]))
      
      if (setFinishMode)
      {
        // additional checks: whole input stream must be consumed by the decoder
        if (!inStreamSpec->WasFinished())
          return S_FALSE;

        CMyComPtr<ICompressGetInStreamProcessedSize> getInStreamProcessedSize;
        decoder.QueryInterface(IID_ICompressGetInStreamProcessedSize, (void **)&getInStreamProcessedSize);
        
        if (getInStreamProcessedSize)
        {
          UInt64 processed;
          RINOK(getInStreamProcessedSize->GetInStreamProcessedSize(&processed))
          if (processed != compressedSize)
            return S_FALSE;
        }
      }

      if (crcOutStreamSpec->Pos != outSize)
        return S_FALSE;
    }

    // (crc) member is the expected CRC of unpacked data
    if (calcCrc && CRC_GET_DIGEST(crcOutStreamSpec->Crc) != crc)
      return S_FALSE;
    
    pi->BenchInfo.UnpackSize += kBufferSize;
    pi->BenchInfo.PackSize += compressedSize;
  }
  
  // the coder objects are released here, after the last pass
  decoder.Release();
  _decoderFilter.Release();
  return S_OK;
}
/*
  Benchmarks one method: encoding pass and then decoding pass.

  complexInCommands : complexity budget; it is converted to the number of
                      iterations with GetNumIterations().
  oldLzmaBenchMode, numThreads, affinityMode : (multithreading builds only)
                      thread layout for benchmark.
  method2           : method to test. The function works with a local copy.
  uncompressedDataSize : size of data block for each encoder.
  fileData          : optional real data. If it's not NULL, real data mode and
                      CRC check in each decoding pass are forced.
  generateDictBits  : it's copied to each encoder (encoder.generateDictBits).
  printCallback     : optional; it's used for CheckBreak() calls.
  callback          : receives SetEncodeResult() / SetDecodeResult().
                      It's called without NULL check.
  benchProps        : complexity properties of the method.

  Returns:
    E_NOTIMPL     : the method was not found, or coder can't be created.
    E_INVALIDARG  : the method has more than one stream.
    other         : error code from any stage, or S_OK.
*/
static HRESULT MethodBench(
    DECL_EXTERNAL_CODECS_LOC_VARS
    UInt64 complexInCommands,
    #ifndef Z7_ST
      bool oldLzmaBenchMode,
      UInt32 numThreads,
      const CAffinityMode *affinityMode,
    #endif
    const COneMethodInfo &method2,
    size_t uncompressedDataSize,
    const Byte *fileData,
    unsigned generateDictBits,

    IBenchPrintCallback *printCallback,
    IBenchCallback *callback,
    CBenchProps *benchProps)
{
  COneMethodInfo method = method2;
  UInt64 methodId;
  UInt32 numStreams;
  bool isFilter;
  const int codecIndex = FindMethod_Index(
      EXTERNAL_CODECS_LOC_VARS
      method.MethodName, true,
      methodId, numStreams, isFilter);
  if (codecIndex < 0)
    return E_NOTIMPL;
  if (numStreams != 1)
    return E_INVALIDARG;

  // (numSubDecoderThreads) is the number of decoders per one encoder
  UInt32 numEncoderThreads = 1;
  UInt32 numSubDecoderThreads = 1;
  
  #ifndef Z7_ST
    numEncoderThreads = numThreads;

    // old LZMA benchmark: if LZMA encoder uses more than one thread,
    // we use half number of encoders and two decoders per each encoder.
    if (oldLzmaBenchMode)
    if (methodId == k_LZMA)
    {
      if (numThreads == 1 && method.Get_NumThreads() < 0)
        method.AddProp_NumThreads(1);
      const UInt32 numLzmaThreads = method.Get_Lzma_NumThreads();
      if (numThreads > 1 && numLzmaThreads > 1)
      {
        numEncoderThreads = (numThreads + 1) / 2; // 20.03
        numSubDecoderThreads = 2;
      }
    }
  
  const bool mtEncMode = (numEncoderThreads > 1) || affinityMode->NeedAffinity();

  #endif

  // (encodersSpec) owns the array; the array is deleted at any return from this function
  CBenchEncoders encodersSpec(numEncoderThreads);
  CEncoderInfo *encoders = encodersSpec.encoders;

  UInt32 i;
  
  // ---------- create coders and set check modes ----------

  for (i = 0; i < numEncoderThreads; i++)
  {
    CEncoderInfo &encoder = encoders[i];
    encoder.callback = (i == 0) ? callback : NULL;
    encoder.printCallback = printCallback;

    #ifndef Z7_ST
    encoder.EncoderIndex = i;
    encoder.NumEncoderInternalThreads = numSubDecoderThreads;
    encoder.AffinityMode = *affinityMode;

    /*
    if (numSubDecoderThreads > 1)
    if (encoder.AffinityMode.NeedAffinity()
        && encoder.AffinityMode.NumBundleThreads == 1)
    {
      // if old LZMA benchmark uses two threads in coder, we increase (NumBundleThreads) for old LZMA benchmark uses two threads instead of one
      if (encoder.AffinityMode.NumBundleThreads * 2 <= encoder.AffinityMode.NumCores)
        encoder.AffinityMode.NumBundleThreads *= 2;
    }
    */

    #endif

    {
      CCreatedCoder cod;
      RINOK(CreateCoder_Index(EXTERNAL_CODECS_LOC_VARS (unsigned)codecIndex, true, encoder._encoderFilter, cod))
      encoder._encoder = cod.Coder;
      if (!encoder._encoder && !encoder._encoderFilter)
        return E_NOTIMPL;
    }

    // fixed byte sequences for iv / key / password: same values for each run
    SetPseudoRand(encoder._iv,  sizeof(encoder._iv), 17);
    SetPseudoRand(encoder._key, sizeof(encoder._key), 51);
    SetPseudoRand(encoder._psw, sizeof(encoder._psw), 123);

    for (UInt32 j = 0; j < numSubDecoderThreads; j++)
    {
      CCreatedCoder cod;
      CMyComPtr<ICompressCoder> &decoder = encoder._decoders[j];
      RINOK(CreateCoder_Id(EXTERNAL_CODECS_LOC_VARS methodId, false, encoder._decoderFilter, cod))
      decoder = cod.Coder;
      if (!encoder._decoderFilter && !decoder)
        return E_NOTIMPL;
    }

    // real data and CRC check in encoder are used only for methods with (EncComplex > 30)
    encoder.UseRealData_Enc =
    encoder.CheckCrc_Enc = (benchProps->EncComplex) > 30;

    encoder.CheckCrcMode_Dec = k_CheckCrcMode_Always;
    if (benchProps->DecComplexCompr +
        benchProps->DecComplexUnc <= 30)
      encoder.CheckCrcMode_Dec =
          k_CheckCrcMode_FirstPass; // for filters
          // k_CheckCrcMode_Never; // for debug
          // k_CheckCrcMode_Always; // for debug
    if (fileData)
    {
      encoder.UseRealData_Enc = true;
      encoder.CheckCrcMode_Dec = k_CheckCrcMode_Always;
    }
  }

  // (crc) stays 0 here if there is no file data
  UInt32 crc = 0;
  if (fileData)
    crc = CrcCalc(fileData, uncompressedDataSize);

  for (i = 0; i < numEncoderThreads; i++)
  {
    CEncoderInfo &encoder = encoders[i];
    encoder._method = method;
    encoder.generateDictBits = generateDictBits;
    encoder._uncompressedDataSize = uncompressedDataSize;
    encoder.kBufferSize = uncompressedDataSize;
    encoder.fileData = fileData;
    encoder.crc = crc;
  }

  // (status) is shared by all CBenchProgressInfo objects created below
  CBenchProgressStatus status;
  status.Res = S_OK;
  status.EncodeMode = true;

  #ifndef Z7_ST
  // destructor of (encoderFlusher) calls StartAndWait(true) (exit mode),
  // if (NeedClose) is still set at return from this function.
  CBenchThreadsFlusher encoderFlusher;
  if (mtEncMode)
  {
    WRes wres = encoderFlusher.Common.StartEvent.Create();
    if (wres != 0)
      return HRESULT_FROM_WIN32(wres);
    encoderFlusher.NumThreads = numEncoderThreads;
    encoderFlusher.EncodersSpec = &encodersSpec;
    encoderFlusher.NeedClose = true;
  }
  #endif

  // ---------- set encoding parameters and create encoder threads ----------

  for (i = 0; i < numEncoderThreads; i++)
  {
    CEncoderInfo &encoder = encoders[i];
    encoder.NumIterations = GetNumIterations(benchProps->GetNumCommands_Enc(uncompressedDataSize), complexInCommands);
    // encoder.NumIterations = 3;
    encoder.Salt = g_CrcTable[i & 0xFF];
    encoder.Salt ^= (g_CrcTable[(i >> 8) & 0xFF] << 3);
    // (g_CrcTable[0] == 0), and (encoder.Salt == 0) for first thread
    // printf(" %8x", encoder.Salt);

    encoder.KeySize = benchProps->KeySize;

    for (int j = 0; j < 2; j++)
    {
      CBenchProgressInfo *spec = new CBenchProgressInfo;
      encoder.progressInfoSpec[j] = spec;
      encoder.progressInfo[j] = spec;
      spec->Status = &status;
    }
    
    // only first encoder has the callback in its progress object
    if (i == 0)
    {
      CBenchProgressInfo *bpi = encoder.progressInfoSpec[0];
      bpi->Callback = callback;
      bpi->BenchInfo.NumIterations = numEncoderThreads;
    }

    #ifndef Z7_ST
    if (mtEncMode)
    {
      #ifdef USE_ALLOCA
      encoder.AllocaSize = (i * 16 * 21) & 0x7FF;
      #endif

      encoder.Common = &encoderFlusher.Common;
      encoder.IsGlobalMtMode = numEncoderThreads > 1;
      RINOK(encoder.CreateEncoderThread())
    }
    #endif
  }

  if (printCallback)
  {
    RINOK(printCallback->CheckBreak())
  }

  #ifndef Z7_ST
  if (mtEncMode)
  {
    // we wait until each encoder signals ReadyEvent,
    // and we check the result of preparation stage (Results[0])
    for (i = 0; i < numEncoderThreads; i++)
    {
      CEncoderInfo &encoder = encoders[i];
      const WRes wres = encoder.ReadyEvent.Lock();
      if (wres != 0)
        return HRESULT_FROM_WIN32(wres);
      RINOK(encoder.Results[0])
    }

    // start time is set just before StartEvent is signaled
    CBenchProgressInfo *bpi = encoders[0].progressInfoSpec[0];
    bpi->SetStartTime();

    const WRes wres = encoderFlusher.StartAndWait();
    if (status.Res == 0 && wres != 0)
      return HRESULT_FROM_WIN32(wres);
  }
  else
  #endif
  {
    RINOK(encoders[0].Encode())
  }

  RINOK(status.Res)

  CBenchInfo info;

  // sizes in (info) are the sums for one iteration of all encoders
  encoders[0].progressInfoSpec[0]->SetFinishTime(info);
  info.UnpackSize = 0;
  info.PackSize = 0;
  info.NumIterations = encoders[0].NumIterations;
  
  for (i = 0; i < numEncoderThreads; i++)
  {
    const CEncoderInfo &encoder = encoders[i];
    info.UnpackSize += encoder.kBufferSize;
    info.PackSize += encoder.compressedSize;
    // printf("\n%7d\n", encoder.compressedSize);
  }
  
  RINOK(callback->SetEncodeResult(info, true))




  // ---------- Decode ----------

  status.Res = S_OK;
  status.EncodeMode = false;

  const UInt32 numDecoderThreads = numEncoderThreads * numSubDecoderThreads;
  #ifndef Z7_ST
  const bool mtDecoderMode = (numDecoderThreads > 1) || affinityMode->NeedAffinity();
  #endif
  
  for (i = 0; i < numEncoderThreads; i++)
  {
    CEncoderInfo &encoder = encoders[i];

    /*
    #ifndef Z7_ST
    // encoder.affinityMode = *affinityMode;
    if (encoder.NumEncoderInternalThreads != 1)
      encoder.AffinityMode.DivideNum = encoder.NumEncoderInternalThreads;
    #endif
    */


    // number of iterations is calculated from the sizes of first encoder;
    // all other encoders use same number of iterations.
    if (i == 0)
    {
      encoder.NumIterations = GetNumIterations(
          benchProps->GetNumCommands_Dec(
              encoder.compressedSize,
              encoder.kBufferSize),
          complexInCommands);
      CBenchProgressInfo *bpi = encoder.progressInfoSpec[0];
      bpi->Callback = callback;
      bpi->BenchInfo.NumIterations = numDecoderThreads;
      bpi->SetStartTime();
    }
    else
      encoder.NumIterations = encoders[0].NumIterations;

    #ifndef Z7_ST
    {
      int numSubThreads = method.Get_NumThreads();
      encoder.NumDecoderSubThreads = (numSubThreads <= 0) ? 1 : (unsigned)numSubThreads;
    }
    if (mtDecoderMode)
    {
      for (UInt32 j = 0; j < numSubDecoderThreads; j++)
      {
        const HRESULT res = encoder.CreateDecoderThread(j, (i == 0 && j == 0)
            #ifdef USE_ALLOCA
            , ((i * numSubDecoderThreads + j) * 16 * 21) & 0x7FF
            #endif
            );
        RINOK(res)
      }
    }
    else
    #endif
    {
      RINOK(encoder.Decode(0))
    }
  }
  
  #ifndef Z7_ST
  if (mtDecoderMode)
  {
    // we wait for all decoder threads, and we keep first error for each type of result
    WRes wres = 0;
    HRESULT res = S_OK;
    for (i = 0; i < numEncoderThreads; i++)
      for (UInt32 j = 0; j < numSubDecoderThreads; j++)
      {
        CEncoderInfo &encoder = encoders[i];
        const WRes wres2 = encoder.thread[j].
            // Wait(); // later we can get thread times from thread in UNDER_CE
            Wait_Close();
        if (wres == 0 && wres2 != 0)
          wres = wres2;
        const HRESULT res2 = encoder.Results[j];
        if (res == 0 && res2 != 0)
          res = res2;
      }
    if (wres != 0)
      return HRESULT_FROM_WIN32(wres);
    RINOK(res)
  }
  #endif // Z7_ST

  RINOK(status.Res)
  encoders[0].progressInfoSpec[0]->SetFinishTime(info);

  /*
  #ifndef Z7_ST
  #ifdef UNDER_CE
  if (mtDecoderMode)
    for (i = 0; i < numEncoderThreads; i++)
      for (UInt32 j = 0; j < numSubDecoderThreads; j++)
      {
        FILETIME creationTime, exitTime, kernelTime, userTime;
        if (::GetThreadTimes(encoders[i].thread[j], &creationTime, &exitTime, &kernelTime, &userTime) != 0)
          info.UserTime += GetTime64(userTime) + GetTime64(kernelTime);
      }
  #endif
  #endif
  */

  info.UnpackSize = 0;
  info.PackSize = 0;
  // each encoder object has (numSubDecoderThreads) decoders
  info.NumIterations = numSubDecoderThreads * encoders[0].NumIterations;
  
  for (i = 0; i < numEncoderThreads; i++)
  {
    const CEncoderInfo &encoder = encoders[i];
    info.UnpackSize += encoder.kBufferSize;
    info.PackSize += encoder.compressedSize;
  }

  // RINOK(callback->SetDecodeResult(info, false)) // why we called before 21.03 ??
  RINOK(callback->SetDecodeResult(info, true))
  
  return S_OK;
}
0 : 1); 2201 2202 UInt32 numBigThreads = numThreads; 2203 bool lzmaMt = (totalBench || (numThreads > 1 && btMode)); 2204 if (btMode) 2205 { 2206 if (!totalBench && lzmaMt) 2207 numBigThreads /= 2; 2208 } 2209 return ((UInt64)kBufferSize + kCompressedBufferSize + 2210 GetLZMAUsage(lzmaMt, btMode, dictionary) + (2 << 20)) * numBigThreads; 2211} 2212 2213static UInt64 GetBenchMemoryUsage_Hash(UInt32 numThreads, UInt64 dictionary) 2214{ 2215 // dictionary += (dictionary >> 9); // for page tables (virtual memory) 2216 return (UInt64)(dictionary + (1 << 15)) * numThreads + (2 << 20); 2217} 2218 2219 2220// ---------- CRC and HASH ---------- 2221 2222struct CCrcInfo_Base 2223{ 2224 CMidAlignedBuffer Buffer; 2225 const Byte *Data; 2226 size_t Size; 2227 bool CreateLocalBuf; 2228 UInt32 CheckSum_Res; 2229 2230 CCrcInfo_Base(): CreateLocalBuf(true), CheckSum_Res(0) {} 2231 2232 HRESULT Generate(const Byte *data, size_t size); 2233 HRESULT CrcProcess(UInt64 numIterations, 2234 const UInt32 *checkSum, IHasher *hf, 2235 IBenchPrintCallback *callback); 2236}; 2237 2238 2239HRESULT CCrcInfo_Base::Generate(const Byte *data, size_t size) 2240{ 2241 Size = size; 2242 Data = data; 2243 if (!data || CreateLocalBuf) 2244 { 2245 ALLOC_WITH_HRESULT(&Buffer, size) 2246 Data = Buffer; 2247 } 2248 if (!data) 2249 RandGen(Buffer, size); 2250 else if (CreateLocalBuf && size != 0) 2251 memcpy(Buffer, data, size); 2252 return S_OK; 2253} 2254 2255 2256HRESULT CCrcInfo_Base::CrcProcess(UInt64 numIterations, 2257 const UInt32 *checkSum, IHasher *hf, 2258 IBenchPrintCallback *callback) 2259{ 2260 MY_ALIGN(16) 2261 Byte hash[64]; 2262 memset(hash, 0, sizeof(hash)); 2263 2264 CheckSum_Res = 0; 2265 2266 const UInt32 hashSize = hf->GetDigestSize(); 2267 if (hashSize > sizeof(hash)) 2268 return S_FALSE; 2269 2270 const Byte *buf = Data; 2271 const size_t size = Size; 2272 UInt32 checkSum_Prev = 0; 2273 2274 UInt64 prev = 0; 2275 UInt64 cur = 0; 2276 2277 for (UInt64 i = 0; i < numIterations; i++) 2278 
{ 2279 hf->Init(); 2280 size_t pos = 0; 2281 do 2282 { 2283 const size_t rem = size - pos; 2284 const UInt32 kStep = ((UInt32)1 << 31); 2285 const UInt32 curSize = (rem < kStep) ? (UInt32)rem : kStep; 2286 hf->Update(buf + pos, curSize); 2287 pos += curSize; 2288 } 2289 while (pos != size); 2290 2291 hf->Final(hash); 2292 UInt32 sum = 0; 2293 for (UInt32 j = 0; j < hashSize; j += 4) 2294 { 2295 sum = rotlFixed(sum, 11); 2296 sum += GetUi32(hash + j); 2297 } 2298 if (checkSum) 2299 { 2300 if (sum != *checkSum) 2301 return S_FALSE; 2302 } 2303 else 2304 { 2305 checkSum_Prev = sum; 2306 checkSum = &checkSum_Prev; 2307 } 2308 if (callback) 2309 { 2310 cur += size; 2311 if (cur - prev >= ((UInt32)1 << 30)) 2312 { 2313 prev = cur; 2314 RINOK(callback->CheckBreak()) 2315 } 2316 } 2317 } 2318 CheckSum_Res = checkSum_Prev; 2319 return S_OK; 2320} 2321 2322extern 2323UInt32 g_BenchCpuFreqTemp; // we need non-static variavble to disable compiler optimization 2324UInt32 g_BenchCpuFreqTemp = 1; 2325 2326#define YY1 sum += val; sum ^= val; 2327#define YY3 YY1 YY1 YY1 YY1 2328#define YY5 YY3 YY3 YY3 YY3 2329#define YY7 YY5 YY5 YY5 YY5 2330static const UInt32 kNumFreqCommands = 128; 2331 2332EXTERN_C_BEGIN 2333 2334static UInt32 CountCpuFreq(UInt32 sum, UInt32 num, UInt32 val) 2335{ 2336 for (UInt32 i = 0; i < num; i++) 2337 { 2338 YY7 2339 } 2340 return sum; 2341} 2342 2343EXTERN_C_END 2344 2345 2346#ifndef Z7_ST 2347 2348struct CBaseThreadInfo 2349{ 2350 NWindows::CThread Thread; 2351 IBenchPrintCallback *Callback; 2352 HRESULT CallbackRes; 2353 2354 WRes Wait_If_Created() 2355 { 2356 if (!Thread.IsCreated()) 2357 return 0; 2358 return Thread.Wait_Close(); 2359 } 2360}; 2361 2362struct CFreqInfo: public CBaseThreadInfo 2363{ 2364 UInt32 ValRes; 2365 UInt32 Size; 2366 UInt64 NumIterations; 2367}; 2368 2369static THREAD_FUNC_DECL FreqThreadFunction(void *param) 2370{ 2371 CFreqInfo *p = (CFreqInfo *)param; 2372 2373 UInt32 sum = g_BenchCpuFreqTemp; 2374 for (UInt64 k = 
/*
  State of one thread of hash benchmark.
  CrcThreadFunction() calls Process() for this object.
  The constructor initializes only (Res). Process() reads the input fields
  below, so the caller must set them before the thread is started.
*/
struct CCrcInfo: public CBaseThreadInfo
{
  // input for Process():
  const Byte *Data;      // source data; it's passed to crcib.Generate()
  size_t Size;           // size of data block
  UInt64 NumIterations;  // number of hashing passes
  bool CheckSumDefined;  // if true, (CheckSum) is used as expected value
  UInt32 CheckSum;
  CMyComPtr<IHasher> Hasher;
  // output of Process():
  HRESULT Res;           // it's E_FAIL until Process() sets it
  UInt32 CheckSum_Res;   // copy of crcib.CheckSum_Res after hashing

  // NOTE(review): this struct is already inside (#ifndef Z7_ST) region,
  // so the following check looks redundant.
  #ifndef Z7_ST
  NSynchronization::CManualResetEvent ReadyEvent; // Process() sets it after Generate()
  UInt32 ThreadIndex;        // it's passed to CreateThread_WithAffinity()
  CBenchSyncCommon *Common;  // Process() waits for Common->StartEvent
  CAffinityMode AffinityMode;
  #endif

  // we want to call CCrcInfo_Base::Buffer.Free() in main thread.
  // so we use non-local CCrcInfo_Base.
  CCrcInfo_Base crcib;

  // Creates (ReadyEvent), if it was not created, and starts the thread that
  // runs CrcThreadFunction(this). Returns the system error code as HRESULT.
  HRESULT CreateThread()
  {
    WRes res = 0;
    if (!ReadyEvent.IsCreated())
      res = ReadyEvent.Create();
    if (res == 0)
      res = AffinityMode.CreateThread_WithAffinity(Thread, CrcThreadFunction, this,
          ThreadIndex);
    return HRESULT_FROM_WIN32(res);
  }

  #ifdef USE_ALLOCA
  size_t AllocaSize;  // size for alloca() call in CrcThreadFunction()
  #endif

  void Process();

  CCrcInfo(): Res(E_FAIL) {}
};
So we don't free Buffer here 2500 */ 2501 // crcib.Buffer.Free(); 2502} 2503 2504 2505static THREAD_FUNC_DECL CrcThreadFunction(void *param) 2506{ 2507 CCrcInfo *p = (CCrcInfo *)param; 2508 2509 #ifdef USE_ALLOCA 2510 alloca(p->AllocaSize); 2511 #endif 2512 p->Process(); 2513 return THREAD_FUNC_RET_ZERO; 2514} 2515 2516 2517struct CCrcThreads 2518{ 2519 CCrcInfo *Items; 2520 unsigned NumThreads; 2521 CBenchSyncCommon Common; 2522 bool NeedClose; 2523 2524 CCrcThreads(): Items(NULL), NumThreads(0), NeedClose(false) {} 2525 2526 WRes StartAndWait(bool exitMode = false); 2527 2528 ~CCrcThreads() 2529 { 2530 StartAndWait(true); 2531 delete []Items; 2532 } 2533}; 2534 2535 2536WRes CCrcThreads::StartAndWait(bool exitMode) 2537{ 2538 if (!NeedClose) 2539 return 0; 2540 2541 Common.ExitMode = exitMode; 2542 WRes wres = Common.StartEvent.Set(); 2543 2544 for (unsigned i = 0; i < NumThreads; i++) 2545 { 2546 WRes wres2 = Items[i].Wait_If_Created(); 2547 if (wres == 0 && wres2 != 0) 2548 wres = wres2; 2549 } 2550 NumThreads = 0; 2551 NeedClose = false; 2552 return wres; 2553} 2554 2555#endif 2556 2557 2558static UInt32 CrcCalc1(const Byte *buf, size_t size) 2559{ 2560 UInt32 crc = CRC_INIT_VAL; 2561 for (size_t i = 0; i < size; i++) 2562 crc = CRC_UPDATE_BYTE(crc, buf[i]); 2563 return CRC_GET_DIGEST(crc); 2564} 2565 2566/* 2567static UInt32 RandGenCrc(Byte *buf, size_t size, CBaseRandomGenerator &RG) 2568{ 2569 RandGen(buf, size, RG); 2570 return CrcCalc1(buf, size); 2571} 2572*/ 2573 2574static bool CrcInternalTest() 2575{ 2576 CAlignedBuffer buffer; 2577 const size_t kBufferSize0 = (1 << 8); 2578 const size_t kBufferSize1 = (1 << 10); 2579 const unsigned kCheckSize = (1 << 5); 2580 buffer.Alloc(kBufferSize0 + kBufferSize1); 2581 if (!buffer.IsAllocated()) 2582 return false; 2583 Byte *buf = (Byte *)buffer; 2584 size_t i; 2585 for (i = 0; i < kBufferSize0; i++) 2586 buf[i] = (Byte)i; 2587 UInt32 crc1 = CrcCalc1(buf, kBufferSize0); 2588 if (crc1 != 0x29058C73) 2589 return 
false; 2590 RandGen(buf + kBufferSize0, kBufferSize1); 2591 for (i = 0; i < kBufferSize0 + kBufferSize1 - kCheckSize; i++) 2592 for (unsigned j = 0; j < kCheckSize; j++) 2593 if (CrcCalc1(buf + i, j) != CrcCalc(buf + i, j)) 2594 return false; 2595 return true; 2596} 2597 2598struct CBenchMethod 2599{ 2600 unsigned Weight; 2601 unsigned DictBits; 2602 Int32 EncComplex; 2603 Int32 DecComplexCompr; 2604 Int32 DecComplexUnc; 2605 const char *Name; 2606 // unsigned KeySize; 2607}; 2608 2609// #define USE_SW_CMPLX 2610 2611#ifdef USE_SW_CMPLX 2612#define CMPLX(x) ((x) * 1000) 2613#else 2614#define CMPLX(x) (x) 2615#endif 2616 2617static const CBenchMethod g_Bench[] = 2618{ 2619 // { 40, 17, 357, 145, 20, "LZMA:x1" }, 2620 // { 20, 18, 360, 145, 20, "LZMA2:x1:mt2" }, 2621 2622 { 20, 18, 360, 145, 20, "LZMA:x1" }, 2623 { 20, 22, 600, 145, 20, "LZMA:x3" }, 2624 2625 { 80, 24, 1220, 145, 20, "LZMA:x5:mt1" }, 2626 { 80, 24, 1220, 145, 20, "LZMA:x5:mt2" }, 2627 2628 { 10, 16, 124, 40, 14, "Deflate:x1" }, 2629 { 20, 16, 376, 40, 14, "Deflate:x5" }, 2630 { 10, 16, 1082, 40, 14, "Deflate:x7" }, 2631 { 10, 17, 422, 40, 14, "Deflate64:x5" }, 2632 2633 { 10, 15, 590, 69, 69, "BZip2:x1" }, 2634 { 20, 19, 815, 122, 122, "BZip2:x5" }, 2635 { 10, 19, 815, 122, 122, "BZip2:x5:mt2" }, 2636 { 10, 19, 2530, 122, 122, "BZip2:x7" }, 2637 2638 // { 10, 18, 1010, 0, 1150, "PPMDZip:x1" }, 2639 { 10, 18, 1010, 0, 1150, "PPMD:x1" }, 2640 // { 10, 22, 1655, 0, 1830, "PPMDZip:x5" }, 2641 { 10, 22, 1655, 0, 1830, "PPMD:x5" }, 2642 2643 // { 2, 0, -16, 0, -16, "Swap2" }, 2644 { 2, 0, -16, 0, -16, "Swap4" }, 2645 2646 // { 2, 0, 3, 0, 4, "Delta:1" }, 2647 // { 2, 0, 3, 0, 4, "Delta:2" }, 2648 // { 2, 0, 3, 0, 4, "Delta:3" }, 2649 { 2, 0, 3, 0, 4, "Delta:4" }, 2650 // { 2, 0, 3, 0, 4, "Delta:8" }, 2651 // { 2, 0, 3, 0, 4, "Delta:32" }, 2652 2653 { 2, 0, 2, 0, 2, "BCJ" }, 2654 { 2, 0, 1, 0, 1, "ARM64" }, 2655 2656 // { 10, 0, 18, 0, 18, "AES128CBC:1" }, 2657 // { 10, 0, 21, 0, 21, "AES192CBC:1" }, 2658 { 
/*
  Table of hash methods for benchmark.
  Fields of each item are in the order of CBenchHash members:
    { Weight, Complex, CheckSum, Name }
  NOTE(review): (CheckSum) is presumably the expected 32-bit folded digest
  that is checked by CCrcInfo_Base::CrcProcess(); the code that reads this
  table is not in this part of the file - confirm.
  Items with same hash function and different (:N) suffix have same CheckSum.
*/
static const CBenchHash g_Hash[] =
{
  // {  1,  1820, 0x21e207bb, "CRC32:1" },
  // { 10,   558, 0x21e207bb, "CRC32:4" },
  { 20,   339, 0x21e207bb, "CRC32:8" } ,
  {  2,   128 *ARM_CRC_MUL, 0x21e207bb, "CRC32:32" },
  {  2,    64 *ARM_CRC_MUL, 0x21e207bb, "CRC32:64" },
  { 10,   512, 0x41b901d1, "CRC64" },
  
  { 10,  5100,       0x7913ba03, "SHA256:1" },
  {  2, CMPLX((32 * 4 + 1) * 4 + 4), 0x7913ba03, "SHA256:2" },
  
  { 10,  2340,       0xff769021, "SHA1:1" },
  {  2, CMPLX((20 * 6 + 1) * 4 + 4), 0xff769021, "SHA1:2" },
  
  {  2,  5500, 0x85189d02, "BLAKE2sp" }
};
} 2722 unsigned len = (unsigned)strlen(s + startPos); 2723 if (size > len) 2724 { 2725 size -= len; 2726 if (startPos < size) 2727 startPos = 0; 2728 else 2729 startPos -= size; 2730 } 2731 f.Print(s + startPos); 2732} 2733 2734static const unsigned kFieldSize_Name = 12; 2735static const unsigned kFieldSize_SmallName = 4; 2736static const unsigned kFieldSize_Speed = 9; 2737static const unsigned kFieldSize_Usage = 5; 2738static const unsigned kFieldSize_RU = 6; 2739static const unsigned kFieldSize_Rating = 6; 2740static const unsigned kFieldSize_EU = 5; 2741static const unsigned kFieldSize_Effec = 5; 2742static const unsigned kFieldSize_CrcSpeed = 8; 2743 2744 2745static const unsigned kFieldSize_TotalSize = 4 + kFieldSize_Speed + kFieldSize_Usage + kFieldSize_RU + kFieldSize_Rating; 2746static const unsigned kFieldSize_EUAndEffec = 2 + kFieldSize_EU + kFieldSize_Effec; 2747 2748 2749static void PrintRating(IBenchPrintCallback &f, UInt64 rating, unsigned size) 2750{ 2751 PrintNumber(f, (rating + 500000) / 1000000, size); 2752} 2753 2754 2755static void PrintPercents(IBenchPrintCallback &f, UInt64 val, UInt64 divider, unsigned size) 2756{ 2757 UInt64 v = 0; 2758 if (divider != 0) 2759 v = (val * 100 + divider / 2) / divider; 2760 PrintNumber(f, v, size); 2761} 2762 2763static void PrintChars(IBenchPrintCallback &f, char c, unsigned size) 2764{ 2765 char s[256]; 2766 memset(s, (Byte)c, size); 2767 s[size] = 0; 2768 f.Print(s); 2769} 2770 2771static void PrintSpaces(IBenchPrintCallback &f, unsigned size) 2772{ 2773 PrintChars(f, ' ', size); 2774} 2775 2776static void PrintUsage(IBenchPrintCallback &f, UInt64 usage, unsigned size) 2777{ 2778 PrintNumber(f, Benchmark_GetUsage_Percents(usage), size); 2779} 2780 2781static void PrintResults(IBenchPrintCallback &f, UInt64 usage, UInt64 rpu, UInt64 rating, bool showFreq, UInt64 cpuFreq) 2782{ 2783 PrintUsage(f, usage, kFieldSize_Usage); 2784 PrintRating(f, rpu, kFieldSize_RU); 2785 PrintRating(f, rating, kFieldSize_Rating); 
2786 if (showFreq) 2787 { 2788 if (cpuFreq == 0) 2789 PrintSpaces(f, kFieldSize_EUAndEffec); 2790 else 2791 { 2792 PrintPercents(f, rating, cpuFreq * usage / kBenchmarkUsageMult, kFieldSize_EU); 2793 PrintPercents(f, rating, cpuFreq, kFieldSize_Effec); 2794 } 2795 } 2796} 2797 2798 2799void CTotalBenchRes::Generate_From_BenchInfo(const CBenchInfo &info) 2800{ 2801 Speed = info.GetUnpackSizeSpeed(); 2802 Usage = info.GetUsage(); 2803 RPU = info.GetRatingPerUsage(Rating); 2804} 2805 2806void CTotalBenchRes::Mult_For_Weight(unsigned weight) 2807{ 2808 NumIterations2 *= weight; 2809 RPU *= weight; 2810 Rating *= weight; 2811 Usage *= weight; 2812 Speed *= weight; 2813} 2814 2815void CTotalBenchRes::Update_With_Res(const CTotalBenchRes &r) 2816{ 2817 Rating += r.Rating; 2818 Usage += r.Usage; 2819 RPU += r.RPU; 2820 Speed += r.Speed; 2821 // NumIterations1 = (r1.NumIterations1 + r2.NumIterations1); 2822 NumIterations2 += r.NumIterations2; 2823} 2824 2825static void PrintResults(IBenchPrintCallback *f, 2826 const CBenchInfo &info, 2827 unsigned weight, 2828 UInt64 rating, 2829 bool showFreq, UInt64 cpuFreq, 2830 CTotalBenchRes *res) 2831{ 2832 CTotalBenchRes t; 2833 t.Rating = rating; 2834 t.NumIterations2 = 1; 2835 t.Generate_From_BenchInfo(info); 2836 2837 if (f) 2838 { 2839 if (t.Speed != 0) 2840 PrintNumber(*f, t.Speed / 1024, kFieldSize_Speed); 2841 else 2842 PrintSpaces(*f, 1 + kFieldSize_Speed); 2843 } 2844 if (f) 2845 { 2846 PrintResults(*f, t.Usage, t.RPU, rating, showFreq, cpuFreq); 2847 } 2848 2849 if (res) 2850 { 2851 // res->NumIterations1++; 2852 t.Mult_For_Weight(weight); 2853 res->Update_With_Res(t); 2854 } 2855} 2856 2857static void PrintTotals(IBenchPrintCallback &f, 2858 bool showFreq, UInt64 cpuFreq, bool showSpeed, const CTotalBenchRes &res) 2859{ 2860 const UInt64 numIterations2 = res.NumIterations2 ? 
res.NumIterations2 : 1; 2861 const UInt64 speed = res.Speed / numIterations2; 2862 if (showSpeed && speed != 0) 2863 PrintNumber(f, speed / 1024, kFieldSize_Speed); 2864 else 2865 PrintSpaces(f, 1 + kFieldSize_Speed); 2866 2867 // PrintSpaces(f, 1 + kFieldSize_Speed); 2868 // UInt64 numIterations1 = res.NumIterations1; if (numIterations1 == 0) numIterations1 = 1; 2869 PrintResults(f, res.Usage / numIterations2, res.RPU / numIterations2, res.Rating / numIterations2, showFreq, cpuFreq); 2870} 2871 2872 2873static void PrintHex(AString &s, UInt64 v) 2874{ 2875 char temp[32]; 2876 ConvertUInt64ToHex(v, temp); 2877 s += temp; 2878} 2879 2880AString GetProcessThreadsInfo(const NSystem::CProcessAffinity &ti) 2881{ 2882 AString s; 2883 // s.Add_UInt32(ti.numProcessThreads); 2884 unsigned numSysThreads = ti.GetNumSystemThreads(); 2885 if (ti.GetNumProcessThreads() != numSysThreads) 2886 { 2887 // if (ti.numProcessThreads != ti.numSysThreads) 2888 { 2889 s += " / "; 2890 s.Add_UInt32(numSysThreads); 2891 } 2892 s += " : "; 2893 #ifdef _WIN32 2894 PrintHex(s, ti.processAffinityMask); 2895 s += " / "; 2896 PrintHex(s, ti.systemAffinityMask); 2897 #else 2898 unsigned i = (numSysThreads + 3) & ~(unsigned)3; 2899 if (i == 0) 2900 i = 4; 2901 for (; i >= 4; ) 2902 { 2903 i -= 4; 2904 unsigned val = 0; 2905 for (unsigned k = 0; k < 4; k++) 2906 { 2907 const unsigned bit = (ti.IsCpuSet(i + k) ? 
1 : 0); 2908 val += (bit << k); 2909 } 2910 PrintHex(s, val); 2911 } 2912 #endif 2913 } 2914 return s; 2915} 2916 2917 2918#ifdef Z7_LARGE_PAGES 2919 2920#ifdef _WIN32 2921extern bool g_LargePagesMode; 2922extern "C" 2923{ 2924 extern SIZE_T g_LargePageSize; 2925} 2926#endif 2927 2928void Add_LargePages_String(AString &s) 2929{ 2930 #ifdef _WIN32 2931 if (g_LargePagesMode || g_LargePageSize != 0) 2932 { 2933 s.Add_OptSpaced("(LP-"); 2934 PrintSize_KMGT_Or_Hex(s, g_LargePageSize); 2935 #ifdef MY_CPU_X86_OR_AMD64 2936 if (CPU_IsSupported_PageGB()) 2937 s += "-1G"; 2938 #endif 2939 if (!g_LargePagesMode) 2940 s += "-NA"; 2941 s += ")"; 2942 } 2943 #else 2944 s += ""; 2945 #endif 2946} 2947 2948#endif 2949 2950 2951 2952static void PrintRequirements(IBenchPrintCallback &f, const char *sizeString, 2953 bool size_Defined, UInt64 size, const char *threadsString, UInt32 numThreads) 2954{ 2955 f.Print("RAM "); 2956 f.Print(sizeString); 2957 if (size_Defined) 2958 PrintNumber(f, (size >> 20), 6); 2959 else 2960 f.Print(" ?"); 2961 f.Print(" MB"); 2962 2963 #ifdef Z7_LARGE_PAGES 2964 { 2965 AString s; 2966 Add_LargePages_String(s); 2967 f.Print(s); 2968 } 2969 #endif 2970 2971 f.Print(", # "); 2972 f.Print(threadsString); 2973 PrintNumber(f, numThreads, 3); 2974} 2975 2976 2977 2978struct CBenchCallbackToPrint Z7_final: public IBenchCallback 2979{ 2980 bool NeedPrint; 2981 bool Use2Columns; 2982 bool ShowFreq; 2983 unsigned NameFieldSize; 2984 2985 unsigned EncodeWeight; 2986 unsigned DecodeWeight; 2987 2988 UInt64 CpuFreq; 2989 UInt64 DictSize; 2990 2991 IBenchPrintCallback *_file; 2992 CBenchProps BenchProps; 2993 CTotalBenchRes EncodeRes; 2994 CTotalBenchRes DecodeRes; 2995 2996 CBenchInfo BenchInfo_Results[2]; 2997 2998 CBenchCallbackToPrint(): 2999 NeedPrint(true), 3000 Use2Columns(false), 3001 ShowFreq(false), 3002 NameFieldSize(0), 3003 EncodeWeight(1), 3004 DecodeWeight(1), 3005 CpuFreq(0) 3006 {} 3007 3008 void Init() { EncodeRes.Init(); DecodeRes.Init(); } 3009 void 
Print(const char *s); 3010 void NewLine(); 3011 3012 HRESULT SetFreq(bool showFreq, UInt64 cpuFreq); 3013 HRESULT SetEncodeResult(const CBenchInfo &info, bool final) Z7_override; 3014 HRESULT SetDecodeResult(const CBenchInfo &info, bool final) Z7_override; 3015}; 3016 3017HRESULT CBenchCallbackToPrint::SetFreq(bool showFreq, UInt64 cpuFreq) 3018{ 3019 ShowFreq = showFreq; 3020 CpuFreq = cpuFreq; 3021 return S_OK; 3022} 3023 3024HRESULT CBenchCallbackToPrint::SetEncodeResult(const CBenchInfo &info, bool final) 3025{ 3026 RINOK(_file->CheckBreak()) 3027 if (final) 3028 BenchInfo_Results[0] = info; 3029 if (final) 3030 if (NeedPrint) 3031 { 3032 const UInt64 rating = BenchProps.GetRating_Enc(DictSize, info.GlobalTime, info.GlobalFreq, info.UnpackSize * info.NumIterations); 3033 PrintResults(_file, info, 3034 EncodeWeight, rating, 3035 ShowFreq, CpuFreq, &EncodeRes); 3036 if (!Use2Columns) 3037 _file->NewLine(); 3038 } 3039 return S_OK; 3040} 3041 3042static const char * const kSep = " | "; 3043 3044HRESULT CBenchCallbackToPrint::SetDecodeResult(const CBenchInfo &info, bool final) 3045{ 3046 RINOK(_file->CheckBreak()) 3047 if (final) 3048 BenchInfo_Results[1] = info; 3049 if (final) 3050 if (NeedPrint) 3051 { 3052 const UInt64 rating = BenchProps.GetRating_Dec(info.GlobalTime, info.GlobalFreq, info.UnpackSize, info.PackSize, info.NumIterations); 3053 if (Use2Columns) 3054 _file->Print(kSep); 3055 else 3056 PrintSpaces(*_file, NameFieldSize); 3057 CBenchInfo info2 = info; 3058 info2.UnpackSize *= info2.NumIterations; 3059 info2.PackSize *= info2.NumIterations; 3060 info2.NumIterations = 1; 3061 PrintResults(_file, info2, 3062 DecodeWeight, rating, 3063 ShowFreq, CpuFreq, &DecodeRes); 3064 } 3065 return S_OK; 3066} 3067 3068void CBenchCallbackToPrint::Print(const char *s) 3069{ 3070 _file->Print(s); 3071} 3072 3073void CBenchCallbackToPrint::NewLine() 3074{ 3075 _file->NewLine(); 3076} 3077 3078static void PrintLeft(IBenchPrintCallback &f, const char *s, unsigned size) 
3079{ 3080 f.Print(s); 3081 int numSpaces = (int)size - (int)MyStringLen(s); 3082 if (numSpaces > 0) 3083 PrintSpaces(f, (unsigned)numSpaces); 3084} 3085 3086static void PrintRight(IBenchPrintCallback &f, const char *s, unsigned size) 3087{ 3088 int numSpaces = (int)size - (int)MyStringLen(s); 3089 if (numSpaces > 0) 3090 PrintSpaces(f, (unsigned)numSpaces); 3091 f.Print(s); 3092} 3093 3094 3095static bool DoesWildcardMatchName_NoCase(const AString &mask, const char *name) 3096{ 3097 UString wildc = GetUnicodeString(mask); 3098 UString bname = GetUnicodeString(name); 3099 wildc.MakeLower_Ascii(); 3100 bname.MakeLower_Ascii(); 3101 return DoesWildcardMatchName(wildc, bname); 3102} 3103 3104 3105static HRESULT TotalBench( 3106 DECL_EXTERNAL_CODECS_LOC_VARS 3107 const COneMethodInfo &methodMask, 3108 UInt64 complexInCommands, 3109 #ifndef Z7_ST 3110 UInt32 numThreads, 3111 const CAffinityMode *affinityMode, 3112 #endif 3113 bool forceUnpackSize, 3114 size_t unpackSize, 3115 const Byte *fileData, 3116 IBenchPrintCallback *printCallback, CBenchCallbackToPrint *callback) 3117{ 3118 for (unsigned i = 0; i < Z7_ARRAY_SIZE(g_Bench); i++) 3119 { 3120 const CBenchMethod &bench = g_Bench[i]; 3121 if (!DoesWildcardMatchName_NoCase(methodMask.MethodName, bench.Name)) 3122 continue; 3123 PrintLeft(*callback->_file, bench.Name, kFieldSize_Name); 3124 { 3125 unsigned keySize = 32; 3126 if (IsString1PrefixedByString2(bench.Name, "AES128")) keySize = 16; 3127 else if (IsString1PrefixedByString2(bench.Name, "AES192")) keySize = 24; 3128 callback->BenchProps.KeySize = keySize; 3129 } 3130 callback->BenchProps.DecComplexUnc = bench.DecComplexUnc; 3131 callback->BenchProps.DecComplexCompr = bench.DecComplexCompr; 3132 callback->BenchProps.EncComplex = bench.EncComplex; 3133 3134 COneMethodInfo method; 3135 NCOM::CPropVariant propVariant; 3136 propVariant = bench.Name; 3137 RINOK(method.ParseMethodFromPROPVARIANT(UString(), propVariant)) 3138 3139 size_t unpackSize2 = unpackSize; 3140 if 
(!forceUnpackSize && bench.DictBits == 0) 3141 unpackSize2 = kFilterUnpackSize; 3142 3143 callback->EncodeWeight = bench.Weight; 3144 callback->DecodeWeight = bench.Weight; 3145 3146 const HRESULT res = MethodBench( 3147 EXTERNAL_CODECS_LOC_VARS 3148 complexInCommands, 3149 #ifndef Z7_ST 3150 false, numThreads, affinityMode, 3151 #endif 3152 method, 3153 unpackSize2, fileData, 3154 bench.DictBits, 3155 printCallback, callback, &callback->BenchProps); 3156 3157 if (res == E_NOTIMPL) 3158 { 3159 // callback->Print(" ---"); 3160 // we need additional empty line as line for decompression results 3161 if (!callback->Use2Columns) 3162 callback->NewLine(); 3163 } 3164 else 3165 { 3166 RINOK(res) 3167 } 3168 3169 callback->NewLine(); 3170 } 3171 return S_OK; 3172} 3173 3174 3175struct CFreqBench 3176{ 3177 // in: 3178 UInt64 complexInCommands; 3179 UInt32 numThreads; 3180 bool showFreq; 3181 UInt64 specifiedFreq; 3182 3183 // out: 3184 UInt64 CpuFreqRes; 3185 UInt64 UsageRes; 3186 UInt32 res; 3187 3188 CFreqBench() 3189 {} 3190 3191 HRESULT FreqBench(IBenchPrintCallback *_file 3192 #ifndef Z7_ST 3193 , const CAffinityMode *affinityMode 3194 #endif 3195 ); 3196}; 3197 3198 3199HRESULT CFreqBench::FreqBench(IBenchPrintCallback *_file 3200 #ifndef Z7_ST 3201 , const CAffinityMode *affinityMode 3202 #endif 3203 ) 3204{ 3205 res = 0; 3206 CpuFreqRes = 0; 3207 UsageRes = 0; 3208 3209 if (numThreads == 0) 3210 numThreads = 1; 3211 3212 #ifdef Z7_ST 3213 numThreads = 1; 3214 #endif 3215 3216 const UInt32 complexity = kNumFreqCommands; 3217 UInt64 numIterations = complexInCommands / complexity; 3218 UInt32 numIterations2 = 1 << 30; 3219 if (numIterations > numIterations2) 3220 numIterations /= numIterations2; 3221 else 3222 { 3223 numIterations2 = (UInt32)numIterations; 3224 numIterations = 1; 3225 } 3226 3227 CBenchInfoCalc progressInfoSpec; 3228 3229 #ifndef Z7_ST 3230 3231 bool mtMode = (numThreads > 1) || affinityMode->NeedAffinity(); 3232 3233 if (mtMode) 3234 { 3235 
// (continuation of CFreqBench::FreqBench(): multi-thread branch)
    CFreqThreads threads;
    threads.Items = new CFreqInfo[numThreads];
    UInt32 i;
    // all items are set up before the start time is taken
    for (i = 0; i < numThreads; i++)
    {
      CFreqInfo &info = threads.Items[i];
      info.Callback = _file;
      info.CallbackRes = S_OK;
      info.NumIterations = numIterations;
      info.Size = numIterations2;
    }
    progressInfoSpec.SetStartTime();
    for (i = 0; i < numThreads; i++)
    {
      // Sleep(10);
      CFreqInfo &info = threads.Items[i];
      WRes wres = affinityMode->CreateThread_WithAffinity(info.Thread, FreqThreadFunction, &info, i);
      // the thread is counted before the error check
      // NOTE(review): presumably (threads) waits for NumThreads threads at destruction - confirm in CFreqThreads
      if (info.Thread.IsCreated())
        threads.NumThreads++;
      if (wres != 0)
        return HRESULT_FROM_WIN32(wres);
    }
    WRes wres = threads.WaitAll();
    if (wres != 0)
      return HRESULT_FROM_WIN32(wres);
    // first non-S_OK callback result of any thread is returned
    for (i = 0; i < numThreads; i++)
    {
      RINOK(threads.Items[i].CallbackRes)
    }
  }
  else
  #endif
  {
    // single-thread branch: the work is done in the calling thread
    progressInfoSpec.SetStartTime();
    UInt32 sum = g_BenchCpuFreqTemp;
    for (UInt64 k = numIterations; k > 0; k--)
    {
      sum = CountCpuFreq(sum, numIterations2, g_BenchCpuFreqTemp);
      if (_file)
      {
        RINOK(_file->CheckBreak())
      }
    }
    // (res) is changed only in this branch
    res += sum;
  }

  // NOTE(review): this check looks like a way to keep (res) used, so the work can't be optimized out - confirm
  if (res == 0x12345678)
  if (_file)
  {
    RINOK(_file->CheckBreak())
  }

  CBenchInfo info;
  progressInfoSpec.SetFinishTime(info);

  info.UnpackSize = 0;
  info.PackSize = 0;
  info.NumIterations = 1;

  // (rating) is the speed for commands of all threads; CpuFreqRes is the value per thread
  const UInt64 numCommands = (UInt64)numIterations * numIterations2 * numThreads * complexity;
  const UInt64 rating = info.GetSpeed(numCommands);
  CpuFreqRes = rating / numThreads;
  UsageRes = info.GetUsage();

  if (_file)
  {
    // if (showFreq), the reference frequency is (specifiedFreq), or measured CpuFreqRes if it's zero
    PrintResults(_file, info,
        0, // weight
        rating,
        showFreq, showFreq ? (specifiedFreq != 0 ? specifiedFreq : CpuFreqRes) : 0, NULL);
    RINOK(_file->CheckBreak())
  }

  return S_OK;
}



/* Benchmarks the hash method (method.MethodName) with (numThreads) threads
   (0 means 1 thread) for data (fileData, bufferSize).
   out: (speed), (usage).
   If (_file) is not NULL and (showRating), the result line is printed and
   it's added to (*encodeRes) by PrintResults().
   Returns E_NOTIMPL, if the hash method was not found. */
static HRESULT CrcBench(
    DECL_EXTERNAL_CODECS_LOC_VARS
    UInt64 complexInCommands,
    UInt32 numThreads,
    const size_t bufferSize,
    const Byte *fileData,

    UInt64 &speed,
    UInt64 &usage,

    UInt32 complexity, unsigned benchWeight,
    const UInt32 *checkSum,
    const COneMethodInfo &method,
    IBenchPrintCallback *_file,
    #ifndef Z7_ST
    const CAffinityMode *affinityMode,
    #endif
    bool showRating,
    CTotalBenchRes *encodeRes,
    bool showFreq, UInt64 cpuFreq)
{
  if (numThreads == 0)
    numThreads = 1;

  #ifdef Z7_ST
  numThreads = 1;
  #endif

  const AString &methodName = method.MethodName;
  // methodName.RemoveChar(L'-');
  CMethodId hashID;
  if (!FindHashMethod(
      EXTERNAL_CODECS_LOC_VARS
      methodName, hashID))
    return E_NOTIMPL;

  /*
  // if will generate random data in each thread, instead of global data
  CMidAlignedBuffer buffer;
  if (!fileData)
  {
    ALLOC_WITH_HRESULT(&buffer, bufferSize)
    RandGen(buffer, bufferSize);
    fileData = buffer;
  }
  */

  // (bsize) is used as divider below, so zero size is replaced with 1
  const size_t bsize = (bufferSize == 0 ?
// (continuation of CrcBench(): number of passes over the buffer)
      1 : bufferSize);
  UInt64 numIterations = complexInCommands * k_Hash_Complex_Mult / complexity / bsize;
  if (numIterations == 0)
    numIterations = 1;

  CBenchInfoCalc progressInfoSpec;
  CBenchInfo info;

  #ifndef Z7_ST
  bool mtEncMode = (numThreads > 1) || affinityMode->NeedAffinity();

  if (mtEncMode)
  {
    CCrcThreads threads;
    threads.Items = new CCrcInfo[numThreads];
    {
      WRes wres = threads.Common.StartEvent.Create();
      if (wres != 0)
        return HRESULT_FROM_WIN32(wres);
      threads.NeedClose = true;
    }

    UInt32 i;
    // step 1: a separate hasher object is created and configured for each thread
    for (i = 0; i < numThreads; i++)
    {
      CCrcInfo &ci = threads.Items[i];
      AString name;
      RINOK(CreateHasher(EXTERNAL_CODECS_LOC_VARS hashID, name, ci.Hasher))
      if (!ci.Hasher)
        return E_NOTIMPL;
      CMyComPtr<ICompressSetCoderProperties> scp;
      ci.Hasher.QueryInterface(IID_ICompressSetCoderProperties, &scp);
      if (scp)
      {
        RINOK(method.SetCoderProps(scp))
      }

      ci.Callback = _file;
      ci.Data = fileData;
      ci.NumIterations = numIterations;
      ci.Size = bufferSize;
      ci.CheckSumDefined = false;
      if (checkSum)
      {
        ci.CheckSum = *checkSum;
        ci.CheckSumDefined = true;
      }

      #ifdef USE_ALLOCA
      // alloca size depends on thread index
      // NOTE(review): presumably it gives different stack offsets for threads - confirm in CCrcInfo
      ci.AllocaSize = (i * 16 * 21) & 0x7FF;
      #endif
    }

    // step 2: threads are created
    for (i = 0; i < numThreads; i++)
    {
      CCrcInfo &ci = threads.Items[i];
      ci.ThreadIndex = i;
      ci.Common = &threads.Common;
      ci.AffinityMode = *affinityMode;
      HRESULT hres = ci.CreateThread();
      // the thread is counted before the error check
      if (ci.Thread.IsCreated())
        threads.NumThreads++;
      if (hres != 0)
        return hres;
    }

    // step 3: wait for ReadyEvent of each thread and check thread's (Res)
    for (i = 0; i < numThreads; i++)
    {
      CCrcInfo &ci = threads.Items[i];
      WRes wres = ci.ReadyEvent.Lock();
      if (wres != 0)
        return HRESULT_FROM_WIN32(wres);
      RINOK(ci.Res)
    }

    // only the time between start and finish of StartAndWait() is measured
    progressInfoSpec.SetStartTime();

    WRes wres = threads.StartAndWait();
    if (wres != 0)
      return HRESULT_FROM_WIN32(wres);

    progressInfoSpec.SetFinishTime(info);

    // all threads must return the same checksum
    for (i = 0; i < numThreads; i++)
    {
      RINOK(threads.Items[i].Res)
      if (i != 0)
        if (threads.Items[i].CheckSum_Res !=
            threads.Items[i - 1].CheckSum_Res)
          return S_FALSE;
    }
  }
  else
  #endif
  {
    // single-thread branch: hashing is called from the current thread
    CMyComPtr<IHasher> hasher;
    AString name;
    RINOK(CreateHasher(EXTERNAL_CODECS_LOC_VARS hashID, name, hasher))
    if (!hasher)
      return E_NOTIMPL;
    CMyComPtr<ICompressSetCoderProperties> scp;
    hasher.QueryInterface(IID_ICompressSetCoderProperties, &scp);
    if (scp)
    {
      RINOK(method.SetCoderProps(scp))
    }
    CCrcInfo_Base crcib;
    crcib.CreateLocalBuf = false;
    // Generate() is called before the start time is taken
    RINOK(crcib.Generate(fileData, bufferSize))
    progressInfoSpec.SetStartTime();
    RINOK(crcib.CrcProcess(numIterations, checkSum, hasher, _file))
    progressInfoSpec.SetFinishTime(info);
  }


  // total processed size for all iterations of all threads
  UInt64 unpSize = numIterations * bufferSize;
  UInt64 unpSizeThreads = unpSize * numThreads;
  info.UnpackSize = unpSizeThreads;
  info.PackSize = unpSizeThreads;
  info.NumIterations = 1;

  if (_file)
  {
    if (showRating)
    {
      // for empty buffer the rating is calculated as for 1 byte per iteration
      UInt64 unpSizeThreads2 = unpSizeThreads;
      if (unpSizeThreads2 == 0)
        unpSizeThreads2 = numIterations * 1 * numThreads;
      // 256 here is equal to the value of k_Hash_Complex_Mult used for (numIterations) above
      const UInt64 numCommands = unpSizeThreads2 * complexity / 256;
      const UInt64 rating = info.GetSpeed(numCommands);
      PrintResults(_file, info,
          benchWeight, rating,
          showFreq, cpuFreq, encodeRes);
    }
    RINOK(_file->CheckBreak())
  }

  speed = info.GetSpeed(unpSizeThreads);
  usage = info.GetUsage();

  return S_OK;
}



/* Runs CrcBench() for each g_Hash item whose name matches methodMask.MethodName.
   One line is printed per method. E_NOTIMPL from CrcBench() is ignored;
   any other error stops the loop and is returned. */
static HRESULT TotalBench_Hash(
    DECL_EXTERNAL_CODECS_LOC_VARS
    const COneMethodInfo &methodMask,
    UInt64 complexInCommands,
    UInt32 numThreads,
    size_t bufSize,
    const Byte *fileData,
    IBenchPrintCallback *printCallback,
CBenchCallbackToPrint *callback, 3513 #ifndef Z7_ST 3514 const CAffinityMode *affinityMode, 3515 #endif 3516 CTotalBenchRes *encodeRes, 3517 bool showFreq, UInt64 cpuFreq) 3518{ 3519 for (unsigned i = 0; i < Z7_ARRAY_SIZE(g_Hash); i++) 3520 { 3521 const CBenchHash &bench = g_Hash[i]; 3522 if (!DoesWildcardMatchName_NoCase(methodMask.MethodName, bench.Name)) 3523 continue; 3524 PrintLeft(*callback->_file, bench.Name, kFieldSize_Name); 3525 // callback->BenchProps.DecComplexUnc = bench.DecComplexUnc; 3526 // callback->BenchProps.DecComplexCompr = bench.DecComplexCompr; 3527 // callback->BenchProps.EncComplex = bench.EncComplex; 3528 3529 COneMethodInfo method; 3530 NCOM::CPropVariant propVariant; 3531 propVariant = bench.Name; 3532 RINOK(method.ParseMethodFromPROPVARIANT(UString(), propVariant)) 3533 3534 UInt64 speed, usage; 3535 3536 const HRESULT res = CrcBench( 3537 EXTERNAL_CODECS_LOC_VARS 3538 complexInCommands, 3539 numThreads, bufSize, fileData, 3540 speed, usage, 3541 bench.Complex, bench.Weight, 3542 (!fileData && bufSize == (1 << kNumHashDictBits)) ? 
&bench.CheckSum : NULL, 3543 method, 3544 printCallback, 3545 #ifndef Z7_ST 3546 affinityMode, 3547 #endif 3548 true, // showRating 3549 encodeRes, showFreq, cpuFreq); 3550 if (res == E_NOTIMPL) 3551 { 3552 // callback->Print(" ---"); 3553 } 3554 else 3555 { 3556 RINOK(res) 3557 } 3558 callback->NewLine(); 3559 } 3560 return S_OK; 3561} 3562 3563struct CTempValues 3564{ 3565 UInt64 *Values; 3566 CTempValues(): Values(NULL) {} 3567 void Alloc(UInt32 num) { Values = new UInt64[num]; } 3568 ~CTempValues() { delete []Values; } 3569}; 3570 3571static void ParseNumberString(const UString &s, NCOM::CPropVariant &prop) 3572{ 3573 const wchar_t *end; 3574 UInt64 result = ConvertStringToUInt64(s, &end); 3575 if (*end != 0 || s.IsEmpty()) 3576 prop = s; 3577 else if (result <= (UInt32)0xFFFFFFFF) 3578 prop = (UInt32)result; 3579 else 3580 prop = result; 3581} 3582 3583 3584static bool AreSameMethodNames(const char *fullName, const char *shortName) 3585{ 3586 return StringsAreEqualNoCase_Ascii(fullName, shortName); 3587} 3588 3589 3590 3591 3592static void Print_Usage_and_Threads(IBenchPrintCallback &f, UInt64 usage, UInt32 threads) 3593{ 3594 PrintRequirements(f, "usage:", true, usage, "Benchmark threads: ", threads); 3595} 3596 3597 3598static void Print_Delimiter(IBenchPrintCallback &f) 3599{ 3600 f.Print(" |"); 3601} 3602 3603static void Print_Pow(IBenchPrintCallback &f, unsigned pow) 3604{ 3605 char s[16]; 3606 ConvertUInt32ToString(pow, s); 3607 unsigned pos = MyStringLen(s); 3608 s[pos++] = ':'; 3609 s[pos] = 0; 3610 PrintLeft(f, s, kFieldSize_SmallName); // 4 3611} 3612 3613static void Bench_BW_Print_Usage_Speed(IBenchPrintCallback &f, 3614 UInt64 usage, UInt64 speed) 3615{ 3616 PrintUsage(f, usage, kFieldSize_Usage); 3617 PrintNumber(f, speed / 1000000, kFieldSize_CrcSpeed); 3618} 3619 3620 3621HRESULT Bench( 3622 DECL_EXTERNAL_CODECS_LOC_VARS 3623 IBenchPrintCallback *printCallback, 3624 IBenchCallback *benchCallback, 3625 const CObjectVector<CProperty> &props, 3626 
UInt32 numIterations, 3627 bool multiDict, 3628 IBenchFreqCallback *freqCallback) 3629{ 3630 if (!CrcInternalTest()) 3631 return E_FAIL; 3632 3633 UInt32 numCPUs = 1; 3634 UInt64 ramSize = (UInt64)(sizeof(size_t)) << 29; 3635 3636 NSystem::CProcessAffinity threadsInfo; 3637 threadsInfo.InitST(); 3638 3639 #ifndef Z7_ST 3640 3641 if (threadsInfo.Get() && threadsInfo.GetNumProcessThreads() != 0) 3642 numCPUs = threadsInfo.GetNumProcessThreads(); 3643 else 3644 numCPUs = NSystem::GetNumberOfProcessors(); 3645 3646 #endif 3647 3648 // numCPUs = 24; 3649 /* 3650 { 3651 DWORD_PTR mask = (1 << 0); 3652 DWORD_PTR old = SetThreadAffinityMask(GetCurrentThread(), mask); 3653 old = old; 3654 DWORD_PTR old2 = SetThreadAffinityMask(GetCurrentThread(), mask); 3655 old2 = old2; 3656 return 0; 3657 } 3658 */ 3659 3660 bool ramSize_Defined = NSystem::GetRamSize(ramSize); 3661 3662 UInt32 numThreadsSpecified = numCPUs; 3663 bool needSetComplexity = false; 3664 UInt32 testTimeMs = kComplexInMs; 3665 UInt32 startDicLog = 22; 3666 bool startDicLog_Defined = false; 3667 UInt64 specifiedFreq = 0; 3668 bool multiThreadTests = false; 3669 UInt64 complexInCommands = kComplexInCommands; 3670 UInt32 numThreads_Start = 1; 3671 3672 #ifndef Z7_ST 3673 CAffinityMode affinityMode; 3674 #endif 3675 3676 3677 COneMethodInfo method; 3678 3679 CMidAlignedBuffer fileDataBuffer; 3680 bool use_fileData = false; 3681 bool isFixedDict = false; 3682 3683 { 3684 unsigned i; 3685 3686 if (printCallback) 3687 { 3688 for (i = 0; i < props.Size(); i++) 3689 { 3690 const CProperty &property = props[i]; 3691 printCallback->Print(" "); 3692 printCallback->Print(GetAnsiString(property.Name)); 3693 if (!property.Value.IsEmpty()) 3694 { 3695 printCallback->Print("="); 3696 printCallback->Print(GetAnsiString(property.Value)); 3697 } 3698 } 3699 if (!props.IsEmpty()) 3700 printCallback->NewLine(); 3701 } 3702 3703 3704 for (i = 0; i < props.Size(); i++) 3705 { 3706 const CProperty &property = props[i]; 3707 UString name 
(property.Name); 3708 name.MakeLower_Ascii(); 3709 3710 if (name.IsEqualTo("file")) 3711 { 3712 if (property.Value.IsEmpty()) 3713 return E_INVALIDARG; 3714 3715 NFile::NIO::CInFile file; 3716 if (!file.Open(us2fs(property.Value))) 3717 return GetLastError_noZero_HRESULT(); 3718 size_t len; 3719 { 3720 UInt64 len64; 3721 if (!file.GetLength(len64)) 3722 return GetLastError_noZero_HRESULT(); 3723 if (printCallback) 3724 { 3725 printCallback->Print("file size ="); 3726 PrintNumber(*printCallback, len64, 0); 3727 printCallback->NewLine(); 3728 } 3729 len = (size_t)len64; 3730 if (len != len64) 3731 return E_INVALIDARG; 3732 } 3733 3734 // (len == 0) is allowed. Also it's allowed if Alloc(0) returns NULL here 3735 3736 ALLOC_WITH_HRESULT(&fileDataBuffer, len) 3737 use_fileData = true; 3738 3739 { 3740 size_t processed; 3741 if (!file.ReadFull((Byte *)fileDataBuffer, len, processed)) 3742 return GetLastError_noZero_HRESULT(); 3743 if (processed != len) 3744 return E_FAIL; 3745 } 3746 continue; 3747 } 3748 3749 NCOM::CPropVariant propVariant; 3750 if (!property.Value.IsEmpty()) 3751 ParseNumberString(property.Value, propVariant); 3752 3753 if (name.IsEqualTo("time")) 3754 { 3755 RINOK(ParsePropToUInt32(UString(), propVariant, testTimeMs)) 3756 needSetComplexity = true; 3757 testTimeMs *= 1000; 3758 continue; 3759 } 3760 3761 if (name.IsEqualTo("timems")) 3762 { 3763 RINOK(ParsePropToUInt32(UString(), propVariant, testTimeMs)) 3764 needSetComplexity = true; 3765 continue; 3766 } 3767 3768 if (name.IsEqualTo("tic")) 3769 { 3770 UInt32 v; 3771 RINOK(ParsePropToUInt32(UString(), propVariant, v)) 3772 if (v >= 64) 3773 return E_INVALIDARG; 3774 complexInCommands = (UInt64)1 << v; 3775 continue; 3776 } 3777 3778 const bool isCurrent_fixedDict = name.IsEqualTo("df"); 3779 if (isCurrent_fixedDict) 3780 isFixedDict = true; 3781 if (isCurrent_fixedDict || name.IsEqualTo("ds")) 3782 { 3783 RINOK(ParsePropToUInt32(UString(), propVariant, startDicLog)) 3784 if (startDicLog > 32) 3785 
return E_INVALIDARG; 3786 startDicLog_Defined = true; 3787 continue; 3788 } 3789 3790 if (name.IsEqualTo("mts")) 3791 { 3792 RINOK(ParsePropToUInt32(UString(), propVariant, numThreads_Start)) 3793 continue; 3794 } 3795 3796 if (name.IsEqualTo("af")) 3797 { 3798 UInt32 bundle; 3799 RINOK(ParsePropToUInt32(UString(), propVariant, bundle)) 3800 if (bundle > 0 && bundle < numCPUs) 3801 { 3802 #ifndef Z7_ST 3803 affinityMode.SetLevels(numCPUs, 2); 3804 affinityMode.NumBundleThreads = bundle; 3805 #endif 3806 } 3807 continue; 3808 } 3809 3810 if (name.IsEqualTo("freq")) 3811 { 3812 UInt32 freq32 = 0; 3813 RINOK(ParsePropToUInt32(UString(), propVariant, freq32)) 3814 if (freq32 == 0) 3815 return E_INVALIDARG; 3816 specifiedFreq = (UInt64)freq32 * 1000000; 3817 3818 if (printCallback) 3819 { 3820 printCallback->Print("freq="); 3821 PrintNumber(*printCallback, freq32, 0); 3822 printCallback->NewLine(); 3823 } 3824 3825 continue; 3826 } 3827 3828 if (name.IsPrefixedBy_Ascii_NoCase("mt")) 3829 { 3830 const UString s = name.Ptr(2); 3831 if (s.IsEqualTo("*") 3832 || (s.IsEmpty() 3833 && propVariant.vt == VT_BSTR 3834 && StringsAreEqual_Ascii(propVariant.bstrVal, "*"))) 3835 { 3836 multiThreadTests = true; 3837 continue; 3838 } 3839 #ifndef Z7_ST 3840 RINOK(ParseMtProp(s, propVariant, numCPUs, numThreadsSpecified)) 3841 #endif 3842 continue; 3843 } 3844 3845 RINOK(method.ParseMethodFromPROPVARIANT(name, propVariant)) 3846 } 3847 } 3848 3849 if (printCallback) 3850 { 3851 AString s; 3852 3853 #ifndef _WIN32 3854 s += "Compiler: "; 3855 GetCompiler(s); 3856 printCallback->Print(s); 3857 printCallback->NewLine(); 3858 s.Empty(); 3859 #endif 3860 3861 GetSystemInfoText(s); 3862 printCallback->Print(s); 3863 printCallback->NewLine(); 3864 } 3865 3866 if (printCallback) 3867 { 3868 printCallback->Print("1T CPU Freq (MHz):"); 3869 } 3870 3871 if (printCallback || freqCallback) 3872 { 3873 UInt64 numMilCommands = 1 << 6; 3874 if (specifiedFreq != 0) 3875 { 3876 while (numMilCommands > 1 
&& specifiedFreq < (numMilCommands * 1000000)) 3877 numMilCommands >>= 1; 3878 } 3879 3880 for (int jj = 0;; jj++) 3881 { 3882 if (printCallback) 3883 RINOK(printCallback->CheckBreak()) 3884 3885 UInt64 start = ::GetTimeCount(); 3886 UInt32 sum = (UInt32)start; 3887 sum = CountCpuFreq(sum, (UInt32)(numMilCommands * 1000000 / kNumFreqCommands), g_BenchCpuFreqTemp); 3888 if (sum == 0xF1541213) 3889 if (printCallback) 3890 printCallback->Print(""); 3891 const UInt64 realDelta = ::GetTimeCount() - start; 3892 start = realDelta; 3893 if (start == 0) 3894 start = 1; 3895 if (start > (UInt64)1 << 61) 3896 start = 1; 3897 const UInt64 freq = GetFreq(); 3898 // mips is constant in some compilers 3899 const UInt64 hz = MyMultDiv64(numMilCommands * 1000000, freq, start); 3900 const UInt64 mipsVal = numMilCommands * freq / start; 3901 if (printCallback) 3902 { 3903 if (realDelta == 0) 3904 { 3905 printCallback->Print(" -"); 3906 } 3907 else 3908 { 3909 // PrintNumber(*printCallback, start, 0); 3910 PrintNumber(*printCallback, mipsVal, 5); 3911 } 3912 } 3913 if (freqCallback) 3914 { 3915 RINOK(freqCallback->AddCpuFreq(1, hz, kBenchmarkUsageMult)) 3916 } 3917 3918 if (jj >= 1) 3919 { 3920 bool needStop = (numMilCommands >= (1 << 3921 #ifdef _DEBUG 3922 7 3923 #else 3924 11 3925 #endif 3926 )); 3927 if (start >= freq * 16) 3928 { 3929 printCallback->Print(" (Cmplx)"); 3930 if (!freqCallback) // we don't want complexity change for old gui lzma benchmark 3931 { 3932 needSetComplexity = true; 3933 } 3934 needStop = true; 3935 } 3936 if (needSetComplexity) 3937 SetComplexCommandsMs(testTimeMs, false, mipsVal * 1000000, complexInCommands); 3938 if (needStop) 3939 break; 3940 numMilCommands <<= 1; 3941 } 3942 } 3943 if (freqCallback) 3944 { 3945 RINOK(freqCallback->FreqsFinished(1)) 3946 } 3947 } 3948 3949 if (numThreadsSpecified >= 2) 3950 if (printCallback || freqCallback) 3951 { 3952 if (printCallback) 3953 printCallback->NewLine(); 3954 3955 /* it can show incorrect frequency for 
HT threads. 3956 so we reduce freq test to (numCPUs / 2) */ 3957 3958 UInt32 numThreads = numThreadsSpecified >= numCPUs / 2 ? numCPUs / 2: numThreadsSpecified; 3959 if (numThreads < 1) 3960 numThreads = 1; 3961 3962 if (printCallback) 3963 { 3964 char s[128]; 3965 ConvertUInt64ToString(numThreads, s); 3966 printCallback->Print(s); 3967 printCallback->Print("T CPU Freq (MHz):"); 3968 } 3969 UInt64 numMilCommands = 1 << 3970 #ifdef _DEBUG 3971 7; 3972 #else 3973 10; 3974 #endif 3975 3976 if (specifiedFreq != 0) 3977 { 3978 while (numMilCommands > 1 && specifiedFreq < (numMilCommands * 1000000)) 3979 numMilCommands >>= 1; 3980 } 3981 3982 // for (int jj = 0;; jj++) 3983 for (;;) 3984 { 3985 if (printCallback) 3986 RINOK(printCallback->CheckBreak()) 3987 3988 { 3989 // PrintLeft(f, "CPU", kFieldSize_Name); 3990 3991 // UInt32 resVal; 3992 3993 CFreqBench fb; 3994 fb.complexInCommands = numMilCommands * 1000000; 3995 fb.numThreads = numThreads; 3996 // showFreq; 3997 // fb.showFreq = (freqTest == kNumCpuTests - 1 || specifiedFreq != 0); 3998 fb.showFreq = true; 3999 fb.specifiedFreq = 1; 4000 4001 const HRESULT res = fb.FreqBench(NULL /* printCallback */ 4002 #ifndef Z7_ST 4003 , &affinityMode 4004 #endif 4005 ); 4006 RINOK(res) 4007 4008 if (freqCallback) 4009 { 4010 RINOK(freqCallback->AddCpuFreq(numThreads, fb.CpuFreqRes, fb.UsageRes)) 4011 } 4012 4013 if (printCallback) 4014 { 4015 /* 4016 if (realDelta == 0) 4017 { 4018 printCallback->Print(" -"); 4019 } 4020 else 4021 */ 4022 { 4023 // PrintNumber(*printCallback, start, 0); 4024 PrintUsage(*printCallback, fb.UsageRes, 3); 4025 printCallback->Print("%"); 4026 PrintNumber(*printCallback, fb.CpuFreqRes / 1000000, 0); 4027 printCallback->Print(" "); 4028 4029 // PrintNumber(*printCallback, fb.UsageRes, 5); 4030 } 4031 } 4032 } 4033 // if (jj >= 1) 4034 { 4035 const bool needStop = (numMilCommands >= (1 << 4036 #ifdef _DEBUG 4037 7 4038 #else 4039 11 4040 #endif 4041 )); 4042 if (needStop) 4043 break; 4044 
numMilCommands <<= 1; 4045 } 4046 } 4047 if (freqCallback) 4048 { 4049 RINOK(freqCallback->FreqsFinished(numThreads)) 4050 } 4051 } 4052 4053 4054 if (printCallback) 4055 { 4056 printCallback->NewLine(); 4057 printCallback->NewLine(); 4058 PrintRequirements(*printCallback, "size: ", ramSize_Defined, ramSize, "CPU hardware threads:", numCPUs); 4059 printCallback->Print(GetProcessThreadsInfo(threadsInfo)); 4060 printCallback->NewLine(); 4061 } 4062 4063 if (numThreadsSpecified < 1 || numThreadsSpecified > kNumThreadsMax) 4064 return E_INVALIDARG; 4065 4066 UInt64 dict = (UInt64)1 << startDicLog; 4067 const bool dictIsDefined = (isFixedDict || method.Get_DicSize(dict)); 4068 4069 const unsigned level = method.GetLevel(); 4070 4071 AString &methodName = method.MethodName; 4072 const AString original_MethodName = methodName; 4073 if (methodName.IsEmpty()) 4074 methodName = "LZMA"; 4075 4076 if (benchCallback) 4077 { 4078 CBenchProps benchProps; 4079 benchProps.SetLzmaCompexity(); 4080 const UInt64 dictSize = method.Get_Lzma_DicSize(); 4081 4082 size_t uncompressedDataSize; 4083 if (use_fileData) 4084 { 4085 uncompressedDataSize = fileDataBuffer.Size(); 4086 } 4087 else 4088 { 4089 uncompressedDataSize = kAdditionalSize + (size_t)dictSize; 4090 if (uncompressedDataSize < dictSize) 4091 return E_INVALIDARG; 4092 } 4093 4094 return MethodBench( 4095 EXTERNAL_CODECS_LOC_VARS 4096 complexInCommands, 4097 #ifndef Z7_ST 4098 true, numThreadsSpecified, 4099 &affinityMode, 4100 #endif 4101 method, 4102 uncompressedDataSize, (const Byte *)fileDataBuffer, 4103 kOldLzmaDictBits, printCallback, benchCallback, &benchProps); 4104 } 4105 4106 if (methodName.IsEqualTo_Ascii_NoCase("CRC")) 4107 methodName = "crc32"; 4108 4109 CMethodId hashID; 4110 const bool isHashMethod = FindHashMethod(EXTERNAL_CODECS_LOC_VARS methodName, hashID); 4111 int codecIndex = -1; 4112 bool isFilter = false; 4113 if (!isHashMethod) 4114 { 4115 UInt32 numStreams; 4116 codecIndex = 
FindMethod_Index(EXTERNAL_CODECS_LOC_VARS original_MethodName, 4117 true, // encode 4118 hashID, numStreams, isFilter); 4119 // we can allow non filter for BW tests 4120 if (!isFilter) codecIndex = -1; 4121 } 4122 4123 CBenchCallbackToPrint callback; 4124 callback.Init(); 4125 callback._file = printCallback; 4126 4127 if (isHashMethod || codecIndex != -1) 4128 { 4129 if (!printCallback) 4130 return S_FALSE; 4131 IBenchPrintCallback &f = *printCallback; 4132 4133 UInt64 dict64 = dict; 4134 if (!dictIsDefined) 4135 dict64 = (1 << 27); 4136 if (use_fileData) 4137 { 4138 if (!dictIsDefined) 4139 dict64 = fileDataBuffer.Size(); 4140 else if (dict64 > fileDataBuffer.Size()) 4141 dict64 = fileDataBuffer.Size(); 4142 } 4143 4144 for (;;) 4145 { 4146 const int index = method.FindProp(NCoderPropID::kDictionarySize); 4147 if (index < 0) 4148 break; 4149 method.Props.Delete((unsigned)index); 4150 } 4151 4152 // methodName.RemoveChar(L'-'); 4153 Int32 complexity = 16 * k_Hash_Complex_Mult; // for unknown hash method 4154 const UInt32 *checkSum = NULL; 4155 int benchIndex = -1; 4156 4157 if (isHashMethod) 4158 { 4159 for (unsigned i = 0; i < Z7_ARRAY_SIZE(g_Hash); i++) 4160 { 4161 const CBenchHash &h = g_Hash[i]; 4162 AString benchMethod (h.Name); 4163 AString benchProps; 4164 const int propPos = benchMethod.Find(':'); 4165 if (propPos >= 0) 4166 { 4167 benchProps = benchMethod.Ptr((unsigned)(propPos + 1)); 4168 benchMethod.DeleteFrom((unsigned)propPos); 4169 } 4170 4171 if (AreSameMethodNames(benchMethod, methodName)) 4172 { 4173 const bool sameProps = method.PropsString.IsEqualTo_Ascii_NoCase(benchProps); 4174 /* 4175 bool isMainMethod = method.PropsString.IsEmpty(); 4176 if (isMainMethod) 4177 isMainMethod = !checkSum 4178 || (benchMethod.IsEqualTo_Ascii_NoCase("crc32") && benchProps.IsEqualTo_Ascii_NoCase("8")); 4179 if (sameProps || isMainMethod) 4180 */ 4181 { 4182 complexity = (Int32)h.Complex; 4183 checkSum = &h.CheckSum; 4184 if (sameProps) 4185 break; 4186 /* 4187 if 
property. is not specified, we use the complexity 4188 for latest fastest method (crc32:64) 4189 */ 4190 } 4191 } 4192 } 4193 // if (!checkSum) return E_NOTIMPL; 4194 } 4195 else 4196 { 4197 for (unsigned i = 0; i < Z7_ARRAY_SIZE(g_Bench); i++) 4198 { 4199 const CBenchMethod &bench = g_Bench[i]; 4200 AString benchMethod (bench.Name); 4201 AString benchProps; 4202 const int propPos = benchMethod.Find(':'); 4203 if (propPos >= 0) 4204 { 4205 benchProps = benchMethod.Ptr((unsigned)(propPos + 1)); 4206 benchMethod.DeleteFrom((unsigned)propPos); 4207 } 4208 4209 if (AreSameMethodNames(benchMethod, methodName)) 4210 { 4211 const bool sameProps = method.PropsString.IsEqualTo_Ascii_NoCase(benchProps); 4212 // bool isMainMethod = method.PropsString.IsEmpty(); 4213 // if (sameProps || isMainMethod) 4214 { 4215 benchIndex = (int)i; 4216 if (sameProps) 4217 break; 4218 } 4219 } 4220 } 4221 // if (benchIndex < 0) return E_NOTIMPL; 4222 } 4223 4224 { 4225 /* we count usage only for crc and filter. non-filters are not supported */ 4226 UInt64 usage = (1 << 20); 4227 UInt64 bufSize = dict64; 4228 UInt32 numBlocks = isHashMethod ? 1 : 3; 4229 if (use_fileData) 4230 { 4231 usage += fileDataBuffer.Size(); 4232 if (bufSize > fileDataBuffer.Size()) 4233 bufSize = fileDataBuffer.Size(); 4234 if (isHashMethod) 4235 { 4236 numBlocks = 0; 4237 #ifndef Z7_ST 4238 if (numThreadsSpecified != 1) 4239 numBlocks = (k_Crc_CreateLocalBuf_For_File ? 
1 : 0); 4240 #endif 4241 } 4242 } 4243 usage += numThreadsSpecified * bufSize * numBlocks; 4244 Print_Usage_and_Threads(f, usage, numThreadsSpecified); 4245 } 4246 4247 CUIntVector numThreadsVector; 4248 { 4249 unsigned nt = numThreads_Start; 4250 for (;;) 4251 { 4252 if (nt > numThreadsSpecified) 4253 break; 4254 numThreadsVector.Add(nt); 4255 const unsigned next = nt * 2; 4256 const UInt32 ntHalf= numThreadsSpecified / 2; 4257 if (ntHalf > nt && ntHalf < next) 4258 numThreadsVector.Add(ntHalf); 4259 if (numThreadsSpecified > nt && numThreadsSpecified < next) 4260 numThreadsVector.Add(numThreadsSpecified); 4261 nt = next; 4262 } 4263 } 4264 4265 unsigned numColumns = isHashMethod ? 1 : 2; 4266 CTempValues speedTotals; 4267 CTempValues usageTotals; 4268 { 4269 const unsigned numItems = numThreadsVector.Size() * numColumns; 4270 speedTotals.Alloc(numItems); 4271 usageTotals.Alloc(numItems); 4272 for (unsigned i = 0; i < numItems; i++) 4273 { 4274 speedTotals.Values[i] = 0; 4275 usageTotals.Values[i] = 0; 4276 } 4277 } 4278 4279 f.NewLine(); 4280 for (unsigned line = 0; line < 3; line++) 4281 { 4282 f.NewLine(); 4283 f.Print(line == 0 ? "THRD" : line == 1 ? " " : "Size"); 4284 FOR_VECTOR (ti, numThreadsVector) 4285 { 4286 if (ti != 0) 4287 Print_Delimiter(f); 4288 if (line == 0) 4289 { 4290 PrintSpaces(f, (kFieldSize_CrcSpeed + kFieldSize_Usage + 2) * (numColumns - 1)); 4291 PrintNumber(f, numThreadsVector[ti], 1 + kFieldSize_Usage + kFieldSize_CrcSpeed); 4292 } 4293 else 4294 { 4295 for (unsigned c = 0; c < numColumns; c++) 4296 { 4297 PrintRight(f, line == 1 ? "Usage" : "%", kFieldSize_Usage + 1); 4298 PrintRight(f, line == 1 ? 
"BW" : "MB/s", kFieldSize_CrcSpeed + 1); 4299 } 4300 } 4301 } 4302 } 4303 f.NewLine(); 4304 4305 UInt64 numSteps = 0; 4306 4307 // for (UInt32 iter = 0; iter < numIterations; iter++) 4308 // { 4309 unsigned pow = 10; // kNumHashDictBits 4310 if (startDicLog_Defined) 4311 pow = startDicLog; 4312 4313 // #define NUM_SUB_BITS 2 4314 // pow <<= NUM_SUB_BITS; 4315 for (;; pow++) 4316 { 4317 const UInt64 bufSize = (UInt64)1 << pow; 4318 // UInt64 bufSize = (UInt64)1 << (pow >> NUM_SUB_BITS); 4319 // bufSize += ((UInt64)pow & ((1 << NUM_SUB_BITS) - 1)) << ((pow >> NUM_SUB_BITS) - NUM_SUB_BITS); 4320 4321 size_t dataSize = fileDataBuffer.Size(); 4322 if (dataSize > bufSize || !use_fileData) 4323 dataSize = (size_t)bufSize; 4324 4325 for (UInt32 iter = 0; iter < numIterations; iter++) 4326 { 4327 Print_Pow(f, pow); 4328 // PrintNumber(f, bufSize >> 10, 4); 4329 4330 FOR_VECTOR (ti, numThreadsVector) 4331 { 4332 RINOK(f.CheckBreak()) 4333 const UInt32 numThreads = numThreadsVector[ti]; 4334 if (isHashMethod) 4335 { 4336 UInt64 speed = 0; 4337 UInt64 usage = 0; 4338 const HRESULT res = CrcBench(EXTERNAL_CODECS_LOC_VARS complexInCommands, 4339 numThreads, 4340 dataSize, (const Byte *)fileDataBuffer, 4341 speed, usage, 4342 (UInt32)complexity, 4343 1, // benchWeight, 4344 (pow == kNumHashDictBits && !use_fileData) ? 
checkSum : NULL, 4345 method, 4346 &f, 4347 #ifndef Z7_ST 4348 &affinityMode, 4349 #endif 4350 false, // showRating 4351 NULL, false, 0); 4352 RINOK(res) 4353 4354 if (ti != 0) 4355 Print_Delimiter(f); 4356 4357 Bench_BW_Print_Usage_Speed(f, usage, speed); 4358 speedTotals.Values[ti] += speed; 4359 usageTotals.Values[ti] += usage; 4360 } 4361 else 4362 { 4363 { 4364 unsigned keySize = 32; 4365 if (IsString1PrefixedByString2(methodName, "AES128")) keySize = 16; 4366 else if (IsString1PrefixedByString2(methodName, "AES192")) keySize = 24; 4367 callback.BenchProps.KeySize = keySize; 4368 } 4369 4370 COneMethodInfo method2 = method; 4371 unsigned bench_DictBits; 4372 4373 if (benchIndex >= 0) 4374 { 4375 const CBenchMethod &bench = g_Bench[benchIndex]; 4376 callback.BenchProps.EncComplex = bench.EncComplex; 4377 callback.BenchProps.DecComplexUnc = bench.DecComplexUnc; 4378 callback.BenchProps.DecComplexCompr = bench.DecComplexCompr; 4379 bench_DictBits = bench.DictBits; 4380 // bench_DictBits = kOldLzmaDictBits; = 32 default : for debug 4381 } 4382 else 4383 { 4384 bench_DictBits = kOldLzmaDictBits; // = 32 default 4385 if (isFilter) 4386 { 4387 const unsigned k_UnknownCoderComplexity = 4; 4388 callback.BenchProps.EncComplex = k_UnknownCoderComplexity; 4389 callback.BenchProps.DecComplexUnc = k_UnknownCoderComplexity; 4390 } 4391 else 4392 { 4393 callback.BenchProps.EncComplex = 1 << 10; 4394 callback.BenchProps.DecComplexUnc = 1 << 6; 4395 } 4396 callback.BenchProps.DecComplexCompr = 0; 4397 } 4398 callback.NeedPrint = false; 4399 4400 if (StringsAreEqualNoCase_Ascii(method2.MethodName, "LZMA")) 4401 { 4402 const NCOM::CPropVariant propVariant = (UInt32)pow; 4403 RINOK(method2.ParseMethodFromPROPVARIANT((UString)"d", propVariant)) 4404 } 4405 4406 const HRESULT res = MethodBench( 4407 EXTERNAL_CODECS_LOC_VARS 4408 complexInCommands, 4409 #ifndef Z7_ST 4410 false, // oldLzmaBenchMode 4411 numThreadsVector[ti], 4412 &affinityMode, 4413 #endif 4414 method2, 4415 
dataSize, (const Byte *)fileDataBuffer, 4416 bench_DictBits, 4417 printCallback, 4418 &callback, 4419 &callback.BenchProps); 4420 RINOK(res) 4421 4422 if (ti != 0) 4423 Print_Delimiter(f); 4424 4425 for (unsigned i = 0; i < 2; i++) 4426 { 4427 const CBenchInfo &bi = callback.BenchInfo_Results[i]; 4428 const UInt64 usage = bi.GetUsage(); 4429 const UInt64 speed = bi.GetUnpackSizeSpeed(); 4430 usageTotals.Values[ti * 2 + i] += usage; 4431 speedTotals.Values[ti * 2 + i] += speed; 4432 Bench_BW_Print_Usage_Speed(f, usage, speed); 4433 } 4434 } 4435 } 4436 4437 f.NewLine(); 4438 numSteps++; 4439 } 4440 if (dataSize >= dict64) 4441 break; 4442 } 4443 4444 if (numSteps != 0) 4445 { 4446 f.Print("Avg:"); 4447 for (unsigned ti = 0; ti < numThreadsVector.Size(); ti++) 4448 { 4449 if (ti != 0) 4450 Print_Delimiter(f); 4451 for (unsigned i = 0; i < numColumns; i++) 4452 Bench_BW_Print_Usage_Speed(f, 4453 usageTotals.Values[ti * numColumns + i] / numSteps, 4454 speedTotals.Values[ti * numColumns + i] / numSteps); 4455 } 4456 f.NewLine(); 4457 } 4458 4459 return S_OK; 4460 } 4461 4462 bool use2Columns = false; 4463 4464 bool totalBenchMode = false; 4465 bool onlyHashBench = false; 4466 if (methodName.IsEqualTo_Ascii_NoCase("hash")) 4467 { 4468 onlyHashBench = true; 4469 methodName = "*"; 4470 totalBenchMode = true; 4471 } 4472 else if (methodName.Find('*') >= 0) 4473 totalBenchMode = true; 4474 4475 // ---------- Threads loop ---------- 4476 for (unsigned threadsPassIndex = 0; threadsPassIndex < 3; threadsPassIndex++) 4477 { 4478 4479 UInt32 numThreads = numThreadsSpecified; 4480 4481 if (!multiThreadTests) 4482 { 4483 if (threadsPassIndex != 0) 4484 break; 4485 } 4486 else 4487 { 4488 numThreads = 1; 4489 if (threadsPassIndex != 0) 4490 { 4491 if (numCPUs < 2) 4492 break; 4493 numThreads = numCPUs; 4494 if (threadsPassIndex == 1) 4495 { 4496 if (numCPUs >= 4) 4497 numThreads = numCPUs / 2; 4498 } 4499 else if (numCPUs < 4) 4500 break; 4501 } 4502 } 4503 4504 IBenchPrintCallback 
&f = *printCallback; 4505 4506 if (threadsPassIndex > 0) 4507 { 4508 f.NewLine(); 4509 f.NewLine(); 4510 } 4511 4512 if (!dictIsDefined && !onlyHashBench) 4513 { 4514 const unsigned dicSizeLog_Main = (totalBenchMode ? 24 : 25); 4515 unsigned dicSizeLog = dicSizeLog_Main; 4516 4517 #ifdef UNDER_CE 4518 dicSizeLog = (UInt64)1 << 20; 4519 #endif 4520 4521 if (ramSize_Defined) 4522 for (; dicSizeLog > kBenchMinDicLogSize; dicSizeLog--) 4523 if (GetBenchMemoryUsage(numThreads, (int)level, ((UInt64)1 << dicSizeLog), totalBenchMode) + (8 << 20) <= ramSize) 4524 break; 4525 4526 dict = (UInt64)1 << dicSizeLog; 4527 4528 if (totalBenchMode && dicSizeLog != dicSizeLog_Main) 4529 { 4530 f.Print("Dictionary reduced to: "); 4531 PrintNumber(f, dicSizeLog, 1); 4532 f.NewLine(); 4533 } 4534 } 4535 4536 Print_Usage_and_Threads(f, 4537 onlyHashBench ? 4538 GetBenchMemoryUsage_Hash(numThreads, dict) : 4539 GetBenchMemoryUsage(numThreads, (int)level, dict, totalBenchMode), 4540 numThreads); 4541 4542 f.NewLine(); 4543 4544 f.NewLine(); 4545 4546 if (totalBenchMode) 4547 { 4548 callback.NameFieldSize = kFieldSize_Name; 4549 use2Columns = false; 4550 } 4551 else 4552 { 4553 callback.NameFieldSize = kFieldSize_SmallName; 4554 use2Columns = true; 4555 } 4556 callback.Use2Columns = use2Columns; 4557 4558 bool showFreq = false; 4559 UInt64 cpuFreq = 0; 4560 4561 if (totalBenchMode) 4562 { 4563 showFreq = true; 4564 } 4565 4566 unsigned fileldSize = kFieldSize_TotalSize; 4567 if (showFreq) 4568 fileldSize += kFieldSize_EUAndEffec; 4569 4570 if (use2Columns) 4571 { 4572 PrintSpaces(f, callback.NameFieldSize); 4573 PrintRight(f, "Compressing", fileldSize); 4574 f.Print(kSep); 4575 PrintRight(f, "Decompressing", fileldSize); 4576 } 4577 f.NewLine(); 4578 PrintLeft(f, totalBenchMode ? 
"Method" : "Dict", callback.NameFieldSize); 4579 4580 int j; 4581 4582 for (j = 0; j < 2; j++) 4583 { 4584 PrintRight(f, "Speed", kFieldSize_Speed + 1); 4585 PrintRight(f, "Usage", kFieldSize_Usage + 1); 4586 PrintRight(f, "R/U", kFieldSize_RU + 1); 4587 PrintRight(f, "Rating", kFieldSize_Rating + 1); 4588 if (showFreq) 4589 { 4590 PrintRight(f, "E/U", kFieldSize_EU + 1); 4591 PrintRight(f, "Effec", kFieldSize_Effec + 1); 4592 } 4593 if (!use2Columns) 4594 break; 4595 if (j == 0) 4596 f.Print(kSep); 4597 } 4598 4599 f.NewLine(); 4600 PrintSpaces(f, callback.NameFieldSize); 4601 4602 for (j = 0; j < 2; j++) 4603 { 4604 PrintRight(f, "KiB/s", kFieldSize_Speed + 1); 4605 PrintRight(f, "%", kFieldSize_Usage + 1); 4606 PrintRight(f, "MIPS", kFieldSize_RU + 1); 4607 PrintRight(f, "MIPS", kFieldSize_Rating + 1); 4608 if (showFreq) 4609 { 4610 PrintRight(f, "%", kFieldSize_EU + 1); 4611 PrintRight(f, "%", kFieldSize_Effec + 1); 4612 } 4613 if (!use2Columns) 4614 break; 4615 if (j == 0) 4616 f.Print(kSep); 4617 } 4618 4619 f.NewLine(); 4620 f.NewLine(); 4621 4622 if (specifiedFreq != 0) 4623 cpuFreq = specifiedFreq; 4624 4625 // bool showTotalSpeed = false; 4626 4627 if (totalBenchMode) 4628 { 4629 for (UInt32 i = 0; i < numIterations; i++) 4630 { 4631 if (i != 0) 4632 printCallback->NewLine(); 4633 4634 const unsigned kNumCpuTests = 3; 4635 for (unsigned freqTest = 0; freqTest < kNumCpuTests; freqTest++) 4636 { 4637 PrintLeft(f, "CPU", kFieldSize_Name); 4638 4639 // UInt32 resVal; 4640 4641 CFreqBench fb; 4642 fb.complexInCommands = complexInCommands; 4643 fb.numThreads = numThreads; 4644 // showFreq; 4645 fb.showFreq = (freqTest == kNumCpuTests - 1 || specifiedFreq != 0); 4646 fb.specifiedFreq = specifiedFreq; 4647 4648 const HRESULT res = fb.FreqBench(printCallback 4649 #ifndef Z7_ST 4650 , &affinityMode 4651 #endif 4652 ); 4653 RINOK(res) 4654 4655 cpuFreq = fb.CpuFreqRes; 4656 callback.NewLine(); 4657 4658 if (specifiedFreq != 0) 4659 cpuFreq = specifiedFreq; 4660 4661 
if (testTimeMs >= 1000) 4662 if (freqTest == kNumCpuTests - 1) 4663 { 4664 // SetComplexCommandsMs(testTimeMs, specifiedFreq != 0, cpuFreq, complexInCommands); 4665 } 4666 } 4667 callback.NewLine(); 4668 4669 // return S_OK; // change it 4670 4671 callback.SetFreq(true, cpuFreq); 4672 4673 if (!onlyHashBench) 4674 { 4675 size_t dataSize = (size_t)dict; 4676 if (use_fileData) 4677 { 4678 dataSize = fileDataBuffer.Size(); 4679 if (dictIsDefined && dataSize > dict) 4680 dataSize = (size_t)dict; 4681 } 4682 4683 const HRESULT res = TotalBench(EXTERNAL_CODECS_LOC_VARS 4684 method, complexInCommands, 4685 #ifndef Z7_ST 4686 numThreads, 4687 &affinityMode, 4688 #endif 4689 dictIsDefined || use_fileData, // forceUnpackSize 4690 dataSize, 4691 (const Byte *)fileDataBuffer, 4692 printCallback, &callback); 4693 RINOK(res) 4694 } 4695 4696 { 4697 size_t dataSize = (size_t)1 << kNumHashDictBits; 4698 if (dictIsDefined) 4699 { 4700 dataSize = (size_t)dict; 4701 if (dataSize != dict) 4702 return E_OUTOFMEMORY; 4703 } 4704 if (use_fileData) 4705 { 4706 dataSize = fileDataBuffer.Size(); 4707 if (dictIsDefined && dataSize > dict) 4708 dataSize = (size_t)dict; 4709 } 4710 4711 const HRESULT res = TotalBench_Hash(EXTERNAL_CODECS_LOC_VARS 4712 method, complexInCommands, 4713 numThreads, 4714 dataSize, (const Byte *)fileDataBuffer, 4715 printCallback, &callback, 4716 #ifndef Z7_ST 4717 &affinityMode, 4718 #endif 4719 &callback.EncodeRes, true, cpuFreq); 4720 RINOK(res) 4721 } 4722 4723 callback.NewLine(); 4724 { 4725 PrintLeft(f, "CPU", kFieldSize_Name); 4726 4727 CFreqBench fb; 4728 fb.complexInCommands = complexInCommands; 4729 fb.numThreads = numThreads; 4730 // showFreq; 4731 fb.showFreq = (specifiedFreq != 0); 4732 fb.specifiedFreq = specifiedFreq; 4733 4734 const HRESULT res = fb.FreqBench(printCallback 4735 #ifndef Z7_ST 4736 , &affinityMode 4737 #endif 4738 ); 4739 RINOK(res) 4740 callback.NewLine(); 4741 } 4742 } 4743 } 4744 else 4745 { 4746 needSetComplexity = true; 4747 if 
(!methodName.IsEqualTo_Ascii_NoCase("LZMA")) 4748 { 4749 unsigned i; 4750 for (i = 0; i < Z7_ARRAY_SIZE(g_Bench); i++) 4751 { 4752 const CBenchMethod &h = g_Bench[i]; 4753 AString benchMethod (h.Name); 4754 AString benchProps; 4755 const int propPos = benchMethod.Find(':'); 4756 if (propPos >= 0) 4757 { 4758 benchProps = benchMethod.Ptr((unsigned)(propPos + 1)); 4759 benchMethod.DeleteFrom((unsigned)propPos); 4760 } 4761 4762 if (AreSameMethodNames(benchMethod, methodName)) 4763 { 4764 if (benchProps.IsEmpty() 4765 || (benchProps == "x5" && method.PropsString.IsEmpty()) 4766 || method.PropsString.IsPrefixedBy_Ascii_NoCase(benchProps)) 4767 { 4768 callback.BenchProps.EncComplex = h.EncComplex; 4769 callback.BenchProps.DecComplexCompr = h.DecComplexCompr; 4770 callback.BenchProps.DecComplexUnc = h.DecComplexUnc; 4771 needSetComplexity = false; 4772 break; 4773 } 4774 } 4775 } 4776 /* 4777 if (i == Z7_ARRAY_SIZE(g_Bench)) 4778 return E_NOTIMPL; 4779 */ 4780 } 4781 if (needSetComplexity) 4782 callback.BenchProps.SetLzmaCompexity(); 4783 4784 if (startDicLog < kBenchMinDicLogSize) 4785 startDicLog = kBenchMinDicLogSize; 4786 4787 for (unsigned i = 0; i < numIterations; i++) 4788 { 4789 unsigned pow = (dict < GetDictSizeFromLog(startDicLog)) ? kBenchMinDicLogSize : (unsigned)startDicLog; 4790 if (!multiDict) 4791 pow = 32; 4792 while (GetDictSizeFromLog(pow) > dict && pow > 0) 4793 pow--; 4794 for (; GetDictSizeFromLog(pow) <= dict; pow++) 4795 { 4796 Print_Pow(f, pow); 4797 callback.DictSize = (UInt64)1 << pow; 4798 4799 COneMethodInfo method2 = method; 4800 4801 if (StringsAreEqualNoCase_Ascii(method2.MethodName, "LZMA")) 4802 { 4803 // We add dictionary size property. 4804 // method2 can have two different dictionary size properties. 4805 // And last property is main. 
4806 NCOM::CPropVariant propVariant = (UInt32)pow; 4807 RINOK(method2.ParseMethodFromPROPVARIANT((UString)"d", propVariant)) 4808 } 4809 4810 size_t uncompressedDataSize; 4811 if (use_fileData) 4812 { 4813 uncompressedDataSize = fileDataBuffer.Size(); 4814 } 4815 else 4816 { 4817 uncompressedDataSize = (size_t)callback.DictSize; 4818 if (uncompressedDataSize != callback.DictSize) 4819 return E_OUTOFMEMORY; 4820 if (uncompressedDataSize >= (1 << 18)) 4821 uncompressedDataSize += kAdditionalSize; 4822 } 4823 4824 const HRESULT res = MethodBench( 4825 EXTERNAL_CODECS_LOC_VARS 4826 complexInCommands, 4827 #ifndef Z7_ST 4828 true, numThreads, 4829 &affinityMode, 4830 #endif 4831 method2, 4832 uncompressedDataSize, (const Byte *)fileDataBuffer, 4833 kOldLzmaDictBits, printCallback, &callback, &callback.BenchProps); 4834 f.NewLine(); 4835 RINOK(res) 4836 if (!multiDict) 4837 break; 4838 } 4839 } 4840 } 4841 4842 PrintChars(f, '-', callback.NameFieldSize + fileldSize); 4843 4844 if (use2Columns) 4845 { 4846 f.Print(kSep); 4847 PrintChars(f, '-', fileldSize); 4848 } 4849 4850 f.NewLine(); 4851 4852 if (use2Columns) 4853 { 4854 PrintLeft(f, "Avr:", callback.NameFieldSize); 4855 PrintTotals(f, showFreq, cpuFreq, !totalBenchMode, callback.EncodeRes); 4856 f.Print(kSep); 4857 PrintTotals(f, showFreq, cpuFreq, !totalBenchMode, callback.DecodeRes); 4858 f.NewLine(); 4859 } 4860 4861 PrintLeft(f, "Tot:", callback.NameFieldSize); 4862 CTotalBenchRes midRes; 4863 midRes = callback.EncodeRes; 4864 midRes.Update_With_Res(callback.DecodeRes); 4865 4866 // midRes.SetSum(callback.EncodeRes, callback.DecodeRes); 4867 PrintTotals(f, showFreq, cpuFreq, false, midRes); 4868 f.NewLine(); 4869 4870 } 4871 return S_OK; 4872} 4873