1// Copyright 2015 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15#include "benchmark_register.h" 16 17#ifndef BENCHMARK_OS_WINDOWS 18#if !defined(BENCHMARK_OS_FUCHSIA) && !defined(BENCHMARK_OS_QURT) 19#include <sys/resource.h> 20#endif 21#include <sys/time.h> 22#include <unistd.h> 23#endif 24 25#include <algorithm> 26#include <atomic> 27#include <cinttypes> 28#include <condition_variable> 29#include <cstdio> 30#include <cstdlib> 31#include <cstring> 32#include <fstream> 33#include <iostream> 34#include <memory> 35#include <numeric> 36#include <sstream> 37#include <thread> 38 39#include "benchmark/benchmark.h" 40#include "benchmark_api_internal.h" 41#include "check.h" 42#include "commandlineflags.h" 43#include "complexity.h" 44#include "internal_macros.h" 45#include "log.h" 46#include "mutex.h" 47#include "re.h" 48#include "statistics.h" 49#include "string_util.h" 50#include "timers.h" 51 52namespace benchmark { 53 54namespace { 55// For non-dense Range, intermediate values are powers of kRangeMultiplier. 56static constexpr int kRangeMultiplier = 8; 57 58// The size of a benchmark family determines is the number of inputs to repeat 59// the benchmark on. If this is "large" then warn the user during configuration. 60static constexpr size_t kMaxFamilySize = 100; 61 62static constexpr char kDisabledPrefix[] = "DISABLED_"; 63} // end namespace 64 65namespace internal { 66 67//=============================================================================// 68// BenchmarkFamilies 69//=============================================================================// 70 71// Class for managing registered benchmarks. Note that each registered 72// benchmark identifies a family of related benchmarks to run. 73class BenchmarkFamilies { 74 public: 75 static BenchmarkFamilies* GetInstance(); 76 77 // Registers a benchmark family and returns the index assigned to it. 78 size_t AddBenchmark(std::unique_ptr<Benchmark> family); 79 80 // Clear all registered benchmark families. 81 void ClearBenchmarks(); 82 83 // Extract the list of benchmark instances that match the specified 84 // regular expression. 85 bool FindBenchmarks(std::string re, 86 std::vector<BenchmarkInstance>* benchmarks, 87 std::ostream* Err); 88 89 private: 90 BenchmarkFamilies() {} 91 92 std::vector<std::unique_ptr<Benchmark>> families_; 93 Mutex mutex_; 94}; 95 96BenchmarkFamilies* BenchmarkFamilies::GetInstance() { 97 static BenchmarkFamilies instance; 98 return &instance; 99} 100 101size_t BenchmarkFamilies::AddBenchmark(std::unique_ptr<Benchmark> family) { 102 MutexLock l(mutex_); 103 size_t index = families_.size(); 104 families_.push_back(std::move(family)); 105 return index; 106} 107 108void BenchmarkFamilies::ClearBenchmarks() { 109 MutexLock l(mutex_); 110 families_.clear(); 111 families_.shrink_to_fit(); 112} 113 114bool BenchmarkFamilies::FindBenchmarks( 115 std::string spec, std::vector<BenchmarkInstance>* benchmarks, 116 std::ostream* ErrStream) { 117 BM_CHECK(ErrStream); 118 auto& Err = *ErrStream; 119 // Make regular expression out of command-line flag 120 std::string error_msg; 121 Regex re; 122 bool is_negative_filter = false; 123 if (spec[0] == '-') { 124 spec.replace(0, 1, ""); 125 is_negative_filter = true; 126 } 127 if (!re.Init(spec, &error_msg)) { 128 Err << "Could not compile benchmark re: " << error_msg << std::endl; 129 return false; 130 } 131 132 // Special list of thread counts to use when none are specified 133 const std::vector<int> one_thread = {1}; 134 135 int next_family_index = 0; 136 137 MutexLock l(mutex_); 138 for (std::unique_ptr<Benchmark>& family : families_) { 139 int family_index = next_family_index; 140 int per_family_instance_index = 0; 141 142 // Family was deleted or benchmark doesn't match 143 if (!family) continue; 144 145 if (family->ArgsCnt() == -1) { 146 family->Args({}); 147 } 148 const std::vector<int>* thread_counts = 149 (family->thread_counts_.empty() 150 ? &one_thread 151 : &static_cast<const std::vector<int>&>(family->thread_counts_)); 152 const size_t family_size = family->args_.size() * thread_counts->size(); 153 // The benchmark will be run at least 'family_size' different inputs. 154 // If 'family_size' is very large warn the user. 155 if (family_size > kMaxFamilySize) { 156 Err << "The number of inputs is very large. " << family->name_ 157 << " will be repeated at least " << family_size << " times.\n"; 158 } 159 // reserve in the special case the regex ".", since we know the final 160 // family size. this doesn't take into account any disabled benchmarks 161 // so worst case we reserve more than we need. 162 if (spec == ".") benchmarks->reserve(benchmarks->size() + family_size); 163 164 for (auto const& args : family->args_) { 165 for (int num_threads : *thread_counts) { 166 BenchmarkInstance instance(family.get(), family_index, 167 per_family_instance_index, args, 168 num_threads); 169 170 const auto full_name = instance.name().str(); 171 if (full_name.rfind(kDisabledPrefix, 0) != 0 && 172 ((re.Match(full_name) && !is_negative_filter) || 173 (!re.Match(full_name) && is_negative_filter))) { 174 benchmarks->push_back(std::move(instance)); 175 176 ++per_family_instance_index; 177 178 // Only bump the next family index once we've estabilished that 179 // at least one instance of this family will be run. 180 if (next_family_index == family_index) ++next_family_index; 181 } 182 } 183 } 184 } 185 return true; 186} 187 188Benchmark* RegisterBenchmarkInternal(Benchmark* bench) { 189 std::unique_ptr<Benchmark> bench_ptr(bench); 190 BenchmarkFamilies* families = BenchmarkFamilies::GetInstance(); 191 families->AddBenchmark(std::move(bench_ptr)); 192 return bench; 193} 194 195// FIXME: This function is a hack so that benchmark.cc can access 196// `BenchmarkFamilies` 197bool FindBenchmarksInternal(const std::string& re, 198 std::vector<BenchmarkInstance>* benchmarks, 199 std::ostream* Err) { 200 return BenchmarkFamilies::GetInstance()->FindBenchmarks(re, benchmarks, Err); 201} 202 203//=============================================================================// 204// Benchmark 205//=============================================================================// 206 207Benchmark::Benchmark(const std::string& name) 208 : name_(name), 209 aggregation_report_mode_(ARM_Unspecified), 210 time_unit_(GetDefaultTimeUnit()), 211 use_default_time_unit_(true), 212 range_multiplier_(kRangeMultiplier), 213 min_time_(0), 214 min_warmup_time_(0), 215 iterations_(0), 216 repetitions_(0), 217 measure_process_cpu_time_(false), 218 use_real_time_(false), 219 use_manual_time_(false), 220 complexity_(oNone), 221 complexity_lambda_(nullptr), 222 setup_(nullptr), 223 teardown_(nullptr) { 224 ComputeStatistics("mean", StatisticsMean); 225 ComputeStatistics("median", StatisticsMedian); 226 ComputeStatistics("stddev", StatisticsStdDev); 227 ComputeStatistics("cv", StatisticsCV, kPercentage); 228} 229 230Benchmark::~Benchmark() {} 231 232Benchmark* Benchmark::Name(const std::string& name) { 233 SetName(name); 234 return this; 235} 236 237Benchmark* Benchmark::Arg(int64_t x) { 238 BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); 239 args_.push_back({x}); 240 return this; 241} 242 243Benchmark* Benchmark::Unit(TimeUnit unit) { 244 time_unit_ = unit; 245 use_default_time_unit_ = false; 246 return this; 247} 248 249Benchmark* Benchmark::Range(int64_t start, int64_t limit) { 250 BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); 251 std::vector<int64_t> arglist; 252 AddRange(&arglist, start, limit, range_multiplier_); 253 254 for (int64_t i : arglist) { 255 args_.push_back({i}); 256 } 257 return this; 258} 259 260Benchmark* Benchmark::Ranges( 261 const std::vector<std::pair<int64_t, int64_t>>& ranges) { 262 BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(ranges.size())); 263 std::vector<std::vector<int64_t>> arglists(ranges.size()); 264 for (std::size_t i = 0; i < ranges.size(); i++) { 265 AddRange(&arglists[i], ranges[i].first, ranges[i].second, 266 range_multiplier_); 267 } 268 269 ArgsProduct(arglists); 270 271 return this; 272} 273 274Benchmark* Benchmark::ArgsProduct( 275 const std::vector<std::vector<int64_t>>& arglists) { 276 BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(arglists.size())); 277 278 std::vector<std::size_t> indices(arglists.size()); 279 const std::size_t total = std::accumulate( 280 std::begin(arglists), std::end(arglists), std::size_t{1}, 281 [](const std::size_t res, const std::vector<int64_t>& arglist) { 282 return res * arglist.size(); 283 }); 284 std::vector<int64_t> args; 285 args.reserve(arglists.size()); 286 for (std::size_t i = 0; i < total; i++) { 287 for (std::size_t arg = 0; arg < arglists.size(); arg++) { 288 args.push_back(arglists[arg][indices[arg]]); 289 } 290 args_.push_back(args); 291 args.clear(); 292 293 std::size_t arg = 0; 294 do { 295 indices[arg] = (indices[arg] + 1) % arglists[arg].size(); 296 } while (indices[arg++] == 0 && arg < arglists.size()); 297 } 298 299 return this; 300} 301 302Benchmark* Benchmark::ArgName(const std::string& name) { 303 BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); 304 arg_names_ = {name}; 305 return this; 306} 307 308Benchmark* Benchmark::ArgNames(const std::vector<std::string>& names) { 309 BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(names.size())); 310 arg_names_ = names; 311 return this; 312} 313 314Benchmark* Benchmark::DenseRange(int64_t start, int64_t limit, int step) { 315 BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == 1); 316 BM_CHECK_LE(start, limit); 317 for (int64_t arg = start; arg <= limit; arg += step) { 318 args_.push_back({arg}); 319 } 320 return this; 321} 322 323Benchmark* Benchmark::Args(const std::vector<int64_t>& args) { 324 BM_CHECK(ArgsCnt() == -1 || ArgsCnt() == static_cast<int>(args.size())); 325 args_.push_back(args); 326 return this; 327} 328 329Benchmark* Benchmark::Apply(void (*custom_arguments)(Benchmark* benchmark)) { 330 custom_arguments(this); 331 return this; 332} 333 334Benchmark* Benchmark::Setup(void (*setup)(const benchmark::State&)) { 335 BM_CHECK(setup != nullptr); 336 setup_ = setup; 337 return this; 338} 339 340Benchmark* Benchmark::Teardown(void (*teardown)(const benchmark::State&)) { 341 BM_CHECK(teardown != nullptr); 342 teardown_ = teardown; 343 return this; 344} 345 346Benchmark* Benchmark::RangeMultiplier(int multiplier) { 347 BM_CHECK(multiplier > 1); 348 range_multiplier_ = multiplier; 349 return this; 350} 351 352Benchmark* Benchmark::MinTime(double t) { 353 BM_CHECK(t > 0.0); 354 BM_CHECK(iterations_ == 0); 355 min_time_ = t; 356 return this; 357} 358 359Benchmark* Benchmark::MinWarmUpTime(double t) { 360 BM_CHECK(t >= 0.0); 361 BM_CHECK(iterations_ == 0); 362 min_warmup_time_ = t; 363 return this; 364} 365 366Benchmark* Benchmark::Iterations(IterationCount n) { 367 BM_CHECK(n > 0); 368 BM_CHECK(IsZero(min_time_)); 369 BM_CHECK(IsZero(min_warmup_time_)); 370 iterations_ = n; 371 return this; 372} 373 374Benchmark* Benchmark::Repetitions(int n) { 375 BM_CHECK(n > 0); 376 repetitions_ = n; 377 return this; 378} 379 380Benchmark* Benchmark::ReportAggregatesOnly(bool value) { 381 aggregation_report_mode_ = value ? ARM_ReportAggregatesOnly : ARM_Default; 382 return this; 383} 384 385Benchmark* Benchmark::DisplayAggregatesOnly(bool value) { 386 // If we were called, the report mode is no longer 'unspecified', in any case. 387 aggregation_report_mode_ = static_cast<AggregationReportMode>( 388 aggregation_report_mode_ | ARM_Default); 389 390 if (value) { 391 aggregation_report_mode_ = static_cast<AggregationReportMode>( 392 aggregation_report_mode_ | ARM_DisplayReportAggregatesOnly); 393 } else { 394 aggregation_report_mode_ = static_cast<AggregationReportMode>( 395 aggregation_report_mode_ & ~ARM_DisplayReportAggregatesOnly); 396 } 397 398 return this; 399} 400 401Benchmark* Benchmark::MeasureProcessCPUTime() { 402 // Can be used together with UseRealTime() / UseManualTime(). 403 measure_process_cpu_time_ = true; 404 return this; 405} 406 407Benchmark* Benchmark::UseRealTime() { 408 BM_CHECK(!use_manual_time_) 409 << "Cannot set UseRealTime and UseManualTime simultaneously."; 410 use_real_time_ = true; 411 return this; 412} 413 414Benchmark* Benchmark::UseManualTime() { 415 BM_CHECK(!use_real_time_) 416 << "Cannot set UseRealTime and UseManualTime simultaneously."; 417 use_manual_time_ = true; 418 return this; 419} 420 421Benchmark* Benchmark::Complexity(BigO complexity) { 422 complexity_ = complexity; 423 return this; 424} 425 426Benchmark* Benchmark::Complexity(BigOFunc* complexity) { 427 complexity_lambda_ = complexity; 428 complexity_ = oLambda; 429 return this; 430} 431 432Benchmark* Benchmark::ComputeStatistics(const std::string& name, 433 StatisticsFunc* statistics, 434 StatisticUnit unit) { 435 statistics_.emplace_back(name, statistics, unit); 436 return this; 437} 438 439Benchmark* Benchmark::Threads(int t) { 440 BM_CHECK_GT(t, 0); 441 thread_counts_.push_back(t); 442 return this; 443} 444 445Benchmark* Benchmark::ThreadRange(int min_threads, int max_threads) { 446 BM_CHECK_GT(min_threads, 0); 447 BM_CHECK_GE(max_threads, min_threads); 448 449 AddRange(&thread_counts_, min_threads, max_threads, 2); 450 return this; 451} 452 453Benchmark* Benchmark::DenseThreadRange(int min_threads, int max_threads, 454 int stride) { 455 BM_CHECK_GT(min_threads, 0); 456 BM_CHECK_GE(max_threads, min_threads); 457 BM_CHECK_GE(stride, 1); 458 459 for (auto i = min_threads; i < max_threads; i += stride) { 460 thread_counts_.push_back(i); 461 } 462 thread_counts_.push_back(max_threads); 463 return this; 464} 465 466Benchmark* Benchmark::ThreadPerCpu() { 467 thread_counts_.push_back(CPUInfo::Get().num_cpus); 468 return this; 469} 470 471void Benchmark::SetName(const std::string& name) { name_ = name; } 472 473const char* Benchmark::GetName() const { return name_.c_str(); } 474 475int Benchmark::ArgsCnt() const { 476 if (args_.empty()) { 477 if (arg_names_.empty()) return -1; 478 return static_cast<int>(arg_names_.size()); 479 } 480 return static_cast<int>(args_.front().size()); 481} 482 483const char* Benchmark::GetArgName(int arg) const { 484 BM_CHECK_GE(arg, 0); 485 BM_CHECK_LT(arg, static_cast<int>(arg_names_.size())); 486 return arg_names_[arg].c_str(); 487} 488 489TimeUnit Benchmark::GetTimeUnit() const { 490 return use_default_time_unit_ ? GetDefaultTimeUnit() : time_unit_; 491} 492 493//=============================================================================// 494// FunctionBenchmark 495//=============================================================================// 496 497void FunctionBenchmark::Run(State& st) { func_(st); } 498 499} // end namespace internal 500 501void ClearRegisteredBenchmarks() { 502 internal::BenchmarkFamilies::GetInstance()->ClearBenchmarks(); 503} 504 505std::vector<int64_t> CreateRange(int64_t lo, int64_t hi, int multi) { 506 std::vector<int64_t> args; 507 internal::AddRange(&args, lo, hi, multi); 508 return args; 509} 510 511std::vector<int64_t> CreateDenseRange(int64_t start, int64_t limit, int step) { 512 BM_CHECK_LE(start, limit); 513 std::vector<int64_t> args; 514 for (int64_t arg = start; arg <= limit; arg += step) { 515 args.push_back(arg); 516 } 517 return args; 518} 519 520} // end namespace benchmark 521