1a8c51b3fSopenharmony_ci// Copyright 2016 Ismael Jimenez Martinez. All rights reserved.
2a8c51b3fSopenharmony_ci//
3a8c51b3fSopenharmony_ci// Licensed under the Apache License, Version 2.0 (the "License");
4a8c51b3fSopenharmony_ci// you may not use this file except in compliance with the License.
5a8c51b3fSopenharmony_ci// You may obtain a copy of the License at
6a8c51b3fSopenharmony_ci//
7a8c51b3fSopenharmony_ci//     http://www.apache.org/licenses/LICENSE-2.0
8a8c51b3fSopenharmony_ci//
9a8c51b3fSopenharmony_ci// Unless required by applicable law or agreed to in writing, software
10a8c51b3fSopenharmony_ci// distributed under the License is distributed on an "AS IS" BASIS,
11a8c51b3fSopenharmony_ci// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12a8c51b3fSopenharmony_ci// See the License for the specific language governing permissions and
13a8c51b3fSopenharmony_ci// limitations under the License.
14a8c51b3fSopenharmony_ci
15a8c51b3fSopenharmony_ci// Source project : https://github.com/ismaelJimenez/cpp.leastsq
16a8c51b3fSopenharmony_ci// Adapted to be used with google benchmark
17a8c51b3fSopenharmony_ci
18a8c51b3fSopenharmony_ci#include "complexity.h"
19a8c51b3fSopenharmony_ci
20a8c51b3fSopenharmony_ci#include <algorithm>
21a8c51b3fSopenharmony_ci#include <cmath>
22a8c51b3fSopenharmony_ci
23a8c51b3fSopenharmony_ci#include "benchmark/benchmark.h"
24a8c51b3fSopenharmony_ci#include "check.h"
25a8c51b3fSopenharmony_ci
26a8c51b3fSopenharmony_cinamespace benchmark {
27a8c51b3fSopenharmony_ci
28a8c51b3fSopenharmony_ci// Internal function to calculate the different scalability forms
29a8c51b3fSopenharmony_ciBigOFunc* FittingCurve(BigO complexity) {
30a8c51b3fSopenharmony_ci  static const double kLog2E = 1.44269504088896340736;
31a8c51b3fSopenharmony_ci  switch (complexity) {
32a8c51b3fSopenharmony_ci    case oN:
33a8c51b3fSopenharmony_ci      return [](IterationCount n) -> double { return static_cast<double>(n); };
34a8c51b3fSopenharmony_ci    case oNSquared:
35a8c51b3fSopenharmony_ci      return [](IterationCount n) -> double { return std::pow(n, 2); };
36a8c51b3fSopenharmony_ci    case oNCubed:
37a8c51b3fSopenharmony_ci      return [](IterationCount n) -> double { return std::pow(n, 3); };
38a8c51b3fSopenharmony_ci    case oLogN:
39a8c51b3fSopenharmony_ci      /* Note: can't use log2 because Android's GNU STL lacks it */
40a8c51b3fSopenharmony_ci      return
41a8c51b3fSopenharmony_ci          [](IterationCount n) { return kLog2E * log(static_cast<double>(n)); };
42a8c51b3fSopenharmony_ci    case oNLogN:
43a8c51b3fSopenharmony_ci      /* Note: can't use log2 because Android's GNU STL lacks it */
44a8c51b3fSopenharmony_ci      return [](IterationCount n) {
45a8c51b3fSopenharmony_ci        return kLog2E * n * log(static_cast<double>(n));
46a8c51b3fSopenharmony_ci      };
47a8c51b3fSopenharmony_ci    case o1:
48a8c51b3fSopenharmony_ci    default:
49a8c51b3fSopenharmony_ci      return [](IterationCount) { return 1.0; };
50a8c51b3fSopenharmony_ci  }
51a8c51b3fSopenharmony_ci}
52a8c51b3fSopenharmony_ci
53a8c51b3fSopenharmony_ci// Function to return an string for the calculated complexity
54a8c51b3fSopenharmony_cistd::string GetBigOString(BigO complexity) {
55a8c51b3fSopenharmony_ci  switch (complexity) {
56a8c51b3fSopenharmony_ci    case oN:
57a8c51b3fSopenharmony_ci      return "N";
58a8c51b3fSopenharmony_ci    case oNSquared:
59a8c51b3fSopenharmony_ci      return "N^2";
60a8c51b3fSopenharmony_ci    case oNCubed:
61a8c51b3fSopenharmony_ci      return "N^3";
62a8c51b3fSopenharmony_ci    case oLogN:
63a8c51b3fSopenharmony_ci      return "lgN";
64a8c51b3fSopenharmony_ci    case oNLogN:
65a8c51b3fSopenharmony_ci      return "NlgN";
66a8c51b3fSopenharmony_ci    case o1:
67a8c51b3fSopenharmony_ci      return "(1)";
68a8c51b3fSopenharmony_ci    default:
69a8c51b3fSopenharmony_ci      return "f(N)";
70a8c51b3fSopenharmony_ci  }
71a8c51b3fSopenharmony_ci}
72a8c51b3fSopenharmony_ci
73a8c51b3fSopenharmony_ci// Find the coefficient for the high-order term in the running time, by
74a8c51b3fSopenharmony_ci// minimizing the sum of squares of relative error, for the fitting curve
75a8c51b3fSopenharmony_ci// given by the lambda expression.
76a8c51b3fSopenharmony_ci//   - n             : Vector containing the size of the benchmark tests.
77a8c51b3fSopenharmony_ci//   - time          : Vector containing the times for the benchmark tests.
78a8c51b3fSopenharmony_ci//   - fitting_curve : lambda expression (e.g. [](int64_t n) {return n; };).
79a8c51b3fSopenharmony_ci
80a8c51b3fSopenharmony_ci// For a deeper explanation on the algorithm logic, please refer to
81a8c51b3fSopenharmony_ci// https://en.wikipedia.org/wiki/Least_squares#Least_squares,_regression_analysis_and_statistics
82a8c51b3fSopenharmony_ci
83a8c51b3fSopenharmony_ciLeastSq MinimalLeastSq(const std::vector<int64_t>& n,
84a8c51b3fSopenharmony_ci                       const std::vector<double>& time,
85a8c51b3fSopenharmony_ci                       BigOFunc* fitting_curve) {
86a8c51b3fSopenharmony_ci  double sigma_gn_squared = 0.0;
87a8c51b3fSopenharmony_ci  double sigma_time = 0.0;
88a8c51b3fSopenharmony_ci  double sigma_time_gn = 0.0;
89a8c51b3fSopenharmony_ci
90a8c51b3fSopenharmony_ci  // Calculate least square fitting parameter
91a8c51b3fSopenharmony_ci  for (size_t i = 0; i < n.size(); ++i) {
92a8c51b3fSopenharmony_ci    double gn_i = fitting_curve(n[i]);
93a8c51b3fSopenharmony_ci    sigma_gn_squared += gn_i * gn_i;
94a8c51b3fSopenharmony_ci    sigma_time += time[i];
95a8c51b3fSopenharmony_ci    sigma_time_gn += time[i] * gn_i;
96a8c51b3fSopenharmony_ci  }
97a8c51b3fSopenharmony_ci
98a8c51b3fSopenharmony_ci  LeastSq result;
99a8c51b3fSopenharmony_ci  result.complexity = oLambda;
100a8c51b3fSopenharmony_ci
101a8c51b3fSopenharmony_ci  // Calculate complexity.
102a8c51b3fSopenharmony_ci  result.coef = sigma_time_gn / sigma_gn_squared;
103a8c51b3fSopenharmony_ci
104a8c51b3fSopenharmony_ci  // Calculate RMS
105a8c51b3fSopenharmony_ci  double rms = 0.0;
106a8c51b3fSopenharmony_ci  for (size_t i = 0; i < n.size(); ++i) {
107a8c51b3fSopenharmony_ci    double fit = result.coef * fitting_curve(n[i]);
108a8c51b3fSopenharmony_ci    rms += pow((time[i] - fit), 2);
109a8c51b3fSopenharmony_ci  }
110a8c51b3fSopenharmony_ci
111a8c51b3fSopenharmony_ci  // Normalized RMS by the mean of the observed values
112a8c51b3fSopenharmony_ci  double mean = sigma_time / n.size();
113a8c51b3fSopenharmony_ci  result.rms = sqrt(rms / n.size()) / mean;
114a8c51b3fSopenharmony_ci
115a8c51b3fSopenharmony_ci  return result;
116a8c51b3fSopenharmony_ci}
117a8c51b3fSopenharmony_ci
118a8c51b3fSopenharmony_ci// Find the coefficient for the high-order term in the running time, by
119a8c51b3fSopenharmony_ci// minimizing the sum of squares of relative error.
120a8c51b3fSopenharmony_ci//   - n          : Vector containing the size of the benchmark tests.
121a8c51b3fSopenharmony_ci//   - time       : Vector containing the times for the benchmark tests.
122a8c51b3fSopenharmony_ci//   - complexity : If different than oAuto, the fitting curve will stick to
123a8c51b3fSopenharmony_ci//                  this one. If it is oAuto, it will be calculated the best
124a8c51b3fSopenharmony_ci//                  fitting curve.
125a8c51b3fSopenharmony_ciLeastSq MinimalLeastSq(const std::vector<int64_t>& n,
126a8c51b3fSopenharmony_ci                       const std::vector<double>& time, const BigO complexity) {
127a8c51b3fSopenharmony_ci  BM_CHECK_EQ(n.size(), time.size());
128a8c51b3fSopenharmony_ci  BM_CHECK_GE(n.size(), 2);  // Do not compute fitting curve is less than two
129a8c51b3fSopenharmony_ci                             // benchmark runs are given
130a8c51b3fSopenharmony_ci  BM_CHECK_NE(complexity, oNone);
131a8c51b3fSopenharmony_ci
132a8c51b3fSopenharmony_ci  LeastSq best_fit;
133a8c51b3fSopenharmony_ci
134a8c51b3fSopenharmony_ci  if (complexity == oAuto) {
135a8c51b3fSopenharmony_ci    std::vector<BigO> fit_curves = {oLogN, oN, oNLogN, oNSquared, oNCubed};
136a8c51b3fSopenharmony_ci
137a8c51b3fSopenharmony_ci    // Take o1 as default best fitting curve
138a8c51b3fSopenharmony_ci    best_fit = MinimalLeastSq(n, time, FittingCurve(o1));
139a8c51b3fSopenharmony_ci    best_fit.complexity = o1;
140a8c51b3fSopenharmony_ci
141a8c51b3fSopenharmony_ci    // Compute all possible fitting curves and stick to the best one
142a8c51b3fSopenharmony_ci    for (const auto& fit : fit_curves) {
143a8c51b3fSopenharmony_ci      LeastSq current_fit = MinimalLeastSq(n, time, FittingCurve(fit));
144a8c51b3fSopenharmony_ci      if (current_fit.rms < best_fit.rms) {
145a8c51b3fSopenharmony_ci        best_fit = current_fit;
146a8c51b3fSopenharmony_ci        best_fit.complexity = fit;
147a8c51b3fSopenharmony_ci      }
148a8c51b3fSopenharmony_ci    }
149a8c51b3fSopenharmony_ci  } else {
150a8c51b3fSopenharmony_ci    best_fit = MinimalLeastSq(n, time, FittingCurve(complexity));
151a8c51b3fSopenharmony_ci    best_fit.complexity = complexity;
152a8c51b3fSopenharmony_ci  }
153a8c51b3fSopenharmony_ci
154a8c51b3fSopenharmony_ci  return best_fit;
155a8c51b3fSopenharmony_ci}
156a8c51b3fSopenharmony_ci
157a8c51b3fSopenharmony_cistd::vector<BenchmarkReporter::Run> ComputeBigO(
158a8c51b3fSopenharmony_ci    const std::vector<BenchmarkReporter::Run>& reports) {
159a8c51b3fSopenharmony_ci  typedef BenchmarkReporter::Run Run;
160a8c51b3fSopenharmony_ci  std::vector<Run> results;
161a8c51b3fSopenharmony_ci
162a8c51b3fSopenharmony_ci  if (reports.size() < 2) return results;
163a8c51b3fSopenharmony_ci
164a8c51b3fSopenharmony_ci  // Accumulators.
165a8c51b3fSopenharmony_ci  std::vector<int64_t> n;
166a8c51b3fSopenharmony_ci  std::vector<double> real_time;
167a8c51b3fSopenharmony_ci  std::vector<double> cpu_time;
168a8c51b3fSopenharmony_ci
169a8c51b3fSopenharmony_ci  // Populate the accumulators.
170a8c51b3fSopenharmony_ci  for (const Run& run : reports) {
171a8c51b3fSopenharmony_ci    BM_CHECK_GT(run.complexity_n, 0)
172a8c51b3fSopenharmony_ci        << "Did you forget to call SetComplexityN?";
173a8c51b3fSopenharmony_ci    n.push_back(run.complexity_n);
174a8c51b3fSopenharmony_ci    real_time.push_back(run.real_accumulated_time / run.iterations);
175a8c51b3fSopenharmony_ci    cpu_time.push_back(run.cpu_accumulated_time / run.iterations);
176a8c51b3fSopenharmony_ci  }
177a8c51b3fSopenharmony_ci
178a8c51b3fSopenharmony_ci  LeastSq result_cpu;
179a8c51b3fSopenharmony_ci  LeastSq result_real;
180a8c51b3fSopenharmony_ci
181a8c51b3fSopenharmony_ci  if (reports[0].complexity == oLambda) {
182a8c51b3fSopenharmony_ci    result_cpu = MinimalLeastSq(n, cpu_time, reports[0].complexity_lambda);
183a8c51b3fSopenharmony_ci    result_real = MinimalLeastSq(n, real_time, reports[0].complexity_lambda);
184a8c51b3fSopenharmony_ci  } else {
185a8c51b3fSopenharmony_ci    result_cpu = MinimalLeastSq(n, cpu_time, reports[0].complexity);
186a8c51b3fSopenharmony_ci    result_real = MinimalLeastSq(n, real_time, result_cpu.complexity);
187a8c51b3fSopenharmony_ci  }
188a8c51b3fSopenharmony_ci
189a8c51b3fSopenharmony_ci  // Drop the 'args' when reporting complexity.
190a8c51b3fSopenharmony_ci  auto run_name = reports[0].run_name;
191a8c51b3fSopenharmony_ci  run_name.args.clear();
192a8c51b3fSopenharmony_ci
193a8c51b3fSopenharmony_ci  // Get the data from the accumulator to BenchmarkReporter::Run's.
194a8c51b3fSopenharmony_ci  Run big_o;
195a8c51b3fSopenharmony_ci  big_o.run_name = run_name;
196a8c51b3fSopenharmony_ci  big_o.family_index = reports[0].family_index;
197a8c51b3fSopenharmony_ci  big_o.per_family_instance_index = reports[0].per_family_instance_index;
198a8c51b3fSopenharmony_ci  big_o.run_type = BenchmarkReporter::Run::RT_Aggregate;
199a8c51b3fSopenharmony_ci  big_o.repetitions = reports[0].repetitions;
200a8c51b3fSopenharmony_ci  big_o.repetition_index = Run::no_repetition_index;
201a8c51b3fSopenharmony_ci  big_o.threads = reports[0].threads;
202a8c51b3fSopenharmony_ci  big_o.aggregate_name = "BigO";
203a8c51b3fSopenharmony_ci  big_o.aggregate_unit = StatisticUnit::kTime;
204a8c51b3fSopenharmony_ci  big_o.report_label = reports[0].report_label;
205a8c51b3fSopenharmony_ci  big_o.iterations = 0;
206a8c51b3fSopenharmony_ci  big_o.real_accumulated_time = result_real.coef;
207a8c51b3fSopenharmony_ci  big_o.cpu_accumulated_time = result_cpu.coef;
208a8c51b3fSopenharmony_ci  big_o.report_big_o = true;
209a8c51b3fSopenharmony_ci  big_o.complexity = result_cpu.complexity;
210a8c51b3fSopenharmony_ci
211a8c51b3fSopenharmony_ci  // All the time results are reported after being multiplied by the
212a8c51b3fSopenharmony_ci  // time unit multiplier. But since RMS is a relative quantity it
213a8c51b3fSopenharmony_ci  // should not be multiplied at all. So, here, we _divide_ it by the
214a8c51b3fSopenharmony_ci  // multiplier so that when it is multiplied later the result is the
215a8c51b3fSopenharmony_ci  // correct one.
216a8c51b3fSopenharmony_ci  double multiplier = GetTimeUnitMultiplier(reports[0].time_unit);
217a8c51b3fSopenharmony_ci
218a8c51b3fSopenharmony_ci  // Only add label to mean/stddev if it is same for all runs
219a8c51b3fSopenharmony_ci  Run rms;
220a8c51b3fSopenharmony_ci  rms.run_name = run_name;
221a8c51b3fSopenharmony_ci  rms.family_index = reports[0].family_index;
222a8c51b3fSopenharmony_ci  rms.per_family_instance_index = reports[0].per_family_instance_index;
223a8c51b3fSopenharmony_ci  rms.run_type = BenchmarkReporter::Run::RT_Aggregate;
224a8c51b3fSopenharmony_ci  rms.aggregate_name = "RMS";
225a8c51b3fSopenharmony_ci  rms.aggregate_unit = StatisticUnit::kPercentage;
226a8c51b3fSopenharmony_ci  rms.report_label = big_o.report_label;
227a8c51b3fSopenharmony_ci  rms.iterations = 0;
228a8c51b3fSopenharmony_ci  rms.repetition_index = Run::no_repetition_index;
229a8c51b3fSopenharmony_ci  rms.repetitions = reports[0].repetitions;
230a8c51b3fSopenharmony_ci  rms.threads = reports[0].threads;
231a8c51b3fSopenharmony_ci  rms.real_accumulated_time = result_real.rms / multiplier;
232a8c51b3fSopenharmony_ci  rms.cpu_accumulated_time = result_cpu.rms / multiplier;
233a8c51b3fSopenharmony_ci  rms.report_rms = true;
234a8c51b3fSopenharmony_ci  rms.complexity = result_cpu.complexity;
235a8c51b3fSopenharmony_ci  // don't forget to keep the time unit, or we won't be able to
236a8c51b3fSopenharmony_ci  // recover the correct value.
237a8c51b3fSopenharmony_ci  rms.time_unit = reports[0].time_unit;
238a8c51b3fSopenharmony_ci
239a8c51b3fSopenharmony_ci  results.push_back(big_o);
240a8c51b3fSopenharmony_ci  results.push_back(rms);
241a8c51b3fSopenharmony_ci  return results;
242a8c51b3fSopenharmony_ci}
243a8c51b3fSopenharmony_ci
244a8c51b3fSopenharmony_ci}  // end namespace benchmark
245