1a8c51b3fSopenharmony_ci// Copyright 2021 Google Inc. All rights reserved.
2a8c51b3fSopenharmony_ci//
3a8c51b3fSopenharmony_ci// Licensed under the Apache License, Version 2.0 (the "License");
4a8c51b3fSopenharmony_ci// you may not use this file except in compliance with the License.
5a8c51b3fSopenharmony_ci// You may obtain a copy of the License at
6a8c51b3fSopenharmony_ci//
7a8c51b3fSopenharmony_ci//     http://www.apache.org/licenses/LICENSE-2.0
8a8c51b3fSopenharmony_ci//
9a8c51b3fSopenharmony_ci// Unless required by applicable law or agreed to in writing, software
10a8c51b3fSopenharmony_ci// distributed under the License is distributed on an "AS IS" BASIS,
11a8c51b3fSopenharmony_ci// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12a8c51b3fSopenharmony_ci// See the License for the specific language governing permissions and
13a8c51b3fSopenharmony_ci// limitations under the License.
14a8c51b3fSopenharmony_ci
15a8c51b3fSopenharmony_ci#include "perf_counters.h"
16a8c51b3fSopenharmony_ci
17a8c51b3fSopenharmony_ci#include <cstring>
18a8c51b3fSopenharmony_ci#include <memory>
19a8c51b3fSopenharmony_ci#include <vector>
20a8c51b3fSopenharmony_ci
21a8c51b3fSopenharmony_ci#if defined HAVE_LIBPFM
22a8c51b3fSopenharmony_ci#include "perfmon/pfmlib.h"
23a8c51b3fSopenharmony_ci#include "perfmon/pfmlib_perf_event.h"
24a8c51b3fSopenharmony_ci#endif
25a8c51b3fSopenharmony_ci
26a8c51b3fSopenharmony_cinamespace benchmark {
27a8c51b3fSopenharmony_cinamespace internal {
28a8c51b3fSopenharmony_ci
// Out-of-class definition of the static constexpr member. It carries no
// initializer here, so the value comes from the in-class declaration. Before
// C++17 this definition is required whenever the constant is odr-used (for
// example bound to a const reference).
constexpr size_t PerfCounterValues::kMaxCounters;
30a8c51b3fSopenharmony_ci
31a8c51b3fSopenharmony_ci#if defined HAVE_LIBPFM
32a8c51b3fSopenharmony_ci
33a8c51b3fSopenharmony_cisize_t PerfCounterValues::Read(const std::vector<int>& leaders) {
34a8c51b3fSopenharmony_ci  // Create a pointer for multiple reads
35a8c51b3fSopenharmony_ci  const size_t bufsize = values_.size() * sizeof(values_[0]);
36a8c51b3fSopenharmony_ci  char* ptr = reinterpret_cast<char*>(values_.data());
37a8c51b3fSopenharmony_ci  size_t size = bufsize;
38a8c51b3fSopenharmony_ci  for (int lead : leaders) {
39a8c51b3fSopenharmony_ci    auto read_bytes = ::read(lead, ptr, size);
40a8c51b3fSopenharmony_ci    if (read_bytes >= ssize_t(sizeof(uint64_t))) {
41a8c51b3fSopenharmony_ci      // Actual data bytes are all bytes minus initial padding
42a8c51b3fSopenharmony_ci      std::size_t data_bytes = read_bytes - sizeof(uint64_t);
43a8c51b3fSopenharmony_ci      // This should be very cheap since it's in hot cache
44a8c51b3fSopenharmony_ci      std::memmove(ptr, ptr + sizeof(uint64_t), data_bytes);
45a8c51b3fSopenharmony_ci      // Increment our counters
46a8c51b3fSopenharmony_ci      ptr += data_bytes;
47a8c51b3fSopenharmony_ci      size -= data_bytes;
48a8c51b3fSopenharmony_ci    } else {
49a8c51b3fSopenharmony_ci      int err = errno;
50a8c51b3fSopenharmony_ci      GetErrorLogInstance() << "Error reading lead " << lead << " errno:" << err
51a8c51b3fSopenharmony_ci                            << " " << ::strerror(err) << "\n";
52a8c51b3fSopenharmony_ci      return 0;
53a8c51b3fSopenharmony_ci    }
54a8c51b3fSopenharmony_ci  }
55a8c51b3fSopenharmony_ci  return (bufsize - size) / sizeof(uint64_t);
56a8c51b3fSopenharmony_ci}
57a8c51b3fSopenharmony_ci
// This branch is compiled only when HAVE_LIBPFM is defined, so performance
// counters are reported as supported.
const bool PerfCounters::kSupported = true;
59a8c51b3fSopenharmony_ci
60a8c51b3fSopenharmony_ci// Initializes libpfm only on the first call.  Returns whether that single
61a8c51b3fSopenharmony_ci// initialization was successful.
62a8c51b3fSopenharmony_cibool PerfCounters::Initialize() {
63a8c51b3fSopenharmony_ci  // Function-scope static gets initialized only once on first call.
64a8c51b3fSopenharmony_ci  static const bool success = []() {
65a8c51b3fSopenharmony_ci    return pfm_initialize() == PFM_SUCCESS;
66a8c51b3fSopenharmony_ci  }();
67a8c51b3fSopenharmony_ci  return success;
68a8c51b3fSopenharmony_ci}
69a8c51b3fSopenharmony_ci
70a8c51b3fSopenharmony_cibool PerfCounters::IsCounterSupported(const std::string& name) {
71a8c51b3fSopenharmony_ci  Initialize();
72a8c51b3fSopenharmony_ci  perf_event_attr_t attr;
73a8c51b3fSopenharmony_ci  std::memset(&attr, 0, sizeof(attr));
74a8c51b3fSopenharmony_ci  pfm_perf_encode_arg_t arg;
75a8c51b3fSopenharmony_ci  std::memset(&arg, 0, sizeof(arg));
76a8c51b3fSopenharmony_ci  arg.attr = &attr;
77a8c51b3fSopenharmony_ci  const int mode = PFM_PLM3;  // user mode only
78a8c51b3fSopenharmony_ci  int ret = pfm_get_os_event_encoding(name.c_str(), mode, PFM_OS_PERF_EVENT_EXT,
79a8c51b3fSopenharmony_ci                                      &arg);
80a8c51b3fSopenharmony_ci  return (ret == PFM_SUCCESS);
81a8c51b3fSopenharmony_ci}
82a8c51b3fSopenharmony_ci
83a8c51b3fSopenharmony_ciPerfCounters PerfCounters::Create(
84a8c51b3fSopenharmony_ci    const std::vector<std::string>& counter_names) {
85a8c51b3fSopenharmony_ci  if (!counter_names.empty()) {
86a8c51b3fSopenharmony_ci    Initialize();
87a8c51b3fSopenharmony_ci  }
88a8c51b3fSopenharmony_ci
89a8c51b3fSopenharmony_ci  // Valid counters will populate these arrays but we start empty
90a8c51b3fSopenharmony_ci  std::vector<std::string> valid_names;
91a8c51b3fSopenharmony_ci  std::vector<int> counter_ids;
92a8c51b3fSopenharmony_ci  std::vector<int> leader_ids;
93a8c51b3fSopenharmony_ci
94a8c51b3fSopenharmony_ci  // Resize to the maximum possible
95a8c51b3fSopenharmony_ci  valid_names.reserve(counter_names.size());
96a8c51b3fSopenharmony_ci  counter_ids.reserve(counter_names.size());
97a8c51b3fSopenharmony_ci
98a8c51b3fSopenharmony_ci  const int kCounterMode = PFM_PLM3;  // user mode only
99a8c51b3fSopenharmony_ci
100a8c51b3fSopenharmony_ci  // Group leads will be assigned on demand. The idea is that once we cannot
101a8c51b3fSopenharmony_ci  // create a counter descriptor, the reason is that this group has maxed out
102a8c51b3fSopenharmony_ci  // so we set the group_id again to -1 and retry - giving the algorithm a
103a8c51b3fSopenharmony_ci  // chance to create a new group leader to hold the next set of counters.
104a8c51b3fSopenharmony_ci  int group_id = -1;
105a8c51b3fSopenharmony_ci
106a8c51b3fSopenharmony_ci  // Loop through all performance counters
107a8c51b3fSopenharmony_ci  for (size_t i = 0; i < counter_names.size(); ++i) {
108a8c51b3fSopenharmony_ci    // we are about to push into the valid names vector
109a8c51b3fSopenharmony_ci    // check if we did not reach the maximum
110a8c51b3fSopenharmony_ci    if (valid_names.size() == PerfCounterValues::kMaxCounters) {
111a8c51b3fSopenharmony_ci      // Log a message if we maxed out and stop adding
112a8c51b3fSopenharmony_ci      GetErrorLogInstance()
113a8c51b3fSopenharmony_ci          << counter_names.size() << " counters were requested. The maximum is "
114a8c51b3fSopenharmony_ci          << PerfCounterValues::kMaxCounters << " and " << valid_names.size()
115a8c51b3fSopenharmony_ci          << " were already added. All remaining counters will be ignored\n";
116a8c51b3fSopenharmony_ci      // stop the loop and return what we have already
117a8c51b3fSopenharmony_ci      break;
118a8c51b3fSopenharmony_ci    }
119a8c51b3fSopenharmony_ci
120a8c51b3fSopenharmony_ci    // Check if this name is empty
121a8c51b3fSopenharmony_ci    const auto& name = counter_names[i];
122a8c51b3fSopenharmony_ci    if (name.empty()) {
123a8c51b3fSopenharmony_ci      GetErrorLogInstance()
124a8c51b3fSopenharmony_ci          << "A performance counter name was the empty string\n";
125a8c51b3fSopenharmony_ci      continue;
126a8c51b3fSopenharmony_ci    }
127a8c51b3fSopenharmony_ci
128a8c51b3fSopenharmony_ci    // Here first means first in group, ie the group leader
129a8c51b3fSopenharmony_ci    const bool is_first = (group_id < 0);
130a8c51b3fSopenharmony_ci
131a8c51b3fSopenharmony_ci    // This struct will be populated by libpfm from the counter string
132a8c51b3fSopenharmony_ci    // and then fed into the syscall perf_event_open
133a8c51b3fSopenharmony_ci    struct perf_event_attr attr {};
134a8c51b3fSopenharmony_ci    attr.size = sizeof(attr);
135a8c51b3fSopenharmony_ci
136a8c51b3fSopenharmony_ci    // This is the input struct to libpfm.
137a8c51b3fSopenharmony_ci    pfm_perf_encode_arg_t arg{};
138a8c51b3fSopenharmony_ci    arg.attr = &attr;
139a8c51b3fSopenharmony_ci    const int pfm_get = pfm_get_os_event_encoding(name.c_str(), kCounterMode,
140a8c51b3fSopenharmony_ci                                                  PFM_OS_PERF_EVENT, &arg);
141a8c51b3fSopenharmony_ci    if (pfm_get != PFM_SUCCESS) {
142a8c51b3fSopenharmony_ci      GetErrorLogInstance()
143a8c51b3fSopenharmony_ci          << "Unknown performance counter name: " << name << "\n";
144a8c51b3fSopenharmony_ci      continue;
145a8c51b3fSopenharmony_ci    }
146a8c51b3fSopenharmony_ci
147a8c51b3fSopenharmony_ci    // We then proceed to populate the remaining fields in our attribute struct
148a8c51b3fSopenharmony_ci    // Note: the man page for perf_event_create suggests inherit = true and
149a8c51b3fSopenharmony_ci    // read_format = PERF_FORMAT_GROUP don't work together, but that's not the
150a8c51b3fSopenharmony_ci    // case.
151a8c51b3fSopenharmony_ci    attr.disabled = is_first;
152a8c51b3fSopenharmony_ci    attr.inherit = true;
153a8c51b3fSopenharmony_ci    attr.pinned = is_first;
154a8c51b3fSopenharmony_ci    attr.exclude_kernel = true;
155a8c51b3fSopenharmony_ci    attr.exclude_user = false;
156a8c51b3fSopenharmony_ci    attr.exclude_hv = true;
157a8c51b3fSopenharmony_ci
158a8c51b3fSopenharmony_ci    // Read all counters in a group in one read.
159a8c51b3fSopenharmony_ci    attr.read_format = PERF_FORMAT_GROUP;
160a8c51b3fSopenharmony_ci
161a8c51b3fSopenharmony_ci    int id = -1;
162a8c51b3fSopenharmony_ci    while (id < 0) {
163a8c51b3fSopenharmony_ci      static constexpr size_t kNrOfSyscallRetries = 5;
164a8c51b3fSopenharmony_ci      // Retry syscall as it was interrupted often (b/64774091).
165a8c51b3fSopenharmony_ci      for (size_t num_retries = 0; num_retries < kNrOfSyscallRetries;
166a8c51b3fSopenharmony_ci           ++num_retries) {
167a8c51b3fSopenharmony_ci        id = perf_event_open(&attr, 0, -1, group_id, 0);
168a8c51b3fSopenharmony_ci        if (id >= 0 || errno != EINTR) {
169a8c51b3fSopenharmony_ci          break;
170a8c51b3fSopenharmony_ci        }
171a8c51b3fSopenharmony_ci      }
172a8c51b3fSopenharmony_ci      if (id < 0) {
173a8c51b3fSopenharmony_ci        // If the file descriptor is negative we might have reached a limit
174a8c51b3fSopenharmony_ci        // in the current group. Set the group_id to -1 and retry
175a8c51b3fSopenharmony_ci        if (group_id >= 0) {
176a8c51b3fSopenharmony_ci          // Create a new group
177a8c51b3fSopenharmony_ci          group_id = -1;
178a8c51b3fSopenharmony_ci        } else {
179a8c51b3fSopenharmony_ci          // At this point we have already retried to set a new group id and
180a8c51b3fSopenharmony_ci          // failed. We then give up.
181a8c51b3fSopenharmony_ci          break;
182a8c51b3fSopenharmony_ci        }
183a8c51b3fSopenharmony_ci      }
184a8c51b3fSopenharmony_ci    }
185a8c51b3fSopenharmony_ci
186a8c51b3fSopenharmony_ci    // We failed to get a new file descriptor. We might have reached a hard
187a8c51b3fSopenharmony_ci    // hardware limit that cannot be resolved even with group multiplexing
188a8c51b3fSopenharmony_ci    if (id < 0) {
189a8c51b3fSopenharmony_ci      GetErrorLogInstance() << "***WARNING** Failed to get a file descriptor "
190a8c51b3fSopenharmony_ci                               "for performance counter "
191a8c51b3fSopenharmony_ci                            << name << ". Ignoring\n";
192a8c51b3fSopenharmony_ci
193a8c51b3fSopenharmony_ci      // We give up on this counter but try to keep going
194a8c51b3fSopenharmony_ci      // as the others would be fine
195a8c51b3fSopenharmony_ci      continue;
196a8c51b3fSopenharmony_ci    }
197a8c51b3fSopenharmony_ci    if (group_id < 0) {
198a8c51b3fSopenharmony_ci      // This is a leader, store and assign it to the current file descriptor
199a8c51b3fSopenharmony_ci      leader_ids.push_back(id);
200a8c51b3fSopenharmony_ci      group_id = id;
201a8c51b3fSopenharmony_ci    }
202a8c51b3fSopenharmony_ci    // This is a valid counter, add it to our descriptor's list
203a8c51b3fSopenharmony_ci    counter_ids.push_back(id);
204a8c51b3fSopenharmony_ci    valid_names.push_back(name);
205a8c51b3fSopenharmony_ci  }
206a8c51b3fSopenharmony_ci
207a8c51b3fSopenharmony_ci  // Loop through all group leaders activating them
208a8c51b3fSopenharmony_ci  // There is another option of starting ALL counters in a process but
209a8c51b3fSopenharmony_ci  // that would be far reaching an intrusion. If the user is using PMCs
210a8c51b3fSopenharmony_ci  // by themselves then this would have a side effect on them. It is
211a8c51b3fSopenharmony_ci  // friendlier to loop through all groups individually.
212a8c51b3fSopenharmony_ci  for (int lead : leader_ids) {
213a8c51b3fSopenharmony_ci    if (ioctl(lead, PERF_EVENT_IOC_ENABLE) != 0) {
214a8c51b3fSopenharmony_ci      // This should never happen but if it does, we give up on the
215a8c51b3fSopenharmony_ci      // entire batch as recovery would be a mess.
216a8c51b3fSopenharmony_ci      GetErrorLogInstance() << "***WARNING*** Failed to start counters. "
217a8c51b3fSopenharmony_ci                               "Claring out all counters.\n";
218a8c51b3fSopenharmony_ci
219a8c51b3fSopenharmony_ci      // Close all peformance counters
220a8c51b3fSopenharmony_ci      for (int id : counter_ids) {
221a8c51b3fSopenharmony_ci        ::close(id);
222a8c51b3fSopenharmony_ci      }
223a8c51b3fSopenharmony_ci
224a8c51b3fSopenharmony_ci      // Return an empty object so our internal state is still good and
225a8c51b3fSopenharmony_ci      // the process can continue normally without impact
226a8c51b3fSopenharmony_ci      return NoCounters();
227a8c51b3fSopenharmony_ci    }
228a8c51b3fSopenharmony_ci  }
229a8c51b3fSopenharmony_ci
230a8c51b3fSopenharmony_ci  return PerfCounters(std::move(valid_names), std::move(counter_ids),
231a8c51b3fSopenharmony_ci                      std::move(leader_ids));
232a8c51b3fSopenharmony_ci}
233a8c51b3fSopenharmony_ci
234a8c51b3fSopenharmony_civoid PerfCounters::CloseCounters() const {
235a8c51b3fSopenharmony_ci  if (counter_ids_.empty()) {
236a8c51b3fSopenharmony_ci    return;
237a8c51b3fSopenharmony_ci  }
238a8c51b3fSopenharmony_ci  for (int lead : leader_ids_) {
239a8c51b3fSopenharmony_ci    ioctl(lead, PERF_EVENT_IOC_DISABLE);
240a8c51b3fSopenharmony_ci  }
241a8c51b3fSopenharmony_ci  for (int fd : counter_ids_) {
242a8c51b3fSopenharmony_ci    close(fd);
243a8c51b3fSopenharmony_ci  }
244a8c51b3fSopenharmony_ci}
245a8c51b3fSopenharmony_ci#else   // defined HAVE_LIBPFM
246a8c51b3fSopenharmony_cisize_t PerfCounterValues::Read(const std::vector<int>&) { return 0; }
247a8c51b3fSopenharmony_ci
// This branch is compiled only when HAVE_LIBPFM is not defined, so
// performance counters are reported as unsupported.
const bool PerfCounters::kSupported = false;
249a8c51b3fSopenharmony_ci
250a8c51b3fSopenharmony_cibool PerfCounters::Initialize() { return false; }
251a8c51b3fSopenharmony_ci
252a8c51b3fSopenharmony_cibool PerfCounters::IsCounterSupported(const std::string&) { return false; }
253a8c51b3fSopenharmony_ci
254a8c51b3fSopenharmony_ciPerfCounters PerfCounters::Create(
255a8c51b3fSopenharmony_ci    const std::vector<std::string>& counter_names) {
256a8c51b3fSopenharmony_ci  if (!counter_names.empty()) {
257a8c51b3fSopenharmony_ci    GetErrorLogInstance() << "Performance counters not supported.";
258a8c51b3fSopenharmony_ci  }
259a8c51b3fSopenharmony_ci  return NoCounters();
260a8c51b3fSopenharmony_ci}
261a8c51b3fSopenharmony_ci
262a8c51b3fSopenharmony_civoid PerfCounters::CloseCounters() const {}
263a8c51b3fSopenharmony_ci#endif  // defined HAVE_LIBPFM
264a8c51b3fSopenharmony_ci
265a8c51b3fSopenharmony_ciPerfCountersMeasurement::PerfCountersMeasurement(
266a8c51b3fSopenharmony_ci    const std::vector<std::string>& counter_names)
267a8c51b3fSopenharmony_ci    : start_values_(counter_names.size()), end_values_(counter_names.size()) {
268a8c51b3fSopenharmony_ci  counters_ = PerfCounters::Create(counter_names);
269a8c51b3fSopenharmony_ci}
270a8c51b3fSopenharmony_ci
271a8c51b3fSopenharmony_ciPerfCounters& PerfCounters::operator=(PerfCounters&& other) noexcept {
272a8c51b3fSopenharmony_ci  if (this != &other) {
273a8c51b3fSopenharmony_ci    CloseCounters();
274a8c51b3fSopenharmony_ci
275a8c51b3fSopenharmony_ci    counter_ids_ = std::move(other.counter_ids_);
276a8c51b3fSopenharmony_ci    leader_ids_ = std::move(other.leader_ids_);
277a8c51b3fSopenharmony_ci    counter_names_ = std::move(other.counter_names_);
278a8c51b3fSopenharmony_ci  }
279a8c51b3fSopenharmony_ci  return *this;
280a8c51b3fSopenharmony_ci}
281a8c51b3fSopenharmony_ci}  // namespace internal
282a8c51b3fSopenharmony_ci}  // namespace benchmark
283