1a8c51b3fSopenharmony_ci// Copyright 2021 Google Inc. All rights reserved. 2a8c51b3fSopenharmony_ci// 3a8c51b3fSopenharmony_ci// Licensed under the Apache License, Version 2.0 (the "License"); 4a8c51b3fSopenharmony_ci// you may not use this file except in compliance with the License. 5a8c51b3fSopenharmony_ci// You may obtain a copy of the License at 6a8c51b3fSopenharmony_ci// 7a8c51b3fSopenharmony_ci// http://www.apache.org/licenses/LICENSE-2.0 8a8c51b3fSopenharmony_ci// 9a8c51b3fSopenharmony_ci// Unless required by applicable law or agreed to in writing, software 10a8c51b3fSopenharmony_ci// distributed under the License is distributed on an "AS IS" BASIS, 11a8c51b3fSopenharmony_ci// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12a8c51b3fSopenharmony_ci// See the License for the specific language governing permissions and 13a8c51b3fSopenharmony_ci// limitations under the License. 14a8c51b3fSopenharmony_ci 15a8c51b3fSopenharmony_ci#include "perf_counters.h" 16a8c51b3fSopenharmony_ci 17a8c51b3fSopenharmony_ci#include <cstring> 18a8c51b3fSopenharmony_ci#include <memory> 19a8c51b3fSopenharmony_ci#include <vector> 20a8c51b3fSopenharmony_ci 21a8c51b3fSopenharmony_ci#if defined HAVE_LIBPFM 22a8c51b3fSopenharmony_ci#include "perfmon/pfmlib.h" 23a8c51b3fSopenharmony_ci#include "perfmon/pfmlib_perf_event.h" 24a8c51b3fSopenharmony_ci#endif 25a8c51b3fSopenharmony_ci 26a8c51b3fSopenharmony_cinamespace benchmark { 27a8c51b3fSopenharmony_cinamespace internal { 28a8c51b3fSopenharmony_ci 29a8c51b3fSopenharmony_ciconstexpr size_t PerfCounterValues::kMaxCounters; 30a8c51b3fSopenharmony_ci 31a8c51b3fSopenharmony_ci#if defined HAVE_LIBPFM 32a8c51b3fSopenharmony_ci 33a8c51b3fSopenharmony_cisize_t PerfCounterValues::Read(const std::vector<int>& leaders) { 34a8c51b3fSopenharmony_ci // Create a pointer for multiple reads 35a8c51b3fSopenharmony_ci const size_t bufsize = values_.size() * sizeof(values_[0]); 36a8c51b3fSopenharmony_ci char* ptr = reinterpret_cast<char*>(values_.data()); 37a8c51b3fSopenharmony_ci size_t size = bufsize; 38a8c51b3fSopenharmony_ci for (int lead : leaders) { 39a8c51b3fSopenharmony_ci auto read_bytes = ::read(lead, ptr, size); 40a8c51b3fSopenharmony_ci if (read_bytes >= ssize_t(sizeof(uint64_t))) { 41a8c51b3fSopenharmony_ci // Actual data bytes are all bytes minus initial padding 42a8c51b3fSopenharmony_ci std::size_t data_bytes = read_bytes - sizeof(uint64_t); 43a8c51b3fSopenharmony_ci // This should be very cheap since it's in hot cache 44a8c51b3fSopenharmony_ci std::memmove(ptr, ptr + sizeof(uint64_t), data_bytes); 45a8c51b3fSopenharmony_ci // Increment our counters 46a8c51b3fSopenharmony_ci ptr += data_bytes; 47a8c51b3fSopenharmony_ci size -= data_bytes; 48a8c51b3fSopenharmony_ci } else { 49a8c51b3fSopenharmony_ci int err = errno; 50a8c51b3fSopenharmony_ci GetErrorLogInstance() << "Error reading lead " << lead << " errno:" << err 51a8c51b3fSopenharmony_ci << " " << ::strerror(err) << "\n"; 52a8c51b3fSopenharmony_ci return 0; 53a8c51b3fSopenharmony_ci } 54a8c51b3fSopenharmony_ci } 55a8c51b3fSopenharmony_ci return (bufsize - size) / sizeof(uint64_t); 56a8c51b3fSopenharmony_ci} 57a8c51b3fSopenharmony_ci 58a8c51b3fSopenharmony_ciconst bool PerfCounters::kSupported = true; 59a8c51b3fSopenharmony_ci 60a8c51b3fSopenharmony_ci// Initializes libpfm only on the first call. Returns whether that single 61a8c51b3fSopenharmony_ci// initialization was successful. 62a8c51b3fSopenharmony_cibool PerfCounters::Initialize() { 63a8c51b3fSopenharmony_ci // Function-scope static gets initialized only once on first call. 64a8c51b3fSopenharmony_ci static const bool success = []() { 65a8c51b3fSopenharmony_ci return pfm_initialize() == PFM_SUCCESS; 66a8c51b3fSopenharmony_ci }(); 67a8c51b3fSopenharmony_ci return success; 68a8c51b3fSopenharmony_ci} 69a8c51b3fSopenharmony_ci 70a8c51b3fSopenharmony_cibool PerfCounters::IsCounterSupported(const std::string& name) { 71a8c51b3fSopenharmony_ci Initialize(); 72a8c51b3fSopenharmony_ci perf_event_attr_t attr; 73a8c51b3fSopenharmony_ci std::memset(&attr, 0, sizeof(attr)); 74a8c51b3fSopenharmony_ci pfm_perf_encode_arg_t arg; 75a8c51b3fSopenharmony_ci std::memset(&arg, 0, sizeof(arg)); 76a8c51b3fSopenharmony_ci arg.attr = &attr; 77a8c51b3fSopenharmony_ci const int mode = PFM_PLM3; // user mode only 78a8c51b3fSopenharmony_ci int ret = pfm_get_os_event_encoding(name.c_str(), mode, PFM_OS_PERF_EVENT_EXT, 79a8c51b3fSopenharmony_ci &arg); 80a8c51b3fSopenharmony_ci return (ret == PFM_SUCCESS); 81a8c51b3fSopenharmony_ci} 82a8c51b3fSopenharmony_ci 83a8c51b3fSopenharmony_ciPerfCounters PerfCounters::Create( 84a8c51b3fSopenharmony_ci const std::vector<std::string>& counter_names) { 85a8c51b3fSopenharmony_ci if (!counter_names.empty()) { 86a8c51b3fSopenharmony_ci Initialize(); 87a8c51b3fSopenharmony_ci } 88a8c51b3fSopenharmony_ci 89a8c51b3fSopenharmony_ci // Valid counters will populate these arrays but we start empty 90a8c51b3fSopenharmony_ci std::vector<std::string> valid_names; 91a8c51b3fSopenharmony_ci std::vector<int> counter_ids; 92a8c51b3fSopenharmony_ci std::vector<int> leader_ids; 93a8c51b3fSopenharmony_ci 94a8c51b3fSopenharmony_ci // Resize to the maximum possible 95a8c51b3fSopenharmony_ci valid_names.reserve(counter_names.size()); 96a8c51b3fSopenharmony_ci counter_ids.reserve(counter_names.size()); 97a8c51b3fSopenharmony_ci 98a8c51b3fSopenharmony_ci const int kCounterMode = PFM_PLM3; // user mode only 99a8c51b3fSopenharmony_ci 100a8c51b3fSopenharmony_ci // Group leads will be assigned on demand. The idea is that once we cannot 101a8c51b3fSopenharmony_ci // create a counter descriptor, the reason is that this group has maxed out 102a8c51b3fSopenharmony_ci // so we set the group_id again to -1 and retry - giving the algorithm a 103a8c51b3fSopenharmony_ci // chance to create a new group leader to hold the next set of counters. 104a8c51b3fSopenharmony_ci int group_id = -1; 105a8c51b3fSopenharmony_ci 106a8c51b3fSopenharmony_ci // Loop through all performance counters 107a8c51b3fSopenharmony_ci for (size_t i = 0; i < counter_names.size(); ++i) { 108a8c51b3fSopenharmony_ci // we are about to push into the valid names vector 109a8c51b3fSopenharmony_ci // check if we did not reach the maximum 110a8c51b3fSopenharmony_ci if (valid_names.size() == PerfCounterValues::kMaxCounters) { 111a8c51b3fSopenharmony_ci // Log a message if we maxed out and stop adding 112a8c51b3fSopenharmony_ci GetErrorLogInstance() 113a8c51b3fSopenharmony_ci << counter_names.size() << " counters were requested. The maximum is " 114a8c51b3fSopenharmony_ci << PerfCounterValues::kMaxCounters << " and " << valid_names.size() 115a8c51b3fSopenharmony_ci << " were already added. All remaining counters will be ignored\n"; 116a8c51b3fSopenharmony_ci // stop the loop and return what we have already 117a8c51b3fSopenharmony_ci break; 118a8c51b3fSopenharmony_ci } 119a8c51b3fSopenharmony_ci 120a8c51b3fSopenharmony_ci // Check if this name is empty 121a8c51b3fSopenharmony_ci const auto& name = counter_names[i]; 122a8c51b3fSopenharmony_ci if (name.empty()) { 123a8c51b3fSopenharmony_ci GetErrorLogInstance() 124a8c51b3fSopenharmony_ci << "A performance counter name was the empty string\n"; 125a8c51b3fSopenharmony_ci continue; 126a8c51b3fSopenharmony_ci } 127a8c51b3fSopenharmony_ci 128a8c51b3fSopenharmony_ci // Here first means first in group, ie the group leader 129a8c51b3fSopenharmony_ci const bool is_first = (group_id < 0); 130a8c51b3fSopenharmony_ci 131a8c51b3fSopenharmony_ci // This struct will be populated by libpfm from the counter string 132a8c51b3fSopenharmony_ci // and then fed into the syscall perf_event_open 133a8c51b3fSopenharmony_ci struct perf_event_attr attr {}; 134a8c51b3fSopenharmony_ci attr.size = sizeof(attr); 135a8c51b3fSopenharmony_ci 136a8c51b3fSopenharmony_ci // This is the input struct to libpfm. 137a8c51b3fSopenharmony_ci pfm_perf_encode_arg_t arg{}; 138a8c51b3fSopenharmony_ci arg.attr = &attr; 139a8c51b3fSopenharmony_ci const int pfm_get = pfm_get_os_event_encoding(name.c_str(), kCounterMode, 140a8c51b3fSopenharmony_ci PFM_OS_PERF_EVENT, &arg); 141a8c51b3fSopenharmony_ci if (pfm_get != PFM_SUCCESS) { 142a8c51b3fSopenharmony_ci GetErrorLogInstance() 143a8c51b3fSopenharmony_ci << "Unknown performance counter name: " << name << "\n"; 144a8c51b3fSopenharmony_ci continue; 145a8c51b3fSopenharmony_ci } 146a8c51b3fSopenharmony_ci 147a8c51b3fSopenharmony_ci // We then proceed to populate the remaining fields in our attribute struct 148a8c51b3fSopenharmony_ci // Note: the man page for perf_event_create suggests inherit = true and 149a8c51b3fSopenharmony_ci // read_format = PERF_FORMAT_GROUP don't work together, but that's not the 150a8c51b3fSopenharmony_ci // case. 151a8c51b3fSopenharmony_ci attr.disabled = is_first; 152a8c51b3fSopenharmony_ci attr.inherit = true; 153a8c51b3fSopenharmony_ci attr.pinned = is_first; 154a8c51b3fSopenharmony_ci attr.exclude_kernel = true; 155a8c51b3fSopenharmony_ci attr.exclude_user = false; 156a8c51b3fSopenharmony_ci attr.exclude_hv = true; 157a8c51b3fSopenharmony_ci 158a8c51b3fSopenharmony_ci // Read all counters in a group in one read. 159a8c51b3fSopenharmony_ci attr.read_format = PERF_FORMAT_GROUP; 160a8c51b3fSopenharmony_ci 161a8c51b3fSopenharmony_ci int id = -1; 162a8c51b3fSopenharmony_ci while (id < 0) { 163a8c51b3fSopenharmony_ci static constexpr size_t kNrOfSyscallRetries = 5; 164a8c51b3fSopenharmony_ci // Retry syscall as it was interrupted often (b/64774091). 165a8c51b3fSopenharmony_ci for (size_t num_retries = 0; num_retries < kNrOfSyscallRetries; 166a8c51b3fSopenharmony_ci ++num_retries) { 167a8c51b3fSopenharmony_ci id = perf_event_open(&attr, 0, -1, group_id, 0); 168a8c51b3fSopenharmony_ci if (id >= 0 || errno != EINTR) { 169a8c51b3fSopenharmony_ci break; 170a8c51b3fSopenharmony_ci } 171a8c51b3fSopenharmony_ci } 172a8c51b3fSopenharmony_ci if (id < 0) { 173a8c51b3fSopenharmony_ci // If the file descriptor is negative we might have reached a limit 174a8c51b3fSopenharmony_ci // in the current group. Set the group_id to -1 and retry 175a8c51b3fSopenharmony_ci if (group_id >= 0) { 176a8c51b3fSopenharmony_ci // Create a new group 177a8c51b3fSopenharmony_ci group_id = -1; 178a8c51b3fSopenharmony_ci } else { 179a8c51b3fSopenharmony_ci // At this point we have already retried to set a new group id and 180a8c51b3fSopenharmony_ci // failed. We then give up. 181a8c51b3fSopenharmony_ci break; 182a8c51b3fSopenharmony_ci } 183a8c51b3fSopenharmony_ci } 184a8c51b3fSopenharmony_ci } 185a8c51b3fSopenharmony_ci 186a8c51b3fSopenharmony_ci // We failed to get a new file descriptor. We might have reached a hard 187a8c51b3fSopenharmony_ci // hardware limit that cannot be resolved even with group multiplexing 188a8c51b3fSopenharmony_ci if (id < 0) { 189a8c51b3fSopenharmony_ci GetErrorLogInstance() << "***WARNING** Failed to get a file descriptor " 190a8c51b3fSopenharmony_ci "for performance counter " 191a8c51b3fSopenharmony_ci << name << ". Ignoring\n"; 192a8c51b3fSopenharmony_ci 193a8c51b3fSopenharmony_ci // We give up on this counter but try to keep going 194a8c51b3fSopenharmony_ci // as the others would be fine 195a8c51b3fSopenharmony_ci continue; 196a8c51b3fSopenharmony_ci } 197a8c51b3fSopenharmony_ci if (group_id < 0) { 198a8c51b3fSopenharmony_ci // This is a leader, store and assign it to the current file descriptor 199a8c51b3fSopenharmony_ci leader_ids.push_back(id); 200a8c51b3fSopenharmony_ci group_id = id; 201a8c51b3fSopenharmony_ci } 202a8c51b3fSopenharmony_ci // This is a valid counter, add it to our descriptor's list 203a8c51b3fSopenharmony_ci counter_ids.push_back(id); 204a8c51b3fSopenharmony_ci valid_names.push_back(name); 205a8c51b3fSopenharmony_ci } 206a8c51b3fSopenharmony_ci 207a8c51b3fSopenharmony_ci // Loop through all group leaders activating them 208a8c51b3fSopenharmony_ci // There is another option of starting ALL counters in a process but 209a8c51b3fSopenharmony_ci // that would be far reaching an intrusion. If the user is using PMCs 210a8c51b3fSopenharmony_ci // by themselves then this would have a side effect on them. It is 211a8c51b3fSopenharmony_ci // friendlier to loop through all groups individually. 212a8c51b3fSopenharmony_ci for (int lead : leader_ids) { 213a8c51b3fSopenharmony_ci if (ioctl(lead, PERF_EVENT_IOC_ENABLE) != 0) { 214a8c51b3fSopenharmony_ci // This should never happen but if it does, we give up on the 215a8c51b3fSopenharmony_ci // entire batch as recovery would be a mess. 216a8c51b3fSopenharmony_ci GetErrorLogInstance() << "***WARNING*** Failed to start counters. " 217a8c51b3fSopenharmony_ci "Claring out all counters.\n"; 218a8c51b3fSopenharmony_ci 219a8c51b3fSopenharmony_ci // Close all peformance counters 220a8c51b3fSopenharmony_ci for (int id : counter_ids) { 221a8c51b3fSopenharmony_ci ::close(id); 222a8c51b3fSopenharmony_ci } 223a8c51b3fSopenharmony_ci 224a8c51b3fSopenharmony_ci // Return an empty object so our internal state is still good and 225a8c51b3fSopenharmony_ci // the process can continue normally without impact 226a8c51b3fSopenharmony_ci return NoCounters(); 227a8c51b3fSopenharmony_ci } 228a8c51b3fSopenharmony_ci } 229a8c51b3fSopenharmony_ci 230a8c51b3fSopenharmony_ci return PerfCounters(std::move(valid_names), std::move(counter_ids), 231a8c51b3fSopenharmony_ci std::move(leader_ids)); 232a8c51b3fSopenharmony_ci} 233a8c51b3fSopenharmony_ci 234a8c51b3fSopenharmony_civoid PerfCounters::CloseCounters() const { 235a8c51b3fSopenharmony_ci if (counter_ids_.empty()) { 236a8c51b3fSopenharmony_ci return; 237a8c51b3fSopenharmony_ci } 238a8c51b3fSopenharmony_ci for (int lead : leader_ids_) { 239a8c51b3fSopenharmony_ci ioctl(lead, PERF_EVENT_IOC_DISABLE); 240a8c51b3fSopenharmony_ci } 241a8c51b3fSopenharmony_ci for (int fd : counter_ids_) { 242a8c51b3fSopenharmony_ci close(fd); 243a8c51b3fSopenharmony_ci } 244a8c51b3fSopenharmony_ci} 245a8c51b3fSopenharmony_ci#else // defined HAVE_LIBPFM 246a8c51b3fSopenharmony_cisize_t PerfCounterValues::Read(const std::vector<int>&) { return 0; } 247a8c51b3fSopenharmony_ci 248a8c51b3fSopenharmony_ciconst bool PerfCounters::kSupported = false; 249a8c51b3fSopenharmony_ci 250a8c51b3fSopenharmony_cibool PerfCounters::Initialize() { return false; } 251a8c51b3fSopenharmony_ci 252a8c51b3fSopenharmony_cibool PerfCounters::IsCounterSupported(const std::string&) { return false; } 253a8c51b3fSopenharmony_ci 254a8c51b3fSopenharmony_ciPerfCounters PerfCounters::Create( 255a8c51b3fSopenharmony_ci const std::vector<std::string>& counter_names) { 256a8c51b3fSopenharmony_ci if (!counter_names.empty()) { 257a8c51b3fSopenharmony_ci GetErrorLogInstance() << "Performance counters not supported."; 258a8c51b3fSopenharmony_ci } 259a8c51b3fSopenharmony_ci return NoCounters(); 260a8c51b3fSopenharmony_ci} 261a8c51b3fSopenharmony_ci 262a8c51b3fSopenharmony_civoid PerfCounters::CloseCounters() const {} 263a8c51b3fSopenharmony_ci#endif // defined HAVE_LIBPFM 264a8c51b3fSopenharmony_ci 265a8c51b3fSopenharmony_ciPerfCountersMeasurement::PerfCountersMeasurement( 266a8c51b3fSopenharmony_ci const std::vector<std::string>& counter_names) 267a8c51b3fSopenharmony_ci : start_values_(counter_names.size()), end_values_(counter_names.size()) { 268a8c51b3fSopenharmony_ci counters_ = PerfCounters::Create(counter_names); 269a8c51b3fSopenharmony_ci} 270a8c51b3fSopenharmony_ci 271a8c51b3fSopenharmony_ciPerfCounters& PerfCounters::operator=(PerfCounters&& other) noexcept { 272a8c51b3fSopenharmony_ci if (this != &other) { 273a8c51b3fSopenharmony_ci CloseCounters(); 274a8c51b3fSopenharmony_ci 275a8c51b3fSopenharmony_ci counter_ids_ = std::move(other.counter_ids_); 276a8c51b3fSopenharmony_ci leader_ids_ = std::move(other.leader_ids_); 277a8c51b3fSopenharmony_ci counter_names_ = std::move(other.counter_names_); 278a8c51b3fSopenharmony_ci } 279a8c51b3fSopenharmony_ci return *this; 280a8c51b3fSopenharmony_ci} 281a8c51b3fSopenharmony_ci} // namespace internal 282a8c51b3fSopenharmony_ci} // namespace benchmark 283