1/*
2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 *     http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15#ifndef SUBCOMMAND_RECORD_H
16#define SUBCOMMAND_RECORD_H
17
18// some debug feaure
19#define HIDEBUG_RECORD_NOT_PROCESS       0
20#define HIDEBUG_RECORD_NOT_PROCESS_VM    0
21#define HIDEBUG_RECORD_NOT_SAVE          0
22#define HIDEBUG_SKIP_PROCESS_SYMBOLS     0
23#define HIDEBUG_SKIP_MATCH_SYMBOLS       0
24#define HIDEBUG_SKIP_LOAD_KERNEL_SYMBOLS 0
25#define HIDEBUG_SKIP_SAVE_SYMBOLS        0
26#define USE_COLLECT_SYMBOLIC             1
27
28#include <functional>
29#include <thread>
30#include <unordered_map>
31#include <unordered_set>
32#include <chrono>
33#include "perf_event_record.h"
34#include "perf_events.h"
35#include "perf_file_writer.h"
36#include "subcommand.h"
37#include "virtual_runtime.h"
38
39namespace OHOS {
40namespace Developtools {
41namespace HiPerf {
42class SubCommandRecord : public SubCommand {
43public:
44    static constexpr int DEFAULT_CPU_PERCENT = 25;
45    static constexpr int MIN_CPU_PERCENT = 1;
46    static constexpr int MAX_CPU_PERCENT = 100;
47    static constexpr int MIN_SAMPLE_FREQUENCY = 1;
48    static constexpr int MAX_SAMPLE_FREQUENCY = 100000;
49    static constexpr int DEFAULT_MMAP_PAGES = 256;
50    static constexpr int MIN_PERF_MMAP_PAGE = 2;
51    static constexpr int MAX_PERF_MMAP_PAGE = 1024;
52    static constexpr int DEFAULT_CHECK_APP_MS = 10;
53    static constexpr int MIN_CHECK_APP_MS = 1;
54    static constexpr int MAX_CHECK_APP_MS = 200;
55    static constexpr float MIN_STOP_SECONDS = 0.100;
56    static constexpr float MAX_STOP_SECONDS = 10000.0;
57    static constexpr int MIN_SAVED_CMDLINES_SIZE = 512;
58    static constexpr int DEFAULT_SAVED_CMDLINES_SIZE = 2048;
59    static constexpr int MAX_SAVED_CMDLINES_SIZE = 4096;
60
61    SubCommandRecord()
62        // clang-format off
63        : SubCommand("record", "Collect performance sample information",
64        "Usage: hiperf record [options] [command [command-args]]\n"
65        "       Collect performance sampling information of running [command].\n"
66        "       The default options are: -c <all cpu> --cpu-limit 25 -d 10000.0 -e hw-cpu-cycles\n"
67        "       -f 4000 -m 1024 -o /data/local/tmp/perf.data.\n"
68        "   -a\n"
69        "         Collect system-wide information.\n"
70        "         for measures all processes/threads\n"
71        "         This requires CAP_PERFMON (since Linux 5.8) or CAP_SYS_ADMIN capability or a\n"
72        "         /proc/sys/kernel/perf_event_paranoid value of less than 1.\n"
73        "   --exclude-hiperf\n"
74        "         Don't record events issued by hiperf itself.\n"
75        "   -c <cpuid>[<,cpuid>]...\n"
76        "         cpuid should be 0,1,2...\n"
77        "         Limit the CPU that collects data.\n"
78        "         0 means cpu0, 1 means cpu1 ...\n"
79        "   --cpu-limit <percent>\n"
80        "         Set the max percent of cpu time used for recording.\n"
81        "         percent is in range [1-100], default is 25.\n"
82        "   -d <sec>\n"
83        "         stop in <sec> seconds. floating point number. seconds is in range [0.100-10000.0]\n"
84        "         default is 10000.0\n"
85        "   -f <freq>\n"
86        "         Set event sampling frequency. default is 4000 samples every second.\n"
87        "         check /proc/sys/kernel/perf_event_max_sample_rate for maximum allowed frequency\n"
88        "   --period <num>\n"
89        "         Set event sampling period for tracepoint events. recording one sample when <num> events happen.\n"
90        "         The default <num> is 1\n"
91        "   -e <event1[:<u|k>]>[,event1[:<u|k>]]...\n"
92        "         Customize the name of the event that needs to be sampled.\n"
93        "         The name can use the names listed in the list parameter.\n"
94        "         It can also be represented by the value of 0x<hex>.\n"
95        "           u - monitor user space events only\n"
96        "           k - monitor kernel space events only\n"
97        "   -g <event1[:<u|k>]>[,event1[:<u|k>]]...\n"
98        "         Put the events into a group, can set multiple groups by multiple -g\n"
99        "         PMU is required to report data in designated groups\n"
100        "         limited by HW capability, too many events cannot be reported in the same sampling)\n"
101        "   --no-inherit\n"
102        "         Don't trace child processes.\n"
103        "   -p <pid1>[,pid2]...\n"
104        "         Limit the process id of the collection target. Conflicts with the -a option.\n"
105        "   -t <tid1>[,tid2]...\n"
106        "         Limit the thread id of the collection target. Conflicts with the -a option.\n"
107        "   --exclude-tid <tid1>[,tid2]...\n"
108        "         Exclude threads of the collection target by thread ids. Conflicts with the -a option.\n"
109        "   --exclude-thread <tname1>[,tname2]...\n"
110        "         Exclude threads of the collection target by thread names. Conflicts with the -a option.\n"
111        "   --offcpu\n"
112        "         Trace when threads are scheduled off cpu.\n"
113        "   -j <branch_filter1>[,branch_filter2]...\n"
114        "         taken branch stack sampling, filter can be:\n"
115        "           any: any type of branch\n"
116        "           any_call: any function call or system call\n"
117        "           any_ret: any function return or system call return\n"
118        "           ind_call: any indirect branch\n"
119        "           ind_jmp: any indirect jump\n"
120        "           cond: conditional branches\n"
121        "           call: direct calls, including far (to/from kernel) calls\n"
122        "           u: only when the branch target is at the user level\n"
123        "           k: only when the branch target is in the kernel\n"
124        "         requires at least one of any, any_call, any_ret, ind_call, ind_jmp, cond, call\n"
125        "   -s / --call-stack <fp|dwarf[,size]>\n"
126        "         Setup and enable call stack (stack chain/backtrace) recording, Default is 'fp'.\n"
127        "           the value can be:\n"
128        "             fp: frame pointer\n"
129        "             dwarf: DWARF's CFI - Call Frame Information\n"
130        "               'dwarf,size' set sample stack size, size should be in 8~65528 and 8 byte aligned. \n"
131        "           as the method to collect the information used to show the call stacks.\n"
132        "   --kernel-callchain\n"
133        "         collect kernel callchain, must used with -s fp/dwarf simultaneously.\n"
134        "   --callchain-useronly\n"
135        "         collect only user callchain.\n"
136        "   --delay-unwind\n"
137        "         If '-s dwarf' used, stack will be unwind while recording, use this option to switch\n"
138        "         to unwind after recording.\n"
139        "   --disable-unwind\n"
140        "         If '-s dwarf' is used, stack will be unwind while recording by default\n"
141        "         use this option to disable unwinding.\n"
142        "   --disable-callstack-expand\n"
143        "         If '-s dwarf' is used, to break the 64k stack limit, callstack is merged by default\n"
144        "         to build more complete call stack. that may not be correct sometimes.\n"
145        "   --enable-debuginfo-symbolic\n"
146        "         If '-s fp/dwarf' is used, symbols in .gnu_debugdata section of an elf, also called minidebuginfo\n"
147        "         will be parsed, if not use this option, we will not parse minidebuginfo by default.\n"
148        "   --clockid <clock_id>\n"
149        "         Set the clock id to use for the various time fields in the perf_event_type records.\n"
150        "         monotonic and monotonic_raw are supported,\n"
151        "         some events might also allow boottime, realtime and clock_tai.\n"
152        "   --symbol-dir <dir>\n"
153        "         Set directory to look for symbol files, used for unwinding. \n"
154        "   -m <mmap_pages>\n"
155        "         Number of the mmap pages, used to receiving record data from kernel,\n"
156        "         must be a power of two, rang[2,1024], default is 1024.\n"
157        "   --app <package_name>\n"
158        "         Collect profile info for an OHOS app, the app must be debuggable.\n"
159        "         Record will exit if the process is not started within 10 seconds.\n"
160        "   --chkms <millisec>\n"
161        "         Set the interval of querying the <package_name>.\n"
162        "         <millisec> is in range [1-200], default is 10.\n"
163        "   --data-limit <SIZE[K|M|G]>\n"
164        "         Stop recording after SIZE bytes of records. Default is unlimited.\n"
165        "   -o <output_file_name>\n"
166        "         Set output file name, default is /data/local/tmp/perf.data.\n"
167        "   -z\n"
168        "         Compress record data.\n"
169        "   --restart\n"
170        "         Collect performance counter information of application startup.\n"
171        "         Record will exit if the process is not started within 30 seconds.\n"
172        "   --verbose\n"
173        "         Show more detailed reports.\n"
174        "   --control <command>\n"
175        "         Control sampling by <command>, the <command> can be:\n"
176        "           prepare: set arguments and prepare sampling\n"
177        "           start: start sampling\n"
178        "           pause: pause sampling\n"
179        "           resume: resume sampling\n"
180        "           stop: stop sampling\n"
181        "   --dedup_stack\n"
182        "         Remove duplicated stacks in perf record, conflicts with -a, only restrain using with -p\n"
183        "   --cmdline-size <size>\n"
184        "         set value to /sys/kernel/tracing/saved_cmdlines_size\n"
185        "         the value should be between 512 and 4096\n"
186        "   --report\n"
187        "         Report with callstack after record. Conflicts with the -a option.\n"
188        "   --dumpoptions\n"
189        "         Dump command options.\n"
190        )
191    // clang-format on
192    {
193    }
194
195    ~SubCommandRecord();
196    bool OnSubCommand(std::vector<std::string> &args) override;
197    bool ParseOption(std::vector<std::string> &args) override;
198    void DumpOptions(void) const override;
199
200    static bool RegisterSubCommandRecord(void);
201    std::map<const std::string, unsigned long long> speOptMap_ = {
202        {"branch_filter", 0},   {"load_filter", 0},
203        {"store_filter", 0},    {"ts_enable", 0},
204        {"pa_enable", 0},       {"jitter", 0},
205        {"min_latency", 0},      {"event_filter", 0},
206    };
207
208private:
209    PerfEvents perfEvents_;
210
211    bool targetSystemWide_ = false;
212    bool compressData_ = false;
213    bool noInherit_ = false;
214    bool excludeHiperf_ = false;
215    bool offCPU_ = false;
216    bool delayUnwind_ = false;
217    bool disableUnwind_ = false;
218    bool disableCallstackExpend_ = false;
219    bool enableDebugInfoSymbolic_ = false;
220    bool verboseReport_ = false;
221    bool kernelCallChain_ = true;
222    bool callChainUserOnly_ = false;
223    bool report_ = false;
224    float timeStopSec_ = PerfEvents::DEFAULT_TIMEOUT;
225    int frequency_ = 0;
226    int period_ = 0;
227    int cpuPercent_ = DEFAULT_CPU_PERCENT;
228    int mmapPages_ = MAX_PERF_MMAP_PAGE;
229    int cmdlinesSize_ = DEFAULT_SAVED_CMDLINES_SIZE;
230    int oldCmdlinesSize_ = 0;
231    std::vector<std::string> symbolDir_ = {};
232    std::string outputFilename_ = "/data/local/tmp/perf.data";
233    std::string appPackage_ = {};
234    int checkAppMs_ = DEFAULT_CHECK_APP_MS;
235    std::string clockId_ = {};
236    std::string strLimit_ = {};
237    std::vector<pid_t> selectCpus_ = {};
238    std::vector<pid_t> selectPids_ = {};
239    std::vector<pid_t> selectTids_ = {};
240    std::vector<pid_t> excludeTids_ = {};
241    bool restart_ = false;
242    std::vector<std::string> selectEvents_ = {};
243    std::vector<std::string> speOptions_ = {};
244    std::vector<std::vector<std::string>> selectGroups_ = {};
245    std::vector<std::string> callStackType_ = {};
246    std::vector<std::string> vecBranchFilters_ = {};
247    std::vector<std::string> trackedCommand_ = {};
248    std::vector<std::string> excludeThreadNames_ = {};
249
250    bool GetOptions(std::vector<std::string> &args);
251    bool CheckArgsRange();
252    bool CheckOptions();
253    bool GetSpeOptions();
254    bool CheckDataLimitOption();
255    bool CheckSelectCpuPidOption();
256    bool GetOptionFrequencyAndPeriod(std::vector<std::string> &args);
257
258    bool isCallStackDwarf_ = false;
259    bool isCallStackFp_ = false;
260    uint32_t callStackDwarfSize_ = MAX_SAMPLE_STACK_SIZE;
261    uint64_t branchSampleType_ = 0;
262    uint64_t dataSizeLimit_ = 0;
263    bool isDataSizeLimitStop_ = false;
264
265    std::unique_ptr<PerfFileWriter> fileWriter_ = nullptr;
266
267    // for client
268    int clientPipeInput_ = -1;
269    int clientPipeOutput_ = -1;
270    int nullFd_ = -1;
271    std::thread clientCommandHanle_;
272    bool clientExit_ = false;
273    void ClientCommandHandle();
274    bool ClientCommandResponse(bool OK);
275    bool IsSamplingRunning();
276    // for cmdline client
277    std::string controlCmd_ = {};
278    bool isFifoServer_ = false;
279    bool isFifoClient_ = false;
280    bool dedupStack_ = false;
281    std::map<pid_t, std::vector<pid_t>> mapPids_;
282    bool ProcessControl();
283    bool CreateFifoServer();
284    bool SendFifoAndWaitReply(const std::string &cmd, const std::chrono::milliseconds &timeOut);
285    bool WaitFifoReply(int fd, const std::chrono::milliseconds &timeOut);
286    void CloseClientThread();
287
288    bool PreparePerfEvent();
289    bool PrepareSysKernel();
290    bool PrepareVirtualRuntime();
291
292    size_t recordSamples_ = 0;
293    size_t recordNoSamples_ = 0;
294
295    bool isNeedSetPerfHarden_ = false;
296    bool isSpe_ = false;
297
298    // callback to process record
299    bool ProcessRecord(std::unique_ptr<PerfEventRecord>);
300    bool SaveRecord(std::unique_ptr<PerfEventRecord>, bool ptrReleaseFlag = false);
301
302    // file format like as 0,1-3,4-6,7,8
303    uint32_t GetCountFromFile(const std::string &fileName);
304    std::string GetCpuDescFromFile();
305    bool AddCpuFeature();
306    void AddMemTotalFeature();
307    void AddEventDescFeature();
308    void AddRecordTimeFeature();
309    void AddWorkloadCmdFeature();
310    void AddCommandLineFeature();
311    void AddCpuOffFeature();
312    void AddDevhostFeature();
313    bool AddFeatureRecordFile();
314
315    bool CreateInitRecordFile(bool compressData = false);
316    bool FinishWriteRecordFile();
317    bool PostProcessRecordFile();
318    bool RecordCompleted();
319#ifdef HIPERF_DEBUG_TIME
320    void ReportTime();
321#endif
322
323    bool CollectionSymbol(std::unique_ptr<PerfEventRecord> record);
324    void CollectSymbol(PerfRecordSample *sample);
325    bool SetPerfLimit(const std::string& file, int value, std::function<bool (int, int)> const& cmd,
326        const std::string& param);
327    bool SetPerfCpuMaxPercent();
328    bool SetPerfMaxSampleRate();
329    bool SetPerfEventMlock();
330    bool SetPerfHarden();
331
332    bool TraceOffCpu();
333    bool ParseCallStackOption(const std::vector<std::string> &callStackType);
334    bool ParseDataLimitOption(const std::string &str);
335    bool ParseBranchSampleType(const std::vector<std::string> &vecBranchSampleTypes);
336    bool ParseControlCmd(const std::string cmd);
337    bool CheckTargetProcessOptions();
338    bool CheckTargetPids();
339    bool CheckReportOption();
340    void WriteCommEventBeforeSampling();
341    void RemoveVdsoTmpFile();
342
343    VirtualRuntime virtualRuntime_;
344#if USE_COLLECT_SYMBOLIC
345    std::unordered_map<pid_t, std::unordered_set<uint64_t>> kernelThreadSymbolsHits_;
346    kSymbolsHits kernelSymbolsHits_;
347    uSymbolsHits userSymbolsHits_;
348    void SymbolicHits();
349#endif
350
351#ifdef HIPERF_DEBUG_TIME
352    std::chrono::microseconds prcessRecordTimes_ = std::chrono::microseconds::zero();
353    std::chrono::microseconds saveRecordTimes_ = std::chrono::microseconds::zero();
354    std::chrono::microseconds saveFeatureTimes_ = std::chrono::microseconds::zero();
355#endif
356    std::chrono::time_point<std::chrono::steady_clock> startSaveFileTimes_;
357
358    void SetHM();
359    void SetSavedCmdlinesSize();
360    void RecoverSavedCmdlinesSize();
361    bool OnlineReportData();
362};
363} // namespace HiPerf
364} // namespace Developtools
365} // namespace OHOS
366#endif // SUBCOMMAND_RECORD_H
367