1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * builtin-stat.c
4 *
5 * Builtin stat command: Give a precise performance counters summary
6 * overview about any workload, CPU or specific PID.
7 *
8 * Sample output:
9
10   $ perf stat ./hackbench 10
11
12  Time: 0.118
13
14  Performance counter stats for './hackbench 10':
15
16       1708.761321 task-clock                #   11.037 CPUs utilized
17            41,190 context-switches          #    0.024 M/sec
18             6,735 CPU-migrations            #    0.004 M/sec
19            17,318 page-faults               #    0.010 M/sec
20     5,205,202,243 cycles                    #    3.046 GHz
21     3,856,436,920 stalled-cycles-frontend   #   74.09% frontend cycles idle
22     1,600,790,871 stalled-cycles-backend    #   30.75% backend  cycles idle
23     2,603,501,247 instructions              #    0.50  insns per cycle
24                                             #    1.48  stalled cycles per insn
25       484,357,498 branches                  #  283.455 M/sec
26         6,388,934 branch-misses             #    1.32% of all branches
27
28        0.154822978  seconds time elapsed
29
30 *
31 * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
32 *
33 * Improvements and fixes by:
34 *
35 *   Arjan van de Ven <arjan@linux.intel.com>
36 *   Yanmin Zhang <yanmin.zhang@intel.com>
37 *   Wu Fengguang <fengguang.wu@intel.com>
38 *   Mike Galbraith <efault@gmx.de>
39 *   Paul Mackerras <paulus@samba.org>
40 *   Jaswinder Singh Rajput <jaswinder@kernel.org>
41 */
42
43#include "builtin.h"
44#include "perf.h"
45#include "util/cgroup.h"
46#include <subcmd/parse-options.h>
47#include "util/parse-events.h"
48#include "util/pmu.h"
49#include "util/event.h"
50#include "util/evlist.h"
51#include "util/evsel.h"
52#include "util/debug.h"
53#include "util/color.h"
54#include "util/stat.h"
55#include "util/header.h"
56#include "util/cpumap.h"
57#include "util/thread_map.h"
58#include "util/counts.h"
59#include "util/topdown.h"
60#include "util/session.h"
61#include "util/tool.h"
62#include "util/string2.h"
63#include "util/metricgroup.h"
64#include "util/synthetic-events.h"
65#include "util/target.h"
66#include "util/time-utils.h"
67#include "util/top.h"
68#include "util/affinity.h"
69#include "util/pfm.h"
70#include "asm/bug.h"
71
72#include <linux/time64.h>
73#include <linux/zalloc.h>
74#include <api/fs/fs.h>
75#include <errno.h>
76#include <signal.h>
77#include <stdlib.h>
78#include <sys/prctl.h>
79#include <inttypes.h>
80#include <locale.h>
81#include <math.h>
82#include <sys/types.h>
83#include <sys/stat.h>
84#include <sys/wait.h>
85#include <unistd.h>
86#include <sys/time.h>
87#include <sys/resource.h>
88#include <linux/err.h>
89
90#include <linux/ctype.h>
91#include <perf/evlist.h>
92
93#define DEFAULT_SEPARATOR	" "
94#define FREEZE_ON_SMI_PATH	"devices/cpu/freeze_on_smi"
95
96static void print_counters(struct timespec *ts, int argc, const char **argv);
97
98/* Default events used for perf stat -T */
99static const char *transaction_attrs = {
100	"task-clock,"
101	"{"
102	"instructions,"
103	"cycles,"
104	"cpu/cycles-t/,"
105	"cpu/tx-start/,"
106	"cpu/el-start/,"
107	"cpu/cycles-ct/"
108	"}"
109};
110
111/* More limited version when the CPU does not have all events. */
112static const char *transaction_limited_attrs = {
113	"task-clock,"
114	"{"
115	"instructions,"
116	"cycles,"
117	"cpu/cycles-t/,"
118	"cpu/tx-start/"
119	"}"
120};
121
122static const char *topdown_attrs[] = {
123	"topdown-total-slots",
124	"topdown-slots-retired",
125	"topdown-recovery-bubbles",
126	"topdown-fetch-bubbles",
127	"topdown-slots-issued",
128	NULL,
129};
130
131static const char *topdown_metric_attrs[] = {
132	"slots",
133	"topdown-retiring",
134	"topdown-bad-spec",
135	"topdown-fe-bound",
136	"topdown-be-bound",
137	NULL,
138};
139
140static const char *smi_cost_attrs = {
141	"{"
142	"msr/aperf/,"
143	"msr/smi/,"
144	"cycles"
145	"}"
146};
147
148static struct evlist	*evsel_list;
149
150static struct target target = {
151	.uid	= UINT_MAX,
152};
153
154#define METRIC_ONLY_LEN 20
155
156static volatile pid_t		child_pid			= -1;
157static int			detailed_run			=  0;
158static bool			transaction_run;
159static bool			topdown_run			= false;
160static bool			smi_cost			= false;
161static bool			smi_reset			= false;
162static int			big_num_opt			=  -1;
163static bool			group				= false;
164static const char		*pre_cmd			= NULL;
165static const char		*post_cmd			= NULL;
166static bool			sync_run			= false;
167static bool			forever				= false;
168static bool			force_metric_only		= false;
169static struct timespec		ref_time;
170static bool			append_file;
171static bool			interval_count;
172static const char		*output_name;
173static int			output_fd;
174
175struct perf_stat {
176	bool			 record;
177	struct perf_data	 data;
178	struct perf_session	*session;
179	u64			 bytes_written;
180	struct perf_tool	 tool;
181	bool			 maps_allocated;
182	struct perf_cpu_map	*cpus;
183	struct perf_thread_map *threads;
184	enum aggr_mode		 aggr_mode;
185};
186
187static struct perf_stat		perf_stat;
188#define STAT_RECORD		perf_stat.record
189
190static volatile int done = 0;
191
192static struct perf_stat_config stat_config = {
193	.aggr_mode		= AGGR_GLOBAL,
194	.scale			= true,
195	.unit_width		= 4, /* strlen("unit") */
196	.run_count		= 1,
197	.metric_only_len	= METRIC_ONLY_LEN,
198	.walltime_nsecs_stats	= &walltime_nsecs_stats,
199	.big_num		= true,
200	.ctl_fd			= -1,
201	.ctl_fd_ack		= -1
202};
203
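/* Return true if two evsels have identical (or both missing) cpu maps. */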
204static bool cpus_map_matched(struct evsel *a, struct evsel *b)
205{
206	if (!a->core.cpus && !b->core.cpus)
207		return true;
208
209	if (!a->core.cpus || !b->core.cpus)
210		return false;
211
212	if (a->core.cpus->nr != b->core.cpus->nr)
213		return false;
214
215	for (int i = 0; i < a->core.cpus->nr; i++) {
216		if (a->core.cpus->map[i] != b->core.cpus->map[i])
217			return false;
218	}
219
220	return true;
221}
222
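/*
 * Verify that each group member uses the same cpu map as its group leader;
 * if not, warn and break up the group so the events can still be opened
 * individually.
 */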
223static void evlist__check_cpu_maps(struct evlist *evlist)
224{
225	struct evsel *evsel, *pos, *leader;
226	char buf[1024];
227
228	evlist__for_each_entry(evlist, evsel) {
229		leader = evsel->leader;
230
231		/* Check that the leader's cpus match each member's cpus. */
232		if (leader == evsel)
233			continue;
234		if (cpus_map_matched(leader, evsel))
235			continue;
236
237		/* If there's a mismatch, disable the group and warn the user. */
238		WARN_ONCE(1, "WARNING: grouped events' cpus do not match, disabling group:\n");
239		evsel__group_desc(leader, buf, sizeof(buf));
240		pr_warning("  %s\n", buf);
241
242		if (verbose) {
243			cpu_map__snprint(leader->core.cpus, buf, sizeof(buf));
244			pr_warning("     %s: %s\n", leader->name, buf);
245			cpu_map__snprint(evsel->core.cpus, buf, sizeof(buf));
246			pr_warning("     %s: %s\n", evsel->name, buf);
247		}
248
249		for_each_group_evsel(pos, leader) {
250			pos->leader = pos;
251			pos->core.nr_members = 0;
252		}
253		evsel->leader->core.nr_members = 0;
254	}
255}
256
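/* r = a - b, with tv_nsec normalized into [0, NSEC_PER_SEC); assumes a >= b. */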
257static inline void diff_timespec(struct timespec *r, struct timespec *a,
258				 struct timespec *b)
259{
260	r->tv_sec = a->tv_sec - b->tv_sec;
261	if (a->tv_nsec < b->tv_nsec) {
262		r->tv_nsec = a->tv_nsec + NSEC_PER_SEC - b->tv_nsec;
263		r->tv_sec--;
264	} else {
265		r->tv_nsec = a->tv_nsec - b->tv_nsec;
266	}
267}
268
269static void perf_stat__reset_stats(void)
270{
271	int i;
272
273	perf_evlist__reset_stats(evsel_list);
274	perf_stat__reset_shadow_stats();
275
276	for (i = 0; i < stat_config.stats_num; i++)
277		perf_stat__reset_shadow_per_stat(&stat_config.stats[i]);
278}
279
280static int process_synthesized_event(struct perf_tool *tool __maybe_unused,
281				     union perf_event *event,
282				     struct perf_sample *sample __maybe_unused,
283				     struct machine *machine __maybe_unused)
284{
285	if (perf_data__write(&perf_stat.data, event, event->header.size) < 0) {
286		pr_err("failed to write perf data, error: %m\n");
287		return -1;
288	}
289
290	perf_stat.bytes_written += event->header.size;
291	return 0;
292}
293
294static int write_stat_round_event(u64 tm, u64 type)
295{
296	return perf_event__synthesize_stat_round(NULL, tm, type,
297						 process_synthesized_event,
298						 NULL);
299}
300
301#define WRITE_STAT_ROUND_EVENT(time, interval) \
302	write_stat_round_event(time, PERF_STAT_ROUND_TYPE__ ## interval)
303
304#define SID(e, x, y) xyarray__entry(e->core.sample_id, x, y)
305
306static int evsel__write_stat_event(struct evsel *counter, u32 cpu, u32 thread,
307				   struct perf_counts_values *count)
308{
309	struct perf_sample_id *sid = SID(counter, cpu, thread);
310
311	return perf_event__synthesize_stat(NULL, cpu, thread, sid->id, count,
312					   process_synthesized_event, NULL);
313}
314
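/*
 * The "duration_time" tool event is not backed by a kernel counter, so its
 * value is synthesized from the elapsed interval instead of being read from
 * a counter fd.
 */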
315static int read_single_counter(struct evsel *counter, int cpu,
316			       int thread, struct timespec *rs)
317{
318	if (counter->tool_event == PERF_TOOL_DURATION_TIME) {
319		u64 val = rs->tv_nsec + rs->tv_sec * NSEC_PER_SEC;
320		struct perf_counts_values *count =
321			perf_counts(counter->counts, cpu, thread);
322		count->ena = count->run = val;
323		count->val = val;
324		return 0;
325	}
326	return evsel__read_counter(counter, cpu, thread);
327}
328
329/*
330 * Read out the results of a single counter:
331 * do not aggregate counts across CPUs in system-wide mode
332 */
333static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu)
334{
335	int nthreads = perf_thread_map__nr(evsel_list->core.threads);
336	int thread;
337
338	if (!counter->supported)
339		return -ENOENT;
340
341	if (counter->core.system_wide)
342		nthreads = 1;
343
344	for (thread = 0; thread < nthreads; thread++) {
345		struct perf_counts_values *count;
346
347		count = perf_counts(counter->counts, cpu, thread);
348
349		/*
350		 * The leader's group read loads data into its group members
351		 * (via evsel__read_counter()) and sets their count->loaded.
352		 */
353		if (!perf_counts__is_loaded(counter->counts, cpu, thread) &&
354		    read_single_counter(counter, cpu, thread, rs)) {
355			counter->counts->scaled = -1;
356			perf_counts(counter->counts, cpu, thread)->ena = 0;
357			perf_counts(counter->counts, cpu, thread)->run = 0;
358			return -1;
359		}
360
361		perf_counts__set_loaded(counter->counts, cpu, thread, false);
362
363		if (STAT_RECORD) {
364			if (evsel__write_stat_event(counter, cpu, thread, count)) {
365				pr_err("failed to write stat event\n");
366				return -1;
367			}
368		}
369
370		if (verbose > 1) {
371			fprintf(stat_config.output,
372				"%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
373					evsel__name(counter),
374					cpu,
375					count->val, count->ena, count->run);
376		}
377	}
378
379	return 0;
380}
381
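/*
 * Read the counters grouped by cpu: pin to each cpu in turn (via struct
 * affinity) and read every event for that cpu before moving on.
 */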
382static int read_affinity_counters(struct timespec *rs)
383{
384	struct evsel *counter;
385	struct affinity affinity;
386	int i, ncpus, cpu;
387
388	if (affinity__setup(&affinity) < 0)
389		return -1;
390
391	ncpus = perf_cpu_map__nr(evsel_list->core.all_cpus);
392	if (!target__has_cpu(&target) || target__has_per_thread(&target))
393		ncpus = 1;
394	evlist__for_each_cpu(evsel_list, i, cpu) {
395		if (i >= ncpus)
396			break;
397		affinity__set(&affinity, cpu);
398
399		evlist__for_each_entry(evsel_list, counter) {
400			if (evsel__cpu_iter_skip(counter, cpu))
401				continue;
402			if (!counter->err) {
403				counter->err = read_counter_cpu(counter, rs,
404								counter->cpu_iter - 1);
405			}
406		}
407	}
408	affinity__cleanup(&affinity);
409	return 0;
410}
411
412static void read_counters(struct timespec *rs)
413{
414	struct evsel *counter;
415
416	if (!stat_config.stop_read_counter && (read_affinity_counters(rs) < 0))
417		return;
418
419	evlist__for_each_entry(evsel_list, counter) {
420		if (counter->err)
421			pr_debug("failed to read counter %s\n", counter->name);
422		if (counter->err == 0 && perf_stat_process_counter(&stat_config, counter))
423			pr_warning("failed to process counter %s\n", counter->name);
424		counter->err = 0;
425	}
426}
427
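/*
 * Per-thread runtime stats: one runtime_stat per thread, used to keep the
 * shadow metrics separate when aggregating per thread (--per-thread).
 */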
428static int runtime_stat_new(struct perf_stat_config *config, int nthreads)
429{
430	int i;
431
432	config->stats = calloc(nthreads, sizeof(struct runtime_stat));
433	if (!config->stats)
434		return -1;
435
436	config->stats_num = nthreads;
437
438	for (i = 0; i < nthreads; i++)
439		runtime_stat__init(&config->stats[i]);
440
441	return 0;
442}
443
444static void runtime_stat_delete(struct perf_stat_config *config)
445{
446	int i;
447
448	if (!config->stats)
449		return;
450
451	for (i = 0; i < config->stats_num; i++)
452		runtime_stat__exit(&config->stats[i]);
453
454	zfree(&config->stats);
455}
456
457static void runtime_stat_reset(struct perf_stat_config *config)
458{
459	int i;
460
461	if (!config->stats)
462		return;
463
464	for (i = 0; i < config->stats_num; i++)
465		perf_stat__reset_shadow_per_stat(&config->stats[i]);
466}
467
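/*
 * One -I interval: reset the shadow stats, read all counters and print them
 * together with the time elapsed since the measurement started.
 */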
468static void process_interval(void)
469{
470	struct timespec ts, rs;
471
472	clock_gettime(CLOCK_MONOTONIC, &ts);
473	diff_timespec(&rs, &ts, &ref_time);
474
475	perf_stat__reset_shadow_per_stat(&rt_stat);
476	runtime_stat_reset(&stat_config);
477	read_counters(&rs);
478
479	if (STAT_RECORD) {
480		if (WRITE_STAT_ROUND_EVENT(rs.tv_sec * NSEC_PER_SEC + rs.tv_nsec, INTERVAL))
481			pr_err("failed to write stat round event\n");
482	}
483
484	init_stats(&walltime_nsecs_stats);
485	update_stats(&walltime_nsecs_stats, stat_config.interval * 1000000ULL);
486	print_counters(&rs, 0, NULL);
487}
488
489static bool handle_interval(unsigned int interval, int *times)
490{
491	if (interval) {
492		process_interval();
493		if (interval_count && !(--(*times)))
494			return true;
495	}
496	return false;
497}
498
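/*
 * Honour --delay: a positive delay sleeps before enabling the events, while
 * a negative one leaves them disabled until they are enabled explicitly,
 * e.g. via an 'enable' command on the --control descriptor.
 */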
499static void enable_counters(void)
500{
501	if (stat_config.initial_delay < 0) {
502		pr_info(EVLIST_DISABLED_MSG);
503		return;
504	}
505
506	if (stat_config.initial_delay > 0) {
507		pr_info(EVLIST_DISABLED_MSG);
508		usleep(stat_config.initial_delay * USEC_PER_MSEC);
509	}
510
511	/*
512	 * We need to enable counters only if:
513	 * - we don't have a tracee (i.e. we're attaching to a task or cpu), or
514	 * - we have an initial delay configured.
515	 */
516	if (!target__none(&target) || stat_config.initial_delay) {
517		evlist__enable(evsel_list);
518		if (stat_config.initial_delay > 0)
519			pr_info(EVLIST_ENABLED_MSG);
520	}
521}
522
523static void disable_counters(void)
524{
525	/*
526	 * If we don't have a tracee (i.e. we're attaching to a task or cpu), counters may
527	 * still be running. To get accurate group ratios, we must stop groups
528	 * from counting before reading their constituent counters.
529	 */
530	if (!target__none(&target))
531		evlist__disable(evsel_list);
532}
533
534static volatile int workload_exec_errno;
535
536/*
537 * perf_evlist__prepare_workload will send a SIGUSR1
538 * if the fork fails, since we asked for it by setting its
539 * want_signal parameter to true.
540 */
541static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *info,
542					void *ucontext __maybe_unused)
543{
544	workload_exec_errno = info->si_value.sival_int;
545}
546
547static bool evsel__should_store_id(struct evsel *counter)
548{
549	return STAT_RECORD || counter->core.attr.read_format & PERF_FORMAT_ID;
550}
551
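/*
 * A task target is considered alive as long as at least one of its threads
 * still has an entry under /proc.
 */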
552static bool is_target_alive(struct target *_target,
553			    struct perf_thread_map *threads)
554{
555	struct stat st;
556	int i;
557
558	if (!target__has_task(_target))
559		return true;
560
561	for (i = 0; i < threads->nr; i++) {
562		char path[PATH_MAX];
563
564		scnprintf(path, PATH_MAX, "%s/%d", procfs__mountpoint(),
565			  threads->map[i].pid);
566
567		if (!stat(path, &st))
568			return true;
569	}
570
571	return false;
572}
573
574static void process_evlist(struct evlist *evlist, unsigned int interval)
575{
576	enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED;
577
578	if (evlist__ctlfd_process(evlist, &cmd) > 0) {
579		switch (cmd) {
580		case EVLIST_CTL_CMD_ENABLE:
581			pr_info(EVLIST_ENABLED_MSG);
582			if (interval)
583				process_interval();
584			break;
585		case EVLIST_CTL_CMD_DISABLE:
586			if (interval)
587				process_interval();
588			pr_info(EVLIST_DISABLED_MSG);
589			break;
590		case EVLIST_CTL_CMD_SNAPSHOT:
591		case EVLIST_CTL_CMD_ACK:
592		case EVLIST_CTL_CMD_UNSUPPORTED:
593		default:
594			break;
595		}
596	}
597}
598
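/* Shrink the remaining poll timeout by the time already spent handling events. */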
599static void compute_tts(struct timespec *time_start, struct timespec *time_stop,
600			int *time_to_sleep)
601{
602	int tts = *time_to_sleep;
603	struct timespec time_diff;
604
605	diff_timespec(&time_diff, time_stop, time_start);
606
607	tts -= time_diff.tv_sec * MSEC_PER_SEC +
608	       time_diff.tv_nsec / NSEC_PER_MSEC;
609
610	if (tts < 0)
611		tts = 0;
612
613	*time_to_sleep = tts;
614}
615
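/*
 * Wait loop: poll until the workload (or attached target) exits, the timeout
 * expires, or the requested number of -I intervals has been printed; fd
 * activity is forwarded to process_evlist() for control commands.
 */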
616static int dispatch_events(bool forks, int timeout, int interval, int *times)
617{
618	int child_exited = 0, status = 0;
619	int time_to_sleep, sleep_time;
620	struct timespec time_start, time_stop;
621
622	if (interval)
623		sleep_time = interval;
624	else if (timeout)
625		sleep_time = timeout;
626	else
627		sleep_time = 1000;
628
629	time_to_sleep = sleep_time;
630
631	while (!done) {
632		if (forks)
633			child_exited = waitpid(child_pid, &status, WNOHANG);
634		else
635			child_exited = !is_target_alive(&target, evsel_list->core.threads) ? 1 : 0;
636
637		if (child_exited)
638			break;
639
640		clock_gettime(CLOCK_MONOTONIC, &time_start);
641		if (!(evlist__poll(evsel_list, time_to_sleep) > 0)) { /* poll timeout or EINTR */
642			if (timeout || handle_interval(interval, times))
643				break;
644			time_to_sleep = sleep_time;
645		} else { /* fd revent */
646			process_evlist(evsel_list, interval);
647			clock_gettime(CLOCK_MONOTONIC, &time_stop);
648			compute_tts(&time_start, &time_stop, &time_to_sleep);
649		}
650	}
651
652	return status;
653}
654
655enum counter_recovery {
656	COUNTER_SKIP,
657	COUNTER_RETRY,
658	COUNTER_FATAL,
659};
660
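/*
 * Decide how to recover from a failed event open: skip unsupported events,
 * retry after falling back or dropping a dead thread, otherwise bail out.
 */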
661static enum counter_recovery stat_handle_error(struct evsel *counter)
662{
663	char msg[BUFSIZ];
664	/*
665	 * PPC returns ENXIO for HW counters until 2.6.37
666	 * (behavior changed with commit b0a873e).
667	 */
668	if (errno == EINVAL || errno == ENOSYS ||
669	    errno == ENOENT || errno == EOPNOTSUPP ||
670	    errno == ENXIO) {
671		if (verbose > 0)
672			ui__warning("%s event is not supported by the kernel.\n",
673				    evsel__name(counter));
674		counter->supported = false;
675		/*
676		 * errored is a sticky flag that means one of the counter's
677		 * cpu events had a problem and needs to be reexamined.
678		 */
679		counter->errored = true;
680
681		if ((counter->leader != counter) ||
682		    !(counter->leader->core.nr_members > 1))
683			return COUNTER_SKIP;
684	} else if (evsel__fallback(counter, errno, msg, sizeof(msg))) {
685		if (verbose > 0)
686			ui__warning("%s\n", msg);
687		return COUNTER_RETRY;
688	} else if (target__has_per_thread(&target) &&
689		   evsel_list->core.threads &&
690		   evsel_list->core.threads->err_thread != -1) {
691		/*
692		 * For global --per-thread case, skip current
693		 * error thread.
694		 */
695		if (!thread_map__remove(evsel_list->core.threads,
696					evsel_list->core.threads->err_thread)) {
697			evsel_list->core.threads->err_thread = -1;
698			return COUNTER_RETRY;
699		}
700	}
701
702	evsel__open_strerror(counter, &target, errno, msg, sizeof(msg));
703	ui__error("%s\n", msg);
704
705	if (child_pid != -1)
706		kill(child_pid, SIGTERM);
707	return COUNTER_FATAL;
708}
709
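/*
 * Open all counters cpu by cpu (with a second pass to reopen broken weak
 * groups), start or attach to the workload, wait for it, then read back the
 * final counts (and write them out when recording).
 */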
710static int __run_perf_stat(int argc, const char **argv, int run_idx)
711{
712	int interval = stat_config.interval;
713	int times = stat_config.times;
714	int timeout = stat_config.timeout;
715	char msg[BUFSIZ];
716	unsigned long long t0, t1;
717	struct evsel *counter;
718	size_t l;
719	int status = 0;
720	const bool forks = (argc > 0);
721	bool is_pipe = STAT_RECORD ? perf_stat.data.is_pipe : false;
722	struct affinity affinity;
723	int i, cpu;
724	bool second_pass = false;
725
726	if (forks) {
727		if (perf_evlist__prepare_workload(evsel_list, &target, argv, is_pipe,
728						  workload_exec_failed_signal) < 0) {
729			perror("failed to prepare workload");
730			return -1;
731		}
732		child_pid = evsel_list->workload.pid;
733	}
734
735	if (group)
736		perf_evlist__set_leader(evsel_list);
737
738	if (affinity__setup(&affinity) < 0)
739		return -1;
740
741	evlist__for_each_cpu(evsel_list, i, cpu) {
742		affinity__set(&affinity, cpu);
743
744		evlist__for_each_entry(evsel_list, counter) {
745			if (evsel__cpu_iter_skip(counter, cpu))
746				continue;
747			if (counter->reset_group || counter->errored)
748				continue;
749try_again:
750			if (create_perf_stat_counter(counter, &stat_config, &target,
751						     counter->cpu_iter - 1) < 0) {
752
753				/*
754				 * Weak group failed. We cannot just undo this here
755				 * because earlier CPUs might be in group mode, and the kernel
756				 * doesn't support mixing group and non-group reads. Defer
757				 * it to later.
758				 * Don't close here because we're in the wrong affinity.
759				 */
760				if ((errno == EINVAL || errno == EBADF) &&
761				    counter->leader != counter &&
762				    counter->weak_group) {
763					perf_evlist__reset_weak_group(evsel_list, counter, false);
764					assert(counter->reset_group);
765					second_pass = true;
766					continue;
767				}
768
769				switch (stat_handle_error(counter)) {
770				case COUNTER_FATAL:
771					return -1;
772				case COUNTER_RETRY:
773					goto try_again;
774				case COUNTER_SKIP:
775					continue;
776				default:
777					break;
778				}
779
780			}
781			counter->supported = true;
782		}
783	}
784
785	if (second_pass) {
786		/*
787		 * Now redo all the weak groups after closing them,
788		 * and also close errored counters.
789		 */
790
791		evlist__for_each_cpu(evsel_list, i, cpu) {
792			affinity__set(&affinity, cpu);
793			/* First close errored or weak retry */
794			evlist__for_each_entry(evsel_list, counter) {
795				if (!counter->reset_group && !counter->errored)
796					continue;
797				if (evsel__cpu_iter_skip_no_inc(counter, cpu))
798					continue;
799				perf_evsel__close_cpu(&counter->core, counter->cpu_iter);
800			}
801			/* Now reopen weak */
802			evlist__for_each_entry(evsel_list, counter) {
803				if (!counter->reset_group && !counter->errored)
804					continue;
805				if (evsel__cpu_iter_skip(counter, cpu))
806					continue;
807				if (!counter->reset_group)
808					continue;
809try_again_reset:
810				pr_debug2("reopening weak %s\n", evsel__name(counter));
811				if (create_perf_stat_counter(counter, &stat_config, &target,
812							     counter->cpu_iter - 1) < 0) {
813
814					switch (stat_handle_error(counter)) {
815					case COUNTER_FATAL:
816						return -1;
817					case COUNTER_RETRY:
818						goto try_again_reset;
819					case COUNTER_SKIP:
820						continue;
821					default:
822						break;
823					}
824				}
825				counter->supported = true;
826			}
827		}
828	}
829	affinity__cleanup(&affinity);
830
831	evlist__for_each_entry(evsel_list, counter) {
832		if (!counter->supported) {
833			perf_evsel__free_fd(&counter->core);
834			continue;
835		}
836
837		l = strlen(counter->unit);
838		if (l > stat_config.unit_width)
839			stat_config.unit_width = l;
840
841		if (evsel__should_store_id(counter) &&
842		    evsel__store_ids(counter, evsel_list))
843			return -1;
844	}
845
846	if (perf_evlist__apply_filters(evsel_list, &counter)) {
847		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
848			counter->filter, evsel__name(counter), errno,
849			str_error_r(errno, msg, sizeof(msg)));
850		return -1;
851	}
852
853	if (STAT_RECORD) {
854		int err, fd = perf_data__fd(&perf_stat.data);
855
856		if (is_pipe) {
857			err = perf_header__write_pipe(perf_data__fd(&perf_stat.data));
858		} else {
859			err = perf_session__write_header(perf_stat.session, evsel_list,
860							 fd, false);
861		}
862
863		if (err < 0)
864			return err;
865
866		err = perf_event__synthesize_stat_events(&stat_config, NULL, evsel_list,
867							 process_synthesized_event, is_pipe);
868		if (err < 0)
869			return err;
870	}
871
872	/*
873	 * Enable counters and exec the command:
874	 */
875	t0 = rdclock();
876	clock_gettime(CLOCK_MONOTONIC, &ref_time);
877
878	if (forks) {
879		perf_evlist__start_workload(evsel_list);
880		enable_counters();
881
882		if (interval || timeout || evlist__ctlfd_initialized(evsel_list))
883			status = dispatch_events(forks, timeout, interval, &times);
884		if (child_pid != -1) {
885			if (timeout)
886				kill(child_pid, SIGTERM);
887			wait4(child_pid, &status, 0, &stat_config.ru_data);
888		}
889
890		if (workload_exec_errno) {
891			const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
892			pr_err("Workload failed: %s\n", emsg);
893			return -1;
894		}
895
896		if (WIFSIGNALED(status))
897			psignal(WTERMSIG(status), argv[0]);
898	} else {
899		enable_counters();
900		status = dispatch_events(forks, timeout, interval, &times);
901	}
902
903	disable_counters();
904
905	t1 = rdclock();
906
907	if (stat_config.walltime_run_table)
908		stat_config.walltime_run[run_idx] = t1 - t0;
909
910	if (interval && stat_config.summary) {
911		stat_config.interval = 0;
912		stat_config.stop_read_counter = true;
913		init_stats(&walltime_nsecs_stats);
914		update_stats(&walltime_nsecs_stats, t1 - t0);
915
916		if (stat_config.aggr_mode == AGGR_GLOBAL)
917			perf_evlist__save_aggr_prev_raw_counts(evsel_list);
918
919		perf_evlist__copy_prev_raw_counts(evsel_list);
920		perf_evlist__reset_prev_raw_counts(evsel_list);
921		runtime_stat_reset(&stat_config);
922		perf_stat__reset_shadow_per_stat(&rt_stat);
923	} else
924		update_stats(&walltime_nsecs_stats, t1 - t0);
925
926	/*
927	 * Closing a group leader splits the group, and as we only disable
928	 * group leaders, results in remaining events becoming enabled. To
929	 * avoid arbitrary skew, we must read all counters before closing any
930	 * group leaders.
931	 */
932	read_counters(&(struct timespec) { .tv_nsec = t1-t0 });
933
934	/*
935	 * We need to keep evsel_list alive, because it's still processed
936	 * when recording; in that case the evsel_list is closed later.
937	 */
938	if (!STAT_RECORD)
939		evlist__close(evsel_list);
940
941	return WEXITSTATUS(status);
942}
943
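/* One measurement run: execute --pre, optionally sync(), measure, then run --post. */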
944static int run_perf_stat(int argc, const char **argv, int run_idx)
945{
946	int ret;
947
948	if (pre_cmd) {
949		ret = system(pre_cmd);
950		if (ret)
951			return ret;
952	}
953
954	if (sync_run)
955		sync();
956
957	ret = __run_perf_stat(argc, argv, run_idx);
958	if (ret)
959		return ret;
960
961	if (post_cmd) {
962		ret = system(post_cmd);
963		if (ret)
964			return ret;
965	}
966
967	return ret;
968}
969
970static void print_counters(struct timespec *ts, int argc, const char **argv)
971{
972	/* Do not print anything if we record to the pipe. */
973	if (STAT_RECORD && perf_stat.data.is_pipe)
974		return;
975
976	perf_evlist__print_counters(evsel_list, &stat_config, &target,
977				    ts, argc, argv);
978}
979
980static volatile int signr = -1;
981
982static void skip_signal(int signo)
983{
984	if ((child_pid == -1) || stat_config.interval)
985		done = 1;
986
987	signr = signo;
988	/*
989	 * Render child_pid harmless: this way we won't send
990	 * SIGTERM to a random process that happens to reuse
991	 * the PID in case of a race condition and fast PID
992	 * recycling.
993	 */
994	child_pid = -1;
995}
996
997static void sig_atexit(void)
998{
999	sigset_t set, oset;
1000
1001	/*
1002	 * Avoid a race condition with the SIGCHLD handler in
1003	 * skip_signal(), which modifies child_pid: the goal
1004	 * is to avoid sending SIGTERM to a random process
1005	 * after the child has already exited.
1006	 */
1007	sigemptyset(&set);
1008	sigaddset(&set, SIGCHLD);
1009	sigprocmask(SIG_BLOCK, &set, &oset);
1010
1011	if (child_pid != -1)
1012		kill(child_pid, SIGTERM);
1013
1014	sigprocmask(SIG_SETMASK, &oset, NULL);
1015
1016	if (signr == -1)
1017		return;
1018
1019	signal(signr, SIG_DFL);
1020	kill(getpid(), signr);
1021}
1022
1023void perf_stat__set_big_num(int set)
1024{
1025	stat_config.big_num = (set != 0);
1026}
1027
1028static int stat__set_big_num(const struct option *opt __maybe_unused,
1029			     const char *s __maybe_unused, int unset)
1030{
1031	big_num_opt = unset ? 0 : 1;
1032	perf_stat__set_big_num(!unset);
1033	return 0;
1034}
1035
1036static int enable_metric_only(const struct option *opt __maybe_unused,
1037			      const char *s __maybe_unused, int unset)
1038{
1039	force_metric_only = true;
1040	stat_config.metric_only = !unset;
1041	return 0;
1042}
1043
1044static int parse_metric_groups(const struct option *opt,
1045			       const char *str,
1046			       int unset __maybe_unused)
1047{
1048	return metricgroup__parse_groups(opt, str,
1049					 stat_config.metric_no_group,
1050					 stat_config.metric_no_merge,
1051					 &stat_config.metric_events);
1052}
1053
1054static int parse_control_option(const struct option *opt,
1055				const char *str,
1056				int unset __maybe_unused)
1057{
1058	struct perf_stat_config *config = opt->value;
1059
1060	return evlist__parse_control(str, &config->ctl_fd, &config->ctl_fd_ack, &config->ctl_fd_close);
1061}
1062
1063static int parse_stat_cgroups(const struct option *opt,
1064			      const char *str, int unset)
1065{
1066	if (stat_config.cgroup_list) {
1067		pr_err("--cgroup and --for-each-cgroup cannot be used together\n");
1068		return -1;
1069	}
1070
1071	return parse_cgroups(opt, str, unset);
1072}
1073
1074static struct option stat_options[] = {
1075	OPT_BOOLEAN('T', "transaction", &transaction_run,
1076		    "hardware transaction statistics"),
1077	OPT_CALLBACK('e', "event", &evsel_list, "event",
1078		     "event selector. use 'perf list' to list available events",
1079		     parse_events_option),
1080	OPT_CALLBACK(0, "filter", &evsel_list, "filter",
1081		     "event filter", parse_filter),
1082	OPT_BOOLEAN('i', "no-inherit", &stat_config.no_inherit,
1083		    "child tasks do not inherit counters"),
1084	OPT_STRING('p', "pid", &target.pid, "pid",
1085		   "stat events on existing process id"),
1086	OPT_STRING('t', "tid", &target.tid, "tid",
1087		   "stat events on existing thread id"),
1088	OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
1089		    "system-wide collection from all CPUs"),
1090	OPT_BOOLEAN('g', "group", &group,
1091		    "put the counters into a counter group"),
1092	OPT_BOOLEAN(0, "scale", &stat_config.scale,
1093		    "Use --no-scale to disable counter scaling for multiplexing"),
1094	OPT_INCR('v', "verbose", &verbose,
1095		    "be more verbose (show counter open errors, etc)"),
1096	OPT_INTEGER('r', "repeat", &stat_config.run_count,
1097		    "repeat command and print average + stddev (max: 100, forever: 0)"),
1098	OPT_BOOLEAN(0, "table", &stat_config.walltime_run_table,
1099		    "display details about each run (only with -r option)"),
1100	OPT_BOOLEAN('n', "null", &stat_config.null_run,
1101		    "null run - don't start any counters"),
1102	OPT_INCR('d', "detailed", &detailed_run,
1103		    "detailed run - start a lot of events"),
1104	OPT_BOOLEAN('S', "sync", &sync_run,
1105		    "call sync() before starting a run"),
1106	OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL,
1107			   "print large numbers with thousands\' separators",
1108			   stat__set_big_num),
1109	OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
1110		    "list of cpus to monitor in system-wide"),
1111	OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode,
1112		    "disable CPU count aggregation", AGGR_NONE),
1113	OPT_BOOLEAN(0, "no-merge", &stat_config.no_merge, "Do not merge identical named events"),
1114	OPT_STRING('x', "field-separator", &stat_config.csv_sep, "separator",
1115		   "print counts with custom separator"),
1116	OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
1117		     "monitor event in cgroup name only", parse_stat_cgroups),
1118	OPT_STRING(0, "for-each-cgroup", &stat_config.cgroup_list, "name",
1119		    "expand events for each cgroup"),
1120	OPT_STRING('o', "output", &output_name, "file", "output file name"),
1121	OPT_BOOLEAN(0, "append", &append_file, "append to the output file"),
1122	OPT_INTEGER(0, "log-fd", &output_fd,
1123		    "log output to fd, instead of stderr"),
1124	OPT_STRING(0, "pre", &pre_cmd, "command",
1125			"command to run prior to the measured command"),
1126	OPT_STRING(0, "post", &post_cmd, "command",
1127			"command to run after the measured command"),
1128	OPT_UINTEGER('I', "interval-print", &stat_config.interval,
1129		    "print counts at regular interval in ms "
1130		    "(overhead is possible for values <= 100ms)"),
1131	OPT_INTEGER(0, "interval-count", &stat_config.times,
1132		    "print counts for fixed number of times"),
1133	OPT_BOOLEAN(0, "interval-clear", &stat_config.interval_clear,
1134		    "clear screen in between new interval"),
1135	OPT_UINTEGER(0, "timeout", &stat_config.timeout,
1136		    "stop workload and print counts after a timeout period in ms (>= 10ms)"),
1137	OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode,
1138		     "aggregate counts per processor socket", AGGR_SOCKET),
1139	OPT_SET_UINT(0, "per-die", &stat_config.aggr_mode,
1140		     "aggregate counts per processor die", AGGR_DIE),
1141	OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode,
1142		     "aggregate counts per physical processor core", AGGR_CORE),
1143	OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode,
1144		     "aggregate counts per thread", AGGR_THREAD),
1145	OPT_SET_UINT(0, "per-node", &stat_config.aggr_mode,
1146		     "aggregate counts per numa node", AGGR_NODE),
1147	OPT_INTEGER('D', "delay", &stat_config.initial_delay,
1148		    "ms to wait before starting measurement after program start (-1: start with events disabled)"),
1149	OPT_CALLBACK_NOOPT(0, "metric-only", &stat_config.metric_only, NULL,
1150			"Only print computed metrics. No raw values", enable_metric_only),
1151	OPT_BOOLEAN(0, "metric-no-group", &stat_config.metric_no_group,
1152		       "don't group metric events, impacts multiplexing"),
1153	OPT_BOOLEAN(0, "metric-no-merge", &stat_config.metric_no_merge,
1154		       "don't try to share events between metrics in a group"),
1155	OPT_BOOLEAN(0, "topdown", &topdown_run,
1156			"measure topdown level 1 statistics"),
1157	OPT_BOOLEAN(0, "smi-cost", &smi_cost,
1158			"measure SMI cost"),
1159	OPT_CALLBACK('M', "metrics", &evsel_list, "metric/metric group list",
1160		     "monitor specified metrics or metric groups (separated by ,)",
1161		     parse_metric_groups),
1162	OPT_BOOLEAN_FLAG(0, "all-kernel", &stat_config.all_kernel,
1163			 "Configure all used events to run in kernel space.",
1164			 PARSE_OPT_EXCLUSIVE),
1165	OPT_BOOLEAN_FLAG(0, "all-user", &stat_config.all_user,
1166			 "Configure all used events to run in user space.",
1167			 PARSE_OPT_EXCLUSIVE),
1168	OPT_BOOLEAN(0, "percore-show-thread", &stat_config.percore_show_thread,
1169		    "Use with 'percore' event qualifier to show the event "
1170		    "counts of one hardware thread by summing up the counts of "
1171		    "all hardware threads of the same physical core"),
1172	OPT_BOOLEAN(0, "summary", &stat_config.summary,
1173		       "print summary for interval mode"),
1174#ifdef HAVE_LIBPFM
1175	OPT_CALLBACK(0, "pfm-events", &evsel_list, "event",
1176		"libpfm4 event selector. use 'perf list' to list available events",
1177		parse_libpfm_events_option),
1178#endif
1179	OPT_CALLBACK(0, "control", &stat_config, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]",
1180		     "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events).\n"
1181		     "\t\t\t  Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n"
1182		     "\t\t\t  Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.",
1183		      parse_control_option),
1184	OPT_END()
1185};
1186
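/*
 * Map a cpu map index to a socket/die/core/node aggregation id; the *_cached
 * variants below memoize the id in config->cpus_aggr_map.
 */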
1187static int perf_stat__get_socket(struct perf_stat_config *config __maybe_unused,
1188				 struct perf_cpu_map *map, int cpu)
1189{
1190	return cpu_map__get_socket(map, cpu, NULL);
1191}
1192
1193static int perf_stat__get_die(struct perf_stat_config *config __maybe_unused,
1194			      struct perf_cpu_map *map, int cpu)
1195{
1196	return cpu_map__get_die(map, cpu, NULL);
1197}
1198
1199static int perf_stat__get_core(struct perf_stat_config *config __maybe_unused,
1200			       struct perf_cpu_map *map, int cpu)
1201{
1202	return cpu_map__get_core(map, cpu, NULL);
1203}
1204
1205static int perf_stat__get_node(struct perf_stat_config *config __maybe_unused,
1206			       struct perf_cpu_map *map, int cpu)
1207{
1208	return cpu_map__get_node(map, cpu, NULL);
1209}
1210
1211static int perf_stat__get_aggr(struct perf_stat_config *config,
1212			       aggr_get_id_t get_id, struct perf_cpu_map *map, int idx)
1213{
1214	int cpu;
1215
1216	if (idx >= map->nr)
1217		return -1;
1218
1219	cpu = map->map[idx];
1220
1221	if (config->cpus_aggr_map->map[cpu] == -1)
1222		config->cpus_aggr_map->map[cpu] = get_id(config, map, idx);
1223
1224	return config->cpus_aggr_map->map[cpu];
1225}
1226
1227static int perf_stat__get_socket_cached(struct perf_stat_config *config,
1228					struct perf_cpu_map *map, int idx)
1229{
1230	return perf_stat__get_aggr(config, perf_stat__get_socket, map, idx);
1231}
1232
1233static int perf_stat__get_die_cached(struct perf_stat_config *config,
1234					struct perf_cpu_map *map, int idx)
1235{
1236	return perf_stat__get_aggr(config, perf_stat__get_die, map, idx);
1237}
1238
1239static int perf_stat__get_core_cached(struct perf_stat_config *config,
1240				      struct perf_cpu_map *map, int idx)
1241{
1242	return perf_stat__get_aggr(config, perf_stat__get_core, map, idx);
1243}
1244
1245static int perf_stat__get_node_cached(struct perf_stat_config *config,
1246				      struct perf_cpu_map *map, int idx)
1247{
1248	return perf_stat__get_aggr(config, perf_stat__get_node, map, idx);
1249}
1250
1251static bool term_percore_set(void)
1252{
1253	struct evsel *counter;
1254
1255	evlist__for_each_entry(evsel_list, counter) {
1256		if (counter->percore)
1257			return true;
1258	}
1259
1260	return false;
1261}
1262
1263static int perf_stat_init_aggr_mode(void)
1264{
1265	int nr;
1266
1267	switch (stat_config.aggr_mode) {
1268	case AGGR_SOCKET:
1269		if (cpu_map__build_socket_map(evsel_list->core.cpus, &stat_config.aggr_map)) {
1270			perror("cannot build socket map");
1271			return -1;
1272		}
1273		stat_config.aggr_get_id = perf_stat__get_socket_cached;
1274		break;
1275	case AGGR_DIE:
1276		if (cpu_map__build_die_map(evsel_list->core.cpus, &stat_config.aggr_map)) {
1277			perror("cannot build die map");
1278			return -1;
1279		}
1280		stat_config.aggr_get_id = perf_stat__get_die_cached;
1281		break;
1282	case AGGR_CORE:
1283		if (cpu_map__build_core_map(evsel_list->core.cpus, &stat_config.aggr_map)) {
1284			perror("cannot build core map");
1285			return -1;
1286		}
1287		stat_config.aggr_get_id = perf_stat__get_core_cached;
1288		break;
1289	case AGGR_NODE:
1290		if (cpu_map__build_node_map(evsel_list->core.cpus, &stat_config.aggr_map)) {
1291			perror("cannot build node map");
1292			return -1;
1293		}
1294		stat_config.aggr_get_id = perf_stat__get_node_cached;
1295		break;
1296	case AGGR_NONE:
1297		if (term_percore_set()) {
1298			if (cpu_map__build_core_map(evsel_list->core.cpus,
1299						    &stat_config.aggr_map)) {
1300				perror("cannot build core map");
1301				return -1;
1302			}
1303			stat_config.aggr_get_id = perf_stat__get_core_cached;
1304		}
1305		break;
1306	case AGGR_GLOBAL:
1307	case AGGR_THREAD:
1308	case AGGR_UNSET:
1309	default:
1310		break;
1311	}
1312
1313	/*
1314	 * The evsel_list->cpus is the base we operate on,
1315	 * taking the highest cpu number to be the size of
1316	 * the aggregation translation cpumap.
1317	 */
1318	nr = perf_cpu_map__max(evsel_list->core.cpus);
1319	stat_config.cpus_aggr_map = perf_cpu_map__empty_new(nr + 1);
1320	return stat_config.cpus_aggr_map ? 0 : -ENOMEM;
1321}
1322
1323static void perf_stat__exit_aggr_mode(void)
1324{
1325	perf_cpu_map__put(stat_config.aggr_map);
1326	perf_cpu_map__put(stat_config.cpus_aggr_map);
1327	stat_config.aggr_map = NULL;
1328	stat_config.cpus_aggr_map = NULL;
1329}
1330
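/*
 * 'perf stat report' variants: the topology is taken from the perf_env
 * recorded in the data file header rather than from the running system.
 */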
1331static inline int perf_env__get_cpu(struct perf_env *env, struct perf_cpu_map *map, int idx)
1332{
1333	int cpu;
1334
1335	if (idx >= map->nr)
1336		return -1;
1337
1338	cpu = map->map[idx];
1339
1340	if (cpu >= env->nr_cpus_avail)
1341		return -1;
1342
1343	return cpu;
1344}
1345
1346static int perf_env__get_socket(struct perf_cpu_map *map, int idx, void *data)
1347{
1348	struct perf_env *env = data;
1349	int cpu = perf_env__get_cpu(env, map, idx);
1350
1351	return cpu == -1 ? -1 : env->cpu[cpu].socket_id;
1352}
1353
1354static int perf_env__get_die(struct perf_cpu_map *map, int idx, void *data)
1355{
1356	struct perf_env *env = data;
1357	int die_id = -1, cpu = perf_env__get_cpu(env, map, idx);
1358
1359	if (cpu != -1) {
1360		/*
1361		 * Encode the socket id in bit range 15:8.
1362		 * die_id is relative to its socket, but we
1363		 * need a global id, so we combine the
1364		 * socket id and the die id.
1365		 */
1366		if (WARN_ONCE(env->cpu[cpu].socket_id >> 8, "The socket id number is too big.\n"))
1367			return -1;
1368
1369		if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n"))
1370			return -1;
1371
1372		die_id = (env->cpu[cpu].socket_id << 8) | (env->cpu[cpu].die_id & 0xff);
1373	}
1374
1375	return die_id;
1376}
1377
1378static int perf_env__get_core(struct perf_cpu_map *map, int idx, void *data)
1379{
1380	struct perf_env *env = data;
1381	int core = -1, cpu = perf_env__get_cpu(env, map, idx);
1382
1383	if (cpu != -1) {
1384		/*
1385		 * Encode the socket id in bit range 31:24 and
1386		 * the die id in bit range 23:16. core_id is
1387		 * relative to its socket and die, but we need
1388		 * a global id, so we combine the socket id,
1389		 * the die id and the core id.
1390		 */
1391		if (WARN_ONCE(env->cpu[cpu].socket_id >> 8, "The socket id number is too big.\n"))
1392			return -1;
1393
1394		if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n"))
1395			return -1;
1396
1397		if (WARN_ONCE(env->cpu[cpu].core_id >> 16, "The core id number is too big.\n"))
1398			return -1;
1399
1400		core = (env->cpu[cpu].socket_id << 24) |
1401		       (env->cpu[cpu].die_id << 16) |
1402		       (env->cpu[cpu].core_id & 0xffff);
1403	}
1404
1405	return core;
1406}
1407
1408static int perf_env__get_node(struct perf_cpu_map *map, int idx, void *data)
1409{
1410	int cpu = perf_env__get_cpu(data, map, idx);
1411
1412	return perf_env__numa_node(data, cpu);
1413}
1414
1415static int perf_env__build_socket_map(struct perf_env *env, struct perf_cpu_map *cpus,
1416				      struct perf_cpu_map **sockp)
1417{
1418	return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env);
1419}
1420
1421static int perf_env__build_die_map(struct perf_env *env, struct perf_cpu_map *cpus,
1422				   struct perf_cpu_map **diep)
1423{
1424	return cpu_map__build_map(cpus, diep, perf_env__get_die, env);
1425}
1426
1427static int perf_env__build_core_map(struct perf_env *env, struct perf_cpu_map *cpus,
1428				    struct perf_cpu_map **corep)
1429{
1430	return cpu_map__build_map(cpus, corep, perf_env__get_core, env);
1431}
1432
1433static int perf_env__build_node_map(struct perf_env *env, struct perf_cpu_map *cpus,
1434				    struct perf_cpu_map **nodep)
1435{
1436	return cpu_map__build_map(cpus, nodep, perf_env__get_node, env);
1437}
1438
1439static int perf_stat__get_socket_file(struct perf_stat_config *config __maybe_unused,
1440				      struct perf_cpu_map *map, int idx)
1441{
1442	return perf_env__get_socket(map, idx, &perf_stat.session->header.env);
1443}
1444static int perf_stat__get_die_file(struct perf_stat_config *config __maybe_unused,
1445				   struct perf_cpu_map *map, int idx)
1446{
1447	return perf_env__get_die(map, idx, &perf_stat.session->header.env);
1448}
1449
1450static int perf_stat__get_core_file(struct perf_stat_config *config __maybe_unused,
1451				    struct perf_cpu_map *map, int idx)
1452{
1453	return perf_env__get_core(map, idx, &perf_stat.session->header.env);
1454}
1455
1456static int perf_stat__get_node_file(struct perf_stat_config *config __maybe_unused,
1457				    struct perf_cpu_map *map, int idx)
1458{
1459	return perf_env__get_node(map, idx, &perf_stat.session->header.env);
1460}
1461
1462static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
1463{
1464	struct perf_env *env = &st->session->header.env;
1465
1466	switch (stat_config.aggr_mode) {
1467	case AGGR_SOCKET:
1468		if (perf_env__build_socket_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) {
1469			perror("cannot build socket map");
1470			return -1;
1471		}
1472		stat_config.aggr_get_id = perf_stat__get_socket_file;
1473		break;
1474	case AGGR_DIE:
1475		if (perf_env__build_die_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) {
1476			perror("cannot build die map");
1477			return -1;
1478		}
1479		stat_config.aggr_get_id = perf_stat__get_die_file;
1480		break;
1481	case AGGR_CORE:
1482		if (perf_env__build_core_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) {
1483			perror("cannot build core map");
1484			return -1;
1485		}
1486		stat_config.aggr_get_id = perf_stat__get_core_file;
1487		break;
1488	case AGGR_NODE:
1489		if (perf_env__build_node_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) {
1490			perror("cannot build node map");
1491			return -1;
1492		}
1493		stat_config.aggr_get_id = perf_stat__get_node_file;
1494		break;
1495	case AGGR_NONE:
1496	case AGGR_GLOBAL:
1497	case AGGR_THREAD:
1498	case AGGR_UNSET:
1499	default:
1500		break;
1501	}
1502
1503	return 0;
1504}
1505
1506/*
1507 * Add default attributes, if there were no attributes specified or
1508 * if -d/--detailed, -d -d or -d -d -d is used:
1509 */
1510static int add_default_attributes(void)
1511{
1512	int err;
1513	struct perf_event_attr default_attrs0[] = {
1514
1515  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK		},
1516  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES	},
1517  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS		},
1518  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS		},
1519
1520  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES		},
1521};
1522	struct perf_event_attr frontend_attrs[] = {
1523  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND	},
1524};
1525	struct perf_event_attr backend_attrs[] = {
1526  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND	},
1527};
1528	struct perf_event_attr default_attrs1[] = {
1529  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS		},
1530  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS	},
1531  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES		},
1532
1533};
1534
1535/*
1536 * Detailed stats (-d), covering the L1 and last level data caches:
1537 */
1538	struct perf_event_attr detailed_attrs[] = {
1539
1540  { .type = PERF_TYPE_HW_CACHE,
1541    .config =
1542	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
1543	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
1544	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},
1545
1546  { .type = PERF_TYPE_HW_CACHE,
1547    .config =
1548	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
1549	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
1550	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
1551
1552  { .type = PERF_TYPE_HW_CACHE,
1553    .config =
1554	 PERF_COUNT_HW_CACHE_LL			<<  0  |
1555	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
1556	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},
1557
1558  { .type = PERF_TYPE_HW_CACHE,
1559    .config =
1560	 PERF_COUNT_HW_CACHE_LL			<<  0  |
1561	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
1562	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
1563};
1564
1565/*
1566 * Very detailed stats (-d -d), covering the instruction cache and the TLB caches:
1567 */
1568	struct perf_event_attr very_detailed_attrs[] = {
1569
1570  { .type = PERF_TYPE_HW_CACHE,
1571    .config =
1572	 PERF_COUNT_HW_CACHE_L1I		<<  0  |
1573	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
1574	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},
1575
1576  { .type = PERF_TYPE_HW_CACHE,
1577    .config =
1578	 PERF_COUNT_HW_CACHE_L1I		<<  0  |
1579	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
1580	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
1581
1582  { .type = PERF_TYPE_HW_CACHE,
1583    .config =
1584	 PERF_COUNT_HW_CACHE_DTLB		<<  0  |
1585	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
1586	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},
1587
1588  { .type = PERF_TYPE_HW_CACHE,
1589    .config =
1590	 PERF_COUNT_HW_CACHE_DTLB		<<  0  |
1591	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
1592	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
1593
1594  { .type = PERF_TYPE_HW_CACHE,
1595    .config =
1596	 PERF_COUNT_HW_CACHE_ITLB		<<  0  |
1597	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
1598	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},
1599
1600  { .type = PERF_TYPE_HW_CACHE,
1601    .config =
1602	 PERF_COUNT_HW_CACHE_ITLB		<<  0  |
1603	(PERF_COUNT_HW_CACHE_OP_READ		<<  8) |
1604	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
1605
1606};
1607
1608/*
1609 * Very, very detailed stats (-d -d -d), adding prefetch events:
1610 */
1611	struct perf_event_attr very_very_detailed_attrs[] = {
1612
1613  { .type = PERF_TYPE_HW_CACHE,
1614    .config =
1615	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
1616	(PERF_COUNT_HW_CACHE_OP_PREFETCH	<<  8) |
1617	(PERF_COUNT_HW_CACHE_RESULT_ACCESS	<< 16)				},
1618
1619  { .type = PERF_TYPE_HW_CACHE,
1620    .config =
1621	 PERF_COUNT_HW_CACHE_L1D		<<  0  |
1622	(PERF_COUNT_HW_CACHE_OP_PREFETCH	<<  8) |
1623	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
1624};
1625	struct parse_events_error errinfo;
1626
1627	/* Set attrs if no event is selected and !null_run: */
1628	if (stat_config.null_run)
1629		return 0;
1630
1631	bzero(&errinfo, sizeof(errinfo));
1632	if (transaction_run) {
1633		/* Handle -T as -M transaction. Once platform specific metrics
1634		 * support has been added to the json files, all architectures
1635		 * will use this approach. To determine transaction support
1636		 * on an architecture, test for such a metric name.
1637		 */
1638		if (metricgroup__has_metric("transaction")) {
1639			struct option opt = { .value = &evsel_list };
1640
1641			return metricgroup__parse_groups(&opt, "transaction",
1642							 stat_config.metric_no_group,
1643							stat_config.metric_no_merge,
1644							 &stat_config.metric_events);
1645		}
1646
1647		if (pmu_have_event("cpu", "cycles-ct") &&
1648		    pmu_have_event("cpu", "el-start"))
1649			err = parse_events(evsel_list, transaction_attrs,
1650					   &errinfo);
1651		else
1652			err = parse_events(evsel_list,
1653					   transaction_limited_attrs,
1654					   &errinfo);
1655		if (err) {
1656			fprintf(stderr, "Cannot set up transaction events\n");
1657			parse_events_print_error(&errinfo, transaction_attrs);
1658			return -1;
1659		}
1660		return 0;
1661	}
1662
1663	if (smi_cost) {
1664		int smi;
1665
1666		if (sysfs__read_int(FREEZE_ON_SMI_PATH, &smi) < 0) {
1667			fprintf(stderr, "freeze_on_smi is not supported.\n");
1668			return -1;
1669		}
1670
1671		if (!smi) {
1672			if (sysfs__write_int(FREEZE_ON_SMI_PATH, 1) < 0) {
1673				fprintf(stderr, "Failed to set freeze_on_smi.\n");
1674				return -1;
1675			}
1676			smi_reset = true;
1677		}
1678
1679		if (pmu_have_event("msr", "aperf") &&
1680		    pmu_have_event("msr", "smi")) {
1681			if (!force_metric_only)
1682				stat_config.metric_only = true;
1683			err = parse_events(evsel_list, smi_cost_attrs, &errinfo);
1684		} else {
1685			fprintf(stderr, "Measuring SMI cost requires "
1686				"msr/aperf/, msr/smi/ and cpu/cycles/ support\n");
1687			parse_events_print_error(&errinfo, smi_cost_attrs);
1688			return -1;
1689		}
1690		if (err) {
1691			parse_events_print_error(&errinfo, smi_cost_attrs);
1692			fprintf(stderr, "Cannot set up SMI cost events\n");
1693			return -1;
1694		}
1695		return 0;
1696	}
1697
1698	if (topdown_run) {
1699		char *str = NULL;
1700		bool warn = false;
1701
1702		if (!force_metric_only)
1703			stat_config.metric_only = true;
1704
1705		if (topdown_filter_events(topdown_metric_attrs, &str, 1) < 0) {
1706			pr_err("Out of memory\n");
1707			return -1;
1708		}
1709		if (topdown_metric_attrs[0] && str) {
1710			if (!stat_config.interval && !stat_config.metric_only) {
1711				fprintf(stat_config.output,
1712					"Topdown accuracy may decrease when measuring long periods.\n"
1713					"Please print the result regularly, e.g. -I1000\n");
1714			}
1715			goto setup_metrics;
1716		}
1717
1718		zfree(&str);
1719
1720		if (stat_config.aggr_mode != AGGR_GLOBAL &&
1721		    stat_config.aggr_mode != AGGR_CORE) {
1722			pr_err("top down event configuration requires --per-core mode\n");
1723			return -1;
1724		}
1725		stat_config.aggr_mode = AGGR_CORE;
1726		if (nr_cgroups || !target__has_cpu(&target)) {
1727			pr_err("top down event configuration requires system-wide mode (-a)\n");
1728			return -1;
1729		}
1730
1731		if (topdown_filter_events(topdown_attrs, &str,
1732				arch_topdown_check_group(&warn)) < 0) {
1733			pr_err("Out of memory\n");
1734			return -1;
1735		}
1736		if (topdown_attrs[0] && str) {
1737			if (warn)
1738				arch_topdown_group_warn();
1739setup_metrics:
1740			err = parse_events(evsel_list, str, &errinfo);
1741			if (err) {
1742				fprintf(stderr,
1743					"Cannot set up top down events %s: %d\n",
1744					str, err);
1745				parse_events_print_error(&errinfo, str);
1746				free(str);
1747				return -1;
1748			}
1749		} else {
1750			fprintf(stderr, "System does not support topdown\n");
1751			return -1;
1752		}
1753		free(str);
1754	}
1755
1756	if (!evsel_list->core.nr_entries) {
1757		if (target__has_cpu(&target))
1758			default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK;
1759
1760		if (evlist__add_default_attrs(evsel_list, default_attrs0) < 0)
1761			return -1;
1762		if (pmu_have_event("cpu", "stalled-cycles-frontend")) {
1763			if (evlist__add_default_attrs(evsel_list, frontend_attrs) < 0)
1764				return -1;
1765		}
1766		if (pmu_have_event("cpu", "stalled-cycles-backend")) {
1767			if (evlist__add_default_attrs(evsel_list, backend_attrs) < 0)
1768				return -1;
1769		}
1770		if (evlist__add_default_attrs(evsel_list, default_attrs1) < 0)
1771			return -1;
1772	}
1773
1774	/* Detailed events get appended to the event list: */
1775
1776	if (detailed_run < 1)
1777		return 0;
1778
1779	/* Append detailed run extra attributes: */
1780	if (evlist__add_default_attrs(evsel_list, detailed_attrs) < 0)
1781		return -1;
1782
1783	if (detailed_run < 2)
1784		return 0;
1785
1786	/* Append very detailed run extra attributes: */
1787	if (evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0)
1788		return -1;
1789
1790	if (detailed_run < 3)
1791		return 0;
1792
1793	/* Append very, very detailed run extra attributes: */
1794	return evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
1795}
1796
1797static const char * const stat_record_usage[] = {
1798	"perf stat record [<options>]",
1799	NULL,
1800};
1801
1802static void init_features(struct perf_session *session)
1803{
1804	int feat;
1805
1806	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
1807		perf_header__set_feat(&session->header, feat);
1808
1809	perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
1810	perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
1811	perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
1812	perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
1813	perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
1814}
1815
1816static int __cmd_record(int argc, const char **argv)
1817{
1818	struct perf_session *session;
1819	struct perf_data *data = &perf_stat.data;
1820
1821	argc = parse_options(argc, argv, stat_options, stat_record_usage,
1822			     PARSE_OPT_STOP_AT_NON_OPTION);
1823
1824	if (output_name)
1825		data->path = output_name;
1826
1827	if (stat_config.run_count != 1 || forever) {
1828		pr_err("Cannot use -r option with perf stat record.\n");
1829		return -1;
1830	}
1831
1832	session = perf_session__new(data, false, NULL);
1833	if (IS_ERR(session)) {
1834		pr_err("Perf session creation failed\n");
1835		return PTR_ERR(session);
1836	}
1837
1838	init_features(session);
1839
1840	session->evlist   = evsel_list;
1841	perf_stat.session = session;
1842	perf_stat.record  = true;
1843	return argc;
1844}
1845
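/*
 * A STAT_ROUND event closes one printing round in 'perf stat report'; the
 * FINAL round also carries the total wall-clock time of the run.
 */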
1846static int process_stat_round_event(struct perf_session *session,
1847				    union perf_event *event)
1848{
1849	struct perf_record_stat_round *stat_round = &event->stat_round;
1850	struct evsel *counter;
1851	struct timespec tsh, *ts = NULL;
1852	const char **argv = session->header.env.cmdline_argv;
1853	int argc = session->header.env.nr_cmdline;
1854
1855	evlist__for_each_entry(evsel_list, counter)
1856		perf_stat_process_counter(&stat_config, counter);
1857
1858	if (stat_round->type == PERF_STAT_ROUND_TYPE__FINAL)
1859		update_stats(&walltime_nsecs_stats, stat_round->time);
1860
1861	if (stat_config.interval && stat_round->time) {
1862		tsh.tv_sec  = stat_round->time / NSEC_PER_SEC;
1863		tsh.tv_nsec = stat_round->time % NSEC_PER_SEC;
1864		ts = &tsh;
1865	}
1866
1867	print_counters(ts, argc, argv);
1868	return 0;
1869}
1870
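/*
 * Read back the stat config that 'perf stat record' saved.  Per-task data
 * (an empty CPU map) cannot be CPU-aggregated, so only warn if an
 * aggregation mode was requested; otherwise honour the --per-* override
 * from 'perf stat report' and set up the aggregation maps.
 */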
1871static
1872int process_stat_config_event(struct perf_session *session,
1873			      union perf_event *event)
1874{
1875	struct perf_tool *tool = session->tool;
1876	struct perf_stat *st = container_of(tool, struct perf_stat, tool);
1877
1878	perf_event__read_stat_config(&stat_config, &event->stat_config);
1879
1880	if (perf_cpu_map__empty(st->cpus)) {
1881		if (st->aggr_mode != AGGR_UNSET)
1882			pr_warning("warning: processing task data, aggregation mode not set\n");
1883		return 0;
1884	}
1885
1886	if (st->aggr_mode != AGGR_UNSET)
1887		stat_config.aggr_mode = st->aggr_mode;
1888
1889	if (perf_stat.data.is_pipe)
1890		perf_stat_init_aggr_mode();
1891	else
1892		perf_stat_init_aggr_mode_file(st);
1893
1894	return 0;
1895}
1896
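/*
 * Once both the CPU and thread maps have been reconstructed from the file,
 * attach them to the evlist and allocate the counter storage.  Guarded so
 * the allocation only happens once.
 */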
1897static int set_maps(struct perf_stat *st)
1898{
1899	if (!st->cpus || !st->threads)
1900		return 0;
1901
1902	if (WARN_ONCE(st->maps_allocated, "stats double allocation\n"))
1903		return -EINVAL;
1904
1905	perf_evlist__set_maps(&evsel_list->core, st->cpus, st->threads);
1906
1907	if (perf_evlist__alloc_stats(evsel_list, true))
1908		return -ENOMEM;
1909
1910	st->maps_allocated = true;
1911	return 0;
1912}
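/* Rebuild the thread map saved in the data file; duplicates are ignored. */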
1913
1914static
1915int process_thread_map_event(struct perf_session *session,
1916			     union perf_event *event)
1917{
1918	struct perf_tool *tool = session->tool;
1919	struct perf_stat *st = container_of(tool, struct perf_stat, tool);
1920
1921	if (st->threads) {
1922		pr_warning("Extra thread map event, ignoring.\n");
1923		return 0;
1924	}
1925
1926	st->threads = thread_map__new_event(&event->thread_map);
1927	if (!st->threads)
1928		return -ENOMEM;
1929
1930	return set_maps(st);
1931}
1932
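/* Rebuild the CPU map saved in the data file; duplicates are ignored. */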
1933static
1934int process_cpu_map_event(struct perf_session *session,
1935			  union perf_event *event)
1936{
1937	struct perf_tool *tool = session->tool;
1938	struct perf_stat *st = container_of(tool, struct perf_stat, tool);
1939	struct perf_cpu_map *cpus;
1940
1941	if (st->cpus) {
1942		pr_warning("Extra cpu map event, ignoring.\n");
1943		return 0;
1944	}
1945
1946	cpus = cpu_map__new_data(&event->cpu_map.data);
1947	if (!cpus)
1948		return -ENOMEM;
1949
1950	st->cpus = cpus;
1951	return set_maps(st);
1952}
1953
1954static const char * const stat_report_usage[] = {
1955	"perf stat report [<options>]",
1956	NULL,
1957};
1958
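/*
 * Tool callbacks used by 'perf stat report' to replay the attr, map, config
 * and counter events that 'perf stat record' wrote into the data file.
 */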
1959static struct perf_stat perf_stat = {
1960	.tool = {
1961		.attr		= perf_event__process_attr,
1962		.event_update	= perf_event__process_event_update,
1963		.thread_map	= process_thread_map_event,
1964		.cpu_map	= process_cpu_map_event,
1965		.stat_config	= process_stat_config_event,
1966		.stat		= perf_event__process_stat_event,
1967		.stat_round	= process_stat_round_event,
1968	},
1969	.aggr_mode = AGGR_UNSET,
1970};
1971
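/*
 * 'perf stat report': read the counters back from a data file (perf.data by
 * default, stdin when it is a pipe) and print them with the requested
 * aggregation, e.g.:
 *
 *   perf stat report -i perf.data --per-core
 */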
1972static int __cmd_report(int argc, const char **argv)
1973{
1974	struct perf_session *session;
1975	const struct option options[] = {
1976	OPT_STRING('i', "input", &input_name, "file", "input file name"),
1977	OPT_SET_UINT(0, "per-socket", &perf_stat.aggr_mode,
1978		     "aggregate counts per processor socket", AGGR_SOCKET),
1979	OPT_SET_UINT(0, "per-die", &perf_stat.aggr_mode,
1980		     "aggregate counts per processor die", AGGR_DIE),
1981	OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode,
1982		     "aggregate counts per physical processor core", AGGR_CORE),
1983	OPT_SET_UINT(0, "per-node", &perf_stat.aggr_mode,
1984		     "aggregate counts per numa node", AGGR_NODE),
1985	OPT_SET_UINT('A', "no-aggr", &perf_stat.aggr_mode,
1986		     "disable CPU count aggregation", AGGR_NONE),
1987	OPT_END()
1988	};
1989	struct stat st;
1990	int ret;
1991
1992	argc = parse_options(argc, argv, options, stat_report_usage, 0);
1993
1994	if (!input_name || !strlen(input_name)) {
1995		if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode))
1996			input_name = "-";
1997		else
1998			input_name = "perf.data";
1999	}
2000
2001	perf_stat.data.path = input_name;
2002	perf_stat.data.mode = PERF_DATA_MODE_READ;
2003
2004	session = perf_session__new(&perf_stat.data, false, &perf_stat.tool);
2005	if (IS_ERR(session))
2006		return PTR_ERR(session);
2007
2008	perf_stat.session  = session;
2009	stat_config.output = stderr;
2010	evsel_list         = session->evlist;
2011
2012	ret = perf_session__process_events(session);
2013	if (ret)
2014		return ret;
2015
2016	perf_session__delete(session);
2017	return 0;
2018}
2019
2020static void setup_system_wide(int forks)
2021{
2022	/*
2023	 * Make system wide (-a) the default target if
2024	 * no target was specified and one of the following
2025	 * conditions is met:
2026	 *
2027	 *   - there's no workload specified
2028	 *   - there is a workload specified but all requested
2029	 *     events are system-wide events
2030	 */
2031	if (!target__none(&target))
2032		return;
2033
2034	if (!forks)
2035		target.system_wide = true;
2036	else {
2037		struct evsel *counter;
2038
2039		evlist__for_each_entry(evsel_list, counter) {
2040			if (!counter->core.system_wide &&
2041			    strcmp(counter->name, "duration_time")) {
2042				return;
2043			}
2044		}
2045
2046		if (evsel_list->core.nr_entries)
2047			target.system_wide = true;
2048	}
2049}
2050
2051int cmd_stat(int argc, const char **argv)
2052{
2053	const char * const stat_usage[] = {
2054		"perf stat [<options>] [<command>]",
2055		NULL
2056	};
2057	int status = -EINVAL, run_idx;
2058	const char *mode;
2059	FILE *output = stderr;
2060	unsigned int interval, timeout;
2061	const char * const stat_subcommands[] = { "record", "report" };
2062
2063	setlocale(LC_ALL, "");
2064
2065	evsel_list = evlist__new();
2066	if (evsel_list == NULL)
2067		return -ENOMEM;
2068
2069	parse_events__shrink_config_terms();
2070
2071	/* String-parsing callback-based options would segfault when negated */
2072	set_option_flag(stat_options, 'e', "event", PARSE_OPT_NONEG);
2073	set_option_flag(stat_options, 'M', "metrics", PARSE_OPT_NONEG);
2074	set_option_flag(stat_options, 'G', "cgroup", PARSE_OPT_NONEG);
2075
2076	argc = parse_options_subcommand(argc, argv, stat_options, stat_subcommands,
2077					(const char **) stat_usage,
2078					PARSE_OPT_STOP_AT_NON_OPTION);
2079	perf_stat__collect_metric_expr(evsel_list);
2080	perf_stat__init_shadow_stats();
2081
2082	if (stat_config.csv_sep) {
2083		stat_config.csv_output = true;
2084		if (!strcmp(stat_config.csv_sep, "\\t"))
2085			stat_config.csv_sep = "\t";
2086	} else
2087		stat_config.csv_sep = DEFAULT_SEPARATOR;
2088
2089	if (argc && !strncmp(argv[0], "rec", 3)) {
2090		argc = __cmd_record(argc, argv);
2091		if (argc < 0)
2092			return -1;
2093	} else if (argc && !strncmp(argv[0], "rep", 3))
2094		return __cmd_report(argc, argv);
2095
2096	interval = stat_config.interval;
2097	timeout = stat_config.timeout;
2098
2099	/*
2100	 * For the record command, -o is already taken care of.
2101	 */
2102	if (!STAT_RECORD && output_name && strcmp(output_name, "-"))
2103		output = NULL;
2104
2105	if (output_name && output_fd) {
2106		fprintf(stderr, "cannot use both --output and --log-fd\n");
2107		parse_options_usage(stat_usage, stat_options, "o", 1);
2108		parse_options_usage(NULL, stat_options, "log-fd", 0);
2109		goto out;
2110	}
2111
2112	if (stat_config.metric_only && stat_config.aggr_mode == AGGR_THREAD) {
2113		fprintf(stderr, "--metric-only is not supported with --per-thread\n");
2114		goto out;
2115	}
2116
2117	if (stat_config.metric_only && stat_config.run_count > 1) {
2118		fprintf(stderr, "--metric-only is not supported with -r\n");
2119		goto out;
2120	}
2121
2122	if (stat_config.walltime_run_table && stat_config.run_count <= 1) {
2123		fprintf(stderr, "--table is only supported with -r\n");
2124		parse_options_usage(stat_usage, stat_options, "r", 1);
2125		parse_options_usage(NULL, stat_options, "table", 0);
2126		goto out;
2127	}
2128
2129	if (output_fd < 0) {
2130		fprintf(stderr, "argument to --log-fd must be > 0\n");
2131		parse_options_usage(stat_usage, stat_options, "log-fd", 0);
2132		goto out;
2133	}
2134
2135	if (!output) {
2136		struct timespec tm;
2137		mode = append_file ? "a" : "w";
2138
2139		output = fopen(output_name, mode);
2140		if (!output) {
2141			perror("failed to create output file");
2142			return -1;
2143		}
2144		clock_gettime(CLOCK_REALTIME, &tm);
2145		fprintf(output, "# started on %s\n", ctime(&tm.tv_sec));
2146	} else if (output_fd > 0) {
2147		mode = append_file ? "a" : "w";
2148		output = fdopen(output_fd, mode);
2149		if (!output) {
2150			perror("Failed opening logfd");
2151			return -errno;
2152		}
2153	}
2154
2155	stat_config.output = output;
2156
2157	/*
2158	 * let the spreadsheet do the pretty-printing
2159	 */
2160	if (stat_config.csv_output) {
2161		/* User explicitly passed -B? */
2162		if (big_num_opt == 1) {
2163			fprintf(stderr, "-B option not supported with -x\n");
2164			parse_options_usage(stat_usage, stat_options, "B", 1);
2165			parse_options_usage(NULL, stat_options, "x", 1);
2166			goto out;
2167		} else /* Nope, so disable big number formatting */
2168			stat_config.big_num = false;
2169	} else if (big_num_opt == 0) /* User passed --no-big-num */
2170		stat_config.big_num = false;
2171
2172	setup_system_wide(argc);
2173
2174	/*
2175	 * Display user/system times only for single
2176	 * run and when there's specified tracee.
2177	 */
2178	if ((stat_config.run_count == 1) && target__none(&target))
2179		stat_config.ru_display = true;
2180
2181	if (stat_config.run_count < 0) {
2182		pr_err("Run count must be a positive number\n");
2183		parse_options_usage(stat_usage, stat_options, "r", 1);
2184		goto out;
2185	} else if (stat_config.run_count == 0) {
2186		forever = true;
2187		stat_config.run_count = 1;
2188	}
2189
2190	if (stat_config.walltime_run_table) {
2191		stat_config.walltime_run = zalloc(stat_config.run_count * sizeof(stat_config.walltime_run[0]));
2192		if (!stat_config.walltime_run) {
2193			pr_err("failed to setup -r option\n");
2194			goto out;
2195		}
2196	}
2197
2198	if ((stat_config.aggr_mode == AGGR_THREAD) &&
2199		!target__has_task(&target)) {
2200		if (!target.system_wide || target.cpu_list) {
2201			fprintf(stderr, "The --per-thread option is only "
2202				"available when monitoring via -p, -t or -a "
2203				"options, or with --per-thread alone.\n");
2204			parse_options_usage(NULL, stat_options, "p", 1);
2205			parse_options_usage(NULL, stat_options, "t", 1);
2206			goto out;
2207		}
2208	}
2209
2210	/*
2211	 * no_aggr and cgroup are for system-wide only;
2212	 * --per-thread is aggregated per thread, we don't mix it with CPU mode.
2213	 */
2214	if (((stat_config.aggr_mode != AGGR_GLOBAL &&
2215	      stat_config.aggr_mode != AGGR_THREAD) || nr_cgroups) &&
2216	    !target__has_cpu(&target)) {
2217		fprintf(stderr, "both cgroup and no-aggregation "
2218			"modes are only available in system-wide mode\n");
2219
2220		parse_options_usage(stat_usage, stat_options, "G", 1);
2221		parse_options_usage(NULL, stat_options, "A", 1);
2222		parse_options_usage(NULL, stat_options, "a", 1);
2223		goto out;
2224	}
2225
2226	if (add_default_attributes())
2227		goto out;
2228
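	/*
	 * --for-each-cgroup expands the event list for every listed cgroup,
	 * so it cannot be combined with an explicit -G/--cgroup list.
	 */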
2229	if (stat_config.cgroup_list) {
2230		if (nr_cgroups > 0) {
2231			pr_err("--cgroup and --for-each-cgroup cannot be used together\n");
2232			parse_options_usage(stat_usage, stat_options, "G", 1);
2233			parse_options_usage(NULL, stat_options, "for-each-cgroup", 0);
2234			goto out;
2235		}
2236
2237		if (evlist__expand_cgroup(evsel_list, stat_config.cgroup_list,
2238					  &stat_config.metric_events, true) < 0)
2239			goto out;
2240	}
2241
2242	target__validate(&target);
2243
2244	if ((stat_config.aggr_mode == AGGR_THREAD) && (target.system_wide))
2245		target.per_thread = true;
2246
2247	if (perf_evlist__create_maps(evsel_list, &target) < 0) {
2248		if (target__has_task(&target)) {
2249			pr_err("Problems finding the threads to monitor\n");
2250			parse_options_usage(stat_usage, stat_options, "p", 1);
2251			parse_options_usage(NULL, stat_options, "t", 1);
2252		} else if (target__has_cpu(&target)) {
2253			perror("failed to parse CPUs map");
2254			parse_options_usage(stat_usage, stat_options, "C", 1);
2255			parse_options_usage(NULL, stat_options, "a", 1);
2256		}
2257		goto out;
2258	}
2259
2260	evlist__check_cpu_maps(evsel_list);
2261
2262	/*
2263	 * Initialize thread_map with comm names,
2264	 * so we can print them in the output.
2265	 */
2266	if (stat_config.aggr_mode == AGGR_THREAD) {
2267		thread_map__read_comms(evsel_list->core.threads);
2268		if (target.system_wide) {
2269			if (runtime_stat_new(&stat_config,
2270				perf_thread_map__nr(evsel_list->core.threads))) {
2271				goto out;
2272			}
2273		}
2274	}
2275
2276	if (stat_config.aggr_mode == AGGR_NODE)
2277		cpu__setup_cpunode_map();
2278
2279	if (stat_config.times && interval)
2280		interval_count = true;
2281	else if (stat_config.times && !interval) {
2282		pr_err("interval-count option should be used together with "
2283				"interval-print.\n");
2284		parse_options_usage(stat_usage, stat_options, "interval-count", 0);
2285		parse_options_usage(stat_usage, stat_options, "I", 1);
2286		goto out;
2287	}
2288
2289	if (timeout && timeout < 100) {
2290		if (timeout < 10) {
2291			pr_err("timeout must be >= 10ms.\n");
2292			parse_options_usage(stat_usage, stat_options, "timeout", 0);
2293			goto out;
2294		} else
2295			pr_warning("timeout < 100ms. "
2296				   "The overhead percentage could be high in some cases. "
2297				   "Please proceed with caution.\n");
2298	}
2299	if (timeout && interval) {
2300		pr_err("timeout option is not supported with interval-print.\n");
2301		parse_options_usage(stat_usage, stat_options, "timeout", 0);
2302		parse_options_usage(stat_usage, stat_options, "I", 1);
2303		goto out;
2304	}
2305
2306	if (perf_evlist__alloc_stats(evsel_list, interval))
2307		goto out;
2308
2309	if (perf_stat_init_aggr_mode())
2310		goto out;
2311
2312	/*
2313	 * Set sample_type to PERF_SAMPLE_IDENTIFIER, which should be harmless
2314	 * and avoids older tools showing confusing messages.
2315	 *
2316	 * However, for pipe sessions we need to keep it zero,
2317	 * because perf script's perf_evsel__check_attr() is triggered
2318	 * by attr->sample_type != 0, and we can't run it on
2319	 * stat sessions.
2320	 */
2321	stat_config.identifier = !(STAT_RECORD && perf_stat.data.is_pipe);
2322
2323	/*
2324	 * We don't want to block the signals - that would cause
2325	 * child tasks to inherit that and Ctrl-C would not work.
2326	 * What we want is for Ctrl-C to work in the exec()-ed
2327	 * task, but to be ignored by perf stat itself:
2328	 */
2329	atexit(sig_atexit);
2330	if (!forever)
2331		signal(SIGINT,  skip_signal);
2332	signal(SIGCHLD, skip_signal);
2333	signal(SIGALRM, skip_signal);
2334	signal(SIGABRT, skip_signal);
2335
2336	if (evlist__initialize_ctlfd(evsel_list, stat_config.ctl_fd, stat_config.ctl_fd_ack))
2337		goto out;
2338
2339	status = 0;
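	/*
	 * Main measurement loop: one pass per -r run, or endless when the
	 * run count is 0 ("forever"); in that case each run's counters are
	 * printed and reset before the next one starts.
	 */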
2340	for (run_idx = 0; forever || run_idx < stat_config.run_count; run_idx++) {
2341		if (stat_config.run_count != 1 && verbose > 0)
2342			fprintf(output, "[ perf stat: executing run #%d ... ]\n",
2343				run_idx + 1);
2344
2345		if (run_idx != 0)
2346			perf_evlist__reset_prev_raw_counts(evsel_list);
2347
2348		status = run_perf_stat(argc, argv, run_idx);
2349		if (forever && status != -1 && !interval) {
2350			print_counters(NULL, argc, argv);
2351			perf_stat__reset_stats();
2352		}
2353	}
2354
2355	if (!forever && status != -1 && (!interval || stat_config.summary))
2356		print_counters(NULL, argc, argv);
2357
2358	evlist__finalize_ctlfd(evsel_list);
2359
2360	if (STAT_RECORD) {
2361		/*
2362		 * We synthesize the kernel mmap record just so that older tools
2363		 * don't emit warnings about not being able to resolve symbols
2364		 * due to /proc/sys/kernel/kptr_restrict settings and instead provide
2365		 * a saner message about no samples being in the perf.data file.
2366		 *
2367		 * This also serves to suppress a warning about f_header.data.size == 0
2368		 * in header.c at the moment 'perf stat record' gets introduced, which
2369		 * is not really needed once we start adding the stat specific PERF_RECORD_
2370		 * records, but the need to suppress the kptr_restrict messages in older
2371		 * tools remains. -acme
2372		 */
2373		int fd = perf_data__fd(&perf_stat.data);
2374		int err = perf_event__synthesize_kernel_mmap((void *)&perf_stat,
2375							     process_synthesized_event,
2376							     &perf_stat.session->machines.host);
2377		if (err) {
2378			pr_warning("Couldn't synthesize the kernel mmap record, harmless, "
2379				   "older tools may produce warnings about this file.\n");
2380		}
2381
2382		if (!interval) {
2383			if (WRITE_STAT_ROUND_EVENT(walltime_nsecs_stats.max, FINAL))
2384				pr_err("failed to write stat round event\n");
2385		}
2386
2387		if (!perf_stat.data.is_pipe) {
2388			perf_stat.session->header.data_size += perf_stat.bytes_written;
2389			perf_session__write_header(perf_stat.session, evsel_list, fd, true);
2390		}
2391
2392		evlist__close(evsel_list);
2393		perf_session__delete(perf_stat.session);
2394	}
2395
2396	perf_stat__exit_aggr_mode();
2397	perf_evlist__free_stats(evsel_list);
2398out:
2399	zfree(&stat_config.walltime_run);
2400
2401	if (smi_cost && smi_reset)
2402		sysfs__write_int(FREEZE_ON_SMI_PATH, 0);
2403
2404	evlist__delete(evsel_list);
2405
2406	metricgroup__rblist_exit(&stat_config.metric_events);
2407	runtime_stat_delete(&stat_config);
2408	evlist__close_control(stat_config.ctl_fd, stat_config.ctl_fd_ack, &stat_config.ctl_fd_close);
2409
2410	return status;
2411}
2412