1// SPDX-License-Identifier: GPL-2.0
2/*
3 * builtin-record.c
4 *
5 * Builtin record command: Record the profile of a workload
6 * (or a CPU, or a PID) into the perf.data output file - for
7 * later analysis via perf report.
8 */
9#include "builtin.h"
10
11#include "util/build-id.h"
12#include <subcmd/parse-options.h>
13#include <internal/xyarray.h>
14#include "util/parse-events.h"
15#include "util/config.h"
16
17#include "util/callchain.h"
18#include "util/cgroup.h"
19#include "util/header.h"
20#include "util/event.h"
21#include "util/evlist.h"
22#include "util/evsel.h"
23#include "util/debug.h"
24#include "util/mmap.h"
25#include "util/mutex.h"
26#include "util/target.h"
27#include "util/session.h"
28#include "util/tool.h"
29#include "util/symbol.h"
30#include "util/record.h"
31#include "util/cpumap.h"
32#include "util/thread_map.h"
33#include "util/data.h"
34#include "util/perf_regs.h"
35#include "util/auxtrace.h"
36#include "util/tsc.h"
37#include "util/parse-branch-options.h"
38#include "util/parse-regs-options.h"
39#include "util/perf_api_probe.h"
40#include "util/trigger.h"
41#include "util/perf-hooks.h"
42#include "util/cpu-set-sched.h"
43#include "util/synthetic-events.h"
44#include "util/time-utils.h"
45#include "util/units.h"
46#include "util/bpf-event.h"
47#include "util/util.h"
48#include "util/pfm.h"
49#include "util/pmu.h"
50#include "util/pmus.h"
51#include "util/clockid.h"
52#include "util/off_cpu.h"
53#include "util/bpf-filter.h"
54#include "asm/bug.h"
55#include "perf.h"
56#include "cputopo.h"
57
58#include <errno.h>
59#include <inttypes.h>
60#include <locale.h>
61#include <poll.h>
62#include <pthread.h>
63#include <unistd.h>
64#ifndef HAVE_GETTID
65#include <syscall.h>
66#endif
67#include <sched.h>
68#include <signal.h>
69#ifdef HAVE_EVENTFD_SUPPORT
70#include <sys/eventfd.h>
71#endif
72#include <sys/mman.h>
73#include <sys/wait.h>
74#include <sys/types.h>
75#include <sys/stat.h>
76#include <fcntl.h>
77#include <linux/err.h>
78#include <linux/string.h>
79#include <linux/time64.h>
80#include <linux/zalloc.h>
81#include <linux/bitmap.h>
82#include <sys/time.h>
83
84struct switch_output {
85	bool		 enabled;
86	bool		 signal;
87	unsigned long	 size;
88	unsigned long	 time;
89	const char	*str;
90	bool		 set;
91	char		 **filenames;
92	int		 num_files;
93	int		 cur_file;
94};
95
96struct thread_mask {
97	struct mmap_cpu_mask	maps;
98	struct mmap_cpu_mask	affinity;
99};
100
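/*
 * Per-thread state for parallel ('--threads') recording: the kernel tid,
 * the CPU masks this thread reads from and is affined to, the msg/ack pipes
 * used to synchronize startup and shutdown with the main thread, the pollfd
 * set and mmaps (regular and overwrite) the thread services, and byte
 * counters mirroring the ones kept in struct record.
 */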
101struct record_thread {
102	pid_t			tid;
103	struct thread_mask	*mask;
104	struct {
105		int		msg[2];
106		int		ack[2];
107	} pipes;
108	struct fdarray		pollfd;
109	int			ctlfd_pos;
110	int			nr_mmaps;
111	struct mmap		**maps;
112	struct mmap		**overwrite_maps;
113	struct record		*rec;
114	unsigned long long	samples;
115	unsigned long		waking;
116	u64			bytes_written;
117	u64			bytes_transferred;
118	u64			bytes_compressed;
119};
120
121static __thread struct record_thread *thread;
122
123enum thread_msg {
124	THREAD_MSG__UNDEFINED = 0,
125	THREAD_MSG__READY,
126	THREAD_MSG__MAX,
127};
128
129static const char *thread_msg_tags[THREAD_MSG__MAX] = {
130	"UNDEFINED", "READY"
131};
132
133enum thread_spec {
134	THREAD_SPEC__UNDEFINED = 0,
135	THREAD_SPEC__CPU,
136	THREAD_SPEC__CORE,
137	THREAD_SPEC__PACKAGE,
138	THREAD_SPEC__NUMA,
139	THREAD_SPEC__USER,
140	THREAD_SPEC__MAX,
141};
142
143static const char *thread_spec_tags[THREAD_SPEC__MAX] = {
144	"undefined", "cpu", "core", "package", "numa", "user"
145};
146
147struct pollfd_index_map {
148	int evlist_pollfd_index;
149	int thread_pollfd_index;
150};
151
152struct record {
153	struct perf_tool	tool;
154	struct record_opts	opts;
155	u64			bytes_written;
156	u64			thread_bytes_written;
157	struct perf_data	data;
158	struct auxtrace_record	*itr;
159	struct evlist	*evlist;
160	struct perf_session	*session;
161	struct evlist		*sb_evlist;
162	pthread_t		thread_id;
163	int			realtime_prio;
164	bool			switch_output_event_set;
165	bool			no_buildid;
166	bool			no_buildid_set;
167	bool			no_buildid_cache;
168	bool			no_buildid_cache_set;
169	bool			buildid_all;
170	bool			buildid_mmap;
171	bool			timestamp_filename;
172	bool			timestamp_boundary;
173	bool			off_cpu;
174	struct switch_output	switch_output;
175	unsigned long long	samples;
176	unsigned long		output_max_size;	/* = 0: unlimited */
177	struct perf_debuginfod	debuginfod;
178	int			nr_threads;
179	struct thread_mask	*thread_masks;
180	struct record_thread	*thread_data;
181	struct pollfd_index_map	*index_map;
182	size_t			index_map_sz;
183	size_t			index_map_cnt;
184};
185
186static volatile int done;
187
188static volatile int auxtrace_record__snapshot_started;
189static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
190static DEFINE_TRIGGER(switch_output_trigger);
191
192static const char *affinity_tags[PERF_AFFINITY_MAX] = {
193	"SYS", "NODE", "CPU"
194};
195
196#ifndef HAVE_GETTID
197static inline pid_t gettid(void)
198{
199	return (pid_t)syscall(__NR_gettid);
200}
201#endif
202
203static int record__threads_enabled(struct record *rec)
204{
205	return rec->opts.threads_spec;
206}
207
208static bool switch_output_signal(struct record *rec)
209{
210	return rec->switch_output.signal &&
211	       trigger_is_ready(&switch_output_trigger);
212}
213
214static bool switch_output_size(struct record *rec)
215{
216	return rec->switch_output.size &&
217	       trigger_is_ready(&switch_output_trigger) &&
218	       (rec->bytes_written >= rec->switch_output.size);
219}
220
221static bool switch_output_time(struct record *rec)
222{
223	return rec->switch_output.time &&
224	       trigger_is_ready(&switch_output_trigger);
225}
226
227static u64 record__bytes_written(struct record *rec)
228{
229	return rec->bytes_written + rec->thread_bytes_written;
230}
231
232static bool record__output_max_size_exceeded(struct record *rec)
233{
234	return rec->output_max_size &&
235	       (record__bytes_written(rec) >= rec->output_max_size);
236}
237
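/*
 * Write a block of bytes to the output: to the per-mmap file in parallel
 * (directory) mode, otherwise to the main perf.data file. Updates the byte
 * counters, stops the session once rec->output_max_size is exceeded and
 * lets switch_output_size() fire the switch-output trigger when its size
 * threshold is crossed.
 */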
238static int record__write(struct record *rec, struct mmap *map __maybe_unused,
239			 void *bf, size_t size)
240{
241	struct perf_data_file *file = &rec->session->data->file;
242
243	if (map && map->file)
244		file = map->file;
245
246	if (perf_data_file__write(file, bf, size) < 0) {
247		pr_err("failed to write perf data, error: %m\n");
248		return -1;
249	}
250
251	if (map && map->file) {
252		thread->bytes_written += size;
253		rec->thread_bytes_written += size;
254	} else {
255		rec->bytes_written += size;
256	}
257
258	if (record__output_max_size_exceeded(rec) && !done) {
259		fprintf(stderr, "[ perf record: perf size limit reached (%" PRIu64 " KB),"
260				" stopping session ]\n",
261				record__bytes_written(rec) >> 10);
262		done = 1;
263	}
264
265	if (switch_output_size(rec))
266		trigger_hit(&switch_output_trigger);
267
268	return 0;
269}
270
271static int record__aio_enabled(struct record *rec);
272static int record__comp_enabled(struct record *rec);
273static size_t zstd_compress(struct perf_session *session, struct mmap *map,
274			    void *dst, size_t dst_size, void *src, size_t src_size);
275
276#ifdef HAVE_AIO_SUPPORT
277static int record__aio_write(struct aiocb *cblock, int trace_fd,
278		void *buf, size_t size, off_t off)
279{
280	int rc;
281
282	cblock->aio_fildes = trace_fd;
283	cblock->aio_buf    = buf;
284	cblock->aio_nbytes = size;
285	cblock->aio_offset = off;
286	cblock->aio_sigevent.sigev_notify = SIGEV_NONE;
287
288	do {
289		rc = aio_write(cblock);
290		if (rc == 0) {
291			break;
292		} else if (errno != EAGAIN) {
293			cblock->aio_fildes = -1;
294			pr_err("failed to queue perf data, error: %m\n");
295			break;
296		}
297	} while (1);
298
299	return rc;
300}
301
302static int record__aio_complete(struct mmap *md, struct aiocb *cblock)
303{
304	void *rem_buf;
305	off_t rem_off;
306	size_t rem_size;
307	int rc, aio_errno;
308	ssize_t aio_ret, written;
309
310	aio_errno = aio_error(cblock);
311	if (aio_errno == EINPROGRESS)
312		return 0;
313
314	written = aio_ret = aio_return(cblock);
315	if (aio_ret < 0) {
316		if (aio_errno != EINTR)
317			pr_err("failed to write perf data, error: %m\n");
318		written = 0;
319	}
320
321	rem_size = cblock->aio_nbytes - written;
322
323	if (rem_size == 0) {
324		cblock->aio_fildes = -1;
325		/*
326		 * md->refcount is incremented in record__aio_pushfn() for
327		 * every aio write request started in record__aio_push() so
328		 * decrement it because the request is now complete.
329		 */
330		perf_mmap__put(&md->core);
331		rc = 1;
332	} else {
		/*
		 * The aio write request may need to be restarted with the
		 * remainder if the kernel didn't write the whole chunk at
		 * once.
		 */
338		rem_off = cblock->aio_offset + written;
339		rem_buf = (void *)(cblock->aio_buf + written);
340		record__aio_write(cblock, cblock->aio_fildes,
341				rem_buf, rem_size, rem_off);
342		rc = 0;
343	}
344
345	return rc;
346}
347
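/*
 * Reap completed aio write requests for this mmap. With sync_all == false,
 * return the index of the first control block that is free for reuse;
 * otherwise keep suspending until every outstanding request has completed,
 * then return -1.
 */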
348static int record__aio_sync(struct mmap *md, bool sync_all)
349{
350	struct aiocb **aiocb = md->aio.aiocb;
351	struct aiocb *cblocks = md->aio.cblocks;
352	struct timespec timeout = { 0, 1000 * 1000  * 1 }; /* 1ms */
353	int i, do_suspend;
354
355	do {
356		do_suspend = 0;
357		for (i = 0; i < md->aio.nr_cblocks; ++i) {
358			if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
359				if (sync_all)
360					aiocb[i] = NULL;
361				else
362					return i;
363			} else {
				/*
				 * The started aio write is not complete yet,
				 * so it has to be waited for before the next
				 * allocation.
				 */
369				aiocb[i] = &cblocks[i];
370				do_suspend = 1;
371			}
372		}
373		if (!do_suspend)
374			return -1;
375
376		while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
377			if (!(errno == EAGAIN || errno == EINTR))
378				pr_err("failed to sync perf data, error: %m\n");
379		}
380	} while (1);
381}
382
383struct record_aio {
384	struct record	*rec;
385	void		*data;
386	size_t		size;
387};
388
389static int record__aio_pushfn(struct mmap *map, void *to, void *buf, size_t size)
390{
391	struct record_aio *aio = to;
392
	/*
	 * The map->core.base data pointed to by buf is copied into a free
	 * map->aio.data[] buffer to release space in the kernel buffer as fast
	 * as possible, via perf_mmap__consume() called from perf_mmap__push().
	 *
	 * That lets the kernel proceed with storing more profiling data into
	 * the kernel buffer earlier than other per-cpu kernel buffers are handled.
	 *
	 * Copying can be done in two steps in case the chunk of profiling data
	 * crosses the upper bound of the kernel buffer. In this case we first move
	 * the part of the data from map->start up to the upper bound and then the
	 * remainder from the beginning of the kernel buffer up to the end of the
	 * data chunk.
	 */
406
407	if (record__comp_enabled(aio->rec)) {
408		size = zstd_compress(aio->rec->session, NULL, aio->data + aio->size,
409				     mmap__mmap_len(map) - aio->size,
410				     buf, size);
411	} else {
412		memcpy(aio->data + aio->size, buf, size);
413	}
414
415	if (!aio->size) {
		/*
		 * Increment map->refcount to guard the map->aio.data[] buffer
		 * from premature deallocation, because the map object can be
		 * released before the aio write request started on the
		 * map->aio.data[] buffer completes.
		 *
		 * perf_mmap__put() is done at record__aio_complete() after the
		 * started aio request completes, or at record__aio_push() if
		 * the request failed to start.
		 */
426		perf_mmap__get(&map->core);
427	}
428
429	aio->size += size;
430
431	return size;
432}
433
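/*
 * Push the data of one mmap into a free aio buffer and queue an asynchronous
 * write of it at offset *off in the output file, advancing *off on success.
 */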
434static int record__aio_push(struct record *rec, struct mmap *map, off_t *off)
435{
436	int ret, idx;
437	int trace_fd = rec->session->data->file.fd;
438	struct record_aio aio = { .rec = rec, .size = 0 };
439
	/*
	 * Call record__aio_sync() to wait until a map->aio.data[] buffer
	 * becomes available after the previous aio write operation.
	 */
444
445	idx = record__aio_sync(map, false);
446	aio.data = map->aio.data[idx];
447	ret = perf_mmap__push(map, &aio, record__aio_pushfn);
448	if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
449		return ret;
450
451	rec->samples++;
452	ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
453	if (!ret) {
454		*off += aio.size;
455		rec->bytes_written += aio.size;
456		if (switch_output_size(rec))
457			trigger_hit(&switch_output_trigger);
458	} else {
459		/*
460		 * Decrement map->refcount incremented in record__aio_pushfn()
461		 * back if record__aio_write() operation failed to start, otherwise
462		 * map->refcount is decremented in record__aio_complete() after
463		 * aio write operation finishes successfully.
464		 */
465		perf_mmap__put(&map->core);
466	}
467
468	return ret;
469}
470
471static off_t record__aio_get_pos(int trace_fd)
472{
473	return lseek(trace_fd, 0, SEEK_CUR);
474}
475
476static void record__aio_set_pos(int trace_fd, off_t pos)
477{
478	lseek(trace_fd, pos, SEEK_SET);
479}
480
481static void record__aio_mmap_read_sync(struct record *rec)
482{
483	int i;
484	struct evlist *evlist = rec->evlist;
485	struct mmap *maps = evlist->mmap;
486
487	if (!record__aio_enabled(rec))
488		return;
489
490	for (i = 0; i < evlist->core.nr_mmaps; i++) {
491		struct mmap *map = &maps[i];
492
493		if (map->core.base)
494			record__aio_sync(map, true);
495	}
496}
497
498static int nr_cblocks_default = 1;
499static int nr_cblocks_max = 4;
500
501static int record__aio_parse(const struct option *opt,
502			     const char *str,
503			     int unset)
504{
505	struct record_opts *opts = (struct record_opts *)opt->value;
506
507	if (unset) {
508		opts->nr_cblocks = 0;
509	} else {
510		if (str)
511			opts->nr_cblocks = strtol(str, NULL, 0);
512		if (!opts->nr_cblocks)
513			opts->nr_cblocks = nr_cblocks_default;
514	}
515
516	return 0;
517}
518#else /* HAVE_AIO_SUPPORT */
519static int nr_cblocks_max = 0;
520
521static int record__aio_push(struct record *rec __maybe_unused, struct mmap *map __maybe_unused,
522			    off_t *off __maybe_unused)
523{
524	return -1;
525}
526
527static off_t record__aio_get_pos(int trace_fd __maybe_unused)
528{
529	return -1;
530}
531
532static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
533{
534}
535
536static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
537{
538}
539#endif
540
541static int record__aio_enabled(struct record *rec)
542{
543	return rec->opts.nr_cblocks > 0;
544}
545
546#define MMAP_FLUSH_DEFAULT 1
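/*
 * Parse the --mmap-flush option value: accept a plain number or a B/K/M/G
 * suffixed size, fall back to MMAP_FLUSH_DEFAULT when empty and cap the
 * result at a quarter of the mmap buffer size.
 */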
547static int record__mmap_flush_parse(const struct option *opt,
548				    const char *str,
549				    int unset)
550{
551	int flush_max;
552	struct record_opts *opts = (struct record_opts *)opt->value;
553	static struct parse_tag tags[] = {
554			{ .tag  = 'B', .mult = 1       },
555			{ .tag  = 'K', .mult = 1 << 10 },
556			{ .tag  = 'M', .mult = 1 << 20 },
557			{ .tag  = 'G', .mult = 1 << 30 },
558			{ .tag  = 0 },
559	};
560
561	if (unset)
562		return 0;
563
564	if (str) {
565		opts->mmap_flush = parse_tag_value(str, tags);
566		if (opts->mmap_flush == (int)-1)
567			opts->mmap_flush = strtol(str, NULL, 0);
568	}
569
570	if (!opts->mmap_flush)
571		opts->mmap_flush = MMAP_FLUSH_DEFAULT;
572
573	flush_max = evlist__mmap_size(opts->mmap_pages);
574	flush_max /= 4;
575	if (opts->mmap_flush > flush_max)
576		opts->mmap_flush = flush_max;
577
578	return 0;
579}
580
581#ifdef HAVE_ZSTD_SUPPORT
582static unsigned int comp_level_default = 1;
583
584static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
585{
586	struct record_opts *opts = opt->value;
587
588	if (unset) {
589		opts->comp_level = 0;
590	} else {
591		if (str)
592			opts->comp_level = strtol(str, NULL, 0);
593		if (!opts->comp_level)
594			opts->comp_level = comp_level_default;
595	}
596
597	return 0;
598}
599#endif
600static unsigned int comp_level_max = 22;
601
602static int record__comp_enabled(struct record *rec)
603{
604	return rec->opts.comp_level > 0;
605}
606
607static int process_synthesized_event(struct perf_tool *tool,
608				     union perf_event *event,
609				     struct perf_sample *sample __maybe_unused,
610				     struct machine *machine __maybe_unused)
611{
612	struct record *rec = container_of(tool, struct record, tool);
613	return record__write(rec, NULL, event, event->header.size);
614}
615
616static struct mutex synth_lock;
617
618static int process_locked_synthesized_event(struct perf_tool *tool,
619				     union perf_event *event,
620				     struct perf_sample *sample __maybe_unused,
621				     struct machine *machine __maybe_unused)
622{
623	int ret;
624
625	mutex_lock(&synth_lock);
626	ret = process_synthesized_event(tool, event, sample, machine);
627	mutex_unlock(&synth_lock);
628	return ret;
629}
630
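/*
 * perf_mmap__push() callback for the serial (non-AIO) path: optionally
 * compress the chunk into map->data, then write it out via record__write().
 */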
631static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
632{
633	struct record *rec = to;
634
635	if (record__comp_enabled(rec)) {
636		size = zstd_compress(rec->session, map, map->data, mmap__mmap_len(map), bf, size);
637		bf   = map->data;
638	}
639
640	thread->samples++;
641	return record__write(rec, map, bf, size);
642}
643
644static volatile sig_atomic_t signr = -1;
645static volatile sig_atomic_t child_finished;
646#ifdef HAVE_EVENTFD_SUPPORT
647static volatile sig_atomic_t done_fd = -1;
648#endif
649
650static void sig_handler(int sig)
651{
652	if (sig == SIGCHLD)
653		child_finished = 1;
654	else
655		signr = sig;
656
657	done = 1;
658#ifdef HAVE_EVENTFD_SUPPORT
659	if (done_fd >= 0) {
660		u64 tmp = 1;
661		int orig_errno = errno;
662
663		/*
664		 * It is possible for this signal handler to run after done is
665		 * checked in the main loop, but before the perf counter fds are
666		 * polled. If this happens, the poll() will continue to wait
667		 * even though done is set, and will only break out if either
668		 * another signal is received, or the counters are ready for
669		 * read. To ensure the poll() doesn't sleep when done is set,
670		 * use an eventfd (done_fd) to wake up the poll().
671		 */
672		if (write(done_fd, &tmp, sizeof(tmp)) < 0)
673			pr_err("failed to signal wakeup fd, error: %m\n");
674
675		errno = orig_errno;
676	}
677#endif // HAVE_EVENTFD_SUPPORT
678}
679
680static void sigsegv_handler(int sig)
681{
682	perf_hooks__recover();
683	sighandler_dump_stack(sig);
684}
685
686static void record__sig_exit(void)
687{
688	if (signr == -1)
689		return;
690
691	signal(signr, SIG_DFL);
692	raise(signr);
693}
694
695#ifdef HAVE_AUXTRACE_SUPPORT
696
697static int record__process_auxtrace(struct perf_tool *tool,
698				    struct mmap *map,
699				    union perf_event *event, void *data1,
700				    size_t len1, void *data2, size_t len2)
701{
702	struct record *rec = container_of(tool, struct record, tool);
703	struct perf_data *data = &rec->data;
704	size_t padding;
705	u8 pad[8] = {0};
706
707	if (!perf_data__is_pipe(data) && perf_data__is_single_file(data)) {
708		off_t file_offset;
709		int fd = perf_data__fd(data);
710		int err;
711
712		file_offset = lseek(fd, 0, SEEK_CUR);
713		if (file_offset == -1)
714			return -1;
715		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
716						     event, file_offset);
717		if (err)
718			return err;
719	}
720
721	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
722	padding = (len1 + len2) & 7;
723	if (padding)
724		padding = 8 - padding;
725
726	record__write(rec, map, event, event->header.size);
727	record__write(rec, map, data1, len1);
728	if (len2)
729		record__write(rec, map, data2, len2);
730	record__write(rec, map, &pad, padding);
731
732	return 0;
733}
734
735static int record__auxtrace_mmap_read(struct record *rec,
736				      struct mmap *map)
737{
738	int ret;
739
740	ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
741				  record__process_auxtrace);
742	if (ret < 0)
743		return ret;
744
745	if (ret)
746		rec->samples++;
747
748	return 0;
749}
750
751static int record__auxtrace_mmap_read_snapshot(struct record *rec,
752					       struct mmap *map)
753{
754	int ret;
755
756	ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
757					   record__process_auxtrace,
758					   rec->opts.auxtrace_snapshot_size);
759	if (ret < 0)
760		return ret;
761
762	if (ret)
763		rec->samples++;
764
765	return 0;
766}
767
768static int record__auxtrace_read_snapshot_all(struct record *rec)
769{
770	int i;
771	int rc = 0;
772
773	for (i = 0; i < rec->evlist->core.nr_mmaps; i++) {
774		struct mmap *map = &rec->evlist->mmap[i];
775
776		if (!map->auxtrace_mmap.base)
777			continue;
778
779		if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
780			rc = -1;
781			goto out;
782		}
783	}
784out:
785	return rc;
786}
787
788static void record__read_auxtrace_snapshot(struct record *rec, bool on_exit)
789{
790	pr_debug("Recording AUX area tracing snapshot\n");
791	if (record__auxtrace_read_snapshot_all(rec) < 0) {
792		trigger_error(&auxtrace_snapshot_trigger);
793	} else {
794		if (auxtrace_record__snapshot_finish(rec->itr, on_exit))
795			trigger_error(&auxtrace_snapshot_trigger);
796		else
797			trigger_ready(&auxtrace_snapshot_trigger);
798	}
799}
800
801static int record__auxtrace_snapshot_exit(struct record *rec)
802{
803	if (trigger_is_error(&auxtrace_snapshot_trigger))
804		return 0;
805
806	if (!auxtrace_record__snapshot_started &&
807	    auxtrace_record__snapshot_start(rec->itr))
808		return -1;
809
810	record__read_auxtrace_snapshot(rec, true);
811	if (trigger_is_error(&auxtrace_snapshot_trigger))
812		return -1;
813
814	return 0;
815}
816
817static int record__auxtrace_init(struct record *rec)
818{
819	int err;
820
821	if ((rec->opts.auxtrace_snapshot_opts || rec->opts.auxtrace_sample_opts)
822	    && record__threads_enabled(rec)) {
823		pr_err("AUX area tracing options are not available in parallel streaming mode.\n");
824		return -EINVAL;
825	}
826
827	if (!rec->itr) {
828		rec->itr = auxtrace_record__init(rec->evlist, &err);
829		if (err)
830			return err;
831	}
832
833	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
834					      rec->opts.auxtrace_snapshot_opts);
835	if (err)
836		return err;
837
838	err = auxtrace_parse_sample_options(rec->itr, rec->evlist, &rec->opts,
839					    rec->opts.auxtrace_sample_opts);
840	if (err)
841		return err;
842
843	auxtrace_regroup_aux_output(rec->evlist);
844
845	return auxtrace_parse_filters(rec->evlist);
846}
847
848#else
849
850static inline
851int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
852			       struct mmap *map __maybe_unused)
853{
854	return 0;
855}
856
857static inline
858void record__read_auxtrace_snapshot(struct record *rec __maybe_unused,
859				    bool on_exit __maybe_unused)
860{
861}
862
863static inline
864int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
865{
866	return 0;
867}
868
869static inline
870int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused)
871{
872	return 0;
873}
874
875static int record__auxtrace_init(struct record *rec __maybe_unused)
876{
877	return 0;
878}
879
880#endif
881
882static int record__config_text_poke(struct evlist *evlist)
883{
884	struct evsel *evsel;
885
886	/* Nothing to do if text poke is already configured */
887	evlist__for_each_entry(evlist, evsel) {
888		if (evsel->core.attr.text_poke)
889			return 0;
890	}
891
892	evsel = evlist__add_dummy_on_all_cpus(evlist);
893	if (!evsel)
894		return -ENOMEM;
895
896	evsel->core.attr.text_poke = 1;
897	evsel->core.attr.ksymbol = 1;
898	evsel->immediate = true;
899	evsel__set_sample_bit(evsel, TIME);
900
901	return 0;
902}
903
904static int record__config_off_cpu(struct record *rec)
905{
906	return off_cpu_prepare(rec->evlist, &rec->opts.target, &rec->opts);
907}
908
909static bool record__kcore_readable(struct machine *machine)
910{
911	char kcore[PATH_MAX];
912	int fd;
913
914	scnprintf(kcore, sizeof(kcore), "%s/proc/kcore", machine->root_dir);
915
916	fd = open(kcore, O_RDONLY);
917	if (fd < 0)
918		return false;
919
920	close(fd);
921
922	return true;
923}
924
925static int record__kcore_copy(struct machine *machine, struct perf_data *data)
926{
927	char from_dir[PATH_MAX];
928	char kcore_dir[PATH_MAX];
929	int ret;
930
931	snprintf(from_dir, sizeof(from_dir), "%s/proc", machine->root_dir);
932
933	ret = perf_data__make_kcore_dir(data, kcore_dir, sizeof(kcore_dir));
934	if (ret)
935		return ret;
936
937	return kcore_copy(from_dir, kcore_dir);
938}
939
940static void record__thread_data_init_pipes(struct record_thread *thread_data)
941{
942	thread_data->pipes.msg[0] = -1;
943	thread_data->pipes.msg[1] = -1;
944	thread_data->pipes.ack[0] = -1;
945	thread_data->pipes.ack[1] = -1;
946}
947
948static int record__thread_data_open_pipes(struct record_thread *thread_data)
949{
950	if (pipe(thread_data->pipes.msg))
951		return -EINVAL;
952
953	if (pipe(thread_data->pipes.ack)) {
954		close(thread_data->pipes.msg[0]);
955		thread_data->pipes.msg[0] = -1;
956		close(thread_data->pipes.msg[1]);
957		thread_data->pipes.msg[1] = -1;
958		return -EINVAL;
959	}
960
961	pr_debug2("thread_data[%p]: msg=[%d,%d], ack=[%d,%d]\n", thread_data,
962		 thread_data->pipes.msg[0], thread_data->pipes.msg[1],
963		 thread_data->pipes.ack[0], thread_data->pipes.ack[1]);
964
965	return 0;
966}
967
968static void record__thread_data_close_pipes(struct record_thread *thread_data)
969{
970	if (thread_data->pipes.msg[0] != -1) {
971		close(thread_data->pipes.msg[0]);
972		thread_data->pipes.msg[0] = -1;
973	}
974	if (thread_data->pipes.msg[1] != -1) {
975		close(thread_data->pipes.msg[1]);
976		thread_data->pipes.msg[1] = -1;
977	}
978	if (thread_data->pipes.ack[0] != -1) {
979		close(thread_data->pipes.ack[0]);
980		thread_data->pipes.ack[0] = -1;
981	}
982	if (thread_data->pipes.ack[1] != -1) {
983		close(thread_data->pipes.ack[1]);
984		thread_data->pipes.ack[1] = -1;
985	}
986}
987
988static bool evlist__per_thread(struct evlist *evlist)
989{
990	return cpu_map__is_dummy(evlist->core.user_requested_cpus);
991}
992
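/*
 * Assign to this recording thread the evlist mmaps it is responsible for:
 * all of them in per-thread mode, otherwise only the mmaps whose CPU is set
 * in the thread's maps mask.
 */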
993static int record__thread_data_init_maps(struct record_thread *thread_data, struct evlist *evlist)
994{
995	int m, tm, nr_mmaps = evlist->core.nr_mmaps;
996	struct mmap *mmap = evlist->mmap;
997	struct mmap *overwrite_mmap = evlist->overwrite_mmap;
998	struct perf_cpu_map *cpus = evlist->core.all_cpus;
999	bool per_thread = evlist__per_thread(evlist);
1000
1001	if (per_thread)
1002		thread_data->nr_mmaps = nr_mmaps;
1003	else
1004		thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits,
1005						      thread_data->mask->maps.nbits);
1006	if (mmap) {
1007		thread_data->maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
1008		if (!thread_data->maps)
1009			return -ENOMEM;
1010	}
1011	if (overwrite_mmap) {
1012		thread_data->overwrite_maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
1013		if (!thread_data->overwrite_maps) {
1014			zfree(&thread_data->maps);
1015			return -ENOMEM;
1016		}
1017	}
1018	pr_debug2("thread_data[%p]: nr_mmaps=%d, maps=%p, ow_maps=%p\n", thread_data,
1019		 thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps);
1020
1021	for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) {
1022		if (per_thread ||
1023		    test_bit(perf_cpu_map__cpu(cpus, m).cpu, thread_data->mask->maps.bits)) {
1024			if (thread_data->maps) {
1025				thread_data->maps[tm] = &mmap[m];
1026				pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n",
1027					  thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
1028			}
1029			if (thread_data->overwrite_maps) {
1030				thread_data->overwrite_maps[tm] = &overwrite_mmap[m];
1031				pr_debug2("thread_data[%p]: cpu%d: ow_maps[%d] -> ow_mmap[%d]\n",
1032					  thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
1033			}
1034			tm++;
1035		}
1036	}
1037
1038	return 0;
1039}
1040
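/*
 * Build the thread's private pollfd array by duplicating, from the evlist
 * pollfd, the entries that belong to the mmaps handled by this thread.
 */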
1041static int record__thread_data_init_pollfd(struct record_thread *thread_data, struct evlist *evlist)
1042{
1043	int f, tm, pos;
1044	struct mmap *map, *overwrite_map;
1045
1046	fdarray__init(&thread_data->pollfd, 64);
1047
1048	for (tm = 0; tm < thread_data->nr_mmaps; tm++) {
1049		map = thread_data->maps ? thread_data->maps[tm] : NULL;
1050		overwrite_map = thread_data->overwrite_maps ?
1051				thread_data->overwrite_maps[tm] : NULL;
1052
1053		for (f = 0; f < evlist->core.pollfd.nr; f++) {
1054			void *ptr = evlist->core.pollfd.priv[f].ptr;
1055
1056			if ((map && ptr == map) || (overwrite_map && ptr == overwrite_map)) {
1057				pos = fdarray__dup_entry_from(&thread_data->pollfd, f,
1058							      &evlist->core.pollfd);
1059				if (pos < 0)
1060					return pos;
1061				pr_debug2("thread_data[%p]: pollfd[%d] <- event_fd=%d\n",
1062					 thread_data, pos, evlist->core.pollfd.entries[f].fd);
1063			}
1064		}
1065	}
1066
1067	return 0;
1068}
1069
1070static void record__free_thread_data(struct record *rec)
1071{
1072	int t;
1073	struct record_thread *thread_data = rec->thread_data;
1074
1075	if (thread_data == NULL)
1076		return;
1077
1078	for (t = 0; t < rec->nr_threads; t++) {
1079		record__thread_data_close_pipes(&thread_data[t]);
1080		zfree(&thread_data[t].maps);
1081		zfree(&thread_data[t].overwrite_maps);
1082		fdarray__exit(&thread_data[t].pollfd);
1083	}
1084
1085	zfree(&rec->thread_data);
1086}
1087
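/*
 * Remember that evlist pollfd entry 'evlist_pollfd_index' was duplicated at
 * position 'thread_pollfd_index' in the main thread's private pollfd, so
 * that revents can be copied back later by
 * record__update_evlist_pollfd_from_thread().
 */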
1088static int record__map_thread_evlist_pollfd_indexes(struct record *rec,
1089						    int evlist_pollfd_index,
1090						    int thread_pollfd_index)
1091{
1092	size_t x = rec->index_map_cnt;
1093
1094	if (realloc_array_as_needed(rec->index_map, rec->index_map_sz, x, NULL))
1095		return -ENOMEM;
1096	rec->index_map[x].evlist_pollfd_index = evlist_pollfd_index;
1097	rec->index_map[x].thread_pollfd_index = thread_pollfd_index;
1098	rec->index_map_cnt += 1;
1099	return 0;
1100}
1101
1102static int record__update_evlist_pollfd_from_thread(struct record *rec,
1103						    struct evlist *evlist,
1104						    struct record_thread *thread_data)
1105{
1106	struct pollfd *e_entries = evlist->core.pollfd.entries;
1107	struct pollfd *t_entries = thread_data->pollfd.entries;
1108	int err = 0;
1109	size_t i;
1110
1111	for (i = 0; i < rec->index_map_cnt; i++) {
1112		int e_pos = rec->index_map[i].evlist_pollfd_index;
1113		int t_pos = rec->index_map[i].thread_pollfd_index;
1114
1115		if (e_entries[e_pos].fd != t_entries[t_pos].fd ||
1116		    e_entries[e_pos].events != t_entries[t_pos].events) {
1117			pr_err("Thread and evlist pollfd index mismatch\n");
1118			err = -EINVAL;
1119			continue;
1120		}
1121		e_entries[e_pos].revents = t_entries[t_pos].revents;
1122	}
1123	return err;
1124}
1125
1126static int record__dup_non_perf_events(struct record *rec,
1127				       struct evlist *evlist,
1128				       struct record_thread *thread_data)
1129{
1130	struct fdarray *fda = &evlist->core.pollfd;
1131	int i, ret;
1132
1133	for (i = 0; i < fda->nr; i++) {
1134		if (!(fda->priv[i].flags & fdarray_flag__non_perf_event))
1135			continue;
1136		ret = fdarray__dup_entry_from(&thread_data->pollfd, i, fda);
1137		if (ret < 0) {
1138			pr_err("Failed to duplicate descriptor in main thread pollfd\n");
1139			return ret;
1140		}
1141		pr_debug2("thread_data[%p]: pollfd[%d] <- non_perf_event fd=%d\n",
1142			  thread_data, ret, fda->entries[i].fd);
1143		ret = record__map_thread_evlist_pollfd_indexes(rec, i, ret);
1144		if (ret < 0) {
1145			pr_err("Failed to map thread and evlist pollfd indexes\n");
1146			return ret;
1147		}
1148	}
1149	return 0;
1150}
1151
1152static int record__alloc_thread_data(struct record *rec, struct evlist *evlist)
1153{
1154	int t, ret;
1155	struct record_thread *thread_data;
1156
1157	rec->thread_data = zalloc(rec->nr_threads * sizeof(*(rec->thread_data)));
1158	if (!rec->thread_data) {
1159		pr_err("Failed to allocate thread data\n");
1160		return -ENOMEM;
1161	}
1162	thread_data = rec->thread_data;
1163
1164	for (t = 0; t < rec->nr_threads; t++)
1165		record__thread_data_init_pipes(&thread_data[t]);
1166
1167	for (t = 0; t < rec->nr_threads; t++) {
1168		thread_data[t].rec = rec;
1169		thread_data[t].mask = &rec->thread_masks[t];
1170		ret = record__thread_data_init_maps(&thread_data[t], evlist);
1171		if (ret) {
1172			pr_err("Failed to initialize thread[%d] maps\n", t);
1173			goto out_free;
1174		}
1175		ret = record__thread_data_init_pollfd(&thread_data[t], evlist);
1176		if (ret) {
1177			pr_err("Failed to initialize thread[%d] pollfd\n", t);
1178			goto out_free;
1179		}
1180		if (t) {
1181			thread_data[t].tid = -1;
1182			ret = record__thread_data_open_pipes(&thread_data[t]);
1183			if (ret) {
1184				pr_err("Failed to open thread[%d] communication pipes\n", t);
1185				goto out_free;
1186			}
1187			ret = fdarray__add(&thread_data[t].pollfd, thread_data[t].pipes.msg[0],
1188					   POLLIN | POLLERR | POLLHUP, fdarray_flag__nonfilterable);
1189			if (ret < 0) {
1190				pr_err("Failed to add descriptor to thread[%d] pollfd\n", t);
1191				goto out_free;
1192			}
1193			thread_data[t].ctlfd_pos = ret;
1194			pr_debug2("thread_data[%p]: pollfd[%d] <- ctl_fd=%d\n",
1195				 thread_data, thread_data[t].ctlfd_pos,
1196				 thread_data[t].pipes.msg[0]);
1197		} else {
1198			thread_data[t].tid = gettid();
1199
1200			ret = record__dup_non_perf_events(rec, evlist, &thread_data[t]);
1201			if (ret < 0)
1202				goto out_free;
1203
1204			thread_data[t].ctlfd_pos = -1; /* Not used */
1205		}
1206	}
1207
1208	return 0;
1209
1210out_free:
1211	record__free_thread_data(rec);
1212
1213	return ret;
1214}
1215
1216static int record__mmap_evlist(struct record *rec,
1217			       struct evlist *evlist)
1218{
1219	int i, ret;
1220	struct record_opts *opts = &rec->opts;
1221	bool auxtrace_overwrite = opts->auxtrace_snapshot_mode ||
1222				  opts->auxtrace_sample_mode;
1223	char msg[512];
1224
1225	if (opts->affinity != PERF_AFFINITY_SYS)
1226		cpu__setup_cpunode_map();
1227
1228	if (evlist__mmap_ex(evlist, opts->mmap_pages,
1229				 opts->auxtrace_mmap_pages,
1230				 auxtrace_overwrite,
1231				 opts->nr_cblocks, opts->affinity,
1232				 opts->mmap_flush, opts->comp_level) < 0) {
1233		if (errno == EPERM) {
1234			pr_err("Permission error mapping pages.\n"
1235			       "Consider increasing "
1236			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
1237			       "or try again with a smaller value of -m/--mmap_pages.\n"
1238			       "(current value: %u,%u)\n",
1239			       opts->mmap_pages, opts->auxtrace_mmap_pages);
1240			return -errno;
1241		} else {
1242			pr_err("failed to mmap with %d (%s)\n", errno,
1243				str_error_r(errno, msg, sizeof(msg)));
1244			if (errno)
1245				return -errno;
1246			else
1247				return -EINVAL;
1248		}
1249	}
1250
1251	if (evlist__initialize_ctlfd(evlist, opts->ctl_fd, opts->ctl_fd_ack))
1252		return -1;
1253
1254	ret = record__alloc_thread_data(rec, evlist);
1255	if (ret)
1256		return ret;
1257
1258	if (record__threads_enabled(rec)) {
1259		ret = perf_data__create_dir(&rec->data, evlist->core.nr_mmaps);
1260		if (ret) {
1261			pr_err("Failed to create data directory: %s\n", strerror(-ret));
1262			return ret;
1263		}
1264		for (i = 0; i < evlist->core.nr_mmaps; i++) {
1265			if (evlist->mmap)
1266				evlist->mmap[i].file = &rec->data.dir.files[i];
1267			if (evlist->overwrite_mmap)
1268				evlist->overwrite_mmap[i].file = &rec->data.dir.files[i];
1269		}
1270	}
1271
1272	return 0;
1273}
1274
1275static int record__mmap(struct record *rec)
1276{
1277	return record__mmap_evlist(rec, rec->evlist);
1278}
1279
1280static int record__open(struct record *rec)
1281{
1282	char msg[BUFSIZ];
1283	struct evsel *pos;
1284	struct evlist *evlist = rec->evlist;
1285	struct perf_session *session = rec->session;
1286	struct record_opts *opts = &rec->opts;
1287	int rc = 0;
1288
	/*
	 * For initial_delay, system wide, or a hybrid system, we need to add
	 * a dummy event so that we can track PERF_RECORD_MMAP while we are
	 * waiting or while event synthesis takes place.
	 */
1294	if (opts->target.initial_delay || target__has_cpu(&opts->target) ||
1295	    perf_pmus__num_core_pmus() > 1) {
1296		pos = evlist__get_tracking_event(evlist);
1297		if (!evsel__is_dummy_event(pos)) {
1298			/* Set up dummy event. */
1299			if (evlist__add_dummy(evlist))
1300				return -ENOMEM;
1301			pos = evlist__last(evlist);
1302			evlist__set_tracking_event(evlist, pos);
1303		}
1304
		/*
		 * Enable the dummy event when the process is forked for
		 * initial_delay, or immediately for system wide.
		 */
1309		if (opts->target.initial_delay && !pos->immediate &&
1310		    !target__has_cpu(&opts->target))
1311			pos->core.attr.enable_on_exec = 1;
1312		else
1313			pos->immediate = 1;
1314	}
1315
1316	evlist__config(evlist, opts, &callchain_param);
1317
1318	evlist__for_each_entry(evlist, pos) {
1319try_again:
1320		if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) {
1321			if (evsel__fallback(pos, errno, msg, sizeof(msg))) {
1322				if (verbose > 0)
1323					ui__warning("%s\n", msg);
1324				goto try_again;
1325			}
1326			if ((errno == EINVAL || errno == EBADF) &&
1327			    pos->core.leader != &pos->core &&
1328			    pos->weak_group) {
				pos = evlist__reset_weak_group(evlist, pos, true);
1330				goto try_again;
1331			}
1332			rc = -errno;
1333			evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg));
1334			ui__error("%s\n", msg);
1335			goto out;
1336		}
1337
1338		pos->supported = true;
1339	}
1340
1341	if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) {
1342		pr_warning(
1343"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
1344"check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n"
1345"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
1346"file is not found in the buildid cache or in the vmlinux path.\n\n"
1347"Samples in kernel modules won't be resolved at all.\n\n"
1348"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
1349"even with a suitable vmlinux or kallsyms file.\n\n");
1350	}
1351
1352	if (evlist__apply_filters(evlist, &pos)) {
1353		pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
1354			pos->filter ?: "BPF", evsel__name(pos), errno,
1355			str_error_r(errno, msg, sizeof(msg)));
1356		rc = -1;
1357		goto out;
1358	}
1359
1360	rc = record__mmap(rec);
1361	if (rc)
1362		goto out;
1363
1364	session->evlist = evlist;
1365	perf_session__set_id_hdr_size(session);
1366out:
1367	return rc;
1368}
1369
1370static void set_timestamp_boundary(struct record *rec, u64 sample_time)
1371{
1372	if (rec->evlist->first_sample_time == 0)
1373		rec->evlist->first_sample_time = sample_time;
1374
1375	if (sample_time)
1376		rec->evlist->last_sample_time = sample_time;
1377}
1378
1379static int process_sample_event(struct perf_tool *tool,
1380				union perf_event *event,
1381				struct perf_sample *sample,
1382				struct evsel *evsel,
1383				struct machine *machine)
1384{
1385	struct record *rec = container_of(tool, struct record, tool);
1386
1387	set_timestamp_boundary(rec, sample->time);
1388
1389	if (rec->buildid_all)
1390		return 0;
1391
1392	rec->samples++;
1393	return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
1394}
1395
1396static int process_buildids(struct record *rec)
1397{
1398	struct perf_session *session = rec->session;
1399
1400	if (perf_data__size(&rec->data) == 0)
1401		return 0;
1402
	/*
	 * During this process, it'll load the kernel map and replace
	 * dso->long_name with the real pathname it found.  In this case
	 * we prefer a vmlinux path like
	 *   /lib/modules/3.16.4/build/vmlinux
	 *
	 * rather than the build-id path (in the debug directory), e.g.
	 *   $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
	 */
1412	symbol_conf.ignore_vmlinux_buildid = true;
1413
	/*
	 * If --buildid-all is given, it marks all DSOs regardless of hits,
	 * so there is no need to process samples. But if timestamp_boundary
	 * is enabled, it still needs to walk all samples to get the
	 * timestamps of the first/last samples.
	 */
1420	if (rec->buildid_all && !rec->timestamp_boundary)
1421		rec->tool.sample = NULL;
1422
1423	return perf_session__process_events(session);
1424}
1425
1426static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
1427{
1428	int err;
1429	struct perf_tool *tool = data;
	/*
	 * For the guest kernel, when processing the record & report
	 * subcommands, we arrange the module mmap prior to the guest kernel
	 * mmap and trigger a dso preload, because by default guest module
	 * symbols are loaded from guest kallsyms instead of
	 * /lib/modules/XXX/XXX. This avoids missing symbols when the first
	 * address is in a module instead of in the guest kernel.
	 */
1438	err = perf_event__synthesize_modules(tool, process_synthesized_event,
1439					     machine);
1440	if (err < 0)
1441		pr_err("Couldn't record guest kernel [%d]'s reference"
1442		       " relocation symbol.\n", machine->pid);
1443
	/*
	 * We use _stext for the guest kernel because the guest kernel's
	 * /proc/kallsyms sometimes has no _text.
	 */
1448	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
1449						 machine);
1450	if (err < 0)
1451		pr_err("Couldn't record guest kernel [%d]'s reference"
1452		       " relocation symbol.\n", machine->pid);
1453}
1454
1455static struct perf_event_header finished_round_event = {
1456	.size = sizeof(struct perf_event_header),
1457	.type = PERF_RECORD_FINISHED_ROUND,
1458};
1459
1460static struct perf_event_header finished_init_event = {
1461	.size = sizeof(struct perf_event_header),
1462	.type = PERF_RECORD_FINISHED_INIT,
1463};
1464
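/*
 * With an affinity mode other than SYS, migrate the reading thread to the
 * CPU mask of the mmap being flushed, so the ring buffer is accessed from a
 * local CPU or NUMA node.
 */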
1465static void record__adjust_affinity(struct record *rec, struct mmap *map)
1466{
1467	if (rec->opts.affinity != PERF_AFFINITY_SYS &&
1468	    !bitmap_equal(thread->mask->affinity.bits, map->affinity_mask.bits,
1469			  thread->mask->affinity.nbits)) {
1470		bitmap_zero(thread->mask->affinity.bits, thread->mask->affinity.nbits);
1471		bitmap_or(thread->mask->affinity.bits, thread->mask->affinity.bits,
1472			  map->affinity_mask.bits, thread->mask->affinity.nbits);
1473		sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
1474					(cpu_set_t *)thread->mask->affinity.bits);
1475		if (verbose == 2) {
1476			pr_debug("threads[%d]: running on cpu%d: ", thread->tid, sched_getcpu());
1477			mmap_cpu_mask__scnprintf(&thread->mask->affinity, "affinity");
1478		}
1479	}
1480}
1481
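/*
 * Callback for zstd_compress_stream_to_records(): the first call (with
 * increment == 0) initializes a PERF_RECORD_COMPRESSED header, subsequent
 * calls grow header.size by the size of the newly compressed data.
 */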
1482static size_t process_comp_header(void *record, size_t increment)
1483{
1484	struct perf_record_compressed *event = record;
1485	size_t size = sizeof(*event);
1486
1487	if (increment) {
1488		event->header.size += increment;
1489		return increment;
1490	}
1491
1492	event->header.type = PERF_RECORD_COMPRESSED;
1493	event->header.size = size;
1494
1495	return size;
1496}
1497
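/*
 * Compress a chunk of trace data into PERF_RECORD_COMPRESSED records. In
 * parallel (directory) mode each mmap has its own zstd stream and the
 * per-thread transfer/compression counters are updated, otherwise the
 * session-wide stream and counters are used.
 */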
1498static size_t zstd_compress(struct perf_session *session, struct mmap *map,
1499			    void *dst, size_t dst_size, void *src, size_t src_size)
1500{
1501	size_t compressed;
1502	size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_record_compressed) - 1;
1503	struct zstd_data *zstd_data = &session->zstd_data;
1504
1505	if (map && map->file)
1506		zstd_data = &map->zstd_data;
1507
1508	compressed = zstd_compress_stream_to_records(zstd_data, dst, dst_size, src, src_size,
1509						     max_record_size, process_comp_header);
1510
1511	if (map && map->file) {
1512		thread->bytes_transferred += src_size;
1513		thread->bytes_compressed  += compressed;
1514	} else {
1515		session->bytes_transferred += src_size;
1516		session->bytes_compressed  += compressed;
1517	}
1518
1519	return compressed;
1520}
1521
1522static int record__mmap_read_evlist(struct record *rec, struct evlist *evlist,
1523				    bool overwrite, bool synch)
1524{
1525	u64 bytes_written = rec->bytes_written;
1526	int i;
1527	int rc = 0;
1528	int nr_mmaps;
1529	struct mmap **maps;
1530	int trace_fd = rec->data.file.fd;
1531	off_t off = 0;
1532
1533	if (!evlist)
1534		return 0;
1535
1536	nr_mmaps = thread->nr_mmaps;
1537	maps = overwrite ? thread->overwrite_maps : thread->maps;
1538
1539	if (!maps)
1540		return 0;
1541
1542	if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
1543		return 0;
1544
1545	if (record__aio_enabled(rec))
1546		off = record__aio_get_pos(trace_fd);
1547
1548	for (i = 0; i < nr_mmaps; i++) {
1549		u64 flush = 0;
1550		struct mmap *map = maps[i];
1551
1552		if (map->core.base) {
1553			record__adjust_affinity(rec, map);
1554			if (synch) {
1555				flush = map->core.flush;
1556				map->core.flush = 1;
1557			}
1558			if (!record__aio_enabled(rec)) {
1559				if (perf_mmap__push(map, rec, record__pushfn) < 0) {
1560					if (synch)
1561						map->core.flush = flush;
1562					rc = -1;
1563					goto out;
1564				}
1565			} else {
1566				if (record__aio_push(rec, map, &off) < 0) {
1567					record__aio_set_pos(trace_fd, off);
1568					if (synch)
1569						map->core.flush = flush;
1570					rc = -1;
1571					goto out;
1572				}
1573			}
1574			if (synch)
1575				map->core.flush = flush;
1576		}
1577
1578		if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
1579		    !rec->opts.auxtrace_sample_mode &&
1580		    record__auxtrace_mmap_read(rec, map) != 0) {
1581			rc = -1;
1582			goto out;
1583		}
1584	}
1585
1586	if (record__aio_enabled(rec))
1587		record__aio_set_pos(trace_fd, off);
1588
	/*
	 * Mark the round finished in case we wrote
	 * at least one event.
	 *
	 * No need for round events in directory mode,
	 * because per-cpu maps and files have their data
	 * sorted by the kernel.
	 */
1597	if (!record__threads_enabled(rec) && bytes_written != rec->bytes_written)
1598		rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));
1599
1600	if (overwrite)
1601		evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
1602out:
1603	return rc;
1604}
1605
1606static int record__mmap_read_all(struct record *rec, bool synch)
1607{
1608	int err;
1609
1610	err = record__mmap_read_evlist(rec, rec->evlist, false, synch);
1611	if (err)
1612		return err;
1613
1614	return record__mmap_read_evlist(rec, rec->evlist, true, synch);
1615}
1616
1617static void record__thread_munmap_filtered(struct fdarray *fda, int fd,
1618					   void *arg __maybe_unused)
1619{
1620	struct perf_mmap *map = fda->priv[fd].ptr;
1621
1622	if (map)
1623		perf_mmap__put(map);
1624}
1625
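/*
 * Body of a parallel recording thread: announce readiness over the ack pipe,
 * then keep flushing this thread's mmaps, polling when no new samples have
 * arrived, until the main thread closes the msg pipe (seen as POLLHUP).
 * Flush once more before acknowledging termination.
 */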
1626static void *record__thread(void *arg)
1627{
1628	enum thread_msg msg = THREAD_MSG__READY;
1629	bool terminate = false;
1630	struct fdarray *pollfd;
1631	int err, ctlfd_pos;
1632
1633	thread = arg;
1634	thread->tid = gettid();
1635
1636	err = write(thread->pipes.ack[1], &msg, sizeof(msg));
1637	if (err == -1)
1638		pr_warning("threads[%d]: failed to notify on start: %s\n",
1639			   thread->tid, strerror(errno));
1640
1641	pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());
1642
1643	pollfd = &thread->pollfd;
1644	ctlfd_pos = thread->ctlfd_pos;
1645
1646	for (;;) {
1647		unsigned long long hits = thread->samples;
1648
1649		if (record__mmap_read_all(thread->rec, false) < 0 || terminate)
1650			break;
1651
1652		if (hits == thread->samples) {
1653
1654			err = fdarray__poll(pollfd, -1);
			/*
			 * Propagate an error only if there is one. Ignore a
			 * positive number of returned events and interrupt
			 * errors.
			 */
1659			if (err > 0 || (err < 0 && errno == EINTR))
1660				err = 0;
1661			thread->waking++;
1662
1663			if (fdarray__filter(pollfd, POLLERR | POLLHUP,
1664					    record__thread_munmap_filtered, NULL) == 0)
1665				break;
1666		}
1667
1668		if (pollfd->entries[ctlfd_pos].revents & POLLHUP) {
1669			terminate = true;
1670			close(thread->pipes.msg[0]);
1671			thread->pipes.msg[0] = -1;
1672			pollfd->entries[ctlfd_pos].fd = -1;
1673			pollfd->entries[ctlfd_pos].events = 0;
1674		}
1675
1676		pollfd->entries[ctlfd_pos].revents = 0;
1677	}
1678	record__mmap_read_all(thread->rec, true);
1679
1680	err = write(thread->pipes.ack[1], &msg, sizeof(msg));
1681	if (err == -1)
1682		pr_warning("threads[%d]: failed to notify on termination: %s\n",
1683			   thread->tid, strerror(errno));
1684
1685	return NULL;
1686}
1687
1688static void record__init_features(struct record *rec)
1689{
1690	struct perf_session *session = rec->session;
1691	int feat;
1692
1693	for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
1694		perf_header__set_feat(&session->header, feat);
1695
1696	if (rec->no_buildid)
1697		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
1698
1699#ifdef HAVE_LIBTRACEEVENT
1700	if (!have_tracepoints(&rec->evlist->core.entries))
1701		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
1702#endif
1703
1704	if (!rec->opts.branch_stack)
1705		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
1706
1707	if (!rec->opts.full_auxtrace)
1708		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
1709
1710	if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
1711		perf_header__clear_feat(&session->header, HEADER_CLOCKID);
1712
1713	if (!rec->opts.use_clockid)
1714		perf_header__clear_feat(&session->header, HEADER_CLOCK_DATA);
1715
1716	if (!record__threads_enabled(rec))
1717		perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
1718
1719	if (!record__comp_enabled(rec))
1720		perf_header__clear_feat(&session->header, HEADER_COMPRESSED);
1721
1722	perf_header__clear_feat(&session->header, HEADER_STAT);
1723}
1724
1725static void
1726record__finish_output(struct record *rec)
1727{
1728	int i;
1729	struct perf_data *data = &rec->data;
1730	int fd = perf_data__fd(data);
1731
1732	if (data->is_pipe)
1733		return;
1734
1735	rec->session->header.data_size += rec->bytes_written;
1736	data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
1737	if (record__threads_enabled(rec)) {
1738		for (i = 0; i < data->dir.nr; i++)
1739			data->dir.files[i].size = lseek(data->dir.files[i].fd, 0, SEEK_CUR);
1740	}
1741
1742	if (!rec->no_buildid) {
1743		process_buildids(rec);
1744
1745		if (rec->buildid_all)
1746			dsos__hit_all(rec->session);
1747	}
1748	perf_session__write_header(rec->session, rec->evlist, fd, true);
1749
1750	return;
1751}
1752
1753static int record__synthesize_workload(struct record *rec, bool tail)
1754{
1755	int err;
1756	struct perf_thread_map *thread_map;
1757	bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;
1758
1759	if (rec->opts.tail_synthesize != tail)
1760		return 0;
1761
1762	thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
1763	if (thread_map == NULL)
1764		return -1;
1765
1766	err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
1767						 process_synthesized_event,
1768						 &rec->session->machines.host,
1769						 needs_mmap,
1770						 rec->opts.sample_address);
1771	perf_thread_map__put(thread_map);
1772	return err;
1773}
1774
1775static int write_finished_init(struct record *rec, bool tail)
1776{
1777	if (rec->opts.tail_synthesize != tail)
1778		return 0;
1779
1780	return record__write(rec, NULL, &finished_init_event, sizeof(finished_init_event));
1781}
1782
1783static int record__synthesize(struct record *rec, bool tail);
1784
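/*
 * Finalize the current output file and switch to a new timestamped one (for
 * --switch-output): synthesize tail events, write the header, reopen the
 * data file and, when a maximum number of files is configured, recycle the
 * oldest one.
 */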
1785static int
1786record__switch_output(struct record *rec, bool at_exit)
1787{
1788	struct perf_data *data = &rec->data;
1789	char *new_filename = NULL;
1790	int fd, err;
1791
	/* Same size as a timestamp like "2015122520103046" */
1793	char timestamp[] = "InvalidTimestamp";
1794
1795	record__aio_mmap_read_sync(rec);
1796
1797	write_finished_init(rec, true);
1798
1799	record__synthesize(rec, true);
1800	if (target__none(&rec->opts.target))
1801		record__synthesize_workload(rec, true);
1802
1803	rec->samples = 0;
1804	record__finish_output(rec);
1805	err = fetch_current_timestamp(timestamp, sizeof(timestamp));
1806	if (err) {
1807		pr_err("Failed to get current timestamp\n");
1808		return -EINVAL;
1809	}
1810
1811	fd = perf_data__switch(data, timestamp,
1812				    rec->session->header.data_offset,
1813				    at_exit, &new_filename);
1814	if (fd >= 0 && !at_exit) {
1815		rec->bytes_written = 0;
1816		rec->session->header.data_size = 0;
1817	}
1818
1819	if (!quiet)
1820		fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
1821			data->path, timestamp);
1822
1823	if (rec->switch_output.num_files) {
1824		int n = rec->switch_output.cur_file + 1;
1825
1826		if (n >= rec->switch_output.num_files)
1827			n = 0;
1828		rec->switch_output.cur_file = n;
1829		if (rec->switch_output.filenames[n]) {
1830			remove(rec->switch_output.filenames[n]);
1831			zfree(&rec->switch_output.filenames[n]);
1832		}
1833		rec->switch_output.filenames[n] = new_filename;
1834	} else {
1835		free(new_filename);
1836	}
1837
1838	/* Output tracking events */
1839	if (!at_exit) {
1840		record__synthesize(rec, false);
1841
		/*
		 * In 'perf record --switch-output' without -a,
		 * record__synthesize() in record__switch_output() won't
		 * generate tracking events because there's no thread_map
		 * in the evlist, so the newly created perf.data wouldn't
		 * contain map and comm information.
		 * Create a fake thread_map and call
		 * perf_event__synthesize_thread_map() directly for those
		 * events.
		 */
1851		if (target__none(&rec->opts.target))
1852			record__synthesize_workload(rec, false);
1853		write_finished_init(rec, false);
1854	}
1855	return fd;
1856}
1857
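/*
 * Emit a PERF_RECORD_LOST_SAMPLES event for one counter instance, appending
 * an id sample (when the evsel has IDs) so the lost count can be attributed
 * to the right event.
 */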
1858static void __record__save_lost_samples(struct record *rec, struct evsel *evsel,
1859					struct perf_record_lost_samples *lost,
1860					int cpu_idx, int thread_idx, u64 lost_count,
1861					u16 misc_flag)
1862{
1863	struct perf_sample_id *sid;
1864	struct perf_sample sample = {};
1865	int id_hdr_size;
1866
1867	lost->lost = lost_count;
1868	if (evsel->core.ids) {
1869		sid = xyarray__entry(evsel->core.sample_id, cpu_idx, thread_idx);
1870		sample.id = sid->id;
1871	}
1872
1873	id_hdr_size = perf_event__synthesize_id_sample((void *)(lost + 1),
1874						       evsel->core.attr.sample_type, &sample);
1875	lost->header.size = sizeof(*lost) + id_hdr_size;
1876	lost->header.misc = misc_flag;
1877	record__write(rec, NULL, lost, lost->header.size);
1878}
1879
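/*
 * Read the lost-sample counts accumulated by the kernel, and by BPF filters,
 * for every event and append matching PERF_RECORD_LOST_SAMPLES records to
 * the output.
 */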
1880static void record__read_lost_samples(struct record *rec)
1881{
1882	struct perf_session *session = rec->session;
1883	struct perf_record_lost_samples *lost;
1884	struct evsel *evsel;
1885
1886	/* there was an error during record__open */
1887	if (session->evlist == NULL)
1888		return;
1889
1890	lost = zalloc(PERF_SAMPLE_MAX_SIZE);
1891	if (lost == NULL) {
1892		pr_debug("Memory allocation failed\n");
1893		return;
1894	}
1895
1896	lost->header.type = PERF_RECORD_LOST_SAMPLES;
1897
1898	evlist__for_each_entry(session->evlist, evsel) {
1899		struct xyarray *xy = evsel->core.sample_id;
1900		u64 lost_count;
1901
1902		if (xy == NULL || evsel->core.fd == NULL)
1903			continue;
1904		if (xyarray__max_x(evsel->core.fd) != xyarray__max_x(xy) ||
1905		    xyarray__max_y(evsel->core.fd) != xyarray__max_y(xy)) {
1906			pr_debug("Unmatched FD vs. sample ID: skip reading LOST count\n");
1907			continue;
1908		}
1909
1910		for (int x = 0; x < xyarray__max_x(xy); x++) {
1911			for (int y = 0; y < xyarray__max_y(xy); y++) {
1912				struct perf_counts_values count;
1913
1914				if (perf_evsel__read(&evsel->core, x, y, &count) < 0) {
1915					pr_debug("read LOST count failed\n");
1916					goto out;
1917				}
1918
1919				if (count.lost) {
1920					__record__save_lost_samples(rec, evsel, lost,
1921								    x, y, count.lost, 0);
1922				}
1923			}
1924		}
1925
1926		lost_count = perf_bpf_filter__lost_count(evsel);
1927		if (lost_count)
1928			__record__save_lost_samples(rec, evsel, lost, 0, 0, lost_count,
1929						    PERF_RECORD_MISC_LOST_SAMPLES_BPF);
1930	}
1931out:
1932	free(lost);
1933}
1934
1935static volatile sig_atomic_t workload_exec_errno;
1936
/*
 * evlist__prepare_workload() will send a SIGUSR1
 * if the fork fails, since we asked for it by setting its
 * want_signal parameter to true.
 */
1942static void workload_exec_failed_signal(int signo __maybe_unused,
1943					siginfo_t *info,
1944					void *ucontext __maybe_unused)
1945{
1946	workload_exec_errno = info->si_value.sival_int;
1947	done = 1;
1948	child_finished = 1;
1949}
1950
1951static void snapshot_sig_handler(int sig);
1952static void alarm_sig_handler(int sig);
1953
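/*
 * Pick the perf_event_mmap_page of the first mapped ring buffer; its
 * time conversion fields are needed to synthesize PERF_RECORD_TIME_CONV.
 */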
1954static const struct perf_event_mmap_page *evlist__pick_pc(struct evlist *evlist)
1955{
1956	if (evlist) {
1957		if (evlist->mmap && evlist->mmap[0].core.base)
1958			return evlist->mmap[0].core.base;
1959		if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].core.base)
1960			return evlist->overwrite_mmap[0].core.base;
1961	}
1962	return NULL;
1963}
1964
1965static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
1966{
1967	const struct perf_event_mmap_page *pc = evlist__pick_pc(rec->evlist);
1968	if (pc)
1969		return pc;
1970	return NULL;
1971}
1972
1973static int record__synthesize(struct record *rec, bool tail)
1974{
1975	struct perf_session *session = rec->session;
1976	struct machine *machine = &session->machines.host;
1977	struct perf_data *data = &rec->data;
1978	struct record_opts *opts = &rec->opts;
1979	struct perf_tool *tool = &rec->tool;
1980	int err = 0;
1981	event_op f = process_synthesized_event;
1982
1983	if (rec->opts.tail_synthesize != tail)
1984		return 0;
1985
1986	if (data->is_pipe) {
1987		err = perf_event__synthesize_for_pipe(tool, session, data,
1988						      process_synthesized_event);
1989		if (err < 0)
1990			goto out;
1991
1992		rec->bytes_written += err;
1993	}
1994
1995	err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
1996					  process_synthesized_event, machine);
1997	if (err)
1998		goto out;
1999
2000	/* Synthesize id_index before auxtrace_info */
2001	err = perf_event__synthesize_id_index(tool,
2002					      process_synthesized_event,
2003					      session->evlist, machine);
2004	if (err)
2005		goto out;
2006
2007	if (rec->opts.full_auxtrace) {
2008		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
2009					session, process_synthesized_event);
2010		if (err)
2011			goto out;
2012	}
2013
2014	if (!evlist__exclude_kernel(rec->evlist)) {
2015		err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
2016							 machine);
2017		WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
2018				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
2019				   "Check /proc/kallsyms permission or run as root.\n");
2020
2021		err = perf_event__synthesize_modules(tool, process_synthesized_event,
2022						     machine);
2023		WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
2024				   "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
2025				   "Check /proc/modules permission or run as root.\n");
2026	}
2027
2028	if (perf_guest) {
2029		machines__process_guests(&session->machines,
2030					 perf_event__synthesize_guest_os, tool);
2031	}
2032
2033	err = perf_event__synthesize_extra_attr(&rec->tool,
2034						rec->evlist,
2035						process_synthesized_event,
2036						data->is_pipe);
2037	if (err)
2038		goto out;
2039
2040	err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->core.threads,
2041						 process_synthesized_event,
2042						 NULL);
2043	if (err < 0) {
2044		pr_err("Couldn't synthesize thread map.\n");
2045		return err;
2046	}
2047
2048	err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->core.all_cpus,
2049					     process_synthesized_event, NULL);
2050	if (err < 0) {
2051		pr_err("Couldn't synthesize cpu map.\n");
2052		return err;
2053	}
2054
2055	err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
2056						machine, opts);
2057	if (err < 0) {
2058		pr_warning("Couldn't synthesize bpf events.\n");
2059		err = 0;
2060	}
2061
2062	if (rec->opts.synth & PERF_SYNTH_CGROUP) {
2063		err = perf_event__synthesize_cgroups(tool, process_synthesized_event,
2064						     machine);
2065		if (err < 0) {
2066			pr_warning("Couldn't synthesize cgroup events.\n");
2067			err = 0;
2068		}
2069	}
2070
2071	if (rec->opts.nr_threads_synthesize > 1) {
2072		mutex_init(&synth_lock);
2073		perf_set_multithreaded();
2074		f = process_locked_synthesized_event;
2075	}
2076
2077	if (rec->opts.synth & PERF_SYNTH_TASK) {
2078		bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;
2079
2080		err = __machine__synthesize_threads(machine, tool, &opts->target,
2081						    rec->evlist->core.threads,
2082						    f, needs_mmap, opts->sample_address,
2083						    rec->opts.nr_threads_synthesize);
2084	}
2085
2086	if (rec->opts.nr_threads_synthesize > 1) {
2087		perf_set_singlethreaded();
2088		mutex_destroy(&synth_lock);
2089	}
2090
2091out:
2092	return err;
2093}
2094
2095static int record__process_signal_event(union perf_event *event __maybe_unused, void *data)
2096{
2097	struct record *rec = data;
2098	pthread_kill(rec->thread_id, SIGUSR2);
2099	return 0;
2100}
2101
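/*
 * The side band evlist runs in a helper thread, separate from the main
 * recording loop.  It is populated either by --switch-output-event (hitting
 * one of those events sends SIGUSR2 back to the main thread to rotate the
 * output file) or, with libbpf support, by a PERF_RECORD_BPF_EVENT tracking
 * event used to annotate BPF programs.  A hypothetical invocation could look
 * like (illustrative tracepoint choice):
 *
 *	perf record -a --switch-output-event=syscalls:sys_enter_exit
 */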
2102static int record__setup_sb_evlist(struct record *rec)
2103{
2104	struct record_opts *opts = &rec->opts;
2105
2106	if (rec->sb_evlist != NULL) {
2107		/*
2108		 * We get here if --switch-output-event populated the
2109		 * sb_evlist, so associate a callback that will send a SIGUSR2
2110		 * to the main thread.
2111		 */
2112		evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec);
2113		rec->thread_id = pthread_self();
2114	}
2115#ifdef HAVE_LIBBPF_SUPPORT
2116	if (!opts->no_bpf_event) {
2117		if (rec->sb_evlist == NULL) {
2118			rec->sb_evlist = evlist__new();
2119
2120			if (rec->sb_evlist == NULL) {
2121				pr_err("Couldn't create side band evlist.\n");
2122				return -1;
2123			}
2124		}
2125
2126		if (evlist__add_bpf_sb_event(rec->sb_evlist, &rec->session->header.env)) {
2127			pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n");
2128			return -1;
2129		}
2130	}
2131#endif
2132	if (evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) {
2133		pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
2134		opts->no_bpf_event = true;
2135	}
2136
2137	return 0;
2138}
2139
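/*
 * When -k/--clockid is used, record a paired reference: the current
 * time-of-day (tod_ns) and the current value of the chosen clock
 * (clockid_ns), both in nanoseconds.  A post-processing tool can then map a
 * sample timestamp t taken on that clock to wall-clock time, roughly as:
 *
 *	wallclock_ns = tod_ns + (t - clockid_ns);
 *
 * (a sketch only; the real conversion lives in the reporting tools).
 */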
2140static int record__init_clock(struct record *rec)
2141{
2142	struct perf_session *session = rec->session;
2143	struct timespec ref_clockid;
2144	struct timeval ref_tod;
2145	u64 ref;
2146
2147	if (!rec->opts.use_clockid)
2148		return 0;
2149
2150	if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
2151		session->header.env.clock.clockid_res_ns = rec->opts.clockid_res_ns;
2152
2153	session->header.env.clock.clockid = rec->opts.clockid;
2154
2155	if (gettimeofday(&ref_tod, NULL) != 0) {
2156		pr_err("gettimeofday failed, cannot set reference time.\n");
2157		return -1;
2158	}
2159
2160	if (clock_gettime(rec->opts.clockid, &ref_clockid)) {
2161		pr_err("clock_gettime failed, cannot set reference time.\n");
2162		return -1;
2163	}
2164
2165	ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC +
2166	      (u64) ref_tod.tv_usec * NSEC_PER_USEC;
2167
2168	session->header.env.clock.tod_ns = ref;
2169
2170	ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC +
2171	      (u64) ref_clockid.tv_nsec;
2172
2173	session->header.env.clock.clockid_ns = ref;
2174	return 0;
2175}
2176
2177static void hit_auxtrace_snapshot_trigger(struct record *rec)
2178{
2179	if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
2180		trigger_hit(&auxtrace_snapshot_trigger);
2181		auxtrace_record__snapshot_started = 1;
2182		if (auxtrace_record__snapshot_start(rec->itr))
2183			trigger_error(&auxtrace_snapshot_trigger);
2184	}
2185}
2186
2187static int record__terminate_thread(struct record_thread *thread_data)
2188{
2189	int err;
2190	enum thread_msg ack = THREAD_MSG__UNDEFINED;
2191	pid_t tid = thread_data->tid;
2192
2193	close(thread_data->pipes.msg[1]);
2194	thread_data->pipes.msg[1] = -1;
2195	err = read(thread_data->pipes.ack[0], &ack, sizeof(ack));
2196	if (err > 0)
2197		pr_debug2("threads[%d]: sent %s\n", tid, thread_msg_tags[ack]);
2198	else
2199		pr_warning("threads[%d]: failed to receive termination notification from %d\n",
2200			   thread->tid, tid);
2201
2202	return 0;
2203}
2204
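/*
 * Worker threads are started detached and hand-shake with the main thread
 * over two pipes per thread: msg[] (main -> worker) and ack[] (worker ->
 * main).  A worker reports THREAD_MSG__READY on ack[] once it is up;
 * termination is requested by closing the write end of msg[], which
 * record__terminate_thread() above pairs with a final read on ack[].
 */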
2205static int record__start_threads(struct record *rec)
2206{
2207	int t, tt, err, ret = 0, nr_threads = rec->nr_threads;
2208	struct record_thread *thread_data = rec->thread_data;
2209	sigset_t full, mask;
2210	pthread_t handle;
2211	pthread_attr_t attrs;
2212
2213	thread = &thread_data[0];
2214
2215	if (!record__threads_enabled(rec))
2216		return 0;
2217
2218	sigfillset(&full);
2219	if (sigprocmask(SIG_SETMASK, &full, &mask)) {
2220		pr_err("Failed to block signals on threads start: %s\n", strerror(errno));
2221		return -1;
2222	}
2223
2224	pthread_attr_init(&attrs);
2225	pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED);
2226
2227	for (t = 1; t < nr_threads; t++) {
2228		enum thread_msg msg = THREAD_MSG__UNDEFINED;
2229
2230#ifdef HAVE_PTHREAD_ATTR_SETAFFINITY_NP
2231		pthread_attr_setaffinity_np(&attrs,
2232					    MMAP_CPU_MASK_BYTES(&(thread_data[t].mask->affinity)),
2233					    (cpu_set_t *)(thread_data[t].mask->affinity.bits));
2234#endif
2235		if (pthread_create(&handle, &attrs, record__thread, &thread_data[t])) {
2236			for (tt = 1; tt < t; tt++)
2237				record__terminate_thread(&thread_data[tt]);
2238			pr_err("Failed to start threads: %s\n", strerror(errno));
2239			ret = -1;
2240			goto out_err;
2241		}
2242
2243		err = read(thread_data[t].pipes.ack[0], &msg, sizeof(msg));
2244		if (err > 0)
2245			pr_debug2("threads[%d]: sent %s\n", rec->thread_data[t].tid,
2246				  thread_msg_tags[msg]);
2247		else
2248			pr_warning("threads[%d]: failed to receive start notification from %d\n",
2249				   thread->tid, rec->thread_data[t].tid);
2250	}
2251
2252	sched_setaffinity(0, MMAP_CPU_MASK_BYTES(&thread->mask->affinity),
2253			(cpu_set_t *)thread->mask->affinity.bits);
2254
2255	pr_debug("threads[%d]: started on cpu%d\n", thread->tid, sched_getcpu());
2256
2257out_err:
2258	pthread_attr_destroy(&attrs);
2259
2260	if (sigprocmask(SIG_SETMASK, &mask, NULL)) {
2261		pr_err("Failed to unblock signals on threads start: %s\n", strerror(errno));
2262		ret = -1;
2263	}
2264
2265	return ret;
2266}
2267
2268static int record__stop_threads(struct record *rec)
2269{
2270	int t;
2271	struct record_thread *thread_data = rec->thread_data;
2272
2273	for (t = 1; t < rec->nr_threads; t++)
2274		record__terminate_thread(&thread_data[t]);
2275
2276	for (t = 0; t < rec->nr_threads; t++) {
2277		rec->samples += thread_data[t].samples;
2278		if (!record__threads_enabled(rec))
2279			continue;
2280		rec->session->bytes_transferred += thread_data[t].bytes_transferred;
2281		rec->session->bytes_compressed += thread_data[t].bytes_compressed;
2282		pr_debug("threads[%d]: samples=%lld, wakes=%ld, ", thread_data[t].tid,
2283			 thread_data[t].samples, thread_data[t].waking);
2284		if (thread_data[t].bytes_transferred && thread_data[t].bytes_compressed)
2285			pr_debug("transferred=%" PRIu64 ", compressed=%" PRIu64 "\n",
2286				 thread_data[t].bytes_transferred, thread_data[t].bytes_compressed);
2287		else
2288			pr_debug("written=%" PRIu64 "\n", thread_data[t].bytes_written);
2289	}
2290
2291	return 0;
2292}
2293
2294static unsigned long record__waking(struct record *rec)
2295{
2296	int t;
2297	unsigned long waking = 0;
2298	struct record_thread *thread_data = rec->thread_data;
2299
2300	for (t = 0; t < rec->nr_threads; t++)
2301		waking += thread_data[t].waking;
2302
2303	return waking;
2304}
2305
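/*
 * __cmd_record() is the main driver: set up signal handling and the session,
 * open and mmap the events, write the file header, synthesize pre-existing
 * state, optionally fork and kick off the workload, then loop draining the
 * ring buffers until "done" is set (workload exit, signal, control command
 * or enable-timer), and finally flush, collect lost-sample counts, run the
 * tail synthesis pass and finish the output file.
 */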
2306static int __cmd_record(struct record *rec, int argc, const char **argv)
2307{
2308	int err;
2309	int status = 0;
2310	const bool forks = argc > 0;
2311	struct perf_tool *tool = &rec->tool;
2312	struct record_opts *opts = &rec->opts;
2313	struct perf_data *data = &rec->data;
2314	struct perf_session *session;
2315	bool disabled = false, draining = false;
2316	int fd;
2317	float ratio = 0;
2318	enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED;
2319
2320	atexit(record__sig_exit);
2321	signal(SIGCHLD, sig_handler);
2322	signal(SIGINT, sig_handler);
2323	signal(SIGTERM, sig_handler);
2324	signal(SIGSEGV, sigsegv_handler);
2325
2326	if (rec->opts.record_namespaces)
2327		tool->namespace_events = true;
2328
2329	if (rec->opts.record_cgroup) {
2330#ifdef HAVE_FILE_HANDLE
2331		tool->cgroup_events = true;
2332#else
2333		pr_err("cgroup tracking is not supported\n");
2334		return -1;
2335#endif
2336	}
2337
2338	if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
2339		signal(SIGUSR2, snapshot_sig_handler);
2340		if (rec->opts.auxtrace_snapshot_mode)
2341			trigger_on(&auxtrace_snapshot_trigger);
2342		if (rec->switch_output.enabled)
2343			trigger_on(&switch_output_trigger);
2344	} else {
2345		signal(SIGUSR2, SIG_IGN);
2346	}
2347
2348	session = perf_session__new(data, tool);
2349	if (IS_ERR(session)) {
2350		pr_err("Perf session creation failed.\n");
2351		return PTR_ERR(session);
2352	}
2353
2354	if (record__threads_enabled(rec)) {
2355		if (perf_data__is_pipe(&rec->data)) {
2356			pr_err("Parallel trace streaming is not available in pipe mode.\n");
2357			return -1;
2358		}
2359		if (rec->opts.full_auxtrace) {
2360			pr_err("Parallel trace streaming is not available in AUX area tracing mode.\n");
2361			return -1;
2362		}
2363	}
2364
2365	fd = perf_data__fd(data);
2366	rec->session = session;
2367
2368	if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) {
2369		pr_err("Compression initialization failed.\n");
2370		return -1;
2371	}
2372#ifdef HAVE_EVENTFD_SUPPORT
2373	done_fd = eventfd(0, EFD_NONBLOCK);
2374	if (done_fd < 0) {
2375		pr_err("Failed to create wakeup eventfd, error: %m\n");
2376		status = -1;
2377		goto out_delete_session;
2378	}
2379	err = evlist__add_wakeup_eventfd(rec->evlist, done_fd);
2380	if (err < 0) {
2381		pr_err("Failed to add wakeup eventfd to poll list\n");
2382		status = err;
2383		goto out_delete_session;
2384	}
2385#endif // HAVE_EVENTFD_SUPPORT
2386
2387	session->header.env.comp_type  = PERF_COMP_ZSTD;
2388	session->header.env.comp_level = rec->opts.comp_level;
2389
2390	if (rec->opts.kcore &&
2391	    !record__kcore_readable(&session->machines.host)) {
2392		pr_err("ERROR: kcore is not readable.\n");
2393		return -1;
2394	}
2395
2396	if (record__init_clock(rec))
2397		return -1;
2398
2399	record__init_features(rec);
2400
2401	if (forks) {
2402		err = evlist__prepare_workload(rec->evlist, &opts->target, argv, data->is_pipe,
2403					       workload_exec_failed_signal);
2404		if (err < 0) {
2405			pr_err("Couldn't run the workload!\n");
2406			status = err;
2407			goto out_delete_session;
2408		}
2409	}
2410
2411	/*
2412	 * If we have just a single event and are sending data
2413	 * through a pipe, we need to force sample ID allocation,
2414	 * because we synthesize the event name through the pipe
2415	 * and need the ID for that.
2416	 */
2417	if (data->is_pipe && rec->evlist->core.nr_entries == 1)
2418		rec->opts.sample_id = true;
2419
2420	if (rec->timestamp_filename && perf_data__is_pipe(data)) {
2421		rec->timestamp_filename = false;
2422		pr_warning("WARNING: --timestamp-filename option is not available in pipe mode.\n");
2423	}
2424
2425	evlist__uniquify_name(rec->evlist);
2426
2427	/* Debug message used by test scripts */
2428	pr_debug3("perf record opening and mmapping events\n");
2429	if (record__open(rec) != 0) {
2430		err = -1;
2431		goto out_free_threads;
2432	}
2433	/* Debug message used by test scripts */
2434	pr_debug3("perf record done opening and mmapping events\n");
2435	session->header.env.comp_mmap_len = session->evlist->core.mmap_len;
2436
2437	if (rec->opts.kcore) {
2438		err = record__kcore_copy(&session->machines.host, data);
2439		if (err) {
2440			pr_err("ERROR: Failed to copy kcore\n");
2441			goto out_free_threads;
2442		}
2443	}
2444
2445	/*
2446	 * Normally perf_session__new would do this, but it doesn't have the
2447	 * evlist.
2448	 */
2449	if (rec->tool.ordered_events && !evlist__sample_id_all(rec->evlist)) {
2450		pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
2451		rec->tool.ordered_events = false;
2452	}
2453
2454	if (evlist__nr_groups(rec->evlist) == 0)
2455		perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
2456
2457	if (data->is_pipe) {
2458		err = perf_header__write_pipe(fd);
2459		if (err < 0)
2460			goto out_free_threads;
2461	} else {
2462		err = perf_session__write_header(session, rec->evlist, fd, false);
2463		if (err < 0)
2464			goto out_free_threads;
2465	}
2466
2467	err = -1;
2468	if (!rec->no_buildid
2469	    && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
2470		pr_err("Couldn't generate buildids. "
2471		       "Use --no-buildid to profile anyway.\n");
2472		goto out_free_threads;
2473	}
2474
2475	err = record__setup_sb_evlist(rec);
2476	if (err)
2477		goto out_free_threads;
2478
2479	err = record__synthesize(rec, false);
2480	if (err < 0)
2481		goto out_free_threads;
2482
2483	if (rec->realtime_prio) {
2484		struct sched_param param;
2485
2486		param.sched_priority = rec->realtime_prio;
2487		if (sched_setscheduler(0, SCHED_FIFO, &param)) {
2488			pr_err("Could not set realtime priority.\n");
2489			err = -1;
2490			goto out_free_threads;
2491		}
2492	}
2493
2494	if (record__start_threads(rec))
2495		goto out_free_threads;
2496
2497	/*
2498	 * When perf is starting the traced process, all the events
2499	 * (apart from group members) have enable_on_exec=1 set,
2500	 * so don't spoil it by prematurely enabling them.
2501	 */
2502	if (!target__none(&opts->target) && !opts->target.initial_delay)
2503		evlist__enable(rec->evlist);
2504
2505	/*
2506	 * Let the child rip
2507	 */
2508	if (forks) {
2509		struct machine *machine = &session->machines.host;
2510		union perf_event *event;
2511		pid_t tgid;
2512
2513		event = malloc(sizeof(event->comm) + machine->id_hdr_size);
2514		if (event == NULL) {
2515			err = -ENOMEM;
2516			goto out_child;
2517		}
2518
2519		/*
2520		 * Some H/W events are generated before the COMM event,
2521		 * which is emitted during exec(), so perf script
2522		 * cannot see the correct process name for those events.
2523		 * Synthesize a COMM event to prevent that.
2524		 */
2525		tgid = perf_event__synthesize_comm(tool, event,
2526						   rec->evlist->workload.pid,
2527						   process_synthesized_event,
2528						   machine);
2529		free(event);
2530
2531		if (tgid == -1)
2532			goto out_child;
2533
2534		event = malloc(sizeof(event->namespaces) +
2535			       (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
2536			       machine->id_hdr_size);
2537		if (event == NULL) {
2538			err = -ENOMEM;
2539			goto out_child;
2540		}
2541
2542		/*
2543		 * Synthesize NAMESPACES event for the command specified.
2544		 */
2545		perf_event__synthesize_namespaces(tool, event,
2546						  rec->evlist->workload.pid,
2547						  tgid, process_synthesized_event,
2548						  machine);
2549		free(event);
2550
2551		evlist__start_workload(rec->evlist);
2552	}
2553
2554	if (opts->target.initial_delay) {
2555		pr_info(EVLIST_DISABLED_MSG);
2556		if (opts->target.initial_delay > 0) {
2557			usleep(opts->target.initial_delay * USEC_PER_MSEC);
2558			evlist__enable(rec->evlist);
2559			pr_info(EVLIST_ENABLED_MSG);
2560		}
2561	}
2562
2563	err = event_enable_timer__start(rec->evlist->eet);
2564	if (err)
2565		goto out_child;
2566
2567	/* Debug message used by test scripts */
2568	pr_debug3("perf record has started\n");
2569	fflush(stderr);
2570
2571	trigger_ready(&auxtrace_snapshot_trigger);
2572	trigger_ready(&switch_output_trigger);
2573	perf_hooks__invoke_record_start();
2574
2575	/*
2576	 * Must write FINISHED_INIT so it will be seen after all other
2577	 * synthesized user events, but before any regular events.
2578	 */
2579	err = write_finished_init(rec, false);
2580	if (err < 0)
2581		goto out_child;
2582
2583	for (;;) {
2584		unsigned long long hits = thread->samples;
2585
2586		/*
2587		 * rec->evlist->bkw_mmap_state may be BKW_MMAP_EMPTY
2588		 * here: when done == true and hits != rec->samples in
2589		 * the previous round.
2590		 *
2591		 * evlist__toggle_bkw_mmap() ensures we never convert
2592		 * BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
2593		 */
2594		if (trigger_is_hit(&switch_output_trigger) || done || draining)
2595			evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
2596
2597		if (record__mmap_read_all(rec, false) < 0) {
2598			trigger_error(&auxtrace_snapshot_trigger);
2599			trigger_error(&switch_output_trigger);
2600			err = -1;
2601			goto out_child;
2602		}
2603
2604		if (auxtrace_record__snapshot_started) {
2605			auxtrace_record__snapshot_started = 0;
2606			if (!trigger_is_error(&auxtrace_snapshot_trigger))
2607				record__read_auxtrace_snapshot(rec, false);
2608			if (trigger_is_error(&auxtrace_snapshot_trigger)) {
2609				pr_err("AUX area tracing snapshot failed\n");
2610				err = -1;
2611				goto out_child;
2612			}
2613		}
2614
2615		if (trigger_is_hit(&switch_output_trigger)) {
2616			/*
2617			 * If switch_output_trigger is hit, the data in the
2618			 * overwritable ring buffer should have been collected,
2619			 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
2620			 *
2621			 * If SIGUSR2 was raised after or during record__mmap_read_all(),
2622			 * record__mmap_read_all() didn't collect data from the
2623			 * overwritable ring buffer. Read again.
2624			 */
2625			if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
2626				continue;
2627			trigger_ready(&switch_output_trigger);
2628
2629			/*
2630			 * Reenable events in overwrite ring buffer after
2631			 * record__mmap_read_all(): we should have collected
2632			 * data from it.
2633			 */
2634			evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
2635
2636			if (!quiet)
2637				fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
2638					record__waking(rec));
2639			thread->waking = 0;
2640			fd = record__switch_output(rec, false);
2641			if (fd < 0) {
2642				pr_err("Failed to switch to new file\n");
2643				trigger_error(&switch_output_trigger);
2644				err = fd;
2645				goto out_child;
2646			}
2647
2648			/* re-arm the alarm */
2649			if (rec->switch_output.time)
2650				alarm(rec->switch_output.time);
2651		}
2652
2653		if (hits == thread->samples) {
2654			if (done || draining)
2655				break;
2656			err = fdarray__poll(&thread->pollfd, -1);
2657			/*
2658			 * Propagate the error only if there is one. Ignore a positive
2659			 * number of returned events and the interrupt error (EINTR).
2660			 */
2661			if (err > 0 || (err < 0 && errno == EINTR))
2662				err = 0;
2663			thread->waking++;
2664
2665			if (fdarray__filter(&thread->pollfd, POLLERR | POLLHUP,
2666					    record__thread_munmap_filtered, NULL) == 0)
2667				draining = true;
2668
2669			err = record__update_evlist_pollfd_from_thread(rec, rec->evlist, thread);
2670			if (err)
2671				goto out_child;
2672		}
2673
2674		if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) {
2675			switch (cmd) {
2676			case EVLIST_CTL_CMD_SNAPSHOT:
2677				hit_auxtrace_snapshot_trigger(rec);
2678				evlist__ctlfd_ack(rec->evlist);
2679				break;
2680			case EVLIST_CTL_CMD_STOP:
2681				done = 1;
2682				break;
2683			case EVLIST_CTL_CMD_ACK:
2684			case EVLIST_CTL_CMD_UNSUPPORTED:
2685			case EVLIST_CTL_CMD_ENABLE:
2686			case EVLIST_CTL_CMD_DISABLE:
2687			case EVLIST_CTL_CMD_EVLIST:
2688			case EVLIST_CTL_CMD_PING:
2689			default:
2690				break;
2691			}
2692		}
2693
2694		err = event_enable_timer__process(rec->evlist->eet);
2695		if (err < 0)
2696			goto out_child;
2697		if (err) {
2698			err = 0;
2699			done = 1;
2700		}
2701
2702		/*
2703		 * When perf is starting the traced process, the events die
2704		 * with the process at the end and we wait for that. Thus there
2705		 * is no need to disable the events in this case.
2706		 */
2707		if (done && !disabled && !target__none(&opts->target)) {
2708			trigger_off(&auxtrace_snapshot_trigger);
2709			evlist__disable(rec->evlist);
2710			disabled = true;
2711		}
2712	}
2713
2714	trigger_off(&auxtrace_snapshot_trigger);
2715	trigger_off(&switch_output_trigger);
2716
2717	if (opts->auxtrace_snapshot_on_exit)
2718		record__auxtrace_snapshot_exit(rec);
2719
2720	if (forks && workload_exec_errno) {
2721		char msg[STRERR_BUFSIZE], strevsels[2048];
2722		const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
2723
2724		evlist__scnprintf_evsels(rec->evlist, sizeof(strevsels), strevsels);
2725
2726		pr_err("Failed to collect '%s' for the '%s' workload: %s\n",
2727			strevsels, argv[0], emsg);
2728		err = -1;
2729		goto out_child;
2730	}
2731
2732	if (!quiet)
2733		fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n",
2734			record__waking(rec));
2735
2736	write_finished_init(rec, true);
2737
2738	if (target__none(&rec->opts.target))
2739		record__synthesize_workload(rec, true);
2740
2741out_child:
2742	record__stop_threads(rec);
2743	record__mmap_read_all(rec, true);
2744out_free_threads:
2745	record__free_thread_data(rec);
2746	evlist__finalize_ctlfd(rec->evlist);
2747	record__aio_mmap_read_sync(rec);
2748
2749	if (rec->session->bytes_transferred && rec->session->bytes_compressed) {
2750		ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed;
2751		session->header.env.comp_ratio = ratio + 0.5;
2752	}
2753
2754	if (forks) {
2755		int exit_status;
2756
2757		if (!child_finished)
2758			kill(rec->evlist->workload.pid, SIGTERM);
2759
2760		wait(&exit_status);
2761
2762		if (err < 0)
2763			status = err;
2764		else if (WIFEXITED(exit_status))
2765			status = WEXITSTATUS(exit_status);
2766		else if (WIFSIGNALED(exit_status))
2767			signr = WTERMSIG(exit_status);
2768	} else
2769		status = err;
2770
2771	if (rec->off_cpu)
2772		rec->bytes_written += off_cpu_write(rec->session);
2773
2774	record__read_lost_samples(rec);
2775	record__synthesize(rec, true);
2776	/* this will be recalculated during process_buildids() */
2777	rec->samples = 0;
2778
2779	if (!err) {
2780		if (!rec->timestamp_filename) {
2781			record__finish_output(rec);
2782		} else {
2783			fd = record__switch_output(rec, true);
2784			if (fd < 0) {
2785				status = fd;
2786				goto out_delete_session;
2787			}
2788		}
2789	}
2790
2791	perf_hooks__invoke_record_end();
2792
2793	if (!err && !quiet) {
2794		char samples[128];
2795		const char *postfix = rec->timestamp_filename ?
2796					".<timestamp>" : "";
2797
2798		if (rec->samples && !rec->opts.full_auxtrace)
2799			scnprintf(samples, sizeof(samples),
2800				  " (%" PRIu64 " samples)", rec->samples);
2801		else
2802			samples[0] = '\0';
2803
2804		fprintf(stderr,	"[ perf record: Captured and wrote %.3f MB %s%s%s",
2805			perf_data__size(data) / 1024.0 / 1024.0,
2806			data->path, postfix, samples);
2807		if (ratio) {
2808			fprintf(stderr,	", compressed (original %.3f MB, ratio is %.3f)",
2809					rec->session->bytes_transferred / 1024.0 / 1024.0,
2810					ratio);
2811		}
2812		fprintf(stderr, " ]\n");
2813	}
2814
2815out_delete_session:
2816#ifdef HAVE_EVENTFD_SUPPORT
2817	if (done_fd >= 0) {
2818		fd = done_fd;
2819		done_fd = -1;
2820
2821		close(fd);
2822	}
2823#endif
2824	zstd_fini(&session->zstd_data);
2825	perf_session__delete(session);
2826
2827	if (!opts->no_bpf_event)
2828		evlist__stop_sb_thread(rec->sb_evlist);
2829	return status;
2830}
2831
2832static void callchain_debug(struct callchain_param *callchain)
2833{
2834	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
2835
2836	pr_debug("callchain: type %s\n", str[callchain->record_mode]);
2837
2838	if (callchain->record_mode == CALLCHAIN_DWARF)
2839		pr_debug("callchain: stack dump size %d\n",
2840			 callchain->dump_size);
2841}
2842
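/*
 * --call-graph takes a record mode plus an optional size, e.g. (illustrative
 * command lines):
 *
 *	perf record --call-graph fp ...
 *	perf record --call-graph dwarf,4096 ...
 *
 * Note that DWARF mode also turns on data address sampling below, as the
 * comment in record_opts__parse_callchain() explains.
 */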
2843int record_opts__parse_callchain(struct record_opts *record,
2844				 struct callchain_param *callchain,
2845				 const char *arg, bool unset)
2846{
2847	int ret;
2848	callchain->enabled = !unset;
2849
2850	/* --no-call-graph */
2851	if (unset) {
2852		callchain->record_mode = CALLCHAIN_NONE;
2853		pr_debug("callchain: disabled\n");
2854		return 0;
2855	}
2856
2857	ret = parse_callchain_record_opt(arg, callchain);
2858	if (!ret) {
2859		/* Enable data address sampling for DWARF unwind. */
2860		if (callchain->record_mode == CALLCHAIN_DWARF)
2861			record->sample_address = true;
2862		callchain_debug(callchain);
2863	}
2864
2865	return ret;
2866}
2867
2868int record_parse_callchain_opt(const struct option *opt,
2869			       const char *arg,
2870			       int unset)
2871{
2872	return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
2873}
2874
2875int record_callchain_opt(const struct option *opt,
2876			 const char *arg __maybe_unused,
2877			 int unset __maybe_unused)
2878{
2879	struct callchain_param *callchain = opt->value;
2880
2881	callchain->enabled = true;
2882
2883	if (callchain->record_mode == CALLCHAIN_NONE)
2884		callchain->record_mode = CALLCHAIN_FP;
2885
2886	callchain_debug(callchain);
2887	return 0;
2888}
2889
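/*
 * The keys handled below can also be set in perfconfig; a hypothetical
 * ~/.perfconfig fragment exercising them could look like:
 *
 *	[record]
 *		build-id = cache	# or: no-cache, skip, mmap
 *		call-graph = dwarf	# forwarded as call-graph.record-mode
 *		aio = 2			# only with HAVE_AIO_SUPPORT
 *		debuginfod = https://debuginfod.example.org
 */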
2890static int perf_record_config(const char *var, const char *value, void *cb)
2891{
2892	struct record *rec = cb;
2893
2894	if (!strcmp(var, "record.build-id")) {
2895		if (!strcmp(value, "cache"))
2896			rec->no_buildid_cache = false;
2897		else if (!strcmp(value, "no-cache"))
2898			rec->no_buildid_cache = true;
2899		else if (!strcmp(value, "skip"))
2900			rec->no_buildid = true;
2901		else if (!strcmp(value, "mmap"))
2902			rec->buildid_mmap = true;
2903		else
2904			return -1;
2905		return 0;
2906	}
2907	if (!strcmp(var, "record.call-graph")) {
2908		var = "call-graph.record-mode";
2909		return perf_default_config(var, value, cb);
2910	}
2911#ifdef HAVE_AIO_SUPPORT
2912	if (!strcmp(var, "record.aio")) {
2913		rec->opts.nr_cblocks = strtol(value, NULL, 0);
2914		if (!rec->opts.nr_cblocks)
2915			rec->opts.nr_cblocks = nr_cblocks_default;
2916	}
2917#endif
2918	if (!strcmp(var, "record.debuginfod")) {
2919		rec->debuginfod.urls = strdup(value);
2920		if (!rec->debuginfod.urls)
2921			return -ENOMEM;
2922		rec->debuginfod.set = true;
2923	}
2924
2925	return 0;
2926}
2927
2928static int record__parse_event_enable_time(const struct option *opt, const char *str, int unset)
2929{
2930	struct record *rec = (struct record *)opt->value;
2931
2932	return evlist__parse_event_enable_time(rec->evlist, &rec->opts, str, unset);
2933}
2934
2935static int record__parse_affinity(const struct option *opt, const char *str, int unset)
2936{
2937	struct record_opts *opts = (struct record_opts *)opt->value;
2938
2939	if (unset || !str)
2940		return 0;
2941
2942	if (!strcasecmp(str, "node"))
2943		opts->affinity = PERF_AFFINITY_NODE;
2944	else if (!strcasecmp(str, "cpu"))
2945		opts->affinity = PERF_AFFINITY_CPU;
2946
2947	return 0;
2948}
2949
2950static int record__mmap_cpu_mask_alloc(struct mmap_cpu_mask *mask, int nr_bits)
2951{
2952	mask->nbits = nr_bits;
2953	mask->bits = bitmap_zalloc(mask->nbits);
2954	if (!mask->bits)
2955		return -ENOMEM;
2956
2957	return 0;
2958}
2959
2960static void record__mmap_cpu_mask_free(struct mmap_cpu_mask *mask)
2961{
2962	bitmap_free(mask->bits);
2963	mask->nbits = 0;
2964}
2965
2966static int record__thread_mask_alloc(struct thread_mask *mask, int nr_bits)
2967{
2968	int ret;
2969
2970	ret = record__mmap_cpu_mask_alloc(&mask->maps, nr_bits);
2971	if (ret) {
2972		mask->affinity.bits = NULL;
2973		return ret;
2974	}
2975
2976	ret = record__mmap_cpu_mask_alloc(&mask->affinity, nr_bits);
2977	if (ret) {
2978		record__mmap_cpu_mask_free(&mask->maps);
2979		mask->maps.bits = NULL;
2980	}
2981
2982	return ret;
2983}
2984
2985static void record__thread_mask_free(struct thread_mask *mask)
2986{
2987	record__mmap_cpu_mask_free(&mask->maps);
2988	record__mmap_cpu_mask_free(&mask->affinity);
2989}
2990
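/*
 * --threads accepts either one of the predefined specs (cpu, core, package,
 * numa) or a user-defined spec.  Judging from record__init_thread_user_masks()
 * further down, the user spec is a ':'-separated list of
 * <maps cpus>/<affinity cpus> pairs, so something like
 * --threads=0-3/0-3:4-7/4-7 would create two streaming threads.
 */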
2991static int record__parse_threads(const struct option *opt, const char *str, int unset)
2992{
2993	int s;
2994	struct record_opts *opts = opt->value;
2995
2996	if (unset || !str || !strlen(str)) {
2997		opts->threads_spec = THREAD_SPEC__CPU;
2998	} else {
2999		for (s = 1; s < THREAD_SPEC__MAX; s++) {
3000			if (s == THREAD_SPEC__USER) {
3001				opts->threads_user_spec = strdup(str);
3002				if (!opts->threads_user_spec)
3003					return -ENOMEM;
3004				opts->threads_spec = THREAD_SPEC__USER;
3005				break;
3006			}
3007			if (!strncasecmp(str, thread_spec_tags[s], strlen(thread_spec_tags[s]))) {
3008				opts->threads_spec = s;
3009				break;
3010			}
3011		}
3012	}
3013
3014	if (opts->threads_spec == THREAD_SPEC__USER)
3015		pr_debug("threads_spec: %s\n", opts->threads_user_spec);
3016	else
3017		pr_debug("threads_spec: %s\n", thread_spec_tags[opts->threads_spec]);
3018
3019	return 0;
3020}
3021
3022static int parse_output_max_size(const struct option *opt,
3023				 const char *str, int unset)
3024{
3025	unsigned long *s = (unsigned long *)opt->value;
3026	static struct parse_tag tags_size[] = {
3027		{ .tag  = 'B', .mult = 1       },
3028		{ .tag  = 'K', .mult = 1 << 10 },
3029		{ .tag  = 'M', .mult = 1 << 20 },
3030		{ .tag  = 'G', .mult = 1 << 30 },
3031		{ .tag  = 0 },
3032	};
3033	unsigned long val;
3034
3035	if (unset) {
3036		*s = 0;
3037		return 0;
3038	}
3039
3040	val = parse_tag_value(str, tags_size);
3041	if (val != (unsigned long) -1) {
3042		*s = val;
3043		return 0;
3044	}
3045
3046	return -1;
3047}
3048
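/*
 * -m/--mmap-pages takes "pages[,pages]": the first value sizes the data
 * mmaps, the optional second one the AUX area tracing mmaps, e.g. -m 512,128.
 * The actual parsing of each value (including any size suffixes) is delegated
 * to __evlist__parse_mmap_pages().
 */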
3049static int record__parse_mmap_pages(const struct option *opt,
3050				    const char *str,
3051				    int unset __maybe_unused)
3052{
3053	struct record_opts *opts = opt->value;
3054	char *s, *p;
3055	unsigned int mmap_pages;
3056	int ret;
3057
3058	if (!str)
3059		return -EINVAL;
3060
3061	s = strdup(str);
3062	if (!s)
3063		return -ENOMEM;
3064
3065	p = strchr(s, ',');
3066	if (p)
3067		*p = '\0';
3068
3069	if (*s) {
3070		ret = __evlist__parse_mmap_pages(&mmap_pages, s);
3071		if (ret)
3072			goto out_free;
3073		opts->mmap_pages = mmap_pages;
3074	}
3075
3076	if (!p) {
3077		ret = 0;
3078		goto out_free;
3079	}
3080
3081	ret = __evlist__parse_mmap_pages(&mmap_pages, p + 1);
3082	if (ret)
3083		goto out_free;
3084
3085	opts->auxtrace_mmap_pages = mmap_pages;
3086
3087out_free:
3088	free(s);
3089	return ret;
3090}
3091
3092void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused)
3093{
3094}
3095
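/*
 * --control wires an external controller to the record session.  As the
 * option help further down describes, a FIFO-based session could look
 * roughly like this (illustrative shell snippet):
 *
 *	mkfifo ctl.fifo ack.fifo
 *	perf record --control=fifo:ctl.fifo,ack.fifo -a &
 *	echo disable > ctl.fifo		# pause sampling
 *	echo enable > ctl.fifo		# resume sampling
 */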
3096static int parse_control_option(const struct option *opt,
3097				const char *str,
3098				int unset __maybe_unused)
3099{
3100	struct record_opts *opts = opt->value;
3101
3102	return evlist__parse_control(str, &opts->ctl_fd, &opts->ctl_fd_ack, &opts->ctl_fd_close);
3103}
3104
3105static void switch_output_size_warn(struct record *rec)
3106{
3107	u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages);
3108	struct switch_output *s = &rec->switch_output;
3109
3110	wakeup_size /= 2;
3111
3112	if (s->size < wakeup_size) {
3113		char buf[100];
3114
3115		unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
3116		pr_warning("WARNING: switch-output data size is lower than the "
3117			   "wakeup kernel buffer size (%s), "
3118			   "expect bigger perf.data sizes\n", buf);
3119	}
3120}
3121
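/*
 * --switch-output accepts "signal", a size with a B/K/M/G suffix, or a time
 * with an s/m/h/d suffix (see the tag tables below), e.g.:
 *
 *	perf record --switch-output=signal ...
 *	perf record --switch-output=1G ...
 *	perf record --switch-output=30s ...
 *
 * Any of these implies --timestamp-filename so that each rotated file gets a
 * distinct name.
 */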
3122static int switch_output_setup(struct record *rec)
3123{
3124	struct switch_output *s = &rec->switch_output;
3125	static struct parse_tag tags_size[] = {
3126		{ .tag  = 'B', .mult = 1       },
3127		{ .tag  = 'K', .mult = 1 << 10 },
3128		{ .tag  = 'M', .mult = 1 << 20 },
3129		{ .tag  = 'G', .mult = 1 << 30 },
3130		{ .tag  = 0 },
3131	};
3132	static struct parse_tag tags_time[] = {
3133		{ .tag  = 's', .mult = 1        },
3134		{ .tag  = 'm', .mult = 60       },
3135		{ .tag  = 'h', .mult = 60*60    },
3136		{ .tag  = 'd', .mult = 60*60*24 },
3137		{ .tag  = 0 },
3138	};
3139	unsigned long val;
3140
3141	/*
3142	 * If we're using --switch-output-event, then we imply
3143	 * --switch-output=signal, as we'll send a SIGUSR2 from the side band
3144	 * thread to its parent.
3145	 */
3146	if (rec->switch_output_event_set) {
3147		if (record__threads_enabled(rec)) {
3148			pr_warning("WARNING: --switch-output-event option is not available in parallel streaming mode.\n");
3149			return 0;
3150		}
3151		goto do_signal;
3152	}
3153
3154	if (!s->set)
3155		return 0;
3156
3157	if (record__threads_enabled(rec)) {
3158		pr_warning("WARNING: --switch-output option is not available in parallel streaming mode.\n");
3159		return 0;
3160	}
3161
3162	if (!strcmp(s->str, "signal")) {
3163do_signal:
3164		s->signal = true;
3165		pr_debug("switch-output with SIGUSR2 signal\n");
3166		goto enabled;
3167	}
3168
3169	val = parse_tag_value(s->str, tags_size);
3170	if (val != (unsigned long) -1) {
3171		s->size = val;
3172		pr_debug("switch-output with %s size threshold\n", s->str);
3173		goto enabled;
3174	}
3175
3176	val = parse_tag_value(s->str, tags_time);
3177	if (val != (unsigned long) -1) {
3178		s->time = val;
3179		pr_debug("switch-output with %s time threshold (%lu seconds)\n",
3180			 s->str, s->time);
3181		goto enabled;
3182	}
3183
3184	return -1;
3185
3186enabled:
3187	rec->timestamp_filename = true;
3188	s->enabled              = true;
3189
3190	if (s->size && !rec->opts.no_buffering)
3191		switch_output_size_warn(rec);
3192
3193	return 0;
3194}
3195
3196static const char * const __record_usage[] = {
3197	"perf record [<options>] [<command>]",
3198	"perf record [<options>] -- <command> [<options>]",
3199	NULL
3200};
3201const char * const *record_usage = __record_usage;
3202
3203static int build_id__process_mmap(struct perf_tool *tool, union perf_event *event,
3204				  struct perf_sample *sample, struct machine *machine)
3205{
3206	/*
3207	 * We already have the kernel maps, put in place via perf_session__create_kernel_maps(),
3208	 * no need to add them twice.
3209	 */
3210	if (!(event->header.misc & PERF_RECORD_MISC_USER))
3211		return 0;
3212	return perf_event__process_mmap(tool, event, sample, machine);
3213}
3214
3215static int build_id__process_mmap2(struct perf_tool *tool, union perf_event *event,
3216				   struct perf_sample *sample, struct machine *machine)
3217{
3218	/*
3219	 * We already have the kernel maps, put in place via perf_session__create_kernel_maps(),
3220	 * no need to add them twice.
3221	 */
3222	if (!(event->header.misc & PERF_RECORD_MISC_USER))
3223		return 0;
3224
3225	return perf_event__process_mmap2(tool, event, sample, machine);
3226}
3227
3228static int process_timestamp_boundary(struct perf_tool *tool,
3229				      union perf_event *event __maybe_unused,
3230				      struct perf_sample *sample,
3231				      struct machine *machine __maybe_unused)
3232{
3233	struct record *rec = container_of(tool, struct record, tool);
3234
3235	set_timestamp_boundary(rec, sample->time);
3236	return 0;
3237}
3238
3239static int parse_record_synth_option(const struct option *opt,
3240				     const char *str,
3241				     int unset __maybe_unused)
3242{
3243	struct record_opts *opts = opt->value;
3244	char *p = strdup(str);
3245
3246	if (p == NULL)
3247		return -1;
3248
3249	opts->synth = parse_synth_opt(p);
3250	free(p);
3251
3252	if (opts->synth < 0) {
3253		pr_err("Invalid synth option: %s\n", str);
3254		return -1;
3255	}
3256	return 0;
3257}
3258
3259/*
3260 * XXX Ideally this would be local to cmd_record() and passed to a record__new(),
3261 * because we need to have access to it in record__exit(), which is called
3262 * after cmd_record() exits, but since record_options needs to be accessible to
3263 * builtin-script, leave it here.
3264 *
3265 * At least we don't touch it in all the other functions here directly.
3266 *
3267 * Just say no to tons of global variables, sigh.
3268 */
3269static struct record record = {
3270	.opts = {
3271		.sample_time	     = true,
3272		.mmap_pages	     = UINT_MAX,
3273		.user_freq	     = UINT_MAX,
3274		.user_interval	     = ULLONG_MAX,
3275		.freq		     = 4000,
3276		.target		     = {
3277			.uses_mmap   = true,
3278			.default_per_cpu = true,
3279		},
3280		.mmap_flush          = MMAP_FLUSH_DEFAULT,
3281		.nr_threads_synthesize = 1,
3282		.ctl_fd              = -1,
3283		.ctl_fd_ack          = -1,
3284		.synth               = PERF_SYNTH_ALL,
3285	},
3286	.tool = {
3287		.sample		= process_sample_event,
3288		.fork		= perf_event__process_fork,
3289		.exit		= perf_event__process_exit,
3290		.comm		= perf_event__process_comm,
3291		.namespaces	= perf_event__process_namespaces,
3292		.mmap		= build_id__process_mmap,
3293		.mmap2		= build_id__process_mmap2,
3294		.itrace_start	= process_timestamp_boundary,
3295		.aux		= process_timestamp_boundary,
3296		.ordered_events	= true,
3297	},
3298};
3299
3300const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
3301	"\n\t\t\t\tDefault: fp";
3302
3303static bool dry_run;
3304
3305static struct parse_events_option_args parse_events_option_args = {
3306	.evlistp = &record.evlist,
3307};
3308
3309static struct parse_events_option_args switch_output_parse_events_option_args = {
3310	.evlistp = &record.sb_evlist,
3311};
3312
3313/*
3314 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
3315 * with it and switch to using the library functions in perf_evlist that came
3316 * from builtin-record.c, i.e. use record_opts,
3317 * evlist__prepare_workload, etc. instead of fork+exec'ing 'perf record',
3318 * using pipes, etc.
3319 */
3320static struct option __record_options[] = {
3321	OPT_CALLBACK('e', "event", &parse_events_option_args, "event",
3322		     "event selector. use 'perf list' to list available events",
3323		     parse_events_option),
3324	OPT_CALLBACK(0, "filter", &record.evlist, "filter",
3325		     "event filter", parse_filter),
3326	OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
3327			   NULL, "don't record events from perf itself",
3328			   exclude_perf),
3329	OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
3330		    "record events on existing process id"),
3331	OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
3332		    "record events on existing thread id"),
3333	OPT_INTEGER('r', "realtime", &record.realtime_prio,
3334		    "collect data with this RT SCHED_FIFO priority"),
3335	OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
3336		    "collect data without buffering"),
3337	OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
3338		    "collect raw sample records from all opened counters"),
3339	OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
3340			    "system-wide collection from all CPUs"),
3341	OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
3342		    "list of cpus to monitor"),
3343	OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
3344	OPT_STRING('o', "output", &record.data.path, "file",
3345		    "output file name"),
3346	OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
3347			&record.opts.no_inherit_set,
3348			"child tasks do not inherit counters"),
3349	OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
3350		    "synthesize non-sample events at the end of output"),
3351	OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
3352	OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"),
3353	OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
3354		    "Fail if the specified frequency can't be used"),
3355	OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
3356		     "profile at this frequency",
3357		      record__parse_freq),
3358	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
3359		     "number of mmap data pages and AUX area tracing mmap pages",
3360		     record__parse_mmap_pages),
3361	OPT_CALLBACK(0, "mmap-flush", &record.opts, "number",
3362		     "Minimum number of bytes extracted from mmap data pages (default: 1)",
3363		     record__mmap_flush_parse),
3364	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
3365			   NULL, "enables call-graph recording" ,
3366			   &record_callchain_opt),
3367	OPT_CALLBACK(0, "call-graph", &record.opts,
3368		     "record_mode[,record_size]", record_callchain_help,
3369		     &record_parse_callchain_opt),
3370	OPT_INCR('v', "verbose", &verbose,
3371		    "be more verbose (show counter open errors, etc)"),
3372	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any warnings or messages"),
3373	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
3374		    "per thread counts"),
3375	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
3376	OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
3377		    "Record the sample physical addresses"),
3378	OPT_BOOLEAN(0, "data-page-size", &record.opts.sample_data_page_size,
3379		    "Record the sampled data address data page size"),
3380	OPT_BOOLEAN(0, "code-page-size", &record.opts.sample_code_page_size,
3381		    "Record the sampled code address (ip) page size"),
3382	OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
3383	OPT_BOOLEAN(0, "sample-identifier", &record.opts.sample_identifier,
3384		    "Record the sample identifier"),
3385	OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
3386			&record.opts.sample_time_set,
3387			"Record the sample timestamps"),
3388	OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
3389			"Record the sample period"),
3390	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
3391		    "don't sample"),
3392	OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
3393			&record.no_buildid_cache_set,
3394			"do not update the buildid cache"),
3395	OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
3396			&record.no_buildid_set,
3397			"do not collect buildids in perf.data"),
3398	OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
3399		     "monitor event in cgroup name only",
3400		     parse_cgroups),
3401	OPT_CALLBACK('D', "delay", &record, "ms",
3402		     "ms to wait before starting measurement after program start (-1: start with events disabled), "
3403		     "or ranges of time to enable events e.g. '-D 10-20,30-40'",
3404		     record__parse_event_enable_time),
3405	OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"),
3406	OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
3407		   "user to profile"),
3408
3409	OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
3410		     "branch any", "sample any taken branches",
3411		     parse_branch_stack),
3412
3413	OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
3414		     "branch filter mask", "branch stack filter modes",
3415		     parse_branch_stack),
3416	OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
3417		    "sample by weight (on special events only)"),
3418	OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
3419		    "sample transaction flags (special events only)"),
3420	OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
3421		    "use per-thread mmaps"),
3422	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
3423		    "sample selected machine registers on interrupt,"
3424		    " use '-I?' to list register names", parse_intr_regs),
3425	OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
3426		    "sample selected machine registers on interrupt,"
3427		    " use '--user-regs=?' to list register names", parse_user_regs),
3428	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
3429		    "Record running/enabled time of read (:S) events"),
3430	OPT_CALLBACK('k', "clockid", &record.opts,
3431	"clockid", "clockid to use for events, see clock_gettime()",
3432	parse_clockid),
3433	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
3434			  "opts", "AUX area tracing Snapshot Mode", ""),
3435	OPT_STRING_OPTARG(0, "aux-sample", &record.opts.auxtrace_sample_opts,
3436			  "opts", "sample AUX area", ""),
3437	OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
3438			"per thread proc mmap processing timeout in ms"),
3439	OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
3440		    "Record namespaces events"),
3441	OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup,
3442		    "Record cgroup events"),
3443	OPT_BOOLEAN_SET(0, "switch-events", &record.opts.record_switch_events,
3444			&record.opts.record_switch_events_set,
3445			"Record context switch events"),
3446	OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
3447			 "Configure all used events to run in kernel space.",
3448			 PARSE_OPT_EXCLUSIVE),
3449	OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
3450			 "Configure all used events to run in user space.",
3451			 PARSE_OPT_EXCLUSIVE),
3452	OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains,
3453		    "collect kernel callchains"),
3454	OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains,
3455		    "collect user callchains"),
3456	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
3457		   "file", "vmlinux pathname"),
3458	OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
3459		    "Record build-id of all DSOs regardless of hits"),
3460	OPT_BOOLEAN(0, "buildid-mmap", &record.buildid_mmap,
3461		    "Record build-id in map events"),
3462	OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
3463		    "append timestamp to output filename"),
3464	OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
3465		    "Record timestamp boundary (time of first/last samples)"),
3466	OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
3467			  &record.switch_output.set, "signal or size[BKMG] or time[smhd]",
3468			  "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold",
3469			  "signal"),
3470	OPT_CALLBACK_SET(0, "switch-output-event", &switch_output_parse_events_option_args,
3471			 &record.switch_output_event_set, "switch output event",
3472			 "switch output event selector. use 'perf list' to list available events",
3473			 parse_events_option_new_evlist),
3474	OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files,
3475		   "Limit number of switch output generated files"),
3476	OPT_BOOLEAN(0, "dry-run", &dry_run,
3477		    "Parse options then exit"),
3478#ifdef HAVE_AIO_SUPPORT
3479	OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
3480		     &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
3481		     record__aio_parse),
3482#endif
3483	OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
3484		     "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
3485		     record__parse_affinity),
3486#ifdef HAVE_ZSTD_SUPPORT
3487	OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default, "n",
3488			    "Compress records using specified level (default: 1 - fastest compression, 22 - greatest compression)",
3489			    record__parse_comp_level),
3490#endif
3491	OPT_CALLBACK(0, "max-size", &record.output_max_size,
3492		     "size", "Limit the maximum size of the output file", parse_output_max_size),
3493	OPT_UINTEGER(0, "num-thread-synthesize",
3494		     &record.opts.nr_threads_synthesize,
3495		     "number of threads to run for event synthesis"),
3496#ifdef HAVE_LIBPFM
3497	OPT_CALLBACK(0, "pfm-events", &record.evlist, "event",
3498		"libpfm4 event selector. use 'perf list' to list available events",
3499		parse_libpfm_events_option),
3500#endif
3501	OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd] or fifo:ctl-fifo[,ack-fifo]",
3502		     "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events,\n"
3503		     "\t\t\t  'snapshot': AUX area tracing snapshot).\n"
3504		     "\t\t\t  Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n"
3505		     "\t\t\t  Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.",
3506		      parse_control_option),
3507	OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup",
3508		     "Fine-tune event synthesis: default=all", parse_record_synth_option),
3509	OPT_STRING_OPTARG_SET(0, "debuginfod", &record.debuginfod.urls,
3510			  &record.debuginfod.set, "debuginfod urls",
3511			  "Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls",
3512			  "system"),
3513	OPT_CALLBACK_OPTARG(0, "threads", &record.opts, NULL, "spec",
3514			    "write collected trace data into several data files using parallel threads",
3515			    record__parse_threads),
3516	OPT_BOOLEAN(0, "off-cpu", &record.off_cpu, "Enable off-cpu analysis"),
3517	OPT_END()
3518};
3519
3520struct option *record_options = __record_options;
3521
3522static int record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cpu_map *cpus)
3523{
3524	struct perf_cpu cpu;
3525	int idx;
3526
3527	if (cpu_map__is_dummy(cpus))
3528		return 0;
3529
3530	perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
3531		if (cpu.cpu == -1)
3532			continue;
3533		/* Return ENODEV if the input cpu is greater than the max cpu */
3534		if ((unsigned long)cpu.cpu > mask->nbits)
3535			return -ENODEV;
3536		__set_bit(cpu.cpu, mask->bits);
3537	}
3538
3539	return 0;
3540}
3541
3542static int record__mmap_cpu_mask_init_spec(struct mmap_cpu_mask *mask, const char *mask_spec)
3543{
3544	struct perf_cpu_map *cpus = perf_cpu_map__new(mask_spec);
3545	int ret = 0;
3546
3547	if (!cpus)
3548		return -ENOMEM;
3549
3550	bitmap_zero(mask->bits, mask->nbits);
3551	if (record__mmap_cpu_mask_init(mask, cpus))
3552		ret = -ENODEV;
3553
3554	perf_cpu_map__put(cpus);
3555
3556	return ret;
3557}
3558
3559static void record__free_thread_masks(struct record *rec, int nr_threads)
3560{
3561	int t;
3562
3563	if (rec->thread_masks)
3564		for (t = 0; t < nr_threads; t++)
3565			record__thread_mask_free(&rec->thread_masks[t]);
3566
3567	zfree(&rec->thread_masks);
3568}
3569
3570static int record__alloc_thread_masks(struct record *rec, int nr_threads, int nr_bits)
3571{
3572	int t, ret;
3573
3574	rec->thread_masks = zalloc(nr_threads * sizeof(*(rec->thread_masks)));
3575	if (!rec->thread_masks) {
3576		pr_err("Failed to allocate thread masks\n");
3577		return -ENOMEM;
3578	}
3579
3580	for (t = 0; t < nr_threads; t++) {
3581		ret = record__thread_mask_alloc(&rec->thread_masks[t], nr_bits);
3582		if (ret) {
3583			pr_err("Failed to allocate thread masks[%d]\n", t);
3584			goto out_free;
3585		}
3586	}
3587
3588	return 0;
3589
3590out_free:
3591	record__free_thread_masks(rec, nr_threads);
3592
3593	return ret;
3594}
3595
3596static int record__init_thread_cpu_masks(struct record *rec, struct perf_cpu_map *cpus)
3597{
3598	int t, ret, nr_cpus = perf_cpu_map__nr(cpus);
3599
3600	ret = record__alloc_thread_masks(rec, nr_cpus, cpu__max_cpu().cpu);
3601	if (ret)
3602		return ret;
3603
3604	rec->nr_threads = nr_cpus;
3605	pr_debug("nr_threads: %d\n", rec->nr_threads);
3606
3607	for (t = 0; t < rec->nr_threads; t++) {
3608		__set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].maps.bits);
3609		__set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].affinity.bits);
3610		if (verbose > 0) {
3611			pr_debug("thread_masks[%d]: ", t);
3612			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
3613			pr_debug("thread_masks[%d]: ", t);
3614			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity");
3615		}
3616	}
3617
3618	return 0;
3619}
3620
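/*
 * record__init_thread_masks_spec() builds one thread_mask per spec entry:
 * the maps mask selects which mmaps the thread services and the affinity
 * mask where it may run.  Each mask is intersected with the CPUs being
 * recorded (invalid CPUs are ignored, empty results are rejected) and must
 * not overlap with the masks of previously accepted entries.
 */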
static int record__init_thread_masks_spec(struct record *rec, struct perf_cpu_map *cpus,
					  const char **maps_spec, const char **affinity_spec,
					  u32 nr_spec)
{
	u32 s;
	int ret = 0, t = 0;
	struct mmap_cpu_mask cpus_mask;
	struct thread_mask thread_mask, full_mask, *thread_masks;

	ret = record__mmap_cpu_mask_alloc(&cpus_mask, cpu__max_cpu().cpu);
	if (ret) {
		pr_err("Failed to allocate CPUs mask\n");
		return ret;
	}

	ret = record__mmap_cpu_mask_init(&cpus_mask, cpus);
	if (ret) {
		pr_err("Failed to init cpu mask\n");
		goto out_free_cpu_mask;
	}

	ret = record__thread_mask_alloc(&full_mask, cpu__max_cpu().cpu);
	if (ret) {
		pr_err("Failed to allocate full mask\n");
		goto out_free_cpu_mask;
	}

	ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu);
	if (ret) {
		pr_err("Failed to allocate thread mask\n");
		goto out_free_full_and_cpu_masks;
	}

	for (s = 0; s < nr_spec; s++) {
		ret = record__mmap_cpu_mask_init_spec(&thread_mask.maps, maps_spec[s]);
		if (ret) {
			pr_err("Failed to initialize maps thread mask\n");
			goto out_free;
		}
		ret = record__mmap_cpu_mask_init_spec(&thread_mask.affinity, affinity_spec[s]);
		if (ret) {
			pr_err("Failed to initialize affinity thread mask\n");
			goto out_free;
		}

		/* ignore invalid CPUs but do not allow empty masks */
		if (!bitmap_and(thread_mask.maps.bits, thread_mask.maps.bits,
				cpus_mask.bits, thread_mask.maps.nbits)) {
			pr_err("Empty maps mask: %s\n", maps_spec[s]);
			ret = -EINVAL;
			goto out_free;
		}
		if (!bitmap_and(thread_mask.affinity.bits, thread_mask.affinity.bits,
				cpus_mask.bits, thread_mask.affinity.nbits)) {
			pr_err("Empty affinity mask: %s\n", affinity_spec[s]);
			ret = -EINVAL;
			goto out_free;
		}

		/* do not allow intersection with other masks (full_mask) */
		if (bitmap_intersects(thread_mask.maps.bits, full_mask.maps.bits,
				      thread_mask.maps.nbits)) {
			pr_err("Intersecting maps mask: %s\n", maps_spec[s]);
			ret = -EINVAL;
			goto out_free;
		}
		if (bitmap_intersects(thread_mask.affinity.bits, full_mask.affinity.bits,
				      thread_mask.affinity.nbits)) {
			pr_err("Intersecting affinity mask: %s\n", affinity_spec[s]);
			ret = -EINVAL;
			goto out_free;
		}

		bitmap_or(full_mask.maps.bits, full_mask.maps.bits,
			  thread_mask.maps.bits, full_mask.maps.nbits);
		bitmap_or(full_mask.affinity.bits, full_mask.affinity.bits,
			  thread_mask.affinity.bits, full_mask.affinity.nbits);

		thread_masks = realloc(rec->thread_masks, (t + 1) * sizeof(struct thread_mask));
		if (!thread_masks) {
			pr_err("Failed to reallocate thread masks\n");
			ret = -ENOMEM;
			goto out_free;
		}
		rec->thread_masks = thread_masks;
		rec->thread_masks[t] = thread_mask;
		if (verbose > 0) {
			pr_debug("thread_masks[%d]: ", t);
			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
			pr_debug("thread_masks[%d]: ", t);
			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].affinity, "affinity");
		}
		t++;
		ret = record__thread_mask_alloc(&thread_mask, cpu__max_cpu().cpu);
		if (ret) {
			pr_err("Failed to allocate thread mask\n");
			goto out_free_full_and_cpu_masks;
		}
	}
	rec->nr_threads = t;
	pr_debug("nr_threads: %d\n", rec->nr_threads);
	if (!rec->nr_threads)
		ret = -EINVAL;

out_free:
	record__thread_mask_free(&thread_mask);
out_free_full_and_cpu_masks:
	record__thread_mask_free(&full_mask);
out_free_cpu_mask:
	record__mmap_cpu_mask_free(&cpus_mask);

	return ret;
}

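/*
 * One data streaming thread per core: the core CPU lists from the CPU
 * topology are used for both the maps and the affinity masks.
 */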
static int record__init_thread_core_masks(struct record *rec, struct perf_cpu_map *cpus)
{
	int ret;
	struct cpu_topology *topo;

	topo = cpu_topology__new();
	if (!topo) {
		pr_err("Failed to allocate CPU topology\n");
		return -ENOMEM;
	}

	ret = record__init_thread_masks_spec(rec, cpus, topo->core_cpus_list,
					     topo->core_cpus_list, topo->core_cpus_lists);
	cpu_topology__delete(topo);

	return ret;
}

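/*
 * One data streaming thread per package (socket): the package CPU lists
 * from the CPU topology are used for both the maps and the affinity masks.
 */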
static int record__init_thread_package_masks(struct record *rec, struct perf_cpu_map *cpus)
{
	int ret;
	struct cpu_topology *topo;

	topo = cpu_topology__new();
	if (!topo) {
		pr_err("Failed to allocate CPU topology\n");
		return -ENOMEM;
	}

	ret = record__init_thread_masks_spec(rec, cpus, topo->package_cpus_list,
					     topo->package_cpus_list, topo->package_cpus_lists);
	cpu_topology__delete(topo);

	return ret;
}

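/*
 * One data streaming thread per NUMA node: the per-node CPU lists are
 * collected into a spec array and used for both the maps and the affinity
 * masks.
 */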
static int record__init_thread_numa_masks(struct record *rec, struct perf_cpu_map *cpus)
{
	u32 s;
	int ret;
	const char **spec;
	struct numa_topology *topo;

	topo = numa_topology__new();
	if (!topo) {
		pr_err("Failed to allocate NUMA topology\n");
		return -ENOMEM;
	}

	spec = zalloc(topo->nr * sizeof(char *));
	if (!spec) {
		pr_err("Failed to allocate NUMA spec\n");
		ret = -ENOMEM;
		goto out_delete_topo;
	}
	for (s = 0; s < topo->nr; s++)
		spec[s] = topo->nodes[s].cpus;

	ret = record__init_thread_masks_spec(rec, cpus, spec, spec, topo->nr);

	zfree(&spec);

out_delete_topo:
	numa_topology__delete(topo);

	return ret;
}

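/*
 * Parse the user supplied --threads specification. Entries are separated by
 * ':' and each entry has the form "<maps CPUs>/<affinity CPUs>". For example
 * (illustrative values), a spec like 0-3/0:4-7/4 requests two streaming
 * threads: one reading the mmaps of CPUs 0-3 while affined to CPU 0, the
 * other reading CPUs 4-7 while affined to CPU 4.
 */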
static int record__init_thread_user_masks(struct record *rec, struct perf_cpu_map *cpus)
{
	int t, ret;
	u32 s, nr_spec = 0;
	char **maps_spec = NULL, **affinity_spec = NULL, **tmp_spec;
	char *user_spec, *spec, *spec_ptr, *mask, *mask_ptr, *dup_mask = NULL;

	for (t = 0, user_spec = (char *)rec->opts.threads_user_spec; ; t++, user_spec = NULL) {
		spec = strtok_r(user_spec, ":", &spec_ptr);
		if (spec == NULL)
			break;
		pr_debug2("threads_spec[%d]: %s\n", t, spec);
		mask = strtok_r(spec, "/", &mask_ptr);
		if (mask == NULL)
			break;
		pr_debug2("  maps mask: %s\n", mask);
		tmp_spec = realloc(maps_spec, (nr_spec + 1) * sizeof(char *));
		if (!tmp_spec) {
			pr_err("Failed to reallocate maps spec\n");
			ret = -ENOMEM;
			goto out_free;
		}
		maps_spec = tmp_spec;
		maps_spec[nr_spec] = dup_mask = strdup(mask);
		if (!maps_spec[nr_spec]) {
			pr_err("Failed to allocate maps spec[%d]\n", nr_spec);
			ret = -ENOMEM;
			goto out_free;
		}
		mask = strtok_r(NULL, "/", &mask_ptr);
		if (mask == NULL) {
			pr_err("Invalid thread maps or affinity specs\n");
			ret = -EINVAL;
			goto out_free;
		}
		pr_debug2("  affinity mask: %s\n", mask);
		tmp_spec = realloc(affinity_spec, (nr_spec + 1) * sizeof(char *));
		if (!tmp_spec) {
			pr_err("Failed to reallocate affinity spec\n");
			ret = -ENOMEM;
			goto out_free;
		}
		affinity_spec = tmp_spec;
		affinity_spec[nr_spec] = strdup(mask);
		if (!affinity_spec[nr_spec]) {
			pr_err("Failed to allocate affinity spec[%d]\n", nr_spec);
			ret = -ENOMEM;
			goto out_free;
		}
		dup_mask = NULL;
		nr_spec++;
	}

	ret = record__init_thread_masks_spec(rec, cpus, (const char **)maps_spec,
					     (const char **)affinity_spec, nr_spec);

out_free:
	free(dup_mask);
	for (s = 0; s < nr_spec; s++) {
		if (maps_spec)
			free(maps_spec[s]);
		if (affinity_spec)
			free(affinity_spec[s]);
	}
	free(affinity_spec);
	free(maps_spec);

	return ret;
}

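/*
 * Without parallel streaming a single (main) thread reads all mmaps, so only
 * one thread_mask covering every mapped CPU is needed; the affinity mask is
 * not initialized here.
 */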
static int record__init_thread_default_masks(struct record *rec, struct perf_cpu_map *cpus)
{
	int ret;

	ret = record__alloc_thread_masks(rec, 1, cpu__max_cpu().cpu);
	if (ret)
		return ret;

	if (record__mmap_cpu_mask_init(&rec->thread_masks->maps, cpus))
		return -ENODEV;

	rec->nr_threads = 1;

	return 0;
}

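/*
 * Choose how data streaming threads are laid out: one per CPU, core, package
 * or NUMA node, or according to a user supplied specification. Parallel
 * streaming is incompatible with --per-thread mmaps.
 */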
static int record__init_thread_masks(struct record *rec)
{
	int ret = 0;
	struct perf_cpu_map *cpus = rec->evlist->core.all_cpus;

	if (!record__threads_enabled(rec))
		return record__init_thread_default_masks(rec, cpus);

	if (evlist__per_thread(rec->evlist)) {
		pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n");
		return -EINVAL;
	}

	switch (rec->opts.threads_spec) {
	case THREAD_SPEC__CPU:
		ret = record__init_thread_cpu_masks(rec, cpus);
		break;
	case THREAD_SPEC__CORE:
		ret = record__init_thread_core_masks(rec, cpus);
		break;
	case THREAD_SPEC__PACKAGE:
		ret = record__init_thread_package_masks(rec, cpus);
		break;
	case THREAD_SPEC__NUMA:
		ret = record__init_thread_numa_masks(rec, cpus);
		break;
	case THREAD_SPEC__USER:
		ret = record__init_thread_user_masks(rec, cpus);
		break;
	default:
		break;
	}

	return ret;
}

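/*
 * Entry point of the 'perf record' subcommand: parse and validate options,
 * set up the evlist and the data streaming thread masks, then hand control
 * over to __cmd_record().
 */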
int cmd_record(int argc, const char **argv)
{
	int err;
	struct record *rec = &record;
	char errbuf[BUFSIZ];

	setlocale(LC_ALL, "");

#ifndef HAVE_BPF_SKEL
# define set_nobuild(s, l, m, c) set_option_nobuild(record_options, s, l, m, c)
	set_nobuild('\0', "off-cpu", "no BUILD_BPF_SKEL=1", true);
# undef set_nobuild
#endif

	rec->opts.affinity = PERF_AFFINITY_SYS;

	rec->evlist = evlist__new();
	if (rec->evlist == NULL)
		return -ENOMEM;

	err = perf_config(perf_record_config, rec);
	if (err)
		return err;

	argc = parse_options(argc, argv, record_options, record_usage,
			    PARSE_OPT_STOP_AT_NON_OPTION);
	if (quiet)
		perf_quiet_option();

	err = symbol__validate_sym_arguments();
	if (err)
		return err;

	perf_debuginfod_setup(&record.debuginfod);

	/* Make system wide (-a) the default target. */
	if (!argc && target__none(&rec->opts.target))
		rec->opts.target.system_wide = true;

	if (nr_cgroups && !rec->opts.target.system_wide) {
		usage_with_options_msg(record_usage, record_options,
			"cgroup monitoring only available in system-wide mode");
	}

	if (rec->buildid_mmap) {
		if (!perf_can_record_build_id()) {
			pr_err("Failed: no support to record build id in mmap events, update your kernel.\n");
			err = -EINVAL;
			goto out_opts;
		}
		pr_debug("Enabling build id in mmap2 events.\n");
		/* Enable mmap build id synthesizing. */
		symbol_conf.buildid_mmap2 = true;
		/* Enable perf_event_attr::build_id bit. */
		rec->opts.build_id = true;
		/* Disable build id cache. */
		rec->no_buildid = true;
	}

	if (rec->opts.record_cgroup && !perf_can_record_cgroup()) {
		pr_err("Kernel has no cgroup sampling support.\n");
		err = -EINVAL;
		goto out_opts;
	}

	if (rec->opts.kcore)
		rec->opts.text_poke = true;

	if (rec->opts.kcore || record__threads_enabled(rec))
		rec->data.is_dir = true;

	if (record__threads_enabled(rec)) {
		if (rec->opts.affinity != PERF_AFFINITY_SYS) {
			pr_err("--affinity option is mutually exclusive to parallel streaming mode.\n");
			err = -EINVAL;
			goto out_opts;
		}
		if (record__aio_enabled(rec)) {
			pr_err("Asynchronous streaming mode (--aio) is mutually exclusive to parallel streaming mode.\n");
			err = -EINVAL;
			goto out_opts;
		}
	}

	if (rec->opts.comp_level != 0) {
		pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
		rec->no_buildid = true;
	}

	if (rec->opts.record_switch_events &&
	    !perf_can_record_switch_events()) {
		ui__error("kernel does not support recording context switch events\n");
		parse_options_usage(record_usage, record_options, "switch-events", 0);
		err = -EINVAL;
		goto out_opts;
	}

	if (switch_output_setup(rec)) {
		parse_options_usage(record_usage, record_options, "switch-output", 0);
		err = -EINVAL;
		goto out_opts;
	}

	if (rec->switch_output.time) {
		signal(SIGALRM, alarm_sig_handler);
		alarm(rec->switch_output.time);
	}

	if (rec->switch_output.num_files) {
		rec->switch_output.filenames = calloc(rec->switch_output.num_files,
						      sizeof(char *));
		if (!rec->switch_output.filenames) {
			err = -ENOMEM;
			goto out_opts;
		}
	}

	if (rec->timestamp_filename && record__threads_enabled(rec)) {
		rec->timestamp_filename = false;
		pr_warning("WARNING: --timestamp-filename option is not available in parallel streaming mode.\n");
	}

	/*
	 * Allow aliases to facilitate the lookup of symbols for address
	 * filters. Refer to auxtrace_parse_filters().
	 */
	symbol_conf.allow_aliases = true;

	symbol__init(NULL);

	err = record__auxtrace_init(rec);
	if (err)
		goto out;

	if (dry_run)
		goto out;

	err = -ENOMEM;

	if (rec->no_buildid_cache || rec->no_buildid) {
		disable_buildid_cache();
	} else if (rec->switch_output.enabled) {
		/*
		 * In 'perf record --switch-output', disable buildid
		 * generation by default to reduce data file switching
		 * overhead. Still generate buildids if they are explicitly
		 * required using
		 *
		 *  perf record --switch-output --no-no-buildid \
		 *              --no-no-buildid-cache
		 *
		 * The following code is equivalent to:
		 *
		 * if ((rec->no_buildid || !rec->no_buildid_set) &&
		 *     (rec->no_buildid_cache || !rec->no_buildid_cache_set))
		 *         disable_buildid_cache();
		 */
		bool disable = true;

		if (rec->no_buildid_set && !rec->no_buildid)
			disable = false;
		if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
			disable = false;
		if (disable) {
			rec->no_buildid = true;
			rec->no_buildid_cache = true;
			disable_buildid_cache();
		}
	}

	if (record.opts.overwrite)
		record.opts.tail_synthesize = true;

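	/*
	 * If no events were specified on the command line, fall back to the
	 * default cycles event, restricted to user space when the paranoid
	 * setting does not permit kernel profiling.
	 */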
	if (rec->evlist->core.nr_entries == 0) {
		bool can_profile_kernel = perf_event_paranoid_check(1);

		err = parse_event(rec->evlist, can_profile_kernel ? "cycles:P" : "cycles:Pu");
		if (err)
			goto out;
	}

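	/*
	 * When a specific thread is targeted (-t/--tid), disable event
	 * inheritance across forks unless the user set it explicitly.
	 */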
	if (rec->opts.target.tid && !rec->opts.no_inherit_set)
		rec->opts.no_inherit = true;

	err = target__validate(&rec->opts.target);
	if (err) {
		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__warning("%s\n", errbuf);
	}

	err = target__parse_uid(&rec->opts.target);
	if (err) {
		int saved_errno = errno;

		target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
		ui__error("%s", errbuf);

		err = -saved_errno;
		goto out;
	}

	/* Enable ignoring missing threads when -u/-p option is defined. */
	rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;

	evlist__warn_user_requested_cpus(rec->evlist, rec->opts.target.cpu_list);

	if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP)
		arch__add_leaf_frame_record_opts(&rec->opts);

	err = -ENOMEM;
	if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0) {
		if (rec->opts.target.pid != NULL) {
			pr_err("Couldn't create thread/CPU maps: %s\n",
				errno == ENOENT ? "No such process" : str_error_r(errno, errbuf, sizeof(errbuf)));
			goto out;
		} else {
			usage_with_options(record_usage, record_options);
		}
	}

	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
	if (err)
		goto out;

	/*
	 * We take all buildids when the file contains AUX area tracing data
	 * because we do not decode the trace, as that would take too long.
	 */
	if (rec->opts.full_auxtrace)
		rec->buildid_all = true;

	if (rec->opts.text_poke) {
		err = record__config_text_poke(rec->evlist);
		if (err) {
			pr_err("record__config_text_poke failed, error %d\n", err);
			goto out;
		}
	}

	if (rec->off_cpu) {
		err = record__config_off_cpu(rec);
		if (err) {
			pr_err("record__config_off_cpu failed, error %d\n", err);
			goto out;
		}
	}

	if (record_opts__config(&rec->opts)) {
		err = -EINVAL;
		goto out;
	}

	err = record__init_thread_masks(rec);
	if (err) {
		pr_err("Failed to initialize parallel data streaming masks\n");
		goto out;
	}

	if (rec->opts.nr_cblocks > nr_cblocks_max)
		rec->opts.nr_cblocks = nr_cblocks_max;
	pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks);

	pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
	pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);

	if (rec->opts.comp_level > comp_level_max)
		rec->opts.comp_level = comp_level_max;
	pr_debug("comp level: %d\n", rec->opts.comp_level);

	err = __cmd_record(&record, argc, argv);
out:
	evlist__delete(rec->evlist);
	symbol__exit();
	auxtrace_record__free(rec->itr);
out_opts:
	record__free_thread_masks(rec, rec->nr_threads);
	rec->nr_threads = 0;
	evlist__close_control(rec->opts.ctl_fd, rec->opts.ctl_fd_ack, &rec->opts.ctl_fd_close);
	return err;
}

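/*
 * Asynchronous signal handler used for on-demand AUX area snapshots and,
 * when --switch-output is signal driven, for rotating the output file.
 */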
static void snapshot_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	hit_auxtrace_snapshot_trigger(rec);

	if (switch_output_signal(rec))
		trigger_hit(&switch_output_trigger);
}

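/*
 * SIGALRM handler armed above for time based --switch-output: it only marks
 * the switch-output trigger as hit; the rotation itself happens where the
 * trigger is checked.
 */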
static void alarm_sig_handler(int sig __maybe_unused)
{
	struct record *rec = &record;

	if (switch_output_time(rec))
		trigger_hit(&switch_output_trigger);
}