xref: /kernel/linux/linux-6.6/tools/perf/util/cs-etm.c (revision 62306a36)
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright(C) 2015-2018 Linaro Limited.
4 *
5 * Author: Tor Jeremiassen <tor@ti.com>
6 * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
7 */
8
9#include <linux/kernel.h>
10#include <linux/bitfield.h>
11#include <linux/bitops.h>
12#include <linux/coresight-pmu.h>
13#include <linux/err.h>
14#include <linux/log2.h>
15#include <linux/types.h>
16#include <linux/zalloc.h>
17
18#include <stdlib.h>
19
20#include "auxtrace.h"
21#include "color.h"
22#include "cs-etm.h"
23#include "cs-etm-decoder/cs-etm-decoder.h"
24#include "debug.h"
25#include "dso.h"
26#include "evlist.h"
27#include "intlist.h"
28#include "machine.h"
29#include "map.h"
30#include "perf.h"
31#include "session.h"
32#include "map_symbol.h"
33#include "branch.h"
34#include "symbol.h"
35#include "tool.h"
36#include "thread.h"
37#include "thread-stack.h"
38#include "tsc.h"
39#include <tools/libc_compat.h>
40#include "util/synthetic-events.h"
41#include "util/util.h"
42
43struct cs_etm_auxtrace {
44	struct auxtrace auxtrace;
45	struct auxtrace_queues queues;
46	struct auxtrace_heap heap;
47	struct itrace_synth_opts synth_opts;
48	struct perf_session *session;
49	struct perf_tsc_conversion tc;
50
51	/*
52	 * Timeless has no timestamps in the trace so overlapping mmap lookups
53	 * are less accurate but produces smaller trace data. We use context IDs
54	 * in the trace instead of matching timestamps with fork records so
55	 * they're not really needed in the general case. Overlapping mmaps
56	 * happen in cases like between a fork and an exec.
57	 */
58	bool timeless_decoding;
59
60	/*
61	 * Per-thread ignores the trace channel ID and instead assumes that
62	 * everything in a buffer comes from the same process regardless of
63	 * which CPU it ran on. It also implies no context IDs so the TID is
64	 * taken from the auxtrace buffer.
65	 */
66	bool per_thread_decoding;
67	bool snapshot_mode;
68	bool data_queued;
69	bool has_virtual_ts; /* Virtual/Kernel timestamps in the trace. */
70
71	int num_cpu;
72	u64 latest_kernel_timestamp;
73	u32 auxtrace_type;
74	u64 branches_sample_type;
75	u64 branches_id;
76	u64 instructions_sample_type;
77	u64 instructions_sample_period;
78	u64 instructions_id;
79	u64 **metadata;
80	unsigned int pmu_type;
81	enum cs_etm_pid_fmt pid_fmt;
82};
83
84struct cs_etm_traceid_queue {
85	u8 trace_chan_id;
86	u64 period_instructions;
87	size_t last_branch_pos;
88	union perf_event *event_buf;
89	struct thread *thread;
90	struct thread *prev_packet_thread;
91	ocsd_ex_level prev_packet_el;
92	ocsd_ex_level el;
93	struct branch_stack *last_branch;
94	struct branch_stack *last_branch_rb;
95	struct cs_etm_packet *prev_packet;
96	struct cs_etm_packet *packet;
97	struct cs_etm_packet_queue packet_queue;
98};
99
100struct cs_etm_queue {
101	struct cs_etm_auxtrace *etm;
102	struct cs_etm_decoder *decoder;
103	struct auxtrace_buffer *buffer;
104	unsigned int queue_nr;
105	u8 pending_timestamp_chan_id;
106	u64 offset;
107	const unsigned char *buf;
108	size_t buf_len, buf_used;
109	/* Conversion between traceID and index in traceid_queues array */
110	struct intlist *traceid_queues_list;
111	struct cs_etm_traceid_queue **traceid_queues;
112};
113
114/* RB tree for quick conversion between traceID and metadata pointers */
115static struct intlist *traceid_list;
116
117static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm);
118static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
119					   pid_t tid);
120static int cs_etm__get_data_block(struct cs_etm_queue *etmq);
121static int cs_etm__decode_data_block(struct cs_etm_queue *etmq);
122
123/* PTMs ETMIDR [11:8] set to b0011 */
124#define ETMIDR_PTM_VERSION 0x00000300
125
126/*
127 * A struct auxtrace_heap_item only has a queue_nr and a timestamp to
128 * work with.  One option is to modify to auxtrace_heap_XYZ() API or simply
129 * encode the etm queue number as the upper 16 bit and the channel as
130 * the lower 16 bit.
131 */
132#define TO_CS_QUEUE_NR(queue_nr, trace_chan_id)	\
133		      (queue_nr << 16 | trace_chan_id)
134#define TO_QUEUE_NR(cs_queue_nr) (cs_queue_nr >> 16)
135#define TO_TRACE_CHAN_ID(cs_queue_nr) (cs_queue_nr & 0x0000ffff)
136
137static u32 cs_etm__get_v7_protocol_version(u32 etmidr)
138{
139	etmidr &= ETMIDR_PTM_VERSION;
140
141	if (etmidr == ETMIDR_PTM_VERSION)
142		return CS_ETM_PROTO_PTM;
143
144	return CS_ETM_PROTO_ETMV3;
145}
146
147static int cs_etm__get_magic(u8 trace_chan_id, u64 *magic)
148{
149	struct int_node *inode;
150	u64 *metadata;
151
152	inode = intlist__find(traceid_list, trace_chan_id);
153	if (!inode)
154		return -EINVAL;
155
156	metadata = inode->priv;
157	*magic = metadata[CS_ETM_MAGIC];
158	return 0;
159}
160
161int cs_etm__get_cpu(u8 trace_chan_id, int *cpu)
162{
163	struct int_node *inode;
164	u64 *metadata;
165
166	inode = intlist__find(traceid_list, trace_chan_id);
167	if (!inode)
168		return -EINVAL;
169
170	metadata = inode->priv;
171	*cpu = (int)metadata[CS_ETM_CPU];
172	return 0;
173}
174
175/*
176 * The returned PID format is presented as an enum:
177 *
178 *   CS_ETM_PIDFMT_CTXTID: CONTEXTIDR or CONTEXTIDR_EL1 is traced.
179 *   CS_ETM_PIDFMT_CTXTID2: CONTEXTIDR_EL2 is traced.
180 *   CS_ETM_PIDFMT_NONE: No context IDs
181 *
182 * It's possible that the two bits ETM_OPT_CTXTID and ETM_OPT_CTXTID2
183 * are enabled at the same time when the session runs on an EL2 kernel.
184 * This means the CONTEXTIDR_EL1 and CONTEXTIDR_EL2 both will be
185 * recorded in the trace data, the tool will selectively use
186 * CONTEXTIDR_EL2 as PID.
187 *
188 * The result is cached in etm->pid_fmt so this function only needs to be called
189 * when processing the aux info.
190 */
191static enum cs_etm_pid_fmt cs_etm__init_pid_fmt(u64 *metadata)
192{
193	u64 val;
194
195	if (metadata[CS_ETM_MAGIC] == __perf_cs_etmv3_magic) {
196		val = metadata[CS_ETM_ETMCR];
197		/* CONTEXTIDR is traced */
198		if (val & BIT(ETM_OPT_CTXTID))
199			return CS_ETM_PIDFMT_CTXTID;
200	} else {
201		val = metadata[CS_ETMV4_TRCCONFIGR];
202		/* CONTEXTIDR_EL2 is traced */
203		if (val & (BIT(ETM4_CFG_BIT_VMID) | BIT(ETM4_CFG_BIT_VMID_OPT)))
204			return CS_ETM_PIDFMT_CTXTID2;
205		/* CONTEXTIDR_EL1 is traced */
206		else if (val & BIT(ETM4_CFG_BIT_CTXTID))
207			return CS_ETM_PIDFMT_CTXTID;
208	}
209
210	return CS_ETM_PIDFMT_NONE;
211}
212
213enum cs_etm_pid_fmt cs_etm__get_pid_fmt(struct cs_etm_queue *etmq)
214{
215	return etmq->etm->pid_fmt;
216}
217
218static int cs_etm__map_trace_id(u8 trace_chan_id, u64 *cpu_metadata)
219{
220	struct int_node *inode;
221
222	/* Get an RB node for this CPU */
223	inode = intlist__findnew(traceid_list, trace_chan_id);
224
225	/* Something went wrong, no need to continue */
226	if (!inode)
227		return -ENOMEM;
228
229	/*
230	 * The node for that CPU should not be taken.
231	 * Back out if that's the case.
232	 */
233	if (inode->priv)
234		return -EINVAL;
235
236	/* All good, associate the traceID with the metadata pointer */
237	inode->priv = cpu_metadata;
238
239	return 0;
240}
241
242static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata)
243{
244	u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC];
245
246	switch (cs_etm_magic) {
247	case __perf_cs_etmv3_magic:
248		*trace_chan_id = (u8)(cpu_metadata[CS_ETM_ETMTRACEIDR] &
249				      CORESIGHT_TRACE_ID_VAL_MASK);
250		break;
251	case __perf_cs_etmv4_magic:
252	case __perf_cs_ete_magic:
253		*trace_chan_id = (u8)(cpu_metadata[CS_ETMV4_TRCTRACEIDR] &
254				      CORESIGHT_TRACE_ID_VAL_MASK);
255		break;
256	default:
257		return -EINVAL;
258	}
259	return 0;
260}
261
262/*
263 * update metadata trace ID from the value found in the AUX_HW_INFO packet.
264 * This will also clear the CORESIGHT_TRACE_ID_UNUSED_FLAG flag if present.
265 */
266static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata)
267{
268	u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC];
269
270	switch (cs_etm_magic) {
271	case __perf_cs_etmv3_magic:
272		 cpu_metadata[CS_ETM_ETMTRACEIDR] = trace_chan_id;
273		break;
274	case __perf_cs_etmv4_magic:
275	case __perf_cs_ete_magic:
276		cpu_metadata[CS_ETMV4_TRCTRACEIDR] = trace_chan_id;
277		break;
278
279	default:
280		return -EINVAL;
281	}
282	return 0;
283}
284
285/*
286 * Get a metadata for a specific cpu from an array.
287 *
288 */
289static u64 *get_cpu_data(struct cs_etm_auxtrace *etm, int cpu)
290{
291	int i;
292	u64 *metadata = NULL;
293
294	for (i = 0; i < etm->num_cpu; i++) {
295		if (etm->metadata[i][CS_ETM_CPU] == (u64)cpu) {
296			metadata = etm->metadata[i];
297			break;
298		}
299	}
300
301	return metadata;
302}
303
304/*
305 * Handle the PERF_RECORD_AUX_OUTPUT_HW_ID event.
306 *
307 * The payload associates the Trace ID and the CPU.
308 * The routine is tolerant of seeing multiple packets with the same association,
309 * but a CPU / Trace ID association changing during a session is an error.
310 */
311static int cs_etm__process_aux_output_hw_id(struct perf_session *session,
312					    union perf_event *event)
313{
314	struct cs_etm_auxtrace *etm;
315	struct perf_sample sample;
316	struct int_node *inode;
317	struct evsel *evsel;
318	u64 *cpu_data;
319	u64 hw_id;
320	int cpu, version, err;
321	u8 trace_chan_id, curr_chan_id;
322
323	/* extract and parse the HW ID */
324	hw_id = event->aux_output_hw_id.hw_id;
325	version = FIELD_GET(CS_AUX_HW_ID_VERSION_MASK, hw_id);
326	trace_chan_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id);
327
328	/* check that we can handle this version */
329	if (version > CS_AUX_HW_ID_CURR_VERSION)
330		return -EINVAL;
331
332	/* get access to the etm metadata */
333	etm = container_of(session->auxtrace, struct cs_etm_auxtrace, auxtrace);
334	if (!etm || !etm->metadata)
335		return -EINVAL;
336
337	/* parse the sample to get the CPU */
338	evsel = evlist__event2evsel(session->evlist, event);
339	if (!evsel)
340		return -EINVAL;
341	err = evsel__parse_sample(evsel, event, &sample);
342	if (err)
343		return err;
344	cpu = sample.cpu;
345	if (cpu == -1) {
346		/* no CPU in the sample - possibly recorded with an old version of perf */
347		pr_err("CS_ETM: no CPU AUX_OUTPUT_HW_ID sample. Use compatible perf to record.");
348		return -EINVAL;
349	}
350
351	/* See if the ID is mapped to a CPU, and it matches the current CPU */
352	inode = intlist__find(traceid_list, trace_chan_id);
353	if (inode) {
354		cpu_data = inode->priv;
355		if ((int)cpu_data[CS_ETM_CPU] != cpu) {
356			pr_err("CS_ETM: map mismatch between HW_ID packet CPU and Trace ID\n");
357			return -EINVAL;
358		}
359
360		/* check that the mapped ID matches */
361		err = cs_etm__metadata_get_trace_id(&curr_chan_id, cpu_data);
362		if (err)
363			return err;
364		if (curr_chan_id != trace_chan_id) {
365			pr_err("CS_ETM: mismatch between CPU trace ID and HW_ID packet ID\n");
366			return -EINVAL;
367		}
368
369		/* mapped and matched - return OK */
370		return 0;
371	}
372
373	cpu_data = get_cpu_data(etm, cpu);
374	if (cpu_data == NULL)
375		return err;
376
377	/* not one we've seen before - lets map it */
378	err = cs_etm__map_trace_id(trace_chan_id, cpu_data);
379	if (err)
380		return err;
381
382	/*
383	 * if we are picking up the association from the packet, need to plug
384	 * the correct trace ID into the metadata for setting up decoders later.
385	 */
386	err = cs_etm__metadata_set_trace_id(trace_chan_id, cpu_data);
387	return err;
388}
389
390void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
391					      u8 trace_chan_id)
392{
393	/*
394	 * When a timestamp packet is encountered the backend code
395	 * is stopped so that the front end has time to process packets
396	 * that were accumulated in the traceID queue.  Since there can
397	 * be more than one channel per cs_etm_queue, we need to specify
398	 * what traceID queue needs servicing.
399	 */
400	etmq->pending_timestamp_chan_id = trace_chan_id;
401}
402
403static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq,
404				      u8 *trace_chan_id)
405{
406	struct cs_etm_packet_queue *packet_queue;
407
408	if (!etmq->pending_timestamp_chan_id)
409		return 0;
410
411	if (trace_chan_id)
412		*trace_chan_id = etmq->pending_timestamp_chan_id;
413
414	packet_queue = cs_etm__etmq_get_packet_queue(etmq,
415						     etmq->pending_timestamp_chan_id);
416	if (!packet_queue)
417		return 0;
418
419	/* Acknowledge pending status */
420	etmq->pending_timestamp_chan_id = 0;
421
422	/* See function cs_etm_decoder__do_{hard|soft}_timestamp() */
423	return packet_queue->cs_timestamp;
424}
425
426static void cs_etm__clear_packet_queue(struct cs_etm_packet_queue *queue)
427{
428	int i;
429
430	queue->head = 0;
431	queue->tail = 0;
432	queue->packet_count = 0;
433	for (i = 0; i < CS_ETM_PACKET_MAX_BUFFER; i++) {
434		queue->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN;
435		queue->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR;
436		queue->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR;
437		queue->packet_buffer[i].instr_count = 0;
438		queue->packet_buffer[i].last_instr_taken_branch = false;
439		queue->packet_buffer[i].last_instr_size = 0;
440		queue->packet_buffer[i].last_instr_type = 0;
441		queue->packet_buffer[i].last_instr_subtype = 0;
442		queue->packet_buffer[i].last_instr_cond = 0;
443		queue->packet_buffer[i].flags = 0;
444		queue->packet_buffer[i].exception_number = UINT32_MAX;
445		queue->packet_buffer[i].trace_chan_id = UINT8_MAX;
446		queue->packet_buffer[i].cpu = INT_MIN;
447	}
448}
449
450static void cs_etm__clear_all_packet_queues(struct cs_etm_queue *etmq)
451{
452	int idx;
453	struct int_node *inode;
454	struct cs_etm_traceid_queue *tidq;
455	struct intlist *traceid_queues_list = etmq->traceid_queues_list;
456
457	intlist__for_each_entry(inode, traceid_queues_list) {
458		idx = (int)(intptr_t)inode->priv;
459		tidq = etmq->traceid_queues[idx];
460		cs_etm__clear_packet_queue(&tidq->packet_queue);
461	}
462}
463
464static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq,
465				      struct cs_etm_traceid_queue *tidq,
466				      u8 trace_chan_id)
467{
468	int rc = -ENOMEM;
469	struct auxtrace_queue *queue;
470	struct cs_etm_auxtrace *etm = etmq->etm;
471
472	cs_etm__clear_packet_queue(&tidq->packet_queue);
473
474	queue = &etmq->etm->queues.queue_array[etmq->queue_nr];
475	tidq->trace_chan_id = trace_chan_id;
476	tidq->el = tidq->prev_packet_el = ocsd_EL_unknown;
477	tidq->thread = machine__findnew_thread(&etm->session->machines.host, -1,
478					       queue->tid);
479	tidq->prev_packet_thread = machine__idle_thread(&etm->session->machines.host);
480
481	tidq->packet = zalloc(sizeof(struct cs_etm_packet));
482	if (!tidq->packet)
483		goto out;
484
485	tidq->prev_packet = zalloc(sizeof(struct cs_etm_packet));
486	if (!tidq->prev_packet)
487		goto out_free;
488
489	if (etm->synth_opts.last_branch) {
490		size_t sz = sizeof(struct branch_stack);
491
492		sz += etm->synth_opts.last_branch_sz *
493		      sizeof(struct branch_entry);
494		tidq->last_branch = zalloc(sz);
495		if (!tidq->last_branch)
496			goto out_free;
497		tidq->last_branch_rb = zalloc(sz);
498		if (!tidq->last_branch_rb)
499			goto out_free;
500	}
501
502	tidq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
503	if (!tidq->event_buf)
504		goto out_free;
505
506	return 0;
507
508out_free:
509	zfree(&tidq->last_branch_rb);
510	zfree(&tidq->last_branch);
511	zfree(&tidq->prev_packet);
512	zfree(&tidq->packet);
513out:
514	return rc;
515}
516
517static struct cs_etm_traceid_queue
518*cs_etm__etmq_get_traceid_queue(struct cs_etm_queue *etmq, u8 trace_chan_id)
519{
520	int idx;
521	struct int_node *inode;
522	struct intlist *traceid_queues_list;
523	struct cs_etm_traceid_queue *tidq, **traceid_queues;
524	struct cs_etm_auxtrace *etm = etmq->etm;
525
526	if (etm->per_thread_decoding)
527		trace_chan_id = CS_ETM_PER_THREAD_TRACEID;
528
529	traceid_queues_list = etmq->traceid_queues_list;
530
531	/*
532	 * Check if the traceid_queue exist for this traceID by looking
533	 * in the queue list.
534	 */
535	inode = intlist__find(traceid_queues_list, trace_chan_id);
536	if (inode) {
537		idx = (int)(intptr_t)inode->priv;
538		return etmq->traceid_queues[idx];
539	}
540
541	/* We couldn't find a traceid_queue for this traceID, allocate one */
542	tidq = malloc(sizeof(*tidq));
543	if (!tidq)
544		return NULL;
545
546	memset(tidq, 0, sizeof(*tidq));
547
548	/* Get a valid index for the new traceid_queue */
549	idx = intlist__nr_entries(traceid_queues_list);
550	/* Memory for the inode is free'ed in cs_etm_free_traceid_queues () */
551	inode = intlist__findnew(traceid_queues_list, trace_chan_id);
552	if (!inode)
553		goto out_free;
554
555	/* Associate this traceID with this index */
556	inode->priv = (void *)(intptr_t)idx;
557
558	if (cs_etm__init_traceid_queue(etmq, tidq, trace_chan_id))
559		goto out_free;
560
561	/* Grow the traceid_queues array by one unit */
562	traceid_queues = etmq->traceid_queues;
563	traceid_queues = reallocarray(traceid_queues,
564				      idx + 1,
565				      sizeof(*traceid_queues));
566
567	/*
568	 * On failure reallocarray() returns NULL and the original block of
569	 * memory is left untouched.
570	 */
571	if (!traceid_queues)
572		goto out_free;
573
574	traceid_queues[idx] = tidq;
575	etmq->traceid_queues = traceid_queues;
576
577	return etmq->traceid_queues[idx];
578
579out_free:
580	/*
581	 * Function intlist__remove() removes the inode from the list
582	 * and delete the memory associated to it.
583	 */
584	intlist__remove(traceid_queues_list, inode);
585	free(tidq);
586
587	return NULL;
588}
589
590struct cs_etm_packet_queue
591*cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq, u8 trace_chan_id)
592{
593	struct cs_etm_traceid_queue *tidq;
594
595	tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
596	if (tidq)
597		return &tidq->packet_queue;
598
599	return NULL;
600}
601
602static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm,
603				struct cs_etm_traceid_queue *tidq)
604{
605	struct cs_etm_packet *tmp;
606
607	if (etm->synth_opts.branches || etm->synth_opts.last_branch ||
608	    etm->synth_opts.instructions) {
609		/*
610		 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
611		 * the next incoming packet.
612		 *
613		 * Threads and exception levels are also tracked for both the
614		 * previous and current packets. This is because the previous
615		 * packet is used for the 'from' IP for branch samples, so the
616		 * thread at that time must also be assigned to that sample.
617		 * Across discontinuity packets the thread can change, so by
618		 * tracking the thread for the previous packet the branch sample
619		 * will have the correct info.
620		 */
621		tmp = tidq->packet;
622		tidq->packet = tidq->prev_packet;
623		tidq->prev_packet = tmp;
624		tidq->prev_packet_el = tidq->el;
625		thread__put(tidq->prev_packet_thread);
626		tidq->prev_packet_thread = thread__get(tidq->thread);
627	}
628}
629
630static void cs_etm__packet_dump(const char *pkt_string)
631{
632	const char *color = PERF_COLOR_BLUE;
633	int len = strlen(pkt_string);
634
635	if (len && (pkt_string[len-1] == '\n'))
636		color_fprintf(stdout, color, "	%s", pkt_string);
637	else
638		color_fprintf(stdout, color, "	%s\n", pkt_string);
639
640	fflush(stdout);
641}
642
643static void cs_etm__set_trace_param_etmv3(struct cs_etm_trace_params *t_params,
644					  struct cs_etm_auxtrace *etm, int idx,
645					  u32 etmidr)
646{
647	u64 **metadata = etm->metadata;
648
649	t_params[idx].protocol = cs_etm__get_v7_protocol_version(etmidr);
650	t_params[idx].etmv3.reg_ctrl = metadata[idx][CS_ETM_ETMCR];
651	t_params[idx].etmv3.reg_trc_id = metadata[idx][CS_ETM_ETMTRACEIDR];
652}
653
654static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params,
655					  struct cs_etm_auxtrace *etm, int idx)
656{
657	u64 **metadata = etm->metadata;
658
659	t_params[idx].protocol = CS_ETM_PROTO_ETMV4i;
660	t_params[idx].etmv4.reg_idr0 = metadata[idx][CS_ETMV4_TRCIDR0];
661	t_params[idx].etmv4.reg_idr1 = metadata[idx][CS_ETMV4_TRCIDR1];
662	t_params[idx].etmv4.reg_idr2 = metadata[idx][CS_ETMV4_TRCIDR2];
663	t_params[idx].etmv4.reg_idr8 = metadata[idx][CS_ETMV4_TRCIDR8];
664	t_params[idx].etmv4.reg_configr = metadata[idx][CS_ETMV4_TRCCONFIGR];
665	t_params[idx].etmv4.reg_traceidr = metadata[idx][CS_ETMV4_TRCTRACEIDR];
666}
667
668static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params *t_params,
669					  struct cs_etm_auxtrace *etm, int idx)
670{
671	u64 **metadata = etm->metadata;
672
673	t_params[idx].protocol = CS_ETM_PROTO_ETE;
674	t_params[idx].ete.reg_idr0 = metadata[idx][CS_ETE_TRCIDR0];
675	t_params[idx].ete.reg_idr1 = metadata[idx][CS_ETE_TRCIDR1];
676	t_params[idx].ete.reg_idr2 = metadata[idx][CS_ETE_TRCIDR2];
677	t_params[idx].ete.reg_idr8 = metadata[idx][CS_ETE_TRCIDR8];
678	t_params[idx].ete.reg_configr = metadata[idx][CS_ETE_TRCCONFIGR];
679	t_params[idx].ete.reg_traceidr = metadata[idx][CS_ETE_TRCTRACEIDR];
680	t_params[idx].ete.reg_devarch = metadata[idx][CS_ETE_TRCDEVARCH];
681}
682
683static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
684				     struct cs_etm_auxtrace *etm,
685				     int decoders)
686{
687	int i;
688	u32 etmidr;
689	u64 architecture;
690
691	for (i = 0; i < decoders; i++) {
692		architecture = etm->metadata[i][CS_ETM_MAGIC];
693
694		switch (architecture) {
695		case __perf_cs_etmv3_magic:
696			etmidr = etm->metadata[i][CS_ETM_ETMIDR];
697			cs_etm__set_trace_param_etmv3(t_params, etm, i, etmidr);
698			break;
699		case __perf_cs_etmv4_magic:
700			cs_etm__set_trace_param_etmv4(t_params, etm, i);
701			break;
702		case __perf_cs_ete_magic:
703			cs_etm__set_trace_param_ete(t_params, etm, i);
704			break;
705		default:
706			return -EINVAL;
707		}
708	}
709
710	return 0;
711}
712
713static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params,
714				       struct cs_etm_queue *etmq,
715				       enum cs_etm_decoder_operation mode,
716				       bool formatted)
717{
718	int ret = -EINVAL;
719
720	if (!(mode < CS_ETM_OPERATION_MAX))
721		goto out;
722
723	d_params->packet_printer = cs_etm__packet_dump;
724	d_params->operation = mode;
725	d_params->data = etmq;
726	d_params->formatted = formatted;
727	d_params->fsyncs = false;
728	d_params->hsyncs = false;
729	d_params->frame_aligned = true;
730
731	ret = 0;
732out:
733	return ret;
734}
735
736static void cs_etm__dump_event(struct cs_etm_queue *etmq,
737			       struct auxtrace_buffer *buffer)
738{
739	int ret;
740	const char *color = PERF_COLOR_BLUE;
741	size_t buffer_used = 0;
742
743	fprintf(stdout, "\n");
744	color_fprintf(stdout, color,
745		     ". ... CoreSight %s Trace data: size %#zx bytes\n",
746		     cs_etm_decoder__get_name(etmq->decoder), buffer->size);
747
748	do {
749		size_t consumed;
750
751		ret = cs_etm_decoder__process_data_block(
752				etmq->decoder, buffer->offset,
753				&((u8 *)buffer->data)[buffer_used],
754				buffer->size - buffer_used, &consumed);
755		if (ret)
756			break;
757
758		buffer_used += consumed;
759	} while (buffer_used < buffer->size);
760
761	cs_etm_decoder__reset(etmq->decoder);
762}
763
764static int cs_etm__flush_events(struct perf_session *session,
765				struct perf_tool *tool)
766{
767	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
768						   struct cs_etm_auxtrace,
769						   auxtrace);
770	if (dump_trace)
771		return 0;
772
773	if (!tool->ordered_events)
774		return -EINVAL;
775
776	if (etm->timeless_decoding) {
777		/*
778		 * Pass tid = -1 to process all queues. But likely they will have
779		 * already been processed on PERF_RECORD_EXIT anyway.
780		 */
781		return cs_etm__process_timeless_queues(etm, -1);
782	}
783
784	return cs_etm__process_timestamped_queues(etm);
785}
786
787static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq)
788{
789	int idx;
790	uintptr_t priv;
791	struct int_node *inode, *tmp;
792	struct cs_etm_traceid_queue *tidq;
793	struct intlist *traceid_queues_list = etmq->traceid_queues_list;
794
795	intlist__for_each_entry_safe(inode, tmp, traceid_queues_list) {
796		priv = (uintptr_t)inode->priv;
797		idx = priv;
798
799		/* Free this traceid_queue from the array */
800		tidq = etmq->traceid_queues[idx];
801		thread__zput(tidq->thread);
802		thread__zput(tidq->prev_packet_thread);
803		zfree(&tidq->event_buf);
804		zfree(&tidq->last_branch);
805		zfree(&tidq->last_branch_rb);
806		zfree(&tidq->prev_packet);
807		zfree(&tidq->packet);
808		zfree(&tidq);
809
810		/*
811		 * Function intlist__remove() removes the inode from the list
812		 * and delete the memory associated to it.
813		 */
814		intlist__remove(traceid_queues_list, inode);
815	}
816
817	/* Then the RB tree itself */
818	intlist__delete(traceid_queues_list);
819	etmq->traceid_queues_list = NULL;
820
821	/* finally free the traceid_queues array */
822	zfree(&etmq->traceid_queues);
823}
824
825static void cs_etm__free_queue(void *priv)
826{
827	struct cs_etm_queue *etmq = priv;
828
829	if (!etmq)
830		return;
831
832	cs_etm_decoder__free(etmq->decoder);
833	cs_etm__free_traceid_queues(etmq);
834	free(etmq);
835}
836
837static void cs_etm__free_events(struct perf_session *session)
838{
839	unsigned int i;
840	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
841						   struct cs_etm_auxtrace,
842						   auxtrace);
843	struct auxtrace_queues *queues = &aux->queues;
844
845	for (i = 0; i < queues->nr_queues; i++) {
846		cs_etm__free_queue(queues->queue_array[i].priv);
847		queues->queue_array[i].priv = NULL;
848	}
849
850	auxtrace_queues__free(queues);
851}
852
853static void cs_etm__free(struct perf_session *session)
854{
855	int i;
856	struct int_node *inode, *tmp;
857	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
858						   struct cs_etm_auxtrace,
859						   auxtrace);
860	cs_etm__free_events(session);
861	session->auxtrace = NULL;
862
863	/* First remove all traceID/metadata nodes for the RB tree */
864	intlist__for_each_entry_safe(inode, tmp, traceid_list)
865		intlist__remove(traceid_list, inode);
866	/* Then the RB tree itself */
867	intlist__delete(traceid_list);
868
869	for (i = 0; i < aux->num_cpu; i++)
870		zfree(&aux->metadata[i]);
871
872	zfree(&aux->metadata);
873	zfree(&aux);
874}
875
876static bool cs_etm__evsel_is_auxtrace(struct perf_session *session,
877				      struct evsel *evsel)
878{
879	struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
880						   struct cs_etm_auxtrace,
881						   auxtrace);
882
883	return evsel->core.attr.type == aux->pmu_type;
884}
885
886static struct machine *cs_etm__get_machine(struct cs_etm_queue *etmq,
887					   ocsd_ex_level el)
888{
889	enum cs_etm_pid_fmt pid_fmt = cs_etm__get_pid_fmt(etmq);
890
891	/*
892	 * For any virtualisation based on nVHE (e.g. pKVM), or host kernels
893	 * running at EL1 assume everything is the host.
894	 */
895	if (pid_fmt == CS_ETM_PIDFMT_CTXTID)
896		return &etmq->etm->session->machines.host;
897
898	/*
899	 * Not perfect, but otherwise assume anything in EL1 is the default
900	 * guest, and everything else is the host. Distinguishing between guest
901	 * and host userspaces isn't currently supported either. Neither is
902	 * multiple guest support. All this does is reduce the likeliness of
903	 * decode errors where we look into the host kernel maps when it should
904	 * have been the guest maps.
905	 */
906	switch (el) {
907	case ocsd_EL1:
908		return machines__find_guest(&etmq->etm->session->machines,
909					    DEFAULT_GUEST_KERNEL_ID);
910	case ocsd_EL3:
911	case ocsd_EL2:
912	case ocsd_EL0:
913	case ocsd_EL_unknown:
914	default:
915		return &etmq->etm->session->machines.host;
916	}
917}
918
919static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address,
920			   ocsd_ex_level el)
921{
922	struct machine *machine = cs_etm__get_machine(etmq, el);
923
924	if (address >= machine__kernel_start(machine)) {
925		if (machine__is_host(machine))
926			return PERF_RECORD_MISC_KERNEL;
927		else
928			return PERF_RECORD_MISC_GUEST_KERNEL;
929	} else {
930		if (machine__is_host(machine))
931			return PERF_RECORD_MISC_USER;
932		else {
933			/*
934			 * Can't really happen at the moment because
935			 * cs_etm__get_machine() will always return
936			 * machines.host for any non EL1 trace.
937			 */
938			return PERF_RECORD_MISC_GUEST_USER;
939		}
940	}
941}
942
943static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id,
944			      u64 address, size_t size, u8 *buffer,
945			      const ocsd_mem_space_acc_t mem_space)
946{
947	u8  cpumode;
948	u64 offset;
949	int len;
950	struct addr_location al;
951	struct dso *dso;
952	struct cs_etm_traceid_queue *tidq;
953	int ret = 0;
954
955	if (!etmq)
956		return 0;
957
958	addr_location__init(&al);
959	tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
960	if (!tidq)
961		goto out;
962
963	/*
964	 * We've already tracked EL along side the PID in cs_etm__set_thread()
965	 * so double check that it matches what OpenCSD thinks as well. It
966	 * doesn't distinguish between EL0 and EL1 for this mem access callback
967	 * so we had to do the extra tracking. Skip validation if it's any of
968	 * the 'any' values.
969	 */
970	if (!(mem_space == OCSD_MEM_SPACE_ANY ||
971	      mem_space == OCSD_MEM_SPACE_N || mem_space == OCSD_MEM_SPACE_S)) {
972		if (mem_space & OCSD_MEM_SPACE_EL1N) {
973			/* Includes both non secure EL1 and EL0 */
974			assert(tidq->el == ocsd_EL1 || tidq->el == ocsd_EL0);
975		} else if (mem_space & OCSD_MEM_SPACE_EL2)
976			assert(tidq->el == ocsd_EL2);
977		else if (mem_space & OCSD_MEM_SPACE_EL3)
978			assert(tidq->el == ocsd_EL3);
979	}
980
981	cpumode = cs_etm__cpu_mode(etmq, address, tidq->el);
982
983	if (!thread__find_map(tidq->thread, cpumode, address, &al))
984		goto out;
985
986	dso = map__dso(al.map);
987	if (!dso)
988		goto out;
989
990	if (dso->data.status == DSO_DATA_STATUS_ERROR &&
991	    dso__data_status_seen(dso, DSO_DATA_STATUS_SEEN_ITRACE))
992		goto out;
993
994	offset = map__map_ip(al.map, address);
995
996	map__load(al.map);
997
998	len = dso__data_read_offset(dso, maps__machine(thread__maps(tidq->thread)),
999				    offset, buffer, size);
1000
1001	if (len <= 0) {
1002		ui__warning_once("CS ETM Trace: Missing DSO. Use 'perf archive' or debuginfod to export data from the traced system.\n"
1003				 "              Enable CONFIG_PROC_KCORE or use option '-k /path/to/vmlinux' for kernel symbols.\n");
1004		if (!dso->auxtrace_warned) {
1005			pr_err("CS ETM Trace: Debug data not found for address %#"PRIx64" in %s\n",
1006				    address,
1007				    dso->long_name ? dso->long_name : "Unknown");
1008			dso->auxtrace_warned = true;
1009		}
1010		goto out;
1011	}
1012	ret = len;
1013out:
1014	addr_location__exit(&al);
1015	return ret;
1016}
1017
1018static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
1019						bool formatted)
1020{
1021	struct cs_etm_decoder_params d_params;
1022	struct cs_etm_trace_params  *t_params = NULL;
1023	struct cs_etm_queue *etmq;
1024	/*
1025	 * Each queue can only contain data from one CPU when unformatted, so only one decoder is
1026	 * needed.
1027	 */
1028	int decoders = formatted ? etm->num_cpu : 1;
1029
1030	etmq = zalloc(sizeof(*etmq));
1031	if (!etmq)
1032		return NULL;
1033
1034	etmq->traceid_queues_list = intlist__new(NULL);
1035	if (!etmq->traceid_queues_list)
1036		goto out_free;
1037
1038	/* Use metadata to fill in trace parameters for trace decoder */
1039	t_params = zalloc(sizeof(*t_params) * decoders);
1040
1041	if (!t_params)
1042		goto out_free;
1043
1044	if (cs_etm__init_trace_params(t_params, etm, decoders))
1045		goto out_free;
1046
1047	/* Set decoder parameters to decode trace packets */
1048	if (cs_etm__init_decoder_params(&d_params, etmq,
1049					dump_trace ? CS_ETM_OPERATION_PRINT :
1050						     CS_ETM_OPERATION_DECODE,
1051					formatted))
1052		goto out_free;
1053
1054	etmq->decoder = cs_etm_decoder__new(decoders, &d_params,
1055					    t_params);
1056
1057	if (!etmq->decoder)
1058		goto out_free;
1059
1060	/*
1061	 * Register a function to handle all memory accesses required by
1062	 * the trace decoder library.
1063	 */
1064	if (cs_etm_decoder__add_mem_access_cb(etmq->decoder,
1065					      0x0L, ((u64) -1L),
1066					      cs_etm__mem_access))
1067		goto out_free_decoder;
1068
1069	zfree(&t_params);
1070	return etmq;
1071
1072out_free_decoder:
1073	cs_etm_decoder__free(etmq->decoder);
1074out_free:
1075	intlist__delete(etmq->traceid_queues_list);
1076	free(etmq);
1077
1078	return NULL;
1079}
1080
1081static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
1082			       struct auxtrace_queue *queue,
1083			       unsigned int queue_nr,
1084			       bool formatted)
1085{
1086	struct cs_etm_queue *etmq = queue->priv;
1087
1088	if (list_empty(&queue->head) || etmq)
1089		return 0;
1090
1091	etmq = cs_etm__alloc_queue(etm, formatted);
1092
1093	if (!etmq)
1094		return -ENOMEM;
1095
1096	queue->priv = etmq;
1097	etmq->etm = etm;
1098	etmq->queue_nr = queue_nr;
1099	etmq->offset = 0;
1100
1101	return 0;
1102}
1103
1104static int cs_etm__queue_first_cs_timestamp(struct cs_etm_auxtrace *etm,
1105					    struct cs_etm_queue *etmq,
1106					    unsigned int queue_nr)
1107{
1108	int ret = 0;
1109	unsigned int cs_queue_nr;
1110	u8 trace_chan_id;
1111	u64 cs_timestamp;
1112
1113	/*
1114	 * We are under a CPU-wide trace scenario.  As such we need to know
1115	 * when the code that generated the traces started to execute so that
1116	 * it can be correlated with execution on other CPUs.  So we get a
1117	 * handle on the beginning of traces and decode until we find a
1118	 * timestamp.  The timestamp is then added to the auxtrace min heap
1119	 * in order to know what nibble (of all the etmqs) to decode first.
1120	 */
1121	while (1) {
1122		/*
1123		 * Fetch an aux_buffer from this etmq.  Bail if no more
1124		 * blocks or an error has been encountered.
1125		 */
1126		ret = cs_etm__get_data_block(etmq);
1127		if (ret <= 0)
1128			goto out;
1129
1130		/*
1131		 * Run decoder on the trace block.  The decoder will stop when
1132		 * encountering a CS timestamp, a full packet queue or the end of
1133		 * trace for that block.
1134		 */
1135		ret = cs_etm__decode_data_block(etmq);
1136		if (ret)
1137			goto out;
1138
1139		/*
1140		 * Function cs_etm_decoder__do_{hard|soft}_timestamp() does all
1141		 * the timestamp calculation for us.
1142		 */
1143		cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
1144
1145		/* We found a timestamp, no need to continue. */
1146		if (cs_timestamp)
1147			break;
1148
1149		/*
1150		 * We didn't find a timestamp so empty all the traceid packet
1151		 * queues before looking for another timestamp packet, either
1152		 * in the current data block or a new one.  Packets that were
1153		 * just decoded are useless since no timestamp has been
1154		 * associated with them.  As such simply discard them.
1155		 */
1156		cs_etm__clear_all_packet_queues(etmq);
1157	}
1158
1159	/*
1160	 * We have a timestamp.  Add it to the min heap to reflect when
1161	 * instructions conveyed by the range packets of this traceID queue
1162	 * started to execute.  Once the same has been done for all the traceID
1163	 * queues of each etmq, redenring and decoding can start in
1164	 * chronological order.
1165	 *
1166	 * Note that packets decoded above are still in the traceID's packet
1167	 * queue and will be processed in cs_etm__process_timestamped_queues().
1168	 */
1169	cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
1170	ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp);
1171out:
1172	return ret;
1173}
1174
1175static inline
1176void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq,
1177				 struct cs_etm_traceid_queue *tidq)
1178{
1179	struct branch_stack *bs_src = tidq->last_branch_rb;
1180	struct branch_stack *bs_dst = tidq->last_branch;
1181	size_t nr = 0;
1182
1183	/*
1184	 * Set the number of records before early exit: ->nr is used to
1185	 * determine how many branches to copy from ->entries.
1186	 */
1187	bs_dst->nr = bs_src->nr;
1188
1189	/*
1190	 * Early exit when there is nothing to copy.
1191	 */
1192	if (!bs_src->nr)
1193		return;
1194
1195	/*
1196	 * As bs_src->entries is a circular buffer, we need to copy from it in
1197	 * two steps.  First, copy the branches from the most recently inserted
1198	 * branch ->last_branch_pos until the end of bs_src->entries buffer.
1199	 */
1200	nr = etmq->etm->synth_opts.last_branch_sz - tidq->last_branch_pos;
1201	memcpy(&bs_dst->entries[0],
1202	       &bs_src->entries[tidq->last_branch_pos],
1203	       sizeof(struct branch_entry) * nr);
1204
1205	/*
1206	 * If we wrapped around at least once, the branches from the beginning
1207	 * of the bs_src->entries buffer and until the ->last_branch_pos element
1208	 * are older valid branches: copy them over.  The total number of
1209	 * branches copied over will be equal to the number of branches asked by
1210	 * the user in last_branch_sz.
1211	 */
1212	if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) {
1213		memcpy(&bs_dst->entries[nr],
1214		       &bs_src->entries[0],
1215		       sizeof(struct branch_entry) * tidq->last_branch_pos);
1216	}
1217}
1218
1219static inline
1220void cs_etm__reset_last_branch_rb(struct cs_etm_traceid_queue *tidq)
1221{
1222	tidq->last_branch_pos = 0;
1223	tidq->last_branch_rb->nr = 0;
1224}
1225
1226static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq,
1227					 u8 trace_chan_id, u64 addr)
1228{
1229	u8 instrBytes[2];
1230
1231	cs_etm__mem_access(etmq, trace_chan_id, addr, ARRAY_SIZE(instrBytes),
1232			   instrBytes, 0);
1233	/*
1234	 * T32 instruction size is indicated by bits[15:11] of the first
1235	 * 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111
1236	 * denote a 32-bit instruction.
1237	 */
1238	return ((instrBytes[1] & 0xF8) >= 0xE8) ? 4 : 2;
1239}
1240
1241static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet)
1242{
1243	/* Returns 0 for the CS_ETM_DISCONTINUITY packet */
1244	if (packet->sample_type == CS_ETM_DISCONTINUITY)
1245		return 0;
1246
1247	return packet->start_addr;
1248}
1249
1250static inline
1251u64 cs_etm__last_executed_instr(const struct cs_etm_packet *packet)
1252{
1253	/* Returns 0 for the CS_ETM_DISCONTINUITY packet */
1254	if (packet->sample_type == CS_ETM_DISCONTINUITY)
1255		return 0;
1256
1257	return packet->end_addr - packet->last_instr_size;
1258}
1259
1260static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq,
1261				     u64 trace_chan_id,
1262				     const struct cs_etm_packet *packet,
1263				     u64 offset)
1264{
1265	if (packet->isa == CS_ETM_ISA_T32) {
1266		u64 addr = packet->start_addr;
1267
1268		while (offset) {
1269			addr += cs_etm__t32_instr_size(etmq,
1270						       trace_chan_id, addr);
1271			offset--;
1272		}
1273		return addr;
1274	}
1275
1276	/* Assume a 4 byte instruction size (A32/A64) */
1277	return packet->start_addr + offset * 4;
1278}
1279
1280static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq,
1281					  struct cs_etm_traceid_queue *tidq)
1282{
1283	struct branch_stack *bs = tidq->last_branch_rb;
1284	struct branch_entry *be;
1285
1286	/*
1287	 * The branches are recorded in a circular buffer in reverse
1288	 * chronological order: we start recording from the last element of the
1289	 * buffer down.  After writing the first element of the stack, move the
1290	 * insert position back to the end of the buffer.
1291	 */
1292	if (!tidq->last_branch_pos)
1293		tidq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz;
1294
1295	tidq->last_branch_pos -= 1;
1296
1297	be       = &bs->entries[tidq->last_branch_pos];
1298	be->from = cs_etm__last_executed_instr(tidq->prev_packet);
1299	be->to	 = cs_etm__first_executed_instr(tidq->packet);
1300	/* No support for mispredict */
1301	be->flags.mispred = 0;
1302	be->flags.predicted = 1;
1303
1304	/*
1305	 * Increment bs->nr until reaching the number of last branches asked by
1306	 * the user on the command line.
1307	 */
1308	if (bs->nr < etmq->etm->synth_opts.last_branch_sz)
1309		bs->nr += 1;
1310}
1311
1312static int cs_etm__inject_event(union perf_event *event,
1313			       struct perf_sample *sample, u64 type)
1314{
1315	event->header.size = perf_event__sample_event_size(sample, type, 0);
1316	return perf_event__synthesize_sample(event, type, 0, sample);
1317}
1318
1319
1320static int
1321cs_etm__get_trace(struct cs_etm_queue *etmq)
1322{
1323	struct auxtrace_buffer *aux_buffer = etmq->buffer;
1324	struct auxtrace_buffer *old_buffer = aux_buffer;
1325	struct auxtrace_queue *queue;
1326
1327	queue = &etmq->etm->queues.queue_array[etmq->queue_nr];
1328
1329	aux_buffer = auxtrace_buffer__next(queue, aux_buffer);
1330
1331	/* If no more data, drop the previous auxtrace_buffer and return */
1332	if (!aux_buffer) {
1333		if (old_buffer)
1334			auxtrace_buffer__drop_data(old_buffer);
1335		etmq->buf_len = 0;
1336		return 0;
1337	}
1338
1339	etmq->buffer = aux_buffer;
1340
1341	/* If the aux_buffer doesn't have data associated, try to load it */
1342	if (!aux_buffer->data) {
1343		/* get the file desc associated with the perf data file */
1344		int fd = perf_data__fd(etmq->etm->session->data);
1345
1346		aux_buffer->data = auxtrace_buffer__get_data(aux_buffer, fd);
1347		if (!aux_buffer->data)
1348			return -ENOMEM;
1349	}
1350
1351	/* If valid, drop the previous buffer */
1352	if (old_buffer)
1353		auxtrace_buffer__drop_data(old_buffer);
1354
1355	etmq->buf_used = 0;
1356	etmq->buf_len = aux_buffer->size;
1357	etmq->buf = aux_buffer->data;
1358
1359	return etmq->buf_len;
1360}
1361
1362static void cs_etm__set_thread(struct cs_etm_queue *etmq,
1363			       struct cs_etm_traceid_queue *tidq, pid_t tid,
1364			       ocsd_ex_level el)
1365{
1366	struct machine *machine = cs_etm__get_machine(etmq, el);
1367
1368	if (tid != -1) {
1369		thread__zput(tidq->thread);
1370		tidq->thread = machine__find_thread(machine, -1, tid);
1371	}
1372
1373	/* Couldn't find a known thread */
1374	if (!tidq->thread)
1375		tidq->thread = machine__idle_thread(machine);
1376
1377	tidq->el = el;
1378}
1379
1380int cs_etm__etmq_set_tid_el(struct cs_etm_queue *etmq, pid_t tid,
1381			    u8 trace_chan_id, ocsd_ex_level el)
1382{
1383	struct cs_etm_traceid_queue *tidq;
1384
1385	tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
1386	if (!tidq)
1387		return -EINVAL;
1388
1389	cs_etm__set_thread(etmq, tidq, tid, el);
1390	return 0;
1391}
1392
1393bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq)
1394{
1395	return !!etmq->etm->timeless_decoding;
1396}
1397
1398static void cs_etm__copy_insn(struct cs_etm_queue *etmq,
1399			      u64 trace_chan_id,
1400			      const struct cs_etm_packet *packet,
1401			      struct perf_sample *sample)
1402{
1403	/*
1404	 * It's pointless to read instructions for the CS_ETM_DISCONTINUITY
1405	 * packet, so directly bail out with 'insn_len' = 0.
1406	 */
1407	if (packet->sample_type == CS_ETM_DISCONTINUITY) {
1408		sample->insn_len = 0;
1409		return;
1410	}
1411
1412	/*
1413	 * T32 instruction size might be 32-bit or 16-bit, decide by calling
1414	 * cs_etm__t32_instr_size().
1415	 */
1416	if (packet->isa == CS_ETM_ISA_T32)
1417		sample->insn_len = cs_etm__t32_instr_size(etmq, trace_chan_id,
1418							  sample->ip);
1419	/* Otherwise, A64 and A32 instruction size are always 32-bit. */
1420	else
1421		sample->insn_len = 4;
1422
1423	cs_etm__mem_access(etmq, trace_chan_id, sample->ip, sample->insn_len,
1424			   (void *)sample->insn, 0);
1425}
1426
1427u64 cs_etm__convert_sample_time(struct cs_etm_queue *etmq, u64 cs_timestamp)
1428{
1429	struct cs_etm_auxtrace *etm = etmq->etm;
1430
1431	if (etm->has_virtual_ts)
1432		return tsc_to_perf_time(cs_timestamp, &etm->tc);
1433	else
1434		return cs_timestamp;
1435}
1436
1437static inline u64 cs_etm__resolve_sample_time(struct cs_etm_queue *etmq,
1438					       struct cs_etm_traceid_queue *tidq)
1439{
1440	struct cs_etm_auxtrace *etm = etmq->etm;
1441	struct cs_etm_packet_queue *packet_queue = &tidq->packet_queue;
1442
1443	if (!etm->timeless_decoding && etm->has_virtual_ts)
1444		return packet_queue->cs_timestamp;
1445	else
1446		return etm->latest_kernel_timestamp;
1447}
1448
1449static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
1450					    struct cs_etm_traceid_queue *tidq,
1451					    u64 addr, u64 period)
1452{
1453	int ret = 0;
1454	struct cs_etm_auxtrace *etm = etmq->etm;
1455	union perf_event *event = tidq->event_buf;
1456	struct perf_sample sample = {.ip = 0,};
1457
1458	event->sample.header.type = PERF_RECORD_SAMPLE;
1459	event->sample.header.misc = cs_etm__cpu_mode(etmq, addr, tidq->el);
1460	event->sample.header.size = sizeof(struct perf_event_header);
1461
1462	/* Set time field based on etm auxtrace config. */
1463	sample.time = cs_etm__resolve_sample_time(etmq, tidq);
1464
1465	sample.ip = addr;
1466	sample.pid = thread__pid(tidq->thread);
1467	sample.tid = thread__tid(tidq->thread);
1468	sample.id = etmq->etm->instructions_id;
1469	sample.stream_id = etmq->etm->instructions_id;
1470	sample.period = period;
1471	sample.cpu = tidq->packet->cpu;
1472	sample.flags = tidq->prev_packet->flags;
1473	sample.cpumode = event->sample.header.misc;
1474
1475	cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->packet, &sample);
1476
1477	if (etm->synth_opts.last_branch)
1478		sample.branch_stack = tidq->last_branch;
1479
1480	if (etm->synth_opts.inject) {
1481		ret = cs_etm__inject_event(event, &sample,
1482					   etm->instructions_sample_type);
1483		if (ret)
1484			return ret;
1485	}
1486
1487	ret = perf_session__deliver_synth_event(etm->session, event, &sample);
1488
1489	if (ret)
1490		pr_err(
1491			"CS ETM Trace: failed to deliver instruction event, error %d\n",
1492			ret);
1493
1494	return ret;
1495}
1496
1497/*
1498 * The cs etm packet encodes an instruction range between a branch target
1499 * and the next taken branch. Generate sample accordingly.
1500 */
1501static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
1502				       struct cs_etm_traceid_queue *tidq)
1503{
1504	int ret = 0;
1505	struct cs_etm_auxtrace *etm = etmq->etm;
1506	struct perf_sample sample = {.ip = 0,};
1507	union perf_event *event = tidq->event_buf;
1508	struct dummy_branch_stack {
1509		u64			nr;
1510		u64			hw_idx;
1511		struct branch_entry	entries;
1512	} dummy_bs;
1513	u64 ip;
1514
1515	ip = cs_etm__last_executed_instr(tidq->prev_packet);
1516
1517	event->sample.header.type = PERF_RECORD_SAMPLE;
1518	event->sample.header.misc = cs_etm__cpu_mode(etmq, ip,
1519						     tidq->prev_packet_el);
1520	event->sample.header.size = sizeof(struct perf_event_header);
1521
1522	/* Set time field based on etm auxtrace config. */
1523	sample.time = cs_etm__resolve_sample_time(etmq, tidq);
1524
1525	sample.ip = ip;
1526	sample.pid = thread__pid(tidq->prev_packet_thread);
1527	sample.tid = thread__tid(tidq->prev_packet_thread);
1528	sample.addr = cs_etm__first_executed_instr(tidq->packet);
1529	sample.id = etmq->etm->branches_id;
1530	sample.stream_id = etmq->etm->branches_id;
1531	sample.period = 1;
1532	sample.cpu = tidq->packet->cpu;
1533	sample.flags = tidq->prev_packet->flags;
1534	sample.cpumode = event->sample.header.misc;
1535
1536	cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->prev_packet,
1537			  &sample);
1538
1539	/*
1540	 * perf report cannot handle events without a branch stack
1541	 */
1542	if (etm->synth_opts.last_branch) {
1543		dummy_bs = (struct dummy_branch_stack){
1544			.nr = 1,
1545			.hw_idx = -1ULL,
1546			.entries = {
1547				.from = sample.ip,
1548				.to = sample.addr,
1549			},
1550		};
1551		sample.branch_stack = (struct branch_stack *)&dummy_bs;
1552	}
1553
1554	if (etm->synth_opts.inject) {
1555		ret = cs_etm__inject_event(event, &sample,
1556					   etm->branches_sample_type);
1557		if (ret)
1558			return ret;
1559	}
1560
1561	ret = perf_session__deliver_synth_event(etm->session, event, &sample);
1562
1563	if (ret)
1564		pr_err(
1565		"CS ETM Trace: failed to deliver instruction event, error %d\n",
1566		ret);
1567
1568	return ret;
1569}
1570
1571struct cs_etm_synth {
1572	struct perf_tool dummy_tool;
1573	struct perf_session *session;
1574};
1575
1576static int cs_etm__event_synth(struct perf_tool *tool,
1577			       union perf_event *event,
1578			       struct perf_sample *sample __maybe_unused,
1579			       struct machine *machine __maybe_unused)
1580{
1581	struct cs_etm_synth *cs_etm_synth =
1582		      container_of(tool, struct cs_etm_synth, dummy_tool);
1583
1584	return perf_session__deliver_synth_event(cs_etm_synth->session,
1585						 event, NULL);
1586}
1587
1588static int cs_etm__synth_event(struct perf_session *session,
1589			       struct perf_event_attr *attr, u64 id)
1590{
1591	struct cs_etm_synth cs_etm_synth;
1592
1593	memset(&cs_etm_synth, 0, sizeof(struct cs_etm_synth));
1594	cs_etm_synth.session = session;
1595
1596	return perf_event__synthesize_attr(&cs_etm_synth.dummy_tool, attr, 1,
1597					   &id, cs_etm__event_synth);
1598}
1599
1600static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
1601				struct perf_session *session)
1602{
1603	struct evlist *evlist = session->evlist;
1604	struct evsel *evsel;
1605	struct perf_event_attr attr;
1606	bool found = false;
1607	u64 id;
1608	int err;
1609
1610	evlist__for_each_entry(evlist, evsel) {
1611		if (evsel->core.attr.type == etm->pmu_type) {
1612			found = true;
1613			break;
1614		}
1615	}
1616
1617	if (!found) {
1618		pr_debug("No selected events with CoreSight Trace data\n");
1619		return 0;
1620	}
1621
1622	memset(&attr, 0, sizeof(struct perf_event_attr));
1623	attr.size = sizeof(struct perf_event_attr);
1624	attr.type = PERF_TYPE_HARDWARE;
1625	attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
1626	attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
1627			    PERF_SAMPLE_PERIOD;
1628	if (etm->timeless_decoding)
1629		attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
1630	else
1631		attr.sample_type |= PERF_SAMPLE_TIME;
1632
1633	attr.exclude_user = evsel->core.attr.exclude_user;
1634	attr.exclude_kernel = evsel->core.attr.exclude_kernel;
1635	attr.exclude_hv = evsel->core.attr.exclude_hv;
1636	attr.exclude_host = evsel->core.attr.exclude_host;
1637	attr.exclude_guest = evsel->core.attr.exclude_guest;
1638	attr.sample_id_all = evsel->core.attr.sample_id_all;
1639	attr.read_format = evsel->core.attr.read_format;
1640
1641	/* create new id val to be a fixed offset from evsel id */
1642	id = evsel->core.id[0] + 1000000000;
1643
1644	if (!id)
1645		id = 1;
1646
1647	if (etm->synth_opts.branches) {
1648		attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
1649		attr.sample_period = 1;
1650		attr.sample_type |= PERF_SAMPLE_ADDR;
1651		err = cs_etm__synth_event(session, &attr, id);
1652		if (err)
1653			return err;
1654		etm->branches_sample_type = attr.sample_type;
1655		etm->branches_id = id;
1656		id += 1;
1657		attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR;
1658	}
1659
1660	if (etm->synth_opts.last_branch) {
1661		attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
1662		/*
1663		 * We don't use the hardware index, but the sample generation
1664		 * code uses the new format branch_stack with this field,
1665		 * so the event attributes must indicate that it's present.
1666		 */
1667		attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX;
1668	}
1669
1670	if (etm->synth_opts.instructions) {
1671		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
1672		attr.sample_period = etm->synth_opts.period;
1673		etm->instructions_sample_period = attr.sample_period;
1674		err = cs_etm__synth_event(session, &attr, id);
1675		if (err)
1676			return err;
1677		etm->instructions_sample_type = attr.sample_type;
1678		etm->instructions_id = id;
1679		id += 1;
1680	}
1681
1682	return 0;
1683}
1684
1685static int cs_etm__sample(struct cs_etm_queue *etmq,
1686			  struct cs_etm_traceid_queue *tidq)
1687{
1688	struct cs_etm_auxtrace *etm = etmq->etm;
1689	int ret;
1690	u8 trace_chan_id = tidq->trace_chan_id;
1691	u64 instrs_prev;
1692
1693	/* Get instructions remainder from previous packet */
1694	instrs_prev = tidq->period_instructions;
1695
1696	tidq->period_instructions += tidq->packet->instr_count;
1697
1698	/*
1699	 * Record a branch when the last instruction in
1700	 * PREV_PACKET is a branch.
1701	 */
1702	if (etm->synth_opts.last_branch &&
1703	    tidq->prev_packet->sample_type == CS_ETM_RANGE &&
1704	    tidq->prev_packet->last_instr_taken_branch)
1705		cs_etm__update_last_branch_rb(etmq, tidq);
1706
1707	if (etm->synth_opts.instructions &&
1708	    tidq->period_instructions >= etm->instructions_sample_period) {
1709		/*
1710		 * Emit instruction sample periodically
1711		 * TODO: allow period to be defined in cycles and clock time
1712		 */
1713
1714		/*
1715		 * Below diagram demonstrates the instruction samples
1716		 * generation flows:
1717		 *
1718		 *    Instrs     Instrs       Instrs       Instrs
1719		 *   Sample(n)  Sample(n+1)  Sample(n+2)  Sample(n+3)
1720		 *    |            |            |            |
1721		 *    V            V            V            V
1722		 *   --------------------------------------------------
1723		 *            ^                                  ^
1724		 *            |                                  |
1725		 *         Period                             Period
1726		 *    instructions(Pi)                   instructions(Pi')
1727		 *
1728		 *            |                                  |
1729		 *            \---------------- -----------------/
1730		 *                             V
1731		 *                 tidq->packet->instr_count
1732		 *
1733		 * Instrs Sample(n...) are the synthesised samples occurring
1734		 * every etm->instructions_sample_period instructions - as
1735		 * defined on the perf command line.  Sample(n) is being the
1736		 * last sample before the current etm packet, n+1 to n+3
1737		 * samples are generated from the current etm packet.
1738		 *
1739		 * tidq->packet->instr_count represents the number of
1740		 * instructions in the current etm packet.
1741		 *
1742		 * Period instructions (Pi) contains the number of
1743		 * instructions executed after the sample point(n) from the
1744		 * previous etm packet.  This will always be less than
1745		 * etm->instructions_sample_period.
1746		 *
1747		 * When generate new samples, it combines with two parts
1748		 * instructions, one is the tail of the old packet and another
1749		 * is the head of the new coming packet, to generate
1750		 * sample(n+1); sample(n+2) and sample(n+3) consume the
1751		 * instructions with sample period.  After sample(n+3), the rest
1752		 * instructions will be used by later packet and it is assigned
1753		 * to tidq->period_instructions for next round calculation.
1754		 */
1755
1756		/*
1757		 * Get the initial offset into the current packet instructions;
1758		 * entry conditions ensure that instrs_prev is less than
1759		 * etm->instructions_sample_period.
1760		 */
1761		u64 offset = etm->instructions_sample_period - instrs_prev;
1762		u64 addr;
1763
1764		/* Prepare last branches for instruction sample */
1765		if (etm->synth_opts.last_branch)
1766			cs_etm__copy_last_branch_rb(etmq, tidq);
1767
1768		while (tidq->period_instructions >=
1769				etm->instructions_sample_period) {
1770			/*
1771			 * Calculate the address of the sampled instruction (-1
1772			 * as sample is reported as though instruction has just
1773			 * been executed, but PC has not advanced to next
1774			 * instruction)
1775			 */
1776			addr = cs_etm__instr_addr(etmq, trace_chan_id,
1777						  tidq->packet, offset - 1);
1778			ret = cs_etm__synth_instruction_sample(
1779				etmq, tidq, addr,
1780				etm->instructions_sample_period);
1781			if (ret)
1782				return ret;
1783
1784			offset += etm->instructions_sample_period;
1785			tidq->period_instructions -=
1786				etm->instructions_sample_period;
1787		}
1788	}
1789
1790	if (etm->synth_opts.branches) {
1791		bool generate_sample = false;
1792
1793		/* Generate sample for tracing on packet */
1794		if (tidq->prev_packet->sample_type == CS_ETM_DISCONTINUITY)
1795			generate_sample = true;
1796
1797		/* Generate sample for branch taken packet */
1798		if (tidq->prev_packet->sample_type == CS_ETM_RANGE &&
1799		    tidq->prev_packet->last_instr_taken_branch)
1800			generate_sample = true;
1801
1802		if (generate_sample) {
1803			ret = cs_etm__synth_branch_sample(etmq, tidq);
1804			if (ret)
1805				return ret;
1806		}
1807	}
1808
1809	cs_etm__packet_swap(etm, tidq);
1810
1811	return 0;
1812}
1813
1814static int cs_etm__exception(struct cs_etm_traceid_queue *tidq)
1815{
1816	/*
1817	 * When the exception packet is inserted, whether the last instruction
1818	 * in previous range packet is taken branch or not, we need to force
1819	 * to set 'prev_packet->last_instr_taken_branch' to true.  This ensures
1820	 * to generate branch sample for the instruction range before the
1821	 * exception is trapped to kernel or before the exception returning.
1822	 *
1823	 * The exception packet includes the dummy address values, so don't
1824	 * swap PACKET with PREV_PACKET.  This keeps PREV_PACKET to be useful
1825	 * for generating instruction and branch samples.
1826	 */
1827	if (tidq->prev_packet->sample_type == CS_ETM_RANGE)
1828		tidq->prev_packet->last_instr_taken_branch = true;
1829
1830	return 0;
1831}
1832
1833static int cs_etm__flush(struct cs_etm_queue *etmq,
1834			 struct cs_etm_traceid_queue *tidq)
1835{
1836	int err = 0;
1837	struct cs_etm_auxtrace *etm = etmq->etm;
1838
1839	/* Handle start tracing packet */
1840	if (tidq->prev_packet->sample_type == CS_ETM_EMPTY)
1841		goto swap_packet;
1842
1843	if (etmq->etm->synth_opts.last_branch &&
1844	    etmq->etm->synth_opts.instructions &&
1845	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
1846		u64 addr;
1847
1848		/* Prepare last branches for instruction sample */
1849		cs_etm__copy_last_branch_rb(etmq, tidq);
1850
1851		/*
1852		 * Generate a last branch event for the branches left in the
1853		 * circular buffer at the end of the trace.
1854		 *
1855		 * Use the address of the end of the last reported execution
1856		 * range
1857		 */
1858		addr = cs_etm__last_executed_instr(tidq->prev_packet);
1859
1860		err = cs_etm__synth_instruction_sample(
1861			etmq, tidq, addr,
1862			tidq->period_instructions);
1863		if (err)
1864			return err;
1865
1866		tidq->period_instructions = 0;
1867
1868	}
1869
1870	if (etm->synth_opts.branches &&
1871	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
1872		err = cs_etm__synth_branch_sample(etmq, tidq);
1873		if (err)
1874			return err;
1875	}
1876
1877swap_packet:
1878	cs_etm__packet_swap(etm, tidq);
1879
1880	/* Reset last branches after flush the trace */
1881	if (etm->synth_opts.last_branch)
1882		cs_etm__reset_last_branch_rb(tidq);
1883
1884	return err;
1885}
1886
1887static int cs_etm__end_block(struct cs_etm_queue *etmq,
1888			     struct cs_etm_traceid_queue *tidq)
1889{
1890	int err;
1891
1892	/*
1893	 * It has no new packet coming and 'etmq->packet' contains the stale
1894	 * packet which was set at the previous time with packets swapping;
1895	 * so skip to generate branch sample to avoid stale packet.
1896	 *
1897	 * For this case only flush branch stack and generate a last branch
1898	 * event for the branches left in the circular buffer at the end of
1899	 * the trace.
1900	 */
1901	if (etmq->etm->synth_opts.last_branch &&
1902	    etmq->etm->synth_opts.instructions &&
1903	    tidq->prev_packet->sample_type == CS_ETM_RANGE) {
1904		u64 addr;
1905
1906		/* Prepare last branches for instruction sample */
1907		cs_etm__copy_last_branch_rb(etmq, tidq);
1908
1909		/*
1910		 * Use the address of the end of the last reported execution
1911		 * range.
1912		 */
1913		addr = cs_etm__last_executed_instr(tidq->prev_packet);
1914
1915		err = cs_etm__synth_instruction_sample(
1916			etmq, tidq, addr,
1917			tidq->period_instructions);
1918		if (err)
1919			return err;
1920
1921		tidq->period_instructions = 0;
1922	}
1923
1924	return 0;
1925}
1926/*
1927 * cs_etm__get_data_block: Fetch a block from the auxtrace_buffer queue
1928 *			   if need be.
1929 * Returns:	< 0	if error
1930 *		= 0	if no more auxtrace_buffer to read
1931 *		> 0	if the current buffer isn't empty yet
1932 */
1933static int cs_etm__get_data_block(struct cs_etm_queue *etmq)
1934{
1935	int ret;
1936
1937	if (!etmq->buf_len) {
1938		ret = cs_etm__get_trace(etmq);
1939		if (ret <= 0)
1940			return ret;
1941		/*
1942		 * We cannot assume consecutive blocks in the data file
1943		 * are contiguous, reset the decoder to force re-sync.
1944		 */
1945		ret = cs_etm_decoder__reset(etmq->decoder);
1946		if (ret)
1947			return ret;
1948	}
1949
1950	return etmq->buf_len;
1951}
1952
1953static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, u8 trace_chan_id,
1954				 struct cs_etm_packet *packet,
1955				 u64 end_addr)
1956{
1957	/* Initialise to keep compiler happy */
1958	u16 instr16 = 0;
1959	u32 instr32 = 0;
1960	u64 addr;
1961
1962	switch (packet->isa) {
1963	case CS_ETM_ISA_T32:
1964		/*
1965		 * The SVC of T32 is defined in ARM DDI 0487D.a, F5.1.247:
1966		 *
1967		 *  b'15         b'8
1968		 * +-----------------+--------+
1969		 * | 1 1 0 1 1 1 1 1 |  imm8  |
1970		 * +-----------------+--------+
1971		 *
1972		 * According to the specification, it only defines SVC for T32
1973		 * with 16 bits instruction and has no definition for 32bits;
1974		 * so below only read 2 bytes as instruction size for T32.
1975		 */
1976		addr = end_addr - 2;
1977		cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr16),
1978				   (u8 *)&instr16, 0);
1979		if ((instr16 & 0xFF00) == 0xDF00)
1980			return true;
1981
1982		break;
1983	case CS_ETM_ISA_A32:
1984		/*
1985		 * The SVC of A32 is defined in ARM DDI 0487D.a, F5.1.247:
1986		 *
1987		 *  b'31 b'28 b'27 b'24
1988		 * +---------+---------+-------------------------+
1989		 * |  !1111  | 1 1 1 1 |        imm24            |
1990		 * +---------+---------+-------------------------+
1991		 */
1992		addr = end_addr - 4;
1993		cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32),
1994				   (u8 *)&instr32, 0);
1995		if ((instr32 & 0x0F000000) == 0x0F000000 &&
1996		    (instr32 & 0xF0000000) != 0xF0000000)
1997			return true;
1998
1999		break;
2000	case CS_ETM_ISA_A64:
2001		/*
2002		 * The SVC of A64 is defined in ARM DDI 0487D.a, C6.2.294:
2003		 *
2004		 *  b'31               b'21           b'4     b'0
2005		 * +-----------------------+---------+-----------+
2006		 * | 1 1 0 1 0 1 0 0 0 0 0 |  imm16  | 0 0 0 0 1 |
2007		 * +-----------------------+---------+-----------+
2008		 */
2009		addr = end_addr - 4;
2010		cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32),
2011				   (u8 *)&instr32, 0);
2012		if ((instr32 & 0xFFE0001F) == 0xd4000001)
2013			return true;
2014
2015		break;
2016	case CS_ETM_ISA_UNKNOWN:
2017	default:
2018		break;
2019	}
2020
2021	return false;
2022}
2023
2024static bool cs_etm__is_syscall(struct cs_etm_queue *etmq,
2025			       struct cs_etm_traceid_queue *tidq, u64 magic)
2026{
2027	u8 trace_chan_id = tidq->trace_chan_id;
2028	struct cs_etm_packet *packet = tidq->packet;
2029	struct cs_etm_packet *prev_packet = tidq->prev_packet;
2030
2031	if (magic == __perf_cs_etmv3_magic)
2032		if (packet->exception_number == CS_ETMV3_EXC_SVC)
2033			return true;
2034
2035	/*
2036	 * ETMv4 exception type CS_ETMV4_EXC_CALL covers SVC, SMC and
2037	 * HVC cases; need to check if it's SVC instruction based on
2038	 * packet address.
2039	 */
2040	if (magic == __perf_cs_etmv4_magic) {
2041		if (packet->exception_number == CS_ETMV4_EXC_CALL &&
2042		    cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet,
2043					 prev_packet->end_addr))
2044			return true;
2045	}
2046
2047	return false;
2048}
2049
2050static bool cs_etm__is_async_exception(struct cs_etm_traceid_queue *tidq,
2051				       u64 magic)
2052{
2053	struct cs_etm_packet *packet = tidq->packet;
2054
2055	if (magic == __perf_cs_etmv3_magic)
2056		if (packet->exception_number == CS_ETMV3_EXC_DEBUG_HALT ||
2057		    packet->exception_number == CS_ETMV3_EXC_ASYNC_DATA_ABORT ||
2058		    packet->exception_number == CS_ETMV3_EXC_PE_RESET ||
2059		    packet->exception_number == CS_ETMV3_EXC_IRQ ||
2060		    packet->exception_number == CS_ETMV3_EXC_FIQ)
2061			return true;
2062
2063	if (magic == __perf_cs_etmv4_magic)
2064		if (packet->exception_number == CS_ETMV4_EXC_RESET ||
2065		    packet->exception_number == CS_ETMV4_EXC_DEBUG_HALT ||
2066		    packet->exception_number == CS_ETMV4_EXC_SYSTEM_ERROR ||
2067		    packet->exception_number == CS_ETMV4_EXC_INST_DEBUG ||
2068		    packet->exception_number == CS_ETMV4_EXC_DATA_DEBUG ||
2069		    packet->exception_number == CS_ETMV4_EXC_IRQ ||
2070		    packet->exception_number == CS_ETMV4_EXC_FIQ)
2071			return true;
2072
2073	return false;
2074}
2075
2076static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq,
2077				      struct cs_etm_traceid_queue *tidq,
2078				      u64 magic)
2079{
2080	u8 trace_chan_id = tidq->trace_chan_id;
2081	struct cs_etm_packet *packet = tidq->packet;
2082	struct cs_etm_packet *prev_packet = tidq->prev_packet;
2083
2084	if (magic == __perf_cs_etmv3_magic)
2085		if (packet->exception_number == CS_ETMV3_EXC_SMC ||
2086		    packet->exception_number == CS_ETMV3_EXC_HYP ||
2087		    packet->exception_number == CS_ETMV3_EXC_JAZELLE_THUMBEE ||
2088		    packet->exception_number == CS_ETMV3_EXC_UNDEFINED_INSTR ||
2089		    packet->exception_number == CS_ETMV3_EXC_PREFETCH_ABORT ||
2090		    packet->exception_number == CS_ETMV3_EXC_DATA_FAULT ||
2091		    packet->exception_number == CS_ETMV3_EXC_GENERIC)
2092			return true;
2093
2094	if (magic == __perf_cs_etmv4_magic) {
2095		if (packet->exception_number == CS_ETMV4_EXC_TRAP ||
2096		    packet->exception_number == CS_ETMV4_EXC_ALIGNMENT ||
2097		    packet->exception_number == CS_ETMV4_EXC_INST_FAULT ||
2098		    packet->exception_number == CS_ETMV4_EXC_DATA_FAULT)
2099			return true;
2100
2101		/*
2102		 * For CS_ETMV4_EXC_CALL, except SVC other instructions
2103		 * (SMC, HVC) are taken as sync exceptions.
2104		 */
2105		if (packet->exception_number == CS_ETMV4_EXC_CALL &&
2106		    !cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet,
2107					  prev_packet->end_addr))
2108			return true;
2109
2110		/*
2111		 * ETMv4 has 5 bits for exception number; if the numbers
2112		 * are in the range ( CS_ETMV4_EXC_FIQ, CS_ETMV4_EXC_END ]
2113		 * they are implementation defined exceptions.
2114		 *
2115		 * For this case, simply take it as sync exception.
2116		 */
2117		if (packet->exception_number > CS_ETMV4_EXC_FIQ &&
2118		    packet->exception_number <= CS_ETMV4_EXC_END)
2119			return true;
2120	}
2121
2122	return false;
2123}
2124
2125static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq,
2126				    struct cs_etm_traceid_queue *tidq)
2127{
2128	struct cs_etm_packet *packet = tidq->packet;
2129	struct cs_etm_packet *prev_packet = tidq->prev_packet;
2130	u8 trace_chan_id = tidq->trace_chan_id;
2131	u64 magic;
2132	int ret;
2133
2134	switch (packet->sample_type) {
2135	case CS_ETM_RANGE:
2136		/*
2137		 * Immediate branch instruction without neither link nor
2138		 * return flag, it's normal branch instruction within
2139		 * the function.
2140		 */
2141		if (packet->last_instr_type == OCSD_INSTR_BR &&
2142		    packet->last_instr_subtype == OCSD_S_INSTR_NONE) {
2143			packet->flags = PERF_IP_FLAG_BRANCH;
2144
2145			if (packet->last_instr_cond)
2146				packet->flags |= PERF_IP_FLAG_CONDITIONAL;
2147		}
2148
2149		/*
2150		 * Immediate branch instruction with link (e.g. BL), this is
2151		 * branch instruction for function call.
2152		 */
2153		if (packet->last_instr_type == OCSD_INSTR_BR &&
2154		    packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
2155			packet->flags = PERF_IP_FLAG_BRANCH |
2156					PERF_IP_FLAG_CALL;
2157
2158		/*
2159		 * Indirect branch instruction with link (e.g. BLR), this is
2160		 * branch instruction for function call.
2161		 */
2162		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2163		    packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
2164			packet->flags = PERF_IP_FLAG_BRANCH |
2165					PERF_IP_FLAG_CALL;
2166
2167		/*
2168		 * Indirect branch instruction with subtype of
2169		 * OCSD_S_INSTR_V7_IMPLIED_RET, this is explicit hint for
2170		 * function return for A32/T32.
2171		 */
2172		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2173		    packet->last_instr_subtype == OCSD_S_INSTR_V7_IMPLIED_RET)
2174			packet->flags = PERF_IP_FLAG_BRANCH |
2175					PERF_IP_FLAG_RETURN;
2176
2177		/*
2178		 * Indirect branch instruction without link (e.g. BR), usually
2179		 * this is used for function return, especially for functions
2180		 * within dynamic link lib.
2181		 */
2182		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2183		    packet->last_instr_subtype == OCSD_S_INSTR_NONE)
2184			packet->flags = PERF_IP_FLAG_BRANCH |
2185					PERF_IP_FLAG_RETURN;
2186
2187		/* Return instruction for function return. */
2188		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
2189		    packet->last_instr_subtype == OCSD_S_INSTR_V8_RET)
2190			packet->flags = PERF_IP_FLAG_BRANCH |
2191					PERF_IP_FLAG_RETURN;
2192
2193		/*
2194		 * Decoder might insert a discontinuity in the middle of
2195		 * instruction packets, fixup prev_packet with flag
2196		 * PERF_IP_FLAG_TRACE_BEGIN to indicate restarting trace.
2197		 */
2198		if (prev_packet->sample_type == CS_ETM_DISCONTINUITY)
2199			prev_packet->flags |= PERF_IP_FLAG_BRANCH |
2200					      PERF_IP_FLAG_TRACE_BEGIN;
2201
2202		/*
2203		 * If the previous packet is an exception return packet
2204		 * and the return address just follows SVC instruction,
2205		 * it needs to calibrate the previous packet sample flags
2206		 * as PERF_IP_FLAG_SYSCALLRET.
2207		 */
2208		if (prev_packet->flags == (PERF_IP_FLAG_BRANCH |
2209					   PERF_IP_FLAG_RETURN |
2210					   PERF_IP_FLAG_INTERRUPT) &&
2211		    cs_etm__is_svc_instr(etmq, trace_chan_id,
2212					 packet, packet->start_addr))
2213			prev_packet->flags = PERF_IP_FLAG_BRANCH |
2214					     PERF_IP_FLAG_RETURN |
2215					     PERF_IP_FLAG_SYSCALLRET;
2216		break;
2217	case CS_ETM_DISCONTINUITY:
2218		/*
2219		 * The trace is discontinuous, if the previous packet is
2220		 * instruction packet, set flag PERF_IP_FLAG_TRACE_END
2221		 * for previous packet.
2222		 */
2223		if (prev_packet->sample_type == CS_ETM_RANGE)
2224			prev_packet->flags |= PERF_IP_FLAG_BRANCH |
2225					      PERF_IP_FLAG_TRACE_END;
2226		break;
2227	case CS_ETM_EXCEPTION:
2228		ret = cs_etm__get_magic(packet->trace_chan_id, &magic);
2229		if (ret)
2230			return ret;
2231
2232		/* The exception is for system call. */
2233		if (cs_etm__is_syscall(etmq, tidq, magic))
2234			packet->flags = PERF_IP_FLAG_BRANCH |
2235					PERF_IP_FLAG_CALL |
2236					PERF_IP_FLAG_SYSCALLRET;
2237		/*
2238		 * The exceptions are triggered by external signals from bus,
2239		 * interrupt controller, debug module, PE reset or halt.
2240		 */
2241		else if (cs_etm__is_async_exception(tidq, magic))
2242			packet->flags = PERF_IP_FLAG_BRANCH |
2243					PERF_IP_FLAG_CALL |
2244					PERF_IP_FLAG_ASYNC |
2245					PERF_IP_FLAG_INTERRUPT;
2246		/*
2247		 * Otherwise, exception is caused by trap, instruction &
2248		 * data fault, or alignment errors.
2249		 */
2250		else if (cs_etm__is_sync_exception(etmq, tidq, magic))
2251			packet->flags = PERF_IP_FLAG_BRANCH |
2252					PERF_IP_FLAG_CALL |
2253					PERF_IP_FLAG_INTERRUPT;
2254
2255		/*
2256		 * When the exception packet is inserted, since exception
2257		 * packet is not used standalone for generating samples
2258		 * and it's affiliation to the previous instruction range
2259		 * packet; so set previous range packet flags to tell perf
2260		 * it is an exception taken branch.
2261		 */
2262		if (prev_packet->sample_type == CS_ETM_RANGE)
2263			prev_packet->flags = packet->flags;
2264		break;
2265	case CS_ETM_EXCEPTION_RET:
2266		/*
2267		 * When the exception return packet is inserted, since
2268		 * exception return packet is not used standalone for
2269		 * generating samples and it's affiliation to the previous
2270		 * instruction range packet; so set previous range packet
2271		 * flags to tell perf it is an exception return branch.
2272		 *
2273		 * The exception return can be for either system call or
2274		 * other exception types; unfortunately the packet doesn't
2275		 * contain exception type related info so we cannot decide
2276		 * the exception type purely based on exception return packet.
2277		 * If we record the exception number from exception packet and
2278		 * reuse it for exception return packet, this is not reliable
2279		 * due the trace can be discontinuity or the interrupt can
2280		 * be nested, thus the recorded exception number cannot be
2281		 * used for exception return packet for these two cases.
2282		 *
2283		 * For exception return packet, we only need to distinguish the
2284		 * packet is for system call or for other types.  Thus the
2285		 * decision can be deferred when receive the next packet which
2286		 * contains the return address, based on the return address we
2287		 * can read out the previous instruction and check if it's a
2288		 * system call instruction and then calibrate the sample flag
2289		 * as needed.
2290		 */
2291		if (prev_packet->sample_type == CS_ETM_RANGE)
2292			prev_packet->flags = PERF_IP_FLAG_BRANCH |
2293					     PERF_IP_FLAG_RETURN |
2294					     PERF_IP_FLAG_INTERRUPT;
2295		break;
2296	case CS_ETM_EMPTY:
2297	default:
2298		break;
2299	}
2300
2301	return 0;
2302}
2303
2304static int cs_etm__decode_data_block(struct cs_etm_queue *etmq)
2305{
2306	int ret = 0;
2307	size_t processed = 0;
2308
2309	/*
2310	 * Packets are decoded and added to the decoder's packet queue
2311	 * until the decoder packet processing callback has requested that
2312	 * processing stops or there is nothing left in the buffer.  Normal
2313	 * operations that stop processing are a timestamp packet or a full
2314	 * decoder buffer queue.
2315	 */
2316	ret = cs_etm_decoder__process_data_block(etmq->decoder,
2317						 etmq->offset,
2318						 &etmq->buf[etmq->buf_used],
2319						 etmq->buf_len,
2320						 &processed);
2321	if (ret)
2322		goto out;
2323
2324	etmq->offset += processed;
2325	etmq->buf_used += processed;
2326	etmq->buf_len -= processed;
2327
2328out:
2329	return ret;
2330}
2331
2332static int cs_etm__process_traceid_queue(struct cs_etm_queue *etmq,
2333					 struct cs_etm_traceid_queue *tidq)
2334{
2335	int ret;
2336	struct cs_etm_packet_queue *packet_queue;
2337
2338	packet_queue = &tidq->packet_queue;
2339
2340	/* Process each packet in this chunk */
2341	while (1) {
2342		ret = cs_etm_decoder__get_packet(packet_queue,
2343						 tidq->packet);
2344		if (ret <= 0)
2345			/*
2346			 * Stop processing this chunk on
2347			 * end of data or error
2348			 */
2349			break;
2350
2351		/*
2352		 * Since packet addresses are swapped in packet
2353		 * handling within below switch() statements,
2354		 * thus setting sample flags must be called
2355		 * prior to switch() statement to use address
2356		 * information before packets swapping.
2357		 */
2358		ret = cs_etm__set_sample_flags(etmq, tidq);
2359		if (ret < 0)
2360			break;
2361
2362		switch (tidq->packet->sample_type) {
2363		case CS_ETM_RANGE:
2364			/*
2365			 * If the packet contains an instruction
2366			 * range, generate instruction sequence
2367			 * events.
2368			 */
2369			cs_etm__sample(etmq, tidq);
2370			break;
2371		case CS_ETM_EXCEPTION:
2372		case CS_ETM_EXCEPTION_RET:
2373			/*
2374			 * If the exception packet is coming,
2375			 * make sure the previous instruction
2376			 * range packet to be handled properly.
2377			 */
2378			cs_etm__exception(tidq);
2379			break;
2380		case CS_ETM_DISCONTINUITY:
2381			/*
2382			 * Discontinuity in trace, flush
2383			 * previous branch stack
2384			 */
2385			cs_etm__flush(etmq, tidq);
2386			break;
2387		case CS_ETM_EMPTY:
2388			/*
2389			 * Should not receive empty packet,
2390			 * report error.
2391			 */
2392			pr_err("CS ETM Trace: empty packet\n");
2393			return -EINVAL;
2394		default:
2395			break;
2396		}
2397	}
2398
2399	return ret;
2400}
2401
2402static void cs_etm__clear_all_traceid_queues(struct cs_etm_queue *etmq)
2403{
2404	int idx;
2405	struct int_node *inode;
2406	struct cs_etm_traceid_queue *tidq;
2407	struct intlist *traceid_queues_list = etmq->traceid_queues_list;
2408
2409	intlist__for_each_entry(inode, traceid_queues_list) {
2410		idx = (int)(intptr_t)inode->priv;
2411		tidq = etmq->traceid_queues[idx];
2412
2413		/* Ignore return value */
2414		cs_etm__process_traceid_queue(etmq, tidq);
2415
2416		/*
2417		 * Generate an instruction sample with the remaining
2418		 * branchstack entries.
2419		 */
2420		cs_etm__flush(etmq, tidq);
2421	}
2422}
2423
2424static int cs_etm__run_per_thread_timeless_decoder(struct cs_etm_queue *etmq)
2425{
2426	int err = 0;
2427	struct cs_etm_traceid_queue *tidq;
2428
2429	tidq = cs_etm__etmq_get_traceid_queue(etmq, CS_ETM_PER_THREAD_TRACEID);
2430	if (!tidq)
2431		return -EINVAL;
2432
2433	/* Go through each buffer in the queue and decode them one by one */
2434	while (1) {
2435		err = cs_etm__get_data_block(etmq);
2436		if (err <= 0)
2437			return err;
2438
2439		/* Run trace decoder until buffer consumed or end of trace */
2440		do {
2441			err = cs_etm__decode_data_block(etmq);
2442			if (err)
2443				return err;
2444
2445			/*
2446			 * Process each packet in this chunk, nothing to do if
2447			 * an error occurs other than hoping the next one will
2448			 * be better.
2449			 */
2450			err = cs_etm__process_traceid_queue(etmq, tidq);
2451
2452		} while (etmq->buf_len);
2453
2454		if (err == 0)
2455			/* Flush any remaining branch stack entries */
2456			err = cs_etm__end_block(etmq, tidq);
2457	}
2458
2459	return err;
2460}
2461
2462static int cs_etm__run_per_cpu_timeless_decoder(struct cs_etm_queue *etmq)
2463{
2464	int idx, err = 0;
2465	struct cs_etm_traceid_queue *tidq;
2466	struct int_node *inode;
2467
2468	/* Go through each buffer in the queue and decode them one by one */
2469	while (1) {
2470		err = cs_etm__get_data_block(etmq);
2471		if (err <= 0)
2472			return err;
2473
2474		/* Run trace decoder until buffer consumed or end of trace */
2475		do {
2476			err = cs_etm__decode_data_block(etmq);
2477			if (err)
2478				return err;
2479
2480			/*
2481			 * cs_etm__run_per_thread_timeless_decoder() runs on a
2482			 * single traceID queue because each TID has a separate
2483			 * buffer. But here in per-cpu mode we need to iterate
2484			 * over each channel instead.
2485			 */
2486			intlist__for_each_entry(inode,
2487						etmq->traceid_queues_list) {
2488				idx = (int)(intptr_t)inode->priv;
2489				tidq = etmq->traceid_queues[idx];
2490				cs_etm__process_traceid_queue(etmq, tidq);
2491			}
2492		} while (etmq->buf_len);
2493
2494		intlist__for_each_entry(inode, etmq->traceid_queues_list) {
2495			idx = (int)(intptr_t)inode->priv;
2496			tidq = etmq->traceid_queues[idx];
2497			/* Flush any remaining branch stack entries */
2498			err = cs_etm__end_block(etmq, tidq);
2499			if (err)
2500				return err;
2501		}
2502	}
2503
2504	return err;
2505}
2506
2507static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
2508					   pid_t tid)
2509{
2510	unsigned int i;
2511	struct auxtrace_queues *queues = &etm->queues;
2512
2513	for (i = 0; i < queues->nr_queues; i++) {
2514		struct auxtrace_queue *queue = &etm->queues.queue_array[i];
2515		struct cs_etm_queue *etmq = queue->priv;
2516		struct cs_etm_traceid_queue *tidq;
2517
2518		if (!etmq)
2519			continue;
2520
2521		if (etm->per_thread_decoding) {
2522			tidq = cs_etm__etmq_get_traceid_queue(
2523				etmq, CS_ETM_PER_THREAD_TRACEID);
2524
2525			if (!tidq)
2526				continue;
2527
2528			if (tid == -1 || thread__tid(tidq->thread) == tid)
2529				cs_etm__run_per_thread_timeless_decoder(etmq);
2530		} else
2531			cs_etm__run_per_cpu_timeless_decoder(etmq);
2532	}
2533
2534	return 0;
2535}
2536
2537static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm)
2538{
2539	int ret = 0;
2540	unsigned int cs_queue_nr, queue_nr, i;
2541	u8 trace_chan_id;
2542	u64 cs_timestamp;
2543	struct auxtrace_queue *queue;
2544	struct cs_etm_queue *etmq;
2545	struct cs_etm_traceid_queue *tidq;
2546
2547	/*
2548	 * Pre-populate the heap with one entry from each queue so that we can
2549	 * start processing in time order across all queues.
2550	 */
2551	for (i = 0; i < etm->queues.nr_queues; i++) {
2552		etmq = etm->queues.queue_array[i].priv;
2553		if (!etmq)
2554			continue;
2555
2556		ret = cs_etm__queue_first_cs_timestamp(etm, etmq, i);
2557		if (ret)
2558			return ret;
2559	}
2560
2561	while (1) {
2562		if (!etm->heap.heap_cnt)
2563			goto out;
2564
2565		/* Take the entry at the top of the min heap */
2566		cs_queue_nr = etm->heap.heap_array[0].queue_nr;
2567		queue_nr = TO_QUEUE_NR(cs_queue_nr);
2568		trace_chan_id = TO_TRACE_CHAN_ID(cs_queue_nr);
2569		queue = &etm->queues.queue_array[queue_nr];
2570		etmq = queue->priv;
2571
2572		/*
2573		 * Remove the top entry from the heap since we are about
2574		 * to process it.
2575		 */
2576		auxtrace_heap__pop(&etm->heap);
2577
2578		tidq  = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
2579		if (!tidq) {
2580			/*
2581			 * No traceID queue has been allocated for this traceID,
2582			 * which means something somewhere went very wrong.  No
2583			 * other choice than simply exit.
2584			 */
2585			ret = -EINVAL;
2586			goto out;
2587		}
2588
2589		/*
2590		 * Packets associated with this timestamp are already in
2591		 * the etmq's traceID queue, so process them.
2592		 */
2593		ret = cs_etm__process_traceid_queue(etmq, tidq);
2594		if (ret < 0)
2595			goto out;
2596
2597		/*
2598		 * Packets for this timestamp have been processed, time to
2599		 * move on to the next timestamp, fetching a new auxtrace_buffer
2600		 * if need be.
2601		 */
2602refetch:
2603		ret = cs_etm__get_data_block(etmq);
2604		if (ret < 0)
2605			goto out;
2606
2607		/*
2608		 * No more auxtrace_buffers to process in this etmq, simply
2609		 * move on to another entry in the auxtrace_heap.
2610		 */
2611		if (!ret)
2612			continue;
2613
2614		ret = cs_etm__decode_data_block(etmq);
2615		if (ret)
2616			goto out;
2617
2618		cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
2619
2620		if (!cs_timestamp) {
2621			/*
2622			 * Function cs_etm__decode_data_block() returns when
2623			 * there is no more traces to decode in the current
2624			 * auxtrace_buffer OR when a timestamp has been
2625			 * encountered on any of the traceID queues.  Since we
2626			 * did not get a timestamp, there is no more traces to
2627			 * process in this auxtrace_buffer.  As such empty and
2628			 * flush all traceID queues.
2629			 */
2630			cs_etm__clear_all_traceid_queues(etmq);
2631
2632			/* Fetch another auxtrace_buffer for this etmq */
2633			goto refetch;
2634		}
2635
2636		/*
2637		 * Add to the min heap the timestamp for packets that have
2638		 * just been decoded.  They will be processed and synthesized
2639		 * during the next call to cs_etm__process_traceid_queue() for
2640		 * this queue/traceID.
2641		 */
2642		cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
2643		ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp);
2644	}
2645
2646out:
2647	return ret;
2648}
2649
2650static int cs_etm__process_itrace_start(struct cs_etm_auxtrace *etm,
2651					union perf_event *event)
2652{
2653	struct thread *th;
2654
2655	if (etm->timeless_decoding)
2656		return 0;
2657
2658	/*
2659	 * Add the tid/pid to the log so that we can get a match when we get a
2660	 * contextID from the decoder. Only track for the host: only kernel
2661	 * trace is supported for guests which wouldn't need pids so this should
2662	 * be fine.
2663	 */
2664	th = machine__findnew_thread(&etm->session->machines.host,
2665				     event->itrace_start.pid,
2666				     event->itrace_start.tid);
2667	if (!th)
2668		return -ENOMEM;
2669
2670	thread__put(th);
2671
2672	return 0;
2673}
2674
2675static int cs_etm__process_switch_cpu_wide(struct cs_etm_auxtrace *etm,
2676					   union perf_event *event)
2677{
2678	struct thread *th;
2679	bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
2680
2681	/*
2682	 * Context switch in per-thread mode are irrelevant since perf
2683	 * will start/stop tracing as the process is scheduled.
2684	 */
2685	if (etm->timeless_decoding)
2686		return 0;
2687
2688	/*
2689	 * SWITCH_IN events carry the next process to be switched out while
2690	 * SWITCH_OUT events carry the process to be switched in.  As such
2691	 * we don't care about IN events.
2692	 */
2693	if (!out)
2694		return 0;
2695
2696	/*
2697	 * Add the tid/pid to the log so that we can get a match when we get a
2698	 * contextID from the decoder. Only track for the host: only kernel
2699	 * trace is supported for guests which wouldn't need pids so this should
2700	 * be fine.
2701	 */
2702	th = machine__findnew_thread(&etm->session->machines.host,
2703				     event->context_switch.next_prev_pid,
2704				     event->context_switch.next_prev_tid);
2705	if (!th)
2706		return -ENOMEM;
2707
2708	thread__put(th);
2709
2710	return 0;
2711}
2712
2713static int cs_etm__process_event(struct perf_session *session,
2714				 union perf_event *event,
2715				 struct perf_sample *sample,
2716				 struct perf_tool *tool)
2717{
2718	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
2719						   struct cs_etm_auxtrace,
2720						   auxtrace);
2721
2722	if (dump_trace)
2723		return 0;
2724
2725	if (!tool->ordered_events) {
2726		pr_err("CoreSight ETM Trace requires ordered events\n");
2727		return -EINVAL;
2728	}
2729
2730	switch (event->header.type) {
2731	case PERF_RECORD_EXIT:
2732		/*
2733		 * Don't need to wait for cs_etm__flush_events() in per-thread mode to
2734		 * start the decode because we know there will be no more trace from
2735		 * this thread. All this does is emit samples earlier than waiting for
2736		 * the flush in other modes, but with timestamps it makes sense to wait
2737		 * for flush so that events from different threads are interleaved
2738		 * properly.
2739		 */
2740		if (etm->per_thread_decoding && etm->timeless_decoding)
2741			return cs_etm__process_timeless_queues(etm,
2742							       event->fork.tid);
2743		break;
2744
2745	case PERF_RECORD_ITRACE_START:
2746		return cs_etm__process_itrace_start(etm, event);
2747
2748	case PERF_RECORD_SWITCH_CPU_WIDE:
2749		return cs_etm__process_switch_cpu_wide(etm, event);
2750
2751	case PERF_RECORD_AUX:
2752		/*
2753		 * Record the latest kernel timestamp available in the header
2754		 * for samples so that synthesised samples occur from this point
2755		 * onwards.
2756		 */
2757		if (sample->time && (sample->time != (u64)-1))
2758			etm->latest_kernel_timestamp = sample->time;
2759		break;
2760
2761	default:
2762		break;
2763	}
2764
2765	return 0;
2766}
2767
2768static void dump_queued_data(struct cs_etm_auxtrace *etm,
2769			     struct perf_record_auxtrace *event)
2770{
2771	struct auxtrace_buffer *buf;
2772	unsigned int i;
2773	/*
2774	 * Find all buffers with same reference in the queues and dump them.
2775	 * This is because the queues can contain multiple entries of the same
2776	 * buffer that were split on aux records.
2777	 */
2778	for (i = 0; i < etm->queues.nr_queues; ++i)
2779		list_for_each_entry(buf, &etm->queues.queue_array[i].head, list)
2780			if (buf->reference == event->reference)
2781				cs_etm__dump_event(etm->queues.queue_array[i].priv, buf);
2782}
2783
2784static int cs_etm__process_auxtrace_event(struct perf_session *session,
2785					  union perf_event *event,
2786					  struct perf_tool *tool __maybe_unused)
2787{
2788	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
2789						   struct cs_etm_auxtrace,
2790						   auxtrace);
2791	if (!etm->data_queued) {
2792		struct auxtrace_buffer *buffer;
2793		off_t  data_offset;
2794		int fd = perf_data__fd(session->data);
2795		bool is_pipe = perf_data__is_pipe(session->data);
2796		int err;
2797		int idx = event->auxtrace.idx;
2798
2799		if (is_pipe)
2800			data_offset = 0;
2801		else {
2802			data_offset = lseek(fd, 0, SEEK_CUR);
2803			if (data_offset == -1)
2804				return -errno;
2805		}
2806
2807		err = auxtrace_queues__add_event(&etm->queues, session,
2808						 event, data_offset, &buffer);
2809		if (err)
2810			return err;
2811
2812		/*
2813		 * Knowing if the trace is formatted or not requires a lookup of
2814		 * the aux record so only works in non-piped mode where data is
2815		 * queued in cs_etm__queue_aux_records(). Always assume
2816		 * formatted in piped mode (true).
2817		 */
2818		err = cs_etm__setup_queue(etm, &etm->queues.queue_array[idx],
2819					  idx, true);
2820		if (err)
2821			return err;
2822
2823		if (dump_trace)
2824			if (auxtrace_buffer__get_data(buffer, fd)) {
2825				cs_etm__dump_event(etm->queues.queue_array[idx].priv, buffer);
2826				auxtrace_buffer__put_data(buffer);
2827			}
2828	} else if (dump_trace)
2829		dump_queued_data(etm, &event->auxtrace);
2830
2831	return 0;
2832}
2833
2834static int cs_etm__setup_timeless_decoding(struct cs_etm_auxtrace *etm)
2835{
2836	struct evsel *evsel;
2837	struct evlist *evlist = etm->session->evlist;
2838
2839	/* Override timeless mode with user input from --itrace=Z */
2840	if (etm->synth_opts.timeless_decoding) {
2841		etm->timeless_decoding = true;
2842		return 0;
2843	}
2844
2845	/*
2846	 * Find the cs_etm evsel and look at what its timestamp setting was
2847	 */
2848	evlist__for_each_entry(evlist, evsel)
2849		if (cs_etm__evsel_is_auxtrace(etm->session, evsel)) {
2850			etm->timeless_decoding =
2851				!(evsel->core.attr.config & BIT(ETM_OPT_TS));
2852			return 0;
2853		}
2854
2855	pr_err("CS ETM: Couldn't find ETM evsel\n");
2856	return -EINVAL;
2857}
2858
2859/*
2860 * Read a single cpu parameter block from the auxtrace_info priv block.
2861 *
2862 * For version 1 there is a per cpu nr_params entry. If we are handling
2863 * version 1 file, then there may be less, the same, or more params
2864 * indicated by this value than the compile time number we understand.
2865 *
2866 * For a version 0 info block, there are a fixed number, and we need to
2867 * fill out the nr_param value in the metadata we create.
2868 */
2869static u64 *cs_etm__create_meta_blk(u64 *buff_in, int *buff_in_offset,
2870				    int out_blk_size, int nr_params_v0)
2871{
2872	u64 *metadata = NULL;
2873	int hdr_version;
2874	int nr_in_params, nr_out_params, nr_cmn_params;
2875	int i, k;
2876
2877	metadata = zalloc(sizeof(*metadata) * out_blk_size);
2878	if (!metadata)
2879		return NULL;
2880
2881	/* read block current index & version */
2882	i = *buff_in_offset;
2883	hdr_version = buff_in[CS_HEADER_VERSION];
2884
2885	if (!hdr_version) {
2886	/* read version 0 info block into a version 1 metadata block  */
2887		nr_in_params = nr_params_v0;
2888		metadata[CS_ETM_MAGIC] = buff_in[i + CS_ETM_MAGIC];
2889		metadata[CS_ETM_CPU] = buff_in[i + CS_ETM_CPU];
2890		metadata[CS_ETM_NR_TRC_PARAMS] = nr_in_params;
2891		/* remaining block params at offset +1 from source */
2892		for (k = CS_ETM_COMMON_BLK_MAX_V1 - 1; k < nr_in_params; k++)
2893			metadata[k + 1] = buff_in[i + k];
2894		/* version 0 has 2 common params */
2895		nr_cmn_params = 2;
2896	} else {
2897	/* read version 1 info block - input and output nr_params may differ */
2898		/* version 1 has 3 common params */
2899		nr_cmn_params = 3;
2900		nr_in_params = buff_in[i + CS_ETM_NR_TRC_PARAMS];
2901
2902		/* if input has more params than output - skip excess */
2903		nr_out_params = nr_in_params + nr_cmn_params;
2904		if (nr_out_params > out_blk_size)
2905			nr_out_params = out_blk_size;
2906
2907		for (k = CS_ETM_MAGIC; k < nr_out_params; k++)
2908			metadata[k] = buff_in[i + k];
2909
2910		/* record the actual nr params we copied */
2911		metadata[CS_ETM_NR_TRC_PARAMS] = nr_out_params - nr_cmn_params;
2912	}
2913
2914	/* adjust in offset by number of in params used */
2915	i += nr_in_params + nr_cmn_params;
2916	*buff_in_offset = i;
2917	return metadata;
2918}
2919
2920/**
2921 * Puts a fragment of an auxtrace buffer into the auxtrace queues based
2922 * on the bounds of aux_event, if it matches with the buffer that's at
2923 * file_offset.
2924 *
2925 * Normally, whole auxtrace buffers would be added to the queue. But we
2926 * want to reset the decoder for every PERF_RECORD_AUX event, and the decoder
2927 * is reset across each buffer, so splitting the buffers up in advance has
2928 * the same effect.
2929 */
2930static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_offset, size_t sz,
2931				      struct perf_record_aux *aux_event, struct perf_sample *sample)
2932{
2933	int err;
2934	char buf[PERF_SAMPLE_MAX_SIZE];
2935	union perf_event *auxtrace_event_union;
2936	struct perf_record_auxtrace *auxtrace_event;
2937	union perf_event auxtrace_fragment;
2938	__u64 aux_offset, aux_size;
2939	__u32 idx;
2940	bool formatted;
2941
2942	struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
2943						   struct cs_etm_auxtrace,
2944						   auxtrace);
2945
2946	/*
2947	 * There should be a PERF_RECORD_AUXTRACE event at the file_offset that we got
2948	 * from looping through the auxtrace index.
2949	 */
2950	err = perf_session__peek_event(session, file_offset, buf,
2951				       PERF_SAMPLE_MAX_SIZE, &auxtrace_event_union, NULL);
2952	if (err)
2953		return err;
2954	auxtrace_event = &auxtrace_event_union->auxtrace;
2955	if (auxtrace_event->header.type != PERF_RECORD_AUXTRACE)
2956		return -EINVAL;
2957
2958	if (auxtrace_event->header.size < sizeof(struct perf_record_auxtrace) ||
2959		auxtrace_event->header.size != sz) {
2960		return -EINVAL;
2961	}
2962
2963	/*
2964	 * In per-thread mode, auxtrace CPU is set to -1, but TID will be set instead. See
2965	 * auxtrace_mmap_params__set_idx(). However, the sample AUX event will contain a
2966	 * CPU as we set this always for the AUX_OUTPUT_HW_ID event.
2967	 * So now compare only TIDs if auxtrace CPU is -1, and CPUs if auxtrace CPU is not -1.
2968	 * Return 'not found' if mismatch.
2969	 */
2970	if (auxtrace_event->cpu == (__u32) -1) {
2971		etm->per_thread_decoding = true;
2972		if (auxtrace_event->tid != sample->tid)
2973			return 1;
2974	} else if (auxtrace_event->cpu != sample->cpu) {
2975		if (etm->per_thread_decoding) {
2976			/*
2977			 * Found a per-cpu buffer after a per-thread one was
2978			 * already found
2979			 */
2980			pr_err("CS ETM: Inconsistent per-thread/per-cpu mode.\n");
2981			return -EINVAL;
2982		}
2983		return 1;
2984	}
2985
2986	if (aux_event->flags & PERF_AUX_FLAG_OVERWRITE) {
2987		/*
2988		 * Clamp size in snapshot mode. The buffer size is clamped in
2989		 * __auxtrace_mmap__read() for snapshots, so the aux record size doesn't reflect
2990		 * the buffer size.
2991		 */
2992		aux_size = min(aux_event->aux_size, auxtrace_event->size);
2993
2994		/*
2995		 * In this mode, the head also points to the end of the buffer so aux_offset
2996		 * needs to have the size subtracted so it points to the beginning as in normal mode
2997		 */
2998		aux_offset = aux_event->aux_offset - aux_size;
2999	} else {
3000		aux_size = aux_event->aux_size;
3001		aux_offset = aux_event->aux_offset;
3002	}
3003
3004	if (aux_offset >= auxtrace_event->offset &&
3005	    aux_offset + aux_size <= auxtrace_event->offset + auxtrace_event->size) {
3006		/*
3007		 * If this AUX event was inside this buffer somewhere, create a new auxtrace event
3008		 * based on the sizes of the aux event, and queue that fragment.
3009		 */
3010		auxtrace_fragment.auxtrace = *auxtrace_event;
3011		auxtrace_fragment.auxtrace.size = aux_size;
3012		auxtrace_fragment.auxtrace.offset = aux_offset;
3013		file_offset += aux_offset - auxtrace_event->offset + auxtrace_event->header.size;
3014
3015		pr_debug3("CS ETM: Queue buffer size: %#"PRI_lx64" offset: %#"PRI_lx64
3016			  " tid: %d cpu: %d\n", aux_size, aux_offset, sample->tid, sample->cpu);
3017		err = auxtrace_queues__add_event(&etm->queues, session, &auxtrace_fragment,
3018						 file_offset, NULL);
3019		if (err)
3020			return err;
3021
3022		idx = auxtrace_event->idx;
3023		formatted = !(aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW);
3024		return cs_etm__setup_queue(etm, &etm->queues.queue_array[idx],
3025					   idx, formatted);
3026	}
3027
3028	/* Wasn't inside this buffer, but there were no parse errors. 1 == 'not found' */
3029	return 1;
3030}
3031
3032static int cs_etm__process_aux_hw_id_cb(struct perf_session *session, union perf_event *event,
3033					u64 offset __maybe_unused, void *data __maybe_unused)
3034{
3035	/* look to handle PERF_RECORD_AUX_OUTPUT_HW_ID early to ensure decoders can be set up */
3036	if (event->header.type == PERF_RECORD_AUX_OUTPUT_HW_ID) {
3037		(*(int *)data)++; /* increment found count */
3038		return cs_etm__process_aux_output_hw_id(session, event);
3039	}
3040	return 0;
3041}
3042
3043static int cs_etm__queue_aux_records_cb(struct perf_session *session, union perf_event *event,
3044					u64 offset __maybe_unused, void *data __maybe_unused)
3045{
3046	struct perf_sample sample;
3047	int ret;
3048	struct auxtrace_index_entry *ent;
3049	struct auxtrace_index *auxtrace_index;
3050	struct evsel *evsel;
3051	size_t i;
3052
3053	/* Don't care about any other events, we're only queuing buffers for AUX events */
3054	if (event->header.type != PERF_RECORD_AUX)
3055		return 0;
3056
3057	if (event->header.size < sizeof(struct perf_record_aux))
3058		return -EINVAL;
3059
3060	/* Truncated Aux records can have 0 size and shouldn't result in anything being queued. */
3061	if (!event->aux.aux_size)
3062		return 0;
3063
3064	/*
3065	 * Parse the sample, we need the sample_id_all data that comes after the event so that the
3066	 * CPU or PID can be matched to an AUXTRACE buffer's CPU or PID.
3067	 */
3068	evsel = evlist__event2evsel(session->evlist, event);
3069	if (!evsel)
3070		return -EINVAL;
3071	ret = evsel__parse_sample(evsel, event, &sample);
3072	if (ret)
3073		return ret;
3074
3075	/*
3076	 * Loop through the auxtrace index to find the buffer that matches up with this aux event.
3077	 */
3078	list_for_each_entry(auxtrace_index, &session->auxtrace_index, list) {
3079		for (i = 0; i < auxtrace_index->nr; i++) {
3080			ent = &auxtrace_index->entries[i];
3081			ret = cs_etm__queue_aux_fragment(session, ent->file_offset,
3082							 ent->sz, &event->aux, &sample);
3083			/*
3084			 * Stop search on error or successful values. Continue search on
3085			 * 1 ('not found')
3086			 */
3087			if (ret != 1)
3088				return ret;
3089		}
3090	}
3091
3092	/*
3093	 * Couldn't find the buffer corresponding to this aux record, something went wrong. Warn but
3094	 * don't exit with an error because it will still be possible to decode other aux records.
3095	 */
3096	pr_err("CS ETM: Couldn't find auxtrace buffer for aux_offset: %#"PRI_lx64
3097	       " tid: %d cpu: %d\n", event->aux.aux_offset, sample.tid, sample.cpu);
3098	return 0;
3099}
3100
3101static int cs_etm__queue_aux_records(struct perf_session *session)
3102{
3103	struct auxtrace_index *index = list_first_entry_or_null(&session->auxtrace_index,
3104								struct auxtrace_index, list);
3105	if (index && index->nr > 0)
3106		return perf_session__peek_events(session, session->header.data_offset,
3107						 session->header.data_size,
3108						 cs_etm__queue_aux_records_cb, NULL);
3109
3110	/*
3111	 * We would get here if there are no entries in the index (either no auxtrace
3112	 * buffers or no index at all). Fail silently as there is the possibility of
3113	 * queueing them in cs_etm__process_auxtrace_event() if etm->data_queued is still
3114	 * false.
3115	 *
3116	 * In that scenario, buffers will not be split by AUX records.
3117	 */
3118	return 0;
3119}
3120
3121#define HAS_PARAM(j, type, param) (metadata[(j)][CS_ETM_NR_TRC_PARAMS] <= \
3122				  (CS_##type##_##param - CS_ETM_COMMON_BLK_MAX_V1))
3123
3124/*
3125 * Loop through the ETMs and complain if we find at least one where ts_source != 1 (virtual
3126 * timestamps).
3127 */
3128static bool cs_etm__has_virtual_ts(u64 **metadata, int num_cpu)
3129{
3130	int j;
3131
3132	for (j = 0; j < num_cpu; j++) {
3133		switch (metadata[j][CS_ETM_MAGIC]) {
3134		case __perf_cs_etmv4_magic:
3135			if (HAS_PARAM(j, ETMV4, TS_SOURCE) || metadata[j][CS_ETMV4_TS_SOURCE] != 1)
3136				return false;
3137			break;
3138		case __perf_cs_ete_magic:
3139			if (HAS_PARAM(j, ETE, TS_SOURCE) || metadata[j][CS_ETE_TS_SOURCE] != 1)
3140				return false;
3141			break;
3142		default:
3143			/* Unknown / unsupported magic number. */
3144			return false;
3145		}
3146	}
3147	return true;
3148}
3149
3150/* map trace ids to correct metadata block, from information in metadata */
3151static int cs_etm__map_trace_ids_metadata(int num_cpu, u64 **metadata)
3152{
3153	u64 cs_etm_magic;
3154	u8 trace_chan_id;
3155	int i, err;
3156
3157	for (i = 0; i < num_cpu; i++) {
3158		cs_etm_magic = metadata[i][CS_ETM_MAGIC];
3159		switch (cs_etm_magic) {
3160		case __perf_cs_etmv3_magic:
3161			metadata[i][CS_ETM_ETMTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK;
3162			trace_chan_id = (u8)(metadata[i][CS_ETM_ETMTRACEIDR]);
3163			break;
3164		case __perf_cs_etmv4_magic:
3165		case __perf_cs_ete_magic:
3166			metadata[i][CS_ETMV4_TRCTRACEIDR] &= CORESIGHT_TRACE_ID_VAL_MASK;
3167			trace_chan_id = (u8)(metadata[i][CS_ETMV4_TRCTRACEIDR]);
3168			break;
3169		default:
3170			/* unknown magic number */
3171			return -EINVAL;
3172		}
3173		err = cs_etm__map_trace_id(trace_chan_id, metadata[i]);
3174		if (err)
3175			return err;
3176	}
3177	return 0;
3178}
3179
3180/*
3181 * If we found AUX_HW_ID packets, then set any metadata marked as unused to the
3182 * unused value to reduce the number of unneeded decoders created.
3183 */
3184static int cs_etm__clear_unused_trace_ids_metadata(int num_cpu, u64 **metadata)
3185{
3186	u64 cs_etm_magic;
3187	int i;
3188
3189	for (i = 0; i < num_cpu; i++) {
3190		cs_etm_magic = metadata[i][CS_ETM_MAGIC];
3191		switch (cs_etm_magic) {
3192		case __perf_cs_etmv3_magic:
3193			if (metadata[i][CS_ETM_ETMTRACEIDR] & CORESIGHT_TRACE_ID_UNUSED_FLAG)
3194				metadata[i][CS_ETM_ETMTRACEIDR] = CORESIGHT_TRACE_ID_UNUSED_VAL;
3195			break;
3196		case __perf_cs_etmv4_magic:
3197		case __perf_cs_ete_magic:
3198			if (metadata[i][CS_ETMV4_TRCTRACEIDR] & CORESIGHT_TRACE_ID_UNUSED_FLAG)
3199				metadata[i][CS_ETMV4_TRCTRACEIDR] = CORESIGHT_TRACE_ID_UNUSED_VAL;
3200			break;
3201		default:
3202			/* unknown magic number */
3203			return -EINVAL;
3204		}
3205	}
3206	return 0;
3207}
3208
3209int cs_etm__process_auxtrace_info_full(union perf_event *event,
3210				       struct perf_session *session)
3211{
3212	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
3213	struct cs_etm_auxtrace *etm = NULL;
3214	struct perf_record_time_conv *tc = &session->time_conv;
3215	int event_header_size = sizeof(struct perf_event_header);
3216	int total_size = auxtrace_info->header.size;
3217	int priv_size = 0;
3218	int num_cpu;
3219	int err = 0;
3220	int aux_hw_id_found;
3221	int i, j;
3222	u64 *ptr = NULL;
3223	u64 **metadata = NULL;
3224
3225	/*
3226	 * Create an RB tree for traceID-metadata tuple.  Since the conversion
3227	 * has to be made for each packet that gets decoded, optimizing access
3228	 * in anything other than a sequential array is worth doing.
3229	 */
3230	traceid_list = intlist__new(NULL);
3231	if (!traceid_list)
3232		return -ENOMEM;
3233
3234	/* First the global part */
3235	ptr = (u64 *) auxtrace_info->priv;
3236	num_cpu = ptr[CS_PMU_TYPE_CPUS] & 0xffffffff;
3237	metadata = zalloc(sizeof(*metadata) * num_cpu);
3238	if (!metadata) {
3239		err = -ENOMEM;
3240		goto err_free_traceid_list;
3241	}
3242
3243	/* Start parsing after the common part of the header */
3244	i = CS_HEADER_VERSION_MAX;
3245
3246	/*
3247	 * The metadata is stored in the auxtrace_info section and encodes
3248	 * the configuration of the ARM embedded trace macrocell which is
3249	 * required by the trace decoder to properly decode the trace due
3250	 * to its highly compressed nature.
3251	 */
3252	for (j = 0; j < num_cpu; j++) {
3253		if (ptr[i] == __perf_cs_etmv3_magic) {
3254			metadata[j] =
3255				cs_etm__create_meta_blk(ptr, &i,
3256							CS_ETM_PRIV_MAX,
3257							CS_ETM_NR_TRC_PARAMS_V0);
3258		} else if (ptr[i] == __perf_cs_etmv4_magic) {
3259			metadata[j] =
3260				cs_etm__create_meta_blk(ptr, &i,
3261							CS_ETMV4_PRIV_MAX,
3262							CS_ETMV4_NR_TRC_PARAMS_V0);
3263		} else if (ptr[i] == __perf_cs_ete_magic) {
3264			metadata[j] = cs_etm__create_meta_blk(ptr, &i, CS_ETE_PRIV_MAX, -1);
3265		} else {
3266			ui__error("CS ETM Trace: Unrecognised magic number %#"PRIx64". File could be from a newer version of perf.\n",
3267				  ptr[i]);
3268			err = -EINVAL;
3269			goto err_free_metadata;
3270		}
3271
3272		if (!metadata[j]) {
3273			err = -ENOMEM;
3274			goto err_free_metadata;
3275		}
3276	}
3277
3278	/*
3279	 * Each of CS_HEADER_VERSION_MAX, CS_ETM_PRIV_MAX and
3280	 * CS_ETMV4_PRIV_MAX mark how many double words are in the
3281	 * global metadata, and each cpu's metadata respectively.
3282	 * The following tests if the correct number of double words was
3283	 * present in the auxtrace info section.
3284	 */
3285	priv_size = total_size - event_header_size - INFO_HEADER_SIZE;
3286	if (i * 8 != priv_size) {
3287		err = -EINVAL;
3288		goto err_free_metadata;
3289	}
3290
3291	etm = zalloc(sizeof(*etm));
3292
3293	if (!etm) {
3294		err = -ENOMEM;
3295		goto err_free_metadata;
3296	}
3297
3298	/*
3299	 * As all the ETMs run at the same exception level, the system should
3300	 * have the same PID format crossing CPUs.  So cache the PID format
3301	 * and reuse it for sequential decoding.
3302	 */
3303	etm->pid_fmt = cs_etm__init_pid_fmt(metadata[0]);
3304
3305	err = auxtrace_queues__init(&etm->queues);
3306	if (err)
3307		goto err_free_etm;
3308
3309	if (session->itrace_synth_opts->set) {
3310		etm->synth_opts = *session->itrace_synth_opts;
3311	} else {
3312		itrace_synth_opts__set_default(&etm->synth_opts,
3313				session->itrace_synth_opts->default_no_sample);
3314		etm->synth_opts.callchain = false;
3315	}
3316
3317	etm->session = session;
3318
3319	etm->num_cpu = num_cpu;
3320	etm->pmu_type = (unsigned int) ((ptr[CS_PMU_TYPE_CPUS] >> 32) & 0xffffffff);
3321	etm->snapshot_mode = (ptr[CS_ETM_SNAPSHOT] != 0);
3322	etm->metadata = metadata;
3323	etm->auxtrace_type = auxtrace_info->type;
3324
3325	/* Use virtual timestamps if all ETMs report ts_source = 1 */
3326	etm->has_virtual_ts = cs_etm__has_virtual_ts(metadata, num_cpu);
3327
3328	if (!etm->has_virtual_ts)
3329		ui__warning("Virtual timestamps are not enabled, or not supported by the traced system.\n"
3330			    "The time field of the samples will not be set accurately.\n\n");
3331
3332	etm->auxtrace.process_event = cs_etm__process_event;
3333	etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event;
3334	etm->auxtrace.flush_events = cs_etm__flush_events;
3335	etm->auxtrace.free_events = cs_etm__free_events;
3336	etm->auxtrace.free = cs_etm__free;
3337	etm->auxtrace.evsel_is_auxtrace = cs_etm__evsel_is_auxtrace;
3338	session->auxtrace = &etm->auxtrace;
3339
3340	err = cs_etm__setup_timeless_decoding(etm);
3341	if (err)
3342		return err;
3343
3344	etm->tc.time_shift = tc->time_shift;
3345	etm->tc.time_mult = tc->time_mult;
3346	etm->tc.time_zero = tc->time_zero;
3347	if (event_contains(*tc, time_cycles)) {
3348		etm->tc.time_cycles = tc->time_cycles;
3349		etm->tc.time_mask = tc->time_mask;
3350		etm->tc.cap_user_time_zero = tc->cap_user_time_zero;
3351		etm->tc.cap_user_time_short = tc->cap_user_time_short;
3352	}
3353	err = cs_etm__synth_events(etm, session);
3354	if (err)
3355		goto err_free_queues;
3356
3357	/*
3358	 * Map Trace ID values to CPU metadata.
3359	 *
3360	 * Trace metadata will always contain Trace ID values from the legacy algorithm. If the
3361	 * files has been recorded by a "new" perf updated to handle AUX_HW_ID then the metadata
3362	 * ID value will also have the CORESIGHT_TRACE_ID_UNUSED_FLAG set.
3363	 *
3364	 * The updated kernel drivers that use AUX_HW_ID to sent Trace IDs will attempt to use
3365	 * the same IDs as the old algorithm as far as is possible, unless there are clashes
3366	 * in which case a different value will be used. This means an older perf may still
3367	 * be able to record and read files generate on a newer system.
3368	 *
3369	 * For a perf able to interpret AUX_HW_ID packets we first check for the presence of
3370	 * those packets. If they are there then the values will be mapped and plugged into
3371	 * the metadata. We then set any remaining metadata values with the used flag to a
3372	 * value CORESIGHT_TRACE_ID_UNUSED_VAL - which indicates no decoder is required.
3373	 *
3374	 * If no AUX_HW_ID packets are present - which means a file recorded on an old kernel
3375	 * then we map Trace ID values to CPU directly from the metadata - clearing any unused
3376	 * flags if present.
3377	 */
3378
3379	/* first scan for AUX_OUTPUT_HW_ID records to map trace ID values to CPU metadata */
3380	aux_hw_id_found = 0;
3381	err = perf_session__peek_events(session, session->header.data_offset,
3382					session->header.data_size,
3383					cs_etm__process_aux_hw_id_cb, &aux_hw_id_found);
3384	if (err)
3385		goto err_free_queues;
3386
3387	/* if HW ID found then clear any unused metadata ID values */
3388	if (aux_hw_id_found)
3389		err = cs_etm__clear_unused_trace_ids_metadata(num_cpu, metadata);
3390	/* otherwise, this is a file with metadata values only, map from metadata */
3391	else
3392		err = cs_etm__map_trace_ids_metadata(num_cpu, metadata);
3393
3394	if (err)
3395		goto err_free_queues;
3396
3397	err = cs_etm__queue_aux_records(session);
3398	if (err)
3399		goto err_free_queues;
3400
3401	etm->data_queued = etm->queues.populated;
3402	return 0;
3403
3404err_free_queues:
3405	auxtrace_queues__free(&etm->queues);
3406	session->auxtrace = NULL;
3407err_free_etm:
3408	zfree(&etm);
3409err_free_metadata:
3410	/* No need to check @metadata[j], free(NULL) is supported */
3411	for (j = 0; j < num_cpu; j++)
3412		zfree(&metadata[j]);
3413	zfree(&metadata);
3414err_free_traceid_list:
3415	intlist__delete(traceid_list);
3416	return err;
3417}
3418