1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright (C) 2023 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
4 */
5
6#include <stdlib.h>
7#include <errno.h>
8#include "utils.h"
9#include "osnoise.h"
10#include "timerlat.h"
11#include <unistd.h>
12
/*
 * Per-cpu state machine: which timerlat sample is expected next.
 * (the missing 'r' in "timelat" is a historical typo in the tag name)
 */
enum timelat_state {
	TIMERLAT_INIT = 0,		/* no sample processed yet */
	TIMERLAT_WAITING_IRQ,		/* thread sample seen; waiting for the next IRQ sample */
	TIMERLAT_WAITING_THREAD,	/* IRQ sample seen; waiting for the thread sample */
};
18
/* Size of the buffers used to store task comm names. */
#define MAX_COMM		24
20
21/*
22 * Per-cpu data statistics and data.
23 */
/*
 * Per-cpu statistics and parsed data for the auto-analysis.
 *
 * One instance exists per CPU (see timerlat_aa_context.taa_data); each
 * instance is only touched by the handlers of events recorded on that CPU.
 */
struct timerlat_aa_data {
	/* Current CPU state (enum timelat_state) */
	int			curr_state;

	/* timerlat IRQ latency (from the last ftrace:timerlat IRQ sample) */
	unsigned long long	tlat_irq_seqnum;
	unsigned long long	tlat_irq_latency;
	unsigned long long	tlat_irq_timstamp;

	/* timerlat Thread latency (from the last ftrace:timerlat thread sample) */
	unsigned long long	tlat_thread_seqnum;
	unsigned long long	tlat_thread_latency;
	unsigned long long	tlat_thread_timstamp;

	/*
	 * Information about the thread running when the IRQ
	 * arrived.
	 *
	 * This can be blocking or interference, depending on the
	 * priority of the thread. Assuming timerlat is the highest
	 * prio, it is blocking. If timerlat has a lower prio, it is
	 * interference.
	 * note: "unsigned long long" because they are fetched using
	 * tep_get_field_val(), which writes a 64-bit value.
	 */
	unsigned long long	run_thread_pid;
	char			run_thread_comm[MAX_COMM];
	unsigned long long	thread_blocking_duration;
	unsigned long long	max_exit_idle_latency;

	/* Information about the timerlat timer irq */
	unsigned long long	timer_irq_start_time;
	unsigned long long	timer_irq_start_delay;
	unsigned long long	timer_irq_duration;
	unsigned long long	timer_exit_from_idle;

	/*
	 * Information about the last IRQ before the timerlat irq
	 * arrived.
	 *
	 * If now - timestamp is <= latency, it might have influenced
	 * in the timerlat irq latency. Otherwise, ignore it.
	 */
	unsigned long long	prev_irq_duration;
	unsigned long long	prev_irq_timstamp;

	/*
	 * Interference sums, accumulated between the timerlat IRQ and
	 * the timerlat thread samples.
	 */
	unsigned long long	thread_nmi_sum;
	unsigned long long	thread_irq_sum;
	unsigned long long	thread_softirq_sum;
	unsigned long long	thread_thread_sum;

	/*
	 * Interference task information, stored pre-formatted so the
	 * report only has to print the seq buffers.
	 */
	struct trace_seq	*prev_irqs_seq;
	struct trace_seq	*nmi_seq;
	struct trace_seq	*irqs_seq;
	struct trace_seq	*softirqs_seq;
	struct trace_seq	*threads_seq;
	struct trace_seq	*stack_seq;

	/*
	 * Current thread on the CPU (tracked via sched:sched_switch).
	 */
	char			current_comm[MAX_COMM];
	unsigned long long	current_pid;

	/*
	 * Is the system running a kworker? (work struct pointer and
	 * function address from workqueue:workqueue_execute_start)
	 */
	unsigned long long	kworker;
	unsigned long long	kworker_func;
};
99
100/*
101 * The analysis context and system wide view
102 */
/*
 * The analysis context and system wide view.
 */
struct timerlat_aa_context {
	int nr_cpus;		/* size of the taa_data array */
	int dump_tasks;		/* also track per-CPU tasks (sched/workqueue events)? */

	/* per CPU data, indexed by CPU number */
	struct timerlat_aa_data *taa_data;

	/*
	 * required to translate function names and register
	 * events.
	 */
	struct osnoise_tool *tool;
};
116
117/*
118 * The data is stored as a local variable, but accessed via a helper function.
119 *
120 * It could be stored inside the trace context. But every access would
121 * require container_of() + a series of pointers. Do we need it? Not sure.
122 *
123 * For now keep it simple. If needed, store it in the tool, add the *context
124 * as a parameter in timerlat_aa_get_ctx() and do the magic there.
125 */
/* Single global context; set by timerlat_aa_init(), freed by timerlat_aa_destroy().
 * NOTE(review): the leading "__" makes this a reserved identifier in strict ISO C;
 * kept as-is since sibling code references it. */
static struct timerlat_aa_context *__timerlat_aa_ctx;

/* Accessor for the global analysis context (may return NULL before init). */
static struct timerlat_aa_context *timerlat_aa_get_ctx(void)
{
	return __timerlat_aa_ctx;
}
132
133/*
134 * timerlat_aa_get_data - Get the per-cpu data from the timerlat context
135 */
136static struct timerlat_aa_data
137*timerlat_aa_get_data(struct timerlat_aa_context *taa_ctx, int cpu)
138{
139	return &taa_ctx->taa_data[cpu];
140}
141
142/*
143 * timerlat_aa_irq_latency - Handles timerlat IRQ event
144 */
/*
 * timerlat_aa_irq_latency - Handles timerlat IRQ event
 *
 * Resets the per-cycle accumulators, records the IRQ sample values and,
 * if the CPU was idle, updates the max exit-from-idle latency.
 *
 * Always returns 0.
 */
static int timerlat_aa_irq_latency(struct timerlat_aa_data *taa_data,
				   struct trace_seq *s, struct tep_record *record,
				   struct tep_event *event)
{
	/*
	 * For interference, we start now looking for things that can delay
	 * the thread.
	 */
	taa_data->curr_state = TIMERLAT_WAITING_THREAD;
	taa_data->tlat_irq_timstamp = record->ts;

	/*
	 * Zero values accumulated during the previous cycle.
	 */
	taa_data->thread_nmi_sum = 0;
	taa_data->thread_irq_sum = 0;
	taa_data->thread_softirq_sum = 0;
	taa_data->thread_thread_sum = 0;
	taa_data->thread_blocking_duration = 0;
	/* zeroed start time tells the irq_noise handler the next IRQ is the timer one */
	taa_data->timer_irq_start_time = 0;
	taa_data->timer_irq_duration = 0;
	taa_data->timer_exit_from_idle = 0;

	/*
	 * Zero interference tasks.
	 */
	trace_seq_reset(taa_data->nmi_seq);
	trace_seq_reset(taa_data->irqs_seq);
	trace_seq_reset(taa_data->softirqs_seq);
	trace_seq_reset(taa_data->threads_seq);

	/* IRQ latency values */
	tep_get_field_val(s, event, "timer_latency", record, &taa_data->tlat_irq_latency, 1);
	tep_get_field_val(s, event, "seqnum", record, &taa_data->tlat_irq_seqnum, 1);

	/* The thread that can cause blocking */
	tep_get_common_field_val(s, event, "common_pid", record, &taa_data->run_thread_pid, 1);

	/*
	 * Get exit from idle case.
	 *
	 * If it is not idle thread (pid 0):
	 */
	if (taa_data->run_thread_pid)
		return 0;

	/*
	 * if the latency is shorter than the known exit from idle:
	 */
	if (taa_data->tlat_irq_latency < taa_data->max_exit_idle_latency)
		return 0;

	/*
	 * To be safe, ignore the cases in which an IRQ/NMI could have
	 * interfered with the timerlat IRQ.
	 */
	if (taa_data->tlat_irq_timstamp - taa_data->tlat_irq_latency
	    < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration)
		return 0;

	taa_data->max_exit_idle_latency = taa_data->tlat_irq_latency;

	return 0;
}
209
210/*
211 * timerlat_aa_thread_latency - Handles timerlat thread event
212 */
213static int timerlat_aa_thread_latency(struct timerlat_aa_data *taa_data,
214				      struct trace_seq *s, struct tep_record *record,
215				      struct tep_event *event)
216{
217	/*
218	 * For interference, we start now looking for things that can delay
219	 * the IRQ of the next cycle.
220	 */
221	taa_data->curr_state = TIMERLAT_WAITING_IRQ;
222	taa_data->tlat_thread_timstamp = record->ts;
223
224	/* Thread latency values */
225	tep_get_field_val(s, event, "timer_latency", record, &taa_data->tlat_thread_latency, 1);
226	tep_get_field_val(s, event, "seqnum", record, &taa_data->tlat_thread_seqnum, 1);
227
228	return 0;
229}
230
231/*
232 * timerlat_aa_handler - Handle timerlat events
233 *
234 * This function is called to handle timerlat events recording statistics.
235 *
236 * Returns 0 on success, -1 otherwise.
237 */
238static int timerlat_aa_handler(struct trace_seq *s, struct tep_record *record,
239			struct tep_event *event, void *context)
240{
241	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
242	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
243	unsigned long long thread;
244
245	if (!taa_data)
246		return -1;
247
248	tep_get_field_val(s, event, "context", record, &thread, 1);
249	if (!thread)
250		return timerlat_aa_irq_latency(taa_data, s, record, event);
251	else
252		return timerlat_aa_thread_latency(taa_data, s, record, event);
253}
254
255/*
256 * timerlat_aa_nmi_handler - Handles NMI noise
257 *
258 * It is used to collect information about interferences from NMI. It is
259 * hooked to the osnoise:nmi_noise event.
260 */
/*
 * timerlat_aa_nmi_handler - Handles NMI noise
 *
 * It is used to collect information about interferences from NMI. It is
 * hooked to the osnoise:nmi_noise event.
 *
 * Always returns 0.
 */
static int timerlat_aa_nmi_handler(struct trace_seq *s, struct tep_record *record,
				   struct tep_event *event, void *context)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
	unsigned long long duration;
	unsigned long long start;

	tep_get_field_val(s, event, "duration", record, &duration, 1);
	tep_get_field_val(s, event, "start", record, &start, 1);

	/*
	 * Before the timerlat IRQ: record it as the "previous IRQ" that
	 * might have delayed the timer IRQ.
	 */
	if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) {
		taa_data->prev_irq_duration = duration;
		taa_data->prev_irq_timstamp = start;

		trace_seq_reset(taa_data->prev_irqs_seq);
		trace_seq_printf(taa_data->prev_irqs_seq, "\t%24s	\t\t\t%9.2f us\n",
			 "nmi", ns_to_usf(duration));
		return 0;
	}

	/* Between the IRQ and the thread samples: NMI interference. */
	taa_data->thread_nmi_sum += duration;
	trace_seq_printf(taa_data->nmi_seq, "	%24s	\t\t\t%9.2f us\n",
		 "nmi", ns_to_usf(duration));

	return 0;
}
288
289/*
290 * timerlat_aa_irq_handler - Handles IRQ noise
291 *
292 * It is used to collect information about interferences from IRQ. It is
293 * hooked to the osnoise:irq_noise event.
294 *
295 * It is a little bit more complex than the other because it measures:
296 *	- The IRQs that can delay the timer IRQ before it happened.
297 *	- The Timerlat IRQ handler
298 *	- The IRQs that happened between the timerlat IRQ and the timerlat thread
299 *	  (IRQ interference).
300 */
/*
 * timerlat_aa_irq_handler - Handles IRQ noise
 *
 * It is used to collect information about interferences from IRQ. It is
 * hooked to the osnoise:irq_noise event.
 *
 * It is a little bit more complex than the other because it measures:
 *	- The IRQs that can delay the timer IRQ before it happened.
 *	- The Timerlat IRQ handler
 *	- The IRQs that happened between the timerlat IRQ and the timerlat thread
 *	  (IRQ interference).
 *
 * Always returns 0.
 */
static int timerlat_aa_irq_handler(struct trace_seq *s, struct tep_record *record,
				   struct tep_event *event, void *context)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
	unsigned long long expected_start;
	unsigned long long duration;
	unsigned long long vector;
	unsigned long long start;
	char *desc;
	int val;

	tep_get_field_val(s, event, "duration", record, &duration, 1);
	tep_get_field_val(s, event, "start", record, &start, 1);
	tep_get_field_val(s, event, "vector", record, &vector, 1);
	/* NOTE(review): desc may be NULL if the raw field is missing — verify */
	desc = tep_get_field_raw(s, event, "desc", record, &val, 1);

	/*
	 * Before the timerlat IRQ.
	 */
	if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) {
		taa_data->prev_irq_duration = duration;
		taa_data->prev_irq_timstamp = start;

		trace_seq_reset(taa_data->prev_irqs_seq);
		trace_seq_printf(taa_data->prev_irqs_seq, "\t%24s:%-3llu	\t\t%9.2f us\n",
				 desc, vector, ns_to_usf(duration));
		return 0;
	}

	/*
	 * The timerlat IRQ: taa_data->timer_irq_start_time is zeroed at
	 * the timerlat irq handler.
	 */
	if (!taa_data->timer_irq_start_time) {
		/* when the timer IRQ should have fired, per the sampled latency */
		expected_start = taa_data->tlat_irq_timstamp - taa_data->tlat_irq_latency;

		taa_data->timer_irq_start_time = start;
		taa_data->timer_irq_duration = duration;

		/*
		 * We are dealing with two different clock sources: the
		 * external clock source that timerlat uses as a reference
		 * and the clock used by the tracer. There are also two
		 * moments: the time reading the clock and the timer in
		 * which the event is placed in the buffer (the trace
		 * event timestamp). If the processor is slow or there
		 * is some hardware noise, the difference between the
		 * timestamp and the external clock read can be longer
		 * than the IRQ handler delay, resulting in a negative
		 * time. If so, set IRQ start delay as 0. In the end,
		 * it is less relevant than the noise.
		 */
		if (expected_start < taa_data->timer_irq_start_time)
			taa_data->timer_irq_start_delay = taa_data->timer_irq_start_time - expected_start;
		else
			taa_data->timer_irq_start_delay = 0;

		/*
		 * not exit from idle: a non-zero pid was running.
		 */
		if (taa_data->run_thread_pid)
			return 0;

		/* no previous IRQ overlapped: the delay came from exiting idle */
		if (expected_start > taa_data->prev_irq_timstamp + taa_data->prev_irq_duration)
			taa_data->timer_exit_from_idle = taa_data->timer_irq_start_delay;

		return 0;
	}

	/*
	 * IRQ interference.
	 */
	taa_data->thread_irq_sum += duration;
	trace_seq_printf(taa_data->irqs_seq, "	%24s:%-3llu	\t	%9.2f us\n",
			 desc, vector, ns_to_usf(duration));

	return 0;
}
380
/*
 * Human-readable names for softirqs, indexed by the "vector" field of
 * the osnoise:softirq_noise event.
 */
static char *softirq_name[] = { "HI", "TIMER",	"NET_TX", "NET_RX", "BLOCK",
				"IRQ_POLL", "TASKLET", "SCHED", "HRTIMER", "RCU" };
383
384
385/*
386 * timerlat_aa_softirq_handler - Handles Softirq noise
387 *
388 * It is used to collect information about interferences from Softirq. It is
389 * hooked to the osnoise:softirq_noise event.
390 *
391 * It is only printed in the non-rt kernel, as softirqs become thread on RT.
392 */
393static int timerlat_aa_softirq_handler(struct trace_seq *s, struct tep_record *record,
394				       struct tep_event *event, void *context)
395{
396	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
397	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
398	unsigned long long duration;
399	unsigned long long vector;
400	unsigned long long start;
401
402	if (taa_data->curr_state == TIMERLAT_WAITING_IRQ)
403		return 0;
404
405	tep_get_field_val(s, event, "duration", record, &duration, 1);
406	tep_get_field_val(s, event, "start", record, &start, 1);
407	tep_get_field_val(s, event, "vector", record, &vector, 1);
408
409	taa_data->thread_softirq_sum += duration;
410
411	trace_seq_printf(taa_data->softirqs_seq, "\t%24s:%-3llu	\t	%9.2f us\n",
412			 softirq_name[vector], vector, ns_to_usf(duration));
413	return 0;
414}
415
416/*
417 * timerlat_aa_softirq_handler - Handles thread noise
418 *
419 * It is used to collect information about interferences from threads. It is
420 * hooked to the osnoise:thread_noise event.
421 *
422 * Note: if you see thread noise, your timerlat thread was not the highest prio one.
423 */
424static int timerlat_aa_thread_handler(struct trace_seq *s, struct tep_record *record,
425				      struct tep_event *event, void *context)
426{
427	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
428	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
429	unsigned long long duration;
430	unsigned long long start;
431	unsigned long long pid;
432	const char *comm;
433	int val;
434
435	if (taa_data->curr_state == TIMERLAT_WAITING_IRQ)
436		return 0;
437
438	tep_get_field_val(s, event, "duration", record, &duration, 1);
439	tep_get_field_val(s, event, "start", record, &start, 1);
440
441	tep_get_common_field_val(s, event, "common_pid", record, &pid, 1);
442	comm = tep_get_field_raw(s, event, "comm", record, &val, 1);
443
444	if (pid == taa_data->run_thread_pid && !taa_data->thread_blocking_duration) {
445		taa_data->thread_blocking_duration = duration;
446
447		if (comm)
448			strncpy(taa_data->run_thread_comm, comm, MAX_COMM);
449		else
450			sprintf(taa_data->run_thread_comm, "<...>");
451
452	} else {
453		taa_data->thread_thread_sum += duration;
454
455		trace_seq_printf(taa_data->threads_seq, "\t%24s:%-3llu	\t\t%9.2f us\n",
456			 comm, pid, ns_to_usf(duration));
457	}
458
459	return 0;
460}
461
462/*
463 * timerlat_aa_stack_handler - Handles timerlat IRQ stack trace
464 *
465 * Saves and parse the stack trace generated by the timerlat IRQ.
466 */
467static int timerlat_aa_stack_handler(struct trace_seq *s, struct tep_record *record,
468			      struct tep_event *event, void *context)
469{
470	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
471	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
472	unsigned long *caller;
473	const char *function;
474	int val, i;
475
476	trace_seq_reset(taa_data->stack_seq);
477
478	trace_seq_printf(taa_data->stack_seq, "    Blocking thread stack trace\n");
479	caller = tep_get_field_raw(s, event, "caller", record, &val, 1);
480	if (caller) {
481		for (i = 0; ; i++) {
482			function = tep_find_function(taa_ctx->tool->trace.tep, caller[i]);
483			if (!function)
484				break;
485			trace_seq_printf(taa_data->stack_seq, "\t\t-> %s\n", function);
486		}
487	}
488	return 0;
489}
490
491/*
492 * timerlat_aa_sched_switch_handler - Tracks the current thread running on the CPU
493 *
494 * Handles the sched:sched_switch event to trace the current thread running on the
495 * CPU. It is used to display the threads running on the other CPUs when the trace
496 * stops.
497 */
498static int timerlat_aa_sched_switch_handler(struct trace_seq *s, struct tep_record *record,
499					    struct tep_event *event, void *context)
500{
501	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
502	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
503	const char *comm;
504	int val;
505
506	tep_get_field_val(s, event, "next_pid", record, &taa_data->current_pid, 1);
507	comm = tep_get_field_raw(s, event, "next_comm", record, &val, 1);
508
509	strncpy(taa_data->current_comm, comm, MAX_COMM);
510
511	/*
512	 * If this was a kworker, clean the last kworkers that ran.
513	 */
514	taa_data->kworker = 0;
515	taa_data->kworker_func = 0;
516
517	return 0;
518}
519
520/*
521 * timerlat_aa_kworker_start_handler - Tracks a kworker running on the CPU
522 *
523 * Handles workqueue:workqueue_execute_start event, keeping track of
524 * the job that a kworker could be doing in the CPU.
525 *
526 * We already catch problems of hardware related latencies caused by work queues
527 * running driver code that causes hardware stall. For example, with DRM drivers.
528 */
/*
 * timerlat_aa_kworker_start_handler - Tracks a kworker running on the CPU
 *
 * Handles workqueue:workqueue_execute_start event, keeping track of
 * the job that a kworker could be doing in the CPU.
 *
 * We already catch problems of hardware related latencies caused by work queues
 * running driver code that causes hardware stall. For example, with DRM drivers.
 *
 * Always returns 0.
 */
static int timerlat_aa_kworker_start_handler(struct trace_seq *s, struct tep_record *record,
					     struct tep_event *event, void *context)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);

	/* work struct address and the function it runs, as raw values */
	tep_get_field_val(s, event, "work", record, &taa_data->kworker, 1);
	tep_get_field_val(s, event, "function", record, &taa_data->kworker_func, 1);
	return 0;
}
539
540/*
541 * timerlat_thread_analysis - Prints the analysis of a CPU that hit a stop tracing
542 *
543 * This is the core of the analysis.
544 */
/*
 * timerlat_thread_analysis - Prints the analysis of a CPU that hit a stop tracing
 *
 * This is the core of the analysis: it prints, factor by factor, what
 * contributed to the latency that stopped the trace on @cpu.
 */
static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu,
				     int irq_thresh, int thread_thresh)
{
	long long exp_irq_ts;
	int total;
	int irq;

	/*
	 * IRQ latency or Thread latency? The sample with the highest
	 * seqnum is the one that stopped the trace.
	 */
	if (taa_data->tlat_irq_seqnum > taa_data->tlat_thread_seqnum) {
		irq = 1;
		total = taa_data->tlat_irq_latency;
	} else {
		irq = 0;
		total = taa_data->tlat_thread_latency;
	}

	/*
	 * Expected IRQ arrival time using the trace clock as the base.
	 *
	 * TODO: Add a list of previous IRQ, and then run the list backwards.
	 */
	exp_irq_ts = taa_data->timer_irq_start_time - taa_data->timer_irq_start_delay;
	if (exp_irq_ts < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration) {
		/* only if the previous IRQ really started before the timer one */
		if (taa_data->prev_irq_timstamp < taa_data->timer_irq_start_time)
			printf("  Previous IRQ interference:	\t\t up to  %9.2f us\n",
				ns_to_usf(taa_data->prev_irq_duration));
	}

	/*
	 * The delay that the IRQ suffered before starting.
	 */
	printf("  IRQ handler delay:		%16s	%9.2f us (%.2f %%)\n",
		(ns_to_usf(taa_data->timer_exit_from_idle) > 10) ? "(exit from idle)" : "",
		ns_to_usf(taa_data->timer_irq_start_delay),
		ns_to_per(total, taa_data->timer_irq_start_delay));

	/*
	 * Timerlat IRQ.
	 */
	printf("  IRQ latency:	\t\t\t\t	%9.2f us\n",
		ns_to_usf(taa_data->tlat_irq_latency));

	if (irq) {
		/*
		 * If the trace stopped due to IRQ, the other events will not happen
		 * because... the trace stopped :-).
		 *
		 * That is all folks, the stack trace was printed before the stop,
		 * so it will be displayed, it is the key.
		 */
		printf("  Blocking thread:\n");
		printf("	%24s:%-9llu\n",
			taa_data->run_thread_comm, taa_data->run_thread_pid);
	} else  {
		/*
		 * The duration of the IRQ handler that handled the timerlat IRQ.
		 */
		printf("  Timerlat IRQ duration:	\t\t	%9.2f us (%.2f %%)\n",
			ns_to_usf(taa_data->timer_irq_duration),
			ns_to_per(total, taa_data->timer_irq_duration));

		/*
		 * The amount of time that the current thread postponed the scheduler.
		 *
		 * Recalling that it is net from NMI/IRQ/Softirq interference, so there
		 * is no need to compute values here.
		 */
		printf("  Blocking thread:	\t\t\t	%9.2f us (%.2f %%)\n",
			ns_to_usf(taa_data->thread_blocking_duration),
			ns_to_per(total, taa_data->thread_blocking_duration));

		printf("	%24s:%-9llu		%9.2f us\n",
			taa_data->run_thread_comm, taa_data->run_thread_pid,
			ns_to_usf(taa_data->thread_blocking_duration));
	}

	/*
	 * Print the stack trace!
	 */
	trace_seq_do_printf(taa_data->stack_seq);

	/*
	 * NMIs can happen during the IRQ, so they are always possible.
	 */
	if (taa_data->thread_nmi_sum)
		printf("  NMI interference	\t\t\t	%9.2f us (%.2f %%)\n",
			ns_to_usf(taa_data->thread_nmi_sum),
			ns_to_per(total, taa_data->thread_nmi_sum));

	/*
	 * If it is an IRQ latency, the other factors can be skipped.
	 */
	if (irq)
		goto print_total;

	/*
	 * Prints the interference caused by IRQs to the thread latency.
	 */
	if (taa_data->thread_irq_sum) {
		printf("  IRQ interference	\t\t\t	%9.2f us (%.2f %%)\n",
			ns_to_usf(taa_data->thread_irq_sum),
			ns_to_per(total, taa_data->thread_irq_sum));

		trace_seq_do_printf(taa_data->irqs_seq);
	}

	/*
	 * Prints the interference caused by Softirqs to the thread latency.
	 */
	if (taa_data->thread_softirq_sum) {
		printf("  Softirq interference	\t\t\t	%9.2f us (%.2f %%)\n",
			ns_to_usf(taa_data->thread_softirq_sum),
			ns_to_per(total, taa_data->thread_softirq_sum));

		trace_seq_do_printf(taa_data->softirqs_seq);
	}

	/*
	 * Prints the interference caused by other threads to the thread latency.
	 *
	 * If this happens, your timerlat is not the highest prio. OK, migration
	 * thread can happen. But otherwise, you are not measuring the "scheduling
	 * latency" only, and here is the difference from scheduling latency and
	 * timer handling latency.
	 */
	if (taa_data->thread_thread_sum) {
		printf("  Thread interference	\t\t\t	%9.2f us (%.2f %%)\n",
			ns_to_usf(taa_data->thread_thread_sum),
			ns_to_per(total, taa_data->thread_thread_sum));

		trace_seq_do_printf(taa_data->threads_seq);
	}

	/*
	 * Done.
	 */
print_total:
	printf("------------------------------------------------------------------------\n");
	printf("  %s latency:	\t\t\t	%9.2f us (100%%)\n", irq ? "IRQ" : "Thread",
		ns_to_usf(total));
}
688
689static int timerlat_auto_analysis_collect_trace(struct timerlat_aa_context *taa_ctx)
690{
691	struct trace_instance *trace = &taa_ctx->tool->trace;
692	int retval;
693
694	retval = tracefs_iterate_raw_events(trace->tep,
695					    trace->inst,
696					    NULL,
697					    0,
698					    collect_registered_events,
699					    trace);
700		if (retval < 0) {
701			err_msg("Error iterating on events\n");
702			return 0;
703		}
704
705	return 1;
706}
707
708/**
709 * timerlat_auto_analysis - Analyze the collected data
710 */
711void timerlat_auto_analysis(int irq_thresh, int thread_thresh)
712{
713	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
714	unsigned long long max_exit_from_idle = 0;
715	struct timerlat_aa_data *taa_data;
716	int max_exit_from_idle_cpu;
717	struct tep_handle *tep;
718	int cpu;
719
720	timerlat_auto_analysis_collect_trace(taa_ctx);
721
722	/* bring stop tracing to the ns scale */
723	irq_thresh = irq_thresh * 1000;
724	thread_thresh = thread_thresh * 1000;
725
726	for (cpu = 0; cpu < taa_ctx->nr_cpus; cpu++) {
727		taa_data = timerlat_aa_get_data(taa_ctx, cpu);
728
729		if (irq_thresh && taa_data->tlat_irq_latency >= irq_thresh) {
730			printf("## CPU %d hit stop tracing, analyzing it ##\n", cpu);
731			timerlat_thread_analysis(taa_data, cpu, irq_thresh, thread_thresh);
732		} else if (thread_thresh && (taa_data->tlat_thread_latency) >= thread_thresh) {
733			printf("## CPU %d hit stop tracing, analyzing it ##\n", cpu);
734			timerlat_thread_analysis(taa_data, cpu, irq_thresh, thread_thresh);
735		}
736
737		if (taa_data->max_exit_idle_latency > max_exit_from_idle) {
738			max_exit_from_idle = taa_data->max_exit_idle_latency;
739			max_exit_from_idle_cpu = cpu;
740		}
741
742	}
743
744	if (max_exit_from_idle) {
745		printf("\n");
746		printf("Max timerlat IRQ latency from idle: %.2f us in cpu %d\n",
747			ns_to_usf(max_exit_from_idle), max_exit_from_idle_cpu);
748	}
749	if (!taa_ctx->dump_tasks)
750		return;
751
752	printf("\n");
753	printf("Printing CPU tasks:\n");
754	for (cpu = 0; cpu < taa_ctx->nr_cpus; cpu++) {
755		taa_data = timerlat_aa_get_data(taa_ctx, cpu);
756		tep = taa_ctx->tool->trace.tep;
757
758		printf("    [%.3d] %24s:%llu", cpu, taa_data->current_comm, taa_data->current_pid);
759
760		if (taa_data->kworker_func)
761			printf(" kworker:%s:%s",
762				tep_find_function(tep, taa_data->kworker) ? : "<...>",
763				tep_find_function(tep, taa_data->kworker_func));
764		printf("\n");
765	}
766
767}
768
769/*
770 * timerlat_aa_destroy_seqs - Destroy seq files used to store parsed data
771 */
772static void timerlat_aa_destroy_seqs(struct timerlat_aa_context *taa_ctx)
773{
774	struct timerlat_aa_data *taa_data;
775	int i;
776
777	if (!taa_ctx->taa_data)
778		return;
779
780	for (i = 0; i < taa_ctx->nr_cpus; i++) {
781		taa_data = timerlat_aa_get_data(taa_ctx, i);
782
783		if (taa_data->prev_irqs_seq) {
784			trace_seq_destroy(taa_data->prev_irqs_seq);
785			free(taa_data->prev_irqs_seq);
786		}
787
788		if (taa_data->nmi_seq) {
789			trace_seq_destroy(taa_data->nmi_seq);
790			free(taa_data->nmi_seq);
791		}
792
793		if (taa_data->irqs_seq) {
794			trace_seq_destroy(taa_data->irqs_seq);
795			free(taa_data->irqs_seq);
796		}
797
798		if (taa_data->softirqs_seq) {
799			trace_seq_destroy(taa_data->softirqs_seq);
800			free(taa_data->softirqs_seq);
801		}
802
803		if (taa_data->threads_seq) {
804			trace_seq_destroy(taa_data->threads_seq);
805			free(taa_data->threads_seq);
806		}
807
808		if (taa_data->stack_seq) {
809			trace_seq_destroy(taa_data->stack_seq);
810			free(taa_data->stack_seq);
811		}
812	}
813}
814
815/*
816 * timerlat_aa_init_seqs - Init seq files used to store parsed information
817 *
818 * Instead of keeping data structures to store raw data, use seq files to
819 * store parsed data.
820 *
821 * Allocates and initialize seq files.
822 *
823 * Returns 0 on success, -1 otherwise.
824 */
825static int timerlat_aa_init_seqs(struct timerlat_aa_context *taa_ctx)
826{
827	struct timerlat_aa_data *taa_data;
828	int i;
829
830	for (i = 0; i < taa_ctx->nr_cpus; i++) {
831
832		taa_data = timerlat_aa_get_data(taa_ctx, i);
833
834		taa_data->prev_irqs_seq = calloc(1, sizeof(*taa_data->prev_irqs_seq));
835		if (!taa_data->prev_irqs_seq)
836			goto out_err;
837
838		trace_seq_init(taa_data->prev_irqs_seq);
839
840		taa_data->nmi_seq = calloc(1, sizeof(*taa_data->nmi_seq));
841		if (!taa_data->nmi_seq)
842			goto out_err;
843
844		trace_seq_init(taa_data->nmi_seq);
845
846		taa_data->irqs_seq = calloc(1, sizeof(*taa_data->irqs_seq));
847		if (!taa_data->irqs_seq)
848			goto out_err;
849
850		trace_seq_init(taa_data->irqs_seq);
851
852		taa_data->softirqs_seq = calloc(1, sizeof(*taa_data->softirqs_seq));
853		if (!taa_data->softirqs_seq)
854			goto out_err;
855
856		trace_seq_init(taa_data->softirqs_seq);
857
858		taa_data->threads_seq = calloc(1, sizeof(*taa_data->threads_seq));
859		if (!taa_data->threads_seq)
860			goto out_err;
861
862		trace_seq_init(taa_data->threads_seq);
863
864		taa_data->stack_seq = calloc(1, sizeof(*taa_data->stack_seq));
865		if (!taa_data->stack_seq)
866			goto out_err;
867
868		trace_seq_init(taa_data->stack_seq);
869	}
870
871	return 0;
872
873out_err:
874	timerlat_aa_destroy_seqs(taa_ctx);
875	return -1;
876}
877
878/*
879 * timerlat_aa_unregister_events - Unregister events used in the auto-analysis
880 */
/*
 * timerlat_aa_unregister_events - Unregister events used in the auto-analysis
 *
 * Mirrors timerlat_aa_register_events(): unhooks all tep handlers and
 * disables the trace events that were enabled there. The dump-tasks
 * events are only touched when @dump_tasks is set.
 */
static void timerlat_aa_unregister_events(struct osnoise_tool *tool, int dump_tasks)
{

	tep_unregister_event_handler(tool->trace.tep, -1, "ftrace", "timerlat",
				     timerlat_aa_handler, tool);

	/* NULL event name: disables all osnoise events at once */
	tracefs_event_disable(tool->trace.inst, "osnoise", NULL);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "nmi_noise",
				     timerlat_aa_nmi_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "irq_noise",
				     timerlat_aa_irq_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "softirq_noise",
				     timerlat_aa_softirq_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "thread_noise",
				     timerlat_aa_thread_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "ftrace", "kernel_stack",
				     timerlat_aa_stack_handler, tool);
	if (!dump_tasks)
		return;

	tracefs_event_disable(tool->trace.inst, "sched", "sched_switch");
	tep_unregister_event_handler(tool->trace.tep, -1, "sched", "sched_switch",
				     timerlat_aa_sched_switch_handler, tool);

	tracefs_event_disable(tool->trace.inst, "workqueue", "workqueue_execute_start");
	tep_unregister_event_handler(tool->trace.tep, -1, "workqueue", "workqueue_execute_start",
				     timerlat_aa_kworker_start_handler, tool);
}
914
915/*
916 * timerlat_aa_register_events - Register events used in the auto-analysis
917 *
918 * Returns 0 on success, -1 otherwise.
919 */
/*
 * timerlat_aa_register_events - Register events used in the auto-analysis
 *
 * Enables the trace events needed by the analysis and hooks the tep
 * handlers that parse them. On failure, everything registered so far
 * is unregistered.
 *
 * Returns 0 on success, -1 otherwise.
 */
static int timerlat_aa_register_events(struct osnoise_tool *tool, int dump_tasks)
{
	int retval;

	tep_register_event_handler(tool->trace.tep, -1, "ftrace", "timerlat",
				timerlat_aa_handler, tool);


	/*
	 * register auto-analysis handlers.
	 *
	 * NOTE(review): the "retval < 0 && !errno" pattern presumably
	 * distinguishes "event does not exist" (errno unset) from other
	 * failures — verify against libtracefs tracefs_event_enable() docs.
	 */
	retval = tracefs_event_enable(tool->trace.inst, "osnoise", NULL);
	if (retval < 0 && !errno) {
		err_msg("Could not find osnoise events\n");
		goto out_err;
	}

	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "nmi_noise",
				   timerlat_aa_nmi_handler, tool);

	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "irq_noise",
				   timerlat_aa_irq_handler, tool);

	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "softirq_noise",
				   timerlat_aa_softirq_handler, tool);

	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "thread_noise",
				   timerlat_aa_thread_handler, tool);

	tep_register_event_handler(tool->trace.tep, -1, "ftrace", "kernel_stack",
				   timerlat_aa_stack_handler, tool);

	if (!dump_tasks)
		return 0;

	/*
	 * Dump task events.
	 */
	retval = tracefs_event_enable(tool->trace.inst, "sched", "sched_switch");
	if (retval < 0 && !errno) {
		err_msg("Could not find sched_switch\n");
		goto out_err;
	}

	tep_register_event_handler(tool->trace.tep, -1, "sched", "sched_switch",
				   timerlat_aa_sched_switch_handler, tool);

	retval = tracefs_event_enable(tool->trace.inst, "workqueue", "workqueue_execute_start");
	if (retval < 0 && !errno) {
		err_msg("Could not find workqueue_execute_start\n");
		goto out_err;
	}

	tep_register_event_handler(tool->trace.tep, -1, "workqueue", "workqueue_execute_start",
				   timerlat_aa_kworker_start_handler, tool);

	return 0;

out_err:
	timerlat_aa_unregister_events(tool, dump_tasks);
	return -1;
}
982
983/**
984 * timerlat_aa_destroy - Destroy timerlat auto-analysis
985 */
986void timerlat_aa_destroy(void)
987{
988	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
989
990	if (!taa_ctx)
991		return;
992
993	if (!taa_ctx->taa_data)
994		goto out_ctx;
995
996	timerlat_aa_unregister_events(taa_ctx->tool, taa_ctx->dump_tasks);
997	timerlat_aa_destroy_seqs(taa_ctx);
998	free(taa_ctx->taa_data);
999out_ctx:
1000	free(taa_ctx);
1001}
1002
1003/**
1004 * timerlat_aa_init - Initialize timerlat auto-analysis
1005 *
1006 * Returns 0 on success, -1 otherwise.
1007 */
1008int timerlat_aa_init(struct osnoise_tool *tool, int dump_tasks)
1009{
1010	int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
1011	struct timerlat_aa_context *taa_ctx;
1012	int retval;
1013
1014	taa_ctx = calloc(1, sizeof(*taa_ctx));
1015	if (!taa_ctx)
1016		return -1;
1017
1018	__timerlat_aa_ctx = taa_ctx;
1019
1020	taa_ctx->nr_cpus = nr_cpus;
1021	taa_ctx->tool = tool;
1022	taa_ctx->dump_tasks = dump_tasks;
1023
1024	taa_ctx->taa_data = calloc(nr_cpus, sizeof(*taa_ctx->taa_data));
1025	if (!taa_ctx->taa_data)
1026		goto out_err;
1027
1028	retval = timerlat_aa_init_seqs(taa_ctx);
1029	if (retval)
1030		goto out_err;
1031
1032	retval = timerlat_aa_register_events(tool, dump_tasks);
1033	if (retval)
1034		goto out_err;
1035
1036	return 0;
1037
1038out_err:
1039	timerlat_aa_destroy();
1040	return -1;
1041}
1042