// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
#include "evsel.h"
#include "stat.h"
#include "color.h"
#include "pmu.h"
#include "rblist.h"
#include "evlist.h"
#include "expr.h"
#include "metricgroup.h"
#include <linux/zalloc.h>

/*
 * AGGR_GLOBAL: Use CPU 0
 * AGGR_SOCKET: Use first CPU of socket
 * AGGR_DIE: Use first CPU of die
 * AGGR_CORE: Use first CPU of core
 * AGGR_NONE: Use matching CPU
 * AGGR_THREAD: Not supported?
 */

struct runtime_stat rt_stat;
struct stats walltime_nsecs_stats;

struct saved_value {
	struct rb_node rb_node;
	struct evsel *evsel;
	enum stat_type type;
	int ctx;
	int cpu;
	struct runtime_stat *stat;
	struct stats stats;
	u64 metric_total;
	int metric_other;
};

static int saved_value_cmp(struct rb_node *rb_node, const void *entry)
{
	struct saved_value *a = container_of(rb_node,
					     struct saved_value,
					     rb_node);
	const struct saved_value *b = entry;

	if (a->cpu != b->cpu)
		return a->cpu - b->cpu;

	/*
	 * Previously the rbtree was used to link generic metrics.
	 * The keys were evsel/cpu. Now the rbtree is extended to support
	 * per-thread shadow stats. For shadow stats case, the keys
	 * are cpu/type/ctx/stat (evsel is NULL). For generic metrics
	 * case, the keys are still evsel/cpu (type/ctx/stat are 0 or NULL).
	 */
	if (a->type != b->type)
		return a->type - b->type;

	if (a->ctx != b->ctx)
		return a->ctx - b->ctx;

	if (a->evsel == NULL && b->evsel == NULL) {
		if (a->stat == b->stat)
			return 0;

		if ((char *)a->stat < (char *)b->stat)
			return -1;

		return 1;
	}

	if (a->evsel == b->evsel)
		return 0;
	if ((char *)a->evsel < (char *)b->evsel)
		return -1;
	return +1;
}
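
/*
 * The two kinds of users therefore build different lookup keys: the generic
 * metric code looks values up with saved_value_lookup(counter, cpu, ...,
 * STAT_NONE, 0, st), so ordering is decided by cpu and the evsel pointer,
 * while the shadow stat code uses saved_value_lookup(NULL, cpu, ..., type,
 * ctx, st), where cpu, type, ctx and finally the runtime_stat pointer
 * decide the order. Both key shapes can share one rbtree without colliding.
 */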

static struct rb_node *saved_value_new(struct rblist *rblist __maybe_unused,
				     const void *entry)
{
	struct saved_value *nd = malloc(sizeof(struct saved_value));

	if (!nd)
		return NULL;
	memcpy(nd, entry, sizeof(struct saved_value));
	return &nd->rb_node;
}

static void saved_value_delete(struct rblist *rblist __maybe_unused,
			       struct rb_node *rb_node)
{
	struct saved_value *v;

	BUG_ON(!rb_node);
	v = container_of(rb_node, struct saved_value, rb_node);
	free(v);
}

static struct saved_value *saved_value_lookup(struct evsel *evsel,
					      int cpu,
					      bool create,
					      enum stat_type type,
					      int ctx,
					      struct runtime_stat *st)
{
	struct rblist *rblist;
	struct rb_node *nd;
	struct saved_value dm = {
		.cpu = cpu,
		.evsel = evsel,
		.type = type,
		.ctx = ctx,
		.stat = st,
	};

	rblist = &st->value_list;

	nd = rblist__find(rblist, &dm);
	if (nd)
		return container_of(nd, struct saved_value, rb_node);
	if (create) {
		rblist__add_node(rblist, &dm);
		nd = rblist__find(rblist, &dm);
		if (nd)
			return container_of(nd, struct saved_value, rb_node);
	}
	return NULL;
}

void runtime_stat__init(struct runtime_stat *st)
{
	struct rblist *rblist = &st->value_list;

	rblist__init(rblist);
	rblist->node_cmp = saved_value_cmp;
	rblist->node_new = saved_value_new;
	rblist->node_delete = saved_value_delete;
}
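
/*
 * A rough sketch of how a private runtime_stat (for example one per thread)
 * is driven by callers of this file:
 *
 *	struct runtime_stat st;
 *
 *	runtime_stat__init(&st);
 *	perf_stat__update_shadow_stats(counter, count, cpu, &st);
 *	...
 *	perf_stat__print_shadow_stats(config, counter, avg, cpu, out,
 *				      metric_events, &st);
 *	runtime_stat__exit(&st);
 *
 * Callers that do not need per-thread values pass the global &rt_stat
 * instead.
 */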

void runtime_stat__exit(struct runtime_stat *st)
{
	rblist__exit(&st->value_list);
}

void perf_stat__init_shadow_stats(void)
{
	runtime_stat__init(&rt_stat);
}

static int evsel_context(struct evsel *evsel)
{
	int ctx = 0;

	if (evsel->core.attr.exclude_kernel)
		ctx |= CTX_BIT_KERNEL;
	if (evsel->core.attr.exclude_user)
		ctx |= CTX_BIT_USER;
	if (evsel->core.attr.exclude_hv)
		ctx |= CTX_BIT_HV;
	if (evsel->core.attr.exclude_host)
		ctx |= CTX_BIT_HOST;
	if (evsel->core.attr.exclude_idle)
		ctx |= CTX_BIT_IDLE;

	return ctx;
}
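
/*
 * For example, a user-space-only event such as "cycles:u" has exclude_kernel
 * and exclude_hv set, so it gets ctx = CTX_BIT_KERNEL | CTX_BIT_HV and its
 * shadow stats are kept apart from those of an unrestricted "cycles" event.
 */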

static void reset_stat(struct runtime_stat *st)
{
	struct rblist *rblist;
	struct rb_node *pos, *next;

	rblist = &st->value_list;
	next = rb_first_cached(&rblist->entries);
	while (next) {
		pos = next;
		next = rb_next(pos);
		memset(&container_of(pos, struct saved_value, rb_node)->stats,
		       0,
		       sizeof(struct stats));
	}
}

void perf_stat__reset_shadow_stats(void)
{
	reset_stat(&rt_stat);
	memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
}

void perf_stat__reset_shadow_per_stat(struct runtime_stat *st)
{
	reset_stat(st);
}

static void update_runtime_stat(struct runtime_stat *st,
				enum stat_type type,
				int ctx, int cpu, u64 count)
{
	struct saved_value *v = saved_value_lookup(NULL, cpu, true,
						   type, ctx, st);

	if (v)
		update_stats(&v->stats, count);
}

/*
 * Update various tracking values we maintain to print
 * more semantic information such as miss/hit ratios,
 * instruction rates, etc:
 */
void perf_stat__update_shadow_stats(struct evsel *counter, u64 count,
				    int cpu, struct runtime_stat *st)
{
	int ctx = evsel_context(counter);
	u64 count_ns = count;
	struct saved_value *v;

	count *= counter->scale;

	if (evsel__is_clock(counter))
		update_runtime_stat(st, STAT_NSECS, 0, cpu, count_ns);
	else if (evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
		update_runtime_stat(st, STAT_CYCLES, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
		update_runtime_stat(st, STAT_CYCLES_IN_TX, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TRANSACTION_START))
		update_runtime_stat(st, STAT_TRANSACTION, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, ELISION_START))
		update_runtime_stat(st, STAT_ELISION, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS))
		update_runtime_stat(st, STAT_TOPDOWN_TOTAL_SLOTS,
				    ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED))
		update_runtime_stat(st, STAT_TOPDOWN_SLOTS_ISSUED,
				    ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED))
		update_runtime_stat(st, STAT_TOPDOWN_SLOTS_RETIRED,
				    ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES))
		update_runtime_stat(st, STAT_TOPDOWN_FETCH_BUBBLES,
				    ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES))
		update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES,
				    ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_RETIRING))
		update_runtime_stat(st, STAT_TOPDOWN_RETIRING,
				    ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_BAD_SPEC))
		update_runtime_stat(st, STAT_TOPDOWN_BAD_SPEC,
				    ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_FE_BOUND))
		update_runtime_stat(st, STAT_TOPDOWN_FE_BOUND,
				    ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, TOPDOWN_BE_BOUND))
		update_runtime_stat(st, STAT_TOPDOWN_BE_BOUND,
				    ctx, cpu, count);
	else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
		update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT,
				    ctx, cpu, count);
	else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
		update_runtime_stat(st, STAT_STALLED_CYCLES_BACK,
				    ctx, cpu, count);
	else if (evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
		update_runtime_stat(st, STAT_BRANCHES, ctx, cpu, count);
	else if (evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
		update_runtime_stat(st, STAT_CACHEREFS, ctx, cpu, count);
	else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
		update_runtime_stat(st, STAT_L1_DCACHE, ctx, cpu, count);
	else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
		update_runtime_stat(st, STAT_L1_ICACHE, ctx, cpu, count);
	else if (evsel__match(counter, HW_CACHE, HW_CACHE_LL))
		update_runtime_stat(st, STAT_LL_CACHE, ctx, cpu, count);
	else if (evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
		update_runtime_stat(st, STAT_DTLB_CACHE, ctx, cpu, count);
	else if (evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
		update_runtime_stat(st, STAT_ITLB_CACHE, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, SMI_NUM))
		update_runtime_stat(st, STAT_SMI_NUM, ctx, cpu, count);
	else if (perf_stat_evsel__is(counter, APERF))
		update_runtime_stat(st, STAT_APERF, ctx, cpu, count);

	if (counter->collect_stat) {
		v = saved_value_lookup(counter, cpu, true, STAT_NONE, 0, st);
		update_stats(&v->stats, count);
		if (counter->metric_leader)
			v->metric_total += count;
	} else if (counter->metric_leader) {
		v = saved_value_lookup(counter->metric_leader,
				       cpu, true, STAT_NONE, 0, st);
		v->metric_total += count;
		v->metric_other++;
	}
}
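
/*
 * The final two branches above feed the generic metric code below: an event
 * that some MetricExpr refers to (collect_stat) keeps its own running stats,
 * and events merged under a metric_leader (for instance the same event opened
 * on several duplicated PMUs) also accumulate into the leader's metric_total
 * and metric_other, which prepare_metric() prefers over the plain average.
 */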

/* used for get_ratio_color() */
enum grc_type {
	GRC_STALLED_CYCLES_FE,
	GRC_STALLED_CYCLES_BE,
	GRC_CACHE_MISSES,
	GRC_MAX_NR
};

static const char *get_ratio_color(enum grc_type type, double ratio)
{
	static const double grc_table[GRC_MAX_NR][3] = {
		[GRC_STALLED_CYCLES_FE]	= { 50.0, 30.0, 10.0 },
		[GRC_STALLED_CYCLES_BE]	= { 75.0, 50.0, 20.0 },
		[GRC_CACHE_MISSES]	= { 20.0, 10.0, 5.0 },
	};
	const char *color = PERF_COLOR_NORMAL;

	if (ratio > grc_table[type][0])
		color = PERF_COLOR_RED;
	else if (ratio > grc_table[type][1])
		color = PERF_COLOR_MAGENTA;
	else if (ratio > grc_table[type][2])
		color = PERF_COLOR_YELLOW;

	return color;
}
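
/*
 * For example, a 35% frontend-stall ratio is above the 30.0 but below the
 * 50.0 threshold and is printed in magenta, while the same ratio for a
 * cache-miss style metric (thresholds 20/10/5) would already be red.
 */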

static struct evsel *perf_stat__find_event(struct evlist *evsel_list,
						const char *name)
{
	struct evsel *c2;

	evlist__for_each_entry (evsel_list, c2) {
		if (!strcasecmp(c2->name, name) && !c2->collect_stat)
			return c2;
	}
	return NULL;
}

/*
 * Find the events referenced by each MetricExpr, mark them for collection
 * and link the event carrying the expression to them.
 */
void perf_stat__collect_metric_expr(struct evlist *evsel_list)
{
	struct evsel *counter, *leader, **metric_events, *oc;
	bool found;
	struct expr_parse_ctx ctx;
	struct hashmap_entry *cur;
	size_t bkt;
	int i;

	expr__ctx_init(&ctx);
	evlist__for_each_entry(evsel_list, counter) {
		bool invalid = false;

		leader = counter->leader;
		if (!counter->metric_expr)
			continue;

		expr__ctx_clear(&ctx);
		metric_events = counter->metric_events;
		if (!metric_events) {
			if (expr__find_other(counter->metric_expr,
					     counter->name,
					     &ctx, 1) < 0)
				continue;

			metric_events = calloc(sizeof(struct evsel *),
					       hashmap__size(&ctx.ids) + 1);
			if (!metric_events) {
				expr__ctx_clear(&ctx);
				return;
			}
			counter->metric_events = metric_events;
		}

		i = 0;
		hashmap__for_each_entry((&ctx.ids), cur, bkt) {
			const char *metric_name = (const char *)cur->key;

			found = false;
			if (leader) {
				/* Search in group */
				for_each_group_member (oc, leader) {
					if (!strcasecmp(oc->name,
							metric_name) &&
						!oc->collect_stat) {
						found = true;
						break;
					}
				}
			}
			if (!found) {
				/* Search ignoring groups */
				oc = perf_stat__find_event(evsel_list,
							   metric_name);
			}
			if (!oc) {
				/* Deduping one is good enough to handle duplicated PMUs. */
				static char *printed;

				/*
				 * Adding events automatically would be difficult, because
				 * it would risk creating groups that are not schedulable.
				 * perf stat doesn't understand all the scheduling constraints
				 * of events. So we ask the user instead to add the missing
				 * events.
				 */
				if (!printed ||
				    strcasecmp(printed, metric_name)) {
					fprintf(stderr,
						"Add %s event to groups to get metric expression for %s\n",
						metric_name,
						counter->name);
					printed = strdup(metric_name);
				}
				invalid = true;
				continue;
			}
			metric_events[i++] = oc;
			oc->collect_stat = true;
		}
		metric_events[i] = NULL;
		if (invalid) {
			free(metric_events);
			counter->metric_events = NULL;
			counter->metric_expr = NULL;
		}
	}
	expr__ctx_clear(&ctx);
}

static double runtime_stat_avg(struct runtime_stat *st,
			       enum stat_type type, int ctx, int cpu)
{
	struct saved_value *v;

	v = saved_value_lookup(NULL, cpu, false, type, ctx, st);
	if (!v)
		return 0.0;

	return avg_stats(&v->stats);
}

static double runtime_stat_n(struct runtime_stat *st,
			     enum stat_type type, int ctx, int cpu)
{
	struct saved_value *v;

	v = saved_value_lookup(NULL, cpu, false, type, ctx, st);
	if (!v)
		return 0.0;

	return v->stats.n;
}

static void print_stalled_cycles_frontend(struct perf_stat_config *config,
					  int cpu,
					  struct evsel *evsel, double avg,
					  struct perf_stat_output_ctx *out,
					  struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);

	if (ratio)
		out->print_metric(config, out->ctx, color, "%7.2f%%", "frontend cycles idle",
				  ratio);
	else
		out->print_metric(config, out->ctx, NULL, NULL, "frontend cycles idle", 0);
}

static void print_stalled_cycles_backend(struct perf_stat_config *config,
					 int cpu,
					 struct evsel *evsel, double avg,
					 struct perf_stat_output_ctx *out,
					 struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);

	out->print_metric(config, out->ctx, color, "%7.2f%%", "backend cycles idle", ratio);
}

static void print_branch_misses(struct perf_stat_config *config,
				int cpu,
				struct evsel *evsel,
				double avg,
				struct perf_stat_output_ctx *out,
				struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_BRANCHES, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all branches", ratio);
}

static void print_l1_dcache_misses(struct perf_stat_config *config,
				   int cpu,
				   struct evsel *evsel,
				   double avg,
				   struct perf_stat_output_ctx *out,
				   struct runtime_stat *st)

{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_L1_DCACHE, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);

	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-dcache accesses", ratio);
}

static void print_l1_icache_misses(struct perf_stat_config *config,
				   int cpu,
				   struct evsel *evsel,
				   double avg,
				   struct perf_stat_output_ctx *out,
				   struct runtime_stat *st)

{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_L1_ICACHE, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-icache accesses", ratio);
}

static void print_dtlb_cache_misses(struct perf_stat_config *config,
				    int cpu,
				    struct evsel *evsel,
				    double avg,
				    struct perf_stat_output_ctx *out,
				    struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_DTLB_CACHE, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all dTLB cache accesses", ratio);
}

static void print_itlb_cache_misses(struct perf_stat_config *config,
				    int cpu,
				    struct evsel *evsel,
				    double avg,
				    struct perf_stat_output_ctx *out,
				    struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_ITLB_CACHE, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all iTLB cache accesses", ratio);
}

static void print_ll_cache_misses(struct perf_stat_config *config,
				  int cpu,
				  struct evsel *evsel,
				  double avg,
				  struct perf_stat_output_ctx *out,
				  struct runtime_stat *st)
{
	double total, ratio = 0.0;
	const char *color;
	int ctx = evsel_context(evsel);

	total = runtime_stat_avg(st, STAT_LL_CACHE, ctx, cpu);

	if (total)
		ratio = avg / total * 100.0;

	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
	out->print_metric(config, out->ctx, color, "%7.2f%%", "of all LL-cache accesses", ratio);
}

/*
 * High level "TopDown" CPU core pipeline bottleneck breakdown.
 *
 * Basic concept following
 * Yasin, A Top Down Method for Performance analysis and Counter architecture
 * ISPASS14
 *
 * The CPU pipeline is divided into 4 areas that can be bottlenecks:
 *
 * Frontend -> Backend -> Retiring
 * BadSpeculation in addition covers work that was executed out of order
 * and then thrown away (for example branch mispredictions)
 * Frontend is instruction decoding.
 * Backend is execution, like computation and accessing data in memory
 * Retiring is good execution that is not directly bottlenecked
 *
 * The formulas are computed in slots.
 * A slot is an entry in the pipeline, one per unit of pipeline width
 * (for example a 4-wide pipeline has 4 slots for each cycle)
 *
 * Formulas:
 * BadSpeculation = ((SlotsIssued - SlotsRetired) + RecoveryBubbles) /
 *			TotalSlots
 * Retiring = SlotsRetired / TotalSlots
 * FrontendBound = FetchBubbles / TotalSlots
 * BackendBound = 1.0 - BadSpeculation - Retiring - FrontendBound
 *
 * The kernel provides the mapping to the low level CPU events and any scaling
 * needed for the CPU pipeline width, for example:
 *
 * TotalSlots = Cycles * 4
 *
 * The scaling factor is communicated in the sysfs unit.
 *
 * In some cases the CPU may not be able to measure all the formulas due to
 * missing events. In this case multiple formulas are combined where possible.
 *
 * Full TopDown supports more levels to sub-divide each area: for example
 * BackendBound into computing bound and memory bound. For now we only
 * support Level 1 TopDown.
 */
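
/*
 * Worked example for a hypothetical 4-wide core over 1000 cycles
 * (TotalSlots = 4000), with SlotsIssued = 2600, SlotsRetired = 2000,
 * RecoveryBubbles = 200 and FetchBubbles = 800:
 *
 *	BadSpeculation = (2600 - 2000 + 200) / 4000  = 0.20
 *	Retiring       = 2000 / 4000                 = 0.50
 *	FrontendBound  = 800 / 4000                  = 0.20
 *	BackendBound   = 1.0 - 0.20 - 0.50 - 0.20    = 0.10
 */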

static double sanitize_val(double x)
{
	if (x < 0 && x >= -0.02)
		return 0.0;
	return x;
}

static double td_total_slots(int ctx, int cpu, struct runtime_stat *st)
{
	return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, ctx, cpu);
}

static double td_bad_spec(int ctx, int cpu, struct runtime_stat *st)
{
	double bad_spec = 0;
	double total_slots;
	double total;

	total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, ctx, cpu) -
		runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, ctx, cpu) +
		runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, ctx, cpu);

	total_slots = td_total_slots(ctx, cpu, st);
	if (total_slots)
		bad_spec = total / total_slots;
	return sanitize_val(bad_spec);
}

static double td_retiring(int ctx, int cpu, struct runtime_stat *st)
{
	double retiring = 0;
	double total_slots = td_total_slots(ctx, cpu, st);
	double ret_slots = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED,
					    ctx, cpu);

	if (total_slots)
		retiring = ret_slots / total_slots;
	return retiring;
}

static double td_fe_bound(int ctx, int cpu, struct runtime_stat *st)
{
	double fe_bound = 0;
	double total_slots = td_total_slots(ctx, cpu, st);
	double fetch_bub = runtime_stat_avg(st, STAT_TOPDOWN_FETCH_BUBBLES,
					    ctx, cpu);

	if (total_slots)
		fe_bound = fetch_bub / total_slots;
	return fe_bound;
}

static double td_be_bound(int ctx, int cpu, struct runtime_stat *st)
{
	double sum = (td_fe_bound(ctx, cpu, st) +
		      td_bad_spec(ctx, cpu, st) +
		      td_retiring(ctx, cpu, st));
	if (sum == 0)
		return 0;
	return sanitize_val(1.0 - sum);
}

/*
 * Kernel reports metrics multiplied with slots. To get back
 * the ratios we need to recreate the sum.
 */

static double td_metric_ratio(int ctx, int cpu,
			      enum stat_type type,
			      struct runtime_stat *stat)
{
	double sum = runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, ctx, cpu) +
		runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, ctx, cpu) +
		runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, ctx, cpu) +
		runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, ctx, cpu);
	double d = runtime_stat_avg(stat, type, ctx, cpu);

	if (sum)
		return d / sum;
	return 0;
}

/*
 * ... but only if most of the values are actually available.
 * We allow two missing.
 */

static bool full_td(int ctx, int cpu,
		    struct runtime_stat *stat)
{
	int c = 0;

	if (runtime_stat_avg(stat, STAT_TOPDOWN_RETIRING, ctx, cpu) > 0)
		c++;
	if (runtime_stat_avg(stat, STAT_TOPDOWN_BE_BOUND, ctx, cpu) > 0)
		c++;
	if (runtime_stat_avg(stat, STAT_TOPDOWN_FE_BOUND, ctx, cpu) > 0)
		c++;
	if (runtime_stat_avg(stat, STAT_TOPDOWN_BAD_SPEC, ctx, cpu) > 0)
		c++;
	return c >= 2;
}
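
/*
 * For example, if the kernel-reported averages are retiring = 2000,
 * fe_bound = 800, be_bound = 400 and bad_spec = 800 slots, the sum is 4000
 * and td_metric_ratio() yields 0.50, 0.20, 0.10 and 0.20 respectively; with
 * only two of the four available, full_td() still accepts the breakdown and
 * the missing areas simply read as 0.
 */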

static void print_smi_cost(struct perf_stat_config *config,
			   int cpu, struct evsel *evsel,
			   struct perf_stat_output_ctx *out,
			   struct runtime_stat *st)
{
	double smi_num, aperf, cycles, cost = 0.0;
	int ctx = evsel_context(evsel);
	const char *color = NULL;

	smi_num = runtime_stat_avg(st, STAT_SMI_NUM, ctx, cpu);
	aperf = runtime_stat_avg(st, STAT_APERF, ctx, cpu);
	cycles = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

	if ((cycles == 0) || (aperf == 0))
		return;

	if (smi_num)
		cost = (aperf - cycles) / aperf * 100.00;

	if (cost > 10)
		color = PERF_COLOR_RED;
	out->print_metric(config, out->ctx, color, "%8.1f%%", "SMI cycles%", cost);
	out->print_metric(config, out->ctx, NULL, "%4.0f", "SMI#", smi_num);
}
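
/*
 * For instance, with aperf = 1000000 and cycles = 950000 while at least one
 * SMI was counted, the printed cost is (1000000 - 950000) / 1000000 * 100
 * = 5.0%, a rough estimate of the cycles spent handling SMIs rather than
 * running the measured workload.
 */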

static int prepare_metric(struct evsel **metric_events,
			  struct metric_ref *metric_refs,
			  struct expr_parse_ctx *pctx,
			  int cpu,
			  struct runtime_stat *st)
{
	double scale;
	char *n, *pn;
	int i, j, ret;

	expr__ctx_init(pctx);
	for (i = 0; metric_events[i]; i++) {
		struct saved_value *v;
		struct stats *stats;
		u64 metric_total = 0;

		if (!strcmp(metric_events[i]->name, "duration_time")) {
			stats = &walltime_nsecs_stats;
			scale = 1e-9;
		} else {
			v = saved_value_lookup(metric_events[i], cpu, false,
					       STAT_NONE, 0, st);
			if (!v)
				break;
			stats = &v->stats;
			scale = 1.0;

			if (v->metric_other)
				metric_total = v->metric_total;
		}

		n = strdup(metric_events[i]->name);
		if (!n)
			return -ENOMEM;
		/*
		 * This display code with --no-merge adds [cpu] postfixes.
		 * These are not supported by the parser. Remove everything
		 * after the space.
		 */
		pn = strchr(n, ' ');
		if (pn)
			*pn = 0;

		if (metric_total)
			expr__add_id_val(pctx, n, metric_total);
		else
			expr__add_id_val(pctx, n, avg_stats(stats)*scale);
	}

	for (j = 0; metric_refs && metric_refs[j].metric_name; j++) {
		ret = expr__add_ref(pctx, &metric_refs[j]);
		if (ret)
			return ret;
	}

	return i;
}
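
/*
 * For a metric expression such as "instructions / cycles" (a hypothetical
 * example) this ends up doing roughly
 *
 *	expr__add_id_val(pctx, "instructions", avg_stats(...));
 *	expr__add_id_val(pctx, "cycles", avg_stats(...));
 *
 * after which generic_metric() below hands pctx to expr__parse() to
 * evaluate the expression.
 */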

static void generic_metric(struct perf_stat_config *config,
			   const char *metric_expr,
			   struct evsel **metric_events,
			   struct metric_ref *metric_refs,
			   char *name,
			   const char *metric_name,
			   const char *metric_unit,
			   int runtime,
			   int cpu,
			   struct perf_stat_output_ctx *out,
			   struct runtime_stat *st)
{
	print_metric_t print_metric = out->print_metric;
	struct expr_parse_ctx pctx;
	double ratio, scale;
	int i;
	void *ctxp = out->ctx;

	i = prepare_metric(metric_events, metric_refs, &pctx, cpu, st);
	if (i < 0)
		return;

	if (!metric_events[i]) {
		if (expr__parse(&ratio, &pctx, metric_expr, runtime) == 0) {
			char *unit;
			char metric_bf[64];

			if (metric_unit && metric_name) {
				if (perf_pmu__convert_scale(metric_unit,
					&unit, &scale) >= 0) {
					ratio *= scale;
				}
				if (strstr(metric_expr, "?"))
					scnprintf(metric_bf, sizeof(metric_bf),
					  "%s  %s_%d", unit, metric_name, runtime);
				else
					scnprintf(metric_bf, sizeof(metric_bf),
					  "%s  %s", unit, metric_name);

				print_metric(config, ctxp, NULL, "%8.1f",
					     metric_bf, ratio);
			} else {
				print_metric(config, ctxp, NULL, "%8.2f",
					metric_name ?
					metric_name :
					out->force_header ?  name : "",
					ratio);
			}
		} else {
			print_metric(config, ctxp, NULL, NULL,
				     out->force_header ?
				     (metric_name ? metric_name : name) : "", 0);
		}
	} else {
		print_metric(config, ctxp, NULL, NULL,
			     out->force_header ?
			     (metric_name ? metric_name : name) : "", 0);
	}

	expr__ctx_clear(&pctx);
}

double test_generic_metric(struct metric_expr *mexp, int cpu, struct runtime_stat *st)
{
	struct expr_parse_ctx pctx;
	double ratio = 0.0;

	if (prepare_metric(mexp->metric_events, mexp->metric_refs, &pctx, cpu, st) < 0)
		goto out;

	if (expr__parse(&ratio, &pctx, mexp->metric_expr, 1))
		ratio = 0.0;

out:
	expr__ctx_clear(&pctx);
	return ratio;
}

void perf_stat__print_shadow_stats(struct perf_stat_config *config,
				   struct evsel *evsel,
				   double avg, int cpu,
				   struct perf_stat_output_ctx *out,
				   struct rblist *metric_events,
				   struct runtime_stat *st)
{
	void *ctxp = out->ctx;
	print_metric_t print_metric = out->print_metric;
	double total, ratio = 0.0, total2;
	const char *color = NULL;
	int ctx = evsel_context(evsel);
	struct metric_event *me;
	int num = 1;

	if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
		total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

		if (total) {
			ratio = avg / total;
			print_metric(config, ctxp, NULL, "%7.2f ",
					"insn per cycle", ratio);
		} else {
			print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0);
		}

		total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT,
					 ctx, cpu);

		total = max(total, runtime_stat_avg(st,
						    STAT_STALLED_CYCLES_BACK,
						    ctx, cpu));

		if (total && avg) {
			out->new_line(config, ctxp);
			ratio = total / avg;
			print_metric(config, ctxp, NULL, "%7.2f ",
					"stalled cycles per insn",
					ratio);
		}
	} else if (evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
		if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0)
			print_branch_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all branches", 0);
	} else if (
		evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
		evsel->core.attr.config ==  ( PERF_COUNT_HW_CACHE_L1D |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_L1_DCACHE, ctx, cpu) != 0)
			print_l1_dcache_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all L1-dcache accesses", 0);
	} else if (
		evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
		evsel->core.attr.config ==  ( PERF_COUNT_HW_CACHE_L1I |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_L1_ICACHE, ctx, cpu) != 0)
			print_l1_icache_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all L1-icache accesses", 0);
	} else if (
		evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
		evsel->core.attr.config ==  ( PERF_COUNT_HW_CACHE_DTLB |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_DTLB_CACHE, ctx, cpu) != 0)
			print_dtlb_cache_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all dTLB cache accesses", 0);
	} else if (
		evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
		evsel->core.attr.config ==  ( PERF_COUNT_HW_CACHE_ITLB |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_ITLB_CACHE, ctx, cpu) != 0)
			print_itlb_cache_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all iTLB cache accesses", 0);
	} else if (
		evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
		evsel->core.attr.config ==  ( PERF_COUNT_HW_CACHE_LL |
					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
					 ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {

		if (runtime_stat_n(st, STAT_LL_CACHE, ctx, cpu) != 0)
			print_ll_cache_misses(config, cpu, evsel, avg, out, st);
		else
			print_metric(config, ctxp, NULL, NULL, "of all LL-cache accesses", 0);
	} else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
		total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu);

		if (total)
			ratio = avg * 100 / total;

		if (runtime_stat_n(st, STAT_CACHEREFS, ctx, cpu) != 0)
			print_metric(config, ctxp, NULL, "%8.3f %%",
				     "of all cache refs", ratio);
		else
			print_metric(config, ctxp, NULL, NULL, "of all cache refs", 0);
	} else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
		print_stalled_cycles_frontend(config, cpu, evsel, avg, out, st);
	} else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
		print_stalled_cycles_backend(config, cpu, evsel, avg, out, st);
	} else if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
		total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);

		if (total) {
			ratio = avg / total;
			print_metric(config, ctxp, NULL, "%8.3f", "GHz", ratio);
		} else {
			print_metric(config, ctxp, NULL, NULL, "GHz", 0);
		}
	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
		total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);

		if (total)
			print_metric(config, ctxp, NULL,
					"%7.2f%%", "transactional cycles",
					100.0 * (avg / total));
		else
			print_metric(config, ctxp, NULL, NULL, "transactional cycles",
				     0);
	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
		total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
		total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, ctx, cpu);

		if (total2 < avg)
			total2 = avg;
		if (total)
			print_metric(config, ctxp, NULL, "%7.2f%%", "aborted cycles",
				100.0 * ((total2-avg) / total));
		else
			print_metric(config, ctxp, NULL, NULL, "aborted cycles", 0);
	} else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) {
		total = runtime_stat_avg(st, STAT_CYCLES_IN_TX,
					 ctx, cpu);

		if (avg)
			ratio = total / avg;

		if (runtime_stat_n(st, STAT_CYCLES_IN_TX, ctx, cpu) != 0)
			print_metric(config, ctxp, NULL, "%8.0f",
				     "cycles / transaction", ratio);
		else
			print_metric(config, ctxp, NULL, NULL, "cycles / transaction",
				      0);
	} else if (perf_stat_evsel__is(evsel, ELISION_START)) {
		total = runtime_stat_avg(st, STAT_CYCLES_IN_TX,
					 ctx, cpu);

		if (avg)
			ratio = total / avg;

		print_metric(config, ctxp, NULL, "%8.0f", "cycles / elision", ratio);
	} else if (evsel__is_clock(evsel)) {
		if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0)
			print_metric(config, ctxp, NULL, "%8.3f", "CPUs utilized",
				     avg / (ratio * evsel->scale));
		else
			print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) {
		double fe_bound = td_fe_bound(ctx, cpu, st);

		if (fe_bound > 0.2)
			color = PERF_COLOR_RED;
		print_metric(config, ctxp, color, "%8.1f%%", "frontend bound",
				fe_bound * 100.);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) {
		double retiring = td_retiring(ctx, cpu, st);

		if (retiring > 0.7)
			color = PERF_COLOR_GREEN;
		print_metric(config, ctxp, color, "%8.1f%%", "retiring",
				retiring * 100.);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) {
		double bad_spec = td_bad_spec(ctx, cpu, st);

		if (bad_spec > 0.1)
			color = PERF_COLOR_RED;
		print_metric(config, ctxp, color, "%8.1f%%", "bad speculation",
				bad_spec * 100.);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) {
		double be_bound = td_be_bound(ctx, cpu, st);
		const char *name = "backend bound";
		static int have_recovery_bubbles = -1;

		/* In case the CPU does not support topdown-recovery-bubbles */
		if (have_recovery_bubbles < 0)
			have_recovery_bubbles = pmu_have_event("cpu",
					"topdown-recovery-bubbles");
		if (!have_recovery_bubbles)
			name = "backend bound/bad spec";

		if (be_bound > 0.2)
			color = PERF_COLOR_RED;
		if (td_total_slots(ctx, cpu, st) > 0)
			print_metric(config, ctxp, color, "%8.1f%%", name,
					be_bound * 100.);
		else
			print_metric(config, ctxp, NULL, NULL, name, 0);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_RETIRING) &&
			full_td(ctx, cpu, st)) {
		double retiring = td_metric_ratio(ctx, cpu,
						  STAT_TOPDOWN_RETIRING, st);

		if (retiring > 0.7)
			color = PERF_COLOR_GREEN;
		print_metric(config, ctxp, color, "%8.1f%%", "retiring",
				retiring * 100.);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_FE_BOUND) &&
			full_td(ctx, cpu, st)) {
		double fe_bound = td_metric_ratio(ctx, cpu,
						  STAT_TOPDOWN_FE_BOUND, st);

		if (fe_bound > 0.2)
			color = PERF_COLOR_RED;
		print_metric(config, ctxp, color, "%8.1f%%", "frontend bound",
				fe_bound * 100.);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_BE_BOUND) &&
			full_td(ctx, cpu, st)) {
		double be_bound = td_metric_ratio(ctx, cpu,
						  STAT_TOPDOWN_BE_BOUND, st);

		if (be_bound > 0.2)
			color = PERF_COLOR_RED;
		print_metric(config, ctxp, color, "%8.1f%%", "backend bound",
				be_bound * 100.);
	} else if (perf_stat_evsel__is(evsel, TOPDOWN_BAD_SPEC) &&
			full_td(ctx, cpu, st)) {
		double bad_spec = td_metric_ratio(ctx, cpu,
						  STAT_TOPDOWN_BAD_SPEC, st);

		if (bad_spec > 0.1)
			color = PERF_COLOR_RED;
		print_metric(config, ctxp, color, "%8.1f%%", "bad speculation",
				bad_spec * 100.);
	} else if (evsel->metric_expr) {
		generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL,
				evsel->name, evsel->metric_name, NULL, 1, cpu, out, st);
	} else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) {
		char unit = 'M';
		char unit_buf[10];

		total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);

		if (total)
			ratio = 1000.0 * avg / total;
		if (ratio < 0.001) {
			ratio *= 1000;
			unit = 'K';
		}
		snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
		print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio);
	} else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
		print_smi_cost(config, cpu, evsel, out, st);
	} else {
		num = 0;
	}

	if ((me = metricgroup__lookup(metric_events, evsel, false)) != NULL) {
		struct metric_expr *mexp;

		list_for_each_entry (mexp, &me->head, nd) {
			if (num++ > 0)
				out->new_line(config, ctxp);
			generic_metric(config, mexp->metric_expr, mexp->metric_events,
					mexp->metric_refs, evsel->name, mexp->metric_name,
					mexp->metric_unit, mexp->runtime, cpu, out, st);
		}
	}
	if (num == 0)
		print_metric(config, ctxp, NULL, NULL, NULL, 0);
}
