1// SPDX-License-Identifier: GPL-2.0
2#include <errno.h>
3#include <inttypes.h>
4#include "builtin.h"
5#include "perf.h"
6
7#include "util/evlist.h" // for struct evsel_str_handler
8#include "util/evsel.h"
9#include "util/symbol.h"
10#include "util/thread.h"
11#include "util/header.h"
12#include "util/target.h"
13#include "util/callchain.h"
14#include "util/lock-contention.h"
15#include "util/bpf_skel/lock_data.h"
16
17#include <subcmd/pager.h>
18#include <subcmd/parse-options.h>
19#include "util/trace-event.h"
20#include "util/tracepoint.h"
21
22#include "util/debug.h"
23#include "util/session.h"
24#include "util/tool.h"
25#include "util/data.h"
26#include "util/string2.h"
27#include "util/map.h"
28#include "util/util.h"
29
30#include <stdio.h>
31#include <sys/types.h>
32#include <sys/prctl.h>
33#include <semaphore.h>
34#include <math.h>
35#include <limits.h>
36#include <ctype.h>
37
38#include <linux/list.h>
39#include <linux/hash.h>
40#include <linux/kernel.h>
41#include <linux/zalloc.h>
42#include <linux/err.h>
43#include <linux/stringify.h>
44
/* session being analyzed (set up by the report/contention subcommands) */
static struct perf_session *session;
static struct target target;

/* based on kernel/lockdep.c */
#define LOCKHASH_BITS		12
#define LOCKHASH_SIZE		(1UL << LOCKHASH_BITS)

/* hash table of struct lock_stat, keyed by lock address (or aggregation key) */
static struct hlist_head *lockhash_table;

#define __lockhashfn(key)	hash_long((unsigned long)key, LOCKHASH_BITS)
#define lockhashentry(key)	(lockhash_table + __lockhashfn((key)))

/* per-thread lock sequence state, keyed by tid (see thread_stat_find/insert) */
static struct rb_root		thread_stats;

static bool combine_locks;
static bool show_thread_stats;
static bool show_lock_addrs;
static bool show_lock_owner;
static bool use_bpf;
static unsigned long bpf_map_entries = MAX_ENTRIES;
static int max_stack_depth = CONTENTION_STACK_DEPTH;
static int stack_skip = CONTENTION_STACK_SKIP;
static int print_nr_entries = INT_MAX / 2;
/* user-supplied caller-name filters; non-empty list enables callstacks */
static LIST_HEAD(callstack_filters);
static const char *output_name = NULL;
static FILE *lock_output;

struct callstack_filter {
	struct list_head list;
	char name[];	/* flexible array member: symbol substring to match */
};

static struct lock_filter filters;

static enum lock_aggr_mode aggr_mode = LOCK_AGGR_ADDR;
80
81static bool needs_callstack(void)
82{
83	return !list_empty(&callstack_filters);
84}
85
86static struct thread_stat *thread_stat_find(u32 tid)
87{
88	struct rb_node *node;
89	struct thread_stat *st;
90
91	node = thread_stats.rb_node;
92	while (node) {
93		st = container_of(node, struct thread_stat, rb);
94		if (st->tid == tid)
95			return st;
96		else if (tid < st->tid)
97			node = node->rb_left;
98		else
99			node = node->rb_right;
100	}
101
102	return NULL;
103}
104
105static void thread_stat_insert(struct thread_stat *new)
106{
107	struct rb_node **rb = &thread_stats.rb_node;
108	struct rb_node *parent = NULL;
109	struct thread_stat *p;
110
111	while (*rb) {
112		p = container_of(*rb, struct thread_stat, rb);
113		parent = *rb;
114
115		if (new->tid < p->tid)
116			rb = &(*rb)->rb_left;
117		else if (new->tid > p->tid)
118			rb = &(*rb)->rb_right;
119		else
120			BUG_ON("inserting invalid thread_stat\n");
121	}
122
123	rb_link_node(&new->rb, parent, rb);
124	rb_insert_color(&new->rb, &thread_stats);
125}
126
127static struct thread_stat *thread_stat_findnew_after_first(u32 tid)
128{
129	struct thread_stat *st;
130
131	st = thread_stat_find(tid);
132	if (st)
133		return st;
134
135	st = zalloc(sizeof(struct thread_stat));
136	if (!st) {
137		pr_err("memory allocation failed\n");
138		return NULL;
139	}
140
141	st->tid = tid;
142	INIT_LIST_HEAD(&st->seq_list);
143
144	thread_stat_insert(st);
145
146	return st;
147}
148
static struct thread_stat *thread_stat_findnew_first(u32 tid);

/*
 * Self-modifying lookup: starts out as thread_stat_findnew_first(),
 * which inserts into a known-empty tree without searching, then
 * redirects itself to thread_stat_findnew_after_first() for all
 * subsequent lookups.
 */
static struct thread_stat *(*thread_stat_findnew)(u32 tid) =
	thread_stat_findnew_first;

static struct thread_stat *thread_stat_findnew_first(u32 tid)
{
	struct thread_stat *st;

	st = zalloc(sizeof(struct thread_stat));
	if (!st) {
		pr_err("memory allocation failed\n");
		return NULL;
	}
	st->tid = tid;
	INIT_LIST_HEAD(&st->seq_list);

	/* the tree is empty: link directly at the root, no search needed */
	rb_link_node(&st->rb, NULL, &thread_stats.rb_node);
	rb_insert_color(&st->rb, &thread_stats);

	/* from now on take the normal search-then-insert path */
	thread_stat_findnew = thread_stat_findnew_after_first;
	return st;
}
171
/* build a simple compare function: true when one->member is bigger than two's */
#define SINGLE_KEY(member)						\
	static int lock_stat_key_ ## member(struct lock_stat *one,	\
					 struct lock_stat *two)		\
	{								\
		return one->member > two->member;			\
	}

SINGLE_KEY(nr_acquired)
SINGLE_KEY(nr_contended)
SINGLE_KEY(avg_wait_time)
SINGLE_KEY(wait_time_total)
SINGLE_KEY(wait_time_max)
185
186static int lock_stat_key_wait_time_min(struct lock_stat *one,
187					struct lock_stat *two)
188{
189	u64 s1 = one->wait_time_min;
190	u64 s2 = two->wait_time_min;
191	if (s1 == ULLONG_MAX)
192		s1 = 0;
193	if (s2 == ULLONG_MAX)
194		s2 = 0;
195	return s1 > s2;
196}
197
/* a sort/print key for lock statistics, linked into lock_keys when selected */
struct lock_key {
	/*
	 * name: the short value specified by the user;
	 * this should be simpler than the raw name of the member,
	 * e.g. nr_acquired -> acquired, wait_time_total -> wait_total
	 */
	const char		*name;
	/* header: the string printed on the header line */
	const char		*header;
	/* len: the printing width of the field */
	int			len;
	/* key: a pointer to function to compare two lock stats for sorting */
	int			(*key)(struct lock_stat*, struct lock_stat*);
	/* print: a pointer to function to print a given lock stats */
	void			(*print)(struct lock_key*, struct lock_stat*);
	/* list: list entry to link this */
	struct list_head	list;
};
216
217static void lock_stat_key_print_time(unsigned long long nsec, int len)
218{
219	static const struct {
220		float base;
221		const char *unit;
222	} table[] = {
223		{ 1e9 * 3600, "h " },
224		{ 1e9 * 60, "m " },
225		{ 1e9, "s " },
226		{ 1e6, "ms" },
227		{ 1e3, "us" },
228		{ 0, NULL },
229	};
230
231	/* for CSV output */
232	if (len == 0) {
233		fprintf(lock_output, "%llu", nsec);
234		return;
235	}
236
237	for (int i = 0; table[i].unit; i++) {
238		if (nsec < table[i].base)
239			continue;
240
241		fprintf(lock_output, "%*.2f %s", len - 3, nsec / table[i].base, table[i].unit);
242		return;
243	}
244
245	fprintf(lock_output, "%*llu %s", len - 3, nsec, "ns");
246}
247
/* define a printer for an integral counter member, right aligned to key->len */
#define PRINT_KEY(member)						\
static void lock_stat_key_print_ ## member(struct lock_key *key,	\
					   struct lock_stat *ls)	\
{									\
	fprintf(lock_output, "%*llu", key->len, (unsigned long long)ls->member);\
}

/* define a printer for a time member, scaled to a human-readable unit */
#define PRINT_TIME(member)						\
static void lock_stat_key_print_ ## member(struct lock_key *key,	\
					   struct lock_stat *ls)	\
{									\
	lock_stat_key_print_time((unsigned long long)ls->member, key->len);	\
}

PRINT_KEY(nr_acquired)
PRINT_KEY(nr_contended)
PRINT_TIME(avg_wait_time)
PRINT_TIME(wait_time_total)
PRINT_TIME(wait_time_max)
267
268static void lock_stat_key_print_wait_time_min(struct lock_key *key,
269					      struct lock_stat *ls)
270{
271	u64 wait_time = ls->wait_time_min;
272
273	if (wait_time == ULLONG_MAX)
274		wait_time = 0;
275
276	lock_stat_key_print_time(wait_time, key->len);
277}
278
279
/* sort key selected by the user; must match a lock_key name below */
static const char		*sort_key = "acquired";

/* compare function resolved from sort_key by select_key() */
static int			(*compare)(struct lock_stat *, struct lock_stat *);

static struct rb_root		sorted; /* place to store intermediate data */
static struct rb_root		result;	/* place to store sorted data */

/* ordered list of lock_key entries to print as output columns */
static LIST_HEAD(lock_keys);
static const char		*output_fields;

#define DEF_KEY_LOCK(name, header, fn_suffix, len)			\
	{ #name, header, len, lock_stat_key_ ## fn_suffix, lock_stat_key_print_ ## fn_suffix, {} }
static struct lock_key report_keys[] = {
	DEF_KEY_LOCK(acquired, "acquired", nr_acquired, 10),
	DEF_KEY_LOCK(contended, "contended", nr_contended, 10),
	DEF_KEY_LOCK(avg_wait, "avg wait", avg_wait_time, 12),
	DEF_KEY_LOCK(wait_total, "total wait", wait_time_total, 12),
	DEF_KEY_LOCK(wait_max, "max wait", wait_time_max, 12),
	DEF_KEY_LOCK(wait_min, "min wait", wait_time_min, 12),

	/* extra comparisons much complicated should be here */
	{ }
};

/* same keys as report_keys but in the default order used by 'lock contention' */
static struct lock_key contention_keys[] = {
	DEF_KEY_LOCK(contended, "contended", nr_contended, 10),
	DEF_KEY_LOCK(wait_total, "total wait", wait_time_total, 12),
	DEF_KEY_LOCK(wait_max, "max wait", wait_time_max, 12),
	DEF_KEY_LOCK(wait_min, "min wait", wait_time_min, 12),
	DEF_KEY_LOCK(avg_wait, "avg wait", avg_wait_time, 12),

	/* extra comparisons much complicated should be here */
	{ }
};
314
315static int select_key(bool contention)
316{
317	int i;
318	struct lock_key *keys = report_keys;
319
320	if (contention)
321		keys = contention_keys;
322
323	for (i = 0; keys[i].name; i++) {
324		if (!strcmp(keys[i].name, sort_key)) {
325			compare = keys[i].key;
326
327			/* selected key should be in the output fields */
328			if (list_empty(&keys[i].list))
329				list_add_tail(&keys[i].list, &lock_keys);
330
331			return 0;
332		}
333	}
334
335	pr_err("Unknown compare key: %s\n", sort_key);
336	return -1;
337}
338
339static int add_output_field(bool contention, char *name)
340{
341	int i;
342	struct lock_key *keys = report_keys;
343
344	if (contention)
345		keys = contention_keys;
346
347	for (i = 0; keys[i].name; i++) {
348		if (strcmp(keys[i].name, name))
349			continue;
350
351		/* prevent double link */
352		if (list_empty(&keys[i].list))
353			list_add_tail(&keys[i].list, &lock_keys);
354
355		return 0;
356	}
357
358	pr_err("Unknown output field: %s\n", name);
359	return -1;
360}
361
362static int setup_output_field(bool contention, const char *str)
363{
364	char *tok, *tmp, *orig;
365	int i, ret = 0;
366	struct lock_key *keys = report_keys;
367
368	if (contention)
369		keys = contention_keys;
370
371	/* no output field given: use all of them */
372	if (str == NULL) {
373		for (i = 0; keys[i].name; i++)
374			list_add_tail(&keys[i].list, &lock_keys);
375		return 0;
376	}
377
378	for (i = 0; keys[i].name; i++)
379		INIT_LIST_HEAD(&keys[i].list);
380
381	orig = tmp = strdup(str);
382	if (orig == NULL)
383		return -ENOMEM;
384
385	while ((tok = strsep(&tmp, ",")) != NULL){
386		ret = add_output_field(contention, tok);
387		if (ret < 0)
388			break;
389	}
390	free(orig);
391
392	return ret;
393}
394
/*
 * Merge @st into the "sorted" tree keyed by lock name.  If an entry
 * with the same name already exists, fold the counters into it and
 * mark @st as combined so insert_to_result() will skip it; otherwise
 * link @st as a new node.
 */
static void combine_lock_stats(struct lock_stat *st)
{
	struct rb_node **rb = &sorted.rb_node;
	struct rb_node *parent = NULL;
	struct lock_stat *p;
	int ret;

	while (*rb) {
		p = container_of(*rb, struct lock_stat, rb);
		parent = *rb;

		/* a NULL name compares as smaller than any real name */
		if (st->name && p->name)
			ret = strcmp(st->name, p->name);
		else
			ret = !!st->name - !!p->name;

		if (ret == 0) {
			/* same name: accumulate stats into the existing node */
			p->nr_acquired += st->nr_acquired;
			p->nr_contended += st->nr_contended;
			p->wait_time_total += st->wait_time_total;

			if (p->nr_contended)
				p->avg_wait_time = p->wait_time_total / p->nr_contended;

			if (p->wait_time_min > st->wait_time_min)
				p->wait_time_min = st->wait_time_min;
			if (p->wait_time_max < st->wait_time_max)
				p->wait_time_max = st->wait_time_max;

			p->broken |= st->broken;
			st->combined = 1;
			return;
		}

		if (ret < 0)
			rb = &(*rb)->rb_left;
		else
			rb = &(*rb)->rb_right;
	}

	rb_link_node(&st->rb, parent, rb);
	rb_insert_color(&st->rb, &sorted);
}
438
439static void insert_to_result(struct lock_stat *st,
440			     int (*bigger)(struct lock_stat *, struct lock_stat *))
441{
442	struct rb_node **rb = &result.rb_node;
443	struct rb_node *parent = NULL;
444	struct lock_stat *p;
445
446	if (combine_locks && st->combined)
447		return;
448
449	while (*rb) {
450		p = container_of(*rb, struct lock_stat, rb);
451		parent = *rb;
452
453		if (bigger(st, p))
454			rb = &(*rb)->rb_left;
455		else
456			rb = &(*rb)->rb_right;
457	}
458
459	rb_link_node(&st->rb, parent, rb);
460	rb_insert_color(&st->rb, &result);
461}
462
463/* returns left most element of result, and erase it */
464static struct lock_stat *pop_from_result(void)
465{
466	struct rb_node *node = result.rb_node;
467
468	if (!node)
469		return NULL;
470
471	while (node->rb_left)
472		node = node->rb_left;
473
474	rb_erase(node, &result);
475	return container_of(node, struct lock_stat, rb);
476}
477
478struct lock_stat *lock_stat_find(u64 addr)
479{
480	struct hlist_head *entry = lockhashentry(addr);
481	struct lock_stat *ret;
482
483	hlist_for_each_entry(ret, entry, hash_entry) {
484		if (ret->addr == addr)
485			return ret;
486	}
487	return NULL;
488}
489
490struct lock_stat *lock_stat_findnew(u64 addr, const char *name, int flags)
491{
492	struct hlist_head *entry = lockhashentry(addr);
493	struct lock_stat *ret, *new;
494
495	hlist_for_each_entry(ret, entry, hash_entry) {
496		if (ret->addr == addr)
497			return ret;
498	}
499
500	new = zalloc(sizeof(struct lock_stat));
501	if (!new)
502		goto alloc_failed;
503
504	new->addr = addr;
505	new->name = strdup(name);
506	if (!new->name) {
507		free(new);
508		goto alloc_failed;
509	}
510
511	new->flags = flags;
512	new->wait_time_min = ULLONG_MAX;
513
514	hlist_add_head(&new->hash_entry, entry);
515	return new;
516
517alloc_failed:
518	pr_err("memory allocation failed\n");
519	return NULL;
520}
521
/*
 * Return true if any kernel symbol in the first max_stack_depth entries
 * of @callstack contains one of the user-given filter strings.  With no
 * filters installed every callstack matches.
 */
bool match_callstack_filter(struct machine *machine, u64 *callstack)
{
	struct map *kmap;
	struct symbol *sym;
	u64 ip;
	const char *arch = perf_env__arch(machine->env);

	if (list_empty(&callstack_filters))
		return true;

	for (int i = 0; i < max_stack_depth; i++) {
		struct callstack_filter *filter;

		/*
		 * In powerpc, the callchain saved by kernel always includes
		 * first three entries as the NIP (next instruction pointer),
		 * LR (link register), and the contents of LR save area in the
		 * second stack frame. In certain scenarios its possible to have
		 * invalid kernel instruction addresses in either LR or the second
		 * stack frame's LR. In that case, kernel will store that address as
		 * zero.
		 *
		 * The below check will continue to look into callstack,
		 * incase first or second callstack index entry has 0
		 * address for powerpc.
		 */
		if (!callstack || (!callstack[i] && (strcmp(arch, "powerpc") ||
						(i != 1 && i != 2))))
			break;

		ip = callstack[i];
		sym = machine__find_kernel_symbol(machine, ip, &kmap);
		if (sym == NULL)
			continue;

		/* substring match against every installed filter */
		list_for_each_entry(filter, &callstack_filters, list) {
			if (strstr(sym->name, filter->name))
				return true;
		}
	}
	return false;
}
564
/*
 * Per-tracepoint callbacks; the active table is selected into
 * trace_handler depending on the subcommand and available events.
 */
struct trace_lock_handler {
	/* it's used on CONFIG_LOCKDEP */
	int (*acquire_event)(struct evsel *evsel,
			     struct perf_sample *sample);

	/* it's used on CONFIG_LOCKDEP && CONFIG_LOCK_STAT */
	int (*acquired_event)(struct evsel *evsel,
			      struct perf_sample *sample);

	/* it's used on CONFIG_LOCKDEP && CONFIG_LOCK_STAT */
	int (*contended_event)(struct evsel *evsel,
			       struct perf_sample *sample);

	/* it's used on CONFIG_LOCKDEP */
	int (*release_event)(struct evsel *evsel,
			     struct perf_sample *sample);

	/* it's used when CONFIG_LOCKDEP is off */
	int (*contention_begin_event)(struct evsel *evsel,
				      struct perf_sample *sample);

	/* it's used when CONFIG_LOCKDEP is off */
	int (*contention_end_event)(struct evsel *evsel,
				    struct perf_sample *sample);
};
590
591static struct lock_seq_stat *get_seq(struct thread_stat *ts, u64 addr)
592{
593	struct lock_seq_stat *seq;
594
595	list_for_each_entry(seq, &ts->seq_list, list) {
596		if (seq->addr == addr)
597			return seq;
598	}
599
600	seq = zalloc(sizeof(struct lock_seq_stat));
601	if (!seq) {
602		pr_err("memory allocation failed\n");
603		return NULL;
604	}
605	seq->state = SEQ_STATE_UNINITIALIZED;
606	seq->addr = addr;
607
608	list_add(&seq->list, &ts->seq_list);
609	return seq;
610}
611
/* classes of broken lock event sequences, indexing bad_hist[] */
enum broken_state {
	BROKEN_ACQUIRE,
	BROKEN_ACQUIRED,
	BROKEN_CONTENDED,
	BROKEN_RELEASE,
	BROKEN_MAX,
};

/* number of broken sequences detected, per class */
static int bad_hist[BROKEN_MAX];

/* flag bits carried by the lock_acquire tracepoint's "flags" field */
enum acquire_flags {
	TRY_LOCK = 1,
	READ_LOCK = 2,
};
626
627static int get_key_by_aggr_mode_simple(u64 *key, u64 addr, u32 tid)
628{
629	switch (aggr_mode) {
630	case LOCK_AGGR_ADDR:
631		*key = addr;
632		break;
633	case LOCK_AGGR_TASK:
634		*key = tid;
635		break;
636	case LOCK_AGGR_CALLER:
637	default:
638		pr_err("Invalid aggregation mode: %d\n", aggr_mode);
639		return -EINVAL;
640	}
641	return 0;
642}
643
static u64 callchain_id(struct evsel *evsel, struct perf_sample *sample);

/*
 * Compute the aggregation key for the current aggr_mode: a hash of the
 * sample's callchain for LOCK_AGGR_CALLER, otherwise the lock address
 * or thread id via the simple helper.
 */
static int get_key_by_aggr_mode(u64 *key, u64 addr, struct evsel *evsel,
				 struct perf_sample *sample)
{
	if (aggr_mode == LOCK_AGGR_CALLER) {
		*key = callchain_id(evsel, sample);
		return 0;
	}
	return get_key_by_aggr_mode_simple(key, addr, sample->tid);
}
655
/*
 * Handle a lock:lock_acquire tracepoint: advance the per-thread
 * sequence state machine for this lock address.  Trylock/readlock
 * acquisitions complete immediately; plain acquisitions move to
 * ACQUIRING and wait for the acquired event.
 */
static int report_lock_acquire_event(struct evsel *evsel,
				     struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	const char *name = evsel__strval(evsel, sample, "name");
	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
	int flag = evsel__intval(evsel, sample, "flags");
	u64 key;
	int ret;

	ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
	if (ret < 0)
		return ret;

	ls = lock_stat_findnew(key, name, 0);
	if (!ls)
		return -ENOMEM;

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
	case SEQ_STATE_RELEASED:
		if (!flag) {
			seq->state = SEQ_STATE_ACQUIRING;
		} else {
			/* try/read locks succeed immediately: count them now */
			if (flag & TRY_LOCK)
				ls->nr_trylock++;
			if (flag & READ_LOCK)
				ls->nr_readlock++;
			seq->state = SEQ_STATE_READ_ACQUIRED;
			seq->read_count = 1;
			ls->nr_acquired++;
		}
		break;
	case SEQ_STATE_READ_ACQUIRED:
		if (flag & READ_LOCK) {
			/* read locks nest: just bump the counters */
			seq->read_count++;
			ls->nr_acquired++;
			goto end;
		} else {
			goto broken;
		}
		break;
	case SEQ_STATE_ACQUIRED:
	case SEQ_STATE_ACQUIRING:
	case SEQ_STATE_CONTENDED:
broken:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_ACQUIRE]++;
		}
		/* discard the sequence and start over on the next event */
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	ls->nr_acquire++;
	seq->prev_event_time = sample->time;
end:
	return 0;
}
730
/*
 * Handle a lock:lock_acquired tracepoint: the lock was obtained.  If
 * the thread was CONTENDED, the elapsed time since the previous event
 * is accounted as wait time.
 */
static int report_lock_acquired_event(struct evsel *evsel,
				      struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	u64 contended_term;
	const char *name = evsel__strval(evsel, sample, "name");
	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
	u64 key;
	int ret;

	ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
	if (ret < 0)
		return ret;

	ls = lock_stat_findnew(key, name, 0);
	if (!ls)
		return -ENOMEM;

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
		/* orphan event, do nothing */
		return 0;
	case SEQ_STATE_ACQUIRING:
		break;
	case SEQ_STATE_CONTENDED:
		/* time spent waiting since the contended event */
		contended_term = sample->time - seq->prev_event_time;
		ls->wait_time_total += contended_term;
		if (contended_term < ls->wait_time_min)
			ls->wait_time_min = contended_term;
		if (ls->wait_time_max < contended_term)
			ls->wait_time_max = contended_term;
		break;
	case SEQ_STATE_RELEASED:
	case SEQ_STATE_ACQUIRED:
	case SEQ_STATE_READ_ACQUIRED:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_ACQUIRED]++;
		}
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	seq->state = SEQ_STATE_ACQUIRED;
	ls->nr_acquired++;
	ls->avg_wait_time = ls->nr_contended ? ls->wait_time_total/ls->nr_contended : 0;
	seq->prev_event_time = sample->time;
end:
	return 0;
}
796
/*
 * Handle a lock:lock_contended tracepoint: the thread started waiting
 * for the lock; mark the sequence CONTENDED and record the timestamp.
 */
static int report_lock_contended_event(struct evsel *evsel,
				       struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	const char *name = evsel__strval(evsel, sample, "name");
	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
	u64 key;
	int ret;

	ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
	if (ret < 0)
		return ret;

	ls = lock_stat_findnew(key, name, 0);
	if (!ls)
		return -ENOMEM;

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
		/* orphan event, do nothing */
		return 0;
	case SEQ_STATE_ACQUIRING:
		break;
	case SEQ_STATE_RELEASED:
	case SEQ_STATE_ACQUIRED:
	case SEQ_STATE_READ_ACQUIRED:
	case SEQ_STATE_CONTENDED:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_CONTENDED]++;
		}
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	seq->state = SEQ_STATE_CONTENDED;
	ls->nr_contended++;
	ls->avg_wait_time = ls->wait_time_total/ls->nr_contended;
	seq->prev_event_time = sample->time;
end:
	return 0;
}
854
/*
 * Handle a lock:lock_release tracepoint: the sequence for this lock is
 * finished, so the per-thread state is freed.  Nested read locks only
 * release once the read count drops to zero.
 */
static int report_lock_release_event(struct evsel *evsel,
				     struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	const char *name = evsel__strval(evsel, sample, "name");
	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
	u64 key;
	int ret;

	ret = get_key_by_aggr_mode_simple(&key, addr, sample->tid);
	if (ret < 0)
		return ret;

	ls = lock_stat_findnew(key, name, 0);
	if (!ls)
		return -ENOMEM;

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
		goto end;
	case SEQ_STATE_ACQUIRED:
		break;
	case SEQ_STATE_READ_ACQUIRED:
		/* one level of read-lock nesting undone */
		seq->read_count--;
		BUG_ON(seq->read_count < 0);
		if (seq->read_count) {
			ls->nr_release++;
			goto end;
		}
		break;
	case SEQ_STATE_ACQUIRING:
	case SEQ_STATE_CONTENDED:
	case SEQ_STATE_RELEASED:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_RELEASE]++;
		}
		goto free_seq;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	ls->nr_release++;
free_seq:
	list_del_init(&seq->list);
	free(seq);
end:
	return 0;
}
916
917static int get_symbol_name_offset(struct map *map, struct symbol *sym, u64 ip,
918				  char *buf, int size)
919{
920	u64 offset;
921
922	if (map == NULL || sym == NULL) {
923		buf[0] = '\0';
924		return 0;
925	}
926
927	offset = map__map_ip(map, ip) - sym->start;
928
929	if (offset)
930		return scnprintf(buf, size, "%s+%#lx", sym->name, offset);
931	else
932		return strlcpy(buf, sym->name, size);
933}
/*
 * Resolve the first non-lock function in the sample's callchain into
 * "symbol+offset" form, to be used as the lock's display name.
 * Returns -1 when no suitable caller is found, or in per-thread mode
 * where lock names are replaced by task names later anyway.
 */
static int lock_contention_caller(struct evsel *evsel, struct perf_sample *sample,
				  char *buf, int size)
{
	struct thread *thread;
	struct callchain_cursor *cursor;
	struct machine *machine = &session->machines.host;
	struct symbol *sym;
	int skip = 0;
	int ret;

	/* lock names will be replaced to task name later */
	if (show_thread_stats)
		return -1;

	thread = machine__findnew_thread(machine, -1, sample->pid);
	if (thread == NULL)
		return -1;

	cursor = get_tls_callchain_cursor();

	/* use caller function name from the callchain */
	ret = thread__resolve_callchain(thread, cursor, evsel, sample,
					NULL, NULL, max_stack_depth);
	if (ret != 0) {
		thread__put(thread);
		return -1;
	}

	callchain_cursor_commit(cursor);
	thread__put(thread);

	while (true) {
		struct callchain_cursor_node *node;

		node = callchain_cursor_current(cursor);
		if (node == NULL)
			break;

		/* skip first few entries - for lock functions */
		if (++skip <= stack_skip)
			goto next;

		/* take the first resolvable symbol that isn't a lock function */
		sym = node->ms.sym;
		if (sym && !machine__is_lock_function(machine, node->ip)) {
			get_symbol_name_offset(node->ms.map, sym, node->ip,
					       buf, size);
			return 0;
		}

next:
		callchain_cursor_advance(cursor);
	}
	return -1;
}
988
/*
 * Build a 64-bit id for a contention callchain by XOR-folding the
 * hashed IPs of its non-lock entries; used as the aggregation key in
 * LOCK_AGGR_CALLER mode.  NOTE(review): -1 (i.e. ~0ULL) is returned on
 * resolution failure, which could alias a legitimate hash value.
 */
static u64 callchain_id(struct evsel *evsel, struct perf_sample *sample)
{
	struct callchain_cursor *cursor;
	struct machine *machine = &session->machines.host;
	struct thread *thread;
	u64 hash = 0;
	int skip = 0;
	int ret;

	thread = machine__findnew_thread(machine, -1, sample->pid);
	if (thread == NULL)
		return -1;

	cursor = get_tls_callchain_cursor();
	/* use caller function name from the callchain */
	ret = thread__resolve_callchain(thread, cursor, evsel, sample,
					NULL, NULL, max_stack_depth);
	thread__put(thread);

	if (ret != 0)
		return -1;

	callchain_cursor_commit(cursor);

	while (true) {
		struct callchain_cursor_node *node;

		node = callchain_cursor_current(cursor);
		if (node == NULL)
			break;

		/* skip first few entries - for lock functions */
		if (++skip <= stack_skip)
			goto next;

		/* lock functions themselves don't identify the caller */
		if (node->ms.sym && machine__is_lock_function(machine, node->ip))
			goto next;

		hash ^= hash_long((unsigned long)node->ip, 64);

next:
		callchain_cursor_advance(cursor);
	}
	return hash;
}
1034
1035static u64 *get_callstack(struct perf_sample *sample, int max_stack)
1036{
1037	u64 *callstack;
1038	u64 i;
1039	int c;
1040
1041	callstack = calloc(max_stack, sizeof(*callstack));
1042	if (callstack == NULL)
1043		return NULL;
1044
1045	for (i = 0, c = 0; i < sample->callchain->nr && c < max_stack; i++) {
1046		u64 ip = sample->callchain->ips[i];
1047
1048		if (ip >= PERF_CONTEXT_MAX)
1049			continue;
1050
1051		callstack[c++] = ip;
1052	}
1053	return callstack;
1054}
1055
/*
 * Handle a lock:contention_begin tracepoint: resolve the lock's display
 * name for the current aggregation mode, apply the type/address/
 * callstack filters, then advance the per-thread sequence state machine
 * to CONTENDED.
 */
static int report_lock_contention_begin_event(struct evsel *evsel,
					      struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	u64 addr = evsel__intval(evsel, sample, "lock_addr");
	unsigned int flags = evsel__intval(evsel, sample, "flags");
	u64 key;
	int i, ret;
	static bool kmap_loaded;
	struct machine *machine = &session->machines.host;
	struct map *kmap;
	struct symbol *sym;

	ret = get_key_by_aggr_mode(&key, addr, evsel, sample);
	if (ret < 0)
		return ret;

	/* one-time setup: resolve symbol filters to kernel addresses */
	if (!kmap_loaded) {
		unsigned long *addrs;

		/* make sure it loads the kernel map to find lock symbols */
		map__load(machine__kernel_map(machine));
		kmap_loaded = true;

		/* convert (kernel) symbols to addresses */
		for (i = 0; i < filters.nr_syms; i++) {
			sym = machine__find_kernel_symbol_by_name(machine,
								  filters.syms[i],
								  &kmap);
			if (sym == NULL) {
				pr_warning("ignore unknown symbol: %s\n",
					   filters.syms[i]);
				continue;
			}

			addrs = realloc(filters.addrs,
					(filters.nr_addrs + 1) * sizeof(*addrs));
			if (addrs == NULL) {
				pr_warning("memory allocation failure\n");
				return -ENOMEM;
			}

			addrs[filters.nr_addrs++] = map__unmap_ip(kmap, sym->start);
			filters.addrs = addrs;
		}
	}

	ls = lock_stat_find(key);
	if (!ls) {
		char buf[128];
		const char *name = "";

		/* pick a display name according to the aggregation mode */
		switch (aggr_mode) {
		case LOCK_AGGR_ADDR:
			sym = machine__find_kernel_symbol(machine, key, &kmap);
			if (sym)
				name = sym->name;
			break;
		case LOCK_AGGR_CALLER:
			name = buf;
			if (lock_contention_caller(evsel, sample, buf, sizeof(buf)) < 0)
				name = "Unknown";
			break;
		case LOCK_AGGR_TASK:
		default:
			break;
		}

		ls = lock_stat_findnew(key, name, flags);
		if (!ls)
			return -ENOMEM;
	}

	/* lock type filter (-Y): drop events whose flags don't match */
	if (filters.nr_types) {
		bool found = false;

		for (i = 0; i < filters.nr_types; i++) {
			if (flags == filters.types[i]) {
				found = true;
				break;
			}
		}

		if (!found)
			return 0;
	}

	/* lock address filter (-L): drop events on other locks */
	if (filters.nr_addrs) {
		bool found = false;

		for (i = 0; i < filters.nr_addrs; i++) {
			if (addr == filters.addrs[i]) {
				found = true;
				break;
			}
		}

		if (!found)
			return 0;
	}

	/* callstack filter (-S): drop events with non-matching callchains */
	if (needs_callstack()) {
		u64 *callstack = get_callstack(sample, max_stack_depth);
		if (callstack == NULL)
			return -ENOMEM;

		if (!match_callstack_filter(machine, callstack)) {
			free(callstack);
			return 0;
		}

		/* keep the first matching callstack for this lock */
		if (ls->callstack == NULL)
			ls->callstack = callstack;
		else
			free(callstack);
	}

	ts = thread_stat_findnew(sample->tid);
	if (!ts)
		return -ENOMEM;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
	case SEQ_STATE_ACQUIRED:
		break;
	case SEQ_STATE_CONTENDED:
		/*
		 * It can have nested contention begin with mutex spinning,
		 * then we would use the original contention begin event and
		 * ignore the second one.
		 */
		goto end;
	case SEQ_STATE_ACQUIRING:
	case SEQ_STATE_READ_ACQUIRED:
	case SEQ_STATE_RELEASED:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_CONTENDED]++;
		}
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	if (seq->state != SEQ_STATE_CONTENDED) {
		seq->state = SEQ_STATE_CONTENDED;
		seq->prev_event_time = sample->time;
		ls->nr_contended++;
	}
end:
	return 0;
}
1218
/*
 * Handle a lock:contention_end tracepoint: account the time spent since
 * the matching contention_begin as wait time and mark the sequence
 * ACQUIRED.  Events without prior state are silently ignored.
 */
static int report_lock_contention_end_event(struct evsel *evsel,
					    struct perf_sample *sample)
{
	struct lock_stat *ls;
	struct thread_stat *ts;
	struct lock_seq_stat *seq;
	u64 contended_term;
	u64 addr = evsel__intval(evsel, sample, "lock_addr");
	u64 key;
	int ret;

	ret = get_key_by_aggr_mode(&key, addr, evsel, sample);
	if (ret < 0)
		return ret;

	ls = lock_stat_find(key);
	if (!ls)
		return 0;

	ts = thread_stat_find(sample->tid);
	if (!ts)
		return 0;

	seq = get_seq(ts, addr);
	if (!seq)
		return -ENOMEM;

	switch (seq->state) {
	case SEQ_STATE_UNINITIALIZED:
		goto end;
	case SEQ_STATE_CONTENDED:
		/* wall time spent waiting since contention_begin */
		contended_term = sample->time - seq->prev_event_time;
		ls->wait_time_total += contended_term;
		if (contended_term < ls->wait_time_min)
			ls->wait_time_min = contended_term;
		if (ls->wait_time_max < contended_term)
			ls->wait_time_max = contended_term;
		break;
	case SEQ_STATE_ACQUIRING:
	case SEQ_STATE_ACQUIRED:
	case SEQ_STATE_READ_ACQUIRED:
	case SEQ_STATE_RELEASED:
		/* broken lock sequence */
		if (!ls->broken) {
			ls->broken = 1;
			bad_hist[BROKEN_ACQUIRED]++;
		}
		list_del_init(&seq->list);
		free(seq);
		goto end;
	default:
		BUG_ON("Unknown state of lock sequence found!\n");
		break;
	}

	seq->state = SEQ_STATE_ACQUIRED;
	ls->nr_acquired++;
	ls->avg_wait_time = ls->wait_time_total/ls->nr_acquired;
end:
	return 0;
}
1280
/* lock oriented handlers */
/* TODO: handlers for CPU oriented, thread oriented */
/* full handler set for 'perf lock report': lockdep tracepoints plus contention */
static struct trace_lock_handler report_lock_ops  = {
	.acquire_event		= report_lock_acquire_event,
	.acquired_event		= report_lock_acquired_event,
	.contended_event	= report_lock_contended_event,
	.release_event		= report_lock_release_event,
	.contention_begin_event	= report_lock_contention_begin_event,
	.contention_end_event	= report_lock_contention_end_event,
};
1291
/* reduced handler set used when only the contention tracepoints are recorded */
static struct trace_lock_handler contention_lock_ops  = {
	.contention_begin_event	= report_lock_contention_begin_event,
	.contention_end_event	= report_lock_contention_end_event,
};


/* selected at runtime; points at report_lock_ops or contention_lock_ops */
static struct trace_lock_handler *trace_handler;
1299
1300static int evsel__process_lock_acquire(struct evsel *evsel, struct perf_sample *sample)
1301{
1302	if (trace_handler->acquire_event)
1303		return trace_handler->acquire_event(evsel, sample);
1304	return 0;
1305}
1306
1307static int evsel__process_lock_acquired(struct evsel *evsel, struct perf_sample *sample)
1308{
1309	if (trace_handler->acquired_event)
1310		return trace_handler->acquired_event(evsel, sample);
1311	return 0;
1312}
1313
1314static int evsel__process_lock_contended(struct evsel *evsel, struct perf_sample *sample)
1315{
1316	if (trace_handler->contended_event)
1317		return trace_handler->contended_event(evsel, sample);
1318	return 0;
1319}
1320
1321static int evsel__process_lock_release(struct evsel *evsel, struct perf_sample *sample)
1322{
1323	if (trace_handler->release_event)
1324		return trace_handler->release_event(evsel, sample);
1325	return 0;
1326}
1327
1328static int evsel__process_contention_begin(struct evsel *evsel, struct perf_sample *sample)
1329{
1330	if (trace_handler->contention_begin_event)
1331		return trace_handler->contention_begin_event(evsel, sample);
1332	return 0;
1333}
1334
1335static int evsel__process_contention_end(struct evsel *evsel, struct perf_sample *sample)
1336{
1337	if (trace_handler->contention_end_event)
1338		return trace_handler->contention_end_event(evsel, sample);
1339	return 0;
1340}
1341
1342static void print_bad_events(int bad, int total)
1343{
1344	/* Output for debug, this have to be removed */
1345	int i;
1346	int broken = 0;
1347	const char *name[4] =
1348		{ "acquire", "acquired", "contended", "release" };
1349
1350	for (i = 0; i < BROKEN_MAX; i++)
1351		broken += bad_hist[i];
1352
1353	if (quiet || total == 0 || (broken == 0 && verbose <= 0))
1354		return;
1355
1356	fprintf(lock_output, "\n=== output for debug ===\n\n");
1357	fprintf(lock_output, "bad: %d, total: %d\n", bad, total);
1358	fprintf(lock_output, "bad rate: %.2f %%\n", (double)bad / (double)total * 100);
1359	fprintf(lock_output, "histogram of events caused bad sequence\n");
1360	for (i = 0; i < BROKEN_MAX; i++)
1361		fprintf(lock_output, " %10s: %d\n", name[i], bad_hist[i]);
1362}
1363
1364/* TODO: various way to print, coloring, nano or milli sec */
1365static void print_result(void)
1366{
1367	struct lock_stat *st;
1368	struct lock_key *key;
1369	char cut_name[20];
1370	int bad, total, printed;
1371
1372	if (!quiet) {
1373		fprintf(lock_output, "%20s ", "Name");
1374		list_for_each_entry(key, &lock_keys, list)
1375			fprintf(lock_output, "%*s ", key->len, key->header);
1376		fprintf(lock_output, "\n\n");
1377	}
1378
1379	bad = total = printed = 0;
1380	while ((st = pop_from_result())) {
1381		total++;
1382		if (st->broken)
1383			bad++;
1384		if (!st->nr_acquired)
1385			continue;
1386
1387		bzero(cut_name, 20);
1388
1389		if (strlen(st->name) < 20) {
1390			/* output raw name */
1391			const char *name = st->name;
1392
1393			if (show_thread_stats) {
1394				struct thread *t;
1395
1396				/* st->addr contains tid of thread */
1397				t = perf_session__findnew(session, st->addr);
1398				name = thread__comm_str(t);
1399			}
1400
1401			fprintf(lock_output, "%20s ", name);
1402		} else {
1403			strncpy(cut_name, st->name, 16);
1404			cut_name[16] = '.';
1405			cut_name[17] = '.';
1406			cut_name[18] = '.';
1407			cut_name[19] = '\0';
1408			/* cut off name for saving output style */
1409			fprintf(lock_output, "%20s ", cut_name);
1410		}
1411
1412		list_for_each_entry(key, &lock_keys, list) {
1413			key->print(key, st);
1414			fprintf(lock_output, " ");
1415		}
1416		fprintf(lock_output, "\n");
1417
1418		if (++printed >= print_nr_entries)
1419			break;
1420	}
1421
1422	print_bad_events(bad, total);
1423}
1424
static bool info_threads, info_map;	/* 'perf lock info' selectors, see dump_info() */
1426
1427static void dump_threads(void)
1428{
1429	struct thread_stat *st;
1430	struct rb_node *node;
1431	struct thread *t;
1432
1433	fprintf(lock_output, "%10s: comm\n", "Thread ID");
1434
1435	node = rb_first(&thread_stats);
1436	while (node) {
1437		st = container_of(node, struct thread_stat, rb);
1438		t = perf_session__findnew(session, st->tid);
1439		fprintf(lock_output, "%10d: %s\n", st->tid, thread__comm_str(t));
1440		node = rb_next(node);
1441		thread__put(t);
1442	}
1443}
1444
1445static int compare_maps(struct lock_stat *a, struct lock_stat *b)
1446{
1447	int ret;
1448
1449	if (a->name && b->name)
1450		ret = strcmp(a->name, b->name);
1451	else
1452		ret = !!a->name - !!b->name;
1453
1454	if (!ret)
1455		return a->addr < b->addr;
1456	else
1457		return ret < 0;
1458}
1459
1460static void dump_map(void)
1461{
1462	unsigned int i;
1463	struct lock_stat *st;
1464
1465	fprintf(lock_output, "Address of instance: name of class\n");
1466	for (i = 0; i < LOCKHASH_SIZE; i++) {
1467		hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
1468			insert_to_result(st, compare_maps);
1469		}
1470	}
1471
1472	while ((st = pop_from_result()))
1473		fprintf(lock_output, " %#llx: %s\n", (unsigned long long)st->addr, st->name);
1474}
1475
1476static int dump_info(void)
1477{
1478	int rc = 0;
1479
1480	if (info_threads)
1481		dump_threads();
1482	else if (info_map)
1483		dump_map();
1484	else {
1485		rc = -1;
1486		pr_err("Unknown type of information\n");
1487	}
1488
1489	return rc;
1490}
1491
/* lockdep tracepoints used by 'perf lock report'; need the listed kernel configs */
static const struct evsel_str_handler lock_tracepoints[] = {
	{ "lock:lock_acquire",	 evsel__process_lock_acquire,   }, /* CONFIG_LOCKDEP */
	{ "lock:lock_acquired",	 evsel__process_lock_acquired,  }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
	{ "lock:lock_contended", evsel__process_lock_contended, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
	{ "lock:lock_release",	 evsel__process_lock_release,   }, /* CONFIG_LOCKDEP */
};
1498
/* contention tracepoints used by 'perf lock contention' (no lockdep required) */
static const struct evsel_str_handler contention_tracepoints[] = {
	{ "lock:contention_begin", evsel__process_contention_begin, },
	{ "lock:contention_end",   evsel__process_contention_end,   },
};
1503
/*
 * Tool callback for PERF_RECORD_EVENT_UPDATE (pipe mode): after the
 * default processing, (re)install our tracepoint handlers so samples
 * arriving afterwards are dispatched to the lock handlers.
 */
static int process_event_update(struct perf_tool *tool,
				union perf_event *event,
				struct evlist **pevlist)
{
	int ret;

	ret = perf_event__process_event_update(tool, event, pevlist);
	if (ret < 0)
		return ret;

	/* this can return -EEXIST since we call it for each evsel */
	perf_session__set_tracepoints_handlers(session, lock_tracepoints);
	perf_session__set_tracepoints_handlers(session, contention_tracepoints);
	return 0;
}
1519
/* signature shared by all tracepoint handlers installed on evsels */
typedef int (*tracepoint_handler)(struct evsel *evsel,
				  struct perf_sample *sample);
1522
1523static int process_sample_event(struct perf_tool *tool __maybe_unused,
1524				union perf_event *event,
1525				struct perf_sample *sample,
1526				struct evsel *evsel,
1527				struct machine *machine)
1528{
1529	int err = 0;
1530	struct thread *thread = machine__findnew_thread(machine, sample->pid,
1531							sample->tid);
1532
1533	if (thread == NULL) {
1534		pr_debug("problem processing %d event, skipping it.\n",
1535			event->header.type);
1536		return -1;
1537	}
1538
1539	if (evsel->handler != NULL) {
1540		tracepoint_handler f = evsel->handler;
1541		err = f(evsel, sample);
1542	}
1543
1544	thread__put(thread);
1545
1546	return err;
1547}
1548
1549static void combine_result(void)
1550{
1551	unsigned int i;
1552	struct lock_stat *st;
1553
1554	if (!combine_locks)
1555		return;
1556
1557	for (i = 0; i < LOCKHASH_SIZE; i++) {
1558		hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
1559			combine_lock_stats(st);
1560		}
1561	}
1562}
1563
1564static void sort_result(void)
1565{
1566	unsigned int i;
1567	struct lock_stat *st;
1568
1569	for (i = 0; i < LOCKHASH_SIZE; i++) {
1570		hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
1571			insert_to_result(st, compare);
1572		}
1573	}
1574}
1575
/*
 * Maps LCB_F_* flag combinations to a display string ("str", shown per
 * entry) and a family name ("name", used for -Y type filtering).  The
 * first matching row wins in get_type_str()/get_type_name().
 */
static const struct {
	unsigned int flags;
	const char *str;
	const char *name;
} lock_type_table[] = {
	{ 0,				"semaphore",	"semaphore" },
	{ LCB_F_SPIN,			"spinlock",	"spinlock" },
	{ LCB_F_SPIN | LCB_F_READ,	"rwlock:R",	"rwlock" },
	{ LCB_F_SPIN | LCB_F_WRITE,	"rwlock:W",	"rwlock" },
	{ LCB_F_READ,			"rwsem:R",	"rwsem" },
	{ LCB_F_WRITE,			"rwsem:W",	"rwsem" },
	{ LCB_F_RT,			"rt-mutex",	"rt-mutex" },
	{ LCB_F_RT | LCB_F_READ,	"rwlock-rt:R",	"rwlock-rt" },
	{ LCB_F_RT | LCB_F_WRITE,	"rwlock-rt:W",	"rwlock-rt" },
	{ LCB_F_PERCPU | LCB_F_READ,	"pcpu-sem:R",	"percpu-rwsem" },
	{ LCB_F_PERCPU | LCB_F_WRITE,	"pcpu-sem:W",	"percpu-rwsem" },
	{ LCB_F_MUTEX,			"mutex",	"mutex" },
	{ LCB_F_MUTEX | LCB_F_SPIN,	"mutex",	"mutex" },
	/* alias for get_type_flag() */
	{ LCB_F_MUTEX | LCB_F_SPIN,	"mutex-spin",	"mutex" },
};
1597
1598static const char *get_type_str(unsigned int flags)
1599{
1600	flags &= LCB_F_MAX_FLAGS - 1;
1601
1602	for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
1603		if (lock_type_table[i].flags == flags)
1604			return lock_type_table[i].str;
1605	}
1606	return "unknown";
1607}
1608
1609static const char *get_type_name(unsigned int flags)
1610{
1611	flags &= LCB_F_MAX_FLAGS - 1;
1612
1613	for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
1614		if (lock_type_table[i].flags == flags)
1615			return lock_type_table[i].name;
1616	}
1617	return "unknown";
1618}
1619
1620static unsigned int get_type_flag(const char *str)
1621{
1622	for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
1623		if (!strcmp(lock_type_table[i].name, str))
1624			return lock_type_table[i].flags;
1625	}
1626	for (unsigned int i = 0; i < ARRAY_SIZE(lock_type_table); i++) {
1627		if (!strcmp(lock_type_table[i].str, str))
1628			return lock_type_table[i].flags;
1629	}
1630	return UINT_MAX;
1631}
1632
1633static void lock_filter_finish(void)
1634{
1635	zfree(&filters.types);
1636	filters.nr_types = 0;
1637
1638	zfree(&filters.addrs);
1639	filters.nr_addrs = 0;
1640
1641	for (int i = 0; i < filters.nr_syms; i++)
1642		free(filters.syms[i]);
1643
1644	zfree(&filters.syms);
1645	filters.nr_syms = 0;
1646}
1647
/* Contention output reuses the generic result sorting selected via the keys. */
static void sort_contention_result(void)
{
	sort_result();
}
1652
/*
 * Print the human-readable column header for contention output: the
 * selected key columns followed by a trailing column that depends on
 * the aggregation mode (task, caller or address).
 */
static void print_header_stdio(void)
{
	struct lock_key *key;

	list_for_each_entry(key, &lock_keys, list)
		fprintf(lock_output, "%*s ", key->len, key->header);

	switch (aggr_mode) {
	case LOCK_AGGR_TASK:
		/* with --lock-owner the last column shows the owner, not the waiter */
		fprintf(lock_output, "  %10s   %s\n\n", "pid",
			show_lock_owner ? "owner" : "comm");
		break;
	case LOCK_AGGR_CALLER:
		fprintf(lock_output, "  %10s   %s\n\n", "type", "caller");
		break;
	case LOCK_AGGR_ADDR:
		fprintf(lock_output, "  %16s   %s\n\n", "address", "symbol");
		break;
	default:
		break;
	}
}
1675
/*
 * Print the CSV-style header ("# output: ...") using @sep as the field
 * separator (-x option).  Mirrors print_header_stdio() per aggregation
 * mode; in caller mode a stacktrace column is appended when verbose.
 */
static void print_header_csv(const char *sep)
{
	struct lock_key *key;

	fprintf(lock_output, "# output: ");
	list_for_each_entry(key, &lock_keys, list)
		fprintf(lock_output, "%s%s ", key->header, sep);

	switch (aggr_mode) {
	case LOCK_AGGR_TASK:
		fprintf(lock_output, "%s%s %s\n", "pid", sep,
			show_lock_owner ? "owner" : "comm");
		break;
	case LOCK_AGGR_CALLER:
		fprintf(lock_output, "%s%s %s", "type", sep, "caller");
		if (verbose > 0)
			fprintf(lock_output, "%s %s", sep, "stacktrace");
		fprintf(lock_output, "\n");
		break;
	case LOCK_AGGR_ADDR:
		fprintf(lock_output, "%s%s %s%s %s\n", "address", sep, "symbol", sep, "type");
		break;
	default:
		break;
	}
}
1702
1703static void print_header(void)
1704{
1705	if (!quiet) {
1706		if (symbol_conf.field_sep)
1707			print_header_csv(symbol_conf.field_sep);
1708		else
1709			print_header_stdio();
1710	}
1711}
1712
1713static void print_lock_stat_stdio(struct lock_contention *con, struct lock_stat *st)
1714{
1715	struct lock_key *key;
1716	struct thread *t;
1717	int pid;
1718
1719	list_for_each_entry(key, &lock_keys, list) {
1720		key->print(key, st);
1721		fprintf(lock_output, " ");
1722	}
1723
1724	switch (aggr_mode) {
1725	case LOCK_AGGR_CALLER:
1726		fprintf(lock_output, "  %10s   %s\n", get_type_str(st->flags), st->name);
1727		break;
1728	case LOCK_AGGR_TASK:
1729		pid = st->addr;
1730		t = perf_session__findnew(session, pid);
1731		fprintf(lock_output, "  %10d   %s\n",
1732			pid, pid == -1 ? "Unknown" : thread__comm_str(t));
1733		break;
1734	case LOCK_AGGR_ADDR:
1735		fprintf(lock_output, "  %016llx   %s (%s)\n", (unsigned long long)st->addr,
1736			st->name, get_type_name(st->flags));
1737		break;
1738	default:
1739		break;
1740	}
1741
1742	if (aggr_mode == LOCK_AGGR_CALLER && verbose > 0) {
1743		struct map *kmap;
1744		struct symbol *sym;
1745		char buf[128];
1746		u64 ip;
1747
1748		for (int i = 0; i < max_stack_depth; i++) {
1749			if (!st->callstack || !st->callstack[i])
1750				break;
1751
1752			ip = st->callstack[i];
1753			sym = machine__find_kernel_symbol(con->machine, ip, &kmap);
1754			get_symbol_name_offset(kmap, sym, ip, buf, sizeof(buf));
1755			fprintf(lock_output, "\t\t\t%#lx  %s\n", (unsigned long)ip, buf);
1756		}
1757	}
1758}
1759
1760static void print_lock_stat_csv(struct lock_contention *con, struct lock_stat *st,
1761				const char *sep)
1762{
1763	struct lock_key *key;
1764	struct thread *t;
1765	int pid;
1766
1767	list_for_each_entry(key, &lock_keys, list) {
1768		key->print(key, st);
1769		fprintf(lock_output, "%s ", sep);
1770	}
1771
1772	switch (aggr_mode) {
1773	case LOCK_AGGR_CALLER:
1774		fprintf(lock_output, "%s%s %s", get_type_str(st->flags), sep, st->name);
1775		if (verbose <= 0)
1776			fprintf(lock_output, "\n");
1777		break;
1778	case LOCK_AGGR_TASK:
1779		pid = st->addr;
1780		t = perf_session__findnew(session, pid);
1781		fprintf(lock_output, "%d%s %s\n", pid, sep,
1782			pid == -1 ? "Unknown" : thread__comm_str(t));
1783		break;
1784	case LOCK_AGGR_ADDR:
1785		fprintf(lock_output, "%llx%s %s%s %s\n", (unsigned long long)st->addr, sep,
1786			st->name, sep, get_type_name(st->flags));
1787		break;
1788	default:
1789		break;
1790	}
1791
1792	if (aggr_mode == LOCK_AGGR_CALLER && verbose > 0) {
1793		struct map *kmap;
1794		struct symbol *sym;
1795		char buf[128];
1796		u64 ip;
1797
1798		for (int i = 0; i < max_stack_depth; i++) {
1799			if (!st->callstack || !st->callstack[i])
1800				break;
1801
1802			ip = st->callstack[i];
1803			sym = machine__find_kernel_symbol(con->machine, ip, &kmap);
1804			get_symbol_name_offset(kmap, sym, ip, buf, sizeof(buf));
1805			fprintf(lock_output, "%s %#lx %s", i ? ":" : sep, (unsigned long) ip, buf);
1806		}
1807		fprintf(lock_output, "\n");
1808	}
1809}
1810
1811static void print_lock_stat(struct lock_contention *con, struct lock_stat *st)
1812{
1813	if (symbol_conf.field_sep)
1814		print_lock_stat_csv(con, st, symbol_conf.field_sep);
1815	else
1816		print_lock_stat_stdio(con, st);
1817}
1818
/*
 * Print the debug footer for contention output (stdio form).  In BPF
 * mode the failure counters come from @fails; otherwise the classic
 * broken-sequence histogram is printed via print_bad_events().
 */
static void print_footer_stdio(int total, int bad, struct lock_contention_fails *fails)
{
	/* Output for debug, this have to be removed */
	int broken = fails->task + fails->stack + fails->time + fails->data;

	if (!use_bpf)
		print_bad_events(bad, total);

	if (quiet || total == 0 || (broken == 0 && verbose <= 0))
		return;

	/* count failed entries into the total for an honest failure rate */
	total += broken;
	fprintf(lock_output, "\n=== output for debug ===\n\n");
	fprintf(lock_output, "bad: %d, total: %d\n", broken, total);
	fprintf(lock_output, "bad rate: %.2f %%\n", 100.0 * broken / total);

	fprintf(lock_output, "histogram of failure reasons\n");
	fprintf(lock_output, " %10s: %d\n", "task", fails->task);
	fprintf(lock_output, " %10s: %d\n", "stack", fails->stack);
	fprintf(lock_output, " %10s: %d\n", "time", fails->time);
	fprintf(lock_output, " %10s: %d\n", "data", fails->data);
}
1841
1842static void print_footer_csv(int total, int bad, struct lock_contention_fails *fails,
1843			     const char *sep)
1844{
1845	/* Output for debug, this have to be removed */
1846	if (use_bpf)
1847		bad = fails->task + fails->stack + fails->time + fails->data;
1848
1849	if (quiet || total == 0 || (bad == 0 && verbose <= 0))
1850		return;
1851
1852	total += bad;
1853	fprintf(lock_output, "# debug: total=%d%s bad=%d", total, sep, bad);
1854
1855	if (use_bpf) {
1856		fprintf(lock_output, "%s bad_%s=%d", sep, "task", fails->task);
1857		fprintf(lock_output, "%s bad_%s=%d", sep, "stack", fails->stack);
1858		fprintf(lock_output, "%s bad_%s=%d", sep, "time", fails->time);
1859		fprintf(lock_output, "%s bad_%s=%d", sep, "data", fails->data);
1860	} else {
1861		int i;
1862		const char *name[4] = { "acquire", "acquired", "contended", "release" };
1863
1864		for (i = 0; i < BROKEN_MAX; i++)
1865			fprintf(lock_output, "%s bad_%s=%d", sep, name[i], bad_hist[i]);
1866	}
1867	fprintf(lock_output, "\n");
1868}
1869
1870static void print_footer(int total, int bad, struct lock_contention_fails *fails)
1871{
1872	if (symbol_conf.field_sep)
1873		print_footer_csv(total, bad, fails, symbol_conf.field_sep);
1874	else
1875		print_footer_stdio(total, bad, fails);
1876}
1877
/*
 * Print the sorted contention entries followed by the debug footer.
 * Entries beyond print_nr_entries (-E) are not printed but still
 * counted so the footer's total/bad numbers stay accurate.
 */
static void print_contention_result(struct lock_contention *con)
{
	struct lock_stat *st;
	int bad, total, printed;

	if (!quiet)
		print_header();

	bad = total = printed = 0;

	while ((st = pop_from_result())) {
		/* in BPF mode one entry aggregates nr_contended events */
		total += use_bpf ? st->nr_contended : 1;
		if (st->broken)
			bad++;

		/* skip entries that never accumulated any wait time */
		if (!st->wait_time_total)
			continue;

		print_lock_stat(con, st);

		if (++printed >= print_nr_entries)
			break;
	}

	if (print_nr_entries) {
		/* update the total/bad stats */
		while ((st = pop_from_result())) {
			total += use_bpf ? st->nr_contended : 1;
			if (st->broken)
				bad++;
		}
	}
	/* some entries are collected but hidden by the callstack filter */
	total += con->nr_filtered;

	print_footer(total, bad, &con->fails);
}
1915
static bool force;	/* -f: open the data file without complaining (see lock_options) */
1917
/*
 * Run 'perf lock report' (or 'perf lock info' when @display_info is
 * true): open the recorded session, install the tracepoint handlers,
 * process all events and print the aggregated result.
 *
 * Returns 0 on success or a negative error code.
 */
static int __cmd_report(bool display_info)
{
	int err = -EINVAL;
	struct perf_tool eops = {
		.attr		 = perf_event__process_attr,
		.event_update	 = process_event_update,
		.sample		 = process_sample_event,
		.comm		 = perf_event__process_comm,
		.mmap		 = perf_event__process_mmap,
		.namespaces	 = perf_event__process_namespaces,
		.tracing_data	 = perf_event__process_tracing_data,
		.ordered_events	 = true,
	};
	struct perf_data data = {
		.path  = input_name,
		.mode  = PERF_DATA_MODE_READ,
		.force = force,
	};

	session = perf_session__new(&data, &eops);
	if (IS_ERR(session)) {
		pr_err("Initializing perf session failed\n");
		return PTR_ERR(session);
	}

	symbol_conf.allow_aliases = true;
	symbol__init(&session->header.env);

	/* in pipe mode the handlers are installed by process_event_update() */
	if (!data.is_pipe) {
		if (!perf_session__has_traces(session, "lock record"))
			goto out_delete;

		if (perf_session__set_tracepoints_handlers(session, lock_tracepoints)) {
			pr_err("Initializing perf session tracepoint handlers failed\n");
			goto out_delete;
		}

		if (perf_session__set_tracepoints_handlers(session, contention_tracepoints)) {
			pr_err("Initializing perf session tracepoint handlers failed\n");
			goto out_delete;
		}
	}

	if (setup_output_field(false, output_fields))
		goto out_delete;

	if (select_key(false))
		goto out_delete;

	/* -t aggregates per thread instead of per lock instance */
	if (show_thread_stats)
		aggr_mode = LOCK_AGGR_TASK;

	err = perf_session__process_events(session);
	if (err)
		goto out_delete;

	setup_pager();
	if (display_info) /* used for info subcommand */
		err = dump_info();
	else {
		combine_result();
		sort_result();
		print_result();
	}

out_delete:
	perf_session__delete(session);
	return err;
}
1987
/* Empty handler: only used to make pause() in __cmd_contention() return. */
static void sighandler(int sig __maybe_unused)
{
}
1991
/*
 * Validate option combinations for 'perf lock contention'.  Prints the
 * relevant usage snippets and returns -1 for conflicting options,
 * 0 when the combination is acceptable.
 */
static int check_lock_contention_options(const struct option *options,
					 const char * const *usage)

{
	if (show_thread_stats && show_lock_addrs) {
		pr_err("Cannot use thread and addr mode together\n");
		parse_options_usage(usage, options, "threads", 0);
		parse_options_usage(NULL, options, "lock-addr", 0);
		return -1;
	}

	if (show_lock_owner && !use_bpf) {
		pr_err("Lock owners are available only with BPF\n");
		parse_options_usage(usage, options, "lock-owner", 0);
		parse_options_usage(NULL, options, "use-bpf", 0);
		return -1;
	}

	if (show_lock_owner && show_lock_addrs) {
		pr_err("Cannot use owner and addr mode together\n");
		parse_options_usage(usage, options, "lock-owner", 0);
		parse_options_usage(NULL, options, "lock-addr", 0);
		return -1;
	}

	/* reject separators that can occur inside the printed fields */
	if (symbol_conf.field_sep) {
		if (strstr(symbol_conf.field_sep, ":") || /* part of type flags */
		    strstr(symbol_conf.field_sep, "+") || /* part of caller offset */
		    strstr(symbol_conf.field_sep, ".")) { /* can be in a symbol name */
			pr_err("Cannot use the separator that is already used\n");
			parse_options_usage(usage, options, "x", 1);
			return -1;
		}
	}

	/* owner mode implies per-thread aggregation */
	if (show_lock_owner)
		show_thread_stats = true;

	return 0;
}
2032
/*
 * Run 'perf lock contention': either attach BPF (-b) and trace live
 * (optionally running a workload given in @argv), or replay a recorded
 * perf.data file through the contention tracepoint handlers.  Prints
 * the aggregated contention result at the end.
 *
 * Returns 0 on success or a negative error code.
 */
static int __cmd_contention(int argc, const char **argv)
{
	int err = -EINVAL;
	struct perf_tool eops = {
		.attr		 = perf_event__process_attr,
		.event_update	 = process_event_update,
		.sample		 = process_sample_event,
		.comm		 = perf_event__process_comm,
		.mmap		 = perf_event__process_mmap,
		.tracing_data	 = perf_event__process_tracing_data,
		.ordered_events	 = true,
	};
	struct perf_data data = {
		.path  = input_name,
		.mode  = PERF_DATA_MODE_READ,
		.force = force,
	};
	struct lock_contention con = {
		.target = &target,
		.map_nr_entries = bpf_map_entries,
		.max_stack = max_stack_depth,
		.stack_skip = stack_skip,
		.filters = &filters,
		.save_callstack = needs_callstack(),
		.owner = show_lock_owner,
	};

	lockhash_table = calloc(LOCKHASH_SIZE, sizeof(*lockhash_table));
	if (!lockhash_table)
		return -ENOMEM;

	/* results are collected into the (freshly allocated) hash table */
	con.result = &lockhash_table[0];

	/* in BPF mode no data file is read, so create a session without one */
	session = perf_session__new(use_bpf ? NULL : &data, &eops);
	if (IS_ERR(session)) {
		pr_err("Initializing perf session failed\n");
		err = PTR_ERR(session);
		/* clear so perf_session__delete() below is not called on an error pointer */
		session = NULL;
		goto out_delete;
	}

	con.machine = &session->machines.host;

	con.aggr_mode = aggr_mode = show_thread_stats ? LOCK_AGGR_TASK :
		show_lock_addrs ? LOCK_AGGR_ADDR : LOCK_AGGR_CALLER;

	/* caller mode needs the callstack to identify the contention site */
	if (con.aggr_mode == LOCK_AGGR_CALLER)
		con.save_callstack = true;

	symbol_conf.allow_aliases = true;
	symbol__init(&session->header.env);

	if (use_bpf) {
		err = target__validate(&target);
		if (err) {
			char errbuf[512];

			target__strerror(&target, err, errbuf, 512);
			pr_err("%s\n", errbuf);
			goto out_delete;
		}

		/* let the signals interrupt pause() below; see sighandler() */
		signal(SIGINT, sighandler);
		signal(SIGCHLD, sighandler);
		signal(SIGTERM, sighandler);

		con.evlist = evlist__new();
		if (con.evlist == NULL) {
			err = -ENOMEM;
			goto out_delete;
		}

		err = evlist__create_maps(con.evlist, &target);
		if (err < 0)
			goto out_delete;

		if (argc) {
			err = evlist__prepare_workload(con.evlist, &target,
						       argv, false, NULL);
			if (err < 0)
				goto out_delete;
		}

		if (lock_contention_prepare(&con) < 0) {
			pr_err("lock contention BPF setup failed\n");
			goto out_delete;
		}
	} else if (!data.is_pipe) {
		if (!perf_session__has_traces(session, "lock record"))
			goto out_delete;

		/* the data must have been recorded with the contention tracepoints */
		if (!evlist__find_evsel_by_str(session->evlist,
					       "lock:contention_begin")) {
			pr_err("lock contention evsel not found\n");
			goto out_delete;
		}

		if (perf_session__set_tracepoints_handlers(session,
						contention_tracepoints)) {
			pr_err("Initializing perf session tracepoint handlers failed\n");
			goto out_delete;
		}
	}

	if (setup_output_field(true, output_fields))
		goto out_delete;

	if (select_key(true))
		goto out_delete;

	if (symbol_conf.field_sep) {
		int i;
		struct lock_key *keys = contention_keys;

		/* do not align output in CSV format */
		for (i = 0; keys[i].name; i++)
			keys[i].len = 0;
	}

	if (use_bpf) {
		lock_contention_start();
		if (argc)
			evlist__start_workload(con.evlist);

		/* wait for signal */
		pause();

		lock_contention_stop();
		lock_contention_read(&con);
	} else {
		err = perf_session__process_events(session);
		if (err)
			goto out_delete;
	}

	setup_pager();

	sort_contention_result();
	print_contention_result(&con);

out_delete:
	lock_filter_finish();
	evlist__delete(con.evlist);
	lock_contention_finish();
	perf_session__delete(session);
	zfree(&lockhash_table);
	return err;
}
2181
2182
2183static int __cmd_record(int argc, const char **argv)
2184{
2185	const char *record_args[] = {
2186		"record", "-R", "-m", "1024", "-c", "1", "--synth", "task",
2187	};
2188	const char *callgraph_args[] = {
2189		"--call-graph", "fp," __stringify(CONTENTION_STACK_DEPTH),
2190	};
2191	unsigned int rec_argc, i, j, ret;
2192	unsigned int nr_tracepoints;
2193	unsigned int nr_callgraph_args = 0;
2194	const char **rec_argv;
2195	bool has_lock_stat = true;
2196
2197	for (i = 0; i < ARRAY_SIZE(lock_tracepoints); i++) {
2198		if (!is_valid_tracepoint(lock_tracepoints[i].name)) {
2199			pr_debug("tracepoint %s is not enabled. "
2200				 "Are CONFIG_LOCKDEP and CONFIG_LOCK_STAT enabled?\n",
2201				 lock_tracepoints[i].name);
2202			has_lock_stat = false;
2203			break;
2204		}
2205	}
2206
2207	if (has_lock_stat)
2208		goto setup_args;
2209
2210	for (i = 0; i < ARRAY_SIZE(contention_tracepoints); i++) {
2211		if (!is_valid_tracepoint(contention_tracepoints[i].name)) {
2212			pr_err("tracepoint %s is not enabled.\n",
2213			       contention_tracepoints[i].name);
2214			return 1;
2215		}
2216	}
2217
2218	nr_callgraph_args = ARRAY_SIZE(callgraph_args);
2219
2220setup_args:
2221	rec_argc = ARRAY_SIZE(record_args) + nr_callgraph_args + argc - 1;
2222
2223	if (has_lock_stat)
2224		nr_tracepoints = ARRAY_SIZE(lock_tracepoints);
2225	else
2226		nr_tracepoints = ARRAY_SIZE(contention_tracepoints);
2227
2228	/* factor of 2 is for -e in front of each tracepoint */
2229	rec_argc += 2 * nr_tracepoints;
2230
2231	rec_argv = calloc(rec_argc + 1, sizeof(char *));
2232	if (!rec_argv)
2233		return -ENOMEM;
2234
2235	for (i = 0; i < ARRAY_SIZE(record_args); i++)
2236		rec_argv[i] = strdup(record_args[i]);
2237
2238	for (j = 0; j < nr_tracepoints; j++) {
2239		const char *ev_name;
2240
2241		if (has_lock_stat)
2242			ev_name = strdup(lock_tracepoints[j].name);
2243		else
2244			ev_name = strdup(contention_tracepoints[j].name);
2245
2246		if (!ev_name)
2247			return -ENOMEM;
2248
2249		rec_argv[i++] = "-e";
2250		rec_argv[i++] = ev_name;
2251	}
2252
2253	for (j = 0; j < nr_callgraph_args; j++, i++)
2254		rec_argv[i] = callgraph_args[j];
2255
2256	for (j = 1; j < (unsigned int)argc; j++, i++)
2257		rec_argv[i] = argv[j];
2258
2259	BUG_ON(i != rec_argc);
2260
2261	ret = cmd_record(i, rec_argv);
2262	free(rec_argv);
2263	return ret;
2264}
2265
2266static int parse_map_entry(const struct option *opt, const char *str,
2267			    int unset __maybe_unused)
2268{
2269	unsigned long *len = (unsigned long *)opt->value;
2270	unsigned long val;
2271	char *endptr;
2272
2273	errno = 0;
2274	val = strtoul(str, &endptr, 0);
2275	if (*endptr != '\0' || errno != 0) {
2276		pr_err("invalid BPF map length: %s\n", str);
2277		return -1;
2278	}
2279
2280	*len = val;
2281	return 0;
2282}
2283
2284static int parse_max_stack(const struct option *opt, const char *str,
2285			   int unset __maybe_unused)
2286{
2287	unsigned long *len = (unsigned long *)opt->value;
2288	long val;
2289	char *endptr;
2290
2291	errno = 0;
2292	val = strtol(str, &endptr, 0);
2293	if (*endptr != '\0' || errno != 0) {
2294		pr_err("invalid max stack depth: %s\n", str);
2295		return -1;
2296	}
2297
2298	if (val < 0 || val > sysctl__max_stack()) {
2299		pr_err("invalid max stack depth: %ld\n", val);
2300		return -1;
2301	}
2302
2303	*len = val;
2304	return 0;
2305}
2306
2307static bool add_lock_type(unsigned int flags)
2308{
2309	unsigned int *tmp;
2310
2311	tmp = realloc(filters.types, (filters.nr_types + 1) * sizeof(*filters.types));
2312	if (tmp == NULL)
2313		return false;
2314
2315	tmp[filters.nr_types++] = flags;
2316	filters.types = tmp;
2317	return true;
2318}
2319
2320static int parse_lock_type(const struct option *opt __maybe_unused, const char *str,
2321			   int unset __maybe_unused)
2322{
2323	char *s, *tmp, *tok;
2324	int ret = 0;
2325
2326	s = strdup(str);
2327	if (s == NULL)
2328		return -1;
2329
2330	for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
2331		unsigned int flags = get_type_flag(tok);
2332
2333		if (flags == -1U) {
2334			pr_err("Unknown lock flags: %s\n", tok);
2335			ret = -1;
2336			break;
2337		}
2338
2339		if (!add_lock_type(flags)) {
2340			ret = -1;
2341			break;
2342		}
2343	}
2344
2345	free(s);
2346	return ret;
2347}
2348
2349static bool add_lock_addr(unsigned long addr)
2350{
2351	unsigned long *tmp;
2352
2353	tmp = realloc(filters.addrs, (filters.nr_addrs + 1) * sizeof(*filters.addrs));
2354	if (tmp == NULL) {
2355		pr_err("Memory allocation failure\n");
2356		return false;
2357	}
2358
2359	tmp[filters.nr_addrs++] = addr;
2360	filters.addrs = tmp;
2361	return true;
2362}
2363
2364static bool add_lock_sym(char *name)
2365{
2366	char **tmp;
2367	char *sym = strdup(name);
2368
2369	if (sym == NULL) {
2370		pr_err("Memory allocation failure\n");
2371		return false;
2372	}
2373
2374	tmp = realloc(filters.syms, (filters.nr_syms + 1) * sizeof(*filters.syms));
2375	if (tmp == NULL) {
2376		pr_err("Memory allocation failure\n");
2377		free(sym);
2378		return false;
2379	}
2380
2381	tmp[filters.nr_syms++] = sym;
2382	filters.syms = tmp;
2383	return true;
2384}
2385
2386static int parse_lock_addr(const struct option *opt __maybe_unused, const char *str,
2387			   int unset __maybe_unused)
2388{
2389	char *s, *tmp, *tok;
2390	int ret = 0;
2391	u64 addr;
2392
2393	s = strdup(str);
2394	if (s == NULL)
2395		return -1;
2396
2397	for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
2398		char *end;
2399
2400		addr = strtoul(tok, &end, 16);
2401		if (*end == '\0') {
2402			if (!add_lock_addr(addr)) {
2403				ret = -1;
2404				break;
2405			}
2406			continue;
2407		}
2408
2409		/*
2410		 * At this moment, we don't have kernel symbols.  Save the symbols
2411		 * in a separate list and resolve them to addresses later.
2412		 */
2413		if (!add_lock_sym(tok)) {
2414			ret = -1;
2415			break;
2416		}
2417	}
2418
2419	free(s);
2420	return ret;
2421}
2422
2423static int parse_call_stack(const struct option *opt __maybe_unused, const char *str,
2424			   int unset __maybe_unused)
2425{
2426	char *s, *tmp, *tok;
2427	int ret = 0;
2428
2429	s = strdup(str);
2430	if (s == NULL)
2431		return -1;
2432
2433	for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
2434		struct callstack_filter *entry;
2435
2436		entry = malloc(sizeof(*entry) + strlen(tok) + 1);
2437		if (entry == NULL) {
2438			pr_err("Memory allocation failure\n");
2439			return -1;
2440		}
2441
2442		strcpy(entry->name, tok);
2443		list_add_tail(&entry->list, &callstack_filters);
2444	}
2445
2446	free(s);
2447	return ret;
2448}
2449
2450static int parse_output(const struct option *opt __maybe_unused, const char *str,
2451			int unset __maybe_unused)
2452{
2453	const char **name = (const char **)opt->value;
2454
2455	if (str == NULL)
2456		return -1;
2457
2458	lock_output = fopen(str, "w");
2459	if (lock_output == NULL) {
2460		pr_err("Cannot open %s\n", str);
2461		return -1;
2462	}
2463
2464	*name = str;
2465	return 0;
2466}
2467
/*
 * Entry point for 'perf lock'.  Sets up the lock hash table, parses the
 * subcommand ("record", "report", "script", "info", "contention") and its
 * options, and dispatches to the matching __cmd_* handler.  Returns the
 * handler's result, or -ENOMEM if the hash table cannot be allocated.
 */
int cmd_lock(int argc, const char **argv)
{
	/* options shared by every subcommand (pulled in via OPT_PARENT) */
	const struct option lock_options[] = {
	OPT_STRING('i', "input", &input_name, "file", "input file name"),
	OPT_CALLBACK(0, "output", &output_name, "file", "output file name", parse_output),
	OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"),
	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"),
	OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
	OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
		   "file", "vmlinux pathname"),
	OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name,
		   "file", "kallsyms pathname"),
	OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any warnings or messages"),
	OPT_END()
	};

	const struct option info_options[] = {
	OPT_BOOLEAN('t', "threads", &info_threads,
		    "dump thread list in perf.data"),
	OPT_BOOLEAN('m', "map", &info_map,
		    "map of lock instances (address:name table)"),
	OPT_PARENT(lock_options)
	};

	const struct option report_options[] = {
	OPT_STRING('k', "key", &sort_key, "acquired",
		    "key for sorting (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"),
	OPT_STRING('F', "field", &output_fields, NULL,
		    "output fields (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"),
	/* TODO: type */
	OPT_BOOLEAN('c', "combine-locks", &combine_locks,
		    "combine locks in the same class"),
	OPT_BOOLEAN('t', "threads", &show_thread_stats,
		    "show per-thread lock stats"),
	OPT_INTEGER('E', "entries", &print_nr_entries, "display this many functions"),
	OPT_PARENT(lock_options)
	};

	/*
	 * Deliberately non-const: set_option_nobuild() may rewrite the
	 * -b/--use-bpf entry below when BPF skeletons are not built in.
	 */
	struct option contention_options[] = {
	OPT_STRING('k', "key", &sort_key, "wait_total",
		    "key for sorting (contended / wait_total / wait_max / wait_min / avg_wait)"),
	OPT_STRING('F', "field", &output_fields, "contended,wait_total,wait_max,avg_wait",
		    "output fields (contended / wait_total / wait_max / wait_min / avg_wait)"),
	OPT_BOOLEAN('t', "threads", &show_thread_stats,
		    "show per-thread lock stats"),
	OPT_BOOLEAN('b', "use-bpf", &use_bpf, "use BPF program to collect lock contention stats"),
	OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
		    "System-wide collection from all CPUs"),
	OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
		    "List of cpus to monitor"),
	OPT_STRING('p', "pid", &target.pid, "pid",
		   "Trace on existing process id"),
	OPT_STRING(0, "tid", &target.tid, "tid",
		   "Trace on existing thread id (exclusive to --pid)"),
	OPT_CALLBACK('M', "map-nr-entries", &bpf_map_entries, "num",
		     "Max number of BPF map entries", parse_map_entry),
	OPT_CALLBACK(0, "max-stack", &max_stack_depth, "num",
		     "Set the maximum stack depth when collecting lock contention, "
		     "Default: " __stringify(CONTENTION_STACK_DEPTH), parse_max_stack),
	OPT_INTEGER(0, "stack-skip", &stack_skip,
		    "Set the number of stack depth to skip when finding a lock caller, "
		    "Default: " __stringify(CONTENTION_STACK_SKIP)),
	OPT_INTEGER('E', "entries", &print_nr_entries, "display this many functions"),
	OPT_BOOLEAN('l', "lock-addr", &show_lock_addrs, "show lock stats by address"),
	OPT_CALLBACK('Y', "type-filter", NULL, "FLAGS",
		     "Filter specific type of locks", parse_lock_type),
	OPT_CALLBACK('L', "lock-filter", NULL, "ADDRS/NAMES",
		     "Filter specific address/symbol of locks", parse_lock_addr),
	OPT_CALLBACK('S', "callstack-filter", NULL, "NAMES",
		     "Filter specific function in the callstack", parse_call_stack),
	OPT_BOOLEAN('o', "lock-owner", &show_lock_owner, "show lock owners instead of waiters"),
	OPT_STRING_NOEMPTY('x', "field-separator", &symbol_conf.field_sep, "separator",
		   "print result in CSV format with custom separator"),
	OPT_PARENT(lock_options)
	};

	const char * const info_usage[] = {
		"perf lock info [<options>]",
		NULL
	};
	const char *const lock_subcommands[] = { "record", "report", "script",
						 "info", "contention", NULL };
	const char *lock_usage[] = {
		NULL,
		NULL
	};
	const char * const report_usage[] = {
		"perf lock report [<options>]",
		NULL
	};
	const char * const contention_usage[] = {
		"perf lock contention [<options>]",
		NULL
	};
	unsigned int i;
	int rc = 0;

	/* hash table of lock_stat entries, keyed by lock address */
	lockhash_table = calloc(LOCKHASH_SIZE, sizeof(*lockhash_table));
	if (!lockhash_table)
		return -ENOMEM;

	for (i = 0; i < LOCKHASH_SIZE; i++)
		INIT_HLIST_HEAD(lockhash_table + i);

	/* default output stream; --output (parse_output) may replace it */
	lock_output = stderr;
	argc = parse_options_subcommand(argc, argv, lock_options, lock_subcommands,
					lock_usage, PARSE_OPT_STOP_AT_NON_OPTION);
	if (!argc)
		usage_with_options(lock_usage, lock_options);

	/*
	 * strstarts("record", argv[0]) is true when argv[0] is a prefix of
	 * "record", so abbreviations of at least 3 chars (e.g. "rec") match.
	 */
	if (strlen(argv[0]) > 2 && strstarts("record", argv[0])) {
		return __cmd_record(argc, argv);
	} else if (strlen(argv[0]) > 2 && strstarts("report", argv[0])) {
		trace_handler = &report_lock_ops;
		if (argc) {
			argc = parse_options(argc, argv,
					     report_options, report_usage, 0);
			if (argc)
				usage_with_options(report_usage, report_options);
		}
		rc = __cmd_report(false);
	} else if (!strcmp(argv[0], "script")) {
		/* Aliased to 'perf script' */
		rc = cmd_script(argc, argv);
	} else if (!strcmp(argv[0], "info")) {
		if (argc) {
			argc = parse_options(argc, argv,
					     info_options, info_usage, 0);
			if (argc)
				usage_with_options(info_usage, info_options);
		}
		/* recycling report_lock_ops */
		trace_handler = &report_lock_ops;
		rc = __cmd_report(true);
	} else if (strlen(argv[0]) > 2 && strstarts("contention", argv[0])) {
		trace_handler = &contention_lock_ops;
		sort_key = "wait_total";
		output_fields = "contended,wait_total,wait_max,avg_wait";

#ifndef HAVE_BPF_SKEL
		/* -b is unusable without BPF skeletons; disable it in place */
		set_option_nobuild(contention_options, 'b', "use-bpf",
				   "no BUILD_BPF_SKEL=1", false);
#endif
		if (argc) {
			argc = parse_options(argc, argv, contention_options,
					     contention_usage, 0);
		}

		if (check_lock_contention_options(contention_options,
						  contention_usage) < 0)
			return -1;

		rc = __cmd_contention(argc, argv);
	} else {
		usage_with_options(lock_usage, lock_options);
	}

	zfree(&lockhash_table);
	return rc;
}
2628