1// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
2
3/*
4 * resolve_btfids scans Elf object for .BTF_ids section and resolves
5 * its symbols with BTF ID values.
6 *
7 * Each symbol points to 4 bytes data and is expected to have
8 * following name syntax:
9 *
10 * __BTF_ID__<type>__<symbol>[__<id>]
11 *
12 * type is:
13 *
14 *   func    - lookup BTF_KIND_FUNC symbol with <symbol> name
15 *             and store its ID into the data:
16 *
17 *             __BTF_ID__func__vfs_close__1:
18 *             .zero 4
19 *
20 *   struct  - lookup BTF_KIND_STRUCT symbol with <symbol> name
21 *             and store its ID into the data:
22 *
23 *             __BTF_ID__struct__sk_buff__1:
24 *             .zero 4
25 *
26 *   union   - lookup BTF_KIND_UNION symbol with <symbol> name
27 *             and store its ID into the data:
28 *
29 *             __BTF_ID__union__thread_union__1:
30 *             .zero 4
31 *
32 *   typedef - lookup BTF_KIND_TYPEDEF symbol with <symbol> name
33 *             and store its ID into the data:
34 *
35 *             __BTF_ID__typedef__pid_t__1:
36 *             .zero 4
37 *
38 *   set     - store symbol size into first 4 bytes and sort following
39 *             ID list
40 *
41 *             __BTF_ID__set__list:
42 *             .zero 4
43 *             list:
44 *             __BTF_ID__func__vfs_getattr__3:
45 *             .zero 4
46 *             __BTF_ID__func__vfs_fallocate__4:
47 *             .zero 4
48 */
49
50#define  _GNU_SOURCE
51#include <stdio.h>
52#include <string.h>
53#include <unistd.h>
54#include <stdlib.h>
55#include <libelf.h>
56#include <gelf.h>
57#include <sys/stat.h>
58#include <fcntl.h>
59#include <errno.h>
60#include <linux/rbtree.h>
61#include <linux/zalloc.h>
62#include <linux/err.h>
63#include <btf.h>
64#include <libbpf.h>
65#include <parse-options.h>
66
/* Name of the ELF section that holds the ID placeholders to patch. */
#define BTF_IDS_SECTION	".BTF_ids"
/* Prefix shared by every symbol name this tool processes. */
#define BTF_ID		"__BTF_ID__"

/* <type> part of the symbol name, located right after the BTF_ID prefix. */
#define BTF_STRUCT	"struct"
#define BTF_UNION	"union"
#define BTF_TYPEDEF	"typedef"
#define BTF_FUNC	"func"
#define BTF_SET		"set"

/* Capacity of btf_id::addr, i.e. max addresses recorded per symbol name. */
#define ADDR_CNT	100
77
/*
 * One collected symbol name together with every address inside the
 * .BTF_ids section that refers to it.
 */
struct btf_id {
	struct rb_node	 rb_node;	/* links the entry into one of the object trees */
	char		*name;		/* key used for tree ordering and lookup */
	union {
		int	 id;		/* value id_patch() writes to each address */
		int	 cnt;		/* sets: derived from the symbol size in symbols_collect() */
	};
	int		 addr_cnt;	/* number of used slots in addr[] */
	Elf64_Addr	 addr[ADDR_CNT];	/* st_value of every symbol with this name */
};
88
/* State of one resolve_btfids run over a single ELF object. */
struct object {
	const char *path;	/* ELF file to patch */
	const char *btf;	/* optional BTF source; when NULL, BTF is loaded from path */

	struct {
		int		 fd;		/* descriptor returned by open() on path */
		Elf		*elf;		/* libelf handle created by elf_begin() */
		Elf_Data	*symbols;	/* data of the SHT_SYMTAB section */
		Elf_Data	*idlist;	/* data of the .BTF_ids section */
		int		 symbols_shndx;	/* index of the symbol table section */
		int		 idlist_shndx;	/* index of the .BTF_ids section */
		size_t		 strtabidx;	/* sh_link of the symbol table */
		unsigned long	 idlist_addr;	/* sh_addr of the .BTF_ids section */
	} efile;

	/* Per-kind trees of struct btf_id, keyed by name. */
	struct rb_root	sets;
	struct rb_root	structs;
	struct rb_root	unions;
	struct rb_root	typedefs;
	struct rb_root	funcs;

	/* Number of matching symbols counted by symbols_collect(), per kind. */
	int nr_funcs;
	int nr_structs;
	int nr_unions;
	int nr_typedefs;
};
115
/* Verbosity level compared against each message level by the pr_* macros. */
static int verbose;
117
/*
 * Print a printf-style message to stderr when 'var' is at least 'level'.
 *
 * Returns the vfprintf() result when the message is printed and 0 when
 * it is filtered out (the return value used to be uninitialized in
 * that case).
 */
int eprintf(int level, int var, const char *fmt, ...)
{
	va_list args;
	int ret = 0;

	if (var >= level) {
		va_start(args, fmt);
		ret = vfprintf(stderr, fmt, args);
		va_end(args);
	}
	return ret;
}
130
/* Default format decorator; may be overridden before this point. */
#ifndef pr_fmt
#define pr_fmt(fmt) fmt
#endif

/*
 * Logging helpers built on eprintf(): a message is emitted when the
 * global 'verbose' level is at least the helper's level
 * (pr_err: 0, pr_debug: 1, pr_debug2: 2, pr_debugN: n).
 */
#define pr_debug(fmt, ...) \
	eprintf(1, verbose, pr_fmt(fmt), ##__VA_ARGS__)
#define pr_debugN(n, fmt, ...) \
	eprintf(n, verbose, pr_fmt(fmt), ##__VA_ARGS__)
/* pr_debugN() already applies pr_fmt(), so hand it the raw format. */
#define pr_debug2(fmt, ...) pr_debugN(2, fmt, ##__VA_ARGS__)
#define pr_err(fmt, ...) \
	eprintf(0, verbose, pr_fmt(fmt), ##__VA_ARGS__)
142
143static bool is_btf_id(const char *name)
144{
145	return name && !strncmp(name, BTF_ID, sizeof(BTF_ID) - 1);
146}
147
148static struct btf_id *btf_id__find(struct rb_root *root, const char *name)
149{
150	struct rb_node *p = root->rb_node;
151	struct btf_id *id;
152	int cmp;
153
154	while (p) {
155		id = rb_entry(p, struct btf_id, rb_node);
156		cmp = strcmp(id->name, name);
157		if (cmp < 0)
158			p = p->rb_left;
159		else if (cmp > 0)
160			p = p->rb_right;
161		else
162			return id;
163	}
164	return NULL;
165}
166
167static struct btf_id*
168btf_id__add(struct rb_root *root, char *name, bool unique)
169{
170	struct rb_node **p = &root->rb_node;
171	struct rb_node *parent = NULL;
172	struct btf_id *id;
173	int cmp;
174
175	while (*p != NULL) {
176		parent = *p;
177		id = rb_entry(parent, struct btf_id, rb_node);
178		cmp = strcmp(id->name, name);
179		if (cmp < 0)
180			p = &(*p)->rb_left;
181		else if (cmp > 0)
182			p = &(*p)->rb_right;
183		else
184			return unique ? NULL : id;
185	}
186
187	id = zalloc(sizeof(*id));
188	if (id) {
189		pr_debug("adding symbol %s\n", name);
190		id->name = name;
191		rb_link_node(&id->rb_node, parent, p);
192		rb_insert_color(&id->rb_node, root);
193	}
194	return id;
195}
196
/*
 * Extract the <symbol> part of a BTF ID symbol name.
 *
 * 'prefix_end' points just behind the <type> part:
 *
 *   __BTF_ID__func__vfs_truncate__0
 *   prefix_end =  ^
 *   start      =    ^
 *   cut        =                 ^
 *
 * The two characters at 'prefix_end' are skipped and the trailing
 * "__<id>" suffix is cut off.
 *
 * Returns a newly allocated string the caller must free(), or NULL
 * when nothing follows the separator, when the name does not end in
 * "__<id>", or when the allocation fails.
 */
static char *get_id(const char *prefix_end)
{
	const size_t sep_len = sizeof("__") - 1;
	const char *start, *cut;
	size_t len;
	char *id;

	if (strlen(prefix_end) <= sep_len)
		return NULL;

	start = prefix_end + sep_len;

	/*
	 * The last '_' must exist and must be preceded by another '_'
	 * that still lies inside the string.
	 */
	cut = strrchr(start, '_');
	if (!cut || cut == start || cut[-1] != '_')
		return NULL;

	/* Keep everything in front of the "__<id>" suffix. */
	len = (size_t)(cut - 1 - start);

	id = malloc(len + 1);
	if (id) {
		memcpy(id, start, len);
		id[len] = '\0';
	}
	return id;
}
229
230static struct btf_id *add_set(struct object *obj, char *name)
231{
232	/*
233	 * __BTF_ID__set__name
234	 * name =    ^
235	 * id   =         ^
236	 */
237	char *id = name + sizeof(BTF_SET "__") - 1;
238	int len = strlen(name);
239
240	if (id >= name + len) {
241		pr_err("FAILED to parse set name: %s\n", name);
242		return NULL;
243	}
244
245	return btf_id__add(&obj->sets, id, true);
246}
247
248static struct btf_id *add_symbol(struct rb_root *root, char *name, size_t size)
249{
250	char *id;
251
252	id = get_id(name + size);
253	if (!id) {
254		pr_err("FAILED to parse symbol name: %s\n", name);
255		return NULL;
256	}
257
258	return btf_id__add(root, id, false);
259}
260
/*
 * Older libelf.h and glibc elf.h might not yet define the ELF compression
 * types, so provide the section flag tested by compressed_section_fix().
 */
#ifndef SHF_COMPRESSED
#define SHF_COMPRESSED (1 << 11) /* Section with compressed data. */
#endif
265
266/*
267 * The data of compressed section should be aligned to 4
268 * (for 32bit) or 8 (for 64 bit) bytes. The binutils ld
269 * sets sh_addralign to 1, which makes libelf fail with
270 * misaligned section error during the update:
271 *    FAILED elf_update(WRITE): invalid section alignment
272 *
273 * While waiting for ld fix, we fix the compressed sections
 * sh_addralign value manually.
275 */
276static int compressed_section_fix(Elf *elf, Elf_Scn *scn, GElf_Shdr *sh)
277{
278	int expected = gelf_getclass(elf) == ELFCLASS32 ? 4 : 8;
279
280	if (!(sh->sh_flags & SHF_COMPRESSED))
281		return 0;
282
283	if (sh->sh_addralign == expected)
284		return 0;
285
286	pr_debug2(" - fixing wrong alignment sh_addralign %u, expected %u\n",
287		  sh->sh_addralign, expected);
288
289	sh->sh_addralign = expected;
290
291	if (gelf_update_shdr(scn, sh) == 0) {
292		printf("FAILED cannot update section header: %s\n",
293			elf_errmsg(-1));
294		return -1;
295	}
296	return 0;
297}
298
299static int elf_collect(struct object *obj)
300{
301	Elf_Scn *scn = NULL;
302	size_t shdrstrndx;
303	int idx = 0;
304	Elf *elf;
305	int fd;
306
307	fd = open(obj->path, O_RDWR, 0666);
308	if (fd == -1) {
309		pr_err("FAILED cannot open %s: %s\n",
310			obj->path, strerror(errno));
311		return -1;
312	}
313
314	elf_version(EV_CURRENT);
315
316	elf = elf_begin(fd, ELF_C_RDWR_MMAP, NULL);
317	if (!elf) {
318		pr_err("FAILED cannot create ELF descriptor: %s\n",
319			elf_errmsg(-1));
320		return -1;
321	}
322
323	obj->efile.fd  = fd;
324	obj->efile.elf = elf;
325
326	elf_flagelf(elf, ELF_C_SET, ELF_F_LAYOUT);
327
328	if (elf_getshdrstrndx(elf, &shdrstrndx) != 0) {
329		pr_err("FAILED cannot get shdr str ndx\n");
330		return -1;
331	}
332
333	/*
334	 * Scan all the elf sections and look for save data
335	 * from .BTF_ids section and symbols.
336	 */
337	while ((scn = elf_nextscn(elf, scn)) != NULL) {
338		Elf_Data *data;
339		GElf_Shdr sh;
340		char *name;
341
342		idx++;
343		if (gelf_getshdr(scn, &sh) != &sh) {
344			pr_err("FAILED get section(%d) header\n", idx);
345			return -1;
346		}
347
348		name = elf_strptr(elf, shdrstrndx, sh.sh_name);
349		if (!name) {
350			pr_err("FAILED get section(%d) name\n", idx);
351			return -1;
352		}
353
354		data = elf_getdata(scn, 0);
355		if (!data) {
356			pr_err("FAILED to get section(%d) data from %s\n",
357				idx, name);
358			return -1;
359		}
360
361		pr_debug2("section(%d) %s, size %ld, link %d, flags %lx, type=%d\n",
362			  idx, name, (unsigned long) data->d_size,
363			  (int) sh.sh_link, (unsigned long) sh.sh_flags,
364			  (int) sh.sh_type);
365
366		if (sh.sh_type == SHT_SYMTAB) {
367			obj->efile.symbols       = data;
368			obj->efile.symbols_shndx = idx;
369			obj->efile.strtabidx     = sh.sh_link;
370		} else if (!strcmp(name, BTF_IDS_SECTION)) {
371			obj->efile.idlist       = data;
372			obj->efile.idlist_shndx = idx;
373			obj->efile.idlist_addr  = sh.sh_addr;
374		}
375
376		if (compressed_section_fix(elf, scn, &sh))
377			return -1;
378	}
379
380	return 0;
381}
382
383static int symbols_collect(struct object *obj)
384{
385	Elf_Scn *scn = NULL;
386	int n, i, err = 0;
387	GElf_Shdr sh;
388	char *name;
389
390	scn = elf_getscn(obj->efile.elf, obj->efile.symbols_shndx);
391	if (!scn)
392		return -1;
393
394	if (gelf_getshdr(scn, &sh) != &sh)
395		return -1;
396
397	n = sh.sh_size / sh.sh_entsize;
398
399	/*
400	 * Scan symbols and look for the ones starting with
401	 * __BTF_ID__* over .BTF_ids section.
402	 */
403	for (i = 0; !err && i < n; i++) {
404		char *tmp, *prefix;
405		struct btf_id *id;
406		GElf_Sym sym;
407		int err = -1;
408
409		if (!gelf_getsym(obj->efile.symbols, i, &sym))
410			return -1;
411
412		if (sym.st_shndx != obj->efile.idlist_shndx)
413			continue;
414
415		name = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
416				  sym.st_name);
417
418		if (!is_btf_id(name))
419			continue;
420
421		/*
422		 * __BTF_ID__TYPE__vfs_truncate__0
423		 * prefix =  ^
424		 */
425		prefix = name + sizeof(BTF_ID) - 1;
426
427		/* struct */
428		if (!strncmp(prefix, BTF_STRUCT, sizeof(BTF_STRUCT) - 1)) {
429			obj->nr_structs++;
430			id = add_symbol(&obj->structs, prefix, sizeof(BTF_STRUCT) - 1);
431		/* union  */
432		} else if (!strncmp(prefix, BTF_UNION, sizeof(BTF_UNION) - 1)) {
433			obj->nr_unions++;
434			id = add_symbol(&obj->unions, prefix, sizeof(BTF_UNION) - 1);
435		/* typedef */
436		} else if (!strncmp(prefix, BTF_TYPEDEF, sizeof(BTF_TYPEDEF) - 1)) {
437			obj->nr_typedefs++;
438			id = add_symbol(&obj->typedefs, prefix, sizeof(BTF_TYPEDEF) - 1);
439		/* func */
440		} else if (!strncmp(prefix, BTF_FUNC, sizeof(BTF_FUNC) - 1)) {
441			obj->nr_funcs++;
442			id = add_symbol(&obj->funcs, prefix, sizeof(BTF_FUNC) - 1);
443		/* set */
444		} else if (!strncmp(prefix, BTF_SET, sizeof(BTF_SET) - 1)) {
445			id = add_set(obj, prefix);
446			/*
447			 * SET objects store list's count, which is encoded
448			 * in symbol's size, together with 'cnt' field hence
449			 * that - 1.
450			 */
451			if (id)
452				id->cnt = sym.st_size / sizeof(int) - 1;
453		} else {
454			pr_err("FAILED unsupported prefix %s\n", prefix);
455			return -1;
456		}
457
458		if (!id)
459			return -ENOMEM;
460
461		if (id->addr_cnt >= ADDR_CNT) {
462			pr_err("FAILED symbol %s crossed the number of allowed lists",
463				id->name);
464			return -1;
465		}
466		id->addr[id->addr_cnt++] = sym.st_value;
467	}
468
469	return 0;
470}
471
472static int symbols_resolve(struct object *obj)
473{
474	int nr_typedefs = obj->nr_typedefs;
475	int nr_structs  = obj->nr_structs;
476	int nr_unions   = obj->nr_unions;
477	int nr_funcs    = obj->nr_funcs;
478	int err, type_id;
479	struct btf *btf;
480	__u32 nr;
481
482	btf = btf__parse(obj->btf ?: obj->path, NULL);
483	err = libbpf_get_error(btf);
484	if (err) {
485		pr_err("FAILED: load BTF from %s: %s",
486			obj->path, strerror(err));
487		return -1;
488	}
489
490	err = -1;
491	nr  = btf__get_nr_types(btf);
492
493	/*
494	 * Iterate all the BTF types and search for collected symbol IDs.
495	 */
496	for (type_id = 1; type_id <= nr; type_id++) {
497		const struct btf_type *type;
498		struct rb_root *root;
499		struct btf_id *id;
500		const char *str;
501		int *nr;
502
503		type = btf__type_by_id(btf, type_id);
504		if (!type) {
505			pr_err("FAILED: malformed BTF, can't resolve type for ID %d\n",
506				type_id);
507			goto out;
508		}
509
510		if (btf_is_func(type) && nr_funcs) {
511			nr   = &nr_funcs;
512			root = &obj->funcs;
513		} else if (btf_is_struct(type) && nr_structs) {
514			nr   = &nr_structs;
515			root = &obj->structs;
516		} else if (btf_is_union(type) && nr_unions) {
517			nr   = &nr_unions;
518			root = &obj->unions;
519		} else if (btf_is_typedef(type) && nr_typedefs) {
520			nr   = &nr_typedefs;
521			root = &obj->typedefs;
522		} else
523			continue;
524
525		str = btf__name_by_offset(btf, type->name_off);
526		if (!str) {
527			pr_err("FAILED: malformed BTF, can't resolve name for ID %d\n",
528				type_id);
529			goto out;
530		}
531
532		id = btf_id__find(root, str);
533		if (id) {
534			id->id = type_id;
535			(*nr)--;
536		}
537	}
538
539	err = 0;
540out:
541	btf__free(btf);
542	return err;
543}
544
545static int id_patch(struct object *obj, struct btf_id *id)
546{
547	Elf_Data *data = obj->efile.idlist;
548	int *ptr = data->d_buf;
549	int i;
550
551	if (!id->id) {
552		pr_err("FAILED unresolved symbol %s\n", id->name);
553		return -EINVAL;
554	}
555
556	for (i = 0; i < id->addr_cnt; i++) {
557		unsigned long addr = id->addr[i];
558		unsigned long idx = addr - obj->efile.idlist_addr;
559
560		pr_debug("patching addr %5lu: ID %7d [%s]\n",
561			 idx, id->id, id->name);
562
563		if (idx >= data->d_size) {
564			pr_err("FAILED patching index %lu out of bounds %lu\n",
565				idx, data->d_size);
566			return -1;
567		}
568
569		idx = idx / sizeof(int);
570		ptr[idx] = id->id;
571	}
572
573	return 0;
574}
575
576static int __symbols_patch(struct object *obj, struct rb_root *root)
577{
578	struct rb_node *next;
579	struct btf_id *id;
580
581	next = rb_first(root);
582	while (next) {
583		id = rb_entry(next, struct btf_id, rb_node);
584
585		if (id_patch(obj, id))
586			return -1;
587
588		next = rb_next(next);
589	}
590	return 0;
591}
592
/*
 * qsort() comparator ordering ints ascending.
 *
 * Returns -1, 0 or 1. The values are compared instead of subtracted,
 * because the difference of two ints can overflow and flip the sign.
 */
static int cmp_id(const void *pa, const void *pb)
{
	const int *a = pa, *b = pb;

	return (*a > *b) - (*a < *b);
}
599
600static int sets_patch(struct object *obj)
601{
602	Elf_Data *data = obj->efile.idlist;
603	int *ptr = data->d_buf;
604	struct rb_node *next;
605
606	next = rb_first(&obj->sets);
607	while (next) {
608		unsigned long addr, idx;
609		struct btf_id *id;
610		int *base;
611		int cnt;
612
613		id   = rb_entry(next, struct btf_id, rb_node);
614		addr = id->addr[0];
615		idx  = addr - obj->efile.idlist_addr;
616
617		/* sets are unique */
618		if (id->addr_cnt != 1) {
619			pr_err("FAILED malformed data for set '%s'\n",
620				id->name);
621			return -1;
622		}
623
624		idx = idx / sizeof(int);
625		base = &ptr[idx] + 1;
626		cnt = ptr[idx];
627
628		pr_debug("sorting  addr %5lu: cnt %6d [%s]\n",
629			 (idx + 1) * sizeof(int), cnt, id->name);
630
631		qsort(base, cnt, sizeof(int), cmp_id);
632
633		next = rb_next(next);
634	}
635	return 0;
636}
637
638static int symbols_patch(struct object *obj)
639{
640	int err;
641
642	if (__symbols_patch(obj, &obj->structs)  ||
643	    __symbols_patch(obj, &obj->unions)   ||
644	    __symbols_patch(obj, &obj->typedefs) ||
645	    __symbols_patch(obj, &obj->funcs)    ||
646	    __symbols_patch(obj, &obj->sets))
647		return -1;
648
649	if (sets_patch(obj))
650		return -1;
651
652	/* Set type to ensure endian translation occurs. */
653	obj->efile.idlist->d_type = ELF_T_WORD;
654
655	elf_flagdata(obj->efile.idlist, ELF_C_SET, ELF_F_DIRTY);
656
657	err = elf_update(obj->efile.elf, ELF_C_WRITE);
658	if (err < 0) {
659		pr_err("FAILED elf_update(WRITE): %s\n",
660			elf_errmsg(-1));
661	}
662
663	pr_debug("update %s for %s\n",
664		 err >= 0 ? "ok" : "failed", obj->path);
665	return err < 0 ? -1 : 0;
666}
667
/* NULL-terminated usage lines handed to the option parser in main(). */
static const char * const resolve_btfids_usage[] = {
	"resolve_btfids [<options>] <ELF object>",
	NULL
};
672
673int main(int argc, const char **argv)
674{
675	bool no_fail = false;
676	struct object obj = {
677		.efile = {
678			.idlist_shndx  = -1,
679			.symbols_shndx = -1,
680		},
681		.structs  = RB_ROOT,
682		.unions   = RB_ROOT,
683		.typedefs = RB_ROOT,
684		.funcs    = RB_ROOT,
685		.sets     = RB_ROOT,
686	};
687	struct option btfid_options[] = {
688		OPT_INCR('v', "verbose", &verbose,
689			 "be more verbose (show errors, etc)"),
690		OPT_STRING(0, "btf", &obj.btf, "BTF data",
691			   "BTF data"),
692		OPT_BOOLEAN(0, "no-fail", &no_fail,
693			   "do not fail if " BTF_IDS_SECTION " section is not found"),
694		OPT_END()
695	};
696	int err = -1;
697
698	argc = parse_options(argc, argv, btfid_options, resolve_btfids_usage,
699			     PARSE_OPT_STOP_AT_NON_OPTION);
700	if (argc != 1)
701		usage_with_options(resolve_btfids_usage, btfid_options);
702
703	obj.path = argv[0];
704
705	if (elf_collect(&obj))
706		goto out;
707
708	/*
709	 * We did not find .BTF_ids section or symbols section,
710	 * nothing to do..
711	 */
712	if (obj.efile.idlist_shndx == -1 ||
713	    obj.efile.symbols_shndx == -1) {
714		if (no_fail)
715			return 0;
716		pr_err("FAILED to find needed sections\n");
717		return -1;
718	}
719
720	if (symbols_collect(&obj))
721		goto out;
722
723	if (symbols_resolve(&obj))
724		goto out;
725
726	if (symbols_patch(&obj))
727		goto out;
728
729	err = 0;
730out:
731	if (obj.efile.elf)
732		elf_end(obj.efile.elf);
733	close(obj.efile.fd);
734	return err;
735}
736