xref: /kernel/linux/linux-6.6/tools/lib/bpf/elf.c (revision 62306a36)
1// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
2
3#include <libelf.h>
4#include <gelf.h>
5#include <fcntl.h>
6#include <linux/kernel.h>
7
8#include "libbpf_internal.h"
9#include "str_error.h"
10
/* Size of the on-stack buffer handed to libbpf_strerror_r() for error text. */
#define STRERR_BUFSIZE  128
12
13int elf_open(const char *binary_path, struct elf_fd *elf_fd)
14{
15	char errmsg[STRERR_BUFSIZE];
16	int fd, ret;
17	Elf *elf;
18
19	if (elf_version(EV_CURRENT) == EV_NONE) {
20		pr_warn("elf: failed to init libelf for %s\n", binary_path);
21		return -LIBBPF_ERRNO__LIBELF;
22	}
23	fd = open(binary_path, O_RDONLY | O_CLOEXEC);
24	if (fd < 0) {
25		ret = -errno;
26		pr_warn("elf: failed to open %s: %s\n", binary_path,
27			libbpf_strerror_r(ret, errmsg, sizeof(errmsg)));
28		return ret;
29	}
30	elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
31	if (!elf) {
32		pr_warn("elf: could not read elf from %s: %s\n", binary_path, elf_errmsg(-1));
33		close(fd);
34		return -LIBBPF_ERRNO__FORMAT;
35	}
36	elf_fd->fd = fd;
37	elf_fd->elf = elf;
38	return 0;
39}
40
41void elf_close(struct elf_fd *elf_fd)
42{
43	if (!elf_fd)
44		return;
45	elf_end(elf_fd->elf);
46	close(elf_fd->fd);
47}
48
49/* Return next ELF section of sh_type after scn, or first of that type if scn is NULL. */
50static Elf_Scn *elf_find_next_scn_by_type(Elf *elf, int sh_type, Elf_Scn *scn)
51{
52	while ((scn = elf_nextscn(elf, scn)) != NULL) {
53		GElf_Shdr sh;
54
55		if (!gelf_getshdr(scn, &sh))
56			continue;
57		if (sh.sh_type == sh_type)
58			return scn;
59	}
60	return NULL;
61}
62
/* One symbol as produced by elf_sym_iter_next(). */
struct elf_sym {
	/* symbol name, as returned by elf_strptr() for the symbol's st_name */
	const char *name;
	/* symbol table entry, filled in by gelf_getsym() */
	GElf_Sym sym;
	/* header of the section indexed by the symbol's st_shndx */
	GElf_Shdr sh;
};
68
/* Iteration state over one symbol section; set up by elf_sym_iter_new() and
 * advanced by elf_sym_iter_next().
 */
struct elf_sym_iter {
	/* libelf handle the symbols are read from */
	Elf *elf;
	/* data of the symbol section being walked */
	Elf_Data *syms;
	/* number of entries: section data size divided by sh_entsize */
	size_t nr_syms;
	/* sh_link of the symbol section, passed to elf_strptr() to resolve names */
	size_t strtabidx;
	/* index at which the next elf_sym_iter_next() call resumes */
	size_t next_sym_idx;
	/* storage for the symbol handed back to the caller */
	struct elf_sym sym;
	/* only symbols whose GELF_ST_TYPE() equals this value are returned */
	int st_type;
};
78
79static int elf_sym_iter_new(struct elf_sym_iter *iter,
80			    Elf *elf, const char *binary_path,
81			    int sh_type, int st_type)
82{
83	Elf_Scn *scn = NULL;
84	GElf_Ehdr ehdr;
85	GElf_Shdr sh;
86
87	memset(iter, 0, sizeof(*iter));
88
89	if (!gelf_getehdr(elf, &ehdr)) {
90		pr_warn("elf: failed to get ehdr from %s: %s\n", binary_path, elf_errmsg(-1));
91		return -EINVAL;
92	}
93
94	scn = elf_find_next_scn_by_type(elf, sh_type, NULL);
95	if (!scn) {
96		pr_debug("elf: failed to find symbol table ELF sections in '%s'\n",
97			 binary_path);
98		return -ENOENT;
99	}
100
101	if (!gelf_getshdr(scn, &sh))
102		return -EINVAL;
103
104	iter->strtabidx = sh.sh_link;
105	iter->syms = elf_getdata(scn, 0);
106	if (!iter->syms) {
107		pr_warn("elf: failed to get symbols for symtab section in '%s': %s\n",
108			binary_path, elf_errmsg(-1));
109		return -EINVAL;
110	}
111	iter->nr_syms = iter->syms->d_size / sh.sh_entsize;
112	iter->elf = elf;
113	iter->st_type = st_type;
114	return 0;
115}
116
117static struct elf_sym *elf_sym_iter_next(struct elf_sym_iter *iter)
118{
119	struct elf_sym *ret = &iter->sym;
120	GElf_Sym *sym = &ret->sym;
121	const char *name = NULL;
122	Elf_Scn *sym_scn;
123	size_t idx;
124
125	for (idx = iter->next_sym_idx; idx < iter->nr_syms; idx++) {
126		if (!gelf_getsym(iter->syms, idx, sym))
127			continue;
128		if (GELF_ST_TYPE(sym->st_info) != iter->st_type)
129			continue;
130		name = elf_strptr(iter->elf, iter->strtabidx, sym->st_name);
131		if (!name)
132			continue;
133		sym_scn = elf_getscn(iter->elf, sym->st_shndx);
134		if (!sym_scn)
135			continue;
136		if (!gelf_getshdr(sym_scn, &ret->sh))
137			continue;
138
139		iter->next_sym_idx = idx + 1;
140		ret->name = name;
141		return ret;
142	}
143
144	return NULL;
145}
146
147
148/* Transform symbol's virtual address (absolute for binaries and relative
149 * for shared libs) into file offset, which is what kernel is expecting
150 * for uprobe/uretprobe attachment.
151 * See Documentation/trace/uprobetracer.rst for more details. This is done
152 * by looking up symbol's containing section's header and using iter's virtual
153 * address (sh_addr) and corresponding file offset (sh_offset) to transform
154 * sym.st_value (virtual address) into desired final file offset.
155 */
156static unsigned long elf_sym_offset(struct elf_sym *sym)
157{
158	return sym->sym.st_value - sym->sh.sh_addr + sym->sh.sh_offset;
159}
160
161/* Find offset of function name in the provided ELF object. "binary_path" is
162 * the path to the ELF binary represented by "elf", and only used for error
163 * reporting matters. "name" matches symbol name or name@@LIB for library
164 * functions.
165 */
166long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name)
167{
168	int i, sh_types[2] = { SHT_DYNSYM, SHT_SYMTAB };
169	bool is_shared_lib, is_name_qualified;
170	long ret = -ENOENT;
171	size_t name_len;
172	GElf_Ehdr ehdr;
173
174	if (!gelf_getehdr(elf, &ehdr)) {
175		pr_warn("elf: failed to get ehdr from %s: %s\n", binary_path, elf_errmsg(-1));
176		ret = -LIBBPF_ERRNO__FORMAT;
177		goto out;
178	}
179	/* for shared lib case, we do not need to calculate relative offset */
180	is_shared_lib = ehdr.e_type == ET_DYN;
181
182	name_len = strlen(name);
183	/* Does name specify "@@LIB"? */
184	is_name_qualified = strstr(name, "@@") != NULL;
185
186	/* Search SHT_DYNSYM, SHT_SYMTAB for symbol. This search order is used because if
187	 * a binary is stripped, it may only have SHT_DYNSYM, and a fully-statically
188	 * linked binary may not have SHT_DYMSYM, so absence of a section should not be
189	 * reported as a warning/error.
190	 */
191	for (i = 0; i < ARRAY_SIZE(sh_types); i++) {
192		struct elf_sym_iter iter;
193		struct elf_sym *sym;
194		int last_bind = -1;
195		int cur_bind;
196
197		ret = elf_sym_iter_new(&iter, elf, binary_path, sh_types[i], STT_FUNC);
198		if (ret == -ENOENT)
199			continue;
200		if (ret)
201			goto out;
202
203		while ((sym = elf_sym_iter_next(&iter))) {
204			/* User can specify func, func@@LIB or func@@LIB_VERSION. */
205			if (strncmp(sym->name, name, name_len) != 0)
206				continue;
207			/* ...but we don't want a search for "foo" to match 'foo2" also, so any
208			 * additional characters in sname should be of the form "@@LIB".
209			 */
210			if (!is_name_qualified && sym->name[name_len] != '\0' && sym->name[name_len] != '@')
211				continue;
212
213			cur_bind = GELF_ST_BIND(sym->sym.st_info);
214
215			if (ret > 0) {
216				/* handle multiple matches */
217				if (last_bind != STB_WEAK && cur_bind != STB_WEAK) {
218					/* Only accept one non-weak bind. */
219					pr_warn("elf: ambiguous match for '%s', '%s' in '%s'\n",
220						sym->name, name, binary_path);
221					ret = -LIBBPF_ERRNO__FORMAT;
222					goto out;
223				} else if (cur_bind == STB_WEAK) {
224					/* already have a non-weak bind, and
225					 * this is a weak bind, so ignore.
226					 */
227					continue;
228				}
229			}
230
231			ret = elf_sym_offset(sym);
232			last_bind = cur_bind;
233		}
234		if (ret > 0)
235			break;
236	}
237
238	if (ret > 0) {
239		pr_debug("elf: symbol address match for '%s' in '%s': 0x%lx\n", name, binary_path,
240			 ret);
241	} else {
242		if (ret == 0) {
243			pr_warn("elf: '%s' is 0 in symtab for '%s': %s\n", name, binary_path,
244				is_shared_lib ? "should not be 0 in a shared library" :
245						"try using shared library path instead");
246			ret = -ENOENT;
247		} else {
248			pr_warn("elf: failed to find symbol '%s' in '%s'\n", name, binary_path);
249		}
250	}
251out:
252	return ret;
253}
254
255/* Find offset of function name in ELF object specified by path. "name" matches
256 * symbol name or name@@LIB for library functions.
257 */
258long elf_find_func_offset_from_file(const char *binary_path, const char *name)
259{
260	struct elf_fd elf_fd;
261	long ret = -ENOENT;
262
263	ret = elf_open(binary_path, &elf_fd);
264	if (ret)
265		return ret;
266	ret = elf_find_func_offset(elf_fd.elf, binary_path, name);
267	elf_close(&elf_fd);
268	return ret;
269}
270
/* Bookkeeping entry for one requested symbol in elf_resolve_syms_offsets(). */
struct symbol {
	/* requested symbol name; key used by symbol_cmp() for sort and search */
	const char *name;
	/* binding of the ELF symbol whose offset was recorded last */
	int bind;
	/* position of this name in the caller's array, indexes the offsets array */
	int idx;
};
276
277static int symbol_cmp(const void *a, const void *b)
278{
279	const struct symbol *sym_a = a;
280	const struct symbol *sym_b = b;
281
282	return strcmp(sym_a->name, sym_b->name);
283}
284
285/*
286 * Return offsets in @poffsets for symbols specified in @syms array argument.
287 * On success returns 0 and offsets are returned in allocated array with @cnt
288 * size, that needs to be released by the caller.
289 */
290int elf_resolve_syms_offsets(const char *binary_path, int cnt,
291			     const char **syms, unsigned long **poffsets)
292{
293	int sh_types[2] = { SHT_DYNSYM, SHT_SYMTAB };
294	int err = 0, i, cnt_done = 0;
295	unsigned long *offsets;
296	struct symbol *symbols;
297	struct elf_fd elf_fd;
298
299	err = elf_open(binary_path, &elf_fd);
300	if (err)
301		return err;
302
303	offsets = calloc(cnt, sizeof(*offsets));
304	symbols = calloc(cnt, sizeof(*symbols));
305
306	if (!offsets || !symbols) {
307		err = -ENOMEM;
308		goto out;
309	}
310
311	for (i = 0; i < cnt; i++) {
312		symbols[i].name = syms[i];
313		symbols[i].idx = i;
314	}
315
316	qsort(symbols, cnt, sizeof(*symbols), symbol_cmp);
317
318	for (i = 0; i < ARRAY_SIZE(sh_types); i++) {
319		struct elf_sym_iter iter;
320		struct elf_sym *sym;
321
322		err = elf_sym_iter_new(&iter, elf_fd.elf, binary_path, sh_types[i], STT_FUNC);
323		if (err == -ENOENT)
324			continue;
325		if (err)
326			goto out;
327
328		while ((sym = elf_sym_iter_next(&iter))) {
329			unsigned long sym_offset = elf_sym_offset(sym);
330			int bind = GELF_ST_BIND(sym->sym.st_info);
331			struct symbol *found, tmp = {
332				.name = sym->name,
333			};
334			unsigned long *offset;
335
336			found = bsearch(&tmp, symbols, cnt, sizeof(*symbols), symbol_cmp);
337			if (!found)
338				continue;
339
340			offset = &offsets[found->idx];
341			if (*offset > 0) {
342				/* same offset, no problem */
343				if (*offset == sym_offset)
344					continue;
345				/* handle multiple matches */
346				if (found->bind != STB_WEAK && bind != STB_WEAK) {
347					/* Only accept one non-weak bind. */
348					pr_warn("elf: ambiguous match found '%s@%lu' in '%s' previous offset %lu\n",
349						sym->name, sym_offset, binary_path, *offset);
350					err = -ESRCH;
351					goto out;
352				} else if (bind == STB_WEAK) {
353					/* already have a non-weak bind, and
354					 * this is a weak bind, so ignore.
355					 */
356					continue;
357				}
358			} else {
359				cnt_done++;
360			}
361			*offset = sym_offset;
362			found->bind = bind;
363		}
364	}
365
366	if (cnt != cnt_done) {
367		err = -ENOENT;
368		goto out;
369	}
370
371	*poffsets = offsets;
372
373out:
374	free(symbols);
375	if (err)
376		free(offsets);
377	elf_close(&elf_fd);
378	return err;
379}
380
381/*
382 * Return offsets in @poffsets for symbols specified by @pattern argument.
383 * On success returns 0 and offsets are returned in allocated @poffsets
384 * array with the @pctn size, that needs to be released by the caller.
385 */
386int elf_resolve_pattern_offsets(const char *binary_path, const char *pattern,
387				unsigned long **poffsets, size_t *pcnt)
388{
389	int sh_types[2] = { SHT_SYMTAB, SHT_DYNSYM };
390	unsigned long *offsets = NULL;
391	size_t cap = 0, cnt = 0;
392	struct elf_fd elf_fd;
393	int err = 0, i;
394
395	err = elf_open(binary_path, &elf_fd);
396	if (err)
397		return err;
398
399	for (i = 0; i < ARRAY_SIZE(sh_types); i++) {
400		struct elf_sym_iter iter;
401		struct elf_sym *sym;
402
403		err = elf_sym_iter_new(&iter, elf_fd.elf, binary_path, sh_types[i], STT_FUNC);
404		if (err == -ENOENT)
405			continue;
406		if (err)
407			goto out;
408
409		while ((sym = elf_sym_iter_next(&iter))) {
410			if (!glob_match(sym->name, pattern))
411				continue;
412
413			err = libbpf_ensure_mem((void **) &offsets, &cap, sizeof(*offsets),
414						cnt + 1);
415			if (err)
416				goto out;
417
418			offsets[cnt++] = elf_sym_offset(sym);
419		}
420
421		/* If we found anything in the first symbol section,
422		 * do not search others to avoid duplicates.
423		 */
424		if (cnt)
425			break;
426	}
427
428	if (cnt) {
429		*poffsets = offsets;
430		*pcnt = cnt;
431	} else {
432		err = -ENOENT;
433	}
434
435out:
436	if (err)
437		free(offsets);
438	elf_close(&elf_fd);
439	return err;
440}
441