xref: /kernel/linux/linux-5.10/scripts/kallsyms.c (revision 8c2ecf20)
1/* Generate assembler source containing symbol information
2 *
3 * Copyright 2002       by Kai Germaschewski
4 *
5 * This software may be used and distributed according to the terms
6 * of the GNU General Public License, incorporated herein by reference.
7 *
8 * Usage: nm -n vmlinux | scripts/kallsyms [--all-symbols] > symbols.S
9 *
10 *      Table compression uses all the unused char codes on the symbols and
11 *  maps these to the most used substrings (tokens). For instance, it might
12 *  map char code 0xF7 to represent "write_" and then in every symbol where
13 *  "write_" appears it can be replaced by 0xF7, saving 5 bytes.
14 *      The used codes themselves are also placed in the table so that the
15 *  decompression can work without "special cases".
16 *      Applied to kernel symbols, this usually produces a compression ratio
17 *  of about 50%.
18 *
19 */
20
21#include <stdbool.h>
22#include <stdio.h>
23#include <stdlib.h>
24#include <string.h>
25#include <ctype.h>
26#include <limits.h>
27
/* Number of elements in a true array (must not be used on a pointer). */
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))

/* Symbol names whose strlen() is >= this value are rejected by read_symbol() */
#define KSYM_NAME_LEN		128
31
/*
 * One symbol taken from the input map.
 *
 * sym[0] holds the type character and the name follows it, so that type
 * and name are compressed together; len is the number of bytes of sym[]
 * currently in use.
 */
struct sym_entry {
	unsigned long long addr;	/* symbol address */
	unsigned int len;		/* bytes in use in sym[] */
	unsigned int start_pos;		/* index in input order, last sort key */
	unsigned int percpu_absolute;	/* set by make_percpus_absolute() */
	unsigned char sym[];		/* type character, then the name */
};
39
/* An address interval whose bounds are given by two named symbols. */
struct addr_range {
	const char *start_sym;		/* name of the symbol marking the start */
	const char *end_sym;		/* name of the symbol marking the end */
	unsigned long long start;	/* address recorded for start_sym */
	unsigned long long end;		/* address recorded for end_sym */
};
44
45static unsigned long long _text;
46static unsigned long long relative_base;
47static struct addr_range text_ranges[] = {
48	{ "_stext",     "_etext"     },
49	{ "_sinittext", "_einittext" },
50};
51#define text_range_text     (&text_ranges[0])
52#define text_range_inittext (&text_ranges[1])
53
54static struct addr_range percpu_range = {
55	"__per_cpu_start", "__per_cpu_end", -1ULL, 0
56};
57
58static struct sym_entry **table;
59static unsigned int table_size, table_cnt;
60static int all_symbols;
61static int absolute_percpu;
62static int base_relative;
63
64static int token_profit[0x10000];
65
66/* the table that holds the result of the compression */
67static unsigned char best_table[256][2];
68static unsigned char best_table_len[256];
69
70
/*
 * Print the command line synopsis on stderr and terminate with status 1.
 * Lists every option accepted by main().
 */
static void usage(void)
{
	fprintf(stderr, "Usage: kallsyms [--all-symbols] [--absolute-percpu] "
			"[--base-relative] < in.map > out.S\n");
	exit(1);
}
77
78static char *sym_name(const struct sym_entry *s)
79{
80	return (char *)s->sym + 1;
81}
82
/*
 * Decide whether a symbol must be left out of the kallsyms table.
 *
 * @name: NUL-terminated symbol name
 * @type: symbol type character
 *
 * Returns true when the name matches one of the lists below (exact match,
 * prefix, suffix or substring), when the type is 'U'/'u' or 'N'/'n', or
 * when the type is 'A'/'a' and the name is not one of the few absolute
 * symbols that are kept. Returns false otherwise.
 */
static bool is_ignored_symbol(const char *name, char type)
{
	/* Symbol names that exactly match the following are ignored. */
	static const char * const ignored_symbols[] = {
		/*
		 * Symbols which vary between passes. Passes 1 and 2 must have
		 * identical symbol lists. The kallsyms_* symbols below are
		 * only added after pass 1, they would be included in pass 2
		 * when --all-symbols is specified so exclude them to get a
		 * stable symbol list.
		 */
		"kallsyms_addresses",
		"kallsyms_offsets",
		"kallsyms_relative_base",
		"kallsyms_num_syms",
		"kallsyms_names",
		"kallsyms_markers",
		"kallsyms_token_table",
		"kallsyms_token_index",
		/* Exclude linker generated symbols which vary between passes */
		"_SDA_BASE_",		/* ppc */
		"_SDA2_BASE_",		/* ppc */
		NULL
	};

	/* Symbol names that begin with the following are ignored. */
	static const char * const ignored_prefixes[] = {
		"$",			/* local symbols for ARM, MIPS, etc. */
		"L0",			/* LoongArch local symbols */
		".L",			/* LoongArch/S390 local symbols */
		"__crc_",		/* modversions */
		"__efistub_",		/* arm64 EFI stub namespace */
		"__kvm_nvhe_",		/* arm64 non-VHE KVM namespace */
		"__AArch64ADRPThunk_",	/* arm64 lld */
		"__ARMV5PILongThunk_",	/* arm lld */
		"__ARMV7PILongThunk_",
		"__ThumbV7PILongThunk_",
		"__LA25Thunk_",		/* mips lld */
		"__microLA25Thunk_",
		NULL
	};

	/* Symbol names that end with the following are ignored. */
	static const char * const ignored_suffixes[] = {
		"_from_arm",		/* arm */
		"_from_thumb",		/* arm */
		"_veneer",		/* arm */
		NULL
	};

	/* Symbol names that contain the following are ignored. */
	static const char * const ignored_matches[] = {
		".long_branch.",	/* ppc stub */
		".plt_branch.",		/* ppc stub */
		NULL
	};

	const char * const *p;
	const size_t name_len = strlen(name);

	for (p = ignored_symbols; *p; p++)
		if (!strcmp(name, *p))
			return true;

	for (p = ignored_prefixes; *p; p++)
		if (!strncmp(name, *p, strlen(*p)))
			return true;

	for (p = ignored_suffixes; *p; p++) {
		const size_t suffix_len = strlen(*p);

		/*
		 * Compare the lengths before subtracting: both are unsigned,
		 * so the difference must never be allowed to wrap.
		 */
		if (name_len >= suffix_len &&
		    !strcmp(name + (name_len - suffix_len), *p))
			return true;
	}

	for (p = ignored_matches; *p; p++) {
		if (strstr(name, *p))
			return true;
	}

	if (type == 'U' || type == 'u')
		return true;
	/* exclude debugging symbols */
	if (type == 'N' || type == 'n')
		return true;

	/*
	 * <ctype.h> functions require a value representable as unsigned
	 * char; plain char may be negative.
	 */
	if (toupper((unsigned char)type) == 'A') {
		/* Keep these useful absolute symbols */
		if (strcmp(name, "__kernel_syscall_via_break") &&
		    strcmp(name, "__kernel_syscall_via_epc") &&
		    strcmp(name, "__kernel_sigtramp") &&
		    strcmp(name, "__gp"))
			return true;
	}

	return false;
}
179
180static void check_symbol_range(const char *sym, unsigned long long addr,
181			       struct addr_range *ranges, int entries)
182{
183	size_t i;
184	struct addr_range *ar;
185
186	for (i = 0; i < entries; ++i) {
187		ar = &ranges[i];
188
189		if (strcmp(sym, ar->start_sym) == 0) {
190			ar->start = addr;
191			return;
192		} else if (strcmp(sym, ar->end_sym) == 0) {
193			ar->end = addr;
194			return;
195		}
196	}
197}
198
199static struct sym_entry *read_symbol(FILE *in)
200{
201	char name[500], type;
202	unsigned long long addr;
203	unsigned int len;
204	struct sym_entry *sym;
205	int rc;
206
207	rc = fscanf(in, "%llx %c %499s\n", &addr, &type, name);
208	if (rc != 3) {
209		if (rc != EOF && fgets(name, 500, in) == NULL)
210			fprintf(stderr, "Read error or end of file.\n");
211		return NULL;
212	}
213	if (strlen(name) >= KSYM_NAME_LEN) {
214		fprintf(stderr, "Symbol %s too long for kallsyms (%zu >= %d).\n"
215				"Please increase KSYM_NAME_LEN both in kernel and kallsyms.c\n",
216			name, strlen(name), KSYM_NAME_LEN);
217		return NULL;
218	}
219
220	if (strcmp(name, "_text") == 0)
221		_text = addr;
222
223	/* Ignore most absolute/undefined (?) symbols. */
224	if (is_ignored_symbol(name, type))
225		return NULL;
226
227	check_symbol_range(name, addr, text_ranges, ARRAY_SIZE(text_ranges));
228	check_symbol_range(name, addr, &percpu_range, 1);
229
230	/* include the type field in the symbol name, so that it gets
231	 * compressed together */
232
233	len = strlen(name) + 1;
234
235	sym = malloc(sizeof(*sym) + len + 1);
236	if (!sym) {
237		fprintf(stderr, "kallsyms failure: "
238			"unable to allocate required amount of memory\n");
239		exit(EXIT_FAILURE);
240	}
241	sym->addr = addr;
242	sym->len = len;
243	sym->sym[0] = type;
244	strcpy(sym_name(sym), name);
245	sym->percpu_absolute = 0;
246
247	return sym;
248}
249
250static int symbol_in_range(const struct sym_entry *s,
251			   const struct addr_range *ranges, int entries)
252{
253	size_t i;
254	const struct addr_range *ar;
255
256	for (i = 0; i < entries; ++i) {
257		ar = &ranges[i];
258
259		if (s->addr >= ar->start && s->addr <= ar->end)
260			return 1;
261	}
262
263	return 0;
264}
265
266static int symbol_valid(const struct sym_entry *s)
267{
268	const char *name = sym_name(s);
269
270	/* if --all-symbols is not specified, then symbols outside the text
271	 * and inittext sections are discarded */
272	if (!all_symbols) {
273		if (symbol_in_range(s, text_ranges,
274				    ARRAY_SIZE(text_ranges)) == 0)
275			return 0;
276		/* Corner case.  Discard any symbols with the same value as
277		 * _etext _einittext; they can move between pass 1 and 2 when
278		 * the kallsyms data are added.  If these symbols move then
279		 * they may get dropped in pass 2, which breaks the kallsyms
280		 * rules.
281		 */
282		if ((s->addr == text_range_text->end &&
283		     strcmp(name, text_range_text->end_sym)) ||
284		    (s->addr == text_range_inittext->end &&
285		     strcmp(name, text_range_inittext->end_sym)))
286			return 0;
287	}
288
289	return 1;
290}
291
292/* remove all the invalid symbols from the table */
293static void shrink_table(void)
294{
295	unsigned int i, pos;
296
297	pos = 0;
298	for (i = 0; i < table_cnt; i++) {
299		if (symbol_valid(table[i])) {
300			if (pos != i)
301				table[pos] = table[i];
302			pos++;
303		} else {
304			free(table[i]);
305		}
306	}
307	table_cnt = pos;
308
309	/* When valid symbol is not registered, exit to error */
310	if (!table_cnt) {
311		fprintf(stderr, "No valid symbol.\n");
312		exit(1);
313	}
314}
315
316static void read_map(FILE *in)
317{
318	struct sym_entry *sym;
319
320	while (!feof(in)) {
321		sym = read_symbol(in);
322		if (!sym)
323			continue;
324
325		sym->start_pos = table_cnt;
326
327		if (table_cnt >= table_size) {
328			table_size += 10000;
329			table = realloc(table, sizeof(*table) * table_size);
330			if (!table) {
331				fprintf(stderr, "out of memory\n");
332				exit (1);
333			}
334		}
335
336		table[table_cnt++] = sym;
337	}
338}
339
/* Emit a global, ALGN-aligned assembler label named @label on stdout. */
static void output_label(const char *label)
{
	printf(".globl %s\n\tALGN\n%s:\n", label, label);
}
346
347/* Provide proper symbols relocatability by their '_text' relativeness. */
348static void output_address(unsigned long long addr)
349{
350	if (_text <= addr)
351		printf("\tPTR\t_text + %#llx\n", addr - _text);
352	else
353		printf("\tPTR\t_text - %#llx\n", _text - addr);
354}
355
356/* uncompress a compressed symbol. When this function is called, the best table
357 * might still be compressed itself, so the function needs to be recursive */
358static int expand_symbol(const unsigned char *data, int len, char *result)
359{
360	int c, rlen, total=0;
361
362	while (len) {
363		c = *data;
364		/* if the table holds a single char that is the same as the one
365		 * we are looking for, then end the search */
366		if (best_table[c][0]==c && best_table_len[c]==1) {
367			*result++ = c;
368			total++;
369		} else {
370			/* if not, recurse and expand */
371			rlen = expand_symbol(best_table[c], best_table_len[c], result);
372			total += rlen;
373			result += rlen;
374		}
375		data++;
376		len--;
377	}
378	*result=0;
379
380	return total;
381}
382
383static int symbol_absolute(const struct sym_entry *s)
384{
385	return s->percpu_absolute;
386}
387
388static void write_src(void)
389{
390	unsigned int i, k, off;
391	unsigned int best_idx[256];
392	unsigned int *markers;
393	char buf[KSYM_NAME_LEN];
394
395	printf("#include <asm/bitsperlong.h>\n");
396	printf("#if BITS_PER_LONG == 64\n");
397	printf("#define PTR .quad\n");
398	printf("#define ALGN .balign 8\n");
399	printf("#else\n");
400	printf("#define PTR .long\n");
401	printf("#define ALGN .balign 4\n");
402	printf("#endif\n");
403
404	printf("\t.section .rodata, \"a\"\n");
405
406	if (!base_relative)
407		output_label("kallsyms_addresses");
408	else
409		output_label("kallsyms_offsets");
410
411	for (i = 0; i < table_cnt; i++) {
412		if (base_relative) {
413			/*
414			 * Use the offset relative to the lowest value
415			 * encountered of all relative symbols, and emit
416			 * non-relocatable fixed offsets that will be fixed
417			 * up at runtime.
418			 */
419
420			long long offset;
421			int overflow;
422
423			if (!absolute_percpu) {
424				offset = table[i]->addr - relative_base;
425				overflow = (offset < 0 || offset > UINT_MAX);
426			} else if (symbol_absolute(table[i])) {
427				offset = table[i]->addr;
428				overflow = (offset < 0 || offset > INT_MAX);
429			} else {
430				offset = relative_base - table[i]->addr - 1;
431				overflow = (offset < INT_MIN || offset >= 0);
432			}
433			if (overflow) {
434				fprintf(stderr, "kallsyms failure: "
435					"%s symbol value %#llx out of range in relative mode\n",
436					symbol_absolute(table[i]) ? "absolute" : "relative",
437					table[i]->addr);
438				exit(EXIT_FAILURE);
439			}
440			printf("\t.long\t%#x\n", (int)offset);
441		} else if (!symbol_absolute(table[i])) {
442			output_address(table[i]->addr);
443		} else {
444			printf("\tPTR\t%#llx\n", table[i]->addr);
445		}
446	}
447	printf("\n");
448
449	if (base_relative) {
450		output_label("kallsyms_relative_base");
451		output_address(relative_base);
452		printf("\n");
453	}
454
455	output_label("kallsyms_num_syms");
456	printf("\t.long\t%u\n", table_cnt);
457	printf("\n");
458
459	/* table of offset markers, that give the offset in the compressed stream
460	 * every 256 symbols */
461	markers = malloc(sizeof(unsigned int) * ((table_cnt + 255) / 256));
462	if (!markers) {
463		fprintf(stderr, "kallsyms failure: "
464			"unable to allocate required memory\n");
465		exit(EXIT_FAILURE);
466	}
467
468	output_label("kallsyms_names");
469	off = 0;
470	for (i = 0; i < table_cnt; i++) {
471		if ((i & 0xFF) == 0)
472			markers[i >> 8] = off;
473
474		printf("\t.byte 0x%02x", table[i]->len);
475		for (k = 0; k < table[i]->len; k++)
476			printf(", 0x%02x", table[i]->sym[k]);
477		printf("\n");
478
479		off += table[i]->len + 1;
480	}
481	printf("\n");
482
483	output_label("kallsyms_markers");
484	for (i = 0; i < ((table_cnt + 255) >> 8); i++)
485		printf("\t.long\t%u\n", markers[i]);
486	printf("\n");
487
488	free(markers);
489
490	output_label("kallsyms_token_table");
491	off = 0;
492	for (i = 0; i < 256; i++) {
493		best_idx[i] = off;
494		expand_symbol(best_table[i], best_table_len[i], buf);
495		printf("\t.asciz\t\"%s\"\n", buf);
496		off += strlen(buf) + 1;
497	}
498	printf("\n");
499
500	output_label("kallsyms_token_index");
501	for (i = 0; i < 256; i++)
502		printf("\t.short\t%d\n", best_idx[i]);
503	printf("\n");
504}
505
506
507/* table lookup compression functions */
508
509/* count all the possible tokens in a symbol */
510static void learn_symbol(const unsigned char *symbol, int len)
511{
512	int i;
513
514	for (i = 0; i < len - 1; i++)
515		token_profit[ symbol[i] + (symbol[i + 1] << 8) ]++;
516}
517
518/* decrease the count for all the possible tokens in a symbol */
519static void forget_symbol(const unsigned char *symbol, int len)
520{
521	int i;
522
523	for (i = 0; i < len - 1; i++)
524		token_profit[ symbol[i] + (symbol[i + 1] << 8) ]--;
525}
526
527/* do the initial token count */
528static void build_initial_tok_table(void)
529{
530	unsigned int i;
531
532	for (i = 0; i < table_cnt; i++)
533		learn_symbol(table[i]->sym, table[i]->len);
534}
535
/*
 * Find the first occurrence of the two-byte @token in the @len bytes at
 * @str. Returns a pointer to its first byte, or NULL if it does not occur
 * (which is always the case when @len is below 2).
 */
static unsigned char *find_token(unsigned char *str, int len,
				 const unsigned char *token)
{
	unsigned char *pos, *last;

	if (len < 2)
		return NULL;

	/* last position at which a two-byte token can still start */
	last = str + len - 1;
	for (pos = str; pos < last; pos++)
		if (pos[0] == token[0] && pos[1] == token[1])
			return pos;

	return NULL;
}
547
548/* replace a given token in all the valid symbols. Use the sampled symbols
549 * to update the counts */
550static void compress_symbols(const unsigned char *str, int idx)
551{
552	unsigned int i, len, size;
553	unsigned char *p1, *p2;
554
555	for (i = 0; i < table_cnt; i++) {
556
557		len = table[i]->len;
558		p1 = table[i]->sym;
559
560		/* find the token on the symbol */
561		p2 = find_token(p1, len, str);
562		if (!p2) continue;
563
564		/* decrease the counts for this symbol's tokens */
565		forget_symbol(table[i]->sym, len);
566
567		size = len;
568
569		do {
570			*p2 = idx;
571			p2++;
572			size -= (p2 - p1);
573			memmove(p2, p2 + 1, size);
574			p1 = p2;
575			len--;
576
577			if (size < 2) break;
578
579			/* find the token on the symbol */
580			p2 = find_token(p1, size, str);
581
582		} while (p2);
583
584		table[i]->len = len;
585
586		/* increase the counts for this symbol's new tokens */
587		learn_symbol(table[i]->sym, len);
588	}
589}
590
591/* search the token with the maximum profit */
592static int find_best_token(void)
593{
594	int i, best, bestprofit;
595
596	bestprofit=-10000;
597	best = 0;
598
599	for (i = 0; i < 0x10000; i++) {
600		if (token_profit[i] > bestprofit) {
601			best = i;
602			bestprofit = token_profit[i];
603		}
604	}
605	return best;
606}
607
608/* this is the core of the algorithm: calculate the "best" table */
609static void optimize_result(void)
610{
611	int i, best;
612
613	/* using the '\0' symbol last allows compress_symbols to use standard
614	 * fast string functions */
615	for (i = 255; i >= 0; i--) {
616
617		/* if this table slot is empty (it is not used by an actual
618		 * original char code */
619		if (!best_table_len[i]) {
620
621			/* find the token with the best profit value */
622			best = find_best_token();
623			if (token_profit[best] == 0)
624				break;
625
626			/* place it in the "best" table */
627			best_table_len[i] = 2;
628			best_table[i][0] = best & 0xFF;
629			best_table[i][1] = (best >> 8) & 0xFF;
630
631			/* replace this token in all the valid symbols */
632			compress_symbols(best_table[i], i);
633		}
634	}
635}
636
637/* start by placing the symbols that are actually used on the table */
638static void insert_real_symbols_in_table(void)
639{
640	unsigned int i, j, c;
641
642	for (i = 0; i < table_cnt; i++) {
643		for (j = 0; j < table[i]->len; j++) {
644			c = table[i]->sym[j];
645			best_table[c][0]=c;
646			best_table_len[c]=1;
647		}
648	}
649}
650
/*
 * Build the token table and compress all symbols with it: count the
 * tokens, reserve the byte codes already in use, then assign the free
 * codes to the most profitable tokens.
 */
static void optimize_token_table(void)
{
	build_initial_tok_table();
	insert_real_symbols_in_table();
	optimize_result();
}
659
660/* guess for "linker script provide" symbol */
661static int may_be_linker_script_provide_symbol(const struct sym_entry *se)
662{
663	const char *symbol = sym_name(se);
664	int len = se->len - 1;
665
666	if (len < 8)
667		return 0;
668
669	if (symbol[0] != '_' || symbol[1] != '_')
670		return 0;
671
672	/* __start_XXXXX */
673	if (!memcmp(symbol + 2, "start_", 6))
674		return 1;
675
676	/* __stop_XXXXX */
677	if (!memcmp(symbol + 2, "stop_", 5))
678		return 1;
679
680	/* __end_XXXXX */
681	if (!memcmp(symbol + 2, "end_", 4))
682		return 1;
683
684	/* __XXXXX_start */
685	if (!memcmp(symbol + len - 6, "_start", 6))
686		return 1;
687
688	/* __XXXXX_end */
689	if (!memcmp(symbol + len - 4, "_end", 4))
690		return 1;
691
692	return 0;
693}
694
695static int compare_symbols(const void *a, const void *b)
696{
697	const struct sym_entry *sa = *(const struct sym_entry **)a;
698	const struct sym_entry *sb = *(const struct sym_entry **)b;
699	int wa, wb;
700
701	/* sort by address first */
702	if (sa->addr > sb->addr)
703		return 1;
704	if (sa->addr < sb->addr)
705		return -1;
706
707	/* sort by "weakness" type */
708	wa = (sa->sym[0] == 'w') || (sa->sym[0] == 'W');
709	wb = (sb->sym[0] == 'w') || (sb->sym[0] == 'W');
710	if (wa != wb)
711		return wa - wb;
712
713	/* sort by "linker script provide" type */
714	wa = may_be_linker_script_provide_symbol(sa);
715	wb = may_be_linker_script_provide_symbol(sb);
716	if (wa != wb)
717		return wa - wb;
718
719	/* sort by the number of prefix underscores */
720	wa = strspn(sym_name(sa), "_");
721	wb = strspn(sym_name(sb), "_");
722	if (wa != wb)
723		return wa - wb;
724
725	/* sort by initial order, so that other symbols are left undisturbed */
726	return sa->start_pos - sb->start_pos;
727}
728
729static void sort_symbols(void)
730{
731	qsort(table, table_cnt, sizeof(table[0]), compare_symbols);
732}
733
734static void make_percpus_absolute(void)
735{
736	unsigned int i;
737
738	for (i = 0; i < table_cnt; i++)
739		if (symbol_in_range(table[i], &percpu_range, 1)) {
740			/*
741			 * Keep the 'A' override for percpu symbols to
742			 * ensure consistent behavior compared to older
743			 * versions of this tool.
744			 */
745			table[i]->sym[0] = 'A';
746			table[i]->percpu_absolute = 1;
747		}
748}
749
750/* find the minimum non-absolute symbol address */
751static void record_relative_base(void)
752{
753	unsigned int i;
754
755	for (i = 0; i < table_cnt; i++)
756		if (!symbol_absolute(table[i])) {
757			/*
758			 * The table is sorted by address.
759			 * Take the first non-absolute symbol value.
760			 */
761			relative_base = table[i]->addr;
762			return;
763		}
764}
765
766int main(int argc, char **argv)
767{
768	if (argc >= 2) {
769		int i;
770		for (i = 1; i < argc; i++) {
771			if(strcmp(argv[i], "--all-symbols") == 0)
772				all_symbols = 1;
773			else if (strcmp(argv[i], "--absolute-percpu") == 0)
774				absolute_percpu = 1;
775			else if (strcmp(argv[i], "--base-relative") == 0)
776				base_relative = 1;
777			else
778				usage();
779		}
780	} else if (argc != 1)
781		usage();
782
783	read_map(stdin);
784	shrink_table();
785	if (absolute_percpu)
786		make_percpus_absolute();
787	sort_symbols();
788	if (base_relative)
789		record_relative_base();
790	optimize_token_table();
791	write_src();
792
793	return 0;
794}
795