xref: /third_party/elfutils/src/strings.c (revision da0c48c4)
1/* Print the strings of printable characters in files.
2   Copyright (C) 2005-2010, 2012, 2014 Red Hat, Inc.
3   This file is part of elfutils.
4   Written by Ulrich Drepper <drepper@redhat.com>, 2005.
5
6   This file is free software; you can redistribute it and/or modify
7   it under the terms of the GNU General Public License as published by
8   the Free Software Foundation; either version 3 of the License, or
9   (at your option) any later version.
10
11   elfutils is distributed in the hope that it will be useful, but
12   WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14   GNU General Public License for more details.
15
16   You should have received a copy of the GNU General Public License
17   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
18
19#ifdef HAVE_CONFIG_H
20# include <config.h>
21#endif
22
23#include <argp.h>
24#include <assert.h>
25#include <ctype.h>
26#include <endian.h>
27#include <errno.h>
28#include <fcntl.h>
29#include <gelf.h>
30#include <inttypes.h>
31#include <locale.h>
32#include <stdbool.h>
33#include <stdio.h>
34#include <stdio_ext.h>
35#include <stdlib.h>
36#include <string.h>
37#include <unistd.h>
38#include <sys/mman.h>
39#include <sys/stat.h>
40
41#include <libeu.h>
42#include <system.h>
43#include <printversion.h>
44
/* Fall back to a no-op flag value when the system headers do not
   provide MAP_POPULATE, so it can be OR-ed into mmap flags
   unconditionally.  */
#if !defined MAP_POPULATE
# define MAP_POPULATE 0
#endif
48
49
50/* Prototypes of local functions.  */
51static int read_fd (int fd, const char *fname, off_t fdlen);
52static int read_elf (Elf *elf, int fd, const char *fname, off_t fdlen);
53
54
55/* Name and version of program.  */
56ARGP_PROGRAM_VERSION_HOOK_DEF = print_version;
57
58/* Bug report address.  */
59ARGP_PROGRAM_BUG_ADDRESS_DEF = PACKAGE_BUGREPORT;
60
61/* Definitions of arguments for argp functions.  */
62static const struct argp_option options[] =
63{
64  { NULL, 0, NULL, 0, N_("Output Selection:"), 0 },
65  { "all", 'a', NULL, 0, N_("Scan entire file, not only loaded sections"), 0 },
66  { "bytes", 'n', "MIN-LEN", 0,
67    N_("Only NUL-terminated sequences of MIN-LEN characters or more are printed"), 0 },
68  { "encoding", 'e', "SELECTOR", 0, N_("\
69Select character size and endianness: s = 7-bit, S = 8-bit, {b,l} = 16-bit, {B,L} = 32-bit"),
70    0},
71  { "print-file-name", 'f', NULL, 0,
72    N_("Print name of the file before each string."), 0 },
73  { "radix", 't', "{o,d,x}", 0,
74    N_("Print location of the string in base 8, 10, or 16 respectively."), 0 },
75  { NULL, 'o', NULL, 0, N_("Alias for --radix=o"), 0 },
76
77  { NULL, 0, NULL, 0, N_("Miscellaneous:"), 0 },
78  { NULL, 0, NULL, 0, NULL, 0 }
79};
80
81/* Short description of program.  */
82static const char doc[] = N_("\
83Print the strings of printable characters in files.");
84
85/* Strings for arguments in help texts.  */
86static const char args_doc[] = N_("[FILE...]");
87
88/* Prototype for option handler.  */
89static error_t parse_opt (int key, char *arg, struct argp_state *state);
90
91/* Data structure to communicate with argp functions.  */
92static struct argp argp =
93{
94  options, parse_opt, args_doc, doc, NULL, NULL, NULL
95};
96
97
/* Global state set by the option parser and read by the scanners.  */

/* Set by -a: scan the whole file instead of only the loaded sections.  */
static bool entire_file = false;

/* Minimum number of characters a sequence needs to be reported.  */
static size_t min_len = 4;

/* Width of one character in bytes (1, 2 or 4, see -e).  */
static size_t bytes_per_char = 1;

/* MIN_LEN expressed in bytes; computed once option parsing is done.  */
static size_t min_len_bytes = 0;

/* True if multi-byte characters are stored most significant byte
   first.  */
static bool big_endian = false;

/* True if only 7-bit characters are accepted (-e s); false means
   bytes above 127 may be part of a string as well.  */
static bool char_7bit = false;

/* Set by -f: prefix every reported string with the file name.  */
static bool print_file_name = false;

/* Base in which string offsets are printed; radix_none prints no
   offset at all.  */
static enum
{
  radix_none = 0,
  radix_decimal = 1,
  radix_hex = 2,
  radix_octal = 3
} radix = radix_none;
129
130
/* System page size; filled in by main before any file is read.  */
static size_t ps = 0;


/* State of the current file mapping.  ELFMAP is the address of the
   mapping (NULL if there is none), ELFMAP_BASE the address at which
   file data currently starts inside it, ELFMAP_SIZE the size of the
   mapping and ELFMAP_OFF the file offset that ELFMAP_BASE corresponds
   to.  */
static unsigned char *elfmap = NULL;
static unsigned char *elfmap_base = NULL;
static size_t elfmap_size = 0;
static off_t elfmap_off = 0;
140
141
142int
143main (int argc, char *argv[])
144{
145  /* We use no threads.  */
146  __fsetlocking (stdin, FSETLOCKING_BYCALLER);
147  __fsetlocking (stdout, FSETLOCKING_BYCALLER);
148
149  /* Set locale.  */
150  (void) setlocale (LC_ALL, "");
151
152  /* Make sure the message catalog can be found.  */
153  (void) bindtextdomain (PACKAGE_TARNAME, LOCALEDIR);
154
155  /* Initialize the message catalog.  */
156  (void) textdomain (PACKAGE_TARNAME);
157
158  /* Parse and process arguments.  */
159  int remaining;
160  (void) argp_parse (&argp, argc, argv, 0, &remaining, NULL);
161
162  /* Tell the library which version we are expecting.  */
163  elf_version (EV_CURRENT);
164
165  /* Determine the page size.  We will likely need it a couple of times.  */
166  ps = sysconf (_SC_PAGESIZE);
167
168  struct stat st;
169  int result = 0;
170  if (remaining == argc)
171    /* We read from standard input.  This we cannot do for a
172       structured file.  */
173    result = read_fd (STDIN_FILENO,
174		      print_file_name ? "{standard input}" : NULL,
175		      (fstat (STDIN_FILENO, &st) == 0 && S_ISREG (st.st_mode))
176		      ? st.st_size : INT64_C (0x7fffffffffffffff));
177  else
178    do
179      {
180	int fd = (strcmp (argv[remaining], "-") == 0
181		  ? STDIN_FILENO : open (argv[remaining], O_RDONLY));
182	if (unlikely (fd == -1))
183	  {
184	    error (0, errno, _("cannot open '%s'"), argv[remaining]);
185	    result = 1;
186	  }
187	else
188	  {
189	    const char *fname = print_file_name ? argv[remaining] : NULL;
190	    int fstat_fail = fstat (fd, &st);
191	    off_t fdlen = (fstat_fail
192			     ? INT64_C (0x7fffffffffffffff) : st.st_size);
193	    if (fdlen > (off_t) min_len_bytes)
194	      {
195		Elf *elf = NULL;
196		if (entire_file
197		    || fstat_fail
198		    || !S_ISREG (st.st_mode)
199		    || (elf = elf_begin (fd, ELF_C_READ, NULL)) == NULL
200		    || elf_kind (elf) != ELF_K_ELF)
201		  result |= read_fd (fd, fname, fdlen);
202		else
203		  result |= read_elf (elf, fd, fname, fdlen);
204
205		/* This call will succeed even if ELF is NULL.  */
206		elf_end (elf);
207	      }
208
209	    if (strcmp (argv[remaining], "-") != 0)
210	      close (fd);
211	  }
212
213	if (elfmap != NULL && elfmap != MAP_FAILED)
214	  munmap (elfmap, elfmap_size);
215	elfmap = NULL;
216      }
217    while (++remaining < argc);
218
219  return result;
220}
221
222
223/* Handle program arguments.  */
224static error_t
225parse_opt (int key, char *arg,
226	   struct argp_state *state __attribute__ ((unused)))
227{
228  switch (key)
229    {
230    case 'a':
231      entire_file = true;
232      break;
233
234    case 'e':
235      /* We expect a string of one character.  */
236      switch (arg[1] != '\0' ? '\0' : arg[0])
237	{
238	case 's':
239	case 'S':
240	  char_7bit = arg[0] == 's';
241	  bytes_per_char = 1;
242	  break;
243
244	case 'b':
245	case 'B':
246	  big_endian = true;
247	  FALLTHROUGH;
248
249	case 'l':
250	case 'L':
251	  bytes_per_char = isupper (arg[0]) ? 4 : 2;
252	  break;
253
254	default:
255	  error (0, 0, _("invalid value '%s' for %s parameter"),
256		 arg, "-e");
257	  argp_help (&argp, stderr, ARGP_HELP_SEE, "strings");
258	  return ARGP_ERR_UNKNOWN;
259	}
260      break;
261
262    case 'f':
263      print_file_name = true;
264      break;
265
266    case 'n':
267      min_len = atoi (arg);
268      break;
269
270    case 'o':
271      goto octfmt;
272
273    case 't':
274      switch (arg[0])
275	{
276	case 'd':
277	  radix = radix_decimal;
278	  break;
279
280	case 'o':
281	octfmt:
282	  radix = radix_octal;
283	  break;
284
285	case 'x':
286	  radix = radix_hex;
287	  break;
288
289	default:
290	  error (0, 0, _("invalid value '%s' for %s parameter"),
291		 arg, "-t");
292	  argp_help (&argp, stderr, ARGP_HELP_SEE, "strings");
293	  return ARGP_ERR_UNKNOWN;
294	}
295      break;
296
297    case ARGP_KEY_FINI:
298      /* Compute the length in bytes of any match.  */
299      if (min_len <= 0 || min_len > INT_MAX / bytes_per_char)
300	error_exit (0, _("invalid minimum length of matched string size"));
301      min_len_bytes = min_len * bytes_per_char;
302      break;
303
304    default:
305      return ARGP_ERR_UNKNOWN;
306    }
307  return 0;
308}
309
310
311static void
312process_chunk_mb (const char *fname, const unsigned char *buf, off_t to,
313		  size_t len, char **unprinted)
314{
315  size_t curlen = *unprinted == NULL ? 0 : strlen (*unprinted);
316  const unsigned char *start = buf;
317  while (len >= bytes_per_char)
318    {
319      uint32_t ch;
320
321      if (bytes_per_char == 2)
322	{
323	  if (big_endian)
324	    ch = buf[0] << 8 | buf[1];
325	  else
326	    ch = buf[1] << 8 | buf[0];
327	}
328      else
329	{
330	  if (big_endian)
331	    ch = buf[0] << 24 | buf[1] << 16 | buf[2] << 8 | buf[3];
332	  else
333	    ch = buf[3] << 24 | buf[2] << 16 | buf[1] << 8 | buf[0];
334	}
335
336      if (ch <= 255 && (isprint (ch) || ch == '\t'))
337	{
338	  ++buf;
339	  ++curlen;
340	}
341      else
342	{
343	  if (curlen >= min_len)
344	    {
345	      /* We found a match.  */
346	      if (unlikely (fname != NULL))
347		{
348		  fputs_unlocked (fname, stdout);
349		  fputs_unlocked (": ", stdout);
350		}
351
352	      if (unlikely (radix != radix_none))
353		printf ((radix == radix_octal ? "%7" PRIo64 " "
354			 : (radix == radix_decimal ? "%7" PRId64 " "
355			    : "%7" PRIx64 " ")),
356			(int64_t) to - len - (buf - start));
357
358	      if (unlikely (*unprinted != NULL))
359		{
360		  fputs_unlocked (*unprinted, stdout);
361		  free (*unprinted);
362		  *unprinted = NULL;
363		}
364
365	      /* There is no sane way of printing the string.  If we
366		 assume the file data is encoded in UCS-2/UTF-16 or
367		 UCS-4/UTF-32 respectively we could covert the string.
368		 But there is no such guarantee.  */
369	      fwrite_unlocked (start, 1, buf - start, stdout);
370	      putc_unlocked ('\n', stdout);
371	    }
372
373	  start = ++buf;
374	  curlen =  0;
375
376	  if (len <= min_len)
377	    break;
378	}
379
380      --len;
381    }
382
383  if (curlen != 0)
384    *unprinted = xstrndup ((const char *) start, curlen);
385}
386
387
388static void
389process_chunk (const char *fname, const unsigned char *buf, off_t to,
390	       size_t len, char **unprinted)
391{
392  /* We are not going to slow the check down for the 2- and 4-byte
393     encodings.  Handle them special.  */
394  if (unlikely (bytes_per_char != 1))
395    {
396      process_chunk_mb (fname, buf, to, len, unprinted);
397      return;
398    }
399
400  size_t curlen = *unprinted == NULL ? 0 : strlen (*unprinted);
401  const unsigned char *start = buf;
402  while (len > 0)
403    {
404      if ((isprint (*buf) || *buf == '\t') && (! char_7bit || *buf <= 127))
405	{
406	  ++buf;
407	  ++curlen;
408	}
409      else
410	{
411	  if (curlen >= min_len)
412	    {
413	      /* We found a match.  */
414	      if (likely (fname != NULL))
415		{
416		  fputs_unlocked (fname, stdout);
417		  fputs_unlocked (": ", stdout);
418		}
419
420	      if (likely (radix != radix_none))
421		printf ((radix == radix_octal ? "%7" PRIo64 " "
422			 : (radix == radix_decimal ? "%7" PRId64 " "
423			    : "%7" PRIx64 " ")),
424			(int64_t) to - len - (buf - start));
425
426	      if (unlikely (*unprinted != NULL))
427		{
428		  fputs_unlocked (*unprinted, stdout);
429		  free (*unprinted);
430		  *unprinted = NULL;
431		}
432	      fwrite_unlocked (start, 1, buf - start, stdout);
433	      putc_unlocked ('\n', stdout);
434	    }
435
436	  start = ++buf;
437	  curlen =  0;
438
439	  if (len <= min_len)
440	    break;
441	}
442
443      --len;
444    }
445
446  if (curlen != 0)
447    *unprinted = xstrndup ((const char *) start, curlen);
448}
449
450
451/* Map a file in as large chunks as possible.  */
452static void *
453map_file (int fd, off_t start_off, off_t fdlen, size_t *map_sizep)
454{
455  /* Maximum size we mmap.  We use an #ifdef to avoid overflows on
456     32-bit machines.  64-bit machines these days do not have usable
457     address spaces larger than about 43 bits.  Not that any file
458     should be that large.  */
459# if SIZE_MAX > 0xffffffff
460  const size_t mmap_max = 0x4000000000lu;
461# else
462  const size_t mmap_max = 0x40000000lu;
463# endif
464
465  /* Try to mmap the file.  */
466  size_t map_size = MIN ((off_t) mmap_max, fdlen);
467  const size_t map_size_min = MAX (MAX (SIZE_MAX / 16, 2 * ps),
468				   roundup (2 * min_len_bytes + 1, ps));
469  void *mem;
470  while (1)
471    {
472      /* We map the memory for reading only here.  Since we will
473	 always look at every byte of the file it makes sense to
474	 use MAP_POPULATE.  */
475      mem = mmap (NULL, map_size, PROT_READ, MAP_PRIVATE | MAP_POPULATE,
476		  fd, start_off);
477      if (mem != MAP_FAILED)
478	{
479	  /* We will go through the mapping sequentially.  */
480	  (void) posix_madvise (mem, map_size, POSIX_MADV_SEQUENTIAL);
481	  break;
482	}
483      if (errno != EINVAL && errno != ENOMEM)
484	/* This is an error other than the lack of address space.  */
485	break;
486
487      /* Maybe the size of the mapping is too big.  Try again.  */
488      map_size /= 2;
489      if (map_size < map_size_min)
490	/* That size should have fit.  */
491	break;
492    }
493
494  *map_sizep = map_size;
495  return mem;
496}
497
498
499/* Read the file without mapping.  */
500static int
501read_block_no_mmap (int fd, const char *fname, off_t from, off_t fdlen)
502{
503  char *unprinted = NULL;
504#define CHUNKSIZE 65536
505  unsigned char *buf = xmalloc (CHUNKSIZE + min_len_bytes
506				+ bytes_per_char - 1);
507  size_t ntrailer = 0;
508  int result = 0;
509  while (fdlen > 0)
510    {
511      ssize_t n = TEMP_FAILURE_RETRY (read (fd, buf + ntrailer,
512					    MIN (fdlen, CHUNKSIZE)));
513      if (n == 0)
514	{
515	  /* There are less than MIN_LEN+1 bytes left so there cannot be
516	     another match.  */
517	  assert (unprinted == NULL || ntrailer == 0);
518	  break;
519	}
520      if (unlikely (n < 0))
521	{
522	  /* Something went wrong.  */
523	  result = 1;
524	  break;
525	}
526
527      /* Account for the number of bytes read in this round.  */
528      fdlen -= n;
529
530      /* Do not use the signed N value.  Note that the addition cannot
531	 overflow.  */
532      size_t nb = (size_t) n + ntrailer;
533      if (nb >= min_len_bytes)
534	{
535	  /* We only use complete characters.  */
536	  nb &= ~(bytes_per_char - 1);
537
538	  process_chunk (fname, buf, from + nb, nb, &unprinted);
539
540	  /* If the last bytes of the buffer (modulo the character
541	     size) have been printed we are not copying them.  */
542	  size_t to_keep = unprinted != NULL ? 0 : min_len_bytes;
543
544	  memmove (buf, buf + nb - to_keep, to_keep);
545	  ntrailer = to_keep;
546	  from += nb;
547	}
548      else
549	ntrailer = nb;
550    }
551
552  free (buf);
553
554  /* Don't print anything we collected so far.  There is no
555     terminating NUL byte.  */
556  free (unprinted);
557
558  return result;
559}
560
561
562static int
563read_block (int fd, const char *fname, off_t fdlen, off_t from, off_t to)
564{
565  if (elfmap == NULL)
566    {
567      /* We need a completely new mapping.  */
568      elfmap_off = from & ~(ps - 1);
569      elfmap_base = elfmap = map_file (fd, elfmap_off, fdlen, &elfmap_size);
570
571      if (unlikely (elfmap == MAP_FAILED))
572	/* Let the kernel know we are going to read everything in sequence.  */
573	(void) posix_fadvise (fd, 0, 0, POSIX_FADV_SEQUENTIAL);
574    }
575
576  if (unlikely (elfmap == MAP_FAILED))
577    {
578      /* Read from the file descriptor.  For this we must position the
579	 read pointer.  */
580      // XXX Eventually add flag which avoids this if the position
581      // XXX is known to match.
582      if (from != 0 && lseek (fd, from, SEEK_SET) != from)
583	error_exit (errno, _("lseek failed"));
584
585      return read_block_no_mmap (fd, fname, from, to - from);
586    }
587
588  assert ((off_t) min_len_bytes < fdlen);
589
590  if (to < (off_t) elfmap_off || from > (off_t) (elfmap_off + elfmap_size))
591    {
592      /* The existing mapping cannot fit at all.  Map the new area.
593	 We always map the full range of ELFMAP_SIZE bytes even if
594	 this extend beyond the end of the file.  The Linux kernel
595	 handles this OK if the access pages are not touched.  */
596      elfmap_off = from & ~(ps - 1);
597      if (mmap (elfmap, elfmap_size, PROT_READ,
598		MAP_PRIVATE | MAP_POPULATE | MAP_FIXED, fd, from)
599	  == MAP_FAILED)
600	error_exit (errno, _("re-mmap failed"));
601      elfmap_base = elfmap;
602    }
603
604  char *unprinted = NULL;
605
606  /* Use the existing mapping as much as possible.  If necessary, map
607     new pages.  */
608  if (from >= (off_t) elfmap_off
609      && from < (off_t) (elfmap_off + elfmap_size))
610    /* There are at least a few bytes in this mapping which we can
611       use.  */
612    process_chunk (fname, elfmap_base + (from - elfmap_off),
613		   MIN (to, (off_t) (elfmap_off + elfmap_size)),
614		   MIN (to, (off_t) (elfmap_off + elfmap_size)) - from,
615		   &unprinted);
616
617  if (to > (off_t) (elfmap_off + elfmap_size))
618    {
619      unsigned char *remap_base = elfmap_base;
620      size_t read_now = elfmap_size - (elfmap_base - elfmap);
621
622      assert (from >= (off_t) elfmap_off
623	      && from < (off_t) (elfmap_off + elfmap_size));
624      off_t handled_to = elfmap_off + elfmap_size;
625      assert (elfmap == elfmap_base
626	      || (elfmap_base - elfmap
627		  == (ptrdiff_t) ((min_len_bytes + ps - 1) & ~(ps - 1))));
628      if (elfmap == elfmap_base)
629	{
630	  size_t keep_area = (min_len_bytes + ps - 1) & ~(ps - 1);
631	  assert (elfmap_size >= keep_area + ps);
632	  /* The keep area is used for the content of the previous
633	     buffer we have to keep.  This means copying those bytes
634	     and for this we have to make the data writable.  */
635	  if (unlikely (mprotect (elfmap, keep_area, PROT_READ | PROT_WRITE)
636			!= 0))
637	    error_exit (errno, _("mprotect failed"));
638
639	  elfmap_base = elfmap + keep_area;
640	}
641
642      while (1)
643	{
644	  /* Map the rest of the file, eventually again in pieces.
645	     We speed things up with a nice Linux feature.  Note
646	     that we have at least two pages mapped.  */
647	  size_t to_keep = unprinted != NULL ? 0 : min_len_bytes;
648
649	  assert (read_now >= to_keep);
650	  memmove (elfmap_base - to_keep,
651		   remap_base + read_now - to_keep, to_keep);
652	  remap_base = elfmap_base;
653
654	  assert ((elfmap_size - (elfmap_base - elfmap)) % bytes_per_char
655		  == 0);
656	  read_now = MIN (to - handled_to,
657			  (ptrdiff_t) elfmap_size - (elfmap_base - elfmap));
658
659	  assert (handled_to % ps == 0);
660	  assert (handled_to % bytes_per_char == 0);
661	  if (mmap (remap_base, read_now, PROT_READ,
662		    MAP_PRIVATE | MAP_POPULATE | MAP_FIXED, fd, handled_to)
663	      == MAP_FAILED)
664	    error_exit (errno, _("re-mmap failed"));
665	  elfmap_off = handled_to;
666
667	  process_chunk (fname, remap_base - to_keep,
668			 elfmap_off + (read_now & ~(bytes_per_char - 1)),
669			 to_keep + (read_now & ~(bytes_per_char - 1)),
670			 &unprinted);
671	  handled_to += read_now;
672	  if (handled_to >= to)
673	    break;
674	}
675    }
676
677  /* Don't print anything we collected so far.  There is no
678     terminating NUL byte.  */
679  free (unprinted);
680
681  return 0;
682}
683
684
/* Print the strings of the complete file open on FD, that is of the
   byte range from offset 0 up to FDLEN.  FNAME is handed through.
   Returns what read_block returns.  */
static int
read_fd (int fd, const char *fname, off_t fdlen)
{
  const off_t first = 0;
  const off_t last = fdlen;

  return read_block (fd, fname, fdlen, first, last);
}
690
691
692static int
693read_elf (Elf *elf, int fd, const char *fname, off_t fdlen)
694{
695  assert (fdlen >= 0);
696
697  /* We will look at each section separately.  The ELF file is not
698     mmapped.  The libelf implementation will load the needed parts on
699     demand.  Since we only iterate over the section header table the
700     memory consumption at this stage is kept minimal.  */
701  Elf_Scn *scn = elf_nextscn (elf, NULL);
702  if (scn == NULL)
703    return read_fd (fd, fname, fdlen);
704
705  int result = 0;
706  do
707    {
708      GElf_Shdr shdr_mem;
709      GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_mem);
710
711      /* Only look in sections which are loaded at runtime and
712	 actually have content.  */
713      if (shdr != NULL && shdr->sh_type != SHT_NOBITS
714	  && (shdr->sh_flags & SHF_ALLOC) != 0)
715	{
716	  if (shdr->sh_offset > (Elf64_Off) fdlen
717	      || fdlen - shdr->sh_offset < shdr->sh_size)
718	    {
719	      size_t strndx = 0;
720	      const char *sname;
721	      if (unlikely (elf_getshdrstrndx (elf, &strndx) < 0))
722		sname = "<unknown>";
723	      else
724		sname = elf_strptr (elf, strndx, shdr->sh_name) ?: "<unknown>";
725	      error (0, 0,
726		     _("Skipping section %zd '%s' data outside file"),
727		     elf_ndxscn (scn), sname);
728	      result = 1;
729	    }
730	  else
731	    result |= read_block (fd, fname, fdlen, shdr->sh_offset,
732				  shdr->sh_offset + shdr->sh_size);
733	}
734    }
735  while ((scn = elf_nextscn (elf, scn)) != NULL);
736
737  if (elfmap != NULL && elfmap != MAP_FAILED)
738    munmap (elfmap, elfmap_size);
739  elfmap = NULL;
740
741  return result;
742}
743
744
745#include "debugpred.h"
746