1/* Print the strings of printable characters in files. 2 Copyright (C) 2005-2010, 2012, 2014 Red Hat, Inc. 3 This file is part of elfutils. 4 Written by Ulrich Drepper <drepper@redhat.com>, 2005. 5 6 This file is free software; you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 3 of the License, or 9 (at your option) any later version. 10 11 elfutils is distributed in the hope that it will be useful, but 12 WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 GNU General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 18 19#ifdef HAVE_CONFIG_H 20# include <config.h> 21#endif 22 23#include <argp.h> 24#include <assert.h> 25#include <ctype.h> 26#include <endian.h> 27#include <errno.h> 28#include <fcntl.h> 29#include <gelf.h> 30#include <inttypes.h> 31#include <locale.h> 32#include <stdbool.h> 33#include <stdio.h> 34#include <stdio_ext.h> 35#include <stdlib.h> 36#include <string.h> 37#include <unistd.h> 38#include <sys/mman.h> 39#include <sys/stat.h> 40 41#include <libeu.h> 42#include <system.h> 43#include <printversion.h> 44 45#ifndef MAP_POPULATE 46# define MAP_POPULATE 0 47#endif 48 49 50/* Prototypes of local functions. */ 51static int read_fd (int fd, const char *fname, off_t fdlen); 52static int read_elf (Elf *elf, int fd, const char *fname, off_t fdlen); 53 54 55/* Name and version of program. */ 56ARGP_PROGRAM_VERSION_HOOK_DEF = print_version; 57 58/* Bug report address. */ 59ARGP_PROGRAM_BUG_ADDRESS_DEF = PACKAGE_BUGREPORT; 60 61/* Definitions of arguments for argp functions. */ 62static const struct argp_option options[] = 63{ 64 { NULL, 0, NULL, 0, N_("Output Selection:"), 0 }, 65 { "all", 'a', NULL, 0, N_("Scan entire file, not only loaded sections"), 0 }, 66 { "bytes", 'n', "MIN-LEN", 0, 67 N_("Only NUL-terminated sequences of MIN-LEN characters or more are printed"), 0 }, 68 { "encoding", 'e', "SELECTOR", 0, N_("\ 69Select character size and endianness: s = 7-bit, S = 8-bit, {b,l} = 16-bit, {B,L} = 32-bit"), 70 0}, 71 { "print-file-name", 'f', NULL, 0, 72 N_("Print name of the file before each string."), 0 }, 73 { "radix", 't', "{o,d,x}", 0, 74 N_("Print location of the string in base 8, 10, or 16 respectively."), 0 }, 75 { NULL, 'o', NULL, 0, N_("Alias for --radix=o"), 0 }, 76 77 { NULL, 0, NULL, 0, N_("Miscellaneous:"), 0 }, 78 { NULL, 0, NULL, 0, NULL, 0 } 79}; 80 81/* Short description of program. */ 82static const char doc[] = N_("\ 83Print the strings of printable characters in files."); 84 85/* Strings for arguments in help texts. */ 86static const char args_doc[] = N_("[FILE...]"); 87 88/* Prototype for option handler. */ 89static error_t parse_opt (int key, char *arg, struct argp_state *state); 90 91/* Data structure to communicate with argp functions. */ 92static struct argp argp = 93{ 94 options, parse_opt, args_doc, doc, NULL, NULL, NULL 95}; 96 97 98/* Global variables. */ 99 100/* True if whole file and not only loaded sections are looked at. */ 101static bool entire_file; 102 103/* Minimum length of any sequence reported. */ 104static size_t min_len = 4; 105 106/* Number of bytes per character. */ 107static size_t bytes_per_char = 1; 108 109/* Minimum length of any sequence reported in bytes. */ 110static size_t min_len_bytes; 111 112/* True if multibyte characters are in big-endian order. */ 113static bool big_endian; 114 115/* True unless 7-bit ASCII are expected. */ 116static bool char_7bit; 117 118/* True if file names should be printed before strings. */ 119static bool print_file_name; 120 121/* Radix for printed numbers. */ 122static enum 123{ 124 radix_none = 0, 125 radix_decimal, 126 radix_hex, 127 radix_octal 128} radix = radix_none; 129 130 131/* Page size in use. */ 132static size_t ps; 133 134 135/* Mapped parts of the ELF file. */ 136static unsigned char *elfmap; 137static unsigned char *elfmap_base; 138static size_t elfmap_size; 139static off_t elfmap_off; 140 141 142int 143main (int argc, char *argv[]) 144{ 145 /* We use no threads. */ 146 __fsetlocking (stdin, FSETLOCKING_BYCALLER); 147 __fsetlocking (stdout, FSETLOCKING_BYCALLER); 148 149 /* Set locale. */ 150 (void) setlocale (LC_ALL, ""); 151 152 /* Make sure the message catalog can be found. */ 153 (void) bindtextdomain (PACKAGE_TARNAME, LOCALEDIR); 154 155 /* Initialize the message catalog. */ 156 (void) textdomain (PACKAGE_TARNAME); 157 158 /* Parse and process arguments. */ 159 int remaining; 160 (void) argp_parse (&argp, argc, argv, 0, &remaining, NULL); 161 162 /* Tell the library which version we are expecting. */ 163 elf_version (EV_CURRENT); 164 165 /* Determine the page size. We will likely need it a couple of times. */ 166 ps = sysconf (_SC_PAGESIZE); 167 168 struct stat st; 169 int result = 0; 170 if (remaining == argc) 171 /* We read from standard input. This we cannot do for a 172 structured file. */ 173 result = read_fd (STDIN_FILENO, 174 print_file_name ? "{standard input}" : NULL, 175 (fstat (STDIN_FILENO, &st) == 0 && S_ISREG (st.st_mode)) 176 ? st.st_size : INT64_C (0x7fffffffffffffff)); 177 else 178 do 179 { 180 int fd = (strcmp (argv[remaining], "-") == 0 181 ? STDIN_FILENO : open (argv[remaining], O_RDONLY)); 182 if (unlikely (fd == -1)) 183 { 184 error (0, errno, _("cannot open '%s'"), argv[remaining]); 185 result = 1; 186 } 187 else 188 { 189 const char *fname = print_file_name ? argv[remaining] : NULL; 190 int fstat_fail = fstat (fd, &st); 191 off_t fdlen = (fstat_fail 192 ? INT64_C (0x7fffffffffffffff) : st.st_size); 193 if (fdlen > (off_t) min_len_bytes) 194 { 195 Elf *elf = NULL; 196 if (entire_file 197 || fstat_fail 198 || !S_ISREG (st.st_mode) 199 || (elf = elf_begin (fd, ELF_C_READ, NULL)) == NULL 200 || elf_kind (elf) != ELF_K_ELF) 201 result |= read_fd (fd, fname, fdlen); 202 else 203 result |= read_elf (elf, fd, fname, fdlen); 204 205 /* This call will succeed even if ELF is NULL. */ 206 elf_end (elf); 207 } 208 209 if (strcmp (argv[remaining], "-") != 0) 210 close (fd); 211 } 212 213 if (elfmap != NULL && elfmap != MAP_FAILED) 214 munmap (elfmap, elfmap_size); 215 elfmap = NULL; 216 } 217 while (++remaining < argc); 218 219 return result; 220} 221 222 223/* Handle program arguments. */ 224static error_t 225parse_opt (int key, char *arg, 226 struct argp_state *state __attribute__ ((unused))) 227{ 228 switch (key) 229 { 230 case 'a': 231 entire_file = true; 232 break; 233 234 case 'e': 235 /* We expect a string of one character. */ 236 switch (arg[1] != '\0' ? '\0' : arg[0]) 237 { 238 case 's': 239 case 'S': 240 char_7bit = arg[0] == 's'; 241 bytes_per_char = 1; 242 break; 243 244 case 'b': 245 case 'B': 246 big_endian = true; 247 FALLTHROUGH; 248 249 case 'l': 250 case 'L': 251 bytes_per_char = isupper (arg[0]) ? 4 : 2; 252 break; 253 254 default: 255 error (0, 0, _("invalid value '%s' for %s parameter"), 256 arg, "-e"); 257 argp_help (&argp, stderr, ARGP_HELP_SEE, "strings"); 258 return ARGP_ERR_UNKNOWN; 259 } 260 break; 261 262 case 'f': 263 print_file_name = true; 264 break; 265 266 case 'n': 267 min_len = atoi (arg); 268 break; 269 270 case 'o': 271 goto octfmt; 272 273 case 't': 274 switch (arg[0]) 275 { 276 case 'd': 277 radix = radix_decimal; 278 break; 279 280 case 'o': 281 octfmt: 282 radix = radix_octal; 283 break; 284 285 case 'x': 286 radix = radix_hex; 287 break; 288 289 default: 290 error (0, 0, _("invalid value '%s' for %s parameter"), 291 arg, "-t"); 292 argp_help (&argp, stderr, ARGP_HELP_SEE, "strings"); 293 return ARGP_ERR_UNKNOWN; 294 } 295 break; 296 297 case ARGP_KEY_FINI: 298 /* Compute the length in bytes of any match. */ 299 if (min_len <= 0 || min_len > INT_MAX / bytes_per_char) 300 error_exit (0, _("invalid minimum length of matched string size")); 301 min_len_bytes = min_len * bytes_per_char; 302 break; 303 304 default: 305 return ARGP_ERR_UNKNOWN; 306 } 307 return 0; 308} 309 310 311static void 312process_chunk_mb (const char *fname, const unsigned char *buf, off_t to, 313 size_t len, char **unprinted) 314{ 315 size_t curlen = *unprinted == NULL ? 0 : strlen (*unprinted); 316 const unsigned char *start = buf; 317 while (len >= bytes_per_char) 318 { 319 uint32_t ch; 320 321 if (bytes_per_char == 2) 322 { 323 if (big_endian) 324 ch = buf[0] << 8 | buf[1]; 325 else 326 ch = buf[1] << 8 | buf[0]; 327 } 328 else 329 { 330 if (big_endian) 331 ch = buf[0] << 24 | buf[1] << 16 | buf[2] << 8 | buf[3]; 332 else 333 ch = buf[3] << 24 | buf[2] << 16 | buf[1] << 8 | buf[0]; 334 } 335 336 if (ch <= 255 && (isprint (ch) || ch == '\t')) 337 { 338 ++buf; 339 ++curlen; 340 } 341 else 342 { 343 if (curlen >= min_len) 344 { 345 /* We found a match. */ 346 if (unlikely (fname != NULL)) 347 { 348 fputs_unlocked (fname, stdout); 349 fputs_unlocked (": ", stdout); 350 } 351 352 if (unlikely (radix != radix_none)) 353 printf ((radix == radix_octal ? "%7" PRIo64 " " 354 : (radix == radix_decimal ? "%7" PRId64 " " 355 : "%7" PRIx64 " ")), 356 (int64_t) to - len - (buf - start)); 357 358 if (unlikely (*unprinted != NULL)) 359 { 360 fputs_unlocked (*unprinted, stdout); 361 free (*unprinted); 362 *unprinted = NULL; 363 } 364 365 /* There is no sane way of printing the string. If we 366 assume the file data is encoded in UCS-2/UTF-16 or 367 UCS-4/UTF-32 respectively we could covert the string. 368 But there is no such guarantee. */ 369 fwrite_unlocked (start, 1, buf - start, stdout); 370 putc_unlocked ('\n', stdout); 371 } 372 373 start = ++buf; 374 curlen = 0; 375 376 if (len <= min_len) 377 break; 378 } 379 380 --len; 381 } 382 383 if (curlen != 0) 384 *unprinted = xstrndup ((const char *) start, curlen); 385} 386 387 388static void 389process_chunk (const char *fname, const unsigned char *buf, off_t to, 390 size_t len, char **unprinted) 391{ 392 /* We are not going to slow the check down for the 2- and 4-byte 393 encodings. Handle them special. */ 394 if (unlikely (bytes_per_char != 1)) 395 { 396 process_chunk_mb (fname, buf, to, len, unprinted); 397 return; 398 } 399 400 size_t curlen = *unprinted == NULL ? 0 : strlen (*unprinted); 401 const unsigned char *start = buf; 402 while (len > 0) 403 { 404 if ((isprint (*buf) || *buf == '\t') && (! char_7bit || *buf <= 127)) 405 { 406 ++buf; 407 ++curlen; 408 } 409 else 410 { 411 if (curlen >= min_len) 412 { 413 /* We found a match. */ 414 if (likely (fname != NULL)) 415 { 416 fputs_unlocked (fname, stdout); 417 fputs_unlocked (": ", stdout); 418 } 419 420 if (likely (radix != radix_none)) 421 printf ((radix == radix_octal ? "%7" PRIo64 " " 422 : (radix == radix_decimal ? "%7" PRId64 " " 423 : "%7" PRIx64 " ")), 424 (int64_t) to - len - (buf - start)); 425 426 if (unlikely (*unprinted != NULL)) 427 { 428 fputs_unlocked (*unprinted, stdout); 429 free (*unprinted); 430 *unprinted = NULL; 431 } 432 fwrite_unlocked (start, 1, buf - start, stdout); 433 putc_unlocked ('\n', stdout); 434 } 435 436 start = ++buf; 437 curlen = 0; 438 439 if (len <= min_len) 440 break; 441 } 442 443 --len; 444 } 445 446 if (curlen != 0) 447 *unprinted = xstrndup ((const char *) start, curlen); 448} 449 450 451/* Map a file in as large chunks as possible. */ 452static void * 453map_file (int fd, off_t start_off, off_t fdlen, size_t *map_sizep) 454{ 455 /* Maximum size we mmap. We use an #ifdef to avoid overflows on 456 32-bit machines. 64-bit machines these days do not have usable 457 address spaces larger than about 43 bits. Not that any file 458 should be that large. */ 459# if SIZE_MAX > 0xffffffff 460 const size_t mmap_max = 0x4000000000lu; 461# else 462 const size_t mmap_max = 0x40000000lu; 463# endif 464 465 /* Try to mmap the file. */ 466 size_t map_size = MIN ((off_t) mmap_max, fdlen); 467 const size_t map_size_min = MAX (MAX (SIZE_MAX / 16, 2 * ps), 468 roundup (2 * min_len_bytes + 1, ps)); 469 void *mem; 470 while (1) 471 { 472 /* We map the memory for reading only here. Since we will 473 always look at every byte of the file it makes sense to 474 use MAP_POPULATE. */ 475 mem = mmap (NULL, map_size, PROT_READ, MAP_PRIVATE | MAP_POPULATE, 476 fd, start_off); 477 if (mem != MAP_FAILED) 478 { 479 /* We will go through the mapping sequentially. */ 480 (void) posix_madvise (mem, map_size, POSIX_MADV_SEQUENTIAL); 481 break; 482 } 483 if (errno != EINVAL && errno != ENOMEM) 484 /* This is an error other than the lack of address space. */ 485 break; 486 487 /* Maybe the size of the mapping is too big. Try again. */ 488 map_size /= 2; 489 if (map_size < map_size_min) 490 /* That size should have fit. */ 491 break; 492 } 493 494 *map_sizep = map_size; 495 return mem; 496} 497 498 499/* Read the file without mapping. */ 500static int 501read_block_no_mmap (int fd, const char *fname, off_t from, off_t fdlen) 502{ 503 char *unprinted = NULL; 504#define CHUNKSIZE 65536 505 unsigned char *buf = xmalloc (CHUNKSIZE + min_len_bytes 506 + bytes_per_char - 1); 507 size_t ntrailer = 0; 508 int result = 0; 509 while (fdlen > 0) 510 { 511 ssize_t n = TEMP_FAILURE_RETRY (read (fd, buf + ntrailer, 512 MIN (fdlen, CHUNKSIZE))); 513 if (n == 0) 514 { 515 /* There are less than MIN_LEN+1 bytes left so there cannot be 516 another match. */ 517 assert (unprinted == NULL || ntrailer == 0); 518 break; 519 } 520 if (unlikely (n < 0)) 521 { 522 /* Something went wrong. */ 523 result = 1; 524 break; 525 } 526 527 /* Account for the number of bytes read in this round. */ 528 fdlen -= n; 529 530 /* Do not use the signed N value. Note that the addition cannot 531 overflow. */ 532 size_t nb = (size_t) n + ntrailer; 533 if (nb >= min_len_bytes) 534 { 535 /* We only use complete characters. */ 536 nb &= ~(bytes_per_char - 1); 537 538 process_chunk (fname, buf, from + nb, nb, &unprinted); 539 540 /* If the last bytes of the buffer (modulo the character 541 size) have been printed we are not copying them. */ 542 size_t to_keep = unprinted != NULL ? 0 : min_len_bytes; 543 544 memmove (buf, buf + nb - to_keep, to_keep); 545 ntrailer = to_keep; 546 from += nb; 547 } 548 else 549 ntrailer = nb; 550 } 551 552 free (buf); 553 554 /* Don't print anything we collected so far. There is no 555 terminating NUL byte. */ 556 free (unprinted); 557 558 return result; 559} 560 561 562static int 563read_block (int fd, const char *fname, off_t fdlen, off_t from, off_t to) 564{ 565 if (elfmap == NULL) 566 { 567 /* We need a completely new mapping. */ 568 elfmap_off = from & ~(ps - 1); 569 elfmap_base = elfmap = map_file (fd, elfmap_off, fdlen, &elfmap_size); 570 571 if (unlikely (elfmap == MAP_FAILED)) 572 /* Let the kernel know we are going to read everything in sequence. */ 573 (void) posix_fadvise (fd, 0, 0, POSIX_FADV_SEQUENTIAL); 574 } 575 576 if (unlikely (elfmap == MAP_FAILED)) 577 { 578 /* Read from the file descriptor. For this we must position the 579 read pointer. */ 580 // XXX Eventually add flag which avoids this if the position 581 // XXX is known to match. 582 if (from != 0 && lseek (fd, from, SEEK_SET) != from) 583 error_exit (errno, _("lseek failed")); 584 585 return read_block_no_mmap (fd, fname, from, to - from); 586 } 587 588 assert ((off_t) min_len_bytes < fdlen); 589 590 if (to < (off_t) elfmap_off || from > (off_t) (elfmap_off + elfmap_size)) 591 { 592 /* The existing mapping cannot fit at all. Map the new area. 593 We always map the full range of ELFMAP_SIZE bytes even if 594 this extend beyond the end of the file. The Linux kernel 595 handles this OK if the access pages are not touched. */ 596 elfmap_off = from & ~(ps - 1); 597 if (mmap (elfmap, elfmap_size, PROT_READ, 598 MAP_PRIVATE | MAP_POPULATE | MAP_FIXED, fd, from) 599 == MAP_FAILED) 600 error_exit (errno, _("re-mmap failed")); 601 elfmap_base = elfmap; 602 } 603 604 char *unprinted = NULL; 605 606 /* Use the existing mapping as much as possible. If necessary, map 607 new pages. */ 608 if (from >= (off_t) elfmap_off 609 && from < (off_t) (elfmap_off + elfmap_size)) 610 /* There are at least a few bytes in this mapping which we can 611 use. */ 612 process_chunk (fname, elfmap_base + (from - elfmap_off), 613 MIN (to, (off_t) (elfmap_off + elfmap_size)), 614 MIN (to, (off_t) (elfmap_off + elfmap_size)) - from, 615 &unprinted); 616 617 if (to > (off_t) (elfmap_off + elfmap_size)) 618 { 619 unsigned char *remap_base = elfmap_base; 620 size_t read_now = elfmap_size - (elfmap_base - elfmap); 621 622 assert (from >= (off_t) elfmap_off 623 && from < (off_t) (elfmap_off + elfmap_size)); 624 off_t handled_to = elfmap_off + elfmap_size; 625 assert (elfmap == elfmap_base 626 || (elfmap_base - elfmap 627 == (ptrdiff_t) ((min_len_bytes + ps - 1) & ~(ps - 1)))); 628 if (elfmap == elfmap_base) 629 { 630 size_t keep_area = (min_len_bytes + ps - 1) & ~(ps - 1); 631 assert (elfmap_size >= keep_area + ps); 632 /* The keep area is used for the content of the previous 633 buffer we have to keep. This means copying those bytes 634 and for this we have to make the data writable. */ 635 if (unlikely (mprotect (elfmap, keep_area, PROT_READ | PROT_WRITE) 636 != 0)) 637 error_exit (errno, _("mprotect failed")); 638 639 elfmap_base = elfmap + keep_area; 640 } 641 642 while (1) 643 { 644 /* Map the rest of the file, eventually again in pieces. 645 We speed things up with a nice Linux feature. Note 646 that we have at least two pages mapped. */ 647 size_t to_keep = unprinted != NULL ? 0 : min_len_bytes; 648 649 assert (read_now >= to_keep); 650 memmove (elfmap_base - to_keep, 651 remap_base + read_now - to_keep, to_keep); 652 remap_base = elfmap_base; 653 654 assert ((elfmap_size - (elfmap_base - elfmap)) % bytes_per_char 655 == 0); 656 read_now = MIN (to - handled_to, 657 (ptrdiff_t) elfmap_size - (elfmap_base - elfmap)); 658 659 assert (handled_to % ps == 0); 660 assert (handled_to % bytes_per_char == 0); 661 if (mmap (remap_base, read_now, PROT_READ, 662 MAP_PRIVATE | MAP_POPULATE | MAP_FIXED, fd, handled_to) 663 == MAP_FAILED) 664 error_exit (errno, _("re-mmap failed")); 665 elfmap_off = handled_to; 666 667 process_chunk (fname, remap_base - to_keep, 668 elfmap_off + (read_now & ~(bytes_per_char - 1)), 669 to_keep + (read_now & ~(bytes_per_char - 1)), 670 &unprinted); 671 handled_to += read_now; 672 if (handled_to >= to) 673 break; 674 } 675 } 676 677 /* Don't print anything we collected so far. There is no 678 terminating NUL byte. */ 679 free (unprinted); 680 681 return 0; 682} 683 684 685static int 686read_fd (int fd, const char *fname, off_t fdlen) 687{ 688 return read_block (fd, fname, fdlen, 0, fdlen); 689} 690 691 692static int 693read_elf (Elf *elf, int fd, const char *fname, off_t fdlen) 694{ 695 assert (fdlen >= 0); 696 697 /* We will look at each section separately. The ELF file is not 698 mmapped. The libelf implementation will load the needed parts on 699 demand. Since we only iterate over the section header table the 700 memory consumption at this stage is kept minimal. */ 701 Elf_Scn *scn = elf_nextscn (elf, NULL); 702 if (scn == NULL) 703 return read_fd (fd, fname, fdlen); 704 705 int result = 0; 706 do 707 { 708 GElf_Shdr shdr_mem; 709 GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_mem); 710 711 /* Only look in sections which are loaded at runtime and 712 actually have content. */ 713 if (shdr != NULL && shdr->sh_type != SHT_NOBITS 714 && (shdr->sh_flags & SHF_ALLOC) != 0) 715 { 716 if (shdr->sh_offset > (Elf64_Off) fdlen 717 || fdlen - shdr->sh_offset < shdr->sh_size) 718 { 719 size_t strndx = 0; 720 const char *sname; 721 if (unlikely (elf_getshdrstrndx (elf, &strndx) < 0)) 722 sname = "<unknown>"; 723 else 724 sname = elf_strptr (elf, strndx, shdr->sh_name) ?: "<unknown>"; 725 error (0, 0, 726 _("Skipping section %zd '%s' data outside file"), 727 elf_ndxscn (scn), sname); 728 result = 1; 729 } 730 else 731 result |= read_block (fd, fname, fdlen, shdr->sh_offset, 732 shdr->sh_offset + shdr->sh_size); 733 } 734 } 735 while ((scn = elf_nextscn (elf, scn)) != NULL); 736 737 if (elfmap != NULL && elfmap != MAP_FAILED) 738 munmap (elfmap, elfmap_size); 739 elfmap = NULL; 740 741 return result; 742} 743 744 745#include "debugpred.h" 746