/* Print the strings of printable characters in files.
   Copyright (C) 2005-2010, 2012, 2014 Red Hat, Inc.
   This file is part of elfutils.
   Written by Ulrich Drepper <drepper@redhat.com>, 2005.

   This file is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   elfutils is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
18
19#ifdef HAVE_CONFIG_H
20# include <config.h>
21#endif
22
23#include <argp.h>
24#include <assert.h>
25#include <ctype.h>
26#include <endian.h>
27#include <errno.h>
28#include <error.h>
29#include <fcntl.h>
30#include <gelf.h>
31#include <inttypes.h>
32#include <libintl.h>
33#include <locale.h>
34#include <stdbool.h>
35#include <stdio.h>
36#include <stdio_ext.h>
37#include <stdlib.h>
38#include <string.h>
39#include <unistd.h>
40#include <sys/mman.h>
41#include <sys/stat.h>
42
43#include <libeu.h>
44#include <system.h>
45#include <printversion.h>
46
47#ifndef MAP_POPULATE
48# define MAP_POPULATE 0
49#endif
50
51
52/* Prototypes of local functions. */
53static int read_fd (int fd, const char *fname, off_t fdlen);
54static int read_elf (Elf *elf, int fd, const char *fname, off_t fdlen);
55
56
57/* Name and version of program. */
58ARGP_PROGRAM_VERSION_HOOK_DEF = print_version;
59
60/* Bug report address. */
61ARGP_PROGRAM_BUG_ADDRESS_DEF = PACKAGE_BUGREPORT;
62
63/* Definitions of arguments for argp functions. */
64static const struct argp_option options[] =
65{
66 { NULL, 0, NULL, 0, N_("Output Selection:"), 0 },
67 { "all", 'a', NULL, 0, N_("Scan entire file, not only loaded sections"), 0 },
68 { "bytes", 'n', "MIN-LEN", 0,
69 N_("Only NUL-terminated sequences of MIN-LEN characters or more are printed"), 0 },
70 { "encoding", 'e', "SELECTOR", 0, N_("\
71Select character size and endianess: s = 7-bit, S = 8-bit, {b,l} = 16-bit, {B,L} = 32-bit"),
72 0},
73 { "print-file-name", 'f', NULL, 0,
74 N_("Print name of the file before each string."), 0 },
75 { "radix", 't', "{o,d,x}", 0,
76 N_("Print location of the string in base 8, 10, or 16 respectively."), 0 },
77 { NULL, 'o', NULL, 0, N_("Alias for --radix=o"), 0 },
78
79 { NULL, 0, NULL, 0, N_("Miscellaneous:"), 0 },
80 { NULL, 0, NULL, 0, NULL, 0 }
81};
82
83/* Short description of program. */
84static const char doc[] = N_("\
85Print the strings of printable characters in files.");
86
87/* Strings for arguments in help texts. */
88static const char args_doc[] = N_("[FILE...]");
89
90/* Prototype for option handler. */
91static error_t parse_opt (int key, char *arg, struct argp_state *state);
92
93/* Data structure to communicate with argp functions. */
94static struct argp argp =
95{
96 options, parse_opt, args_doc, doc, NULL, NULL, NULL
97};
98
99
/* File-wide state.  Everything here is set while parsing options or
   while scanning a file.  */

/* Set by -a: scan the whole file instead of only the loaded sections.  */
static bool entire_file = false;

/* Smallest run length, in characters, that gets reported.  */
static size_t min_len = 4;

/* Width of one character in bytes.  */
static size_t bytes_per_char = 1;

/* MIN_LEN expressed in bytes; computed once option parsing finishes.  */
static size_t min_len_bytes = 0;

/* True if multibyte characters are stored in big-endian order.  */
static bool big_endian = false;

/* True if only 7-bit characters are accepted (set by "-e s").  */
static bool char_7bit = false;

/* True if each reported string is prefixed with its file name.  */
static bool print_file_name = false;

/* Number base used for printed string locations; radix_none prints no
   location at all.  */
static enum
{
  radix_none = 0,
  radix_decimal = 1,
  radix_hex = 2,
  radix_octal = 3
} radix = radix_none;


/* System page size, determined in main.  */
static size_t ps = 0;


/* Current mapping of the input file: start of the mapped region, the
   address at which file data begins inside it, the region's size and the
   file offset that ELFMAP_BASE corresponds to.  */
static unsigned char *elfmap = NULL;
static unsigned char *elfmap_base = NULL;
static size_t elfmap_size = 0;
static off_t elfmap_off = 0;
142
143
144int
145main (int argc, char *argv[])
146{
147 /* We use no threads. */
148 __fsetlocking (stdin, FSETLOCKING_BYCALLER);
149 __fsetlocking (stdout, FSETLOCKING_BYCALLER);
150
151 /* Set locale. */
152 (void) setlocale (LC_ALL, "");
153
154 /* Make sure the message catalog can be found. */
155 (void) bindtextdomain (PACKAGE_TARNAME, LOCALEDIR);
156
157 /* Initialize the message catalog. */
158 (void) textdomain (PACKAGE_TARNAME);
159
160 /* Parse and process arguments. */
161 int remaining;
162 (void) argp_parse (&argp, argc, argv, 0, &remaining, NULL);
163
164 /* Tell the library which version we are expecting. */
165 elf_version (EV_CURRENT);
166
167 /* Determine the page size. We will likely need it a couple of times. */
168 ps = sysconf (_SC_PAGESIZE);
169
170 struct stat st;
171 int result = 0;
172 if (remaining == argc)
173 /* We read from standard input. This we cannot do for a
174 structured file. */
175 result = read_fd (STDIN_FILENO,
176 print_file_name ? "{standard input}" : NULL,
177 (fstat (STDIN_FILENO, &st) == 0 && S_ISREG (st.st_mode))
178 ? st.st_size : INT64_C (0x7fffffffffffffff));
179 else
180 do
181 {
182 int fd = (strcmp (argv[remaining], "-") == 0
183 ? STDIN_FILENO : open (argv[remaining], O_RDONLY));
184 if (unlikely (fd == -1))
185 {
186 error (0, errno, gettext ("cannot open '%s'"), argv[remaining]);
187 result = 1;
188 }
189 else
190 {
191 const char *fname = print_file_name ? argv[remaining] : NULL;
192 int fstat_fail = fstat (fd, &st);
193 off_t fdlen = (fstat_fail
194 ? INT64_C (0x7fffffffffffffff) : st.st_size);
195 if (fdlen > (off_t) min_len_bytes)
196 {
197 Elf *elf = NULL;
198 if (entire_file
199 || fstat_fail
200 || !S_ISREG (st.st_mode)
201 || (elf = elf_begin (fd, ELF_C_READ, NULL)) == NULL
202 || elf_kind (elf) != ELF_K_ELF)
203 result |= read_fd (fd, fname, fdlen);
204 else
205 result |= read_elf (elf, fd, fname, fdlen);
206
207 /* This call will succeed even if ELF is NULL. */
208 elf_end (elf);
209 }
210
211 if (strcmp (argv[remaining], "-") != 0)
212 close (fd);
213 }
214
215 if (elfmap != NULL && elfmap != MAP_FAILED)
216 munmap (elfmap, elfmap_size);
217 elfmap = NULL;
218 }
219 while (++remaining < argc);
220
221 return result;
222}
223
224
225/* Handle program arguments. */
226static error_t
227parse_opt (int key, char *arg,
228 struct argp_state *state __attribute__ ((unused)))
229{
230 switch (key)
231 {
232 case 'a':
233 entire_file = true;
234 break;
235
236 case 'e':
237 /* We expect a string of one character. */
238 switch (arg[1] != '\0' ? '\0' : arg[0])
239 {
240 case 's':
241 case 'S':
242 char_7bit = arg[0] == 's';
243 bytes_per_char = 1;
244 break;
245
246 case 'b':
247 case 'B':
248 big_endian = true;
249 FALLTHROUGH;
250
251 case 'l':
252 case 'L':
253 bytes_per_char = isupper (arg[0]) ? 4 : 2;
254 break;
255
256 default:
257 error (0, 0, gettext ("invalid value '%s' for %s parameter"),
258 arg, "-e");
259 argp_help (&argp, stderr, ARGP_HELP_SEE, "strings");
260 return ARGP_ERR_UNKNOWN;
261 }
262 break;
263
264 case 'f':
265 print_file_name = true;
266 break;
267
268 case 'n':
269 min_len = atoi (arg);
270 break;
271
272 case 'o':
273 goto octfmt;
274
275 case 't':
276 switch (arg[0])
277 {
278 case 'd':
279 radix = radix_decimal;
280 break;
281
282 case 'o':
283 octfmt:
284 radix = radix_octal;
285 break;
286
287 case 'x':
288 radix = radix_hex;
289 break;
290
291 default:
292 error (0, 0, gettext ("invalid value '%s' for %s parameter"),
293 arg, "-t");
294 argp_help (&argp, stderr, ARGP_HELP_SEE, "strings");
295 return ARGP_ERR_UNKNOWN;
296 }
297 break;
298
299 case ARGP_KEY_FINI:
300 /* Compute the length in bytes of any match. */
301 if (min_len <= 0 || min_len > INT_MAX / bytes_per_char)
302 error (EXIT_FAILURE, 0,
303 gettext ("invalid minimum length of matched string size"));
304 min_len_bytes = min_len * bytes_per_char;
305 break;
306
307 default:
308 return ARGP_ERR_UNKNOWN;
309 }
310 return 0;
311}
312
313
314static void
315process_chunk_mb (const char *fname, const unsigned char *buf, off_t to,
316 size_t len, char **unprinted)
317{
318 size_t curlen = *unprinted == NULL ? 0 : strlen (*unprinted);
319 const unsigned char *start = buf;
320 while (len >= bytes_per_char)
321 {
322 uint32_t ch;
323
324 if (bytes_per_char == 2)
325 {
326 if (big_endian)
327 ch = buf[0] << 8 | buf[1];
328 else
329 ch = buf[1] << 8 | buf[0];
330 }
331 else
332 {
333 if (big_endian)
334 ch = buf[0] << 24 | buf[1] << 16 | buf[2] << 8 | buf[3];
335 else
336 ch = buf[3] << 24 | buf[2] << 16 | buf[1] << 8 | buf[0];
337 }
338
339 if (ch <= 255 && (isprint (ch) || ch == '\t'))
340 {
341 ++buf;
342 ++curlen;
343 }
344 else
345 {
346 if (curlen >= min_len)
347 {
348 /* We found a match. */
349 if (unlikely (fname != NULL))
350 {
351 fputs_unlocked (fname, stdout);
352 fputs_unlocked (": ", stdout);
353 }
354
355 if (unlikely (radix != radix_none))
356 printf ((radix == radix_octal ? "%7" PRIo64 " "
357 : (radix == radix_decimal ? "%7" PRId64 " "
358 : "%7" PRIx64 " ")),
359 (int64_t) to - len - (buf - start));
360
361 if (unlikely (*unprinted != NULL))
362 {
363 fputs_unlocked (*unprinted, stdout);
364 free (*unprinted);
365 *unprinted = NULL;
366 }
367
368 /* There is no sane way of printing the string. If we
369 assume the file data is encoded in UCS-2/UTF-16 or
370 UCS-4/UTF-32 respectively we could covert the string.
371 But there is no such guarantee. */
372 fwrite_unlocked (start, 1, buf - start, stdout);
373 putc_unlocked ('\n', stdout);
374 }
375
376 start = ++buf;
377 curlen = 0;
378
379 if (len <= min_len)
380 break;
381 }
382
383 --len;
384 }
385
386 if (curlen != 0)
387 *unprinted = xstrndup ((const char *) start, curlen);
388}
389
390
391static void
392process_chunk (const char *fname, const unsigned char *buf, off_t to,
393 size_t len, char **unprinted)
394{
395 /* We are not going to slow the check down for the 2- and 4-byte
396 encodings. Handle them special. */
397 if (unlikely (bytes_per_char != 1))
398 {
399 process_chunk_mb (fname, buf, to, len, unprinted);
400 return;
401 }
402
403 size_t curlen = *unprinted == NULL ? 0 : strlen (*unprinted);
404 const unsigned char *start = buf;
405 while (len > 0)
406 {
407 if ((isprint (*buf) || *buf == '\t') && (! char_7bit || *buf <= 127))
408 {
409 ++buf;
410 ++curlen;
411 }
412 else
413 {
414 if (curlen >= min_len)
415 {
416 /* We found a match. */
417 if (likely (fname != NULL))
418 {
419 fputs_unlocked (fname, stdout);
420 fputs_unlocked (": ", stdout);
421 }
422
423 if (likely (radix != radix_none))
424 printf ((radix == radix_octal ? "%7" PRIo64 " "
425 : (radix == radix_decimal ? "%7" PRId64 " "
426 : "%7" PRIx64 " ")),
427 (int64_t) to - len - (buf - start));
428
429 if (unlikely (*unprinted != NULL))
430 {
431 fputs_unlocked (*unprinted, stdout);
432 free (*unprinted);
433 *unprinted = NULL;
434 }
435 fwrite_unlocked (start, 1, buf - start, stdout);
436 putc_unlocked ('\n', stdout);
437 }
438
439 start = ++buf;
440 curlen = 0;
441
442 if (len <= min_len)
443 break;
444 }
445
446 --len;
447 }
448
449 if (curlen != 0)
450 *unprinted = xstrndup ((const char *) start, curlen);
451}
452
453
454/* Map a file in as large chunks as possible. */
455static void *
456map_file (int fd, off_t start_off, off_t fdlen, size_t *map_sizep)
457{
458 /* Maximum size we mmap. We use an #ifdef to avoid overflows on
459 32-bit machines. 64-bit machines these days do not have usable
460 address spaces larger than about 43 bits. Not that any file
461 should be that large. */
462# if SIZE_MAX > 0xffffffff
463 const size_t mmap_max = 0x4000000000lu;
464# else
465 const size_t mmap_max = 0x40000000lu;
466# endif
467
468 /* Try to mmap the file. */
469 size_t map_size = MIN ((off_t) mmap_max, fdlen);
470 const size_t map_size_min = MAX (MAX (SIZE_MAX / 16, 2 * ps),
471 roundup (2 * min_len_bytes + 1, ps));
472 void *mem;
473 while (1)
474 {
475 /* We map the memory for reading only here. Since we will
476 always look at every byte of the file it makes sense to
477 use MAP_POPULATE. */
478 mem = mmap (NULL, map_size, PROT_READ, MAP_PRIVATE | MAP_POPULATE,
479 fd, start_off);
480 if (mem != MAP_FAILED)
481 {
482 /* We will go through the mapping sequentially. */
483 (void) posix_madvise (mem, map_size, POSIX_MADV_SEQUENTIAL);
484 break;
485 }
486 if (errno != EINVAL && errno != ENOMEM)
487 /* This is an error other than the lack of address space. */
488 break;
489
490 /* Maybe the size of the mapping is too big. Try again. */
491 map_size /= 2;
492 if (map_size < map_size_min)
493 /* That size should have fit. */
494 break;
495 }
496
497 *map_sizep = map_size;
498 return mem;
499}
500
501
502/* Read the file without mapping. */
503static int
504read_block_no_mmap (int fd, const char *fname, off_t from, off_t fdlen)
505{
506 char *unprinted = NULL;
507#define CHUNKSIZE 65536
508 unsigned char *buf = xmalloc (CHUNKSIZE + min_len_bytes
509 + bytes_per_char - 1);
510 size_t ntrailer = 0;
511 int result = 0;
512 while (fdlen > 0)
513 {
514 ssize_t n = TEMP_FAILURE_RETRY (read (fd, buf + ntrailer,
515 MIN (fdlen, CHUNKSIZE)));
516 if (n == 0)
517 {
518 /* There are less than MIN_LEN+1 bytes left so there cannot be
519 another match. */
520 assert (unprinted == NULL || ntrailer == 0);
521 break;
522 }
523 if (unlikely (n < 0))
524 {
525 /* Something went wrong. */
526 result = 1;
527 break;
528 }
529
530 /* Account for the number of bytes read in this round. */
531 fdlen -= n;
532
533 /* Do not use the signed N value. Note that the addition cannot
534 overflow. */
535 size_t nb = (size_t) n + ntrailer;
536 if (nb >= min_len_bytes)
537 {
538 /* We only use complete characters. */
539 nb &= ~(bytes_per_char - 1);
540
541 process_chunk (fname, buf, from + nb, nb, &unprinted);
542
543 /* If the last bytes of the buffer (modulo the character
544 size) have been printed we are not copying them. */
545 size_t to_keep = unprinted != NULL ? 0 : min_len_bytes;
546
547 memmove (buf, buf + nb - to_keep, to_keep);
548 ntrailer = to_keep;
549 from += nb;
550 }
551 else
552 ntrailer = nb;
553 }
554
555 free (buf);
556
557 /* Don't print anything we collected so far. There is no
558 terminating NUL byte. */
559 free (unprinted);
560
561 return result;
562}
563
564
565static int
566read_block (int fd, const char *fname, off_t fdlen, off_t from, off_t to)
567{
568 if (elfmap == NULL)
569 {
570 /* We need a completely new mapping. */
571 elfmap_off = from & ~(ps - 1);
572 elfmap_base = elfmap = map_file (fd, elfmap_off, fdlen, &elfmap_size);
573
574 if (unlikely (elfmap == MAP_FAILED))
575 /* Let the kernel know we are going to read everything in sequence. */
576 (void) posix_fadvise (fd, 0, 0, POSIX_FADV_SEQUENTIAL);
577 }
578
579 if (unlikely (elfmap == MAP_FAILED))
580 {
581 /* Read from the file descriptor. For this we must position the
582 read pointer. */
583 // XXX Eventually add flag which avoids this if the position
584 // XXX is known to match.
585 if (from != 0 && lseek (fd, from, SEEK_SET) != from)
586 error (EXIT_FAILURE, errno, gettext ("lseek failed"));
587
588 return read_block_no_mmap (fd, fname, from, to - from);
589 }
590
591 assert ((off_t) min_len_bytes < fdlen);
592
593 if (to < (off_t) elfmap_off || from > (off_t) (elfmap_off + elfmap_size))
594 {
595 /* The existing mapping cannot fit at all. Map the new area.
596 We always map the full range of ELFMAP_SIZE bytes even if
597 this extend beyond the end of the file. The Linux kernel
598 handles this OK if the access pages are not touched. */
599 elfmap_off = from & ~(ps - 1);
600 if (mmap (elfmap, elfmap_size, PROT_READ,
601 MAP_PRIVATE | MAP_POPULATE | MAP_FIXED, fd, from)
602 == MAP_FAILED)
603 error (EXIT_FAILURE, errno, gettext ("re-mmap failed"));
604 elfmap_base = elfmap;
605 }
606
607 char *unprinted = NULL;
608
609 /* Use the existing mapping as much as possible. If necessary, map
610 new pages. */
611 if (from >= (off_t) elfmap_off
612 && from < (off_t) (elfmap_off + elfmap_size))
613 /* There are at least a few bytes in this mapping which we can
614 use. */
615 process_chunk (fname, elfmap_base + (from - elfmap_off),
616 MIN (to, (off_t) (elfmap_off + elfmap_size)),
617 MIN (to, (off_t) (elfmap_off + elfmap_size)) - from,
618 &unprinted);
619
620 if (to > (off_t) (elfmap_off + elfmap_size))
621 {
622 unsigned char *remap_base = elfmap_base;
623 size_t read_now = elfmap_size - (elfmap_base - elfmap);
624
625 assert (from >= (off_t) elfmap_off
626 && from < (off_t) (elfmap_off + elfmap_size));
627 off_t handled_to = elfmap_off + elfmap_size;
628 assert (elfmap == elfmap_base
629 || (elfmap_base - elfmap
630 == (ptrdiff_t) ((min_len_bytes + ps - 1) & ~(ps - 1))));
631 if (elfmap == elfmap_base)
632 {
633 size_t keep_area = (min_len_bytes + ps - 1) & ~(ps - 1);
634 assert (elfmap_size >= keep_area + ps);
635 /* The keep area is used for the content of the previous
636 buffer we have to keep. This means copying those bytes
637 and for this we have to make the data writable. */
638 if (unlikely (mprotect (elfmap, keep_area, PROT_READ | PROT_WRITE)
639 != 0))
640 error (EXIT_FAILURE, errno, gettext ("mprotect failed"));
641
642 elfmap_base = elfmap + keep_area;
643 }
644
645 while (1)
646 {
647 /* Map the rest of the file, eventually again in pieces.
648 We speed things up with a nice Linux feature. Note
649 that we have at least two pages mapped. */
650 size_t to_keep = unprinted != NULL ? 0 : min_len_bytes;
651
652 assert (read_now >= to_keep);
653 memmove (elfmap_base - to_keep,
654 remap_base + read_now - to_keep, to_keep);
655 remap_base = elfmap_base;
656
657 assert ((elfmap_size - (elfmap_base - elfmap)) % bytes_per_char
658 == 0);
659 read_now = MIN (to - handled_to,
660 (ptrdiff_t) elfmap_size - (elfmap_base - elfmap));
661
662 assert (handled_to % ps == 0);
663 assert (handled_to % bytes_per_char == 0);
664 if (mmap (remap_base, read_now, PROT_READ,
665 MAP_PRIVATE | MAP_POPULATE | MAP_FIXED, fd, handled_to)
666 == MAP_FAILED)
667 error (EXIT_FAILURE, errno, gettext ("re-mmap failed"));
668 elfmap_off = handled_to;
669
670 process_chunk (fname, remap_base - to_keep,
671 elfmap_off + (read_now & ~(bytes_per_char - 1)),
672 to_keep + (read_now & ~(bytes_per_char - 1)),
673 &unprinted);
674 handled_to += read_now;
675 if (handled_to >= to)
676 break;
677 }
678 }
679
680 /* Don't print anything we collected so far. There is no
681 terminating NUL byte. */
682 free (unprinted);
683
684 return 0;
685}
686
687
/* Scan the complete contents of FD, taken to be FDLEN bytes long, as one
   block.  FNAME is passed through for labelling matches.  Returns the
   result of read_block.  */
static int
read_fd (int fd, const char *fname, off_t fdlen)
{
  const off_t first = 0;
  const off_t past_end = fdlen;

  return read_block (fd, fname, fdlen, first, past_end);
}
693
694
695static int
696read_elf (Elf *elf, int fd, const char *fname, off_t fdlen)
697{
698 assert (fdlen >= 0);
699
700 /* We will look at each section separately. The ELF file is not
701 mmapped. The libelf implementation will load the needed parts on
702 demand. Since we only interate over the section header table the
703 memory consumption at this stage is kept minimal. */
704 Elf_Scn *scn = elf_nextscn (elf, NULL);
705 if (scn == NULL)
706 return read_fd (fd, fname, fdlen);
707
708 int result = 0;
709 do
710 {
711 GElf_Shdr shdr_mem;
712 GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_mem);
713
714 /* Only look in sections which are loaded at runtime and
715 actually have content. */
716 if (shdr != NULL && shdr->sh_type != SHT_NOBITS
717 && (shdr->sh_flags & SHF_ALLOC) != 0)
718 {
719 if (shdr->sh_offset > (Elf64_Off) fdlen
720 || fdlen - shdr->sh_offset < shdr->sh_size)
721 {
722 size_t strndx = 0;
723 const char *sname;
724 if (unlikely (elf_getshdrstrndx (elf, &strndx) < 0))
725 sname = "<unknown>";
726 else
727 sname = elf_strptr (elf, strndx, shdr->sh_name) ?: "<unknown>";
728 error (0, 0,
729 gettext ("Skipping section %zd '%s' data outside file"),
730 elf_ndxscn (scn), sname);
731 result = 1;
732 }
733 else
734 result |= read_block (fd, fname, fdlen, shdr->sh_offset,
735 shdr->sh_offset + shdr->sh_size);
736 }
737 }
738 while ((scn = elf_nextscn (elf, scn)) != NULL);
739
740 if (elfmap != NULL && elfmap != MAP_FAILED)
741 munmap (elfmap, elfmap_size);
742 elfmap = NULL;
743
744 return result;
745}
746
747
748#include "debugpred.h"