xref: /third_party/elfutils/libdwfl/core-file.c (revision da0c48c4)
/* Core file handling.
   Copyright (C) 2008-2010, 2013, 2015 Red Hat, Inc.
   Copyright (C) 2021 Mark J. Wielaard <mark@klomp.org>
   This file is part of elfutils.

   This file is free software; you can redistribute it and/or modify
   it under the terms of either

     * the GNU Lesser General Public License as published by the Free
       Software Foundation; either version 3 of the License, or (at
       your option) any later version

   or

     * the GNU General Public License as published by the Free
       Software Foundation; either version 2 of the License, or (at
       your option) any later version

   or both in parallel, as here.

   elfutils is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received copies of the GNU General Public License and
   the GNU Lesser General Public License along with this program.  If
   not, see <http://www.gnu.org/licenses/>.  */

#include <config.h>
#include "../libelf/libelfP.h"	/* For NOTE_ALIGN.  */
#undef _
#include "libdwflP.h"
#include <gelf.h>

/* On failure return, we update *NEXT to point back at OFFSET.  */
static inline Elf *
do_fail (int error, off_t *next, off_t offset)
{
  if (next != NULL)
    *next = offset;
  //__libelf_seterrno (error);
  __libdwfl_seterrno (DWFL_E (LIBELF, error));
  return NULL;
}

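/* The fail macro picks up the local NEXT and OFFSET variables of
   elf_begin_rand below at its use sites.  */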
#define fail(error) do_fail (error, next, offset)

/* This is a prototype of what a new libelf interface might be.
   This implementation is pessimal for non-mmap cases and should
   be replaced by more diddling inside libelf internals.  */
static Elf *
elf_begin_rand (Elf *parent, off_t offset, off_t size, off_t *next)
{
  if (parent == NULL)
    return NULL;

  off_t min = (parent->kind == ELF_K_ELF ?
	       (parent->class == ELFCLASS32
		? sizeof (Elf32_Ehdr) : sizeof (Elf64_Ehdr))
	       : parent->kind == ELF_K_AR ? SARMAG
	       : 0);

  if (unlikely (offset < min)
      || unlikely (offset >= (off_t) parent->maximum_size))
    return fail (ELF_E_RANGE);

  /* For an archive, fetch just the size field
     from the archive header to override SIZE.  */
  if (parent->kind == ELF_K_AR)
    {
      /* File size, in ASCII decimal, right-padded with ASCII spaces;
         at most 10 characters and not zero-terminated.  So make this
         ar_size array one larger and explicitly zero-terminate it, as
         strtoll requires.  */
      #define AR_SIZE_CHARS 10
      char ar_size[AR_SIZE_CHARS + 1];
      ar_size[AR_SIZE_CHARS] = '\0';

      if (unlikely (parent->maximum_size - offset < sizeof (struct ar_hdr)))
	return fail (ELF_E_RANGE);

      if (parent->map_address != NULL)
	memcpy (ar_size, parent->map_address + parent->start_offset + offset,
		AR_SIZE_CHARS);
      else if (unlikely (pread_retry (parent->fildes,
				      ar_size, AR_SIZE_CHARS,
				      parent->start_offset + offset
				      + offsetof (struct ar_hdr, ar_size))
			 != AR_SIZE_CHARS))
	return fail (ELF_E_READ_ERROR);

      offset += sizeof (struct ar_hdr);

      char *endp;
      size = strtoll (ar_size, &endp, 10);
      if (unlikely (endp == ar_size)
	  || unlikely ((off_t) parent->maximum_size - offset < size))
	return fail (ELF_E_INVALID_ARCHIVE);
    }

  if (unlikely ((off_t) parent->maximum_size - offset < size))
    return fail (ELF_E_RANGE);

  /* Even if we fail at this point, update *NEXT to point past the file.  */
  if (next != NULL)
    *next = offset + size;

  if (unlikely (offset == 0)
      && unlikely (size == (off_t) parent->maximum_size))
    return elf_clone (parent, parent->cmd);

  /* Note the image is guaranteed live only as long as PARENT
     lives.  Using elf_memory is quite suboptimal if the whole
     file is not mmap'd.  We really should have something like
     a generalization of the archive support.  */
  Elf_Data *data = elf_getdata_rawchunk (parent, offset, size, ELF_T_BYTE);
  if (data == NULL)
    return NULL;
  assert ((off_t) data->d_size == size);
  return elf_memory (data->d_buf, size);
}


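/* Report each PT_LOAD segment of ELF to DWFL via dwfl_report_segment.
   If NOTES is not NULL, its p_type is reset to PT_NULL and the first
   PT_NOTE program header found is copied into *NOTES.  Returns -1 on
   error, otherwise the result of the last dwfl_report_segment call
   (or 0 if ELF has no PT_LOAD segments).  */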
int
dwfl_report_core_segments (Dwfl *dwfl, Elf *elf, size_t phnum, GElf_Phdr *notes)
{
  if (unlikely (dwfl == NULL))
    return -1;

  int result = 0;

  if (notes != NULL)
    notes->p_type = PT_NULL;

  for (size_t ndx = 0; result >= 0 && ndx < phnum; ++ndx)
    {
      GElf_Phdr phdr_mem;
      GElf_Phdr *phdr = gelf_getphdr (elf, ndx, &phdr_mem);
      if (unlikely (phdr == NULL))
	{
	  __libdwfl_seterrno (DWFL_E_LIBELF);
	  return -1;
	}
      switch (phdr->p_type)
	{
	case PT_LOAD:
	  result = dwfl_report_segment (dwfl, ndx, phdr, 0, NULL);
	  break;

	case PT_NOTE:
	  if (notes != NULL)
	    {
	      *notes = *phdr;
	      notes = NULL;
	    }
	  break;
	}
    }

  return result;
}

/* Never read more than this much without mmap.  */
#define MAX_EAGER_COST	8192

/* Dwfl_Module_Callback passed to and called by dwfl_segment_report_module
   to read in a segment as an ELF image directly if possible, or to indicate
   that an attempt must be made to read in the whole segment right now.  */
static bool
core_file_read_eagerly (Dwfl_Module *mod,
			void **userdata __attribute__ ((unused)),
			const char *name __attribute__ ((unused)),
			Dwarf_Addr start __attribute__ ((unused)),
			void **buffer, size_t *buffer_available,
			GElf_Off cost, GElf_Off worthwhile,
			GElf_Off whole,
			GElf_Off contiguous __attribute__ ((unused)),
			void *arg, Elf **elfp)
{
  Elf *core = arg;

  /* When the core file was mmap'd and we are used together with
     dwfl_elf_phdr_memory_callback, the available buffer is often the
     whole segment.  In that case, if it is complete, we can construct
     the whole ELF image right now without reading in anything more.  */
  if (whole <= *buffer_available)
    {
      /* All there ever was, we already have on hand.  */

      if (core->map_address == NULL)
	{
	  /* We already malloc'd the buffer.  */
	  *elfp = elf_memory (*buffer, whole);
	  if (unlikely (*elfp == NULL))
	    return false;

	  (*elfp)->flags |= ELF_F_MALLOCED;
	  *buffer = NULL;
	  *buffer_available = 0;
	  return true;
	}

      /* We can use the image inside the core file directly.  */
      *elfp = elf_begin_rand (core, *buffer - core->map_address, whole, NULL);
      *buffer = NULL;
      *buffer_available = 0;
      return *elfp != NULL;
    }

  /* We don't have the whole file, which means either that the core file
     wasn't mmap'd and still needs to be read in, or that the segment is
     truncated.  Figure out if what we have is better than nothing.  */

  if (worthwhile == 0)
    /* Caller doesn't think so.  */
    return false;

  /*
    XXX would like to fall back to partial file via memory
    when build id find_elf fails
    also, link_map name may give file name from disk better than partial here
    requires find_elf hook re-doing the magic to fall back if no file found
  */

  if (whole > MAX_EAGER_COST && mod->build_id_len > 0)
    /* We can't cheaply read the whole file here, so we'd
       be using a partial file.  But there is a build ID that could
       help us find the whole file, which might be more useful than
       what we have.  We'll just rely on that.  */
    return false;

  /* The file is either small (most likely the vdso) or big and incomplete,
     but we don't have a build-id.  */

  if (core->map_address != NULL)
    /* It's cheap to get, so get it.  */
    return true;

  /* Only use it if there isn't too much to be read.  */
  return cost <= MAX_EAGER_COST;
}

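/* Round the end of PPHDR's file image and memory image up to ALIGN and
   store the results in *PEND and *PEND_VADDR.  */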
static inline void
update_end (GElf_Phdr *pphdr, const GElf_Off align,
            GElf_Off *pend, GElf_Addr *pend_vaddr)
{
  *pend = (pphdr->p_offset + pphdr->p_filesz + align - 1) & -align;
  *pend_vaddr = (pphdr->p_vaddr + pphdr->p_memsz + align - 1) & -align;
}

/* Use following contiguous segments to get towards SIZE.  */
static inline bool
do_more (size_t size, GElf_Phdr *pphdr, const GElf_Off align,
         Elf *elf, GElf_Off start, int *pndx,
         GElf_Off *pend, GElf_Addr *pend_vaddr)
{
  while (*pend <= start || *pend - start < size)
    {
      if (pphdr->p_filesz < pphdr->p_memsz)
	/* This segment is truncated, so no following one helps us.  */
	return false;

      if (unlikely (gelf_getphdr (elf, (*pndx)++, pphdr) == NULL))
	return false;

      if (pphdr->p_type == PT_LOAD)
	{
	  if (pphdr->p_offset > *pend
	      || pphdr->p_vaddr > *pend_vaddr)
	    /* It's discontiguous!  */
	    return false;

	  update_end (pphdr, align, pend, pend_vaddr);
	}
    }
  return true;
}

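/* Like fail above, the more macro relies on the caller's local PHDR, ALIGN,
   ELF, START, NDX, END and END_VADDR variables being in scope.  */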
#define more(size) do_more (size, &phdr, align, elf, start, &ndx, &end, &end_vaddr)

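/* Memory callback used with dwfl_link_map_report and
   dwfl_segment_report_module below.  It satisfies reads from the PT_LOAD
   segments of the core file passed as ARG: find the segment covering
   VADDR, then hand back either a pointer into the mmap'd core image or a
   freshly malloc'd buffer filled via pread.  MINREAD == 0 means string
   mode: read up to and including a terminating '\0'.  NDX == -1 means
   clean up a previously returned buffer.  */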
bool
dwfl_elf_phdr_memory_callback (Dwfl *dwfl, int ndx,
			       void **buffer, size_t *buffer_available,
			       GElf_Addr vaddr,
			       size_t minread,
			       void *arg)
{
  Elf *elf = arg;

  if (ndx == -1)
    {
      /* Called for cleanup.  */
      if (elf->map_address == NULL)
	free (*buffer);
      *buffer = NULL;
      *buffer_available = 0;
      return false;
    }

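  /* Treat an unset (zero) dwfl->segment_align as byte alignment.  */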
  const GElf_Off align = dwfl->segment_align ?: 1;
  GElf_Phdr phdr;

  do
    if (unlikely (gelf_getphdr (elf, ndx++, &phdr) == NULL))
      return false;
  while (phdr.p_type != PT_LOAD
	 || ((phdr.p_vaddr + phdr.p_memsz + align - 1) & -align) <= vaddr);

  GElf_Off start = vaddr - phdr.p_vaddr + phdr.p_offset;
  GElf_Off end;
  GElf_Addr end_vaddr;

  update_end (&phdr, align, &end, &end_vaddr);

  /* We need at least this much.  */
  if (! more (minread))
    return false;

  /* See how much more we can get of what the caller wants.  */
  (void) more (*buffer_available);

  /* If it's already on hand anyway, use as much as there is.  */
  if (elf->map_address != NULL && start < elf->maximum_size)
    (void) more (elf->maximum_size - start);

  /* Make sure we don't look past the end of the actual file,
     even if the headers tell us to.  */
  if (unlikely (end > elf->maximum_size))
    end = elf->maximum_size;

  /* If the file is too small, there is nothing at all to get.  */
  if (unlikely (start >= end))
    return false;

  if (end - start < minread)
    return false;

  if (elf->map_address != NULL)
    {
      void *contents = elf->map_address + elf->start_offset + start;
      size_t size = end - start;

      if (minread == 0)		/* String mode.  */
	{
	  const void *eos = memchr (contents, '\0', size);
	  if (unlikely (eos == NULL) || unlikely (eos == contents))
	    return false;
	  size = eos + 1 - contents;
	}

      if (*buffer == NULL)
	{
	  *buffer = contents;
	  *buffer_available = size;
	}
      else
	{
	  *buffer_available = MIN (size, *buffer_available);
	  memcpy (*buffer, contents, *buffer_available);
	}
    }
  else
    {
      void *into = *buffer;
      if (*buffer == NULL)
	{
	  *buffer_available = MIN (minread ?: 512,
				   MAX (4096, MIN (end - start,
						   *buffer_available)));
	  into = malloc (*buffer_available);
	  if (unlikely (into == NULL))
	    {
	      __libdwfl_seterrno (DWFL_E_NOMEM);
	      return false;
	    }
	}

      ssize_t nread = pread_retry (elf->fildes, into, *buffer_available, start);
      if (nread < (ssize_t) minread)
	{
	  if (into != *buffer)
	    free (into);
	  if (nread < 0)
	    __libdwfl_seterrno (DWFL_E_ERRNO);
	  return false;
	}

      if (minread == 0)		/* String mode.  */
	{
	  const void *eos = memchr (into, '\0', nread);
	  if (unlikely (eos == NULL) || unlikely (eos == into))
	    {
	      if (*buffer == NULL)
		free (into);
	      return false;
	    }
	  nread = eos + 1 - into;
	}

      if (*buffer == NULL)
	*buffer = into;
      *buffer_available = nread;
    }

  return true;
}

/* Free the contents of R_DEBUG_INFO without the R_DEBUG_INFO memory itself.  */

static void
clear_r_debug_info (struct r_debug_info *r_debug_info)
{
  while (r_debug_info->module != NULL)
    {
      struct r_debug_info_module *module = r_debug_info->module;
      r_debug_info->module = module->next;
      elf_end (module->elf);
      if (module->fd != -1)
	close (module->fd);
      free (module);
    }
}

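/* Find the virtual address of ELF's PT_DYNAMIC segment, which is what a
   link_map's l_ld field points to at run time; used below to compute the
   load bias of a module.  */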
bool
internal_function
__libdwfl_dynamic_vaddr_get (Elf *elf, GElf_Addr *vaddrp)
{
  size_t phnum;
  if (unlikely (elf_getphdrnum (elf, &phnum) != 0))
    return false;
  for (size_t i = 0; i < phnum; ++i)
    {
      GElf_Phdr phdr_mem;
      GElf_Phdr *phdr = gelf_getphdr (elf, i, &phdr_mem);
      if (unlikely (phdr == NULL))
	return false;
      if (phdr->p_type == PT_DYNAMIC)
	{
	  *vaddrp = phdr->p_vaddr;
	  return true;
	}
    }
  return false;
}

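/* Report the segments and modules of a core file to DWFL.  A minimal
   usage sketch (not part of this file; it assumes the caller has its own
   Dwfl_Callbacks, e.g. using dwfl_build_id_find_elf and
   dwfl_standard_find_debuginfo, and an open file descriptor FD):

     Dwfl *dwfl = dwfl_begin (&callbacks);
     Elf *core = elf_begin (fd, ELF_C_READ_MMAP, NULL);
     if (dwfl_core_file_report (dwfl, core, NULL) < 0)
       ... handle the error, see dwfl_errmsg (-1) ...
     dwfl_report_end (dwfl, NULL, NULL);

   EXECUTABLE, when not NULL, names the main executable on disk; it is
   stored in dwfl->user_core->executable_for_core for later use when
   reporting modules.  */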
NEW_VERSION (dwfl_core_file_report, ELFUTILS_0.158)
int
dwfl_core_file_report (Dwfl *dwfl, Elf *elf, const char *executable)
{
  size_t phnum;
  if (unlikely (elf_getphdrnum (elf, &phnum) != 0))
    {
      __libdwfl_seterrno (DWFL_E_LIBELF);
      return -1;
    }

  bool cleanup_user_core = false;
  if (dwfl->user_core != NULL)
    free (dwfl->user_core->executable_for_core);
  if (executable == NULL)
    {
      if (dwfl->user_core != NULL)
	dwfl->user_core->executable_for_core = NULL;
    }
  else
    {
      if (dwfl->user_core == NULL)
	{
	  cleanup_user_core = true;
	  dwfl->user_core = calloc (1, sizeof (struct Dwfl_User_Core));
	  if (dwfl->user_core == NULL)
	    {
	      __libdwfl_seterrno (DWFL_E_NOMEM);
	      return -1;
	    }
	  dwfl->user_core->fd = -1;
	}
      dwfl->user_core->executable_for_core = strdup (executable);
      if (dwfl->user_core->executable_for_core == NULL)
	{
	  if (cleanup_user_core)
	    {
	      free (dwfl->user_core);
	      dwfl->user_core = NULL;
	    }
	  __libdwfl_seterrno (DWFL_E_NOMEM);
	  return -1;
	}
    }

  /* First report each PT_LOAD segment.  */
  GElf_Phdr notes_phdr;
  int ndx = dwfl_report_core_segments (dwfl, elf, phnum, &notes_phdr);
  if (unlikely (ndx <= 0))
    {
      if (cleanup_user_core)
	{
	  free (dwfl->user_core->executable_for_core);
	  free (dwfl->user_core);
	  dwfl->user_core = NULL;
	}
      return ndx;
    }

  /* Next, we should follow the chain from DT_DEBUG.  */

  const void *auxv = NULL;
  const void *note_file = NULL;
  size_t auxv_size = 0;
  size_t note_file_size = 0;
  if (likely (notes_phdr.p_type == PT_NOTE))
    {
      /* PT_NOTE -> NT_AUXV -> AT_PHDR -> PT_DYNAMIC -> DT_DEBUG */

      Elf_Data *notes = elf_getdata_rawchunk (elf,
					      notes_phdr.p_offset,
					      notes_phdr.p_filesz,
					      (notes_phdr.p_align == 8
					       ? ELF_T_NHDR8
					       : ELF_T_NHDR));
      if (likely (notes != NULL))
	{
	  size_t pos = 0;
	  GElf_Nhdr nhdr;
	  size_t name_pos;
	  size_t desc_pos;
	  while ((pos = gelf_getnote (notes, pos, &nhdr,
				      &name_pos, &desc_pos)) > 0)
	    if (nhdr.n_namesz == sizeof "CORE"
		&& !memcmp (notes->d_buf + name_pos, "CORE", sizeof "CORE"))
	      {
		if (nhdr.n_type == NT_AUXV)
		  {
		    auxv = notes->d_buf + desc_pos;
		    auxv_size = nhdr.n_descsz;
		  }
		if (nhdr.n_type == NT_FILE)
		  {
		    note_file = notes->d_buf + desc_pos;
		    note_file_size = nhdr.n_descsz;
		  }
	      }
	}
    }

  /* Now we have NT_AUXV contents.  From here on this processing could be
     used for a live process with auxv read from /proc.  */

  struct r_debug_info r_debug_info;
  memset (&r_debug_info, 0, sizeof r_debug_info);
  int retval = dwfl_link_map_report (dwfl, auxv, auxv_size,
				     dwfl_elf_phdr_memory_callback, elf,
				     &r_debug_info);
  int listed = retval > 0 ? retval : 0;

  /* Now sniff segment contents for modules hinted by information gathered
     from DT_DEBUG.  */

  ndx = 0;
  do
    {
      int seg = dwfl_segment_report_module (dwfl, ndx, NULL,
					    &dwfl_elf_phdr_memory_callback, elf,
					    core_file_read_eagerly, elf,
					    elf->maximum_size,
					    note_file, note_file_size,
					    &r_debug_info);
      if (unlikely (seg < 0))
	{
	  clear_r_debug_info (&r_debug_info);
	  return seg;
	}
      if (seg > ndx)
	{
	  ndx = seg;
	  ++listed;
	}
      else
	++ndx;
    }
  while (ndx < (int) phnum);

  /* Now report the modules from dwfl_link_map_report which were not filtered
     out by dwfl_segment_report_module.  */

  Dwfl_Module **lastmodp = &dwfl->modulelist;
  while (*lastmodp != NULL)
    lastmodp = &(*lastmodp)->next;
  for (struct r_debug_info_module *module = r_debug_info.module;
       module != NULL; module = module->next)
    {
      if (module->elf == NULL)
	continue;
      GElf_Addr file_dynamic_vaddr;
      if (! __libdwfl_dynamic_vaddr_get (module->elf, &file_dynamic_vaddr))
	continue;
      Dwfl_Module *mod;
      mod = __libdwfl_report_elf (dwfl, basename (module->name), module->name,
				  module->fd, module->elf,
				  module->l_ld - file_dynamic_vaddr,
				  true, true);
      if (mod == NULL)
	continue;
      ++listed;
      module->elf = NULL;
      module->fd = -1;
      /* Move this module to the end of the list, so that we end
	 up with a list in the same order as the link_map chain.  */
      if (mod->next != NULL)
	{
	  if (*lastmodp != mod)
	    {
	      lastmodp = &dwfl->modulelist;
	      while (*lastmodp != mod)
		lastmodp = &(*lastmodp)->next;
	    }
	  *lastmodp = mod->next;
	  mod->next = NULL;
	  while (*lastmodp != NULL)
	    lastmodp = &(*lastmodp)->next;
	  *lastmodp = mod;
	}
      lastmodp = &mod->next;
    }

  clear_r_debug_info (&r_debug_info);

  /* We return the number of modules we found if we found any.  If we
     found none, we return the dwfl_link_map_report result, so that a real
     error yields an error code rather than 0 (nothing found).  */
  return listed > 0 ? listed : retval;
}
NEW_INTDEF (dwfl_core_file_report)

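/* Before ELFUTILS_0.158 dwfl_core_file_report took no EXECUTABLE argument;
   keep the old symbol version as a compatibility wrapper that passes NULL.  */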
#ifdef SYMBOL_VERSIONING
int _compat_without_executable_dwfl_core_file_report (Dwfl *dwfl, Elf *elf);
COMPAT_VERSION_NEWPROTO (dwfl_core_file_report, ELFUTILS_0.146,
			 without_executable)

int
_compat_without_executable_dwfl_core_file_report (Dwfl *dwfl, Elf *elf)
{
  return dwfl_core_file_report (dwfl, elf, NULL);
}
#endif