1/* Get macro information.
2   Copyright (C) 2002-2009, 2014, 2017, 2018 Red Hat, Inc.
3   This file is part of elfutils.
4
5   This file is free software; you can redistribute it and/or modify
6   it under the terms of either
7
8     * the GNU Lesser General Public License as published by the Free
9       Software Foundation; either version 3 of the License, or (at
10       your option) any later version
11
12   or
13
14     * the GNU General Public License as published by the Free
15       Software Foundation; either version 2 of the License, or (at
16       your option) any later version
17
18   or both in parallel, as here.
19
20   elfutils is distributed in the hope that it will be useful, but
21   WITHOUT ANY WARRANTY; without even the implied warranty of
22   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23   General Public License for more details.
24
25   You should have received copies of the GNU General Public License and
26   the GNU Lesser General Public License along with this program.  If
27   not, see <http://www.gnu.org/licenses/>.  */
28
29#ifdef HAVE_CONFIG_H
30# include <config.h>
31#endif
32
33#include <assert.h>
34#include <dwarf.h>
35#include <search.h>
36#include <stdlib.h>
37#include <string.h>
38
39#include <libdwP.h>
40
41static int
42get_offset_from (Dwarf_Die *die, int name, Dwarf_Word *retp)
43{
44  /* Get the appropriate attribute.  */
45  Dwarf_Attribute attr;
46  if (INTUSE(dwarf_attr) (die, name, &attr) == NULL)
47    return -1;
48
49  /* Offset into the corresponding section.  */
50  return INTUSE(dwarf_formudata) (&attr, retp);
51}
52
53static int
54macro_op_compare (const void *p1, const void *p2)
55{
56  const Dwarf_Macro_Op_Table *t1 = (const Dwarf_Macro_Op_Table *) p1;
57  const Dwarf_Macro_Op_Table *t2 = (const Dwarf_Macro_Op_Table *) p2;
58
59  if (t1->offset < t2->offset)
60    return -1;
61  if (t1->offset > t2->offset)
62    return 1;
63
64  if (t1->sec_index < t2->sec_index)
65    return -1;
66  if (t1->sec_index > t2->sec_index)
67    return 1;
68
69  return 0;
70}
71
72static void
73build_table (Dwarf_Macro_Op_Table *table,
74	     Dwarf_Macro_Op_Proto op_protos[static 255])
75{
76  unsigned ct = 0;
77  for (unsigned i = 1; i < 256; ++i)
78    if (op_protos[i - 1].forms != NULL)
79      table->table[table->opcodes[i - 1] = ct++] = op_protos[i - 1];
80    else
81      table->opcodes[i - 1] = 0xff;
82}
83
84#define MACRO_PROTO(NAME, ...)					\
85  Dwarf_Macro_Op_Proto NAME = ({				\
86      static const uint8_t proto[] = {__VA_ARGS__};		\
87      (Dwarf_Macro_Op_Proto) {sizeof proto, proto};		\
88    })
89
90enum { macinfo_data_size = offsetof (Dwarf_Macro_Op_Table, table[5]) };
91static unsigned char macinfo_data[macinfo_data_size]
92	__attribute__ ((aligned (__alignof (Dwarf_Macro_Op_Table))));
93
94static __attribute__ ((constructor)) void
95init_macinfo_table (void)
96{
97  MACRO_PROTO (p_udata_str, DW_FORM_udata, DW_FORM_string);
98  MACRO_PROTO (p_udata_udata, DW_FORM_udata, DW_FORM_udata);
99  MACRO_PROTO (p_none);
100
101  Dwarf_Macro_Op_Proto op_protos[255] =
102    {
103      [DW_MACINFO_define - 1] = p_udata_str,
104      [DW_MACINFO_undef - 1] = p_udata_str,
105      [DW_MACINFO_vendor_ext - 1] = p_udata_str,
106      [DW_MACINFO_start_file - 1] = p_udata_udata,
107      [DW_MACINFO_end_file - 1] = p_none,
108      /* If you are adding more elements to this array, increase
109	 MACINFO_DATA_SIZE above.  */
110    };
111
112  Dwarf_Macro_Op_Table *macinfo_table = (void *) macinfo_data;
113  memset (macinfo_table, 0, sizeof macinfo_data);
114  build_table (macinfo_table, op_protos);
115  macinfo_table->sec_index = IDX_debug_macinfo;
116}
117
118static Dwarf_Macro_Op_Table *
119get_macinfo_table (Dwarf *dbg, Dwarf_Word macoff, Dwarf_Die *cudie)
120{
121  assert (cudie != NULL);
122
123  Dwarf_Attribute attr_mem, *attr
124    = INTUSE(dwarf_attr) (cudie, DW_AT_stmt_list, &attr_mem);
125  Dwarf_Off line_offset = (Dwarf_Off) -1;
126  if (attr != NULL)
127    if (unlikely (INTUSE(dwarf_formudata) (attr, &line_offset) != 0))
128      return NULL;
129
130  Dwarf_Macro_Op_Table *table = libdw_alloc (dbg, Dwarf_Macro_Op_Table,
131					     macinfo_data_size, 1);
132  memcpy (table, macinfo_data, macinfo_data_size);
133
134  table->offset = macoff;
135  table->sec_index = IDX_debug_macinfo;
136  table->line_offset = line_offset;
137  table->is_64bit = cudie->cu->address_size == 8;
138  table->comp_dir = __libdw_getcompdir (cudie);
139
140  return table;
141}
142
143static Dwarf_Macro_Op_Table *
144get_table_for_offset (Dwarf *dbg, Dwarf_Word macoff,
145		      const unsigned char *readp,
146		      const unsigned char *const endp,
147		      Dwarf_Die *cudie)
148{
149  const unsigned char *startp = readp;
150
151  /* Request at least 3 bytes for header.  */
152  if (readp + 3 > endp)
153    {
154    invalid_dwarf:
155      __libdw_seterrno (DWARF_E_INVALID_DWARF);
156      return NULL;
157    }
158
159  uint16_t version = read_2ubyte_unaligned_inc (dbg, readp);
160  if (version != 4 && version != 5)
161    {
162      __libdw_seterrno (DWARF_E_INVALID_VERSION);
163      return NULL;
164    }
165
166  uint8_t flags = *readp++;
167  bool is_64bit = (flags & 0x1) != 0;
168
169  Dwarf_Off line_offset = (Dwarf_Off) -1;
170  if ((flags & 0x2) != 0)
171    {
172      line_offset = read_addr_unaligned_inc (is_64bit ? 8 : 4, dbg, readp);
173      if (readp > endp)
174	goto invalid_dwarf;
175    }
176  else if (cudie != NULL)
177    {
178      Dwarf_Attribute attr_mem, *attr
179	= INTUSE(dwarf_attr) (cudie, DW_AT_stmt_list, &attr_mem);
180      if (attr != NULL)
181	if (unlikely (INTUSE(dwarf_formudata) (attr, &line_offset) != 0))
182	  return NULL;
183    }
184
185  /* """The macinfo entry types defined in this standard may, but
186     might not, be described in the table""".
187
188     I.e. these may be present.  It's tempting to simply skip them,
189     but it's probably more correct to tolerate that a producer tweaks
190     the way certain opcodes are encoded, for whatever reasons.  */
191
192  MACRO_PROTO (p_udata_str, DW_FORM_udata, DW_FORM_string);
193  MACRO_PROTO (p_udata_strp, DW_FORM_udata, DW_FORM_strp);
194  MACRO_PROTO (p_udata_strsup, DW_FORM_udata, DW_FORM_strp_sup);
195  MACRO_PROTO (p_udata_strx, DW_FORM_udata, DW_FORM_strx);
196  MACRO_PROTO (p_udata_udata, DW_FORM_udata, DW_FORM_udata);
197  MACRO_PROTO (p_secoffset, DW_FORM_sec_offset);
198  MACRO_PROTO (p_none);
199
200  Dwarf_Macro_Op_Proto op_protos[255] =
201    {
202      [DW_MACRO_define - 1] = p_udata_str,
203      [DW_MACRO_undef - 1] = p_udata_str,
204      [DW_MACRO_define_strp - 1] = p_udata_strp,
205      [DW_MACRO_undef_strp - 1] = p_udata_strp,
206      [DW_MACRO_start_file - 1] = p_udata_udata,
207      [DW_MACRO_end_file - 1] = p_none,
208      [DW_MACRO_import - 1] = p_secoffset,
209      [DW_MACRO_define_sup - 1] = p_udata_strsup,
210      [DW_MACRO_undef_sup - 1] = p_udata_strsup,
211      [DW_MACRO_import_sup - 1] = p_secoffset, /* XXX - but in sup!. */
212      [DW_MACRO_define_strx - 1] = p_udata_strx,
213      [DW_MACRO_undef_strx - 1] = p_udata_strx,
214    };
215
216  if ((flags & 0x4) != 0)
217    {
218      unsigned count = *readp++;
219      for (unsigned i = 0; i < count; ++i)
220	{
221	  unsigned opcode = *readp++;
222
223	  Dwarf_Macro_Op_Proto e;
224	  if (readp >= endp)
225	    goto invalid;
226	  get_uleb128 (e.nforms, readp, endp);
227	  e.forms = readp;
228	  op_protos[opcode - 1] = e;
229
230	  readp += e.nforms;
231	  if (readp > endp)
232	    {
233	    invalid:
234	      __libdw_seterrno (DWARF_E_INVALID_DWARF);
235	      return NULL;
236	    }
237	}
238    }
239
240  size_t ct = 0;
241  for (unsigned i = 1; i < 256; ++i)
242    if (op_protos[i - 1].forms != NULL)
243      ++ct;
244
245  /* We support at most 0xfe opcodes defined in the table, as 0xff is
246     a value that means that given opcode is not stored at all.  But
247     that should be fine, as opcode 0 is not allocated.  */
248  assert (ct < 0xff);
249
250  size_t macop_table_size = offsetof (Dwarf_Macro_Op_Table, table[ct]);
251
252  Dwarf_Macro_Op_Table *table = libdw_alloc (dbg, Dwarf_Macro_Op_Table,
253					     macop_table_size, 1);
254
255  *table = (Dwarf_Macro_Op_Table) {
256    .offset = macoff,
257    .sec_index = IDX_debug_macro,
258    .line_offset = line_offset,
259    .header_len = readp - startp,
260    .version = version,
261    .is_64bit = is_64bit,
262
263    /* NULL if CUDIE is NULL or DW_AT_comp_dir is absent.  */
264    .comp_dir = __libdw_getcompdir (cudie),
265  };
266  build_table (table, op_protos);
267
268  return table;
269}
270
271static Dwarf_Macro_Op_Table *
272cache_op_table (Dwarf *dbg, int sec_index, Dwarf_Off macoff,
273		const unsigned char *startp,
274		const unsigned char *const endp,
275		Dwarf_Die *cudie)
276{
277  Dwarf_Macro_Op_Table fake = { .offset = macoff, .sec_index = sec_index };
278  Dwarf_Macro_Op_Table **found = tfind (&fake, &dbg->macro_ops,
279					macro_op_compare);
280  if (found != NULL)
281    return *found;
282
283  Dwarf_Macro_Op_Table *table = sec_index == IDX_debug_macro
284    ? get_table_for_offset (dbg, macoff, startp, endp, cudie)
285    : get_macinfo_table (dbg, macoff, cudie);
286
287  if (table == NULL)
288    return NULL;
289
290  Dwarf_Macro_Op_Table **ret = tsearch (table, &dbg->macro_ops,
291					macro_op_compare);
292  if (unlikely (ret == NULL))
293    {
294      __libdw_seterrno (DWARF_E_NOMEM);
295      return NULL;
296    }
297
298  return *ret;
299}
300
301static ptrdiff_t
302read_macros (Dwarf *dbg, int sec_index,
303	     Dwarf_Off macoff, int (*callback) (Dwarf_Macro *, void *),
304	     void *arg, ptrdiff_t offset, bool accept_0xff,
305	     Dwarf_Die *cudie)
306{
307  Elf_Data *d = dbg->sectiondata[sec_index];
308  if (unlikely (d == NULL || d->d_buf == NULL))
309    {
310      __libdw_seterrno (DWARF_E_NO_ENTRY);
311      return -1;
312    }
313
314  if (unlikely (macoff >= d->d_size))
315    {
316      __libdw_seterrno (DWARF_E_INVALID_DWARF);
317      return -1;
318    }
319
320  const unsigned char *const startp = d->d_buf + macoff;
321  const unsigned char *const endp = d->d_buf + d->d_size;
322
323  Dwarf_Macro_Op_Table *table = cache_op_table (dbg, sec_index, macoff,
324						startp, endp, cudie);
325  if (table == NULL)
326    return -1;
327
328  if (offset == 0)
329    offset = table->header_len;
330
331  assert (offset >= 0);
332  assert (offset < endp - startp);
333  const unsigned char *readp = startp + offset;
334
335  while (readp < endp)
336    {
337      unsigned int opcode = *readp++;
338      if (opcode == 0)
339	/* Nothing more to do.  */
340	return 0;
341
342      if (unlikely (opcode == 0xff && ! accept_0xff))
343	{
344	  /* See comment below at dwarf_getmacros for explanation of
345	     why we are doing this.  */
346	  __libdw_seterrno (DWARF_E_INVALID_OPCODE);
347	  return -1;
348	}
349
350      unsigned int idx = table->opcodes[opcode - 1];
351      if (idx == 0xff)
352	{
353	  __libdw_seterrno (DWARF_E_INVALID_OPCODE);
354	  return -1;
355	}
356
357      Dwarf_Macro_Op_Proto *proto = &table->table[idx];
358
359      /* A fake CU with bare minimum data to fool dwarf_formX into
360	 doing the right thing with the attributes that we put out.
361	 We pretend it is the same version as the actual table.
362	 Version 4 for the old GNU extension, version 5 for DWARF5.
363	 To handle DW_FORM_strx[1234] we set the .str_offsets_base
364	 from the given CU.
365	 XXX We will need to deal with DW_MACRO_import_sup and change
366	 out the dbg somehow for the DW_FORM_sec_offset to make sense.  */
367      Dwarf_CU fake_cu = {
368	.dbg = dbg,
369	.sec_idx = sec_index,
370	.version = table->version,
371	.offset_size = table->is_64bit ? 8 : 4,
372	.str_off_base = str_offsets_base_off (dbg, (cudie != NULL
373						    ? cudie->cu: NULL)),
374	.startp = (void *) startp + offset,
375	.endp = (void *) endp,
376      };
377
378      Dwarf_Attribute *attributes;
379      Dwarf_Attribute *attributesp = NULL;
380      Dwarf_Attribute nattributes[8];
381      if (unlikely (proto->nforms > 8))
382	{
383	  attributesp = malloc (sizeof (Dwarf_Attribute) * proto->nforms);
384	  if (attributesp == NULL)
385	    {
386	      __libdw_seterrno (DWARF_E_NOMEM);
387	      return -1;
388	    }
389	  attributes = attributesp;
390	}
391      else
392	attributes = &nattributes[0];
393
394      for (Dwarf_Word i = 0; i < proto->nforms; ++i)
395	{
396	  /* We pretend this is a DW_AT[_GNU]_macros attribute so that
397	     DW_FORM_sec_offset forms get correctly interpreted as
398	     offset into .debug_macro.  XXX Deal with DW_MACRO_import_sup
399	     (swap .dbg) for DW_FORM_sec_offset? */
400	  attributes[i].code = (fake_cu.version == 4 ? DW_AT_GNU_macros
401						     : DW_AT_macros);
402	  attributes[i].form = proto->forms[i];
403	  attributes[i].valp = (void *) readp;
404	  attributes[i].cu = &fake_cu;
405
406	  /* We don't want forms that aren't allowed because they could
407	     read from the "abbrev" like DW_FORM_implicit_const.  */
408	  if (! libdw_valid_user_form (attributes[i].form))
409	    {
410	      __libdw_seterrno (DWARF_E_INVALID_DWARF);
411	      free (attributesp);
412	      return -1;
413	    }
414
415	  size_t len = __libdw_form_val_len (&fake_cu, proto->forms[i], readp);
416	  if (unlikely (len == (size_t) -1))
417	    {
418	      free (attributesp);
419	      return -1;
420	    }
421
422	  readp += len;
423	}
424
425      Dwarf_Macro macro = {
426	.table = table,
427	.opcode = opcode,
428	.attributes = attributes,
429      };
430
431      int res = callback (&macro, arg);
432      if (unlikely (attributesp != NULL))
433	free (attributesp);
434
435      if (res != DWARF_CB_OK)
436	return readp - startp;
437    }
438
439  return 0;
440}
441
442/* Token layout:
443
444   - The highest bit is used for distinguishing between callers that
445     know that opcode 0xff may have one of two incompatible meanings.
446     The mask that we use for selecting this bit is
447     DWARF_GETMACROS_START.
448
449   - The rest of the token (31 or 63 bits) encodes address inside the
450     macro unit.
451
452   Besides, token value of 0 signals end of iteration and -1 is
453   reserved for signaling errors.  That means it's impossible to
454   represent maximum offset of a .debug_macro unit to new-style
455   callers (which in practice decreases the permissible macro unit
456   size by another 1 byte).  */
457
458static ptrdiff_t
459token_from_offset (ptrdiff_t offset, bool accept_0xff)
460{
461  if (offset == -1 || offset == 0)
462    return offset;
463
464  /* Make sure the offset didn't overflow into the flag bit.  */
465  if ((offset & DWARF_GETMACROS_START) != 0)
466    {
467      __libdw_seterrno (DWARF_E_TOO_BIG);
468      return -1;
469    }
470
471  if (accept_0xff)
472    offset |= DWARF_GETMACROS_START;
473
474  return offset;
475}
476
477static ptrdiff_t
478offset_from_token (ptrdiff_t token, bool *accept_0xffp)
479{
480  *accept_0xffp = (token & DWARF_GETMACROS_START) != 0;
481  token &= ~DWARF_GETMACROS_START;
482
483  return token;
484}
485
486static ptrdiff_t
487gnu_macros_getmacros_off (Dwarf *dbg, Dwarf_Off macoff,
488			  int (*callback) (Dwarf_Macro *, void *),
489			  void *arg, ptrdiff_t offset, bool accept_0xff,
490			  Dwarf_Die *cudie)
491{
492  assert (offset >= 0);
493
494  if (macoff >= dbg->sectiondata[IDX_debug_macro]->d_size)
495    {
496      __libdw_seterrno (DWARF_E_INVALID_OFFSET);
497      return -1;
498    }
499
500  return read_macros (dbg, IDX_debug_macro, macoff,
501		      callback, arg, offset, accept_0xff, cudie);
502}
503
504static ptrdiff_t
505macro_info_getmacros_off (Dwarf *dbg, Dwarf_Off macoff,
506			  int (*callback) (Dwarf_Macro *, void *),
507			  void *arg, ptrdiff_t offset, Dwarf_Die *cudie)
508{
509  assert (offset >= 0);
510
511  return read_macros (dbg, IDX_debug_macinfo, macoff,
512		      callback, arg, offset, true, cudie);
513}
514
515ptrdiff_t
516dwarf_getmacros_off (Dwarf *dbg, Dwarf_Off macoff,
517		     int (*callback) (Dwarf_Macro *, void *),
518		     void *arg, ptrdiff_t token)
519{
520  if (dbg == NULL)
521    {
522      __libdw_seterrno (DWARF_E_NO_DWARF);
523      return -1;
524    }
525
526  bool accept_0xff;
527  ptrdiff_t offset = offset_from_token (token, &accept_0xff);
528  assert (accept_0xff);
529
530  offset = gnu_macros_getmacros_off (dbg, macoff, callback, arg, offset,
531				     accept_0xff, NULL);
532
533  return token_from_offset (offset, accept_0xff);
534}
535
536ptrdiff_t
537dwarf_getmacros (Dwarf_Die *cudie, int (*callback) (Dwarf_Macro *, void *),
538		 void *arg, ptrdiff_t token)
539{
540  if (cudie == NULL)
541    {
542      __libdw_seterrno (DWARF_E_NO_DWARF);
543      return -1;
544    }
545
546  /* This function might be called from a code that expects to see
547     DW_MACINFO_* opcodes, not DW_MACRO_{GNU_,}* ones.  It is fine to
548     serve most DW_MACRO_{GNU_,}* opcodes to such code, because those
549     whose values are the same as DW_MACINFO_* ones also have the same
550     behavior.  It is not very likely that a .debug_macro section
551     would only use the part of opcode space that it shares with
552     .debug_macinfo, but it is possible.  Serving the opcodes that are
553     only valid in DW_MACRO_{GNU_,}* domain is OK as well, because
554     clients in general need to be ready that newer standards define
555     more opcodes, and have coping mechanisms for unfamiliar opcodes.
556
557     The one exception to the above rule is opcode 0xff, which has
558     concrete semantics in .debug_macinfo, but falls into vendor block
559     in .debug_macro, and can be assigned to do whatever.  There is
560     some small probability that the two opcodes would look
561     superficially similar enough that a client would be confused and
562     misbehave as a result.  For this reason, we refuse to serve
563     through this interface 0xff's originating from .debug_macro
564     unless the TOKEN that we obtained indicates the call originates
565     from a new-style caller.  See above for details on what
566     information is encoded into tokens.  */
567
568  bool accept_0xff;
569  ptrdiff_t offset = offset_from_token (token, &accept_0xff);
570
571  /* DW_AT_macro_info */
572  if (dwarf_hasattr (cudie, DW_AT_macro_info))
573    {
574      Dwarf_Word macoff;
575      if (get_offset_from (cudie, DW_AT_macro_info, &macoff) != 0)
576	return -1;
577      offset = macro_info_getmacros_off (cudie->cu->dbg, macoff,
578					 callback, arg, offset, cudie);
579    }
580  else
581    {
582      /* DW_AT_GNU_macros, DW_AT_macros */
583      Dwarf_Word macoff;
584      if (get_offset_from (cudie, DW_AT_GNU_macros, &macoff) != 0
585	  && get_offset_from (cudie, DW_AT_macros, &macoff) != 0)
586	return -1;
587      offset = gnu_macros_getmacros_off (cudie->cu->dbg, macoff,
588					 callback, arg, offset, accept_0xff,
589					 cudie);
590    }
591
592  return token_from_offset (offset, accept_0xff);
593}
594