xref: /third_party/mesa3d/src/amd/common/ac_rtld.c (revision bf215546)
1/*
2 * Copyright 2014-2019 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24#include "ac_rtld.h"
25
26#include "ac_binary.h"
27#include "ac_gpu_info.h"
28#include "util/compiler.h"
29#include "util/u_dynarray.h"
30#include "util/u_math.h"
31
32#include <gelf.h>
33#include <libelf.h>
34#include <stdarg.h>
35#include <stdio.h>
36#include <stdlib.h>
37#include <string.h>
38
/* Old distributions may not have this enum constant. */
#ifndef EM_AMDGPU
#define EM_AMDGPU 224
#endif

/* Symbol type of old-style LDS symbols; this is deprecated -- remove. */
#ifndef STT_AMDGPU_LDS
#define STT_AMDGPU_LDS 13
#endif

/* Value of st_shndx that marks a symbol as an LDS symbol. */
#ifndef SHN_AMDGPU_LDS
#define SHN_AMDGPU_LDS 0xff00
#endif

/* Relocation types, provided here when the system headers do not define
 * them. Only a subset is handled by apply_relocs(). */
#ifndef R_AMDGPU_NONE
#define R_AMDGPU_NONE          0
#define R_AMDGPU_ABS32_LO      1
#define R_AMDGPU_ABS32_HI      2
#define R_AMDGPU_ABS64         3
#define R_AMDGPU_REL32         4
#define R_AMDGPU_REL64         5
#define R_AMDGPU_ABS32         6
#define R_AMDGPU_GOTPCREL      7
#define R_AMDGPU_GOTPCREL32_LO 8
#define R_AMDGPU_GOTPCREL32_HI 9
#define R_AMDGPU_REL32_LO      10
#define R_AMDGPU_REL32_HI      11
#define R_AMDGPU_RELATIVE64    13
#endif
67
/* For the UMR disassembler: DEBUGGER_NUM_MARKERS copies of the marker dword
 * are written behind the pasted text sections when a binary is uploaded. */
#define DEBUGGER_END_OF_CODE_MARKER 0xbf9f0000 /* invalid instruction */
#define DEBUGGER_NUM_MARKERS        5
71
/* Placement information for one ELF section of a shader part. */
struct ac_rtld_section {
   /* Set for allocatable, non-note sections; these are the sections that get
    * copied into the RX memory image. */
   bool is_rx : 1;
   /* Set for executable sections named ".text"; they are placed back to back
    * at the start of the image. */
   bool is_pasted_text : 1;
   /* Byte offset of the section inside the RX memory image. */
   uint64_t offset;
   /* Section name as returned by elf_strptr(). */
   const char *name;
};
78
79struct ac_rtld_part {
80   Elf *elf;
81   struct ac_rtld_section *sections;
82   unsigned num_sections;
83};
84
/* Print "ac_rtld error: ", the formatted message and a newline to stderr. */
static void report_errorvf(const char *fmt, va_list va)
{
   fputs("ac_rtld error: ", stderr);
   vfprintf(stderr, fmt, va);
   fputc('\n', stderr);
}
93
94static void report_errorf(const char *fmt, ...) PRINTFLIKE(1, 2);
95
96static void report_errorf(const char *fmt, ...)
97{
98   va_list va;
99   va_start(va, fmt);
100   report_errorvf(fmt, va);
101   va_end(va);
102}
103
104static void report_elf_errorf(const char *fmt, ...) PRINTFLIKE(1, 2);
105
106static void report_elf_errorf(const char *fmt, ...)
107{
108   va_list va;
109   va_start(va, fmt);
110   report_errorvf(fmt, va);
111   va_end(va);
112
113   fprintf(stderr, "ELF error: %s\n", elf_errmsg(elf_errno()));
114}
115
116/**
117 * Find a symbol in a dynarray of struct ac_rtld_symbol by \p name and shader
118 * \p part_idx.
119 */
120static const struct ac_rtld_symbol *find_symbol(const struct util_dynarray *symbols,
121                                                const char *name, unsigned part_idx)
122{
123   util_dynarray_foreach (symbols, struct ac_rtld_symbol, symbol) {
124      if ((symbol->part_idx == ~0u || symbol->part_idx == part_idx) && !strcmp(name, symbol->name))
125         return symbol;
126   }
127   return NULL;
128}
129
130static int compare_symbol_by_align(const void *lhsp, const void *rhsp)
131{
132   const struct ac_rtld_symbol *lhs = lhsp;
133   const struct ac_rtld_symbol *rhs = rhsp;
134   if (rhs->align > lhs->align)
135      return 1;
136   if (rhs->align < lhs->align)
137      return -1;
138   return 0;
139}
140
141/**
142 * Sort the given symbol list by decreasing alignment and assign offsets.
143 */
144static bool layout_symbols(struct ac_rtld_symbol *symbols, unsigned num_symbols,
145                           uint64_t *ptotal_size)
146{
147   qsort(symbols, num_symbols, sizeof(*symbols), compare_symbol_by_align);
148
149   uint64_t total_size = *ptotal_size;
150
151   for (unsigned i = 0; i < num_symbols; ++i) {
152      struct ac_rtld_symbol *s = &symbols[i];
153      assert(util_is_power_of_two_nonzero(s->align));
154
155      total_size = align64(total_size, s->align);
156      s->offset = total_size;
157
158      if (total_size + s->size < total_size) {
159         report_errorf("%s: size overflow", __FUNCTION__);
160         return false;
161      }
162
163      total_size += s->size;
164   }
165
166   *ptotal_size = total_size;
167   return true;
168}
169
170/**
171 * Read LDS symbols from the given \p section of the ELF of \p part and append
172 * them to the LDS symbols list.
173 *
174 * Shared LDS symbols are filtered out.
175 */
176static bool read_private_lds_symbols(struct ac_rtld_binary *binary, unsigned part_idx,
177                                     Elf_Scn *section, uint32_t *lds_end_align)
178{
179#define report_if(cond)                                                                            \
180   do {                                                                                            \
181      if ((cond)) {                                                                                \
182         report_errorf(#cond);                                                                     \
183         return false;                                                                             \
184      }                                                                                            \
185   } while (false)
186#define report_elf_if(cond)                                                                        \
187   do {                                                                                            \
188      if ((cond)) {                                                                                \
189         report_elf_errorf(#cond);                                                                 \
190         return false;                                                                             \
191      }                                                                                            \
192   } while (false)
193
194   struct ac_rtld_part *part = &binary->parts[part_idx];
195   Elf64_Shdr *shdr = elf64_getshdr(section);
196   uint32_t strtabidx = shdr->sh_link;
197   Elf_Data *symbols_data = elf_getdata(section, NULL);
198   report_elf_if(!symbols_data);
199
200   const Elf64_Sym *symbol = symbols_data->d_buf;
201   size_t num_symbols = symbols_data->d_size / sizeof(Elf64_Sym);
202
203   for (size_t j = 0; j < num_symbols; ++j, ++symbol) {
204      struct ac_rtld_symbol s = {0};
205
206      if (ELF64_ST_TYPE(symbol->st_info) == STT_AMDGPU_LDS) {
207         /* old-style LDS symbols from initial prototype -- remove eventually */
208         s.align = MIN2(1u << (symbol->st_other >> 3), 1u << 16);
209      } else if (symbol->st_shndx == SHN_AMDGPU_LDS) {
210         s.align = MIN2(symbol->st_value, 1u << 16);
211         report_if(!util_is_power_of_two_nonzero(s.align));
212      } else
213         continue;
214
215      report_if(symbol->st_size > 1u << 29);
216
217      s.name = elf_strptr(part->elf, strtabidx, symbol->st_name);
218      s.size = symbol->st_size;
219      s.part_idx = part_idx;
220
221      if (!strcmp(s.name, "__lds_end")) {
222         report_elf_if(s.size != 0);
223         *lds_end_align = MAX2(*lds_end_align, s.align);
224         continue;
225      }
226
227      const struct ac_rtld_symbol *shared = find_symbol(&binary->lds_symbols, s.name, part_idx);
228      if (shared) {
229         report_elf_if(s.align > shared->align);
230         report_elf_if(s.size > shared->size);
231         continue;
232      }
233
234      util_dynarray_append(&binary->lds_symbols, struct ac_rtld_symbol, s);
235   }
236
237   return true;
238
239#undef report_if
240#undef report_elf_if
241}
242
243/**
244 * Open a binary consisting of one or more shader parts.
245 *
246 * \param binary the uninitialized struct
247 * \param i binary opening parameters
248 */
249bool ac_rtld_open(struct ac_rtld_binary *binary, struct ac_rtld_open_info i)
250{
251   /* One of the libelf implementations
252    * (http://www.mr511.de/software/english.htm) requires calling
253    * elf_version() before elf_memory().
254    */
255   elf_version(EV_CURRENT);
256
257   memset(binary, 0, sizeof(*binary));
258   memcpy(&binary->options, &i.options, sizeof(binary->options));
259   binary->wave_size = i.wave_size;
260   binary->gfx_level = i.info->gfx_level;
261   binary->num_parts = i.num_parts;
262   binary->parts = calloc(sizeof(*binary->parts), i.num_parts);
263   if (!binary->parts)
264      return false;
265
266   uint64_t pasted_text_size = 0;
267   uint64_t rx_align = 1;
268   uint64_t rx_size = 0;
269   uint64_t exec_size = 0;
270
271#define report_if(cond)                                                                            \
272   do {                                                                                            \
273      if ((cond)) {                                                                                \
274         report_errorf(#cond);                                                                     \
275         goto fail;                                                                                \
276      }                                                                                            \
277   } while (false)
278#define report_elf_if(cond)                                                                        \
279   do {                                                                                            \
280      if ((cond)) {                                                                                \
281         report_elf_errorf(#cond);                                                                 \
282         goto fail;                                                                                \
283      }                                                                                            \
284   } while (false)
285
286   /* Copy and layout shared LDS symbols. */
287   if (i.num_shared_lds_symbols) {
288      if (!util_dynarray_resize(&binary->lds_symbols, struct ac_rtld_symbol,
289                                i.num_shared_lds_symbols))
290         goto fail;
291
292      memcpy(binary->lds_symbols.data, i.shared_lds_symbols, binary->lds_symbols.size);
293   }
294
295   util_dynarray_foreach (&binary->lds_symbols, struct ac_rtld_symbol, symbol)
296      symbol->part_idx = ~0u;
297
298   unsigned max_lds_size = 64 * 1024;
299
300   if (i.info->gfx_level == GFX6 ||
301       (i.shader_type != MESA_SHADER_COMPUTE && i.shader_type != MESA_SHADER_FRAGMENT))
302      max_lds_size = 32 * 1024;
303
304   uint64_t shared_lds_size = 0;
305   if (!layout_symbols(binary->lds_symbols.data, i.num_shared_lds_symbols, &shared_lds_size))
306      goto fail;
307
308   if (shared_lds_size > max_lds_size) {
309      fprintf(stderr, "ac_rtld error(1): too much LDS (used = %u, max = %u)\n",
310              (unsigned)shared_lds_size, max_lds_size);
311      goto fail;
312   }
313   binary->lds_size = shared_lds_size;
314
315   /* First pass over all parts: open ELFs, pre-determine the placement of
316    * sections in the memory image, and collect and layout private LDS symbols. */
317   uint32_t lds_end_align = 0;
318
319   if (binary->options.halt_at_entry)
320      pasted_text_size += 4;
321
322   for (unsigned part_idx = 0; part_idx < i.num_parts; ++part_idx) {
323      struct ac_rtld_part *part = &binary->parts[part_idx];
324      unsigned part_lds_symbols_begin =
325         util_dynarray_num_elements(&binary->lds_symbols, struct ac_rtld_symbol);
326
327      part->elf = elf_memory((char *)i.elf_ptrs[part_idx], i.elf_sizes[part_idx]);
328      report_elf_if(!part->elf);
329
330      const Elf64_Ehdr *ehdr = elf64_getehdr(part->elf);
331      report_elf_if(!ehdr);
332      report_if(ehdr->e_machine != EM_AMDGPU);
333
334      size_t section_str_index;
335      size_t num_shdrs;
336      report_elf_if(elf_getshdrstrndx(part->elf, &section_str_index) < 0);
337      report_elf_if(elf_getshdrnum(part->elf, &num_shdrs) < 0);
338
339      part->num_sections = num_shdrs;
340      part->sections = calloc(sizeof(*part->sections), num_shdrs);
341      report_if(!part->sections);
342
343      Elf_Scn *section = NULL;
344      while ((section = elf_nextscn(part->elf, section))) {
345         Elf64_Shdr *shdr = elf64_getshdr(section);
346         struct ac_rtld_section *s = &part->sections[elf_ndxscn(section)];
347         s->name = elf_strptr(part->elf, section_str_index, shdr->sh_name);
348         report_elf_if(!s->name);
349
350         /* Cannot actually handle linked objects yet */
351         report_elf_if(shdr->sh_addr != 0);
352
353         /* Alignment must be 0 or a power of two */
354         report_elf_if(shdr->sh_addralign & (shdr->sh_addralign - 1));
355         uint64_t sh_align = MAX2(shdr->sh_addralign, 1);
356
357         if (shdr->sh_flags & SHF_ALLOC && shdr->sh_type != SHT_NOTE) {
358            report_if(shdr->sh_flags & SHF_WRITE);
359
360            s->is_rx = true;
361
362            if (shdr->sh_flags & SHF_EXECINSTR) {
363               report_elf_if(shdr->sh_size & 3);
364
365               if (!strcmp(s->name, ".text"))
366                  s->is_pasted_text = true;
367
368               exec_size += shdr->sh_size;
369            }
370
371            if (s->is_pasted_text) {
372               s->offset = pasted_text_size;
373               pasted_text_size += shdr->sh_size;
374            } else {
375               rx_align = align(rx_align, sh_align);
376               rx_size = align(rx_size, sh_align);
377               s->offset = rx_size;
378               rx_size += shdr->sh_size;
379            }
380         } else if (shdr->sh_type == SHT_SYMTAB) {
381            if (!read_private_lds_symbols(binary, part_idx, section, &lds_end_align))
382               goto fail;
383         }
384      }
385
386      uint64_t part_lds_size = shared_lds_size;
387      if (!layout_symbols(util_dynarray_element(&binary->lds_symbols, struct ac_rtld_symbol,
388                                                part_lds_symbols_begin),
389                          util_dynarray_num_elements(&binary->lds_symbols, struct ac_rtld_symbol) -
390                             part_lds_symbols_begin,
391                          &part_lds_size))
392         goto fail;
393      binary->lds_size = MAX2(binary->lds_size, part_lds_size);
394   }
395
396   binary->rx_end_markers = pasted_text_size;
397   pasted_text_size += 4 * DEBUGGER_NUM_MARKERS;
398
399   /* __lds_end is a special symbol that points at the end of the memory
400    * occupied by other LDS symbols. Its alignment is taken as the
401    * maximum of its alignment over all shader parts where it occurs.
402    */
403   if (lds_end_align) {
404      binary->lds_size = align(binary->lds_size, lds_end_align);
405
406      struct ac_rtld_symbol *lds_end =
407         util_dynarray_grow(&binary->lds_symbols, struct ac_rtld_symbol, 1);
408      lds_end->name = "__lds_end";
409      lds_end->size = 0;
410      lds_end->align = lds_end_align;
411      lds_end->offset = binary->lds_size;
412      lds_end->part_idx = ~0u;
413   }
414
415   if (binary->lds_size > max_lds_size) {
416      fprintf(stderr, "ac_rtld error(2): too much LDS (used = %u, max = %u)\n",
417              (unsigned)binary->lds_size, max_lds_size);
418      goto fail;
419   }
420
421   /* Second pass: Adjust offsets of non-pasted text sections. */
422   binary->rx_size = pasted_text_size;
423   binary->rx_size = align(binary->rx_size, rx_align);
424
425   for (unsigned part_idx = 0; part_idx < i.num_parts; ++part_idx) {
426      struct ac_rtld_part *part = &binary->parts[part_idx];
427      size_t num_shdrs;
428      elf_getshdrnum(part->elf, &num_shdrs);
429
430      for (unsigned j = 0; j < num_shdrs; ++j) {
431         struct ac_rtld_section *s = &part->sections[j];
432         if (s->is_rx && !s->is_pasted_text)
433            s->offset += binary->rx_size;
434      }
435   }
436
437   binary->rx_size += rx_size;
438   binary->exec_size = exec_size;
439
440   /* The SQ fetches up to N cache lines of 16 dwords
441    * ahead of the PC, configurable by SH_MEM_CONFIG and
442    * S_INST_PREFETCH. This can cause two issues:
443    *
444    * (1) Crossing a page boundary to an unmapped page. The logic
445    *     does not distinguish between a required fetch and a "mere"
446    *     prefetch and will fault.
447    *
448    * (2) Prefetching instructions that will be changed for a
449    *     different shader.
450    *
451    * (2) is not currently an issue because we flush the I$ at IB
452    * boundaries, but (1) needs to be addressed. Due to buffer
453    * suballocation, we just play it safe.
454    */
455   unsigned prefetch_distance = 0;
456
457   if (!i.info->has_graphics && i.info->family >= CHIP_ALDEBARAN)
458      prefetch_distance = 16;
459   else if (i.info->gfx_level >= GFX10)
460      prefetch_distance = 3;
461
462   if (prefetch_distance) {
463      if (i.info->gfx_level >= GFX11)
464         binary->rx_size = align(binary->rx_size + prefetch_distance * 64, 128);
465      else
466         binary->rx_size = align(binary->rx_size + prefetch_distance * 64, 64);
467   }
468
469   return true;
470
471#undef report_if
472#undef report_elf_if
473
474fail:
475   ac_rtld_close(binary);
476   return false;
477}
478
479void ac_rtld_close(struct ac_rtld_binary *binary)
480{
481   for (unsigned i = 0; i < binary->num_parts; ++i) {
482      struct ac_rtld_part *part = &binary->parts[i];
483      free(part->sections);
484      elf_end(part->elf);
485   }
486
487   util_dynarray_fini(&binary->lds_symbols);
488   free(binary->parts);
489   binary->parts = NULL;
490   binary->num_parts = 0;
491}
492
493static bool get_section_by_name(struct ac_rtld_part *part, const char *name, const char **data,
494                                size_t *nbytes)
495{
496   for (unsigned i = 0; i < part->num_sections; ++i) {
497      struct ac_rtld_section *s = &part->sections[i];
498      if (s->name && !strcmp(name, s->name)) {
499         Elf_Scn *target_scn = elf_getscn(part->elf, i);
500         Elf_Data *target_data = elf_getdata(target_scn, NULL);
501         if (!target_data) {
502            report_elf_errorf("ac_rtld: get_section_by_name: elf_getdata");
503            return false;
504         }
505
506         *data = target_data->d_buf;
507         *nbytes = target_data->d_size;
508         return true;
509      }
510   }
511   return false;
512}
513
514bool ac_rtld_get_section_by_name(struct ac_rtld_binary *binary, const char *name, const char **data,
515                                 size_t *nbytes)
516{
517   assert(binary->num_parts == 1);
518   return get_section_by_name(&binary->parts[0], name, data, nbytes);
519}
520
521bool ac_rtld_read_config(const struct radeon_info *info, struct ac_rtld_binary *binary,
522                         struct ac_shader_config *config)
523{
524   for (unsigned i = 0; i < binary->num_parts; ++i) {
525      struct ac_rtld_part *part = &binary->parts[i];
526      const char *config_data;
527      size_t config_nbytes;
528
529      if (!get_section_by_name(part, ".AMDGPU.config", &config_data, &config_nbytes))
530         return false;
531
532      /* TODO: be precise about scratch use? */
533      struct ac_shader_config c = {0};
534      ac_parse_shader_binary_config(config_data, config_nbytes, binary->wave_size, info, &c);
535
536      config->num_sgprs = MAX2(config->num_sgprs, c.num_sgprs);
537      config->num_vgprs = MAX2(config->num_vgprs, c.num_vgprs);
538      config->spilled_sgprs = MAX2(config->spilled_sgprs, c.spilled_sgprs);
539      config->spilled_vgprs = MAX2(config->spilled_vgprs, c.spilled_vgprs);
540      config->scratch_bytes_per_wave =
541         MAX2(config->scratch_bytes_per_wave, c.scratch_bytes_per_wave);
542
543      assert(i == 0 || config->float_mode == c.float_mode);
544      config->float_mode = c.float_mode;
545
546      /* SPI_PS_INPUT_ENA/ADDR can't be combined. Only the value from
547       * the main shader part is used. */
548      assert(config->spi_ps_input_ena == 0 && config->spi_ps_input_addr == 0);
549      config->spi_ps_input_ena = c.spi_ps_input_ena;
550      config->spi_ps_input_addr = c.spi_ps_input_addr;
551
552      /* TODO: consistently use LDS symbols for this */
553      config->lds_size = MAX2(config->lds_size, c.lds_size);
554
555      /* TODO: Should we combine these somehow? It's currently only
556       * used for radeonsi's compute, where multiple parts aren't used. */
557      assert(config->rsrc1 == 0 && config->rsrc2 == 0);
558      config->rsrc1 = c.rsrc1;
559      config->rsrc2 = c.rsrc2;
560   }
561
562   return true;
563}
564
565static bool resolve_symbol(const struct ac_rtld_upload_info *u, unsigned part_idx,
566                           const Elf64_Sym *sym, const char *name, uint64_t *value)
567{
568   /* TODO: properly disentangle the undef and the LDS cases once
569    * STT_AMDGPU_LDS is retired. */
570   if (sym->st_shndx == SHN_UNDEF || sym->st_shndx == SHN_AMDGPU_LDS) {
571      const struct ac_rtld_symbol *lds_sym = find_symbol(&u->binary->lds_symbols, name, part_idx);
572
573      if (lds_sym) {
574         *value = lds_sym->offset;
575         return true;
576      }
577
578      /* TODO: resolve from other parts */
579
580      if (u->get_external_symbol(u->binary->gfx_level, u->cb_data, name, value))
581         return true;
582
583      report_errorf("symbol %s: unknown", name);
584      return false;
585   }
586
587   struct ac_rtld_part *part = &u->binary->parts[part_idx];
588   if (sym->st_shndx >= part->num_sections) {
589      report_errorf("symbol %s: section out of bounds", name);
590      return false;
591   }
592
593   struct ac_rtld_section *s = &part->sections[sym->st_shndx];
594   if (!s->is_rx) {
595      report_errorf("symbol %s: bad section", name);
596      return false;
597   }
598
599   uint64_t section_base = u->rx_va + s->offset;
600
601   *value = section_base + sym->st_value;
602   return true;
603}
604
605static bool apply_relocs(const struct ac_rtld_upload_info *u, unsigned part_idx,
606                         const Elf64_Shdr *reloc_shdr, const Elf_Data *reloc_data)
607{
608#define report_if(cond)                                                                            \
609   do {                                                                                            \
610      if ((cond)) {                                                                                \
611         report_errorf(#cond);                                                                     \
612         return false;                                                                             \
613      }                                                                                            \
614   } while (false)
615#define report_elf_if(cond)                                                                        \
616   do {                                                                                            \
617      if ((cond)) {                                                                                \
618         report_elf_errorf(#cond);                                                                 \
619         return false;                                                                             \
620      }                                                                                            \
621   } while (false)
622
623   struct ac_rtld_part *part = &u->binary->parts[part_idx];
624   Elf_Scn *target_scn = elf_getscn(part->elf, reloc_shdr->sh_info);
625   report_elf_if(!target_scn);
626
627   Elf_Data *target_data = elf_getdata(target_scn, NULL);
628   report_elf_if(!target_data);
629
630   Elf_Scn *symbols_scn = elf_getscn(part->elf, reloc_shdr->sh_link);
631   report_elf_if(!symbols_scn);
632
633   Elf64_Shdr *symbols_shdr = elf64_getshdr(symbols_scn);
634   report_elf_if(!symbols_shdr);
635   uint32_t strtabidx = symbols_shdr->sh_link;
636
637   Elf_Data *symbols_data = elf_getdata(symbols_scn, NULL);
638   report_elf_if(!symbols_data);
639
640   const Elf64_Sym *symbols = symbols_data->d_buf;
641   size_t num_symbols = symbols_data->d_size / sizeof(Elf64_Sym);
642
643   struct ac_rtld_section *s = &part->sections[reloc_shdr->sh_info];
644   report_if(!s->is_rx);
645
646   const char *orig_base = target_data->d_buf;
647   char *dst_base = u->rx_ptr + s->offset;
648   uint64_t va_base = u->rx_va + s->offset;
649
650   Elf64_Rel *rel = reloc_data->d_buf;
651   size_t num_relocs = reloc_data->d_size / sizeof(*rel);
652   for (size_t i = 0; i < num_relocs; ++i, ++rel) {
653      size_t r_sym = ELF64_R_SYM(rel->r_info);
654      unsigned r_type = ELF64_R_TYPE(rel->r_info);
655
656      const char *orig_ptr = orig_base + rel->r_offset;
657      char *dst_ptr = dst_base + rel->r_offset;
658      uint64_t va = va_base + rel->r_offset;
659
660      uint64_t symbol;
661      uint64_t addend;
662
663      if (r_sym == STN_UNDEF) {
664         symbol = 0;
665      } else {
666         report_elf_if(r_sym >= num_symbols);
667
668         const Elf64_Sym *sym = &symbols[r_sym];
669         const char *symbol_name = elf_strptr(part->elf, strtabidx, sym->st_name);
670         report_elf_if(!symbol_name);
671
672         if (!resolve_symbol(u, part_idx, sym, symbol_name, &symbol))
673            return false;
674      }
675
676      /* TODO: Should we also support .rela sections, where the
677       * addend is part of the relocation record? */
678
679      /* Load the addend from the ELF instead of the destination,
680       * because the destination may be in VRAM. */
681      switch (r_type) {
682      case R_AMDGPU_ABS32:
683      case R_AMDGPU_ABS32_LO:
684      case R_AMDGPU_ABS32_HI:
685      case R_AMDGPU_REL32:
686      case R_AMDGPU_REL32_LO:
687      case R_AMDGPU_REL32_HI:
688         addend = *(const uint32_t *)orig_ptr;
689         break;
690      case R_AMDGPU_ABS64:
691      case R_AMDGPU_REL64:
692         addend = *(const uint64_t *)orig_ptr;
693         break;
694      default:
695         report_errorf("unsupported r_type == %u", r_type);
696         return false;
697      }
698
699      uint64_t abs = symbol + addend;
700
701      switch (r_type) {
702      case R_AMDGPU_ABS32:
703         assert((uint32_t)abs == abs);
704         FALLTHROUGH;
705      case R_AMDGPU_ABS32_LO:
706         *(uint32_t *)dst_ptr = util_cpu_to_le32(abs);
707         break;
708      case R_AMDGPU_ABS32_HI:
709         *(uint32_t *)dst_ptr = util_cpu_to_le32(abs >> 32);
710         break;
711      case R_AMDGPU_ABS64:
712         *(uint64_t *)dst_ptr = util_cpu_to_le64(abs);
713         break;
714      case R_AMDGPU_REL32:
715         assert((int64_t)(int32_t)(abs - va) == (int64_t)(abs - va));
716         FALLTHROUGH;
717      case R_AMDGPU_REL32_LO:
718         *(uint32_t *)dst_ptr = util_cpu_to_le32(abs - va);
719         break;
720      case R_AMDGPU_REL32_HI:
721         *(uint32_t *)dst_ptr = util_cpu_to_le32((abs - va) >> 32);
722         break;
723      case R_AMDGPU_REL64:
724         *(uint64_t *)dst_ptr = util_cpu_to_le64(abs - va);
725         break;
726      default:
727         unreachable("bad r_type");
728      }
729   }
730
731   return true;
732
733#undef report_if
734#undef report_elf_if
735}
736
737/**
738 * Upload the binary or binaries to the provided GPU buffers, including
739 * relocations.
740 */
741int ac_rtld_upload(struct ac_rtld_upload_info *u)
742{
743#define report_if(cond)                                                                            \
744   do {                                                                                            \
745      if ((cond)) {                                                                                \
746         report_errorf(#cond);                                                                     \
747         return -1;                                                                             \
748      }                                                                                            \
749   } while (false)
750#define report_elf_if(cond)                                                                        \
751   do {                                                                                            \
752      if ((cond)) {                                                                                \
753         report_errorf(#cond);                                                                     \
754         return -1;                                                                             \
755      }                                                                                            \
756   } while (false)
757
758   int size = 0;
759   if (u->binary->options.halt_at_entry) {
760      /* s_sethalt 1 */
761      *(uint32_t *)u->rx_ptr = util_cpu_to_le32(0xbf8d0001);
762   }
763
764   /* First pass: upload raw section data and lay out private LDS symbols. */
765   for (unsigned i = 0; i < u->binary->num_parts; ++i) {
766      struct ac_rtld_part *part = &u->binary->parts[i];
767
768      Elf_Scn *section = NULL;
769      while ((section = elf_nextscn(part->elf, section))) {
770         Elf64_Shdr *shdr = elf64_getshdr(section);
771         struct ac_rtld_section *s = &part->sections[elf_ndxscn(section)];
772
773         if (!s->is_rx)
774            continue;
775
776         report_if(shdr->sh_type != SHT_PROGBITS);
777
778         Elf_Data *data = elf_getdata(section, NULL);
779         report_elf_if(!data || data->d_size != shdr->sh_size);
780         memcpy(u->rx_ptr + s->offset, data->d_buf, shdr->sh_size);
781
782         size = MAX2(size, s->offset + shdr->sh_size);
783      }
784   }
785
786   if (u->binary->rx_end_markers) {
787      uint32_t *dst = (uint32_t *)(u->rx_ptr + u->binary->rx_end_markers);
788      for (unsigned i = 0; i < DEBUGGER_NUM_MARKERS; ++i)
789         *dst++ = util_cpu_to_le32(DEBUGGER_END_OF_CODE_MARKER);
790      size += 4 * DEBUGGER_NUM_MARKERS;
791   }
792
793   /* Second pass: handle relocations, overwriting uploaded data where
794    * appropriate. */
795   for (unsigned i = 0; i < u->binary->num_parts; ++i) {
796      struct ac_rtld_part *part = &u->binary->parts[i];
797      Elf_Scn *section = NULL;
798      while ((section = elf_nextscn(part->elf, section))) {
799         Elf64_Shdr *shdr = elf64_getshdr(section);
800         if (shdr->sh_type == SHT_REL) {
801            Elf_Data *relocs = elf_getdata(section, NULL);
802            report_elf_if(!relocs || relocs->d_size != shdr->sh_size);
803            if (!apply_relocs(u, i, shdr, relocs))
804               return -1;
805         } else if (shdr->sh_type == SHT_RELA) {
806            report_errorf("SHT_RELA not supported");
807            return -1;
808         }
809      }
810   }
811
812   return size;
813
814#undef report_if
815#undef report_elf_if
816}
817