1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright 2014-2019 Advanced Micro Devices, Inc.
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21bf215546Sopenharmony_ci * SOFTWARE.
22bf215546Sopenharmony_ci */
23bf215546Sopenharmony_ci
24bf215546Sopenharmony_ci#include "ac_rtld.h"
25bf215546Sopenharmony_ci
26bf215546Sopenharmony_ci#include "ac_binary.h"
27bf215546Sopenharmony_ci#include "ac_gpu_info.h"
28bf215546Sopenharmony_ci#include "util/compiler.h"
29bf215546Sopenharmony_ci#include "util/u_dynarray.h"
30bf215546Sopenharmony_ci#include "util/u_math.h"
31bf215546Sopenharmony_ci
32bf215546Sopenharmony_ci#include <gelf.h>
33bf215546Sopenharmony_ci#include <libelf.h>
34bf215546Sopenharmony_ci#include <stdarg.h>
35bf215546Sopenharmony_ci#include <stdio.h>
36bf215546Sopenharmony_ci#include <stdlib.h>
37bf215546Sopenharmony_ci#include <string.h>
38bf215546Sopenharmony_ci
/* ELF e_machine value that ac_rtld_open requires in every part's ELF header. */
#ifndef EM_AMDGPU
// Old distributions may not have this enum constant
#define EM_AMDGPU 224
#endif

/* Symbol type used to recognize old-style LDS symbols in the symbol table. */
#ifndef STT_AMDGPU_LDS
#define STT_AMDGPU_LDS 13 // this is deprecated -- remove
#endif

/* Section index (st_shndx) used to recognize LDS symbols; for such symbols
 * st_value is read as the requested alignment. */
#ifndef SHN_AMDGPU_LDS
#define SHN_AMDGPU_LDS 0xff00
#endif

/* Fallback definitions of the R_AMDGPU_* relocation type codes.
 * NOTE(review): not referenced in the portion of the file visible here --
 * presumably consumed by relocation handling further down; confirm. */
#ifndef R_AMDGPU_NONE
#define R_AMDGPU_NONE          0
#define R_AMDGPU_ABS32_LO      1
#define R_AMDGPU_ABS32_HI      2
#define R_AMDGPU_ABS64         3
#define R_AMDGPU_REL32         4
#define R_AMDGPU_REL64         5
#define R_AMDGPU_ABS32         6
#define R_AMDGPU_GOTPCREL      7
#define R_AMDGPU_GOTPCREL32_LO 8
#define R_AMDGPU_GOTPCREL32_HI 9
#define R_AMDGPU_REL32_LO      10
#define R_AMDGPU_REL32_HI      11
#define R_AMDGPU_RELATIVE64    13
#endif

/* For the UMR disassembler.
 * ac_rtld_open reserves 4 bytes per marker (DEBUGGER_NUM_MARKERS of them)
 * directly after the pasted text. */
#define DEBUGGER_END_OF_CODE_MARKER 0xbf9f0000 /* invalid instruction */
#define DEBUGGER_NUM_MARKERS        5
71bf215546Sopenharmony_ci
/* Placement information recorded for one ELF section of a shader part. */
struct ac_rtld_section {
   /* Set by ac_rtld_open for SHF_ALLOC sections that are not SHT_NOTE. */
   bool is_rx : 1;
   /* Set by ac_rtld_open for executable sections named ".text". */
   bool is_pasted_text : 1;
   /* Offset assigned by ac_rtld_open while laying out the memory image. */
   uint64_t offset;
   /* Section name as returned by elf_strptr(); NULL for unvisited entries. */
   const char *name;
};
78bf215546Sopenharmony_ci
/* One shader part: its libelf handle plus per-section placement data. */
struct ac_rtld_part {
   /* Handle created with elf_memory() in ac_rtld_open; released with
    * elf_end() in ac_rtld_close. */
   Elf *elf;
   /* Array of num_sections entries, indexed by ELF section index. */
   struct ac_rtld_section *sections;
   unsigned num_sections;
};
84bf215546Sopenharmony_ci
/**
 * Write one error line to stderr: the "ac_rtld error: " prefix, the message
 * formatted from \p fmt and \p va, and a terminating newline.
 */
static void report_errorvf(const char *fmt, va_list va)
{
   fputs("ac_rtld error: ", stderr);
   vfprintf(stderr, fmt, va);
   fputc('\n', stderr);
}
93bf215546Sopenharmony_ci
94bf215546Sopenharmony_cistatic void report_errorf(const char *fmt, ...) PRINTFLIKE(1, 2);
95bf215546Sopenharmony_ci
96bf215546Sopenharmony_cistatic void report_errorf(const char *fmt, ...)
97bf215546Sopenharmony_ci{
98bf215546Sopenharmony_ci   va_list va;
99bf215546Sopenharmony_ci   va_start(va, fmt);
100bf215546Sopenharmony_ci   report_errorvf(fmt, va);
101bf215546Sopenharmony_ci   va_end(va);
102bf215546Sopenharmony_ci}
103bf215546Sopenharmony_ci
104bf215546Sopenharmony_cistatic void report_elf_errorf(const char *fmt, ...) PRINTFLIKE(1, 2);
105bf215546Sopenharmony_ci
106bf215546Sopenharmony_cistatic void report_elf_errorf(const char *fmt, ...)
107bf215546Sopenharmony_ci{
108bf215546Sopenharmony_ci   va_list va;
109bf215546Sopenharmony_ci   va_start(va, fmt);
110bf215546Sopenharmony_ci   report_errorvf(fmt, va);
111bf215546Sopenharmony_ci   va_end(va);
112bf215546Sopenharmony_ci
113bf215546Sopenharmony_ci   fprintf(stderr, "ELF error: %s\n", elf_errmsg(elf_errno()));
114bf215546Sopenharmony_ci}
115bf215546Sopenharmony_ci
116bf215546Sopenharmony_ci/**
117bf215546Sopenharmony_ci * Find a symbol in a dynarray of struct ac_rtld_symbol by \p name and shader
118bf215546Sopenharmony_ci * \p part_idx.
119bf215546Sopenharmony_ci */
120bf215546Sopenharmony_cistatic const struct ac_rtld_symbol *find_symbol(const struct util_dynarray *symbols,
121bf215546Sopenharmony_ci                                                const char *name, unsigned part_idx)
122bf215546Sopenharmony_ci{
123bf215546Sopenharmony_ci   util_dynarray_foreach (symbols, struct ac_rtld_symbol, symbol) {
124bf215546Sopenharmony_ci      if ((symbol->part_idx == ~0u || symbol->part_idx == part_idx) && !strcmp(name, symbol->name))
125bf215546Sopenharmony_ci         return symbol;
126bf215546Sopenharmony_ci   }
127bf215546Sopenharmony_ci   return NULL;
128bf215546Sopenharmony_ci}
129bf215546Sopenharmony_ci
130bf215546Sopenharmony_cistatic int compare_symbol_by_align(const void *lhsp, const void *rhsp)
131bf215546Sopenharmony_ci{
132bf215546Sopenharmony_ci   const struct ac_rtld_symbol *lhs = lhsp;
133bf215546Sopenharmony_ci   const struct ac_rtld_symbol *rhs = rhsp;
134bf215546Sopenharmony_ci   if (rhs->align > lhs->align)
135bf215546Sopenharmony_ci      return 1;
136bf215546Sopenharmony_ci   if (rhs->align < lhs->align)
137bf215546Sopenharmony_ci      return -1;
138bf215546Sopenharmony_ci   return 0;
139bf215546Sopenharmony_ci}
140bf215546Sopenharmony_ci
141bf215546Sopenharmony_ci/**
142bf215546Sopenharmony_ci * Sort the given symbol list by decreasing alignment and assign offsets.
143bf215546Sopenharmony_ci */
144bf215546Sopenharmony_cistatic bool layout_symbols(struct ac_rtld_symbol *symbols, unsigned num_symbols,
145bf215546Sopenharmony_ci                           uint64_t *ptotal_size)
146bf215546Sopenharmony_ci{
147bf215546Sopenharmony_ci   qsort(symbols, num_symbols, sizeof(*symbols), compare_symbol_by_align);
148bf215546Sopenharmony_ci
149bf215546Sopenharmony_ci   uint64_t total_size = *ptotal_size;
150bf215546Sopenharmony_ci
151bf215546Sopenharmony_ci   for (unsigned i = 0; i < num_symbols; ++i) {
152bf215546Sopenharmony_ci      struct ac_rtld_symbol *s = &symbols[i];
153bf215546Sopenharmony_ci      assert(util_is_power_of_two_nonzero(s->align));
154bf215546Sopenharmony_ci
155bf215546Sopenharmony_ci      total_size = align64(total_size, s->align);
156bf215546Sopenharmony_ci      s->offset = total_size;
157bf215546Sopenharmony_ci
158bf215546Sopenharmony_ci      if (total_size + s->size < total_size) {
159bf215546Sopenharmony_ci         report_errorf("%s: size overflow", __FUNCTION__);
160bf215546Sopenharmony_ci         return false;
161bf215546Sopenharmony_ci      }
162bf215546Sopenharmony_ci
163bf215546Sopenharmony_ci      total_size += s->size;
164bf215546Sopenharmony_ci   }
165bf215546Sopenharmony_ci
166bf215546Sopenharmony_ci   *ptotal_size = total_size;
167bf215546Sopenharmony_ci   return true;
168bf215546Sopenharmony_ci}
169bf215546Sopenharmony_ci
170bf215546Sopenharmony_ci/**
171bf215546Sopenharmony_ci * Read LDS symbols from the given \p section of the ELF of \p part and append
172bf215546Sopenharmony_ci * them to the LDS symbols list.
173bf215546Sopenharmony_ci *
174bf215546Sopenharmony_ci * Shared LDS symbols are filtered out.
175bf215546Sopenharmony_ci */
176bf215546Sopenharmony_cistatic bool read_private_lds_symbols(struct ac_rtld_binary *binary, unsigned part_idx,
177bf215546Sopenharmony_ci                                     Elf_Scn *section, uint32_t *lds_end_align)
178bf215546Sopenharmony_ci{
179bf215546Sopenharmony_ci#define report_if(cond)                                                                            \
180bf215546Sopenharmony_ci   do {                                                                                            \
181bf215546Sopenharmony_ci      if ((cond)) {                                                                                \
182bf215546Sopenharmony_ci         report_errorf(#cond);                                                                     \
183bf215546Sopenharmony_ci         return false;                                                                             \
184bf215546Sopenharmony_ci      }                                                                                            \
185bf215546Sopenharmony_ci   } while (false)
186bf215546Sopenharmony_ci#define report_elf_if(cond)                                                                        \
187bf215546Sopenharmony_ci   do {                                                                                            \
188bf215546Sopenharmony_ci      if ((cond)) {                                                                                \
189bf215546Sopenharmony_ci         report_elf_errorf(#cond);                                                                 \
190bf215546Sopenharmony_ci         return false;                                                                             \
191bf215546Sopenharmony_ci      }                                                                                            \
192bf215546Sopenharmony_ci   } while (false)
193bf215546Sopenharmony_ci
194bf215546Sopenharmony_ci   struct ac_rtld_part *part = &binary->parts[part_idx];
195bf215546Sopenharmony_ci   Elf64_Shdr *shdr = elf64_getshdr(section);
196bf215546Sopenharmony_ci   uint32_t strtabidx = shdr->sh_link;
197bf215546Sopenharmony_ci   Elf_Data *symbols_data = elf_getdata(section, NULL);
198bf215546Sopenharmony_ci   report_elf_if(!symbols_data);
199bf215546Sopenharmony_ci
200bf215546Sopenharmony_ci   const Elf64_Sym *symbol = symbols_data->d_buf;
201bf215546Sopenharmony_ci   size_t num_symbols = symbols_data->d_size / sizeof(Elf64_Sym);
202bf215546Sopenharmony_ci
203bf215546Sopenharmony_ci   for (size_t j = 0; j < num_symbols; ++j, ++symbol) {
204bf215546Sopenharmony_ci      struct ac_rtld_symbol s = {0};
205bf215546Sopenharmony_ci
206bf215546Sopenharmony_ci      if (ELF64_ST_TYPE(symbol->st_info) == STT_AMDGPU_LDS) {
207bf215546Sopenharmony_ci         /* old-style LDS symbols from initial prototype -- remove eventually */
208bf215546Sopenharmony_ci         s.align = MIN2(1u << (symbol->st_other >> 3), 1u << 16);
209bf215546Sopenharmony_ci      } else if (symbol->st_shndx == SHN_AMDGPU_LDS) {
210bf215546Sopenharmony_ci         s.align = MIN2(symbol->st_value, 1u << 16);
211bf215546Sopenharmony_ci         report_if(!util_is_power_of_two_nonzero(s.align));
212bf215546Sopenharmony_ci      } else
213bf215546Sopenharmony_ci         continue;
214bf215546Sopenharmony_ci
215bf215546Sopenharmony_ci      report_if(symbol->st_size > 1u << 29);
216bf215546Sopenharmony_ci
217bf215546Sopenharmony_ci      s.name = elf_strptr(part->elf, strtabidx, symbol->st_name);
218bf215546Sopenharmony_ci      s.size = symbol->st_size;
219bf215546Sopenharmony_ci      s.part_idx = part_idx;
220bf215546Sopenharmony_ci
221bf215546Sopenharmony_ci      if (!strcmp(s.name, "__lds_end")) {
222bf215546Sopenharmony_ci         report_elf_if(s.size != 0);
223bf215546Sopenharmony_ci         *lds_end_align = MAX2(*lds_end_align, s.align);
224bf215546Sopenharmony_ci         continue;
225bf215546Sopenharmony_ci      }
226bf215546Sopenharmony_ci
227bf215546Sopenharmony_ci      const struct ac_rtld_symbol *shared = find_symbol(&binary->lds_symbols, s.name, part_idx);
228bf215546Sopenharmony_ci      if (shared) {
229bf215546Sopenharmony_ci         report_elf_if(s.align > shared->align);
230bf215546Sopenharmony_ci         report_elf_if(s.size > shared->size);
231bf215546Sopenharmony_ci         continue;
232bf215546Sopenharmony_ci      }
233bf215546Sopenharmony_ci
234bf215546Sopenharmony_ci      util_dynarray_append(&binary->lds_symbols, struct ac_rtld_symbol, s);
235bf215546Sopenharmony_ci   }
236bf215546Sopenharmony_ci
237bf215546Sopenharmony_ci   return true;
238bf215546Sopenharmony_ci
239bf215546Sopenharmony_ci#undef report_if
240bf215546Sopenharmony_ci#undef report_elf_if
241bf215546Sopenharmony_ci}
242bf215546Sopenharmony_ci
243bf215546Sopenharmony_ci/**
244bf215546Sopenharmony_ci * Open a binary consisting of one or more shader parts.
245bf215546Sopenharmony_ci *
246bf215546Sopenharmony_ci * \param binary the uninitialized struct
247bf215546Sopenharmony_ci * \param i binary opening parameters
248bf215546Sopenharmony_ci */
249bf215546Sopenharmony_cibool ac_rtld_open(struct ac_rtld_binary *binary, struct ac_rtld_open_info i)
250bf215546Sopenharmony_ci{
251bf215546Sopenharmony_ci   /* One of the libelf implementations
252bf215546Sopenharmony_ci    * (http://www.mr511.de/software/english.htm) requires calling
253bf215546Sopenharmony_ci    * elf_version() before elf_memory().
254bf215546Sopenharmony_ci    */
255bf215546Sopenharmony_ci   elf_version(EV_CURRENT);
256bf215546Sopenharmony_ci
257bf215546Sopenharmony_ci   memset(binary, 0, sizeof(*binary));
258bf215546Sopenharmony_ci   memcpy(&binary->options, &i.options, sizeof(binary->options));
259bf215546Sopenharmony_ci   binary->wave_size = i.wave_size;
260bf215546Sopenharmony_ci   binary->gfx_level = i.info->gfx_level;
261bf215546Sopenharmony_ci   binary->num_parts = i.num_parts;
262bf215546Sopenharmony_ci   binary->parts = calloc(sizeof(*binary->parts), i.num_parts);
263bf215546Sopenharmony_ci   if (!binary->parts)
264bf215546Sopenharmony_ci      return false;
265bf215546Sopenharmony_ci
266bf215546Sopenharmony_ci   uint64_t pasted_text_size = 0;
267bf215546Sopenharmony_ci   uint64_t rx_align = 1;
268bf215546Sopenharmony_ci   uint64_t rx_size = 0;
269bf215546Sopenharmony_ci   uint64_t exec_size = 0;
270bf215546Sopenharmony_ci
271bf215546Sopenharmony_ci#define report_if(cond)                                                                            \
272bf215546Sopenharmony_ci   do {                                                                                            \
273bf215546Sopenharmony_ci      if ((cond)) {                                                                                \
274bf215546Sopenharmony_ci         report_errorf(#cond);                                                                     \
275bf215546Sopenharmony_ci         goto fail;                                                                                \
276bf215546Sopenharmony_ci      }                                                                                            \
277bf215546Sopenharmony_ci   } while (false)
278bf215546Sopenharmony_ci#define report_elf_if(cond)                                                                        \
279bf215546Sopenharmony_ci   do {                                                                                            \
280bf215546Sopenharmony_ci      if ((cond)) {                                                                                \
281bf215546Sopenharmony_ci         report_elf_errorf(#cond);                                                                 \
282bf215546Sopenharmony_ci         goto fail;                                                                                \
283bf215546Sopenharmony_ci      }                                                                                            \
284bf215546Sopenharmony_ci   } while (false)
285bf215546Sopenharmony_ci
286bf215546Sopenharmony_ci   /* Copy and layout shared LDS symbols. */
287bf215546Sopenharmony_ci   if (i.num_shared_lds_symbols) {
288bf215546Sopenharmony_ci      if (!util_dynarray_resize(&binary->lds_symbols, struct ac_rtld_symbol,
289bf215546Sopenharmony_ci                                i.num_shared_lds_symbols))
290bf215546Sopenharmony_ci         goto fail;
291bf215546Sopenharmony_ci
292bf215546Sopenharmony_ci      memcpy(binary->lds_symbols.data, i.shared_lds_symbols, binary->lds_symbols.size);
293bf215546Sopenharmony_ci   }
294bf215546Sopenharmony_ci
295bf215546Sopenharmony_ci   util_dynarray_foreach (&binary->lds_symbols, struct ac_rtld_symbol, symbol)
296bf215546Sopenharmony_ci      symbol->part_idx = ~0u;
297bf215546Sopenharmony_ci
298bf215546Sopenharmony_ci   unsigned max_lds_size = 64 * 1024;
299bf215546Sopenharmony_ci
300bf215546Sopenharmony_ci   if (i.info->gfx_level == GFX6 ||
301bf215546Sopenharmony_ci       (i.shader_type != MESA_SHADER_COMPUTE && i.shader_type != MESA_SHADER_FRAGMENT))
302bf215546Sopenharmony_ci      max_lds_size = 32 * 1024;
303bf215546Sopenharmony_ci
304bf215546Sopenharmony_ci   uint64_t shared_lds_size = 0;
305bf215546Sopenharmony_ci   if (!layout_symbols(binary->lds_symbols.data, i.num_shared_lds_symbols, &shared_lds_size))
306bf215546Sopenharmony_ci      goto fail;
307bf215546Sopenharmony_ci
308bf215546Sopenharmony_ci   if (shared_lds_size > max_lds_size) {
309bf215546Sopenharmony_ci      fprintf(stderr, "ac_rtld error(1): too much LDS (used = %u, max = %u)\n",
310bf215546Sopenharmony_ci              (unsigned)shared_lds_size, max_lds_size);
311bf215546Sopenharmony_ci      goto fail;
312bf215546Sopenharmony_ci   }
313bf215546Sopenharmony_ci   binary->lds_size = shared_lds_size;
314bf215546Sopenharmony_ci
315bf215546Sopenharmony_ci   /* First pass over all parts: open ELFs, pre-determine the placement of
316bf215546Sopenharmony_ci    * sections in the memory image, and collect and layout private LDS symbols. */
317bf215546Sopenharmony_ci   uint32_t lds_end_align = 0;
318bf215546Sopenharmony_ci
319bf215546Sopenharmony_ci   if (binary->options.halt_at_entry)
320bf215546Sopenharmony_ci      pasted_text_size += 4;
321bf215546Sopenharmony_ci
322bf215546Sopenharmony_ci   for (unsigned part_idx = 0; part_idx < i.num_parts; ++part_idx) {
323bf215546Sopenharmony_ci      struct ac_rtld_part *part = &binary->parts[part_idx];
324bf215546Sopenharmony_ci      unsigned part_lds_symbols_begin =
325bf215546Sopenharmony_ci         util_dynarray_num_elements(&binary->lds_symbols, struct ac_rtld_symbol);
326bf215546Sopenharmony_ci
327bf215546Sopenharmony_ci      part->elf = elf_memory((char *)i.elf_ptrs[part_idx], i.elf_sizes[part_idx]);
328bf215546Sopenharmony_ci      report_elf_if(!part->elf);
329bf215546Sopenharmony_ci
330bf215546Sopenharmony_ci      const Elf64_Ehdr *ehdr = elf64_getehdr(part->elf);
331bf215546Sopenharmony_ci      report_elf_if(!ehdr);
332bf215546Sopenharmony_ci      report_if(ehdr->e_machine != EM_AMDGPU);
333bf215546Sopenharmony_ci
334bf215546Sopenharmony_ci      size_t section_str_index;
335bf215546Sopenharmony_ci      size_t num_shdrs;
336bf215546Sopenharmony_ci      report_elf_if(elf_getshdrstrndx(part->elf, &section_str_index) < 0);
337bf215546Sopenharmony_ci      report_elf_if(elf_getshdrnum(part->elf, &num_shdrs) < 0);
338bf215546Sopenharmony_ci
339bf215546Sopenharmony_ci      part->num_sections = num_shdrs;
340bf215546Sopenharmony_ci      part->sections = calloc(sizeof(*part->sections), num_shdrs);
341bf215546Sopenharmony_ci      report_if(!part->sections);
342bf215546Sopenharmony_ci
343bf215546Sopenharmony_ci      Elf_Scn *section = NULL;
344bf215546Sopenharmony_ci      while ((section = elf_nextscn(part->elf, section))) {
345bf215546Sopenharmony_ci         Elf64_Shdr *shdr = elf64_getshdr(section);
346bf215546Sopenharmony_ci         struct ac_rtld_section *s = &part->sections[elf_ndxscn(section)];
347bf215546Sopenharmony_ci         s->name = elf_strptr(part->elf, section_str_index, shdr->sh_name);
348bf215546Sopenharmony_ci         report_elf_if(!s->name);
349bf215546Sopenharmony_ci
350bf215546Sopenharmony_ci         /* Cannot actually handle linked objects yet */
351bf215546Sopenharmony_ci         report_elf_if(shdr->sh_addr != 0);
352bf215546Sopenharmony_ci
353bf215546Sopenharmony_ci         /* Alignment must be 0 or a power of two */
354bf215546Sopenharmony_ci         report_elf_if(shdr->sh_addralign & (shdr->sh_addralign - 1));
355bf215546Sopenharmony_ci         uint64_t sh_align = MAX2(shdr->sh_addralign, 1);
356bf215546Sopenharmony_ci
357bf215546Sopenharmony_ci         if (shdr->sh_flags & SHF_ALLOC && shdr->sh_type != SHT_NOTE) {
358bf215546Sopenharmony_ci            report_if(shdr->sh_flags & SHF_WRITE);
359bf215546Sopenharmony_ci
360bf215546Sopenharmony_ci            s->is_rx = true;
361bf215546Sopenharmony_ci
362bf215546Sopenharmony_ci            if (shdr->sh_flags & SHF_EXECINSTR) {
363bf215546Sopenharmony_ci               report_elf_if(shdr->sh_size & 3);
364bf215546Sopenharmony_ci
365bf215546Sopenharmony_ci               if (!strcmp(s->name, ".text"))
366bf215546Sopenharmony_ci                  s->is_pasted_text = true;
367bf215546Sopenharmony_ci
368bf215546Sopenharmony_ci               exec_size += shdr->sh_size;
369bf215546Sopenharmony_ci            }
370bf215546Sopenharmony_ci
371bf215546Sopenharmony_ci            if (s->is_pasted_text) {
372bf215546Sopenharmony_ci               s->offset = pasted_text_size;
373bf215546Sopenharmony_ci               pasted_text_size += shdr->sh_size;
374bf215546Sopenharmony_ci            } else {
375bf215546Sopenharmony_ci               rx_align = align(rx_align, sh_align);
376bf215546Sopenharmony_ci               rx_size = align(rx_size, sh_align);
377bf215546Sopenharmony_ci               s->offset = rx_size;
378bf215546Sopenharmony_ci               rx_size += shdr->sh_size;
379bf215546Sopenharmony_ci            }
380bf215546Sopenharmony_ci         } else if (shdr->sh_type == SHT_SYMTAB) {
381bf215546Sopenharmony_ci            if (!read_private_lds_symbols(binary, part_idx, section, &lds_end_align))
382bf215546Sopenharmony_ci               goto fail;
383bf215546Sopenharmony_ci         }
384bf215546Sopenharmony_ci      }
385bf215546Sopenharmony_ci
386bf215546Sopenharmony_ci      uint64_t part_lds_size = shared_lds_size;
387bf215546Sopenharmony_ci      if (!layout_symbols(util_dynarray_element(&binary->lds_symbols, struct ac_rtld_symbol,
388bf215546Sopenharmony_ci                                                part_lds_symbols_begin),
389bf215546Sopenharmony_ci                          util_dynarray_num_elements(&binary->lds_symbols, struct ac_rtld_symbol) -
390bf215546Sopenharmony_ci                             part_lds_symbols_begin,
391bf215546Sopenharmony_ci                          &part_lds_size))
392bf215546Sopenharmony_ci         goto fail;
393bf215546Sopenharmony_ci      binary->lds_size = MAX2(binary->lds_size, part_lds_size);
394bf215546Sopenharmony_ci   }
395bf215546Sopenharmony_ci
396bf215546Sopenharmony_ci   binary->rx_end_markers = pasted_text_size;
397bf215546Sopenharmony_ci   pasted_text_size += 4 * DEBUGGER_NUM_MARKERS;
398bf215546Sopenharmony_ci
399bf215546Sopenharmony_ci   /* __lds_end is a special symbol that points at the end of the memory
400bf215546Sopenharmony_ci    * occupied by other LDS symbols. Its alignment is taken as the
401bf215546Sopenharmony_ci    * maximum of its alignment over all shader parts where it occurs.
402bf215546Sopenharmony_ci    */
403bf215546Sopenharmony_ci   if (lds_end_align) {
404bf215546Sopenharmony_ci      binary->lds_size = align(binary->lds_size, lds_end_align);
405bf215546Sopenharmony_ci
406bf215546Sopenharmony_ci      struct ac_rtld_symbol *lds_end =
407bf215546Sopenharmony_ci         util_dynarray_grow(&binary->lds_symbols, struct ac_rtld_symbol, 1);
408bf215546Sopenharmony_ci      lds_end->name = "__lds_end";
409bf215546Sopenharmony_ci      lds_end->size = 0;
410bf215546Sopenharmony_ci      lds_end->align = lds_end_align;
411bf215546Sopenharmony_ci      lds_end->offset = binary->lds_size;
412bf215546Sopenharmony_ci      lds_end->part_idx = ~0u;
413bf215546Sopenharmony_ci   }
414bf215546Sopenharmony_ci
415bf215546Sopenharmony_ci   if (binary->lds_size > max_lds_size) {
416bf215546Sopenharmony_ci      fprintf(stderr, "ac_rtld error(2): too much LDS (used = %u, max = %u)\n",
417bf215546Sopenharmony_ci              (unsigned)binary->lds_size, max_lds_size);
418bf215546Sopenharmony_ci      goto fail;
419bf215546Sopenharmony_ci   }
420bf215546Sopenharmony_ci
421bf215546Sopenharmony_ci   /* Second pass: Adjust offsets of non-pasted text sections. */
422bf215546Sopenharmony_ci   binary->rx_size = pasted_text_size;
423bf215546Sopenharmony_ci   binary->rx_size = align(binary->rx_size, rx_align);
424bf215546Sopenharmony_ci
425bf215546Sopenharmony_ci   for (unsigned part_idx = 0; part_idx < i.num_parts; ++part_idx) {
426bf215546Sopenharmony_ci      struct ac_rtld_part *part = &binary->parts[part_idx];
427bf215546Sopenharmony_ci      size_t num_shdrs;
428bf215546Sopenharmony_ci      elf_getshdrnum(part->elf, &num_shdrs);
429bf215546Sopenharmony_ci
430bf215546Sopenharmony_ci      for (unsigned j = 0; j < num_shdrs; ++j) {
431bf215546Sopenharmony_ci         struct ac_rtld_section *s = &part->sections[j];
432bf215546Sopenharmony_ci         if (s->is_rx && !s->is_pasted_text)
433bf215546Sopenharmony_ci            s->offset += binary->rx_size;
434bf215546Sopenharmony_ci      }
435bf215546Sopenharmony_ci   }
436bf215546Sopenharmony_ci
437bf215546Sopenharmony_ci   binary->rx_size += rx_size;
438bf215546Sopenharmony_ci   binary->exec_size = exec_size;
439bf215546Sopenharmony_ci
440bf215546Sopenharmony_ci   /* The SQ fetches up to N cache lines of 16 dwords
441bf215546Sopenharmony_ci    * ahead of the PC, configurable by SH_MEM_CONFIG and
442bf215546Sopenharmony_ci    * S_INST_PREFETCH. This can cause two issues:
443bf215546Sopenharmony_ci    *
444bf215546Sopenharmony_ci    * (1) Crossing a page boundary to an unmapped page. The logic
445bf215546Sopenharmony_ci    *     does not distinguish between a required fetch and a "mere"
446bf215546Sopenharmony_ci    *     prefetch and will fault.
447bf215546Sopenharmony_ci    *
448bf215546Sopenharmony_ci    * (2) Prefetching instructions that will be changed for a
449bf215546Sopenharmony_ci    *     different shader.
450bf215546Sopenharmony_ci    *
451bf215546Sopenharmony_ci    * (2) is not currently an issue because we flush the I$ at IB
452bf215546Sopenharmony_ci    * boundaries, but (1) needs to be addressed. Due to buffer
453bf215546Sopenharmony_ci    * suballocation, we just play it safe.
454bf215546Sopenharmony_ci    */
455bf215546Sopenharmony_ci   unsigned prefetch_distance = 0;
456bf215546Sopenharmony_ci
457bf215546Sopenharmony_ci   if (!i.info->has_graphics && i.info->family >= CHIP_ALDEBARAN)
458bf215546Sopenharmony_ci      prefetch_distance = 16;
459bf215546Sopenharmony_ci   else if (i.info->gfx_level >= GFX10)
460bf215546Sopenharmony_ci      prefetch_distance = 3;
461bf215546Sopenharmony_ci
462bf215546Sopenharmony_ci   if (prefetch_distance) {
463bf215546Sopenharmony_ci      if (i.info->gfx_level >= GFX11)
464bf215546Sopenharmony_ci         binary->rx_size = align(binary->rx_size + prefetch_distance * 64, 128);
465bf215546Sopenharmony_ci      else
466bf215546Sopenharmony_ci         binary->rx_size = align(binary->rx_size + prefetch_distance * 64, 64);
467bf215546Sopenharmony_ci   }
468bf215546Sopenharmony_ci
469bf215546Sopenharmony_ci   return true;
470bf215546Sopenharmony_ci
471bf215546Sopenharmony_ci#undef report_if
472bf215546Sopenharmony_ci#undef report_elf_if
473bf215546Sopenharmony_ci
474bf215546Sopenharmony_cifail:
475bf215546Sopenharmony_ci   ac_rtld_close(binary);
476bf215546Sopenharmony_ci   return false;
477bf215546Sopenharmony_ci}
478bf215546Sopenharmony_ci
479bf215546Sopenharmony_civoid ac_rtld_close(struct ac_rtld_binary *binary)
480bf215546Sopenharmony_ci{
481bf215546Sopenharmony_ci   for (unsigned i = 0; i < binary->num_parts; ++i) {
482bf215546Sopenharmony_ci      struct ac_rtld_part *part = &binary->parts[i];
483bf215546Sopenharmony_ci      free(part->sections);
484bf215546Sopenharmony_ci      elf_end(part->elf);
485bf215546Sopenharmony_ci   }
486bf215546Sopenharmony_ci
487bf215546Sopenharmony_ci   util_dynarray_fini(&binary->lds_symbols);
488bf215546Sopenharmony_ci   free(binary->parts);
489bf215546Sopenharmony_ci   binary->parts = NULL;
490bf215546Sopenharmony_ci   binary->num_parts = 0;
491bf215546Sopenharmony_ci}
492bf215546Sopenharmony_ci
493bf215546Sopenharmony_cistatic bool get_section_by_name(struct ac_rtld_part *part, const char *name, const char **data,
494bf215546Sopenharmony_ci                                size_t *nbytes)
495bf215546Sopenharmony_ci{
496bf215546Sopenharmony_ci   for (unsigned i = 0; i < part->num_sections; ++i) {
497bf215546Sopenharmony_ci      struct ac_rtld_section *s = &part->sections[i];
498bf215546Sopenharmony_ci      if (s->name && !strcmp(name, s->name)) {
499bf215546Sopenharmony_ci         Elf_Scn *target_scn = elf_getscn(part->elf, i);
500bf215546Sopenharmony_ci         Elf_Data *target_data = elf_getdata(target_scn, NULL);
501bf215546Sopenharmony_ci         if (!target_data) {
502bf215546Sopenharmony_ci            report_elf_errorf("ac_rtld: get_section_by_name: elf_getdata");
503bf215546Sopenharmony_ci            return false;
504bf215546Sopenharmony_ci         }
505bf215546Sopenharmony_ci
506bf215546Sopenharmony_ci         *data = target_data->d_buf;
507bf215546Sopenharmony_ci         *nbytes = target_data->d_size;
508bf215546Sopenharmony_ci         return true;
509bf215546Sopenharmony_ci      }
510bf215546Sopenharmony_ci   }
511bf215546Sopenharmony_ci   return false;
512bf215546Sopenharmony_ci}
513bf215546Sopenharmony_ci
514bf215546Sopenharmony_cibool ac_rtld_get_section_by_name(struct ac_rtld_binary *binary, const char *name, const char **data,
515bf215546Sopenharmony_ci                                 size_t *nbytes)
516bf215546Sopenharmony_ci{
517bf215546Sopenharmony_ci   assert(binary->num_parts == 1);
518bf215546Sopenharmony_ci   return get_section_by_name(&binary->parts[0], name, data, nbytes);
519bf215546Sopenharmony_ci}
520bf215546Sopenharmony_ci
521bf215546Sopenharmony_cibool ac_rtld_read_config(const struct radeon_info *info, struct ac_rtld_binary *binary,
522bf215546Sopenharmony_ci                         struct ac_shader_config *config)
523bf215546Sopenharmony_ci{
524bf215546Sopenharmony_ci   for (unsigned i = 0; i < binary->num_parts; ++i) {
525bf215546Sopenharmony_ci      struct ac_rtld_part *part = &binary->parts[i];
526bf215546Sopenharmony_ci      const char *config_data;
527bf215546Sopenharmony_ci      size_t config_nbytes;
528bf215546Sopenharmony_ci
529bf215546Sopenharmony_ci      if (!get_section_by_name(part, ".AMDGPU.config", &config_data, &config_nbytes))
530bf215546Sopenharmony_ci         return false;
531bf215546Sopenharmony_ci
532bf215546Sopenharmony_ci      /* TODO: be precise about scratch use? */
533bf215546Sopenharmony_ci      struct ac_shader_config c = {0};
534bf215546Sopenharmony_ci      ac_parse_shader_binary_config(config_data, config_nbytes, binary->wave_size, info, &c);
535bf215546Sopenharmony_ci
536bf215546Sopenharmony_ci      config->num_sgprs = MAX2(config->num_sgprs, c.num_sgprs);
537bf215546Sopenharmony_ci      config->num_vgprs = MAX2(config->num_vgprs, c.num_vgprs);
538bf215546Sopenharmony_ci      config->spilled_sgprs = MAX2(config->spilled_sgprs, c.spilled_sgprs);
539bf215546Sopenharmony_ci      config->spilled_vgprs = MAX2(config->spilled_vgprs, c.spilled_vgprs);
540bf215546Sopenharmony_ci      config->scratch_bytes_per_wave =
541bf215546Sopenharmony_ci         MAX2(config->scratch_bytes_per_wave, c.scratch_bytes_per_wave);
542bf215546Sopenharmony_ci
543bf215546Sopenharmony_ci      assert(i == 0 || config->float_mode == c.float_mode);
544bf215546Sopenharmony_ci      config->float_mode = c.float_mode;
545bf215546Sopenharmony_ci
546bf215546Sopenharmony_ci      /* SPI_PS_INPUT_ENA/ADDR can't be combined. Only the value from
547bf215546Sopenharmony_ci       * the main shader part is used. */
548bf215546Sopenharmony_ci      assert(config->spi_ps_input_ena == 0 && config->spi_ps_input_addr == 0);
549bf215546Sopenharmony_ci      config->spi_ps_input_ena = c.spi_ps_input_ena;
550bf215546Sopenharmony_ci      config->spi_ps_input_addr = c.spi_ps_input_addr;
551bf215546Sopenharmony_ci
552bf215546Sopenharmony_ci      /* TODO: consistently use LDS symbols for this */
553bf215546Sopenharmony_ci      config->lds_size = MAX2(config->lds_size, c.lds_size);
554bf215546Sopenharmony_ci
555bf215546Sopenharmony_ci      /* TODO: Should we combine these somehow? It's currently only
556bf215546Sopenharmony_ci       * used for radeonsi's compute, where multiple parts aren't used. */
557bf215546Sopenharmony_ci      assert(config->rsrc1 == 0 && config->rsrc2 == 0);
558bf215546Sopenharmony_ci      config->rsrc1 = c.rsrc1;
559bf215546Sopenharmony_ci      config->rsrc2 = c.rsrc2;
560bf215546Sopenharmony_ci   }
561bf215546Sopenharmony_ci
562bf215546Sopenharmony_ci   return true;
563bf215546Sopenharmony_ci}
564bf215546Sopenharmony_ci
565bf215546Sopenharmony_cistatic bool resolve_symbol(const struct ac_rtld_upload_info *u, unsigned part_idx,
566bf215546Sopenharmony_ci                           const Elf64_Sym *sym, const char *name, uint64_t *value)
567bf215546Sopenharmony_ci{
568bf215546Sopenharmony_ci   /* TODO: properly disentangle the undef and the LDS cases once
569bf215546Sopenharmony_ci    * STT_AMDGPU_LDS is retired. */
570bf215546Sopenharmony_ci   if (sym->st_shndx == SHN_UNDEF || sym->st_shndx == SHN_AMDGPU_LDS) {
571bf215546Sopenharmony_ci      const struct ac_rtld_symbol *lds_sym = find_symbol(&u->binary->lds_symbols, name, part_idx);
572bf215546Sopenharmony_ci
573bf215546Sopenharmony_ci      if (lds_sym) {
574bf215546Sopenharmony_ci         *value = lds_sym->offset;
575bf215546Sopenharmony_ci         return true;
576bf215546Sopenharmony_ci      }
577bf215546Sopenharmony_ci
578bf215546Sopenharmony_ci      /* TODO: resolve from other parts */
579bf215546Sopenharmony_ci
580bf215546Sopenharmony_ci      if (u->get_external_symbol(u->binary->gfx_level, u->cb_data, name, value))
581bf215546Sopenharmony_ci         return true;
582bf215546Sopenharmony_ci
583bf215546Sopenharmony_ci      report_errorf("symbol %s: unknown", name);
584bf215546Sopenharmony_ci      return false;
585bf215546Sopenharmony_ci   }
586bf215546Sopenharmony_ci
587bf215546Sopenharmony_ci   struct ac_rtld_part *part = &u->binary->parts[part_idx];
588bf215546Sopenharmony_ci   if (sym->st_shndx >= part->num_sections) {
589bf215546Sopenharmony_ci      report_errorf("symbol %s: section out of bounds", name);
590bf215546Sopenharmony_ci      return false;
591bf215546Sopenharmony_ci   }
592bf215546Sopenharmony_ci
593bf215546Sopenharmony_ci   struct ac_rtld_section *s = &part->sections[sym->st_shndx];
594bf215546Sopenharmony_ci   if (!s->is_rx) {
595bf215546Sopenharmony_ci      report_errorf("symbol %s: bad section", name);
596bf215546Sopenharmony_ci      return false;
597bf215546Sopenharmony_ci   }
598bf215546Sopenharmony_ci
599bf215546Sopenharmony_ci   uint64_t section_base = u->rx_va + s->offset;
600bf215546Sopenharmony_ci
601bf215546Sopenharmony_ci   *value = section_base + sym->st_value;
602bf215546Sopenharmony_ci   return true;
603bf215546Sopenharmony_ci}
604bf215546Sopenharmony_ci
605bf215546Sopenharmony_cistatic bool apply_relocs(const struct ac_rtld_upload_info *u, unsigned part_idx,
606bf215546Sopenharmony_ci                         const Elf64_Shdr *reloc_shdr, const Elf_Data *reloc_data)
607bf215546Sopenharmony_ci{
608bf215546Sopenharmony_ci#define report_if(cond)                                                                            \
609bf215546Sopenharmony_ci   do {                                                                                            \
610bf215546Sopenharmony_ci      if ((cond)) {                                                                                \
611bf215546Sopenharmony_ci         report_errorf(#cond);                                                                     \
612bf215546Sopenharmony_ci         return false;                                                                             \
613bf215546Sopenharmony_ci      }                                                                                            \
614bf215546Sopenharmony_ci   } while (false)
615bf215546Sopenharmony_ci#define report_elf_if(cond)                                                                        \
616bf215546Sopenharmony_ci   do {                                                                                            \
617bf215546Sopenharmony_ci      if ((cond)) {                                                                                \
618bf215546Sopenharmony_ci         report_elf_errorf(#cond);                                                                 \
619bf215546Sopenharmony_ci         return false;                                                                             \
620bf215546Sopenharmony_ci      }                                                                                            \
621bf215546Sopenharmony_ci   } while (false)
622bf215546Sopenharmony_ci
623bf215546Sopenharmony_ci   struct ac_rtld_part *part = &u->binary->parts[part_idx];
624bf215546Sopenharmony_ci   Elf_Scn *target_scn = elf_getscn(part->elf, reloc_shdr->sh_info);
625bf215546Sopenharmony_ci   report_elf_if(!target_scn);
626bf215546Sopenharmony_ci
627bf215546Sopenharmony_ci   Elf_Data *target_data = elf_getdata(target_scn, NULL);
628bf215546Sopenharmony_ci   report_elf_if(!target_data);
629bf215546Sopenharmony_ci
630bf215546Sopenharmony_ci   Elf_Scn *symbols_scn = elf_getscn(part->elf, reloc_shdr->sh_link);
631bf215546Sopenharmony_ci   report_elf_if(!symbols_scn);
632bf215546Sopenharmony_ci
633bf215546Sopenharmony_ci   Elf64_Shdr *symbols_shdr = elf64_getshdr(symbols_scn);
634bf215546Sopenharmony_ci   report_elf_if(!symbols_shdr);
635bf215546Sopenharmony_ci   uint32_t strtabidx = symbols_shdr->sh_link;
636bf215546Sopenharmony_ci
637bf215546Sopenharmony_ci   Elf_Data *symbols_data = elf_getdata(symbols_scn, NULL);
638bf215546Sopenharmony_ci   report_elf_if(!symbols_data);
639bf215546Sopenharmony_ci
640bf215546Sopenharmony_ci   const Elf64_Sym *symbols = symbols_data->d_buf;
641bf215546Sopenharmony_ci   size_t num_symbols = symbols_data->d_size / sizeof(Elf64_Sym);
642bf215546Sopenharmony_ci
643bf215546Sopenharmony_ci   struct ac_rtld_section *s = &part->sections[reloc_shdr->sh_info];
644bf215546Sopenharmony_ci   report_if(!s->is_rx);
645bf215546Sopenharmony_ci
646bf215546Sopenharmony_ci   const char *orig_base = target_data->d_buf;
647bf215546Sopenharmony_ci   char *dst_base = u->rx_ptr + s->offset;
648bf215546Sopenharmony_ci   uint64_t va_base = u->rx_va + s->offset;
649bf215546Sopenharmony_ci
650bf215546Sopenharmony_ci   Elf64_Rel *rel = reloc_data->d_buf;
651bf215546Sopenharmony_ci   size_t num_relocs = reloc_data->d_size / sizeof(*rel);
652bf215546Sopenharmony_ci   for (size_t i = 0; i < num_relocs; ++i, ++rel) {
653bf215546Sopenharmony_ci      size_t r_sym = ELF64_R_SYM(rel->r_info);
654bf215546Sopenharmony_ci      unsigned r_type = ELF64_R_TYPE(rel->r_info);
655bf215546Sopenharmony_ci
656bf215546Sopenharmony_ci      const char *orig_ptr = orig_base + rel->r_offset;
657bf215546Sopenharmony_ci      char *dst_ptr = dst_base + rel->r_offset;
658bf215546Sopenharmony_ci      uint64_t va = va_base + rel->r_offset;
659bf215546Sopenharmony_ci
660bf215546Sopenharmony_ci      uint64_t symbol;
661bf215546Sopenharmony_ci      uint64_t addend;
662bf215546Sopenharmony_ci
663bf215546Sopenharmony_ci      if (r_sym == STN_UNDEF) {
664bf215546Sopenharmony_ci         symbol = 0;
665bf215546Sopenharmony_ci      } else {
666bf215546Sopenharmony_ci         report_elf_if(r_sym >= num_symbols);
667bf215546Sopenharmony_ci
668bf215546Sopenharmony_ci         const Elf64_Sym *sym = &symbols[r_sym];
669bf215546Sopenharmony_ci         const char *symbol_name = elf_strptr(part->elf, strtabidx, sym->st_name);
670bf215546Sopenharmony_ci         report_elf_if(!symbol_name);
671bf215546Sopenharmony_ci
672bf215546Sopenharmony_ci         if (!resolve_symbol(u, part_idx, sym, symbol_name, &symbol))
673bf215546Sopenharmony_ci            return false;
674bf215546Sopenharmony_ci      }
675bf215546Sopenharmony_ci
676bf215546Sopenharmony_ci      /* TODO: Should we also support .rela sections, where the
677bf215546Sopenharmony_ci       * addend is part of the relocation record? */
678bf215546Sopenharmony_ci
679bf215546Sopenharmony_ci      /* Load the addend from the ELF instead of the destination,
680bf215546Sopenharmony_ci       * because the destination may be in VRAM. */
681bf215546Sopenharmony_ci      switch (r_type) {
682bf215546Sopenharmony_ci      case R_AMDGPU_ABS32:
683bf215546Sopenharmony_ci      case R_AMDGPU_ABS32_LO:
684bf215546Sopenharmony_ci      case R_AMDGPU_ABS32_HI:
685bf215546Sopenharmony_ci      case R_AMDGPU_REL32:
686bf215546Sopenharmony_ci      case R_AMDGPU_REL32_LO:
687bf215546Sopenharmony_ci      case R_AMDGPU_REL32_HI:
688bf215546Sopenharmony_ci         addend = *(const uint32_t *)orig_ptr;
689bf215546Sopenharmony_ci         break;
690bf215546Sopenharmony_ci      case R_AMDGPU_ABS64:
691bf215546Sopenharmony_ci      case R_AMDGPU_REL64:
692bf215546Sopenharmony_ci         addend = *(const uint64_t *)orig_ptr;
693bf215546Sopenharmony_ci         break;
694bf215546Sopenharmony_ci      default:
695bf215546Sopenharmony_ci         report_errorf("unsupported r_type == %u", r_type);
696bf215546Sopenharmony_ci         return false;
697bf215546Sopenharmony_ci      }
698bf215546Sopenharmony_ci
699bf215546Sopenharmony_ci      uint64_t abs = symbol + addend;
700bf215546Sopenharmony_ci
701bf215546Sopenharmony_ci      switch (r_type) {
702bf215546Sopenharmony_ci      case R_AMDGPU_ABS32:
703bf215546Sopenharmony_ci         assert((uint32_t)abs == abs);
704bf215546Sopenharmony_ci         FALLTHROUGH;
705bf215546Sopenharmony_ci      case R_AMDGPU_ABS32_LO:
706bf215546Sopenharmony_ci         *(uint32_t *)dst_ptr = util_cpu_to_le32(abs);
707bf215546Sopenharmony_ci         break;
708bf215546Sopenharmony_ci      case R_AMDGPU_ABS32_HI:
709bf215546Sopenharmony_ci         *(uint32_t *)dst_ptr = util_cpu_to_le32(abs >> 32);
710bf215546Sopenharmony_ci         break;
711bf215546Sopenharmony_ci      case R_AMDGPU_ABS64:
712bf215546Sopenharmony_ci         *(uint64_t *)dst_ptr = util_cpu_to_le64(abs);
713bf215546Sopenharmony_ci         break;
714bf215546Sopenharmony_ci      case R_AMDGPU_REL32:
715bf215546Sopenharmony_ci         assert((int64_t)(int32_t)(abs - va) == (int64_t)(abs - va));
716bf215546Sopenharmony_ci         FALLTHROUGH;
717bf215546Sopenharmony_ci      case R_AMDGPU_REL32_LO:
718bf215546Sopenharmony_ci         *(uint32_t *)dst_ptr = util_cpu_to_le32(abs - va);
719bf215546Sopenharmony_ci         break;
720bf215546Sopenharmony_ci      case R_AMDGPU_REL32_HI:
721bf215546Sopenharmony_ci         *(uint32_t *)dst_ptr = util_cpu_to_le32((abs - va) >> 32);
722bf215546Sopenharmony_ci         break;
723bf215546Sopenharmony_ci      case R_AMDGPU_REL64:
724bf215546Sopenharmony_ci         *(uint64_t *)dst_ptr = util_cpu_to_le64(abs - va);
725bf215546Sopenharmony_ci         break;
726bf215546Sopenharmony_ci      default:
727bf215546Sopenharmony_ci         unreachable("bad r_type");
728bf215546Sopenharmony_ci      }
729bf215546Sopenharmony_ci   }
730bf215546Sopenharmony_ci
731bf215546Sopenharmony_ci   return true;
732bf215546Sopenharmony_ci
733bf215546Sopenharmony_ci#undef report_if
734bf215546Sopenharmony_ci#undef report_elf_if
735bf215546Sopenharmony_ci}
736bf215546Sopenharmony_ci
737bf215546Sopenharmony_ci/**
738bf215546Sopenharmony_ci * Upload the binary or binaries to the provided GPU buffers, including
739bf215546Sopenharmony_ci * relocations.
740bf215546Sopenharmony_ci */
741bf215546Sopenharmony_ciint ac_rtld_upload(struct ac_rtld_upload_info *u)
742bf215546Sopenharmony_ci{
743bf215546Sopenharmony_ci#define report_if(cond)                                                                            \
744bf215546Sopenharmony_ci   do {                                                                                            \
745bf215546Sopenharmony_ci      if ((cond)) {                                                                                \
746bf215546Sopenharmony_ci         report_errorf(#cond);                                                                     \
747bf215546Sopenharmony_ci         return -1;                                                                             \
748bf215546Sopenharmony_ci      }                                                                                            \
749bf215546Sopenharmony_ci   } while (false)
750bf215546Sopenharmony_ci#define report_elf_if(cond)                                                                        \
751bf215546Sopenharmony_ci   do {                                                                                            \
752bf215546Sopenharmony_ci      if ((cond)) {                                                                                \
753bf215546Sopenharmony_ci         report_errorf(#cond);                                                                     \
754bf215546Sopenharmony_ci         return -1;                                                                             \
755bf215546Sopenharmony_ci      }                                                                                            \
756bf215546Sopenharmony_ci   } while (false)
757bf215546Sopenharmony_ci
758bf215546Sopenharmony_ci   int size = 0;
759bf215546Sopenharmony_ci   if (u->binary->options.halt_at_entry) {
760bf215546Sopenharmony_ci      /* s_sethalt 1 */
761bf215546Sopenharmony_ci      *(uint32_t *)u->rx_ptr = util_cpu_to_le32(0xbf8d0001);
762bf215546Sopenharmony_ci   }
763bf215546Sopenharmony_ci
764bf215546Sopenharmony_ci   /* First pass: upload raw section data and lay out private LDS symbols. */
765bf215546Sopenharmony_ci   for (unsigned i = 0; i < u->binary->num_parts; ++i) {
766bf215546Sopenharmony_ci      struct ac_rtld_part *part = &u->binary->parts[i];
767bf215546Sopenharmony_ci
768bf215546Sopenharmony_ci      Elf_Scn *section = NULL;
769bf215546Sopenharmony_ci      while ((section = elf_nextscn(part->elf, section))) {
770bf215546Sopenharmony_ci         Elf64_Shdr *shdr = elf64_getshdr(section);
771bf215546Sopenharmony_ci         struct ac_rtld_section *s = &part->sections[elf_ndxscn(section)];
772bf215546Sopenharmony_ci
773bf215546Sopenharmony_ci         if (!s->is_rx)
774bf215546Sopenharmony_ci            continue;
775bf215546Sopenharmony_ci
776bf215546Sopenharmony_ci         report_if(shdr->sh_type != SHT_PROGBITS);
777bf215546Sopenharmony_ci
778bf215546Sopenharmony_ci         Elf_Data *data = elf_getdata(section, NULL);
779bf215546Sopenharmony_ci         report_elf_if(!data || data->d_size != shdr->sh_size);
780bf215546Sopenharmony_ci         memcpy(u->rx_ptr + s->offset, data->d_buf, shdr->sh_size);
781bf215546Sopenharmony_ci
782bf215546Sopenharmony_ci         size = MAX2(size, s->offset + shdr->sh_size);
783bf215546Sopenharmony_ci      }
784bf215546Sopenharmony_ci   }
785bf215546Sopenharmony_ci
786bf215546Sopenharmony_ci   if (u->binary->rx_end_markers) {
787bf215546Sopenharmony_ci      uint32_t *dst = (uint32_t *)(u->rx_ptr + u->binary->rx_end_markers);
788bf215546Sopenharmony_ci      for (unsigned i = 0; i < DEBUGGER_NUM_MARKERS; ++i)
789bf215546Sopenharmony_ci         *dst++ = util_cpu_to_le32(DEBUGGER_END_OF_CODE_MARKER);
790bf215546Sopenharmony_ci      size += 4 * DEBUGGER_NUM_MARKERS;
791bf215546Sopenharmony_ci   }
792bf215546Sopenharmony_ci
793bf215546Sopenharmony_ci   /* Second pass: handle relocations, overwriting uploaded data where
794bf215546Sopenharmony_ci    * appropriate. */
795bf215546Sopenharmony_ci   for (unsigned i = 0; i < u->binary->num_parts; ++i) {
796bf215546Sopenharmony_ci      struct ac_rtld_part *part = &u->binary->parts[i];
797bf215546Sopenharmony_ci      Elf_Scn *section = NULL;
798bf215546Sopenharmony_ci      while ((section = elf_nextscn(part->elf, section))) {
799bf215546Sopenharmony_ci         Elf64_Shdr *shdr = elf64_getshdr(section);
800bf215546Sopenharmony_ci         if (shdr->sh_type == SHT_REL) {
801bf215546Sopenharmony_ci            Elf_Data *relocs = elf_getdata(section, NULL);
802bf215546Sopenharmony_ci            report_elf_if(!relocs || relocs->d_size != shdr->sh_size);
803bf215546Sopenharmony_ci            if (!apply_relocs(u, i, shdr, relocs))
804bf215546Sopenharmony_ci               return -1;
805bf215546Sopenharmony_ci         } else if (shdr->sh_type == SHT_RELA) {
806bf215546Sopenharmony_ci            report_errorf("SHT_RELA not supported");
807bf215546Sopenharmony_ci            return -1;
808bf215546Sopenharmony_ci         }
809bf215546Sopenharmony_ci      }
810bf215546Sopenharmony_ci   }
811bf215546Sopenharmony_ci
812bf215546Sopenharmony_ci   return size;
813bf215546Sopenharmony_ci
814bf215546Sopenharmony_ci#undef report_if
815bf215546Sopenharmony_ci#undef report_elf_if
816bf215546Sopenharmony_ci}
817