1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright 2014-2019 Advanced Micro Devices, Inc. 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21bf215546Sopenharmony_ci * SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include "ac_rtld.h" 25bf215546Sopenharmony_ci 26bf215546Sopenharmony_ci#include "ac_binary.h" 27bf215546Sopenharmony_ci#include "ac_gpu_info.h" 28bf215546Sopenharmony_ci#include "util/compiler.h" 29bf215546Sopenharmony_ci#include "util/u_dynarray.h" 30bf215546Sopenharmony_ci#include "util/u_math.h" 31bf215546Sopenharmony_ci 32bf215546Sopenharmony_ci#include <gelf.h> 33bf215546Sopenharmony_ci#include <libelf.h> 34bf215546Sopenharmony_ci#include <stdarg.h> 35bf215546Sopenharmony_ci#include <stdio.h> 36bf215546Sopenharmony_ci#include <stdlib.h> 37bf215546Sopenharmony_ci#include <string.h> 38bf215546Sopenharmony_ci 39bf215546Sopenharmony_ci#ifndef EM_AMDGPU 40bf215546Sopenharmony_ci// Old distributions may not have this enum constant 41bf215546Sopenharmony_ci#define EM_AMDGPU 224 42bf215546Sopenharmony_ci#endif 43bf215546Sopenharmony_ci 44bf215546Sopenharmony_ci#ifndef STT_AMDGPU_LDS 45bf215546Sopenharmony_ci#define STT_AMDGPU_LDS 13 // this is deprecated -- remove 46bf215546Sopenharmony_ci#endif 47bf215546Sopenharmony_ci 48bf215546Sopenharmony_ci#ifndef SHN_AMDGPU_LDS 49bf215546Sopenharmony_ci#define SHN_AMDGPU_LDS 0xff00 50bf215546Sopenharmony_ci#endif 51bf215546Sopenharmony_ci 52bf215546Sopenharmony_ci#ifndef R_AMDGPU_NONE 53bf215546Sopenharmony_ci#define R_AMDGPU_NONE 0 54bf215546Sopenharmony_ci#define R_AMDGPU_ABS32_LO 1 55bf215546Sopenharmony_ci#define R_AMDGPU_ABS32_HI 2 56bf215546Sopenharmony_ci#define R_AMDGPU_ABS64 3 57bf215546Sopenharmony_ci#define R_AMDGPU_REL32 4 58bf215546Sopenharmony_ci#define R_AMDGPU_REL64 5 59bf215546Sopenharmony_ci#define R_AMDGPU_ABS32 6 60bf215546Sopenharmony_ci#define R_AMDGPU_GOTPCREL 7 61bf215546Sopenharmony_ci#define R_AMDGPU_GOTPCREL32_LO 8 62bf215546Sopenharmony_ci#define R_AMDGPU_GOTPCREL32_HI 9 63bf215546Sopenharmony_ci#define R_AMDGPU_REL32_LO 10 64bf215546Sopenharmony_ci#define R_AMDGPU_REL32_HI 11 65bf215546Sopenharmony_ci#define R_AMDGPU_RELATIVE64 13 66bf215546Sopenharmony_ci#endif 67bf215546Sopenharmony_ci 68bf215546Sopenharmony_ci/* For the UMR disassembler. */ 69bf215546Sopenharmony_ci#define DEBUGGER_END_OF_CODE_MARKER 0xbf9f0000 /* invalid instruction */ 70bf215546Sopenharmony_ci#define DEBUGGER_NUM_MARKERS 5 71bf215546Sopenharmony_ci 72bf215546Sopenharmony_cistruct ac_rtld_section { 73bf215546Sopenharmony_ci bool is_rx : 1; 74bf215546Sopenharmony_ci bool is_pasted_text : 1; 75bf215546Sopenharmony_ci uint64_t offset; 76bf215546Sopenharmony_ci const char *name; 77bf215546Sopenharmony_ci}; 78bf215546Sopenharmony_ci 79bf215546Sopenharmony_cistruct ac_rtld_part { 80bf215546Sopenharmony_ci Elf *elf; 81bf215546Sopenharmony_ci struct ac_rtld_section *sections; 82bf215546Sopenharmony_ci unsigned num_sections; 83bf215546Sopenharmony_ci}; 84bf215546Sopenharmony_ci 85bf215546Sopenharmony_cistatic void report_errorvf(const char *fmt, va_list va) 86bf215546Sopenharmony_ci{ 87bf215546Sopenharmony_ci fprintf(stderr, "ac_rtld error: "); 88bf215546Sopenharmony_ci 89bf215546Sopenharmony_ci vfprintf(stderr, fmt, va); 90bf215546Sopenharmony_ci 91bf215546Sopenharmony_ci fprintf(stderr, "\n"); 92bf215546Sopenharmony_ci} 93bf215546Sopenharmony_ci 94bf215546Sopenharmony_cistatic void report_errorf(const char *fmt, ...) PRINTFLIKE(1, 2); 95bf215546Sopenharmony_ci 96bf215546Sopenharmony_cistatic void report_errorf(const char *fmt, ...) 97bf215546Sopenharmony_ci{ 98bf215546Sopenharmony_ci va_list va; 99bf215546Sopenharmony_ci va_start(va, fmt); 100bf215546Sopenharmony_ci report_errorvf(fmt, va); 101bf215546Sopenharmony_ci va_end(va); 102bf215546Sopenharmony_ci} 103bf215546Sopenharmony_ci 104bf215546Sopenharmony_cistatic void report_elf_errorf(const char *fmt, ...) PRINTFLIKE(1, 2); 105bf215546Sopenharmony_ci 106bf215546Sopenharmony_cistatic void report_elf_errorf(const char *fmt, ...) 107bf215546Sopenharmony_ci{ 108bf215546Sopenharmony_ci va_list va; 109bf215546Sopenharmony_ci va_start(va, fmt); 110bf215546Sopenharmony_ci report_errorvf(fmt, va); 111bf215546Sopenharmony_ci va_end(va); 112bf215546Sopenharmony_ci 113bf215546Sopenharmony_ci fprintf(stderr, "ELF error: %s\n", elf_errmsg(elf_errno())); 114bf215546Sopenharmony_ci} 115bf215546Sopenharmony_ci 116bf215546Sopenharmony_ci/** 117bf215546Sopenharmony_ci * Find a symbol in a dynarray of struct ac_rtld_symbol by \p name and shader 118bf215546Sopenharmony_ci * \p part_idx. 119bf215546Sopenharmony_ci */ 120bf215546Sopenharmony_cistatic const struct ac_rtld_symbol *find_symbol(const struct util_dynarray *symbols, 121bf215546Sopenharmony_ci const char *name, unsigned part_idx) 122bf215546Sopenharmony_ci{ 123bf215546Sopenharmony_ci util_dynarray_foreach (symbols, struct ac_rtld_symbol, symbol) { 124bf215546Sopenharmony_ci if ((symbol->part_idx == ~0u || symbol->part_idx == part_idx) && !strcmp(name, symbol->name)) 125bf215546Sopenharmony_ci return symbol; 126bf215546Sopenharmony_ci } 127bf215546Sopenharmony_ci return NULL; 128bf215546Sopenharmony_ci} 129bf215546Sopenharmony_ci 130bf215546Sopenharmony_cistatic int compare_symbol_by_align(const void *lhsp, const void *rhsp) 131bf215546Sopenharmony_ci{ 132bf215546Sopenharmony_ci const struct ac_rtld_symbol *lhs = lhsp; 133bf215546Sopenharmony_ci const struct ac_rtld_symbol *rhs = rhsp; 134bf215546Sopenharmony_ci if (rhs->align > lhs->align) 135bf215546Sopenharmony_ci return 1; 136bf215546Sopenharmony_ci if (rhs->align < lhs->align) 137bf215546Sopenharmony_ci return -1; 138bf215546Sopenharmony_ci return 0; 139bf215546Sopenharmony_ci} 140bf215546Sopenharmony_ci 141bf215546Sopenharmony_ci/** 142bf215546Sopenharmony_ci * Sort the given symbol list by decreasing alignment and assign offsets. 143bf215546Sopenharmony_ci */ 144bf215546Sopenharmony_cistatic bool layout_symbols(struct ac_rtld_symbol *symbols, unsigned num_symbols, 145bf215546Sopenharmony_ci uint64_t *ptotal_size) 146bf215546Sopenharmony_ci{ 147bf215546Sopenharmony_ci qsort(symbols, num_symbols, sizeof(*symbols), compare_symbol_by_align); 148bf215546Sopenharmony_ci 149bf215546Sopenharmony_ci uint64_t total_size = *ptotal_size; 150bf215546Sopenharmony_ci 151bf215546Sopenharmony_ci for (unsigned i = 0; i < num_symbols; ++i) { 152bf215546Sopenharmony_ci struct ac_rtld_symbol *s = &symbols[i]; 153bf215546Sopenharmony_ci assert(util_is_power_of_two_nonzero(s->align)); 154bf215546Sopenharmony_ci 155bf215546Sopenharmony_ci total_size = align64(total_size, s->align); 156bf215546Sopenharmony_ci s->offset = total_size; 157bf215546Sopenharmony_ci 158bf215546Sopenharmony_ci if (total_size + s->size < total_size) { 159bf215546Sopenharmony_ci report_errorf("%s: size overflow", __FUNCTION__); 160bf215546Sopenharmony_ci return false; 161bf215546Sopenharmony_ci } 162bf215546Sopenharmony_ci 163bf215546Sopenharmony_ci total_size += s->size; 164bf215546Sopenharmony_ci } 165bf215546Sopenharmony_ci 166bf215546Sopenharmony_ci *ptotal_size = total_size; 167bf215546Sopenharmony_ci return true; 168bf215546Sopenharmony_ci} 169bf215546Sopenharmony_ci 170bf215546Sopenharmony_ci/** 171bf215546Sopenharmony_ci * Read LDS symbols from the given \p section of the ELF of \p part and append 172bf215546Sopenharmony_ci * them to the LDS symbols list. 173bf215546Sopenharmony_ci * 174bf215546Sopenharmony_ci * Shared LDS symbols are filtered out. 175bf215546Sopenharmony_ci */ 176bf215546Sopenharmony_cistatic bool read_private_lds_symbols(struct ac_rtld_binary *binary, unsigned part_idx, 177bf215546Sopenharmony_ci Elf_Scn *section, uint32_t *lds_end_align) 178bf215546Sopenharmony_ci{ 179bf215546Sopenharmony_ci#define report_if(cond) \ 180bf215546Sopenharmony_ci do { \ 181bf215546Sopenharmony_ci if ((cond)) { \ 182bf215546Sopenharmony_ci report_errorf(#cond); \ 183bf215546Sopenharmony_ci return false; \ 184bf215546Sopenharmony_ci } \ 185bf215546Sopenharmony_ci } while (false) 186bf215546Sopenharmony_ci#define report_elf_if(cond) \ 187bf215546Sopenharmony_ci do { \ 188bf215546Sopenharmony_ci if ((cond)) { \ 189bf215546Sopenharmony_ci report_elf_errorf(#cond); \ 190bf215546Sopenharmony_ci return false; \ 191bf215546Sopenharmony_ci } \ 192bf215546Sopenharmony_ci } while (false) 193bf215546Sopenharmony_ci 194bf215546Sopenharmony_ci struct ac_rtld_part *part = &binary->parts[part_idx]; 195bf215546Sopenharmony_ci Elf64_Shdr *shdr = elf64_getshdr(section); 196bf215546Sopenharmony_ci uint32_t strtabidx = shdr->sh_link; 197bf215546Sopenharmony_ci Elf_Data *symbols_data = elf_getdata(section, NULL); 198bf215546Sopenharmony_ci report_elf_if(!symbols_data); 199bf215546Sopenharmony_ci 200bf215546Sopenharmony_ci const Elf64_Sym *symbol = symbols_data->d_buf; 201bf215546Sopenharmony_ci size_t num_symbols = symbols_data->d_size / sizeof(Elf64_Sym); 202bf215546Sopenharmony_ci 203bf215546Sopenharmony_ci for (size_t j = 0; j < num_symbols; ++j, ++symbol) { 204bf215546Sopenharmony_ci struct ac_rtld_symbol s = {0}; 205bf215546Sopenharmony_ci 206bf215546Sopenharmony_ci if (ELF64_ST_TYPE(symbol->st_info) == STT_AMDGPU_LDS) { 207bf215546Sopenharmony_ci /* old-style LDS symbols from initial prototype -- remove eventually */ 208bf215546Sopenharmony_ci s.align = MIN2(1u << (symbol->st_other >> 3), 1u << 16); 209bf215546Sopenharmony_ci } else if (symbol->st_shndx == SHN_AMDGPU_LDS) { 210bf215546Sopenharmony_ci s.align = MIN2(symbol->st_value, 1u << 16); 211bf215546Sopenharmony_ci report_if(!util_is_power_of_two_nonzero(s.align)); 212bf215546Sopenharmony_ci } else 213bf215546Sopenharmony_ci continue; 214bf215546Sopenharmony_ci 215bf215546Sopenharmony_ci report_if(symbol->st_size > 1u << 29); 216bf215546Sopenharmony_ci 217bf215546Sopenharmony_ci s.name = elf_strptr(part->elf, strtabidx, symbol->st_name); 218bf215546Sopenharmony_ci s.size = symbol->st_size; 219bf215546Sopenharmony_ci s.part_idx = part_idx; 220bf215546Sopenharmony_ci 221bf215546Sopenharmony_ci if (!strcmp(s.name, "__lds_end")) { 222bf215546Sopenharmony_ci report_elf_if(s.size != 0); 223bf215546Sopenharmony_ci *lds_end_align = MAX2(*lds_end_align, s.align); 224bf215546Sopenharmony_ci continue; 225bf215546Sopenharmony_ci } 226bf215546Sopenharmony_ci 227bf215546Sopenharmony_ci const struct ac_rtld_symbol *shared = find_symbol(&binary->lds_symbols, s.name, part_idx); 228bf215546Sopenharmony_ci if (shared) { 229bf215546Sopenharmony_ci report_elf_if(s.align > shared->align); 230bf215546Sopenharmony_ci report_elf_if(s.size > shared->size); 231bf215546Sopenharmony_ci continue; 232bf215546Sopenharmony_ci } 233bf215546Sopenharmony_ci 234bf215546Sopenharmony_ci util_dynarray_append(&binary->lds_symbols, struct ac_rtld_symbol, s); 235bf215546Sopenharmony_ci } 236bf215546Sopenharmony_ci 237bf215546Sopenharmony_ci return true; 238bf215546Sopenharmony_ci 239bf215546Sopenharmony_ci#undef report_if 240bf215546Sopenharmony_ci#undef report_elf_if 241bf215546Sopenharmony_ci} 242bf215546Sopenharmony_ci 243bf215546Sopenharmony_ci/** 244bf215546Sopenharmony_ci * Open a binary consisting of one or more shader parts. 245bf215546Sopenharmony_ci * 246bf215546Sopenharmony_ci * \param binary the uninitialized struct 247bf215546Sopenharmony_ci * \param i binary opening parameters 248bf215546Sopenharmony_ci */ 249bf215546Sopenharmony_cibool ac_rtld_open(struct ac_rtld_binary *binary, struct ac_rtld_open_info i) 250bf215546Sopenharmony_ci{ 251bf215546Sopenharmony_ci /* One of the libelf implementations 252bf215546Sopenharmony_ci * (http://www.mr511.de/software/english.htm) requires calling 253bf215546Sopenharmony_ci * elf_version() before elf_memory(). 254bf215546Sopenharmony_ci */ 255bf215546Sopenharmony_ci elf_version(EV_CURRENT); 256bf215546Sopenharmony_ci 257bf215546Sopenharmony_ci memset(binary, 0, sizeof(*binary)); 258bf215546Sopenharmony_ci memcpy(&binary->options, &i.options, sizeof(binary->options)); 259bf215546Sopenharmony_ci binary->wave_size = i.wave_size; 260bf215546Sopenharmony_ci binary->gfx_level = i.info->gfx_level; 261bf215546Sopenharmony_ci binary->num_parts = i.num_parts; 262bf215546Sopenharmony_ci binary->parts = calloc(sizeof(*binary->parts), i.num_parts); 263bf215546Sopenharmony_ci if (!binary->parts) 264bf215546Sopenharmony_ci return false; 265bf215546Sopenharmony_ci 266bf215546Sopenharmony_ci uint64_t pasted_text_size = 0; 267bf215546Sopenharmony_ci uint64_t rx_align = 1; 268bf215546Sopenharmony_ci uint64_t rx_size = 0; 269bf215546Sopenharmony_ci uint64_t exec_size = 0; 270bf215546Sopenharmony_ci 271bf215546Sopenharmony_ci#define report_if(cond) \ 272bf215546Sopenharmony_ci do { \ 273bf215546Sopenharmony_ci if ((cond)) { \ 274bf215546Sopenharmony_ci report_errorf(#cond); \ 275bf215546Sopenharmony_ci goto fail; \ 276bf215546Sopenharmony_ci } \ 277bf215546Sopenharmony_ci } while (false) 278bf215546Sopenharmony_ci#define report_elf_if(cond) \ 279bf215546Sopenharmony_ci do { \ 280bf215546Sopenharmony_ci if ((cond)) { \ 281bf215546Sopenharmony_ci report_elf_errorf(#cond); \ 282bf215546Sopenharmony_ci goto fail; \ 283bf215546Sopenharmony_ci } \ 284bf215546Sopenharmony_ci } while (false) 285bf215546Sopenharmony_ci 286bf215546Sopenharmony_ci /* Copy and layout shared LDS symbols. */ 287bf215546Sopenharmony_ci if (i.num_shared_lds_symbols) { 288bf215546Sopenharmony_ci if (!util_dynarray_resize(&binary->lds_symbols, struct ac_rtld_symbol, 289bf215546Sopenharmony_ci i.num_shared_lds_symbols)) 290bf215546Sopenharmony_ci goto fail; 291bf215546Sopenharmony_ci 292bf215546Sopenharmony_ci memcpy(binary->lds_symbols.data, i.shared_lds_symbols, binary->lds_symbols.size); 293bf215546Sopenharmony_ci } 294bf215546Sopenharmony_ci 295bf215546Sopenharmony_ci util_dynarray_foreach (&binary->lds_symbols, struct ac_rtld_symbol, symbol) 296bf215546Sopenharmony_ci symbol->part_idx = ~0u; 297bf215546Sopenharmony_ci 298bf215546Sopenharmony_ci unsigned max_lds_size = 64 * 1024; 299bf215546Sopenharmony_ci 300bf215546Sopenharmony_ci if (i.info->gfx_level == GFX6 || 301bf215546Sopenharmony_ci (i.shader_type != MESA_SHADER_COMPUTE && i.shader_type != MESA_SHADER_FRAGMENT)) 302bf215546Sopenharmony_ci max_lds_size = 32 * 1024; 303bf215546Sopenharmony_ci 304bf215546Sopenharmony_ci uint64_t shared_lds_size = 0; 305bf215546Sopenharmony_ci if (!layout_symbols(binary->lds_symbols.data, i.num_shared_lds_symbols, &shared_lds_size)) 306bf215546Sopenharmony_ci goto fail; 307bf215546Sopenharmony_ci 308bf215546Sopenharmony_ci if (shared_lds_size > max_lds_size) { 309bf215546Sopenharmony_ci fprintf(stderr, "ac_rtld error(1): too much LDS (used = %u, max = %u)\n", 310bf215546Sopenharmony_ci (unsigned)shared_lds_size, max_lds_size); 311bf215546Sopenharmony_ci goto fail; 312bf215546Sopenharmony_ci } 313bf215546Sopenharmony_ci binary->lds_size = shared_lds_size; 314bf215546Sopenharmony_ci 315bf215546Sopenharmony_ci /* First pass over all parts: open ELFs, pre-determine the placement of 316bf215546Sopenharmony_ci * sections in the memory image, and collect and layout private LDS symbols. */ 317bf215546Sopenharmony_ci uint32_t lds_end_align = 0; 318bf215546Sopenharmony_ci 319bf215546Sopenharmony_ci if (binary->options.halt_at_entry) 320bf215546Sopenharmony_ci pasted_text_size += 4; 321bf215546Sopenharmony_ci 322bf215546Sopenharmony_ci for (unsigned part_idx = 0; part_idx < i.num_parts; ++part_idx) { 323bf215546Sopenharmony_ci struct ac_rtld_part *part = &binary->parts[part_idx]; 324bf215546Sopenharmony_ci unsigned part_lds_symbols_begin = 325bf215546Sopenharmony_ci util_dynarray_num_elements(&binary->lds_symbols, struct ac_rtld_symbol); 326bf215546Sopenharmony_ci 327bf215546Sopenharmony_ci part->elf = elf_memory((char *)i.elf_ptrs[part_idx], i.elf_sizes[part_idx]); 328bf215546Sopenharmony_ci report_elf_if(!part->elf); 329bf215546Sopenharmony_ci 330bf215546Sopenharmony_ci const Elf64_Ehdr *ehdr = elf64_getehdr(part->elf); 331bf215546Sopenharmony_ci report_elf_if(!ehdr); 332bf215546Sopenharmony_ci report_if(ehdr->e_machine != EM_AMDGPU); 333bf215546Sopenharmony_ci 334bf215546Sopenharmony_ci size_t section_str_index; 335bf215546Sopenharmony_ci size_t num_shdrs; 336bf215546Sopenharmony_ci report_elf_if(elf_getshdrstrndx(part->elf, §ion_str_index) < 0); 337bf215546Sopenharmony_ci report_elf_if(elf_getshdrnum(part->elf, &num_shdrs) < 0); 338bf215546Sopenharmony_ci 339bf215546Sopenharmony_ci part->num_sections = num_shdrs; 340bf215546Sopenharmony_ci part->sections = calloc(sizeof(*part->sections), num_shdrs); 341bf215546Sopenharmony_ci report_if(!part->sections); 342bf215546Sopenharmony_ci 343bf215546Sopenharmony_ci Elf_Scn *section = NULL; 344bf215546Sopenharmony_ci while ((section = elf_nextscn(part->elf, section))) { 345bf215546Sopenharmony_ci Elf64_Shdr *shdr = elf64_getshdr(section); 346bf215546Sopenharmony_ci struct ac_rtld_section *s = &part->sections[elf_ndxscn(section)]; 347bf215546Sopenharmony_ci s->name = elf_strptr(part->elf, section_str_index, shdr->sh_name); 348bf215546Sopenharmony_ci report_elf_if(!s->name); 349bf215546Sopenharmony_ci 350bf215546Sopenharmony_ci /* Cannot actually handle linked objects yet */ 351bf215546Sopenharmony_ci report_elf_if(shdr->sh_addr != 0); 352bf215546Sopenharmony_ci 353bf215546Sopenharmony_ci /* Alignment must be 0 or a power of two */ 354bf215546Sopenharmony_ci report_elf_if(shdr->sh_addralign & (shdr->sh_addralign - 1)); 355bf215546Sopenharmony_ci uint64_t sh_align = MAX2(shdr->sh_addralign, 1); 356bf215546Sopenharmony_ci 357bf215546Sopenharmony_ci if (shdr->sh_flags & SHF_ALLOC && shdr->sh_type != SHT_NOTE) { 358bf215546Sopenharmony_ci report_if(shdr->sh_flags & SHF_WRITE); 359bf215546Sopenharmony_ci 360bf215546Sopenharmony_ci s->is_rx = true; 361bf215546Sopenharmony_ci 362bf215546Sopenharmony_ci if (shdr->sh_flags & SHF_EXECINSTR) { 363bf215546Sopenharmony_ci report_elf_if(shdr->sh_size & 3); 364bf215546Sopenharmony_ci 365bf215546Sopenharmony_ci if (!strcmp(s->name, ".text")) 366bf215546Sopenharmony_ci s->is_pasted_text = true; 367bf215546Sopenharmony_ci 368bf215546Sopenharmony_ci exec_size += shdr->sh_size; 369bf215546Sopenharmony_ci } 370bf215546Sopenharmony_ci 371bf215546Sopenharmony_ci if (s->is_pasted_text) { 372bf215546Sopenharmony_ci s->offset = pasted_text_size; 373bf215546Sopenharmony_ci pasted_text_size += shdr->sh_size; 374bf215546Sopenharmony_ci } else { 375bf215546Sopenharmony_ci rx_align = align(rx_align, sh_align); 376bf215546Sopenharmony_ci rx_size = align(rx_size, sh_align); 377bf215546Sopenharmony_ci s->offset = rx_size; 378bf215546Sopenharmony_ci rx_size += shdr->sh_size; 379bf215546Sopenharmony_ci } 380bf215546Sopenharmony_ci } else if (shdr->sh_type == SHT_SYMTAB) { 381bf215546Sopenharmony_ci if (!read_private_lds_symbols(binary, part_idx, section, &lds_end_align)) 382bf215546Sopenharmony_ci goto fail; 383bf215546Sopenharmony_ci } 384bf215546Sopenharmony_ci } 385bf215546Sopenharmony_ci 386bf215546Sopenharmony_ci uint64_t part_lds_size = shared_lds_size; 387bf215546Sopenharmony_ci if (!layout_symbols(util_dynarray_element(&binary->lds_symbols, struct ac_rtld_symbol, 388bf215546Sopenharmony_ci part_lds_symbols_begin), 389bf215546Sopenharmony_ci util_dynarray_num_elements(&binary->lds_symbols, struct ac_rtld_symbol) - 390bf215546Sopenharmony_ci part_lds_symbols_begin, 391bf215546Sopenharmony_ci &part_lds_size)) 392bf215546Sopenharmony_ci goto fail; 393bf215546Sopenharmony_ci binary->lds_size = MAX2(binary->lds_size, part_lds_size); 394bf215546Sopenharmony_ci } 395bf215546Sopenharmony_ci 396bf215546Sopenharmony_ci binary->rx_end_markers = pasted_text_size; 397bf215546Sopenharmony_ci pasted_text_size += 4 * DEBUGGER_NUM_MARKERS; 398bf215546Sopenharmony_ci 399bf215546Sopenharmony_ci /* __lds_end is a special symbol that points at the end of the memory 400bf215546Sopenharmony_ci * occupied by other LDS symbols. Its alignment is taken as the 401bf215546Sopenharmony_ci * maximum of its alignment over all shader parts where it occurs. 402bf215546Sopenharmony_ci */ 403bf215546Sopenharmony_ci if (lds_end_align) { 404bf215546Sopenharmony_ci binary->lds_size = align(binary->lds_size, lds_end_align); 405bf215546Sopenharmony_ci 406bf215546Sopenharmony_ci struct ac_rtld_symbol *lds_end = 407bf215546Sopenharmony_ci util_dynarray_grow(&binary->lds_symbols, struct ac_rtld_symbol, 1); 408bf215546Sopenharmony_ci lds_end->name = "__lds_end"; 409bf215546Sopenharmony_ci lds_end->size = 0; 410bf215546Sopenharmony_ci lds_end->align = lds_end_align; 411bf215546Sopenharmony_ci lds_end->offset = binary->lds_size; 412bf215546Sopenharmony_ci lds_end->part_idx = ~0u; 413bf215546Sopenharmony_ci } 414bf215546Sopenharmony_ci 415bf215546Sopenharmony_ci if (binary->lds_size > max_lds_size) { 416bf215546Sopenharmony_ci fprintf(stderr, "ac_rtld error(2): too much LDS (used = %u, max = %u)\n", 417bf215546Sopenharmony_ci (unsigned)binary->lds_size, max_lds_size); 418bf215546Sopenharmony_ci goto fail; 419bf215546Sopenharmony_ci } 420bf215546Sopenharmony_ci 421bf215546Sopenharmony_ci /* Second pass: Adjust offsets of non-pasted text sections. */ 422bf215546Sopenharmony_ci binary->rx_size = pasted_text_size; 423bf215546Sopenharmony_ci binary->rx_size = align(binary->rx_size, rx_align); 424bf215546Sopenharmony_ci 425bf215546Sopenharmony_ci for (unsigned part_idx = 0; part_idx < i.num_parts; ++part_idx) { 426bf215546Sopenharmony_ci struct ac_rtld_part *part = &binary->parts[part_idx]; 427bf215546Sopenharmony_ci size_t num_shdrs; 428bf215546Sopenharmony_ci elf_getshdrnum(part->elf, &num_shdrs); 429bf215546Sopenharmony_ci 430bf215546Sopenharmony_ci for (unsigned j = 0; j < num_shdrs; ++j) { 431bf215546Sopenharmony_ci struct ac_rtld_section *s = &part->sections[j]; 432bf215546Sopenharmony_ci if (s->is_rx && !s->is_pasted_text) 433bf215546Sopenharmony_ci s->offset += binary->rx_size; 434bf215546Sopenharmony_ci } 435bf215546Sopenharmony_ci } 436bf215546Sopenharmony_ci 437bf215546Sopenharmony_ci binary->rx_size += rx_size; 438bf215546Sopenharmony_ci binary->exec_size = exec_size; 439bf215546Sopenharmony_ci 440bf215546Sopenharmony_ci /* The SQ fetches up to N cache lines of 16 dwords 441bf215546Sopenharmony_ci * ahead of the PC, configurable by SH_MEM_CONFIG and 442bf215546Sopenharmony_ci * S_INST_PREFETCH. This can cause two issues: 443bf215546Sopenharmony_ci * 444bf215546Sopenharmony_ci * (1) Crossing a page boundary to an unmapped page. The logic 445bf215546Sopenharmony_ci * does not distinguish between a required fetch and a "mere" 446bf215546Sopenharmony_ci * prefetch and will fault. 447bf215546Sopenharmony_ci * 448bf215546Sopenharmony_ci * (2) Prefetching instructions that will be changed for a 449bf215546Sopenharmony_ci * different shader. 450bf215546Sopenharmony_ci * 451bf215546Sopenharmony_ci * (2) is not currently an issue because we flush the I$ at IB 452bf215546Sopenharmony_ci * boundaries, but (1) needs to be addressed. Due to buffer 453bf215546Sopenharmony_ci * suballocation, we just play it safe. 454bf215546Sopenharmony_ci */ 455bf215546Sopenharmony_ci unsigned prefetch_distance = 0; 456bf215546Sopenharmony_ci 457bf215546Sopenharmony_ci if (!i.info->has_graphics && i.info->family >= CHIP_ALDEBARAN) 458bf215546Sopenharmony_ci prefetch_distance = 16; 459bf215546Sopenharmony_ci else if (i.info->gfx_level >= GFX10) 460bf215546Sopenharmony_ci prefetch_distance = 3; 461bf215546Sopenharmony_ci 462bf215546Sopenharmony_ci if (prefetch_distance) { 463bf215546Sopenharmony_ci if (i.info->gfx_level >= GFX11) 464bf215546Sopenharmony_ci binary->rx_size = align(binary->rx_size + prefetch_distance * 64, 128); 465bf215546Sopenharmony_ci else 466bf215546Sopenharmony_ci binary->rx_size = align(binary->rx_size + prefetch_distance * 64, 64); 467bf215546Sopenharmony_ci } 468bf215546Sopenharmony_ci 469bf215546Sopenharmony_ci return true; 470bf215546Sopenharmony_ci 471bf215546Sopenharmony_ci#undef report_if 472bf215546Sopenharmony_ci#undef report_elf_if 473bf215546Sopenharmony_ci 474bf215546Sopenharmony_cifail: 475bf215546Sopenharmony_ci ac_rtld_close(binary); 476bf215546Sopenharmony_ci return false; 477bf215546Sopenharmony_ci} 478bf215546Sopenharmony_ci 479bf215546Sopenharmony_civoid ac_rtld_close(struct ac_rtld_binary *binary) 480bf215546Sopenharmony_ci{ 481bf215546Sopenharmony_ci for (unsigned i = 0; i < binary->num_parts; ++i) { 482bf215546Sopenharmony_ci struct ac_rtld_part *part = &binary->parts[i]; 483bf215546Sopenharmony_ci free(part->sections); 484bf215546Sopenharmony_ci elf_end(part->elf); 485bf215546Sopenharmony_ci } 486bf215546Sopenharmony_ci 487bf215546Sopenharmony_ci util_dynarray_fini(&binary->lds_symbols); 488bf215546Sopenharmony_ci free(binary->parts); 489bf215546Sopenharmony_ci binary->parts = NULL; 490bf215546Sopenharmony_ci binary->num_parts = 0; 491bf215546Sopenharmony_ci} 492bf215546Sopenharmony_ci 493bf215546Sopenharmony_cistatic bool get_section_by_name(struct ac_rtld_part *part, const char *name, const char **data, 494bf215546Sopenharmony_ci size_t *nbytes) 495bf215546Sopenharmony_ci{ 496bf215546Sopenharmony_ci for (unsigned i = 0; i < part->num_sections; ++i) { 497bf215546Sopenharmony_ci struct ac_rtld_section *s = &part->sections[i]; 498bf215546Sopenharmony_ci if (s->name && !strcmp(name, s->name)) { 499bf215546Sopenharmony_ci Elf_Scn *target_scn = elf_getscn(part->elf, i); 500bf215546Sopenharmony_ci Elf_Data *target_data = elf_getdata(target_scn, NULL); 501bf215546Sopenharmony_ci if (!target_data) { 502bf215546Sopenharmony_ci report_elf_errorf("ac_rtld: get_section_by_name: elf_getdata"); 503bf215546Sopenharmony_ci return false; 504bf215546Sopenharmony_ci } 505bf215546Sopenharmony_ci 506bf215546Sopenharmony_ci *data = target_data->d_buf; 507bf215546Sopenharmony_ci *nbytes = target_data->d_size; 508bf215546Sopenharmony_ci return true; 509bf215546Sopenharmony_ci } 510bf215546Sopenharmony_ci } 511bf215546Sopenharmony_ci return false; 512bf215546Sopenharmony_ci} 513bf215546Sopenharmony_ci 514bf215546Sopenharmony_cibool ac_rtld_get_section_by_name(struct ac_rtld_binary *binary, const char *name, const char **data, 515bf215546Sopenharmony_ci size_t *nbytes) 516bf215546Sopenharmony_ci{ 517bf215546Sopenharmony_ci assert(binary->num_parts == 1); 518bf215546Sopenharmony_ci return get_section_by_name(&binary->parts[0], name, data, nbytes); 519bf215546Sopenharmony_ci} 520bf215546Sopenharmony_ci 521bf215546Sopenharmony_cibool ac_rtld_read_config(const struct radeon_info *info, struct ac_rtld_binary *binary, 522bf215546Sopenharmony_ci struct ac_shader_config *config) 523bf215546Sopenharmony_ci{ 524bf215546Sopenharmony_ci for (unsigned i = 0; i < binary->num_parts; ++i) { 525bf215546Sopenharmony_ci struct ac_rtld_part *part = &binary->parts[i]; 526bf215546Sopenharmony_ci const char *config_data; 527bf215546Sopenharmony_ci size_t config_nbytes; 528bf215546Sopenharmony_ci 529bf215546Sopenharmony_ci if (!get_section_by_name(part, ".AMDGPU.config", &config_data, &config_nbytes)) 530bf215546Sopenharmony_ci return false; 531bf215546Sopenharmony_ci 532bf215546Sopenharmony_ci /* TODO: be precise about scratch use? */ 533bf215546Sopenharmony_ci struct ac_shader_config c = {0}; 534bf215546Sopenharmony_ci ac_parse_shader_binary_config(config_data, config_nbytes, binary->wave_size, info, &c); 535bf215546Sopenharmony_ci 536bf215546Sopenharmony_ci config->num_sgprs = MAX2(config->num_sgprs, c.num_sgprs); 537bf215546Sopenharmony_ci config->num_vgprs = MAX2(config->num_vgprs, c.num_vgprs); 538bf215546Sopenharmony_ci config->spilled_sgprs = MAX2(config->spilled_sgprs, c.spilled_sgprs); 539bf215546Sopenharmony_ci config->spilled_vgprs = MAX2(config->spilled_vgprs, c.spilled_vgprs); 540bf215546Sopenharmony_ci config->scratch_bytes_per_wave = 541bf215546Sopenharmony_ci MAX2(config->scratch_bytes_per_wave, c.scratch_bytes_per_wave); 542bf215546Sopenharmony_ci 543bf215546Sopenharmony_ci assert(i == 0 || config->float_mode == c.float_mode); 544bf215546Sopenharmony_ci config->float_mode = c.float_mode; 545bf215546Sopenharmony_ci 546bf215546Sopenharmony_ci /* SPI_PS_INPUT_ENA/ADDR can't be combined. Only the value from 547bf215546Sopenharmony_ci * the main shader part is used. */ 548bf215546Sopenharmony_ci assert(config->spi_ps_input_ena == 0 && config->spi_ps_input_addr == 0); 549bf215546Sopenharmony_ci config->spi_ps_input_ena = c.spi_ps_input_ena; 550bf215546Sopenharmony_ci config->spi_ps_input_addr = c.spi_ps_input_addr; 551bf215546Sopenharmony_ci 552bf215546Sopenharmony_ci /* TODO: consistently use LDS symbols for this */ 553bf215546Sopenharmony_ci config->lds_size = MAX2(config->lds_size, c.lds_size); 554bf215546Sopenharmony_ci 555bf215546Sopenharmony_ci /* TODO: Should we combine these somehow? It's currently only 556bf215546Sopenharmony_ci * used for radeonsi's compute, where multiple parts aren't used. */ 557bf215546Sopenharmony_ci assert(config->rsrc1 == 0 && config->rsrc2 == 0); 558bf215546Sopenharmony_ci config->rsrc1 = c.rsrc1; 559bf215546Sopenharmony_ci config->rsrc2 = c.rsrc2; 560bf215546Sopenharmony_ci } 561bf215546Sopenharmony_ci 562bf215546Sopenharmony_ci return true; 563bf215546Sopenharmony_ci} 564bf215546Sopenharmony_ci 565bf215546Sopenharmony_cistatic bool resolve_symbol(const struct ac_rtld_upload_info *u, unsigned part_idx, 566bf215546Sopenharmony_ci const Elf64_Sym *sym, const char *name, uint64_t *value) 567bf215546Sopenharmony_ci{ 568bf215546Sopenharmony_ci /* TODO: properly disentangle the undef and the LDS cases once 569bf215546Sopenharmony_ci * STT_AMDGPU_LDS is retired. */ 570bf215546Sopenharmony_ci if (sym->st_shndx == SHN_UNDEF || sym->st_shndx == SHN_AMDGPU_LDS) { 571bf215546Sopenharmony_ci const struct ac_rtld_symbol *lds_sym = find_symbol(&u->binary->lds_symbols, name, part_idx); 572bf215546Sopenharmony_ci 573bf215546Sopenharmony_ci if (lds_sym) { 574bf215546Sopenharmony_ci *value = lds_sym->offset; 575bf215546Sopenharmony_ci return true; 576bf215546Sopenharmony_ci } 577bf215546Sopenharmony_ci 578bf215546Sopenharmony_ci /* TODO: resolve from other parts */ 579bf215546Sopenharmony_ci 580bf215546Sopenharmony_ci if (u->get_external_symbol(u->binary->gfx_level, u->cb_data, name, value)) 581bf215546Sopenharmony_ci return true; 582bf215546Sopenharmony_ci 583bf215546Sopenharmony_ci report_errorf("symbol %s: unknown", name); 584bf215546Sopenharmony_ci return false; 585bf215546Sopenharmony_ci } 586bf215546Sopenharmony_ci 587bf215546Sopenharmony_ci struct ac_rtld_part *part = &u->binary->parts[part_idx]; 588bf215546Sopenharmony_ci if (sym->st_shndx >= part->num_sections) { 589bf215546Sopenharmony_ci report_errorf("symbol %s: section out of bounds", name); 590bf215546Sopenharmony_ci return false; 591bf215546Sopenharmony_ci } 592bf215546Sopenharmony_ci 593bf215546Sopenharmony_ci struct ac_rtld_section *s = &part->sections[sym->st_shndx]; 594bf215546Sopenharmony_ci if (!s->is_rx) { 595bf215546Sopenharmony_ci report_errorf("symbol %s: bad section", name); 596bf215546Sopenharmony_ci return false; 597bf215546Sopenharmony_ci } 598bf215546Sopenharmony_ci 599bf215546Sopenharmony_ci uint64_t section_base = u->rx_va + s->offset; 600bf215546Sopenharmony_ci 601bf215546Sopenharmony_ci *value = section_base + sym->st_value; 602bf215546Sopenharmony_ci return true; 603bf215546Sopenharmony_ci} 604bf215546Sopenharmony_ci 605bf215546Sopenharmony_cistatic bool apply_relocs(const struct ac_rtld_upload_info *u, unsigned part_idx, 606bf215546Sopenharmony_ci const Elf64_Shdr *reloc_shdr, const Elf_Data *reloc_data) 607bf215546Sopenharmony_ci{ 608bf215546Sopenharmony_ci#define report_if(cond) \ 609bf215546Sopenharmony_ci do { \ 610bf215546Sopenharmony_ci if ((cond)) { \ 611bf215546Sopenharmony_ci report_errorf(#cond); \ 612bf215546Sopenharmony_ci return false; \ 613bf215546Sopenharmony_ci } \ 614bf215546Sopenharmony_ci } while (false) 615bf215546Sopenharmony_ci#define report_elf_if(cond) \ 616bf215546Sopenharmony_ci do { \ 617bf215546Sopenharmony_ci if ((cond)) { \ 618bf215546Sopenharmony_ci report_elf_errorf(#cond); \ 619bf215546Sopenharmony_ci return false; \ 620bf215546Sopenharmony_ci } \ 621bf215546Sopenharmony_ci } while (false) 622bf215546Sopenharmony_ci 623bf215546Sopenharmony_ci struct ac_rtld_part *part = &u->binary->parts[part_idx]; 624bf215546Sopenharmony_ci Elf_Scn *target_scn = elf_getscn(part->elf, reloc_shdr->sh_info); 625bf215546Sopenharmony_ci report_elf_if(!target_scn); 626bf215546Sopenharmony_ci 627bf215546Sopenharmony_ci Elf_Data *target_data = elf_getdata(target_scn, NULL); 628bf215546Sopenharmony_ci report_elf_if(!target_data); 629bf215546Sopenharmony_ci 630bf215546Sopenharmony_ci Elf_Scn *symbols_scn = elf_getscn(part->elf, reloc_shdr->sh_link); 631bf215546Sopenharmony_ci report_elf_if(!symbols_scn); 632bf215546Sopenharmony_ci 633bf215546Sopenharmony_ci Elf64_Shdr *symbols_shdr = elf64_getshdr(symbols_scn); 634bf215546Sopenharmony_ci report_elf_if(!symbols_shdr); 635bf215546Sopenharmony_ci uint32_t strtabidx = symbols_shdr->sh_link; 636bf215546Sopenharmony_ci 637bf215546Sopenharmony_ci Elf_Data *symbols_data = elf_getdata(symbols_scn, NULL); 638bf215546Sopenharmony_ci report_elf_if(!symbols_data); 639bf215546Sopenharmony_ci 640bf215546Sopenharmony_ci const Elf64_Sym *symbols = symbols_data->d_buf; 641bf215546Sopenharmony_ci size_t num_symbols = symbols_data->d_size / sizeof(Elf64_Sym); 642bf215546Sopenharmony_ci 643bf215546Sopenharmony_ci struct ac_rtld_section *s = &part->sections[reloc_shdr->sh_info]; 644bf215546Sopenharmony_ci report_if(!s->is_rx); 645bf215546Sopenharmony_ci 646bf215546Sopenharmony_ci const char *orig_base = target_data->d_buf; 647bf215546Sopenharmony_ci char *dst_base = u->rx_ptr + s->offset; 648bf215546Sopenharmony_ci uint64_t va_base = u->rx_va + s->offset; 649bf215546Sopenharmony_ci 650bf215546Sopenharmony_ci Elf64_Rel *rel = reloc_data->d_buf; 651bf215546Sopenharmony_ci size_t num_relocs = reloc_data->d_size / sizeof(*rel); 652bf215546Sopenharmony_ci for (size_t i = 0; i < num_relocs; ++i, ++rel) { 653bf215546Sopenharmony_ci size_t r_sym = ELF64_R_SYM(rel->r_info); 654bf215546Sopenharmony_ci unsigned r_type = ELF64_R_TYPE(rel->r_info); 655bf215546Sopenharmony_ci 656bf215546Sopenharmony_ci const char *orig_ptr = orig_base + rel->r_offset; 657bf215546Sopenharmony_ci char *dst_ptr = dst_base + rel->r_offset; 658bf215546Sopenharmony_ci uint64_t va = va_base + rel->r_offset; 659bf215546Sopenharmony_ci 660bf215546Sopenharmony_ci uint64_t symbol; 661bf215546Sopenharmony_ci uint64_t addend; 662bf215546Sopenharmony_ci 663bf215546Sopenharmony_ci if (r_sym == STN_UNDEF) { 664bf215546Sopenharmony_ci symbol = 0; 665bf215546Sopenharmony_ci } else { 666bf215546Sopenharmony_ci report_elf_if(r_sym >= num_symbols); 667bf215546Sopenharmony_ci 668bf215546Sopenharmony_ci const Elf64_Sym *sym = &symbols[r_sym]; 669bf215546Sopenharmony_ci const char *symbol_name = elf_strptr(part->elf, strtabidx, sym->st_name); 670bf215546Sopenharmony_ci report_elf_if(!symbol_name); 671bf215546Sopenharmony_ci 672bf215546Sopenharmony_ci if (!resolve_symbol(u, part_idx, sym, symbol_name, &symbol)) 673bf215546Sopenharmony_ci return false; 674bf215546Sopenharmony_ci } 675bf215546Sopenharmony_ci 676bf215546Sopenharmony_ci /* TODO: Should we also support .rela sections, where the 677bf215546Sopenharmony_ci * addend is part of the relocation record? */ 678bf215546Sopenharmony_ci 679bf215546Sopenharmony_ci /* Load the addend from the ELF instead of the destination, 680bf215546Sopenharmony_ci * because the destination may be in VRAM. */ 681bf215546Sopenharmony_ci switch (r_type) { 682bf215546Sopenharmony_ci case R_AMDGPU_ABS32: 683bf215546Sopenharmony_ci case R_AMDGPU_ABS32_LO: 684bf215546Sopenharmony_ci case R_AMDGPU_ABS32_HI: 685bf215546Sopenharmony_ci case R_AMDGPU_REL32: 686bf215546Sopenharmony_ci case R_AMDGPU_REL32_LO: 687bf215546Sopenharmony_ci case R_AMDGPU_REL32_HI: 688bf215546Sopenharmony_ci addend = *(const uint32_t *)orig_ptr; 689bf215546Sopenharmony_ci break; 690bf215546Sopenharmony_ci case R_AMDGPU_ABS64: 691bf215546Sopenharmony_ci case R_AMDGPU_REL64: 692bf215546Sopenharmony_ci addend = *(const uint64_t *)orig_ptr; 693bf215546Sopenharmony_ci break; 694bf215546Sopenharmony_ci default: 695bf215546Sopenharmony_ci report_errorf("unsupported r_type == %u", r_type); 696bf215546Sopenharmony_ci return false; 697bf215546Sopenharmony_ci } 698bf215546Sopenharmony_ci 699bf215546Sopenharmony_ci uint64_t abs = symbol + addend; 700bf215546Sopenharmony_ci 701bf215546Sopenharmony_ci switch (r_type) { 702bf215546Sopenharmony_ci case R_AMDGPU_ABS32: 703bf215546Sopenharmony_ci assert((uint32_t)abs == abs); 704bf215546Sopenharmony_ci FALLTHROUGH; 705bf215546Sopenharmony_ci case R_AMDGPU_ABS32_LO: 706bf215546Sopenharmony_ci *(uint32_t *)dst_ptr = util_cpu_to_le32(abs); 707bf215546Sopenharmony_ci break; 708bf215546Sopenharmony_ci case R_AMDGPU_ABS32_HI: 709bf215546Sopenharmony_ci *(uint32_t *)dst_ptr = util_cpu_to_le32(abs >> 32); 710bf215546Sopenharmony_ci break; 711bf215546Sopenharmony_ci case R_AMDGPU_ABS64: 712bf215546Sopenharmony_ci *(uint64_t *)dst_ptr = util_cpu_to_le64(abs); 713bf215546Sopenharmony_ci break; 714bf215546Sopenharmony_ci case R_AMDGPU_REL32: 715bf215546Sopenharmony_ci assert((int64_t)(int32_t)(abs - va) == (int64_t)(abs - va)); 716bf215546Sopenharmony_ci FALLTHROUGH; 717bf215546Sopenharmony_ci case R_AMDGPU_REL32_LO: 718bf215546Sopenharmony_ci *(uint32_t *)dst_ptr = util_cpu_to_le32(abs - va); 719bf215546Sopenharmony_ci break; 720bf215546Sopenharmony_ci case R_AMDGPU_REL32_HI: 721bf215546Sopenharmony_ci *(uint32_t *)dst_ptr = util_cpu_to_le32((abs - va) >> 32); 722bf215546Sopenharmony_ci break; 723bf215546Sopenharmony_ci case R_AMDGPU_REL64: 724bf215546Sopenharmony_ci *(uint64_t *)dst_ptr = util_cpu_to_le64(abs - va); 725bf215546Sopenharmony_ci break; 726bf215546Sopenharmony_ci default: 727bf215546Sopenharmony_ci unreachable("bad r_type"); 728bf215546Sopenharmony_ci } 729bf215546Sopenharmony_ci } 730bf215546Sopenharmony_ci 731bf215546Sopenharmony_ci return true; 732bf215546Sopenharmony_ci 733bf215546Sopenharmony_ci#undef report_if 734bf215546Sopenharmony_ci#undef report_elf_if 735bf215546Sopenharmony_ci} 736bf215546Sopenharmony_ci 737bf215546Sopenharmony_ci/** 738bf215546Sopenharmony_ci * Upload the binary or binaries to the provided GPU buffers, including 739bf215546Sopenharmony_ci * relocations. 740bf215546Sopenharmony_ci */ 741bf215546Sopenharmony_ciint ac_rtld_upload(struct ac_rtld_upload_info *u) 742bf215546Sopenharmony_ci{ 743bf215546Sopenharmony_ci#define report_if(cond) \ 744bf215546Sopenharmony_ci do { \ 745bf215546Sopenharmony_ci if ((cond)) { \ 746bf215546Sopenharmony_ci report_errorf(#cond); \ 747bf215546Sopenharmony_ci return -1; \ 748bf215546Sopenharmony_ci } \ 749bf215546Sopenharmony_ci } while (false) 750bf215546Sopenharmony_ci#define report_elf_if(cond) \ 751bf215546Sopenharmony_ci do { \ 752bf215546Sopenharmony_ci if ((cond)) { \ 753bf215546Sopenharmony_ci report_errorf(#cond); \ 754bf215546Sopenharmony_ci return -1; \ 755bf215546Sopenharmony_ci } \ 756bf215546Sopenharmony_ci } while (false) 757bf215546Sopenharmony_ci 758bf215546Sopenharmony_ci int size = 0; 759bf215546Sopenharmony_ci if (u->binary->options.halt_at_entry) { 760bf215546Sopenharmony_ci /* s_sethalt 1 */ 761bf215546Sopenharmony_ci *(uint32_t *)u->rx_ptr = util_cpu_to_le32(0xbf8d0001); 762bf215546Sopenharmony_ci } 763bf215546Sopenharmony_ci 764bf215546Sopenharmony_ci /* First pass: upload raw section data and lay out private LDS symbols. */ 765bf215546Sopenharmony_ci for (unsigned i = 0; i < u->binary->num_parts; ++i) { 766bf215546Sopenharmony_ci struct ac_rtld_part *part = &u->binary->parts[i]; 767bf215546Sopenharmony_ci 768bf215546Sopenharmony_ci Elf_Scn *section = NULL; 769bf215546Sopenharmony_ci while ((section = elf_nextscn(part->elf, section))) { 770bf215546Sopenharmony_ci Elf64_Shdr *shdr = elf64_getshdr(section); 771bf215546Sopenharmony_ci struct ac_rtld_section *s = &part->sections[elf_ndxscn(section)]; 772bf215546Sopenharmony_ci 773bf215546Sopenharmony_ci if (!s->is_rx) 774bf215546Sopenharmony_ci continue; 775bf215546Sopenharmony_ci 776bf215546Sopenharmony_ci report_if(shdr->sh_type != SHT_PROGBITS); 777bf215546Sopenharmony_ci 778bf215546Sopenharmony_ci Elf_Data *data = elf_getdata(section, NULL); 779bf215546Sopenharmony_ci report_elf_if(!data || data->d_size != shdr->sh_size); 780bf215546Sopenharmony_ci memcpy(u->rx_ptr + s->offset, data->d_buf, shdr->sh_size); 781bf215546Sopenharmony_ci 782bf215546Sopenharmony_ci size = MAX2(size, s->offset + shdr->sh_size); 783bf215546Sopenharmony_ci } 784bf215546Sopenharmony_ci } 785bf215546Sopenharmony_ci 786bf215546Sopenharmony_ci if (u->binary->rx_end_markers) { 787bf215546Sopenharmony_ci uint32_t *dst = (uint32_t *)(u->rx_ptr + u->binary->rx_end_markers); 788bf215546Sopenharmony_ci for (unsigned i = 0; i < DEBUGGER_NUM_MARKERS; ++i) 789bf215546Sopenharmony_ci *dst++ = util_cpu_to_le32(DEBUGGER_END_OF_CODE_MARKER); 790bf215546Sopenharmony_ci size += 4 * DEBUGGER_NUM_MARKERS; 791bf215546Sopenharmony_ci } 792bf215546Sopenharmony_ci 793bf215546Sopenharmony_ci /* Second pass: handle relocations, overwriting uploaded data where 794bf215546Sopenharmony_ci * appropriate. */ 795bf215546Sopenharmony_ci for (unsigned i = 0; i < u->binary->num_parts; ++i) { 796bf215546Sopenharmony_ci struct ac_rtld_part *part = &u->binary->parts[i]; 797bf215546Sopenharmony_ci Elf_Scn *section = NULL; 798bf215546Sopenharmony_ci while ((section = elf_nextscn(part->elf, section))) { 799bf215546Sopenharmony_ci Elf64_Shdr *shdr = elf64_getshdr(section); 800bf215546Sopenharmony_ci if (shdr->sh_type == SHT_REL) { 801bf215546Sopenharmony_ci Elf_Data *relocs = elf_getdata(section, NULL); 802bf215546Sopenharmony_ci report_elf_if(!relocs || relocs->d_size != shdr->sh_size); 803bf215546Sopenharmony_ci if (!apply_relocs(u, i, shdr, relocs)) 804bf215546Sopenharmony_ci return -1; 805bf215546Sopenharmony_ci } else if (shdr->sh_type == SHT_RELA) { 806bf215546Sopenharmony_ci report_errorf("SHT_RELA not supported"); 807bf215546Sopenharmony_ci return -1; 808bf215546Sopenharmony_ci } 809bf215546Sopenharmony_ci } 810bf215546Sopenharmony_ci } 811bf215546Sopenharmony_ci 812bf215546Sopenharmony_ci return size; 813bf215546Sopenharmony_ci 814bf215546Sopenharmony_ci#undef report_if 815bf215546Sopenharmony_ci#undef report_elf_if 816bf215546Sopenharmony_ci} 817