1/* 2 * Copyright 2014-2019 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 */ 23 24#include "ac_rtld.h" 25 26#include "ac_binary.h" 27#include "ac_gpu_info.h" 28#include "util/compiler.h" 29#include "util/u_dynarray.h" 30#include "util/u_math.h" 31 32#include <gelf.h> 33#include <libelf.h> 34#include <stdarg.h> 35#include <stdio.h> 36#include <stdlib.h> 37#include <string.h> 38 39#ifndef EM_AMDGPU 40// Old distributions may not have this enum constant 41#define EM_AMDGPU 224 42#endif 43 44#ifndef STT_AMDGPU_LDS 45#define STT_AMDGPU_LDS 13 // this is deprecated -- remove 46#endif 47 48#ifndef SHN_AMDGPU_LDS 49#define SHN_AMDGPU_LDS 0xff00 50#endif 51 52#ifndef R_AMDGPU_NONE 53#define R_AMDGPU_NONE 0 54#define R_AMDGPU_ABS32_LO 1 55#define R_AMDGPU_ABS32_HI 2 56#define R_AMDGPU_ABS64 3 57#define R_AMDGPU_REL32 4 58#define R_AMDGPU_REL64 5 59#define R_AMDGPU_ABS32 6 60#define R_AMDGPU_GOTPCREL 7 61#define R_AMDGPU_GOTPCREL32_LO 8 62#define R_AMDGPU_GOTPCREL32_HI 9 63#define R_AMDGPU_REL32_LO 10 64#define R_AMDGPU_REL32_HI 11 65#define R_AMDGPU_RELATIVE64 13 66#endif 67 68/* For the UMR disassembler. */ 69#define DEBUGGER_END_OF_CODE_MARKER 0xbf9f0000 /* invalid instruction */ 70#define DEBUGGER_NUM_MARKERS 5 71 72struct ac_rtld_section { 73 bool is_rx : 1; 74 bool is_pasted_text : 1; 75 uint64_t offset; 76 const char *name; 77}; 78 79struct ac_rtld_part { 80 Elf *elf; 81 struct ac_rtld_section *sections; 82 unsigned num_sections; 83}; 84 85static void report_errorvf(const char *fmt, va_list va) 86{ 87 fprintf(stderr, "ac_rtld error: "); 88 89 vfprintf(stderr, fmt, va); 90 91 fprintf(stderr, "\n"); 92} 93 94static void report_errorf(const char *fmt, ...) PRINTFLIKE(1, 2); 95 96static void report_errorf(const char *fmt, ...) 97{ 98 va_list va; 99 va_start(va, fmt); 100 report_errorvf(fmt, va); 101 va_end(va); 102} 103 104static void report_elf_errorf(const char *fmt, ...) PRINTFLIKE(1, 2); 105 106static void report_elf_errorf(const char *fmt, ...) 107{ 108 va_list va; 109 va_start(va, fmt); 110 report_errorvf(fmt, va); 111 va_end(va); 112 113 fprintf(stderr, "ELF error: %s\n", elf_errmsg(elf_errno())); 114} 115 116/** 117 * Find a symbol in a dynarray of struct ac_rtld_symbol by \p name and shader 118 * \p part_idx. 119 */ 120static const struct ac_rtld_symbol *find_symbol(const struct util_dynarray *symbols, 121 const char *name, unsigned part_idx) 122{ 123 util_dynarray_foreach (symbols, struct ac_rtld_symbol, symbol) { 124 if ((symbol->part_idx == ~0u || symbol->part_idx == part_idx) && !strcmp(name, symbol->name)) 125 return symbol; 126 } 127 return NULL; 128} 129 130static int compare_symbol_by_align(const void *lhsp, const void *rhsp) 131{ 132 const struct ac_rtld_symbol *lhs = lhsp; 133 const struct ac_rtld_symbol *rhs = rhsp; 134 if (rhs->align > lhs->align) 135 return 1; 136 if (rhs->align < lhs->align) 137 return -1; 138 return 0; 139} 140 141/** 142 * Sort the given symbol list by decreasing alignment and assign offsets. 143 */ 144static bool layout_symbols(struct ac_rtld_symbol *symbols, unsigned num_symbols, 145 uint64_t *ptotal_size) 146{ 147 qsort(symbols, num_symbols, sizeof(*symbols), compare_symbol_by_align); 148 149 uint64_t total_size = *ptotal_size; 150 151 for (unsigned i = 0; i < num_symbols; ++i) { 152 struct ac_rtld_symbol *s = &symbols[i]; 153 assert(util_is_power_of_two_nonzero(s->align)); 154 155 total_size = align64(total_size, s->align); 156 s->offset = total_size; 157 158 if (total_size + s->size < total_size) { 159 report_errorf("%s: size overflow", __FUNCTION__); 160 return false; 161 } 162 163 total_size += s->size; 164 } 165 166 *ptotal_size = total_size; 167 return true; 168} 169 170/** 171 * Read LDS symbols from the given \p section of the ELF of \p part and append 172 * them to the LDS symbols list. 173 * 174 * Shared LDS symbols are filtered out. 175 */ 176static bool read_private_lds_symbols(struct ac_rtld_binary *binary, unsigned part_idx, 177 Elf_Scn *section, uint32_t *lds_end_align) 178{ 179#define report_if(cond) \ 180 do { \ 181 if ((cond)) { \ 182 report_errorf(#cond); \ 183 return false; \ 184 } \ 185 } while (false) 186#define report_elf_if(cond) \ 187 do { \ 188 if ((cond)) { \ 189 report_elf_errorf(#cond); \ 190 return false; \ 191 } \ 192 } while (false) 193 194 struct ac_rtld_part *part = &binary->parts[part_idx]; 195 Elf64_Shdr *shdr = elf64_getshdr(section); 196 uint32_t strtabidx = shdr->sh_link; 197 Elf_Data *symbols_data = elf_getdata(section, NULL); 198 report_elf_if(!symbols_data); 199 200 const Elf64_Sym *symbol = symbols_data->d_buf; 201 size_t num_symbols = symbols_data->d_size / sizeof(Elf64_Sym); 202 203 for (size_t j = 0; j < num_symbols; ++j, ++symbol) { 204 struct ac_rtld_symbol s = {0}; 205 206 if (ELF64_ST_TYPE(symbol->st_info) == STT_AMDGPU_LDS) { 207 /* old-style LDS symbols from initial prototype -- remove eventually */ 208 s.align = MIN2(1u << (symbol->st_other >> 3), 1u << 16); 209 } else if (symbol->st_shndx == SHN_AMDGPU_LDS) { 210 s.align = MIN2(symbol->st_value, 1u << 16); 211 report_if(!util_is_power_of_two_nonzero(s.align)); 212 } else 213 continue; 214 215 report_if(symbol->st_size > 1u << 29); 216 217 s.name = elf_strptr(part->elf, strtabidx, symbol->st_name); 218 s.size = symbol->st_size; 219 s.part_idx = part_idx; 220 221 if (!strcmp(s.name, "__lds_end")) { 222 report_elf_if(s.size != 0); 223 *lds_end_align = MAX2(*lds_end_align, s.align); 224 continue; 225 } 226 227 const struct ac_rtld_symbol *shared = find_symbol(&binary->lds_symbols, s.name, part_idx); 228 if (shared) { 229 report_elf_if(s.align > shared->align); 230 report_elf_if(s.size > shared->size); 231 continue; 232 } 233 234 util_dynarray_append(&binary->lds_symbols, struct ac_rtld_symbol, s); 235 } 236 237 return true; 238 239#undef report_if 240#undef report_elf_if 241} 242 243/** 244 * Open a binary consisting of one or more shader parts. 245 * 246 * \param binary the uninitialized struct 247 * \param i binary opening parameters 248 */ 249bool ac_rtld_open(struct ac_rtld_binary *binary, struct ac_rtld_open_info i) 250{ 251 /* One of the libelf implementations 252 * (http://www.mr511.de/software/english.htm) requires calling 253 * elf_version() before elf_memory(). 254 */ 255 elf_version(EV_CURRENT); 256 257 memset(binary, 0, sizeof(*binary)); 258 memcpy(&binary->options, &i.options, sizeof(binary->options)); 259 binary->wave_size = i.wave_size; 260 binary->gfx_level = i.info->gfx_level; 261 binary->num_parts = i.num_parts; 262 binary->parts = calloc(sizeof(*binary->parts), i.num_parts); 263 if (!binary->parts) 264 return false; 265 266 uint64_t pasted_text_size = 0; 267 uint64_t rx_align = 1; 268 uint64_t rx_size = 0; 269 uint64_t exec_size = 0; 270 271#define report_if(cond) \ 272 do { \ 273 if ((cond)) { \ 274 report_errorf(#cond); \ 275 goto fail; \ 276 } \ 277 } while (false) 278#define report_elf_if(cond) \ 279 do { \ 280 if ((cond)) { \ 281 report_elf_errorf(#cond); \ 282 goto fail; \ 283 } \ 284 } while (false) 285 286 /* Copy and layout shared LDS symbols. */ 287 if (i.num_shared_lds_symbols) { 288 if (!util_dynarray_resize(&binary->lds_symbols, struct ac_rtld_symbol, 289 i.num_shared_lds_symbols)) 290 goto fail; 291 292 memcpy(binary->lds_symbols.data, i.shared_lds_symbols, binary->lds_symbols.size); 293 } 294 295 util_dynarray_foreach (&binary->lds_symbols, struct ac_rtld_symbol, symbol) 296 symbol->part_idx = ~0u; 297 298 unsigned max_lds_size = 64 * 1024; 299 300 if (i.info->gfx_level == GFX6 || 301 (i.shader_type != MESA_SHADER_COMPUTE && i.shader_type != MESA_SHADER_FRAGMENT)) 302 max_lds_size = 32 * 1024; 303 304 uint64_t shared_lds_size = 0; 305 if (!layout_symbols(binary->lds_symbols.data, i.num_shared_lds_symbols, &shared_lds_size)) 306 goto fail; 307 308 if (shared_lds_size > max_lds_size) { 309 fprintf(stderr, "ac_rtld error(1): too much LDS (used = %u, max = %u)\n", 310 (unsigned)shared_lds_size, max_lds_size); 311 goto fail; 312 } 313 binary->lds_size = shared_lds_size; 314 315 /* First pass over all parts: open ELFs, pre-determine the placement of 316 * sections in the memory image, and collect and layout private LDS symbols. */ 317 uint32_t lds_end_align = 0; 318 319 if (binary->options.halt_at_entry) 320 pasted_text_size += 4; 321 322 for (unsigned part_idx = 0; part_idx < i.num_parts; ++part_idx) { 323 struct ac_rtld_part *part = &binary->parts[part_idx]; 324 unsigned part_lds_symbols_begin = 325 util_dynarray_num_elements(&binary->lds_symbols, struct ac_rtld_symbol); 326 327 part->elf = elf_memory((char *)i.elf_ptrs[part_idx], i.elf_sizes[part_idx]); 328 report_elf_if(!part->elf); 329 330 const Elf64_Ehdr *ehdr = elf64_getehdr(part->elf); 331 report_elf_if(!ehdr); 332 report_if(ehdr->e_machine != EM_AMDGPU); 333 334 size_t section_str_index; 335 size_t num_shdrs; 336 report_elf_if(elf_getshdrstrndx(part->elf, §ion_str_index) < 0); 337 report_elf_if(elf_getshdrnum(part->elf, &num_shdrs) < 0); 338 339 part->num_sections = num_shdrs; 340 part->sections = calloc(sizeof(*part->sections), num_shdrs); 341 report_if(!part->sections); 342 343 Elf_Scn *section = NULL; 344 while ((section = elf_nextscn(part->elf, section))) { 345 Elf64_Shdr *shdr = elf64_getshdr(section); 346 struct ac_rtld_section *s = &part->sections[elf_ndxscn(section)]; 347 s->name = elf_strptr(part->elf, section_str_index, shdr->sh_name); 348 report_elf_if(!s->name); 349 350 /* Cannot actually handle linked objects yet */ 351 report_elf_if(shdr->sh_addr != 0); 352 353 /* Alignment must be 0 or a power of two */ 354 report_elf_if(shdr->sh_addralign & (shdr->sh_addralign - 1)); 355 uint64_t sh_align = MAX2(shdr->sh_addralign, 1); 356 357 if (shdr->sh_flags & SHF_ALLOC && shdr->sh_type != SHT_NOTE) { 358 report_if(shdr->sh_flags & SHF_WRITE); 359 360 s->is_rx = true; 361 362 if (shdr->sh_flags & SHF_EXECINSTR) { 363 report_elf_if(shdr->sh_size & 3); 364 365 if (!strcmp(s->name, ".text")) 366 s->is_pasted_text = true; 367 368 exec_size += shdr->sh_size; 369 } 370 371 if (s->is_pasted_text) { 372 s->offset = pasted_text_size; 373 pasted_text_size += shdr->sh_size; 374 } else { 375 rx_align = align(rx_align, sh_align); 376 rx_size = align(rx_size, sh_align); 377 s->offset = rx_size; 378 rx_size += shdr->sh_size; 379 } 380 } else if (shdr->sh_type == SHT_SYMTAB) { 381 if (!read_private_lds_symbols(binary, part_idx, section, &lds_end_align)) 382 goto fail; 383 } 384 } 385 386 uint64_t part_lds_size = shared_lds_size; 387 if (!layout_symbols(util_dynarray_element(&binary->lds_symbols, struct ac_rtld_symbol, 388 part_lds_symbols_begin), 389 util_dynarray_num_elements(&binary->lds_symbols, struct ac_rtld_symbol) - 390 part_lds_symbols_begin, 391 &part_lds_size)) 392 goto fail; 393 binary->lds_size = MAX2(binary->lds_size, part_lds_size); 394 } 395 396 binary->rx_end_markers = pasted_text_size; 397 pasted_text_size += 4 * DEBUGGER_NUM_MARKERS; 398 399 /* __lds_end is a special symbol that points at the end of the memory 400 * occupied by other LDS symbols. Its alignment is taken as the 401 * maximum of its alignment over all shader parts where it occurs. 402 */ 403 if (lds_end_align) { 404 binary->lds_size = align(binary->lds_size, lds_end_align); 405 406 struct ac_rtld_symbol *lds_end = 407 util_dynarray_grow(&binary->lds_symbols, struct ac_rtld_symbol, 1); 408 lds_end->name = "__lds_end"; 409 lds_end->size = 0; 410 lds_end->align = lds_end_align; 411 lds_end->offset = binary->lds_size; 412 lds_end->part_idx = ~0u; 413 } 414 415 if (binary->lds_size > max_lds_size) { 416 fprintf(stderr, "ac_rtld error(2): too much LDS (used = %u, max = %u)\n", 417 (unsigned)binary->lds_size, max_lds_size); 418 goto fail; 419 } 420 421 /* Second pass: Adjust offsets of non-pasted text sections. */ 422 binary->rx_size = pasted_text_size; 423 binary->rx_size = align(binary->rx_size, rx_align); 424 425 for (unsigned part_idx = 0; part_idx < i.num_parts; ++part_idx) { 426 struct ac_rtld_part *part = &binary->parts[part_idx]; 427 size_t num_shdrs; 428 elf_getshdrnum(part->elf, &num_shdrs); 429 430 for (unsigned j = 0; j < num_shdrs; ++j) { 431 struct ac_rtld_section *s = &part->sections[j]; 432 if (s->is_rx && !s->is_pasted_text) 433 s->offset += binary->rx_size; 434 } 435 } 436 437 binary->rx_size += rx_size; 438 binary->exec_size = exec_size; 439 440 /* The SQ fetches up to N cache lines of 16 dwords 441 * ahead of the PC, configurable by SH_MEM_CONFIG and 442 * S_INST_PREFETCH. This can cause two issues: 443 * 444 * (1) Crossing a page boundary to an unmapped page. The logic 445 * does not distinguish between a required fetch and a "mere" 446 * prefetch and will fault. 447 * 448 * (2) Prefetching instructions that will be changed for a 449 * different shader. 450 * 451 * (2) is not currently an issue because we flush the I$ at IB 452 * boundaries, but (1) needs to be addressed. Due to buffer 453 * suballocation, we just play it safe. 454 */ 455 unsigned prefetch_distance = 0; 456 457 if (!i.info->has_graphics && i.info->family >= CHIP_ALDEBARAN) 458 prefetch_distance = 16; 459 else if (i.info->gfx_level >= GFX10) 460 prefetch_distance = 3; 461 462 if (prefetch_distance) { 463 if (i.info->gfx_level >= GFX11) 464 binary->rx_size = align(binary->rx_size + prefetch_distance * 64, 128); 465 else 466 binary->rx_size = align(binary->rx_size + prefetch_distance * 64, 64); 467 } 468 469 return true; 470 471#undef report_if 472#undef report_elf_if 473 474fail: 475 ac_rtld_close(binary); 476 return false; 477} 478 479void ac_rtld_close(struct ac_rtld_binary *binary) 480{ 481 for (unsigned i = 0; i < binary->num_parts; ++i) { 482 struct ac_rtld_part *part = &binary->parts[i]; 483 free(part->sections); 484 elf_end(part->elf); 485 } 486 487 util_dynarray_fini(&binary->lds_symbols); 488 free(binary->parts); 489 binary->parts = NULL; 490 binary->num_parts = 0; 491} 492 493static bool get_section_by_name(struct ac_rtld_part *part, const char *name, const char **data, 494 size_t *nbytes) 495{ 496 for (unsigned i = 0; i < part->num_sections; ++i) { 497 struct ac_rtld_section *s = &part->sections[i]; 498 if (s->name && !strcmp(name, s->name)) { 499 Elf_Scn *target_scn = elf_getscn(part->elf, i); 500 Elf_Data *target_data = elf_getdata(target_scn, NULL); 501 if (!target_data) { 502 report_elf_errorf("ac_rtld: get_section_by_name: elf_getdata"); 503 return false; 504 } 505 506 *data = target_data->d_buf; 507 *nbytes = target_data->d_size; 508 return true; 509 } 510 } 511 return false; 512} 513 514bool ac_rtld_get_section_by_name(struct ac_rtld_binary *binary, const char *name, const char **data, 515 size_t *nbytes) 516{ 517 assert(binary->num_parts == 1); 518 return get_section_by_name(&binary->parts[0], name, data, nbytes); 519} 520 521bool ac_rtld_read_config(const struct radeon_info *info, struct ac_rtld_binary *binary, 522 struct ac_shader_config *config) 523{ 524 for (unsigned i = 0; i < binary->num_parts; ++i) { 525 struct ac_rtld_part *part = &binary->parts[i]; 526 const char *config_data; 527 size_t config_nbytes; 528 529 if (!get_section_by_name(part, ".AMDGPU.config", &config_data, &config_nbytes)) 530 return false; 531 532 /* TODO: be precise about scratch use? */ 533 struct ac_shader_config c = {0}; 534 ac_parse_shader_binary_config(config_data, config_nbytes, binary->wave_size, info, &c); 535 536 config->num_sgprs = MAX2(config->num_sgprs, c.num_sgprs); 537 config->num_vgprs = MAX2(config->num_vgprs, c.num_vgprs); 538 config->spilled_sgprs = MAX2(config->spilled_sgprs, c.spilled_sgprs); 539 config->spilled_vgprs = MAX2(config->spilled_vgprs, c.spilled_vgprs); 540 config->scratch_bytes_per_wave = 541 MAX2(config->scratch_bytes_per_wave, c.scratch_bytes_per_wave); 542 543 assert(i == 0 || config->float_mode == c.float_mode); 544 config->float_mode = c.float_mode; 545 546 /* SPI_PS_INPUT_ENA/ADDR can't be combined. Only the value from 547 * the main shader part is used. */ 548 assert(config->spi_ps_input_ena == 0 && config->spi_ps_input_addr == 0); 549 config->spi_ps_input_ena = c.spi_ps_input_ena; 550 config->spi_ps_input_addr = c.spi_ps_input_addr; 551 552 /* TODO: consistently use LDS symbols for this */ 553 config->lds_size = MAX2(config->lds_size, c.lds_size); 554 555 /* TODO: Should we combine these somehow? It's currently only 556 * used for radeonsi's compute, where multiple parts aren't used. */ 557 assert(config->rsrc1 == 0 && config->rsrc2 == 0); 558 config->rsrc1 = c.rsrc1; 559 config->rsrc2 = c.rsrc2; 560 } 561 562 return true; 563} 564 565static bool resolve_symbol(const struct ac_rtld_upload_info *u, unsigned part_idx, 566 const Elf64_Sym *sym, const char *name, uint64_t *value) 567{ 568 /* TODO: properly disentangle the undef and the LDS cases once 569 * STT_AMDGPU_LDS is retired. */ 570 if (sym->st_shndx == SHN_UNDEF || sym->st_shndx == SHN_AMDGPU_LDS) { 571 const struct ac_rtld_symbol *lds_sym = find_symbol(&u->binary->lds_symbols, name, part_idx); 572 573 if (lds_sym) { 574 *value = lds_sym->offset; 575 return true; 576 } 577 578 /* TODO: resolve from other parts */ 579 580 if (u->get_external_symbol(u->binary->gfx_level, u->cb_data, name, value)) 581 return true; 582 583 report_errorf("symbol %s: unknown", name); 584 return false; 585 } 586 587 struct ac_rtld_part *part = &u->binary->parts[part_idx]; 588 if (sym->st_shndx >= part->num_sections) { 589 report_errorf("symbol %s: section out of bounds", name); 590 return false; 591 } 592 593 struct ac_rtld_section *s = &part->sections[sym->st_shndx]; 594 if (!s->is_rx) { 595 report_errorf("symbol %s: bad section", name); 596 return false; 597 } 598 599 uint64_t section_base = u->rx_va + s->offset; 600 601 *value = section_base + sym->st_value; 602 return true; 603} 604 605static bool apply_relocs(const struct ac_rtld_upload_info *u, unsigned part_idx, 606 const Elf64_Shdr *reloc_shdr, const Elf_Data *reloc_data) 607{ 608#define report_if(cond) \ 609 do { \ 610 if ((cond)) { \ 611 report_errorf(#cond); \ 612 return false; \ 613 } \ 614 } while (false) 615#define report_elf_if(cond) \ 616 do { \ 617 if ((cond)) { \ 618 report_elf_errorf(#cond); \ 619 return false; \ 620 } \ 621 } while (false) 622 623 struct ac_rtld_part *part = &u->binary->parts[part_idx]; 624 Elf_Scn *target_scn = elf_getscn(part->elf, reloc_shdr->sh_info); 625 report_elf_if(!target_scn); 626 627 Elf_Data *target_data = elf_getdata(target_scn, NULL); 628 report_elf_if(!target_data); 629 630 Elf_Scn *symbols_scn = elf_getscn(part->elf, reloc_shdr->sh_link); 631 report_elf_if(!symbols_scn); 632 633 Elf64_Shdr *symbols_shdr = elf64_getshdr(symbols_scn); 634 report_elf_if(!symbols_shdr); 635 uint32_t strtabidx = symbols_shdr->sh_link; 636 637 Elf_Data *symbols_data = elf_getdata(symbols_scn, NULL); 638 report_elf_if(!symbols_data); 639 640 const Elf64_Sym *symbols = symbols_data->d_buf; 641 size_t num_symbols = symbols_data->d_size / sizeof(Elf64_Sym); 642 643 struct ac_rtld_section *s = &part->sections[reloc_shdr->sh_info]; 644 report_if(!s->is_rx); 645 646 const char *orig_base = target_data->d_buf; 647 char *dst_base = u->rx_ptr + s->offset; 648 uint64_t va_base = u->rx_va + s->offset; 649 650 Elf64_Rel *rel = reloc_data->d_buf; 651 size_t num_relocs = reloc_data->d_size / sizeof(*rel); 652 for (size_t i = 0; i < num_relocs; ++i, ++rel) { 653 size_t r_sym = ELF64_R_SYM(rel->r_info); 654 unsigned r_type = ELF64_R_TYPE(rel->r_info); 655 656 const char *orig_ptr = orig_base + rel->r_offset; 657 char *dst_ptr = dst_base + rel->r_offset; 658 uint64_t va = va_base + rel->r_offset; 659 660 uint64_t symbol; 661 uint64_t addend; 662 663 if (r_sym == STN_UNDEF) { 664 symbol = 0; 665 } else { 666 report_elf_if(r_sym >= num_symbols); 667 668 const Elf64_Sym *sym = &symbols[r_sym]; 669 const char *symbol_name = elf_strptr(part->elf, strtabidx, sym->st_name); 670 report_elf_if(!symbol_name); 671 672 if (!resolve_symbol(u, part_idx, sym, symbol_name, &symbol)) 673 return false; 674 } 675 676 /* TODO: Should we also support .rela sections, where the 677 * addend is part of the relocation record? */ 678 679 /* Load the addend from the ELF instead of the destination, 680 * because the destination may be in VRAM. */ 681 switch (r_type) { 682 case R_AMDGPU_ABS32: 683 case R_AMDGPU_ABS32_LO: 684 case R_AMDGPU_ABS32_HI: 685 case R_AMDGPU_REL32: 686 case R_AMDGPU_REL32_LO: 687 case R_AMDGPU_REL32_HI: 688 addend = *(const uint32_t *)orig_ptr; 689 break; 690 case R_AMDGPU_ABS64: 691 case R_AMDGPU_REL64: 692 addend = *(const uint64_t *)orig_ptr; 693 break; 694 default: 695 report_errorf("unsupported r_type == %u", r_type); 696 return false; 697 } 698 699 uint64_t abs = symbol + addend; 700 701 switch (r_type) { 702 case R_AMDGPU_ABS32: 703 assert((uint32_t)abs == abs); 704 FALLTHROUGH; 705 case R_AMDGPU_ABS32_LO: 706 *(uint32_t *)dst_ptr = util_cpu_to_le32(abs); 707 break; 708 case R_AMDGPU_ABS32_HI: 709 *(uint32_t *)dst_ptr = util_cpu_to_le32(abs >> 32); 710 break; 711 case R_AMDGPU_ABS64: 712 *(uint64_t *)dst_ptr = util_cpu_to_le64(abs); 713 break; 714 case R_AMDGPU_REL32: 715 assert((int64_t)(int32_t)(abs - va) == (int64_t)(abs - va)); 716 FALLTHROUGH; 717 case R_AMDGPU_REL32_LO: 718 *(uint32_t *)dst_ptr = util_cpu_to_le32(abs - va); 719 break; 720 case R_AMDGPU_REL32_HI: 721 *(uint32_t *)dst_ptr = util_cpu_to_le32((abs - va) >> 32); 722 break; 723 case R_AMDGPU_REL64: 724 *(uint64_t *)dst_ptr = util_cpu_to_le64(abs - va); 725 break; 726 default: 727 unreachable("bad r_type"); 728 } 729 } 730 731 return true; 732 733#undef report_if 734#undef report_elf_if 735} 736 737/** 738 * Upload the binary or binaries to the provided GPU buffers, including 739 * relocations. 740 */ 741int ac_rtld_upload(struct ac_rtld_upload_info *u) 742{ 743#define report_if(cond) \ 744 do { \ 745 if ((cond)) { \ 746 report_errorf(#cond); \ 747 return -1; \ 748 } \ 749 } while (false) 750#define report_elf_if(cond) \ 751 do { \ 752 if ((cond)) { \ 753 report_errorf(#cond); \ 754 return -1; \ 755 } \ 756 } while (false) 757 758 int size = 0; 759 if (u->binary->options.halt_at_entry) { 760 /* s_sethalt 1 */ 761 *(uint32_t *)u->rx_ptr = util_cpu_to_le32(0xbf8d0001); 762 } 763 764 /* First pass: upload raw section data and lay out private LDS symbols. */ 765 for (unsigned i = 0; i < u->binary->num_parts; ++i) { 766 struct ac_rtld_part *part = &u->binary->parts[i]; 767 768 Elf_Scn *section = NULL; 769 while ((section = elf_nextscn(part->elf, section))) { 770 Elf64_Shdr *shdr = elf64_getshdr(section); 771 struct ac_rtld_section *s = &part->sections[elf_ndxscn(section)]; 772 773 if (!s->is_rx) 774 continue; 775 776 report_if(shdr->sh_type != SHT_PROGBITS); 777 778 Elf_Data *data = elf_getdata(section, NULL); 779 report_elf_if(!data || data->d_size != shdr->sh_size); 780 memcpy(u->rx_ptr + s->offset, data->d_buf, shdr->sh_size); 781 782 size = MAX2(size, s->offset + shdr->sh_size); 783 } 784 } 785 786 if (u->binary->rx_end_markers) { 787 uint32_t *dst = (uint32_t *)(u->rx_ptr + u->binary->rx_end_markers); 788 for (unsigned i = 0; i < DEBUGGER_NUM_MARKERS; ++i) 789 *dst++ = util_cpu_to_le32(DEBUGGER_END_OF_CODE_MARKER); 790 size += 4 * DEBUGGER_NUM_MARKERS; 791 } 792 793 /* Second pass: handle relocations, overwriting uploaded data where 794 * appropriate. */ 795 for (unsigned i = 0; i < u->binary->num_parts; ++i) { 796 struct ac_rtld_part *part = &u->binary->parts[i]; 797 Elf_Scn *section = NULL; 798 while ((section = elf_nextscn(part->elf, section))) { 799 Elf64_Shdr *shdr = elf64_getshdr(section); 800 if (shdr->sh_type == SHT_REL) { 801 Elf_Data *relocs = elf_getdata(section, NULL); 802 report_elf_if(!relocs || relocs->d_size != shdr->sh_size); 803 if (!apply_relocs(u, i, shdr, relocs)) 804 return -1; 805 } else if (shdr->sh_type == SHT_RELA) { 806 report_errorf("SHT_RELA not supported"); 807 return -1; 808 } 809 } 810 } 811 812 return size; 813 814#undef report_if 815#undef report_elf_if 816} 817