/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <linux/dma-fence-array.h>
#include <linux/interval_tree_generic.h>
#include <linux/idr.h>
#include <linux/dma-buf.h>

#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_gmc.h"
#include "amdgpu_xgmi.h"
#include "amdgpu_dma_buf.h"

/**
 * DOC: GPUVM
 *
 * GPUVM is similar to the legacy gart on older asics, however
 * rather than there being a single global gart table
 * for the entire GPU, there are multiple VM page tables active
 * at any given time. The VM page tables can contain a mix of
 * vram pages and system memory pages, and system memory pages
 * can be mapped as snooped (cached system pages) or unsnooped
 * (uncached system pages).
 * Each VM has an ID associated with it and there is a page table
 * associated with each VMID. When executing a command buffer,
 * the kernel tells the ring what VMID to use for that command
 * buffer. VMIDs are allocated dynamically as commands are submitted.
 * The userspace drivers maintain their own address space and the kernel
 * sets up their page tables accordingly when they submit their
 * command buffers and a VMID is assigned.
 * Cayman/Trinity support up to 8 active VMs at any given time;
 * SI supports 16.
 */

#define START(node) ((node)->start)
#define LAST(node) ((node)->last)

INTERVAL_TREE_DEFINE(struct amdgpu_bo_va_mapping, rb, uint64_t, __subtree_last,
		     START, LAST, static, amdgpu_vm_it)

#undef START
#undef LAST

/**
 * struct amdgpu_prt_cb - Helper to disable partial resident texture feature from a fence callback
 */
struct amdgpu_prt_cb {

	/**
	 * @adev: amdgpu device
	 */
	struct amdgpu_device *adev;

	/**
	 * @cb: callback
	 */
	struct dma_fence_cb cb;
};

/*
 * vm eviction_lock can be taken in MMU notifiers. Make sure no reclaim-FS
 * happens while holding this lock anywhere to prevent deadlocks when
 * an MMU notifier runs in reclaim-FS context.
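 *
 * In other words, any memory allocation done while holding this lock must
 * not be allowed to enter FS reclaim, since that reclaim path may invoke an
 * MMU notifier which in turn tries to take the same lock. The
 * memalloc_nofs_save()/memalloc_nofs_restore() calls in the helpers below
 * enforce exactly that.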
 */
static inline void amdgpu_vm_eviction_lock(struct amdgpu_vm *vm)
{
	mutex_lock(&vm->eviction_lock);
	vm->saved_flags = memalloc_nofs_save();
}

static inline int amdgpu_vm_eviction_trylock(struct amdgpu_vm *vm)
{
	if (mutex_trylock(&vm->eviction_lock)) {
		vm->saved_flags = memalloc_nofs_save();
		return 1;
	}
	return 0;
}

static inline void amdgpu_vm_eviction_unlock(struct amdgpu_vm *vm)
{
	memalloc_nofs_restore(vm->saved_flags);
	mutex_unlock(&vm->eviction_lock);
}

/**
 * amdgpu_vm_level_shift - return the addr shift for each level
 *
 * @adev: amdgpu_device pointer
 * @level: VMPT level
 *
 * Returns:
 * The number of bits the pfn needs to be right shifted for a level.
 */
static unsigned amdgpu_vm_level_shift(struct amdgpu_device *adev,
				      unsigned level)
{
	switch (level) {
	case AMDGPU_VM_PDB2:
	case AMDGPU_VM_PDB1:
	case AMDGPU_VM_PDB0:
		return 9 * (AMDGPU_VM_PDB0 - level) +
			adev->vm_manager.block_size;
	case AMDGPU_VM_PTB:
		return 0;
	default:
		return ~0;
	}
}

/**
 * amdgpu_vm_num_entries - return the number of entries in a PD/PT
 *
 * @adev: amdgpu_device pointer
 * @level: VMPT level
 *
 * Returns:
 * The number of entries in a page directory or page table.
 */
static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev,
				      unsigned level)
{
	unsigned shift = amdgpu_vm_level_shift(adev,
					       adev->vm_manager.root_level);

	if (level == adev->vm_manager.root_level)
		/* For the root directory */
		return round_up(adev->vm_manager.max_pfn, 1ULL << shift)
			>> shift;
	else if (level != AMDGPU_VM_PTB)
		/* Everything in between */
		return 512;
	else
		/* For the page tables on the leaves */
		return AMDGPU_VM_PTE_COUNT(adev);
}

/**
 * amdgpu_vm_num_ats_entries - return the number of ATS entries in the root PD
 *
 * @adev: amdgpu_device pointer
 *
 * Returns:
 * The number of entries in the root page directory which need the ATS setting.
 */
static unsigned amdgpu_vm_num_ats_entries(struct amdgpu_device *adev)
{
	unsigned shift;

	shift = amdgpu_vm_level_shift(adev, adev->vm_manager.root_level);
	return AMDGPU_GMC_HOLE_START >> (shift + AMDGPU_GPU_PAGE_SHIFT);
}

/**
 * amdgpu_vm_entries_mask - the mask to get the entry number of a PD/PT
 *
 * @adev: amdgpu_device pointer
 * @level: VMPT level
 *
 * Returns:
 * The mask to extract the entry number of a PD/PT from an address.
 */
static uint32_t amdgpu_vm_entries_mask(struct amdgpu_device *adev,
				       unsigned int level)
{
	if (level <= adev->vm_manager.root_level)
		return 0xffffffff;
	else if (level != AMDGPU_VM_PTB)
		return 0x1ff;
	else
		return AMDGPU_VM_PTE_COUNT(adev) - 1;
}

/**
 * amdgpu_vm_bo_size - returns the size of the BOs in bytes
 *
 * @adev: amdgpu_device pointer
 * @level: VMPT level
 *
 * Returns:
 * The size of the BO for a page directory or page table in bytes.
 */
static unsigned amdgpu_vm_bo_size(struct amdgpu_device *adev, unsigned level)
{
	return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_num_entries(adev, level) * 8);
}

/**
 * amdgpu_vm_bo_evicted - vm_bo is evicted
 *
 * @vm_bo: vm_bo which is evicted
 *
 * State for PDs/PTs and per VM BOs which are not at the location they should
 * be.
 */
static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo)
{
	struct amdgpu_vm *vm = vm_bo->vm;
	struct amdgpu_bo *bo = vm_bo->bo;

	vm_bo->moved = true;
	if (bo->tbo.type == ttm_bo_type_kernel)
		list_move(&vm_bo->vm_status, &vm->evicted);
	else
		list_move_tail(&vm_bo->vm_status, &vm->evicted);
}

/**
 * amdgpu_vm_bo_moved - vm_bo is moved
 *
 * @vm_bo: vm_bo which is moved
 *
 * State for per VM BOs which are moved, but that change is not yet reflected
 * in the page tables.
 */
static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo)
{
	list_move(&vm_bo->vm_status, &vm_bo->vm->moved);
}

/**
 * amdgpu_vm_bo_idle - vm_bo is idle
 *
 * @vm_bo: vm_bo which is now idle
 *
 * State for PDs/PTs and per VM BOs which have gone through the state machine
 * and are now idle.
 */
static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo)
{
	list_move(&vm_bo->vm_status, &vm_bo->vm->idle);
	vm_bo->moved = false;
}

/**
 * amdgpu_vm_bo_invalidated - vm_bo is invalidated
 *
 * @vm_bo: vm_bo which is now invalidated
 *
 * State for normal BOs which are invalidated and whose change is not yet
 * reflected in the PTs.
 */
static void amdgpu_vm_bo_invalidated(struct amdgpu_vm_bo_base *vm_bo)
{
	spin_lock(&vm_bo->vm->invalidated_lock);
	list_move(&vm_bo->vm_status, &vm_bo->vm->invalidated);
	spin_unlock(&vm_bo->vm->invalidated_lock);
}

/**
 * amdgpu_vm_bo_relocated - vm_bo is relocated
 *
 * @vm_bo: vm_bo which is relocated
 *
 * State for PDs/PTs which need to update their parent PD.
 * For the root PD, just move to idle state.
 */
static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo)
{
	if (vm_bo->bo->parent)
		list_move(&vm_bo->vm_status, &vm_bo->vm->relocated);
	else
		amdgpu_vm_bo_idle(vm_bo);
}

/**
 * amdgpu_vm_bo_done - vm_bo is done
 *
 * @vm_bo: vm_bo which is now done
 *
 * State for normal BOs which are invalidated and whose change has been updated
 * in the PTs.
 */
static void amdgpu_vm_bo_done(struct amdgpu_vm_bo_base *vm_bo)
{
	spin_lock(&vm_bo->vm->invalidated_lock);
	list_del_init(&vm_bo->vm_status);
	spin_unlock(&vm_bo->vm->invalidated_lock);
}

/**
 * amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm
 *
 * @base: base structure for tracking BO usage in a VM
 * @vm: vm to which bo is to be added
 * @bo: amdgpu buffer object
 *
 * Initialize a bo_va_base structure and add it to the appropriate lists
 *
 */
static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
				   struct amdgpu_vm *vm,
				   struct amdgpu_bo *bo)
{
	base->vm = vm;
	base->bo = bo;
	base->next = NULL;
	INIT_LIST_HEAD(&base->vm_status);

	if (!bo)
		return;
	base->next = bo->vm_bo;
	bo->vm_bo = base;

	if (bo->tbo.base.resv != vm->root.base.bo->tbo.base.resv)
		return;

	vm->bulk_moveable = false;
	if (bo->tbo.type == ttm_bo_type_kernel && bo->parent)
		amdgpu_vm_bo_relocated(base);
	else
		amdgpu_vm_bo_idle(base);

	if (bo->preferred_domains &
	    amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type))
		return;

	/*
	 * we checked all the prerequisites, but it looks like this per vm bo
	 * is currently evicted. add the bo to the evicted list to make sure it
	 * is validated on next vm use to avoid fault.
	 */
	amdgpu_vm_bo_evicted(base);
}

/**
 * amdgpu_vm_pt_parent - get the parent page directory
 *
 * @pt: child page table
 *
 * Helper to get the parent entry for the child page table. NULL if we are at
 * the root page directory.
 */
static struct amdgpu_vm_pt *amdgpu_vm_pt_parent(struct amdgpu_vm_pt *pt)
{
	struct amdgpu_bo *parent = pt->base.bo->parent;

	if (!parent)
		return NULL;

	return container_of(parent->vm_bo, struct amdgpu_vm_pt, base);
}

/*
 * amdgpu_vm_pt_cursor - state for for_each_amdgpu_vm_pt
 */
struct amdgpu_vm_pt_cursor {
	uint64_t pfn;
	struct amdgpu_vm_pt *parent;
	struct amdgpu_vm_pt *entry;
	unsigned level;
};

/**
 * amdgpu_vm_pt_start - start PD/PT walk
 *
 * @adev: amdgpu_device pointer
 * @vm: amdgpu_vm structure
 * @start: start address of the walk
 * @cursor: state to initialize
 *
 * Initialize an amdgpu_vm_pt_cursor to start a walk.
 */
static void amdgpu_vm_pt_start(struct amdgpu_device *adev,
			       struct amdgpu_vm *vm, uint64_t start,
			       struct amdgpu_vm_pt_cursor *cursor)
{
	cursor->pfn = start;
	cursor->parent = NULL;
	cursor->entry = &vm->root;
	cursor->level = adev->vm_manager.root_level;
}

/**
 * amdgpu_vm_pt_descendant - go to child node
 *
 * @adev: amdgpu_device pointer
 * @cursor: current state
 *
 * Walk to the child node of the current node.
 * Returns:
 * True if the walk was possible, false otherwise.
 */
static bool amdgpu_vm_pt_descendant(struct amdgpu_device *adev,
				    struct amdgpu_vm_pt_cursor *cursor)
{
	unsigned mask, shift, idx;

	if (!cursor->entry->entries)
		return false;

	BUG_ON(!cursor->entry->base.bo);
	mask = amdgpu_vm_entries_mask(adev, cursor->level);
	shift = amdgpu_vm_level_shift(adev, cursor->level);

	++cursor->level;
	idx = (cursor->pfn >> shift) & mask;
	cursor->parent = cursor->entry;
	cursor->entry = &cursor->entry->entries[idx];
	return true;
}

/**
 * amdgpu_vm_pt_sibling - go to sibling node
 *
 * @adev: amdgpu_device pointer
 * @cursor: current state
 *
 * Walk to the sibling node of the current node.
 * Returns:
 * True if the walk was possible, false otherwise.
 */
static bool amdgpu_vm_pt_sibling(struct amdgpu_device *adev,
				 struct amdgpu_vm_pt_cursor *cursor)
{
	unsigned shift, num_entries;

	/* Root doesn't have a sibling */
	if (!cursor->parent)
		return false;

	/* Go to our parents and see if we got a sibling */
	shift = amdgpu_vm_level_shift(adev, cursor->level - 1);
	num_entries = amdgpu_vm_num_entries(adev, cursor->level - 1);

	if (cursor->entry == &cursor->parent->entries[num_entries - 1])
		return false;

	cursor->pfn += 1ULL << shift;
	cursor->pfn &= ~((1ULL << shift) - 1);
	++cursor->entry;
	return true;
}

/**
 * amdgpu_vm_pt_ancestor - go to parent node
 *
 * @cursor: current state
 *
 * Walk to the parent node of the current node.
 * Returns:
 * True if the walk was possible, false otherwise.
 */
static bool amdgpu_vm_pt_ancestor(struct amdgpu_vm_pt_cursor *cursor)
{
	if (!cursor->parent)
		return false;

	--cursor->level;
	cursor->entry = cursor->parent;
	cursor->parent = amdgpu_vm_pt_parent(cursor->parent);
	return true;
}

/**
 * amdgpu_vm_pt_next - get next PD/PT in hierarchy
 *
 * @adev: amdgpu_device pointer
 * @cursor: current state
 *
 * Walk the PD/PT tree to the next node.
 */
static void amdgpu_vm_pt_next(struct amdgpu_device *adev,
			      struct amdgpu_vm_pt_cursor *cursor)
{
	/* First try a newborn child */
	if (amdgpu_vm_pt_descendant(adev, cursor))
		return;

	/* If that didn't work, try to find a sibling */
	while (!amdgpu_vm_pt_sibling(adev, cursor)) {
		/* No sibling, go to our parents and grandparents */
		if (!amdgpu_vm_pt_ancestor(cursor)) {
			cursor->pfn = ~0ll;
			return;
		}
	}
}

/**
 * amdgpu_vm_pt_first_dfs - start a depth-first search
 *
 * @adev: amdgpu_device structure
 * @vm: amdgpu_vm structure
 * @start: optional cursor to start with
 * @cursor: state to initialize
 *
 * Starts a depth-first traversal of the PD/PT tree.
 */
static void amdgpu_vm_pt_first_dfs(struct amdgpu_device *adev,
				   struct amdgpu_vm *vm,
				   struct amdgpu_vm_pt_cursor *start,
				   struct amdgpu_vm_pt_cursor *cursor)
{
	if (start)
		*cursor = *start;
	else
		amdgpu_vm_pt_start(adev, vm, 0, cursor);
	while (amdgpu_vm_pt_descendant(adev, cursor));
}

/**
 * amdgpu_vm_pt_continue_dfs - check if the depth-first search should continue
 *
 * @start: starting point for the search
 * @entry: current entry
 *
 * Returns:
 * True when the search should continue, false otherwise.
 */
static bool amdgpu_vm_pt_continue_dfs(struct amdgpu_vm_pt_cursor *start,
				      struct amdgpu_vm_pt *entry)
{
	return entry && (!start || entry != start->entry);
}

/**
 * amdgpu_vm_pt_next_dfs - get the next node for a depth-first search
 *
 * @adev: amdgpu_device structure
 * @cursor: current state
 *
 * Move the cursor to the next node in a depth-first search.
 */
static void amdgpu_vm_pt_next_dfs(struct amdgpu_device *adev,
				  struct amdgpu_vm_pt_cursor *cursor)
{
	if (!cursor->entry)
		return;

	if (!cursor->parent)
		cursor->entry = NULL;
	else if (amdgpu_vm_pt_sibling(adev, cursor))
		while (amdgpu_vm_pt_descendant(adev, cursor));
	else
		amdgpu_vm_pt_ancestor(cursor);
}

/*
 * for_each_amdgpu_vm_pt_dfs_safe - safe depth-first search of all PDs/PTs
 */
#define for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)		\
	for (amdgpu_vm_pt_first_dfs((adev), (vm), (start), &(cursor)),		\
	     (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor));\
	     amdgpu_vm_pt_continue_dfs((start), (entry));			\
	     (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor)))

/**
 * amdgpu_vm_get_pd_bo - add the VM PD to a validation list
 *
 * @vm: vm providing the BOs
 * @validated: head of validation list
 * @entry: entry to add
 *
 * Add the page directory to the list of BOs to
 * validate for command submission.
 */
void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
			 struct list_head *validated,
			 struct amdgpu_bo_list_entry *entry)
{
	entry->priority = 0;
	entry->tv.bo = &vm->root.base.bo->tbo;
	/* Two for VM updates, one for TTM and one for the CS job */
	entry->tv.num_shared = 4;
	entry->user_pages = NULL;
	list_add(&entry->tv.head, validated);
}

/**
 * amdgpu_vm_del_from_lru_notify - update bulk_moveable flag
 *
 * @bo: BO which was removed from the LRU
 *
 * Make sure the bulk_moveable flag is updated when a BO is removed from the
 * LRU.
 */
void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo)
{
	struct amdgpu_bo *abo;
	struct amdgpu_vm_bo_base *bo_base;

	if (!amdgpu_bo_is_amdgpu_bo(bo))
		return;

	if (bo->mem.placement & TTM_PL_FLAG_NO_EVICT)
		return;

	abo = ttm_to_amdgpu_bo(bo);
	if (!abo->parent)
		return;
	for (bo_base = abo->vm_bo; bo_base; bo_base = bo_base->next) {
		struct amdgpu_vm *vm = bo_base->vm;

		if (abo->tbo.base.resv == vm->root.base.bo->tbo.base.resv)
			vm->bulk_moveable = false;
	}
}

/**
 * amdgpu_vm_move_to_lru_tail - move all BOs to the end of LRU
 *
 * @adev: amdgpu device pointer
 * @vm: vm providing the BOs
 *
 * Move all BOs to the end of LRU and remember their positions to put them
 * together.
 */
void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
				struct amdgpu_vm *vm)
{
	struct amdgpu_vm_bo_base *bo_base;

	if (vm->bulk_moveable) {
		spin_lock(&ttm_bo_glob.lru_lock);
		ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move);
		spin_unlock(&ttm_bo_glob.lru_lock);
		return;
	}

	memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move));

	spin_lock(&ttm_bo_glob.lru_lock);
	list_for_each_entry(bo_base, &vm->idle, vm_status) {
		struct amdgpu_bo *bo = bo_base->bo;

		if (!bo->parent)
			continue;

		ttm_bo_move_to_lru_tail(&bo->tbo, &vm->lru_bulk_move);
		if (bo->shadow)
			ttm_bo_move_to_lru_tail(&bo->shadow->tbo,
						&vm->lru_bulk_move);
	}
	spin_unlock(&ttm_bo_glob.lru_lock);

	vm->bulk_moveable = true;
}

/**
 * amdgpu_vm_validate_pt_bos - validate the page table BOs
 *
 * @adev: amdgpu device pointer
 * @vm: vm providing the BOs
 * @validate: callback to do the validation
 * @param: parameter for the validation callback
 *
 * Validate the page table BOs on command submission if necessary.
 *
 * Returns:
 * Validation result.
 */
int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
			      int (*validate)(void *p, struct amdgpu_bo *bo),
			      void *param)
{
	struct amdgpu_vm_bo_base *bo_base, *tmp;
	int r;

	vm->bulk_moveable &= list_empty(&vm->evicted);

	list_for_each_entry_safe(bo_base, tmp, &vm->evicted, vm_status) {
		struct amdgpu_bo *bo = bo_base->bo;

		r = validate(param, bo);
		if (r)
			return r;

		if (bo->tbo.type != ttm_bo_type_kernel) {
			amdgpu_vm_bo_moved(bo_base);
		} else {
			vm->update_funcs->map_table(bo);
			amdgpu_vm_bo_relocated(bo_base);
		}
	}

	amdgpu_vm_eviction_lock(vm);
	vm->evicting = false;
	amdgpu_vm_eviction_unlock(vm);

	return 0;
}

/**
 * amdgpu_vm_ready - check VM is ready for updates
 *
 * @vm: VM to check
 *
 * Check if all VM PDs/PTs are ready for updates
 *
 * Returns:
 * True if the VM is not evicting and there are no evicted BOs left to validate.
 */
bool amdgpu_vm_ready(struct amdgpu_vm *vm)
{
	bool ret;

	amdgpu_vm_eviction_lock(vm);
	ret = !vm->evicting;
	amdgpu_vm_eviction_unlock(vm);

	return ret && list_empty(&vm->evicted);
}

/**
 * amdgpu_vm_clear_bo - initially clear the PDs/PTs
 *
 * @adev: amdgpu_device pointer
 * @vm: VM to clear BO from
 * @bo: BO to clear
 * @immediate: use an immediate update
 *
 * Root PD needs to be reserved when calling this.
 *
 * Returns:
 * 0 on success, errno otherwise.
 */
static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
			      struct amdgpu_vm *vm,
			      struct amdgpu_bo *bo,
			      bool immediate)
{
	struct ttm_operation_ctx ctx = { true, false };
	unsigned level = adev->vm_manager.root_level;
	struct amdgpu_vm_update_params params;
	struct amdgpu_bo *ancestor = bo;
	unsigned entries, ats_entries;
	uint64_t addr;
	int r;

	/* Figure out our place in the hierarchy */
	if (ancestor->parent) {
		++level;
		while (ancestor->parent->parent) {
			++level;
			ancestor = ancestor->parent;
		}
	}

	entries = amdgpu_bo_size(bo) / 8;
	if (!vm->pte_support_ats) {
		ats_entries = 0;

	} else if (!bo->parent) {
		ats_entries = amdgpu_vm_num_ats_entries(adev);
		ats_entries = min(ats_entries, entries);
		entries -= ats_entries;

	} else {
		struct amdgpu_vm_pt *pt;

		pt = container_of(ancestor->vm_bo, struct amdgpu_vm_pt, base);
		ats_entries = amdgpu_vm_num_ats_entries(adev);
		if ((pt - vm->root.entries) >= ats_entries) {
			ats_entries = 0;
		} else {
			ats_entries = entries;
			entries = 0;
		}
	}

	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
	if (r)
		return r;

	if (bo->shadow) {
		r = ttm_bo_validate(&bo->shadow->tbo, &bo->shadow->placement,
				    &ctx);
		if (r)
			return r;
	}

	r = vm->update_funcs->map_table(bo);
	if (r)
		return r;

	memset(&params, 0, sizeof(params));
	params.adev = adev;
	params.vm = vm;
	params.immediate = immediate;

	r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT);
	if (r)
		return r;

	addr = 0;
	if (ats_entries) {
		uint64_t value = 0, flags;

		flags = AMDGPU_PTE_DEFAULT_ATC;
		if (level != AMDGPU_VM_PTB) {
			/* Handle leaf PDEs as PTEs */
			flags |= AMDGPU_PDE_PTE;
			amdgpu_gmc_get_vm_pde(adev, level, &value, &flags);
		}

		r = vm->update_funcs->update(&params, bo, addr, 0, ats_entries,
					     value, flags);
		if (r)
			return r;

		addr += ats_entries * 8;
	}

	if (entries) {
		uint64_t value = 0, flags = 0;

		if (adev->asic_type >= CHIP_VEGA10) {
			if (level != AMDGPU_VM_PTB) {
				/* Handle leaf PDEs as PTEs */
				flags |= AMDGPU_PDE_PTE;
				amdgpu_gmc_get_vm_pde(adev, level,
						      &value, &flags);
			} else {
				/* Workaround for fault priority problem on GMC9 */
				flags = AMDGPU_PTE_EXECUTABLE;
			}
		}

		r = vm->update_funcs->update(&params, bo, addr, 0, entries,
					     value, flags);
		if (r)
			return r;
	}

	return vm->update_funcs->commit(&params, NULL);
}

/**
 * amdgpu_vm_bo_param - fill in parameters for PD/PT allocation
 *
 * @adev: amdgpu_device pointer
 * @vm: requesting vm
 * @level: the page table level
 * @immediate: use an immediate update
 * @bp: resulting BO allocation parameters
 */
static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm,
			       int level, bool immediate,
			       struct amdgpu_bo_param *bp)
{
	memset(bp, 0, sizeof(*bp));

	bp->size = amdgpu_vm_bo_size(adev, level);
	bp->byte_align = AMDGPU_GPU_PAGE_SIZE;
	bp->domain = AMDGPU_GEM_DOMAIN_VRAM;
	bp->domain = amdgpu_bo_get_preferred_pin_domain(adev, bp->domain);
	bp->flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
		AMDGPU_GEM_CREATE_CPU_GTT_USWC;
	if (vm->use_cpu_for_update)
		bp->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
	else if (!vm->root.base.bo || vm->root.base.bo->shadow)
		bp->flags |= AMDGPU_GEM_CREATE_SHADOW;
	bp->type = ttm_bo_type_kernel;
	bp->no_wait_gpu = immediate;
	if (vm->root.base.bo)
		bp->resv = vm->root.base.bo->tbo.base.resv;
}

/**
 * amdgpu_vm_alloc_pts - Allocate a specific page table
 *
 * @adev: amdgpu_device pointer
 * @vm: VM to allocate page tables for
 * @cursor: Which page table to allocate
 * @immediate: use an immediate update
 *
 * Make sure a specific page table or directory is allocated.
 *
 * Returns:
 * 1 if page table needed to be allocated, 0 if page table was already
 * allocated, negative errno if an error occurred.
 */
static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
			       struct amdgpu_vm *vm,
			       struct amdgpu_vm_pt_cursor *cursor,
			       bool immediate)
{
	struct amdgpu_vm_pt *entry = cursor->entry;
	struct amdgpu_bo_param bp;
	struct amdgpu_bo *pt;
	int r;

	if (cursor->level < AMDGPU_VM_PTB && !entry->entries) {
		unsigned num_entries;

		num_entries = amdgpu_vm_num_entries(adev, cursor->level);
		entry->entries = kvmalloc_array(num_entries,
						sizeof(*entry->entries),
						GFP_KERNEL | __GFP_ZERO);
		if (!entry->entries)
			return -ENOMEM;
	}

	if (entry->base.bo)
		return 0;

	amdgpu_vm_bo_param(adev, vm, cursor->level, immediate, &bp);

	r = amdgpu_bo_create(adev, &bp, &pt);
	if (r)
		return r;

	/* Keep a reference to the root directory to avoid
	 * freeing them up in the wrong order.
	 */
	pt->parent = amdgpu_bo_ref(cursor->parent->base.bo);
	amdgpu_vm_bo_base_init(&entry->base, vm, pt);

	r = amdgpu_vm_clear_bo(adev, vm, pt, immediate);
	if (r)
		goto error_free_pt;

	return 0;

error_free_pt:
	amdgpu_bo_unref(&pt->shadow);
	amdgpu_bo_unref(&pt);
	return r;
}

/**
 * amdgpu_vm_free_table - free one PD/PT
 *
 * @entry: PDE to free
 */
static void amdgpu_vm_free_table(struct amdgpu_vm_pt *entry)
{
	if (entry->base.bo) {
		entry->base.bo->vm_bo = NULL;
		list_del(&entry->base.vm_status);
		amdgpu_bo_unref(&entry->base.bo->shadow);
		amdgpu_bo_unref(&entry->base.bo);
	}
	kvfree(entry->entries);
	entry->entries = NULL;
}

/**
 * amdgpu_vm_free_pts - free PD/PT levels
 *
 * @adev: amdgpu device structure
 * @vm: amdgpu vm structure
 * @start: optional cursor where to start freeing PDs/PTs
 *
 * Free the page directory or page table level and all sub levels.
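 * If @start is given, the traversal begins at that cursor and the entry the
 * cursor points to is freed as well.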
 */
static void amdgpu_vm_free_pts(struct amdgpu_device *adev,
			       struct amdgpu_vm *vm,
			       struct amdgpu_vm_pt_cursor *start)
{
	struct amdgpu_vm_pt_cursor cursor;
	struct amdgpu_vm_pt *entry;

	vm->bulk_moveable = false;

	for_each_amdgpu_vm_pt_dfs_safe(adev, vm, start, cursor, entry)
		amdgpu_vm_free_table(entry);

	if (start)
		amdgpu_vm_free_table(start->entry);
}

/**
 * amdgpu_vm_check_compute_bug - check whether asic has compute vm bug
 *
 * @adev: amdgpu_device pointer
 */
void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev)
{
	const struct amdgpu_ip_block *ip_block;
	bool has_compute_vm_bug;
	struct amdgpu_ring *ring;
	int i;

	has_compute_vm_bug = false;

	ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
	if (ip_block) {
		/* Compute has a VM bug for GFX version < 7.
		 * Compute has a VM bug for GFX 8 MEC firmware version < 673.
		 */
		if (ip_block->version->major <= 7)
			has_compute_vm_bug = true;
		else if (ip_block->version->major == 8)
			if (adev->gfx.mec_fw_version < 673)
				has_compute_vm_bug = true;
	}

	for (i = 0; i < adev->num_rings; i++) {
		ring = adev->rings[i];
		if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
			/* only compute rings */
			ring->has_compute_vm_bug = has_compute_vm_bug;
		else
			ring->has_compute_vm_bug = false;
	}
}

/**
 * amdgpu_vm_need_pipeline_sync - Check if pipe sync is needed for job.
 *
 * @ring: ring on which the job will be submitted
 * @job: job to submit
 *
 * Returns:
 * True if sync is needed.
 */
bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
				  struct amdgpu_job *job)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned vmhub = ring->funcs->vmhub;
	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
	struct amdgpu_vmid *id;
	bool gds_switch_needed;
	bool vm_flush_needed = job->vm_needs_flush || ring->has_compute_vm_bug;

	if (job->vmid == 0)
		return false;
	id = &id_mgr->ids[job->vmid];
	gds_switch_needed = ring->funcs->emit_gds_switch && (
		id->gds_base != job->gds_base ||
		id->gds_size != job->gds_size ||
		id->gws_base != job->gws_base ||
		id->gws_size != job->gws_size ||
		id->oa_base != job->oa_base ||
		id->oa_size != job->oa_size);

	if (amdgpu_vmid_had_gpu_reset(adev, id))
		return true;

	return vm_flush_needed || gds_switch_needed;
}

/**
 * amdgpu_vm_flush - hardware flush the vm
 *
 * @ring: ring to use for flush
 * @job: related job
 * @need_pipe_sync: is pipe sync needed
 *
 * Emit a VM flush when it is necessary.
 *
 * Returns:
 * 0 on success, errno otherwise.
 */
int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
		    bool need_pipe_sync)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned vmhub = ring->funcs->vmhub;
	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
	struct amdgpu_vmid *id = &id_mgr->ids[job->vmid];
	bool gds_switch_needed = ring->funcs->emit_gds_switch && (
		id->gds_base != job->gds_base ||
		id->gds_size != job->gds_size ||
		id->gws_base != job->gws_base ||
		id->gws_size != job->gws_size ||
		id->oa_base != job->oa_base ||
		id->oa_size != job->oa_size);
	bool vm_flush_needed = job->vm_needs_flush;
	struct dma_fence *fence = NULL;
	bool pasid_mapping_needed = false;
	unsigned patch_offset = 0;
	bool update_spm_vmid_needed = (job->vm && (job->vm->reserved_vmid[vmhub] != NULL));
	int r;

	if (update_spm_vmid_needed && adev->gfx.rlc.funcs->update_spm_vmid)
		adev->gfx.rlc.funcs->update_spm_vmid(adev, job->vmid);

	if (amdgpu_vmid_had_gpu_reset(adev, id)) {
		gds_switch_needed = true;
		vm_flush_needed = true;
		pasid_mapping_needed = true;
	}

	mutex_lock(&id_mgr->lock);
	if (id->pasid != job->pasid || !id->pasid_mapping ||
	    !dma_fence_is_signaled(id->pasid_mapping))
		pasid_mapping_needed = true;
	mutex_unlock(&id_mgr->lock);

	gds_switch_needed &= !!ring->funcs->emit_gds_switch;
	vm_flush_needed &= !!ring->funcs->emit_vm_flush &&
			job->vm_pd_addr != AMDGPU_BO_INVALID_OFFSET;
	pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping &&
		ring->funcs->emit_wreg;

	if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync)
		return 0;

	if (ring->funcs->init_cond_exec)
		patch_offset = amdgpu_ring_init_cond_exec(ring);

	if (need_pipe_sync)
		amdgpu_ring_emit_pipeline_sync(ring);

	if (vm_flush_needed) {
		trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr);
		amdgpu_ring_emit_vm_flush(ring, job->vmid, job->vm_pd_addr);
	}

	if (pasid_mapping_needed)
		amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid);

	if (vm_flush_needed || pasid_mapping_needed) {
		r = amdgpu_fence_emit(ring, &fence, 0);
		if (r)
			return r;
	}

	if (vm_flush_needed) {
		mutex_lock(&id_mgr->lock);
		dma_fence_put(id->last_flush);
		id->last_flush = dma_fence_get(fence);
		id->current_gpu_reset_count =
			atomic_read(&adev->gpu_reset_counter);
		mutex_unlock(&id_mgr->lock);
	}

	if (pasid_mapping_needed) {
		mutex_lock(&id_mgr->lock);
		id->pasid = job->pasid;
		dma_fence_put(id->pasid_mapping);
		id->pasid_mapping = dma_fence_get(fence);
		mutex_unlock(&id_mgr->lock);
	}
	dma_fence_put(fence);

	if (ring->funcs->emit_gds_switch && gds_switch_needed) {
		id->gds_base = job->gds_base;
		id->gds_size = job->gds_size;
		id->gws_base = job->gws_base;
		id->gws_size = job->gws_size;
		id->oa_base = job->oa_base;
		id->oa_size = job->oa_size;
		amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base,
					    job->gds_size, job->gws_base,
					    job->gws_size, job->oa_base,
					    job->oa_size);
	}

	if (ring->funcs->patch_cond_exec)
		amdgpu_ring_patch_cond_exec(ring, patch_offset);

	/* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
	if (ring->funcs->emit_switch_buffer) {
		amdgpu_ring_emit_switch_buffer(ring);
		amdgpu_ring_emit_switch_buffer(ring);
	}
	return 0;
}

/**
 * amdgpu_vm_bo_find - find the bo_va for a specific vm & bo
 *
 * @vm: requested vm
 * @bo: requested buffer object
 *
 * Find @bo inside the requested vm.
 * Search inside the @bo's vm list for the requested vm.
 * Returns the found bo_va or NULL if none is found.
 *
 * Object has to be reserved!
 *
 * Returns:
 * Found bo_va or NULL.
 */
struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
				       struct amdgpu_bo *bo)
{
	struct amdgpu_vm_bo_base *base;

	for (base = bo->vm_bo; base; base = base->next) {
		if (base->vm != vm)
			continue;

		return container_of(base, struct amdgpu_bo_va, base);
	}
	return NULL;
}

/**
 * amdgpu_vm_map_gart - Resolve gart mapping of addr
 *
 * @pages_addr: optional DMA address to use for lookup
 * @addr: the unmapped addr
 *
 * Look up the physical address of the page that the pte resolves
 * to.
 *
 * Returns:
 * The pointer for the page table entry.
 */
uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
{
	uint64_t result;

	/* page table offset */
	result = pages_addr[addr >> PAGE_SHIFT];

	/* in case cpu page size != gpu page size */
	result |= addr & (~PAGE_MASK);

	result &= 0xFFFFFFFFFFFFF000ULL;

	return result;
}

/**
 * amdgpu_vm_update_pde - update a single level in the hierarchy
 *
 * @params: parameters for the update
 * @vm: requested vm
 * @entry: entry to update
 *
 * Makes sure the requested entry in parent is up to date.
 */
static int amdgpu_vm_update_pde(struct amdgpu_vm_update_params *params,
				struct amdgpu_vm *vm,
				struct amdgpu_vm_pt *entry)
{
	struct amdgpu_vm_pt *parent = amdgpu_vm_pt_parent(entry);
	struct amdgpu_bo *bo = parent->base.bo, *pbo;
	uint64_t pde, pt, flags;
	unsigned level;

	for (level = 0, pbo = bo->parent; pbo; ++level)
		pbo = pbo->parent;

	level += params->adev->vm_manager.root_level;
	amdgpu_gmc_get_pde_for_bo(entry->base.bo, level, &pt, &flags);
	pde = (entry - parent->entries) * 8;
	return vm->update_funcs->update(params, bo, pde, pt, 1, 0, flags);
}

/**
 * amdgpu_vm_invalidate_pds - mark all PDs as invalid
 *
 * @adev: amdgpu_device pointer
 * @vm: related vm
 *
 * Mark all PD levels as invalid after an error.
 */
static void amdgpu_vm_invalidate_pds(struct amdgpu_device *adev,
				     struct amdgpu_vm *vm)
{
	struct amdgpu_vm_pt_cursor cursor;
	struct amdgpu_vm_pt *entry;

	for_each_amdgpu_vm_pt_dfs_safe(adev, vm, NULL, cursor, entry)
		if (entry->base.bo && !entry->base.moved)
			amdgpu_vm_bo_relocated(&entry->base);
}

/**
 * amdgpu_vm_update_pdes - make sure that all directories are valid
 *
 * @adev: amdgpu_device pointer
 * @vm: requested vm
 * @immediate: submit immediately to the paging queue
 *
 * Makes sure all directories are up to date.
 *
 * Returns:
 * 0 for success, error for failure.
 */
int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
			  struct amdgpu_vm *vm, bool immediate)
{
	struct amdgpu_vm_update_params params;
	int r;

	if (list_empty(&vm->relocated))
		return 0;

	memset(&params, 0, sizeof(params));
	params.adev = adev;
	params.vm = vm;
	params.immediate = immediate;

	r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT);
	if (r)
		return r;

	while (!list_empty(&vm->relocated)) {
		struct amdgpu_vm_pt *entry;

		entry = list_first_entry(&vm->relocated, struct amdgpu_vm_pt,
					 base.vm_status);
		amdgpu_vm_bo_idle(&entry->base);

		r = amdgpu_vm_update_pde(&params, vm, entry);
		if (r)
			goto error;
	}

	r = vm->update_funcs->commit(&params, &vm->last_update);
	if (r)
		goto error;
	return 0;

error:
	amdgpu_vm_invalidate_pds(adev, vm);
	return r;
}

/*
 * amdgpu_vm_update_flags - figure out flags for PTE updates
 *
 * Make sure to set the right flags for the PTEs at the desired level.
 */
static void amdgpu_vm_update_flags(struct amdgpu_vm_update_params *params,
				   struct amdgpu_bo *bo, unsigned level,
				   uint64_t pe, uint64_t addr,
				   unsigned count, uint32_t incr,
				   uint64_t flags)
{
	if (level != AMDGPU_VM_PTB) {
		flags |= AMDGPU_PDE_PTE;
		amdgpu_gmc_get_vm_pde(params->adev, level, &addr, &flags);

	} else if (params->adev->asic_type >= CHIP_VEGA10 &&
		   !(flags & AMDGPU_PTE_VALID) &&
		   !(flags & AMDGPU_PTE_PRT)) {

		/* Workaround for fault priority problem on GMC9 */
		flags |= AMDGPU_PTE_EXECUTABLE;
	}

	params->vm->update_funcs->update(params, bo, pe, addr, count, incr,
					 flags);
}

/**
 * amdgpu_vm_fragment - get fragment for PTEs
 *
 * @params: see amdgpu_vm_update_params definition
 * @start: first PTE to handle
 * @end: last PTE to handle
 * @flags: hw mapping flags
 * @frag: resulting fragment size
 * @frag_end: end of this fragment
 *
 * Returns the first possible fragment for the start and end address.
 */
static void amdgpu_vm_fragment(struct amdgpu_vm_update_params *params,
			       uint64_t start, uint64_t end, uint64_t flags,
			       unsigned int *frag, uint64_t *frag_end)
{
	/**
	 * The MC L1 TLB supports variable sized pages, based on a fragment
	 * field in the PTE. When this field is set to a non-zero value, page
	 * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
	 * flags are considered valid for all PTEs within the fragment range
	 * and corresponding mappings are assumed to be physically contiguous.
	 *
	 * The L1 TLB can store a single PTE for the whole fragment,
	 * significantly increasing the space available for translation
	 * caching. This leads to large improvements in throughput when the
	 * TLB is under pressure.
	 *
	 * The L2 TLB distributes small and large fragments into two
	 * asymmetric partitions. The large fragment cache is significantly
	 * larger. Thus, we try to use large fragments wherever possible.
	 * Userspace can support this by aligning virtual base address and
	 * allocation size to the fragment size.
	 *
	 * Starting with Vega10 the fragment size only controls the L1. The L2
	 * is now directly fed with small/huge/giant pages from the walker.
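	 *
	 * As a worked example of the formula above: with frag = 9 the page
	 * granularity becomes 4KB << 9 = 2MB, so a 2MB range whose virtual
	 * address and physical backing are both 2MB aligned can be held in a
	 * single L1 TLB entry instead of 512 individual 4KB entries.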
	 */
	unsigned max_frag;

	if (params->adev->asic_type < CHIP_VEGA10)
		max_frag = params->adev->vm_manager.fragment_size;
	else
		max_frag = 31;

	/* system pages are not contiguous */
	if (params->pages_addr) {
		*frag = 0;
		*frag_end = end;
		return;
	}

	/* This intentionally wraps around if no bit is set */
	*frag = min((unsigned)ffs(start) - 1, (unsigned)fls64(end - start) - 1);
	if (*frag >= max_frag) {
		*frag = max_frag;
		*frag_end = end & ~((1ULL << max_frag) - 1);
	} else {
		*frag_end = start + (1 << *frag);
	}
}

/**
 * amdgpu_vm_update_ptes - make sure that page tables are valid
 *
 * @params: see amdgpu_vm_update_params definition
 * @start: start of GPU address range
 * @end: end of GPU address range
 * @dst: destination address to map to, the next dst inside the function
 * @flags: mapping flags
 *
 * Update the page tables in the range @start - @end.
 *
 * Returns:
 * 0 for success, -EINVAL for failure.
 */
static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
				 uint64_t start, uint64_t end,
				 uint64_t dst, uint64_t flags)
{
	struct amdgpu_device *adev = params->adev;
	struct amdgpu_vm_pt_cursor cursor;
	uint64_t frag_start = start, frag_end;
	unsigned int frag;
	int r;

	/* figure out the initial fragment */
	amdgpu_vm_fragment(params, frag_start, end, flags, &frag, &frag_end);

	/* walk over the address space and update the PTs */
	amdgpu_vm_pt_start(adev, params->vm, start, &cursor);
	while (cursor.pfn < end) {
		unsigned shift, parent_shift, mask;
		uint64_t incr, entry_end, pe_start;
		struct amdgpu_bo *pt;

		if (!params->unlocked) {
			/* make sure that the page tables covering the
			 * address range are actually allocated
			 */
			r = amdgpu_vm_alloc_pts(params->adev, params->vm,
						&cursor, params->immediate);
			if (r)
				return r;
		}

		shift = amdgpu_vm_level_shift(adev, cursor.level);
		parent_shift = amdgpu_vm_level_shift(adev, cursor.level - 1);
		if (params->unlocked) {
			/* Unlocked updates are only allowed on the leaves */
			if (amdgpu_vm_pt_descendant(adev, &cursor))
				continue;
		} else if (adev->asic_type < CHIP_VEGA10 &&
			   (flags & AMDGPU_PTE_VALID)) {
			/* No huge page support before GMC v9 */
			if (cursor.level != AMDGPU_VM_PTB) {
				if (!amdgpu_vm_pt_descendant(adev, &cursor))
					return -ENOENT;
				continue;
			}
		} else if (frag < shift) {
			/* We can't use this level when the fragment size is
			 * smaller than the address shift. Go to the next
			 * child entry and try again.
			 */
			if (amdgpu_vm_pt_descendant(adev, &cursor))
				continue;
		} else if (frag >= parent_shift) {
			/* If the fragment size is even larger than the parent
			 * shift we should go up one level and check it again.
			 */
			if (!amdgpu_vm_pt_ancestor(&cursor))
				return -EINVAL;
			continue;
		}

		pt = cursor.entry->base.bo;
		if (!pt) {
			/* We need all PDs and PTs for mapping something, */
			if (flags & AMDGPU_PTE_VALID)
				return -ENOENT;

			/* but unmapping something can happen at a higher
			 * level.
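			 * In that case the page tables covering this range do
			 * not exist, so the invalidation can simply be written
			 * into the corresponding entries of the parent PD one
			 * level up.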
			 */
			if (!amdgpu_vm_pt_ancestor(&cursor))
				return -EINVAL;

			pt = cursor.entry->base.bo;
			shift = parent_shift;
			frag_end = max(frag_end, ALIGN(frag_start + 1,
				   1ULL << shift));
		}

		/* Looks good so far, calculate parameters for the update */
		incr = (uint64_t)AMDGPU_GPU_PAGE_SIZE << shift;
		mask = amdgpu_vm_entries_mask(adev, cursor.level);
		pe_start = ((cursor.pfn >> shift) & mask) * 8;
		entry_end = ((uint64_t)mask + 1) << shift;
		entry_end += cursor.pfn & ~(entry_end - 1);
		entry_end = min(entry_end, end);

		do {
			struct amdgpu_vm *vm = params->vm;
			uint64_t upd_end = min(entry_end, frag_end);
			unsigned nptes = (upd_end - frag_start) >> shift;
			uint64_t upd_flags = flags | AMDGPU_PTE_FRAG(frag);

			/* This can happen when we set higher level PDs to
			 * silent to stop fault floods.
			 */
			nptes = max(nptes, 1u);

			trace_amdgpu_vm_update_ptes(params, frag_start, upd_end,
						    nptes, dst, incr, upd_flags,
						    vm->task_info.pid,
						    vm->immediate.fence_context);
			amdgpu_vm_update_flags(params, pt, cursor.level,
					       pe_start, dst, nptes, incr,
					       upd_flags);

			pe_start += nptes * 8;
			dst += nptes * incr;

			frag_start = upd_end;
			if (frag_start >= frag_end) {
				/* figure out the next fragment */
				amdgpu_vm_fragment(params, frag_start, end,
						   flags, &frag, &frag_end);
				if (frag < shift)
					break;
			}
		} while (frag_start < entry_end);

		if (amdgpu_vm_pt_descendant(adev, &cursor)) {
			/* Free all child entries.
			 * Update the tables with the flags and addresses and free up subsequent
			 * tables in the case of huge pages or freed up areas.
			 * This is the maximum you can free, because all other page tables are not
			 * completely covered by the range and so potentially still in use.
			 */
			while (cursor.pfn < frag_start) {
				amdgpu_vm_free_pts(adev, params->vm, &cursor);
				amdgpu_vm_pt_next(adev, &cursor);
			}

		} else if (frag >= shift) {
			/* or just move on to the next on the same level. */
			amdgpu_vm_pt_next(adev, &cursor);
		}
	}

	return 0;
}

/**
 * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
 *
 * @adev: amdgpu_device pointer
 * @vm: requested vm
 * @immediate: immediate submission in a page fault
 * @unlocked: unlocked invalidation during MM callback
 * @resv: fences we need to sync to
 * @start: start of mapped range
 * @last: last mapped entry
 * @flags: flags for the entries
 * @addr: addr to set the area to
 * @pages_addr: DMA addresses to use for mapping
 * @fence: optional resulting fence
 *
 * Fill in the page table entries between @start and @last.
 *
 * Returns:
 * 0 for success, -EINVAL for failure.
 */
static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
				       struct amdgpu_vm *vm, bool immediate,
				       bool unlocked, struct dma_resv *resv,
				       uint64_t start, uint64_t last,
				       uint64_t flags, uint64_t addr,
				       dma_addr_t *pages_addr,
				       struct dma_fence **fence)
{
	struct amdgpu_vm_update_params params;
	enum amdgpu_sync_mode sync_mode;
	int r;

	memset(&params, 0, sizeof(params));
	params.adev = adev;
	params.vm = vm;
	params.immediate = immediate;
	params.pages_addr = pages_addr;
	params.unlocked = unlocked;

	/* Implicitly sync to command submissions in the same VM before
	 * unmapping. Sync to moving fences before mapping.
	 */
	if (!(flags & AMDGPU_PTE_VALID))
		sync_mode = AMDGPU_SYNC_EQ_OWNER;
	else
		sync_mode = AMDGPU_SYNC_EXPLICIT;

	amdgpu_vm_eviction_lock(vm);
	if (vm->evicting) {
		r = -EBUSY;
		goto error_unlock;
	}

	if (!unlocked && !dma_fence_is_signaled(vm->last_unlocked)) {
		struct dma_fence *tmp = dma_fence_get_stub();

		amdgpu_bo_fence(vm->root.base.bo, vm->last_unlocked, true);
		swap(vm->last_unlocked, tmp);
		dma_fence_put(tmp);
	}

	r = vm->update_funcs->prepare(&params, resv, sync_mode);
	if (r)
		goto error_unlock;

	r = amdgpu_vm_update_ptes(&params, start, last + 1, addr, flags);
	if (r)
		goto error_unlock;

	r = vm->update_funcs->commit(&params, fence);

error_unlock:
	amdgpu_vm_eviction_unlock(vm);
	return r;
}

/**
 * amdgpu_vm_bo_split_mapping - split a mapping into smaller chunks
 *
 * @adev: amdgpu_device pointer
 * @resv: fences we need to sync to
 * @pages_addr: DMA addresses to use for mapping
 * @vm: requested vm
 * @mapping: mapped range and flags to use for the update
 * @flags: HW flags for the mapping
 * @bo_adev: amdgpu_device pointer of the device the bo was actually allocated on
 * @nodes: array of drm_mm_nodes with the MC addresses
 * @fence: optional resulting fence
 *
 * Split the mapping into smaller chunks so that each update fits
 * into a SDMA IB.
 *
 * Returns:
 * 0 for success, -EINVAL for failure.
 */
static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
				      struct dma_resv *resv,
				      dma_addr_t *pages_addr,
				      struct amdgpu_vm *vm,
				      struct amdgpu_bo_va_mapping *mapping,
				      uint64_t flags,
				      struct amdgpu_device *bo_adev,
				      struct drm_mm_node *nodes,
				      struct dma_fence **fence)
{
	unsigned min_linear_pages = 1 << adev->vm_manager.fragment_size;
	uint64_t pfn, start = mapping->start;
	int r;

	/* Normally bo_va->flags only contains the READABLE and WRITEABLE
	 * bits, but just in case, filter the flags here first.
	 */
	if (!(mapping->flags & AMDGPU_PTE_READABLE))
		flags &= ~AMDGPU_PTE_READABLE;
	if (!(mapping->flags & AMDGPU_PTE_WRITEABLE))
		flags &= ~AMDGPU_PTE_WRITEABLE;

	/* Apply ASIC specific mapping flags */
	amdgpu_gmc_get_vm_pte(adev, mapping, &flags);

	trace_amdgpu_vm_bo_update(mapping);

	pfn = mapping->offset >> PAGE_SHIFT;
	if (nodes) {
		while (pfn >= nodes->size) {
			pfn -= nodes->size;
			++nodes;
		}
	}

	do {
		dma_addr_t *dma_addr = NULL;
		uint64_t max_entries;
		uint64_t addr, last;

		max_entries = mapping->last - start + 1;
		if (nodes) {
			addr = nodes->start << PAGE_SHIFT;
			max_entries = min((nodes->size - pfn) *
				AMDGPU_GPU_PAGES_IN_CPU_PAGE, max_entries);
		} else {
			addr = 0;
		}

		if (pages_addr) {
			uint64_t count;

			for (count = 1;
			     count < max_entries / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
			     ++count) {
				uint64_t idx = pfn + count;

				if (pages_addr[idx] !=
				    (pages_addr[idx - 1] + PAGE_SIZE))
					break;
			}

			if (count < min_linear_pages) {
				addr = pfn << PAGE_SHIFT;
				dma_addr = pages_addr;
			} else {
				addr = pages_addr[pfn];
				max_entries = count *
					AMDGPU_GPU_PAGES_IN_CPU_PAGE;
			}

		} else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) {
			addr += bo_adev->vm_manager.vram_base_offset;
			addr += pfn << PAGE_SHIFT;
		}

		last = start + max_entries - 1;
		r = amdgpu_vm_bo_update_mapping(adev, vm, false, false, resv,
						start, last, flags, addr,
						dma_addr, fence);
		if (r)
			return r;

		pfn += (last - start + 1) / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
		if (nodes && nodes->size == pfn) {
			pfn = 0;
			++nodes;
		}
		start = last + 1;

	} while (unlikely(start != mapping->last + 1));

	return 0;
}

/**
 * amdgpu_vm_bo_update - update all BO mappings in the vm page table
 *
 * @adev: amdgpu_device pointer
 * @bo_va: requested BO and VM object
 * @clear: if true clear the entries
 *
 * Fill in the page table entries for @bo_va.
 *
 * Returns:
 * 0 for success, -EINVAL for failure.
 */
int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
			bool clear)
{
	struct amdgpu_bo *bo = bo_va->base.bo;
	struct amdgpu_vm *vm = bo_va->base.vm;
	struct amdgpu_bo_va_mapping *mapping;
	dma_addr_t *pages_addr = NULL;
	struct ttm_resource *mem;
	struct drm_mm_node *nodes;
	struct dma_fence **last_update;
	struct dma_resv *resv;
	uint64_t flags;
	struct amdgpu_device *bo_adev = adev;
	int r;

	if (clear || !bo) {
		mem = NULL;
		nodes = NULL;
		resv = vm->root.base.bo->tbo.base.resv;
	} else {
		struct drm_gem_object *obj = &bo->tbo.base;
		struct ttm_dma_tt *ttm;

		resv = bo->tbo.base.resv;
		if (obj->import_attach && bo_va->is_xgmi) {
			struct dma_buf *dma_buf = obj->import_attach->dmabuf;
			struct drm_gem_object *gobj = dma_buf->priv;
			struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj);

			if (abo->tbo.mem.mem_type == TTM_PL_VRAM)
				bo = gem_to_amdgpu_bo(gobj);
		}
		mem = &bo->tbo.mem;
		nodes = mem->mm_node;
		if (mem->mem_type == TTM_PL_TT) {
			ttm = container_of(bo->tbo.ttm, struct ttm_dma_tt, ttm);
			pages_addr = ttm->dma_address;
		}
	}

	if (bo) {
		flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem);

		if (amdgpu_bo_encrypted(bo))
			flags |= AMDGPU_PTE_TMZ;

		bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
	} else {
		flags = 0x0;
	}

	if (clear || (bo && bo->tbo.base.resv ==
		      vm->root.base.bo->tbo.base.resv))
		last_update = &vm->last_update;
	else
		last_update = &bo_va->last_pt_update;

	if (!clear && bo_va->base.moved) {
		bo_va->base.moved = false;
		list_splice_init(&bo_va->valids, &bo_va->invalids);

	} else if (bo_va->cleared != clear) {
		list_splice_init(&bo_va->valids, &bo_va->invalids);
	}

	list_for_each_entry(mapping, &bo_va->invalids, list) {
		r = amdgpu_vm_bo_split_mapping(adev, resv, pages_addr, vm,
					       mapping, flags, bo_adev, nodes,
					       last_update);
		if (r)
			return r;
	}

	/* If the BO is not in its preferred location add it back to
	 * the evicted list so that it gets validated again on the
	 * next command submission.
	 */
	if (bo && bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv) {
		uint32_t mem_type = bo->tbo.mem.mem_type;

		if (!(bo->preferred_domains &
		      amdgpu_mem_type_to_domain(mem_type)))
			amdgpu_vm_bo_evicted(&bo_va->base);
		else
			amdgpu_vm_bo_idle(&bo_va->base);
	} else {
		amdgpu_vm_bo_done(&bo_va->base);
	}

	list_splice_init(&bo_va->invalids, &bo_va->valids);
	bo_va->cleared = clear;

	if (trace_amdgpu_vm_bo_mapping_enabled()) {
		list_for_each_entry(mapping, &bo_va->valids, list)
			trace_amdgpu_vm_bo_mapping(mapping);
	}

	return 0;
}

/**
 * amdgpu_vm_update_prt_state - update the global PRT state
 *
 * @adev: amdgpu_device pointer
 */
static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev)
{
	unsigned long flags;
	bool enable;

	spin_lock_irqsave(&adev->vm_manager.prt_lock, flags);
	enable = !!atomic_read(&adev->vm_manager.num_prt_users);
	adev->gmc.gmc_funcs->set_prt(adev, enable);
	spin_unlock_irqrestore(&adev->vm_manager.prt_lock, flags);
}

/**
 * amdgpu_vm_prt_get - add a PRT user
 *
 * @adev: amdgpu_device pointer
 */
static void amdgpu_vm_prt_get(struct amdgpu_device *adev)
{
	if (!adev->gmc.gmc_funcs->set_prt)
		return;

	if (atomic_inc_return(&adev->vm_manager.num_prt_users) == 1)
		amdgpu_vm_update_prt_state(adev);
}

/**
 * amdgpu_vm_prt_put - drop a PRT user
 *
 * @adev: amdgpu_device pointer
 */
static void amdgpu_vm_prt_put(struct amdgpu_device *adev)
{
	if (atomic_dec_return(&adev->vm_manager.num_prt_users) == 0)
		amdgpu_vm_update_prt_state(adev);
}

/**
 * amdgpu_vm_prt_cb - callback for updating the PRT status
 *
 * @fence: fence for the callback
 * @_cb: the callback function
 */
static void amdgpu_vm_prt_cb(struct dma_fence *fence, struct dma_fence_cb *_cb)
{
	struct amdgpu_prt_cb *cb = container_of(_cb, struct amdgpu_prt_cb, cb);

	amdgpu_vm_prt_put(cb->adev);
	kfree(cb);
}

/**
 * amdgpu_vm_add_prt_cb - add callback for updating the PRT status
 *
 * @adev: amdgpu_device pointer
 * @fence: fence for the callback
 */
static void amdgpu_vm_add_prt_cb(struct amdgpu_device *adev,
				 struct dma_fence *fence)
{
	struct amdgpu_prt_cb *cb;

	if (!adev->gmc.gmc_funcs->set_prt)
		return;

	cb = kmalloc(sizeof(struct amdgpu_prt_cb), GFP_KERNEL);
	if (!cb) {
		/* Last resort when we are OOM */
		if (fence)
			dma_fence_wait(fence, false);

		amdgpu_vm_prt_put(adev);
	} else {
		cb->adev = adev;
		if (!fence || dma_fence_add_callback(fence, &cb->cb,
						     amdgpu_vm_prt_cb))
			amdgpu_vm_prt_cb(fence, &cb->cb);
	}
}

/**
 * amdgpu_vm_free_mapping - free a mapping
 *
 * @adev: amdgpu_device pointer
 * @vm: requested vm
 * @mapping: mapping to be freed
 * @fence: fence of the unmap operation
 *
 * Free a mapping and make sure we decrease the PRT usage count if applicable.
1971 */ 1972static void amdgpu_vm_free_mapping(struct amdgpu_device *adev, 1973 struct amdgpu_vm *vm, 1974 struct amdgpu_bo_va_mapping *mapping, 1975 struct dma_fence *fence) 1976{ 1977 if (mapping->flags & AMDGPU_PTE_PRT) 1978 amdgpu_vm_add_prt_cb(adev, fence); 1979 kfree(mapping); 1980} 1981 1982/** 1983 * amdgpu_vm_prt_fini - finish all prt mappings 1984 * 1985 * @adev: amdgpu_device pointer 1986 * @vm: requested vm 1987 * 1988 * Register a cleanup callback to disable PRT support after VM dies. 1989 */ 1990static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) 1991{ 1992 struct dma_resv *resv = vm->root.base.bo->tbo.base.resv; 1993 struct dma_fence *excl, **shared; 1994 unsigned i, shared_count; 1995 int r; 1996 1997 r = dma_resv_get_fences_rcu(resv, &excl, 1998 &shared_count, &shared); 1999 if (r) { 2000 /* Not enough memory to grab the fence list, as last resort 2001 * block for all the fences to complete. 2002 */ 2003 dma_resv_wait_timeout_rcu(resv, true, false, 2004 MAX_SCHEDULE_TIMEOUT); 2005 return; 2006 } 2007 2008 /* Add a callback for each fence in the reservation object */ 2009 amdgpu_vm_prt_get(adev); 2010 amdgpu_vm_add_prt_cb(adev, excl); 2011 2012 for (i = 0; i < shared_count; ++i) { 2013 amdgpu_vm_prt_get(adev); 2014 amdgpu_vm_add_prt_cb(adev, shared[i]); 2015 } 2016 2017 kfree(shared); 2018} 2019 2020/** 2021 * amdgpu_vm_clear_freed - clear freed BOs in the PT 2022 * 2023 * @adev: amdgpu_device pointer 2024 * @vm: requested vm 2025 * @fence: optional resulting fence (unchanged if no work needed to be done 2026 * or if an error occurred) 2027 * 2028 * Make sure all freed BOs are cleared in the PT. 2029 * PTs have to be reserved and mutex must be locked! 2030 * 2031 * Returns: 2032 * 0 for success. 2033 * 2034 */ 2035int amdgpu_vm_clear_freed(struct amdgpu_device *adev, 2036 struct amdgpu_vm *vm, 2037 struct dma_fence **fence) 2038{ 2039 struct dma_resv *resv = vm->root.base.bo->tbo.base.resv; 2040 struct amdgpu_bo_va_mapping *mapping; 2041 uint64_t init_pte_value = 0; 2042 struct dma_fence *f = NULL; 2043 int r; 2044 2045 while (!list_empty(&vm->freed)) { 2046 mapping = list_first_entry(&vm->freed, 2047 struct amdgpu_bo_va_mapping, list); 2048 list_del(&mapping->list); 2049 2050 if (vm->pte_support_ats && 2051 mapping->start < AMDGPU_GMC_HOLE_START) 2052 init_pte_value = AMDGPU_PTE_DEFAULT_ATC; 2053 2054 r = amdgpu_vm_bo_update_mapping(adev, vm, false, false, resv, 2055 mapping->start, mapping->last, 2056 init_pte_value, 0, NULL, &f); 2057 amdgpu_vm_free_mapping(adev, vm, mapping, f); 2058 if (r) { 2059 dma_fence_put(f); 2060 return r; 2061 } 2062 } 2063 2064 if (fence && f) { 2065 dma_fence_put(*fence); 2066 *fence = f; 2067 } else { 2068 dma_fence_put(f); 2069 } 2070 2071 return 0; 2072 2073} 2074 2075/** 2076 * amdgpu_vm_handle_moved - handle moved BOs in the PT 2077 * 2078 * @adev: amdgpu_device pointer 2079 * @vm: requested vm 2080 * 2081 * Make sure all BOs which are moved are updated in the PTs. 2082 * 2083 * Returns: 2084 * 0 for success. 2085 * 2086 * PTs have to be reserved! 
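 *
 * Minimal usage sketch (illustrative only, not copied from the driver; the
 * caller is assumed to hold the required reservations):
 *
 *	r = amdgpu_vm_clear_freed(adev, vm, NULL);
 *	if (!r)
 *		r = amdgpu_vm_handle_moved(adev, vm);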
2087 */ 2088int amdgpu_vm_handle_moved(struct amdgpu_device *adev, 2089 struct amdgpu_vm *vm) 2090{ 2091 struct amdgpu_bo_va *bo_va, *tmp; 2092 struct dma_resv *resv; 2093 bool clear; 2094 int r; 2095 2096 list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) { 2097 /* Per VM BOs never need to be cleared in the page tables */ 2098 r = amdgpu_vm_bo_update(adev, bo_va, false); 2099 if (r) 2100 return r; 2101 } 2102 2103 spin_lock(&vm->invalidated_lock); 2104 while (!list_empty(&vm->invalidated)) { 2105 bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va, 2106 base.vm_status); 2107 resv = bo_va->base.bo->tbo.base.resv; 2108 spin_unlock(&vm->invalidated_lock); 2109 2110 /* Try to reserve the BO to avoid clearing its ptes */ 2111 if (!amdgpu_vm_debug && dma_resv_trylock(resv)) 2112 clear = false; 2113 /* Somebody else is using the BO right now */ 2114 else 2115 clear = true; 2116 2117 r = amdgpu_vm_bo_update(adev, bo_va, clear); 2118 if (r) 2119 return r; 2120 2121 if (!clear) 2122 dma_resv_unlock(resv); 2123 spin_lock(&vm->invalidated_lock); 2124 } 2125 spin_unlock(&vm->invalidated_lock); 2126 2127 return 0; 2128} 2129 2130/** 2131 * amdgpu_vm_bo_add - add a bo to a specific vm 2132 * 2133 * @adev: amdgpu_device pointer 2134 * @vm: requested vm 2135 * @bo: amdgpu buffer object 2136 * 2137 * Add @bo into the requested vm. 2138 * Add @bo to the list of bos associated with the vm 2139 * 2140 * Returns: 2141 * Newly added bo_va or NULL for failure 2142 * 2143 * Object has to be reserved! 2144 */ 2145struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev, 2146 struct amdgpu_vm *vm, 2147 struct amdgpu_bo *bo) 2148{ 2149 struct amdgpu_bo_va *bo_va; 2150 2151 bo_va = kzalloc(sizeof(struct amdgpu_bo_va), GFP_KERNEL); 2152 if (bo_va == NULL) { 2153 return NULL; 2154 } 2155 amdgpu_vm_bo_base_init(&bo_va->base, vm, bo); 2156 2157 bo_va->ref_count = 1; 2158 INIT_LIST_HEAD(&bo_va->valids); 2159 INIT_LIST_HEAD(&bo_va->invalids); 2160 2161 if (!bo) 2162 return bo_va; 2163 2164 if (amdgpu_dmabuf_is_xgmi_accessible(adev, bo)) { 2165 bo_va->is_xgmi = true; 2166 /* Power up XGMI if it can be potentially used */ 2167 amdgpu_xgmi_set_pstate(adev, AMDGPU_XGMI_PSTATE_MAX_VEGA20); 2168 } 2169 2170 return bo_va; 2171} 2172 2173 2174/** 2175 * amdgpu_vm_bo_insert_map - insert a new mapping 2176 * 2177 * @adev: amdgpu_device pointer 2178 * @bo_va: bo_va to store the address 2179 * @mapping: the mapping to insert 2180 * 2181 * Insert a new mapping into all structures.
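 *
 * The new mapping starts out on the bo_va's invalids list and only becomes
 * valid once amdgpu_vm_bo_update() has written its page table entries. For
 * PRT mappings an additional PRT reference is taken here; per-VM BOs are
 * moved to the vm's moved list so the next submission picks them up.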
2182 */ 2183static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev, 2184 struct amdgpu_bo_va *bo_va, 2185 struct amdgpu_bo_va_mapping *mapping) 2186{ 2187 struct amdgpu_vm *vm = bo_va->base.vm; 2188 struct amdgpu_bo *bo = bo_va->base.bo; 2189 2190 mapping->bo_va = bo_va; 2191 list_add(&mapping->list, &bo_va->invalids); 2192 amdgpu_vm_it_insert(mapping, &vm->va); 2193 2194 if (mapping->flags & AMDGPU_PTE_PRT) 2195 amdgpu_vm_prt_get(adev); 2196 2197 if (bo && bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv && 2198 !bo_va->base.moved) { 2199 list_move(&bo_va->base.vm_status, &vm->moved); 2200 } 2201 trace_amdgpu_vm_bo_map(bo_va, mapping); 2202} 2203 2204/* Validate operation parameters to prevent potential abuse */ 2205static int amdgpu_vm_verify_parameters(struct amdgpu_device *adev, 2206 struct amdgpu_bo *bo, 2207 uint64_t saddr, 2208 uint64_t offset, 2209 uint64_t size) 2210{ 2211 uint64_t tmp, lpfn; 2212 2213 if (saddr & AMDGPU_GPU_PAGE_MASK 2214 || offset & AMDGPU_GPU_PAGE_MASK 2215 || size & AMDGPU_GPU_PAGE_MASK) 2216 return -EINVAL; 2217 2218 if (check_add_overflow(saddr, size, &tmp) 2219 || check_add_overflow(offset, size, &tmp) 2220 || size == 0 /* which also leads to end < begin */) 2221 return -EINVAL; 2222 2223 /* make sure the object fits at this offset */ 2224 if (bo && offset + size > amdgpu_bo_size(bo)) 2225 return -EINVAL; 2226 2227 /* Ensure the last pfn does not exceed max_pfn */ 2228 lpfn = (saddr + size - 1) >> AMDGPU_GPU_PAGE_SHIFT; 2229 if (lpfn >= adev->vm_manager.max_pfn) 2230 return -EINVAL; 2231 2232 return 0; 2233} 2234 2235/** 2236 * amdgpu_vm_bo_map - map bo inside a vm 2237 * 2238 * @adev: amdgpu_device pointer 2239 * @bo_va: bo_va to store the address 2240 * @saddr: where to map the BO 2241 * @offset: requested offset in the BO 2242 * @size: BO size in bytes 2243 * @flags: attributes of pages (read/write/valid/etc.) 2244 * 2245 * Add a mapping of the BO at the specified addr into the VM. 2246 * 2247 * Returns: 2248 * 0 for success, error for failure. 2249 * 2250 * Object has to be reserved and unreserved outside!
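 *
 * Illustrative sketch only (not part of the driver); @va stands for a free,
 * GPU-page-aligned virtual address chosen by the caller, which has already
 * reserved @bo:
 *
 *	r = amdgpu_vm_bo_map(adev, bo_va, va, 0, amdgpu_bo_size(bo),
 *			     AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE);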
2251 */ 2252int amdgpu_vm_bo_map(struct amdgpu_device *adev, 2253 struct amdgpu_bo_va *bo_va, 2254 uint64_t saddr, uint64_t offset, 2255 uint64_t size, uint64_t flags) 2256{ 2257 struct amdgpu_bo_va_mapping *mapping, *tmp; 2258 struct amdgpu_bo *bo = bo_va->base.bo; 2259 struct amdgpu_vm *vm = bo_va->base.vm; 2260 uint64_t eaddr; 2261 int r; 2262 2263 r = amdgpu_vm_verify_parameters(adev, bo, saddr, offset, size); 2264 if (r) 2265 return r; 2266 2267 saddr /= AMDGPU_GPU_PAGE_SIZE; 2268 eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE; 2269 2270 tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr); 2271 if (tmp) { 2272 /* bo and tmp overlap, invalid addr */ 2273 dev_err(adev->dev, "bo %p va 0x%010Lx-0x%010Lx conflict with " 2274 "0x%010Lx-0x%010Lx\n", bo, saddr, eaddr, 2275 tmp->start, tmp->last + 1); 2276 return -EINVAL; 2277 } 2278 2279 mapping = kmalloc(sizeof(*mapping), GFP_KERNEL); 2280 if (!mapping) 2281 return -ENOMEM; 2282 2283 mapping->start = saddr; 2284 mapping->last = eaddr; 2285 mapping->offset = offset; 2286 mapping->flags = flags; 2287 2288 amdgpu_vm_bo_insert_map(adev, bo_va, mapping); 2289 2290 return 0; 2291} 2292 2293/** 2294 * amdgpu_vm_bo_replace_map - map bo inside a vm, replacing existing mappings 2295 * 2296 * @adev: amdgpu_device pointer 2297 * @bo_va: bo_va to store the address 2298 * @saddr: where to map the BO 2299 * @offset: requested offset in the BO 2300 * @size: BO size in bytes 2301 * @flags: attributes of pages (read/write/valid/etc.) 2302 * 2303 * Add a mapping of the BO at the specified addr into the VM. Replace existing 2304 * mappings as we do so. 2305 * 2306 * Returns: 2307 * 0 for success, error for failure. 2308 * 2309 * Object has to be reserved and unreserved outside! 2310 */ 2311int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev, 2312 struct amdgpu_bo_va *bo_va, 2313 uint64_t saddr, uint64_t offset, 2314 uint64_t size, uint64_t flags) 2315{ 2316 struct amdgpu_bo_va_mapping *mapping; 2317 struct amdgpu_bo *bo = bo_va->base.bo; 2318 uint64_t eaddr; 2319 int r; 2320 2321 r = amdgpu_vm_verify_parameters(adev, bo, saddr, offset, size); 2322 if (r) 2323 return r; 2324 2325 /* Allocate all the needed memory */ 2326 mapping = kmalloc(sizeof(*mapping), GFP_KERNEL); 2327 if (!mapping) 2328 return -ENOMEM; 2329 2330 r = amdgpu_vm_bo_clear_mappings(adev, bo_va->base.vm, saddr, size); 2331 if (r) { 2332 kfree(mapping); 2333 return r; 2334 } 2335 2336 saddr /= AMDGPU_GPU_PAGE_SIZE; 2337 eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE; 2338 2339 mapping->start = saddr; 2340 mapping->last = eaddr; 2341 mapping->offset = offset; 2342 mapping->flags = flags; 2343 2344 amdgpu_vm_bo_insert_map(adev, bo_va, mapping); 2345 2346 return 0; 2347} 2348 2349/** 2350 * amdgpu_vm_bo_unmap - remove bo mapping from vm 2351 * 2352 * @adev: amdgpu_device pointer 2353 * @bo_va: bo_va to remove the address from 2354 * @saddr: where the BO is mapped 2355 * 2356 * Remove a mapping of the BO at the specified addr from the VM. 2357 * 2358 * Returns: 2359 * 0 for success, error for failure. 2360 * 2361 * Object has to be reserved and unreserved outside!
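 *
 * Illustrative sketch only, using the same hypothetical @va that was passed
 * to amdgpu_vm_bo_map() above:
 *
 *	r = amdgpu_vm_bo_unmap(adev, bo_va, va);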
2362 */ 2363int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, 2364 struct amdgpu_bo_va *bo_va, 2365 uint64_t saddr) 2366{ 2367 struct amdgpu_bo_va_mapping *mapping; 2368 struct amdgpu_vm *vm = bo_va->base.vm; 2369 bool valid = true; 2370 2371 saddr /= AMDGPU_GPU_PAGE_SIZE; 2372 2373 list_for_each_entry(mapping, &bo_va->valids, list) { 2374 if (mapping->start == saddr) 2375 break; 2376 } 2377 2378 if (&mapping->list == &bo_va->valids) { 2379 valid = false; 2380 2381 list_for_each_entry(mapping, &bo_va->invalids, list) { 2382 if (mapping->start == saddr) 2383 break; 2384 } 2385 2386 if (&mapping->list == &bo_va->invalids) 2387 return -ENOENT; 2388 } 2389 2390 list_del(&mapping->list); 2391 amdgpu_vm_it_remove(mapping, &vm->va); 2392 mapping->bo_va = NULL; 2393 trace_amdgpu_vm_bo_unmap(bo_va, mapping); 2394 2395 if (valid) 2396 list_add(&mapping->list, &vm->freed); 2397 else 2398 amdgpu_vm_free_mapping(adev, vm, mapping, 2399 bo_va->last_pt_update); 2400 2401 return 0; 2402} 2403 2404/** 2405 * amdgpu_vm_bo_clear_mappings - remove all mappings in a specific range 2406 * 2407 * @adev: amdgpu_device pointer 2408 * @vm: VM structure to use 2409 * @saddr: start of the range 2410 * @size: size of the range 2411 * 2412 * Remove all mappings in a range, split them as appropriate. 2413 * 2414 * Returns: 2415 * 0 for success, error for failure. 2416 */ 2417int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, 2418 struct amdgpu_vm *vm, 2419 uint64_t saddr, uint64_t size) 2420{ 2421 struct amdgpu_bo_va_mapping *before, *after, *tmp, *next; 2422 LIST_HEAD(removed); 2423 uint64_t eaddr; 2424 int r; 2425 2426 r = amdgpu_vm_verify_parameters(adev, NULL, saddr, 0, size); 2427 if (r) 2428 return r; 2429 2430 saddr /= AMDGPU_GPU_PAGE_SIZE; 2431 eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE; 2432 2433 /* Allocate all the needed memory */ 2434 before = kzalloc(sizeof(*before), GFP_KERNEL); 2435 if (!before) 2436 return -ENOMEM; 2437 INIT_LIST_HEAD(&before->list); 2438 2439 after = kzalloc(sizeof(*after), GFP_KERNEL); 2440 if (!after) { 2441 kfree(before); 2442 return -ENOMEM; 2443 } 2444 INIT_LIST_HEAD(&after->list); 2445 2446 /* Now gather all removed mappings */ 2447 tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr); 2448 while (tmp) { 2449 /* Remember mapping split at the start */ 2450 if (tmp->start < saddr) { 2451 before->start = tmp->start; 2452 before->last = saddr - 1; 2453 before->offset = tmp->offset; 2454 before->flags = tmp->flags; 2455 before->bo_va = tmp->bo_va; 2456 list_add(&before->list, &tmp->bo_va->invalids); 2457 } 2458 2459 /* Remember mapping split at the end */ 2460 if (tmp->last > eaddr) { 2461 after->start = eaddr + 1; 2462 after->last = tmp->last; 2463 after->offset = tmp->offset; 2464 after->offset += (after->start - tmp->start) << PAGE_SHIFT; 2465 after->flags = tmp->flags; 2466 after->bo_va = tmp->bo_va; 2467 list_add(&after->list, &tmp->bo_va->invalids); 2468 } 2469 2470 list_del(&tmp->list); 2471 list_add(&tmp->list, &removed); 2472 2473 tmp = amdgpu_vm_it_iter_next(tmp, saddr, eaddr); 2474 } 2475 2476 /* And free them up */ 2477 list_for_each_entry_safe(tmp, next, &removed, list) { 2478 amdgpu_vm_it_remove(tmp, &vm->va); 2479 list_del(&tmp->list); 2480 2481 if (tmp->start < saddr) 2482 tmp->start = saddr; 2483 if (tmp->last > eaddr) 2484 tmp->last = eaddr; 2485 2486 tmp->bo_va = NULL; 2487 list_add(&tmp->list, &vm->freed); 2488 trace_amdgpu_vm_bo_unmap(NULL, tmp); 2489 } 2490 2491 /* Insert partial mapping before the range */ 2492 if (!list_empty(&before->list)) { 
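		/* The split-off head is a new mapping, so it needs its own
		 * PRT reference; the original mapping drops its reference
		 * when it is eventually freed from the vm->freed list.
		 */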
2493 amdgpu_vm_it_insert(before, &vm->va); 2494 if (before->flags & AMDGPU_PTE_PRT) 2495 amdgpu_vm_prt_get(adev); 2496 } else { 2497 kfree(before); 2498 } 2499 2500 /* Insert partial mapping after the range */ 2501 if (!list_empty(&after->list)) { 2502 amdgpu_vm_it_insert(after, &vm->va); 2503 if (after->flags & AMDGPU_PTE_PRT) 2504 amdgpu_vm_prt_get(adev); 2505 } else { 2506 kfree(after); 2507 } 2508 2509 return 0; 2510} 2511 2512/** 2513 * amdgpu_vm_bo_lookup_mapping - find mapping by address 2514 * 2515 * @vm: the requested VM 2516 * @addr: the address 2517 * 2518 * Find a mapping by its address. 2519 * 2520 * Returns: 2521 * The amdgpu_bo_va_mapping matching @addr, or NULL if none is found 2522 * 2523 */ 2524struct amdgpu_bo_va_mapping *amdgpu_vm_bo_lookup_mapping(struct amdgpu_vm *vm, 2525 uint64_t addr) 2526{ 2527 return amdgpu_vm_it_iter_first(&vm->va, addr, addr); 2528} 2529 2530/** 2531 * amdgpu_vm_bo_trace_cs - trace all reserved mappings 2532 * 2533 * @vm: the requested vm 2534 * @ticket: CS ticket 2535 * 2536 * Trace all mappings of BOs reserved during a command submission. 2537 */ 2538void amdgpu_vm_bo_trace_cs(struct amdgpu_vm *vm, struct ww_acquire_ctx *ticket) 2539{ 2540 struct amdgpu_bo_va_mapping *mapping; 2541 2542 if (!trace_amdgpu_vm_bo_cs_enabled()) 2543 return; 2544 2545 for (mapping = amdgpu_vm_it_iter_first(&vm->va, 0, U64_MAX); mapping; 2546 mapping = amdgpu_vm_it_iter_next(mapping, 0, U64_MAX)) { 2547 if (mapping->bo_va && mapping->bo_va->base.bo) { 2548 struct amdgpu_bo *bo; 2549 2550 bo = mapping->bo_va->base.bo; 2551 if (dma_resv_locking_ctx(bo->tbo.base.resv) != 2552 ticket) 2553 continue; 2554 } 2555 2556 trace_amdgpu_vm_bo_cs(mapping); 2557 } 2558} 2559 2560/** 2561 * amdgpu_vm_bo_rmv - remove a bo from a specific vm 2562 * 2563 * @adev: amdgpu_device pointer 2564 * @bo_va: requested bo_va 2565 * 2566 * Remove @bo_va->bo from the requested vm. 2567 * 2568 * Object has to be reserved! 2569 */ 2570void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, 2571 struct amdgpu_bo_va *bo_va) 2572{ 2573 struct amdgpu_bo_va_mapping *mapping, *next; 2574 struct amdgpu_bo *bo = bo_va->base.bo; 2575 struct amdgpu_vm *vm = bo_va->base.vm; 2576 struct amdgpu_vm_bo_base **base; 2577 2578 if (bo) { 2579 if (bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv) 2580 vm->bulk_moveable = false; 2581 2582 for (base = &bo_va->base.bo->vm_bo; *base; 2583 base = &(*base)->next) { 2584 if (*base != &bo_va->base) 2585 continue; 2586 2587 *base = bo_va->base.next; 2588 break; 2589 } 2590 } 2591 2592 spin_lock(&vm->invalidated_lock); 2593 list_del(&bo_va->base.vm_status); 2594 spin_unlock(&vm->invalidated_lock); 2595 2596 list_for_each_entry_safe(mapping, next, &bo_va->valids, list) { 2597 list_del(&mapping->list); 2598 amdgpu_vm_it_remove(mapping, &vm->va); 2599 mapping->bo_va = NULL; 2600 trace_amdgpu_vm_bo_unmap(bo_va, mapping); 2601 list_add(&mapping->list, &vm->freed); 2602 } 2603 list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) { 2604 list_del(&mapping->list); 2605 amdgpu_vm_it_remove(mapping, &vm->va); 2606 amdgpu_vm_free_mapping(adev, vm, mapping, 2607 bo_va->last_pt_update); 2608 } 2609 2610 dma_fence_put(bo_va->last_pt_update); 2611 2612 if (bo && bo_va->is_xgmi) 2613 amdgpu_xgmi_set_pstate(adev, AMDGPU_XGMI_PSTATE_MIN); 2614 2615 kfree(bo_va); 2616} 2617 2618/** 2619 * amdgpu_vm_evictable - check if we can evict a VM 2620 * 2621 * @bo: A page table of the VM. 2622 * 2623 * Check if it is possible to evict a VM.
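 *
 * Returns:
 * True if the page tables can be evicted, false if they are still busy or a
 * page table update is still pending.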
2624 */ 2625bool amdgpu_vm_evictable(struct amdgpu_bo *bo) 2626{ 2627 struct amdgpu_vm_bo_base *bo_base = bo->vm_bo; 2628 2629 /* Page tables of a destroyed VM can go away immediately */ 2630 if (!bo_base || !bo_base->vm) 2631 return true; 2632 2633 /* Don't evict VM page tables while they are busy */ 2634 if (!dma_resv_test_signaled_rcu(bo->tbo.base.resv, true)) 2635 return false; 2636 2637 /* Try to block ongoing updates */ 2638 if (!amdgpu_vm_eviction_trylock(bo_base->vm)) 2639 return false; 2640 2641 /* Don't evict VM page tables while they are updated */ 2642 if (!dma_fence_is_signaled(bo_base->vm->last_unlocked)) { 2643 amdgpu_vm_eviction_unlock(bo_base->vm); 2644 return false; 2645 } 2646 2647 bo_base->vm->evicting = true; 2648 amdgpu_vm_eviction_unlock(bo_base->vm); 2649 return true; 2650} 2651 2652/** 2653 * amdgpu_vm_bo_invalidate - mark the bo as invalid 2654 * 2655 * @adev: amdgpu_device pointer 2656 * @bo: amdgpu buffer object 2657 * @evicted: is the BO evicted 2658 * 2659 * Mark @bo as invalid. 2660 */ 2661void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, 2662 struct amdgpu_bo *bo, bool evicted) 2663{ 2664 struct amdgpu_vm_bo_base *bo_base; 2665 2666 /* shadow bo doesn't have bo base, its validation needs its parent */ 2667 if (bo->parent && bo->parent->shadow == bo) 2668 bo = bo->parent; 2669 2670 for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) { 2671 struct amdgpu_vm *vm = bo_base->vm; 2672 2673 if (evicted && bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv) { 2674 amdgpu_vm_bo_evicted(bo_base); 2675 continue; 2676 } 2677 2678 if (bo_base->moved) 2679 continue; 2680 bo_base->moved = true; 2681 2682 if (bo->tbo.type == ttm_bo_type_kernel) 2683 amdgpu_vm_bo_relocated(bo_base); 2684 else if (bo->tbo.base.resv == vm->root.base.bo->tbo.base.resv) 2685 amdgpu_vm_bo_moved(bo_base); 2686 else 2687 amdgpu_vm_bo_invalidated(bo_base); 2688 } 2689} 2690 2691/** 2692 * amdgpu_vm_get_block_size - calculate VM page table size as power of two 2693 * 2694 * @vm_size: VM size 2695 * 2696 * Returns: 2697 * VM page table as power of two 2698 */ 2699static uint32_t amdgpu_vm_get_block_size(uint64_t vm_size) 2700{ 2701 /* Total bits covered by PD + PTs */ 2702 unsigned bits = ilog2(vm_size) + 18; 2703 2704 /* Make sure the PD is 4K in size up to 8GB address space. 2705 Above that split equal between PD and PTs */ 2706 if (vm_size <= 8) 2707 return (bits - 9); 2708 else 2709 return ((bits + 3) / 2); 2710} 2711 2712/** 2713 * amdgpu_vm_adjust_size - adjust vm size, block size and fragment size 2714 * 2715 * @adev: amdgpu_device pointer 2716 * @min_vm_size: the minimum vm size in GB if it's set auto 2717 * @fragment_size_default: Default PTE fragment size 2718 * @max_level: max VMPT level 2719 * @max_bits: max address space size in bits 2720 * 2721 */ 2722void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size, 2723 uint32_t fragment_size_default, unsigned max_level, 2724 unsigned max_bits) 2725{ 2726 unsigned int max_size = 1 << (max_bits - 30); 2727 unsigned int vm_size; 2728 uint64_t tmp; 2729 2730 /* adjust vm size first */ 2731 if (amdgpu_vm_size != -1) { 2732 vm_size = amdgpu_vm_size; 2733 if (vm_size > max_size) { 2734 dev_warn(adev->dev, "VM size (%d) too large, max is %u GB\n", 2735 amdgpu_vm_size, max_size); 2736 vm_size = max_size; 2737 } 2738 } else { 2739 struct sysinfo si; 2740 unsigned int phys_ram_gb; 2741 2742 /* Optimal VM size depends on the amount of physical 2743 * RAM available. 
Underlying requirements and 2744 * assumptions: 2745 * 2746 * - Need to map system memory and VRAM from all GPUs 2747 * - VRAM from other GPUs not known here 2748 * - Assume VRAM <= system memory 2749 * - On GFX8 and older, VM space can be segmented for 2750 * different MTYPEs 2751 * - Need to allow room for fragmentation, guard pages etc. 2752 * 2753 * This adds up to a rough guess of system memory x3. 2754 * Round up to power of two to maximize the available 2755 * VM size with the given page table size. 2756 */ 2757 si_meminfo(&si); 2758 phys_ram_gb = ((uint64_t)si.totalram * si.mem_unit + 2759 (1 << 30) - 1) >> 30; 2760 vm_size = roundup_pow_of_two( 2761 min(max(phys_ram_gb * 3, min_vm_size), max_size)); 2762 } 2763 2764 adev->vm_manager.max_pfn = (uint64_t)vm_size << 18; 2765 2766 tmp = roundup_pow_of_two(adev->vm_manager.max_pfn); 2767 if (amdgpu_vm_block_size != -1) 2768 tmp >>= amdgpu_vm_block_size - 9; 2769 tmp = DIV_ROUND_UP(fls64(tmp) - 1, 9) - 1; 2770 adev->vm_manager.num_level = min(max_level, (unsigned)tmp); 2771 switch (adev->vm_manager.num_level) { 2772 case 3: 2773 adev->vm_manager.root_level = AMDGPU_VM_PDB2; 2774 break; 2775 case 2: 2776 adev->vm_manager.root_level = AMDGPU_VM_PDB1; 2777 break; 2778 case 1: 2779 adev->vm_manager.root_level = AMDGPU_VM_PDB0; 2780 break; 2781 default: 2782 dev_err(adev->dev, "VMPT only supports 2~4+1 levels\n"); 2783 } 2784 /* block size depends on vm size and hw setup */ 2785 if (amdgpu_vm_block_size != -1) 2786 adev->vm_manager.block_size = 2787 min((unsigned)amdgpu_vm_block_size, max_bits 2788 - AMDGPU_GPU_PAGE_SHIFT 2789 - 9 * adev->vm_manager.num_level); 2790 else if (adev->vm_manager.num_level > 1) 2791 adev->vm_manager.block_size = 9; 2792 else 2793 adev->vm_manager.block_size = amdgpu_vm_get_block_size(tmp); 2794 2795 if (amdgpu_vm_fragment_size == -1) 2796 adev->vm_manager.fragment_size = fragment_size_default; 2797 else 2798 adev->vm_manager.fragment_size = amdgpu_vm_fragment_size; 2799 2800 DRM_INFO("vm size is %u GB, %u levels, block size is %u-bit, fragment size is %u-bit\n", 2801 vm_size, adev->vm_manager.num_level + 1, 2802 adev->vm_manager.block_size, 2803 adev->vm_manager.fragment_size); 2804} 2805 2806/** 2807 * amdgpu_vm_wait_idle - wait for the VM to become idle 2808 * 2809 * @vm: VM object to wait for 2810 * @timeout: timeout to wait for VM to become idle 2811 */ 2812long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout) 2813{ 2814 timeout = dma_resv_wait_timeout_rcu(vm->root.base.bo->tbo.base.resv, 2815 true, true, timeout); 2816 if (timeout <= 0) 2817 return timeout; 2818 2819 return dma_fence_wait_timeout(vm->last_unlocked, true, timeout); 2820} 2821 2822/** 2823 * amdgpu_vm_init - initialize a vm instance 2824 * 2825 * @adev: amdgpu_device pointer 2826 * @vm: requested vm 2827 * @vm_context: Indicates whether it is a GFX or Compute context 2828 * @pasid: Process address space identifier 2829 * 2830 * Init @vm fields. 2831 * 2832 * Returns: 2833 * 0 for success, error for failure.
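 *
 * Illustrative sketch only (assumes a caller similar to the driver-open path
 * with an amdgpu_fpriv *fpriv and no PASID assigned yet):
 *
 *	r = amdgpu_vm_init(adev, &fpriv->vm, AMDGPU_VM_CONTEXT_GFX, 0);
 *	if (r)
 *		return r;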
2834 */ 2835int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, 2836 int vm_context, u32 pasid) 2837{ 2838 struct amdgpu_bo_param bp; 2839 struct amdgpu_bo *root; 2840 int r, i; 2841 2842 vm->va = RB_ROOT_CACHED; 2843 for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) 2844 vm->reserved_vmid[i] = NULL; 2845 INIT_LIST_HEAD(&vm->evicted); 2846 INIT_LIST_HEAD(&vm->relocated); 2847 INIT_LIST_HEAD(&vm->moved); 2848 INIT_LIST_HEAD(&vm->idle); 2849 INIT_LIST_HEAD(&vm->invalidated); 2850 spin_lock_init(&vm->invalidated_lock); 2851 INIT_LIST_HEAD(&vm->freed); 2852 2853 2854 /* create scheduler entities for page table updates */ 2855 r = drm_sched_entity_init(&vm->immediate, DRM_SCHED_PRIORITY_NORMAL, 2856 adev->vm_manager.vm_pte_scheds, 2857 adev->vm_manager.vm_pte_num_scheds, NULL); 2858 if (r) 2859 return r; 2860 2861 r = drm_sched_entity_init(&vm->delayed, DRM_SCHED_PRIORITY_NORMAL, 2862 adev->vm_manager.vm_pte_scheds, 2863 adev->vm_manager.vm_pte_num_scheds, NULL); 2864 if (r) 2865 goto error_free_immediate; 2866 2867 vm->pte_support_ats = false; 2868 vm->is_compute_context = false; 2869 2870 if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) { 2871 vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & 2872 AMDGPU_VM_USE_CPU_FOR_COMPUTE); 2873 2874 if (adev->asic_type == CHIP_RAVEN) 2875 vm->pte_support_ats = true; 2876 } else { 2877 vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & 2878 AMDGPU_VM_USE_CPU_FOR_GFX); 2879 } 2880 DRM_DEBUG_DRIVER("VM update mode is %s\n", 2881 vm->use_cpu_for_update ? "CPU" : "SDMA"); 2882 WARN_ONCE((vm->use_cpu_for_update && 2883 !amdgpu_gmc_vram_full_visible(&adev->gmc)), 2884 "CPU update of VM recommended only for large BAR system\n"); 2885 2886 if (vm->use_cpu_for_update) 2887 vm->update_funcs = &amdgpu_vm_cpu_funcs; 2888 else 2889 vm->update_funcs = &amdgpu_vm_sdma_funcs; 2890 vm->last_update = NULL; 2891 vm->last_unlocked = dma_fence_get_stub(); 2892 2893 mutex_init(&vm->eviction_lock); 2894 vm->evicting = false; 2895 2896 amdgpu_vm_bo_param(adev, vm, adev->vm_manager.root_level, false, &bp); 2897 if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) 2898 bp.flags &= ~AMDGPU_GEM_CREATE_SHADOW; 2899 r = amdgpu_bo_create(adev, &bp, &root); 2900 if (r) 2901 goto error_free_delayed; 2902 2903 r = amdgpu_bo_reserve(root, true); 2904 if (r) 2905 goto error_free_root; 2906 2907 r = dma_resv_reserve_shared(root->tbo.base.resv, 1); 2908 if (r) 2909 goto error_unreserve; 2910 2911 amdgpu_vm_bo_base_init(&vm->root.base, vm, root); 2912 2913 r = amdgpu_vm_clear_bo(adev, vm, root, false); 2914 if (r) 2915 goto error_unreserve; 2916 2917 amdgpu_bo_unreserve(vm->root.base.bo); 2918 2919 if (pasid) { 2920 unsigned long flags; 2921 2922 spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); 2923 r = idr_alloc(&adev->vm_manager.pasid_idr, vm, pasid, pasid + 1, 2924 GFP_ATOMIC); 2925 spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); 2926 if (r < 0) 2927 goto error_free_root; 2928 2929 vm->pasid = pasid; 2930 } 2931 2932 INIT_KFIFO(vm->faults); 2933 2934 return 0; 2935 2936error_unreserve: 2937 amdgpu_bo_unreserve(vm->root.base.bo); 2938 2939error_free_root: 2940 amdgpu_bo_unref(&vm->root.base.bo->shadow); 2941 amdgpu_bo_unref(&vm->root.base.bo); 2942 vm->root.base.bo = NULL; 2943 2944error_free_delayed: 2945 dma_fence_put(vm->last_unlocked); 2946 drm_sched_entity_destroy(&vm->delayed); 2947 2948error_free_immediate: 2949 drm_sched_entity_destroy(&vm->immediate); 2950 2951 return r; 2952} 2953 2954/** 2955 * amdgpu_vm_check_clean_reserved - check if a VM 
is clean 2956 * 2957 * @adev: amdgpu_device pointer 2958 * @vm: the VM to check 2959 * 2960 * Check all entries of the root PD. If any lower-level PDs are allocated, 2961 * page tables are already being created and filled, so the VM is not 2962 * clean. 2963 * 2964 * Returns: 2965 * 0 if this VM is clean, -EINVAL otherwise 2966 */ 2967static int amdgpu_vm_check_clean_reserved(struct amdgpu_device *adev, 2968 struct amdgpu_vm *vm) 2969{ 2970 enum amdgpu_vm_level root = adev->vm_manager.root_level; 2971 unsigned int entries = amdgpu_vm_num_entries(adev, root); 2972 unsigned int i = 0; 2973 2974 if (!(vm->root.entries)) 2975 return 0; 2976 2977 for (i = 0; i < entries; i++) { 2978 if (vm->root.entries[i].base.bo) 2979 return -EINVAL; 2980 } 2981 2982 return 0; 2983} 2984 2985/** 2986 * amdgpu_vm_make_compute - Turn a GFX VM into a compute VM 2987 * 2988 * @adev: amdgpu_device pointer 2989 * @vm: requested vm 2990 * @pasid: pasid to use 2991 * 2992 * This only works on GFX VMs that don't have any BOs added and no 2993 * page tables allocated yet. 2994 * 2995 * Changes the following VM parameters: 2996 * - use_cpu_for_update 2997 * - pte_support_ats 2998 * - pasid (old PASID is released, because compute manages its own PASIDs) 2999 * 3000 * Reinitializes the page directory to reflect the changed ATS 3001 * setting. 3002 * 3003 * Returns: 3004 * 0 for success, -errno for errors. 3005 */ 3006int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, 3007 u32 pasid) 3008{ 3009 bool pte_support_ats = (adev->asic_type == CHIP_RAVEN); 3010 int r; 3011 3012 r = amdgpu_bo_reserve(vm->root.base.bo, true); 3013 if (r) 3014 return r; 3015 3016 /* Sanity checks */ 3017 r = amdgpu_vm_check_clean_reserved(adev, vm); 3018 if (r) 3019 goto unreserve_bo; 3020 3021 if (pasid) { 3022 unsigned long flags; 3023 3024 spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); 3025 r = idr_alloc(&adev->vm_manager.pasid_idr, vm, pasid, pasid + 1, 3026 GFP_ATOMIC); 3027 spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); 3028 3029 if (r == -ENOSPC) 3030 goto unreserve_bo; 3031 r = 0; 3032 } 3033 3034 /* Check if PD needs to be reinitialized and do it before 3035 * changing any other state, in case it fails. 3036 */ 3037 if (pte_support_ats != vm->pte_support_ats) { 3038 vm->pte_support_ats = pte_support_ats; 3039 r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo, false); 3040 if (r) 3041 goto free_idr; 3042 } 3043 3044 /* Update VM state */ 3045 vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & 3046 AMDGPU_VM_USE_CPU_FOR_COMPUTE); 3047 DRM_DEBUG_DRIVER("VM update mode is %s\n", 3048 vm->use_cpu_for_update ? "CPU" : "SDMA"); 3049 WARN_ONCE((vm->use_cpu_for_update && 3050 !amdgpu_gmc_vram_full_visible(&adev->gmc)), 3051 "CPU update of VM recommended only for large BAR system\n"); 3052 3053 if (vm->use_cpu_for_update) { 3054 /* Sync with last SDMA update/clear before switching to CPU */ 3055 r = amdgpu_bo_sync_wait(vm->root.base.bo, 3056 AMDGPU_FENCE_OWNER_UNDEFINED, true); 3057 if (r) 3058 goto free_idr; 3059 3060 vm->update_funcs = &amdgpu_vm_cpu_funcs; 3061 } else { 3062 vm->update_funcs = &amdgpu_vm_sdma_funcs; 3063 } 3064 dma_fence_put(vm->last_update); 3065 vm->last_update = NULL; 3066 vm->is_compute_context = true; 3067 3068 if (vm->pasid) { 3069 unsigned long flags; 3070 3071 spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); 3072 idr_remove(&adev->vm_manager.pasid_idr, vm->pasid); 3073 spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); 3074 3075 /* Free the original amdgpu-allocated pasid; 3076 * it will be replaced with a kfd-allocated pasid 3077 */ 3078 amdgpu_pasid_free(vm->pasid); 3079 vm->pasid = 0; 3080 } 3081 3082 /* Free the shadow bo for compute VM */ 3083 amdgpu_bo_unref(&vm->root.base.bo->shadow); 3084 3085 if (pasid) 3086 vm->pasid = pasid; 3087 3088 goto unreserve_bo; 3089 3090free_idr: 3091 if (pasid) { 3092 unsigned long flags; 3093 3094 spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); 3095 idr_remove(&adev->vm_manager.pasid_idr, pasid); 3096 spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); 3097 } 3098unreserve_bo: 3099 amdgpu_bo_unreserve(vm->root.base.bo); 3100 return r; 3101} 3102 3103/** 3104 * amdgpu_vm_release_compute - release a compute vm 3105 * @adev: amdgpu_device pointer 3106 * @vm: a vm turned into a compute vm by calling amdgpu_vm_make_compute 3107 * 3108 * This is the counterpart of amdgpu_vm_make_compute(). It decouples the 3109 * compute pasid from the vm. Compute should stop using the vm after this call. 3110 */ 3111void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) 3112{ 3113 if (vm->pasid) { 3114 unsigned long flags; 3115 3116 spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); 3117 idr_remove(&adev->vm_manager.pasid_idr, vm->pasid); 3118 spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); 3119 } 3120 vm->pasid = 0; 3121 vm->is_compute_context = false; 3122} 3123 3124/** 3125 * amdgpu_vm_fini - tear down a vm instance 3126 * 3127 * @adev: amdgpu_device pointer 3128 * @vm: requested vm 3129 * 3130 * Tear down @vm.
3131 * Unbind the VM and remove all bos from the vm bo list 3132 */ 3133void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) 3134{ 3135 struct amdgpu_bo_va_mapping *mapping, *tmp; 3136 bool prt_fini_needed = !!adev->gmc.gmc_funcs->set_prt; 3137 struct amdgpu_bo *root; 3138 int i; 3139 3140 amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm); 3141 3142 root = amdgpu_bo_ref(vm->root.base.bo); 3143 amdgpu_bo_reserve(root, true); 3144 if (vm->pasid) { 3145 unsigned long flags; 3146 3147 spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); 3148 idr_remove(&adev->vm_manager.pasid_idr, vm->pasid); 3149 spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); 3150 vm->pasid = 0; 3151 } 3152 3153 dma_fence_wait(vm->last_unlocked, false); 3154 dma_fence_put(vm->last_unlocked); 3155 3156 list_for_each_entry_safe(mapping, tmp, &vm->freed, list) { 3157 if (mapping->flags & AMDGPU_PTE_PRT && prt_fini_needed) { 3158 amdgpu_vm_prt_fini(adev, vm); 3159 prt_fini_needed = false; 3160 } 3161 3162 list_del(&mapping->list); 3163 amdgpu_vm_free_mapping(adev, vm, mapping, NULL); 3164 } 3165 3166 amdgpu_vm_free_pts(adev, vm, NULL); 3167 amdgpu_bo_unreserve(root); 3168 amdgpu_bo_unref(&root); 3169 WARN_ON(vm->root.base.bo); 3170 3171 drm_sched_entity_destroy(&vm->immediate); 3172 drm_sched_entity_destroy(&vm->delayed); 3173 3174 if (!RB_EMPTY_ROOT(&vm->va.rb_root)) { 3175 dev_err(adev->dev, "still active bo inside vm\n"); 3176 } 3177 rbtree_postorder_for_each_entry_safe(mapping, tmp, 3178 &vm->va.rb_root, rb) { 3179 /* Don't remove the mapping here, we don't want to trigger a 3180 * rebalance and the tree is about to be destroyed anyway. 3181 */ 3182 list_del(&mapping->list); 3183 kfree(mapping); 3184 } 3185 3186 dma_fence_put(vm->last_update); 3187 for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) 3188 amdgpu_vmid_free_reserved(adev, vm, i); 3189} 3190 3191/** 3192 * amdgpu_vm_manager_init - init the VM manager 3193 * 3194 * @adev: amdgpu_device pointer 3195 * 3196 * Initialize the VM manager structures 3197 */ 3198void amdgpu_vm_manager_init(struct amdgpu_device *adev) 3199{ 3200 unsigned i; 3201 3202 /* Concurrent flushes are only possible starting with Vega10 and 3203 * are broken on Navi10 and Navi14. 
3204 */ 3205 adev->vm_manager.concurrent_flush = !(adev->asic_type < CHIP_VEGA10 || 3206 adev->asic_type == CHIP_NAVI10 || 3207 adev->asic_type == CHIP_NAVI14); 3208 amdgpu_vmid_mgr_init(adev); 3209 3210 adev->vm_manager.fence_context = 3211 dma_fence_context_alloc(AMDGPU_MAX_RINGS); 3212 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) 3213 adev->vm_manager.seqno[i] = 0; 3214 3215 spin_lock_init(&adev->vm_manager.prt_lock); 3216 atomic_set(&adev->vm_manager.num_prt_users, 0); 3217 3218 /* If not overridden by the user, by default, only in large BAR systems 3219 * Compute VM tables will be updated by CPU 3220 */ 3221#ifdef CONFIG_X86_64 3222 if (amdgpu_vm_update_mode == -1) { 3223 /* For asic with VF MMIO access protection 3224 * avoid using CPU for VM table updates 3225 */ 3226 if (amdgpu_gmc_vram_full_visible(&adev->gmc) && 3227 !amdgpu_sriov_vf_mmio_access_protection(adev)) 3228 adev->vm_manager.vm_update_mode = 3229 AMDGPU_VM_USE_CPU_FOR_COMPUTE; 3230 else 3231 adev->vm_manager.vm_update_mode = 0; 3232 } else 3233 adev->vm_manager.vm_update_mode = amdgpu_vm_update_mode; 3234#else 3235 adev->vm_manager.vm_update_mode = 0; 3236#endif 3237 3238 idr_init(&adev->vm_manager.pasid_idr); 3239 spin_lock_init(&adev->vm_manager.pasid_lock); 3240} 3241 3242/** 3243 * amdgpu_vm_manager_fini - cleanup VM manager 3244 * 3245 * @adev: amdgpu_device pointer 3246 * 3247 * Cleanup the VM manager and free resources. 3248 */ 3249void amdgpu_vm_manager_fini(struct amdgpu_device *adev) 3250{ 3251 WARN_ON(!idr_is_empty(&adev->vm_manager.pasid_idr)); 3252 idr_destroy(&adev->vm_manager.pasid_idr); 3253 3254 amdgpu_vmid_mgr_fini(adev); 3255} 3256 3257/** 3258 * amdgpu_vm_ioctl - Manages VMID reservation for vm hubs. 3259 * 3260 * @dev: drm device pointer 3261 * @data: drm_amdgpu_vm 3262 * @filp: drm file pointer 3263 * 3264 * Returns: 3265 * 0 for success, -errno for errors. 3266 */ 3267int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) 3268{ 3269 union drm_amdgpu_vm *args = data; 3270 struct amdgpu_device *adev = drm_to_adev(dev); 3271 struct amdgpu_fpriv *fpriv = filp->driver_priv; 3272 long timeout = msecs_to_jiffies(2000); 3273 int r; 3274 3275 /* No valid flags defined yet */ 3276 if (args->in.flags) 3277 return -EINVAL; 3278 3279 switch (args->in.op) { 3280 case AMDGPU_VM_OP_RESERVE_VMID: 3281 /* We only have requirement to reserve vmid from gfxhub */ 3282 r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm, 3283 AMDGPU_GFXHUB_0); 3284 if (r) 3285 return r; 3286 break; 3287 case AMDGPU_VM_OP_UNRESERVE_VMID: 3288 if (amdgpu_sriov_runtime(adev)) 3289 timeout = 8 * timeout; 3290 3291 /* Wait vm idle to make sure the vmid set in SPM_VMID is 3292 * not referenced anymore. 3293 */ 3294 r = amdgpu_bo_reserve(fpriv->vm.root.base.bo, true); 3295 if (r) 3296 return r; 3297 3298 r = amdgpu_vm_wait_idle(&fpriv->vm, timeout); 3299 if (r < 0) 3300 return r; 3301 3302 amdgpu_bo_unreserve(fpriv->vm.root.base.bo); 3303 amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB_0); 3304 break; 3305 default: 3306 return -EINVAL; 3307 } 3308 3309 return 0; 3310} 3311 3312/** 3313 * amdgpu_vm_get_task_info - Extracts task info for a PASID. 3314 * 3315 * @adev: drm device pointer 3316 * @pasid: PASID identifier for VM 3317 * @task_info: task_info to fill. 
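 *
 * Only the pasid_lock is taken (with interrupts disabled), so this can be
 * called from the GMC fault interrupt path.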
3318 */ 3319void amdgpu_vm_get_task_info(struct amdgpu_device *adev, u32 pasid, 3320 struct amdgpu_task_info *task_info) 3321{ 3322 struct amdgpu_vm *vm; 3323 unsigned long flags; 3324 3325 spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); 3326 3327 vm = idr_find(&adev->vm_manager.pasid_idr, pasid); 3328 if (vm) 3329 *task_info = vm->task_info; 3330 3331 spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); 3332} 3333 3334/** 3335 * amdgpu_vm_set_task_info - Sets VMs task info. 3336 * 3337 * @vm: vm for which to set the info 3338 */ 3339void amdgpu_vm_set_task_info(struct amdgpu_vm *vm) 3340{ 3341 if (vm->task_info.pid) 3342 return; 3343 3344 vm->task_info.pid = current->pid; 3345 get_task_comm(vm->task_info.task_name, current); 3346 3347 if (current->group_leader->mm != current->mm) 3348 return; 3349 3350 vm->task_info.tgid = current->group_leader->pid; 3351 get_task_comm(vm->task_info.process_name, current->group_leader); 3352} 3353 3354/** 3355 * amdgpu_vm_handle_fault - graceful handling of VM faults. 3356 * @adev: amdgpu device pointer 3357 * @pasid: PASID of the VM 3358 * @addr: Address of the fault 3359 * 3360 * Try to gracefully handle a VM fault. Return true if the fault was handled and 3361 * shouldn't be reported any more. 3362 */ 3363bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, 3364 uint64_t addr) 3365{ 3366 struct amdgpu_bo *root; 3367 uint64_t value, flags; 3368 struct amdgpu_vm *vm; 3369 long r; 3370 3371 spin_lock(&adev->vm_manager.pasid_lock); 3372 vm = idr_find(&adev->vm_manager.pasid_idr, pasid); 3373 if (vm) 3374 root = amdgpu_bo_ref(vm->root.base.bo); 3375 else 3376 root = NULL; 3377 spin_unlock(&adev->vm_manager.pasid_lock); 3378 3379 if (!root) 3380 return false; 3381 3382 r = amdgpu_bo_reserve(root, true); 3383 if (r) 3384 goto error_unref; 3385 3386 /* Double check that the VM still exists */ 3387 spin_lock(&adev->vm_manager.pasid_lock); 3388 vm = idr_find(&adev->vm_manager.pasid_idr, pasid); 3389 if (vm && vm->root.base.bo != root) 3390 vm = NULL; 3391 spin_unlock(&adev->vm_manager.pasid_lock); 3392 if (!vm) 3393 goto error_unlock; 3394 3395 addr /= AMDGPU_GPU_PAGE_SIZE; 3396 flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED | 3397 AMDGPU_PTE_SYSTEM; 3398 3399 if (vm->is_compute_context) { 3400 /* Intentionally setting invalid PTE flag 3401 * combination to force a no-retry-fault 3402 */ 3403 flags = AMDGPU_PTE_EXECUTABLE | AMDGPU_PDE_PTE | 3404 AMDGPU_PTE_TF; 3405 value = 0; 3406 3407 } else if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) { 3408 /* Redirect the access to the dummy page */ 3409 value = adev->dummy_page_addr; 3410 flags |= AMDGPU_PTE_EXECUTABLE | AMDGPU_PTE_READABLE | 3411 AMDGPU_PTE_WRITEABLE; 3412 3413 } else { 3414 /* Let the hw retry silently on the PTE */ 3415 value = 0; 3416 } 3417 3418 r = amdgpu_vm_bo_update_mapping(adev, vm, true, false, NULL, addr, 3419 addr + 1, flags, value, NULL, NULL); 3420 if (r) 3421 goto error_unlock; 3422 3423 r = amdgpu_vm_update_pdes(adev, vm, true); 3424 3425error_unlock: 3426 amdgpu_bo_unreserve(root); 3427 if (r < 0) 3428 DRM_ERROR("Can't handle page fault (%ld)\n", r); 3429 3430error_unref: 3431 amdgpu_bo_unref(&root); 3432 3433 return false; 3434} 3435