// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include <uapi/misc/habanalabs.h>
#include "habanalabs.h"
#include "../include/hw_ip/mmu/mmu_general.h"

#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/genalloc.h>

#define HL_MMU_DEBUG	0

/*
 * The va ranges in context object contain a list with the available chunks of
 * device virtual memory.
 * There is one range for host allocations and one for DRAM allocations.
 *
 * On initialization each range contains one chunk of all of its available
 * virtual range which is a half of the total device virtual range.
 *
 * On each mapping of physical pages, a suitable virtual range chunk (with a
 * minimum size) is selected from the list. If the chunk size equals the
 * requested size, the chunk is returned. Otherwise, the chunk is split into
 * two chunks - one to return as result and a remainder to stay in the list.
 *
 * On each unmapping of a virtual address, the relevant virtual chunk is
 * returned to the list. The chunk is added to the list and if its edges match
 * the edges of the adjacent chunks (meaning a contiguous chunk can be created),
 * the chunks are merged.
 *
 * On finish, the list is checked to have only one chunk of all the relevant
 * virtual range (which is a half of the device total virtual range).
 * If not (meaning not all mappings were unmapped), a warning is printed.
 */

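/*
 * Illustrative example only (not part of the driver logic): assume a range
 * that initially holds the single free chunk [0x1000, 0x8FFF]. Mapping
 * 0x2000 bytes splits it into the returned chunk [0x1000, 0x2FFF] and the
 * remainder [0x3000, 0x8FFF], which stays in the list. Unmapping
 * [0x1000, 0x2FFF] later re-inserts that chunk and, since its end borders
 * the remainder's start, the two are merged back into [0x1000, 0x8FFF].
 */
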
/*
 * alloc_device_memory - allocate device memory
 *
 * @ctx : current context
 * @args : host parameters containing the requested size
 * @ret_handle : result handle
 *
 * This function does the following:
 * - Allocate the requested size rounded up to 2MB pages
 * - Return unique handle
 */
static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
				u32 *ret_handle)
{
	struct hl_device *hdev = ctx->hdev;
	struct hl_vm *vm = &hdev->vm;
	struct hl_vm_phys_pg_pack *phys_pg_pack;
	u64 paddr = 0, total_size, num_pgs, i;
	u32 num_curr_pgs, page_size, page_shift;
	int handle, rc;
	bool contiguous;

	num_curr_pgs = 0;
	page_size = hdev->asic_prop.dram_page_size;
	page_shift = __ffs(page_size);
	num_pgs = (args->alloc.mem_size + (page_size - 1)) >> page_shift;
	total_size = num_pgs << page_shift;

	if (!total_size) {
		dev_err(hdev->dev, "Cannot allocate 0 bytes\n");
		return -EINVAL;
	}

	contiguous = args->flags & HL_MEM_CONTIGUOUS;

	if (contiguous) {
		paddr = (u64) gen_pool_alloc(vm->dram_pg_pool, total_size);
		if (!paddr) {
			dev_err(hdev->dev,
				"failed to allocate %llu contiguous pages with total size of %llu\n",
				num_pgs, total_size);
			return -ENOMEM;
		}
	}

	phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL);
	if (!phys_pg_pack) {
		rc = -ENOMEM;
		goto pages_pack_err;
	}

	phys_pg_pack->vm_type = VM_TYPE_PHYS_PACK;
	phys_pg_pack->asid = ctx->asid;
	phys_pg_pack->npages = num_pgs;
	phys_pg_pack->page_size = page_size;
	phys_pg_pack->total_size = total_size;
	phys_pg_pack->flags = args->flags;
	phys_pg_pack->contiguous = contiguous;

	phys_pg_pack->pages = kvmalloc_array(num_pgs, sizeof(u64), GFP_KERNEL);
	if (ZERO_OR_NULL_PTR(phys_pg_pack->pages)) {
		rc = -ENOMEM;
		goto pages_arr_err;
	}

	if (phys_pg_pack->contiguous) {
		for (i = 0 ; i < num_pgs ; i++)
			phys_pg_pack->pages[i] = paddr + i * page_size;
	} else {
		for (i = 0 ; i < num_pgs ; i++) {
			phys_pg_pack->pages[i] = (u64) gen_pool_alloc(
							vm->dram_pg_pool,
							page_size);
			if (!phys_pg_pack->pages[i]) {
				dev_err(hdev->dev,
					"Failed to allocate device memory (out of memory)\n");
				rc = -ENOMEM;
				goto page_err;
			}

			num_curr_pgs++;
		}
	}

	spin_lock(&vm->idr_lock);
	handle = idr_alloc(&vm->phys_pg_pack_handles, phys_pg_pack, 1, 0,
				GFP_ATOMIC);
	spin_unlock(&vm->idr_lock);

	if (handle < 0) {
		dev_err(hdev->dev, "Failed to get handle for page\n");
		rc = -EFAULT;
		goto idr_err;
	}

	for (i = 0 ; i < num_pgs ; i++)
		kref_get(&vm->dram_pg_pool_refcount);

	phys_pg_pack->handle = handle;

	atomic64_add(phys_pg_pack->total_size, &ctx->dram_phys_mem);
	atomic64_add(phys_pg_pack->total_size, &hdev->dram_used_mem);

	*ret_handle = handle;

	return 0;

idr_err:
page_err:
	if (!phys_pg_pack->contiguous)
		for (i = 0 ; i < num_curr_pgs ; i++)
			gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[i],
					page_size);

	kvfree(phys_pg_pack->pages);
pages_arr_err:
	kfree(phys_pg_pack);
pages_pack_err:
	if (contiguous)
		gen_pool_free(vm->dram_pg_pool, paddr, total_size);

	return rc;
}

/*
 * dma_map_host_va - DMA mapping of the given host virtual address.
 * @hdev: habanalabs device structure
 * @addr: the host virtual address of the memory area
 * @size: the size of the memory area
 * @p_userptr: pointer to result userptr structure
 *
 * This function does the following:
 * - Allocate userptr structure
 * - Pin the given host memory using the userptr structure
 * - Perform DMA mapping to have the DMA addresses of the pages
 */
static int dma_map_host_va(struct hl_device *hdev, u64 addr, u64 size,
				struct hl_userptr **p_userptr)
{
	struct hl_userptr *userptr;
	int rc;

	userptr = kzalloc(sizeof(*userptr), GFP_KERNEL);
	if (!userptr) {
		rc = -ENOMEM;
		goto userptr_err;
	}

	rc = hl_pin_host_memory(hdev, addr, size, userptr);
	if (rc) {
		dev_err(hdev->dev, "Failed to pin host memory\n");
		goto pin_err;
	}

	rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
					userptr->sgt->nents, DMA_BIDIRECTIONAL);
	if (rc) {
		dev_err(hdev->dev, "failed to map sgt with DMA region\n");
		goto dma_map_err;
	}

	userptr->dma_mapped = true;
	userptr->dir = DMA_BIDIRECTIONAL;
	userptr->vm_type = VM_TYPE_USERPTR;

	*p_userptr = userptr;

	return 0;

dma_map_err:
	hl_unpin_host_memory(hdev, userptr);
pin_err:
	kfree(userptr);
userptr_err:

	return rc;
}

/*
 * dma_unmap_host_va - DMA unmapping of the given host virtual address.
 * @hdev: habanalabs device structure
 * @userptr: userptr to free
 *
 * This function does the following:
 * - Unpins the physical pages
 * - Frees the userptr structure
 */
static void dma_unmap_host_va(struct hl_device *hdev,
				struct hl_userptr *userptr)
{
	hl_unpin_host_memory(hdev, userptr);
	kfree(userptr);
}

/*
 * dram_pg_pool_do_release - free DRAM pages pool
 *
 * @ref : pointer to reference object
 *
 * This function does the following:
 * - Frees the idr structure of physical pages handles
 * - Frees the generic pool of DRAM physical pages
 */
static void dram_pg_pool_do_release(struct kref *ref)
{
	struct hl_vm *vm = container_of(ref, struct hl_vm,
			dram_pg_pool_refcount);

	/*
	 * free the idr here as only here we know for sure that there are no
	 * allocated physical pages and hence there are no handles in use
	 */
	idr_destroy(&vm->phys_pg_pack_handles);
	gen_pool_destroy(vm->dram_pg_pool);
}

/*
 * free_phys_pg_pack - free physical page pack
 * @hdev: habanalabs device structure
 * @phys_pg_pack: physical page pack to free
 *
 * This function does the following:
 * - For DRAM memory only, iterate over the pack and free each physical block
 *   structure by returning it to the general pool
 * - Free the hl_vm_phys_pg_pack structure
 */
static void free_phys_pg_pack(struct hl_device *hdev,
				struct hl_vm_phys_pg_pack *phys_pg_pack)
{
	struct hl_vm *vm = &hdev->vm;
	u64 i;

	if (!phys_pg_pack->created_from_userptr) {
		if (phys_pg_pack->contiguous) {
			gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[0],
					phys_pg_pack->total_size);

			for (i = 0; i < phys_pg_pack->npages ; i++)
				kref_put(&vm->dram_pg_pool_refcount,
					dram_pg_pool_do_release);
		} else {
			for (i = 0 ; i < phys_pg_pack->npages ; i++) {
				gen_pool_free(vm->dram_pg_pool,
						phys_pg_pack->pages[i],
						phys_pg_pack->page_size);
				kref_put(&vm->dram_pg_pool_refcount,
						dram_pg_pool_do_release);
			}
		}
	}

	kvfree(phys_pg_pack->pages);
	kfree(phys_pg_pack);
}

/*
 * free_device_memory - free device memory
 *
 * @ctx : current context
 * @handle : handle of the memory chunk to free
 *
 * This function does the following:
 * - Free the device memory related to the given handle
 */
static int free_device_memory(struct hl_ctx *ctx, u32 handle)
{
	struct hl_device *hdev = ctx->hdev;
	struct hl_vm *vm = &hdev->vm;
	struct hl_vm_phys_pg_pack *phys_pg_pack;

	spin_lock(&vm->idr_lock);
	phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle);
	if (phys_pg_pack) {
		if (atomic_read(&phys_pg_pack->mapping_cnt) > 0) {
			dev_err(hdev->dev, "handle %u is mapped, cannot free\n",
				handle);
			spin_unlock(&vm->idr_lock);
			return -EINVAL;
		}

		/*
		 * must remove from idr before the freeing of the physical
		 * pages as the refcount of the pool is also the trigger of the
		 * idr destroy
		 */
		idr_remove(&vm->phys_pg_pack_handles, handle);
		spin_unlock(&vm->idr_lock);

		atomic64_sub(phys_pg_pack->total_size, &ctx->dram_phys_mem);
		atomic64_sub(phys_pg_pack->total_size, &hdev->dram_used_mem);

		free_phys_pg_pack(hdev, phys_pg_pack);
	} else {
		spin_unlock(&vm->idr_lock);
		dev_err(hdev->dev,
			"free device memory failed, no match for handle %u\n",
			handle);
		return -EINVAL;
	}

	return 0;
}

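/*
 * Lifecycle sketch (informational only, summarizing the functions above):
 * every DRAM page handed out by alloc_device_memory() takes a reference on
 * dram_pg_pool_refcount, and free_device_memory()/free_phys_pg_pack() drop
 * those references after removing the handle from the idr. The pool and the
 * idr are therefore destroyed only in dram_pg_pool_do_release(), once the
 * last page reference is gone (see hl_vm_fini() at the bottom of this file).
 */
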
/*
 * clear_va_list_locked - free virtual addresses list
 *
 * @hdev : habanalabs device structure
 * @va_list : list of virtual addresses to free
 *
 * This function does the following:
 * - Iterate over the list and free each virtual addresses block
 *
 * This function should be called only when va_list lock is taken
 */
static void clear_va_list_locked(struct hl_device *hdev,
		struct list_head *va_list)
{
	struct hl_vm_va_block *va_block, *tmp;

	list_for_each_entry_safe(va_block, tmp, va_list, node) {
		list_del(&va_block->node);
		kfree(va_block);
	}
}

/*
 * print_va_list_locked - print virtual addresses list
 *
 * @hdev : habanalabs device structure
 * @va_list : list of virtual addresses to print
 *
 * This function does the following:
 * - Iterate over the list and print each virtual addresses block
 *
 * This function should be called only when va_list lock is taken
 */
static void print_va_list_locked(struct hl_device *hdev,
		struct list_head *va_list)
{
#if HL_MMU_DEBUG
	struct hl_vm_va_block *va_block;

	dev_dbg(hdev->dev, "print va list:\n");

	list_for_each_entry(va_block, va_list, node)
		dev_dbg(hdev->dev,
			"va block, start: 0x%llx, end: 0x%llx, size: %llu\n",
			va_block->start, va_block->end, va_block->size);
#endif
}

/*
 * merge_va_blocks_locked - merge a virtual block if possible
 *
 * @hdev : pointer to the habanalabs device structure
 * @va_list : pointer to the virtual addresses block list
 * @va_block : virtual block to merge with adjacent blocks
 *
 * This function does the following:
 * - Merge the given block with the adjacent blocks if their virtual ranges
 *   create a contiguous virtual range
 *
 * This function should be called only when va_list lock is taken
 */
static void merge_va_blocks_locked(struct hl_device *hdev,
		struct list_head *va_list, struct hl_vm_va_block *va_block)
{
	struct hl_vm_va_block *prev, *next;

	prev = list_prev_entry(va_block, node);
	if (&prev->node != va_list && prev->end + 1 == va_block->start) {
		prev->end = va_block->end;
		prev->size = prev->end - prev->start;
		list_del(&va_block->node);
		kfree(va_block);
		va_block = prev;
	}

	next = list_next_entry(va_block, node);
	if (&next->node != va_list && va_block->end + 1 == next->start) {
		next->start = va_block->start;
		next->size = next->end - next->start;
		list_del(&va_block->node);
		kfree(va_block);
	}
}

/*
 * add_va_block_locked - add a virtual block to the virtual addresses list
 *
 * @hdev : pointer to the habanalabs device structure
 * @va_list : pointer to the virtual addresses block list
 * @start : start virtual address
 * @end : end virtual address
 *
 * This function does the following:
 * - Add the given block to the virtual blocks list and merge with other
 *   blocks if a contiguous virtual block can be created
 *
 * This function should be called only when va_list lock is taken
 */
static int add_va_block_locked(struct hl_device *hdev,
		struct list_head *va_list, u64 start, u64 end)
{
	struct hl_vm_va_block *va_block, *res = NULL;
	u64 size = end - start;

	print_va_list_locked(hdev, va_list);

	list_for_each_entry(va_block, va_list, node) {
		/* TODO: remove upon matureness */
		if (hl_mem_area_crosses_range(start, size, va_block->start,
				va_block->end)) {
			dev_err(hdev->dev,
				"block crossing ranges at start 0x%llx, end 0x%llx\n",
				va_block->start, va_block->end);
			return -EINVAL;
		}

		if (va_block->end < start)
			res = va_block;
	}

	va_block = kmalloc(sizeof(*va_block), GFP_KERNEL);
	if (!va_block)
		return -ENOMEM;

	va_block->start = start;
	va_block->end = end;
	va_block->size = size;

	if (!res)
		list_add(&va_block->node, va_list);
	else
		list_add(&va_block->node, &res->node);

	merge_va_blocks_locked(hdev, va_list, va_block);

	print_va_list_locked(hdev, va_list);

	return 0;
}

/*
 * add_va_block - wrapper for add_va_block_locked
 *
 * @hdev : pointer to the habanalabs device structure
 * @va_range : pointer to the virtual addresses range
 * @start : start virtual address
 * @end : end virtual address
 *
 * This function does the following:
 * - Takes the list lock and calls add_va_block_locked
 */
static inline int add_va_block(struct hl_device *hdev,
		struct hl_va_range *va_range, u64 start, u64 end)
{
	int rc;

	mutex_lock(&va_range->lock);
	rc = add_va_block_locked(hdev, &va_range->list, start, end);
	mutex_unlock(&va_range->lock);

	return rc;
}

/*
 * get_va_block() - get a virtual block for the given size and alignment.
 * @hdev: pointer to the habanalabs device structure.
 * @va_range: pointer to the virtual addresses range.
 * @size: requested block size.
 * @hint_addr: hint for requested address by the user.
 * @va_block_align: required alignment of the virtual block start address.
 *
 * This function does the following:
 * - Iterate over the virtual block list to find a suitable virtual block for
 *   the given size and alignment.
 * - Reserve the requested block and update the list.
 * - Return the start address of the virtual block.
 */
static u64 get_va_block(struct hl_device *hdev, struct hl_va_range *va_range,
			u64 size, u64 hint_addr, u32 va_block_align)
{
	struct hl_vm_va_block *va_block, *new_va_block = NULL;
	u64 valid_start, valid_size, prev_start, prev_end, align_mask,
		res_valid_start = 0, res_valid_size = 0;
	bool add_prev = false;

	align_mask = ~((u64)va_block_align - 1);

	/* check if hint_addr is aligned */
	if (hint_addr & (va_block_align - 1))
		hint_addr = 0;

	mutex_lock(&va_range->lock);

	print_va_list_locked(hdev, &va_range->list);

	list_for_each_entry(va_block, &va_range->list, node) {
		/* calc the first possible aligned addr */
		valid_start = va_block->start;

		if (valid_start & (va_block_align - 1)) {
			valid_start &= align_mask;
			valid_start += va_block_align;
			if (valid_start > va_block->end)
				continue;
		}

		valid_size = va_block->end - valid_start;

		if (valid_size >= size &&
			(!new_va_block || valid_size < res_valid_size)) {
			new_va_block = va_block;
			res_valid_start = valid_start;
			res_valid_size = valid_size;
		}

		if (hint_addr && hint_addr >= valid_start &&
				((hint_addr + size) <= va_block->end)) {
			new_va_block = va_block;
			res_valid_start = hint_addr;
			res_valid_size = valid_size;
			break;
		}
	}

	if (!new_va_block) {
		dev_err(hdev->dev, "no available va block for size %llu\n",
				size);
		goto out;
	}

	if (res_valid_start > new_va_block->start) {
		prev_start = new_va_block->start;
		prev_end = res_valid_start - 1;

		new_va_block->start = res_valid_start;
		new_va_block->size = res_valid_size;

		add_prev = true;
	}

	if (new_va_block->size > size) {
		new_va_block->start += size;
		new_va_block->size = new_va_block->end - new_va_block->start;
	} else {
		list_del(&new_va_block->node);
		kfree(new_va_block);
	}

	if (add_prev)
		add_va_block_locked(hdev, &va_range->list, prev_start,
				prev_end);

	print_va_list_locked(hdev, &va_range->list);
out:
	mutex_unlock(&va_range->lock);

	return res_valid_start;
}

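/*
 * Informal example for get_va_block() (not driver code, addresses chosen for
 * illustration): with a single free block [0x100000, 0x5FFFFF], a request of
 * size 0x200000 aligned to 0x200000 with no hint reserves
 * [0x200000, 0x3FFFFF] and returns 0x200000. The skipped head
 * [0x100000, 0x1FFFFF] is re-added as its own block and the tail
 * [0x400000, 0x5FFFFF] remains in the shrunken original block.
 */
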
/*
 * get_sg_info - get number of pages and the DMA address from SG list
 *
 * @sg : the SG list
 * @dma_addr : pointer to DMA address to return
 *
 * Calculate the number of consecutive pages described by the SG list. Take the
 * offset of the address in the first page, add the length to it and round it
 * up to the number of needed pages.
 */
static u32 get_sg_info(struct scatterlist *sg, dma_addr_t *dma_addr)
{
	*dma_addr = sg_dma_address(sg);

	return ((((*dma_addr) & (PAGE_SIZE - 1)) + sg_dma_len(sg)) +
			(PAGE_SIZE - 1)) >> PAGE_SHIFT;
}

/*
 * init_phys_pg_pack_from_userptr - initialize physical page pack from host
 *                                  memory
 * @ctx: current context
 * @userptr: userptr to initialize from
 * @pphys_pg_pack: result pointer
 *
 * This function does the following:
 * - Pin the physical pages related to the given virtual block
 * - Create a physical page pack from the physical pages related to the given
 *   virtual block
 */
static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
				struct hl_userptr *userptr,
				struct hl_vm_phys_pg_pack **pphys_pg_pack)
{
	struct hl_vm_phys_pg_pack *phys_pg_pack;
	struct scatterlist *sg;
	dma_addr_t dma_addr;
	u64 page_mask, total_npages;
	u32 npages, page_size = PAGE_SIZE,
		huge_page_size = ctx->hdev->asic_prop.pmmu_huge.page_size;
	bool first = true, is_huge_page_opt = true;
	int rc, i, j;
	u32 pgs_in_huge_page = huge_page_size >> __ffs(page_size);

	phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL);
	if (!phys_pg_pack)
		return -ENOMEM;

	phys_pg_pack->vm_type = userptr->vm_type;
	phys_pg_pack->created_from_userptr = true;
	phys_pg_pack->asid = ctx->asid;
	atomic_set(&phys_pg_pack->mapping_cnt, 1);

	/* Only if all dma_addrs are aligned to 2MB and their
	 * sizes are at least 2MB, we can use huge page mapping.
	 * We limit the 2MB optimization to this condition,
	 * since later on we acquire the related VA range as one
	 * consecutive block.
	 */
	total_npages = 0;
	for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) {
		npages = get_sg_info(sg, &dma_addr);

		total_npages += npages;

		if ((npages % pgs_in_huge_page) ||
					(dma_addr & (huge_page_size - 1)))
			is_huge_page_opt = false;
	}

	if (is_huge_page_opt) {
		page_size = huge_page_size;
		do_div(total_npages, pgs_in_huge_page);
	}

	page_mask = ~(((u64) page_size) - 1);

	phys_pg_pack->pages = kvmalloc_array(total_npages, sizeof(u64),
						GFP_KERNEL);
	if (ZERO_OR_NULL_PTR(phys_pg_pack->pages)) {
		rc = -ENOMEM;
		goto page_pack_arr_mem_err;
	}

	phys_pg_pack->npages = total_npages;
	phys_pg_pack->page_size = page_size;
	phys_pg_pack->total_size = total_npages * page_size;

	j = 0;
	for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) {
		npages = get_sg_info(sg, &dma_addr);

		/* align down to physical page size and save the offset */
		if (first) {
			first = false;
			phys_pg_pack->offset = dma_addr & (page_size - 1);
			dma_addr &= page_mask;
		}

		while (npages) {
			phys_pg_pack->pages[j++] = dma_addr;
			dma_addr += page_size;

			if (is_huge_page_opt)
				npages -= pgs_in_huge_page;
			else
				npages--;
		}
	}

	*pphys_pg_pack = phys_pg_pack;

	return 0;

page_pack_arr_mem_err:
	kfree(phys_pg_pack);

	return rc;
}

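/*
 * Worked example (informational only, assuming a 4KB PAGE_SIZE and a 2MB huge
 * page): an SG entry with dma_addr 0x10001800 and length 0x3000 covers
 * ((0x800 + 0x3000) + 0xFFF) >> 12 = 4 regular pages in get_sg_info(). Such
 * an entry disables the huge-page optimization above, since 4 is not a
 * multiple of the 512 regular pages per huge page and the address is not 2MB
 * aligned; a 4MB entry starting at a 2MB-aligned address would keep it.
 */
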
/*
 * map_phys_pg_pack - maps the physical page pack.
 * @ctx: current context
 * @vaddr: start address of the virtual area to map from
 * @phys_pg_pack: the pack of physical pages to map to
 *
 * This function does the following:
 * - Maps each chunk of virtual memory to matching physical chunk
 * - Stores number of successful mappings in the given argument
 * - Returns 0 on success, error code otherwise
 */
static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
				struct hl_vm_phys_pg_pack *phys_pg_pack)
{
	struct hl_device *hdev = ctx->hdev;
	u64 next_vaddr = vaddr, paddr, mapped_pg_cnt = 0, i;
	u32 page_size = phys_pg_pack->page_size;
	int rc = 0;

	for (i = 0 ; i < phys_pg_pack->npages ; i++) {
		paddr = phys_pg_pack->pages[i];

		rc = hl_mmu_map(ctx, next_vaddr, paddr, page_size,
				(i + 1) == phys_pg_pack->npages);
		if (rc) {
			dev_err(hdev->dev,
				"map failed for handle %u, npages: %llu, mapped: %llu",
				phys_pg_pack->handle, phys_pg_pack->npages,
				mapped_pg_cnt);
			goto err;
		}

		mapped_pg_cnt++;
		next_vaddr += page_size;
	}

	return 0;

err:
	next_vaddr = vaddr;
	for (i = 0 ; i < mapped_pg_cnt ; i++) {
		if (hl_mmu_unmap(ctx, next_vaddr, page_size,
				(i + 1) == mapped_pg_cnt))
			dev_warn_ratelimited(hdev->dev,
				"failed to unmap handle %u, va: 0x%llx, pa: 0x%llx, page size: %u\n",
					phys_pg_pack->handle, next_vaddr,
					phys_pg_pack->pages[i], page_size);

		next_vaddr += page_size;
	}

	return rc;
}

/*
 * unmap_phys_pg_pack - unmaps the physical page pack
 * @ctx: current context
 * @vaddr: start address of the virtual area to unmap
 * @phys_pg_pack: the pack of physical pages to unmap
 */
static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
				struct hl_vm_phys_pg_pack *phys_pg_pack)
{
	struct hl_device *hdev = ctx->hdev;
	u64 next_vaddr, i;
	u32 page_size;

	page_size = phys_pg_pack->page_size;
	next_vaddr = vaddr;

	for (i = 0 ; i < phys_pg_pack->npages ; i++, next_vaddr += page_size) {
		if (hl_mmu_unmap(ctx, next_vaddr, page_size,
				(i + 1) == phys_pg_pack->npages))
			dev_warn_ratelimited(hdev->dev,
				"unmap failed for vaddr: 0x%llx\n", next_vaddr);

		/*
		 * unmapping on Palladium can be really long, so avoid a CPU
		 * soft lockup bug by sleeping a little between unmapping pages
		 */
		if (hdev->pldm)
			usleep_range(500, 1000);
	}
}

static int get_paddr_from_handle(struct hl_ctx *ctx, struct hl_mem_in *args,
					u64 *paddr)
{
	struct hl_device *hdev = ctx->hdev;
	struct hl_vm *vm = &hdev->vm;
	struct hl_vm_phys_pg_pack *phys_pg_pack;
	u32 handle;

	handle = lower_32_bits(args->map_device.handle);
	spin_lock(&vm->idr_lock);
	phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle);
	if (!phys_pg_pack) {
		spin_unlock(&vm->idr_lock);
		dev_err(hdev->dev, "no match for handle %u\n", handle);
		return -EINVAL;
	}

	*paddr = phys_pg_pack->pages[0];

	spin_unlock(&vm->idr_lock);

	return 0;
}

/*
 * map_device_va - map the given memory
 *
 * @ctx : current context
 * @args : host parameters with handle/host virtual address
 * @device_addr : pointer to result device virtual address
 *
 * This function does the following:
 * - If given a physical device memory handle, map to a device virtual block
 *   and return the start address of this block
 * - If given a host virtual address and size, find the related physical pages,
 *   map a device virtual block to these pages and return the start address of
 *   this block
 */
static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
		u64 *device_addr)
{
	struct hl_device *hdev = ctx->hdev;
	struct hl_vm *vm = &hdev->vm;
	struct hl_vm_phys_pg_pack *phys_pg_pack;
	struct hl_userptr *userptr = NULL;
	struct hl_vm_hash_node *hnode;
	struct hl_va_range *va_range;
	enum vm_type_t *vm_type;
	u64 ret_vaddr, hint_addr;
	u32 handle = 0, va_block_align;
	int rc;
	bool is_userptr = args->flags & HL_MEM_USERPTR;

	/* Assume failure */
	*device_addr = 0;

	if (is_userptr) {
		u64 addr = args->map_host.host_virt_addr,
			size = args->map_host.mem_size;
		u32 page_size = hdev->asic_prop.pmmu.page_size,
			huge_page_size = hdev->asic_prop.pmmu_huge.page_size;

		rc = dma_map_host_va(hdev, addr, size, &userptr);
		if (rc) {
			dev_err(hdev->dev, "failed to get userptr from va\n");
			return rc;
		}

		rc = init_phys_pg_pack_from_userptr(ctx, userptr,
				&phys_pg_pack);
		if (rc) {
			dev_err(hdev->dev,
				"unable to init page pack for vaddr 0x%llx\n",
				addr);
			goto init_page_pack_err;
		}

		vm_type = (enum vm_type_t *) userptr;
		hint_addr = args->map_host.hint_addr;
		handle = phys_pg_pack->handle;

		/* get required alignment */
		if (phys_pg_pack->page_size == page_size) {
			va_range = ctx->host_va_range;

			/*
			 * huge page alignment may be needed in case of regular
			 * page mapping, depending on the host VA alignment
			 */
			if (addr & (huge_page_size - 1))
				va_block_align = page_size;
			else
				va_block_align = huge_page_size;
		} else {
			/*
			 * huge page alignment is needed in case of huge page
			 * mapping
			 */
			va_range = ctx->host_huge_va_range;
			va_block_align = huge_page_size;
		}
	} else {
		handle = lower_32_bits(args->map_device.handle);

		spin_lock(&vm->idr_lock);
		phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle);
		if (!phys_pg_pack) {
			spin_unlock(&vm->idr_lock);
			dev_err(hdev->dev,
				"no match for handle %u\n", handle);
			return -EINVAL;
		}

		/* increment now to avoid freeing device memory while mapping */
		atomic_inc(&phys_pg_pack->mapping_cnt);

		spin_unlock(&vm->idr_lock);

		vm_type = (enum vm_type_t *) phys_pg_pack;

		hint_addr = args->map_device.hint_addr;

		/* DRAM VA alignment is the same as the DRAM page size */
		va_range = ctx->dram_va_range;
		va_block_align = hdev->asic_prop.dmmu.page_size;
	}

	/*
	 * relevant for mapping device physical memory only, as host memory is
	 * implicitly shared
	 */
	if (!is_userptr && !(phys_pg_pack->flags & HL_MEM_SHARED) &&
			phys_pg_pack->asid != ctx->asid) {
		dev_err(hdev->dev,
			"Failed to map memory, handle %u is not shared\n",
			handle);
		rc = -EPERM;
		goto shared_err;
	}

	hnode = kzalloc(sizeof(*hnode), GFP_KERNEL);
	if (!hnode) {
		rc = -ENOMEM;
		goto hnode_err;
	}

	ret_vaddr = get_va_block(hdev, va_range, phys_pg_pack->total_size,
					hint_addr, va_block_align);
	if (!ret_vaddr) {
		dev_err(hdev->dev, "no available va block for handle %u\n",
				handle);
		rc = -ENOMEM;
		goto va_block_err;
	}

	mutex_lock(&ctx->mmu_lock);

	rc = map_phys_pg_pack(ctx, ret_vaddr, phys_pg_pack);
	if (rc) {
		mutex_unlock(&ctx->mmu_lock);
		dev_err(hdev->dev, "mapping page pack failed for handle %u\n",
				handle);
		goto map_err;
	}
handle %u\n", 969 handle); 970 goto map_err; 971 } 972 973 rc = hdev->asic_funcs->mmu_invalidate_cache(hdev, false, *vm_type); 974 975 mutex_unlock(&ctx->mmu_lock); 976 977 if (rc) { 978 dev_err(hdev->dev, 979 "mapping handle %u failed due to MMU cache invalidation\n", 980 handle); 981 goto map_err; 982 } 983 984 ret_vaddr += phys_pg_pack->offset; 985 986 hnode->ptr = vm_type; 987 hnode->vaddr = ret_vaddr; 988 989 mutex_lock(&ctx->mem_hash_lock); 990 hash_add(ctx->mem_hash, &hnode->node, ret_vaddr); 991 mutex_unlock(&ctx->mem_hash_lock); 992 993 *device_addr = ret_vaddr; 994 995 if (is_userptr) 996 free_phys_pg_pack(hdev, phys_pg_pack); 997 998 return 0; 999 1000map_err: 1001 if (add_va_block(hdev, va_range, ret_vaddr, 1002 ret_vaddr + phys_pg_pack->total_size - 1)) 1003 dev_warn(hdev->dev, 1004 "release va block failed for handle 0x%x, vaddr: 0x%llx\n", 1005 handle, ret_vaddr); 1006 1007va_block_err: 1008 kfree(hnode); 1009hnode_err: 1010shared_err: 1011 atomic_dec(&phys_pg_pack->mapping_cnt); 1012 if (is_userptr) 1013 free_phys_pg_pack(hdev, phys_pg_pack); 1014init_page_pack_err: 1015 if (is_userptr) 1016 dma_unmap_host_va(hdev, userptr); 1017 1018 return rc; 1019} 1020 1021/* 1022 * unmap_device_va - unmap the given device virtual address 1023 * 1024 * @ctx : current context 1025 * @vaddr : device virtual address to unmap 1026 * @ctx_free : true if in context free flow, false otherwise. 1027 * 1028 * This function does the following: 1029 * - Unmap the physical pages related to the given virtual address 1030 * - return the device virtual block to the virtual block list 1031 */ 1032static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr, bool ctx_free) 1033{ 1034 struct hl_device *hdev = ctx->hdev; 1035 struct hl_vm_phys_pg_pack *phys_pg_pack = NULL; 1036 struct hl_vm_hash_node *hnode = NULL; 1037 struct hl_userptr *userptr = NULL; 1038 struct hl_va_range *va_range; 1039 enum vm_type_t *vm_type; 1040 bool is_userptr; 1041 int rc = 0; 1042 1043 /* protect from double entrance */ 1044 mutex_lock(&ctx->mem_hash_lock); 1045 hash_for_each_possible(ctx->mem_hash, hnode, node, (unsigned long)vaddr) 1046 if (vaddr == hnode->vaddr) 1047 break; 1048 1049 if (!hnode) { 1050 mutex_unlock(&ctx->mem_hash_lock); 1051 dev_err(hdev->dev, 1052 "unmap failed, no mem hnode for vaddr 0x%llx\n", 1053 vaddr); 1054 return -EINVAL; 1055 } 1056 1057 hash_del(&hnode->node); 1058 mutex_unlock(&ctx->mem_hash_lock); 1059 1060 vm_type = hnode->ptr; 1061 1062 if (*vm_type == VM_TYPE_USERPTR) { 1063 is_userptr = true; 1064 userptr = hnode->ptr; 1065 rc = init_phys_pg_pack_from_userptr(ctx, userptr, 1066 &phys_pg_pack); 1067 if (rc) { 1068 dev_err(hdev->dev, 1069 "unable to init page pack for vaddr 0x%llx\n", 1070 vaddr); 1071 goto vm_type_err; 1072 } 1073 1074 if (phys_pg_pack->page_size == 1075 hdev->asic_prop.pmmu.page_size) 1076 va_range = ctx->host_va_range; 1077 else 1078 va_range = ctx->host_huge_va_range; 1079 } else if (*vm_type == VM_TYPE_PHYS_PACK) { 1080 is_userptr = false; 1081 va_range = ctx->dram_va_range; 1082 phys_pg_pack = hnode->ptr; 1083 } else { 1084 dev_warn(hdev->dev, 1085 "unmap failed, unknown vm desc for vaddr 0x%llx\n", 1086 vaddr); 1087 rc = -EFAULT; 1088 goto vm_type_err; 1089 } 1090 1091 if (atomic_read(&phys_pg_pack->mapping_cnt) == 0) { 1092 dev_err(hdev->dev, "vaddr 0x%llx is not mapped\n", vaddr); 1093 rc = -EINVAL; 1094 goto mapping_cnt_err; 1095 } 1096 1097 vaddr &= ~(((u64) phys_pg_pack->page_size) - 1); 1098 1099 mutex_lock(&ctx->mmu_lock); 1100 1101 unmap_phys_pg_pack(ctx, vaddr, 

	/*
	 * During context free this function is called in a loop to clean all
	 * the context mappings. Hence the cache invalidation can be called once
	 * at the loop end rather than for each iteration
	 */
	if (!ctx_free)
		rc = hdev->asic_funcs->mmu_invalidate_cache(hdev, true,
								*vm_type);

	mutex_unlock(&ctx->mmu_lock);

	/*
	 * If the context is closing we don't need to check for the MMU cache
	 * invalidation return code and update the VA free list as in this flow
	 * we invalidate the MMU cache outside of this unmap function and the VA
	 * free list will be freed anyway.
	 */
	if (!ctx_free) {
		int tmp_rc;

		if (rc)
			dev_err(hdev->dev,
				"unmapping vaddr 0x%llx failed due to MMU cache invalidation\n",
				vaddr);

		tmp_rc = add_va_block(hdev, va_range, vaddr,
					vaddr + phys_pg_pack->total_size - 1);
		if (tmp_rc) {
			dev_warn(hdev->dev,
					"add va block failed for vaddr: 0x%llx\n",
					vaddr);
			if (!rc)
				rc = tmp_rc;
		}
	}

	atomic_dec(&phys_pg_pack->mapping_cnt);
	kfree(hnode);

	if (is_userptr) {
		free_phys_pg_pack(hdev, phys_pg_pack);
		dma_unmap_host_va(hdev, userptr);
	}

	return rc;

mapping_cnt_err:
	if (is_userptr)
		free_phys_pg_pack(hdev, phys_pg_pack);
vm_type_err:
	mutex_lock(&ctx->mem_hash_lock);
	hash_add(ctx->mem_hash, &hnode->node, vaddr);
	mutex_unlock(&ctx->mem_hash_lock);

	return rc;
}

static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
{
	struct hl_device *hdev = hpriv->hdev;
	struct hl_ctx *ctx = hpriv->ctx;
	u64 device_addr = 0;
	u32 handle = 0;
	int rc;

	switch (args->in.op) {
	case HL_MEM_OP_ALLOC:
		if (args->in.alloc.mem_size == 0) {
			dev_err(hdev->dev,
				"alloc size must be larger than 0\n");
			rc = -EINVAL;
			goto out;
		}

		/* Force contiguous as there are no real MMU
		 * translations to overcome physical memory gaps
		 */
		args->in.flags |= HL_MEM_CONTIGUOUS;
		rc = alloc_device_memory(ctx, &args->in, &handle);

		memset(args, 0, sizeof(*args));
		args->out.handle = (__u64) handle;
		break;

	case HL_MEM_OP_FREE:
		rc = free_device_memory(ctx, args->in.free.handle);
		break;

	case HL_MEM_OP_MAP:
		if (args->in.flags & HL_MEM_USERPTR) {
			device_addr = args->in.map_host.host_virt_addr;
			rc = 0;
		} else {
			rc = get_paddr_from_handle(ctx, &args->in,
							&device_addr);
		}

		memset(args, 0, sizeof(*args));
		args->out.device_virt_addr = device_addr;
		break;

	case HL_MEM_OP_UNMAP:
		rc = 0;
		break;

	default:
		dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
		rc = -ENOTTY;
		break;
	}

out:
	return rc;
}

int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
{
	union hl_mem_args *args = data;
	struct hl_device *hdev = hpriv->hdev;
	struct hl_ctx *ctx = hpriv->ctx;
	u64 device_addr = 0;
	u32 handle = 0;
	int rc;

	if (hl_device_disabled_or_in_reset(hdev)) {
		dev_warn_ratelimited(hdev->dev,
			"Device is %s. Can't execute MEMORY IOCTL\n",
			atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
		return -EBUSY;
	}
"in_reset" : "disabled"); 1231 return -EBUSY; 1232 } 1233 1234 if (!hdev->mmu_enable) 1235 return mem_ioctl_no_mmu(hpriv, args); 1236 1237 switch (args->in.op) { 1238 case HL_MEM_OP_ALLOC: 1239 if (!hdev->dram_supports_virtual_memory) { 1240 dev_err(hdev->dev, "DRAM alloc is not supported\n"); 1241 rc = -EINVAL; 1242 goto out; 1243 } 1244 1245 if (args->in.alloc.mem_size == 0) { 1246 dev_err(hdev->dev, 1247 "alloc size must be larger than 0\n"); 1248 rc = -EINVAL; 1249 goto out; 1250 } 1251 rc = alloc_device_memory(ctx, &args->in, &handle); 1252 1253 memset(args, 0, sizeof(*args)); 1254 args->out.handle = (__u64) handle; 1255 break; 1256 1257 case HL_MEM_OP_FREE: 1258 rc = free_device_memory(ctx, args->in.free.handle); 1259 break; 1260 1261 case HL_MEM_OP_MAP: 1262 rc = map_device_va(ctx, &args->in, &device_addr); 1263 1264 memset(args, 0, sizeof(*args)); 1265 args->out.device_virt_addr = device_addr; 1266 break; 1267 1268 case HL_MEM_OP_UNMAP: 1269 rc = unmap_device_va(ctx, args->in.unmap.device_virt_addr, 1270 false); 1271 break; 1272 1273 default: 1274 dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n"); 1275 rc = -ENOTTY; 1276 break; 1277 } 1278 1279out: 1280 return rc; 1281} 1282 1283static int get_user_memory(struct hl_device *hdev, u64 addr, u64 size, 1284 u32 npages, u64 start, u32 offset, 1285 struct hl_userptr *userptr) 1286{ 1287 int rc; 1288 1289 if (!access_ok((void __user *) (uintptr_t) addr, size)) { 1290 dev_err(hdev->dev, "user pointer is invalid - 0x%llx\n", addr); 1291 return -EFAULT; 1292 } 1293 1294 userptr->vec = frame_vector_create(npages); 1295 if (!userptr->vec) { 1296 dev_err(hdev->dev, "Failed to create frame vector\n"); 1297 return -ENOMEM; 1298 } 1299 1300 rc = get_vaddr_frames(start, npages, FOLL_FORCE | FOLL_WRITE, 1301 userptr->vec); 1302 1303 if (rc != npages) { 1304 dev_err(hdev->dev, 1305 "Failed to map host memory, user ptr probably wrong\n"); 1306 if (rc < 0) 1307 goto destroy_framevec; 1308 rc = -EFAULT; 1309 goto put_framevec; 1310 } 1311 1312 if (frame_vector_to_pages(userptr->vec) < 0) { 1313 dev_err(hdev->dev, 1314 "Failed to translate frame vector to pages\n"); 1315 rc = -EFAULT; 1316 goto put_framevec; 1317 } 1318 1319 rc = sg_alloc_table_from_pages(userptr->sgt, 1320 frame_vector_pages(userptr->vec), 1321 npages, offset, size, GFP_ATOMIC); 1322 if (rc < 0) { 1323 dev_err(hdev->dev, "failed to create SG table from pages\n"); 1324 goto put_framevec; 1325 } 1326 1327 return 0; 1328 1329put_framevec: 1330 put_vaddr_frames(userptr->vec); 1331destroy_framevec: 1332 frame_vector_destroy(userptr->vec); 1333 return rc; 1334} 1335 1336/* 1337 * hl_pin_host_memory - pins a chunk of host memory. 1338 * @hdev: pointer to the habanalabs device structure 1339 * @addr: the host virtual address of the memory area 1340 * @size: the size of the memory area 1341 * @userptr: pointer to hl_userptr structure 1342 * 1343 * This function does the following: 1344 * - Pins the physical pages 1345 * - Create an SG list from those pages 1346 */ 1347int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size, 1348 struct hl_userptr *userptr) 1349{ 1350 u64 start, end; 1351 u32 npages, offset; 1352 int rc; 1353 1354 if (!size) { 1355 dev_err(hdev->dev, "size to pin is invalid - %llu\n", size); 1356 return -EINVAL; 1357 } 1358 1359 /* 1360 * If the combination of the address and size requested for this memory 1361 * region causes an integer overflow, return error. 
/*
 * hl_pin_host_memory - pins a chunk of host memory.
 * @hdev: pointer to the habanalabs device structure
 * @addr: the host virtual address of the memory area
 * @size: the size of the memory area
 * @userptr: pointer to hl_userptr structure
 *
 * This function does the following:
 * - Pins the physical pages
 * - Create an SG list from those pages
 */
int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
			struct hl_userptr *userptr)
{
	u64 start, end;
	u32 npages, offset;
	int rc;

	if (!size) {
		dev_err(hdev->dev, "size to pin is invalid - %llu\n", size);
		return -EINVAL;
	}

	/*
	 * If the combination of the address and size requested for this memory
	 * region causes an integer overflow, return error.
	 */
	if (((addr + size) < addr) ||
			PAGE_ALIGN(addr + size) < (addr + size)) {
		dev_err(hdev->dev,
			"user pointer 0x%llx + %llu causes integer overflow\n",
			addr, size);
		return -EINVAL;
	}

	/*
	 * This function can be called also from data path, hence use atomic
	 * always as it is not a big allocation.
	 */
	userptr->sgt = kzalloc(sizeof(*userptr->sgt), GFP_ATOMIC);
	if (!userptr->sgt)
		return -ENOMEM;

	start = addr & PAGE_MASK;
	offset = addr & ~PAGE_MASK;
	end = PAGE_ALIGN(addr + size);
	npages = (end - start) >> PAGE_SHIFT;

	userptr->size = size;
	userptr->addr = addr;
	userptr->dma_mapped = false;
	INIT_LIST_HEAD(&userptr->job_node);

	rc = get_user_memory(hdev, addr, size, npages, start, offset,
				userptr);
	if (rc) {
		dev_err(hdev->dev,
			"failed to get user memory for address 0x%llx\n",
			addr);
		goto free_sgt;
	}

	hl_debugfs_add_userptr(hdev, userptr);

	return 0;

free_sgt:
	kfree(userptr->sgt);
	return rc;
}

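/*
 * Worked example for the page math above (informational only, assuming a 4KB
 * PAGE_SIZE): pinning addr = 0x7f0000001800 with size = 0x2000 gives
 * start = 0x7f0000001000, offset = 0x800, end = 0x7f0000004000 and therefore
 * npages = 3, since the region touches three distinct host pages.
 */
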
/*
 * hl_unpin_host_memory - unpins a chunk of host memory.
 * @hdev: pointer to the habanalabs device structure
 * @userptr: pointer to hl_userptr structure
 *
 * This function does the following:
 * - Unpins the physical pages related to the host memory
 * - Free the SG list
 */
void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr)
{
	struct page **pages;

	hl_debugfs_remove_userptr(hdev, userptr);

	if (userptr->dma_mapped)
		hdev->asic_funcs->hl_dma_unmap_sg(hdev, userptr->sgt->sgl,
							userptr->sgt->nents,
							userptr->dir);

	pages = frame_vector_pages(userptr->vec);
	if (!IS_ERR(pages)) {
		int i;

		for (i = 0; i < frame_vector_count(userptr->vec); i++)
			set_page_dirty_lock(pages[i]);
	}
	put_vaddr_frames(userptr->vec);
	frame_vector_destroy(userptr->vec);

	list_del(&userptr->job_node);

	sg_free_table(userptr->sgt);
	kfree(userptr->sgt);
}

/*
 * hl_userptr_delete_list - clear userptr list
 *
 * @hdev : pointer to the habanalabs device structure
 * @userptr_list : pointer to the list to clear
 *
 * This function does the following:
 * - Iterates over the list and unpins the host memory and frees the userptr
 *   structure.
 */
void hl_userptr_delete_list(struct hl_device *hdev,
				struct list_head *userptr_list)
{
	struct hl_userptr *userptr, *tmp;

	list_for_each_entry_safe(userptr, tmp, userptr_list, job_node) {
		hl_unpin_host_memory(hdev, userptr);
		kfree(userptr);
	}

	INIT_LIST_HEAD(userptr_list);
}

/*
 * hl_userptr_is_pinned - returns whether the given userptr is pinned
 *
 * @hdev : pointer to the habanalabs device structure
 * @userptr_list : pointer to the list to search in
 * @userptr : pointer to userptr to check
 *
 * This function does the following:
 * - Iterates over the list and checks if the given userptr is in it, which
 *   means it is pinned. If so, returns true, otherwise returns false.
 */
bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr,
				u32 size, struct list_head *userptr_list,
				struct hl_userptr **userptr)
{
	list_for_each_entry((*userptr), userptr_list, job_node) {
		if ((addr == (*userptr)->addr) && (size == (*userptr)->size))
			return true;
	}

	return false;
}

/*
 * va_range_init - initialize virtual addresses range
 * @hdev: pointer to the habanalabs device structure
 * @va_range: pointer to the range to initialize
 * @start: range start address
 * @end: range end address
 *
 * This function does the following:
 * - Initializes the virtual addresses list of the given range with the given
 *   addresses.
 */
static int va_range_init(struct hl_device *hdev, struct hl_va_range *va_range,
			u64 start, u64 end)
{
	int rc;

	INIT_LIST_HEAD(&va_range->list);

	/* PAGE_SIZE alignment */

	if (start & (PAGE_SIZE - 1)) {
		start &= PAGE_MASK;
		start += PAGE_SIZE;
	}

	if (end & (PAGE_SIZE - 1))
		end &= PAGE_MASK;

	if (start >= end) {
		dev_err(hdev->dev, "too small vm range for va list\n");
		return -EFAULT;
	}

	rc = add_va_block(hdev, va_range, start, end);

	if (rc) {
		dev_err(hdev->dev, "Failed to init host va list\n");
		return rc;
	}

	va_range->start_addr = start;
	va_range->end_addr = end;

	return 0;
}

/*
 * va_range_fini() - clear a virtual addresses range
 * @hdev: pointer to the habanalabs structure
 * @va_range: pointer to virtual addresses range
 *
 * This function does the following:
 * - Frees the virtual addresses block list and its lock
 */
static void va_range_fini(struct hl_device *hdev,
		struct hl_va_range *va_range)
{
	mutex_lock(&va_range->lock);
	clear_va_list_locked(hdev, &va_range->list);
	mutex_unlock(&va_range->lock);

	mutex_destroy(&va_range->lock);
	kfree(va_range);
}

/*
 * vm_ctx_init_with_ranges() - initialize virtual memory for context
 * @ctx: pointer to the habanalabs context structure
 * @host_range_start: host virtual addresses range start.
 * @host_range_end: host virtual addresses range end.
 * @host_huge_range_start: host virtual addresses range start for memory
 *                         allocated with huge pages.
 * @host_huge_range_end: host virtual addresses range end for memory allocated
 *                       with huge pages.
 * @dram_range_start: dram virtual addresses range start.
 * @dram_range_end: dram virtual addresses range end.
 *
 * This function initializes the following:
 * - MMU for context
 * - Virtual address to area descriptor hashtable
 * - Virtual block list of available virtual memory
 */
static int vm_ctx_init_with_ranges(struct hl_ctx *ctx,
					u64 host_range_start,
					u64 host_range_end,
					u64 host_huge_range_start,
					u64 host_huge_range_end,
					u64 dram_range_start,
					u64 dram_range_end)
{
	struct hl_device *hdev = ctx->hdev;
	int rc;

	ctx->host_va_range = kzalloc(sizeof(*ctx->host_va_range), GFP_KERNEL);
	if (!ctx->host_va_range)
		return -ENOMEM;

	ctx->host_huge_va_range = kzalloc(sizeof(*ctx->host_huge_va_range),
						GFP_KERNEL);
	if (!ctx->host_huge_va_range) {
		rc = -ENOMEM;
		goto host_huge_va_range_err;
	}

	ctx->dram_va_range = kzalloc(sizeof(*ctx->dram_va_range), GFP_KERNEL);
	if (!ctx->dram_va_range) {
		rc = -ENOMEM;
		goto dram_va_range_err;
	}

	rc = hl_mmu_ctx_init(ctx);
	if (rc) {
		dev_err(hdev->dev, "failed to init context %d\n", ctx->asid);
		goto mmu_ctx_err;
	}

	mutex_init(&ctx->mem_hash_lock);
	hash_init(ctx->mem_hash);

	mutex_init(&ctx->host_va_range->lock);

	rc = va_range_init(hdev, ctx->host_va_range, host_range_start,
				host_range_end);
	if (rc) {
		dev_err(hdev->dev, "failed to init host vm range\n");
		goto host_page_range_err;
	}

	if (hdev->pmmu_huge_range) {
		mutex_init(&ctx->host_huge_va_range->lock);

		rc = va_range_init(hdev, ctx->host_huge_va_range,
					host_huge_range_start,
					host_huge_range_end);
		if (rc) {
			dev_err(hdev->dev,
				"failed to init host huge vm range\n");
			goto host_hpage_range_err;
		}
	} else {
		kfree(ctx->host_huge_va_range);
		ctx->host_huge_va_range = ctx->host_va_range;
	}

	mutex_init(&ctx->dram_va_range->lock);

	rc = va_range_init(hdev, ctx->dram_va_range, dram_range_start,
			dram_range_end);
	if (rc) {
		dev_err(hdev->dev, "failed to init dram vm range\n");
		goto dram_vm_err;
	}

	hl_debugfs_add_ctx_mem_hash(hdev, ctx);

	return 0;

dram_vm_err:
	mutex_destroy(&ctx->dram_va_range->lock);

	if (hdev->pmmu_huge_range) {
		mutex_lock(&ctx->host_huge_va_range->lock);
		clear_va_list_locked(hdev, &ctx->host_huge_va_range->list);
		mutex_unlock(&ctx->host_huge_va_range->lock);
	}
host_hpage_range_err:
	if (hdev->pmmu_huge_range)
		mutex_destroy(&ctx->host_huge_va_range->lock);
	mutex_lock(&ctx->host_va_range->lock);
	clear_va_list_locked(hdev, &ctx->host_va_range->list);
	mutex_unlock(&ctx->host_va_range->lock);
host_page_range_err:
	mutex_destroy(&ctx->host_va_range->lock);
	mutex_destroy(&ctx->mem_hash_lock);
	hl_mmu_ctx_fini(ctx);
mmu_ctx_err:
	kfree(ctx->dram_va_range);
dram_va_range_err:
	kfree(ctx->host_huge_va_range);
host_huge_va_range_err:
	kfree(ctx->host_va_range);

	return rc;
}

int hl_vm_ctx_init(struct hl_ctx *ctx)
{
	struct asic_fixed_properties *prop = &ctx->hdev->asic_prop;
	u64 host_range_start, host_range_end, host_huge_range_start,
		host_huge_range_end, dram_range_start, dram_range_end;

	atomic64_set(&ctx->dram_phys_mem, 0);

	/*
	 * - If MMU is enabled, init the ranges as usual.
	 * - If MMU is disabled, in case of host mapping, the returned address
	 *   is the given one.
	 *   In case of DRAM mapping, the returned address is the physical
	 *   address of the memory related to the given handle.
	 */
	if (ctx->hdev->mmu_enable) {
		dram_range_start = prop->dmmu.start_addr;
		dram_range_end = prop->dmmu.end_addr;
		host_range_start = prop->pmmu.start_addr;
		host_range_end = prop->pmmu.end_addr;
		host_huge_range_start = prop->pmmu_huge.start_addr;
		host_huge_range_end = prop->pmmu_huge.end_addr;
	} else {
		dram_range_start = prop->dram_user_base_address;
		dram_range_end = prop->dram_end_address;
		host_range_start = prop->dram_user_base_address;
		host_range_end = prop->dram_end_address;
		host_huge_range_start = prop->dram_user_base_address;
		host_huge_range_end = prop->dram_end_address;
	}

	return vm_ctx_init_with_ranges(ctx, host_range_start, host_range_end,
					host_huge_range_start,
					host_huge_range_end,
					dram_range_start,
					dram_range_end);
}

/*
 * hl_vm_ctx_fini - virtual memory teardown of context
 *
 * @ctx : pointer to the habanalabs context structure
 *
 * This function performs teardown of the following:
 * - Virtual block list of available virtual memory
 * - Virtual address to area descriptor hashtable
 * - MMU for context
 *
 * In addition this function does the following:
 * - Unmaps the existing hashtable nodes if the hashtable is not empty. The
 *   hashtable should be empty as no valid mappings should exist at this
 *   point.
 * - Frees any existing physical page list from the idr which relates to the
 *   current context asid.
 * - This function checks the virtual block list for correctness. At this point
 *   the list should contain one element which describes the whole virtual
 *   memory range of the context. Otherwise, a warning is printed.
 */
void hl_vm_ctx_fini(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct hl_vm *vm = &hdev->vm;
	struct hl_vm_phys_pg_pack *phys_pg_list;
	struct hl_vm_hash_node *hnode;
	struct hlist_node *tmp_node;
	int i;

	hl_debugfs_remove_ctx_mem_hash(hdev, ctx);

	/*
	 * Clearly something went wrong on hard reset so no point in printing
	 * another side effect error
	 */
	if (!hdev->hard_reset_pending && !hash_empty(ctx->mem_hash))
		dev_notice(hdev->dev,
			"user released device without removing its memory mappings\n");

	hash_for_each_safe(ctx->mem_hash, i, tmp_node, hnode, node) {
		dev_dbg(hdev->dev,
			"hl_mem_hash_node of vaddr 0x%llx of asid %d is still alive\n",
			hnode->vaddr, ctx->asid);
		unmap_device_va(ctx, hnode->vaddr, true);
	}

	/* invalidate the cache once after the unmapping loop */
	hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
	hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_PHYS_PACK);

	spin_lock(&vm->idr_lock);
	idr_for_each_entry(&vm->phys_pg_pack_handles, phys_pg_list, i)
		if (phys_pg_list->asid == ctx->asid) {
			dev_dbg(hdev->dev,
				"page list 0x%px of asid %d is still alive\n",
				phys_pg_list, ctx->asid);
			atomic64_sub(phys_pg_list->total_size,
					&hdev->dram_used_mem);
			free_phys_pg_pack(hdev, phys_pg_list);
			idr_remove(&vm->phys_pg_pack_handles, i);
		}
	spin_unlock(&vm->idr_lock);

	va_range_fini(hdev, ctx->dram_va_range);
	if (hdev->pmmu_huge_range)
		va_range_fini(hdev, ctx->host_huge_va_range);
	va_range_fini(hdev, ctx->host_va_range);

	mutex_destroy(&ctx->mem_hash_lock);
	hl_mmu_ctx_fini(ctx);
}

/*
 * hl_vm_init - initialize virtual memory module
 *
 * @hdev : pointer to the habanalabs device structure
 *
 * This function initializes the following:
 * - MMU module
 * - DRAM physical pages pool of 2MB
 * - Idr for device memory allocation handles
 */
int hl_vm_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_vm *vm = &hdev->vm;
	int rc;

	vm->dram_pg_pool = gen_pool_create(__ffs(prop->dram_page_size), -1);
	if (!vm->dram_pg_pool) {
		dev_err(hdev->dev, "Failed to create dram page pool\n");
		return -ENOMEM;
	}

	kref_init(&vm->dram_pg_pool_refcount);

	rc = gen_pool_add(vm->dram_pg_pool, prop->dram_user_base_address,
			prop->dram_end_address - prop->dram_user_base_address,
			-1);

	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to dram page pool %d\n", rc);
		goto pool_add_err;
	}

	spin_lock_init(&vm->idr_lock);
	idr_init(&vm->phys_pg_pack_handles);

	atomic64_set(&hdev->dram_used_mem, 0);

	vm->init_done = true;

	return 0;

pool_add_err:
	gen_pool_destroy(vm->dram_pg_pool);

	return rc;
}

/*
 * hl_vm_fini - virtual memory module teardown
 *
 * @hdev : pointer to the habanalabs device structure
 *
 * This function performs teardown of the following:
 * - Idr for device memory allocation handles
 * - DRAM physical pages pool of 2MB
 * - MMU module
 */
void hl_vm_fini(struct hl_device *hdev)
{
	struct hl_vm *vm = &hdev->vm;

	if (!vm->init_done)
		return;

	/*
	 * At this point all the contexts should be freed and hence no DRAM
	 * memory should be in use. Hence the DRAM pool should be freed here.
	 */
	if (kref_put(&vm->dram_pg_pool_refcount, dram_pg_pool_do_release) != 1)
		dev_warn(hdev->dev, "dram_pg_pool was not destroyed on %s\n",
				__func__);

	vm->init_done = false;
}