// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include <uapi/misc/habanalabs.h>
#include "habanalabs.h"

#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/genalloc.h>

static int cb_map_mem(struct hl_ctx *ctx, struct hl_cb *cb)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_vm_va_block *va_block, *tmp;
	dma_addr_t bus_addr;
	u64 virt_addr;
	u32 page_size = prop->pmmu.page_size;
	s32 offset;
	int rc;

	if (!hdev->supports_cb_mapping) {
		dev_err_ratelimited(hdev->dev,
			"Cannot map CB because no VA range is allocated for CB mapping\n");
		return -EINVAL;
	}

	if (!hdev->mmu_enable) {
		dev_err_ratelimited(hdev->dev,
			"Cannot map CB because MMU is disabled\n");
		return -EINVAL;
	}

	INIT_LIST_HEAD(&cb->va_block_list);

	for (bus_addr = cb->bus_address;
			bus_addr < cb->bus_address + cb->size;
			bus_addr += page_size) {

		virt_addr = (u64) gen_pool_alloc(ctx->cb_va_pool, page_size);
		if (!virt_addr) {
			dev_err(hdev->dev,
				"Failed to allocate device virtual address for CB\n");
			rc = -ENOMEM;
			goto err_va_pool_free;
		}

		va_block = kzalloc(sizeof(*va_block), GFP_KERNEL);
		if (!va_block) {
			rc = -ENOMEM;
			gen_pool_free(ctx->cb_va_pool, virt_addr, page_size);
			goto err_va_pool_free;
		}

		va_block->start = virt_addr;
		va_block->end = virt_addr + page_size;
		va_block->size = page_size;
		list_add_tail(&va_block->node, &cb->va_block_list);
	}

	mutex_lock(&ctx->mmu_lock);

	bus_addr = cb->bus_address;
	offset = 0;
	list_for_each_entry(va_block, &cb->va_block_list, node) {
		rc = hl_mmu_map(ctx, va_block->start, bus_addr, va_block->size,
				list_is_last(&va_block->node,
						&cb->va_block_list));
		if (rc) {
			dev_err(hdev->dev, "Failed to map VA %#llx to CB\n",
				va_block->start);
			goto err_va_umap;
		}

		bus_addr += va_block->size;
		offset += va_block->size;
	}

	hdev->asic_funcs->mmu_invalidate_cache(hdev, false, VM_TYPE_USERPTR);

	mutex_unlock(&ctx->mmu_lock);

	cb->is_mmu_mapped = true;

	return 0;

err_va_umap:
	list_for_each_entry(va_block, &cb->va_block_list, node) {
		if (offset <= 0)
			break;
		hl_mmu_unmap(ctx, va_block->start, va_block->size,
				offset <= va_block->size);
		offset -= va_block->size;
	}

	hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);

	mutex_unlock(&ctx->mmu_lock);

err_va_pool_free:
	list_for_each_entry_safe(va_block, tmp, &cb->va_block_list, node) {
		gen_pool_free(ctx->cb_va_pool, va_block->start, va_block->size);
		list_del(&va_block->node);
		kfree(va_block);
	}

	return rc;
}
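/*
 * cb_unmap_mem() - the inverse of cb_map_mem(): unmap each VA block that was
 * mapped for this CB, invalidate the MMU cache, and return the blocks to the
 * context's CB VA pool.
 */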
static void cb_unmap_mem(struct hl_ctx *ctx, struct hl_cb *cb)
{
	struct hl_device *hdev = ctx->hdev;
	struct hl_vm_va_block *va_block, *tmp;

	mutex_lock(&ctx->mmu_lock);

	list_for_each_entry(va_block, &cb->va_block_list, node)
		if (hl_mmu_unmap(ctx, va_block->start, va_block->size,
				list_is_last(&va_block->node,
						&cb->va_block_list)))
			dev_warn_ratelimited(hdev->dev,
					"Failed to unmap CB's va 0x%llx\n",
					va_block->start);

	hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);

	mutex_unlock(&ctx->mmu_lock);

	list_for_each_entry_safe(va_block, tmp, &cb->va_block_list, node) {
		gen_pool_free(ctx->cb_va_pool, va_block->start, va_block->size);
		list_del(&va_block->node);
		kfree(va_block);
	}
}

static void cb_fini(struct hl_device *hdev, struct hl_cb *cb)
{
	if (cb->is_internal)
		gen_pool_free(hdev->internal_cb_pool,
				(uintptr_t)cb->kernel_address, cb->size);
	else
		hdev->asic_funcs->asic_dma_free_coherent(hdev, cb->size,
				cb->kernel_address, cb->bus_address);

	kfree(cb);
}

static void cb_do_release(struct hl_device *hdev, struct hl_cb *cb)
{
	if (cb->is_pool) {
		spin_lock(&hdev->cb_pool_lock);
		list_add(&cb->pool_list, &hdev->cb_pool);
		spin_unlock(&hdev->cb_pool_lock);
	} else {
		cb_fini(hdev, cb);
	}
}

static void cb_release(struct kref *ref)
{
	struct hl_device *hdev;
	struct hl_cb *cb;

	cb = container_of(ref, struct hl_cb, refcount);
	hdev = cb->hdev;

	hl_debugfs_remove_cb(cb);

	if (cb->is_mmu_mapped)
		cb_unmap_mem(cb->ctx, cb);

	hl_ctx_put(cb->ctx);

	cb_do_release(hdev, cb);
}

static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
					int ctx_id, bool internal_cb)
{
	struct hl_cb *cb;
	u32 cb_offset;
	void *p;

	/*
	 * We use GFP_ATOMIC here because this function can be called from
	 * the latency-sensitive code path for command submission. Due to H/W
	 * limitations in some of the ASICs, the kernel must copy the user CB
	 * that is designated for an external queue and actually enqueue
	 * the kernel's copy. Hence, we must never sleep in this code section
	 * and must use GFP_ATOMIC for all memory allocations.
	 */
	if (ctx_id == HL_KERNEL_ASID_ID)
		cb = kzalloc(sizeof(*cb), GFP_ATOMIC);
	else
		cb = kzalloc(sizeof(*cb), GFP_KERNEL);

	if (!cb)
		return NULL;

	if (internal_cb) {
		p = (void *) gen_pool_alloc(hdev->internal_cb_pool, cb_size);
		if (!p) {
			kfree(cb);
			return NULL;
		}

		cb_offset = p - hdev->internal_cb_pool_virt_addr;
		cb->is_internal = true;
		cb->bus_address = hdev->internal_cb_va_base + cb_offset;
	} else if (ctx_id == HL_KERNEL_ASID_ID) {
		p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size,
						&cb->bus_address, GFP_ATOMIC);
	} else {
		p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size,
						&cb->bus_address,
						GFP_USER | __GFP_ZERO);
	}

	if (!p) {
		dev_err(hdev->dev,
			"failed to allocate %d bytes of DMA memory for CB\n",
			cb_size);
		kfree(cb);
		return NULL;
	}

	cb->kernel_address = p;
	cb->size = cb_size;

	return cb;
}
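/*
 * hl_cb_create() - allocate a command buffer, or reuse one from the pre-made
 * pool for the kernel context, optionally map it to the device MMU, and
 * publish it through the manager's idr. On success, *handle holds the
 * mmap-able handle: the idr value OR'ed with HL_MMAP_TYPE_CB and shifted
 * left by PAGE_SHIFT.
 */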
int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
			struct hl_ctx *ctx, u32 cb_size, bool internal_cb,
			bool map_cb, u64 *handle)
{
	struct hl_cb *cb;
	bool alloc_new_cb = true;
	int rc, ctx_id = ctx->asid;

	/*
	 * Can't use the generic check function here because of the special
	 * case where a CB is created as part of the reset process
	 */
	if ((hdev->disabled) || ((atomic_read(&hdev->in_reset)) &&
					(ctx_id != HL_KERNEL_ASID_ID))) {
		dev_warn_ratelimited(hdev->dev,
			"Device is disabled or in reset. Can't create new CBs\n");
		rc = -EBUSY;
		goto out_err;
	}

	if (cb_size > SZ_2M) {
		dev_err(hdev->dev, "CB size %d must be less than %d\n",
			cb_size, SZ_2M);
		rc = -EINVAL;
		goto out_err;
	}

	if (!internal_cb) {
		/* Minimum allocation must be PAGE_SIZE */
		if (cb_size < PAGE_SIZE)
			cb_size = PAGE_SIZE;

		if (ctx_id == HL_KERNEL_ASID_ID &&
				cb_size <= hdev->asic_prop.cb_pool_cb_size) {

			spin_lock(&hdev->cb_pool_lock);
			if (!list_empty(&hdev->cb_pool)) {
				cb = list_first_entry(&hdev->cb_pool,
						typeof(*cb), pool_list);
				list_del(&cb->pool_list);
				spin_unlock(&hdev->cb_pool_lock);
				alloc_new_cb = false;
			} else {
				spin_unlock(&hdev->cb_pool_lock);
				dev_dbg(hdev->dev, "CB pool is empty\n");
			}
		}
	}

	if (alloc_new_cb) {
		cb = hl_cb_alloc(hdev, cb_size, ctx_id, internal_cb);
		if (!cb) {
			rc = -ENOMEM;
			goto out_err;
		}
	}

	cb->hdev = hdev;
	cb->ctx = ctx;
	hl_ctx_get(hdev, cb->ctx);

	if (map_cb) {
		if (ctx_id == HL_KERNEL_ASID_ID) {
			dev_err(hdev->dev,
				"CB mapping is not supported for kernel context\n");
			rc = -EINVAL;
			goto release_cb;
		}

		rc = cb_map_mem(ctx, cb);
		if (rc)
			goto release_cb;
	}

	spin_lock(&mgr->cb_lock);
	rc = idr_alloc(&mgr->cb_handles, cb, 1, 0, GFP_ATOMIC);
	spin_unlock(&mgr->cb_lock);

	if (rc < 0) {
		dev_err(hdev->dev, "Failed to allocate IDR for a new CB\n");
		goto unmap_mem;
	}

	cb->id = (u64) rc;

	kref_init(&cb->refcount);
	spin_lock_init(&cb->lock);

	/*
	 * idr is 32-bit so we can safely OR it with a mask that is above
	 * 32 bits
	 */
	*handle = cb->id | HL_MMAP_TYPE_CB;
	*handle <<= PAGE_SHIFT;

	hl_debugfs_add_cb(cb);

	return 0;

unmap_mem:
	if (cb->is_mmu_mapped)
		cb_unmap_mem(cb->ctx, cb);
release_cb:
	hl_ctx_put(cb->ctx);
	cb_do_release(hdev, cb);
out_err:
	*handle = 0;

	return rc;
}

int hl_cb_destroy(struct hl_device *hdev, struct hl_cb_mgr *mgr, u64 cb_handle)
{
	struct hl_cb *cb;
	u32 handle;
	int rc = 0;

	/*
	 * The handle was shifted before it was given to the user for mmap,
	 * so shift it back to the raw value the idr module allocated
	 */
	cb_handle >>= PAGE_SHIFT;
	handle = (u32) cb_handle;

	spin_lock(&mgr->cb_lock);

	cb = idr_find(&mgr->cb_handles, handle);
	if (cb) {
		idr_remove(&mgr->cb_handles, handle);
		spin_unlock(&mgr->cb_lock);
		kref_put(&cb->refcount, cb_release);
	} else {
		spin_unlock(&mgr->cb_lock);
		dev_err(hdev->dev,
			"CB destroy failed, no match to handle 0x%x\n", handle);
		rc = -EINVAL;
	}

	return rc;
}

int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data)
{
	union hl_cb_args *args = data;
	struct hl_device *hdev = hpriv->hdev;
	u64 handle = 0;
	int rc;

	if (hl_device_disabled_or_in_reset(hdev)) {
		dev_warn_ratelimited(hdev->dev,
			"Device is %s. Can't execute CB IOCTL\n",
			atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
		return -EBUSY;
	}

	switch (args->in.op) {
	case HL_CB_OP_CREATE:
		if (args->in.cb_size > HL_MAX_CB_SIZE) {
			dev_err(hdev->dev,
				"User requested CB size %d must be less than %d\n",
				args->in.cb_size, HL_MAX_CB_SIZE);
			rc = -EINVAL;
		} else {
			rc = hl_cb_create(hdev, &hpriv->cb_mgr, hpriv->ctx,
					args->in.cb_size, false,
					!!(args->in.flags & HL_CB_FLAGS_MAP),
					&handle);
		}

		memset(args, 0, sizeof(*args));
		args->out.cb_handle = handle;
		break;

	case HL_CB_OP_DESTROY:
		rc = hl_cb_destroy(hdev, &hpriv->cb_mgr,
					args->in.cb_handle);
		break;

	default:
		rc = -ENOTTY;
		break;
	}

	return rc;
}

static void cb_vm_close(struct vm_area_struct *vma)
{
	struct hl_cb *cb = (struct hl_cb *) vma->vm_private_data;
	long new_mmap_size;

	new_mmap_size = cb->mmap_size - (vma->vm_end - vma->vm_start);

	if (new_mmap_size > 0) {
		cb->mmap_size = new_mmap_size;
		return;
	}

	spin_lock(&cb->lock);
	cb->mmap = false;
	spin_unlock(&cb->lock);

	hl_cb_put(cb);
	vma->vm_private_data = NULL;
}

static const struct vm_operations_struct cb_vm_ops = {
	.close = cb_vm_close
};
int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
{
	struct hl_device *hdev = hpriv->hdev;
	struct hl_cb *cb;
	u32 handle, user_cb_size;
	int rc;

	/* We use the page offset to hold the idr and thus we need to clear
	 * it before doing the mmap itself
	 */
	handle = vma->vm_pgoff;
	vma->vm_pgoff = 0;

	/* reference was taken here */
	cb = hl_cb_get(hdev, &hpriv->cb_mgr, handle);
	if (!cb) {
		dev_err(hdev->dev,
			"CB mmap failed, no match to handle 0x%x\n", handle);
		return -EINVAL;
	}

	/* Validation check */
	user_cb_size = vma->vm_end - vma->vm_start;
	if (user_cb_size != ALIGN(cb->size, PAGE_SIZE)) {
		dev_err(hdev->dev,
			"CB mmap failed, mmap size 0x%lx != 0x%x cb size\n",
			vma->vm_end - vma->vm_start, cb->size);
		rc = -EINVAL;
		goto put_cb;
	}

	if (!access_ok((void __user *) (uintptr_t) vma->vm_start,
							user_cb_size)) {
		dev_err(hdev->dev,
			"user pointer is invalid - 0x%lx\n",
			vma->vm_start);

		rc = -EINVAL;
		goto put_cb;
	}

	spin_lock(&cb->lock);

	if (cb->mmap) {
		dev_err(hdev->dev,
			"CB mmap failed, CB already mmapped to user\n");
		rc = -EINVAL;
		goto release_lock;
	}

	cb->mmap = true;

	spin_unlock(&cb->lock);

	vma->vm_ops = &cb_vm_ops;

	/*
	 * Note: We're transferring the cb reference to
	 * vma->vm_private_data here.
	 */

	vma->vm_private_data = cb;

	rc = hdev->asic_funcs->cb_mmap(hdev, vma, cb->kernel_address,
					cb->bus_address, cb->size);
	if (rc) {
		spin_lock(&cb->lock);
		cb->mmap = false;
		goto release_lock;
	}

	cb->mmap_size = cb->size;

	return 0;

release_lock:
	spin_unlock(&cb->lock);
put_cb:
	hl_cb_put(cb);
	return rc;
}

struct hl_cb *hl_cb_get(struct hl_device *hdev, struct hl_cb_mgr *mgr,
			u32 handle)
{
	struct hl_cb *cb;

	spin_lock(&mgr->cb_lock);
	cb = idr_find(&mgr->cb_handles, handle);

	if (!cb) {
		spin_unlock(&mgr->cb_lock);
		dev_warn(hdev->dev,
			"CB get failed, no match to handle 0x%x\n", handle);
		return NULL;
	}

	kref_get(&cb->refcount);

	spin_unlock(&mgr->cb_lock);

	return cb;
}

void hl_cb_put(struct hl_cb *cb)
{
	kref_put(&cb->refcount, cb_release);
}

void hl_cb_mgr_init(struct hl_cb_mgr *mgr)
{
	spin_lock_init(&mgr->cb_lock);
	idr_init(&mgr->cb_handles);
}

void hl_cb_mgr_fini(struct hl_device *hdev, struct hl_cb_mgr *mgr)
{
	struct hl_cb *cb;
	struct idr *idp;
	u32 id;

	idp = &mgr->cb_handles;

	idr_for_each_entry(idp, cb, id) {
		if (kref_put(&cb->refcount, cb_release) != 1)
			dev_err(hdev->dev,
				"CB %d for CTX ID %d is still alive\n",
				id, cb->ctx->asid);
	}

	idr_destroy(&mgr->cb_handles);
}

struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size,
					bool internal_cb)
{
	u64 cb_handle;
	struct hl_cb *cb;
	int rc;

	rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx, cb_size,
				internal_cb, false, &cb_handle);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to allocate CB for the kernel driver %d\n", rc);
		return NULL;
	}

	cb_handle >>= PAGE_SHIFT;
	cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr, (u32) cb_handle);
	/* hl_cb_get should never fail here so use kernel WARN */
	WARN(!cb, "Kernel CB handle invalid 0x%x\n", (u32) cb_handle);
	if (!cb)
		goto destroy_cb;

	return cb;

destroy_cb:
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb_handle << PAGE_SHIFT);

	return NULL;
}

int hl_cb_pool_init(struct hl_device *hdev)
{
	struct hl_cb *cb;
	int i;

	INIT_LIST_HEAD(&hdev->cb_pool);
	spin_lock_init(&hdev->cb_pool_lock);

	for (i = 0 ; i < hdev->asic_prop.cb_pool_cb_cnt ; i++) {
		cb = hl_cb_alloc(hdev, hdev->asic_prop.cb_pool_cb_size,
				HL_KERNEL_ASID_ID, false);
		if (cb) {
			cb->is_pool = true;
			list_add(&cb->pool_list, &hdev->cb_pool);
		} else {
			hl_cb_pool_fini(hdev);
			return -ENOMEM;
		}
	}

	return 0;
}

int hl_cb_pool_fini(struct hl_device *hdev)
{
	struct hl_cb *cb, *tmp;

	list_for_each_entry_safe(cb, tmp, &hdev->cb_pool, pool_list) {
		list_del(&cb->pool_list);
		cb_fini(hdev, cb);
	}

	return 0;
}
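/*
 * hl_cb_va_pool_init() - create the per-context gen_pool of device virtual
 * addresses used by cb_map_mem(). It is a no-op when the ASIC does not
 * support CB mapping.
 */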
int hl_cb_va_pool_init(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc;

	if (!hdev->supports_cb_mapping)
		return 0;

	ctx->cb_va_pool = gen_pool_create(__ffs(prop->pmmu.page_size), -1);
	if (!ctx->cb_va_pool) {
		dev_err(hdev->dev,
			"Failed to create VA gen pool for CB mapping\n");
		return -ENOMEM;
	}

	rc = gen_pool_add(ctx->cb_va_pool, prop->cb_va_start_addr,
			prop->cb_va_end_addr - prop->cb_va_start_addr, -1);
	if (rc) {
		dev_err(hdev->dev,
			"Failed to add memory to VA gen pool for CB mapping\n");
		goto err_pool_destroy;
	}

	return 0;

err_pool_destroy:
	gen_pool_destroy(ctx->cb_va_pool);

	return rc;
}

void hl_cb_va_pool_fini(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;

	if (!hdev->supports_cb_mapping)
		return;

	gen_pool_destroy(ctx->cb_va_pool);
}