// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "habanalabs.h"
#include "../include/hw_ip/mmu/mmu_general.h"

#include <linux/genalloc.h>
#include <linux/slab.h>

static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr);

static struct pgt_info *get_pgt_info(struct hl_ctx *ctx, u64 hop_addr)
{
	struct pgt_info *pgt_info = NULL;

	hash_for_each_possible(ctx->mmu_shadow_hash, pgt_info, node,
				(unsigned long) hop_addr)
		if (hop_addr == pgt_info->shadow_addr)
			break;

	return pgt_info;
}

static void _free_hop(struct hl_ctx *ctx, struct pgt_info *pgt_info)
{
	struct hl_device *hdev = ctx->hdev;

	gen_pool_free(hdev->mmu_priv.mmu_pgt_pool, pgt_info->phys_addr,
			hdev->asic_prop.mmu_hop_table_size);
	hash_del(&pgt_info->node);
	kfree((u64 *) (uintptr_t) pgt_info->shadow_addr);
	kfree(pgt_info);
}

static void free_hop(struct hl_ctx *ctx, u64 hop_addr)
{
	struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);

	_free_hop(ctx, pgt_info);
}

static u64 alloc_hop(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pgt_info *pgt_info;
	u64 phys_addr, shadow_addr;

	pgt_info = kmalloc(sizeof(*pgt_info), GFP_KERNEL);
	if (!pgt_info)
		return ULLONG_MAX;

	phys_addr = (u64) gen_pool_alloc(hdev->mmu_priv.mmu_pgt_pool,
					prop->mmu_hop_table_size);
	if (!phys_addr) {
		dev_err(hdev->dev, "failed to allocate page\n");
		goto pool_add_err;
	}

	shadow_addr = (u64) (uintptr_t) kzalloc(prop->mmu_hop_table_size,
						GFP_KERNEL);
	if (!shadow_addr)
		goto shadow_err;

	pgt_info->phys_addr = phys_addr;
	pgt_info->shadow_addr = shadow_addr;
	pgt_info->ctx = ctx;
	pgt_info->num_of_ptes = 0;
	hash_add(ctx->mmu_shadow_hash, &pgt_info->node, shadow_addr);

	return shadow_addr;

shadow_err:
	gen_pool_free(hdev->mmu_priv.mmu_pgt_pool, phys_addr,
			prop->mmu_hop_table_size);
pool_add_err:
	kfree(pgt_info);

	return ULLONG_MAX;
}
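
/*
 * Note: every hop table allocated by alloc_hop() exists twice - a physical
 * copy carved out of mmu_pgt_pool (the device page table region, written
 * only through the ASIC write_pte() callback) and a zeroed shadow copy in
 * host memory from kzalloc(). The shadow address is used as the hash key,
 * so get_pgt_info() looks hops up by their shadow address and
 * get_phys_addr() translates back to the physical copy when writing to H/W.
 */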

static inline u64 get_phys_hop0_addr(struct hl_ctx *ctx)
{
	return ctx->hdev->asic_prop.mmu_pgt_addr +
			(ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
}

static inline u64 get_hop0_addr(struct hl_ctx *ctx)
{
	return (u64) (uintptr_t) ctx->hdev->mmu_priv.mmu_shadow_hop0 +
			(ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
}

static void flush(struct hl_ctx *ctx)
{
	/* flush all writes from all cores to reach PCI */
	mb();
	ctx->hdev->asic_funcs->read_pte(ctx->hdev, get_phys_hop0_addr(ctx));
}

/* transform the value to physical address when writing to H/W */
static inline void write_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val)
{
	/*
	 * The value to write is actually the address of the next shadow hop +
	 * flags at the 12 LSBs.
	 * Hence in order to get the value to write to the physical PTE, we
	 * clear the 12 LSBs and translate the shadow hop to its associated
	 * physical hop, and add back the original 12 LSBs.
	 */
	u64 phys_val = get_phys_addr(ctx, val & HOP_PHYS_ADDR_MASK) |
				(val & FLAGS_MASK);

	ctx->hdev->asic_funcs->write_pte(ctx->hdev,
					get_phys_addr(ctx, shadow_pte_addr),
					phys_val);

	*(u64 *) (uintptr_t) shadow_pte_addr = val;
}

/* do not transform the value to physical address when writing to H/W */
static inline void write_final_pte(struct hl_ctx *ctx, u64 shadow_pte_addr,
					u64 val)
{
	ctx->hdev->asic_funcs->write_pte(ctx->hdev,
					get_phys_addr(ctx, shadow_pte_addr),
					val);
	*(u64 *) (uintptr_t) shadow_pte_addr = val;
}

/* clear the last and present bits */
static inline void clear_pte(struct hl_ctx *ctx, u64 pte_addr)
{
	/* no need to transform the value to physical address */
	write_final_pte(ctx, pte_addr, 0);
}

static inline void get_pte(struct hl_ctx *ctx, u64 hop_addr)
{
	get_pgt_info(ctx, hop_addr)->num_of_ptes++;
}

/*
 * put_pte - decrement the num of ptes and free the hop if possible
 *
 * @ctx: pointer to the context structure
 * @hop_addr: addr of the hop
 *
 * This function returns the number of ptes left on this hop. If the number is
 * 0, it means the hop was freed.
 */
static inline int put_pte(struct hl_ctx *ctx, u64 hop_addr)
{
	struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);
	int num_of_ptes_left;

	pgt_info->num_of_ptes--;

	/*
	 * Need to save the number of ptes left because free_hop might free
	 * the pgt_info
	 */
	num_of_ptes_left = pgt_info->num_of_ptes;
	if (!num_of_ptes_left)
		_free_hop(ctx, pgt_info);

	return num_of_ptes_left;
}

static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
					u64 virt_addr, u64 mask, u64 shift)
{
	return hop_addr + ctx->hdev->asic_prop.mmu_pte_size *
			((virt_addr & mask) >> shift);
}
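
/*
 * Illustrative example (numbers assumed for clarity, not taken from a
 * specific ASIC): with mmu_pte_size == 8 and a virtual address whose
 * relevant index bits give (virt_addr & mask) >> shift == 5, the PTE for
 * that hop sits at hop_addr + 5 * 8 = hop_addr + 40.
 */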

static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx,
					struct hl_mmu_properties *mmu_prop,
					u64 hop_addr, u64 vaddr)
{
	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop0_mask,
					mmu_prop->hop0_shift);
}

static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx,
					struct hl_mmu_properties *mmu_prop,
					u64 hop_addr, u64 vaddr)
{
	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop1_mask,
					mmu_prop->hop1_shift);
}

static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx,
					struct hl_mmu_properties *mmu_prop,
					u64 hop_addr, u64 vaddr)
{
	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop2_mask,
					mmu_prop->hop2_shift);
}

static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx,
					struct hl_mmu_properties *mmu_prop,
					u64 hop_addr, u64 vaddr)
{
	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop3_mask,
					mmu_prop->hop3_shift);
}

static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx,
					struct hl_mmu_properties *mmu_prop,
					u64 hop_addr, u64 vaddr)
{
	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop4_mask,
					mmu_prop->hop4_shift);
}

static inline u64 get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte)
{
	if (curr_pte & PAGE_PRESENT_MASK)
		return curr_pte & HOP_PHYS_ADDR_MASK;
	else
		return ULLONG_MAX;
}

static inline u64 get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte,
						bool *is_new_hop)
{
	u64 hop_addr = get_next_hop_addr(ctx, curr_pte);

	if (hop_addr == ULLONG_MAX) {
		hop_addr = alloc_hop(ctx);
		*is_new_hop = (hop_addr != ULLONG_MAX);
	}

	return hop_addr;
}

/* translates shadow address inside hop to a physical address */
static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr)
{
	u64 page_mask = (ctx->hdev->asic_prop.mmu_hop_table_size - 1);
	u64 shadow_hop_addr = shadow_addr & ~page_mask;
	u64 pte_offset = shadow_addr & page_mask;
	u64 phys_hop_addr;

	if (shadow_hop_addr != get_hop0_addr(ctx))
		phys_hop_addr = get_pgt_info(ctx, shadow_hop_addr)->phys_addr;
	else
		phys_hop_addr = get_phys_hop0_addr(ctx);

	return phys_hop_addr + pte_offset;
}
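
/*
 * DRAM default mapping: when enabled, every DRAM page in the default mapping
 * range is pre-mapped to the single default DRAM page. The hop tables built
 * by dram_default_mapping_init() below are one hop1 table, one hop2 table and
 * num_of_hop3 hop3 tables, where every hop3 PTE points to
 * prop->mmu_dram_default_page_addr. Their shadow addresses are kept in
 * ctx->dram_default_hops (hop3 tables first, then hop2, then hop1) so that
 * dram_default_mapping_fini() can tear the structure down.
 */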

static int dram_default_mapping_init(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr,
		hop2_pte_addr, hop3_pte_addr, pte_val;
	int rc, i, j, hop3_allocated = 0;

	if ((!hdev->dram_supports_virtual_memory) ||
			(!hdev->dram_default_page_mapping) ||
			(ctx->asid == HL_KERNEL_ASID_ID))
		return 0;

	num_of_hop3 = prop->dram_size_for_default_page_mapping;
	do_div(num_of_hop3, prop->dram_page_size);
	do_div(num_of_hop3, PTE_ENTRIES_IN_HOP);

	/* add hop1 and hop2 */
	total_hops = num_of_hop3 + 2;

	ctx->dram_default_hops = kzalloc(HL_PTE_SIZE * total_hops, GFP_KERNEL);
	if (!ctx->dram_default_hops)
		return -ENOMEM;

	hop0_addr = get_hop0_addr(ctx);

	hop1_addr = alloc_hop(ctx);
	if (hop1_addr == ULLONG_MAX) {
		dev_err(hdev->dev, "failed to alloc hop 1\n");
		rc = -ENOMEM;
		goto hop1_err;
	}

	ctx->dram_default_hops[total_hops - 1] = hop1_addr;

	hop2_addr = alloc_hop(ctx);
	if (hop2_addr == ULLONG_MAX) {
		dev_err(hdev->dev, "failed to alloc hop 2\n");
		rc = -ENOMEM;
		goto hop2_err;
	}

	ctx->dram_default_hops[total_hops - 2] = hop2_addr;

	for (i = 0 ; i < num_of_hop3 ; i++) {
		ctx->dram_default_hops[i] = alloc_hop(ctx);
		if (ctx->dram_default_hops[i] == ULLONG_MAX) {
			dev_err(hdev->dev, "failed to alloc hop 3, i: %d\n", i);
			rc = -ENOMEM;
			goto hop3_err;
		}
		hop3_allocated++;
	}

	/* need only pte 0 in hops 0 and 1 */
	pte_val = (hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
	write_pte(ctx, hop0_addr, pte_val);

	pte_val = (hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
	write_pte(ctx, hop1_addr, pte_val);
	get_pte(ctx, hop1_addr);

	hop2_pte_addr = hop2_addr;
	for (i = 0 ; i < num_of_hop3 ; i++) {
		pte_val = (ctx->dram_default_hops[i] & HOP_PHYS_ADDR_MASK) |
				PAGE_PRESENT_MASK;
		write_pte(ctx, hop2_pte_addr, pte_val);
		get_pte(ctx, hop2_addr);
		hop2_pte_addr += HL_PTE_SIZE;
	}

	pte_val = (prop->mmu_dram_default_page_addr & HOP_PHYS_ADDR_MASK) |
			LAST_MASK | PAGE_PRESENT_MASK;

	for (i = 0 ; i < num_of_hop3 ; i++) {
		hop3_pte_addr = ctx->dram_default_hops[i];
		for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) {
			write_final_pte(ctx, hop3_pte_addr, pte_val);
			get_pte(ctx, ctx->dram_default_hops[i]);
			hop3_pte_addr += HL_PTE_SIZE;
		}
	}

	flush(ctx);

	return 0;

hop3_err:
	for (i = 0 ; i < hop3_allocated ; i++)
		free_hop(ctx, ctx->dram_default_hops[i]);

	free_hop(ctx, hop2_addr);
hop2_err:
	free_hop(ctx, hop1_addr);
hop1_err:
	kfree(ctx->dram_default_hops);

	return rc;
}

static void dram_default_mapping_fini(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr,
		hop2_pte_addr, hop3_pte_addr;
	int i, j;

	if ((!hdev->dram_supports_virtual_memory) ||
			(!hdev->dram_default_page_mapping) ||
			(ctx->asid == HL_KERNEL_ASID_ID))
		return;

	num_of_hop3 = prop->dram_size_for_default_page_mapping;
	do_div(num_of_hop3, prop->dram_page_size);
	do_div(num_of_hop3, PTE_ENTRIES_IN_HOP);

	hop0_addr = get_hop0_addr(ctx);
	/* add hop1 and hop2 */
	total_hops = num_of_hop3 + 2;
	hop1_addr = ctx->dram_default_hops[total_hops - 1];
	hop2_addr = ctx->dram_default_hops[total_hops - 2];

	for (i = 0 ; i < num_of_hop3 ; i++) {
		hop3_pte_addr = ctx->dram_default_hops[i];
		for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) {
			clear_pte(ctx, hop3_pte_addr);
			put_pte(ctx, ctx->dram_default_hops[i]);
			hop3_pte_addr += HL_PTE_SIZE;
		}
	}

	hop2_pte_addr = hop2_addr;
	for (i = 0 ; i < num_of_hop3 ; i++) {
		clear_pte(ctx, hop2_pte_addr);
		put_pte(ctx, hop2_addr);
		hop2_pte_addr += HL_PTE_SIZE;
	}

	clear_pte(ctx, hop1_addr);
	put_pte(ctx, hop1_addr);
	clear_pte(ctx, hop0_addr);

	kfree(ctx->dram_default_hops);

	flush(ctx);
}
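
/*
 * Page table memory layout, as set up by hl_mmu_v1_init() below: the device
 * region starting at prop->mmu_pgt_addr holds the per-ASID hop0 tables in its
 * first mmu_hop0_tables_total_size bytes (one hop table per ASID, which is
 * why get_phys_hop0_addr() indexes it by ctx->asid), and the remainder is
 * handed to mmu_pgt_pool for dynamically allocated hops. The shadow hop0
 * tables are a single host allocation of max_asid * mmu_hop_table_size bytes,
 * indexed the same way by get_hop0_addr().
 */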

/**
 * hl_mmu_v1_init() - initialize the MMU module.
 * @hdev: habanalabs device structure.
 *
 * This function does the following:
 * - Create a pool of pages for pgt_infos.
 * - Create a shadow table for pgt
 *
 * Return: 0 for success, non-zero for failure.
 */
static int hl_mmu_v1_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc;

	hdev->mmu_priv.mmu_pgt_pool =
			gen_pool_create(__ffs(prop->mmu_hop_table_size), -1);

	if (!hdev->mmu_priv.mmu_pgt_pool) {
		dev_err(hdev->dev, "Failed to create page gen pool\n");
		return -ENOMEM;
	}

	rc = gen_pool_add(hdev->mmu_priv.mmu_pgt_pool, prop->mmu_pgt_addr +
			prop->mmu_hop0_tables_total_size,
			prop->mmu_pgt_size - prop->mmu_hop0_tables_total_size,
			-1);
	if (rc) {
		dev_err(hdev->dev, "Failed to add memory to page gen pool\n");
		goto err_pool_add;
	}

	hdev->mmu_priv.mmu_shadow_hop0 = kvmalloc_array(prop->max_asid,
						prop->mmu_hop_table_size,
						GFP_KERNEL | __GFP_ZERO);
	if (ZERO_OR_NULL_PTR(hdev->mmu_priv.mmu_shadow_hop0)) {
		rc = -ENOMEM;
		goto err_pool_add;
	}

	/* MMU H/W init will be done in device hw_init() */

	return 0;

err_pool_add:
	gen_pool_destroy(hdev->mmu_priv.mmu_pgt_pool);

	return rc;
}

/**
 * hl_mmu_v1_fini() - release the MMU module.
 * @hdev: habanalabs device structure.
 *
 * This function does the following:
 * - Disable MMU in H/W.
 * - Free the pgt_infos pool.
 *
 * All contexts should be freed before calling this function.
 */
static void hl_mmu_v1_fini(struct hl_device *hdev)
{
	/* MMU H/W fini was already done in device hw_fini() */

	kvfree(hdev->mmu_priv.mmu_shadow_hop0);
	gen_pool_destroy(hdev->mmu_priv.mmu_pgt_pool);
}

/**
 * hl_mmu_v1_ctx_init() - initialize a context for using the MMU module.
 * @ctx: pointer to the context structure to initialize.
 *
 * Initialize a mutex to protect the concurrent mapping flow and a hash table
 * to hold all the page table hops related to this context.
 * Return: 0 on success, non-zero otherwise.
 */
static int hl_mmu_v1_ctx_init(struct hl_ctx *ctx)
{
	mutex_init(&ctx->mmu_lock);
	hash_init(ctx->mmu_shadow_hash);

	return dram_default_mapping_init(ctx);
}

/*
 * hl_mmu_v1_ctx_fini - disable a ctx from using the mmu module
 *
 * @ctx: pointer to the context structure
 *
 * This function does the following:
 * - Free any pgts which were not freed yet
 * - Free the mutex
 * - Free DRAM default page mapping hops
 */
static void hl_mmu_v1_ctx_fini(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct pgt_info *pgt_info;
	struct hlist_node *tmp;
	int i;

	dram_default_mapping_fini(ctx);

	if (!hash_empty(ctx->mmu_shadow_hash))
		dev_err(hdev->dev, "ctx %d is freed while it has pgts in use\n",
			ctx->asid);

	hash_for_each_safe(ctx->mmu_shadow_hash, i, tmp, pgt_info, node) {
		dev_err_ratelimited(hdev->dev,
			"pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n",
			pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes);
		_free_hop(ctx, pgt_info);
	}

	mutex_destroy(&ctx->mmu_lock);
}
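
/*
 * The map/unmap walks below cover a 5-level page table: hop0 (per-ASID,
 * allocated at init time rather than from the pool) through hop4. A huge
 * page terminates at hop3 with the LAST bit set, so hop4 is only walked for
 * regular pages; DRAM addresses are always mapped as huge pages.
 */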

static int _hl_mmu_v1_unmap(struct hl_ctx *ctx,
				u64 virt_addr, bool is_dram_addr)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_mmu_properties *mmu_prop;
	u64 hop0_addr = 0, hop0_pte_addr = 0,
		hop1_addr = 0, hop1_pte_addr = 0,
		hop2_addr = 0, hop2_pte_addr = 0,
		hop3_addr = 0, hop3_pte_addr = 0,
		hop4_addr = 0, hop4_pte_addr = 0,
		curr_pte;
	bool is_huge, clear_hop3 = true;

	/* shifts and masks are the same in PMMU and HPMMU, use one of them */
	mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;

	hop0_addr = get_hop0_addr(ctx);
	hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr);

	curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;

	hop1_addr = get_next_hop_addr(ctx, curr_pte);

	if (hop1_addr == ULLONG_MAX)
		goto not_mapped;

	hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr);

	curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;

	hop2_addr = get_next_hop_addr(ctx, curr_pte);

	if (hop2_addr == ULLONG_MAX)
		goto not_mapped;

	hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);

	curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;

	hop3_addr = get_next_hop_addr(ctx, curr_pte);

	if (hop3_addr == ULLONG_MAX)
		goto not_mapped;

	hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);

	curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr;

	is_huge = curr_pte & LAST_MASK;

	if (is_dram_addr && !is_huge) {
		dev_err(hdev->dev,
			"DRAM unmapping should use huge pages only\n");
		return -EFAULT;
	}

	if (!is_huge) {
		hop4_addr = get_next_hop_addr(ctx, curr_pte);

		if (hop4_addr == ULLONG_MAX)
			goto not_mapped;

		hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
							virt_addr);

		curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr;

		clear_hop3 = false;
	}

	if (hdev->dram_default_page_mapping && is_dram_addr) {
		u64 default_pte = (prop->mmu_dram_default_page_addr &
				HOP_PHYS_ADDR_MASK) | LAST_MASK |
					PAGE_PRESENT_MASK;
		if (curr_pte == default_pte) {
			dev_err(hdev->dev,
				"DRAM: hop3 PTE points to zero page, can't unmap, va: 0x%llx\n",
				virt_addr);
			goto not_mapped;
		}

		if (!(curr_pte & PAGE_PRESENT_MASK)) {
			dev_err(hdev->dev,
				"DRAM: hop3 PTE is cleared! can't unmap, va: 0x%llx\n",
				virt_addr);
			goto not_mapped;
		}

		write_final_pte(ctx, hop3_pte_addr, default_pte);
		put_pte(ctx, hop3_addr);
	} else {
		if (!(curr_pte & PAGE_PRESENT_MASK))
			goto not_mapped;

		if (hop4_addr)
			clear_pte(ctx, hop4_pte_addr);
		else
			clear_pte(ctx, hop3_pte_addr);

		if (hop4_addr && !put_pte(ctx, hop4_addr))
			clear_hop3 = true;

		if (!clear_hop3)
			goto mapped;

		clear_pte(ctx, hop3_pte_addr);

		if (put_pte(ctx, hop3_addr))
			goto mapped;

		clear_pte(ctx, hop2_pte_addr);

		if (put_pte(ctx, hop2_addr))
			goto mapped;

		clear_pte(ctx, hop1_pte_addr);

		if (put_pte(ctx, hop1_addr))
			goto mapped;

		clear_pte(ctx, hop0_pte_addr);
	}

mapped:
	return 0;

not_mapped:
	dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n",
		virt_addr);

	return -EINVAL;
}
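
/*
 * Purely illustrative (page sizes are ASIC properties and vary per device):
 * if prop->pmmu.page_size were 4KB and prop->pmmu_huge.page_size were 2MB,
 * a 2MB host mapping would take the pmmu_huge path in _hl_mmu_v1_map() and
 * terminate at hop3, while a 4KB mapping would continue down to hop4.
 */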

static int _hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
			u32 page_size, bool is_dram_addr)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_mmu_properties *mmu_prop;
	u64 hop0_addr = 0, hop0_pte_addr = 0,
		hop1_addr = 0, hop1_pte_addr = 0,
		hop2_addr = 0, hop2_pte_addr = 0,
		hop3_addr = 0, hop3_pte_addr = 0,
		hop4_addr = 0, hop4_pte_addr = 0,
		curr_pte = 0;
	bool hop1_new = false, hop2_new = false, hop3_new = false,
		hop4_new = false, is_huge;
	int rc = -ENOMEM;

	/*
	 * This mapping function can map a page or a huge page. For a huge page
	 * there are only 3 hops rather than 4. Currently the DRAM allocation
	 * uses huge pages only but user memory could have been allocated with
	 * one of the two page sizes. Since this is common code for all three
	 * cases, we need this huge page check.
	 */
	if (is_dram_addr) {
		mmu_prop = &prop->dmmu;
		is_huge = true;
	} else if (page_size == prop->pmmu_huge.page_size) {
		mmu_prop = &prop->pmmu_huge;
		is_huge = true;
	} else {
		mmu_prop = &prop->pmmu;
		is_huge = false;
	}

	hop0_addr = get_hop0_addr(ctx);
	hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr);
	curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;

	hop1_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop1_new);
	if (hop1_addr == ULLONG_MAX)
		goto err;

	hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr);
	curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;

	hop2_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop2_new);
	if (hop2_addr == ULLONG_MAX)
		goto err;

	hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);
	curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;

	hop3_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop3_new);
	if (hop3_addr == ULLONG_MAX)
		goto err;

	hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);
	curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr;

	if (!is_huge) {
		hop4_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop4_new);
		if (hop4_addr == ULLONG_MAX)
			goto err;

		hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
							virt_addr);
		curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr;
	}

	if (hdev->dram_default_page_mapping && is_dram_addr) {
		u64 default_pte = (prop->mmu_dram_default_page_addr &
					HOP_PHYS_ADDR_MASK) | LAST_MASK |
						PAGE_PRESENT_MASK;

		if (curr_pte != default_pte) {
			dev_err(hdev->dev,
				"DRAM: mapping already exists for virt_addr 0x%llx\n",
				virt_addr);
			rc = -EINVAL;
			goto err;
		}

		if (hop1_new || hop2_new || hop3_new || hop4_new) {
			dev_err(hdev->dev,
				"DRAM mapping should not allocate more hops\n");
			rc = -EFAULT;
			goto err;
		}
	} else if (curr_pte & PAGE_PRESENT_MASK) {
		dev_err(hdev->dev,
			"mapping already exists for virt_addr 0x%llx\n",
			virt_addr);

		dev_dbg(hdev->dev, "hop0 pte: 0x%llx (0x%llx)\n",
			*(u64 *) (uintptr_t) hop0_pte_addr, hop0_pte_addr);
		dev_dbg(hdev->dev, "hop1 pte: 0x%llx (0x%llx)\n",
			*(u64 *) (uintptr_t) hop1_pte_addr, hop1_pte_addr);
		dev_dbg(hdev->dev, "hop2 pte: 0x%llx (0x%llx)\n",
			*(u64 *) (uintptr_t) hop2_pte_addr, hop2_pte_addr);
		dev_dbg(hdev->dev, "hop3 pte: 0x%llx (0x%llx)\n",
			*(u64 *) (uintptr_t) hop3_pte_addr, hop3_pte_addr);

		if (!is_huge)
			dev_dbg(hdev->dev, "hop4 pte: 0x%llx (0x%llx)\n",
				*(u64 *) (uintptr_t) hop4_pte_addr,
				hop4_pte_addr);

		rc = -EINVAL;
		goto err;
	}

	curr_pte = (phys_addr & HOP_PHYS_ADDR_MASK) | LAST_MASK
			| PAGE_PRESENT_MASK;

	if (is_huge)
		write_final_pte(ctx, hop3_pte_addr, curr_pte);
	else
		write_final_pte(ctx, hop4_pte_addr, curr_pte);

	if (hop1_new) {
		curr_pte =
			(hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
		write_pte(ctx, hop0_pte_addr, curr_pte);
	}
	if (hop2_new) {
		curr_pte =
			(hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
		write_pte(ctx, hop1_pte_addr, curr_pte);
		get_pte(ctx, hop1_addr);
	}
	if (hop3_new) {
		curr_pte =
			(hop3_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
		write_pte(ctx, hop2_pte_addr, curr_pte);
		get_pte(ctx, hop2_addr);
	}

	if (!is_huge) {
		if (hop4_new) {
			curr_pte = (hop4_addr & HOP_PHYS_ADDR_MASK) |
					PAGE_PRESENT_MASK;
			write_pte(ctx, hop3_pte_addr, curr_pte);
			get_pte(ctx, hop3_addr);
		}

		get_pte(ctx, hop4_addr);
	} else {
		get_pte(ctx, hop3_addr);
	}

	return 0;

err:
	if (hop4_new)
		free_hop(ctx, hop4_addr);
	if (hop3_new)
		free_hop(ctx, hop3_addr);
	if (hop2_new)
		free_hop(ctx, hop2_addr);
	if (hop1_new)
		free_hop(ctx, hop1_addr);

	return rc;
}
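
/*
 * Hop lifetime is reference counted: get_pte() bumps num_of_ptes on the hop
 * whose entry was just made valid, and put_pte() in the unmap path drops it,
 * freeing the hop (and letting its parent entry be cleared in turn) once the
 * count reaches zero. On a map failure, only the hops newly allocated in
 * that call are rolled back.
 */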

/*
 * hl_mmu_v1_swap_out - marks all mappings of the given ctx as swapped out
 *
 * @ctx: pointer to the context structure
 *
 */
static void hl_mmu_v1_swap_out(struct hl_ctx *ctx)
{

}

/*
 * hl_mmu_v1_swap_in - marks all mappings of the given ctx as swapped in
 *
 * @ctx: pointer to the context structure
 *
 */
static void hl_mmu_v1_swap_in(struct hl_ctx *ctx)
{

}

/*
 * hl_mmu_v1_set_funcs - set the MMU functions to the MMU v1 implementation
 *
 * @hdev: pointer to the device structure
 */
void hl_mmu_v1_set_funcs(struct hl_device *hdev)
{
	struct hl_mmu_funcs *mmu = &hdev->mmu_func;

	mmu->init = hl_mmu_v1_init;
	mmu->fini = hl_mmu_v1_fini;
	mmu->ctx_init = hl_mmu_v1_ctx_init;
	mmu->ctx_fini = hl_mmu_v1_ctx_fini;
	mmu->map = _hl_mmu_v1_map;
	mmu->unmap = _hl_mmu_v1_unmap;
	mmu->flush = flush;
	mmu->swap_out = hl_mmu_v1_swap_out;
	mmu->swap_in = hl_mmu_v1_swap_in;
}
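
/*
 * Note: the fields populated above form the dispatch table used by the common
 * habanalabs MMU code; callers are expected to go through the generic
 * hl_mmu_* entry points rather than calling the v1 functions directly, which
 * is why everything here except hl_mmu_v1_set_funcs() is static.
 */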