162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Copyright(c) 2020 Cornelis Networks, Inc. 462306a36Sopenharmony_ci * Copyright(c) 2016 - 2017 Intel Corporation. 562306a36Sopenharmony_ci */ 662306a36Sopenharmony_ci 762306a36Sopenharmony_ci#include <linux/list.h> 862306a36Sopenharmony_ci#include <linux/rculist.h> 962306a36Sopenharmony_ci#include <linux/mmu_notifier.h> 1062306a36Sopenharmony_ci#include <linux/interval_tree_generic.h> 1162306a36Sopenharmony_ci#include <linux/sched/mm.h> 1262306a36Sopenharmony_ci 1362306a36Sopenharmony_ci#include "mmu_rb.h" 1462306a36Sopenharmony_ci#include "trace.h" 1562306a36Sopenharmony_ci 1662306a36Sopenharmony_cistatic unsigned long mmu_node_start(struct mmu_rb_node *); 1762306a36Sopenharmony_cistatic unsigned long mmu_node_last(struct mmu_rb_node *); 1862306a36Sopenharmony_cistatic int mmu_notifier_range_start(struct mmu_notifier *, 1962306a36Sopenharmony_ci const struct mmu_notifier_range *); 2062306a36Sopenharmony_cistatic struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *, 2162306a36Sopenharmony_ci unsigned long, unsigned long); 2262306a36Sopenharmony_cistatic void release_immediate(struct kref *refcount); 2362306a36Sopenharmony_cistatic void handle_remove(struct work_struct *work); 2462306a36Sopenharmony_ci 2562306a36Sopenharmony_cistatic const struct mmu_notifier_ops mn_opts = { 2662306a36Sopenharmony_ci .invalidate_range_start = mmu_notifier_range_start, 2762306a36Sopenharmony_ci}; 2862306a36Sopenharmony_ci 2962306a36Sopenharmony_ciINTERVAL_TREE_DEFINE(struct mmu_rb_node, node, unsigned long, __last, 3062306a36Sopenharmony_ci mmu_node_start, mmu_node_last, static, __mmu_int_rb); 3162306a36Sopenharmony_ci 3262306a36Sopenharmony_cistatic unsigned long mmu_node_start(struct mmu_rb_node *node) 3362306a36Sopenharmony_ci{ 3462306a36Sopenharmony_ci return node->addr & PAGE_MASK; 3562306a36Sopenharmony_ci} 3662306a36Sopenharmony_ci 3762306a36Sopenharmony_cistatic unsigned long mmu_node_last(struct mmu_rb_node *node) 3862306a36Sopenharmony_ci{ 3962306a36Sopenharmony_ci return PAGE_ALIGN(node->addr + node->len) - 1; 4062306a36Sopenharmony_ci} 4162306a36Sopenharmony_ci 4262306a36Sopenharmony_ciint hfi1_mmu_rb_register(void *ops_arg, 4362306a36Sopenharmony_ci struct mmu_rb_ops *ops, 4462306a36Sopenharmony_ci struct workqueue_struct *wq, 4562306a36Sopenharmony_ci struct mmu_rb_handler **handler) 4662306a36Sopenharmony_ci{ 4762306a36Sopenharmony_ci struct mmu_rb_handler *h; 4862306a36Sopenharmony_ci void *free_ptr; 4962306a36Sopenharmony_ci int ret; 5062306a36Sopenharmony_ci 5162306a36Sopenharmony_ci free_ptr = kzalloc(sizeof(*h) + cache_line_size() - 1, GFP_KERNEL); 5262306a36Sopenharmony_ci if (!free_ptr) 5362306a36Sopenharmony_ci return -ENOMEM; 5462306a36Sopenharmony_ci 5562306a36Sopenharmony_ci h = PTR_ALIGN(free_ptr, cache_line_size()); 5662306a36Sopenharmony_ci h->root = RB_ROOT_CACHED; 5762306a36Sopenharmony_ci h->ops = ops; 5862306a36Sopenharmony_ci h->ops_arg = ops_arg; 5962306a36Sopenharmony_ci INIT_HLIST_NODE(&h->mn.hlist); 6062306a36Sopenharmony_ci spin_lock_init(&h->lock); 6162306a36Sopenharmony_ci h->mn.ops = &mn_opts; 6262306a36Sopenharmony_ci INIT_WORK(&h->del_work, handle_remove); 6362306a36Sopenharmony_ci INIT_LIST_HEAD(&h->del_list); 6462306a36Sopenharmony_ci INIT_LIST_HEAD(&h->lru_list); 6562306a36Sopenharmony_ci h->wq = wq; 6662306a36Sopenharmony_ci h->free_ptr = free_ptr; 6762306a36Sopenharmony_ci 6862306a36Sopenharmony_ci ret = mmu_notifier_register(&h->mn, current->mm); 6962306a36Sopenharmony_ci if (ret) { 7062306a36Sopenharmony_ci kfree(free_ptr); 7162306a36Sopenharmony_ci return ret; 7262306a36Sopenharmony_ci } 7362306a36Sopenharmony_ci 7462306a36Sopenharmony_ci *handler = h; 7562306a36Sopenharmony_ci return 0; 7662306a36Sopenharmony_ci} 7762306a36Sopenharmony_ci 7862306a36Sopenharmony_civoid hfi1_mmu_rb_unregister(struct mmu_rb_handler *handler) 7962306a36Sopenharmony_ci{ 8062306a36Sopenharmony_ci struct mmu_rb_node *rbnode; 8162306a36Sopenharmony_ci struct rb_node *node; 8262306a36Sopenharmony_ci unsigned long flags; 8362306a36Sopenharmony_ci struct list_head del_list; 8462306a36Sopenharmony_ci 8562306a36Sopenharmony_ci /* Prevent freeing of mm until we are completely finished. */ 8662306a36Sopenharmony_ci mmgrab(handler->mn.mm); 8762306a36Sopenharmony_ci 8862306a36Sopenharmony_ci /* Unregister first so we don't get any more notifications. */ 8962306a36Sopenharmony_ci mmu_notifier_unregister(&handler->mn, handler->mn.mm); 9062306a36Sopenharmony_ci 9162306a36Sopenharmony_ci /* 9262306a36Sopenharmony_ci * Make sure the wq delete handler is finished running. It will not 9362306a36Sopenharmony_ci * be triggered once the mmu notifiers are unregistered above. 9462306a36Sopenharmony_ci */ 9562306a36Sopenharmony_ci flush_work(&handler->del_work); 9662306a36Sopenharmony_ci 9762306a36Sopenharmony_ci INIT_LIST_HEAD(&del_list); 9862306a36Sopenharmony_ci 9962306a36Sopenharmony_ci spin_lock_irqsave(&handler->lock, flags); 10062306a36Sopenharmony_ci while ((node = rb_first_cached(&handler->root))) { 10162306a36Sopenharmony_ci rbnode = rb_entry(node, struct mmu_rb_node, node); 10262306a36Sopenharmony_ci rb_erase_cached(node, &handler->root); 10362306a36Sopenharmony_ci /* move from LRU list to delete list */ 10462306a36Sopenharmony_ci list_move(&rbnode->list, &del_list); 10562306a36Sopenharmony_ci } 10662306a36Sopenharmony_ci spin_unlock_irqrestore(&handler->lock, flags); 10762306a36Sopenharmony_ci 10862306a36Sopenharmony_ci while (!list_empty(&del_list)) { 10962306a36Sopenharmony_ci rbnode = list_first_entry(&del_list, struct mmu_rb_node, list); 11062306a36Sopenharmony_ci list_del(&rbnode->list); 11162306a36Sopenharmony_ci kref_put(&rbnode->refcount, release_immediate); 11262306a36Sopenharmony_ci } 11362306a36Sopenharmony_ci 11462306a36Sopenharmony_ci /* Now the mm may be freed. */ 11562306a36Sopenharmony_ci mmdrop(handler->mn.mm); 11662306a36Sopenharmony_ci 11762306a36Sopenharmony_ci kfree(handler->free_ptr); 11862306a36Sopenharmony_ci} 11962306a36Sopenharmony_ci 12062306a36Sopenharmony_ciint hfi1_mmu_rb_insert(struct mmu_rb_handler *handler, 12162306a36Sopenharmony_ci struct mmu_rb_node *mnode) 12262306a36Sopenharmony_ci{ 12362306a36Sopenharmony_ci struct mmu_rb_node *node; 12462306a36Sopenharmony_ci unsigned long flags; 12562306a36Sopenharmony_ci int ret = 0; 12662306a36Sopenharmony_ci 12762306a36Sopenharmony_ci trace_hfi1_mmu_rb_insert(mnode); 12862306a36Sopenharmony_ci 12962306a36Sopenharmony_ci if (current->mm != handler->mn.mm) 13062306a36Sopenharmony_ci return -EPERM; 13162306a36Sopenharmony_ci 13262306a36Sopenharmony_ci spin_lock_irqsave(&handler->lock, flags); 13362306a36Sopenharmony_ci node = __mmu_rb_search(handler, mnode->addr, mnode->len); 13462306a36Sopenharmony_ci if (node) { 13562306a36Sopenharmony_ci ret = -EEXIST; 13662306a36Sopenharmony_ci goto unlock; 13762306a36Sopenharmony_ci } 13862306a36Sopenharmony_ci __mmu_int_rb_insert(mnode, &handler->root); 13962306a36Sopenharmony_ci list_add_tail(&mnode->list, &handler->lru_list); 14062306a36Sopenharmony_ci mnode->handler = handler; 14162306a36Sopenharmony_ciunlock: 14262306a36Sopenharmony_ci spin_unlock_irqrestore(&handler->lock, flags); 14362306a36Sopenharmony_ci return ret; 14462306a36Sopenharmony_ci} 14562306a36Sopenharmony_ci 14662306a36Sopenharmony_ci/* Caller must hold handler lock */ 14762306a36Sopenharmony_cistruct mmu_rb_node *hfi1_mmu_rb_get_first(struct mmu_rb_handler *handler, 14862306a36Sopenharmony_ci unsigned long addr, unsigned long len) 14962306a36Sopenharmony_ci{ 15062306a36Sopenharmony_ci struct mmu_rb_node *node; 15162306a36Sopenharmony_ci 15262306a36Sopenharmony_ci trace_hfi1_mmu_rb_search(addr, len); 15362306a36Sopenharmony_ci node = __mmu_int_rb_iter_first(&handler->root, addr, (addr + len) - 1); 15462306a36Sopenharmony_ci if (node) 15562306a36Sopenharmony_ci list_move_tail(&node->list, &handler->lru_list); 15662306a36Sopenharmony_ci return node; 15762306a36Sopenharmony_ci} 15862306a36Sopenharmony_ci 15962306a36Sopenharmony_ci/* Caller must hold handler lock */ 16062306a36Sopenharmony_cistatic struct mmu_rb_node *__mmu_rb_search(struct mmu_rb_handler *handler, 16162306a36Sopenharmony_ci unsigned long addr, 16262306a36Sopenharmony_ci unsigned long len) 16362306a36Sopenharmony_ci{ 16462306a36Sopenharmony_ci struct mmu_rb_node *node = NULL; 16562306a36Sopenharmony_ci 16662306a36Sopenharmony_ci trace_hfi1_mmu_rb_search(addr, len); 16762306a36Sopenharmony_ci if (!handler->ops->filter) { 16862306a36Sopenharmony_ci node = __mmu_int_rb_iter_first(&handler->root, addr, 16962306a36Sopenharmony_ci (addr + len) - 1); 17062306a36Sopenharmony_ci } else { 17162306a36Sopenharmony_ci for (node = __mmu_int_rb_iter_first(&handler->root, addr, 17262306a36Sopenharmony_ci (addr + len) - 1); 17362306a36Sopenharmony_ci node; 17462306a36Sopenharmony_ci node = __mmu_int_rb_iter_next(node, addr, 17562306a36Sopenharmony_ci (addr + len) - 1)) { 17662306a36Sopenharmony_ci if (handler->ops->filter(node, addr, len)) 17762306a36Sopenharmony_ci return node; 17862306a36Sopenharmony_ci } 17962306a36Sopenharmony_ci } 18062306a36Sopenharmony_ci return node; 18162306a36Sopenharmony_ci} 18262306a36Sopenharmony_ci 18362306a36Sopenharmony_ci/* 18462306a36Sopenharmony_ci * Must NOT call while holding mnode->handler->lock. 18562306a36Sopenharmony_ci * mnode->handler->ops->remove() may sleep and mnode->handler->lock is a 18662306a36Sopenharmony_ci * spinlock. 18762306a36Sopenharmony_ci */ 18862306a36Sopenharmony_cistatic void release_immediate(struct kref *refcount) 18962306a36Sopenharmony_ci{ 19062306a36Sopenharmony_ci struct mmu_rb_node *mnode = 19162306a36Sopenharmony_ci container_of(refcount, struct mmu_rb_node, refcount); 19262306a36Sopenharmony_ci trace_hfi1_mmu_release_node(mnode); 19362306a36Sopenharmony_ci mnode->handler->ops->remove(mnode->handler->ops_arg, mnode); 19462306a36Sopenharmony_ci} 19562306a36Sopenharmony_ci 19662306a36Sopenharmony_ci/* Caller must hold mnode->handler->lock */ 19762306a36Sopenharmony_cistatic void release_nolock(struct kref *refcount) 19862306a36Sopenharmony_ci{ 19962306a36Sopenharmony_ci struct mmu_rb_node *mnode = 20062306a36Sopenharmony_ci container_of(refcount, struct mmu_rb_node, refcount); 20162306a36Sopenharmony_ci list_move(&mnode->list, &mnode->handler->del_list); 20262306a36Sopenharmony_ci queue_work(mnode->handler->wq, &mnode->handler->del_work); 20362306a36Sopenharmony_ci} 20462306a36Sopenharmony_ci 20562306a36Sopenharmony_ci/* 20662306a36Sopenharmony_ci * struct mmu_rb_node->refcount kref_put() callback. 20762306a36Sopenharmony_ci * Adds mmu_rb_node to mmu_rb_node->handler->del_list and queues 20862306a36Sopenharmony_ci * handler->del_work on handler->wq. 20962306a36Sopenharmony_ci * Does not remove mmu_rb_node from handler->lru_list or handler->rb_root. 21062306a36Sopenharmony_ci * Acquires mmu_rb_node->handler->lock; do not call while already holding 21162306a36Sopenharmony_ci * handler->lock. 21262306a36Sopenharmony_ci */ 21362306a36Sopenharmony_civoid hfi1_mmu_rb_release(struct kref *refcount) 21462306a36Sopenharmony_ci{ 21562306a36Sopenharmony_ci struct mmu_rb_node *mnode = 21662306a36Sopenharmony_ci container_of(refcount, struct mmu_rb_node, refcount); 21762306a36Sopenharmony_ci struct mmu_rb_handler *handler = mnode->handler; 21862306a36Sopenharmony_ci unsigned long flags; 21962306a36Sopenharmony_ci 22062306a36Sopenharmony_ci spin_lock_irqsave(&handler->lock, flags); 22162306a36Sopenharmony_ci list_move(&mnode->list, &mnode->handler->del_list); 22262306a36Sopenharmony_ci spin_unlock_irqrestore(&handler->lock, flags); 22362306a36Sopenharmony_ci queue_work(handler->wq, &handler->del_work); 22462306a36Sopenharmony_ci} 22562306a36Sopenharmony_ci 22662306a36Sopenharmony_civoid hfi1_mmu_rb_evict(struct mmu_rb_handler *handler, void *evict_arg) 22762306a36Sopenharmony_ci{ 22862306a36Sopenharmony_ci struct mmu_rb_node *rbnode, *ptr; 22962306a36Sopenharmony_ci struct list_head del_list; 23062306a36Sopenharmony_ci unsigned long flags; 23162306a36Sopenharmony_ci bool stop = false; 23262306a36Sopenharmony_ci 23362306a36Sopenharmony_ci if (current->mm != handler->mn.mm) 23462306a36Sopenharmony_ci return; 23562306a36Sopenharmony_ci 23662306a36Sopenharmony_ci INIT_LIST_HEAD(&del_list); 23762306a36Sopenharmony_ci 23862306a36Sopenharmony_ci spin_lock_irqsave(&handler->lock, flags); 23962306a36Sopenharmony_ci list_for_each_entry_safe(rbnode, ptr, &handler->lru_list, list) { 24062306a36Sopenharmony_ci /* refcount == 1 implies mmu_rb_handler has only rbnode ref */ 24162306a36Sopenharmony_ci if (kref_read(&rbnode->refcount) > 1) 24262306a36Sopenharmony_ci continue; 24362306a36Sopenharmony_ci 24462306a36Sopenharmony_ci if (handler->ops->evict(handler->ops_arg, rbnode, evict_arg, 24562306a36Sopenharmony_ci &stop)) { 24662306a36Sopenharmony_ci __mmu_int_rb_remove(rbnode, &handler->root); 24762306a36Sopenharmony_ci /* move from LRU list to delete list */ 24862306a36Sopenharmony_ci list_move(&rbnode->list, &del_list); 24962306a36Sopenharmony_ci } 25062306a36Sopenharmony_ci if (stop) 25162306a36Sopenharmony_ci break; 25262306a36Sopenharmony_ci } 25362306a36Sopenharmony_ci spin_unlock_irqrestore(&handler->lock, flags); 25462306a36Sopenharmony_ci 25562306a36Sopenharmony_ci list_for_each_entry_safe(rbnode, ptr, &del_list, list) { 25662306a36Sopenharmony_ci trace_hfi1_mmu_rb_evict(rbnode); 25762306a36Sopenharmony_ci kref_put(&rbnode->refcount, release_immediate); 25862306a36Sopenharmony_ci } 25962306a36Sopenharmony_ci} 26062306a36Sopenharmony_ci 26162306a36Sopenharmony_cistatic int mmu_notifier_range_start(struct mmu_notifier *mn, 26262306a36Sopenharmony_ci const struct mmu_notifier_range *range) 26362306a36Sopenharmony_ci{ 26462306a36Sopenharmony_ci struct mmu_rb_handler *handler = 26562306a36Sopenharmony_ci container_of(mn, struct mmu_rb_handler, mn); 26662306a36Sopenharmony_ci struct rb_root_cached *root = &handler->root; 26762306a36Sopenharmony_ci struct mmu_rb_node *node, *ptr = NULL; 26862306a36Sopenharmony_ci unsigned long flags; 26962306a36Sopenharmony_ci 27062306a36Sopenharmony_ci spin_lock_irqsave(&handler->lock, flags); 27162306a36Sopenharmony_ci for (node = __mmu_int_rb_iter_first(root, range->start, range->end-1); 27262306a36Sopenharmony_ci node; node = ptr) { 27362306a36Sopenharmony_ci /* Guard against node removal. */ 27462306a36Sopenharmony_ci ptr = __mmu_int_rb_iter_next(node, range->start, 27562306a36Sopenharmony_ci range->end - 1); 27662306a36Sopenharmony_ci trace_hfi1_mmu_mem_invalidate(node); 27762306a36Sopenharmony_ci /* Remove from rb tree and lru_list. */ 27862306a36Sopenharmony_ci __mmu_int_rb_remove(node, root); 27962306a36Sopenharmony_ci list_del_init(&node->list); 28062306a36Sopenharmony_ci kref_put(&node->refcount, release_nolock); 28162306a36Sopenharmony_ci } 28262306a36Sopenharmony_ci spin_unlock_irqrestore(&handler->lock, flags); 28362306a36Sopenharmony_ci 28462306a36Sopenharmony_ci return 0; 28562306a36Sopenharmony_ci} 28662306a36Sopenharmony_ci 28762306a36Sopenharmony_ci/* 28862306a36Sopenharmony_ci * Work queue function to remove all nodes that have been queued up to 28962306a36Sopenharmony_ci * be removed. The key feature is that mm->mmap_lock is not being held 29062306a36Sopenharmony_ci * and the remove callback can sleep while taking it, if needed. 29162306a36Sopenharmony_ci */ 29262306a36Sopenharmony_cistatic void handle_remove(struct work_struct *work) 29362306a36Sopenharmony_ci{ 29462306a36Sopenharmony_ci struct mmu_rb_handler *handler = container_of(work, 29562306a36Sopenharmony_ci struct mmu_rb_handler, 29662306a36Sopenharmony_ci del_work); 29762306a36Sopenharmony_ci struct list_head del_list; 29862306a36Sopenharmony_ci unsigned long flags; 29962306a36Sopenharmony_ci struct mmu_rb_node *node; 30062306a36Sopenharmony_ci 30162306a36Sopenharmony_ci /* remove anything that is queued to get removed */ 30262306a36Sopenharmony_ci spin_lock_irqsave(&handler->lock, flags); 30362306a36Sopenharmony_ci list_replace_init(&handler->del_list, &del_list); 30462306a36Sopenharmony_ci spin_unlock_irqrestore(&handler->lock, flags); 30562306a36Sopenharmony_ci 30662306a36Sopenharmony_ci while (!list_empty(&del_list)) { 30762306a36Sopenharmony_ci node = list_first_entry(&del_list, struct mmu_rb_node, list); 30862306a36Sopenharmony_ci list_del(&node->list); 30962306a36Sopenharmony_ci trace_hfi1_mmu_release_node(node); 31062306a36Sopenharmony_ci handler->ops->remove(handler->ops_arg, node); 31162306a36Sopenharmony_ci } 31262306a36Sopenharmony_ci} 313