18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * linux/mm/mmu_notifier.c 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * Copyright (C) 2008 Qumranet, Inc. 68c2ecf20Sopenharmony_ci * Copyright (C) 2008 SGI 78c2ecf20Sopenharmony_ci * Christoph Lameter <cl@linux.com> 88c2ecf20Sopenharmony_ci */ 98c2ecf20Sopenharmony_ci 108c2ecf20Sopenharmony_ci#include <linux/rculist.h> 118c2ecf20Sopenharmony_ci#include <linux/mmu_notifier.h> 128c2ecf20Sopenharmony_ci#include <linux/export.h> 138c2ecf20Sopenharmony_ci#include <linux/mm.h> 148c2ecf20Sopenharmony_ci#include <linux/err.h> 158c2ecf20Sopenharmony_ci#include <linux/interval_tree.h> 168c2ecf20Sopenharmony_ci#include <linux/srcu.h> 178c2ecf20Sopenharmony_ci#include <linux/rcupdate.h> 188c2ecf20Sopenharmony_ci#include <linux/sched.h> 198c2ecf20Sopenharmony_ci#include <linux/sched/mm.h> 208c2ecf20Sopenharmony_ci#include <linux/slab.h> 218c2ecf20Sopenharmony_ci 228c2ecf20Sopenharmony_ci/* global SRCU for all MMs */ 238c2ecf20Sopenharmony_ciDEFINE_STATIC_SRCU(srcu); 248c2ecf20Sopenharmony_ci 258c2ecf20Sopenharmony_ci#ifdef CONFIG_LOCKDEP 268c2ecf20Sopenharmony_cistruct lockdep_map __mmu_notifier_invalidate_range_start_map = { 278c2ecf20Sopenharmony_ci .name = "mmu_notifier_invalidate_range_start" 288c2ecf20Sopenharmony_ci}; 298c2ecf20Sopenharmony_ci#endif 308c2ecf20Sopenharmony_ci 318c2ecf20Sopenharmony_ci/* 328c2ecf20Sopenharmony_ci * The mmu_notifier_subscriptions structure is allocated and installed in 338c2ecf20Sopenharmony_ci * mm->notifier_subscriptions inside the mm_take_all_locks() protected 348c2ecf20Sopenharmony_ci * critical section and it's released only when mm_count reaches zero 358c2ecf20Sopenharmony_ci * in mmdrop(). 
368c2ecf20Sopenharmony_ci */ 378c2ecf20Sopenharmony_cistruct mmu_notifier_subscriptions { 388c2ecf20Sopenharmony_ci /* all mmu notifiers registered in this mm are queued in this list */ 398c2ecf20Sopenharmony_ci struct hlist_head list; 408c2ecf20Sopenharmony_ci bool has_itree; 418c2ecf20Sopenharmony_ci /* to serialize the list modifications and hlist_unhashed */ 428c2ecf20Sopenharmony_ci spinlock_t lock; 438c2ecf20Sopenharmony_ci unsigned long invalidate_seq; 448c2ecf20Sopenharmony_ci unsigned long active_invalidate_ranges; 458c2ecf20Sopenharmony_ci struct rb_root_cached itree; 468c2ecf20Sopenharmony_ci wait_queue_head_t wq; 478c2ecf20Sopenharmony_ci struct hlist_head deferred_list; 488c2ecf20Sopenharmony_ci}; 498c2ecf20Sopenharmony_ci 508c2ecf20Sopenharmony_ci/* 518c2ecf20Sopenharmony_ci * This is a collision-retry read-side/write-side 'lock', a lot like a 528c2ecf20Sopenharmony_ci * seqcount, however this allows multiple write-sides to hold it at 538c2ecf20Sopenharmony_ci * once. Conceptually the write side is protecting the values of the PTEs in 548c2ecf20Sopenharmony_ci * this mm, such that PTES cannot be read into SPTEs (shadow PTEs) while any 558c2ecf20Sopenharmony_ci * writer exists. 568c2ecf20Sopenharmony_ci * 578c2ecf20Sopenharmony_ci * Note that the core mm creates nested invalidate_range_start()/end() regions 588c2ecf20Sopenharmony_ci * within the same thread, and runs invalidate_range_start()/end() in parallel 598c2ecf20Sopenharmony_ci * on multiple CPUs. This is designed to not reduce concurrency or block 608c2ecf20Sopenharmony_ci * progress on the mm side. 618c2ecf20Sopenharmony_ci * 628c2ecf20Sopenharmony_ci * As a secondary function, holding the full write side also serves to prevent 638c2ecf20Sopenharmony_ci * writers for the itree, this is an optimization to avoid extra locking 648c2ecf20Sopenharmony_ci * during invalidate_range_start/end notifiers. 
658c2ecf20Sopenharmony_ci * 668c2ecf20Sopenharmony_ci * The write side has two states, fully excluded: 678c2ecf20Sopenharmony_ci * - mm->active_invalidate_ranges != 0 688c2ecf20Sopenharmony_ci * - subscriptions->invalidate_seq & 1 == True (odd) 698c2ecf20Sopenharmony_ci * - some range on the mm_struct is being invalidated 708c2ecf20Sopenharmony_ci * - the itree is not allowed to change 718c2ecf20Sopenharmony_ci * 728c2ecf20Sopenharmony_ci * And partially excluded: 738c2ecf20Sopenharmony_ci * - mm->active_invalidate_ranges != 0 748c2ecf20Sopenharmony_ci * - subscriptions->invalidate_seq & 1 == False (even) 758c2ecf20Sopenharmony_ci * - some range on the mm_struct is being invalidated 768c2ecf20Sopenharmony_ci * - the itree is allowed to change 778c2ecf20Sopenharmony_ci * 788c2ecf20Sopenharmony_ci * Operations on notifier_subscriptions->invalidate_seq (under spinlock): 798c2ecf20Sopenharmony_ci * seq |= 1 # Begin writing 808c2ecf20Sopenharmony_ci * seq++ # Release the writing state 818c2ecf20Sopenharmony_ci * seq & 1 # True if a writer exists 828c2ecf20Sopenharmony_ci * 838c2ecf20Sopenharmony_ci * The later state avoids some expensive work on inv_end in the common case of 848c2ecf20Sopenharmony_ci * no mmu_interval_notifier monitoring the VA. 
858c2ecf20Sopenharmony_ci */ 868c2ecf20Sopenharmony_cistatic bool 878c2ecf20Sopenharmony_cimn_itree_is_invalidating(struct mmu_notifier_subscriptions *subscriptions) 888c2ecf20Sopenharmony_ci{ 898c2ecf20Sopenharmony_ci lockdep_assert_held(&subscriptions->lock); 908c2ecf20Sopenharmony_ci return subscriptions->invalidate_seq & 1; 918c2ecf20Sopenharmony_ci} 928c2ecf20Sopenharmony_ci 938c2ecf20Sopenharmony_cistatic struct mmu_interval_notifier * 948c2ecf20Sopenharmony_cimn_itree_inv_start_range(struct mmu_notifier_subscriptions *subscriptions, 958c2ecf20Sopenharmony_ci const struct mmu_notifier_range *range, 968c2ecf20Sopenharmony_ci unsigned long *seq) 978c2ecf20Sopenharmony_ci{ 988c2ecf20Sopenharmony_ci struct interval_tree_node *node; 998c2ecf20Sopenharmony_ci struct mmu_interval_notifier *res = NULL; 1008c2ecf20Sopenharmony_ci 1018c2ecf20Sopenharmony_ci spin_lock(&subscriptions->lock); 1028c2ecf20Sopenharmony_ci subscriptions->active_invalidate_ranges++; 1038c2ecf20Sopenharmony_ci node = interval_tree_iter_first(&subscriptions->itree, range->start, 1048c2ecf20Sopenharmony_ci range->end - 1); 1058c2ecf20Sopenharmony_ci if (node) { 1068c2ecf20Sopenharmony_ci subscriptions->invalidate_seq |= 1; 1078c2ecf20Sopenharmony_ci res = container_of(node, struct mmu_interval_notifier, 1088c2ecf20Sopenharmony_ci interval_tree); 1098c2ecf20Sopenharmony_ci } 1108c2ecf20Sopenharmony_ci 1118c2ecf20Sopenharmony_ci *seq = subscriptions->invalidate_seq; 1128c2ecf20Sopenharmony_ci spin_unlock(&subscriptions->lock); 1138c2ecf20Sopenharmony_ci return res; 1148c2ecf20Sopenharmony_ci} 1158c2ecf20Sopenharmony_ci 1168c2ecf20Sopenharmony_cistatic struct mmu_interval_notifier * 1178c2ecf20Sopenharmony_cimn_itree_inv_next(struct mmu_interval_notifier *interval_sub, 1188c2ecf20Sopenharmony_ci const struct mmu_notifier_range *range) 1198c2ecf20Sopenharmony_ci{ 1208c2ecf20Sopenharmony_ci struct interval_tree_node *node; 1218c2ecf20Sopenharmony_ci 1228c2ecf20Sopenharmony_ci node = 
interval_tree_iter_next(&interval_sub->interval_tree, 1238c2ecf20Sopenharmony_ci range->start, range->end - 1); 1248c2ecf20Sopenharmony_ci if (!node) 1258c2ecf20Sopenharmony_ci return NULL; 1268c2ecf20Sopenharmony_ci return container_of(node, struct mmu_interval_notifier, interval_tree); 1278c2ecf20Sopenharmony_ci} 1288c2ecf20Sopenharmony_ci 1298c2ecf20Sopenharmony_cistatic void mn_itree_inv_end(struct mmu_notifier_subscriptions *subscriptions) 1308c2ecf20Sopenharmony_ci{ 1318c2ecf20Sopenharmony_ci struct mmu_interval_notifier *interval_sub; 1328c2ecf20Sopenharmony_ci struct hlist_node *next; 1338c2ecf20Sopenharmony_ci 1348c2ecf20Sopenharmony_ci spin_lock(&subscriptions->lock); 1358c2ecf20Sopenharmony_ci if (--subscriptions->active_invalidate_ranges || 1368c2ecf20Sopenharmony_ci !mn_itree_is_invalidating(subscriptions)) { 1378c2ecf20Sopenharmony_ci spin_unlock(&subscriptions->lock); 1388c2ecf20Sopenharmony_ci return; 1398c2ecf20Sopenharmony_ci } 1408c2ecf20Sopenharmony_ci 1418c2ecf20Sopenharmony_ci /* Make invalidate_seq even */ 1428c2ecf20Sopenharmony_ci subscriptions->invalidate_seq++; 1438c2ecf20Sopenharmony_ci 1448c2ecf20Sopenharmony_ci /* 1458c2ecf20Sopenharmony_ci * The inv_end incorporates a deferred mechanism like rtnl_unlock(). 1468c2ecf20Sopenharmony_ci * Adds and removes are queued until the final inv_end happens then 1478c2ecf20Sopenharmony_ci * they are progressed. This arrangement for tree updates is used to 1488c2ecf20Sopenharmony_ci * avoid using a blocking lock during invalidate_range_start. 
1498c2ecf20Sopenharmony_ci */ 1508c2ecf20Sopenharmony_ci hlist_for_each_entry_safe(interval_sub, next, 1518c2ecf20Sopenharmony_ci &subscriptions->deferred_list, 1528c2ecf20Sopenharmony_ci deferred_item) { 1538c2ecf20Sopenharmony_ci if (RB_EMPTY_NODE(&interval_sub->interval_tree.rb)) 1548c2ecf20Sopenharmony_ci interval_tree_insert(&interval_sub->interval_tree, 1558c2ecf20Sopenharmony_ci &subscriptions->itree); 1568c2ecf20Sopenharmony_ci else 1578c2ecf20Sopenharmony_ci interval_tree_remove(&interval_sub->interval_tree, 1588c2ecf20Sopenharmony_ci &subscriptions->itree); 1598c2ecf20Sopenharmony_ci hlist_del(&interval_sub->deferred_item); 1608c2ecf20Sopenharmony_ci } 1618c2ecf20Sopenharmony_ci spin_unlock(&subscriptions->lock); 1628c2ecf20Sopenharmony_ci 1638c2ecf20Sopenharmony_ci wake_up_all(&subscriptions->wq); 1648c2ecf20Sopenharmony_ci} 1658c2ecf20Sopenharmony_ci 1668c2ecf20Sopenharmony_ci/** 1678c2ecf20Sopenharmony_ci * mmu_interval_read_begin - Begin a read side critical section against a VA 1688c2ecf20Sopenharmony_ci * range 1698c2ecf20Sopenharmony_ci * @interval_sub: The interval subscription 1708c2ecf20Sopenharmony_ci * 1718c2ecf20Sopenharmony_ci * mmu_iterval_read_begin()/mmu_iterval_read_retry() implement a 1728c2ecf20Sopenharmony_ci * collision-retry scheme similar to seqcount for the VA range under 1738c2ecf20Sopenharmony_ci * subscription. If the mm invokes invalidation during the critical section 1748c2ecf20Sopenharmony_ci * then mmu_interval_read_retry() will return true. 1758c2ecf20Sopenharmony_ci * 1768c2ecf20Sopenharmony_ci * This is useful to obtain shadow PTEs where teardown or setup of the SPTEs 1778c2ecf20Sopenharmony_ci * require a blocking context. The critical region formed by this can sleep, 1788c2ecf20Sopenharmony_ci * and the required 'user_lock' can also be a sleeping lock. 
1798c2ecf20Sopenharmony_ci * 1808c2ecf20Sopenharmony_ci * The caller is required to provide a 'user_lock' to serialize both teardown 1818c2ecf20Sopenharmony_ci * and setup. 1828c2ecf20Sopenharmony_ci * 1838c2ecf20Sopenharmony_ci * The return value should be passed to mmu_interval_read_retry(). 1848c2ecf20Sopenharmony_ci */ 1858c2ecf20Sopenharmony_ciunsigned long 1868c2ecf20Sopenharmony_cimmu_interval_read_begin(struct mmu_interval_notifier *interval_sub) 1878c2ecf20Sopenharmony_ci{ 1888c2ecf20Sopenharmony_ci struct mmu_notifier_subscriptions *subscriptions = 1898c2ecf20Sopenharmony_ci interval_sub->mm->notifier_subscriptions; 1908c2ecf20Sopenharmony_ci unsigned long seq; 1918c2ecf20Sopenharmony_ci bool is_invalidating; 1928c2ecf20Sopenharmony_ci 1938c2ecf20Sopenharmony_ci /* 1948c2ecf20Sopenharmony_ci * If the subscription has a different seq value under the user_lock 1958c2ecf20Sopenharmony_ci * than we started with then it has collided. 1968c2ecf20Sopenharmony_ci * 1978c2ecf20Sopenharmony_ci * If the subscription currently has the same seq value as the 1988c2ecf20Sopenharmony_ci * subscriptions seq, then it is currently between 1998c2ecf20Sopenharmony_ci * invalidate_start/end and is colliding. 
2008c2ecf20Sopenharmony_ci * 2018c2ecf20Sopenharmony_ci * The locking looks broadly like this: 2028c2ecf20Sopenharmony_ci * mn_tree_invalidate_start(): mmu_interval_read_begin(): 2038c2ecf20Sopenharmony_ci * spin_lock 2048c2ecf20Sopenharmony_ci * seq = READ_ONCE(interval_sub->invalidate_seq); 2058c2ecf20Sopenharmony_ci * seq == subs->invalidate_seq 2068c2ecf20Sopenharmony_ci * spin_unlock 2078c2ecf20Sopenharmony_ci * spin_lock 2088c2ecf20Sopenharmony_ci * seq = ++subscriptions->invalidate_seq 2098c2ecf20Sopenharmony_ci * spin_unlock 2108c2ecf20Sopenharmony_ci * op->invalidate_range(): 2118c2ecf20Sopenharmony_ci * user_lock 2128c2ecf20Sopenharmony_ci * mmu_interval_set_seq() 2138c2ecf20Sopenharmony_ci * interval_sub->invalidate_seq = seq 2148c2ecf20Sopenharmony_ci * user_unlock 2158c2ecf20Sopenharmony_ci * 2168c2ecf20Sopenharmony_ci * [Required: mmu_interval_read_retry() == true] 2178c2ecf20Sopenharmony_ci * 2188c2ecf20Sopenharmony_ci * mn_itree_inv_end(): 2198c2ecf20Sopenharmony_ci * spin_lock 2208c2ecf20Sopenharmony_ci * seq = ++subscriptions->invalidate_seq 2218c2ecf20Sopenharmony_ci * spin_unlock 2228c2ecf20Sopenharmony_ci * 2238c2ecf20Sopenharmony_ci * user_lock 2248c2ecf20Sopenharmony_ci * mmu_interval_read_retry(): 2258c2ecf20Sopenharmony_ci * interval_sub->invalidate_seq != seq 2268c2ecf20Sopenharmony_ci * user_unlock 2278c2ecf20Sopenharmony_ci * 2288c2ecf20Sopenharmony_ci * Barriers are not needed here as any races here are closed by an 2298c2ecf20Sopenharmony_ci * eventual mmu_interval_read_retry(), which provides a barrier via the 2308c2ecf20Sopenharmony_ci * user_lock. 
2318c2ecf20Sopenharmony_ci */ 2328c2ecf20Sopenharmony_ci spin_lock(&subscriptions->lock); 2338c2ecf20Sopenharmony_ci /* Pairs with the WRITE_ONCE in mmu_interval_set_seq() */ 2348c2ecf20Sopenharmony_ci seq = READ_ONCE(interval_sub->invalidate_seq); 2358c2ecf20Sopenharmony_ci is_invalidating = seq == subscriptions->invalidate_seq; 2368c2ecf20Sopenharmony_ci spin_unlock(&subscriptions->lock); 2378c2ecf20Sopenharmony_ci 2388c2ecf20Sopenharmony_ci /* 2398c2ecf20Sopenharmony_ci * interval_sub->invalidate_seq must always be set to an odd value via 2408c2ecf20Sopenharmony_ci * mmu_interval_set_seq() using the provided cur_seq from 2418c2ecf20Sopenharmony_ci * mn_itree_inv_start_range(). This ensures that if seq does wrap we 2428c2ecf20Sopenharmony_ci * will always clear the below sleep in some reasonable time as 2438c2ecf20Sopenharmony_ci * subscriptions->invalidate_seq is even in the idle state. 2448c2ecf20Sopenharmony_ci */ 2458c2ecf20Sopenharmony_ci lock_map_acquire(&__mmu_notifier_invalidate_range_start_map); 2468c2ecf20Sopenharmony_ci lock_map_release(&__mmu_notifier_invalidate_range_start_map); 2478c2ecf20Sopenharmony_ci if (is_invalidating) 2488c2ecf20Sopenharmony_ci wait_event(subscriptions->wq, 2498c2ecf20Sopenharmony_ci READ_ONCE(subscriptions->invalidate_seq) != seq); 2508c2ecf20Sopenharmony_ci 2518c2ecf20Sopenharmony_ci /* 2528c2ecf20Sopenharmony_ci * Notice that mmu_interval_read_retry() can already be true at this 2538c2ecf20Sopenharmony_ci * point, avoiding loops here allows the caller to provide a global 2548c2ecf20Sopenharmony_ci * time bound. 
2558c2ecf20Sopenharmony_ci */ 2568c2ecf20Sopenharmony_ci 2578c2ecf20Sopenharmony_ci return seq; 2588c2ecf20Sopenharmony_ci} 2598c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(mmu_interval_read_begin); 2608c2ecf20Sopenharmony_ci 2618c2ecf20Sopenharmony_cistatic void mn_itree_release(struct mmu_notifier_subscriptions *subscriptions, 2628c2ecf20Sopenharmony_ci struct mm_struct *mm) 2638c2ecf20Sopenharmony_ci{ 2648c2ecf20Sopenharmony_ci struct mmu_notifier_range range = { 2658c2ecf20Sopenharmony_ci .flags = MMU_NOTIFIER_RANGE_BLOCKABLE, 2668c2ecf20Sopenharmony_ci .event = MMU_NOTIFY_RELEASE, 2678c2ecf20Sopenharmony_ci .mm = mm, 2688c2ecf20Sopenharmony_ci .start = 0, 2698c2ecf20Sopenharmony_ci .end = ULONG_MAX, 2708c2ecf20Sopenharmony_ci }; 2718c2ecf20Sopenharmony_ci struct mmu_interval_notifier *interval_sub; 2728c2ecf20Sopenharmony_ci unsigned long cur_seq; 2738c2ecf20Sopenharmony_ci bool ret; 2748c2ecf20Sopenharmony_ci 2758c2ecf20Sopenharmony_ci for (interval_sub = 2768c2ecf20Sopenharmony_ci mn_itree_inv_start_range(subscriptions, &range, &cur_seq); 2778c2ecf20Sopenharmony_ci interval_sub; 2788c2ecf20Sopenharmony_ci interval_sub = mn_itree_inv_next(interval_sub, &range)) { 2798c2ecf20Sopenharmony_ci ret = interval_sub->ops->invalidate(interval_sub, &range, 2808c2ecf20Sopenharmony_ci cur_seq); 2818c2ecf20Sopenharmony_ci WARN_ON(!ret); 2828c2ecf20Sopenharmony_ci } 2838c2ecf20Sopenharmony_ci 2848c2ecf20Sopenharmony_ci mn_itree_inv_end(subscriptions); 2858c2ecf20Sopenharmony_ci} 2868c2ecf20Sopenharmony_ci 2878c2ecf20Sopenharmony_ci/* 2888c2ecf20Sopenharmony_ci * This function can't run concurrently against mmu_notifier_register 2898c2ecf20Sopenharmony_ci * because mm->mm_users > 0 during mmu_notifier_register and exit_mmap 2908c2ecf20Sopenharmony_ci * runs with mm_users == 0. 
Other tasks may still invoke mmu notifiers 2918c2ecf20Sopenharmony_ci * in parallel despite there being no task using this mm any more, 2928c2ecf20Sopenharmony_ci * through the vmas outside of the exit_mmap context, such as with 2938c2ecf20Sopenharmony_ci * vmtruncate. This serializes against mmu_notifier_unregister with 2948c2ecf20Sopenharmony_ci * the notifier_subscriptions->lock in addition to SRCU and it serializes 2958c2ecf20Sopenharmony_ci * against the other mmu notifiers with SRCU. struct mmu_notifier_subscriptions 2968c2ecf20Sopenharmony_ci * can't go away from under us as exit_mmap holds an mm_count pin 2978c2ecf20Sopenharmony_ci * itself. 2988c2ecf20Sopenharmony_ci */ 2998c2ecf20Sopenharmony_cistatic void mn_hlist_release(struct mmu_notifier_subscriptions *subscriptions, 3008c2ecf20Sopenharmony_ci struct mm_struct *mm) 3018c2ecf20Sopenharmony_ci{ 3028c2ecf20Sopenharmony_ci struct mmu_notifier *subscription; 3038c2ecf20Sopenharmony_ci int id; 3048c2ecf20Sopenharmony_ci 3058c2ecf20Sopenharmony_ci /* 3068c2ecf20Sopenharmony_ci * SRCU here will block mmu_notifier_unregister until 3078c2ecf20Sopenharmony_ci * ->release returns. 3088c2ecf20Sopenharmony_ci */ 3098c2ecf20Sopenharmony_ci id = srcu_read_lock(&srcu); 3108c2ecf20Sopenharmony_ci hlist_for_each_entry_rcu(subscription, &subscriptions->list, hlist, 3118c2ecf20Sopenharmony_ci srcu_read_lock_held(&srcu)) 3128c2ecf20Sopenharmony_ci /* 3138c2ecf20Sopenharmony_ci * If ->release runs before mmu_notifier_unregister it must be 3148c2ecf20Sopenharmony_ci * handled, as it's the only way for the driver to flush all 3158c2ecf20Sopenharmony_ci * existing sptes and stop the driver from establishing any more 3168c2ecf20Sopenharmony_ci * sptes before all the pages in the mm are freed. 
3178c2ecf20Sopenharmony_ci */ 3188c2ecf20Sopenharmony_ci if (subscription->ops->release) 3198c2ecf20Sopenharmony_ci subscription->ops->release(subscription, mm); 3208c2ecf20Sopenharmony_ci 3218c2ecf20Sopenharmony_ci spin_lock(&subscriptions->lock); 3228c2ecf20Sopenharmony_ci while (unlikely(!hlist_empty(&subscriptions->list))) { 3238c2ecf20Sopenharmony_ci subscription = hlist_entry(subscriptions->list.first, 3248c2ecf20Sopenharmony_ci struct mmu_notifier, hlist); 3258c2ecf20Sopenharmony_ci /* 3268c2ecf20Sopenharmony_ci * We arrived before mmu_notifier_unregister so 3278c2ecf20Sopenharmony_ci * mmu_notifier_unregister will do nothing other than to wait 3288c2ecf20Sopenharmony_ci * for ->release to finish and for mmu_notifier_unregister to 3298c2ecf20Sopenharmony_ci * return. 3308c2ecf20Sopenharmony_ci */ 3318c2ecf20Sopenharmony_ci hlist_del_init_rcu(&subscription->hlist); 3328c2ecf20Sopenharmony_ci } 3338c2ecf20Sopenharmony_ci spin_unlock(&subscriptions->lock); 3348c2ecf20Sopenharmony_ci srcu_read_unlock(&srcu, id); 3358c2ecf20Sopenharmony_ci 3368c2ecf20Sopenharmony_ci /* 3378c2ecf20Sopenharmony_ci * synchronize_srcu here prevents mmu_notifier_release from returning to 3388c2ecf20Sopenharmony_ci * exit_mmap (which would proceed with freeing all pages in the mm) 3398c2ecf20Sopenharmony_ci * until the ->release method returns, if it was invoked by 3408c2ecf20Sopenharmony_ci * mmu_notifier_unregister. 3418c2ecf20Sopenharmony_ci * 3428c2ecf20Sopenharmony_ci * The notifier_subscriptions can't go away from under us because 3438c2ecf20Sopenharmony_ci * one mm_count is held by exit_mmap. 
3448c2ecf20Sopenharmony_ci */ 3458c2ecf20Sopenharmony_ci synchronize_srcu(&srcu); 3468c2ecf20Sopenharmony_ci} 3478c2ecf20Sopenharmony_ci 3488c2ecf20Sopenharmony_civoid __mmu_notifier_release(struct mm_struct *mm) 3498c2ecf20Sopenharmony_ci{ 3508c2ecf20Sopenharmony_ci struct mmu_notifier_subscriptions *subscriptions = 3518c2ecf20Sopenharmony_ci mm->notifier_subscriptions; 3528c2ecf20Sopenharmony_ci 3538c2ecf20Sopenharmony_ci if (subscriptions->has_itree) 3548c2ecf20Sopenharmony_ci mn_itree_release(subscriptions, mm); 3558c2ecf20Sopenharmony_ci 3568c2ecf20Sopenharmony_ci if (!hlist_empty(&subscriptions->list)) 3578c2ecf20Sopenharmony_ci mn_hlist_release(subscriptions, mm); 3588c2ecf20Sopenharmony_ci} 3598c2ecf20Sopenharmony_ci 3608c2ecf20Sopenharmony_ci/* 3618c2ecf20Sopenharmony_ci * If no young bitflag is supported by the hardware, ->clear_flush_young can 3628c2ecf20Sopenharmony_ci * unmap the address and return 1 or 0 depending if the mapping previously 3638c2ecf20Sopenharmony_ci * existed or not. 
3648c2ecf20Sopenharmony_ci */ 3658c2ecf20Sopenharmony_ciint __mmu_notifier_clear_flush_young(struct mm_struct *mm, 3668c2ecf20Sopenharmony_ci unsigned long start, 3678c2ecf20Sopenharmony_ci unsigned long end) 3688c2ecf20Sopenharmony_ci{ 3698c2ecf20Sopenharmony_ci struct mmu_notifier *subscription; 3708c2ecf20Sopenharmony_ci int young = 0, id; 3718c2ecf20Sopenharmony_ci 3728c2ecf20Sopenharmony_ci id = srcu_read_lock(&srcu); 3738c2ecf20Sopenharmony_ci hlist_for_each_entry_rcu(subscription, 3748c2ecf20Sopenharmony_ci &mm->notifier_subscriptions->list, hlist, 3758c2ecf20Sopenharmony_ci srcu_read_lock_held(&srcu)) { 3768c2ecf20Sopenharmony_ci if (subscription->ops->clear_flush_young) 3778c2ecf20Sopenharmony_ci young |= subscription->ops->clear_flush_young( 3788c2ecf20Sopenharmony_ci subscription, mm, start, end); 3798c2ecf20Sopenharmony_ci } 3808c2ecf20Sopenharmony_ci srcu_read_unlock(&srcu, id); 3818c2ecf20Sopenharmony_ci 3828c2ecf20Sopenharmony_ci return young; 3838c2ecf20Sopenharmony_ci} 3848c2ecf20Sopenharmony_ci 3858c2ecf20Sopenharmony_ciint __mmu_notifier_clear_young(struct mm_struct *mm, 3868c2ecf20Sopenharmony_ci unsigned long start, 3878c2ecf20Sopenharmony_ci unsigned long end) 3888c2ecf20Sopenharmony_ci{ 3898c2ecf20Sopenharmony_ci struct mmu_notifier *subscription; 3908c2ecf20Sopenharmony_ci int young = 0, id; 3918c2ecf20Sopenharmony_ci 3928c2ecf20Sopenharmony_ci id = srcu_read_lock(&srcu); 3938c2ecf20Sopenharmony_ci hlist_for_each_entry_rcu(subscription, 3948c2ecf20Sopenharmony_ci &mm->notifier_subscriptions->list, hlist, 3958c2ecf20Sopenharmony_ci srcu_read_lock_held(&srcu)) { 3968c2ecf20Sopenharmony_ci if (subscription->ops->clear_young) 3978c2ecf20Sopenharmony_ci young |= subscription->ops->clear_young(subscription, 3988c2ecf20Sopenharmony_ci mm, start, end); 3998c2ecf20Sopenharmony_ci } 4008c2ecf20Sopenharmony_ci srcu_read_unlock(&srcu, id); 4018c2ecf20Sopenharmony_ci 4028c2ecf20Sopenharmony_ci return young; 4038c2ecf20Sopenharmony_ci} 
4048c2ecf20Sopenharmony_ci 4058c2ecf20Sopenharmony_ciint __mmu_notifier_test_young(struct mm_struct *mm, 4068c2ecf20Sopenharmony_ci unsigned long address) 4078c2ecf20Sopenharmony_ci{ 4088c2ecf20Sopenharmony_ci struct mmu_notifier *subscription; 4098c2ecf20Sopenharmony_ci int young = 0, id; 4108c2ecf20Sopenharmony_ci 4118c2ecf20Sopenharmony_ci id = srcu_read_lock(&srcu); 4128c2ecf20Sopenharmony_ci hlist_for_each_entry_rcu(subscription, 4138c2ecf20Sopenharmony_ci &mm->notifier_subscriptions->list, hlist, 4148c2ecf20Sopenharmony_ci srcu_read_lock_held(&srcu)) { 4158c2ecf20Sopenharmony_ci if (subscription->ops->test_young) { 4168c2ecf20Sopenharmony_ci young = subscription->ops->test_young(subscription, mm, 4178c2ecf20Sopenharmony_ci address); 4188c2ecf20Sopenharmony_ci if (young) 4198c2ecf20Sopenharmony_ci break; 4208c2ecf20Sopenharmony_ci } 4218c2ecf20Sopenharmony_ci } 4228c2ecf20Sopenharmony_ci srcu_read_unlock(&srcu, id); 4238c2ecf20Sopenharmony_ci 4248c2ecf20Sopenharmony_ci return young; 4258c2ecf20Sopenharmony_ci} 4268c2ecf20Sopenharmony_ci 4278c2ecf20Sopenharmony_civoid __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address, 4288c2ecf20Sopenharmony_ci pte_t pte) 4298c2ecf20Sopenharmony_ci{ 4308c2ecf20Sopenharmony_ci struct mmu_notifier *subscription; 4318c2ecf20Sopenharmony_ci int id; 4328c2ecf20Sopenharmony_ci 4338c2ecf20Sopenharmony_ci id = srcu_read_lock(&srcu); 4348c2ecf20Sopenharmony_ci hlist_for_each_entry_rcu(subscription, 4358c2ecf20Sopenharmony_ci &mm->notifier_subscriptions->list, hlist, 4368c2ecf20Sopenharmony_ci srcu_read_lock_held(&srcu)) { 4378c2ecf20Sopenharmony_ci if (subscription->ops->change_pte) 4388c2ecf20Sopenharmony_ci subscription->ops->change_pte(subscription, mm, address, 4398c2ecf20Sopenharmony_ci pte); 4408c2ecf20Sopenharmony_ci } 4418c2ecf20Sopenharmony_ci srcu_read_unlock(&srcu, id); 4428c2ecf20Sopenharmony_ci} 4438c2ecf20Sopenharmony_ci 4448c2ecf20Sopenharmony_cistatic int mn_itree_invalidate(struct 
mmu_notifier_subscriptions *subscriptions, 4458c2ecf20Sopenharmony_ci const struct mmu_notifier_range *range) 4468c2ecf20Sopenharmony_ci{ 4478c2ecf20Sopenharmony_ci struct mmu_interval_notifier *interval_sub; 4488c2ecf20Sopenharmony_ci unsigned long cur_seq; 4498c2ecf20Sopenharmony_ci 4508c2ecf20Sopenharmony_ci for (interval_sub = 4518c2ecf20Sopenharmony_ci mn_itree_inv_start_range(subscriptions, range, &cur_seq); 4528c2ecf20Sopenharmony_ci interval_sub; 4538c2ecf20Sopenharmony_ci interval_sub = mn_itree_inv_next(interval_sub, range)) { 4548c2ecf20Sopenharmony_ci bool ret; 4558c2ecf20Sopenharmony_ci 4568c2ecf20Sopenharmony_ci ret = interval_sub->ops->invalidate(interval_sub, range, 4578c2ecf20Sopenharmony_ci cur_seq); 4588c2ecf20Sopenharmony_ci if (!ret) { 4598c2ecf20Sopenharmony_ci if (WARN_ON(mmu_notifier_range_blockable(range))) 4608c2ecf20Sopenharmony_ci continue; 4618c2ecf20Sopenharmony_ci goto out_would_block; 4628c2ecf20Sopenharmony_ci } 4638c2ecf20Sopenharmony_ci } 4648c2ecf20Sopenharmony_ci return 0; 4658c2ecf20Sopenharmony_ci 4668c2ecf20Sopenharmony_ciout_would_block: 4678c2ecf20Sopenharmony_ci /* 4688c2ecf20Sopenharmony_ci * On -EAGAIN the non-blocking caller is not allowed to call 4698c2ecf20Sopenharmony_ci * invalidate_range_end() 4708c2ecf20Sopenharmony_ci */ 4718c2ecf20Sopenharmony_ci mn_itree_inv_end(subscriptions); 4728c2ecf20Sopenharmony_ci return -EAGAIN; 4738c2ecf20Sopenharmony_ci} 4748c2ecf20Sopenharmony_ci 4758c2ecf20Sopenharmony_cistatic int mn_hlist_invalidate_range_start( 4768c2ecf20Sopenharmony_ci struct mmu_notifier_subscriptions *subscriptions, 4778c2ecf20Sopenharmony_ci struct mmu_notifier_range *range) 4788c2ecf20Sopenharmony_ci{ 4798c2ecf20Sopenharmony_ci struct mmu_notifier *subscription; 4808c2ecf20Sopenharmony_ci int ret = 0; 4818c2ecf20Sopenharmony_ci int id; 4828c2ecf20Sopenharmony_ci 4838c2ecf20Sopenharmony_ci id = srcu_read_lock(&srcu); 4848c2ecf20Sopenharmony_ci hlist_for_each_entry_rcu(subscription, &subscriptions->list, 
hlist, 4858c2ecf20Sopenharmony_ci srcu_read_lock_held(&srcu)) { 4868c2ecf20Sopenharmony_ci const struct mmu_notifier_ops *ops = subscription->ops; 4878c2ecf20Sopenharmony_ci 4888c2ecf20Sopenharmony_ci if (ops->invalidate_range_start) { 4898c2ecf20Sopenharmony_ci int _ret; 4908c2ecf20Sopenharmony_ci 4918c2ecf20Sopenharmony_ci if (!mmu_notifier_range_blockable(range)) 4928c2ecf20Sopenharmony_ci non_block_start(); 4938c2ecf20Sopenharmony_ci _ret = ops->invalidate_range_start(subscription, range); 4948c2ecf20Sopenharmony_ci if (!mmu_notifier_range_blockable(range)) 4958c2ecf20Sopenharmony_ci non_block_end(); 4968c2ecf20Sopenharmony_ci if (_ret) { 4978c2ecf20Sopenharmony_ci pr_info("%pS callback failed with %d in %sblockable context.\n", 4988c2ecf20Sopenharmony_ci ops->invalidate_range_start, _ret, 4998c2ecf20Sopenharmony_ci !mmu_notifier_range_blockable(range) ? 5008c2ecf20Sopenharmony_ci "non-" : 5018c2ecf20Sopenharmony_ci ""); 5028c2ecf20Sopenharmony_ci WARN_ON(mmu_notifier_range_blockable(range) || 5038c2ecf20Sopenharmony_ci _ret != -EAGAIN); 5048c2ecf20Sopenharmony_ci /* 5058c2ecf20Sopenharmony_ci * We call all the notifiers on any EAGAIN, 5068c2ecf20Sopenharmony_ci * there is no way for a notifier to know if 5078c2ecf20Sopenharmony_ci * its start method failed, thus a start that 5088c2ecf20Sopenharmony_ci * does EAGAIN can't also do end. 5098c2ecf20Sopenharmony_ci */ 5108c2ecf20Sopenharmony_ci WARN_ON(ops->invalidate_range_end); 5118c2ecf20Sopenharmony_ci ret = _ret; 5128c2ecf20Sopenharmony_ci } 5138c2ecf20Sopenharmony_ci } 5148c2ecf20Sopenharmony_ci } 5158c2ecf20Sopenharmony_ci 5168c2ecf20Sopenharmony_ci if (ret) { 5178c2ecf20Sopenharmony_ci /* 5188c2ecf20Sopenharmony_ci * Must be non-blocking to get here. If there are multiple 5198c2ecf20Sopenharmony_ci * notifiers and one or more failed start, any that succeeded 5208c2ecf20Sopenharmony_ci * start are expecting their end to be called. Do so now. 
		 */
		hlist_for_each_entry_rcu(subscription, &subscriptions->list,
					 hlist, srcu_read_lock_held(&srcu)) {
			if (!subscription->ops->invalidate_range_end)
				continue;

			subscription->ops->invalidate_range_end(subscription,
								range);
		}
	}
	srcu_read_unlock(&srcu, id);

	return ret;
}

/*
 * Notify all subscriptions on @range->mm that an invalidation of @range is
 * starting.  Interval-tree (itree) subscriptions are invalidated first; an
 * error from the itree invalidation is returned before the hlist
 * subscriptions are notified at all.
 */
int __mmu_notifier_invalidate_range_start(struct mmu_notifier_range *range)
{
	struct mmu_notifier_subscriptions *subscriptions =
		range->mm->notifier_subscriptions;
	int ret;

	if (subscriptions->has_itree) {
		ret = mn_itree_invalidate(subscriptions, range);
		if (ret)
			return ret;
	}
	if (!hlist_empty(&subscriptions->list))
		return mn_hlist_invalidate_range_start(subscriptions, range);
	return 0;
}

/*
 * Run the invalidate_range()/invalidate_range_end() callbacks of every
 * hlist subscription for @range.  @only_end suppresses the extra
 * invalidate_range() call (see comment inside the loop).
 */
static void
mn_hlist_invalidate_end(struct mmu_notifier_subscriptions *subscriptions,
			struct mmu_notifier_range *range, bool only_end)
{
	struct mmu_notifier *subscription;
	int id;

	id = srcu_read_lock(&srcu);
	hlist_for_each_entry_rcu(subscription, &subscriptions->list, hlist,
				 srcu_read_lock_held(&srcu)) {
		/*
		 * Call invalidate_range() here too, to avoid the need for the
		 * subsystem to register an invalidate_range_end() call-back
		 * when it already has invalidate_range().  Usually a
		 * subsystem registers either invalidate_range_start()/end()
		 * or invalidate_range(), so this is no additional overhead
		 * (besides the pointer check).
		 *
		 * We skip the call to invalidate_range() if we know it is
		 * safe, ie the call site used
		 * mmu_notifier_invalidate_range_only_end(), which is safe
		 * when a call to invalidate_range() already happened under
		 * the page table lock.
		 */
		if (!only_end && subscription->ops->invalidate_range)
			subscription->ops->invalidate_range(subscription,
							    range->mm,
							    range->start,
							    range->end);
		if (subscription->ops->invalidate_range_end) {
			/* Non-blockable ranges must not sleep in the callback */
			if (!mmu_notifier_range_blockable(range))
				non_block_start();
			subscription->ops->invalidate_range_end(subscription,
								range);
			if (!mmu_notifier_range_blockable(range))
				non_block_end();
		}
	}
	srcu_read_unlock(&srcu, id);
}

/*
 * Counterpart of __mmu_notifier_invalidate_range_start(): end the
 * invalidation of @range for both the itree and the hlist subscriptions.
 */
void __mmu_notifier_invalidate_range_end(struct mmu_notifier_range *range,
					 bool only_end)
{
	struct mmu_notifier_subscriptions *subscriptions =
		range->mm->notifier_subscriptions;

	lock_map_acquire(&__mmu_notifier_invalidate_range_start_map);
	if (subscriptions->has_itree)
		mn_itree_inv_end(subscriptions);

	if (!hlist_empty(&subscriptions->list))
		mn_hlist_invalidate_end(subscriptions, range, only_end);
	lock_map_release(&__mmu_notifier_invalidate_range_start_map);
}

/*
 * Invoke the invalidate_range() callback of every hlist subscription on @mm
 * for the VA range [start, end), under SRCU read-side protection.
 */
void __mmu_notifier_invalidate_range(struct mm_struct *mm,
				     unsigned long start, unsigned long end)
{
	struct mmu_notifier *subscription;
	int id;

	id = srcu_read_lock(&srcu);
	hlist_for_each_entry_rcu(subscription,
				 &mm->notifier_subscriptions->list, hlist,
				 srcu_read_lock_held(&srcu)) {
		if (subscription->ops->invalidate_range)
			subscription->ops->invalidate_range(subscription, mm,
							    start, end);
	}
	srcu_read_unlock(&srcu, id);
}

/*
 * Same as mmu_notifier_register but here the caller must hold the mmap_lock in
 * write mode. A NULL mn signals the notifier is being registered for itree
 * mode.
 */
int __mmu_notifier_register(struct mmu_notifier *subscription,
			    struct mm_struct *mm)
{
	struct mmu_notifier_subscriptions *subscriptions = NULL;
	int ret;

	mmap_assert_write_locked(mm);
	BUG_ON(atomic_read(&mm->mm_users) <= 0);

	/*
	 * Teach lockdep the lock ordering up front: notifier callbacks may be
	 * invoked from reclaim, so registering must be possible in a context
	 * that can also perform reclaim without deadlocking.
	 */
	if (IS_ENABLED(CONFIG_LOCKDEP)) {
		fs_reclaim_acquire(GFP_KERNEL);
		lock_map_acquire(&__mmu_notifier_invalidate_range_start_map);
		lock_map_release(&__mmu_notifier_invalidate_range_start_map);
		fs_reclaim_release(GFP_KERNEL);
	}

	if (!mm->notifier_subscriptions) {
		/*
		 * kmalloc cannot be called under mm_take_all_locks(), but we
		 * know that mm->notifier_subscriptions can't change while we
		 * hold the write side of the mmap_lock.
		 */
		subscriptions = kzalloc(
			sizeof(struct mmu_notifier_subscriptions), GFP_KERNEL);
		if (!subscriptions)
			return -ENOMEM;

		INIT_HLIST_HEAD(&subscriptions->list);
		spin_lock_init(&subscriptions->lock);
		/* Seq starts even; odd means an itree invalidation is active */
		subscriptions->invalidate_seq = 2;
		subscriptions->itree = RB_ROOT_CACHED;
		init_waitqueue_head(&subscriptions->wq);
		INIT_HLIST_HEAD(&subscriptions->deferred_list);
	}

	ret = mm_take_all_locks(mm);
	if (unlikely(ret))
		goto out_clean;

	/*
	 * Serialize the update against mmu_notifier_unregister. A
	 * side note: mmu_notifier_release can't run concurrently with
	 * us because we hold the mm_users pin (either implicitly as
	 * current->mm or explicitly with get_task_mm() or similar).
	 * We can't race against any other mmu notifier method either
	 * thanks to mm_take_all_locks().
	 *
	 * release semantics on the initialization of the
	 * mmu_notifier_subscriptions's contents are provided for unlocked
	 * readers.  acquire can only be used while holding the mmgrab or
	 * mmget, and is safe because once created the
	 * mmu_notifier_subscriptions is not freed until the mm is destroyed.
	 * As above, users holding the mmap_lock or one of the
	 * mm_take_all_locks() do not need to use acquire semantics.
	 */
	if (subscriptions)
		smp_store_release(&mm->notifier_subscriptions, subscriptions);

	if (subscription) {
		/* Pairs with the mmdrop in mmu_notifier_unregister_* */
		mmgrab(mm);
		subscription->mm = mm;
		subscription->users = 1;

		spin_lock(&mm->notifier_subscriptions->lock);
		hlist_add_head_rcu(&subscription->hlist,
				   &mm->notifier_subscriptions->list);
		spin_unlock(&mm->notifier_subscriptions->lock);
	} else
		mm->notifier_subscriptions->has_itree = true;

	mm_drop_all_locks(mm);
	BUG_ON(atomic_read(&mm->mm_users) <= 0);
	return 0;

out_clean:
	kfree(subscriptions);
	return ret;
}
EXPORT_SYMBOL_GPL(__mmu_notifier_register);

/**
 * mmu_notifier_register - Register a notifier on a mm
 * @subscription: The notifier to attach
 * @mm: The mm to attach the notifier to
 *
 * Must not hold mmap_lock nor any other VM related lock when calling
 * this registration function.
 Must also ensure mm_users can't go down
 * to zero while this runs to avoid races with mmu_notifier_release,
 * so mm has to be current->mm or the mm should be pinned safely such
 * as with get_task_mm(). If the mm is not current->mm, the mm_users
 * pin should be released by calling mmput after mmu_notifier_register
 * returns.
 *
 * mmu_notifier_unregister() or mmu_notifier_put() must be always called to
 * unregister the notifier.
 *
 * While the caller has a mmu_notifier get the subscription->mm pointer will
 * remain valid, and can be converted to an active mm pointer via
 * mmget_not_zero().
 */
int mmu_notifier_register(struct mmu_notifier *subscription,
			  struct mm_struct *mm)
{
	int ret;

	/* Take mmap_lock in write mode for __mmu_notifier_register() */
	mmap_write_lock(mm);
	ret = __mmu_notifier_register(subscription, mm);
	mmap_write_unlock(mm);
	return ret;
}
EXPORT_SYMBOL_GPL(mmu_notifier_register);

/*
 * Look up a registered notifier on @mm with the given @ops and take a user
 * reference on it.  Returns NULL when no notifier with these ops is on the
 * list, or ERR_PTR(-EOVERFLOW) when the users count would saturate.
 */
static struct mmu_notifier *
find_get_mmu_notifier(struct mm_struct *mm, const struct mmu_notifier_ops *ops)
{
	struct mmu_notifier *subscription;

	spin_lock(&mm->notifier_subscriptions->lock);
	hlist_for_each_entry_rcu(subscription,
			&mm->notifier_subscriptions->list, hlist,
			lockdep_is_held(&mm->notifier_subscriptions->lock)) {
		if (subscription->ops != ops)
			continue;

		if (likely(subscription->users != UINT_MAX))
			subscription->users++;
		else
			subscription = ERR_PTR(-EOVERFLOW);
		spin_unlock(&mm->notifier_subscriptions->lock);
		return subscription;
	}
	spin_unlock(&mm->notifier_subscriptions->lock);
	return NULL;
}

/**
 * mmu_notifier_get_locked - Return the single struct mmu_notifier for
 *                           the mm & ops
 * @ops: The operations struct being subscribe with
 * @mm : The mm to attach notifiers too
 *
 * This function either allocates a new mmu_notifier via
 * ops->alloc_notifier(), or returns an already existing notifier on the
 * list. The value of the ops pointer is used to determine when two notifiers
 * are the same.
 *
 * Each call to mmu_notifier_get() must be paired with a call to
 * mmu_notifier_put(). The caller must hold the write side of mm->mmap_lock.
 *
 * While the caller has a mmu_notifier get the mm pointer will remain valid,
 * and can be converted to an active mm pointer via mmget_not_zero().
 */
struct mmu_notifier *mmu_notifier_get_locked(const struct mmu_notifier_ops *ops,
					     struct mm_struct *mm)
{
	struct mmu_notifier *subscription;
	int ret;

	mmap_assert_write_locked(mm);

	/* Reuse an already-registered notifier with the same ops if present */
	if (mm->notifier_subscriptions) {
		subscription = find_get_mmu_notifier(mm, ops);
		if (subscription)
			return subscription;
	}

	subscription = ops->alloc_notifier(mm);
	if (IS_ERR(subscription))
		return subscription;
	subscription->ops = ops;
	ret = __mmu_notifier_register(subscription, mm);
	if (ret)
		goto out_free;
	return subscription;
out_free:
	subscription->ops->free_notifier(subscription);
	return ERR_PTR(ret);
}
EXPORT_SYMBOL_GPL(mmu_notifier_get_locked);

/* this is called after the last mmu_notifier_unregister() returned */
void __mmu_notifier_subscriptions_destroy(struct mm_struct *mm)
{
	BUG_ON(!hlist_empty(&mm->notifier_subscriptions->list));
	kfree(mm->notifier_subscriptions);
	mm->notifier_subscriptions = LIST_POISON1; /* debug */
}

/*
 * This releases the mm_count pin automatically and frees the mm
 * structure if it was the last user of it. It serializes against
 * running mmu notifiers with SRCU and against mmu_notifier_unregister
 * with the unregister lock + SRCU. All sptes must be dropped before
 * calling mmu_notifier_unregister. ->release or any other notifier
 * method may be invoked concurrently with mmu_notifier_unregister,
 * and only after mmu_notifier_unregister returned we're guaranteed
 * that ->release or any other method can't run anymore.
 */
void mmu_notifier_unregister(struct mmu_notifier *subscription,
			     struct mm_struct *mm)
{
	BUG_ON(atomic_read(&mm->mm_count) <= 0);

	if (!hlist_unhashed(&subscription->hlist)) {
		/*
		 * SRCU here will force exit_mmap to wait for ->release to
		 * finish before freeing the pages.
		 */
		int id;

		id = srcu_read_lock(&srcu);
		/*
		 * exit_mmap will block in mmu_notifier_release to guarantee
		 * that ->release is called before freeing the pages.
		 */
		if (subscription->ops->release)
			subscription->ops->release(subscription, mm);
		srcu_read_unlock(&srcu, id);

		spin_lock(&mm->notifier_subscriptions->lock);
		/*
		 * Can not use list_del_rcu() since __mmu_notifier_release
		 * can delete it before we hold the lock.
		 */
		hlist_del_init_rcu(&subscription->hlist);
		spin_unlock(&mm->notifier_subscriptions->lock);
	}

	/*
	 * Wait for any running method to finish, of course including
	 * ->release if it was run by mmu_notifier_release instead of us.
	 */
	synchronize_srcu(&srcu);

	BUG_ON(atomic_read(&mm->mm_count) <= 0);

	/* Pairs with the mmgrab() in __mmu_notifier_register() */
	mmdrop(mm);
}
EXPORT_SYMBOL_GPL(mmu_notifier_unregister);

/*
 * SRCU callback used by mmu_notifier_put(): frees the subscription via its
 * ops->free_notifier() and drops the mm_count pin the registration held.
 */
static void mmu_notifier_free_rcu(struct rcu_head *rcu)
{
	struct mmu_notifier *subscription =
		container_of(rcu, struct mmu_notifier, rcu);
	struct mm_struct *mm = subscription->mm;

	subscription->ops->free_notifier(subscription);
	/* Pairs with the get in __mmu_notifier_register() */
	mmdrop(mm);
}

/**
 * mmu_notifier_put - Release the reference
 on the notifier
 * @subscription: The notifier to act on
 *
 * This function must be paired with each mmu_notifier_get(), it releases the
 * reference obtained by the get. If this is the last reference then process
 * to free the notifier will be run asynchronously.
 *
 * Unlike mmu_notifier_unregister() the get/put flow only calls ops->release
 * when the mm_struct is destroyed. Instead free_notifier is always called to
 * release any resources held by the user.
 *
 * As ops->release is not guaranteed to be called, the user must ensure that
 * all sptes are dropped, and no new sptes can be established before
 * mmu_notifier_put() is called.
 *
 * This function can be called from the ops->release callback, however the
 * caller must still ensure it is called pairwise with mmu_notifier_get().
 *
 * Modules calling this function must call mmu_notifier_synchronize() in
 * their __exit functions to ensure the async work is completed.
 */
void mmu_notifier_put(struct mmu_notifier *subscription)
{
	struct mm_struct *mm = subscription->mm;

	spin_lock(&mm->notifier_subscriptions->lock);
	/* Only the last put unhashes the notifier and schedules the free */
	if (WARN_ON(!subscription->users) || --subscription->users)
		goto out_unlock;
	hlist_del_init_rcu(&subscription->hlist);
	spin_unlock(&mm->notifier_subscriptions->lock);

	/* Defer the free until all SRCU readers are done with this entry */
	call_srcu(&srcu, &subscription->rcu, mmu_notifier_free_rcu);
	return;

out_unlock:
	spin_unlock(&mm->notifier_subscriptions->lock);
}
EXPORT_SYMBOL_GPL(mmu_notifier_put);

/*
 * Common tail of mmu_interval_notifier_insert()/_insert_locked(): validate
 * the [start, start + length) range, pin the mm, and add the subscription to
 * the interval tree (or defer the add if an invalidation is in progress).
 */
static int __mmu_interval_notifier_insert(
	struct mmu_interval_notifier *interval_sub, struct mm_struct *mm,
	struct mmu_notifier_subscriptions *subscriptions, unsigned long start,
	unsigned long length, const struct mmu_interval_notifier_ops *ops)
{
	interval_sub->mm = mm;
	interval_sub->ops = ops;
	RB_CLEAR_NODE(&interval_sub->interval_tree.rb);
	interval_sub->interval_tree.start = start;
	/*
	 * Note that the representation of the intervals in the interval tree
	 * considers the ending point as contained in the interval.
	 */
	if (length == 0 ||
	    check_add_overflow(start, length - 1,
			       &interval_sub->interval_tree.last))
		return -EOVERFLOW;

	/* Must call with a mmget() held */
	if (WARN_ON(atomic_read(&mm->mm_users) <= 0))
		return -EINVAL;

	/* pairs with mmdrop in mmu_interval_notifier_remove() */
	mmgrab(mm);

	/*
	 * If some invalidate_range_start/end region is going on in parallel
	 * we don't know what VA ranges are affected, so we must assume this
	 * new range is included.
	 *
	 * If the itree is invalidating then we are not allowed to change
	 * it. Retrying until invalidation is done is tricky due to the
	 * possibility for live lock, instead defer the add to
	 * mn_itree_inv_end() so this algorithm is deterministic.
	 *
	 * In all cases the value for the interval_sub->invalidate_seq should be
	 * odd, see mmu_interval_read_begin()
	 */
	spin_lock(&subscriptions->lock);
	if (subscriptions->active_invalidate_ranges) {
		if (mn_itree_is_invalidating(subscriptions))
			hlist_add_head(&interval_sub->deferred_item,
				       &subscriptions->deferred_list);
		else {
			/* Force the seq odd so readers see "invalidating" */
			subscriptions->invalidate_seq |= 1;
			interval_tree_insert(&interval_sub->interval_tree,
					     &subscriptions->itree);
		}
		interval_sub->invalidate_seq = subscriptions->invalidate_seq;
	} else {
		WARN_ON(mn_itree_is_invalidating(subscriptions));
		/*
		 * The starting seq for a subscription not under invalidation
		 * should be odd, not equal to the current invalidate_seq and
		 * invalidate_seq should not 'wrap' to the new seq any time
		 * soon.
		 */
		interval_sub->invalidate_seq =
			subscriptions->invalidate_seq - 1;
		interval_tree_insert(&interval_sub->interval_tree,
				     &subscriptions->itree);
	}
	spin_unlock(&subscriptions->lock);
	return 0;
}

/**
 * mmu_interval_notifier_insert - Insert an interval notifier
 * @interval_sub: Interval subscription to register
 * @start: Starting virtual address to monitor
 * @length: Length of the range to monitor
 * @mm: mm_struct to attach to
 * @ops: Interval notifier operations to be called on matching events
 *
 * This function subscribes the interval notifier for notifications from the
 * mm.  Upon return the ops related to mmu_interval_notifier will be called
 * whenever an event that intersects with the given range occurs.
 *
 * Upon return the range_notifier may not be present in the interval tree yet.
 * The caller must use the normal interval notifier read flow via
 * mmu_interval_read_begin() to establish SPTEs for this range.
 */
int mmu_interval_notifier_insert(struct mmu_interval_notifier *interval_sub,
				 struct mm_struct *mm, unsigned long start,
				 unsigned long length,
				 const struct mmu_interval_notifier_ops *ops)
{
	struct mmu_notifier_subscriptions *subscriptions;
	int ret;

	might_lock(&mm->mmap_lock);

	/* Pairs with the smp_store_release() in __mmu_notifier_register() */
	subscriptions = smp_load_acquire(&mm->notifier_subscriptions);
	if (!subscriptions || !subscriptions->has_itree) {
		/* NULL notifier registers the mm for itree mode */
		ret = mmu_notifier_register(NULL, mm);
		if (ret)
			return ret;
		subscriptions = mm->notifier_subscriptions;
	}
	return __mmu_interval_notifier_insert(interval_sub, mm, subscriptions,
					      start, length, ops);
}
EXPORT_SYMBOL_GPL(mmu_interval_notifier_insert);

/*
 * Same as mmu_interval_notifier_insert() but the caller already holds the
 * mmap_lock in write mode.
 */
int mmu_interval_notifier_insert_locked(
	struct mmu_interval_notifier *interval_sub, struct mm_struct *mm,
	unsigned long start, unsigned long length,
	const struct mmu_interval_notifier_ops *ops)
{
	struct mmu_notifier_subscriptions *subscriptions =
		mm->notifier_subscriptions;
	int ret;

	mmap_assert_write_locked(mm);

	if (!subscriptions || !subscriptions->has_itree) {
		ret = __mmu_notifier_register(NULL, mm);
		if (ret)
			return ret;
		subscriptions = mm->notifier_subscriptions;
	}
	return __mmu_interval_notifier_insert(interval_sub, mm, subscriptions,
					      start, length, ops);
}
EXPORT_SYMBOL_GPL(mmu_interval_notifier_insert_locked);

/*
 * Wait condition for mmu_interval_notifier_remove(): true once
 * invalidate_seq has moved past @seq, ie the invalidation that deferred the
 * removal has ended.
 */
static bool
mmu_interval_seq_released(struct mmu_notifier_subscriptions *subscriptions,
			  unsigned long seq)
{
	bool ret;

	spin_lock(&subscriptions->lock);
	ret = subscriptions->invalidate_seq != seq;
	spin_unlock(&subscriptions->lock);
	return ret;
}

/**
 * mmu_interval_notifier_remove - Remove an interval notifier
 * @interval_sub: Interval subscription to unregister
 *
 * This function must be paired with mmu_interval_notifier_insert(). It cannot
 * be called from any ops callback.
 *
 * Once this returns ops callbacks are no longer running on other CPUs and
 * will not be called in future.
 */
void mmu_interval_notifier_remove(struct mmu_interval_notifier *interval_sub)
{
	struct mm_struct *mm = interval_sub->mm;
	struct mmu_notifier_subscriptions *subscriptions =
		mm->notifier_subscriptions;
	unsigned long seq = 0;

	might_sleep();

	spin_lock(&subscriptions->lock);
	if (mn_itree_is_invalidating(subscriptions)) {
		/*
		 * remove is being called after insert put this on the
		 * deferred list, but before the deferred list was processed.
		 */
		if (RB_EMPTY_NODE(&interval_sub->interval_tree.rb)) {
			hlist_del(&interval_sub->deferred_item);
		} else {
			/* Defer the removal to mn_itree_inv_end() */
			hlist_add_head(&interval_sub->deferred_item,
				       &subscriptions->deferred_list);
			seq = subscriptions->invalidate_seq;
		}
	} else {
		WARN_ON(RB_EMPTY_NODE(&interval_sub->interval_tree.rb));
		interval_tree_remove(&interval_sub->interval_tree,
				     &subscriptions->itree);
	}
	spin_unlock(&subscriptions->lock);

	/*
	 * The possible sleep on progress in the invalidation requires the
	 * caller not hold any locks held by invalidation callbacks.
	 */
	lock_map_acquire(&__mmu_notifier_invalidate_range_start_map);
	lock_map_release(&__mmu_notifier_invalidate_range_start_map);
	if (seq)
		wait_event(subscriptions->wq,
			   mmu_interval_seq_released(subscriptions, seq));

	/* pairs with mmgrab in mmu_interval_notifier_insert() */
	mmdrop(mm);
}
EXPORT_SYMBOL_GPL(mmu_interval_notifier_remove);

/**
 * mmu_notifier_synchronize - Ensure all mmu_notifiers are freed
 *
 * This function ensures that all outstanding async SRCU work from
 * mmu_notifier_put() is completed. After it returns any mmu_notifier_ops
 * associated with an unused mmu_notifier will no longer be called.
 *
 * Before using the caller must ensure that all of its mmu_notifiers have been
 * fully released via mmu_notifier_put().
 *
 * Modules using the mmu_notifier_put() API should call this in their __exit
 * function to avoid module unloading races.
 */
void mmu_notifier_synchronize(void)
{
	synchronize_srcu(&srcu);
}
EXPORT_SYMBOL_GPL(mmu_notifier_synchronize);

/*
 * Helper for callers of MMU_NOTIFY_PROTECTION_VMA events: true when @range
 * has a vma whose VM_READ flag is still set, ie the protection change kept
 * the range readable.
 */
bool
mmu_notifier_range_update_to_read_only(const struct mmu_notifier_range *range)
{
	if (!range->vma || range->event != MMU_NOTIFY_PROTECTION_VMA)
		return false;
	/* Return true if the vma still have the read flag set. */
	return range->vma->vm_flags & VM_READ;
}
EXPORT_SYMBOL_GPL(mmu_notifier_range_update_to_read_only);