18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * dax: direct host memory access 48c2ecf20Sopenharmony_ci * Copyright (C) 2020 Red Hat, Inc. 58c2ecf20Sopenharmony_ci */ 68c2ecf20Sopenharmony_ci 78c2ecf20Sopenharmony_ci#include "fuse_i.h" 88c2ecf20Sopenharmony_ci 98c2ecf20Sopenharmony_ci#include <linux/delay.h> 108c2ecf20Sopenharmony_ci#include <linux/dax.h> 118c2ecf20Sopenharmony_ci#include <linux/uio.h> 128c2ecf20Sopenharmony_ci#include <linux/pfn_t.h> 138c2ecf20Sopenharmony_ci#include <linux/iomap.h> 148c2ecf20Sopenharmony_ci#include <linux/interval_tree.h> 158c2ecf20Sopenharmony_ci 168c2ecf20Sopenharmony_ci/* 178c2ecf20Sopenharmony_ci * Default memory range size. A power of 2 so it agrees with common FUSE_INIT 188c2ecf20Sopenharmony_ci * map_alignment values 4KB and 64KB. 198c2ecf20Sopenharmony_ci */ 208c2ecf20Sopenharmony_ci#define FUSE_DAX_SHIFT 21 218c2ecf20Sopenharmony_ci#define FUSE_DAX_SZ (1 << FUSE_DAX_SHIFT) 228c2ecf20Sopenharmony_ci#define FUSE_DAX_PAGES (FUSE_DAX_SZ / PAGE_SIZE) 238c2ecf20Sopenharmony_ci 248c2ecf20Sopenharmony_ci/* Number of ranges reclaimer will try to free in one invocation */ 258c2ecf20Sopenharmony_ci#define FUSE_DAX_RECLAIM_CHUNK (10) 268c2ecf20Sopenharmony_ci 278c2ecf20Sopenharmony_ci/* 288c2ecf20Sopenharmony_ci * Dax memory reclaim threshold in percetage of total ranges. When free 298c2ecf20Sopenharmony_ci * number of free ranges drops below this threshold, reclaim can trigger 308c2ecf20Sopenharmony_ci * Default is 20% 318c2ecf20Sopenharmony_ci */ 328c2ecf20Sopenharmony_ci#define FUSE_DAX_RECLAIM_THRESHOLD (20) 338c2ecf20Sopenharmony_ci 348c2ecf20Sopenharmony_ci/** Translation information for file offsets to DAX window offsets */ 358c2ecf20Sopenharmony_cistruct fuse_dax_mapping { 368c2ecf20Sopenharmony_ci /* Pointer to inode where this memory range is mapped */ 378c2ecf20Sopenharmony_ci struct inode *inode; 388c2ecf20Sopenharmony_ci 398c2ecf20Sopenharmony_ci /* Will connect in fcd->free_ranges to keep track of free memory */ 408c2ecf20Sopenharmony_ci struct list_head list; 418c2ecf20Sopenharmony_ci 428c2ecf20Sopenharmony_ci /* For interval tree in file/inode */ 438c2ecf20Sopenharmony_ci struct interval_tree_node itn; 448c2ecf20Sopenharmony_ci 458c2ecf20Sopenharmony_ci /* Will connect in fc->busy_ranges to keep track busy memory */ 468c2ecf20Sopenharmony_ci struct list_head busy_list; 478c2ecf20Sopenharmony_ci 488c2ecf20Sopenharmony_ci /** Position in DAX window */ 498c2ecf20Sopenharmony_ci u64 window_offset; 508c2ecf20Sopenharmony_ci 518c2ecf20Sopenharmony_ci /** Length of mapping, in bytes */ 528c2ecf20Sopenharmony_ci loff_t length; 538c2ecf20Sopenharmony_ci 548c2ecf20Sopenharmony_ci /* Is this mapping read-only or read-write */ 558c2ecf20Sopenharmony_ci bool writable; 568c2ecf20Sopenharmony_ci 578c2ecf20Sopenharmony_ci /* reference count when the mapping is used by dax iomap. */ 588c2ecf20Sopenharmony_ci refcount_t refcnt; 598c2ecf20Sopenharmony_ci}; 608c2ecf20Sopenharmony_ci 618c2ecf20Sopenharmony_ci/* Per-inode dax map */ 628c2ecf20Sopenharmony_cistruct fuse_inode_dax { 638c2ecf20Sopenharmony_ci /* Semaphore to protect modifications to the dmap tree */ 648c2ecf20Sopenharmony_ci struct rw_semaphore sem; 658c2ecf20Sopenharmony_ci 668c2ecf20Sopenharmony_ci /* Sorted rb tree of struct fuse_dax_mapping elements */ 678c2ecf20Sopenharmony_ci struct rb_root_cached tree; 688c2ecf20Sopenharmony_ci unsigned long nr; 698c2ecf20Sopenharmony_ci}; 708c2ecf20Sopenharmony_ci 718c2ecf20Sopenharmony_cistruct fuse_conn_dax { 728c2ecf20Sopenharmony_ci /* DAX device */ 738c2ecf20Sopenharmony_ci struct dax_device *dev; 748c2ecf20Sopenharmony_ci 758c2ecf20Sopenharmony_ci /* Lock protecting accessess to members of this structure */ 768c2ecf20Sopenharmony_ci spinlock_t lock; 778c2ecf20Sopenharmony_ci 788c2ecf20Sopenharmony_ci /* List of memory ranges which are busy */ 798c2ecf20Sopenharmony_ci unsigned long nr_busy_ranges; 808c2ecf20Sopenharmony_ci struct list_head busy_ranges; 818c2ecf20Sopenharmony_ci 828c2ecf20Sopenharmony_ci /* Worker to free up memory ranges */ 838c2ecf20Sopenharmony_ci struct delayed_work free_work; 848c2ecf20Sopenharmony_ci 858c2ecf20Sopenharmony_ci /* Wait queue for a dax range to become free */ 868c2ecf20Sopenharmony_ci wait_queue_head_t range_waitq; 878c2ecf20Sopenharmony_ci 888c2ecf20Sopenharmony_ci /* DAX Window Free Ranges */ 898c2ecf20Sopenharmony_ci long nr_free_ranges; 908c2ecf20Sopenharmony_ci struct list_head free_ranges; 918c2ecf20Sopenharmony_ci 928c2ecf20Sopenharmony_ci unsigned long nr_ranges; 938c2ecf20Sopenharmony_ci}; 948c2ecf20Sopenharmony_ci 958c2ecf20Sopenharmony_cistatic inline struct fuse_dax_mapping * 968c2ecf20Sopenharmony_cinode_to_dmap(struct interval_tree_node *node) 978c2ecf20Sopenharmony_ci{ 988c2ecf20Sopenharmony_ci if (!node) 998c2ecf20Sopenharmony_ci return NULL; 1008c2ecf20Sopenharmony_ci 1018c2ecf20Sopenharmony_ci return container_of(node, struct fuse_dax_mapping, itn); 1028c2ecf20Sopenharmony_ci} 1038c2ecf20Sopenharmony_ci 1048c2ecf20Sopenharmony_cistatic struct fuse_dax_mapping * 1058c2ecf20Sopenharmony_cialloc_dax_mapping_reclaim(struct fuse_conn_dax *fcd, struct inode *inode); 1068c2ecf20Sopenharmony_ci 1078c2ecf20Sopenharmony_cistatic void 1088c2ecf20Sopenharmony_ci__kick_dmap_free_worker(struct fuse_conn_dax *fcd, unsigned long delay_ms) 1098c2ecf20Sopenharmony_ci{ 1108c2ecf20Sopenharmony_ci unsigned long free_threshold; 1118c2ecf20Sopenharmony_ci 1128c2ecf20Sopenharmony_ci /* If number of free ranges are below threshold, start reclaim */ 1138c2ecf20Sopenharmony_ci free_threshold = max_t(unsigned long, fcd->nr_ranges * FUSE_DAX_RECLAIM_THRESHOLD / 100, 1148c2ecf20Sopenharmony_ci 1); 1158c2ecf20Sopenharmony_ci if (fcd->nr_free_ranges < free_threshold) 1168c2ecf20Sopenharmony_ci queue_delayed_work(system_long_wq, &fcd->free_work, 1178c2ecf20Sopenharmony_ci msecs_to_jiffies(delay_ms)); 1188c2ecf20Sopenharmony_ci} 1198c2ecf20Sopenharmony_ci 1208c2ecf20Sopenharmony_cistatic void kick_dmap_free_worker(struct fuse_conn_dax *fcd, 1218c2ecf20Sopenharmony_ci unsigned long delay_ms) 1228c2ecf20Sopenharmony_ci{ 1238c2ecf20Sopenharmony_ci spin_lock(&fcd->lock); 1248c2ecf20Sopenharmony_ci __kick_dmap_free_worker(fcd, delay_ms); 1258c2ecf20Sopenharmony_ci spin_unlock(&fcd->lock); 1268c2ecf20Sopenharmony_ci} 1278c2ecf20Sopenharmony_ci 1288c2ecf20Sopenharmony_cistatic struct fuse_dax_mapping *alloc_dax_mapping(struct fuse_conn_dax *fcd) 1298c2ecf20Sopenharmony_ci{ 1308c2ecf20Sopenharmony_ci struct fuse_dax_mapping *dmap; 1318c2ecf20Sopenharmony_ci 1328c2ecf20Sopenharmony_ci spin_lock(&fcd->lock); 1338c2ecf20Sopenharmony_ci dmap = list_first_entry_or_null(&fcd->free_ranges, 1348c2ecf20Sopenharmony_ci struct fuse_dax_mapping, list); 1358c2ecf20Sopenharmony_ci if (dmap) { 1368c2ecf20Sopenharmony_ci list_del_init(&dmap->list); 1378c2ecf20Sopenharmony_ci WARN_ON(fcd->nr_free_ranges <= 0); 1388c2ecf20Sopenharmony_ci fcd->nr_free_ranges--; 1398c2ecf20Sopenharmony_ci } 1408c2ecf20Sopenharmony_ci spin_unlock(&fcd->lock); 1418c2ecf20Sopenharmony_ci 1428c2ecf20Sopenharmony_ci kick_dmap_free_worker(fcd, 0); 1438c2ecf20Sopenharmony_ci return dmap; 1448c2ecf20Sopenharmony_ci} 1458c2ecf20Sopenharmony_ci 1468c2ecf20Sopenharmony_ci/* This assumes fcd->lock is held */ 1478c2ecf20Sopenharmony_cistatic void __dmap_remove_busy_list(struct fuse_conn_dax *fcd, 1488c2ecf20Sopenharmony_ci struct fuse_dax_mapping *dmap) 1498c2ecf20Sopenharmony_ci{ 1508c2ecf20Sopenharmony_ci list_del_init(&dmap->busy_list); 1518c2ecf20Sopenharmony_ci WARN_ON(fcd->nr_busy_ranges == 0); 1528c2ecf20Sopenharmony_ci fcd->nr_busy_ranges--; 1538c2ecf20Sopenharmony_ci} 1548c2ecf20Sopenharmony_ci 1558c2ecf20Sopenharmony_cistatic void dmap_remove_busy_list(struct fuse_conn_dax *fcd, 1568c2ecf20Sopenharmony_ci struct fuse_dax_mapping *dmap) 1578c2ecf20Sopenharmony_ci{ 1588c2ecf20Sopenharmony_ci spin_lock(&fcd->lock); 1598c2ecf20Sopenharmony_ci __dmap_remove_busy_list(fcd, dmap); 1608c2ecf20Sopenharmony_ci spin_unlock(&fcd->lock); 1618c2ecf20Sopenharmony_ci} 1628c2ecf20Sopenharmony_ci 1638c2ecf20Sopenharmony_ci/* This assumes fcd->lock is held */ 1648c2ecf20Sopenharmony_cistatic void __dmap_add_to_free_pool(struct fuse_conn_dax *fcd, 1658c2ecf20Sopenharmony_ci struct fuse_dax_mapping *dmap) 1668c2ecf20Sopenharmony_ci{ 1678c2ecf20Sopenharmony_ci list_add_tail(&dmap->list, &fcd->free_ranges); 1688c2ecf20Sopenharmony_ci fcd->nr_free_ranges++; 1698c2ecf20Sopenharmony_ci wake_up(&fcd->range_waitq); 1708c2ecf20Sopenharmony_ci} 1718c2ecf20Sopenharmony_ci 1728c2ecf20Sopenharmony_cistatic void dmap_add_to_free_pool(struct fuse_conn_dax *fcd, 1738c2ecf20Sopenharmony_ci struct fuse_dax_mapping *dmap) 1748c2ecf20Sopenharmony_ci{ 1758c2ecf20Sopenharmony_ci /* Return fuse_dax_mapping to free list */ 1768c2ecf20Sopenharmony_ci spin_lock(&fcd->lock); 1778c2ecf20Sopenharmony_ci __dmap_add_to_free_pool(fcd, dmap); 1788c2ecf20Sopenharmony_ci spin_unlock(&fcd->lock); 1798c2ecf20Sopenharmony_ci} 1808c2ecf20Sopenharmony_ci 1818c2ecf20Sopenharmony_cistatic int fuse_setup_one_mapping(struct inode *inode, unsigned long start_idx, 1828c2ecf20Sopenharmony_ci struct fuse_dax_mapping *dmap, bool writable, 1838c2ecf20Sopenharmony_ci bool upgrade) 1848c2ecf20Sopenharmony_ci{ 1858c2ecf20Sopenharmony_ci struct fuse_mount *fm = get_fuse_mount(inode); 1868c2ecf20Sopenharmony_ci struct fuse_conn_dax *fcd = fm->fc->dax; 1878c2ecf20Sopenharmony_ci struct fuse_inode *fi = get_fuse_inode(inode); 1888c2ecf20Sopenharmony_ci struct fuse_setupmapping_in inarg; 1898c2ecf20Sopenharmony_ci loff_t offset = start_idx << FUSE_DAX_SHIFT; 1908c2ecf20Sopenharmony_ci FUSE_ARGS(args); 1918c2ecf20Sopenharmony_ci ssize_t err; 1928c2ecf20Sopenharmony_ci 1938c2ecf20Sopenharmony_ci WARN_ON(fcd->nr_free_ranges < 0); 1948c2ecf20Sopenharmony_ci 1958c2ecf20Sopenharmony_ci /* Ask fuse daemon to setup mapping */ 1968c2ecf20Sopenharmony_ci memset(&inarg, 0, sizeof(inarg)); 1978c2ecf20Sopenharmony_ci inarg.foffset = offset; 1988c2ecf20Sopenharmony_ci inarg.fh = -1; 1998c2ecf20Sopenharmony_ci inarg.moffset = dmap->window_offset; 2008c2ecf20Sopenharmony_ci inarg.len = FUSE_DAX_SZ; 2018c2ecf20Sopenharmony_ci inarg.flags |= FUSE_SETUPMAPPING_FLAG_READ; 2028c2ecf20Sopenharmony_ci if (writable) 2038c2ecf20Sopenharmony_ci inarg.flags |= FUSE_SETUPMAPPING_FLAG_WRITE; 2048c2ecf20Sopenharmony_ci args.opcode = FUSE_SETUPMAPPING; 2058c2ecf20Sopenharmony_ci args.nodeid = fi->nodeid; 2068c2ecf20Sopenharmony_ci args.in_numargs = 1; 2078c2ecf20Sopenharmony_ci args.in_args[0].size = sizeof(inarg); 2088c2ecf20Sopenharmony_ci args.in_args[0].value = &inarg; 2098c2ecf20Sopenharmony_ci err = fuse_simple_request(fm, &args); 2108c2ecf20Sopenharmony_ci if (err < 0) 2118c2ecf20Sopenharmony_ci return err; 2128c2ecf20Sopenharmony_ci dmap->writable = writable; 2138c2ecf20Sopenharmony_ci if (!upgrade) { 2148c2ecf20Sopenharmony_ci /* 2158c2ecf20Sopenharmony_ci * We don't take a refernce on inode. inode is valid right now 2168c2ecf20Sopenharmony_ci * and when inode is going away, cleanup logic should first 2178c2ecf20Sopenharmony_ci * cleanup dmap entries. 2188c2ecf20Sopenharmony_ci */ 2198c2ecf20Sopenharmony_ci dmap->inode = inode; 2208c2ecf20Sopenharmony_ci dmap->itn.start = dmap->itn.last = start_idx; 2218c2ecf20Sopenharmony_ci /* Protected by fi->dax->sem */ 2228c2ecf20Sopenharmony_ci interval_tree_insert(&dmap->itn, &fi->dax->tree); 2238c2ecf20Sopenharmony_ci fi->dax->nr++; 2248c2ecf20Sopenharmony_ci spin_lock(&fcd->lock); 2258c2ecf20Sopenharmony_ci list_add_tail(&dmap->busy_list, &fcd->busy_ranges); 2268c2ecf20Sopenharmony_ci fcd->nr_busy_ranges++; 2278c2ecf20Sopenharmony_ci spin_unlock(&fcd->lock); 2288c2ecf20Sopenharmony_ci } 2298c2ecf20Sopenharmony_ci return 0; 2308c2ecf20Sopenharmony_ci} 2318c2ecf20Sopenharmony_ci 2328c2ecf20Sopenharmony_cistatic int fuse_send_removemapping(struct inode *inode, 2338c2ecf20Sopenharmony_ci struct fuse_removemapping_in *inargp, 2348c2ecf20Sopenharmony_ci struct fuse_removemapping_one *remove_one) 2358c2ecf20Sopenharmony_ci{ 2368c2ecf20Sopenharmony_ci struct fuse_inode *fi = get_fuse_inode(inode); 2378c2ecf20Sopenharmony_ci struct fuse_mount *fm = get_fuse_mount(inode); 2388c2ecf20Sopenharmony_ci FUSE_ARGS(args); 2398c2ecf20Sopenharmony_ci 2408c2ecf20Sopenharmony_ci args.opcode = FUSE_REMOVEMAPPING; 2418c2ecf20Sopenharmony_ci args.nodeid = fi->nodeid; 2428c2ecf20Sopenharmony_ci args.in_numargs = 2; 2438c2ecf20Sopenharmony_ci args.in_args[0].size = sizeof(*inargp); 2448c2ecf20Sopenharmony_ci args.in_args[0].value = inargp; 2458c2ecf20Sopenharmony_ci args.in_args[1].size = inargp->count * sizeof(*remove_one); 2468c2ecf20Sopenharmony_ci args.in_args[1].value = remove_one; 2478c2ecf20Sopenharmony_ci return fuse_simple_request(fm, &args); 2488c2ecf20Sopenharmony_ci} 2498c2ecf20Sopenharmony_ci 2508c2ecf20Sopenharmony_cistatic int dmap_removemapping_list(struct inode *inode, unsigned int num, 2518c2ecf20Sopenharmony_ci struct list_head *to_remove) 2528c2ecf20Sopenharmony_ci{ 2538c2ecf20Sopenharmony_ci struct fuse_removemapping_one *remove_one, *ptr; 2548c2ecf20Sopenharmony_ci struct fuse_removemapping_in inarg; 2558c2ecf20Sopenharmony_ci struct fuse_dax_mapping *dmap; 2568c2ecf20Sopenharmony_ci int ret, i = 0, nr_alloc; 2578c2ecf20Sopenharmony_ci 2588c2ecf20Sopenharmony_ci nr_alloc = min_t(unsigned int, num, FUSE_REMOVEMAPPING_MAX_ENTRY); 2598c2ecf20Sopenharmony_ci remove_one = kmalloc_array(nr_alloc, sizeof(*remove_one), GFP_NOFS); 2608c2ecf20Sopenharmony_ci if (!remove_one) 2618c2ecf20Sopenharmony_ci return -ENOMEM; 2628c2ecf20Sopenharmony_ci 2638c2ecf20Sopenharmony_ci ptr = remove_one; 2648c2ecf20Sopenharmony_ci list_for_each_entry(dmap, to_remove, list) { 2658c2ecf20Sopenharmony_ci ptr->moffset = dmap->window_offset; 2668c2ecf20Sopenharmony_ci ptr->len = dmap->length; 2678c2ecf20Sopenharmony_ci ptr++; 2688c2ecf20Sopenharmony_ci i++; 2698c2ecf20Sopenharmony_ci num--; 2708c2ecf20Sopenharmony_ci if (i >= nr_alloc || num == 0) { 2718c2ecf20Sopenharmony_ci memset(&inarg, 0, sizeof(inarg)); 2728c2ecf20Sopenharmony_ci inarg.count = i; 2738c2ecf20Sopenharmony_ci ret = fuse_send_removemapping(inode, &inarg, 2748c2ecf20Sopenharmony_ci remove_one); 2758c2ecf20Sopenharmony_ci if (ret) 2768c2ecf20Sopenharmony_ci goto out; 2778c2ecf20Sopenharmony_ci ptr = remove_one; 2788c2ecf20Sopenharmony_ci i = 0; 2798c2ecf20Sopenharmony_ci } 2808c2ecf20Sopenharmony_ci } 2818c2ecf20Sopenharmony_ciout: 2828c2ecf20Sopenharmony_ci kfree(remove_one); 2838c2ecf20Sopenharmony_ci return ret; 2848c2ecf20Sopenharmony_ci} 2858c2ecf20Sopenharmony_ci 2868c2ecf20Sopenharmony_ci/* 2878c2ecf20Sopenharmony_ci * Cleanup dmap entry and add back to free list. This should be called with 2888c2ecf20Sopenharmony_ci * fcd->lock held. 2898c2ecf20Sopenharmony_ci */ 2908c2ecf20Sopenharmony_cistatic void dmap_reinit_add_to_free_pool(struct fuse_conn_dax *fcd, 2918c2ecf20Sopenharmony_ci struct fuse_dax_mapping *dmap) 2928c2ecf20Sopenharmony_ci{ 2938c2ecf20Sopenharmony_ci pr_debug("fuse: freeing memory range start_idx=0x%lx end_idx=0x%lx window_offset=0x%llx length=0x%llx\n", 2948c2ecf20Sopenharmony_ci dmap->itn.start, dmap->itn.last, dmap->window_offset, 2958c2ecf20Sopenharmony_ci dmap->length); 2968c2ecf20Sopenharmony_ci __dmap_remove_busy_list(fcd, dmap); 2978c2ecf20Sopenharmony_ci dmap->inode = NULL; 2988c2ecf20Sopenharmony_ci dmap->itn.start = dmap->itn.last = 0; 2998c2ecf20Sopenharmony_ci __dmap_add_to_free_pool(fcd, dmap); 3008c2ecf20Sopenharmony_ci} 3018c2ecf20Sopenharmony_ci 3028c2ecf20Sopenharmony_ci/* 3038c2ecf20Sopenharmony_ci * Free inode dmap entries whose range falls inside [start, end]. 3048c2ecf20Sopenharmony_ci * Does not take any locks. At this point of time it should only be 3058c2ecf20Sopenharmony_ci * called from evict_inode() path where we know all dmap entries can be 3068c2ecf20Sopenharmony_ci * reclaimed. 3078c2ecf20Sopenharmony_ci */ 3088c2ecf20Sopenharmony_cistatic void inode_reclaim_dmap_range(struct fuse_conn_dax *fcd, 3098c2ecf20Sopenharmony_ci struct inode *inode, 3108c2ecf20Sopenharmony_ci loff_t start, loff_t end) 3118c2ecf20Sopenharmony_ci{ 3128c2ecf20Sopenharmony_ci struct fuse_inode *fi = get_fuse_inode(inode); 3138c2ecf20Sopenharmony_ci struct fuse_dax_mapping *dmap, *n; 3148c2ecf20Sopenharmony_ci int err, num = 0; 3158c2ecf20Sopenharmony_ci LIST_HEAD(to_remove); 3168c2ecf20Sopenharmony_ci unsigned long start_idx = start >> FUSE_DAX_SHIFT; 3178c2ecf20Sopenharmony_ci unsigned long end_idx = end >> FUSE_DAX_SHIFT; 3188c2ecf20Sopenharmony_ci struct interval_tree_node *node; 3198c2ecf20Sopenharmony_ci 3208c2ecf20Sopenharmony_ci while (1) { 3218c2ecf20Sopenharmony_ci node = interval_tree_iter_first(&fi->dax->tree, start_idx, 3228c2ecf20Sopenharmony_ci end_idx); 3238c2ecf20Sopenharmony_ci if (!node) 3248c2ecf20Sopenharmony_ci break; 3258c2ecf20Sopenharmony_ci dmap = node_to_dmap(node); 3268c2ecf20Sopenharmony_ci /* inode is going away. There should not be any users of dmap */ 3278c2ecf20Sopenharmony_ci WARN_ON(refcount_read(&dmap->refcnt) > 1); 3288c2ecf20Sopenharmony_ci interval_tree_remove(&dmap->itn, &fi->dax->tree); 3298c2ecf20Sopenharmony_ci num++; 3308c2ecf20Sopenharmony_ci list_add(&dmap->list, &to_remove); 3318c2ecf20Sopenharmony_ci } 3328c2ecf20Sopenharmony_ci 3338c2ecf20Sopenharmony_ci /* Nothing to remove */ 3348c2ecf20Sopenharmony_ci if (list_empty(&to_remove)) 3358c2ecf20Sopenharmony_ci return; 3368c2ecf20Sopenharmony_ci 3378c2ecf20Sopenharmony_ci WARN_ON(fi->dax->nr < num); 3388c2ecf20Sopenharmony_ci fi->dax->nr -= num; 3398c2ecf20Sopenharmony_ci err = dmap_removemapping_list(inode, num, &to_remove); 3408c2ecf20Sopenharmony_ci if (err && err != -ENOTCONN) { 3418c2ecf20Sopenharmony_ci pr_warn("Failed to removemappings. start=0x%llx end=0x%llx\n", 3428c2ecf20Sopenharmony_ci start, end); 3438c2ecf20Sopenharmony_ci } 3448c2ecf20Sopenharmony_ci spin_lock(&fcd->lock); 3458c2ecf20Sopenharmony_ci list_for_each_entry_safe(dmap, n, &to_remove, list) { 3468c2ecf20Sopenharmony_ci list_del_init(&dmap->list); 3478c2ecf20Sopenharmony_ci dmap_reinit_add_to_free_pool(fcd, dmap); 3488c2ecf20Sopenharmony_ci } 3498c2ecf20Sopenharmony_ci spin_unlock(&fcd->lock); 3508c2ecf20Sopenharmony_ci} 3518c2ecf20Sopenharmony_ci 3528c2ecf20Sopenharmony_cistatic int dmap_removemapping_one(struct inode *inode, 3538c2ecf20Sopenharmony_ci struct fuse_dax_mapping *dmap) 3548c2ecf20Sopenharmony_ci{ 3558c2ecf20Sopenharmony_ci struct fuse_removemapping_one forget_one; 3568c2ecf20Sopenharmony_ci struct fuse_removemapping_in inarg; 3578c2ecf20Sopenharmony_ci 3588c2ecf20Sopenharmony_ci memset(&inarg, 0, sizeof(inarg)); 3598c2ecf20Sopenharmony_ci inarg.count = 1; 3608c2ecf20Sopenharmony_ci memset(&forget_one, 0, sizeof(forget_one)); 3618c2ecf20Sopenharmony_ci forget_one.moffset = dmap->window_offset; 3628c2ecf20Sopenharmony_ci forget_one.len = dmap->length; 3638c2ecf20Sopenharmony_ci 3648c2ecf20Sopenharmony_ci return fuse_send_removemapping(inode, &inarg, &forget_one); 3658c2ecf20Sopenharmony_ci} 3668c2ecf20Sopenharmony_ci 3678c2ecf20Sopenharmony_ci/* 3688c2ecf20Sopenharmony_ci * It is called from evict_inode() and by that time inode is going away. So 3698c2ecf20Sopenharmony_ci * this function does not take any locks like fi->dax->sem for traversing 3708c2ecf20Sopenharmony_ci * that fuse inode interval tree. If that lock is taken then lock validator 3718c2ecf20Sopenharmony_ci * complains of deadlock situation w.r.t fs_reclaim lock. 3728c2ecf20Sopenharmony_ci */ 3738c2ecf20Sopenharmony_civoid fuse_dax_inode_cleanup(struct inode *inode) 3748c2ecf20Sopenharmony_ci{ 3758c2ecf20Sopenharmony_ci struct fuse_conn *fc = get_fuse_conn(inode); 3768c2ecf20Sopenharmony_ci struct fuse_inode *fi = get_fuse_inode(inode); 3778c2ecf20Sopenharmony_ci 3788c2ecf20Sopenharmony_ci /* 3798c2ecf20Sopenharmony_ci * fuse_evict_inode() has already called truncate_inode_pages_final() 3808c2ecf20Sopenharmony_ci * before we arrive here. So we should not have to worry about any 3818c2ecf20Sopenharmony_ci * pages/exception entries still associated with inode. 3828c2ecf20Sopenharmony_ci */ 3838c2ecf20Sopenharmony_ci inode_reclaim_dmap_range(fc->dax, inode, 0, -1); 3848c2ecf20Sopenharmony_ci WARN_ON(fi->dax->nr); 3858c2ecf20Sopenharmony_ci} 3868c2ecf20Sopenharmony_ci 3878c2ecf20Sopenharmony_cistatic void fuse_fill_iomap_hole(struct iomap *iomap, loff_t length) 3888c2ecf20Sopenharmony_ci{ 3898c2ecf20Sopenharmony_ci iomap->addr = IOMAP_NULL_ADDR; 3908c2ecf20Sopenharmony_ci iomap->length = length; 3918c2ecf20Sopenharmony_ci iomap->type = IOMAP_HOLE; 3928c2ecf20Sopenharmony_ci} 3938c2ecf20Sopenharmony_ci 3948c2ecf20Sopenharmony_cistatic void fuse_fill_iomap(struct inode *inode, loff_t pos, loff_t length, 3958c2ecf20Sopenharmony_ci struct iomap *iomap, struct fuse_dax_mapping *dmap, 3968c2ecf20Sopenharmony_ci unsigned int flags) 3978c2ecf20Sopenharmony_ci{ 3988c2ecf20Sopenharmony_ci loff_t offset, len; 3998c2ecf20Sopenharmony_ci loff_t i_size = i_size_read(inode); 4008c2ecf20Sopenharmony_ci 4018c2ecf20Sopenharmony_ci offset = pos - (dmap->itn.start << FUSE_DAX_SHIFT); 4028c2ecf20Sopenharmony_ci len = min(length, dmap->length - offset); 4038c2ecf20Sopenharmony_ci 4048c2ecf20Sopenharmony_ci /* If length is beyond end of file, truncate further */ 4058c2ecf20Sopenharmony_ci if (pos + len > i_size) 4068c2ecf20Sopenharmony_ci len = i_size - pos; 4078c2ecf20Sopenharmony_ci 4088c2ecf20Sopenharmony_ci if (len > 0) { 4098c2ecf20Sopenharmony_ci iomap->addr = dmap->window_offset + offset; 4108c2ecf20Sopenharmony_ci iomap->length = len; 4118c2ecf20Sopenharmony_ci if (flags & IOMAP_FAULT) 4128c2ecf20Sopenharmony_ci iomap->length = ALIGN(len, PAGE_SIZE); 4138c2ecf20Sopenharmony_ci iomap->type = IOMAP_MAPPED; 4148c2ecf20Sopenharmony_ci /* 4158c2ecf20Sopenharmony_ci * increace refcnt so that reclaim code knows this dmap is in 4168c2ecf20Sopenharmony_ci * use. This assumes fi->dax->sem mutex is held either 4178c2ecf20Sopenharmony_ci * shared/exclusive. 4188c2ecf20Sopenharmony_ci */ 4198c2ecf20Sopenharmony_ci refcount_inc(&dmap->refcnt); 4208c2ecf20Sopenharmony_ci 4218c2ecf20Sopenharmony_ci /* iomap->private should be NULL */ 4228c2ecf20Sopenharmony_ci WARN_ON_ONCE(iomap->private); 4238c2ecf20Sopenharmony_ci iomap->private = dmap; 4248c2ecf20Sopenharmony_ci } else { 4258c2ecf20Sopenharmony_ci /* Mapping beyond end of file is hole */ 4268c2ecf20Sopenharmony_ci fuse_fill_iomap_hole(iomap, length); 4278c2ecf20Sopenharmony_ci } 4288c2ecf20Sopenharmony_ci} 4298c2ecf20Sopenharmony_ci 4308c2ecf20Sopenharmony_cistatic int fuse_setup_new_dax_mapping(struct inode *inode, loff_t pos, 4318c2ecf20Sopenharmony_ci loff_t length, unsigned int flags, 4328c2ecf20Sopenharmony_ci struct iomap *iomap) 4338c2ecf20Sopenharmony_ci{ 4348c2ecf20Sopenharmony_ci struct fuse_inode *fi = get_fuse_inode(inode); 4358c2ecf20Sopenharmony_ci struct fuse_conn *fc = get_fuse_conn(inode); 4368c2ecf20Sopenharmony_ci struct fuse_conn_dax *fcd = fc->dax; 4378c2ecf20Sopenharmony_ci struct fuse_dax_mapping *dmap, *alloc_dmap = NULL; 4388c2ecf20Sopenharmony_ci int ret; 4398c2ecf20Sopenharmony_ci bool writable = flags & IOMAP_WRITE; 4408c2ecf20Sopenharmony_ci unsigned long start_idx = pos >> FUSE_DAX_SHIFT; 4418c2ecf20Sopenharmony_ci struct interval_tree_node *node; 4428c2ecf20Sopenharmony_ci 4438c2ecf20Sopenharmony_ci /* 4448c2ecf20Sopenharmony_ci * Can't do inline reclaim in fault path. We call 4458c2ecf20Sopenharmony_ci * dax_layout_busy_page() before we free a range. And 4468c2ecf20Sopenharmony_ci * fuse_wait_dax_page() drops fi->i_mmap_sem lock and requires it. 4478c2ecf20Sopenharmony_ci * In fault path we enter with fi->i_mmap_sem held and can't drop 4488c2ecf20Sopenharmony_ci * it. Also in fault path we hold fi->i_mmap_sem shared and not 4498c2ecf20Sopenharmony_ci * exclusive, so that creates further issues with fuse_wait_dax_page(). 4508c2ecf20Sopenharmony_ci * Hence return -EAGAIN and fuse_dax_fault() will wait for a memory 4518c2ecf20Sopenharmony_ci * range to become free and retry. 4528c2ecf20Sopenharmony_ci */ 4538c2ecf20Sopenharmony_ci if (flags & IOMAP_FAULT) { 4548c2ecf20Sopenharmony_ci alloc_dmap = alloc_dax_mapping(fcd); 4558c2ecf20Sopenharmony_ci if (!alloc_dmap) 4568c2ecf20Sopenharmony_ci return -EAGAIN; 4578c2ecf20Sopenharmony_ci } else { 4588c2ecf20Sopenharmony_ci alloc_dmap = alloc_dax_mapping_reclaim(fcd, inode); 4598c2ecf20Sopenharmony_ci if (IS_ERR(alloc_dmap)) 4608c2ecf20Sopenharmony_ci return PTR_ERR(alloc_dmap); 4618c2ecf20Sopenharmony_ci } 4628c2ecf20Sopenharmony_ci 4638c2ecf20Sopenharmony_ci /* If we are here, we should have memory allocated */ 4648c2ecf20Sopenharmony_ci if (WARN_ON(!alloc_dmap)) 4658c2ecf20Sopenharmony_ci return -EIO; 4668c2ecf20Sopenharmony_ci 4678c2ecf20Sopenharmony_ci /* 4688c2ecf20Sopenharmony_ci * Take write lock so that only one caller can try to setup mapping 4698c2ecf20Sopenharmony_ci * and other waits. 4708c2ecf20Sopenharmony_ci */ 4718c2ecf20Sopenharmony_ci down_write(&fi->dax->sem); 4728c2ecf20Sopenharmony_ci /* 4738c2ecf20Sopenharmony_ci * We dropped lock. Check again if somebody else setup 4748c2ecf20Sopenharmony_ci * mapping already. 4758c2ecf20Sopenharmony_ci */ 4768c2ecf20Sopenharmony_ci node = interval_tree_iter_first(&fi->dax->tree, start_idx, start_idx); 4778c2ecf20Sopenharmony_ci if (node) { 4788c2ecf20Sopenharmony_ci dmap = node_to_dmap(node); 4798c2ecf20Sopenharmony_ci fuse_fill_iomap(inode, pos, length, iomap, dmap, flags); 4808c2ecf20Sopenharmony_ci dmap_add_to_free_pool(fcd, alloc_dmap); 4818c2ecf20Sopenharmony_ci up_write(&fi->dax->sem); 4828c2ecf20Sopenharmony_ci return 0; 4838c2ecf20Sopenharmony_ci } 4848c2ecf20Sopenharmony_ci 4858c2ecf20Sopenharmony_ci /* Setup one mapping */ 4868c2ecf20Sopenharmony_ci ret = fuse_setup_one_mapping(inode, pos >> FUSE_DAX_SHIFT, alloc_dmap, 4878c2ecf20Sopenharmony_ci writable, false); 4888c2ecf20Sopenharmony_ci if (ret < 0) { 4898c2ecf20Sopenharmony_ci dmap_add_to_free_pool(fcd, alloc_dmap); 4908c2ecf20Sopenharmony_ci up_write(&fi->dax->sem); 4918c2ecf20Sopenharmony_ci return ret; 4928c2ecf20Sopenharmony_ci } 4938c2ecf20Sopenharmony_ci fuse_fill_iomap(inode, pos, length, iomap, alloc_dmap, flags); 4948c2ecf20Sopenharmony_ci up_write(&fi->dax->sem); 4958c2ecf20Sopenharmony_ci return 0; 4968c2ecf20Sopenharmony_ci} 4978c2ecf20Sopenharmony_ci 4988c2ecf20Sopenharmony_cistatic int fuse_upgrade_dax_mapping(struct inode *inode, loff_t pos, 4998c2ecf20Sopenharmony_ci loff_t length, unsigned int flags, 5008c2ecf20Sopenharmony_ci struct iomap *iomap) 5018c2ecf20Sopenharmony_ci{ 5028c2ecf20Sopenharmony_ci struct fuse_inode *fi = get_fuse_inode(inode); 5038c2ecf20Sopenharmony_ci struct fuse_dax_mapping *dmap; 5048c2ecf20Sopenharmony_ci int ret; 5058c2ecf20Sopenharmony_ci unsigned long idx = pos >> FUSE_DAX_SHIFT; 5068c2ecf20Sopenharmony_ci struct interval_tree_node *node; 5078c2ecf20Sopenharmony_ci 5088c2ecf20Sopenharmony_ci /* 5098c2ecf20Sopenharmony_ci * Take exclusive lock so that only one caller can try to setup 5108c2ecf20Sopenharmony_ci * mapping and others wait. 5118c2ecf20Sopenharmony_ci */ 5128c2ecf20Sopenharmony_ci down_write(&fi->dax->sem); 5138c2ecf20Sopenharmony_ci node = interval_tree_iter_first(&fi->dax->tree, idx, idx); 5148c2ecf20Sopenharmony_ci 5158c2ecf20Sopenharmony_ci /* We are holding either inode lock or i_mmap_sem, and that should 5168c2ecf20Sopenharmony_ci * ensure that dmap can't be truncated. We are holding a reference 5178c2ecf20Sopenharmony_ci * on dmap and that should make sure it can't be reclaimed. So dmap 5188c2ecf20Sopenharmony_ci * should still be there in tree despite the fact we dropped and 5198c2ecf20Sopenharmony_ci * re-acquired the fi->dax->sem lock. 5208c2ecf20Sopenharmony_ci */ 5218c2ecf20Sopenharmony_ci ret = -EIO; 5228c2ecf20Sopenharmony_ci if (WARN_ON(!node)) 5238c2ecf20Sopenharmony_ci goto out_err; 5248c2ecf20Sopenharmony_ci 5258c2ecf20Sopenharmony_ci dmap = node_to_dmap(node); 5268c2ecf20Sopenharmony_ci 5278c2ecf20Sopenharmony_ci /* We took an extra reference on dmap to make sure its not reclaimd. 5288c2ecf20Sopenharmony_ci * Now we hold fi->dax->sem lock and that reference is not needed 5298c2ecf20Sopenharmony_ci * anymore. Drop it. 5308c2ecf20Sopenharmony_ci */ 5318c2ecf20Sopenharmony_ci if (refcount_dec_and_test(&dmap->refcnt)) { 5328c2ecf20Sopenharmony_ci /* refcount should not hit 0. This object only goes 5338c2ecf20Sopenharmony_ci * away when fuse connection goes away 5348c2ecf20Sopenharmony_ci */ 5358c2ecf20Sopenharmony_ci WARN_ON_ONCE(1); 5368c2ecf20Sopenharmony_ci } 5378c2ecf20Sopenharmony_ci 5388c2ecf20Sopenharmony_ci /* Maybe another thread already upgraded mapping while we were not 5398c2ecf20Sopenharmony_ci * holding lock. 5408c2ecf20Sopenharmony_ci */ 5418c2ecf20Sopenharmony_ci if (dmap->writable) { 5428c2ecf20Sopenharmony_ci ret = 0; 5438c2ecf20Sopenharmony_ci goto out_fill_iomap; 5448c2ecf20Sopenharmony_ci } 5458c2ecf20Sopenharmony_ci 5468c2ecf20Sopenharmony_ci ret = fuse_setup_one_mapping(inode, pos >> FUSE_DAX_SHIFT, dmap, true, 5478c2ecf20Sopenharmony_ci true); 5488c2ecf20Sopenharmony_ci if (ret < 0) 5498c2ecf20Sopenharmony_ci goto out_err; 5508c2ecf20Sopenharmony_ciout_fill_iomap: 5518c2ecf20Sopenharmony_ci fuse_fill_iomap(inode, pos, length, iomap, dmap, flags); 5528c2ecf20Sopenharmony_ciout_err: 5538c2ecf20Sopenharmony_ci up_write(&fi->dax->sem); 5548c2ecf20Sopenharmony_ci return ret; 5558c2ecf20Sopenharmony_ci} 5568c2ecf20Sopenharmony_ci 5578c2ecf20Sopenharmony_ci/* This is just for DAX and the mapping is ephemeral, do not use it for other 5588c2ecf20Sopenharmony_ci * purposes since there is no block device with a permanent mapping. 5598c2ecf20Sopenharmony_ci */ 5608c2ecf20Sopenharmony_cistatic int fuse_iomap_begin(struct inode *inode, loff_t pos, loff_t length, 5618c2ecf20Sopenharmony_ci unsigned int flags, struct iomap *iomap, 5628c2ecf20Sopenharmony_ci struct iomap *srcmap) 5638c2ecf20Sopenharmony_ci{ 5648c2ecf20Sopenharmony_ci struct fuse_inode *fi = get_fuse_inode(inode); 5658c2ecf20Sopenharmony_ci struct fuse_conn *fc = get_fuse_conn(inode); 5668c2ecf20Sopenharmony_ci struct fuse_dax_mapping *dmap; 5678c2ecf20Sopenharmony_ci bool writable = flags & IOMAP_WRITE; 5688c2ecf20Sopenharmony_ci unsigned long start_idx = pos >> FUSE_DAX_SHIFT; 5698c2ecf20Sopenharmony_ci struct interval_tree_node *node; 5708c2ecf20Sopenharmony_ci 5718c2ecf20Sopenharmony_ci /* We don't support FIEMAP */ 5728c2ecf20Sopenharmony_ci if (WARN_ON(flags & IOMAP_REPORT)) 5738c2ecf20Sopenharmony_ci return -EIO; 5748c2ecf20Sopenharmony_ci 5758c2ecf20Sopenharmony_ci iomap->offset = pos; 5768c2ecf20Sopenharmony_ci iomap->flags = 0; 5778c2ecf20Sopenharmony_ci iomap->bdev = NULL; 5788c2ecf20Sopenharmony_ci iomap->dax_dev = fc->dax->dev; 5798c2ecf20Sopenharmony_ci 5808c2ecf20Sopenharmony_ci /* 5818c2ecf20Sopenharmony_ci * Both read/write and mmap path can race here. So we need something 5828c2ecf20Sopenharmony_ci * to make sure if we are setting up mapping, then other path waits 5838c2ecf20Sopenharmony_ci * 5848c2ecf20Sopenharmony_ci * For now, use a semaphore for this. It probably needs to be 5858c2ecf20Sopenharmony_ci * optimized later. 5868c2ecf20Sopenharmony_ci */ 5878c2ecf20Sopenharmony_ci down_read(&fi->dax->sem); 5888c2ecf20Sopenharmony_ci node = interval_tree_iter_first(&fi->dax->tree, start_idx, start_idx); 5898c2ecf20Sopenharmony_ci if (node) { 5908c2ecf20Sopenharmony_ci dmap = node_to_dmap(node); 5918c2ecf20Sopenharmony_ci if (writable && !dmap->writable) { 5928c2ecf20Sopenharmony_ci /* Upgrade read-only mapping to read-write. This will 5938c2ecf20Sopenharmony_ci * require exclusive fi->dax->sem lock as we don't want 5948c2ecf20Sopenharmony_ci * two threads to be trying to this simultaneously 5958c2ecf20Sopenharmony_ci * for same dmap. So drop shared lock and acquire 5968c2ecf20Sopenharmony_ci * exclusive lock. 5978c2ecf20Sopenharmony_ci * 5988c2ecf20Sopenharmony_ci * Before dropping fi->dax->sem lock, take reference 5998c2ecf20Sopenharmony_ci * on dmap so that its not freed by range reclaim. 6008c2ecf20Sopenharmony_ci */ 6018c2ecf20Sopenharmony_ci refcount_inc(&dmap->refcnt); 6028c2ecf20Sopenharmony_ci up_read(&fi->dax->sem); 6038c2ecf20Sopenharmony_ci pr_debug("%s: Upgrading mapping at offset 0x%llx length 0x%llx\n", 6048c2ecf20Sopenharmony_ci __func__, pos, length); 6058c2ecf20Sopenharmony_ci return fuse_upgrade_dax_mapping(inode, pos, length, 6068c2ecf20Sopenharmony_ci flags, iomap); 6078c2ecf20Sopenharmony_ci } else { 6088c2ecf20Sopenharmony_ci fuse_fill_iomap(inode, pos, length, iomap, dmap, flags); 6098c2ecf20Sopenharmony_ci up_read(&fi->dax->sem); 6108c2ecf20Sopenharmony_ci return 0; 6118c2ecf20Sopenharmony_ci } 6128c2ecf20Sopenharmony_ci } else { 6138c2ecf20Sopenharmony_ci up_read(&fi->dax->sem); 6148c2ecf20Sopenharmony_ci pr_debug("%s: no mapping at offset 0x%llx length 0x%llx\n", 6158c2ecf20Sopenharmony_ci __func__, pos, length); 6168c2ecf20Sopenharmony_ci if (pos >= i_size_read(inode)) 6178c2ecf20Sopenharmony_ci goto iomap_hole; 6188c2ecf20Sopenharmony_ci 6198c2ecf20Sopenharmony_ci return fuse_setup_new_dax_mapping(inode, pos, length, flags, 6208c2ecf20Sopenharmony_ci iomap); 6218c2ecf20Sopenharmony_ci } 6228c2ecf20Sopenharmony_ci 6238c2ecf20Sopenharmony_ci /* 6248c2ecf20Sopenharmony_ci * If read beyond end of file happnes, fs code seems to return 6258c2ecf20Sopenharmony_ci * it as hole 6268c2ecf20Sopenharmony_ci */ 6278c2ecf20Sopenharmony_ciiomap_hole: 6288c2ecf20Sopenharmony_ci fuse_fill_iomap_hole(iomap, length); 6298c2ecf20Sopenharmony_ci pr_debug("%s returning hole mapping. pos=0x%llx length_asked=0x%llx length_returned=0x%llx\n", 6308c2ecf20Sopenharmony_ci __func__, pos, length, iomap->length); 6318c2ecf20Sopenharmony_ci return 0; 6328c2ecf20Sopenharmony_ci} 6338c2ecf20Sopenharmony_ci 6348c2ecf20Sopenharmony_cistatic int fuse_iomap_end(struct inode *inode, loff_t pos, loff_t length, 6358c2ecf20Sopenharmony_ci ssize_t written, unsigned int flags, 6368c2ecf20Sopenharmony_ci struct iomap *iomap) 6378c2ecf20Sopenharmony_ci{ 6388c2ecf20Sopenharmony_ci struct fuse_dax_mapping *dmap = iomap->private; 6398c2ecf20Sopenharmony_ci 6408c2ecf20Sopenharmony_ci if (dmap) { 6418c2ecf20Sopenharmony_ci if (refcount_dec_and_test(&dmap->refcnt)) { 6428c2ecf20Sopenharmony_ci /* refcount should not hit 0. This object only goes 6438c2ecf20Sopenharmony_ci * away when fuse connection goes away 6448c2ecf20Sopenharmony_ci */ 6458c2ecf20Sopenharmony_ci WARN_ON_ONCE(1); 6468c2ecf20Sopenharmony_ci } 6478c2ecf20Sopenharmony_ci } 6488c2ecf20Sopenharmony_ci 6498c2ecf20Sopenharmony_ci /* DAX writes beyond end-of-file aren't handled using iomap, so the 6508c2ecf20Sopenharmony_ci * file size is unchanged and there is nothing to do here. 6518c2ecf20Sopenharmony_ci */ 6528c2ecf20Sopenharmony_ci return 0; 6538c2ecf20Sopenharmony_ci} 6548c2ecf20Sopenharmony_ci 6558c2ecf20Sopenharmony_cistatic const struct iomap_ops fuse_iomap_ops = { 6568c2ecf20Sopenharmony_ci .iomap_begin = fuse_iomap_begin, 6578c2ecf20Sopenharmony_ci .iomap_end = fuse_iomap_end, 6588c2ecf20Sopenharmony_ci}; 6598c2ecf20Sopenharmony_ci 6608c2ecf20Sopenharmony_cistatic void fuse_wait_dax_page(struct inode *inode) 6618c2ecf20Sopenharmony_ci{ 6628c2ecf20Sopenharmony_ci struct fuse_inode *fi = get_fuse_inode(inode); 6638c2ecf20Sopenharmony_ci 6648c2ecf20Sopenharmony_ci up_write(&fi->i_mmap_sem); 6658c2ecf20Sopenharmony_ci schedule(); 6668c2ecf20Sopenharmony_ci down_write(&fi->i_mmap_sem); 6678c2ecf20Sopenharmony_ci} 6688c2ecf20Sopenharmony_ci 6698c2ecf20Sopenharmony_ci/* Should be called with fi->i_mmap_sem lock held exclusively */ 6708c2ecf20Sopenharmony_cistatic int __fuse_dax_break_layouts(struct inode *inode, bool *retry, 6718c2ecf20Sopenharmony_ci loff_t start, loff_t end) 6728c2ecf20Sopenharmony_ci{ 6738c2ecf20Sopenharmony_ci struct page *page; 6748c2ecf20Sopenharmony_ci 6758c2ecf20Sopenharmony_ci page = dax_layout_busy_page_range(inode->i_mapping, start, end); 6768c2ecf20Sopenharmony_ci if (!page) 6778c2ecf20Sopenharmony_ci return 0; 6788c2ecf20Sopenharmony_ci 6798c2ecf20Sopenharmony_ci *retry = true; 6808c2ecf20Sopenharmony_ci return ___wait_var_event(&page->_refcount, 6818c2ecf20Sopenharmony_ci atomic_read(&page->_refcount) == 1, TASK_INTERRUPTIBLE, 6828c2ecf20Sopenharmony_ci 0, 0, fuse_wait_dax_page(inode)); 6838c2ecf20Sopenharmony_ci} 6848c2ecf20Sopenharmony_ci 6858c2ecf20Sopenharmony_ci/* dmap_end == 0 leads to unmapping of whole file */ 6868c2ecf20Sopenharmony_ciint fuse_dax_break_layouts(struct inode *inode, u64 dmap_start, 6878c2ecf20Sopenharmony_ci u64 dmap_end) 6888c2ecf20Sopenharmony_ci{ 6898c2ecf20Sopenharmony_ci bool retry; 6908c2ecf20Sopenharmony_ci int ret; 6918c2ecf20Sopenharmony_ci 6928c2ecf20Sopenharmony_ci do { 6938c2ecf20Sopenharmony_ci retry = false; 6948c2ecf20Sopenharmony_ci ret = __fuse_dax_break_layouts(inode, &retry, dmap_start, 6958c2ecf20Sopenharmony_ci dmap_end); 6968c2ecf20Sopenharmony_ci } while (ret == 0 && retry); 6978c2ecf20Sopenharmony_ci 6988c2ecf20Sopenharmony_ci return ret; 6998c2ecf20Sopenharmony_ci} 7008c2ecf20Sopenharmony_ci 7018c2ecf20Sopenharmony_cissize_t fuse_dax_read_iter(struct kiocb *iocb, struct iov_iter *to) 7028c2ecf20Sopenharmony_ci{ 7038c2ecf20Sopenharmony_ci struct inode *inode = file_inode(iocb->ki_filp); 7048c2ecf20Sopenharmony_ci ssize_t ret; 7058c2ecf20Sopenharmony_ci 7068c2ecf20Sopenharmony_ci if (iocb->ki_flags & IOCB_NOWAIT) { 7078c2ecf20Sopenharmony_ci if (!inode_trylock_shared(inode)) 7088c2ecf20Sopenharmony_ci return -EAGAIN; 7098c2ecf20Sopenharmony_ci } else { 7108c2ecf20Sopenharmony_ci inode_lock_shared(inode); 7118c2ecf20Sopenharmony_ci } 7128c2ecf20Sopenharmony_ci 7138c2ecf20Sopenharmony_ci ret = dax_iomap_rw(iocb, to, &fuse_iomap_ops); 7148c2ecf20Sopenharmony_ci inode_unlock_shared(inode); 7158c2ecf20Sopenharmony_ci 7168c2ecf20Sopenharmony_ci /* TODO file_accessed(iocb->f_filp) */ 7178c2ecf20Sopenharmony_ci return ret; 7188c2ecf20Sopenharmony_ci} 7198c2ecf20Sopenharmony_ci 7208c2ecf20Sopenharmony_cistatic bool file_extending_write(struct kiocb *iocb, struct iov_iter *from) 7218c2ecf20Sopenharmony_ci{ 7228c2ecf20Sopenharmony_ci struct inode *inode = file_inode(iocb->ki_filp); 7238c2ecf20Sopenharmony_ci 7248c2ecf20Sopenharmony_ci return (iov_iter_rw(from) == WRITE && 7258c2ecf20Sopenharmony_ci ((iocb->ki_pos) >= i_size_read(inode) || 7268c2ecf20Sopenharmony_ci (iocb->ki_pos + iov_iter_count(from) > i_size_read(inode)))); 7278c2ecf20Sopenharmony_ci} 7288c2ecf20Sopenharmony_ci 7298c2ecf20Sopenharmony_cistatic ssize_t fuse_dax_direct_write(struct kiocb *iocb, struct iov_iter *from) 7308c2ecf20Sopenharmony_ci{ 7318c2ecf20Sopenharmony_ci struct inode *inode = file_inode(iocb->ki_filp); 7328c2ecf20Sopenharmony_ci struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb); 7338c2ecf20Sopenharmony_ci ssize_t ret; 7348c2ecf20Sopenharmony_ci 7358c2ecf20Sopenharmony_ci ret = fuse_direct_io(&io, from, &iocb->ki_pos, FUSE_DIO_WRITE); 7368c2ecf20Sopenharmony_ci if (ret < 0) 7378c2ecf20Sopenharmony_ci return ret; 7388c2ecf20Sopenharmony_ci 7398c2ecf20Sopenharmony_ci fuse_invalidate_attr(inode); 7408c2ecf20Sopenharmony_ci fuse_write_update_size(inode, iocb->ki_pos); 7418c2ecf20Sopenharmony_ci return ret; 7428c2ecf20Sopenharmony_ci} 7438c2ecf20Sopenharmony_ci 7448c2ecf20Sopenharmony_cissize_t fuse_dax_write_iter(struct kiocb *iocb, struct iov_iter *from) 7458c2ecf20Sopenharmony_ci{ 7468c2ecf20Sopenharmony_ci struct inode *inode = file_inode(iocb->ki_filp); 7478c2ecf20Sopenharmony_ci ssize_t ret; 7488c2ecf20Sopenharmony_ci 7498c2ecf20Sopenharmony_ci if (iocb->ki_flags & IOCB_NOWAIT) { 7508c2ecf20Sopenharmony_ci if (!inode_trylock(inode)) 7518c2ecf20Sopenharmony_ci return -EAGAIN; 7528c2ecf20Sopenharmony_ci } else { 7538c2ecf20Sopenharmony_ci inode_lock(inode); 7548c2ecf20Sopenharmony_ci } 7558c2ecf20Sopenharmony_ci 7568c2ecf20Sopenharmony_ci ret = generic_write_checks(iocb, from); 7578c2ecf20Sopenharmony_ci if (ret <= 0) 7588c2ecf20Sopenharmony_ci goto out; 7598c2ecf20Sopenharmony_ci 7608c2ecf20Sopenharmony_ci ret = file_remove_privs(iocb->ki_filp); 7618c2ecf20Sopenharmony_ci if (ret) 7628c2ecf20Sopenharmony_ci goto out; 7638c2ecf20Sopenharmony_ci /* TODO file_update_time() but we don't want metadata I/O */ 7648c2ecf20Sopenharmony_ci 7658c2ecf20Sopenharmony_ci /* Do not use dax for file extending writes as write and on 7668c2ecf20Sopenharmony_ci * disk i_size increase are not atomic otherwise. 7678c2ecf20Sopenharmony_ci */ 7688c2ecf20Sopenharmony_ci if (file_extending_write(iocb, from)) 7698c2ecf20Sopenharmony_ci ret = fuse_dax_direct_write(iocb, from); 7708c2ecf20Sopenharmony_ci else 7718c2ecf20Sopenharmony_ci ret = dax_iomap_rw(iocb, from, &fuse_iomap_ops); 7728c2ecf20Sopenharmony_ci 7738c2ecf20Sopenharmony_ciout: 7748c2ecf20Sopenharmony_ci inode_unlock(inode); 7758c2ecf20Sopenharmony_ci 7768c2ecf20Sopenharmony_ci if (ret > 0) 7778c2ecf20Sopenharmony_ci ret = generic_write_sync(iocb, ret); 7788c2ecf20Sopenharmony_ci return ret; 7798c2ecf20Sopenharmony_ci} 7808c2ecf20Sopenharmony_ci 7818c2ecf20Sopenharmony_cistatic int fuse_dax_writepages(struct address_space *mapping, 7828c2ecf20Sopenharmony_ci struct writeback_control *wbc) 7838c2ecf20Sopenharmony_ci{ 7848c2ecf20Sopenharmony_ci 7858c2ecf20Sopenharmony_ci struct inode *inode = mapping->host; 7868c2ecf20Sopenharmony_ci struct fuse_conn *fc = get_fuse_conn(inode); 7878c2ecf20Sopenharmony_ci 7888c2ecf20Sopenharmony_ci return dax_writeback_mapping_range(mapping, fc->dax->dev, wbc); 7898c2ecf20Sopenharmony_ci} 7908c2ecf20Sopenharmony_ci 7918c2ecf20Sopenharmony_cistatic vm_fault_t __fuse_dax_fault(struct vm_fault *vmf, 7928c2ecf20Sopenharmony_ci enum page_entry_size pe_size, bool write) 7938c2ecf20Sopenharmony_ci{ 7948c2ecf20Sopenharmony_ci vm_fault_t ret; 7958c2ecf20Sopenharmony_ci struct inode *inode = file_inode(vmf->vma->vm_file); 7968c2ecf20Sopenharmony_ci struct super_block *sb = inode->i_sb; 7978c2ecf20Sopenharmony_ci pfn_t pfn; 7988c2ecf20Sopenharmony_ci int error = 0; 7998c2ecf20Sopenharmony_ci struct fuse_conn *fc = get_fuse_conn(inode); 8008c2ecf20Sopenharmony_ci struct fuse_conn_dax *fcd = fc->dax; 8018c2ecf20Sopenharmony_ci bool retry = false; 8028c2ecf20Sopenharmony_ci 8038c2ecf20Sopenharmony_ci if (write) 8048c2ecf20Sopenharmony_ci sb_start_pagefault(sb); 8058c2ecf20Sopenharmony_ciretry: 8068c2ecf20Sopenharmony_ci if (retry && !(fcd->nr_free_ranges > 0)) 8078c2ecf20Sopenharmony_ci wait_event(fcd->range_waitq, (fcd->nr_free_ranges > 0)); 8088c2ecf20Sopenharmony_ci 8098c2ecf20Sopenharmony_ci /* 8108c2ecf20Sopenharmony_ci * We need to serialize against not only truncate but also against 8118c2ecf20Sopenharmony_ci * fuse dax memory range reclaim. While a range is being reclaimed, 8128c2ecf20Sopenharmony_ci * we do not want any read/write/mmap to make progress and try 8138c2ecf20Sopenharmony_ci * to populate page cache or access memory we are trying to free. 8148c2ecf20Sopenharmony_ci */ 8158c2ecf20Sopenharmony_ci down_read(&get_fuse_inode(inode)->i_mmap_sem); 8168c2ecf20Sopenharmony_ci ret = dax_iomap_fault(vmf, pe_size, &pfn, &error, &fuse_iomap_ops); 8178c2ecf20Sopenharmony_ci if ((ret & VM_FAULT_ERROR) && error == -EAGAIN) { 8188c2ecf20Sopenharmony_ci error = 0; 8198c2ecf20Sopenharmony_ci retry = true; 8208c2ecf20Sopenharmony_ci up_read(&get_fuse_inode(inode)->i_mmap_sem); 8218c2ecf20Sopenharmony_ci goto retry; 8228c2ecf20Sopenharmony_ci } 8238c2ecf20Sopenharmony_ci 8248c2ecf20Sopenharmony_ci if (ret & VM_FAULT_NEEDDSYNC) 8258c2ecf20Sopenharmony_ci ret = dax_finish_sync_fault(vmf, pe_size, pfn); 8268c2ecf20Sopenharmony_ci up_read(&get_fuse_inode(inode)->i_mmap_sem); 8278c2ecf20Sopenharmony_ci 8288c2ecf20Sopenharmony_ci if (write) 8298c2ecf20Sopenharmony_ci sb_end_pagefault(sb); 8308c2ecf20Sopenharmony_ci 8318c2ecf20Sopenharmony_ci return ret; 8328c2ecf20Sopenharmony_ci} 8338c2ecf20Sopenharmony_ci 8348c2ecf20Sopenharmony_cistatic vm_fault_t fuse_dax_fault(struct vm_fault *vmf) 8358c2ecf20Sopenharmony_ci{ 8368c2ecf20Sopenharmony_ci return __fuse_dax_fault(vmf, PE_SIZE_PTE, 8378c2ecf20Sopenharmony_ci vmf->flags & FAULT_FLAG_WRITE); 8388c2ecf20Sopenharmony_ci} 8398c2ecf20Sopenharmony_ci 8408c2ecf20Sopenharmony_cistatic vm_fault_t fuse_dax_huge_fault(struct vm_fault *vmf, 8418c2ecf20Sopenharmony_ci enum page_entry_size pe_size) 8428c2ecf20Sopenharmony_ci{ 8438c2ecf20Sopenharmony_ci return __fuse_dax_fault(vmf, pe_size, vmf->flags & FAULT_FLAG_WRITE); 8448c2ecf20Sopenharmony_ci} 8458c2ecf20Sopenharmony_ci 8468c2ecf20Sopenharmony_cistatic vm_fault_t fuse_dax_page_mkwrite(struct vm_fault *vmf) 8478c2ecf20Sopenharmony_ci{ 8488c2ecf20Sopenharmony_ci return __fuse_dax_fault(vmf, PE_SIZE_PTE, true); 8498c2ecf20Sopenharmony_ci} 8508c2ecf20Sopenharmony_ci 8518c2ecf20Sopenharmony_cistatic vm_fault_t fuse_dax_pfn_mkwrite(struct vm_fault *vmf) 8528c2ecf20Sopenharmony_ci{ 8538c2ecf20Sopenharmony_ci return __fuse_dax_fault(vmf, PE_SIZE_PTE, true); 8548c2ecf20Sopenharmony_ci} 8558c2ecf20Sopenharmony_ci 8568c2ecf20Sopenharmony_cistatic const struct vm_operations_struct fuse_dax_vm_ops = { 8578c2ecf20Sopenharmony_ci .fault = fuse_dax_fault, 8588c2ecf20Sopenharmony_ci .huge_fault = fuse_dax_huge_fault, 8598c2ecf20Sopenharmony_ci .page_mkwrite = fuse_dax_page_mkwrite, 8608c2ecf20Sopenharmony_ci .pfn_mkwrite = fuse_dax_pfn_mkwrite, 8618c2ecf20Sopenharmony_ci}; 8628c2ecf20Sopenharmony_ci 8638c2ecf20Sopenharmony_ciint fuse_dax_mmap(struct file *file, struct vm_area_struct *vma) 8648c2ecf20Sopenharmony_ci{ 8658c2ecf20Sopenharmony_ci file_accessed(file); 8668c2ecf20Sopenharmony_ci vma->vm_ops = &fuse_dax_vm_ops; 8678c2ecf20Sopenharmony_ci vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE; 8688c2ecf20Sopenharmony_ci return 0; 8698c2ecf20Sopenharmony_ci} 8708c2ecf20Sopenharmony_ci 8718c2ecf20Sopenharmony_cistatic int dmap_writeback_invalidate(struct inode *inode, 8728c2ecf20Sopenharmony_ci struct fuse_dax_mapping *dmap) 8738c2ecf20Sopenharmony_ci{ 8748c2ecf20Sopenharmony_ci int ret; 8758c2ecf20Sopenharmony_ci loff_t start_pos = dmap->itn.start << FUSE_DAX_SHIFT; 8768c2ecf20Sopenharmony_ci loff_t end_pos = (start_pos + FUSE_DAX_SZ - 1); 8778c2ecf20Sopenharmony_ci 8788c2ecf20Sopenharmony_ci ret = filemap_fdatawrite_range(inode->i_mapping, start_pos, end_pos); 8798c2ecf20Sopenharmony_ci if (ret) { 8808c2ecf20Sopenharmony_ci pr_debug("fuse: filemap_fdatawrite_range() failed. err=%d start_pos=0x%llx, end_pos=0x%llx\n", 8818c2ecf20Sopenharmony_ci ret, start_pos, end_pos); 8828c2ecf20Sopenharmony_ci return ret; 8838c2ecf20Sopenharmony_ci } 8848c2ecf20Sopenharmony_ci 8858c2ecf20Sopenharmony_ci ret = invalidate_inode_pages2_range(inode->i_mapping, 8868c2ecf20Sopenharmony_ci start_pos >> PAGE_SHIFT, 8878c2ecf20Sopenharmony_ci end_pos >> PAGE_SHIFT); 8888c2ecf20Sopenharmony_ci if (ret) 8898c2ecf20Sopenharmony_ci pr_debug("fuse: invalidate_inode_pages2_range() failed err=%d\n", 8908c2ecf20Sopenharmony_ci ret); 8918c2ecf20Sopenharmony_ci 8928c2ecf20Sopenharmony_ci return ret; 8938c2ecf20Sopenharmony_ci} 8948c2ecf20Sopenharmony_ci 8958c2ecf20Sopenharmony_cistatic int reclaim_one_dmap_locked(struct inode *inode, 8968c2ecf20Sopenharmony_ci struct fuse_dax_mapping *dmap) 8978c2ecf20Sopenharmony_ci{ 8988c2ecf20Sopenharmony_ci int ret; 8998c2ecf20Sopenharmony_ci struct fuse_inode *fi = get_fuse_inode(inode); 9008c2ecf20Sopenharmony_ci 9018c2ecf20Sopenharmony_ci /* 9028c2ecf20Sopenharmony_ci * igrab() was done to make sure inode won't go under us, and this 9038c2ecf20Sopenharmony_ci * further avoids the race with evict(). 9048c2ecf20Sopenharmony_ci */ 9058c2ecf20Sopenharmony_ci ret = dmap_writeback_invalidate(inode, dmap); 9068c2ecf20Sopenharmony_ci if (ret) 9078c2ecf20Sopenharmony_ci return ret; 9088c2ecf20Sopenharmony_ci 9098c2ecf20Sopenharmony_ci /* Remove dax mapping from inode interval tree now */ 9108c2ecf20Sopenharmony_ci interval_tree_remove(&dmap->itn, &fi->dax->tree); 9118c2ecf20Sopenharmony_ci fi->dax->nr--; 9128c2ecf20Sopenharmony_ci 9138c2ecf20Sopenharmony_ci /* It is possible that umount/shutdown has killed the fuse connection 9148c2ecf20Sopenharmony_ci * and worker thread is trying to reclaim memory in parallel. Don't 9158c2ecf20Sopenharmony_ci * warn in that case. 9168c2ecf20Sopenharmony_ci */ 9178c2ecf20Sopenharmony_ci ret = dmap_removemapping_one(inode, dmap); 9188c2ecf20Sopenharmony_ci if (ret && ret != -ENOTCONN) { 9198c2ecf20Sopenharmony_ci pr_warn("Failed to remove mapping. offset=0x%llx len=0x%llx ret=%d\n", 9208c2ecf20Sopenharmony_ci dmap->window_offset, dmap->length, ret); 9218c2ecf20Sopenharmony_ci } 9228c2ecf20Sopenharmony_ci return 0; 9238c2ecf20Sopenharmony_ci} 9248c2ecf20Sopenharmony_ci 9258c2ecf20Sopenharmony_ci/* Find first mapped dmap for an inode and return file offset. Caller needs 9268c2ecf20Sopenharmony_ci * to hold fi->dax->sem lock either shared or exclusive. 9278c2ecf20Sopenharmony_ci */ 9288c2ecf20Sopenharmony_cistatic struct fuse_dax_mapping *inode_lookup_first_dmap(struct inode *inode) 9298c2ecf20Sopenharmony_ci{ 9308c2ecf20Sopenharmony_ci struct fuse_inode *fi = get_fuse_inode(inode); 9318c2ecf20Sopenharmony_ci struct fuse_dax_mapping *dmap; 9328c2ecf20Sopenharmony_ci struct interval_tree_node *node; 9338c2ecf20Sopenharmony_ci 9348c2ecf20Sopenharmony_ci for (node = interval_tree_iter_first(&fi->dax->tree, 0, -1); node; 9358c2ecf20Sopenharmony_ci node = interval_tree_iter_next(node, 0, -1)) { 9368c2ecf20Sopenharmony_ci dmap = node_to_dmap(node); 9378c2ecf20Sopenharmony_ci /* still in use. */ 9388c2ecf20Sopenharmony_ci if (refcount_read(&dmap->refcnt) > 1) 9398c2ecf20Sopenharmony_ci continue; 9408c2ecf20Sopenharmony_ci 9418c2ecf20Sopenharmony_ci return dmap; 9428c2ecf20Sopenharmony_ci } 9438c2ecf20Sopenharmony_ci 9448c2ecf20Sopenharmony_ci return NULL; 9458c2ecf20Sopenharmony_ci} 9468c2ecf20Sopenharmony_ci 9478c2ecf20Sopenharmony_ci/* 9488c2ecf20Sopenharmony_ci * Find first mapping in the tree and free it and return it. Do not add 9498c2ecf20Sopenharmony_ci * it back to free pool. 9508c2ecf20Sopenharmony_ci */ 9518c2ecf20Sopenharmony_cistatic struct fuse_dax_mapping * 9528c2ecf20Sopenharmony_ciinode_inline_reclaim_one_dmap(struct fuse_conn_dax *fcd, struct inode *inode, 9538c2ecf20Sopenharmony_ci bool *retry) 9548c2ecf20Sopenharmony_ci{ 9558c2ecf20Sopenharmony_ci struct fuse_inode *fi = get_fuse_inode(inode); 9568c2ecf20Sopenharmony_ci struct fuse_dax_mapping *dmap; 9578c2ecf20Sopenharmony_ci u64 dmap_start, dmap_end; 9588c2ecf20Sopenharmony_ci unsigned long start_idx; 9598c2ecf20Sopenharmony_ci int ret; 9608c2ecf20Sopenharmony_ci struct interval_tree_node *node; 9618c2ecf20Sopenharmony_ci 9628c2ecf20Sopenharmony_ci down_write(&fi->i_mmap_sem); 9638c2ecf20Sopenharmony_ci 9648c2ecf20Sopenharmony_ci /* Lookup a dmap and corresponding file offset to reclaim. */ 9658c2ecf20Sopenharmony_ci down_read(&fi->dax->sem); 9668c2ecf20Sopenharmony_ci dmap = inode_lookup_first_dmap(inode); 9678c2ecf20Sopenharmony_ci if (dmap) { 9688c2ecf20Sopenharmony_ci start_idx = dmap->itn.start; 9698c2ecf20Sopenharmony_ci dmap_start = start_idx << FUSE_DAX_SHIFT; 9708c2ecf20Sopenharmony_ci dmap_end = dmap_start + FUSE_DAX_SZ - 1; 9718c2ecf20Sopenharmony_ci } 9728c2ecf20Sopenharmony_ci up_read(&fi->dax->sem); 9738c2ecf20Sopenharmony_ci 9748c2ecf20Sopenharmony_ci if (!dmap) 9758c2ecf20Sopenharmony_ci goto out_mmap_sem; 9768c2ecf20Sopenharmony_ci /* 9778c2ecf20Sopenharmony_ci * Make sure there are no references to inode pages using 9788c2ecf20Sopenharmony_ci * get_user_pages() 9798c2ecf20Sopenharmony_ci */ 9808c2ecf20Sopenharmony_ci ret = fuse_dax_break_layouts(inode, dmap_start, dmap_end); 9818c2ecf20Sopenharmony_ci if (ret) { 9828c2ecf20Sopenharmony_ci pr_debug("fuse: fuse_dax_break_layouts() failed. err=%d\n", 9838c2ecf20Sopenharmony_ci ret); 9848c2ecf20Sopenharmony_ci dmap = ERR_PTR(ret); 9858c2ecf20Sopenharmony_ci goto out_mmap_sem; 9868c2ecf20Sopenharmony_ci } 9878c2ecf20Sopenharmony_ci 9888c2ecf20Sopenharmony_ci down_write(&fi->dax->sem); 9898c2ecf20Sopenharmony_ci node = interval_tree_iter_first(&fi->dax->tree, start_idx, start_idx); 9908c2ecf20Sopenharmony_ci /* Range already got reclaimed by somebody else */ 9918c2ecf20Sopenharmony_ci if (!node) { 9928c2ecf20Sopenharmony_ci if (retry) 9938c2ecf20Sopenharmony_ci *retry = true; 9948c2ecf20Sopenharmony_ci goto out_write_dmap_sem; 9958c2ecf20Sopenharmony_ci } 9968c2ecf20Sopenharmony_ci 9978c2ecf20Sopenharmony_ci dmap = node_to_dmap(node); 9988c2ecf20Sopenharmony_ci /* still in use. */ 9998c2ecf20Sopenharmony_ci if (refcount_read(&dmap->refcnt) > 1) { 10008c2ecf20Sopenharmony_ci dmap = NULL; 10018c2ecf20Sopenharmony_ci if (retry) 10028c2ecf20Sopenharmony_ci *retry = true; 10038c2ecf20Sopenharmony_ci goto out_write_dmap_sem; 10048c2ecf20Sopenharmony_ci } 10058c2ecf20Sopenharmony_ci 10068c2ecf20Sopenharmony_ci ret = reclaim_one_dmap_locked(inode, dmap); 10078c2ecf20Sopenharmony_ci if (ret < 0) { 10088c2ecf20Sopenharmony_ci dmap = ERR_PTR(ret); 10098c2ecf20Sopenharmony_ci goto out_write_dmap_sem; 10108c2ecf20Sopenharmony_ci } 10118c2ecf20Sopenharmony_ci 10128c2ecf20Sopenharmony_ci /* Clean up dmap. Do not add back to free list */ 10138c2ecf20Sopenharmony_ci dmap_remove_busy_list(fcd, dmap); 10148c2ecf20Sopenharmony_ci dmap->inode = NULL; 10158c2ecf20Sopenharmony_ci dmap->itn.start = dmap->itn.last = 0; 10168c2ecf20Sopenharmony_ci 10178c2ecf20Sopenharmony_ci pr_debug("fuse: %s: inline reclaimed memory range. inode=%p, window_offset=0x%llx, length=0x%llx\n", 10188c2ecf20Sopenharmony_ci __func__, inode, dmap->window_offset, dmap->length); 10198c2ecf20Sopenharmony_ci 10208c2ecf20Sopenharmony_ciout_write_dmap_sem: 10218c2ecf20Sopenharmony_ci up_write(&fi->dax->sem); 10228c2ecf20Sopenharmony_ciout_mmap_sem: 10238c2ecf20Sopenharmony_ci up_write(&fi->i_mmap_sem); 10248c2ecf20Sopenharmony_ci return dmap; 10258c2ecf20Sopenharmony_ci} 10268c2ecf20Sopenharmony_ci 10278c2ecf20Sopenharmony_cistatic struct fuse_dax_mapping * 10288c2ecf20Sopenharmony_cialloc_dax_mapping_reclaim(struct fuse_conn_dax *fcd, struct inode *inode) 10298c2ecf20Sopenharmony_ci{ 10308c2ecf20Sopenharmony_ci struct fuse_dax_mapping *dmap; 10318c2ecf20Sopenharmony_ci struct fuse_inode *fi = get_fuse_inode(inode); 10328c2ecf20Sopenharmony_ci 10338c2ecf20Sopenharmony_ci while (1) { 10348c2ecf20Sopenharmony_ci bool retry = false; 10358c2ecf20Sopenharmony_ci 10368c2ecf20Sopenharmony_ci dmap = alloc_dax_mapping(fcd); 10378c2ecf20Sopenharmony_ci if (dmap) 10388c2ecf20Sopenharmony_ci return dmap; 10398c2ecf20Sopenharmony_ci 10408c2ecf20Sopenharmony_ci dmap = inode_inline_reclaim_one_dmap(fcd, inode, &retry); 10418c2ecf20Sopenharmony_ci /* 10428c2ecf20Sopenharmony_ci * Either we got a mapping or it is an error, return in both 10438c2ecf20Sopenharmony_ci * the cases. 10448c2ecf20Sopenharmony_ci */ 10458c2ecf20Sopenharmony_ci if (dmap) 10468c2ecf20Sopenharmony_ci return dmap; 10478c2ecf20Sopenharmony_ci 10488c2ecf20Sopenharmony_ci /* If we could not reclaim a mapping because it 10498c2ecf20Sopenharmony_ci * had a reference or some other temporary failure, 10508c2ecf20Sopenharmony_ci * Try again. We want to give up inline reclaim only 10518c2ecf20Sopenharmony_ci * if there is no range assigned to this node. Otherwise 10528c2ecf20Sopenharmony_ci * if a deadlock is possible if we sleep with fi->i_mmap_sem 10538c2ecf20Sopenharmony_ci * held and worker to free memory can't make progress due 10548c2ecf20Sopenharmony_ci * to unavailability of fi->i_mmap_sem lock. So sleep 10558c2ecf20Sopenharmony_ci * only if fi->dax->nr=0 10568c2ecf20Sopenharmony_ci */ 10578c2ecf20Sopenharmony_ci if (retry) 10588c2ecf20Sopenharmony_ci continue; 10598c2ecf20Sopenharmony_ci /* 10608c2ecf20Sopenharmony_ci * There are no mappings which can be reclaimed. Wait for one. 10618c2ecf20Sopenharmony_ci * We are not holding fi->dax->sem. So it is possible 10628c2ecf20Sopenharmony_ci * that range gets added now. But as we are not holding 10638c2ecf20Sopenharmony_ci * fi->i_mmap_sem, worker should still be able to free up 10648c2ecf20Sopenharmony_ci * a range and wake us up. 10658c2ecf20Sopenharmony_ci */ 10668c2ecf20Sopenharmony_ci if (!fi->dax->nr && !(fcd->nr_free_ranges > 0)) { 10678c2ecf20Sopenharmony_ci if (wait_event_killable_exclusive(fcd->range_waitq, 10688c2ecf20Sopenharmony_ci (fcd->nr_free_ranges > 0))) { 10698c2ecf20Sopenharmony_ci return ERR_PTR(-EINTR); 10708c2ecf20Sopenharmony_ci } 10718c2ecf20Sopenharmony_ci } 10728c2ecf20Sopenharmony_ci } 10738c2ecf20Sopenharmony_ci} 10748c2ecf20Sopenharmony_ci 10758c2ecf20Sopenharmony_cistatic int lookup_and_reclaim_dmap_locked(struct fuse_conn_dax *fcd, 10768c2ecf20Sopenharmony_ci struct inode *inode, 10778c2ecf20Sopenharmony_ci unsigned long start_idx) 10788c2ecf20Sopenharmony_ci{ 10798c2ecf20Sopenharmony_ci int ret; 10808c2ecf20Sopenharmony_ci struct fuse_inode *fi = get_fuse_inode(inode); 10818c2ecf20Sopenharmony_ci struct fuse_dax_mapping *dmap; 10828c2ecf20Sopenharmony_ci struct interval_tree_node *node; 10838c2ecf20Sopenharmony_ci 10848c2ecf20Sopenharmony_ci /* Find fuse dax mapping at file offset inode. */ 10858c2ecf20Sopenharmony_ci node = interval_tree_iter_first(&fi->dax->tree, start_idx, start_idx); 10868c2ecf20Sopenharmony_ci 10878c2ecf20Sopenharmony_ci /* Range already got cleaned up by somebody else */ 10888c2ecf20Sopenharmony_ci if (!node) 10898c2ecf20Sopenharmony_ci return 0; 10908c2ecf20Sopenharmony_ci dmap = node_to_dmap(node); 10918c2ecf20Sopenharmony_ci 10928c2ecf20Sopenharmony_ci /* still in use. */ 10938c2ecf20Sopenharmony_ci if (refcount_read(&dmap->refcnt) > 1) 10948c2ecf20Sopenharmony_ci return 0; 10958c2ecf20Sopenharmony_ci 10968c2ecf20Sopenharmony_ci ret = reclaim_one_dmap_locked(inode, dmap); 10978c2ecf20Sopenharmony_ci if (ret < 0) 10988c2ecf20Sopenharmony_ci return ret; 10998c2ecf20Sopenharmony_ci 11008c2ecf20Sopenharmony_ci /* Cleanup dmap entry and add back to free list */ 11018c2ecf20Sopenharmony_ci spin_lock(&fcd->lock); 11028c2ecf20Sopenharmony_ci dmap_reinit_add_to_free_pool(fcd, dmap); 11038c2ecf20Sopenharmony_ci spin_unlock(&fcd->lock); 11048c2ecf20Sopenharmony_ci return ret; 11058c2ecf20Sopenharmony_ci} 11068c2ecf20Sopenharmony_ci 11078c2ecf20Sopenharmony_ci/* 11088c2ecf20Sopenharmony_ci * Free a range of memory. 11098c2ecf20Sopenharmony_ci * Locking: 11108c2ecf20Sopenharmony_ci * 1. Take fi->i_mmap_sem to block dax faults. 11118c2ecf20Sopenharmony_ci * 2. Take fi->dax->sem to protect interval tree and also to make sure 11128c2ecf20Sopenharmony_ci * read/write can not reuse a dmap which we might be freeing. 11138c2ecf20Sopenharmony_ci */ 11148c2ecf20Sopenharmony_cistatic int lookup_and_reclaim_dmap(struct fuse_conn_dax *fcd, 11158c2ecf20Sopenharmony_ci struct inode *inode, 11168c2ecf20Sopenharmony_ci unsigned long start_idx, 11178c2ecf20Sopenharmony_ci unsigned long end_idx) 11188c2ecf20Sopenharmony_ci{ 11198c2ecf20Sopenharmony_ci int ret; 11208c2ecf20Sopenharmony_ci struct fuse_inode *fi = get_fuse_inode(inode); 11218c2ecf20Sopenharmony_ci loff_t dmap_start = start_idx << FUSE_DAX_SHIFT; 11228c2ecf20Sopenharmony_ci loff_t dmap_end = (dmap_start + FUSE_DAX_SZ) - 1; 11238c2ecf20Sopenharmony_ci 11248c2ecf20Sopenharmony_ci down_write(&fi->i_mmap_sem); 11258c2ecf20Sopenharmony_ci ret = fuse_dax_break_layouts(inode, dmap_start, dmap_end); 11268c2ecf20Sopenharmony_ci if (ret) { 11278c2ecf20Sopenharmony_ci pr_debug("virtio_fs: fuse_dax_break_layouts() failed. err=%d\n", 11288c2ecf20Sopenharmony_ci ret); 11298c2ecf20Sopenharmony_ci goto out_mmap_sem; 11308c2ecf20Sopenharmony_ci } 11318c2ecf20Sopenharmony_ci 11328c2ecf20Sopenharmony_ci down_write(&fi->dax->sem); 11338c2ecf20Sopenharmony_ci ret = lookup_and_reclaim_dmap_locked(fcd, inode, start_idx); 11348c2ecf20Sopenharmony_ci up_write(&fi->dax->sem); 11358c2ecf20Sopenharmony_ciout_mmap_sem: 11368c2ecf20Sopenharmony_ci up_write(&fi->i_mmap_sem); 11378c2ecf20Sopenharmony_ci return ret; 11388c2ecf20Sopenharmony_ci} 11398c2ecf20Sopenharmony_ci 11408c2ecf20Sopenharmony_cistatic int try_to_free_dmap_chunks(struct fuse_conn_dax *fcd, 11418c2ecf20Sopenharmony_ci unsigned long nr_to_free) 11428c2ecf20Sopenharmony_ci{ 11438c2ecf20Sopenharmony_ci struct fuse_dax_mapping *dmap, *pos, *temp; 11448c2ecf20Sopenharmony_ci int ret, nr_freed = 0; 11458c2ecf20Sopenharmony_ci unsigned long start_idx = 0, end_idx = 0; 11468c2ecf20Sopenharmony_ci struct inode *inode = NULL; 11478c2ecf20Sopenharmony_ci 11488c2ecf20Sopenharmony_ci /* Pick first busy range and free it for now*/ 11498c2ecf20Sopenharmony_ci while (1) { 11508c2ecf20Sopenharmony_ci if (nr_freed >= nr_to_free) 11518c2ecf20Sopenharmony_ci break; 11528c2ecf20Sopenharmony_ci 11538c2ecf20Sopenharmony_ci dmap = NULL; 11548c2ecf20Sopenharmony_ci spin_lock(&fcd->lock); 11558c2ecf20Sopenharmony_ci 11568c2ecf20Sopenharmony_ci if (!fcd->nr_busy_ranges) { 11578c2ecf20Sopenharmony_ci spin_unlock(&fcd->lock); 11588c2ecf20Sopenharmony_ci return 0; 11598c2ecf20Sopenharmony_ci } 11608c2ecf20Sopenharmony_ci 11618c2ecf20Sopenharmony_ci list_for_each_entry_safe(pos, temp, &fcd->busy_ranges, 11628c2ecf20Sopenharmony_ci busy_list) { 11638c2ecf20Sopenharmony_ci /* skip this range if it's in use. */ 11648c2ecf20Sopenharmony_ci if (refcount_read(&pos->refcnt) > 1) 11658c2ecf20Sopenharmony_ci continue; 11668c2ecf20Sopenharmony_ci 11678c2ecf20Sopenharmony_ci inode = igrab(pos->inode); 11688c2ecf20Sopenharmony_ci /* 11698c2ecf20Sopenharmony_ci * This inode is going away. That will free 11708c2ecf20Sopenharmony_ci * up all the ranges anyway, continue to 11718c2ecf20Sopenharmony_ci * next range. 11728c2ecf20Sopenharmony_ci */ 11738c2ecf20Sopenharmony_ci if (!inode) 11748c2ecf20Sopenharmony_ci continue; 11758c2ecf20Sopenharmony_ci /* 11768c2ecf20Sopenharmony_ci * Take this element off list and add it tail. If 11778c2ecf20Sopenharmony_ci * this element can't be freed, it will help with 11788c2ecf20Sopenharmony_ci * selecting new element in next iteration of loop. 11798c2ecf20Sopenharmony_ci */ 11808c2ecf20Sopenharmony_ci dmap = pos; 11818c2ecf20Sopenharmony_ci list_move_tail(&dmap->busy_list, &fcd->busy_ranges); 11828c2ecf20Sopenharmony_ci start_idx = end_idx = dmap->itn.start; 11838c2ecf20Sopenharmony_ci break; 11848c2ecf20Sopenharmony_ci } 11858c2ecf20Sopenharmony_ci spin_unlock(&fcd->lock); 11868c2ecf20Sopenharmony_ci if (!dmap) 11878c2ecf20Sopenharmony_ci return 0; 11888c2ecf20Sopenharmony_ci 11898c2ecf20Sopenharmony_ci ret = lookup_and_reclaim_dmap(fcd, inode, start_idx, end_idx); 11908c2ecf20Sopenharmony_ci iput(inode); 11918c2ecf20Sopenharmony_ci if (ret) 11928c2ecf20Sopenharmony_ci return ret; 11938c2ecf20Sopenharmony_ci nr_freed++; 11948c2ecf20Sopenharmony_ci } 11958c2ecf20Sopenharmony_ci return 0; 11968c2ecf20Sopenharmony_ci} 11978c2ecf20Sopenharmony_ci 11988c2ecf20Sopenharmony_cistatic void fuse_dax_free_mem_worker(struct work_struct *work) 11998c2ecf20Sopenharmony_ci{ 12008c2ecf20Sopenharmony_ci int ret; 12018c2ecf20Sopenharmony_ci struct fuse_conn_dax *fcd = container_of(work, struct fuse_conn_dax, 12028c2ecf20Sopenharmony_ci free_work.work); 12038c2ecf20Sopenharmony_ci ret = try_to_free_dmap_chunks(fcd, FUSE_DAX_RECLAIM_CHUNK); 12048c2ecf20Sopenharmony_ci if (ret) { 12058c2ecf20Sopenharmony_ci pr_debug("fuse: try_to_free_dmap_chunks() failed with err=%d\n", 12068c2ecf20Sopenharmony_ci ret); 12078c2ecf20Sopenharmony_ci } 12088c2ecf20Sopenharmony_ci 12098c2ecf20Sopenharmony_ci /* If number of free ranges are still below threhold, requeue */ 12108c2ecf20Sopenharmony_ci kick_dmap_free_worker(fcd, 1); 12118c2ecf20Sopenharmony_ci} 12128c2ecf20Sopenharmony_ci 12138c2ecf20Sopenharmony_cistatic void fuse_free_dax_mem_ranges(struct list_head *mem_list) 12148c2ecf20Sopenharmony_ci{ 12158c2ecf20Sopenharmony_ci struct fuse_dax_mapping *range, *temp; 12168c2ecf20Sopenharmony_ci 12178c2ecf20Sopenharmony_ci /* Free All allocated elements */ 12188c2ecf20Sopenharmony_ci list_for_each_entry_safe(range, temp, mem_list, list) { 12198c2ecf20Sopenharmony_ci list_del(&range->list); 12208c2ecf20Sopenharmony_ci if (!list_empty(&range->busy_list)) 12218c2ecf20Sopenharmony_ci list_del(&range->busy_list); 12228c2ecf20Sopenharmony_ci kfree(range); 12238c2ecf20Sopenharmony_ci } 12248c2ecf20Sopenharmony_ci} 12258c2ecf20Sopenharmony_ci 12268c2ecf20Sopenharmony_civoid fuse_dax_conn_free(struct fuse_conn *fc) 12278c2ecf20Sopenharmony_ci{ 12288c2ecf20Sopenharmony_ci if (fc->dax) { 12298c2ecf20Sopenharmony_ci fuse_free_dax_mem_ranges(&fc->dax->free_ranges); 12308c2ecf20Sopenharmony_ci kfree(fc->dax); 12318c2ecf20Sopenharmony_ci fc->dax = NULL; 12328c2ecf20Sopenharmony_ci } 12338c2ecf20Sopenharmony_ci} 12348c2ecf20Sopenharmony_ci 12358c2ecf20Sopenharmony_cistatic int fuse_dax_mem_range_init(struct fuse_conn_dax *fcd) 12368c2ecf20Sopenharmony_ci{ 12378c2ecf20Sopenharmony_ci long nr_pages, nr_ranges; 12388c2ecf20Sopenharmony_ci void *kaddr; 12398c2ecf20Sopenharmony_ci pfn_t pfn; 12408c2ecf20Sopenharmony_ci struct fuse_dax_mapping *range; 12418c2ecf20Sopenharmony_ci int ret, id; 12428c2ecf20Sopenharmony_ci size_t dax_size = -1; 12438c2ecf20Sopenharmony_ci unsigned long i; 12448c2ecf20Sopenharmony_ci 12458c2ecf20Sopenharmony_ci init_waitqueue_head(&fcd->range_waitq); 12468c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&fcd->free_ranges); 12478c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&fcd->busy_ranges); 12488c2ecf20Sopenharmony_ci INIT_DELAYED_WORK(&fcd->free_work, fuse_dax_free_mem_worker); 12498c2ecf20Sopenharmony_ci 12508c2ecf20Sopenharmony_ci id = dax_read_lock(); 12518c2ecf20Sopenharmony_ci nr_pages = dax_direct_access(fcd->dev, 0, PHYS_PFN(dax_size), &kaddr, 12528c2ecf20Sopenharmony_ci &pfn); 12538c2ecf20Sopenharmony_ci dax_read_unlock(id); 12548c2ecf20Sopenharmony_ci if (nr_pages < 0) { 12558c2ecf20Sopenharmony_ci pr_debug("dax_direct_access() returned %ld\n", nr_pages); 12568c2ecf20Sopenharmony_ci return nr_pages; 12578c2ecf20Sopenharmony_ci } 12588c2ecf20Sopenharmony_ci 12598c2ecf20Sopenharmony_ci nr_ranges = nr_pages/FUSE_DAX_PAGES; 12608c2ecf20Sopenharmony_ci pr_debug("%s: dax mapped %ld pages. nr_ranges=%ld\n", 12618c2ecf20Sopenharmony_ci __func__, nr_pages, nr_ranges); 12628c2ecf20Sopenharmony_ci 12638c2ecf20Sopenharmony_ci for (i = 0; i < nr_ranges; i++) { 12648c2ecf20Sopenharmony_ci range = kzalloc(sizeof(struct fuse_dax_mapping), GFP_KERNEL); 12658c2ecf20Sopenharmony_ci ret = -ENOMEM; 12668c2ecf20Sopenharmony_ci if (!range) 12678c2ecf20Sopenharmony_ci goto out_err; 12688c2ecf20Sopenharmony_ci 12698c2ecf20Sopenharmony_ci /* TODO: This offset only works if virtio-fs driver is not 12708c2ecf20Sopenharmony_ci * having some memory hidden at the beginning. This needs 12718c2ecf20Sopenharmony_ci * better handling 12728c2ecf20Sopenharmony_ci */ 12738c2ecf20Sopenharmony_ci range->window_offset = i * FUSE_DAX_SZ; 12748c2ecf20Sopenharmony_ci range->length = FUSE_DAX_SZ; 12758c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&range->busy_list); 12768c2ecf20Sopenharmony_ci refcount_set(&range->refcnt, 1); 12778c2ecf20Sopenharmony_ci list_add_tail(&range->list, &fcd->free_ranges); 12788c2ecf20Sopenharmony_ci } 12798c2ecf20Sopenharmony_ci 12808c2ecf20Sopenharmony_ci fcd->nr_free_ranges = nr_ranges; 12818c2ecf20Sopenharmony_ci fcd->nr_ranges = nr_ranges; 12828c2ecf20Sopenharmony_ci return 0; 12838c2ecf20Sopenharmony_ciout_err: 12848c2ecf20Sopenharmony_ci /* Free All allocated elements */ 12858c2ecf20Sopenharmony_ci fuse_free_dax_mem_ranges(&fcd->free_ranges); 12868c2ecf20Sopenharmony_ci return ret; 12878c2ecf20Sopenharmony_ci} 12888c2ecf20Sopenharmony_ci 12898c2ecf20Sopenharmony_ciint fuse_dax_conn_alloc(struct fuse_conn *fc, struct dax_device *dax_dev) 12908c2ecf20Sopenharmony_ci{ 12918c2ecf20Sopenharmony_ci struct fuse_conn_dax *fcd; 12928c2ecf20Sopenharmony_ci int err; 12938c2ecf20Sopenharmony_ci 12948c2ecf20Sopenharmony_ci if (!dax_dev) 12958c2ecf20Sopenharmony_ci return 0; 12968c2ecf20Sopenharmony_ci 12978c2ecf20Sopenharmony_ci fcd = kzalloc(sizeof(*fcd), GFP_KERNEL); 12988c2ecf20Sopenharmony_ci if (!fcd) 12998c2ecf20Sopenharmony_ci return -ENOMEM; 13008c2ecf20Sopenharmony_ci 13018c2ecf20Sopenharmony_ci spin_lock_init(&fcd->lock); 13028c2ecf20Sopenharmony_ci fcd->dev = dax_dev; 13038c2ecf20Sopenharmony_ci err = fuse_dax_mem_range_init(fcd); 13048c2ecf20Sopenharmony_ci if (err) { 13058c2ecf20Sopenharmony_ci kfree(fcd); 13068c2ecf20Sopenharmony_ci return err; 13078c2ecf20Sopenharmony_ci } 13088c2ecf20Sopenharmony_ci 13098c2ecf20Sopenharmony_ci fc->dax = fcd; 13108c2ecf20Sopenharmony_ci return 0; 13118c2ecf20Sopenharmony_ci} 13128c2ecf20Sopenharmony_ci 13138c2ecf20Sopenharmony_cibool fuse_dax_inode_alloc(struct super_block *sb, struct fuse_inode *fi) 13148c2ecf20Sopenharmony_ci{ 13158c2ecf20Sopenharmony_ci struct fuse_conn *fc = get_fuse_conn_super(sb); 13168c2ecf20Sopenharmony_ci 13178c2ecf20Sopenharmony_ci fi->dax = NULL; 13188c2ecf20Sopenharmony_ci if (fc->dax) { 13198c2ecf20Sopenharmony_ci fi->dax = kzalloc(sizeof(*fi->dax), GFP_KERNEL_ACCOUNT); 13208c2ecf20Sopenharmony_ci if (!fi->dax) 13218c2ecf20Sopenharmony_ci return false; 13228c2ecf20Sopenharmony_ci 13238c2ecf20Sopenharmony_ci init_rwsem(&fi->dax->sem); 13248c2ecf20Sopenharmony_ci fi->dax->tree = RB_ROOT_CACHED; 13258c2ecf20Sopenharmony_ci } 13268c2ecf20Sopenharmony_ci 13278c2ecf20Sopenharmony_ci return true; 13288c2ecf20Sopenharmony_ci} 13298c2ecf20Sopenharmony_ci 13308c2ecf20Sopenharmony_cistatic const struct address_space_operations fuse_dax_file_aops = { 13318c2ecf20Sopenharmony_ci .writepages = fuse_dax_writepages, 13328c2ecf20Sopenharmony_ci .direct_IO = noop_direct_IO, 13338c2ecf20Sopenharmony_ci .set_page_dirty = noop_set_page_dirty, 13348c2ecf20Sopenharmony_ci .invalidatepage = noop_invalidatepage, 13358c2ecf20Sopenharmony_ci}; 13368c2ecf20Sopenharmony_ci 13378c2ecf20Sopenharmony_civoid fuse_dax_inode_init(struct inode *inode) 13388c2ecf20Sopenharmony_ci{ 13398c2ecf20Sopenharmony_ci struct fuse_conn *fc = get_fuse_conn(inode); 13408c2ecf20Sopenharmony_ci 13418c2ecf20Sopenharmony_ci if (!fc->dax) 13428c2ecf20Sopenharmony_ci return; 13438c2ecf20Sopenharmony_ci 13448c2ecf20Sopenharmony_ci inode->i_flags |= S_DAX; 13458c2ecf20Sopenharmony_ci inode->i_data.a_ops = &fuse_dax_file_aops; 13468c2ecf20Sopenharmony_ci} 13478c2ecf20Sopenharmony_ci 13488c2ecf20Sopenharmony_cibool fuse_dax_check_alignment(struct fuse_conn *fc, unsigned int map_alignment) 13498c2ecf20Sopenharmony_ci{ 13508c2ecf20Sopenharmony_ci if (fc->dax && (map_alignment > FUSE_DAX_SHIFT)) { 13518c2ecf20Sopenharmony_ci pr_warn("FUSE: map_alignment %u incompatible with dax mem range size %u\n", 13528c2ecf20Sopenharmony_ci map_alignment, FUSE_DAX_SZ); 13538c2ecf20Sopenharmony_ci return false; 13548c2ecf20Sopenharmony_ci } 13558c2ecf20Sopenharmony_ci return true; 13568c2ecf20Sopenharmony_ci} 13578c2ecf20Sopenharmony_ci 13588c2ecf20Sopenharmony_civoid fuse_dax_cancel_work(struct fuse_conn *fc) 13598c2ecf20Sopenharmony_ci{ 13608c2ecf20Sopenharmony_ci struct fuse_conn_dax *fcd = fc->dax; 13618c2ecf20Sopenharmony_ci 13628c2ecf20Sopenharmony_ci if (fcd) 13638c2ecf20Sopenharmony_ci cancel_delayed_work_sync(&fcd->free_work); 13648c2ecf20Sopenharmony_ci 13658c2ecf20Sopenharmony_ci} 13668c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(fuse_dax_cancel_work); 1367