/*
 * Open file cache.
 *
 * (c) 2015 - Jeff Layton <jeff.layton@primarydata.com>
 */

#include <linux/hash.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/sched.h>
#include <linux/list_lru.h>
#include <linux/fsnotify_backend.h>
#include <linux/fsnotify.h>
#include <linux/seq_file.h>

#include "vfs.h"
#include "nfsd.h"
#include "nfsfh.h"
#include "netns.h"
#include "filecache.h"
#include "trace.h"

#define NFSDDBG_FACILITY	NFSDDBG_FH

/* FIXME: dynamically size this for the machine somehow? */
#define NFSD_FILE_HASH_BITS	12
#define NFSD_FILE_HASH_SIZE	(1 << NFSD_FILE_HASH_BITS)
#define NFSD_LAUNDRETTE_DELAY	(2 * HZ)

#define NFSD_FILE_SHUTDOWN	(1)
#define NFSD_FILE_LRU_THRESHOLD (4096UL)
#define NFSD_FILE_LRU_LIMIT	(NFSD_FILE_LRU_THRESHOLD << 2)

/* We only care about NFSD_MAY_READ/WRITE for this cache */
#define NFSD_FILE_MAY_MASK	(NFSD_MAY_READ|NFSD_MAY_WRITE)

struct nfsd_fcache_bucket {
	struct hlist_head	nfb_head;
	spinlock_t		nfb_lock;
	unsigned int		nfb_count;
	unsigned int		nfb_maxcount;
};

static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits);

struct nfsd_fcache_disposal {
	struct list_head list;
	struct work_struct work;
	struct net *net;
	spinlock_t lock;
	struct list_head freeme;
	struct rcu_head rcu;
};

static struct workqueue_struct *nfsd_filecache_wq __read_mostly;

static struct kmem_cache		*nfsd_file_slab;
static struct kmem_cache		*nfsd_file_mark_slab;
static struct nfsd_fcache_bucket	*nfsd_file_hashtbl;
static struct list_lru			nfsd_file_lru;
static long				nfsd_file_lru_flags;
static struct fsnotify_group		*nfsd_file_fsnotify_group;
static atomic_long_t			nfsd_filecache_count;
static struct delayed_work		nfsd_filecache_laundrette;
static DEFINE_SPINLOCK(laundrette_lock);
static LIST_HEAD(laundrettes);

static void nfsd_file_gc(void);

static void
nfsd_file_schedule_laundrette(void)
{
	long count = atomic_long_read(&nfsd_filecache_count);

	if (count == 0 || test_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags))
		return;

	queue_delayed_work(system_wq, &nfsd_filecache_laundrette,
			   NFSD_LAUNDRETTE_DELAY);
}

static void
nfsd_file_slab_free(struct rcu_head *rcu)
{
	struct nfsd_file *nf = container_of(rcu, struct nfsd_file, nf_rcu);

	put_cred(nf->nf_cred);
	kmem_cache_free(nfsd_file_slab, nf);
}

static void
nfsd_file_mark_free(struct fsnotify_mark *mark)
{
	struct nfsd_file_mark *nfm = container_of(mark, struct nfsd_file_mark,
						  nfm_mark);

	kmem_cache_free(nfsd_file_mark_slab, nfm);
}

static struct nfsd_file_mark *
nfsd_file_mark_get(struct nfsd_file_mark *nfm)
{
	if (!refcount_inc_not_zero(&nfm->nfm_ref))
		return NULL;
	return nfm;
}

static void
nfsd_file_mark_put(struct nfsd_file_mark *nfm)
{
	if (refcount_dec_and_test(&nfm->nfm_ref)) {
		fsnotify_destroy_mark(&nfm->nfm_mark, nfsd_file_fsnotify_group);
		fsnotify_put_mark(&nfm->nfm_mark);
	}
}
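/*
 * Look up the nfsd_file_mark attached to this nfsd_file's inode, or
 * create and attach a new one. The loop handles a race with another
 * task still tearing down a mark we found: fsnotify_add_inode_mark()
 * returns -EEXIST in that case and we retry the lookup.
 */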
static struct nfsd_file_mark *
nfsd_file_mark_find_or_create(struct nfsd_file *nf)
{
	int			err;
	struct fsnotify_mark	*mark;
	struct nfsd_file_mark	*nfm = NULL, *new;
	struct inode *inode = nf->nf_inode;

	do {
		mutex_lock(&nfsd_file_fsnotify_group->mark_mutex);
		mark = fsnotify_find_mark(&inode->i_fsnotify_marks,
					  nfsd_file_fsnotify_group);
		if (mark) {
			nfm = nfsd_file_mark_get(container_of(mark,
						 struct nfsd_file_mark,
						 nfm_mark));
			mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex);
			if (nfm) {
				fsnotify_put_mark(mark);
				break;
			}
			/* Avoid soft lockup race with nfsd_file_mark_put() */
			fsnotify_destroy_mark(mark, nfsd_file_fsnotify_group);
			fsnotify_put_mark(mark);
		} else
			mutex_unlock(&nfsd_file_fsnotify_group->mark_mutex);

		/* allocate a new nfm */
		new = kmem_cache_alloc(nfsd_file_mark_slab, GFP_KERNEL);
		if (!new)
			return NULL;
		fsnotify_init_mark(&new->nfm_mark, nfsd_file_fsnotify_group);
		new->nfm_mark.mask = FS_ATTRIB|FS_DELETE_SELF;
		refcount_set(&new->nfm_ref, 1);

		err = fsnotify_add_inode_mark(&new->nfm_mark, inode, 0);

		/*
		 * If the add was successful, then return the object.
		 * Otherwise, we need to put the reference we hold on the
		 * nfm_mark. The fsnotify code will take a reference and put
		 * it on failure, so we can't just free it directly. It's also
		 * not safe to call fsnotify_destroy_mark on it as the
		 * mark->group will be NULL. Thus, we can't let the nfm_ref
		 * counter drive the destruction at this point.
		 */
		if (likely(!err))
			nfm = new;
		else
			fsnotify_put_mark(&new->nfm_mark);
	} while (unlikely(err == -EEXIST));

	return nfm;
}

static struct nfsd_file *
nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval,
		struct net *net)
{
	struct nfsd_file *nf;

	nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL);
	if (nf) {
		INIT_HLIST_NODE(&nf->nf_node);
		INIT_LIST_HEAD(&nf->nf_lru);
		nf->nf_file = NULL;
		nf->nf_cred = get_current_cred();
		nf->nf_net = net;
		nf->nf_flags = 0;
		nf->nf_inode = inode;
		nf->nf_hashval = hashval;
		refcount_set(&nf->nf_ref, 1);
		nf->nf_may = may & NFSD_FILE_MAY_MASK;
		if (may & NFSD_MAY_NOT_BREAK_LEASE) {
			if (may & NFSD_MAY_WRITE)
				__set_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags);
			if (may & NFSD_MAY_READ)
				__set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
		}
		nf->nf_mark = NULL;
		trace_nfsd_file_alloc(nf);
	}
	return nf;
}

static bool
nfsd_file_free(struct nfsd_file *nf)
{
	bool flush = false;

	trace_nfsd_file_put_final(nf);
	if (nf->nf_mark)
		nfsd_file_mark_put(nf->nf_mark);
	if (nf->nf_file) {
		get_file(nf->nf_file);
		filp_close(nf->nf_file, NULL);
		fput(nf->nf_file);
		flush = true;
	}
	call_rcu(&nf->nf_rcu, nfsd_file_slab_free);
	return flush;
}

static bool
nfsd_file_check_writeback(struct nfsd_file *nf)
{
	struct file *file = nf->nf_file;
	struct address_space *mapping;

	if (!file || !(file->f_mode & FMODE_WRITE))
		return false;
	mapping = file->f_mapping;
	return mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) ||
		mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK);
}

static int
nfsd_file_check_write_error(struct nfsd_file *nf)
{
	struct file *file = nf->nf_file;

	if (!file || !(file->f_mode & FMODE_WRITE))
		return 0;
	return filemap_check_wb_err(file->f_mapping, READ_ONCE(file->f_wb_err));
}

static void
nfsd_file_do_unhash(struct nfsd_file *nf)
{
	lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);

	trace_nfsd_file_unhash(nf);

	if (nfsd_file_check_write_error(nf))
		nfsd_reset_boot_verifier(net_generic(nf->nf_net, nfsd_net_id));
	--nfsd_file_hashtbl[nf->nf_hashval].nfb_count;
	hlist_del_rcu(&nf->nf_node);
	atomic_long_dec(&nfsd_filecache_count);
}

static bool
nfsd_file_unhash(struct nfsd_file *nf)
{
	if (test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
		nfsd_file_do_unhash(nf);
		if (!list_empty(&nf->nf_lru))
			list_lru_del(&nfsd_file_lru, &nf->nf_lru);
		return true;
	}
	return false;
}

/*
 * Return true if the file was unhashed.
 */
static bool
nfsd_file_unhash_and_release_locked(struct nfsd_file *nf, struct list_head *dispose)
{
	lockdep_assert_held(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);

	trace_nfsd_file_unhash_and_release_locked(nf);
	if (!nfsd_file_unhash(nf))
		return false;
	/* keep final reference for nfsd_file_lru_dispose */
	if (refcount_dec_not_one(&nf->nf_ref))
		return true;

	list_add(&nf->nf_lru, dispose);
	return true;
}

static void
nfsd_file_put_noref(struct nfsd_file *nf)
{
	trace_nfsd_file_put(nf);

	if (refcount_dec_and_test(&nf->nf_ref)) {
		WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags));
		nfsd_file_free(nf);
	}
}
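/**
 * nfsd_file_put - release a reference to an nfsd_file
 * @nf: nfsd_file of which to put the reference
 *
 * Mark the file as recently used, flush dirty pagecache data if this
 * looks like the last reference besides the hashtable's, and drop the
 * reference. Kicks off the laundrette for hashed entries and the
 * garbage collector once the cache grows past NFSD_FILE_LRU_LIMIT.
 */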
void
nfsd_file_put(struct nfsd_file *nf)
{
	bool is_hashed;

	set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
	if (refcount_read(&nf->nf_ref) > 2 || !nf->nf_file) {
		nfsd_file_put_noref(nf);
		return;
	}

	filemap_flush(nf->nf_file->f_mapping);
	is_hashed = test_bit(NFSD_FILE_HASHED, &nf->nf_flags) != 0;
	nfsd_file_put_noref(nf);
	if (is_hashed)
		nfsd_file_schedule_laundrette();
	if (atomic_long_read(&nfsd_filecache_count) >= NFSD_FILE_LRU_LIMIT)
		nfsd_file_gc();
}

struct nfsd_file *
nfsd_file_get(struct nfsd_file *nf)
{
	if (likely(refcount_inc_not_zero(&nf->nf_ref)))
		return nf;
	return NULL;
}

static void
nfsd_file_dispose_list(struct list_head *dispose)
{
	struct nfsd_file *nf;

	while (!list_empty(dispose)) {
		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
		list_del(&nf->nf_lru);
		nfsd_file_put_noref(nf);
	}
}

static void
nfsd_file_dispose_list_sync(struct list_head *dispose)
{
	bool flush = false;
	struct nfsd_file *nf;

	while (!list_empty(dispose)) {
		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
		list_del(&nf->nf_lru);
		if (!refcount_dec_and_test(&nf->nf_ref))
			continue;
		if (nfsd_file_free(nf))
			flush = true;
	}
	if (flush)
		flush_delayed_fput();
}

static void
nfsd_file_list_remove_disposal(struct list_head *dst,
			       struct nfsd_fcache_disposal *l)
{
	spin_lock(&l->lock);
	list_splice_init(&l->freeme, dst);
	spin_unlock(&l->lock);
}

static void
nfsd_file_list_add_disposal(struct list_head *files, struct net *net)
{
	struct nfsd_fcache_disposal *l;

	rcu_read_lock();
	list_for_each_entry_rcu(l, &laundrettes, list) {
		if (l->net == net) {
			spin_lock(&l->lock);
			list_splice_tail_init(files, &l->freeme);
			spin_unlock(&l->lock);
			queue_work(nfsd_filecache_wq, &l->work);
			break;
		}
	}
	rcu_read_unlock();
}

static void
nfsd_file_list_add_pernet(struct list_head *dst, struct list_head *src,
			  struct net *net)
{
	struct nfsd_file *nf, *tmp;

	list_for_each_entry_safe(nf, tmp, src, nf_lru) {
		if (nf->nf_net == net)
			list_move_tail(&nf->nf_lru, dst);
	}
}
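/*
 * Hand a list of nfsd_files to their per-net disposal laundrettes.
 * Each pass peels off the entries that belong to the same net
 * namespace as the head of the list and queues them on that net's
 * disposal workqueue.
 */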
static void
nfsd_file_dispose_list_delayed(struct list_head *dispose)
{
	LIST_HEAD(list);
	struct nfsd_file *nf;

	while (!list_empty(dispose)) {
		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
		nfsd_file_list_add_pernet(&list, dispose, nf->nf_net);
		nfsd_file_list_add_disposal(&list, nf->nf_net);
	}
}

/*
 * Note this can deadlock with nfsd_file_cache_purge.
 */
static enum lru_status
nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
		 spinlock_t *lock, void *arg)
	__releases(lock)
	__acquires(lock)
{
	struct list_head *head = arg;
	struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru);

	/*
	 * Do a lockless refcount check. The hashtable holds one reference, so
	 * we look to see if anything else has a reference, or if any have
	 * been put since the shrinker last ran. Those don't get unhashed and
	 * released.
	 *
	 * Note that in the put path, we set the flag and then decrement the
	 * counter. Here we check the counter and then test and clear the flag.
	 * That order is deliberate to ensure that we can do this locklessly.
	 */
	if (refcount_read(&nf->nf_ref) > 1)
		goto out_skip;

	/*
	 * Don't throw out files that are still undergoing I/O or
	 * that have uncleared errors pending.
	 */
	if (nfsd_file_check_writeback(nf))
		goto out_skip;

	if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags))
		goto out_skip;

	if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags))
		goto out_skip;

	list_lru_isolate_move(lru, &nf->nf_lru, head);
	return LRU_REMOVED;
out_skip:
	return LRU_SKIP;
}
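/*
 * Walk the LRU (bounded by the shrinker's quota when @sc is non-NULL,
 * otherwise the whole list), isolating idle entries via
 * nfsd_file_lru_cb. Anything isolated is then unhashed and queued for
 * delayed per-net disposal. Returns the number of entries removed from
 * the LRU.
 */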
static unsigned long
nfsd_file_lru_walk_list(struct shrink_control *sc)
{
	LIST_HEAD(head);
	struct nfsd_file *nf;
	unsigned long ret;

	if (sc)
		ret = list_lru_shrink_walk(&nfsd_file_lru, sc,
				nfsd_file_lru_cb, &head);
	else
		ret = list_lru_walk(&nfsd_file_lru,
				nfsd_file_lru_cb,
				&head, LONG_MAX);
	list_for_each_entry(nf, &head, nf_lru) {
		spin_lock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
		nfsd_file_do_unhash(nf);
		spin_unlock(&nfsd_file_hashtbl[nf->nf_hashval].nfb_lock);
	}
	nfsd_file_dispose_list_delayed(&head);
	return ret;
}

static void
nfsd_file_gc(void)
{
	nfsd_file_lru_walk_list(NULL);
}

static void
nfsd_file_gc_worker(struct work_struct *work)
{
	nfsd_file_gc();
	nfsd_file_schedule_laundrette();
}

static unsigned long
nfsd_file_lru_count(struct shrinker *s, struct shrink_control *sc)
{
	return list_lru_count(&nfsd_file_lru);
}

static unsigned long
nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc)
{
	return nfsd_file_lru_walk_list(sc);
}

static struct shrinker	nfsd_file_shrinker = {
	.scan_objects = nfsd_file_lru_scan,
	.count_objects = nfsd_file_lru_count,
	.seeks = 1,
};

static void
__nfsd_file_close_inode(struct inode *inode, unsigned int hashval,
			struct list_head *dispose)
{
	struct nfsd_file	*nf;
	struct hlist_node	*tmp;

	spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
	hlist_for_each_entry_safe(nf, tmp, &nfsd_file_hashtbl[hashval].nfb_head, nf_node) {
		if (inode == nf->nf_inode)
			nfsd_file_unhash_and_release_locked(nf, dispose);
	}
	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
}

/**
 * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file
 * @inode: inode of the file to attempt to remove
 *
 * Walk the whole hash bucket, looking for any files that correspond to "inode".
 * If any do, then unhash them and put the hashtable reference to them and
 * destroy any that had their last reference put. Also ensure that any of the
 * fputs also have their final __fput done as well.
 */
void
nfsd_file_close_inode_sync(struct inode *inode)
{
	unsigned int		hashval = (unsigned int)hash_long(inode->i_ino,
						NFSD_FILE_HASH_BITS);
	LIST_HEAD(dispose);

	__nfsd_file_close_inode(inode, hashval, &dispose);
	trace_nfsd_file_close_inode_sync(inode, hashval, !list_empty(&dispose));
	nfsd_file_dispose_list_sync(&dispose);
}

/**
 * nfsd_file_close_inode - attempt a delayed close of a nfsd_file
 * @inode: inode of the file to attempt to remove
 *
 * Walk the whole hash bucket, looking for any files that correspond to "inode".
 * If any do, then unhash them and put the hashtable reference to them and
 * destroy any that had their last reference put.
 */
static void
nfsd_file_close_inode(struct inode *inode)
{
	unsigned int		hashval = (unsigned int)hash_long(inode->i_ino,
						NFSD_FILE_HASH_BITS);
	LIST_HEAD(dispose);

	__nfsd_file_close_inode(inode, hashval, &dispose);
	trace_nfsd_file_close_inode(inode, hashval, !list_empty(&dispose));
	nfsd_file_dispose_list_delayed(&dispose);
}

/**
 * nfsd_file_delayed_close - close unused nfsd_files
 * @work: the work_struct embedded in the per-net laundrette
 *
 * Close any files that were queued up for delayed disposal on this
 * laundrette's list.
 *
 * Note this can deadlock with nfsd_file_cache_purge.
 */
static void
nfsd_file_delayed_close(struct work_struct *work)
{
	LIST_HEAD(head);
	struct nfsd_fcache_disposal *l = container_of(work,
			struct nfsd_fcache_disposal, work);

	nfsd_file_list_remove_disposal(&head, l);
	nfsd_file_dispose_list(&head);
}

static int
nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg,
			      void *data)
{
	struct file_lock *fl = data;

	/* Only close files for F_SETLEASE leases */
	if (fl->fl_flags & FL_LEASE)
		nfsd_file_close_inode_sync(file_inode(fl->fl_file));
	return 0;
}

static struct notifier_block nfsd_file_lease_notifier = {
	.notifier_call = nfsd_file_lease_notifier_call,
};

static int
nfsd_file_fsnotify_handle_event(struct fsnotify_mark *mark, u32 mask,
				struct inode *inode, struct inode *dir,
				const struct qstr *name, u32 cookie)
{
	trace_nfsd_file_fsnotify_handle_event(inode, mask);

	/* Should be no marks on non-regular files */
	if (!S_ISREG(inode->i_mode)) {
		WARN_ON_ONCE(1);
		return 0;
	}

	/* don't close files if this was not the last link */
	if (mask & FS_ATTRIB) {
		if (inode->i_nlink)
			return 0;
	}

	nfsd_file_close_inode(inode);
	return 0;
}

static const struct fsnotify_ops nfsd_file_fsnotify_ops = {
	.handle_inode_event = nfsd_file_fsnotify_handle_event,
	.free_mark = nfsd_file_mark_free,
};
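/*
 * Set up the global file cache: hashtable, slabs, LRU list, shrinker,
 * lease notifier and fsnotify group. A no-op if the cache is already
 * initialized; on failure, any partially-constructed state is torn
 * down and a negative errno is returned.
 */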
int
nfsd_file_cache_init(void)
{
	int		ret = -ENOMEM;
	unsigned int	i;

	clear_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags);

	if (nfsd_file_hashtbl)
		return 0;

	nfsd_filecache_wq = alloc_workqueue("nfsd_filecache", 0, 0);
	if (!nfsd_filecache_wq)
		goto out;

	nfsd_file_hashtbl = kvcalloc(NFSD_FILE_HASH_SIZE,
				sizeof(*nfsd_file_hashtbl), GFP_KERNEL);
	if (!nfsd_file_hashtbl) {
		pr_err("nfsd: unable to allocate nfsd_file_hashtbl\n");
		goto out_err;
	}

	nfsd_file_slab = kmem_cache_create("nfsd_file",
				sizeof(struct nfsd_file), 0, 0, NULL);
	if (!nfsd_file_slab) {
		pr_err("nfsd: unable to create nfsd_file_slab\n");
		goto out_err;
	}

	nfsd_file_mark_slab = kmem_cache_create("nfsd_file_mark",
				sizeof(struct nfsd_file_mark), 0, 0, NULL);
	if (!nfsd_file_mark_slab) {
		pr_err("nfsd: unable to create nfsd_file_mark_slab\n");
		goto out_err;
	}

	ret = list_lru_init(&nfsd_file_lru);
	if (ret) {
		pr_err("nfsd: failed to init nfsd_file_lru: %d\n", ret);
		goto out_err;
	}

	ret = register_shrinker(&nfsd_file_shrinker);
	if (ret) {
		pr_err("nfsd: failed to register nfsd_file_shrinker: %d\n", ret);
		goto out_lru;
	}

	ret = lease_register_notifier(&nfsd_file_lease_notifier);
	if (ret) {
		pr_err("nfsd: unable to register lease notifier: %d\n", ret);
		goto out_shrinker;
	}

	nfsd_file_fsnotify_group = fsnotify_alloc_group(&nfsd_file_fsnotify_ops);
	if (IS_ERR(nfsd_file_fsnotify_group)) {
		pr_err("nfsd: unable to create fsnotify group: %ld\n",
			PTR_ERR(nfsd_file_fsnotify_group));
		nfsd_file_fsnotify_group = NULL;
		goto out_notifier;
	}

	for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
		INIT_HLIST_HEAD(&nfsd_file_hashtbl[i].nfb_head);
		spin_lock_init(&nfsd_file_hashtbl[i].nfb_lock);
	}

	INIT_DELAYED_WORK(&nfsd_filecache_laundrette, nfsd_file_gc_worker);
out:
	return ret;
out_notifier:
	lease_unregister_notifier(&nfsd_file_lease_notifier);
out_shrinker:
	unregister_shrinker(&nfsd_file_shrinker);
out_lru:
	list_lru_destroy(&nfsd_file_lru);
out_err:
	kmem_cache_destroy(nfsd_file_slab);
	nfsd_file_slab = NULL;
	kmem_cache_destroy(nfsd_file_mark_slab);
	nfsd_file_mark_slab = NULL;
	kvfree(nfsd_file_hashtbl);
	nfsd_file_hashtbl = NULL;
	destroy_workqueue(nfsd_filecache_wq);
	nfsd_filecache_wq = NULL;
	goto out;
}

/*
 * Note this can deadlock with nfsd_file_lru_cb.
 */
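/**
 * nfsd_file_cache_purge - close and free cached files
 * @net: net namespace to purge, or NULL to purge all entries
 *
 * Forcibly unhash, release and dispose of every cached file that
 * belongs to @net, bucket by bucket.
 */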
void
nfsd_file_cache_purge(struct net *net)
{
	unsigned int		i;
	struct nfsd_file	*nf;
	struct hlist_node	*next;
	LIST_HEAD(dispose);
	bool del;

	if (!nfsd_file_hashtbl)
		return;

	for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
		struct nfsd_fcache_bucket *nfb = &nfsd_file_hashtbl[i];

		spin_lock(&nfb->nfb_lock);
		hlist_for_each_entry_safe(nf, next, &nfb->nfb_head, nf_node) {
			if (net && nf->nf_net != net)
				continue;
			del = nfsd_file_unhash_and_release_locked(nf, &dispose);

			/*
			 * Deadlock detected! Something marked this entry as
			 * unhashed, but hasn't removed it from the hash list.
			 */
			WARN_ON_ONCE(!del);
		}
		spin_unlock(&nfb->nfb_lock);
		nfsd_file_dispose_list(&dispose);
	}
}

static struct nfsd_fcache_disposal *
nfsd_alloc_fcache_disposal(struct net *net)
{
	struct nfsd_fcache_disposal *l;

	l = kmalloc(sizeof(*l), GFP_KERNEL);
	if (!l)
		return NULL;
	INIT_WORK(&l->work, nfsd_file_delayed_close);
	l->net = net;
	spin_lock_init(&l->lock);
	INIT_LIST_HEAD(&l->freeme);
	return l;
}

static void
nfsd_free_fcache_disposal(struct nfsd_fcache_disposal *l)
{
	rcu_assign_pointer(l->net, NULL);
	cancel_work_sync(&l->work);
	nfsd_file_dispose_list(&l->freeme);
	kfree_rcu(l, rcu);
}

static void
nfsd_add_fcache_disposal(struct nfsd_fcache_disposal *l)
{
	spin_lock(&laundrette_lock);
	list_add_tail_rcu(&l->list, &laundrettes);
	spin_unlock(&laundrette_lock);
}

static void
nfsd_del_fcache_disposal(struct nfsd_fcache_disposal *l)
{
	spin_lock(&laundrette_lock);
	list_del_rcu(&l->list);
	spin_unlock(&laundrette_lock);
}

static int
nfsd_alloc_fcache_disposal_net(struct net *net)
{
	struct nfsd_fcache_disposal *l;

	l = nfsd_alloc_fcache_disposal(net);
	if (!l)
		return -ENOMEM;
	nfsd_add_fcache_disposal(l);
	return 0;
}

static void
nfsd_free_fcache_disposal_net(struct net *net)
{
	struct nfsd_fcache_disposal *l;

	rcu_read_lock();
	list_for_each_entry_rcu(l, &laundrettes, list) {
		if (l->net != net)
			continue;
		nfsd_del_fcache_disposal(l);
		rcu_read_unlock();
		nfsd_free_fcache_disposal(l);
		return;
	}
	rcu_read_unlock();
}

int
nfsd_file_cache_start_net(struct net *net)
{
	return nfsd_alloc_fcache_disposal_net(net);
}

void
nfsd_file_cache_shutdown_net(struct net *net)
{
	nfsd_file_cache_purge(net);
	nfsd_free_fcache_disposal_net(net);
}

void
nfsd_file_cache_shutdown(void)
{
	set_bit(NFSD_FILE_SHUTDOWN, &nfsd_file_lru_flags);

	lease_unregister_notifier(&nfsd_file_lease_notifier);
	unregister_shrinker(&nfsd_file_shrinker);
	/*
	 * make sure all callers of nfsd_file_lru_cb are done before
	 * calling nfsd_file_cache_purge
	 */
	cancel_delayed_work_sync(&nfsd_filecache_laundrette);
	nfsd_file_cache_purge(NULL);
	list_lru_destroy(&nfsd_file_lru);
	rcu_barrier();
	fsnotify_put_group(nfsd_file_fsnotify_group);
	nfsd_file_fsnotify_group = NULL;
	kmem_cache_destroy(nfsd_file_slab);
	nfsd_file_slab = NULL;
	fsnotify_wait_marks_destroyed();
	kmem_cache_destroy(nfsd_file_mark_slab);
	nfsd_file_mark_slab = NULL;
	kvfree(nfsd_file_hashtbl);
	nfsd_file_hashtbl = NULL;
	destroy_workqueue(nfsd_filecache_wq);
	nfsd_filecache_wq = NULL;
}

static bool
nfsd_match_cred(const struct cred *c1, const struct cred *c2)
{
	int i;

	if (!uid_eq(c1->fsuid, c2->fsuid))
		return false;
	if (!gid_eq(c1->fsgid, c2->fsgid))
		return false;
	if (c1->group_info == NULL || c2->group_info == NULL)
		return c1->group_info == c2->group_info;
	if (c1->group_info->ngroups != c2->group_info->ngroups)
		return false;
	for (i = 0; i < c1->group_info->ngroups; i++) {
		if (!gid_eq(c1->group_info->gid[i], c2->group_info->gid[i]))
			return false;
	}
	return true;
}
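/*
 * Find a cache entry in the given bucket that matches the access mode,
 * inode, net namespace and current credentials, and take a reference
 * to it. Safe to call with either the bucket lock or the RCU read lock
 * held.
 */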
static struct nfsd_file *
nfsd_file_find_locked(struct inode *inode, unsigned int may_flags,
		      unsigned int hashval, struct net *net)
{
	struct nfsd_file *nf;
	unsigned char need = may_flags & NFSD_FILE_MAY_MASK;

	hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
				 nf_node, lockdep_is_held(&nfsd_file_hashtbl[hashval].nfb_lock)) {
		if (nf->nf_may != need)
			continue;
		if (nf->nf_inode != inode)
			continue;
		if (nf->nf_net != net)
			continue;
		if (!nfsd_match_cred(nf->nf_cred, current_cred()))
			continue;
		if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags))
			continue;
		if (nfsd_file_get(nf) != NULL)
			return nf;
	}
	return NULL;
}

/**
 * nfsd_file_is_cached - are there any cached open files for this fh?
 * @inode: inode of the file to check
 *
 * Scan the hashtable for open files that match this fh. Returns true if there
 * are any, and false if not.
 */
bool
nfsd_file_is_cached(struct inode *inode)
{
	bool			ret = false;
	struct nfsd_file	*nf;
	unsigned int		hashval;

	hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);

	rcu_read_lock();
	hlist_for_each_entry_rcu(nf, &nfsd_file_hashtbl[hashval].nfb_head,
				 nf_node) {
		if (inode == nf->nf_inode) {
			ret = true;
			break;
		}
	}
	rcu_read_unlock();
	trace_nfsd_file_is_cached(inode, hashval, (int)ret);
	return ret;
}
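/**
 * nfsd_file_acquire - find or open a file for an nfsd request
 * @rqstp: the RPC request being processed
 * @fhp: file handle of the file to open
 * @may_flags: NFSD_MAY_* flags describing the desired access
 * @pnf: OUT: the acquired nfsd_file
 *
 * Look for a cached open file that matches the file handle, access
 * mode, net namespace and credentials; open and hash a new one if none
 * is found. On success, store the referenced nfsd_file in *pnf and
 * return nfs_ok; otherwise return an nfserr status.
 */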
__be32
nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
		  unsigned int may_flags, struct nfsd_file **pnf)
{
	__be32	status;
	struct net *net = SVC_NET(rqstp);
	struct nfsd_file *nf, *new;
	struct inode *inode;
	unsigned int hashval;
	bool retry = true;

	/* FIXME: skip this if fh_dentry is already set? */
	status = fh_verify(rqstp, fhp, S_IFREG,
				may_flags|NFSD_MAY_OWNER_OVERRIDE);
	if (status != nfs_ok)
		return status;

	inode = d_inode(fhp->fh_dentry);
	hashval = (unsigned int)hash_long(inode->i_ino, NFSD_FILE_HASH_BITS);
retry:
	rcu_read_lock();
	nf = nfsd_file_find_locked(inode, may_flags, hashval, net);
	rcu_read_unlock();
	if (nf)
		goto wait_for_construction;

	new = nfsd_file_alloc(inode, may_flags, hashval, net);
	if (!new) {
		trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags,
					NULL, nfserr_jukebox);
		return nfserr_jukebox;
	}

	spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
	nf = nfsd_file_find_locked(inode, may_flags, hashval, net);
	if (nf == NULL)
		goto open_file;
	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
	nfsd_file_slab_free(&new->nf_rcu);

wait_for_construction:
	wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE);

	/* Did construction of this file fail? */
	if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
		if (!retry) {
			status = nfserr_jukebox;
			goto out;
		}
		retry = false;
		nfsd_file_put_noref(nf);
		goto retry;
	}

	this_cpu_inc(nfsd_file_cache_hits);

	if (!(may_flags & NFSD_MAY_NOT_BREAK_LEASE)) {
		bool write = (may_flags & NFSD_MAY_WRITE);

		if (test_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags) ||
		    (test_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags) && write)) {
			status = nfserrno(nfsd_open_break_lease(
					file_inode(nf->nf_file), may_flags));
			if (status == nfs_ok) {
				clear_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags);
				if (write)
					clear_bit(NFSD_FILE_BREAK_WRITE,
						  &nf->nf_flags);
			}
		}
	}
out:
	if (status == nfs_ok) {
		*pnf = nf;
	} else {
		nfsd_file_put(nf);
		nf = NULL;
	}

	trace_nfsd_file_acquire(rqstp, hashval, inode, may_flags, nf, status);
	return status;
open_file:
	nf = new;
	/* Take reference for the hashtable */
	refcount_inc(&nf->nf_ref);
	__set_bit(NFSD_FILE_HASHED, &nf->nf_flags);
	__set_bit(NFSD_FILE_PENDING, &nf->nf_flags);
	list_lru_add(&nfsd_file_lru, &nf->nf_lru);
	hlist_add_head_rcu(&nf->nf_node, &nfsd_file_hashtbl[hashval].nfb_head);
	++nfsd_file_hashtbl[hashval].nfb_count;
	nfsd_file_hashtbl[hashval].nfb_maxcount = max(nfsd_file_hashtbl[hashval].nfb_maxcount,
			nfsd_file_hashtbl[hashval].nfb_count);
	spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
	if (atomic_long_inc_return(&nfsd_filecache_count) >= NFSD_FILE_LRU_THRESHOLD)
		nfsd_file_gc();

	nf->nf_mark = nfsd_file_mark_find_or_create(nf);
	if (nf->nf_mark)
		status = nfsd_open_verified(rqstp, fhp, S_IFREG,
				may_flags, &nf->nf_file);
	else
		status = nfserr_jukebox;
	/*
	 * If construction failed, or we raced with a call to unlink()
	 * then unhash.
	 */
	if (status != nfs_ok || inode->i_nlink == 0) {
		bool do_free;
		spin_lock(&nfsd_file_hashtbl[hashval].nfb_lock);
		do_free = nfsd_file_unhash(nf);
		spin_unlock(&nfsd_file_hashtbl[hashval].nfb_lock);
		if (do_free)
			nfsd_file_put_noref(nf);
	}
	clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags);
	smp_mb__after_atomic();
	wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING);
	goto out;
}

/*
 * Note that fields may be added, removed or reordered in the future. Programs
 * scraping this file for info should test the labels to ensure they're
 * getting the correct field.
 */
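/*
 * The current output looks like the below (values purely illustrative):
 *
 *	total entries: 5134
 *	longest chain: 3
 *	cache hits: 323452
 */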
static int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
{
	unsigned int i, count = 0, longest = 0;
	unsigned long hits = 0;

	/*
	 * No need for spinlocks here since we're not terribly interested in
	 * accuracy. We do take the nfsd_mutex simply to ensure that we
	 * don't end up racing with server shutdown.
	 */
	mutex_lock(&nfsd_mutex);
	if (nfsd_file_hashtbl) {
		for (i = 0; i < NFSD_FILE_HASH_SIZE; i++) {
			count += nfsd_file_hashtbl[i].nfb_count;
			longest = max(longest, nfsd_file_hashtbl[i].nfb_count);
		}
	}
	mutex_unlock(&nfsd_mutex);

	for_each_possible_cpu(i)
		hits += per_cpu(nfsd_file_cache_hits, i);

	seq_printf(m, "total entries: %u\n", count);
	seq_printf(m, "longest chain: %u\n", longest);
	seq_printf(m, "cache hits: %lu\n", hits);
	return 0;
}

int nfsd_file_cache_stats_open(struct inode *inode, struct file *file)
{
	return single_open(file, nfsd_file_cache_stats_show, NULL);
}