1/* 2 FUSE: Filesystem in Userspace 3 Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu> 4 5 This program can be distributed under the terms of the GNU GPL. 6 See the file COPYING. 7*/ 8 9#include "fuse_i.h" 10 11#include <linux/pagemap.h> 12#include <linux/slab.h> 13#include <linux/file.h> 14#include <linux/seq_file.h> 15#include <linux/init.h> 16#include <linux/module.h> 17#include <linux/moduleparam.h> 18#include <linux/fs_context.h> 19#include <linux/fs_parser.h> 20#include <linux/statfs.h> 21#include <linux/random.h> 22#include <linux/sched.h> 23#include <linux/exportfs.h> 24#include <linux/posix_acl.h> 25#include <linux/pid_namespace.h> 26 27MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>"); 28MODULE_DESCRIPTION("Filesystem in Userspace"); 29MODULE_LICENSE("GPL"); 30 31static struct kmem_cache *fuse_inode_cachep; 32struct list_head fuse_conn_list; 33DEFINE_MUTEX(fuse_mutex); 34 35static int set_global_limit(const char *val, const struct kernel_param *kp); 36 37unsigned max_user_bgreq; 38module_param_call(max_user_bgreq, set_global_limit, param_get_uint, 39 &max_user_bgreq, 0644); 40__MODULE_PARM_TYPE(max_user_bgreq, "uint"); 41MODULE_PARM_DESC(max_user_bgreq, 42 "Global limit for the maximum number of backgrounded requests an " 43 "unprivileged user can set"); 44 45unsigned max_user_congthresh; 46module_param_call(max_user_congthresh, set_global_limit, param_get_uint, 47 &max_user_congthresh, 0644); 48__MODULE_PARM_TYPE(max_user_congthresh, "uint"); 49MODULE_PARM_DESC(max_user_congthresh, 50 "Global limit for the maximum congestion threshold an " 51 "unprivileged user can set"); 52 53#define FUSE_SUPER_MAGIC 0x65735546 54 55#define FUSE_DEFAULT_BLKSIZE 512 56 57/** Maximum number of outstanding background requests */ 58#define FUSE_DEFAULT_MAX_BACKGROUND 12 59 60/** Congestion starts at 75% of maximum */ 61#define FUSE_DEFAULT_CONGESTION_THRESHOLD (FUSE_DEFAULT_MAX_BACKGROUND * 3 / 4) 62 63#ifdef CONFIG_BLOCK 64static struct file_system_type fuseblk_fs_type; 65#endif 66 67struct fuse_forget_link *fuse_alloc_forget(void) 68{ 69 return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL_ACCOUNT); 70} 71 72static struct inode *fuse_alloc_inode(struct super_block *sb) 73{ 74 struct fuse_inode *fi; 75 76 fi = kmem_cache_alloc(fuse_inode_cachep, GFP_KERNEL); 77 if (!fi) 78 return NULL; 79 80 fi->i_time = 0; 81 fi->inval_mask = 0; 82 fi->nodeid = 0; 83 fi->nlookup = 0; 84 fi->attr_version = 0; 85 fi->orig_ino = 0; 86 fi->state = 0; 87 mutex_init(&fi->mutex); 88 init_rwsem(&fi->i_mmap_sem); 89 spin_lock_init(&fi->lock); 90 fi->forget = fuse_alloc_forget(); 91 if (!fi->forget) 92 goto out_free; 93 94 if (IS_ENABLED(CONFIG_FUSE_DAX) && !fuse_dax_inode_alloc(sb, fi)) 95 goto out_free_forget; 96 97 return &fi->inode; 98 99out_free_forget: 100 kfree(fi->forget); 101out_free: 102 kmem_cache_free(fuse_inode_cachep, fi); 103 return NULL; 104} 105 106static void fuse_free_inode(struct inode *inode) 107{ 108 struct fuse_inode *fi = get_fuse_inode(inode); 109 110 mutex_destroy(&fi->mutex); 111 kfree(fi->forget); 112#ifdef CONFIG_FUSE_DAX 113 kfree(fi->dax); 114#endif 115 kmem_cache_free(fuse_inode_cachep, fi); 116} 117 118static void fuse_evict_inode(struct inode *inode) 119{ 120 struct fuse_inode *fi = get_fuse_inode(inode); 121 122 /* Will write inode on close/munmap and in all other dirtiers */ 123 WARN_ON(inode->i_state & I_DIRTY_INODE); 124 125 truncate_inode_pages_final(&inode->i_data); 126 clear_inode(inode); 127 if (inode->i_sb->s_flags & SB_ACTIVE) { 128 struct fuse_conn *fc = get_fuse_conn(inode); 129 130 if (FUSE_IS_DAX(inode)) 131 fuse_dax_inode_cleanup(inode); 132 if (fi->nlookup) { 133 fuse_queue_forget(fc, fi->forget, fi->nodeid, 134 fi->nlookup); 135 fi->forget = NULL; 136 } 137 } 138 if (S_ISREG(inode->i_mode) && !fuse_is_bad(inode)) { 139 WARN_ON(!list_empty(&fi->write_files)); 140 WARN_ON(!list_empty(&fi->queued_writes)); 141 } 142} 143 144static int fuse_reconfigure(struct fs_context *fc) 145{ 146 struct super_block *sb = fc->root->d_sb; 147 148 sync_filesystem(sb); 149 if (fc->sb_flags & SB_MANDLOCK) 150 return -EINVAL; 151 152 return 0; 153} 154 155/* 156 * ino_t is 32-bits on 32-bit arch. We have to squash the 64-bit value down 157 * so that it will fit. 158 */ 159static ino_t fuse_squash_ino(u64 ino64) 160{ 161 ino_t ino = (ino_t) ino64; 162 if (sizeof(ino_t) < sizeof(u64)) 163 ino ^= ino64 >> (sizeof(u64) - sizeof(ino_t)) * 8; 164 return ino; 165} 166 167void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, 168 u64 attr_valid) 169{ 170 struct fuse_conn *fc = get_fuse_conn(inode); 171 struct fuse_inode *fi = get_fuse_inode(inode); 172 173 lockdep_assert_held(&fi->lock); 174 175 fi->attr_version = atomic64_inc_return(&fc->attr_version); 176 fi->i_time = attr_valid; 177 WRITE_ONCE(fi->inval_mask, 0); 178 179 inode->i_ino = fuse_squash_ino(attr->ino); 180 inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777); 181 set_nlink(inode, attr->nlink); 182 inode->i_uid = make_kuid(fc->user_ns, attr->uid); 183 inode->i_gid = make_kgid(fc->user_ns, attr->gid); 184 inode->i_blocks = attr->blocks; 185 186 /* Sanitize nsecs */ 187 attr->atimensec = min_t(u32, attr->atimensec, NSEC_PER_SEC - 1); 188 attr->mtimensec = min_t(u32, attr->mtimensec, NSEC_PER_SEC - 1); 189 attr->ctimensec = min_t(u32, attr->ctimensec, NSEC_PER_SEC - 1); 190 191 inode->i_atime.tv_sec = attr->atime; 192 inode->i_atime.tv_nsec = attr->atimensec; 193 /* mtime from server may be stale due to local buffered write */ 194 if (!fc->writeback_cache || !S_ISREG(inode->i_mode)) { 195 inode->i_mtime.tv_sec = attr->mtime; 196 inode->i_mtime.tv_nsec = attr->mtimensec; 197 inode->i_ctime.tv_sec = attr->ctime; 198 inode->i_ctime.tv_nsec = attr->ctimensec; 199 } 200 201 if (attr->blksize != 0) 202 inode->i_blkbits = ilog2(attr->blksize); 203 else 204 inode->i_blkbits = inode->i_sb->s_blocksize_bits; 205 206 /* 207 * Don't set the sticky bit in i_mode, unless we want the VFS 208 * to check permissions. This prevents failures due to the 209 * check in may_delete(). 210 */ 211 fi->orig_i_mode = inode->i_mode; 212 if (!fc->default_permissions) 213 inode->i_mode &= ~S_ISVTX; 214 215 fi->orig_ino = attr->ino; 216} 217 218void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, 219 u64 attr_valid, u64 attr_version) 220{ 221 struct fuse_conn *fc = get_fuse_conn(inode); 222 struct fuse_inode *fi = get_fuse_inode(inode); 223 bool is_wb = fc->writeback_cache; 224 loff_t oldsize; 225 struct timespec64 old_mtime; 226 227 spin_lock(&fi->lock); 228 if ((attr_version != 0 && fi->attr_version > attr_version) || 229 test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) { 230 spin_unlock(&fi->lock); 231 return; 232 } 233 234 old_mtime = inode->i_mtime; 235 fuse_change_attributes_common(inode, attr, attr_valid); 236 237 oldsize = inode->i_size; 238 /* 239 * In case of writeback_cache enabled, the cached writes beyond EOF 240 * extend local i_size without keeping userspace server in sync. So, 241 * attr->size coming from server can be stale. We cannot trust it. 242 */ 243 if (!is_wb || !S_ISREG(inode->i_mode)) 244 i_size_write(inode, attr->size); 245 spin_unlock(&fi->lock); 246 247 if (!is_wb && S_ISREG(inode->i_mode)) { 248 bool inval = false; 249 250 if (oldsize != attr->size) { 251 truncate_pagecache(inode, attr->size); 252 if (!fc->explicit_inval_data) 253 inval = true; 254 } else if (fc->auto_inval_data) { 255 struct timespec64 new_mtime = { 256 .tv_sec = attr->mtime, 257 .tv_nsec = attr->mtimensec, 258 }; 259 260 /* 261 * Auto inval mode also checks and invalidates if mtime 262 * has changed. 263 */ 264 if (!timespec64_equal(&old_mtime, &new_mtime)) 265 inval = true; 266 } 267 268 if (inval) 269 invalidate_inode_pages2(inode->i_mapping); 270 } 271} 272 273static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr) 274{ 275 inode->i_mode = attr->mode & S_IFMT; 276 inode->i_size = attr->size; 277 inode->i_mtime.tv_sec = attr->mtime; 278 inode->i_mtime.tv_nsec = attr->mtimensec; 279 inode->i_ctime.tv_sec = attr->ctime; 280 inode->i_ctime.tv_nsec = attr->ctimensec; 281 if (S_ISREG(inode->i_mode)) { 282 fuse_init_common(inode); 283 fuse_init_file_inode(inode); 284 } else if (S_ISDIR(inode->i_mode)) 285 fuse_init_dir(inode); 286 else if (S_ISLNK(inode->i_mode)) 287 fuse_init_symlink(inode); 288 else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) || 289 S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { 290 fuse_init_common(inode); 291 init_special_inode(inode, inode->i_mode, 292 new_decode_dev(attr->rdev)); 293 } else 294 BUG(); 295} 296 297static int fuse_inode_eq(struct inode *inode, void *_nodeidp) 298{ 299 u64 nodeid = *(u64 *) _nodeidp; 300 if (get_node_id(inode) == nodeid) 301 return 1; 302 else 303 return 0; 304} 305 306static int fuse_inode_set(struct inode *inode, void *_nodeidp) 307{ 308 u64 nodeid = *(u64 *) _nodeidp; 309 get_fuse_inode(inode)->nodeid = nodeid; 310 return 0; 311} 312 313struct inode *fuse_iget(struct super_block *sb, u64 nodeid, 314 int generation, struct fuse_attr *attr, 315 u64 attr_valid, u64 attr_version) 316{ 317 struct inode *inode; 318 struct fuse_inode *fi; 319 struct fuse_conn *fc = get_fuse_conn_super(sb); 320 321 /* 322 * Auto mount points get their node id from the submount root, which is 323 * not a unique identifier within this filesystem. 324 * 325 * To avoid conflicts, do not place submount points into the inode hash 326 * table. 327 */ 328 if (fc->auto_submounts && (attr->flags & FUSE_ATTR_SUBMOUNT) && 329 S_ISDIR(attr->mode)) { 330 inode = new_inode(sb); 331 if (!inode) 332 return NULL; 333 334 fuse_init_inode(inode, attr); 335 get_fuse_inode(inode)->nodeid = nodeid; 336 inode->i_flags |= S_AUTOMOUNT; 337 goto done; 338 } 339 340retry: 341 inode = iget5_locked(sb, nodeid, fuse_inode_eq, fuse_inode_set, &nodeid); 342 if (!inode) 343 return NULL; 344 345 if ((inode->i_state & I_NEW)) { 346 inode->i_flags |= S_NOATIME; 347 if (!fc->writeback_cache || !S_ISREG(attr->mode)) 348 inode->i_flags |= S_NOCMTIME; 349 inode->i_generation = generation; 350 fuse_init_inode(inode, attr); 351 unlock_new_inode(inode); 352 } else if (fuse_stale_inode(inode, generation, attr)) { 353 /* nodeid was reused, any I/O on the old inode should fail */ 354 fuse_make_bad(inode); 355 iput(inode); 356 goto retry; 357 } 358done: 359 fi = get_fuse_inode(inode); 360 spin_lock(&fi->lock); 361 fi->nlookup++; 362 spin_unlock(&fi->lock); 363 fuse_change_attributes(inode, attr, attr_valid, attr_version); 364 365 return inode; 366} 367 368struct inode *fuse_ilookup(struct fuse_conn *fc, u64 nodeid, 369 struct fuse_mount **fm) 370{ 371 struct fuse_mount *fm_iter; 372 struct inode *inode; 373 374 WARN_ON(!rwsem_is_locked(&fc->killsb)); 375 list_for_each_entry(fm_iter, &fc->mounts, fc_entry) { 376 if (!fm_iter->sb) 377 continue; 378 379 inode = ilookup5(fm_iter->sb, nodeid, fuse_inode_eq, &nodeid); 380 if (inode) { 381 if (fm) 382 *fm = fm_iter; 383 return inode; 384 } 385 } 386 387 return NULL; 388} 389 390int fuse_reverse_inval_inode(struct fuse_conn *fc, u64 nodeid, 391 loff_t offset, loff_t len) 392{ 393 struct fuse_inode *fi; 394 struct inode *inode; 395 pgoff_t pg_start; 396 pgoff_t pg_end; 397 398 inode = fuse_ilookup(fc, nodeid, NULL); 399 if (!inode) 400 return -ENOENT; 401 402 fi = get_fuse_inode(inode); 403 spin_lock(&fi->lock); 404 fi->attr_version = atomic64_inc_return(&fc->attr_version); 405 spin_unlock(&fi->lock); 406 407 fuse_invalidate_attr(inode); 408 forget_all_cached_acls(inode); 409 if (offset >= 0) { 410 pg_start = offset >> PAGE_SHIFT; 411 if (len <= 0) 412 pg_end = -1; 413 else 414 pg_end = (offset + len - 1) >> PAGE_SHIFT; 415 invalidate_inode_pages2_range(inode->i_mapping, 416 pg_start, pg_end); 417 } 418 iput(inode); 419 return 0; 420} 421 422bool fuse_lock_inode(struct inode *inode) 423{ 424 bool locked = false; 425 426 if (!get_fuse_conn(inode)->parallel_dirops) { 427 mutex_lock(&get_fuse_inode(inode)->mutex); 428 locked = true; 429 } 430 431 return locked; 432} 433 434void fuse_unlock_inode(struct inode *inode, bool locked) 435{ 436 if (locked) 437 mutex_unlock(&get_fuse_inode(inode)->mutex); 438} 439 440static void fuse_umount_begin(struct super_block *sb) 441{ 442 struct fuse_conn *fc = get_fuse_conn_super(sb); 443 444 if (!fc->no_force_umount) 445 fuse_abort_conn(fc); 446} 447 448static void fuse_send_destroy(struct fuse_mount *fm) 449{ 450 if (fm->fc->conn_init) { 451 FUSE_ARGS(args); 452 453 args.opcode = FUSE_DESTROY; 454 args.force = true; 455 args.nocreds = true; 456 fuse_simple_request(fm, &args); 457 } 458} 459 460static void fuse_put_super(struct super_block *sb) 461{ 462 struct fuse_mount *fm = get_fuse_mount_super(sb); 463 464 fuse_mount_put(fm); 465} 466 467static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr) 468{ 469 stbuf->f_type = FUSE_SUPER_MAGIC; 470 stbuf->f_bsize = attr->bsize; 471 stbuf->f_frsize = attr->frsize; 472 stbuf->f_blocks = attr->blocks; 473 stbuf->f_bfree = attr->bfree; 474 stbuf->f_bavail = attr->bavail; 475 stbuf->f_files = attr->files; 476 stbuf->f_ffree = attr->ffree; 477 stbuf->f_namelen = attr->namelen; 478 /* fsid is left zero */ 479} 480 481static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf) 482{ 483 struct super_block *sb = dentry->d_sb; 484 struct fuse_mount *fm = get_fuse_mount_super(sb); 485 FUSE_ARGS(args); 486 struct fuse_statfs_out outarg; 487 int err; 488 489 if (!fuse_allow_current_process(fm->fc)) { 490 buf->f_type = FUSE_SUPER_MAGIC; 491 return 0; 492 } 493 494 memset(&outarg, 0, sizeof(outarg)); 495 args.in_numargs = 0; 496 args.opcode = FUSE_STATFS; 497 args.nodeid = get_node_id(d_inode(dentry)); 498 args.out_numargs = 1; 499 args.out_args[0].size = sizeof(outarg); 500 args.out_args[0].value = &outarg; 501 err = fuse_simple_request(fm, &args); 502 if (!err) 503 convert_fuse_statfs(buf, &outarg.st); 504 return err; 505} 506 507enum { 508 OPT_SOURCE, 509 OPT_SUBTYPE, 510 OPT_FD, 511 OPT_ROOTMODE, 512 OPT_USER_ID, 513 OPT_GROUP_ID, 514 OPT_DEFAULT_PERMISSIONS, 515 OPT_ALLOW_OTHER, 516 OPT_MAX_READ, 517 OPT_BLKSIZE, 518 OPT_ERR 519}; 520 521static const struct fs_parameter_spec fuse_fs_parameters[] = { 522 fsparam_string ("source", OPT_SOURCE), 523 fsparam_u32 ("fd", OPT_FD), 524 fsparam_u32oct ("rootmode", OPT_ROOTMODE), 525 fsparam_u32 ("user_id", OPT_USER_ID), 526 fsparam_u32 ("group_id", OPT_GROUP_ID), 527 fsparam_flag ("default_permissions", OPT_DEFAULT_PERMISSIONS), 528 fsparam_flag ("allow_other", OPT_ALLOW_OTHER), 529 fsparam_u32 ("max_read", OPT_MAX_READ), 530 fsparam_u32 ("blksize", OPT_BLKSIZE), 531 fsparam_string ("subtype", OPT_SUBTYPE), 532 {} 533}; 534 535static int fuse_parse_param(struct fs_context *fc, struct fs_parameter *param) 536{ 537 struct fs_parse_result result; 538 struct fuse_fs_context *ctx = fc->fs_private; 539 int opt; 540 541 if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) { 542 /* 543 * Ignore options coming from mount(MS_REMOUNT) for backward 544 * compatibility. 545 */ 546 if (fc->oldapi) 547 return 0; 548 549 return invalfc(fc, "No changes allowed in reconfigure"); 550 } 551 552 opt = fs_parse(fc, fuse_fs_parameters, param, &result); 553 if (opt < 0) 554 return opt; 555 556 switch (opt) { 557 case OPT_SOURCE: 558 if (fc->source) 559 return invalfc(fc, "Multiple sources specified"); 560 fc->source = param->string; 561 param->string = NULL; 562 break; 563 564 case OPT_SUBTYPE: 565 if (ctx->subtype) 566 return invalfc(fc, "Multiple subtypes specified"); 567 ctx->subtype = param->string; 568 param->string = NULL; 569 return 0; 570 571 case OPT_FD: 572 ctx->fd = result.uint_32; 573 ctx->fd_present = true; 574 break; 575 576 case OPT_ROOTMODE: 577 if (!fuse_valid_type(result.uint_32)) 578 return invalfc(fc, "Invalid rootmode"); 579 ctx->rootmode = result.uint_32; 580 ctx->rootmode_present = true; 581 break; 582 583 case OPT_USER_ID: 584 ctx->user_id = make_kuid(fc->user_ns, result.uint_32); 585 if (!uid_valid(ctx->user_id)) 586 return invalfc(fc, "Invalid user_id"); 587 ctx->user_id_present = true; 588 break; 589 590 case OPT_GROUP_ID: 591 ctx->group_id = make_kgid(fc->user_ns, result.uint_32); 592 if (!gid_valid(ctx->group_id)) 593 return invalfc(fc, "Invalid group_id"); 594 ctx->group_id_present = true; 595 break; 596 597 case OPT_DEFAULT_PERMISSIONS: 598 ctx->default_permissions = true; 599 break; 600 601 case OPT_ALLOW_OTHER: 602 ctx->allow_other = true; 603 break; 604 605 case OPT_MAX_READ: 606 ctx->max_read = result.uint_32; 607 break; 608 609 case OPT_BLKSIZE: 610 if (!ctx->is_bdev) 611 return invalfc(fc, "blksize only supported for fuseblk"); 612 ctx->blksize = result.uint_32; 613 break; 614 615 default: 616 return -EINVAL; 617 } 618 619 return 0; 620} 621 622static void fuse_free_fc(struct fs_context *fc) 623{ 624 struct fuse_fs_context *ctx = fc->fs_private; 625 626 if (ctx) { 627 kfree(ctx->subtype); 628 kfree(ctx); 629 } 630} 631 632static int fuse_show_options(struct seq_file *m, struct dentry *root) 633{ 634 struct super_block *sb = root->d_sb; 635 struct fuse_conn *fc = get_fuse_conn_super(sb); 636 637 if (fc->legacy_opts_show) { 638 seq_printf(m, ",user_id=%u", 639 from_kuid_munged(fc->user_ns, fc->user_id)); 640 seq_printf(m, ",group_id=%u", 641 from_kgid_munged(fc->user_ns, fc->group_id)); 642 if (fc->default_permissions) 643 seq_puts(m, ",default_permissions"); 644 if (fc->allow_other) 645 seq_puts(m, ",allow_other"); 646 if (fc->max_read != ~0) 647 seq_printf(m, ",max_read=%u", fc->max_read); 648 if (sb->s_bdev && sb->s_blocksize != FUSE_DEFAULT_BLKSIZE) 649 seq_printf(m, ",blksize=%lu", sb->s_blocksize); 650 } 651#ifdef CONFIG_FUSE_DAX 652 if (fc->dax) 653 seq_puts(m, ",dax"); 654#endif 655 656 return 0; 657} 658 659static void fuse_iqueue_init(struct fuse_iqueue *fiq, 660 const struct fuse_iqueue_ops *ops, 661 void *priv) 662{ 663 memset(fiq, 0, sizeof(struct fuse_iqueue)); 664 spin_lock_init(&fiq->lock); 665 init_waitqueue_head(&fiq->waitq); 666 INIT_LIST_HEAD(&fiq->pending); 667 INIT_LIST_HEAD(&fiq->interrupts); 668 fiq->forget_list_tail = &fiq->forget_list_head; 669 fiq->connected = 1; 670 fiq->ops = ops; 671 fiq->priv = priv; 672} 673 674static void fuse_pqueue_init(struct fuse_pqueue *fpq) 675{ 676 unsigned int i; 677 678 spin_lock_init(&fpq->lock); 679 for (i = 0; i < FUSE_PQ_HASH_SIZE; i++) 680 INIT_LIST_HEAD(&fpq->processing[i]); 681 INIT_LIST_HEAD(&fpq->io); 682 fpq->connected = 1; 683} 684 685void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm, 686 struct user_namespace *user_ns, 687 const struct fuse_iqueue_ops *fiq_ops, void *fiq_priv) 688{ 689 memset(fc, 0, sizeof(*fc)); 690 spin_lock_init(&fc->lock); 691 spin_lock_init(&fc->bg_lock); 692 init_rwsem(&fc->killsb); 693 refcount_set(&fc->count, 1); 694 atomic_set(&fc->dev_count, 1); 695 init_waitqueue_head(&fc->blocked_waitq); 696 fuse_iqueue_init(&fc->iq, fiq_ops, fiq_priv); 697 INIT_LIST_HEAD(&fc->bg_queue); 698 INIT_LIST_HEAD(&fc->entry); 699 INIT_LIST_HEAD(&fc->devices); 700 atomic_set(&fc->num_waiting, 0); 701 fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND; 702 fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD; 703 atomic64_set(&fc->khctr, 0); 704 fc->polled_files = RB_ROOT; 705 fc->blocked = 0; 706 fc->initialized = 0; 707 fc->connected = 1; 708 atomic64_set(&fc->attr_version, 1); 709 get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key)); 710 fc->pid_ns = get_pid_ns(task_active_pid_ns(current)); 711 fc->user_ns = get_user_ns(user_ns); 712 fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ; 713 fc->max_pages_limit = FUSE_MAX_MAX_PAGES; 714 715 INIT_LIST_HEAD(&fc->mounts); 716 list_add(&fm->fc_entry, &fc->mounts); 717 fm->fc = fc; 718 refcount_set(&fm->count, 1); 719} 720EXPORT_SYMBOL_GPL(fuse_conn_init); 721 722void fuse_conn_put(struct fuse_conn *fc) 723{ 724 if (refcount_dec_and_test(&fc->count)) { 725 struct fuse_iqueue *fiq = &fc->iq; 726 727 if (IS_ENABLED(CONFIG_FUSE_DAX)) 728 fuse_dax_conn_free(fc); 729 if (fiq->ops->release) 730 fiq->ops->release(fiq); 731 put_pid_ns(fc->pid_ns); 732 put_user_ns(fc->user_ns); 733 fc->release(fc); 734 } 735} 736EXPORT_SYMBOL_GPL(fuse_conn_put); 737 738struct fuse_conn *fuse_conn_get(struct fuse_conn *fc) 739{ 740 refcount_inc(&fc->count); 741 return fc; 742} 743EXPORT_SYMBOL_GPL(fuse_conn_get); 744 745void fuse_mount_put(struct fuse_mount *fm) 746{ 747 if (refcount_dec_and_test(&fm->count)) { 748 if (fm->fc) 749 fuse_conn_put(fm->fc); 750 kfree(fm); 751 } 752} 753EXPORT_SYMBOL_GPL(fuse_mount_put); 754 755struct fuse_mount *fuse_mount_get(struct fuse_mount *fm) 756{ 757 refcount_inc(&fm->count); 758 return fm; 759} 760EXPORT_SYMBOL_GPL(fuse_mount_get); 761 762static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned mode) 763{ 764 struct fuse_attr attr; 765 memset(&attr, 0, sizeof(attr)); 766 767 attr.mode = mode; 768 attr.ino = FUSE_ROOT_ID; 769 attr.nlink = 1; 770 return fuse_iget(sb, 1, 0, &attr, 0, 0); 771} 772 773struct fuse_inode_handle { 774 u64 nodeid; 775 u32 generation; 776}; 777 778static struct dentry *fuse_get_dentry(struct super_block *sb, 779 struct fuse_inode_handle *handle) 780{ 781 struct fuse_conn *fc = get_fuse_conn_super(sb); 782 struct inode *inode; 783 struct dentry *entry; 784 int err = -ESTALE; 785 786 if (handle->nodeid == 0) 787 goto out_err; 788 789 inode = ilookup5(sb, handle->nodeid, fuse_inode_eq, &handle->nodeid); 790 if (!inode) { 791 struct fuse_entry_out outarg; 792 const struct qstr name = QSTR_INIT(".", 1); 793 794 if (!fc->export_support) 795 goto out_err; 796 797 err = fuse_lookup_name(sb, handle->nodeid, &name, &outarg, 798 &inode); 799 if (err && err != -ENOENT) 800 goto out_err; 801 if (err || !inode) { 802 err = -ESTALE; 803 goto out_err; 804 } 805 err = -EIO; 806 if (get_node_id(inode) != handle->nodeid) 807 goto out_iput; 808 } 809 err = -ESTALE; 810 if (inode->i_generation != handle->generation) 811 goto out_iput; 812 813 entry = d_obtain_alias(inode); 814 if (!IS_ERR(entry) && get_node_id(inode) != FUSE_ROOT_ID) 815 fuse_invalidate_entry_cache(entry); 816 817 return entry; 818 819 out_iput: 820 iput(inode); 821 out_err: 822 return ERR_PTR(err); 823} 824 825static int fuse_encode_fh(struct inode *inode, u32 *fh, int *max_len, 826 struct inode *parent) 827{ 828 int len = parent ? 6 : 3; 829 u64 nodeid; 830 u32 generation; 831 832 if (*max_len < len) { 833 *max_len = len; 834 return FILEID_INVALID; 835 } 836 837 nodeid = get_fuse_inode(inode)->nodeid; 838 generation = inode->i_generation; 839 840 fh[0] = (u32)(nodeid >> 32); 841 fh[1] = (u32)(nodeid & 0xffffffff); 842 fh[2] = generation; 843 844 if (parent) { 845 nodeid = get_fuse_inode(parent)->nodeid; 846 generation = parent->i_generation; 847 848 fh[3] = (u32)(nodeid >> 32); 849 fh[4] = (u32)(nodeid & 0xffffffff); 850 fh[5] = generation; 851 } 852 853 *max_len = len; 854 return parent ? 0x82 : 0x81; 855} 856 857static struct dentry *fuse_fh_to_dentry(struct super_block *sb, 858 struct fid *fid, int fh_len, int fh_type) 859{ 860 struct fuse_inode_handle handle; 861 862 if ((fh_type != 0x81 && fh_type != 0x82) || fh_len < 3) 863 return NULL; 864 865 handle.nodeid = (u64) fid->raw[0] << 32; 866 handle.nodeid |= (u64) fid->raw[1]; 867 handle.generation = fid->raw[2]; 868 return fuse_get_dentry(sb, &handle); 869} 870 871static struct dentry *fuse_fh_to_parent(struct super_block *sb, 872 struct fid *fid, int fh_len, int fh_type) 873{ 874 struct fuse_inode_handle parent; 875 876 if (fh_type != 0x82 || fh_len < 6) 877 return NULL; 878 879 parent.nodeid = (u64) fid->raw[3] << 32; 880 parent.nodeid |= (u64) fid->raw[4]; 881 parent.generation = fid->raw[5]; 882 return fuse_get_dentry(sb, &parent); 883} 884 885static struct dentry *fuse_get_parent(struct dentry *child) 886{ 887 struct inode *child_inode = d_inode(child); 888 struct fuse_conn *fc = get_fuse_conn(child_inode); 889 struct inode *inode; 890 struct dentry *parent; 891 struct fuse_entry_out outarg; 892 const struct qstr name = QSTR_INIT("..", 2); 893 int err; 894 895 if (!fc->export_support) 896 return ERR_PTR(-ESTALE); 897 898 err = fuse_lookup_name(child_inode->i_sb, get_node_id(child_inode), 899 &name, &outarg, &inode); 900 if (err) { 901 if (err == -ENOENT) 902 return ERR_PTR(-ESTALE); 903 return ERR_PTR(err); 904 } 905 906 parent = d_obtain_alias(inode); 907 if (!IS_ERR(parent) && get_node_id(inode) != FUSE_ROOT_ID) 908 fuse_invalidate_entry_cache(parent); 909 910 return parent; 911} 912 913static const struct export_operations fuse_export_operations = { 914 .fh_to_dentry = fuse_fh_to_dentry, 915 .fh_to_parent = fuse_fh_to_parent, 916 .encode_fh = fuse_encode_fh, 917 .get_parent = fuse_get_parent, 918}; 919 920static const struct super_operations fuse_super_operations = { 921 .alloc_inode = fuse_alloc_inode, 922 .free_inode = fuse_free_inode, 923 .evict_inode = fuse_evict_inode, 924 .write_inode = fuse_write_inode, 925 .drop_inode = generic_delete_inode, 926 .put_super = fuse_put_super, 927 .umount_begin = fuse_umount_begin, 928 .statfs = fuse_statfs, 929 .show_options = fuse_show_options, 930}; 931 932static void sanitize_global_limit(unsigned *limit) 933{ 934 /* 935 * The default maximum number of async requests is calculated to consume 936 * 1/2^13 of the total memory, assuming 392 bytes per request. 937 */ 938 if (*limit == 0) 939 *limit = ((totalram_pages() << PAGE_SHIFT) >> 13) / 392; 940 941 if (*limit >= 1 << 16) 942 *limit = (1 << 16) - 1; 943} 944 945static int set_global_limit(const char *val, const struct kernel_param *kp) 946{ 947 int rv; 948 949 rv = param_set_uint(val, kp); 950 if (rv) 951 return rv; 952 953 sanitize_global_limit((unsigned *)kp->arg); 954 955 return 0; 956} 957 958static void process_init_limits(struct fuse_conn *fc, struct fuse_init_out *arg) 959{ 960 int cap_sys_admin = capable(CAP_SYS_ADMIN); 961 962 if (arg->minor < 13) 963 return; 964 965 sanitize_global_limit(&max_user_bgreq); 966 sanitize_global_limit(&max_user_congthresh); 967 968 spin_lock(&fc->bg_lock); 969 if (arg->max_background) { 970 fc->max_background = arg->max_background; 971 972 if (!cap_sys_admin && fc->max_background > max_user_bgreq) 973 fc->max_background = max_user_bgreq; 974 } 975 if (arg->congestion_threshold) { 976 fc->congestion_threshold = arg->congestion_threshold; 977 978 if (!cap_sys_admin && 979 fc->congestion_threshold > max_user_congthresh) 980 fc->congestion_threshold = max_user_congthresh; 981 } 982 spin_unlock(&fc->bg_lock); 983} 984 985struct fuse_init_args { 986 struct fuse_args args; 987 struct fuse_init_in in; 988 struct fuse_init_out out; 989}; 990 991static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args, 992 int error) 993{ 994 struct fuse_conn *fc = fm->fc; 995 struct fuse_init_args *ia = container_of(args, typeof(*ia), args); 996 struct fuse_init_out *arg = &ia->out; 997 bool ok = true; 998 999 if (error || arg->major != FUSE_KERNEL_VERSION) 1000 ok = false; 1001 else { 1002 unsigned long ra_pages; 1003 1004 process_init_limits(fc, arg); 1005 1006 if (arg->minor >= 6) { 1007 ra_pages = arg->max_readahead / PAGE_SIZE; 1008 if (arg->flags & FUSE_ASYNC_READ) 1009 fc->async_read = 1; 1010 if (!(arg->flags & FUSE_POSIX_LOCKS)) 1011 fc->no_lock = 1; 1012 if (arg->minor >= 17) { 1013 if (!(arg->flags & FUSE_FLOCK_LOCKS)) 1014 fc->no_flock = 1; 1015 } else { 1016 if (!(arg->flags & FUSE_POSIX_LOCKS)) 1017 fc->no_flock = 1; 1018 } 1019 if (arg->flags & FUSE_ATOMIC_O_TRUNC) 1020 fc->atomic_o_trunc = 1; 1021 if (arg->minor >= 9) { 1022 /* LOOKUP has dependency on proto version */ 1023 if (arg->flags & FUSE_EXPORT_SUPPORT) 1024 fc->export_support = 1; 1025 } 1026 if (arg->flags & FUSE_BIG_WRITES) 1027 fc->big_writes = 1; 1028 if (arg->flags & FUSE_DONT_MASK) 1029 fc->dont_mask = 1; 1030 if (arg->flags & FUSE_AUTO_INVAL_DATA) 1031 fc->auto_inval_data = 1; 1032 else if (arg->flags & FUSE_EXPLICIT_INVAL_DATA) 1033 fc->explicit_inval_data = 1; 1034 if (arg->flags & FUSE_DO_READDIRPLUS) { 1035 fc->do_readdirplus = 1; 1036 if (arg->flags & FUSE_READDIRPLUS_AUTO) 1037 fc->readdirplus_auto = 1; 1038 } 1039 if (arg->flags & FUSE_ASYNC_DIO) 1040 fc->async_dio = 1; 1041 if (arg->flags & FUSE_WRITEBACK_CACHE) 1042 fc->writeback_cache = 1; 1043 if (arg->flags & FUSE_PARALLEL_DIROPS) 1044 fc->parallel_dirops = 1; 1045 if (arg->flags & FUSE_HANDLE_KILLPRIV) 1046 fc->handle_killpriv = 1; 1047 if (arg->time_gran && arg->time_gran <= 1000000000) 1048 fm->sb->s_time_gran = arg->time_gran; 1049 if ((arg->flags & FUSE_POSIX_ACL)) { 1050 fc->default_permissions = 1; 1051 fc->posix_acl = 1; 1052 fm->sb->s_xattr = fuse_acl_xattr_handlers; 1053 } 1054 if (arg->flags & FUSE_CACHE_SYMLINKS) 1055 fc->cache_symlinks = 1; 1056 if (arg->flags & FUSE_ABORT_ERROR) 1057 fc->abort_err = 1; 1058 if (arg->flags & FUSE_MAX_PAGES) { 1059 fc->max_pages = 1060 min_t(unsigned int, fc->max_pages_limit, 1061 max_t(unsigned int, arg->max_pages, 1)); 1062 } 1063 if (IS_ENABLED(CONFIG_FUSE_DAX) && 1064 arg->flags & FUSE_MAP_ALIGNMENT && 1065 !fuse_dax_check_alignment(fc, arg->map_alignment)) { 1066 ok = false; 1067 } 1068 } else { 1069 ra_pages = fc->max_read / PAGE_SIZE; 1070 fc->no_lock = 1; 1071 fc->no_flock = 1; 1072 } 1073 1074 fm->sb->s_bdi->ra_pages = 1075 min(fm->sb->s_bdi->ra_pages, ra_pages); 1076 fc->minor = arg->minor; 1077 fc->max_write = arg->minor < 5 ? 4096 : arg->max_write; 1078 fc->max_write = max_t(unsigned, 4096, fc->max_write); 1079 fc->conn_init = 1; 1080 } 1081 kfree(ia); 1082 1083 if (!ok) { 1084 fc->conn_init = 0; 1085 fc->conn_error = 1; 1086 } 1087 1088 fuse_set_initialized(fc); 1089 wake_up_all(&fc->blocked_waitq); 1090} 1091 1092void fuse_send_init(struct fuse_mount *fm) 1093{ 1094 struct fuse_init_args *ia; 1095 1096 ia = kzalloc(sizeof(*ia), GFP_KERNEL | __GFP_NOFAIL); 1097 1098 ia->in.major = FUSE_KERNEL_VERSION; 1099 ia->in.minor = FUSE_KERNEL_MINOR_VERSION; 1100 ia->in.max_readahead = fm->sb->s_bdi->ra_pages * PAGE_SIZE; 1101 ia->in.flags |= 1102 FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | 1103 FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK | 1104 FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ | 1105 FUSE_FLOCK_LOCKS | FUSE_HAS_IOCTL_DIR | FUSE_AUTO_INVAL_DATA | 1106 FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO | 1107 FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT | 1108 FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL | 1109 FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS | 1110 FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA; 1111#ifdef CONFIG_FUSE_DAX 1112 if (fm->fc->dax) 1113 ia->in.flags |= FUSE_MAP_ALIGNMENT; 1114#endif 1115 if (fm->fc->auto_submounts) 1116 ia->in.flags |= FUSE_SUBMOUNTS; 1117 1118 ia->args.opcode = FUSE_INIT; 1119 ia->args.in_numargs = 1; 1120 ia->args.in_args[0].size = sizeof(ia->in); 1121 ia->args.in_args[0].value = &ia->in; 1122 ia->args.out_numargs = 1; 1123 /* Variable length argument used for backward compatibility 1124 with interface version < 7.5. Rest of init_out is zeroed 1125 by do_get_request(), so a short reply is not a problem */ 1126 ia->args.out_argvar = true; 1127 ia->args.out_args[0].size = sizeof(ia->out); 1128 ia->args.out_args[0].value = &ia->out; 1129 ia->args.force = true; 1130 ia->args.nocreds = true; 1131 ia->args.end = process_init_reply; 1132 1133 if (fuse_simple_background(fm, &ia->args, GFP_KERNEL) != 0) 1134 process_init_reply(fm, &ia->args, -ENOTCONN); 1135} 1136EXPORT_SYMBOL_GPL(fuse_send_init); 1137 1138void fuse_free_conn(struct fuse_conn *fc) 1139{ 1140 WARN_ON(!list_empty(&fc->devices)); 1141 kfree_rcu(fc, rcu); 1142} 1143EXPORT_SYMBOL_GPL(fuse_free_conn); 1144 1145static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb) 1146{ 1147 int err; 1148 char *suffix = ""; 1149 1150 if (sb->s_bdev) { 1151 suffix = "-fuseblk"; 1152 /* 1153 * sb->s_bdi points to blkdev's bdi however we want to redirect 1154 * it to our private bdi... 1155 */ 1156 bdi_put(sb->s_bdi); 1157 sb->s_bdi = &noop_backing_dev_info; 1158 } 1159 err = super_setup_bdi_name(sb, "%u:%u%s", MAJOR(fc->dev), 1160 MINOR(fc->dev), suffix); 1161 if (err) 1162 return err; 1163 1164 /* fuse does it's own writeback accounting */ 1165 sb->s_bdi->capabilities &= ~BDI_CAP_WRITEBACK_ACCT; 1166 sb->s_bdi->capabilities |= BDI_CAP_STRICTLIMIT; 1167 1168 /* 1169 * For a single fuse filesystem use max 1% of dirty + 1170 * writeback threshold. 1171 * 1172 * This gives about 1M of write buffer for memory maps on a 1173 * machine with 1G and 10% dirty_ratio, which should be more 1174 * than enough. 1175 * 1176 * Privileged users can raise it by writing to 1177 * 1178 * /sys/class/bdi/<bdi>/max_ratio 1179 */ 1180 bdi_set_max_ratio(sb->s_bdi, 1); 1181 1182 return 0; 1183} 1184 1185struct fuse_dev *fuse_dev_alloc(void) 1186{ 1187 struct fuse_dev *fud; 1188 struct list_head *pq; 1189 1190 fud = kzalloc(sizeof(struct fuse_dev), GFP_KERNEL); 1191 if (!fud) 1192 return NULL; 1193 1194 pq = kcalloc(FUSE_PQ_HASH_SIZE, sizeof(struct list_head), GFP_KERNEL); 1195 if (!pq) { 1196 kfree(fud); 1197 return NULL; 1198 } 1199 1200 fud->pq.processing = pq; 1201 fuse_pqueue_init(&fud->pq); 1202 1203 return fud; 1204} 1205EXPORT_SYMBOL_GPL(fuse_dev_alloc); 1206 1207void fuse_dev_install(struct fuse_dev *fud, struct fuse_conn *fc) 1208{ 1209 fud->fc = fuse_conn_get(fc); 1210 spin_lock(&fc->lock); 1211 list_add_tail(&fud->entry, &fc->devices); 1212 spin_unlock(&fc->lock); 1213} 1214EXPORT_SYMBOL_GPL(fuse_dev_install); 1215 1216struct fuse_dev *fuse_dev_alloc_install(struct fuse_conn *fc) 1217{ 1218 struct fuse_dev *fud; 1219 1220 fud = fuse_dev_alloc(); 1221 if (!fud) 1222 return NULL; 1223 1224 fuse_dev_install(fud, fc); 1225 return fud; 1226} 1227EXPORT_SYMBOL_GPL(fuse_dev_alloc_install); 1228 1229void fuse_dev_free(struct fuse_dev *fud) 1230{ 1231 struct fuse_conn *fc = fud->fc; 1232 1233 if (fc) { 1234 spin_lock(&fc->lock); 1235 list_del(&fud->entry); 1236 spin_unlock(&fc->lock); 1237 1238 fuse_conn_put(fc); 1239 } 1240 kfree(fud->pq.processing); 1241 kfree(fud); 1242} 1243EXPORT_SYMBOL_GPL(fuse_dev_free); 1244 1245static void fuse_fill_attr_from_inode(struct fuse_attr *attr, 1246 const struct fuse_inode *fi) 1247{ 1248 *attr = (struct fuse_attr){ 1249 .ino = fi->inode.i_ino, 1250 .size = fi->inode.i_size, 1251 .blocks = fi->inode.i_blocks, 1252 .atime = fi->inode.i_atime.tv_sec, 1253 .mtime = fi->inode.i_mtime.tv_sec, 1254 .ctime = fi->inode.i_ctime.tv_sec, 1255 .atimensec = fi->inode.i_atime.tv_nsec, 1256 .mtimensec = fi->inode.i_mtime.tv_nsec, 1257 .ctimensec = fi->inode.i_ctime.tv_nsec, 1258 .mode = fi->inode.i_mode, 1259 .nlink = fi->inode.i_nlink, 1260 .uid = fi->inode.i_uid.val, 1261 .gid = fi->inode.i_gid.val, 1262 .rdev = fi->inode.i_rdev, 1263 .blksize = 1u << fi->inode.i_blkbits, 1264 }; 1265} 1266 1267static void fuse_sb_defaults(struct super_block *sb) 1268{ 1269 sb->s_magic = FUSE_SUPER_MAGIC; 1270 sb->s_op = &fuse_super_operations; 1271 sb->s_xattr = fuse_xattr_handlers; 1272 sb->s_maxbytes = MAX_LFS_FILESIZE; 1273 sb->s_time_gran = 1; 1274 sb->s_export_op = &fuse_export_operations; 1275 sb->s_iflags |= SB_I_IMA_UNVERIFIABLE_SIGNATURE; 1276 if (sb->s_user_ns != &init_user_ns) 1277 sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER; 1278 sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION); 1279 1280 /* 1281 * If we are not in the initial user namespace posix 1282 * acls must be translated. 1283 */ 1284 if (sb->s_user_ns != &init_user_ns) 1285 sb->s_xattr = fuse_no_acl_xattr_handlers; 1286} 1287 1288int fuse_fill_super_submount(struct super_block *sb, 1289 struct fuse_inode *parent_fi) 1290{ 1291 struct fuse_mount *fm = get_fuse_mount_super(sb); 1292 struct super_block *parent_sb = parent_fi->inode.i_sb; 1293 struct fuse_attr root_attr; 1294 struct inode *root; 1295 1296 fuse_sb_defaults(sb); 1297 fm->sb = sb; 1298 1299 WARN_ON(sb->s_bdi != &noop_backing_dev_info); 1300 sb->s_bdi = bdi_get(parent_sb->s_bdi); 1301 1302 sb->s_xattr = parent_sb->s_xattr; 1303 sb->s_time_gran = parent_sb->s_time_gran; 1304 sb->s_blocksize = parent_sb->s_blocksize; 1305 sb->s_blocksize_bits = parent_sb->s_blocksize_bits; 1306 sb->s_subtype = kstrdup(parent_sb->s_subtype, GFP_KERNEL); 1307 if (parent_sb->s_subtype && !sb->s_subtype) 1308 return -ENOMEM; 1309 1310 fuse_fill_attr_from_inode(&root_attr, parent_fi); 1311 root = fuse_iget(sb, parent_fi->nodeid, 0, &root_attr, 0, 0); 1312 /* 1313 * This inode is just a duplicate, so it is not looked up and 1314 * its nlookup should not be incremented. fuse_iget() does 1315 * that, though, so undo it here. 1316 */ 1317 get_fuse_inode(root)->nlookup--; 1318 sb->s_d_op = &fuse_dentry_operations; 1319 sb->s_root = d_make_root(root); 1320 if (!sb->s_root) 1321 return -ENOMEM; 1322 1323 return 0; 1324} 1325 1326int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx) 1327{ 1328 struct fuse_dev *fud = NULL; 1329 struct fuse_mount *fm = get_fuse_mount_super(sb); 1330 struct fuse_conn *fc = fm->fc; 1331 struct inode *root; 1332 struct dentry *root_dentry; 1333 int err; 1334 1335 err = -EINVAL; 1336 if (sb->s_flags & SB_MANDLOCK) 1337 goto err; 1338 1339 fuse_sb_defaults(sb); 1340 1341 if (ctx->is_bdev) { 1342#ifdef CONFIG_BLOCK 1343 err = -EINVAL; 1344 if (!sb_set_blocksize(sb, ctx->blksize)) 1345 goto err; 1346#endif 1347 } else { 1348 sb->s_blocksize = PAGE_SIZE; 1349 sb->s_blocksize_bits = PAGE_SHIFT; 1350 } 1351 1352 sb->s_subtype = ctx->subtype; 1353 ctx->subtype = NULL; 1354 if (IS_ENABLED(CONFIG_FUSE_DAX)) { 1355 err = fuse_dax_conn_alloc(fc, ctx->dax_dev); 1356 if (err) 1357 goto err; 1358 } 1359 1360 if (ctx->fudptr) { 1361 err = -ENOMEM; 1362 fud = fuse_dev_alloc_install(fc); 1363 if (!fud) 1364 goto err_free_dax; 1365 } 1366 1367 fc->dev = sb->s_dev; 1368 fm->sb = sb; 1369 err = fuse_bdi_init(fc, sb); 1370 if (err) 1371 goto err_dev_free; 1372 1373 /* Handle umasking inside the fuse code */ 1374 if (sb->s_flags & SB_POSIXACL) 1375 fc->dont_mask = 1; 1376 sb->s_flags |= SB_POSIXACL; 1377 1378 fc->default_permissions = ctx->default_permissions; 1379 fc->allow_other = ctx->allow_other; 1380 fc->user_id = ctx->user_id; 1381 fc->group_id = ctx->group_id; 1382 fc->legacy_opts_show = ctx->legacy_opts_show; 1383 fc->max_read = max_t(unsigned int, 4096, ctx->max_read); 1384 fc->destroy = ctx->destroy; 1385 fc->no_control = ctx->no_control; 1386 fc->no_force_umount = ctx->no_force_umount; 1387 1388 err = -ENOMEM; 1389 root = fuse_get_root_inode(sb, ctx->rootmode); 1390 sb->s_d_op = &fuse_root_dentry_operations; 1391 root_dentry = d_make_root(root); 1392 if (!root_dentry) 1393 goto err_dev_free; 1394 /* Root dentry doesn't have .d_revalidate */ 1395 sb->s_d_op = &fuse_dentry_operations; 1396 1397 mutex_lock(&fuse_mutex); 1398 err = -EINVAL; 1399 if (ctx->fudptr && *ctx->fudptr) 1400 goto err_unlock; 1401 1402 err = fuse_ctl_add_conn(fc); 1403 if (err) 1404 goto err_unlock; 1405 1406 list_add_tail(&fc->entry, &fuse_conn_list); 1407 sb->s_root = root_dentry; 1408 if (ctx->fudptr) 1409 *ctx->fudptr = fud; 1410 mutex_unlock(&fuse_mutex); 1411 return 0; 1412 1413 err_unlock: 1414 mutex_unlock(&fuse_mutex); 1415 dput(root_dentry); 1416 err_dev_free: 1417 if (fud) 1418 fuse_dev_free(fud); 1419 err_free_dax: 1420 if (IS_ENABLED(CONFIG_FUSE_DAX)) 1421 fuse_dax_conn_free(fc); 1422 err: 1423 return err; 1424} 1425EXPORT_SYMBOL_GPL(fuse_fill_super_common); 1426 1427static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc) 1428{ 1429 struct fuse_fs_context *ctx = fsc->fs_private; 1430 struct file *file; 1431 int err; 1432 struct fuse_conn *fc; 1433 struct fuse_mount *fm; 1434 1435 err = -EINVAL; 1436 file = fget(ctx->fd); 1437 if (!file) 1438 goto err; 1439 1440 /* 1441 * Require mount to happen from the same user namespace which 1442 * opened /dev/fuse to prevent potential attacks. 1443 */ 1444 if ((file->f_op != &fuse_dev_operations) || 1445 (file->f_cred->user_ns != sb->s_user_ns)) 1446 goto err_fput; 1447 ctx->fudptr = &file->private_data; 1448 1449 fc = kmalloc(sizeof(*fc), GFP_KERNEL); 1450 err = -ENOMEM; 1451 if (!fc) 1452 goto err_fput; 1453 1454 fm = kzalloc(sizeof(*fm), GFP_KERNEL); 1455 if (!fm) { 1456 kfree(fc); 1457 goto err_fput; 1458 } 1459 1460 fuse_conn_init(fc, fm, sb->s_user_ns, &fuse_dev_fiq_ops, NULL); 1461 fc->release = fuse_free_conn; 1462 1463 sb->s_fs_info = fm; 1464 1465 err = fuse_fill_super_common(sb, ctx); 1466 if (err) 1467 goto err_put_conn; 1468 /* 1469 * atomic_dec_and_test() in fput() provides the necessary 1470 * memory barrier for file->private_data to be visible on all 1471 * CPUs after this 1472 */ 1473 fput(file); 1474 fuse_send_init(get_fuse_mount_super(sb)); 1475 return 0; 1476 1477 err_put_conn: 1478 fuse_mount_put(fm); 1479 sb->s_fs_info = NULL; 1480 err_fput: 1481 fput(file); 1482 err: 1483 return err; 1484} 1485 1486static int fuse_get_tree(struct fs_context *fc) 1487{ 1488 struct fuse_fs_context *ctx = fc->fs_private; 1489 1490 if (!ctx->fd_present || !ctx->rootmode_present || 1491 !ctx->user_id_present || !ctx->group_id_present) 1492 return -EINVAL; 1493 1494#ifdef CONFIG_BLOCK 1495 if (ctx->is_bdev) 1496 return get_tree_bdev(fc, fuse_fill_super); 1497#endif 1498 1499 return get_tree_nodev(fc, fuse_fill_super); 1500} 1501 1502static const struct fs_context_operations fuse_context_ops = { 1503 .free = fuse_free_fc, 1504 .parse_param = fuse_parse_param, 1505 .reconfigure = fuse_reconfigure, 1506 .get_tree = fuse_get_tree, 1507}; 1508 1509/* 1510 * Set up the filesystem mount context. 1511 */ 1512static int fuse_init_fs_context(struct fs_context *fc) 1513{ 1514 struct fuse_fs_context *ctx; 1515 1516 ctx = kzalloc(sizeof(struct fuse_fs_context), GFP_KERNEL); 1517 if (!ctx) 1518 return -ENOMEM; 1519 1520 ctx->max_read = ~0; 1521 ctx->blksize = FUSE_DEFAULT_BLKSIZE; 1522 ctx->legacy_opts_show = true; 1523 1524#ifdef CONFIG_BLOCK 1525 if (fc->fs_type == &fuseblk_fs_type) { 1526 ctx->is_bdev = true; 1527 ctx->destroy = true; 1528 } 1529#endif 1530 1531 fc->fs_private = ctx; 1532 fc->ops = &fuse_context_ops; 1533 return 0; 1534} 1535 1536bool fuse_mount_remove(struct fuse_mount *fm) 1537{ 1538 struct fuse_conn *fc = fm->fc; 1539 bool last = false; 1540 1541 down_write(&fc->killsb); 1542 list_del_init(&fm->fc_entry); 1543 if (list_empty(&fc->mounts)) 1544 last = true; 1545 up_write(&fc->killsb); 1546 1547 return last; 1548} 1549EXPORT_SYMBOL_GPL(fuse_mount_remove); 1550 1551void fuse_conn_destroy(struct fuse_mount *fm) 1552{ 1553 struct fuse_conn *fc = fm->fc; 1554 1555 if (fc->destroy) 1556 fuse_send_destroy(fm); 1557 1558 fuse_abort_conn(fc); 1559 fuse_wait_aborted(fc); 1560 1561 if (!list_empty(&fc->entry)) { 1562 mutex_lock(&fuse_mutex); 1563 list_del(&fc->entry); 1564 fuse_ctl_remove_conn(fc); 1565 mutex_unlock(&fuse_mutex); 1566 } 1567} 1568EXPORT_SYMBOL_GPL(fuse_conn_destroy); 1569 1570static void fuse_kill_sb_anon(struct super_block *sb) 1571{ 1572 struct fuse_mount *fm = get_fuse_mount_super(sb); 1573 bool last; 1574 1575 if (fm) { 1576 last = fuse_mount_remove(fm); 1577 if (last) 1578 fuse_conn_destroy(fm); 1579 } 1580 kill_anon_super(sb); 1581} 1582 1583static struct file_system_type fuse_fs_type = { 1584 .owner = THIS_MODULE, 1585 .name = "fuse", 1586 .fs_flags = FS_HAS_SUBTYPE | FS_USERNS_MOUNT, 1587 .init_fs_context = fuse_init_fs_context, 1588 .parameters = fuse_fs_parameters, 1589 .kill_sb = fuse_kill_sb_anon, 1590}; 1591MODULE_ALIAS_FS("fuse"); 1592 1593#ifdef CONFIG_BLOCK 1594static void fuse_kill_sb_blk(struct super_block *sb) 1595{ 1596 struct fuse_mount *fm = get_fuse_mount_super(sb); 1597 bool last; 1598 1599 if (sb->s_root) { 1600 last = fuse_mount_remove(fm); 1601 if (last) 1602 fuse_conn_destroy(fm); 1603 } 1604 kill_block_super(sb); 1605} 1606 1607static struct file_system_type fuseblk_fs_type = { 1608 .owner = THIS_MODULE, 1609 .name = "fuseblk", 1610 .init_fs_context = fuse_init_fs_context, 1611 .parameters = fuse_fs_parameters, 1612 .kill_sb = fuse_kill_sb_blk, 1613 .fs_flags = FS_REQUIRES_DEV | FS_HAS_SUBTYPE, 1614}; 1615MODULE_ALIAS_FS("fuseblk"); 1616 1617static inline int register_fuseblk(void) 1618{ 1619 return register_filesystem(&fuseblk_fs_type); 1620} 1621 1622static inline void unregister_fuseblk(void) 1623{ 1624 unregister_filesystem(&fuseblk_fs_type); 1625} 1626#else 1627static inline int register_fuseblk(void) 1628{ 1629 return 0; 1630} 1631 1632static inline void unregister_fuseblk(void) 1633{ 1634} 1635#endif 1636 1637static void fuse_inode_init_once(void *foo) 1638{ 1639 struct inode *inode = foo; 1640 1641 inode_init_once(inode); 1642} 1643 1644static int __init fuse_fs_init(void) 1645{ 1646 int err; 1647 1648 fuse_inode_cachep = kmem_cache_create("fuse_inode", 1649 sizeof(struct fuse_inode), 0, 1650 SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT|SLAB_RECLAIM_ACCOUNT, 1651 fuse_inode_init_once); 1652 err = -ENOMEM; 1653 if (!fuse_inode_cachep) 1654 goto out; 1655 1656 err = register_fuseblk(); 1657 if (err) 1658 goto out2; 1659 1660 err = register_filesystem(&fuse_fs_type); 1661 if (err) 1662 goto out3; 1663 1664 return 0; 1665 1666 out3: 1667 unregister_fuseblk(); 1668 out2: 1669 kmem_cache_destroy(fuse_inode_cachep); 1670 out: 1671 return err; 1672} 1673 1674static void fuse_fs_cleanup(void) 1675{ 1676 unregister_filesystem(&fuse_fs_type); 1677 unregister_fuseblk(); 1678 1679 /* 1680 * Make sure all delayed rcu free inodes are flushed before we 1681 * destroy cache. 1682 */ 1683 rcu_barrier(); 1684 kmem_cache_destroy(fuse_inode_cachep); 1685} 1686 1687static struct kobject *fuse_kobj; 1688 1689static int fuse_sysfs_init(void) 1690{ 1691 int err; 1692 1693 fuse_kobj = kobject_create_and_add("fuse", fs_kobj); 1694 if (!fuse_kobj) { 1695 err = -ENOMEM; 1696 goto out_err; 1697 } 1698 1699 err = sysfs_create_mount_point(fuse_kobj, "connections"); 1700 if (err) 1701 goto out_fuse_unregister; 1702 1703 return 0; 1704 1705 out_fuse_unregister: 1706 kobject_put(fuse_kobj); 1707 out_err: 1708 return err; 1709} 1710 1711static void fuse_sysfs_cleanup(void) 1712{ 1713 sysfs_remove_mount_point(fuse_kobj, "connections"); 1714 kobject_put(fuse_kobj); 1715} 1716 1717static int __init fuse_init(void) 1718{ 1719 int res; 1720 1721 pr_info("init (API version %i.%i)\n", 1722 FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION); 1723 1724 INIT_LIST_HEAD(&fuse_conn_list); 1725 res = fuse_fs_init(); 1726 if (res) 1727 goto err; 1728 1729 res = fuse_dev_init(); 1730 if (res) 1731 goto err_fs_cleanup; 1732 1733 res = fuse_sysfs_init(); 1734 if (res) 1735 goto err_dev_cleanup; 1736 1737 res = fuse_ctl_init(); 1738 if (res) 1739 goto err_sysfs_cleanup; 1740 1741 sanitize_global_limit(&max_user_bgreq); 1742 sanitize_global_limit(&max_user_congthresh); 1743 1744 return 0; 1745 1746 err_sysfs_cleanup: 1747 fuse_sysfs_cleanup(); 1748 err_dev_cleanup: 1749 fuse_dev_cleanup(); 1750 err_fs_cleanup: 1751 fuse_fs_cleanup(); 1752 err: 1753 return res; 1754} 1755 1756static void __exit fuse_exit(void) 1757{ 1758 pr_debug("exit\n"); 1759 1760 fuse_ctl_cleanup(); 1761 fuse_sysfs_cleanup(); 1762 fuse_fs_cleanup(); 1763 fuse_dev_cleanup(); 1764} 1765 1766module_init(fuse_init); 1767module_exit(fuse_exit); 1768