/*
  FUSE: Filesystem in Userspace
  Copyright (C) 2001-2007 Miklos Szeredi <miklos@szeredi.hu>
  Copyright (C) 2017 Nikolaus Rath <Nikolaus@rath.org>
  Copyright (C) 2018 Valve, Inc

  This program can be distributed under the terms of the GNU GPLv2.
  See the file COPYING.
*/

/** @file
 *
 * This is a "high-performance" version of passthrough_ll.c. While
 * passthrough_ll.c is designed to be as simple as possible, this
 * example is intended to be as efficient and correct as possible.
 *
 * passthrough_hp.cc mirrors a specified "source" directory under a
 * specified mountpoint with as much fidelity and performance as
 * possible.
 *
 * If --nocache is specified, the source directory may be changed
 * directly even while mounted and the filesystem will continue
 * to work correctly.
 *
 * Without --nocache, the source directory is assumed to be modified
 * only through the passthrough filesystem. This enables much better
 * performance, but if changes are made directly to the source, they
 * may not be immediately visible under the mountpoint and further
 * access to the mountpoint may result in incorrect behavior,
 * including data-loss.
 *
 * On its own, this filesystem fulfills no practical purpose. It is
 * intended as a template upon which additional functionality can be
 * built.
 *
 * Unless --nocache is specified, it is only possible to write to files
 * for which the mounting user has read permissions. This is because
 * the writeback cache requires the kernel to be able to issue read
 * requests for all files (which the passthrough filesystem cannot
 * satisfy if it can't read the file in the underlying filesystem).
 *
 * ## Source code ##
 * \include passthrough_hp.cc
 */

#define FUSE_USE_VERSION FUSE_MAKE_VERSION(3, 12)

#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif

// C includes
#include <dirent.h>
#include <err.h>
#include <errno.h>
#include <ftw.h>
#include <fuse_lowlevel.h>
#include <inttypes.h>
#include <string.h>
#include <sys/file.h>
#include <sys/resource.h>
#include <sys/xattr.h>
#include <time.h>
#include <unistd.h>
#include <pthread.h>
#include <limits.h>

// C++ includes
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <list>
#include "cxxopts.hpp"
#include <mutex>
#include <fstream>
#include <thread>
#include <iomanip>

using namespace std;

#define SFS_DEFAULT_THREADS "-1" // take libfuse value as default
#define SFS_DEFAULT_CLONE_FD "0"

/* We are re-using pointers to our `struct sfs_inode` and `struct
   sfs_dirp` elements as inodes and file handles. This means that we
   must be able to store a pointer in both a fuse_ino_t
   variable and a uint64_t variable (used for file handles). */
static_assert(sizeof(fuse_ino_t) >= sizeof(void*),
              "void* must fit into fuse_ino_t");
static_assert(sizeof(fuse_ino_t) >= sizeof(uint64_t),
              "fuse_ino_t must be at least 64 bits");


/* Forward declarations */
struct Inode;
static Inode& get_inode(fuse_ino_t ino);
static void forget_one(fuse_ino_t ino, uint64_t n);

// Uniquely identifies a file in the source directory tree. This could
// be simplified to just ino_t since we require the source directory
// not to contain any mountpoints. This hasn't been done yet in case
// we need to reconsider this constraint (but relaxing this would have
// the drawback that we can no longer re-use inode numbers, and thus
// readdir() would need to do a full lookup() in order to report the
// right inode number).
106typedef std::pair<ino_t, dev_t> SrcId; 107 108// Define a hash function for SrcId 109namespace std { 110 template<> 111 struct hash<SrcId> { 112 size_t operator()(const SrcId& id) const { 113 return hash<ino_t>{}(id.first) ^ hash<dev_t>{}(id.second); 114 } 115 }; 116} 117 118// Maps files in the source directory tree to inodes 119typedef std::unordered_map<SrcId, Inode> InodeMap; 120 121struct Inode { 122 int fd {-1}; 123 dev_t src_dev {0}; 124 ino_t src_ino {0}; 125 int generation {0}; 126 uint64_t nopen {0}; 127 uint64_t nlookup {0}; 128 std::mutex m; 129 130 // Delete copy constructor and assignments. We could implement 131 // move if we need it. 132 Inode() = default; 133 Inode(const Inode&) = delete; 134 Inode(Inode&& inode) = delete; 135 Inode& operator=(Inode&& inode) = delete; 136 Inode& operator=(const Inode&) = delete; 137 138 ~Inode() { 139 if(fd > 0) 140 close(fd); 141 } 142}; 143 144struct Fs { 145 // Must be acquired *after* any Inode.m locks. 146 std::mutex mutex; 147 InodeMap inodes; // protected by mutex 148 Inode root; 149 double timeout; 150 bool debug; 151 bool debug_fuse; 152 bool foreground; 153 std::string source; 154 size_t blocksize; 155 dev_t src_dev; 156 bool nosplice; 157 bool nocache; 158 size_t num_threads; 159 bool clone_fd; 160 std::string fuse_mount_options; 161}; 162static Fs fs{}; 163 164 165#define FUSE_BUF_COPY_FLAGS \ 166 (fs.nosplice ? 
\ 167 FUSE_BUF_NO_SPLICE : \ 168 static_cast<fuse_buf_copy_flags>(0)) 169 170 171static Inode& get_inode(fuse_ino_t ino) { 172 if (ino == FUSE_ROOT_ID) 173 return fs.root; 174 175 Inode* inode = reinterpret_cast<Inode*>(ino); 176 if(inode->fd == -1) { 177 cerr << "INTERNAL ERROR: Unknown inode " << ino << endl; 178 abort(); 179 } 180 return *inode; 181} 182 183 184static int get_fs_fd(fuse_ino_t ino) { 185 int fd = get_inode(ino).fd; 186 return fd; 187} 188 189 190static void sfs_init(void *userdata, fuse_conn_info *conn) { 191 (void)userdata; 192 if (conn->capable & FUSE_CAP_EXPORT_SUPPORT) 193 conn->want |= FUSE_CAP_EXPORT_SUPPORT; 194 195 if (fs.timeout && conn->capable & FUSE_CAP_WRITEBACK_CACHE) 196 conn->want |= FUSE_CAP_WRITEBACK_CACHE; 197 198 if (conn->capable & FUSE_CAP_FLOCK_LOCKS) 199 conn->want |= FUSE_CAP_FLOCK_LOCKS; 200 201 if (fs.nosplice) { 202 // FUSE_CAP_SPLICE_READ is enabled in libfuse3 by default, 203 // see do_init() in in fuse_lowlevel.c 204 // Just unset both, in case FUSE_CAP_SPLICE_WRITE would also get enabled 205 // by default. 
206 conn->want &= ~FUSE_CAP_SPLICE_READ; 207 conn->want &= ~FUSE_CAP_SPLICE_WRITE; 208 } else { 209 if (conn->capable & FUSE_CAP_SPLICE_WRITE) 210 conn->want |= FUSE_CAP_SPLICE_WRITE; 211 if (conn->capable & FUSE_CAP_SPLICE_READ) 212 conn->want |= FUSE_CAP_SPLICE_READ; 213 } 214} 215 216 217static void sfs_getattr(fuse_req_t req, fuse_ino_t ino, fuse_file_info *fi) { 218 (void)fi; 219 Inode& inode = get_inode(ino); 220 struct stat attr; 221 auto res = fstatat(inode.fd, "", &attr, 222 AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); 223 if (res == -1) { 224 fuse_reply_err(req, errno); 225 return; 226 } 227 fuse_reply_attr(req, &attr, fs.timeout); 228} 229 230 231static void do_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, 232 int valid, struct fuse_file_info* fi) { 233 Inode& inode = get_inode(ino); 234 int ifd = inode.fd; 235 int res; 236 237 if (valid & FUSE_SET_ATTR_MODE) { 238 if (fi) { 239 res = fchmod(fi->fh, attr->st_mode); 240 } else { 241 char procname[64]; 242 sprintf(procname, "/proc/self/fd/%i", ifd); 243 res = chmod(procname, attr->st_mode); 244 } 245 if (res == -1) 246 goto out_err; 247 } 248 if (valid & (FUSE_SET_ATTR_UID | FUSE_SET_ATTR_GID)) { 249 uid_t uid = (valid & FUSE_SET_ATTR_UID) ? attr->st_uid : static_cast<uid_t>(-1); 250 gid_t gid = (valid & FUSE_SET_ATTR_GID) ? 
attr->st_gid : static_cast<gid_t>(-1); 251 252 res = fchownat(ifd, "", uid, gid, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); 253 if (res == -1) 254 goto out_err; 255 } 256 if (valid & FUSE_SET_ATTR_SIZE) { 257 if (fi) { 258 res = ftruncate(fi->fh, attr->st_size); 259 } else { 260 char procname[64]; 261 sprintf(procname, "/proc/self/fd/%i", ifd); 262 res = truncate(procname, attr->st_size); 263 } 264 if (res == -1) 265 goto out_err; 266 } 267 if (valid & (FUSE_SET_ATTR_ATIME | FUSE_SET_ATTR_MTIME)) { 268 struct timespec tv[2]; 269 270 tv[0].tv_sec = 0; 271 tv[1].tv_sec = 0; 272 tv[0].tv_nsec = UTIME_OMIT; 273 tv[1].tv_nsec = UTIME_OMIT; 274 275 if (valid & FUSE_SET_ATTR_ATIME_NOW) 276 tv[0].tv_nsec = UTIME_NOW; 277 else if (valid & FUSE_SET_ATTR_ATIME) 278 tv[0] = attr->st_atim; 279 280 if (valid & FUSE_SET_ATTR_MTIME_NOW) 281 tv[1].tv_nsec = UTIME_NOW; 282 else if (valid & FUSE_SET_ATTR_MTIME) 283 tv[1] = attr->st_mtim; 284 285 if (fi) 286 res = futimens(fi->fh, tv); 287 else { 288#ifdef HAVE_UTIMENSAT 289 char procname[64]; 290 sprintf(procname, "/proc/self/fd/%i", ifd); 291 res = utimensat(AT_FDCWD, procname, tv, 0); 292#else 293 res = -1; 294 errno = EOPNOTSUPP; 295#endif 296 } 297 if (res == -1) 298 goto out_err; 299 } 300 return sfs_getattr(req, ino, fi); 301 302out_err: 303 fuse_reply_err(req, errno); 304} 305 306 307static void sfs_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, 308 int valid, fuse_file_info *fi) { 309 (void) ino; 310 do_setattr(req, ino, attr, valid, fi); 311} 312 313 314static int do_lookup(fuse_ino_t parent, const char *name, 315 fuse_entry_param *e) { 316 if (fs.debug) 317 cerr << "DEBUG: lookup(): name=" << name 318 << ", parent=" << parent << endl; 319 memset(e, 0, sizeof(*e)); 320 e->attr_timeout = fs.timeout; 321 e->entry_timeout = fs.timeout; 322 323 auto newfd = openat(get_fs_fd(parent), name, O_PATH | O_NOFOLLOW); 324 if (newfd == -1) 325 return errno; 326 327 auto res = fstatat(newfd, "", &e->attr, AT_EMPTY_PATH | 
AT_SYMLINK_NOFOLLOW); 328 if (res == -1) { 329 auto saveerr = errno; 330 close(newfd); 331 if (fs.debug) 332 cerr << "DEBUG: lookup(): fstatat failed" << endl; 333 return saveerr; 334 } 335 336 if (e->attr.st_dev != fs.src_dev) { 337 cerr << "WARNING: Mountpoints in the source directory tree will be hidden." << endl; 338 return ENOTSUP; 339 } else if (e->attr.st_ino == FUSE_ROOT_ID) { 340 cerr << "ERROR: Source directory tree must not include inode " 341 << FUSE_ROOT_ID << endl; 342 return EIO; 343 } 344 345 SrcId id {e->attr.st_ino, e->attr.st_dev}; 346 unique_lock<mutex> fs_lock {fs.mutex}; 347 Inode* inode_p; 348 try { 349 inode_p = &fs.inodes[id]; 350 } catch (std::bad_alloc&) { 351 return ENOMEM; 352 } 353 e->ino = reinterpret_cast<fuse_ino_t>(inode_p); 354 Inode& inode {*inode_p}; 355 e->generation = inode.generation; 356 357 if (inode.fd == -ENOENT) { // found unlinked inode 358 if (fs.debug) 359 cerr << "DEBUG: lookup(): inode " << e->attr.st_ino 360 << " recycled; generation=" << inode.generation << endl; 361 /* fallthrough to new inode but keep existing inode.nlookup */ 362 } 363 364 if (inode.fd > 0) { // found existing inode 365 fs_lock.unlock(); 366 if (fs.debug) 367 cerr << "DEBUG: lookup(): inode " << e->attr.st_ino 368 << " (userspace) already known; fd = " << inode.fd << endl; 369 lock_guard<mutex> g {inode.m}; 370 371 inode.nlookup++; 372 if (fs.debug) 373 cerr << "DEBUG:" << __func__ << ":" << __LINE__ << " " 374 << "inode " << inode.src_ino 375 << " count " << inode.nlookup << endl; 376 377 378 close(newfd); 379 } else { // no existing inode 380 /* This is just here to make Helgrind happy. 
It violates the 381 lock ordering requirement (inode.m must be acquired before 382 fs.mutex), but this is of no consequence because at this 383 point no other thread has access to the inode mutex */ 384 lock_guard<mutex> g {inode.m}; 385 inode.src_ino = e->attr.st_ino; 386 inode.src_dev = e->attr.st_dev; 387 388 inode.nlookup++; 389 if (fs.debug) 390 cerr << "DEBUG:" << __func__ << ":" << __LINE__ << " " 391 << "inode " << inode.src_ino 392 << " count " << inode.nlookup << endl; 393 394 inode.fd = newfd; 395 fs_lock.unlock(); 396 397 if (fs.debug) 398 cerr << "DEBUG: lookup(): created userspace inode " << e->attr.st_ino 399 << "; fd = " << inode.fd << endl; 400 } 401 402 return 0; 403} 404 405 406static void sfs_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) { 407 fuse_entry_param e {}; 408 auto err = do_lookup(parent, name, &e); 409 if (err == ENOENT) { 410 e.attr_timeout = fs.timeout; 411 e.entry_timeout = fs.timeout; 412 e.ino = e.attr.st_ino = 0; 413 fuse_reply_entry(req, &e); 414 } else if (err) { 415 if (err == ENFILE || err == EMFILE) 416 cerr << "ERROR: Reached maximum number of file descriptors." << endl; 417 fuse_reply_err(req, err); 418 } else { 419 fuse_reply_entry(req, &e); 420 } 421} 422 423 424static void mknod_symlink(fuse_req_t req, fuse_ino_t parent, 425 const char *name, mode_t mode, dev_t rdev, 426 const char *link) { 427 int res; 428 Inode& inode_p = get_inode(parent); 429 auto saverr = ENOMEM; 430 431 if (S_ISDIR(mode)) 432 res = mkdirat(inode_p.fd, name, mode); 433 else if (S_ISLNK(mode)) 434 res = symlinkat(link, inode_p.fd, name); 435 else 436 res = mknodat(inode_p.fd, name, mode, rdev); 437 saverr = errno; 438 if (res == -1) 439 goto out; 440 441 fuse_entry_param e; 442 saverr = do_lookup(parent, name, &e); 443 if (saverr) 444 goto out; 445 446 fuse_reply_entry(req, &e); 447 return; 448 449out: 450 if (saverr == ENFILE || saverr == EMFILE) 451 cerr << "ERROR: Reached maximum number of file descriptors." 
<< endl; 452 fuse_reply_err(req, saverr); 453} 454 455 456static void sfs_mknod(fuse_req_t req, fuse_ino_t parent, const char *name, 457 mode_t mode, dev_t rdev) { 458 mknod_symlink(req, parent, name, mode, rdev, nullptr); 459} 460 461 462static void sfs_mkdir(fuse_req_t req, fuse_ino_t parent, const char *name, 463 mode_t mode) { 464 mknod_symlink(req, parent, name, S_IFDIR | mode, 0, nullptr); 465} 466 467 468static void sfs_symlink(fuse_req_t req, const char *link, fuse_ino_t parent, 469 const char *name) { 470 mknod_symlink(req, parent, name, S_IFLNK, 0, link); 471} 472 473 474static void sfs_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, 475 const char *name) { 476 Inode& inode = get_inode(ino); 477 Inode& inode_p = get_inode(parent); 478 fuse_entry_param e {}; 479 480 e.attr_timeout = fs.timeout; 481 e.entry_timeout = fs.timeout; 482 483 char procname[64]; 484 sprintf(procname, "/proc/self/fd/%i", inode.fd); 485 auto res = linkat(AT_FDCWD, procname, inode_p.fd, name, AT_SYMLINK_FOLLOW); 486 if (res == -1) { 487 fuse_reply_err(req, errno); 488 return; 489 } 490 491 res = fstatat(inode.fd, "", &e.attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); 492 if (res == -1) { 493 fuse_reply_err(req, errno); 494 return; 495 } 496 e.ino = reinterpret_cast<fuse_ino_t>(&inode); 497 { 498 lock_guard<mutex> g {inode.m}; 499 inode.nlookup++; 500 if (fs.debug) 501 cerr << "DEBUG:" << __func__ << ":" << __LINE__ << " " 502 << "inode " << inode.src_ino 503 << " count " << inode.nlookup << endl; 504 } 505 506 fuse_reply_entry(req, &e); 507 return; 508} 509 510 511static void sfs_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name) { 512 Inode& inode_p = get_inode(parent); 513 lock_guard<mutex> g {inode_p.m}; 514 auto res = unlinkat(inode_p.fd, name, AT_REMOVEDIR); 515 fuse_reply_err(req, res == -1 ? 
errno : 0); 516} 517 518 519static void sfs_rename(fuse_req_t req, fuse_ino_t parent, const char *name, 520 fuse_ino_t newparent, const char *newname, 521 unsigned int flags) { 522 Inode& inode_p = get_inode(parent); 523 Inode& inode_np = get_inode(newparent); 524 if (flags) { 525 fuse_reply_err(req, EINVAL); 526 return; 527 } 528 529 auto res = renameat(inode_p.fd, name, inode_np.fd, newname); 530 fuse_reply_err(req, res == -1 ? errno : 0); 531} 532 533 534static void sfs_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) { 535 Inode& inode_p = get_inode(parent); 536 // Release inode.fd before last unlink like nfsd EXPORT_OP_CLOSE_BEFORE_UNLINK 537 // to test reused inode numbers. 538 // Skip this when inode has an open file and when writeback cache is enabled. 539 if (!fs.timeout) { 540 fuse_entry_param e; 541 auto err = do_lookup(parent, name, &e); 542 if (err) { 543 fuse_reply_err(req, err); 544 return; 545 } 546 if (e.attr.st_nlink == 1) { 547 Inode& inode = get_inode(e.ino); 548 lock_guard<mutex> g {inode.m}; 549 if (inode.fd > 0 && !inode.nopen) { 550 if (fs.debug) 551 cerr << "DEBUG: unlink: release inode " << e.attr.st_ino 552 << "; fd=" << inode.fd << endl; 553 lock_guard<mutex> g_fs {fs.mutex}; 554 close(inode.fd); 555 inode.fd = -ENOENT; 556 inode.generation++; 557 } 558 } 559 560 // decrease the ref which lookup above had increased 561 forget_one(e.ino, 1); 562 } 563 auto res = unlinkat(inode_p.fd, name, 0); 564 fuse_reply_err(req, res == -1 ? 
errno : 0); 565} 566 567 568static void forget_one(fuse_ino_t ino, uint64_t n) { 569 Inode& inode = get_inode(ino); 570 unique_lock<mutex> l {inode.m}; 571 572 if(n > inode.nlookup) { 573 cerr << "INTERNAL ERROR: Negative lookup count for inode " 574 << inode.src_ino << endl; 575 abort(); 576 } 577 inode.nlookup -= n; 578 579 if (fs.debug) 580 cerr << "DEBUG:" << __func__ << ":" << __LINE__ << " " 581 << "inode " << inode.src_ino 582 << " count " << inode.nlookup << endl; 583 584 if (!inode.nlookup) { 585 if (fs.debug) 586 cerr << "DEBUG: forget: cleaning up inode " << inode.src_ino << endl; 587 { 588 lock_guard<mutex> g_fs {fs.mutex}; 589 l.unlock(); 590 fs.inodes.erase({inode.src_ino, inode.src_dev}); 591 } 592 } else if (fs.debug) 593 cerr << "DEBUG: forget: inode " << inode.src_ino 594 << " lookup count now " << inode.nlookup << endl; 595} 596 597static void sfs_forget(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) { 598 forget_one(ino, nlookup); 599 fuse_reply_none(req); 600} 601 602 603static void sfs_forget_multi(fuse_req_t req, size_t count, 604 fuse_forget_data *forgets) { 605 for (int i = 0; i < count; i++) 606 forget_one(forgets[i].ino, forgets[i].nlookup); 607 fuse_reply_none(req); 608} 609 610 611static void sfs_readlink(fuse_req_t req, fuse_ino_t ino) { 612 Inode& inode = get_inode(ino); 613 char buf[PATH_MAX + 1]; 614 auto res = readlinkat(inode.fd, "", buf, sizeof(buf)); 615 if (res == -1) 616 fuse_reply_err(req, errno); 617 else if (res == sizeof(buf)) 618 fuse_reply_err(req, ENAMETOOLONG); 619 else { 620 buf[res] = '\0'; 621 fuse_reply_readlink(req, buf); 622 } 623} 624 625 626struct DirHandle { 627 DIR *dp {nullptr}; 628 off_t offset; 629 630 DirHandle() = default; 631 DirHandle(const DirHandle&) = delete; 632 DirHandle& operator=(const DirHandle&) = delete; 633 634 ~DirHandle() { 635 if(dp) 636 closedir(dp); 637 } 638}; 639 640 641static DirHandle *get_dir_handle(fuse_file_info *fi) { 642 return reinterpret_cast<DirHandle*>(fi->fh); 643} 644 
645 646static void sfs_opendir(fuse_req_t req, fuse_ino_t ino, fuse_file_info *fi) { 647 Inode& inode = get_inode(ino); 648 auto d = new (nothrow) DirHandle; 649 if (d == nullptr) { 650 fuse_reply_err(req, ENOMEM); 651 return; 652 } 653 654 // Make Helgrind happy - it can't know that there's an implicit 655 // synchronization due to the fact that other threads cannot 656 // access d until we've called fuse_reply_*. 657 lock_guard<mutex> g {inode.m}; 658 659 auto fd = openat(inode.fd, ".", O_RDONLY); 660 if (fd == -1) 661 goto out_errno; 662 663 // On success, dir stream takes ownership of fd, so we 664 // do not have to close it. 665 d->dp = fdopendir(fd); 666 if(d->dp == nullptr) 667 goto out_errno; 668 669 d->offset = 0; 670 671 fi->fh = reinterpret_cast<uint64_t>(d); 672 if(fs.timeout) { 673 fi->keep_cache = 1; 674 fi->cache_readdir = 1; 675 } 676 fuse_reply_open(req, fi); 677 return; 678 679out_errno: 680 auto error = errno; 681 delete d; 682 if (error == ENFILE || error == EMFILE) 683 cerr << "ERROR: Reached maximum number of file descriptors." << endl; 684 fuse_reply_err(req, error); 685} 686 687 688static bool is_dot_or_dotdot(const char *name) { 689 return name[0] == '.' && 690 (name[1] == '\0' || (name[1] == '.' 
&& name[2] == '\0')); 691} 692 693 694static void do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, 695 off_t offset, fuse_file_info *fi, const int plus) { 696 auto d = get_dir_handle(fi); 697 Inode& inode = get_inode(ino); 698 lock_guard<mutex> g {inode.m}; 699 char *p; 700 auto rem = size; 701 int err = 0, count = 0; 702 703 if (fs.debug) 704 cerr << "DEBUG: readdir(): started with offset " 705 << offset << endl; 706 707 auto buf = new (nothrow) char[size]; 708 if (!buf) { 709 fuse_reply_err(req, ENOMEM); 710 return; 711 } 712 p = buf; 713 714 if (offset != d->offset) { 715 if (fs.debug) 716 cerr << "DEBUG: readdir(): seeking to " << offset << endl; 717 seekdir(d->dp, offset); 718 d->offset = offset; 719 } 720 721 while (1) { 722 struct dirent *entry; 723 errno = 0; 724 entry = readdir(d->dp); 725 if (!entry) { 726 if(errno) { 727 err = errno; 728 if (fs.debug) 729 warn("DEBUG: readdir(): readdir failed with"); 730 goto error; 731 } 732 break; // End of stream 733 } 734 d->offset = entry->d_off; 735 if (is_dot_or_dotdot(entry->d_name)) 736 continue; 737 738 fuse_entry_param e{}; 739 size_t entsize; 740 if (plus) { 741 err = do_lookup(ino, entry->d_name, &e); 742 if (err) 743 goto error; 744 entsize = fuse_add_direntry_plus(req, p, rem, entry->d_name, &e, entry->d_off); 745 } else { 746 e.attr.st_ino = entry->d_ino; 747 e.attr.st_mode = entry->d_type << 12; 748 entsize = fuse_add_direntry(req, p, rem, entry->d_name, &e.attr, entry->d_off); 749 } 750 751 if (entsize > rem) { 752 if (fs.debug) 753 cerr << "DEBUG: readdir(): buffer full, returning data. 
" << endl; 754 if (plus) 755 forget_one(e.ino, 1); 756 break; 757 } 758 759 p += entsize; 760 rem -= entsize; 761 count++; 762 if (fs.debug) { 763 cerr << "DEBUG: readdir(): added to buffer: " << entry->d_name 764 << ", ino " << e.attr.st_ino << ", offset " << entry->d_off << endl; 765 } 766 } 767 err = 0; 768error: 769 770 // If there's an error, we can only signal it if we haven't stored 771 // any entries yet - otherwise we'd end up with wrong lookup 772 // counts for the entries that are already in the buffer. So we 773 // return what we've collected until that point. 774 if (err && rem == size) { 775 if (err == ENFILE || err == EMFILE) 776 cerr << "ERROR: Reached maximum number of file descriptors." << endl; 777 fuse_reply_err(req, err); 778 } else { 779 if (fs.debug) 780 cerr << "DEBUG: readdir(): returning " << count 781 << " entries, curr offset " << d->offset << endl; 782 fuse_reply_buf(req, buf, size - rem); 783 } 784 delete[] buf; 785 return; 786} 787 788 789static void sfs_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, 790 off_t offset, fuse_file_info *fi) { 791 // operation logging is done in readdir to reduce code duplication 792 do_readdir(req, ino, size, offset, fi, 0); 793} 794 795 796static void sfs_readdirplus(fuse_req_t req, fuse_ino_t ino, size_t size, 797 off_t offset, fuse_file_info *fi) { 798 // operation logging is done in readdir to reduce code duplication 799 do_readdir(req, ino, size, offset, fi, 1); 800} 801 802 803static void sfs_releasedir(fuse_req_t req, fuse_ino_t ino, fuse_file_info *fi) { 804 (void) ino; 805 auto d = get_dir_handle(fi); 806 delete d; 807 fuse_reply_err(req, 0); 808} 809 810 811static void sfs_create(fuse_req_t req, fuse_ino_t parent, const char *name, 812 mode_t mode, fuse_file_info *fi) { 813 Inode& inode_p = get_inode(parent); 814 815 auto fd = openat(inode_p.fd, name, 816 (fi->flags | O_CREAT) & ~O_NOFOLLOW, mode); 817 if (fd == -1) { 818 auto err = errno; 819 if (err == ENFILE || err == EMFILE) 820 cerr 
<< "ERROR: Reached maximum number of file descriptors." << endl; 821 fuse_reply_err(req, err); 822 return; 823 } 824 825 fi->fh = fd; 826 fuse_entry_param e; 827 auto err = do_lookup(parent, name, &e); 828 if (err) { 829 if (err == ENFILE || err == EMFILE) 830 cerr << "ERROR: Reached maximum number of file descriptors." << endl; 831 fuse_reply_err(req, err); 832 return; 833 } 834 835 Inode& inode = get_inode(e.ino); 836 lock_guard<mutex> g {inode.m}; 837 inode.nopen++; 838 fuse_reply_create(req, &e, fi); 839} 840 841 842static void sfs_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, 843 fuse_file_info *fi) { 844 (void) ino; 845 int res; 846 int fd = dirfd(get_dir_handle(fi)->dp); 847 if (datasync) 848 res = fdatasync(fd); 849 else 850 res = fsync(fd); 851 fuse_reply_err(req, res == -1 ? errno : 0); 852} 853 854 855static void sfs_open(fuse_req_t req, fuse_ino_t ino, fuse_file_info *fi) { 856 Inode& inode = get_inode(ino); 857 858 /* With writeback cache, kernel may send read requests even 859 when userspace opened write-only */ 860 if (fs.timeout && (fi->flags & O_ACCMODE) == O_WRONLY) { 861 fi->flags &= ~O_ACCMODE; 862 fi->flags |= O_RDWR; 863 } 864 865 /* With writeback cache, O_APPEND is handled by the kernel. This 866 breaks atomicity (since the file may change in the underlying 867 filesystem, so that the kernel's idea of the end of the file 868 isn't accurate anymore). However, no process should modify the 869 file in the underlying filesystem once it has been read, so 870 this is not a problem. */ 871 if (fs.timeout && fi->flags & O_APPEND) 872 fi->flags &= ~O_APPEND; 873 874 /* Unfortunately we cannot use inode.fd, because this was opened 875 with O_PATH (so it doesn't allow read/write access). 
*/ 876 char buf[64]; 877 sprintf(buf, "/proc/self/fd/%i", inode.fd); 878 auto fd = open(buf, fi->flags & ~O_NOFOLLOW); 879 if (fd == -1) { 880 auto err = errno; 881 if (err == ENFILE || err == EMFILE) 882 cerr << "ERROR: Reached maximum number of file descriptors." << endl; 883 fuse_reply_err(req, err); 884 return; 885 } 886 887 lock_guard<mutex> g {inode.m}; 888 inode.nopen++; 889 fi->keep_cache = (fs.timeout != 0); 890 fi->noflush = (fs.timeout == 0 && (fi->flags & O_ACCMODE) == O_RDONLY); 891 fi->fh = fd; 892 fuse_reply_open(req, fi); 893} 894 895 896static void sfs_release(fuse_req_t req, fuse_ino_t ino, fuse_file_info *fi) { 897 Inode& inode = get_inode(ino); 898 lock_guard<mutex> g {inode.m}; 899 inode.nopen--; 900 close(fi->fh); 901 fuse_reply_err(req, 0); 902} 903 904 905static void sfs_flush(fuse_req_t req, fuse_ino_t ino, fuse_file_info *fi) { 906 (void) ino; 907 auto res = close(dup(fi->fh)); 908 fuse_reply_err(req, res == -1 ? errno : 0); 909} 910 911 912static void sfs_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, 913 fuse_file_info *fi) { 914 (void) ino; 915 int res; 916 if (datasync) 917 res = fdatasync(fi->fh); 918 else 919 res = fsync(fi->fh); 920 fuse_reply_err(req, res == -1 ? 
errno : 0); 921} 922 923 924static void do_read(fuse_req_t req, size_t size, off_t off, fuse_file_info *fi) { 925 926 fuse_bufvec buf = FUSE_BUFVEC_INIT(size); 927 buf.buf[0].flags = static_cast<fuse_buf_flags>( 928 FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK); 929 buf.buf[0].fd = fi->fh; 930 buf.buf[0].pos = off; 931 932 fuse_reply_data(req, &buf, FUSE_BUF_COPY_FLAGS); 933} 934 935static void sfs_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, 936 fuse_file_info *fi) { 937 (void) ino; 938 do_read(req, size, off, fi); 939} 940 941 942static void do_write_buf(fuse_req_t req, size_t size, off_t off, 943 fuse_bufvec *in_buf, fuse_file_info *fi) { 944 fuse_bufvec out_buf = FUSE_BUFVEC_INIT(size); 945 out_buf.buf[0].flags = static_cast<fuse_buf_flags>( 946 FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK); 947 out_buf.buf[0].fd = fi->fh; 948 out_buf.buf[0].pos = off; 949 950 auto res = fuse_buf_copy(&out_buf, in_buf, FUSE_BUF_COPY_FLAGS); 951 if (res < 0) 952 fuse_reply_err(req, -res); 953 else 954 fuse_reply_write(req, (size_t)res); 955} 956 957 958static void sfs_write_buf(fuse_req_t req, fuse_ino_t ino, fuse_bufvec *in_buf, 959 off_t off, fuse_file_info *fi) { 960 (void) ino; 961 auto size {fuse_buf_size(in_buf)}; 962 do_write_buf(req, size, off, in_buf, fi); 963} 964 965 966static void sfs_statfs(fuse_req_t req, fuse_ino_t ino) { 967 struct statvfs stbuf; 968 969 auto res = fstatvfs(get_fs_fd(ino), &stbuf); 970 if (res == -1) 971 fuse_reply_err(req, errno); 972 else 973 fuse_reply_statfs(req, &stbuf); 974} 975 976 977#ifdef HAVE_POSIX_FALLOCATE 978static void sfs_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, 979 off_t offset, off_t length, fuse_file_info *fi) { 980 (void) ino; 981 if (mode) { 982 fuse_reply_err(req, EOPNOTSUPP); 983 return; 984 } 985 986 auto err = posix_fallocate(fi->fh, offset, length); 987 fuse_reply_err(req, err); 988} 989#endif 990 991static void sfs_flock(fuse_req_t req, fuse_ino_t ino, fuse_file_info *fi, 992 int op) { 993 (void) ino; 994 auto res 
= flock(fi->fh, op); 995 fuse_reply_err(req, res == -1 ? errno : 0); 996} 997 998 999#ifdef HAVE_SETXATTR 1000static void sfs_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, 1001 size_t size) { 1002 char *value = nullptr; 1003 Inode& inode = get_inode(ino); 1004 ssize_t ret; 1005 int saverr; 1006 1007 char procname[64]; 1008 sprintf(procname, "/proc/self/fd/%i", inode.fd); 1009 1010 if (size) { 1011 value = new (nothrow) char[size]; 1012 if (value == nullptr) { 1013 saverr = ENOMEM; 1014 goto out; 1015 } 1016 1017 ret = getxattr(procname, name, value, size); 1018 if (ret == -1) 1019 goto out_err; 1020 saverr = 0; 1021 if (ret == 0) 1022 goto out; 1023 1024 fuse_reply_buf(req, value, ret); 1025 } else { 1026 ret = getxattr(procname, name, nullptr, 0); 1027 if (ret == -1) 1028 goto out_err; 1029 1030 fuse_reply_xattr(req, ret); 1031 } 1032out_free: 1033 delete[] value; 1034 return; 1035 1036out_err: 1037 saverr = errno; 1038out: 1039 fuse_reply_err(req, saverr); 1040 goto out_free; 1041} 1042 1043 1044static void sfs_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) { 1045 char *value = nullptr; 1046 Inode& inode = get_inode(ino); 1047 ssize_t ret; 1048 int saverr; 1049 1050 char procname[64]; 1051 sprintf(procname, "/proc/self/fd/%i", inode.fd); 1052 1053 if (size) { 1054 value = new (nothrow) char[size]; 1055 if (value == nullptr) { 1056 saverr = ENOMEM; 1057 goto out; 1058 } 1059 1060 ret = listxattr(procname, value, size); 1061 if (ret == -1) 1062 goto out_err; 1063 saverr = 0; 1064 if (ret == 0) 1065 goto out; 1066 1067 fuse_reply_buf(req, value, ret); 1068 } else { 1069 ret = listxattr(procname, nullptr, 0); 1070 if (ret == -1) 1071 goto out_err; 1072 1073 fuse_reply_xattr(req, ret); 1074 } 1075out_free: 1076 delete[] value; 1077 return; 1078out_err: 1079 saverr = errno; 1080out: 1081 fuse_reply_err(req, saverr); 1082 goto out_free; 1083} 1084 1085 1086static void sfs_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name, 1087 const char 
*value, size_t size, int flags) { 1088 Inode& inode = get_inode(ino); 1089 ssize_t ret; 1090 int saverr; 1091 1092 char procname[64]; 1093 sprintf(procname, "/proc/self/fd/%i", inode.fd); 1094 1095 ret = setxattr(procname, name, value, size, flags); 1096 saverr = ret == -1 ? errno : 0; 1097 1098 fuse_reply_err(req, saverr); 1099} 1100 1101 1102static void sfs_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name) { 1103 char procname[64]; 1104 Inode& inode = get_inode(ino); 1105 ssize_t ret; 1106 int saverr; 1107 1108 sprintf(procname, "/proc/self/fd/%i", inode.fd); 1109 ret = removexattr(procname, name); 1110 saverr = ret == -1 ? errno : 0; 1111 1112 fuse_reply_err(req, saverr); 1113} 1114#endif 1115 1116 1117static void assign_operations(fuse_lowlevel_ops &sfs_oper) { 1118 sfs_oper.init = sfs_init; 1119 sfs_oper.lookup = sfs_lookup; 1120 sfs_oper.mkdir = sfs_mkdir; 1121 sfs_oper.mknod = sfs_mknod; 1122 sfs_oper.symlink = sfs_symlink; 1123 sfs_oper.link = sfs_link; 1124 sfs_oper.unlink = sfs_unlink; 1125 sfs_oper.rmdir = sfs_rmdir; 1126 sfs_oper.rename = sfs_rename; 1127 sfs_oper.forget = sfs_forget; 1128 sfs_oper.forget_multi = sfs_forget_multi; 1129 sfs_oper.getattr = sfs_getattr; 1130 sfs_oper.setattr = sfs_setattr; 1131 sfs_oper.readlink = sfs_readlink; 1132 sfs_oper.opendir = sfs_opendir; 1133 sfs_oper.readdir = sfs_readdir; 1134 sfs_oper.readdirplus = sfs_readdirplus; 1135 sfs_oper.releasedir = sfs_releasedir; 1136 sfs_oper.fsyncdir = sfs_fsyncdir; 1137 sfs_oper.create = sfs_create; 1138 sfs_oper.open = sfs_open; 1139 sfs_oper.release = sfs_release; 1140 sfs_oper.flush = sfs_flush; 1141 sfs_oper.fsync = sfs_fsync; 1142 sfs_oper.read = sfs_read; 1143 sfs_oper.write_buf = sfs_write_buf; 1144 sfs_oper.statfs = sfs_statfs; 1145#ifdef HAVE_POSIX_FALLOCATE 1146 sfs_oper.fallocate = sfs_fallocate; 1147#endif 1148 sfs_oper.flock = sfs_flock; 1149#ifdef HAVE_SETXATTR 1150 sfs_oper.setxattr = sfs_setxattr; 1151 sfs_oper.getxattr = sfs_getxattr; 1152 
sfs_oper.listxattr = sfs_listxattr; 1153 sfs_oper.removexattr = sfs_removexattr; 1154#endif 1155} 1156 1157static void print_usage(char *prog_name) { 1158 cout << "Usage: " << prog_name << " --help\n" 1159 << " " << prog_name << " [options] <source> <mountpoint>\n"; 1160} 1161 1162static cxxopts::ParseResult parse_wrapper(cxxopts::Options& parser, int& argc, char**& argv) { 1163 try { 1164 return parser.parse(argc, argv); 1165 } catch (cxxopts::option_not_exists_exception& exc) { 1166 std::cout << argv[0] << ": " << exc.what() << std::endl; 1167 print_usage(argv[0]); 1168 exit(2); 1169 } 1170} 1171 1172 1173static void string_split(std::string s, std::vector<std::string>& out, std::string delimiter) { 1174 size_t pos_start = 0, pos_end, delim_len = delimiter.length(); 1175 std::string token; 1176 1177 while ((pos_end = s.find(delimiter, pos_start)) != std::string::npos) { 1178 token = s.substr(pos_start, pos_end - pos_start); 1179 pos_start = pos_end + delim_len; 1180 out.push_back(token); 1181 } 1182 1183 out.push_back(s.substr(pos_start)); 1184} 1185 1186 1187static std::string string_join(const std::vector<std::string>& elems, char delim) 1188{ 1189 std::ostringstream out; 1190 for (auto ii = elems.begin(); ii != elems.end(); ++ii) { 1191 out << (*ii); 1192 if (ii + 1 != elems.end()) { 1193 out << delim; 1194 } 1195 } 1196 return out.str(); 1197} 1198 1199 1200static cxxopts::ParseResult parse_options(int argc, char **argv) { 1201 cxxopts::Options opt_parser(argv[0]); 1202 std::vector<std::string> mount_options; 1203 opt_parser.add_options() 1204 ("debug", "Enable filesystem debug messages") 1205 ("debug-fuse", "Enable libfuse debug messages") 1206 ("foreground", "Run in foreground") 1207 ("help", "Print help") 1208 ("nocache", "Disable all caching") 1209 ("nosplice", "Do not use splice(2) to transfer data") 1210 ("single", "Run single-threaded") 1211 ("o", "Mount options (see mount.fuse(5) - only use if you know what " 1212 "you are doing)", 
cxxopts::value(mount_options)) 1213 ("num-threads", "Number of libfuse worker threads", 1214 cxxopts::value<int>()->default_value(SFS_DEFAULT_THREADS)) 1215 ("clone-fd", "use separate fuse device fd for each thread", 1216 cxxopts::value<bool>()->implicit_value(SFS_DEFAULT_CLONE_FD)); 1217 1218 1219 // FIXME: Find a better way to limit the try clause to just 1220 // opt_parser.parse() (cf. https://github.com/jarro2783/cxxopts/issues/146) 1221 auto options = parse_wrapper(opt_parser, argc, argv); 1222 1223 if (options.count("help")) { 1224 print_usage(argv[0]); 1225 // Strip everything before the option list from the 1226 // default help string. 1227 auto help = opt_parser.help(); 1228 std::cout << std::endl << "options:" 1229 << help.substr(help.find("\n\n") + 1, string::npos); 1230 exit(0); 1231 1232 } else if (argc != 3) { 1233 std::cout << argv[0] << ": invalid number of arguments\n"; 1234 print_usage(argv[0]); 1235 exit(2); 1236 } 1237 1238 fs.debug = options.count("debug") != 0; 1239 fs.debug_fuse = options.count("debug-fuse") != 0; 1240 1241 fs.foreground = options.count("foreground") != 0; 1242 if (fs.debug || fs.debug_fuse) 1243 fs.foreground = true; 1244 1245 fs.nosplice = options.count("nosplice") != 0; 1246 fs.num_threads = options["num-threads"].as<int>(); 1247 fs.clone_fd = options["clone-fd"].as<bool>(); 1248 char* resolved_path = realpath(argv[1], NULL); 1249 if (resolved_path == NULL) 1250 warn("WARNING: realpath() failed with"); 1251 fs.source = std::string {resolved_path}; 1252 free(resolved_path); 1253 1254 std::vector<std::string> flattened_mount_opts; 1255 for (auto opt : mount_options) { 1256 string_split(opt, flattened_mount_opts, ","); 1257 } 1258 1259 bool found_fsname = false; 1260 for (auto opt : flattened_mount_opts) { 1261 if (opt.find("fsname=") == 0) { 1262 found_fsname = true; 1263 continue; 1264 } 1265 1266 /* Filter out some obviously incorrect options. 
*/ 1267 if (opt == "fd") { 1268 std::cout << argv[0] << ": Unsupported mount option: " << opt << "\n"; 1269 print_usage(argv[0]); 1270 exit(2); 1271 } 1272 } 1273 if (!found_fsname) { 1274 flattened_mount_opts.push_back("fsname=" + fs.source); 1275 } 1276 flattened_mount_opts.push_back("default_permissions"); 1277 fs.fuse_mount_options = string_join(flattened_mount_opts, ','); 1278 return options; 1279} 1280 1281 1282static void maximize_fd_limit() { 1283 struct rlimit lim {}; 1284 auto res = getrlimit(RLIMIT_NOFILE, &lim); 1285 if (res != 0) { 1286 warn("WARNING: getrlimit() failed with"); 1287 return; 1288 } 1289 lim.rlim_cur = lim.rlim_max; 1290 res = setrlimit(RLIMIT_NOFILE, &lim); 1291 if (res != 0) 1292 warn("WARNING: setrlimit() failed with"); 1293} 1294 1295 1296int main(int argc, char *argv[]) { 1297 1298 struct fuse_loop_config *loop_config = NULL; 1299 1300 // Parse command line options 1301 auto options {parse_options(argc, argv)}; 1302 1303 // We need an fd for every dentry in our the filesystem that the 1304 // kernel knows about. This is way more than most processes need, 1305 // so try to get rid of any resource softlimit. 1306 maximize_fd_limit(); 1307 1308 // Initialize filesystem root 1309 fs.root.fd = -1; 1310 fs.root.nlookup = 9999; 1311 fs.timeout = options.count("nocache") ? 
0 : 86400.0; 1312 1313 struct stat stat; 1314 auto ret = lstat(fs.source.c_str(), &stat); 1315 if (ret == -1) 1316 err(1, "ERROR: failed to stat source (\"%s\")", fs.source.c_str()); 1317 if (!S_ISDIR(stat.st_mode)) 1318 errx(1, "ERROR: source is not a directory"); 1319 fs.src_dev = stat.st_dev; 1320 1321 fs.root.fd = open(fs.source.c_str(), O_PATH); 1322 if (fs.root.fd == -1) 1323 err(1, "ERROR: open(\"%s\", O_PATH)", fs.source.c_str()); 1324 1325 // Initialize fuse 1326 fuse_args args = FUSE_ARGS_INIT(0, nullptr); 1327 if (fuse_opt_add_arg(&args, argv[0]) || 1328 fuse_opt_add_arg(&args, "-o") || 1329 fuse_opt_add_arg(&args, fs.fuse_mount_options.c_str()) || 1330 (fs.debug_fuse && fuse_opt_add_arg(&args, "-odebug"))) 1331 errx(3, "ERROR: Out of memory"); 1332 1333 fuse_lowlevel_ops sfs_oper {}; 1334 assign_operations(sfs_oper); 1335 auto se = fuse_session_new(&args, &sfs_oper, sizeof(sfs_oper), &fs); 1336 if (se == nullptr) 1337 goto err_out1; 1338 1339 if (fuse_set_signal_handlers(se) != 0) 1340 goto err_out2; 1341 1342 // Don't apply umask, use modes exactly as specified 1343 umask(0); 1344 1345 // Mount and run main loop 1346 loop_config = fuse_loop_cfg_create(); 1347 1348 if (fs.num_threads != -1) 1349 fuse_loop_cfg_set_idle_threads(loop_config, fs.num_threads); 1350 1351 if (fuse_session_mount(se, argv[2]) != 0) 1352 goto err_out3; 1353 1354 fuse_daemonize(fs.foreground); 1355 1356 if (options.count("single")) 1357 ret = fuse_session_loop(se); 1358 else 1359 ret = fuse_session_loop_mt(se, loop_config); 1360 1361 1362 fuse_session_unmount(se); 1363 1364err_out3: 1365 fuse_remove_signal_handlers(se); 1366err_out2: 1367 fuse_session_destroy(se); 1368err_out1: 1369 1370 fuse_loop_cfg_destroy(loop_config); 1371 fuse_opt_free_args(&args); 1372 1373 return ret ? 1 : 0; 1374} 1375 1376