/*
  FUSE: Filesystem in Userspace
  Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>

  This program can be distributed under the terms of the GNU GPL.
  See the file COPYING.
*/

#include "fuse_i.h"

#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/sched/signal.h>
#include <linux/module.h>
#include <linux/compat.h>
#include <linux/swap.h>
#include <linux/falloc.h>
#include <linux/uio.h>
#include <linux/fs.h>

static struct page **fuse_pages_alloc(unsigned int npages, gfp_t flags,
				      struct fuse_page_desc **desc)
{
	struct page **pages;

	pages = kzalloc(npages * (sizeof(struct page *) +
				  sizeof(struct fuse_page_desc)), flags);
	*desc = (void *) (pages + npages);

	return pages;
}

static int fuse_send_open(struct fuse_mount *fm, u64 nodeid, struct file *file,
			  int opcode, struct fuse_open_out *outargp)
{
	struct fuse_open_in inarg;
	FUSE_ARGS(args);

	memset(&inarg, 0, sizeof(inarg));
	inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY);
	if (!fm->fc->atomic_o_trunc)
		inarg.flags &= ~O_TRUNC;
	args.opcode = opcode;
	args.nodeid = nodeid;
	args.in_numargs = 1;
	args.in_args[0].size = sizeof(inarg);
	args.in_args[0].value = &inarg;
	args.out_numargs = 1;
	args.out_args[0].size = sizeof(*outargp);
	args.out_args[0].value = outargp;

	return fuse_simple_request(fm, &args);
}

struct fuse_release_args {
	struct fuse_args args;
	struct fuse_release_in inarg;
	struct inode *inode;
};

struct fuse_file *fuse_file_alloc(struct fuse_mount *fm)
{
	struct fuse_file *ff;

	ff = kzalloc(sizeof(struct fuse_file), GFP_KERNEL_ACCOUNT);
	if (unlikely(!ff))
		return NULL;

	ff->fm = fm;
	ff->release_args = kzalloc(sizeof(*ff->release_args),
				   GFP_KERNEL_ACCOUNT);
	if (!ff->release_args) {
		kfree(ff);
		return NULL;
	}

	INIT_LIST_HEAD(&ff->write_entry);
	mutex_init(&ff->readdir.lock);
	refcount_set(&ff->count, 1);
	RB_CLEAR_NODE(&ff->polled_node);
	init_waitqueue_head(&ff->poll_wait);

	ff->kh = atomic64_inc_return(&fm->fc->khctr);

	return ff;
}

void fuse_file_free(struct fuse_file *ff)
{
	kfree(ff->release_args);
	mutex_destroy(&ff->readdir.lock);
	kfree(ff);
}

static struct fuse_file *fuse_file_get(struct fuse_file *ff)
{
	refcount_inc(&ff->count);
	return ff;
}

static void fuse_release_end(struct fuse_mount *fm, struct fuse_args *args,
			     int error)
{
	struct fuse_release_args *ra = container_of(args, typeof(*ra), args);

	iput(ra->inode);
	kfree(ra);
}

static void fuse_file_put(struct fuse_file *ff, bool sync, bool isdir)
{
	if (refcount_dec_and_test(&ff->count)) {
		struct fuse_args *args = &ff->release_args->args;

		if (isdir ? ff->fm->fc->no_opendir : ff->fm->fc->no_open) {
			/* Do nothing when client does not implement 'open' */
			fuse_release_end(ff->fm, args, 0);
		} else if (sync) {
			fuse_simple_request(ff->fm, args);
			fuse_release_end(ff->fm, args, 0);
		} else {
			args->end = fuse_release_end;
			if (fuse_simple_background(ff->fm, args,
						   GFP_KERNEL | __GFP_NOFAIL))
				fuse_release_end(ff->fm, args, -ENOTCONN);
		}
		kfree(ff);
	}
}

int fuse_do_open(struct fuse_mount *fm, u64 nodeid, struct file *file,
		 bool isdir)
{
	struct fuse_conn *fc = fm->fc;
	struct fuse_file *ff;
	int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN;
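	/*
	 * If a previous OPEN/OPENDIR was answered with ENOSYS, the server is
	 * assumed not to implement it (fc->no_open / fc->no_opendir); the
	 * request is then skipped and the defaults set up below are kept.
	 */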

	ff = fuse_file_alloc(fm);
	if (!ff)
		return -ENOMEM;

	ff->fh = 0;
	/* Default for no-open */
	ff->open_flags = FOPEN_KEEP_CACHE | (isdir ? FOPEN_CACHE_DIR : 0);
	if (isdir ? !fc->no_opendir : !fc->no_open) {
		struct fuse_open_out outarg;
		int err;

		err = fuse_send_open(fm, nodeid, file, opcode, &outarg);
		if (!err) {
			ff->fh = outarg.fh;
			ff->open_flags = outarg.open_flags;

		} else if (err != -ENOSYS) {
			fuse_file_free(ff);
			return err;
		} else {
			if (isdir)
				fc->no_opendir = 1;
			else
				fc->no_open = 1;
		}
	}

	if (isdir)
		ff->open_flags &= ~FOPEN_DIRECT_IO;

	ff->nodeid = nodeid;
	file->private_data = ff;

	return 0;
}
EXPORT_SYMBOL_GPL(fuse_do_open);

static void fuse_link_write_file(struct file *file)
{
	struct inode *inode = file_inode(file);
	struct fuse_inode *fi = get_fuse_inode(inode);
	struct fuse_file *ff = file->private_data;
	/*
	 * file may be written through mmap, so chain it onto the
	 * inode's write_files list
	 */
	spin_lock(&fi->lock);
	if (list_empty(&ff->write_entry))
		list_add(&ff->write_entry, &fi->write_files);
	spin_unlock(&fi->lock);
}

void fuse_finish_open(struct inode *inode, struct file *file)
{
	struct fuse_file *ff = file->private_data;
	struct fuse_conn *fc = get_fuse_conn(inode);

	if (ff->open_flags & FOPEN_STREAM)
		stream_open(inode, file);
	else if (ff->open_flags & FOPEN_NONSEEKABLE)
		nonseekable_open(inode, file);

	if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC)) {
		struct fuse_inode *fi = get_fuse_inode(inode);

		spin_lock(&fi->lock);
		fi->attr_version = atomic64_inc_return(&fc->attr_version);
		i_size_write(inode, 0);
		spin_unlock(&fi->lock);
		fuse_invalidate_attr(inode);
		if (fc->writeback_cache)
			file_update_time(file);
	}
	if ((file->f_mode & FMODE_WRITE) && fc->writeback_cache)
		fuse_link_write_file(file);
}

int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
{
	struct fuse_mount *fm = get_fuse_mount(inode);
	struct fuse_conn *fc = fm->fc;
	int err;
	bool is_wb_truncate = (file->f_flags & O_TRUNC) &&
			      fc->atomic_o_trunc &&
			      fc->writeback_cache;
	bool dax_truncate = (file->f_flags & O_TRUNC) &&
			    fc->atomic_o_trunc && FUSE_IS_DAX(inode);

	if (fuse_is_bad(inode))
		return -EIO;

	err = generic_file_open(inode, file);
	if (err)
		return err;

	if (is_wb_truncate || dax_truncate)
		inode_lock(inode);

	if (dax_truncate) {
		down_write(&get_fuse_inode(inode)->i_mmap_sem);
		err = fuse_dax_break_layouts(inode, 0, 0);
		if (err)
			goto out_inode_unlock;
	}

	if (is_wb_truncate || dax_truncate)
		fuse_set_nowrite(inode);

	err = fuse_do_open(fm, get_node_id(inode), file, isdir);
	if (!err)
		fuse_finish_open(inode, file);

	if (is_wb_truncate || dax_truncate)
		fuse_release_nowrite(inode);
	if (!err) {
		struct fuse_file *ff = file->private_data;

		if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC))
			truncate_pagecache(inode, 0);
		else if (!(ff->open_flags & FOPEN_KEEP_CACHE))
			invalidate_inode_pages2(inode->i_mapping);
	}
	if (dax_truncate)
		up_write(&get_fuse_inode(inode)->i_mmap_sem);

out_inode_unlock:
	if (is_wb_truncate || dax_truncate)
		inode_unlock(inode);

	return err;
}
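
/*
 * Note: fuse_prepare_release() only detaches the file from the inode's
 * write list and the connection's poll tree and fills in the RELEASE
 * arguments; the request itself is sent later, from fuse_file_put(),
 * once the last reference to the fuse_file is dropped.
 */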
static void fuse_prepare_release(struct fuse_inode *fi, struct fuse_file *ff,
				 int flags, int opcode)
{
	struct fuse_conn *fc = ff->fm->fc;
	struct fuse_release_args *ra = ff->release_args;

	/* Inode is NULL on error path of fuse_create_open() */
	if (likely(fi)) {
		spin_lock(&fi->lock);
		list_del(&ff->write_entry);
		spin_unlock(&fi->lock);
	}
	spin_lock(&fc->lock);
	if (!RB_EMPTY_NODE(&ff->polled_node))
		rb_erase(&ff->polled_node, &fc->polled_files);
	spin_unlock(&fc->lock);

	wake_up_interruptible_all(&ff->poll_wait);

	ra->inarg.fh = ff->fh;
	ra->inarg.flags = flags;
	ra->args.in_numargs = 1;
	ra->args.in_args[0].size = sizeof(struct fuse_release_in);
	ra->args.in_args[0].value = &ra->inarg;
	ra->args.opcode = opcode;
	ra->args.nodeid = ff->nodeid;
	ra->args.force = true;
	ra->args.nocreds = true;
}

void fuse_release_common(struct file *file, bool isdir)
{
	struct fuse_inode *fi = get_fuse_inode(file_inode(file));
	struct fuse_file *ff = file->private_data;
	struct fuse_release_args *ra = ff->release_args;
	int opcode = isdir ? FUSE_RELEASEDIR : FUSE_RELEASE;

	fuse_prepare_release(fi, ff, file->f_flags, opcode);

	if (ff->flock) {
		ra->inarg.release_flags |= FUSE_RELEASE_FLOCK_UNLOCK;
		ra->inarg.lock_owner = fuse_lock_owner_id(ff->fm->fc,
							  (fl_owner_t) file);
	}
	/* Hold inode until release is finished */
	ra->inode = igrab(file_inode(file));

	/*
	 * Normally this will send the RELEASE request; however, if
	 * some asynchronous READ or WRITE requests are outstanding,
	 * the sending will be delayed.
	 *
	 * Make the release synchronous if this is a fuseblk mount:
	 * synchronous RELEASE is allowed (and desirable) in this case
	 * because the server can be trusted not to screw up.
	 */
	fuse_file_put(ff, ff->fm->fc->destroy, isdir);
}

static int fuse_open(struct inode *inode, struct file *file)
{
	return fuse_open_common(inode, file, false);
}

static int fuse_release(struct inode *inode, struct file *file)
{
	struct fuse_conn *fc = get_fuse_conn(inode);

	/* see fuse_vma_close() for !writeback_cache case */
	if (fc->writeback_cache)
		write_inode_now(inode, 1);

	fuse_release_common(file, false);

	/* return value is ignored by VFS */
	return 0;
}

void fuse_sync_release(struct fuse_inode *fi, struct fuse_file *ff, int flags)
{
	WARN_ON(refcount_read(&ff->count) > 1);
	fuse_prepare_release(fi, ff, flags, FUSE_RELEASE);
	/*
	 * iput(NULL) is a no-op and since the refcount is 1 and everything's
	 * synchronous, we are fine with not doing igrab() here
	 */
	fuse_file_put(ff, true, false);
}
EXPORT_SYMBOL_GPL(fuse_sync_release);

/*
 * Scramble the ID space with XTEA, so that the value of the files_struct
 * pointer is not exposed to userspace.
 */
u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id)
{
	u32 *k = fc->scramble_key;
	u64 v = (unsigned long) id;
	u32 v0 = v;
	u32 v1 = v >> 32;
	u32 sum = 0;
	int i;

	for (i = 0; i < 32; i++) {
		v0 += ((v1 << 4 ^ v1 >> 5) + v1) ^ (sum + k[sum & 3]);
		sum += 0x9E3779B9;
		v1 += ((v0 << 4 ^ v0 >> 5) + v0) ^ (sum + k[sum>>11 & 3]);
	}

	return (u64) v0 + ((u64) v1 << 32);
}

struct fuse_writepage_args {
	struct fuse_io_args ia;
	struct rb_node writepages_entry;
	struct list_head queue_entry;
	struct fuse_writepage_args *next;
	struct inode *inode;
};

static struct fuse_writepage_args *fuse_find_writeback(struct fuse_inode *fi,
					pgoff_t idx_from, pgoff_t idx_to)
{
	struct rb_node *n;

	n = fi->writepages.rb_node;

	while (n) {
		struct fuse_writepage_args *wpa;
		pgoff_t curr_index;

		wpa = rb_entry(n, struct fuse_writepage_args, writepages_entry);
		WARN_ON(get_fuse_inode(wpa->inode) != fi);
		curr_index = wpa->ia.write.in.offset >> PAGE_SHIFT;
		if (idx_from >= curr_index + wpa->ia.ap.num_pages)
			n = n->rb_right;
		else if (idx_to < curr_index)
			n = n->rb_left;
		else
			return wpa;
	}
	return NULL;
}

/*
 * Check if any page in a range is under writeback
 *
 * This is currently done by walking the list of writepage requests
 * for the inode, which can be pretty inefficient.
 */
static bool fuse_range_is_writeback(struct inode *inode, pgoff_t idx_from,
				    pgoff_t idx_to)
{
	struct fuse_inode *fi = get_fuse_inode(inode);
	bool found;

	spin_lock(&fi->lock);
	found = fuse_find_writeback(fi, idx_from, idx_to);
	spin_unlock(&fi->lock);

	return found;
}

static inline bool fuse_page_is_writeback(struct inode *inode, pgoff_t index)
{
	return fuse_range_is_writeback(inode, index, index);
}

/*
 * Wait for page writeback to be completed.
 *
 * Since fuse doesn't rely on the VM writeback tracking, this has to
 * use some other means.
 */
static void fuse_wait_on_page_writeback(struct inode *inode, pgoff_t index)
{
	struct fuse_inode *fi = get_fuse_inode(inode);

	wait_event(fi->page_waitq, !fuse_page_is_writeback(inode, index));
}

/*
 * Wait for all pending writepages on the inode to finish.
 *
 * This is currently done by blocking further writes with FUSE_NOWRITE
 * and waiting for all sent writes to complete.
 *
 * This must be called under i_mutex, otherwise the FUSE_NOWRITE usage
 * could conflict with truncation.
 */
static void fuse_sync_writes(struct inode *inode)
{
	fuse_set_nowrite(inode);
	fuse_release_nowrite(inode);
}

static int fuse_flush(struct file *file, fl_owner_t id)
{
	struct inode *inode = file_inode(file);
	struct fuse_mount *fm = get_fuse_mount(inode);
	struct fuse_file *ff = file->private_data;
	struct fuse_flush_in inarg;
	FUSE_ARGS(args);
	int err;

	if (fuse_is_bad(inode))
		return -EIO;

	err = write_inode_now(inode, 1);
	if (err)
		return err;

	inode_lock(inode);
	fuse_sync_writes(inode);
	inode_unlock(inode);

	err = filemap_check_errors(file->f_mapping);
	if (err)
		return err;

	err = 0;
	if (fm->fc->no_flush)
		goto inval_attr_out;

	memset(&inarg, 0, sizeof(inarg));
	inarg.fh = ff->fh;
	inarg.lock_owner = fuse_lock_owner_id(fm->fc, id);
	args.opcode = FUSE_FLUSH;
	args.nodeid = get_node_id(inode);
	args.in_numargs = 1;
	args.in_args[0].size = sizeof(inarg);
	args.in_args[0].value = &inarg;
	args.force = true;

	err = fuse_simple_request(fm, &args);
	if (err == -ENOSYS) {
		fm->fc->no_flush = 1;
		err = 0;
	}

inval_attr_out:
	/*
	 * In-memory i_blocks is not maintained by fuse; if writeback cache
	 * is enabled, i_blocks from the cached attr may not be accurate.
	 */
	if (!err && fm->fc->writeback_cache)
		fuse_invalidate_attr(inode);
	return err;
}

int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
		      int datasync, int opcode)
{
	struct inode *inode = file->f_mapping->host;
	struct fuse_mount *fm = get_fuse_mount(inode);
	struct fuse_file *ff = file->private_data;
	FUSE_ARGS(args);
	struct fuse_fsync_in inarg;

	memset(&inarg, 0, sizeof(inarg));
	inarg.fh = ff->fh;
	inarg.fsync_flags = datasync ? FUSE_FSYNC_FDATASYNC : 0;
	args.opcode = opcode;
	args.nodeid = get_node_id(inode);
	args.in_numargs = 1;
	args.in_args[0].size = sizeof(inarg);
	args.in_args[0].value = &inarg;
	return fuse_simple_request(fm, &args);
}

static int fuse_fsync(struct file *file, loff_t start, loff_t end,
		      int datasync)
{
	struct inode *inode = file->f_mapping->host;
	struct fuse_conn *fc = get_fuse_conn(inode);
	int err;

	if (fuse_is_bad(inode))
		return -EIO;

	inode_lock(inode);

	/*
	 * Start writeback against all dirty pages of the inode, then
	 * wait for all outstanding writes, before sending the FSYNC
	 * request.
	 */
	err = file_write_and_wait_range(file, start, end);
	if (err)
		goto out;

	fuse_sync_writes(inode);

	/*
	 * Due to the implementation of fuse writeback,
	 * file_write_and_wait_range() does not catch errors.
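	 * (Writeback is completed as soon as the data is copied to the
	 * temporary pages, so an error from the server may only surface
	 * after the reply arrives.)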
	 * We have to do this directly after fuse_sync_writes()
	 */
	err = file_check_and_advance_wb_err(file);
	if (err)
		goto out;

	err = sync_inode_metadata(inode, 1);
	if (err)
		goto out;

	if (fc->no_fsync)
		goto out;

	err = fuse_fsync_common(file, start, end, datasync, FUSE_FSYNC);
	if (err == -ENOSYS) {
		fc->no_fsync = 1;
		err = 0;
	}
out:
	inode_unlock(inode);

	return err;
}

void fuse_read_args_fill(struct fuse_io_args *ia, struct file *file, loff_t pos,
			 size_t count, int opcode)
{
	struct fuse_file *ff = file->private_data;
	struct fuse_args *args = &ia->ap.args;

	ia->read.in.fh = ff->fh;
	ia->read.in.offset = pos;
	ia->read.in.size = count;
	ia->read.in.flags = file->f_flags;
	args->opcode = opcode;
	args->nodeid = ff->nodeid;
	args->in_numargs = 1;
	args->in_args[0].size = sizeof(ia->read.in);
	args->in_args[0].value = &ia->read.in;
	args->out_argvar = true;
	args->out_numargs = 1;
	args->out_args[0].size = count;
}

static void fuse_release_user_pages(struct fuse_args_pages *ap,
				    bool should_dirty)
{
	unsigned int i;

	for (i = 0; i < ap->num_pages; i++) {
		if (should_dirty)
			set_page_dirty_lock(ap->pages[i]);
		put_page(ap->pages[i]);
	}
}

static void fuse_io_release(struct kref *kref)
{
	kfree(container_of(kref, struct fuse_io_priv, refcnt));
}

static ssize_t fuse_get_res_by_io(struct fuse_io_priv *io)
{
	if (io->err)
		return io->err;

	if (io->bytes >= 0 && io->write)
		return -EIO;

	return io->bytes < 0 ? io->size : io->bytes;
}

/**
 * In case of short read, the caller sets 'pos' to the position of
 * actual end of fuse request in IO request. Otherwise, if bytes_requested
 * == bytes_transferred or rw == WRITE, the caller sets 'pos' to -1.
 *
 * An example:
 * User requested DIO read of 64K. It was split into two 32K fuse requests,
 * both submitted asynchronously. The first of them was ACKed by userspace as
 * fully completed (req->out.args[0].size == 32K) resulting in pos == -1. The
 * second request was ACKed as short, e.g. only 1K was read, resulting in
 * pos == 33K.
 *
 * Thus, when all fuse requests are completed, the minimal non-negative 'pos'
 * will be equal to the length of the longest contiguous fragment of
 * transferred data starting from the beginning of IO request.
 */
static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos)
{
	int left;

	spin_lock(&io->lock);
	if (err)
		io->err = io->err ?
: err; 663 else if (pos >= 0 && (io->bytes < 0 || pos < io->bytes)) 664 io->bytes = pos; 665 666 left = --io->reqs; 667 if (!left && io->blocking) 668 complete(io->done); 669 spin_unlock(&io->lock); 670 671 if (!left && !io->blocking) { 672 ssize_t res = fuse_get_res_by_io(io); 673 674 if (res >= 0) { 675 struct inode *inode = file_inode(io->iocb->ki_filp); 676 struct fuse_conn *fc = get_fuse_conn(inode); 677 struct fuse_inode *fi = get_fuse_inode(inode); 678 679 spin_lock(&fi->lock); 680 fi->attr_version = atomic64_inc_return(&fc->attr_version); 681 spin_unlock(&fi->lock); 682 } 683 684 io->iocb->ki_complete(io->iocb, res, 0); 685 } 686 687 kref_put(&io->refcnt, fuse_io_release); 688} 689 690static struct fuse_io_args *fuse_io_alloc(struct fuse_io_priv *io, 691 unsigned int npages) 692{ 693 struct fuse_io_args *ia; 694 695 ia = kzalloc(sizeof(*ia), GFP_KERNEL); 696 if (ia) { 697 ia->io = io; 698 ia->ap.pages = fuse_pages_alloc(npages, GFP_KERNEL, 699 &ia->ap.descs); 700 if (!ia->ap.pages) { 701 kfree(ia); 702 ia = NULL; 703 } 704 } 705 return ia; 706} 707 708static void fuse_io_free(struct fuse_io_args *ia) 709{ 710 kfree(ia->ap.pages); 711 kfree(ia); 712} 713 714static void fuse_aio_complete_req(struct fuse_mount *fm, struct fuse_args *args, 715 int err) 716{ 717 struct fuse_io_args *ia = container_of(args, typeof(*ia), ap.args); 718 struct fuse_io_priv *io = ia->io; 719 ssize_t pos = -1; 720 721 fuse_release_user_pages(&ia->ap, io->should_dirty); 722 723 if (err) { 724 /* Nothing */ 725 } else if (io->write) { 726 if (ia->write.out.size > ia->write.in.size) { 727 err = -EIO; 728 } else if (ia->write.in.size != ia->write.out.size) { 729 pos = ia->write.in.offset - io->offset + 730 ia->write.out.size; 731 } 732 } else { 733 u32 outsize = args->out_args[0].size; 734 735 if (ia->read.in.size != outsize) 736 pos = ia->read.in.offset - io->offset + outsize; 737 } 738 739 fuse_aio_complete(io, err, pos); 740 fuse_io_free(ia); 741} 742 743static ssize_t fuse_async_req_send(struct fuse_mount *fm, 744 struct fuse_io_args *ia, size_t num_bytes) 745{ 746 ssize_t err; 747 struct fuse_io_priv *io = ia->io; 748 749 spin_lock(&io->lock); 750 kref_get(&io->refcnt); 751 io->size += num_bytes; 752 io->reqs++; 753 spin_unlock(&io->lock); 754 755 ia->ap.args.end = fuse_aio_complete_req; 756 ia->ap.args.may_block = io->should_dirty; 757 err = fuse_simple_background(fm, &ia->ap.args, GFP_KERNEL); 758 if (err) 759 fuse_aio_complete_req(fm, &ia->ap.args, err); 760 761 return num_bytes; 762} 763 764static ssize_t fuse_send_read(struct fuse_io_args *ia, loff_t pos, size_t count, 765 fl_owner_t owner) 766{ 767 struct file *file = ia->io->iocb->ki_filp; 768 struct fuse_file *ff = file->private_data; 769 struct fuse_mount *fm = ff->fm; 770 771 fuse_read_args_fill(ia, file, pos, count, FUSE_READ); 772 if (owner != NULL) { 773 ia->read.in.read_flags |= FUSE_READ_LOCKOWNER; 774 ia->read.in.lock_owner = fuse_lock_owner_id(fm->fc, owner); 775 } 776 777 if (ia->io->async) 778 return fuse_async_req_send(fm, ia, count); 779 780 return fuse_simple_request(fm, &ia->ap.args); 781} 782 783static void fuse_read_update_size(struct inode *inode, loff_t size, 784 u64 attr_ver) 785{ 786 struct fuse_conn *fc = get_fuse_conn(inode); 787 struct fuse_inode *fi = get_fuse_inode(inode); 788 789 spin_lock(&fi->lock); 790 if (attr_ver >= fi->attr_version && size < inode->i_size && 791 !test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) { 792 fi->attr_version = atomic64_inc_return(&fc->attr_version); 793 i_size_write(inode, size); 794 } 795 
spin_unlock(&fi->lock); 796} 797 798static void fuse_short_read(struct inode *inode, u64 attr_ver, size_t num_read, 799 struct fuse_args_pages *ap) 800{ 801 struct fuse_conn *fc = get_fuse_conn(inode); 802 803 if (fc->writeback_cache) { 804 /* 805 * A hole in a file. Some data after the hole are in page cache, 806 * but have not reached the client fs yet. So, the hole is not 807 * present there. 808 */ 809 int i; 810 int start_idx = num_read >> PAGE_SHIFT; 811 size_t off = num_read & (PAGE_SIZE - 1); 812 813 for (i = start_idx; i < ap->num_pages; i++) { 814 zero_user_segment(ap->pages[i], off, PAGE_SIZE); 815 off = 0; 816 } 817 } else { 818 loff_t pos = page_offset(ap->pages[0]) + num_read; 819 fuse_read_update_size(inode, pos, attr_ver); 820 } 821} 822 823static int fuse_do_readpage(struct file *file, struct page *page) 824{ 825 struct inode *inode = page->mapping->host; 826 struct fuse_mount *fm = get_fuse_mount(inode); 827 loff_t pos = page_offset(page); 828 struct fuse_page_desc desc = { .length = PAGE_SIZE }; 829 struct fuse_io_args ia = { 830 .ap.args.page_zeroing = true, 831 .ap.args.out_pages = true, 832 .ap.num_pages = 1, 833 .ap.pages = &page, 834 .ap.descs = &desc, 835 }; 836 ssize_t res; 837 u64 attr_ver; 838 839 /* 840 * Page writeback can extend beyond the lifetime of the 841 * page-cache page, so make sure we read a properly synced 842 * page. 843 */ 844 fuse_wait_on_page_writeback(inode, page->index); 845 846 attr_ver = fuse_get_attr_version(fm->fc); 847 848 /* Don't overflow end offset */ 849 if (pos + (desc.length - 1) == LLONG_MAX) 850 desc.length--; 851 852 fuse_read_args_fill(&ia, file, pos, desc.length, FUSE_READ); 853 res = fuse_simple_request(fm, &ia.ap.args); 854 if (res < 0) 855 return res; 856 /* 857 * Short read means EOF. If file size is larger, truncate it 858 */ 859 if (res < desc.length) 860 fuse_short_read(inode, attr_ver, res, &ia.ap); 861 862 SetPageUptodate(page); 863 864 return 0; 865} 866 867static int fuse_readpage(struct file *file, struct page *page) 868{ 869 struct inode *inode = page->mapping->host; 870 int err; 871 872 err = -EIO; 873 if (fuse_is_bad(inode)) 874 goto out; 875 876 err = fuse_do_readpage(file, page); 877 fuse_invalidate_atime(inode); 878 out: 879 unlock_page(page); 880 return err; 881} 882 883static void fuse_readpages_end(struct fuse_mount *fm, struct fuse_args *args, 884 int err) 885{ 886 int i; 887 struct fuse_io_args *ia = container_of(args, typeof(*ia), ap.args); 888 struct fuse_args_pages *ap = &ia->ap; 889 size_t count = ia->read.in.size; 890 size_t num_read = args->out_args[0].size; 891 struct address_space *mapping = NULL; 892 893 for (i = 0; mapping == NULL && i < ap->num_pages; i++) 894 mapping = ap->pages[i]->mapping; 895 896 if (mapping) { 897 struct inode *inode = mapping->host; 898 899 /* 900 * Short read means EOF. 
If file size is larger, truncate it 901 */ 902 if (!err && num_read < count) 903 fuse_short_read(inode, ia->read.attr_ver, num_read, ap); 904 905 fuse_invalidate_atime(inode); 906 } 907 908 for (i = 0; i < ap->num_pages; i++) { 909 struct page *page = ap->pages[i]; 910 911 if (!err) 912 SetPageUptodate(page); 913 else 914 SetPageError(page); 915 unlock_page(page); 916 put_page(page); 917 } 918 if (ia->ff) 919 fuse_file_put(ia->ff, false, false); 920 921 fuse_io_free(ia); 922} 923 924static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file) 925{ 926 struct fuse_file *ff = file->private_data; 927 struct fuse_mount *fm = ff->fm; 928 struct fuse_args_pages *ap = &ia->ap; 929 loff_t pos = page_offset(ap->pages[0]); 930 size_t count = ap->num_pages << PAGE_SHIFT; 931 ssize_t res; 932 int err; 933 934 ap->args.out_pages = true; 935 ap->args.page_zeroing = true; 936 ap->args.page_replace = true; 937 938 /* Don't overflow end offset */ 939 if (pos + (count - 1) == LLONG_MAX) { 940 count--; 941 ap->descs[ap->num_pages - 1].length--; 942 } 943 WARN_ON((loff_t) (pos + count) < 0); 944 945 fuse_read_args_fill(ia, file, pos, count, FUSE_READ); 946 ia->read.attr_ver = fuse_get_attr_version(fm->fc); 947 if (fm->fc->async_read) { 948 ia->ff = fuse_file_get(ff); 949 ap->args.end = fuse_readpages_end; 950 err = fuse_simple_background(fm, &ap->args, GFP_KERNEL); 951 if (!err) 952 return; 953 } else { 954 res = fuse_simple_request(fm, &ap->args); 955 err = res < 0 ? res : 0; 956 } 957 fuse_readpages_end(fm, &ap->args, err); 958} 959 960static void fuse_readahead(struct readahead_control *rac) 961{ 962 struct inode *inode = rac->mapping->host; 963 struct fuse_conn *fc = get_fuse_conn(inode); 964 unsigned int i, max_pages, nr_pages = 0; 965 966 if (fuse_is_bad(inode)) 967 return; 968 969 max_pages = min_t(unsigned int, fc->max_pages, 970 fc->max_read / PAGE_SIZE); 971 972 for (;;) { 973 struct fuse_io_args *ia; 974 struct fuse_args_pages *ap; 975 976 nr_pages = readahead_count(rac) - nr_pages; 977 if (nr_pages > max_pages) 978 nr_pages = max_pages; 979 if (nr_pages == 0) 980 break; 981 ia = fuse_io_alloc(NULL, nr_pages); 982 if (!ia) 983 return; 984 ap = &ia->ap; 985 nr_pages = __readahead_batch(rac, ap->pages, nr_pages); 986 for (i = 0; i < nr_pages; i++) { 987 fuse_wait_on_page_writeback(inode, 988 readahead_index(rac) + i); 989 ap->descs[i].length = PAGE_SIZE; 990 } 991 ap->num_pages = nr_pages; 992 fuse_send_readpages(ia, rac->file); 993 } 994} 995 996static ssize_t fuse_cache_read_iter(struct kiocb *iocb, struct iov_iter *to) 997{ 998 struct inode *inode = iocb->ki_filp->f_mapping->host; 999 struct fuse_conn *fc = get_fuse_conn(inode); 1000 1001 /* 1002 * In auto invalidate mode, always update attributes on read. 1003 * Otherwise, only update if we attempt to read past EOF (to ensure 1004 * i_size is up to date). 
1005 */ 1006 if (fc->auto_inval_data || 1007 (iocb->ki_pos + iov_iter_count(to) > i_size_read(inode))) { 1008 int err; 1009 err = fuse_update_attributes(inode, iocb->ki_filp); 1010 if (err) 1011 return err; 1012 } 1013 1014 return generic_file_read_iter(iocb, to); 1015} 1016 1017static void fuse_write_args_fill(struct fuse_io_args *ia, struct fuse_file *ff, 1018 loff_t pos, size_t count) 1019{ 1020 struct fuse_args *args = &ia->ap.args; 1021 1022 ia->write.in.fh = ff->fh; 1023 ia->write.in.offset = pos; 1024 ia->write.in.size = count; 1025 args->opcode = FUSE_WRITE; 1026 args->nodeid = ff->nodeid; 1027 args->in_numargs = 2; 1028 if (ff->fm->fc->minor < 9) 1029 args->in_args[0].size = FUSE_COMPAT_WRITE_IN_SIZE; 1030 else 1031 args->in_args[0].size = sizeof(ia->write.in); 1032 args->in_args[0].value = &ia->write.in; 1033 args->in_args[1].size = count; 1034 args->out_numargs = 1; 1035 args->out_args[0].size = sizeof(ia->write.out); 1036 args->out_args[0].value = &ia->write.out; 1037} 1038 1039static unsigned int fuse_write_flags(struct kiocb *iocb) 1040{ 1041 unsigned int flags = iocb->ki_filp->f_flags; 1042 1043 if (iocb->ki_flags & IOCB_DSYNC) 1044 flags |= O_DSYNC; 1045 if (iocb->ki_flags & IOCB_SYNC) 1046 flags |= O_SYNC; 1047 1048 return flags; 1049} 1050 1051static ssize_t fuse_send_write(struct fuse_io_args *ia, loff_t pos, 1052 size_t count, fl_owner_t owner) 1053{ 1054 struct kiocb *iocb = ia->io->iocb; 1055 struct file *file = iocb->ki_filp; 1056 struct fuse_file *ff = file->private_data; 1057 struct fuse_mount *fm = ff->fm; 1058 struct fuse_write_in *inarg = &ia->write.in; 1059 ssize_t err; 1060 1061 fuse_write_args_fill(ia, ff, pos, count); 1062 inarg->flags = fuse_write_flags(iocb); 1063 if (owner != NULL) { 1064 inarg->write_flags |= FUSE_WRITE_LOCKOWNER; 1065 inarg->lock_owner = fuse_lock_owner_id(fm->fc, owner); 1066 } 1067 1068 if (ia->io->async) 1069 return fuse_async_req_send(fm, ia, count); 1070 1071 err = fuse_simple_request(fm, &ia->ap.args); 1072 if (!err && ia->write.out.size > count) 1073 err = -EIO; 1074 1075 return err ?: ia->write.out.size; 1076} 1077 1078bool fuse_write_update_size(struct inode *inode, loff_t pos) 1079{ 1080 struct fuse_conn *fc = get_fuse_conn(inode); 1081 struct fuse_inode *fi = get_fuse_inode(inode); 1082 bool ret = false; 1083 1084 spin_lock(&fi->lock); 1085 fi->attr_version = atomic64_inc_return(&fc->attr_version); 1086 if (pos > inode->i_size) { 1087 i_size_write(inode, pos); 1088 ret = true; 1089 } 1090 spin_unlock(&fi->lock); 1091 1092 return ret; 1093} 1094 1095static ssize_t fuse_send_write_pages(struct fuse_io_args *ia, 1096 struct kiocb *iocb, struct inode *inode, 1097 loff_t pos, size_t count) 1098{ 1099 struct fuse_args_pages *ap = &ia->ap; 1100 struct file *file = iocb->ki_filp; 1101 struct fuse_file *ff = file->private_data; 1102 struct fuse_mount *fm = ff->fm; 1103 unsigned int offset, i; 1104 bool short_write; 1105 int err; 1106 1107 for (i = 0; i < ap->num_pages; i++) 1108 fuse_wait_on_page_writeback(inode, ap->pages[i]->index); 1109 1110 fuse_write_args_fill(ia, ff, pos, count); 1111 ia->write.in.flags = fuse_write_flags(iocb); 1112 1113 err = fuse_simple_request(fm, &ap->args); 1114 if (!err && ia->write.out.size > count) 1115 err = -EIO; 1116 1117 short_write = ia->write.out.size < count; 1118 offset = ap->descs[0].offset; 1119 count = ia->write.out.size; 1120 for (i = 0; i < ap->num_pages; i++) { 1121 struct page *page = ap->pages[i]; 1122 1123 if (err) { 1124 ClearPageUptodate(page); 1125 } else { 1126 if (count >= 
PAGE_SIZE - offset) 1127 count -= PAGE_SIZE - offset; 1128 else { 1129 if (short_write) 1130 ClearPageUptodate(page); 1131 count = 0; 1132 } 1133 offset = 0; 1134 } 1135 if (ia->write.page_locked && (i == ap->num_pages - 1)) 1136 unlock_page(page); 1137 put_page(page); 1138 } 1139 1140 return err; 1141} 1142 1143static ssize_t fuse_fill_write_pages(struct fuse_io_args *ia, 1144 struct address_space *mapping, 1145 struct iov_iter *ii, loff_t pos, 1146 unsigned int max_pages) 1147{ 1148 struct fuse_args_pages *ap = &ia->ap; 1149 struct fuse_conn *fc = get_fuse_conn(mapping->host); 1150 unsigned offset = pos & (PAGE_SIZE - 1); 1151 size_t count = 0; 1152 int err; 1153 1154 ap->args.in_pages = true; 1155 ap->descs[0].offset = offset; 1156 1157 do { 1158 size_t tmp; 1159 struct page *page; 1160 pgoff_t index = pos >> PAGE_SHIFT; 1161 size_t bytes = min_t(size_t, PAGE_SIZE - offset, 1162 iov_iter_count(ii)); 1163 1164 bytes = min_t(size_t, bytes, fc->max_write - count); 1165 1166 again: 1167 err = -EFAULT; 1168 if (iov_iter_fault_in_readable(ii, bytes)) 1169 break; 1170 1171 err = -ENOMEM; 1172 page = grab_cache_page_write_begin(mapping, index, 0); 1173 if (!page) 1174 break; 1175 1176 if (mapping_writably_mapped(mapping)) 1177 flush_dcache_page(page); 1178 1179 tmp = iov_iter_copy_from_user_atomic(page, ii, offset, bytes); 1180 flush_dcache_page(page); 1181 1182 iov_iter_advance(ii, tmp); 1183 if (!tmp) { 1184 unlock_page(page); 1185 put_page(page); 1186 bytes = min(bytes, iov_iter_single_seg_count(ii)); 1187 goto again; 1188 } 1189 1190 err = 0; 1191 ap->pages[ap->num_pages] = page; 1192 ap->descs[ap->num_pages].length = tmp; 1193 ap->num_pages++; 1194 1195 count += tmp; 1196 pos += tmp; 1197 offset += tmp; 1198 if (offset == PAGE_SIZE) 1199 offset = 0; 1200 1201 /* If we copied full page, mark it uptodate */ 1202 if (tmp == PAGE_SIZE) 1203 SetPageUptodate(page); 1204 1205 if (PageUptodate(page)) { 1206 unlock_page(page); 1207 } else { 1208 ia->write.page_locked = true; 1209 break; 1210 } 1211 if (!fc->big_writes) 1212 break; 1213 } while (iov_iter_count(ii) && count < fc->max_write && 1214 ap->num_pages < max_pages && offset == 0); 1215 1216 return count > 0 ? 
count : err; 1217} 1218 1219static inline unsigned int fuse_wr_pages(loff_t pos, size_t len, 1220 unsigned int max_pages) 1221{ 1222 return min_t(unsigned int, 1223 ((pos + len - 1) >> PAGE_SHIFT) - 1224 (pos >> PAGE_SHIFT) + 1, 1225 max_pages); 1226} 1227 1228static ssize_t fuse_perform_write(struct kiocb *iocb, 1229 struct address_space *mapping, 1230 struct iov_iter *ii, loff_t pos) 1231{ 1232 struct inode *inode = mapping->host; 1233 struct fuse_conn *fc = get_fuse_conn(inode); 1234 struct fuse_inode *fi = get_fuse_inode(inode); 1235 int err = 0; 1236 ssize_t res = 0; 1237 1238 if (inode->i_size < pos + iov_iter_count(ii)) 1239 set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); 1240 1241 do { 1242 ssize_t count; 1243 struct fuse_io_args ia = {}; 1244 struct fuse_args_pages *ap = &ia.ap; 1245 unsigned int nr_pages = fuse_wr_pages(pos, iov_iter_count(ii), 1246 fc->max_pages); 1247 1248 ap->pages = fuse_pages_alloc(nr_pages, GFP_KERNEL, &ap->descs); 1249 if (!ap->pages) { 1250 err = -ENOMEM; 1251 break; 1252 } 1253 1254 count = fuse_fill_write_pages(&ia, mapping, ii, pos, nr_pages); 1255 if (count <= 0) { 1256 err = count; 1257 } else { 1258 err = fuse_send_write_pages(&ia, iocb, inode, 1259 pos, count); 1260 if (!err) { 1261 size_t num_written = ia.write.out.size; 1262 1263 res += num_written; 1264 pos += num_written; 1265 1266 /* break out of the loop on short write */ 1267 if (num_written != count) 1268 err = -EIO; 1269 } 1270 } 1271 kfree(ap->pages); 1272 } while (!err && iov_iter_count(ii)); 1273 1274 if (res > 0) 1275 fuse_write_update_size(inode, pos); 1276 1277 clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); 1278 fuse_invalidate_attr(inode); 1279 1280 return res > 0 ? res : err; 1281} 1282 1283static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from) 1284{ 1285 struct file *file = iocb->ki_filp; 1286 struct address_space *mapping = file->f_mapping; 1287 ssize_t written = 0; 1288 ssize_t written_buffered = 0; 1289 struct inode *inode = mapping->host; 1290 ssize_t err; 1291 loff_t endbyte = 0; 1292 1293 if (get_fuse_conn(inode)->writeback_cache) { 1294 /* Update size (EOF optimization) and mode (SUID clearing) */ 1295 err = fuse_update_attributes(mapping->host, file); 1296 if (err) 1297 return err; 1298 1299 return generic_file_write_iter(iocb, from); 1300 } 1301 1302 inode_lock(inode); 1303 1304 /* We can write back this queue in page reclaim */ 1305 current->backing_dev_info = inode_to_bdi(inode); 1306 1307 err = generic_write_checks(iocb, from); 1308 if (err <= 0) 1309 goto out; 1310 1311 err = file_remove_privs(file); 1312 if (err) 1313 goto out; 1314 1315 err = file_update_time(file); 1316 if (err) 1317 goto out; 1318 1319 if (iocb->ki_flags & IOCB_DIRECT) { 1320 loff_t pos = iocb->ki_pos; 1321 written = generic_file_direct_write(iocb, from); 1322 if (written < 0 || !iov_iter_count(from)) 1323 goto out; 1324 1325 pos += written; 1326 1327 written_buffered = fuse_perform_write(iocb, mapping, from, pos); 1328 if (written_buffered < 0) { 1329 err = written_buffered; 1330 goto out; 1331 } 1332 endbyte = pos + written_buffered - 1; 1333 1334 err = filemap_write_and_wait_range(file->f_mapping, pos, 1335 endbyte); 1336 if (err) 1337 goto out; 1338 1339 invalidate_mapping_pages(file->f_mapping, 1340 pos >> PAGE_SHIFT, 1341 endbyte >> PAGE_SHIFT); 1342 1343 written += written_buffered; 1344 iocb->ki_pos = pos + written_buffered; 1345 } else { 1346 written = fuse_perform_write(iocb, mapping, from, iocb->ki_pos); 1347 if (written >= 0) 1348 iocb->ki_pos += written; 1349 } 
1350out: 1351 current->backing_dev_info = NULL; 1352 inode_unlock(inode); 1353 if (written > 0) 1354 written = generic_write_sync(iocb, written); 1355 1356 return written ? written : err; 1357} 1358 1359static inline void fuse_page_descs_length_init(struct fuse_page_desc *descs, 1360 unsigned int index, 1361 unsigned int nr_pages) 1362{ 1363 int i; 1364 1365 for (i = index; i < index + nr_pages; i++) 1366 descs[i].length = PAGE_SIZE - descs[i].offset; 1367} 1368 1369static inline unsigned long fuse_get_user_addr(const struct iov_iter *ii) 1370{ 1371 return (unsigned long)ii->iov->iov_base + ii->iov_offset; 1372} 1373 1374static inline size_t fuse_get_frag_size(const struct iov_iter *ii, 1375 size_t max_size) 1376{ 1377 return min(iov_iter_single_seg_count(ii), max_size); 1378} 1379 1380static int fuse_get_user_pages(struct fuse_args_pages *ap, struct iov_iter *ii, 1381 size_t *nbytesp, int write, 1382 unsigned int max_pages) 1383{ 1384 size_t nbytes = 0; /* # bytes already packed in req */ 1385 ssize_t ret = 0; 1386 1387 /* Special case for kernel I/O: can copy directly into the buffer */ 1388 if (iov_iter_is_kvec(ii)) { 1389 unsigned long user_addr = fuse_get_user_addr(ii); 1390 size_t frag_size = fuse_get_frag_size(ii, *nbytesp); 1391 1392 if (write) 1393 ap->args.in_args[1].value = (void *) user_addr; 1394 else 1395 ap->args.out_args[0].value = (void *) user_addr; 1396 1397 iov_iter_advance(ii, frag_size); 1398 *nbytesp = frag_size; 1399 return 0; 1400 } 1401 1402 while (nbytes < *nbytesp && ap->num_pages < max_pages) { 1403 unsigned npages; 1404 size_t start; 1405 ret = iov_iter_get_pages(ii, &ap->pages[ap->num_pages], 1406 *nbytesp - nbytes, 1407 max_pages - ap->num_pages, 1408 &start); 1409 if (ret < 0) 1410 break; 1411 1412 iov_iter_advance(ii, ret); 1413 nbytes += ret; 1414 1415 ret += start; 1416 npages = (ret + PAGE_SIZE - 1) / PAGE_SIZE; 1417 1418 ap->descs[ap->num_pages].offset = start; 1419 fuse_page_descs_length_init(ap->descs, ap->num_pages, npages); 1420 1421 ap->num_pages += npages; 1422 ap->descs[ap->num_pages - 1].length -= 1423 (PAGE_SIZE - ret) & (PAGE_SIZE - 1); 1424 } 1425 1426 ap->args.user_pages = true; 1427 if (write) 1428 ap->args.in_pages = true; 1429 else 1430 ap->args.out_pages = true; 1431 1432 *nbytesp = nbytes; 1433 1434 return ret < 0 ? ret : 0; 1435} 1436 1437ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, 1438 loff_t *ppos, int flags) 1439{ 1440 int write = flags & FUSE_DIO_WRITE; 1441 int cuse = flags & FUSE_DIO_CUSE; 1442 struct file *file = io->iocb->ki_filp; 1443 struct inode *inode = file->f_mapping->host; 1444 struct fuse_file *ff = file->private_data; 1445 struct fuse_conn *fc = ff->fm->fc; 1446 size_t nmax = write ? 
fc->max_write : fc->max_read; 1447 loff_t pos = *ppos; 1448 size_t count = iov_iter_count(iter); 1449 pgoff_t idx_from = pos >> PAGE_SHIFT; 1450 pgoff_t idx_to = (pos + count - 1) >> PAGE_SHIFT; 1451 ssize_t res = 0; 1452 int err = 0; 1453 struct fuse_io_args *ia; 1454 unsigned int max_pages; 1455 1456 max_pages = iov_iter_npages(iter, fc->max_pages); 1457 ia = fuse_io_alloc(io, max_pages); 1458 if (!ia) 1459 return -ENOMEM; 1460 1461 ia->io = io; 1462 if (!cuse && fuse_range_is_writeback(inode, idx_from, idx_to)) { 1463 if (!write) 1464 inode_lock(inode); 1465 fuse_sync_writes(inode); 1466 if (!write) 1467 inode_unlock(inode); 1468 } 1469 1470 io->should_dirty = !write && iter_is_iovec(iter); 1471 while (count) { 1472 ssize_t nres; 1473 fl_owner_t owner = current->files; 1474 size_t nbytes = min(count, nmax); 1475 1476 err = fuse_get_user_pages(&ia->ap, iter, &nbytes, write, 1477 max_pages); 1478 if (err && !nbytes) 1479 break; 1480 1481 if (write) { 1482 if (!capable(CAP_FSETID)) 1483 ia->write.in.write_flags |= FUSE_WRITE_KILL_PRIV; 1484 1485 nres = fuse_send_write(ia, pos, nbytes, owner); 1486 } else { 1487 nres = fuse_send_read(ia, pos, nbytes, owner); 1488 } 1489 1490 if (!io->async || nres < 0) { 1491 fuse_release_user_pages(&ia->ap, io->should_dirty); 1492 fuse_io_free(ia); 1493 } 1494 ia = NULL; 1495 if (nres < 0) { 1496 iov_iter_revert(iter, nbytes); 1497 err = nres; 1498 break; 1499 } 1500 WARN_ON(nres > nbytes); 1501 1502 count -= nres; 1503 res += nres; 1504 pos += nres; 1505 if (nres != nbytes) { 1506 iov_iter_revert(iter, nbytes - nres); 1507 break; 1508 } 1509 if (count) { 1510 max_pages = iov_iter_npages(iter, fc->max_pages); 1511 ia = fuse_io_alloc(io, max_pages); 1512 if (!ia) 1513 break; 1514 } 1515 } 1516 if (ia) 1517 fuse_io_free(ia); 1518 if (res > 0) 1519 *ppos = pos; 1520 1521 return res > 0 ? 
res : err; 1522} 1523EXPORT_SYMBOL_GPL(fuse_direct_io); 1524 1525static ssize_t __fuse_direct_read(struct fuse_io_priv *io, 1526 struct iov_iter *iter, 1527 loff_t *ppos) 1528{ 1529 ssize_t res; 1530 struct inode *inode = file_inode(io->iocb->ki_filp); 1531 1532 res = fuse_direct_io(io, iter, ppos, 0); 1533 1534 fuse_invalidate_atime(inode); 1535 1536 return res; 1537} 1538 1539static ssize_t fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter); 1540 1541static ssize_t fuse_direct_read_iter(struct kiocb *iocb, struct iov_iter *to) 1542{ 1543 ssize_t res; 1544 1545 if (!is_sync_kiocb(iocb) && iocb->ki_flags & IOCB_DIRECT) { 1546 res = fuse_direct_IO(iocb, to); 1547 } else { 1548 struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb); 1549 1550 res = __fuse_direct_read(&io, to, &iocb->ki_pos); 1551 } 1552 1553 return res; 1554} 1555 1556static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from) 1557{ 1558 struct inode *inode = file_inode(iocb->ki_filp); 1559 struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb); 1560 ssize_t res; 1561 1562 /* Don't allow parallel writes to the same file */ 1563 inode_lock(inode); 1564 res = generic_write_checks(iocb, from); 1565 if (res > 0) { 1566 if (!is_sync_kiocb(iocb) && iocb->ki_flags & IOCB_DIRECT) { 1567 res = fuse_direct_IO(iocb, from); 1568 } else { 1569 res = fuse_direct_io(&io, from, &iocb->ki_pos, 1570 FUSE_DIO_WRITE); 1571 } 1572 } 1573 fuse_invalidate_attr(inode); 1574 if (res > 0) 1575 fuse_write_update_size(inode, iocb->ki_pos); 1576 inode_unlock(inode); 1577 1578 return res; 1579} 1580 1581static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter *to) 1582{ 1583 struct file *file = iocb->ki_filp; 1584 struct fuse_file *ff = file->private_data; 1585 struct inode *inode = file_inode(file); 1586 1587 if (fuse_is_bad(inode)) 1588 return -EIO; 1589 1590 if (FUSE_IS_DAX(inode)) 1591 return fuse_dax_read_iter(iocb, to); 1592 1593 if (!(ff->open_flags & FOPEN_DIRECT_IO)) 1594 return fuse_cache_read_iter(iocb, to); 1595 else 1596 return fuse_direct_read_iter(iocb, to); 1597} 1598 1599static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from) 1600{ 1601 struct file *file = iocb->ki_filp; 1602 struct fuse_file *ff = file->private_data; 1603 struct inode *inode = file_inode(file); 1604 1605 if (fuse_is_bad(inode)) 1606 return -EIO; 1607 1608 if (FUSE_IS_DAX(inode)) 1609 return fuse_dax_write_iter(iocb, from); 1610 1611 if (!(ff->open_flags & FOPEN_DIRECT_IO)) 1612 return fuse_cache_write_iter(iocb, from); 1613 else 1614 return fuse_direct_write_iter(iocb, from); 1615} 1616 1617static void fuse_writepage_free(struct fuse_writepage_args *wpa) 1618{ 1619 struct fuse_args_pages *ap = &wpa->ia.ap; 1620 int i; 1621 1622 for (i = 0; i < ap->num_pages; i++) 1623 __free_page(ap->pages[i]); 1624 1625 if (wpa->ia.ff) 1626 fuse_file_put(wpa->ia.ff, false, false); 1627 1628 kfree(ap->pages); 1629 kfree(wpa); 1630} 1631 1632static void fuse_writepage_finish(struct fuse_mount *fm, 1633 struct fuse_writepage_args *wpa) 1634{ 1635 struct fuse_args_pages *ap = &wpa->ia.ap; 1636 struct inode *inode = wpa->inode; 1637 struct fuse_inode *fi = get_fuse_inode(inode); 1638 struct backing_dev_info *bdi = inode_to_bdi(inode); 1639 int i; 1640 1641 for (i = 0; i < ap->num_pages; i++) { 1642 dec_wb_stat(&bdi->wb, WB_WRITEBACK); 1643 dec_node_page_state(ap->pages[i], NR_WRITEBACK_TEMP); 1644 wb_writeout_inc(&bdi->wb); 1645 } 1646 wake_up(&fi->page_waitq); 1647} 1648 1649/* Called under fi->lock, may release and reacquire it */ 
1650static void fuse_send_writepage(struct fuse_mount *fm, 1651 struct fuse_writepage_args *wpa, loff_t size) 1652__releases(fi->lock) 1653__acquires(fi->lock) 1654{ 1655 struct fuse_writepage_args *aux, *next; 1656 struct fuse_inode *fi = get_fuse_inode(wpa->inode); 1657 struct fuse_write_in *inarg = &wpa->ia.write.in; 1658 struct fuse_args *args = &wpa->ia.ap.args; 1659 __u64 data_size = wpa->ia.ap.num_pages * PAGE_SIZE; 1660 int err; 1661 1662 fi->writectr++; 1663 if (inarg->offset + data_size <= size) { 1664 inarg->size = data_size; 1665 } else if (inarg->offset < size) { 1666 inarg->size = size - inarg->offset; 1667 } else { 1668 /* Got truncated off completely */ 1669 goto out_free; 1670 } 1671 1672 args->in_args[1].size = inarg->size; 1673 args->force = true; 1674 args->nocreds = true; 1675 1676 err = fuse_simple_background(fm, args, GFP_ATOMIC); 1677 if (err == -ENOMEM) { 1678 spin_unlock(&fi->lock); 1679 err = fuse_simple_background(fm, args, GFP_NOFS | __GFP_NOFAIL); 1680 spin_lock(&fi->lock); 1681 } 1682 1683 /* Fails on broken connection only */ 1684 if (unlikely(err)) 1685 goto out_free; 1686 1687 return; 1688 1689 out_free: 1690 fi->writectr--; 1691 rb_erase(&wpa->writepages_entry, &fi->writepages); 1692 fuse_writepage_finish(fm, wpa); 1693 spin_unlock(&fi->lock); 1694 1695 /* After fuse_writepage_finish() aux request list is private */ 1696 for (aux = wpa->next; aux; aux = next) { 1697 next = aux->next; 1698 aux->next = NULL; 1699 fuse_writepage_free(aux); 1700 } 1701 1702 fuse_writepage_free(wpa); 1703 spin_lock(&fi->lock); 1704} 1705 1706/* 1707 * If fi->writectr is positive (no truncate or fsync going on) send 1708 * all queued writepage requests. 1709 * 1710 * Called with fi->lock 1711 */ 1712void fuse_flush_writepages(struct inode *inode) 1713__releases(fi->lock) 1714__acquires(fi->lock) 1715{ 1716 struct fuse_mount *fm = get_fuse_mount(inode); 1717 struct fuse_inode *fi = get_fuse_inode(inode); 1718 loff_t crop = i_size_read(inode); 1719 struct fuse_writepage_args *wpa; 1720 1721 while (fi->writectr >= 0 && !list_empty(&fi->queued_writes)) { 1722 wpa = list_entry(fi->queued_writes.next, 1723 struct fuse_writepage_args, queue_entry); 1724 list_del_init(&wpa->queue_entry); 1725 fuse_send_writepage(fm, wpa, crop); 1726 } 1727} 1728 1729static struct fuse_writepage_args *fuse_insert_writeback(struct rb_root *root, 1730 struct fuse_writepage_args *wpa) 1731{ 1732 pgoff_t idx_from = wpa->ia.write.in.offset >> PAGE_SHIFT; 1733 pgoff_t idx_to = idx_from + wpa->ia.ap.num_pages - 1; 1734 struct rb_node **p = &root->rb_node; 1735 struct rb_node *parent = NULL; 1736 1737 WARN_ON(!wpa->ia.ap.num_pages); 1738 while (*p) { 1739 struct fuse_writepage_args *curr; 1740 pgoff_t curr_index; 1741 1742 parent = *p; 1743 curr = rb_entry(parent, struct fuse_writepage_args, 1744 writepages_entry); 1745 WARN_ON(curr->inode != wpa->inode); 1746 curr_index = curr->ia.write.in.offset >> PAGE_SHIFT; 1747 1748 if (idx_from >= curr_index + curr->ia.ap.num_pages) 1749 p = &(*p)->rb_right; 1750 else if (idx_to < curr_index) 1751 p = &(*p)->rb_left; 1752 else 1753 return curr; 1754 } 1755 1756 rb_link_node(&wpa->writepages_entry, parent, p); 1757 rb_insert_color(&wpa->writepages_entry, root); 1758 return NULL; 1759} 1760 1761static void tree_insert(struct rb_root *root, struct fuse_writepage_args *wpa) 1762{ 1763 WARN_ON(fuse_insert_writeback(root, wpa)); 1764} 1765 1766static void fuse_writepage_end(struct fuse_mount *fm, struct fuse_args *args, 1767 int error) 1768{ 1769 struct fuse_writepage_args 
*wpa = 1770 container_of(args, typeof(*wpa), ia.ap.args); 1771 struct inode *inode = wpa->inode; 1772 struct fuse_inode *fi = get_fuse_inode(inode); 1773 struct fuse_conn *fc = get_fuse_conn(inode); 1774 1775 mapping_set_error(inode->i_mapping, error); 1776 /* 1777 * A writeback finished and this might have updated mtime/ctime on 1778 * server making local mtime/ctime stale. Hence invalidate attrs. 1779 * Do this only if writeback_cache is not enabled. If writeback_cache 1780 * is enabled, we trust local ctime/mtime. 1781 */ 1782 if (!fc->writeback_cache) 1783 fuse_invalidate_attr(inode); 1784 spin_lock(&fi->lock); 1785 rb_erase(&wpa->writepages_entry, &fi->writepages); 1786 while (wpa->next) { 1787 struct fuse_mount *fm = get_fuse_mount(inode); 1788 struct fuse_write_in *inarg = &wpa->ia.write.in; 1789 struct fuse_writepage_args *next = wpa->next; 1790 1791 wpa->next = next->next; 1792 next->next = NULL; 1793 next->ia.ff = fuse_file_get(wpa->ia.ff); 1794 tree_insert(&fi->writepages, next); 1795 1796 /* 1797 * Skip fuse_flush_writepages() to make it easy to crop requests 1798 * based on primary request size. 1799 * 1800 * 1st case (trivial): there are no concurrent activities using 1801 * fuse_set/release_nowrite. Then we're on safe side because 1802 * fuse_flush_writepages() would call fuse_send_writepage() 1803 * anyway. 1804 * 1805 * 2nd case: someone called fuse_set_nowrite and it is waiting 1806 * now for completion of all in-flight requests. This happens 1807 * rarely and no more than once per page, so this should be 1808 * okay. 1809 * 1810 * 3rd case: someone (e.g. fuse_do_setattr()) is in the middle 1811 * of fuse_set_nowrite..fuse_release_nowrite section. The fact 1812 * that fuse_set_nowrite returned implies that all in-flight 1813 * requests were completed along with all of their secondary 1814 * requests. Further primary requests are blocked by negative 1815 * writectr. Hence there cannot be any in-flight requests and 1816 * no invocations of fuse_writepage_end() while we're in 1817 * fuse_set_nowrite..fuse_release_nowrite section. 1818 */ 1819 fuse_send_writepage(fm, next, inarg->offset + inarg->size); 1820 } 1821 fi->writectr--; 1822 fuse_writepage_finish(fm, wpa); 1823 spin_unlock(&fi->lock); 1824 fuse_writepage_free(wpa); 1825} 1826 1827static struct fuse_file *__fuse_write_file_get(struct fuse_conn *fc, 1828 struct fuse_inode *fi) 1829{ 1830 struct fuse_file *ff = NULL; 1831 1832 spin_lock(&fi->lock); 1833 if (!list_empty(&fi->write_files)) { 1834 ff = list_entry(fi->write_files.next, struct fuse_file, 1835 write_entry); 1836 fuse_file_get(ff); 1837 } 1838 spin_unlock(&fi->lock); 1839 1840 return ff; 1841} 1842 1843static struct fuse_file *fuse_write_file_get(struct fuse_conn *fc, 1844 struct fuse_inode *fi) 1845{ 1846 struct fuse_file *ff = __fuse_write_file_get(fc, fi); 1847 WARN_ON(!ff); 1848 return ff; 1849} 1850 1851int fuse_write_inode(struct inode *inode, struct writeback_control *wbc) 1852{ 1853 struct fuse_conn *fc = get_fuse_conn(inode); 1854 struct fuse_inode *fi = get_fuse_inode(inode); 1855 struct fuse_file *ff; 1856 int err; 1857 1858 /* 1859 * Inode is always written before the last reference is dropped and 1860 * hence this should not be reached from reclaim. 1861 * 1862 * Writing back the inode from reclaim can deadlock if the request 1863 * processing itself needs an allocation. Allocations triggering 1864 * reclaim while serving a request can't be prevented, because it can 1865 * involve any number of unrelated userspace processes. 
1866 */ 1867 WARN_ON(wbc->for_reclaim); 1868 1869 ff = __fuse_write_file_get(fc, fi); 1870 err = fuse_flush_times(inode, ff); 1871 if (ff) 1872 fuse_file_put(ff, false, false); 1873 1874 return err; 1875} 1876 1877static struct fuse_writepage_args *fuse_writepage_args_alloc(void) 1878{ 1879 struct fuse_writepage_args *wpa; 1880 struct fuse_args_pages *ap; 1881 1882 wpa = kzalloc(sizeof(*wpa), GFP_NOFS); 1883 if (wpa) { 1884 ap = &wpa->ia.ap; 1885 ap->num_pages = 0; 1886 ap->pages = fuse_pages_alloc(1, GFP_NOFS, &ap->descs); 1887 if (!ap->pages) { 1888 kfree(wpa); 1889 wpa = NULL; 1890 } 1891 } 1892 return wpa; 1893 1894} 1895 1896static int fuse_writepage_locked(struct page *page) 1897{ 1898 struct address_space *mapping = page->mapping; 1899 struct inode *inode = mapping->host; 1900 struct fuse_conn *fc = get_fuse_conn(inode); 1901 struct fuse_inode *fi = get_fuse_inode(inode); 1902 struct fuse_writepage_args *wpa; 1903 struct fuse_args_pages *ap; 1904 struct page *tmp_page; 1905 int error = -ENOMEM; 1906 1907 set_page_writeback(page); 1908 1909 wpa = fuse_writepage_args_alloc(); 1910 if (!wpa) 1911 goto err; 1912 ap = &wpa->ia.ap; 1913 1914 tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); 1915 if (!tmp_page) 1916 goto err_free; 1917 1918 error = -EIO; 1919 wpa->ia.ff = fuse_write_file_get(fc, fi); 1920 if (!wpa->ia.ff) 1921 goto err_nofile; 1922 1923 fuse_write_args_fill(&wpa->ia, wpa->ia.ff, page_offset(page), 0); 1924 1925 copy_highpage(tmp_page, page); 1926 wpa->ia.write.in.write_flags |= FUSE_WRITE_CACHE; 1927 wpa->next = NULL; 1928 ap->args.in_pages = true; 1929 ap->num_pages = 1; 1930 ap->pages[0] = tmp_page; 1931 ap->descs[0].offset = 0; 1932 ap->descs[0].length = PAGE_SIZE; 1933 ap->args.end = fuse_writepage_end; 1934 wpa->inode = inode; 1935 1936 inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK); 1937 inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP); 1938 1939 spin_lock(&fi->lock); 1940 tree_insert(&fi->writepages, wpa); 1941 list_add_tail(&wpa->queue_entry, &fi->queued_writes); 1942 fuse_flush_writepages(inode); 1943 spin_unlock(&fi->lock); 1944 1945 end_page_writeback(page); 1946 1947 return 0; 1948 1949err_nofile: 1950 __free_page(tmp_page); 1951err_free: 1952 kfree(wpa); 1953err: 1954 mapping_set_error(page->mapping, error); 1955 end_page_writeback(page); 1956 return error; 1957} 1958 1959static int fuse_writepage(struct page *page, struct writeback_control *wbc) 1960{ 1961 int err; 1962 1963 if (fuse_page_is_writeback(page->mapping->host, page->index)) { 1964 /* 1965 * ->writepages() should be called for sync() and friends. 
We 1966 * should only get here on direct reclaim and then we are 1967 * allowed to skip a page which is already in flight 1968 */ 1969 WARN_ON(wbc->sync_mode == WB_SYNC_ALL); 1970 1971 redirty_page_for_writepage(wbc, page); 1972 unlock_page(page); 1973 return 0; 1974 } 1975 1976 err = fuse_writepage_locked(page); 1977 unlock_page(page); 1978 1979 return err; 1980} 1981 1982struct fuse_fill_wb_data { 1983 struct fuse_writepage_args *wpa; 1984 struct fuse_file *ff; 1985 struct inode *inode; 1986 struct page **orig_pages; 1987 unsigned int max_pages; 1988}; 1989 1990static bool fuse_pages_realloc(struct fuse_fill_wb_data *data) 1991{ 1992 struct fuse_args_pages *ap = &data->wpa->ia.ap; 1993 struct fuse_conn *fc = get_fuse_conn(data->inode); 1994 struct page **pages; 1995 struct fuse_page_desc *descs; 1996 unsigned int npages = min_t(unsigned int, 1997 max_t(unsigned int, data->max_pages * 2, 1998 FUSE_DEFAULT_MAX_PAGES_PER_REQ), 1999 fc->max_pages); 2000 WARN_ON(npages <= data->max_pages); 2001 2002 pages = fuse_pages_alloc(npages, GFP_NOFS, &descs); 2003 if (!pages) 2004 return false; 2005 2006 memcpy(pages, ap->pages, sizeof(struct page *) * ap->num_pages); 2007 memcpy(descs, ap->descs, sizeof(struct fuse_page_desc) * ap->num_pages); 2008 kfree(ap->pages); 2009 ap->pages = pages; 2010 ap->descs = descs; 2011 data->max_pages = npages; 2012 2013 return true; 2014} 2015 2016static void fuse_writepages_send(struct fuse_fill_wb_data *data) 2017{ 2018 struct fuse_writepage_args *wpa = data->wpa; 2019 struct inode *inode = data->inode; 2020 struct fuse_inode *fi = get_fuse_inode(inode); 2021 int num_pages = wpa->ia.ap.num_pages; 2022 int i; 2023 2024 wpa->ia.ff = fuse_file_get(data->ff); 2025 spin_lock(&fi->lock); 2026 list_add_tail(&wpa->queue_entry, &fi->queued_writes); 2027 fuse_flush_writepages(inode); 2028 spin_unlock(&fi->lock); 2029 2030 for (i = 0; i < num_pages; i++) 2031 end_page_writeback(data->orig_pages[i]); 2032} 2033 2034/* 2035 * Check under fi->lock if the page is under writeback, and insert it onto the 2036 * rb_tree if not. Otherwise iterate auxiliary write requests, to see if there's 2037 * one already added for a page at this offset. If there's none, then insert 2038 * this new request onto the auxiliary list, otherwise reuse the existing one by 2039 * swapping the new temp page with the old one. 
2040 */ 2041static bool fuse_writepage_add(struct fuse_writepage_args *new_wpa, 2042 struct page *page) 2043{ 2044 struct fuse_inode *fi = get_fuse_inode(new_wpa->inode); 2045 struct fuse_writepage_args *tmp; 2046 struct fuse_writepage_args *old_wpa; 2047 struct fuse_args_pages *new_ap = &new_wpa->ia.ap; 2048 2049 WARN_ON(new_ap->num_pages != 0); 2050 new_ap->num_pages = 1; 2051 2052 spin_lock(&fi->lock); 2053 old_wpa = fuse_insert_writeback(&fi->writepages, new_wpa); 2054 if (!old_wpa) { 2055 spin_unlock(&fi->lock); 2056 return true; 2057 } 2058 2059 for (tmp = old_wpa->next; tmp; tmp = tmp->next) { 2060 pgoff_t curr_index; 2061 2062 WARN_ON(tmp->inode != new_wpa->inode); 2063 curr_index = tmp->ia.write.in.offset >> PAGE_SHIFT; 2064 if (curr_index == page->index) { 2065 WARN_ON(tmp->ia.ap.num_pages != 1); 2066 swap(tmp->ia.ap.pages[0], new_ap->pages[0]); 2067 break; 2068 } 2069 } 2070 2071 if (!tmp) { 2072 new_wpa->next = old_wpa->next; 2073 old_wpa->next = new_wpa; 2074 } 2075 2076 spin_unlock(&fi->lock); 2077 2078 if (tmp) { 2079 struct backing_dev_info *bdi = inode_to_bdi(new_wpa->inode); 2080 2081 dec_wb_stat(&bdi->wb, WB_WRITEBACK); 2082 dec_node_page_state(new_ap->pages[0], NR_WRITEBACK_TEMP); 2083 wb_writeout_inc(&bdi->wb); 2084 fuse_writepage_free(new_wpa); 2085 } 2086 2087 return false; 2088} 2089 2090static bool fuse_writepage_need_send(struct fuse_conn *fc, struct page *page, 2091 struct fuse_args_pages *ap, 2092 struct fuse_fill_wb_data *data) 2093{ 2094 WARN_ON(!ap->num_pages); 2095 2096 /* 2097 * Being under writeback is unlikely but possible. For example direct 2098 * read to an mmaped fuse file will set the page dirty twice; once when 2099 * the pages are faulted with get_user_pages(), and then after the read 2100 * completed. 2101 */ 2102 if (fuse_page_is_writeback(data->inode, page->index)) 2103 return true; 2104 2105 /* Reached max pages */ 2106 if (ap->num_pages == fc->max_pages) 2107 return true; 2108 2109 /* Reached max write bytes */ 2110 if ((ap->num_pages + 1) * PAGE_SIZE > fc->max_write) 2111 return true; 2112 2113 /* Discontinuity */ 2114 if (data->orig_pages[ap->num_pages - 1]->index + 1 != page->index) 2115 return true; 2116 2117 /* Need to grow the pages array? If so, did the expansion fail? */ 2118 if (ap->num_pages == data->max_pages && !fuse_pages_realloc(data)) 2119 return true; 2120 2121 return false; 2122} 2123 2124static int fuse_writepages_fill(struct page *page, 2125 struct writeback_control *wbc, void *_data) 2126{ 2127 struct fuse_fill_wb_data *data = _data; 2128 struct fuse_writepage_args *wpa = data->wpa; 2129 struct fuse_args_pages *ap = &wpa->ia.ap; 2130 struct inode *inode = data->inode; 2131 struct fuse_inode *fi = get_fuse_inode(inode); 2132 struct fuse_conn *fc = get_fuse_conn(inode); 2133 struct page *tmp_page; 2134 int err; 2135 2136 if (!data->ff) { 2137 err = -EIO; 2138 data->ff = fuse_write_file_get(fc, fi); 2139 if (!data->ff) 2140 goto out_unlock; 2141 } 2142 2143 if (wpa && fuse_writepage_need_send(fc, page, ap, data)) { 2144 fuse_writepages_send(data); 2145 data->wpa = NULL; 2146 } 2147 2148 err = -ENOMEM; 2149 tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); 2150 if (!tmp_page) 2151 goto out_unlock; 2152 2153 /* 2154 * The page must not be redirtied until the writeout is completed 2155 * (i.e. userspace has sent a reply to the write request). Otherwise 2156 * there could be more than one temporary page instance for each real 2157 * page. 
2158 * 2159 * This is ensured by holding the page lock in page_mkwrite() while 2160 * checking fuse_page_is_writeback(). We already hold the page lock 2161 * since clear_page_dirty_for_io() and keep it held until we add the 2162 * request to the fi->writepages list and increment ap->num_pages. 2163 * After this fuse_page_is_writeback() will indicate that the page is 2164 * under writeback, so we can release the page lock. 2165 */ 2166 if (data->wpa == NULL) { 2167 err = -ENOMEM; 2168 wpa = fuse_writepage_args_alloc(); 2169 if (!wpa) { 2170 __free_page(tmp_page); 2171 goto out_unlock; 2172 } 2173 data->max_pages = 1; 2174 2175 ap = &wpa->ia.ap; 2176 fuse_write_args_fill(&wpa->ia, data->ff, page_offset(page), 0); 2177 wpa->ia.write.in.write_flags |= FUSE_WRITE_CACHE; 2178 wpa->next = NULL; 2179 ap->args.in_pages = true; 2180 ap->args.end = fuse_writepage_end; 2181 ap->num_pages = 0; 2182 wpa->inode = inode; 2183 } 2184 set_page_writeback(page); 2185 2186 copy_highpage(tmp_page, page); 2187 ap->pages[ap->num_pages] = tmp_page; 2188 ap->descs[ap->num_pages].offset = 0; 2189 ap->descs[ap->num_pages].length = PAGE_SIZE; 2190 data->orig_pages[ap->num_pages] = page; 2191 2192 inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK); 2193 inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP); 2194 2195 err = 0; 2196 if (data->wpa) { 2197 /* 2198 * Protected by fi->lock against concurrent access by 2199 * fuse_page_is_writeback(). 2200 */ 2201 spin_lock(&fi->lock); 2202 ap->num_pages++; 2203 spin_unlock(&fi->lock); 2204 } else if (fuse_writepage_add(wpa, page)) { 2205 data->wpa = wpa; 2206 } else { 2207 end_page_writeback(page); 2208 } 2209out_unlock: 2210 unlock_page(page); 2211 2212 return err; 2213} 2214 2215static int fuse_writepages(struct address_space *mapping, 2216 struct writeback_control *wbc) 2217{ 2218 struct inode *inode = mapping->host; 2219 struct fuse_conn *fc = get_fuse_conn(inode); 2220 struct fuse_fill_wb_data data; 2221 int err; 2222 2223 err = -EIO; 2224 if (fuse_is_bad(inode)) 2225 goto out; 2226 2227 data.inode = inode; 2228 data.wpa = NULL; 2229 data.ff = NULL; 2230 2231 err = -ENOMEM; 2232 data.orig_pages = kcalloc(fc->max_pages, 2233 sizeof(struct page *), 2234 GFP_NOFS); 2235 if (!data.orig_pages) 2236 goto out; 2237 2238 err = write_cache_pages(mapping, wbc, fuse_writepages_fill, &data); 2239 if (data.wpa) { 2240 WARN_ON(!data.wpa->ia.ap.num_pages); 2241 fuse_writepages_send(&data); 2242 } 2243 if (data.ff) 2244 fuse_file_put(data.ff, false, false); 2245 2246 kfree(data.orig_pages); 2247out: 2248 return err; 2249} 2250 2251/* 2252 * It's worthwhile to make sure that space is reserved on disk for the write, 2253 * but how to implement it without killing performance needs more thought. 2254 */ 2255static int fuse_write_begin(struct file *file, struct address_space *mapping, 2256 loff_t pos, unsigned len, unsigned flags, 2257 struct page **pagep, void **fsdata) 2258{ 2259 pgoff_t index = pos >> PAGE_SHIFT; 2260 struct fuse_conn *fc = get_fuse_conn(file_inode(file)); 2261 struct page *page; 2262 loff_t fsize; 2263 int err = -ENOMEM; 2264 2265 WARN_ON(!fc->writeback_cache); 2266 2267 page = grab_cache_page_write_begin(mapping, index, flags); 2268 if (!page) 2269 goto error; 2270 2271 fuse_wait_on_page_writeback(mapping->host, page->index); 2272 2273 if (PageUptodate(page) || len == PAGE_SIZE) 2274 goto success; 2275 /* 2276 * Check if the start of this page comes after the end of file, in which 2277 * case the readpage can be optimized away.
2278 */ 2279 fsize = i_size_read(mapping->host); 2280 if (fsize <= (pos & PAGE_MASK)) { 2281 size_t off = pos & ~PAGE_MASK; 2282 if (off) 2283 zero_user_segment(page, 0, off); 2284 goto success; 2285 } 2286 err = fuse_do_readpage(file, page); 2287 if (err) 2288 goto cleanup; 2289success: 2290 *pagep = page; 2291 return 0; 2292 2293cleanup: 2294 unlock_page(page); 2295 put_page(page); 2296error: 2297 return err; 2298} 2299 2300static int fuse_write_end(struct file *file, struct address_space *mapping, 2301 loff_t pos, unsigned len, unsigned copied, 2302 struct page *page, void *fsdata) 2303{ 2304 struct inode *inode = page->mapping->host; 2305 2306 /* Haven't copied anything? Skip zeroing, size extending, dirtying. */ 2307 if (!copied) 2308 goto unlock; 2309 2310 if (!PageUptodate(page)) { 2311 /* Zero any unwritten bytes at the end of the page */ 2312 size_t endoff = (pos + copied) & ~PAGE_MASK; 2313 if (endoff) 2314 zero_user_segment(page, endoff, PAGE_SIZE); 2315 SetPageUptodate(page); 2316 } 2317 2318 fuse_write_update_size(inode, pos + copied); 2319 set_page_dirty(page); 2320 2321unlock: 2322 unlock_page(page); 2323 put_page(page); 2324 2325 return copied; 2326} 2327 2328static int fuse_launder_page(struct page *page) 2329{ 2330 int err = 0; 2331 if (clear_page_dirty_for_io(page)) { 2332 struct inode *inode = page->mapping->host; 2333 err = fuse_writepage_locked(page); 2334 if (!err) 2335 fuse_wait_on_page_writeback(inode, page->index); 2336 } 2337 return err; 2338} 2339 2340/* 2341 * Write back dirty pages now, because there may not be any suitable 2342 * open files later 2343 */ 2344static void fuse_vma_close(struct vm_area_struct *vma) 2345{ 2346 filemap_write_and_wait(vma->vm_file->f_mapping); 2347} 2348 2349/* 2350 * Wait for writeback against this page to complete before allowing it 2351 * to be marked dirty again, and hence written back again, possibly 2352 * before the previous writepage completed. 2353 * 2354 * Block here, instead of in ->writepage(), so that the userspace fs 2355 * can only block processes actually operating on the filesystem. 
2356 * 2357 * Otherwise unprivileged userspace fs would be able to block 2358 * unrelated: 2359 * 2360 * - page migration 2361 * - sync(2) 2362 * - try_to_free_pages() with order > PAGE_ALLOC_COSTLY_ORDER 2363 */ 2364static vm_fault_t fuse_page_mkwrite(struct vm_fault *vmf) 2365{ 2366 struct page *page = vmf->page; 2367 struct inode *inode = file_inode(vmf->vma->vm_file); 2368 2369 file_update_time(vmf->vma->vm_file); 2370 lock_page(page); 2371 if (page->mapping != inode->i_mapping) { 2372 unlock_page(page); 2373 return VM_FAULT_NOPAGE; 2374 } 2375 2376 fuse_wait_on_page_writeback(inode, page->index); 2377 return VM_FAULT_LOCKED; 2378} 2379 2380static const struct vm_operations_struct fuse_file_vm_ops = { 2381 .close = fuse_vma_close, 2382 .fault = filemap_fault, 2383 .map_pages = filemap_map_pages, 2384 .page_mkwrite = fuse_page_mkwrite, 2385}; 2386 2387static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma) 2388{ 2389 struct fuse_file *ff = file->private_data; 2390 2391 /* DAX mmap is superior to direct_io mmap */ 2392 if (FUSE_IS_DAX(file_inode(file))) 2393 return fuse_dax_mmap(file, vma); 2394 2395 if (ff->open_flags & FOPEN_DIRECT_IO) { 2396 /* Can't provide the coherency needed for MAP_SHARED */ 2397 if (vma->vm_flags & VM_MAYSHARE) 2398 return -ENODEV; 2399 2400 invalidate_inode_pages2(file->f_mapping); 2401 2402 return generic_file_mmap(file, vma); 2403 } 2404 2405 if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) 2406 fuse_link_write_file(file); 2407 2408 file_accessed(file); 2409 vma->vm_ops = &fuse_file_vm_ops; 2410 return 0; 2411} 2412 2413static int convert_fuse_file_lock(struct fuse_conn *fc, 2414 const struct fuse_file_lock *ffl, 2415 struct file_lock *fl) 2416{ 2417 switch (ffl->type) { 2418 case F_UNLCK: 2419 break; 2420 2421 case F_RDLCK: 2422 case F_WRLCK: 2423 if (ffl->start > OFFSET_MAX || ffl->end > OFFSET_MAX || 2424 ffl->end < ffl->start) 2425 return -EIO; 2426 2427 fl->fl_start = ffl->start; 2428 fl->fl_end = ffl->end; 2429 2430 /* 2431 * Convert pid into init's pid namespace. The locks API will 2432 * translate it into the caller's pid namespace. 
2433 */ 2434 rcu_read_lock(); 2435 fl->fl_pid = pid_nr_ns(find_pid_ns(ffl->pid, fc->pid_ns), &init_pid_ns); 2436 rcu_read_unlock(); 2437 break; 2438 2439 default: 2440 return -EIO; 2441 } 2442 fl->fl_type = ffl->type; 2443 return 0; 2444} 2445 2446static void fuse_lk_fill(struct fuse_args *args, struct file *file, 2447 const struct file_lock *fl, int opcode, pid_t pid, 2448 int flock, struct fuse_lk_in *inarg) 2449{ 2450 struct inode *inode = file_inode(file); 2451 struct fuse_conn *fc = get_fuse_conn(inode); 2452 struct fuse_file *ff = file->private_data; 2453 2454 memset(inarg, 0, sizeof(*inarg)); 2455 inarg->fh = ff->fh; 2456 inarg->owner = fuse_lock_owner_id(fc, fl->fl_owner); 2457 inarg->lk.start = fl->fl_start; 2458 inarg->lk.end = fl->fl_end; 2459 inarg->lk.type = fl->fl_type; 2460 inarg->lk.pid = pid; 2461 if (flock) 2462 inarg->lk_flags |= FUSE_LK_FLOCK; 2463 args->opcode = opcode; 2464 args->nodeid = get_node_id(inode); 2465 args->in_numargs = 1; 2466 args->in_args[0].size = sizeof(*inarg); 2467 args->in_args[0].value = inarg; 2468} 2469 2470static int fuse_getlk(struct file *file, struct file_lock *fl) 2471{ 2472 struct inode *inode = file_inode(file); 2473 struct fuse_mount *fm = get_fuse_mount(inode); 2474 FUSE_ARGS(args); 2475 struct fuse_lk_in inarg; 2476 struct fuse_lk_out outarg; 2477 int err; 2478 2479 fuse_lk_fill(&args, file, fl, FUSE_GETLK, 0, 0, &inarg); 2480 args.out_numargs = 1; 2481 args.out_args[0].size = sizeof(outarg); 2482 args.out_args[0].value = &outarg; 2483 err = fuse_simple_request(fm, &args); 2484 if (!err) 2485 err = convert_fuse_file_lock(fm->fc, &outarg.lk, fl); 2486 2487 return err; 2488} 2489 2490static int fuse_setlk(struct file *file, struct file_lock *fl, int flock) 2491{ 2492 struct inode *inode = file_inode(file); 2493 struct fuse_mount *fm = get_fuse_mount(inode); 2494 FUSE_ARGS(args); 2495 struct fuse_lk_in inarg; 2496 int opcode = (fl->fl_flags & FL_SLEEP) ? FUSE_SETLKW : FUSE_SETLK; 2497 struct pid *pid = fl->fl_type != F_UNLCK ? 
task_tgid(current) : NULL; 2498 pid_t pid_nr = pid_nr_ns(pid, fm->fc->pid_ns); 2499 int err; 2500 2501 if (fl->fl_lmops && fl->fl_lmops->lm_grant) { 2502 /* NLM needs asynchronous locks, which we don't support yet */ 2503 return -ENOLCK; 2504 } 2505 2506 /* Unlock on close is handled by the flush method */ 2507 if ((fl->fl_flags & FL_CLOSE_POSIX) == FL_CLOSE_POSIX) 2508 return 0; 2509 2510 fuse_lk_fill(&args, file, fl, opcode, pid_nr, flock, &inarg); 2511 err = fuse_simple_request(fm, &args); 2512 2513 /* locking is restartable */ 2514 if (err == -EINTR) 2515 err = -ERESTARTSYS; 2516 2517 return err; 2518} 2519 2520static int fuse_file_lock(struct file *file, int cmd, struct file_lock *fl) 2521{ 2522 struct inode *inode = file_inode(file); 2523 struct fuse_conn *fc = get_fuse_conn(inode); 2524 int err; 2525 2526 if (cmd == F_CANCELLK) { 2527 err = 0; 2528 } else if (cmd == F_GETLK) { 2529 if (fc->no_lock) { 2530 posix_test_lock(file, fl); 2531 err = 0; 2532 } else 2533 err = fuse_getlk(file, fl); 2534 } else { 2535 if (fc->no_lock) 2536 err = posix_lock_file(file, fl, NULL); 2537 else 2538 err = fuse_setlk(file, fl, 0); 2539 } 2540 return err; 2541} 2542 2543static int fuse_file_flock(struct file *file, int cmd, struct file_lock *fl) 2544{ 2545 struct inode *inode = file_inode(file); 2546 struct fuse_conn *fc = get_fuse_conn(inode); 2547 int err; 2548 2549 if (fc->no_flock) { 2550 err = locks_lock_file_wait(file, fl); 2551 } else { 2552 struct fuse_file *ff = file->private_data; 2553 2554 /* emulate flock with POSIX locks */ 2555 ff->flock = true; 2556 err = fuse_setlk(file, fl, 1); 2557 } 2558 2559 return err; 2560} 2561 2562static sector_t fuse_bmap(struct address_space *mapping, sector_t block) 2563{ 2564 struct inode *inode = mapping->host; 2565 struct fuse_mount *fm = get_fuse_mount(inode); 2566 FUSE_ARGS(args); 2567 struct fuse_bmap_in inarg; 2568 struct fuse_bmap_out outarg; 2569 int err; 2570 2571 if (!inode->i_sb->s_bdev || fm->fc->no_bmap) 2572 return 0; 2573 2574 memset(&inarg, 0, sizeof(inarg)); 2575 inarg.block = block; 2576 inarg.blocksize = inode->i_sb->s_blocksize; 2577 args.opcode = FUSE_BMAP; 2578 args.nodeid = get_node_id(inode); 2579 args.in_numargs = 1; 2580 args.in_args[0].size = sizeof(inarg); 2581 args.in_args[0].value = &inarg; 2582 args.out_numargs = 1; 2583 args.out_args[0].size = sizeof(outarg); 2584 args.out_args[0].value = &outarg; 2585 err = fuse_simple_request(fm, &args); 2586 if (err == -ENOSYS) 2587 fm->fc->no_bmap = 1; 2588 2589 return err ? 
0 : outarg.block; 2590} 2591 2592static loff_t fuse_lseek(struct file *file, loff_t offset, int whence) 2593{ 2594 struct inode *inode = file->f_mapping->host; 2595 struct fuse_mount *fm = get_fuse_mount(inode); 2596 struct fuse_file *ff = file->private_data; 2597 FUSE_ARGS(args); 2598 struct fuse_lseek_in inarg = { 2599 .fh = ff->fh, 2600 .offset = offset, 2601 .whence = whence 2602 }; 2603 struct fuse_lseek_out outarg; 2604 int err; 2605 2606 if (fm->fc->no_lseek) 2607 goto fallback; 2608 2609 args.opcode = FUSE_LSEEK; 2610 args.nodeid = ff->nodeid; 2611 args.in_numargs = 1; 2612 args.in_args[0].size = sizeof(inarg); 2613 args.in_args[0].value = &inarg; 2614 args.out_numargs = 1; 2615 args.out_args[0].size = sizeof(outarg); 2616 args.out_args[0].value = &outarg; 2617 err = fuse_simple_request(fm, &args); 2618 if (err) { 2619 if (err == -ENOSYS) { 2620 fm->fc->no_lseek = 1; 2621 goto fallback; 2622 } 2623 return err; 2624 } 2625 2626 return vfs_setpos(file, outarg.offset, inode->i_sb->s_maxbytes); 2627 2628fallback: 2629 err = fuse_update_attributes(inode, file); 2630 if (!err) 2631 return generic_file_llseek(file, offset, whence); 2632 else 2633 return err; 2634} 2635 2636static loff_t fuse_file_llseek(struct file *file, loff_t offset, int whence) 2637{ 2638 loff_t retval; 2639 struct inode *inode = file_inode(file); 2640 2641 switch (whence) { 2642 case SEEK_SET: 2643 case SEEK_CUR: 2644 /* No i_mutex protection necessary for SEEK_CUR and SEEK_SET */ 2645 retval = generic_file_llseek(file, offset, whence); 2646 break; 2647 case SEEK_END: 2648 inode_lock(inode); 2649 retval = fuse_update_attributes(inode, file); 2650 if (!retval) 2651 retval = generic_file_llseek(file, offset, whence); 2652 inode_unlock(inode); 2653 break; 2654 case SEEK_HOLE: 2655 case SEEK_DATA: 2656 inode_lock(inode); 2657 retval = fuse_lseek(file, offset, whence); 2658 inode_unlock(inode); 2659 break; 2660 default: 2661 retval = -EINVAL; 2662 } 2663 2664 return retval; 2665} 2666 2667/* 2668 * CUSE servers compiled on 32bit broke on 64bit kernels because the 2669 * ABI was defined to be 'struct iovec' which is different on 32bit 2670 * and 64bit. Fortunately we can determine which structure the server 2671 * used from the size of the reply. 2672 */ 2673static int fuse_copy_ioctl_iovec_old(struct iovec *dst, void *src, 2674 size_t transferred, unsigned count, 2675 bool is_compat) 2676{ 2677#ifdef CONFIG_COMPAT 2678 if (count * sizeof(struct compat_iovec) == transferred) { 2679 struct compat_iovec *ciov = src; 2680 unsigned i; 2681 2682 /* 2683 * With this interface a 32bit server cannot support 2684 * non-compat (i.e. 
ones coming from 64bit apps) ioctl 2685 * requests 2686 */ 2687 if (!is_compat) 2688 return -EINVAL; 2689 2690 for (i = 0; i < count; i++) { 2691 dst[i].iov_base = compat_ptr(ciov[i].iov_base); 2692 dst[i].iov_len = ciov[i].iov_len; 2693 } 2694 return 0; 2695 } 2696#endif 2697 2698 if (count * sizeof(struct iovec) != transferred) 2699 return -EIO; 2700 2701 memcpy(dst, src, transferred); 2702 return 0; 2703} 2704 2705/* Make sure iov_length() won't overflow */ 2706static int fuse_verify_ioctl_iov(struct fuse_conn *fc, struct iovec *iov, 2707 size_t count) 2708{ 2709 size_t n; 2710 u32 max = fc->max_pages << PAGE_SHIFT; 2711 2712 for (n = 0; n < count; n++, iov++) { 2713 if (iov->iov_len > (size_t) max) 2714 return -ENOMEM; 2715 max -= iov->iov_len; 2716 } 2717 return 0; 2718} 2719 2720static int fuse_copy_ioctl_iovec(struct fuse_conn *fc, struct iovec *dst, 2721 void *src, size_t transferred, unsigned count, 2722 bool is_compat) 2723{ 2724 unsigned i; 2725 struct fuse_ioctl_iovec *fiov = src; 2726 2727 if (fc->minor < 16) { 2728 return fuse_copy_ioctl_iovec_old(dst, src, transferred, 2729 count, is_compat); 2730 } 2731 2732 if (count * sizeof(struct fuse_ioctl_iovec) != transferred) 2733 return -EIO; 2734 2735 for (i = 0; i < count; i++) { 2736 /* Did the server supply an inappropriate value? */ 2737 if (fiov[i].base != (unsigned long) fiov[i].base || 2738 fiov[i].len != (unsigned long) fiov[i].len) 2739 return -EIO; 2740 2741 dst[i].iov_base = (void __user *) (unsigned long) fiov[i].base; 2742 dst[i].iov_len = (size_t) fiov[i].len; 2743 2744#ifdef CONFIG_COMPAT 2745 if (is_compat && 2746 (ptr_to_compat(dst[i].iov_base) != fiov[i].base || 2747 (compat_size_t) dst[i].iov_len != fiov[i].len)) 2748 return -EIO; 2749#endif 2750 } 2751 2752 return 0; 2753} 2754 2755 2756/* 2757 * For ioctls, there is no generic way to determine how much memory 2758 * needs to be read and/or written. Furthermore, ioctls are allowed 2759 * to dereference the passed pointer, so the parameter requires deep 2760 * copying but FUSE has no idea whatsoever about what to copy in or 2761 * out. 2762 * 2763 * This is solved by allowing FUSE server to retry ioctl with 2764 * necessary in/out iovecs. Let's assume the ioctl implementation 2765 * needs to read in the following structure. 2766 * 2767 * struct a { 2768 * char *buf; 2769 * size_t buflen; 2770 * } 2771 * 2772 * On the first callout to FUSE server, inarg->in_size and 2773 * inarg->out_size will be NULL; then, the server completes the ioctl 2774 * with FUSE_IOCTL_RETRY set in out->flags, out->in_iovs set to 1 and 2775 * the actual iov array to 2776 * 2777 * { { .iov_base = inarg.arg, .iov_len = sizeof(struct a) } } 2778 * 2779 * which tells FUSE to copy in the requested area and retry the ioctl. 2780 * On the second round, the server has access to the structure and 2781 * from that it can tell what to look for next, so on the invocation, 2782 * it sets FUSE_IOCTL_RETRY, out->in_iovs to 2 and iov array to 2783 * 2784 * { { .iov_base = inarg.arg, .iov_len = sizeof(struct a) }, 2785 * { .iov_base = a.buf, .iov_len = a.buflen } } 2786 * 2787 * FUSE will copy both struct a and the pointed buffer from the 2788 * process doing the ioctl and retry ioctl with both struct a and the 2789 * buffer. 2790 * 2791 * This time, FUSE server has everything it needs and completes ioctl 2792 * without FUSE_IOCTL_RETRY which finishes the ioctl call. 2793 * 2794 * Copying data out works the same way. 
2795 * 2796 * Note that if FUSE_IOCTL_UNRESTRICTED is clear, the kernel 2797 * automatically initializes in and out iovs by decoding @cmd with 2798 * _IOC_* macros and the server is not allowed to request RETRY. This 2799 * limits ioctl data transfers to well-formed ioctls and is the forced 2800 * behavior for all FUSE servers. (A sketch of how a server might fill the first retry reply for the example above appears at the end of this file.) 2801 */ 2802long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, 2803 unsigned int flags) 2804{ 2805 struct fuse_file *ff = file->private_data; 2806 struct fuse_mount *fm = ff->fm; 2807 struct fuse_ioctl_in inarg = { 2808 .fh = ff->fh, 2809 .cmd = cmd, 2810 .arg = arg, 2811 .flags = flags 2812 }; 2813 struct fuse_ioctl_out outarg; 2814 struct iovec *iov_page = NULL; 2815 struct iovec *in_iov = NULL, *out_iov = NULL; 2816 unsigned int in_iovs = 0, out_iovs = 0, max_pages; 2817 size_t in_size, out_size, c; 2818 ssize_t transferred; 2819 int err, i; 2820 struct iov_iter ii; 2821 struct fuse_args_pages ap = {}; 2822 2823#if BITS_PER_LONG == 32 2824 inarg.flags |= FUSE_IOCTL_32BIT; 2825#else 2826 if (flags & FUSE_IOCTL_COMPAT) { 2827 inarg.flags |= FUSE_IOCTL_32BIT; 2828#ifdef CONFIG_X86_X32 2829 if (in_x32_syscall()) 2830 inarg.flags |= FUSE_IOCTL_COMPAT_X32; 2831#endif 2832 } 2833#endif 2834 2835 /* assume all the iovs returned by the client always fit in a page */ 2836 BUILD_BUG_ON(sizeof(struct fuse_ioctl_iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE); 2837 2838 err = -ENOMEM; 2839 ap.pages = fuse_pages_alloc(fm->fc->max_pages, GFP_KERNEL, &ap.descs); 2840 iov_page = (struct iovec *) __get_free_page(GFP_KERNEL); 2841 if (!ap.pages || !iov_page) 2842 goto out; 2843 2844 fuse_page_descs_length_init(ap.descs, 0, fm->fc->max_pages); 2845 2846 /* 2847 * If restricted, initialize IO parameters as encoded in @cmd. 2848 * RETRY from server is not allowed. 2849 */ 2850 if (!(flags & FUSE_IOCTL_UNRESTRICTED)) { 2851 struct iovec *iov = iov_page; 2852 2853 iov->iov_base = (void __user *)arg; 2854 2855 switch (cmd) { 2856 case FS_IOC_GETFLAGS: 2857 case FS_IOC_SETFLAGS: 2858 iov->iov_len = sizeof(int); 2859 break; 2860 default: 2861 iov->iov_len = _IOC_SIZE(cmd); 2862 break; 2863 } 2864 2865 if (_IOC_DIR(cmd) & _IOC_WRITE) { 2866 in_iov = iov; 2867 in_iovs = 1; 2868 } 2869 2870 if (_IOC_DIR(cmd) & _IOC_READ) { 2871 out_iov = iov; 2872 out_iovs = 1; 2873 } 2874 } 2875 2876 retry: 2877 inarg.in_size = in_size = iov_length(in_iov, in_iovs); 2878 inarg.out_size = out_size = iov_length(out_iov, out_iovs); 2879 2880 /* 2881 * Out data can be used either for actual out data or iovs, 2882 * make sure there always is at least one page.
2883 */ 2884 out_size = max_t(size_t, out_size, PAGE_SIZE); 2885 max_pages = DIV_ROUND_UP(max(in_size, out_size), PAGE_SIZE); 2886 2887 /* make sure there are enough buffer pages and init request with them */ 2888 err = -ENOMEM; 2889 if (max_pages > fm->fc->max_pages) 2890 goto out; 2891 while (ap.num_pages < max_pages) { 2892 ap.pages[ap.num_pages] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); 2893 if (!ap.pages[ap.num_pages]) 2894 goto out; 2895 ap.num_pages++; 2896 } 2897 2898 2899 /* okay, let's send it to the client */ 2900 ap.args.opcode = FUSE_IOCTL; 2901 ap.args.nodeid = ff->nodeid; 2902 ap.args.in_numargs = 1; 2903 ap.args.in_args[0].size = sizeof(inarg); 2904 ap.args.in_args[0].value = &inarg; 2905 if (in_size) { 2906 ap.args.in_numargs++; 2907 ap.args.in_args[1].size = in_size; 2908 ap.args.in_pages = true; 2909 2910 err = -EFAULT; 2911 iov_iter_init(&ii, WRITE, in_iov, in_iovs, in_size); 2912 for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= ap.num_pages); i++) { 2913 c = copy_page_from_iter(ap.pages[i], 0, PAGE_SIZE, &ii); 2914 if (c != PAGE_SIZE && iov_iter_count(&ii)) 2915 goto out; 2916 } 2917 } 2918 2919 ap.args.out_numargs = 2; 2920 ap.args.out_args[0].size = sizeof(outarg); 2921 ap.args.out_args[0].value = &outarg; 2922 ap.args.out_args[1].size = out_size; 2923 ap.args.out_pages = true; 2924 ap.args.out_argvar = true; 2925 2926 transferred = fuse_simple_request(fm, &ap.args); 2927 err = transferred; 2928 if (transferred < 0) 2929 goto out; 2930 2931 /* did it ask for retry? */ 2932 if (outarg.flags & FUSE_IOCTL_RETRY) { 2933 void *vaddr; 2934 2935 /* no retry if in restricted mode */ 2936 err = -EIO; 2937 if (!(flags & FUSE_IOCTL_UNRESTRICTED)) 2938 goto out; 2939 2940 in_iovs = outarg.in_iovs; 2941 out_iovs = outarg.out_iovs; 2942 2943 /* 2944 * Make sure things are in boundary, separate checks 2945 * are to protect against overflow. 2946 */ 2947 err = -ENOMEM; 2948 if (in_iovs > FUSE_IOCTL_MAX_IOV || 2949 out_iovs > FUSE_IOCTL_MAX_IOV || 2950 in_iovs + out_iovs > FUSE_IOCTL_MAX_IOV) 2951 goto out; 2952 2953 vaddr = kmap_atomic(ap.pages[0]); 2954 err = fuse_copy_ioctl_iovec(fm->fc, iov_page, vaddr, 2955 transferred, in_iovs + out_iovs, 2956 (flags & FUSE_IOCTL_COMPAT) != 0); 2957 kunmap_atomic(vaddr); 2958 if (err) 2959 goto out; 2960 2961 in_iov = iov_page; 2962 out_iov = in_iov + in_iovs; 2963 2964 err = fuse_verify_ioctl_iov(fm->fc, in_iov, in_iovs); 2965 if (err) 2966 goto out; 2967 2968 err = fuse_verify_ioctl_iov(fm->fc, out_iov, out_iovs); 2969 if (err) 2970 goto out; 2971 2972 goto retry; 2973 } 2974 2975 err = -EIO; 2976 if (transferred > inarg.out_size) 2977 goto out; 2978 2979 err = -EFAULT; 2980 iov_iter_init(&ii, READ, out_iov, out_iovs, transferred); 2981 for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= ap.num_pages); i++) { 2982 c = copy_page_to_iter(ap.pages[i], 0, PAGE_SIZE, &ii); 2983 if (c != PAGE_SIZE && iov_iter_count(&ii)) 2984 goto out; 2985 } 2986 err = 0; 2987 out: 2988 free_page((unsigned long) iov_page); 2989 while (ap.num_pages) 2990 __free_page(ap.pages[--ap.num_pages]); 2991 kfree(ap.pages); 2992 2993 return err ? 
err : outarg.result; 2994} 2995EXPORT_SYMBOL_GPL(fuse_do_ioctl); 2996 2997long fuse_ioctl_common(struct file *file, unsigned int cmd, 2998 unsigned long arg, unsigned int flags) 2999{ 3000 struct inode *inode = file_inode(file); 3001 struct fuse_conn *fc = get_fuse_conn(inode); 3002 3003 if (!fuse_allow_current_process(fc)) 3004 return -EACCES; 3005 3006 if (fuse_is_bad(inode)) 3007 return -EIO; 3008 3009 return fuse_do_ioctl(file, cmd, arg, flags); 3010} 3011 3012static long fuse_file_ioctl(struct file *file, unsigned int cmd, 3013 unsigned long arg) 3014{ 3015 return fuse_ioctl_common(file, cmd, arg, 0); 3016} 3017 3018static long fuse_file_compat_ioctl(struct file *file, unsigned int cmd, 3019 unsigned long arg) 3020{ 3021 return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_COMPAT); 3022} 3023 3024/* 3025 * All files which have been polled are linked to RB tree 3026 * fuse_conn->polled_files which is indexed by kh. Walk the tree and 3027 * find the matching one. 3028 */ 3029static struct rb_node **fuse_find_polled_node(struct fuse_conn *fc, u64 kh, 3030 struct rb_node **parent_out) 3031{ 3032 struct rb_node **link = &fc->polled_files.rb_node; 3033 struct rb_node *last = NULL; 3034 3035 while (*link) { 3036 struct fuse_file *ff; 3037 3038 last = *link; 3039 ff = rb_entry(last, struct fuse_file, polled_node); 3040 3041 if (kh < ff->kh) 3042 link = &last->rb_left; 3043 else if (kh > ff->kh) 3044 link = &last->rb_right; 3045 else 3046 return link; 3047 } 3048 3049 if (parent_out) 3050 *parent_out = last; 3051 return link; 3052} 3053 3054/* 3055 * The file is about to be polled. Make sure it's on the polled_files 3056 * RB tree. Note that files once added to the polled_files tree are 3057 * not removed before the file is released. This is because a file 3058 * polled once is likely to be polled again. 3059 */ 3060static void fuse_register_polled_file(struct fuse_conn *fc, 3061 struct fuse_file *ff) 3062{ 3063 spin_lock(&fc->lock); 3064 if (RB_EMPTY_NODE(&ff->polled_node)) { 3065 struct rb_node **link, *parent; 3066 3067 link = fuse_find_polled_node(fc, ff->kh, &parent); 3068 BUG_ON(*link); 3069 rb_link_node(&ff->polled_node, parent, link); 3070 rb_insert_color(&ff->polled_node, &fc->polled_files); 3071 } 3072 spin_unlock(&fc->lock); 3073} 3074 3075__poll_t fuse_file_poll(struct file *file, poll_table *wait) 3076{ 3077 struct fuse_file *ff = file->private_data; 3078 struct fuse_mount *fm = ff->fm; 3079 struct fuse_poll_in inarg = { .fh = ff->fh, .kh = ff->kh }; 3080 struct fuse_poll_out outarg; 3081 FUSE_ARGS(args); 3082 int err; 3083 3084 if (fm->fc->no_poll) 3085 return DEFAULT_POLLMASK; 3086 3087 poll_wait(file, &ff->poll_wait, wait); 3088 inarg.events = mangle_poll(poll_requested_events(wait)); 3089 3090 /* 3091 * Ask for notification iff there's someone waiting for it. 3092 * The client may ignore the flag and always notify. 
3093 */ 3094 if (waitqueue_active(&ff->poll_wait)) { 3095 inarg.flags |= FUSE_POLL_SCHEDULE_NOTIFY; 3096 fuse_register_polled_file(fm->fc, ff); 3097 } 3098 3099 args.opcode = FUSE_POLL; 3100 args.nodeid = ff->nodeid; 3101 args.in_numargs = 1; 3102 args.in_args[0].size = sizeof(inarg); 3103 args.in_args[0].value = &inarg; 3104 args.out_numargs = 1; 3105 args.out_args[0].size = sizeof(outarg); 3106 args.out_args[0].value = &outarg; 3107 err = fuse_simple_request(fm, &args); 3108 3109 if (!err) 3110 return demangle_poll(outarg.revents); 3111 if (err == -ENOSYS) { 3112 fm->fc->no_poll = 1; 3113 return DEFAULT_POLLMASK; 3114 } 3115 return EPOLLERR; 3116} 3117EXPORT_SYMBOL_GPL(fuse_file_poll); 3118 3119/* 3120 * This is called from fuse_handle_notify() on FUSE_NOTIFY_POLL and 3121 * wakes up the poll waiters. 3122 */ 3123int fuse_notify_poll_wakeup(struct fuse_conn *fc, 3124 struct fuse_notify_poll_wakeup_out *outarg) 3125{ 3126 u64 kh = outarg->kh; 3127 struct rb_node **link; 3128 3129 spin_lock(&fc->lock); 3130 3131 link = fuse_find_polled_node(fc, kh, NULL); 3132 if (*link) { 3133 struct fuse_file *ff; 3134 3135 ff = rb_entry(*link, struct fuse_file, polled_node); 3136 wake_up_interruptible_sync(&ff->poll_wait); 3137 } 3138 3139 spin_unlock(&fc->lock); 3140 return 0; 3141} 3142 3143static void fuse_do_truncate(struct file *file) 3144{ 3145 struct inode *inode = file->f_mapping->host; 3146 struct iattr attr; 3147 3148 attr.ia_valid = ATTR_SIZE; 3149 attr.ia_size = i_size_read(inode); 3150 3151 attr.ia_file = file; 3152 attr.ia_valid |= ATTR_FILE; 3153 3154 fuse_do_setattr(file_dentry(file), &attr, file); 3155} 3156 3157static inline loff_t fuse_round_up(struct fuse_conn *fc, loff_t off) 3158{ 3159 return round_up(off, fc->max_pages << PAGE_SHIFT); 3160} 3161 3162static ssize_t 3163fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter) 3164{ 3165 DECLARE_COMPLETION_ONSTACK(wait); 3166 ssize_t ret = 0; 3167 struct file *file = iocb->ki_filp; 3168 struct fuse_file *ff = file->private_data; 3169 loff_t pos = 0; 3170 struct inode *inode; 3171 loff_t i_size; 3172 size_t count = iov_iter_count(iter), shortened = 0; 3173 loff_t offset = iocb->ki_pos; 3174 struct fuse_io_priv *io; 3175 3176 pos = offset; 3177 inode = file->f_mapping->host; 3178 i_size = i_size_read(inode); 3179 3180 if ((iov_iter_rw(iter) == READ) && (offset >= i_size)) 3181 return 0; 3182 3183 io = kmalloc(sizeof(struct fuse_io_priv), GFP_KERNEL); 3184 if (!io) 3185 return -ENOMEM; 3186 spin_lock_init(&io->lock); 3187 kref_init(&io->refcnt); 3188 io->reqs = 1; 3189 io->bytes = -1; 3190 io->size = 0; 3191 io->offset = offset; 3192 io->write = (iov_iter_rw(iter) == WRITE); 3193 io->err = 0; 3194 /* 3195 * By default, we want to optimize all I/Os with async request 3196 * submission to the client filesystem if supported. 3197 */ 3198 io->async = ff->fm->fc->async_dio; 3199 io->iocb = iocb; 3200 io->blocking = is_sync_kiocb(iocb); 3201 3202 /* optimization for short read */ 3203 if (io->async && !io->write && offset + count > i_size) { 3204 iov_iter_truncate(iter, fuse_round_up(ff->fm->fc, i_size - offset)); 3205 shortened = count - iov_iter_count(iter); 3206 count -= shortened; 3207 } 3208 3209 /* 3210 * We cannot asynchronously extend the size of a file. 3211 * In such case the aio will behave exactly like sync io. 
3212 */ 3213 if ((offset + count > i_size) && io->write) 3214 io->blocking = true; 3215 3216 if (io->async && io->blocking) { 3217 /* 3218 * Additional reference to keep io around after 3219 * calling fuse_aio_complete() 3220 */ 3221 kref_get(&io->refcnt); 3222 io->done = &wait; 3223 } 3224 3225 if (iov_iter_rw(iter) == WRITE) { 3226 ret = fuse_direct_io(io, iter, &pos, FUSE_DIO_WRITE); 3227 fuse_invalidate_attr(inode); 3228 } else { 3229 ret = __fuse_direct_read(io, iter, &pos); 3230 } 3231 iov_iter_reexpand(iter, iov_iter_count(iter) + shortened); 3232 3233 if (io->async) { 3234 bool blocking = io->blocking; 3235 3236 fuse_aio_complete(io, ret < 0 ? ret : 0, -1); 3237 3238 /* we have a non-extending, async request, so return */ 3239 if (!blocking) 3240 return -EIOCBQUEUED; 3241 3242 wait_for_completion(&wait); 3243 ret = fuse_get_res_by_io(io); 3244 } 3245 3246 kref_put(&io->refcnt, fuse_io_release); 3247 3248 if (iov_iter_rw(iter) == WRITE) { 3249 if (ret > 0) 3250 fuse_write_update_size(inode, pos); 3251 else if (ret < 0 && offset + count > i_size) 3252 fuse_do_truncate(file); 3253 } 3254 3255 return ret; 3256} 3257 3258static int fuse_writeback_range(struct inode *inode, loff_t start, loff_t end) 3259{ 3260 int err = filemap_write_and_wait_range(inode->i_mapping, start, LLONG_MAX); 3261 3262 if (!err) 3263 fuse_sync_writes(inode); 3264 3265 return err; 3266} 3267 3268static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, 3269 loff_t length) 3270{ 3271 struct fuse_file *ff = file->private_data; 3272 struct inode *inode = file_inode(file); 3273 struct fuse_inode *fi = get_fuse_inode(inode); 3274 struct fuse_mount *fm = ff->fm; 3275 FUSE_ARGS(args); 3276 struct fuse_fallocate_in inarg = { 3277 .fh = ff->fh, 3278 .offset = offset, 3279 .length = length, 3280 .mode = mode 3281 }; 3282 int err; 3283 bool block_faults = FUSE_IS_DAX(inode) && 3284 (!(mode & FALLOC_FL_KEEP_SIZE) || 3285 (mode & FALLOC_FL_PUNCH_HOLE)); 3286 3287 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) 3288 return -EOPNOTSUPP; 3289 3290 if (fm->fc->no_fallocate) 3291 return -EOPNOTSUPP; 3292 3293 inode_lock(inode); 3294 if (block_faults) { 3295 down_write(&fi->i_mmap_sem); 3296 err = fuse_dax_break_layouts(inode, 0, 0); 3297 if (err) 3298 goto out; 3299 } 3300 3301 if (mode & FALLOC_FL_PUNCH_HOLE) { 3302 loff_t endbyte = offset + length - 1; 3303 3304 err = fuse_writeback_range(inode, offset, endbyte); 3305 if (err) 3306 goto out; 3307 } 3308 3309 if (!(mode & FALLOC_FL_KEEP_SIZE) && 3310 offset + length > i_size_read(inode)) { 3311 err = inode_newsize_ok(inode, offset + length); 3312 if (err) 3313 goto out; 3314 } 3315 3316 err = file_modified(file); 3317 if (err) 3318 goto out; 3319 3320 if (!(mode & FALLOC_FL_KEEP_SIZE)) 3321 set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); 3322 3323 args.opcode = FUSE_FALLOCATE; 3324 args.nodeid = ff->nodeid; 3325 args.in_numargs = 1; 3326 args.in_args[0].size = sizeof(inarg); 3327 args.in_args[0].value = &inarg; 3328 err = fuse_simple_request(fm, &args); 3329 if (err == -ENOSYS) { 3330 fm->fc->no_fallocate = 1; 3331 err = -EOPNOTSUPP; 3332 } 3333 if (err) 3334 goto out; 3335 3336 /* we could have extended the file */ 3337 if (!(mode & FALLOC_FL_KEEP_SIZE)) { 3338 bool changed = fuse_write_update_size(inode, offset + length); 3339 3340 if (changed && fm->fc->writeback_cache) 3341 file_update_time(file); 3342 } 3343 3344 if (mode & FALLOC_FL_PUNCH_HOLE) 3345 truncate_pagecache_range(inode, offset, offset + length - 1); 3346 3347 fuse_invalidate_attr(inode); 
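	/*
	 * Common exit path for fuse_file_fallocate(): clear the size-unstable
	 * flag if it was set above, re-allow DAX page faults if they were
	 * blocked, and drop the inode lock before returning.
	 */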
3348 3349out: 3350 if (!(mode & FALLOC_FL_KEEP_SIZE)) 3351 clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); 3352 3353 if (block_faults) 3354 up_write(&fi->i_mmap_sem); 3355 3356 inode_unlock(inode); 3357 3358 fuse_flush_time_update(inode); 3359 3360 return err; 3361} 3362 3363static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in, 3364 struct file *file_out, loff_t pos_out, 3365 size_t len, unsigned int flags) 3366{ 3367 struct fuse_file *ff_in = file_in->private_data; 3368 struct fuse_file *ff_out = file_out->private_data; 3369 struct inode *inode_in = file_inode(file_in); 3370 struct inode *inode_out = file_inode(file_out); 3371 struct fuse_inode *fi_out = get_fuse_inode(inode_out); 3372 struct fuse_mount *fm = ff_in->fm; 3373 struct fuse_conn *fc = fm->fc; 3374 FUSE_ARGS(args); 3375 struct fuse_copy_file_range_in inarg = { 3376 .fh_in = ff_in->fh, 3377 .off_in = pos_in, 3378 .nodeid_out = ff_out->nodeid, 3379 .fh_out = ff_out->fh, 3380 .off_out = pos_out, 3381 .len = len, 3382 .flags = flags 3383 }; 3384 struct fuse_write_out outarg; 3385 ssize_t err; 3386 /* mark unstable when write-back is not used, and file_out gets 3387 * extended */ 3388 bool is_unstable = (!fc->writeback_cache) && 3389 ((pos_out + len) > inode_out->i_size); 3390 3391 if (fc->no_copy_file_range) 3392 return -EOPNOTSUPP; 3393 3394 if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) 3395 return -EXDEV; 3396 3397 inode_lock(inode_in); 3398 err = fuse_writeback_range(inode_in, pos_in, pos_in + len - 1); 3399 inode_unlock(inode_in); 3400 if (err) 3401 return err; 3402 3403 inode_lock(inode_out); 3404 3405 err = file_modified(file_out); 3406 if (err) 3407 goto out; 3408 3409 /* 3410 * Write out dirty pages in the destination file before sending the COPY 3411 * request to userspace. After the request is completed, truncate off 3412 * pages (including partial ones) from the cache that have been copied, 3413 * since these contain stale data at that point. 3414 * 3415 * This should be mostly correct, but if the COPY writes to partial 3416 * pages (at the start or end) and the parts not covered by the COPY are 3417 * written through a memory map after calling fuse_writeback_range(), 3418 * then these partial page modifications will be lost on truncation. 3419 * 3420 * It is unlikely that someone would rely on such mixed style 3421 * modifications. Yet this does give fewer guarantees than if the 3422 * copying were performed with write(2). 3423 * 3424 * To fix this, an i_mmap_sem style lock could be used to prevent new 3425 * faults while the copy is ongoing.
3426 */ 3427 err = fuse_writeback_range(inode_out, pos_out, pos_out + len - 1); 3428 if (err) 3429 goto out; 3430 3431 if (is_unstable) 3432 set_bit(FUSE_I_SIZE_UNSTABLE, &fi_out->state); 3433 3434 args.opcode = FUSE_COPY_FILE_RANGE; 3435 args.nodeid = ff_in->nodeid; 3436 args.in_numargs = 1; 3437 args.in_args[0].size = sizeof(inarg); 3438 args.in_args[0].value = &inarg; 3439 args.out_numargs = 1; 3440 args.out_args[0].size = sizeof(outarg); 3441 args.out_args[0].value = &outarg; 3442 err = fuse_simple_request(fm, &args); 3443 if (err == -ENOSYS) { 3444 fc->no_copy_file_range = 1; 3445 err = -EOPNOTSUPP; 3446 } 3447 if (err) 3448 goto out; 3449 3450 truncate_inode_pages_range(inode_out->i_mapping, 3451 ALIGN_DOWN(pos_out, PAGE_SIZE), 3452 ALIGN(pos_out + outarg.size, PAGE_SIZE) - 1); 3453 3454 if (fc->writeback_cache) { 3455 fuse_write_update_size(inode_out, pos_out + outarg.size); 3456 file_update_time(file_out); 3457 } 3458 3459 fuse_invalidate_attr(inode_out); 3460 3461 err = outarg.size; 3462out: 3463 if (is_unstable) 3464 clear_bit(FUSE_I_SIZE_UNSTABLE, &fi_out->state); 3465 3466 inode_unlock(inode_out); 3467 file_accessed(file_in); 3468 3469 fuse_flush_time_update(inode_out); 3470 3471 return err; 3472} 3473 3474static ssize_t fuse_copy_file_range(struct file *src_file, loff_t src_off, 3475 struct file *dst_file, loff_t dst_off, 3476 size_t len, unsigned int flags) 3477{ 3478 ssize_t ret; 3479 3480 ret = __fuse_copy_file_range(src_file, src_off, dst_file, dst_off, 3481 len, flags); 3482 3483 if (ret == -EOPNOTSUPP || ret == -EXDEV) 3484 ret = generic_copy_file_range(src_file, src_off, dst_file, 3485 dst_off, len, flags); 3486 return ret; 3487} 3488 3489static const struct file_operations fuse_file_operations = { 3490 .llseek = fuse_file_llseek, 3491 .read_iter = fuse_file_read_iter, 3492 .write_iter = fuse_file_write_iter, 3493 .mmap = fuse_file_mmap, 3494 .open = fuse_open, 3495 .flush = fuse_flush, 3496 .release = fuse_release, 3497 .fsync = fuse_fsync, 3498 .lock = fuse_file_lock, 3499 .get_unmapped_area = thp_get_unmapped_area, 3500 .flock = fuse_file_flock, 3501 .splice_read = generic_file_splice_read, 3502 .splice_write = iter_file_splice_write, 3503 .unlocked_ioctl = fuse_file_ioctl, 3504 .compat_ioctl = fuse_file_compat_ioctl, 3505 .poll = fuse_file_poll, 3506 .fallocate = fuse_file_fallocate, 3507 .copy_file_range = fuse_copy_file_range, 3508}; 3509 3510static const struct address_space_operations fuse_file_aops = { 3511 .readpage = fuse_readpage, 3512 .readahead = fuse_readahead, 3513 .writepage = fuse_writepage, 3514 .writepages = fuse_writepages, 3515 .launder_page = fuse_launder_page, 3516 .set_page_dirty = __set_page_dirty_nobuffers, 3517 .bmap = fuse_bmap, 3518 .direct_IO = fuse_direct_IO, 3519 .write_begin = fuse_write_begin, 3520 .write_end = fuse_write_end, 3521}; 3522 3523void fuse_init_file_inode(struct inode *inode) 3524{ 3525 struct fuse_inode *fi = get_fuse_inode(inode); 3526 3527 inode->i_fop = &fuse_file_operations; 3528 inode->i_data.a_ops = &fuse_file_aops; 3529 3530 INIT_LIST_HEAD(&fi->write_files); 3531 INIT_LIST_HEAD(&fi->queued_writes); 3532 fi->writectr = 0; 3533 init_waitqueue_head(&fi->page_waitq); 3534 fi->writepages = RB_ROOT; 3535 3536 if (IS_ENABLED(CONFIG_FUSE_DAX)) 3537 fuse_dax_inode_init(inode); 3538} 3539
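/*
 * Illustrative sketch, guarded by #if 0 so it is not built: one way a
 * userspace FUSE server might fill its first FUSE_IOCTL reply for the
 * "struct a" example in the comment above fuse_do_ioctl().  The helper name
 * is hypothetical; the sketch assumes the userspace definitions of struct
 * fuse_ioctl_out and struct fuse_ioctl_iovec from the FUSE uapi header
 * <linux/fuse.h>, plus <stdint.h> and <stddef.h>.
 */
#if 0
struct a {
	char *buf;
	size_t buflen;
};

static void fill_first_ioctl_retry(struct fuse_ioctl_out *out,
				   struct fuse_ioctl_iovec *in_iov,
				   uint64_t arg)
{
	out->result = 0;
	out->flags = FUSE_IOCTL_RETRY;	/* ask the kernel to copy in and retry */
	out->in_iovs = 1;		/* one input segment follows */
	out->out_iovs = 0;		/* nothing to copy out yet */

	/*
	 * First round: request struct a itself.  Its contents (a.buf and
	 * a.buflen) tell the server what to ask for on the second round.
	 */
	in_iov[0].base = arg;		/* caller's pointer to struct a */
	in_iov[0].len = sizeof(struct a);
}
#endif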