1// SPDX-License-Identifier: GPL-2.0 2/* 3 * virtio-fs: Virtio Filesystem 4 * Copyright (C) 2018 Red Hat, Inc. 5 */ 6 7#include <linux/fs.h> 8#include <linux/dax.h> 9#include <linux/pci.h> 10#include <linux/pfn_t.h> 11#include <linux/module.h> 12#include <linux/virtio.h> 13#include <linux/virtio_fs.h> 14#include <linux/delay.h> 15#include <linux/fs_context.h> 16#include <linux/fs_parser.h> 17#include <linux/highmem.h> 18#include <linux/uio.h> 19#include "fuse_i.h" 20 21/* Used to help calculate the FUSE connection's max_pages limit for a request's 22 * size. Parts of the struct fuse_req are sliced into scattergather lists in 23 * addition to the pages used, so this can help account for that overhead. 24 */ 25#define FUSE_HEADER_OVERHEAD 4 26 27/* List of virtio-fs device instances and a lock for the list. Also provides 28 * mutual exclusion in device removal and mounting path 29 */ 30static DEFINE_MUTEX(virtio_fs_mutex); 31static LIST_HEAD(virtio_fs_instances); 32 33enum { 34 VQ_HIPRIO, 35 VQ_REQUEST 36}; 37 38#define VQ_NAME_LEN 24 39 40/* Per-virtqueue state */ 41struct virtio_fs_vq { 42 spinlock_t lock; 43 struct virtqueue *vq; /* protected by ->lock */ 44 struct work_struct done_work; 45 struct list_head queued_reqs; 46 struct list_head end_reqs; /* End these requests */ 47 struct delayed_work dispatch_work; 48 struct fuse_dev *fud; 49 bool connected; 50 long in_flight; 51 struct completion in_flight_zero; /* No inflight requests */ 52 char name[VQ_NAME_LEN]; 53} ____cacheline_aligned_in_smp; 54 55/* A virtio-fs device instance */ 56struct virtio_fs { 57 struct kref refcount; 58 struct list_head list; /* on virtio_fs_instances */ 59 char *tag; 60 struct virtio_fs_vq *vqs; 61 unsigned int nvqs; /* number of virtqueues */ 62 unsigned int num_request_queues; /* number of request queues */ 63 struct dax_device *dax_dev; 64 65 /* DAX memory window where file contents are mapped */ 66 void *window_kaddr; 67 phys_addr_t window_phys_addr; 68 size_t window_len; 69}; 70 
71struct virtio_fs_forget_req { 72 struct fuse_in_header ih; 73 struct fuse_forget_in arg; 74}; 75 76struct virtio_fs_forget { 77 /* This request can be temporarily queued on virt queue */ 78 struct list_head list; 79 struct virtio_fs_forget_req req; 80}; 81 82struct virtio_fs_req_work { 83 struct fuse_req *req; 84 struct virtio_fs_vq *fsvq; 85 struct work_struct done_work; 86}; 87 88static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, 89 struct fuse_req *req, bool in_flight); 90 91enum { 92 OPT_DAX, 93}; 94 95static const struct fs_parameter_spec virtio_fs_parameters[] = { 96 fsparam_flag("dax", OPT_DAX), 97 {} 98}; 99 100static int virtio_fs_parse_param(struct fs_context *fc, 101 struct fs_parameter *param) 102{ 103 struct fs_parse_result result; 104 struct fuse_fs_context *ctx = fc->fs_private; 105 int opt; 106 107 opt = fs_parse(fc, virtio_fs_parameters, param, &result); 108 if (opt < 0) 109 return opt; 110 111 switch (opt) { 112 case OPT_DAX: 113 ctx->dax = 1; 114 break; 115 default: 116 return -EINVAL; 117 } 118 119 return 0; 120} 121 122static void virtio_fs_free_fc(struct fs_context *fc) 123{ 124 struct fuse_fs_context *ctx = fc->fs_private; 125 126 kfree(ctx); 127} 128 129static inline struct virtio_fs_vq *vq_to_fsvq(struct virtqueue *vq) 130{ 131 struct virtio_fs *fs = vq->vdev->priv; 132 133 return &fs->vqs[vq->index]; 134} 135 136static inline struct fuse_pqueue *vq_to_fpq(struct virtqueue *vq) 137{ 138 return &vq_to_fsvq(vq)->fud->pq; 139} 140 141/* Should be called with fsvq->lock held. */ 142static inline void inc_in_flight_req(struct virtio_fs_vq *fsvq) 143{ 144 fsvq->in_flight++; 145} 146 147/* Should be called with fsvq->lock held. 
*/ 148static inline void dec_in_flight_req(struct virtio_fs_vq *fsvq) 149{ 150 WARN_ON(fsvq->in_flight <= 0); 151 fsvq->in_flight--; 152 if (!fsvq->in_flight) 153 complete(&fsvq->in_flight_zero); 154} 155 156static void release_virtio_fs_obj(struct kref *ref) 157{ 158 struct virtio_fs *vfs = container_of(ref, struct virtio_fs, refcount); 159 160 kfree(vfs->vqs); 161 kfree(vfs); 162} 163 164/* Make sure virtiofs_mutex is held */ 165static void virtio_fs_put(struct virtio_fs *fs) 166{ 167 kref_put(&fs->refcount, release_virtio_fs_obj); 168} 169 170static void virtio_fs_fiq_release(struct fuse_iqueue *fiq) 171{ 172 struct virtio_fs *vfs = fiq->priv; 173 174 mutex_lock(&virtio_fs_mutex); 175 virtio_fs_put(vfs); 176 mutex_unlock(&virtio_fs_mutex); 177} 178 179static void virtio_fs_drain_queue(struct virtio_fs_vq *fsvq) 180{ 181 WARN_ON(fsvq->in_flight < 0); 182 183 /* Wait for in flight requests to finish.*/ 184 spin_lock(&fsvq->lock); 185 if (fsvq->in_flight) { 186 /* We are holding virtio_fs_mutex. There should not be any 187 * waiters waiting for completion. 188 */ 189 reinit_completion(&fsvq->in_flight_zero); 190 spin_unlock(&fsvq->lock); 191 wait_for_completion(&fsvq->in_flight_zero); 192 } else { 193 spin_unlock(&fsvq->lock); 194 } 195 196 flush_work(&fsvq->done_work); 197 flush_delayed_work(&fsvq->dispatch_work); 198} 199 200static void virtio_fs_drain_all_queues_locked(struct virtio_fs *fs) 201{ 202 struct virtio_fs_vq *fsvq; 203 int i; 204 205 for (i = 0; i < fs->nvqs; i++) { 206 fsvq = &fs->vqs[i]; 207 virtio_fs_drain_queue(fsvq); 208 } 209} 210 211static void virtio_fs_drain_all_queues(struct virtio_fs *fs) 212{ 213 /* Provides mutual exclusion between ->remove and ->kill_sb 214 * paths. We don't want both of these draining queue at the 215 * same time. Current completion logic reinits completion 216 * and that means there should not be any other thread 217 * doing reinit or waiting for completion already. 
218 */ 219 mutex_lock(&virtio_fs_mutex); 220 virtio_fs_drain_all_queues_locked(fs); 221 mutex_unlock(&virtio_fs_mutex); 222} 223 224static void virtio_fs_start_all_queues(struct virtio_fs *fs) 225{ 226 struct virtio_fs_vq *fsvq; 227 int i; 228 229 for (i = 0; i < fs->nvqs; i++) { 230 fsvq = &fs->vqs[i]; 231 spin_lock(&fsvq->lock); 232 fsvq->connected = true; 233 spin_unlock(&fsvq->lock); 234 } 235} 236 237/* Add a new instance to the list or return -EEXIST if tag name exists*/ 238static int virtio_fs_add_instance(struct virtio_fs *fs) 239{ 240 struct virtio_fs *fs2; 241 bool duplicate = false; 242 243 mutex_lock(&virtio_fs_mutex); 244 245 list_for_each_entry(fs2, &virtio_fs_instances, list) { 246 if (strcmp(fs->tag, fs2->tag) == 0) 247 duplicate = true; 248 } 249 250 if (!duplicate) 251 list_add_tail(&fs->list, &virtio_fs_instances); 252 253 mutex_unlock(&virtio_fs_mutex); 254 255 if (duplicate) 256 return -EEXIST; 257 return 0; 258} 259 260/* Return the virtio_fs with a given tag, or NULL */ 261static struct virtio_fs *virtio_fs_find_instance(const char *tag) 262{ 263 struct virtio_fs *fs; 264 265 mutex_lock(&virtio_fs_mutex); 266 267 list_for_each_entry(fs, &virtio_fs_instances, list) { 268 if (strcmp(fs->tag, tag) == 0) { 269 kref_get(&fs->refcount); 270 goto found; 271 } 272 } 273 274 fs = NULL; /* not found */ 275 276found: 277 mutex_unlock(&virtio_fs_mutex); 278 279 return fs; 280} 281 282static void virtio_fs_free_devs(struct virtio_fs *fs) 283{ 284 unsigned int i; 285 286 for (i = 0; i < fs->nvqs; i++) { 287 struct virtio_fs_vq *fsvq = &fs->vqs[i]; 288 289 if (!fsvq->fud) 290 continue; 291 292 fuse_dev_free(fsvq->fud); 293 fsvq->fud = NULL; 294 } 295} 296 297/* Read filesystem name from virtio config into fs->tag (must kfree()). 
*/ 298static int virtio_fs_read_tag(struct virtio_device *vdev, struct virtio_fs *fs) 299{ 300 char tag_buf[sizeof_field(struct virtio_fs_config, tag)]; 301 char *end; 302 size_t len; 303 304 virtio_cread_bytes(vdev, offsetof(struct virtio_fs_config, tag), 305 &tag_buf, sizeof(tag_buf)); 306 end = memchr(tag_buf, '\0', sizeof(tag_buf)); 307 if (end == tag_buf) 308 return -EINVAL; /* empty tag */ 309 if (!end) 310 end = &tag_buf[sizeof(tag_buf)]; 311 312 len = end - tag_buf; 313 fs->tag = devm_kmalloc(&vdev->dev, len + 1, GFP_KERNEL); 314 if (!fs->tag) 315 return -ENOMEM; 316 memcpy(fs->tag, tag_buf, len); 317 fs->tag[len] = '\0'; 318 return 0; 319} 320 321/* Work function for hiprio completion */ 322static void virtio_fs_hiprio_done_work(struct work_struct *work) 323{ 324 struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, 325 done_work); 326 struct virtqueue *vq = fsvq->vq; 327 328 /* Free completed FUSE_FORGET requests */ 329 spin_lock(&fsvq->lock); 330 do { 331 unsigned int len; 332 void *req; 333 334 virtqueue_disable_cb(vq); 335 336 while ((req = virtqueue_get_buf(vq, &len)) != NULL) { 337 kfree(req); 338 dec_in_flight_req(fsvq); 339 } 340 } while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq))); 341 spin_unlock(&fsvq->lock); 342} 343 344static void virtio_fs_request_dispatch_work(struct work_struct *work) 345{ 346 struct fuse_req *req; 347 struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, 348 dispatch_work.work); 349 int ret; 350 351 pr_debug("virtio-fs: worker %s called.\n", __func__); 352 while (1) { 353 spin_lock(&fsvq->lock); 354 req = list_first_entry_or_null(&fsvq->end_reqs, struct fuse_req, 355 list); 356 if (!req) { 357 spin_unlock(&fsvq->lock); 358 break; 359 } 360 361 list_del_init(&req->list); 362 spin_unlock(&fsvq->lock); 363 fuse_request_end(req); 364 } 365 366 /* Dispatch pending requests */ 367 while (1) { 368 spin_lock(&fsvq->lock); 369 req = list_first_entry_or_null(&fsvq->queued_reqs, 370 
struct fuse_req, list); 371 if (!req) { 372 spin_unlock(&fsvq->lock); 373 return; 374 } 375 list_del_init(&req->list); 376 spin_unlock(&fsvq->lock); 377 378 ret = virtio_fs_enqueue_req(fsvq, req, true); 379 if (ret < 0) { 380 if (ret == -ENOMEM || ret == -ENOSPC) { 381 spin_lock(&fsvq->lock); 382 list_add_tail(&req->list, &fsvq->queued_reqs); 383 schedule_delayed_work(&fsvq->dispatch_work, 384 msecs_to_jiffies(1)); 385 spin_unlock(&fsvq->lock); 386 return; 387 } 388 req->out.h.error = ret; 389 spin_lock(&fsvq->lock); 390 dec_in_flight_req(fsvq); 391 spin_unlock(&fsvq->lock); 392 pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", 393 ret); 394 fuse_request_end(req); 395 } 396 } 397} 398 399/* 400 * Returns 1 if queue is full and sender should wait a bit before sending 401 * next request, 0 otherwise. 402 */ 403static int send_forget_request(struct virtio_fs_vq *fsvq, 404 struct virtio_fs_forget *forget, 405 bool in_flight) 406{ 407 struct scatterlist sg; 408 struct virtqueue *vq; 409 int ret = 0; 410 bool notify; 411 struct virtio_fs_forget_req *req = &forget->req; 412 413 spin_lock(&fsvq->lock); 414 if (!fsvq->connected) { 415 if (in_flight) 416 dec_in_flight_req(fsvq); 417 kfree(forget); 418 goto out; 419 } 420 421 sg_init_one(&sg, req, sizeof(*req)); 422 vq = fsvq->vq; 423 dev_dbg(&vq->vdev->dev, "%s\n", __func__); 424 425 ret = virtqueue_add_outbuf(vq, &sg, 1, forget, GFP_ATOMIC); 426 if (ret < 0) { 427 if (ret == -ENOMEM || ret == -ENOSPC) { 428 pr_debug("virtio-fs: Could not queue FORGET: err=%d. Will try later\n", 429 ret); 430 list_add_tail(&forget->list, &fsvq->queued_reqs); 431 schedule_delayed_work(&fsvq->dispatch_work, 432 msecs_to_jiffies(1)); 433 if (!in_flight) 434 inc_in_flight_req(fsvq); 435 /* Queue is full */ 436 ret = 1; 437 } else { 438 pr_debug("virtio-fs: Could not queue FORGET: err=%d. 
Dropping it.\n", 439 ret); 440 kfree(forget); 441 if (in_flight) 442 dec_in_flight_req(fsvq); 443 } 444 goto out; 445 } 446 447 if (!in_flight) 448 inc_in_flight_req(fsvq); 449 notify = virtqueue_kick_prepare(vq); 450 spin_unlock(&fsvq->lock); 451 452 if (notify) 453 virtqueue_notify(vq); 454 return ret; 455out: 456 spin_unlock(&fsvq->lock); 457 return ret; 458} 459 460static void virtio_fs_hiprio_dispatch_work(struct work_struct *work) 461{ 462 struct virtio_fs_forget *forget; 463 struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, 464 dispatch_work.work); 465 pr_debug("virtio-fs: worker %s called.\n", __func__); 466 while (1) { 467 spin_lock(&fsvq->lock); 468 forget = list_first_entry_or_null(&fsvq->queued_reqs, 469 struct virtio_fs_forget, list); 470 if (!forget) { 471 spin_unlock(&fsvq->lock); 472 return; 473 } 474 475 list_del(&forget->list); 476 spin_unlock(&fsvq->lock); 477 if (send_forget_request(fsvq, forget, true)) 478 return; 479 } 480} 481 482/* Allocate and copy args into req->argbuf */ 483static int copy_args_to_argbuf(struct fuse_req *req) 484{ 485 struct fuse_args *args = req->args; 486 unsigned int offset = 0; 487 unsigned int num_in; 488 unsigned int num_out; 489 unsigned int len; 490 unsigned int i; 491 492 num_in = args->in_numargs - args->in_pages; 493 num_out = args->out_numargs - args->out_pages; 494 len = fuse_len_args(num_in, (struct fuse_arg *) args->in_args) + 495 fuse_len_args(num_out, args->out_args); 496 497 req->argbuf = kmalloc(len, GFP_ATOMIC); 498 if (!req->argbuf) 499 return -ENOMEM; 500 501 for (i = 0; i < num_in; i++) { 502 memcpy(req->argbuf + offset, 503 args->in_args[i].value, 504 args->in_args[i].size); 505 offset += args->in_args[i].size; 506 } 507 508 return 0; 509} 510 511/* Copy args out of and free req->argbuf */ 512static void copy_args_from_argbuf(struct fuse_args *args, struct fuse_req *req) 513{ 514 unsigned int remaining; 515 unsigned int offset; 516 unsigned int num_in; 517 unsigned int num_out; 
518 unsigned int i; 519 520 remaining = req->out.h.len - sizeof(req->out.h); 521 num_in = args->in_numargs - args->in_pages; 522 num_out = args->out_numargs - args->out_pages; 523 offset = fuse_len_args(num_in, (struct fuse_arg *)args->in_args); 524 525 for (i = 0; i < num_out; i++) { 526 unsigned int argsize = args->out_args[i].size; 527 528 if (args->out_argvar && 529 i == args->out_numargs - 1 && 530 argsize > remaining) { 531 argsize = remaining; 532 } 533 534 memcpy(args->out_args[i].value, req->argbuf + offset, argsize); 535 offset += argsize; 536 537 if (i != args->out_numargs - 1) 538 remaining -= argsize; 539 } 540 541 /* Store the actual size of the variable-length arg */ 542 if (args->out_argvar) 543 args->out_args[args->out_numargs - 1].size = remaining; 544 545 kfree(req->argbuf); 546 req->argbuf = NULL; 547} 548 549/* Work function for request completion */ 550static void virtio_fs_request_complete(struct fuse_req *req, 551 struct virtio_fs_vq *fsvq) 552{ 553 struct fuse_pqueue *fpq = &fsvq->fud->pq; 554 struct fuse_args *args; 555 struct fuse_args_pages *ap; 556 unsigned int len, i, thislen; 557 struct page *page; 558 559 /* 560 * TODO verify that server properly follows FUSE protocol 561 * (oh.uniq, oh.len) 562 */ 563 args = req->args; 564 copy_args_from_argbuf(args, req); 565 566 if (args->out_pages && args->page_zeroing) { 567 len = args->out_args[args->out_numargs - 1].size; 568 ap = container_of(args, typeof(*ap), args); 569 for (i = 0; i < ap->num_pages; i++) { 570 thislen = ap->descs[i].length; 571 if (len < thislen) { 572 WARN_ON(ap->descs[i].offset); 573 page = ap->pages[i]; 574 zero_user_segment(page, len, thislen); 575 len = 0; 576 } else { 577 len -= thislen; 578 } 579 } 580 } 581 582 spin_lock(&fpq->lock); 583 clear_bit(FR_SENT, &req->flags); 584 spin_unlock(&fpq->lock); 585 586 fuse_request_end(req); 587 spin_lock(&fsvq->lock); 588 dec_in_flight_req(fsvq); 589 spin_unlock(&fsvq->lock); 590} 591 592static void 
virtio_fs_complete_req_work(struct work_struct *work) 593{ 594 struct virtio_fs_req_work *w = 595 container_of(work, typeof(*w), done_work); 596 597 virtio_fs_request_complete(w->req, w->fsvq); 598 kfree(w); 599} 600 601static void virtio_fs_requests_done_work(struct work_struct *work) 602{ 603 struct virtio_fs_vq *fsvq = container_of(work, struct virtio_fs_vq, 604 done_work); 605 struct fuse_pqueue *fpq = &fsvq->fud->pq; 606 struct virtqueue *vq = fsvq->vq; 607 struct fuse_req *req; 608 struct fuse_req *next; 609 unsigned int len; 610 LIST_HEAD(reqs); 611 612 /* Collect completed requests off the virtqueue */ 613 spin_lock(&fsvq->lock); 614 do { 615 virtqueue_disable_cb(vq); 616 617 while ((req = virtqueue_get_buf(vq, &len)) != NULL) { 618 spin_lock(&fpq->lock); 619 list_move_tail(&req->list, &reqs); 620 spin_unlock(&fpq->lock); 621 } 622 } while (!virtqueue_enable_cb(vq) && likely(!virtqueue_is_broken(vq))); 623 spin_unlock(&fsvq->lock); 624 625 /* End requests */ 626 list_for_each_entry_safe(req, next, &reqs, list) { 627 list_del_init(&req->list); 628 629 /* blocking async request completes in a worker context */ 630 if (req->args->may_block) { 631 struct virtio_fs_req_work *w; 632 633 w = kzalloc(sizeof(*w), GFP_NOFS | __GFP_NOFAIL); 634 INIT_WORK(&w->done_work, virtio_fs_complete_req_work); 635 w->fsvq = fsvq; 636 w->req = req; 637 schedule_work(&w->done_work); 638 } else { 639 virtio_fs_request_complete(req, fsvq); 640 } 641 } 642} 643 644/* Virtqueue interrupt handler */ 645static void virtio_fs_vq_done(struct virtqueue *vq) 646{ 647 struct virtio_fs_vq *fsvq = vq_to_fsvq(vq); 648 649 dev_dbg(&vq->vdev->dev, "%s %s\n", __func__, fsvq->name); 650 651 schedule_work(&fsvq->done_work); 652} 653 654static void virtio_fs_init_vq(struct virtio_fs_vq *fsvq, char *name, 655 int vq_type) 656{ 657 strncpy(fsvq->name, name, VQ_NAME_LEN); 658 spin_lock_init(&fsvq->lock); 659 INIT_LIST_HEAD(&fsvq->queued_reqs); 660 INIT_LIST_HEAD(&fsvq->end_reqs); 661 
init_completion(&fsvq->in_flight_zero); 662 663 if (vq_type == VQ_REQUEST) { 664 INIT_WORK(&fsvq->done_work, virtio_fs_requests_done_work); 665 INIT_DELAYED_WORK(&fsvq->dispatch_work, 666 virtio_fs_request_dispatch_work); 667 } else { 668 INIT_WORK(&fsvq->done_work, virtio_fs_hiprio_done_work); 669 INIT_DELAYED_WORK(&fsvq->dispatch_work, 670 virtio_fs_hiprio_dispatch_work); 671 } 672} 673 674/* Initialize virtqueues */ 675static int virtio_fs_setup_vqs(struct virtio_device *vdev, 676 struct virtio_fs *fs) 677{ 678 struct virtqueue **vqs; 679 vq_callback_t **callbacks; 680 const char **names; 681 unsigned int i; 682 int ret = 0; 683 684 virtio_cread_le(vdev, struct virtio_fs_config, num_request_queues, 685 &fs->num_request_queues); 686 if (fs->num_request_queues == 0) 687 return -EINVAL; 688 689 fs->nvqs = VQ_REQUEST + fs->num_request_queues; 690 fs->vqs = kcalloc(fs->nvqs, sizeof(fs->vqs[VQ_HIPRIO]), GFP_KERNEL); 691 if (!fs->vqs) 692 return -ENOMEM; 693 694 vqs = kmalloc_array(fs->nvqs, sizeof(vqs[VQ_HIPRIO]), GFP_KERNEL); 695 callbacks = kmalloc_array(fs->nvqs, sizeof(callbacks[VQ_HIPRIO]), 696 GFP_KERNEL); 697 names = kmalloc_array(fs->nvqs, sizeof(names[VQ_HIPRIO]), GFP_KERNEL); 698 if (!vqs || !callbacks || !names) { 699 ret = -ENOMEM; 700 goto out; 701 } 702 703 /* Initialize the hiprio/forget request virtqueue */ 704 callbacks[VQ_HIPRIO] = virtio_fs_vq_done; 705 virtio_fs_init_vq(&fs->vqs[VQ_HIPRIO], "hiprio", VQ_HIPRIO); 706 names[VQ_HIPRIO] = fs->vqs[VQ_HIPRIO].name; 707 708 /* Initialize the requests virtqueues */ 709 for (i = VQ_REQUEST; i < fs->nvqs; i++) { 710 char vq_name[VQ_NAME_LEN]; 711 712 snprintf(vq_name, VQ_NAME_LEN, "requests.%u", i - VQ_REQUEST); 713 virtio_fs_init_vq(&fs->vqs[i], vq_name, VQ_REQUEST); 714 callbacks[i] = virtio_fs_vq_done; 715 names[i] = fs->vqs[i].name; 716 } 717 718 ret = virtio_find_vqs(vdev, fs->nvqs, vqs, callbacks, names, NULL); 719 if (ret < 0) 720 goto out; 721 722 for (i = 0; i < fs->nvqs; i++) 723 fs->vqs[i].vq = 
vqs[i]; 724 725 virtio_fs_start_all_queues(fs); 726out: 727 kfree(names); 728 kfree(callbacks); 729 kfree(vqs); 730 if (ret) 731 kfree(fs->vqs); 732 return ret; 733} 734 735/* Free virtqueues (device must already be reset) */ 736static void virtio_fs_cleanup_vqs(struct virtio_device *vdev, 737 struct virtio_fs *fs) 738{ 739 vdev->config->del_vqs(vdev); 740} 741 742/* Map a window offset to a page frame number. The window offset will have 743 * been produced by .iomap_begin(), which maps a file offset to a window 744 * offset. 745 */ 746static long virtio_fs_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, 747 long nr_pages, void **kaddr, pfn_t *pfn) 748{ 749 struct virtio_fs *fs = dax_get_private(dax_dev); 750 phys_addr_t offset = PFN_PHYS(pgoff); 751 size_t max_nr_pages = fs->window_len/PAGE_SIZE - pgoff; 752 753 if (kaddr) 754 *kaddr = fs->window_kaddr + offset; 755 if (pfn) 756 *pfn = phys_to_pfn_t(fs->window_phys_addr + offset, 757 PFN_DEV | PFN_MAP); 758 return nr_pages > max_nr_pages ? 
max_nr_pages : nr_pages; 759} 760 761static size_t virtio_fs_copy_from_iter(struct dax_device *dax_dev, 762 pgoff_t pgoff, void *addr, 763 size_t bytes, struct iov_iter *i) 764{ 765 return copy_from_iter(addr, bytes, i); 766} 767 768static size_t virtio_fs_copy_to_iter(struct dax_device *dax_dev, 769 pgoff_t pgoff, void *addr, 770 size_t bytes, struct iov_iter *i) 771{ 772 return copy_to_iter(addr, bytes, i); 773} 774 775static int virtio_fs_zero_page_range(struct dax_device *dax_dev, 776 pgoff_t pgoff, size_t nr_pages) 777{ 778 long rc; 779 void *kaddr; 780 781 rc = dax_direct_access(dax_dev, pgoff, nr_pages, &kaddr, NULL); 782 if (rc < 0) 783 return rc; 784 memset(kaddr, 0, nr_pages << PAGE_SHIFT); 785 dax_flush(dax_dev, kaddr, nr_pages << PAGE_SHIFT); 786 return 0; 787} 788 789static const struct dax_operations virtio_fs_dax_ops = { 790 .direct_access = virtio_fs_direct_access, 791 .copy_from_iter = virtio_fs_copy_from_iter, 792 .copy_to_iter = virtio_fs_copy_to_iter, 793 .zero_page_range = virtio_fs_zero_page_range, 794}; 795 796static void virtio_fs_cleanup_dax(void *data) 797{ 798 struct dax_device *dax_dev = data; 799 800 kill_dax(dax_dev); 801 put_dax(dax_dev); 802} 803 804static int virtio_fs_setup_dax(struct virtio_device *vdev, struct virtio_fs *fs) 805{ 806 struct virtio_shm_region cache_reg; 807 struct dev_pagemap *pgmap; 808 bool have_cache; 809 810 if (!IS_ENABLED(CONFIG_FUSE_DAX)) 811 return 0; 812 813 /* Get cache region */ 814 have_cache = virtio_get_shm_region(vdev, &cache_reg, 815 (u8)VIRTIO_FS_SHMCAP_ID_CACHE); 816 if (!have_cache) { 817 dev_notice(&vdev->dev, "%s: No cache capability\n", __func__); 818 return 0; 819 } 820 821 if (!devm_request_mem_region(&vdev->dev, cache_reg.addr, cache_reg.len, 822 dev_name(&vdev->dev))) { 823 dev_warn(&vdev->dev, "could not reserve region addr=0x%llx len=0x%llx\n", 824 cache_reg.addr, cache_reg.len); 825 return -EBUSY; 826 } 827 828 dev_notice(&vdev->dev, "Cache len: 0x%llx @ 0x%llx\n", cache_reg.len, 829 
cache_reg.addr); 830 831 pgmap = devm_kzalloc(&vdev->dev, sizeof(*pgmap), GFP_KERNEL); 832 if (!pgmap) 833 return -ENOMEM; 834 835 pgmap->type = MEMORY_DEVICE_FS_DAX; 836 837 /* Ideally we would directly use the PCI BAR resource but 838 * devm_memremap_pages() wants its own copy in pgmap. So 839 * initialize a struct resource from scratch (only the start 840 * and end fields will be used). 841 */ 842 pgmap->range = (struct range) { 843 .start = (phys_addr_t) cache_reg.addr, 844 .end = (phys_addr_t) cache_reg.addr + cache_reg.len - 1, 845 }; 846 pgmap->nr_range = 1; 847 848 fs->window_kaddr = devm_memremap_pages(&vdev->dev, pgmap); 849 if (IS_ERR(fs->window_kaddr)) 850 return PTR_ERR(fs->window_kaddr); 851 852 fs->window_phys_addr = (phys_addr_t) cache_reg.addr; 853 fs->window_len = (phys_addr_t) cache_reg.len; 854 855 dev_dbg(&vdev->dev, "%s: window kaddr 0x%px phys_addr 0x%llx len 0x%llx\n", 856 __func__, fs->window_kaddr, cache_reg.addr, cache_reg.len); 857 858 fs->dax_dev = alloc_dax(fs, NULL, &virtio_fs_dax_ops, 0); 859 if (IS_ERR(fs->dax_dev)) 860 return PTR_ERR(fs->dax_dev); 861 862 return devm_add_action_or_reset(&vdev->dev, virtio_fs_cleanup_dax, 863 fs->dax_dev); 864} 865 866static int virtio_fs_probe(struct virtio_device *vdev) 867{ 868 struct virtio_fs *fs; 869 int ret; 870 871 fs = kzalloc(sizeof(*fs), GFP_KERNEL); 872 if (!fs) 873 return -ENOMEM; 874 kref_init(&fs->refcount); 875 vdev->priv = fs; 876 877 ret = virtio_fs_read_tag(vdev, fs); 878 if (ret < 0) 879 goto out; 880 881 ret = virtio_fs_setup_vqs(vdev, fs); 882 if (ret < 0) 883 goto out; 884 885 /* TODO vq affinity */ 886 887 ret = virtio_fs_setup_dax(vdev, fs); 888 if (ret < 0) 889 goto out_vqs; 890 891 /* Bring the device online in case the filesystem is mounted and 892 * requests need to be sent before we return. 
893 */ 894 virtio_device_ready(vdev); 895 896 ret = virtio_fs_add_instance(fs); 897 if (ret < 0) 898 goto out_vqs; 899 900 return 0; 901 902out_vqs: 903 vdev->config->reset(vdev); 904 virtio_fs_cleanup_vqs(vdev, fs); 905 kfree(fs->vqs); 906 907out: 908 vdev->priv = NULL; 909 kfree(fs); 910 return ret; 911} 912 913static void virtio_fs_stop_all_queues(struct virtio_fs *fs) 914{ 915 struct virtio_fs_vq *fsvq; 916 int i; 917 918 for (i = 0; i < fs->nvqs; i++) { 919 fsvq = &fs->vqs[i]; 920 spin_lock(&fsvq->lock); 921 fsvq->connected = false; 922 spin_unlock(&fsvq->lock); 923 } 924} 925 926static void virtio_fs_remove(struct virtio_device *vdev) 927{ 928 struct virtio_fs *fs = vdev->priv; 929 930 mutex_lock(&virtio_fs_mutex); 931 /* This device is going away. No one should get new reference */ 932 list_del_init(&fs->list); 933 virtio_fs_stop_all_queues(fs); 934 virtio_fs_drain_all_queues_locked(fs); 935 vdev->config->reset(vdev); 936 virtio_fs_cleanup_vqs(vdev, fs); 937 938 vdev->priv = NULL; 939 /* Put device reference on virtio_fs object */ 940 virtio_fs_put(fs); 941 mutex_unlock(&virtio_fs_mutex); 942} 943 944#ifdef CONFIG_PM_SLEEP 945static int virtio_fs_freeze(struct virtio_device *vdev) 946{ 947 /* TODO need to save state here */ 948 pr_warn("virtio-fs: suspend/resume not yet supported\n"); 949 return -EOPNOTSUPP; 950} 951 952static int virtio_fs_restore(struct virtio_device *vdev) 953{ 954 /* TODO need to restore state here */ 955 return 0; 956} 957#endif /* CONFIG_PM_SLEEP */ 958 959static const struct virtio_device_id id_table[] = { 960 { VIRTIO_ID_FS, VIRTIO_DEV_ANY_ID }, 961 {}, 962}; 963 964static const unsigned int feature_table[] = {}; 965 966static struct virtio_driver virtio_fs_driver = { 967 .driver.name = KBUILD_MODNAME, 968 .driver.owner = THIS_MODULE, 969 .id_table = id_table, 970 .feature_table = feature_table, 971 .feature_table_size = ARRAY_SIZE(feature_table), 972 .probe = virtio_fs_probe, 973 .remove = virtio_fs_remove, 974#ifdef CONFIG_PM_SLEEP 
975 .freeze = virtio_fs_freeze, 976 .restore = virtio_fs_restore, 977#endif 978}; 979 980static void virtio_fs_wake_forget_and_unlock(struct fuse_iqueue *fiq) 981__releases(fiq->lock) 982{ 983 struct fuse_forget_link *link; 984 struct virtio_fs_forget *forget; 985 struct virtio_fs_forget_req *req; 986 struct virtio_fs *fs; 987 struct virtio_fs_vq *fsvq; 988 u64 unique; 989 990 link = fuse_dequeue_forget(fiq, 1, NULL); 991 unique = fuse_get_unique(fiq); 992 993 fs = fiq->priv; 994 fsvq = &fs->vqs[VQ_HIPRIO]; 995 spin_unlock(&fiq->lock); 996 997 /* Allocate a buffer for the request */ 998 forget = kmalloc(sizeof(*forget), GFP_NOFS | __GFP_NOFAIL); 999 req = &forget->req; 1000 1001 req->ih = (struct fuse_in_header){ 1002 .opcode = FUSE_FORGET, 1003 .nodeid = link->forget_one.nodeid, 1004 .unique = unique, 1005 .len = sizeof(*req), 1006 }; 1007 req->arg = (struct fuse_forget_in){ 1008 .nlookup = link->forget_one.nlookup, 1009 }; 1010 1011 send_forget_request(fsvq, forget, false); 1012 kfree(link); 1013} 1014 1015static void virtio_fs_wake_interrupt_and_unlock(struct fuse_iqueue *fiq) 1016__releases(fiq->lock) 1017{ 1018 /* 1019 * TODO interrupts. 1020 * 1021 * Normal fs operations on a local filesystems aren't interruptible. 1022 * Exceptions are blocking lock operations; for example fcntl(F_SETLKW) 1023 * with shared lock between host and guest. 
1024 */ 1025 spin_unlock(&fiq->lock); 1026} 1027 1028/* Count number of scatter-gather elements required */ 1029static unsigned int sg_count_fuse_pages(struct fuse_page_desc *page_descs, 1030 unsigned int num_pages, 1031 unsigned int total_len) 1032{ 1033 unsigned int i; 1034 unsigned int this_len; 1035 1036 for (i = 0; i < num_pages && total_len; i++) { 1037 this_len = min(page_descs[i].length, total_len); 1038 total_len -= this_len; 1039 } 1040 1041 return i; 1042} 1043 1044/* Return the number of scatter-gather list elements required */ 1045static unsigned int sg_count_fuse_req(struct fuse_req *req) 1046{ 1047 struct fuse_args *args = req->args; 1048 struct fuse_args_pages *ap = container_of(args, typeof(*ap), args); 1049 unsigned int size, total_sgs = 1 /* fuse_in_header */; 1050 1051 if (args->in_numargs - args->in_pages) 1052 total_sgs += 1; 1053 1054 if (args->in_pages) { 1055 size = args->in_args[args->in_numargs - 1].size; 1056 total_sgs += sg_count_fuse_pages(ap->descs, ap->num_pages, 1057 size); 1058 } 1059 1060 if (!test_bit(FR_ISREPLY, &req->flags)) 1061 return total_sgs; 1062 1063 total_sgs += 1 /* fuse_out_header */; 1064 1065 if (args->out_numargs - args->out_pages) 1066 total_sgs += 1; 1067 1068 if (args->out_pages) { 1069 size = args->out_args[args->out_numargs - 1].size; 1070 total_sgs += sg_count_fuse_pages(ap->descs, ap->num_pages, 1071 size); 1072 } 1073 1074 return total_sgs; 1075} 1076 1077/* Add pages to scatter-gather list and return number of elements used */ 1078static unsigned int sg_init_fuse_pages(struct scatterlist *sg, 1079 struct page **pages, 1080 struct fuse_page_desc *page_descs, 1081 unsigned int num_pages, 1082 unsigned int total_len) 1083{ 1084 unsigned int i; 1085 unsigned int this_len; 1086 1087 for (i = 0; i < num_pages && total_len; i++) { 1088 sg_init_table(&sg[i], 1); 1089 this_len = min(page_descs[i].length, total_len); 1090 sg_set_page(&sg[i], pages[i], this_len, page_descs[i].offset); 1091 total_len -= this_len; 1092 
} 1093 1094 return i; 1095} 1096 1097/* Add args to scatter-gather list and return number of elements used. The numargs - argpages leading args are described by a single entry over argbuf, which is skipped when fuse_len_args() returns 0 for them. When argpages is set, entries for the pages of the fuse_args_pages that contains req->args follow, with the size of the last arg passed on to sg_init_fuse_pages(). If len_used is non-NULL it receives the length returned by fuse_len_args(). */ 1098static unsigned int sg_init_fuse_args(struct scatterlist *sg, 1099 struct fuse_req *req, 1100 struct fuse_arg *args, 1101 unsigned int numargs, 1102 bool argpages, 1103 void *argbuf, 1104 unsigned int *len_used) 1105{ 1106 struct fuse_args_pages *ap = container_of(req->args, typeof(*ap), args); 1107 unsigned int total_sgs = 0; 1108 unsigned int len; 1109 1110 len = fuse_len_args(numargs - argpages, args); 1111 if (len) 1112 sg_init_one(&sg[total_sgs++], argbuf, len); 1113 1114 if (argpages) 1115 total_sgs += sg_init_fuse_pages(&sg[total_sgs], 1116 ap->pages, ap->descs, 1117 ap->num_pages, 1118 args[numargs - 1].size); 1119 1120 if (len_used) 1121 *len_used = len; 1122 1123 return total_sgs; 1124} 1125 1126/* Add a request to a virtqueue and kick the device. The scatter-gather list is built on the stack when sg_count_fuse_req() reports at most ARRAY_SIZE(stack_sgs) elements, otherwise in arrays obtained from kmalloc_array() with GFP_ATOMIC. The list is added to fsvq->vq under fsvq->lock and the request is linked onto fpq->processing of fsvq->fud->pq. If in_flight is false, inc_in_flight_req() is called here on success. Returns a negative errno on failure: -ENOMEM if the arrays cannot be allocated, the error from copy_args_to_argbuf(), -ENOTCONN if the queue is not connected, or the error from virtqueue_add_sgs(). On failure req->argbuf is freed and reset to NULL. */ 1127static int virtio_fs_enqueue_req(struct virtio_fs_vq *fsvq, 1128 struct fuse_req *req, bool in_flight) 1129{ 1130 /* requests need at least 4 elements */ 1131 struct scatterlist *stack_sgs[6]; 1132 struct scatterlist stack_sg[ARRAY_SIZE(stack_sgs)]; 1133 struct scatterlist **sgs = stack_sgs; 1134 struct scatterlist *sg = stack_sg; 1135 struct virtqueue *vq; 1136 struct fuse_args *args = req->args; 1137 unsigned int argbuf_used = 0; 1138 unsigned int out_sgs = 0; 1139 unsigned int in_sgs = 0; 1140 unsigned int total_sgs; 1141 unsigned int i; 1142 int ret; 1143 bool notify; 1144 struct fuse_pqueue *fpq; 1145 1146 /* Does the sglist fit on the stack? If not, switch both arrays to heap allocations. 
*/ 1147 total_sgs = sg_count_fuse_req(req); 1148 if (total_sgs > ARRAY_SIZE(stack_sgs)) { 1149 sgs = kmalloc_array(total_sgs, sizeof(sgs[0]), GFP_ATOMIC); 1150 sg = kmalloc_array(total_sgs, sizeof(sg[0]), GFP_ATOMIC); 1151 if (!sgs || !sg) { 1152 ret = -ENOMEM; 1153 goto out; 1154 } 1155 } 1156 1157 /* Use a bounce buffer since stack args cannot be mapped */ 1158 ret = copy_args_to_argbuf(req); 1159 if (ret < 0) 1160 goto out; 1161 1162 /* Request elements: the in header first, then the in args starting at req->argbuf */ 1163 sg_init_one(&sg[out_sgs++], &req->in.h, sizeof(req->in.h)); 1164 out_sgs += sg_init_fuse_args(&sg[out_sgs], req, 1165 (struct fuse_arg *)args->in_args, 1166 args->in_numargs, args->in_pages, 1167 req->argbuf, &argbuf_used); 1168 1169 /* Reply elements: only when FR_ISREPLY is set; the out args use the part of req->argbuf after the argbuf_used bytes taken by the in args */ 1170 if (test_bit(FR_ISREPLY, &req->flags)) { 1171 sg_init_one(&sg[out_sgs + in_sgs++], 1172 &req->out.h, sizeof(req->out.h)); 1173 in_sgs += sg_init_fuse_args(&sg[out_sgs + in_sgs], req, 1174 args->out_args, args->out_numargs, 1175 args->out_pages, 1176 req->argbuf + argbuf_used, NULL); 1177 } 1178 1179 /* The number of entries built must match the count the arrays were sized for */ WARN_ON(out_sgs + in_sgs != total_sgs); 1180 1181 for (i = 0; i < total_sgs; i++) 1182 sgs[i] = &sg[i]; 1183 1184 spin_lock(&fsvq->lock); 1185 1186 if (!fsvq->connected) { 1187 spin_unlock(&fsvq->lock); 1188 ret = -ENOTCONN; 1189 goto out; 1190 } 1191 1192 vq = fsvq->vq; 1193 ret = virtqueue_add_sgs(vq, sgs, out_sgs, in_sgs, req, GFP_ATOMIC); 1194 if (ret < 0) { 1195 spin_unlock(&fsvq->lock); 1196 goto out; 1197 } 1198 1199 /* Request successfully sent. 
*/ 1200 fpq = &fsvq->fud->pq; 1201 spin_lock(&fpq->lock); 1202 list_add_tail(&req->list, fpq->processing); 1203 spin_unlock(&fpq->lock); 1204 set_bit(FR_SENT, &req->flags); 1205 /* matches barrier in request_wait_answer() */ 1206 smp_mb__after_atomic(); 1207 1208 if (!in_flight) 1209 inc_in_flight_req(fsvq); 1210 notify = virtqueue_kick_prepare(vq); 1211 1212 spin_unlock(&fsvq->lock); 1213 1214 /* The notification itself is issued after fsvq->lock has been dropped */ if (notify) 1215 virtqueue_notify(vq); 1216 1217out: 1218 if (ret < 0 && req->argbuf) { 1219 kfree(req->argbuf); 1220 req->argbuf = NULL; 1221 } 1222 /* Heap arrays are in use only if sgs was pointed away from stack_sgs */ if (sgs != stack_sgs) { 1223 kfree(sgs); 1224 kfree(sg); 1225 } 1226 1227 return ret; 1228} 1229 /* fuse_iqueue_ops ->wake_pending_and_unlock hook. Takes the last entry off fiq->pending, clears FR_PENDING, drops fiq->lock and submits the request on the VQ_REQUEST queue. On -ENOMEM or -ENOSPC the request is parked on queued_reqs and dispatch_work is scheduled 1 ms later; on any other error the code is stored in req->out.h.error and the request is put on end_reqs for the worker to end. */ 1230static void virtio_fs_wake_pending_and_unlock(struct fuse_iqueue *fiq) 1231__releases(fiq->lock) 1232{ 1233 unsigned int queue_id = VQ_REQUEST; /* TODO multiqueue */ 1234 struct virtio_fs *fs; 1235 struct fuse_req *req; 1236 struct virtio_fs_vq *fsvq; 1237 int ret; 1238 1239 WARN_ON(list_empty(&fiq->pending)); 1240 req = list_last_entry(&fiq->pending, struct fuse_req, list); 1241 clear_bit(FR_PENDING, &req->flags); 1242 list_del_init(&req->list); 1243 /* Exactly one pending request is expected per call */ WARN_ON(!list_empty(&fiq->pending)); 1244 spin_unlock(&fiq->lock); 1245 1246 fs = fiq->priv; 1247 1248 pr_debug("%s: opcode %u unique %#llx nodeid %#llx in.len %u out.len %u\n", 1249 __func__, req->in.h.opcode, req->in.h.unique, 1250 req->in.h.nodeid, req->in.h.len, 1251 fuse_len_args(req->args->out_numargs, req->args->out_args)); 1252 1253 fsvq = &fs->vqs[queue_id]; 1254 ret = virtio_fs_enqueue_req(fsvq, req, false); 1255 if (ret < 0) { 1256 if (ret == -ENOMEM || ret == -ENOSPC) { 1257 /* 1258 * Virtqueue full or allocation failed. Retry submission from worker 1259 * context as we might be holding fc->bg_lock. 
1260 */ 1261 spin_lock(&fsvq->lock); 1262 list_add_tail(&req->list, &fsvq->queued_reqs); 1263 inc_in_flight_req(fsvq); 1264 schedule_delayed_work(&fsvq->dispatch_work, 1265 msecs_to_jiffies(1)); 1266 spin_unlock(&fsvq->lock); 1267 return; 1268 } 1269 req->out.h.error = ret; 1270 pr_err("virtio-fs: virtio_fs_enqueue_req() failed %d\n", ret); 1271 1272 /* Can't end request in submission context. Use a worker */ 1273 spin_lock(&fsvq->lock); 1274 list_add_tail(&req->list, &fsvq->end_reqs); 1275 schedule_delayed_work(&fsvq->dispatch_work, 0); 1276 spin_unlock(&fsvq->lock); 1277 return; 1278 } 1279} 1280 /* Input queue callbacks handed to fuse_conn_init() in virtio_fs_get_tree() */ 1281static const struct fuse_iqueue_ops virtio_fs_fiq_ops = { 1282 .wake_forget_and_unlock = virtio_fs_wake_forget_and_unlock, 1283 .wake_interrupt_and_unlock = virtio_fs_wake_interrupt_and_unlock, 1284 .wake_pending_and_unlock = virtio_fs_wake_pending_and_unlock, 1285 .release = virtio_fs_fiq_release, 1286}; 1287 /* Set the fuse_fs_context fields that virtiofs fixes for every mount; called at the start of virtio_fs_fill_super() */ 1288static inline void virtio_fs_ctx_set_defaults(struct fuse_fs_context *ctx) 1289{ 1290 ctx->rootmode = S_IFDIR; 1291 ctx->default_permissions = 1; 1292 ctx->allow_other = 1; 1293 ctx->max_read = UINT_MAX; 1294 ctx->blksize = 512; 1295 ctx->destroy = true; 1296 ctx->no_control = true; 1297 ctx->no_force_umount = true; 1298} 1299 /* Fill in a new superblock for the virtio_fs instance stored in fc->iq.priv. Applies the virtiofs context defaults, then, with virtio_fs_mutex held, allocates a fuse_dev for every virtqueue, calls fuse_fill_super_common(), installs the fuse devices on the connection, starts the queues and sends FUSE init. Returns 0 on success or a negative errno: -EINVAL if fs->list is empty or DAX was requested but fs->dax_dev is NULL, -ENOMEM if fuse_dev_alloc() fails, or the error from fuse_fill_super_common(). Once fuse_dev allocation has started, failures go through virtio_fs_free_devs(). */ 1300static int virtio_fs_fill_super(struct super_block *sb, struct fs_context *fsc) 1301{ 1302 struct fuse_mount *fm = get_fuse_mount_super(sb); 1303 struct fuse_conn *fc = fm->fc; 1304 struct virtio_fs *fs = fc->iq.priv; 1305 struct fuse_fs_context *ctx = fsc->fs_private; 1306 unsigned int i; 1307 int err; 1308 1309 virtio_fs_ctx_set_defaults(ctx); 1310 mutex_lock(&virtio_fs_mutex); 1311 1312 /* After holding mutex, make sure virtiofs device is still there. 1313 * Though we are holding a reference to it, driver ->remove might 1314 * still have cleaned up virtual queues. In that case bail out. 
1315 */ 1316 err = -EINVAL; 1317 if (list_empty(&fs->list)) { 1318 pr_info("virtio-fs: tag <%s> not found\n", fs->tag); 1319 goto err; 1320 } 1321 1322 err = -ENOMEM; 1323 /* Allocate a fuse_dev for each of the fs->nvqs virtqueues */ 1324 for (i = 0; i < fs->nvqs; i++) { 1325 struct virtio_fs_vq *fsvq = &fs->vqs[i]; 1326 1327 fsvq->fud = fuse_dev_alloc(); 1328 if (!fsvq->fud) 1329 goto err_free_fuse_devs; 1330 } 1331 1332 /* virtiofs allocates and installs its own fuse devices */ 1333 ctx->fudptr = NULL; 1334 if (ctx->dax) { 1335 if (!fs->dax_dev) { 1336 err = -EINVAL; 1337 pr_err("virtio-fs: dax can't be enabled as filesystem" 1338 " device does not support it.\n"); 1339 goto err_free_fuse_devs; 1340 } 1341 ctx->dax_dev = fs->dax_dev; 1342 } 1343 err = fuse_fill_super_common(sb, ctx); 1344 if (err < 0) 1345 goto err_free_fuse_devs; 1346 1347 for (i = 0; i < fs->nvqs; i++) { 1348 struct virtio_fs_vq *fsvq = &fs->vqs[i]; 1349 1350 fuse_dev_install(fsvq->fud, fc); 1351 } 1352 1353 /* Previous unmount will stop all queues. Start these again */ 1354 virtio_fs_start_all_queues(fs); 1355 fuse_send_init(fm); 1356 mutex_unlock(&virtio_fs_mutex); 1357 return 0; 1358 1359err_free_fuse_devs: 1360 virtio_fs_free_devs(fs); 1361err: 1362 mutex_unlock(&virtio_fs_mutex); 1363 return err; 1364} 1365 /* Tear down the connection; called from virtio_kill_sb() when fuse_mount_remove() returns true. Disconnects the VQ_HIPRIO queue and drains all queues, calls fuse_conn_destroy(), then stops and drains all queues again and frees the fuse devices. */ 1366static void virtio_fs_conn_destroy(struct fuse_mount *fm) 1367{ 1368 struct fuse_conn *fc = fm->fc; 1369 struct virtio_fs *vfs = fc->iq.priv; 1370 struct virtio_fs_vq *fsvq = &vfs->vqs[VQ_HIPRIO]; 1371 1372 /* Stop dax worker. Soon evict_inodes() will be called which 1373 * will free all memory ranges belonging to all inodes. 1374 */ 1375 if (IS_ENABLED(CONFIG_FUSE_DAX)) 1376 fuse_dax_cancel_work(fc); 1377 1378 /* Stop forget queue. Soon destroy will be sent */ 1379 spin_lock(&fsvq->lock); 1380 fsvq->connected = false; 1381 spin_unlock(&fsvq->lock); 1382 virtio_fs_drain_all_queues(vfs); 1383 1384 fuse_conn_destroy(fm); 1385 1386 /* fuse_conn_destroy() must have sent destroy. 
Stop all queues 1387 * and drain one more time and free fuse devices. Freeing fuse 1388 * devices will drop their reference on fuse_conn and that in 1389 * turn will drop its reference on virtio_fs object. 1390 */ 1391 virtio_fs_stop_all_queues(vfs); 1392 virtio_fs_drain_all_queues(vfs); 1393 virtio_fs_free_devs(vfs); 1394} 1395 /* file_system_type ->kill_sb hook. When the superblock has a root, removes the fuse_mount and destroys the connection if fuse_mount_remove() reports it was the last one; kill_anon_super() is called in every case. */ 1396static void virtio_kill_sb(struct super_block *sb) 1397{ 1398 struct fuse_mount *fm = get_fuse_mount_super(sb); 1399 bool last; 1400 1401 /* If mount failed, we can still be called without any fc */ 1402 if (sb->s_root) { 1403 last = fuse_mount_remove(fm); 1404 if (last) 1405 virtio_fs_conn_destroy(fm); 1406 } 1407 kill_anon_super(sb); 1408} 1409 /* Test callback passed to sget_fc(): returns non-zero when the superblock and the fs_context refer to the same iq.priv pointer, i.e. the same virtio_fs instance. */ 1410static int virtio_fs_test_super(struct super_block *sb, 1411 struct fs_context *fsc) 1412{ 1413 struct fuse_mount *fsc_fm = fsc->s_fs_info; 1414 struct fuse_mount *sb_fm = get_fuse_mount_super(sb); 1415 1416 return fsc_fm->fc->iq.priv == sb_fm->fc->iq.priv; 1417} 1418 /* Set callback passed to sget_fc(): obtains an anonymous device number for the superblock and, only if that succeeds, takes a reference on the fuse_mount in fsc->s_fs_info. Returns the result of get_anon_bdev(). */ 1419static int virtio_fs_set_super(struct super_block *sb, 1420 struct fs_context *fsc) 1421{ 1422 int err; 1423 1424 err = get_anon_bdev(&sb->s_dev); 1425 if (!err) 1426 fuse_mount_get(fsc->s_fs_info); 1427 1428 return err; 1429} 1430 /* fs_context ->get_tree hook. Looks up the virtio_fs instance named by fsc->source, allocates a fuse_conn and fuse_mount for it and obtains a superblock through sget_fc(); a superblock that has no root yet is filled in by virtio_fs_fill_super(). Returns 0 with fsc->root set, or a negative errno: -EINVAL if the tag is not found, -EIO if the request virtqueue size is not larger than FUSE_HEADER_OVERHEAD, -ENOMEM on allocation failure, or the error from sget_fc() or virtio_fs_fill_super(). */ 1431static int virtio_fs_get_tree(struct fs_context *fsc) 1432{ 1433 struct virtio_fs *fs; 1434 struct super_block *sb; 1435 struct fuse_conn *fc = NULL; 1436 struct fuse_mount *fm; 1437 unsigned int virtqueue_size; 1438 int err = -EIO; 1439 1440 /* This gets a reference on virtio_fs object. This ptr gets installed 1441 * in fc->iq.priv. Once fuse_conn is going away, it calls ->put() 1442 * to drop the reference to this object. 
1443 */ 1444 fs = virtio_fs_find_instance(fsc->source); 1445 if (!fs) { 1446 pr_info("virtio-fs: tag <%s> not found\n", fsc->source); 1447 return -EINVAL; 1448 } 1449 1450 virtqueue_size = virtqueue_get_vring_size(fs->vqs[VQ_REQUEST].vq); 1451 /* err is still -EIO here */ if (WARN_ON(virtqueue_size <= FUSE_HEADER_OVERHEAD)) 1452 goto out_err; 1453 1454 err = -ENOMEM; 1455 fc = kzalloc(sizeof(struct fuse_conn), GFP_KERNEL); 1456 if (!fc) 1457 goto out_err; 1458 1459 fm = kzalloc(sizeof(struct fuse_mount), GFP_KERNEL); 1460 if (!fm) 1461 goto out_err; 1462 1463 fuse_conn_init(fc, fm, fsc->user_ns, &virtio_fs_fiq_ops, fs); 1464 fc->release = fuse_free_conn; 1465 fc->delete_stale = true; 1466 fc->auto_submounts = true; 1467 1468 /* Tell FUSE to split requests that exceed the virtqueue's size */ 1469 fc->max_pages_limit = min_t(unsigned int, fc->max_pages_limit, 1470 virtqueue_size - FUSE_HEADER_OVERHEAD); 1471 1472 fsc->s_fs_info = fm; 1473 sb = sget_fc(fsc, virtio_fs_test_super, virtio_fs_set_super); 1474 /* virtio_fs_set_super() takes its own fuse_mount reference when it succeeds */ fuse_mount_put(fm); 1475 if (IS_ERR(sb)) 1476 return PTR_ERR(sb); 1477 1478 if (!sb->s_root) { 1479 err = virtio_fs_fill_super(sb, fsc); 1480 if (err) { 1481 fuse_mount_put(fm); 1482 sb->s_fs_info = NULL; 1483 deactivate_locked_super(sb); 1484 return err; 1485 } 1486 1487 sb->s_flags |= SB_ACTIVE; 1488 } 1489 1490 WARN_ON(fsc->root); 1491 fsc->root = dget(sb->s_root); 1492 return 0; 1493 1494out_err: 1495 /* fc is still NULL if the virtqueue size check failed before the allocation */ kfree(fc); 1496 mutex_lock(&virtio_fs_mutex); 1497 virtio_fs_put(fs); 1498 mutex_unlock(&virtio_fs_mutex); 1499 return err; 1500} 1501 /* fs_context operations installed by virtio_fs_init_fs_context() */ 1502static const struct fs_context_operations virtio_fs_context_ops = { 1503 .free = virtio_fs_free_fc, 1504 .parse_param = virtio_fs_parse_param, 1505 .get_tree = virtio_fs_get_tree, 1506}; 1507 /* file_system_type ->init_fs_context hook: allocates a zeroed fuse_fs_context, stores it in fsc->fs_private and installs virtio_fs_context_ops. Returns 0, or -ENOMEM if the allocation fails. */ 1508static int virtio_fs_init_fs_context(struct fs_context *fsc) 1509{ 1510 struct fuse_fs_context *ctx; 1511 1512 ctx = kzalloc(sizeof(struct fuse_fs_context), GFP_KERNEL); 1513 if (!ctx) 1514 return -ENOMEM; 1515 fsc->fs_private = ctx; 1516 fsc->ops = 
&virtio_fs_context_ops; 1517 return 0; 1518} 1519 /* Filesystem type registered under the name "virtiofs" */ 1520static struct file_system_type virtio_fs_type = { 1521 .owner = THIS_MODULE, 1522 .name = "virtiofs", 1523 .init_fs_context = virtio_fs_init_fs_context, 1524 .kill_sb = virtio_kill_sb, 1525}; 1526 /* Module init: registers the virtio driver, then the filesystem type; if the filesystem registration fails the driver is unregistered again. Returns 0 or the negative error from the failing registration. */ 1527static int __init virtio_fs_init(void) 1528{ 1529 int ret; 1530 1531 ret = register_virtio_driver(&virtio_fs_driver); 1532 if (ret < 0) 1533 return ret; 1534 1535 ret = register_filesystem(&virtio_fs_type); 1536 if (ret < 0) { 1537 unregister_virtio_driver(&virtio_fs_driver); 1538 return ret; 1539 } 1540 1541 return 0; 1542} 1543module_init(virtio_fs_init); 1544 /* Module exit: unregisters in the reverse order of virtio_fs_init() */ 1545static void __exit virtio_fs_exit(void) 1546{ 1547 unregister_filesystem(&virtio_fs_type); 1548 unregister_virtio_driver(&virtio_fs_driver); 1549} 1550module_exit(virtio_fs_exit); 1551 1552MODULE_AUTHOR("Stefan Hajnoczi <stefanha@redhat.com>"); 1553MODULE_DESCRIPTION("Virtio Filesystem"); 1554MODULE_LICENSE("GPL"); 1555MODULE_ALIAS_FS(KBUILD_MODNAME); 1556MODULE_DEVICE_TABLE(virtio, id_table); 1557