1// SPDX-License-Identifier: GPL-2.0-only 2//#define DEBUG 3#include <linux/spinlock.h> 4#include <linux/slab.h> 5#include <linux/blkdev.h> 6#include <linux/hdreg.h> 7#include <linux/module.h> 8#include <linux/mutex.h> 9#include <linux/interrupt.h> 10#include <linux/virtio.h> 11#include <linux/virtio_blk.h> 12#include <linux/scatterlist.h> 13#include <linux/string_helpers.h> 14#include <linux/idr.h> 15#include <linux/blk-mq.h> 16#include <linux/blk-mq-virtio.h> 17#include <linux/numa.h> 18#include <uapi/linux/virtio_ring.h> 19 20#define PART_BITS 4 21#define VQ_NAME_LEN 16 22#define MAX_DISCARD_SEGMENTS 256u 23 24static int major; 25static DEFINE_IDA(vd_index_ida); 26 27static struct workqueue_struct *virtblk_wq; 28 29struct virtio_blk_vq { 30 struct virtqueue *vq; 31 spinlock_t lock; 32 char name[VQ_NAME_LEN]; 33} ____cacheline_aligned_in_smp; 34 35struct virtio_blk { 36 /* 37 * This mutex must be held by anything that may run after 38 * virtblk_remove() sets vblk->vdev to NULL. 39 * 40 * blk-mq, virtqueue processing, and sysfs attribute code paths are 41 * shut down before vblk->vdev is set to NULL and therefore do not need 42 * to hold this mutex. 43 */ 44 struct mutex vdev_mutex; 45 struct virtio_device *vdev; 46 47 /* The disk structure for the kernel. */ 48 struct gendisk *disk; 49 50 /* Block layer tags. */ 51 struct blk_mq_tag_set tag_set; 52 53 /* Process context for config space updates */ 54 struct work_struct config_work; 55 56 /* 57 * Tracks references from block_device_operations open/release and 58 * virtio_driver probe/remove so this object can be freed once no 59 * longer in use. 60 */ 61 refcount_t refs; 62 63 /* What host tells us, plus 2 for header & tailer. */ 64 unsigned int sg_elems; 65 66 /* Ida index - used to track minor number allocations. */ 67 int index; 68 69 /* num of vqs */ 70 int num_vqs; 71 struct virtio_blk_vq *vqs; 72}; 73 74struct virtblk_req { 75 struct virtio_blk_outhdr out_hdr; 76 u8 status; 77 struct scatterlist sg[]; 78}; 79 80static inline blk_status_t virtblk_result(struct virtblk_req *vbr) 81{ 82 switch (vbr->status) { 83 case VIRTIO_BLK_S_OK: 84 return BLK_STS_OK; 85 case VIRTIO_BLK_S_UNSUPP: 86 return BLK_STS_NOTSUPP; 87 default: 88 return BLK_STS_IOERR; 89 } 90} 91 92static int virtblk_add_req(struct virtqueue *vq, struct virtblk_req *vbr, 93 struct scatterlist *data_sg, bool have_data) 94{ 95 struct scatterlist hdr, status, *sgs[3]; 96 unsigned int num_out = 0, num_in = 0; 97 98 sg_init_one(&hdr, &vbr->out_hdr, sizeof(vbr->out_hdr)); 99 sgs[num_out++] = &hdr; 100 101 if (have_data) { 102 if (vbr->out_hdr.type & cpu_to_virtio32(vq->vdev, VIRTIO_BLK_T_OUT)) 103 sgs[num_out++] = data_sg; 104 else 105 sgs[num_out + num_in++] = data_sg; 106 } 107 108 sg_init_one(&status, &vbr->status, sizeof(vbr->status)); 109 sgs[num_out + num_in++] = &status; 110 111 return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC); 112} 113 114static int virtblk_setup_discard_write_zeroes(struct request *req, bool unmap) 115{ 116 unsigned short segments = blk_rq_nr_discard_segments(req); 117 unsigned short n = 0; 118 struct virtio_blk_discard_write_zeroes *range; 119 struct bio *bio; 120 u32 flags = 0; 121 122 if (unmap) 123 flags |= VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP; 124 125 range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC); 126 if (!range) 127 return -ENOMEM; 128 129 /* 130 * Single max discard segment means multi-range discard isn't 131 * supported, and block layer only runs contiguity merge like 132 * normal RW request. So we can't reply on bio for retrieving 133 * each range info. 134 */ 135 if (queue_max_discard_segments(req->q) == 1) { 136 range[0].flags = cpu_to_le32(flags); 137 range[0].num_sectors = cpu_to_le32(blk_rq_sectors(req)); 138 range[0].sector = cpu_to_le64(blk_rq_pos(req)); 139 n = 1; 140 } else { 141 __rq_for_each_bio(bio, req) { 142 u64 sector = bio->bi_iter.bi_sector; 143 u32 num_sectors = bio->bi_iter.bi_size >> SECTOR_SHIFT; 144 145 range[n].flags = cpu_to_le32(flags); 146 range[n].num_sectors = cpu_to_le32(num_sectors); 147 range[n].sector = cpu_to_le64(sector); 148 n++; 149 } 150 } 151 152 WARN_ON_ONCE(n != segments); 153 154 req->special_vec.bv_page = virt_to_page(range); 155 req->special_vec.bv_offset = offset_in_page(range); 156 req->special_vec.bv_len = sizeof(*range) * segments; 157 req->rq_flags |= RQF_SPECIAL_PAYLOAD; 158 159 return 0; 160} 161 162static inline void virtblk_request_done(struct request *req) 163{ 164 struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); 165 166 if (req->rq_flags & RQF_SPECIAL_PAYLOAD) { 167 kfree(page_address(req->special_vec.bv_page) + 168 req->special_vec.bv_offset); 169 } 170 171 blk_mq_end_request(req, virtblk_result(vbr)); 172} 173 174static void virtblk_done(struct virtqueue *vq) 175{ 176 struct virtio_blk *vblk = vq->vdev->priv; 177 bool req_done = false; 178 int qid = vq->index; 179 struct virtblk_req *vbr; 180 unsigned long flags; 181 unsigned int len; 182 183 spin_lock_irqsave(&vblk->vqs[qid].lock, flags); 184 do { 185 virtqueue_disable_cb(vq); 186 while ((vbr = virtqueue_get_buf(vblk->vqs[qid].vq, &len)) != NULL) { 187 struct request *req = blk_mq_rq_from_pdu(vbr); 188 189 if (likely(!blk_should_fake_timeout(req->q))) 190 blk_mq_complete_request(req); 191 req_done = true; 192 } 193 if (unlikely(virtqueue_is_broken(vq))) 194 break; 195 } while (!virtqueue_enable_cb(vq)); 196 197 /* In case queue is stopped waiting for more buffers. */ 198 if (req_done) 199 blk_mq_start_stopped_hw_queues(vblk->disk->queue, true); 200 spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags); 201} 202 203static void virtio_commit_rqs(struct blk_mq_hw_ctx *hctx) 204{ 205 struct virtio_blk *vblk = hctx->queue->queuedata; 206 struct virtio_blk_vq *vq = &vblk->vqs[hctx->queue_num]; 207 bool kick; 208 209 spin_lock_irq(&vq->lock); 210 kick = virtqueue_kick_prepare(vq->vq); 211 spin_unlock_irq(&vq->lock); 212 213 if (kick) 214 virtqueue_notify(vq->vq); 215} 216 217static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx, 218 const struct blk_mq_queue_data *bd) 219{ 220 struct virtio_blk *vblk = hctx->queue->queuedata; 221 struct request *req = bd->rq; 222 struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); 223 unsigned long flags; 224 unsigned int num; 225 int qid = hctx->queue_num; 226 int err; 227 bool notify = false; 228 bool unmap = false; 229 u32 type; 230 231 BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems); 232 233 switch (req_op(req)) { 234 case REQ_OP_READ: 235 case REQ_OP_WRITE: 236 type = 0; 237 break; 238 case REQ_OP_FLUSH: 239 type = VIRTIO_BLK_T_FLUSH; 240 break; 241 case REQ_OP_DISCARD: 242 type = VIRTIO_BLK_T_DISCARD; 243 break; 244 case REQ_OP_WRITE_ZEROES: 245 type = VIRTIO_BLK_T_WRITE_ZEROES; 246 unmap = !(req->cmd_flags & REQ_NOUNMAP); 247 break; 248 case REQ_OP_DRV_IN: 249 type = VIRTIO_BLK_T_GET_ID; 250 break; 251 default: 252 WARN_ON_ONCE(1); 253 return BLK_STS_IOERR; 254 } 255 256 vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, type); 257 vbr->out_hdr.sector = type ? 258 0 : cpu_to_virtio64(vblk->vdev, blk_rq_pos(req)); 259 vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(req)); 260 261 blk_mq_start_request(req); 262 263 if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES) { 264 err = virtblk_setup_discard_write_zeroes(req, unmap); 265 if (err) 266 return BLK_STS_RESOURCE; 267 } 268 269 num = blk_rq_map_sg(hctx->queue, req, vbr->sg); 270 if (num) { 271 if (rq_data_dir(req) == WRITE) 272 vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_OUT); 273 else 274 vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_IN); 275 } 276 277 spin_lock_irqsave(&vblk->vqs[qid].lock, flags); 278 err = virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg, num); 279 if (err) { 280 virtqueue_kick(vblk->vqs[qid].vq); 281 /* Don't stop the queue if -ENOMEM: we may have failed to 282 * bounce the buffer due to global resource outage. 283 */ 284 if (err == -ENOSPC) 285 blk_mq_stop_hw_queue(hctx); 286 spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags); 287 switch (err) { 288 case -ENOSPC: 289 return BLK_STS_DEV_RESOURCE; 290 case -ENOMEM: 291 return BLK_STS_RESOURCE; 292 default: 293 return BLK_STS_IOERR; 294 } 295 } 296 297 if (bd->last && virtqueue_kick_prepare(vblk->vqs[qid].vq)) 298 notify = true; 299 spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags); 300 301 if (notify) 302 virtqueue_notify(vblk->vqs[qid].vq); 303 return BLK_STS_OK; 304} 305 306/* return id (s/n) string for *disk to *id_str 307 */ 308static int virtblk_get_id(struct gendisk *disk, char *id_str) 309{ 310 struct virtio_blk *vblk = disk->private_data; 311 struct request_queue *q = vblk->disk->queue; 312 struct request *req; 313 int err; 314 315 req = blk_get_request(q, REQ_OP_DRV_IN, 0); 316 if (IS_ERR(req)) 317 return PTR_ERR(req); 318 319 err = blk_rq_map_kern(q, req, id_str, VIRTIO_BLK_ID_BYTES, GFP_KERNEL); 320 if (err) 321 goto out; 322 323 blk_execute_rq(vblk->disk->queue, vblk->disk, req, false); 324 err = blk_status_to_errno(virtblk_result(blk_mq_rq_to_pdu(req))); 325out: 326 blk_put_request(req); 327 return err; 328} 329 330static void virtblk_get(struct virtio_blk *vblk) 331{ 332 refcount_inc(&vblk->refs); 333} 334 335static void virtblk_put(struct virtio_blk *vblk) 336{ 337 if (refcount_dec_and_test(&vblk->refs)) { 338 ida_simple_remove(&vd_index_ida, vblk->index); 339 mutex_destroy(&vblk->vdev_mutex); 340 kfree(vblk); 341 } 342} 343 344static int virtblk_open(struct block_device *bd, fmode_t mode) 345{ 346 struct virtio_blk *vblk = bd->bd_disk->private_data; 347 int ret = 0; 348 349 mutex_lock(&vblk->vdev_mutex); 350 351 if (vblk->vdev) 352 virtblk_get(vblk); 353 else 354 ret = -ENXIO; 355 356 mutex_unlock(&vblk->vdev_mutex); 357 return ret; 358} 359 360static void virtblk_release(struct gendisk *disk, fmode_t mode) 361{ 362 struct virtio_blk *vblk = disk->private_data; 363 364 virtblk_put(vblk); 365} 366 367/* We provide getgeo only to please some old bootloader/partitioning tools */ 368static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo) 369{ 370 struct virtio_blk *vblk = bd->bd_disk->private_data; 371 int ret = 0; 372 373 mutex_lock(&vblk->vdev_mutex); 374 375 if (!vblk->vdev) { 376 ret = -ENXIO; 377 goto out; 378 } 379 380 /* see if the host passed in geometry config */ 381 if (virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_GEOMETRY)) { 382 virtio_cread(vblk->vdev, struct virtio_blk_config, 383 geometry.cylinders, &geo->cylinders); 384 virtio_cread(vblk->vdev, struct virtio_blk_config, 385 geometry.heads, &geo->heads); 386 virtio_cread(vblk->vdev, struct virtio_blk_config, 387 geometry.sectors, &geo->sectors); 388 } else { 389 /* some standard values, similar to sd */ 390 geo->heads = 1 << 6; 391 geo->sectors = 1 << 5; 392 geo->cylinders = get_capacity(bd->bd_disk) >> 11; 393 } 394out: 395 mutex_unlock(&vblk->vdev_mutex); 396 return ret; 397} 398 399static const struct block_device_operations virtblk_fops = { 400 .owner = THIS_MODULE, 401 .open = virtblk_open, 402 .release = virtblk_release, 403 .getgeo = virtblk_getgeo, 404}; 405 406static int index_to_minor(int index) 407{ 408 return index << PART_BITS; 409} 410 411static int minor_to_index(int minor) 412{ 413 return minor >> PART_BITS; 414} 415 416static ssize_t serial_show(struct device *dev, 417 struct device_attribute *attr, char *buf) 418{ 419 struct gendisk *disk = dev_to_disk(dev); 420 int err; 421 422 /* sysfs gives us a PAGE_SIZE buffer */ 423 BUILD_BUG_ON(PAGE_SIZE < VIRTIO_BLK_ID_BYTES); 424 425 buf[VIRTIO_BLK_ID_BYTES] = '\0'; 426 err = virtblk_get_id(disk, buf); 427 if (!err) 428 return strlen(buf); 429 430 if (err == -EIO) /* Unsupported? Make it empty. */ 431 return 0; 432 433 return err; 434} 435 436static DEVICE_ATTR_RO(serial); 437 438/* The queue's logical block size must be set before calling this */ 439static void virtblk_update_capacity(struct virtio_blk *vblk, bool resize) 440{ 441 struct virtio_device *vdev = vblk->vdev; 442 struct request_queue *q = vblk->disk->queue; 443 char cap_str_2[10], cap_str_10[10]; 444 unsigned long long nblocks; 445 u64 capacity; 446 447 /* Host must always specify the capacity. */ 448 virtio_cread(vdev, struct virtio_blk_config, capacity, &capacity); 449 450 /* If capacity is too big, truncate with warning. */ 451 if ((sector_t)capacity != capacity) { 452 dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n", 453 (unsigned long long)capacity); 454 capacity = (sector_t)-1; 455 } 456 457 nblocks = DIV_ROUND_UP_ULL(capacity, queue_logical_block_size(q) >> 9); 458 459 string_get_size(nblocks, queue_logical_block_size(q), 460 STRING_UNITS_2, cap_str_2, sizeof(cap_str_2)); 461 string_get_size(nblocks, queue_logical_block_size(q), 462 STRING_UNITS_10, cap_str_10, sizeof(cap_str_10)); 463 464 dev_notice(&vdev->dev, 465 "[%s] %s%llu %d-byte logical blocks (%s/%s)\n", 466 vblk->disk->disk_name, 467 resize ? "new size: " : "", 468 nblocks, 469 queue_logical_block_size(q), 470 cap_str_10, 471 cap_str_2); 472 473 set_capacity_revalidate_and_notify(vblk->disk, capacity, true); 474} 475 476static void virtblk_config_changed_work(struct work_struct *work) 477{ 478 struct virtio_blk *vblk = 479 container_of(work, struct virtio_blk, config_work); 480 481 virtblk_update_capacity(vblk, true); 482} 483 484static void virtblk_config_changed(struct virtio_device *vdev) 485{ 486 struct virtio_blk *vblk = vdev->priv; 487 488 queue_work(virtblk_wq, &vblk->config_work); 489} 490 491static int init_vq(struct virtio_blk *vblk) 492{ 493 int err; 494 int i; 495 vq_callback_t **callbacks; 496 const char **names; 497 struct virtqueue **vqs; 498 unsigned short num_vqs; 499 struct virtio_device *vdev = vblk->vdev; 500 struct irq_affinity desc = { 0, }; 501 502 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_MQ, 503 struct virtio_blk_config, num_queues, 504 &num_vqs); 505 if (err) 506 num_vqs = 1; 507 508 num_vqs = min_t(unsigned int, nr_cpu_ids, num_vqs); 509 510 vblk->vqs = kmalloc_array(num_vqs, sizeof(*vblk->vqs), GFP_KERNEL); 511 if (!vblk->vqs) 512 return -ENOMEM; 513 514 names = kmalloc_array(num_vqs, sizeof(*names), GFP_KERNEL); 515 callbacks = kmalloc_array(num_vqs, sizeof(*callbacks), GFP_KERNEL); 516 vqs = kmalloc_array(num_vqs, sizeof(*vqs), GFP_KERNEL); 517 if (!names || !callbacks || !vqs) { 518 err = -ENOMEM; 519 goto out; 520 } 521 522 for (i = 0; i < num_vqs; i++) { 523 callbacks[i] = virtblk_done; 524 snprintf(vblk->vqs[i].name, VQ_NAME_LEN, "req.%d", i); 525 names[i] = vblk->vqs[i].name; 526 } 527 528 /* Discover virtqueues and write information to configuration. */ 529 err = virtio_find_vqs(vdev, num_vqs, vqs, callbacks, names, &desc); 530 if (err) 531 goto out; 532 533 for (i = 0; i < num_vqs; i++) { 534 spin_lock_init(&vblk->vqs[i].lock); 535 vblk->vqs[i].vq = vqs[i]; 536 } 537 vblk->num_vqs = num_vqs; 538 539out: 540 kfree(vqs); 541 kfree(callbacks); 542 kfree(names); 543 if (err) 544 kfree(vblk->vqs); 545 return err; 546} 547 548/* 549 * Legacy naming scheme used for virtio devices. We are stuck with it for 550 * virtio blk but don't ever use it for any new driver. 551 */ 552static int virtblk_name_format(char *prefix, int index, char *buf, int buflen) 553{ 554 const int base = 'z' - 'a' + 1; 555 char *begin = buf + strlen(prefix); 556 char *end = buf + buflen; 557 char *p; 558 int unit; 559 560 p = end - 1; 561 *p = '\0'; 562 unit = base; 563 do { 564 if (p == begin) 565 return -EINVAL; 566 *--p = 'a' + (index % unit); 567 index = (index / unit) - 1; 568 } while (index >= 0); 569 570 memmove(begin, p, end - p); 571 memcpy(buf, prefix, strlen(prefix)); 572 573 return 0; 574} 575 576static int virtblk_get_cache_mode(struct virtio_device *vdev) 577{ 578 u8 writeback; 579 int err; 580 581 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE, 582 struct virtio_blk_config, wce, 583 &writeback); 584 585 /* 586 * If WCE is not configurable and flush is not available, 587 * assume no writeback cache is in use. 588 */ 589 if (err) 590 writeback = virtio_has_feature(vdev, VIRTIO_BLK_F_FLUSH); 591 592 return writeback; 593} 594 595static void virtblk_update_cache_mode(struct virtio_device *vdev) 596{ 597 u8 writeback = virtblk_get_cache_mode(vdev); 598 struct virtio_blk *vblk = vdev->priv; 599 600 blk_queue_write_cache(vblk->disk->queue, writeback, false); 601 revalidate_disk_size(vblk->disk, true); 602} 603 604static const char *const virtblk_cache_types[] = { 605 "write through", "write back" 606}; 607 608static ssize_t 609cache_type_store(struct device *dev, struct device_attribute *attr, 610 const char *buf, size_t count) 611{ 612 struct gendisk *disk = dev_to_disk(dev); 613 struct virtio_blk *vblk = disk->private_data; 614 struct virtio_device *vdev = vblk->vdev; 615 int i; 616 617 BUG_ON(!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_CONFIG_WCE)); 618 i = sysfs_match_string(virtblk_cache_types, buf); 619 if (i < 0) 620 return i; 621 622 virtio_cwrite8(vdev, offsetof(struct virtio_blk_config, wce), i); 623 virtblk_update_cache_mode(vdev); 624 return count; 625} 626 627static ssize_t 628cache_type_show(struct device *dev, struct device_attribute *attr, char *buf) 629{ 630 struct gendisk *disk = dev_to_disk(dev); 631 struct virtio_blk *vblk = disk->private_data; 632 u8 writeback = virtblk_get_cache_mode(vblk->vdev); 633 634 BUG_ON(writeback >= ARRAY_SIZE(virtblk_cache_types)); 635 return snprintf(buf, 40, "%s\n", virtblk_cache_types[writeback]); 636} 637 638static DEVICE_ATTR_RW(cache_type); 639 640static struct attribute *virtblk_attrs[] = { 641 &dev_attr_serial.attr, 642 &dev_attr_cache_type.attr, 643 NULL, 644}; 645 646static umode_t virtblk_attrs_are_visible(struct kobject *kobj, 647 struct attribute *a, int n) 648{ 649 struct device *dev = kobj_to_dev(kobj); 650 struct gendisk *disk = dev_to_disk(dev); 651 struct virtio_blk *vblk = disk->private_data; 652 struct virtio_device *vdev = vblk->vdev; 653 654 if (a == &dev_attr_cache_type.attr && 655 !virtio_has_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE)) 656 return S_IRUGO; 657 658 return a->mode; 659} 660 661static const struct attribute_group virtblk_attr_group = { 662 .attrs = virtblk_attrs, 663 .is_visible = virtblk_attrs_are_visible, 664}; 665 666static const struct attribute_group *virtblk_attr_groups[] = { 667 &virtblk_attr_group, 668 NULL, 669}; 670 671static int virtblk_init_request(struct blk_mq_tag_set *set, struct request *rq, 672 unsigned int hctx_idx, unsigned int numa_node) 673{ 674 struct virtio_blk *vblk = set->driver_data; 675 struct virtblk_req *vbr = blk_mq_rq_to_pdu(rq); 676 677 sg_init_table(vbr->sg, vblk->sg_elems); 678 return 0; 679} 680 681static int virtblk_map_queues(struct blk_mq_tag_set *set) 682{ 683 struct virtio_blk *vblk = set->driver_data; 684 685 return blk_mq_virtio_map_queues(&set->map[HCTX_TYPE_DEFAULT], 686 vblk->vdev, 0); 687} 688 689static const struct blk_mq_ops virtio_mq_ops = { 690 .queue_rq = virtio_queue_rq, 691 .commit_rqs = virtio_commit_rqs, 692 .complete = virtblk_request_done, 693 .init_request = virtblk_init_request, 694 .map_queues = virtblk_map_queues, 695}; 696 697static unsigned int virtblk_queue_depth; 698module_param_named(queue_depth, virtblk_queue_depth, uint, 0444); 699 700static int virtblk_probe(struct virtio_device *vdev) 701{ 702 struct virtio_blk *vblk; 703 struct request_queue *q; 704 int err, index; 705 706 u32 v, blk_size, max_size, sg_elems, opt_io_size; 707 u16 min_io_size; 708 u8 physical_block_exp, alignment_offset; 709 710 if (!vdev->config->get) { 711 dev_err(&vdev->dev, "%s failure: config access disabled\n", 712 __func__); 713 return -EINVAL; 714 } 715 716 err = ida_simple_get(&vd_index_ida, 0, minor_to_index(1 << MINORBITS), 717 GFP_KERNEL); 718 if (err < 0) 719 goto out; 720 index = err; 721 722 /* We need to know how many segments before we allocate. */ 723 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_SEG_MAX, 724 struct virtio_blk_config, seg_max, 725 &sg_elems); 726 727 /* We need at least one SG element, whatever they say. */ 728 if (err || !sg_elems) 729 sg_elems = 1; 730 731 /* We need an extra sg elements at head and tail. */ 732 sg_elems += 2; 733 vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL); 734 if (!vblk) { 735 err = -ENOMEM; 736 goto out_free_index; 737 } 738 739 /* This reference is dropped in virtblk_remove(). */ 740 refcount_set(&vblk->refs, 1); 741 mutex_init(&vblk->vdev_mutex); 742 743 vblk->vdev = vdev; 744 vblk->sg_elems = sg_elems; 745 746 INIT_WORK(&vblk->config_work, virtblk_config_changed_work); 747 748 err = init_vq(vblk); 749 if (err) 750 goto out_free_vblk; 751 752 /* FIXME: How many partitions? How long is a piece of string? */ 753 vblk->disk = alloc_disk(1 << PART_BITS); 754 if (!vblk->disk) { 755 err = -ENOMEM; 756 goto out_free_vq; 757 } 758 759 /* Default queue sizing is to fill the ring. */ 760 if (!virtblk_queue_depth) { 761 virtblk_queue_depth = vblk->vqs[0].vq->num_free; 762 /* ... but without indirect descs, we use 2 descs per req */ 763 if (!virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC)) 764 virtblk_queue_depth /= 2; 765 } 766 767 memset(&vblk->tag_set, 0, sizeof(vblk->tag_set)); 768 vblk->tag_set.ops = &virtio_mq_ops; 769 vblk->tag_set.queue_depth = virtblk_queue_depth; 770 vblk->tag_set.numa_node = NUMA_NO_NODE; 771 vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; 772 vblk->tag_set.cmd_size = 773 sizeof(struct virtblk_req) + 774 sizeof(struct scatterlist) * sg_elems; 775 vblk->tag_set.driver_data = vblk; 776 vblk->tag_set.nr_hw_queues = vblk->num_vqs; 777 778 err = blk_mq_alloc_tag_set(&vblk->tag_set); 779 if (err) 780 goto out_put_disk; 781 782 q = blk_mq_init_queue(&vblk->tag_set); 783 if (IS_ERR(q)) { 784 err = -ENOMEM; 785 goto out_free_tags; 786 } 787 vblk->disk->queue = q; 788 789 q->queuedata = vblk; 790 791 virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN); 792 793 vblk->disk->major = major; 794 vblk->disk->first_minor = index_to_minor(index); 795 vblk->disk->private_data = vblk; 796 vblk->disk->fops = &virtblk_fops; 797 vblk->disk->flags |= GENHD_FL_EXT_DEVT; 798 vblk->index = index; 799 800 /* configure queue flush support */ 801 virtblk_update_cache_mode(vdev); 802 803 /* If disk is read-only in the host, the guest should obey */ 804 if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO)) 805 set_disk_ro(vblk->disk, 1); 806 807 /* We can handle whatever the host told us to handle. */ 808 blk_queue_max_segments(q, vblk->sg_elems-2); 809 810 /* No real sector limit. */ 811 blk_queue_max_hw_sectors(q, -1U); 812 813 max_size = virtio_max_dma_size(vdev); 814 815 /* Host can optionally specify maximum segment size and number of 816 * segments. */ 817 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_SIZE_MAX, 818 struct virtio_blk_config, size_max, &v); 819 if (!err) 820 max_size = min(max_size, v); 821 822 blk_queue_max_segment_size(q, max_size); 823 824 /* Host can optionally specify the block size of the device */ 825 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_BLK_SIZE, 826 struct virtio_blk_config, blk_size, 827 &blk_size); 828 if (!err) { 829 err = blk_validate_block_size(blk_size); 830 if (err) { 831 dev_err(&vdev->dev, 832 "virtio_blk: invalid block size: 0x%x\n", 833 blk_size); 834 goto out_free_tags; 835 } 836 837 blk_queue_logical_block_size(q, blk_size); 838 } else 839 blk_size = queue_logical_block_size(q); 840 841 /* Use topology information if available */ 842 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY, 843 struct virtio_blk_config, physical_block_exp, 844 &physical_block_exp); 845 if (!err && physical_block_exp) 846 blk_queue_physical_block_size(q, 847 blk_size * (1 << physical_block_exp)); 848 849 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY, 850 struct virtio_blk_config, alignment_offset, 851 &alignment_offset); 852 if (!err && alignment_offset) 853 blk_queue_alignment_offset(q, blk_size * alignment_offset); 854 855 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY, 856 struct virtio_blk_config, min_io_size, 857 &min_io_size); 858 if (!err && min_io_size) 859 blk_queue_io_min(q, blk_size * min_io_size); 860 861 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_TOPOLOGY, 862 struct virtio_blk_config, opt_io_size, 863 &opt_io_size); 864 if (!err && opt_io_size) 865 blk_queue_io_opt(q, blk_size * opt_io_size); 866 867 if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) { 868 virtio_cread(vdev, struct virtio_blk_config, 869 discard_sector_alignment, &v); 870 if (v) 871 q->limits.discard_granularity = v << SECTOR_SHIFT; 872 else 873 q->limits.discard_granularity = blk_size; 874 875 virtio_cread(vdev, struct virtio_blk_config, 876 max_discard_sectors, &v); 877 blk_queue_max_discard_sectors(q, v ? v : UINT_MAX); 878 879 virtio_cread(vdev, struct virtio_blk_config, max_discard_seg, 880 &v); 881 882 /* 883 * max_discard_seg == 0 is out of spec but we always 884 * handled it. 885 */ 886 if (!v) 887 v = sg_elems - 2; 888 blk_queue_max_discard_segments(q, 889 min(v, MAX_DISCARD_SEGMENTS)); 890 891 blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); 892 } 893 894 if (virtio_has_feature(vdev, VIRTIO_BLK_F_WRITE_ZEROES)) { 895 virtio_cread(vdev, struct virtio_blk_config, 896 max_write_zeroes_sectors, &v); 897 blk_queue_max_write_zeroes_sectors(q, v ? v : UINT_MAX); 898 } 899 900 virtblk_update_capacity(vblk, false); 901 virtio_device_ready(vdev); 902 903 device_add_disk(&vdev->dev, vblk->disk, virtblk_attr_groups); 904 return 0; 905 906out_free_tags: 907 blk_mq_free_tag_set(&vblk->tag_set); 908out_put_disk: 909 put_disk(vblk->disk); 910out_free_vq: 911 vdev->config->del_vqs(vdev); 912 kfree(vblk->vqs); 913out_free_vblk: 914 kfree(vblk); 915out_free_index: 916 ida_simple_remove(&vd_index_ida, index); 917out: 918 return err; 919} 920 921static void virtblk_remove(struct virtio_device *vdev) 922{ 923 struct virtio_blk *vblk = vdev->priv; 924 925 /* Make sure no work handler is accessing the device. */ 926 flush_work(&vblk->config_work); 927 928 del_gendisk(vblk->disk); 929 blk_cleanup_queue(vblk->disk->queue); 930 931 blk_mq_free_tag_set(&vblk->tag_set); 932 933 mutex_lock(&vblk->vdev_mutex); 934 935 /* Stop all the virtqueues. */ 936 vdev->config->reset(vdev); 937 938 /* Virtqueues are stopped, nothing can use vblk->vdev anymore. */ 939 vblk->vdev = NULL; 940 941 put_disk(vblk->disk); 942 vdev->config->del_vqs(vdev); 943 kfree(vblk->vqs); 944 945 mutex_unlock(&vblk->vdev_mutex); 946 947 virtblk_put(vblk); 948} 949 950#ifdef CONFIG_PM_SLEEP 951static int virtblk_freeze(struct virtio_device *vdev) 952{ 953 struct virtio_blk *vblk = vdev->priv; 954 955 /* Ensure we don't receive any more interrupts */ 956 vdev->config->reset(vdev); 957 958 /* Make sure no work handler is accessing the device. */ 959 flush_work(&vblk->config_work); 960 961 blk_mq_quiesce_queue(vblk->disk->queue); 962 963 vdev->config->del_vqs(vdev); 964 kfree(vblk->vqs); 965 966 return 0; 967} 968 969static int virtblk_restore(struct virtio_device *vdev) 970{ 971 struct virtio_blk *vblk = vdev->priv; 972 int ret; 973 974 ret = init_vq(vdev->priv); 975 if (ret) 976 return ret; 977 978 virtio_device_ready(vdev); 979 980 blk_mq_unquiesce_queue(vblk->disk->queue); 981 return 0; 982} 983#endif 984 985static const struct virtio_device_id id_table[] = { 986 { VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID }, 987 { 0 }, 988}; 989 990static unsigned int features_legacy[] = { 991 VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY, 992 VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, 993 VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE, 994 VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES, 995} 996; 997static unsigned int features[] = { 998 VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY, 999 VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, 1000 VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE, 1001 VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD, VIRTIO_BLK_F_WRITE_ZEROES, 1002}; 1003 1004static struct virtio_driver virtio_blk = { 1005 .feature_table = features, 1006 .feature_table_size = ARRAY_SIZE(features), 1007 .feature_table_legacy = features_legacy, 1008 .feature_table_size_legacy = ARRAY_SIZE(features_legacy), 1009 .driver.name = KBUILD_MODNAME, 1010 .driver.owner = THIS_MODULE, 1011 .id_table = id_table, 1012 .probe = virtblk_probe, 1013 .remove = virtblk_remove, 1014 .config_changed = virtblk_config_changed, 1015#ifdef CONFIG_PM_SLEEP 1016 .freeze = virtblk_freeze, 1017 .restore = virtblk_restore, 1018#endif 1019}; 1020 1021static int __init init(void) 1022{ 1023 int error; 1024 1025 virtblk_wq = alloc_workqueue("virtio-blk", 0, 0); 1026 if (!virtblk_wq) 1027 return -ENOMEM; 1028 1029 major = register_blkdev(0, "virtblk"); 1030 if (major < 0) { 1031 error = major; 1032 goto out_destroy_workqueue; 1033 } 1034 1035 error = register_virtio_driver(&virtio_blk); 1036 if (error) 1037 goto out_unregister_blkdev; 1038 return 0; 1039 1040out_unregister_blkdev: 1041 unregister_blkdev(major, "virtblk"); 1042out_destroy_workqueue: 1043 destroy_workqueue(virtblk_wq); 1044 return error; 1045} 1046 1047static void __exit fini(void) 1048{ 1049 unregister_virtio_driver(&virtio_blk); 1050 unregister_blkdev(major, "virtblk"); 1051 destroy_workqueue(virtblk_wq); 1052} 1053module_init(init); 1054module_exit(fini); 1055 1056MODULE_DEVICE_TABLE(virtio, id_table); 1057MODULE_DESCRIPTION("Virtio block driver"); 1058MODULE_LICENSE("GPL"); 1059