1// SPDX-License-Identifier: GPL-2.0 2/* 3 * Copyright (C) 2018 Cambridge Greys Ltd 4 * Copyright (C) 2015-2016 Anton Ivanov (aivanov@brocade.com) 5 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) 6 */ 7 8/* 2001-09-28...2002-04-17 9 * Partition stuff by James_McMechan@hotmail.com 10 * old style ubd by setting UBD_SHIFT to 0 11 * 2002-09-27...2002-10-18 massive tinkering for 2.5 12 * partitions have changed in 2.5 13 * 2003-01-29 more tinkering for 2.5.59-1 14 * This should now address the sysfs problems and has 15 * the symlink for devfs to allow for booting with 16 * the common /dev/ubd/discX/... names rather than 17 * only /dev/ubdN/discN this version also has lots of 18 * clean ups preparing for ubd-many. 19 * James McMechan 20 */ 21 22#define UBD_SHIFT 4 23 24#include <linux/module.h> 25#include <linux/init.h> 26#include <linux/blkdev.h> 27#include <linux/blk-mq.h> 28#include <linux/ata.h> 29#include <linux/hdreg.h> 30#include <linux/major.h> 31#include <linux/cdrom.h> 32#include <linux/proc_fs.h> 33#include <linux/seq_file.h> 34#include <linux/ctype.h> 35#include <linux/slab.h> 36#include <linux/vmalloc.h> 37#include <linux/platform_device.h> 38#include <linux/scatterlist.h> 39#include <asm/tlbflush.h> 40#include <kern_util.h> 41#include "mconsole_kern.h" 42#include <init.h> 43#include <irq_kern.h> 44#include "ubd.h" 45#include <os.h> 46#include "cow.h" 47 48/* Max request size is determined by sector mask - 32K */ 49#define UBD_MAX_REQUEST (8 * sizeof(long)) 50 51struct io_desc { 52 char *buffer; 53 unsigned long length; 54 unsigned long sector_mask; 55 unsigned long long cow_offset; 56 unsigned long bitmap_words[2]; 57}; 58 59struct io_thread_req { 60 struct request *req; 61 int fds[2]; 62 unsigned long offsets[2]; 63 unsigned long long offset; 64 int sectorsize; 65 int error; 66 67 int desc_cnt; 68 /* io_desc has to be the last element of the struct */ 69 struct io_desc io_desc[]; 70}; 71 72 73static struct io_thread_req * (*irq_req_buffer)[]; 74static struct io_thread_req *irq_remainder; 75static int irq_remainder_size; 76 77static struct io_thread_req * (*io_req_buffer)[]; 78static struct io_thread_req *io_remainder; 79static int io_remainder_size; 80 81 82 83static inline int ubd_test_bit(__u64 bit, unsigned char *data) 84{ 85 __u64 n; 86 int bits, off; 87 88 bits = sizeof(data[0]) * 8; 89 n = bit / bits; 90 off = bit % bits; 91 return (data[n] & (1 << off)) != 0; 92} 93 94static inline void ubd_set_bit(__u64 bit, unsigned char *data) 95{ 96 __u64 n; 97 int bits, off; 98 99 bits = sizeof(data[0]) * 8; 100 n = bit / bits; 101 off = bit % bits; 102 data[n] |= (1 << off); 103} 104/*End stuff from ubd_user.h*/ 105 106#define DRIVER_NAME "uml-blkdev" 107 108static DEFINE_MUTEX(ubd_lock); 109static DEFINE_MUTEX(ubd_mutex); /* replaces BKL, might not be needed */ 110 111static int ubd_open(struct gendisk *disk, blk_mode_t mode); 112static void ubd_release(struct gendisk *disk); 113static int ubd_ioctl(struct block_device *bdev, blk_mode_t mode, 114 unsigned int cmd, unsigned long arg); 115static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo); 116 117#define MAX_DEV (16) 118 119static const struct block_device_operations ubd_blops = { 120 .owner = THIS_MODULE, 121 .open = ubd_open, 122 .release = ubd_release, 123 .ioctl = ubd_ioctl, 124 .compat_ioctl = blkdev_compat_ptr_ioctl, 125 .getgeo = ubd_getgeo, 126}; 127 128/* Protected by ubd_lock */ 129static struct gendisk *ubd_gendisk[MAX_DEV]; 130 131#ifdef CONFIG_BLK_DEV_UBD_SYNC 132#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \ 133 .cl = 1 }) 134#else 135#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \ 136 .cl = 1 }) 137#endif 138static struct openflags global_openflags = OPEN_FLAGS; 139 140struct cow { 141 /* backing file name */ 142 char *file; 143 /* backing file fd */ 144 int fd; 145 unsigned long *bitmap; 146 unsigned long bitmap_len; 147 int bitmap_offset; 148 int data_offset; 149}; 150 151#define MAX_SG 64 152 153struct ubd { 154 /* name (and fd, below) of the file opened for writing, either the 155 * backing or the cow file. */ 156 char *file; 157 char *serial; 158 int count; 159 int fd; 160 __u64 size; 161 struct openflags boot_openflags; 162 struct openflags openflags; 163 unsigned shared:1; 164 unsigned no_cow:1; 165 unsigned no_trim:1; 166 struct cow cow; 167 struct platform_device pdev; 168 struct request_queue *queue; 169 struct blk_mq_tag_set tag_set; 170 spinlock_t lock; 171}; 172 173#define DEFAULT_COW { \ 174 .file = NULL, \ 175 .fd = -1, \ 176 .bitmap = NULL, \ 177 .bitmap_offset = 0, \ 178 .data_offset = 0, \ 179} 180 181#define DEFAULT_UBD { \ 182 .file = NULL, \ 183 .serial = NULL, \ 184 .count = 0, \ 185 .fd = -1, \ 186 .size = -1, \ 187 .boot_openflags = OPEN_FLAGS, \ 188 .openflags = OPEN_FLAGS, \ 189 .no_cow = 0, \ 190 .no_trim = 0, \ 191 .shared = 0, \ 192 .cow = DEFAULT_COW, \ 193 .lock = __SPIN_LOCK_UNLOCKED(ubd_devs.lock), \ 194} 195 196/* Protected by ubd_lock */ 197static struct ubd ubd_devs[MAX_DEV] = { [0 ... MAX_DEV - 1] = DEFAULT_UBD }; 198 199static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx, 200 const struct blk_mq_queue_data *bd); 201 202static int fake_ide_setup(char *str) 203{ 204 pr_warn("The fake_ide option has been removed\n"); 205 return 1; 206} 207__setup("fake_ide", fake_ide_setup); 208 209__uml_help(fake_ide_setup, 210"fake_ide\n" 211" Obsolete stub.\n\n" 212); 213 214static int parse_unit(char **ptr) 215{ 216 char *str = *ptr, *end; 217 int n = -1; 218 219 if(isdigit(*str)) { 220 n = simple_strtoul(str, &end, 0); 221 if(end == str) 222 return -1; 223 *ptr = end; 224 } 225 else if (('a' <= *str) && (*str <= 'z')) { 226 n = *str - 'a'; 227 str++; 228 *ptr = str; 229 } 230 return n; 231} 232 233/* If *index_out == -1 at exit, the passed option was a general one; 234 * otherwise, the str pointer is used (and owned) inside ubd_devs array, so it 235 * should not be freed on exit. 236 */ 237static int ubd_setup_common(char *str, int *index_out, char **error_out) 238{ 239 struct ubd *ubd_dev; 240 struct openflags flags = global_openflags; 241 char *file, *backing_file, *serial; 242 int n, err = 0, i; 243 244 if(index_out) *index_out = -1; 245 n = *str; 246 if(n == '='){ 247 str++; 248 if(!strcmp(str, "sync")){ 249 global_openflags = of_sync(global_openflags); 250 return err; 251 } 252 253 pr_warn("fake major not supported any more\n"); 254 return 0; 255 } 256 257 n = parse_unit(&str); 258 if(n < 0){ 259 *error_out = "Couldn't parse device number"; 260 return -EINVAL; 261 } 262 if(n >= MAX_DEV){ 263 *error_out = "Device number out of range"; 264 return 1; 265 } 266 267 err = -EBUSY; 268 mutex_lock(&ubd_lock); 269 270 ubd_dev = &ubd_devs[n]; 271 if(ubd_dev->file != NULL){ 272 *error_out = "Device is already configured"; 273 goto out; 274 } 275 276 if (index_out) 277 *index_out = n; 278 279 err = -EINVAL; 280 for (i = 0; i < sizeof("rscdt="); i++) { 281 switch (*str) { 282 case 'r': 283 flags.w = 0; 284 break; 285 case 's': 286 flags.s = 1; 287 break; 288 case 'd': 289 ubd_dev->no_cow = 1; 290 break; 291 case 'c': 292 ubd_dev->shared = 1; 293 break; 294 case 't': 295 ubd_dev->no_trim = 1; 296 break; 297 case '=': 298 str++; 299 goto break_loop; 300 default: 301 *error_out = "Expected '=' or flag letter " 302 "(r, s, c, t or d)"; 303 goto out; 304 } 305 str++; 306 } 307 308 if (*str == '=') 309 *error_out = "Too many flags specified"; 310 else 311 *error_out = "Missing '='"; 312 goto out; 313 314break_loop: 315 file = strsep(&str, ",:"); 316 if (*file == '\0') 317 file = NULL; 318 319 backing_file = strsep(&str, ",:"); 320 if (backing_file && *backing_file == '\0') 321 backing_file = NULL; 322 323 serial = strsep(&str, ",:"); 324 if (serial && *serial == '\0') 325 serial = NULL; 326 327 if (backing_file && ubd_dev->no_cow) { 328 *error_out = "Can't specify both 'd' and a cow file"; 329 goto out; 330 } 331 332 err = 0; 333 ubd_dev->file = file; 334 ubd_dev->cow.file = backing_file; 335 ubd_dev->serial = serial; 336 ubd_dev->boot_openflags = flags; 337out: 338 mutex_unlock(&ubd_lock); 339 return err; 340} 341 342static int ubd_setup(char *str) 343{ 344 char *error; 345 int err; 346 347 err = ubd_setup_common(str, NULL, &error); 348 if(err) 349 printk(KERN_ERR "Failed to initialize device with \"%s\" : " 350 "%s\n", str, error); 351 return 1; 352} 353 354__setup("ubd", ubd_setup); 355__uml_help(ubd_setup, 356"ubd<n><flags>=<filename>[(:|,)<filename2>][(:|,)<serial>]\n" 357" This is used to associate a device with a file in the underlying\n" 358" filesystem. When specifying two filenames, the first one is the\n" 359" COW name and the second is the backing file name. As separator you can\n" 360" use either a ':' or a ',': the first one allows writing things like;\n" 361" ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n" 362" while with a ',' the shell would not expand the 2nd '~'.\n" 363" When using only one filename, UML will detect whether to treat it like\n" 364" a COW file or a backing file. To override this detection, add the 'd'\n" 365" flag:\n" 366" ubd0d=BackingFile\n" 367" Usually, there is a filesystem in the file, but \n" 368" that's not required. Swap devices containing swap files can be\n" 369" specified like this. Also, a file which doesn't contain a\n" 370" filesystem can have its contents read in the virtual \n" 371" machine by running 'dd' on the device. <n> must be in the range\n" 372" 0 to 7. Appending an 'r' to the number will cause that device\n" 373" to be mounted read-only. For example ubd1r=./ext_fs. Appending\n" 374" an 's' will cause data to be written to disk on the host immediately.\n" 375" 'c' will cause the device to be treated as being shared between multiple\n" 376" UMLs and file locking will be turned off - this is appropriate for a\n" 377" cluster filesystem and inappropriate at almost all other times.\n\n" 378" 't' will disable trim/discard support on the device (enabled by default).\n\n" 379" An optional device serial number can be exposed using the serial parameter\n" 380" on the cmdline which is exposed as a sysfs entry. This is particularly\n" 381" useful when a unique number should be given to the device. Note when\n" 382" specifying a label, the filename2 must be also presented. It can be\n" 383" an empty string, in which case the backing file is not used:\n" 384" ubd0=File,,Serial\n" 385); 386 387static int udb_setup(char *str) 388{ 389 printk("udb%s specified on command line is almost certainly a ubd -> " 390 "udb TYPO\n", str); 391 return 1; 392} 393 394__setup("udb", udb_setup); 395__uml_help(udb_setup, 396"udb\n" 397" This option is here solely to catch ubd -> udb typos, which can be\n" 398" to impossible to catch visually unless you specifically look for\n" 399" them. The only result of any option starting with 'udb' is an error\n" 400" in the boot output.\n\n" 401); 402 403/* Only changed by ubd_init, which is an initcall. */ 404static int thread_fd = -1; 405 406/* Function to read several request pointers at a time 407* handling fractional reads if (and as) needed 408*/ 409 410static int bulk_req_safe_read( 411 int fd, 412 struct io_thread_req * (*request_buffer)[], 413 struct io_thread_req **remainder, 414 int *remainder_size, 415 int max_recs 416 ) 417{ 418 int n = 0; 419 int res = 0; 420 421 if (*remainder_size > 0) { 422 memmove( 423 (char *) request_buffer, 424 (char *) remainder, *remainder_size 425 ); 426 n = *remainder_size; 427 } 428 429 res = os_read_file( 430 fd, 431 ((char *) request_buffer) + *remainder_size, 432 sizeof(struct io_thread_req *)*max_recs 433 - *remainder_size 434 ); 435 if (res > 0) { 436 n += res; 437 if ((n % sizeof(struct io_thread_req *)) > 0) { 438 /* 439 * Read somehow returned not a multiple of dword 440 * theoretically possible, but never observed in the 441 * wild, so read routine must be able to handle it 442 */ 443 *remainder_size = n % sizeof(struct io_thread_req *); 444 WARN(*remainder_size > 0, "UBD IPC read returned a partial result"); 445 memmove( 446 remainder, 447 ((char *) request_buffer) + 448 (n/sizeof(struct io_thread_req *))*sizeof(struct io_thread_req *), 449 *remainder_size 450 ); 451 n = n - *remainder_size; 452 } 453 } else { 454 n = res; 455 } 456 return n; 457} 458 459/* Called without dev->lock held, and only in interrupt context. */ 460static void ubd_handler(void) 461{ 462 int n; 463 int count; 464 465 while(1){ 466 n = bulk_req_safe_read( 467 thread_fd, 468 irq_req_buffer, 469 &irq_remainder, 470 &irq_remainder_size, 471 UBD_REQ_BUFFER_SIZE 472 ); 473 if (n < 0) { 474 if(n == -EAGAIN) 475 break; 476 printk(KERN_ERR "spurious interrupt in ubd_handler, " 477 "err = %d\n", -n); 478 return; 479 } 480 for (count = 0; count < n/sizeof(struct io_thread_req *); count++) { 481 struct io_thread_req *io_req = (*irq_req_buffer)[count]; 482 483 if ((io_req->error == BLK_STS_NOTSUPP) && (req_op(io_req->req) == REQ_OP_DISCARD)) { 484 blk_queue_max_discard_sectors(io_req->req->q, 0); 485 blk_queue_max_write_zeroes_sectors(io_req->req->q, 0); 486 } 487 blk_mq_end_request(io_req->req, io_req->error); 488 kfree(io_req); 489 } 490 } 491} 492 493static irqreturn_t ubd_intr(int irq, void *dev) 494{ 495 ubd_handler(); 496 return IRQ_HANDLED; 497} 498 499/* Only changed by ubd_init, which is an initcall. */ 500static int io_pid = -1; 501 502static void kill_io_thread(void) 503{ 504 if(io_pid != -1) 505 os_kill_process(io_pid, 1); 506} 507 508__uml_exitcall(kill_io_thread); 509 510static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out) 511{ 512 char *file; 513 int fd; 514 int err; 515 516 __u32 version; 517 __u32 align; 518 char *backing_file; 519 time64_t mtime; 520 unsigned long long size; 521 int sector_size; 522 int bitmap_offset; 523 524 if (ubd_dev->file && ubd_dev->cow.file) { 525 file = ubd_dev->cow.file; 526 527 goto out; 528 } 529 530 fd = os_open_file(ubd_dev->file, of_read(OPENFLAGS()), 0); 531 if (fd < 0) 532 return fd; 533 534 err = read_cow_header(file_reader, &fd, &version, &backing_file, \ 535 &mtime, &size, §or_size, &align, &bitmap_offset); 536 os_close_file(fd); 537 538 if(err == -EINVAL) 539 file = ubd_dev->file; 540 else 541 file = backing_file; 542 543out: 544 return os_file_size(file, size_out); 545} 546 547static int read_cow_bitmap(int fd, void *buf, int offset, int len) 548{ 549 int err; 550 551 err = os_pread_file(fd, buf, len, offset); 552 if (err < 0) 553 return err; 554 555 return 0; 556} 557 558static int backing_file_mismatch(char *file, __u64 size, time64_t mtime) 559{ 560 time64_t modtime; 561 unsigned long long actual; 562 int err; 563 564 err = os_file_modtime(file, &modtime); 565 if (err < 0) { 566 printk(KERN_ERR "Failed to get modification time of backing " 567 "file \"%s\", err = %d\n", file, -err); 568 return err; 569 } 570 571 err = os_file_size(file, &actual); 572 if (err < 0) { 573 printk(KERN_ERR "Failed to get size of backing file \"%s\", " 574 "err = %d\n", file, -err); 575 return err; 576 } 577 578 if (actual != size) { 579 /*__u64 can be a long on AMD64 and with %lu GCC complains; so 580 * the typecast.*/ 581 printk(KERN_ERR "Size mismatch (%llu vs %llu) of COW header " 582 "vs backing file\n", (unsigned long long) size, actual); 583 return -EINVAL; 584 } 585 if (modtime != mtime) { 586 printk(KERN_ERR "mtime mismatch (%lld vs %lld) of COW header vs " 587 "backing file\n", mtime, modtime); 588 return -EINVAL; 589 } 590 return 0; 591} 592 593static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow) 594{ 595 struct uml_stat buf1, buf2; 596 int err; 597 598 if (from_cmdline == NULL) 599 return 0; 600 if (!strcmp(from_cmdline, from_cow)) 601 return 0; 602 603 err = os_stat_file(from_cmdline, &buf1); 604 if (err < 0) { 605 printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cmdline, 606 -err); 607 return 0; 608 } 609 err = os_stat_file(from_cow, &buf2); 610 if (err < 0) { 611 printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cow, 612 -err); 613 return 1; 614 } 615 if ((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino)) 616 return 0; 617 618 printk(KERN_ERR "Backing file mismatch - \"%s\" requested, " 619 "\"%s\" specified in COW header of \"%s\"\n", 620 from_cmdline, from_cow, cow); 621 return 1; 622} 623 624static int open_ubd_file(char *file, struct openflags *openflags, int shared, 625 char **backing_file_out, int *bitmap_offset_out, 626 unsigned long *bitmap_len_out, int *data_offset_out, 627 int *create_cow_out) 628{ 629 time64_t mtime; 630 unsigned long long size; 631 __u32 version, align; 632 char *backing_file; 633 int fd, err, sectorsize, asked_switch, mode = 0644; 634 635 fd = os_open_file(file, *openflags, mode); 636 if (fd < 0) { 637 if ((fd == -ENOENT) && (create_cow_out != NULL)) 638 *create_cow_out = 1; 639 if (!openflags->w || 640 ((fd != -EROFS) && (fd != -EACCES))) 641 return fd; 642 openflags->w = 0; 643 fd = os_open_file(file, *openflags, mode); 644 if (fd < 0) 645 return fd; 646 } 647 648 if (shared) 649 printk(KERN_INFO "Not locking \"%s\" on the host\n", file); 650 else { 651 err = os_lock_file(fd, openflags->w); 652 if (err < 0) { 653 printk(KERN_ERR "Failed to lock '%s', err = %d\n", 654 file, -err); 655 goto out_close; 656 } 657 } 658 659 /* Successful return case! */ 660 if (backing_file_out == NULL) 661 return fd; 662 663 err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime, 664 &size, §orsize, &align, bitmap_offset_out); 665 if (err && (*backing_file_out != NULL)) { 666 printk(KERN_ERR "Failed to read COW header from COW file " 667 "\"%s\", errno = %d\n", file, -err); 668 goto out_close; 669 } 670 if (err) 671 return fd; 672 673 asked_switch = path_requires_switch(*backing_file_out, backing_file, 674 file); 675 676 /* Allow switching only if no mismatch. */ 677 if (asked_switch && !backing_file_mismatch(*backing_file_out, size, 678 mtime)) { 679 printk(KERN_ERR "Switching backing file to '%s'\n", 680 *backing_file_out); 681 err = write_cow_header(file, fd, *backing_file_out, 682 sectorsize, align, &size); 683 if (err) { 684 printk(KERN_ERR "Switch failed, errno = %d\n", -err); 685 goto out_close; 686 } 687 } else { 688 *backing_file_out = backing_file; 689 err = backing_file_mismatch(*backing_file_out, size, mtime); 690 if (err) 691 goto out_close; 692 } 693 694 cow_sizes(version, size, sectorsize, align, *bitmap_offset_out, 695 bitmap_len_out, data_offset_out); 696 697 return fd; 698 out_close: 699 os_close_file(fd); 700 return err; 701} 702 703static int create_cow_file(char *cow_file, char *backing_file, 704 struct openflags flags, 705 int sectorsize, int alignment, int *bitmap_offset_out, 706 unsigned long *bitmap_len_out, int *data_offset_out) 707{ 708 int err, fd; 709 710 flags.c = 1; 711 fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL); 712 if (fd < 0) { 713 err = fd; 714 printk(KERN_ERR "Open of COW file '%s' failed, errno = %d\n", 715 cow_file, -err); 716 goto out; 717 } 718 719 err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment, 720 bitmap_offset_out, bitmap_len_out, 721 data_offset_out); 722 if (!err) 723 return fd; 724 os_close_file(fd); 725 out: 726 return err; 727} 728 729static void ubd_close_dev(struct ubd *ubd_dev) 730{ 731 os_close_file(ubd_dev->fd); 732 if(ubd_dev->cow.file == NULL) 733 return; 734 735 os_close_file(ubd_dev->cow.fd); 736 vfree(ubd_dev->cow.bitmap); 737 ubd_dev->cow.bitmap = NULL; 738} 739 740static int ubd_open_dev(struct ubd *ubd_dev) 741{ 742 struct openflags flags; 743 char **back_ptr; 744 int err, create_cow, *create_ptr; 745 int fd; 746 747 ubd_dev->openflags = ubd_dev->boot_openflags; 748 create_cow = 0; 749 create_ptr = (ubd_dev->cow.file != NULL) ? &create_cow : NULL; 750 back_ptr = ubd_dev->no_cow ? NULL : &ubd_dev->cow.file; 751 752 fd = open_ubd_file(ubd_dev->file, &ubd_dev->openflags, ubd_dev->shared, 753 back_ptr, &ubd_dev->cow.bitmap_offset, 754 &ubd_dev->cow.bitmap_len, &ubd_dev->cow.data_offset, 755 create_ptr); 756 757 if((fd == -ENOENT) && create_cow){ 758 fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file, 759 ubd_dev->openflags, SECTOR_SIZE, PAGE_SIZE, 760 &ubd_dev->cow.bitmap_offset, 761 &ubd_dev->cow.bitmap_len, 762 &ubd_dev->cow.data_offset); 763 if(fd >= 0){ 764 printk(KERN_INFO "Creating \"%s\" as COW file for " 765 "\"%s\"\n", ubd_dev->file, ubd_dev->cow.file); 766 } 767 } 768 769 if(fd < 0){ 770 printk("Failed to open '%s', errno = %d\n", ubd_dev->file, 771 -fd); 772 return fd; 773 } 774 ubd_dev->fd = fd; 775 776 if(ubd_dev->cow.file != NULL){ 777 blk_queue_max_hw_sectors(ubd_dev->queue, 8 * sizeof(long)); 778 779 err = -ENOMEM; 780 ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len); 781 if(ubd_dev->cow.bitmap == NULL){ 782 printk(KERN_ERR "Failed to vmalloc COW bitmap\n"); 783 goto error; 784 } 785 flush_tlb_kernel_vm(); 786 787 err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap, 788 ubd_dev->cow.bitmap_offset, 789 ubd_dev->cow.bitmap_len); 790 if(err < 0) 791 goto error; 792 793 flags = ubd_dev->openflags; 794 flags.w = 0; 795 err = open_ubd_file(ubd_dev->cow.file, &flags, ubd_dev->shared, NULL, 796 NULL, NULL, NULL, NULL); 797 if(err < 0) goto error; 798 ubd_dev->cow.fd = err; 799 } 800 if (ubd_dev->no_trim == 0) { 801 ubd_dev->queue->limits.discard_granularity = SECTOR_SIZE; 802 blk_queue_max_discard_sectors(ubd_dev->queue, UBD_MAX_REQUEST); 803 blk_queue_max_write_zeroes_sectors(ubd_dev->queue, UBD_MAX_REQUEST); 804 } 805 blk_queue_flag_set(QUEUE_FLAG_NONROT, ubd_dev->queue); 806 return 0; 807 error: 808 os_close_file(ubd_dev->fd); 809 return err; 810} 811 812static void ubd_device_release(struct device *dev) 813{ 814 struct ubd *ubd_dev = dev_get_drvdata(dev); 815 816 blk_mq_free_tag_set(&ubd_dev->tag_set); 817 *ubd_dev = ((struct ubd) DEFAULT_UBD); 818} 819 820static ssize_t serial_show(struct device *dev, 821 struct device_attribute *attr, char *buf) 822{ 823 struct gendisk *disk = dev_to_disk(dev); 824 struct ubd *ubd_dev = disk->private_data; 825 826 if (!ubd_dev) 827 return 0; 828 829 return sprintf(buf, "%s", ubd_dev->serial); 830} 831 832static DEVICE_ATTR_RO(serial); 833 834static struct attribute *ubd_attrs[] = { 835 &dev_attr_serial.attr, 836 NULL, 837}; 838 839static umode_t ubd_attrs_are_visible(struct kobject *kobj, 840 struct attribute *a, int n) 841{ 842 return a->mode; 843} 844 845static const struct attribute_group ubd_attr_group = { 846 .attrs = ubd_attrs, 847 .is_visible = ubd_attrs_are_visible, 848}; 849 850static const struct attribute_group *ubd_attr_groups[] = { 851 &ubd_attr_group, 852 NULL, 853}; 854 855static int ubd_disk_register(int major, u64 size, int unit, 856 struct gendisk *disk) 857{ 858 disk->major = major; 859 disk->first_minor = unit << UBD_SHIFT; 860 disk->minors = 1 << UBD_SHIFT; 861 disk->fops = &ubd_blops; 862 set_capacity(disk, size / 512); 863 sprintf(disk->disk_name, "ubd%c", 'a' + unit); 864 865 ubd_devs[unit].pdev.id = unit; 866 ubd_devs[unit].pdev.name = DRIVER_NAME; 867 ubd_devs[unit].pdev.dev.release = ubd_device_release; 868 dev_set_drvdata(&ubd_devs[unit].pdev.dev, &ubd_devs[unit]); 869 platform_device_register(&ubd_devs[unit].pdev); 870 871 disk->private_data = &ubd_devs[unit]; 872 disk->queue = ubd_devs[unit].queue; 873 return device_add_disk(&ubd_devs[unit].pdev.dev, disk, ubd_attr_groups); 874} 875 876#define ROUND_BLOCK(n) ((n + (SECTOR_SIZE - 1)) & (-SECTOR_SIZE)) 877 878static const struct blk_mq_ops ubd_mq_ops = { 879 .queue_rq = ubd_queue_rq, 880}; 881 882static int ubd_add(int n, char **error_out) 883{ 884 struct ubd *ubd_dev = &ubd_devs[n]; 885 struct gendisk *disk; 886 int err = 0; 887 888 if(ubd_dev->file == NULL) 889 goto out; 890 891 err = ubd_file_size(ubd_dev, &ubd_dev->size); 892 if(err < 0){ 893 *error_out = "Couldn't determine size of device's file"; 894 goto out; 895 } 896 897 ubd_dev->size = ROUND_BLOCK(ubd_dev->size); 898 899 ubd_dev->tag_set.ops = &ubd_mq_ops; 900 ubd_dev->tag_set.queue_depth = 64; 901 ubd_dev->tag_set.numa_node = NUMA_NO_NODE; 902 ubd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; 903 ubd_dev->tag_set.driver_data = ubd_dev; 904 ubd_dev->tag_set.nr_hw_queues = 1; 905 906 err = blk_mq_alloc_tag_set(&ubd_dev->tag_set); 907 if (err) 908 goto out; 909 910 disk = blk_mq_alloc_disk(&ubd_dev->tag_set, ubd_dev); 911 if (IS_ERR(disk)) { 912 err = PTR_ERR(disk); 913 goto out_cleanup_tags; 914 } 915 ubd_dev->queue = disk->queue; 916 917 blk_queue_write_cache(ubd_dev->queue, true, false); 918 blk_queue_max_segments(ubd_dev->queue, MAX_SG); 919 blk_queue_segment_boundary(ubd_dev->queue, PAGE_SIZE - 1); 920 err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, disk); 921 if (err) 922 goto out_cleanup_disk; 923 924 ubd_gendisk[n] = disk; 925 return 0; 926 927out_cleanup_disk: 928 put_disk(disk); 929out_cleanup_tags: 930 blk_mq_free_tag_set(&ubd_dev->tag_set); 931out: 932 return err; 933} 934 935static int ubd_config(char *str, char **error_out) 936{ 937 int n, ret; 938 939 /* This string is possibly broken up and stored, so it's only 940 * freed if ubd_setup_common fails, or if only general options 941 * were set. 942 */ 943 str = kstrdup(str, GFP_KERNEL); 944 if (str == NULL) { 945 *error_out = "Failed to allocate memory"; 946 return -ENOMEM; 947 } 948 949 ret = ubd_setup_common(str, &n, error_out); 950 if (ret) 951 goto err_free; 952 953 if (n == -1) { 954 ret = 0; 955 goto err_free; 956 } 957 958 mutex_lock(&ubd_lock); 959 ret = ubd_add(n, error_out); 960 if (ret) 961 ubd_devs[n].file = NULL; 962 mutex_unlock(&ubd_lock); 963 964out: 965 return ret; 966 967err_free: 968 kfree(str); 969 goto out; 970} 971 972static int ubd_get_config(char *name, char *str, int size, char **error_out) 973{ 974 struct ubd *ubd_dev; 975 int n, len = 0; 976 977 n = parse_unit(&name); 978 if((n >= MAX_DEV) || (n < 0)){ 979 *error_out = "ubd_get_config : device number out of range"; 980 return -1; 981 } 982 983 ubd_dev = &ubd_devs[n]; 984 mutex_lock(&ubd_lock); 985 986 if(ubd_dev->file == NULL){ 987 CONFIG_CHUNK(str, size, len, "", 1); 988 goto out; 989 } 990 991 CONFIG_CHUNK(str, size, len, ubd_dev->file, 0); 992 993 if(ubd_dev->cow.file != NULL){ 994 CONFIG_CHUNK(str, size, len, ",", 0); 995 CONFIG_CHUNK(str, size, len, ubd_dev->cow.file, 1); 996 } 997 else CONFIG_CHUNK(str, size, len, "", 1); 998 999 out: 1000 mutex_unlock(&ubd_lock); 1001 return len; 1002} 1003 1004static int ubd_id(char **str, int *start_out, int *end_out) 1005{ 1006 int n; 1007 1008 n = parse_unit(str); 1009 *start_out = 0; 1010 *end_out = MAX_DEV - 1; 1011 return n; 1012} 1013 1014static int ubd_remove(int n, char **error_out) 1015{ 1016 struct gendisk *disk = ubd_gendisk[n]; 1017 struct ubd *ubd_dev; 1018 int err = -ENODEV; 1019 1020 mutex_lock(&ubd_lock); 1021 1022 ubd_dev = &ubd_devs[n]; 1023 1024 if(ubd_dev->file == NULL) 1025 goto out; 1026 1027 /* you cannot remove a open disk */ 1028 err = -EBUSY; 1029 if(ubd_dev->count > 0) 1030 goto out; 1031 1032 ubd_gendisk[n] = NULL; 1033 if(disk != NULL){ 1034 del_gendisk(disk); 1035 put_disk(disk); 1036 } 1037 1038 err = 0; 1039 platform_device_unregister(&ubd_dev->pdev); 1040out: 1041 mutex_unlock(&ubd_lock); 1042 return err; 1043} 1044 1045/* All these are called by mconsole in process context and without 1046 * ubd-specific locks. The structure itself is const except for .list. 1047 */ 1048static struct mc_device ubd_mc = { 1049 .list = LIST_HEAD_INIT(ubd_mc.list), 1050 .name = "ubd", 1051 .config = ubd_config, 1052 .get_config = ubd_get_config, 1053 .id = ubd_id, 1054 .remove = ubd_remove, 1055}; 1056 1057static int __init ubd_mc_init(void) 1058{ 1059 mconsole_register_dev(&ubd_mc); 1060 return 0; 1061} 1062 1063__initcall(ubd_mc_init); 1064 1065static int __init ubd0_init(void) 1066{ 1067 struct ubd *ubd_dev = &ubd_devs[0]; 1068 1069 mutex_lock(&ubd_lock); 1070 if(ubd_dev->file == NULL) 1071 ubd_dev->file = "root_fs"; 1072 mutex_unlock(&ubd_lock); 1073 1074 return 0; 1075} 1076 1077__initcall(ubd0_init); 1078 1079/* Used in ubd_init, which is an initcall */ 1080static struct platform_driver ubd_driver = { 1081 .driver = { 1082 .name = DRIVER_NAME, 1083 }, 1084}; 1085 1086static int __init ubd_init(void) 1087{ 1088 char *error; 1089 int i, err; 1090 1091 if (register_blkdev(UBD_MAJOR, "ubd")) 1092 return -1; 1093 1094 irq_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE, 1095 sizeof(struct io_thread_req *), 1096 GFP_KERNEL 1097 ); 1098 irq_remainder = 0; 1099 1100 if (irq_req_buffer == NULL) { 1101 printk(KERN_ERR "Failed to initialize ubd buffering\n"); 1102 return -1; 1103 } 1104 io_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE, 1105 sizeof(struct io_thread_req *), 1106 GFP_KERNEL 1107 ); 1108 1109 io_remainder = 0; 1110 1111 if (io_req_buffer == NULL) { 1112 printk(KERN_ERR "Failed to initialize ubd buffering\n"); 1113 return -1; 1114 } 1115 platform_driver_register(&ubd_driver); 1116 mutex_lock(&ubd_lock); 1117 for (i = 0; i < MAX_DEV; i++){ 1118 err = ubd_add(i, &error); 1119 if(err) 1120 printk(KERN_ERR "Failed to initialize ubd device %d :" 1121 "%s\n", i, error); 1122 } 1123 mutex_unlock(&ubd_lock); 1124 return 0; 1125} 1126 1127late_initcall(ubd_init); 1128 1129static int __init ubd_driver_init(void){ 1130 unsigned long stack; 1131 int err; 1132 1133 /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/ 1134 if(global_openflags.s){ 1135 printk(KERN_INFO "ubd: Synchronous mode\n"); 1136 /* Letting ubd=sync be like using ubd#s= instead of ubd#= is 1137 * enough. So use anyway the io thread. */ 1138 } 1139 stack = alloc_stack(0, 0); 1140 io_pid = start_io_thread(stack + PAGE_SIZE, &thread_fd); 1141 if(io_pid < 0){ 1142 printk(KERN_ERR 1143 "ubd : Failed to start I/O thread (errno = %d) - " 1144 "falling back to synchronous I/O\n", -io_pid); 1145 io_pid = -1; 1146 return 0; 1147 } 1148 err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr, 1149 0, "ubd", ubd_devs); 1150 if(err < 0) 1151 printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err); 1152 return 0; 1153} 1154 1155device_initcall(ubd_driver_init); 1156 1157static int ubd_open(struct gendisk *disk, blk_mode_t mode) 1158{ 1159 struct ubd *ubd_dev = disk->private_data; 1160 int err = 0; 1161 1162 mutex_lock(&ubd_mutex); 1163 if(ubd_dev->count == 0){ 1164 err = ubd_open_dev(ubd_dev); 1165 if(err){ 1166 printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n", 1167 disk->disk_name, ubd_dev->file, -err); 1168 goto out; 1169 } 1170 } 1171 ubd_dev->count++; 1172 set_disk_ro(disk, !ubd_dev->openflags.w); 1173out: 1174 mutex_unlock(&ubd_mutex); 1175 return err; 1176} 1177 1178static void ubd_release(struct gendisk *disk) 1179{ 1180 struct ubd *ubd_dev = disk->private_data; 1181 1182 mutex_lock(&ubd_mutex); 1183 if(--ubd_dev->count == 0) 1184 ubd_close_dev(ubd_dev); 1185 mutex_unlock(&ubd_mutex); 1186} 1187 1188static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask, 1189 __u64 *cow_offset, unsigned long *bitmap, 1190 __u64 bitmap_offset, unsigned long *bitmap_words, 1191 __u64 bitmap_len) 1192{ 1193 __u64 sector = io_offset >> SECTOR_SHIFT; 1194 int i, update_bitmap = 0; 1195 1196 for (i = 0; i < length >> SECTOR_SHIFT; i++) { 1197 if(cow_mask != NULL) 1198 ubd_set_bit(i, (unsigned char *) cow_mask); 1199 if(ubd_test_bit(sector + i, (unsigned char *) bitmap)) 1200 continue; 1201 1202 update_bitmap = 1; 1203 ubd_set_bit(sector + i, (unsigned char *) bitmap); 1204 } 1205 1206 if(!update_bitmap) 1207 return; 1208 1209 *cow_offset = sector / (sizeof(unsigned long) * 8); 1210 1211 /* This takes care of the case where we're exactly at the end of the 1212 * device, and *cow_offset + 1 is off the end. So, just back it up 1213 * by one word. Thanks to Lynn Kerby for the fix and James McMechan 1214 * for the original diagnosis. 1215 */ 1216 if (*cow_offset == (DIV_ROUND_UP(bitmap_len, 1217 sizeof(unsigned long)) - 1)) 1218 (*cow_offset)--; 1219 1220 bitmap_words[0] = bitmap[*cow_offset]; 1221 bitmap_words[1] = bitmap[*cow_offset + 1]; 1222 1223 *cow_offset *= sizeof(unsigned long); 1224 *cow_offset += bitmap_offset; 1225} 1226 1227static void cowify_req(struct io_thread_req *req, struct io_desc *segment, 1228 unsigned long offset, unsigned long *bitmap, 1229 __u64 bitmap_offset, __u64 bitmap_len) 1230{ 1231 __u64 sector = offset >> SECTOR_SHIFT; 1232 int i; 1233 1234 if (segment->length > (sizeof(segment->sector_mask) * 8) << SECTOR_SHIFT) 1235 panic("Operation too long"); 1236 1237 if (req_op(req->req) == REQ_OP_READ) { 1238 for (i = 0; i < segment->length >> SECTOR_SHIFT; i++) { 1239 if(ubd_test_bit(sector + i, (unsigned char *) bitmap)) 1240 ubd_set_bit(i, (unsigned char *) 1241 &segment->sector_mask); 1242 } 1243 } else { 1244 cowify_bitmap(offset, segment->length, &segment->sector_mask, 1245 &segment->cow_offset, bitmap, bitmap_offset, 1246 segment->bitmap_words, bitmap_len); 1247 } 1248} 1249 1250static void ubd_map_req(struct ubd *dev, struct io_thread_req *io_req, 1251 struct request *req) 1252{ 1253 struct bio_vec bvec; 1254 struct req_iterator iter; 1255 int i = 0; 1256 unsigned long byte_offset = io_req->offset; 1257 enum req_op op = req_op(req); 1258 1259 if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD) { 1260 io_req->io_desc[0].buffer = NULL; 1261 io_req->io_desc[0].length = blk_rq_bytes(req); 1262 } else { 1263 rq_for_each_segment(bvec, req, iter) { 1264 BUG_ON(i >= io_req->desc_cnt); 1265 1266 io_req->io_desc[i].buffer = bvec_virt(&bvec); 1267 io_req->io_desc[i].length = bvec.bv_len; 1268 i++; 1269 } 1270 } 1271 1272 if (dev->cow.file) { 1273 for (i = 0; i < io_req->desc_cnt; i++) { 1274 cowify_req(io_req, &io_req->io_desc[i], byte_offset, 1275 dev->cow.bitmap, dev->cow.bitmap_offset, 1276 dev->cow.bitmap_len); 1277 byte_offset += io_req->io_desc[i].length; 1278 } 1279 1280 } 1281} 1282 1283static struct io_thread_req *ubd_alloc_req(struct ubd *dev, struct request *req, 1284 int desc_cnt) 1285{ 1286 struct io_thread_req *io_req; 1287 int i; 1288 1289 io_req = kmalloc(sizeof(*io_req) + 1290 (desc_cnt * sizeof(struct io_desc)), 1291 GFP_ATOMIC); 1292 if (!io_req) 1293 return NULL; 1294 1295 io_req->req = req; 1296 if (dev->cow.file) 1297 io_req->fds[0] = dev->cow.fd; 1298 else 1299 io_req->fds[0] = dev->fd; 1300 io_req->error = 0; 1301 io_req->sectorsize = SECTOR_SIZE; 1302 io_req->fds[1] = dev->fd; 1303 io_req->offset = (u64) blk_rq_pos(req) << SECTOR_SHIFT; 1304 io_req->offsets[0] = 0; 1305 io_req->offsets[1] = dev->cow.data_offset; 1306 1307 for (i = 0 ; i < desc_cnt; i++) { 1308 io_req->io_desc[i].sector_mask = 0; 1309 io_req->io_desc[i].cow_offset = -1; 1310 } 1311 1312 return io_req; 1313} 1314 1315static int ubd_submit_request(struct ubd *dev, struct request *req) 1316{ 1317 int segs = 0; 1318 struct io_thread_req *io_req; 1319 int ret; 1320 enum req_op op = req_op(req); 1321 1322 if (op == REQ_OP_FLUSH) 1323 segs = 0; 1324 else if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD) 1325 segs = 1; 1326 else 1327 segs = blk_rq_nr_phys_segments(req); 1328 1329 io_req = ubd_alloc_req(dev, req, segs); 1330 if (!io_req) 1331 return -ENOMEM; 1332 1333 io_req->desc_cnt = segs; 1334 if (segs) 1335 ubd_map_req(dev, io_req, req); 1336 1337 ret = os_write_file(thread_fd, &io_req, sizeof(io_req)); 1338 if (ret != sizeof(io_req)) { 1339 if (ret != -EAGAIN) 1340 pr_err("write to io thread failed: %d\n", -ret); 1341 kfree(io_req); 1342 } 1343 return ret; 1344} 1345 1346static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx, 1347 const struct blk_mq_queue_data *bd) 1348{ 1349 struct ubd *ubd_dev = hctx->queue->queuedata; 1350 struct request *req = bd->rq; 1351 int ret = 0, res = BLK_STS_OK; 1352 1353 blk_mq_start_request(req); 1354 1355 spin_lock_irq(&ubd_dev->lock); 1356 1357 switch (req_op(req)) { 1358 case REQ_OP_FLUSH: 1359 case REQ_OP_READ: 1360 case REQ_OP_WRITE: 1361 case REQ_OP_DISCARD: 1362 case REQ_OP_WRITE_ZEROES: 1363 ret = ubd_submit_request(ubd_dev, req); 1364 break; 1365 default: 1366 WARN_ON_ONCE(1); 1367 res = BLK_STS_NOTSUPP; 1368 } 1369 1370 spin_unlock_irq(&ubd_dev->lock); 1371 1372 if (ret < 0) { 1373 if (ret == -ENOMEM) 1374 res = BLK_STS_RESOURCE; 1375 else 1376 res = BLK_STS_DEV_RESOURCE; 1377 } 1378 1379 return res; 1380} 1381 1382static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo) 1383{ 1384 struct ubd *ubd_dev = bdev->bd_disk->private_data; 1385 1386 geo->heads = 128; 1387 geo->sectors = 32; 1388 geo->cylinders = ubd_dev->size / (128 * 32 * 512); 1389 return 0; 1390} 1391 1392static int ubd_ioctl(struct block_device *bdev, blk_mode_t mode, 1393 unsigned int cmd, unsigned long arg) 1394{ 1395 struct ubd *ubd_dev = bdev->bd_disk->private_data; 1396 u16 ubd_id[ATA_ID_WORDS]; 1397 1398 switch (cmd) { 1399 struct cdrom_volctrl volume; 1400 case HDIO_GET_IDENTITY: 1401 memset(&ubd_id, 0, ATA_ID_WORDS * 2); 1402 ubd_id[ATA_ID_CYLS] = ubd_dev->size / (128 * 32 * 512); 1403 ubd_id[ATA_ID_HEADS] = 128; 1404 ubd_id[ATA_ID_SECTORS] = 32; 1405 if(copy_to_user((char __user *) arg, (char *) &ubd_id, 1406 sizeof(ubd_id))) 1407 return -EFAULT; 1408 return 0; 1409 1410 case CDROMVOLREAD: 1411 if(copy_from_user(&volume, (char __user *) arg, sizeof(volume))) 1412 return -EFAULT; 1413 volume.channel0 = 255; 1414 volume.channel1 = 255; 1415 volume.channel2 = 255; 1416 volume.channel3 = 255; 1417 if(copy_to_user((char __user *) arg, &volume, sizeof(volume))) 1418 return -EFAULT; 1419 return 0; 1420 } 1421 return -EINVAL; 1422} 1423 1424static int map_error(int error_code) 1425{ 1426 switch (error_code) { 1427 case 0: 1428 return BLK_STS_OK; 1429 case ENOSYS: 1430 case EOPNOTSUPP: 1431 return BLK_STS_NOTSUPP; 1432 case ENOSPC: 1433 return BLK_STS_NOSPC; 1434 } 1435 return BLK_STS_IOERR; 1436} 1437 1438/* 1439 * Everything from here onwards *IS NOT PART OF THE KERNEL* 1440 * 1441 * The following functions are part of UML hypervisor code. 1442 * All functions from here onwards are executed as a helper 1443 * thread and are not allowed to execute any kernel functions. 1444 * 1445 * Any communication must occur strictly via shared memory and IPC. 1446 * 1447 * Do not add printks, locks, kernel memory operations, etc - it 1448 * will result in unpredictable behaviour and/or crashes. 1449 */ 1450 1451static int update_bitmap(struct io_thread_req *req, struct io_desc *segment) 1452{ 1453 int n; 1454 1455 if (segment->cow_offset == -1) 1456 return map_error(0); 1457 1458 n = os_pwrite_file(req->fds[1], &segment->bitmap_words, 1459 sizeof(segment->bitmap_words), segment->cow_offset); 1460 if (n != sizeof(segment->bitmap_words)) 1461 return map_error(-n); 1462 1463 return map_error(0); 1464} 1465 1466static void do_io(struct io_thread_req *req, struct io_desc *desc) 1467{ 1468 char *buf = NULL; 1469 unsigned long len; 1470 int n, nsectors, start, end, bit; 1471 __u64 off; 1472 1473 /* FLUSH is really a special case, we cannot "case" it with others */ 1474 1475 if (req_op(req->req) == REQ_OP_FLUSH) { 1476 /* fds[0] is always either the rw image or our cow file */ 1477 req->error = map_error(-os_sync_file(req->fds[0])); 1478 return; 1479 } 1480 1481 nsectors = desc->length / req->sectorsize; 1482 start = 0; 1483 do { 1484 bit = ubd_test_bit(start, (unsigned char *) &desc->sector_mask); 1485 end = start; 1486 while((end < nsectors) && 1487 (ubd_test_bit(end, (unsigned char *) &desc->sector_mask) == bit)) 1488 end++; 1489 1490 off = req->offset + req->offsets[bit] + 1491 start * req->sectorsize; 1492 len = (end - start) * req->sectorsize; 1493 if (desc->buffer != NULL) 1494 buf = &desc->buffer[start * req->sectorsize]; 1495 1496 switch (req_op(req->req)) { 1497 case REQ_OP_READ: 1498 n = 0; 1499 do { 1500 buf = &buf[n]; 1501 len -= n; 1502 n = os_pread_file(req->fds[bit], buf, len, off); 1503 if (n < 0) { 1504 req->error = map_error(-n); 1505 return; 1506 } 1507 } while((n < len) && (n != 0)); 1508 if (n < len) memset(&buf[n], 0, len - n); 1509 break; 1510 case REQ_OP_WRITE: 1511 n = os_pwrite_file(req->fds[bit], buf, len, off); 1512 if(n != len){ 1513 req->error = map_error(-n); 1514 return; 1515 } 1516 break; 1517 case REQ_OP_DISCARD: 1518 n = os_falloc_punch(req->fds[bit], off, len); 1519 if (n) { 1520 req->error = map_error(-n); 1521 return; 1522 } 1523 break; 1524 case REQ_OP_WRITE_ZEROES: 1525 n = os_falloc_zeroes(req->fds[bit], off, len); 1526 if (n) { 1527 req->error = map_error(-n); 1528 return; 1529 } 1530 break; 1531 default: 1532 WARN_ON_ONCE(1); 1533 req->error = BLK_STS_NOTSUPP; 1534 return; 1535 } 1536 1537 start = end; 1538 } while(start < nsectors); 1539 1540 req->offset += len; 1541 req->error = update_bitmap(req, desc); 1542} 1543 1544/* Changed in start_io_thread, which is serialized by being called only 1545 * from ubd_init, which is an initcall. 1546 */ 1547int kernel_fd = -1; 1548 1549/* Only changed by the io thread. XXX: currently unused. */ 1550static int io_count; 1551 1552int io_thread(void *arg) 1553{ 1554 int n, count, written, res; 1555 1556 os_fix_helper_signals(); 1557 1558 while(1){ 1559 n = bulk_req_safe_read( 1560 kernel_fd, 1561 io_req_buffer, 1562 &io_remainder, 1563 &io_remainder_size, 1564 UBD_REQ_BUFFER_SIZE 1565 ); 1566 if (n <= 0) { 1567 if (n == -EAGAIN) 1568 ubd_read_poll(-1); 1569 1570 continue; 1571 } 1572 1573 for (count = 0; count < n/sizeof(struct io_thread_req *); count++) { 1574 struct io_thread_req *req = (*io_req_buffer)[count]; 1575 int i; 1576 1577 io_count++; 1578 for (i = 0; !req->error && i < req->desc_cnt; i++) 1579 do_io(req, &(req->io_desc[i])); 1580 1581 } 1582 1583 written = 0; 1584 1585 do { 1586 res = os_write_file(kernel_fd, 1587 ((char *) io_req_buffer) + written, 1588 n - written); 1589 if (res >= 0) { 1590 written += res; 1591 } 1592 if (written < n) { 1593 ubd_write_poll(-1); 1594 } 1595 } while (written < n); 1596 } 1597 1598 return 0; 1599} 1600