1// SPDX-License-Identifier: GPL-2.0 2/* 3 * Copyright (C) 2018 Cambridge Greys Ltd 4 * Copyright (C) 2015-2016 Anton Ivanov (aivanov@brocade.com) 5 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) 6 */ 7 8/* 2001-09-28...2002-04-17 9 * Partition stuff by James_McMechan@hotmail.com 10 * old style ubd by setting UBD_SHIFT to 0 11 * 2002-09-27...2002-10-18 massive tinkering for 2.5 12 * partitions have changed in 2.5 13 * 2003-01-29 more tinkering for 2.5.59-1 14 * This should now address the sysfs problems and has 15 * the symlink for devfs to allow for booting with 16 * the common /dev/ubd/discX/... names rather than 17 * only /dev/ubdN/discN this version also has lots of 18 * clean ups preparing for ubd-many. 19 * James McMechan 20 */ 21 22#define UBD_SHIFT 4 23 24#include <linux/module.h> 25#include <linux/init.h> 26#include <linux/blkdev.h> 27#include <linux/blk-mq.h> 28#include <linux/ata.h> 29#include <linux/hdreg.h> 30#include <linux/cdrom.h> 31#include <linux/proc_fs.h> 32#include <linux/seq_file.h> 33#include <linux/ctype.h> 34#include <linux/slab.h> 35#include <linux/vmalloc.h> 36#include <linux/platform_device.h> 37#include <linux/scatterlist.h> 38#include <asm/tlbflush.h> 39#include <kern_util.h> 40#include "mconsole_kern.h" 41#include <init.h> 42#include <irq_kern.h> 43#include "ubd.h" 44#include <os.h> 45#include "cow.h" 46 47/* Max request size is determined by sector mask - 32K */ 48#define UBD_MAX_REQUEST (8 * sizeof(long)) 49 50struct io_desc { 51 char *buffer; 52 unsigned long length; 53 unsigned long sector_mask; 54 unsigned long long cow_offset; 55 unsigned long bitmap_words[2]; 56}; 57 58struct io_thread_req { 59 struct request *req; 60 int fds[2]; 61 unsigned long offsets[2]; 62 unsigned long long offset; 63 int sectorsize; 64 int error; 65 66 int desc_cnt; 67 /* io_desc has to be the last element of the struct */ 68 struct io_desc io_desc[]; 69}; 70 71 72static struct io_thread_req * (*irq_req_buffer)[]; 73static struct io_thread_req *irq_remainder; 74static int irq_remainder_size; 75 76static struct io_thread_req * (*io_req_buffer)[]; 77static struct io_thread_req *io_remainder; 78static int io_remainder_size; 79 80 81 82static inline int ubd_test_bit(__u64 bit, unsigned char *data) 83{ 84 __u64 n; 85 int bits, off; 86 87 bits = sizeof(data[0]) * 8; 88 n = bit / bits; 89 off = bit % bits; 90 return (data[n] & (1 << off)) != 0; 91} 92 93static inline void ubd_set_bit(__u64 bit, unsigned char *data) 94{ 95 __u64 n; 96 int bits, off; 97 98 bits = sizeof(data[0]) * 8; 99 n = bit / bits; 100 off = bit % bits; 101 data[n] |= (1 << off); 102} 103/*End stuff from ubd_user.h*/ 104 105#define DRIVER_NAME "uml-blkdev" 106 107static DEFINE_MUTEX(ubd_lock); 108static DEFINE_MUTEX(ubd_mutex); /* replaces BKL, might not be needed */ 109 110static int ubd_open(struct block_device *bdev, fmode_t mode); 111static void ubd_release(struct gendisk *disk, fmode_t mode); 112static int ubd_ioctl(struct block_device *bdev, fmode_t mode, 113 unsigned int cmd, unsigned long arg); 114static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo); 115 116#define MAX_DEV (16) 117 118static const struct block_device_operations ubd_blops = { 119 .owner = THIS_MODULE, 120 .open = ubd_open, 121 .release = ubd_release, 122 .ioctl = ubd_ioctl, 123 .compat_ioctl = blkdev_compat_ptr_ioctl, 124 .getgeo = ubd_getgeo, 125}; 126 127/* Protected by ubd_lock */ 128static int fake_major = UBD_MAJOR; 129static struct gendisk *ubd_gendisk[MAX_DEV]; 130static struct gendisk *fake_gendisk[MAX_DEV]; 131 132#ifdef CONFIG_BLK_DEV_UBD_SYNC 133#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \ 134 .cl = 1 }) 135#else 136#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \ 137 .cl = 1 }) 138#endif 139static struct openflags global_openflags = OPEN_FLAGS; 140 141struct cow { 142 /* backing file name */ 143 char *file; 144 /* backing file fd */ 145 int fd; 146 unsigned long *bitmap; 147 unsigned long bitmap_len; 148 int bitmap_offset; 149 int data_offset; 150}; 151 152#define MAX_SG 64 153 154struct ubd { 155 /* name (and fd, below) of the file opened for writing, either the 156 * backing or the cow file. */ 157 char *file; 158 int count; 159 int fd; 160 __u64 size; 161 struct openflags boot_openflags; 162 struct openflags openflags; 163 unsigned shared:1; 164 unsigned no_cow:1; 165 unsigned no_trim:1; 166 struct cow cow; 167 struct platform_device pdev; 168 struct request_queue *queue; 169 struct blk_mq_tag_set tag_set; 170 spinlock_t lock; 171}; 172 173#define DEFAULT_COW { \ 174 .file = NULL, \ 175 .fd = -1, \ 176 .bitmap = NULL, \ 177 .bitmap_offset = 0, \ 178 .data_offset = 0, \ 179} 180 181#define DEFAULT_UBD { \ 182 .file = NULL, \ 183 .count = 0, \ 184 .fd = -1, \ 185 .size = -1, \ 186 .boot_openflags = OPEN_FLAGS, \ 187 .openflags = OPEN_FLAGS, \ 188 .no_cow = 0, \ 189 .no_trim = 0, \ 190 .shared = 0, \ 191 .cow = DEFAULT_COW, \ 192 .lock = __SPIN_LOCK_UNLOCKED(ubd_devs.lock), \ 193} 194 195/* Protected by ubd_lock */ 196static struct ubd ubd_devs[MAX_DEV] = { [0 ... MAX_DEV - 1] = DEFAULT_UBD }; 197 198/* Only changed by fake_ide_setup which is a setup */ 199static int fake_ide = 0; 200static struct proc_dir_entry *proc_ide_root = NULL; 201static struct proc_dir_entry *proc_ide = NULL; 202 203static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx, 204 const struct blk_mq_queue_data *bd); 205 206static void make_proc_ide(void) 207{ 208 proc_ide_root = proc_mkdir("ide", NULL); 209 proc_ide = proc_mkdir("ide0", proc_ide_root); 210} 211 212static int fake_ide_media_proc_show(struct seq_file *m, void *v) 213{ 214 seq_puts(m, "disk\n"); 215 return 0; 216} 217 218static void make_ide_entries(const char *dev_name) 219{ 220 struct proc_dir_entry *dir, *ent; 221 char name[64]; 222 223 if(proc_ide_root == NULL) make_proc_ide(); 224 225 dir = proc_mkdir(dev_name, proc_ide); 226 if(!dir) return; 227 228 ent = proc_create_single("media", S_IRUGO, dir, 229 fake_ide_media_proc_show); 230 if(!ent) return; 231 snprintf(name, sizeof(name), "ide0/%s", dev_name); 232 proc_symlink(dev_name, proc_ide_root, name); 233} 234 235static int fake_ide_setup(char *str) 236{ 237 fake_ide = 1; 238 return 1; 239} 240 241__setup("fake_ide", fake_ide_setup); 242 243__uml_help(fake_ide_setup, 244"fake_ide\n" 245" Create ide0 entries that map onto ubd devices.\n\n" 246); 247 248static int parse_unit(char **ptr) 249{ 250 char *str = *ptr, *end; 251 int n = -1; 252 253 if(isdigit(*str)) { 254 n = simple_strtoul(str, &end, 0); 255 if(end == str) 256 return -1; 257 *ptr = end; 258 } 259 else if (('a' <= *str) && (*str <= 'z')) { 260 n = *str - 'a'; 261 str++; 262 *ptr = str; 263 } 264 return n; 265} 266 267/* If *index_out == -1 at exit, the passed option was a general one; 268 * otherwise, the str pointer is used (and owned) inside ubd_devs array, so it 269 * should not be freed on exit. 270 */ 271static int ubd_setup_common(char *str, int *index_out, char **error_out) 272{ 273 struct ubd *ubd_dev; 274 struct openflags flags = global_openflags; 275 char *backing_file; 276 int n, err = 0, i; 277 278 if(index_out) *index_out = -1; 279 n = *str; 280 if(n == '='){ 281 char *end; 282 int major; 283 284 str++; 285 if(!strcmp(str, "sync")){ 286 global_openflags = of_sync(global_openflags); 287 return err; 288 } 289 290 err = -EINVAL; 291 major = simple_strtoul(str, &end, 0); 292 if((*end != '\0') || (end == str)){ 293 *error_out = "Didn't parse major number"; 294 return err; 295 } 296 297 mutex_lock(&ubd_lock); 298 if (fake_major != UBD_MAJOR) { 299 *error_out = "Can't assign a fake major twice"; 300 goto out1; 301 } 302 303 fake_major = major; 304 305 printk(KERN_INFO "Setting extra ubd major number to %d\n", 306 major); 307 err = 0; 308 out1: 309 mutex_unlock(&ubd_lock); 310 return err; 311 } 312 313 n = parse_unit(&str); 314 if(n < 0){ 315 *error_out = "Couldn't parse device number"; 316 return -EINVAL; 317 } 318 if(n >= MAX_DEV){ 319 *error_out = "Device number out of range"; 320 return 1; 321 } 322 323 err = -EBUSY; 324 mutex_lock(&ubd_lock); 325 326 ubd_dev = &ubd_devs[n]; 327 if(ubd_dev->file != NULL){ 328 *error_out = "Device is already configured"; 329 goto out; 330 } 331 332 if (index_out) 333 *index_out = n; 334 335 err = -EINVAL; 336 for (i = 0; i < sizeof("rscdt="); i++) { 337 switch (*str) { 338 case 'r': 339 flags.w = 0; 340 break; 341 case 's': 342 flags.s = 1; 343 break; 344 case 'd': 345 ubd_dev->no_cow = 1; 346 break; 347 case 'c': 348 ubd_dev->shared = 1; 349 break; 350 case 't': 351 ubd_dev->no_trim = 1; 352 break; 353 case '=': 354 str++; 355 goto break_loop; 356 default: 357 *error_out = "Expected '=' or flag letter " 358 "(r, s, c, t or d)"; 359 goto out; 360 } 361 str++; 362 } 363 364 if (*str == '=') 365 *error_out = "Too many flags specified"; 366 else 367 *error_out = "Missing '='"; 368 goto out; 369 370break_loop: 371 backing_file = strchr(str, ','); 372 373 if (backing_file == NULL) 374 backing_file = strchr(str, ':'); 375 376 if(backing_file != NULL){ 377 if(ubd_dev->no_cow){ 378 *error_out = "Can't specify both 'd' and a cow file"; 379 goto out; 380 } 381 else { 382 *backing_file = '\0'; 383 backing_file++; 384 } 385 } 386 err = 0; 387 ubd_dev->file = str; 388 ubd_dev->cow.file = backing_file; 389 ubd_dev->boot_openflags = flags; 390out: 391 mutex_unlock(&ubd_lock); 392 return err; 393} 394 395static int ubd_setup(char *str) 396{ 397 char *error; 398 int err; 399 400 err = ubd_setup_common(str, NULL, &error); 401 if(err) 402 printk(KERN_ERR "Failed to initialize device with \"%s\" : " 403 "%s\n", str, error); 404 return 1; 405} 406 407__setup("ubd", ubd_setup); 408__uml_help(ubd_setup, 409"ubd<n><flags>=<filename>[(:|,)<filename2>]\n" 410" This is used to associate a device with a file in the underlying\n" 411" filesystem. When specifying two filenames, the first one is the\n" 412" COW name and the second is the backing file name. As separator you can\n" 413" use either a ':' or a ',': the first one allows writing things like;\n" 414" ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n" 415" while with a ',' the shell would not expand the 2nd '~'.\n" 416" When using only one filename, UML will detect whether to treat it like\n" 417" a COW file or a backing file. To override this detection, add the 'd'\n" 418" flag:\n" 419" ubd0d=BackingFile\n" 420" Usually, there is a filesystem in the file, but \n" 421" that's not required. Swap devices containing swap files can be\n" 422" specified like this. Also, a file which doesn't contain a\n" 423" filesystem can have its contents read in the virtual \n" 424" machine by running 'dd' on the device. <n> must be in the range\n" 425" 0 to 7. Appending an 'r' to the number will cause that device\n" 426" to be mounted read-only. For example ubd1r=./ext_fs. Appending\n" 427" an 's' will cause data to be written to disk on the host immediately.\n" 428" 'c' will cause the device to be treated as being shared between multiple\n" 429" UMLs and file locking will be turned off - this is appropriate for a\n" 430" cluster filesystem and inappropriate at almost all other times.\n\n" 431" 't' will disable trim/discard support on the device (enabled by default).\n\n" 432); 433 434static int udb_setup(char *str) 435{ 436 printk("udb%s specified on command line is almost certainly a ubd -> " 437 "udb TYPO\n", str); 438 return 1; 439} 440 441__setup("udb", udb_setup); 442__uml_help(udb_setup, 443"udb\n" 444" This option is here solely to catch ubd -> udb typos, which can be\n" 445" to impossible to catch visually unless you specifically look for\n" 446" them. The only result of any option starting with 'udb' is an error\n" 447" in the boot output.\n\n" 448); 449 450/* Only changed by ubd_init, which is an initcall. */ 451static int thread_fd = -1; 452 453/* Function to read several request pointers at a time 454* handling fractional reads if (and as) needed 455*/ 456 457static int bulk_req_safe_read( 458 int fd, 459 struct io_thread_req * (*request_buffer)[], 460 struct io_thread_req **remainder, 461 int *remainder_size, 462 int max_recs 463 ) 464{ 465 int n = 0; 466 int res = 0; 467 468 if (*remainder_size > 0) { 469 memmove( 470 (char *) request_buffer, 471 (char *) remainder, *remainder_size 472 ); 473 n = *remainder_size; 474 } 475 476 res = os_read_file( 477 fd, 478 ((char *) request_buffer) + *remainder_size, 479 sizeof(struct io_thread_req *)*max_recs 480 - *remainder_size 481 ); 482 if (res > 0) { 483 n += res; 484 if ((n % sizeof(struct io_thread_req *)) > 0) { 485 /* 486 * Read somehow returned not a multiple of dword 487 * theoretically possible, but never observed in the 488 * wild, so read routine must be able to handle it 489 */ 490 *remainder_size = n % sizeof(struct io_thread_req *); 491 WARN(*remainder_size > 0, "UBD IPC read returned a partial result"); 492 memmove( 493 remainder, 494 ((char *) request_buffer) + 495 (n/sizeof(struct io_thread_req *))*sizeof(struct io_thread_req *), 496 *remainder_size 497 ); 498 n = n - *remainder_size; 499 } 500 } else { 501 n = res; 502 } 503 return n; 504} 505 506/* Called without dev->lock held, and only in interrupt context. */ 507static void ubd_handler(void) 508{ 509 int n; 510 int count; 511 512 while(1){ 513 n = bulk_req_safe_read( 514 thread_fd, 515 irq_req_buffer, 516 &irq_remainder, 517 &irq_remainder_size, 518 UBD_REQ_BUFFER_SIZE 519 ); 520 if (n < 0) { 521 if(n == -EAGAIN) 522 break; 523 printk(KERN_ERR "spurious interrupt in ubd_handler, " 524 "err = %d\n", -n); 525 return; 526 } 527 for (count = 0; count < n/sizeof(struct io_thread_req *); count++) { 528 struct io_thread_req *io_req = (*irq_req_buffer)[count]; 529 530 if ((io_req->error == BLK_STS_NOTSUPP) && (req_op(io_req->req) == REQ_OP_DISCARD)) { 531 blk_queue_max_discard_sectors(io_req->req->q, 0); 532 blk_queue_max_write_zeroes_sectors(io_req->req->q, 0); 533 blk_queue_flag_clear(QUEUE_FLAG_DISCARD, io_req->req->q); 534 } 535 blk_mq_end_request(io_req->req, io_req->error); 536 kfree(io_req); 537 } 538 } 539} 540 541static irqreturn_t ubd_intr(int irq, void *dev) 542{ 543 ubd_handler(); 544 return IRQ_HANDLED; 545} 546 547/* Only changed by ubd_init, which is an initcall. */ 548static int io_pid = -1; 549 550static void kill_io_thread(void) 551{ 552 if(io_pid != -1) 553 os_kill_process(io_pid, 1); 554} 555 556__uml_exitcall(kill_io_thread); 557 558static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out) 559{ 560 char *file; 561 int fd; 562 int err; 563 564 __u32 version; 565 __u32 align; 566 char *backing_file; 567 time64_t mtime; 568 unsigned long long size; 569 int sector_size; 570 int bitmap_offset; 571 572 if (ubd_dev->file && ubd_dev->cow.file) { 573 file = ubd_dev->cow.file; 574 575 goto out; 576 } 577 578 fd = os_open_file(ubd_dev->file, of_read(OPENFLAGS()), 0); 579 if (fd < 0) 580 return fd; 581 582 err = read_cow_header(file_reader, &fd, &version, &backing_file, \ 583 &mtime, &size, §or_size, &align, &bitmap_offset); 584 os_close_file(fd); 585 586 if(err == -EINVAL) 587 file = ubd_dev->file; 588 else 589 file = backing_file; 590 591out: 592 return os_file_size(file, size_out); 593} 594 595static int read_cow_bitmap(int fd, void *buf, int offset, int len) 596{ 597 int err; 598 599 err = os_pread_file(fd, buf, len, offset); 600 if (err < 0) 601 return err; 602 603 return 0; 604} 605 606static int backing_file_mismatch(char *file, __u64 size, time64_t mtime) 607{ 608 time64_t modtime; 609 unsigned long long actual; 610 int err; 611 612 err = os_file_modtime(file, &modtime); 613 if (err < 0) { 614 printk(KERN_ERR "Failed to get modification time of backing " 615 "file \"%s\", err = %d\n", file, -err); 616 return err; 617 } 618 619 err = os_file_size(file, &actual); 620 if (err < 0) { 621 printk(KERN_ERR "Failed to get size of backing file \"%s\", " 622 "err = %d\n", file, -err); 623 return err; 624 } 625 626 if (actual != size) { 627 /*__u64 can be a long on AMD64 and with %lu GCC complains; so 628 * the typecast.*/ 629 printk(KERN_ERR "Size mismatch (%llu vs %llu) of COW header " 630 "vs backing file\n", (unsigned long long) size, actual); 631 return -EINVAL; 632 } 633 if (modtime != mtime) { 634 printk(KERN_ERR "mtime mismatch (%lld vs %lld) of COW header vs " 635 "backing file\n", mtime, modtime); 636 return -EINVAL; 637 } 638 return 0; 639} 640 641static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow) 642{ 643 struct uml_stat buf1, buf2; 644 int err; 645 646 if (from_cmdline == NULL) 647 return 0; 648 if (!strcmp(from_cmdline, from_cow)) 649 return 0; 650 651 err = os_stat_file(from_cmdline, &buf1); 652 if (err < 0) { 653 printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cmdline, 654 -err); 655 return 0; 656 } 657 err = os_stat_file(from_cow, &buf2); 658 if (err < 0) { 659 printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cow, 660 -err); 661 return 1; 662 } 663 if ((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino)) 664 return 0; 665 666 printk(KERN_ERR "Backing file mismatch - \"%s\" requested, " 667 "\"%s\" specified in COW header of \"%s\"\n", 668 from_cmdline, from_cow, cow); 669 return 1; 670} 671 672static int open_ubd_file(char *file, struct openflags *openflags, int shared, 673 char **backing_file_out, int *bitmap_offset_out, 674 unsigned long *bitmap_len_out, int *data_offset_out, 675 int *create_cow_out) 676{ 677 time64_t mtime; 678 unsigned long long size; 679 __u32 version, align; 680 char *backing_file; 681 int fd, err, sectorsize, asked_switch, mode = 0644; 682 683 fd = os_open_file(file, *openflags, mode); 684 if (fd < 0) { 685 if ((fd == -ENOENT) && (create_cow_out != NULL)) 686 *create_cow_out = 1; 687 if (!openflags->w || 688 ((fd != -EROFS) && (fd != -EACCES))) 689 return fd; 690 openflags->w = 0; 691 fd = os_open_file(file, *openflags, mode); 692 if (fd < 0) 693 return fd; 694 } 695 696 if (shared) 697 printk(KERN_INFO "Not locking \"%s\" on the host\n", file); 698 else { 699 err = os_lock_file(fd, openflags->w); 700 if (err < 0) { 701 printk(KERN_ERR "Failed to lock '%s', err = %d\n", 702 file, -err); 703 goto out_close; 704 } 705 } 706 707 /* Successful return case! */ 708 if (backing_file_out == NULL) 709 return fd; 710 711 err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime, 712 &size, §orsize, &align, bitmap_offset_out); 713 if (err && (*backing_file_out != NULL)) { 714 printk(KERN_ERR "Failed to read COW header from COW file " 715 "\"%s\", errno = %d\n", file, -err); 716 goto out_close; 717 } 718 if (err) 719 return fd; 720 721 asked_switch = path_requires_switch(*backing_file_out, backing_file, 722 file); 723 724 /* Allow switching only if no mismatch. */ 725 if (asked_switch && !backing_file_mismatch(*backing_file_out, size, 726 mtime)) { 727 printk(KERN_ERR "Switching backing file to '%s'\n", 728 *backing_file_out); 729 err = write_cow_header(file, fd, *backing_file_out, 730 sectorsize, align, &size); 731 if (err) { 732 printk(KERN_ERR "Switch failed, errno = %d\n", -err); 733 goto out_close; 734 } 735 } else { 736 *backing_file_out = backing_file; 737 err = backing_file_mismatch(*backing_file_out, size, mtime); 738 if (err) 739 goto out_close; 740 } 741 742 cow_sizes(version, size, sectorsize, align, *bitmap_offset_out, 743 bitmap_len_out, data_offset_out); 744 745 return fd; 746 out_close: 747 os_close_file(fd); 748 return err; 749} 750 751static int create_cow_file(char *cow_file, char *backing_file, 752 struct openflags flags, 753 int sectorsize, int alignment, int *bitmap_offset_out, 754 unsigned long *bitmap_len_out, int *data_offset_out) 755{ 756 int err, fd; 757 758 flags.c = 1; 759 fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL); 760 if (fd < 0) { 761 err = fd; 762 printk(KERN_ERR "Open of COW file '%s' failed, errno = %d\n", 763 cow_file, -err); 764 goto out; 765 } 766 767 err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment, 768 bitmap_offset_out, bitmap_len_out, 769 data_offset_out); 770 if (!err) 771 return fd; 772 os_close_file(fd); 773 out: 774 return err; 775} 776 777static void ubd_close_dev(struct ubd *ubd_dev) 778{ 779 os_close_file(ubd_dev->fd); 780 if(ubd_dev->cow.file == NULL) 781 return; 782 783 os_close_file(ubd_dev->cow.fd); 784 vfree(ubd_dev->cow.bitmap); 785 ubd_dev->cow.bitmap = NULL; 786} 787 788static int ubd_open_dev(struct ubd *ubd_dev) 789{ 790 struct openflags flags; 791 char **back_ptr; 792 int err, create_cow, *create_ptr; 793 int fd; 794 795 ubd_dev->openflags = ubd_dev->boot_openflags; 796 create_cow = 0; 797 create_ptr = (ubd_dev->cow.file != NULL) ? &create_cow : NULL; 798 back_ptr = ubd_dev->no_cow ? NULL : &ubd_dev->cow.file; 799 800 fd = open_ubd_file(ubd_dev->file, &ubd_dev->openflags, ubd_dev->shared, 801 back_ptr, &ubd_dev->cow.bitmap_offset, 802 &ubd_dev->cow.bitmap_len, &ubd_dev->cow.data_offset, 803 create_ptr); 804 805 if((fd == -ENOENT) && create_cow){ 806 fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file, 807 ubd_dev->openflags, SECTOR_SIZE, PAGE_SIZE, 808 &ubd_dev->cow.bitmap_offset, 809 &ubd_dev->cow.bitmap_len, 810 &ubd_dev->cow.data_offset); 811 if(fd >= 0){ 812 printk(KERN_INFO "Creating \"%s\" as COW file for " 813 "\"%s\"\n", ubd_dev->file, ubd_dev->cow.file); 814 } 815 } 816 817 if(fd < 0){ 818 printk("Failed to open '%s', errno = %d\n", ubd_dev->file, 819 -fd); 820 return fd; 821 } 822 ubd_dev->fd = fd; 823 824 if(ubd_dev->cow.file != NULL){ 825 blk_queue_max_hw_sectors(ubd_dev->queue, 8 * sizeof(long)); 826 827 err = -ENOMEM; 828 ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len); 829 if(ubd_dev->cow.bitmap == NULL){ 830 printk(KERN_ERR "Failed to vmalloc COW bitmap\n"); 831 goto error; 832 } 833 flush_tlb_kernel_vm(); 834 835 err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap, 836 ubd_dev->cow.bitmap_offset, 837 ubd_dev->cow.bitmap_len); 838 if(err < 0) 839 goto error; 840 841 flags = ubd_dev->openflags; 842 flags.w = 0; 843 err = open_ubd_file(ubd_dev->cow.file, &flags, ubd_dev->shared, NULL, 844 NULL, NULL, NULL, NULL); 845 if(err < 0) goto error; 846 ubd_dev->cow.fd = err; 847 } 848 if (ubd_dev->no_trim == 0) { 849 ubd_dev->queue->limits.discard_granularity = SECTOR_SIZE; 850 ubd_dev->queue->limits.discard_alignment = SECTOR_SIZE; 851 blk_queue_max_discard_sectors(ubd_dev->queue, UBD_MAX_REQUEST); 852 blk_queue_max_write_zeroes_sectors(ubd_dev->queue, UBD_MAX_REQUEST); 853 blk_queue_flag_set(QUEUE_FLAG_DISCARD, ubd_dev->queue); 854 } 855 blk_queue_flag_set(QUEUE_FLAG_NONROT, ubd_dev->queue); 856 return 0; 857 error: 858 os_close_file(ubd_dev->fd); 859 return err; 860} 861 862static void ubd_device_release(struct device *dev) 863{ 864 struct ubd *ubd_dev = dev_get_drvdata(dev); 865 866 blk_cleanup_queue(ubd_dev->queue); 867 blk_mq_free_tag_set(&ubd_dev->tag_set); 868 *ubd_dev = ((struct ubd) DEFAULT_UBD); 869} 870 871static int ubd_disk_register(int major, u64 size, int unit, 872 struct gendisk **disk_out) 873{ 874 struct device *parent = NULL; 875 struct gendisk *disk; 876 877 disk = alloc_disk(1 << UBD_SHIFT); 878 if(disk == NULL) 879 return -ENOMEM; 880 881 disk->major = major; 882 disk->first_minor = unit << UBD_SHIFT; 883 disk->fops = &ubd_blops; 884 set_capacity(disk, size / 512); 885 if (major == UBD_MAJOR) 886 sprintf(disk->disk_name, "ubd%c", 'a' + unit); 887 else 888 sprintf(disk->disk_name, "ubd_fake%d", unit); 889 890 /* sysfs register (not for ide fake devices) */ 891 if (major == UBD_MAJOR) { 892 ubd_devs[unit].pdev.id = unit; 893 ubd_devs[unit].pdev.name = DRIVER_NAME; 894 ubd_devs[unit].pdev.dev.release = ubd_device_release; 895 dev_set_drvdata(&ubd_devs[unit].pdev.dev, &ubd_devs[unit]); 896 platform_device_register(&ubd_devs[unit].pdev); 897 parent = &ubd_devs[unit].pdev.dev; 898 } 899 900 disk->private_data = &ubd_devs[unit]; 901 disk->queue = ubd_devs[unit].queue; 902 device_add_disk(parent, disk, NULL); 903 904 *disk_out = disk; 905 return 0; 906} 907 908#define ROUND_BLOCK(n) ((n + (SECTOR_SIZE - 1)) & (-SECTOR_SIZE)) 909 910static const struct blk_mq_ops ubd_mq_ops = { 911 .queue_rq = ubd_queue_rq, 912}; 913 914static int ubd_add(int n, char **error_out) 915{ 916 struct ubd *ubd_dev = &ubd_devs[n]; 917 int err = 0; 918 919 if(ubd_dev->file == NULL) 920 goto out; 921 922 err = ubd_file_size(ubd_dev, &ubd_dev->size); 923 if(err < 0){ 924 *error_out = "Couldn't determine size of device's file"; 925 goto out; 926 } 927 928 ubd_dev->size = ROUND_BLOCK(ubd_dev->size); 929 930 ubd_dev->tag_set.ops = &ubd_mq_ops; 931 ubd_dev->tag_set.queue_depth = 64; 932 ubd_dev->tag_set.numa_node = NUMA_NO_NODE; 933 ubd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; 934 ubd_dev->tag_set.driver_data = ubd_dev; 935 ubd_dev->tag_set.nr_hw_queues = 1; 936 937 err = blk_mq_alloc_tag_set(&ubd_dev->tag_set); 938 if (err) 939 goto out; 940 941 ubd_dev->queue = blk_mq_init_queue(&ubd_dev->tag_set); 942 if (IS_ERR(ubd_dev->queue)) { 943 err = PTR_ERR(ubd_dev->queue); 944 goto out_cleanup_tags; 945 } 946 947 ubd_dev->queue->queuedata = ubd_dev; 948 blk_queue_write_cache(ubd_dev->queue, true, false); 949 950 blk_queue_max_segments(ubd_dev->queue, MAX_SG); 951 blk_queue_segment_boundary(ubd_dev->queue, PAGE_SIZE - 1); 952 err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, &ubd_gendisk[n]); 953 if(err){ 954 *error_out = "Failed to register device"; 955 goto out_cleanup_tags; 956 } 957 958 if (fake_major != UBD_MAJOR) 959 ubd_disk_register(fake_major, ubd_dev->size, n, 960 &fake_gendisk[n]); 961 962 /* 963 * Perhaps this should also be under the "if (fake_major)" above 964 * using the fake_disk->disk_name 965 */ 966 if (fake_ide) 967 make_ide_entries(ubd_gendisk[n]->disk_name); 968 969 err = 0; 970out: 971 return err; 972 973out_cleanup_tags: 974 blk_mq_free_tag_set(&ubd_dev->tag_set); 975 if (!(IS_ERR(ubd_dev->queue))) 976 blk_cleanup_queue(ubd_dev->queue); 977 goto out; 978} 979 980static int ubd_config(char *str, char **error_out) 981{ 982 int n, ret; 983 984 /* This string is possibly broken up and stored, so it's only 985 * freed if ubd_setup_common fails, or if only general options 986 * were set. 987 */ 988 str = kstrdup(str, GFP_KERNEL); 989 if (str == NULL) { 990 *error_out = "Failed to allocate memory"; 991 return -ENOMEM; 992 } 993 994 ret = ubd_setup_common(str, &n, error_out); 995 if (ret) 996 goto err_free; 997 998 if (n == -1) { 999 ret = 0; 1000 goto err_free; 1001 } 1002 1003 mutex_lock(&ubd_lock); 1004 ret = ubd_add(n, error_out); 1005 if (ret) 1006 ubd_devs[n].file = NULL; 1007 mutex_unlock(&ubd_lock); 1008 1009out: 1010 return ret; 1011 1012err_free: 1013 kfree(str); 1014 goto out; 1015} 1016 1017static int ubd_get_config(char *name, char *str, int size, char **error_out) 1018{ 1019 struct ubd *ubd_dev; 1020 int n, len = 0; 1021 1022 n = parse_unit(&name); 1023 if((n >= MAX_DEV) || (n < 0)){ 1024 *error_out = "ubd_get_config : device number out of range"; 1025 return -1; 1026 } 1027 1028 ubd_dev = &ubd_devs[n]; 1029 mutex_lock(&ubd_lock); 1030 1031 if(ubd_dev->file == NULL){ 1032 CONFIG_CHUNK(str, size, len, "", 1); 1033 goto out; 1034 } 1035 1036 CONFIG_CHUNK(str, size, len, ubd_dev->file, 0); 1037 1038 if(ubd_dev->cow.file != NULL){ 1039 CONFIG_CHUNK(str, size, len, ",", 0); 1040 CONFIG_CHUNK(str, size, len, ubd_dev->cow.file, 1); 1041 } 1042 else CONFIG_CHUNK(str, size, len, "", 1); 1043 1044 out: 1045 mutex_unlock(&ubd_lock); 1046 return len; 1047} 1048 1049static int ubd_id(char **str, int *start_out, int *end_out) 1050{ 1051 int n; 1052 1053 n = parse_unit(str); 1054 *start_out = 0; 1055 *end_out = MAX_DEV - 1; 1056 return n; 1057} 1058 1059static int ubd_remove(int n, char **error_out) 1060{ 1061 struct gendisk *disk = ubd_gendisk[n]; 1062 struct ubd *ubd_dev; 1063 int err = -ENODEV; 1064 1065 mutex_lock(&ubd_lock); 1066 1067 ubd_dev = &ubd_devs[n]; 1068 1069 if(ubd_dev->file == NULL) 1070 goto out; 1071 1072 /* you cannot remove a open disk */ 1073 err = -EBUSY; 1074 if(ubd_dev->count > 0) 1075 goto out; 1076 1077 ubd_gendisk[n] = NULL; 1078 if(disk != NULL){ 1079 del_gendisk(disk); 1080 put_disk(disk); 1081 } 1082 1083 if(fake_gendisk[n] != NULL){ 1084 del_gendisk(fake_gendisk[n]); 1085 put_disk(fake_gendisk[n]); 1086 fake_gendisk[n] = NULL; 1087 } 1088 1089 err = 0; 1090 platform_device_unregister(&ubd_dev->pdev); 1091out: 1092 mutex_unlock(&ubd_lock); 1093 return err; 1094} 1095 1096/* All these are called by mconsole in process context and without 1097 * ubd-specific locks. The structure itself is const except for .list. 1098 */ 1099static struct mc_device ubd_mc = { 1100 .list = LIST_HEAD_INIT(ubd_mc.list), 1101 .name = "ubd", 1102 .config = ubd_config, 1103 .get_config = ubd_get_config, 1104 .id = ubd_id, 1105 .remove = ubd_remove, 1106}; 1107 1108static int __init ubd_mc_init(void) 1109{ 1110 mconsole_register_dev(&ubd_mc); 1111 return 0; 1112} 1113 1114__initcall(ubd_mc_init); 1115 1116static int __init ubd0_init(void) 1117{ 1118 struct ubd *ubd_dev = &ubd_devs[0]; 1119 1120 mutex_lock(&ubd_lock); 1121 if(ubd_dev->file == NULL) 1122 ubd_dev->file = "root_fs"; 1123 mutex_unlock(&ubd_lock); 1124 1125 return 0; 1126} 1127 1128__initcall(ubd0_init); 1129 1130/* Used in ubd_init, which is an initcall */ 1131static struct platform_driver ubd_driver = { 1132 .driver = { 1133 .name = DRIVER_NAME, 1134 }, 1135}; 1136 1137static int __init ubd_init(void) 1138{ 1139 char *error; 1140 int i, err; 1141 1142 if (register_blkdev(UBD_MAJOR, "ubd")) 1143 return -1; 1144 1145 if (fake_major != UBD_MAJOR) { 1146 char name[sizeof("ubd_nnn\0")]; 1147 1148 snprintf(name, sizeof(name), "ubd_%d", fake_major); 1149 if (register_blkdev(fake_major, "ubd")) 1150 return -1; 1151 } 1152 1153 irq_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE, 1154 sizeof(struct io_thread_req *), 1155 GFP_KERNEL 1156 ); 1157 irq_remainder = 0; 1158 1159 if (irq_req_buffer == NULL) { 1160 printk(KERN_ERR "Failed to initialize ubd buffering\n"); 1161 return -1; 1162 } 1163 io_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE, 1164 sizeof(struct io_thread_req *), 1165 GFP_KERNEL 1166 ); 1167 1168 io_remainder = 0; 1169 1170 if (io_req_buffer == NULL) { 1171 printk(KERN_ERR "Failed to initialize ubd buffering\n"); 1172 return -1; 1173 } 1174 platform_driver_register(&ubd_driver); 1175 mutex_lock(&ubd_lock); 1176 for (i = 0; i < MAX_DEV; i++){ 1177 err = ubd_add(i, &error); 1178 if(err) 1179 printk(KERN_ERR "Failed to initialize ubd device %d :" 1180 "%s\n", i, error); 1181 } 1182 mutex_unlock(&ubd_lock); 1183 return 0; 1184} 1185 1186late_initcall(ubd_init); 1187 1188static int __init ubd_driver_init(void){ 1189 unsigned long stack; 1190 int err; 1191 1192 /* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/ 1193 if(global_openflags.s){ 1194 printk(KERN_INFO "ubd: Synchronous mode\n"); 1195 /* Letting ubd=sync be like using ubd#s= instead of ubd#= is 1196 * enough. So use anyway the io thread. */ 1197 } 1198 stack = alloc_stack(0, 0); 1199 io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *), 1200 &thread_fd); 1201 if(io_pid < 0){ 1202 printk(KERN_ERR 1203 "ubd : Failed to start I/O thread (errno = %d) - " 1204 "falling back to synchronous I/O\n", -io_pid); 1205 io_pid = -1; 1206 return 0; 1207 } 1208 err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr, 1209 0, "ubd", ubd_devs); 1210 if(err != 0) 1211 printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err); 1212 return 0; 1213} 1214 1215device_initcall(ubd_driver_init); 1216 1217static int ubd_open(struct block_device *bdev, fmode_t mode) 1218{ 1219 struct gendisk *disk = bdev->bd_disk; 1220 struct ubd *ubd_dev = disk->private_data; 1221 int err = 0; 1222 1223 mutex_lock(&ubd_mutex); 1224 if(ubd_dev->count == 0){ 1225 err = ubd_open_dev(ubd_dev); 1226 if(err){ 1227 printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n", 1228 disk->disk_name, ubd_dev->file, -err); 1229 goto out; 1230 } 1231 } 1232 ubd_dev->count++; 1233 set_disk_ro(disk, !ubd_dev->openflags.w); 1234 1235 /* This should no more be needed. And it didn't work anyway to exclude 1236 * read-write remounting of filesystems.*/ 1237 /*if((mode & FMODE_WRITE) && !ubd_dev->openflags.w){ 1238 if(--ubd_dev->count == 0) ubd_close_dev(ubd_dev); 1239 err = -EROFS; 1240 }*/ 1241out: 1242 mutex_unlock(&ubd_mutex); 1243 return err; 1244} 1245 1246static void ubd_release(struct gendisk *disk, fmode_t mode) 1247{ 1248 struct ubd *ubd_dev = disk->private_data; 1249 1250 mutex_lock(&ubd_mutex); 1251 if(--ubd_dev->count == 0) 1252 ubd_close_dev(ubd_dev); 1253 mutex_unlock(&ubd_mutex); 1254} 1255 1256static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask, 1257 __u64 *cow_offset, unsigned long *bitmap, 1258 __u64 bitmap_offset, unsigned long *bitmap_words, 1259 __u64 bitmap_len) 1260{ 1261 __u64 sector = io_offset >> SECTOR_SHIFT; 1262 int i, update_bitmap = 0; 1263 1264 for (i = 0; i < length >> SECTOR_SHIFT; i++) { 1265 if(cow_mask != NULL) 1266 ubd_set_bit(i, (unsigned char *) cow_mask); 1267 if(ubd_test_bit(sector + i, (unsigned char *) bitmap)) 1268 continue; 1269 1270 update_bitmap = 1; 1271 ubd_set_bit(sector + i, (unsigned char *) bitmap); 1272 } 1273 1274 if(!update_bitmap) 1275 return; 1276 1277 *cow_offset = sector / (sizeof(unsigned long) * 8); 1278 1279 /* This takes care of the case where we're exactly at the end of the 1280 * device, and *cow_offset + 1 is off the end. So, just back it up 1281 * by one word. Thanks to Lynn Kerby for the fix and James McMechan 1282 * for the original diagnosis. 1283 */ 1284 if (*cow_offset == (DIV_ROUND_UP(bitmap_len, 1285 sizeof(unsigned long)) - 1)) 1286 (*cow_offset)--; 1287 1288 bitmap_words[0] = bitmap[*cow_offset]; 1289 bitmap_words[1] = bitmap[*cow_offset + 1]; 1290 1291 *cow_offset *= sizeof(unsigned long); 1292 *cow_offset += bitmap_offset; 1293} 1294 1295static void cowify_req(struct io_thread_req *req, struct io_desc *segment, 1296 unsigned long offset, unsigned long *bitmap, 1297 __u64 bitmap_offset, __u64 bitmap_len) 1298{ 1299 __u64 sector = offset >> SECTOR_SHIFT; 1300 int i; 1301 1302 if (segment->length > (sizeof(segment->sector_mask) * 8) << SECTOR_SHIFT) 1303 panic("Operation too long"); 1304 1305 if (req_op(req->req) == REQ_OP_READ) { 1306 for (i = 0; i < segment->length >> SECTOR_SHIFT; i++) { 1307 if(ubd_test_bit(sector + i, (unsigned char *) bitmap)) 1308 ubd_set_bit(i, (unsigned char *) 1309 &segment->sector_mask); 1310 } 1311 } else { 1312 cowify_bitmap(offset, segment->length, &segment->sector_mask, 1313 &segment->cow_offset, bitmap, bitmap_offset, 1314 segment->bitmap_words, bitmap_len); 1315 } 1316} 1317 1318static void ubd_map_req(struct ubd *dev, struct io_thread_req *io_req, 1319 struct request *req) 1320{ 1321 struct bio_vec bvec; 1322 struct req_iterator iter; 1323 int i = 0; 1324 unsigned long byte_offset = io_req->offset; 1325 int op = req_op(req); 1326 1327 if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD) { 1328 io_req->io_desc[0].buffer = NULL; 1329 io_req->io_desc[0].length = blk_rq_bytes(req); 1330 } else { 1331 rq_for_each_segment(bvec, req, iter) { 1332 BUG_ON(i >= io_req->desc_cnt); 1333 1334 io_req->io_desc[i].buffer = 1335 page_address(bvec.bv_page) + bvec.bv_offset; 1336 io_req->io_desc[i].length = bvec.bv_len; 1337 i++; 1338 } 1339 } 1340 1341 if (dev->cow.file) { 1342 for (i = 0; i < io_req->desc_cnt; i++) { 1343 cowify_req(io_req, &io_req->io_desc[i], byte_offset, 1344 dev->cow.bitmap, dev->cow.bitmap_offset, 1345 dev->cow.bitmap_len); 1346 byte_offset += io_req->io_desc[i].length; 1347 } 1348 1349 } 1350} 1351 1352static struct io_thread_req *ubd_alloc_req(struct ubd *dev, struct request *req, 1353 int desc_cnt) 1354{ 1355 struct io_thread_req *io_req; 1356 int i; 1357 1358 io_req = kmalloc(sizeof(*io_req) + 1359 (desc_cnt * sizeof(struct io_desc)), 1360 GFP_ATOMIC); 1361 if (!io_req) 1362 return NULL; 1363 1364 io_req->req = req; 1365 if (dev->cow.file) 1366 io_req->fds[0] = dev->cow.fd; 1367 else 1368 io_req->fds[0] = dev->fd; 1369 io_req->error = 0; 1370 io_req->sectorsize = SECTOR_SIZE; 1371 io_req->fds[1] = dev->fd; 1372 io_req->offset = (u64) blk_rq_pos(req) << SECTOR_SHIFT; 1373 io_req->offsets[0] = 0; 1374 io_req->offsets[1] = dev->cow.data_offset; 1375 1376 for (i = 0 ; i < desc_cnt; i++) { 1377 io_req->io_desc[i].sector_mask = 0; 1378 io_req->io_desc[i].cow_offset = -1; 1379 } 1380 1381 return io_req; 1382} 1383 1384static int ubd_submit_request(struct ubd *dev, struct request *req) 1385{ 1386 int segs = 0; 1387 struct io_thread_req *io_req; 1388 int ret; 1389 int op = req_op(req); 1390 1391 if (op == REQ_OP_FLUSH) 1392 segs = 0; 1393 else if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD) 1394 segs = 1; 1395 else 1396 segs = blk_rq_nr_phys_segments(req); 1397 1398 io_req = ubd_alloc_req(dev, req, segs); 1399 if (!io_req) 1400 return -ENOMEM; 1401 1402 io_req->desc_cnt = segs; 1403 if (segs) 1404 ubd_map_req(dev, io_req, req); 1405 1406 ret = os_write_file(thread_fd, &io_req, sizeof(io_req)); 1407 if (ret != sizeof(io_req)) { 1408 if (ret != -EAGAIN) 1409 pr_err("write to io thread failed: %d\n", -ret); 1410 kfree(io_req); 1411 } 1412 return ret; 1413} 1414 1415static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx, 1416 const struct blk_mq_queue_data *bd) 1417{ 1418 struct ubd *ubd_dev = hctx->queue->queuedata; 1419 struct request *req = bd->rq; 1420 int ret = 0, res = BLK_STS_OK; 1421 1422 blk_mq_start_request(req); 1423 1424 spin_lock_irq(&ubd_dev->lock); 1425 1426 switch (req_op(req)) { 1427 case REQ_OP_FLUSH: 1428 case REQ_OP_READ: 1429 case REQ_OP_WRITE: 1430 case REQ_OP_DISCARD: 1431 case REQ_OP_WRITE_ZEROES: 1432 ret = ubd_submit_request(ubd_dev, req); 1433 break; 1434 default: 1435 WARN_ON_ONCE(1); 1436 res = BLK_STS_NOTSUPP; 1437 } 1438 1439 spin_unlock_irq(&ubd_dev->lock); 1440 1441 if (ret < 0) { 1442 if (ret == -ENOMEM) 1443 res = BLK_STS_RESOURCE; 1444 else 1445 res = BLK_STS_DEV_RESOURCE; 1446 } 1447 1448 return res; 1449} 1450 1451static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo) 1452{ 1453 struct ubd *ubd_dev = bdev->bd_disk->private_data; 1454 1455 geo->heads = 128; 1456 geo->sectors = 32; 1457 geo->cylinders = ubd_dev->size / (128 * 32 * 512); 1458 return 0; 1459} 1460 1461static int ubd_ioctl(struct block_device *bdev, fmode_t mode, 1462 unsigned int cmd, unsigned long arg) 1463{ 1464 struct ubd *ubd_dev = bdev->bd_disk->private_data; 1465 u16 ubd_id[ATA_ID_WORDS]; 1466 1467 switch (cmd) { 1468 struct cdrom_volctrl volume; 1469 case HDIO_GET_IDENTITY: 1470 memset(&ubd_id, 0, ATA_ID_WORDS * 2); 1471 ubd_id[ATA_ID_CYLS] = ubd_dev->size / (128 * 32 * 512); 1472 ubd_id[ATA_ID_HEADS] = 128; 1473 ubd_id[ATA_ID_SECTORS] = 32; 1474 if(copy_to_user((char __user *) arg, (char *) &ubd_id, 1475 sizeof(ubd_id))) 1476 return -EFAULT; 1477 return 0; 1478 1479 case CDROMVOLREAD: 1480 if(copy_from_user(&volume, (char __user *) arg, sizeof(volume))) 1481 return -EFAULT; 1482 volume.channel0 = 255; 1483 volume.channel1 = 255; 1484 volume.channel2 = 255; 1485 volume.channel3 = 255; 1486 if(copy_to_user((char __user *) arg, &volume, sizeof(volume))) 1487 return -EFAULT; 1488 return 0; 1489 } 1490 return -EINVAL; 1491} 1492 1493static int map_error(int error_code) 1494{ 1495 switch (error_code) { 1496 case 0: 1497 return BLK_STS_OK; 1498 case ENOSYS: 1499 case EOPNOTSUPP: 1500 return BLK_STS_NOTSUPP; 1501 case ENOSPC: 1502 return BLK_STS_NOSPC; 1503 } 1504 return BLK_STS_IOERR; 1505} 1506 1507/* 1508 * Everything from here onwards *IS NOT PART OF THE KERNEL* 1509 * 1510 * The following functions are part of UML hypervisor code. 1511 * All functions from here onwards are executed as a helper 1512 * thread and are not allowed to execute any kernel functions. 1513 * 1514 * Any communication must occur strictly via shared memory and IPC. 1515 * 1516 * Do not add printks, locks, kernel memory operations, etc - it 1517 * will result in unpredictable behaviour and/or crashes. 1518 */ 1519 1520static int update_bitmap(struct io_thread_req *req, struct io_desc *segment) 1521{ 1522 int n; 1523 1524 if (segment->cow_offset == -1) 1525 return map_error(0); 1526 1527 n = os_pwrite_file(req->fds[1], &segment->bitmap_words, 1528 sizeof(segment->bitmap_words), segment->cow_offset); 1529 if (n != sizeof(segment->bitmap_words)) 1530 return map_error(-n); 1531 1532 return map_error(0); 1533} 1534 1535static void do_io(struct io_thread_req *req, struct io_desc *desc) 1536{ 1537 char *buf = NULL; 1538 unsigned long len; 1539 int n, nsectors, start, end, bit; 1540 __u64 off; 1541 1542 /* FLUSH is really a special case, we cannot "case" it with others */ 1543 1544 if (req_op(req->req) == REQ_OP_FLUSH) { 1545 /* fds[0] is always either the rw image or our cow file */ 1546 req->error = map_error(-os_sync_file(req->fds[0])); 1547 return; 1548 } 1549 1550 nsectors = desc->length / req->sectorsize; 1551 start = 0; 1552 do { 1553 bit = ubd_test_bit(start, (unsigned char *) &desc->sector_mask); 1554 end = start; 1555 while((end < nsectors) && 1556 (ubd_test_bit(end, (unsigned char *) &desc->sector_mask) == bit)) 1557 end++; 1558 1559 off = req->offset + req->offsets[bit] + 1560 start * req->sectorsize; 1561 len = (end - start) * req->sectorsize; 1562 if (desc->buffer != NULL) 1563 buf = &desc->buffer[start * req->sectorsize]; 1564 1565 switch (req_op(req->req)) { 1566 case REQ_OP_READ: 1567 n = 0; 1568 do { 1569 buf = &buf[n]; 1570 len -= n; 1571 n = os_pread_file(req->fds[bit], buf, len, off); 1572 if (n < 0) { 1573 req->error = map_error(-n); 1574 return; 1575 } 1576 } while((n < len) && (n != 0)); 1577 if (n < len) memset(&buf[n], 0, len - n); 1578 break; 1579 case REQ_OP_WRITE: 1580 n = os_pwrite_file(req->fds[bit], buf, len, off); 1581 if(n != len){ 1582 req->error = map_error(-n); 1583 return; 1584 } 1585 break; 1586 case REQ_OP_DISCARD: 1587 case REQ_OP_WRITE_ZEROES: 1588 n = os_falloc_punch(req->fds[bit], off, len); 1589 if (n) { 1590 req->error = map_error(-n); 1591 return; 1592 } 1593 break; 1594 default: 1595 WARN_ON_ONCE(1); 1596 req->error = BLK_STS_NOTSUPP; 1597 return; 1598 } 1599 1600 start = end; 1601 } while(start < nsectors); 1602 1603 req->offset += len; 1604 req->error = update_bitmap(req, desc); 1605} 1606 1607/* Changed in start_io_thread, which is serialized by being called only 1608 * from ubd_init, which is an initcall. 1609 */ 1610int kernel_fd = -1; 1611 1612/* Only changed by the io thread. XXX: currently unused. */ 1613static int io_count = 0; 1614 1615int io_thread(void *arg) 1616{ 1617 int n, count, written, res; 1618 1619 os_fix_helper_signals(); 1620 1621 while(1){ 1622 n = bulk_req_safe_read( 1623 kernel_fd, 1624 io_req_buffer, 1625 &io_remainder, 1626 &io_remainder_size, 1627 UBD_REQ_BUFFER_SIZE 1628 ); 1629 if (n <= 0) { 1630 if (n == -EAGAIN) 1631 ubd_read_poll(-1); 1632 1633 continue; 1634 } 1635 1636 for (count = 0; count < n/sizeof(struct io_thread_req *); count++) { 1637 struct io_thread_req *req = (*io_req_buffer)[count]; 1638 int i; 1639 1640 io_count++; 1641 for (i = 0; !req->error && i < req->desc_cnt; i++) 1642 do_io(req, &(req->io_desc[i])); 1643 1644 } 1645 1646 written = 0; 1647 1648 do { 1649 res = os_write_file(kernel_fd, 1650 ((char *) io_req_buffer) + written, 1651 n - written); 1652 if (res >= 0) { 1653 written += res; 1654 } 1655 if (written < n) { 1656 ubd_write_poll(-1); 1657 } 1658 } while (written < n); 1659 } 1660 1661 return 0; 1662} 1663