1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright (C) 2018 Cambridge Greys Ltd
4 * Copyright (C) 2015-2016 Anton Ivanov (aivanov@brocade.com)
5 * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
6 */
7
8/* 2001-09-28...2002-04-17
9 * Partition stuff by James_McMechan@hotmail.com
10 * old style ubd by setting UBD_SHIFT to 0
11 * 2002-09-27...2002-10-18 massive tinkering for 2.5
12 * partitions have changed in 2.5
13 * 2003-01-29 more tinkering for 2.5.59-1
14 * This should now address the sysfs problems and has
15 * the symlink for devfs to allow for booting with
16 * the common /dev/ubd/discX/... names rather than
17 * only /dev/ubdN/discN this version also has lots of
18 * clean ups preparing for ubd-many.
19 * James McMechan
20 */
21
/*
 * Number of minor-number bits reserved for each ubd unit: every disk is
 * allocated 1 << UBD_SHIFT minors and unit N starts at minor N << UBD_SHIFT.
 */
#define UBD_SHIFT	4
23
24#include <linux/module.h>
25#include <linux/init.h>
26#include <linux/blkdev.h>
27#include <linux/blk-mq.h>
28#include <linux/ata.h>
29#include <linux/hdreg.h>
30#include <linux/cdrom.h>
31#include <linux/proc_fs.h>
32#include <linux/seq_file.h>
33#include <linux/ctype.h>
34#include <linux/slab.h>
35#include <linux/vmalloc.h>
36#include <linux/platform_device.h>
37#include <linux/scatterlist.h>
38#include <asm/tlbflush.h>
39#include <kern_util.h>
40#include "mconsole_kern.h"
41#include <init.h>
42#include <irq_kern.h>
43#include "ubd.h"
44#include <os.h>
45#include "cow.h"
46
/*
 * Largest request, in sectors: one sector per bit of an unsigned long such
 * as io_desc.sector_mask.  That is 64 sectors, i.e. 32K with 512-byte
 * sectors, where long is 64 bits wide.
 */
#define UBD_MAX_REQUEST (sizeof(long) * 8)
49
/*
 * One piece of an I/O request; io_thread_req carries a trailing array of
 * these.  The code that fills and consumes the fields is outside this part
 * of the file, so the per-field notes are inferred from the names - TODO
 * confirm against ubd_queue_rq() and the I/O thread.
 */
struct io_desc {
	char			*buffer;	/* presumably the data buffer */
	unsigned long		length;		/* presumably its length in bytes */
	unsigned long		sector_mask;	/* presumably one bit per sector */
	unsigned long long	cow_offset;	/* presumably an offset into the COW bitmap */
	unsigned long		bitmap_words[2];
};
57
58struct io_thread_req {
59	struct request *req;
60	int fds[2];
61	unsigned long offsets[2];
62	unsigned long long offset;
63	int sectorsize;
64	int error;
65
66	int desc_cnt;
67	/* io_desc has to be the last element of the struct */
68	struct io_desc io_desc[];
69};
70
71
/*
 * Completion side: ubd_handler() reads finished request pointers from
 * thread_fd into irq_req_buffer.  The storage of irq_remainder keeps the
 * leading bytes of a pointer that was only partially read, and
 * irq_remainder_size says how many of those bytes are valid.
 */
static struct io_thread_req *(*irq_req_buffer)[];
static struct io_thread_req *irq_remainder;
static int irq_remainder_size;

/*
 * Same layout for the opposite direction; allocated in ubd_init() and
 * presumably used by the I/O thread (its reader is outside this part of the
 * file).
 */
static struct io_thread_req *(*io_req_buffer)[];
static struct io_thread_req *io_remainder;
static int io_remainder_size;
79
80
81
/*
 * Report whether bit number @bit is set in the byte array @data.  Bit 0 is
 * the least significant bit of data[0], bit 8 the least significant bit of
 * data[1], and so on.
 *
 * Returns 1 if the bit is set, 0 otherwise.
 */
static inline int ubd_test_bit(__u64 bit, unsigned char *data)
{
	const int width = sizeof(data[0]) * 8;
	__u64 index = bit / width;
	int shift = bit % width;

	return ((data[index] >> shift) & 1) ? 1 : 0;
}
92
/*
 * Set bit number @bit in the byte array @data, using the same numbering as
 * ubd_test_bit(): bit 0 is the least significant bit of data[0].
 */
static inline void ubd_set_bit(__u64 bit, unsigned char *data)
{
	const int width = sizeof(data[0]) * 8;
	unsigned char *cell = &data[bit / width];

	*cell |= (1 << (bit % width));
}
103/*End stuff from ubd_user.h*/
104
105#define DRIVER_NAME "uml-blkdev"
106
107static DEFINE_MUTEX(ubd_lock);
108static DEFINE_MUTEX(ubd_mutex); /* replaces BKL, might not be needed */
109
110static int ubd_open(struct block_device *bdev, fmode_t mode);
111static void ubd_release(struct gendisk *disk, fmode_t mode);
112static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
113		     unsigned int cmd, unsigned long arg);
114static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo);
115
116#define MAX_DEV (16)
117
118static const struct block_device_operations ubd_blops = {
119        .owner		= THIS_MODULE,
120        .open		= ubd_open,
121        .release	= ubd_release,
122        .ioctl		= ubd_ioctl,
123        .compat_ioctl	= blkdev_compat_ptr_ioctl,
124	.getgeo		= ubd_getgeo,
125};
126
127/* Protected by ubd_lock */
128static int fake_major = UBD_MAJOR;
129static struct gendisk *ubd_gendisk[MAX_DEV];
130static struct gendisk *fake_gendisk[MAX_DEV];
131
132#ifdef CONFIG_BLK_DEV_UBD_SYNC
133#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
134					 .cl = 1 })
135#else
136#define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
137					 .cl = 1 })
138#endif
139static struct openflags global_openflags = OPEN_FLAGS;
140
/* Copy-on-write state of a device that sits on top of a backing file */
struct cow {
	char		*file;		/* backing file name */
	int		fd;		/* backing file fd */
	unsigned long	*bitmap;	/* vmalloc'd copy of the COW bitmap */
	unsigned long	bitmap_len;	/* size of that bitmap in bytes */
	int		bitmap_offset;	/* where the bitmap starts in the COW file */
	int		data_offset;	/* presumably where the data starts - TODO confirm */
};
151
152#define MAX_SG 64
153
154struct ubd {
155	/* name (and fd, below) of the file opened for writing, either the
156	 * backing or the cow file. */
157	char *file;
158	int count;
159	int fd;
160	__u64 size;
161	struct openflags boot_openflags;
162	struct openflags openflags;
163	unsigned shared:1;
164	unsigned no_cow:1;
165	unsigned no_trim:1;
166	struct cow cow;
167	struct platform_device pdev;
168	struct request_queue *queue;
169	struct blk_mq_tag_set tag_set;
170	spinlock_t lock;
171};
172
/* Initializer for an unused struct cow: no backing file, no bitmap */
#define DEFAULT_COW { \
	.file =			NULL, \
	.fd =			-1, \
	.bitmap =		NULL, \
	.bitmap_offset =	0, \
	.data_offset =		0, \
}

/*
 * Initializer for an unconfigured unit: no file, closed fd, default open
 * flags and all option flags cleared.  ubd_device_release() restores a unit
 * to this state.
 */
#define DEFAULT_UBD { \
	.file =			NULL, \
	.count =		0, \
	.fd =			-1, \
	.size =			-1, \
	.boot_openflags =	OPEN_FLAGS, \
	.openflags =		OPEN_FLAGS, \
	.shared =		0, \
	.no_cow =		0, \
	.no_trim =		0, \
	.cow =			DEFAULT_COW, \
	.lock =			__SPIN_LOCK_UNLOCKED(ubd_devs.lock), \
}
194
195/* Protected by ubd_lock */
196static struct ubd ubd_devs[MAX_DEV] = { [0 ... MAX_DEV - 1] = DEFAULT_UBD };
197
198/* Only changed by fake_ide_setup which is a setup */
199static int fake_ide = 0;
200static struct proc_dir_entry *proc_ide_root = NULL;
201static struct proc_dir_entry *proc_ide = NULL;
202
203static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
204				 const struct blk_mq_queue_data *bd);
205
206static void make_proc_ide(void)
207{
208	proc_ide_root = proc_mkdir("ide", NULL);
209	proc_ide = proc_mkdir("ide0", proc_ide_root);
210}
211
/* seq_file show routine for the fake "media" entry: always reports "disk" */
static int fake_ide_media_proc_show(struct seq_file *m, void *v)
{
	static const char media[] = "disk\n";

	seq_puts(m, media);
	return 0;
}
217
218static void make_ide_entries(const char *dev_name)
219{
220	struct proc_dir_entry *dir, *ent;
221	char name[64];
222
223	if(proc_ide_root == NULL) make_proc_ide();
224
225	dir = proc_mkdir(dev_name, proc_ide);
226	if(!dir) return;
227
228	ent = proc_create_single("media", S_IRUGO, dir,
229			fake_ide_media_proc_show);
230	if(!ent) return;
231	snprintf(name, sizeof(name), "ide0/%s", dev_name);
232	proc_symlink(dev_name, proc_ide_root, name);
233}
234
235static int fake_ide_setup(char *str)
236{
237	fake_ide = 1;
238	return 1;
239}
240
241__setup("fake_ide", fake_ide_setup);
242
243__uml_help(fake_ide_setup,
244"fake_ide\n"
245"    Create ide0 entries that map onto ubd devices.\n\n"
246);
247
/*
 * Parse a unit designator at *ptr.  It is either a number (parsed by
 * simple_strtoul() with base 0) or a single lowercase letter, where 'a'
 * means unit 0, 'b' unit 1 and so on.
 *
 * On success *ptr is advanced past the consumed characters and the unit
 * number is returned.  Returns -1, leaving *ptr alone, if nothing could be
 * parsed.
 */
static int parse_unit(char **ptr)
{
	char *cur = *ptr;
	char *after;
	int unit;

	if (isdigit(*cur)) {
		unit = simple_strtoul(cur, &after, 0);
		if (after == cur)
			return -1;
		*ptr = after;
		return unit;
	}

	if ((*cur < 'a') || (*cur > 'z'))
		return -1;

	*ptr = cur + 1;
	return *cur - 'a';
}
266
267/* If *index_out == -1 at exit, the passed option was a general one;
268 * otherwise, the str pointer is used (and owned) inside ubd_devs array, so it
269 * should not be freed on exit.
270 */
271static int ubd_setup_common(char *str, int *index_out, char **error_out)
272{
273	struct ubd *ubd_dev;
274	struct openflags flags = global_openflags;
275	char *backing_file;
276	int n, err = 0, i;
277
278	if(index_out) *index_out = -1;
279	n = *str;
280	if(n == '='){
281		char *end;
282		int major;
283
284		str++;
285		if(!strcmp(str, "sync")){
286			global_openflags = of_sync(global_openflags);
287			return err;
288		}
289
290		err = -EINVAL;
291		major = simple_strtoul(str, &end, 0);
292		if((*end != '\0') || (end == str)){
293			*error_out = "Didn't parse major number";
294			return err;
295		}
296
297		mutex_lock(&ubd_lock);
298		if (fake_major != UBD_MAJOR) {
299			*error_out = "Can't assign a fake major twice";
300			goto out1;
301		}
302
303		fake_major = major;
304
305		printk(KERN_INFO "Setting extra ubd major number to %d\n",
306		       major);
307		err = 0;
308	out1:
309		mutex_unlock(&ubd_lock);
310		return err;
311	}
312
313	n = parse_unit(&str);
314	if(n < 0){
315		*error_out = "Couldn't parse device number";
316		return -EINVAL;
317	}
318	if(n >= MAX_DEV){
319		*error_out = "Device number out of range";
320		return 1;
321	}
322
323	err = -EBUSY;
324	mutex_lock(&ubd_lock);
325
326	ubd_dev = &ubd_devs[n];
327	if(ubd_dev->file != NULL){
328		*error_out = "Device is already configured";
329		goto out;
330	}
331
332	if (index_out)
333		*index_out = n;
334
335	err = -EINVAL;
336	for (i = 0; i < sizeof("rscdt="); i++) {
337		switch (*str) {
338		case 'r':
339			flags.w = 0;
340			break;
341		case 's':
342			flags.s = 1;
343			break;
344		case 'd':
345			ubd_dev->no_cow = 1;
346			break;
347		case 'c':
348			ubd_dev->shared = 1;
349			break;
350		case 't':
351			ubd_dev->no_trim = 1;
352			break;
353		case '=':
354			str++;
355			goto break_loop;
356		default:
357			*error_out = "Expected '=' or flag letter "
358				"(r, s, c, t or d)";
359			goto out;
360		}
361		str++;
362	}
363
364	if (*str == '=')
365		*error_out = "Too many flags specified";
366	else
367		*error_out = "Missing '='";
368	goto out;
369
370break_loop:
371	backing_file = strchr(str, ',');
372
373	if (backing_file == NULL)
374		backing_file = strchr(str, ':');
375
376	if(backing_file != NULL){
377		if(ubd_dev->no_cow){
378			*error_out = "Can't specify both 'd' and a cow file";
379			goto out;
380		}
381		else {
382			*backing_file = '\0';
383			backing_file++;
384		}
385	}
386	err = 0;
387	ubd_dev->file = str;
388	ubd_dev->cow.file = backing_file;
389	ubd_dev->boot_openflags = flags;
390out:
391	mutex_unlock(&ubd_lock);
392	return err;
393}
394
395static int ubd_setup(char *str)
396{
397	char *error;
398	int err;
399
400	err = ubd_setup_common(str, NULL, &error);
401	if(err)
402		printk(KERN_ERR "Failed to initialize device with \"%s\" : "
403		       "%s\n", str, error);
404	return 1;
405}
406
407__setup("ubd", ubd_setup);
408__uml_help(ubd_setup,
409"ubd<n><flags>=<filename>[(:|,)<filename2>]\n"
410"    This is used to associate a device with a file in the underlying\n"
411"    filesystem. When specifying two filenames, the first one is the\n"
412"    COW name and the second is the backing file name. As separator you can\n"
413"    use either a ':' or a ',': the first one allows writing things like;\n"
414"	ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
415"    while with a ',' the shell would not expand the 2nd '~'.\n"
416"    When using only one filename, UML will detect whether to treat it like\n"
417"    a COW file or a backing file. To override this detection, add the 'd'\n"
418"    flag:\n"
419"	ubd0d=BackingFile\n"
420"    Usually, there is a filesystem in the file, but \n"
421"    that's not required. Swap devices containing swap files can be\n"
422"    specified like this. Also, a file which doesn't contain a\n"
423"    filesystem can have its contents read in the virtual \n"
424"    machine by running 'dd' on the device. <n> must be in the range\n"
425"    0 to 7. Appending an 'r' to the number will cause that device\n"
426"    to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
427"    an 's' will cause data to be written to disk on the host immediately.\n"
428"    'c' will cause the device to be treated as being shared between multiple\n"
429"    UMLs and file locking will be turned off - this is appropriate for a\n"
430"    cluster filesystem and inappropriate at almost all other times.\n\n"
431"    't' will disable trim/discard support on the device (enabled by default).\n\n"
432);
433
434static int udb_setup(char *str)
435{
436	printk("udb%s specified on command line is almost certainly a ubd -> "
437	       "udb TYPO\n", str);
438	return 1;
439}
440
441__setup("udb", udb_setup);
442__uml_help(udb_setup,
443"udb\n"
444"    This option is here solely to catch ubd -> udb typos, which can be\n"
445"    to impossible to catch visually unless you specifically look for\n"
446"    them.  The only result of any option starting with 'udb' is an error\n"
447"    in the boot output.\n\n"
448);
449
/* Only changed by ubd_init, which is an initcall. */
static int thread_fd = -1;

/*
 * Read several request pointers at a time from @fd, coping with reads that
 * end in the middle of a pointer.
 *
 * @fd:             descriptor to read from
 * @request_buffer: destination, with room for @max_recs pointers
 * @remainder:      storage (one pointer wide) holding the leading bytes of a
 *                  pointer whose tail has not arrived yet
 * @remainder_size: number of valid bytes in @remainder; updated here
 * @max_recs:       capacity of @request_buffer in pointers
 *
 * Bytes left over from the previous call are placed at the start of the
 * buffer and the new data is read in behind them.  Any trailing partial
 * pointer is moved back into @remainder for the next call.
 *
 * Returns the number of bytes of complete pointers now in @request_buffer
 * (always a multiple of the pointer size), or the non-positive result of
 * os_read_file() if nothing was read.
 */
static int bulk_req_safe_read(
	int fd,
	struct io_thread_req * (*request_buffer)[],
	struct io_thread_req **remainder,
	int *remainder_size,
	int max_recs
	)
{
	int n = 0;
	int res;

	if (*remainder_size > 0) {
		memmove(
			(char *) request_buffer,
			(char *) remainder, *remainder_size
		);
		n = *remainder_size;
	}

	res = os_read_file(
			fd,
			((char *) request_buffer) + n,
			sizeof(struct io_thread_req *)*max_recs - n
		);
	if (res <= 0) {
		/* Nothing new: the old remainder stays pending for next time */
		return res;
	}

	n += res;

	/*
	 * Recompute the remainder on every successful read.  The old bytes
	 * have been consumed above, so the size must drop back to zero when
	 * the data ends on a pointer boundary; otherwise stale bytes would
	 * be injected in front of the next read.
	 */
	*remainder_size = n % sizeof(struct io_thread_req *);
	if (*remainder_size > 0) {
		/*
		* Read somehow returned not a multiple of dword
		* theoretically possible, but never observed in the
		* wild, so read routine must be able to handle it
		*/
		WARN(*remainder_size > 0, "UBD IPC read returned a partial result");
		memmove(
			remainder,
			((char *) request_buffer) + (n - *remainder_size),
			*remainder_size
		);
		n = n - *remainder_size;
	}
	return n;
}
505
506/* Called without dev->lock held, and only in interrupt context. */
507static void ubd_handler(void)
508{
509	int n;
510	int count;
511
512	while(1){
513		n = bulk_req_safe_read(
514			thread_fd,
515			irq_req_buffer,
516			&irq_remainder,
517			&irq_remainder_size,
518			UBD_REQ_BUFFER_SIZE
519		);
520		if (n < 0) {
521			if(n == -EAGAIN)
522				break;
523			printk(KERN_ERR "spurious interrupt in ubd_handler, "
524			       "err = %d\n", -n);
525			return;
526		}
527		for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
528			struct io_thread_req *io_req = (*irq_req_buffer)[count];
529
530			if ((io_req->error == BLK_STS_NOTSUPP) && (req_op(io_req->req) == REQ_OP_DISCARD)) {
531				blk_queue_max_discard_sectors(io_req->req->q, 0);
532				blk_queue_max_write_zeroes_sectors(io_req->req->q, 0);
533				blk_queue_flag_clear(QUEUE_FLAG_DISCARD, io_req->req->q);
534			}
535			blk_mq_end_request(io_req->req, io_req->error);
536			kfree(io_req);
537		}
538	}
539}
540
541static irqreturn_t ubd_intr(int irq, void *dev)
542{
543	ubd_handler();
544	return IRQ_HANDLED;
545}
546
/* Only changed by ubd_init, which is an initcall. */
static int io_pid = -1;

/* Exit hook: kill the I/O thread if one was started */
static void kill_io_thread(void)
{
	if (io_pid == -1)
		return;

	os_kill_process(io_pid, 1);
}

__uml_exitcall(kill_io_thread);
557
558static inline int ubd_file_size(struct ubd *ubd_dev, __u64 *size_out)
559{
560	char *file;
561	int fd;
562	int err;
563
564	__u32 version;
565	__u32 align;
566	char *backing_file;
567	time64_t mtime;
568	unsigned long long size;
569	int sector_size;
570	int bitmap_offset;
571
572	if (ubd_dev->file && ubd_dev->cow.file) {
573		file = ubd_dev->cow.file;
574
575		goto out;
576	}
577
578	fd = os_open_file(ubd_dev->file, of_read(OPENFLAGS()), 0);
579	if (fd < 0)
580		return fd;
581
582	err = read_cow_header(file_reader, &fd, &version, &backing_file, \
583		&mtime, &size, &sector_size, &align, &bitmap_offset);
584	os_close_file(fd);
585
586	if(err == -EINVAL)
587		file = ubd_dev->file;
588	else
589		file = backing_file;
590
591out:
592	return os_file_size(file, size_out);
593}
594
/*
 * Read @len bytes of COW bitmap from @fd at file offset @offset into @buf.
 * Returns 0 on success or the negative value reported by os_pread_file().
 */
static int read_cow_bitmap(int fd, void *buf, int offset, int len)
{
	int ret = os_pread_file(fd, buf, len, offset);

	return (ret < 0) ? ret : 0;
}
605
606static int backing_file_mismatch(char *file, __u64 size, time64_t mtime)
607{
608	time64_t modtime;
609	unsigned long long actual;
610	int err;
611
612	err = os_file_modtime(file, &modtime);
613	if (err < 0) {
614		printk(KERN_ERR "Failed to get modification time of backing "
615		       "file \"%s\", err = %d\n", file, -err);
616		return err;
617	}
618
619	err = os_file_size(file, &actual);
620	if (err < 0) {
621		printk(KERN_ERR "Failed to get size of backing file \"%s\", "
622		       "err = %d\n", file, -err);
623		return err;
624	}
625
626	if (actual != size) {
627		/*__u64 can be a long on AMD64 and with %lu GCC complains; so
628		 * the typecast.*/
629		printk(KERN_ERR "Size mismatch (%llu vs %llu) of COW header "
630		       "vs backing file\n", (unsigned long long) size, actual);
631		return -EINVAL;
632	}
633	if (modtime != mtime) {
634		printk(KERN_ERR "mtime mismatch (%lld vs %lld) of COW header vs "
635		       "backing file\n", mtime, modtime);
636		return -EINVAL;
637	}
638	return 0;
639}
640
641static int path_requires_switch(char *from_cmdline, char *from_cow, char *cow)
642{
643	struct uml_stat buf1, buf2;
644	int err;
645
646	if (from_cmdline == NULL)
647		return 0;
648	if (!strcmp(from_cmdline, from_cow))
649		return 0;
650
651	err = os_stat_file(from_cmdline, &buf1);
652	if (err < 0) {
653		printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cmdline,
654		       -err);
655		return 0;
656	}
657	err = os_stat_file(from_cow, &buf2);
658	if (err < 0) {
659		printk(KERN_ERR "Couldn't stat '%s', err = %d\n", from_cow,
660		       -err);
661		return 1;
662	}
663	if ((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
664		return 0;
665
666	printk(KERN_ERR "Backing file mismatch - \"%s\" requested, "
667	       "\"%s\" specified in COW header of \"%s\"\n",
668	       from_cmdline, from_cow, cow);
669	return 1;
670}
671
672static int open_ubd_file(char *file, struct openflags *openflags, int shared,
673		  char **backing_file_out, int *bitmap_offset_out,
674		  unsigned long *bitmap_len_out, int *data_offset_out,
675		  int *create_cow_out)
676{
677	time64_t mtime;
678	unsigned long long size;
679	__u32 version, align;
680	char *backing_file;
681	int fd, err, sectorsize, asked_switch, mode = 0644;
682
683	fd = os_open_file(file, *openflags, mode);
684	if (fd < 0) {
685		if ((fd == -ENOENT) && (create_cow_out != NULL))
686			*create_cow_out = 1;
687		if (!openflags->w ||
688		    ((fd != -EROFS) && (fd != -EACCES)))
689			return fd;
690		openflags->w = 0;
691		fd = os_open_file(file, *openflags, mode);
692		if (fd < 0)
693			return fd;
694	}
695
696	if (shared)
697		printk(KERN_INFO "Not locking \"%s\" on the host\n", file);
698	else {
699		err = os_lock_file(fd, openflags->w);
700		if (err < 0) {
701			printk(KERN_ERR "Failed to lock '%s', err = %d\n",
702			       file, -err);
703			goto out_close;
704		}
705	}
706
707	/* Successful return case! */
708	if (backing_file_out == NULL)
709		return fd;
710
711	err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
712			      &size, &sectorsize, &align, bitmap_offset_out);
713	if (err && (*backing_file_out != NULL)) {
714		printk(KERN_ERR "Failed to read COW header from COW file "
715		       "\"%s\", errno = %d\n", file, -err);
716		goto out_close;
717	}
718	if (err)
719		return fd;
720
721	asked_switch = path_requires_switch(*backing_file_out, backing_file,
722					    file);
723
724	/* Allow switching only if no mismatch. */
725	if (asked_switch && !backing_file_mismatch(*backing_file_out, size,
726						   mtime)) {
727		printk(KERN_ERR "Switching backing file to '%s'\n",
728		       *backing_file_out);
729		err = write_cow_header(file, fd, *backing_file_out,
730				       sectorsize, align, &size);
731		if (err) {
732			printk(KERN_ERR "Switch failed, errno = %d\n", -err);
733			goto out_close;
734		}
735	} else {
736		*backing_file_out = backing_file;
737		err = backing_file_mismatch(*backing_file_out, size, mtime);
738		if (err)
739			goto out_close;
740	}
741
742	cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
743		  bitmap_len_out, data_offset_out);
744
745	return fd;
746 out_close:
747	os_close_file(fd);
748	return err;
749}
750
751static int create_cow_file(char *cow_file, char *backing_file,
752		    struct openflags flags,
753		    int sectorsize, int alignment, int *bitmap_offset_out,
754		    unsigned long *bitmap_len_out, int *data_offset_out)
755{
756	int err, fd;
757
758	flags.c = 1;
759	fd = open_ubd_file(cow_file, &flags, 0, NULL, NULL, NULL, NULL, NULL);
760	if (fd < 0) {
761		err = fd;
762		printk(KERN_ERR "Open of COW file '%s' failed, errno = %d\n",
763		       cow_file, -err);
764		goto out;
765	}
766
767	err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
768			    bitmap_offset_out, bitmap_len_out,
769			    data_offset_out);
770	if (!err)
771		return fd;
772	os_close_file(fd);
773 out:
774	return err;
775}
776
777static void ubd_close_dev(struct ubd *ubd_dev)
778{
779	os_close_file(ubd_dev->fd);
780	if(ubd_dev->cow.file == NULL)
781		return;
782
783	os_close_file(ubd_dev->cow.fd);
784	vfree(ubd_dev->cow.bitmap);
785	ubd_dev->cow.bitmap = NULL;
786}
787
788static int ubd_open_dev(struct ubd *ubd_dev)
789{
790	struct openflags flags;
791	char **back_ptr;
792	int err, create_cow, *create_ptr;
793	int fd;
794
795	ubd_dev->openflags = ubd_dev->boot_openflags;
796	create_cow = 0;
797	create_ptr = (ubd_dev->cow.file != NULL) ? &create_cow : NULL;
798	back_ptr = ubd_dev->no_cow ? NULL : &ubd_dev->cow.file;
799
800	fd = open_ubd_file(ubd_dev->file, &ubd_dev->openflags, ubd_dev->shared,
801				back_ptr, &ubd_dev->cow.bitmap_offset,
802				&ubd_dev->cow.bitmap_len, &ubd_dev->cow.data_offset,
803				create_ptr);
804
805	if((fd == -ENOENT) && create_cow){
806		fd = create_cow_file(ubd_dev->file, ubd_dev->cow.file,
807					  ubd_dev->openflags, SECTOR_SIZE, PAGE_SIZE,
808					  &ubd_dev->cow.bitmap_offset,
809					  &ubd_dev->cow.bitmap_len,
810					  &ubd_dev->cow.data_offset);
811		if(fd >= 0){
812			printk(KERN_INFO "Creating \"%s\" as COW file for "
813			       "\"%s\"\n", ubd_dev->file, ubd_dev->cow.file);
814		}
815	}
816
817	if(fd < 0){
818		printk("Failed to open '%s', errno = %d\n", ubd_dev->file,
819		       -fd);
820		return fd;
821	}
822	ubd_dev->fd = fd;
823
824	if(ubd_dev->cow.file != NULL){
825		blk_queue_max_hw_sectors(ubd_dev->queue, 8 * sizeof(long));
826
827		err = -ENOMEM;
828		ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len);
829		if(ubd_dev->cow.bitmap == NULL){
830			printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
831			goto error;
832		}
833		flush_tlb_kernel_vm();
834
835		err = read_cow_bitmap(ubd_dev->fd, ubd_dev->cow.bitmap,
836				      ubd_dev->cow.bitmap_offset,
837				      ubd_dev->cow.bitmap_len);
838		if(err < 0)
839			goto error;
840
841		flags = ubd_dev->openflags;
842		flags.w = 0;
843		err = open_ubd_file(ubd_dev->cow.file, &flags, ubd_dev->shared, NULL,
844				    NULL, NULL, NULL, NULL);
845		if(err < 0) goto error;
846		ubd_dev->cow.fd = err;
847	}
848	if (ubd_dev->no_trim == 0) {
849		ubd_dev->queue->limits.discard_granularity = SECTOR_SIZE;
850		ubd_dev->queue->limits.discard_alignment = SECTOR_SIZE;
851		blk_queue_max_discard_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
852		blk_queue_max_write_zeroes_sectors(ubd_dev->queue, UBD_MAX_REQUEST);
853		blk_queue_flag_set(QUEUE_FLAG_DISCARD, ubd_dev->queue);
854	}
855	blk_queue_flag_set(QUEUE_FLAG_NONROT, ubd_dev->queue);
856	return 0;
857 error:
858	os_close_file(ubd_dev->fd);
859	return err;
860}
861
862static void ubd_device_release(struct device *dev)
863{
864	struct ubd *ubd_dev = dev_get_drvdata(dev);
865
866	blk_cleanup_queue(ubd_dev->queue);
867	blk_mq_free_tag_set(&ubd_dev->tag_set);
868	*ubd_dev = ((struct ubd) DEFAULT_UBD);
869}
870
871static int ubd_disk_register(int major, u64 size, int unit,
872			     struct gendisk **disk_out)
873{
874	struct device *parent = NULL;
875	struct gendisk *disk;
876
877	disk = alloc_disk(1 << UBD_SHIFT);
878	if(disk == NULL)
879		return -ENOMEM;
880
881	disk->major = major;
882	disk->first_minor = unit << UBD_SHIFT;
883	disk->fops = &ubd_blops;
884	set_capacity(disk, size / 512);
885	if (major == UBD_MAJOR)
886		sprintf(disk->disk_name, "ubd%c", 'a' + unit);
887	else
888		sprintf(disk->disk_name, "ubd_fake%d", unit);
889
890	/* sysfs register (not for ide fake devices) */
891	if (major == UBD_MAJOR) {
892		ubd_devs[unit].pdev.id   = unit;
893		ubd_devs[unit].pdev.name = DRIVER_NAME;
894		ubd_devs[unit].pdev.dev.release = ubd_device_release;
895		dev_set_drvdata(&ubd_devs[unit].pdev.dev, &ubd_devs[unit]);
896		platform_device_register(&ubd_devs[unit].pdev);
897		parent = &ubd_devs[unit].pdev.dev;
898	}
899
900	disk->private_data = &ubd_devs[unit];
901	disk->queue = ubd_devs[unit].queue;
902	device_add_disk(parent, disk, NULL);
903
904	*disk_out = disk;
905	return 0;
906}
907
/*
 * Round n up to the next multiple of SECTOR_SIZE (which must be a power of
 * two).  The argument is parenthesized so that expressions built from
 * low-precedence operators, e.g. ROUND_BLOCK(a | b), are rounded as a whole.
 */
#define ROUND_BLOCK(n) (((n) + (SECTOR_SIZE - 1)) & (-SECTOR_SIZE))
909
910static const struct blk_mq_ops ubd_mq_ops = {
911	.queue_rq = ubd_queue_rq,
912};
913
914static int ubd_add(int n, char **error_out)
915{
916	struct ubd *ubd_dev = &ubd_devs[n];
917	int err = 0;
918
919	if(ubd_dev->file == NULL)
920		goto out;
921
922	err = ubd_file_size(ubd_dev, &ubd_dev->size);
923	if(err < 0){
924		*error_out = "Couldn't determine size of device's file";
925		goto out;
926	}
927
928	ubd_dev->size = ROUND_BLOCK(ubd_dev->size);
929
930	ubd_dev->tag_set.ops = &ubd_mq_ops;
931	ubd_dev->tag_set.queue_depth = 64;
932	ubd_dev->tag_set.numa_node = NUMA_NO_NODE;
933	ubd_dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
934	ubd_dev->tag_set.driver_data = ubd_dev;
935	ubd_dev->tag_set.nr_hw_queues = 1;
936
937	err = blk_mq_alloc_tag_set(&ubd_dev->tag_set);
938	if (err)
939		goto out;
940
941	ubd_dev->queue = blk_mq_init_queue(&ubd_dev->tag_set);
942	if (IS_ERR(ubd_dev->queue)) {
943		err = PTR_ERR(ubd_dev->queue);
944		goto out_cleanup_tags;
945	}
946
947	ubd_dev->queue->queuedata = ubd_dev;
948	blk_queue_write_cache(ubd_dev->queue, true, false);
949
950	blk_queue_max_segments(ubd_dev->queue, MAX_SG);
951	blk_queue_segment_boundary(ubd_dev->queue, PAGE_SIZE - 1);
952	err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, &ubd_gendisk[n]);
953	if(err){
954		*error_out = "Failed to register device";
955		goto out_cleanup_tags;
956	}
957
958	if (fake_major != UBD_MAJOR)
959		ubd_disk_register(fake_major, ubd_dev->size, n,
960				  &fake_gendisk[n]);
961
962	/*
963	 * Perhaps this should also be under the "if (fake_major)" above
964	 * using the fake_disk->disk_name
965	 */
966	if (fake_ide)
967		make_ide_entries(ubd_gendisk[n]->disk_name);
968
969	err = 0;
970out:
971	return err;
972
973out_cleanup_tags:
974	blk_mq_free_tag_set(&ubd_dev->tag_set);
975	if (!(IS_ERR(ubd_dev->queue)))
976		blk_cleanup_queue(ubd_dev->queue);
977	goto out;
978}
979
980static int ubd_config(char *str, char **error_out)
981{
982	int n, ret;
983
984	/* This string is possibly broken up and stored, so it's only
985	 * freed if ubd_setup_common fails, or if only general options
986	 * were set.
987	 */
988	str = kstrdup(str, GFP_KERNEL);
989	if (str == NULL) {
990		*error_out = "Failed to allocate memory";
991		return -ENOMEM;
992	}
993
994	ret = ubd_setup_common(str, &n, error_out);
995	if (ret)
996		goto err_free;
997
998	if (n == -1) {
999		ret = 0;
1000		goto err_free;
1001	}
1002
1003	mutex_lock(&ubd_lock);
1004	ret = ubd_add(n, error_out);
1005	if (ret)
1006		ubd_devs[n].file = NULL;
1007	mutex_unlock(&ubd_lock);
1008
1009out:
1010	return ret;
1011
1012err_free:
1013	kfree(str);
1014	goto out;
1015}
1016
1017static int ubd_get_config(char *name, char *str, int size, char **error_out)
1018{
1019	struct ubd *ubd_dev;
1020	int n, len = 0;
1021
1022	n = parse_unit(&name);
1023	if((n >= MAX_DEV) || (n < 0)){
1024		*error_out = "ubd_get_config : device number out of range";
1025		return -1;
1026	}
1027
1028	ubd_dev = &ubd_devs[n];
1029	mutex_lock(&ubd_lock);
1030
1031	if(ubd_dev->file == NULL){
1032		CONFIG_CHUNK(str, size, len, "", 1);
1033		goto out;
1034	}
1035
1036	CONFIG_CHUNK(str, size, len, ubd_dev->file, 0);
1037
1038	if(ubd_dev->cow.file != NULL){
1039		CONFIG_CHUNK(str, size, len, ",", 0);
1040		CONFIG_CHUNK(str, size, len, ubd_dev->cow.file, 1);
1041	}
1042	else CONFIG_CHUNK(str, size, len, "", 1);
1043
1044 out:
1045	mutex_unlock(&ubd_lock);
1046	return len;
1047}
1048
1049static int ubd_id(char **str, int *start_out, int *end_out)
1050{
1051	int n;
1052
1053	n = parse_unit(str);
1054	*start_out = 0;
1055	*end_out = MAX_DEV - 1;
1056	return n;
1057}
1058
1059static int ubd_remove(int n, char **error_out)
1060{
1061	struct gendisk *disk = ubd_gendisk[n];
1062	struct ubd *ubd_dev;
1063	int err = -ENODEV;
1064
1065	mutex_lock(&ubd_lock);
1066
1067	ubd_dev = &ubd_devs[n];
1068
1069	if(ubd_dev->file == NULL)
1070		goto out;
1071
1072	/* you cannot remove a open disk */
1073	err = -EBUSY;
1074	if(ubd_dev->count > 0)
1075		goto out;
1076
1077	ubd_gendisk[n] = NULL;
1078	if(disk != NULL){
1079		del_gendisk(disk);
1080		put_disk(disk);
1081	}
1082
1083	if(fake_gendisk[n] != NULL){
1084		del_gendisk(fake_gendisk[n]);
1085		put_disk(fake_gendisk[n]);
1086		fake_gendisk[n] = NULL;
1087	}
1088
1089	err = 0;
1090	platform_device_unregister(&ubd_dev->pdev);
1091out:
1092	mutex_unlock(&ubd_lock);
1093	return err;
1094}
1095
1096/* All these are called by mconsole in process context and without
1097 * ubd-specific locks.  The structure itself is const except for .list.
1098 */
1099static struct mc_device ubd_mc = {
1100	.list		= LIST_HEAD_INIT(ubd_mc.list),
1101	.name		= "ubd",
1102	.config		= ubd_config,
1103	.get_config	= ubd_get_config,
1104	.id		= ubd_id,
1105	.remove		= ubd_remove,
1106};
1107
1108static int __init ubd_mc_init(void)
1109{
1110	mconsole_register_dev(&ubd_mc);
1111	return 0;
1112}
1113
1114__initcall(ubd_mc_init);
1115
1116static int __init ubd0_init(void)
1117{
1118	struct ubd *ubd_dev = &ubd_devs[0];
1119
1120	mutex_lock(&ubd_lock);
1121	if(ubd_dev->file == NULL)
1122		ubd_dev->file = "root_fs";
1123	mutex_unlock(&ubd_lock);
1124
1125	return 0;
1126}
1127
1128__initcall(ubd0_init);
1129
1130/* Used in ubd_init, which is an initcall */
1131static struct platform_driver ubd_driver = {
1132	.driver = {
1133		.name  = DRIVER_NAME,
1134	},
1135};
1136
1137static int __init ubd_init(void)
1138{
1139	char *error;
1140	int i, err;
1141
1142	if (register_blkdev(UBD_MAJOR, "ubd"))
1143		return -1;
1144
1145	if (fake_major != UBD_MAJOR) {
1146		char name[sizeof("ubd_nnn\0")];
1147
1148		snprintf(name, sizeof(name), "ubd_%d", fake_major);
1149		if (register_blkdev(fake_major, "ubd"))
1150			return -1;
1151	}
1152
1153	irq_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE,
1154				       sizeof(struct io_thread_req *),
1155				       GFP_KERNEL
1156		);
1157	irq_remainder = 0;
1158
1159	if (irq_req_buffer == NULL) {
1160		printk(KERN_ERR "Failed to initialize ubd buffering\n");
1161		return -1;
1162	}
1163	io_req_buffer = kmalloc_array(UBD_REQ_BUFFER_SIZE,
1164				      sizeof(struct io_thread_req *),
1165				      GFP_KERNEL
1166		);
1167
1168	io_remainder = 0;
1169
1170	if (io_req_buffer == NULL) {
1171		printk(KERN_ERR "Failed to initialize ubd buffering\n");
1172		return -1;
1173	}
1174	platform_driver_register(&ubd_driver);
1175	mutex_lock(&ubd_lock);
1176	for (i = 0; i < MAX_DEV; i++){
1177		err = ubd_add(i, &error);
1178		if(err)
1179			printk(KERN_ERR "Failed to initialize ubd device %d :"
1180			       "%s\n", i, error);
1181	}
1182	mutex_unlock(&ubd_lock);
1183	return 0;
1184}
1185
1186late_initcall(ubd_init);
1187
1188static int __init ubd_driver_init(void){
1189	unsigned long stack;
1190	int err;
1191
1192	/* Set by CONFIG_BLK_DEV_UBD_SYNC or ubd=sync.*/
1193	if(global_openflags.s){
1194		printk(KERN_INFO "ubd: Synchronous mode\n");
1195		/* Letting ubd=sync be like using ubd#s= instead of ubd#= is
1196		 * enough. So use anyway the io thread. */
1197	}
1198	stack = alloc_stack(0, 0);
1199	io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *),
1200				 &thread_fd);
1201	if(io_pid < 0){
1202		printk(KERN_ERR
1203		       "ubd : Failed to start I/O thread (errno = %d) - "
1204		       "falling back to synchronous I/O\n", -io_pid);
1205		io_pid = -1;
1206		return 0;
1207	}
1208	err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
1209			     0, "ubd", ubd_devs);
1210	if(err != 0)
1211		printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
1212	return 0;
1213}
1214
1215device_initcall(ubd_driver_init);
1216
1217static int ubd_open(struct block_device *bdev, fmode_t mode)
1218{
1219	struct gendisk *disk = bdev->bd_disk;
1220	struct ubd *ubd_dev = disk->private_data;
1221	int err = 0;
1222
1223	mutex_lock(&ubd_mutex);
1224	if(ubd_dev->count == 0){
1225		err = ubd_open_dev(ubd_dev);
1226		if(err){
1227			printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
1228			       disk->disk_name, ubd_dev->file, -err);
1229			goto out;
1230		}
1231	}
1232	ubd_dev->count++;
1233	set_disk_ro(disk, !ubd_dev->openflags.w);
1234
1235	/* This should no more be needed. And it didn't work anyway to exclude
1236	 * read-write remounting of filesystems.*/
1237	/*if((mode & FMODE_WRITE) && !ubd_dev->openflags.w){
1238	        if(--ubd_dev->count == 0) ubd_close_dev(ubd_dev);
1239	        err = -EROFS;
1240	}*/
1241out:
1242	mutex_unlock(&ubd_mutex);
1243	return err;
1244}
1245
1246static void ubd_release(struct gendisk *disk, fmode_t mode)
1247{
1248	struct ubd *ubd_dev = disk->private_data;
1249
1250	mutex_lock(&ubd_mutex);
1251	if(--ubd_dev->count == 0)
1252		ubd_close_dev(ubd_dev);
1253	mutex_unlock(&ubd_mutex);
1254}
1255
1256static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
1257			  __u64 *cow_offset, unsigned long *bitmap,
1258			  __u64 bitmap_offset, unsigned long *bitmap_words,
1259			  __u64 bitmap_len)
1260{
1261	__u64 sector = io_offset >> SECTOR_SHIFT;
1262	int i, update_bitmap = 0;
1263
1264	for (i = 0; i < length >> SECTOR_SHIFT; i++) {
1265		if(cow_mask != NULL)
1266			ubd_set_bit(i, (unsigned char *) cow_mask);
1267		if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1268			continue;
1269
1270		update_bitmap = 1;
1271		ubd_set_bit(sector + i, (unsigned char *) bitmap);
1272	}
1273
1274	if(!update_bitmap)
1275		return;
1276
1277	*cow_offset = sector / (sizeof(unsigned long) * 8);
1278
1279	/* This takes care of the case where we're exactly at the end of the
1280	 * device, and *cow_offset + 1 is off the end.  So, just back it up
1281	 * by one word.  Thanks to Lynn Kerby for the fix and James McMechan
1282	 * for the original diagnosis.
1283	 */
1284	if (*cow_offset == (DIV_ROUND_UP(bitmap_len,
1285					 sizeof(unsigned long)) - 1))
1286		(*cow_offset)--;
1287
1288	bitmap_words[0] = bitmap[*cow_offset];
1289	bitmap_words[1] = bitmap[*cow_offset + 1];
1290
1291	*cow_offset *= sizeof(unsigned long);
1292	*cow_offset += bitmap_offset;
1293}
1294
1295static void cowify_req(struct io_thread_req *req, struct io_desc *segment,
1296		       unsigned long offset, unsigned long *bitmap,
1297		       __u64 bitmap_offset, __u64 bitmap_len)
1298{
1299	__u64 sector = offset >> SECTOR_SHIFT;
1300	int i;
1301
1302	if (segment->length > (sizeof(segment->sector_mask) * 8) << SECTOR_SHIFT)
1303		panic("Operation too long");
1304
1305	if (req_op(req->req) == REQ_OP_READ) {
1306		for (i = 0; i < segment->length >> SECTOR_SHIFT; i++) {
1307			if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
1308				ubd_set_bit(i, (unsigned char *)
1309					    &segment->sector_mask);
1310		}
1311	} else {
1312		cowify_bitmap(offset, segment->length, &segment->sector_mask,
1313			      &segment->cow_offset, bitmap, bitmap_offset,
1314			      segment->bitmap_words, bitmap_len);
1315	}
1316}
1317
1318static void ubd_map_req(struct ubd *dev, struct io_thread_req *io_req,
1319			struct request *req)
1320{
1321	struct bio_vec bvec;
1322	struct req_iterator iter;
1323	int i = 0;
1324	unsigned long byte_offset = io_req->offset;
1325	int op = req_op(req);
1326
1327	if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD) {
1328		io_req->io_desc[0].buffer = NULL;
1329		io_req->io_desc[0].length = blk_rq_bytes(req);
1330	} else {
1331		rq_for_each_segment(bvec, req, iter) {
1332			BUG_ON(i >= io_req->desc_cnt);
1333
1334			io_req->io_desc[i].buffer =
1335				page_address(bvec.bv_page) + bvec.bv_offset;
1336			io_req->io_desc[i].length = bvec.bv_len;
1337			i++;
1338		}
1339	}
1340
1341	if (dev->cow.file) {
1342		for (i = 0; i < io_req->desc_cnt; i++) {
1343			cowify_req(io_req, &io_req->io_desc[i], byte_offset,
1344				   dev->cow.bitmap, dev->cow.bitmap_offset,
1345				   dev->cow.bitmap_len);
1346			byte_offset += io_req->io_desc[i].length;
1347		}
1348
1349	}
1350}
1351
1352static struct io_thread_req *ubd_alloc_req(struct ubd *dev, struct request *req,
1353					   int desc_cnt)
1354{
1355	struct io_thread_req *io_req;
1356	int i;
1357
1358	io_req = kmalloc(sizeof(*io_req) +
1359			 (desc_cnt * sizeof(struct io_desc)),
1360			 GFP_ATOMIC);
1361	if (!io_req)
1362		return NULL;
1363
1364	io_req->req = req;
1365	if (dev->cow.file)
1366		io_req->fds[0] = dev->cow.fd;
1367	else
1368		io_req->fds[0] = dev->fd;
1369	io_req->error = 0;
1370	io_req->sectorsize = SECTOR_SIZE;
1371	io_req->fds[1] = dev->fd;
1372	io_req->offset = (u64) blk_rq_pos(req) << SECTOR_SHIFT;
1373	io_req->offsets[0] = 0;
1374	io_req->offsets[1] = dev->cow.data_offset;
1375
1376	for (i = 0 ; i < desc_cnt; i++) {
1377		io_req->io_desc[i].sector_mask = 0;
1378		io_req->io_desc[i].cow_offset = -1;
1379	}
1380
1381	return io_req;
1382}
1383
1384static int ubd_submit_request(struct ubd *dev, struct request *req)
1385{
1386	int segs = 0;
1387	struct io_thread_req *io_req;
1388	int ret;
1389	int op = req_op(req);
1390
1391	if (op == REQ_OP_FLUSH)
1392		segs = 0;
1393	else if (op == REQ_OP_WRITE_ZEROES || op == REQ_OP_DISCARD)
1394		segs = 1;
1395	else
1396		segs = blk_rq_nr_phys_segments(req);
1397
1398	io_req = ubd_alloc_req(dev, req, segs);
1399	if (!io_req)
1400		return -ENOMEM;
1401
1402	io_req->desc_cnt = segs;
1403	if (segs)
1404		ubd_map_req(dev, io_req, req);
1405
1406	ret = os_write_file(thread_fd, &io_req, sizeof(io_req));
1407	if (ret != sizeof(io_req)) {
1408		if (ret != -EAGAIN)
1409			pr_err("write to io thread failed: %d\n", -ret);
1410		kfree(io_req);
1411	}
1412	return ret;
1413}
1414
1415static blk_status_t ubd_queue_rq(struct blk_mq_hw_ctx *hctx,
1416				 const struct blk_mq_queue_data *bd)
1417{
1418	struct ubd *ubd_dev = hctx->queue->queuedata;
1419	struct request *req = bd->rq;
1420	int ret = 0, res = BLK_STS_OK;
1421
1422	blk_mq_start_request(req);
1423
1424	spin_lock_irq(&ubd_dev->lock);
1425
1426	switch (req_op(req)) {
1427	case REQ_OP_FLUSH:
1428	case REQ_OP_READ:
1429	case REQ_OP_WRITE:
1430	case REQ_OP_DISCARD:
1431	case REQ_OP_WRITE_ZEROES:
1432		ret = ubd_submit_request(ubd_dev, req);
1433		break;
1434	default:
1435		WARN_ON_ONCE(1);
1436		res = BLK_STS_NOTSUPP;
1437	}
1438
1439	spin_unlock_irq(&ubd_dev->lock);
1440
1441	if (ret < 0) {
1442		if (ret == -ENOMEM)
1443			res = BLK_STS_RESOURCE;
1444		else
1445			res = BLK_STS_DEV_RESOURCE;
1446	}
1447
1448	return res;
1449}
1450
1451static int ubd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
1452{
1453	struct ubd *ubd_dev = bdev->bd_disk->private_data;
1454
1455	geo->heads = 128;
1456	geo->sectors = 32;
1457	geo->cylinders = ubd_dev->size / (128 * 32 * 512);
1458	return 0;
1459}
1460
1461static int ubd_ioctl(struct block_device *bdev, fmode_t mode,
1462		     unsigned int cmd, unsigned long arg)
1463{
1464	struct ubd *ubd_dev = bdev->bd_disk->private_data;
1465	u16 ubd_id[ATA_ID_WORDS];
1466
1467	switch (cmd) {
1468		struct cdrom_volctrl volume;
1469	case HDIO_GET_IDENTITY:
1470		memset(&ubd_id, 0, ATA_ID_WORDS * 2);
1471		ubd_id[ATA_ID_CYLS]	= ubd_dev->size / (128 * 32 * 512);
1472		ubd_id[ATA_ID_HEADS]	= 128;
1473		ubd_id[ATA_ID_SECTORS]	= 32;
1474		if(copy_to_user((char __user *) arg, (char *) &ubd_id,
1475				 sizeof(ubd_id)))
1476			return -EFAULT;
1477		return 0;
1478
1479	case CDROMVOLREAD:
1480		if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
1481			return -EFAULT;
1482		volume.channel0 = 255;
1483		volume.channel1 = 255;
1484		volume.channel2 = 255;
1485		volume.channel3 = 255;
1486		if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
1487			return -EFAULT;
1488		return 0;
1489	}
1490	return -EINVAL;
1491}
1492
1493static int map_error(int error_code)
1494{
1495	switch (error_code) {
1496	case 0:
1497		return BLK_STS_OK;
1498	case ENOSYS:
1499	case EOPNOTSUPP:
1500		return BLK_STS_NOTSUPP;
1501	case ENOSPC:
1502		return BLK_STS_NOSPC;
1503	}
1504	return BLK_STS_IOERR;
1505}
1506
1507/*
1508 * Everything from here onwards *IS NOT PART OF THE KERNEL*
1509 *
1510 * The following functions are part of UML hypervisor code.
1511 * All functions from here onwards are executed as a helper
1512 * thread and are not allowed to execute any kernel functions.
1513 *
1514 * Any communication must occur strictly via shared memory and IPC.
1515 *
1516 * Do not add printks, locks, kernel memory operations, etc - it
1517 * will result in unpredictable behaviour and/or crashes.
1518 */
1519
1520static int update_bitmap(struct io_thread_req *req, struct io_desc *segment)
1521{
1522	int n;
1523
1524	if (segment->cow_offset == -1)
1525		return map_error(0);
1526
1527	n = os_pwrite_file(req->fds[1], &segment->bitmap_words,
1528			  sizeof(segment->bitmap_words), segment->cow_offset);
1529	if (n != sizeof(segment->bitmap_words))
1530		return map_error(-n);
1531
1532	return map_error(0);
1533}
1534
1535static void do_io(struct io_thread_req *req, struct io_desc *desc)
1536{
1537	char *buf = NULL;
1538	unsigned long len;
1539	int n, nsectors, start, end, bit;
1540	__u64 off;
1541
1542	/* FLUSH is really a special case, we cannot "case" it with others */
1543
1544	if (req_op(req->req) == REQ_OP_FLUSH) {
1545		/* fds[0] is always either the rw image or our cow file */
1546		req->error = map_error(-os_sync_file(req->fds[0]));
1547		return;
1548	}
1549
1550	nsectors = desc->length / req->sectorsize;
1551	start = 0;
1552	do {
1553		bit = ubd_test_bit(start, (unsigned char *) &desc->sector_mask);
1554		end = start;
1555		while((end < nsectors) &&
1556		      (ubd_test_bit(end, (unsigned char *) &desc->sector_mask) == bit))
1557			end++;
1558
1559		off = req->offset + req->offsets[bit] +
1560			start * req->sectorsize;
1561		len = (end - start) * req->sectorsize;
1562		if (desc->buffer != NULL)
1563			buf = &desc->buffer[start * req->sectorsize];
1564
1565		switch (req_op(req->req)) {
1566		case REQ_OP_READ:
1567			n = 0;
1568			do {
1569				buf = &buf[n];
1570				len -= n;
1571				n = os_pread_file(req->fds[bit], buf, len, off);
1572				if (n < 0) {
1573					req->error = map_error(-n);
1574					return;
1575				}
1576			} while((n < len) && (n != 0));
1577			if (n < len) memset(&buf[n], 0, len - n);
1578			break;
1579		case REQ_OP_WRITE:
1580			n = os_pwrite_file(req->fds[bit], buf, len, off);
1581			if(n != len){
1582				req->error = map_error(-n);
1583				return;
1584			}
1585			break;
1586		case REQ_OP_DISCARD:
1587		case REQ_OP_WRITE_ZEROES:
1588			n = os_falloc_punch(req->fds[bit], off, len);
1589			if (n) {
1590				req->error = map_error(-n);
1591				return;
1592			}
1593			break;
1594		default:
1595			WARN_ON_ONCE(1);
1596			req->error = BLK_STS_NOTSUPP;
1597			return;
1598		}
1599
1600		start = end;
1601	} while(start < nsectors);
1602
1603	req->offset += len;
1604	req->error = update_bitmap(req, desc);
1605}
1606
/* Changed in start_io_thread, which is serialized by being called only
 * from ubd_driver_init, which is an initcall.
 */
int kernel_fd = -1;

/* Only changed by the io thread, which increments it once per request it
 * picks up; nothing in this part of the file reads it back.
 * XXX: currently unused.
 */
static int io_count = 0;
1614
1615int io_thread(void *arg)
1616{
1617	int n, count, written, res;
1618
1619	os_fix_helper_signals();
1620
1621	while(1){
1622		n = bulk_req_safe_read(
1623			kernel_fd,
1624			io_req_buffer,
1625			&io_remainder,
1626			&io_remainder_size,
1627			UBD_REQ_BUFFER_SIZE
1628		);
1629		if (n <= 0) {
1630			if (n == -EAGAIN)
1631				ubd_read_poll(-1);
1632
1633			continue;
1634		}
1635
1636		for (count = 0; count < n/sizeof(struct io_thread_req *); count++) {
1637			struct io_thread_req *req = (*io_req_buffer)[count];
1638			int i;
1639
1640			io_count++;
1641			for (i = 0; !req->error && i < req->desc_cnt; i++)
1642				do_io(req, &(req->io_desc[i]));
1643
1644		}
1645
1646		written = 0;
1647
1648		do {
1649			res = os_write_file(kernel_fd,
1650					    ((char *) io_req_buffer) + written,
1651					    n - written);
1652			if (res >= 0) {
1653				written += res;
1654			}
1655			if (written < n) {
1656				ubd_write_poll(-1);
1657			}
1658		} while (written < n);
1659	}
1660
1661	return 0;
1662}
1663