1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * Virtio vhost-user driver
4 *
5 * Copyright(c) 2019 Intel Corporation
6 *
7 * This driver allows virtio devices to be used over a vhost-user socket.
8 *
9 * Guest devices can be instantiated by kernel module or command line
10 * parameters. One device will be created for each parameter. Syntax:
11 *
12 *		virtio_uml.device=<socket>:<virtio_id>[:<platform_id>]
13 * where:
14 *		<socket>	:= vhost-user socket path to connect
15 *		<virtio_id>	:= virtio device id (as in virtio_ids.h)
16 *		<platform_id>	:= (optional) platform device id
17 *
18 * example:
19 *		virtio_uml.device=/var/uml.socket:1
20 *
21 * Based on Virtio MMIO driver by Pawel Moll, copyright 2011-2014, ARM Ltd.
22 */
23#include <linux/module.h>
24#include <linux/platform_device.h>
25#include <linux/slab.h>
26#include <linux/virtio.h>
27#include <linux/virtio_config.h>
28#include <linux/virtio_ring.h>
29#include <linux/time-internal.h>
30#include <shared/as-layout.h>
31#include <irq_kern.h>
32#include <init.h>
33#include <os.h>
34#include "vhost_user.h"
35
36/* Workaround due to a conflict between irq_user.h and irqreturn.h */
37#ifdef IRQ_NONE
38#undef IRQ_NONE
39#endif
40
/* Ring size requested from vring_create_virtqueue() for every queue */
#define MAX_SUPPORTED_QUEUE_SIZE	256

/* Recover the enclosing virtio_uml_device from its embedded vdev */
#define to_virtio_uml_device(_virtio_dev) \
	container_of(_virtio_dev, struct virtio_uml_device, vdev)
45
46struct virtio_uml_platform_data {
47	u32 virtio_device_id;
48	const char *socket_path;
49	struct work_struct conn_broken_wk;
50	struct platform_device *pdev;
51};
52
53struct virtio_uml_device {
54	struct virtio_device vdev;
55	struct platform_device *pdev;
56
57	spinlock_t sock_lock;
58	int sock, req_fd;
59	u64 features;
60	u64 protocol_features;
61	u8 status;
62	u8 registered:1;
63};
64
/*
 * Per-virtqueue private data, stored in vq->priv.
 */
struct virtio_uml_vq_info {
	/* eventfd written by vu_notify(), or -1 for in-band kicks */
	int kick_fd;
	/* read end of the call pipe, or -1 when no call fd is used */
	int call_fd;
	/* "<pdev name>.<pdev id>-<queue name>", used for the vring and IRQ */
	char name[32];
#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
	/* back-pointer used by the deferred IRQ handler */
	struct virtqueue *vq;
	/* the driver's real callback, invoked from the deferred event */
	vq_callback_t *callback;
	/* time-travel event through which the callback is bounced */
	struct time_travel_event defer;
#endif
};
74
/* Memory layout values defined outside this file */
extern unsigned long long physmem_size;
extern unsigned long long highmem;

/* dev_err() on the platform device behind a struct virtio_uml_device */
#define vu_err(vu_dev, ...)	dev_err(&(vu_dev)->pdev->dev, ##__VA_ARGS__)
78
79/* Vhost-user protocol */
80
81static int full_sendmsg_fds(int fd, const void *buf, unsigned int len,
82			    const int *fds, unsigned int fds_num)
83{
84	int rc;
85
86	do {
87		rc = os_sendmsg_fds(fd, buf, len, fds, fds_num);
88		if (rc > 0) {
89			buf += rc;
90			len -= rc;
91			fds = NULL;
92			fds_num = 0;
93		}
94	} while (len && (rc >= 0 || rc == -EINTR));
95
96	if (rc < 0)
97		return rc;
98	return 0;
99}
100
101static int full_read(int fd, void *buf, int len, bool abortable)
102{
103	int rc;
104
105	do {
106		rc = os_read_file(fd, buf, len);
107		if (rc > 0) {
108			buf += rc;
109			len -= rc;
110		}
111	} while (len && (rc > 0 || rc == -EINTR || (!abortable && rc == -EAGAIN)));
112
113	if (rc < 0)
114		return rc;
115	if (rc == 0)
116		return -ECONNRESET;
117	return 0;
118}
119
120static int vhost_user_recv_header(int fd, struct vhost_user_msg *msg)
121{
122	return full_read(fd, msg, sizeof(msg->header), true);
123}
124
125static int vhost_user_recv(struct virtio_uml_device *vu_dev,
126			   int fd, struct vhost_user_msg *msg,
127			   size_t max_payload_size, bool wait)
128{
129	size_t size;
130	int rc;
131
132	/*
133	 * In virtio time-travel mode, we're handling all the vhost-user
134	 * FDs by polling them whenever appropriate. However, we may get
135	 * into a situation where we're sending out an interrupt message
136	 * to a device (e.g. a net device) and need to handle a simulation
137	 * time message while doing so, e.g. one that tells us to update
138	 * our idea of how long we can run without scheduling.
139	 *
140	 * Thus, we need to not just read() from the given fd, but need
141	 * to also handle messages for the simulation time - this function
142	 * does that for us while waiting for the given fd to be readable.
143	 */
144	if (wait)
145		time_travel_wait_readable(fd);
146
147	rc = vhost_user_recv_header(fd, msg);
148
149	if (rc == -ECONNRESET && vu_dev->registered) {
150		struct virtio_uml_platform_data *pdata;
151
152		pdata = vu_dev->pdev->dev.platform_data;
153
154		virtio_break_device(&vu_dev->vdev);
155		schedule_work(&pdata->conn_broken_wk);
156	}
157	if (rc)
158		return rc;
159	size = msg->header.size;
160	if (size > max_payload_size)
161		return -EPROTO;
162	return full_read(fd, &msg->payload, size, false);
163}
164
165static int vhost_user_recv_resp(struct virtio_uml_device *vu_dev,
166				struct vhost_user_msg *msg,
167				size_t max_payload_size)
168{
169	int rc = vhost_user_recv(vu_dev, vu_dev->sock, msg,
170				 max_payload_size, true);
171
172	if (rc)
173		return rc;
174
175	if (msg->header.flags != (VHOST_USER_FLAG_REPLY | VHOST_USER_VERSION))
176		return -EPROTO;
177
178	return 0;
179}
180
181static int vhost_user_recv_u64(struct virtio_uml_device *vu_dev,
182			       u64 *value)
183{
184	struct vhost_user_msg msg;
185	int rc = vhost_user_recv_resp(vu_dev, &msg,
186				      sizeof(msg.payload.integer));
187
188	if (rc)
189		return rc;
190	if (msg.header.size != sizeof(msg.payload.integer))
191		return -EPROTO;
192	*value = msg.payload.integer;
193	return 0;
194}
195
196static int vhost_user_recv_req(struct virtio_uml_device *vu_dev,
197			       struct vhost_user_msg *msg,
198			       size_t max_payload_size)
199{
200	int rc = vhost_user_recv(vu_dev, vu_dev->req_fd, msg,
201				 max_payload_size, false);
202
203	if (rc)
204		return rc;
205
206	if ((msg->header.flags & ~VHOST_USER_FLAG_NEED_REPLY) !=
207			VHOST_USER_VERSION)
208		return -EPROTO;
209
210	return 0;
211}
212
213static int vhost_user_send(struct virtio_uml_device *vu_dev,
214			   bool need_response, struct vhost_user_msg *msg,
215			   int *fds, size_t num_fds)
216{
217	size_t size = sizeof(msg->header) + msg->header.size;
218	unsigned long flags;
219	bool request_ack;
220	int rc;
221
222	msg->header.flags |= VHOST_USER_VERSION;
223
224	/*
225	 * The need_response flag indicates that we already need a response,
226	 * e.g. to read the features. In these cases, don't request an ACK as
227	 * it is meaningless. Also request an ACK only if supported.
228	 */
229	request_ack = !need_response;
230	if (!(vu_dev->protocol_features &
231			BIT_ULL(VHOST_USER_PROTOCOL_F_REPLY_ACK)))
232		request_ack = false;
233
234	if (request_ack)
235		msg->header.flags |= VHOST_USER_FLAG_NEED_REPLY;
236
237	spin_lock_irqsave(&vu_dev->sock_lock, flags);
238	rc = full_sendmsg_fds(vu_dev->sock, msg, size, fds, num_fds);
239	if (rc < 0)
240		goto out;
241
242	if (request_ack) {
243		uint64_t status;
244
245		rc = vhost_user_recv_u64(vu_dev, &status);
246		if (rc)
247			goto out;
248
249		if (status) {
250			vu_err(vu_dev, "slave reports error: %llu\n", status);
251			rc = -EIO;
252			goto out;
253		}
254	}
255
256out:
257	spin_unlock_irqrestore(&vu_dev->sock_lock, flags);
258	return rc;
259}
260
261static int vhost_user_send_no_payload(struct virtio_uml_device *vu_dev,
262				      bool need_response, u32 request)
263{
264	struct vhost_user_msg msg = {
265		.header.request = request,
266	};
267
268	return vhost_user_send(vu_dev, need_response, &msg, NULL, 0);
269}
270
271static int vhost_user_send_no_payload_fd(struct virtio_uml_device *vu_dev,
272					 u32 request, int fd)
273{
274	struct vhost_user_msg msg = {
275		.header.request = request,
276	};
277
278	return vhost_user_send(vu_dev, false, &msg, &fd, 1);
279}
280
281static int vhost_user_send_u64(struct virtio_uml_device *vu_dev,
282			       u32 request, u64 value)
283{
284	struct vhost_user_msg msg = {
285		.header.request = request,
286		.header.size = sizeof(msg.payload.integer),
287		.payload.integer = value,
288	};
289
290	return vhost_user_send(vu_dev, false, &msg, NULL, 0);
291}
292
293static int vhost_user_set_owner(struct virtio_uml_device *vu_dev)
294{
295	return vhost_user_send_no_payload(vu_dev, false, VHOST_USER_SET_OWNER);
296}
297
298static int vhost_user_get_features(struct virtio_uml_device *vu_dev,
299				   u64 *features)
300{
301	int rc = vhost_user_send_no_payload(vu_dev, true,
302					    VHOST_USER_GET_FEATURES);
303
304	if (rc)
305		return rc;
306	return vhost_user_recv_u64(vu_dev, features);
307}
308
309static int vhost_user_set_features(struct virtio_uml_device *vu_dev,
310				   u64 features)
311{
312	return vhost_user_send_u64(vu_dev, VHOST_USER_SET_FEATURES, features);
313}
314
315static int vhost_user_get_protocol_features(struct virtio_uml_device *vu_dev,
316					    u64 *protocol_features)
317{
318	int rc = vhost_user_send_no_payload(vu_dev, true,
319			VHOST_USER_GET_PROTOCOL_FEATURES);
320
321	if (rc)
322		return rc;
323	return vhost_user_recv_u64(vu_dev, protocol_features);
324}
325
326static int vhost_user_set_protocol_features(struct virtio_uml_device *vu_dev,
327					    u64 protocol_features)
328{
329	return vhost_user_send_u64(vu_dev, VHOST_USER_SET_PROTOCOL_FEATURES,
330				   protocol_features);
331}
332
333static void vhost_user_reply(struct virtio_uml_device *vu_dev,
334			     struct vhost_user_msg *msg, int response)
335{
336	struct vhost_user_msg reply = {
337		.payload.integer = response,
338	};
339	size_t size = sizeof(reply.header) + sizeof(reply.payload.integer);
340	int rc;
341
342	reply.header = msg->header;
343	reply.header.flags &= ~VHOST_USER_FLAG_NEED_REPLY;
344	reply.header.flags |= VHOST_USER_FLAG_REPLY;
345	reply.header.size = sizeof(reply.payload.integer);
346
347	rc = full_sendmsg_fds(vu_dev->req_fd, &reply, size, NULL, 0);
348
349	if (rc)
350		vu_err(vu_dev,
351		       "sending reply to slave request failed: %d (size %zu)\n",
352		       rc, size);
353}
354
355static irqreturn_t vu_req_interrupt(int irq, void *data)
356{
357	struct virtio_uml_device *vu_dev = data;
358	struct virtqueue *vq;
359	int response = 1;
360	struct {
361		struct vhost_user_msg msg;
362		u8 extra_payload[512];
363	} msg;
364	int rc;
365
366	rc = vhost_user_recv_req(vu_dev, &msg.msg,
367				 sizeof(msg.msg.payload) +
368				 sizeof(msg.extra_payload));
369
370	if (rc)
371		return IRQ_NONE;
372
373	switch (msg.msg.header.request) {
374	case VHOST_USER_SLAVE_CONFIG_CHANGE_MSG:
375		virtio_config_changed(&vu_dev->vdev);
376		response = 0;
377		break;
378	case VHOST_USER_SLAVE_VRING_CALL:
379		virtio_device_for_each_vq((&vu_dev->vdev), vq) {
380			if (vq->index == msg.msg.payload.vring_state.index) {
381				response = 0;
382				vring_interrupt(0 /* ignored */, vq);
383				break;
384			}
385		}
386		break;
387	case VHOST_USER_SLAVE_IOTLB_MSG:
388		/* not supported - VIRTIO_F_ACCESS_PLATFORM */
389	case VHOST_USER_SLAVE_VRING_HOST_NOTIFIER_MSG:
390		/* not supported - VHOST_USER_PROTOCOL_F_HOST_NOTIFIER */
391	default:
392		vu_err(vu_dev, "unexpected slave request %d\n",
393		       msg.msg.header.request);
394	}
395
396	if (msg.msg.header.flags & VHOST_USER_FLAG_NEED_REPLY)
397		vhost_user_reply(vu_dev, &msg.msg, response);
398
399	return IRQ_HANDLED;
400}
401
402static int vhost_user_init_slave_req(struct virtio_uml_device *vu_dev)
403{
404	int rc, req_fds[2];
405
406	/* Use a pipe for slave req fd, SIGIO is not supported for eventfd */
407	rc = os_pipe(req_fds, true, true);
408	if (rc < 0)
409		return rc;
410	vu_dev->req_fd = req_fds[0];
411
412	rc = um_request_irq(VIRTIO_IRQ, vu_dev->req_fd, IRQ_READ,
413			    vu_req_interrupt, IRQF_SHARED,
414			    vu_dev->pdev->name, vu_dev);
415	if (rc)
416		goto err_close;
417
418	rc = vhost_user_send_no_payload_fd(vu_dev, VHOST_USER_SET_SLAVE_REQ_FD,
419					   req_fds[1]);
420	if (rc)
421		goto err_free_irq;
422
423	goto out;
424
425err_free_irq:
426	um_free_irq(VIRTIO_IRQ, vu_dev);
427err_close:
428	os_close_file(req_fds[0]);
429out:
430	/* Close unused write end of request fds */
431	os_close_file(req_fds[1]);
432	return rc;
433}
434
435static int vhost_user_init(struct virtio_uml_device *vu_dev)
436{
437	int rc = vhost_user_set_owner(vu_dev);
438
439	if (rc)
440		return rc;
441	rc = vhost_user_get_features(vu_dev, &vu_dev->features);
442	if (rc)
443		return rc;
444
445	if (vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)) {
446		rc = vhost_user_get_protocol_features(vu_dev,
447				&vu_dev->protocol_features);
448		if (rc)
449			return rc;
450		vu_dev->protocol_features &= VHOST_USER_SUPPORTED_PROTOCOL_F;
451		rc = vhost_user_set_protocol_features(vu_dev,
452				vu_dev->protocol_features);
453		if (rc)
454			return rc;
455	}
456
457	if (vu_dev->protocol_features &
458			BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ)) {
459		rc = vhost_user_init_slave_req(vu_dev);
460		if (rc)
461			return rc;
462	}
463
464	return 0;
465}
466
467static void vhost_user_get_config(struct virtio_uml_device *vu_dev,
468				  u32 offset, void *buf, u32 len)
469{
470	u32 cfg_size = offset + len;
471	struct vhost_user_msg *msg;
472	size_t payload_size = sizeof(msg->payload.config) + cfg_size;
473	size_t msg_size = sizeof(msg->header) + payload_size;
474	int rc;
475
476	if (!(vu_dev->protocol_features &
477	      BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)))
478		return;
479
480	msg = kzalloc(msg_size, GFP_KERNEL);
481	if (!msg)
482		return;
483	msg->header.request = VHOST_USER_GET_CONFIG;
484	msg->header.size = payload_size;
485	msg->payload.config.offset = 0;
486	msg->payload.config.size = cfg_size;
487
488	rc = vhost_user_send(vu_dev, true, msg, NULL, 0);
489	if (rc) {
490		vu_err(vu_dev, "sending VHOST_USER_GET_CONFIG failed: %d\n",
491		       rc);
492		goto free;
493	}
494
495	rc = vhost_user_recv_resp(vu_dev, msg, msg_size);
496	if (rc) {
497		vu_err(vu_dev,
498		       "receiving VHOST_USER_GET_CONFIG response failed: %d\n",
499		       rc);
500		goto free;
501	}
502
503	if (msg->header.size != payload_size ||
504	    msg->payload.config.size != cfg_size) {
505		rc = -EPROTO;
506		vu_err(vu_dev,
507		       "Invalid VHOST_USER_GET_CONFIG sizes (payload %d expected %zu, config %u expected %u)\n",
508		       msg->header.size, payload_size,
509		       msg->payload.config.size, cfg_size);
510		goto free;
511	}
512	memcpy(buf, msg->payload.config.payload + offset, len);
513
514free:
515	kfree(msg);
516}
517
518static void vhost_user_set_config(struct virtio_uml_device *vu_dev,
519				  u32 offset, const void *buf, u32 len)
520{
521	struct vhost_user_msg *msg;
522	size_t payload_size = sizeof(msg->payload.config) + len;
523	size_t msg_size = sizeof(msg->header) + payload_size;
524	int rc;
525
526	if (!(vu_dev->protocol_features &
527	      BIT_ULL(VHOST_USER_PROTOCOL_F_CONFIG)))
528		return;
529
530	msg = kzalloc(msg_size, GFP_KERNEL);
531	if (!msg)
532		return;
533	msg->header.request = VHOST_USER_SET_CONFIG;
534	msg->header.size = payload_size;
535	msg->payload.config.offset = offset;
536	msg->payload.config.size = len;
537	memcpy(msg->payload.config.payload, buf, len);
538
539	rc = vhost_user_send(vu_dev, false, msg, NULL, 0);
540	if (rc)
541		vu_err(vu_dev, "sending VHOST_USER_SET_CONFIG failed: %d\n",
542		       rc);
543
544	kfree(msg);
545}
546
547static int vhost_user_init_mem_region(u64 addr, u64 size, int *fd_out,
548				      struct vhost_user_mem_region *region_out)
549{
550	unsigned long long mem_offset;
551	int rc = phys_mapping(addr, &mem_offset);
552
553	if (WARN(rc < 0, "phys_mapping of 0x%llx returned %d\n", addr, rc))
554		return -EFAULT;
555	*fd_out = rc;
556	region_out->guest_addr = addr;
557	region_out->user_addr = addr;
558	region_out->size = size;
559	region_out->mmap_offset = mem_offset;
560
561	/* Ensure mapping is valid for the entire region */
562	rc = phys_mapping(addr + size - 1, &mem_offset);
563	if (WARN(rc != *fd_out, "phys_mapping of 0x%llx failed: %d != %d\n",
564		 addr + size - 1, rc, *fd_out))
565		return -EFAULT;
566	return 0;
567}
568
569static int vhost_user_set_mem_table(struct virtio_uml_device *vu_dev)
570{
571	struct vhost_user_msg msg = {
572		.header.request = VHOST_USER_SET_MEM_TABLE,
573		.header.size = sizeof(msg.payload.mem_regions),
574		.payload.mem_regions.num = 1,
575	};
576	unsigned long reserved = uml_reserved - uml_physmem;
577	int fds[2];
578	int rc;
579
580	/*
581	 * This is a bit tricky, see also the comment with setup_physmem().
582	 *
583	 * Essentially, setup_physmem() uses a file to mmap() our physmem,
584	 * but the code and data we *already* have is omitted. To us, this
585	 * is no difference, since they both become part of our address
586	 * space and memory consumption. To somebody looking in from the
587	 * outside, however, it is different because the part of our memory
588	 * consumption that's already part of the binary (code/data) is not
589	 * mapped from the file, so it's not visible to another mmap from
590	 * the file descriptor.
591	 *
592	 * Thus, don't advertise this space to the vhost-user slave. This
593	 * means that the slave will likely abort or similar when we give
594	 * it an address from the hidden range, since it's not marked as
595	 * a valid address, but at least that way we detect the issue and
596	 * don't just have the slave read an all-zeroes buffer from the
597	 * shared memory file, or write something there that we can never
598	 * see (depending on the direction of the virtqueue traffic.)
599	 *
600	 * Since we usually don't want to use .text for virtio buffers,
601	 * this effectively means that you cannot use
602	 *  1) global variables, which are in the .bss and not in the shm
603	 *     file-backed memory
604	 *  2) the stack in some processes, depending on where they have
605	 *     their stack (or maybe only no interrupt stack?)
606	 *
607	 * The stack is already not typically valid for DMA, so this isn't
608	 * much of a restriction, but global variables might be encountered.
609	 *
610	 * It might be possible to fix it by copying around the data that's
611	 * between bss_start and where we map the file now, but it's not
612	 * something that you typically encounter with virtio drivers, so
613	 * it didn't seem worthwhile.
614	 */
615	rc = vhost_user_init_mem_region(reserved, physmem_size - reserved,
616					&fds[0],
617					&msg.payload.mem_regions.regions[0]);
618
619	if (rc < 0)
620		return rc;
621	if (highmem) {
622		msg.payload.mem_regions.num++;
623		rc = vhost_user_init_mem_region(__pa(end_iomem), highmem,
624				&fds[1], &msg.payload.mem_regions.regions[1]);
625		if (rc < 0)
626			return rc;
627	}
628
629	return vhost_user_send(vu_dev, false, &msg, fds,
630			       msg.payload.mem_regions.num);
631}
632
633static int vhost_user_set_vring_state(struct virtio_uml_device *vu_dev,
634				      u32 request, u32 index, u32 num)
635{
636	struct vhost_user_msg msg = {
637		.header.request = request,
638		.header.size = sizeof(msg.payload.vring_state),
639		.payload.vring_state.index = index,
640		.payload.vring_state.num = num,
641	};
642
643	return vhost_user_send(vu_dev, false, &msg, NULL, 0);
644}
645
646static int vhost_user_set_vring_num(struct virtio_uml_device *vu_dev,
647				    u32 index, u32 num)
648{
649	return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_NUM,
650					  index, num);
651}
652
653static int vhost_user_set_vring_base(struct virtio_uml_device *vu_dev,
654				     u32 index, u32 offset)
655{
656	return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_BASE,
657					  index, offset);
658}
659
660static int vhost_user_set_vring_addr(struct virtio_uml_device *vu_dev,
661				     u32 index, u64 desc, u64 used, u64 avail,
662				     u64 log)
663{
664	struct vhost_user_msg msg = {
665		.header.request = VHOST_USER_SET_VRING_ADDR,
666		.header.size = sizeof(msg.payload.vring_addr),
667		.payload.vring_addr.index = index,
668		.payload.vring_addr.desc = desc,
669		.payload.vring_addr.used = used,
670		.payload.vring_addr.avail = avail,
671		.payload.vring_addr.log = log,
672	};
673
674	return vhost_user_send(vu_dev, false, &msg, NULL, 0);
675}
676
677static int vhost_user_set_vring_fd(struct virtio_uml_device *vu_dev,
678				   u32 request, int index, int fd)
679{
680	struct vhost_user_msg msg = {
681		.header.request = request,
682		.header.size = sizeof(msg.payload.integer),
683		.payload.integer = index,
684	};
685
686	if (index & ~VHOST_USER_VRING_INDEX_MASK)
687		return -EINVAL;
688	if (fd < 0) {
689		msg.payload.integer |= VHOST_USER_VRING_POLL_MASK;
690		return vhost_user_send(vu_dev, false, &msg, NULL, 0);
691	}
692	return vhost_user_send(vu_dev, false, &msg, &fd, 1);
693}
694
695static int vhost_user_set_vring_call(struct virtio_uml_device *vu_dev,
696				     int index, int fd)
697{
698	return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_CALL,
699				       index, fd);
700}
701
702static int vhost_user_set_vring_kick(struct virtio_uml_device *vu_dev,
703				     int index, int fd)
704{
705	return vhost_user_set_vring_fd(vu_dev, VHOST_USER_SET_VRING_KICK,
706				       index, fd);
707}
708
709static int vhost_user_set_vring_enable(struct virtio_uml_device *vu_dev,
710				       u32 index, bool enable)
711{
712	if (!(vu_dev->features & BIT_ULL(VHOST_USER_F_PROTOCOL_FEATURES)))
713		return 0;
714
715	return vhost_user_set_vring_state(vu_dev, VHOST_USER_SET_VRING_ENABLE,
716					  index, enable);
717}
718
719
720/* Virtio interface */
721
722static bool vu_notify(struct virtqueue *vq)
723{
724	struct virtio_uml_vq_info *info = vq->priv;
725	const uint64_t n = 1;
726	int rc;
727
728	time_travel_propagate_time();
729
730	if (info->kick_fd < 0) {
731		struct virtio_uml_device *vu_dev;
732
733		vu_dev = to_virtio_uml_device(vq->vdev);
734
735		return vhost_user_set_vring_state(vu_dev, VHOST_USER_VRING_KICK,
736						  vq->index, 0) == 0;
737	}
738
739	do {
740		rc = os_write_file(info->kick_fd, &n, sizeof(n));
741	} while (rc == -EINTR);
742	return !WARN(rc != sizeof(n), "write returned %d\n", rc);
743}
744
745static irqreturn_t vu_interrupt(int irq, void *opaque)
746{
747	struct virtqueue *vq = opaque;
748	struct virtio_uml_vq_info *info = vq->priv;
749	uint64_t n;
750	int rc;
751	irqreturn_t ret = IRQ_NONE;
752
753	do {
754		rc = os_read_file(info->call_fd, &n, sizeof(n));
755		if (rc == sizeof(n))
756			ret |= vring_interrupt(irq, vq);
757	} while (rc == sizeof(n) || rc == -EINTR);
758	WARN(rc != -EAGAIN, "read returned %d\n", rc);
759	return ret;
760}
761
762
/* virtio_config_ops.get: read @len config bytes at @offset into @buf. */
static void vu_get(struct virtio_device *vdev, unsigned offset,
		   void *buf, unsigned len)
{
	vhost_user_get_config(to_virtio_uml_device(vdev), offset, buf, len);
}
770
/* virtio_config_ops.set: write @len config bytes from @buf at @offset. */
static void vu_set(struct virtio_device *vdev, unsigned offset,
		   const void *buf, unsigned len)
{
	vhost_user_set_config(to_virtio_uml_device(vdev), offset, buf, len);
}
778
779static u8 vu_get_status(struct virtio_device *vdev)
780{
781	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
782
783	return vu_dev->status;
784}
785
786static void vu_set_status(struct virtio_device *vdev, u8 status)
787{
788	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
789
790	vu_dev->status = status;
791}
792
793static void vu_reset(struct virtio_device *vdev)
794{
795	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
796
797	vu_dev->status = 0;
798}
799
800static void vu_del_vq(struct virtqueue *vq)
801{
802	struct virtio_uml_vq_info *info = vq->priv;
803
804	if (info->call_fd >= 0) {
805		um_free_irq(VIRTIO_IRQ, vq);
806		os_close_file(info->call_fd);
807	}
808
809	if (info->kick_fd >= 0)
810		os_close_file(info->kick_fd);
811
812	vring_del_virtqueue(vq);
813	kfree(info);
814}
815
816static void vu_del_vqs(struct virtio_device *vdev)
817{
818	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
819	struct virtqueue *vq, *n;
820	u64 features;
821
822	/* Note: reverse order as a workaround to a decoding bug in snabb */
823	list_for_each_entry_reverse(vq, &vdev->vqs, list)
824		WARN_ON(vhost_user_set_vring_enable(vu_dev, vq->index, false));
825
826	/* Ensure previous messages have been processed */
827	WARN_ON(vhost_user_get_features(vu_dev, &features));
828
829	list_for_each_entry_safe(vq, n, &vdev->vqs, list)
830		vu_del_vq(vq);
831}
832
833static int vu_setup_vq_call_fd(struct virtio_uml_device *vu_dev,
834			       struct virtqueue *vq)
835{
836	struct virtio_uml_vq_info *info = vq->priv;
837	int call_fds[2];
838	int rc;
839
840	/* no call FD needed/desired in this case */
841	if (vu_dev->protocol_features &
842			BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS) &&
843	    vu_dev->protocol_features &
844			BIT_ULL(VHOST_USER_PROTOCOL_F_SLAVE_REQ)) {
845		info->call_fd = -1;
846		return 0;
847	}
848
849	/* Use a pipe for call fd, since SIGIO is not supported for eventfd */
850	rc = os_pipe(call_fds, true, true);
851	if (rc < 0)
852		return rc;
853
854	info->call_fd = call_fds[0];
855	rc = um_request_irq(VIRTIO_IRQ, info->call_fd, IRQ_READ,
856			    vu_interrupt, IRQF_SHARED, info->name, vq);
857	if (rc)
858		goto close_both;
859
860	rc = vhost_user_set_vring_call(vu_dev, vq->index, call_fds[1]);
861	if (rc)
862		goto release_irq;
863
864	goto out;
865
866release_irq:
867	um_free_irq(VIRTIO_IRQ, vq);
868close_both:
869	os_close_file(call_fds[0]);
870out:
871	/* Close (unused) write end of call fds */
872	os_close_file(call_fds[1]);
873
874	return rc;
875}
876
877#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
878static void vu_defer_irq_handle(struct time_travel_event *d)
879{
880	struct virtio_uml_vq_info *info;
881
882	info = container_of(d, struct virtio_uml_vq_info, defer);
883	info->callback(info->vq);
884}
885
886static void vu_defer_irq_callback(struct virtqueue *vq)
887{
888	struct virtio_uml_vq_info *info = vq->priv;
889
890	time_travel_add_irq_event(&info->defer);
891}
892#endif
893
894static struct virtqueue *vu_setup_vq(struct virtio_device *vdev,
895				     unsigned index, vq_callback_t *callback,
896				     const char *name, bool ctx)
897{
898	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
899	struct platform_device *pdev = vu_dev->pdev;
900	struct virtio_uml_vq_info *info;
901	struct virtqueue *vq;
902	int num = MAX_SUPPORTED_QUEUE_SIZE;
903	int rc;
904
905	info = kzalloc(sizeof(*info), GFP_KERNEL);
906	if (!info) {
907		rc = -ENOMEM;
908		goto error_kzalloc;
909	}
910	snprintf(info->name, sizeof(info->name), "%s.%d-%s", pdev->name,
911		 pdev->id, name);
912
913#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
914	/*
915	 * When we get an interrupt, we must bounce it through the simulation
916	 * calendar (the simtime device), except for the simtime device itself
917	 * since that's part of the simulation control.
918	 */
919	if (time_travel_mode == TT_MODE_EXTERNAL && callback) {
920		info->callback = callback;
921		callback = vu_defer_irq_callback;
922		time_travel_set_event_fn(&info->defer, vu_defer_irq_handle);
923	}
924#endif
925
926	vq = vring_create_virtqueue(index, num, PAGE_SIZE, vdev, true, true,
927				    ctx, vu_notify, callback, info->name);
928	if (!vq) {
929		rc = -ENOMEM;
930		goto error_create;
931	}
932	vq->priv = info;
933	num = virtqueue_get_vring_size(vq);
934#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
935	info->vq = vq;
936#endif
937
938	if (vu_dev->protocol_features &
939			BIT_ULL(VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS)) {
940		info->kick_fd = -1;
941	} else {
942		rc = os_eventfd(0, 0);
943		if (rc < 0)
944			goto error_kick;
945		info->kick_fd = rc;
946	}
947
948	rc = vu_setup_vq_call_fd(vu_dev, vq);
949	if (rc)
950		goto error_call;
951
952	rc = vhost_user_set_vring_num(vu_dev, index, num);
953	if (rc)
954		goto error_setup;
955
956	rc = vhost_user_set_vring_base(vu_dev, index, 0);
957	if (rc)
958		goto error_setup;
959
960	rc = vhost_user_set_vring_addr(vu_dev, index,
961				       virtqueue_get_desc_addr(vq),
962				       virtqueue_get_used_addr(vq),
963				       virtqueue_get_avail_addr(vq),
964				       (u64) -1);
965	if (rc)
966		goto error_setup;
967
968	return vq;
969
970error_setup:
971	if (info->call_fd >= 0) {
972		um_free_irq(VIRTIO_IRQ, vq);
973		os_close_file(info->call_fd);
974	}
975error_call:
976	if (info->kick_fd >= 0)
977		os_close_file(info->kick_fd);
978error_kick:
979	vring_del_virtqueue(vq);
980error_create:
981	kfree(info);
982error_kzalloc:
983	return ERR_PTR(rc);
984}
985
986static int vu_find_vqs(struct virtio_device *vdev, unsigned nvqs,
987		       struct virtqueue *vqs[], vq_callback_t *callbacks[],
988		       const char * const names[], const bool *ctx,
989		       struct irq_affinity *desc)
990{
991	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
992	int i, queue_idx = 0, rc;
993	struct virtqueue *vq;
994
995	rc = vhost_user_set_mem_table(vu_dev);
996	if (rc)
997		return rc;
998
999	for (i = 0; i < nvqs; ++i) {
1000		if (!names[i]) {
1001			vqs[i] = NULL;
1002			continue;
1003		}
1004
1005		vqs[i] = vu_setup_vq(vdev, queue_idx++, callbacks[i], names[i],
1006				     ctx ? ctx[i] : false);
1007		if (IS_ERR(vqs[i])) {
1008			rc = PTR_ERR(vqs[i]);
1009			goto error_setup;
1010		}
1011	}
1012
1013	list_for_each_entry(vq, &vdev->vqs, list) {
1014		struct virtio_uml_vq_info *info = vq->priv;
1015
1016		if (info->kick_fd >= 0) {
1017			rc = vhost_user_set_vring_kick(vu_dev, vq->index,
1018						       info->kick_fd);
1019			if (rc)
1020				goto error_setup;
1021		}
1022
1023		rc = vhost_user_set_vring_enable(vu_dev, vq->index, true);
1024		if (rc)
1025			goto error_setup;
1026	}
1027
1028	return 0;
1029
1030error_setup:
1031	vu_del_vqs(vdev);
1032	return rc;
1033}
1034
1035static u64 vu_get_features(struct virtio_device *vdev)
1036{
1037	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
1038
1039	return vu_dev->features;
1040}
1041
1042static int vu_finalize_features(struct virtio_device *vdev)
1043{
1044	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
1045	u64 supported = vdev->features & VHOST_USER_SUPPORTED_F;
1046
1047	vring_transport_features(vdev);
1048	vu_dev->features = vdev->features | supported;
1049
1050	return vhost_user_set_features(vu_dev, vu_dev->features);
1051}
1052
1053static const char *vu_bus_name(struct virtio_device *vdev)
1054{
1055	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
1056
1057	return vu_dev->pdev->name;
1058}
1059
1060static const struct virtio_config_ops virtio_uml_config_ops = {
1061	.get = vu_get,
1062	.set = vu_set,
1063	.get_status = vu_get_status,
1064	.set_status = vu_set_status,
1065	.reset = vu_reset,
1066	.find_vqs = vu_find_vqs,
1067	.del_vqs = vu_del_vqs,
1068	.get_features = vu_get_features,
1069	.finalize_features = vu_finalize_features,
1070	.bus_name = vu_bus_name,
1071};
1072
1073static void virtio_uml_release_dev(struct device *d)
1074{
1075	struct virtio_device *vdev =
1076			container_of(d, struct virtio_device, dev);
1077	struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev);
1078
1079	time_travel_propagate_time();
1080
1081	/* might not have been opened due to not negotiating the feature */
1082	if (vu_dev->req_fd >= 0) {
1083		um_free_irq(VIRTIO_IRQ, vu_dev);
1084		os_close_file(vu_dev->req_fd);
1085	}
1086
1087	os_close_file(vu_dev->sock);
1088	kfree(vu_dev);
1089}
1090
1091/* Platform device */
1092
1093static int virtio_uml_probe(struct platform_device *pdev)
1094{
1095	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
1096	struct virtio_uml_device *vu_dev;
1097	int rc;
1098
1099	if (!pdata)
1100		return -EINVAL;
1101
1102	vu_dev = kzalloc(sizeof(*vu_dev), GFP_KERNEL);
1103	if (!vu_dev)
1104		return -ENOMEM;
1105
1106	vu_dev->vdev.dev.parent = &pdev->dev;
1107	vu_dev->vdev.dev.release = virtio_uml_release_dev;
1108	vu_dev->vdev.config = &virtio_uml_config_ops;
1109	vu_dev->vdev.id.device = pdata->virtio_device_id;
1110	vu_dev->vdev.id.vendor = VIRTIO_DEV_ANY_ID;
1111	vu_dev->pdev = pdev;
1112	vu_dev->req_fd = -1;
1113
1114	time_travel_propagate_time();
1115
1116	do {
1117		rc = os_connect_socket(pdata->socket_path);
1118	} while (rc == -EINTR);
1119	if (rc < 0)
1120		goto error_free;
1121	vu_dev->sock = rc;
1122
1123	spin_lock_init(&vu_dev->sock_lock);
1124
1125	rc = vhost_user_init(vu_dev);
1126	if (rc)
1127		goto error_init;
1128
1129	platform_set_drvdata(pdev, vu_dev);
1130
1131	rc = register_virtio_device(&vu_dev->vdev);
1132	if (rc)
1133		put_device(&vu_dev->vdev.dev);
1134	vu_dev->registered = 1;
1135	return rc;
1136
1137error_init:
1138	os_close_file(vu_dev->sock);
1139error_free:
1140	kfree(vu_dev);
1141	return rc;
1142}
1143
1144static int virtio_uml_remove(struct platform_device *pdev)
1145{
1146	struct virtio_uml_device *vu_dev = platform_get_drvdata(pdev);
1147
1148	unregister_virtio_device(&vu_dev->vdev);
1149	return 0;
1150}
1151
1152/* Command line device list */
1153
1154static void vu_cmdline_release_dev(struct device *d)
1155{
1156}
1157
1158static struct device vu_cmdline_parent = {
1159	.init_name = "virtio-uml-cmdline",
1160	.release = vu_cmdline_release_dev,
1161};
1162
1163static bool vu_cmdline_parent_registered;
1164static int vu_cmdline_id;
1165
1166static int vu_unregister_cmdline_device(struct device *dev, void *data)
1167{
1168	struct platform_device *pdev = to_platform_device(dev);
1169	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
1170
1171	kfree(pdata->socket_path);
1172	platform_device_unregister(pdev);
1173	return 0;
1174}
1175
1176static void vu_conn_broken(struct work_struct *wk)
1177{
1178	struct virtio_uml_platform_data *pdata;
1179
1180	pdata = container_of(wk, struct virtio_uml_platform_data, conn_broken_wk);
1181	vu_unregister_cmdline_device(&pdata->pdev->dev, NULL);
1182}
1183
1184static int vu_cmdline_set(const char *device, const struct kernel_param *kp)
1185{
1186	const char *ids = strchr(device, ':');
1187	unsigned int virtio_device_id;
1188	int processed, consumed, err;
1189	char *socket_path;
1190	struct virtio_uml_platform_data pdata, *ppdata;
1191	struct platform_device *pdev;
1192
1193	if (!ids || ids == device)
1194		return -EINVAL;
1195
1196	processed = sscanf(ids, ":%u%n:%d%n",
1197			   &virtio_device_id, &consumed,
1198			   &vu_cmdline_id, &consumed);
1199
1200	if (processed < 1 || ids[consumed])
1201		return -EINVAL;
1202
1203	if (!vu_cmdline_parent_registered) {
1204		err = device_register(&vu_cmdline_parent);
1205		if (err) {
1206			pr_err("Failed to register parent device!\n");
1207			put_device(&vu_cmdline_parent);
1208			return err;
1209		}
1210		vu_cmdline_parent_registered = true;
1211	}
1212
1213	socket_path = kmemdup_nul(device, ids - device, GFP_KERNEL);
1214	if (!socket_path)
1215		return -ENOMEM;
1216
1217	pdata.virtio_device_id = (u32) virtio_device_id;
1218	pdata.socket_path = socket_path;
1219
1220	pr_info("Registering device virtio-uml.%d id=%d at %s\n",
1221		vu_cmdline_id, virtio_device_id, socket_path);
1222
1223	pdev = platform_device_register_data(&vu_cmdline_parent, "virtio-uml",
1224					     vu_cmdline_id++, &pdata,
1225					     sizeof(pdata));
1226	err = PTR_ERR_OR_ZERO(pdev);
1227	if (err)
1228		goto free;
1229
1230	ppdata = pdev->dev.platform_data;
1231	ppdata->pdev = pdev;
1232	INIT_WORK(&ppdata->conn_broken_wk, vu_conn_broken);
1233
1234	return 0;
1235
1236free:
1237	kfree(socket_path);
1238	return err;
1239}
1240
1241static int vu_cmdline_get_device(struct device *dev, void *data)
1242{
1243	struct platform_device *pdev = to_platform_device(dev);
1244	struct virtio_uml_platform_data *pdata = pdev->dev.platform_data;
1245	char *buffer = data;
1246	unsigned int len = strlen(buffer);
1247
1248	snprintf(buffer + len, PAGE_SIZE - len, "%s:%d:%d\n",
1249		 pdata->socket_path, pdata->virtio_device_id, pdev->id);
1250	return 0;
1251}
1252
1253static int vu_cmdline_get(char *buffer, const struct kernel_param *kp)
1254{
1255	buffer[0] = '\0';
1256	if (vu_cmdline_parent_registered)
1257		device_for_each_child(&vu_cmdline_parent, buffer,
1258				      vu_cmdline_get_device);
1259	return strlen(buffer) + 1;
1260}
1261
1262static const struct kernel_param_ops vu_cmdline_param_ops = {
1263	.set = vu_cmdline_set,
1264	.get = vu_cmdline_get,
1265};
1266
1267device_param_cb(device, &vu_cmdline_param_ops, NULL, S_IRUSR);
1268__uml_help(vu_cmdline_param_ops,
1269"virtio_uml.device=<socket>:<virtio_id>[:<platform_id>]\n"
1270"    Configure a virtio device over a vhost-user socket.\n"
1271"    See virtio_ids.h for a list of possible virtio device id values.\n"
1272"    Optionally use a specific platform_device id.\n\n"
1273);
1274
1275
1276static void vu_unregister_cmdline_devices(void)
1277{
1278	if (vu_cmdline_parent_registered) {
1279		device_for_each_child(&vu_cmdline_parent, NULL,
1280				      vu_unregister_cmdline_device);
1281		device_unregister(&vu_cmdline_parent);
1282		vu_cmdline_parent_registered = false;
1283	}
1284}
1285
1286/* Platform driver */
1287
1288static const struct of_device_id virtio_uml_match[] = {
1289	{ .compatible = "virtio,uml", },
1290	{ }
1291};
1292MODULE_DEVICE_TABLE(of, virtio_uml_match);
1293
1294static struct platform_driver virtio_uml_driver = {
1295	.probe = virtio_uml_probe,
1296	.remove = virtio_uml_remove,
1297	.driver = {
1298		.name = "virtio-uml",
1299		.of_match_table = virtio_uml_match,
1300	},
1301};
1302
1303static int __init virtio_uml_init(void)
1304{
1305	return platform_driver_register(&virtio_uml_driver);
1306}
1307
1308static void __exit virtio_uml_exit(void)
1309{
1310	platform_driver_unregister(&virtio_uml_driver);
1311	vu_unregister_cmdline_devices();
1312}
1313
1314module_init(virtio_uml_init);
1315module_exit(virtio_uml_exit);
1316__uml_exitcall(virtio_uml_exit);
1317
1318MODULE_DESCRIPTION("UML driver for vhost-user virtio devices");
1319MODULE_LICENSE("GPL");
1320