18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 28c2ecf20Sopenharmony_ci/* Copyright (C) 2009 Red Hat, Inc. 38c2ecf20Sopenharmony_ci * Copyright (C) 2006 Rusty Russell IBM Corporation 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * Author: Michael S. Tsirkin <mst@redhat.com> 68c2ecf20Sopenharmony_ci * 78c2ecf20Sopenharmony_ci * Inspiration, some code, and most witty comments come from 88c2ecf20Sopenharmony_ci * Documentation/virtual/lguest/lguest.c, by Rusty Russell 98c2ecf20Sopenharmony_ci * 108c2ecf20Sopenharmony_ci * Generic code for virtio server in host kernel. 118c2ecf20Sopenharmony_ci */ 128c2ecf20Sopenharmony_ci 138c2ecf20Sopenharmony_ci#include <linux/eventfd.h> 148c2ecf20Sopenharmony_ci#include <linux/vhost.h> 158c2ecf20Sopenharmony_ci#include <linux/uio.h> 168c2ecf20Sopenharmony_ci#include <linux/mm.h> 178c2ecf20Sopenharmony_ci#include <linux/miscdevice.h> 188c2ecf20Sopenharmony_ci#include <linux/mutex.h> 198c2ecf20Sopenharmony_ci#include <linux/poll.h> 208c2ecf20Sopenharmony_ci#include <linux/file.h> 218c2ecf20Sopenharmony_ci#include <linux/highmem.h> 228c2ecf20Sopenharmony_ci#include <linux/slab.h> 238c2ecf20Sopenharmony_ci#include <linux/vmalloc.h> 248c2ecf20Sopenharmony_ci#include <linux/kthread.h> 258c2ecf20Sopenharmony_ci#include <linux/cgroup.h> 268c2ecf20Sopenharmony_ci#include <linux/module.h> 278c2ecf20Sopenharmony_ci#include <linux/sort.h> 288c2ecf20Sopenharmony_ci#include <linux/sched/mm.h> 298c2ecf20Sopenharmony_ci#include <linux/sched/signal.h> 308c2ecf20Sopenharmony_ci#include <linux/interval_tree_generic.h> 318c2ecf20Sopenharmony_ci#include <linux/nospec.h> 328c2ecf20Sopenharmony_ci#include <linux/kcov.h> 338c2ecf20Sopenharmony_ci 348c2ecf20Sopenharmony_ci#include "vhost.h" 358c2ecf20Sopenharmony_ci 368c2ecf20Sopenharmony_cistatic ushort max_mem_regions = 64; 378c2ecf20Sopenharmony_cimodule_param(max_mem_regions, ushort, 0444); 388c2ecf20Sopenharmony_ciMODULE_PARM_DESC(max_mem_regions, 398c2ecf20Sopenharmony_ci "Maximum number of memory regions in memory map. (default: 64)"); 408c2ecf20Sopenharmony_cistatic int max_iotlb_entries = 2048; 418c2ecf20Sopenharmony_cimodule_param(max_iotlb_entries, int, 0444); 428c2ecf20Sopenharmony_ciMODULE_PARM_DESC(max_iotlb_entries, 438c2ecf20Sopenharmony_ci "Maximum number of iotlb entries. (default: 2048)"); 448c2ecf20Sopenharmony_ci 458c2ecf20Sopenharmony_cienum { 468c2ecf20Sopenharmony_ci VHOST_MEMORY_F_LOG = 0x1, 478c2ecf20Sopenharmony_ci}; 488c2ecf20Sopenharmony_ci 498c2ecf20Sopenharmony_ci#define vhost_used_event(vq) ((__virtio16 __user *)&vq->avail->ring[vq->num]) 508c2ecf20Sopenharmony_ci#define vhost_avail_event(vq) ((__virtio16 __user *)&vq->used->ring[vq->num]) 518c2ecf20Sopenharmony_ci 528c2ecf20Sopenharmony_ci#ifdef CONFIG_VHOST_CROSS_ENDIAN_LEGACY 538c2ecf20Sopenharmony_cistatic void vhost_disable_cross_endian(struct vhost_virtqueue *vq) 548c2ecf20Sopenharmony_ci{ 558c2ecf20Sopenharmony_ci vq->user_be = !virtio_legacy_is_little_endian(); 568c2ecf20Sopenharmony_ci} 578c2ecf20Sopenharmony_ci 588c2ecf20Sopenharmony_cistatic void vhost_enable_cross_endian_big(struct vhost_virtqueue *vq) 598c2ecf20Sopenharmony_ci{ 608c2ecf20Sopenharmony_ci vq->user_be = true; 618c2ecf20Sopenharmony_ci} 628c2ecf20Sopenharmony_ci 638c2ecf20Sopenharmony_cistatic void vhost_enable_cross_endian_little(struct vhost_virtqueue *vq) 648c2ecf20Sopenharmony_ci{ 658c2ecf20Sopenharmony_ci vq->user_be = false; 668c2ecf20Sopenharmony_ci} 678c2ecf20Sopenharmony_ci 688c2ecf20Sopenharmony_cistatic long vhost_set_vring_endian(struct vhost_virtqueue *vq, int __user *argp) 698c2ecf20Sopenharmony_ci{ 708c2ecf20Sopenharmony_ci struct vhost_vring_state s; 718c2ecf20Sopenharmony_ci 728c2ecf20Sopenharmony_ci if (vq->private_data) 738c2ecf20Sopenharmony_ci return -EBUSY; 748c2ecf20Sopenharmony_ci 758c2ecf20Sopenharmony_ci if (copy_from_user(&s, argp, sizeof(s))) 768c2ecf20Sopenharmony_ci return -EFAULT; 778c2ecf20Sopenharmony_ci 788c2ecf20Sopenharmony_ci if (s.num != VHOST_VRING_LITTLE_ENDIAN && 798c2ecf20Sopenharmony_ci s.num != VHOST_VRING_BIG_ENDIAN) 808c2ecf20Sopenharmony_ci return -EINVAL; 818c2ecf20Sopenharmony_ci 828c2ecf20Sopenharmony_ci if (s.num == VHOST_VRING_BIG_ENDIAN) 838c2ecf20Sopenharmony_ci vhost_enable_cross_endian_big(vq); 848c2ecf20Sopenharmony_ci else 858c2ecf20Sopenharmony_ci vhost_enable_cross_endian_little(vq); 868c2ecf20Sopenharmony_ci 878c2ecf20Sopenharmony_ci return 0; 888c2ecf20Sopenharmony_ci} 898c2ecf20Sopenharmony_ci 908c2ecf20Sopenharmony_cistatic long vhost_get_vring_endian(struct vhost_virtqueue *vq, u32 idx, 918c2ecf20Sopenharmony_ci int __user *argp) 928c2ecf20Sopenharmony_ci{ 938c2ecf20Sopenharmony_ci struct vhost_vring_state s = { 948c2ecf20Sopenharmony_ci .index = idx, 958c2ecf20Sopenharmony_ci .num = vq->user_be 968c2ecf20Sopenharmony_ci }; 978c2ecf20Sopenharmony_ci 988c2ecf20Sopenharmony_ci if (copy_to_user(argp, &s, sizeof(s))) 998c2ecf20Sopenharmony_ci return -EFAULT; 1008c2ecf20Sopenharmony_ci 1018c2ecf20Sopenharmony_ci return 0; 1028c2ecf20Sopenharmony_ci} 1038c2ecf20Sopenharmony_ci 1048c2ecf20Sopenharmony_cistatic void vhost_init_is_le(struct vhost_virtqueue *vq) 1058c2ecf20Sopenharmony_ci{ 1068c2ecf20Sopenharmony_ci /* Note for legacy virtio: user_be is initialized at reset time 1078c2ecf20Sopenharmony_ci * according to the host endianness. If userspace does not set an 1088c2ecf20Sopenharmony_ci * explicit endianness, the default behavior is native endian, as 1098c2ecf20Sopenharmony_ci * expected by legacy virtio. 1108c2ecf20Sopenharmony_ci */ 1118c2ecf20Sopenharmony_ci vq->is_le = vhost_has_feature(vq, VIRTIO_F_VERSION_1) || !vq->user_be; 1128c2ecf20Sopenharmony_ci} 1138c2ecf20Sopenharmony_ci#else 1148c2ecf20Sopenharmony_cistatic void vhost_disable_cross_endian(struct vhost_virtqueue *vq) 1158c2ecf20Sopenharmony_ci{ 1168c2ecf20Sopenharmony_ci} 1178c2ecf20Sopenharmony_ci 1188c2ecf20Sopenharmony_cistatic long vhost_set_vring_endian(struct vhost_virtqueue *vq, int __user *argp) 1198c2ecf20Sopenharmony_ci{ 1208c2ecf20Sopenharmony_ci return -ENOIOCTLCMD; 1218c2ecf20Sopenharmony_ci} 1228c2ecf20Sopenharmony_ci 1238c2ecf20Sopenharmony_cistatic long vhost_get_vring_endian(struct vhost_virtqueue *vq, u32 idx, 1248c2ecf20Sopenharmony_ci int __user *argp) 1258c2ecf20Sopenharmony_ci{ 1268c2ecf20Sopenharmony_ci return -ENOIOCTLCMD; 1278c2ecf20Sopenharmony_ci} 1288c2ecf20Sopenharmony_ci 1298c2ecf20Sopenharmony_cistatic void vhost_init_is_le(struct vhost_virtqueue *vq) 1308c2ecf20Sopenharmony_ci{ 1318c2ecf20Sopenharmony_ci vq->is_le = vhost_has_feature(vq, VIRTIO_F_VERSION_1) 1328c2ecf20Sopenharmony_ci || virtio_legacy_is_little_endian(); 1338c2ecf20Sopenharmony_ci} 1348c2ecf20Sopenharmony_ci#endif /* CONFIG_VHOST_CROSS_ENDIAN_LEGACY */ 1358c2ecf20Sopenharmony_ci 1368c2ecf20Sopenharmony_cistatic void vhost_reset_is_le(struct vhost_virtqueue *vq) 1378c2ecf20Sopenharmony_ci{ 1388c2ecf20Sopenharmony_ci vhost_init_is_le(vq); 1398c2ecf20Sopenharmony_ci} 1408c2ecf20Sopenharmony_ci 1418c2ecf20Sopenharmony_cistruct vhost_flush_struct { 1428c2ecf20Sopenharmony_ci struct vhost_work work; 1438c2ecf20Sopenharmony_ci struct completion wait_event; 1448c2ecf20Sopenharmony_ci}; 1458c2ecf20Sopenharmony_ci 1468c2ecf20Sopenharmony_cistatic void vhost_flush_work(struct vhost_work *work) 1478c2ecf20Sopenharmony_ci{ 1488c2ecf20Sopenharmony_ci struct vhost_flush_struct *s; 1498c2ecf20Sopenharmony_ci 1508c2ecf20Sopenharmony_ci s = container_of(work, struct vhost_flush_struct, work); 1518c2ecf20Sopenharmony_ci complete(&s->wait_event); 1528c2ecf20Sopenharmony_ci} 1538c2ecf20Sopenharmony_ci 1548c2ecf20Sopenharmony_cistatic void vhost_poll_func(struct file *file, wait_queue_head_t *wqh, 1558c2ecf20Sopenharmony_ci poll_table *pt) 1568c2ecf20Sopenharmony_ci{ 1578c2ecf20Sopenharmony_ci struct vhost_poll *poll; 1588c2ecf20Sopenharmony_ci 1598c2ecf20Sopenharmony_ci poll = container_of(pt, struct vhost_poll, table); 1608c2ecf20Sopenharmony_ci poll->wqh = wqh; 1618c2ecf20Sopenharmony_ci add_wait_queue(wqh, &poll->wait); 1628c2ecf20Sopenharmony_ci} 1638c2ecf20Sopenharmony_ci 1648c2ecf20Sopenharmony_cistatic int vhost_poll_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, 1658c2ecf20Sopenharmony_ci void *key) 1668c2ecf20Sopenharmony_ci{ 1678c2ecf20Sopenharmony_ci struct vhost_poll *poll = container_of(wait, struct vhost_poll, wait); 1688c2ecf20Sopenharmony_ci struct vhost_work *work = &poll->work; 1698c2ecf20Sopenharmony_ci 1708c2ecf20Sopenharmony_ci if (!(key_to_poll(key) & poll->mask)) 1718c2ecf20Sopenharmony_ci return 0; 1728c2ecf20Sopenharmony_ci 1738c2ecf20Sopenharmony_ci if (!poll->dev->use_worker) 1748c2ecf20Sopenharmony_ci work->fn(work); 1758c2ecf20Sopenharmony_ci else 1768c2ecf20Sopenharmony_ci vhost_poll_queue(poll); 1778c2ecf20Sopenharmony_ci 1788c2ecf20Sopenharmony_ci return 0; 1798c2ecf20Sopenharmony_ci} 1808c2ecf20Sopenharmony_ci 1818c2ecf20Sopenharmony_civoid vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn) 1828c2ecf20Sopenharmony_ci{ 1838c2ecf20Sopenharmony_ci clear_bit(VHOST_WORK_QUEUED, &work->flags); 1848c2ecf20Sopenharmony_ci work->fn = fn; 1858c2ecf20Sopenharmony_ci} 1868c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_work_init); 1878c2ecf20Sopenharmony_ci 1888c2ecf20Sopenharmony_ci/* Init poll structure */ 1898c2ecf20Sopenharmony_civoid vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn, 1908c2ecf20Sopenharmony_ci __poll_t mask, struct vhost_dev *dev) 1918c2ecf20Sopenharmony_ci{ 1928c2ecf20Sopenharmony_ci init_waitqueue_func_entry(&poll->wait, vhost_poll_wakeup); 1938c2ecf20Sopenharmony_ci init_poll_funcptr(&poll->table, vhost_poll_func); 1948c2ecf20Sopenharmony_ci poll->mask = mask; 1958c2ecf20Sopenharmony_ci poll->dev = dev; 1968c2ecf20Sopenharmony_ci poll->wqh = NULL; 1978c2ecf20Sopenharmony_ci 1988c2ecf20Sopenharmony_ci vhost_work_init(&poll->work, fn); 1998c2ecf20Sopenharmony_ci} 2008c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_poll_init); 2018c2ecf20Sopenharmony_ci 2028c2ecf20Sopenharmony_ci/* Start polling a file. We add ourselves to file's wait queue. The caller must 2038c2ecf20Sopenharmony_ci * keep a reference to a file until after vhost_poll_stop is called. */ 2048c2ecf20Sopenharmony_ciint vhost_poll_start(struct vhost_poll *poll, struct file *file) 2058c2ecf20Sopenharmony_ci{ 2068c2ecf20Sopenharmony_ci __poll_t mask; 2078c2ecf20Sopenharmony_ci 2088c2ecf20Sopenharmony_ci if (poll->wqh) 2098c2ecf20Sopenharmony_ci return 0; 2108c2ecf20Sopenharmony_ci 2118c2ecf20Sopenharmony_ci mask = vfs_poll(file, &poll->table); 2128c2ecf20Sopenharmony_ci if (mask) 2138c2ecf20Sopenharmony_ci vhost_poll_wakeup(&poll->wait, 0, 0, poll_to_key(mask)); 2148c2ecf20Sopenharmony_ci if (mask & EPOLLERR) { 2158c2ecf20Sopenharmony_ci vhost_poll_stop(poll); 2168c2ecf20Sopenharmony_ci return -EINVAL; 2178c2ecf20Sopenharmony_ci } 2188c2ecf20Sopenharmony_ci 2198c2ecf20Sopenharmony_ci return 0; 2208c2ecf20Sopenharmony_ci} 2218c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_poll_start); 2228c2ecf20Sopenharmony_ci 2238c2ecf20Sopenharmony_ci/* Stop polling a file. After this function returns, it becomes safe to drop the 2248c2ecf20Sopenharmony_ci * file reference. You must also flush afterwards. */ 2258c2ecf20Sopenharmony_civoid vhost_poll_stop(struct vhost_poll *poll) 2268c2ecf20Sopenharmony_ci{ 2278c2ecf20Sopenharmony_ci if (poll->wqh) { 2288c2ecf20Sopenharmony_ci remove_wait_queue(poll->wqh, &poll->wait); 2298c2ecf20Sopenharmony_ci poll->wqh = NULL; 2308c2ecf20Sopenharmony_ci } 2318c2ecf20Sopenharmony_ci} 2328c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_poll_stop); 2338c2ecf20Sopenharmony_ci 2348c2ecf20Sopenharmony_civoid vhost_work_flush(struct vhost_dev *dev, struct vhost_work *work) 2358c2ecf20Sopenharmony_ci{ 2368c2ecf20Sopenharmony_ci struct vhost_flush_struct flush; 2378c2ecf20Sopenharmony_ci 2388c2ecf20Sopenharmony_ci if (dev->worker) { 2398c2ecf20Sopenharmony_ci init_completion(&flush.wait_event); 2408c2ecf20Sopenharmony_ci vhost_work_init(&flush.work, vhost_flush_work); 2418c2ecf20Sopenharmony_ci 2428c2ecf20Sopenharmony_ci vhost_work_queue(dev, &flush.work); 2438c2ecf20Sopenharmony_ci wait_for_completion(&flush.wait_event); 2448c2ecf20Sopenharmony_ci } 2458c2ecf20Sopenharmony_ci} 2468c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_work_flush); 2478c2ecf20Sopenharmony_ci 2488c2ecf20Sopenharmony_ci/* Flush any work that has been scheduled. When calling this, don't hold any 2498c2ecf20Sopenharmony_ci * locks that are also used by the callback. */ 2508c2ecf20Sopenharmony_civoid vhost_poll_flush(struct vhost_poll *poll) 2518c2ecf20Sopenharmony_ci{ 2528c2ecf20Sopenharmony_ci vhost_work_flush(poll->dev, &poll->work); 2538c2ecf20Sopenharmony_ci} 2548c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_poll_flush); 2558c2ecf20Sopenharmony_ci 2568c2ecf20Sopenharmony_civoid vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work) 2578c2ecf20Sopenharmony_ci{ 2588c2ecf20Sopenharmony_ci if (!dev->worker) 2598c2ecf20Sopenharmony_ci return; 2608c2ecf20Sopenharmony_ci 2618c2ecf20Sopenharmony_ci if (!test_and_set_bit(VHOST_WORK_QUEUED, &work->flags)) { 2628c2ecf20Sopenharmony_ci /* We can only add the work to the list after we're 2638c2ecf20Sopenharmony_ci * sure it was not in the list. 2648c2ecf20Sopenharmony_ci * test_and_set_bit() implies a memory barrier. 2658c2ecf20Sopenharmony_ci */ 2668c2ecf20Sopenharmony_ci llist_add(&work->node, &dev->work_list); 2678c2ecf20Sopenharmony_ci wake_up_process(dev->worker); 2688c2ecf20Sopenharmony_ci } 2698c2ecf20Sopenharmony_ci} 2708c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_work_queue); 2718c2ecf20Sopenharmony_ci 2728c2ecf20Sopenharmony_ci/* A lockless hint for busy polling code to exit the loop */ 2738c2ecf20Sopenharmony_cibool vhost_has_work(struct vhost_dev *dev) 2748c2ecf20Sopenharmony_ci{ 2758c2ecf20Sopenharmony_ci return !llist_empty(&dev->work_list); 2768c2ecf20Sopenharmony_ci} 2778c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_has_work); 2788c2ecf20Sopenharmony_ci 2798c2ecf20Sopenharmony_civoid vhost_poll_queue(struct vhost_poll *poll) 2808c2ecf20Sopenharmony_ci{ 2818c2ecf20Sopenharmony_ci vhost_work_queue(poll->dev, &poll->work); 2828c2ecf20Sopenharmony_ci} 2838c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_poll_queue); 2848c2ecf20Sopenharmony_ci 2858c2ecf20Sopenharmony_cistatic void __vhost_vq_meta_reset(struct vhost_virtqueue *vq) 2868c2ecf20Sopenharmony_ci{ 2878c2ecf20Sopenharmony_ci int j; 2888c2ecf20Sopenharmony_ci 2898c2ecf20Sopenharmony_ci for (j = 0; j < VHOST_NUM_ADDRS; j++) 2908c2ecf20Sopenharmony_ci vq->meta_iotlb[j] = NULL; 2918c2ecf20Sopenharmony_ci} 2928c2ecf20Sopenharmony_ci 2938c2ecf20Sopenharmony_cistatic void vhost_vq_meta_reset(struct vhost_dev *d) 2948c2ecf20Sopenharmony_ci{ 2958c2ecf20Sopenharmony_ci int i; 2968c2ecf20Sopenharmony_ci 2978c2ecf20Sopenharmony_ci for (i = 0; i < d->nvqs; ++i) 2988c2ecf20Sopenharmony_ci __vhost_vq_meta_reset(d->vqs[i]); 2998c2ecf20Sopenharmony_ci} 3008c2ecf20Sopenharmony_ci 3018c2ecf20Sopenharmony_cistatic void vhost_vring_call_reset(struct vhost_vring_call *call_ctx) 3028c2ecf20Sopenharmony_ci{ 3038c2ecf20Sopenharmony_ci call_ctx->ctx = NULL; 3048c2ecf20Sopenharmony_ci memset(&call_ctx->producer, 0x0, sizeof(struct irq_bypass_producer)); 3058c2ecf20Sopenharmony_ci} 3068c2ecf20Sopenharmony_ci 3078c2ecf20Sopenharmony_cibool vhost_vq_is_setup(struct vhost_virtqueue *vq) 3088c2ecf20Sopenharmony_ci{ 3098c2ecf20Sopenharmony_ci return vq->avail && vq->desc && vq->used && vhost_vq_access_ok(vq); 3108c2ecf20Sopenharmony_ci} 3118c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_vq_is_setup); 3128c2ecf20Sopenharmony_ci 3138c2ecf20Sopenharmony_cistatic void vhost_vq_reset(struct vhost_dev *dev, 3148c2ecf20Sopenharmony_ci struct vhost_virtqueue *vq) 3158c2ecf20Sopenharmony_ci{ 3168c2ecf20Sopenharmony_ci vq->num = 1; 3178c2ecf20Sopenharmony_ci vq->desc = NULL; 3188c2ecf20Sopenharmony_ci vq->avail = NULL; 3198c2ecf20Sopenharmony_ci vq->used = NULL; 3208c2ecf20Sopenharmony_ci vq->last_avail_idx = 0; 3218c2ecf20Sopenharmony_ci vq->avail_idx = 0; 3228c2ecf20Sopenharmony_ci vq->last_used_idx = 0; 3238c2ecf20Sopenharmony_ci vq->signalled_used = 0; 3248c2ecf20Sopenharmony_ci vq->signalled_used_valid = false; 3258c2ecf20Sopenharmony_ci vq->used_flags = 0; 3268c2ecf20Sopenharmony_ci vq->log_used = false; 3278c2ecf20Sopenharmony_ci vq->log_addr = -1ull; 3288c2ecf20Sopenharmony_ci vq->private_data = NULL; 3298c2ecf20Sopenharmony_ci vq->acked_features = 0; 3308c2ecf20Sopenharmony_ci vq->acked_backend_features = 0; 3318c2ecf20Sopenharmony_ci vq->log_base = NULL; 3328c2ecf20Sopenharmony_ci vq->error_ctx = NULL; 3338c2ecf20Sopenharmony_ci vq->kick = NULL; 3348c2ecf20Sopenharmony_ci vq->log_ctx = NULL; 3358c2ecf20Sopenharmony_ci vhost_disable_cross_endian(vq); 3368c2ecf20Sopenharmony_ci vhost_reset_is_le(vq); 3378c2ecf20Sopenharmony_ci vq->busyloop_timeout = 0; 3388c2ecf20Sopenharmony_ci vq->umem = NULL; 3398c2ecf20Sopenharmony_ci vq->iotlb = NULL; 3408c2ecf20Sopenharmony_ci vhost_vring_call_reset(&vq->call_ctx); 3418c2ecf20Sopenharmony_ci __vhost_vq_meta_reset(vq); 3428c2ecf20Sopenharmony_ci} 3438c2ecf20Sopenharmony_ci 3448c2ecf20Sopenharmony_cistatic int vhost_worker(void *data) 3458c2ecf20Sopenharmony_ci{ 3468c2ecf20Sopenharmony_ci struct vhost_dev *dev = data; 3478c2ecf20Sopenharmony_ci struct vhost_work *work, *work_next; 3488c2ecf20Sopenharmony_ci struct llist_node *node; 3498c2ecf20Sopenharmony_ci 3508c2ecf20Sopenharmony_ci kthread_use_mm(dev->mm); 3518c2ecf20Sopenharmony_ci 3528c2ecf20Sopenharmony_ci for (;;) { 3538c2ecf20Sopenharmony_ci /* mb paired w/ kthread_stop */ 3548c2ecf20Sopenharmony_ci set_current_state(TASK_INTERRUPTIBLE); 3558c2ecf20Sopenharmony_ci 3568c2ecf20Sopenharmony_ci if (kthread_should_stop()) { 3578c2ecf20Sopenharmony_ci __set_current_state(TASK_RUNNING); 3588c2ecf20Sopenharmony_ci break; 3598c2ecf20Sopenharmony_ci } 3608c2ecf20Sopenharmony_ci 3618c2ecf20Sopenharmony_ci node = llist_del_all(&dev->work_list); 3628c2ecf20Sopenharmony_ci if (!node) 3638c2ecf20Sopenharmony_ci schedule(); 3648c2ecf20Sopenharmony_ci 3658c2ecf20Sopenharmony_ci node = llist_reverse_order(node); 3668c2ecf20Sopenharmony_ci /* make sure flag is seen after deletion */ 3678c2ecf20Sopenharmony_ci smp_wmb(); 3688c2ecf20Sopenharmony_ci llist_for_each_entry_safe(work, work_next, node, node) { 3698c2ecf20Sopenharmony_ci clear_bit(VHOST_WORK_QUEUED, &work->flags); 3708c2ecf20Sopenharmony_ci __set_current_state(TASK_RUNNING); 3718c2ecf20Sopenharmony_ci kcov_remote_start_common(dev->kcov_handle); 3728c2ecf20Sopenharmony_ci work->fn(work); 3738c2ecf20Sopenharmony_ci kcov_remote_stop(); 3748c2ecf20Sopenharmony_ci if (need_resched()) 3758c2ecf20Sopenharmony_ci schedule(); 3768c2ecf20Sopenharmony_ci } 3778c2ecf20Sopenharmony_ci } 3788c2ecf20Sopenharmony_ci kthread_unuse_mm(dev->mm); 3798c2ecf20Sopenharmony_ci return 0; 3808c2ecf20Sopenharmony_ci} 3818c2ecf20Sopenharmony_ci 3828c2ecf20Sopenharmony_cistatic void vhost_vq_free_iovecs(struct vhost_virtqueue *vq) 3838c2ecf20Sopenharmony_ci{ 3848c2ecf20Sopenharmony_ci kfree(vq->indirect); 3858c2ecf20Sopenharmony_ci vq->indirect = NULL; 3868c2ecf20Sopenharmony_ci kfree(vq->log); 3878c2ecf20Sopenharmony_ci vq->log = NULL; 3888c2ecf20Sopenharmony_ci kfree(vq->heads); 3898c2ecf20Sopenharmony_ci vq->heads = NULL; 3908c2ecf20Sopenharmony_ci} 3918c2ecf20Sopenharmony_ci 3928c2ecf20Sopenharmony_ci/* Helper to allocate iovec buffers for all vqs. */ 3938c2ecf20Sopenharmony_cistatic long vhost_dev_alloc_iovecs(struct vhost_dev *dev) 3948c2ecf20Sopenharmony_ci{ 3958c2ecf20Sopenharmony_ci struct vhost_virtqueue *vq; 3968c2ecf20Sopenharmony_ci int i; 3978c2ecf20Sopenharmony_ci 3988c2ecf20Sopenharmony_ci for (i = 0; i < dev->nvqs; ++i) { 3998c2ecf20Sopenharmony_ci vq = dev->vqs[i]; 4008c2ecf20Sopenharmony_ci vq->indirect = kmalloc_array(UIO_MAXIOV, 4018c2ecf20Sopenharmony_ci sizeof(*vq->indirect), 4028c2ecf20Sopenharmony_ci GFP_KERNEL); 4038c2ecf20Sopenharmony_ci vq->log = kmalloc_array(dev->iov_limit, sizeof(*vq->log), 4048c2ecf20Sopenharmony_ci GFP_KERNEL); 4058c2ecf20Sopenharmony_ci vq->heads = kmalloc_array(dev->iov_limit, sizeof(*vq->heads), 4068c2ecf20Sopenharmony_ci GFP_KERNEL); 4078c2ecf20Sopenharmony_ci if (!vq->indirect || !vq->log || !vq->heads) 4088c2ecf20Sopenharmony_ci goto err_nomem; 4098c2ecf20Sopenharmony_ci } 4108c2ecf20Sopenharmony_ci return 0; 4118c2ecf20Sopenharmony_ci 4128c2ecf20Sopenharmony_cierr_nomem: 4138c2ecf20Sopenharmony_ci for (; i >= 0; --i) 4148c2ecf20Sopenharmony_ci vhost_vq_free_iovecs(dev->vqs[i]); 4158c2ecf20Sopenharmony_ci return -ENOMEM; 4168c2ecf20Sopenharmony_ci} 4178c2ecf20Sopenharmony_ci 4188c2ecf20Sopenharmony_cistatic void vhost_dev_free_iovecs(struct vhost_dev *dev) 4198c2ecf20Sopenharmony_ci{ 4208c2ecf20Sopenharmony_ci int i; 4218c2ecf20Sopenharmony_ci 4228c2ecf20Sopenharmony_ci for (i = 0; i < dev->nvqs; ++i) 4238c2ecf20Sopenharmony_ci vhost_vq_free_iovecs(dev->vqs[i]); 4248c2ecf20Sopenharmony_ci} 4258c2ecf20Sopenharmony_ci 4268c2ecf20Sopenharmony_cibool vhost_exceeds_weight(struct vhost_virtqueue *vq, 4278c2ecf20Sopenharmony_ci int pkts, int total_len) 4288c2ecf20Sopenharmony_ci{ 4298c2ecf20Sopenharmony_ci struct vhost_dev *dev = vq->dev; 4308c2ecf20Sopenharmony_ci 4318c2ecf20Sopenharmony_ci if ((dev->byte_weight && total_len >= dev->byte_weight) || 4328c2ecf20Sopenharmony_ci pkts >= dev->weight) { 4338c2ecf20Sopenharmony_ci vhost_poll_queue(&vq->poll); 4348c2ecf20Sopenharmony_ci return true; 4358c2ecf20Sopenharmony_ci } 4368c2ecf20Sopenharmony_ci 4378c2ecf20Sopenharmony_ci return false; 4388c2ecf20Sopenharmony_ci} 4398c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_exceeds_weight); 4408c2ecf20Sopenharmony_ci 4418c2ecf20Sopenharmony_cistatic size_t vhost_get_avail_size(struct vhost_virtqueue *vq, 4428c2ecf20Sopenharmony_ci unsigned int num) 4438c2ecf20Sopenharmony_ci{ 4448c2ecf20Sopenharmony_ci size_t event __maybe_unused = 4458c2ecf20Sopenharmony_ci vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; 4468c2ecf20Sopenharmony_ci 4478c2ecf20Sopenharmony_ci return sizeof(*vq->avail) + 4488c2ecf20Sopenharmony_ci sizeof(*vq->avail->ring) * num + event; 4498c2ecf20Sopenharmony_ci} 4508c2ecf20Sopenharmony_ci 4518c2ecf20Sopenharmony_cistatic size_t vhost_get_used_size(struct vhost_virtqueue *vq, 4528c2ecf20Sopenharmony_ci unsigned int num) 4538c2ecf20Sopenharmony_ci{ 4548c2ecf20Sopenharmony_ci size_t event __maybe_unused = 4558c2ecf20Sopenharmony_ci vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; 4568c2ecf20Sopenharmony_ci 4578c2ecf20Sopenharmony_ci return sizeof(*vq->used) + 4588c2ecf20Sopenharmony_ci sizeof(*vq->used->ring) * num + event; 4598c2ecf20Sopenharmony_ci} 4608c2ecf20Sopenharmony_ci 4618c2ecf20Sopenharmony_cistatic size_t vhost_get_desc_size(struct vhost_virtqueue *vq, 4628c2ecf20Sopenharmony_ci unsigned int num) 4638c2ecf20Sopenharmony_ci{ 4648c2ecf20Sopenharmony_ci return sizeof(*vq->desc) * num; 4658c2ecf20Sopenharmony_ci} 4668c2ecf20Sopenharmony_ci 4678c2ecf20Sopenharmony_civoid vhost_dev_init(struct vhost_dev *dev, 4688c2ecf20Sopenharmony_ci struct vhost_virtqueue **vqs, int nvqs, 4698c2ecf20Sopenharmony_ci int iov_limit, int weight, int byte_weight, 4708c2ecf20Sopenharmony_ci bool use_worker, 4718c2ecf20Sopenharmony_ci int (*msg_handler)(struct vhost_dev *dev, 4728c2ecf20Sopenharmony_ci struct vhost_iotlb_msg *msg)) 4738c2ecf20Sopenharmony_ci{ 4748c2ecf20Sopenharmony_ci struct vhost_virtqueue *vq; 4758c2ecf20Sopenharmony_ci int i; 4768c2ecf20Sopenharmony_ci 4778c2ecf20Sopenharmony_ci dev->vqs = vqs; 4788c2ecf20Sopenharmony_ci dev->nvqs = nvqs; 4798c2ecf20Sopenharmony_ci mutex_init(&dev->mutex); 4808c2ecf20Sopenharmony_ci dev->log_ctx = NULL; 4818c2ecf20Sopenharmony_ci dev->umem = NULL; 4828c2ecf20Sopenharmony_ci dev->iotlb = NULL; 4838c2ecf20Sopenharmony_ci dev->mm = NULL; 4848c2ecf20Sopenharmony_ci dev->worker = NULL; 4858c2ecf20Sopenharmony_ci dev->iov_limit = iov_limit; 4868c2ecf20Sopenharmony_ci dev->weight = weight; 4878c2ecf20Sopenharmony_ci dev->byte_weight = byte_weight; 4888c2ecf20Sopenharmony_ci dev->use_worker = use_worker; 4898c2ecf20Sopenharmony_ci dev->msg_handler = msg_handler; 4908c2ecf20Sopenharmony_ci init_llist_head(&dev->work_list); 4918c2ecf20Sopenharmony_ci init_waitqueue_head(&dev->wait); 4928c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&dev->read_list); 4938c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&dev->pending_list); 4948c2ecf20Sopenharmony_ci spin_lock_init(&dev->iotlb_lock); 4958c2ecf20Sopenharmony_ci 4968c2ecf20Sopenharmony_ci 4978c2ecf20Sopenharmony_ci for (i = 0; i < dev->nvqs; ++i) { 4988c2ecf20Sopenharmony_ci vq = dev->vqs[i]; 4998c2ecf20Sopenharmony_ci vq->log = NULL; 5008c2ecf20Sopenharmony_ci vq->indirect = NULL; 5018c2ecf20Sopenharmony_ci vq->heads = NULL; 5028c2ecf20Sopenharmony_ci vq->dev = dev; 5038c2ecf20Sopenharmony_ci mutex_init(&vq->mutex); 5048c2ecf20Sopenharmony_ci vhost_vq_reset(dev, vq); 5058c2ecf20Sopenharmony_ci if (vq->handle_kick) 5068c2ecf20Sopenharmony_ci vhost_poll_init(&vq->poll, vq->handle_kick, 5078c2ecf20Sopenharmony_ci EPOLLIN, dev); 5088c2ecf20Sopenharmony_ci } 5098c2ecf20Sopenharmony_ci} 5108c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_dev_init); 5118c2ecf20Sopenharmony_ci 5128c2ecf20Sopenharmony_ci/* Caller should have device mutex */ 5138c2ecf20Sopenharmony_cilong vhost_dev_check_owner(struct vhost_dev *dev) 5148c2ecf20Sopenharmony_ci{ 5158c2ecf20Sopenharmony_ci /* Are you the owner? If not, I don't think you mean to do that */ 5168c2ecf20Sopenharmony_ci return dev->mm == current->mm ? 0 : -EPERM; 5178c2ecf20Sopenharmony_ci} 5188c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_dev_check_owner); 5198c2ecf20Sopenharmony_ci 5208c2ecf20Sopenharmony_cistruct vhost_attach_cgroups_struct { 5218c2ecf20Sopenharmony_ci struct vhost_work work; 5228c2ecf20Sopenharmony_ci struct task_struct *owner; 5238c2ecf20Sopenharmony_ci int ret; 5248c2ecf20Sopenharmony_ci}; 5258c2ecf20Sopenharmony_ci 5268c2ecf20Sopenharmony_cistatic void vhost_attach_cgroups_work(struct vhost_work *work) 5278c2ecf20Sopenharmony_ci{ 5288c2ecf20Sopenharmony_ci struct vhost_attach_cgroups_struct *s; 5298c2ecf20Sopenharmony_ci 5308c2ecf20Sopenharmony_ci s = container_of(work, struct vhost_attach_cgroups_struct, work); 5318c2ecf20Sopenharmony_ci s->ret = cgroup_attach_task_all(s->owner, current); 5328c2ecf20Sopenharmony_ci} 5338c2ecf20Sopenharmony_ci 5348c2ecf20Sopenharmony_cistatic int vhost_attach_cgroups(struct vhost_dev *dev) 5358c2ecf20Sopenharmony_ci{ 5368c2ecf20Sopenharmony_ci struct vhost_attach_cgroups_struct attach; 5378c2ecf20Sopenharmony_ci 5388c2ecf20Sopenharmony_ci attach.owner = current; 5398c2ecf20Sopenharmony_ci vhost_work_init(&attach.work, vhost_attach_cgroups_work); 5408c2ecf20Sopenharmony_ci vhost_work_queue(dev, &attach.work); 5418c2ecf20Sopenharmony_ci vhost_work_flush(dev, &attach.work); 5428c2ecf20Sopenharmony_ci return attach.ret; 5438c2ecf20Sopenharmony_ci} 5448c2ecf20Sopenharmony_ci 5458c2ecf20Sopenharmony_ci/* Caller should have device mutex */ 5468c2ecf20Sopenharmony_cibool vhost_dev_has_owner(struct vhost_dev *dev) 5478c2ecf20Sopenharmony_ci{ 5488c2ecf20Sopenharmony_ci return dev->mm; 5498c2ecf20Sopenharmony_ci} 5508c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_dev_has_owner); 5518c2ecf20Sopenharmony_ci 5528c2ecf20Sopenharmony_cistatic void vhost_attach_mm(struct vhost_dev *dev) 5538c2ecf20Sopenharmony_ci{ 5548c2ecf20Sopenharmony_ci /* No owner, become one */ 5558c2ecf20Sopenharmony_ci if (dev->use_worker) { 5568c2ecf20Sopenharmony_ci dev->mm = get_task_mm(current); 5578c2ecf20Sopenharmony_ci } else { 5588c2ecf20Sopenharmony_ci /* vDPA device does not use worker thead, so there's 5598c2ecf20Sopenharmony_ci * no need to hold the address space for mm. This help 5608c2ecf20Sopenharmony_ci * to avoid deadlock in the case of mmap() which may 5618c2ecf20Sopenharmony_ci * held the refcnt of the file and depends on release 5628c2ecf20Sopenharmony_ci * method to remove vma. 5638c2ecf20Sopenharmony_ci */ 5648c2ecf20Sopenharmony_ci dev->mm = current->mm; 5658c2ecf20Sopenharmony_ci mmgrab(dev->mm); 5668c2ecf20Sopenharmony_ci } 5678c2ecf20Sopenharmony_ci} 5688c2ecf20Sopenharmony_ci 5698c2ecf20Sopenharmony_cistatic void vhost_detach_mm(struct vhost_dev *dev) 5708c2ecf20Sopenharmony_ci{ 5718c2ecf20Sopenharmony_ci if (!dev->mm) 5728c2ecf20Sopenharmony_ci return; 5738c2ecf20Sopenharmony_ci 5748c2ecf20Sopenharmony_ci if (dev->use_worker) 5758c2ecf20Sopenharmony_ci mmput(dev->mm); 5768c2ecf20Sopenharmony_ci else 5778c2ecf20Sopenharmony_ci mmdrop(dev->mm); 5788c2ecf20Sopenharmony_ci 5798c2ecf20Sopenharmony_ci dev->mm = NULL; 5808c2ecf20Sopenharmony_ci} 5818c2ecf20Sopenharmony_ci 5828c2ecf20Sopenharmony_ci/* Caller should have device mutex */ 5838c2ecf20Sopenharmony_cilong vhost_dev_set_owner(struct vhost_dev *dev) 5848c2ecf20Sopenharmony_ci{ 5858c2ecf20Sopenharmony_ci struct task_struct *worker; 5868c2ecf20Sopenharmony_ci int err; 5878c2ecf20Sopenharmony_ci 5888c2ecf20Sopenharmony_ci /* Is there an owner already? */ 5898c2ecf20Sopenharmony_ci if (vhost_dev_has_owner(dev)) { 5908c2ecf20Sopenharmony_ci err = -EBUSY; 5918c2ecf20Sopenharmony_ci goto err_mm; 5928c2ecf20Sopenharmony_ci } 5938c2ecf20Sopenharmony_ci 5948c2ecf20Sopenharmony_ci vhost_attach_mm(dev); 5958c2ecf20Sopenharmony_ci 5968c2ecf20Sopenharmony_ci dev->kcov_handle = kcov_common_handle(); 5978c2ecf20Sopenharmony_ci if (dev->use_worker) { 5988c2ecf20Sopenharmony_ci worker = kthread_create(vhost_worker, dev, 5998c2ecf20Sopenharmony_ci "vhost-%d", current->pid); 6008c2ecf20Sopenharmony_ci if (IS_ERR(worker)) { 6018c2ecf20Sopenharmony_ci err = PTR_ERR(worker); 6028c2ecf20Sopenharmony_ci goto err_worker; 6038c2ecf20Sopenharmony_ci } 6048c2ecf20Sopenharmony_ci 6058c2ecf20Sopenharmony_ci dev->worker = worker; 6068c2ecf20Sopenharmony_ci wake_up_process(worker); /* avoid contributing to loadavg */ 6078c2ecf20Sopenharmony_ci 6088c2ecf20Sopenharmony_ci err = vhost_attach_cgroups(dev); 6098c2ecf20Sopenharmony_ci if (err) 6108c2ecf20Sopenharmony_ci goto err_cgroup; 6118c2ecf20Sopenharmony_ci } 6128c2ecf20Sopenharmony_ci 6138c2ecf20Sopenharmony_ci err = vhost_dev_alloc_iovecs(dev); 6148c2ecf20Sopenharmony_ci if (err) 6158c2ecf20Sopenharmony_ci goto err_cgroup; 6168c2ecf20Sopenharmony_ci 6178c2ecf20Sopenharmony_ci return 0; 6188c2ecf20Sopenharmony_cierr_cgroup: 6198c2ecf20Sopenharmony_ci if (dev->worker) { 6208c2ecf20Sopenharmony_ci kthread_stop(dev->worker); 6218c2ecf20Sopenharmony_ci dev->worker = NULL; 6228c2ecf20Sopenharmony_ci } 6238c2ecf20Sopenharmony_cierr_worker: 6248c2ecf20Sopenharmony_ci vhost_detach_mm(dev); 6258c2ecf20Sopenharmony_ci dev->kcov_handle = 0; 6268c2ecf20Sopenharmony_cierr_mm: 6278c2ecf20Sopenharmony_ci return err; 6288c2ecf20Sopenharmony_ci} 6298c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_dev_set_owner); 6308c2ecf20Sopenharmony_ci 6318c2ecf20Sopenharmony_cistatic struct vhost_iotlb *iotlb_alloc(void) 6328c2ecf20Sopenharmony_ci{ 6338c2ecf20Sopenharmony_ci return vhost_iotlb_alloc(max_iotlb_entries, 6348c2ecf20Sopenharmony_ci VHOST_IOTLB_FLAG_RETIRE); 6358c2ecf20Sopenharmony_ci} 6368c2ecf20Sopenharmony_ci 6378c2ecf20Sopenharmony_cistruct vhost_iotlb *vhost_dev_reset_owner_prepare(void) 6388c2ecf20Sopenharmony_ci{ 6398c2ecf20Sopenharmony_ci return iotlb_alloc(); 6408c2ecf20Sopenharmony_ci} 6418c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_dev_reset_owner_prepare); 6428c2ecf20Sopenharmony_ci 6438c2ecf20Sopenharmony_ci/* Caller should have device mutex */ 6448c2ecf20Sopenharmony_civoid vhost_dev_reset_owner(struct vhost_dev *dev, struct vhost_iotlb *umem) 6458c2ecf20Sopenharmony_ci{ 6468c2ecf20Sopenharmony_ci int i; 6478c2ecf20Sopenharmony_ci 6488c2ecf20Sopenharmony_ci vhost_dev_cleanup(dev); 6498c2ecf20Sopenharmony_ci 6508c2ecf20Sopenharmony_ci dev->umem = umem; 6518c2ecf20Sopenharmony_ci /* We don't need VQ locks below since vhost_dev_cleanup makes sure 6528c2ecf20Sopenharmony_ci * VQs aren't running. 6538c2ecf20Sopenharmony_ci */ 6548c2ecf20Sopenharmony_ci for (i = 0; i < dev->nvqs; ++i) 6558c2ecf20Sopenharmony_ci dev->vqs[i]->umem = umem; 6568c2ecf20Sopenharmony_ci} 6578c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_dev_reset_owner); 6588c2ecf20Sopenharmony_ci 6598c2ecf20Sopenharmony_civoid vhost_dev_stop(struct vhost_dev *dev) 6608c2ecf20Sopenharmony_ci{ 6618c2ecf20Sopenharmony_ci int i; 6628c2ecf20Sopenharmony_ci 6638c2ecf20Sopenharmony_ci for (i = 0; i < dev->nvqs; ++i) { 6648c2ecf20Sopenharmony_ci if (dev->vqs[i]->kick && dev->vqs[i]->handle_kick) { 6658c2ecf20Sopenharmony_ci vhost_poll_stop(&dev->vqs[i]->poll); 6668c2ecf20Sopenharmony_ci vhost_poll_flush(&dev->vqs[i]->poll); 6678c2ecf20Sopenharmony_ci } 6688c2ecf20Sopenharmony_ci } 6698c2ecf20Sopenharmony_ci} 6708c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_dev_stop); 6718c2ecf20Sopenharmony_ci 6728c2ecf20Sopenharmony_civoid vhost_clear_msg(struct vhost_dev *dev) 6738c2ecf20Sopenharmony_ci{ 6748c2ecf20Sopenharmony_ci struct vhost_msg_node *node, *n; 6758c2ecf20Sopenharmony_ci 6768c2ecf20Sopenharmony_ci spin_lock(&dev->iotlb_lock); 6778c2ecf20Sopenharmony_ci 6788c2ecf20Sopenharmony_ci list_for_each_entry_safe(node, n, &dev->read_list, node) { 6798c2ecf20Sopenharmony_ci list_del(&node->node); 6808c2ecf20Sopenharmony_ci kfree(node); 6818c2ecf20Sopenharmony_ci } 6828c2ecf20Sopenharmony_ci 6838c2ecf20Sopenharmony_ci list_for_each_entry_safe(node, n, &dev->pending_list, node) { 6848c2ecf20Sopenharmony_ci list_del(&node->node); 6858c2ecf20Sopenharmony_ci kfree(node); 6868c2ecf20Sopenharmony_ci } 6878c2ecf20Sopenharmony_ci 6888c2ecf20Sopenharmony_ci spin_unlock(&dev->iotlb_lock); 6898c2ecf20Sopenharmony_ci} 6908c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_clear_msg); 6918c2ecf20Sopenharmony_ci 6928c2ecf20Sopenharmony_civoid vhost_dev_cleanup(struct vhost_dev *dev) 6938c2ecf20Sopenharmony_ci{ 6948c2ecf20Sopenharmony_ci int i; 6958c2ecf20Sopenharmony_ci 6968c2ecf20Sopenharmony_ci for (i = 0; i < dev->nvqs; ++i) { 6978c2ecf20Sopenharmony_ci if (dev->vqs[i]->error_ctx) 6988c2ecf20Sopenharmony_ci eventfd_ctx_put(dev->vqs[i]->error_ctx); 6998c2ecf20Sopenharmony_ci if (dev->vqs[i]->kick) 7008c2ecf20Sopenharmony_ci fput(dev->vqs[i]->kick); 7018c2ecf20Sopenharmony_ci if (dev->vqs[i]->call_ctx.ctx) 7028c2ecf20Sopenharmony_ci eventfd_ctx_put(dev->vqs[i]->call_ctx.ctx); 7038c2ecf20Sopenharmony_ci vhost_vq_reset(dev, dev->vqs[i]); 7048c2ecf20Sopenharmony_ci } 7058c2ecf20Sopenharmony_ci vhost_dev_free_iovecs(dev); 7068c2ecf20Sopenharmony_ci if (dev->log_ctx) 7078c2ecf20Sopenharmony_ci eventfd_ctx_put(dev->log_ctx); 7088c2ecf20Sopenharmony_ci dev->log_ctx = NULL; 7098c2ecf20Sopenharmony_ci /* No one will access memory at this point */ 7108c2ecf20Sopenharmony_ci vhost_iotlb_free(dev->umem); 7118c2ecf20Sopenharmony_ci dev->umem = NULL; 7128c2ecf20Sopenharmony_ci vhost_iotlb_free(dev->iotlb); 7138c2ecf20Sopenharmony_ci dev->iotlb = NULL; 7148c2ecf20Sopenharmony_ci vhost_clear_msg(dev); 7158c2ecf20Sopenharmony_ci wake_up_interruptible_poll(&dev->wait, EPOLLIN | EPOLLRDNORM); 7168c2ecf20Sopenharmony_ci WARN_ON(!llist_empty(&dev->work_list)); 7178c2ecf20Sopenharmony_ci if (dev->worker) { 7188c2ecf20Sopenharmony_ci kthread_stop(dev->worker); 7198c2ecf20Sopenharmony_ci dev->worker = NULL; 7208c2ecf20Sopenharmony_ci dev->kcov_handle = 0; 7218c2ecf20Sopenharmony_ci } 7228c2ecf20Sopenharmony_ci vhost_detach_mm(dev); 7238c2ecf20Sopenharmony_ci} 7248c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_dev_cleanup); 7258c2ecf20Sopenharmony_ci 7268c2ecf20Sopenharmony_cistatic bool log_access_ok(void __user *log_base, u64 addr, unsigned long sz) 7278c2ecf20Sopenharmony_ci{ 7288c2ecf20Sopenharmony_ci u64 a = addr / VHOST_PAGE_SIZE / 8; 7298c2ecf20Sopenharmony_ci 7308c2ecf20Sopenharmony_ci /* Make sure 64 bit math will not overflow. */ 7318c2ecf20Sopenharmony_ci if (a > ULONG_MAX - (unsigned long)log_base || 7328c2ecf20Sopenharmony_ci a + (unsigned long)log_base > ULONG_MAX) 7338c2ecf20Sopenharmony_ci return false; 7348c2ecf20Sopenharmony_ci 7358c2ecf20Sopenharmony_ci return access_ok(log_base + a, 7368c2ecf20Sopenharmony_ci (sz + VHOST_PAGE_SIZE * 8 - 1) / VHOST_PAGE_SIZE / 8); 7378c2ecf20Sopenharmony_ci} 7388c2ecf20Sopenharmony_ci 7398c2ecf20Sopenharmony_ci/* Make sure 64 bit math will not overflow. */ 7408c2ecf20Sopenharmony_cistatic bool vhost_overflow(u64 uaddr, u64 size) 7418c2ecf20Sopenharmony_ci{ 7428c2ecf20Sopenharmony_ci if (uaddr > ULONG_MAX || size > ULONG_MAX) 7438c2ecf20Sopenharmony_ci return true; 7448c2ecf20Sopenharmony_ci 7458c2ecf20Sopenharmony_ci if (!size) 7468c2ecf20Sopenharmony_ci return false; 7478c2ecf20Sopenharmony_ci 7488c2ecf20Sopenharmony_ci return uaddr > ULONG_MAX - size + 1; 7498c2ecf20Sopenharmony_ci} 7508c2ecf20Sopenharmony_ci 7518c2ecf20Sopenharmony_ci/* Caller should have vq mutex and device mutex. */ 7528c2ecf20Sopenharmony_cistatic bool vq_memory_access_ok(void __user *log_base, struct vhost_iotlb *umem, 7538c2ecf20Sopenharmony_ci int log_all) 7548c2ecf20Sopenharmony_ci{ 7558c2ecf20Sopenharmony_ci struct vhost_iotlb_map *map; 7568c2ecf20Sopenharmony_ci 7578c2ecf20Sopenharmony_ci if (!umem) 7588c2ecf20Sopenharmony_ci return false; 7598c2ecf20Sopenharmony_ci 7608c2ecf20Sopenharmony_ci list_for_each_entry(map, &umem->list, link) { 7618c2ecf20Sopenharmony_ci unsigned long a = map->addr; 7628c2ecf20Sopenharmony_ci 7638c2ecf20Sopenharmony_ci if (vhost_overflow(map->addr, map->size)) 7648c2ecf20Sopenharmony_ci return false; 7658c2ecf20Sopenharmony_ci 7668c2ecf20Sopenharmony_ci 7678c2ecf20Sopenharmony_ci if (!access_ok((void __user *)a, map->size)) 7688c2ecf20Sopenharmony_ci return false; 7698c2ecf20Sopenharmony_ci else if (log_all && !log_access_ok(log_base, 7708c2ecf20Sopenharmony_ci map->start, 7718c2ecf20Sopenharmony_ci map->size)) 7728c2ecf20Sopenharmony_ci return false; 7738c2ecf20Sopenharmony_ci } 7748c2ecf20Sopenharmony_ci return true; 7758c2ecf20Sopenharmony_ci} 7768c2ecf20Sopenharmony_ci 7778c2ecf20Sopenharmony_cistatic inline void __user *vhost_vq_meta_fetch(struct vhost_virtqueue *vq, 7788c2ecf20Sopenharmony_ci u64 addr, unsigned int size, 7798c2ecf20Sopenharmony_ci int type) 7808c2ecf20Sopenharmony_ci{ 7818c2ecf20Sopenharmony_ci const struct vhost_iotlb_map *map = vq->meta_iotlb[type]; 7828c2ecf20Sopenharmony_ci 7838c2ecf20Sopenharmony_ci if (!map) 7848c2ecf20Sopenharmony_ci return NULL; 7858c2ecf20Sopenharmony_ci 7868c2ecf20Sopenharmony_ci return (void __user *)(uintptr_t)(map->addr + addr - map->start); 7878c2ecf20Sopenharmony_ci} 7888c2ecf20Sopenharmony_ci 7898c2ecf20Sopenharmony_ci/* Can we switch to this memory table? */ 7908c2ecf20Sopenharmony_ci/* Caller should have device mutex but not vq mutex */ 7918c2ecf20Sopenharmony_cistatic bool memory_access_ok(struct vhost_dev *d, struct vhost_iotlb *umem, 7928c2ecf20Sopenharmony_ci int log_all) 7938c2ecf20Sopenharmony_ci{ 7948c2ecf20Sopenharmony_ci int i; 7958c2ecf20Sopenharmony_ci 7968c2ecf20Sopenharmony_ci for (i = 0; i < d->nvqs; ++i) { 7978c2ecf20Sopenharmony_ci bool ok; 7988c2ecf20Sopenharmony_ci bool log; 7998c2ecf20Sopenharmony_ci 8008c2ecf20Sopenharmony_ci mutex_lock(&d->vqs[i]->mutex); 8018c2ecf20Sopenharmony_ci log = log_all || vhost_has_feature(d->vqs[i], VHOST_F_LOG_ALL); 8028c2ecf20Sopenharmony_ci /* If ring is inactive, will check when it's enabled. */ 8038c2ecf20Sopenharmony_ci if (d->vqs[i]->private_data) 8048c2ecf20Sopenharmony_ci ok = vq_memory_access_ok(d->vqs[i]->log_base, 8058c2ecf20Sopenharmony_ci umem, log); 8068c2ecf20Sopenharmony_ci else 8078c2ecf20Sopenharmony_ci ok = true; 8088c2ecf20Sopenharmony_ci mutex_unlock(&d->vqs[i]->mutex); 8098c2ecf20Sopenharmony_ci if (!ok) 8108c2ecf20Sopenharmony_ci return false; 8118c2ecf20Sopenharmony_ci } 8128c2ecf20Sopenharmony_ci return true; 8138c2ecf20Sopenharmony_ci} 8148c2ecf20Sopenharmony_ci 8158c2ecf20Sopenharmony_cistatic int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len, 8168c2ecf20Sopenharmony_ci struct iovec iov[], int iov_size, int access); 8178c2ecf20Sopenharmony_ci 8188c2ecf20Sopenharmony_cistatic int vhost_copy_to_user(struct vhost_virtqueue *vq, void __user *to, 8198c2ecf20Sopenharmony_ci const void *from, unsigned size) 8208c2ecf20Sopenharmony_ci{ 8218c2ecf20Sopenharmony_ci int ret; 8228c2ecf20Sopenharmony_ci 8238c2ecf20Sopenharmony_ci if (!vq->iotlb) 8248c2ecf20Sopenharmony_ci return __copy_to_user(to, from, size); 8258c2ecf20Sopenharmony_ci else { 8268c2ecf20Sopenharmony_ci /* This function should be called after iotlb 8278c2ecf20Sopenharmony_ci * prefetch, which means we're sure that all vq 8288c2ecf20Sopenharmony_ci * could be access through iotlb. So -EAGAIN should 8298c2ecf20Sopenharmony_ci * not happen in this case. 8308c2ecf20Sopenharmony_ci */ 8318c2ecf20Sopenharmony_ci struct iov_iter t; 8328c2ecf20Sopenharmony_ci void __user *uaddr = vhost_vq_meta_fetch(vq, 8338c2ecf20Sopenharmony_ci (u64)(uintptr_t)to, size, 8348c2ecf20Sopenharmony_ci VHOST_ADDR_USED); 8358c2ecf20Sopenharmony_ci 8368c2ecf20Sopenharmony_ci if (uaddr) 8378c2ecf20Sopenharmony_ci return __copy_to_user(uaddr, from, size); 8388c2ecf20Sopenharmony_ci 8398c2ecf20Sopenharmony_ci ret = translate_desc(vq, (u64)(uintptr_t)to, size, vq->iotlb_iov, 8408c2ecf20Sopenharmony_ci ARRAY_SIZE(vq->iotlb_iov), 8418c2ecf20Sopenharmony_ci VHOST_ACCESS_WO); 8428c2ecf20Sopenharmony_ci if (ret < 0) 8438c2ecf20Sopenharmony_ci goto out; 8448c2ecf20Sopenharmony_ci iov_iter_init(&t, WRITE, vq->iotlb_iov, ret, size); 8458c2ecf20Sopenharmony_ci ret = copy_to_iter(from, size, &t); 8468c2ecf20Sopenharmony_ci if (ret == size) 8478c2ecf20Sopenharmony_ci ret = 0; 8488c2ecf20Sopenharmony_ci } 8498c2ecf20Sopenharmony_ciout: 8508c2ecf20Sopenharmony_ci return ret; 8518c2ecf20Sopenharmony_ci} 8528c2ecf20Sopenharmony_ci 8538c2ecf20Sopenharmony_cistatic int vhost_copy_from_user(struct vhost_virtqueue *vq, void *to, 8548c2ecf20Sopenharmony_ci void __user *from, unsigned size) 8558c2ecf20Sopenharmony_ci{ 8568c2ecf20Sopenharmony_ci int ret; 8578c2ecf20Sopenharmony_ci 8588c2ecf20Sopenharmony_ci if (!vq->iotlb) 8598c2ecf20Sopenharmony_ci return __copy_from_user(to, from, size); 8608c2ecf20Sopenharmony_ci else { 8618c2ecf20Sopenharmony_ci /* This function should be called after iotlb 8628c2ecf20Sopenharmony_ci * prefetch, which means we're sure that vq 8638c2ecf20Sopenharmony_ci * could be access through iotlb. So -EAGAIN should 8648c2ecf20Sopenharmony_ci * not happen in this case. 8658c2ecf20Sopenharmony_ci */ 8668c2ecf20Sopenharmony_ci void __user *uaddr = vhost_vq_meta_fetch(vq, 8678c2ecf20Sopenharmony_ci (u64)(uintptr_t)from, size, 8688c2ecf20Sopenharmony_ci VHOST_ADDR_DESC); 8698c2ecf20Sopenharmony_ci struct iov_iter f; 8708c2ecf20Sopenharmony_ci 8718c2ecf20Sopenharmony_ci if (uaddr) 8728c2ecf20Sopenharmony_ci return __copy_from_user(to, uaddr, size); 8738c2ecf20Sopenharmony_ci 8748c2ecf20Sopenharmony_ci ret = translate_desc(vq, (u64)(uintptr_t)from, size, vq->iotlb_iov, 8758c2ecf20Sopenharmony_ci ARRAY_SIZE(vq->iotlb_iov), 8768c2ecf20Sopenharmony_ci VHOST_ACCESS_RO); 8778c2ecf20Sopenharmony_ci if (ret < 0) { 8788c2ecf20Sopenharmony_ci vq_err(vq, "IOTLB translation failure: uaddr " 8798c2ecf20Sopenharmony_ci "%p size 0x%llx\n", from, 8808c2ecf20Sopenharmony_ci (unsigned long long) size); 8818c2ecf20Sopenharmony_ci goto out; 8828c2ecf20Sopenharmony_ci } 8838c2ecf20Sopenharmony_ci iov_iter_init(&f, READ, vq->iotlb_iov, ret, size); 8848c2ecf20Sopenharmony_ci ret = copy_from_iter(to, size, &f); 8858c2ecf20Sopenharmony_ci if (ret == size) 8868c2ecf20Sopenharmony_ci ret = 0; 8878c2ecf20Sopenharmony_ci } 8888c2ecf20Sopenharmony_ci 8898c2ecf20Sopenharmony_ciout: 8908c2ecf20Sopenharmony_ci return ret; 8918c2ecf20Sopenharmony_ci} 8928c2ecf20Sopenharmony_ci 8938c2ecf20Sopenharmony_cistatic void __user *__vhost_get_user_slow(struct vhost_virtqueue *vq, 8948c2ecf20Sopenharmony_ci void __user *addr, unsigned int size, 8958c2ecf20Sopenharmony_ci int type) 8968c2ecf20Sopenharmony_ci{ 8978c2ecf20Sopenharmony_ci int ret; 8988c2ecf20Sopenharmony_ci 8998c2ecf20Sopenharmony_ci ret = translate_desc(vq, (u64)(uintptr_t)addr, size, vq->iotlb_iov, 9008c2ecf20Sopenharmony_ci ARRAY_SIZE(vq->iotlb_iov), 9018c2ecf20Sopenharmony_ci VHOST_ACCESS_RO); 9028c2ecf20Sopenharmony_ci if (ret < 0) { 9038c2ecf20Sopenharmony_ci vq_err(vq, "IOTLB translation failure: uaddr " 9048c2ecf20Sopenharmony_ci "%p size 0x%llx\n", addr, 9058c2ecf20Sopenharmony_ci (unsigned long long) size); 9068c2ecf20Sopenharmony_ci return NULL; 9078c2ecf20Sopenharmony_ci } 9088c2ecf20Sopenharmony_ci 9098c2ecf20Sopenharmony_ci if (ret != 1 || vq->iotlb_iov[0].iov_len != size) { 9108c2ecf20Sopenharmony_ci vq_err(vq, "Non atomic userspace memory access: uaddr " 9118c2ecf20Sopenharmony_ci "%p size 0x%llx\n", addr, 9128c2ecf20Sopenharmony_ci (unsigned long long) size); 9138c2ecf20Sopenharmony_ci return NULL; 9148c2ecf20Sopenharmony_ci } 9158c2ecf20Sopenharmony_ci 9168c2ecf20Sopenharmony_ci return vq->iotlb_iov[0].iov_base; 9178c2ecf20Sopenharmony_ci} 9188c2ecf20Sopenharmony_ci 9198c2ecf20Sopenharmony_ci/* This function should be called after iotlb 9208c2ecf20Sopenharmony_ci * prefetch, which means we're sure that vq 9218c2ecf20Sopenharmony_ci * could be access through iotlb. So -EAGAIN should 9228c2ecf20Sopenharmony_ci * not happen in this case. 9238c2ecf20Sopenharmony_ci */ 9248c2ecf20Sopenharmony_cistatic inline void __user *__vhost_get_user(struct vhost_virtqueue *vq, 9258c2ecf20Sopenharmony_ci void __user *addr, unsigned int size, 9268c2ecf20Sopenharmony_ci int type) 9278c2ecf20Sopenharmony_ci{ 9288c2ecf20Sopenharmony_ci void __user *uaddr = vhost_vq_meta_fetch(vq, 9298c2ecf20Sopenharmony_ci (u64)(uintptr_t)addr, size, type); 9308c2ecf20Sopenharmony_ci if (uaddr) 9318c2ecf20Sopenharmony_ci return uaddr; 9328c2ecf20Sopenharmony_ci 9338c2ecf20Sopenharmony_ci return __vhost_get_user_slow(vq, addr, size, type); 9348c2ecf20Sopenharmony_ci} 9358c2ecf20Sopenharmony_ci 9368c2ecf20Sopenharmony_ci#define vhost_put_user(vq, x, ptr) \ 9378c2ecf20Sopenharmony_ci({ \ 9388c2ecf20Sopenharmony_ci int ret; \ 9398c2ecf20Sopenharmony_ci if (!vq->iotlb) { \ 9408c2ecf20Sopenharmony_ci ret = __put_user(x, ptr); \ 9418c2ecf20Sopenharmony_ci } else { \ 9428c2ecf20Sopenharmony_ci __typeof__(ptr) to = \ 9438c2ecf20Sopenharmony_ci (__typeof__(ptr)) __vhost_get_user(vq, ptr, \ 9448c2ecf20Sopenharmony_ci sizeof(*ptr), VHOST_ADDR_USED); \ 9458c2ecf20Sopenharmony_ci if (to != NULL) \ 9468c2ecf20Sopenharmony_ci ret = __put_user(x, to); \ 9478c2ecf20Sopenharmony_ci else \ 9488c2ecf20Sopenharmony_ci ret = -EFAULT; \ 9498c2ecf20Sopenharmony_ci } \ 9508c2ecf20Sopenharmony_ci ret; \ 9518c2ecf20Sopenharmony_ci}) 9528c2ecf20Sopenharmony_ci 9538c2ecf20Sopenharmony_cistatic inline int vhost_put_avail_event(struct vhost_virtqueue *vq) 9548c2ecf20Sopenharmony_ci{ 9558c2ecf20Sopenharmony_ci return vhost_put_user(vq, cpu_to_vhost16(vq, vq->avail_idx), 9568c2ecf20Sopenharmony_ci vhost_avail_event(vq)); 9578c2ecf20Sopenharmony_ci} 9588c2ecf20Sopenharmony_ci 9598c2ecf20Sopenharmony_cistatic inline int vhost_put_used(struct vhost_virtqueue *vq, 9608c2ecf20Sopenharmony_ci struct vring_used_elem *head, int idx, 9618c2ecf20Sopenharmony_ci int count) 9628c2ecf20Sopenharmony_ci{ 9638c2ecf20Sopenharmony_ci return vhost_copy_to_user(vq, vq->used->ring + idx, head, 9648c2ecf20Sopenharmony_ci count * sizeof(*head)); 9658c2ecf20Sopenharmony_ci} 9668c2ecf20Sopenharmony_ci 9678c2ecf20Sopenharmony_cistatic inline int vhost_put_used_flags(struct vhost_virtqueue *vq) 9688c2ecf20Sopenharmony_ci 9698c2ecf20Sopenharmony_ci{ 9708c2ecf20Sopenharmony_ci return vhost_put_user(vq, cpu_to_vhost16(vq, vq->used_flags), 9718c2ecf20Sopenharmony_ci &vq->used->flags); 9728c2ecf20Sopenharmony_ci} 9738c2ecf20Sopenharmony_ci 9748c2ecf20Sopenharmony_cistatic inline int vhost_put_used_idx(struct vhost_virtqueue *vq) 9758c2ecf20Sopenharmony_ci 9768c2ecf20Sopenharmony_ci{ 9778c2ecf20Sopenharmony_ci return vhost_put_user(vq, cpu_to_vhost16(vq, vq->last_used_idx), 9788c2ecf20Sopenharmony_ci &vq->used->idx); 9798c2ecf20Sopenharmony_ci} 9808c2ecf20Sopenharmony_ci 9818c2ecf20Sopenharmony_ci#define vhost_get_user(vq, x, ptr, type) \ 9828c2ecf20Sopenharmony_ci({ \ 9838c2ecf20Sopenharmony_ci int ret; \ 9848c2ecf20Sopenharmony_ci if (!vq->iotlb) { \ 9858c2ecf20Sopenharmony_ci ret = __get_user(x, ptr); \ 9868c2ecf20Sopenharmony_ci } else { \ 9878c2ecf20Sopenharmony_ci __typeof__(ptr) from = \ 9888c2ecf20Sopenharmony_ci (__typeof__(ptr)) __vhost_get_user(vq, ptr, \ 9898c2ecf20Sopenharmony_ci sizeof(*ptr), \ 9908c2ecf20Sopenharmony_ci type); \ 9918c2ecf20Sopenharmony_ci if (from != NULL) \ 9928c2ecf20Sopenharmony_ci ret = __get_user(x, from); \ 9938c2ecf20Sopenharmony_ci else \ 9948c2ecf20Sopenharmony_ci ret = -EFAULT; \ 9958c2ecf20Sopenharmony_ci } \ 9968c2ecf20Sopenharmony_ci ret; \ 9978c2ecf20Sopenharmony_ci}) 9988c2ecf20Sopenharmony_ci 9998c2ecf20Sopenharmony_ci#define vhost_get_avail(vq, x, ptr) \ 10008c2ecf20Sopenharmony_ci vhost_get_user(vq, x, ptr, VHOST_ADDR_AVAIL) 10018c2ecf20Sopenharmony_ci 10028c2ecf20Sopenharmony_ci#define vhost_get_used(vq, x, ptr) \ 10038c2ecf20Sopenharmony_ci vhost_get_user(vq, x, ptr, VHOST_ADDR_USED) 10048c2ecf20Sopenharmony_ci 10058c2ecf20Sopenharmony_cistatic void vhost_dev_lock_vqs(struct vhost_dev *d) 10068c2ecf20Sopenharmony_ci{ 10078c2ecf20Sopenharmony_ci int i = 0; 10088c2ecf20Sopenharmony_ci for (i = 0; i < d->nvqs; ++i) 10098c2ecf20Sopenharmony_ci mutex_lock_nested(&d->vqs[i]->mutex, i); 10108c2ecf20Sopenharmony_ci} 10118c2ecf20Sopenharmony_ci 10128c2ecf20Sopenharmony_cistatic void vhost_dev_unlock_vqs(struct vhost_dev *d) 10138c2ecf20Sopenharmony_ci{ 10148c2ecf20Sopenharmony_ci int i = 0; 10158c2ecf20Sopenharmony_ci for (i = 0; i < d->nvqs; ++i) 10168c2ecf20Sopenharmony_ci mutex_unlock(&d->vqs[i]->mutex); 10178c2ecf20Sopenharmony_ci} 10188c2ecf20Sopenharmony_ci 10198c2ecf20Sopenharmony_cistatic inline int vhost_get_avail_idx(struct vhost_virtqueue *vq, 10208c2ecf20Sopenharmony_ci __virtio16 *idx) 10218c2ecf20Sopenharmony_ci{ 10228c2ecf20Sopenharmony_ci return vhost_get_avail(vq, *idx, &vq->avail->idx); 10238c2ecf20Sopenharmony_ci} 10248c2ecf20Sopenharmony_ci 10258c2ecf20Sopenharmony_cistatic inline int vhost_get_avail_head(struct vhost_virtqueue *vq, 10268c2ecf20Sopenharmony_ci __virtio16 *head, int idx) 10278c2ecf20Sopenharmony_ci{ 10288c2ecf20Sopenharmony_ci return vhost_get_avail(vq, *head, 10298c2ecf20Sopenharmony_ci &vq->avail->ring[idx & (vq->num - 1)]); 10308c2ecf20Sopenharmony_ci} 10318c2ecf20Sopenharmony_ci 10328c2ecf20Sopenharmony_cistatic inline int vhost_get_avail_flags(struct vhost_virtqueue *vq, 10338c2ecf20Sopenharmony_ci __virtio16 *flags) 10348c2ecf20Sopenharmony_ci{ 10358c2ecf20Sopenharmony_ci return vhost_get_avail(vq, *flags, &vq->avail->flags); 10368c2ecf20Sopenharmony_ci} 10378c2ecf20Sopenharmony_ci 10388c2ecf20Sopenharmony_cistatic inline int vhost_get_used_event(struct vhost_virtqueue *vq, 10398c2ecf20Sopenharmony_ci __virtio16 *event) 10408c2ecf20Sopenharmony_ci{ 10418c2ecf20Sopenharmony_ci return vhost_get_avail(vq, *event, vhost_used_event(vq)); 10428c2ecf20Sopenharmony_ci} 10438c2ecf20Sopenharmony_ci 10448c2ecf20Sopenharmony_cistatic inline int vhost_get_used_idx(struct vhost_virtqueue *vq, 10458c2ecf20Sopenharmony_ci __virtio16 *idx) 10468c2ecf20Sopenharmony_ci{ 10478c2ecf20Sopenharmony_ci return vhost_get_used(vq, *idx, &vq->used->idx); 10488c2ecf20Sopenharmony_ci} 10498c2ecf20Sopenharmony_ci 10508c2ecf20Sopenharmony_cistatic inline int vhost_get_desc(struct vhost_virtqueue *vq, 10518c2ecf20Sopenharmony_ci struct vring_desc *desc, int idx) 10528c2ecf20Sopenharmony_ci{ 10538c2ecf20Sopenharmony_ci return vhost_copy_from_user(vq, desc, vq->desc + idx, sizeof(*desc)); 10548c2ecf20Sopenharmony_ci} 10558c2ecf20Sopenharmony_ci 10568c2ecf20Sopenharmony_cistatic void vhost_iotlb_notify_vq(struct vhost_dev *d, 10578c2ecf20Sopenharmony_ci struct vhost_iotlb_msg *msg) 10588c2ecf20Sopenharmony_ci{ 10598c2ecf20Sopenharmony_ci struct vhost_msg_node *node, *n; 10608c2ecf20Sopenharmony_ci 10618c2ecf20Sopenharmony_ci spin_lock(&d->iotlb_lock); 10628c2ecf20Sopenharmony_ci 10638c2ecf20Sopenharmony_ci list_for_each_entry_safe(node, n, &d->pending_list, node) { 10648c2ecf20Sopenharmony_ci struct vhost_iotlb_msg *vq_msg = &node->msg.iotlb; 10658c2ecf20Sopenharmony_ci if (msg->iova <= vq_msg->iova && 10668c2ecf20Sopenharmony_ci msg->iova + msg->size - 1 >= vq_msg->iova && 10678c2ecf20Sopenharmony_ci vq_msg->type == VHOST_IOTLB_MISS) { 10688c2ecf20Sopenharmony_ci vhost_poll_queue(&node->vq->poll); 10698c2ecf20Sopenharmony_ci list_del(&node->node); 10708c2ecf20Sopenharmony_ci kfree(node); 10718c2ecf20Sopenharmony_ci } 10728c2ecf20Sopenharmony_ci } 10738c2ecf20Sopenharmony_ci 10748c2ecf20Sopenharmony_ci spin_unlock(&d->iotlb_lock); 10758c2ecf20Sopenharmony_ci} 10768c2ecf20Sopenharmony_ci 10778c2ecf20Sopenharmony_cistatic bool umem_access_ok(u64 uaddr, u64 size, int access) 10788c2ecf20Sopenharmony_ci{ 10798c2ecf20Sopenharmony_ci unsigned long a = uaddr; 10808c2ecf20Sopenharmony_ci 10818c2ecf20Sopenharmony_ci /* Make sure 64 bit math will not overflow. */ 10828c2ecf20Sopenharmony_ci if (vhost_overflow(uaddr, size)) 10838c2ecf20Sopenharmony_ci return false; 10848c2ecf20Sopenharmony_ci 10858c2ecf20Sopenharmony_ci if ((access & VHOST_ACCESS_RO) && 10868c2ecf20Sopenharmony_ci !access_ok((void __user *)a, size)) 10878c2ecf20Sopenharmony_ci return false; 10888c2ecf20Sopenharmony_ci if ((access & VHOST_ACCESS_WO) && 10898c2ecf20Sopenharmony_ci !access_ok((void __user *)a, size)) 10908c2ecf20Sopenharmony_ci return false; 10918c2ecf20Sopenharmony_ci return true; 10928c2ecf20Sopenharmony_ci} 10938c2ecf20Sopenharmony_ci 10948c2ecf20Sopenharmony_cistatic int vhost_process_iotlb_msg(struct vhost_dev *dev, 10958c2ecf20Sopenharmony_ci struct vhost_iotlb_msg *msg) 10968c2ecf20Sopenharmony_ci{ 10978c2ecf20Sopenharmony_ci int ret = 0; 10988c2ecf20Sopenharmony_ci 10998c2ecf20Sopenharmony_ci mutex_lock(&dev->mutex); 11008c2ecf20Sopenharmony_ci vhost_dev_lock_vqs(dev); 11018c2ecf20Sopenharmony_ci switch (msg->type) { 11028c2ecf20Sopenharmony_ci case VHOST_IOTLB_UPDATE: 11038c2ecf20Sopenharmony_ci if (!dev->iotlb) { 11048c2ecf20Sopenharmony_ci ret = -EFAULT; 11058c2ecf20Sopenharmony_ci break; 11068c2ecf20Sopenharmony_ci } 11078c2ecf20Sopenharmony_ci if (!umem_access_ok(msg->uaddr, msg->size, msg->perm)) { 11088c2ecf20Sopenharmony_ci ret = -EFAULT; 11098c2ecf20Sopenharmony_ci break; 11108c2ecf20Sopenharmony_ci } 11118c2ecf20Sopenharmony_ci vhost_vq_meta_reset(dev); 11128c2ecf20Sopenharmony_ci if (vhost_iotlb_add_range(dev->iotlb, msg->iova, 11138c2ecf20Sopenharmony_ci msg->iova + msg->size - 1, 11148c2ecf20Sopenharmony_ci msg->uaddr, msg->perm)) { 11158c2ecf20Sopenharmony_ci ret = -ENOMEM; 11168c2ecf20Sopenharmony_ci break; 11178c2ecf20Sopenharmony_ci } 11188c2ecf20Sopenharmony_ci vhost_iotlb_notify_vq(dev, msg); 11198c2ecf20Sopenharmony_ci break; 11208c2ecf20Sopenharmony_ci case VHOST_IOTLB_INVALIDATE: 11218c2ecf20Sopenharmony_ci if (!dev->iotlb) { 11228c2ecf20Sopenharmony_ci ret = -EFAULT; 11238c2ecf20Sopenharmony_ci break; 11248c2ecf20Sopenharmony_ci } 11258c2ecf20Sopenharmony_ci vhost_vq_meta_reset(dev); 11268c2ecf20Sopenharmony_ci vhost_iotlb_del_range(dev->iotlb, msg->iova, 11278c2ecf20Sopenharmony_ci msg->iova + msg->size - 1); 11288c2ecf20Sopenharmony_ci break; 11298c2ecf20Sopenharmony_ci default: 11308c2ecf20Sopenharmony_ci ret = -EINVAL; 11318c2ecf20Sopenharmony_ci break; 11328c2ecf20Sopenharmony_ci } 11338c2ecf20Sopenharmony_ci 11348c2ecf20Sopenharmony_ci vhost_dev_unlock_vqs(dev); 11358c2ecf20Sopenharmony_ci mutex_unlock(&dev->mutex); 11368c2ecf20Sopenharmony_ci 11378c2ecf20Sopenharmony_ci return ret; 11388c2ecf20Sopenharmony_ci} 11398c2ecf20Sopenharmony_cissize_t vhost_chr_write_iter(struct vhost_dev *dev, 11408c2ecf20Sopenharmony_ci struct iov_iter *from) 11418c2ecf20Sopenharmony_ci{ 11428c2ecf20Sopenharmony_ci struct vhost_iotlb_msg msg; 11438c2ecf20Sopenharmony_ci size_t offset; 11448c2ecf20Sopenharmony_ci int type, ret; 11458c2ecf20Sopenharmony_ci 11468c2ecf20Sopenharmony_ci ret = copy_from_iter(&type, sizeof(type), from); 11478c2ecf20Sopenharmony_ci if (ret != sizeof(type)) { 11488c2ecf20Sopenharmony_ci ret = -EINVAL; 11498c2ecf20Sopenharmony_ci goto done; 11508c2ecf20Sopenharmony_ci } 11518c2ecf20Sopenharmony_ci 11528c2ecf20Sopenharmony_ci switch (type) { 11538c2ecf20Sopenharmony_ci case VHOST_IOTLB_MSG: 11548c2ecf20Sopenharmony_ci /* There maybe a hole after type for V1 message type, 11558c2ecf20Sopenharmony_ci * so skip it here. 11568c2ecf20Sopenharmony_ci */ 11578c2ecf20Sopenharmony_ci offset = offsetof(struct vhost_msg, iotlb) - sizeof(int); 11588c2ecf20Sopenharmony_ci break; 11598c2ecf20Sopenharmony_ci case VHOST_IOTLB_MSG_V2: 11608c2ecf20Sopenharmony_ci offset = sizeof(__u32); 11618c2ecf20Sopenharmony_ci break; 11628c2ecf20Sopenharmony_ci default: 11638c2ecf20Sopenharmony_ci ret = -EINVAL; 11648c2ecf20Sopenharmony_ci goto done; 11658c2ecf20Sopenharmony_ci } 11668c2ecf20Sopenharmony_ci 11678c2ecf20Sopenharmony_ci iov_iter_advance(from, offset); 11688c2ecf20Sopenharmony_ci ret = copy_from_iter(&msg, sizeof(msg), from); 11698c2ecf20Sopenharmony_ci if (ret != sizeof(msg)) { 11708c2ecf20Sopenharmony_ci ret = -EINVAL; 11718c2ecf20Sopenharmony_ci goto done; 11728c2ecf20Sopenharmony_ci } 11738c2ecf20Sopenharmony_ci 11748c2ecf20Sopenharmony_ci if (dev->msg_handler) 11758c2ecf20Sopenharmony_ci ret = dev->msg_handler(dev, &msg); 11768c2ecf20Sopenharmony_ci else 11778c2ecf20Sopenharmony_ci ret = vhost_process_iotlb_msg(dev, &msg); 11788c2ecf20Sopenharmony_ci if (ret) { 11798c2ecf20Sopenharmony_ci ret = -EFAULT; 11808c2ecf20Sopenharmony_ci goto done; 11818c2ecf20Sopenharmony_ci } 11828c2ecf20Sopenharmony_ci 11838c2ecf20Sopenharmony_ci ret = (type == VHOST_IOTLB_MSG) ? sizeof(struct vhost_msg) : 11848c2ecf20Sopenharmony_ci sizeof(struct vhost_msg_v2); 11858c2ecf20Sopenharmony_cidone: 11868c2ecf20Sopenharmony_ci return ret; 11878c2ecf20Sopenharmony_ci} 11888c2ecf20Sopenharmony_ciEXPORT_SYMBOL(vhost_chr_write_iter); 11898c2ecf20Sopenharmony_ci 11908c2ecf20Sopenharmony_ci__poll_t vhost_chr_poll(struct file *file, struct vhost_dev *dev, 11918c2ecf20Sopenharmony_ci poll_table *wait) 11928c2ecf20Sopenharmony_ci{ 11938c2ecf20Sopenharmony_ci __poll_t mask = 0; 11948c2ecf20Sopenharmony_ci 11958c2ecf20Sopenharmony_ci poll_wait(file, &dev->wait, wait); 11968c2ecf20Sopenharmony_ci 11978c2ecf20Sopenharmony_ci if (!list_empty(&dev->read_list)) 11988c2ecf20Sopenharmony_ci mask |= EPOLLIN | EPOLLRDNORM; 11998c2ecf20Sopenharmony_ci 12008c2ecf20Sopenharmony_ci return mask; 12018c2ecf20Sopenharmony_ci} 12028c2ecf20Sopenharmony_ciEXPORT_SYMBOL(vhost_chr_poll); 12038c2ecf20Sopenharmony_ci 12048c2ecf20Sopenharmony_cissize_t vhost_chr_read_iter(struct vhost_dev *dev, struct iov_iter *to, 12058c2ecf20Sopenharmony_ci int noblock) 12068c2ecf20Sopenharmony_ci{ 12078c2ecf20Sopenharmony_ci DEFINE_WAIT(wait); 12088c2ecf20Sopenharmony_ci struct vhost_msg_node *node; 12098c2ecf20Sopenharmony_ci ssize_t ret = 0; 12108c2ecf20Sopenharmony_ci unsigned size = sizeof(struct vhost_msg); 12118c2ecf20Sopenharmony_ci 12128c2ecf20Sopenharmony_ci if (iov_iter_count(to) < size) 12138c2ecf20Sopenharmony_ci return 0; 12148c2ecf20Sopenharmony_ci 12158c2ecf20Sopenharmony_ci while (1) { 12168c2ecf20Sopenharmony_ci if (!noblock) 12178c2ecf20Sopenharmony_ci prepare_to_wait(&dev->wait, &wait, 12188c2ecf20Sopenharmony_ci TASK_INTERRUPTIBLE); 12198c2ecf20Sopenharmony_ci 12208c2ecf20Sopenharmony_ci node = vhost_dequeue_msg(dev, &dev->read_list); 12218c2ecf20Sopenharmony_ci if (node) 12228c2ecf20Sopenharmony_ci break; 12238c2ecf20Sopenharmony_ci if (noblock) { 12248c2ecf20Sopenharmony_ci ret = -EAGAIN; 12258c2ecf20Sopenharmony_ci break; 12268c2ecf20Sopenharmony_ci } 12278c2ecf20Sopenharmony_ci if (signal_pending(current)) { 12288c2ecf20Sopenharmony_ci ret = -ERESTARTSYS; 12298c2ecf20Sopenharmony_ci break; 12308c2ecf20Sopenharmony_ci } 12318c2ecf20Sopenharmony_ci if (!dev->iotlb) { 12328c2ecf20Sopenharmony_ci ret = -EBADFD; 12338c2ecf20Sopenharmony_ci break; 12348c2ecf20Sopenharmony_ci } 12358c2ecf20Sopenharmony_ci 12368c2ecf20Sopenharmony_ci schedule(); 12378c2ecf20Sopenharmony_ci } 12388c2ecf20Sopenharmony_ci 12398c2ecf20Sopenharmony_ci if (!noblock) 12408c2ecf20Sopenharmony_ci finish_wait(&dev->wait, &wait); 12418c2ecf20Sopenharmony_ci 12428c2ecf20Sopenharmony_ci if (node) { 12438c2ecf20Sopenharmony_ci struct vhost_iotlb_msg *msg; 12448c2ecf20Sopenharmony_ci void *start = &node->msg; 12458c2ecf20Sopenharmony_ci 12468c2ecf20Sopenharmony_ci switch (node->msg.type) { 12478c2ecf20Sopenharmony_ci case VHOST_IOTLB_MSG: 12488c2ecf20Sopenharmony_ci size = sizeof(node->msg); 12498c2ecf20Sopenharmony_ci msg = &node->msg.iotlb; 12508c2ecf20Sopenharmony_ci break; 12518c2ecf20Sopenharmony_ci case VHOST_IOTLB_MSG_V2: 12528c2ecf20Sopenharmony_ci size = sizeof(node->msg_v2); 12538c2ecf20Sopenharmony_ci msg = &node->msg_v2.iotlb; 12548c2ecf20Sopenharmony_ci break; 12558c2ecf20Sopenharmony_ci default: 12568c2ecf20Sopenharmony_ci BUG(); 12578c2ecf20Sopenharmony_ci break; 12588c2ecf20Sopenharmony_ci } 12598c2ecf20Sopenharmony_ci 12608c2ecf20Sopenharmony_ci ret = copy_to_iter(start, size, to); 12618c2ecf20Sopenharmony_ci if (ret != size || msg->type != VHOST_IOTLB_MISS) { 12628c2ecf20Sopenharmony_ci kfree(node); 12638c2ecf20Sopenharmony_ci return ret; 12648c2ecf20Sopenharmony_ci } 12658c2ecf20Sopenharmony_ci vhost_enqueue_msg(dev, &dev->pending_list, node); 12668c2ecf20Sopenharmony_ci } 12678c2ecf20Sopenharmony_ci 12688c2ecf20Sopenharmony_ci return ret; 12698c2ecf20Sopenharmony_ci} 12708c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_chr_read_iter); 12718c2ecf20Sopenharmony_ci 12728c2ecf20Sopenharmony_cistatic int vhost_iotlb_miss(struct vhost_virtqueue *vq, u64 iova, int access) 12738c2ecf20Sopenharmony_ci{ 12748c2ecf20Sopenharmony_ci struct vhost_dev *dev = vq->dev; 12758c2ecf20Sopenharmony_ci struct vhost_msg_node *node; 12768c2ecf20Sopenharmony_ci struct vhost_iotlb_msg *msg; 12778c2ecf20Sopenharmony_ci bool v2 = vhost_backend_has_feature(vq, VHOST_BACKEND_F_IOTLB_MSG_V2); 12788c2ecf20Sopenharmony_ci 12798c2ecf20Sopenharmony_ci node = vhost_new_msg(vq, v2 ? VHOST_IOTLB_MSG_V2 : VHOST_IOTLB_MSG); 12808c2ecf20Sopenharmony_ci if (!node) 12818c2ecf20Sopenharmony_ci return -ENOMEM; 12828c2ecf20Sopenharmony_ci 12838c2ecf20Sopenharmony_ci if (v2) { 12848c2ecf20Sopenharmony_ci node->msg_v2.type = VHOST_IOTLB_MSG_V2; 12858c2ecf20Sopenharmony_ci msg = &node->msg_v2.iotlb; 12868c2ecf20Sopenharmony_ci } else { 12878c2ecf20Sopenharmony_ci msg = &node->msg.iotlb; 12888c2ecf20Sopenharmony_ci } 12898c2ecf20Sopenharmony_ci 12908c2ecf20Sopenharmony_ci msg->type = VHOST_IOTLB_MISS; 12918c2ecf20Sopenharmony_ci msg->iova = iova; 12928c2ecf20Sopenharmony_ci msg->perm = access; 12938c2ecf20Sopenharmony_ci 12948c2ecf20Sopenharmony_ci vhost_enqueue_msg(dev, &dev->read_list, node); 12958c2ecf20Sopenharmony_ci 12968c2ecf20Sopenharmony_ci return 0; 12978c2ecf20Sopenharmony_ci} 12988c2ecf20Sopenharmony_ci 12998c2ecf20Sopenharmony_cistatic bool vq_access_ok(struct vhost_virtqueue *vq, unsigned int num, 13008c2ecf20Sopenharmony_ci vring_desc_t __user *desc, 13018c2ecf20Sopenharmony_ci vring_avail_t __user *avail, 13028c2ecf20Sopenharmony_ci vring_used_t __user *used) 13038c2ecf20Sopenharmony_ci 13048c2ecf20Sopenharmony_ci{ 13058c2ecf20Sopenharmony_ci /* If an IOTLB device is present, the vring addresses are 13068c2ecf20Sopenharmony_ci * GIOVAs. Access validation occurs at prefetch time. */ 13078c2ecf20Sopenharmony_ci if (vq->iotlb) 13088c2ecf20Sopenharmony_ci return true; 13098c2ecf20Sopenharmony_ci 13108c2ecf20Sopenharmony_ci return access_ok(desc, vhost_get_desc_size(vq, num)) && 13118c2ecf20Sopenharmony_ci access_ok(avail, vhost_get_avail_size(vq, num)) && 13128c2ecf20Sopenharmony_ci access_ok(used, vhost_get_used_size(vq, num)); 13138c2ecf20Sopenharmony_ci} 13148c2ecf20Sopenharmony_ci 13158c2ecf20Sopenharmony_cistatic void vhost_vq_meta_update(struct vhost_virtqueue *vq, 13168c2ecf20Sopenharmony_ci const struct vhost_iotlb_map *map, 13178c2ecf20Sopenharmony_ci int type) 13188c2ecf20Sopenharmony_ci{ 13198c2ecf20Sopenharmony_ci int access = (type == VHOST_ADDR_USED) ? 13208c2ecf20Sopenharmony_ci VHOST_ACCESS_WO : VHOST_ACCESS_RO; 13218c2ecf20Sopenharmony_ci 13228c2ecf20Sopenharmony_ci if (likely(map->perm & access)) 13238c2ecf20Sopenharmony_ci vq->meta_iotlb[type] = map; 13248c2ecf20Sopenharmony_ci} 13258c2ecf20Sopenharmony_ci 13268c2ecf20Sopenharmony_cistatic bool iotlb_access_ok(struct vhost_virtqueue *vq, 13278c2ecf20Sopenharmony_ci int access, u64 addr, u64 len, int type) 13288c2ecf20Sopenharmony_ci{ 13298c2ecf20Sopenharmony_ci const struct vhost_iotlb_map *map; 13308c2ecf20Sopenharmony_ci struct vhost_iotlb *umem = vq->iotlb; 13318c2ecf20Sopenharmony_ci u64 s = 0, size, orig_addr = addr, last = addr + len - 1; 13328c2ecf20Sopenharmony_ci 13338c2ecf20Sopenharmony_ci if (vhost_vq_meta_fetch(vq, addr, len, type)) 13348c2ecf20Sopenharmony_ci return true; 13358c2ecf20Sopenharmony_ci 13368c2ecf20Sopenharmony_ci while (len > s) { 13378c2ecf20Sopenharmony_ci map = vhost_iotlb_itree_first(umem, addr, last); 13388c2ecf20Sopenharmony_ci if (map == NULL || map->start > addr) { 13398c2ecf20Sopenharmony_ci vhost_iotlb_miss(vq, addr, access); 13408c2ecf20Sopenharmony_ci return false; 13418c2ecf20Sopenharmony_ci } else if (!(map->perm & access)) { 13428c2ecf20Sopenharmony_ci /* Report the possible access violation by 13438c2ecf20Sopenharmony_ci * request another translation from userspace. 13448c2ecf20Sopenharmony_ci */ 13458c2ecf20Sopenharmony_ci return false; 13468c2ecf20Sopenharmony_ci } 13478c2ecf20Sopenharmony_ci 13488c2ecf20Sopenharmony_ci size = map->size - addr + map->start; 13498c2ecf20Sopenharmony_ci 13508c2ecf20Sopenharmony_ci if (orig_addr == addr && size >= len) 13518c2ecf20Sopenharmony_ci vhost_vq_meta_update(vq, map, type); 13528c2ecf20Sopenharmony_ci 13538c2ecf20Sopenharmony_ci s += size; 13548c2ecf20Sopenharmony_ci addr += size; 13558c2ecf20Sopenharmony_ci } 13568c2ecf20Sopenharmony_ci 13578c2ecf20Sopenharmony_ci return true; 13588c2ecf20Sopenharmony_ci} 13598c2ecf20Sopenharmony_ci 13608c2ecf20Sopenharmony_ciint vq_meta_prefetch(struct vhost_virtqueue *vq) 13618c2ecf20Sopenharmony_ci{ 13628c2ecf20Sopenharmony_ci unsigned int num = vq->num; 13638c2ecf20Sopenharmony_ci 13648c2ecf20Sopenharmony_ci if (!vq->iotlb) 13658c2ecf20Sopenharmony_ci return 1; 13668c2ecf20Sopenharmony_ci 13678c2ecf20Sopenharmony_ci return iotlb_access_ok(vq, VHOST_MAP_RO, (u64)(uintptr_t)vq->desc, 13688c2ecf20Sopenharmony_ci vhost_get_desc_size(vq, num), VHOST_ADDR_DESC) && 13698c2ecf20Sopenharmony_ci iotlb_access_ok(vq, VHOST_MAP_RO, (u64)(uintptr_t)vq->avail, 13708c2ecf20Sopenharmony_ci vhost_get_avail_size(vq, num), 13718c2ecf20Sopenharmony_ci VHOST_ADDR_AVAIL) && 13728c2ecf20Sopenharmony_ci iotlb_access_ok(vq, VHOST_MAP_WO, (u64)(uintptr_t)vq->used, 13738c2ecf20Sopenharmony_ci vhost_get_used_size(vq, num), VHOST_ADDR_USED); 13748c2ecf20Sopenharmony_ci} 13758c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(vq_meta_prefetch); 13768c2ecf20Sopenharmony_ci 13778c2ecf20Sopenharmony_ci/* Can we log writes? */ 13788c2ecf20Sopenharmony_ci/* Caller should have device mutex but not vq mutex */ 13798c2ecf20Sopenharmony_cibool vhost_log_access_ok(struct vhost_dev *dev) 13808c2ecf20Sopenharmony_ci{ 13818c2ecf20Sopenharmony_ci return memory_access_ok(dev, dev->umem, 1); 13828c2ecf20Sopenharmony_ci} 13838c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_log_access_ok); 13848c2ecf20Sopenharmony_ci 13858c2ecf20Sopenharmony_cistatic bool vq_log_used_access_ok(struct vhost_virtqueue *vq, 13868c2ecf20Sopenharmony_ci void __user *log_base, 13878c2ecf20Sopenharmony_ci bool log_used, 13888c2ecf20Sopenharmony_ci u64 log_addr) 13898c2ecf20Sopenharmony_ci{ 13908c2ecf20Sopenharmony_ci /* If an IOTLB device is present, log_addr is a GIOVA that 13918c2ecf20Sopenharmony_ci * will never be logged by log_used(). */ 13928c2ecf20Sopenharmony_ci if (vq->iotlb) 13938c2ecf20Sopenharmony_ci return true; 13948c2ecf20Sopenharmony_ci 13958c2ecf20Sopenharmony_ci return !log_used || log_access_ok(log_base, log_addr, 13968c2ecf20Sopenharmony_ci vhost_get_used_size(vq, vq->num)); 13978c2ecf20Sopenharmony_ci} 13988c2ecf20Sopenharmony_ci 13998c2ecf20Sopenharmony_ci/* Verify access for write logging. */ 14008c2ecf20Sopenharmony_ci/* Caller should have vq mutex and device mutex */ 14018c2ecf20Sopenharmony_cistatic bool vq_log_access_ok(struct vhost_virtqueue *vq, 14028c2ecf20Sopenharmony_ci void __user *log_base) 14038c2ecf20Sopenharmony_ci{ 14048c2ecf20Sopenharmony_ci return vq_memory_access_ok(log_base, vq->umem, 14058c2ecf20Sopenharmony_ci vhost_has_feature(vq, VHOST_F_LOG_ALL)) && 14068c2ecf20Sopenharmony_ci vq_log_used_access_ok(vq, log_base, vq->log_used, vq->log_addr); 14078c2ecf20Sopenharmony_ci} 14088c2ecf20Sopenharmony_ci 14098c2ecf20Sopenharmony_ci/* Can we start vq? */ 14108c2ecf20Sopenharmony_ci/* Caller should have vq mutex and device mutex */ 14118c2ecf20Sopenharmony_cibool vhost_vq_access_ok(struct vhost_virtqueue *vq) 14128c2ecf20Sopenharmony_ci{ 14138c2ecf20Sopenharmony_ci if (!vq_log_access_ok(vq, vq->log_base)) 14148c2ecf20Sopenharmony_ci return false; 14158c2ecf20Sopenharmony_ci 14168c2ecf20Sopenharmony_ci return vq_access_ok(vq, vq->num, vq->desc, vq->avail, vq->used); 14178c2ecf20Sopenharmony_ci} 14188c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_vq_access_ok); 14198c2ecf20Sopenharmony_ci 14208c2ecf20Sopenharmony_cistatic long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m) 14218c2ecf20Sopenharmony_ci{ 14228c2ecf20Sopenharmony_ci struct vhost_memory mem, *newmem; 14238c2ecf20Sopenharmony_ci struct vhost_memory_region *region; 14248c2ecf20Sopenharmony_ci struct vhost_iotlb *newumem, *oldumem; 14258c2ecf20Sopenharmony_ci unsigned long size = offsetof(struct vhost_memory, regions); 14268c2ecf20Sopenharmony_ci int i; 14278c2ecf20Sopenharmony_ci 14288c2ecf20Sopenharmony_ci if (copy_from_user(&mem, m, size)) 14298c2ecf20Sopenharmony_ci return -EFAULT; 14308c2ecf20Sopenharmony_ci if (mem.padding) 14318c2ecf20Sopenharmony_ci return -EOPNOTSUPP; 14328c2ecf20Sopenharmony_ci if (mem.nregions > max_mem_regions) 14338c2ecf20Sopenharmony_ci return -E2BIG; 14348c2ecf20Sopenharmony_ci newmem = kvzalloc(struct_size(newmem, regions, mem.nregions), 14358c2ecf20Sopenharmony_ci GFP_KERNEL); 14368c2ecf20Sopenharmony_ci if (!newmem) 14378c2ecf20Sopenharmony_ci return -ENOMEM; 14388c2ecf20Sopenharmony_ci 14398c2ecf20Sopenharmony_ci memcpy(newmem, &mem, size); 14408c2ecf20Sopenharmony_ci if (copy_from_user(newmem->regions, m->regions, 14418c2ecf20Sopenharmony_ci flex_array_size(newmem, regions, mem.nregions))) { 14428c2ecf20Sopenharmony_ci kvfree(newmem); 14438c2ecf20Sopenharmony_ci return -EFAULT; 14448c2ecf20Sopenharmony_ci } 14458c2ecf20Sopenharmony_ci 14468c2ecf20Sopenharmony_ci newumem = iotlb_alloc(); 14478c2ecf20Sopenharmony_ci if (!newumem) { 14488c2ecf20Sopenharmony_ci kvfree(newmem); 14498c2ecf20Sopenharmony_ci return -ENOMEM; 14508c2ecf20Sopenharmony_ci } 14518c2ecf20Sopenharmony_ci 14528c2ecf20Sopenharmony_ci for (region = newmem->regions; 14538c2ecf20Sopenharmony_ci region < newmem->regions + mem.nregions; 14548c2ecf20Sopenharmony_ci region++) { 14558c2ecf20Sopenharmony_ci if (vhost_iotlb_add_range(newumem, 14568c2ecf20Sopenharmony_ci region->guest_phys_addr, 14578c2ecf20Sopenharmony_ci region->guest_phys_addr + 14588c2ecf20Sopenharmony_ci region->memory_size - 1, 14598c2ecf20Sopenharmony_ci region->userspace_addr, 14608c2ecf20Sopenharmony_ci VHOST_MAP_RW)) 14618c2ecf20Sopenharmony_ci goto err; 14628c2ecf20Sopenharmony_ci } 14638c2ecf20Sopenharmony_ci 14648c2ecf20Sopenharmony_ci if (!memory_access_ok(d, newumem, 0)) 14658c2ecf20Sopenharmony_ci goto err; 14668c2ecf20Sopenharmony_ci 14678c2ecf20Sopenharmony_ci oldumem = d->umem; 14688c2ecf20Sopenharmony_ci d->umem = newumem; 14698c2ecf20Sopenharmony_ci 14708c2ecf20Sopenharmony_ci /* All memory accesses are done under some VQ mutex. */ 14718c2ecf20Sopenharmony_ci for (i = 0; i < d->nvqs; ++i) { 14728c2ecf20Sopenharmony_ci mutex_lock(&d->vqs[i]->mutex); 14738c2ecf20Sopenharmony_ci d->vqs[i]->umem = newumem; 14748c2ecf20Sopenharmony_ci mutex_unlock(&d->vqs[i]->mutex); 14758c2ecf20Sopenharmony_ci } 14768c2ecf20Sopenharmony_ci 14778c2ecf20Sopenharmony_ci kvfree(newmem); 14788c2ecf20Sopenharmony_ci vhost_iotlb_free(oldumem); 14798c2ecf20Sopenharmony_ci return 0; 14808c2ecf20Sopenharmony_ci 14818c2ecf20Sopenharmony_cierr: 14828c2ecf20Sopenharmony_ci vhost_iotlb_free(newumem); 14838c2ecf20Sopenharmony_ci kvfree(newmem); 14848c2ecf20Sopenharmony_ci return -EFAULT; 14858c2ecf20Sopenharmony_ci} 14868c2ecf20Sopenharmony_ci 14878c2ecf20Sopenharmony_cistatic long vhost_vring_set_num(struct vhost_dev *d, 14888c2ecf20Sopenharmony_ci struct vhost_virtqueue *vq, 14898c2ecf20Sopenharmony_ci void __user *argp) 14908c2ecf20Sopenharmony_ci{ 14918c2ecf20Sopenharmony_ci struct vhost_vring_state s; 14928c2ecf20Sopenharmony_ci 14938c2ecf20Sopenharmony_ci /* Resizing ring with an active backend? 14948c2ecf20Sopenharmony_ci * You don't want to do that. */ 14958c2ecf20Sopenharmony_ci if (vq->private_data) 14968c2ecf20Sopenharmony_ci return -EBUSY; 14978c2ecf20Sopenharmony_ci 14988c2ecf20Sopenharmony_ci if (copy_from_user(&s, argp, sizeof s)) 14998c2ecf20Sopenharmony_ci return -EFAULT; 15008c2ecf20Sopenharmony_ci 15018c2ecf20Sopenharmony_ci if (!s.num || s.num > 0xffff || (s.num & (s.num - 1))) 15028c2ecf20Sopenharmony_ci return -EINVAL; 15038c2ecf20Sopenharmony_ci vq->num = s.num; 15048c2ecf20Sopenharmony_ci 15058c2ecf20Sopenharmony_ci return 0; 15068c2ecf20Sopenharmony_ci} 15078c2ecf20Sopenharmony_ci 15088c2ecf20Sopenharmony_cistatic long vhost_vring_set_addr(struct vhost_dev *d, 15098c2ecf20Sopenharmony_ci struct vhost_virtqueue *vq, 15108c2ecf20Sopenharmony_ci void __user *argp) 15118c2ecf20Sopenharmony_ci{ 15128c2ecf20Sopenharmony_ci struct vhost_vring_addr a; 15138c2ecf20Sopenharmony_ci 15148c2ecf20Sopenharmony_ci if (copy_from_user(&a, argp, sizeof a)) 15158c2ecf20Sopenharmony_ci return -EFAULT; 15168c2ecf20Sopenharmony_ci if (a.flags & ~(0x1 << VHOST_VRING_F_LOG)) 15178c2ecf20Sopenharmony_ci return -EOPNOTSUPP; 15188c2ecf20Sopenharmony_ci 15198c2ecf20Sopenharmony_ci /* For 32bit, verify that the top 32bits of the user 15208c2ecf20Sopenharmony_ci data are set to zero. */ 15218c2ecf20Sopenharmony_ci if ((u64)(unsigned long)a.desc_user_addr != a.desc_user_addr || 15228c2ecf20Sopenharmony_ci (u64)(unsigned long)a.used_user_addr != a.used_user_addr || 15238c2ecf20Sopenharmony_ci (u64)(unsigned long)a.avail_user_addr != a.avail_user_addr) 15248c2ecf20Sopenharmony_ci return -EFAULT; 15258c2ecf20Sopenharmony_ci 15268c2ecf20Sopenharmony_ci /* Make sure it's safe to cast pointers to vring types. */ 15278c2ecf20Sopenharmony_ci BUILD_BUG_ON(__alignof__ *vq->avail > VRING_AVAIL_ALIGN_SIZE); 15288c2ecf20Sopenharmony_ci BUILD_BUG_ON(__alignof__ *vq->used > VRING_USED_ALIGN_SIZE); 15298c2ecf20Sopenharmony_ci if ((a.avail_user_addr & (VRING_AVAIL_ALIGN_SIZE - 1)) || 15308c2ecf20Sopenharmony_ci (a.used_user_addr & (VRING_USED_ALIGN_SIZE - 1)) || 15318c2ecf20Sopenharmony_ci (a.log_guest_addr & (VRING_USED_ALIGN_SIZE - 1))) 15328c2ecf20Sopenharmony_ci return -EINVAL; 15338c2ecf20Sopenharmony_ci 15348c2ecf20Sopenharmony_ci /* We only verify access here if backend is configured. 15358c2ecf20Sopenharmony_ci * If it is not, we don't as size might not have been setup. 15368c2ecf20Sopenharmony_ci * We will verify when backend is configured. */ 15378c2ecf20Sopenharmony_ci if (vq->private_data) { 15388c2ecf20Sopenharmony_ci if (!vq_access_ok(vq, vq->num, 15398c2ecf20Sopenharmony_ci (void __user *)(unsigned long)a.desc_user_addr, 15408c2ecf20Sopenharmony_ci (void __user *)(unsigned long)a.avail_user_addr, 15418c2ecf20Sopenharmony_ci (void __user *)(unsigned long)a.used_user_addr)) 15428c2ecf20Sopenharmony_ci return -EINVAL; 15438c2ecf20Sopenharmony_ci 15448c2ecf20Sopenharmony_ci /* Also validate log access for used ring if enabled. */ 15458c2ecf20Sopenharmony_ci if (!vq_log_used_access_ok(vq, vq->log_base, 15468c2ecf20Sopenharmony_ci a.flags & (0x1 << VHOST_VRING_F_LOG), 15478c2ecf20Sopenharmony_ci a.log_guest_addr)) 15488c2ecf20Sopenharmony_ci return -EINVAL; 15498c2ecf20Sopenharmony_ci } 15508c2ecf20Sopenharmony_ci 15518c2ecf20Sopenharmony_ci vq->log_used = !!(a.flags & (0x1 << VHOST_VRING_F_LOG)); 15528c2ecf20Sopenharmony_ci vq->desc = (void __user *)(unsigned long)a.desc_user_addr; 15538c2ecf20Sopenharmony_ci vq->avail = (void __user *)(unsigned long)a.avail_user_addr; 15548c2ecf20Sopenharmony_ci vq->log_addr = a.log_guest_addr; 15558c2ecf20Sopenharmony_ci vq->used = (void __user *)(unsigned long)a.used_user_addr; 15568c2ecf20Sopenharmony_ci 15578c2ecf20Sopenharmony_ci return 0; 15588c2ecf20Sopenharmony_ci} 15598c2ecf20Sopenharmony_ci 15608c2ecf20Sopenharmony_cistatic long vhost_vring_set_num_addr(struct vhost_dev *d, 15618c2ecf20Sopenharmony_ci struct vhost_virtqueue *vq, 15628c2ecf20Sopenharmony_ci unsigned int ioctl, 15638c2ecf20Sopenharmony_ci void __user *argp) 15648c2ecf20Sopenharmony_ci{ 15658c2ecf20Sopenharmony_ci long r; 15668c2ecf20Sopenharmony_ci 15678c2ecf20Sopenharmony_ci mutex_lock(&vq->mutex); 15688c2ecf20Sopenharmony_ci 15698c2ecf20Sopenharmony_ci switch (ioctl) { 15708c2ecf20Sopenharmony_ci case VHOST_SET_VRING_NUM: 15718c2ecf20Sopenharmony_ci r = vhost_vring_set_num(d, vq, argp); 15728c2ecf20Sopenharmony_ci break; 15738c2ecf20Sopenharmony_ci case VHOST_SET_VRING_ADDR: 15748c2ecf20Sopenharmony_ci r = vhost_vring_set_addr(d, vq, argp); 15758c2ecf20Sopenharmony_ci break; 15768c2ecf20Sopenharmony_ci default: 15778c2ecf20Sopenharmony_ci BUG(); 15788c2ecf20Sopenharmony_ci } 15798c2ecf20Sopenharmony_ci 15808c2ecf20Sopenharmony_ci mutex_unlock(&vq->mutex); 15818c2ecf20Sopenharmony_ci 15828c2ecf20Sopenharmony_ci return r; 15838c2ecf20Sopenharmony_ci} 15848c2ecf20Sopenharmony_cilong vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp) 15858c2ecf20Sopenharmony_ci{ 15868c2ecf20Sopenharmony_ci struct file *eventfp, *filep = NULL; 15878c2ecf20Sopenharmony_ci bool pollstart = false, pollstop = false; 15888c2ecf20Sopenharmony_ci struct eventfd_ctx *ctx = NULL; 15898c2ecf20Sopenharmony_ci u32 __user *idxp = argp; 15908c2ecf20Sopenharmony_ci struct vhost_virtqueue *vq; 15918c2ecf20Sopenharmony_ci struct vhost_vring_state s; 15928c2ecf20Sopenharmony_ci struct vhost_vring_file f; 15938c2ecf20Sopenharmony_ci u32 idx; 15948c2ecf20Sopenharmony_ci long r; 15958c2ecf20Sopenharmony_ci 15968c2ecf20Sopenharmony_ci r = get_user(idx, idxp); 15978c2ecf20Sopenharmony_ci if (r < 0) 15988c2ecf20Sopenharmony_ci return r; 15998c2ecf20Sopenharmony_ci if (idx >= d->nvqs) 16008c2ecf20Sopenharmony_ci return -ENOBUFS; 16018c2ecf20Sopenharmony_ci 16028c2ecf20Sopenharmony_ci idx = array_index_nospec(idx, d->nvqs); 16038c2ecf20Sopenharmony_ci vq = d->vqs[idx]; 16048c2ecf20Sopenharmony_ci 16058c2ecf20Sopenharmony_ci if (ioctl == VHOST_SET_VRING_NUM || 16068c2ecf20Sopenharmony_ci ioctl == VHOST_SET_VRING_ADDR) { 16078c2ecf20Sopenharmony_ci return vhost_vring_set_num_addr(d, vq, ioctl, argp); 16088c2ecf20Sopenharmony_ci } 16098c2ecf20Sopenharmony_ci 16108c2ecf20Sopenharmony_ci mutex_lock(&vq->mutex); 16118c2ecf20Sopenharmony_ci 16128c2ecf20Sopenharmony_ci switch (ioctl) { 16138c2ecf20Sopenharmony_ci case VHOST_SET_VRING_BASE: 16148c2ecf20Sopenharmony_ci /* Moving base with an active backend? 16158c2ecf20Sopenharmony_ci * You don't want to do that. */ 16168c2ecf20Sopenharmony_ci if (vq->private_data) { 16178c2ecf20Sopenharmony_ci r = -EBUSY; 16188c2ecf20Sopenharmony_ci break; 16198c2ecf20Sopenharmony_ci } 16208c2ecf20Sopenharmony_ci if (copy_from_user(&s, argp, sizeof s)) { 16218c2ecf20Sopenharmony_ci r = -EFAULT; 16228c2ecf20Sopenharmony_ci break; 16238c2ecf20Sopenharmony_ci } 16248c2ecf20Sopenharmony_ci if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) { 16258c2ecf20Sopenharmony_ci vq->last_avail_idx = s.num & 0xffff; 16268c2ecf20Sopenharmony_ci vq->last_used_idx = (s.num >> 16) & 0xffff; 16278c2ecf20Sopenharmony_ci } else { 16288c2ecf20Sopenharmony_ci if (s.num > 0xffff) { 16298c2ecf20Sopenharmony_ci r = -EINVAL; 16308c2ecf20Sopenharmony_ci break; 16318c2ecf20Sopenharmony_ci } 16328c2ecf20Sopenharmony_ci vq->last_avail_idx = s.num; 16338c2ecf20Sopenharmony_ci } 16348c2ecf20Sopenharmony_ci /* Forget the cached index value. */ 16358c2ecf20Sopenharmony_ci vq->avail_idx = vq->last_avail_idx; 16368c2ecf20Sopenharmony_ci break; 16378c2ecf20Sopenharmony_ci case VHOST_GET_VRING_BASE: 16388c2ecf20Sopenharmony_ci s.index = idx; 16398c2ecf20Sopenharmony_ci if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) 16408c2ecf20Sopenharmony_ci s.num = (u32)vq->last_avail_idx | ((u32)vq->last_used_idx << 16); 16418c2ecf20Sopenharmony_ci else 16428c2ecf20Sopenharmony_ci s.num = vq->last_avail_idx; 16438c2ecf20Sopenharmony_ci if (copy_to_user(argp, &s, sizeof s)) 16448c2ecf20Sopenharmony_ci r = -EFAULT; 16458c2ecf20Sopenharmony_ci break; 16468c2ecf20Sopenharmony_ci case VHOST_SET_VRING_KICK: 16478c2ecf20Sopenharmony_ci if (copy_from_user(&f, argp, sizeof f)) { 16488c2ecf20Sopenharmony_ci r = -EFAULT; 16498c2ecf20Sopenharmony_ci break; 16508c2ecf20Sopenharmony_ci } 16518c2ecf20Sopenharmony_ci eventfp = f.fd == VHOST_FILE_UNBIND ? NULL : eventfd_fget(f.fd); 16528c2ecf20Sopenharmony_ci if (IS_ERR(eventfp)) { 16538c2ecf20Sopenharmony_ci r = PTR_ERR(eventfp); 16548c2ecf20Sopenharmony_ci break; 16558c2ecf20Sopenharmony_ci } 16568c2ecf20Sopenharmony_ci if (eventfp != vq->kick) { 16578c2ecf20Sopenharmony_ci pollstop = (filep = vq->kick) != NULL; 16588c2ecf20Sopenharmony_ci pollstart = (vq->kick = eventfp) != NULL; 16598c2ecf20Sopenharmony_ci } else 16608c2ecf20Sopenharmony_ci filep = eventfp; 16618c2ecf20Sopenharmony_ci break; 16628c2ecf20Sopenharmony_ci case VHOST_SET_VRING_CALL: 16638c2ecf20Sopenharmony_ci if (copy_from_user(&f, argp, sizeof f)) { 16648c2ecf20Sopenharmony_ci r = -EFAULT; 16658c2ecf20Sopenharmony_ci break; 16668c2ecf20Sopenharmony_ci } 16678c2ecf20Sopenharmony_ci ctx = f.fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(f.fd); 16688c2ecf20Sopenharmony_ci if (IS_ERR(ctx)) { 16698c2ecf20Sopenharmony_ci r = PTR_ERR(ctx); 16708c2ecf20Sopenharmony_ci break; 16718c2ecf20Sopenharmony_ci } 16728c2ecf20Sopenharmony_ci 16738c2ecf20Sopenharmony_ci swap(ctx, vq->call_ctx.ctx); 16748c2ecf20Sopenharmony_ci break; 16758c2ecf20Sopenharmony_ci case VHOST_SET_VRING_ERR: 16768c2ecf20Sopenharmony_ci if (copy_from_user(&f, argp, sizeof f)) { 16778c2ecf20Sopenharmony_ci r = -EFAULT; 16788c2ecf20Sopenharmony_ci break; 16798c2ecf20Sopenharmony_ci } 16808c2ecf20Sopenharmony_ci ctx = f.fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(f.fd); 16818c2ecf20Sopenharmony_ci if (IS_ERR(ctx)) { 16828c2ecf20Sopenharmony_ci r = PTR_ERR(ctx); 16838c2ecf20Sopenharmony_ci break; 16848c2ecf20Sopenharmony_ci } 16858c2ecf20Sopenharmony_ci swap(ctx, vq->error_ctx); 16868c2ecf20Sopenharmony_ci break; 16878c2ecf20Sopenharmony_ci case VHOST_SET_VRING_ENDIAN: 16888c2ecf20Sopenharmony_ci r = vhost_set_vring_endian(vq, argp); 16898c2ecf20Sopenharmony_ci break; 16908c2ecf20Sopenharmony_ci case VHOST_GET_VRING_ENDIAN: 16918c2ecf20Sopenharmony_ci r = vhost_get_vring_endian(vq, idx, argp); 16928c2ecf20Sopenharmony_ci break; 16938c2ecf20Sopenharmony_ci case VHOST_SET_VRING_BUSYLOOP_TIMEOUT: 16948c2ecf20Sopenharmony_ci if (copy_from_user(&s, argp, sizeof(s))) { 16958c2ecf20Sopenharmony_ci r = -EFAULT; 16968c2ecf20Sopenharmony_ci break; 16978c2ecf20Sopenharmony_ci } 16988c2ecf20Sopenharmony_ci vq->busyloop_timeout = s.num; 16998c2ecf20Sopenharmony_ci break; 17008c2ecf20Sopenharmony_ci case VHOST_GET_VRING_BUSYLOOP_TIMEOUT: 17018c2ecf20Sopenharmony_ci s.index = idx; 17028c2ecf20Sopenharmony_ci s.num = vq->busyloop_timeout; 17038c2ecf20Sopenharmony_ci if (copy_to_user(argp, &s, sizeof(s))) 17048c2ecf20Sopenharmony_ci r = -EFAULT; 17058c2ecf20Sopenharmony_ci break; 17068c2ecf20Sopenharmony_ci default: 17078c2ecf20Sopenharmony_ci r = -ENOIOCTLCMD; 17088c2ecf20Sopenharmony_ci } 17098c2ecf20Sopenharmony_ci 17108c2ecf20Sopenharmony_ci if (pollstop && vq->handle_kick) 17118c2ecf20Sopenharmony_ci vhost_poll_stop(&vq->poll); 17128c2ecf20Sopenharmony_ci 17138c2ecf20Sopenharmony_ci if (!IS_ERR_OR_NULL(ctx)) 17148c2ecf20Sopenharmony_ci eventfd_ctx_put(ctx); 17158c2ecf20Sopenharmony_ci if (filep) 17168c2ecf20Sopenharmony_ci fput(filep); 17178c2ecf20Sopenharmony_ci 17188c2ecf20Sopenharmony_ci if (pollstart && vq->handle_kick) 17198c2ecf20Sopenharmony_ci r = vhost_poll_start(&vq->poll, vq->kick); 17208c2ecf20Sopenharmony_ci 17218c2ecf20Sopenharmony_ci mutex_unlock(&vq->mutex); 17228c2ecf20Sopenharmony_ci 17238c2ecf20Sopenharmony_ci if (pollstop && vq->handle_kick) 17248c2ecf20Sopenharmony_ci vhost_poll_flush(&vq->poll); 17258c2ecf20Sopenharmony_ci return r; 17268c2ecf20Sopenharmony_ci} 17278c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_vring_ioctl); 17288c2ecf20Sopenharmony_ci 17298c2ecf20Sopenharmony_ciint vhost_init_device_iotlb(struct vhost_dev *d, bool enabled) 17308c2ecf20Sopenharmony_ci{ 17318c2ecf20Sopenharmony_ci struct vhost_iotlb *niotlb, *oiotlb; 17328c2ecf20Sopenharmony_ci int i; 17338c2ecf20Sopenharmony_ci 17348c2ecf20Sopenharmony_ci niotlb = iotlb_alloc(); 17358c2ecf20Sopenharmony_ci if (!niotlb) 17368c2ecf20Sopenharmony_ci return -ENOMEM; 17378c2ecf20Sopenharmony_ci 17388c2ecf20Sopenharmony_ci oiotlb = d->iotlb; 17398c2ecf20Sopenharmony_ci d->iotlb = niotlb; 17408c2ecf20Sopenharmony_ci 17418c2ecf20Sopenharmony_ci for (i = 0; i < d->nvqs; ++i) { 17428c2ecf20Sopenharmony_ci struct vhost_virtqueue *vq = d->vqs[i]; 17438c2ecf20Sopenharmony_ci 17448c2ecf20Sopenharmony_ci mutex_lock(&vq->mutex); 17458c2ecf20Sopenharmony_ci vq->iotlb = niotlb; 17468c2ecf20Sopenharmony_ci __vhost_vq_meta_reset(vq); 17478c2ecf20Sopenharmony_ci mutex_unlock(&vq->mutex); 17488c2ecf20Sopenharmony_ci } 17498c2ecf20Sopenharmony_ci 17508c2ecf20Sopenharmony_ci vhost_iotlb_free(oiotlb); 17518c2ecf20Sopenharmony_ci 17528c2ecf20Sopenharmony_ci return 0; 17538c2ecf20Sopenharmony_ci} 17548c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_init_device_iotlb); 17558c2ecf20Sopenharmony_ci 17568c2ecf20Sopenharmony_ci/* Caller must have device mutex */ 17578c2ecf20Sopenharmony_cilong vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp) 17588c2ecf20Sopenharmony_ci{ 17598c2ecf20Sopenharmony_ci struct eventfd_ctx *ctx; 17608c2ecf20Sopenharmony_ci u64 p; 17618c2ecf20Sopenharmony_ci long r; 17628c2ecf20Sopenharmony_ci int i, fd; 17638c2ecf20Sopenharmony_ci 17648c2ecf20Sopenharmony_ci /* If you are not the owner, you can become one */ 17658c2ecf20Sopenharmony_ci if (ioctl == VHOST_SET_OWNER) { 17668c2ecf20Sopenharmony_ci r = vhost_dev_set_owner(d); 17678c2ecf20Sopenharmony_ci goto done; 17688c2ecf20Sopenharmony_ci } 17698c2ecf20Sopenharmony_ci 17708c2ecf20Sopenharmony_ci /* You must be the owner to do anything else */ 17718c2ecf20Sopenharmony_ci r = vhost_dev_check_owner(d); 17728c2ecf20Sopenharmony_ci if (r) 17738c2ecf20Sopenharmony_ci goto done; 17748c2ecf20Sopenharmony_ci 17758c2ecf20Sopenharmony_ci switch (ioctl) { 17768c2ecf20Sopenharmony_ci case VHOST_SET_MEM_TABLE: 17778c2ecf20Sopenharmony_ci r = vhost_set_memory(d, argp); 17788c2ecf20Sopenharmony_ci break; 17798c2ecf20Sopenharmony_ci case VHOST_SET_LOG_BASE: 17808c2ecf20Sopenharmony_ci if (copy_from_user(&p, argp, sizeof p)) { 17818c2ecf20Sopenharmony_ci r = -EFAULT; 17828c2ecf20Sopenharmony_ci break; 17838c2ecf20Sopenharmony_ci } 17848c2ecf20Sopenharmony_ci if ((u64)(unsigned long)p != p) { 17858c2ecf20Sopenharmony_ci r = -EFAULT; 17868c2ecf20Sopenharmony_ci break; 17878c2ecf20Sopenharmony_ci } 17888c2ecf20Sopenharmony_ci for (i = 0; i < d->nvqs; ++i) { 17898c2ecf20Sopenharmony_ci struct vhost_virtqueue *vq; 17908c2ecf20Sopenharmony_ci void __user *base = (void __user *)(unsigned long)p; 17918c2ecf20Sopenharmony_ci vq = d->vqs[i]; 17928c2ecf20Sopenharmony_ci mutex_lock(&vq->mutex); 17938c2ecf20Sopenharmony_ci /* If ring is inactive, will check when it's enabled. */ 17948c2ecf20Sopenharmony_ci if (vq->private_data && !vq_log_access_ok(vq, base)) 17958c2ecf20Sopenharmony_ci r = -EFAULT; 17968c2ecf20Sopenharmony_ci else 17978c2ecf20Sopenharmony_ci vq->log_base = base; 17988c2ecf20Sopenharmony_ci mutex_unlock(&vq->mutex); 17998c2ecf20Sopenharmony_ci } 18008c2ecf20Sopenharmony_ci break; 18018c2ecf20Sopenharmony_ci case VHOST_SET_LOG_FD: 18028c2ecf20Sopenharmony_ci r = get_user(fd, (int __user *)argp); 18038c2ecf20Sopenharmony_ci if (r < 0) 18048c2ecf20Sopenharmony_ci break; 18058c2ecf20Sopenharmony_ci ctx = fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(fd); 18068c2ecf20Sopenharmony_ci if (IS_ERR(ctx)) { 18078c2ecf20Sopenharmony_ci r = PTR_ERR(ctx); 18088c2ecf20Sopenharmony_ci break; 18098c2ecf20Sopenharmony_ci } 18108c2ecf20Sopenharmony_ci swap(ctx, d->log_ctx); 18118c2ecf20Sopenharmony_ci for (i = 0; i < d->nvqs; ++i) { 18128c2ecf20Sopenharmony_ci mutex_lock(&d->vqs[i]->mutex); 18138c2ecf20Sopenharmony_ci d->vqs[i]->log_ctx = d->log_ctx; 18148c2ecf20Sopenharmony_ci mutex_unlock(&d->vqs[i]->mutex); 18158c2ecf20Sopenharmony_ci } 18168c2ecf20Sopenharmony_ci if (ctx) 18178c2ecf20Sopenharmony_ci eventfd_ctx_put(ctx); 18188c2ecf20Sopenharmony_ci break; 18198c2ecf20Sopenharmony_ci default: 18208c2ecf20Sopenharmony_ci r = -ENOIOCTLCMD; 18218c2ecf20Sopenharmony_ci break; 18228c2ecf20Sopenharmony_ci } 18238c2ecf20Sopenharmony_cidone: 18248c2ecf20Sopenharmony_ci return r; 18258c2ecf20Sopenharmony_ci} 18268c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_dev_ioctl); 18278c2ecf20Sopenharmony_ci 18288c2ecf20Sopenharmony_ci/* TODO: This is really inefficient. We need something like get_user() 18298c2ecf20Sopenharmony_ci * (instruction directly accesses the data, with an exception table entry 18308c2ecf20Sopenharmony_ci * returning -EFAULT). See Documentation/x86/exception-tables.rst. 18318c2ecf20Sopenharmony_ci */ 18328c2ecf20Sopenharmony_cistatic int set_bit_to_user(int nr, void __user *addr) 18338c2ecf20Sopenharmony_ci{ 18348c2ecf20Sopenharmony_ci unsigned long log = (unsigned long)addr; 18358c2ecf20Sopenharmony_ci struct page *page; 18368c2ecf20Sopenharmony_ci void *base; 18378c2ecf20Sopenharmony_ci int bit = nr + (log % PAGE_SIZE) * 8; 18388c2ecf20Sopenharmony_ci int r; 18398c2ecf20Sopenharmony_ci 18408c2ecf20Sopenharmony_ci r = pin_user_pages_fast(log, 1, FOLL_WRITE, &page); 18418c2ecf20Sopenharmony_ci if (r < 0) 18428c2ecf20Sopenharmony_ci return r; 18438c2ecf20Sopenharmony_ci BUG_ON(r != 1); 18448c2ecf20Sopenharmony_ci base = kmap_atomic(page); 18458c2ecf20Sopenharmony_ci set_bit(bit, base); 18468c2ecf20Sopenharmony_ci kunmap_atomic(base); 18478c2ecf20Sopenharmony_ci unpin_user_pages_dirty_lock(&page, 1, true); 18488c2ecf20Sopenharmony_ci return 0; 18498c2ecf20Sopenharmony_ci} 18508c2ecf20Sopenharmony_ci 18518c2ecf20Sopenharmony_cistatic int log_write(void __user *log_base, 18528c2ecf20Sopenharmony_ci u64 write_address, u64 write_length) 18538c2ecf20Sopenharmony_ci{ 18548c2ecf20Sopenharmony_ci u64 write_page = write_address / VHOST_PAGE_SIZE; 18558c2ecf20Sopenharmony_ci int r; 18568c2ecf20Sopenharmony_ci 18578c2ecf20Sopenharmony_ci if (!write_length) 18588c2ecf20Sopenharmony_ci return 0; 18598c2ecf20Sopenharmony_ci write_length += write_address % VHOST_PAGE_SIZE; 18608c2ecf20Sopenharmony_ci for (;;) { 18618c2ecf20Sopenharmony_ci u64 base = (u64)(unsigned long)log_base; 18628c2ecf20Sopenharmony_ci u64 log = base + write_page / 8; 18638c2ecf20Sopenharmony_ci int bit = write_page % 8; 18648c2ecf20Sopenharmony_ci if ((u64)(unsigned long)log != log) 18658c2ecf20Sopenharmony_ci return -EFAULT; 18668c2ecf20Sopenharmony_ci r = set_bit_to_user(bit, (void __user *)(unsigned long)log); 18678c2ecf20Sopenharmony_ci if (r < 0) 18688c2ecf20Sopenharmony_ci return r; 18698c2ecf20Sopenharmony_ci if (write_length <= VHOST_PAGE_SIZE) 18708c2ecf20Sopenharmony_ci break; 18718c2ecf20Sopenharmony_ci write_length -= VHOST_PAGE_SIZE; 18728c2ecf20Sopenharmony_ci write_page += 1; 18738c2ecf20Sopenharmony_ci } 18748c2ecf20Sopenharmony_ci return r; 18758c2ecf20Sopenharmony_ci} 18768c2ecf20Sopenharmony_ci 18778c2ecf20Sopenharmony_cistatic int log_write_hva(struct vhost_virtqueue *vq, u64 hva, u64 len) 18788c2ecf20Sopenharmony_ci{ 18798c2ecf20Sopenharmony_ci struct vhost_iotlb *umem = vq->umem; 18808c2ecf20Sopenharmony_ci struct vhost_iotlb_map *u; 18818c2ecf20Sopenharmony_ci u64 start, end, l, min; 18828c2ecf20Sopenharmony_ci int r; 18838c2ecf20Sopenharmony_ci bool hit = false; 18848c2ecf20Sopenharmony_ci 18858c2ecf20Sopenharmony_ci while (len) { 18868c2ecf20Sopenharmony_ci min = len; 18878c2ecf20Sopenharmony_ci /* More than one GPAs can be mapped into a single HVA. So 18888c2ecf20Sopenharmony_ci * iterate all possible umems here to be safe. 18898c2ecf20Sopenharmony_ci */ 18908c2ecf20Sopenharmony_ci list_for_each_entry(u, &umem->list, link) { 18918c2ecf20Sopenharmony_ci if (u->addr > hva - 1 + len || 18928c2ecf20Sopenharmony_ci u->addr - 1 + u->size < hva) 18938c2ecf20Sopenharmony_ci continue; 18948c2ecf20Sopenharmony_ci start = max(u->addr, hva); 18958c2ecf20Sopenharmony_ci end = min(u->addr - 1 + u->size, hva - 1 + len); 18968c2ecf20Sopenharmony_ci l = end - start + 1; 18978c2ecf20Sopenharmony_ci r = log_write(vq->log_base, 18988c2ecf20Sopenharmony_ci u->start + start - u->addr, 18998c2ecf20Sopenharmony_ci l); 19008c2ecf20Sopenharmony_ci if (r < 0) 19018c2ecf20Sopenharmony_ci return r; 19028c2ecf20Sopenharmony_ci hit = true; 19038c2ecf20Sopenharmony_ci min = min(l, min); 19048c2ecf20Sopenharmony_ci } 19058c2ecf20Sopenharmony_ci 19068c2ecf20Sopenharmony_ci if (!hit) 19078c2ecf20Sopenharmony_ci return -EFAULT; 19088c2ecf20Sopenharmony_ci 19098c2ecf20Sopenharmony_ci len -= min; 19108c2ecf20Sopenharmony_ci hva += min; 19118c2ecf20Sopenharmony_ci } 19128c2ecf20Sopenharmony_ci 19138c2ecf20Sopenharmony_ci return 0; 19148c2ecf20Sopenharmony_ci} 19158c2ecf20Sopenharmony_ci 19168c2ecf20Sopenharmony_cistatic int log_used(struct vhost_virtqueue *vq, u64 used_offset, u64 len) 19178c2ecf20Sopenharmony_ci{ 19188c2ecf20Sopenharmony_ci struct iovec *iov = vq->log_iov; 19198c2ecf20Sopenharmony_ci int i, ret; 19208c2ecf20Sopenharmony_ci 19218c2ecf20Sopenharmony_ci if (!vq->iotlb) 19228c2ecf20Sopenharmony_ci return log_write(vq->log_base, vq->log_addr + used_offset, len); 19238c2ecf20Sopenharmony_ci 19248c2ecf20Sopenharmony_ci ret = translate_desc(vq, (uintptr_t)vq->used + used_offset, 19258c2ecf20Sopenharmony_ci len, iov, 64, VHOST_ACCESS_WO); 19268c2ecf20Sopenharmony_ci if (ret < 0) 19278c2ecf20Sopenharmony_ci return ret; 19288c2ecf20Sopenharmony_ci 19298c2ecf20Sopenharmony_ci for (i = 0; i < ret; i++) { 19308c2ecf20Sopenharmony_ci ret = log_write_hva(vq, (uintptr_t)iov[i].iov_base, 19318c2ecf20Sopenharmony_ci iov[i].iov_len); 19328c2ecf20Sopenharmony_ci if (ret) 19338c2ecf20Sopenharmony_ci return ret; 19348c2ecf20Sopenharmony_ci } 19358c2ecf20Sopenharmony_ci 19368c2ecf20Sopenharmony_ci return 0; 19378c2ecf20Sopenharmony_ci} 19388c2ecf20Sopenharmony_ci 19398c2ecf20Sopenharmony_ciint vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, 19408c2ecf20Sopenharmony_ci unsigned int log_num, u64 len, struct iovec *iov, int count) 19418c2ecf20Sopenharmony_ci{ 19428c2ecf20Sopenharmony_ci int i, r; 19438c2ecf20Sopenharmony_ci 19448c2ecf20Sopenharmony_ci /* Make sure data written is seen before log. */ 19458c2ecf20Sopenharmony_ci smp_wmb(); 19468c2ecf20Sopenharmony_ci 19478c2ecf20Sopenharmony_ci if (vq->iotlb) { 19488c2ecf20Sopenharmony_ci for (i = 0; i < count; i++) { 19498c2ecf20Sopenharmony_ci r = log_write_hva(vq, (uintptr_t)iov[i].iov_base, 19508c2ecf20Sopenharmony_ci iov[i].iov_len); 19518c2ecf20Sopenharmony_ci if (r < 0) 19528c2ecf20Sopenharmony_ci return r; 19538c2ecf20Sopenharmony_ci } 19548c2ecf20Sopenharmony_ci return 0; 19558c2ecf20Sopenharmony_ci } 19568c2ecf20Sopenharmony_ci 19578c2ecf20Sopenharmony_ci for (i = 0; i < log_num; ++i) { 19588c2ecf20Sopenharmony_ci u64 l = min(log[i].len, len); 19598c2ecf20Sopenharmony_ci r = log_write(vq->log_base, log[i].addr, l); 19608c2ecf20Sopenharmony_ci if (r < 0) 19618c2ecf20Sopenharmony_ci return r; 19628c2ecf20Sopenharmony_ci len -= l; 19638c2ecf20Sopenharmony_ci if (!len) { 19648c2ecf20Sopenharmony_ci if (vq->log_ctx) 19658c2ecf20Sopenharmony_ci eventfd_signal(vq->log_ctx, 1); 19668c2ecf20Sopenharmony_ci return 0; 19678c2ecf20Sopenharmony_ci } 19688c2ecf20Sopenharmony_ci } 19698c2ecf20Sopenharmony_ci /* Length written exceeds what we have stored. This is a bug. */ 19708c2ecf20Sopenharmony_ci BUG(); 19718c2ecf20Sopenharmony_ci return 0; 19728c2ecf20Sopenharmony_ci} 19738c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_log_write); 19748c2ecf20Sopenharmony_ci 19758c2ecf20Sopenharmony_cistatic int vhost_update_used_flags(struct vhost_virtqueue *vq) 19768c2ecf20Sopenharmony_ci{ 19778c2ecf20Sopenharmony_ci void __user *used; 19788c2ecf20Sopenharmony_ci if (vhost_put_used_flags(vq)) 19798c2ecf20Sopenharmony_ci return -EFAULT; 19808c2ecf20Sopenharmony_ci if (unlikely(vq->log_used)) { 19818c2ecf20Sopenharmony_ci /* Make sure the flag is seen before log. */ 19828c2ecf20Sopenharmony_ci smp_wmb(); 19838c2ecf20Sopenharmony_ci /* Log used flag write. */ 19848c2ecf20Sopenharmony_ci used = &vq->used->flags; 19858c2ecf20Sopenharmony_ci log_used(vq, (used - (void __user *)vq->used), 19868c2ecf20Sopenharmony_ci sizeof vq->used->flags); 19878c2ecf20Sopenharmony_ci if (vq->log_ctx) 19888c2ecf20Sopenharmony_ci eventfd_signal(vq->log_ctx, 1); 19898c2ecf20Sopenharmony_ci } 19908c2ecf20Sopenharmony_ci return 0; 19918c2ecf20Sopenharmony_ci} 19928c2ecf20Sopenharmony_ci 19938c2ecf20Sopenharmony_cistatic int vhost_update_avail_event(struct vhost_virtqueue *vq, u16 avail_event) 19948c2ecf20Sopenharmony_ci{ 19958c2ecf20Sopenharmony_ci if (vhost_put_avail_event(vq)) 19968c2ecf20Sopenharmony_ci return -EFAULT; 19978c2ecf20Sopenharmony_ci if (unlikely(vq->log_used)) { 19988c2ecf20Sopenharmony_ci void __user *used; 19998c2ecf20Sopenharmony_ci /* Make sure the event is seen before log. */ 20008c2ecf20Sopenharmony_ci smp_wmb(); 20018c2ecf20Sopenharmony_ci /* Log avail event write */ 20028c2ecf20Sopenharmony_ci used = vhost_avail_event(vq); 20038c2ecf20Sopenharmony_ci log_used(vq, (used - (void __user *)vq->used), 20048c2ecf20Sopenharmony_ci sizeof *vhost_avail_event(vq)); 20058c2ecf20Sopenharmony_ci if (vq->log_ctx) 20068c2ecf20Sopenharmony_ci eventfd_signal(vq->log_ctx, 1); 20078c2ecf20Sopenharmony_ci } 20088c2ecf20Sopenharmony_ci return 0; 20098c2ecf20Sopenharmony_ci} 20108c2ecf20Sopenharmony_ci 20118c2ecf20Sopenharmony_ciint vhost_vq_init_access(struct vhost_virtqueue *vq) 20128c2ecf20Sopenharmony_ci{ 20138c2ecf20Sopenharmony_ci __virtio16 last_used_idx; 20148c2ecf20Sopenharmony_ci int r; 20158c2ecf20Sopenharmony_ci bool is_le = vq->is_le; 20168c2ecf20Sopenharmony_ci 20178c2ecf20Sopenharmony_ci if (!vq->private_data) 20188c2ecf20Sopenharmony_ci return 0; 20198c2ecf20Sopenharmony_ci 20208c2ecf20Sopenharmony_ci vhost_init_is_le(vq); 20218c2ecf20Sopenharmony_ci 20228c2ecf20Sopenharmony_ci r = vhost_update_used_flags(vq); 20238c2ecf20Sopenharmony_ci if (r) 20248c2ecf20Sopenharmony_ci goto err; 20258c2ecf20Sopenharmony_ci vq->signalled_used_valid = false; 20268c2ecf20Sopenharmony_ci if (!vq->iotlb && 20278c2ecf20Sopenharmony_ci !access_ok(&vq->used->idx, sizeof vq->used->idx)) { 20288c2ecf20Sopenharmony_ci r = -EFAULT; 20298c2ecf20Sopenharmony_ci goto err; 20308c2ecf20Sopenharmony_ci } 20318c2ecf20Sopenharmony_ci r = vhost_get_used_idx(vq, &last_used_idx); 20328c2ecf20Sopenharmony_ci if (r) { 20338c2ecf20Sopenharmony_ci vq_err(vq, "Can't access used idx at %p\n", 20348c2ecf20Sopenharmony_ci &vq->used->idx); 20358c2ecf20Sopenharmony_ci goto err; 20368c2ecf20Sopenharmony_ci } 20378c2ecf20Sopenharmony_ci vq->last_used_idx = vhost16_to_cpu(vq, last_used_idx); 20388c2ecf20Sopenharmony_ci return 0; 20398c2ecf20Sopenharmony_ci 20408c2ecf20Sopenharmony_cierr: 20418c2ecf20Sopenharmony_ci vq->is_le = is_le; 20428c2ecf20Sopenharmony_ci return r; 20438c2ecf20Sopenharmony_ci} 20448c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_vq_init_access); 20458c2ecf20Sopenharmony_ci 20468c2ecf20Sopenharmony_cistatic int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len, 20478c2ecf20Sopenharmony_ci struct iovec iov[], int iov_size, int access) 20488c2ecf20Sopenharmony_ci{ 20498c2ecf20Sopenharmony_ci const struct vhost_iotlb_map *map; 20508c2ecf20Sopenharmony_ci struct vhost_dev *dev = vq->dev; 20518c2ecf20Sopenharmony_ci struct vhost_iotlb *umem = dev->iotlb ? dev->iotlb : dev->umem; 20528c2ecf20Sopenharmony_ci struct iovec *_iov; 20538c2ecf20Sopenharmony_ci u64 s = 0, last = addr + len - 1; 20548c2ecf20Sopenharmony_ci int ret = 0; 20558c2ecf20Sopenharmony_ci 20568c2ecf20Sopenharmony_ci while ((u64)len > s) { 20578c2ecf20Sopenharmony_ci u64 size; 20588c2ecf20Sopenharmony_ci if (unlikely(ret >= iov_size)) { 20598c2ecf20Sopenharmony_ci ret = -ENOBUFS; 20608c2ecf20Sopenharmony_ci break; 20618c2ecf20Sopenharmony_ci } 20628c2ecf20Sopenharmony_ci 20638c2ecf20Sopenharmony_ci map = vhost_iotlb_itree_first(umem, addr, last); 20648c2ecf20Sopenharmony_ci if (map == NULL || map->start > addr) { 20658c2ecf20Sopenharmony_ci if (umem != dev->iotlb) { 20668c2ecf20Sopenharmony_ci ret = -EFAULT; 20678c2ecf20Sopenharmony_ci break; 20688c2ecf20Sopenharmony_ci } 20698c2ecf20Sopenharmony_ci ret = -EAGAIN; 20708c2ecf20Sopenharmony_ci break; 20718c2ecf20Sopenharmony_ci } else if (!(map->perm & access)) { 20728c2ecf20Sopenharmony_ci ret = -EPERM; 20738c2ecf20Sopenharmony_ci break; 20748c2ecf20Sopenharmony_ci } 20758c2ecf20Sopenharmony_ci 20768c2ecf20Sopenharmony_ci _iov = iov + ret; 20778c2ecf20Sopenharmony_ci size = map->size - addr + map->start; 20788c2ecf20Sopenharmony_ci _iov->iov_len = min((u64)len - s, size); 20798c2ecf20Sopenharmony_ci _iov->iov_base = (void __user *)(unsigned long) 20808c2ecf20Sopenharmony_ci (map->addr + addr - map->start); 20818c2ecf20Sopenharmony_ci s += size; 20828c2ecf20Sopenharmony_ci addr += size; 20838c2ecf20Sopenharmony_ci ++ret; 20848c2ecf20Sopenharmony_ci } 20858c2ecf20Sopenharmony_ci 20868c2ecf20Sopenharmony_ci if (ret == -EAGAIN) 20878c2ecf20Sopenharmony_ci vhost_iotlb_miss(vq, addr, access); 20888c2ecf20Sopenharmony_ci return ret; 20898c2ecf20Sopenharmony_ci} 20908c2ecf20Sopenharmony_ci 20918c2ecf20Sopenharmony_ci/* Each buffer in the virtqueues is actually a chain of descriptors. This 20928c2ecf20Sopenharmony_ci * function returns the next descriptor in the chain, 20938c2ecf20Sopenharmony_ci * or -1U if we're at the end. */ 20948c2ecf20Sopenharmony_cistatic unsigned next_desc(struct vhost_virtqueue *vq, struct vring_desc *desc) 20958c2ecf20Sopenharmony_ci{ 20968c2ecf20Sopenharmony_ci unsigned int next; 20978c2ecf20Sopenharmony_ci 20988c2ecf20Sopenharmony_ci /* If this descriptor says it doesn't chain, we're done. */ 20998c2ecf20Sopenharmony_ci if (!(desc->flags & cpu_to_vhost16(vq, VRING_DESC_F_NEXT))) 21008c2ecf20Sopenharmony_ci return -1U; 21018c2ecf20Sopenharmony_ci 21028c2ecf20Sopenharmony_ci /* Check they're not leading us off end of descriptors. */ 21038c2ecf20Sopenharmony_ci next = vhost16_to_cpu(vq, READ_ONCE(desc->next)); 21048c2ecf20Sopenharmony_ci return next; 21058c2ecf20Sopenharmony_ci} 21068c2ecf20Sopenharmony_ci 21078c2ecf20Sopenharmony_cistatic int get_indirect(struct vhost_virtqueue *vq, 21088c2ecf20Sopenharmony_ci struct iovec iov[], unsigned int iov_size, 21098c2ecf20Sopenharmony_ci unsigned int *out_num, unsigned int *in_num, 21108c2ecf20Sopenharmony_ci struct vhost_log *log, unsigned int *log_num, 21118c2ecf20Sopenharmony_ci struct vring_desc *indirect) 21128c2ecf20Sopenharmony_ci{ 21138c2ecf20Sopenharmony_ci struct vring_desc desc; 21148c2ecf20Sopenharmony_ci unsigned int i = 0, count, found = 0; 21158c2ecf20Sopenharmony_ci u32 len = vhost32_to_cpu(vq, indirect->len); 21168c2ecf20Sopenharmony_ci struct iov_iter from; 21178c2ecf20Sopenharmony_ci int ret, access; 21188c2ecf20Sopenharmony_ci 21198c2ecf20Sopenharmony_ci /* Sanity check */ 21208c2ecf20Sopenharmony_ci if (unlikely(len % sizeof desc)) { 21218c2ecf20Sopenharmony_ci vq_err(vq, "Invalid length in indirect descriptor: " 21228c2ecf20Sopenharmony_ci "len 0x%llx not multiple of 0x%zx\n", 21238c2ecf20Sopenharmony_ci (unsigned long long)len, 21248c2ecf20Sopenharmony_ci sizeof desc); 21258c2ecf20Sopenharmony_ci return -EINVAL; 21268c2ecf20Sopenharmony_ci } 21278c2ecf20Sopenharmony_ci 21288c2ecf20Sopenharmony_ci ret = translate_desc(vq, vhost64_to_cpu(vq, indirect->addr), len, vq->indirect, 21298c2ecf20Sopenharmony_ci UIO_MAXIOV, VHOST_ACCESS_RO); 21308c2ecf20Sopenharmony_ci if (unlikely(ret < 0)) { 21318c2ecf20Sopenharmony_ci if (ret != -EAGAIN) 21328c2ecf20Sopenharmony_ci vq_err(vq, "Translation failure %d in indirect.\n", ret); 21338c2ecf20Sopenharmony_ci return ret; 21348c2ecf20Sopenharmony_ci } 21358c2ecf20Sopenharmony_ci iov_iter_init(&from, READ, vq->indirect, ret, len); 21368c2ecf20Sopenharmony_ci count = len / sizeof desc; 21378c2ecf20Sopenharmony_ci /* Buffers are chained via a 16 bit next field, so 21388c2ecf20Sopenharmony_ci * we can have at most 2^16 of these. */ 21398c2ecf20Sopenharmony_ci if (unlikely(count > USHRT_MAX + 1)) { 21408c2ecf20Sopenharmony_ci vq_err(vq, "Indirect buffer length too big: %d\n", 21418c2ecf20Sopenharmony_ci indirect->len); 21428c2ecf20Sopenharmony_ci return -E2BIG; 21438c2ecf20Sopenharmony_ci } 21448c2ecf20Sopenharmony_ci 21458c2ecf20Sopenharmony_ci do { 21468c2ecf20Sopenharmony_ci unsigned iov_count = *in_num + *out_num; 21478c2ecf20Sopenharmony_ci if (unlikely(++found > count)) { 21488c2ecf20Sopenharmony_ci vq_err(vq, "Loop detected: last one at %u " 21498c2ecf20Sopenharmony_ci "indirect size %u\n", 21508c2ecf20Sopenharmony_ci i, count); 21518c2ecf20Sopenharmony_ci return -EINVAL; 21528c2ecf20Sopenharmony_ci } 21538c2ecf20Sopenharmony_ci if (unlikely(!copy_from_iter_full(&desc, sizeof(desc), &from))) { 21548c2ecf20Sopenharmony_ci vq_err(vq, "Failed indirect descriptor: idx %d, %zx\n", 21558c2ecf20Sopenharmony_ci i, (size_t)vhost64_to_cpu(vq, indirect->addr) + i * sizeof desc); 21568c2ecf20Sopenharmony_ci return -EINVAL; 21578c2ecf20Sopenharmony_ci } 21588c2ecf20Sopenharmony_ci if (unlikely(desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_INDIRECT))) { 21598c2ecf20Sopenharmony_ci vq_err(vq, "Nested indirect descriptor: idx %d, %zx\n", 21608c2ecf20Sopenharmony_ci i, (size_t)vhost64_to_cpu(vq, indirect->addr) + i * sizeof desc); 21618c2ecf20Sopenharmony_ci return -EINVAL; 21628c2ecf20Sopenharmony_ci } 21638c2ecf20Sopenharmony_ci 21648c2ecf20Sopenharmony_ci if (desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_WRITE)) 21658c2ecf20Sopenharmony_ci access = VHOST_ACCESS_WO; 21668c2ecf20Sopenharmony_ci else 21678c2ecf20Sopenharmony_ci access = VHOST_ACCESS_RO; 21688c2ecf20Sopenharmony_ci 21698c2ecf20Sopenharmony_ci ret = translate_desc(vq, vhost64_to_cpu(vq, desc.addr), 21708c2ecf20Sopenharmony_ci vhost32_to_cpu(vq, desc.len), iov + iov_count, 21718c2ecf20Sopenharmony_ci iov_size - iov_count, access); 21728c2ecf20Sopenharmony_ci if (unlikely(ret < 0)) { 21738c2ecf20Sopenharmony_ci if (ret != -EAGAIN) 21748c2ecf20Sopenharmony_ci vq_err(vq, "Translation failure %d indirect idx %d\n", 21758c2ecf20Sopenharmony_ci ret, i); 21768c2ecf20Sopenharmony_ci return ret; 21778c2ecf20Sopenharmony_ci } 21788c2ecf20Sopenharmony_ci /* If this is an input descriptor, increment that count. */ 21798c2ecf20Sopenharmony_ci if (access == VHOST_ACCESS_WO) { 21808c2ecf20Sopenharmony_ci *in_num += ret; 21818c2ecf20Sopenharmony_ci if (unlikely(log && ret)) { 21828c2ecf20Sopenharmony_ci log[*log_num].addr = vhost64_to_cpu(vq, desc.addr); 21838c2ecf20Sopenharmony_ci log[*log_num].len = vhost32_to_cpu(vq, desc.len); 21848c2ecf20Sopenharmony_ci ++*log_num; 21858c2ecf20Sopenharmony_ci } 21868c2ecf20Sopenharmony_ci } else { 21878c2ecf20Sopenharmony_ci /* If it's an output descriptor, they're all supposed 21888c2ecf20Sopenharmony_ci * to come before any input descriptors. */ 21898c2ecf20Sopenharmony_ci if (unlikely(*in_num)) { 21908c2ecf20Sopenharmony_ci vq_err(vq, "Indirect descriptor " 21918c2ecf20Sopenharmony_ci "has out after in: idx %d\n", i); 21928c2ecf20Sopenharmony_ci return -EINVAL; 21938c2ecf20Sopenharmony_ci } 21948c2ecf20Sopenharmony_ci *out_num += ret; 21958c2ecf20Sopenharmony_ci } 21968c2ecf20Sopenharmony_ci } while ((i = next_desc(vq, &desc)) != -1); 21978c2ecf20Sopenharmony_ci return 0; 21988c2ecf20Sopenharmony_ci} 21998c2ecf20Sopenharmony_ci 22008c2ecf20Sopenharmony_ci/* This looks in the virtqueue and for the first available buffer, and converts 22018c2ecf20Sopenharmony_ci * it to an iovec for convenient access. Since descriptors consist of some 22028c2ecf20Sopenharmony_ci * number of output then some number of input descriptors, it's actually two 22038c2ecf20Sopenharmony_ci * iovecs, but we pack them into one and note how many of each there were. 22048c2ecf20Sopenharmony_ci * 22058c2ecf20Sopenharmony_ci * This function returns the descriptor number found, or vq->num (which is 22068c2ecf20Sopenharmony_ci * never a valid descriptor number) if none was found. A negative code is 22078c2ecf20Sopenharmony_ci * returned on error. */ 22088c2ecf20Sopenharmony_ciint vhost_get_vq_desc(struct vhost_virtqueue *vq, 22098c2ecf20Sopenharmony_ci struct iovec iov[], unsigned int iov_size, 22108c2ecf20Sopenharmony_ci unsigned int *out_num, unsigned int *in_num, 22118c2ecf20Sopenharmony_ci struct vhost_log *log, unsigned int *log_num) 22128c2ecf20Sopenharmony_ci{ 22138c2ecf20Sopenharmony_ci struct vring_desc desc; 22148c2ecf20Sopenharmony_ci unsigned int i, head, found = 0; 22158c2ecf20Sopenharmony_ci u16 last_avail_idx; 22168c2ecf20Sopenharmony_ci __virtio16 avail_idx; 22178c2ecf20Sopenharmony_ci __virtio16 ring_head; 22188c2ecf20Sopenharmony_ci int ret, access; 22198c2ecf20Sopenharmony_ci 22208c2ecf20Sopenharmony_ci /* Check it isn't doing very strange things with descriptor numbers. */ 22218c2ecf20Sopenharmony_ci last_avail_idx = vq->last_avail_idx; 22228c2ecf20Sopenharmony_ci 22238c2ecf20Sopenharmony_ci if (vq->avail_idx == vq->last_avail_idx) { 22248c2ecf20Sopenharmony_ci if (unlikely(vhost_get_avail_idx(vq, &avail_idx))) { 22258c2ecf20Sopenharmony_ci vq_err(vq, "Failed to access avail idx at %p\n", 22268c2ecf20Sopenharmony_ci &vq->avail->idx); 22278c2ecf20Sopenharmony_ci return -EFAULT; 22288c2ecf20Sopenharmony_ci } 22298c2ecf20Sopenharmony_ci vq->avail_idx = vhost16_to_cpu(vq, avail_idx); 22308c2ecf20Sopenharmony_ci 22318c2ecf20Sopenharmony_ci if (unlikely((u16)(vq->avail_idx - last_avail_idx) > vq->num)) { 22328c2ecf20Sopenharmony_ci vq_err(vq, "Guest moved used index from %u to %u", 22338c2ecf20Sopenharmony_ci last_avail_idx, vq->avail_idx); 22348c2ecf20Sopenharmony_ci return -EFAULT; 22358c2ecf20Sopenharmony_ci } 22368c2ecf20Sopenharmony_ci 22378c2ecf20Sopenharmony_ci /* If there's nothing new since last we looked, return 22388c2ecf20Sopenharmony_ci * invalid. 22398c2ecf20Sopenharmony_ci */ 22408c2ecf20Sopenharmony_ci if (vq->avail_idx == last_avail_idx) 22418c2ecf20Sopenharmony_ci return vq->num; 22428c2ecf20Sopenharmony_ci 22438c2ecf20Sopenharmony_ci /* Only get avail ring entries after they have been 22448c2ecf20Sopenharmony_ci * exposed by guest. 22458c2ecf20Sopenharmony_ci */ 22468c2ecf20Sopenharmony_ci smp_rmb(); 22478c2ecf20Sopenharmony_ci } 22488c2ecf20Sopenharmony_ci 22498c2ecf20Sopenharmony_ci /* Grab the next descriptor number they're advertising, and increment 22508c2ecf20Sopenharmony_ci * the index we've seen. */ 22518c2ecf20Sopenharmony_ci if (unlikely(vhost_get_avail_head(vq, &ring_head, last_avail_idx))) { 22528c2ecf20Sopenharmony_ci vq_err(vq, "Failed to read head: idx %d address %p\n", 22538c2ecf20Sopenharmony_ci last_avail_idx, 22548c2ecf20Sopenharmony_ci &vq->avail->ring[last_avail_idx % vq->num]); 22558c2ecf20Sopenharmony_ci return -EFAULT; 22568c2ecf20Sopenharmony_ci } 22578c2ecf20Sopenharmony_ci 22588c2ecf20Sopenharmony_ci head = vhost16_to_cpu(vq, ring_head); 22598c2ecf20Sopenharmony_ci 22608c2ecf20Sopenharmony_ci /* If their number is silly, that's an error. */ 22618c2ecf20Sopenharmony_ci if (unlikely(head >= vq->num)) { 22628c2ecf20Sopenharmony_ci vq_err(vq, "Guest says index %u > %u is available", 22638c2ecf20Sopenharmony_ci head, vq->num); 22648c2ecf20Sopenharmony_ci return -EINVAL; 22658c2ecf20Sopenharmony_ci } 22668c2ecf20Sopenharmony_ci 22678c2ecf20Sopenharmony_ci /* When we start there are none of either input nor output. */ 22688c2ecf20Sopenharmony_ci *out_num = *in_num = 0; 22698c2ecf20Sopenharmony_ci if (unlikely(log)) 22708c2ecf20Sopenharmony_ci *log_num = 0; 22718c2ecf20Sopenharmony_ci 22728c2ecf20Sopenharmony_ci i = head; 22738c2ecf20Sopenharmony_ci do { 22748c2ecf20Sopenharmony_ci unsigned iov_count = *in_num + *out_num; 22758c2ecf20Sopenharmony_ci if (unlikely(i >= vq->num)) { 22768c2ecf20Sopenharmony_ci vq_err(vq, "Desc index is %u > %u, head = %u", 22778c2ecf20Sopenharmony_ci i, vq->num, head); 22788c2ecf20Sopenharmony_ci return -EINVAL; 22798c2ecf20Sopenharmony_ci } 22808c2ecf20Sopenharmony_ci if (unlikely(++found > vq->num)) { 22818c2ecf20Sopenharmony_ci vq_err(vq, "Loop detected: last one at %u " 22828c2ecf20Sopenharmony_ci "vq size %u head %u\n", 22838c2ecf20Sopenharmony_ci i, vq->num, head); 22848c2ecf20Sopenharmony_ci return -EINVAL; 22858c2ecf20Sopenharmony_ci } 22868c2ecf20Sopenharmony_ci ret = vhost_get_desc(vq, &desc, i); 22878c2ecf20Sopenharmony_ci if (unlikely(ret)) { 22888c2ecf20Sopenharmony_ci vq_err(vq, "Failed to get descriptor: idx %d addr %p\n", 22898c2ecf20Sopenharmony_ci i, vq->desc + i); 22908c2ecf20Sopenharmony_ci return -EFAULT; 22918c2ecf20Sopenharmony_ci } 22928c2ecf20Sopenharmony_ci if (desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_INDIRECT)) { 22938c2ecf20Sopenharmony_ci ret = get_indirect(vq, iov, iov_size, 22948c2ecf20Sopenharmony_ci out_num, in_num, 22958c2ecf20Sopenharmony_ci log, log_num, &desc); 22968c2ecf20Sopenharmony_ci if (unlikely(ret < 0)) { 22978c2ecf20Sopenharmony_ci if (ret != -EAGAIN) 22988c2ecf20Sopenharmony_ci vq_err(vq, "Failure detected " 22998c2ecf20Sopenharmony_ci "in indirect descriptor at idx %d\n", i); 23008c2ecf20Sopenharmony_ci return ret; 23018c2ecf20Sopenharmony_ci } 23028c2ecf20Sopenharmony_ci continue; 23038c2ecf20Sopenharmony_ci } 23048c2ecf20Sopenharmony_ci 23058c2ecf20Sopenharmony_ci if (desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_WRITE)) 23068c2ecf20Sopenharmony_ci access = VHOST_ACCESS_WO; 23078c2ecf20Sopenharmony_ci else 23088c2ecf20Sopenharmony_ci access = VHOST_ACCESS_RO; 23098c2ecf20Sopenharmony_ci ret = translate_desc(vq, vhost64_to_cpu(vq, desc.addr), 23108c2ecf20Sopenharmony_ci vhost32_to_cpu(vq, desc.len), iov + iov_count, 23118c2ecf20Sopenharmony_ci iov_size - iov_count, access); 23128c2ecf20Sopenharmony_ci if (unlikely(ret < 0)) { 23138c2ecf20Sopenharmony_ci if (ret != -EAGAIN) 23148c2ecf20Sopenharmony_ci vq_err(vq, "Translation failure %d descriptor idx %d\n", 23158c2ecf20Sopenharmony_ci ret, i); 23168c2ecf20Sopenharmony_ci return ret; 23178c2ecf20Sopenharmony_ci } 23188c2ecf20Sopenharmony_ci if (access == VHOST_ACCESS_WO) { 23198c2ecf20Sopenharmony_ci /* If this is an input descriptor, 23208c2ecf20Sopenharmony_ci * increment that count. */ 23218c2ecf20Sopenharmony_ci *in_num += ret; 23228c2ecf20Sopenharmony_ci if (unlikely(log && ret)) { 23238c2ecf20Sopenharmony_ci log[*log_num].addr = vhost64_to_cpu(vq, desc.addr); 23248c2ecf20Sopenharmony_ci log[*log_num].len = vhost32_to_cpu(vq, desc.len); 23258c2ecf20Sopenharmony_ci ++*log_num; 23268c2ecf20Sopenharmony_ci } 23278c2ecf20Sopenharmony_ci } else { 23288c2ecf20Sopenharmony_ci /* If it's an output descriptor, they're all supposed 23298c2ecf20Sopenharmony_ci * to come before any input descriptors. */ 23308c2ecf20Sopenharmony_ci if (unlikely(*in_num)) { 23318c2ecf20Sopenharmony_ci vq_err(vq, "Descriptor has out after in: " 23328c2ecf20Sopenharmony_ci "idx %d\n", i); 23338c2ecf20Sopenharmony_ci return -EINVAL; 23348c2ecf20Sopenharmony_ci } 23358c2ecf20Sopenharmony_ci *out_num += ret; 23368c2ecf20Sopenharmony_ci } 23378c2ecf20Sopenharmony_ci } while ((i = next_desc(vq, &desc)) != -1); 23388c2ecf20Sopenharmony_ci 23398c2ecf20Sopenharmony_ci /* On success, increment avail index. */ 23408c2ecf20Sopenharmony_ci vq->last_avail_idx++; 23418c2ecf20Sopenharmony_ci 23428c2ecf20Sopenharmony_ci /* Assume notifications from guest are disabled at this point, 23438c2ecf20Sopenharmony_ci * if they aren't we would need to update avail_event index. */ 23448c2ecf20Sopenharmony_ci BUG_ON(!(vq->used_flags & VRING_USED_F_NO_NOTIFY)); 23458c2ecf20Sopenharmony_ci return head; 23468c2ecf20Sopenharmony_ci} 23478c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_get_vq_desc); 23488c2ecf20Sopenharmony_ci 23498c2ecf20Sopenharmony_ci/* Reverse the effect of vhost_get_vq_desc. Useful for error handling. */ 23508c2ecf20Sopenharmony_civoid vhost_discard_vq_desc(struct vhost_virtqueue *vq, int n) 23518c2ecf20Sopenharmony_ci{ 23528c2ecf20Sopenharmony_ci vq->last_avail_idx -= n; 23538c2ecf20Sopenharmony_ci} 23548c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_discard_vq_desc); 23558c2ecf20Sopenharmony_ci 23568c2ecf20Sopenharmony_ci/* After we've used one of their buffers, we tell them about it. We'll then 23578c2ecf20Sopenharmony_ci * want to notify the guest, using eventfd. */ 23588c2ecf20Sopenharmony_ciint vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len) 23598c2ecf20Sopenharmony_ci{ 23608c2ecf20Sopenharmony_ci struct vring_used_elem heads = { 23618c2ecf20Sopenharmony_ci cpu_to_vhost32(vq, head), 23628c2ecf20Sopenharmony_ci cpu_to_vhost32(vq, len) 23638c2ecf20Sopenharmony_ci }; 23648c2ecf20Sopenharmony_ci 23658c2ecf20Sopenharmony_ci return vhost_add_used_n(vq, &heads, 1); 23668c2ecf20Sopenharmony_ci} 23678c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_add_used); 23688c2ecf20Sopenharmony_ci 23698c2ecf20Sopenharmony_cistatic int __vhost_add_used_n(struct vhost_virtqueue *vq, 23708c2ecf20Sopenharmony_ci struct vring_used_elem *heads, 23718c2ecf20Sopenharmony_ci unsigned count) 23728c2ecf20Sopenharmony_ci{ 23738c2ecf20Sopenharmony_ci vring_used_elem_t __user *used; 23748c2ecf20Sopenharmony_ci u16 old, new; 23758c2ecf20Sopenharmony_ci int start; 23768c2ecf20Sopenharmony_ci 23778c2ecf20Sopenharmony_ci start = vq->last_used_idx & (vq->num - 1); 23788c2ecf20Sopenharmony_ci used = vq->used->ring + start; 23798c2ecf20Sopenharmony_ci if (vhost_put_used(vq, heads, start, count)) { 23808c2ecf20Sopenharmony_ci vq_err(vq, "Failed to write used"); 23818c2ecf20Sopenharmony_ci return -EFAULT; 23828c2ecf20Sopenharmony_ci } 23838c2ecf20Sopenharmony_ci if (unlikely(vq->log_used)) { 23848c2ecf20Sopenharmony_ci /* Make sure data is seen before log. */ 23858c2ecf20Sopenharmony_ci smp_wmb(); 23868c2ecf20Sopenharmony_ci /* Log used ring entry write. */ 23878c2ecf20Sopenharmony_ci log_used(vq, ((void __user *)used - (void __user *)vq->used), 23888c2ecf20Sopenharmony_ci count * sizeof *used); 23898c2ecf20Sopenharmony_ci } 23908c2ecf20Sopenharmony_ci old = vq->last_used_idx; 23918c2ecf20Sopenharmony_ci new = (vq->last_used_idx += count); 23928c2ecf20Sopenharmony_ci /* If the driver never bothers to signal in a very long while, 23938c2ecf20Sopenharmony_ci * used index might wrap around. If that happens, invalidate 23948c2ecf20Sopenharmony_ci * signalled_used index we stored. TODO: make sure driver 23958c2ecf20Sopenharmony_ci * signals at least once in 2^16 and remove this. */ 23968c2ecf20Sopenharmony_ci if (unlikely((u16)(new - vq->signalled_used) < (u16)(new - old))) 23978c2ecf20Sopenharmony_ci vq->signalled_used_valid = false; 23988c2ecf20Sopenharmony_ci return 0; 23998c2ecf20Sopenharmony_ci} 24008c2ecf20Sopenharmony_ci 24018c2ecf20Sopenharmony_ci/* After we've used one of their buffers, we tell them about it. We'll then 24028c2ecf20Sopenharmony_ci * want to notify the guest, using eventfd. */ 24038c2ecf20Sopenharmony_ciint vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads, 24048c2ecf20Sopenharmony_ci unsigned count) 24058c2ecf20Sopenharmony_ci{ 24068c2ecf20Sopenharmony_ci int start, n, r; 24078c2ecf20Sopenharmony_ci 24088c2ecf20Sopenharmony_ci start = vq->last_used_idx & (vq->num - 1); 24098c2ecf20Sopenharmony_ci n = vq->num - start; 24108c2ecf20Sopenharmony_ci if (n < count) { 24118c2ecf20Sopenharmony_ci r = __vhost_add_used_n(vq, heads, n); 24128c2ecf20Sopenharmony_ci if (r < 0) 24138c2ecf20Sopenharmony_ci return r; 24148c2ecf20Sopenharmony_ci heads += n; 24158c2ecf20Sopenharmony_ci count -= n; 24168c2ecf20Sopenharmony_ci } 24178c2ecf20Sopenharmony_ci r = __vhost_add_used_n(vq, heads, count); 24188c2ecf20Sopenharmony_ci 24198c2ecf20Sopenharmony_ci /* Make sure buffer is written before we update index. */ 24208c2ecf20Sopenharmony_ci smp_wmb(); 24218c2ecf20Sopenharmony_ci if (vhost_put_used_idx(vq)) { 24228c2ecf20Sopenharmony_ci vq_err(vq, "Failed to increment used idx"); 24238c2ecf20Sopenharmony_ci return -EFAULT; 24248c2ecf20Sopenharmony_ci } 24258c2ecf20Sopenharmony_ci if (unlikely(vq->log_used)) { 24268c2ecf20Sopenharmony_ci /* Make sure used idx is seen before log. */ 24278c2ecf20Sopenharmony_ci smp_wmb(); 24288c2ecf20Sopenharmony_ci /* Log used index update. */ 24298c2ecf20Sopenharmony_ci log_used(vq, offsetof(struct vring_used, idx), 24308c2ecf20Sopenharmony_ci sizeof vq->used->idx); 24318c2ecf20Sopenharmony_ci if (vq->log_ctx) 24328c2ecf20Sopenharmony_ci eventfd_signal(vq->log_ctx, 1); 24338c2ecf20Sopenharmony_ci } 24348c2ecf20Sopenharmony_ci return r; 24358c2ecf20Sopenharmony_ci} 24368c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_add_used_n); 24378c2ecf20Sopenharmony_ci 24388c2ecf20Sopenharmony_cistatic bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) 24398c2ecf20Sopenharmony_ci{ 24408c2ecf20Sopenharmony_ci __u16 old, new; 24418c2ecf20Sopenharmony_ci __virtio16 event; 24428c2ecf20Sopenharmony_ci bool v; 24438c2ecf20Sopenharmony_ci /* Flush out used index updates. This is paired 24448c2ecf20Sopenharmony_ci * with the barrier that the Guest executes when enabling 24458c2ecf20Sopenharmony_ci * interrupts. */ 24468c2ecf20Sopenharmony_ci smp_mb(); 24478c2ecf20Sopenharmony_ci 24488c2ecf20Sopenharmony_ci if (vhost_has_feature(vq, VIRTIO_F_NOTIFY_ON_EMPTY) && 24498c2ecf20Sopenharmony_ci unlikely(vq->avail_idx == vq->last_avail_idx)) 24508c2ecf20Sopenharmony_ci return true; 24518c2ecf20Sopenharmony_ci 24528c2ecf20Sopenharmony_ci if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) { 24538c2ecf20Sopenharmony_ci __virtio16 flags; 24548c2ecf20Sopenharmony_ci if (vhost_get_avail_flags(vq, &flags)) { 24558c2ecf20Sopenharmony_ci vq_err(vq, "Failed to get flags"); 24568c2ecf20Sopenharmony_ci return true; 24578c2ecf20Sopenharmony_ci } 24588c2ecf20Sopenharmony_ci return !(flags & cpu_to_vhost16(vq, VRING_AVAIL_F_NO_INTERRUPT)); 24598c2ecf20Sopenharmony_ci } 24608c2ecf20Sopenharmony_ci old = vq->signalled_used; 24618c2ecf20Sopenharmony_ci v = vq->signalled_used_valid; 24628c2ecf20Sopenharmony_ci new = vq->signalled_used = vq->last_used_idx; 24638c2ecf20Sopenharmony_ci vq->signalled_used_valid = true; 24648c2ecf20Sopenharmony_ci 24658c2ecf20Sopenharmony_ci if (unlikely(!v)) 24668c2ecf20Sopenharmony_ci return true; 24678c2ecf20Sopenharmony_ci 24688c2ecf20Sopenharmony_ci if (vhost_get_used_event(vq, &event)) { 24698c2ecf20Sopenharmony_ci vq_err(vq, "Failed to get used event idx"); 24708c2ecf20Sopenharmony_ci return true; 24718c2ecf20Sopenharmony_ci } 24728c2ecf20Sopenharmony_ci return vring_need_event(vhost16_to_cpu(vq, event), new, old); 24738c2ecf20Sopenharmony_ci} 24748c2ecf20Sopenharmony_ci 24758c2ecf20Sopenharmony_ci/* This actually signals the guest, using eventfd. */ 24768c2ecf20Sopenharmony_civoid vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq) 24778c2ecf20Sopenharmony_ci{ 24788c2ecf20Sopenharmony_ci /* Signal the Guest tell them we used something up. */ 24798c2ecf20Sopenharmony_ci if (vq->call_ctx.ctx && vhost_notify(dev, vq)) 24808c2ecf20Sopenharmony_ci eventfd_signal(vq->call_ctx.ctx, 1); 24818c2ecf20Sopenharmony_ci} 24828c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_signal); 24838c2ecf20Sopenharmony_ci 24848c2ecf20Sopenharmony_ci/* And here's the combo meal deal. Supersize me! */ 24858c2ecf20Sopenharmony_civoid vhost_add_used_and_signal(struct vhost_dev *dev, 24868c2ecf20Sopenharmony_ci struct vhost_virtqueue *vq, 24878c2ecf20Sopenharmony_ci unsigned int head, int len) 24888c2ecf20Sopenharmony_ci{ 24898c2ecf20Sopenharmony_ci vhost_add_used(vq, head, len); 24908c2ecf20Sopenharmony_ci vhost_signal(dev, vq); 24918c2ecf20Sopenharmony_ci} 24928c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_add_used_and_signal); 24938c2ecf20Sopenharmony_ci 24948c2ecf20Sopenharmony_ci/* multi-buffer version of vhost_add_used_and_signal */ 24958c2ecf20Sopenharmony_civoid vhost_add_used_and_signal_n(struct vhost_dev *dev, 24968c2ecf20Sopenharmony_ci struct vhost_virtqueue *vq, 24978c2ecf20Sopenharmony_ci struct vring_used_elem *heads, unsigned count) 24988c2ecf20Sopenharmony_ci{ 24998c2ecf20Sopenharmony_ci vhost_add_used_n(vq, heads, count); 25008c2ecf20Sopenharmony_ci vhost_signal(dev, vq); 25018c2ecf20Sopenharmony_ci} 25028c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_add_used_and_signal_n); 25038c2ecf20Sopenharmony_ci 25048c2ecf20Sopenharmony_ci/* return true if we're sure that avaiable ring is empty */ 25058c2ecf20Sopenharmony_cibool vhost_vq_avail_empty(struct vhost_dev *dev, struct vhost_virtqueue *vq) 25068c2ecf20Sopenharmony_ci{ 25078c2ecf20Sopenharmony_ci __virtio16 avail_idx; 25088c2ecf20Sopenharmony_ci int r; 25098c2ecf20Sopenharmony_ci 25108c2ecf20Sopenharmony_ci if (vq->avail_idx != vq->last_avail_idx) 25118c2ecf20Sopenharmony_ci return false; 25128c2ecf20Sopenharmony_ci 25138c2ecf20Sopenharmony_ci r = vhost_get_avail_idx(vq, &avail_idx); 25148c2ecf20Sopenharmony_ci if (unlikely(r)) 25158c2ecf20Sopenharmony_ci return false; 25168c2ecf20Sopenharmony_ci vq->avail_idx = vhost16_to_cpu(vq, avail_idx); 25178c2ecf20Sopenharmony_ci 25188c2ecf20Sopenharmony_ci return vq->avail_idx == vq->last_avail_idx; 25198c2ecf20Sopenharmony_ci} 25208c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_vq_avail_empty); 25218c2ecf20Sopenharmony_ci 25228c2ecf20Sopenharmony_ci/* OK, now we need to know about added descriptors. */ 25238c2ecf20Sopenharmony_cibool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) 25248c2ecf20Sopenharmony_ci{ 25258c2ecf20Sopenharmony_ci __virtio16 avail_idx; 25268c2ecf20Sopenharmony_ci int r; 25278c2ecf20Sopenharmony_ci 25288c2ecf20Sopenharmony_ci if (!(vq->used_flags & VRING_USED_F_NO_NOTIFY)) 25298c2ecf20Sopenharmony_ci return false; 25308c2ecf20Sopenharmony_ci vq->used_flags &= ~VRING_USED_F_NO_NOTIFY; 25318c2ecf20Sopenharmony_ci if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) { 25328c2ecf20Sopenharmony_ci r = vhost_update_used_flags(vq); 25338c2ecf20Sopenharmony_ci if (r) { 25348c2ecf20Sopenharmony_ci vq_err(vq, "Failed to enable notification at %p: %d\n", 25358c2ecf20Sopenharmony_ci &vq->used->flags, r); 25368c2ecf20Sopenharmony_ci return false; 25378c2ecf20Sopenharmony_ci } 25388c2ecf20Sopenharmony_ci } else { 25398c2ecf20Sopenharmony_ci r = vhost_update_avail_event(vq, vq->avail_idx); 25408c2ecf20Sopenharmony_ci if (r) { 25418c2ecf20Sopenharmony_ci vq_err(vq, "Failed to update avail event index at %p: %d\n", 25428c2ecf20Sopenharmony_ci vhost_avail_event(vq), r); 25438c2ecf20Sopenharmony_ci return false; 25448c2ecf20Sopenharmony_ci } 25458c2ecf20Sopenharmony_ci } 25468c2ecf20Sopenharmony_ci /* They could have slipped one in as we were doing that: make 25478c2ecf20Sopenharmony_ci * sure it's written, then check again. */ 25488c2ecf20Sopenharmony_ci smp_mb(); 25498c2ecf20Sopenharmony_ci r = vhost_get_avail_idx(vq, &avail_idx); 25508c2ecf20Sopenharmony_ci if (r) { 25518c2ecf20Sopenharmony_ci vq_err(vq, "Failed to check avail idx at %p: %d\n", 25528c2ecf20Sopenharmony_ci &vq->avail->idx, r); 25538c2ecf20Sopenharmony_ci return false; 25548c2ecf20Sopenharmony_ci } 25558c2ecf20Sopenharmony_ci 25568c2ecf20Sopenharmony_ci return vhost16_to_cpu(vq, avail_idx) != vq->avail_idx; 25578c2ecf20Sopenharmony_ci} 25588c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_enable_notify); 25598c2ecf20Sopenharmony_ci 25608c2ecf20Sopenharmony_ci/* We don't need to be notified again. */ 25618c2ecf20Sopenharmony_civoid vhost_disable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) 25628c2ecf20Sopenharmony_ci{ 25638c2ecf20Sopenharmony_ci int r; 25648c2ecf20Sopenharmony_ci 25658c2ecf20Sopenharmony_ci if (vq->used_flags & VRING_USED_F_NO_NOTIFY) 25668c2ecf20Sopenharmony_ci return; 25678c2ecf20Sopenharmony_ci vq->used_flags |= VRING_USED_F_NO_NOTIFY; 25688c2ecf20Sopenharmony_ci if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) { 25698c2ecf20Sopenharmony_ci r = vhost_update_used_flags(vq); 25708c2ecf20Sopenharmony_ci if (r) 25718c2ecf20Sopenharmony_ci vq_err(vq, "Failed to disable notification at %p: %d\n", 25728c2ecf20Sopenharmony_ci &vq->used->flags, r); 25738c2ecf20Sopenharmony_ci } 25748c2ecf20Sopenharmony_ci} 25758c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_disable_notify); 25768c2ecf20Sopenharmony_ci 25778c2ecf20Sopenharmony_ci/* Create a new message. */ 25788c2ecf20Sopenharmony_cistruct vhost_msg_node *vhost_new_msg(struct vhost_virtqueue *vq, int type) 25798c2ecf20Sopenharmony_ci{ 25808c2ecf20Sopenharmony_ci /* Make sure all padding within the structure is initialized. */ 25818c2ecf20Sopenharmony_ci struct vhost_msg_node *node = kzalloc(sizeof(*node), GFP_KERNEL); 25828c2ecf20Sopenharmony_ci if (!node) 25838c2ecf20Sopenharmony_ci return NULL; 25848c2ecf20Sopenharmony_ci 25858c2ecf20Sopenharmony_ci node->vq = vq; 25868c2ecf20Sopenharmony_ci node->msg.type = type; 25878c2ecf20Sopenharmony_ci return node; 25888c2ecf20Sopenharmony_ci} 25898c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_new_msg); 25908c2ecf20Sopenharmony_ci 25918c2ecf20Sopenharmony_civoid vhost_enqueue_msg(struct vhost_dev *dev, struct list_head *head, 25928c2ecf20Sopenharmony_ci struct vhost_msg_node *node) 25938c2ecf20Sopenharmony_ci{ 25948c2ecf20Sopenharmony_ci spin_lock(&dev->iotlb_lock); 25958c2ecf20Sopenharmony_ci list_add_tail(&node->node, head); 25968c2ecf20Sopenharmony_ci spin_unlock(&dev->iotlb_lock); 25978c2ecf20Sopenharmony_ci 25988c2ecf20Sopenharmony_ci wake_up_interruptible_poll(&dev->wait, EPOLLIN | EPOLLRDNORM); 25998c2ecf20Sopenharmony_ci} 26008c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_enqueue_msg); 26018c2ecf20Sopenharmony_ci 26028c2ecf20Sopenharmony_cistruct vhost_msg_node *vhost_dequeue_msg(struct vhost_dev *dev, 26038c2ecf20Sopenharmony_ci struct list_head *head) 26048c2ecf20Sopenharmony_ci{ 26058c2ecf20Sopenharmony_ci struct vhost_msg_node *node = NULL; 26068c2ecf20Sopenharmony_ci 26078c2ecf20Sopenharmony_ci spin_lock(&dev->iotlb_lock); 26088c2ecf20Sopenharmony_ci if (!list_empty(head)) { 26098c2ecf20Sopenharmony_ci node = list_first_entry(head, struct vhost_msg_node, 26108c2ecf20Sopenharmony_ci node); 26118c2ecf20Sopenharmony_ci list_del(&node->node); 26128c2ecf20Sopenharmony_ci } 26138c2ecf20Sopenharmony_ci spin_unlock(&dev->iotlb_lock); 26148c2ecf20Sopenharmony_ci 26158c2ecf20Sopenharmony_ci return node; 26168c2ecf20Sopenharmony_ci} 26178c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_dequeue_msg); 26188c2ecf20Sopenharmony_ci 26198c2ecf20Sopenharmony_civoid vhost_set_backend_features(struct vhost_dev *dev, u64 features) 26208c2ecf20Sopenharmony_ci{ 26218c2ecf20Sopenharmony_ci struct vhost_virtqueue *vq; 26228c2ecf20Sopenharmony_ci int i; 26238c2ecf20Sopenharmony_ci 26248c2ecf20Sopenharmony_ci mutex_lock(&dev->mutex); 26258c2ecf20Sopenharmony_ci for (i = 0; i < dev->nvqs; ++i) { 26268c2ecf20Sopenharmony_ci vq = dev->vqs[i]; 26278c2ecf20Sopenharmony_ci mutex_lock(&vq->mutex); 26288c2ecf20Sopenharmony_ci vq->acked_backend_features = features; 26298c2ecf20Sopenharmony_ci mutex_unlock(&vq->mutex); 26308c2ecf20Sopenharmony_ci } 26318c2ecf20Sopenharmony_ci mutex_unlock(&dev->mutex); 26328c2ecf20Sopenharmony_ci} 26338c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_set_backend_features); 26348c2ecf20Sopenharmony_ci 26358c2ecf20Sopenharmony_cistatic int __init vhost_init(void) 26368c2ecf20Sopenharmony_ci{ 26378c2ecf20Sopenharmony_ci return 0; 26388c2ecf20Sopenharmony_ci} 26398c2ecf20Sopenharmony_ci 26408c2ecf20Sopenharmony_cistatic void __exit vhost_exit(void) 26418c2ecf20Sopenharmony_ci{ 26428c2ecf20Sopenharmony_ci} 26438c2ecf20Sopenharmony_ci 26448c2ecf20Sopenharmony_cimodule_init(vhost_init); 26458c2ecf20Sopenharmony_cimodule_exit(vhost_exit); 26468c2ecf20Sopenharmony_ci 26478c2ecf20Sopenharmony_ciMODULE_VERSION("0.0.1"); 26488c2ecf20Sopenharmony_ciMODULE_LICENSE("GPL v2"); 26498c2ecf20Sopenharmony_ciMODULE_AUTHOR("Michael S. Tsirkin"); 26508c2ecf20Sopenharmony_ciMODULE_DESCRIPTION("Host kernel accelerator for virtio"); 2651