162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 262306a36Sopenharmony_ci/* Copyright (C) 2009 Red Hat, Inc. 362306a36Sopenharmony_ci * Copyright (C) 2006 Rusty Russell IBM Corporation 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * Author: Michael S. Tsirkin <mst@redhat.com> 662306a36Sopenharmony_ci * 762306a36Sopenharmony_ci * Inspiration, some code, and most witty comments come from 862306a36Sopenharmony_ci * Documentation/virtual/lguest/lguest.c, by Rusty Russell 962306a36Sopenharmony_ci * 1062306a36Sopenharmony_ci * Generic code for virtio server in host kernel. 1162306a36Sopenharmony_ci */ 1262306a36Sopenharmony_ci 1362306a36Sopenharmony_ci#include <linux/eventfd.h> 1462306a36Sopenharmony_ci#include <linux/vhost.h> 1562306a36Sopenharmony_ci#include <linux/uio.h> 1662306a36Sopenharmony_ci#include <linux/mm.h> 1762306a36Sopenharmony_ci#include <linux/miscdevice.h> 1862306a36Sopenharmony_ci#include <linux/mutex.h> 1962306a36Sopenharmony_ci#include <linux/poll.h> 2062306a36Sopenharmony_ci#include <linux/file.h> 2162306a36Sopenharmony_ci#include <linux/highmem.h> 2262306a36Sopenharmony_ci#include <linux/slab.h> 2362306a36Sopenharmony_ci#include <linux/vmalloc.h> 2462306a36Sopenharmony_ci#include <linux/kthread.h> 2562306a36Sopenharmony_ci#include <linux/module.h> 2662306a36Sopenharmony_ci#include <linux/sort.h> 2762306a36Sopenharmony_ci#include <linux/sched/mm.h> 2862306a36Sopenharmony_ci#include <linux/sched/signal.h> 2962306a36Sopenharmony_ci#include <linux/sched/vhost_task.h> 3062306a36Sopenharmony_ci#include <linux/interval_tree_generic.h> 3162306a36Sopenharmony_ci#include <linux/nospec.h> 3262306a36Sopenharmony_ci#include <linux/kcov.h> 3362306a36Sopenharmony_ci 3462306a36Sopenharmony_ci#include "vhost.h" 3562306a36Sopenharmony_ci 3662306a36Sopenharmony_cistatic ushort max_mem_regions = 64; 3762306a36Sopenharmony_cimodule_param(max_mem_regions, ushort, 0444); 3862306a36Sopenharmony_ciMODULE_PARM_DESC(max_mem_regions, 3962306a36Sopenharmony_ci "Maximum number of memory regions in memory map. (default: 64)"); 4062306a36Sopenharmony_cistatic int max_iotlb_entries = 2048; 4162306a36Sopenharmony_cimodule_param(max_iotlb_entries, int, 0444); 4262306a36Sopenharmony_ciMODULE_PARM_DESC(max_iotlb_entries, 4362306a36Sopenharmony_ci "Maximum number of iotlb entries. (default: 2048)"); 4462306a36Sopenharmony_ci 4562306a36Sopenharmony_cienum { 4662306a36Sopenharmony_ci VHOST_MEMORY_F_LOG = 0x1, 4762306a36Sopenharmony_ci}; 4862306a36Sopenharmony_ci 4962306a36Sopenharmony_ci#define vhost_used_event(vq) ((__virtio16 __user *)&vq->avail->ring[vq->num]) 5062306a36Sopenharmony_ci#define vhost_avail_event(vq) ((__virtio16 __user *)&vq->used->ring[vq->num]) 5162306a36Sopenharmony_ci 5262306a36Sopenharmony_ci#ifdef CONFIG_VHOST_CROSS_ENDIAN_LEGACY 5362306a36Sopenharmony_cistatic void vhost_disable_cross_endian(struct vhost_virtqueue *vq) 5462306a36Sopenharmony_ci{ 5562306a36Sopenharmony_ci vq->user_be = !virtio_legacy_is_little_endian(); 5662306a36Sopenharmony_ci} 5762306a36Sopenharmony_ci 5862306a36Sopenharmony_cistatic void vhost_enable_cross_endian_big(struct vhost_virtqueue *vq) 5962306a36Sopenharmony_ci{ 6062306a36Sopenharmony_ci vq->user_be = true; 6162306a36Sopenharmony_ci} 6262306a36Sopenharmony_ci 6362306a36Sopenharmony_cistatic void vhost_enable_cross_endian_little(struct vhost_virtqueue *vq) 6462306a36Sopenharmony_ci{ 6562306a36Sopenharmony_ci vq->user_be = false; 6662306a36Sopenharmony_ci} 6762306a36Sopenharmony_ci 6862306a36Sopenharmony_cistatic long vhost_set_vring_endian(struct vhost_virtqueue *vq, int __user *argp) 6962306a36Sopenharmony_ci{ 7062306a36Sopenharmony_ci struct vhost_vring_state s; 7162306a36Sopenharmony_ci 7262306a36Sopenharmony_ci if (vq->private_data) 7362306a36Sopenharmony_ci return -EBUSY; 7462306a36Sopenharmony_ci 7562306a36Sopenharmony_ci if (copy_from_user(&s, argp, sizeof(s))) 7662306a36Sopenharmony_ci return -EFAULT; 7762306a36Sopenharmony_ci 7862306a36Sopenharmony_ci if (s.num != VHOST_VRING_LITTLE_ENDIAN && 7962306a36Sopenharmony_ci s.num != VHOST_VRING_BIG_ENDIAN) 8062306a36Sopenharmony_ci return -EINVAL; 8162306a36Sopenharmony_ci 8262306a36Sopenharmony_ci if (s.num == VHOST_VRING_BIG_ENDIAN) 8362306a36Sopenharmony_ci vhost_enable_cross_endian_big(vq); 8462306a36Sopenharmony_ci else 8562306a36Sopenharmony_ci vhost_enable_cross_endian_little(vq); 8662306a36Sopenharmony_ci 8762306a36Sopenharmony_ci return 0; 8862306a36Sopenharmony_ci} 8962306a36Sopenharmony_ci 9062306a36Sopenharmony_cistatic long vhost_get_vring_endian(struct vhost_virtqueue *vq, u32 idx, 9162306a36Sopenharmony_ci int __user *argp) 9262306a36Sopenharmony_ci{ 9362306a36Sopenharmony_ci struct vhost_vring_state s = { 9462306a36Sopenharmony_ci .index = idx, 9562306a36Sopenharmony_ci .num = vq->user_be 9662306a36Sopenharmony_ci }; 9762306a36Sopenharmony_ci 9862306a36Sopenharmony_ci if (copy_to_user(argp, &s, sizeof(s))) 9962306a36Sopenharmony_ci return -EFAULT; 10062306a36Sopenharmony_ci 10162306a36Sopenharmony_ci return 0; 10262306a36Sopenharmony_ci} 10362306a36Sopenharmony_ci 10462306a36Sopenharmony_cistatic void vhost_init_is_le(struct vhost_virtqueue *vq) 10562306a36Sopenharmony_ci{ 10662306a36Sopenharmony_ci /* Note for legacy virtio: user_be is initialized at reset time 10762306a36Sopenharmony_ci * according to the host endianness. If userspace does not set an 10862306a36Sopenharmony_ci * explicit endianness, the default behavior is native endian, as 10962306a36Sopenharmony_ci * expected by legacy virtio. 11062306a36Sopenharmony_ci */ 11162306a36Sopenharmony_ci vq->is_le = vhost_has_feature(vq, VIRTIO_F_VERSION_1) || !vq->user_be; 11262306a36Sopenharmony_ci} 11362306a36Sopenharmony_ci#else 11462306a36Sopenharmony_cistatic void vhost_disable_cross_endian(struct vhost_virtqueue *vq) 11562306a36Sopenharmony_ci{ 11662306a36Sopenharmony_ci} 11762306a36Sopenharmony_ci 11862306a36Sopenharmony_cistatic long vhost_set_vring_endian(struct vhost_virtqueue *vq, int __user *argp) 11962306a36Sopenharmony_ci{ 12062306a36Sopenharmony_ci return -ENOIOCTLCMD; 12162306a36Sopenharmony_ci} 12262306a36Sopenharmony_ci 12362306a36Sopenharmony_cistatic long vhost_get_vring_endian(struct vhost_virtqueue *vq, u32 idx, 12462306a36Sopenharmony_ci int __user *argp) 12562306a36Sopenharmony_ci{ 12662306a36Sopenharmony_ci return -ENOIOCTLCMD; 12762306a36Sopenharmony_ci} 12862306a36Sopenharmony_ci 12962306a36Sopenharmony_cistatic void vhost_init_is_le(struct vhost_virtqueue *vq) 13062306a36Sopenharmony_ci{ 13162306a36Sopenharmony_ci vq->is_le = vhost_has_feature(vq, VIRTIO_F_VERSION_1) 13262306a36Sopenharmony_ci || virtio_legacy_is_little_endian(); 13362306a36Sopenharmony_ci} 13462306a36Sopenharmony_ci#endif /* CONFIG_VHOST_CROSS_ENDIAN_LEGACY */ 13562306a36Sopenharmony_ci 13662306a36Sopenharmony_cistatic void vhost_reset_is_le(struct vhost_virtqueue *vq) 13762306a36Sopenharmony_ci{ 13862306a36Sopenharmony_ci vhost_init_is_le(vq); 13962306a36Sopenharmony_ci} 14062306a36Sopenharmony_ci 14162306a36Sopenharmony_cistruct vhost_flush_struct { 14262306a36Sopenharmony_ci struct vhost_work work; 14362306a36Sopenharmony_ci struct completion wait_event; 14462306a36Sopenharmony_ci}; 14562306a36Sopenharmony_ci 14662306a36Sopenharmony_cistatic void vhost_flush_work(struct vhost_work *work) 14762306a36Sopenharmony_ci{ 14862306a36Sopenharmony_ci struct vhost_flush_struct *s; 14962306a36Sopenharmony_ci 15062306a36Sopenharmony_ci s = container_of(work, struct vhost_flush_struct, work); 15162306a36Sopenharmony_ci complete(&s->wait_event); 15262306a36Sopenharmony_ci} 15362306a36Sopenharmony_ci 15462306a36Sopenharmony_cistatic void vhost_poll_func(struct file *file, wait_queue_head_t *wqh, 15562306a36Sopenharmony_ci poll_table *pt) 15662306a36Sopenharmony_ci{ 15762306a36Sopenharmony_ci struct vhost_poll *poll; 15862306a36Sopenharmony_ci 15962306a36Sopenharmony_ci poll = container_of(pt, struct vhost_poll, table); 16062306a36Sopenharmony_ci poll->wqh = wqh; 16162306a36Sopenharmony_ci add_wait_queue(wqh, &poll->wait); 16262306a36Sopenharmony_ci} 16362306a36Sopenharmony_ci 16462306a36Sopenharmony_cistatic int vhost_poll_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, 16562306a36Sopenharmony_ci void *key) 16662306a36Sopenharmony_ci{ 16762306a36Sopenharmony_ci struct vhost_poll *poll = container_of(wait, struct vhost_poll, wait); 16862306a36Sopenharmony_ci struct vhost_work *work = &poll->work; 16962306a36Sopenharmony_ci 17062306a36Sopenharmony_ci if (!(key_to_poll(key) & poll->mask)) 17162306a36Sopenharmony_ci return 0; 17262306a36Sopenharmony_ci 17362306a36Sopenharmony_ci if (!poll->dev->use_worker) 17462306a36Sopenharmony_ci work->fn(work); 17562306a36Sopenharmony_ci else 17662306a36Sopenharmony_ci vhost_poll_queue(poll); 17762306a36Sopenharmony_ci 17862306a36Sopenharmony_ci return 0; 17962306a36Sopenharmony_ci} 18062306a36Sopenharmony_ci 18162306a36Sopenharmony_civoid vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn) 18262306a36Sopenharmony_ci{ 18362306a36Sopenharmony_ci clear_bit(VHOST_WORK_QUEUED, &work->flags); 18462306a36Sopenharmony_ci work->fn = fn; 18562306a36Sopenharmony_ci} 18662306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_work_init); 18762306a36Sopenharmony_ci 18862306a36Sopenharmony_ci/* Init poll structure */ 18962306a36Sopenharmony_civoid vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn, 19062306a36Sopenharmony_ci __poll_t mask, struct vhost_dev *dev, 19162306a36Sopenharmony_ci struct vhost_virtqueue *vq) 19262306a36Sopenharmony_ci{ 19362306a36Sopenharmony_ci init_waitqueue_func_entry(&poll->wait, vhost_poll_wakeup); 19462306a36Sopenharmony_ci init_poll_funcptr(&poll->table, vhost_poll_func); 19562306a36Sopenharmony_ci poll->mask = mask; 19662306a36Sopenharmony_ci poll->dev = dev; 19762306a36Sopenharmony_ci poll->wqh = NULL; 19862306a36Sopenharmony_ci poll->vq = vq; 19962306a36Sopenharmony_ci 20062306a36Sopenharmony_ci vhost_work_init(&poll->work, fn); 20162306a36Sopenharmony_ci} 20262306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_poll_init); 20362306a36Sopenharmony_ci 20462306a36Sopenharmony_ci/* Start polling a file. We add ourselves to file's wait queue. The caller must 20562306a36Sopenharmony_ci * keep a reference to a file until after vhost_poll_stop is called. */ 20662306a36Sopenharmony_ciint vhost_poll_start(struct vhost_poll *poll, struct file *file) 20762306a36Sopenharmony_ci{ 20862306a36Sopenharmony_ci __poll_t mask; 20962306a36Sopenharmony_ci 21062306a36Sopenharmony_ci if (poll->wqh) 21162306a36Sopenharmony_ci return 0; 21262306a36Sopenharmony_ci 21362306a36Sopenharmony_ci mask = vfs_poll(file, &poll->table); 21462306a36Sopenharmony_ci if (mask) 21562306a36Sopenharmony_ci vhost_poll_wakeup(&poll->wait, 0, 0, poll_to_key(mask)); 21662306a36Sopenharmony_ci if (mask & EPOLLERR) { 21762306a36Sopenharmony_ci vhost_poll_stop(poll); 21862306a36Sopenharmony_ci return -EINVAL; 21962306a36Sopenharmony_ci } 22062306a36Sopenharmony_ci 22162306a36Sopenharmony_ci return 0; 22262306a36Sopenharmony_ci} 22362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_poll_start); 22462306a36Sopenharmony_ci 22562306a36Sopenharmony_ci/* Stop polling a file. After this function returns, it becomes safe to drop the 22662306a36Sopenharmony_ci * file reference. You must also flush afterwards. */ 22762306a36Sopenharmony_civoid vhost_poll_stop(struct vhost_poll *poll) 22862306a36Sopenharmony_ci{ 22962306a36Sopenharmony_ci if (poll->wqh) { 23062306a36Sopenharmony_ci remove_wait_queue(poll->wqh, &poll->wait); 23162306a36Sopenharmony_ci poll->wqh = NULL; 23262306a36Sopenharmony_ci } 23362306a36Sopenharmony_ci} 23462306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_poll_stop); 23562306a36Sopenharmony_ci 23662306a36Sopenharmony_cistatic void vhost_worker_queue(struct vhost_worker *worker, 23762306a36Sopenharmony_ci struct vhost_work *work) 23862306a36Sopenharmony_ci{ 23962306a36Sopenharmony_ci if (!test_and_set_bit(VHOST_WORK_QUEUED, &work->flags)) { 24062306a36Sopenharmony_ci /* We can only add the work to the list after we're 24162306a36Sopenharmony_ci * sure it was not in the list. 24262306a36Sopenharmony_ci * test_and_set_bit() implies a memory barrier. 24362306a36Sopenharmony_ci */ 24462306a36Sopenharmony_ci llist_add(&work->node, &worker->work_list); 24562306a36Sopenharmony_ci vhost_task_wake(worker->vtsk); 24662306a36Sopenharmony_ci } 24762306a36Sopenharmony_ci} 24862306a36Sopenharmony_ci 24962306a36Sopenharmony_cibool vhost_vq_work_queue(struct vhost_virtqueue *vq, struct vhost_work *work) 25062306a36Sopenharmony_ci{ 25162306a36Sopenharmony_ci struct vhost_worker *worker; 25262306a36Sopenharmony_ci bool queued = false; 25362306a36Sopenharmony_ci 25462306a36Sopenharmony_ci rcu_read_lock(); 25562306a36Sopenharmony_ci worker = rcu_dereference(vq->worker); 25662306a36Sopenharmony_ci if (worker) { 25762306a36Sopenharmony_ci queued = true; 25862306a36Sopenharmony_ci vhost_worker_queue(worker, work); 25962306a36Sopenharmony_ci } 26062306a36Sopenharmony_ci rcu_read_unlock(); 26162306a36Sopenharmony_ci 26262306a36Sopenharmony_ci return queued; 26362306a36Sopenharmony_ci} 26462306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_vq_work_queue); 26562306a36Sopenharmony_ci 26662306a36Sopenharmony_civoid vhost_vq_flush(struct vhost_virtqueue *vq) 26762306a36Sopenharmony_ci{ 26862306a36Sopenharmony_ci struct vhost_flush_struct flush; 26962306a36Sopenharmony_ci 27062306a36Sopenharmony_ci init_completion(&flush.wait_event); 27162306a36Sopenharmony_ci vhost_work_init(&flush.work, vhost_flush_work); 27262306a36Sopenharmony_ci 27362306a36Sopenharmony_ci if (vhost_vq_work_queue(vq, &flush.work)) 27462306a36Sopenharmony_ci wait_for_completion(&flush.wait_event); 27562306a36Sopenharmony_ci} 27662306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_vq_flush); 27762306a36Sopenharmony_ci 27862306a36Sopenharmony_ci/** 27962306a36Sopenharmony_ci * vhost_worker_flush - flush a worker 28062306a36Sopenharmony_ci * @worker: worker to flush 28162306a36Sopenharmony_ci * 28262306a36Sopenharmony_ci * This does not use RCU to protect the worker, so the device or worker 28362306a36Sopenharmony_ci * mutex must be held. 28462306a36Sopenharmony_ci */ 28562306a36Sopenharmony_cistatic void vhost_worker_flush(struct vhost_worker *worker) 28662306a36Sopenharmony_ci{ 28762306a36Sopenharmony_ci struct vhost_flush_struct flush; 28862306a36Sopenharmony_ci 28962306a36Sopenharmony_ci init_completion(&flush.wait_event); 29062306a36Sopenharmony_ci vhost_work_init(&flush.work, vhost_flush_work); 29162306a36Sopenharmony_ci 29262306a36Sopenharmony_ci vhost_worker_queue(worker, &flush.work); 29362306a36Sopenharmony_ci wait_for_completion(&flush.wait_event); 29462306a36Sopenharmony_ci} 29562306a36Sopenharmony_ci 29662306a36Sopenharmony_civoid vhost_dev_flush(struct vhost_dev *dev) 29762306a36Sopenharmony_ci{ 29862306a36Sopenharmony_ci struct vhost_worker *worker; 29962306a36Sopenharmony_ci unsigned long i; 30062306a36Sopenharmony_ci 30162306a36Sopenharmony_ci xa_for_each(&dev->worker_xa, i, worker) { 30262306a36Sopenharmony_ci mutex_lock(&worker->mutex); 30362306a36Sopenharmony_ci if (!worker->attachment_cnt) { 30462306a36Sopenharmony_ci mutex_unlock(&worker->mutex); 30562306a36Sopenharmony_ci continue; 30662306a36Sopenharmony_ci } 30762306a36Sopenharmony_ci vhost_worker_flush(worker); 30862306a36Sopenharmony_ci mutex_unlock(&worker->mutex); 30962306a36Sopenharmony_ci } 31062306a36Sopenharmony_ci} 31162306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_dev_flush); 31262306a36Sopenharmony_ci 31362306a36Sopenharmony_ci/* A lockless hint for busy polling code to exit the loop */ 31462306a36Sopenharmony_cibool vhost_vq_has_work(struct vhost_virtqueue *vq) 31562306a36Sopenharmony_ci{ 31662306a36Sopenharmony_ci struct vhost_worker *worker; 31762306a36Sopenharmony_ci bool has_work = false; 31862306a36Sopenharmony_ci 31962306a36Sopenharmony_ci rcu_read_lock(); 32062306a36Sopenharmony_ci worker = rcu_dereference(vq->worker); 32162306a36Sopenharmony_ci if (worker && !llist_empty(&worker->work_list)) 32262306a36Sopenharmony_ci has_work = true; 32362306a36Sopenharmony_ci rcu_read_unlock(); 32462306a36Sopenharmony_ci 32562306a36Sopenharmony_ci return has_work; 32662306a36Sopenharmony_ci} 32762306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_vq_has_work); 32862306a36Sopenharmony_ci 32962306a36Sopenharmony_civoid vhost_poll_queue(struct vhost_poll *poll) 33062306a36Sopenharmony_ci{ 33162306a36Sopenharmony_ci vhost_vq_work_queue(poll->vq, &poll->work); 33262306a36Sopenharmony_ci} 33362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_poll_queue); 33462306a36Sopenharmony_ci 33562306a36Sopenharmony_cistatic void __vhost_vq_meta_reset(struct vhost_virtqueue *vq) 33662306a36Sopenharmony_ci{ 33762306a36Sopenharmony_ci int j; 33862306a36Sopenharmony_ci 33962306a36Sopenharmony_ci for (j = 0; j < VHOST_NUM_ADDRS; j++) 34062306a36Sopenharmony_ci vq->meta_iotlb[j] = NULL; 34162306a36Sopenharmony_ci} 34262306a36Sopenharmony_ci 34362306a36Sopenharmony_cistatic void vhost_vq_meta_reset(struct vhost_dev *d) 34462306a36Sopenharmony_ci{ 34562306a36Sopenharmony_ci int i; 34662306a36Sopenharmony_ci 34762306a36Sopenharmony_ci for (i = 0; i < d->nvqs; ++i) 34862306a36Sopenharmony_ci __vhost_vq_meta_reset(d->vqs[i]); 34962306a36Sopenharmony_ci} 35062306a36Sopenharmony_ci 35162306a36Sopenharmony_cistatic void vhost_vring_call_reset(struct vhost_vring_call *call_ctx) 35262306a36Sopenharmony_ci{ 35362306a36Sopenharmony_ci call_ctx->ctx = NULL; 35462306a36Sopenharmony_ci memset(&call_ctx->producer, 0x0, sizeof(struct irq_bypass_producer)); 35562306a36Sopenharmony_ci} 35662306a36Sopenharmony_ci 35762306a36Sopenharmony_cibool vhost_vq_is_setup(struct vhost_virtqueue *vq) 35862306a36Sopenharmony_ci{ 35962306a36Sopenharmony_ci return vq->avail && vq->desc && vq->used && vhost_vq_access_ok(vq); 36062306a36Sopenharmony_ci} 36162306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_vq_is_setup); 36262306a36Sopenharmony_ci 36362306a36Sopenharmony_cistatic void vhost_vq_reset(struct vhost_dev *dev, 36462306a36Sopenharmony_ci struct vhost_virtqueue *vq) 36562306a36Sopenharmony_ci{ 36662306a36Sopenharmony_ci vq->num = 1; 36762306a36Sopenharmony_ci vq->desc = NULL; 36862306a36Sopenharmony_ci vq->avail = NULL; 36962306a36Sopenharmony_ci vq->used = NULL; 37062306a36Sopenharmony_ci vq->last_avail_idx = 0; 37162306a36Sopenharmony_ci vq->avail_idx = 0; 37262306a36Sopenharmony_ci vq->last_used_idx = 0; 37362306a36Sopenharmony_ci vq->signalled_used = 0; 37462306a36Sopenharmony_ci vq->signalled_used_valid = false; 37562306a36Sopenharmony_ci vq->used_flags = 0; 37662306a36Sopenharmony_ci vq->log_used = false; 37762306a36Sopenharmony_ci vq->log_addr = -1ull; 37862306a36Sopenharmony_ci vq->private_data = NULL; 37962306a36Sopenharmony_ci vq->acked_features = 0; 38062306a36Sopenharmony_ci vq->acked_backend_features = 0; 38162306a36Sopenharmony_ci vq->log_base = NULL; 38262306a36Sopenharmony_ci vq->error_ctx = NULL; 38362306a36Sopenharmony_ci vq->kick = NULL; 38462306a36Sopenharmony_ci vq->log_ctx = NULL; 38562306a36Sopenharmony_ci vhost_disable_cross_endian(vq); 38662306a36Sopenharmony_ci vhost_reset_is_le(vq); 38762306a36Sopenharmony_ci vq->busyloop_timeout = 0; 38862306a36Sopenharmony_ci vq->umem = NULL; 38962306a36Sopenharmony_ci vq->iotlb = NULL; 39062306a36Sopenharmony_ci rcu_assign_pointer(vq->worker, NULL); 39162306a36Sopenharmony_ci vhost_vring_call_reset(&vq->call_ctx); 39262306a36Sopenharmony_ci __vhost_vq_meta_reset(vq); 39362306a36Sopenharmony_ci} 39462306a36Sopenharmony_ci 39562306a36Sopenharmony_cistatic bool vhost_worker(void *data) 39662306a36Sopenharmony_ci{ 39762306a36Sopenharmony_ci struct vhost_worker *worker = data; 39862306a36Sopenharmony_ci struct vhost_work *work, *work_next; 39962306a36Sopenharmony_ci struct llist_node *node; 40062306a36Sopenharmony_ci 40162306a36Sopenharmony_ci node = llist_del_all(&worker->work_list); 40262306a36Sopenharmony_ci if (node) { 40362306a36Sopenharmony_ci __set_current_state(TASK_RUNNING); 40462306a36Sopenharmony_ci 40562306a36Sopenharmony_ci node = llist_reverse_order(node); 40662306a36Sopenharmony_ci /* make sure flag is seen after deletion */ 40762306a36Sopenharmony_ci smp_wmb(); 40862306a36Sopenharmony_ci llist_for_each_entry_safe(work, work_next, node, node) { 40962306a36Sopenharmony_ci clear_bit(VHOST_WORK_QUEUED, &work->flags); 41062306a36Sopenharmony_ci kcov_remote_start_common(worker->kcov_handle); 41162306a36Sopenharmony_ci work->fn(work); 41262306a36Sopenharmony_ci kcov_remote_stop(); 41362306a36Sopenharmony_ci cond_resched(); 41462306a36Sopenharmony_ci } 41562306a36Sopenharmony_ci } 41662306a36Sopenharmony_ci 41762306a36Sopenharmony_ci return !!node; 41862306a36Sopenharmony_ci} 41962306a36Sopenharmony_ci 42062306a36Sopenharmony_cistatic void vhost_vq_free_iovecs(struct vhost_virtqueue *vq) 42162306a36Sopenharmony_ci{ 42262306a36Sopenharmony_ci kfree(vq->indirect); 42362306a36Sopenharmony_ci vq->indirect = NULL; 42462306a36Sopenharmony_ci kfree(vq->log); 42562306a36Sopenharmony_ci vq->log = NULL; 42662306a36Sopenharmony_ci kfree(vq->heads); 42762306a36Sopenharmony_ci vq->heads = NULL; 42862306a36Sopenharmony_ci} 42962306a36Sopenharmony_ci 43062306a36Sopenharmony_ci/* Helper to allocate iovec buffers for all vqs. */ 43162306a36Sopenharmony_cistatic long vhost_dev_alloc_iovecs(struct vhost_dev *dev) 43262306a36Sopenharmony_ci{ 43362306a36Sopenharmony_ci struct vhost_virtqueue *vq; 43462306a36Sopenharmony_ci int i; 43562306a36Sopenharmony_ci 43662306a36Sopenharmony_ci for (i = 0; i < dev->nvqs; ++i) { 43762306a36Sopenharmony_ci vq = dev->vqs[i]; 43862306a36Sopenharmony_ci vq->indirect = kmalloc_array(UIO_MAXIOV, 43962306a36Sopenharmony_ci sizeof(*vq->indirect), 44062306a36Sopenharmony_ci GFP_KERNEL); 44162306a36Sopenharmony_ci vq->log = kmalloc_array(dev->iov_limit, sizeof(*vq->log), 44262306a36Sopenharmony_ci GFP_KERNEL); 44362306a36Sopenharmony_ci vq->heads = kmalloc_array(dev->iov_limit, sizeof(*vq->heads), 44462306a36Sopenharmony_ci GFP_KERNEL); 44562306a36Sopenharmony_ci if (!vq->indirect || !vq->log || !vq->heads) 44662306a36Sopenharmony_ci goto err_nomem; 44762306a36Sopenharmony_ci } 44862306a36Sopenharmony_ci return 0; 44962306a36Sopenharmony_ci 45062306a36Sopenharmony_cierr_nomem: 45162306a36Sopenharmony_ci for (; i >= 0; --i) 45262306a36Sopenharmony_ci vhost_vq_free_iovecs(dev->vqs[i]); 45362306a36Sopenharmony_ci return -ENOMEM; 45462306a36Sopenharmony_ci} 45562306a36Sopenharmony_ci 45662306a36Sopenharmony_cistatic void vhost_dev_free_iovecs(struct vhost_dev *dev) 45762306a36Sopenharmony_ci{ 45862306a36Sopenharmony_ci int i; 45962306a36Sopenharmony_ci 46062306a36Sopenharmony_ci for (i = 0; i < dev->nvqs; ++i) 46162306a36Sopenharmony_ci vhost_vq_free_iovecs(dev->vqs[i]); 46262306a36Sopenharmony_ci} 46362306a36Sopenharmony_ci 46462306a36Sopenharmony_cibool vhost_exceeds_weight(struct vhost_virtqueue *vq, 46562306a36Sopenharmony_ci int pkts, int total_len) 46662306a36Sopenharmony_ci{ 46762306a36Sopenharmony_ci struct vhost_dev *dev = vq->dev; 46862306a36Sopenharmony_ci 46962306a36Sopenharmony_ci if ((dev->byte_weight && total_len >= dev->byte_weight) || 47062306a36Sopenharmony_ci pkts >= dev->weight) { 47162306a36Sopenharmony_ci vhost_poll_queue(&vq->poll); 47262306a36Sopenharmony_ci return true; 47362306a36Sopenharmony_ci } 47462306a36Sopenharmony_ci 47562306a36Sopenharmony_ci return false; 47662306a36Sopenharmony_ci} 47762306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_exceeds_weight); 47862306a36Sopenharmony_ci 47962306a36Sopenharmony_cistatic size_t vhost_get_avail_size(struct vhost_virtqueue *vq, 48062306a36Sopenharmony_ci unsigned int num) 48162306a36Sopenharmony_ci{ 48262306a36Sopenharmony_ci size_t event __maybe_unused = 48362306a36Sopenharmony_ci vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; 48462306a36Sopenharmony_ci 48562306a36Sopenharmony_ci return size_add(struct_size(vq->avail, ring, num), event); 48662306a36Sopenharmony_ci} 48762306a36Sopenharmony_ci 48862306a36Sopenharmony_cistatic size_t vhost_get_used_size(struct vhost_virtqueue *vq, 48962306a36Sopenharmony_ci unsigned int num) 49062306a36Sopenharmony_ci{ 49162306a36Sopenharmony_ci size_t event __maybe_unused = 49262306a36Sopenharmony_ci vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; 49362306a36Sopenharmony_ci 49462306a36Sopenharmony_ci return size_add(struct_size(vq->used, ring, num), event); 49562306a36Sopenharmony_ci} 49662306a36Sopenharmony_ci 49762306a36Sopenharmony_cistatic size_t vhost_get_desc_size(struct vhost_virtqueue *vq, 49862306a36Sopenharmony_ci unsigned int num) 49962306a36Sopenharmony_ci{ 50062306a36Sopenharmony_ci return sizeof(*vq->desc) * num; 50162306a36Sopenharmony_ci} 50262306a36Sopenharmony_ci 50362306a36Sopenharmony_civoid vhost_dev_init(struct vhost_dev *dev, 50462306a36Sopenharmony_ci struct vhost_virtqueue **vqs, int nvqs, 50562306a36Sopenharmony_ci int iov_limit, int weight, int byte_weight, 50662306a36Sopenharmony_ci bool use_worker, 50762306a36Sopenharmony_ci int (*msg_handler)(struct vhost_dev *dev, u32 asid, 50862306a36Sopenharmony_ci struct vhost_iotlb_msg *msg)) 50962306a36Sopenharmony_ci{ 51062306a36Sopenharmony_ci struct vhost_virtqueue *vq; 51162306a36Sopenharmony_ci int i; 51262306a36Sopenharmony_ci 51362306a36Sopenharmony_ci dev->vqs = vqs; 51462306a36Sopenharmony_ci dev->nvqs = nvqs; 51562306a36Sopenharmony_ci mutex_init(&dev->mutex); 51662306a36Sopenharmony_ci dev->log_ctx = NULL; 51762306a36Sopenharmony_ci dev->umem = NULL; 51862306a36Sopenharmony_ci dev->iotlb = NULL; 51962306a36Sopenharmony_ci dev->mm = NULL; 52062306a36Sopenharmony_ci dev->iov_limit = iov_limit; 52162306a36Sopenharmony_ci dev->weight = weight; 52262306a36Sopenharmony_ci dev->byte_weight = byte_weight; 52362306a36Sopenharmony_ci dev->use_worker = use_worker; 52462306a36Sopenharmony_ci dev->msg_handler = msg_handler; 52562306a36Sopenharmony_ci init_waitqueue_head(&dev->wait); 52662306a36Sopenharmony_ci INIT_LIST_HEAD(&dev->read_list); 52762306a36Sopenharmony_ci INIT_LIST_HEAD(&dev->pending_list); 52862306a36Sopenharmony_ci spin_lock_init(&dev->iotlb_lock); 52962306a36Sopenharmony_ci xa_init_flags(&dev->worker_xa, XA_FLAGS_ALLOC); 53062306a36Sopenharmony_ci 53162306a36Sopenharmony_ci for (i = 0; i < dev->nvqs; ++i) { 53262306a36Sopenharmony_ci vq = dev->vqs[i]; 53362306a36Sopenharmony_ci vq->log = NULL; 53462306a36Sopenharmony_ci vq->indirect = NULL; 53562306a36Sopenharmony_ci vq->heads = NULL; 53662306a36Sopenharmony_ci vq->dev = dev; 53762306a36Sopenharmony_ci mutex_init(&vq->mutex); 53862306a36Sopenharmony_ci vhost_vq_reset(dev, vq); 53962306a36Sopenharmony_ci if (vq->handle_kick) 54062306a36Sopenharmony_ci vhost_poll_init(&vq->poll, vq->handle_kick, 54162306a36Sopenharmony_ci EPOLLIN, dev, vq); 54262306a36Sopenharmony_ci } 54362306a36Sopenharmony_ci} 54462306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_dev_init); 54562306a36Sopenharmony_ci 54662306a36Sopenharmony_ci/* Caller should have device mutex */ 54762306a36Sopenharmony_cilong vhost_dev_check_owner(struct vhost_dev *dev) 54862306a36Sopenharmony_ci{ 54962306a36Sopenharmony_ci /* Are you the owner? If not, I don't think you mean to do that */ 55062306a36Sopenharmony_ci return dev->mm == current->mm ? 0 : -EPERM; 55162306a36Sopenharmony_ci} 55262306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_dev_check_owner); 55362306a36Sopenharmony_ci 55462306a36Sopenharmony_ci/* Caller should have device mutex */ 55562306a36Sopenharmony_cibool vhost_dev_has_owner(struct vhost_dev *dev) 55662306a36Sopenharmony_ci{ 55762306a36Sopenharmony_ci return dev->mm; 55862306a36Sopenharmony_ci} 55962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_dev_has_owner); 56062306a36Sopenharmony_ci 56162306a36Sopenharmony_cistatic void vhost_attach_mm(struct vhost_dev *dev) 56262306a36Sopenharmony_ci{ 56362306a36Sopenharmony_ci /* No owner, become one */ 56462306a36Sopenharmony_ci if (dev->use_worker) { 56562306a36Sopenharmony_ci dev->mm = get_task_mm(current); 56662306a36Sopenharmony_ci } else { 56762306a36Sopenharmony_ci /* vDPA device does not use worker thead, so there's 56862306a36Sopenharmony_ci * no need to hold the address space for mm. This help 56962306a36Sopenharmony_ci * to avoid deadlock in the case of mmap() which may 57062306a36Sopenharmony_ci * held the refcnt of the file and depends on release 57162306a36Sopenharmony_ci * method to remove vma. 57262306a36Sopenharmony_ci */ 57362306a36Sopenharmony_ci dev->mm = current->mm; 57462306a36Sopenharmony_ci mmgrab(dev->mm); 57562306a36Sopenharmony_ci } 57662306a36Sopenharmony_ci} 57762306a36Sopenharmony_ci 57862306a36Sopenharmony_cistatic void vhost_detach_mm(struct vhost_dev *dev) 57962306a36Sopenharmony_ci{ 58062306a36Sopenharmony_ci if (!dev->mm) 58162306a36Sopenharmony_ci return; 58262306a36Sopenharmony_ci 58362306a36Sopenharmony_ci if (dev->use_worker) 58462306a36Sopenharmony_ci mmput(dev->mm); 58562306a36Sopenharmony_ci else 58662306a36Sopenharmony_ci mmdrop(dev->mm); 58762306a36Sopenharmony_ci 58862306a36Sopenharmony_ci dev->mm = NULL; 58962306a36Sopenharmony_ci} 59062306a36Sopenharmony_ci 59162306a36Sopenharmony_cistatic void vhost_worker_destroy(struct vhost_dev *dev, 59262306a36Sopenharmony_ci struct vhost_worker *worker) 59362306a36Sopenharmony_ci{ 59462306a36Sopenharmony_ci if (!worker) 59562306a36Sopenharmony_ci return; 59662306a36Sopenharmony_ci 59762306a36Sopenharmony_ci WARN_ON(!llist_empty(&worker->work_list)); 59862306a36Sopenharmony_ci xa_erase(&dev->worker_xa, worker->id); 59962306a36Sopenharmony_ci vhost_task_stop(worker->vtsk); 60062306a36Sopenharmony_ci kfree(worker); 60162306a36Sopenharmony_ci} 60262306a36Sopenharmony_ci 60362306a36Sopenharmony_cistatic void vhost_workers_free(struct vhost_dev *dev) 60462306a36Sopenharmony_ci{ 60562306a36Sopenharmony_ci struct vhost_worker *worker; 60662306a36Sopenharmony_ci unsigned long i; 60762306a36Sopenharmony_ci 60862306a36Sopenharmony_ci if (!dev->use_worker) 60962306a36Sopenharmony_ci return; 61062306a36Sopenharmony_ci 61162306a36Sopenharmony_ci for (i = 0; i < dev->nvqs; i++) 61262306a36Sopenharmony_ci rcu_assign_pointer(dev->vqs[i]->worker, NULL); 61362306a36Sopenharmony_ci /* 61462306a36Sopenharmony_ci * Free the default worker we created and cleanup workers userspace 61562306a36Sopenharmony_ci * created but couldn't clean up (it forgot or crashed). 61662306a36Sopenharmony_ci */ 61762306a36Sopenharmony_ci xa_for_each(&dev->worker_xa, i, worker) 61862306a36Sopenharmony_ci vhost_worker_destroy(dev, worker); 61962306a36Sopenharmony_ci xa_destroy(&dev->worker_xa); 62062306a36Sopenharmony_ci} 62162306a36Sopenharmony_ci 62262306a36Sopenharmony_cistatic struct vhost_worker *vhost_worker_create(struct vhost_dev *dev) 62362306a36Sopenharmony_ci{ 62462306a36Sopenharmony_ci struct vhost_worker *worker; 62562306a36Sopenharmony_ci struct vhost_task *vtsk; 62662306a36Sopenharmony_ci char name[TASK_COMM_LEN]; 62762306a36Sopenharmony_ci int ret; 62862306a36Sopenharmony_ci u32 id; 62962306a36Sopenharmony_ci 63062306a36Sopenharmony_ci worker = kzalloc(sizeof(*worker), GFP_KERNEL_ACCOUNT); 63162306a36Sopenharmony_ci if (!worker) 63262306a36Sopenharmony_ci return NULL; 63362306a36Sopenharmony_ci 63462306a36Sopenharmony_ci snprintf(name, sizeof(name), "vhost-%d", current->pid); 63562306a36Sopenharmony_ci 63662306a36Sopenharmony_ci vtsk = vhost_task_create(vhost_worker, worker, name); 63762306a36Sopenharmony_ci if (!vtsk) 63862306a36Sopenharmony_ci goto free_worker; 63962306a36Sopenharmony_ci 64062306a36Sopenharmony_ci mutex_init(&worker->mutex); 64162306a36Sopenharmony_ci init_llist_head(&worker->work_list); 64262306a36Sopenharmony_ci worker->kcov_handle = kcov_common_handle(); 64362306a36Sopenharmony_ci worker->vtsk = vtsk; 64462306a36Sopenharmony_ci 64562306a36Sopenharmony_ci vhost_task_start(vtsk); 64662306a36Sopenharmony_ci 64762306a36Sopenharmony_ci ret = xa_alloc(&dev->worker_xa, &id, worker, xa_limit_32b, GFP_KERNEL); 64862306a36Sopenharmony_ci if (ret < 0) 64962306a36Sopenharmony_ci goto stop_worker; 65062306a36Sopenharmony_ci worker->id = id; 65162306a36Sopenharmony_ci 65262306a36Sopenharmony_ci return worker; 65362306a36Sopenharmony_ci 65462306a36Sopenharmony_cistop_worker: 65562306a36Sopenharmony_ci vhost_task_stop(vtsk); 65662306a36Sopenharmony_cifree_worker: 65762306a36Sopenharmony_ci kfree(worker); 65862306a36Sopenharmony_ci return NULL; 65962306a36Sopenharmony_ci} 66062306a36Sopenharmony_ci 66162306a36Sopenharmony_ci/* Caller must have device mutex */ 66262306a36Sopenharmony_cistatic void __vhost_vq_attach_worker(struct vhost_virtqueue *vq, 66362306a36Sopenharmony_ci struct vhost_worker *worker) 66462306a36Sopenharmony_ci{ 66562306a36Sopenharmony_ci struct vhost_worker *old_worker; 66662306a36Sopenharmony_ci 66762306a36Sopenharmony_ci old_worker = rcu_dereference_check(vq->worker, 66862306a36Sopenharmony_ci lockdep_is_held(&vq->dev->mutex)); 66962306a36Sopenharmony_ci 67062306a36Sopenharmony_ci mutex_lock(&worker->mutex); 67162306a36Sopenharmony_ci worker->attachment_cnt++; 67262306a36Sopenharmony_ci mutex_unlock(&worker->mutex); 67362306a36Sopenharmony_ci rcu_assign_pointer(vq->worker, worker); 67462306a36Sopenharmony_ci 67562306a36Sopenharmony_ci if (!old_worker) 67662306a36Sopenharmony_ci return; 67762306a36Sopenharmony_ci /* 67862306a36Sopenharmony_ci * Take the worker mutex to make sure we see the work queued from 67962306a36Sopenharmony_ci * device wide flushes which doesn't use RCU for execution. 68062306a36Sopenharmony_ci */ 68162306a36Sopenharmony_ci mutex_lock(&old_worker->mutex); 68262306a36Sopenharmony_ci old_worker->attachment_cnt--; 68362306a36Sopenharmony_ci /* 68462306a36Sopenharmony_ci * We don't want to call synchronize_rcu for every vq during setup 68562306a36Sopenharmony_ci * because it will slow down VM startup. If we haven't done 68662306a36Sopenharmony_ci * VHOST_SET_VRING_KICK and not done the driver specific 68762306a36Sopenharmony_ci * SET_ENDPOINT/RUNNUNG then we can skip the sync since there will 68862306a36Sopenharmony_ci * not be any works queued for scsi and net. 68962306a36Sopenharmony_ci */ 69062306a36Sopenharmony_ci mutex_lock(&vq->mutex); 69162306a36Sopenharmony_ci if (!vhost_vq_get_backend(vq) && !vq->kick) { 69262306a36Sopenharmony_ci mutex_unlock(&vq->mutex); 69362306a36Sopenharmony_ci mutex_unlock(&old_worker->mutex); 69462306a36Sopenharmony_ci /* 69562306a36Sopenharmony_ci * vsock can queue anytime after VHOST_VSOCK_SET_GUEST_CID. 69662306a36Sopenharmony_ci * Warn if it adds support for multiple workers but forgets to 69762306a36Sopenharmony_ci * handle the early queueing case. 69862306a36Sopenharmony_ci */ 69962306a36Sopenharmony_ci WARN_ON(!old_worker->attachment_cnt && 70062306a36Sopenharmony_ci !llist_empty(&old_worker->work_list)); 70162306a36Sopenharmony_ci return; 70262306a36Sopenharmony_ci } 70362306a36Sopenharmony_ci mutex_unlock(&vq->mutex); 70462306a36Sopenharmony_ci 70562306a36Sopenharmony_ci /* Make sure new vq queue/flush/poll calls see the new worker */ 70662306a36Sopenharmony_ci synchronize_rcu(); 70762306a36Sopenharmony_ci /* Make sure whatever was queued gets run */ 70862306a36Sopenharmony_ci vhost_worker_flush(old_worker); 70962306a36Sopenharmony_ci mutex_unlock(&old_worker->mutex); 71062306a36Sopenharmony_ci} 71162306a36Sopenharmony_ci 71262306a36Sopenharmony_ci /* Caller must have device mutex */ 71362306a36Sopenharmony_cistatic int vhost_vq_attach_worker(struct vhost_virtqueue *vq, 71462306a36Sopenharmony_ci struct vhost_vring_worker *info) 71562306a36Sopenharmony_ci{ 71662306a36Sopenharmony_ci unsigned long index = info->worker_id; 71762306a36Sopenharmony_ci struct vhost_dev *dev = vq->dev; 71862306a36Sopenharmony_ci struct vhost_worker *worker; 71962306a36Sopenharmony_ci 72062306a36Sopenharmony_ci if (!dev->use_worker) 72162306a36Sopenharmony_ci return -EINVAL; 72262306a36Sopenharmony_ci 72362306a36Sopenharmony_ci worker = xa_find(&dev->worker_xa, &index, UINT_MAX, XA_PRESENT); 72462306a36Sopenharmony_ci if (!worker || worker->id != info->worker_id) 72562306a36Sopenharmony_ci return -ENODEV; 72662306a36Sopenharmony_ci 72762306a36Sopenharmony_ci __vhost_vq_attach_worker(vq, worker); 72862306a36Sopenharmony_ci return 0; 72962306a36Sopenharmony_ci} 73062306a36Sopenharmony_ci 73162306a36Sopenharmony_ci/* Caller must have device mutex */ 73262306a36Sopenharmony_cistatic int vhost_new_worker(struct vhost_dev *dev, 73362306a36Sopenharmony_ci struct vhost_worker_state *info) 73462306a36Sopenharmony_ci{ 73562306a36Sopenharmony_ci struct vhost_worker *worker; 73662306a36Sopenharmony_ci 73762306a36Sopenharmony_ci worker = vhost_worker_create(dev); 73862306a36Sopenharmony_ci if (!worker) 73962306a36Sopenharmony_ci return -ENOMEM; 74062306a36Sopenharmony_ci 74162306a36Sopenharmony_ci info->worker_id = worker->id; 74262306a36Sopenharmony_ci return 0; 74362306a36Sopenharmony_ci} 74462306a36Sopenharmony_ci 74562306a36Sopenharmony_ci/* Caller must have device mutex */ 74662306a36Sopenharmony_cistatic int vhost_free_worker(struct vhost_dev *dev, 74762306a36Sopenharmony_ci struct vhost_worker_state *info) 74862306a36Sopenharmony_ci{ 74962306a36Sopenharmony_ci unsigned long index = info->worker_id; 75062306a36Sopenharmony_ci struct vhost_worker *worker; 75162306a36Sopenharmony_ci 75262306a36Sopenharmony_ci worker = xa_find(&dev->worker_xa, &index, UINT_MAX, XA_PRESENT); 75362306a36Sopenharmony_ci if (!worker || worker->id != info->worker_id) 75462306a36Sopenharmony_ci return -ENODEV; 75562306a36Sopenharmony_ci 75662306a36Sopenharmony_ci mutex_lock(&worker->mutex); 75762306a36Sopenharmony_ci if (worker->attachment_cnt) { 75862306a36Sopenharmony_ci mutex_unlock(&worker->mutex); 75962306a36Sopenharmony_ci return -EBUSY; 76062306a36Sopenharmony_ci } 76162306a36Sopenharmony_ci mutex_unlock(&worker->mutex); 76262306a36Sopenharmony_ci 76362306a36Sopenharmony_ci vhost_worker_destroy(dev, worker); 76462306a36Sopenharmony_ci return 0; 76562306a36Sopenharmony_ci} 76662306a36Sopenharmony_ci 76762306a36Sopenharmony_cistatic int vhost_get_vq_from_user(struct vhost_dev *dev, void __user *argp, 76862306a36Sopenharmony_ci struct vhost_virtqueue **vq, u32 *id) 76962306a36Sopenharmony_ci{ 77062306a36Sopenharmony_ci u32 __user *idxp = argp; 77162306a36Sopenharmony_ci u32 idx; 77262306a36Sopenharmony_ci long r; 77362306a36Sopenharmony_ci 77462306a36Sopenharmony_ci r = get_user(idx, idxp); 77562306a36Sopenharmony_ci if (r < 0) 77662306a36Sopenharmony_ci return r; 77762306a36Sopenharmony_ci 77862306a36Sopenharmony_ci if (idx >= dev->nvqs) 77962306a36Sopenharmony_ci return -ENOBUFS; 78062306a36Sopenharmony_ci 78162306a36Sopenharmony_ci idx = array_index_nospec(idx, dev->nvqs); 78262306a36Sopenharmony_ci 78362306a36Sopenharmony_ci *vq = dev->vqs[idx]; 78462306a36Sopenharmony_ci *id = idx; 78562306a36Sopenharmony_ci return 0; 78662306a36Sopenharmony_ci} 78762306a36Sopenharmony_ci 78862306a36Sopenharmony_ci/* Caller must have device mutex */ 78962306a36Sopenharmony_cilong vhost_worker_ioctl(struct vhost_dev *dev, unsigned int ioctl, 79062306a36Sopenharmony_ci void __user *argp) 79162306a36Sopenharmony_ci{ 79262306a36Sopenharmony_ci struct vhost_vring_worker ring_worker; 79362306a36Sopenharmony_ci struct vhost_worker_state state; 79462306a36Sopenharmony_ci struct vhost_worker *worker; 79562306a36Sopenharmony_ci struct vhost_virtqueue *vq; 79662306a36Sopenharmony_ci long ret; 79762306a36Sopenharmony_ci u32 idx; 79862306a36Sopenharmony_ci 79962306a36Sopenharmony_ci if (!dev->use_worker) 80062306a36Sopenharmony_ci return -EINVAL; 80162306a36Sopenharmony_ci 80262306a36Sopenharmony_ci if (!vhost_dev_has_owner(dev)) 80362306a36Sopenharmony_ci return -EINVAL; 80462306a36Sopenharmony_ci 80562306a36Sopenharmony_ci ret = vhost_dev_check_owner(dev); 80662306a36Sopenharmony_ci if (ret) 80762306a36Sopenharmony_ci return ret; 80862306a36Sopenharmony_ci 80962306a36Sopenharmony_ci switch (ioctl) { 81062306a36Sopenharmony_ci /* dev worker ioctls */ 81162306a36Sopenharmony_ci case VHOST_NEW_WORKER: 81262306a36Sopenharmony_ci ret = vhost_new_worker(dev, &state); 81362306a36Sopenharmony_ci if (!ret && copy_to_user(argp, &state, sizeof(state))) 81462306a36Sopenharmony_ci ret = -EFAULT; 81562306a36Sopenharmony_ci return ret; 81662306a36Sopenharmony_ci case VHOST_FREE_WORKER: 81762306a36Sopenharmony_ci if (copy_from_user(&state, argp, sizeof(state))) 81862306a36Sopenharmony_ci return -EFAULT; 81962306a36Sopenharmony_ci return vhost_free_worker(dev, &state); 82062306a36Sopenharmony_ci /* vring worker ioctls */ 82162306a36Sopenharmony_ci case VHOST_ATTACH_VRING_WORKER: 82262306a36Sopenharmony_ci case VHOST_GET_VRING_WORKER: 82362306a36Sopenharmony_ci break; 82462306a36Sopenharmony_ci default: 82562306a36Sopenharmony_ci return -ENOIOCTLCMD; 82662306a36Sopenharmony_ci } 82762306a36Sopenharmony_ci 82862306a36Sopenharmony_ci ret = vhost_get_vq_from_user(dev, argp, &vq, &idx); 82962306a36Sopenharmony_ci if (ret) 83062306a36Sopenharmony_ci return ret; 83162306a36Sopenharmony_ci 83262306a36Sopenharmony_ci switch (ioctl) { 83362306a36Sopenharmony_ci case VHOST_ATTACH_VRING_WORKER: 83462306a36Sopenharmony_ci if (copy_from_user(&ring_worker, argp, sizeof(ring_worker))) { 83562306a36Sopenharmony_ci ret = -EFAULT; 83662306a36Sopenharmony_ci break; 83762306a36Sopenharmony_ci } 83862306a36Sopenharmony_ci 83962306a36Sopenharmony_ci ret = vhost_vq_attach_worker(vq, &ring_worker); 84062306a36Sopenharmony_ci break; 84162306a36Sopenharmony_ci case VHOST_GET_VRING_WORKER: 84262306a36Sopenharmony_ci worker = rcu_dereference_check(vq->worker, 84362306a36Sopenharmony_ci lockdep_is_held(&dev->mutex)); 84462306a36Sopenharmony_ci if (!worker) { 84562306a36Sopenharmony_ci ret = -EINVAL; 84662306a36Sopenharmony_ci break; 84762306a36Sopenharmony_ci } 84862306a36Sopenharmony_ci 84962306a36Sopenharmony_ci ring_worker.index = idx; 85062306a36Sopenharmony_ci ring_worker.worker_id = worker->id; 85162306a36Sopenharmony_ci 85262306a36Sopenharmony_ci if (copy_to_user(argp, &ring_worker, sizeof(ring_worker))) 85362306a36Sopenharmony_ci ret = -EFAULT; 85462306a36Sopenharmony_ci break; 85562306a36Sopenharmony_ci default: 85662306a36Sopenharmony_ci ret = -ENOIOCTLCMD; 85762306a36Sopenharmony_ci break; 85862306a36Sopenharmony_ci } 85962306a36Sopenharmony_ci 86062306a36Sopenharmony_ci return ret; 86162306a36Sopenharmony_ci} 86262306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_worker_ioctl); 86362306a36Sopenharmony_ci 86462306a36Sopenharmony_ci/* Caller should have device mutex */ 86562306a36Sopenharmony_cilong vhost_dev_set_owner(struct vhost_dev *dev) 86662306a36Sopenharmony_ci{ 86762306a36Sopenharmony_ci struct vhost_worker *worker; 86862306a36Sopenharmony_ci int err, i; 86962306a36Sopenharmony_ci 87062306a36Sopenharmony_ci /* Is there an owner already? */ 87162306a36Sopenharmony_ci if (vhost_dev_has_owner(dev)) { 87262306a36Sopenharmony_ci err = -EBUSY; 87362306a36Sopenharmony_ci goto err_mm; 87462306a36Sopenharmony_ci } 87562306a36Sopenharmony_ci 87662306a36Sopenharmony_ci vhost_attach_mm(dev); 87762306a36Sopenharmony_ci 87862306a36Sopenharmony_ci err = vhost_dev_alloc_iovecs(dev); 87962306a36Sopenharmony_ci if (err) 88062306a36Sopenharmony_ci goto err_iovecs; 88162306a36Sopenharmony_ci 88262306a36Sopenharmony_ci if (dev->use_worker) { 88362306a36Sopenharmony_ci /* 88462306a36Sopenharmony_ci * This should be done last, because vsock can queue work 88562306a36Sopenharmony_ci * before VHOST_SET_OWNER so it simplifies the failure path 88662306a36Sopenharmony_ci * below since we don't have to worry about vsock queueing 88762306a36Sopenharmony_ci * while we free the worker. 88862306a36Sopenharmony_ci */ 88962306a36Sopenharmony_ci worker = vhost_worker_create(dev); 89062306a36Sopenharmony_ci if (!worker) { 89162306a36Sopenharmony_ci err = -ENOMEM; 89262306a36Sopenharmony_ci goto err_worker; 89362306a36Sopenharmony_ci } 89462306a36Sopenharmony_ci 89562306a36Sopenharmony_ci for (i = 0; i < dev->nvqs; i++) 89662306a36Sopenharmony_ci __vhost_vq_attach_worker(dev->vqs[i], worker); 89762306a36Sopenharmony_ci } 89862306a36Sopenharmony_ci 89962306a36Sopenharmony_ci return 0; 90062306a36Sopenharmony_ci 90162306a36Sopenharmony_cierr_worker: 90262306a36Sopenharmony_ci vhost_dev_free_iovecs(dev); 90362306a36Sopenharmony_cierr_iovecs: 90462306a36Sopenharmony_ci vhost_detach_mm(dev); 90562306a36Sopenharmony_cierr_mm: 90662306a36Sopenharmony_ci return err; 90762306a36Sopenharmony_ci} 90862306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_dev_set_owner); 90962306a36Sopenharmony_ci 91062306a36Sopenharmony_cistatic struct vhost_iotlb *iotlb_alloc(void) 91162306a36Sopenharmony_ci{ 91262306a36Sopenharmony_ci return vhost_iotlb_alloc(max_iotlb_entries, 91362306a36Sopenharmony_ci VHOST_IOTLB_FLAG_RETIRE); 91462306a36Sopenharmony_ci} 91562306a36Sopenharmony_ci 91662306a36Sopenharmony_cistruct vhost_iotlb *vhost_dev_reset_owner_prepare(void) 91762306a36Sopenharmony_ci{ 91862306a36Sopenharmony_ci return iotlb_alloc(); 91962306a36Sopenharmony_ci} 92062306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_dev_reset_owner_prepare); 92162306a36Sopenharmony_ci 92262306a36Sopenharmony_ci/* Caller should have device mutex */ 92362306a36Sopenharmony_civoid vhost_dev_reset_owner(struct vhost_dev *dev, struct vhost_iotlb *umem) 92462306a36Sopenharmony_ci{ 92562306a36Sopenharmony_ci int i; 92662306a36Sopenharmony_ci 92762306a36Sopenharmony_ci vhost_dev_cleanup(dev); 92862306a36Sopenharmony_ci 92962306a36Sopenharmony_ci dev->umem = umem; 93062306a36Sopenharmony_ci /* We don't need VQ locks below since vhost_dev_cleanup makes sure 93162306a36Sopenharmony_ci * VQs aren't running. 93262306a36Sopenharmony_ci */ 93362306a36Sopenharmony_ci for (i = 0; i < dev->nvqs; ++i) 93462306a36Sopenharmony_ci dev->vqs[i]->umem = umem; 93562306a36Sopenharmony_ci} 93662306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_dev_reset_owner); 93762306a36Sopenharmony_ci 93862306a36Sopenharmony_civoid vhost_dev_stop(struct vhost_dev *dev) 93962306a36Sopenharmony_ci{ 94062306a36Sopenharmony_ci int i; 94162306a36Sopenharmony_ci 94262306a36Sopenharmony_ci for (i = 0; i < dev->nvqs; ++i) { 94362306a36Sopenharmony_ci if (dev->vqs[i]->kick && dev->vqs[i]->handle_kick) 94462306a36Sopenharmony_ci vhost_poll_stop(&dev->vqs[i]->poll); 94562306a36Sopenharmony_ci } 94662306a36Sopenharmony_ci 94762306a36Sopenharmony_ci vhost_dev_flush(dev); 94862306a36Sopenharmony_ci} 94962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_dev_stop); 95062306a36Sopenharmony_ci 95162306a36Sopenharmony_civoid vhost_clear_msg(struct vhost_dev *dev) 95262306a36Sopenharmony_ci{ 95362306a36Sopenharmony_ci struct vhost_msg_node *node, *n; 95462306a36Sopenharmony_ci 95562306a36Sopenharmony_ci spin_lock(&dev->iotlb_lock); 95662306a36Sopenharmony_ci 95762306a36Sopenharmony_ci list_for_each_entry_safe(node, n, &dev->read_list, node) { 95862306a36Sopenharmony_ci list_del(&node->node); 95962306a36Sopenharmony_ci kfree(node); 96062306a36Sopenharmony_ci } 96162306a36Sopenharmony_ci 96262306a36Sopenharmony_ci list_for_each_entry_safe(node, n, &dev->pending_list, node) { 96362306a36Sopenharmony_ci list_del(&node->node); 96462306a36Sopenharmony_ci kfree(node); 96562306a36Sopenharmony_ci } 96662306a36Sopenharmony_ci 96762306a36Sopenharmony_ci spin_unlock(&dev->iotlb_lock); 96862306a36Sopenharmony_ci} 96962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_clear_msg); 97062306a36Sopenharmony_ci 97162306a36Sopenharmony_civoid vhost_dev_cleanup(struct vhost_dev *dev) 97262306a36Sopenharmony_ci{ 97362306a36Sopenharmony_ci int i; 97462306a36Sopenharmony_ci 97562306a36Sopenharmony_ci for (i = 0; i < dev->nvqs; ++i) { 97662306a36Sopenharmony_ci if (dev->vqs[i]->error_ctx) 97762306a36Sopenharmony_ci eventfd_ctx_put(dev->vqs[i]->error_ctx); 97862306a36Sopenharmony_ci if (dev->vqs[i]->kick) 97962306a36Sopenharmony_ci fput(dev->vqs[i]->kick); 98062306a36Sopenharmony_ci if (dev->vqs[i]->call_ctx.ctx) 98162306a36Sopenharmony_ci eventfd_ctx_put(dev->vqs[i]->call_ctx.ctx); 98262306a36Sopenharmony_ci vhost_vq_reset(dev, dev->vqs[i]); 98362306a36Sopenharmony_ci } 98462306a36Sopenharmony_ci vhost_dev_free_iovecs(dev); 98562306a36Sopenharmony_ci if (dev->log_ctx) 98662306a36Sopenharmony_ci eventfd_ctx_put(dev->log_ctx); 98762306a36Sopenharmony_ci dev->log_ctx = NULL; 98862306a36Sopenharmony_ci /* No one will access memory at this point */ 98962306a36Sopenharmony_ci vhost_iotlb_free(dev->umem); 99062306a36Sopenharmony_ci dev->umem = NULL; 99162306a36Sopenharmony_ci vhost_iotlb_free(dev->iotlb); 99262306a36Sopenharmony_ci dev->iotlb = NULL; 99362306a36Sopenharmony_ci vhost_clear_msg(dev); 99462306a36Sopenharmony_ci wake_up_interruptible_poll(&dev->wait, EPOLLIN | EPOLLRDNORM); 99562306a36Sopenharmony_ci vhost_workers_free(dev); 99662306a36Sopenharmony_ci vhost_detach_mm(dev); 99762306a36Sopenharmony_ci} 99862306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_dev_cleanup); 99962306a36Sopenharmony_ci 100062306a36Sopenharmony_cistatic bool log_access_ok(void __user *log_base, u64 addr, unsigned long sz) 100162306a36Sopenharmony_ci{ 100262306a36Sopenharmony_ci u64 a = addr / VHOST_PAGE_SIZE / 8; 100362306a36Sopenharmony_ci 100462306a36Sopenharmony_ci /* Make sure 64 bit math will not overflow. */ 100562306a36Sopenharmony_ci if (a > ULONG_MAX - (unsigned long)log_base || 100662306a36Sopenharmony_ci a + (unsigned long)log_base > ULONG_MAX) 100762306a36Sopenharmony_ci return false; 100862306a36Sopenharmony_ci 100962306a36Sopenharmony_ci return access_ok(log_base + a, 101062306a36Sopenharmony_ci (sz + VHOST_PAGE_SIZE * 8 - 1) / VHOST_PAGE_SIZE / 8); 101162306a36Sopenharmony_ci} 101262306a36Sopenharmony_ci 101362306a36Sopenharmony_ci/* Make sure 64 bit math will not overflow. */ 101462306a36Sopenharmony_cistatic bool vhost_overflow(u64 uaddr, u64 size) 101562306a36Sopenharmony_ci{ 101662306a36Sopenharmony_ci if (uaddr > ULONG_MAX || size > ULONG_MAX) 101762306a36Sopenharmony_ci return true; 101862306a36Sopenharmony_ci 101962306a36Sopenharmony_ci if (!size) 102062306a36Sopenharmony_ci return false; 102162306a36Sopenharmony_ci 102262306a36Sopenharmony_ci return uaddr > ULONG_MAX - size + 1; 102362306a36Sopenharmony_ci} 102462306a36Sopenharmony_ci 102562306a36Sopenharmony_ci/* Caller should have vq mutex and device mutex. */ 102662306a36Sopenharmony_cistatic bool vq_memory_access_ok(void __user *log_base, struct vhost_iotlb *umem, 102762306a36Sopenharmony_ci int log_all) 102862306a36Sopenharmony_ci{ 102962306a36Sopenharmony_ci struct vhost_iotlb_map *map; 103062306a36Sopenharmony_ci 103162306a36Sopenharmony_ci if (!umem) 103262306a36Sopenharmony_ci return false; 103362306a36Sopenharmony_ci 103462306a36Sopenharmony_ci list_for_each_entry(map, &umem->list, link) { 103562306a36Sopenharmony_ci unsigned long a = map->addr; 103662306a36Sopenharmony_ci 103762306a36Sopenharmony_ci if (vhost_overflow(map->addr, map->size)) 103862306a36Sopenharmony_ci return false; 103962306a36Sopenharmony_ci 104062306a36Sopenharmony_ci 104162306a36Sopenharmony_ci if (!access_ok((void __user *)a, map->size)) 104262306a36Sopenharmony_ci return false; 104362306a36Sopenharmony_ci else if (log_all && !log_access_ok(log_base, 104462306a36Sopenharmony_ci map->start, 104562306a36Sopenharmony_ci map->size)) 104662306a36Sopenharmony_ci return false; 104762306a36Sopenharmony_ci } 104862306a36Sopenharmony_ci return true; 104962306a36Sopenharmony_ci} 105062306a36Sopenharmony_ci 105162306a36Sopenharmony_cistatic inline void __user *vhost_vq_meta_fetch(struct vhost_virtqueue *vq, 105262306a36Sopenharmony_ci u64 addr, unsigned int size, 105362306a36Sopenharmony_ci int type) 105462306a36Sopenharmony_ci{ 105562306a36Sopenharmony_ci const struct vhost_iotlb_map *map = vq->meta_iotlb[type]; 105662306a36Sopenharmony_ci 105762306a36Sopenharmony_ci if (!map) 105862306a36Sopenharmony_ci return NULL; 105962306a36Sopenharmony_ci 106062306a36Sopenharmony_ci return (void __user *)(uintptr_t)(map->addr + addr - map->start); 106162306a36Sopenharmony_ci} 106262306a36Sopenharmony_ci 106362306a36Sopenharmony_ci/* Can we switch to this memory table? */ 106462306a36Sopenharmony_ci/* Caller should have device mutex but not vq mutex */ 106562306a36Sopenharmony_cistatic bool memory_access_ok(struct vhost_dev *d, struct vhost_iotlb *umem, 106662306a36Sopenharmony_ci int log_all) 106762306a36Sopenharmony_ci{ 106862306a36Sopenharmony_ci int i; 106962306a36Sopenharmony_ci 107062306a36Sopenharmony_ci for (i = 0; i < d->nvqs; ++i) { 107162306a36Sopenharmony_ci bool ok; 107262306a36Sopenharmony_ci bool log; 107362306a36Sopenharmony_ci 107462306a36Sopenharmony_ci mutex_lock(&d->vqs[i]->mutex); 107562306a36Sopenharmony_ci log = log_all || vhost_has_feature(d->vqs[i], VHOST_F_LOG_ALL); 107662306a36Sopenharmony_ci /* If ring is inactive, will check when it's enabled. */ 107762306a36Sopenharmony_ci if (d->vqs[i]->private_data) 107862306a36Sopenharmony_ci ok = vq_memory_access_ok(d->vqs[i]->log_base, 107962306a36Sopenharmony_ci umem, log); 108062306a36Sopenharmony_ci else 108162306a36Sopenharmony_ci ok = true; 108262306a36Sopenharmony_ci mutex_unlock(&d->vqs[i]->mutex); 108362306a36Sopenharmony_ci if (!ok) 108462306a36Sopenharmony_ci return false; 108562306a36Sopenharmony_ci } 108662306a36Sopenharmony_ci return true; 108762306a36Sopenharmony_ci} 108862306a36Sopenharmony_ci 108962306a36Sopenharmony_cistatic int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len, 109062306a36Sopenharmony_ci struct iovec iov[], int iov_size, int access); 109162306a36Sopenharmony_ci 109262306a36Sopenharmony_cistatic int vhost_copy_to_user(struct vhost_virtqueue *vq, void __user *to, 109362306a36Sopenharmony_ci const void *from, unsigned size) 109462306a36Sopenharmony_ci{ 109562306a36Sopenharmony_ci int ret; 109662306a36Sopenharmony_ci 109762306a36Sopenharmony_ci if (!vq->iotlb) 109862306a36Sopenharmony_ci return __copy_to_user(to, from, size); 109962306a36Sopenharmony_ci else { 110062306a36Sopenharmony_ci /* This function should be called after iotlb 110162306a36Sopenharmony_ci * prefetch, which means we're sure that all vq 110262306a36Sopenharmony_ci * could be access through iotlb. So -EAGAIN should 110362306a36Sopenharmony_ci * not happen in this case. 110462306a36Sopenharmony_ci */ 110562306a36Sopenharmony_ci struct iov_iter t; 110662306a36Sopenharmony_ci void __user *uaddr = vhost_vq_meta_fetch(vq, 110762306a36Sopenharmony_ci (u64)(uintptr_t)to, size, 110862306a36Sopenharmony_ci VHOST_ADDR_USED); 110962306a36Sopenharmony_ci 111062306a36Sopenharmony_ci if (uaddr) 111162306a36Sopenharmony_ci return __copy_to_user(uaddr, from, size); 111262306a36Sopenharmony_ci 111362306a36Sopenharmony_ci ret = translate_desc(vq, (u64)(uintptr_t)to, size, vq->iotlb_iov, 111462306a36Sopenharmony_ci ARRAY_SIZE(vq->iotlb_iov), 111562306a36Sopenharmony_ci VHOST_ACCESS_WO); 111662306a36Sopenharmony_ci if (ret < 0) 111762306a36Sopenharmony_ci goto out; 111862306a36Sopenharmony_ci iov_iter_init(&t, ITER_DEST, vq->iotlb_iov, ret, size); 111962306a36Sopenharmony_ci ret = copy_to_iter(from, size, &t); 112062306a36Sopenharmony_ci if (ret == size) 112162306a36Sopenharmony_ci ret = 0; 112262306a36Sopenharmony_ci } 112362306a36Sopenharmony_ciout: 112462306a36Sopenharmony_ci return ret; 112562306a36Sopenharmony_ci} 112662306a36Sopenharmony_ci 112762306a36Sopenharmony_cistatic int vhost_copy_from_user(struct vhost_virtqueue *vq, void *to, 112862306a36Sopenharmony_ci void __user *from, unsigned size) 112962306a36Sopenharmony_ci{ 113062306a36Sopenharmony_ci int ret; 113162306a36Sopenharmony_ci 113262306a36Sopenharmony_ci if (!vq->iotlb) 113362306a36Sopenharmony_ci return __copy_from_user(to, from, size); 113462306a36Sopenharmony_ci else { 113562306a36Sopenharmony_ci /* This function should be called after iotlb 113662306a36Sopenharmony_ci * prefetch, which means we're sure that vq 113762306a36Sopenharmony_ci * could be access through iotlb. So -EAGAIN should 113862306a36Sopenharmony_ci * not happen in this case. 113962306a36Sopenharmony_ci */ 114062306a36Sopenharmony_ci void __user *uaddr = vhost_vq_meta_fetch(vq, 114162306a36Sopenharmony_ci (u64)(uintptr_t)from, size, 114262306a36Sopenharmony_ci VHOST_ADDR_DESC); 114362306a36Sopenharmony_ci struct iov_iter f; 114462306a36Sopenharmony_ci 114562306a36Sopenharmony_ci if (uaddr) 114662306a36Sopenharmony_ci return __copy_from_user(to, uaddr, size); 114762306a36Sopenharmony_ci 114862306a36Sopenharmony_ci ret = translate_desc(vq, (u64)(uintptr_t)from, size, vq->iotlb_iov, 114962306a36Sopenharmony_ci ARRAY_SIZE(vq->iotlb_iov), 115062306a36Sopenharmony_ci VHOST_ACCESS_RO); 115162306a36Sopenharmony_ci if (ret < 0) { 115262306a36Sopenharmony_ci vq_err(vq, "IOTLB translation failure: uaddr " 115362306a36Sopenharmony_ci "%p size 0x%llx\n", from, 115462306a36Sopenharmony_ci (unsigned long long) size); 115562306a36Sopenharmony_ci goto out; 115662306a36Sopenharmony_ci } 115762306a36Sopenharmony_ci iov_iter_init(&f, ITER_SOURCE, vq->iotlb_iov, ret, size); 115862306a36Sopenharmony_ci ret = copy_from_iter(to, size, &f); 115962306a36Sopenharmony_ci if (ret == size) 116062306a36Sopenharmony_ci ret = 0; 116162306a36Sopenharmony_ci } 116262306a36Sopenharmony_ci 116362306a36Sopenharmony_ciout: 116462306a36Sopenharmony_ci return ret; 116562306a36Sopenharmony_ci} 116662306a36Sopenharmony_ci 116762306a36Sopenharmony_cistatic void __user *__vhost_get_user_slow(struct vhost_virtqueue *vq, 116862306a36Sopenharmony_ci void __user *addr, unsigned int size, 116962306a36Sopenharmony_ci int type) 117062306a36Sopenharmony_ci{ 117162306a36Sopenharmony_ci int ret; 117262306a36Sopenharmony_ci 117362306a36Sopenharmony_ci ret = translate_desc(vq, (u64)(uintptr_t)addr, size, vq->iotlb_iov, 117462306a36Sopenharmony_ci ARRAY_SIZE(vq->iotlb_iov), 117562306a36Sopenharmony_ci VHOST_ACCESS_RO); 117662306a36Sopenharmony_ci if (ret < 0) { 117762306a36Sopenharmony_ci vq_err(vq, "IOTLB translation failure: uaddr " 117862306a36Sopenharmony_ci "%p size 0x%llx\n", addr, 117962306a36Sopenharmony_ci (unsigned long long) size); 118062306a36Sopenharmony_ci return NULL; 118162306a36Sopenharmony_ci } 118262306a36Sopenharmony_ci 118362306a36Sopenharmony_ci if (ret != 1 || vq->iotlb_iov[0].iov_len != size) { 118462306a36Sopenharmony_ci vq_err(vq, "Non atomic userspace memory access: uaddr " 118562306a36Sopenharmony_ci "%p size 0x%llx\n", addr, 118662306a36Sopenharmony_ci (unsigned long long) size); 118762306a36Sopenharmony_ci return NULL; 118862306a36Sopenharmony_ci } 118962306a36Sopenharmony_ci 119062306a36Sopenharmony_ci return vq->iotlb_iov[0].iov_base; 119162306a36Sopenharmony_ci} 119262306a36Sopenharmony_ci 119362306a36Sopenharmony_ci/* This function should be called after iotlb 119462306a36Sopenharmony_ci * prefetch, which means we're sure that vq 119562306a36Sopenharmony_ci * could be access through iotlb. So -EAGAIN should 119662306a36Sopenharmony_ci * not happen in this case. 119762306a36Sopenharmony_ci */ 119862306a36Sopenharmony_cistatic inline void __user *__vhost_get_user(struct vhost_virtqueue *vq, 119962306a36Sopenharmony_ci void __user *addr, unsigned int size, 120062306a36Sopenharmony_ci int type) 120162306a36Sopenharmony_ci{ 120262306a36Sopenharmony_ci void __user *uaddr = vhost_vq_meta_fetch(vq, 120362306a36Sopenharmony_ci (u64)(uintptr_t)addr, size, type); 120462306a36Sopenharmony_ci if (uaddr) 120562306a36Sopenharmony_ci return uaddr; 120662306a36Sopenharmony_ci 120762306a36Sopenharmony_ci return __vhost_get_user_slow(vq, addr, size, type); 120862306a36Sopenharmony_ci} 120962306a36Sopenharmony_ci 121062306a36Sopenharmony_ci#define vhost_put_user(vq, x, ptr) \ 121162306a36Sopenharmony_ci({ \ 121262306a36Sopenharmony_ci int ret; \ 121362306a36Sopenharmony_ci if (!vq->iotlb) { \ 121462306a36Sopenharmony_ci ret = __put_user(x, ptr); \ 121562306a36Sopenharmony_ci } else { \ 121662306a36Sopenharmony_ci __typeof__(ptr) to = \ 121762306a36Sopenharmony_ci (__typeof__(ptr)) __vhost_get_user(vq, ptr, \ 121862306a36Sopenharmony_ci sizeof(*ptr), VHOST_ADDR_USED); \ 121962306a36Sopenharmony_ci if (to != NULL) \ 122062306a36Sopenharmony_ci ret = __put_user(x, to); \ 122162306a36Sopenharmony_ci else \ 122262306a36Sopenharmony_ci ret = -EFAULT; \ 122362306a36Sopenharmony_ci } \ 122462306a36Sopenharmony_ci ret; \ 122562306a36Sopenharmony_ci}) 122662306a36Sopenharmony_ci 122762306a36Sopenharmony_cistatic inline int vhost_put_avail_event(struct vhost_virtqueue *vq) 122862306a36Sopenharmony_ci{ 122962306a36Sopenharmony_ci return vhost_put_user(vq, cpu_to_vhost16(vq, vq->avail_idx), 123062306a36Sopenharmony_ci vhost_avail_event(vq)); 123162306a36Sopenharmony_ci} 123262306a36Sopenharmony_ci 123362306a36Sopenharmony_cistatic inline int vhost_put_used(struct vhost_virtqueue *vq, 123462306a36Sopenharmony_ci struct vring_used_elem *head, int idx, 123562306a36Sopenharmony_ci int count) 123662306a36Sopenharmony_ci{ 123762306a36Sopenharmony_ci return vhost_copy_to_user(vq, vq->used->ring + idx, head, 123862306a36Sopenharmony_ci count * sizeof(*head)); 123962306a36Sopenharmony_ci} 124062306a36Sopenharmony_ci 124162306a36Sopenharmony_cistatic inline int vhost_put_used_flags(struct vhost_virtqueue *vq) 124262306a36Sopenharmony_ci 124362306a36Sopenharmony_ci{ 124462306a36Sopenharmony_ci return vhost_put_user(vq, cpu_to_vhost16(vq, vq->used_flags), 124562306a36Sopenharmony_ci &vq->used->flags); 124662306a36Sopenharmony_ci} 124762306a36Sopenharmony_ci 124862306a36Sopenharmony_cistatic inline int vhost_put_used_idx(struct vhost_virtqueue *vq) 124962306a36Sopenharmony_ci 125062306a36Sopenharmony_ci{ 125162306a36Sopenharmony_ci return vhost_put_user(vq, cpu_to_vhost16(vq, vq->last_used_idx), 125262306a36Sopenharmony_ci &vq->used->idx); 125362306a36Sopenharmony_ci} 125462306a36Sopenharmony_ci 125562306a36Sopenharmony_ci#define vhost_get_user(vq, x, ptr, type) \ 125662306a36Sopenharmony_ci({ \ 125762306a36Sopenharmony_ci int ret; \ 125862306a36Sopenharmony_ci if (!vq->iotlb) { \ 125962306a36Sopenharmony_ci ret = __get_user(x, ptr); \ 126062306a36Sopenharmony_ci } else { \ 126162306a36Sopenharmony_ci __typeof__(ptr) from = \ 126262306a36Sopenharmony_ci (__typeof__(ptr)) __vhost_get_user(vq, ptr, \ 126362306a36Sopenharmony_ci sizeof(*ptr), \ 126462306a36Sopenharmony_ci type); \ 126562306a36Sopenharmony_ci if (from != NULL) \ 126662306a36Sopenharmony_ci ret = __get_user(x, from); \ 126762306a36Sopenharmony_ci else \ 126862306a36Sopenharmony_ci ret = -EFAULT; \ 126962306a36Sopenharmony_ci } \ 127062306a36Sopenharmony_ci ret; \ 127162306a36Sopenharmony_ci}) 127262306a36Sopenharmony_ci 127362306a36Sopenharmony_ci#define vhost_get_avail(vq, x, ptr) \ 127462306a36Sopenharmony_ci vhost_get_user(vq, x, ptr, VHOST_ADDR_AVAIL) 127562306a36Sopenharmony_ci 127662306a36Sopenharmony_ci#define vhost_get_used(vq, x, ptr) \ 127762306a36Sopenharmony_ci vhost_get_user(vq, x, ptr, VHOST_ADDR_USED) 127862306a36Sopenharmony_ci 127962306a36Sopenharmony_cistatic void vhost_dev_lock_vqs(struct vhost_dev *d) 128062306a36Sopenharmony_ci{ 128162306a36Sopenharmony_ci int i = 0; 128262306a36Sopenharmony_ci for (i = 0; i < d->nvqs; ++i) 128362306a36Sopenharmony_ci mutex_lock_nested(&d->vqs[i]->mutex, i); 128462306a36Sopenharmony_ci} 128562306a36Sopenharmony_ci 128662306a36Sopenharmony_cistatic void vhost_dev_unlock_vqs(struct vhost_dev *d) 128762306a36Sopenharmony_ci{ 128862306a36Sopenharmony_ci int i = 0; 128962306a36Sopenharmony_ci for (i = 0; i < d->nvqs; ++i) 129062306a36Sopenharmony_ci mutex_unlock(&d->vqs[i]->mutex); 129162306a36Sopenharmony_ci} 129262306a36Sopenharmony_ci 129362306a36Sopenharmony_cistatic inline int vhost_get_avail_idx(struct vhost_virtqueue *vq, 129462306a36Sopenharmony_ci __virtio16 *idx) 129562306a36Sopenharmony_ci{ 129662306a36Sopenharmony_ci return vhost_get_avail(vq, *idx, &vq->avail->idx); 129762306a36Sopenharmony_ci} 129862306a36Sopenharmony_ci 129962306a36Sopenharmony_cistatic inline int vhost_get_avail_head(struct vhost_virtqueue *vq, 130062306a36Sopenharmony_ci __virtio16 *head, int idx) 130162306a36Sopenharmony_ci{ 130262306a36Sopenharmony_ci return vhost_get_avail(vq, *head, 130362306a36Sopenharmony_ci &vq->avail->ring[idx & (vq->num - 1)]); 130462306a36Sopenharmony_ci} 130562306a36Sopenharmony_ci 130662306a36Sopenharmony_cistatic inline int vhost_get_avail_flags(struct vhost_virtqueue *vq, 130762306a36Sopenharmony_ci __virtio16 *flags) 130862306a36Sopenharmony_ci{ 130962306a36Sopenharmony_ci return vhost_get_avail(vq, *flags, &vq->avail->flags); 131062306a36Sopenharmony_ci} 131162306a36Sopenharmony_ci 131262306a36Sopenharmony_cistatic inline int vhost_get_used_event(struct vhost_virtqueue *vq, 131362306a36Sopenharmony_ci __virtio16 *event) 131462306a36Sopenharmony_ci{ 131562306a36Sopenharmony_ci return vhost_get_avail(vq, *event, vhost_used_event(vq)); 131662306a36Sopenharmony_ci} 131762306a36Sopenharmony_ci 131862306a36Sopenharmony_cistatic inline int vhost_get_used_idx(struct vhost_virtqueue *vq, 131962306a36Sopenharmony_ci __virtio16 *idx) 132062306a36Sopenharmony_ci{ 132162306a36Sopenharmony_ci return vhost_get_used(vq, *idx, &vq->used->idx); 132262306a36Sopenharmony_ci} 132362306a36Sopenharmony_ci 132462306a36Sopenharmony_cistatic inline int vhost_get_desc(struct vhost_virtqueue *vq, 132562306a36Sopenharmony_ci struct vring_desc *desc, int idx) 132662306a36Sopenharmony_ci{ 132762306a36Sopenharmony_ci return vhost_copy_from_user(vq, desc, vq->desc + idx, sizeof(*desc)); 132862306a36Sopenharmony_ci} 132962306a36Sopenharmony_ci 133062306a36Sopenharmony_cistatic void vhost_iotlb_notify_vq(struct vhost_dev *d, 133162306a36Sopenharmony_ci struct vhost_iotlb_msg *msg) 133262306a36Sopenharmony_ci{ 133362306a36Sopenharmony_ci struct vhost_msg_node *node, *n; 133462306a36Sopenharmony_ci 133562306a36Sopenharmony_ci spin_lock(&d->iotlb_lock); 133662306a36Sopenharmony_ci 133762306a36Sopenharmony_ci list_for_each_entry_safe(node, n, &d->pending_list, node) { 133862306a36Sopenharmony_ci struct vhost_iotlb_msg *vq_msg = &node->msg.iotlb; 133962306a36Sopenharmony_ci if (msg->iova <= vq_msg->iova && 134062306a36Sopenharmony_ci msg->iova + msg->size - 1 >= vq_msg->iova && 134162306a36Sopenharmony_ci vq_msg->type == VHOST_IOTLB_MISS) { 134262306a36Sopenharmony_ci vhost_poll_queue(&node->vq->poll); 134362306a36Sopenharmony_ci list_del(&node->node); 134462306a36Sopenharmony_ci kfree(node); 134562306a36Sopenharmony_ci } 134662306a36Sopenharmony_ci } 134762306a36Sopenharmony_ci 134862306a36Sopenharmony_ci spin_unlock(&d->iotlb_lock); 134962306a36Sopenharmony_ci} 135062306a36Sopenharmony_ci 135162306a36Sopenharmony_cistatic bool umem_access_ok(u64 uaddr, u64 size, int access) 135262306a36Sopenharmony_ci{ 135362306a36Sopenharmony_ci unsigned long a = uaddr; 135462306a36Sopenharmony_ci 135562306a36Sopenharmony_ci /* Make sure 64 bit math will not overflow. */ 135662306a36Sopenharmony_ci if (vhost_overflow(uaddr, size)) 135762306a36Sopenharmony_ci return false; 135862306a36Sopenharmony_ci 135962306a36Sopenharmony_ci if ((access & VHOST_ACCESS_RO) && 136062306a36Sopenharmony_ci !access_ok((void __user *)a, size)) 136162306a36Sopenharmony_ci return false; 136262306a36Sopenharmony_ci if ((access & VHOST_ACCESS_WO) && 136362306a36Sopenharmony_ci !access_ok((void __user *)a, size)) 136462306a36Sopenharmony_ci return false; 136562306a36Sopenharmony_ci return true; 136662306a36Sopenharmony_ci} 136762306a36Sopenharmony_ci 136862306a36Sopenharmony_cistatic int vhost_process_iotlb_msg(struct vhost_dev *dev, u32 asid, 136962306a36Sopenharmony_ci struct vhost_iotlb_msg *msg) 137062306a36Sopenharmony_ci{ 137162306a36Sopenharmony_ci int ret = 0; 137262306a36Sopenharmony_ci 137362306a36Sopenharmony_ci if (asid != 0) 137462306a36Sopenharmony_ci return -EINVAL; 137562306a36Sopenharmony_ci 137662306a36Sopenharmony_ci mutex_lock(&dev->mutex); 137762306a36Sopenharmony_ci vhost_dev_lock_vqs(dev); 137862306a36Sopenharmony_ci switch (msg->type) { 137962306a36Sopenharmony_ci case VHOST_IOTLB_UPDATE: 138062306a36Sopenharmony_ci if (!dev->iotlb) { 138162306a36Sopenharmony_ci ret = -EFAULT; 138262306a36Sopenharmony_ci break; 138362306a36Sopenharmony_ci } 138462306a36Sopenharmony_ci if (!umem_access_ok(msg->uaddr, msg->size, msg->perm)) { 138562306a36Sopenharmony_ci ret = -EFAULT; 138662306a36Sopenharmony_ci break; 138762306a36Sopenharmony_ci } 138862306a36Sopenharmony_ci vhost_vq_meta_reset(dev); 138962306a36Sopenharmony_ci if (vhost_iotlb_add_range(dev->iotlb, msg->iova, 139062306a36Sopenharmony_ci msg->iova + msg->size - 1, 139162306a36Sopenharmony_ci msg->uaddr, msg->perm)) { 139262306a36Sopenharmony_ci ret = -ENOMEM; 139362306a36Sopenharmony_ci break; 139462306a36Sopenharmony_ci } 139562306a36Sopenharmony_ci vhost_iotlb_notify_vq(dev, msg); 139662306a36Sopenharmony_ci break; 139762306a36Sopenharmony_ci case VHOST_IOTLB_INVALIDATE: 139862306a36Sopenharmony_ci if (!dev->iotlb) { 139962306a36Sopenharmony_ci ret = -EFAULT; 140062306a36Sopenharmony_ci break; 140162306a36Sopenharmony_ci } 140262306a36Sopenharmony_ci vhost_vq_meta_reset(dev); 140362306a36Sopenharmony_ci vhost_iotlb_del_range(dev->iotlb, msg->iova, 140462306a36Sopenharmony_ci msg->iova + msg->size - 1); 140562306a36Sopenharmony_ci break; 140662306a36Sopenharmony_ci default: 140762306a36Sopenharmony_ci ret = -EINVAL; 140862306a36Sopenharmony_ci break; 140962306a36Sopenharmony_ci } 141062306a36Sopenharmony_ci 141162306a36Sopenharmony_ci vhost_dev_unlock_vqs(dev); 141262306a36Sopenharmony_ci mutex_unlock(&dev->mutex); 141362306a36Sopenharmony_ci 141462306a36Sopenharmony_ci return ret; 141562306a36Sopenharmony_ci} 141662306a36Sopenharmony_cissize_t vhost_chr_write_iter(struct vhost_dev *dev, 141762306a36Sopenharmony_ci struct iov_iter *from) 141862306a36Sopenharmony_ci{ 141962306a36Sopenharmony_ci struct vhost_iotlb_msg msg; 142062306a36Sopenharmony_ci size_t offset; 142162306a36Sopenharmony_ci int type, ret; 142262306a36Sopenharmony_ci u32 asid = 0; 142362306a36Sopenharmony_ci 142462306a36Sopenharmony_ci ret = copy_from_iter(&type, sizeof(type), from); 142562306a36Sopenharmony_ci if (ret != sizeof(type)) { 142662306a36Sopenharmony_ci ret = -EINVAL; 142762306a36Sopenharmony_ci goto done; 142862306a36Sopenharmony_ci } 142962306a36Sopenharmony_ci 143062306a36Sopenharmony_ci switch (type) { 143162306a36Sopenharmony_ci case VHOST_IOTLB_MSG: 143262306a36Sopenharmony_ci /* There maybe a hole after type for V1 message type, 143362306a36Sopenharmony_ci * so skip it here. 143462306a36Sopenharmony_ci */ 143562306a36Sopenharmony_ci offset = offsetof(struct vhost_msg, iotlb) - sizeof(int); 143662306a36Sopenharmony_ci break; 143762306a36Sopenharmony_ci case VHOST_IOTLB_MSG_V2: 143862306a36Sopenharmony_ci if (vhost_backend_has_feature(dev->vqs[0], 143962306a36Sopenharmony_ci VHOST_BACKEND_F_IOTLB_ASID)) { 144062306a36Sopenharmony_ci ret = copy_from_iter(&asid, sizeof(asid), from); 144162306a36Sopenharmony_ci if (ret != sizeof(asid)) { 144262306a36Sopenharmony_ci ret = -EINVAL; 144362306a36Sopenharmony_ci goto done; 144462306a36Sopenharmony_ci } 144562306a36Sopenharmony_ci offset = 0; 144662306a36Sopenharmony_ci } else 144762306a36Sopenharmony_ci offset = sizeof(__u32); 144862306a36Sopenharmony_ci break; 144962306a36Sopenharmony_ci default: 145062306a36Sopenharmony_ci ret = -EINVAL; 145162306a36Sopenharmony_ci goto done; 145262306a36Sopenharmony_ci } 145362306a36Sopenharmony_ci 145462306a36Sopenharmony_ci iov_iter_advance(from, offset); 145562306a36Sopenharmony_ci ret = copy_from_iter(&msg, sizeof(msg), from); 145662306a36Sopenharmony_ci if (ret != sizeof(msg)) { 145762306a36Sopenharmony_ci ret = -EINVAL; 145862306a36Sopenharmony_ci goto done; 145962306a36Sopenharmony_ci } 146062306a36Sopenharmony_ci 146162306a36Sopenharmony_ci if (msg.type == VHOST_IOTLB_UPDATE && msg.size == 0) { 146262306a36Sopenharmony_ci ret = -EINVAL; 146362306a36Sopenharmony_ci goto done; 146462306a36Sopenharmony_ci } 146562306a36Sopenharmony_ci 146662306a36Sopenharmony_ci if (dev->msg_handler) 146762306a36Sopenharmony_ci ret = dev->msg_handler(dev, asid, &msg); 146862306a36Sopenharmony_ci else 146962306a36Sopenharmony_ci ret = vhost_process_iotlb_msg(dev, asid, &msg); 147062306a36Sopenharmony_ci if (ret) { 147162306a36Sopenharmony_ci ret = -EFAULT; 147262306a36Sopenharmony_ci goto done; 147362306a36Sopenharmony_ci } 147462306a36Sopenharmony_ci 147562306a36Sopenharmony_ci ret = (type == VHOST_IOTLB_MSG) ? sizeof(struct vhost_msg) : 147662306a36Sopenharmony_ci sizeof(struct vhost_msg_v2); 147762306a36Sopenharmony_cidone: 147862306a36Sopenharmony_ci return ret; 147962306a36Sopenharmony_ci} 148062306a36Sopenharmony_ciEXPORT_SYMBOL(vhost_chr_write_iter); 148162306a36Sopenharmony_ci 148262306a36Sopenharmony_ci__poll_t vhost_chr_poll(struct file *file, struct vhost_dev *dev, 148362306a36Sopenharmony_ci poll_table *wait) 148462306a36Sopenharmony_ci{ 148562306a36Sopenharmony_ci __poll_t mask = 0; 148662306a36Sopenharmony_ci 148762306a36Sopenharmony_ci poll_wait(file, &dev->wait, wait); 148862306a36Sopenharmony_ci 148962306a36Sopenharmony_ci if (!list_empty(&dev->read_list)) 149062306a36Sopenharmony_ci mask |= EPOLLIN | EPOLLRDNORM; 149162306a36Sopenharmony_ci 149262306a36Sopenharmony_ci return mask; 149362306a36Sopenharmony_ci} 149462306a36Sopenharmony_ciEXPORT_SYMBOL(vhost_chr_poll); 149562306a36Sopenharmony_ci 149662306a36Sopenharmony_cissize_t vhost_chr_read_iter(struct vhost_dev *dev, struct iov_iter *to, 149762306a36Sopenharmony_ci int noblock) 149862306a36Sopenharmony_ci{ 149962306a36Sopenharmony_ci DEFINE_WAIT(wait); 150062306a36Sopenharmony_ci struct vhost_msg_node *node; 150162306a36Sopenharmony_ci ssize_t ret = 0; 150262306a36Sopenharmony_ci unsigned size = sizeof(struct vhost_msg); 150362306a36Sopenharmony_ci 150462306a36Sopenharmony_ci if (iov_iter_count(to) < size) 150562306a36Sopenharmony_ci return 0; 150662306a36Sopenharmony_ci 150762306a36Sopenharmony_ci while (1) { 150862306a36Sopenharmony_ci if (!noblock) 150962306a36Sopenharmony_ci prepare_to_wait(&dev->wait, &wait, 151062306a36Sopenharmony_ci TASK_INTERRUPTIBLE); 151162306a36Sopenharmony_ci 151262306a36Sopenharmony_ci node = vhost_dequeue_msg(dev, &dev->read_list); 151362306a36Sopenharmony_ci if (node) 151462306a36Sopenharmony_ci break; 151562306a36Sopenharmony_ci if (noblock) { 151662306a36Sopenharmony_ci ret = -EAGAIN; 151762306a36Sopenharmony_ci break; 151862306a36Sopenharmony_ci } 151962306a36Sopenharmony_ci if (signal_pending(current)) { 152062306a36Sopenharmony_ci ret = -ERESTARTSYS; 152162306a36Sopenharmony_ci break; 152262306a36Sopenharmony_ci } 152362306a36Sopenharmony_ci if (!dev->iotlb) { 152462306a36Sopenharmony_ci ret = -EBADFD; 152562306a36Sopenharmony_ci break; 152662306a36Sopenharmony_ci } 152762306a36Sopenharmony_ci 152862306a36Sopenharmony_ci schedule(); 152962306a36Sopenharmony_ci } 153062306a36Sopenharmony_ci 153162306a36Sopenharmony_ci if (!noblock) 153262306a36Sopenharmony_ci finish_wait(&dev->wait, &wait); 153362306a36Sopenharmony_ci 153462306a36Sopenharmony_ci if (node) { 153562306a36Sopenharmony_ci struct vhost_iotlb_msg *msg; 153662306a36Sopenharmony_ci void *start = &node->msg; 153762306a36Sopenharmony_ci 153862306a36Sopenharmony_ci switch (node->msg.type) { 153962306a36Sopenharmony_ci case VHOST_IOTLB_MSG: 154062306a36Sopenharmony_ci size = sizeof(node->msg); 154162306a36Sopenharmony_ci msg = &node->msg.iotlb; 154262306a36Sopenharmony_ci break; 154362306a36Sopenharmony_ci case VHOST_IOTLB_MSG_V2: 154462306a36Sopenharmony_ci size = sizeof(node->msg_v2); 154562306a36Sopenharmony_ci msg = &node->msg_v2.iotlb; 154662306a36Sopenharmony_ci break; 154762306a36Sopenharmony_ci default: 154862306a36Sopenharmony_ci BUG(); 154962306a36Sopenharmony_ci break; 155062306a36Sopenharmony_ci } 155162306a36Sopenharmony_ci 155262306a36Sopenharmony_ci ret = copy_to_iter(start, size, to); 155362306a36Sopenharmony_ci if (ret != size || msg->type != VHOST_IOTLB_MISS) { 155462306a36Sopenharmony_ci kfree(node); 155562306a36Sopenharmony_ci return ret; 155662306a36Sopenharmony_ci } 155762306a36Sopenharmony_ci vhost_enqueue_msg(dev, &dev->pending_list, node); 155862306a36Sopenharmony_ci } 155962306a36Sopenharmony_ci 156062306a36Sopenharmony_ci return ret; 156162306a36Sopenharmony_ci} 156262306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_chr_read_iter); 156362306a36Sopenharmony_ci 156462306a36Sopenharmony_cistatic int vhost_iotlb_miss(struct vhost_virtqueue *vq, u64 iova, int access) 156562306a36Sopenharmony_ci{ 156662306a36Sopenharmony_ci struct vhost_dev *dev = vq->dev; 156762306a36Sopenharmony_ci struct vhost_msg_node *node; 156862306a36Sopenharmony_ci struct vhost_iotlb_msg *msg; 156962306a36Sopenharmony_ci bool v2 = vhost_backend_has_feature(vq, VHOST_BACKEND_F_IOTLB_MSG_V2); 157062306a36Sopenharmony_ci 157162306a36Sopenharmony_ci node = vhost_new_msg(vq, v2 ? VHOST_IOTLB_MSG_V2 : VHOST_IOTLB_MSG); 157262306a36Sopenharmony_ci if (!node) 157362306a36Sopenharmony_ci return -ENOMEM; 157462306a36Sopenharmony_ci 157562306a36Sopenharmony_ci if (v2) { 157662306a36Sopenharmony_ci node->msg_v2.type = VHOST_IOTLB_MSG_V2; 157762306a36Sopenharmony_ci msg = &node->msg_v2.iotlb; 157862306a36Sopenharmony_ci } else { 157962306a36Sopenharmony_ci msg = &node->msg.iotlb; 158062306a36Sopenharmony_ci } 158162306a36Sopenharmony_ci 158262306a36Sopenharmony_ci msg->type = VHOST_IOTLB_MISS; 158362306a36Sopenharmony_ci msg->iova = iova; 158462306a36Sopenharmony_ci msg->perm = access; 158562306a36Sopenharmony_ci 158662306a36Sopenharmony_ci vhost_enqueue_msg(dev, &dev->read_list, node); 158762306a36Sopenharmony_ci 158862306a36Sopenharmony_ci return 0; 158962306a36Sopenharmony_ci} 159062306a36Sopenharmony_ci 159162306a36Sopenharmony_cistatic bool vq_access_ok(struct vhost_virtqueue *vq, unsigned int num, 159262306a36Sopenharmony_ci vring_desc_t __user *desc, 159362306a36Sopenharmony_ci vring_avail_t __user *avail, 159462306a36Sopenharmony_ci vring_used_t __user *used) 159562306a36Sopenharmony_ci 159662306a36Sopenharmony_ci{ 159762306a36Sopenharmony_ci /* If an IOTLB device is present, the vring addresses are 159862306a36Sopenharmony_ci * GIOVAs. Access validation occurs at prefetch time. */ 159962306a36Sopenharmony_ci if (vq->iotlb) 160062306a36Sopenharmony_ci return true; 160162306a36Sopenharmony_ci 160262306a36Sopenharmony_ci return access_ok(desc, vhost_get_desc_size(vq, num)) && 160362306a36Sopenharmony_ci access_ok(avail, vhost_get_avail_size(vq, num)) && 160462306a36Sopenharmony_ci access_ok(used, vhost_get_used_size(vq, num)); 160562306a36Sopenharmony_ci} 160662306a36Sopenharmony_ci 160762306a36Sopenharmony_cistatic void vhost_vq_meta_update(struct vhost_virtqueue *vq, 160862306a36Sopenharmony_ci const struct vhost_iotlb_map *map, 160962306a36Sopenharmony_ci int type) 161062306a36Sopenharmony_ci{ 161162306a36Sopenharmony_ci int access = (type == VHOST_ADDR_USED) ? 161262306a36Sopenharmony_ci VHOST_ACCESS_WO : VHOST_ACCESS_RO; 161362306a36Sopenharmony_ci 161462306a36Sopenharmony_ci if (likely(map->perm & access)) 161562306a36Sopenharmony_ci vq->meta_iotlb[type] = map; 161662306a36Sopenharmony_ci} 161762306a36Sopenharmony_ci 161862306a36Sopenharmony_cistatic bool iotlb_access_ok(struct vhost_virtqueue *vq, 161962306a36Sopenharmony_ci int access, u64 addr, u64 len, int type) 162062306a36Sopenharmony_ci{ 162162306a36Sopenharmony_ci const struct vhost_iotlb_map *map; 162262306a36Sopenharmony_ci struct vhost_iotlb *umem = vq->iotlb; 162362306a36Sopenharmony_ci u64 s = 0, size, orig_addr = addr, last = addr + len - 1; 162462306a36Sopenharmony_ci 162562306a36Sopenharmony_ci if (vhost_vq_meta_fetch(vq, addr, len, type)) 162662306a36Sopenharmony_ci return true; 162762306a36Sopenharmony_ci 162862306a36Sopenharmony_ci while (len > s) { 162962306a36Sopenharmony_ci map = vhost_iotlb_itree_first(umem, addr, last); 163062306a36Sopenharmony_ci if (map == NULL || map->start > addr) { 163162306a36Sopenharmony_ci vhost_iotlb_miss(vq, addr, access); 163262306a36Sopenharmony_ci return false; 163362306a36Sopenharmony_ci } else if (!(map->perm & access)) { 163462306a36Sopenharmony_ci /* Report the possible access violation by 163562306a36Sopenharmony_ci * request another translation from userspace. 163662306a36Sopenharmony_ci */ 163762306a36Sopenharmony_ci return false; 163862306a36Sopenharmony_ci } 163962306a36Sopenharmony_ci 164062306a36Sopenharmony_ci size = map->size - addr + map->start; 164162306a36Sopenharmony_ci 164262306a36Sopenharmony_ci if (orig_addr == addr && size >= len) 164362306a36Sopenharmony_ci vhost_vq_meta_update(vq, map, type); 164462306a36Sopenharmony_ci 164562306a36Sopenharmony_ci s += size; 164662306a36Sopenharmony_ci addr += size; 164762306a36Sopenharmony_ci } 164862306a36Sopenharmony_ci 164962306a36Sopenharmony_ci return true; 165062306a36Sopenharmony_ci} 165162306a36Sopenharmony_ci 165262306a36Sopenharmony_ciint vq_meta_prefetch(struct vhost_virtqueue *vq) 165362306a36Sopenharmony_ci{ 165462306a36Sopenharmony_ci unsigned int num = vq->num; 165562306a36Sopenharmony_ci 165662306a36Sopenharmony_ci if (!vq->iotlb) 165762306a36Sopenharmony_ci return 1; 165862306a36Sopenharmony_ci 165962306a36Sopenharmony_ci return iotlb_access_ok(vq, VHOST_MAP_RO, (u64)(uintptr_t)vq->desc, 166062306a36Sopenharmony_ci vhost_get_desc_size(vq, num), VHOST_ADDR_DESC) && 166162306a36Sopenharmony_ci iotlb_access_ok(vq, VHOST_MAP_RO, (u64)(uintptr_t)vq->avail, 166262306a36Sopenharmony_ci vhost_get_avail_size(vq, num), 166362306a36Sopenharmony_ci VHOST_ADDR_AVAIL) && 166462306a36Sopenharmony_ci iotlb_access_ok(vq, VHOST_MAP_WO, (u64)(uintptr_t)vq->used, 166562306a36Sopenharmony_ci vhost_get_used_size(vq, num), VHOST_ADDR_USED); 166662306a36Sopenharmony_ci} 166762306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vq_meta_prefetch); 166862306a36Sopenharmony_ci 166962306a36Sopenharmony_ci/* Can we log writes? */ 167062306a36Sopenharmony_ci/* Caller should have device mutex but not vq mutex */ 167162306a36Sopenharmony_cibool vhost_log_access_ok(struct vhost_dev *dev) 167262306a36Sopenharmony_ci{ 167362306a36Sopenharmony_ci return memory_access_ok(dev, dev->umem, 1); 167462306a36Sopenharmony_ci} 167562306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_log_access_ok); 167662306a36Sopenharmony_ci 167762306a36Sopenharmony_cistatic bool vq_log_used_access_ok(struct vhost_virtqueue *vq, 167862306a36Sopenharmony_ci void __user *log_base, 167962306a36Sopenharmony_ci bool log_used, 168062306a36Sopenharmony_ci u64 log_addr) 168162306a36Sopenharmony_ci{ 168262306a36Sopenharmony_ci /* If an IOTLB device is present, log_addr is a GIOVA that 168362306a36Sopenharmony_ci * will never be logged by log_used(). */ 168462306a36Sopenharmony_ci if (vq->iotlb) 168562306a36Sopenharmony_ci return true; 168662306a36Sopenharmony_ci 168762306a36Sopenharmony_ci return !log_used || log_access_ok(log_base, log_addr, 168862306a36Sopenharmony_ci vhost_get_used_size(vq, vq->num)); 168962306a36Sopenharmony_ci} 169062306a36Sopenharmony_ci 169162306a36Sopenharmony_ci/* Verify access for write logging. */ 169262306a36Sopenharmony_ci/* Caller should have vq mutex and device mutex */ 169362306a36Sopenharmony_cistatic bool vq_log_access_ok(struct vhost_virtqueue *vq, 169462306a36Sopenharmony_ci void __user *log_base) 169562306a36Sopenharmony_ci{ 169662306a36Sopenharmony_ci return vq_memory_access_ok(log_base, vq->umem, 169762306a36Sopenharmony_ci vhost_has_feature(vq, VHOST_F_LOG_ALL)) && 169862306a36Sopenharmony_ci vq_log_used_access_ok(vq, log_base, vq->log_used, vq->log_addr); 169962306a36Sopenharmony_ci} 170062306a36Sopenharmony_ci 170162306a36Sopenharmony_ci/* Can we start vq? */ 170262306a36Sopenharmony_ci/* Caller should have vq mutex and device mutex */ 170362306a36Sopenharmony_cibool vhost_vq_access_ok(struct vhost_virtqueue *vq) 170462306a36Sopenharmony_ci{ 170562306a36Sopenharmony_ci if (!vq_log_access_ok(vq, vq->log_base)) 170662306a36Sopenharmony_ci return false; 170762306a36Sopenharmony_ci 170862306a36Sopenharmony_ci return vq_access_ok(vq, vq->num, vq->desc, vq->avail, vq->used); 170962306a36Sopenharmony_ci} 171062306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_vq_access_ok); 171162306a36Sopenharmony_ci 171262306a36Sopenharmony_cistatic long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m) 171362306a36Sopenharmony_ci{ 171462306a36Sopenharmony_ci struct vhost_memory mem, *newmem; 171562306a36Sopenharmony_ci struct vhost_memory_region *region; 171662306a36Sopenharmony_ci struct vhost_iotlb *newumem, *oldumem; 171762306a36Sopenharmony_ci unsigned long size = offsetof(struct vhost_memory, regions); 171862306a36Sopenharmony_ci int i; 171962306a36Sopenharmony_ci 172062306a36Sopenharmony_ci if (copy_from_user(&mem, m, size)) 172162306a36Sopenharmony_ci return -EFAULT; 172262306a36Sopenharmony_ci if (mem.padding) 172362306a36Sopenharmony_ci return -EOPNOTSUPP; 172462306a36Sopenharmony_ci if (mem.nregions > max_mem_regions) 172562306a36Sopenharmony_ci return -E2BIG; 172662306a36Sopenharmony_ci newmem = kvzalloc(struct_size(newmem, regions, mem.nregions), 172762306a36Sopenharmony_ci GFP_KERNEL); 172862306a36Sopenharmony_ci if (!newmem) 172962306a36Sopenharmony_ci return -ENOMEM; 173062306a36Sopenharmony_ci 173162306a36Sopenharmony_ci memcpy(newmem, &mem, size); 173262306a36Sopenharmony_ci if (copy_from_user(newmem->regions, m->regions, 173362306a36Sopenharmony_ci flex_array_size(newmem, regions, mem.nregions))) { 173462306a36Sopenharmony_ci kvfree(newmem); 173562306a36Sopenharmony_ci return -EFAULT; 173662306a36Sopenharmony_ci } 173762306a36Sopenharmony_ci 173862306a36Sopenharmony_ci newumem = iotlb_alloc(); 173962306a36Sopenharmony_ci if (!newumem) { 174062306a36Sopenharmony_ci kvfree(newmem); 174162306a36Sopenharmony_ci return -ENOMEM; 174262306a36Sopenharmony_ci } 174362306a36Sopenharmony_ci 174462306a36Sopenharmony_ci for (region = newmem->regions; 174562306a36Sopenharmony_ci region < newmem->regions + mem.nregions; 174662306a36Sopenharmony_ci region++) { 174762306a36Sopenharmony_ci if (vhost_iotlb_add_range(newumem, 174862306a36Sopenharmony_ci region->guest_phys_addr, 174962306a36Sopenharmony_ci region->guest_phys_addr + 175062306a36Sopenharmony_ci region->memory_size - 1, 175162306a36Sopenharmony_ci region->userspace_addr, 175262306a36Sopenharmony_ci VHOST_MAP_RW)) 175362306a36Sopenharmony_ci goto err; 175462306a36Sopenharmony_ci } 175562306a36Sopenharmony_ci 175662306a36Sopenharmony_ci if (!memory_access_ok(d, newumem, 0)) 175762306a36Sopenharmony_ci goto err; 175862306a36Sopenharmony_ci 175962306a36Sopenharmony_ci oldumem = d->umem; 176062306a36Sopenharmony_ci d->umem = newumem; 176162306a36Sopenharmony_ci 176262306a36Sopenharmony_ci /* All memory accesses are done under some VQ mutex. */ 176362306a36Sopenharmony_ci for (i = 0; i < d->nvqs; ++i) { 176462306a36Sopenharmony_ci mutex_lock(&d->vqs[i]->mutex); 176562306a36Sopenharmony_ci d->vqs[i]->umem = newumem; 176662306a36Sopenharmony_ci mutex_unlock(&d->vqs[i]->mutex); 176762306a36Sopenharmony_ci } 176862306a36Sopenharmony_ci 176962306a36Sopenharmony_ci kvfree(newmem); 177062306a36Sopenharmony_ci vhost_iotlb_free(oldumem); 177162306a36Sopenharmony_ci return 0; 177262306a36Sopenharmony_ci 177362306a36Sopenharmony_cierr: 177462306a36Sopenharmony_ci vhost_iotlb_free(newumem); 177562306a36Sopenharmony_ci kvfree(newmem); 177662306a36Sopenharmony_ci return -EFAULT; 177762306a36Sopenharmony_ci} 177862306a36Sopenharmony_ci 177962306a36Sopenharmony_cistatic long vhost_vring_set_num(struct vhost_dev *d, 178062306a36Sopenharmony_ci struct vhost_virtqueue *vq, 178162306a36Sopenharmony_ci void __user *argp) 178262306a36Sopenharmony_ci{ 178362306a36Sopenharmony_ci struct vhost_vring_state s; 178462306a36Sopenharmony_ci 178562306a36Sopenharmony_ci /* Resizing ring with an active backend? 178662306a36Sopenharmony_ci * You don't want to do that. */ 178762306a36Sopenharmony_ci if (vq->private_data) 178862306a36Sopenharmony_ci return -EBUSY; 178962306a36Sopenharmony_ci 179062306a36Sopenharmony_ci if (copy_from_user(&s, argp, sizeof s)) 179162306a36Sopenharmony_ci return -EFAULT; 179262306a36Sopenharmony_ci 179362306a36Sopenharmony_ci if (!s.num || s.num > 0xffff || (s.num & (s.num - 1))) 179462306a36Sopenharmony_ci return -EINVAL; 179562306a36Sopenharmony_ci vq->num = s.num; 179662306a36Sopenharmony_ci 179762306a36Sopenharmony_ci return 0; 179862306a36Sopenharmony_ci} 179962306a36Sopenharmony_ci 180062306a36Sopenharmony_cistatic long vhost_vring_set_addr(struct vhost_dev *d, 180162306a36Sopenharmony_ci struct vhost_virtqueue *vq, 180262306a36Sopenharmony_ci void __user *argp) 180362306a36Sopenharmony_ci{ 180462306a36Sopenharmony_ci struct vhost_vring_addr a; 180562306a36Sopenharmony_ci 180662306a36Sopenharmony_ci if (copy_from_user(&a, argp, sizeof a)) 180762306a36Sopenharmony_ci return -EFAULT; 180862306a36Sopenharmony_ci if (a.flags & ~(0x1 << VHOST_VRING_F_LOG)) 180962306a36Sopenharmony_ci return -EOPNOTSUPP; 181062306a36Sopenharmony_ci 181162306a36Sopenharmony_ci /* For 32bit, verify that the top 32bits of the user 181262306a36Sopenharmony_ci data are set to zero. */ 181362306a36Sopenharmony_ci if ((u64)(unsigned long)a.desc_user_addr != a.desc_user_addr || 181462306a36Sopenharmony_ci (u64)(unsigned long)a.used_user_addr != a.used_user_addr || 181562306a36Sopenharmony_ci (u64)(unsigned long)a.avail_user_addr != a.avail_user_addr) 181662306a36Sopenharmony_ci return -EFAULT; 181762306a36Sopenharmony_ci 181862306a36Sopenharmony_ci /* Make sure it's safe to cast pointers to vring types. */ 181962306a36Sopenharmony_ci BUILD_BUG_ON(__alignof__ *vq->avail > VRING_AVAIL_ALIGN_SIZE); 182062306a36Sopenharmony_ci BUILD_BUG_ON(__alignof__ *vq->used > VRING_USED_ALIGN_SIZE); 182162306a36Sopenharmony_ci if ((a.avail_user_addr & (VRING_AVAIL_ALIGN_SIZE - 1)) || 182262306a36Sopenharmony_ci (a.used_user_addr & (VRING_USED_ALIGN_SIZE - 1)) || 182362306a36Sopenharmony_ci (a.log_guest_addr & (VRING_USED_ALIGN_SIZE - 1))) 182462306a36Sopenharmony_ci return -EINVAL; 182562306a36Sopenharmony_ci 182662306a36Sopenharmony_ci /* We only verify access here if backend is configured. 182762306a36Sopenharmony_ci * If it is not, we don't as size might not have been setup. 182862306a36Sopenharmony_ci * We will verify when backend is configured. */ 182962306a36Sopenharmony_ci if (vq->private_data) { 183062306a36Sopenharmony_ci if (!vq_access_ok(vq, vq->num, 183162306a36Sopenharmony_ci (void __user *)(unsigned long)a.desc_user_addr, 183262306a36Sopenharmony_ci (void __user *)(unsigned long)a.avail_user_addr, 183362306a36Sopenharmony_ci (void __user *)(unsigned long)a.used_user_addr)) 183462306a36Sopenharmony_ci return -EINVAL; 183562306a36Sopenharmony_ci 183662306a36Sopenharmony_ci /* Also validate log access for used ring if enabled. */ 183762306a36Sopenharmony_ci if (!vq_log_used_access_ok(vq, vq->log_base, 183862306a36Sopenharmony_ci a.flags & (0x1 << VHOST_VRING_F_LOG), 183962306a36Sopenharmony_ci a.log_guest_addr)) 184062306a36Sopenharmony_ci return -EINVAL; 184162306a36Sopenharmony_ci } 184262306a36Sopenharmony_ci 184362306a36Sopenharmony_ci vq->log_used = !!(a.flags & (0x1 << VHOST_VRING_F_LOG)); 184462306a36Sopenharmony_ci vq->desc = (void __user *)(unsigned long)a.desc_user_addr; 184562306a36Sopenharmony_ci vq->avail = (void __user *)(unsigned long)a.avail_user_addr; 184662306a36Sopenharmony_ci vq->log_addr = a.log_guest_addr; 184762306a36Sopenharmony_ci vq->used = (void __user *)(unsigned long)a.used_user_addr; 184862306a36Sopenharmony_ci 184962306a36Sopenharmony_ci return 0; 185062306a36Sopenharmony_ci} 185162306a36Sopenharmony_ci 185262306a36Sopenharmony_cistatic long vhost_vring_set_num_addr(struct vhost_dev *d, 185362306a36Sopenharmony_ci struct vhost_virtqueue *vq, 185462306a36Sopenharmony_ci unsigned int ioctl, 185562306a36Sopenharmony_ci void __user *argp) 185662306a36Sopenharmony_ci{ 185762306a36Sopenharmony_ci long r; 185862306a36Sopenharmony_ci 185962306a36Sopenharmony_ci mutex_lock(&vq->mutex); 186062306a36Sopenharmony_ci 186162306a36Sopenharmony_ci switch (ioctl) { 186262306a36Sopenharmony_ci case VHOST_SET_VRING_NUM: 186362306a36Sopenharmony_ci r = vhost_vring_set_num(d, vq, argp); 186462306a36Sopenharmony_ci break; 186562306a36Sopenharmony_ci case VHOST_SET_VRING_ADDR: 186662306a36Sopenharmony_ci r = vhost_vring_set_addr(d, vq, argp); 186762306a36Sopenharmony_ci break; 186862306a36Sopenharmony_ci default: 186962306a36Sopenharmony_ci BUG(); 187062306a36Sopenharmony_ci } 187162306a36Sopenharmony_ci 187262306a36Sopenharmony_ci mutex_unlock(&vq->mutex); 187362306a36Sopenharmony_ci 187462306a36Sopenharmony_ci return r; 187562306a36Sopenharmony_ci} 187662306a36Sopenharmony_cilong vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp) 187762306a36Sopenharmony_ci{ 187862306a36Sopenharmony_ci struct file *eventfp, *filep = NULL; 187962306a36Sopenharmony_ci bool pollstart = false, pollstop = false; 188062306a36Sopenharmony_ci struct eventfd_ctx *ctx = NULL; 188162306a36Sopenharmony_ci struct vhost_virtqueue *vq; 188262306a36Sopenharmony_ci struct vhost_vring_state s; 188362306a36Sopenharmony_ci struct vhost_vring_file f; 188462306a36Sopenharmony_ci u32 idx; 188562306a36Sopenharmony_ci long r; 188662306a36Sopenharmony_ci 188762306a36Sopenharmony_ci r = vhost_get_vq_from_user(d, argp, &vq, &idx); 188862306a36Sopenharmony_ci if (r < 0) 188962306a36Sopenharmony_ci return r; 189062306a36Sopenharmony_ci 189162306a36Sopenharmony_ci if (ioctl == VHOST_SET_VRING_NUM || 189262306a36Sopenharmony_ci ioctl == VHOST_SET_VRING_ADDR) { 189362306a36Sopenharmony_ci return vhost_vring_set_num_addr(d, vq, ioctl, argp); 189462306a36Sopenharmony_ci } 189562306a36Sopenharmony_ci 189662306a36Sopenharmony_ci mutex_lock(&vq->mutex); 189762306a36Sopenharmony_ci 189862306a36Sopenharmony_ci switch (ioctl) { 189962306a36Sopenharmony_ci case VHOST_SET_VRING_BASE: 190062306a36Sopenharmony_ci /* Moving base with an active backend? 190162306a36Sopenharmony_ci * You don't want to do that. */ 190262306a36Sopenharmony_ci if (vq->private_data) { 190362306a36Sopenharmony_ci r = -EBUSY; 190462306a36Sopenharmony_ci break; 190562306a36Sopenharmony_ci } 190662306a36Sopenharmony_ci if (copy_from_user(&s, argp, sizeof s)) { 190762306a36Sopenharmony_ci r = -EFAULT; 190862306a36Sopenharmony_ci break; 190962306a36Sopenharmony_ci } 191062306a36Sopenharmony_ci if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) { 191162306a36Sopenharmony_ci vq->last_avail_idx = s.num & 0xffff; 191262306a36Sopenharmony_ci vq->last_used_idx = (s.num >> 16) & 0xffff; 191362306a36Sopenharmony_ci } else { 191462306a36Sopenharmony_ci if (s.num > 0xffff) { 191562306a36Sopenharmony_ci r = -EINVAL; 191662306a36Sopenharmony_ci break; 191762306a36Sopenharmony_ci } 191862306a36Sopenharmony_ci vq->last_avail_idx = s.num; 191962306a36Sopenharmony_ci } 192062306a36Sopenharmony_ci /* Forget the cached index value. */ 192162306a36Sopenharmony_ci vq->avail_idx = vq->last_avail_idx; 192262306a36Sopenharmony_ci break; 192362306a36Sopenharmony_ci case VHOST_GET_VRING_BASE: 192462306a36Sopenharmony_ci s.index = idx; 192562306a36Sopenharmony_ci if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) 192662306a36Sopenharmony_ci s.num = (u32)vq->last_avail_idx | ((u32)vq->last_used_idx << 16); 192762306a36Sopenharmony_ci else 192862306a36Sopenharmony_ci s.num = vq->last_avail_idx; 192962306a36Sopenharmony_ci if (copy_to_user(argp, &s, sizeof s)) 193062306a36Sopenharmony_ci r = -EFAULT; 193162306a36Sopenharmony_ci break; 193262306a36Sopenharmony_ci case VHOST_SET_VRING_KICK: 193362306a36Sopenharmony_ci if (copy_from_user(&f, argp, sizeof f)) { 193462306a36Sopenharmony_ci r = -EFAULT; 193562306a36Sopenharmony_ci break; 193662306a36Sopenharmony_ci } 193762306a36Sopenharmony_ci eventfp = f.fd == VHOST_FILE_UNBIND ? NULL : eventfd_fget(f.fd); 193862306a36Sopenharmony_ci if (IS_ERR(eventfp)) { 193962306a36Sopenharmony_ci r = PTR_ERR(eventfp); 194062306a36Sopenharmony_ci break; 194162306a36Sopenharmony_ci } 194262306a36Sopenharmony_ci if (eventfp != vq->kick) { 194362306a36Sopenharmony_ci pollstop = (filep = vq->kick) != NULL; 194462306a36Sopenharmony_ci pollstart = (vq->kick = eventfp) != NULL; 194562306a36Sopenharmony_ci } else 194662306a36Sopenharmony_ci filep = eventfp; 194762306a36Sopenharmony_ci break; 194862306a36Sopenharmony_ci case VHOST_SET_VRING_CALL: 194962306a36Sopenharmony_ci if (copy_from_user(&f, argp, sizeof f)) { 195062306a36Sopenharmony_ci r = -EFAULT; 195162306a36Sopenharmony_ci break; 195262306a36Sopenharmony_ci } 195362306a36Sopenharmony_ci ctx = f.fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(f.fd); 195462306a36Sopenharmony_ci if (IS_ERR(ctx)) { 195562306a36Sopenharmony_ci r = PTR_ERR(ctx); 195662306a36Sopenharmony_ci break; 195762306a36Sopenharmony_ci } 195862306a36Sopenharmony_ci 195962306a36Sopenharmony_ci swap(ctx, vq->call_ctx.ctx); 196062306a36Sopenharmony_ci break; 196162306a36Sopenharmony_ci case VHOST_SET_VRING_ERR: 196262306a36Sopenharmony_ci if (copy_from_user(&f, argp, sizeof f)) { 196362306a36Sopenharmony_ci r = -EFAULT; 196462306a36Sopenharmony_ci break; 196562306a36Sopenharmony_ci } 196662306a36Sopenharmony_ci ctx = f.fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(f.fd); 196762306a36Sopenharmony_ci if (IS_ERR(ctx)) { 196862306a36Sopenharmony_ci r = PTR_ERR(ctx); 196962306a36Sopenharmony_ci break; 197062306a36Sopenharmony_ci } 197162306a36Sopenharmony_ci swap(ctx, vq->error_ctx); 197262306a36Sopenharmony_ci break; 197362306a36Sopenharmony_ci case VHOST_SET_VRING_ENDIAN: 197462306a36Sopenharmony_ci r = vhost_set_vring_endian(vq, argp); 197562306a36Sopenharmony_ci break; 197662306a36Sopenharmony_ci case VHOST_GET_VRING_ENDIAN: 197762306a36Sopenharmony_ci r = vhost_get_vring_endian(vq, idx, argp); 197862306a36Sopenharmony_ci break; 197962306a36Sopenharmony_ci case VHOST_SET_VRING_BUSYLOOP_TIMEOUT: 198062306a36Sopenharmony_ci if (copy_from_user(&s, argp, sizeof(s))) { 198162306a36Sopenharmony_ci r = -EFAULT; 198262306a36Sopenharmony_ci break; 198362306a36Sopenharmony_ci } 198462306a36Sopenharmony_ci vq->busyloop_timeout = s.num; 198562306a36Sopenharmony_ci break; 198662306a36Sopenharmony_ci case VHOST_GET_VRING_BUSYLOOP_TIMEOUT: 198762306a36Sopenharmony_ci s.index = idx; 198862306a36Sopenharmony_ci s.num = vq->busyloop_timeout; 198962306a36Sopenharmony_ci if (copy_to_user(argp, &s, sizeof(s))) 199062306a36Sopenharmony_ci r = -EFAULT; 199162306a36Sopenharmony_ci break; 199262306a36Sopenharmony_ci default: 199362306a36Sopenharmony_ci r = -ENOIOCTLCMD; 199462306a36Sopenharmony_ci } 199562306a36Sopenharmony_ci 199662306a36Sopenharmony_ci if (pollstop && vq->handle_kick) 199762306a36Sopenharmony_ci vhost_poll_stop(&vq->poll); 199862306a36Sopenharmony_ci 199962306a36Sopenharmony_ci if (!IS_ERR_OR_NULL(ctx)) 200062306a36Sopenharmony_ci eventfd_ctx_put(ctx); 200162306a36Sopenharmony_ci if (filep) 200262306a36Sopenharmony_ci fput(filep); 200362306a36Sopenharmony_ci 200462306a36Sopenharmony_ci if (pollstart && vq->handle_kick) 200562306a36Sopenharmony_ci r = vhost_poll_start(&vq->poll, vq->kick); 200662306a36Sopenharmony_ci 200762306a36Sopenharmony_ci mutex_unlock(&vq->mutex); 200862306a36Sopenharmony_ci 200962306a36Sopenharmony_ci if (pollstop && vq->handle_kick) 201062306a36Sopenharmony_ci vhost_dev_flush(vq->poll.dev); 201162306a36Sopenharmony_ci return r; 201262306a36Sopenharmony_ci} 201362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_vring_ioctl); 201462306a36Sopenharmony_ci 201562306a36Sopenharmony_ciint vhost_init_device_iotlb(struct vhost_dev *d) 201662306a36Sopenharmony_ci{ 201762306a36Sopenharmony_ci struct vhost_iotlb *niotlb, *oiotlb; 201862306a36Sopenharmony_ci int i; 201962306a36Sopenharmony_ci 202062306a36Sopenharmony_ci niotlb = iotlb_alloc(); 202162306a36Sopenharmony_ci if (!niotlb) 202262306a36Sopenharmony_ci return -ENOMEM; 202362306a36Sopenharmony_ci 202462306a36Sopenharmony_ci oiotlb = d->iotlb; 202562306a36Sopenharmony_ci d->iotlb = niotlb; 202662306a36Sopenharmony_ci 202762306a36Sopenharmony_ci for (i = 0; i < d->nvqs; ++i) { 202862306a36Sopenharmony_ci struct vhost_virtqueue *vq = d->vqs[i]; 202962306a36Sopenharmony_ci 203062306a36Sopenharmony_ci mutex_lock(&vq->mutex); 203162306a36Sopenharmony_ci vq->iotlb = niotlb; 203262306a36Sopenharmony_ci __vhost_vq_meta_reset(vq); 203362306a36Sopenharmony_ci mutex_unlock(&vq->mutex); 203462306a36Sopenharmony_ci } 203562306a36Sopenharmony_ci 203662306a36Sopenharmony_ci vhost_iotlb_free(oiotlb); 203762306a36Sopenharmony_ci 203862306a36Sopenharmony_ci return 0; 203962306a36Sopenharmony_ci} 204062306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_init_device_iotlb); 204162306a36Sopenharmony_ci 204262306a36Sopenharmony_ci/* Caller must have device mutex */ 204362306a36Sopenharmony_cilong vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp) 204462306a36Sopenharmony_ci{ 204562306a36Sopenharmony_ci struct eventfd_ctx *ctx; 204662306a36Sopenharmony_ci u64 p; 204762306a36Sopenharmony_ci long r; 204862306a36Sopenharmony_ci int i, fd; 204962306a36Sopenharmony_ci 205062306a36Sopenharmony_ci /* If you are not the owner, you can become one */ 205162306a36Sopenharmony_ci if (ioctl == VHOST_SET_OWNER) { 205262306a36Sopenharmony_ci r = vhost_dev_set_owner(d); 205362306a36Sopenharmony_ci goto done; 205462306a36Sopenharmony_ci } 205562306a36Sopenharmony_ci 205662306a36Sopenharmony_ci /* You must be the owner to do anything else */ 205762306a36Sopenharmony_ci r = vhost_dev_check_owner(d); 205862306a36Sopenharmony_ci if (r) 205962306a36Sopenharmony_ci goto done; 206062306a36Sopenharmony_ci 206162306a36Sopenharmony_ci switch (ioctl) { 206262306a36Sopenharmony_ci case VHOST_SET_MEM_TABLE: 206362306a36Sopenharmony_ci r = vhost_set_memory(d, argp); 206462306a36Sopenharmony_ci break; 206562306a36Sopenharmony_ci case VHOST_SET_LOG_BASE: 206662306a36Sopenharmony_ci if (copy_from_user(&p, argp, sizeof p)) { 206762306a36Sopenharmony_ci r = -EFAULT; 206862306a36Sopenharmony_ci break; 206962306a36Sopenharmony_ci } 207062306a36Sopenharmony_ci if ((u64)(unsigned long)p != p) { 207162306a36Sopenharmony_ci r = -EFAULT; 207262306a36Sopenharmony_ci break; 207362306a36Sopenharmony_ci } 207462306a36Sopenharmony_ci for (i = 0; i < d->nvqs; ++i) { 207562306a36Sopenharmony_ci struct vhost_virtqueue *vq; 207662306a36Sopenharmony_ci void __user *base = (void __user *)(unsigned long)p; 207762306a36Sopenharmony_ci vq = d->vqs[i]; 207862306a36Sopenharmony_ci mutex_lock(&vq->mutex); 207962306a36Sopenharmony_ci /* If ring is inactive, will check when it's enabled. */ 208062306a36Sopenharmony_ci if (vq->private_data && !vq_log_access_ok(vq, base)) 208162306a36Sopenharmony_ci r = -EFAULT; 208262306a36Sopenharmony_ci else 208362306a36Sopenharmony_ci vq->log_base = base; 208462306a36Sopenharmony_ci mutex_unlock(&vq->mutex); 208562306a36Sopenharmony_ci } 208662306a36Sopenharmony_ci break; 208762306a36Sopenharmony_ci case VHOST_SET_LOG_FD: 208862306a36Sopenharmony_ci r = get_user(fd, (int __user *)argp); 208962306a36Sopenharmony_ci if (r < 0) 209062306a36Sopenharmony_ci break; 209162306a36Sopenharmony_ci ctx = fd == VHOST_FILE_UNBIND ? NULL : eventfd_ctx_fdget(fd); 209262306a36Sopenharmony_ci if (IS_ERR(ctx)) { 209362306a36Sopenharmony_ci r = PTR_ERR(ctx); 209462306a36Sopenharmony_ci break; 209562306a36Sopenharmony_ci } 209662306a36Sopenharmony_ci swap(ctx, d->log_ctx); 209762306a36Sopenharmony_ci for (i = 0; i < d->nvqs; ++i) { 209862306a36Sopenharmony_ci mutex_lock(&d->vqs[i]->mutex); 209962306a36Sopenharmony_ci d->vqs[i]->log_ctx = d->log_ctx; 210062306a36Sopenharmony_ci mutex_unlock(&d->vqs[i]->mutex); 210162306a36Sopenharmony_ci } 210262306a36Sopenharmony_ci if (ctx) 210362306a36Sopenharmony_ci eventfd_ctx_put(ctx); 210462306a36Sopenharmony_ci break; 210562306a36Sopenharmony_ci default: 210662306a36Sopenharmony_ci r = -ENOIOCTLCMD; 210762306a36Sopenharmony_ci break; 210862306a36Sopenharmony_ci } 210962306a36Sopenharmony_cidone: 211062306a36Sopenharmony_ci return r; 211162306a36Sopenharmony_ci} 211262306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_dev_ioctl); 211362306a36Sopenharmony_ci 211462306a36Sopenharmony_ci/* TODO: This is really inefficient. We need something like get_user() 211562306a36Sopenharmony_ci * (instruction directly accesses the data, with an exception table entry 211662306a36Sopenharmony_ci * returning -EFAULT). See Documentation/arch/x86/exception-tables.rst. 211762306a36Sopenharmony_ci */ 211862306a36Sopenharmony_cistatic int set_bit_to_user(int nr, void __user *addr) 211962306a36Sopenharmony_ci{ 212062306a36Sopenharmony_ci unsigned long log = (unsigned long)addr; 212162306a36Sopenharmony_ci struct page *page; 212262306a36Sopenharmony_ci void *base; 212362306a36Sopenharmony_ci int bit = nr + (log % PAGE_SIZE) * 8; 212462306a36Sopenharmony_ci int r; 212562306a36Sopenharmony_ci 212662306a36Sopenharmony_ci r = pin_user_pages_fast(log, 1, FOLL_WRITE, &page); 212762306a36Sopenharmony_ci if (r < 0) 212862306a36Sopenharmony_ci return r; 212962306a36Sopenharmony_ci BUG_ON(r != 1); 213062306a36Sopenharmony_ci base = kmap_atomic(page); 213162306a36Sopenharmony_ci set_bit(bit, base); 213262306a36Sopenharmony_ci kunmap_atomic(base); 213362306a36Sopenharmony_ci unpin_user_pages_dirty_lock(&page, 1, true); 213462306a36Sopenharmony_ci return 0; 213562306a36Sopenharmony_ci} 213662306a36Sopenharmony_ci 213762306a36Sopenharmony_cistatic int log_write(void __user *log_base, 213862306a36Sopenharmony_ci u64 write_address, u64 write_length) 213962306a36Sopenharmony_ci{ 214062306a36Sopenharmony_ci u64 write_page = write_address / VHOST_PAGE_SIZE; 214162306a36Sopenharmony_ci int r; 214262306a36Sopenharmony_ci 214362306a36Sopenharmony_ci if (!write_length) 214462306a36Sopenharmony_ci return 0; 214562306a36Sopenharmony_ci write_length += write_address % VHOST_PAGE_SIZE; 214662306a36Sopenharmony_ci for (;;) { 214762306a36Sopenharmony_ci u64 base = (u64)(unsigned long)log_base; 214862306a36Sopenharmony_ci u64 log = base + write_page / 8; 214962306a36Sopenharmony_ci int bit = write_page % 8; 215062306a36Sopenharmony_ci if ((u64)(unsigned long)log != log) 215162306a36Sopenharmony_ci return -EFAULT; 215262306a36Sopenharmony_ci r = set_bit_to_user(bit, (void __user *)(unsigned long)log); 215362306a36Sopenharmony_ci if (r < 0) 215462306a36Sopenharmony_ci return r; 215562306a36Sopenharmony_ci if (write_length <= VHOST_PAGE_SIZE) 215662306a36Sopenharmony_ci break; 215762306a36Sopenharmony_ci write_length -= VHOST_PAGE_SIZE; 215862306a36Sopenharmony_ci write_page += 1; 215962306a36Sopenharmony_ci } 216062306a36Sopenharmony_ci return r; 216162306a36Sopenharmony_ci} 216262306a36Sopenharmony_ci 216362306a36Sopenharmony_cistatic int log_write_hva(struct vhost_virtqueue *vq, u64 hva, u64 len) 216462306a36Sopenharmony_ci{ 216562306a36Sopenharmony_ci struct vhost_iotlb *umem = vq->umem; 216662306a36Sopenharmony_ci struct vhost_iotlb_map *u; 216762306a36Sopenharmony_ci u64 start, end, l, min; 216862306a36Sopenharmony_ci int r; 216962306a36Sopenharmony_ci bool hit = false; 217062306a36Sopenharmony_ci 217162306a36Sopenharmony_ci while (len) { 217262306a36Sopenharmony_ci min = len; 217362306a36Sopenharmony_ci /* More than one GPAs can be mapped into a single HVA. So 217462306a36Sopenharmony_ci * iterate all possible umems here to be safe. 217562306a36Sopenharmony_ci */ 217662306a36Sopenharmony_ci list_for_each_entry(u, &umem->list, link) { 217762306a36Sopenharmony_ci if (u->addr > hva - 1 + len || 217862306a36Sopenharmony_ci u->addr - 1 + u->size < hva) 217962306a36Sopenharmony_ci continue; 218062306a36Sopenharmony_ci start = max(u->addr, hva); 218162306a36Sopenharmony_ci end = min(u->addr - 1 + u->size, hva - 1 + len); 218262306a36Sopenharmony_ci l = end - start + 1; 218362306a36Sopenharmony_ci r = log_write(vq->log_base, 218462306a36Sopenharmony_ci u->start + start - u->addr, 218562306a36Sopenharmony_ci l); 218662306a36Sopenharmony_ci if (r < 0) 218762306a36Sopenharmony_ci return r; 218862306a36Sopenharmony_ci hit = true; 218962306a36Sopenharmony_ci min = min(l, min); 219062306a36Sopenharmony_ci } 219162306a36Sopenharmony_ci 219262306a36Sopenharmony_ci if (!hit) 219362306a36Sopenharmony_ci return -EFAULT; 219462306a36Sopenharmony_ci 219562306a36Sopenharmony_ci len -= min; 219662306a36Sopenharmony_ci hva += min; 219762306a36Sopenharmony_ci } 219862306a36Sopenharmony_ci 219962306a36Sopenharmony_ci return 0; 220062306a36Sopenharmony_ci} 220162306a36Sopenharmony_ci 220262306a36Sopenharmony_cistatic int log_used(struct vhost_virtqueue *vq, u64 used_offset, u64 len) 220362306a36Sopenharmony_ci{ 220462306a36Sopenharmony_ci struct iovec *iov = vq->log_iov; 220562306a36Sopenharmony_ci int i, ret; 220662306a36Sopenharmony_ci 220762306a36Sopenharmony_ci if (!vq->iotlb) 220862306a36Sopenharmony_ci return log_write(vq->log_base, vq->log_addr + used_offset, len); 220962306a36Sopenharmony_ci 221062306a36Sopenharmony_ci ret = translate_desc(vq, (uintptr_t)vq->used + used_offset, 221162306a36Sopenharmony_ci len, iov, 64, VHOST_ACCESS_WO); 221262306a36Sopenharmony_ci if (ret < 0) 221362306a36Sopenharmony_ci return ret; 221462306a36Sopenharmony_ci 221562306a36Sopenharmony_ci for (i = 0; i < ret; i++) { 221662306a36Sopenharmony_ci ret = log_write_hva(vq, (uintptr_t)iov[i].iov_base, 221762306a36Sopenharmony_ci iov[i].iov_len); 221862306a36Sopenharmony_ci if (ret) 221962306a36Sopenharmony_ci return ret; 222062306a36Sopenharmony_ci } 222162306a36Sopenharmony_ci 222262306a36Sopenharmony_ci return 0; 222362306a36Sopenharmony_ci} 222462306a36Sopenharmony_ci 222562306a36Sopenharmony_ciint vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, 222662306a36Sopenharmony_ci unsigned int log_num, u64 len, struct iovec *iov, int count) 222762306a36Sopenharmony_ci{ 222862306a36Sopenharmony_ci int i, r; 222962306a36Sopenharmony_ci 223062306a36Sopenharmony_ci /* Make sure data written is seen before log. */ 223162306a36Sopenharmony_ci smp_wmb(); 223262306a36Sopenharmony_ci 223362306a36Sopenharmony_ci if (vq->iotlb) { 223462306a36Sopenharmony_ci for (i = 0; i < count; i++) { 223562306a36Sopenharmony_ci r = log_write_hva(vq, (uintptr_t)iov[i].iov_base, 223662306a36Sopenharmony_ci iov[i].iov_len); 223762306a36Sopenharmony_ci if (r < 0) 223862306a36Sopenharmony_ci return r; 223962306a36Sopenharmony_ci } 224062306a36Sopenharmony_ci return 0; 224162306a36Sopenharmony_ci } 224262306a36Sopenharmony_ci 224362306a36Sopenharmony_ci for (i = 0; i < log_num; ++i) { 224462306a36Sopenharmony_ci u64 l = min(log[i].len, len); 224562306a36Sopenharmony_ci r = log_write(vq->log_base, log[i].addr, l); 224662306a36Sopenharmony_ci if (r < 0) 224762306a36Sopenharmony_ci return r; 224862306a36Sopenharmony_ci len -= l; 224962306a36Sopenharmony_ci if (!len) { 225062306a36Sopenharmony_ci if (vq->log_ctx) 225162306a36Sopenharmony_ci eventfd_signal(vq->log_ctx, 1); 225262306a36Sopenharmony_ci return 0; 225362306a36Sopenharmony_ci } 225462306a36Sopenharmony_ci } 225562306a36Sopenharmony_ci /* Length written exceeds what we have stored. This is a bug. */ 225662306a36Sopenharmony_ci BUG(); 225762306a36Sopenharmony_ci return 0; 225862306a36Sopenharmony_ci} 225962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_log_write); 226062306a36Sopenharmony_ci 226162306a36Sopenharmony_cistatic int vhost_update_used_flags(struct vhost_virtqueue *vq) 226262306a36Sopenharmony_ci{ 226362306a36Sopenharmony_ci void __user *used; 226462306a36Sopenharmony_ci if (vhost_put_used_flags(vq)) 226562306a36Sopenharmony_ci return -EFAULT; 226662306a36Sopenharmony_ci if (unlikely(vq->log_used)) { 226762306a36Sopenharmony_ci /* Make sure the flag is seen before log. */ 226862306a36Sopenharmony_ci smp_wmb(); 226962306a36Sopenharmony_ci /* Log used flag write. */ 227062306a36Sopenharmony_ci used = &vq->used->flags; 227162306a36Sopenharmony_ci log_used(vq, (used - (void __user *)vq->used), 227262306a36Sopenharmony_ci sizeof vq->used->flags); 227362306a36Sopenharmony_ci if (vq->log_ctx) 227462306a36Sopenharmony_ci eventfd_signal(vq->log_ctx, 1); 227562306a36Sopenharmony_ci } 227662306a36Sopenharmony_ci return 0; 227762306a36Sopenharmony_ci} 227862306a36Sopenharmony_ci 227962306a36Sopenharmony_cistatic int vhost_update_avail_event(struct vhost_virtqueue *vq) 228062306a36Sopenharmony_ci{ 228162306a36Sopenharmony_ci if (vhost_put_avail_event(vq)) 228262306a36Sopenharmony_ci return -EFAULT; 228362306a36Sopenharmony_ci if (unlikely(vq->log_used)) { 228462306a36Sopenharmony_ci void __user *used; 228562306a36Sopenharmony_ci /* Make sure the event is seen before log. */ 228662306a36Sopenharmony_ci smp_wmb(); 228762306a36Sopenharmony_ci /* Log avail event write */ 228862306a36Sopenharmony_ci used = vhost_avail_event(vq); 228962306a36Sopenharmony_ci log_used(vq, (used - (void __user *)vq->used), 229062306a36Sopenharmony_ci sizeof *vhost_avail_event(vq)); 229162306a36Sopenharmony_ci if (vq->log_ctx) 229262306a36Sopenharmony_ci eventfd_signal(vq->log_ctx, 1); 229362306a36Sopenharmony_ci } 229462306a36Sopenharmony_ci return 0; 229562306a36Sopenharmony_ci} 229662306a36Sopenharmony_ci 229762306a36Sopenharmony_ciint vhost_vq_init_access(struct vhost_virtqueue *vq) 229862306a36Sopenharmony_ci{ 229962306a36Sopenharmony_ci __virtio16 last_used_idx; 230062306a36Sopenharmony_ci int r; 230162306a36Sopenharmony_ci bool is_le = vq->is_le; 230262306a36Sopenharmony_ci 230362306a36Sopenharmony_ci if (!vq->private_data) 230462306a36Sopenharmony_ci return 0; 230562306a36Sopenharmony_ci 230662306a36Sopenharmony_ci vhost_init_is_le(vq); 230762306a36Sopenharmony_ci 230862306a36Sopenharmony_ci r = vhost_update_used_flags(vq); 230962306a36Sopenharmony_ci if (r) 231062306a36Sopenharmony_ci goto err; 231162306a36Sopenharmony_ci vq->signalled_used_valid = false; 231262306a36Sopenharmony_ci if (!vq->iotlb && 231362306a36Sopenharmony_ci !access_ok(&vq->used->idx, sizeof vq->used->idx)) { 231462306a36Sopenharmony_ci r = -EFAULT; 231562306a36Sopenharmony_ci goto err; 231662306a36Sopenharmony_ci } 231762306a36Sopenharmony_ci r = vhost_get_used_idx(vq, &last_used_idx); 231862306a36Sopenharmony_ci if (r) { 231962306a36Sopenharmony_ci vq_err(vq, "Can't access used idx at %p\n", 232062306a36Sopenharmony_ci &vq->used->idx); 232162306a36Sopenharmony_ci goto err; 232262306a36Sopenharmony_ci } 232362306a36Sopenharmony_ci vq->last_used_idx = vhost16_to_cpu(vq, last_used_idx); 232462306a36Sopenharmony_ci return 0; 232562306a36Sopenharmony_ci 232662306a36Sopenharmony_cierr: 232762306a36Sopenharmony_ci vq->is_le = is_le; 232862306a36Sopenharmony_ci return r; 232962306a36Sopenharmony_ci} 233062306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_vq_init_access); 233162306a36Sopenharmony_ci 233262306a36Sopenharmony_cistatic int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len, 233362306a36Sopenharmony_ci struct iovec iov[], int iov_size, int access) 233462306a36Sopenharmony_ci{ 233562306a36Sopenharmony_ci const struct vhost_iotlb_map *map; 233662306a36Sopenharmony_ci struct vhost_dev *dev = vq->dev; 233762306a36Sopenharmony_ci struct vhost_iotlb *umem = dev->iotlb ? dev->iotlb : dev->umem; 233862306a36Sopenharmony_ci struct iovec *_iov; 233962306a36Sopenharmony_ci u64 s = 0, last = addr + len - 1; 234062306a36Sopenharmony_ci int ret = 0; 234162306a36Sopenharmony_ci 234262306a36Sopenharmony_ci while ((u64)len > s) { 234362306a36Sopenharmony_ci u64 size; 234462306a36Sopenharmony_ci if (unlikely(ret >= iov_size)) { 234562306a36Sopenharmony_ci ret = -ENOBUFS; 234662306a36Sopenharmony_ci break; 234762306a36Sopenharmony_ci } 234862306a36Sopenharmony_ci 234962306a36Sopenharmony_ci map = vhost_iotlb_itree_first(umem, addr, last); 235062306a36Sopenharmony_ci if (map == NULL || map->start > addr) { 235162306a36Sopenharmony_ci if (umem != dev->iotlb) { 235262306a36Sopenharmony_ci ret = -EFAULT; 235362306a36Sopenharmony_ci break; 235462306a36Sopenharmony_ci } 235562306a36Sopenharmony_ci ret = -EAGAIN; 235662306a36Sopenharmony_ci break; 235762306a36Sopenharmony_ci } else if (!(map->perm & access)) { 235862306a36Sopenharmony_ci ret = -EPERM; 235962306a36Sopenharmony_ci break; 236062306a36Sopenharmony_ci } 236162306a36Sopenharmony_ci 236262306a36Sopenharmony_ci _iov = iov + ret; 236362306a36Sopenharmony_ci size = map->size - addr + map->start; 236462306a36Sopenharmony_ci _iov->iov_len = min((u64)len - s, size); 236562306a36Sopenharmony_ci _iov->iov_base = (void __user *)(unsigned long) 236662306a36Sopenharmony_ci (map->addr + addr - map->start); 236762306a36Sopenharmony_ci s += size; 236862306a36Sopenharmony_ci addr += size; 236962306a36Sopenharmony_ci ++ret; 237062306a36Sopenharmony_ci } 237162306a36Sopenharmony_ci 237262306a36Sopenharmony_ci if (ret == -EAGAIN) 237362306a36Sopenharmony_ci vhost_iotlb_miss(vq, addr, access); 237462306a36Sopenharmony_ci return ret; 237562306a36Sopenharmony_ci} 237662306a36Sopenharmony_ci 237762306a36Sopenharmony_ci/* Each buffer in the virtqueues is actually a chain of descriptors. This 237862306a36Sopenharmony_ci * function returns the next descriptor in the chain, 237962306a36Sopenharmony_ci * or -1U if we're at the end. */ 238062306a36Sopenharmony_cistatic unsigned next_desc(struct vhost_virtqueue *vq, struct vring_desc *desc) 238162306a36Sopenharmony_ci{ 238262306a36Sopenharmony_ci unsigned int next; 238362306a36Sopenharmony_ci 238462306a36Sopenharmony_ci /* If this descriptor says it doesn't chain, we're done. */ 238562306a36Sopenharmony_ci if (!(desc->flags & cpu_to_vhost16(vq, VRING_DESC_F_NEXT))) 238662306a36Sopenharmony_ci return -1U; 238762306a36Sopenharmony_ci 238862306a36Sopenharmony_ci /* Check they're not leading us off end of descriptors. */ 238962306a36Sopenharmony_ci next = vhost16_to_cpu(vq, READ_ONCE(desc->next)); 239062306a36Sopenharmony_ci return next; 239162306a36Sopenharmony_ci} 239262306a36Sopenharmony_ci 239362306a36Sopenharmony_cistatic int get_indirect(struct vhost_virtqueue *vq, 239462306a36Sopenharmony_ci struct iovec iov[], unsigned int iov_size, 239562306a36Sopenharmony_ci unsigned int *out_num, unsigned int *in_num, 239662306a36Sopenharmony_ci struct vhost_log *log, unsigned int *log_num, 239762306a36Sopenharmony_ci struct vring_desc *indirect) 239862306a36Sopenharmony_ci{ 239962306a36Sopenharmony_ci struct vring_desc desc; 240062306a36Sopenharmony_ci unsigned int i = 0, count, found = 0; 240162306a36Sopenharmony_ci u32 len = vhost32_to_cpu(vq, indirect->len); 240262306a36Sopenharmony_ci struct iov_iter from; 240362306a36Sopenharmony_ci int ret, access; 240462306a36Sopenharmony_ci 240562306a36Sopenharmony_ci /* Sanity check */ 240662306a36Sopenharmony_ci if (unlikely(len % sizeof desc)) { 240762306a36Sopenharmony_ci vq_err(vq, "Invalid length in indirect descriptor: " 240862306a36Sopenharmony_ci "len 0x%llx not multiple of 0x%zx\n", 240962306a36Sopenharmony_ci (unsigned long long)len, 241062306a36Sopenharmony_ci sizeof desc); 241162306a36Sopenharmony_ci return -EINVAL; 241262306a36Sopenharmony_ci } 241362306a36Sopenharmony_ci 241462306a36Sopenharmony_ci ret = translate_desc(vq, vhost64_to_cpu(vq, indirect->addr), len, vq->indirect, 241562306a36Sopenharmony_ci UIO_MAXIOV, VHOST_ACCESS_RO); 241662306a36Sopenharmony_ci if (unlikely(ret < 0)) { 241762306a36Sopenharmony_ci if (ret != -EAGAIN) 241862306a36Sopenharmony_ci vq_err(vq, "Translation failure %d in indirect.\n", ret); 241962306a36Sopenharmony_ci return ret; 242062306a36Sopenharmony_ci } 242162306a36Sopenharmony_ci iov_iter_init(&from, ITER_SOURCE, vq->indirect, ret, len); 242262306a36Sopenharmony_ci count = len / sizeof desc; 242362306a36Sopenharmony_ci /* Buffers are chained via a 16 bit next field, so 242462306a36Sopenharmony_ci * we can have at most 2^16 of these. */ 242562306a36Sopenharmony_ci if (unlikely(count > USHRT_MAX + 1)) { 242662306a36Sopenharmony_ci vq_err(vq, "Indirect buffer length too big: %d\n", 242762306a36Sopenharmony_ci indirect->len); 242862306a36Sopenharmony_ci return -E2BIG; 242962306a36Sopenharmony_ci } 243062306a36Sopenharmony_ci 243162306a36Sopenharmony_ci do { 243262306a36Sopenharmony_ci unsigned iov_count = *in_num + *out_num; 243362306a36Sopenharmony_ci if (unlikely(++found > count)) { 243462306a36Sopenharmony_ci vq_err(vq, "Loop detected: last one at %u " 243562306a36Sopenharmony_ci "indirect size %u\n", 243662306a36Sopenharmony_ci i, count); 243762306a36Sopenharmony_ci return -EINVAL; 243862306a36Sopenharmony_ci } 243962306a36Sopenharmony_ci if (unlikely(!copy_from_iter_full(&desc, sizeof(desc), &from))) { 244062306a36Sopenharmony_ci vq_err(vq, "Failed indirect descriptor: idx %d, %zx\n", 244162306a36Sopenharmony_ci i, (size_t)vhost64_to_cpu(vq, indirect->addr) + i * sizeof desc); 244262306a36Sopenharmony_ci return -EINVAL; 244362306a36Sopenharmony_ci } 244462306a36Sopenharmony_ci if (unlikely(desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_INDIRECT))) { 244562306a36Sopenharmony_ci vq_err(vq, "Nested indirect descriptor: idx %d, %zx\n", 244662306a36Sopenharmony_ci i, (size_t)vhost64_to_cpu(vq, indirect->addr) + i * sizeof desc); 244762306a36Sopenharmony_ci return -EINVAL; 244862306a36Sopenharmony_ci } 244962306a36Sopenharmony_ci 245062306a36Sopenharmony_ci if (desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_WRITE)) 245162306a36Sopenharmony_ci access = VHOST_ACCESS_WO; 245262306a36Sopenharmony_ci else 245362306a36Sopenharmony_ci access = VHOST_ACCESS_RO; 245462306a36Sopenharmony_ci 245562306a36Sopenharmony_ci ret = translate_desc(vq, vhost64_to_cpu(vq, desc.addr), 245662306a36Sopenharmony_ci vhost32_to_cpu(vq, desc.len), iov + iov_count, 245762306a36Sopenharmony_ci iov_size - iov_count, access); 245862306a36Sopenharmony_ci if (unlikely(ret < 0)) { 245962306a36Sopenharmony_ci if (ret != -EAGAIN) 246062306a36Sopenharmony_ci vq_err(vq, "Translation failure %d indirect idx %d\n", 246162306a36Sopenharmony_ci ret, i); 246262306a36Sopenharmony_ci return ret; 246362306a36Sopenharmony_ci } 246462306a36Sopenharmony_ci /* If this is an input descriptor, increment that count. */ 246562306a36Sopenharmony_ci if (access == VHOST_ACCESS_WO) { 246662306a36Sopenharmony_ci *in_num += ret; 246762306a36Sopenharmony_ci if (unlikely(log && ret)) { 246862306a36Sopenharmony_ci log[*log_num].addr = vhost64_to_cpu(vq, desc.addr); 246962306a36Sopenharmony_ci log[*log_num].len = vhost32_to_cpu(vq, desc.len); 247062306a36Sopenharmony_ci ++*log_num; 247162306a36Sopenharmony_ci } 247262306a36Sopenharmony_ci } else { 247362306a36Sopenharmony_ci /* If it's an output descriptor, they're all supposed 247462306a36Sopenharmony_ci * to come before any input descriptors. */ 247562306a36Sopenharmony_ci if (unlikely(*in_num)) { 247662306a36Sopenharmony_ci vq_err(vq, "Indirect descriptor " 247762306a36Sopenharmony_ci "has out after in: idx %d\n", i); 247862306a36Sopenharmony_ci return -EINVAL; 247962306a36Sopenharmony_ci } 248062306a36Sopenharmony_ci *out_num += ret; 248162306a36Sopenharmony_ci } 248262306a36Sopenharmony_ci } while ((i = next_desc(vq, &desc)) != -1); 248362306a36Sopenharmony_ci return 0; 248462306a36Sopenharmony_ci} 248562306a36Sopenharmony_ci 248662306a36Sopenharmony_ci/* This looks in the virtqueue and for the first available buffer, and converts 248762306a36Sopenharmony_ci * it to an iovec for convenient access. Since descriptors consist of some 248862306a36Sopenharmony_ci * number of output then some number of input descriptors, it's actually two 248962306a36Sopenharmony_ci * iovecs, but we pack them into one and note how many of each there were. 249062306a36Sopenharmony_ci * 249162306a36Sopenharmony_ci * This function returns the descriptor number found, or vq->num (which is 249262306a36Sopenharmony_ci * never a valid descriptor number) if none was found. A negative code is 249362306a36Sopenharmony_ci * returned on error. */ 249462306a36Sopenharmony_ciint vhost_get_vq_desc(struct vhost_virtqueue *vq, 249562306a36Sopenharmony_ci struct iovec iov[], unsigned int iov_size, 249662306a36Sopenharmony_ci unsigned int *out_num, unsigned int *in_num, 249762306a36Sopenharmony_ci struct vhost_log *log, unsigned int *log_num) 249862306a36Sopenharmony_ci{ 249962306a36Sopenharmony_ci struct vring_desc desc; 250062306a36Sopenharmony_ci unsigned int i, head, found = 0; 250162306a36Sopenharmony_ci u16 last_avail_idx; 250262306a36Sopenharmony_ci __virtio16 avail_idx; 250362306a36Sopenharmony_ci __virtio16 ring_head; 250462306a36Sopenharmony_ci int ret, access; 250562306a36Sopenharmony_ci 250662306a36Sopenharmony_ci /* Check it isn't doing very strange things with descriptor numbers. */ 250762306a36Sopenharmony_ci last_avail_idx = vq->last_avail_idx; 250862306a36Sopenharmony_ci 250962306a36Sopenharmony_ci if (vq->avail_idx == vq->last_avail_idx) { 251062306a36Sopenharmony_ci if (unlikely(vhost_get_avail_idx(vq, &avail_idx))) { 251162306a36Sopenharmony_ci vq_err(vq, "Failed to access avail idx at %p\n", 251262306a36Sopenharmony_ci &vq->avail->idx); 251362306a36Sopenharmony_ci return -EFAULT; 251462306a36Sopenharmony_ci } 251562306a36Sopenharmony_ci vq->avail_idx = vhost16_to_cpu(vq, avail_idx); 251662306a36Sopenharmony_ci 251762306a36Sopenharmony_ci if (unlikely((u16)(vq->avail_idx - last_avail_idx) > vq->num)) { 251862306a36Sopenharmony_ci vq_err(vq, "Guest moved used index from %u to %u", 251962306a36Sopenharmony_ci last_avail_idx, vq->avail_idx); 252062306a36Sopenharmony_ci return -EFAULT; 252162306a36Sopenharmony_ci } 252262306a36Sopenharmony_ci 252362306a36Sopenharmony_ci /* If there's nothing new since last we looked, return 252462306a36Sopenharmony_ci * invalid. 252562306a36Sopenharmony_ci */ 252662306a36Sopenharmony_ci if (vq->avail_idx == last_avail_idx) 252762306a36Sopenharmony_ci return vq->num; 252862306a36Sopenharmony_ci 252962306a36Sopenharmony_ci /* Only get avail ring entries after they have been 253062306a36Sopenharmony_ci * exposed by guest. 253162306a36Sopenharmony_ci */ 253262306a36Sopenharmony_ci smp_rmb(); 253362306a36Sopenharmony_ci } 253462306a36Sopenharmony_ci 253562306a36Sopenharmony_ci /* Grab the next descriptor number they're advertising, and increment 253662306a36Sopenharmony_ci * the index we've seen. */ 253762306a36Sopenharmony_ci if (unlikely(vhost_get_avail_head(vq, &ring_head, last_avail_idx))) { 253862306a36Sopenharmony_ci vq_err(vq, "Failed to read head: idx %d address %p\n", 253962306a36Sopenharmony_ci last_avail_idx, 254062306a36Sopenharmony_ci &vq->avail->ring[last_avail_idx % vq->num]); 254162306a36Sopenharmony_ci return -EFAULT; 254262306a36Sopenharmony_ci } 254362306a36Sopenharmony_ci 254462306a36Sopenharmony_ci head = vhost16_to_cpu(vq, ring_head); 254562306a36Sopenharmony_ci 254662306a36Sopenharmony_ci /* If their number is silly, that's an error. */ 254762306a36Sopenharmony_ci if (unlikely(head >= vq->num)) { 254862306a36Sopenharmony_ci vq_err(vq, "Guest says index %u > %u is available", 254962306a36Sopenharmony_ci head, vq->num); 255062306a36Sopenharmony_ci return -EINVAL; 255162306a36Sopenharmony_ci } 255262306a36Sopenharmony_ci 255362306a36Sopenharmony_ci /* When we start there are none of either input nor output. */ 255462306a36Sopenharmony_ci *out_num = *in_num = 0; 255562306a36Sopenharmony_ci if (unlikely(log)) 255662306a36Sopenharmony_ci *log_num = 0; 255762306a36Sopenharmony_ci 255862306a36Sopenharmony_ci i = head; 255962306a36Sopenharmony_ci do { 256062306a36Sopenharmony_ci unsigned iov_count = *in_num + *out_num; 256162306a36Sopenharmony_ci if (unlikely(i >= vq->num)) { 256262306a36Sopenharmony_ci vq_err(vq, "Desc index is %u > %u, head = %u", 256362306a36Sopenharmony_ci i, vq->num, head); 256462306a36Sopenharmony_ci return -EINVAL; 256562306a36Sopenharmony_ci } 256662306a36Sopenharmony_ci if (unlikely(++found > vq->num)) { 256762306a36Sopenharmony_ci vq_err(vq, "Loop detected: last one at %u " 256862306a36Sopenharmony_ci "vq size %u head %u\n", 256962306a36Sopenharmony_ci i, vq->num, head); 257062306a36Sopenharmony_ci return -EINVAL; 257162306a36Sopenharmony_ci } 257262306a36Sopenharmony_ci ret = vhost_get_desc(vq, &desc, i); 257362306a36Sopenharmony_ci if (unlikely(ret)) { 257462306a36Sopenharmony_ci vq_err(vq, "Failed to get descriptor: idx %d addr %p\n", 257562306a36Sopenharmony_ci i, vq->desc + i); 257662306a36Sopenharmony_ci return -EFAULT; 257762306a36Sopenharmony_ci } 257862306a36Sopenharmony_ci if (desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_INDIRECT)) { 257962306a36Sopenharmony_ci ret = get_indirect(vq, iov, iov_size, 258062306a36Sopenharmony_ci out_num, in_num, 258162306a36Sopenharmony_ci log, log_num, &desc); 258262306a36Sopenharmony_ci if (unlikely(ret < 0)) { 258362306a36Sopenharmony_ci if (ret != -EAGAIN) 258462306a36Sopenharmony_ci vq_err(vq, "Failure detected " 258562306a36Sopenharmony_ci "in indirect descriptor at idx %d\n", i); 258662306a36Sopenharmony_ci return ret; 258762306a36Sopenharmony_ci } 258862306a36Sopenharmony_ci continue; 258962306a36Sopenharmony_ci } 259062306a36Sopenharmony_ci 259162306a36Sopenharmony_ci if (desc.flags & cpu_to_vhost16(vq, VRING_DESC_F_WRITE)) 259262306a36Sopenharmony_ci access = VHOST_ACCESS_WO; 259362306a36Sopenharmony_ci else 259462306a36Sopenharmony_ci access = VHOST_ACCESS_RO; 259562306a36Sopenharmony_ci ret = translate_desc(vq, vhost64_to_cpu(vq, desc.addr), 259662306a36Sopenharmony_ci vhost32_to_cpu(vq, desc.len), iov + iov_count, 259762306a36Sopenharmony_ci iov_size - iov_count, access); 259862306a36Sopenharmony_ci if (unlikely(ret < 0)) { 259962306a36Sopenharmony_ci if (ret != -EAGAIN) 260062306a36Sopenharmony_ci vq_err(vq, "Translation failure %d descriptor idx %d\n", 260162306a36Sopenharmony_ci ret, i); 260262306a36Sopenharmony_ci return ret; 260362306a36Sopenharmony_ci } 260462306a36Sopenharmony_ci if (access == VHOST_ACCESS_WO) { 260562306a36Sopenharmony_ci /* If this is an input descriptor, 260662306a36Sopenharmony_ci * increment that count. */ 260762306a36Sopenharmony_ci *in_num += ret; 260862306a36Sopenharmony_ci if (unlikely(log && ret)) { 260962306a36Sopenharmony_ci log[*log_num].addr = vhost64_to_cpu(vq, desc.addr); 261062306a36Sopenharmony_ci log[*log_num].len = vhost32_to_cpu(vq, desc.len); 261162306a36Sopenharmony_ci ++*log_num; 261262306a36Sopenharmony_ci } 261362306a36Sopenharmony_ci } else { 261462306a36Sopenharmony_ci /* If it's an output descriptor, they're all supposed 261562306a36Sopenharmony_ci * to come before any input descriptors. */ 261662306a36Sopenharmony_ci if (unlikely(*in_num)) { 261762306a36Sopenharmony_ci vq_err(vq, "Descriptor has out after in: " 261862306a36Sopenharmony_ci "idx %d\n", i); 261962306a36Sopenharmony_ci return -EINVAL; 262062306a36Sopenharmony_ci } 262162306a36Sopenharmony_ci *out_num += ret; 262262306a36Sopenharmony_ci } 262362306a36Sopenharmony_ci } while ((i = next_desc(vq, &desc)) != -1); 262462306a36Sopenharmony_ci 262562306a36Sopenharmony_ci /* On success, increment avail index. */ 262662306a36Sopenharmony_ci vq->last_avail_idx++; 262762306a36Sopenharmony_ci 262862306a36Sopenharmony_ci /* Assume notifications from guest are disabled at this point, 262962306a36Sopenharmony_ci * if they aren't we would need to update avail_event index. */ 263062306a36Sopenharmony_ci BUG_ON(!(vq->used_flags & VRING_USED_F_NO_NOTIFY)); 263162306a36Sopenharmony_ci return head; 263262306a36Sopenharmony_ci} 263362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_get_vq_desc); 263462306a36Sopenharmony_ci 263562306a36Sopenharmony_ci/* Reverse the effect of vhost_get_vq_desc. Useful for error handling. */ 263662306a36Sopenharmony_civoid vhost_discard_vq_desc(struct vhost_virtqueue *vq, int n) 263762306a36Sopenharmony_ci{ 263862306a36Sopenharmony_ci vq->last_avail_idx -= n; 263962306a36Sopenharmony_ci} 264062306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_discard_vq_desc); 264162306a36Sopenharmony_ci 264262306a36Sopenharmony_ci/* After we've used one of their buffers, we tell them about it. We'll then 264362306a36Sopenharmony_ci * want to notify the guest, using eventfd. */ 264462306a36Sopenharmony_ciint vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len) 264562306a36Sopenharmony_ci{ 264662306a36Sopenharmony_ci struct vring_used_elem heads = { 264762306a36Sopenharmony_ci cpu_to_vhost32(vq, head), 264862306a36Sopenharmony_ci cpu_to_vhost32(vq, len) 264962306a36Sopenharmony_ci }; 265062306a36Sopenharmony_ci 265162306a36Sopenharmony_ci return vhost_add_used_n(vq, &heads, 1); 265262306a36Sopenharmony_ci} 265362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_add_used); 265462306a36Sopenharmony_ci 265562306a36Sopenharmony_cistatic int __vhost_add_used_n(struct vhost_virtqueue *vq, 265662306a36Sopenharmony_ci struct vring_used_elem *heads, 265762306a36Sopenharmony_ci unsigned count) 265862306a36Sopenharmony_ci{ 265962306a36Sopenharmony_ci vring_used_elem_t __user *used; 266062306a36Sopenharmony_ci u16 old, new; 266162306a36Sopenharmony_ci int start; 266262306a36Sopenharmony_ci 266362306a36Sopenharmony_ci start = vq->last_used_idx & (vq->num - 1); 266462306a36Sopenharmony_ci used = vq->used->ring + start; 266562306a36Sopenharmony_ci if (vhost_put_used(vq, heads, start, count)) { 266662306a36Sopenharmony_ci vq_err(vq, "Failed to write used"); 266762306a36Sopenharmony_ci return -EFAULT; 266862306a36Sopenharmony_ci } 266962306a36Sopenharmony_ci if (unlikely(vq->log_used)) { 267062306a36Sopenharmony_ci /* Make sure data is seen before log. */ 267162306a36Sopenharmony_ci smp_wmb(); 267262306a36Sopenharmony_ci /* Log used ring entry write. */ 267362306a36Sopenharmony_ci log_used(vq, ((void __user *)used - (void __user *)vq->used), 267462306a36Sopenharmony_ci count * sizeof *used); 267562306a36Sopenharmony_ci } 267662306a36Sopenharmony_ci old = vq->last_used_idx; 267762306a36Sopenharmony_ci new = (vq->last_used_idx += count); 267862306a36Sopenharmony_ci /* If the driver never bothers to signal in a very long while, 267962306a36Sopenharmony_ci * used index might wrap around. If that happens, invalidate 268062306a36Sopenharmony_ci * signalled_used index we stored. TODO: make sure driver 268162306a36Sopenharmony_ci * signals at least once in 2^16 and remove this. */ 268262306a36Sopenharmony_ci if (unlikely((u16)(new - vq->signalled_used) < (u16)(new - old))) 268362306a36Sopenharmony_ci vq->signalled_used_valid = false; 268462306a36Sopenharmony_ci return 0; 268562306a36Sopenharmony_ci} 268662306a36Sopenharmony_ci 268762306a36Sopenharmony_ci/* After we've used one of their buffers, we tell them about it. We'll then 268862306a36Sopenharmony_ci * want to notify the guest, using eventfd. */ 268962306a36Sopenharmony_ciint vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads, 269062306a36Sopenharmony_ci unsigned count) 269162306a36Sopenharmony_ci{ 269262306a36Sopenharmony_ci int start, n, r; 269362306a36Sopenharmony_ci 269462306a36Sopenharmony_ci start = vq->last_used_idx & (vq->num - 1); 269562306a36Sopenharmony_ci n = vq->num - start; 269662306a36Sopenharmony_ci if (n < count) { 269762306a36Sopenharmony_ci r = __vhost_add_used_n(vq, heads, n); 269862306a36Sopenharmony_ci if (r < 0) 269962306a36Sopenharmony_ci return r; 270062306a36Sopenharmony_ci heads += n; 270162306a36Sopenharmony_ci count -= n; 270262306a36Sopenharmony_ci } 270362306a36Sopenharmony_ci r = __vhost_add_used_n(vq, heads, count); 270462306a36Sopenharmony_ci 270562306a36Sopenharmony_ci /* Make sure buffer is written before we update index. */ 270662306a36Sopenharmony_ci smp_wmb(); 270762306a36Sopenharmony_ci if (vhost_put_used_idx(vq)) { 270862306a36Sopenharmony_ci vq_err(vq, "Failed to increment used idx"); 270962306a36Sopenharmony_ci return -EFAULT; 271062306a36Sopenharmony_ci } 271162306a36Sopenharmony_ci if (unlikely(vq->log_used)) { 271262306a36Sopenharmony_ci /* Make sure used idx is seen before log. */ 271362306a36Sopenharmony_ci smp_wmb(); 271462306a36Sopenharmony_ci /* Log used index update. */ 271562306a36Sopenharmony_ci log_used(vq, offsetof(struct vring_used, idx), 271662306a36Sopenharmony_ci sizeof vq->used->idx); 271762306a36Sopenharmony_ci if (vq->log_ctx) 271862306a36Sopenharmony_ci eventfd_signal(vq->log_ctx, 1); 271962306a36Sopenharmony_ci } 272062306a36Sopenharmony_ci return r; 272162306a36Sopenharmony_ci} 272262306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_add_used_n); 272362306a36Sopenharmony_ci 272462306a36Sopenharmony_cistatic bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) 272562306a36Sopenharmony_ci{ 272662306a36Sopenharmony_ci __u16 old, new; 272762306a36Sopenharmony_ci __virtio16 event; 272862306a36Sopenharmony_ci bool v; 272962306a36Sopenharmony_ci /* Flush out used index updates. This is paired 273062306a36Sopenharmony_ci * with the barrier that the Guest executes when enabling 273162306a36Sopenharmony_ci * interrupts. */ 273262306a36Sopenharmony_ci smp_mb(); 273362306a36Sopenharmony_ci 273462306a36Sopenharmony_ci if (vhost_has_feature(vq, VIRTIO_F_NOTIFY_ON_EMPTY) && 273562306a36Sopenharmony_ci unlikely(vq->avail_idx == vq->last_avail_idx)) 273662306a36Sopenharmony_ci return true; 273762306a36Sopenharmony_ci 273862306a36Sopenharmony_ci if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) { 273962306a36Sopenharmony_ci __virtio16 flags; 274062306a36Sopenharmony_ci if (vhost_get_avail_flags(vq, &flags)) { 274162306a36Sopenharmony_ci vq_err(vq, "Failed to get flags"); 274262306a36Sopenharmony_ci return true; 274362306a36Sopenharmony_ci } 274462306a36Sopenharmony_ci return !(flags & cpu_to_vhost16(vq, VRING_AVAIL_F_NO_INTERRUPT)); 274562306a36Sopenharmony_ci } 274662306a36Sopenharmony_ci old = vq->signalled_used; 274762306a36Sopenharmony_ci v = vq->signalled_used_valid; 274862306a36Sopenharmony_ci new = vq->signalled_used = vq->last_used_idx; 274962306a36Sopenharmony_ci vq->signalled_used_valid = true; 275062306a36Sopenharmony_ci 275162306a36Sopenharmony_ci if (unlikely(!v)) 275262306a36Sopenharmony_ci return true; 275362306a36Sopenharmony_ci 275462306a36Sopenharmony_ci if (vhost_get_used_event(vq, &event)) { 275562306a36Sopenharmony_ci vq_err(vq, "Failed to get used event idx"); 275662306a36Sopenharmony_ci return true; 275762306a36Sopenharmony_ci } 275862306a36Sopenharmony_ci return vring_need_event(vhost16_to_cpu(vq, event), new, old); 275962306a36Sopenharmony_ci} 276062306a36Sopenharmony_ci 276162306a36Sopenharmony_ci/* This actually signals the guest, using eventfd. */ 276262306a36Sopenharmony_civoid vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq) 276362306a36Sopenharmony_ci{ 276462306a36Sopenharmony_ci /* Signal the Guest tell them we used something up. */ 276562306a36Sopenharmony_ci if (vq->call_ctx.ctx && vhost_notify(dev, vq)) 276662306a36Sopenharmony_ci eventfd_signal(vq->call_ctx.ctx, 1); 276762306a36Sopenharmony_ci} 276862306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_signal); 276962306a36Sopenharmony_ci 277062306a36Sopenharmony_ci/* And here's the combo meal deal. Supersize me! */ 277162306a36Sopenharmony_civoid vhost_add_used_and_signal(struct vhost_dev *dev, 277262306a36Sopenharmony_ci struct vhost_virtqueue *vq, 277362306a36Sopenharmony_ci unsigned int head, int len) 277462306a36Sopenharmony_ci{ 277562306a36Sopenharmony_ci vhost_add_used(vq, head, len); 277662306a36Sopenharmony_ci vhost_signal(dev, vq); 277762306a36Sopenharmony_ci} 277862306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_add_used_and_signal); 277962306a36Sopenharmony_ci 278062306a36Sopenharmony_ci/* multi-buffer version of vhost_add_used_and_signal */ 278162306a36Sopenharmony_civoid vhost_add_used_and_signal_n(struct vhost_dev *dev, 278262306a36Sopenharmony_ci struct vhost_virtqueue *vq, 278362306a36Sopenharmony_ci struct vring_used_elem *heads, unsigned count) 278462306a36Sopenharmony_ci{ 278562306a36Sopenharmony_ci vhost_add_used_n(vq, heads, count); 278662306a36Sopenharmony_ci vhost_signal(dev, vq); 278762306a36Sopenharmony_ci} 278862306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_add_used_and_signal_n); 278962306a36Sopenharmony_ci 279062306a36Sopenharmony_ci/* return true if we're sure that avaiable ring is empty */ 279162306a36Sopenharmony_cibool vhost_vq_avail_empty(struct vhost_dev *dev, struct vhost_virtqueue *vq) 279262306a36Sopenharmony_ci{ 279362306a36Sopenharmony_ci __virtio16 avail_idx; 279462306a36Sopenharmony_ci int r; 279562306a36Sopenharmony_ci 279662306a36Sopenharmony_ci if (vq->avail_idx != vq->last_avail_idx) 279762306a36Sopenharmony_ci return false; 279862306a36Sopenharmony_ci 279962306a36Sopenharmony_ci r = vhost_get_avail_idx(vq, &avail_idx); 280062306a36Sopenharmony_ci if (unlikely(r)) 280162306a36Sopenharmony_ci return false; 280262306a36Sopenharmony_ci vq->avail_idx = vhost16_to_cpu(vq, avail_idx); 280362306a36Sopenharmony_ci 280462306a36Sopenharmony_ci return vq->avail_idx == vq->last_avail_idx; 280562306a36Sopenharmony_ci} 280662306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_vq_avail_empty); 280762306a36Sopenharmony_ci 280862306a36Sopenharmony_ci/* OK, now we need to know about added descriptors. */ 280962306a36Sopenharmony_cibool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) 281062306a36Sopenharmony_ci{ 281162306a36Sopenharmony_ci __virtio16 avail_idx; 281262306a36Sopenharmony_ci int r; 281362306a36Sopenharmony_ci 281462306a36Sopenharmony_ci if (!(vq->used_flags & VRING_USED_F_NO_NOTIFY)) 281562306a36Sopenharmony_ci return false; 281662306a36Sopenharmony_ci vq->used_flags &= ~VRING_USED_F_NO_NOTIFY; 281762306a36Sopenharmony_ci if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) { 281862306a36Sopenharmony_ci r = vhost_update_used_flags(vq); 281962306a36Sopenharmony_ci if (r) { 282062306a36Sopenharmony_ci vq_err(vq, "Failed to enable notification at %p: %d\n", 282162306a36Sopenharmony_ci &vq->used->flags, r); 282262306a36Sopenharmony_ci return false; 282362306a36Sopenharmony_ci } 282462306a36Sopenharmony_ci } else { 282562306a36Sopenharmony_ci r = vhost_update_avail_event(vq); 282662306a36Sopenharmony_ci if (r) { 282762306a36Sopenharmony_ci vq_err(vq, "Failed to update avail event index at %p: %d\n", 282862306a36Sopenharmony_ci vhost_avail_event(vq), r); 282962306a36Sopenharmony_ci return false; 283062306a36Sopenharmony_ci } 283162306a36Sopenharmony_ci } 283262306a36Sopenharmony_ci /* They could have slipped one in as we were doing that: make 283362306a36Sopenharmony_ci * sure it's written, then check again. */ 283462306a36Sopenharmony_ci smp_mb(); 283562306a36Sopenharmony_ci r = vhost_get_avail_idx(vq, &avail_idx); 283662306a36Sopenharmony_ci if (r) { 283762306a36Sopenharmony_ci vq_err(vq, "Failed to check avail idx at %p: %d\n", 283862306a36Sopenharmony_ci &vq->avail->idx, r); 283962306a36Sopenharmony_ci return false; 284062306a36Sopenharmony_ci } 284162306a36Sopenharmony_ci vq->avail_idx = vhost16_to_cpu(vq, avail_idx); 284262306a36Sopenharmony_ci 284362306a36Sopenharmony_ci return vq->avail_idx != vq->last_avail_idx; 284462306a36Sopenharmony_ci} 284562306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_enable_notify); 284662306a36Sopenharmony_ci 284762306a36Sopenharmony_ci/* We don't need to be notified again. */ 284862306a36Sopenharmony_civoid vhost_disable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) 284962306a36Sopenharmony_ci{ 285062306a36Sopenharmony_ci int r; 285162306a36Sopenharmony_ci 285262306a36Sopenharmony_ci if (vq->used_flags & VRING_USED_F_NO_NOTIFY) 285362306a36Sopenharmony_ci return; 285462306a36Sopenharmony_ci vq->used_flags |= VRING_USED_F_NO_NOTIFY; 285562306a36Sopenharmony_ci if (!vhost_has_feature(vq, VIRTIO_RING_F_EVENT_IDX)) { 285662306a36Sopenharmony_ci r = vhost_update_used_flags(vq); 285762306a36Sopenharmony_ci if (r) 285862306a36Sopenharmony_ci vq_err(vq, "Failed to disable notification at %p: %d\n", 285962306a36Sopenharmony_ci &vq->used->flags, r); 286062306a36Sopenharmony_ci } 286162306a36Sopenharmony_ci} 286262306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_disable_notify); 286362306a36Sopenharmony_ci 286462306a36Sopenharmony_ci/* Create a new message. */ 286562306a36Sopenharmony_cistruct vhost_msg_node *vhost_new_msg(struct vhost_virtqueue *vq, int type) 286662306a36Sopenharmony_ci{ 286762306a36Sopenharmony_ci /* Make sure all padding within the structure is initialized. */ 286862306a36Sopenharmony_ci struct vhost_msg_node *node = kzalloc(sizeof(*node), GFP_KERNEL); 286962306a36Sopenharmony_ci if (!node) 287062306a36Sopenharmony_ci return NULL; 287162306a36Sopenharmony_ci 287262306a36Sopenharmony_ci node->vq = vq; 287362306a36Sopenharmony_ci node->msg.type = type; 287462306a36Sopenharmony_ci return node; 287562306a36Sopenharmony_ci} 287662306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_new_msg); 287762306a36Sopenharmony_ci 287862306a36Sopenharmony_civoid vhost_enqueue_msg(struct vhost_dev *dev, struct list_head *head, 287962306a36Sopenharmony_ci struct vhost_msg_node *node) 288062306a36Sopenharmony_ci{ 288162306a36Sopenharmony_ci spin_lock(&dev->iotlb_lock); 288262306a36Sopenharmony_ci list_add_tail(&node->node, head); 288362306a36Sopenharmony_ci spin_unlock(&dev->iotlb_lock); 288462306a36Sopenharmony_ci 288562306a36Sopenharmony_ci wake_up_interruptible_poll(&dev->wait, EPOLLIN | EPOLLRDNORM); 288662306a36Sopenharmony_ci} 288762306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_enqueue_msg); 288862306a36Sopenharmony_ci 288962306a36Sopenharmony_cistruct vhost_msg_node *vhost_dequeue_msg(struct vhost_dev *dev, 289062306a36Sopenharmony_ci struct list_head *head) 289162306a36Sopenharmony_ci{ 289262306a36Sopenharmony_ci struct vhost_msg_node *node = NULL; 289362306a36Sopenharmony_ci 289462306a36Sopenharmony_ci spin_lock(&dev->iotlb_lock); 289562306a36Sopenharmony_ci if (!list_empty(head)) { 289662306a36Sopenharmony_ci node = list_first_entry(head, struct vhost_msg_node, 289762306a36Sopenharmony_ci node); 289862306a36Sopenharmony_ci list_del(&node->node); 289962306a36Sopenharmony_ci } 290062306a36Sopenharmony_ci spin_unlock(&dev->iotlb_lock); 290162306a36Sopenharmony_ci 290262306a36Sopenharmony_ci return node; 290362306a36Sopenharmony_ci} 290462306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_dequeue_msg); 290562306a36Sopenharmony_ci 290662306a36Sopenharmony_civoid vhost_set_backend_features(struct vhost_dev *dev, u64 features) 290762306a36Sopenharmony_ci{ 290862306a36Sopenharmony_ci struct vhost_virtqueue *vq; 290962306a36Sopenharmony_ci int i; 291062306a36Sopenharmony_ci 291162306a36Sopenharmony_ci mutex_lock(&dev->mutex); 291262306a36Sopenharmony_ci for (i = 0; i < dev->nvqs; ++i) { 291362306a36Sopenharmony_ci vq = dev->vqs[i]; 291462306a36Sopenharmony_ci mutex_lock(&vq->mutex); 291562306a36Sopenharmony_ci vq->acked_backend_features = features; 291662306a36Sopenharmony_ci mutex_unlock(&vq->mutex); 291762306a36Sopenharmony_ci } 291862306a36Sopenharmony_ci mutex_unlock(&dev->mutex); 291962306a36Sopenharmony_ci} 292062306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(vhost_set_backend_features); 292162306a36Sopenharmony_ci 292262306a36Sopenharmony_cistatic int __init vhost_init(void) 292362306a36Sopenharmony_ci{ 292462306a36Sopenharmony_ci return 0; 292562306a36Sopenharmony_ci} 292662306a36Sopenharmony_ci 292762306a36Sopenharmony_cistatic void __exit vhost_exit(void) 292862306a36Sopenharmony_ci{ 292962306a36Sopenharmony_ci} 293062306a36Sopenharmony_ci 293162306a36Sopenharmony_cimodule_init(vhost_init); 293262306a36Sopenharmony_cimodule_exit(vhost_exit); 293362306a36Sopenharmony_ci 293462306a36Sopenharmony_ciMODULE_VERSION("0.0.1"); 293562306a36Sopenharmony_ciMODULE_LICENSE("GPL v2"); 293662306a36Sopenharmony_ciMODULE_AUTHOR("Michael S. Tsirkin"); 293762306a36Sopenharmony_ciMODULE_DESCRIPTION("Host kernel accelerator for virtio"); 2938