162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * 462306a36Sopenharmony_ci * Copyright (c) 2009, Microsoft Corporation. 562306a36Sopenharmony_ci * 662306a36Sopenharmony_ci * Authors: 762306a36Sopenharmony_ci * Haiyang Zhang <haiyangz@microsoft.com> 862306a36Sopenharmony_ci * Hank Janssen <hjanssen@microsoft.com> 962306a36Sopenharmony_ci * K. Y. Srinivasan <kys@microsoft.com> 1062306a36Sopenharmony_ci */ 1162306a36Sopenharmony_ci#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 1262306a36Sopenharmony_ci 1362306a36Sopenharmony_ci#include <linux/kernel.h> 1462306a36Sopenharmony_ci#include <linux/mm.h> 1562306a36Sopenharmony_ci#include <linux/hyperv.h> 1662306a36Sopenharmony_ci#include <linux/uio.h> 1762306a36Sopenharmony_ci#include <linux/vmalloc.h> 1862306a36Sopenharmony_ci#include <linux/slab.h> 1962306a36Sopenharmony_ci#include <linux/prefetch.h> 2062306a36Sopenharmony_ci#include <linux/io.h> 2162306a36Sopenharmony_ci#include <asm/mshyperv.h> 2262306a36Sopenharmony_ci 2362306a36Sopenharmony_ci#include "hyperv_vmbus.h" 2462306a36Sopenharmony_ci 2562306a36Sopenharmony_ci#define VMBUS_PKT_TRAILER 8 2662306a36Sopenharmony_ci 2762306a36Sopenharmony_ci/* 2862306a36Sopenharmony_ci * When we write to the ring buffer, check if the host needs to 2962306a36Sopenharmony_ci * be signaled. Here is the details of this protocol: 3062306a36Sopenharmony_ci * 3162306a36Sopenharmony_ci * 1. The host guarantees that while it is draining the 3262306a36Sopenharmony_ci * ring buffer, it will set the interrupt_mask to 3362306a36Sopenharmony_ci * indicate it does not need to be interrupted when 3462306a36Sopenharmony_ci * new data is placed. 3562306a36Sopenharmony_ci * 3662306a36Sopenharmony_ci * 2. The host guarantees that it will completely drain 3762306a36Sopenharmony_ci * the ring buffer before exiting the read loop. 
Further, 3862306a36Sopenharmony_ci * once the ring buffer is empty, it will clear the 3962306a36Sopenharmony_ci * interrupt_mask and re-check to see if new data has 4062306a36Sopenharmony_ci * arrived. 4162306a36Sopenharmony_ci * 4262306a36Sopenharmony_ci * KYS: Oct. 30, 2016: 4362306a36Sopenharmony_ci * It looks like Windows hosts have logic to deal with DOS attacks that 4462306a36Sopenharmony_ci * can be triggered if it receives interrupts when it is not expecting 4562306a36Sopenharmony_ci * the interrupt. The host expects interrupts only when the ring 4662306a36Sopenharmony_ci * transitions from empty to non-empty (or full to non full on the guest 4762306a36Sopenharmony_ci * to host ring). 4862306a36Sopenharmony_ci * So, base the signaling decision solely on the ring state until the 4962306a36Sopenharmony_ci * host logic is fixed. 5062306a36Sopenharmony_ci */ 5162306a36Sopenharmony_ci 5262306a36Sopenharmony_cistatic void hv_signal_on_write(u32 old_write, struct vmbus_channel *channel) 5362306a36Sopenharmony_ci{ 5462306a36Sopenharmony_ci struct hv_ring_buffer_info *rbi = &channel->outbound; 5562306a36Sopenharmony_ci 5662306a36Sopenharmony_ci virt_mb(); 5762306a36Sopenharmony_ci if (READ_ONCE(rbi->ring_buffer->interrupt_mask)) 5862306a36Sopenharmony_ci return; 5962306a36Sopenharmony_ci 6062306a36Sopenharmony_ci /* check interrupt_mask before read_index */ 6162306a36Sopenharmony_ci virt_rmb(); 6262306a36Sopenharmony_ci /* 6362306a36Sopenharmony_ci * This is the only case we need to signal when the 6462306a36Sopenharmony_ci * ring transitions from being empty to non-empty. 6562306a36Sopenharmony_ci */ 6662306a36Sopenharmony_ci if (old_write == READ_ONCE(rbi->ring_buffer->read_index)) { 6762306a36Sopenharmony_ci ++channel->intr_out_empty; 6862306a36Sopenharmony_ci vmbus_setevent(channel); 6962306a36Sopenharmony_ci } 7062306a36Sopenharmony_ci} 7162306a36Sopenharmony_ci 7262306a36Sopenharmony_ci/* Get the next write location for the specified ring buffer. 
*/ 7362306a36Sopenharmony_cistatic inline u32 7462306a36Sopenharmony_cihv_get_next_write_location(struct hv_ring_buffer_info *ring_info) 7562306a36Sopenharmony_ci{ 7662306a36Sopenharmony_ci u32 next = ring_info->ring_buffer->write_index; 7762306a36Sopenharmony_ci 7862306a36Sopenharmony_ci return next; 7962306a36Sopenharmony_ci} 8062306a36Sopenharmony_ci 8162306a36Sopenharmony_ci/* Set the next write location for the specified ring buffer. */ 8262306a36Sopenharmony_cistatic inline void 8362306a36Sopenharmony_cihv_set_next_write_location(struct hv_ring_buffer_info *ring_info, 8462306a36Sopenharmony_ci u32 next_write_location) 8562306a36Sopenharmony_ci{ 8662306a36Sopenharmony_ci ring_info->ring_buffer->write_index = next_write_location; 8762306a36Sopenharmony_ci} 8862306a36Sopenharmony_ci 8962306a36Sopenharmony_ci/* Get the size of the ring buffer. */ 9062306a36Sopenharmony_cistatic inline u32 9162306a36Sopenharmony_cihv_get_ring_buffersize(const struct hv_ring_buffer_info *ring_info) 9262306a36Sopenharmony_ci{ 9362306a36Sopenharmony_ci return ring_info->ring_datasize; 9462306a36Sopenharmony_ci} 9562306a36Sopenharmony_ci 9662306a36Sopenharmony_ci/* Get the read and write indices as u64 of the specified ring buffer. */ 9762306a36Sopenharmony_cistatic inline u64 9862306a36Sopenharmony_cihv_get_ring_bufferindices(struct hv_ring_buffer_info *ring_info) 9962306a36Sopenharmony_ci{ 10062306a36Sopenharmony_ci return (u64)ring_info->ring_buffer->write_index << 32; 10162306a36Sopenharmony_ci} 10262306a36Sopenharmony_ci 10362306a36Sopenharmony_ci/* 10462306a36Sopenharmony_ci * Helper routine to copy from source to ring buffer. 10562306a36Sopenharmony_ci * Assume there is enough room. Handles wrap-around in dest case only!! 
10662306a36Sopenharmony_ci */ 10762306a36Sopenharmony_cistatic u32 hv_copyto_ringbuffer( 10862306a36Sopenharmony_ci struct hv_ring_buffer_info *ring_info, 10962306a36Sopenharmony_ci u32 start_write_offset, 11062306a36Sopenharmony_ci const void *src, 11162306a36Sopenharmony_ci u32 srclen) 11262306a36Sopenharmony_ci{ 11362306a36Sopenharmony_ci void *ring_buffer = hv_get_ring_buffer(ring_info); 11462306a36Sopenharmony_ci u32 ring_buffer_size = hv_get_ring_buffersize(ring_info); 11562306a36Sopenharmony_ci 11662306a36Sopenharmony_ci memcpy(ring_buffer + start_write_offset, src, srclen); 11762306a36Sopenharmony_ci 11862306a36Sopenharmony_ci start_write_offset += srclen; 11962306a36Sopenharmony_ci if (start_write_offset >= ring_buffer_size) 12062306a36Sopenharmony_ci start_write_offset -= ring_buffer_size; 12162306a36Sopenharmony_ci 12262306a36Sopenharmony_ci return start_write_offset; 12362306a36Sopenharmony_ci} 12462306a36Sopenharmony_ci 12562306a36Sopenharmony_ci/* 12662306a36Sopenharmony_ci * 12762306a36Sopenharmony_ci * hv_get_ringbuffer_availbytes() 12862306a36Sopenharmony_ci * 12962306a36Sopenharmony_ci * Get number of bytes available to read and to write to 13062306a36Sopenharmony_ci * for the specified ring buffer 13162306a36Sopenharmony_ci */ 13262306a36Sopenharmony_cistatic void 13362306a36Sopenharmony_cihv_get_ringbuffer_availbytes(const struct hv_ring_buffer_info *rbi, 13462306a36Sopenharmony_ci u32 *read, u32 *write) 13562306a36Sopenharmony_ci{ 13662306a36Sopenharmony_ci u32 read_loc, write_loc, dsize; 13762306a36Sopenharmony_ci 13862306a36Sopenharmony_ci /* Capture the read/write indices before they changed */ 13962306a36Sopenharmony_ci read_loc = READ_ONCE(rbi->ring_buffer->read_index); 14062306a36Sopenharmony_ci write_loc = READ_ONCE(rbi->ring_buffer->write_index); 14162306a36Sopenharmony_ci dsize = rbi->ring_datasize; 14262306a36Sopenharmony_ci 14362306a36Sopenharmony_ci *write = write_loc >= read_loc ? 
dsize - (write_loc - read_loc) : 14462306a36Sopenharmony_ci read_loc - write_loc; 14562306a36Sopenharmony_ci *read = dsize - *write; 14662306a36Sopenharmony_ci} 14762306a36Sopenharmony_ci 14862306a36Sopenharmony_ci/* Get various debug metrics for the specified ring buffer. */ 14962306a36Sopenharmony_ciint hv_ringbuffer_get_debuginfo(struct hv_ring_buffer_info *ring_info, 15062306a36Sopenharmony_ci struct hv_ring_buffer_debug_info *debug_info) 15162306a36Sopenharmony_ci{ 15262306a36Sopenharmony_ci u32 bytes_avail_towrite; 15362306a36Sopenharmony_ci u32 bytes_avail_toread; 15462306a36Sopenharmony_ci 15562306a36Sopenharmony_ci mutex_lock(&ring_info->ring_buffer_mutex); 15662306a36Sopenharmony_ci 15762306a36Sopenharmony_ci if (!ring_info->ring_buffer) { 15862306a36Sopenharmony_ci mutex_unlock(&ring_info->ring_buffer_mutex); 15962306a36Sopenharmony_ci return -EINVAL; 16062306a36Sopenharmony_ci } 16162306a36Sopenharmony_ci 16262306a36Sopenharmony_ci hv_get_ringbuffer_availbytes(ring_info, 16362306a36Sopenharmony_ci &bytes_avail_toread, 16462306a36Sopenharmony_ci &bytes_avail_towrite); 16562306a36Sopenharmony_ci debug_info->bytes_avail_toread = bytes_avail_toread; 16662306a36Sopenharmony_ci debug_info->bytes_avail_towrite = bytes_avail_towrite; 16762306a36Sopenharmony_ci debug_info->current_read_index = ring_info->ring_buffer->read_index; 16862306a36Sopenharmony_ci debug_info->current_write_index = ring_info->ring_buffer->write_index; 16962306a36Sopenharmony_ci debug_info->current_interrupt_mask 17062306a36Sopenharmony_ci = ring_info->ring_buffer->interrupt_mask; 17162306a36Sopenharmony_ci mutex_unlock(&ring_info->ring_buffer_mutex); 17262306a36Sopenharmony_ci 17362306a36Sopenharmony_ci return 0; 17462306a36Sopenharmony_ci} 17562306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(hv_ringbuffer_get_debuginfo); 17662306a36Sopenharmony_ci 17762306a36Sopenharmony_ci/* Initialize a channel's ring buffer info mutex locks */ 17862306a36Sopenharmony_civoid hv_ringbuffer_pre_init(struct 
vmbus_channel *channel) 17962306a36Sopenharmony_ci{ 18062306a36Sopenharmony_ci mutex_init(&channel->inbound.ring_buffer_mutex); 18162306a36Sopenharmony_ci mutex_init(&channel->outbound.ring_buffer_mutex); 18262306a36Sopenharmony_ci} 18362306a36Sopenharmony_ci 18462306a36Sopenharmony_ci/* Initialize the ring buffer. */ 18562306a36Sopenharmony_ciint hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info, 18662306a36Sopenharmony_ci struct page *pages, u32 page_cnt, u32 max_pkt_size) 18762306a36Sopenharmony_ci{ 18862306a36Sopenharmony_ci struct page **pages_wraparound; 18962306a36Sopenharmony_ci int i; 19062306a36Sopenharmony_ci 19162306a36Sopenharmony_ci BUILD_BUG_ON((sizeof(struct hv_ring_buffer) != PAGE_SIZE)); 19262306a36Sopenharmony_ci 19362306a36Sopenharmony_ci /* 19462306a36Sopenharmony_ci * First page holds struct hv_ring_buffer, do wraparound mapping for 19562306a36Sopenharmony_ci * the rest. 19662306a36Sopenharmony_ci */ 19762306a36Sopenharmony_ci pages_wraparound = kcalloc(page_cnt * 2 - 1, 19862306a36Sopenharmony_ci sizeof(struct page *), 19962306a36Sopenharmony_ci GFP_KERNEL); 20062306a36Sopenharmony_ci if (!pages_wraparound) 20162306a36Sopenharmony_ci return -ENOMEM; 20262306a36Sopenharmony_ci 20362306a36Sopenharmony_ci pages_wraparound[0] = pages; 20462306a36Sopenharmony_ci for (i = 0; i < 2 * (page_cnt - 1); i++) 20562306a36Sopenharmony_ci pages_wraparound[i + 1] = 20662306a36Sopenharmony_ci &pages[i % (page_cnt - 1) + 1]; 20762306a36Sopenharmony_ci 20862306a36Sopenharmony_ci ring_info->ring_buffer = (struct hv_ring_buffer *) 20962306a36Sopenharmony_ci vmap(pages_wraparound, page_cnt * 2 - 1, VM_MAP, 21062306a36Sopenharmony_ci pgprot_decrypted(PAGE_KERNEL)); 21162306a36Sopenharmony_ci 21262306a36Sopenharmony_ci kfree(pages_wraparound); 21362306a36Sopenharmony_ci if (!ring_info->ring_buffer) 21462306a36Sopenharmony_ci return -ENOMEM; 21562306a36Sopenharmony_ci 21662306a36Sopenharmony_ci /* 21762306a36Sopenharmony_ci * Ensure the header page is zero'ed 
since 21862306a36Sopenharmony_ci * encryption status may have changed. 21962306a36Sopenharmony_ci */ 22062306a36Sopenharmony_ci memset(ring_info->ring_buffer, 0, HV_HYP_PAGE_SIZE); 22162306a36Sopenharmony_ci 22262306a36Sopenharmony_ci ring_info->ring_buffer->read_index = 22362306a36Sopenharmony_ci ring_info->ring_buffer->write_index = 0; 22462306a36Sopenharmony_ci 22562306a36Sopenharmony_ci /* Set the feature bit for enabling flow control. */ 22662306a36Sopenharmony_ci ring_info->ring_buffer->feature_bits.value = 1; 22762306a36Sopenharmony_ci 22862306a36Sopenharmony_ci ring_info->ring_size = page_cnt << PAGE_SHIFT; 22962306a36Sopenharmony_ci ring_info->ring_size_div10_reciprocal = 23062306a36Sopenharmony_ci reciprocal_value(ring_info->ring_size / 10); 23162306a36Sopenharmony_ci ring_info->ring_datasize = ring_info->ring_size - 23262306a36Sopenharmony_ci sizeof(struct hv_ring_buffer); 23362306a36Sopenharmony_ci ring_info->priv_read_index = 0; 23462306a36Sopenharmony_ci 23562306a36Sopenharmony_ci /* Initialize buffer that holds copies of incoming packets */ 23662306a36Sopenharmony_ci if (max_pkt_size) { 23762306a36Sopenharmony_ci ring_info->pkt_buffer = kzalloc(max_pkt_size, GFP_KERNEL); 23862306a36Sopenharmony_ci if (!ring_info->pkt_buffer) 23962306a36Sopenharmony_ci return -ENOMEM; 24062306a36Sopenharmony_ci ring_info->pkt_buffer_size = max_pkt_size; 24162306a36Sopenharmony_ci } 24262306a36Sopenharmony_ci 24362306a36Sopenharmony_ci spin_lock_init(&ring_info->ring_lock); 24462306a36Sopenharmony_ci 24562306a36Sopenharmony_ci return 0; 24662306a36Sopenharmony_ci} 24762306a36Sopenharmony_ci 24862306a36Sopenharmony_ci/* Cleanup the ring buffer. 
*/ 24962306a36Sopenharmony_civoid hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info) 25062306a36Sopenharmony_ci{ 25162306a36Sopenharmony_ci mutex_lock(&ring_info->ring_buffer_mutex); 25262306a36Sopenharmony_ci vunmap(ring_info->ring_buffer); 25362306a36Sopenharmony_ci ring_info->ring_buffer = NULL; 25462306a36Sopenharmony_ci mutex_unlock(&ring_info->ring_buffer_mutex); 25562306a36Sopenharmony_ci 25662306a36Sopenharmony_ci kfree(ring_info->pkt_buffer); 25762306a36Sopenharmony_ci ring_info->pkt_buffer = NULL; 25862306a36Sopenharmony_ci ring_info->pkt_buffer_size = 0; 25962306a36Sopenharmony_ci} 26062306a36Sopenharmony_ci 26162306a36Sopenharmony_ci/* 26262306a36Sopenharmony_ci * Check if the ring buffer spinlock is available to take or not; used on 26362306a36Sopenharmony_ci * atomic contexts, like panic path (see the Hyper-V framebuffer driver). 26462306a36Sopenharmony_ci */ 26562306a36Sopenharmony_ci 26662306a36Sopenharmony_cibool hv_ringbuffer_spinlock_busy(struct vmbus_channel *channel) 26762306a36Sopenharmony_ci{ 26862306a36Sopenharmony_ci struct hv_ring_buffer_info *rinfo = &channel->outbound; 26962306a36Sopenharmony_ci 27062306a36Sopenharmony_ci return spin_is_locked(&rinfo->ring_lock); 27162306a36Sopenharmony_ci} 27262306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(hv_ringbuffer_spinlock_busy); 27362306a36Sopenharmony_ci 27462306a36Sopenharmony_ci/* Write to the ring buffer. 
*/ 27562306a36Sopenharmony_ciint hv_ringbuffer_write(struct vmbus_channel *channel, 27662306a36Sopenharmony_ci const struct kvec *kv_list, u32 kv_count, 27762306a36Sopenharmony_ci u64 requestid, u64 *trans_id) 27862306a36Sopenharmony_ci{ 27962306a36Sopenharmony_ci int i; 28062306a36Sopenharmony_ci u32 bytes_avail_towrite; 28162306a36Sopenharmony_ci u32 totalbytes_towrite = sizeof(u64); 28262306a36Sopenharmony_ci u32 next_write_location; 28362306a36Sopenharmony_ci u32 old_write; 28462306a36Sopenharmony_ci u64 prev_indices; 28562306a36Sopenharmony_ci unsigned long flags; 28662306a36Sopenharmony_ci struct hv_ring_buffer_info *outring_info = &channel->outbound; 28762306a36Sopenharmony_ci struct vmpacket_descriptor *desc = kv_list[0].iov_base; 28862306a36Sopenharmony_ci u64 __trans_id, rqst_id = VMBUS_NO_RQSTOR; 28962306a36Sopenharmony_ci 29062306a36Sopenharmony_ci if (channel->rescind) 29162306a36Sopenharmony_ci return -ENODEV; 29262306a36Sopenharmony_ci 29362306a36Sopenharmony_ci for (i = 0; i < kv_count; i++) 29462306a36Sopenharmony_ci totalbytes_towrite += kv_list[i].iov_len; 29562306a36Sopenharmony_ci 29662306a36Sopenharmony_ci spin_lock_irqsave(&outring_info->ring_lock, flags); 29762306a36Sopenharmony_ci 29862306a36Sopenharmony_ci bytes_avail_towrite = hv_get_bytes_to_write(outring_info); 29962306a36Sopenharmony_ci 30062306a36Sopenharmony_ci /* 30162306a36Sopenharmony_ci * If there is only room for the packet, assume it is full. 30262306a36Sopenharmony_ci * Otherwise, the next time around, we think the ring buffer 30362306a36Sopenharmony_ci * is empty since the read index == write index. 
30462306a36Sopenharmony_ci */ 30562306a36Sopenharmony_ci if (bytes_avail_towrite <= totalbytes_towrite) { 30662306a36Sopenharmony_ci ++channel->out_full_total; 30762306a36Sopenharmony_ci 30862306a36Sopenharmony_ci if (!channel->out_full_flag) { 30962306a36Sopenharmony_ci ++channel->out_full_first; 31062306a36Sopenharmony_ci channel->out_full_flag = true; 31162306a36Sopenharmony_ci } 31262306a36Sopenharmony_ci 31362306a36Sopenharmony_ci spin_unlock_irqrestore(&outring_info->ring_lock, flags); 31462306a36Sopenharmony_ci return -EAGAIN; 31562306a36Sopenharmony_ci } 31662306a36Sopenharmony_ci 31762306a36Sopenharmony_ci channel->out_full_flag = false; 31862306a36Sopenharmony_ci 31962306a36Sopenharmony_ci /* Write to the ring buffer */ 32062306a36Sopenharmony_ci next_write_location = hv_get_next_write_location(outring_info); 32162306a36Sopenharmony_ci 32262306a36Sopenharmony_ci old_write = next_write_location; 32362306a36Sopenharmony_ci 32462306a36Sopenharmony_ci for (i = 0; i < kv_count; i++) { 32562306a36Sopenharmony_ci next_write_location = hv_copyto_ringbuffer(outring_info, 32662306a36Sopenharmony_ci next_write_location, 32762306a36Sopenharmony_ci kv_list[i].iov_base, 32862306a36Sopenharmony_ci kv_list[i].iov_len); 32962306a36Sopenharmony_ci } 33062306a36Sopenharmony_ci 33162306a36Sopenharmony_ci /* 33262306a36Sopenharmony_ci * Allocate the request ID after the data has been copied into the 33362306a36Sopenharmony_ci * ring buffer. Once this request ID is allocated, the completion 33462306a36Sopenharmony_ci * path could find the data and free it. 
33562306a36Sopenharmony_ci */ 33662306a36Sopenharmony_ci 33762306a36Sopenharmony_ci if (desc->flags == VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED) { 33862306a36Sopenharmony_ci if (channel->next_request_id_callback != NULL) { 33962306a36Sopenharmony_ci rqst_id = channel->next_request_id_callback(channel, requestid); 34062306a36Sopenharmony_ci if (rqst_id == VMBUS_RQST_ERROR) { 34162306a36Sopenharmony_ci spin_unlock_irqrestore(&outring_info->ring_lock, flags); 34262306a36Sopenharmony_ci return -EAGAIN; 34362306a36Sopenharmony_ci } 34462306a36Sopenharmony_ci } 34562306a36Sopenharmony_ci } 34662306a36Sopenharmony_ci desc = hv_get_ring_buffer(outring_info) + old_write; 34762306a36Sopenharmony_ci __trans_id = (rqst_id == VMBUS_NO_RQSTOR) ? requestid : rqst_id; 34862306a36Sopenharmony_ci /* 34962306a36Sopenharmony_ci * Ensure the compiler doesn't generate code that reads the value of 35062306a36Sopenharmony_ci * the transaction ID from the ring buffer, which is shared with the 35162306a36Sopenharmony_ci * Hyper-V host and subject to being changed at any time. 
35262306a36Sopenharmony_ci */ 35362306a36Sopenharmony_ci WRITE_ONCE(desc->trans_id, __trans_id); 35462306a36Sopenharmony_ci if (trans_id) 35562306a36Sopenharmony_ci *trans_id = __trans_id; 35662306a36Sopenharmony_ci 35762306a36Sopenharmony_ci /* Set previous packet start */ 35862306a36Sopenharmony_ci prev_indices = hv_get_ring_bufferindices(outring_info); 35962306a36Sopenharmony_ci 36062306a36Sopenharmony_ci next_write_location = hv_copyto_ringbuffer(outring_info, 36162306a36Sopenharmony_ci next_write_location, 36262306a36Sopenharmony_ci &prev_indices, 36362306a36Sopenharmony_ci sizeof(u64)); 36462306a36Sopenharmony_ci 36562306a36Sopenharmony_ci /* Issue a full memory barrier before updating the write index */ 36662306a36Sopenharmony_ci virt_mb(); 36762306a36Sopenharmony_ci 36862306a36Sopenharmony_ci /* Now, update the write location */ 36962306a36Sopenharmony_ci hv_set_next_write_location(outring_info, next_write_location); 37062306a36Sopenharmony_ci 37162306a36Sopenharmony_ci 37262306a36Sopenharmony_ci spin_unlock_irqrestore(&outring_info->ring_lock, flags); 37362306a36Sopenharmony_ci 37462306a36Sopenharmony_ci hv_signal_on_write(old_write, channel); 37562306a36Sopenharmony_ci 37662306a36Sopenharmony_ci if (channel->rescind) { 37762306a36Sopenharmony_ci if (rqst_id != VMBUS_NO_RQSTOR) { 37862306a36Sopenharmony_ci /* Reclaim request ID to avoid leak of IDs */ 37962306a36Sopenharmony_ci if (channel->request_addr_callback != NULL) 38062306a36Sopenharmony_ci channel->request_addr_callback(channel, rqst_id); 38162306a36Sopenharmony_ci } 38262306a36Sopenharmony_ci return -ENODEV; 38362306a36Sopenharmony_ci } 38462306a36Sopenharmony_ci 38562306a36Sopenharmony_ci return 0; 38662306a36Sopenharmony_ci} 38762306a36Sopenharmony_ci 38862306a36Sopenharmony_ciint hv_ringbuffer_read(struct vmbus_channel *channel, 38962306a36Sopenharmony_ci void *buffer, u32 buflen, u32 *buffer_actual_len, 39062306a36Sopenharmony_ci u64 *requestid, bool raw) 39162306a36Sopenharmony_ci{ 
39262306a36Sopenharmony_ci struct vmpacket_descriptor *desc; 39362306a36Sopenharmony_ci u32 packetlen, offset; 39462306a36Sopenharmony_ci 39562306a36Sopenharmony_ci if (unlikely(buflen == 0)) 39662306a36Sopenharmony_ci return -EINVAL; 39762306a36Sopenharmony_ci 39862306a36Sopenharmony_ci *buffer_actual_len = 0; 39962306a36Sopenharmony_ci *requestid = 0; 40062306a36Sopenharmony_ci 40162306a36Sopenharmony_ci /* Make sure there is something to read */ 40262306a36Sopenharmony_ci desc = hv_pkt_iter_first(channel); 40362306a36Sopenharmony_ci if (desc == NULL) { 40462306a36Sopenharmony_ci /* 40562306a36Sopenharmony_ci * No error is set when there is even no header, drivers are 40662306a36Sopenharmony_ci * supposed to analyze buffer_actual_len. 40762306a36Sopenharmony_ci */ 40862306a36Sopenharmony_ci return 0; 40962306a36Sopenharmony_ci } 41062306a36Sopenharmony_ci 41162306a36Sopenharmony_ci offset = raw ? 0 : (desc->offset8 << 3); 41262306a36Sopenharmony_ci packetlen = (desc->len8 << 3) - offset; 41362306a36Sopenharmony_ci *buffer_actual_len = packetlen; 41462306a36Sopenharmony_ci *requestid = desc->trans_id; 41562306a36Sopenharmony_ci 41662306a36Sopenharmony_ci if (unlikely(packetlen > buflen)) 41762306a36Sopenharmony_ci return -ENOBUFS; 41862306a36Sopenharmony_ci 41962306a36Sopenharmony_ci /* since ring is double mapped, only one copy is necessary */ 42062306a36Sopenharmony_ci memcpy(buffer, (const char *)desc + offset, packetlen); 42162306a36Sopenharmony_ci 42262306a36Sopenharmony_ci /* Advance ring index to next packet descriptor */ 42362306a36Sopenharmony_ci __hv_pkt_iter_next(channel, desc); 42462306a36Sopenharmony_ci 42562306a36Sopenharmony_ci /* Notify host of update */ 42662306a36Sopenharmony_ci hv_pkt_iter_close(channel); 42762306a36Sopenharmony_ci 42862306a36Sopenharmony_ci return 0; 42962306a36Sopenharmony_ci} 43062306a36Sopenharmony_ci 43162306a36Sopenharmony_ci/* 43262306a36Sopenharmony_ci * Determine number of bytes available in ring buffer after 
43362306a36Sopenharmony_ci * the current iterator (priv_read_index) location. 43462306a36Sopenharmony_ci * 43562306a36Sopenharmony_ci * This is similar to hv_get_bytes_to_read but with private 43662306a36Sopenharmony_ci * read index instead. 43762306a36Sopenharmony_ci */ 43862306a36Sopenharmony_cistatic u32 hv_pkt_iter_avail(const struct hv_ring_buffer_info *rbi) 43962306a36Sopenharmony_ci{ 44062306a36Sopenharmony_ci u32 priv_read_loc = rbi->priv_read_index; 44162306a36Sopenharmony_ci u32 write_loc; 44262306a36Sopenharmony_ci 44362306a36Sopenharmony_ci /* 44462306a36Sopenharmony_ci * The Hyper-V host writes the packet data, then uses 44562306a36Sopenharmony_ci * store_release() to update the write_index. Use load_acquire() 44662306a36Sopenharmony_ci * here to prevent loads of the packet data from being re-ordered 44762306a36Sopenharmony_ci * before the read of the write_index and potentially getting 44862306a36Sopenharmony_ci * stale data. 44962306a36Sopenharmony_ci */ 45062306a36Sopenharmony_ci write_loc = virt_load_acquire(&rbi->ring_buffer->write_index); 45162306a36Sopenharmony_ci 45262306a36Sopenharmony_ci if (write_loc >= priv_read_loc) 45362306a36Sopenharmony_ci return write_loc - priv_read_loc; 45462306a36Sopenharmony_ci else 45562306a36Sopenharmony_ci return (rbi->ring_datasize - priv_read_loc) + write_loc; 45662306a36Sopenharmony_ci} 45762306a36Sopenharmony_ci 45862306a36Sopenharmony_ci/* 45962306a36Sopenharmony_ci * Get first vmbus packet from ring buffer after read_index 46062306a36Sopenharmony_ci * 46162306a36Sopenharmony_ci * If ring buffer is empty, returns NULL and no other action needed. 
46262306a36Sopenharmony_ci */ 46362306a36Sopenharmony_cistruct vmpacket_descriptor *hv_pkt_iter_first(struct vmbus_channel *channel) 46462306a36Sopenharmony_ci{ 46562306a36Sopenharmony_ci struct hv_ring_buffer_info *rbi = &channel->inbound; 46662306a36Sopenharmony_ci struct vmpacket_descriptor *desc, *desc_copy; 46762306a36Sopenharmony_ci u32 bytes_avail, pkt_len, pkt_offset; 46862306a36Sopenharmony_ci 46962306a36Sopenharmony_ci hv_debug_delay_test(channel, MESSAGE_DELAY); 47062306a36Sopenharmony_ci 47162306a36Sopenharmony_ci bytes_avail = hv_pkt_iter_avail(rbi); 47262306a36Sopenharmony_ci if (bytes_avail < sizeof(struct vmpacket_descriptor)) 47362306a36Sopenharmony_ci return NULL; 47462306a36Sopenharmony_ci bytes_avail = min(rbi->pkt_buffer_size, bytes_avail); 47562306a36Sopenharmony_ci 47662306a36Sopenharmony_ci desc = (struct vmpacket_descriptor *)(hv_get_ring_buffer(rbi) + rbi->priv_read_index); 47762306a36Sopenharmony_ci 47862306a36Sopenharmony_ci /* 47962306a36Sopenharmony_ci * Ensure the compiler does not use references to incoming Hyper-V values (which 48062306a36Sopenharmony_ci * could change at any moment) when reading local variables later in the code 48162306a36Sopenharmony_ci */ 48262306a36Sopenharmony_ci pkt_len = READ_ONCE(desc->len8) << 3; 48362306a36Sopenharmony_ci pkt_offset = READ_ONCE(desc->offset8) << 3; 48462306a36Sopenharmony_ci 48562306a36Sopenharmony_ci /* 48662306a36Sopenharmony_ci * If pkt_len is invalid, set it to the smaller of hv_pkt_iter_avail() and 48762306a36Sopenharmony_ci * rbi->pkt_buffer_size 48862306a36Sopenharmony_ci */ 48962306a36Sopenharmony_ci if (pkt_len < sizeof(struct vmpacket_descriptor) || pkt_len > bytes_avail) 49062306a36Sopenharmony_ci pkt_len = bytes_avail; 49162306a36Sopenharmony_ci 49262306a36Sopenharmony_ci /* 49362306a36Sopenharmony_ci * If pkt_offset is invalid, arbitrarily set it to 49462306a36Sopenharmony_ci * the size of vmpacket_descriptor 49562306a36Sopenharmony_ci */ 49662306a36Sopenharmony_ci if 
(pkt_offset < sizeof(struct vmpacket_descriptor) || pkt_offset > pkt_len) 49762306a36Sopenharmony_ci pkt_offset = sizeof(struct vmpacket_descriptor); 49862306a36Sopenharmony_ci 49962306a36Sopenharmony_ci /* Copy the Hyper-V packet out of the ring buffer */ 50062306a36Sopenharmony_ci desc_copy = (struct vmpacket_descriptor *)rbi->pkt_buffer; 50162306a36Sopenharmony_ci memcpy(desc_copy, desc, pkt_len); 50262306a36Sopenharmony_ci 50362306a36Sopenharmony_ci /* 50462306a36Sopenharmony_ci * Hyper-V could still change len8 and offset8 after the earlier read. 50562306a36Sopenharmony_ci * Ensure that desc_copy has legal values for len8 and offset8 that 50662306a36Sopenharmony_ci * are consistent with the copy we just made 50762306a36Sopenharmony_ci */ 50862306a36Sopenharmony_ci desc_copy->len8 = pkt_len >> 3; 50962306a36Sopenharmony_ci desc_copy->offset8 = pkt_offset >> 3; 51062306a36Sopenharmony_ci 51162306a36Sopenharmony_ci return desc_copy; 51262306a36Sopenharmony_ci} 51362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(hv_pkt_iter_first); 51462306a36Sopenharmony_ci 51562306a36Sopenharmony_ci/* 51662306a36Sopenharmony_ci * Get next vmbus packet from ring buffer. 51762306a36Sopenharmony_ci * 51862306a36Sopenharmony_ci * Advances the current location (priv_read_index) and checks for more 51962306a36Sopenharmony_ci * data. If the end of the ring buffer is reached, then return NULL. 
52062306a36Sopenharmony_ci */ 52162306a36Sopenharmony_cistruct vmpacket_descriptor * 52262306a36Sopenharmony_ci__hv_pkt_iter_next(struct vmbus_channel *channel, 52362306a36Sopenharmony_ci const struct vmpacket_descriptor *desc) 52462306a36Sopenharmony_ci{ 52562306a36Sopenharmony_ci struct hv_ring_buffer_info *rbi = &channel->inbound; 52662306a36Sopenharmony_ci u32 packetlen = desc->len8 << 3; 52762306a36Sopenharmony_ci u32 dsize = rbi->ring_datasize; 52862306a36Sopenharmony_ci 52962306a36Sopenharmony_ci hv_debug_delay_test(channel, MESSAGE_DELAY); 53062306a36Sopenharmony_ci /* bump offset to next potential packet */ 53162306a36Sopenharmony_ci rbi->priv_read_index += packetlen + VMBUS_PKT_TRAILER; 53262306a36Sopenharmony_ci if (rbi->priv_read_index >= dsize) 53362306a36Sopenharmony_ci rbi->priv_read_index -= dsize; 53462306a36Sopenharmony_ci 53562306a36Sopenharmony_ci /* more data? */ 53662306a36Sopenharmony_ci return hv_pkt_iter_first(channel); 53762306a36Sopenharmony_ci} 53862306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(__hv_pkt_iter_next); 53962306a36Sopenharmony_ci 54062306a36Sopenharmony_ci/* How many bytes were read in this iterator cycle */ 54162306a36Sopenharmony_cistatic u32 hv_pkt_iter_bytes_read(const struct hv_ring_buffer_info *rbi, 54262306a36Sopenharmony_ci u32 start_read_index) 54362306a36Sopenharmony_ci{ 54462306a36Sopenharmony_ci if (rbi->priv_read_index >= start_read_index) 54562306a36Sopenharmony_ci return rbi->priv_read_index - start_read_index; 54662306a36Sopenharmony_ci else 54762306a36Sopenharmony_ci return rbi->ring_datasize - start_read_index + 54862306a36Sopenharmony_ci rbi->priv_read_index; 54962306a36Sopenharmony_ci} 55062306a36Sopenharmony_ci 55162306a36Sopenharmony_ci/* 55262306a36Sopenharmony_ci * Update host ring buffer after iterating over packets. 
If the host has
 * stopped queuing new entries because it found the ring buffer full, and
 * sufficient space is being freed up, signal the host. But be careful to
 * only signal the host when necessary, both for performance reasons and
 * because Hyper-V protects itself by throttling guests that signal
 * inappropriately.
 *
 * Determining when to signal is tricky. There are three key data inputs
 * that must be handled in this order to avoid race conditions:
 *
 * 1. Update the read_index
 * 2. Read the pending_send_sz
 * 3. Read the current write_index
 *
 * The interrupt_mask is not used to determine when to signal. The
 * interrupt_mask is used only on the guest->host ring buffer when
 * sending requests to the host. The host does not use it on the host->
 * guest ring buffer to indicate whether it should be signaled.
 */
void hv_pkt_iter_close(struct vmbus_channel *channel)
{
	struct hv_ring_buffer_info *rbi = &channel->inbound;
	u32 curr_write_sz, pending_sz, bytes_read, start_read_index;

	/*
	 * Make sure all reads are done before we update the read index since
	 * the writer may start writing to the read area once the read index
	 * is updated.
	 */
	virt_rmb();
	/* Remember where this iteration started so bytes_read can be derived. */
	start_read_index = rbi->ring_buffer->read_index;
	rbi->ring_buffer->read_index = rbi->priv_read_index;

	/*
	 * Older versions of Hyper-V (before WS2012 and Win8) do not
	 * implement pending_send_sz and simply poll if the host->guest
	 * ring buffer is full. No signaling is needed or expected.
	 */
	if (!rbi->ring_buffer->feature_bits.feat_pending_send_sz)
		return;

	/*
	 * Issue a full memory barrier before making the signaling decision.
	 * If reading pending_send_sz were to be reordered and happen
	 * before we commit the new read_index, a race could occur. If the
	 * host were to set the pending_send_sz after we have sampled
	 * pending_send_sz, and the ring buffer blocks before we commit the
	 * read index, we could miss sending the interrupt. Issue a full
	 * memory barrier to address this.
	 */
	virt_mb();

	/*
	 * If the pending_send_sz is zero, then the ring buffer is not
	 * blocked and there is no need to signal. This is by far the
	 * most common case, so exit quickly for best performance.
	 */
	pending_sz = READ_ONCE(rbi->ring_buffer->pending_send_sz);
	if (!pending_sz)
		return;

	/*
	 * Ensure the read of write_index in hv_get_bytes_to_write()
	 * happens after the read of pending_send_sz.
	 */
	virt_rmb();
	curr_write_sz = hv_get_bytes_to_write(rbi);
	bytes_read = hv_pkt_iter_bytes_read(rbi, start_read_index);

	/*
	 * We want to signal the host only if we're transitioning
	 * from a "not enough free space" state to a "enough free
	 * space" state. For example, it's possible that this function
	 * could run and free up enough space to signal the host, and then
	 * run again and free up additional space before the host has a
	 * chance to clear the pending_send_sz. The 2nd invocation would
	 * be a null transition from "enough free space" to "enough free
	 * space", which doesn't warrant a signal.
	 *
	 * Exactly filling the ring buffer is treated as "not enough
	 * space". The ring buffer always must have at least one byte
	 * empty so the empty and full conditions are distinguishable.
	 * hv_get_bytes_to_write() doesn't fully tell the truth in
	 * this regard.
	 *
	 * So first check if we were in the "enough free space" state
	 * before we began the iteration. If so, the host was not
	 * blocked, and there's no need to signal.
	 */
	if (curr_write_sz - bytes_read > pending_sz)
		return;

	/*
	 * Similarly, if the new state is "not enough space", then
	 * there's no need to signal.
	 */
	if (curr_write_sz <= pending_sz)
		return;

	/* Transitioned from "blocked" to "enough free space": count and signal. */
	++channel->intr_in_full;
	vmbus_setevent(channel);
}
EXPORT_SYMBOL_GPL(hv_pkt_iter_close);