18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci *
48c2ecf20Sopenharmony_ci * Copyright (c) 2009, Microsoft Corporation.
58c2ecf20Sopenharmony_ci *
68c2ecf20Sopenharmony_ci * Authors:
78c2ecf20Sopenharmony_ci *   Haiyang Zhang <haiyangz@microsoft.com>
88c2ecf20Sopenharmony_ci *   Hank Janssen  <hjanssen@microsoft.com>
98c2ecf20Sopenharmony_ci *   K. Y. Srinivasan <kys@microsoft.com>
108c2ecf20Sopenharmony_ci */
118c2ecf20Sopenharmony_ci#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
128c2ecf20Sopenharmony_ci
138c2ecf20Sopenharmony_ci#include <linux/kernel.h>
148c2ecf20Sopenharmony_ci#include <linux/mm.h>
158c2ecf20Sopenharmony_ci#include <linux/hyperv.h>
168c2ecf20Sopenharmony_ci#include <linux/uio.h>
178c2ecf20Sopenharmony_ci#include <linux/vmalloc.h>
188c2ecf20Sopenharmony_ci#include <linux/slab.h>
198c2ecf20Sopenharmony_ci#include <linux/prefetch.h>
208c2ecf20Sopenharmony_ci
218c2ecf20Sopenharmony_ci#include "hyperv_vmbus.h"
228c2ecf20Sopenharmony_ci
238c2ecf20Sopenharmony_ci#define VMBUS_PKT_TRAILER	8
248c2ecf20Sopenharmony_ci
258c2ecf20Sopenharmony_ci/*
268c2ecf20Sopenharmony_ci * When we write to the ring buffer, check if the host needs to
 * be signaled. Here are the details of this protocol:
288c2ecf20Sopenharmony_ci *
298c2ecf20Sopenharmony_ci *	1. The host guarantees that while it is draining the
308c2ecf20Sopenharmony_ci *	   ring buffer, it will set the interrupt_mask to
318c2ecf20Sopenharmony_ci *	   indicate it does not need to be interrupted when
328c2ecf20Sopenharmony_ci *	   new data is placed.
338c2ecf20Sopenharmony_ci *
348c2ecf20Sopenharmony_ci *	2. The host guarantees that it will completely drain
358c2ecf20Sopenharmony_ci *	   the ring buffer before exiting the read loop. Further,
368c2ecf20Sopenharmony_ci *	   once the ring buffer is empty, it will clear the
378c2ecf20Sopenharmony_ci *	   interrupt_mask and re-check to see if new data has
388c2ecf20Sopenharmony_ci *	   arrived.
398c2ecf20Sopenharmony_ci *
408c2ecf20Sopenharmony_ci * KYS: Oct. 30, 2016:
418c2ecf20Sopenharmony_ci * It looks like Windows hosts have logic to deal with DOS attacks that
428c2ecf20Sopenharmony_ci * can be triggered if it receives interrupts when it is not expecting
438c2ecf20Sopenharmony_ci * the interrupt. The host expects interrupts only when the ring
 * transitions from empty to non-empty (or full to non-full on the guest
458c2ecf20Sopenharmony_ci * to host ring).
468c2ecf20Sopenharmony_ci * So, base the signaling decision solely on the ring state until the
478c2ecf20Sopenharmony_ci * host logic is fixed.
488c2ecf20Sopenharmony_ci */
498c2ecf20Sopenharmony_ci
/*
 * hv_signal_on_write() - decide whether to interrupt the host after a write
 * @old_write: the outbound write_index before the just-completed write
 * @channel: channel whose outbound ring buffer was written
 *
 * Per the protocol notes above, the host only needs an interrupt when the
 * guest->host ring transitions from empty to non-empty; signaling at other
 * times risks triggering the host's DoS throttling.
 */
static void hv_signal_on_write(u32 old_write, struct vmbus_channel *channel)
{
	struct hv_ring_buffer_info *rbi = &channel->outbound;

	/*
	 * Full barrier: make our write_index update visible before we
	 * sample interrupt_mask, pairing with the host's read loop.
	 */
	virt_mb();
	/* Host is actively draining and asked not to be interrupted. */
	if (READ_ONCE(rbi->ring_buffer->interrupt_mask))
		return;

	/* check interrupt_mask before read_index */
	virt_rmb();
	/*
	 * This is the only case we need to signal when the
	 * ring transitions from being empty to non-empty: the ring was
	 * empty iff the host's read_index equals our pre-write write_index.
	 */
	if (old_write == READ_ONCE(rbi->ring_buffer->read_index)) {
		++channel->intr_out_empty;
		vmbus_setevent(channel);
	}
}
698c2ecf20Sopenharmony_ci
708c2ecf20Sopenharmony_ci/* Get the next write location for the specified ring buffer. */
718c2ecf20Sopenharmony_cistatic inline u32
728c2ecf20Sopenharmony_cihv_get_next_write_location(struct hv_ring_buffer_info *ring_info)
738c2ecf20Sopenharmony_ci{
748c2ecf20Sopenharmony_ci	u32 next = ring_info->ring_buffer->write_index;
758c2ecf20Sopenharmony_ci
768c2ecf20Sopenharmony_ci	return next;
778c2ecf20Sopenharmony_ci}
788c2ecf20Sopenharmony_ci
798c2ecf20Sopenharmony_ci/* Set the next write location for the specified ring buffer. */
808c2ecf20Sopenharmony_cistatic inline void
818c2ecf20Sopenharmony_cihv_set_next_write_location(struct hv_ring_buffer_info *ring_info,
828c2ecf20Sopenharmony_ci		     u32 next_write_location)
838c2ecf20Sopenharmony_ci{
848c2ecf20Sopenharmony_ci	ring_info->ring_buffer->write_index = next_write_location;
858c2ecf20Sopenharmony_ci}
868c2ecf20Sopenharmony_ci
878c2ecf20Sopenharmony_ci/* Set the next read location for the specified ring buffer. */
888c2ecf20Sopenharmony_cistatic inline void
898c2ecf20Sopenharmony_cihv_set_next_read_location(struct hv_ring_buffer_info *ring_info,
908c2ecf20Sopenharmony_ci		    u32 next_read_location)
918c2ecf20Sopenharmony_ci{
928c2ecf20Sopenharmony_ci	ring_info->ring_buffer->read_index = next_read_location;
938c2ecf20Sopenharmony_ci	ring_info->priv_read_index = next_read_location;
948c2ecf20Sopenharmony_ci}
958c2ecf20Sopenharmony_ci
968c2ecf20Sopenharmony_ci/* Get the size of the ring buffer. */
/* Get the size of the ring buffer. */
static inline u32
hv_get_ring_buffersize(const struct hv_ring_buffer_info *ring_info)
{
	/* Data area only: total ring size minus the header page. */
	return ring_info->ring_datasize;
}
1028c2ecf20Sopenharmony_ci
1038c2ecf20Sopenharmony_ci/* Get the read and write indices as u64 of the specified ring buffer. */
1048c2ecf20Sopenharmony_cistatic inline u64
1058c2ecf20Sopenharmony_cihv_get_ring_bufferindices(struct hv_ring_buffer_info *ring_info)
1068c2ecf20Sopenharmony_ci{
1078c2ecf20Sopenharmony_ci	return (u64)ring_info->ring_buffer->write_index << 32;
1088c2ecf20Sopenharmony_ci}
1098c2ecf20Sopenharmony_ci
1108c2ecf20Sopenharmony_ci/*
1118c2ecf20Sopenharmony_ci * Helper routine to copy from source to ring buffer.
1128c2ecf20Sopenharmony_ci * Assume there is enough room. Handles wrap-around in dest case only!!
1138c2ecf20Sopenharmony_ci */
1148c2ecf20Sopenharmony_cistatic u32 hv_copyto_ringbuffer(
1158c2ecf20Sopenharmony_ci	struct hv_ring_buffer_info	*ring_info,
1168c2ecf20Sopenharmony_ci	u32				start_write_offset,
1178c2ecf20Sopenharmony_ci	const void			*src,
1188c2ecf20Sopenharmony_ci	u32				srclen)
1198c2ecf20Sopenharmony_ci{
1208c2ecf20Sopenharmony_ci	void *ring_buffer = hv_get_ring_buffer(ring_info);
1218c2ecf20Sopenharmony_ci	u32 ring_buffer_size = hv_get_ring_buffersize(ring_info);
1228c2ecf20Sopenharmony_ci
1238c2ecf20Sopenharmony_ci	memcpy(ring_buffer + start_write_offset, src, srclen);
1248c2ecf20Sopenharmony_ci
1258c2ecf20Sopenharmony_ci	start_write_offset += srclen;
1268c2ecf20Sopenharmony_ci	if (start_write_offset >= ring_buffer_size)
1278c2ecf20Sopenharmony_ci		start_write_offset -= ring_buffer_size;
1288c2ecf20Sopenharmony_ci
1298c2ecf20Sopenharmony_ci	return start_write_offset;
1308c2ecf20Sopenharmony_ci}
1318c2ecf20Sopenharmony_ci
1328c2ecf20Sopenharmony_ci/*
1338c2ecf20Sopenharmony_ci *
1348c2ecf20Sopenharmony_ci * hv_get_ringbuffer_availbytes()
1358c2ecf20Sopenharmony_ci *
1368c2ecf20Sopenharmony_ci * Get number of bytes available to read and to write to
1378c2ecf20Sopenharmony_ci * for the specified ring buffer
1388c2ecf20Sopenharmony_ci */
1398c2ecf20Sopenharmony_cistatic void
1408c2ecf20Sopenharmony_cihv_get_ringbuffer_availbytes(const struct hv_ring_buffer_info *rbi,
1418c2ecf20Sopenharmony_ci			     u32 *read, u32 *write)
1428c2ecf20Sopenharmony_ci{
1438c2ecf20Sopenharmony_ci	u32 read_loc, write_loc, dsize;
1448c2ecf20Sopenharmony_ci
1458c2ecf20Sopenharmony_ci	/* Capture the read/write indices before they changed */
1468c2ecf20Sopenharmony_ci	read_loc = READ_ONCE(rbi->ring_buffer->read_index);
1478c2ecf20Sopenharmony_ci	write_loc = READ_ONCE(rbi->ring_buffer->write_index);
1488c2ecf20Sopenharmony_ci	dsize = rbi->ring_datasize;
1498c2ecf20Sopenharmony_ci
1508c2ecf20Sopenharmony_ci	*write = write_loc >= read_loc ? dsize - (write_loc - read_loc) :
1518c2ecf20Sopenharmony_ci		read_loc - write_loc;
1528c2ecf20Sopenharmony_ci	*read = dsize - *write;
1538c2ecf20Sopenharmony_ci}
1548c2ecf20Sopenharmony_ci
1558c2ecf20Sopenharmony_ci/* Get various debug metrics for the specified ring buffer. */
1568c2ecf20Sopenharmony_ciint hv_ringbuffer_get_debuginfo(struct hv_ring_buffer_info *ring_info,
1578c2ecf20Sopenharmony_ci				struct hv_ring_buffer_debug_info *debug_info)
1588c2ecf20Sopenharmony_ci{
1598c2ecf20Sopenharmony_ci	u32 bytes_avail_towrite;
1608c2ecf20Sopenharmony_ci	u32 bytes_avail_toread;
1618c2ecf20Sopenharmony_ci
1628c2ecf20Sopenharmony_ci	mutex_lock(&ring_info->ring_buffer_mutex);
1638c2ecf20Sopenharmony_ci
1648c2ecf20Sopenharmony_ci	if (!ring_info->ring_buffer) {
1658c2ecf20Sopenharmony_ci		mutex_unlock(&ring_info->ring_buffer_mutex);
1668c2ecf20Sopenharmony_ci		return -EINVAL;
1678c2ecf20Sopenharmony_ci	}
1688c2ecf20Sopenharmony_ci
1698c2ecf20Sopenharmony_ci	hv_get_ringbuffer_availbytes(ring_info,
1708c2ecf20Sopenharmony_ci				     &bytes_avail_toread,
1718c2ecf20Sopenharmony_ci				     &bytes_avail_towrite);
1728c2ecf20Sopenharmony_ci	debug_info->bytes_avail_toread = bytes_avail_toread;
1738c2ecf20Sopenharmony_ci	debug_info->bytes_avail_towrite = bytes_avail_towrite;
1748c2ecf20Sopenharmony_ci	debug_info->current_read_index = ring_info->ring_buffer->read_index;
1758c2ecf20Sopenharmony_ci	debug_info->current_write_index = ring_info->ring_buffer->write_index;
1768c2ecf20Sopenharmony_ci	debug_info->current_interrupt_mask
1778c2ecf20Sopenharmony_ci		= ring_info->ring_buffer->interrupt_mask;
1788c2ecf20Sopenharmony_ci	mutex_unlock(&ring_info->ring_buffer_mutex);
1798c2ecf20Sopenharmony_ci
1808c2ecf20Sopenharmony_ci	return 0;
1818c2ecf20Sopenharmony_ci}
1828c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(hv_ringbuffer_get_debuginfo);
1838c2ecf20Sopenharmony_ci
1848c2ecf20Sopenharmony_ci/* Initialize a channel's ring buffer info mutex locks */
1858c2ecf20Sopenharmony_civoid hv_ringbuffer_pre_init(struct vmbus_channel *channel)
1868c2ecf20Sopenharmony_ci{
1878c2ecf20Sopenharmony_ci	mutex_init(&channel->inbound.ring_buffer_mutex);
1888c2ecf20Sopenharmony_ci	mutex_init(&channel->outbound.ring_buffer_mutex);
1898c2ecf20Sopenharmony_ci}
1908c2ecf20Sopenharmony_ci
1918c2ecf20Sopenharmony_ci/* Initialize the ring buffer. */
/*
 * Initialize the ring buffer: map the given pages, zero the indices and
 * set up the bookkeeping fields. Returns 0 or -ENOMEM.
 */
int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
		       struct page *pages, u32 page_cnt)
{
	int i;
	struct page **pages_wraparound;

	/* The control header must occupy exactly one page. */
	BUILD_BUG_ON((sizeof(struct hv_ring_buffer) != PAGE_SIZE));

	/*
	 * First page holds struct hv_ring_buffer, do wraparound mapping for
	 * the rest.
	 */
	pages_wraparound = kcalloc(page_cnt * 2 - 1, sizeof(struct page *),
				   GFP_KERNEL);
	if (!pages_wraparound)
		return -ENOMEM;

	/*
	 * List the data pages twice back-to-back so the vmap below maps
	 * them consecutively; accesses that cross the end of the ring can
	 * then be done with a single linear memcpy.
	 */
	pages_wraparound[0] = pages;
	for (i = 0; i < 2 * (page_cnt - 1); i++)
		pages_wraparound[i + 1] = &pages[i % (page_cnt - 1) + 1];

	ring_info->ring_buffer = (struct hv_ring_buffer *)
		vmap(pages_wraparound, page_cnt * 2 - 1, VM_MAP, PAGE_KERNEL);

	/* The page array is only needed while creating the mapping. */
	kfree(pages_wraparound);


	if (!ring_info->ring_buffer)
		return -ENOMEM;

	ring_info->ring_buffer->read_index =
		ring_info->ring_buffer->write_index = 0;

	/* Set the feature bit for enabling flow control. */
	ring_info->ring_buffer->feature_bits.value = 1;

	ring_info->ring_size = page_cnt << PAGE_SHIFT;
	/* Reciprocal for cheap "percent full" math elsewhere. */
	ring_info->ring_size_div10_reciprocal =
		reciprocal_value(ring_info->ring_size / 10);
	ring_info->ring_datasize = ring_info->ring_size -
		sizeof(struct hv_ring_buffer);
	ring_info->priv_read_index = 0;

	spin_lock_init(&ring_info->ring_lock);

	return 0;
}
2398c2ecf20Sopenharmony_ci
2408c2ecf20Sopenharmony_ci/* Cleanup the ring buffer. */
/*
 * Cleanup the ring buffer: unmap it and clear the pointer. The mutex
 * ensures readers (e.g. hv_ringbuffer_get_debuginfo()) never see a
 * mapping that is being torn down.
 */
void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info)
{
	mutex_lock(&ring_info->ring_buffer_mutex);
	vunmap(ring_info->ring_buffer);
	ring_info->ring_buffer = NULL;
	mutex_unlock(&ring_info->ring_buffer_mutex);
}
2488c2ecf20Sopenharmony_ci
2498c2ecf20Sopenharmony_ci/*
2508c2ecf20Sopenharmony_ci * Check if the ring buffer spinlock is available to take or not; used on
2518c2ecf20Sopenharmony_ci * atomic contexts, like panic path (see the Hyper-V framebuffer driver).
2528c2ecf20Sopenharmony_ci */
2538c2ecf20Sopenharmony_ci
2548c2ecf20Sopenharmony_cibool hv_ringbuffer_spinlock_busy(struct vmbus_channel *channel)
2558c2ecf20Sopenharmony_ci{
2568c2ecf20Sopenharmony_ci	struct hv_ring_buffer_info *rinfo = &channel->outbound;
2578c2ecf20Sopenharmony_ci
2588c2ecf20Sopenharmony_ci	return spin_is_locked(&rinfo->ring_lock);
2598c2ecf20Sopenharmony_ci}
2608c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(hv_ringbuffer_spinlock_busy);
2618c2ecf20Sopenharmony_ci
2628c2ecf20Sopenharmony_ci/* Write to the ring buffer. */
/*
 * Write the kvec list as one packet to the channel's outbound ring and
 * signal the host if the ring transitioned from empty to non-empty.
 * Returns 0, -ENODEV if the channel was rescinded, or -EAGAIN if the
 * ring does not have room for the whole packet.
 */
int hv_ringbuffer_write(struct vmbus_channel *channel,
			const struct kvec *kv_list, u32 kv_count)
{
	int i;
	u32 bytes_avail_towrite;
	u32 totalbytes_towrite = sizeof(u64);	/* room for trailing indices */
	u32 next_write_location;
	u32 old_write;
	u64 prev_indices;
	unsigned long flags;
	struct hv_ring_buffer_info *outring_info = &channel->outbound;

	if (channel->rescind)
		return -ENODEV;

	for (i = 0; i < kv_count; i++)
		totalbytes_towrite += kv_list[i].iov_len;

	/* Serialize writers; may be called from interrupt context. */
	spin_lock_irqsave(&outring_info->ring_lock, flags);

	bytes_avail_towrite = hv_get_bytes_to_write(outring_info);

	/*
	 * If there is only room for the packet, assume it is full.
	 * Otherwise, the next time around, we think the ring buffer
	 * is empty since the read index == write index.
	 */
	if (bytes_avail_towrite <= totalbytes_towrite) {
		/* Track ring-full occurrences for diagnostics. */
		++channel->out_full_total;

		if (!channel->out_full_flag) {
			++channel->out_full_first;
			channel->out_full_flag = true;
		}

		spin_unlock_irqrestore(&outring_info->ring_lock, flags);
		return -EAGAIN;
	}

	channel->out_full_flag = false;

	/* Write to the ring buffer */
	next_write_location = hv_get_next_write_location(outring_info);

	/* Remember the pre-write index for the empty->non-empty test. */
	old_write = next_write_location;

	for (i = 0; i < kv_count; i++) {
		next_write_location = hv_copyto_ringbuffer(outring_info,
						     next_write_location,
						     kv_list[i].iov_base,
						     kv_list[i].iov_len);
	}

	/* Set previous packet start */
	prev_indices = hv_get_ring_bufferindices(outring_info);

	next_write_location = hv_copyto_ringbuffer(outring_info,
					     next_write_location,
					     &prev_indices,
					     sizeof(u64));

	/* Issue a full memory barrier before updating the write index */
	virt_mb();

	/* Now, update the write location */
	hv_set_next_write_location(outring_info, next_write_location);


	spin_unlock_irqrestore(&outring_info->ring_lock, flags);

	hv_signal_on_write(old_write, channel);

	/*
	 * The channel may have been rescinded while we were writing;
	 * tell the caller the data will never be consumed.
	 */
	if (channel->rescind)
		return -ENODEV;

	return 0;
}
3408c2ecf20Sopenharmony_ci
3418c2ecf20Sopenharmony_ciint hv_ringbuffer_read(struct vmbus_channel *channel,
3428c2ecf20Sopenharmony_ci		       void *buffer, u32 buflen, u32 *buffer_actual_len,
3438c2ecf20Sopenharmony_ci		       u64 *requestid, bool raw)
3448c2ecf20Sopenharmony_ci{
3458c2ecf20Sopenharmony_ci	struct vmpacket_descriptor *desc;
3468c2ecf20Sopenharmony_ci	u32 packetlen, offset;
3478c2ecf20Sopenharmony_ci
3488c2ecf20Sopenharmony_ci	if (unlikely(buflen == 0))
3498c2ecf20Sopenharmony_ci		return -EINVAL;
3508c2ecf20Sopenharmony_ci
3518c2ecf20Sopenharmony_ci	*buffer_actual_len = 0;
3528c2ecf20Sopenharmony_ci	*requestid = 0;
3538c2ecf20Sopenharmony_ci
3548c2ecf20Sopenharmony_ci	/* Make sure there is something to read */
3558c2ecf20Sopenharmony_ci	desc = hv_pkt_iter_first(channel);
3568c2ecf20Sopenharmony_ci	if (desc == NULL) {
3578c2ecf20Sopenharmony_ci		/*
3588c2ecf20Sopenharmony_ci		 * No error is set when there is even no header, drivers are
3598c2ecf20Sopenharmony_ci		 * supposed to analyze buffer_actual_len.
3608c2ecf20Sopenharmony_ci		 */
3618c2ecf20Sopenharmony_ci		return 0;
3628c2ecf20Sopenharmony_ci	}
3638c2ecf20Sopenharmony_ci
3648c2ecf20Sopenharmony_ci	offset = raw ? 0 : (desc->offset8 << 3);
3658c2ecf20Sopenharmony_ci	packetlen = (desc->len8 << 3) - offset;
3668c2ecf20Sopenharmony_ci	*buffer_actual_len = packetlen;
3678c2ecf20Sopenharmony_ci	*requestid = desc->trans_id;
3688c2ecf20Sopenharmony_ci
3698c2ecf20Sopenharmony_ci	if (unlikely(packetlen > buflen))
3708c2ecf20Sopenharmony_ci		return -ENOBUFS;
3718c2ecf20Sopenharmony_ci
3728c2ecf20Sopenharmony_ci	/* since ring is double mapped, only one copy is necessary */
3738c2ecf20Sopenharmony_ci	memcpy(buffer, (const char *)desc + offset, packetlen);
3748c2ecf20Sopenharmony_ci
3758c2ecf20Sopenharmony_ci	/* Advance ring index to next packet descriptor */
3768c2ecf20Sopenharmony_ci	__hv_pkt_iter_next(channel, desc);
3778c2ecf20Sopenharmony_ci
3788c2ecf20Sopenharmony_ci	/* Notify host of update */
3798c2ecf20Sopenharmony_ci	hv_pkt_iter_close(channel);
3808c2ecf20Sopenharmony_ci
3818c2ecf20Sopenharmony_ci	return 0;
3828c2ecf20Sopenharmony_ci}
3838c2ecf20Sopenharmony_ci
3848c2ecf20Sopenharmony_ci/*
3858c2ecf20Sopenharmony_ci * Determine number of bytes available in ring buffer after
3868c2ecf20Sopenharmony_ci * the current iterator (priv_read_index) location.
3878c2ecf20Sopenharmony_ci *
3888c2ecf20Sopenharmony_ci * This is similar to hv_get_bytes_to_read but with private
3898c2ecf20Sopenharmony_ci * read index instead.
3908c2ecf20Sopenharmony_ci */
/*
 * Return the number of bytes available to read after the iterator's
 * private read index, handling wrap-around of the indices.
 */
static u32 hv_pkt_iter_avail(const struct hv_ring_buffer_info *rbi)
{
	u32 priv_read_loc = rbi->priv_read_index;
	u32 write_loc;

	/*
	 * The Hyper-V host writes the packet data, then uses
	 * store_release() to update the write_index.  Use load_acquire()
	 * here to prevent loads of the packet data from being re-ordered
	 * before the read of the write_index and potentially getting
	 * stale data.
	 */
	write_loc = virt_load_acquire(&rbi->ring_buffer->write_index);

	if (write_loc >= priv_read_loc)
		return write_loc - priv_read_loc;
	else
		return (rbi->ring_datasize - priv_read_loc) + write_loc;
}
4108c2ecf20Sopenharmony_ci
4118c2ecf20Sopenharmony_ci/*
4128c2ecf20Sopenharmony_ci * Get first vmbus packet from ring buffer after read_index
4138c2ecf20Sopenharmony_ci *
4148c2ecf20Sopenharmony_ci * If ring buffer is empty, returns NULL and no other action needed.
4158c2ecf20Sopenharmony_ci */
4168c2ecf20Sopenharmony_cistruct vmpacket_descriptor *hv_pkt_iter_first(struct vmbus_channel *channel)
4178c2ecf20Sopenharmony_ci{
4188c2ecf20Sopenharmony_ci	struct hv_ring_buffer_info *rbi = &channel->inbound;
4198c2ecf20Sopenharmony_ci	struct vmpacket_descriptor *desc;
4208c2ecf20Sopenharmony_ci
4218c2ecf20Sopenharmony_ci	hv_debug_delay_test(channel, MESSAGE_DELAY);
4228c2ecf20Sopenharmony_ci	if (hv_pkt_iter_avail(rbi) < sizeof(struct vmpacket_descriptor))
4238c2ecf20Sopenharmony_ci		return NULL;
4248c2ecf20Sopenharmony_ci
4258c2ecf20Sopenharmony_ci	desc = hv_get_ring_buffer(rbi) + rbi->priv_read_index;
4268c2ecf20Sopenharmony_ci	if (desc)
4278c2ecf20Sopenharmony_ci		prefetch((char *)desc + (desc->len8 << 3));
4288c2ecf20Sopenharmony_ci
4298c2ecf20Sopenharmony_ci	return desc;
4308c2ecf20Sopenharmony_ci}
4318c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(hv_pkt_iter_first);
4328c2ecf20Sopenharmony_ci
4338c2ecf20Sopenharmony_ci/*
4348c2ecf20Sopenharmony_ci * Get next vmbus packet from ring buffer.
4358c2ecf20Sopenharmony_ci *
4368c2ecf20Sopenharmony_ci * Advances the current location (priv_read_index) and checks for more
4378c2ecf20Sopenharmony_ci * data. If the end of the ring buffer is reached, then return NULL.
4388c2ecf20Sopenharmony_ci */
4398c2ecf20Sopenharmony_cistruct vmpacket_descriptor *
4408c2ecf20Sopenharmony_ci__hv_pkt_iter_next(struct vmbus_channel *channel,
4418c2ecf20Sopenharmony_ci		   const struct vmpacket_descriptor *desc)
4428c2ecf20Sopenharmony_ci{
4438c2ecf20Sopenharmony_ci	struct hv_ring_buffer_info *rbi = &channel->inbound;
4448c2ecf20Sopenharmony_ci	u32 packetlen = desc->len8 << 3;
4458c2ecf20Sopenharmony_ci	u32 dsize = rbi->ring_datasize;
4468c2ecf20Sopenharmony_ci
4478c2ecf20Sopenharmony_ci	hv_debug_delay_test(channel, MESSAGE_DELAY);
4488c2ecf20Sopenharmony_ci	/* bump offset to next potential packet */
4498c2ecf20Sopenharmony_ci	rbi->priv_read_index += packetlen + VMBUS_PKT_TRAILER;
4508c2ecf20Sopenharmony_ci	if (rbi->priv_read_index >= dsize)
4518c2ecf20Sopenharmony_ci		rbi->priv_read_index -= dsize;
4528c2ecf20Sopenharmony_ci
4538c2ecf20Sopenharmony_ci	/* more data? */
4548c2ecf20Sopenharmony_ci	return hv_pkt_iter_first(channel);
4558c2ecf20Sopenharmony_ci}
4568c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(__hv_pkt_iter_next);
4578c2ecf20Sopenharmony_ci
4588c2ecf20Sopenharmony_ci/* How many bytes were read in this iterator cycle */
4598c2ecf20Sopenharmony_cistatic u32 hv_pkt_iter_bytes_read(const struct hv_ring_buffer_info *rbi,
4608c2ecf20Sopenharmony_ci					u32 start_read_index)
4618c2ecf20Sopenharmony_ci{
4628c2ecf20Sopenharmony_ci	if (rbi->priv_read_index >= start_read_index)
4638c2ecf20Sopenharmony_ci		return rbi->priv_read_index - start_read_index;
4648c2ecf20Sopenharmony_ci	else
4658c2ecf20Sopenharmony_ci		return rbi->ring_datasize - start_read_index +
4668c2ecf20Sopenharmony_ci			rbi->priv_read_index;
4678c2ecf20Sopenharmony_ci}
4688c2ecf20Sopenharmony_ci
4698c2ecf20Sopenharmony_ci/*
4708c2ecf20Sopenharmony_ci * Update host ring buffer after iterating over packets. If the host has
4718c2ecf20Sopenharmony_ci * stopped queuing new entries because it found the ring buffer full, and
4728c2ecf20Sopenharmony_ci * sufficient space is being freed up, signal the host. But be careful to
4738c2ecf20Sopenharmony_ci * only signal the host when necessary, both for performance reasons and
4748c2ecf20Sopenharmony_ci * because Hyper-V protects itself by throttling guests that signal
4758c2ecf20Sopenharmony_ci * inappropriately.
4768c2ecf20Sopenharmony_ci *
4778c2ecf20Sopenharmony_ci * Determining when to signal is tricky. There are three key data inputs
4788c2ecf20Sopenharmony_ci * that must be handled in this order to avoid race conditions:
4798c2ecf20Sopenharmony_ci *
4808c2ecf20Sopenharmony_ci * 1. Update the read_index
4818c2ecf20Sopenharmony_ci * 2. Read the pending_send_sz
4828c2ecf20Sopenharmony_ci * 3. Read the current write_index
4838c2ecf20Sopenharmony_ci *
4848c2ecf20Sopenharmony_ci * The interrupt_mask is not used to determine when to signal. The
4858c2ecf20Sopenharmony_ci * interrupt_mask is used only on the guest->host ring buffer when
4868c2ecf20Sopenharmony_ci * sending requests to the host. The host does not use it on the host->
4878c2ecf20Sopenharmony_ci * guest ring buffer to indicate whether it should be signaled.
4888c2ecf20Sopenharmony_ci */
/*
 * Commit the private read index to the shared ring header and, when the
 * host is blocked on a full ring (pending_send_sz), signal it exactly on
 * the "not enough space" -> "enough space" transition.  See the comment
 * block above for the full protocol rationale.
 */
void hv_pkt_iter_close(struct vmbus_channel *channel)
{
	struct hv_ring_buffer_info *rbi = &channel->inbound;
	u32 curr_write_sz, pending_sz, bytes_read, start_read_index;

	/*
	 * Make sure all reads are done before we update the read index since
	 * the writer may start writing to the read area once the read index
	 * is updated.
	 */
	virt_rmb();
	start_read_index = rbi->ring_buffer->read_index;
	rbi->ring_buffer->read_index = rbi->priv_read_index;

	/*
	 * Older versions of Hyper-V (before WS2012 and Win8) do not
	 * implement pending_send_sz and simply poll if the host->guest
	 * ring buffer is full.  No signaling is needed or expected.
	 */
	if (!rbi->ring_buffer->feature_bits.feat_pending_send_sz)
		return;

	/*
	 * Issue a full memory barrier before making the signaling decision.
	 * If reading pending_send_sz were to be reordered and happen
	 * before we commit the new read_index, a race could occur.  If the
	 * host were to set the pending_send_sz after we have sampled
	 * pending_send_sz, and the ring buffer blocks before we commit the
	 * read index, we could miss sending the interrupt. Issue a full
	 * memory barrier to address this.
	 */
	virt_mb();

	/*
	 * If the pending_send_sz is zero, then the ring buffer is not
	 * blocked and there is no need to signal.  This is by far the
	 * most common case, so exit quickly for best performance.
	 */
	pending_sz = READ_ONCE(rbi->ring_buffer->pending_send_sz);
	if (!pending_sz)
		return;

	/*
	 * Ensure the read of write_index in hv_get_bytes_to_write()
	 * happens after the read of pending_send_sz.
	 */
	virt_rmb();
	curr_write_sz = hv_get_bytes_to_write(rbi);
	bytes_read = hv_pkt_iter_bytes_read(rbi, start_read_index);

	/*
	 * We want to signal the host only if we're transitioning
	 * from a "not enough free space" state to a "enough free
	 * space" state.  For example, it's possible that this function
	 * could run and free up enough space to signal the host, and then
	 * run again and free up additional space before the host has a
	 * chance to clear the pending_send_sz.  The 2nd invocation would
	 * be a null transition from "enough free space" to "enough free
	 * space", which doesn't warrant a signal.
	 *
	 * Exactly filling the ring buffer is treated as "not enough
	 * space". The ring buffer always must have at least one byte
	 * empty so the empty and full conditions are distinguishable.
	 * hv_get_bytes_to_write() doesn't fully tell the truth in
	 * this regard.
	 *
	 * So first check if we were in the "enough free space" state
	 * before we began the iteration. If so, the host was not
	 * blocked, and there's no need to signal.
	 */
	if (curr_write_sz - bytes_read > pending_sz)
		return;

	/*
	 * Similarly, if the new state is "not enough space", then
	 * there's no need to signal.
	 */
	if (curr_write_sz <= pending_sz)
		return;

	/* Transition detected: wake the blocked host writer. */
	++channel->intr_in_full;
	vmbus_setevent(channel);
}
EXPORT_SYMBOL_GPL(hv_pkt_iter_close);
573