// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2016 Red Hat, Inc.
 * Author: Michael S. Tsirkin <mst@redhat.com>
 *
 * Simple descriptor-based ring. virtio 0.9 compatible event index is used for
 * signalling, unconditionally.
 */
98c2ecf20Sopenharmony_ci#define _GNU_SOURCE
108c2ecf20Sopenharmony_ci#include "main.h"
118c2ecf20Sopenharmony_ci#include <stdlib.h>
128c2ecf20Sopenharmony_ci#include <stdio.h>
138c2ecf20Sopenharmony_ci#include <string.h>
148c2ecf20Sopenharmony_ci
/*
 * Decide whether the peer must be notified.
 *
 * next  - where the next entry will be written.
 * prev  - value of "next" when an event was triggered previously.
 * event - the peer requested an event after this entry is written.
 *
 * All arithmetic is modulo 2^16, so the comparison keeps working when the
 * indices wrap around.  Returns true when "event" lies in the window of
 * entries written since "prev", i.e. in [prev, next).
 */
static inline bool need_event(unsigned short event,
			      unsigned short next,
			      unsigned short prev)
{
	/* distance from the entry after "event" up to "next" */
	unsigned short past_event = next - event - 1;
	/* number of entries written since the previous notification */
	unsigned short written = next - prev;

	return past_event < written;
}
258c2ecf20Sopenharmony_ci
/* Design:
 * Guest adds descriptors with unique index values and DESC_HW in flags.
 * Host overwrites used descriptors with correct len, index, and DESC_HW clear.
 * Flags are always set last.
 */

/* Flag bit: set by the guest when it adds a descriptor, cleared by the host
 * once the descriptor has been used.
 */
#define DESC_HW 0x1

/* One ring entry, written by both guest and host. */
struct desc {
	unsigned short flags;		/* DESC_HW or 0; always written last */
	unsigned short index;		/* slot in the data[] bookkeeping array */
	unsigned int len;		/* buffer length */
	unsigned long long addr;	/* buffer address stored as an integer */
};
398c2ecf20Sopenharmony_ci
/* how much padding is needed to avoid false cache sharing */
#define HOST_GUEST_PADDING 0x80

/*
 * Event indices exchanged between the two sides.  Mostly read.
 *
 * Each index sits at the start of its own HOST_GUEST_PADDING-sized region so
 * that the two indices never land next to each other.
 */
struct event {
	/* written by enable_kick(), consulted by kick_available() */
	unsigned short kick_index;
	unsigned char reserved0[HOST_GUEST_PADDING - 2];
	/* written by enable_call(), consulted by call_used() */
	unsigned short call_index;
	unsigned char reserved1[HOST_GUEST_PADDING - 2];
};
508c2ecf20Sopenharmony_ci
/* Guest-private bookkeeping for one descriptor slot. */
struct data {
	/* descriptor is writeable, we can't get buf from there */
	void *buf;
	/* opaque pointer handed to add_inbuf() and returned by get_buf() */
	void *data;
};

/* Bookkeeping array, indexed by the descriptor's index field. */
struct data *data;
558c2ecf20Sopenharmony_ci
/* Descriptor ring shared by guest and host; allocated in alloc_ring(). */
struct desc *ring;
/* Kick/call event indices; allocated (zeroed) in alloc_ring(). */
struct event *event;
588c2ecf20Sopenharmony_ci
598c2ecf20Sopenharmony_cistruct guest {
608c2ecf20Sopenharmony_ci	unsigned avail_idx;
618c2ecf20Sopenharmony_ci	unsigned last_used_idx;
628c2ecf20Sopenharmony_ci	unsigned num_free;
638c2ecf20Sopenharmony_ci	unsigned kicked_avail_idx;
648c2ecf20Sopenharmony_ci	unsigned char reserved[HOST_GUEST_PADDING - 12];
658c2ecf20Sopenharmony_ci} guest;
668c2ecf20Sopenharmony_ci
678c2ecf20Sopenharmony_cistruct host {
688c2ecf20Sopenharmony_ci	/* we do not need to track last avail index
698c2ecf20Sopenharmony_ci	 * unless we have more than one in flight.
708c2ecf20Sopenharmony_ci	 */
718c2ecf20Sopenharmony_ci	unsigned used_idx;
728c2ecf20Sopenharmony_ci	unsigned called_used_idx;
738c2ecf20Sopenharmony_ci	unsigned char reserved[HOST_GUEST_PADDING - 4];
748c2ecf20Sopenharmony_ci} host;
758c2ecf20Sopenharmony_ci
768c2ecf20Sopenharmony_ci/* implemented by ring */
778c2ecf20Sopenharmony_civoid alloc_ring(void)
788c2ecf20Sopenharmony_ci{
798c2ecf20Sopenharmony_ci	int ret;
808c2ecf20Sopenharmony_ci	int i;
818c2ecf20Sopenharmony_ci
828c2ecf20Sopenharmony_ci	ret = posix_memalign((void **)&ring, 0x1000, ring_size * sizeof *ring);
838c2ecf20Sopenharmony_ci	if (ret) {
848c2ecf20Sopenharmony_ci		perror("Unable to allocate ring buffer.\n");
858c2ecf20Sopenharmony_ci		exit(3);
868c2ecf20Sopenharmony_ci	}
878c2ecf20Sopenharmony_ci	event = calloc(1, sizeof(*event));
888c2ecf20Sopenharmony_ci	if (!event) {
898c2ecf20Sopenharmony_ci		perror("Unable to allocate event buffer.\n");
908c2ecf20Sopenharmony_ci		exit(3);
918c2ecf20Sopenharmony_ci	}
928c2ecf20Sopenharmony_ci	guest.avail_idx = 0;
938c2ecf20Sopenharmony_ci	guest.kicked_avail_idx = -1;
948c2ecf20Sopenharmony_ci	guest.last_used_idx = 0;
958c2ecf20Sopenharmony_ci	host.used_idx = 0;
968c2ecf20Sopenharmony_ci	host.called_used_idx = -1;
978c2ecf20Sopenharmony_ci	for (i = 0; i < ring_size; ++i) {
988c2ecf20Sopenharmony_ci		struct desc desc = {
998c2ecf20Sopenharmony_ci			.index = i,
1008c2ecf20Sopenharmony_ci		};
1018c2ecf20Sopenharmony_ci		ring[i] = desc;
1028c2ecf20Sopenharmony_ci	}
1038c2ecf20Sopenharmony_ci	guest.num_free = ring_size;
1048c2ecf20Sopenharmony_ci	data = calloc(ring_size, sizeof(*data));
1058c2ecf20Sopenharmony_ci	if (!data) {
1068c2ecf20Sopenharmony_ci		perror("Unable to allocate data buffer.\n");
1078c2ecf20Sopenharmony_ci		exit(3);
1088c2ecf20Sopenharmony_ci	}
1098c2ecf20Sopenharmony_ci}
1108c2ecf20Sopenharmony_ci
1118c2ecf20Sopenharmony_ci/* guest side */
1128c2ecf20Sopenharmony_ciint add_inbuf(unsigned len, void *buf, void *datap)
1138c2ecf20Sopenharmony_ci{
1148c2ecf20Sopenharmony_ci	unsigned head, index;
1158c2ecf20Sopenharmony_ci
1168c2ecf20Sopenharmony_ci	if (!guest.num_free)
1178c2ecf20Sopenharmony_ci		return -1;
1188c2ecf20Sopenharmony_ci
1198c2ecf20Sopenharmony_ci	guest.num_free--;
1208c2ecf20Sopenharmony_ci	head = (ring_size - 1) & (guest.avail_idx++);
1218c2ecf20Sopenharmony_ci
1228c2ecf20Sopenharmony_ci	/* Start with a write. On MESI architectures this helps
1238c2ecf20Sopenharmony_ci	 * avoid a shared state with consumer that is polling this descriptor.
1248c2ecf20Sopenharmony_ci	 */
1258c2ecf20Sopenharmony_ci	ring[head].addr = (unsigned long)(void*)buf;
1268c2ecf20Sopenharmony_ci	ring[head].len = len;
1278c2ecf20Sopenharmony_ci	/* read below might bypass write above. That is OK because it's just an
1288c2ecf20Sopenharmony_ci	 * optimization. If this happens, we will get the cache line in a
1298c2ecf20Sopenharmony_ci	 * shared state which is unfortunate, but probably not worth it to
1308c2ecf20Sopenharmony_ci	 * add an explicit full barrier to avoid this.
1318c2ecf20Sopenharmony_ci	 */
1328c2ecf20Sopenharmony_ci	barrier();
1338c2ecf20Sopenharmony_ci	index = ring[head].index;
1348c2ecf20Sopenharmony_ci	data[index].buf = buf;
1358c2ecf20Sopenharmony_ci	data[index].data = datap;
1368c2ecf20Sopenharmony_ci	/* Barrier A (for pairing) */
1378c2ecf20Sopenharmony_ci	smp_release();
1388c2ecf20Sopenharmony_ci	ring[head].flags = DESC_HW;
1398c2ecf20Sopenharmony_ci
1408c2ecf20Sopenharmony_ci	return 0;
1418c2ecf20Sopenharmony_ci}
1428c2ecf20Sopenharmony_ci
1438c2ecf20Sopenharmony_civoid *get_buf(unsigned *lenp, void **bufp)
1448c2ecf20Sopenharmony_ci{
1458c2ecf20Sopenharmony_ci	unsigned head = (ring_size - 1) & guest.last_used_idx;
1468c2ecf20Sopenharmony_ci	unsigned index;
1478c2ecf20Sopenharmony_ci	void *datap;
1488c2ecf20Sopenharmony_ci
1498c2ecf20Sopenharmony_ci	if (ring[head].flags & DESC_HW)
1508c2ecf20Sopenharmony_ci		return NULL;
1518c2ecf20Sopenharmony_ci	/* Barrier B (for pairing) */
1528c2ecf20Sopenharmony_ci	smp_acquire();
1538c2ecf20Sopenharmony_ci	*lenp = ring[head].len;
1548c2ecf20Sopenharmony_ci	index = ring[head].index & (ring_size - 1);
1558c2ecf20Sopenharmony_ci	datap = data[index].data;
1568c2ecf20Sopenharmony_ci	*bufp = data[index].buf;
1578c2ecf20Sopenharmony_ci	data[index].buf = NULL;
1588c2ecf20Sopenharmony_ci	data[index].data = NULL;
1598c2ecf20Sopenharmony_ci	guest.num_free++;
1608c2ecf20Sopenharmony_ci	guest.last_used_idx++;
1618c2ecf20Sopenharmony_ci	return datap;
1628c2ecf20Sopenharmony_ci}
1638c2ecf20Sopenharmony_ci
1648c2ecf20Sopenharmony_cibool used_empty()
1658c2ecf20Sopenharmony_ci{
1668c2ecf20Sopenharmony_ci	unsigned head = (ring_size - 1) & guest.last_used_idx;
1678c2ecf20Sopenharmony_ci
1688c2ecf20Sopenharmony_ci	return (ring[head].flags & DESC_HW);
1698c2ecf20Sopenharmony_ci}
1708c2ecf20Sopenharmony_ci
/* Guest side: intentionally a no-op. */
void disable_call(void)
{
	/* Doing nothing to disable calls might cause
	 * extra interrupts, but reduces the number of cache misses.
	 */
}
1778c2ecf20Sopenharmony_ci
1788c2ecf20Sopenharmony_cibool enable_call()
1798c2ecf20Sopenharmony_ci{
1808c2ecf20Sopenharmony_ci	event->call_index = guest.last_used_idx;
1818c2ecf20Sopenharmony_ci	/* Flush call index write */
1828c2ecf20Sopenharmony_ci	/* Barrier D (for pairing) */
1838c2ecf20Sopenharmony_ci	smp_mb();
1848c2ecf20Sopenharmony_ci	return used_empty();
1858c2ecf20Sopenharmony_ci}
1868c2ecf20Sopenharmony_ci
1878c2ecf20Sopenharmony_civoid kick_available(void)
1888c2ecf20Sopenharmony_ci{
1898c2ecf20Sopenharmony_ci	bool need;
1908c2ecf20Sopenharmony_ci
1918c2ecf20Sopenharmony_ci	/* Flush in previous flags write */
1928c2ecf20Sopenharmony_ci	/* Barrier C (for pairing) */
1938c2ecf20Sopenharmony_ci	smp_mb();
1948c2ecf20Sopenharmony_ci	need = need_event(event->kick_index,
1958c2ecf20Sopenharmony_ci			   guest.avail_idx,
1968c2ecf20Sopenharmony_ci			   guest.kicked_avail_idx);
1978c2ecf20Sopenharmony_ci
1988c2ecf20Sopenharmony_ci	guest.kicked_avail_idx = guest.avail_idx;
1998c2ecf20Sopenharmony_ci	if (need)
2008c2ecf20Sopenharmony_ci		kick();
2018c2ecf20Sopenharmony_ci}
2028c2ecf20Sopenharmony_ci
/* host side */

/* Host side: intentionally a no-op. */
void disable_kick(void)
{
	/* Doing nothing to disable kicks might cause
	 * extra interrupts, but reduces the number of cache misses.
	 */
}
2108c2ecf20Sopenharmony_ci
2118c2ecf20Sopenharmony_cibool enable_kick()
2128c2ecf20Sopenharmony_ci{
2138c2ecf20Sopenharmony_ci	event->kick_index = host.used_idx;
2148c2ecf20Sopenharmony_ci	/* Barrier C (for pairing) */
2158c2ecf20Sopenharmony_ci	smp_mb();
2168c2ecf20Sopenharmony_ci	return avail_empty();
2178c2ecf20Sopenharmony_ci}
2188c2ecf20Sopenharmony_ci
2198c2ecf20Sopenharmony_cibool avail_empty()
2208c2ecf20Sopenharmony_ci{
2218c2ecf20Sopenharmony_ci	unsigned head = (ring_size - 1) & host.used_idx;
2228c2ecf20Sopenharmony_ci
2238c2ecf20Sopenharmony_ci	return !(ring[head].flags & DESC_HW);
2248c2ecf20Sopenharmony_ci}
2258c2ecf20Sopenharmony_ci
2268c2ecf20Sopenharmony_cibool use_buf(unsigned *lenp, void **bufp)
2278c2ecf20Sopenharmony_ci{
2288c2ecf20Sopenharmony_ci	unsigned head = (ring_size - 1) & host.used_idx;
2298c2ecf20Sopenharmony_ci
2308c2ecf20Sopenharmony_ci	if (!(ring[head].flags & DESC_HW))
2318c2ecf20Sopenharmony_ci		return false;
2328c2ecf20Sopenharmony_ci
2338c2ecf20Sopenharmony_ci	/* make sure length read below is not speculated */
2348c2ecf20Sopenharmony_ci	/* Barrier A (for pairing) */
2358c2ecf20Sopenharmony_ci	smp_acquire();
2368c2ecf20Sopenharmony_ci
2378c2ecf20Sopenharmony_ci	/* simple in-order completion: we don't need
2388c2ecf20Sopenharmony_ci	 * to touch index at all. This also means we
2398c2ecf20Sopenharmony_ci	 * can just modify the descriptor in-place.
2408c2ecf20Sopenharmony_ci	 */
2418c2ecf20Sopenharmony_ci	ring[head].len--;
2428c2ecf20Sopenharmony_ci	/* Make sure len is valid before flags.
2438c2ecf20Sopenharmony_ci	 * Note: alternative is to write len and flags in one access -
2448c2ecf20Sopenharmony_ci	 * possible on 64 bit architectures but wmb is free on Intel anyway
2458c2ecf20Sopenharmony_ci	 * so I have no way to test whether it's a gain.
2468c2ecf20Sopenharmony_ci	 */
2478c2ecf20Sopenharmony_ci	/* Barrier B (for pairing) */
2488c2ecf20Sopenharmony_ci	smp_release();
2498c2ecf20Sopenharmony_ci	ring[head].flags = 0;
2508c2ecf20Sopenharmony_ci	host.used_idx++;
2518c2ecf20Sopenharmony_ci	return true;
2528c2ecf20Sopenharmony_ci}
2538c2ecf20Sopenharmony_ci
2548c2ecf20Sopenharmony_civoid call_used(void)
2558c2ecf20Sopenharmony_ci{
2568c2ecf20Sopenharmony_ci	bool need;
2578c2ecf20Sopenharmony_ci
2588c2ecf20Sopenharmony_ci	/* Flush in previous flags write */
2598c2ecf20Sopenharmony_ci	/* Barrier D (for pairing) */
2608c2ecf20Sopenharmony_ci	smp_mb();
2618c2ecf20Sopenharmony_ci
2628c2ecf20Sopenharmony_ci	need = need_event(event->call_index,
2638c2ecf20Sopenharmony_ci			host.used_idx,
2648c2ecf20Sopenharmony_ci			host.called_used_idx);
2658c2ecf20Sopenharmony_ci
2668c2ecf20Sopenharmony_ci	host.called_used_idx = host.used_idx;
2678c2ecf20Sopenharmony_ci
2688c2ecf20Sopenharmony_ci	if (need)
2698c2ecf20Sopenharmony_ci		call();
2708c2ecf20Sopenharmony_ci}
271