18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * Copyright (C) 2016 Red Hat, Inc.
48c2ecf20Sopenharmony_ci * Author: Michael S. Tsirkin <mst@redhat.com>
58c2ecf20Sopenharmony_ci *
68c2ecf20Sopenharmony_ci * Partial implementation of virtio 0.9. event index is used for signalling,
78c2ecf20Sopenharmony_ci * unconditionally. Design roughly follows linux kernel implementation in order
88c2ecf20Sopenharmony_ci * to be able to judge its performance.
98c2ecf20Sopenharmony_ci */
108c2ecf20Sopenharmony_ci#define _GNU_SOURCE
118c2ecf20Sopenharmony_ci#include "main.h"
128c2ecf20Sopenharmony_ci#include <stdlib.h>
138c2ecf20Sopenharmony_ci#include <stdio.h>
148c2ecf20Sopenharmony_ci#include <assert.h>
158c2ecf20Sopenharmony_ci#include <string.h>
168c2ecf20Sopenharmony_ci#include <linux/virtio_ring.h>
178c2ecf20Sopenharmony_ci
/* One entry of the side table that remembers the caller's opaque pointer
 * for a descriptor while that descriptor is outstanding.
 */
struct data {
	void *data;
};

/* Side table indexed by descriptor number; allocated in alloc_ring(). */
struct data *data;
218c2ecf20Sopenharmony_ci
/* The single vring used by both the guest-side and host-side functions
 * in this file; set up by alloc_ring().
 */
struct vring ring;
238c2ecf20Sopenharmony_ci
248c2ecf20Sopenharmony_ci/* enabling the below activates experimental ring polling code
258c2ecf20Sopenharmony_ci * (which skips index reads on consumer in favor of looking at
268c2ecf20Sopenharmony_ci * high bits of ring id ^ 0x8000).
278c2ecf20Sopenharmony_ci */
/* #define RING_POLL */
298c2ecf20Sopenharmony_ci/* enabling the below activates experimental in-order code
308c2ecf20Sopenharmony_ci * (which skips ring updates and reads and writes len in descriptor).
318c2ecf20Sopenharmony_ci */
/* #define INORDER */
338c2ecf20Sopenharmony_ci
348c2ecf20Sopenharmony_ci#if defined(RING_POLL) && defined(INORDER)
358c2ecf20Sopenharmony_ci#error "RING_POLL and INORDER are mutually exclusive"
368c2ecf20Sopenharmony_ci#endif
378c2ecf20Sopenharmony_ci
/* Number of bytes each side's private state is padded out to, so that the
 * guest and host state do not falsely share a cache line.
 */
#define HOST_GUEST_PADDING 0x80

/* Guest-private bookkeeping for the ring. */
struct guest {
	/* next avail index to be published to the host */
	unsigned short avail_idx;
	/* next used index the guest will consume */
	unsigned short last_used_idx;
	/* count of descriptors the guest may still hand out */
	unsigned short num_free;
	/* value of avail_idx when kick_available() last ran */
	unsigned short kicked_avail_idx;
#ifdef INORDER
	/* placeholder keeping the layout the same as the free-list variant */
	unsigned short reserved_free_head;
#else
	/* first descriptor on the free list */
	unsigned short free_head;
#endif
	/* 10 accounts for the five unsigned short members above */
	unsigned char reserved[HOST_GUEST_PADDING - 10];
} guest;
538c2ecf20Sopenharmony_ci
/* Host-private bookkeeping for the ring. */
struct host {
	/* we do not need to track last avail index
	 * unless we have more than one in flight.
	 */
	/* next used index the host will publish */
	unsigned short used_idx;
	/* value of used_idx when call_used() last ran */
	unsigned short called_used_idx;
	/* 4 accounts for the two unsigned short members above */
	unsigned char reserved[HOST_GUEST_PADDING - 4];
} host;
628c2ecf20Sopenharmony_ci
638c2ecf20Sopenharmony_ci/* implemented by ring */
648c2ecf20Sopenharmony_civoid alloc_ring(void)
658c2ecf20Sopenharmony_ci{
668c2ecf20Sopenharmony_ci	int ret;
678c2ecf20Sopenharmony_ci	int i;
688c2ecf20Sopenharmony_ci	void *p;
698c2ecf20Sopenharmony_ci
708c2ecf20Sopenharmony_ci	ret = posix_memalign(&p, 0x1000, vring_size(ring_size, 0x1000));
718c2ecf20Sopenharmony_ci	if (ret) {
728c2ecf20Sopenharmony_ci		perror("Unable to allocate ring buffer.\n");
738c2ecf20Sopenharmony_ci		exit(3);
748c2ecf20Sopenharmony_ci	}
758c2ecf20Sopenharmony_ci	memset(p, 0, vring_size(ring_size, 0x1000));
768c2ecf20Sopenharmony_ci	vring_init(&ring, ring_size, p, 0x1000);
778c2ecf20Sopenharmony_ci
788c2ecf20Sopenharmony_ci	guest.avail_idx = 0;
798c2ecf20Sopenharmony_ci	guest.kicked_avail_idx = -1;
808c2ecf20Sopenharmony_ci	guest.last_used_idx = 0;
818c2ecf20Sopenharmony_ci#ifndef INORDER
828c2ecf20Sopenharmony_ci	/* Put everything in free lists. */
838c2ecf20Sopenharmony_ci	guest.free_head = 0;
848c2ecf20Sopenharmony_ci#endif
858c2ecf20Sopenharmony_ci	for (i = 0; i < ring_size - 1; i++)
868c2ecf20Sopenharmony_ci		ring.desc[i].next = i + 1;
878c2ecf20Sopenharmony_ci	host.used_idx = 0;
888c2ecf20Sopenharmony_ci	host.called_used_idx = -1;
898c2ecf20Sopenharmony_ci	guest.num_free = ring_size;
908c2ecf20Sopenharmony_ci	data = malloc(ring_size * sizeof *data);
918c2ecf20Sopenharmony_ci	if (!data) {
928c2ecf20Sopenharmony_ci		perror("Unable to allocate data buffer.\n");
938c2ecf20Sopenharmony_ci		exit(3);
948c2ecf20Sopenharmony_ci	}
958c2ecf20Sopenharmony_ci	memset(data, 0, ring_size * sizeof *data);
968c2ecf20Sopenharmony_ci}
978c2ecf20Sopenharmony_ci
988c2ecf20Sopenharmony_ci/* guest side */
998c2ecf20Sopenharmony_ciint add_inbuf(unsigned len, void *buf, void *datap)
1008c2ecf20Sopenharmony_ci{
1018c2ecf20Sopenharmony_ci	unsigned head;
1028c2ecf20Sopenharmony_ci#ifndef INORDER
1038c2ecf20Sopenharmony_ci	unsigned avail;
1048c2ecf20Sopenharmony_ci#endif
1058c2ecf20Sopenharmony_ci	struct vring_desc *desc;
1068c2ecf20Sopenharmony_ci
1078c2ecf20Sopenharmony_ci	if (!guest.num_free)
1088c2ecf20Sopenharmony_ci		return -1;
1098c2ecf20Sopenharmony_ci
1108c2ecf20Sopenharmony_ci#ifdef INORDER
1118c2ecf20Sopenharmony_ci	head = (ring_size - 1) & (guest.avail_idx++);
1128c2ecf20Sopenharmony_ci#else
1138c2ecf20Sopenharmony_ci	head = guest.free_head;
1148c2ecf20Sopenharmony_ci#endif
1158c2ecf20Sopenharmony_ci	guest.num_free--;
1168c2ecf20Sopenharmony_ci
1178c2ecf20Sopenharmony_ci	desc = ring.desc;
1188c2ecf20Sopenharmony_ci	desc[head].flags = VRING_DESC_F_NEXT;
1198c2ecf20Sopenharmony_ci	desc[head].addr = (unsigned long)(void *)buf;
1208c2ecf20Sopenharmony_ci	desc[head].len = len;
1218c2ecf20Sopenharmony_ci	/* We do it like this to simulate the way
1228c2ecf20Sopenharmony_ci	 * we'd have to flip it if we had multiple
1238c2ecf20Sopenharmony_ci	 * descriptors.
1248c2ecf20Sopenharmony_ci	 */
1258c2ecf20Sopenharmony_ci	desc[head].flags &= ~VRING_DESC_F_NEXT;
1268c2ecf20Sopenharmony_ci#ifndef INORDER
1278c2ecf20Sopenharmony_ci	guest.free_head = desc[head].next;
1288c2ecf20Sopenharmony_ci#endif
1298c2ecf20Sopenharmony_ci
1308c2ecf20Sopenharmony_ci	data[head].data = datap;
1318c2ecf20Sopenharmony_ci
1328c2ecf20Sopenharmony_ci#ifdef RING_POLL
1338c2ecf20Sopenharmony_ci	/* Barrier A (for pairing) */
1348c2ecf20Sopenharmony_ci	smp_release();
1358c2ecf20Sopenharmony_ci	avail = guest.avail_idx++;
1368c2ecf20Sopenharmony_ci	ring.avail->ring[avail & (ring_size - 1)] =
1378c2ecf20Sopenharmony_ci		(head | (avail & ~(ring_size - 1))) ^ 0x8000;
1388c2ecf20Sopenharmony_ci#else
1398c2ecf20Sopenharmony_ci#ifndef INORDER
1408c2ecf20Sopenharmony_ci	/* Barrier A (for pairing) */
1418c2ecf20Sopenharmony_ci	smp_release();
1428c2ecf20Sopenharmony_ci	avail = (ring_size - 1) & (guest.avail_idx++);
1438c2ecf20Sopenharmony_ci	ring.avail->ring[avail] = head;
1448c2ecf20Sopenharmony_ci#endif
1458c2ecf20Sopenharmony_ci	/* Barrier A (for pairing) */
1468c2ecf20Sopenharmony_ci	smp_release();
1478c2ecf20Sopenharmony_ci#endif
1488c2ecf20Sopenharmony_ci	ring.avail->idx = guest.avail_idx;
1498c2ecf20Sopenharmony_ci	return 0;
1508c2ecf20Sopenharmony_ci}
1518c2ecf20Sopenharmony_ci
1528c2ecf20Sopenharmony_civoid *get_buf(unsigned *lenp, void **bufp)
1538c2ecf20Sopenharmony_ci{
1548c2ecf20Sopenharmony_ci	unsigned head;
1558c2ecf20Sopenharmony_ci	unsigned index;
1568c2ecf20Sopenharmony_ci	void *datap;
1578c2ecf20Sopenharmony_ci
1588c2ecf20Sopenharmony_ci#ifdef RING_POLL
1598c2ecf20Sopenharmony_ci	head = (ring_size - 1) & guest.last_used_idx;
1608c2ecf20Sopenharmony_ci	index = ring.used->ring[head].id;
1618c2ecf20Sopenharmony_ci	if ((index ^ guest.last_used_idx ^ 0x8000) & ~(ring_size - 1))
1628c2ecf20Sopenharmony_ci		return NULL;
1638c2ecf20Sopenharmony_ci	/* Barrier B (for pairing) */
1648c2ecf20Sopenharmony_ci	smp_acquire();
1658c2ecf20Sopenharmony_ci	index &= ring_size - 1;
1668c2ecf20Sopenharmony_ci#else
1678c2ecf20Sopenharmony_ci	if (ring.used->idx == guest.last_used_idx)
1688c2ecf20Sopenharmony_ci		return NULL;
1698c2ecf20Sopenharmony_ci	/* Barrier B (for pairing) */
1708c2ecf20Sopenharmony_ci	smp_acquire();
1718c2ecf20Sopenharmony_ci#ifdef INORDER
1728c2ecf20Sopenharmony_ci	head = (ring_size - 1) & guest.last_used_idx;
1738c2ecf20Sopenharmony_ci	index = head;
1748c2ecf20Sopenharmony_ci#else
1758c2ecf20Sopenharmony_ci	head = (ring_size - 1) & guest.last_used_idx;
1768c2ecf20Sopenharmony_ci	index = ring.used->ring[head].id;
1778c2ecf20Sopenharmony_ci#endif
1788c2ecf20Sopenharmony_ci
1798c2ecf20Sopenharmony_ci#endif
1808c2ecf20Sopenharmony_ci#ifdef INORDER
1818c2ecf20Sopenharmony_ci	*lenp = ring.desc[index].len;
1828c2ecf20Sopenharmony_ci#else
1838c2ecf20Sopenharmony_ci	*lenp = ring.used->ring[head].len;
1848c2ecf20Sopenharmony_ci#endif
1858c2ecf20Sopenharmony_ci	datap = data[index].data;
1868c2ecf20Sopenharmony_ci	*bufp = (void*)(unsigned long)ring.desc[index].addr;
1878c2ecf20Sopenharmony_ci	data[index].data = NULL;
1888c2ecf20Sopenharmony_ci#ifndef INORDER
1898c2ecf20Sopenharmony_ci	ring.desc[index].next = guest.free_head;
1908c2ecf20Sopenharmony_ci	guest.free_head = index;
1918c2ecf20Sopenharmony_ci#endif
1928c2ecf20Sopenharmony_ci	guest.num_free++;
1938c2ecf20Sopenharmony_ci	guest.last_used_idx++;
1948c2ecf20Sopenharmony_ci	return datap;
1958c2ecf20Sopenharmony_ci}
1968c2ecf20Sopenharmony_ci
1978c2ecf20Sopenharmony_cibool used_empty()
1988c2ecf20Sopenharmony_ci{
1998c2ecf20Sopenharmony_ci	unsigned short last_used_idx = guest.last_used_idx;
2008c2ecf20Sopenharmony_ci#ifdef RING_POLL
2018c2ecf20Sopenharmony_ci	unsigned short head = last_used_idx & (ring_size - 1);
2028c2ecf20Sopenharmony_ci	unsigned index = ring.used->ring[head].id;
2038c2ecf20Sopenharmony_ci
2048c2ecf20Sopenharmony_ci	return (index ^ last_used_idx ^ 0x8000) & ~(ring_size - 1);
2058c2ecf20Sopenharmony_ci#else
2068c2ecf20Sopenharmony_ci	return ring.used->idx == last_used_idx;
2078c2ecf20Sopenharmony_ci#endif
2088c2ecf20Sopenharmony_ci}
2098c2ecf20Sopenharmony_ci
/* Guest side: intentionally a no-op. */
void disable_call(void)
{
	/* Doing nothing to disable calls might cause
	 * extra interrupts, but reduces the number of cache misses.
	 */
}
2168c2ecf20Sopenharmony_ci
2178c2ecf20Sopenharmony_cibool enable_call()
2188c2ecf20Sopenharmony_ci{
2198c2ecf20Sopenharmony_ci	vring_used_event(&ring) = guest.last_used_idx;
2208c2ecf20Sopenharmony_ci	/* Flush call index write */
2218c2ecf20Sopenharmony_ci	/* Barrier D (for pairing) */
2228c2ecf20Sopenharmony_ci	smp_mb();
2238c2ecf20Sopenharmony_ci	return used_empty();
2248c2ecf20Sopenharmony_ci}
2258c2ecf20Sopenharmony_ci
2268c2ecf20Sopenharmony_civoid kick_available(void)
2278c2ecf20Sopenharmony_ci{
2288c2ecf20Sopenharmony_ci	bool need;
2298c2ecf20Sopenharmony_ci
2308c2ecf20Sopenharmony_ci	/* Flush in previous flags write */
2318c2ecf20Sopenharmony_ci	/* Barrier C (for pairing) */
2328c2ecf20Sopenharmony_ci	smp_mb();
2338c2ecf20Sopenharmony_ci	need = vring_need_event(vring_avail_event(&ring),
2348c2ecf20Sopenharmony_ci				guest.avail_idx,
2358c2ecf20Sopenharmony_ci				guest.kicked_avail_idx);
2368c2ecf20Sopenharmony_ci
2378c2ecf20Sopenharmony_ci	guest.kicked_avail_idx = guest.avail_idx;
2388c2ecf20Sopenharmony_ci	if (need)
2398c2ecf20Sopenharmony_ci		kick();
2408c2ecf20Sopenharmony_ci}
2418c2ecf20Sopenharmony_ci
2428c2ecf20Sopenharmony_ci/* host side */
/* Host side: intentionally a no-op. */
void disable_kick(void)
{
	/* Doing nothing to disable kicks might cause
	 * extra interrupts, but reduces the number of cache misses.
	 */
}
2498c2ecf20Sopenharmony_ci
2508c2ecf20Sopenharmony_cibool enable_kick()
2518c2ecf20Sopenharmony_ci{
2528c2ecf20Sopenharmony_ci	vring_avail_event(&ring) = host.used_idx;
2538c2ecf20Sopenharmony_ci	/* Barrier C (for pairing) */
2548c2ecf20Sopenharmony_ci	smp_mb();
2558c2ecf20Sopenharmony_ci	return avail_empty();
2568c2ecf20Sopenharmony_ci}
2578c2ecf20Sopenharmony_ci
2588c2ecf20Sopenharmony_cibool avail_empty()
2598c2ecf20Sopenharmony_ci{
2608c2ecf20Sopenharmony_ci	unsigned head = host.used_idx;
2618c2ecf20Sopenharmony_ci#ifdef RING_POLL
2628c2ecf20Sopenharmony_ci	unsigned index = ring.avail->ring[head & (ring_size - 1)];
2638c2ecf20Sopenharmony_ci
2648c2ecf20Sopenharmony_ci	return ((index ^ head ^ 0x8000) & ~(ring_size - 1));
2658c2ecf20Sopenharmony_ci#else
2668c2ecf20Sopenharmony_ci	return head == ring.avail->idx;
2678c2ecf20Sopenharmony_ci#endif
2688c2ecf20Sopenharmony_ci}
2698c2ecf20Sopenharmony_ci
2708c2ecf20Sopenharmony_cibool use_buf(unsigned *lenp, void **bufp)
2718c2ecf20Sopenharmony_ci{
2728c2ecf20Sopenharmony_ci	unsigned used_idx = host.used_idx;
2738c2ecf20Sopenharmony_ci	struct vring_desc *desc;
2748c2ecf20Sopenharmony_ci	unsigned head;
2758c2ecf20Sopenharmony_ci
2768c2ecf20Sopenharmony_ci#ifdef RING_POLL
2778c2ecf20Sopenharmony_ci	head = ring.avail->ring[used_idx & (ring_size - 1)];
2788c2ecf20Sopenharmony_ci	if ((used_idx ^ head ^ 0x8000) & ~(ring_size - 1))
2798c2ecf20Sopenharmony_ci		return false;
2808c2ecf20Sopenharmony_ci	/* Barrier A (for pairing) */
2818c2ecf20Sopenharmony_ci	smp_acquire();
2828c2ecf20Sopenharmony_ci
2838c2ecf20Sopenharmony_ci	used_idx &= ring_size - 1;
2848c2ecf20Sopenharmony_ci	desc = &ring.desc[head & (ring_size - 1)];
2858c2ecf20Sopenharmony_ci#else
2868c2ecf20Sopenharmony_ci	if (used_idx == ring.avail->idx)
2878c2ecf20Sopenharmony_ci		return false;
2888c2ecf20Sopenharmony_ci
2898c2ecf20Sopenharmony_ci	/* Barrier A (for pairing) */
2908c2ecf20Sopenharmony_ci	smp_acquire();
2918c2ecf20Sopenharmony_ci
2928c2ecf20Sopenharmony_ci	used_idx &= ring_size - 1;
2938c2ecf20Sopenharmony_ci#ifdef INORDER
2948c2ecf20Sopenharmony_ci	head = used_idx;
2958c2ecf20Sopenharmony_ci#else
2968c2ecf20Sopenharmony_ci	head = ring.avail->ring[used_idx];
2978c2ecf20Sopenharmony_ci#endif
2988c2ecf20Sopenharmony_ci	desc = &ring.desc[head];
2998c2ecf20Sopenharmony_ci#endif
3008c2ecf20Sopenharmony_ci
3018c2ecf20Sopenharmony_ci	*lenp = desc->len;
3028c2ecf20Sopenharmony_ci	*bufp = (void *)(unsigned long)desc->addr;
3038c2ecf20Sopenharmony_ci
3048c2ecf20Sopenharmony_ci#ifdef INORDER
3058c2ecf20Sopenharmony_ci	desc->len = desc->len - 1;
3068c2ecf20Sopenharmony_ci#else
3078c2ecf20Sopenharmony_ci	/* now update used ring */
3088c2ecf20Sopenharmony_ci	ring.used->ring[used_idx].id = head;
3098c2ecf20Sopenharmony_ci	ring.used->ring[used_idx].len = desc->len - 1;
3108c2ecf20Sopenharmony_ci#endif
3118c2ecf20Sopenharmony_ci	/* Barrier B (for pairing) */
3128c2ecf20Sopenharmony_ci	smp_release();
3138c2ecf20Sopenharmony_ci	host.used_idx++;
3148c2ecf20Sopenharmony_ci	ring.used->idx = host.used_idx;
3158c2ecf20Sopenharmony_ci
3168c2ecf20Sopenharmony_ci	return true;
3178c2ecf20Sopenharmony_ci}
3188c2ecf20Sopenharmony_ci
3198c2ecf20Sopenharmony_civoid call_used(void)
3208c2ecf20Sopenharmony_ci{
3218c2ecf20Sopenharmony_ci	bool need;
3228c2ecf20Sopenharmony_ci
3238c2ecf20Sopenharmony_ci	/* Flush in previous flags write */
3248c2ecf20Sopenharmony_ci	/* Barrier D (for pairing) */
3258c2ecf20Sopenharmony_ci	smp_mb();
3268c2ecf20Sopenharmony_ci	need = vring_need_event(vring_used_event(&ring),
3278c2ecf20Sopenharmony_ci				host.used_idx,
3288c2ecf20Sopenharmony_ci				host.called_used_idx);
3298c2ecf20Sopenharmony_ci
3308c2ecf20Sopenharmony_ci	host.called_used_idx = host.used_idx;
3318c2ecf20Sopenharmony_ci	if (need)
3328c2ecf20Sopenharmony_ci		call();
3338c2ecf20Sopenharmony_ci}
334