18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * Copyright (C) 2016 Red Hat, Inc. 48c2ecf20Sopenharmony_ci * Author: Michael S. Tsirkin <mst@redhat.com> 58c2ecf20Sopenharmony_ci * 68c2ecf20Sopenharmony_ci * Partial implementation of virtio 0.9. event index is used for signalling, 78c2ecf20Sopenharmony_ci * unconditionally. Design roughly follows linux kernel implementation in order 88c2ecf20Sopenharmony_ci * to be able to judge its performance. 98c2ecf20Sopenharmony_ci */ 108c2ecf20Sopenharmony_ci#define _GNU_SOURCE 118c2ecf20Sopenharmony_ci#include "main.h" 128c2ecf20Sopenharmony_ci#include <stdlib.h> 138c2ecf20Sopenharmony_ci#include <stdio.h> 148c2ecf20Sopenharmony_ci#include <assert.h> 158c2ecf20Sopenharmony_ci#include <string.h> 168c2ecf20Sopenharmony_ci#include <linux/virtio_ring.h> 178c2ecf20Sopenharmony_ci 188c2ecf20Sopenharmony_cistruct data { 198c2ecf20Sopenharmony_ci void *data; 208c2ecf20Sopenharmony_ci} *data; 218c2ecf20Sopenharmony_ci 228c2ecf20Sopenharmony_cistruct vring ring; 238c2ecf20Sopenharmony_ci 248c2ecf20Sopenharmony_ci/* enabling the below activates experimental ring polling code 258c2ecf20Sopenharmony_ci * (which skips index reads on consumer in favor of looking at 268c2ecf20Sopenharmony_ci * high bits of ring id ^ 0x8000). 278c2ecf20Sopenharmony_ci */ 288c2ecf20Sopenharmony_ci/* #ifdef RING_POLL */ 298c2ecf20Sopenharmony_ci/* enabling the below activates experimental in-order code 308c2ecf20Sopenharmony_ci * (which skips ring updates and reads and writes len in descriptor). 318c2ecf20Sopenharmony_ci */ 328c2ecf20Sopenharmony_ci/* #ifdef INORDER */ 338c2ecf20Sopenharmony_ci 348c2ecf20Sopenharmony_ci#if defined(RING_POLL) && defined(INORDER) 358c2ecf20Sopenharmony_ci#error "RING_POLL and INORDER are mutually exclusive" 368c2ecf20Sopenharmony_ci#endif 378c2ecf20Sopenharmony_ci 388c2ecf20Sopenharmony_ci/* how much padding is needed to avoid false cache sharing */ 398c2ecf20Sopenharmony_ci#define HOST_GUEST_PADDING 0x80 408c2ecf20Sopenharmony_ci 418c2ecf20Sopenharmony_cistruct guest { 428c2ecf20Sopenharmony_ci unsigned short avail_idx; 438c2ecf20Sopenharmony_ci unsigned short last_used_idx; 448c2ecf20Sopenharmony_ci unsigned short num_free; 458c2ecf20Sopenharmony_ci unsigned short kicked_avail_idx; 468c2ecf20Sopenharmony_ci#ifndef INORDER 478c2ecf20Sopenharmony_ci unsigned short free_head; 488c2ecf20Sopenharmony_ci#else 498c2ecf20Sopenharmony_ci unsigned short reserved_free_head; 508c2ecf20Sopenharmony_ci#endif 518c2ecf20Sopenharmony_ci unsigned char reserved[HOST_GUEST_PADDING - 10]; 528c2ecf20Sopenharmony_ci} guest; 538c2ecf20Sopenharmony_ci 548c2ecf20Sopenharmony_cistruct host { 558c2ecf20Sopenharmony_ci /* we do not need to track last avail index 568c2ecf20Sopenharmony_ci * unless we have more than one in flight. 578c2ecf20Sopenharmony_ci */ 588c2ecf20Sopenharmony_ci unsigned short used_idx; 598c2ecf20Sopenharmony_ci unsigned short called_used_idx; 608c2ecf20Sopenharmony_ci unsigned char reserved[HOST_GUEST_PADDING - 4]; 618c2ecf20Sopenharmony_ci} host; 628c2ecf20Sopenharmony_ci 638c2ecf20Sopenharmony_ci/* implemented by ring */ 648c2ecf20Sopenharmony_civoid alloc_ring(void) 658c2ecf20Sopenharmony_ci{ 668c2ecf20Sopenharmony_ci int ret; 678c2ecf20Sopenharmony_ci int i; 688c2ecf20Sopenharmony_ci void *p; 698c2ecf20Sopenharmony_ci 708c2ecf20Sopenharmony_ci ret = posix_memalign(&p, 0x1000, vring_size(ring_size, 0x1000)); 718c2ecf20Sopenharmony_ci if (ret) { 728c2ecf20Sopenharmony_ci perror("Unable to allocate ring buffer.\n"); 738c2ecf20Sopenharmony_ci exit(3); 748c2ecf20Sopenharmony_ci } 758c2ecf20Sopenharmony_ci memset(p, 0, vring_size(ring_size, 0x1000)); 768c2ecf20Sopenharmony_ci vring_init(&ring, ring_size, p, 0x1000); 778c2ecf20Sopenharmony_ci 788c2ecf20Sopenharmony_ci guest.avail_idx = 0; 798c2ecf20Sopenharmony_ci guest.kicked_avail_idx = -1; 808c2ecf20Sopenharmony_ci guest.last_used_idx = 0; 818c2ecf20Sopenharmony_ci#ifndef INORDER 828c2ecf20Sopenharmony_ci /* Put everything in free lists. */ 838c2ecf20Sopenharmony_ci guest.free_head = 0; 848c2ecf20Sopenharmony_ci#endif 858c2ecf20Sopenharmony_ci for (i = 0; i < ring_size - 1; i++) 868c2ecf20Sopenharmony_ci ring.desc[i].next = i + 1; 878c2ecf20Sopenharmony_ci host.used_idx = 0; 888c2ecf20Sopenharmony_ci host.called_used_idx = -1; 898c2ecf20Sopenharmony_ci guest.num_free = ring_size; 908c2ecf20Sopenharmony_ci data = malloc(ring_size * sizeof *data); 918c2ecf20Sopenharmony_ci if (!data) { 928c2ecf20Sopenharmony_ci perror("Unable to allocate data buffer.\n"); 938c2ecf20Sopenharmony_ci exit(3); 948c2ecf20Sopenharmony_ci } 958c2ecf20Sopenharmony_ci memset(data, 0, ring_size * sizeof *data); 968c2ecf20Sopenharmony_ci} 978c2ecf20Sopenharmony_ci 988c2ecf20Sopenharmony_ci/* guest side */ 998c2ecf20Sopenharmony_ciint add_inbuf(unsigned len, void *buf, void *datap) 1008c2ecf20Sopenharmony_ci{ 1018c2ecf20Sopenharmony_ci unsigned head; 1028c2ecf20Sopenharmony_ci#ifndef INORDER 1038c2ecf20Sopenharmony_ci unsigned avail; 1048c2ecf20Sopenharmony_ci#endif 1058c2ecf20Sopenharmony_ci struct vring_desc *desc; 1068c2ecf20Sopenharmony_ci 1078c2ecf20Sopenharmony_ci if (!guest.num_free) 1088c2ecf20Sopenharmony_ci return -1; 1098c2ecf20Sopenharmony_ci 1108c2ecf20Sopenharmony_ci#ifdef INORDER 1118c2ecf20Sopenharmony_ci head = (ring_size - 1) & (guest.avail_idx++); 1128c2ecf20Sopenharmony_ci#else 1138c2ecf20Sopenharmony_ci head = guest.free_head; 1148c2ecf20Sopenharmony_ci#endif 1158c2ecf20Sopenharmony_ci guest.num_free--; 1168c2ecf20Sopenharmony_ci 1178c2ecf20Sopenharmony_ci desc = ring.desc; 1188c2ecf20Sopenharmony_ci desc[head].flags = VRING_DESC_F_NEXT; 1198c2ecf20Sopenharmony_ci desc[head].addr = (unsigned long)(void *)buf; 1208c2ecf20Sopenharmony_ci desc[head].len = len; 1218c2ecf20Sopenharmony_ci /* We do it like this to simulate the way 1228c2ecf20Sopenharmony_ci * we'd have to flip it if we had multiple 1238c2ecf20Sopenharmony_ci * descriptors. 1248c2ecf20Sopenharmony_ci */ 1258c2ecf20Sopenharmony_ci desc[head].flags &= ~VRING_DESC_F_NEXT; 1268c2ecf20Sopenharmony_ci#ifndef INORDER 1278c2ecf20Sopenharmony_ci guest.free_head = desc[head].next; 1288c2ecf20Sopenharmony_ci#endif 1298c2ecf20Sopenharmony_ci 1308c2ecf20Sopenharmony_ci data[head].data = datap; 1318c2ecf20Sopenharmony_ci 1328c2ecf20Sopenharmony_ci#ifdef RING_POLL 1338c2ecf20Sopenharmony_ci /* Barrier A (for pairing) */ 1348c2ecf20Sopenharmony_ci smp_release(); 1358c2ecf20Sopenharmony_ci avail = guest.avail_idx++; 1368c2ecf20Sopenharmony_ci ring.avail->ring[avail & (ring_size - 1)] = 1378c2ecf20Sopenharmony_ci (head | (avail & ~(ring_size - 1))) ^ 0x8000; 1388c2ecf20Sopenharmony_ci#else 1398c2ecf20Sopenharmony_ci#ifndef INORDER 1408c2ecf20Sopenharmony_ci /* Barrier A (for pairing) */ 1418c2ecf20Sopenharmony_ci smp_release(); 1428c2ecf20Sopenharmony_ci avail = (ring_size - 1) & (guest.avail_idx++); 1438c2ecf20Sopenharmony_ci ring.avail->ring[avail] = head; 1448c2ecf20Sopenharmony_ci#endif 1458c2ecf20Sopenharmony_ci /* Barrier A (for pairing) */ 1468c2ecf20Sopenharmony_ci smp_release(); 1478c2ecf20Sopenharmony_ci#endif 1488c2ecf20Sopenharmony_ci ring.avail->idx = guest.avail_idx; 1498c2ecf20Sopenharmony_ci return 0; 1508c2ecf20Sopenharmony_ci} 1518c2ecf20Sopenharmony_ci 1528c2ecf20Sopenharmony_civoid *get_buf(unsigned *lenp, void **bufp) 1538c2ecf20Sopenharmony_ci{ 1548c2ecf20Sopenharmony_ci unsigned head; 1558c2ecf20Sopenharmony_ci unsigned index; 1568c2ecf20Sopenharmony_ci void *datap; 1578c2ecf20Sopenharmony_ci 1588c2ecf20Sopenharmony_ci#ifdef RING_POLL 1598c2ecf20Sopenharmony_ci head = (ring_size - 1) & guest.last_used_idx; 1608c2ecf20Sopenharmony_ci index = ring.used->ring[head].id; 1618c2ecf20Sopenharmony_ci if ((index ^ guest.last_used_idx ^ 0x8000) & ~(ring_size - 1)) 1628c2ecf20Sopenharmony_ci return NULL; 1638c2ecf20Sopenharmony_ci /* Barrier B (for pairing) */ 1648c2ecf20Sopenharmony_ci smp_acquire(); 1658c2ecf20Sopenharmony_ci index &= ring_size - 1; 1668c2ecf20Sopenharmony_ci#else 1678c2ecf20Sopenharmony_ci if (ring.used->idx == guest.last_used_idx) 1688c2ecf20Sopenharmony_ci return NULL; 1698c2ecf20Sopenharmony_ci /* Barrier B (for pairing) */ 1708c2ecf20Sopenharmony_ci smp_acquire(); 1718c2ecf20Sopenharmony_ci#ifdef INORDER 1728c2ecf20Sopenharmony_ci head = (ring_size - 1) & guest.last_used_idx; 1738c2ecf20Sopenharmony_ci index = head; 1748c2ecf20Sopenharmony_ci#else 1758c2ecf20Sopenharmony_ci head = (ring_size - 1) & guest.last_used_idx; 1768c2ecf20Sopenharmony_ci index = ring.used->ring[head].id; 1778c2ecf20Sopenharmony_ci#endif 1788c2ecf20Sopenharmony_ci 1798c2ecf20Sopenharmony_ci#endif 1808c2ecf20Sopenharmony_ci#ifdef INORDER 1818c2ecf20Sopenharmony_ci *lenp = ring.desc[index].len; 1828c2ecf20Sopenharmony_ci#else 1838c2ecf20Sopenharmony_ci *lenp = ring.used->ring[head].len; 1848c2ecf20Sopenharmony_ci#endif 1858c2ecf20Sopenharmony_ci datap = data[index].data; 1868c2ecf20Sopenharmony_ci *bufp = (void*)(unsigned long)ring.desc[index].addr; 1878c2ecf20Sopenharmony_ci data[index].data = NULL; 1888c2ecf20Sopenharmony_ci#ifndef INORDER 1898c2ecf20Sopenharmony_ci ring.desc[index].next = guest.free_head; 1908c2ecf20Sopenharmony_ci guest.free_head = index; 1918c2ecf20Sopenharmony_ci#endif 1928c2ecf20Sopenharmony_ci guest.num_free++; 1938c2ecf20Sopenharmony_ci guest.last_used_idx++; 1948c2ecf20Sopenharmony_ci return datap; 1958c2ecf20Sopenharmony_ci} 1968c2ecf20Sopenharmony_ci 1978c2ecf20Sopenharmony_cibool used_empty() 1988c2ecf20Sopenharmony_ci{ 1998c2ecf20Sopenharmony_ci unsigned short last_used_idx = guest.last_used_idx; 2008c2ecf20Sopenharmony_ci#ifdef RING_POLL 2018c2ecf20Sopenharmony_ci unsigned short head = last_used_idx & (ring_size - 1); 2028c2ecf20Sopenharmony_ci unsigned index = ring.used->ring[head].id; 2038c2ecf20Sopenharmony_ci 2048c2ecf20Sopenharmony_ci return (index ^ last_used_idx ^ 0x8000) & ~(ring_size - 1); 2058c2ecf20Sopenharmony_ci#else 2068c2ecf20Sopenharmony_ci return ring.used->idx == last_used_idx; 2078c2ecf20Sopenharmony_ci#endif 2088c2ecf20Sopenharmony_ci} 2098c2ecf20Sopenharmony_ci 2108c2ecf20Sopenharmony_civoid disable_call() 2118c2ecf20Sopenharmony_ci{ 2128c2ecf20Sopenharmony_ci /* Doing nothing to disable calls might cause 2138c2ecf20Sopenharmony_ci * extra interrupts, but reduces the number of cache misses. 2148c2ecf20Sopenharmony_ci */ 2158c2ecf20Sopenharmony_ci} 2168c2ecf20Sopenharmony_ci 2178c2ecf20Sopenharmony_cibool enable_call() 2188c2ecf20Sopenharmony_ci{ 2198c2ecf20Sopenharmony_ci vring_used_event(&ring) = guest.last_used_idx; 2208c2ecf20Sopenharmony_ci /* Flush call index write */ 2218c2ecf20Sopenharmony_ci /* Barrier D (for pairing) */ 2228c2ecf20Sopenharmony_ci smp_mb(); 2238c2ecf20Sopenharmony_ci return used_empty(); 2248c2ecf20Sopenharmony_ci} 2258c2ecf20Sopenharmony_ci 2268c2ecf20Sopenharmony_civoid kick_available(void) 2278c2ecf20Sopenharmony_ci{ 2288c2ecf20Sopenharmony_ci bool need; 2298c2ecf20Sopenharmony_ci 2308c2ecf20Sopenharmony_ci /* Flush in previous flags write */ 2318c2ecf20Sopenharmony_ci /* Barrier C (for pairing) */ 2328c2ecf20Sopenharmony_ci smp_mb(); 2338c2ecf20Sopenharmony_ci need = vring_need_event(vring_avail_event(&ring), 2348c2ecf20Sopenharmony_ci guest.avail_idx, 2358c2ecf20Sopenharmony_ci guest.kicked_avail_idx); 2368c2ecf20Sopenharmony_ci 2378c2ecf20Sopenharmony_ci guest.kicked_avail_idx = guest.avail_idx; 2388c2ecf20Sopenharmony_ci if (need) 2398c2ecf20Sopenharmony_ci kick(); 2408c2ecf20Sopenharmony_ci} 2418c2ecf20Sopenharmony_ci 2428c2ecf20Sopenharmony_ci/* host side */ 2438c2ecf20Sopenharmony_civoid disable_kick() 2448c2ecf20Sopenharmony_ci{ 2458c2ecf20Sopenharmony_ci /* Doing nothing to disable kicks might cause 2468c2ecf20Sopenharmony_ci * extra interrupts, but reduces the number of cache misses. 2478c2ecf20Sopenharmony_ci */ 2488c2ecf20Sopenharmony_ci} 2498c2ecf20Sopenharmony_ci 2508c2ecf20Sopenharmony_cibool enable_kick() 2518c2ecf20Sopenharmony_ci{ 2528c2ecf20Sopenharmony_ci vring_avail_event(&ring) = host.used_idx; 2538c2ecf20Sopenharmony_ci /* Barrier C (for pairing) */ 2548c2ecf20Sopenharmony_ci smp_mb(); 2558c2ecf20Sopenharmony_ci return avail_empty(); 2568c2ecf20Sopenharmony_ci} 2578c2ecf20Sopenharmony_ci 2588c2ecf20Sopenharmony_cibool avail_empty() 2598c2ecf20Sopenharmony_ci{ 2608c2ecf20Sopenharmony_ci unsigned head = host.used_idx; 2618c2ecf20Sopenharmony_ci#ifdef RING_POLL 2628c2ecf20Sopenharmony_ci unsigned index = ring.avail->ring[head & (ring_size - 1)]; 2638c2ecf20Sopenharmony_ci 2648c2ecf20Sopenharmony_ci return ((index ^ head ^ 0x8000) & ~(ring_size - 1)); 2658c2ecf20Sopenharmony_ci#else 2668c2ecf20Sopenharmony_ci return head == ring.avail->idx; 2678c2ecf20Sopenharmony_ci#endif 2688c2ecf20Sopenharmony_ci} 2698c2ecf20Sopenharmony_ci 2708c2ecf20Sopenharmony_cibool use_buf(unsigned *lenp, void **bufp) 2718c2ecf20Sopenharmony_ci{ 2728c2ecf20Sopenharmony_ci unsigned used_idx = host.used_idx; 2738c2ecf20Sopenharmony_ci struct vring_desc *desc; 2748c2ecf20Sopenharmony_ci unsigned head; 2758c2ecf20Sopenharmony_ci 2768c2ecf20Sopenharmony_ci#ifdef RING_POLL 2778c2ecf20Sopenharmony_ci head = ring.avail->ring[used_idx & (ring_size - 1)]; 2788c2ecf20Sopenharmony_ci if ((used_idx ^ head ^ 0x8000) & ~(ring_size - 1)) 2798c2ecf20Sopenharmony_ci return false; 2808c2ecf20Sopenharmony_ci /* Barrier A (for pairing) */ 2818c2ecf20Sopenharmony_ci smp_acquire(); 2828c2ecf20Sopenharmony_ci 2838c2ecf20Sopenharmony_ci used_idx &= ring_size - 1; 2848c2ecf20Sopenharmony_ci desc = &ring.desc[head & (ring_size - 1)]; 2858c2ecf20Sopenharmony_ci#else 2868c2ecf20Sopenharmony_ci if (used_idx == ring.avail->idx) 2878c2ecf20Sopenharmony_ci return false; 2888c2ecf20Sopenharmony_ci 2898c2ecf20Sopenharmony_ci /* Barrier A (for pairing) */ 2908c2ecf20Sopenharmony_ci smp_acquire(); 2918c2ecf20Sopenharmony_ci 2928c2ecf20Sopenharmony_ci used_idx &= ring_size - 1; 2938c2ecf20Sopenharmony_ci#ifdef INORDER 2948c2ecf20Sopenharmony_ci head = used_idx; 2958c2ecf20Sopenharmony_ci#else 2968c2ecf20Sopenharmony_ci head = ring.avail->ring[used_idx]; 2978c2ecf20Sopenharmony_ci#endif 2988c2ecf20Sopenharmony_ci desc = &ring.desc[head]; 2998c2ecf20Sopenharmony_ci#endif 3008c2ecf20Sopenharmony_ci 3018c2ecf20Sopenharmony_ci *lenp = desc->len; 3028c2ecf20Sopenharmony_ci *bufp = (void *)(unsigned long)desc->addr; 3038c2ecf20Sopenharmony_ci 3048c2ecf20Sopenharmony_ci#ifdef INORDER 3058c2ecf20Sopenharmony_ci desc->len = desc->len - 1; 3068c2ecf20Sopenharmony_ci#else 3078c2ecf20Sopenharmony_ci /* now update used ring */ 3088c2ecf20Sopenharmony_ci ring.used->ring[used_idx].id = head; 3098c2ecf20Sopenharmony_ci ring.used->ring[used_idx].len = desc->len - 1; 3108c2ecf20Sopenharmony_ci#endif 3118c2ecf20Sopenharmony_ci /* Barrier B (for pairing) */ 3128c2ecf20Sopenharmony_ci smp_release(); 3138c2ecf20Sopenharmony_ci host.used_idx++; 3148c2ecf20Sopenharmony_ci ring.used->idx = host.used_idx; 3158c2ecf20Sopenharmony_ci 3168c2ecf20Sopenharmony_ci return true; 3178c2ecf20Sopenharmony_ci} 3188c2ecf20Sopenharmony_ci 3198c2ecf20Sopenharmony_civoid call_used(void) 3208c2ecf20Sopenharmony_ci{ 3218c2ecf20Sopenharmony_ci bool need; 3228c2ecf20Sopenharmony_ci 3238c2ecf20Sopenharmony_ci /* Flush in previous flags write */ 3248c2ecf20Sopenharmony_ci /* Barrier D (for pairing) */ 3258c2ecf20Sopenharmony_ci smp_mb(); 3268c2ecf20Sopenharmony_ci need = vring_need_event(vring_used_event(&ring), 3278c2ecf20Sopenharmony_ci host.used_idx, 3288c2ecf20Sopenharmony_ci host.called_used_idx); 3298c2ecf20Sopenharmony_ci 3308c2ecf20Sopenharmony_ci host.called_used_idx = host.used_idx; 3318c2ecf20Sopenharmony_ci if (need) 3328c2ecf20Sopenharmony_ci call(); 3338c2ecf20Sopenharmony_ci} 334