18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * Copyright (C) 2016 Red Hat, Inc. 48c2ecf20Sopenharmony_ci * Author: Michael S. Tsirkin <mst@redhat.com> 58c2ecf20Sopenharmony_ci * 68c2ecf20Sopenharmony_ci * Simple descriptor-based ring. virtio 0.9 compatible event index is used for 78c2ecf20Sopenharmony_ci * signalling, unconditionally. 88c2ecf20Sopenharmony_ci */ 98c2ecf20Sopenharmony_ci#define _GNU_SOURCE 108c2ecf20Sopenharmony_ci#include "main.h" 118c2ecf20Sopenharmony_ci#include <stdlib.h> 128c2ecf20Sopenharmony_ci#include <stdio.h> 138c2ecf20Sopenharmony_ci#include <string.h> 148c2ecf20Sopenharmony_ci 158c2ecf20Sopenharmony_ci/* Next - Where next entry will be written. 168c2ecf20Sopenharmony_ci * Prev - "Next" value when event triggered previously. 178c2ecf20Sopenharmony_ci * Event - Peer requested event after writing this entry. 188c2ecf20Sopenharmony_ci */ 198c2ecf20Sopenharmony_cistatic inline bool need_event(unsigned short event, 208c2ecf20Sopenharmony_ci unsigned short next, 218c2ecf20Sopenharmony_ci unsigned short prev) 228c2ecf20Sopenharmony_ci{ 238c2ecf20Sopenharmony_ci return (unsigned short)(next - event - 1) < (unsigned short)(next - prev); 248c2ecf20Sopenharmony_ci} 258c2ecf20Sopenharmony_ci 268c2ecf20Sopenharmony_ci/* Design: 278c2ecf20Sopenharmony_ci * Guest adds descriptors with unique index values and DESC_HW in flags. 288c2ecf20Sopenharmony_ci * Host overwrites used descriptors with correct len, index, and DESC_HW clear. 298c2ecf20Sopenharmony_ci * Flags are always set last. 308c2ecf20Sopenharmony_ci */ 318c2ecf20Sopenharmony_ci#define DESC_HW 0x1 328c2ecf20Sopenharmony_ci 338c2ecf20Sopenharmony_cistruct desc { 348c2ecf20Sopenharmony_ci unsigned short flags; 358c2ecf20Sopenharmony_ci unsigned short index; 368c2ecf20Sopenharmony_ci unsigned len; 378c2ecf20Sopenharmony_ci unsigned long long addr; 388c2ecf20Sopenharmony_ci}; 398c2ecf20Sopenharmony_ci 408c2ecf20Sopenharmony_ci/* how much padding is needed to avoid false cache sharing */ 418c2ecf20Sopenharmony_ci#define HOST_GUEST_PADDING 0x80 428c2ecf20Sopenharmony_ci 438c2ecf20Sopenharmony_ci/* Mostly read */ 448c2ecf20Sopenharmony_cistruct event { 458c2ecf20Sopenharmony_ci unsigned short kick_index; 468c2ecf20Sopenharmony_ci unsigned char reserved0[HOST_GUEST_PADDING - 2]; 478c2ecf20Sopenharmony_ci unsigned short call_index; 488c2ecf20Sopenharmony_ci unsigned char reserved1[HOST_GUEST_PADDING - 2]; 498c2ecf20Sopenharmony_ci}; 508c2ecf20Sopenharmony_ci 518c2ecf20Sopenharmony_cistruct data { 528c2ecf20Sopenharmony_ci void *buf; /* descriptor is writeable, we can't get buf from there */ 538c2ecf20Sopenharmony_ci void *data; 548c2ecf20Sopenharmony_ci} *data; 558c2ecf20Sopenharmony_ci 568c2ecf20Sopenharmony_cistruct desc *ring; 578c2ecf20Sopenharmony_cistruct event *event; 588c2ecf20Sopenharmony_ci 598c2ecf20Sopenharmony_cistruct guest { 608c2ecf20Sopenharmony_ci unsigned avail_idx; 618c2ecf20Sopenharmony_ci unsigned last_used_idx; 628c2ecf20Sopenharmony_ci unsigned num_free; 638c2ecf20Sopenharmony_ci unsigned kicked_avail_idx; 648c2ecf20Sopenharmony_ci unsigned char reserved[HOST_GUEST_PADDING - 12]; 658c2ecf20Sopenharmony_ci} guest; 668c2ecf20Sopenharmony_ci 678c2ecf20Sopenharmony_cistruct host { 688c2ecf20Sopenharmony_ci /* we do not need to track last avail index 698c2ecf20Sopenharmony_ci * unless we have more than one in flight. 708c2ecf20Sopenharmony_ci */ 718c2ecf20Sopenharmony_ci unsigned used_idx; 728c2ecf20Sopenharmony_ci unsigned called_used_idx; 738c2ecf20Sopenharmony_ci unsigned char reserved[HOST_GUEST_PADDING - 4]; 748c2ecf20Sopenharmony_ci} host; 758c2ecf20Sopenharmony_ci 768c2ecf20Sopenharmony_ci/* implemented by ring */ 778c2ecf20Sopenharmony_civoid alloc_ring(void) 788c2ecf20Sopenharmony_ci{ 798c2ecf20Sopenharmony_ci int ret; 808c2ecf20Sopenharmony_ci int i; 818c2ecf20Sopenharmony_ci 828c2ecf20Sopenharmony_ci ret = posix_memalign((void **)&ring, 0x1000, ring_size * sizeof *ring); 838c2ecf20Sopenharmony_ci if (ret) { 848c2ecf20Sopenharmony_ci perror("Unable to allocate ring buffer.\n"); 858c2ecf20Sopenharmony_ci exit(3); 868c2ecf20Sopenharmony_ci } 878c2ecf20Sopenharmony_ci event = calloc(1, sizeof(*event)); 888c2ecf20Sopenharmony_ci if (!event) { 898c2ecf20Sopenharmony_ci perror("Unable to allocate event buffer.\n"); 908c2ecf20Sopenharmony_ci exit(3); 918c2ecf20Sopenharmony_ci } 928c2ecf20Sopenharmony_ci guest.avail_idx = 0; 938c2ecf20Sopenharmony_ci guest.kicked_avail_idx = -1; 948c2ecf20Sopenharmony_ci guest.last_used_idx = 0; 958c2ecf20Sopenharmony_ci host.used_idx = 0; 968c2ecf20Sopenharmony_ci host.called_used_idx = -1; 978c2ecf20Sopenharmony_ci for (i = 0; i < ring_size; ++i) { 988c2ecf20Sopenharmony_ci struct desc desc = { 998c2ecf20Sopenharmony_ci .index = i, 1008c2ecf20Sopenharmony_ci }; 1018c2ecf20Sopenharmony_ci ring[i] = desc; 1028c2ecf20Sopenharmony_ci } 1038c2ecf20Sopenharmony_ci guest.num_free = ring_size; 1048c2ecf20Sopenharmony_ci data = calloc(ring_size, sizeof(*data)); 1058c2ecf20Sopenharmony_ci if (!data) { 1068c2ecf20Sopenharmony_ci perror("Unable to allocate data buffer.\n"); 1078c2ecf20Sopenharmony_ci exit(3); 1088c2ecf20Sopenharmony_ci } 1098c2ecf20Sopenharmony_ci} 1108c2ecf20Sopenharmony_ci 1118c2ecf20Sopenharmony_ci/* guest side */ 1128c2ecf20Sopenharmony_ciint add_inbuf(unsigned len, void *buf, void *datap) 1138c2ecf20Sopenharmony_ci{ 1148c2ecf20Sopenharmony_ci unsigned head, index; 1158c2ecf20Sopenharmony_ci 1168c2ecf20Sopenharmony_ci if (!guest.num_free) 1178c2ecf20Sopenharmony_ci return -1; 1188c2ecf20Sopenharmony_ci 1198c2ecf20Sopenharmony_ci guest.num_free--; 1208c2ecf20Sopenharmony_ci head = (ring_size - 1) & (guest.avail_idx++); 1218c2ecf20Sopenharmony_ci 1228c2ecf20Sopenharmony_ci /* Start with a write. On MESI architectures this helps 1238c2ecf20Sopenharmony_ci * avoid a shared state with consumer that is polling this descriptor. 1248c2ecf20Sopenharmony_ci */ 1258c2ecf20Sopenharmony_ci ring[head].addr = (unsigned long)(void*)buf; 1268c2ecf20Sopenharmony_ci ring[head].len = len; 1278c2ecf20Sopenharmony_ci /* read below might bypass write above. That is OK because it's just an 1288c2ecf20Sopenharmony_ci * optimization. If this happens, we will get the cache line in a 1298c2ecf20Sopenharmony_ci * shared state which is unfortunate, but probably not worth it to 1308c2ecf20Sopenharmony_ci * add an explicit full barrier to avoid this. 1318c2ecf20Sopenharmony_ci */ 1328c2ecf20Sopenharmony_ci barrier(); 1338c2ecf20Sopenharmony_ci index = ring[head].index; 1348c2ecf20Sopenharmony_ci data[index].buf = buf; 1358c2ecf20Sopenharmony_ci data[index].data = datap; 1368c2ecf20Sopenharmony_ci /* Barrier A (for pairing) */ 1378c2ecf20Sopenharmony_ci smp_release(); 1388c2ecf20Sopenharmony_ci ring[head].flags = DESC_HW; 1398c2ecf20Sopenharmony_ci 1408c2ecf20Sopenharmony_ci return 0; 1418c2ecf20Sopenharmony_ci} 1428c2ecf20Sopenharmony_ci 1438c2ecf20Sopenharmony_civoid *get_buf(unsigned *lenp, void **bufp) 1448c2ecf20Sopenharmony_ci{ 1458c2ecf20Sopenharmony_ci unsigned head = (ring_size - 1) & guest.last_used_idx; 1468c2ecf20Sopenharmony_ci unsigned index; 1478c2ecf20Sopenharmony_ci void *datap; 1488c2ecf20Sopenharmony_ci 1498c2ecf20Sopenharmony_ci if (ring[head].flags & DESC_HW) 1508c2ecf20Sopenharmony_ci return NULL; 1518c2ecf20Sopenharmony_ci /* Barrier B (for pairing) */ 1528c2ecf20Sopenharmony_ci smp_acquire(); 1538c2ecf20Sopenharmony_ci *lenp = ring[head].len; 1548c2ecf20Sopenharmony_ci index = ring[head].index & (ring_size - 1); 1558c2ecf20Sopenharmony_ci datap = data[index].data; 1568c2ecf20Sopenharmony_ci *bufp = data[index].buf; 1578c2ecf20Sopenharmony_ci data[index].buf = NULL; 1588c2ecf20Sopenharmony_ci data[index].data = NULL; 1598c2ecf20Sopenharmony_ci guest.num_free++; 1608c2ecf20Sopenharmony_ci guest.last_used_idx++; 1618c2ecf20Sopenharmony_ci return datap; 1628c2ecf20Sopenharmony_ci} 1638c2ecf20Sopenharmony_ci 1648c2ecf20Sopenharmony_cibool used_empty() 1658c2ecf20Sopenharmony_ci{ 1668c2ecf20Sopenharmony_ci unsigned head = (ring_size - 1) & guest.last_used_idx; 1678c2ecf20Sopenharmony_ci 1688c2ecf20Sopenharmony_ci return (ring[head].flags & DESC_HW); 1698c2ecf20Sopenharmony_ci} 1708c2ecf20Sopenharmony_ci 1718c2ecf20Sopenharmony_civoid disable_call() 1728c2ecf20Sopenharmony_ci{ 1738c2ecf20Sopenharmony_ci /* Doing nothing to disable calls might cause 1748c2ecf20Sopenharmony_ci * extra interrupts, but reduces the number of cache misses. 1758c2ecf20Sopenharmony_ci */ 1768c2ecf20Sopenharmony_ci} 1778c2ecf20Sopenharmony_ci 1788c2ecf20Sopenharmony_cibool enable_call() 1798c2ecf20Sopenharmony_ci{ 1808c2ecf20Sopenharmony_ci event->call_index = guest.last_used_idx; 1818c2ecf20Sopenharmony_ci /* Flush call index write */ 1828c2ecf20Sopenharmony_ci /* Barrier D (for pairing) */ 1838c2ecf20Sopenharmony_ci smp_mb(); 1848c2ecf20Sopenharmony_ci return used_empty(); 1858c2ecf20Sopenharmony_ci} 1868c2ecf20Sopenharmony_ci 1878c2ecf20Sopenharmony_civoid kick_available(void) 1888c2ecf20Sopenharmony_ci{ 1898c2ecf20Sopenharmony_ci bool need; 1908c2ecf20Sopenharmony_ci 1918c2ecf20Sopenharmony_ci /* Flush in previous flags write */ 1928c2ecf20Sopenharmony_ci /* Barrier C (for pairing) */ 1938c2ecf20Sopenharmony_ci smp_mb(); 1948c2ecf20Sopenharmony_ci need = need_event(event->kick_index, 1958c2ecf20Sopenharmony_ci guest.avail_idx, 1968c2ecf20Sopenharmony_ci guest.kicked_avail_idx); 1978c2ecf20Sopenharmony_ci 1988c2ecf20Sopenharmony_ci guest.kicked_avail_idx = guest.avail_idx; 1998c2ecf20Sopenharmony_ci if (need) 2008c2ecf20Sopenharmony_ci kick(); 2018c2ecf20Sopenharmony_ci} 2028c2ecf20Sopenharmony_ci 2038c2ecf20Sopenharmony_ci/* host side */ 2048c2ecf20Sopenharmony_civoid disable_kick() 2058c2ecf20Sopenharmony_ci{ 2068c2ecf20Sopenharmony_ci /* Doing nothing to disable kicks might cause 2078c2ecf20Sopenharmony_ci * extra interrupts, but reduces the number of cache misses. 2088c2ecf20Sopenharmony_ci */ 2098c2ecf20Sopenharmony_ci} 2108c2ecf20Sopenharmony_ci 2118c2ecf20Sopenharmony_cibool enable_kick() 2128c2ecf20Sopenharmony_ci{ 2138c2ecf20Sopenharmony_ci event->kick_index = host.used_idx; 2148c2ecf20Sopenharmony_ci /* Barrier C (for pairing) */ 2158c2ecf20Sopenharmony_ci smp_mb(); 2168c2ecf20Sopenharmony_ci return avail_empty(); 2178c2ecf20Sopenharmony_ci} 2188c2ecf20Sopenharmony_ci 2198c2ecf20Sopenharmony_cibool avail_empty() 2208c2ecf20Sopenharmony_ci{ 2218c2ecf20Sopenharmony_ci unsigned head = (ring_size - 1) & host.used_idx; 2228c2ecf20Sopenharmony_ci 2238c2ecf20Sopenharmony_ci return !(ring[head].flags & DESC_HW); 2248c2ecf20Sopenharmony_ci} 2258c2ecf20Sopenharmony_ci 2268c2ecf20Sopenharmony_cibool use_buf(unsigned *lenp, void **bufp) 2278c2ecf20Sopenharmony_ci{ 2288c2ecf20Sopenharmony_ci unsigned head = (ring_size - 1) & host.used_idx; 2298c2ecf20Sopenharmony_ci 2308c2ecf20Sopenharmony_ci if (!(ring[head].flags & DESC_HW)) 2318c2ecf20Sopenharmony_ci return false; 2328c2ecf20Sopenharmony_ci 2338c2ecf20Sopenharmony_ci /* make sure length read below is not speculated */ 2348c2ecf20Sopenharmony_ci /* Barrier A (for pairing) */ 2358c2ecf20Sopenharmony_ci smp_acquire(); 2368c2ecf20Sopenharmony_ci 2378c2ecf20Sopenharmony_ci /* simple in-order completion: we don't need 2388c2ecf20Sopenharmony_ci * to touch index at all. This also means we 2398c2ecf20Sopenharmony_ci * can just modify the descriptor in-place. 2408c2ecf20Sopenharmony_ci */ 2418c2ecf20Sopenharmony_ci ring[head].len--; 2428c2ecf20Sopenharmony_ci /* Make sure len is valid before flags. 2438c2ecf20Sopenharmony_ci * Note: alternative is to write len and flags in one access - 2448c2ecf20Sopenharmony_ci * possible on 64 bit architectures but wmb is free on Intel anyway 2458c2ecf20Sopenharmony_ci * so I have no way to test whether it's a gain. 2468c2ecf20Sopenharmony_ci */ 2478c2ecf20Sopenharmony_ci /* Barrier B (for pairing) */ 2488c2ecf20Sopenharmony_ci smp_release(); 2498c2ecf20Sopenharmony_ci ring[head].flags = 0; 2508c2ecf20Sopenharmony_ci host.used_idx++; 2518c2ecf20Sopenharmony_ci return true; 2528c2ecf20Sopenharmony_ci} 2538c2ecf20Sopenharmony_ci 2548c2ecf20Sopenharmony_civoid call_used(void) 2558c2ecf20Sopenharmony_ci{ 2568c2ecf20Sopenharmony_ci bool need; 2578c2ecf20Sopenharmony_ci 2588c2ecf20Sopenharmony_ci /* Flush in previous flags write */ 2598c2ecf20Sopenharmony_ci /* Barrier D (for pairing) */ 2608c2ecf20Sopenharmony_ci smp_mb(); 2618c2ecf20Sopenharmony_ci 2628c2ecf20Sopenharmony_ci need = need_event(event->call_index, 2638c2ecf20Sopenharmony_ci host.used_idx, 2648c2ecf20Sopenharmony_ci host.called_used_idx); 2658c2ecf20Sopenharmony_ci 2668c2ecf20Sopenharmony_ci host.called_used_idx = host.used_idx; 2678c2ecf20Sopenharmony_ci 2688c2ecf20Sopenharmony_ci if (need) 2698c2ecf20Sopenharmony_ci call(); 2708c2ecf20Sopenharmony_ci} 271