#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>

#include "liburing.h"
#include "barrier.h"

/*
 * Core CQE reaping helper. Scans the CQ ring for a completion at the
 * current head; fills *cqe_ptr and returns 0 if one is available. When
 * @wait is non-zero, blocks in io_uring_enter(GETEVENTS, min_complete=1)
 * until a completion arrives. Returns -errno if the enter syscall fails.
 * Note: the CQ head is NOT advanced here; the caller consumes the CQE and
 * is responsible for advancing the head afterwards.
 */
static int __io_uring_get_cqe(struct io_uring *ring,
			      struct io_uring_cqe **cqe_ptr, int wait)
{
	struct io_uring_cq *cq = &ring->cq;
	const unsigned mask = *cq->kring_mask;
	unsigned head;
	int ret;

	*cqe_ptr = NULL;
	head = *cq->khead;
	do {
		/*
		 * It's necessary to use a read_barrier() before reading
		 * the CQ tail, since the kernel updates it locklessly. The
		 * kernel has the matching store barrier for the update. The
		 * kernel also ensures that previous stores to CQEs are ordered
		 * with the tail update.
		 */
		read_barrier();
		if (head != *cq->ktail) {
			/* Ring is non-empty: head slot holds a valid CQE */
			*cqe_ptr = &cq->cqes[head & mask];
			break;
		}
		if (!wait)
			break;
		/* Empty and caller wants to wait: block for >= 1 completion */
		ret = io_uring_enter(ring->ring_fd, 0, 1,
					IORING_ENTER_GETEVENTS, NULL);
		if (ret < 0)
			return -errno;
	} while (1);

	return 0;
}

/*
 * Return an IO completion, if one is readily available. Returns 0 with
 * cqe_ptr filled in on success, -errno on failure.
 */
int io_uring_peek_cqe(struct io_uring *ring, struct io_uring_cqe **cqe_ptr)
{
	return __io_uring_get_cqe(ring, cqe_ptr, 0);
}

/*
 * Return an IO completion, waiting for it if necessary. Returns 0 with
 * cqe_ptr filled in on success, -errno on failure.
 */
int io_uring_wait_cqe(struct io_uring *ring, struct io_uring_cqe **cqe_ptr)
{
	return __io_uring_get_cqe(ring, cqe_ptr, 1);
}

/*
 * Submit sqes acquired from io_uring_get_sqe() to the kernel.
 *
 * Returns number of sqes submitted
 */
int io_uring_submit(struct io_uring *ring)
{
	struct io_uring_sq *sq = &ring->sq;
	const unsigned mask = *sq->kring_mask;
	unsigned ktail, ktail_next, submitted, to_submit;
	int ret;

	/*
	 * If we have pending IO in the kring, submit it first. We need a
	 * read barrier here to match the kernels store barrier when updating
	 * the SQ head.
	 */
	read_barrier();
	if (*sq->khead != *sq->ktail) {
		/*
		 * NOTE(review): this passes the full ring size as the
		 * to_submit count rather than the actual pending distance
		 * (ktail - khead) — presumably relying on the kernel to cap
		 * at what is really queued. Confirm against the
		 * io_uring_enter() contract.
		 */
		submitted = *sq->kring_entries;
		goto submit;
	}

	/* Nothing queued by the application either: nothing to do */
	if (sq->sqe_head == sq->sqe_tail)
		return 0;

	/*
	 * Fill in sqes that we have queued up, adding them to the kernel ring
	 */
	submitted = 0;
	ktail = ktail_next = *sq->ktail;
	to_submit = sq->sqe_tail - sq->sqe_head;
	while (to_submit--) {
		ktail_next++;
		/*
		 * NOTE(review): barrier placed before each array-index store;
		 * the publishing store barrier for the tail itself is below —
		 * the purpose of this per-iteration read_barrier() is not
		 * evident from this file alone; verify against barrier.h /
		 * upstream history before changing.
		 */
		read_barrier();

		/* Map the next kernel ring slot to our sqe index */
		sq->array[ktail & mask] = sq->sqe_head & mask;
		ktail = ktail_next;

		sq->sqe_head++;
		submitted++;
	}

	if (!submitted)
		return 0;

	if (*sq->ktail != ktail) {
		/*
		 * First write barrier ensures that the SQE stores are updated
		 * with the tail update. This is needed so that the kernel
		 * will never see a tail update without the preceding SQE
		 * stores being done.
		 */
		write_barrier();
		*sq->ktail = ktail;
		/*
		 * The kernel has the matching read barrier for reading the
		 * SQ tail.
		 */
		write_barrier();
	}

submit:
	/*
	 * NOTE(review): GETEVENTS is passed with min_complete == 0, so this
	 * should not block — it just gives the kernel a chance to reap.
	 * Confirm this matches the intended submit-only semantics.
	 */
	ret = io_uring_enter(ring->ring_fd, submitted, 0,
				IORING_ENTER_GETEVENTS, NULL);
	if (ret < 0)
		return -errno;

	/* io_uring_enter() returns the number of sqes consumed */
	return ret;
}

/*
 * Return an sqe to fill. Application must later call io_uring_submit()
 * when it's ready to tell the kernel about it. The caller may call this
 * function multiple times before calling io_uring_submit().
 *
 * Returns a vacant sqe, or NULL if we're full.
 */
struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring)
{
	struct io_uring_sq *sq = &ring->sq;
	unsigned next = sq->sqe_tail + 1;
	struct io_uring_sqe *sqe;

	/*
	 * All sqes are used
	 */
	if (next - sq->sqe_head > *sq->kring_entries)
		return NULL;

	/* Hand out the tail slot and advance our (app-private) tail */
	sqe = &sq->sqes[sq->sqe_tail & *sq->kring_mask];
	sq->sqe_tail = next;
	return sqe;
}