18c2ecf20Sopenharmony_ci#include <sys/types.h>
28c2ecf20Sopenharmony_ci#include <sys/stat.h>
38c2ecf20Sopenharmony_ci#include <sys/mman.h>
48c2ecf20Sopenharmony_ci#include <unistd.h>
58c2ecf20Sopenharmony_ci#include <errno.h>
68c2ecf20Sopenharmony_ci#include <string.h>
78c2ecf20Sopenharmony_ci
88c2ecf20Sopenharmony_ci#include "liburing.h"
98c2ecf20Sopenharmony_ci#include "barrier.h"
108c2ecf20Sopenharmony_ci
118c2ecf20Sopenharmony_cistatic int __io_uring_get_cqe(struct io_uring *ring,
128c2ecf20Sopenharmony_ci			      struct io_uring_cqe **cqe_ptr, int wait)
138c2ecf20Sopenharmony_ci{
148c2ecf20Sopenharmony_ci	struct io_uring_cq *cq = &ring->cq;
158c2ecf20Sopenharmony_ci	const unsigned mask = *cq->kring_mask;
168c2ecf20Sopenharmony_ci	unsigned head;
178c2ecf20Sopenharmony_ci	int ret;
188c2ecf20Sopenharmony_ci
198c2ecf20Sopenharmony_ci	*cqe_ptr = NULL;
208c2ecf20Sopenharmony_ci	head = *cq->khead;
218c2ecf20Sopenharmony_ci	do {
228c2ecf20Sopenharmony_ci		/*
238c2ecf20Sopenharmony_ci		 * It's necessary to use a read_barrier() before reading
248c2ecf20Sopenharmony_ci		 * the CQ tail, since the kernel updates it locklessly. The
258c2ecf20Sopenharmony_ci		 * kernel has the matching store barrier for the update. The
268c2ecf20Sopenharmony_ci		 * kernel also ensures that previous stores to CQEs are ordered
278c2ecf20Sopenharmony_ci		 * with the tail update.
288c2ecf20Sopenharmony_ci		 */
298c2ecf20Sopenharmony_ci		read_barrier();
308c2ecf20Sopenharmony_ci		if (head != *cq->ktail) {
318c2ecf20Sopenharmony_ci			*cqe_ptr = &cq->cqes[head & mask];
328c2ecf20Sopenharmony_ci			break;
338c2ecf20Sopenharmony_ci		}
348c2ecf20Sopenharmony_ci		if (!wait)
358c2ecf20Sopenharmony_ci			break;
368c2ecf20Sopenharmony_ci		ret = io_uring_enter(ring->ring_fd, 0, 1,
378c2ecf20Sopenharmony_ci					IORING_ENTER_GETEVENTS, NULL);
388c2ecf20Sopenharmony_ci		if (ret < 0)
398c2ecf20Sopenharmony_ci			return -errno;
408c2ecf20Sopenharmony_ci	} while (1);
418c2ecf20Sopenharmony_ci
428c2ecf20Sopenharmony_ci	return 0;
438c2ecf20Sopenharmony_ci}
448c2ecf20Sopenharmony_ci
/*
 * Non-blocking variant: return an IO completion only if one is already
 * available. Returns 0 with *cqe_ptr filled in on success (NULL if the
 * CQ ring is empty), -errno on failure.
 */
int io_uring_peek_cqe(struct io_uring *ring, struct io_uring_cqe **cqe_ptr)
{
	const int wait = 0;

	return __io_uring_get_cqe(ring, cqe_ptr, wait);
}
538c2ecf20Sopenharmony_ci
/*
 * Blocking variant: return an IO completion, entering the kernel to wait
 * for one if necessary. Returns 0 with *cqe_ptr filled in on success,
 * -errno on failure.
 */
int io_uring_wait_cqe(struct io_uring *ring, struct io_uring_cqe **cqe_ptr)
{
	const int wait = 1;

	return __io_uring_get_cqe(ring, cqe_ptr, wait);
}
628c2ecf20Sopenharmony_ci
/*
 * Submit sqes acquired from io_uring_get_sqe() to the kernel.
 *
 * Returns number of sqes submitted, or -errno on failure.
 */
int io_uring_submit(struct io_uring *ring)
{
	struct io_uring_sq *sq = &ring->sq;
	const unsigned mask = *sq->kring_mask;
	unsigned ktail, ktail_next, submitted, to_submit;
	int ret;

	/*
	 * If we have pending IO in the kring, submit it first. We need a
	 * read barrier here to match the kernel's store barrier when updating
	 * the SQ head.
	 */
	read_barrier();
	if (*sq->khead != *sq->ktail) {
		/*
		 * Entries already sit in the kernel-visible ring; ask the
		 * kernel for a full ring's worth rather than counting them.
		 */
		submitted = *sq->kring_entries;
		goto submit;
	}

	/* Nothing queued locally either, so there is nothing to submit. */
	if (sq->sqe_head == sq->sqe_tail)
		return 0;

	/*
	 * Fill in sqes that we have queued up, adding them to the kernel ring
	 */
	submitted = 0;
	ktail = ktail_next = *sq->ktail;
	to_submit = sq->sqe_tail - sq->sqe_head;
	while (to_submit--) {
		ktail_next++;
		read_barrier();

		/*
		 * Point the next kernel-ring slot at the index of the locally
		 * queued SQE; both indices are masked to stay in the ring.
		 */
		sq->array[ktail & mask] = sq->sqe_head & mask;
		ktail = ktail_next;

		sq->sqe_head++;
		submitted++;
	}

	if (!submitted)
		return 0;

	if (*sq->ktail != ktail) {
		/*
		 * First write barrier ensures that the SQE stores are updated
		 * with the tail update. This is needed so that the kernel
		 * will never see a tail update without the preceding SQE
		 * stores being done.
		 */
		write_barrier();
		*sq->ktail = ktail;
		/*
		 * The kernel has the matching read barrier for reading the
		 * SQ tail.
		 */
		write_barrier();
	}

submit:
	/*
	 * Tell the kernel how many new entries are available. On error,
	 * io_uring_enter() returns -1 with errno set; convert to -errno.
	 */
	ret = io_uring_enter(ring->ring_fd, submitted, 0,
				IORING_ENTER_GETEVENTS, NULL);
	if (ret < 0)
		return -errno;

	return ret;
}
1338c2ecf20Sopenharmony_ci
1348c2ecf20Sopenharmony_ci/*
1358c2ecf20Sopenharmony_ci * Return an sqe to fill. Application must later call io_uring_submit()
1368c2ecf20Sopenharmony_ci * when it's ready to tell the kernel about it. The caller may call this
1378c2ecf20Sopenharmony_ci * function multiple times before calling io_uring_submit().
1388c2ecf20Sopenharmony_ci *
1398c2ecf20Sopenharmony_ci * Returns a vacant sqe, or NULL if we're full.
1408c2ecf20Sopenharmony_ci */
1418c2ecf20Sopenharmony_cistruct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring)
1428c2ecf20Sopenharmony_ci{
1438c2ecf20Sopenharmony_ci	struct io_uring_sq *sq = &ring->sq;
1448c2ecf20Sopenharmony_ci	unsigned next = sq->sqe_tail + 1;
1458c2ecf20Sopenharmony_ci	struct io_uring_sqe *sqe;
1468c2ecf20Sopenharmony_ci
1478c2ecf20Sopenharmony_ci	/*
1488c2ecf20Sopenharmony_ci	 * All sqes are used
1498c2ecf20Sopenharmony_ci	 */
1508c2ecf20Sopenharmony_ci	if (next - sq->sqe_head > *sq->kring_entries)
1518c2ecf20Sopenharmony_ci		return NULL;
1528c2ecf20Sopenharmony_ci
1538c2ecf20Sopenharmony_ci	sqe = &sq->sqes[sq->sqe_tail & *sq->kring_mask];
1548c2ecf20Sopenharmony_ci	sq->sqe_tail = next;
1558c2ecf20Sopenharmony_ci	return sqe;
1568c2ecf20Sopenharmony_ci}
157