18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * Simple benchmark program that uses the various features of io_uring
48c2ecf20Sopenharmony_ci * to provide fast random access to a device/file. It has various
 * options that control how we use io_uring; see the OPTIONS section
68c2ecf20Sopenharmony_ci * below. This uses the raw io_uring interface.
78c2ecf20Sopenharmony_ci *
88c2ecf20Sopenharmony_ci * Copyright (C) 2018-2019 Jens Axboe
98c2ecf20Sopenharmony_ci */
108c2ecf20Sopenharmony_ci#include <stdio.h>
118c2ecf20Sopenharmony_ci#include <errno.h>
128c2ecf20Sopenharmony_ci#include <assert.h>
138c2ecf20Sopenharmony_ci#include <stdlib.h>
148c2ecf20Sopenharmony_ci#include <stddef.h>
158c2ecf20Sopenharmony_ci#include <signal.h>
168c2ecf20Sopenharmony_ci#include <inttypes.h>
178c2ecf20Sopenharmony_ci
188c2ecf20Sopenharmony_ci#include <sys/types.h>
198c2ecf20Sopenharmony_ci#include <sys/stat.h>
208c2ecf20Sopenharmony_ci#include <sys/ioctl.h>
218c2ecf20Sopenharmony_ci#include <sys/syscall.h>
228c2ecf20Sopenharmony_ci#include <sys/resource.h>
238c2ecf20Sopenharmony_ci#include <sys/mman.h>
248c2ecf20Sopenharmony_ci#include <sys/uio.h>
258c2ecf20Sopenharmony_ci#include <linux/fs.h>
268c2ecf20Sopenharmony_ci#include <fcntl.h>
278c2ecf20Sopenharmony_ci#include <unistd.h>
288c2ecf20Sopenharmony_ci#include <string.h>
298c2ecf20Sopenharmony_ci#include <pthread.h>
308c2ecf20Sopenharmony_ci#include <sched.h>
318c2ecf20Sopenharmony_ci
328c2ecf20Sopenharmony_ci#include "liburing.h"
338c2ecf20Sopenharmony_ci#include "barrier.h"
348c2ecf20Sopenharmony_ci
/*
 * Smaller of two values.
 *
 * Both arguments are parenthesised everywhere they are expanded so that an
 * argument containing an operator of lower precedence than '<' (for example
 * '&', '|' or '?:') is evaluated as a unit before the comparison.
 * Each argument may be evaluated twice; do not pass expressions with side
 * effects.
 */
#define min(a, b)		(((a) < (b)) ? (a) : (b))
368c2ecf20Sopenharmony_ci
/*
 * Userspace view of the submission queue ring. Every member points into the
 * SQ ring mapping; setup_ring() derives the addresses from the sq_off offsets
 * filled in by io_uring_setup().
 */
struct io_sq_ring {
	unsigned int *head;		/* compared with the next tail to detect a full ring */
	unsigned int *tail;		/* advanced by prep_more_ios() once entries are filled */
	unsigned int *ring_mask;	/* copied into sq_ring_mask at setup */
	unsigned int *ring_entries;	/* printed as the SQ ring size at startup */
	unsigned int *flags;		/* tested for IORING_SQ_NEED_WAKEUP */
	unsigned int *array;		/* ring slot -> sqe index table */
};
458c2ecf20Sopenharmony_ci
/*
 * Userspace view of the completion queue ring. Every member points into the
 * CQ ring mapping; setup_ring() derives the addresses from the cq_off offsets
 * filled in by io_uring_setup().
 */
struct io_cq_ring {
	unsigned int *head;		/* advanced by reap_events() after consuming entries */
	unsigned int *tail;		/* compared with head to detect an empty ring */
	unsigned int *ring_mask;	/* copied into cq_ring_mask at setup */
	unsigned int *ring_entries;	/* printed as the CQ ring size at startup */
	struct io_uring_cqe *cqes;	/* completion entries, indexed by head & mask */
};
538c2ecf20Sopenharmony_ci
/* Queue depth: ring size requested at setup and number of I/O buffers. */
#define DEPTH			128

/* Upper bounds on how many I/Os are prepared / waited for per iteration. */
#define BATCH_SUBMIT		32
#define BATCH_COMPLETE		32

/* Size in bytes of every read and of every I/O buffer. */
#define BS			4096

/* Maximum number of files/devices accepted on the command line. */
#define MAX_FDS			16
628c2ecf20Sopenharmony_ci
/* Copies of the SQ and CQ ring masks, loaded once by setup_ring(). */
static unsigned int sq_ring_mask;
static unsigned int cq_ring_mask;
648c2ecf20Sopenharmony_ci
/* One file or block device that the benchmark reads from. */
struct file {
	unsigned long max_blocks;	/* size in BS-sized blocks; bounds random offsets */
	unsigned int pending_ios;	/* incremented per prepared I/O, decremented per reaped one */
	int real_fd;			/* descriptor returned by open() */
	int fixed_fd;			/* index into the registered file table */
};
718c2ecf20Sopenharmony_ci
728c2ecf20Sopenharmony_cistruct submitter {
738c2ecf20Sopenharmony_ci	pthread_t thread;
748c2ecf20Sopenharmony_ci	int ring_fd;
758c2ecf20Sopenharmony_ci	struct drand48_data rand;
768c2ecf20Sopenharmony_ci	struct io_sq_ring sq_ring;
778c2ecf20Sopenharmony_ci	struct io_uring_sqe *sqes;
788c2ecf20Sopenharmony_ci	struct iovec iovecs[DEPTH];
798c2ecf20Sopenharmony_ci	struct io_cq_ring cq_ring;
808c2ecf20Sopenharmony_ci	int inflight;
818c2ecf20Sopenharmony_ci	unsigned long reaps;
828c2ecf20Sopenharmony_ci	unsigned long done;
838c2ecf20Sopenharmony_ci	unsigned long calls;
848c2ecf20Sopenharmony_ci	volatile int finish;
858c2ecf20Sopenharmony_ci
868c2ecf20Sopenharmony_ci	__s32 *fds;
878c2ecf20Sopenharmony_ci
888c2ecf20Sopenharmony_ci	struct file files[MAX_FDS];
898c2ecf20Sopenharmony_ci	unsigned nr_files;
908c2ecf20Sopenharmony_ci	unsigned cur_file;
918c2ecf20Sopenharmony_ci};
928c2ecf20Sopenharmony_ci
938c2ecf20Sopenharmony_cistatic struct submitter submitters[1];
948c2ecf20Sopenharmony_cistatic volatile int finish;
958c2ecf20Sopenharmony_ci
968c2ecf20Sopenharmony_ci/*
978c2ecf20Sopenharmony_ci * OPTIONS: Set these to test the various features of io_uring.
988c2ecf20Sopenharmony_ci */
/* use IO polling */
static int polled = 1;
/* use fixed user buffers */
static int fixedbufs = 1;
/* use fixed files */
static int register_files = 1;
/* use buffered IO, not O_DIRECT */
static int buffered = 0;
/* use kernel submission/poller thread */
static int sq_thread_poll = 0;
/* pin above thread to this CPU */
static int sq_thread_cpu = -1;
/* no-op SQ ring commands */
static int do_nop = 0;
1068c2ecf20Sopenharmony_ci
1078c2ecf20Sopenharmony_cistatic int io_uring_register_buffers(struct submitter *s)
1088c2ecf20Sopenharmony_ci{
1098c2ecf20Sopenharmony_ci	if (do_nop)
1108c2ecf20Sopenharmony_ci		return 0;
1118c2ecf20Sopenharmony_ci
1128c2ecf20Sopenharmony_ci	return io_uring_register(s->ring_fd, IORING_REGISTER_BUFFERS, s->iovecs,
1138c2ecf20Sopenharmony_ci					DEPTH);
1148c2ecf20Sopenharmony_ci}
1158c2ecf20Sopenharmony_ci
1168c2ecf20Sopenharmony_cistatic int io_uring_register_files(struct submitter *s)
1178c2ecf20Sopenharmony_ci{
1188c2ecf20Sopenharmony_ci	unsigned i;
1198c2ecf20Sopenharmony_ci
1208c2ecf20Sopenharmony_ci	if (do_nop)
1218c2ecf20Sopenharmony_ci		return 0;
1228c2ecf20Sopenharmony_ci
1238c2ecf20Sopenharmony_ci	s->fds = calloc(s->nr_files, sizeof(__s32));
1248c2ecf20Sopenharmony_ci	for (i = 0; i < s->nr_files; i++) {
1258c2ecf20Sopenharmony_ci		s->fds[i] = s->files[i].real_fd;
1268c2ecf20Sopenharmony_ci		s->files[i].fixed_fd = i;
1278c2ecf20Sopenharmony_ci	}
1288c2ecf20Sopenharmony_ci
1298c2ecf20Sopenharmony_ci	return io_uring_register(s->ring_fd, IORING_REGISTER_FILES, s->fds,
1308c2ecf20Sopenharmony_ci					s->nr_files);
1318c2ecf20Sopenharmony_ci}
1328c2ecf20Sopenharmony_ci
/* Return the calling thread's id as reported by the gettid system call. */
static int lk_gettid(void)
{
	long tid = syscall(__NR_gettid);

	return tid;
}
1378c2ecf20Sopenharmony_ci
1388c2ecf20Sopenharmony_cistatic unsigned file_depth(struct submitter *s)
1398c2ecf20Sopenharmony_ci{
1408c2ecf20Sopenharmony_ci	return (DEPTH + s->nr_files - 1) / s->nr_files;
1418c2ecf20Sopenharmony_ci}
1428c2ecf20Sopenharmony_ci
1438c2ecf20Sopenharmony_cistatic void init_io(struct submitter *s, unsigned index)
1448c2ecf20Sopenharmony_ci{
1458c2ecf20Sopenharmony_ci	struct io_uring_sqe *sqe = &s->sqes[index];
1468c2ecf20Sopenharmony_ci	unsigned long offset;
1478c2ecf20Sopenharmony_ci	struct file *f;
1488c2ecf20Sopenharmony_ci	long r;
1498c2ecf20Sopenharmony_ci
1508c2ecf20Sopenharmony_ci	if (do_nop) {
1518c2ecf20Sopenharmony_ci		sqe->opcode = IORING_OP_NOP;
1528c2ecf20Sopenharmony_ci		return;
1538c2ecf20Sopenharmony_ci	}
1548c2ecf20Sopenharmony_ci
1558c2ecf20Sopenharmony_ci	if (s->nr_files == 1) {
1568c2ecf20Sopenharmony_ci		f = &s->files[0];
1578c2ecf20Sopenharmony_ci	} else {
1588c2ecf20Sopenharmony_ci		f = &s->files[s->cur_file];
1598c2ecf20Sopenharmony_ci		if (f->pending_ios >= file_depth(s)) {
1608c2ecf20Sopenharmony_ci			s->cur_file++;
1618c2ecf20Sopenharmony_ci			if (s->cur_file == s->nr_files)
1628c2ecf20Sopenharmony_ci				s->cur_file = 0;
1638c2ecf20Sopenharmony_ci			f = &s->files[s->cur_file];
1648c2ecf20Sopenharmony_ci		}
1658c2ecf20Sopenharmony_ci	}
1668c2ecf20Sopenharmony_ci	f->pending_ios++;
1678c2ecf20Sopenharmony_ci
1688c2ecf20Sopenharmony_ci	lrand48_r(&s->rand, &r);
1698c2ecf20Sopenharmony_ci	offset = (r % (f->max_blocks - 1)) * BS;
1708c2ecf20Sopenharmony_ci
1718c2ecf20Sopenharmony_ci	if (register_files) {
1728c2ecf20Sopenharmony_ci		sqe->flags = IOSQE_FIXED_FILE;
1738c2ecf20Sopenharmony_ci		sqe->fd = f->fixed_fd;
1748c2ecf20Sopenharmony_ci	} else {
1758c2ecf20Sopenharmony_ci		sqe->flags = 0;
1768c2ecf20Sopenharmony_ci		sqe->fd = f->real_fd;
1778c2ecf20Sopenharmony_ci	}
1788c2ecf20Sopenharmony_ci	if (fixedbufs) {
1798c2ecf20Sopenharmony_ci		sqe->opcode = IORING_OP_READ_FIXED;
1808c2ecf20Sopenharmony_ci		sqe->addr = (unsigned long) s->iovecs[index].iov_base;
1818c2ecf20Sopenharmony_ci		sqe->len = BS;
1828c2ecf20Sopenharmony_ci		sqe->buf_index = index;
1838c2ecf20Sopenharmony_ci	} else {
1848c2ecf20Sopenharmony_ci		sqe->opcode = IORING_OP_READV;
1858c2ecf20Sopenharmony_ci		sqe->addr = (unsigned long) &s->iovecs[index];
1868c2ecf20Sopenharmony_ci		sqe->len = 1;
1878c2ecf20Sopenharmony_ci		sqe->buf_index = 0;
1888c2ecf20Sopenharmony_ci	}
1898c2ecf20Sopenharmony_ci	sqe->ioprio = 0;
1908c2ecf20Sopenharmony_ci	sqe->off = offset;
1918c2ecf20Sopenharmony_ci	sqe->user_data = (unsigned long) f;
1928c2ecf20Sopenharmony_ci}
1938c2ecf20Sopenharmony_ci
1948c2ecf20Sopenharmony_cistatic int prep_more_ios(struct submitter *s, unsigned max_ios)
1958c2ecf20Sopenharmony_ci{
1968c2ecf20Sopenharmony_ci	struct io_sq_ring *ring = &s->sq_ring;
1978c2ecf20Sopenharmony_ci	unsigned index, tail, next_tail, prepped = 0;
1988c2ecf20Sopenharmony_ci
1998c2ecf20Sopenharmony_ci	next_tail = tail = *ring->tail;
2008c2ecf20Sopenharmony_ci	do {
2018c2ecf20Sopenharmony_ci		next_tail++;
2028c2ecf20Sopenharmony_ci		read_barrier();
2038c2ecf20Sopenharmony_ci		if (next_tail == *ring->head)
2048c2ecf20Sopenharmony_ci			break;
2058c2ecf20Sopenharmony_ci
2068c2ecf20Sopenharmony_ci		index = tail & sq_ring_mask;
2078c2ecf20Sopenharmony_ci		init_io(s, index);
2088c2ecf20Sopenharmony_ci		ring->array[index] = index;
2098c2ecf20Sopenharmony_ci		prepped++;
2108c2ecf20Sopenharmony_ci		tail = next_tail;
2118c2ecf20Sopenharmony_ci	} while (prepped < max_ios);
2128c2ecf20Sopenharmony_ci
2138c2ecf20Sopenharmony_ci	if (*ring->tail != tail) {
2148c2ecf20Sopenharmony_ci		/* order tail store with writes to sqes above */
2158c2ecf20Sopenharmony_ci		write_barrier();
2168c2ecf20Sopenharmony_ci		*ring->tail = tail;
2178c2ecf20Sopenharmony_ci		write_barrier();
2188c2ecf20Sopenharmony_ci	}
2198c2ecf20Sopenharmony_ci	return prepped;
2208c2ecf20Sopenharmony_ci}
2218c2ecf20Sopenharmony_ci
2228c2ecf20Sopenharmony_cistatic int get_file_size(struct file *f)
2238c2ecf20Sopenharmony_ci{
2248c2ecf20Sopenharmony_ci	struct stat st;
2258c2ecf20Sopenharmony_ci
2268c2ecf20Sopenharmony_ci	if (fstat(f->real_fd, &st) < 0)
2278c2ecf20Sopenharmony_ci		return -1;
2288c2ecf20Sopenharmony_ci	if (S_ISBLK(st.st_mode)) {
2298c2ecf20Sopenharmony_ci		unsigned long long bytes;
2308c2ecf20Sopenharmony_ci
2318c2ecf20Sopenharmony_ci		if (ioctl(f->real_fd, BLKGETSIZE64, &bytes) != 0)
2328c2ecf20Sopenharmony_ci			return -1;
2338c2ecf20Sopenharmony_ci
2348c2ecf20Sopenharmony_ci		f->max_blocks = bytes / BS;
2358c2ecf20Sopenharmony_ci		return 0;
2368c2ecf20Sopenharmony_ci	} else if (S_ISREG(st.st_mode)) {
2378c2ecf20Sopenharmony_ci		f->max_blocks = st.st_size / BS;
2388c2ecf20Sopenharmony_ci		return 0;
2398c2ecf20Sopenharmony_ci	}
2408c2ecf20Sopenharmony_ci
2418c2ecf20Sopenharmony_ci	return -1;
2428c2ecf20Sopenharmony_ci}
2438c2ecf20Sopenharmony_ci
2448c2ecf20Sopenharmony_cistatic int reap_events(struct submitter *s)
2458c2ecf20Sopenharmony_ci{
2468c2ecf20Sopenharmony_ci	struct io_cq_ring *ring = &s->cq_ring;
2478c2ecf20Sopenharmony_ci	struct io_uring_cqe *cqe;
2488c2ecf20Sopenharmony_ci	unsigned head, reaped = 0;
2498c2ecf20Sopenharmony_ci
2508c2ecf20Sopenharmony_ci	head = *ring->head;
2518c2ecf20Sopenharmony_ci	do {
2528c2ecf20Sopenharmony_ci		struct file *f;
2538c2ecf20Sopenharmony_ci
2548c2ecf20Sopenharmony_ci		read_barrier();
2558c2ecf20Sopenharmony_ci		if (head == *ring->tail)
2568c2ecf20Sopenharmony_ci			break;
2578c2ecf20Sopenharmony_ci		cqe = &ring->cqes[head & cq_ring_mask];
2588c2ecf20Sopenharmony_ci		if (!do_nop) {
2598c2ecf20Sopenharmony_ci			f = (struct file *) (uintptr_t) cqe->user_data;
2608c2ecf20Sopenharmony_ci			f->pending_ios--;
2618c2ecf20Sopenharmony_ci			if (cqe->res != BS) {
2628c2ecf20Sopenharmony_ci				printf("io: unexpected ret=%d\n", cqe->res);
2638c2ecf20Sopenharmony_ci				if (polled && cqe->res == -EOPNOTSUPP)
2648c2ecf20Sopenharmony_ci					printf("Your filesystem doesn't support poll\n");
2658c2ecf20Sopenharmony_ci				return -1;
2668c2ecf20Sopenharmony_ci			}
2678c2ecf20Sopenharmony_ci		}
2688c2ecf20Sopenharmony_ci		reaped++;
2698c2ecf20Sopenharmony_ci		head++;
2708c2ecf20Sopenharmony_ci	} while (1);
2718c2ecf20Sopenharmony_ci
2728c2ecf20Sopenharmony_ci	s->inflight -= reaped;
2738c2ecf20Sopenharmony_ci	*ring->head = head;
2748c2ecf20Sopenharmony_ci	write_barrier();
2758c2ecf20Sopenharmony_ci	return reaped;
2768c2ecf20Sopenharmony_ci}
2778c2ecf20Sopenharmony_ci
2788c2ecf20Sopenharmony_cistatic void *submitter_fn(void *data)
2798c2ecf20Sopenharmony_ci{
2808c2ecf20Sopenharmony_ci	struct submitter *s = data;
2818c2ecf20Sopenharmony_ci	struct io_sq_ring *ring = &s->sq_ring;
2828c2ecf20Sopenharmony_ci	int ret, prepped;
2838c2ecf20Sopenharmony_ci
2848c2ecf20Sopenharmony_ci	printf("submitter=%d\n", lk_gettid());
2858c2ecf20Sopenharmony_ci
2868c2ecf20Sopenharmony_ci	srand48_r(pthread_self(), &s->rand);
2878c2ecf20Sopenharmony_ci
2888c2ecf20Sopenharmony_ci	prepped = 0;
2898c2ecf20Sopenharmony_ci	do {
2908c2ecf20Sopenharmony_ci		int to_wait, to_submit, this_reap, to_prep;
2918c2ecf20Sopenharmony_ci
2928c2ecf20Sopenharmony_ci		if (!prepped && s->inflight < DEPTH) {
2938c2ecf20Sopenharmony_ci			to_prep = min(DEPTH - s->inflight, BATCH_SUBMIT);
2948c2ecf20Sopenharmony_ci			prepped = prep_more_ios(s, to_prep);
2958c2ecf20Sopenharmony_ci		}
2968c2ecf20Sopenharmony_ci		s->inflight += prepped;
2978c2ecf20Sopenharmony_cisubmit_more:
2988c2ecf20Sopenharmony_ci		to_submit = prepped;
2998c2ecf20Sopenharmony_cisubmit:
3008c2ecf20Sopenharmony_ci		if (to_submit && (s->inflight + to_submit <= DEPTH))
3018c2ecf20Sopenharmony_ci			to_wait = 0;
3028c2ecf20Sopenharmony_ci		else
3038c2ecf20Sopenharmony_ci			to_wait = min(s->inflight + to_submit, BATCH_COMPLETE);
3048c2ecf20Sopenharmony_ci
3058c2ecf20Sopenharmony_ci		/*
3068c2ecf20Sopenharmony_ci		 * Only need to call io_uring_enter if we're not using SQ thread
3078c2ecf20Sopenharmony_ci		 * poll, or if IORING_SQ_NEED_WAKEUP is set.
3088c2ecf20Sopenharmony_ci		 */
3098c2ecf20Sopenharmony_ci		if (!sq_thread_poll || (*ring->flags & IORING_SQ_NEED_WAKEUP)) {
3108c2ecf20Sopenharmony_ci			unsigned flags = 0;
3118c2ecf20Sopenharmony_ci
3128c2ecf20Sopenharmony_ci			if (to_wait)
3138c2ecf20Sopenharmony_ci				flags = IORING_ENTER_GETEVENTS;
3148c2ecf20Sopenharmony_ci			if ((*ring->flags & IORING_SQ_NEED_WAKEUP))
3158c2ecf20Sopenharmony_ci				flags |= IORING_ENTER_SQ_WAKEUP;
3168c2ecf20Sopenharmony_ci			ret = io_uring_enter(s->ring_fd, to_submit, to_wait,
3178c2ecf20Sopenharmony_ci						flags, NULL);
3188c2ecf20Sopenharmony_ci			s->calls++;
3198c2ecf20Sopenharmony_ci		}
3208c2ecf20Sopenharmony_ci
3218c2ecf20Sopenharmony_ci		/*
3228c2ecf20Sopenharmony_ci		 * For non SQ thread poll, we already got the events we needed
3238c2ecf20Sopenharmony_ci		 * through the io_uring_enter() above. For SQ thread poll, we
3248c2ecf20Sopenharmony_ci		 * need to loop here until we find enough events.
3258c2ecf20Sopenharmony_ci		 */
3268c2ecf20Sopenharmony_ci		this_reap = 0;
3278c2ecf20Sopenharmony_ci		do {
3288c2ecf20Sopenharmony_ci			int r;
3298c2ecf20Sopenharmony_ci			r = reap_events(s);
3308c2ecf20Sopenharmony_ci			if (r == -1) {
3318c2ecf20Sopenharmony_ci				s->finish = 1;
3328c2ecf20Sopenharmony_ci				break;
3338c2ecf20Sopenharmony_ci			} else if (r > 0)
3348c2ecf20Sopenharmony_ci				this_reap += r;
3358c2ecf20Sopenharmony_ci		} while (sq_thread_poll && this_reap < to_wait);
3368c2ecf20Sopenharmony_ci		s->reaps += this_reap;
3378c2ecf20Sopenharmony_ci
3388c2ecf20Sopenharmony_ci		if (ret >= 0) {
3398c2ecf20Sopenharmony_ci			if (!ret) {
3408c2ecf20Sopenharmony_ci				to_submit = 0;
3418c2ecf20Sopenharmony_ci				if (s->inflight)
3428c2ecf20Sopenharmony_ci					goto submit;
3438c2ecf20Sopenharmony_ci				continue;
3448c2ecf20Sopenharmony_ci			} else if (ret < to_submit) {
3458c2ecf20Sopenharmony_ci				int diff = to_submit - ret;
3468c2ecf20Sopenharmony_ci
3478c2ecf20Sopenharmony_ci				s->done += ret;
3488c2ecf20Sopenharmony_ci				prepped -= diff;
3498c2ecf20Sopenharmony_ci				goto submit_more;
3508c2ecf20Sopenharmony_ci			}
3518c2ecf20Sopenharmony_ci			s->done += ret;
3528c2ecf20Sopenharmony_ci			prepped = 0;
3538c2ecf20Sopenharmony_ci			continue;
3548c2ecf20Sopenharmony_ci		} else if (ret < 0) {
3558c2ecf20Sopenharmony_ci			if (errno == EAGAIN) {
3568c2ecf20Sopenharmony_ci				if (s->finish)
3578c2ecf20Sopenharmony_ci					break;
3588c2ecf20Sopenharmony_ci				if (this_reap)
3598c2ecf20Sopenharmony_ci					goto submit;
3608c2ecf20Sopenharmony_ci				to_submit = 0;
3618c2ecf20Sopenharmony_ci				goto submit;
3628c2ecf20Sopenharmony_ci			}
3638c2ecf20Sopenharmony_ci			printf("io_submit: %s\n", strerror(errno));
3648c2ecf20Sopenharmony_ci			break;
3658c2ecf20Sopenharmony_ci		}
3668c2ecf20Sopenharmony_ci	} while (!s->finish);
3678c2ecf20Sopenharmony_ci
3688c2ecf20Sopenharmony_ci	finish = 1;
3698c2ecf20Sopenharmony_ci	return NULL;
3708c2ecf20Sopenharmony_ci}
3718c2ecf20Sopenharmony_ci
3728c2ecf20Sopenharmony_cistatic void sig_int(int sig)
3738c2ecf20Sopenharmony_ci{
3748c2ecf20Sopenharmony_ci	printf("Exiting on signal %d\n", sig);
3758c2ecf20Sopenharmony_ci	submitters[0].finish = 1;
3768c2ecf20Sopenharmony_ci	finish = 1;
3778c2ecf20Sopenharmony_ci}
3788c2ecf20Sopenharmony_ci
3798c2ecf20Sopenharmony_cistatic void arm_sig_int(void)
3808c2ecf20Sopenharmony_ci{
3818c2ecf20Sopenharmony_ci	struct sigaction act;
3828c2ecf20Sopenharmony_ci
3838c2ecf20Sopenharmony_ci	memset(&act, 0, sizeof(act));
3848c2ecf20Sopenharmony_ci	act.sa_handler = sig_int;
3858c2ecf20Sopenharmony_ci	act.sa_flags = SA_RESTART;
3868c2ecf20Sopenharmony_ci	sigaction(SIGINT, &act, NULL);
3878c2ecf20Sopenharmony_ci}
3888c2ecf20Sopenharmony_ci
3898c2ecf20Sopenharmony_cistatic int setup_ring(struct submitter *s)
3908c2ecf20Sopenharmony_ci{
3918c2ecf20Sopenharmony_ci	struct io_sq_ring *sring = &s->sq_ring;
3928c2ecf20Sopenharmony_ci	struct io_cq_ring *cring = &s->cq_ring;
3938c2ecf20Sopenharmony_ci	struct io_uring_params p;
3948c2ecf20Sopenharmony_ci	int ret, fd;
3958c2ecf20Sopenharmony_ci	void *ptr;
3968c2ecf20Sopenharmony_ci
3978c2ecf20Sopenharmony_ci	memset(&p, 0, sizeof(p));
3988c2ecf20Sopenharmony_ci
3998c2ecf20Sopenharmony_ci	if (polled && !do_nop)
4008c2ecf20Sopenharmony_ci		p.flags |= IORING_SETUP_IOPOLL;
4018c2ecf20Sopenharmony_ci	if (sq_thread_poll) {
4028c2ecf20Sopenharmony_ci		p.flags |= IORING_SETUP_SQPOLL;
4038c2ecf20Sopenharmony_ci		if (sq_thread_cpu != -1) {
4048c2ecf20Sopenharmony_ci			p.flags |= IORING_SETUP_SQ_AFF;
4058c2ecf20Sopenharmony_ci			p.sq_thread_cpu = sq_thread_cpu;
4068c2ecf20Sopenharmony_ci		}
4078c2ecf20Sopenharmony_ci	}
4088c2ecf20Sopenharmony_ci
4098c2ecf20Sopenharmony_ci	fd = io_uring_setup(DEPTH, &p);
4108c2ecf20Sopenharmony_ci	if (fd < 0) {
4118c2ecf20Sopenharmony_ci		perror("io_uring_setup");
4128c2ecf20Sopenharmony_ci		return 1;
4138c2ecf20Sopenharmony_ci	}
4148c2ecf20Sopenharmony_ci	s->ring_fd = fd;
4158c2ecf20Sopenharmony_ci
4168c2ecf20Sopenharmony_ci	if (fixedbufs) {
4178c2ecf20Sopenharmony_ci		ret = io_uring_register_buffers(s);
4188c2ecf20Sopenharmony_ci		if (ret < 0) {
4198c2ecf20Sopenharmony_ci			perror("io_uring_register_buffers");
4208c2ecf20Sopenharmony_ci			return 1;
4218c2ecf20Sopenharmony_ci		}
4228c2ecf20Sopenharmony_ci	}
4238c2ecf20Sopenharmony_ci
4248c2ecf20Sopenharmony_ci	if (register_files) {
4258c2ecf20Sopenharmony_ci		ret = io_uring_register_files(s);
4268c2ecf20Sopenharmony_ci		if (ret < 0) {
4278c2ecf20Sopenharmony_ci			perror("io_uring_register_files");
4288c2ecf20Sopenharmony_ci			return 1;
4298c2ecf20Sopenharmony_ci		}
4308c2ecf20Sopenharmony_ci	}
4318c2ecf20Sopenharmony_ci
4328c2ecf20Sopenharmony_ci	ptr = mmap(0, p.sq_off.array + p.sq_entries * sizeof(__u32),
4338c2ecf20Sopenharmony_ci			PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
4348c2ecf20Sopenharmony_ci			IORING_OFF_SQ_RING);
4358c2ecf20Sopenharmony_ci	printf("sq_ring ptr = 0x%p\n", ptr);
4368c2ecf20Sopenharmony_ci	sring->head = ptr + p.sq_off.head;
4378c2ecf20Sopenharmony_ci	sring->tail = ptr + p.sq_off.tail;
4388c2ecf20Sopenharmony_ci	sring->ring_mask = ptr + p.sq_off.ring_mask;
4398c2ecf20Sopenharmony_ci	sring->ring_entries = ptr + p.sq_off.ring_entries;
4408c2ecf20Sopenharmony_ci	sring->flags = ptr + p.sq_off.flags;
4418c2ecf20Sopenharmony_ci	sring->array = ptr + p.sq_off.array;
4428c2ecf20Sopenharmony_ci	sq_ring_mask = *sring->ring_mask;
4438c2ecf20Sopenharmony_ci
4448c2ecf20Sopenharmony_ci	s->sqes = mmap(0, p.sq_entries * sizeof(struct io_uring_sqe),
4458c2ecf20Sopenharmony_ci			PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
4468c2ecf20Sopenharmony_ci			IORING_OFF_SQES);
4478c2ecf20Sopenharmony_ci	printf("sqes ptr    = 0x%p\n", s->sqes);
4488c2ecf20Sopenharmony_ci
4498c2ecf20Sopenharmony_ci	ptr = mmap(0, p.cq_off.cqes + p.cq_entries * sizeof(struct io_uring_cqe),
4508c2ecf20Sopenharmony_ci			PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
4518c2ecf20Sopenharmony_ci			IORING_OFF_CQ_RING);
4528c2ecf20Sopenharmony_ci	printf("cq_ring ptr = 0x%p\n", ptr);
4538c2ecf20Sopenharmony_ci	cring->head = ptr + p.cq_off.head;
4548c2ecf20Sopenharmony_ci	cring->tail = ptr + p.cq_off.tail;
4558c2ecf20Sopenharmony_ci	cring->ring_mask = ptr + p.cq_off.ring_mask;
4568c2ecf20Sopenharmony_ci	cring->ring_entries = ptr + p.cq_off.ring_entries;
4578c2ecf20Sopenharmony_ci	cring->cqes = ptr + p.cq_off.cqes;
4588c2ecf20Sopenharmony_ci	cq_ring_mask = *cring->ring_mask;
4598c2ecf20Sopenharmony_ci	return 0;
4608c2ecf20Sopenharmony_ci}
4618c2ecf20Sopenharmony_ci
4628c2ecf20Sopenharmony_cistatic void file_depths(char *buf)
4638c2ecf20Sopenharmony_ci{
4648c2ecf20Sopenharmony_ci	struct submitter *s = &submitters[0];
4658c2ecf20Sopenharmony_ci	unsigned i;
4668c2ecf20Sopenharmony_ci	char *p;
4678c2ecf20Sopenharmony_ci
4688c2ecf20Sopenharmony_ci	buf[0] = '\0';
4698c2ecf20Sopenharmony_ci	p = buf;
4708c2ecf20Sopenharmony_ci	for (i = 0; i < s->nr_files; i++) {
4718c2ecf20Sopenharmony_ci		struct file *f = &s->files[i];
4728c2ecf20Sopenharmony_ci
4738c2ecf20Sopenharmony_ci		if (i + 1 == s->nr_files)
4748c2ecf20Sopenharmony_ci			p += sprintf(p, "%d", f->pending_ios);
4758c2ecf20Sopenharmony_ci		else
4768c2ecf20Sopenharmony_ci			p += sprintf(p, "%d, ", f->pending_ios);
4778c2ecf20Sopenharmony_ci	}
4788c2ecf20Sopenharmony_ci}
4798c2ecf20Sopenharmony_ci
4808c2ecf20Sopenharmony_ciint main(int argc, char *argv[])
4818c2ecf20Sopenharmony_ci{
4828c2ecf20Sopenharmony_ci	struct submitter *s = &submitters[0];
4838c2ecf20Sopenharmony_ci	unsigned long done, calls, reap;
4848c2ecf20Sopenharmony_ci	int err, i, flags, fd;
4858c2ecf20Sopenharmony_ci	char *fdepths;
4868c2ecf20Sopenharmony_ci	void *ret;
4878c2ecf20Sopenharmony_ci
4888c2ecf20Sopenharmony_ci	if (!do_nop && argc < 2) {
4898c2ecf20Sopenharmony_ci		printf("%s: filename\n", argv[0]);
4908c2ecf20Sopenharmony_ci		return 1;
4918c2ecf20Sopenharmony_ci	}
4928c2ecf20Sopenharmony_ci
4938c2ecf20Sopenharmony_ci	flags = O_RDONLY | O_NOATIME;
4948c2ecf20Sopenharmony_ci	if (!buffered)
4958c2ecf20Sopenharmony_ci		flags |= O_DIRECT;
4968c2ecf20Sopenharmony_ci
4978c2ecf20Sopenharmony_ci	i = 1;
4988c2ecf20Sopenharmony_ci	while (!do_nop && i < argc) {
4998c2ecf20Sopenharmony_ci		struct file *f;
5008c2ecf20Sopenharmony_ci
5018c2ecf20Sopenharmony_ci		if (s->nr_files == MAX_FDS) {
5028c2ecf20Sopenharmony_ci			printf("Max number of files (%d) reached\n", MAX_FDS);
5038c2ecf20Sopenharmony_ci			break;
5048c2ecf20Sopenharmony_ci		}
5058c2ecf20Sopenharmony_ci		fd = open(argv[i], flags);
5068c2ecf20Sopenharmony_ci		if (fd < 0) {
5078c2ecf20Sopenharmony_ci			perror("open");
5088c2ecf20Sopenharmony_ci			return 1;
5098c2ecf20Sopenharmony_ci		}
5108c2ecf20Sopenharmony_ci
5118c2ecf20Sopenharmony_ci		f = &s->files[s->nr_files];
5128c2ecf20Sopenharmony_ci		f->real_fd = fd;
5138c2ecf20Sopenharmony_ci		if (get_file_size(f)) {
5148c2ecf20Sopenharmony_ci			printf("failed getting size of device/file\n");
5158c2ecf20Sopenharmony_ci			return 1;
5168c2ecf20Sopenharmony_ci		}
5178c2ecf20Sopenharmony_ci		if (f->max_blocks <= 1) {
5188c2ecf20Sopenharmony_ci			printf("Zero file/device size?\n");
5198c2ecf20Sopenharmony_ci			return 1;
5208c2ecf20Sopenharmony_ci		}
5218c2ecf20Sopenharmony_ci		f->max_blocks--;
5228c2ecf20Sopenharmony_ci
5238c2ecf20Sopenharmony_ci		printf("Added file %s\n", argv[i]);
5248c2ecf20Sopenharmony_ci		s->nr_files++;
5258c2ecf20Sopenharmony_ci		i++;
5268c2ecf20Sopenharmony_ci	}
5278c2ecf20Sopenharmony_ci
5288c2ecf20Sopenharmony_ci	if (fixedbufs) {
5298c2ecf20Sopenharmony_ci		struct rlimit rlim;
5308c2ecf20Sopenharmony_ci
5318c2ecf20Sopenharmony_ci		rlim.rlim_cur = RLIM_INFINITY;
5328c2ecf20Sopenharmony_ci		rlim.rlim_max = RLIM_INFINITY;
5338c2ecf20Sopenharmony_ci		if (setrlimit(RLIMIT_MEMLOCK, &rlim) < 0) {
5348c2ecf20Sopenharmony_ci			perror("setrlimit");
5358c2ecf20Sopenharmony_ci			return 1;
5368c2ecf20Sopenharmony_ci		}
5378c2ecf20Sopenharmony_ci	}
5388c2ecf20Sopenharmony_ci
5398c2ecf20Sopenharmony_ci	arm_sig_int();
5408c2ecf20Sopenharmony_ci
5418c2ecf20Sopenharmony_ci	for (i = 0; i < DEPTH; i++) {
5428c2ecf20Sopenharmony_ci		void *buf;
5438c2ecf20Sopenharmony_ci
5448c2ecf20Sopenharmony_ci		if (posix_memalign(&buf, BS, BS)) {
5458c2ecf20Sopenharmony_ci			printf("failed alloc\n");
5468c2ecf20Sopenharmony_ci			return 1;
5478c2ecf20Sopenharmony_ci		}
5488c2ecf20Sopenharmony_ci		s->iovecs[i].iov_base = buf;
5498c2ecf20Sopenharmony_ci		s->iovecs[i].iov_len = BS;
5508c2ecf20Sopenharmony_ci	}
5518c2ecf20Sopenharmony_ci
5528c2ecf20Sopenharmony_ci	err = setup_ring(s);
5538c2ecf20Sopenharmony_ci	if (err) {
5548c2ecf20Sopenharmony_ci		printf("ring setup failed: %s, %d\n", strerror(errno), err);
5558c2ecf20Sopenharmony_ci		return 1;
5568c2ecf20Sopenharmony_ci	}
5578c2ecf20Sopenharmony_ci	printf("polled=%d, fixedbufs=%d, buffered=%d", polled, fixedbufs, buffered);
5588c2ecf20Sopenharmony_ci	printf(" QD=%d, sq_ring=%d, cq_ring=%d\n", DEPTH, *s->sq_ring.ring_entries, *s->cq_ring.ring_entries);
5598c2ecf20Sopenharmony_ci
5608c2ecf20Sopenharmony_ci	pthread_create(&s->thread, NULL, submitter_fn, s);
5618c2ecf20Sopenharmony_ci
5628c2ecf20Sopenharmony_ci	fdepths = malloc(8 * s->nr_files);
5638c2ecf20Sopenharmony_ci	reap = calls = done = 0;
5648c2ecf20Sopenharmony_ci	do {
5658c2ecf20Sopenharmony_ci		unsigned long this_done = 0;
5668c2ecf20Sopenharmony_ci		unsigned long this_reap = 0;
5678c2ecf20Sopenharmony_ci		unsigned long this_call = 0;
5688c2ecf20Sopenharmony_ci		unsigned long rpc = 0, ipc = 0;
5698c2ecf20Sopenharmony_ci
5708c2ecf20Sopenharmony_ci		sleep(1);
5718c2ecf20Sopenharmony_ci		this_done += s->done;
5728c2ecf20Sopenharmony_ci		this_call += s->calls;
5738c2ecf20Sopenharmony_ci		this_reap += s->reaps;
5748c2ecf20Sopenharmony_ci		if (this_call - calls) {
5758c2ecf20Sopenharmony_ci			rpc = (this_done - done) / (this_call - calls);
5768c2ecf20Sopenharmony_ci			ipc = (this_reap - reap) / (this_call - calls);
5778c2ecf20Sopenharmony_ci		} else
5788c2ecf20Sopenharmony_ci			rpc = ipc = -1;
5798c2ecf20Sopenharmony_ci		file_depths(fdepths);
5808c2ecf20Sopenharmony_ci		printf("IOPS=%lu, IOS/call=%ld/%ld, inflight=%u (%s)\n",
5818c2ecf20Sopenharmony_ci				this_done - done, rpc, ipc, s->inflight,
5828c2ecf20Sopenharmony_ci				fdepths);
5838c2ecf20Sopenharmony_ci		done = this_done;
5848c2ecf20Sopenharmony_ci		calls = this_call;
5858c2ecf20Sopenharmony_ci		reap = this_reap;
5868c2ecf20Sopenharmony_ci	} while (!finish);
5878c2ecf20Sopenharmony_ci
5888c2ecf20Sopenharmony_ci	pthread_join(s->thread, &ret);
5898c2ecf20Sopenharmony_ci	close(s->ring_fd);
5908c2ecf20Sopenharmony_ci	free(fdepths);
5918c2ecf20Sopenharmony_ci	return 0;
5928c2ecf20Sopenharmony_ci}
593