18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * Copyright 2018 Google Inc.
48c2ecf20Sopenharmony_ci * Author: Eric Dumazet (edumazet@google.com)
58c2ecf20Sopenharmony_ci *
68c2ecf20Sopenharmony_ci * Reference program demonstrating tcp mmap() usage,
78c2ecf20Sopenharmony_ci * and SO_RCVLOWAT hints for receiver.
88c2ecf20Sopenharmony_ci *
98c2ecf20Sopenharmony_ci * Note : NIC with header split is needed to use mmap() on TCP :
108c2ecf20Sopenharmony_ci * Each incoming frame must be a multiple of PAGE_SIZE bytes of TCP payload.
118c2ecf20Sopenharmony_ci *
128c2ecf20Sopenharmony_ci * How to use on loopback interface :
138c2ecf20Sopenharmony_ci *
148c2ecf20Sopenharmony_ci *  ifconfig lo mtu 61512  # 15*4096 + 40 (ipv6 header) + 32 (TCP with TS option header)
158c2ecf20Sopenharmony_ci *  tcp_mmap -s -z &
168c2ecf20Sopenharmony_ci *  tcp_mmap -H ::1 -z
178c2ecf20Sopenharmony_ci *
188c2ecf20Sopenharmony_ci *  Or leave default lo mtu, but use -M option to set TCP_MAXSEG option to (4096 + 12)
198c2ecf20Sopenharmony_ci *      (4096 : page size on x86, 12: TCP TS option length)
208c2ecf20Sopenharmony_ci *  tcp_mmap -s -z -M $((4096+12)) &
218c2ecf20Sopenharmony_ci *  tcp_mmap -H ::1 -z -M $((4096+12))
228c2ecf20Sopenharmony_ci *
238c2ecf20Sopenharmony_ci * Note: -z option on sender uses MSG_ZEROCOPY, which forces a copy when packets go through loopback interface.
248c2ecf20Sopenharmony_ci *       We might use sendfile() instead, but really this test program is about mmap(), for receivers ;)
258c2ecf20Sopenharmony_ci *
268c2ecf20Sopenharmony_ci * $ ./tcp_mmap -s &                                 # Without mmap()
278c2ecf20Sopenharmony_ci * $ for i in {1..4}; do ./tcp_mmap -H ::1 -z ; done
288c2ecf20Sopenharmony_ci * received 32768 MB (0 % mmap'ed) in 14.1157 s, 19.4732 Gbit
298c2ecf20Sopenharmony_ci *   cpu usage user:0.057 sys:7.815, 240.234 usec per MB, 65531 c-switches
308c2ecf20Sopenharmony_ci * received 32768 MB (0 % mmap'ed) in 14.6833 s, 18.7204 Gbit
318c2ecf20Sopenharmony_ci *  cpu usage user:0.043 sys:8.103, 248.596 usec per MB, 65524 c-switches
328c2ecf20Sopenharmony_ci * received 32768 MB (0 % mmap'ed) in 11.143 s, 24.6682 Gbit
338c2ecf20Sopenharmony_ci *   cpu usage user:0.044 sys:6.576, 202.026 usec per MB, 65519 c-switches
348c2ecf20Sopenharmony_ci * received 32768 MB (0 % mmap'ed) in 14.9056 s, 18.4413 Gbit
358c2ecf20Sopenharmony_ci *   cpu usage user:0.036 sys:8.193, 251.129 usec per MB, 65530 c-switches
368c2ecf20Sopenharmony_ci * $ kill %1   # kill tcp_mmap server
378c2ecf20Sopenharmony_ci *
388c2ecf20Sopenharmony_ci * $ ./tcp_mmap -s -z &                              # With mmap()
398c2ecf20Sopenharmony_ci * $ for i in {1..4}; do ./tcp_mmap -H ::1 -z ; done
408c2ecf20Sopenharmony_ci * received 32768 MB (99.9939 % mmap'ed) in 6.73792 s, 40.7956 Gbit
418c2ecf20Sopenharmony_ci *   cpu usage user:0.045 sys:2.827, 87.6465 usec per MB, 65532 c-switches
428c2ecf20Sopenharmony_ci * received 32768 MB (99.9939 % mmap'ed) in 7.26732 s, 37.8238 Gbit
438c2ecf20Sopenharmony_ci *   cpu usage user:0.037 sys:3.087, 95.3369 usec per MB, 65532 c-switches
448c2ecf20Sopenharmony_ci * received 32768 MB (99.9939 % mmap'ed) in 7.61661 s, 36.0893 Gbit
458c2ecf20Sopenharmony_ci *   cpu usage user:0.046 sys:3.559, 110.016 usec per MB, 65529 c-switches
468c2ecf20Sopenharmony_ci * received 32768 MB (99.9939 % mmap'ed) in 7.43764 s, 36.9577 Gbit
478c2ecf20Sopenharmony_ci *   cpu usage user:0.035 sys:3.467, 106.873 usec per MB, 65530 c-switches
488c2ecf20Sopenharmony_ci */
498c2ecf20Sopenharmony_ci#define _GNU_SOURCE
508c2ecf20Sopenharmony_ci#include <pthread.h>
518c2ecf20Sopenharmony_ci#include <sys/types.h>
528c2ecf20Sopenharmony_ci#include <fcntl.h>
538c2ecf20Sopenharmony_ci#include <error.h>
548c2ecf20Sopenharmony_ci#include <sys/socket.h>
558c2ecf20Sopenharmony_ci#include <sys/mman.h>
568c2ecf20Sopenharmony_ci#include <sys/resource.h>
578c2ecf20Sopenharmony_ci#include <unistd.h>
588c2ecf20Sopenharmony_ci#include <string.h>
598c2ecf20Sopenharmony_ci#include <stdlib.h>
608c2ecf20Sopenharmony_ci#include <stdio.h>
618c2ecf20Sopenharmony_ci#include <errno.h>
628c2ecf20Sopenharmony_ci#include <time.h>
638c2ecf20Sopenharmony_ci#include <sys/time.h>
648c2ecf20Sopenharmony_ci#include <netinet/in.h>
658c2ecf20Sopenharmony_ci#include <arpa/inet.h>
668c2ecf20Sopenharmony_ci#include <poll.h>
678c2ecf20Sopenharmony_ci#include <linux/tcp.h>
688c2ecf20Sopenharmony_ci#include <assert.h>
698c2ecf20Sopenharmony_ci
/* Fallback for libc headers that do not provide MSG_ZEROCOPY. */
#ifndef MSG_ZEROCOPY
#define MSG_ZEROCOPY    0x4000000
#endif

/* Number of bytes the sender pushes on its connection (1 << 35 = 32 GB). */
#define FILE_SZ (1ULL << 35)

/* Address family (-4 / -6 options), its sockaddr length, and TCP port (-p option). */
static int cfg_family = AF_INET6;
static socklen_t cfg_alen = sizeof(struct sockaddr_in6);
static int cfg_port = 8787;

static int rcvbuf; /* Default: autotuning.  Can be set with -r <integer> option */
static int sndbuf; /* Default: autotuning.  Can be set with -w <integer> option */
static int zflg; /* zero copy option (-z): MSG_ZEROCOPY for sender, mmap() for receiver */
static int xflg; /* hash received data (simple xor) (-x option) */
static int keepflag; /* -k option: only recorded by main(); nothing in this file reads it */

/* Bytes handled per send() / read() / zero-copy receive request (-C option). */
static size_t chunk_size  = 512*1024;

/* Alignment for the large mappings (-a option); main() fills in a default when 0. */
static size_t map_align;

/* Running xor of all data passed to hash_zone(); only used inside this file. */
static unsigned long htotal;
908c2ecf20Sopenharmony_ci
/*
 * Issue a cache prefetch hint for the memory at @x.
 * On x86_64 this emits a single prefetcht0 instruction; on every other
 * architecture the function body is empty.
 */
static inline void prefetch(const void *x)
{
#if defined(__x86_64__)
	const char *target = x;

	__asm__ __volatile__("prefetcht0 %P0" : : "m" (*target));
#endif
}
978c2ecf20Sopenharmony_ci
988c2ecf20Sopenharmony_civoid hash_zone(void *zone, unsigned int length)
998c2ecf20Sopenharmony_ci{
1008c2ecf20Sopenharmony_ci	unsigned long temp = htotal;
1018c2ecf20Sopenharmony_ci
1028c2ecf20Sopenharmony_ci	while (length >= 8*sizeof(long)) {
1038c2ecf20Sopenharmony_ci		prefetch(zone + 384);
1048c2ecf20Sopenharmony_ci		temp ^= *(unsigned long *)zone;
1058c2ecf20Sopenharmony_ci		temp ^= *(unsigned long *)(zone + sizeof(long));
1068c2ecf20Sopenharmony_ci		temp ^= *(unsigned long *)(zone + 2*sizeof(long));
1078c2ecf20Sopenharmony_ci		temp ^= *(unsigned long *)(zone + 3*sizeof(long));
1088c2ecf20Sopenharmony_ci		temp ^= *(unsigned long *)(zone + 4*sizeof(long));
1098c2ecf20Sopenharmony_ci		temp ^= *(unsigned long *)(zone + 5*sizeof(long));
1108c2ecf20Sopenharmony_ci		temp ^= *(unsigned long *)(zone + 6*sizeof(long));
1118c2ecf20Sopenharmony_ci		temp ^= *(unsigned long *)(zone + 7*sizeof(long));
1128c2ecf20Sopenharmony_ci		zone += 8*sizeof(long);
1138c2ecf20Sopenharmony_ci		length -= 8*sizeof(long);
1148c2ecf20Sopenharmony_ci	}
1158c2ecf20Sopenharmony_ci	while (length >= 1) {
1168c2ecf20Sopenharmony_ci		temp ^= *(unsigned char *)zone;
1178c2ecf20Sopenharmony_ci		zone += 1;
1188c2ecf20Sopenharmony_ci		length--;
1198c2ecf20Sopenharmony_ci	}
1208c2ecf20Sopenharmony_ci	htotal = temp;
1218c2ecf20Sopenharmony_ci}
1228c2ecf20Sopenharmony_ci
/*
 * Round @value up to the next multiple of @boundary.
 * The mask arithmetic is only correct when @boundary is a power of two.
 */
#define ALIGN_UP(value, boundary) \
	(((value) + ((boundary) - 1)) & ~((boundary) - 1))
/* Same rounding applied to a pointer; the result has the type of @ptr. */
#define ALIGN_PTR_UP(ptr, boundary) \
	((__typeof__(ptr))ALIGN_UP((unsigned long)(ptr), boundary))
1258c2ecf20Sopenharmony_ci
1268c2ecf20Sopenharmony_ci
1278c2ecf20Sopenharmony_cistatic void *mmap_large_buffer(size_t need, size_t *allocated)
1288c2ecf20Sopenharmony_ci{
1298c2ecf20Sopenharmony_ci	void *buffer;
1308c2ecf20Sopenharmony_ci	size_t sz;
1318c2ecf20Sopenharmony_ci
1328c2ecf20Sopenharmony_ci	/* Attempt to use huge pages if possible. */
1338c2ecf20Sopenharmony_ci	sz = ALIGN_UP(need, map_align);
1348c2ecf20Sopenharmony_ci	buffer = mmap(NULL, sz, PROT_READ | PROT_WRITE,
1358c2ecf20Sopenharmony_ci		      MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
1368c2ecf20Sopenharmony_ci
1378c2ecf20Sopenharmony_ci	if (buffer == (void *)-1) {
1388c2ecf20Sopenharmony_ci		sz = need;
1398c2ecf20Sopenharmony_ci		buffer = mmap(NULL, sz, PROT_READ | PROT_WRITE,
1408c2ecf20Sopenharmony_ci			      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
1418c2ecf20Sopenharmony_ci		if (buffer != (void *)-1)
1428c2ecf20Sopenharmony_ci			fprintf(stderr, "MAP_HUGETLB attempt failed, look at /sys/kernel/mm/hugepages for optimal performance\n");
1438c2ecf20Sopenharmony_ci	}
1448c2ecf20Sopenharmony_ci	*allocated = sz;
1458c2ecf20Sopenharmony_ci	return buffer;
1468c2ecf20Sopenharmony_ci}
1478c2ecf20Sopenharmony_ci
1488c2ecf20Sopenharmony_civoid *child_thread(void *arg)
1498c2ecf20Sopenharmony_ci{
1508c2ecf20Sopenharmony_ci	unsigned long total_mmap = 0, total = 0;
1518c2ecf20Sopenharmony_ci	struct tcp_zerocopy_receive zc;
1528c2ecf20Sopenharmony_ci	unsigned long delta_usec;
1538c2ecf20Sopenharmony_ci	int flags = MAP_SHARED;
1548c2ecf20Sopenharmony_ci	struct timeval t0, t1;
1558c2ecf20Sopenharmony_ci	char *buffer = NULL;
1568c2ecf20Sopenharmony_ci	void *raddr = NULL;
1578c2ecf20Sopenharmony_ci	void *addr = NULL;
1588c2ecf20Sopenharmony_ci	double throughput;
1598c2ecf20Sopenharmony_ci	struct rusage ru;
1608c2ecf20Sopenharmony_ci	size_t buffer_sz;
1618c2ecf20Sopenharmony_ci	int lu, fd;
1628c2ecf20Sopenharmony_ci
1638c2ecf20Sopenharmony_ci	fd = (int)(unsigned long)arg;
1648c2ecf20Sopenharmony_ci
1658c2ecf20Sopenharmony_ci	gettimeofday(&t0, NULL);
1668c2ecf20Sopenharmony_ci
1678c2ecf20Sopenharmony_ci	fcntl(fd, F_SETFL, O_NDELAY);
1688c2ecf20Sopenharmony_ci	buffer = mmap_large_buffer(chunk_size, &buffer_sz);
1698c2ecf20Sopenharmony_ci	if (buffer == (void *)-1) {
1708c2ecf20Sopenharmony_ci		perror("mmap");
1718c2ecf20Sopenharmony_ci		goto error;
1728c2ecf20Sopenharmony_ci	}
1738c2ecf20Sopenharmony_ci	if (zflg) {
1748c2ecf20Sopenharmony_ci		raddr = mmap(NULL, chunk_size + map_align, PROT_READ, flags, fd, 0);
1758c2ecf20Sopenharmony_ci		if (raddr == (void *)-1) {
1768c2ecf20Sopenharmony_ci			perror("mmap");
1778c2ecf20Sopenharmony_ci			zflg = 0;
1788c2ecf20Sopenharmony_ci		} else {
1798c2ecf20Sopenharmony_ci			addr = ALIGN_PTR_UP(raddr, map_align);
1808c2ecf20Sopenharmony_ci		}
1818c2ecf20Sopenharmony_ci	}
1828c2ecf20Sopenharmony_ci	while (1) {
1838c2ecf20Sopenharmony_ci		struct pollfd pfd = { .fd = fd, .events = POLLIN, };
1848c2ecf20Sopenharmony_ci		int sub;
1858c2ecf20Sopenharmony_ci
1868c2ecf20Sopenharmony_ci		poll(&pfd, 1, 10000);
1878c2ecf20Sopenharmony_ci		if (zflg) {
1888c2ecf20Sopenharmony_ci			socklen_t zc_len = sizeof(zc);
1898c2ecf20Sopenharmony_ci			int res;
1908c2ecf20Sopenharmony_ci
1918c2ecf20Sopenharmony_ci			memset(&zc, 0, sizeof(zc));
1928c2ecf20Sopenharmony_ci			zc.address = (__u64)((unsigned long)addr);
1938c2ecf20Sopenharmony_ci			zc.length = chunk_size;
1948c2ecf20Sopenharmony_ci
1958c2ecf20Sopenharmony_ci			res = getsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE,
1968c2ecf20Sopenharmony_ci					 &zc, &zc_len);
1978c2ecf20Sopenharmony_ci			if (res == -1)
1988c2ecf20Sopenharmony_ci				break;
1998c2ecf20Sopenharmony_ci
2008c2ecf20Sopenharmony_ci			if (zc.length) {
2018c2ecf20Sopenharmony_ci				assert(zc.length <= chunk_size);
2028c2ecf20Sopenharmony_ci				total_mmap += zc.length;
2038c2ecf20Sopenharmony_ci				if (xflg)
2048c2ecf20Sopenharmony_ci					hash_zone(addr, zc.length);
2058c2ecf20Sopenharmony_ci				/* It is more efficient to unmap the pages right now,
2068c2ecf20Sopenharmony_ci				 * instead of doing this in next TCP_ZEROCOPY_RECEIVE.
2078c2ecf20Sopenharmony_ci				 */
2088c2ecf20Sopenharmony_ci				madvise(addr, zc.length, MADV_DONTNEED);
2098c2ecf20Sopenharmony_ci				total += zc.length;
2108c2ecf20Sopenharmony_ci			}
2118c2ecf20Sopenharmony_ci			if (zc.recv_skip_hint) {
2128c2ecf20Sopenharmony_ci				assert(zc.recv_skip_hint <= chunk_size);
2138c2ecf20Sopenharmony_ci				lu = read(fd, buffer, zc.recv_skip_hint);
2148c2ecf20Sopenharmony_ci				if (lu > 0) {
2158c2ecf20Sopenharmony_ci					if (xflg)
2168c2ecf20Sopenharmony_ci						hash_zone(buffer, lu);
2178c2ecf20Sopenharmony_ci					total += lu;
2188c2ecf20Sopenharmony_ci				}
2198c2ecf20Sopenharmony_ci			}
2208c2ecf20Sopenharmony_ci			continue;
2218c2ecf20Sopenharmony_ci		}
2228c2ecf20Sopenharmony_ci		sub = 0;
2238c2ecf20Sopenharmony_ci		while (sub < chunk_size) {
2248c2ecf20Sopenharmony_ci			lu = read(fd, buffer + sub, chunk_size - sub);
2258c2ecf20Sopenharmony_ci			if (lu == 0)
2268c2ecf20Sopenharmony_ci				goto end;
2278c2ecf20Sopenharmony_ci			if (lu < 0)
2288c2ecf20Sopenharmony_ci				break;
2298c2ecf20Sopenharmony_ci			if (xflg)
2308c2ecf20Sopenharmony_ci				hash_zone(buffer + sub, lu);
2318c2ecf20Sopenharmony_ci			total += lu;
2328c2ecf20Sopenharmony_ci			sub += lu;
2338c2ecf20Sopenharmony_ci		}
2348c2ecf20Sopenharmony_ci	}
2358c2ecf20Sopenharmony_ciend:
2368c2ecf20Sopenharmony_ci	gettimeofday(&t1, NULL);
2378c2ecf20Sopenharmony_ci	delta_usec = (t1.tv_sec - t0.tv_sec) * 1000000 + t1.tv_usec - t0.tv_usec;
2388c2ecf20Sopenharmony_ci
2398c2ecf20Sopenharmony_ci	throughput = 0;
2408c2ecf20Sopenharmony_ci	if (delta_usec)
2418c2ecf20Sopenharmony_ci		throughput = total * 8.0 / (double)delta_usec / 1000.0;
2428c2ecf20Sopenharmony_ci	getrusage(RUSAGE_THREAD, &ru);
2438c2ecf20Sopenharmony_ci	if (total > 1024*1024) {
2448c2ecf20Sopenharmony_ci		unsigned long total_usec;
2458c2ecf20Sopenharmony_ci		unsigned long mb = total >> 20;
2468c2ecf20Sopenharmony_ci		total_usec = 1000000*ru.ru_utime.tv_sec + ru.ru_utime.tv_usec +
2478c2ecf20Sopenharmony_ci			     1000000*ru.ru_stime.tv_sec + ru.ru_stime.tv_usec;
2488c2ecf20Sopenharmony_ci		printf("received %lg MB (%lg %% mmap'ed) in %lg s, %lg Gbit\n"
2498c2ecf20Sopenharmony_ci		       "  cpu usage user:%lg sys:%lg, %lg usec per MB, %lu c-switches\n",
2508c2ecf20Sopenharmony_ci				total / (1024.0 * 1024.0),
2518c2ecf20Sopenharmony_ci				100.0*total_mmap/total,
2528c2ecf20Sopenharmony_ci				(double)delta_usec / 1000000.0,
2538c2ecf20Sopenharmony_ci				throughput,
2548c2ecf20Sopenharmony_ci				(double)ru.ru_utime.tv_sec + (double)ru.ru_utime.tv_usec / 1000000.0,
2558c2ecf20Sopenharmony_ci				(double)ru.ru_stime.tv_sec + (double)ru.ru_stime.tv_usec / 1000000.0,
2568c2ecf20Sopenharmony_ci				(double)total_usec/mb,
2578c2ecf20Sopenharmony_ci				ru.ru_nvcsw);
2588c2ecf20Sopenharmony_ci	}
2598c2ecf20Sopenharmony_cierror:
2608c2ecf20Sopenharmony_ci	munmap(buffer, buffer_sz);
2618c2ecf20Sopenharmony_ci	close(fd);
2628c2ecf20Sopenharmony_ci	if (zflg)
2638c2ecf20Sopenharmony_ci		munmap(raddr, chunk_size + map_align);
2648c2ecf20Sopenharmony_ci	pthread_exit(0);
2658c2ecf20Sopenharmony_ci}
2668c2ecf20Sopenharmony_ci
2678c2ecf20Sopenharmony_cistatic void apply_rcvsnd_buf(int fd)
2688c2ecf20Sopenharmony_ci{
2698c2ecf20Sopenharmony_ci	if (rcvbuf && setsockopt(fd, SOL_SOCKET,
2708c2ecf20Sopenharmony_ci				 SO_RCVBUF, &rcvbuf, sizeof(rcvbuf)) == -1) {
2718c2ecf20Sopenharmony_ci		perror("setsockopt SO_RCVBUF");
2728c2ecf20Sopenharmony_ci	}
2738c2ecf20Sopenharmony_ci
2748c2ecf20Sopenharmony_ci	if (sndbuf && setsockopt(fd, SOL_SOCKET,
2758c2ecf20Sopenharmony_ci				 SO_SNDBUF, &sndbuf, sizeof(sndbuf)) == -1) {
2768c2ecf20Sopenharmony_ci		perror("setsockopt SO_SNDBUF");
2778c2ecf20Sopenharmony_ci	}
2788c2ecf20Sopenharmony_ci}
2798c2ecf20Sopenharmony_ci
2808c2ecf20Sopenharmony_ci
2818c2ecf20Sopenharmony_cistatic void setup_sockaddr(int domain, const char *str_addr,
2828c2ecf20Sopenharmony_ci			   struct sockaddr_storage *sockaddr)
2838c2ecf20Sopenharmony_ci{
2848c2ecf20Sopenharmony_ci	struct sockaddr_in6 *addr6 = (void *) sockaddr;
2858c2ecf20Sopenharmony_ci	struct sockaddr_in *addr4 = (void *) sockaddr;
2868c2ecf20Sopenharmony_ci
2878c2ecf20Sopenharmony_ci	switch (domain) {
2888c2ecf20Sopenharmony_ci	case PF_INET:
2898c2ecf20Sopenharmony_ci		memset(addr4, 0, sizeof(*addr4));
2908c2ecf20Sopenharmony_ci		addr4->sin_family = AF_INET;
2918c2ecf20Sopenharmony_ci		addr4->sin_port = htons(cfg_port);
2928c2ecf20Sopenharmony_ci		if (str_addr &&
2938c2ecf20Sopenharmony_ci		    inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1)
2948c2ecf20Sopenharmony_ci			error(1, 0, "ipv4 parse error: %s", str_addr);
2958c2ecf20Sopenharmony_ci		break;
2968c2ecf20Sopenharmony_ci	case PF_INET6:
2978c2ecf20Sopenharmony_ci		memset(addr6, 0, sizeof(*addr6));
2988c2ecf20Sopenharmony_ci		addr6->sin6_family = AF_INET6;
2998c2ecf20Sopenharmony_ci		addr6->sin6_port = htons(cfg_port);
3008c2ecf20Sopenharmony_ci		if (str_addr &&
3018c2ecf20Sopenharmony_ci		    inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1)
3028c2ecf20Sopenharmony_ci			error(1, 0, "ipv6 parse error: %s", str_addr);
3038c2ecf20Sopenharmony_ci		break;
3048c2ecf20Sopenharmony_ci	default:
3058c2ecf20Sopenharmony_ci		error(1, 0, "illegal domain");
3068c2ecf20Sopenharmony_ci	}
3078c2ecf20Sopenharmony_ci}
3088c2ecf20Sopenharmony_ci
3098c2ecf20Sopenharmony_cistatic void do_accept(int fdlisten)
3108c2ecf20Sopenharmony_ci{
3118c2ecf20Sopenharmony_ci	pthread_attr_t attr;
3128c2ecf20Sopenharmony_ci	int rcvlowat;
3138c2ecf20Sopenharmony_ci
3148c2ecf20Sopenharmony_ci	pthread_attr_init(&attr);
3158c2ecf20Sopenharmony_ci	pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
3168c2ecf20Sopenharmony_ci
3178c2ecf20Sopenharmony_ci	rcvlowat = chunk_size;
3188c2ecf20Sopenharmony_ci	if (setsockopt(fdlisten, SOL_SOCKET, SO_RCVLOWAT,
3198c2ecf20Sopenharmony_ci		       &rcvlowat, sizeof(rcvlowat)) == -1) {
3208c2ecf20Sopenharmony_ci		perror("setsockopt SO_RCVLOWAT");
3218c2ecf20Sopenharmony_ci	}
3228c2ecf20Sopenharmony_ci
3238c2ecf20Sopenharmony_ci	apply_rcvsnd_buf(fdlisten);
3248c2ecf20Sopenharmony_ci
3258c2ecf20Sopenharmony_ci	while (1) {
3268c2ecf20Sopenharmony_ci		struct sockaddr_in addr;
3278c2ecf20Sopenharmony_ci		socklen_t addrlen = sizeof(addr);
3288c2ecf20Sopenharmony_ci		pthread_t th;
3298c2ecf20Sopenharmony_ci		int fd, res;
3308c2ecf20Sopenharmony_ci
3318c2ecf20Sopenharmony_ci		fd = accept(fdlisten, (struct sockaddr *)&addr, &addrlen);
3328c2ecf20Sopenharmony_ci		if (fd == -1) {
3338c2ecf20Sopenharmony_ci			perror("accept");
3348c2ecf20Sopenharmony_ci			continue;
3358c2ecf20Sopenharmony_ci		}
3368c2ecf20Sopenharmony_ci		res = pthread_create(&th, &attr, child_thread,
3378c2ecf20Sopenharmony_ci				     (void *)(unsigned long)fd);
3388c2ecf20Sopenharmony_ci		if (res) {
3398c2ecf20Sopenharmony_ci			errno = res;
3408c2ecf20Sopenharmony_ci			perror("pthread_create");
3418c2ecf20Sopenharmony_ci			close(fd);
3428c2ecf20Sopenharmony_ci		}
3438c2ecf20Sopenharmony_ci	}
3448c2ecf20Sopenharmony_ci}
3458c2ecf20Sopenharmony_ci
/* Each thread should reserve a big enough vma to avoid
 * spinlock collisions in ptl locks.
 * This size is 2MB on x86_64, and is exported in /proc/meminfo.
 *
 * Returns the "Hugepagesize:" value of /proc/meminfo converted from kB to
 * bytes, or 0 when the file cannot be opened or has no such line.
 */
static unsigned long default_huge_page_size(void)
{
	unsigned long bytes = 0;
	char *text = NULL;
	size_t cap = 0;
	FILE *meminfo;

	meminfo = fopen("/proc/meminfo", "r");
	if (meminfo == NULL)
		return 0;

	for (;;) {
		if (getline(&text, &cap, meminfo) <= 0)
			break;
		if (sscanf(text, "Hugepagesize:       %lu kB", &bytes) != 1)
			continue;
		/* The file reports kilobytes. */
		bytes <<= 10;
		break;
	}

	/* getline() owns the line buffer until we release it. */
	free(text);
	fclose(meminfo);
	return bytes;
}
3698c2ecf20Sopenharmony_ci
3708c2ecf20Sopenharmony_ciint main(int argc, char *argv[])
3718c2ecf20Sopenharmony_ci{
3728c2ecf20Sopenharmony_ci	struct sockaddr_storage listenaddr, addr;
3738c2ecf20Sopenharmony_ci	unsigned int max_pacing_rate = 0;
3748c2ecf20Sopenharmony_ci	uint64_t total = 0;
3758c2ecf20Sopenharmony_ci	char *host = NULL;
3768c2ecf20Sopenharmony_ci	int fd, c, on = 1;
3778c2ecf20Sopenharmony_ci	size_t buffer_sz;
3788c2ecf20Sopenharmony_ci	char *buffer;
3798c2ecf20Sopenharmony_ci	int sflg = 0;
3808c2ecf20Sopenharmony_ci	int mss = 0;
3818c2ecf20Sopenharmony_ci
3828c2ecf20Sopenharmony_ci	while ((c = getopt(argc, argv, "46p:svr:w:H:zxkP:M:C:a:")) != -1) {
3838c2ecf20Sopenharmony_ci		switch (c) {
3848c2ecf20Sopenharmony_ci		case '4':
3858c2ecf20Sopenharmony_ci			cfg_family = PF_INET;
3868c2ecf20Sopenharmony_ci			cfg_alen = sizeof(struct sockaddr_in);
3878c2ecf20Sopenharmony_ci			break;
3888c2ecf20Sopenharmony_ci		case '6':
3898c2ecf20Sopenharmony_ci			cfg_family = PF_INET6;
3908c2ecf20Sopenharmony_ci			cfg_alen = sizeof(struct sockaddr_in6);
3918c2ecf20Sopenharmony_ci			break;
3928c2ecf20Sopenharmony_ci		case 'p':
3938c2ecf20Sopenharmony_ci			cfg_port = atoi(optarg);
3948c2ecf20Sopenharmony_ci			break;
3958c2ecf20Sopenharmony_ci		case 'H':
3968c2ecf20Sopenharmony_ci			host = optarg;
3978c2ecf20Sopenharmony_ci			break;
3988c2ecf20Sopenharmony_ci		case 's': /* server : listen for incoming connections */
3998c2ecf20Sopenharmony_ci			sflg++;
4008c2ecf20Sopenharmony_ci			break;
4018c2ecf20Sopenharmony_ci		case 'r':
4028c2ecf20Sopenharmony_ci			rcvbuf = atoi(optarg);
4038c2ecf20Sopenharmony_ci			break;
4048c2ecf20Sopenharmony_ci		case 'w':
4058c2ecf20Sopenharmony_ci			sndbuf = atoi(optarg);
4068c2ecf20Sopenharmony_ci			break;
4078c2ecf20Sopenharmony_ci		case 'z':
4088c2ecf20Sopenharmony_ci			zflg = 1;
4098c2ecf20Sopenharmony_ci			break;
4108c2ecf20Sopenharmony_ci		case 'M':
4118c2ecf20Sopenharmony_ci			mss = atoi(optarg);
4128c2ecf20Sopenharmony_ci			break;
4138c2ecf20Sopenharmony_ci		case 'x':
4148c2ecf20Sopenharmony_ci			xflg = 1;
4158c2ecf20Sopenharmony_ci			break;
4168c2ecf20Sopenharmony_ci		case 'k':
4178c2ecf20Sopenharmony_ci			keepflag = 1;
4188c2ecf20Sopenharmony_ci			break;
4198c2ecf20Sopenharmony_ci		case 'P':
4208c2ecf20Sopenharmony_ci			max_pacing_rate = atoi(optarg) ;
4218c2ecf20Sopenharmony_ci			break;
4228c2ecf20Sopenharmony_ci		case 'C':
4238c2ecf20Sopenharmony_ci			chunk_size = atol(optarg);
4248c2ecf20Sopenharmony_ci			break;
4258c2ecf20Sopenharmony_ci		case 'a':
4268c2ecf20Sopenharmony_ci			map_align = atol(optarg);
4278c2ecf20Sopenharmony_ci			break;
4288c2ecf20Sopenharmony_ci		default:
4298c2ecf20Sopenharmony_ci			exit(1);
4308c2ecf20Sopenharmony_ci		}
4318c2ecf20Sopenharmony_ci	}
4328c2ecf20Sopenharmony_ci	if (!map_align) {
4338c2ecf20Sopenharmony_ci		map_align = default_huge_page_size();
4348c2ecf20Sopenharmony_ci		/* if really /proc/meminfo is not helping,
4358c2ecf20Sopenharmony_ci		 * we use the default x86_64 hugepagesize.
4368c2ecf20Sopenharmony_ci		 */
4378c2ecf20Sopenharmony_ci		if (!map_align)
4388c2ecf20Sopenharmony_ci			map_align = 2*1024*1024;
4398c2ecf20Sopenharmony_ci	}
4408c2ecf20Sopenharmony_ci	if (sflg) {
4418c2ecf20Sopenharmony_ci		int fdlisten = socket(cfg_family, SOCK_STREAM, 0);
4428c2ecf20Sopenharmony_ci
4438c2ecf20Sopenharmony_ci		if (fdlisten == -1) {
4448c2ecf20Sopenharmony_ci			perror("socket");
4458c2ecf20Sopenharmony_ci			exit(1);
4468c2ecf20Sopenharmony_ci		}
4478c2ecf20Sopenharmony_ci		apply_rcvsnd_buf(fdlisten);
4488c2ecf20Sopenharmony_ci		setsockopt(fdlisten, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
4498c2ecf20Sopenharmony_ci
4508c2ecf20Sopenharmony_ci		setup_sockaddr(cfg_family, host, &listenaddr);
4518c2ecf20Sopenharmony_ci
4528c2ecf20Sopenharmony_ci		if (mss &&
4538c2ecf20Sopenharmony_ci		    setsockopt(fdlisten, IPPROTO_TCP, TCP_MAXSEG,
4548c2ecf20Sopenharmony_ci			       &mss, sizeof(mss)) == -1) {
4558c2ecf20Sopenharmony_ci			perror("setsockopt TCP_MAXSEG");
4568c2ecf20Sopenharmony_ci			exit(1);
4578c2ecf20Sopenharmony_ci		}
4588c2ecf20Sopenharmony_ci		if (bind(fdlisten, (const struct sockaddr *)&listenaddr, cfg_alen) == -1) {
4598c2ecf20Sopenharmony_ci			perror("bind");
4608c2ecf20Sopenharmony_ci			exit(1);
4618c2ecf20Sopenharmony_ci		}
4628c2ecf20Sopenharmony_ci		if (listen(fdlisten, 128) == -1) {
4638c2ecf20Sopenharmony_ci			perror("listen");
4648c2ecf20Sopenharmony_ci			exit(1);
4658c2ecf20Sopenharmony_ci		}
4668c2ecf20Sopenharmony_ci		do_accept(fdlisten);
4678c2ecf20Sopenharmony_ci	}
4688c2ecf20Sopenharmony_ci
4698c2ecf20Sopenharmony_ci	buffer = mmap_large_buffer(chunk_size, &buffer_sz);
4708c2ecf20Sopenharmony_ci	if (buffer == (char *)-1) {
4718c2ecf20Sopenharmony_ci		perror("mmap");
4728c2ecf20Sopenharmony_ci		exit(1);
4738c2ecf20Sopenharmony_ci	}
4748c2ecf20Sopenharmony_ci
4758c2ecf20Sopenharmony_ci	fd = socket(cfg_family, SOCK_STREAM, 0);
4768c2ecf20Sopenharmony_ci	if (fd == -1) {
4778c2ecf20Sopenharmony_ci		perror("socket");
4788c2ecf20Sopenharmony_ci		exit(1);
4798c2ecf20Sopenharmony_ci	}
4808c2ecf20Sopenharmony_ci	apply_rcvsnd_buf(fd);
4818c2ecf20Sopenharmony_ci
4828c2ecf20Sopenharmony_ci	setup_sockaddr(cfg_family, host, &addr);
4838c2ecf20Sopenharmony_ci
4848c2ecf20Sopenharmony_ci	if (mss &&
4858c2ecf20Sopenharmony_ci	    setsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &mss, sizeof(mss)) == -1) {
4868c2ecf20Sopenharmony_ci		perror("setsockopt TCP_MAXSEG");
4878c2ecf20Sopenharmony_ci		exit(1);
4888c2ecf20Sopenharmony_ci	}
4898c2ecf20Sopenharmony_ci	if (connect(fd, (const struct sockaddr *)&addr, cfg_alen) == -1) {
4908c2ecf20Sopenharmony_ci		perror("connect");
4918c2ecf20Sopenharmony_ci		exit(1);
4928c2ecf20Sopenharmony_ci	}
4938c2ecf20Sopenharmony_ci	if (max_pacing_rate &&
4948c2ecf20Sopenharmony_ci	    setsockopt(fd, SOL_SOCKET, SO_MAX_PACING_RATE,
4958c2ecf20Sopenharmony_ci		       &max_pacing_rate, sizeof(max_pacing_rate)) == -1)
4968c2ecf20Sopenharmony_ci		perror("setsockopt SO_MAX_PACING_RATE");
4978c2ecf20Sopenharmony_ci
4988c2ecf20Sopenharmony_ci	if (zflg && setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY,
4998c2ecf20Sopenharmony_ci			       &on, sizeof(on)) == -1) {
5008c2ecf20Sopenharmony_ci		perror("setsockopt SO_ZEROCOPY, (-z option disabled)");
5018c2ecf20Sopenharmony_ci		zflg = 0;
5028c2ecf20Sopenharmony_ci	}
5038c2ecf20Sopenharmony_ci	while (total < FILE_SZ) {
5048c2ecf20Sopenharmony_ci		int64_t wr = FILE_SZ - total;
5058c2ecf20Sopenharmony_ci
5068c2ecf20Sopenharmony_ci		if (wr > chunk_size)
5078c2ecf20Sopenharmony_ci			wr = chunk_size;
5088c2ecf20Sopenharmony_ci		/* Note : we just want to fill the pipe with 0 bytes */
5098c2ecf20Sopenharmony_ci		wr = send(fd, buffer, (size_t)wr, zflg ? MSG_ZEROCOPY : 0);
5108c2ecf20Sopenharmony_ci		if (wr <= 0)
5118c2ecf20Sopenharmony_ci			break;
5128c2ecf20Sopenharmony_ci		total += wr;
5138c2ecf20Sopenharmony_ci	}
5148c2ecf20Sopenharmony_ci	close(fd);
5158c2ecf20Sopenharmony_ci	munmap(buffer, buffer_sz);
5168c2ecf20Sopenharmony_ci	return 0;
5178c2ecf20Sopenharmony_ci}
518