// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright 2018 Google Inc.
 * Author: Eric Dumazet (edumazet@google.com)
 *
 * Reference program demonstrating tcp mmap() usage,
 * and SO_RCVLOWAT hints for receiver.
 *
 * Note : NIC with header split is needed to use mmap() on TCP :
 * Each incoming frame must be a multiple of PAGE_SIZE bytes of TCP payload.
 *
 * How to use on loopback interface :
 *
 *  ifconfig lo mtu 61512  # 15*4096 + 40 (ipv6 header) + 32 (TCP with TS option header)
 *  tcp_mmap -s -z &
 *  tcp_mmap -H ::1 -z
 *
 *  Or leave default lo mtu, but use -M option to set TCP_MAXSEG option to (4096 + 12)
 *      (4096 : page size on x86, 12: TCP TS option length)
 *  tcp_mmap -s -z -M $((4096+12)) &
 *  tcp_mmap -H ::1 -z -M $((4096+12))
 *
 * Note: -z option on sender uses MSG_ZEROCOPY, which forces a copy when packets go through loopback interface.
 *       We might use sendfile() instead, but really this test program is about mmap(), for receivers ;)
 *
 * $ ./tcp_mmap -s &                                 # Without mmap()
 * $ for i in {1..4}; do ./tcp_mmap -H ::1 -z ; done
 * received 32768 MB (0 % mmap'ed) in 14.1157 s, 19.4732 Gbit
 *   cpu usage user:0.057 sys:7.815, 240.234 usec per MB, 65531 c-switches
 * received 32768 MB (0 % mmap'ed) in 14.6833 s, 18.7204 Gbit
 *   cpu usage user:0.043 sys:8.103, 248.596 usec per MB, 65524 c-switches
 * received 32768 MB (0 % mmap'ed) in 11.143 s, 24.6682 Gbit
 *   cpu usage user:0.044 sys:6.576, 202.026 usec per MB, 65519 c-switches
 * received 32768 MB (0 % mmap'ed) in 14.9056 s, 18.4413 Gbit
 *   cpu usage user:0.036 sys:8.193, 251.129 usec per MB, 65530 c-switches
 * $ kill %1   # kill tcp_mmap server
 *
 * $ ./tcp_mmap -s -z &                              # With mmap()
 * $ for i in {1..4}; do ./tcp_mmap -H ::1 -z ; done
 * received 32768 MB (99.9939 % mmap'ed) in 6.73792 s, 40.7956 Gbit
 *   cpu usage user:0.045 sys:2.827, 87.6465 usec per MB, 65532 c-switches
 * received 32768 MB (99.9939 % mmap'ed) in 7.26732 s, 37.8238 Gbit
 *   cpu usage user:0.037 sys:3.087, 95.3369 usec per MB, 65532 c-switches
 * received 32768 MB (99.9939 % mmap'ed) in 7.61661 s, 36.0893 Gbit
 *   cpu usage user:0.046 sys:3.559, 110.016 usec per MB, 65529 c-switches
 * received 32768 MB (99.9939 % mmap'ed) in 7.43764 s, 36.9577 Gbit
 *   cpu usage user:0.035 sys:3.467, 106.873 usec per MB, 65530 c-switches
 */
#define _GNU_SOURCE
#include <pthread.h>
#include <sys/types.h>
#include <fcntl.h>
#include <error.h>
#include <sys/socket.h>
#include <sys/mman.h>
#include <sys/resource.h>
#include <unistd.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>
#include <errno.h>
#include <time.h>
#include <sys/time.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <poll.h>
#include <linux/tcp.h>
#include <assert.h>

#ifndef MSG_ZEROCOPY
#define MSG_ZEROCOPY    0x4000000
#endif

/* Number of bytes the sender pushes before closing the connection. */
#define FILE_SZ (1ULL << 35)

static int cfg_family = AF_INET6;
static socklen_t cfg_alen = sizeof(struct sockaddr_in6);
static int cfg_port = 8787;

static int rcvbuf; /* Default: autotuning.  Can be set with -r <integer> option */
static int sndbuf; /* Default: autotuning.  Can be set with -w <integer> option */
static int zflg; /* zero copy option (-z): MSG_ZEROCOPY for sender, mmap() for receiver */
static int xflg; /* hash received data (simple xor) (-x option) */
/* -k option: receiver shall keep all received file in memory (no munmap() calls).
 * NOTE(review): the flag is parsed in main() but nothing in this file reads it.
 */
static int keepflag;

/* Bytes handled per send()/receive iteration; can be set with -C. */
static size_t chunk_size = 512*1024;

/* Alignment (power of two) for the large mappings; -a or huge page size. */
static size_t map_align;

/* Running xor of everything hashed by hash_zone().
 * NOTE(review): updated from every receiver thread without synchronization.
 */
unsigned long htotal;

/* Hint the CPU to pull the cache line at @x; a no-op outside x86_64. */
static inline void prefetch(const void *x)
{
#if defined(__x86_64__)
	__asm__ __volatile__("prefetcht0 %P0" : : "m" (*(const char *)x));
#else
	(void)x;
#endif
}

/*
 * Fold @length bytes starting at @zone into the global htotal.
 *
 * Full groups of eight longs are xor-ed word by word, the remaining tail
 * is xor-ed byte by byte.  Words are fetched with memcpy() so that no
 * alignment or aliasing assumption is made about @zone.
 */
void hash_zone(void *zone, unsigned int length)
{
	const unsigned char *p = zone;
	unsigned long temp = htotal;
	unsigned long w[8];

	while (length >= sizeof(w)) {
		prefetch(p + 384);
		memcpy(w, p, sizeof(w));
		temp ^= w[0];
		temp ^= w[1];
		temp ^= w[2];
		temp ^= w[3];
		temp ^= w[4];
		temp ^= w[5];
		temp ^= w[6];
		temp ^= w[7];
		p += sizeof(w);
		length -= sizeof(w);
	}
	while (length >= 1) {
		temp ^= *p;
		p += 1;
		length--;
	}
	htotal = temp;
}

/* Round up to a multiple of @align_to, which must be a power of two. */
#define ALIGN_UP(x, align_to)	(((x) + ((align_to)-1)) & ~((align_to)-1))
#define ALIGN_PTR_UP(p, ptr_align_to)	((__typeof__(p))ALIGN_UP((unsigned long)(p), ptr_align_to))

/*
 * Map an anonymous read/write buffer of at least @need bytes.
 *
 * A MAP_HUGETLB mapping of @need rounded up to map_align is tried first;
 * if that fails a regular mapping of exactly @need bytes is used and a
 * hint is printed.  The mapped size is stored in *@allocated in both
 * cases.  Returns MAP_FAILED when both attempts fail.
 */
static void *mmap_large_buffer(size_t need, size_t *allocated)
{
	void *buffer;
	size_t sz;

	/* Attempt to use huge pages if possible. */
	sz = ALIGN_UP(need, map_align);
	buffer = mmap(NULL, sz, PROT_READ | PROT_WRITE,
		      MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);

	if (buffer == MAP_FAILED) {
		sz = need;
		buffer = mmap(NULL, sz, PROT_READ | PROT_WRITE,
			      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (buffer != MAP_FAILED)
			fprintf(stderr, "MAP_HUGETLB attempt failed, look at /sys/kernel/mm/hugepages for optimal performance\n");
	}
	*allocated = sz;
	return buffer;
}

/*
 * Receiver thread: drains one accepted connection and prints statistics.
 *
 * @arg carries the connected socket descriptor (cast through unsigned long).
 * With -z the socket is mmap()ed and data is fetched with
 * TCP_ZEROCOPY_RECEIVE; bytes the kernel reports through recv_skip_hint,
 * and all bytes when zero copy is not in use, are read() into a private
 * buffer.  The thread owns the descriptor and closes it before exiting.
 */
void *child_thread(void *arg)
{
	unsigned long total_mmap = 0, total = 0;
	struct tcp_zerocopy_receive zc;
	unsigned long delta_usec;
	/* Per-thread copy: a failed socket mmap() here must not change what
	 * other receiver threads do.
	 */
	int use_zc = zflg;
	int flags = MAP_SHARED;
	struct timeval t0, t1;
	char *buffer = NULL;
	void *raddr = MAP_FAILED;
	void *addr = NULL;
	double throughput;
	struct rusage ru;
	size_t buffer_sz;
	ssize_t lu;
	int fd;

	fd = (int)(unsigned long)arg;

	gettimeofday(&t0, NULL);

	fcntl(fd, F_SETFL, O_NDELAY);
	buffer = mmap_large_buffer(chunk_size, &buffer_sz);
	if (buffer == MAP_FAILED) {
		perror("mmap");
		goto error;
	}
	if (use_zc) {
		/* Over-allocate by map_align so an aligned window fits. */
		raddr = mmap(NULL, chunk_size + map_align, PROT_READ, flags, fd, 0);
		if (raddr == MAP_FAILED) {
			perror("mmap");
			use_zc = 0;
		} else {
			addr = ALIGN_PTR_UP(raddr, map_align);
		}
	}
	while (1) {
		struct pollfd pfd = { .fd = fd, .events = POLLIN, };
		size_t sub;

		poll(&pfd, 1, 10000);
		if (use_zc) {
			socklen_t zc_len = sizeof(zc);
			int res;

			memset(&zc, 0, sizeof(zc));
			zc.address = (__u64)((unsigned long)addr);
			zc.length = chunk_size;

			res = getsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE,
					 &zc, &zc_len);
			/* NOTE(review): there is no explicit EOF test on this
			 * path; a failing getsockopt() is what ends the loop.
			 */
			if (res == -1)
				break;

			if (zc.length) {
				assert(zc.length <= chunk_size);
				total_mmap += zc.length;
				if (xflg)
					hash_zone(addr, zc.length);
				/* It is more efficient to unmap the pages right now,
				 * instead of doing this in next TCP_ZEROCOPY_RECEIVE.
				 */
				madvise(addr, zc.length, MADV_DONTNEED);
				total += zc.length;
			}
			if (zc.recv_skip_hint) {
				assert(zc.recv_skip_hint <= chunk_size);
				lu = read(fd, buffer, zc.recv_skip_hint);
				if (lu > 0) {
					if (xflg)
						hash_zone(buffer, lu);
					total += lu;
				}
			}
			continue;
		}
		sub = 0;
		while (sub < chunk_size) {
			lu = read(fd, buffer + sub, chunk_size - sub);
			if (lu == 0)
				goto end;
			if (lu < 0) {
				/* Nothing available right now: go back to poll(). */
				if (errno == EAGAIN || errno == EINTR)
					break;
				/* Hard error: stop instead of spinning on poll(). */
				perror("read");
				goto end;
			}
			if (xflg)
				hash_zone(buffer + sub, lu);
			total += lu;
			sub += lu;
		}
	}
end:
	gettimeofday(&t1, NULL);
	delta_usec = (t1.tv_sec - t0.tv_sec) * 1000000 + t1.tv_usec - t0.tv_usec;

	throughput = 0;
	if (delta_usec)
		throughput = total * 8.0 / (double)delta_usec / 1000.0;
	getrusage(RUSAGE_THREAD, &ru);
	if (total > 1024*1024) {
		unsigned long total_usec;
		unsigned long mb = total >> 20;
		total_usec = 1000000*ru.ru_utime.tv_sec + ru.ru_utime.tv_usec +
			     1000000*ru.ru_stime.tv_sec + ru.ru_stime.tv_usec;
		printf("received %lg MB (%lg %% mmap'ed) in %lg s, %lg Gbit\n"
		       "  cpu usage user:%lg sys:%lg, %lg usec per MB, %lu c-switches\n",
				total / (1024.0 * 1024.0),
				100.0*total_mmap/total,
				(double)delta_usec / 1000000.0,
				throughput,
				(double)ru.ru_utime.tv_sec + (double)ru.ru_utime.tv_usec / 1000000.0,
				(double)ru.ru_stime.tv_sec + (double)ru.ru_stime.tv_usec / 1000000.0,
				(double)total_usec/mb,
				(unsigned long)ru.ru_nvcsw);
	}
error:
	if (buffer != MAP_FAILED)
		munmap(buffer, buffer_sz);
	close(fd);
	if (raddr != MAP_FAILED)
		munmap(raddr, chunk_size + map_align);
	pthread_exit(0);
}

/* Apply the -r / -w buffer sizes to @fd; failures are reported, not fatal. */
static void apply_rcvsnd_buf(int fd)
{
	if (rcvbuf && setsockopt(fd, SOL_SOCKET,
				 SO_RCVBUF, &rcvbuf, sizeof(rcvbuf)) == -1) {
		perror("setsockopt SO_RCVBUF");
	}

	if (sndbuf && setsockopt(fd, SOL_SOCKET,
				 SO_SNDBUF, &sndbuf, sizeof(sndbuf)) == -1) {
		perror("setsockopt SO_SNDBUF");
	}
}

/*
 * Fill @sockaddr for @domain (PF_INET or PF_INET6) with cfg_port and,
 * when @str_addr is not NULL, the parsed address.  A NULL @str_addr
 * leaves the address zeroed.  Exits through error() on a parse failure
 * or an unsupported domain.
 */
static void setup_sockaddr(int domain, const char *str_addr,
			   struct sockaddr_storage *sockaddr)
{
	struct sockaddr_in6 *addr6 = (void *) sockaddr;
	struct sockaddr_in *addr4 = (void *) sockaddr;

	switch (domain) {
	case PF_INET:
		memset(addr4, 0, sizeof(*addr4));
		addr4->sin_family = AF_INET;
		addr4->sin_port = htons(cfg_port);
		if (str_addr &&
		    inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1)
			error(1, 0, "ipv4 parse error: %s", str_addr);
		break;
	case PF_INET6:
		memset(addr6, 0, sizeof(*addr6));
		addr6->sin6_family = AF_INET6;
		addr6->sin6_port = htons(cfg_port);
		if (str_addr &&
		    inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1)
			error(1, 0, "ipv6 parse error: %s", str_addr);
		break;
	default:
		error(1, 0, "illegal domain");
	}
}

/*
 * Accept connections on @fdlisten forever, handing each one to a detached
 * child_thread().  SO_RCVLOWAT is set to chunk_size on the listener before
 * accepting.  This function does not return.
 */
static void do_accept(int fdlisten)
{
	pthread_attr_t attr;
	int rcvlowat;

	pthread_attr_init(&attr);
	pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);

	rcvlowat = chunk_size;
	if (setsockopt(fdlisten, SOL_SOCKET, SO_RCVLOWAT,
		       &rcvlowat, sizeof(rcvlowat)) == -1) {
		perror("setsockopt SO_RCVLOWAT");
	}

	apply_rcvsnd_buf(fdlisten);

	while (1) {
		/* Large enough for either address family; the peer address
		 * itself is not used.
		 */
		struct sockaddr_storage addr;
		socklen_t addrlen = sizeof(addr);
		pthread_t th;
		int fd, res;

		fd = accept(fdlisten, (struct sockaddr *)&addr, &addrlen);
		if (fd == -1) {
			perror("accept");
			continue;
		}
		res = pthread_create(&th, &attr, child_thread,
				     (void *)(unsigned long)fd);
		if (res) {
			errno = res;
			perror("pthread_create");
			close(fd);
		}
	}
}

/* Each thread should reserve a big enough vma to avoid
 * spinlock collisions in ptl locks.
 * This size is 2MB on x86_64, and is exported in /proc/meminfo.
 *
 * Returns the "Hugepagesize" value from /proc/meminfo in bytes, or 0 when
 * the file cannot be opened or has no such line.
 */
static unsigned long default_huge_page_size(void)
{
	FILE *f = fopen("/proc/meminfo", "r");
	unsigned long hps = 0;
	size_t linelen = 0;
	char *line = NULL;

	if (!f)
		return 0;
	while (getline(&line, &linelen, f) > 0) {
		if (sscanf(line, "Hugepagesize:       %lu kB", &hps) == 1) {
			hps <<= 10;	/* kB -> bytes */
			break;
		}
	}
	free(line);
	fclose(f);
	return hps;
}

/*
 * Options:
 *   -4 / -6   address family (default IPv6)
 *   -p port   TCP port (default 8787)
 *   -H host   address to connect to (client) or bind to (server)
 *   -s        server: listen and receive; otherwise act as sender
 *   -r / -w   SO_RCVBUF / SO_SNDBUF
 *   -z        zero copy (MSG_ZEROCOPY when sending, mmap() when receiving)
 *   -x        xor-hash received data
 *   -k        sets keepflag
 *   -P rate   SO_MAX_PACING_RATE on the sender
 *   -M mss    TCP_MAXSEG
 *   -C bytes  chunk size
 *   -a bytes  mapping alignment (power of two)
 */
int main(int argc, char *argv[])
{
	struct sockaddr_storage listenaddr, addr;
	unsigned int max_pacing_rate = 0;
	uint64_t total = 0;
	char *host = NULL;
	int fd, c, on = 1;
	size_t buffer_sz;
	char *buffer;
	int sflg = 0;
	int mss = 0;

	while ((c = getopt(argc, argv, "46p:svr:w:H:zxkP:M:C:a:")) != -1) {
		switch (c) {
		case '4':
			cfg_family = PF_INET;
			cfg_alen = sizeof(struct sockaddr_in);
			break;
		case '6':
			cfg_family = PF_INET6;
			cfg_alen = sizeof(struct sockaddr_in6);
			break;
		case 'p':
			cfg_port = atoi(optarg);
			break;
		case 'H':
			host = optarg;
			break;
		case 's': /* server : listen for incoming connections */
			sflg++;
			break;
		case 'r':
			rcvbuf = atoi(optarg);
			break;
		case 'w':
			sndbuf = atoi(optarg);
			break;
		case 'z':
			zflg = 1;
			break;
		case 'M':
			mss = atoi(optarg);
			break;
		case 'x':
			xflg = 1;
			break;
		case 'k':
			keepflag = 1;
			break;
		case 'P':
			max_pacing_rate = atoi(optarg) ;
			break;
		case 'C':
			chunk_size = atol(optarg);
			break;
		case 'a':
			map_align = atol(optarg);
			break;
		default:
			exit(1);
		}
	}
	if (!map_align) {
		map_align = default_huge_page_size();
		/* if really /proc/meminfo is not helping,
		 * we use the default x86_64 hugepagesize.
		 */
		if (!map_align)
			map_align = 2*1024*1024;
	}
	/* ALIGN_UP()/ALIGN_PTR_UP() are only correct for powers of two. */
	if (map_align & (map_align - 1))
		error(1, 0, "alignment (-a) must be a power of two");

	if (sflg) {
		int fdlisten = socket(cfg_family, SOCK_STREAM, 0);

		if (fdlisten == -1) {
			perror("socket");
			exit(1);
		}
		apply_rcvsnd_buf(fdlisten);
		setsockopt(fdlisten, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));

		setup_sockaddr(cfg_family, host, &listenaddr);

		if (mss &&
		    setsockopt(fdlisten, IPPROTO_TCP, TCP_MAXSEG,
			       &mss, sizeof(mss)) == -1) {
			perror("setsockopt TCP_MAXSEG");
			exit(1);
		}
		if (bind(fdlisten, (const struct sockaddr *)&listenaddr, cfg_alen) == -1) {
			perror("bind");
			exit(1);
		}
		if (listen(fdlisten, 128) == -1) {
			perror("listen");
			exit(1);
		}
		/* Never returns: the server loops in accept(). */
		do_accept(fdlisten);
	}

	buffer = mmap_large_buffer(chunk_size, &buffer_sz);
	if (buffer == MAP_FAILED) {
		perror("mmap");
		exit(1);
	}

	fd = socket(cfg_family, SOCK_STREAM, 0);
	if (fd == -1) {
		perror("socket");
		exit(1);
	}
	apply_rcvsnd_buf(fd);

	setup_sockaddr(cfg_family, host, &addr);

	if (mss &&
	    setsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &mss, sizeof(mss)) == -1) {
		perror("setsockopt TCP_MAXSEG");
		exit(1);
	}
	if (connect(fd, (const struct sockaddr *)&addr, cfg_alen) == -1) {
		perror("connect");
		exit(1);
	}
	if (max_pacing_rate &&
	    setsockopt(fd, SOL_SOCKET, SO_MAX_PACING_RATE,
		       &max_pacing_rate, sizeof(max_pacing_rate)) == -1)
		perror("setsockopt SO_MAX_PACING_RATE");

	if (zflg && setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY,
			       &on, sizeof(on)) == -1) {
		perror("setsockopt SO_ZEROCOPY, (-z option disabled)");
		zflg = 0;
	}
	while (total < FILE_SZ) {
		uint64_t left = FILE_SZ - total;
		size_t len = left > chunk_size ? chunk_size : (size_t)left;
		ssize_t wr;

		/* Note : we just want to fill the pipe with 0 bytes */
		wr = send(fd, buffer, len, zflg ? MSG_ZEROCOPY : 0);
		if (wr < 0)
			perror("send");
		if (wr <= 0)
			break;
		total += wr;
	}
	close(fd);
	munmap(buffer, buffer_sz);
	return 0;
}