162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
/*
 * hugepage-mremap:
 *
 * Example of remapping huge page memory in a user application using the
 * mremap system call.  The huge pages are backed by an anonymous file
 * created with memfd_create(MFD_HUGETLB), so no path to a hugetlbfs file
 * needs to be passed.  The amount of memory used by this test in MBs can
 * optionally be passed as the first argument.  If no memory amount is
 * passed, the default amount is 10MB.
 *
 * To make sure the test triggers pmd sharing and goes through the 'unshare'
 * path in the mremap code, use 1GB (1024) or more.
 */
1462306a36Sopenharmony_ci
1562306a36Sopenharmony_ci#define _GNU_SOURCE
1662306a36Sopenharmony_ci#include <stdlib.h>
1762306a36Sopenharmony_ci#include <stdio.h>
1862306a36Sopenharmony_ci#include <unistd.h>
1962306a36Sopenharmony_ci#include <sys/mman.h>
2062306a36Sopenharmony_ci#include <errno.h>
2162306a36Sopenharmony_ci#include <fcntl.h> /* Definition of O_* constants */
2262306a36Sopenharmony_ci#include <sys/syscall.h> /* Definition of SYS_* constants */
2362306a36Sopenharmony_ci#include <linux/userfaultfd.h>
2462306a36Sopenharmony_ci#include <sys/ioctl.h>
2562306a36Sopenharmony_ci#include <string.h>
2662306a36Sopenharmony_ci#include <stdbool.h>
2762306a36Sopenharmony_ci#include "vm_util.h"
2862306a36Sopenharmony_ci
/* Amount of memory (in MB) mapped when no length is given on the command line. */
#define DEFAULT_LENGTH_MB 10UL
/*
 * Convert a size in megabytes to bytes.  The argument is parenthesized so
 * that compound expressions such as MB_TO_BYTES(a + b) expand correctly, and
 * the multipliers are unsigned long so an int argument is not multiplied in
 * (overflow-prone) int arithmetic.
 */
#define MB_TO_BYTES(x) ((x) * 1024UL * 1024UL)

/* Page protection used for every mapping created by this test. */
#define PROTECTION (PROT_READ | PROT_WRITE | PROT_EXEC)
/* mmap flags for the plain (non-hugetlb) anonymous mapping. */
#define FLAGS (MAP_SHARED | MAP_ANONYMOUS)
3462306a36Sopenharmony_ci
/*
 * Print the first unsigned int stored at addr, in hex.
 *
 * addr is read through an unsigned int pointer, so it has to be suitably
 * aligned and point to at least sizeof(unsigned int) readable bytes.
 */
static void check_bytes(char *addr)
{
	unsigned int *first_word = (unsigned int *)addr;

	printf("First hex is %x\n", *first_word);
}
3962306a36Sopenharmony_ci
/*
 * Fill len bytes starting at addr with a repeating pattern: the byte at
 * offset n holds n truncated to a char.
 */
static void write_bytes(char *addr, size_t len)
{
	unsigned long off = 0;

	while (off < len) {
		addr[off] = (char)off;
		off++;
	}
}
4762306a36Sopenharmony_ci
/*
 * Verify that len bytes starting at addr hold the pattern where the byte at
 * offset n equals n truncated to a char.
 *
 * The first word of the region is printed via check_bytes() before the scan.
 * Returns 0 when every byte matches; on the first mismatch the offending
 * offset is printed and 1 is returned.
 */
static int read_bytes(char *addr, size_t len)
{
	unsigned long pos;

	check_bytes(addr);

	for (pos = 0; pos < len; pos++) {
		if (addr[pos] == (char)pos)
			continue;

		printf("Mismatch at %lu\n", pos);
		return 1;
	}

	return 0;
}
6062306a36Sopenharmony_ci
/*
 * Create a userfaultfd object, map a new private anonymous region of len
 * bytes and register that region with the userfaultfd.
 *
 * Any failure is reported with perror() and terminates the process with
 * exit status 1.  Neither the userfaultfd descriptor nor the new mapping is
 * released here.
 *
 * NOTE(review): the addr value passed in by the caller is never read; it is
 * overwritten by the mmap() result below, so the region that ends up
 * registered is the fresh anonymous mapping rather than the caller's
 * region.  Confirm this is intended.
 */
static void register_region_with_uffd(char *addr, size_t len)
{
	struct uffdio_api api_req = {
		.api = UFFD_API,
		.features = 0,
	};
	long uffd; /* userfaultfd file descriptor */

	/* Create the userfaultfd object ... */
	uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
	if (uffd == -1) {
		perror("userfaultfd");
		exit(1);
	}

	/* ... and enable it by negotiating the API version. */
	if (ioctl(uffd, UFFDIO_API, &api_req) == -1) {
		perror("ioctl-UFFDIO_API");
		exit(1);
	}

	/*
	 * Create a private anonymous mapping.  The memory is demand-zero
	 * paged, i.e. not yet allocated; it only gets allocated, via the
	 * userfaultfd, once it is actually touched.
	 */
	addr = mmap(NULL, len, PROT_READ | PROT_WRITE,
		    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (addr == MAP_FAILED) {
		perror("mmap");
		exit(1);
	}

	printf("Address returned by mmap() = %p\n", addr);

	/*
	 * Register the range of the mapping just created with the
	 * userfaultfd object.  Only the first mode flag is set; presumably
	 * it requests tracking of missing pages (pages not yet faulted in)
	 * -- see uffd_register() in vm_util.h to confirm.
	 */
	if (uffd_register(uffd, addr, len, true, false, false)) {
		perror("ioctl-UFFDIO_REGISTER");
		exit(1);
	}
}
10562306a36Sopenharmony_ci
10662306a36Sopenharmony_ciint main(int argc, char *argv[])
10762306a36Sopenharmony_ci{
10862306a36Sopenharmony_ci	size_t length = 0;
10962306a36Sopenharmony_ci	int ret = 0, fd;
11062306a36Sopenharmony_ci
11162306a36Sopenharmony_ci	if (argc >= 2 && !strcmp(argv[1], "-h")) {
11262306a36Sopenharmony_ci		printf("Usage: %s [length_in_MB]\n", argv[0]);
11362306a36Sopenharmony_ci		exit(1);
11462306a36Sopenharmony_ci	}
11562306a36Sopenharmony_ci
11662306a36Sopenharmony_ci	/* Read memory length as the first arg if valid, otherwise fallback to
11762306a36Sopenharmony_ci	 * the default length.
11862306a36Sopenharmony_ci	 */
11962306a36Sopenharmony_ci	if (argc >= 2)
12062306a36Sopenharmony_ci		length = (size_t)atoi(argv[1]);
12162306a36Sopenharmony_ci	else
12262306a36Sopenharmony_ci		length = DEFAULT_LENGTH_MB;
12362306a36Sopenharmony_ci
12462306a36Sopenharmony_ci	length = MB_TO_BYTES(length);
12562306a36Sopenharmony_ci	fd = memfd_create(argv[0], MFD_HUGETLB);
12662306a36Sopenharmony_ci	if (fd < 0) {
12762306a36Sopenharmony_ci		perror("Open failed");
12862306a36Sopenharmony_ci		exit(1);
12962306a36Sopenharmony_ci	}
13062306a36Sopenharmony_ci
13162306a36Sopenharmony_ci	/* mmap to a PUD aligned address to hopefully trigger pmd sharing. */
13262306a36Sopenharmony_ci	unsigned long suggested_addr = 0x7eaa40000000;
13362306a36Sopenharmony_ci	void *haddr = mmap((void *)suggested_addr, length, PROTECTION,
13462306a36Sopenharmony_ci			   MAP_HUGETLB | MAP_SHARED | MAP_POPULATE, fd, 0);
13562306a36Sopenharmony_ci	printf("Map haddr: Returned address is %p\n", haddr);
13662306a36Sopenharmony_ci	if (haddr == MAP_FAILED) {
13762306a36Sopenharmony_ci		perror("mmap1");
13862306a36Sopenharmony_ci		exit(1);
13962306a36Sopenharmony_ci	}
14062306a36Sopenharmony_ci
14162306a36Sopenharmony_ci	/* mmap again to a dummy address to hopefully trigger pmd sharing. */
14262306a36Sopenharmony_ci	suggested_addr = 0x7daa40000000;
14362306a36Sopenharmony_ci	void *daddr = mmap((void *)suggested_addr, length, PROTECTION,
14462306a36Sopenharmony_ci			   MAP_HUGETLB | MAP_SHARED | MAP_POPULATE, fd, 0);
14562306a36Sopenharmony_ci	printf("Map daddr: Returned address is %p\n", daddr);
14662306a36Sopenharmony_ci	if (daddr == MAP_FAILED) {
14762306a36Sopenharmony_ci		perror("mmap3");
14862306a36Sopenharmony_ci		exit(1);
14962306a36Sopenharmony_ci	}
15062306a36Sopenharmony_ci
15162306a36Sopenharmony_ci	suggested_addr = 0x7faa40000000;
15262306a36Sopenharmony_ci	void *vaddr =
15362306a36Sopenharmony_ci		mmap((void *)suggested_addr, length, PROTECTION, FLAGS, -1, 0);
15462306a36Sopenharmony_ci	printf("Map vaddr: Returned address is %p\n", vaddr);
15562306a36Sopenharmony_ci	if (vaddr == MAP_FAILED) {
15662306a36Sopenharmony_ci		perror("mmap2");
15762306a36Sopenharmony_ci		exit(1);
15862306a36Sopenharmony_ci	}
15962306a36Sopenharmony_ci
16062306a36Sopenharmony_ci	register_region_with_uffd(haddr, length);
16162306a36Sopenharmony_ci
16262306a36Sopenharmony_ci	void *addr = mremap(haddr, length, length,
16362306a36Sopenharmony_ci			    MREMAP_MAYMOVE | MREMAP_FIXED, vaddr);
16462306a36Sopenharmony_ci	if (addr == MAP_FAILED) {
16562306a36Sopenharmony_ci		perror("mremap");
16662306a36Sopenharmony_ci		exit(1);
16762306a36Sopenharmony_ci	}
16862306a36Sopenharmony_ci
16962306a36Sopenharmony_ci	printf("Mremap: Returned address is %p\n", addr);
17062306a36Sopenharmony_ci	check_bytes(addr);
17162306a36Sopenharmony_ci	write_bytes(addr, length);
17262306a36Sopenharmony_ci	ret = read_bytes(addr, length);
17362306a36Sopenharmony_ci
17462306a36Sopenharmony_ci	munmap(addr, length);
17562306a36Sopenharmony_ci
17662306a36Sopenharmony_ci	addr = mremap(addr, length, length, 0);
17762306a36Sopenharmony_ci	if (addr != MAP_FAILED) {
17862306a36Sopenharmony_ci		printf("mremap: Expected failure, but call succeeded\n");
17962306a36Sopenharmony_ci		exit(1);
18062306a36Sopenharmony_ci	}
18162306a36Sopenharmony_ci
18262306a36Sopenharmony_ci	close(fd);
18362306a36Sopenharmony_ci
18462306a36Sopenharmony_ci	return ret;
18562306a36Sopenharmony_ci}
186