1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright 2020 Google LLC
4 */
5#define _GNU_SOURCE
6
7#include <errno.h>
8#include <stdlib.h>
9#include <stdio.h>
10#include <string.h>
11#include <sys/mman.h>
12#include <time.h>
13#include <stdbool.h>
14
15#include "../kselftest.h"
16
/* Expected outcome of a test case, as passed to MAKE_TEST() */
#define EXPECT_SUCCESS 0
#define EXPECT_FAILURE 1

/* Whether the destination is placed so that it overlaps the source region */
#define NON_OVERLAPPING 0
#define OVERLAPPING 1

/* Nanoseconds per second, used when converting struct timespec values */
#define NS_PER_SEC 1000000000ULL

/* Validation thresholds, expressed in megabytes */
#define VALIDATION_NO_THRESHOLD 0	/* Verify the entire region */
#define VALIDATION_DEFAULT_THRESHOLD 4	/* 4MB */

/* Smaller of two values; note that an argument may be evaluated twice */
#define MIN(X, Y) ((Y) > (X) ? (X) : (Y))
26
/* Parameters describing one remap operation */
struct config {
	/* Alignment requested for the source address */
	unsigned long long src_alignment;
	/* Alignment requested for the destination address */
	unsigned long long dest_alignment;
	/* Size in bytes of the region that is mapped and then moved */
	unsigned long long region_size;
	/* OVERLAPPING or NON_OVERLAPPING */
	int overlapping;
};
33
34struct test {
35	const char *name;
36	struct config config;
37	int expect_failure;
38};
39
/* Byte sizes used for alignments and region sizes in the test table */
enum {
	_1KB = 1ULL << 10,	/* 1KB -> not page aligned */
	_4KB = 1ULL << 12,
	_8KB = 1ULL << 13,
	_1MB = 1ULL << 20,
	_2MB = 1ULL << 21,
	_4MB = 1ULL << 22,
	_1GB = 1ULL << 30,
	_2GB = 1ULL << 31,
	/* Alignments used by the "PMD-aligned" and "PUD-aligned" tests */
	PMD = _2MB,
	PUD = _1GB,
};
52
/* PTE-level alignment: expands to the page_size variable of the user's scope */
#define PTE page_size

/*
 * Builds a struct test compound literal from the source/destination
 * alignments, the region size, the overlap setting, the expected outcome
 * and the name to report.
 */
#define MAKE_TEST(src_al, dst_al, bytes, does_overlap, xfail, label)	\
(struct test){								\
	.name = (label),						\
	.config = {							\
		.src_alignment = (src_al),				\
		.dest_alignment = (dst_al),				\
		.region_size = (bytes),					\
		.overlapping = (does_overlap),				\
	},								\
	.expect_failure = (xfail)					\
}
67
/*
 * Probes whether [addr, addr + size) can be used as a remap destination.
 *
 * A throwaway shared anonymous mapping is requested at addr with
 * MAP_FIXED_NOREPLACE. Returns false only when that request fails with
 * EEXIST, i.e. the range collides with an existing mapping (e.g. text,
 * stack). A successful probe is unmapped again and, like any other mmap
 * error, results in true.
 */
static bool is_remap_region_valid(void *addr, unsigned long long size)
{
	void *probe;

	probe = mmap(addr, size, PROT_READ | PROT_WRITE,
		     MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE, -1, 0);
	if (probe != MAP_FAILED) {
		/* The probe succeeded; release it straight away */
		munmap(probe, size);
		return true;
	}

	/* Only an "already mapped" failure makes the region unusable */
	return errno != EEXIST;
}
91
92/* Returns mmap_min_addr sysctl tunable from procfs */
93static unsigned long long get_mmap_min_addr(void)
94{
95	FILE *fp;
96	int n_matched;
97	static unsigned long long addr;
98
99	if (addr)
100		return addr;
101
102	fp = fopen("/proc/sys/vm/mmap_min_addr", "r");
103	if (fp == NULL) {
104		ksft_print_msg("Failed to open /proc/sys/vm/mmap_min_addr: %s\n",
105			strerror(errno));
106		exit(KSFT_SKIP);
107	}
108
109	n_matched = fscanf(fp, "%llu", &addr);
110	if (n_matched != 1) {
111		ksft_print_msg("Failed to read /proc/sys/vm/mmap_min_addr: %s\n",
112			strerror(errno));
113		fclose(fp);
114		exit(KSFT_SKIP);
115	}
116
117	fclose(fp);
118	return addr;
119}
120
/*
 * Using /proc/self/maps, assert that the specified address range is contained
 * within a single mapping.
 *
 * @maps_fp: open stream in /proc/<pid>/maps format; it is rewound on entry
 * @start:   first address of the range
 * @end:     end address of the range
 *
 * Returns true if one line describes a mapping with begin <= start and
 * finish >= end, false otherwise. Lines that do not contain two address
 * tokens are skipped.
 */
static bool is_range_mapped(FILE *maps_fp, void *start, void *end)
{
	char *line = NULL;
	size_t len = 0;
	bool success = false;

	rewind(maps_fp);

	while (getline(&line, &len, maps_fp) != -1) {
		/* Each line starts with "<begin>-<finish> " in hexadecimal */
		char *first = strtok(line, "- ");
		char *second = strtok(NULL, "- ");
		unsigned long first_val, second_val;

		if (!first || !second)
			continue;

		/*
		 * Parse as unsigned: strtol() would clamp addresses above
		 * LONG_MAX and make such ranges impossible to match.
		 */
		first_val = strtoul(first, NULL, 16);
		second_val = strtoul(second, NULL, 16);

		if (first_val <= (unsigned long) start &&
		    second_val >= (unsigned long) end) {
			success = true;
			break;
		}
	}

	/* getline() allocates the line buffer; release it on every path */
	free(line);

	return success;
}
147
/*
 * Checks that growing a mapping in place merges it with its neighbour.
 *
 * Three pages are mapped, the middle one is unmapped, and the first page is
 * then expanded with mremap() so that it fills the hole. The test passes if
 * maps_fp afterwards shows the whole three-page range inside one mapping.
 */
static void mremap_expand_merge(FILE *maps_fp, unsigned long page_size)
{
	char *test_name = "mremap expand merge";
	bool merged = false;
	char *base, *grown;

	base = mmap(NULL, 3 * page_size, PROT_READ | PROT_WRITE,
		    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (base == MAP_FAILED) {
		ksft_print_msg("mmap failed: %s\n", strerror(errno));
		goto report;
	}

	/* Punch a one-page hole in the middle ... */
	munmap(base + page_size, page_size);

	/* ... and grow the first page, without moving it, into that hole */
	grown = mremap(base, page_size, 2 * page_size, 0);
	if (grown == MAP_FAILED) {
		ksft_print_msg("mremap failed: %s\n", strerror(errno));
		/* Release the first and the last page individually */
		munmap(base, page_size);
		munmap(base + 2 * page_size, page_size);
		goto report;
	}

	merged = is_range_mapped(maps_fp, base, base + 3 * page_size);
	munmap(base, 3 * page_size);

report:
	if (!merged)
		ksft_test_result_fail("%s\n", test_name);
	else
		ksft_test_result_pass("%s\n", test_name);
}
187
/*
 * Variant of mremap_expand_merge() that expands from inside the mapping.
 *
 * Three pages are mapped and the last one is unmapped. The second page is
 * then expanded with mremap() to two pages, which should restore the mapping
 * to its former state. The test passes if maps_fp afterwards shows the whole
 * three-page range inside one mapping.
 */
static void mremap_expand_merge_offset(FILE *maps_fp, unsigned long page_size)
{
	char *test_name = "mremap expand merge offset";
	bool merged = false;
	char *base, *grown;

	base = mmap(NULL, 3 * page_size, PROT_READ | PROT_WRITE,
		    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (base == MAP_FAILED) {
		ksft_print_msg("mmap failed: %s\n", strerror(errno));
		goto report;
	}

	/* Unmap final page to ensure we have space to expand. */
	munmap(base + 2 * page_size, page_size);

	/* Grow the second page, without moving it, over the freed page */
	grown = mremap(base + page_size, page_size, 2 * page_size, 0);
	if (grown == MAP_FAILED) {
		ksft_print_msg("mremap failed: %s\n", strerror(errno));
		munmap(base, 2 * page_size);
		goto report;
	}

	merged = is_range_mapped(maps_fp, base, base + 3 * page_size);
	munmap(base, 3 * page_size);

report:
	if (!merged)
		ksft_test_result_fail("%s\n", test_name);
	else
		ksft_test_result_pass("%s\n", test_name);
}
226
227/*
228 * Returns the start address of the mapping on success, else returns
229 * NULL on failure.
230 */
231static void *get_source_mapping(struct config c)
232{
233	unsigned long long addr = 0ULL;
234	void *src_addr = NULL;
235	unsigned long long mmap_min_addr;
236
237	mmap_min_addr = get_mmap_min_addr();
238
239retry:
240	addr += c.src_alignment;
241	if (addr < mmap_min_addr)
242		goto retry;
243
244	src_addr = mmap((void *) addr, c.region_size, PROT_READ | PROT_WRITE,
245					MAP_FIXED_NOREPLACE | MAP_ANONYMOUS | MAP_SHARED,
246					-1, 0);
247	if (src_addr == MAP_FAILED) {
248		if (errno == EPERM || errno == EEXIST)
249			goto retry;
250		goto error;
251	}
252	/*
253	 * Check that the address is aligned to the specified alignment.
254	 * Addresses which have alignments that are multiples of that
255	 * specified are not considered valid. For instance, 1GB address is
256	 * 2MB-aligned, however it will not be considered valid for a
257	 * requested alignment of 2MB. This is done to reduce coincidental
258	 * alignment in the tests.
259	 */
260	if (((unsigned long long) src_addr & (c.src_alignment - 1)) ||
261			!((unsigned long long) src_addr & c.src_alignment)) {
262		munmap(src_addr, c.region_size);
263		goto retry;
264	}
265
266	if (!src_addr)
267		goto error;
268
269	return src_addr;
270error:
271	ksft_print_msg("Failed to map source region: %s\n",
272			strerror(errno));
273	return NULL;
274}
275
276/* Returns the time taken for the remap on success else returns -1. */
277static long long remap_region(struct config c, unsigned int threshold_mb,
278			      char pattern_seed)
279{
280	void *addr, *src_addr, *dest_addr;
281	unsigned long long i;
282	struct timespec t_start = {0, 0}, t_end = {0, 0};
283	long long  start_ns, end_ns, align_mask, ret, offset;
284	unsigned long long threshold;
285
286	if (threshold_mb == VALIDATION_NO_THRESHOLD)
287		threshold = c.region_size;
288	else
289		threshold = MIN(threshold_mb * _1MB, c.region_size);
290
291	src_addr = get_source_mapping(c);
292	if (!src_addr) {
293		ret = -1;
294		goto out;
295	}
296
297	/* Set byte pattern */
298	srand(pattern_seed);
299	for (i = 0; i < threshold; i++)
300		memset((char *) src_addr + i, (char) rand(), 1);
301
302	/* Mask to zero out lower bits of address for alignment */
303	align_mask = ~(c.dest_alignment - 1);
304	/* Offset of destination address from the end of the source region */
305	offset = (c.overlapping) ? -c.dest_alignment : c.dest_alignment;
306	addr = (void *) (((unsigned long long) src_addr + c.region_size
307			  + offset) & align_mask);
308
309	/* See comment in get_source_mapping() */
310	if (!((unsigned long long) addr & c.dest_alignment))
311		addr = (void *) ((unsigned long long) addr | c.dest_alignment);
312
313	/* Don't destroy existing mappings unless expected to overlap */
314	while (!is_remap_region_valid(addr, c.region_size) && !c.overlapping) {
315		/* Check for unsigned overflow */
316		if (addr + c.dest_alignment < addr) {
317			ksft_print_msg("Couldn't find a valid region to remap to\n");
318			ret = -1;
319			goto out;
320		}
321		addr += c.dest_alignment;
322	}
323
324	clock_gettime(CLOCK_MONOTONIC, &t_start);
325	dest_addr = mremap(src_addr, c.region_size, c.region_size,
326					  MREMAP_MAYMOVE|MREMAP_FIXED, (char *) addr);
327	clock_gettime(CLOCK_MONOTONIC, &t_end);
328
329	if (dest_addr == MAP_FAILED) {
330		ksft_print_msg("mremap failed: %s\n", strerror(errno));
331		ret = -1;
332		goto clean_up_src;
333	}
334
335	/* Verify byte pattern after remapping */
336	srand(pattern_seed);
337	for (i = 0; i < threshold; i++) {
338		char c = (char) rand();
339
340		if (((char *) dest_addr)[i] != c) {
341			ksft_print_msg("Data after remap doesn't match at offset %d\n",
342				       i);
343			ksft_print_msg("Expected: %#x\t Got: %#x\n", c & 0xff,
344					((char *) dest_addr)[i] & 0xff);
345			ret = -1;
346			goto clean_up_dest;
347		}
348	}
349
350	start_ns = t_start.tv_sec * NS_PER_SEC + t_start.tv_nsec;
351	end_ns = t_end.tv_sec * NS_PER_SEC + t_end.tv_nsec;
352	ret = end_ns - start_ns;
353
354/*
355 * Since the destination address is specified using MREMAP_FIXED, subsequent
356 * mremap will unmap any previous mapping at the address range specified by
357 * dest_addr and region_size. This significantly affects the remap time of
358 * subsequent tests. So we clean up mappings after each test.
359 */
360clean_up_dest:
361	munmap(dest_addr, c.region_size);
362clean_up_src:
363	munmap(src_addr, c.region_size);
364out:
365	return ret;
366}
367
368static void run_mremap_test_case(struct test test_case, int *failures,
369				 unsigned int threshold_mb,
370				 unsigned int pattern_seed)
371{
372	long long remap_time = remap_region(test_case.config, threshold_mb,
373					    pattern_seed);
374
375	if (remap_time < 0) {
376		if (test_case.expect_failure)
377			ksft_test_result_xfail("%s\n\tExpected mremap failure\n",
378					      test_case.name);
379		else {
380			ksft_test_result_fail("%s\n", test_case.name);
381			*failures += 1;
382		}
383	} else {
384		/*
385		 * Comparing mremap time is only applicable if entire region
386		 * was faulted in.
387		 */
388		if (threshold_mb == VALIDATION_NO_THRESHOLD ||
389		    test_case.config.region_size <= threshold_mb * _1MB)
390			ksft_test_result_pass("%s\n\tmremap time: %12lldns\n",
391					      test_case.name, remap_time);
392		else
393			ksft_test_result_pass("%s\n", test_case.name);
394	}
395}
396
/* Prints the command line help for the program named cmd to stderr. */
static void usage(const char *cmd)
{
	static const char help_fmt[] =
		"Usage: %s [[-t <threshold_mb>] [-p <pattern_seed>]]\n"
		"-t\t only validate threshold_mb of the remapped region\n"
		"  \t if 0 is supplied no threshold is used; all tests\n"
		"  \t are run and remapped regions validated fully.\n"
		"  \t The default threshold used is 4MB.\n"
		"-p\t provide a seed to generate the random pattern for\n"
		"  \t validating the remapped region.\n";

	fprintf(stderr, help_fmt, cmd);
}
408
/*
 * Converts str to an unsigned int. Only a complete base-10 number without a
 * minus sign that fits into an unsigned int is accepted.
 *
 * Returns 0 and stores the number in *value on success, -1 otherwise.
 */
static int parse_uint_arg(const char *str, unsigned int *value)
{
	unsigned long parsed;
	char *end;

	/* strtoul() accepts and negates "-N", so rule that out explicitly */
	if (strchr(str, '-'))
		return -1;

	errno = 0;
	parsed = strtoul(str, &end, 10);
	if (errno || end == str || *end != '\0' ||
	    parsed > (unsigned int) -1)
		return -1;

	*value = (unsigned int) parsed;
	return 0;
}

/*
 * Parses the command line options.
 *
 * @threshold_mb: updated with the argument of -t, if given
 * @pattern_seed: updated with the argument of -p, if given
 *
 * Returns 0 on success. On an unknown option, an invalid numeric argument
 * or leftover non-option arguments the usage text is printed and -1 is
 * returned.
 */
static int parse_args(int argc, char **argv, unsigned int *threshold_mb,
		      unsigned int *pattern_seed)
{
	const char *optstr = "t:p:";
	int opt;

	while ((opt = getopt(argc, argv, optstr)) != -1) {
		unsigned int *target;

		switch (opt) {
		case 't':
			target = threshold_mb;
			break;
		case 'p':
			target = pattern_seed;
			break;
		default:
			usage(argv[0]);
			return -1;
		}

		/*
		 * Reject malformed numbers instead of silently reading them
		 * as 0, which for -t would disable the threshold altogether.
		 */
		if (parse_uint_arg(optarg, target) < 0) {
			usage(argv[0]);
			return -1;
		}
	}

	if (optind < argc) {
		usage(argv[0]);
		return -1;
	}

	return 0;
}
436
437#define MAX_TEST 13
438#define MAX_PERF_TEST 3
439int main(int argc, char **argv)
440{
441	int failures = 0;
442	int i, run_perf_tests;
443	unsigned int threshold_mb = VALIDATION_DEFAULT_THRESHOLD;
444	unsigned int pattern_seed;
445	int num_expand_tests = 2;
446	struct test test_cases[MAX_TEST];
447	struct test perf_test_cases[MAX_PERF_TEST];
448	int page_size;
449	time_t t;
450	FILE *maps_fp;
451
452	pattern_seed = (unsigned int) time(&t);
453
454	if (parse_args(argc, argv, &threshold_mb, &pattern_seed) < 0)
455		exit(EXIT_FAILURE);
456
457	ksft_print_msg("Test configs:\n\tthreshold_mb=%u\n\tpattern_seed=%u\n\n",
458		       threshold_mb, pattern_seed);
459
460	page_size = sysconf(_SC_PAGESIZE);
461
462	/* Expected mremap failures */
463	test_cases[0] =	MAKE_TEST(page_size, page_size, page_size,
464				  OVERLAPPING, EXPECT_FAILURE,
465				  "mremap - Source and Destination Regions Overlapping");
466
467	test_cases[1] = MAKE_TEST(page_size, page_size/4, page_size,
468				  NON_OVERLAPPING, EXPECT_FAILURE,
469				  "mremap - Destination Address Misaligned (1KB-aligned)");
470	test_cases[2] = MAKE_TEST(page_size/4, page_size, page_size,
471				  NON_OVERLAPPING, EXPECT_FAILURE,
472				  "mremap - Source Address Misaligned (1KB-aligned)");
473
474	/* Src addr PTE aligned */
475	test_cases[3] = MAKE_TEST(PTE, PTE, PTE * 2,
476				  NON_OVERLAPPING, EXPECT_SUCCESS,
477				  "8KB mremap - Source PTE-aligned, Destination PTE-aligned");
478
479	/* Src addr 1MB aligned */
480	test_cases[4] = MAKE_TEST(_1MB, PTE, _2MB, NON_OVERLAPPING, EXPECT_SUCCESS,
481				  "2MB mremap - Source 1MB-aligned, Destination PTE-aligned");
482	test_cases[5] = MAKE_TEST(_1MB, _1MB, _2MB, NON_OVERLAPPING, EXPECT_SUCCESS,
483				  "2MB mremap - Source 1MB-aligned, Destination 1MB-aligned");
484
485	/* Src addr PMD aligned */
486	test_cases[6] = MAKE_TEST(PMD, PTE, _4MB, NON_OVERLAPPING, EXPECT_SUCCESS,
487				  "4MB mremap - Source PMD-aligned, Destination PTE-aligned");
488	test_cases[7] =	MAKE_TEST(PMD, _1MB, _4MB, NON_OVERLAPPING, EXPECT_SUCCESS,
489				  "4MB mremap - Source PMD-aligned, Destination 1MB-aligned");
490	test_cases[8] = MAKE_TEST(PMD, PMD, _4MB, NON_OVERLAPPING, EXPECT_SUCCESS,
491				  "4MB mremap - Source PMD-aligned, Destination PMD-aligned");
492
493	/* Src addr PUD aligned */
494	test_cases[9] = MAKE_TEST(PUD, PTE, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
495				  "2GB mremap - Source PUD-aligned, Destination PTE-aligned");
496	test_cases[10] = MAKE_TEST(PUD, _1MB, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
497				   "2GB mremap - Source PUD-aligned, Destination 1MB-aligned");
498	test_cases[11] = MAKE_TEST(PUD, PMD, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
499				   "2GB mremap - Source PUD-aligned, Destination PMD-aligned");
500	test_cases[12] = MAKE_TEST(PUD, PUD, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
501				   "2GB mremap - Source PUD-aligned, Destination PUD-aligned");
502
503	perf_test_cases[0] =  MAKE_TEST(page_size, page_size, _1GB, NON_OVERLAPPING, EXPECT_SUCCESS,
504					"1GB mremap - Source PTE-aligned, Destination PTE-aligned");
505	/*
506	 * mremap 1GB region - Page table level aligned time
507	 * comparison.
508	 */
509	perf_test_cases[1] = MAKE_TEST(PMD, PMD, _1GB, NON_OVERLAPPING, EXPECT_SUCCESS,
510				       "1GB mremap - Source PMD-aligned, Destination PMD-aligned");
511	perf_test_cases[2] = MAKE_TEST(PUD, PUD, _1GB, NON_OVERLAPPING, EXPECT_SUCCESS,
512				       "1GB mremap - Source PUD-aligned, Destination PUD-aligned");
513
514	run_perf_tests =  (threshold_mb == VALIDATION_NO_THRESHOLD) ||
515				(threshold_mb * _1MB >= _1GB);
516
517	ksft_set_plan(ARRAY_SIZE(test_cases) + (run_perf_tests ?
518		      ARRAY_SIZE(perf_test_cases) : 0) + num_expand_tests);
519
520	for (i = 0; i < ARRAY_SIZE(test_cases); i++)
521		run_mremap_test_case(test_cases[i], &failures, threshold_mb,
522				     pattern_seed);
523
524	maps_fp = fopen("/proc/self/maps", "r");
525
526	if (maps_fp == NULL) {
527		ksft_print_msg("Failed to read /proc/self/maps: %s\n", strerror(errno));
528		exit(KSFT_FAIL);
529	}
530
531	mremap_expand_merge(maps_fp, page_size);
532	mremap_expand_merge_offset(maps_fp, page_size);
533
534	fclose(maps_fp);
535
536	if (run_perf_tests) {
537		ksft_print_msg("\n%s\n",
538		 "mremap HAVE_MOVE_PMD/PUD optimization time comparison for 1GB region:");
539		for (i = 0; i < ARRAY_SIZE(perf_test_cases); i++)
540			run_mremap_test_case(perf_test_cases[i], &failures,
541					     threshold_mb, pattern_seed);
542	}
543
544	if (failures > 0)
545		ksft_exit_fail();
546	else
547		ksft_exit_pass();
548}
549