1#define _GNU_SOURCE
2#include <ctype.h>
3#include <errno.h>
4#include <fcntl.h>
5#include <limits.h>
6#include <dirent.h>
7#include <signal.h>
8#include <stdio.h>
9#include <stdlib.h>
10#include <stdbool.h>
11#include <string.h>
12#include <unistd.h>
13
14#include <linux/mman.h>
15#include <sys/mman.h>
16#include <sys/wait.h>
17#include <sys/types.h>
18#include <sys/stat.h>
19#include <sys/sysmacros.h>
20#include <sys/vfs.h>
21
22#include "linux/magic.h"
23
24#include "vm_util.h"
25
/* Fixed address every test mapping is placed at (1 GiB). */
#define BASE_ADDR ((void *)(1UL << 30))
static unsigned long hpage_pmd_size;	/* bytes covered by one PMD hugepage */
static unsigned long page_size;		/* base page size */
static int hpage_pmd_nr;		/* base pages per PMD hugepage */

#define THP_SYSFS "/sys/kernel/mm/transparent_hugepage/"
#define PID_SMAPS "/proc/self/smaps"
#define TEST_FILE "collapse_test_file"	/* file created for file-backed tests */

#define MAX_LINE_LENGTH 500	/* buffer size for smaps lines and patterns */
36
/* Kind of memory backing a test VMA. */
enum vma_type {
	VMA_ANON,
	VMA_FILE,
	VMA_SHMEM,
};

/*
 * Backing-type-specific operations: how to create/destroy a test area,
 * fault its pages in, and verify whether it is PMD-mapped huge.
 */
struct mem_ops {
	void *(*setup_area)(int nr_hpages);
	void (*cleanup_area)(void *p, unsigned long size);
	void (*fault)(void *p, unsigned long start, unsigned long end);
	bool (*check_huge)(void *addr, int nr_hpages);
	const char *name;
};

/* Set at startup to the ops selected by the command line (NULL = not run). */
static struct mem_ops *file_ops;
static struct mem_ops *anon_ops;
static struct mem_ops *shmem_ops;

/*
 * A way of triggering collapse (khugepaged scan vs MADV_COLLAPSE) and
 * whether that path honors the khugepaged max_ptes_* scan limits.
 */
struct collapse_context {
	void (*collapse)(const char *msg, char *p, int nr_hpages,
			 struct mem_ops *ops, bool expect);
	bool enforce_pte_scan_limits;
	const char *name;
};

static struct collapse_context *khugepaged_context;
static struct collapse_context *madvise_context;

/* State describing the file used by file-backed collapse tests. */
struct file_info {
	const char *dir;	/* directory the test file lives in */
	char path[PATH_MAX];	/* full path of the test file */
	enum vma_type type;	/* VMA_SHMEM if dir is tmpfs, else VMA_FILE */
	int fd;			/* fd of the mapped test file / memfd */
	/* sysfs read_ahead_kb control of the owning block device (VMA_FILE) */
	char dev_queue_read_ahead_path[PATH_MAX];
};

static struct file_info finfo;
74
/*
 * The enums below mirror the multi-choice THP sysfs files; each string
 * table maps enum values to the exact tokens the kernel exposes and is
 * NULL-terminated for read_string()'s lookup loop.
 */
enum thp_enabled {
	THP_ALWAYS,
	THP_MADVISE,
	THP_NEVER,
};

static const char *thp_enabled_strings[] = {
	"always",
	"madvise",
	"never",
	NULL
};

enum thp_defrag {
	THP_DEFRAG_ALWAYS,
	THP_DEFRAG_DEFER,
	THP_DEFRAG_DEFER_MADVISE,
	THP_DEFRAG_MADVISE,
	THP_DEFRAG_NEVER,
};

static const char *thp_defrag_strings[] = {
	"always",
	"defer",
	"defer+madvise",
	"madvise",
	"never",
	NULL
};

enum shmem_enabled {
	SHMEM_ALWAYS,
	SHMEM_WITHIN_SIZE,
	SHMEM_ADVISE,
	SHMEM_NEVER,
	SHMEM_DENY,
	SHMEM_FORCE,
};

static const char *shmem_enabled_strings[] = {
	"always",
	"within_size",
	"advise",
	"never",
	"deny",
	"force",
	NULL
};

/* Mirror of the khugepaged sysfs tunables. */
struct khugepaged_settings {
	bool defrag;
	unsigned int alloc_sleep_millisecs;
	unsigned int scan_sleep_millisecs;
	unsigned int max_ptes_none;
	unsigned int max_ptes_swap;
	unsigned int max_ptes_shared;
	unsigned long pages_to_scan;
};

/* Complete snapshot of all THP-related knobs the tests manipulate. */
struct settings {
	enum thp_enabled thp_enabled;
	enum thp_defrag thp_defrag;
	enum shmem_enabled shmem_enabled;
	bool use_zero_page;
	struct khugepaged_settings khugepaged;
	unsigned long read_ahead_kb;	/* block device readahead (VMA_FILE) */
};

/* Host settings captured at startup and restored on exit/signal. */
static struct settings saved_settings;
/* Set in forked children so they do not restore settings on exit. */
static bool skip_settings_restore;

/* Count of failed checks; becomes the process exit code. */
static int exit_status;
147
/* Report a passing check in green. */
static void success(const char *msg)
{
	const char *green_fmt = " \e[32m%s\e[0m\n";

	printf(green_fmt, msg);
}
152
153static void fail(const char *msg)
154{
155	printf(" \e[31m%s\e[0m\n", msg);
156	exit_status++;
157}
158
/* Report a skipped check in yellow (does not affect exit status). */
static void skip(const char *msg)
{
	const char *yellow_fmt = " \e[33m%s\e[0m\n";

	printf(yellow_fmt, msg);
}
163
/*
 * Read up to buflen - 1 bytes from the file at @path into @buf and
 * NUL-terminate the result.
 *
 * Returns the number of bytes read (> 0) on success, or 0 on any
 * failure (open error, read error, empty file, or zero-sized buffer).
 * Never returns a negative value — callers must test for zero.
 */
static int read_file(const char *path, char *buf, size_t buflen)
{
	int fd;
	ssize_t numread;

	/* Guard buflen == 0: "buflen - 1" would wrap to SIZE_MAX below. */
	if (!buflen)
		return 0;

	fd = open(path, O_RDONLY);
	if (fd == -1)
		return 0;

	numread = read(fd, buf, buflen - 1);
	close(fd);
	if (numread < 1)
		return 0;

	buf[numread] = '\0';
	return (int)numread;
}
184
/*
 * Write buflen - 1 bytes of @buf to the existing file at @path.
 * Exits the test on open failure or short write; on success returns
 * the number of bytes written (> 0).
 */
static int write_file(const char *path, const char *buf, size_t buflen)
{
	ssize_t written;
	int fd = open(path, O_WRONLY);

	if (fd == -1) {
		printf("open(%s)\n", path);
		exit(EXIT_FAILURE);
		return 0;
	}

	written = write(fd, buf, buflen - 1);
	close(fd);

	if (written < 1) {
		printf("write(%s)\n", buf);
		exit(EXIT_FAILURE);
		return 0;
	}

	return (unsigned int) written;
}
207
208static int read_string(const char *name, const char *strings[])
209{
210	char path[PATH_MAX];
211	char buf[256];
212	char *c;
213	int ret;
214
215	ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
216	if (ret >= PATH_MAX) {
217		printf("%s: Pathname is too long\n", __func__);
218		exit(EXIT_FAILURE);
219	}
220
221	if (!read_file(path, buf, sizeof(buf))) {
222		perror(path);
223		exit(EXIT_FAILURE);
224	}
225
226	c = strchr(buf, '[');
227	if (!c) {
228		printf("%s: Parse failure\n", __func__);
229		exit(EXIT_FAILURE);
230	}
231
232	c++;
233	memmove(buf, c, sizeof(buf) - (c - buf));
234
235	c = strchr(buf, ']');
236	if (!c) {
237		printf("%s: Parse failure\n", __func__);
238		exit(EXIT_FAILURE);
239	}
240	*c = '\0';
241
242	ret = 0;
243	while (strings[ret]) {
244		if (!strcmp(strings[ret], buf))
245			return ret;
246		ret++;
247	}
248
249	printf("Failed to parse %s\n", name);
250	exit(EXIT_FAILURE);
251}
252
253static void write_string(const char *name, const char *val)
254{
255	char path[PATH_MAX];
256	int ret;
257
258	ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
259	if (ret >= PATH_MAX) {
260		printf("%s: Pathname is too long\n", __func__);
261		exit(EXIT_FAILURE);
262	}
263
264	if (!write_file(path, val, strlen(val) + 1)) {
265		perror(path);
266		exit(EXIT_FAILURE);
267	}
268}
269
/*
 * Read an unsigned decimal value from the file at @path.  Exits the
 * test on read failure.
 *
 * Fixes: read_file() returns 0 on failure and never a negative value,
 * so the old "< 0" check could never fire and failures were silently
 * parsed as garbage.  Also drops the meaningless const qualifier on
 * the by-value return type.
 */
static unsigned long _read_num(const char *path)
{
	char buf[21];

	if (!read_file(path, buf, sizeof(buf))) {
		perror("read_file(read_num)");
		exit(EXIT_FAILURE);
	}

	return strtoul(buf, NULL, 10);
}
281
282static const unsigned long read_num(const char *name)
283{
284	char path[PATH_MAX];
285	int ret;
286
287	ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
288	if (ret >= PATH_MAX) {
289		printf("%s: Pathname is too long\n", __func__);
290		exit(EXIT_FAILURE);
291	}
292	return _read_num(path);
293}
294
/*
 * Write @num as decimal text to the file at @path; exits on failure.
 *
 * Fix: @num is unsigned long, so format with %lu — the old %ld was a
 * signed/unsigned format mismatch that would misprint values above
 * LONG_MAX.
 */
static void _write_num(const char *path, unsigned long num)
{
	char buf[21];

	sprintf(buf, "%lu", num);
	if (!write_file(path, buf, strlen(buf) + 1)) {
		perror(path);
		exit(EXIT_FAILURE);
	}
}
305
306static void write_num(const char *name, unsigned long num)
307{
308	char path[PATH_MAX];
309	int ret;
310
311	ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
312	if (ret >= PATH_MAX) {
313		printf("%s: Pathname is too long\n", __func__);
314		exit(EXIT_FAILURE);
315	}
316	_write_num(path, num);
317}
318
/*
 * Push @settings out to sysfs: the THP mode knobs, every khugepaged
 * tunable, and — for tests backed by a real block device — the owning
 * device's readahead.
 */
static void write_settings(struct settings *settings)
{
	struct khugepaged_settings *khugepaged = &settings->khugepaged;

	write_string("enabled", thp_enabled_strings[settings->thp_enabled]);
	write_string("defrag", thp_defrag_strings[settings->thp_defrag]);
	write_string("shmem_enabled",
			shmem_enabled_strings[settings->shmem_enabled]);
	write_num("use_zero_page", settings->use_zero_page);

	write_num("khugepaged/defrag", khugepaged->defrag);
	write_num("khugepaged/alloc_sleep_millisecs",
			khugepaged->alloc_sleep_millisecs);
	write_num("khugepaged/scan_sleep_millisecs",
			khugepaged->scan_sleep_millisecs);
	write_num("khugepaged/max_ptes_none", khugepaged->max_ptes_none);
	write_num("khugepaged/max_ptes_swap", khugepaged->max_ptes_swap);
	write_num("khugepaged/max_ptes_shared", khugepaged->max_ptes_shared);
	write_num("khugepaged/pages_to_scan", khugepaged->pages_to_scan);

	/* read_ahead_kb only exists for block-device-backed file tests */
	if (file_ops && finfo.type == VMA_FILE)
		_write_num(finfo.dev_queue_read_ahead_path,
			   settings->read_ahead_kb);
}
343
#define MAX_SETTINGS_DEPTH 4
/* Stack of nested settings; the top entry is what sysfs currently holds. */
static struct settings settings_stack[MAX_SETTINGS_DEPTH];
static int settings_index;
347
348static struct settings *current_settings(void)
349{
350	if (!settings_index) {
351		printf("Fail: No settings set");
352		exit(EXIT_FAILURE);
353	}
354	return settings_stack + settings_index - 1;
355}
356
357static void push_settings(struct settings *settings)
358{
359	if (settings_index >= MAX_SETTINGS_DEPTH) {
360		printf("Fail: Settings stack exceeded");
361		exit(EXIT_FAILURE);
362	}
363	settings_stack[settings_index++] = *settings;
364	write_settings(current_settings());
365}
366
367static void pop_settings(void)
368{
369	if (settings_index <= 0) {
370		printf("Fail: Settings stack empty");
371		exit(EXIT_FAILURE);
372	}
373	--settings_index;
374	write_settings(current_settings());
375}
376
377static void restore_settings(int sig)
378{
379	if (skip_settings_restore)
380		goto out;
381
382	printf("Restore THP and khugepaged settings...");
383	write_settings(&saved_settings);
384	success("OK");
385	if (sig)
386		exit(EXIT_FAILURE);
387out:
388	exit(exit_status);
389}
390
/*
 * Snapshot every THP/khugepaged knob (and, for block-device file
 * tests, the device readahead) into saved_settings, then install
 * restore_settings() as the handler for common termination signals so
 * the host configuration is put back even on interrupt.
 */
static void save_settings(void)
{
	printf("Save THP and khugepaged settings...");
	saved_settings = (struct settings) {
		.thp_enabled = read_string("enabled", thp_enabled_strings),
		.thp_defrag = read_string("defrag", thp_defrag_strings),
		.shmem_enabled =
			read_string("shmem_enabled", shmem_enabled_strings),
		.use_zero_page = read_num("use_zero_page"),
	};
	saved_settings.khugepaged = (struct khugepaged_settings) {
		.defrag = read_num("khugepaged/defrag"),
		.alloc_sleep_millisecs =
			read_num("khugepaged/alloc_sleep_millisecs"),
		.scan_sleep_millisecs =
			read_num("khugepaged/scan_sleep_millisecs"),
		.max_ptes_none = read_num("khugepaged/max_ptes_none"),
		.max_ptes_swap = read_num("khugepaged/max_ptes_swap"),
		.max_ptes_shared = read_num("khugepaged/max_ptes_shared"),
		.pages_to_scan = read_num("khugepaged/pages_to_scan"),
	};
	/* Readahead control only exists when backed by a real block device */
	if (file_ops && finfo.type == VMA_FILE)
		saved_settings.read_ahead_kb =
				_read_num(finfo.dev_queue_read_ahead_path);

	success("OK");

	signal(SIGTERM, restore_settings);
	signal(SIGINT, restore_settings);
	signal(SIGHUP, restore_settings);
	signal(SIGQUIT, restore_settings);
}
423
424static void get_finfo(const char *dir)
425{
426	struct stat path_stat;
427	struct statfs fs;
428	char buf[1 << 10];
429	char path[PATH_MAX];
430	char *str, *end;
431
432	finfo.dir = dir;
433	stat(finfo.dir, &path_stat);
434	if (!S_ISDIR(path_stat.st_mode)) {
435		printf("%s: Not a directory (%s)\n", __func__, finfo.dir);
436		exit(EXIT_FAILURE);
437	}
438	if (snprintf(finfo.path, sizeof(finfo.path), "%s/" TEST_FILE,
439		     finfo.dir) >= sizeof(finfo.path)) {
440		printf("%s: Pathname is too long\n", __func__);
441		exit(EXIT_FAILURE);
442	}
443	if (statfs(finfo.dir, &fs)) {
444		perror("statfs()");
445		exit(EXIT_FAILURE);
446	}
447	finfo.type = fs.f_type == TMPFS_MAGIC ? VMA_SHMEM : VMA_FILE;
448	if (finfo.type == VMA_SHMEM)
449		return;
450
451	/* Find owning device's queue/read_ahead_kb control */
452	if (snprintf(path, sizeof(path), "/sys/dev/block/%d:%d/uevent",
453		     major(path_stat.st_dev), minor(path_stat.st_dev))
454	    >= sizeof(path)) {
455		printf("%s: Pathname is too long\n", __func__);
456		exit(EXIT_FAILURE);
457	}
458	if (read_file(path, buf, sizeof(buf)) < 0) {
459		perror("read_file(read_num)");
460		exit(EXIT_FAILURE);
461	}
462	if (strstr(buf, "DEVTYPE=disk")) {
463		/* Found it */
464		if (snprintf(finfo.dev_queue_read_ahead_path,
465			     sizeof(finfo.dev_queue_read_ahead_path),
466			     "/sys/dev/block/%d:%d/queue/read_ahead_kb",
467			     major(path_stat.st_dev), minor(path_stat.st_dev))
468		    >= sizeof(finfo.dev_queue_read_ahead_path)) {
469			printf("%s: Pathname is too long\n", __func__);
470			exit(EXIT_FAILURE);
471		}
472		return;
473	}
474	if (!strstr(buf, "DEVTYPE=partition")) {
475		printf("%s: Unknown device type: %s\n", __func__, path);
476		exit(EXIT_FAILURE);
477	}
478	/*
479	 * Partition of block device - need to find actual device.
480	 * Using naming convention that devnameN is partition of
481	 * device devname.
482	 */
483	str = strstr(buf, "DEVNAME=");
484	if (!str) {
485		printf("%s: Could not read: %s", __func__, path);
486		exit(EXIT_FAILURE);
487	}
488	str += 8;
489	end = str;
490	while (*end) {
491		if (isdigit(*end)) {
492			*end = '\0';
493			if (snprintf(finfo.dev_queue_read_ahead_path,
494				     sizeof(finfo.dev_queue_read_ahead_path),
495				     "/sys/block/%s/queue/read_ahead_kb",
496				     str) >= sizeof(finfo.dev_queue_read_ahead_path)) {
497				printf("%s: Pathname is too long\n", __func__);
498				exit(EXIT_FAILURE);
499			}
500			return;
501		}
502		++end;
503	}
504	printf("%s: Could not read: %s\n", __func__, path);
505	exit(EXIT_FAILURE);
506}
507
/*
 * Return true iff the /proc/self/smaps entry for the VMA starting at
 * @addr reports exactly @size bytes swapped out.
 */
static bool check_swap(void *addr, unsigned long size)
{
	bool swap = false;
	int ret;
	FILE *fp;
	char buffer[MAX_LINE_LENGTH];
	char addr_pattern[MAX_LINE_LENGTH];

	ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-",
		       (unsigned long) addr);
	if (ret >= MAX_LINE_LENGTH) {
		printf("%s: Pattern is too long\n", __func__);
		exit(EXIT_FAILURE);
	}


	fp = fopen(PID_SMAPS, "r");
	if (!fp) {
		printf("%s: Failed to open file %s\n", __func__, PID_SMAPS);
		exit(EXIT_FAILURE);
	}
	/* Seek forward to the smaps block for the VMA at addr */
	if (!check_for_pattern(fp, addr_pattern, buffer, sizeof(buffer)))
		goto err_out;

	/* smaps right-justifies the value, so "Swap:%19ld kB" matches it */
	ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "Swap:%19ld kB",
		       size >> 10);
	if (ret >= MAX_LINE_LENGTH) {
		printf("%s: Pattern is too long\n", __func__);
		exit(EXIT_FAILURE);
	}
	/*
	 * Fetch the Swap: line in the same smaps block and check that it
	 * reports the expected number of swapped-out kilobytes.
	 */
	if (!check_for_pattern(fp, "Swap:", buffer, sizeof(buffer)))
		goto err_out;

	if (strncmp(buffer, addr_pattern, strlen(addr_pattern)))
		goto err_out;

	swap = true;
err_out:
	fclose(fp);
	return swap;
}
553
554static void *alloc_mapping(int nr)
555{
556	void *p;
557
558	p = mmap(BASE_ADDR, nr * hpage_pmd_size, PROT_READ | PROT_WRITE,
559		 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
560	if (p != BASE_ADDR) {
561		printf("Failed to allocate VMA at %p\n", BASE_ADDR);
562		exit(EXIT_FAILURE);
563	}
564
565	return p;
566}
567
568static void fill_memory(int *p, unsigned long start, unsigned long end)
569{
570	int i;
571
572	for (i = start / page_size; i < end / page_size; i++)
573		p[i * page_size / sizeof(*p)] = i + 0xdead0000;
574}
575
576/*
577 * MADV_COLLAPSE is a best-effort request and may fail if an internal
578 * resource is temporarily unavailable, in which case it will set errno to
579 * EAGAIN.  In such a case, immediately reattempt the operation one more
580 * time.
581 */
582static int madvise_collapse_retry(void *p, unsigned long size)
583{
584	bool retry = true;
585	int ret;
586
587retry:
588	ret = madvise(p, size, MADV_COLLAPSE);
589	if (ret && errno == EAGAIN && retry) {
590		retry = false;
591		goto retry;
592	}
593	return ret;
594}
595
/*
 * Returns pmd-mapped hugepage in VMA marked VM_HUGEPAGE, filled with
 * validate_memory()'able contents.
 */
static void *alloc_hpage(struct mem_ops *ops)
{
	void *p = ops->setup_area(1);

	ops->fault(p, 0, hpage_pmd_size);

	/*
	 * VMA should be neither VM_HUGEPAGE nor VM_NOHUGEPAGE.
	 * The latter is ineligible for collapse by MADV_COLLAPSE
	 * while the former might cause MADV_COLLAPSE to race with
	 * khugepaged on low-load system (like a test machine), which
	 * would cause MADV_COLLAPSE to fail with EAGAIN.
	 */
	printf("Allocate huge page...");
	if (madvise_collapse_retry(p, hpage_pmd_size)) {
		perror("madvise(MADV_COLLAPSE)");
		exit(EXIT_FAILURE);
	}
	/* Collapse reported success but the mapping is not PMD-mapped */
	if (!ops->check_huge(p, 1)) {
		perror("madvise(MADV_COLLAPSE)");
		exit(EXIT_FAILURE);
	}
	/* Only now mark VM_HUGEPAGE, once the collapse has completed */
	if (madvise(p, hpage_pmd_size, MADV_HUGEPAGE)) {
		perror("madvise(MADV_HUGEPAGE)");
		exit(EXIT_FAILURE);
	}
	success("OK");
	return p;
}
629
630static void validate_memory(int *p, unsigned long start, unsigned long end)
631{
632	int i;
633
634	for (i = start / page_size; i < end / page_size; i++) {
635		if (p[i * page_size / sizeof(*p)] != i + 0xdead0000) {
636			printf("Page %d is corrupted: %#x\n",
637					i, p[i * page_size / sizeof(*p)]);
638			exit(EXIT_FAILURE);
639		}
640	}
641}
642
/* Anonymous backing: the area is just the fixed-address private mapping. */
static void *anon_setup_area(int nr_hpages)
{
	return alloc_mapping(nr_hpages);
}
647
/* Anonymous backing: tearing down is a plain munmap. */
static void anon_cleanup_area(void *p, unsigned long size)
{
	munmap(p, size);
}
652
/* Anonymous backing: fault pages in by writing the marker pattern. */
static void anon_fault(void *p, unsigned long start, unsigned long end)
{
	fill_memory(p, start, end);
}
657
658static bool anon_check_huge(void *addr, int nr_hpages)
659{
660	return check_huge_anon(addr, nr_hpages, hpage_pmd_size);
661}
662
663static void *file_setup_area(int nr_hpages)
664{
665	int fd;
666	void *p;
667	unsigned long size;
668
669	unlink(finfo.path);  /* Cleanup from previous failed tests */
670	printf("Creating %s for collapse%s...", finfo.path,
671	       finfo.type == VMA_SHMEM ? " (tmpfs)" : "");
672	fd = open(finfo.path, O_DSYNC | O_CREAT | O_RDWR | O_TRUNC | O_EXCL,
673		  777);
674	if (fd < 0) {
675		perror("open()");
676		exit(EXIT_FAILURE);
677	}
678
679	size = nr_hpages * hpage_pmd_size;
680	p = alloc_mapping(nr_hpages);
681	fill_memory(p, 0, size);
682	write(fd, p, size);
683	close(fd);
684	munmap(p, size);
685	success("OK");
686
687	printf("Opening %s read only for collapse...", finfo.path);
688	finfo.fd = open(finfo.path, O_RDONLY, 777);
689	if (finfo.fd < 0) {
690		perror("open()");
691		exit(EXIT_FAILURE);
692	}
693	p = mmap(BASE_ADDR, size, PROT_READ | PROT_EXEC,
694		 MAP_PRIVATE, finfo.fd, 0);
695	if (p == MAP_FAILED || p != BASE_ADDR) {
696		perror("mmap()");
697		exit(EXIT_FAILURE);
698	}
699
700	/* Drop page cache */
701	write_file("/proc/sys/vm/drop_caches", "3", 2);
702	success("OK");
703	return p;
704}
705
706static void file_cleanup_area(void *p, unsigned long size)
707{
708	munmap(p, size);
709	close(finfo.fd);
710	unlink(finfo.path);
711}
712
713static void file_fault(void *p, unsigned long start, unsigned long end)
714{
715	if (madvise(((char *)p) + start, end - start, MADV_POPULATE_READ)) {
716		perror("madvise(MADV_POPULATE_READ");
717		exit(EXIT_FAILURE);
718	}
719}
720
721static bool file_check_huge(void *addr, int nr_hpages)
722{
723	switch (finfo.type) {
724	case VMA_FILE:
725		return check_huge_file(addr, nr_hpages, hpage_pmd_size);
726	case VMA_SHMEM:
727		return check_huge_shmem(addr, nr_hpages, hpage_pmd_size);
728	default:
729		exit(EXIT_FAILURE);
730		return false;
731	}
732}
733
734static void *shmem_setup_area(int nr_hpages)
735{
736	void *p;
737	unsigned long size = nr_hpages * hpage_pmd_size;
738
739	finfo.fd = memfd_create("khugepaged-selftest-collapse-shmem", 0);
740	if (finfo.fd < 0)  {
741		perror("memfd_create()");
742		exit(EXIT_FAILURE);
743	}
744	if (ftruncate(finfo.fd, size)) {
745		perror("ftruncate()");
746		exit(EXIT_FAILURE);
747	}
748	p = mmap(BASE_ADDR, size, PROT_READ | PROT_WRITE, MAP_SHARED, finfo.fd,
749		 0);
750	if (p != BASE_ADDR) {
751		perror("mmap()");
752		exit(EXIT_FAILURE);
753	}
754	return p;
755}
756
757static void shmem_cleanup_area(void *p, unsigned long size)
758{
759	munmap(p, size);
760	close(finfo.fd);
761}
762
763static bool shmem_check_huge(void *addr, int nr_hpages)
764{
765	return check_huge_shmem(addr, nr_hpages, hpage_pmd_size);
766}
767
/* Ops tables for each backing type, selected by the command line. */
static struct mem_ops __anon_ops = {
	.setup_area = &anon_setup_area,
	.cleanup_area = &anon_cleanup_area,
	.fault = &anon_fault,
	.check_huge = &anon_check_huge,
	.name = "anon",
};

static struct mem_ops __file_ops = {
	.setup_area = &file_setup_area,
	.cleanup_area = &file_cleanup_area,
	.fault = &file_fault,
	.check_huge = &file_check_huge,
	.name = "file",
};

static struct mem_ops __shmem_ops = {
	.setup_area = &shmem_setup_area,
	.cleanup_area = &shmem_cleanup_area,
	/* shmem mappings here are writable, so faulting by write is fine */
	.fault = &anon_fault,
	.check_huge = &shmem_check_huge,
	.name = "shmem",
};
791
/*
 * Run MADV_COLLAPSE on @p and check the result against @expect: both
 * the syscall return value and the resulting mapping state must agree
 * with the expectation.  THP is disabled around the call.
 */
static void __madvise_collapse(const char *msg, char *p, int nr_hpages,
			       struct mem_ops *ops, bool expect)
{
	int ret;
	struct settings settings = *current_settings();

	printf("%s...", msg);

	/*
	 * Prevent khugepaged interference and tests that MADV_COLLAPSE
	 * ignores /sys/kernel/mm/transparent_hugepage/enabled
	 */
	settings.thp_enabled = THP_NEVER;
	settings.shmem_enabled = SHMEM_NEVER;
	push_settings(&settings);

	/* Clear VM_NOHUGEPAGE */
	madvise(p, nr_hpages * hpage_pmd_size, MADV_HUGEPAGE);
	ret = madvise_collapse_retry(p, nr_hpages * hpage_pmd_size);
	/* ret != 0 means failure, so (bool)ret == expect is the wrong outcome */
	if (((bool)ret) == expect)
		fail("Fail: Bad return value");
	else if (!ops->check_huge(p, expect ? nr_hpages : 0))
		fail("Fail: check_huge()");
	else
		success("OK");

	pop_settings();
}
820
821static void madvise_collapse(const char *msg, char *p, int nr_hpages,
822			     struct mem_ops *ops, bool expect)
823{
824	/* Sanity check */
825	if (!ops->check_huge(p, 0)) {
826		printf("Unexpected huge page\n");
827		exit(EXIT_FAILURE);
828	}
829	__madvise_collapse(msg, p, nr_hpages, ops, expect);
830}
831
#define TICK 500000	/* polling interval, microseconds */
/*
 * Mark the region VM_HUGEPAGE and wait for khugepaged to act on it:
 * poll until the region is huge-mapped, until khugepaged has finished
 * two more full scans, or until ~3 seconds elapse.  Returns true on
 * timeout (khugepaged never got there).
 */
static bool wait_for_scan(const char *msg, char *p, int nr_hpages,
			  struct mem_ops *ops)
{
	int full_scans;
	int timeout = 6; /* 3 seconds */

	/* Sanity check */
	if (!ops->check_huge(p, 0)) {
		printf("Unexpected huge page\n");
		exit(EXIT_FAILURE);
	}

	madvise(p, nr_hpages * hpage_pmd_size, MADV_HUGEPAGE);

	/* Wait until the second full_scan completed */
	full_scans = read_num("khugepaged/full_scans") + 2;

	printf("%s...", msg);
	while (timeout--) {
		if (ops->check_huge(p, nr_hpages))
			break;
		if (read_num("khugepaged/full_scans") >= full_scans)
			break;
		printf(".");
		usleep(TICK);
	}

	/* Stop khugepaged from touching the region after the test */
	madvise(p, nr_hpages * hpage_pmd_size, MADV_NOHUGEPAGE);

	return timeout == -1;
}
864
/*
 * khugepaged entry point for the collapse_context: let khugepaged scan
 * the region and check the outcome against @expect.  A scan timeout is
 * a pass when no collapse was expected.
 */
static void khugepaged_collapse(const char *msg, char *p, int nr_hpages,
				struct mem_ops *ops, bool expect)
{
	if (wait_for_scan(msg, p, nr_hpages, ops)) {
		if (expect)
			fail("Timeout");
		else
			success("OK");
		return;
	}

	/*
	 * For file and shmem memory, khugepaged only retracts pte entries after
	 * putting the new hugepage in the page cache. The hugepage must be
	 * subsequently refaulted to install the pmd mapping for the mm.
	 */
	if (ops != &__anon_ops)
		ops->fault(p, 0, nr_hpages * hpage_pmd_size);

	if (ops->check_huge(p, expect ? nr_hpages : 0))
		success("OK");
	else
		fail("Fail");
}
889
/*
 * The two collapse paths under test: khugepaged honors the max_ptes_*
 * scan limits, MADV_COLLAPSE does not.
 */
static struct collapse_context __khugepaged_context = {
	.collapse = &khugepaged_collapse,
	.enforce_pte_scan_limits = true,
	.name = "khugepaged",
};

static struct collapse_context __madvise_context = {
	.collapse = &madvise_collapse,
	.enforce_pte_scan_limits = false,
	.name = "madvise",
};
901
902static bool is_tmpfs(struct mem_ops *ops)
903{
904	return ops == &__file_ops && finfo.type == VMA_SHMEM;
905}
906
/*
 * With THP "always", a first-touch write should fault in a whole
 * hugepage, and MADV_DONTNEED on one base page should split the PMD.
 */
static void alloc_at_fault(void)
{
	struct settings settings = *current_settings();
	char *p;

	settings.thp_enabled = THP_ALWAYS;
	push_settings(&settings);

	p = alloc_mapping(1);
	*p = 1;		/* first touch triggers the huge fault */
	printf("Allocate huge page on fault...");
	if (check_huge_anon(p, 1, hpage_pmd_size))
		success("OK");
	else
		fail("Fail");

	pop_settings();

	madvise(p, page_size, MADV_DONTNEED);
	printf("Split huge PMD on MADV_DONTNEED...");
	if (check_huge_anon(p, 0, hpage_pmd_size))
		success("OK");
	else
		fail("Fail");
	munmap(p, hpage_pmd_size);
}
933
/* Fully populate four PMD ranges and collapse them all in one call. */
static void collapse_full(struct collapse_context *c, struct mem_ops *ops)
{
	void *p;
	int nr_hpages = 4;
	unsigned long size = nr_hpages * hpage_pmd_size;

	p = ops->setup_area(nr_hpages);
	ops->fault(p, 0, size);
	c->collapse("Collapse multiple fully populated PTE table", p, nr_hpages,
		    ops, true);
	validate_memory(p, 0, size);
	ops->cleanup_area(p, size);
}
947
/* An entirely unpopulated PTE table must not be collapsed. */
static void collapse_empty(struct collapse_context *c, struct mem_ops *ops)
{
	void *p;

	p = ops->setup_area(1);
	c->collapse("Do not collapse empty PTE table", p, 1, ops, false);
	ops->cleanup_area(p, hpage_pmd_size);
}
956
/* A single present PTE is enough for collapse to succeed. */
static void collapse_single_pte_entry(struct collapse_context *c, struct mem_ops *ops)
{
	void *p;

	p = ops->setup_area(1);
	ops->fault(p, 0, page_size);
	c->collapse("Collapse PTE table with single PTE entry present", p,
		    1, ops, true);
	ops->cleanup_area(p, hpage_pmd_size);
}
967
/*
 * Exercise the max_ptes_none scan limit: with one more empty PTE than
 * allowed, only limit-ignoring contexts (MADV_COLLAPSE) may collapse;
 * at exactly the limit, every context must.
 */
static void collapse_max_ptes_none(struct collapse_context *c, struct mem_ops *ops)
{
	int max_ptes_none = hpage_pmd_nr / 2;
	struct settings settings = *current_settings();
	void *p;

	settings.khugepaged.max_ptes_none = max_ptes_none;
	push_settings(&settings);

	p = ops->setup_area(1);

	if (is_tmpfs(ops)) {
		/* shmem pages always in the page cache */
		printf("tmpfs...");
		skip("Skip");
		goto skip;
	}

	/* One page short of the limit: exceeds max_ptes_none by one */
	ops->fault(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size);
	c->collapse("Maybe collapse with max_ptes_none exceeded", p, 1,
		    ops, !c->enforce_pte_scan_limits);
	validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size);

	if (c->enforce_pte_scan_limits) {
		/* Exactly max_ptes_none empty PTEs: collapse must succeed */
		ops->fault(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size);
		c->collapse("Collapse with max_ptes_none PTEs empty", p, 1, ops,
			    true);
		validate_memory(p, 0,
				(hpage_pmd_nr - max_ptes_none) * page_size);
	}
skip:
	ops->cleanup_area(p, hpage_pmd_size);
	pop_settings();
}
1002
/* Collapse must be able to swap a single swapped-out PTE back in. */
static void collapse_swapin_single_pte(struct collapse_context *c, struct mem_ops *ops)
{
	void *p;

	p = ops->setup_area(1);
	ops->fault(p, 0, hpage_pmd_size);

	printf("Swapout one page...");
	if (madvise(p, page_size, MADV_PAGEOUT)) {
		perror("madvise(MADV_PAGEOUT)");
		exit(EXIT_FAILURE);
	}
	/* Confirm via smaps that the page really went to swap */
	if (check_swap(p, page_size)) {
		success("OK");
	} else {
		fail("Fail");
		goto out;
	}

	c->collapse("Collapse with swapping in single PTE entry", p, 1, ops,
		    true);
	validate_memory(p, 0, hpage_pmd_size);
out:
	ops->cleanup_area(p, hpage_pmd_size);
}
1028
/*
 * Exercise the max_ptes_swap scan limit: one page over the limit may
 * only collapse for limit-ignoring contexts; exactly at the limit,
 * collapse must succeed.
 */
static void collapse_max_ptes_swap(struct collapse_context *c, struct mem_ops *ops)
{
	int max_ptes_swap = read_num("khugepaged/max_ptes_swap");
	void *p;

	p = ops->setup_area(1);
	ops->fault(p, 0, hpage_pmd_size);

	printf("Swapout %d of %d pages...", max_ptes_swap + 1, hpage_pmd_nr);
	if (madvise(p, (max_ptes_swap + 1) * page_size, MADV_PAGEOUT)) {
		perror("madvise(MADV_PAGEOUT)");
		exit(EXIT_FAILURE);
	}
	if (check_swap(p, (max_ptes_swap + 1) * page_size)) {
		success("OK");
	} else {
		fail("Fail");
		goto out;
	}

	c->collapse("Maybe collapse with max_ptes_swap exceeded", p, 1, ops,
		    !c->enforce_pte_scan_limits);
	validate_memory(p, 0, hpage_pmd_size);

	if (c->enforce_pte_scan_limits) {
		/* Refault everything, then swap out exactly the limit */
		ops->fault(p, 0, hpage_pmd_size);
		printf("Swapout %d of %d pages...", max_ptes_swap,
		       hpage_pmd_nr);
		if (madvise(p, max_ptes_swap * page_size, MADV_PAGEOUT)) {
			perror("madvise(MADV_PAGEOUT)");
			exit(EXIT_FAILURE);
		}
		if (check_swap(p, max_ptes_swap * page_size)) {
			success("OK");
		} else {
			fail("Fail");
			goto out;
		}

		c->collapse("Collapse with max_ptes_swap pages swapped out", p,
			    1, ops, true);
		validate_memory(p, 0, hpage_pmd_size);
	}
out:
	ops->cleanup_area(p, hpage_pmd_size);
}
1075
/*
 * Split an existing hugepage down to a single PTE still mapping part
 * of the (former) compound page, then collapse that PTE table.
 */
static void collapse_single_pte_entry_compound(struct collapse_context *c, struct mem_ops *ops)
{
	void *p;

	p = alloc_hpage(ops);

	if (is_tmpfs(ops)) {
		/* MADV_DONTNEED won't evict tmpfs pages */
		printf("tmpfs...");
		skip("Skip");
		goto skip;
	}

	/* NOHUGEPAGE stops khugepaged re-collapsing behind our back */
	madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
	printf("Split huge page leaving single PTE mapping compound page...");
	madvise(p + page_size, hpage_pmd_size - page_size, MADV_DONTNEED);
	if (ops->check_huge(p, 0))
		success("OK");
	else
		fail("Fail");

	c->collapse("Collapse PTE table with single PTE mapping compound page",
		    p, 1, ops, true);
	validate_memory(p, 0, page_size);
skip:
	ops->cleanup_area(p, hpage_pmd_size);
}
1103
/*
 * Split a hugepage's PMD so the PTE table is full of entries that all
 * map the same compound page, then collapse it again.
 */
static void collapse_full_of_compound(struct collapse_context *c, struct mem_ops *ops)
{
	void *p;

	p = alloc_hpage(ops);
	printf("Split huge page leaving single PTE page table full of compound pages...");
	/*
	 * NOTE(review): the first page-sized MADV_NOHUGEPAGE appears to
	 * split the VMA before the whole-range call splits the PMD —
	 * confirm against khugepaged semantics.
	 */
	madvise(p, page_size, MADV_NOHUGEPAGE);
	madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
	if (ops->check_huge(p, 0))
		success("OK");
	else
		fail("Fail");

	c->collapse("Collapse PTE table full of compound pages", p, 1, ops,
		    true);
	validate_memory(p, 0, hpage_pmd_size);
	ops->cleanup_area(p, hpage_pmd_size);
}
1122
/*
 * Build a PTE table where every entry maps a page from a *different*
 * compound page, using mremap() tricks: repeatedly allocate a hugepage
 * at BASE_ADDR, move the accumulated pages aside, then move them back
 * extended by one page of the fresh hugepage.  Finally collapse the
 * resulting patchwork table.
 */
static void collapse_compound_extreme(struct collapse_context *c, struct mem_ops *ops)
{
	void *p;
	int i;

	p = ops->setup_area(1);
	for (i = 0; i < hpage_pmd_nr; i++) {
		printf("\rConstruct PTE page table full of different PTE-mapped compound pages %3d/%d...",
				i + 1, hpage_pmd_nr);

		/* Fault a fresh hugepage at BASE_ADDR */
		madvise(BASE_ADDR, hpage_pmd_size, MADV_HUGEPAGE);
		ops->fault(BASE_ADDR, 0, hpage_pmd_size);
		if (!ops->check_huge(BASE_ADDR, 1)) {
			printf("Failed to allocate huge page\n");
			exit(EXIT_FAILURE);
		}
		madvise(BASE_ADDR, hpage_pmd_size, MADV_NOHUGEPAGE);

		/* Park the i accumulated pages + new hugepage out of the way */
		p = mremap(BASE_ADDR - i * page_size,
				i * page_size + hpage_pmd_size,
				(i + 1) * page_size,
				MREMAP_MAYMOVE | MREMAP_FIXED,
				BASE_ADDR + 2 * hpage_pmd_size);
		if (p == MAP_FAILED) {
			perror("mremap+unmap");
			exit(EXIT_FAILURE);
		}

		/* Move them back, one page longer, just below BASE_ADDR */
		p = mremap(BASE_ADDR + 2 * hpage_pmd_size,
				(i + 1) * page_size,
				(i + 1) * page_size + hpage_pmd_size,
				MREMAP_MAYMOVE | MREMAP_FIXED,
				BASE_ADDR - (i + 1) * page_size);
		if (p == MAP_FAILED) {
			perror("mremap+alloc");
			exit(EXIT_FAILURE);
		}
	}

	ops->cleanup_area(BASE_ADDR, hpage_pmd_size);
	ops->fault(p, 0, hpage_pmd_size);
	/* The patchwork table must NOT already be PMD-mapped */
	if (!ops->check_huge(p, 1))
		success("OK");
	else
		fail("Fail");

	c->collapse("Collapse PTE table full of different compound pages", p, 1,
		    ops, true);

	validate_memory(p, 0, hpage_pmd_size);
	ops->cleanup_area(p, hpage_pmd_size);
}
1175
/*
 * Collapse in a child a PTE table containing a page shared (CoW) with
 * the parent, and verify the parent's mapping stays small.
 */
static void collapse_fork(struct collapse_context *c, struct mem_ops *ops)
{
	int wstatus;
	void *p;

	p = ops->setup_area(1);

	printf("Allocate small page...");
	ops->fault(p, 0, page_size);
	if (ops->check_huge(p, 0))
		success("OK");
	else
		fail("Fail");

	printf("Share small page over fork()...");
	if (!fork()) {
		/* Do not touch settings on child exit */
		skip_settings_restore = true;
		exit_status = 0;

		if (ops->check_huge(p, 0))
			success("OK");
		else
			fail("Fail");

		/* Fault a second page so the child has something private */
		ops->fault(p, page_size, 2 * page_size);
		c->collapse("Collapse PTE table with single page shared with parent process",
			    p, 1, ops, true);

		validate_memory(p, 0, page_size);
		ops->cleanup_area(p, hpage_pmd_size);
		exit(exit_status);
	}

	wait(&wstatus);
	exit_status += WEXITSTATUS(wstatus);

	printf("Check if parent still has small page...");
	if (ops->check_huge(p, 0))
		success("OK");
	else
		fail("Fail");
	validate_memory(p, 0, page_size);
	ops->cleanup_area(p, hpage_pmd_size);
}
1221
1222static void collapse_fork_compound(struct collapse_context *c, struct mem_ops *ops)
1223{
1224	int wstatus;
1225	void *p;
1226
1227	p = alloc_hpage(ops);
1228	printf("Share huge page over fork()...");
1229	if (!fork()) {
1230		/* Do not touch settings on child exit */
1231		skip_settings_restore = true;
1232		exit_status = 0;
1233
1234		if (ops->check_huge(p, 1))
1235			success("OK");
1236		else
1237			fail("Fail");
1238
1239		printf("Split huge page PMD in child process...");
1240		madvise(p, page_size, MADV_NOHUGEPAGE);
1241		madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
1242		if (ops->check_huge(p, 0))
1243			success("OK");
1244		else
1245			fail("Fail");
1246		ops->fault(p, 0, page_size);
1247
1248		write_num("khugepaged/max_ptes_shared", hpage_pmd_nr - 1);
1249		c->collapse("Collapse PTE table full of compound pages in child",
1250			    p, 1, ops, true);
1251		write_num("khugepaged/max_ptes_shared",
1252			  current_settings()->khugepaged.max_ptes_shared);
1253
1254		validate_memory(p, 0, hpage_pmd_size);
1255		ops->cleanup_area(p, hpage_pmd_size);
1256		exit(exit_status);
1257	}
1258
1259	wait(&wstatus);
1260	exit_status += WEXITSTATUS(wstatus);
1261
1262	printf("Check if parent still has huge page...");
1263	if (ops->check_huge(p, 1))
1264		success("OK");
1265	else
1266		fail("Fail");
1267	validate_memory(p, 0, hpage_pmd_size);
1268	ops->cleanup_area(p, hpage_pmd_size);
1269}
1270
/*
 * Exercise the khugepaged/max_ptes_shared limit: after fork(), the whole
 * hugepage is shared with the parent.  CoW-fault just enough pages so that
 * one more than max_ptes_shared PTEs are still shared — collapse must fail
 * when the context enforces scan limits — then CoW one more page so exactly
 * max_ptes_shared PTEs remain shared and collapse must succeed.
 */
static void collapse_max_ptes_shared(struct collapse_context *c, struct mem_ops *ops)
{
	int max_ptes_shared = read_num("khugepaged/max_ptes_shared");
	int wstatus;
	void *p;

	p = alloc_hpage(ops);
	printf("Share huge page over fork()...");
	if (!fork()) {
		/* Do not touch settings on child exit */
		skip_settings_restore = true;
		exit_status = 0;

		if (ops->check_huge(p, 1))
			success("OK");
		else
			fail("Fail");

		/*
		 * CoW one page too few: max_ptes_shared + 1 PTEs still point
		 * at pages shared with the parent, exceeding the limit.
		 */
		printf("Trigger CoW on page %d of %d...",
				hpage_pmd_nr - max_ptes_shared - 1, hpage_pmd_nr);
		ops->fault(p, 0, (hpage_pmd_nr - max_ptes_shared - 1) * page_size);
		if (ops->check_huge(p, 0))
			success("OK");
		else
			fail("Fail");

		/* Expected to succeed only if limits are NOT enforced. */
		c->collapse("Maybe collapse with max_ptes_shared exceeded", p,
			    1, ops, !c->enforce_pte_scan_limits);

		if (c->enforce_pte_scan_limits) {
			/*
			 * CoW one more page: exactly max_ptes_shared PTEs
			 * remain shared, which is within the limit.
			 */
			printf("Trigger CoW on page %d of %d...",
			       hpage_pmd_nr - max_ptes_shared, hpage_pmd_nr);
			ops->fault(p, 0, (hpage_pmd_nr - max_ptes_shared) *
				    page_size);
			if (ops->check_huge(p, 0))
				success("OK");
			else
				fail("Fail");

			c->collapse("Collapse with max_ptes_shared PTEs shared",
				    p, 1, ops, true);
		}

		validate_memory(p, 0, hpage_pmd_size);
		ops->cleanup_area(p, hpage_pmd_size);
		exit(exit_status);
	}

	wait(&wstatus);
	/* Fold the child's failures into the overall exit status. */
	exit_status += WEXITSTATUS(wstatus);

	printf("Check if parent still has huge page...");
	if (ops->check_huge(p, 1))
		success("OK");
	else
		fail("Fail");
	validate_memory(p, 0, hpage_pmd_size);
	ops->cleanup_area(p, hpage_pmd_size);
}
1330
1331static void madvise_collapse_existing_thps(struct collapse_context *c,
1332					   struct mem_ops *ops)
1333{
1334	void *p;
1335
1336	p = ops->setup_area(1);
1337	ops->fault(p, 0, hpage_pmd_size);
1338	c->collapse("Collapse fully populated PTE table...", p, 1, ops, true);
1339	validate_memory(p, 0, hpage_pmd_size);
1340
1341	/* c->collapse() will find a hugepage and complain - call directly. */
1342	__madvise_collapse("Re-collapse PMD-mapped hugepage", p, 1, ops, true);
1343	validate_memory(p, 0, hpage_pmd_size);
1344	ops->cleanup_area(p, hpage_pmd_size);
1345}
1346
1347/*
1348 * Test race with khugepaged where page tables have been retracted and
1349 * pmd cleared.
1350 */
1351static void madvise_retracted_page_tables(struct collapse_context *c,
1352					  struct mem_ops *ops)
1353{
1354	void *p;
1355	int nr_hpages = 1;
1356	unsigned long size = nr_hpages * hpage_pmd_size;
1357
1358	p = ops->setup_area(nr_hpages);
1359	ops->fault(p, 0, size);
1360
1361	/* Let khugepaged collapse and leave pmd cleared */
1362	if (wait_for_scan("Collapse and leave PMD cleared", p, nr_hpages,
1363			  ops)) {
1364		fail("Timeout");
1365		return;
1366	}
1367	success("OK");
1368	c->collapse("Install huge PMD from page cache", p, nr_hpages, ops,
1369		    true);
1370	validate_memory(p, 0, size);
1371	ops->cleanup_area(p, size);
1372}
1373
/* Print command-line help to stderr and terminate with a failing status. */
static void usage(void)
{
	static const char * const help[] = {
		"\nUsage: ./khugepaged <test type> [dir]\n\n",
		"\t<test type>\t: <context>:<mem_type>\n",
		"\t<context>\t: [all|khugepaged|madvise]\n",
		"\t<mem_type>\t: [all|anon|file|shmem]\n",
		"\n\t\"file,all\" mem_type requires [dir] argument\n",
		"\n\t\"file,all\" mem_type requires kernel built with\n",
		"\tCONFIG_READ_ONLY_THP_FOR_FS=y\n",
		"\n\tif [dir] is a (sub)directory of a tmpfs mount, tmpfs must be\n",
		"\tmounted with huge=madvise option for khugepaged tests to work\n",
	};
	size_t i;

	for (i = 0; i < sizeof(help) / sizeof(help[0]); i++)
		fputs(help[i], stderr);
	exit(1);
}
1387
1388static void parse_test_type(int argc, const char **argv)
1389{
1390	char *buf;
1391	const char *token;
1392
1393	if (argc == 1) {
1394		/* Backwards compatibility */
1395		khugepaged_context =  &__khugepaged_context;
1396		madvise_context =  &__madvise_context;
1397		anon_ops = &__anon_ops;
1398		return;
1399	}
1400
1401	buf = strdup(argv[1]);
1402	token = strsep(&buf, ":");
1403
1404	if (!strcmp(token, "all")) {
1405		khugepaged_context =  &__khugepaged_context;
1406		madvise_context =  &__madvise_context;
1407	} else if (!strcmp(token, "khugepaged")) {
1408		khugepaged_context =  &__khugepaged_context;
1409	} else if (!strcmp(token, "madvise")) {
1410		madvise_context =  &__madvise_context;
1411	} else {
1412		usage();
1413	}
1414
1415	if (!buf)
1416		usage();
1417
1418	if (!strcmp(buf, "all")) {
1419		file_ops =  &__file_ops;
1420		anon_ops = &__anon_ops;
1421		shmem_ops = &__shmem_ops;
1422	} else if (!strcmp(buf, "anon")) {
1423		anon_ops = &__anon_ops;
1424	} else if (!strcmp(buf, "file")) {
1425		file_ops =  &__file_ops;
1426	} else if (!strcmp(buf, "shmem")) {
1427		shmem_ops = &__shmem_ops;
1428	} else {
1429		usage();
1430	}
1431
1432	if (!file_ops)
1433		return;
1434
1435	if (argc != 3)
1436		usage();
1437}
1438
/*
 * Entry point: configure THP/khugepaged sysfs settings for deterministic
 * behavior, then run the selected collapse tests for every requested
 * (context, memory-backend) combination.  Settings are saved up front and
 * restored on exit via restore_settings().
 */
int main(int argc, const char **argv)
{
	struct settings default_settings = {
		.thp_enabled = THP_MADVISE,
		.thp_defrag = THP_DEFRAG_ALWAYS,
		.shmem_enabled = SHMEM_ADVISE,
		.use_zero_page = 0,
		.khugepaged = {
			.defrag = 1,
			.alloc_sleep_millisecs = 10,
			.scan_sleep_millisecs = 10,
		},
		/*
		 * When testing file-backed memory, the collapse path
		 * looks at how many pages are found in the page cache, not
		 * what pages are mapped. Disable read ahead optimization so
		 * pages don't find their way into the page cache unless
		 * we mem_ops->fault() them in.
		 */
		.read_ahead_kb = 0,
	};

	parse_test_type(argc, argv);

	/* parse_test_type() guarantees argv[2] exists when file_ops is set. */
	if (file_ops)
		get_finfo(argv[2]);

	/* Unbuffered stdout so progress lines appear before a crash/hang. */
	setbuf(stdout, NULL);

	page_size = getpagesize();
	hpage_pmd_size = read_pmd_pagesize();
	if (!hpage_pmd_size) {
		printf("Reading PMD pagesize failed");
		exit(EXIT_FAILURE);
	}
	/* Number of small pages per PMD-sized huge page. */
	hpage_pmd_nr = hpage_pmd_size / page_size;

	/* Derive page-count-dependent khugepaged limits at runtime. */
	default_settings.khugepaged.max_ptes_none = hpage_pmd_nr - 1;
	default_settings.khugepaged.max_ptes_swap = hpage_pmd_nr / 8;
	default_settings.khugepaged.max_ptes_shared = hpage_pmd_nr / 2;
	default_settings.khugepaged.pages_to_scan = hpage_pmd_nr * 8;

	save_settings();
	push_settings(&default_settings);

	alloc_at_fault();

/* Run test t only when both its context c and mem_ops o were selected. */
#define TEST(t, c, o) do { \
	if (c && o) { \
		printf("\nRun test: " #t " (%s:%s)\n", c->name, o->name); \
		t(c, o); \
	} \
	} while (0)

	TEST(collapse_full, khugepaged_context, anon_ops);
	TEST(collapse_full, khugepaged_context, file_ops);
	TEST(collapse_full, khugepaged_context, shmem_ops);
	TEST(collapse_full, madvise_context, anon_ops);
	TEST(collapse_full, madvise_context, file_ops);
	TEST(collapse_full, madvise_context, shmem_ops);

	TEST(collapse_empty, khugepaged_context, anon_ops);
	TEST(collapse_empty, madvise_context, anon_ops);

	TEST(collapse_single_pte_entry, khugepaged_context, anon_ops);
	TEST(collapse_single_pte_entry, khugepaged_context, file_ops);
	TEST(collapse_single_pte_entry, khugepaged_context, shmem_ops);
	TEST(collapse_single_pte_entry, madvise_context, anon_ops);
	TEST(collapse_single_pte_entry, madvise_context, file_ops);
	TEST(collapse_single_pte_entry, madvise_context, shmem_ops);

	TEST(collapse_max_ptes_none, khugepaged_context, anon_ops);
	TEST(collapse_max_ptes_none, khugepaged_context, file_ops);
	TEST(collapse_max_ptes_none, madvise_context, anon_ops);
	TEST(collapse_max_ptes_none, madvise_context, file_ops);

	TEST(collapse_single_pte_entry_compound, khugepaged_context, anon_ops);
	TEST(collapse_single_pte_entry_compound, khugepaged_context, file_ops);
	TEST(collapse_single_pte_entry_compound, madvise_context, anon_ops);
	TEST(collapse_single_pte_entry_compound, madvise_context, file_ops);

	TEST(collapse_full_of_compound, khugepaged_context, anon_ops);
	TEST(collapse_full_of_compound, khugepaged_context, file_ops);
	TEST(collapse_full_of_compound, khugepaged_context, shmem_ops);
	TEST(collapse_full_of_compound, madvise_context, anon_ops);
	TEST(collapse_full_of_compound, madvise_context, file_ops);
	TEST(collapse_full_of_compound, madvise_context, shmem_ops);

	TEST(collapse_compound_extreme, khugepaged_context, anon_ops);
	TEST(collapse_compound_extreme, madvise_context, anon_ops);

	TEST(collapse_swapin_single_pte, khugepaged_context, anon_ops);
	TEST(collapse_swapin_single_pte, madvise_context, anon_ops);

	TEST(collapse_max_ptes_swap, khugepaged_context, anon_ops);
	TEST(collapse_max_ptes_swap, madvise_context, anon_ops);

	TEST(collapse_fork, khugepaged_context, anon_ops);
	TEST(collapse_fork, madvise_context, anon_ops);

	TEST(collapse_fork_compound, khugepaged_context, anon_ops);
	TEST(collapse_fork_compound, madvise_context, anon_ops);

	TEST(collapse_max_ptes_shared, khugepaged_context, anon_ops);
	TEST(collapse_max_ptes_shared, madvise_context, anon_ops);

	TEST(madvise_collapse_existing_thps, madvise_context, anon_ops);
	TEST(madvise_collapse_existing_thps, madvise_context, file_ops);
	TEST(madvise_collapse_existing_thps, madvise_context, shmem_ops);

	TEST(madvise_retracted_page_tables, madvise_context, file_ops);
	TEST(madvise_retracted_page_tables, madvise_context, shmem_ops);

	/* Restores the saved sysfs settings and exits with exit_status. */
	restore_settings(0);
}
1554