1#define _GNU_SOURCE
2#include <fcntl.h>
3#include <limits.h>
4#include <signal.h>
5#include <stdio.h>
6#include <stdlib.h>
7#include <stdbool.h>
8#include <string.h>
9#include <unistd.h>
10
11#include <sys/mman.h>
12#include <sys/wait.h>
13
/* Fallback for libc headers that do not define MADV_PAGEOUT. */
#if !defined(MADV_PAGEOUT)
#define MADV_PAGEOUT 21
#endif

/* Fixed address (1 GiB) at which alloc_mapping() places the test VMA. */
#define BASE_ADDR ((void *)(1UL << 30))

/* Sizes filled in by main(): PMD huge page size, base page size, pages per PMD. */
static unsigned long hpage_pmd_size;
static unsigned long page_size;
static int hpage_pmd_nr;

/* Directory prefix for the THP knobs and the smaps file parsed by the checks. */
#define THP_SYSFS "/sys/kernel/mm/transparent_hugepage/"
#define PID_SMAPS "/proc/self/smaps"
25
/* Modes of the "enabled" knob; each enumerator indexes thp_enabled_strings. */
enum thp_enabled {
	THP_ALWAYS,
	THP_MADVISE,
	THP_NEVER,
};

/* NULL-terminated spelling table, indexed by enum thp_enabled. */
static const char *thp_enabled_strings[] = {
	[THP_ALWAYS]	= "always",
	[THP_MADVISE]	= "madvise",
	[THP_NEVER]	= "never",
	[THP_NEVER + 1]	= NULL,
};
38
/* Modes of the "defrag" knob; each enumerator indexes thp_defrag_strings. */
enum thp_defrag {
	THP_DEFRAG_ALWAYS,
	THP_DEFRAG_DEFER,
	THP_DEFRAG_DEFER_MADVISE,
	THP_DEFRAG_MADVISE,
	THP_DEFRAG_NEVER,
};

/* NULL-terminated spelling table, indexed by enum thp_defrag. */
static const char *thp_defrag_strings[] = {
	[THP_DEFRAG_ALWAYS]		= "always",
	[THP_DEFRAG_DEFER]		= "defer",
	[THP_DEFRAG_DEFER_MADVISE]	= "defer+madvise",
	[THP_DEFRAG_MADVISE]		= "madvise",
	[THP_DEFRAG_NEVER]		= "never",
	[THP_DEFRAG_NEVER + 1]		= NULL,
};
55
/* Modes of the "shmem_enabled" knob; each enumerator indexes shmem_enabled_strings. */
enum shmem_enabled {
	SHMEM_ALWAYS,
	SHMEM_WITHIN_SIZE,
	SHMEM_ADVISE,
	SHMEM_NEVER,
	SHMEM_DENY,
	SHMEM_FORCE,
};

/* NULL-terminated spelling table, indexed by enum shmem_enabled. */
static const char *shmem_enabled_strings[] = {
	[SHMEM_ALWAYS]		= "always",
	[SHMEM_WITHIN_SIZE]	= "within_size",
	[SHMEM_ADVISE]		= "advise",
	[SHMEM_NEVER]		= "never",
	[SHMEM_DENY]		= "deny",
	[SHMEM_FORCE]		= "force",
	[SHMEM_FORCE + 1]	= NULL,
};
74
/*
 * In-memory copy of the knobs under "khugepaged/"; every member is
 * read or written through the sysfs file of the same name.
 */
struct khugepaged_settings {
	bool defrag;
	unsigned int alloc_sleep_millisecs;
	unsigned int scan_sleep_millisecs;
	unsigned int max_ptes_none;
	unsigned int max_ptes_swap;
	unsigned int max_ptes_shared;
	unsigned long pages_to_scan;
};
84
85struct settings {
86	enum thp_enabled thp_enabled;
87	enum thp_defrag thp_defrag;
88	enum shmem_enabled shmem_enabled;
89	bool debug_cow;
90	bool use_zero_page;
91	struct khugepaged_settings khugepaged;
92};
93
94static struct settings default_settings = {
95	.thp_enabled = THP_MADVISE,
96	.thp_defrag = THP_DEFRAG_ALWAYS,
97	.shmem_enabled = SHMEM_NEVER,
98	.debug_cow = 0,
99	.use_zero_page = 0,
100	.khugepaged = {
101		.defrag = 1,
102		.alloc_sleep_millisecs = 10,
103		.scan_sleep_millisecs = 10,
104	},
105};
106
107static struct settings saved_settings;
108static bool skip_settings_restore;
109
110static int exit_status;
111
/* Print msg in green (ANSI colour 32), followed by a newline. */
static void success(const char *msg)
{
	printf(" \e[32m%s\e[0m\n", msg);
}
116
117static void fail(const char *msg)
118{
119	printf(" \e[31m%s\e[0m\n", msg);
120	exit_status++;
121}
122
/*
 * Read at most buflen - 1 bytes from the start of path into buf and
 * NUL-terminate what was read.
 *
 * Returns the number of bytes read, or 0 when the file cannot be opened,
 * the read fails, or the read yields no data (buf is left untouched then).
 */
static int read_file(const char *path, char *buf, size_t buflen)
{
	ssize_t nbytes;
	int fd = open(path, O_RDONLY);

	if (fd == -1)
		return 0;

	nbytes = read(fd, buf, buflen - 1);
	if (nbytes >= 1)
		buf[nbytes] = '\0';
	close(fd);

	return nbytes < 1 ? 0 : (unsigned int) nbytes;
}
143
/*
 * Write the first buflen - 1 bytes of buf to the existing file path
 * (callers pass strlen + 1, so the trailing NUL is not written).
 *
 * Returns the number of bytes written, or 0 when the file cannot be
 * opened for writing or nothing was written.
 */
static int write_file(const char *path, const char *buf, size_t buflen)
{
	ssize_t nbytes;
	int fd = open(path, O_WRONLY);

	if (fd == -1)
		return 0;

	nbytes = write(fd, buf, buflen - 1);
	close(fd);

	return nbytes >= 1 ? (unsigned int) nbytes : 0;
}
160
161static int read_string(const char *name, const char *strings[])
162{
163	char path[PATH_MAX];
164	char buf[256];
165	char *c;
166	int ret;
167
168	ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
169	if (ret >= PATH_MAX) {
170		printf("%s: Pathname is too long\n", __func__);
171		exit(EXIT_FAILURE);
172	}
173
174	if (!read_file(path, buf, sizeof(buf))) {
175		perror(path);
176		exit(EXIT_FAILURE);
177	}
178
179	c = strchr(buf, '[');
180	if (!c) {
181		printf("%s: Parse failure\n", __func__);
182		exit(EXIT_FAILURE);
183	}
184
185	c++;
186	memmove(buf, c, sizeof(buf) - (c - buf));
187
188	c = strchr(buf, ']');
189	if (!c) {
190		printf("%s: Parse failure\n", __func__);
191		exit(EXIT_FAILURE);
192	}
193	*c = '\0';
194
195	ret = 0;
196	while (strings[ret]) {
197		if (!strcmp(strings[ret], buf))
198			return ret;
199		ret++;
200	}
201
202	printf("Failed to parse %s\n", name);
203	exit(EXIT_FAILURE);
204}
205
206static void write_string(const char *name, const char *val)
207{
208	char path[PATH_MAX];
209	int ret;
210
211	ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
212	if (ret >= PATH_MAX) {
213		printf("%s: Pathname is too long\n", __func__);
214		exit(EXIT_FAILURE);
215	}
216
217	if (!write_file(path, val, strlen(val) + 1)) {
218		perror(path);
219		exit(EXIT_FAILURE);
220	}
221}
222
223static const unsigned long read_num(const char *name)
224{
225	char path[PATH_MAX];
226	char buf[21];
227	int ret;
228
229	ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
230	if (ret >= PATH_MAX) {
231		printf("%s: Pathname is too long\n", __func__);
232		exit(EXIT_FAILURE);
233	}
234
235	ret = read_file(path, buf, sizeof(buf));
236	if (ret < 0) {
237		perror("read_file(read_num)");
238		exit(EXIT_FAILURE);
239	}
240
241	return strtoul(buf, NULL, 10);
242}
243
244static void write_num(const char *name, unsigned long num)
245{
246	char path[PATH_MAX];
247	char buf[21];
248	int ret;
249
250	ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
251	if (ret >= PATH_MAX) {
252		printf("%s: Pathname is too long\n", __func__);
253		exit(EXIT_FAILURE);
254	}
255
256	sprintf(buf, "%ld", num);
257	if (!write_file(path, buf, strlen(buf) + 1)) {
258		perror(path);
259		exit(EXIT_FAILURE);
260	}
261}
262
263static void write_settings(struct settings *settings)
264{
265	struct khugepaged_settings *khugepaged = &settings->khugepaged;
266
267	write_string("enabled", thp_enabled_strings[settings->thp_enabled]);
268	write_string("defrag", thp_defrag_strings[settings->thp_defrag]);
269	write_string("shmem_enabled",
270			shmem_enabled_strings[settings->shmem_enabled]);
271	write_num("debug_cow", settings->debug_cow);
272	write_num("use_zero_page", settings->use_zero_page);
273
274	write_num("khugepaged/defrag", khugepaged->defrag);
275	write_num("khugepaged/alloc_sleep_millisecs",
276			khugepaged->alloc_sleep_millisecs);
277	write_num("khugepaged/scan_sleep_millisecs",
278			khugepaged->scan_sleep_millisecs);
279	write_num("khugepaged/max_ptes_none", khugepaged->max_ptes_none);
280	write_num("khugepaged/max_ptes_swap", khugepaged->max_ptes_swap);
281	write_num("khugepaged/max_ptes_shared", khugepaged->max_ptes_shared);
282	write_num("khugepaged/pages_to_scan", khugepaged->pages_to_scan);
283}
284
285static void restore_settings(int sig)
286{
287	if (skip_settings_restore)
288		goto out;
289
290	printf("Restore THP and khugepaged settings...");
291	write_settings(&saved_settings);
292	success("OK");
293	if (sig)
294		exit(EXIT_FAILURE);
295out:
296	exit(exit_status);
297}
298
299static void save_settings(void)
300{
301	printf("Save THP and khugepaged settings...");
302	saved_settings = (struct settings) {
303		.thp_enabled = read_string("enabled", thp_enabled_strings),
304		.thp_defrag = read_string("defrag", thp_defrag_strings),
305		.shmem_enabled =
306			read_string("shmem_enabled", shmem_enabled_strings),
307		.debug_cow = read_num("debug_cow"),
308		.use_zero_page = read_num("use_zero_page"),
309	};
310	saved_settings.khugepaged = (struct khugepaged_settings) {
311		.defrag = read_num("khugepaged/defrag"),
312		.alloc_sleep_millisecs =
313			read_num("khugepaged/alloc_sleep_millisecs"),
314		.scan_sleep_millisecs =
315			read_num("khugepaged/scan_sleep_millisecs"),
316		.max_ptes_none = read_num("khugepaged/max_ptes_none"),
317		.max_ptes_swap = read_num("khugepaged/max_ptes_swap"),
318		.max_ptes_shared = read_num("khugepaged/max_ptes_shared"),
319		.pages_to_scan = read_num("khugepaged/pages_to_scan"),
320	};
321	success("OK");
322
323	signal(SIGTERM, restore_settings);
324	signal(SIGINT, restore_settings);
325	signal(SIGHUP, restore_settings);
326	signal(SIGQUIT, restore_settings);
327}
328
329static void adjust_settings(void)
330{
331
332	printf("Adjust settings...");
333	write_settings(&default_settings);
334	success("OK");
335}
336
/* Size of the line buffers used when scanning smaps. */
#define MAX_LINE_LENGTH 500

/*
 * Advance through fp line by line until a line starting with `pattern`
 * is found. Each line is read into buf, which must hold MAX_LINE_LENGTH
 * bytes.
 *
 * Returns true with the matching line left in buf, or false when the
 * end of the stream is reached without a match.
 */
static bool check_for_pattern(FILE *fp, char *pattern, char *buf)
{
	const size_t prefix_len = strlen(pattern);

	for (;;) {
		if (!fgets(buf, MAX_LINE_LENGTH, fp))
			return false;
		if (strncmp(buf, pattern, prefix_len) == 0)
			return true;
	}
}
347
348static bool check_huge(void *addr)
349{
350	bool thp = false;
351	int ret;
352	FILE *fp;
353	char buffer[MAX_LINE_LENGTH];
354	char addr_pattern[MAX_LINE_LENGTH];
355
356	ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-",
357		       (unsigned long) addr);
358	if (ret >= MAX_LINE_LENGTH) {
359		printf("%s: Pattern is too long\n", __func__);
360		exit(EXIT_FAILURE);
361	}
362
363
364	fp = fopen(PID_SMAPS, "r");
365	if (!fp) {
366		printf("%s: Failed to open file %s\n", __func__, PID_SMAPS);
367		exit(EXIT_FAILURE);
368	}
369	if (!check_for_pattern(fp, addr_pattern, buffer))
370		goto err_out;
371
372	ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "AnonHugePages:%10ld kB",
373		       hpage_pmd_size >> 10);
374	if (ret >= MAX_LINE_LENGTH) {
375		printf("%s: Pattern is too long\n", __func__);
376		exit(EXIT_FAILURE);
377	}
378	/*
379	 * Fetch the AnonHugePages: in the same block and check whether it got
380	 * the expected number of hugeepages next.
381	 */
382	if (!check_for_pattern(fp, "AnonHugePages:", buffer))
383		goto err_out;
384
385	if (strncmp(buffer, addr_pattern, strlen(addr_pattern)))
386		goto err_out;
387
388	thp = true;
389err_out:
390	fclose(fp);
391	return thp;
392}
393
394
395static bool check_swap(void *addr, unsigned long size)
396{
397	bool swap = false;
398	int ret;
399	FILE *fp;
400	char buffer[MAX_LINE_LENGTH];
401	char addr_pattern[MAX_LINE_LENGTH];
402
403	ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-",
404		       (unsigned long) addr);
405	if (ret >= MAX_LINE_LENGTH) {
406		printf("%s: Pattern is too long\n", __func__);
407		exit(EXIT_FAILURE);
408	}
409
410
411	fp = fopen(PID_SMAPS, "r");
412	if (!fp) {
413		printf("%s: Failed to open file %s\n", __func__, PID_SMAPS);
414		exit(EXIT_FAILURE);
415	}
416	if (!check_for_pattern(fp, addr_pattern, buffer))
417		goto err_out;
418
419	ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "Swap:%19ld kB",
420		       size >> 10);
421	if (ret >= MAX_LINE_LENGTH) {
422		printf("%s: Pattern is too long\n", __func__);
423		exit(EXIT_FAILURE);
424	}
425	/*
426	 * Fetch the Swap: in the same block and check whether it got
427	 * the expected number of hugeepages next.
428	 */
429	if (!check_for_pattern(fp, "Swap:", buffer))
430		goto err_out;
431
432	if (strncmp(buffer, addr_pattern, strlen(addr_pattern)))
433		goto err_out;
434
435	swap = true;
436err_out:
437	fclose(fp);
438	return swap;
439}
440
441static void *alloc_mapping(void)
442{
443	void *p;
444
445	p = mmap(BASE_ADDR, hpage_pmd_size, PROT_READ | PROT_WRITE,
446			MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
447	if (p != BASE_ADDR) {
448		printf("Failed to allocate VMA at %p\n", BASE_ADDR);
449		exit(EXIT_FAILURE);
450	}
451
452	return p;
453}
454
455static void fill_memory(int *p, unsigned long start, unsigned long end)
456{
457	int i;
458
459	for (i = start / page_size; i < end / page_size; i++)
460		p[i * page_size / sizeof(*p)] = i + 0xdead0000;
461}
462
463static void validate_memory(int *p, unsigned long start, unsigned long end)
464{
465	int i;
466
467	for (i = start / page_size; i < end / page_size; i++) {
468		if (p[i * page_size / sizeof(*p)] != i + 0xdead0000) {
469			printf("Page %d is corrupted: %#x\n",
470					i, p[i * page_size / sizeof(*p)]);
471			exit(EXIT_FAILURE);
472		}
473	}
474}
475
476#define TICK 500000
477static bool wait_for_scan(const char *msg, char *p)
478{
479	int full_scans;
480	int timeout = 6; /* 3 seconds */
481
482	/* Sanity check */
483	if (check_huge(p)) {
484		printf("Unexpected huge page\n");
485		exit(EXIT_FAILURE);
486	}
487
488	madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
489
490	/* Wait until the second full_scan completed */
491	full_scans = read_num("khugepaged/full_scans") + 2;
492
493	printf("%s...", msg);
494	while (timeout--) {
495		if (check_huge(p))
496			break;
497		if (read_num("khugepaged/full_scans") >= full_scans)
498			break;
499		printf(".");
500		usleep(TICK);
501	}
502
503	madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
504
505	return timeout == -1;
506}
507
508static void alloc_at_fault(void)
509{
510	struct settings settings = default_settings;
511	char *p;
512
513	settings.thp_enabled = THP_ALWAYS;
514	write_settings(&settings);
515
516	p = alloc_mapping();
517	*p = 1;
518	printf("Allocate huge page on fault...");
519	if (check_huge(p))
520		success("OK");
521	else
522		fail("Fail");
523
524	write_settings(&default_settings);
525
526	madvise(p, page_size, MADV_DONTNEED);
527	printf("Split huge PMD on MADV_DONTNEED...");
528	if (!check_huge(p))
529		success("OK");
530	else
531		fail("Fail");
532	munmap(p, hpage_pmd_size);
533}
534
535static void collapse_full(void)
536{
537	void *p;
538
539	p = alloc_mapping();
540	fill_memory(p, 0, hpage_pmd_size);
541	if (wait_for_scan("Collapse fully populated PTE table", p))
542		fail("Timeout");
543	else if (check_huge(p))
544		success("OK");
545	else
546		fail("Fail");
547	validate_memory(p, 0, hpage_pmd_size);
548	munmap(p, hpage_pmd_size);
549}
550
551static void collapse_empty(void)
552{
553	void *p;
554
555	p = alloc_mapping();
556	if (wait_for_scan("Do not collapse empty PTE table", p))
557		fail("Timeout");
558	else if (check_huge(p))
559		fail("Fail");
560	else
561		success("OK");
562	munmap(p, hpage_pmd_size);
563}
564
565static void collapse_single_pte_entry(void)
566{
567	void *p;
568
569	p = alloc_mapping();
570	fill_memory(p, 0, page_size);
571	if (wait_for_scan("Collapse PTE table with single PTE entry present", p))
572		fail("Timeout");
573	else if (check_huge(p))
574		success("OK");
575	else
576		fail("Fail");
577	validate_memory(p, 0, page_size);
578	munmap(p, hpage_pmd_size);
579}
580
581static void collapse_max_ptes_none(void)
582{
583	int max_ptes_none = hpage_pmd_nr / 2;
584	struct settings settings = default_settings;
585	void *p;
586
587	settings.khugepaged.max_ptes_none = max_ptes_none;
588	write_settings(&settings);
589
590	p = alloc_mapping();
591
592	fill_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size);
593	if (wait_for_scan("Do not collapse with max_ptes_none exceeded", p))
594		fail("Timeout");
595	else if (check_huge(p))
596		fail("Fail");
597	else
598		success("OK");
599	validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size);
600
601	fill_memory(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size);
602	if (wait_for_scan("Collapse with max_ptes_none PTEs empty", p))
603		fail("Timeout");
604	else if (check_huge(p))
605		success("OK");
606	else
607		fail("Fail");
608	validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size);
609
610	munmap(p, hpage_pmd_size);
611	write_settings(&default_settings);
612}
613
614static void collapse_swapin_single_pte(void)
615{
616	void *p;
617	p = alloc_mapping();
618	fill_memory(p, 0, hpage_pmd_size);
619
620	printf("Swapout one page...");
621	if (madvise(p, page_size, MADV_PAGEOUT)) {
622		perror("madvise(MADV_PAGEOUT)");
623		exit(EXIT_FAILURE);
624	}
625	if (check_swap(p, page_size)) {
626		success("OK");
627	} else {
628		fail("Fail");
629		goto out;
630	}
631
632	if (wait_for_scan("Collapse with swapping in single PTE entry", p))
633		fail("Timeout");
634	else if (check_huge(p))
635		success("OK");
636	else
637		fail("Fail");
638	validate_memory(p, 0, hpage_pmd_size);
639out:
640	munmap(p, hpage_pmd_size);
641}
642
643static void collapse_max_ptes_swap(void)
644{
645	int max_ptes_swap = read_num("khugepaged/max_ptes_swap");
646	void *p;
647
648	p = alloc_mapping();
649
650	fill_memory(p, 0, hpage_pmd_size);
651	printf("Swapout %d of %d pages...", max_ptes_swap + 1, hpage_pmd_nr);
652	if (madvise(p, (max_ptes_swap + 1) * page_size, MADV_PAGEOUT)) {
653		perror("madvise(MADV_PAGEOUT)");
654		exit(EXIT_FAILURE);
655	}
656	if (check_swap(p, (max_ptes_swap + 1) * page_size)) {
657		success("OK");
658	} else {
659		fail("Fail");
660		goto out;
661	}
662
663	if (wait_for_scan("Do not collapse with max_ptes_swap exceeded", p))
664		fail("Timeout");
665	else if (check_huge(p))
666		fail("Fail");
667	else
668		success("OK");
669	validate_memory(p, 0, hpage_pmd_size);
670
671	fill_memory(p, 0, hpage_pmd_size);
672	printf("Swapout %d of %d pages...", max_ptes_swap, hpage_pmd_nr);
673	if (madvise(p, max_ptes_swap * page_size, MADV_PAGEOUT)) {
674		perror("madvise(MADV_PAGEOUT)");
675		exit(EXIT_FAILURE);
676	}
677	if (check_swap(p, max_ptes_swap * page_size)) {
678		success("OK");
679	} else {
680		fail("Fail");
681		goto out;
682	}
683
684	if (wait_for_scan("Collapse with max_ptes_swap pages swapped out", p))
685		fail("Timeout");
686	else if (check_huge(p))
687		success("OK");
688	else
689		fail("Fail");
690	validate_memory(p, 0, hpage_pmd_size);
691out:
692	munmap(p, hpage_pmd_size);
693}
694
695static void collapse_single_pte_entry_compound(void)
696{
697	void *p;
698
699	p = alloc_mapping();
700
701	printf("Allocate huge page...");
702	madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
703	fill_memory(p, 0, hpage_pmd_size);
704	if (check_huge(p))
705		success("OK");
706	else
707		fail("Fail");
708	madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
709
710	printf("Split huge page leaving single PTE mapping compound page...");
711	madvise(p + page_size, hpage_pmd_size - page_size, MADV_DONTNEED);
712	if (!check_huge(p))
713		success("OK");
714	else
715		fail("Fail");
716
717	if (wait_for_scan("Collapse PTE table with single PTE mapping compound page", p))
718		fail("Timeout");
719	else if (check_huge(p))
720		success("OK");
721	else
722		fail("Fail");
723	validate_memory(p, 0, page_size);
724	munmap(p, hpage_pmd_size);
725}
726
727static void collapse_full_of_compound(void)
728{
729	void *p;
730
731	p = alloc_mapping();
732
733	printf("Allocate huge page...");
734	madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
735	fill_memory(p, 0, hpage_pmd_size);
736	if (check_huge(p))
737		success("OK");
738	else
739		fail("Fail");
740
741	printf("Split huge page leaving single PTE page table full of compound pages...");
742	madvise(p, page_size, MADV_NOHUGEPAGE);
743	madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
744	if (!check_huge(p))
745		success("OK");
746	else
747		fail("Fail");
748
749	if (wait_for_scan("Collapse PTE table full of compound pages", p))
750		fail("Timeout");
751	else if (check_huge(p))
752		success("OK");
753	else
754		fail("Fail");
755	validate_memory(p, 0, hpage_pmd_size);
756	munmap(p, hpage_pmd_size);
757}
758
759static void collapse_compound_extreme(void)
760{
761	void *p;
762	int i;
763
764	p = alloc_mapping();
765	for (i = 0; i < hpage_pmd_nr; i++) {
766		printf("\rConstruct PTE page table full of different PTE-mapped compound pages %3d/%d...",
767				i + 1, hpage_pmd_nr);
768
769		madvise(BASE_ADDR, hpage_pmd_size, MADV_HUGEPAGE);
770		fill_memory(BASE_ADDR, 0, hpage_pmd_size);
771		if (!check_huge(BASE_ADDR)) {
772			printf("Failed to allocate huge page\n");
773			exit(EXIT_FAILURE);
774		}
775		madvise(BASE_ADDR, hpage_pmd_size, MADV_NOHUGEPAGE);
776
777		p = mremap(BASE_ADDR - i * page_size,
778				i * page_size + hpage_pmd_size,
779				(i + 1) * page_size,
780				MREMAP_MAYMOVE | MREMAP_FIXED,
781				BASE_ADDR + 2 * hpage_pmd_size);
782		if (p == MAP_FAILED) {
783			perror("mremap+unmap");
784			exit(EXIT_FAILURE);
785		}
786
787		p = mremap(BASE_ADDR + 2 * hpage_pmd_size,
788				(i + 1) * page_size,
789				(i + 1) * page_size + hpage_pmd_size,
790				MREMAP_MAYMOVE | MREMAP_FIXED,
791				BASE_ADDR - (i + 1) * page_size);
792		if (p == MAP_FAILED) {
793			perror("mremap+alloc");
794			exit(EXIT_FAILURE);
795		}
796	}
797
798	munmap(BASE_ADDR, hpage_pmd_size);
799	fill_memory(p, 0, hpage_pmd_size);
800	if (!check_huge(p))
801		success("OK");
802	else
803		fail("Fail");
804
805	if (wait_for_scan("Collapse PTE table full of different compound pages", p))
806		fail("Timeout");
807	else if (check_huge(p))
808		success("OK");
809	else
810		fail("Fail");
811
812	validate_memory(p, 0, hpage_pmd_size);
813	munmap(p, hpage_pmd_size);
814}
815
816static void collapse_fork(void)
817{
818	int wstatus;
819	void *p;
820
821	p = alloc_mapping();
822
823	printf("Allocate small page...");
824	fill_memory(p, 0, page_size);
825	if (!check_huge(p))
826		success("OK");
827	else
828		fail("Fail");
829
830	printf("Share small page over fork()...");
831	if (!fork()) {
832		/* Do not touch settings on child exit */
833		skip_settings_restore = true;
834		exit_status = 0;
835
836		if (!check_huge(p))
837			success("OK");
838		else
839			fail("Fail");
840
841		fill_memory(p, page_size, 2 * page_size);
842
843		if (wait_for_scan("Collapse PTE table with single page shared with parent process", p))
844			fail("Timeout");
845		else if (check_huge(p))
846			success("OK");
847		else
848			fail("Fail");
849
850		validate_memory(p, 0, page_size);
851		munmap(p, hpage_pmd_size);
852		exit(exit_status);
853	}
854
855	wait(&wstatus);
856	exit_status += WEXITSTATUS(wstatus);
857
858	printf("Check if parent still has small page...");
859	if (!check_huge(p))
860		success("OK");
861	else
862		fail("Fail");
863	validate_memory(p, 0, page_size);
864	munmap(p, hpage_pmd_size);
865}
866
867static void collapse_fork_compound(void)
868{
869	int wstatus;
870	void *p;
871
872	p = alloc_mapping();
873
874	printf("Allocate huge page...");
875	madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
876	fill_memory(p, 0, hpage_pmd_size);
877	if (check_huge(p))
878		success("OK");
879	else
880		fail("Fail");
881
882	printf("Share huge page over fork()...");
883	if (!fork()) {
884		/* Do not touch settings on child exit */
885		skip_settings_restore = true;
886		exit_status = 0;
887
888		if (check_huge(p))
889			success("OK");
890		else
891			fail("Fail");
892
893		printf("Split huge page PMD in child process...");
894		madvise(p, page_size, MADV_NOHUGEPAGE);
895		madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
896		if (!check_huge(p))
897			success("OK");
898		else
899			fail("Fail");
900		fill_memory(p, 0, page_size);
901
902		write_num("khugepaged/max_ptes_shared", hpage_pmd_nr - 1);
903		if (wait_for_scan("Collapse PTE table full of compound pages in child", p))
904			fail("Timeout");
905		else if (check_huge(p))
906			success("OK");
907		else
908			fail("Fail");
909		write_num("khugepaged/max_ptes_shared",
910				default_settings.khugepaged.max_ptes_shared);
911
912		validate_memory(p, 0, hpage_pmd_size);
913		munmap(p, hpage_pmd_size);
914		exit(exit_status);
915	}
916
917	wait(&wstatus);
918	exit_status += WEXITSTATUS(wstatus);
919
920	printf("Check if parent still has huge page...");
921	if (check_huge(p))
922		success("OK");
923	else
924		fail("Fail");
925	validate_memory(p, 0, hpage_pmd_size);
926	munmap(p, hpage_pmd_size);
927}
928
929static void collapse_max_ptes_shared()
930{
931	int max_ptes_shared = read_num("khugepaged/max_ptes_shared");
932	int wstatus;
933	void *p;
934
935	p = alloc_mapping();
936
937	printf("Allocate huge page...");
938	madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
939	fill_memory(p, 0, hpage_pmd_size);
940	if (check_huge(p))
941		success("OK");
942	else
943		fail("Fail");
944
945	printf("Share huge page over fork()...");
946	if (!fork()) {
947		/* Do not touch settings on child exit */
948		skip_settings_restore = true;
949		exit_status = 0;
950
951		if (check_huge(p))
952			success("OK");
953		else
954			fail("Fail");
955
956		printf("Trigger CoW on page %d of %d...",
957				hpage_pmd_nr - max_ptes_shared - 1, hpage_pmd_nr);
958		fill_memory(p, 0, (hpage_pmd_nr - max_ptes_shared - 1) * page_size);
959		if (!check_huge(p))
960			success("OK");
961		else
962			fail("Fail");
963
964		if (wait_for_scan("Do not collapse with max_ptes_shared exceeded", p))
965			fail("Timeout");
966		else if (!check_huge(p))
967			success("OK");
968		else
969			fail("Fail");
970
971		printf("Trigger CoW on page %d of %d...",
972				hpage_pmd_nr - max_ptes_shared, hpage_pmd_nr);
973		fill_memory(p, 0, (hpage_pmd_nr - max_ptes_shared) * page_size);
974		if (!check_huge(p))
975			success("OK");
976		else
977			fail("Fail");
978
979
980		if (wait_for_scan("Collapse with max_ptes_shared PTEs shared", p))
981			fail("Timeout");
982		else if (check_huge(p))
983			success("OK");
984		else
985			fail("Fail");
986
987		validate_memory(p, 0, hpage_pmd_size);
988		munmap(p, hpage_pmd_size);
989		exit(exit_status);
990	}
991
992	wait(&wstatus);
993	exit_status += WEXITSTATUS(wstatus);
994
995	printf("Check if parent still has huge page...");
996	if (check_huge(p))
997		success("OK");
998	else
999		fail("Fail");
1000	validate_memory(p, 0, hpage_pmd_size);
1001	munmap(p, hpage_pmd_size);
1002}
1003
1004int main(void)
1005{
1006	setbuf(stdout, NULL);
1007
1008	page_size = getpagesize();
1009	hpage_pmd_size = read_num("hpage_pmd_size");
1010	hpage_pmd_nr = hpage_pmd_size / page_size;
1011
1012	default_settings.khugepaged.max_ptes_none = hpage_pmd_nr - 1;
1013	default_settings.khugepaged.max_ptes_swap = hpage_pmd_nr / 8;
1014	default_settings.khugepaged.max_ptes_shared = hpage_pmd_nr / 2;
1015	default_settings.khugepaged.pages_to_scan = hpage_pmd_nr * 8;
1016
1017	save_settings();
1018	adjust_settings();
1019
1020	alloc_at_fault();
1021	collapse_full();
1022	collapse_empty();
1023	collapse_single_pte_entry();
1024	collapse_max_ptes_none();
1025	collapse_swapin_single_pte();
1026	collapse_max_ptes_swap();
1027	collapse_single_pte_entry_compound();
1028	collapse_full_of_compound();
1029	collapse_compound_extreme();
1030	collapse_fork();
1031	collapse_fork_compound();
1032	collapse_max_ptes_shared();
1033
1034	restore_settings(0);
1035}
1036