1/*
2 * ttranshuge.c: hwpoison test for THP(Transparent Huge Page).
3 *
4 * Copyright (C) 2011, FUJITSU LIMITED.
5 *   Author: Jin Dongming <jin.dongming@css.cn.fujitsu.com>
6 *
7 * This program is released under the GPLv2.
8 *
9 * This program is based on tinject.c and thugetlb.c in tsrc/ directory
10 * in mcetest tool.
11 */
12
13/*
 * Even if THP is supported by the kernel, there is no guarantee that all
 * the pages you get belong to a THP.
16 *
17 * Following is the structure of the memory mapped by mmap()
18 * when the requested memory size is 8M and the THP's size is 2M,
19 *     O: means page belongs to 4k page;
20 *     T: means page belongs to THP.
21 *             Base             .....                   (Base + Size)
22 *     Size :  0M . . . . . 2M . . . . . 4M . . . . . 6M . . . . . 8M
23 *     case0:  OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO
24 *             No THP.
25 *     case1:  OOOOOOOTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTOOOOOO
26 *             Mixed with THP where it is possible.
27 *     case2:  OOOOOOOOOOOOOOOOOOOOOOOOOOTTTTTTTTTTTTTTTTTTTTTTTTTT
28 *             Mixed with THP only some part of where it is possible.
29 *     case3:  TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT
 *             All pages belong to THP.
31 *
 * So the function find_thp_addr() cannot be sure that the calculated
 * address is really the address of a THP. With the layouts above,
 * the correct address of a THP cannot be obtained in cases 0 and 2;
 * it can be obtained only in cases 1 and 3.
 *
 * In my experience, the case most often gotten by the APL is case 1.
 * So this program is written based on case 1.
39 *
 * To improve the chance that the memory mapped by mmap() is backed by THP,
 * it is better to run the hwpoison test:
 *     - Immediately after reboot,
 *       because there is a lot of free memory.
 *     - On a system which has plenty of memory,
 *       to avoid hwpoison test failures caused by insufficient memory.
46 */
47
48#define _GNU_SOURCE 1
49#include <stdlib.h>
50#include <stdio.h>
51#include <string.h>
52
53#include <unistd.h>
54#include <getopt.h>
55#include <signal.h>
56
57#include <sys/prctl.h>
58#include <sys/mman.h>
59#include <sys/wait.h>
60
/*
 * This file supposes the following as default.
 *     Regular Page Size  :  4K(4096Bytes)
 *     THP's Size         :  2M(2UL * 1024 *1024Bytes)
 *     Poisoned Page Size :  4K(4096Bytes)
 *
 * PS_MASK()/THP_MASK() yield the low-bit mask (size - 1) of a power-of-two
 * size. The argument is parenthesized so that expressions such as
 * (1UL << 12) expand correctly.
 */
#define DEFAULT_PS			4096UL
#define PS_MASK(ps_size)		((unsigned long)((ps_size) - 1))
#define DEFAULT_THP_SIZE		0x200000UL
#define THP_MASK(thp_size)		((unsigned long)((thp_size) - 1))

/* Size of the anonymous region requested from mmap(). */
#define REQ_MEM_SIZE			(8UL * 1024 * 1024)

/*
 * madvise()/prctl() constants. They are only defined here when the system
 * headers did not already provide them, so that newer headers never clash
 * with these fallback values.
 */
#ifndef MADV_POISON
#define MADV_POISON			100	/* MADV_HWPOISON in kernel headers */
#endif
#ifndef MADV_HUGEPAGE
#define MADV_HUGEPAGE			14
#endif

#ifndef PR_MCE_KILL
#define PR_MCE_KILL			33
#endif
#ifndef PR_MCE_KILL_SET
#define PR_MCE_KILL_SET			1
#endif
#ifndef PR_MCE_KILL_EARLY
#define PR_MCE_KILL_EARLY		1
#endif
#ifndef PR_MCE_KILL_LATE
#define PR_MCE_KILL_LATE		0
#endif

/* Return codes shared by the helper functions of this file. */
#define THP_SUCCESS			0
#define THP_FAILURE			(-1)

/* Tagged message helpers; all of them print to stdout. */
#define print_err(fmt, ...)		printf("[ERROR] " fmt, ##__VA_ARGS__)
#define print_success(fmt, ...)		printf("[SUCCESS] " fmt, ##__VA_ARGS__)
#define print_failure(fmt, ...)		printf("[FAILURE] " fmt, ##__VA_ARGS__)
88
/* Address of the 4K page that gets poisoned; set by prep_injection(). */
static char *corrupt_page_addr;
/* Start of the region returned by mmap(); set by prep_memory_map(). */
static char *mem_addr;

/* Command line switches (-e / -a); zero until the option is given. */
static unsigned int early_kill;
static unsigned int avoid_touch;

/* Page offset given with -o; -1 means that no offset was given. */
static int corrupt_page = -1;

/* Address computed by find_thp_addr(); zero until prep_injection() ran. */
static unsigned long thp_addr;
98
99static void print_prep_info(void)
100{
101	printf("\n%s Poison Test of THP.\n\n"
102		"Information:\n"
103		"    PID %d\n"
104		"    PS(page size) 0x%lx\n"
105		"    mmap()'ed Memory Address %p; size 0x%lx\n"
106		"    THP(Transparent Huge Page) Address 0x%lx; size 0x%lx\n"
107		"    %s Page Poison Test At %p\n\n",
108
109		early_kill ? "Early Kill" : "Late Kill",
110		getpid(),
111		DEFAULT_PS,
112		mem_addr, REQ_MEM_SIZE,
113		thp_addr, DEFAULT_THP_SIZE,
114		(corrupt_page == 0) ? "Head" : "Tail", corrupt_page_addr
115	);
116}
117
118/*
119 * Usage:
120 *     If avoid_flag == 1,
121 *         access all the memory except one DEFAULT_PS size memory
122 *         after the address in global variable corrupt_page_addr;
123 *     else
124 *         access all the memory from addr to (addr + size).
125 */
126static int read_mem(char *addr, unsigned long size, int avoid_flag)
127{
128	int ret = 0;
129	unsigned long i = 0;
130
131	for (i = 0; i < size; i++) {
132		if ((avoid_flag) &&
133		    ((addr + i) >= corrupt_page_addr) &&
134		    ((addr + i) < (corrupt_page_addr + DEFAULT_PS)))
135			continue;
136
137		if (*(addr + i) != (char)('a' + (i % 26))) {
138			print_err("Mismatch at 0x%lx.\n",
139					(unsigned long)(addr + i));
140			ret = -1;
141			break;
142		}
143	}
144
145	return ret;
146}
147
/*
 * Usage:
 *     Fill [addr, addr + size) with the repeating pattern 'a'..'z';
 *     the byte at offset i gets 'a' + (i % 26). read_mem() checks the
 *     same pattern.
 *
 * Note:
 *     The index has the same unsigned type as size, so there is no
 *     signed/unsigned comparison and no signed overflow for large sizes.
 */
static void write_mem(char *addr, unsigned long size)
{
	unsigned long i;

	for (i = 0; i < size; i++)
		addr[i] = (char)('a' + (i % 26));
}
156
157/*
158 * Usage:
159 *     Use MADV_HUGEPAGE to make sure the page could be mapped as THP
160 *     when /sys/kernel/mm/transparent_hugepage/enabled is set with
161 *     madvise.
162 *
163 * Note:
164 *     MADV_HUGEPAGE must be set between mmap and read/write operation.
165 *     And it must follow mmap(). Please refer to patches of
166 *     MADV_HUGEPAGE about THP for more details.
167 *
168 * Patch Information:
169 *     Title: thp: khugepaged: make khugepaged aware about madvise
170 *     commit 60ab3244ec85c44276c585a2a20d3750402e1cf4
171 */
172static int request_thp_with_madvise(unsigned long start)
173{
174	unsigned long madvise_addr = start & ~PS_MASK(DEFAULT_PS);
175	unsigned long madvise_size = REQ_MEM_SIZE - (start % DEFAULT_PS);
176
177	return madvise((void *)madvise_addr, madvise_size, MADV_HUGEPAGE);
178}
179
180/*
181 * Usage:
182 *     This function is used for getting the address of first THP.
183 *
184 * Note:
185 *     This function could not make sure the address is the address of THP
186 *     really. Please refer to the explanation of mmap() of THP
187 *     at the head of this file.
188 */
189static unsigned long find_thp_addr(unsigned long start, unsigned long size)
190{
191	unsigned long thp_align_addr = (start + (DEFAULT_THP_SIZE - 1)) &
192					~THP_MASK(DEFAULT_THP_SIZE);
193
194	if ((thp_align_addr >= start) &&
195	    ((thp_align_addr + DEFAULT_THP_SIZE) < (start + size)))
196		return thp_align_addr;
197
198	return 0;
199}
200
201static int prep_memory_map(void)
202{
203	mem_addr = (char *)mmap(NULL, REQ_MEM_SIZE, PROT_WRITE | PROT_READ,
204				MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
205	if (mem_addr == NULL) {
206		print_err("Failed to mmap requested memory: size 0x%lx.\n",
207				REQ_MEM_SIZE);
208		return THP_FAILURE;
209	}
210
211	return THP_SUCCESS;
212}
213
214static int prep_injection(void)
215{
216	/* enabled(=madvise) in /sys/kernel/mm/transparent_hugepage/. */
217	if (request_thp_with_madvise((unsigned long)mem_addr) < 0) {
218		print_err("Failed to request THP for [madvise] in enabled.\n");
219		return THP_FAILURE;
220	}
221
222	write_mem(mem_addr, REQ_MEM_SIZE);
223	if (read_mem(mem_addr, REQ_MEM_SIZE, 0) < 0) {
224		print_err("Data is Mismatched(prep_injection).\n");
225		return THP_FAILURE;
226	}
227
228	/* find the address of THP. */
229	thp_addr = find_thp_addr((unsigned long)mem_addr, REQ_MEM_SIZE);
230	if (!thp_addr) {
231		print_err("No THP mapped.\n");
232		return THP_FAILURE;
233	}
234
235	/* Calculate the address of the page which will be poisoned */
236	if (corrupt_page < 0)
237		corrupt_page = 0;
238
239	corrupt_page_addr = (char *)(thp_addr + corrupt_page * DEFAULT_PS);
240
241	/* Process will be killed here by kernel(SIGBUS AO). */
242	prctl(PR_MCE_KILL, PR_MCE_KILL_SET,
243		early_kill ? PR_MCE_KILL_EARLY : PR_MCE_KILL_LATE,
244		NULL, NULL);
245
246	return THP_SUCCESS;
247}
248
249static int do_injection(void)
250{
251	/* Early Kill */
252	if (madvise((void *)corrupt_page_addr, DEFAULT_PS, MADV_POISON) != 0) {
253		print_err("Failed to poison at 0x%p.\n", corrupt_page_addr);
254		printf("[INFO] Please check the authority of current user.\n");
255		return THP_FAILURE;
256	}
257
258	return THP_SUCCESS;
259}
260
261static int post_injection(void)
262{
263
264	if (early_kill) {
265		print_err("Failed to be killed by SIGBUS(Action Optional).\n");
266		return THP_FAILURE;
267	}
268
269	/* Late Kill */
270	if (read_mem(mem_addr, REQ_MEM_SIZE, avoid_touch) < 0) {
271		print_err("Data is Mismatched(do_injection).\n");
272		return THP_FAILURE;
273	}
274
275	if (!avoid_touch) {
276		print_err("Failed to be killed by SIGBUS(Action Required).\n");
277		return THP_FAILURE;
278	}
279
280	return THP_SUCCESS;
281}
282
283static void post_memory_map()
284{
285	munmap(mem_addr, REQ_MEM_SIZE);
286}
287
/*
 * Usage:
 *     Print the help text (options, examples and defaults) to stdout.
 *     program is inserted three times: in the synopsis line and in both
 *     examples.
 */
static void usage(char *program)
{
	static const char help_text[] =
"%s [-o offset] [-ea]\n"
" Usage:\n"
"	-o|--offset offset(page unit)	Position of error injection from the first THP.\n"
"	-e|--early-kill			Set PR_MCE_KILL_EARLY(default NOT early-kill).\n"
"	-a|--avoid-touch		Avoid touching error page(page unit) and\n"
"					only used when early-kill is not set.\n"
"	-h|--help\n\n"
" Examples:\n"
"	1. Inject the 2nd page(4k) of THP and early killed.\n"
"	%s -o 1 -e\n\n"
"	2. Inject the 4th page(4k) of THP, late killed and untouched.\n"
"	%s --offset 3 --avoid-touch\n\n"
" Note:\n"
"	Options				Default set\n"
"	early-kill			no\n"
"	offset				0(head page)\n"
"	avoid-touch			no\n\n";

	printf(help_text, program, program, program);
}
309
/*
 * Long options accepted by getopt_long(); each one maps to the short
 * option character handled in get_options_or_die(). Only "offset" takes
 * an argument. The all-zero entry terminates the table.
 */
static struct option opts[] = {
	{ "offset",      required_argument, NULL, 'o' },
	{ "avoid-touch", no_argument,       NULL, 'a' },
	{ "early-kill",  no_argument,       NULL, 'e' },
	{ "help",        no_argument,       NULL, 'h' },
	{ NULL,          0,                 NULL, 0   }
};
317
318static void get_options_or_die(int argc, char *argv[])
319{
320	char c;
321
322	while ((c = getopt_long(argc, argv, "o:aeh", opts, NULL)) != -1) {
323		switch (c) {
324		case 'o':
325			corrupt_page = strtol(optarg, NULL, 10);
326			break;
327		case 'a':
328			avoid_touch = 1;
329			break;
330		case 'e':
331			early_kill = 1;
332			break;
333		case 'h':
334			usage(argv[0]);
335			exit(0);
336		default:
337			print_err("Wrong options, please check options!\n");
338			usage(argv[0]);
339			exit(1);
340		}
341	}
342
343	if ((avoid_touch) && (corrupt_page == -1)) {
344		print_err("Avoid which page?\n");
345		usage(argv[0]);
346		exit(1);
347	}
348}
349
350int main(int argc, char *argv[])
351{
352	int ret = THP_FAILURE;
353	pid_t child;
354	siginfo_t sig;
355
356	/*
357	 * 1. Options check.
358	 */
359	get_options_or_die(argc, argv);
360
361	/* Fork a child process for test */
362	child = fork();
363	if (child < 0) {
364		print_err("Failed to fork child process.\n");
365		return THP_FAILURE;
366	}
367
368	if (child == 0) {
369		/* Child process */
370
371		int ret = THP_FAILURE;
372
373		signal(SIGBUS, SIG_DFL);
374
375		/*
376		 * 2. Groundwork for hwpoison injection.
377		 */
378		if (prep_memory_map() == THP_FAILURE)
379			_exit(1);
380
381		if (prep_injection() == THP_FAILURE)
382			goto free_mem;
383
384		/* Print the prepared information before hwpoison injection. */
385		print_prep_info();
386
387		/*
388		 * 3. Hwpoison Injection.
389		 */
390		if (do_injection() == THP_FAILURE)
391			goto free_mem;
392
393		if (post_injection() == THP_FAILURE)
394			goto free_mem;
395
396		ret = THP_SUCCESS;
397free_mem:
398		post_memory_map();
399
400		if (ret == THP_SUCCESS)
401			_exit(0);
402
403		_exit(1);
404	}
405
406	/* Parent process */
407
408	if (waitid(P_PID, child, &sig, WEXITED) < 0) {
409		print_err("Failed to wait child process.\n");
410		return THP_FAILURE;
411	}
412
413	/*
414	 * 4. Check the result of hwpoison injection.
415	 */
416	if (avoid_touch) {
417		if (sig.si_code == CLD_EXITED && sig.si_status == 0) {
418			print_success("Child process survived.\n");
419			ret = THP_SUCCESS;
420		} else
421			print_failure("Child process could not survive.\n");
422	} else {
423		if (sig.si_code == CLD_KILLED && sig.si_status == SIGBUS) {
424			print_success("Child process was killed by SIGBUS.\n");
425			ret = THP_SUCCESS;
426		} else
427			print_failure("Child process could not be killed"
428					" by SIGBUS.\n");
429	}
430
431	return ret;
432}
433