1/*
2 * Test program for Linux poison memory error recovery.
3 * This injects poison into various mapping cases and triggers the poison
4 * handling.  Requires special injection support in the kernel.
5 *
6 * Copyright 2009, 2010 Intel Corporation
7 *
8 * tinjpage is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public
10 * License as published by the Free Software Foundation; version
11 * 2.
12 *
13 * tinjpage is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16 * General Public License for more details.
17 *
18 * You should find a copy of v2 of the GNU General Public License somewhere
19 * on your Linux system; if not, write to the Free Software Foundation,
20 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 *
22 * Authors: Andi Kleen, Fengguang Wu
23 */
24#define _GNU_SOURCE 1
25#include <stdio.h>
26#include <signal.h>
27#include <unistd.h>
28#include <sys/fcntl.h>
29#include <sys/wait.h>
30#include <sys/mman.h>
31#include <stdlib.h>
32#include <setjmp.h>
33#include <errno.h>
34#include <string.h>
35#include <time.h>
36#include <pthread.h>
37#include <sys/ipc.h>
38#include <sys/shm.h>
39#include <sys/sem.h>
40#include "utils.h"
41#include "hugepage.h"
42
/* madvise() advice value used for poison injection (matches the kernel's
   MADV_HWPOISON; defined locally in case the libc headers lack it). */
#define MADV_POISON 100

#define TMPDIR "./"		/* directory for temporary test files */
#define PATHBUFLEN 100		/* size of path name buffers */

/* perror() wrapper that also bumps the global failure counter */
#define Perror(x) failure++, perror(x)
/* expand a string literal into the pointer,length pair write() wants */
#define PAIR(x) x, sizeof(x)-1
/* compiler barrier */
#define mb() asm volatile("" ::: "memory")
#if defined(__i386__) || defined(__x86_64__)
#define cpu_relax() asm volatile("rep ; nop" ::: "memory")
#else
#define cpu_relax() mb()
#endif

typedef unsigned long long u64;

int PS;			/* runtime page size, set in main() */
int failure;		/* count of hard test failures */
int unexpected;		/* count of suspicious-but-tolerated outcomes */
int early_kill;		/* 1 while vm.memory_failure_early_kill=1 is tested */
int test_hugepage;	/* 1 while a hugepage test case is running */
64
/*
 * mmap() wrapper that aborts the test run when the mapping fails.
 */
void *checked_mmap(void *start, size_t length, int prot, int flags,
                   int fd, off_t offset)
{
	void *res;

	res = mmap(start, length, prot, flags, fd, offset);
	if (res == MAP_FAILED)
		err("mmap");
	return res;
}
73
74void munmap_reserve(void *page, int size)
75{
76	if (munmap(page, size) < 0)
77		err("munmap");
78	if (mmap(page, size, PROT_NONE, MAP_PRIVATE|MAP_FIXED, 0, 0) < 0)
79		err("mmap2");
80}
81
/* malloc() wrapper: terminate the process when out of memory. */
void *xmalloc(size_t s)
{
	void *mem = malloc(s);

	if (mem == NULL)
		exit(ENOMEM);
	return mem;
}
89
/* ceil(log2(n)) for n >= 1: number of bits needed to index n items. */
static int ilog2(int n)
{
	int bits;

	for (bits = 0, n -= 1; n != 0; n >>= 1)
		bits++;
	return bits;
}
100
int recovercount;		/* anti-signal-loop guard, reset per test */
sigjmp_buf recover_ctx;		/* longjmp target for late (access-time) kills */
sigjmp_buf early_recover_ctx;	/* longjmp target for early (inject-time) kills */
void *expected_addr;		/* address the next SIGBUS should report */
105
/* Work around glibc not defining this yet */
/*
 * Local mirror of the siginfo_t layout up to si_addr_lsb, which older
 * glibc versions do not expose.  sighandler() casts the real siginfo_t
 * to this type purely to read _addr_lsb.
 */
struct my_siginfo {
	int si_signo;
	int si_errno;
	int si_code;
	union {
	struct {
		void  *_addr; /* faulting insn/memory ref. */
#ifdef __ARCH_SI_TRAPNO
		int _trapno;	/* TRAP # which caused the signal */
#endif
		short _addr_lsb; /* LSB of the reported address */
	} _sigfault;
	} _sifields;
};
#undef si_addr_lsb
#define si_addr_lsb _sifields._sigfault._addr_lsb
123
/*
 * SIGBUS handler: validates the siginfo reported for a poisoned page
 * (faulting address, and the address LSB which encodes the poison
 * granularity), then longjmps back into poison()/recover().
 */
void sighandler(int sig, siginfo_t *si, void *arg)
{
	if (si->si_addr != expected_addr) {
		printf("XXX: Unexpected address in signal %p (expected %p)\n", si->si_addr,
			expected_addr);
		failure++;
	}

	/* read si_addr_lsb through the local layout; see my_siginfo */
	int lsb = ((struct my_siginfo *)si)->si_addr_lsb;
	/* the LSB should be log2 of the poisoned mapping's page size */
	if (test_hugepage) {
		if (lsb != ilog2(HPS)) {
			printf("LATER: Unexpected addr lsb in siginfo %d\n", lsb);
		}
	} else {
		if (lsb != ilog2(sysconf(_SC_PAGE_SIZE))) {
			printf("LATER: Unexpected addr lsb in siginfo %d\n", lsb);
		}
	}

	printf("\tsignal %d code %d addr %p\n", sig, si->si_code, si->si_addr);

	/* async-signal-safe bail-out if the handler keeps re-firing */
	if (--recovercount == 0) {
		write(1, PAIR("I seem to be in a signal loop. bailing out.\n"));
		exit(1);
	}

	/* si_code 4 is BUS_MCEERR_AR (action required, raised by the
	   access in recover()); anything else is treated as the
	   action-optional signal from the injection itself. */
	if (si->si_code == 4)
		siglongjmp(recover_ctx, 1);
	else
		siglongjmp(early_recover_ctx, 1);
}
155
/* How a test accesses the poisoned page, and the expected outcome. */
enum rmode {
	MREAD = 0,	/* read access; SIGBUS/kill expected */
	MWRITE = 1,	/* write access; SIGBUS/kill expected */
	MREAD_OK = 2,	/* read access; must survive */
	MWRITE_OK = 3,	/* write access; must survive */
	MNOTHING = -1,	/* no access at all */
};
163
164void inject_madvise(char *page)
165{
166	if (madvise(page, PS, MADV_POISON) != 0) {
167		if (errno == EINVAL) {
168			printf("Kernel doesn't support poison injection\n");
169			exit(0);
170		}
171		Perror("madvise");
172	}
173}
174
175u64 page_to_pfn(char *page)
176{
177	static int pagemap_fd = -1;
178	u64 pfn;
179
180	if (pagemap_fd < 0)  {
181		pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
182		if (pagemap_fd < 0)
183			err("/proc/self/pagemap not supported");
184	}
185
186	if (pread(pagemap_fd, &pfn, sizeof(u64),
187		((u64)page / PS)*sizeof(u64)) != sizeof(u64))
188		err("Cannot read from pagemap");
189
190	pfn &= (1ULL<<56)-1;
191	return pfn;
192}
193
194/*
195 * Inject Action Optional #MC
196 * with mce-inject using the software injector.
197 *
198 * This tests the low level machine check handler too.
199 *
200 * Slightly racy with page migration because we don't mlock the page.
201 */
202void inject_mce_inject(char *page)
203{
204	u64 pfn = page_to_pfn(page);
205	FILE *mce_inject;
206
207	mce_inject = popen("mce-inject", "w");
208	if (!mce_inject) {
209		fprintf(stderr, "Cannot open pipe to mce-inject: %s\n",
210				strerror(errno));
211		exit(1);
212	}
213
214	fprintf(mce_inject,
215		"CPU 0 BANK 3 STATUS UNCORRECTED SRAO 0xc0\n"
216		"MCGSTATUS RIPV MCIP\n"
217		"ADDR %#llx\n"
218		"MISC 0x8c\n"
219		"RIP 0x73:0x1eadbabe\n", pfn);
220
221	if (ferror(mce_inject) || fclose(mce_inject) < 0) {
222		fprintf(stderr, "mce-inject failed: %s\n", strerror(errno));
223		exit(1);
224	}
225}
226
/* Active injector; --mce-inject switches this to inject_mce_inject. */
void (*inject)(char *page) = inject_madvise;
228
/*
 * Inject poison at "page" and verify the early-kill behavior.  With
 * early kill enabled the injection itself should raise SIGBUS (caught
 * by sighandler, which siglongjmps back here); otherwise injection
 * returns normally and the later access in recover() is expected to
 * trap instead.
 */
void poison(char *msg, char *page, enum rmode mode)
{
	expected_addr = page;
	recovercount = 5;	/* signal-loop guard for sighandler */

	/* first pass returns 0; a SIGBUS longjmps back here with 1 */
	if (sigsetjmp(early_recover_ctx, 1) == 0) {
		inject(page);

		/* with early kill the injection should have signalled us */
		if (early_kill && (mode == MWRITE || mode == MREAD)) {
			printf("XXX: %s: process is not early killed\n", msg);
			failure++;
		}

		return;
	}

	/* arrived via siglongjmp: the injection raised SIGBUS */
	if (early_kill) {
		if (mode == MREAD_OK || mode == MWRITE_OK) {
			printf("XXX: %s: killed\n", msg);
			failure++;
		} else
			printf("\trecovered\n");
	}
}
253
/*
 * Access a previously poisoned page in the given mode and verify the
 * outcome: MREAD/MWRITE must trap (SIGBUS siglongjmps back here via
 * recover_ctx), while the _OK modes must complete normally.
 */
void recover(char *msg, char *page, enum rmode mode)
{
	expected_addr = page;
	recovercount = 5;	/* signal-loop guard for sighandler */

	if (sigsetjmp(recover_ctx, 1) == 0) {
		switch (mode) {
		case MWRITE:
			printf("\twriting 2\n");
			*page = 2;
			break;
		case MWRITE_OK:
			printf("\twriting 4\n");
			*page = 4;
			return;
		case MREAD:
			printf("\treading %x\n", *(unsigned char *)page);
			break;
		case MREAD_OK:
			printf("\treading %x\n", *(unsigned char *)page);
			return;
		case MNOTHING:
			return;
		}
		/* signal or kill should have happened */
		printf("XXX: %s: page not poisoned after injection\n", msg);
		failure++;
		return;
	}
	/* arrived via siglongjmp: the access trapped as intended */
	if (mode == MREAD_OK || mode == MWRITE_OK) {
		printf("XXX: %s: killed\n", msg);
		failure++;
	} else
		printf("\trecovered\n");
}
289
/*
 * Run one complete test: announce it, inject poison into the page,
 * then access it in the requested mode and check the outcome.
 */
void testmem(char *msg, char *page, enum rmode mode)
{
	printf("\t%s poisoning page %p\n", msg, page);
	poison(msg, page, mode);
	recover(msg, page, mode);
}
296
297void expecterr(char *msg, int err)
298{
299	if (err) {
300		printf("\texpected error %d on %s\n", errno, msg);
301	} else {
302		failure++;
303		printf("XXX: unexpected no error on %s\n", msg);
304	}
305}
306
307/*
308 * Any optional error is really a deficiency in the kernel VFS error reporting
309 * and should be eventually fixed and turned into a expecterr
310 */
311void optionalerr(char *msg, int err)
312{
313	if (err) {
314		printf("\texpected optional error %d on %s\n", errno, msg);
315	} else {
316		unexpected++;
317		printf("LATER: expected likely incorrect no error on %s\n", msg);
318	}
319}
320
static int tmpcount;	/* counter for generating unique temp file names */
322int tempfd(void)
323{
324	int fd;
325	char buf[PATHBUFLEN];
326	snprintf(buf, sizeof buf, TMPDIR "~poison%d",tmpcount++);
327	fd = open(buf, O_CREAT|O_RDWR, 0600);
328	if (fd >= 0)
329		unlink(buf);
330	if (fd < 0)
331		err("opening temporary file in " TMPDIR);
332	return fd;
333}
334
335int playfile(char *buf)
336{
337	int fd;
338	if (buf[0] == 0)
339		snprintf(buf, PATHBUFLEN, TMPDIR "~poison%d", tmpcount++);
340	fd = open(buf, O_CREAT|O_RDWR|O_TRUNC, 0600);
341	if (fd < 0)
342		err("opening temporary file in " TMPDIR);
343
344	const int NPAGES = 5;
345	char *tmp = xmalloc(PS * NPAGES);
346	int i;
347	for (i = 0; i < PS*NPAGES; i++)
348		tmp[i] = i;
349	write(fd, tmp, PS*NPAGES);
350
351	lseek(fd, 0, SEEK_SET);
352	return fd;
353}
354
355static void dirty_anonymous(void)
356{
357	char *page;
358	page = checked_mmap(NULL, PS, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_POPULATE, 0, 0);
359	testmem("dirty", page, MWRITE);
360}
361
362static void dirty_anonymous_unmap(void)
363{
364	char *page;
365	page = checked_mmap(NULL, PS, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_POPULATE, 0, 0);
366	testmem("dirty", page, MWRITE);
367	munmap_reserve(page, PS);
368}
369
370static void mlocked_anonymous(void)
371{
372	char *page;
373	page = checked_mmap(NULL, PS, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_LOCKED, 0, 0);
374	testmem("mlocked", page, MWRITE);
375}
376
377static void do_file_clean(int flags, char *name)
378{
379	char *page;
380	char fn[30];
381	snprintf(fn, 30, TMPDIR "~test%d", tmpcount++);
382	int fd = open(fn, O_RDWR|O_TRUNC|O_CREAT);
383	if (fd < 0)
384		err("open temp file");
385	write(fd, fn, 4);
386	page = checked_mmap(NULL, PS, PROT_READ|PROT_WRITE, MAP_SHARED|flags,
387		fd, 0);
388	fsync(fd);
389	close(fd);
390	testmem(name, page, MREAD_OK);
391	 /* reread page from disk */
392	printf("\t reading %x\n", *(unsigned char *)page);
393	testmem(name, page, MWRITE_OK);
394}
395
/* Clean page-cache page, plain shared mapping. */
static void file_clean(void)
{
	do_file_clean(0, "file clean");
}
400
/* Clean page-cache page, mlock()ed shared mapping. */
static void file_clean_mlocked(void)
{
	do_file_clean(MAP_LOCKED, "file clean mlocked");
}
405
/*
 * Compose the test description "<name> <add>" into buf (which must
 * hold at least 100 bytes) and return buf for convenience.
 */
static char *ndesc(char *buf, char *name, char *add)
{
	snprintf(buf, 100, "%s %s", name, add);
	return buf;
}
411
/*
 * Test poisoning dirty page-cache pages mapped from a file, in three
 * states: freshly written (dirty), re-read with MAP_POPULATE, and
 * faulted on demand.  The file I/O paths are then probed for error
 * reporting on the damaged file.
 */
static void do_file_dirty(int flags, char *name)
{
	char nbuf[100];
	char *page;
	char fn[PATHBUFLEN];
	fn[0] = 0;	/* ask playfile() to generate a name into fn */
	int fd = playfile(fn);

	/* stage 1: dirty page still in the page cache */
	page = checked_mmap(NULL, PS, PROT_READ,
			MAP_SHARED|MAP_POPULATE|flags, fd, 0);
	testmem(ndesc(nbuf, name, "initial"), page, MREAD);
	expecterr("msync expect error", msync(page, PS, MS_SYNC) < 0);
	close(fd);
	munmap_reserve(page, PS);

	/* stage 2: reopen and prefault; access should now survive */
	fd = open(fn, O_RDONLY);
	if (fd < 0) err("reopening temp file");
	page = checked_mmap(NULL, PS, PROT_READ, MAP_SHARED|MAP_POPULATE|flags,
				fd, 0);
	recover(ndesc(nbuf, name, "populated"), page, MREAD_OK);
	close(fd);
	munmap_reserve(page, PS);

	/* stage 3: reopen and fault the page on access */
	fd = open(fn, O_RDONLY);
	if (fd < 0) err("reopening temp file");
	page = checked_mmap(NULL, PS, PROT_READ, MAP_SHARED|flags, fd, 0);
	recover(ndesc(nbuf, name, "fault"), page, MREAD_OK);
	close(fd);
	munmap_reserve(page, PS);

	/* plain read/write syscalls on the damaged file */
	fd = open(fn, O_RDWR);
	char buf[128];
	/* the earlier close has eaten the error */
	optionalerr("explicit read after poison", read(fd, buf, sizeof buf) < 0);
	optionalerr("explicit write after poison", write(fd, "foobar", 6) < 0);
	optionalerr("fsync expect error", fsync(fd) < 0);
	close(fd);

	/* should unlink return an error here? */
	if (unlink(fn) < 0)
		perror("unlink");
}
454
/* Dirty page-cache page, plain shared mapping. */
static void file_dirty(void)
{
	do_file_dirty(0, "file dirty");
}
459
/* Dirty page-cache page, mlock()ed shared mapping. */
static void file_dirty_mlocked(void)
{
	do_file_dirty(MAP_LOCKED, "file dirty mlocked");
}
464
465/* TBD */
466static void file_hole(void)
467{
468	int fd = tempfd();
469	char *page;
470
471	ftruncate(fd, PS);
472	page = checked_mmap(NULL, PS, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
473	*page = 1;
474	testmem("hole file dirty", page, MREAD);
475	/* hole error reporting doesn't work in kernel currently, so optional */
476	optionalerr("hole fsync expect error", fsync(fd) < 0);
477	optionalerr("hole msync expect error", msync(page, PS, MS_SYNC) < 0);
478	close(fd);
479}
480
/*
 * Poison a page inside a nonlinear mapping built with
 * remap_file_pages() (pages mapped in reverse file order).
 * NOTE(review): remap_file_pages() is deprecated and emulated or
 * absent on modern kernels — confirm this still exercises the
 * intended path.
 */
static void nonlinear(void)
{
	int fd;
	const int NPAGES = 10;
	int i;
	char *page;
	char *tmp;

	fd = tempfd();
	/* fill the file with NPAGES pages, each byte-patterned by index */
	tmp = xmalloc(PS);
	for (i = 0; i < NPAGES; i++)  {
		memset(tmp, i, PS);
		write(fd, tmp, PS);
	}
	free(tmp);
	page = checked_mmap(NULL, PS*NPAGES, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
	/* remap so virtual page i shows file page NPAGES-1-i */
	int k = NPAGES - 1;
	for (i = 0; i < NPAGES; i++, k--) {
		if (remap_file_pages(page + i*PS, PS, 0, k, 0))
			perror("remap_file_pages");
	}
	*page = 1;
	testmem("rfp file dirty", page, MREAD);
	expecterr("rfp fsync expect error", fsync(fd) < 0);
	optionalerr("rfp msync expect error", msync(page, PS, MS_SYNC) < 0);
	close(fd);
}
508
509/*
510 * These tests are currently too racy to be enabled.
511 */
512
513/*
514 * This is quite timing dependent. The sniper might hit the page
515 * before it is dirtied. If that happens tweak the delay
516 * (should auto tune)
517 */
/* Sniper delay in nanoseconds; tweak if the race misfires (see above). */
enum {
	DELAY_NS = 30,
};
521
/* Handshake state shared between the main thread and the sniper. */
volatile enum sstate { START, WAITING, SNIPE } sstate;

/*
 * Publish state "w", then spin until the peer publishes "s".
 * NOTE(review): volatile plus a compiler barrier is not a real
 * synchronization primitive; C11 atomics would be the correct tool —
 * confirm before relying on this on weakly ordered hardware.
 */
void waitfor(enum sstate w, enum sstate s)
{
	sstate = w;
	mb();
	while (sstate != s)
		cpu_relax();
}
531
/* Arguments handed to the sniper thread. */
struct poison_arg {
	char *msg;		/* test description */
	char *page;		/* page to poison */
	enum rmode mode;	/* access mode / expected outcome */
};
537
538void *sniper(void *p)
539{
540	struct poison_arg *arg = p;
541
542	waitfor(START, WAITING);
543	nanosleep(&((struct timespec) { .tv_nsec = DELAY_NS }), NULL);
544	poison(arg->msg, arg->page, arg->mode);
545	return NULL;
546}
547
548int setup_sniper(struct poison_arg *arg)
549{
550	if (sysconf(_SC_NPROCESSORS_ONLN) < 2)  {
551		printf("%s: Need at least two CPUs. Not tested\n", arg->msg);
552		return -1;
553	}
554	sstate = START;
555	mb();
556	pthread_t thr;
557	if (pthread_create(&thr, NULL, sniper, arg) < 0)
558		err("pthread_create");
559	pthread_detach(thr);
560	return 0;
561}
562
/*
 * Race test: a second thread poisons a dirty shared page while this
 * thread has write I/O in flight; fsync() is then expected to report
 * the error.  Timing dependent (see DELAY_NS).
 */
static void under_io_dirty(void)
{
	struct poison_arg arg;
	int fd = tempfd();
	char *page;

	page = checked_mmap(NULL, PS, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_POPULATE, fd, 0);

	arg.page = page;
	arg.msg  = "under io dirty";
	arg.mode = MWRITE;
	if (setup_sniper(&arg) < 0)
		return;

	write(fd, "xyz", 3);
	/* publish WAITING; the sniper fires after a short delay */
	waitfor(WAITING, WAITING);
	expecterr("write under io", fsync(fd) < 0);
	close(fd);
}
582
583static void under_io_clean(void)
584{
585	struct poison_arg arg;
586	char fn[PATHBUFLEN];
587	int fd;
588	char *page;
589	char buf[10];
590
591 	fd = playfile(fn);
592	page = checked_mmap(NULL, PS, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_POPULATE, fd, 0);
593	madvise(page, PS, MADV_DONTNEED);
594
595	arg.page = page;
596	arg.msg  = "under io clean";
597	arg.mode = MREAD_OK;
598	if (setup_sniper(&arg) < 0)
599		return;
600
601	waitfor(WAITING, WAITING);
602	// what is correct here?
603	if (pread(fd, buf, 10, 0) != 0)
604		perror("pread under io clean");
605	close(fd);
606}
607
608/*
609 * semaphore get/put wrapper
610 */
611int get_semaphore(int sem_id, struct sembuf *sembuffer)
612{
613	sembuffer->sem_num = 0;
614	sembuffer->sem_op  = -1;
615	sembuffer->sem_flg = SEM_UNDO;
616	return semop(sem_id, sembuffer, 1);
617}
618
/*
 * Up (V) operation on semaphore 0 of the given SysV set, with
 * SEM_UNDO.  Returns the semop() result: 0 on success, -1 on error.
 */
int put_semaphore(int sem_id, struct sembuf *sembuffer)
{
	struct sembuf *op = sembuffer;

	op->sem_num = 0;
	op->sem_flg = SEM_UNDO;
	op->sem_op  = 1;
	return semop(sem_id, op, 1);
}
626
/* memory sharing mode */
enum shared_mode {
	MMAP_SHARED = 0,	/* MAP_SHARED|MAP_ANONYMOUS mapping */
	IPV_SHARED  = 1,	/* SysV shm segment (shmget/shmat) */
};
632
633/*
634 * testcase for shared pages, where
635 *  if early_kill == 0, parent access the shared page hwpoisoned by child, and
636 *  if early_kill == 1, parent will be killed by SIGBUS from child.
637 * This testcase checks whether if a shared page is hwpoisoned by one process,
638 * another process sharing the page will be killed expectedly.
639 */
640static void do_shared(int shared_mode)
641{
642	int shm_id = -1, sem_id = -1, semaphore;
643	pid_t pid;
644	char *shared_page = NULL;
645	struct sembuf sembuffer;
646
647	if (shared_mode == MMAP_SHARED) {
648		shared_page = checked_mmap(NULL, PS, PROT_READ|PROT_WRITE,
649				MAP_SHARED|MAP_ANONYMOUS|MAP_POPULATE, 0, 0);
650	} else if (shared_mode == IPV_SHARED) {
651		shm_id = shmget(IPC_PRIVATE, PS, 0666|IPC_CREAT);
652		if (shm_id == -1)
653			err("shmget");
654	} else {
655		printf("XXX: invalid shared_mode\n");
656		return;
657	}
658
659	if (early_kill) {
660		sem_id = semget(IPC_PRIVATE, 1, 0666|IPC_CREAT);
661		if (sem_id == -1) {
662			perror("semget");
663			goto cleanup;
664		}
665		semaphore = semctl(sem_id, 0, SETVAL, 1);
666		if (semaphore == -1) {
667			perror("semctl");
668			goto cleanup;
669		}
670		if (get_semaphore(sem_id, &sembuffer)) {
671			perror("get_semaphore");
672			goto cleanup;
673		}
674	}
675
676	pid = fork();
677	if (pid < 0) {
678		perror("fork");
679		goto cleanup;
680	}
681
682	if (shared_mode == IPV_SHARED) {
683		shared_page = shmat(shm_id, NULL, 0);
684		if (shared_page == (char *)-1) {
685			perror("shmat");
686			goto cleanup;
687		}
688	}
689
690	memset(shared_page, 'a', 3);
691
692	if (early_kill) {
693		struct sigaction sa = {
694			.sa_sigaction = sighandler,
695			.sa_flags = SA_SIGINFO
696		};
697		sigaction(SIGBUS, &sa, NULL);
698		expected_addr = shared_page;
699	}
700
701	if (pid) {
702		siginfo_t sig;
703
704		if (early_kill && sigsetjmp(early_recover_ctx, 1) == 0) {
705			if (put_semaphore(sem_id, &sembuffer))
706				err("get_semaphore");
707			/* waiting for SIGBUS from child */
708			sleep(10);
709			printf("XXX timeout: child process does not send signal\n");
710			failure++;
711			goto cleanup;
712		}
713		waitid(P_PID, pid, &sig, WEXITED);
714
715		/*
716		 * check child termination status
717		 * late kill       : child should exit
718		 * suicide version : child should be killed by signal
719		 * early kill      : child should be killed by signal
720		 */
721		if (!early_kill) {
722			struct sigaction sigact;
723			sigaction(SIGBUS, NULL, &sigact);
724
725			if (sigact.sa_handler == SIG_DFL) {/* suicide version */
726				if (sig.si_code != CLD_KILLED)
727					goto child_error;
728			} else { /* late kill */
729				if (sig.si_code != CLD_EXITED)
730					goto child_error;
731			}
732		} else { /* early kill */
733			if (sig.si_code != CLD_EXITED)
734				goto child_error;
735		}
736
737		if (!early_kill)
738			recover("ipv shared page (parent)",
739				shared_page, MWRITE);
740
741		if (shared_mode == IPV_SHARED && shmdt(shared_page) == -1) {
742			perror("shmdt");
743			goto cleanup;
744		}
745	}
746
747	if (!pid) {
748		failure = 0;
749
750		if (early_kill)
751			if (get_semaphore(sem_id, &sembuffer))
752				err("get_semaphore");
753		testmem("ipv shared page", shared_page, MWRITE);
754
755		if (shared_mode == IPV_SHARED && shmdt(shared_page) == -1)
756			err("shmdt");
757
758		fflush(stdout);
759		_exit(failure);
760	}
761
762cleanup:
763	if (shared_page) {
764		if (shared_mode == IPV_SHARED)
765			shmdt(shared_page);
766		else
767			munmap_reserve(shared_page, PS);
768	}
769	if (shm_id >= 0 && shmctl(shm_id, IPC_RMID, NULL) < 0)
770		err("shmctl IPC_RMID");
771	if (sem_id >= 0 && semctl(sem_id, 0, IPC_RMID) < 0)
772		err("semctl IPC_RMID");
773	return;
774
775child_error:
776	printf("XXX child process was terminated unexpectedly\n");
777	failure++;
778	goto cleanup;
779}
780
/* Shared-page test backed by MAP_SHARED|MAP_ANONYMOUS. */
static void mmap_shared(void)
{
	do_shared(MMAP_SHARED);
}
785
/* Shared-page test backed by a SysV shared memory segment. */
static void ipv_shared(void)
{
	do_shared(IPV_SHARED);
}
790
791static void anonymous_hugepage(void)
792{
793	char *page;
794	/* Hugepage isn't supported. */
795	if (!HPS)
796		return;
797	test_hugepage = 1;
798	page = alloc_anonymous_hugepage(HPS, 1);
799	/* prefault */
800	page[0] = 'a';
801	testmem("anonymous hugepage", page, MWRITE);
802	free_anonymous_hugepage(page, HPS);
803	test_hugepage = 0;
804}
805
806static void file_backed_hugepage(void)
807{
808	char *page;
809	char buf[PATHBUFLEN];
810	int fd;
811	/* Hugepage isn't supported. */
812	if (!HPS)
813		return;
814	test_hugepage = 1;
815	snprintf(buf, PATHBUFLEN, "%s/test%d", hugetlbfsdir, tmpcount++);
816	page = alloc_filebacked_hugepage(buf, HPS, 0, &fd);
817	/* prefault */
818	page[0] = 'a';
819	testmem("file backed hugepage", page, MWRITE);
820	free_filebacked_hugepage(page, HPS, fd, buf);
821	test_hugepage = 0;
822}
823
824static void shm_hugepage(void)
825{
826	char *page;
827	/* Hugepage isn't supported. */
828	if (!HPS)
829		return;
830	test_hugepage = 1;
831	page = alloc_shm_hugepage(&tmpcount, HPS);
832	/* prefault */
833	page[0] = 'a';
834	testmem("shared memory hugepage", page, MWRITE);
835	free_shm_hugepage(tmpcount, page);
836	tmpcount++;
837	test_hugepage = 0;
838}
839
/*
 * Table of all test cases.  "survivable" marks cases where the page
 * is clean, so the suicide-mode child is expected to exit normally
 * rather than be killed by SIGBUS.  The trailing empty slots are
 * filled in by --sniper (see handle_opts()).
 */
struct testcase {
	void (*f)(void);	/* test body */
	char *name;		/* printable name */
	int survivable;		/* nonzero: child must survive suicide mode */
} cases[] = {
	{ dirty_anonymous, "dirty anonymous" },
	{ dirty_anonymous_unmap, "dirty anonymous unmap" },
	{ mlocked_anonymous, "mlocked anonymous" },
	{ file_clean, "file clean", 1 },
	{ file_dirty, "file dirty" },
	{ file_hole, "file hole" },
	{ file_clean_mlocked, "file clean mlocked", 1 },
	{ file_dirty_mlocked, "file dirty mlocked"},
	{ nonlinear, "nonlinear" },
	{ mmap_shared, "mmap shared" },
	{ ipv_shared, "ipv shared" },
	{ anonymous_hugepage, "anonymous hugepage" },
	{ file_backed_hugepage, "file backed hugepage" },
	{ shm_hugepage, "shared memory hugepage" },
	{},	/* dummy 1 for sniper */
	{},	/* dummy 2 for sniper */
	{}
};
863
/* Racy tests appended to cases[] only when --sniper is given. */
struct testcase snipercases[] = {
	{ under_io_dirty, "under io dirty" },
	{ under_io_clean, "under io clean" },
};
868
/* Print usage information on stderr and terminate. */
void usage(void)
{
	fputs("Usage: tinjpage [--sniper]\n"
	      "Test hwpoison injection on pages in various states\n"
	      "--mce-inject    Use mce-inject for injection\n"
	      "--sniper  Enable racy sniper tests (likely broken)\n",
	      stderr);
	exit(1);
}
877
878void handle_opts(char **av)
879{
880	while (*++av) {
881		if (!strcmp(*av, "--sniper")) {
882			struct testcase *t;
883			for (t = cases; t->f; t++)
884				;
885			*t++ = snipercases[0];
886			*t++ = snipercases[1];
887		}
888		else if (!strcmp(*av, "--mce-inject")) {
889			inject = inject_mce_inject;
890		} else
891			usage();
892	}
893}
894
/*
 * Driver.  Each test case runs three times:
 *  1. in-process with late kill (vm.memory_failure_early_kill=0),
 *  2. in a forked child with default SIGBUS disposition ("suicide"
 *     mode: the child must die from the poison unless the case is
 *     marked survivable),
 *  3. in-process with early kill enabled.
 * Returns nonzero when any case recorded a failure.
 */
int main(int ac, char **av)
{
	if (av[1])
		handle_opts(av);

	PS = getpagesize();
	/* hugepage cases only run when a hugetlbfs mount is usable */
	if (hugetlbfs_root(hugetlbfsdir))
		HPS = gethugepagesize();

	/* don't kill me at poison time, but possibly at page fault time */
	early_kill = 0;
	system("sysctl -w vm.memory_failure_early_kill=0");

	struct sigaction sa = {
		.sa_sigaction = sighandler,
		.sa_flags = SA_SIGINFO
	};

	struct testcase *t;
	/* catch signals */
	sigaction(SIGBUS, &sa, NULL);
	for (t = cases; t->f; t++) {
		printf("---- testing %s\n", t->name);
		t->f();
	}

	/* suicide version */
	for (t = cases; t->f; t++) {
		printf("---- testing %s in child\n", t->name);
		pid_t child = fork();
		if (child == 0) {
			/* restore default disposition: poison kills us */
			signal(SIGBUS, SIG_DFL);
			t->f();
			if (t->survivable)
				_exit(2);
			write(1, t->name, strlen(t->name));
			write(1, PAIR(" didn't kill itself?\n"));
			_exit(1);
		} else {
			siginfo_t sig;
			if (waitid(P_PID, child, &sig, WEXITED) < 0)
				perror("waitid");
			else {
				if (t->survivable) {
					if (sig.si_code != CLD_EXITED) {
						printf("XXX: %s: child not survived\n", t->name);
						failure++;
					}
				} else {
					if (sig.si_code != CLD_KILLED || sig.si_status != SIGBUS) {
						printf("XXX: %s: child not killed by SIGBUS\n", t->name);
						failure++;
					}
				}
			}
		}
	}

	/* early kill version */
	early_kill = 1;
	system("sysctl -w vm.memory_failure_early_kill=1");

	sigaction(SIGBUS, &sa, NULL);
	for (t = cases; t->f; t++) {
		printf("---- testing %s (early kill)\n", t->name);
		t->f();
	}

	if (failure > 0) {
		printf("FAILURE -- %d cases broken!\n", failure);
		return 1;
	}
	printf("SUCCESS\n");
	return 0;
}
970