1/* 2 * ttranshuge.c: hwpoison test for THP(Transparent Huge Page). 3 * 4 * Copyright (C) 2011, FUJITSU LIMITED. 5 * Author: Jin Dongming <jin.dongming@css.cn.fujitsu.com> 6 * 7 * This program is released under the GPLv2. 8 * 9 * This program is based on tinject.c and thugetlb.c in tsrc/ directory 10 * in mcetest tool. 11 */ 12 13/* 14 * Even if THP is supported by Kernel, it could not be sure all the pages 15 * you gotten belong to THP. 16 * 17 * Following is the structure of the memory mapped by mmap() 18 * when the requested memory size is 8M and the THP's size is 2M, 19 * O: means page belongs to 4k page; 20 * T: means page belongs to THP. 21 * Base ..... (Base + Size) 22 * Size : 0M . . . . . 2M . . . . . 4M . . . . . 6M . . . . . 8M 23 * case0: OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOO 24 * No THP. 25 * case1: OOOOOOOTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTOOOOOO 26 * Mixed with THP where it is possible. 27 * case2: OOOOOOOOOOOOOOOOOOOOOOOOOOTTTTTTTTTTTTTTTTTTTTTTTTTT 28 * Mixed with THP only some part of where it is possible. 29 * case3: TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT 30 * All pages are belong to THP. 31 * 32 * So the function find_thp_addr() could not be sure the calculated 33 * address is the address of THP. And in the above structure, 34 * the right address of THP could not be gotten in case 0 and 2 and 35 * could be gotten in case 1 and 3 only. 36 * 37 * According to my experience, the most case gotten by APL is case 1. 38 * So this program is made based on the case 1. 39 * 40 * To improve the rate of THP mapped by mmap(), it is better to do 41 * hwpoison test: 42 * - After reboot immediately. 43 * Because there is a lot of freed memory. 44 * - In the system which has plenty of memory prepared. 45 * This can avoid hwpoison test failure caused by not enough memory. 46 */ 47 48#define _GNU_SOURCE 1 49#include <stdlib.h> 50#include <stdio.h> 51#include <string.h> 52 53#include <unistd.h> 54#include <getopt.h> 55#include <signal.h> 56 57#include <sys/prctl.h> 58#include <sys/mman.h> 59#include <sys/wait.h> 60 61/* 62 * This file supposes the following as default. 63 * Regular Page Size : 4K(4096Bytes) 64 * THP's Size : 2M(2UL * 1024 *1024Bytes) 65 * Poisoned Page Size : 4K(4096Bytes) 66 */ 67#define DEFAULT_PS 4096UL 68#define PS_MASK(ps_size) ((unsigned long)(ps_size -1)) 69#define DEFAULT_THP_SIZE 0x200000UL 70#define THP_MASK(thp_size) ((unsigned long)(thp_size - 1)) 71 72#define REQ_MEM_SIZE (8UL * 1024 * 1024) 73 74#define MADV_POISON 100 75#define MADV_HUGEPAGE 14 76 77#define PR_MCE_KILL 33 78#define PR_MCE_KILL_SET 1 79#define PR_MCE_KILL_EARLY 1 80#define PR_MCE_KILL_LATE 0 81 82#define THP_SUCCESS 0 83#define THP_FAILURE -1 84 85#define print_err(fmt, ...) printf("[ERROR] "fmt, ##__VA_ARGS__) 86#define print_success(fmt, ...) printf("[SUCCESS] "fmt, ##__VA_ARGS__) 87#define print_failure(fmt, ...) printf("[FAILURE] "fmt, ##__VA_ARGS__) 88 89static char *corrupt_page_addr; 90static char *mem_addr; 91 92static unsigned int early_kill = 0; 93static unsigned int avoid_touch = 0; 94 95static int corrupt_page = -1; 96 97static unsigned long thp_addr = 0; 98 99static void print_prep_info(void) 100{ 101 printf("\n%s Poison Test of THP.\n\n" 102 "Information:\n" 103 " PID %d\n" 104 " PS(page size) 0x%lx\n" 105 " mmap()'ed Memory Address %p; size 0x%lx\n" 106 " THP(Transparent Huge Page) Address 0x%lx; size 0x%lx\n" 107 " %s Page Poison Test At %p\n\n", 108 109 early_kill ? "Early Kill" : "Late Kill", 110 getpid(), 111 DEFAULT_PS, 112 mem_addr, REQ_MEM_SIZE, 113 thp_addr, DEFAULT_THP_SIZE, 114 (corrupt_page == 0) ? "Head" : "Tail", corrupt_page_addr 115 ); 116} 117 118/* 119 * Usage: 120 * If avoid_flag == 1, 121 * access all the memory except one DEFAULT_PS size memory 122 * after the address in global variable corrupt_page_addr; 123 * else 124 * access all the memory from addr to (addr + size). 125 */ 126static int read_mem(char *addr, unsigned long size, int avoid_flag) 127{ 128 int ret = 0; 129 unsigned long i = 0; 130 131 for (i = 0; i < size; i++) { 132 if ((avoid_flag) && 133 ((addr + i) >= corrupt_page_addr) && 134 ((addr + i) < (corrupt_page_addr + DEFAULT_PS))) 135 continue; 136 137 if (*(addr + i) != (char)('a' + (i % 26))) { 138 print_err("Mismatch at 0x%lx.\n", 139 (unsigned long)(addr + i)); 140 ret = -1; 141 break; 142 } 143 } 144 145 return ret; 146} 147 148static void write_mem(char *addr, unsigned long size) 149{ 150 int i = 0; 151 152 for (i = 0; i < size; i++) { 153 *(addr + i) = (char)('a' + (i % 26)); 154 } 155} 156 157/* 158 * Usage: 159 * Use MADV_HUGEPAGE to make sure the page could be mapped as THP 160 * when /sys/kernel/mm/transparent_hugepage/enabled is set with 161 * madvise. 162 * 163 * Note: 164 * MADV_HUGEPAGE must be set between mmap and read/write operation. 165 * And it must follow mmap(). Please refer to patches of 166 * MADV_HUGEPAGE about THP for more details. 167 * 168 * Patch Information: 169 * Title: thp: khugepaged: make khugepaged aware about madvise 170 * commit 60ab3244ec85c44276c585a2a20d3750402e1cf4 171 */ 172static int request_thp_with_madvise(unsigned long start) 173{ 174 unsigned long madvise_addr = start & ~PS_MASK(DEFAULT_PS); 175 unsigned long madvise_size = REQ_MEM_SIZE - (start % DEFAULT_PS); 176 177 return madvise((void *)madvise_addr, madvise_size, MADV_HUGEPAGE); 178} 179 180/* 181 * Usage: 182 * This function is used for getting the address of first THP. 183 * 184 * Note: 185 * This function could not make sure the address is the address of THP 186 * really. Please refer to the explanation of mmap() of THP 187 * at the head of this file. 188 */ 189static unsigned long find_thp_addr(unsigned long start, unsigned long size) 190{ 191 unsigned long thp_align_addr = (start + (DEFAULT_THP_SIZE - 1)) & 192 ~THP_MASK(DEFAULT_THP_SIZE); 193 194 if ((thp_align_addr >= start) && 195 ((thp_align_addr + DEFAULT_THP_SIZE) < (start + size))) 196 return thp_align_addr; 197 198 return 0; 199} 200 201static int prep_memory_map(void) 202{ 203 mem_addr = (char *)mmap(NULL, REQ_MEM_SIZE, PROT_WRITE | PROT_READ, 204 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 205 if (mem_addr == NULL) { 206 print_err("Failed to mmap requested memory: size 0x%lx.\n", 207 REQ_MEM_SIZE); 208 return THP_FAILURE; 209 } 210 211 return THP_SUCCESS; 212} 213 214static int prep_injection(void) 215{ 216 /* enabled(=madvise) in /sys/kernel/mm/transparent_hugepage/. */ 217 if (request_thp_with_madvise((unsigned long)mem_addr) < 0) { 218 print_err("Failed to request THP for [madvise] in enabled.\n"); 219 return THP_FAILURE; 220 } 221 222 write_mem(mem_addr, REQ_MEM_SIZE); 223 if (read_mem(mem_addr, REQ_MEM_SIZE, 0) < 0) { 224 print_err("Data is Mismatched(prep_injection).\n"); 225 return THP_FAILURE; 226 } 227 228 /* find the address of THP. */ 229 thp_addr = find_thp_addr((unsigned long)mem_addr, REQ_MEM_SIZE); 230 if (!thp_addr) { 231 print_err("No THP mapped.\n"); 232 return THP_FAILURE; 233 } 234 235 /* Calculate the address of the page which will be poisoned */ 236 if (corrupt_page < 0) 237 corrupt_page = 0; 238 239 corrupt_page_addr = (char *)(thp_addr + corrupt_page * DEFAULT_PS); 240 241 /* Process will be killed here by kernel(SIGBUS AO). */ 242 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, 243 early_kill ? PR_MCE_KILL_EARLY : PR_MCE_KILL_LATE, 244 NULL, NULL); 245 246 return THP_SUCCESS; 247} 248 249static int do_injection(void) 250{ 251 /* Early Kill */ 252 if (madvise((void *)corrupt_page_addr, DEFAULT_PS, MADV_POISON) != 0) { 253 print_err("Failed to poison at 0x%p.\n", corrupt_page_addr); 254 printf("[INFO] Please check the authority of current user.\n"); 255 return THP_FAILURE; 256 } 257 258 return THP_SUCCESS; 259} 260 261static int post_injection(void) 262{ 263 264 if (early_kill) { 265 print_err("Failed to be killed by SIGBUS(Action Optional).\n"); 266 return THP_FAILURE; 267 } 268 269 /* Late Kill */ 270 if (read_mem(mem_addr, REQ_MEM_SIZE, avoid_touch) < 0) { 271 print_err("Data is Mismatched(do_injection).\n"); 272 return THP_FAILURE; 273 } 274 275 if (!avoid_touch) { 276 print_err("Failed to be killed by SIGBUS(Action Required).\n"); 277 return THP_FAILURE; 278 } 279 280 return THP_SUCCESS; 281} 282 283static void post_memory_map() 284{ 285 munmap(mem_addr, REQ_MEM_SIZE); 286} 287 288static void usage(char *program) 289{ 290 printf("%s [-o offset] [-ea]\n" 291" Usage:\n" 292" -o|--offset offset(page unit) Position of error injection from the first THP.\n" 293" -e|--early-kill Set PR_MCE_KILL_EARLY(default NOT early-kill).\n" 294" -a|--avoid-touch Avoid touching error page(page unit) and\n" 295" only used when early-kill is not set.\n" 296" -h|--help\n\n" 297" Examples:\n" 298" 1. Inject the 2nd page(4k) of THP and early killed.\n" 299" %s -o 1 -e\n\n" 300" 2. Inject the 4th page(4k) of THP, late killed and untouched.\n" 301" %s --offset 3 --avoid-touch\n\n" 302" Note:\n" 303" Options Default set\n" 304" early-kill no\n" 305" offset 0(head page)\n" 306" avoid-touch no\n\n" 307 , program, program, program); 308} 309 310static struct option opts[] = { 311 { "offset" , 1, NULL, 'o' }, 312 { "avoid-touch" , 0, NULL, 'a' }, 313 { "early-kill" , 0, NULL, 'e' }, 314 { "help" , 0, NULL, 'h' }, 315 { NULL , 0, NULL, 0 } 316}; 317 318static void get_options_or_die(int argc, char *argv[]) 319{ 320 char c; 321 322 while ((c = getopt_long(argc, argv, "o:aeh", opts, NULL)) != -1) { 323 switch (c) { 324 case 'o': 325 corrupt_page = strtol(optarg, NULL, 10); 326 break; 327 case 'a': 328 avoid_touch = 1; 329 break; 330 case 'e': 331 early_kill = 1; 332 break; 333 case 'h': 334 usage(argv[0]); 335 exit(0); 336 default: 337 print_err("Wrong options, please check options!\n"); 338 usage(argv[0]); 339 exit(1); 340 } 341 } 342 343 if ((avoid_touch) && (corrupt_page == -1)) { 344 print_err("Avoid which page?\n"); 345 usage(argv[0]); 346 exit(1); 347 } 348} 349 350int main(int argc, char *argv[]) 351{ 352 int ret = THP_FAILURE; 353 pid_t child; 354 siginfo_t sig; 355 356 /* 357 * 1. Options check. 358 */ 359 get_options_or_die(argc, argv); 360 361 /* Fork a child process for test */ 362 child = fork(); 363 if (child < 0) { 364 print_err("Failed to fork child process.\n"); 365 return THP_FAILURE; 366 } 367 368 if (child == 0) { 369 /* Child process */ 370 371 int ret = THP_FAILURE; 372 373 signal(SIGBUS, SIG_DFL); 374 375 /* 376 * 2. Groundwork for hwpoison injection. 377 */ 378 if (prep_memory_map() == THP_FAILURE) 379 _exit(1); 380 381 if (prep_injection() == THP_FAILURE) 382 goto free_mem; 383 384 /* Print the prepared information before hwpoison injection. */ 385 print_prep_info(); 386 387 /* 388 * 3. Hwpoison Injection. 389 */ 390 if (do_injection() == THP_FAILURE) 391 goto free_mem; 392 393 if (post_injection() == THP_FAILURE) 394 goto free_mem; 395 396 ret = THP_SUCCESS; 397free_mem: 398 post_memory_map(); 399 400 if (ret == THP_SUCCESS) 401 _exit(0); 402 403 _exit(1); 404 } 405 406 /* Parent process */ 407 408 if (waitid(P_PID, child, &sig, WEXITED) < 0) { 409 print_err("Failed to wait child process.\n"); 410 return THP_FAILURE; 411 } 412 413 /* 414 * 4. Check the result of hwpoison injection. 415 */ 416 if (avoid_touch) { 417 if (sig.si_code == CLD_EXITED && sig.si_status == 0) { 418 print_success("Child process survived.\n"); 419 ret = THP_SUCCESS; 420 } else 421 print_failure("Child process could not survive.\n"); 422 } else { 423 if (sig.si_code == CLD_KILLED && sig.si_status == SIGBUS) { 424 print_success("Child process was killed by SIGBUS.\n"); 425 ret = THP_SUCCESS; 426 } else 427 print_failure("Child process could not be killed" 428 " by SIGBUS.\n"); 429 } 430 431 return ret; 432} 433