1/*
2 * Copyright (C) 2012-2017  Red Hat, Inc.
3 *
4 * This program is free software;  you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY;  without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
12 * the GNU General Public License for more details.
13 *
14 * Description:
15 *
16 * The program is designed to test max_map_count tunable file
17 *
 * The kernel documentation says that:
19 * /proc/sys/vm/max_map_count contains the maximum number of memory map
20 * areas a process may have. Memory map areas are used as a side-effect
21 * of calling malloc, directly by mmap and mprotect, and also when
22 * loading shared libraries.
23 *
 * Each process has its own maps file, /proc/[pid]/maps, in which each line
 * describes one map entry, so the number of maps can be calculated by counting
 * the lines of that file and used to check the tunable's behaviour.
27 *
 * The program invokes mmap() repeatedly until it returns MAP_FAILED,
 * then reads the process's maps file /proc/[pid]/maps, saves the number of
 * lines to the map_count variable, and compares it with
 * /proc/sys/vm/max_map_count; map_count should be greater than
 * max_map_count by 1.
32 *
33 * Note: On some architectures there is a special vma VSYSCALL, which
34 * is allocated without incrementing mm->map_count variable. On these
35 * architectures each /proc/<pid>/maps has at the end:
36 * ...
37 * ...
38 * ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0   [vsyscall]
39 *
40 * so we ignore this line during /proc/[pid]/maps reading.
41 */
42
43#define _GNU_SOURCE
44#include <sys/wait.h>
45#include <errno.h>
46#include <fcntl.h>
47#include <stdbool.h>
48#include <stdio.h>
49#include <stdlib.h>
50#include <sys/utsname.h>
51#include "mem.h"
52
53#define MAP_COUNT_DEFAULT	1024
54#define MAX_MAP_COUNT		65536L
55
56/* This is a filter to exclude map entries which aren't accounted
57 * for in the vm_area_struct's map_count.
58 */
59static bool filter_map(const char *line)
60{
61	char buf[BUFSIZ];
62	int ret;
63
64	ret = sscanf(line, "%*p-%*p %*4s %*p %*2d:%*2d %*d %s", buf);
65	if (ret != 1)
66		return false;
67
68	switch (tst_arch.type) {
69	case TST_X86:
70	case TST_X86_64:
71		/* On x86, there's an old compat vsyscall page */
72		if (!strcmp(buf, "[vsyscall]"))
73			return true;
74		break;
75	case TST_IA64:
76		/* On ia64, the vdso is not a proper mapping */
77		if (!strcmp(buf, "[vdso]"))
78			return true;
79		break;
80	case TST_ARM:
81		/* Skip it when run it in aarch64 */
82		if (tst_kernel_bits() == 64)
83			return false;
84
85		/* Older arm kernels didn't label their vdso maps */
86		if (!strncmp(line, "ffff0000-ffff1000", 17))
87			return true;
88		break;
89	default:
90		break;
91	};
92
93	return false;
94}
95
96static long count_maps(pid_t pid)
97{
98	FILE *fp;
99	size_t len;
100	char *line = NULL;
101	char buf[BUFSIZ];
102	long map_count = 0;
103
104	snprintf(buf, BUFSIZ, "/proc/%d/maps", pid);
105	fp = fopen(buf, "r");
106	if (fp == NULL)
107		tst_brk(TBROK | TERRNO, "fopen %s", buf);
108	while (getline(&line, &len, fp) != -1) {
109		/* exclude vdso and vsyscall */
110		if (filter_map(line))
111			continue;
112		map_count++;
113	}
114	fclose(fp);
115
116	return map_count;
117}
118
119static void max_map_count_test(void)
120{
121	int status;
122	pid_t pid;
123	long max_maps;
124	long map_count;
125	long max_iters;
126	long memfree;
127
128	/*
129	 * XXX Due to a possible kernel bug, oom-killer can be easily
130	 * triggered when doing small piece mmaps in huge amount even if
131	 * enough free memory available. Also it has been observed that
132	 * oom-killer often kill wrong victims in this situation, we
133	 * decided to do following steps to make sure no oom happen:
134	 * 1) use a safe maximum max_map_count value as upper-bound,
135	 *    we set it 65536 in this case, i.e., we don't test too big
136	 *    value;
137	 * 2) make sure total mapping isn't larger than
138	 *        CommitLimit - Committed_AS
139	 */
140	memfree = SAFE_READ_MEMINFO("CommitLimit:") - SAFE_READ_MEMINFO("Committed_AS:");
141	/* 64 used as a bias to make sure no overflow happen */
142	max_iters = memfree / sysconf(_SC_PAGESIZE) * 1024 - 64;
143	if (max_iters > MAX_MAP_COUNT)
144		max_iters = MAX_MAP_COUNT;
145
146	max_maps = MAP_COUNT_DEFAULT;
147	if (max_iters < max_maps)
148		tst_brk(TCONF, "test requires more free memory");
149
150	while (max_maps <= max_iters) {
151		set_sys_tune("max_map_count", max_maps, 1);
152
153		switch (pid = SAFE_FORK()) {
154		case 0:
155			while (mmap(NULL, 1, PROT_READ,
156				    MAP_SHARED | MAP_ANONYMOUS, -1, 0)
157			       != MAP_FAILED) ;
158			if (raise(SIGSTOP) != 0)
159				tst_brk(TBROK | TERRNO, "raise");
160			exit(0);
161		default:
162			break;
163		}
164		/* wait child done mmap and stop */
165		SAFE_WAITPID(pid, &status, WUNTRACED);
166		if (!WIFSTOPPED(status))
167			tst_brk(TBROK, "child did not stopped");
168
169		map_count = count_maps(pid);
170		/* Note max_maps will be exceeded by one for
171		 * the sysctl setting of max_map_count. This
172		 * is the mm failure point at the time of
173		 * writing this COMMENT!
174		*/
175		if (map_count == (max_maps + 1))
176			tst_res(TPASS, "%ld map entries in total "
177				 "as expected.", max_maps);
178		else
179			tst_res(TFAIL, "%ld map entries in total, but "
180				 "expected %ld entries", map_count, max_maps);
181
182		/* make child continue to exit */
183		SAFE_KILL(pid, SIGCONT);
184		SAFE_WAITPID(pid, &status, 0);
185
186		max_maps = max_maps << 1;
187	}
188}
189
190static struct tst_test test = {
191	.needs_root = 1,
192	.forks_child = 1,
193	.test_all = max_map_count_test,
194	.save_restore = (const struct tst_path_val[]) {
195		{"/proc/sys/vm/overcommit_memory", "0", TST_SR_TBROK},
196		{"/proc/sys/vm/max_map_count", NULL, TST_SR_TBROK},
197		{}
198	},
199};
200