1/*
2 * Copyright (C) 2012 Linux Test Project, Inc.
3 *
4 * This program is free software;  you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY;  without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
12 * the GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program;  if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17 */
18
19#include "config.h"
20#include <errno.h>
21#if HAVE_NUMA_H
22#include <numa.h>
23#endif
24#if HAVE_NUMAIF_H
25#include <numaif.h>
26#endif
27#include <stdarg.h>
28#include <stdio.h>
29#include <string.h>
30#include <stdlib.h>
31#include <unistd.h>
32#include <errno.h>
33
34#include "test.h"
35#include "safe_macros.h"
36#include "numa_helper.h"
37#include "lapi/syscalls.h"
38
/*
 * get_max_node - upper bound (exclusive) on node ids
 *
 * RETURNS:
 *     numa_max_possible_node() + 1 when built with HAVE_NUMA_V2,
 *     0 otherwise
 */
unsigned long get_max_node(void)
{
#ifdef HAVE_NUMA_V2
	return numa_max_possible_node() + 1;
#else
	return 0;
#endif
}
47
48#ifdef HAVE_NUMA_V2
49static void get_nodemask_allnodes(nodemask_t * nodemask, unsigned long max_node)
50{
51	unsigned long nodemask_size = max_node / 8;
52	int i;
53	char fn[64];
54	struct stat st;
55
56	memset(nodemask, 0, nodemask_size);
57	for (i = 0; i < (int)max_node; i++) {
58		sprintf(fn, "/sys/devices/system/node/node%d", i);
59		if (stat(fn, &st) == 0)
60			nodemask_set(nodemask, i);
61	}
62}
63
64static int filter_nodemask_mem(nodemask_t * nodemask, unsigned long max_node)
65{
66#ifdef MPOL_F_MEMS_ALLOWED
67	unsigned long nodemask_size = max_node / 8;
68	memset(nodemask, 0, nodemask_size);
69	/*
70	 * avoid numa_get_mems_allowed(), because of bug in getpol()
71	 * utility function in older versions:
72	 * http://www.spinics.net/lists/linux-numa/msg00849.html
73	 *
74	 * At the moment numa_available() implementation also uses
75	 * get_mempolicy, but let's make explicit check for ENOSYS
76	 * here as well in case it changes in future. Silent ignore
77	 * of ENOSYS is OK, because without NUMA caller gets empty
78	 * set of nodes anyway.
79	 */
80	if (syscall(__NR_get_mempolicy, NULL, nodemask->n,
81		    max_node, 0, MPOL_F_MEMS_ALLOWED) < 0) {
82		if (errno == ENOSYS)
83			return 0;
84		return -2;
85	}
86#else
87	int i;
88	/*
89	 * old libnuma/kernel don't have MPOL_F_MEMS_ALLOWED, so let's assume
90	 * that we can use any node with memory > 0
91	 */
92	for (i = 0; i < (int)max_node; i++) {
93		if (!nodemask_isset(nodemask, i))
94			continue;
95		if (numa_node_size64(i, NULL) <= 0)
96			nodemask_clr(nodemask, i);
97	}
98#endif /* MPOL_F_MEMS_ALLOWED */
99	return 0;
100}
101
/*
 * cpumask_has_cpus - check whether a hex cpumask string has any bit set
 * @cpumask: text of the mask; scanning stops at the first '\0'
 * @len: maximum number of characters to inspect
 *
 * Only the characters '1'-'9' and lowercase 'a'-'f' count as a non-zero
 * digit; everything else (including '0' and separators) is skipped.
 *
 * RETURNS:
 *     1 if a non-zero digit was found
 *     0 otherwise
 */
static int cpumask_has_cpus(char *cpumask, size_t len)
{
	size_t pos;

	for (pos = 0; pos < len && cpumask[pos] != '\0'; pos++) {
		char c = cpumask[pos];

		if ((c >= '1' && c <= '9') || (c >= 'a' && c <= 'f'))
			return 1;
	}

	return 0;
}
114
115static void filter_nodemask_cpu(nodemask_t * nodemask, unsigned long max_node)
116{
117	char *cpumask = NULL;
118	char fn[64];
119	FILE *f;
120	size_t len;
121	int i, ret;
122
123	for (i = 0; i < (int)max_node; i++) {
124		if (!nodemask_isset(nodemask, i))
125			continue;
126		sprintf(fn, "/sys/devices/system/node/node%d/cpumap", i);
127		f = fopen(fn, "r");
128		if (f) {
129			ret = getdelim(&cpumask, &len, '\n', f);
130			if ((ret > 0) && (!cpumask_has_cpus(cpumask, len)))
131				nodemask_clr(nodemask, i);
132			fclose(f);
133		}
134	}
135	free(cpumask);
136}
137#endif /* HAVE_NUMA_V2 */
138
/*
 * get_allowed_nodes_arr - get number and array of available nodes
 * @flag: bitmask of NH_MEMS and/or NH_CPUS selecting which filters are
 *        applied to the set of existing nodes; 0 applies no filter
 * @num_nodes: pointer where number of available nodes will be stored
 * @nodes: if NULL, only num_nodes is returned. Otherwise it receives a
 *         newly allocated array of node ids (the filtered node bitmask
 *         compacted, without holes, so that each field contains a node
 *         number), which the caller is responsible to free. On failure,
 *         and when NUMA support is not available, *nodes is set to NULL.
 * RETURNS:
 *     0 on success
 *    -1 on allocation failure
 *    -2 on get_mempolicy failure
 */
int get_allowed_nodes_arr(int flag, int *num_nodes, int **nodes)
{
	int ret = 0;
#ifdef HAVE_NUMA_V2
	int i, saved_errno;
	nodemask_t *nodemask = NULL;
	unsigned long max_node, nodemask_size;
#endif
	*num_nodes = 0;
	if (nodes)
		*nodes = NULL;

#ifdef HAVE_NUMA_V2
	if (numa_available() == -1)
		return 0;

	/* round up so the mask is a whole number of unsigned longs */
	max_node = LTP_ALIGN(get_max_node(), sizeof(unsigned long)*8);
	nodemask_size = max_node / 8;

	nodemask = malloc(nodemask_size);
	if (nodes)
		*nodes = malloc(sizeof(**nodes) * max_node);

	if (nodemask == NULL || (nodes && (*nodes == NULL))) {
		ret = -1;
		goto out;
	}

	/* allow all nodes at start, then filter based on flags */
	get_nodemask_allnodes(nodemask, max_node);
	if ((flag & NH_MEMS) == NH_MEMS) {
		ret = filter_nodemask_mem(nodemask, max_node);
		if (ret < 0)
			goto out;
	}
	if ((flag & NH_CPUS) == NH_CPUS)
		filter_nodemask_cpu(nodemask, max_node);

	for (i = 0; i < (int)max_node; i++) {
		if (!nodemask_isset(nodemask, i))
			continue;
		if (nodes)
			(*nodes)[*num_nodes] = i;
		(*num_nodes)++;
	}

out:
	/* keep errno of the failing call intact for the caller */
	saved_errno = errno;
	free(nodemask);
	if (ret < 0 && nodes) {
		/* never hand back an array together with an error code */
		free(*nodes);
		*nodes = NULL;
	}
	errno = saved_errno;
#endif /* HAVE_NUMA_V2 */
	return ret;
}
204
/*
 * get_allowed_nodes - convenience function to get fixed number of nodes
 * @flag: passed through to get_allowed_nodes_arr() (NH_MEMS and/or NH_CPUS)
 * @count: how many nodes to get
 * @...: int pointers, where node ids will be stored; exactly 'count'
 *       pointers are read from the argument list
 * RETURNS:
 *     0 on success
 *    -1 on allocation failure
 *    -2 on get_mempolicy failure
 *    -3 on not enough allowed nodes (errno is set to EINVAL)
 */
int get_allowed_nodes(int flag, int count, ...)
{
	int ret;
	int i, *nodep;
	int saved_errno;
	va_list ap;
	int num_nodes = 0;
	int *nodes = NULL;

	ret = get_allowed_nodes_arr(flag, &num_nodes, &nodes);
	if (ret < 0) {
		/*
		 * The array may still be allocated when the failure happened
		 * after allocation; release it without disturbing errno.
		 */
		saved_errno = errno;
		free(nodes);
		errno = saved_errno;
		return ret;
	}

	va_start(ap, count);
	for (i = 0; i < count; i++) {
		nodep = va_arg(ap, int *);
		if (i >= num_nodes) {
			ret = -3;
			break;
		}
		*nodep = nodes[i];
	}
	va_end(ap);
	free(nodes);

	/* set errno last so that cleanup above can not overwrite it */
	if (ret == -3)
		errno = EINVAL;

	return ret;
}
243
/*
 * print_node_info - print the node ids selected by 'flag' to stdout
 * @flag: filter flags handed to get_allowed_nodes_arr()
 *
 * Prints either the space separated list of node ids or, when the lookup
 * fails, the error code.
 */
static void print_node_info(int flag)
{
	int *node_ids = NULL;
	int count, rc, idx;

	rc = get_allowed_nodes_arr(flag, &count, &node_ids);
	printf("nodes (flag=%d): ", flag);
	if (rc != 0) {
		printf("error(%d)\n", rc);
	} else {
		for (idx = 0; idx < count; idx++)
			printf("%d ", node_ids[idx]);
		printf("\n");
	}
	free(node_ids);
}
259
260/*
261 * nh_dump_nodes - dump info about nodes to stdout
262 */
263void nh_dump_nodes(void)
264{
265	print_node_info(0);
266	print_node_info(NH_MEMS);
267	print_node_info(NH_CPUS);
268	print_node_info(NH_MEMS | NH_CPUS);
269}
270
271/*
272 * is_numa - judge a system is NUMA system or not
273 * @flag: NH_MEMS and/or NH_CPUS
274 * @min_nodes: find at least 'min_nodes' nodes with memory
275 * NOTE: the function is designed to try to find at least 'min_nodes'
276 * available nodes, where each node contains memory.
277 * WARN: Don't use this func in child, as it calls tst_brkm()
278 * RETURNS:
279 *     0 - it's not a NUMA system
280 *     1 - it's a NUMA system
281 */
282int is_numa(void (*cleanup_fn)(void), int flag, int min_nodes)
283{
284	int ret;
285	int numa_nodes = 0;
286
287	ret = get_allowed_nodes_arr(flag, &numa_nodes, NULL);
288	if (ret < 0)
289		tst_brkm(TBROK | TERRNO, cleanup_fn, "get_allowed_nodes_arr");
290
291	if (numa_nodes >= min_nodes)
292		return 1;
293	else
294		return 0;
295}
296