1f08c3bdfSopenharmony_ci/*
2f08c3bdfSopenharmony_ci * Copyright (C) 2012 Linux Test Project, Inc.
3f08c3bdfSopenharmony_ci *
4f08c3bdfSopenharmony_ci * This program is free software;  you can redistribute it and/or modify
5f08c3bdfSopenharmony_ci * it under the terms of the GNU General Public License as published by
6f08c3bdfSopenharmony_ci * the Free Software Foundation; either version 2 of the License, or
7f08c3bdfSopenharmony_ci * (at your option) any later version.
8f08c3bdfSopenharmony_ci *
9f08c3bdfSopenharmony_ci * This program is distributed in the hope that it will be useful,
10f08c3bdfSopenharmony_ci * but WITHOUT ANY WARRANTY;  without even the implied warranty of
11f08c3bdfSopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
12f08c3bdfSopenharmony_ci * the GNU General Public License for more details.
13f08c3bdfSopenharmony_ci *
14f08c3bdfSopenharmony_ci * You should have received a copy of the GNU General Public License
15f08c3bdfSopenharmony_ci * along with this program;  if not, write to the Free Software
16f08c3bdfSopenharmony_ci * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17f08c3bdfSopenharmony_ci */
18f08c3bdfSopenharmony_ci
19f08c3bdfSopenharmony_ci#include "config.h"
20f08c3bdfSopenharmony_ci#include <errno.h>
21f08c3bdfSopenharmony_ci#if HAVE_NUMA_H
22f08c3bdfSopenharmony_ci#include <numa.h>
23f08c3bdfSopenharmony_ci#endif
24f08c3bdfSopenharmony_ci#if HAVE_NUMAIF_H
25f08c3bdfSopenharmony_ci#include <numaif.h>
26f08c3bdfSopenharmony_ci#endif
27f08c3bdfSopenharmony_ci#include <stdarg.h>
28f08c3bdfSopenharmony_ci#include <stdio.h>
29f08c3bdfSopenharmony_ci#include <string.h>
30f08c3bdfSopenharmony_ci#include <stdlib.h>
31f08c3bdfSopenharmony_ci#include <unistd.h>
32f08c3bdfSopenharmony_ci#include <errno.h>
33f08c3bdfSopenharmony_ci
34f08c3bdfSopenharmony_ci#include "test.h"
35f08c3bdfSopenharmony_ci#include "safe_macros.h"
36f08c3bdfSopenharmony_ci#include "numa_helper.h"
37f08c3bdfSopenharmony_ci#include "lapi/syscalls.h"
38f08c3bdfSopenharmony_ci
/*
 * get_max_node - upper bound for node ids handled by this helper
 *
 * RETURNS:
 *     numa_max_possible_node() + 1 when built with HAVE_NUMA_V2,
 *     0 otherwise
 */
unsigned long get_max_node(void)
{
#ifdef HAVE_NUMA_V2
	return numa_max_possible_node() + 1;
#else
	return 0;
#endif
}
47f08c3bdfSopenharmony_ci
48f08c3bdfSopenharmony_ci#ifdef HAVE_NUMA_V2
49f08c3bdfSopenharmony_cistatic void get_nodemask_allnodes(nodemask_t * nodemask, unsigned long max_node)
50f08c3bdfSopenharmony_ci{
51f08c3bdfSopenharmony_ci	unsigned long nodemask_size = max_node / 8;
52f08c3bdfSopenharmony_ci	int i;
53f08c3bdfSopenharmony_ci	char fn[64];
54f08c3bdfSopenharmony_ci	struct stat st;
55f08c3bdfSopenharmony_ci
56f08c3bdfSopenharmony_ci	memset(nodemask, 0, nodemask_size);
57f08c3bdfSopenharmony_ci	for (i = 0; i < (int)max_node; i++) {
58f08c3bdfSopenharmony_ci		sprintf(fn, "/sys/devices/system/node/node%d", i);
59f08c3bdfSopenharmony_ci		if (stat(fn, &st) == 0)
60f08c3bdfSopenharmony_ci			nodemask_set(nodemask, i);
61f08c3bdfSopenharmony_ci	}
62f08c3bdfSopenharmony_ci}
63f08c3bdfSopenharmony_ci
64f08c3bdfSopenharmony_cistatic int filter_nodemask_mem(nodemask_t * nodemask, unsigned long max_node)
65f08c3bdfSopenharmony_ci{
66f08c3bdfSopenharmony_ci#ifdef MPOL_F_MEMS_ALLOWED
67f08c3bdfSopenharmony_ci	unsigned long nodemask_size = max_node / 8;
68f08c3bdfSopenharmony_ci	memset(nodemask, 0, nodemask_size);
69f08c3bdfSopenharmony_ci	/*
70f08c3bdfSopenharmony_ci	 * avoid numa_get_mems_allowed(), because of bug in getpol()
71f08c3bdfSopenharmony_ci	 * utility function in older versions:
72f08c3bdfSopenharmony_ci	 * http://www.spinics.net/lists/linux-numa/msg00849.html
73f08c3bdfSopenharmony_ci	 *
74f08c3bdfSopenharmony_ci	 * At the moment numa_available() implementation also uses
75f08c3bdfSopenharmony_ci	 * get_mempolicy, but let's make explicit check for ENOSYS
76f08c3bdfSopenharmony_ci	 * here as well in case it changes in future. Silent ignore
77f08c3bdfSopenharmony_ci	 * of ENOSYS is OK, because without NUMA caller gets empty
78f08c3bdfSopenharmony_ci	 * set of nodes anyway.
79f08c3bdfSopenharmony_ci	 */
80f08c3bdfSopenharmony_ci	if (syscall(__NR_get_mempolicy, NULL, nodemask->n,
81f08c3bdfSopenharmony_ci		    max_node, 0, MPOL_F_MEMS_ALLOWED) < 0) {
82f08c3bdfSopenharmony_ci		if (errno == ENOSYS)
83f08c3bdfSopenharmony_ci			return 0;
84f08c3bdfSopenharmony_ci		return -2;
85f08c3bdfSopenharmony_ci	}
86f08c3bdfSopenharmony_ci#else
87f08c3bdfSopenharmony_ci	int i;
88f08c3bdfSopenharmony_ci	/*
89f08c3bdfSopenharmony_ci	 * old libnuma/kernel don't have MPOL_F_MEMS_ALLOWED, so let's assume
90f08c3bdfSopenharmony_ci	 * that we can use any node with memory > 0
91f08c3bdfSopenharmony_ci	 */
92f08c3bdfSopenharmony_ci	for (i = 0; i < (int)max_node; i++) {
93f08c3bdfSopenharmony_ci		if (!nodemask_isset(nodemask, i))
94f08c3bdfSopenharmony_ci			continue;
95f08c3bdfSopenharmony_ci		if (numa_node_size64(i, NULL) <= 0)
96f08c3bdfSopenharmony_ci			nodemask_clr(nodemask, i);
97f08c3bdfSopenharmony_ci	}
98f08c3bdfSopenharmony_ci#endif /* MPOL_F_MEMS_ALLOWED */
99f08c3bdfSopenharmony_ci	return 0;
100f08c3bdfSopenharmony_ci}
101f08c3bdfSopenharmony_ci
/*
 * cpumask_has_cpus - check a textual hex cpumask for any set bit
 * @cpumask: text to scan
 * @len: maximum number of characters to inspect
 *
 * Scanning stops at the first NUL byte or after @len characters.
 * Characters other than '1'-'9' and 'a'-'f' (e.g. '0', ',', newline)
 * are skipped.
 *
 * RETURNS:
 *     1 if a non-zero lowercase hex digit was found
 *     0 otherwise
 */
static int cpumask_has_cpus(char *cpumask, size_t len)
{
	unsigned int pos;

	for (pos = 0; pos < len; pos++) {
		const char c = cpumask[pos];

		if (c == '\0')
			break;
		if ((c >= '1' && c <= '9') || (c >= 'a' && c <= 'f'))
			return 1;
	}

	return 0;
}
114f08c3bdfSopenharmony_ci
115f08c3bdfSopenharmony_cistatic void filter_nodemask_cpu(nodemask_t * nodemask, unsigned long max_node)
116f08c3bdfSopenharmony_ci{
117f08c3bdfSopenharmony_ci	char *cpumask = NULL;
118f08c3bdfSopenharmony_ci	char fn[64];
119f08c3bdfSopenharmony_ci	FILE *f;
120f08c3bdfSopenharmony_ci	size_t len;
121f08c3bdfSopenharmony_ci	int i, ret;
122f08c3bdfSopenharmony_ci
123f08c3bdfSopenharmony_ci	for (i = 0; i < (int)max_node; i++) {
124f08c3bdfSopenharmony_ci		if (!nodemask_isset(nodemask, i))
125f08c3bdfSopenharmony_ci			continue;
126f08c3bdfSopenharmony_ci		sprintf(fn, "/sys/devices/system/node/node%d/cpumap", i);
127f08c3bdfSopenharmony_ci		f = fopen(fn, "r");
128f08c3bdfSopenharmony_ci		if (f) {
129f08c3bdfSopenharmony_ci			ret = getdelim(&cpumask, &len, '\n', f);
130f08c3bdfSopenharmony_ci			if ((ret > 0) && (!cpumask_has_cpus(cpumask, len)))
131f08c3bdfSopenharmony_ci				nodemask_clr(nodemask, i);
132f08c3bdfSopenharmony_ci			fclose(f);
133f08c3bdfSopenharmony_ci		}
134f08c3bdfSopenharmony_ci	}
135f08c3bdfSopenharmony_ci	free(cpumask);
136f08c3bdfSopenharmony_ci}
137f08c3bdfSopenharmony_ci#endif /* HAVE_NUMA_V2 */
138f08c3bdfSopenharmony_ci
/*
 * get_allowed_nodes_arr - get number and array of available nodes
 * @flag: NH_MEMS and/or NH_CPUS; selects which filters are applied to the
 *                 set of nodes found in sysfs (0 applies no filter)
 * @num_nodes: pointer where number of available nodes will be stored
 * @nodes: array of available node ids, this is the node bitmask
 *                 compacted (without holes), so that each field
 *                 contains node number. If NULL only num_nodes is
 *                 returned, otherwise it contains a newly allocated array,
 *                 which the caller is responsible to free. On failure
 *                 *nodes is set to NULL and nothing needs to be freed.
 * RETURNS:
 *     0 on success (also when NUMA is not available; *num_nodes is 0 then)
 *    -1 on allocation failure
 *    -2 on get_mempolicy failure
 */
int get_allowed_nodes_arr(int flag, int *num_nodes, int **nodes)
{
	int ret = 0;
#ifdef HAVE_NUMA_V2
	int i;
	nodemask_t *nodemask = NULL;
	unsigned long max_node, nodemask_size;
#endif
	*num_nodes = 0;
	if (nodes)
		*nodes = NULL;

#ifdef HAVE_NUMA_V2
	if (numa_available() == -1)
		return 0;

	/* round up to whole unsigned longs, the unit of the node bitmask */
	max_node = LTP_ALIGN(get_max_node(), sizeof(unsigned long)*8);
	nodemask_size = max_node / 8;

	nodemask = malloc(nodemask_size);
	if (nodes)
		*nodes = malloc(sizeof(int) * max_node);

	do {
		if (nodemask == NULL || (nodes && (*nodes == NULL))) {
			ret = -1;
			break;
		}

		/* allow all nodes at start, then filter based on flags */
		get_nodemask_allnodes(nodemask, max_node);
		if ((flag & NH_MEMS) == NH_MEMS) {
			ret = filter_nodemask_mem(nodemask, max_node);
			if (ret < 0)
				break;
		}
		if ((flag & NH_CPUS) == NH_CPUS)
			filter_nodemask_cpu(nodemask, max_node);

		/* compact the bitmask into a dense list of node ids */
		for (i = 0; i < (int)max_node; i++) {
			if (nodemask_isset(nodemask, i)) {
				if (nodes)
					(*nodes)[*num_nodes] = i;
				(*num_nodes)++;
			}
		}
	} while (0);

	if (ret < 0) {
		/* keep errno of the failing call for the caller's report */
		int saved_errno = errno;

		/* don't hand a half-initialized array back on failure */
		if (nodes) {
			free(*nodes);
			*nodes = NULL;
		}
		free(nodemask);
		errno = saved_errno;
		return ret;
	}
	free(nodemask);
#endif /* HAVE_NUMA_V2 */
	return ret;
}
204f08c3bdfSopenharmony_ci
/*
 * get_allowed_nodes - convenience function to get fixed number of nodes
 * @flag: passed unchanged to get_allowed_nodes_arr()
 * @count: how many nodes to get
 * @...: int pointers, where node ids will be stored
 *
 * When fewer than @count nodes are available, the pointers for which a
 * node exists are still filled in before -3 is returned.
 *
 * RETURNS:
 *     0 on success
 *    -1 on allocation failure
 *    -2 on get_mempolicy failure
 *    -3 on not enough allowed nodes (errno is set to EINVAL)
 */
int get_allowed_nodes(int flag, int count, ...)
{
	int ret;
	int i, *nodep;
	va_list ap;
	int num_nodes = 0;
	int *nodes = NULL;

	ret = get_allowed_nodes_arr(flag, &num_nodes, &nodes);
	if (ret < 0) {
		/* release whatever was allocated, keeping errno intact */
		int saved_errno = errno;

		free(nodes);
		errno = saved_errno;
		return ret;
	}

	va_start(ap, count);
	for (i = 0; i < count; i++) {
		if (i >= num_nodes) {
			ret = -3;
			break;
		}
		nodep = va_arg(ap, int *);
		*nodep = nodes[i];
	}
	va_end(ap);
	free(nodes);

	/* set errno last so that free() cannot overwrite it */
	if (ret == -3)
		errno = EINVAL;

	return ret;
}
243f08c3bdfSopenharmony_ci
/*
 * print_node_info - print the nodes selected by flag to stdout
 * @flag: passed unchanged to get_allowed_nodes_arr()
 *
 * Prints one line: the node ids on success, or the error code returned
 * by get_allowed_nodes_arr() otherwise.
 */
static void print_node_info(int flag)
{
	int *allowed_nodes = NULL;
	int num_nodes;
	int idx = 0;
	int ret = get_allowed_nodes_arr(flag, &num_nodes, &allowed_nodes);

	printf("nodes (flag=%d): ", flag);
	if (ret != 0) {
		printf("error(%d)\n", ret);
	} else {
		while (idx < num_nodes) {
			printf("%d ", allowed_nodes[idx]);
			idx++;
		}
		printf("\n");
	}
	free(allowed_nodes);
}
259f08c3bdfSopenharmony_ci
260f08c3bdfSopenharmony_ci/*
261f08c3bdfSopenharmony_ci * nh_dump_nodes - dump info about nodes to stdout
262f08c3bdfSopenharmony_ci */
263f08c3bdfSopenharmony_civoid nh_dump_nodes(void)
264f08c3bdfSopenharmony_ci{
265f08c3bdfSopenharmony_ci	print_node_info(0);
266f08c3bdfSopenharmony_ci	print_node_info(NH_MEMS);
267f08c3bdfSopenharmony_ci	print_node_info(NH_CPUS);
268f08c3bdfSopenharmony_ci	print_node_info(NH_MEMS | NH_CPUS);
269f08c3bdfSopenharmony_ci}
270f08c3bdfSopenharmony_ci
271f08c3bdfSopenharmony_ci/*
272f08c3bdfSopenharmony_ci * is_numa - judge a system is NUMA system or not
273f08c3bdfSopenharmony_ci * @flag: NH_MEMS and/or NH_CPUS
274f08c3bdfSopenharmony_ci * @min_nodes: find at least 'min_nodes' nodes with memory
275f08c3bdfSopenharmony_ci * NOTE: the function is designed to try to find at least 'min_nodes'
276f08c3bdfSopenharmony_ci * available nodes, where each node contains memory.
277f08c3bdfSopenharmony_ci * WARN: Don't use this func in child, as it calls tst_brkm()
278f08c3bdfSopenharmony_ci * RETURNS:
279f08c3bdfSopenharmony_ci *     0 - it's not a NUMA system
280f08c3bdfSopenharmony_ci *     1 - it's a NUMA system
281f08c3bdfSopenharmony_ci */
282f08c3bdfSopenharmony_ciint is_numa(void (*cleanup_fn)(void), int flag, int min_nodes)
283f08c3bdfSopenharmony_ci{
284f08c3bdfSopenharmony_ci	int ret;
285f08c3bdfSopenharmony_ci	int numa_nodes = 0;
286f08c3bdfSopenharmony_ci
287f08c3bdfSopenharmony_ci	ret = get_allowed_nodes_arr(flag, &numa_nodes, NULL);
288f08c3bdfSopenharmony_ci	if (ret < 0)
289f08c3bdfSopenharmony_ci		tst_brkm(TBROK | TERRNO, cleanup_fn, "get_allowed_nodes_arr");
290f08c3bdfSopenharmony_ci
291f08c3bdfSopenharmony_ci	if (numa_nodes >= min_nodes)
292f08c3bdfSopenharmony_ci		return 1;
293f08c3bdfSopenharmony_ci	else
294f08c3bdfSopenharmony_ci		return 0;
295f08c3bdfSopenharmony_ci}
296