1// SPDX-License-Identifier: GPL-2.0
2
/*
 * Based on Christian Brauner's clone3() example.
 * These tests assume that they are running in the host's
 * PID namespace.
 */
8
9#define _GNU_SOURCE
10#include <errno.h>
11#include <linux/types.h>
12#include <linux/sched.h>
13#include <stdio.h>
14#include <stdlib.h>
15#include <stdbool.h>
16#include <sys/syscall.h>
17#include <sys/types.h>
18#include <sys/un.h>
19#include <sys/wait.h>
20#include <unistd.h>
21#include <sched.h>
22
23#include "../kselftest.h"
24#include "clone3_selftests.h"
25
26#ifndef MAX_PID_NS_LEVEL
27#define MAX_PID_NS_LEVEL 32
28#endif
29
30static int pipe_1[2];
31static int pipe_2[2];
32
/*
 * Terminate the calling process with exit status 'ret'.
 *
 * _exit() does not flush stdio buffers, so stdout and stderr are flushed
 * by hand (in that order) before leaving.
 */
static void child_exit(int ret)
{
	FILE *const streams[] = { stdout, stderr };
	size_t idx;

	for (idx = 0; idx < sizeof(streams) / sizeof(streams[0]); idx++)
		fflush(streams[idx]);

	_exit(ret);
}
39
40static int call_clone3_set_tid(pid_t *set_tid,
41			       size_t set_tid_size,
42			       int flags,
43			       int expected_pid,
44			       bool wait_for_it)
45{
46	int status;
47	pid_t pid = -1;
48
49	struct __clone_args args = {
50		.flags = flags,
51		.exit_signal = SIGCHLD,
52		.set_tid = ptr_to_u64(set_tid),
53		.set_tid_size = set_tid_size,
54	};
55
56	pid = sys_clone3(&args, sizeof(args));
57	if (pid < 0) {
58		ksft_print_msg("%s - Failed to create new process\n",
59			       strerror(errno));
60		return -errno;
61	}
62
63	if (pid == 0) {
64		int ret;
65		char tmp = 0;
66		int exit_code = EXIT_SUCCESS;
67
68		ksft_print_msg("I am the child, my PID is %d (expected %d)\n",
69			       getpid(), set_tid[0]);
70		if (wait_for_it) {
71			ksft_print_msg("[%d] Child is ready and waiting\n",
72				       getpid());
73
74			/* Signal the parent that the child is ready */
75			close(pipe_1[0]);
76			ret = write(pipe_1[1], &tmp, 1);
77			if (ret != 1) {
78				ksft_print_msg(
79					"Writing to pipe returned %d", ret);
80				exit_code = EXIT_FAILURE;
81			}
82			close(pipe_1[1]);
83			close(pipe_2[1]);
84			ret = read(pipe_2[0], &tmp, 1);
85			if (ret != 1) {
86				ksft_print_msg(
87					"Reading from pipe returned %d", ret);
88				exit_code = EXIT_FAILURE;
89			}
90			close(pipe_2[0]);
91		}
92
93		if (set_tid[0] != getpid())
94			child_exit(EXIT_FAILURE);
95		child_exit(exit_code);
96	}
97
98	if (expected_pid == 0 || expected_pid == pid) {
99		ksft_print_msg("I am the parent (%d). My child's pid is %d\n",
100			       getpid(), pid);
101	} else {
102		ksft_print_msg(
103			"Expected child pid %d does not match actual pid %d\n",
104			expected_pid, pid);
105		return -1;
106	}
107
108	if (waitpid(pid, &status, 0) < 0) {
109		ksft_print_msg("Child returned %s\n", strerror(errno));
110		return -errno;
111	}
112
113	if (!WIFEXITED(status))
114		return -1;
115
116	return WEXITSTATUS(status);
117}
118
/*
 * Run one clone3()/set_tid scenario and record it as a kselftest result.
 *
 * The arguments other than 'expected' are forwarded unchanged to
 * call_clone3_set_tid(). The test passes when that call returns exactly
 * 'expected' and fails otherwise.
 */
static void test_clone3_set_tid(pid_t *set_tid,
				size_t set_tid_size,
				int flags,
				int expected,
				int expected_pid,
				bool wait_for_it)
{
	int result;

	ksft_print_msg(
		"[%d] Trying clone3() with CLONE_SET_TID to %d and 0x%x\n",
		getpid(), set_tid[0], flags);

	result = call_clone3_set_tid(set_tid, set_tid_size, flags,
				     expected_pid, wait_for_it);

	ksft_print_msg(
		"[%d] clone3() with CLONE_SET_TID %d says :%d - expected %d\n",
		getpid(), set_tid[0], result, expected);

	if (result == expected) {
		ksft_test_result_pass(
			"[%d] Result (%d) matches expectation (%d)\n",
			getpid(), result, expected);
		return;
	}

	ksft_test_result_fail(
		"[%d] Result (%d) is different than expected (%d)\n",
		getpid(), result, expected);
}
145int main(int argc, char *argv[])
146{
147	FILE *f;
148	char buf;
149	char *line;
150	int status;
151	int ret = -1;
152	size_t len = 0;
153	int pid_max = 0;
154	uid_t uid = getuid();
155	char proc_path[100] = {0};
156	pid_t pid, ns1, ns2, ns3, ns_pid;
157	pid_t set_tid[MAX_PID_NS_LEVEL * 2];
158
159	ksft_print_header();
160	ksft_set_plan(29);
161	test_clone3_supported();
162
163	if (pipe(pipe_1) < 0 || pipe(pipe_2) < 0)
164		ksft_exit_fail_msg("pipe() failed\n");
165
166	f = fopen("/proc/sys/kernel/pid_max", "r");
167	if (f == NULL)
168		ksft_exit_fail_msg(
169			"%s - Could not open /proc/sys/kernel/pid_max\n",
170			strerror(errno));
171	fscanf(f, "%d", &pid_max);
172	fclose(f);
173	ksft_print_msg("/proc/sys/kernel/pid_max %d\n", pid_max);
174
175	/* Try invalid settings */
176	memset(&set_tid, 0, sizeof(set_tid));
177	test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL + 1, 0, -EINVAL, 0, 0);
178
179	test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 2, 0, -EINVAL, 0, 0);
180
181	test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 2 + 1, 0,
182			-EINVAL, 0, 0);
183
184	test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 42, 0, -EINVAL, 0, 0);
185
186	/*
187	 * This can actually work if this test running in a MAX_PID_NS_LEVEL - 1
188	 * nested PID namespace.
189	 */
190	test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL - 1, 0, -EINVAL, 0, 0);
191
192	memset(&set_tid, 0xff, sizeof(set_tid));
193	test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL + 1, 0, -EINVAL, 0, 0);
194
195	test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 2, 0, -EINVAL, 0, 0);
196
197	test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 2 + 1, 0,
198			-EINVAL, 0, 0);
199
200	test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 42, 0, -EINVAL, 0, 0);
201
202	/*
203	 * This can actually work if this test running in a MAX_PID_NS_LEVEL - 1
204	 * nested PID namespace.
205	 */
206	test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL - 1, 0, -EINVAL, 0, 0);
207
208	memset(&set_tid, 0, sizeof(set_tid));
209	/* Try with an invalid PID */
210	set_tid[0] = 0;
211	test_clone3_set_tid(set_tid, 1, 0, -EINVAL, 0, 0);
212
213	set_tid[0] = -1;
214	test_clone3_set_tid(set_tid, 1, 0, -EINVAL, 0, 0);
215
216	/* Claim that the set_tid array actually contains 2 elements. */
217	test_clone3_set_tid(set_tid, 2, 0, -EINVAL, 0, 0);
218
219	/* Try it in a new PID namespace */
220	if (uid == 0)
221		test_clone3_set_tid(set_tid, 1, CLONE_NEWPID, -EINVAL, 0, 0);
222	else
223		ksft_test_result_skip("Clone3() with set_tid requires root\n");
224
225	/* Try with a valid PID (1) this should return -EEXIST. */
226	set_tid[0] = 1;
227	if (uid == 0)
228		test_clone3_set_tid(set_tid, 1, 0, -EEXIST, 0, 0);
229	else
230		ksft_test_result_skip("Clone3() with set_tid requires root\n");
231
232	/* Try it in a new PID namespace */
233	if (uid == 0)
234		test_clone3_set_tid(set_tid, 1, CLONE_NEWPID, 0, 0, 0);
235	else
236		ksft_test_result_skip("Clone3() with set_tid requires root\n");
237
238	/* pid_max should fail everywhere */
239	set_tid[0] = pid_max;
240	test_clone3_set_tid(set_tid, 1, 0, -EINVAL, 0, 0);
241
242	if (uid == 0)
243		test_clone3_set_tid(set_tid, 1, CLONE_NEWPID, -EINVAL, 0, 0);
244	else
245		ksft_test_result_skip("Clone3() with set_tid requires root\n");
246
247	if (uid != 0) {
248		/*
249		 * All remaining tests require root. Tell the framework
250		 * that all those tests are skipped as non-root.
251		 */
252		ksft_cnt.ksft_xskip += ksft_plan - ksft_test_num();
253		goto out;
254	}
255
256	/* Find the current active PID */
257	pid = fork();
258	if (pid == 0) {
259		ksft_print_msg("Child has PID %d\n", getpid());
260		child_exit(EXIT_SUCCESS);
261	}
262	if (waitpid(pid, &status, 0) < 0)
263		ksft_exit_fail_msg("Waiting for child %d failed", pid);
264
265	/* After the child has finished, its PID should be free. */
266	set_tid[0] = pid;
267	test_clone3_set_tid(set_tid, 1, 0, 0, 0, 0);
268
269	/* This should fail as there is no PID 1 in that namespace */
270	test_clone3_set_tid(set_tid, 1, CLONE_NEWPID, -EINVAL, 0, 0);
271
272	/*
273	 * Creating a process with PID 1 in the newly created most nested
274	 * PID namespace and PID 'pid' in the parent PID namespace. This
275	 * needs to work.
276	 */
277	set_tid[0] = 1;
278	set_tid[1] = pid;
279	test_clone3_set_tid(set_tid, 2, CLONE_NEWPID, 0, pid, 0);
280
281	ksft_print_msg("unshare PID namespace\n");
282	if (unshare(CLONE_NEWPID) == -1)
283		ksft_exit_fail_msg("unshare(CLONE_NEWPID) failed: %s\n",
284				strerror(errno));
285
286	set_tid[0] = pid;
287
288	/* This should fail as there is no PID 1 in that namespace */
289	test_clone3_set_tid(set_tid, 1, 0, -EINVAL, 0, 0);
290
291	/* Let's create a PID 1 */
292	ns_pid = fork();
293	if (ns_pid == 0) {
294		/*
295		 * This and the next test cases check that all pid-s are
296		 * released on error paths.
297		 */
298		set_tid[0] = 43;
299		set_tid[1] = -1;
300		test_clone3_set_tid(set_tid, 2, 0, -EINVAL, 0, 0);
301
302		set_tid[0] = 43;
303		set_tid[1] = pid;
304		test_clone3_set_tid(set_tid, 2, 0, 0, 43, 0);
305
306		ksft_print_msg("Child in PID namespace has PID %d\n", getpid());
307		set_tid[0] = 2;
308		test_clone3_set_tid(set_tid, 1, 0, 0, 2, 0);
309
310		set_tid[0] = 1;
311		set_tid[1] = -1;
312		set_tid[2] = pid;
313		/* This should fail as there is invalid PID at level '1'. */
314		test_clone3_set_tid(set_tid, 3, CLONE_NEWPID, -EINVAL, 0, 0);
315
316		set_tid[0] = 1;
317		set_tid[1] = 42;
318		set_tid[2] = pid;
319		/*
320		 * This should fail as there are not enough active PID
321		 * namespaces. Again assuming this is running in the host's
322		 * PID namespace. Not yet nested.
323		 */
324		test_clone3_set_tid(set_tid, 4, CLONE_NEWPID, -EINVAL, 0, 0);
325
326		/*
327		 * This should work and from the parent we should see
328		 * something like 'NSpid:	pid	42	1'.
329		 */
330		test_clone3_set_tid(set_tid, 3, CLONE_NEWPID, 0, 42, true);
331
332		child_exit(ksft_cnt.ksft_fail);
333	}
334
335	close(pipe_1[1]);
336	close(pipe_2[0]);
337	while (read(pipe_1[0], &buf, 1) > 0) {
338		ksft_print_msg("[%d] Child is ready and waiting\n", getpid());
339		break;
340	}
341
342	snprintf(proc_path, sizeof(proc_path), "/proc/%d/status", pid);
343	f = fopen(proc_path, "r");
344	if (f == NULL)
345		ksft_exit_fail_msg(
346			"%s - Could not open %s\n",
347			strerror(errno), proc_path);
348
349	while (getline(&line, &len, f) != -1) {
350		if (strstr(line, "NSpid")) {
351			int i;
352
353			/* Verify that all generated PIDs are as expected. */
354			i = sscanf(line, "NSpid:\t%d\t%d\t%d",
355				   &ns3, &ns2, &ns1);
356			if (i != 3) {
357				ksft_print_msg(
358					"Unexpected 'NSPid:' entry: %s",
359					line);
360				ns1 = ns2 = ns3 = 0;
361			}
362			break;
363		}
364	}
365	fclose(f);
366	free(line);
367	close(pipe_2[0]);
368
369	/* Tell the clone3()'d child to finish. */
370	write(pipe_2[1], &buf, 1);
371	close(pipe_2[1]);
372
373	if (waitpid(ns_pid, &status, 0) < 0) {
374		ksft_print_msg("Child returned %s\n", strerror(errno));
375		ret = -errno;
376		goto out;
377	}
378
379	if (!WIFEXITED(status))
380		ksft_test_result_fail("Child error\n");
381
382	ksft_cnt.ksft_pass += 6 - (ksft_cnt.ksft_fail - WEXITSTATUS(status));
383	ksft_cnt.ksft_fail = WEXITSTATUS(status);
384
385	if (ns3 == pid && ns2 == 42 && ns1 == 1)
386		ksft_test_result_pass(
387			"PIDs in all namespaces as expected (%d,%d,%d)\n",
388			ns3, ns2, ns1);
389	else
390		ksft_test_result_fail(
391			"PIDs in all namespaces not as expected (%d,%d,%d)\n",
392			ns3, ns2, ns1);
393out:
394	ret = 0;
395
396	return !ret ? ksft_exit_pass() : ksft_exit_fail();
397}
398