1/* Authors: Gregory P. Smith & Jeffrey Yasskin */ 2#ifndef Py_BUILD_CORE_BUILTIN 3# define Py_BUILD_CORE_MODULE 1 4#endif 5 6#include "Python.h" 7#include "pycore_fileutils.h" 8#if defined(HAVE_PIPE2) && !defined(_GNU_SOURCE) 9# define _GNU_SOURCE 10#endif 11#include <unistd.h> 12#include <fcntl.h> 13#ifdef HAVE_SYS_TYPES_H 14#include <sys/types.h> 15#endif 16#if defined(HAVE_SYS_STAT_H) 17#include <sys/stat.h> 18#endif 19#ifdef HAVE_SYS_SYSCALL_H 20#include <sys/syscall.h> 21#endif 22#if defined(HAVE_SYS_RESOURCE_H) 23#include <sys/resource.h> 24#endif 25#ifdef HAVE_DIRENT_H 26#include <dirent.h> 27#endif 28#ifdef HAVE_GRP_H 29#include <grp.h> 30#endif /* HAVE_GRP_H */ 31 32#include "posixmodule.h" 33 34#ifdef _Py_MEMORY_SANITIZER 35# include <sanitizer/msan_interface.h> 36#endif 37 38#if defined(__ANDROID__) && __ANDROID_API__ < 21 && !defined(SYS_getdents64) 39# include <sys/linux-syscalls.h> 40# define SYS_getdents64 __NR_getdents64 41#endif 42 43#if defined(__linux__) && defined(HAVE_VFORK) && defined(HAVE_SIGNAL_H) && \ 44 defined(HAVE_PTHREAD_SIGMASK) && !defined(HAVE_BROKEN_PTHREAD_SIGMASK) 45/* If this is ever expanded to non-Linux platforms, verify what calls are 46 * allowed after vfork(). Ex: setsid() may be disallowed on macOS? */ 47# include <signal.h> 48# define VFORK_USABLE 1 49#endif 50 51#if defined(__sun) && defined(__SVR4) 52/* readdir64 is used to work around Solaris 9 bug 6395699. */ 53# define readdir readdir64 54# define dirent dirent64 55# if !defined(HAVE_DIRFD) 56/* Some versions of Solaris lack dirfd(). */ 57# define dirfd(dirp) ((dirp)->dd_fd) 58# define HAVE_DIRFD 59# endif 60#endif 61 62#if defined(__FreeBSD__) || (defined(__APPLE__) && defined(__MACH__)) || defined(__DragonFly__) 63# define FD_DIR "/dev/fd" 64#else 65# define FD_DIR "/proc/self/fd" 66#endif 67 68#ifdef NGROUPS_MAX 69#define MAX_GROUPS NGROUPS_MAX 70#else 71#define MAX_GROUPS 64 72#endif 73 74#define POSIX_CALL(call) do { if ((call) == -1) goto error; } while (0) 75 76static struct PyModuleDef _posixsubprocessmodule; 77 78/* Convert ASCII to a positive int, no libc call. no overflow. -1 on error. */ 79static int 80_pos_int_from_ascii(const char *name) 81{ 82 int num = 0; 83 while (*name >= '0' && *name <= '9') { 84 num = num * 10 + (*name - '0'); 85 ++name; 86 } 87 if (*name) 88 return -1; /* Non digit found, not a number. */ 89 return num; 90} 91 92 93#if defined(__FreeBSD__) || defined(__DragonFly__) 94/* When /dev/fd isn't mounted it is often a static directory populated 95 * with 0 1 2 or entries for 0 .. 63 on FreeBSD, NetBSD, OpenBSD and DragonFlyBSD. 96 * NetBSD and OpenBSD have a /proc fs available (though not necessarily 97 * mounted) and do not have fdescfs for /dev/fd. MacOS X has a devfs 98 * that properly supports /dev/fd. 99 */ 100static int 101_is_fdescfs_mounted_on_dev_fd(void) 102{ 103 struct stat dev_stat; 104 struct stat dev_fd_stat; 105 if (stat("/dev", &dev_stat) != 0) 106 return 0; 107 if (stat(FD_DIR, &dev_fd_stat) != 0) 108 return 0; 109 if (dev_stat.st_dev == dev_fd_stat.st_dev) 110 return 0; /* / == /dev == /dev/fd means it is static. #fail */ 111 return 1; 112} 113#endif 114 115 116/* Returns 1 if there is a problem with fd_sequence, 0 otherwise. */ 117static int 118_sanity_check_python_fd_sequence(PyObject *fd_sequence) 119{ 120 Py_ssize_t seq_idx; 121 long prev_fd = -1; 122 for (seq_idx = 0; seq_idx < PyTuple_GET_SIZE(fd_sequence); ++seq_idx) { 123 PyObject* py_fd = PyTuple_GET_ITEM(fd_sequence, seq_idx); 124 long iter_fd; 125 if (!PyLong_Check(py_fd)) { 126 return 1; 127 } 128 iter_fd = PyLong_AsLong(py_fd); 129 if (iter_fd < 0 || iter_fd <= prev_fd || iter_fd > INT_MAX) { 130 /* Negative, overflow, unsorted, too big for a fd. */ 131 return 1; 132 } 133 prev_fd = iter_fd; 134 } 135 return 0; 136} 137 138 139/* Is fd found in the sorted Python Sequence? */ 140static int 141_is_fd_in_sorted_fd_sequence(int fd, int *fd_sequence, 142 Py_ssize_t fd_sequence_len) 143{ 144 /* Binary search. */ 145 Py_ssize_t search_min = 0; 146 Py_ssize_t search_max = fd_sequence_len - 1; 147 if (search_max < 0) 148 return 0; 149 do { 150 long middle = (search_min + search_max) / 2; 151 long middle_fd = fd_sequence[middle]; 152 if (fd == middle_fd) 153 return 1; 154 if (fd > middle_fd) 155 search_min = middle + 1; 156 else 157 search_max = middle - 1; 158 } while (search_min <= search_max); 159 return 0; 160} 161 162/* 163 * Do all the Python C API calls in the parent process to turn the pass_fds 164 * "py_fds_to_keep" tuple into a C array. The caller owns allocation and 165 * freeing of the array. 166 * 167 * On error an unknown number of array elements may have been filled in. 168 * A Python exception has been set when an error is returned. 169 * 170 * Returns: -1 on error, 0 on success. 171 */ 172static int 173convert_fds_to_keep_to_c(PyObject *py_fds_to_keep, int *c_fds_to_keep) 174{ 175 Py_ssize_t i, len; 176 177 len = PyTuple_GET_SIZE(py_fds_to_keep); 178 for (i = 0; i < len; ++i) { 179 PyObject* fdobj = PyTuple_GET_ITEM(py_fds_to_keep, i); 180 long fd = PyLong_AsLong(fdobj); 181 if (fd == -1 && PyErr_Occurred()) { 182 return -1; 183 } 184 if (fd < 0 || fd > INT_MAX) { 185 PyErr_SetString(PyExc_ValueError, 186 "fd out of range in fds_to_keep."); 187 return -1; 188 } 189 c_fds_to_keep[i] = (int)fd; 190 } 191 return 0; 192} 193 194 195/* This function must be async-signal-safe as it is called from child_exec() 196 * after fork() or vfork(). 197 */ 198static int 199make_inheritable(int *c_fds_to_keep, Py_ssize_t len, int errpipe_write) 200{ 201 Py_ssize_t i; 202 203 for (i = 0; i < len; ++i) { 204 int fd = c_fds_to_keep[i]; 205 if (fd == errpipe_write) { 206 /* errpipe_write is part of fds_to_keep. It must be closed at 207 exec(), but kept open in the child process until exec() is 208 called. */ 209 continue; 210 } 211 if (_Py_set_inheritable_async_safe(fd, 1, NULL) < 0) 212 return -1; 213 } 214 return 0; 215} 216 217 218/* Get the maximum file descriptor that could be opened by this process. 219 * This function is async signal safe for use between fork() and exec(). 220 */ 221static long 222safe_get_max_fd(void) 223{ 224 long local_max_fd; 225#if defined(__NetBSD__) 226 local_max_fd = fcntl(0, F_MAXFD); 227 if (local_max_fd >= 0) 228 return local_max_fd; 229#endif 230#if defined(HAVE_SYS_RESOURCE_H) && defined(__OpenBSD__) 231 struct rlimit rl; 232 /* Not on the POSIX async signal safe functions list but likely 233 * safe. TODO - Someone should audit OpenBSD to make sure. */ 234 if (getrlimit(RLIMIT_NOFILE, &rl) >= 0) 235 return (long) rl.rlim_max; 236#endif 237#ifdef _SC_OPEN_MAX 238 local_max_fd = sysconf(_SC_OPEN_MAX); 239 if (local_max_fd == -1) 240#endif 241 local_max_fd = 256; /* Matches legacy Lib/subprocess.py behavior. */ 242 return local_max_fd; 243} 244 245 246/* Close all file descriptors in the given range except for those in 247 * fds_to_keep by invoking closer on each subrange. 248 * 249 * If end_fd == -1, it's guessed via safe_get_max_fd(), but it isn't 250 * possible to know for sure what the max fd to go up to is for 251 * processes with the capability of raising their maximum, or in case 252 * a process opened a high fd and then lowered its maximum. 253 */ 254static int 255_close_range_except(int start_fd, 256 int end_fd, 257 int *fds_to_keep, 258 Py_ssize_t fds_to_keep_len, 259 int (*closer)(int, int)) 260{ 261 if (end_fd == -1) { 262 end_fd = Py_MIN(safe_get_max_fd(), INT_MAX); 263 } 264 Py_ssize_t keep_seq_idx; 265 /* As fds_to_keep is sorted we can loop through the list closing 266 * fds in between any in the keep list falling within our range. */ 267 for (keep_seq_idx = 0; keep_seq_idx < fds_to_keep_len; ++keep_seq_idx) { 268 int keep_fd = fds_to_keep[keep_seq_idx]; 269 if (keep_fd < start_fd) 270 continue; 271 if (closer(start_fd, keep_fd - 1) != 0) 272 return -1; 273 start_fd = keep_fd + 1; 274 } 275 if (start_fd <= end_fd) { 276 if (closer(start_fd, end_fd) != 0) 277 return -1; 278 } 279 return 0; 280} 281 282#if defined(__linux__) && defined(HAVE_SYS_SYSCALL_H) 283/* It doesn't matter if d_name has room for NAME_MAX chars; we're using this 284 * only to read a directory of short file descriptor number names. The kernel 285 * will return an error if we didn't give it enough space. Highly Unlikely. 286 * This structure is very old and stable: It will not change unless the kernel 287 * chooses to break compatibility with all existing binaries. Highly Unlikely. 288 */ 289struct linux_dirent64 { 290 unsigned long long d_ino; 291 long long d_off; 292 unsigned short d_reclen; /* Length of this linux_dirent */ 293 unsigned char d_type; 294 char d_name[256]; /* Filename (null-terminated) */ 295}; 296 297static int 298_brute_force_closer(int first, int last) 299{ 300 for (int i = first; i <= last; i++) { 301 /* Ignore errors */ 302 (void)close(i); 303 } 304 return 0; 305} 306 307/* Close all open file descriptors in the range from start_fd and higher 308 * Do not close any in the sorted fds_to_keep list. 309 * 310 * This version is async signal safe as it does not make any unsafe C library 311 * calls, malloc calls or handle any locks. It is _unfortunate_ to be forced 312 * to resort to making a kernel system call directly but this is the ONLY api 313 * available that does no harm. opendir/readdir/closedir perform memory 314 * allocation and locking so while they usually work they are not guaranteed 315 * to (especially if you have replaced your malloc implementation). A version 316 * of this function that uses those can be found in the _maybe_unsafe variant. 317 * 318 * This is Linux specific because that is all I am ready to test it on. It 319 * should be easy to add OS specific dirent or dirent64 structures and modify 320 * it with some cpp #define magic to work on other OSes as well if you want. 321 */ 322static void 323_close_open_fds_safe(int start_fd, int *fds_to_keep, Py_ssize_t fds_to_keep_len) 324{ 325 int fd_dir_fd; 326 327 fd_dir_fd = _Py_open_noraise(FD_DIR, O_RDONLY); 328 if (fd_dir_fd == -1) { 329 /* No way to get a list of open fds. */ 330 _close_range_except(start_fd, -1, 331 fds_to_keep, fds_to_keep_len, 332 _brute_force_closer); 333 return; 334 } else { 335 char buffer[sizeof(struct linux_dirent64)]; 336 int bytes; 337 while ((bytes = syscall(SYS_getdents64, fd_dir_fd, 338 (struct linux_dirent64 *)buffer, 339 sizeof(buffer))) > 0) { 340 struct linux_dirent64 *entry; 341 int offset; 342#ifdef _Py_MEMORY_SANITIZER 343 __msan_unpoison(buffer, bytes); 344#endif 345 for (offset = 0; offset < bytes; offset += entry->d_reclen) { 346 int fd; 347 entry = (struct linux_dirent64 *)(buffer + offset); 348 if ((fd = _pos_int_from_ascii(entry->d_name)) < 0) 349 continue; /* Not a number. */ 350 if (fd != fd_dir_fd && fd >= start_fd && 351 !_is_fd_in_sorted_fd_sequence(fd, fds_to_keep, 352 fds_to_keep_len)) { 353 close(fd); 354 } 355 } 356 } 357 close(fd_dir_fd); 358 } 359} 360 361#define _close_open_fds_fallback _close_open_fds_safe 362 363#else /* NOT (defined(__linux__) && defined(HAVE_SYS_SYSCALL_H)) */ 364 365static int 366_unsafe_closer(int first, int last) 367{ 368 _Py_closerange(first, last); 369 return 0; 370} 371 372/* Close all open file descriptors from start_fd and higher. 373 * Do not close any in the sorted fds_to_keep tuple. 374 * 375 * This function violates the strict use of async signal safe functions. :( 376 * It calls opendir(), readdir() and closedir(). Of these, the one most 377 * likely to ever cause a problem is opendir() as it performs an internal 378 * malloc(). Practically this should not be a problem. The Java VM makes the 379 * same calls between fork and exec in its own UNIXProcess_md.c implementation. 380 * 381 * readdir_r() is not used because it provides no benefit. It is typically 382 * implemented as readdir() followed by memcpy(). See also: 383 * http://womble.decadent.org.uk/readdir_r-advisory.html 384 */ 385static void 386_close_open_fds_maybe_unsafe(int start_fd, int *fds_to_keep, 387 Py_ssize_t fds_to_keep_len) 388{ 389 DIR *proc_fd_dir; 390#ifndef HAVE_DIRFD 391 while (_is_fd_in_sorted_fd_sequence(start_fd, fds_to_keep, 392 fds_to_keep_len)) { 393 ++start_fd; 394 } 395 /* Close our lowest fd before we call opendir so that it is likely to 396 * reuse that fd otherwise we might close opendir's file descriptor in 397 * our loop. This trick assumes that fd's are allocated on a lowest 398 * available basis. */ 399 close(start_fd); 400 ++start_fd; 401#endif 402 403#if defined(__FreeBSD__) || defined(__DragonFly__) 404 if (!_is_fdescfs_mounted_on_dev_fd()) 405 proc_fd_dir = NULL; 406 else 407#endif 408 proc_fd_dir = opendir(FD_DIR); 409 if (!proc_fd_dir) { 410 /* No way to get a list of open fds. */ 411 _close_range_except(start_fd, -1, fds_to_keep, fds_to_keep_len, 412 _unsafe_closer); 413 } else { 414 struct dirent *dir_entry; 415#ifdef HAVE_DIRFD 416 int fd_used_by_opendir = dirfd(proc_fd_dir); 417#else 418 int fd_used_by_opendir = start_fd - 1; 419#endif 420 errno = 0; 421 while ((dir_entry = readdir(proc_fd_dir))) { 422 int fd; 423 if ((fd = _pos_int_from_ascii(dir_entry->d_name)) < 0) 424 continue; /* Not a number. */ 425 if (fd != fd_used_by_opendir && fd >= start_fd && 426 !_is_fd_in_sorted_fd_sequence(fd, fds_to_keep, 427 fds_to_keep_len)) { 428 close(fd); 429 } 430 errno = 0; 431 } 432 if (errno) { 433 /* readdir error, revert behavior. Highly Unlikely. */ 434 _close_range_except(start_fd, -1, fds_to_keep, fds_to_keep_len, 435 _unsafe_closer); 436 } 437 closedir(proc_fd_dir); 438 } 439} 440 441#define _close_open_fds_fallback _close_open_fds_maybe_unsafe 442 443#endif /* else NOT (defined(__linux__) && defined(HAVE_SYS_SYSCALL_H)) */ 444 445/* We can use close_range() library function only if it's known to be 446 * async-signal-safe. 447 * 448 * On Linux, glibc explicitly documents it to be a thin wrapper over 449 * the system call, and other C libraries are likely to follow glibc. 450 */ 451#if defined(HAVE_CLOSE_RANGE) && \ 452 (defined(__linux__) || defined(__FreeBSD__)) 453#define HAVE_ASYNC_SAFE_CLOSE_RANGE 454 455static int 456_close_range_closer(int first, int last) 457{ 458 return close_range(first, last, 0); 459} 460#endif 461 462static void 463_close_open_fds(int start_fd, int *fds_to_keep, Py_ssize_t fds_to_keep_len) 464{ 465#ifdef HAVE_ASYNC_SAFE_CLOSE_RANGE 466 if (_close_range_except( 467 start_fd, INT_MAX, fds_to_keep, fds_to_keep_len, 468 _close_range_closer) == 0) { 469 return; 470 } 471#endif 472 _close_open_fds_fallback(start_fd, fds_to_keep, fds_to_keep_len); 473} 474 475#ifdef VFORK_USABLE 476/* Reset dispositions for all signals to SIG_DFL except for ignored 477 * signals. This way we ensure that no signal handlers can run 478 * after we unblock signals in a child created by vfork(). 479 */ 480static void 481reset_signal_handlers(const sigset_t *child_sigmask) 482{ 483 struct sigaction sa_dfl = {.sa_handler = SIG_DFL}; 484 for (int sig = 1; sig < _NSIG; sig++) { 485 /* Dispositions for SIGKILL and SIGSTOP can't be changed. */ 486 if (sig == SIGKILL || sig == SIGSTOP) { 487 continue; 488 } 489 490 /* There is no need to reset the disposition of signals that will 491 * remain blocked across execve() since the kernel will do it. */ 492 if (sigismember(child_sigmask, sig) == 1) { 493 continue; 494 } 495 496 struct sigaction sa; 497 /* C libraries usually return EINVAL for signals used 498 * internally (e.g. for thread cancellation), so simply 499 * skip errors here. */ 500 if (sigaction(sig, NULL, &sa) == -1) { 501 continue; 502 } 503 504 /* void *h works as these fields are both pointer types already. */ 505 void *h = (sa.sa_flags & SA_SIGINFO ? (void *)sa.sa_sigaction : 506 (void *)sa.sa_handler); 507 if (h == SIG_IGN || h == SIG_DFL) { 508 continue; 509 } 510 511 /* This call can't reasonably fail, but if it does, terminating 512 * the child seems to be too harsh, so ignore errors. */ 513 (void) sigaction(sig, &sa_dfl, NULL); 514 } 515} 516#endif /* VFORK_USABLE */ 517 518 519/* 520 * This function is code executed in the child process immediately after 521 * (v)fork to set things up and call exec(). 522 * 523 * All of the code in this function must only use async-signal-safe functions, 524 * listed at `man 7 signal` or 525 * http://www.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_04.html. 526 * 527 * This restriction is documented at 528 * http://www.opengroup.org/onlinepubs/009695399/functions/fork.html. 529 * 530 * If this function is called after vfork(), even more care must be taken. 531 * The lack of preparations that C libraries normally take on fork(), 532 * as well as sharing the address space with the parent, might make even 533 * async-signal-safe functions vfork-unsafe. In particular, on Linux, 534 * set*id() and setgroups() library functions must not be called, since 535 * they have to interact with the library-level thread list and send 536 * library-internal signals to implement per-process credentials semantics 537 * required by POSIX but not supported natively on Linux. Another reason to 538 * avoid this family of functions is that sharing an address space between 539 * processes running with different privileges is inherently insecure. 540 * See bpo-35823 for further discussion and references. 541 * 542 * In some C libraries, setrlimit() has the same thread list/signalling 543 * behavior since resource limits were per-thread attributes before 544 * Linux 2.6.10. Musl, as of 1.2.1, is known to have this issue 545 * (https://www.openwall.com/lists/musl/2020/10/15/6). 546 * 547 * If vfork-unsafe functionality is desired after vfork(), consider using 548 * syscall() to obtain it. 549 */ 550Py_NO_INLINE static void 551child_exec(char *const exec_array[], 552 char *const argv[], 553 char *const envp[], 554 const char *cwd, 555 int p2cread, int p2cwrite, 556 int c2pread, int c2pwrite, 557 int errread, int errwrite, 558 int errpipe_read, int errpipe_write, 559 int close_fds, int restore_signals, 560 int call_setsid, pid_t pgid_to_set, 561 int call_setgid, gid_t gid, 562 int call_setgroups, size_t groups_size, const gid_t *groups, 563 int call_setuid, uid_t uid, int child_umask, 564 const void *child_sigmask, 565 int *fds_to_keep, Py_ssize_t fds_to_keep_len, 566 PyObject *preexec_fn, 567 PyObject *preexec_fn_args_tuple) 568{ 569 int i, saved_errno, reached_preexec = 0; 570 PyObject *result; 571 const char* err_msg = ""; 572 /* Buffer large enough to hold a hex integer. We can't malloc. */ 573 char hex_errno[sizeof(saved_errno)*2+1]; 574 575 if (make_inheritable(fds_to_keep, fds_to_keep_len, errpipe_write) < 0) 576 goto error; 577 578 /* Close parent's pipe ends. */ 579 if (p2cwrite != -1) 580 POSIX_CALL(close(p2cwrite)); 581 if (c2pread != -1) 582 POSIX_CALL(close(c2pread)); 583 if (errread != -1) 584 POSIX_CALL(close(errread)); 585 POSIX_CALL(close(errpipe_read)); 586 587 /* When duping fds, if there arises a situation where one of the fds is 588 either 0, 1 or 2, it is possible that it is overwritten (#12607). */ 589 if (c2pwrite == 0) { 590 POSIX_CALL(c2pwrite = dup(c2pwrite)); 591 /* issue32270 */ 592 if (_Py_set_inheritable_async_safe(c2pwrite, 0, NULL) < 0) { 593 goto error; 594 } 595 } 596 while (errwrite == 0 || errwrite == 1) { 597 POSIX_CALL(errwrite = dup(errwrite)); 598 /* issue32270 */ 599 if (_Py_set_inheritable_async_safe(errwrite, 0, NULL) < 0) { 600 goto error; 601 } 602 } 603 604 /* Dup fds for child. 605 dup2() removes the CLOEXEC flag but we must do it ourselves if dup2() 606 would be a no-op (issue #10806). */ 607 if (p2cread == 0) { 608 if (_Py_set_inheritable_async_safe(p2cread, 1, NULL) < 0) 609 goto error; 610 } 611 else if (p2cread != -1) 612 POSIX_CALL(dup2(p2cread, 0)); /* stdin */ 613 614 if (c2pwrite == 1) { 615 if (_Py_set_inheritable_async_safe(c2pwrite, 1, NULL) < 0) 616 goto error; 617 } 618 else if (c2pwrite != -1) 619 POSIX_CALL(dup2(c2pwrite, 1)); /* stdout */ 620 621 if (errwrite == 2) { 622 if (_Py_set_inheritable_async_safe(errwrite, 1, NULL) < 0) 623 goto error; 624 } 625 else if (errwrite != -1) 626 POSIX_CALL(dup2(errwrite, 2)); /* stderr */ 627 628 /* We no longer manually close p2cread, c2pwrite, and errwrite here as 629 * _close_open_fds takes care when it is not already non-inheritable. */ 630 631 if (cwd) 632 POSIX_CALL(chdir(cwd)); 633 634 if (child_umask >= 0) 635 umask(child_umask); /* umask() always succeeds. */ 636 637 if (restore_signals) 638 _Py_RestoreSignals(); 639 640#ifdef VFORK_USABLE 641 if (child_sigmask) { 642 reset_signal_handlers(child_sigmask); 643 if ((errno = pthread_sigmask(SIG_SETMASK, child_sigmask, NULL))) { 644 goto error; 645 } 646 } 647#endif 648 649#ifdef HAVE_SETSID 650 if (call_setsid) 651 POSIX_CALL(setsid()); 652#endif 653 654#ifdef HAVE_SETPGID 655 if (pgid_to_set >= 0) 656 POSIX_CALL(setpgid(0, pgid_to_set)); 657#endif 658 659#ifdef HAVE_SETGROUPS 660 if (call_setgroups) 661 POSIX_CALL(setgroups(groups_size, groups)); 662#endif /* HAVE_SETGROUPS */ 663 664#ifdef HAVE_SETREGID 665 if (call_setgid) 666 POSIX_CALL(setregid(gid, gid)); 667#endif /* HAVE_SETREGID */ 668 669#ifdef HAVE_SETREUID 670 if (call_setuid) 671 POSIX_CALL(setreuid(uid, uid)); 672#endif /* HAVE_SETREUID */ 673 674 675 reached_preexec = 1; 676 if (preexec_fn != Py_None && preexec_fn_args_tuple) { 677 /* This is where the user has asked us to deadlock their program. */ 678 result = PyObject_Call(preexec_fn, preexec_fn_args_tuple, NULL); 679 if (result == NULL) { 680 /* Stringifying the exception or traceback would involve 681 * memory allocation and thus potential for deadlock. 682 * We've already faced potential deadlock by calling back 683 * into Python in the first place, so it probably doesn't 684 * matter but we avoid it to minimize the possibility. */ 685 err_msg = "Exception occurred in preexec_fn."; 686 errno = 0; /* We don't want to report an OSError. */ 687 goto error; 688 } 689 /* Py_DECREF(result); - We're about to exec so why bother? */ 690 } 691 692 /* close FDs after executing preexec_fn, which might open FDs */ 693 if (close_fds) { 694 /* TODO HP-UX could use pstat_getproc() if anyone cares about it. */ 695 _close_open_fds(3, fds_to_keep, fds_to_keep_len); 696 } 697 698 /* This loop matches the Lib/os.py _execvpe()'s PATH search when */ 699 /* given the executable_list generated by Lib/subprocess.py. */ 700 saved_errno = 0; 701 for (i = 0; exec_array[i] != NULL; ++i) { 702 const char *executable = exec_array[i]; 703 if (envp) { 704 execve(executable, argv, envp); 705 } else { 706 execv(executable, argv); 707 } 708 if (errno != ENOENT && errno != ENOTDIR && saved_errno == 0) { 709 saved_errno = errno; 710 } 711 } 712 /* Report the first exec error, not the last. */ 713 if (saved_errno) 714 errno = saved_errno; 715 716error: 717 saved_errno = errno; 718 /* Report the posix error to our parent process. */ 719 /* We ignore all write() return values as the total size of our writes is 720 less than PIPEBUF and we cannot do anything about an error anyways. 721 Use _Py_write_noraise() to retry write() if it is interrupted by a 722 signal (fails with EINTR). */ 723 if (saved_errno) { 724 char *cur; 725 _Py_write_noraise(errpipe_write, "OSError:", 8); 726 cur = hex_errno + sizeof(hex_errno); 727 while (saved_errno != 0 && cur != hex_errno) { 728 *--cur = Py_hexdigits[saved_errno % 16]; 729 saved_errno /= 16; 730 } 731 _Py_write_noraise(errpipe_write, cur, hex_errno + sizeof(hex_errno) - cur); 732 _Py_write_noraise(errpipe_write, ":", 1); 733 if (!reached_preexec) { 734 /* Indicate to the parent that the error happened before exec(). */ 735 _Py_write_noraise(errpipe_write, "noexec", 6); 736 } 737 /* We can't call strerror(saved_errno). It is not async signal safe. 738 * The parent process will look the error message up. */ 739 } else { 740 _Py_write_noraise(errpipe_write, "SubprocessError:0:", 18); 741 _Py_write_noraise(errpipe_write, err_msg, strlen(err_msg)); 742 } 743} 744 745 746/* The main purpose of this wrapper function is to isolate vfork() from both 747 * subprocess_fork_exec() and child_exec(). A child process created via 748 * vfork() executes on the same stack as the parent process while the latter is 749 * suspended, so this function should not be inlined to avoid compiler bugs 750 * that might clobber data needed by the parent later. Additionally, 751 * child_exec() should not be inlined to avoid spurious -Wclobber warnings from 752 * GCC (see bpo-35823). 753 */ 754Py_NO_INLINE static pid_t 755do_fork_exec(char *const exec_array[], 756 char *const argv[], 757 char *const envp[], 758 const char *cwd, 759 int p2cread, int p2cwrite, 760 int c2pread, int c2pwrite, 761 int errread, int errwrite, 762 int errpipe_read, int errpipe_write, 763 int close_fds, int restore_signals, 764 int call_setsid, pid_t pgid_to_set, 765 int call_setgid, gid_t gid, 766 int call_setgroups, size_t groups_size, const gid_t *groups, 767 int call_setuid, uid_t uid, int child_umask, 768 const void *child_sigmask, 769 int *fds_to_keep, Py_ssize_t fds_to_keep_len, 770 PyObject *preexec_fn, 771 PyObject *preexec_fn_args_tuple) 772{ 773 774 pid_t pid; 775 776#ifdef VFORK_USABLE 777 if (child_sigmask) { 778 /* These are checked by our caller; verify them in debug builds. */ 779 assert(!call_setuid); 780 assert(!call_setgid); 781 assert(!call_setgroups); 782 assert(preexec_fn == Py_None); 783 784 pid = vfork(); 785 if (pid == -1) { 786 /* If vfork() fails, fall back to using fork(). When it isn't 787 * allowed in a process by the kernel, vfork can return -1 788 * with errno EINVAL. https://bugs.python.org/issue47151. */ 789 pid = fork(); 790 } 791 } else 792#endif 793 { 794 pid = fork(); 795 } 796 797 if (pid != 0) { 798 return pid; 799 } 800 801 /* Child process. 802 * See the comment above child_exec() for restrictions imposed on 803 * the code below. 804 */ 805 806 if (preexec_fn != Py_None) { 807 /* We'll be calling back into Python later so we need to do this. 808 * This call may not be async-signal-safe but neither is calling 809 * back into Python. The user asked us to use hope as a strategy 810 * to avoid deadlock... */ 811 PyOS_AfterFork_Child(); 812 } 813 814 child_exec(exec_array, argv, envp, cwd, 815 p2cread, p2cwrite, c2pread, c2pwrite, 816 errread, errwrite, errpipe_read, errpipe_write, 817 close_fds, restore_signals, call_setsid, pgid_to_set, 818 call_setgid, gid, call_setgroups, groups_size, groups, 819 call_setuid, uid, child_umask, child_sigmask, 820 fds_to_keep, fds_to_keep_len, 821 preexec_fn, preexec_fn_args_tuple); 822 _exit(255); 823 return 0; /* Dead code to avoid a potential compiler warning. */ 824} 825 826 827static PyObject * 828subprocess_fork_exec(PyObject *module, PyObject *args) 829{ 830 PyObject *gc_module = NULL; 831 PyObject *executable_list, *py_fds_to_keep; 832 PyObject *env_list, *preexec_fn; 833 PyObject *process_args, *converted_args = NULL, *fast_args = NULL; 834 PyObject *preexec_fn_args_tuple = NULL; 835 PyObject *groups_list; 836 PyObject *uid_object, *gid_object; 837 int p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite; 838 int errpipe_read, errpipe_write, close_fds, restore_signals; 839 int call_setsid; 840 pid_t pgid_to_set = -1; 841 int call_setgid = 0, call_setgroups = 0, call_setuid = 0; 842 uid_t uid; 843 gid_t gid, *groups = NULL; 844 int child_umask; 845 PyObject *cwd_obj, *cwd_obj2 = NULL; 846 const char *cwd; 847 pid_t pid = -1; 848 int need_to_reenable_gc = 0; 849 char *const *exec_array, *const *argv = NULL, *const *envp = NULL; 850 Py_ssize_t arg_num, num_groups = 0; 851 int need_after_fork = 0; 852 int saved_errno = 0; 853 int allow_vfork; 854 int *c_fds_to_keep = NULL; 855 856 if (!PyArg_ParseTuple( 857 args, "OOpO!OOiiiiiiiiii" _Py_PARSE_PID "OOOiOp:fork_exec", 858 &process_args, &executable_list, 859 &close_fds, &PyTuple_Type, &py_fds_to_keep, 860 &cwd_obj, &env_list, 861 &p2cread, &p2cwrite, &c2pread, &c2pwrite, 862 &errread, &errwrite, &errpipe_read, &errpipe_write, 863 &restore_signals, &call_setsid, &pgid_to_set, 864 &gid_object, &groups_list, &uid_object, &child_umask, 865 &preexec_fn, &allow_vfork)) 866 return NULL; 867 868 if ((preexec_fn != Py_None) && 869 (PyInterpreterState_Get() != PyInterpreterState_Main())) { 870 PyErr_SetString(PyExc_RuntimeError, 871 "preexec_fn not supported within subinterpreters"); 872 return NULL; 873 } 874 875 if (close_fds && errpipe_write < 3) { /* precondition */ 876 PyErr_SetString(PyExc_ValueError, "errpipe_write must be >= 3"); 877 return NULL; 878 } 879 if (_sanity_check_python_fd_sequence(py_fds_to_keep)) { 880 PyErr_SetString(PyExc_ValueError, "bad value(s) in fds_to_keep"); 881 return NULL; 882 } 883 884 PyInterpreterState *interp = PyInterpreterState_Get(); 885 const PyConfig *config = _PyInterpreterState_GetConfig(interp); 886 if (config->_isolated_interpreter) { 887 PyErr_SetString(PyExc_RuntimeError, 888 "subprocess not supported for isolated subinterpreters"); 889 return NULL; 890 } 891 892 /* We need to call gc.disable() when we'll be calling preexec_fn */ 893 if (preexec_fn != Py_None) { 894 need_to_reenable_gc = PyGC_Disable(); 895 } 896 897 exec_array = _PySequence_BytesToCharpArray(executable_list); 898 if (!exec_array) 899 goto cleanup; 900 901 /* Convert args and env into appropriate arguments for exec() */ 902 /* These conversions are done in the parent process to avoid allocating 903 or freeing memory in the child process. */ 904 if (process_args != Py_None) { 905 Py_ssize_t num_args; 906 /* Equivalent to: */ 907 /* tuple(PyUnicode_FSConverter(arg) for arg in process_args) */ 908 fast_args = PySequence_Fast(process_args, "argv must be a tuple"); 909 if (fast_args == NULL) 910 goto cleanup; 911 num_args = PySequence_Fast_GET_SIZE(fast_args); 912 converted_args = PyTuple_New(num_args); 913 if (converted_args == NULL) 914 goto cleanup; 915 for (arg_num = 0; arg_num < num_args; ++arg_num) { 916 PyObject *borrowed_arg, *converted_arg; 917 if (PySequence_Fast_GET_SIZE(fast_args) != num_args) { 918 PyErr_SetString(PyExc_RuntimeError, "args changed during iteration"); 919 goto cleanup; 920 } 921 borrowed_arg = PySequence_Fast_GET_ITEM(fast_args, arg_num); 922 if (PyUnicode_FSConverter(borrowed_arg, &converted_arg) == 0) 923 goto cleanup; 924 PyTuple_SET_ITEM(converted_args, arg_num, converted_arg); 925 } 926 927 argv = _PySequence_BytesToCharpArray(converted_args); 928 Py_CLEAR(converted_args); 929 Py_CLEAR(fast_args); 930 if (!argv) 931 goto cleanup; 932 } 933 934 if (env_list != Py_None) { 935 envp = _PySequence_BytesToCharpArray(env_list); 936 if (!envp) 937 goto cleanup; 938 } 939 940 if (cwd_obj != Py_None) { 941 if (PyUnicode_FSConverter(cwd_obj, &cwd_obj2) == 0) 942 goto cleanup; 943 cwd = PyBytes_AsString(cwd_obj2); 944 } else { 945 cwd = NULL; 946 } 947 948 if (groups_list != Py_None) { 949#ifdef HAVE_SETGROUPS 950 Py_ssize_t i; 951 gid_t gid; 952 953 if (!PyList_Check(groups_list)) { 954 PyErr_SetString(PyExc_TypeError, 955 "setgroups argument must be a list"); 956 goto cleanup; 957 } 958 num_groups = PySequence_Size(groups_list); 959 960 if (num_groups < 0) 961 goto cleanup; 962 963 if (num_groups > MAX_GROUPS) { 964 PyErr_SetString(PyExc_ValueError, "too many groups"); 965 goto cleanup; 966 } 967 968 if ((groups = PyMem_RawMalloc(num_groups * sizeof(gid_t))) == NULL) { 969 PyErr_SetString(PyExc_MemoryError, 970 "failed to allocate memory for group list"); 971 goto cleanup; 972 } 973 974 for (i = 0; i < num_groups; i++) { 975 PyObject *elem; 976 elem = PySequence_GetItem(groups_list, i); 977 if (!elem) 978 goto cleanup; 979 if (!PyLong_Check(elem)) { 980 PyErr_SetString(PyExc_TypeError, 981 "groups must be integers"); 982 Py_DECREF(elem); 983 goto cleanup; 984 } else { 985 if (!_Py_Gid_Converter(elem, &gid)) { 986 Py_DECREF(elem); 987 PyErr_SetString(PyExc_ValueError, "invalid group id"); 988 goto cleanup; 989 } 990 groups[i] = gid; 991 } 992 Py_DECREF(elem); 993 } 994 call_setgroups = 1; 995 996#else /* HAVE_SETGROUPS */ 997 PyErr_BadInternalCall(); 998 goto cleanup; 999#endif /* HAVE_SETGROUPS */ 1000 } 1001 1002 if (gid_object != Py_None) { 1003#ifdef HAVE_SETREGID 1004 if (!_Py_Gid_Converter(gid_object, &gid)) 1005 goto cleanup; 1006 1007 call_setgid = 1; 1008 1009#else /* HAVE_SETREGID */ 1010 PyErr_BadInternalCall(); 1011 goto cleanup; 1012#endif /* HAVE_SETREUID */ 1013 } 1014 1015 if (uid_object != Py_None) { 1016#ifdef HAVE_SETREUID 1017 if (!_Py_Uid_Converter(uid_object, &uid)) 1018 goto cleanup; 1019 1020 call_setuid = 1; 1021 1022#else /* HAVE_SETREUID */ 1023 PyErr_BadInternalCall(); 1024 goto cleanup; 1025#endif /* HAVE_SETREUID */ 1026 } 1027 1028 Py_ssize_t fds_to_keep_len = PyTuple_GET_SIZE(py_fds_to_keep); 1029 c_fds_to_keep = PyMem_Malloc(fds_to_keep_len * sizeof(int)); 1030 if (c_fds_to_keep == NULL) { 1031 PyErr_SetString(PyExc_MemoryError, "failed to malloc c_fds_to_keep"); 1032 goto cleanup; 1033 } 1034 if (convert_fds_to_keep_to_c(py_fds_to_keep, c_fds_to_keep) < 0) { 1035 goto cleanup; 1036 } 1037 1038 /* This must be the last thing done before fork() because we do not 1039 * want to call PyOS_BeforeFork() if there is any chance of another 1040 * error leading to the cleanup: code without calling fork(). */ 1041 if (preexec_fn != Py_None) { 1042 preexec_fn_args_tuple = PyTuple_New(0); 1043 if (!preexec_fn_args_tuple) 1044 goto cleanup; 1045 PyOS_BeforeFork(); 1046 need_after_fork = 1; 1047 } 1048 1049 /* NOTE: When old_sigmask is non-NULL, do_fork_exec() may use vfork(). */ 1050 const void *old_sigmask = NULL; 1051#ifdef VFORK_USABLE 1052 /* Use vfork() only if it's safe. See the comment above child_exec(). */ 1053 sigset_t old_sigs; 1054 if (preexec_fn == Py_None && allow_vfork && 1055 !call_setuid && !call_setgid && !call_setgroups) { 1056 /* Block all signals to ensure that no signal handlers are run in the 1057 * child process while it shares memory with us. Note that signals 1058 * used internally by C libraries won't be blocked by 1059 * pthread_sigmask(), but signal handlers installed by C libraries 1060 * normally service only signals originating from *within the process*, 1061 * so it should be sufficient to consider any library function that 1062 * might send such a signal to be vfork-unsafe and do not call it in 1063 * the child. 1064 */ 1065 sigset_t all_sigs; 1066 sigfillset(&all_sigs); 1067 if ((saved_errno = pthread_sigmask(SIG_BLOCK, &all_sigs, &old_sigs))) { 1068 goto cleanup; 1069 } 1070 old_sigmask = &old_sigs; 1071 } 1072#endif 1073 1074 pid = do_fork_exec(exec_array, argv, envp, cwd, 1075 p2cread, p2cwrite, c2pread, c2pwrite, 1076 errread, errwrite, errpipe_read, errpipe_write, 1077 close_fds, restore_signals, call_setsid, pgid_to_set, 1078 call_setgid, gid, call_setgroups, num_groups, groups, 1079 call_setuid, uid, child_umask, old_sigmask, 1080 c_fds_to_keep, fds_to_keep_len, 1081 preexec_fn, preexec_fn_args_tuple); 1082 1083 /* Parent (original) process */ 1084 if (pid == -1) { 1085 /* Capture errno for the exception. */ 1086 saved_errno = errno; 1087 } 1088 1089#ifdef VFORK_USABLE 1090 if (old_sigmask) { 1091 /* vfork() semantics guarantees that the parent is blocked 1092 * until the child performs _exit() or execve(), so it is safe 1093 * to unblock signals once we're here. 1094 * Note that in environments where vfork() is implemented as fork(), 1095 * such as QEMU user-mode emulation, the parent won't be blocked, 1096 * but it won't share the address space with the child, 1097 * so it's still safe to unblock the signals. 1098 * 1099 * We don't handle errors here because this call can't fail 1100 * if valid arguments are given, and because there is no good 1101 * way for the caller to deal with a failure to restore 1102 * the thread signal mask. */ 1103 (void) pthread_sigmask(SIG_SETMASK, old_sigmask, NULL); 1104 } 1105#endif 1106 1107 if (need_after_fork) 1108 PyOS_AfterFork_Parent(); 1109 1110cleanup: 1111 if (c_fds_to_keep != NULL) { 1112 PyMem_Free(c_fds_to_keep); 1113 } 1114 1115 if (saved_errno != 0) { 1116 errno = saved_errno; 1117 /* We can't call this above as PyOS_AfterFork_Parent() calls back 1118 * into Python code which would see the unreturned error. */ 1119 PyErr_SetFromErrno(PyExc_OSError); 1120 } 1121 1122 Py_XDECREF(preexec_fn_args_tuple); 1123 PyMem_RawFree(groups); 1124 Py_XDECREF(cwd_obj2); 1125 if (envp) 1126 _Py_FreeCharPArray(envp); 1127 Py_XDECREF(converted_args); 1128 Py_XDECREF(fast_args); 1129 if (argv) 1130 _Py_FreeCharPArray(argv); 1131 if (exec_array) 1132 _Py_FreeCharPArray(exec_array); 1133 1134 if (need_to_reenable_gc) { 1135 PyGC_Enable(); 1136 } 1137 Py_XDECREF(gc_module); 1138 1139 return pid == -1 ? NULL : PyLong_FromPid(pid); 1140} 1141 1142 1143PyDoc_STRVAR(subprocess_fork_exec_doc, 1144"fork_exec(args, executable_list, close_fds, pass_fds, cwd, env,\n\ 1145 p2cread, p2cwrite, c2pread, c2pwrite,\n\ 1146 errread, errwrite, errpipe_read, errpipe_write,\n\ 1147 restore_signals, call_setsid, pgid_to_set,\n\ 1148 gid, groups_list, uid,\n\ 1149 preexec_fn)\n\ 1150\n\ 1151Forks a child process, closes parent file descriptors as appropriate in the\n\ 1152child and dups the few that are needed before calling exec() in the child\n\ 1153process.\n\ 1154\n\ 1155If close_fds is true, close file descriptors 3 and higher, except those listed\n\ 1156in the sorted tuple pass_fds.\n\ 1157\n\ 1158The preexec_fn, if supplied, will be called immediately before closing file\n\ 1159descriptors and exec.\n\ 1160WARNING: preexec_fn is NOT SAFE if your application uses threads.\n\ 1161 It may trigger infrequent, difficult to debug deadlocks.\n\ 1162\n\ 1163If an error occurs in the child process before the exec, it is\n\ 1164serialized and written to the errpipe_write fd per subprocess.py.\n\ 1165\n\ 1166Returns: the child process's PID.\n\ 1167\n\ 1168Raises: Only on an error in the parent process.\n\ 1169"); 1170 1171/* module level code ********************************************************/ 1172 1173PyDoc_STRVAR(module_doc, 1174"A POSIX helper for the subprocess module."); 1175 1176static PyMethodDef module_methods[] = { 1177 {"fork_exec", subprocess_fork_exec, METH_VARARGS, subprocess_fork_exec_doc}, 1178 {NULL, NULL} /* sentinel */ 1179}; 1180 1181static PyModuleDef_Slot _posixsubprocess_slots[] = { 1182 {0, NULL} 1183}; 1184 1185static struct PyModuleDef _posixsubprocessmodule = { 1186 PyModuleDef_HEAD_INIT, 1187 .m_name = "_posixsubprocess", 1188 .m_doc = module_doc, 1189 .m_size = 0, 1190 .m_methods = module_methods, 1191 .m_slots = _posixsubprocess_slots, 1192}; 1193 1194PyMODINIT_FUNC 1195PyInit__posixsubprocess(void) 1196{ 1197 return PyModuleDef_Init(&_posixsubprocessmodule); 1198} 1199