1#![allow(unsafe_code)]
2
3use core::convert::TryFrom;
4use core::mem::MaybeUninit;
5use core::num::NonZeroU64;
6use core::ptr;
7use core::ptr::NonNull;
8use core::sync::atomic::AtomicU8;
9
10use bitflags::bitflags;
11
12use crate::backend::c::{c_int, c_uint, c_void};
13use crate::backend::process::syscalls;
14use crate::ffi::{CStr, CString};
15use crate::io;
16use crate::process::{
17    prctl_1arg, prctl_2args, prctl_3args, prctl_get_at_arg2_optional, Pid,
18    PointerAuthenticationKeys,
19};
20
21//
22// PR_GET_KEEPCAPS/PR_SET_KEEPCAPS
23//
24
25const PR_GET_KEEPCAPS: c_int = 7;
26
27/// Get the current state of the calling thread's `keep capabilities` flag.
28///
29/// # References
30/// - [`prctl(PR_GET_KEEPCAPS,...)`]
31///
32/// [`prctl(PR_GET_KEEPCAPS,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
33#[inline]
34pub fn get_keep_capabilities() -> io::Result<bool> {
35    unsafe { prctl_1arg(PR_GET_KEEPCAPS) }.map(|r| r != 0)
36}
37
38const PR_SET_KEEPCAPS: c_int = 8;
39
40/// Set the state of the calling thread's `keep capabilities` flag.
41///
42/// # References
43/// - [`prctl(PR_SET_KEEPCAPS,...)`]
44///
45/// [`prctl(PR_SET_KEEPCAPS,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
46#[inline]
47pub fn set_keep_capabilities(enable: bool) -> io::Result<()> {
48    unsafe { prctl_2args(PR_SET_KEEPCAPS, enable as usize as *mut _) }.map(|_r| ())
49}
50
51//
52// PR_GET_NAME/PR_SET_NAME
53//
54
55const PR_GET_NAME: c_int = 16;
56
57/// Get the name of the calling thread.
58///
59/// # References
60/// - [`prctl(PR_GET_NAME,...)`]
61///
62/// [`prctl(PR_GET_NAME,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
63#[inline]
64pub fn name() -> io::Result<CString> {
65    let mut buffer = [0_u8; 16];
66    unsafe { prctl_2args(PR_GET_NAME, buffer.as_mut_ptr().cast())? };
67
68    let len = buffer.iter().position(|&x| x == 0_u8).unwrap_or(0);
69    CString::new(&buffer[..len]).map_err(|_r| io::Errno::ILSEQ)
70}
71
72const PR_SET_NAME: c_int = 15;
73
74/// Set the name of the calling thread.
75///
76/// # References
77/// - [`prctl(PR_SET_NAME,...)`]
78///
79/// [`prctl(PR_SET_NAME,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
80#[inline]
81pub fn set_name(name: &CStr) -> io::Result<()> {
82    unsafe { prctl_2args(PR_SET_NAME, name.as_ptr() as *mut _) }.map(|_r| ())
83}
84
85//
86// PR_GET_SECCOMP/PR_SET_SECCOMP
87//
88
89//const PR_GET_SECCOMP: c_int = 21;
90
// Raw `SECCOMP_MODE_*` values; they double as the discriminants of
// `SecureComputingMode` and as the match arms of its `TryFrom<i32>` impl.
const SECCOMP_MODE_DISABLED: i32 = 0;
const SECCOMP_MODE_STRICT: i32 = 1;
const SECCOMP_MODE_FILTER: i32 = 2;

/// `SECCOMP_MODE_*`.
///
/// Secure computing mode selector. The enum is `#[repr(i32)]` and each
/// variant's discriminant is the matching `SECCOMP_MODE_*` constant.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
#[repr(i32)]
pub enum SecureComputingMode {
    /// Secure computing is not in use.
    Disabled = SECCOMP_MODE_DISABLED,
    /// Use hard-coded filter.
    Strict = SECCOMP_MODE_STRICT,
    /// Use user-supplied filter.
    Filter = SECCOMP_MODE_FILTER,
}
106
107impl TryFrom<i32> for SecureComputingMode {
108    type Error = io::Errno;
109
110    fn try_from(value: i32) -> Result<Self, Self::Error> {
111        match value {
112            SECCOMP_MODE_DISABLED => Ok(Self::Disabled),
113            SECCOMP_MODE_STRICT => Ok(Self::Strict),
114            SECCOMP_MODE_FILTER => Ok(Self::Filter),
115            _ => Err(io::Errno::RANGE),
116        }
117    }
118}
119
120/*
121/// Get the secure computing mode of the calling thread.
122///
123/// If the caller is not in secure computing mode, this returns [`SecureComputingMode::Disabled`].
124/// If the caller is in strict secure computing mode, then this call will cause a `SIGKILL` signal
125/// to be sent to the process.
126/// If the caller is in filter mode, and this system call is allowed by the seccomp filters,
127/// it returns [`SecureComputingMode::Filter`]; otherwise, the process is killed with
128/// a `SIGKILL` signal.
129///
130/// Since Linux 3.8, the Seccomp field of the `/proc/[pid]/status` file provides a method
131/// of obtaining the same information, without the risk that the process is killed; see `proc(5)`.
132///
133/// # References
134/// - [`prctl(PR_GET_SECCOMP,...)`]
135///
136/// [`prctl(PR_GET_SECCOMP,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
137#[inline]
138pub fn secure_computing_mode() -> io::Result<SecureComputingMode> {
139    unsafe { prctl_1arg(PR_GET_SECCOMP) }.and_then(TryInto::try_into)
140}
141*/
142
143const PR_SET_SECCOMP: c_int = 22;
144
145/// Set the secure computing mode for the calling thread, to limit the available system calls.
146///
147/// # References
148/// - [`prctl(PR_SET_SECCOMP,...)`]
149///
150/// [`prctl(PR_SET_SECCOMP,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
151#[inline]
152pub fn set_secure_computing_mode(mode: SecureComputingMode) -> io::Result<()> {
153    unsafe { prctl_2args(PR_SET_SECCOMP, mode as usize as *mut _) }.map(|_r| ())
154}
155
156//
157// PR_CAPBSET_READ/PR_CAPBSET_DROP
158//
159
// `prctl` option code used by `is_in_capability_bounding_set`.
const PR_CAPBSET_READ: c_int = 23;

// Numeric `CAP_*` capability identifiers. Each one is used as the
// discriminant of the corresponding `Capability` variant.
const CAP_CHOWN: u32 = 0;
const CAP_DAC_OVERRIDE: u32 = 1;
const CAP_DAC_READ_SEARCH: u32 = 2;
const CAP_FOWNER: u32 = 3;
const CAP_FSETID: u32 = 4;
const CAP_KILL: u32 = 5;
const CAP_SETGID: u32 = 6;
const CAP_SETUID: u32 = 7;
const CAP_SETPCAP: u32 = 8;
const CAP_LINUX_IMMUTABLE: u32 = 9;
const CAP_NET_BIND_SERVICE: u32 = 10;
const CAP_NET_BROADCAST: u32 = 11;
const CAP_NET_ADMIN: u32 = 12;
const CAP_NET_RAW: u32 = 13;
const CAP_IPC_LOCK: u32 = 14;
const CAP_IPC_OWNER: u32 = 15;
const CAP_SYS_MODULE: u32 = 16;
const CAP_SYS_RAWIO: u32 = 17;
const CAP_SYS_CHROOT: u32 = 18;
const CAP_SYS_PTRACE: u32 = 19;
const CAP_SYS_PACCT: u32 = 20;
const CAP_SYS_ADMIN: u32 = 21;
const CAP_SYS_BOOT: u32 = 22;
const CAP_SYS_NICE: u32 = 23;
const CAP_SYS_RESOURCE: u32 = 24;
const CAP_SYS_TIME: u32 = 25;
const CAP_SYS_TTY_CONFIG: u32 = 26;
const CAP_MKNOD: u32 = 27;
const CAP_LEASE: u32 = 28;
const CAP_AUDIT_WRITE: u32 = 29;
const CAP_AUDIT_CONTROL: u32 = 30;
const CAP_SETFCAP: u32 = 31;
const CAP_MAC_OVERRIDE: u32 = 32;
const CAP_MAC_ADMIN: u32 = 33;
const CAP_SYSLOG: u32 = 34;
const CAP_WAKE_ALARM: u32 = 35;
const CAP_BLOCK_SUSPEND: u32 = 36;
const CAP_AUDIT_READ: u32 = 37;
const CAP_PERFMON: u32 = 38;
const CAP_BPF: u32 = 39;
const CAP_CHECKPOINT_RESTORE: u32 = 40;
203
/// Linux per-thread capability.
///
/// The enum is `#[repr(u32)]`; every variant's discriminant is the matching
/// `CAP_*` constant, so a variant can be cast to its numeric capability value.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
#[repr(u32)]
pub enum Capability {
    /// In a system with the `_POSIX_CHOWN_RESTRICTED` option defined, this overrides
    /// the restriction of changing file ownership and group ownership.
    ChangeOwnership = CAP_CHOWN,
    /// Override all DAC access, including ACL execute access if `_POSIX_ACL` is defined.
    /// Excluding DAC access covered by [`Capability::LinuxImmutable`].
    DACOverride = CAP_DAC_OVERRIDE,
    /// Overrides all DAC restrictions regarding read and search on files and directories,
    /// including ACL restrictions if `_POSIX_ACL` is defined. Excluding DAC access covered
    /// by [`Capability::LinuxImmutable`].
    DACReadSearch = CAP_DAC_READ_SEARCH,
    /// Overrides all restrictions about allowed operations on files, where file owner ID must be
    /// equal to the user ID, except where [`Capability::FileSetID`] is applicable.
    /// It doesn't override MAC and DAC restrictions.
    FileOwner = CAP_FOWNER,
    /// Overrides the following restrictions that the effective user ID shall match the file owner
    /// ID when setting the `S_ISUID` and `S_ISGID` bits on that file; that the effective group ID
    /// (or one of the supplementary group IDs) shall match the file owner ID when setting the
    /// `S_ISGID` bit on that file; that the `S_ISUID` and `S_ISGID` bits are cleared on successful
    /// return from `chown` (not implemented).
    FileSetID = CAP_FSETID,
    /// Overrides the restriction that the real or effective user ID of a process sending a signal
    /// must match the real or effective user ID of the process receiving the signal.
    Kill = CAP_KILL,
    /// Allows `setgid` manipulation. Allows `setgroups`. Allows forged gids on socket
    /// credentials passing.
    SetGroupID = CAP_SETGID,
    /// Allows `set*uid` manipulation (including fsuid). Allows forged pids on socket
    /// credentials passing.
    SetUserID = CAP_SETUID,
    /// Without VFS support for capabilities:
    /// - Transfer any capability in your permitted set to any pid.
    /// - remove any capability in your permitted set from any pid.
    ///   With VFS support for capabilities (neither of above, but)
    /// - Add any capability from current's capability bounding set to the current process'
    ///   inheritable set.
    /// - Allow taking bits out of capability bounding set.
    /// - Allow modification of the securebits for a process.
    SetPermittedCapabilities = CAP_SETPCAP,
    /// Allow modification of `S_IMMUTABLE` and `S_APPEND` file attributes.
    LinuxImmutable = CAP_LINUX_IMMUTABLE,
    /// Allows binding to TCP/UDP sockets below 1024. Allows binding to ATM VCIs below 32.
    NetBindService = CAP_NET_BIND_SERVICE,
    /// Allow broadcasting, listen to multicast.
    NetBroadcast = CAP_NET_BROADCAST,
    /// Allow interface configuration. Allow administration of IP firewall, masquerading and
    /// accounting. Allow setting debug option on sockets. Allow modification of routing tables.
    /// Allow setting arbitrary process / process group ownership on sockets. Allow binding to any
    /// address for transparent proxying (also via [`Capability::NetRaw`]). Allow setting TOS
    /// (type of service). Allow setting promiscuous mode. Allow clearing driver statistics.
    /// Allow multicasting. Allow read/write of device-specific registers. Allow activation of ATM
    /// control sockets.
    NetAdmin = CAP_NET_ADMIN,
    /// Allow use of `RAW` sockets. Allow use of `PACKET` sockets. Allow binding to any address for
    /// transparent proxying (also via [`Capability::NetAdmin`]).
    NetRaw = CAP_NET_RAW,
    /// Allow locking of shared memory segments. Allow mlock and mlockall (which doesn't really have
    /// anything to do with IPC).
    IPCLock = CAP_IPC_LOCK,
    /// Override IPC ownership checks.
    IPCOwner = CAP_IPC_OWNER,
    /// Insert and remove kernel modules - modify kernel without limit.
    SystemModule = CAP_SYS_MODULE,
    /// Allow ioperm/iopl access. Allow sending USB messages to any device via `/dev/bus/usb`.
    SystemRawIO = CAP_SYS_RAWIO,
    /// Allow use of `chroot`.
    SystemChangeRoot = CAP_SYS_CHROOT,
    /// Allow `ptrace` of any process.
    SystemProcessTrace = CAP_SYS_PTRACE,
    /// Allow configuration of process accounting.
    SystemProcessAccounting = CAP_SYS_PACCT,
    /// Allow configuration of the secure attention key. Allow administration of the random device.
    /// Allow examination and configuration of disk quotas. Allow setting the domainname.
    /// Allow setting the hostname. Allow `mount` and `umount`, setting up new smb connection.
    /// Allow some autofs root ioctls. Allow nfsservctl. Allow `VM86_REQUEST_IRQ`.
    /// Allow to read/write pci config on alpha. Allow `irix_prctl` on mips (setstacksize).
    /// Allow flushing all cache on m68k (`sys_cacheflush`). Allow removing semaphores.
    /// Used instead of [`Capability::ChangeOwnership`] to "chown" IPC message queues, semaphores
    /// and shared memory. Allow locking/unlocking of shared memory segment. Allow turning swap
    /// on/off. Allow forged pids on socket credentials passing. Allow setting readahead and
    /// flushing buffers on block devices. Allow setting geometry in floppy driver. Allow turning
    /// DMA on/off in `xd` driver. Allow administration of md devices (mostly the above, but some
    /// extra ioctls). Allow tuning the ide driver. Allow access to the nvram device. Allow
    /// administration of `apm_bios`, serial and bttv (TV) device. Allow manufacturer commands in
    /// isdn CAPI support driver. Allow reading non-standardized portions of pci configuration
    /// space. Allow DDI debug ioctl on sbpcd driver. Allow setting up serial ports. Allow sending
    /// raw qic-117 commands. Allow enabling/disabling tagged queuing on SCSI controllers and
    /// sending arbitrary SCSI commands. Allow setting encryption key on loopback filesystem.
    /// Allow setting zone reclaim policy. Allow everything under
    /// [`Capability::BerkeleyPacketFilters`] and [`Capability::PerformanceMonitoring`] for backward
    /// compatibility.
    SystemAdmin = CAP_SYS_ADMIN,
    /// Allow use of `reboot`.
    SystemBoot = CAP_SYS_BOOT,
    /// Allow raising priority and setting priority on other (different UID) processes. Allow use of
    /// FIFO and round-robin (realtime) scheduling on own processes and setting the scheduling
    /// algorithm used by another process. Allow setting cpu affinity on other processes.
    /// Allow setting realtime ioprio class. Allow setting ioprio class on other processes.
    SystemNice = CAP_SYS_NICE,
    /// Override resource limits. Set resource limits. Override quota limits. Override reserved
    /// space on ext2 filesystem. Modify data journaling mode on ext3 filesystem (uses journaling
    /// resources). NOTE: ext2 honors fsuid when checking for resource overrides, so you can
    /// override using fsuid too. Override size restrictions on IPC message queues. Allow more than
    /// 64hz interrupts from the real-time clock. Override max number of consoles on console
    /// allocation. Override max number of keymaps. Control memory reclaim behavior.
    SystemResource = CAP_SYS_RESOURCE,
    /// Allow manipulation of system clock. Allow `irix_stime` on mips. Allow setting the real-time
    /// clock.
    SystemTime = CAP_SYS_TIME,
    /// Allow configuration of tty devices. Allow `vhangup` of tty.
    SystemTTYConfig = CAP_SYS_TTY_CONFIG,
    /// Allow the privileged aspects of `mknod`.
    MakeNode = CAP_MKNOD,
    /// Allow taking of leases on files.
    Lease = CAP_LEASE,
    /// Allow writing the audit log via unicast netlink socket.
    AuditWrite = CAP_AUDIT_WRITE,
    /// Allow configuration of audit via unicast netlink socket.
    AuditControl = CAP_AUDIT_CONTROL,
    /// Set or remove capabilities on files. Map `uid=0` into a child user namespace.
    SetFileCapabilities = CAP_SETFCAP,
    /// Override MAC access. The base kernel enforces no MAC policy. An LSM may enforce a MAC
    /// policy, and if it does and it chooses to implement capability based overrides of that
    /// policy, this is the capability it should use to do so.
    MACOverride = CAP_MAC_OVERRIDE,
    /// Allow MAC configuration or state changes. The base kernel requires no MAC configuration.
    /// An LSM may enforce a MAC policy, and if it does and it chooses to implement capability based
    /// checks on modifications to that policy or the data required to maintain it, this is the
    /// capability it should use to do so.
    MACAdmin = CAP_MAC_ADMIN,
    /// Allow configuring the kernel's `syslog` (`printk` behaviour).
    SystemLog = CAP_SYSLOG,
    /// Allow triggering something that will wake the system.
    WakeAlarm = CAP_WAKE_ALARM,
    /// Allow preventing system suspends.
    BlockSuspend = CAP_BLOCK_SUSPEND,
    /// Allow reading the audit log via multicast netlink socket.
    AuditRead = CAP_AUDIT_READ,
    /// Allow system performance and observability privileged operations using `perf_events`,
    /// `i915_perf` and other kernel subsystems.
    PerformanceMonitoring = CAP_PERFMON,
    /// This capability allows the following BPF operations:
    /// - Creating all types of BPF maps
    /// - Advanced verifier features
    ///   - Indirect variable access
    ///   - Bounded loops
    ///   - BPF to BPF function calls
    ///   - Scalar precision tracking
    ///   - Larger complexity limits
    ///   - Dead code elimination
    ///   - And potentially other features
    /// - Loading BPF Type Format (BTF) data
    /// - Retrieve `xlated` and JITed code of BPF programs
    /// - Use `bpf_spin_lock` helper
    ///
    /// [`Capability::PerformanceMonitoring`] relaxes the verifier checks further:
    /// - BPF programs can use pointer-to-integer conversions
    /// - speculation attack hardening measures are bypassed
    /// - `bpf_probe_read` to read arbitrary kernel memory is allowed
    /// - `bpf_trace_printk` to print kernel memory is allowed
    ///
    /// [`Capability::SystemAdmin`] is required to use `bpf_probe_write_user`.
    ///
    /// [`Capability::SystemAdmin`] is required to iterate system wide loaded
    /// programs, maps, links, BTFs and convert their IDs to file descriptors.
    ///
    /// [`Capability::PerformanceMonitoring`] and [`Capability::BerkeleyPacketFilters`] are required
    /// to load tracing programs.
    /// [`Capability::NetAdmin`] and [`Capability::BerkeleyPacketFilters`] are required to load
    /// networking programs.
    BerkeleyPacketFilters = CAP_BPF,
    /// Allow checkpoint/restore related operations. Allow PID selection during `clone3`.
    /// Allow writing to `ns_last_pid`.
    CheckpointRestore = CAP_CHECKPOINT_RESTORE,
}
382
383/// Check if the specified capability is in the calling thread's capability bounding set.
384///
385/// # References
386/// - [`prctl(PR_CAPBSET_READ,...)`]
387///
388/// [`prctl(PR_CAPBSET_READ,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
389#[inline]
390pub fn is_in_capability_bounding_set(capability: Capability) -> io::Result<bool> {
391    unsafe { prctl_2args(PR_CAPBSET_READ, capability as usize as *mut _) }.map(|r| r != 0)
392}
393
394const PR_CAPBSET_DROP: c_int = 24;
395
396/// If the calling thread has the [`Capability::SetPermittedCapabilities`] capability within its
397/// user namespace, then drop the specified capability from the thread's capability bounding set.
398///
399/// # References
400/// - [`prctl(PR_CAPBSET_DROP,...)`]
401///
402/// [`prctl(PR_CAPBSET_DROP,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
403#[inline]
404pub fn remove_capability_from_capability_bounding_set(capability: Capability) -> io::Result<()> {
405    unsafe { prctl_2args(PR_CAPBSET_DROP, capability as usize as *mut _) }.map(|_r| ())
406}
407
408//
409// PR_GET_SECUREBITS/PR_SET_SECUREBITS
410//
411
// `prctl` option code used by `capabilities_secure_bits`.
const PR_GET_SECUREBITS: c_int = 27;

bitflags! {
    /// `SECBIT_*`.
    ///
    /// The flags come in pairs: each behavior bit is followed by a `*_LOCKED`
    /// bit in the next-higher position.
    pub struct CapabilitiesSecureBits: u32 {
        /// If this bit is set, then the kernel does not grant capabilities when
        /// a `set-user-ID-root` program is executed, or when a process with an effective or real
        /// UID of 0 calls `execve`.
        const NO_ROOT = 1_u32 << 0;
        /// Set [`CapabilitiesSecureBits::NO_ROOT`] irreversibly.
        const NO_ROOT_LOCKED = 1_u32 << 1;
        /// Setting this flag stops the kernel from adjusting the process's permitted, effective,
        /// and ambient capability sets when the thread's effective and filesystem UIDs are switched
        /// between zero and nonzero values.
        const NO_SETUID_FIXUP = 1_u32 << 2;
        /// Set [`CapabilitiesSecureBits::NO_SETUID_FIXUP`] irreversibly.
        const NO_SETUID_FIXUP_LOCKED = 1_u32 << 3;
        /// Setting this flag allows a thread that has one or more 0 UIDs to retain capabilities in
        /// its permitted set when it switches all of its UIDs to nonzero values.
        const KEEP_CAPS = 1_u32 << 4;
        /// Set [`CapabilitiesSecureBits::KEEP_CAPS`] irreversibly.
        const KEEP_CAPS_LOCKED = 1_u32 << 5;
        /// Setting this flag disallows raising ambient capabilities via the `prctl`'s
        /// `PR_CAP_AMBIENT_RAISE` operation.
        const NO_CAP_AMBIENT_RAISE = 1_u32 << 6;
        /// Set [`CapabilitiesSecureBits::NO_CAP_AMBIENT_RAISE`] irreversibly.
        const NO_CAP_AMBIENT_RAISE_LOCKED = 1_u32 << 7;
    }
}
441
442/// Get the `securebits` flags of the calling thread.
443///
444/// # References
445/// - [`prctl(PR_GET_SECUREBITS,...)`]
446///
447/// [`prctl(PR_GET_SECUREBITS,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
448#[inline]
449pub fn capabilities_secure_bits() -> io::Result<CapabilitiesSecureBits> {
450    let r = unsafe { prctl_1arg(PR_GET_SECUREBITS)? } as c_uint;
451    CapabilitiesSecureBits::from_bits(r).ok_or(io::Errno::RANGE)
452}
453
454const PR_SET_SECUREBITS: c_int = 28;
455
456/// Set the `securebits` flags of the calling thread.
457///
458/// # References
459/// - [`prctl(PR_SET_SECUREBITS,...)`]
460///
461/// [`prctl(PR_SET_SECUREBITS,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
462#[inline]
463pub fn set_capabilities_secure_bits(bits: CapabilitiesSecureBits) -> io::Result<()> {
464    unsafe { prctl_2args(PR_SET_SECUREBITS, bits.bits() as usize as *mut _) }.map(|_r| ())
465}
466
467//
468// PR_GET_TIMERSLACK/PR_SET_TIMERSLACK
469//
470
471const PR_GET_TIMERSLACK: c_int = 30;
472
473/// Get the `current` timer slack value of the calling thread.
474///
475/// # References
476/// - [`prctl(PR_GET_TIMERSLACK,...)`]
477///
478/// [`prctl(PR_GET_TIMERSLACK,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
479#[inline]
480pub fn current_timer_slack() -> io::Result<u64> {
481    unsafe { prctl_1arg(PR_GET_TIMERSLACK) }.map(|r| r as u64)
482}
483
484const PR_SET_TIMERSLACK: c_int = 29;
485
486/// Sets the `current` timer slack value for the calling thread.
487///
488/// # References
489/// - [`prctl(PR_SET_TIMERSLACK,...)`]
490///
491/// [`prctl(PR_SET_TIMERSLACK,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
492#[inline]
493pub fn set_current_timer_slack(value: Option<NonZeroU64>) -> io::Result<()> {
494    let value = usize::try_from(value.map_or(0, NonZeroU64::get)).map_err(|_r| io::Errno::RANGE)?;
495    unsafe { prctl_2args(PR_SET_TIMERSLACK, value as *mut _) }.map(|_r| ())
496}
497
498//
499// PR_GET_NO_NEW_PRIVS/PR_SET_NO_NEW_PRIVS
500//
501
502const PR_GET_NO_NEW_PRIVS: c_int = 39;
503
504/// Get the value of the `no_new_privs` attribute for the calling thread.
505///
506/// # References
507/// - [`prctl(PR_GET_NO_NEW_PRIVS,...)`]
508///
509/// [`prctl(PR_GET_NO_NEW_PRIVS,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
510#[inline]
511pub fn no_new_privs() -> io::Result<bool> {
512    unsafe { prctl_1arg(PR_GET_NO_NEW_PRIVS) }.map(|r| r != 0)
513}
514
515const PR_SET_NO_NEW_PRIVS: c_int = 38;
516
517/// Set the calling thread's `no_new_privs` attribute.
518///
519/// # References
520/// - [`prctl(PR_SET_NO_NEW_PRIVS,...)`]
521///
522/// [`prctl(PR_SET_NO_NEW_PRIVS,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
523#[inline]
524pub fn set_no_new_privs(no_new_privs: bool) -> io::Result<()> {
525    unsafe { prctl_2args(PR_SET_NO_NEW_PRIVS, no_new_privs as usize as *mut _) }.map(|_r| ())
526}
527
528//
529// PR_GET_TID_ADDRESS
530//
531
532const PR_GET_TID_ADDRESS: c_int = 40;
533
534/// Get the `clear_child_tid` address set by `set_tid_address`
535/// and `clone`'s `CLONE_CHILD_CLEARTID` flag.
536///
537/// # References
538/// - [`prctl(PR_GET_TID_ADDRESS,...)`]
539///
540/// [`prctl(PR_GET_TID_ADDRESS,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
541#[inline]
542pub fn get_clear_child_tid_address() -> io::Result<Option<NonNull<c_void>>> {
543    unsafe { prctl_get_at_arg2_optional::<*mut c_void>(PR_GET_TID_ADDRESS) }.map(NonNull::new)
544}
545
546//
547// PR_GET_THP_DISABLE/PR_SET_THP_DISABLE
548//
549
550const PR_GET_THP_DISABLE: c_int = 42;
551
552/// Get the current setting of the `THP disable` flag for the calling thread.
553///
554/// # References
555/// - [`prctl(PR_GET_THP_DISABLE,...)`]
556///
557/// [`prctl(PR_GET_THP_DISABLE,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
558#[inline]
559pub fn transparent_huge_pages_are_disabled() -> io::Result<bool> {
560    unsafe { prctl_1arg(PR_GET_THP_DISABLE) }.map(|r| r != 0)
561}
562
563const PR_SET_THP_DISABLE: c_int = 41;
564
565/// Set the state of the `THP disable` flag for the calling thread.
566///
567/// # References
568/// - [`prctl(PR_SET_THP_DISABLE,...)`]
569///
570/// [`prctl(PR_SET_THP_DISABLE,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
571#[inline]
572pub fn disable_transparent_huge_pages(thp_disable: bool) -> io::Result<()> {
573    unsafe { prctl_2args(PR_SET_THP_DISABLE, thp_disable as usize as *mut _) }.map(|_r| ())
574}
575
576//
577// PR_CAP_AMBIENT
578//
579
580const PR_CAP_AMBIENT: c_int = 47;
581
582const PR_CAP_AMBIENT_IS_SET: usize = 1;
583
584/// Check if the specified capability is in the ambient set.
585///
586/// # References
587/// - [`prctl(PR_CAP_AMBIENT,PR_CAP_AMBIENT_IS_SET,...)`]
588///
589/// [`prctl(PR_CAP_AMBIENT,PR_CAP_AMBIENT_IS_SET,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
590#[inline]
591pub fn capability_is_in_ambient_capability_set(capability: Capability) -> io::Result<bool> {
592    let cap = capability as usize as *mut _;
593    unsafe { prctl_3args(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET as *mut _, cap) }.map(|r| r != 0)
594}
595
596const PR_CAP_AMBIENT_CLEAR_ALL: usize = 4;
597
598/// Remove all capabilities from the ambient set.
599///
600/// # References
601/// - [`prctl(PR_CAP_AMBIENT,PR_CAP_AMBIENT_CLEAR_ALL,...)`]
602///
603/// [`prctl(PR_CAP_AMBIENT,PR_CAP_AMBIENT_CLEAR_ALL,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
604#[inline]
605pub fn clear_ambient_capability_set() -> io::Result<()> {
606    unsafe { prctl_2args(PR_CAP_AMBIENT, PR_CAP_AMBIENT_CLEAR_ALL as *mut _) }.map(|_r| ())
607}
608
609const PR_CAP_AMBIENT_RAISE: usize = 2;
610const PR_CAP_AMBIENT_LOWER: usize = 3;
611
612/// Add or remove the specified capability to the ambient set.
613///
614/// # References
615/// - [`prctl(PR_CAP_AMBIENT,...)`]
616///
617/// [`prctl(PR_CAP_AMBIENT,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
618#[inline]
619pub fn configure_capability_in_ambient_capability_set(
620    capability: Capability,
621    enable: bool,
622) -> io::Result<()> {
623    let sub_operation = if enable {
624        PR_CAP_AMBIENT_RAISE
625    } else {
626        PR_CAP_AMBIENT_LOWER
627    };
628    let cap = capability as usize as *mut _;
629
630    unsafe { prctl_3args(PR_CAP_AMBIENT, sub_operation as *mut _, cap) }.map(|_r| ())
631}
632
633//
634// PR_SVE_GET_VL/PR_SVE_SET_VL
635//
636
// `prctl` option code used by `sve_vector_length_configuration`.
const PR_SVE_GET_VL: c_int = 51;

// Low 16 bits of the SVE configuration word: the vector length.
const PR_SVE_VL_LEN_MASK: u32 = 0xffff;
// Bit 17 of the SVE configuration word: the "inherit" flag.
const PR_SVE_VL_INHERIT: u32 = 1_u32 << 17;

/// Scalable Vector Extension vector length configuration.
///
/// Decoded form of the configuration word: the length is taken from the bits
/// selected by `PR_SVE_VL_LEN_MASK` and the flag from `PR_SVE_VL_INHERIT`.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub struct SVEVectorLengthConfig {
    /// Vector length in bytes.
    pub vector_length_in_bytes: u32,
    /// Vector length inherited across `execve`.
    pub vector_length_inherited_across_execve: bool,
}
650
651/// Get the thread's current SVE vector length configuration.
652///
653/// # References
654/// - [`prctl(PR_SVE_GET_VL,...)`]
655///
656/// [`prctl(PR_SVE_GET_VL,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
657#[inline]
658pub fn sve_vector_length_configuration() -> io::Result<SVEVectorLengthConfig> {
659    let bits = unsafe { prctl_1arg(PR_SVE_GET_VL)? } as c_uint;
660    Ok(SVEVectorLengthConfig {
661        vector_length_in_bytes: bits & PR_SVE_VL_LEN_MASK,
662        vector_length_inherited_across_execve: (bits & PR_SVE_VL_INHERIT) != 0,
663    })
664}
665
666const PR_SVE_SET_VL: c_int = 50;
667
668const PR_SVE_SET_VL_ONEXEC: u32 = 1_u32 << 18;
669
670/// Configure the thread's vector length of Scalable Vector Extension.
671///
672/// # References
673/// - [`prctl(PR_SVE_SET_VL,...)`]
674///
675/// # Safety
676///
677/// Please ensure the conditions necessary to safely call this function,
678/// as detailed in the references above.
679///
680/// [`prctl(PR_SVE_SET_VL,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
681#[inline]
682pub unsafe fn set_sve_vector_length_configuration(
683    vector_length_in_bytes: usize,
684    vector_length_inherited_across_execve: bool,
685    defer_change_to_next_execve: bool,
686) -> io::Result<()> {
687    let vector_length_in_bytes =
688        u32::try_from(vector_length_in_bytes).map_err(|_r| io::Errno::RANGE)?;
689
690    let mut bits = vector_length_in_bytes & PR_SVE_VL_LEN_MASK;
691
692    if vector_length_inherited_across_execve {
693        bits |= PR_SVE_VL_INHERIT;
694    }
695
696    if defer_change_to_next_execve {
697        bits |= PR_SVE_SET_VL_ONEXEC;
698    }
699
700    prctl_2args(PR_SVE_SET_VL, bits as usize as *mut _).map(|_r| ())
701}
702
703//
704// PR_PAC_RESET_KEYS
705//
706
707const PR_PAC_RESET_KEYS: c_int = 54;
708
709/// Securely reset the thread's pointer authentication keys to fresh random values generated
710/// by the kernel.
711///
712/// # References
713/// - [`prctl(PR_PAC_RESET_KEYS,...)`]
714///
715/// # Safety
716///
717/// Please ensure the conditions necessary to safely call this function,
718/// as detailed in the references above.
719///
720/// [`prctl(PR_PAC_RESET_KEYS,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
721#[inline]
722pub unsafe fn reset_pointer_authentication_keys(
723    keys: Option<PointerAuthenticationKeys>,
724) -> io::Result<()> {
725    let keys = keys.as_ref().map_or(0_u32, PointerAuthenticationKeys::bits);
726    prctl_2args(PR_PAC_RESET_KEYS, keys as usize as *mut _).map(|_r| ())
727}
728
729//
730// PR_GET_TAGGED_ADDR_CTRL/PR_SET_TAGGED_ADDR_CTRL
731//
732
// `prctl` option code used by `current_tagged_address_mode`.
const PR_GET_TAGGED_ADDR_CTRL: c_int = 56;

// The MTE tag field occupies 16 bits starting at bit 3 of the control word,
// directly above the three `TaggedAddressMode` flag bits.
const PR_MTE_TAG_SHIFT: u32 = 3;
const PR_MTE_TAG_MASK: u32 = 0xffff_u32 << PR_MTE_TAG_SHIFT;

bitflags! {
    /// Zero means addresses that are passed for the purpose of being dereferenced by the kernel must be untagged.
    ///
    /// The flags occupy the three lowest bits of the control word.
    pub struct TaggedAddressMode: u32 {
        /// Addresses that are passed for the purpose of being dereferenced by the kernel may be tagged.
        const ENABLED = 1_u32 << 0;
        /// Synchronous tag check fault mode.
        const TCF_SYNC = 1_u32 << 1;
        /// Asynchronous tag check fault mode.
        const TCF_ASYNC = 1_u32 << 2;
    }
}
749
750/// Get the current tagged address mode for the calling thread.
751///
752/// # References
753/// - [`prctl(PR_GET_TAGGED_ADDR_CTRL,...)`]
754///
755/// [`prctl(PR_GET_TAGGED_ADDR_CTRL,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
756#[inline]
757pub fn current_tagged_address_mode() -> io::Result<(Option<TaggedAddressMode>, u32)> {
758    let r = unsafe { prctl_1arg(PR_GET_TAGGED_ADDR_CTRL)? } as c_uint;
759    let mode = r & 0b111_u32;
760    let mte_tag = (r & PR_MTE_TAG_MASK) >> PR_MTE_TAG_SHIFT;
761    Ok((TaggedAddressMode::from_bits(mode), mte_tag))
762}
763
764const PR_SET_TAGGED_ADDR_CTRL: c_int = 55;
765
766/// Controls support for passing tagged user-space addresses to the kernel.
767///
768/// # References
769/// - [`prctl(PR_SET_TAGGED_ADDR_CTRL,...)`]
770///
771/// # Safety
772///
773/// Please ensure the conditions necessary to safely call this function,
774/// as detailed in the references above.
775///
776/// [`prctl(PR_SET_TAGGED_ADDR_CTRL,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
777#[inline]
778pub unsafe fn set_current_tagged_address_mode(
779    mode: Option<TaggedAddressMode>,
780    mte_tag: u32,
781) -> io::Result<()> {
782    let config = mode.as_ref().map_or(0_u32, TaggedAddressMode::bits)
783        | ((mte_tag << PR_MTE_TAG_SHIFT) & PR_MTE_TAG_MASK);
784    prctl_2args(PR_SET_TAGGED_ADDR_CTRL, config as usize as *mut _).map(|_r| ())
785}
786
787//
788// PR_SET_SYSCALL_USER_DISPATCH
789//
790
791const PR_SET_SYSCALL_USER_DISPATCH: c_int = 59;
792
793const PR_SYS_DISPATCH_OFF: usize = 0;
794
795/// Disable Syscall User Dispatch mechanism.
796///
797/// # References
798/// - [`prctl(PR_SET_SYSCALL_USER_DISPATCH,PR_SYS_DISPATCH_OFF,...)`]
799///
800/// # Safety
801///
802/// Please ensure the conditions necessary to safely call this function,
803/// as detailed in the references above.
804///
805/// [`prctl(PR_SET_SYSCALL_USER_DISPATCH,PR_SYS_DISPATCH_OFF,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
806#[inline]
807pub unsafe fn disable_syscall_user_dispatch() -> io::Result<()> {
808    prctl_2args(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_OFF as *mut _).map(|_r| ())
809}
810
/// Second `prctl` argument that switches Syscall User Dispatch on.
const PR_SYS_DISPATCH_ON: usize = 1;

/// Fast switch flag value: system calls may execute.
const SYSCALL_DISPATCH_FILTER_ALLOW: u8 = 0_u8;
/// Fast switch flag value: system calls are blocked from executing.
const SYSCALL_DISPATCH_FILTER_BLOCK: u8 = 1_u8;

/// State of the fast switch flag, which steers the Syscall User Dispatch
/// mechanism without the need to issue a syscall.
#[repr(u8)]
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum SysCallUserDispatchFastSwitch {
    /// System calls are allowed to execute.
    Allow = SYSCALL_DISPATCH_FILTER_ALLOW,
    /// System calls are blocked from executing.
    Block = SYSCALL_DISPATCH_FILTER_BLOCK,
}
828
829impl TryFrom<u8> for SysCallUserDispatchFastSwitch {
830    type Error = io::Errno;
831
832    fn try_from(value: u8) -> Result<Self, Self::Error> {
833        match value {
834            SYSCALL_DISPATCH_FILTER_ALLOW => Ok(Self::Allow),
835            SYSCALL_DISPATCH_FILTER_BLOCK => Ok(Self::Block),
836            _ => Err(io::Errno::RANGE),
837        }
838    }
839}
840
841/// Enable Syscall User Dispatch mechanism.
842///
843/// # References
844/// - [`prctl(PR_SET_SYSCALL_USER_DISPATCH,PR_SYS_DISPATCH_ON,...)`]
845///
846/// # Safety
847///
848/// Please ensure the conditions necessary to safely call this function,
849/// as detailed in the references above.
850///
851/// [`prctl(PR_SET_SYSCALL_USER_DISPATCH,PR_SYS_DISPATCH_ON,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html
852#[inline]
853pub unsafe fn enable_syscall_user_dispatch(
854    always_allowed_region: &[u8],
855    fast_switch_flag: &AtomicU8,
856) -> io::Result<()> {
857    syscalls::prctl(
858        PR_SET_SYSCALL_USER_DISPATCH,
859        PR_SYS_DISPATCH_ON as *mut _,
860        always_allowed_region.as_ptr() as *mut _,
861        always_allowed_region.len() as *mut _,
862        fast_switch_flag as *const AtomicU8 as *mut _,
863    )
864    .map(|_r| ())
865}
866
867//
868// PR_SCHED_CORE
869//
870
/// `prctl` option grouping all core scheduling operations.
const PR_SCHED_CORE: c_int = 62;

/// Core scheduling operation that reads a cookie.
const PR_SCHED_CORE_GET: usize = 0;

/// Scope value selecting a single thread.
const PR_SCHED_CORE_SCOPE_THREAD: u32 = 0_u32;
/// Scope value selecting a thread group.
const PR_SCHED_CORE_SCOPE_THREAD_GROUP: u32 = 1_u32;
/// Scope value selecting a process group.
const PR_SCHED_CORE_SCOPE_PROCESS_GROUP: u32 = 2_u32;

/// `PR_SCHED_CORE_SCOPE_*`.
#[repr(u32)]
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum CoreSchedulingScope {
    /// Operation will be performed for the thread.
    Thread = PR_SCHED_CORE_SCOPE_THREAD,
    /// Operation will be performed for all tasks in the task group of the
    /// process.
    ThreadGroup = PR_SCHED_CORE_SCOPE_THREAD_GROUP,
    /// Operation will be performed for all processes in the process group.
    ProcessGroup = PR_SCHED_CORE_SCOPE_PROCESS_GROUP,
}
890
891impl TryFrom<u32> for CoreSchedulingScope {
892    type Error = io::Errno;
893
894    fn try_from(value: u32) -> Result<Self, Self::Error> {
895        match value {
896            PR_SCHED_CORE_SCOPE_THREAD => Ok(Self::Thread),
897            PR_SCHED_CORE_SCOPE_THREAD_GROUP => Ok(Self::ThreadGroup),
898            PR_SCHED_CORE_SCOPE_PROCESS_GROUP => Ok(Self::ProcessGroup),
899            _ => Err(io::Errno::RANGE),
900        }
901    }
902}
903
904/// Get core scheduling cookie of a process.
905///
906/// # References
907/// - [`prctl(PR_SCHED_CORE,PR_SCHED_CORE_GET,...)`]
908///
909/// [`prctl(PR_SCHED_CORE,PR_SCHED_CORE_GET,...)`]: https://www.kernel.org/doc/html/v5.18/admin-guide/hw-vuln/core-scheduling.html
910#[inline]
911pub fn core_scheduling_cookie(pid: Pid, scope: CoreSchedulingScope) -> io::Result<u64> {
912    let mut value: MaybeUninit<u64> = MaybeUninit::uninit();
913    unsafe {
914        syscalls::prctl(
915            PR_SCHED_CORE,
916            PR_SCHED_CORE_GET as *mut _,
917            pid.as_raw_nonzero().get() as usize as *mut _,
918            scope as usize as *mut _,
919            value.as_mut_ptr().cast(),
920        )?;
921        Ok(value.assume_init())
922    }
923}
924
925const PR_SCHED_CORE_CREATE: usize = 1;
926
927/// Create unique core scheduling cookie.
928///
929/// # References
930/// - [`prctl(PR_SCHED_CORE,PR_SCHED_CORE_CREATE,...)`]
931///
932/// [`prctl(PR_SCHED_CORE,PR_SCHED_CORE_CREATE,...)`]: https://www.kernel.org/doc/html/v5.18/admin-guide/hw-vuln/core-scheduling.html
933#[inline]
934pub fn create_core_scheduling_cookie(pid: Pid, scope: CoreSchedulingScope) -> io::Result<()> {
935    unsafe {
936        syscalls::prctl(
937            PR_SCHED_CORE,
938            PR_SCHED_CORE_CREATE as *mut _,
939            pid.as_raw_nonzero().get() as usize as *mut _,
940            scope as usize as *mut _,
941            ptr::null_mut(),
942        )
943        .map(|_r| ())
944    }
945}
946
947const PR_SCHED_CORE_SHARE_TO: usize = 2;
948
949/// Push core scheduling cookie to a process.
950///
951/// # References
952/// - [`prctl(PR_SCHED_CORE,PR_SCHED_CORE_SHARE_TO,...)`]
953///
954/// [`prctl(PR_SCHED_CORE,PR_SCHED_CORE_SHARE_TO,...)`]: https://www.kernel.org/doc/html/v5.18/admin-guide/hw-vuln/core-scheduling.html
955#[inline]
956pub fn push_core_scheduling_cookie(pid: Pid, scope: CoreSchedulingScope) -> io::Result<()> {
957    unsafe {
958        syscalls::prctl(
959            PR_SCHED_CORE,
960            PR_SCHED_CORE_SHARE_TO as *mut _,
961            pid.as_raw_nonzero().get() as usize as *mut _,
962            scope as usize as *mut _,
963            ptr::null_mut(),
964        )
965        .map(|_r| ())
966    }
967}
968
969const PR_SCHED_CORE_SHARE_FROM: usize = 3;
970
971/// Pull core scheduling cookie from a process.
972///
973/// # References
974/// - [`prctl(PR_SCHED_CORE,PR_SCHED_CORE_SHARE_FROM,...)`]
975///
976/// [`prctl(PR_SCHED_CORE,PR_SCHED_CORE_SHARE_FROM,...)`]: https://www.kernel.org/doc/html/v5.18/admin-guide/hw-vuln/core-scheduling.html
977#[inline]
978pub fn pull_core_scheduling_cookie(pid: Pid, scope: CoreSchedulingScope) -> io::Result<()> {
979    unsafe {
980        syscalls::prctl(
981            PR_SCHED_CORE,
982            PR_SCHED_CORE_SHARE_FROM as *mut _,
983            pid.as_raw_nonzero().get() as usize as *mut _,
984            scope as usize as *mut _,
985            ptr::null_mut(),
986        )
987        .map(|_r| ())
988    }
989}
990