1#![allow(unsafe_code)] 2 3use core::convert::TryFrom; 4use core::mem::MaybeUninit; 5use core::num::NonZeroU64; 6use core::ptr; 7use core::ptr::NonNull; 8use core::sync::atomic::AtomicU8; 9 10use bitflags::bitflags; 11 12use crate::backend::c::{c_int, c_uint, c_void}; 13use crate::backend::process::syscalls; 14use crate::ffi::{CStr, CString}; 15use crate::io; 16use crate::process::{ 17 prctl_1arg, prctl_2args, prctl_3args, prctl_get_at_arg2_optional, Pid, 18 PointerAuthenticationKeys, 19}; 20 21// 22// PR_GET_KEEPCAPS/PR_SET_KEEPCAPS 23// 24 25const PR_GET_KEEPCAPS: c_int = 7; 26 27/// Get the current state of the calling thread's `keep capabilities` flag. 28/// 29/// # References 30/// - [`prctl(PR_GET_KEEPCAPS,...)`] 31/// 32/// [`prctl(PR_GET_KEEPCAPS,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html 33#[inline] 34pub fn get_keep_capabilities() -> io::Result<bool> { 35 unsafe { prctl_1arg(PR_GET_KEEPCAPS) }.map(|r| r != 0) 36} 37 38const PR_SET_KEEPCAPS: c_int = 8; 39 40/// Set the state of the calling thread's `keep capabilities` flag. 41/// 42/// # References 43/// - [`prctl(PR_SET_KEEPCAPS,...)`] 44/// 45/// [`prctl(PR_SET_KEEPCAPS,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html 46#[inline] 47pub fn set_keep_capabilities(enable: bool) -> io::Result<()> { 48 unsafe { prctl_2args(PR_SET_KEEPCAPS, enable as usize as *mut _) }.map(|_r| ()) 49} 50 51// 52// PR_GET_NAME/PR_SET_NAME 53// 54 55const PR_GET_NAME: c_int = 16; 56 57/// Get the name of the calling thread. 58/// 59/// # References 60/// - [`prctl(PR_GET_NAME,...)`] 61/// 62/// [`prctl(PR_GET_NAME,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html 63#[inline] 64pub fn name() -> io::Result<CString> { 65 let mut buffer = [0_u8; 16]; 66 unsafe { prctl_2args(PR_GET_NAME, buffer.as_mut_ptr().cast())? }; 67 68 let len = buffer.iter().position(|&x| x == 0_u8).unwrap_or(0); 69 CString::new(&buffer[..len]).map_err(|_r| io::Errno::ILSEQ) 70} 71 72const PR_SET_NAME: c_int = 15; 73 74/// Set the name of the calling thread. 75/// 76/// # References 77/// - [`prctl(PR_SET_NAME,...)`] 78/// 79/// [`prctl(PR_SET_NAME,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html 80#[inline] 81pub fn set_name(name: &CStr) -> io::Result<()> { 82 unsafe { prctl_2args(PR_SET_NAME, name.as_ptr() as *mut _) }.map(|_r| ()) 83} 84 85// 86// PR_GET_SECCOMP/PR_SET_SECCOMP 87// 88 89//const PR_GET_SECCOMP: c_int = 21; 90 91const SECCOMP_MODE_DISABLED: i32 = 0; 92const SECCOMP_MODE_STRICT: i32 = 1; 93const SECCOMP_MODE_FILTER: i32 = 2; 94 95/// `SECCOMP_MODE_*`. 96#[derive(Copy, Clone, Debug, Eq, PartialEq)] 97#[repr(i32)] 98pub enum SecureComputingMode { 99 /// Secure computing is not in use. 100 Disabled = SECCOMP_MODE_DISABLED, 101 /// Use hard-coded filter. 102 Strict = SECCOMP_MODE_STRICT, 103 /// Use user-supplied filter. 104 Filter = SECCOMP_MODE_FILTER, 105} 106 107impl TryFrom<i32> for SecureComputingMode { 108 type Error = io::Errno; 109 110 fn try_from(value: i32) -> Result<Self, Self::Error> { 111 match value { 112 SECCOMP_MODE_DISABLED => Ok(Self::Disabled), 113 SECCOMP_MODE_STRICT => Ok(Self::Strict), 114 SECCOMP_MODE_FILTER => Ok(Self::Filter), 115 _ => Err(io::Errno::RANGE), 116 } 117 } 118} 119 120/* 121/// Get the secure computing mode of the calling thread. 122/// 123/// If the caller is not in secure computing mode, this returns [`SecureComputingMode::Disabled`]. 124/// If the caller is in strict secure computing mode, then this call will cause a `SIGKILL` signal 125/// to be sent to the process. 126/// If the caller is in filter mode, and this system call is allowed by the seccomp filters, 127/// it returns [`SecureComputingMode::Filter`]; otherwise, the process is killed with 128/// a `SIGKILL` signal. 129/// 130/// Since Linux 3.8, the Seccomp field of the `/proc/[pid]/status` file provides a method 131/// of obtaining the same information, without the risk that the process is killed; see `proc(5)`. 132/// 133/// # References 134/// - [`prctl(PR_GET_SECCOMP,...)`] 135/// 136/// [`prctl(PR_GET_SECCOMP,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html 137#[inline] 138pub fn secure_computing_mode() -> io::Result<SecureComputingMode> { 139 unsafe { prctl_1arg(PR_GET_SECCOMP) }.and_then(TryInto::try_into) 140} 141*/ 142 143const PR_SET_SECCOMP: c_int = 22; 144 145/// Set the secure computing mode for the calling thread, to limit the available system calls. 146/// 147/// # References 148/// - [`prctl(PR_SET_SECCOMP,...)`] 149/// 150/// [`prctl(PR_SET_SECCOMP,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html 151#[inline] 152pub fn set_secure_computing_mode(mode: SecureComputingMode) -> io::Result<()> { 153 unsafe { prctl_2args(PR_SET_SECCOMP, mode as usize as *mut _) }.map(|_r| ()) 154} 155 156// 157// PR_CAPBSET_READ/PR_CAPBSET_DROP 158// 159 160const PR_CAPBSET_READ: c_int = 23; 161 162const CAP_CHOWN: u32 = 0; 163const CAP_DAC_OVERRIDE: u32 = 1; 164const CAP_DAC_READ_SEARCH: u32 = 2; 165const CAP_FOWNER: u32 = 3; 166const CAP_FSETID: u32 = 4; 167const CAP_KILL: u32 = 5; 168const CAP_SETGID: u32 = 6; 169const CAP_SETUID: u32 = 7; 170const CAP_SETPCAP: u32 = 8; 171const CAP_LINUX_IMMUTABLE: u32 = 9; 172const CAP_NET_BIND_SERVICE: u32 = 10; 173const CAP_NET_BROADCAST: u32 = 11; 174const CAP_NET_ADMIN: u32 = 12; 175const CAP_NET_RAW: u32 = 13; 176const CAP_IPC_LOCK: u32 = 14; 177const CAP_IPC_OWNER: u32 = 15; 178const CAP_SYS_MODULE: u32 = 16; 179const CAP_SYS_RAWIO: u32 = 17; 180const CAP_SYS_CHROOT: u32 = 18; 181const CAP_SYS_PTRACE: u32 = 19; 182const CAP_SYS_PACCT: u32 = 20; 183const CAP_SYS_ADMIN: u32 = 21; 184const CAP_SYS_BOOT: u32 = 22; 185const CAP_SYS_NICE: u32 = 23; 186const CAP_SYS_RESOURCE: u32 = 24; 187const CAP_SYS_TIME: u32 = 25; 188const CAP_SYS_TTY_CONFIG: u32 = 26; 189const CAP_MKNOD: u32 = 27; 190const CAP_LEASE: u32 = 28; 191const CAP_AUDIT_WRITE: u32 = 29; 192const CAP_AUDIT_CONTROL: u32 = 30; 193const CAP_SETFCAP: u32 = 31; 194const CAP_MAC_OVERRIDE: u32 = 32; 195const CAP_MAC_ADMIN: u32 = 33; 196const CAP_SYSLOG: u32 = 34; 197const CAP_WAKE_ALARM: u32 = 35; 198const CAP_BLOCK_SUSPEND: u32 = 36; 199const CAP_AUDIT_READ: u32 = 37; 200const CAP_PERFMON: u32 = 38; 201const CAP_BPF: u32 = 39; 202const CAP_CHECKPOINT_RESTORE: u32 = 40; 203 204/// Linux per-thread capability. 205#[derive(Copy, Clone, Debug, Eq, PartialEq)] 206#[repr(u32)] 207pub enum Capability { 208 /// In a system with the `_POSIX_CHOWN_RESTRICTED` option defined, this overrides 209 /// the restriction of changing file ownership and group ownership. 210 ChangeOwnership = CAP_CHOWN, 211 /// Override all DAC access, including ACL execute access if `_POSIX_ACL` is defined. 212 /// Excluding DAC access covered by [`Capability::LinuxImmutable`]. 213 DACOverride = CAP_DAC_OVERRIDE, 214 /// Overrides all DAC restrictions regarding read and search on files and directories, 215 /// including ACL restrictions if `_POSIX_ACL` is defined. Excluding DAC access covered 216 /// by [`Capability::LinuxImmutable`]. 217 DACReadSearch = CAP_DAC_READ_SEARCH, 218 /// Overrides all restrictions about allowed operations on files, where file owner ID must be 219 /// equal to the user ID, except where [`Capability::FileSetID`] is applicable. 220 /// It doesn't override MAC and DAC restrictions. 221 FileOwner = CAP_FOWNER, 222 /// Overrides the following restrictions that the effective user ID shall match the file owner 223 /// ID when setting the `S_ISUID` and `S_ISGID` bits on that file; that the effective group ID 224 /// (or one of the supplementary group IDs) shall match the file owner ID when setting the 225 /// `S_ISGID` bit on that file; that the `S_ISUID` and `S_ISGID` bits are cleared on successful 226 /// return from `chown` (not implemented). 227 FileSetID = CAP_FSETID, 228 /// Overrides the restriction that the real or effective user ID of a process sending a signal 229 /// must match the real or effective user ID of the process receiving the signal. 230 Kill = CAP_KILL, 231 /// Allows `setgid` manipulation. Allows `setgroups`. Allows forged gids on socket 232 /// credentials passing. 233 SetGroupID = CAP_SETGID, 234 /// Allows `set*uid` manipulation (including fsuid). Allows forged pids on socket 235 /// credentials passing. 236 SetUserID = CAP_SETUID, 237 /// Without VFS support for capabilities: 238 /// - Transfer any capability in your permitted set to any pid. 239 /// - remove any capability in your permitted set from any pid. 240 /// With VFS support for capabilities (neither of above, but) 241 /// - Add any capability from current's capability bounding set to the current process' 242 /// inheritable set. 243 /// - Allow taking bits out of capability bounding set. 244 /// - Allow modification of the securebits for a process. 245 SetPermittedCapabilities = CAP_SETPCAP, 246 /// Allow modification of `S_IMMUTABLE` and `S_APPEND` file attributes. 247 LinuxImmutable = CAP_LINUX_IMMUTABLE, 248 /// Allows binding to TCP/UDP sockets below 1024. Allows binding to ATM VCIs below 32. 249 NetBindService = CAP_NET_BIND_SERVICE, 250 /// Allow broadcasting, listen to multicast. 251 NetBroadcast = CAP_NET_BROADCAST, 252 /// Allow interface configuration. Allow administration of IP firewall, masquerading and 253 /// accounting. Allow setting debug option on sockets. Allow modification of routing tables. 254 /// Allow setting arbitrary process / process group ownership on sockets. Allow binding to any 255 /// address for transparent proxying (also via [`Capability::NetRaw`]). Allow setting TOS 256 /// (type of service). Allow setting promiscuous mode. Allow clearing driver statistics. 257 /// Allow multicasting. Allow read/write of device-specific registers. Allow activation of ATM 258 /// control sockets. 259 NetAdmin = CAP_NET_ADMIN, 260 /// Allow use of `RAW` sockets. Allow use of `PACKET` sockets. Allow binding to any address for 261 /// transparent proxying (also via [`Capability::NetAdmin`]). 262 NetRaw = CAP_NET_RAW, 263 /// Allow locking of shared memory segments. Allow mlock and mlockall (which doesn't really have 264 /// anything to do with IPC). 265 IPCLock = CAP_IPC_LOCK, 266 /// Override IPC ownership checks. 267 IPCOwner = CAP_IPC_OWNER, 268 /// Insert and remove kernel modules - modify kernel without limit. 269 SystemModule = CAP_SYS_MODULE, 270 /// Allow ioperm/iopl access. Allow sending USB messages to any device via `/dev/bus/usb`. 271 SystemRawIO = CAP_SYS_RAWIO, 272 /// Allow use of `chroot`. 273 SystemChangeRoot = CAP_SYS_CHROOT, 274 /// Allow `ptrace` of any process. 275 SystemProcessTrace = CAP_SYS_PTRACE, 276 /// Allow configuration of process accounting. 277 SystemProcessAccounting = CAP_SYS_PACCT, 278 /// Allow configuration of the secure attention key. Allow administration of the random device. 279 /// Allow examination and configuration of disk quotas. Allow setting the domainname. 280 /// Allow setting the hostname. Allow `mount` and `umount`, setting up new smb connection. 281 /// Allow some autofs root ioctls. Allow nfsservctl. Allow `VM86_REQUEST_IRQ`. 282 /// Allow to read/write pci config on alpha. Allow `irix_prctl` on mips (setstacksize). 283 /// Allow flushing all cache on m68k (`sys_cacheflush`). Allow removing semaphores. 284 /// Used instead of [`Capability::ChangeOwnership`] to "chown" IPC message queues, semaphores 285 /// and shared memory. Allow locking/unlocking of shared memory segment. Allow turning swap 286 /// on/off. Allow forged pids on socket credentials passing. Allow setting readahead and 287 /// flushing buffers on block devices. Allow setting geometry in floppy driver. Allow turning 288 /// DMA on/off in `xd` driver. Allow administration of md devices (mostly the above, but some 289 /// extra ioctls). Allow tuning the ide driver. Allow access to the nvram device. Allow 290 /// administration of `apm_bios`, serial and bttv (TV) device. Allow manufacturer commands in 291 /// isdn CAPI support driver. Allow reading non-standardized portions of pci configuration 292 /// space. Allow DDI debug ioctl on sbpcd driver. Allow setting up serial ports. Allow sending 293 /// raw qic-117 commands. Allow enabling/disabling tagged queuing on SCSI controllers and 294 /// sending arbitrary SCSI commands. Allow setting encryption key on loopback filesystem. 295 /// Allow setting zone reclaim policy. Allow everything under 296 /// [`Capability::BerkeleyPacketFilters`] and [`Capability::PerformanceMonitoring`] for backward 297 /// compatibility. 298 SystemAdmin = CAP_SYS_ADMIN, 299 /// Allow use of `reboot`. 300 SystemBoot = CAP_SYS_BOOT, 301 /// Allow raising priority and setting priority on other (different UID) processes. Allow use of 302 /// FIFO and round-robin (realtime) scheduling on own processes and setting the scheduling 303 /// algorithm used by another process. Allow setting cpu affinity on other processes. 304 /// Allow setting realtime ioprio class. Allow setting ioprio class on other processes. 305 SystemNice = CAP_SYS_NICE, 306 /// Override resource limits. Set resource limits. Override quota limits. Override reserved 307 /// space on ext2 filesystem. Modify data journaling mode on ext3 filesystem (uses journaling 308 /// resources). NOTE: ext2 honors fsuid when checking for resource overrides, so you can 309 /// override using fsuid too. Override size restrictions on IPC message queues. Allow more than 310 /// 64hz interrupts from the real-time clock. Override max number of consoles on console 311 /// allocation. Override max number of keymaps. Control memory reclaim behavior. 312 SystemResource = CAP_SYS_RESOURCE, 313 /// Allow manipulation of system clock. Allow `irix_stime` on mips. Allow setting the real-time 314 /// clock. 315 SystemTime = CAP_SYS_TIME, 316 /// Allow configuration of tty devices. Allow `vhangup` of tty. 317 SystemTTYConfig = CAP_SYS_TTY_CONFIG, 318 /// Allow the privileged aspects of `mknod`. 319 MakeNode = CAP_MKNOD, 320 /// Allow taking of leases on files. 321 Lease = CAP_LEASE, 322 /// Allow writing the audit log via unicast netlink socket. 323 AuditWrite = CAP_AUDIT_WRITE, 324 /// Allow configuration of audit via unicast netlink socket. 325 AuditControl = CAP_AUDIT_CONTROL, 326 /// Set or remove capabilities on files. Map `uid=0` into a child user namespace. 327 SetFileCapabilities = CAP_SETFCAP, 328 /// Override MAC access. The base kernel enforces no MAC policy. An LSM may enforce a MAC 329 /// policy, and if it does and it chooses to implement capability based overrides of that 330 /// policy, this is the capability it should use to do so. 331 MACOverride = CAP_MAC_OVERRIDE, 332 /// Allow MAC configuration or state changes. The base kernel requires no MAC configuration. 333 /// An LSM may enforce a MAC policy, and if it does and it chooses to implement capability based 334 /// checks on modifications to that policy or the data required to maintain it, this is the 335 /// capability it should use to do so. 336 MACAdmin = CAP_MAC_ADMIN, 337 /// Allow configuring the kernel's `syslog` (`printk` behaviour). 338 SystemLog = CAP_SYSLOG, 339 /// Allow triggering something that will wake the system. 340 WakeAlarm = CAP_WAKE_ALARM, 341 /// Allow preventing system suspends. 342 BlockSuspend = CAP_BLOCK_SUSPEND, 343 /// Allow reading the audit log via multicast netlink socket. 344 AuditRead = CAP_AUDIT_READ, 345 /// Allow system performance and observability privileged operations using `perf_events`, 346 /// `i915_perf` and other kernel subsystems. 347 PerformanceMonitoring = CAP_PERFMON, 348 /// This capability allows the following BPF operations: 349 /// - Creating all types of BPF maps 350 /// - Advanced verifier features 351 /// - Indirect variable access 352 /// - Bounded loops 353 /// - BPF to BPF function calls 354 /// - Scalar precision tracking 355 /// - Larger complexity limits 356 /// - Dead code elimination 357 /// - And potentially other features 358 /// - Loading BPF Type Format (BTF) data 359 /// - Retrieve `xlated` and JITed code of BPF programs 360 /// - Use `bpf_spin_lock` helper 361 /// 362 /// [`Capability::PerformanceMonitoring`] relaxes the verifier checks further: 363 /// - BPF progs can use of pointer-to-integer conversions 364 /// - speculation attack hardening measures are bypassed 365 /// - `bpf_probe_read` to read arbitrary kernel memory is allowed 366 /// - `bpf_trace_printk` to print kernel memory is allowed 367 /// 368 /// [`Capability::SystemAdmin`] is required to use bpf_probe_write_user. 369 /// 370 /// [`Capability::SystemAdmin`] is required to iterate system wide loaded 371 /// programs, maps, links, BTFs and convert their IDs to file descriptors. 372 /// 373 /// [`Capability::PerformanceMonitoring`] and [`Capability::BerkeleyPacketFilters`] are required 374 /// to load tracing programs. 375 /// [`Capability::NetAdmin`] and [`Capability::BerkeleyPacketFilters`] are required to load 376 /// networking programs. 377 BerkeleyPacketFilters = CAP_BPF, 378 /// Allow checkpoint/restore related operations. Allow PID selection during `clone3`. 379 /// Allow writing to `ns_last_pid`. 380 CheckpointRestore = CAP_CHECKPOINT_RESTORE, 381} 382 383/// Check if the specified capability is in the calling thread's capability bounding set. 384/// 385/// # References 386/// - [`prctl(PR_CAPBSET_READ,...)`] 387/// 388/// [`prctl(PR_CAPBSET_READ,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html 389#[inline] 390pub fn is_in_capability_bounding_set(capability: Capability) -> io::Result<bool> { 391 unsafe { prctl_2args(PR_CAPBSET_READ, capability as usize as *mut _) }.map(|r| r != 0) 392} 393 394const PR_CAPBSET_DROP: c_int = 24; 395 396/// If the calling thread has the [`Capability::SetPermittedCapabilities`] capability within its 397/// user namespace, then drop the specified capability from the thread's capability bounding set. 398/// 399/// # References 400/// - [`prctl(PR_CAPBSET_DROP,...)`] 401/// 402/// [`prctl(PR_CAPBSET_DROP,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html 403#[inline] 404pub fn remove_capability_from_capability_bounding_set(capability: Capability) -> io::Result<()> { 405 unsafe { prctl_2args(PR_CAPBSET_DROP, capability as usize as *mut _) }.map(|_r| ()) 406} 407 408// 409// PR_GET_SECUREBITS/PR_SET_SECUREBITS 410// 411 412const PR_GET_SECUREBITS: c_int = 27; 413 414bitflags! { 415 /// `SECBIT_*`. 416 pub struct CapabilitiesSecureBits: u32 { 417 /// If this bit is set, then the kernel does not grant capabilities when 418 /// a `set-user-ID-root` program is executed, or when a process with an effective or real 419 /// UID of 0 calls `execve`. 420 const NO_ROOT = 1_u32 << 0; 421 /// Set [`NO_ROOT`] irreversibly. 422 const NO_ROOT_LOCKED = 1_u32 << 1; 423 /// Setting this flag stops the kernel from adjusting the process's permitted, effective, 424 /// and ambient capability sets when the thread's effective and filesystem UIDs are switched 425 /// between zero and nonzero values. 426 const NO_SETUID_FIXUP = 1_u32 << 2; 427 /// Set [`NO_SETUID_FIXUP`] irreversibly. 428 const NO_SETUID_FIXUP_LOCKED = 1_u32 << 3; 429 /// Setting this flag allows a thread that has one or more 0 UIDs to retain capabilities in 430 /// its permitted set when it switches all of its UIDs to nonzero values. 431 const KEEP_CAPS = 1_u32 << 4; 432 /// Set [`KEEP_CAPS`] irreversibly. 433 const KEEP_CAPS_LOCKED = 1_u32 << 5; 434 /// Setting this flag disallows raising ambient capabilities via the `prctl`'s 435 /// `PR_CAP_AMBIENT_RAISE` operation. 436 const NO_CAP_AMBIENT_RAISE = 1_u32 << 6; 437 /// Set [`NO_CAP_AMBIENT_RAISE`] irreversibly. 438 const NO_CAP_AMBIENT_RAISE_LOCKED = 1_u32 << 7; 439 } 440} 441 442/// Get the `securebits` flags of the calling thread. 443/// 444/// # References 445/// - [`prctl(PR_GET_SECUREBITS,...)`] 446/// 447/// [`prctl(PR_GET_SECUREBITS,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html 448#[inline] 449pub fn capabilities_secure_bits() -> io::Result<CapabilitiesSecureBits> { 450 let r = unsafe { prctl_1arg(PR_GET_SECUREBITS)? } as c_uint; 451 CapabilitiesSecureBits::from_bits(r).ok_or(io::Errno::RANGE) 452} 453 454const PR_SET_SECUREBITS: c_int = 28; 455 456/// Set the `securebits` flags of the calling thread. 457/// 458/// # References 459/// - [`prctl(PR_SET_SECUREBITS,...)`] 460/// 461/// [`prctl(PR_SET_SECUREBITS,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html 462#[inline] 463pub fn set_capabilities_secure_bits(bits: CapabilitiesSecureBits) -> io::Result<()> { 464 unsafe { prctl_2args(PR_SET_SECUREBITS, bits.bits() as usize as *mut _) }.map(|_r| ()) 465} 466 467// 468// PR_GET_TIMERSLACK/PR_SET_TIMERSLACK 469// 470 471const PR_GET_TIMERSLACK: c_int = 30; 472 473/// Get the `current` timer slack value of the calling thread. 474/// 475/// # References 476/// - [`prctl(PR_GET_TIMERSLACK,...)`] 477/// 478/// [`prctl(PR_GET_TIMERSLACK,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html 479#[inline] 480pub fn current_timer_slack() -> io::Result<u64> { 481 unsafe { prctl_1arg(PR_GET_TIMERSLACK) }.map(|r| r as u64) 482} 483 484const PR_SET_TIMERSLACK: c_int = 29; 485 486/// Sets the `current` timer slack value for the calling thread. 487/// 488/// # References 489/// - [`prctl(PR_SET_TIMERSLACK,...)`] 490/// 491/// [`prctl(PR_SET_TIMERSLACK,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html 492#[inline] 493pub fn set_current_timer_slack(value: Option<NonZeroU64>) -> io::Result<()> { 494 let value = usize::try_from(value.map_or(0, NonZeroU64::get)).map_err(|_r| io::Errno::RANGE)?; 495 unsafe { prctl_2args(PR_SET_TIMERSLACK, value as *mut _) }.map(|_r| ()) 496} 497 498// 499// PR_GET_NO_NEW_PRIVS/PR_SET_NO_NEW_PRIVS 500// 501 502const PR_GET_NO_NEW_PRIVS: c_int = 39; 503 504/// Get the value of the `no_new_privs` attribute for the calling thread. 505/// 506/// # References 507/// - [`prctl(PR_GET_NO_NEW_PRIVS,...)`] 508/// 509/// [`prctl(PR_GET_NO_NEW_PRIVS,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html 510#[inline] 511pub fn no_new_privs() -> io::Result<bool> { 512 unsafe { prctl_1arg(PR_GET_NO_NEW_PRIVS) }.map(|r| r != 0) 513} 514 515const PR_SET_NO_NEW_PRIVS: c_int = 38; 516 517/// Set the calling thread's `no_new_privs` attribute. 518/// 519/// # References 520/// - [`prctl(PR_SET_NO_NEW_PRIVS,...)`] 521/// 522/// [`prctl(PR_SET_NO_NEW_PRIVS,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html 523#[inline] 524pub fn set_no_new_privs(no_new_privs: bool) -> io::Result<()> { 525 unsafe { prctl_2args(PR_SET_NO_NEW_PRIVS, no_new_privs as usize as *mut _) }.map(|_r| ()) 526} 527 528// 529// PR_GET_TID_ADDRESS 530// 531 532const PR_GET_TID_ADDRESS: c_int = 40; 533 534/// Get the `clear_child_tid` address set by `set_tid_address` 535/// and `clone`'s `CLONE_CHILD_CLEARTID` flag. 536/// 537/// # References 538/// - [`prctl(PR_GET_TID_ADDRESS,...)`] 539/// 540/// [`prctl(PR_GET_TID_ADDRESS,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html 541#[inline] 542pub fn get_clear_child_tid_address() -> io::Result<Option<NonNull<c_void>>> { 543 unsafe { prctl_get_at_arg2_optional::<*mut c_void>(PR_GET_TID_ADDRESS) }.map(NonNull::new) 544} 545 546// 547// PR_GET_THP_DISABLE/PR_SET_THP_DISABLE 548// 549 550const PR_GET_THP_DISABLE: c_int = 42; 551 552/// Get the current setting of the `THP disable` flag for the calling thread. 553/// 554/// # References 555/// - [`prctl(PR_GET_THP_DISABLE,...)`] 556/// 557/// [`prctl(PR_GET_THP_DISABLE,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html 558#[inline] 559pub fn transparent_huge_pages_are_disabled() -> io::Result<bool> { 560 unsafe { prctl_1arg(PR_GET_THP_DISABLE) }.map(|r| r != 0) 561} 562 563const PR_SET_THP_DISABLE: c_int = 41; 564 565/// Set the state of the `THP disable` flag for the calling thread. 566/// 567/// # References 568/// - [`prctl(PR_SET_THP_DISABLE,...)`] 569/// 570/// [`prctl(PR_SET_THP_DISABLE,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html 571#[inline] 572pub fn disable_transparent_huge_pages(thp_disable: bool) -> io::Result<()> { 573 unsafe { prctl_2args(PR_SET_THP_DISABLE, thp_disable as usize as *mut _) }.map(|_r| ()) 574} 575 576// 577// PR_CAP_AMBIENT 578// 579 580const PR_CAP_AMBIENT: c_int = 47; 581 582const PR_CAP_AMBIENT_IS_SET: usize = 1; 583 584/// Check if the specified capability is in the ambient set. 585/// 586/// # References 587/// - [`prctl(PR_CAP_AMBIENT,PR_CAP_AMBIENT_IS_SET,...)`] 588/// 589/// [`prctl(PR_CAP_AMBIENT,PR_CAP_AMBIENT_IS_SET,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html 590#[inline] 591pub fn capability_is_in_ambient_capability_set(capability: Capability) -> io::Result<bool> { 592 let cap = capability as usize as *mut _; 593 unsafe { prctl_3args(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET as *mut _, cap) }.map(|r| r != 0) 594} 595 596const PR_CAP_AMBIENT_CLEAR_ALL: usize = 4; 597 598/// Remove all capabilities from the ambient set. 599/// 600/// # References 601/// - [`prctl(PR_CAP_AMBIENT,PR_CAP_AMBIENT_CLEAR_ALL,...)`] 602/// 603/// [`prctl(PR_CAP_AMBIENT,PR_CAP_AMBIENT_CLEAR_ALL,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html 604#[inline] 605pub fn clear_ambient_capability_set() -> io::Result<()> { 606 unsafe { prctl_2args(PR_CAP_AMBIENT, PR_CAP_AMBIENT_CLEAR_ALL as *mut _) }.map(|_r| ()) 607} 608 609const PR_CAP_AMBIENT_RAISE: usize = 2; 610const PR_CAP_AMBIENT_LOWER: usize = 3; 611 612/// Add or remove the specified capability to the ambient set. 613/// 614/// # References 615/// - [`prctl(PR_CAP_AMBIENT,...)`] 616/// 617/// [`prctl(PR_CAP_AMBIENT,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html 618#[inline] 619pub fn configure_capability_in_ambient_capability_set( 620 capability: Capability, 621 enable: bool, 622) -> io::Result<()> { 623 let sub_operation = if enable { 624 PR_CAP_AMBIENT_RAISE 625 } else { 626 PR_CAP_AMBIENT_LOWER 627 }; 628 let cap = capability as usize as *mut _; 629 630 unsafe { prctl_3args(PR_CAP_AMBIENT, sub_operation as *mut _, cap) }.map(|_r| ()) 631} 632 633// 634// PR_SVE_GET_VL/PR_SVE_SET_VL 635// 636 637const PR_SVE_GET_VL: c_int = 51; 638 639const PR_SVE_VL_LEN_MASK: u32 = 0xffff; 640const PR_SVE_VL_INHERIT: u32 = 1_u32 << 17; 641 642/// Scalable Vector Extension vector length configuration. 643#[derive(Copy, Clone, Debug, Eq, PartialEq)] 644pub struct SVEVectorLengthConfig { 645 /// Vector length in bytes. 646 pub vector_length_in_bytes: u32, 647 /// Vector length inherited across `execve`. 648 pub vector_length_inherited_across_execve: bool, 649} 650 651/// Get the thread's current SVE vector length configuration. 652/// 653/// # References 654/// - [`prctl(PR_SVE_GET_VL,...)`] 655/// 656/// [`prctl(PR_SVE_GET_VL,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html 657#[inline] 658pub fn sve_vector_length_configuration() -> io::Result<SVEVectorLengthConfig> { 659 let bits = unsafe { prctl_1arg(PR_SVE_GET_VL)? } as c_uint; 660 Ok(SVEVectorLengthConfig { 661 vector_length_in_bytes: bits & PR_SVE_VL_LEN_MASK, 662 vector_length_inherited_across_execve: (bits & PR_SVE_VL_INHERIT) != 0, 663 }) 664} 665 666const PR_SVE_SET_VL: c_int = 50; 667 668const PR_SVE_SET_VL_ONEXEC: u32 = 1_u32 << 18; 669 670/// Configure the thread's vector length of Scalable Vector Extension. 671/// 672/// # References 673/// - [`prctl(PR_SVE_SET_VL,...)`] 674/// 675/// # Safety 676/// 677/// Please ensure the conditions necessary to safely call this function, 678/// as detailed in the references above. 679/// 680/// [`prctl(PR_SVE_SET_VL,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html 681#[inline] 682pub unsafe fn set_sve_vector_length_configuration( 683 vector_length_in_bytes: usize, 684 vector_length_inherited_across_execve: bool, 685 defer_change_to_next_execve: bool, 686) -> io::Result<()> { 687 let vector_length_in_bytes = 688 u32::try_from(vector_length_in_bytes).map_err(|_r| io::Errno::RANGE)?; 689 690 let mut bits = vector_length_in_bytes & PR_SVE_VL_LEN_MASK; 691 692 if vector_length_inherited_across_execve { 693 bits |= PR_SVE_VL_INHERIT; 694 } 695 696 if defer_change_to_next_execve { 697 bits |= PR_SVE_SET_VL_ONEXEC; 698 } 699 700 prctl_2args(PR_SVE_SET_VL, bits as usize as *mut _).map(|_r| ()) 701} 702 703// 704// PR_PAC_RESET_KEYS 705// 706 707const PR_PAC_RESET_KEYS: c_int = 54; 708 709/// Securely reset the thread's pointer authentication keys to fresh random values generated 710/// by the kernel. 711/// 712/// # References 713/// - [`prctl(PR_PAC_RESET_KEYS,...)`] 714/// 715/// # Safety 716/// 717/// Please ensure the conditions necessary to safely call this function, 718/// as detailed in the references above. 719/// 720/// [`prctl(PR_PAC_RESET_KEYS,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html 721#[inline] 722pub unsafe fn reset_pointer_authentication_keys( 723 keys: Option<PointerAuthenticationKeys>, 724) -> io::Result<()> { 725 let keys = keys.as_ref().map_or(0_u32, PointerAuthenticationKeys::bits); 726 prctl_2args(PR_PAC_RESET_KEYS, keys as usize as *mut _).map(|_r| ()) 727} 728 729// 730// PR_GET_TAGGED_ADDR_CTRL/PR_SET_TAGGED_ADDR_CTRL 731// 732 733const PR_GET_TAGGED_ADDR_CTRL: c_int = 56; 734 735const PR_MTE_TAG_SHIFT: u32 = 3; 736const PR_MTE_TAG_MASK: u32 = 0xffff_u32 << PR_MTE_TAG_SHIFT; 737 738bitflags! { 739 /// Zero means addresses that are passed for the purpose of being dereferenced by the kernel must be untagged. 740 pub struct TaggedAddressMode: u32 { 741 /// Addresses that are passed for the purpose of being dereferenced by the kernel may be tagged. 742 const ENABLED = 1_u32 << 0; 743 /// Synchronous tag check fault mode. 744 const TCF_SYNC = 1_u32 << 1; 745 /// Asynchronous tag check fault mode. 746 const TCF_ASYNC = 1_u32 << 2; 747 } 748} 749 750/// Get the current tagged address mode for the calling thread. 751/// 752/// # References 753/// - [`prctl(PR_GET_TAGGED_ADDR_CTRL,...)`] 754/// 755/// [`prctl(PR_GET_TAGGED_ADDR_CTRL,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html 756#[inline] 757pub fn current_tagged_address_mode() -> io::Result<(Option<TaggedAddressMode>, u32)> { 758 let r = unsafe { prctl_1arg(PR_GET_TAGGED_ADDR_CTRL)? } as c_uint; 759 let mode = r & 0b111_u32; 760 let mte_tag = (r & PR_MTE_TAG_MASK) >> PR_MTE_TAG_SHIFT; 761 Ok((TaggedAddressMode::from_bits(mode), mte_tag)) 762} 763 764const PR_SET_TAGGED_ADDR_CTRL: c_int = 55; 765 766/// Controls support for passing tagged user-space addresses to the kernel. 767/// 768/// # References 769/// - [`prctl(PR_SET_TAGGED_ADDR_CTRL,...)`] 770/// 771/// # Safety 772/// 773/// Please ensure the conditions necessary to safely call this function, 774/// as detailed in the references above. 775/// 776/// [`prctl(PR_SET_TAGGED_ADDR_CTRL,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html 777#[inline] 778pub unsafe fn set_current_tagged_address_mode( 779 mode: Option<TaggedAddressMode>, 780 mte_tag: u32, 781) -> io::Result<()> { 782 let config = mode.as_ref().map_or(0_u32, TaggedAddressMode::bits) 783 | ((mte_tag << PR_MTE_TAG_SHIFT) & PR_MTE_TAG_MASK); 784 prctl_2args(PR_SET_TAGGED_ADDR_CTRL, config as usize as *mut _).map(|_r| ()) 785} 786 787// 788// PR_SET_SYSCALL_USER_DISPATCH 789// 790 791const PR_SET_SYSCALL_USER_DISPATCH: c_int = 59; 792 793const PR_SYS_DISPATCH_OFF: usize = 0; 794 795/// Disable Syscall User Dispatch mechanism. 796/// 797/// # References 798/// - [`prctl(PR_SET_SYSCALL_USER_DISPATCH,PR_SYS_DISPATCH_OFF,...)`] 799/// 800/// # Safety 801/// 802/// Please ensure the conditions necessary to safely call this function, 803/// as detailed in the references above. 804/// 805/// [`prctl(PR_SET_SYSCALL_USER_DISPATCH,PR_SYS_DISPATCH_OFF,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html 806#[inline] 807pub unsafe fn disable_syscall_user_dispatch() -> io::Result<()> { 808 prctl_2args(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_OFF as *mut _).map(|_r| ()) 809} 810 811const PR_SYS_DISPATCH_ON: usize = 1; 812 813/// Allow system calls to be executed. 814const SYSCALL_DISPATCH_FILTER_ALLOW: u8 = 0; 815/// Block system calls from executing. 816const SYSCALL_DISPATCH_FILTER_BLOCK: u8 = 1; 817 818/// Value of the fast switch flag controlling system calls user dispatch mechanism without the need 819/// to issue a syscall. 820#[derive(Copy, Clone, Debug, Eq, PartialEq)] 821#[repr(u8)] 822pub enum SysCallUserDispatchFastSwitch { 823 /// System calls are allowed to execute. 824 Allow = SYSCALL_DISPATCH_FILTER_ALLOW, 825 /// System calls are blocked from executing. 826 Block = SYSCALL_DISPATCH_FILTER_BLOCK, 827} 828 829impl TryFrom<u8> for SysCallUserDispatchFastSwitch { 830 type Error = io::Errno; 831 832 fn try_from(value: u8) -> Result<Self, Self::Error> { 833 match value { 834 SYSCALL_DISPATCH_FILTER_ALLOW => Ok(Self::Allow), 835 SYSCALL_DISPATCH_FILTER_BLOCK => Ok(Self::Block), 836 _ => Err(io::Errno::RANGE), 837 } 838 } 839} 840 841/// Enable Syscall User Dispatch mechanism. 842/// 843/// # References 844/// - [`prctl(PR_SET_SYSCALL_USER_DISPATCH,PR_SYS_DISPATCH_ON,...)`] 845/// 846/// # Safety 847/// 848/// Please ensure the conditions necessary to safely call this function, 849/// as detailed in the references above. 850/// 851/// [`prctl(PR_SET_SYSCALL_USER_DISPATCH,PR_SYS_DISPATCH_ON,...)`]: https://man7.org/linux/man-pages/man2/prctl.2.html 852#[inline] 853pub unsafe fn enable_syscall_user_dispatch( 854 always_allowed_region: &[u8], 855 fast_switch_flag: &AtomicU8, 856) -> io::Result<()> { 857 syscalls::prctl( 858 PR_SET_SYSCALL_USER_DISPATCH, 859 PR_SYS_DISPATCH_ON as *mut _, 860 always_allowed_region.as_ptr() as *mut _, 861 always_allowed_region.len() as *mut _, 862 fast_switch_flag as *const AtomicU8 as *mut _, 863 ) 864 .map(|_r| ()) 865} 866 867// 868// PR_SCHED_CORE 869// 870 871const PR_SCHED_CORE: c_int = 62; 872 873const PR_SCHED_CORE_GET: usize = 0; 874 875const PR_SCHED_CORE_SCOPE_THREAD: u32 = 0; 876const PR_SCHED_CORE_SCOPE_THREAD_GROUP: u32 = 1; 877const PR_SCHED_CORE_SCOPE_PROCESS_GROUP: u32 = 2; 878 879/// `PR_SCHED_CORE_SCOPE_*`. 880#[derive(Copy, Clone, Debug, Eq, PartialEq)] 881#[repr(u32)] 882pub enum CoreSchedulingScope { 883 /// Operation will be performed for the thread. 884 Thread = PR_SCHED_CORE_SCOPE_THREAD, 885 /// Operation will be performed for all tasks in the task group of the process. 886 ThreadGroup = PR_SCHED_CORE_SCOPE_THREAD_GROUP, 887 /// Operation will be performed for all processes in the process group. 888 ProcessGroup = PR_SCHED_CORE_SCOPE_PROCESS_GROUP, 889} 890 891impl TryFrom<u32> for CoreSchedulingScope { 892 type Error = io::Errno; 893 894 fn try_from(value: u32) -> Result<Self, Self::Error> { 895 match value { 896 PR_SCHED_CORE_SCOPE_THREAD => Ok(Self::Thread), 897 PR_SCHED_CORE_SCOPE_THREAD_GROUP => Ok(Self::ThreadGroup), 898 PR_SCHED_CORE_SCOPE_PROCESS_GROUP => Ok(Self::ProcessGroup), 899 _ => Err(io::Errno::RANGE), 900 } 901 } 902} 903 904/// Get core scheduling cookie of a process. 905/// 906/// # References 907/// - [`prctl(PR_SCHED_CORE,PR_SCHED_CORE_GET,...)`] 908/// 909/// [`prctl(PR_SCHED_CORE,PR_SCHED_CORE_GET,...)`]: https://www.kernel.org/doc/html/v5.18/admin-guide/hw-vuln/core-scheduling.html 910#[inline] 911pub fn core_scheduling_cookie(pid: Pid, scope: CoreSchedulingScope) -> io::Result<u64> { 912 let mut value: MaybeUninit<u64> = MaybeUninit::uninit(); 913 unsafe { 914 syscalls::prctl( 915 PR_SCHED_CORE, 916 PR_SCHED_CORE_GET as *mut _, 917 pid.as_raw_nonzero().get() as usize as *mut _, 918 scope as usize as *mut _, 919 value.as_mut_ptr().cast(), 920 )?; 921 Ok(value.assume_init()) 922 } 923} 924 925const PR_SCHED_CORE_CREATE: usize = 1; 926 927/// Create unique core scheduling cookie. 928/// 929/// # References 930/// - [`prctl(PR_SCHED_CORE,PR_SCHED_CORE_CREATE,...)`] 931/// 932/// [`prctl(PR_SCHED_CORE,PR_SCHED_CORE_CREATE,...)`]: https://www.kernel.org/doc/html/v5.18/admin-guide/hw-vuln/core-scheduling.html 933#[inline] 934pub fn create_core_scheduling_cookie(pid: Pid, scope: CoreSchedulingScope) -> io::Result<()> { 935 unsafe { 936 syscalls::prctl( 937 PR_SCHED_CORE, 938 PR_SCHED_CORE_CREATE as *mut _, 939 pid.as_raw_nonzero().get() as usize as *mut _, 940 scope as usize as *mut _, 941 ptr::null_mut(), 942 ) 943 .map(|_r| ()) 944 } 945} 946 947const PR_SCHED_CORE_SHARE_TO: usize = 2; 948 949/// Push core scheduling cookie to a process. 950/// 951/// # References 952/// - [`prctl(PR_SCHED_CORE,PR_SCHED_CORE_SHARE_TO,...)`] 953/// 954/// [`prctl(PR_SCHED_CORE,PR_SCHED_CORE_SHARE_TO,...)`]: https://www.kernel.org/doc/html/v5.18/admin-guide/hw-vuln/core-scheduling.html 955#[inline] 956pub fn push_core_scheduling_cookie(pid: Pid, scope: CoreSchedulingScope) -> io::Result<()> { 957 unsafe { 958 syscalls::prctl( 959 PR_SCHED_CORE, 960 PR_SCHED_CORE_SHARE_TO as *mut _, 961 pid.as_raw_nonzero().get() as usize as *mut _, 962 scope as usize as *mut _, 963 ptr::null_mut(), 964 ) 965 .map(|_r| ()) 966 } 967} 968 969const PR_SCHED_CORE_SHARE_FROM: usize = 3; 970 971/// Pull core scheduling cookie from a process. 972/// 973/// # References 974/// - [`prctl(PR_SCHED_CORE,PR_SCHED_CORE_SHARE_FROM,...)`] 975/// 976/// [`prctl(PR_SCHED_CORE,PR_SCHED_CORE_SHARE_FROM,...)`]: https://www.kernel.org/doc/html/v5.18/admin-guide/hw-vuln/core-scheduling.html 977#[inline] 978pub fn pull_core_scheduling_cookie(pid: Pid, scope: CoreSchedulingScope) -> io::Result<()> { 979 unsafe { 980 syscalls::prctl( 981 PR_SCHED_CORE, 982 PR_SCHED_CORE_SHARE_FROM as *mut _, 983 pid.as_raw_nonzero().get() as usize as *mut _, 984 scope as usize as *mut _, 985 ptr::null_mut(), 986 ) 987 .map(|_r| ()) 988 } 989} 990