1//! This crate allows interacting with the data stored by [`OsStr`] and 2//! [`OsString`], without resorting to panics or corruption for invalid UTF-8. 3//! Thus, methods can be used that are already defined on [`[u8]`][slice] and 4//! [`Vec<u8>`]. 5//! 6//! Typically, the only way to losslessly construct [`OsStr`] or [`OsString`] 7//! from a byte sequence is to use `OsStr::new(str::from_utf8(bytes)?)`, which 8//! requires the bytes to be valid in UTF-8. However, since this crate makes 9//! conversions directly between the platform encoding and raw bytes, even some 10//! strings invalid in UTF-8 can be converted. 11//! 12//! In most cases, [`RawOsStr`] and [`RawOsString`] should be used. 13//! [`OsStrBytes`] and [`OsStringBytes`] provide lower-level APIs that are 14//! easier to misuse. 15//! 16//! # Encoding 17//! 18//! The encoding of bytes returned or accepted by methods of this crate is 19//! intentionally left unspecified. It may vary for different platforms, so 20//! defining it would run contrary to the goal of generic string handling. 21//! However, the following invariants will always be upheld: 22//! 23//! - The encoding will be compatible with UTF-8. In particular, splitting an 24//! encoded byte sequence by a UTF-8–encoded character always produces 25//! other valid byte sequences. They can be re-encoded without error using 26//! [`RawOsString::into_os_string`] and similar methods. 27//! 28//! - All characters valid in platform strings are representable. [`OsStr`] and 29//! [`OsString`] can always be losslessly reconstructed from extracted bytes. 30//! 31//! Note that the chosen encoding may not match how Rust stores these strings 32//! internally, which is undocumented. For instance, the result of calling 33//! [`OsStr::len`] will not necessarily match the number of bytes this crate 34//! uses to represent the same string. 35//! 36//! Additionally, concatenation may yield unexpected results without a UTF-8 37//! separator. If two platform strings need to be concatenated, the only safe 38//! way to do so is using [`OsString::push`]. This limitation also makes it 39//! undesirable to use the bytes in interchange. 40//! 41//! Since this encoding can change between versions and platforms, it should 42//! not be used for storage. The standard library provides implementations of 43//! [`OsStrExt`] and [`OsStringExt`] for various platforms, which should be 44//! preferred for that use case. 45//! 46//! # User Input 47//! 48//! Traits in this crate should ideally not be used to convert byte sequences 49//! that did not originate from [`OsStr`] or a related struct. The encoding 50//! used by this crate is an implementation detail, so it does not make sense 51//! to expose it to users. 52//! 53//! Crate [bstr] offers some useful alternative methods, such as 54//! [`ByteSlice::to_os_str`] and [`ByteVec::into_os_string`], that are meant 55//! for user input. But, they reject some byte sequences used to represent 56//! valid platform strings, which would be undesirable for reliable path 57//! handling. They are best used only when accepting unknown input. 58//! 59//! This crate is meant to help when you already have an instance of [`OsStr`] 60//! and need to modify the data in a lossless way. 61//! 62//! # Features 63//! 64//! These features are optional and can be enabled or disabled in a 65//! "Cargo.toml" file. 66//! 67//! ### Default Features 68//! 69//! - **memchr** - 70//! Changes the implementation to use crate [memchr] for better performance. 71//! This feature is useless when "raw\_os\_str" is disabled. 72//! 73//! For more information, see [`RawOsStr`][memchr complexity]. 74//! 75//! - **raw\_os\_str** - 76//! Provides: 77//! - [`iter`] 78//! - [`Pattern`] 79//! - [`RawOsStr`] 80//! - [`RawOsStrCow`] 81//! - [`RawOsString`] 82//! 83//! ### Optional Features 84//! 85//! - **checked\_conversions** - 86//! Provides: 87//! - [`EncodingError`] 88//! - [`OsStrBytes::from_raw_bytes`] 89//! - [`OsStringBytes::from_raw_vec`] 90//! - [`RawOsStr::from_raw_bytes`] 91//! - [`RawOsString::from_raw_vec`] 92//! 93//! Because this feature should not be used in libraries, the 94//! "OS_STR_BYTES_CHECKED_CONVERSIONS" environment variable must be defined 95//! during compilation. 96//! 97//! - **print\_bytes** - 98//! Provides implementations of [`print_bytes::ToBytes`] for [`RawOsStr`] and 99//! [`RawOsString`]. 100//! 101//! - **uniquote** - 102//! Provides implementations of [`uniquote::Quote`] for [`RawOsStr`] and 103//! [`RawOsString`]. 104//! 105//! # Implementation 106//! 107//! Some methods return [`Cow`] to account for platform differences. However, 108//! no guarantee is made that the same variant of that enum will always be 109//! returned for the same platform. Whichever can be constructed most 110//! efficiently will be returned. 111//! 112//! All traits are [sealed], meaning that they can only be implemented by this 113//! crate. Otherwise, backward compatibility would be more difficult to 114//! maintain for new features. 115//! 116//! # Complexity 117//! 118//! Conversion method complexities will vary based on what functionality is 119//! available for the platform. At worst, they will all be linear, but some can 120//! take constant time. For example, [`RawOsString::into_os_string`] might be 121//! able to reuse its allocation. 122//! 123//! # Examples 124//! 125//! ``` 126//! # use std::io; 127//! # 128//! # #[cfg(feature = "raw_os_str")] 129//! # { 130//! # #[cfg(any())] 131//! use std::env; 132//! use std::fs; 133//! 134//! use os_str_bytes::RawOsStr; 135//! 136//! # mod env { 137//! # use std::env; 138//! # use std::ffi::OsString; 139//! # 140//! # pub fn args_os() -> impl Iterator<Item = OsString> { 141//! # let mut file = env::temp_dir(); 142//! # file.push("os_str_bytes\u{E9}.txt"); 143//! # return vec![OsString::new(), file.into_os_string()].into_iter(); 144//! # } 145//! # } 146//! # 147//! for file in env::args_os().skip(1) { 148//! if !RawOsStr::new(&file).starts_with('-') { 149//! let string = "Hello, world!"; 150//! fs::write(&file, string)?; 151//! assert_eq!(string, fs::read_to_string(file)?); 152//! } 153//! } 154//! # } 155//! # 156//! # Ok::<_, io::Error>(()) 157//! ``` 158//! 159//! [bstr]: https://crates.io/crates/bstr 160//! [`ByteSlice::to_os_str`]: https://docs.rs/bstr/0.2.12/bstr/trait.ByteSlice.html#method.to_os_str 161//! [`ByteVec::into_os_string`]: https://docs.rs/bstr/0.2.12/bstr/trait.ByteVec.html#method.into_os_string 162//! [memchr complexity]: RawOsStr#complexity 163//! [memchr]: https://crates.io/crates/memchr 164//! [`OsStrExt`]: ::std::os::unix::ffi::OsStrExt 165//! [`OsStringExt`]: ::std::os::unix::ffi::OsStringExt 166//! [sealed]: https://rust-lang.github.io/api-guidelines/future-proofing.html#c-sealed 167//! [print\_bytes]: https://crates.io/crates/print_bytes 168 169#![cfg_attr(not(feature = "checked_conversions"), allow(deprecated))] 170// Only require a nightly compiler when building documentation for docs.rs. 171// This is a private option that should not be used. 172// https://github.com/rust-lang/docs.rs/issues/147#issuecomment-389544407 173// https://github.com/dylni/os_str_bytes/issues/2 174#![cfg_attr(os_str_bytes_docs_rs, feature(doc_cfg))] 175// Nightly is also currently required for the SGX platform. 176#![cfg_attr( 177 all(target_vendor = "fortanix", target_env = "sgx"), 178 feature(sgx_platform) 179)] 180#![warn(unsafe_op_in_unsafe_fn)] 181#![warn(unused_results)] 182 183use std::borrow::Cow; 184use std::error::Error; 185use std::ffi::OsStr; 186use std::ffi::OsString; 187use std::fmt; 188use std::fmt::Display; 189use std::fmt::Formatter; 190use std::path::Path; 191use std::path::PathBuf; 192use std::result; 193 194macro_rules! if_checked_conversions { 195 ( $($item:item)+ ) => { 196 $( 197 #[cfg(feature = "checked_conversions")] 198 $item 199 )+ 200 }; 201} 202 203#[cfg(not(os_str_bytes_docs_rs))] 204if_checked_conversions! { 205 const _: &str = env!( 206 "OS_STR_BYTES_CHECKED_CONVERSIONS", 207 "The 'OS_STR_BYTES_CHECKED_CONVERSIONS' environment variable must be \ 208 defined to use the 'checked_conversions' feature.", 209 ); 210} 211 212#[rustfmt::skip] 213macro_rules! deprecated_checked_conversion { 214 ( $message:expr , $item:item ) => { 215 #[cfg_attr( 216 not(feature = "checked_conversions"), 217 deprecated = $message 218 )] 219 $item 220 }; 221} 222 223macro_rules! expect_encoded { 224 ( $result:expr ) => { 225 $result.expect("invalid raw bytes") 226 }; 227} 228 229macro_rules! if_raw_str { 230 ( $($item:item)+ ) => { 231 $( 232 #[cfg(feature = "raw_os_str")] 233 $item 234 )+ 235 }; 236} 237 238#[cfg_attr( 239 all(target_family = "wasm", target_os = "unknown"), 240 path = "wasm/mod.rs" 241)] 242#[cfg_attr(windows, path = "windows/mod.rs")] 243#[cfg_attr( 244 not(any(all(target_family = "wasm", target_os = "unknown"), windows)), 245 path = "common/mod.rs" 246)] 247mod imp; 248 249#[cfg(any( 250 all( 251 feature = "raw_os_str", 252 target_family = "wasm", 253 target_os = "unknown", 254 ), 255 windows, 256))] 257mod util; 258 259if_raw_str! { 260 pub mod iter; 261 262 mod pattern; 263 pub use pattern::Pattern; 264 265 mod raw_str; 266 pub use raw_str::RawOsStr; 267 pub use raw_str::RawOsStrCow; 268 pub use raw_str::RawOsString; 269} 270 271deprecated_checked_conversion! { 272 "use `OsStrBytes::assert_from_raw_bytes` or \ 273 `OsStringBytes::assert_from_raw_vec` instead, or enable the \ 274 'checked_conversions' feature", 275 /// The error that occurs when a byte sequence is not representable in the 276 /// platform encoding. 277 /// 278 /// [`Result::unwrap`] should almost always be called on results containing 279 /// this error. It should be known whether or not byte sequences are 280 /// properly encoded for the platform, since [the module-level 281 /// documentation][encoding] discourages using encoded bytes in 282 /// interchange. Results are returned primarily to make panicking behavior 283 /// explicit. 284 /// 285 /// On Unix, this error is never returned, but [`OsStrExt`] or 286 /// [`OsStringExt`] should be used instead if that needs to be guaranteed. 287 /// 288 /// [encoding]: self#encoding 289 /// [`OsStrExt`]: ::std::os::unix::ffi::OsStrExt 290 /// [`OsStringExt`]: ::std::os::unix::ffi::OsStringExt 291 /// [`Result::unwrap`]: ::std::result::Result::unwrap 292 #[derive(Clone, Debug, Eq, PartialEq)] 293 #[cfg_attr( 294 os_str_bytes_docs_rs, 295 doc(cfg(feature = "checked_conversions")) 296 )] 297 pub struct EncodingError(imp::EncodingError); 298} 299 300impl Display for EncodingError { 301 #[inline] 302 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 303 self.0.fmt(f) 304 } 305} 306 307impl Error for EncodingError {} 308 309type Result<T> = result::Result<T, EncodingError>; 310 311fn from_raw_bytes<'a, S>( 312 string: S, 313) -> result::Result<Cow<'a, OsStr>, imp::EncodingError> 314where 315 S: Into<Cow<'a, [u8]>>, 316{ 317 match string.into() { 318 Cow::Borrowed(string) => imp::os_str_from_bytes(string), 319 Cow::Owned(string) => imp::os_string_from_vec(string).map(Cow::Owned), 320 } 321} 322 323fn cow_os_str_into_path(string: Cow<'_, OsStr>) -> Cow<'_, Path> { 324 match string { 325 Cow::Borrowed(string) => Cow::Borrowed(Path::new(string)), 326 Cow::Owned(string) => Cow::Owned(string.into()), 327 } 328} 329 330/// A platform agnostic variant of [`OsStrExt`]. 331/// 332/// For more information, see [the module-level documentation][module]. 333/// 334/// [module]: self 335/// [`OsStrExt`]: ::std::os::unix::ffi::OsStrExt 336pub trait OsStrBytes: private::Sealed + ToOwned { 337 /// Converts a byte string into an equivalent platform-native string. 338 /// 339 /// # Panics 340 /// 341 /// Panics if the string is not valid for the [unspecified encoding] used 342 /// by this crate. 343 /// 344 /// # Examples 345 /// 346 /// ``` 347 /// use std::env; 348 /// use std::ffi::OsStr; 349 /// # use std::io; 350 /// 351 /// use os_str_bytes::OsStrBytes; 352 /// 353 /// let os_string = env::current_exe()?; 354 /// let os_bytes = os_string.to_raw_bytes(); 355 /// assert_eq!(os_string, OsStr::assert_from_raw_bytes(os_bytes)); 356 /// # 357 /// # Ok::<_, io::Error>(()) 358 /// ``` 359 /// 360 /// [unspecified encoding]: self#encoding 361 #[must_use = "method should not be used for validation"] 362 #[track_caller] 363 fn assert_from_raw_bytes<'a, S>(string: S) -> Cow<'a, Self> 364 where 365 S: Into<Cow<'a, [u8]>>; 366 367 deprecated_checked_conversion! { 368 "use `assert_from_raw_bytes` instead, or enable the \ 369 'checked_conversions' feature", 370 /// Converts a byte string into an equivalent platform-native string. 371 /// 372 /// [`assert_from_raw_bytes`] should almost always be used instead. For 373 /// more information, see [`EncodingError`]. 374 /// 375 /// # Errors 376 /// 377 /// See documentation for [`EncodingError`]. 378 /// 379 /// # Examples 380 /// 381 /// ``` 382 /// use std::env; 383 /// use std::ffi::OsStr; 384 /// # use std::io; 385 /// 386 /// use os_str_bytes::OsStrBytes; 387 /// 388 /// let os_string = env::current_exe()?; 389 /// let os_bytes = os_string.to_raw_bytes(); 390 /// assert_eq!(os_string, OsStr::from_raw_bytes(os_bytes).unwrap()); 391 /// # 392 /// # Ok::<_, io::Error>(()) 393 /// ``` 394 /// 395 /// [`assert_from_raw_bytes`]: Self::assert_from_raw_bytes 396 #[cfg_attr( 397 os_str_bytes_docs_rs, 398 doc(cfg(feature = "checked_conversions")) 399 )] 400 fn from_raw_bytes<'a, S>(string: S) -> Result<Cow<'a, Self>> 401 where 402 S: Into<Cow<'a, [u8]>>; 403 } 404 405 /// Converts a platform-native string into an equivalent byte string. 406 /// 407 /// The returned string will use an [unspecified encoding]. 408 /// 409 /// # Examples 410 /// 411 /// ``` 412 /// use std::ffi::OsStr; 413 /// 414 /// use os_str_bytes::OsStrBytes; 415 /// 416 /// let string = "foobar"; 417 /// let os_string = OsStr::new(string); 418 /// assert_eq!(string.as_bytes(), &*os_string.to_raw_bytes()); 419 /// ``` 420 /// 421 /// [unspecified encoding]: self#encoding 422 #[must_use] 423 fn to_raw_bytes(&self) -> Cow<'_, [u8]>; 424} 425 426impl OsStrBytes for OsStr { 427 #[inline] 428 fn assert_from_raw_bytes<'a, S>(string: S) -> Cow<'a, Self> 429 where 430 S: Into<Cow<'a, [u8]>>, 431 { 432 expect_encoded!(from_raw_bytes(string)) 433 } 434 435 #[inline] 436 fn from_raw_bytes<'a, S>(string: S) -> Result<Cow<'a, Self>> 437 where 438 S: Into<Cow<'a, [u8]>>, 439 { 440 from_raw_bytes(string).map_err(EncodingError) 441 } 442 443 #[inline] 444 fn to_raw_bytes(&self) -> Cow<'_, [u8]> { 445 imp::os_str_to_bytes(self) 446 } 447} 448 449impl OsStrBytes for Path { 450 #[inline] 451 fn assert_from_raw_bytes<'a, S>(string: S) -> Cow<'a, Self> 452 where 453 S: Into<Cow<'a, [u8]>>, 454 { 455 cow_os_str_into_path(OsStr::assert_from_raw_bytes(string)) 456 } 457 458 #[inline] 459 fn from_raw_bytes<'a, S>(string: S) -> Result<Cow<'a, Self>> 460 where 461 S: Into<Cow<'a, [u8]>>, 462 { 463 OsStr::from_raw_bytes(string).map(cow_os_str_into_path) 464 } 465 466 #[inline] 467 fn to_raw_bytes(&self) -> Cow<'_, [u8]> { 468 self.as_os_str().to_raw_bytes() 469 } 470} 471 472/// A platform agnostic variant of [`OsStringExt`]. 473/// 474/// For more information, see [the module-level documentation][module]. 475/// 476/// [module]: self 477/// [`OsStringExt`]: ::std::os::unix::ffi::OsStringExt 478pub trait OsStringBytes: private::Sealed + Sized { 479 /// Converts a byte string into an equivalent platform-native string. 480 /// 481 /// # Panics 482 /// 483 /// Panics if the string is not valid for the [unspecified encoding] used 484 /// by this crate. 485 /// 486 /// # Examples 487 /// 488 /// ``` 489 /// use std::env; 490 /// use std::ffi::OsString; 491 /// # use std::io; 492 /// 493 /// use os_str_bytes::OsStringBytes; 494 /// 495 /// let os_string = env::current_exe()?; 496 /// let os_bytes = os_string.clone().into_raw_vec(); 497 /// assert_eq!(os_string, OsString::assert_from_raw_vec(os_bytes)); 498 /// # 499 /// # Ok::<_, io::Error>(()) 500 /// ``` 501 /// 502 /// [unspecified encoding]: self#encoding 503 #[must_use = "method should not be used for validation"] 504 #[track_caller] 505 fn assert_from_raw_vec(string: Vec<u8>) -> Self; 506 507 deprecated_checked_conversion! { 508 "use `assert_from_raw_vec` instead, or enable the \ 509 'checked_conversions' feature", 510 /// Converts a byte string into an equivalent platform-native string. 511 /// 512 /// [`assert_from_raw_vec`] should almost always be used instead. For 513 /// more information, see [`EncodingError`]. 514 /// 515 /// # Errors 516 /// 517 /// See documentation for [`EncodingError`]. 518 /// 519 /// # Examples 520 /// 521 /// ``` 522 /// use std::env; 523 /// use std::ffi::OsString; 524 /// # use std::io; 525 /// 526 /// use os_str_bytes::OsStringBytes; 527 /// 528 /// let os_string = env::current_exe()?; 529 /// let os_bytes = os_string.clone().into_raw_vec(); 530 /// assert_eq!(os_string, OsString::from_raw_vec(os_bytes).unwrap()); 531 /// # 532 /// # Ok::<_, io::Error>(()) 533 /// ``` 534 /// 535 /// [`assert_from_raw_vec`]: Self::assert_from_raw_vec 536 #[cfg_attr( 537 os_str_bytes_docs_rs, 538 doc(cfg(feature = "checked_conversions")) 539 )] 540 fn from_raw_vec(string: Vec<u8>) -> Result<Self>; 541 } 542 543 /// Converts a platform-native string into an equivalent byte string. 544 /// 545 /// The returned string will use an [unspecified encoding]. 546 /// 547 /// # Examples 548 /// 549 /// ``` 550 /// use std::ffi::OsString; 551 /// 552 /// use os_str_bytes::OsStringBytes; 553 /// 554 /// let string = "foobar".to_owned(); 555 /// let os_string: OsString = string.clone().into(); 556 /// assert_eq!(string.into_bytes(), os_string.into_raw_vec()); 557 /// ``` 558 /// 559 /// [unspecified encoding]: self#encoding 560 #[must_use] 561 fn into_raw_vec(self) -> Vec<u8>; 562} 563 564impl OsStringBytes for OsString { 565 #[inline] 566 fn assert_from_raw_vec(string: Vec<u8>) -> Self { 567 expect_encoded!(imp::os_string_from_vec(string)) 568 } 569 570 #[inline] 571 fn from_raw_vec(string: Vec<u8>) -> Result<Self> { 572 imp::os_string_from_vec(string).map_err(EncodingError) 573 } 574 575 #[inline] 576 fn into_raw_vec(self) -> Vec<u8> { 577 imp::os_string_into_vec(self) 578 } 579} 580 581impl OsStringBytes for PathBuf { 582 #[inline] 583 fn assert_from_raw_vec(string: Vec<u8>) -> Self { 584 OsString::assert_from_raw_vec(string).into() 585 } 586 587 #[inline] 588 fn from_raw_vec(string: Vec<u8>) -> Result<Self> { 589 OsString::from_raw_vec(string).map(Into::into) 590 } 591 592 #[inline] 593 fn into_raw_vec(self) -> Vec<u8> { 594 self.into_os_string().into_raw_vec() 595 } 596} 597 598mod private { 599 use std::ffi::OsStr; 600 use std::ffi::OsString; 601 use std::path::Path; 602 use std::path::PathBuf; 603 604 if_raw_str! { 605 use std::borrow::Cow; 606 607 use super::RawOsStr; 608 } 609 610 pub trait Sealed {} 611 612 impl Sealed for char {} 613 impl Sealed for OsStr {} 614 impl Sealed for OsString {} 615 impl Sealed for Path {} 616 impl Sealed for PathBuf {} 617 impl Sealed for &str {} 618 impl Sealed for &String {} 619 620 if_raw_str! { 621 impl Sealed for Cow<'_, RawOsStr> {} 622 } 623} 624