1// These functions are necessarily inefficient, because they must revert 2// encoding conversions performed by the standard library. However, there is 3// currently no better alternative. 4 5use std::borrow::Cow; 6use std::error::Error; 7use std::ffi::OsStr; 8use std::ffi::OsString; 9use std::fmt; 10use std::fmt::Display; 11use std::fmt::Formatter; 12use std::ops::Not; 13use std::os::windows::ffi::OsStrExt; 14use std::os::windows::ffi::OsStringExt; 15use std::result; 16use std::str; 17 18if_raw_str! { 19 pub(super) mod raw; 20} 21 22mod wtf8; 23use wtf8::DecodeWide; 24 25#[cfg(test)] 26mod tests; 27 28#[derive(Clone, Copy, Debug, Eq, PartialEq)] 29pub(super) enum EncodingError { 30 Byte(u8), 31 CodePoint(u32), 32 End(), 33} 34 35impl EncodingError { 36 fn position(&self) -> Cow<'_, str> { 37 match self { 38 Self::Byte(byte) => Cow::Owned(format!("byte b'\\x{:02X}'", byte)), 39 Self::CodePoint(code_point) => { 40 Cow::Owned(format!("code point U+{:04X}", code_point)) 41 } 42 Self::End() => Cow::Borrowed("end of string"), 43 } 44 } 45} 46 47impl Display for EncodingError { 48 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 49 write!( 50 f, 51 "byte sequence is not representable in the platform encoding; \ 52 error at {}", 53 self.position(), 54 ) 55 } 56} 57 58impl Error for EncodingError {} 59 60type Result<T> = result::Result<T, EncodingError>; 61 62fn from_bytes(string: &[u8]) -> Result<Option<OsString>> { 63 let mut encoder = wtf8::encode_wide(string); 64 65 // Collecting an iterator into a result ignores the size hint: 66 // https://github.com/rust-lang/rust/issues/48994 67 let mut encoded_string = Vec::with_capacity(encoder.size_hint().0); 68 for wchar in &mut encoder { 69 encoded_string.push(wchar?); 70 } 71 72 debug_assert_eq!(str::from_utf8(string).is_ok(), encoder.is_still_utf8()); 73 Ok(encoder 74 .is_still_utf8() 75 .not() 76 .then(|| OsStringExt::from_wide(&encoded_string))) 77} 78 79fn to_bytes(os_string: &OsStr) -> Vec<u8> { 80 let encoder = OsStrExt::encode_wide(os_string); 81 82 let mut string = Vec::with_capacity(encoder.size_hint().0); 83 string.extend(DecodeWide::new(encoder)); 84 string 85} 86 87pub(super) fn os_str_from_bytes(string: &[u8]) -> Result<Cow<'_, OsStr>> { 88 from_bytes(string).map(|os_string| { 89 os_string.map(Cow::Owned).unwrap_or_else(|| { 90 // SAFETY: This slice was validated to be UTF-8. 91 Cow::Borrowed(OsStr::new(unsafe { 92 str::from_utf8_unchecked(string) 93 })) 94 }) 95 }) 96} 97 98pub(super) fn os_str_to_bytes(os_string: &OsStr) -> Cow<'_, [u8]> { 99 Cow::Owned(to_bytes(os_string)) 100} 101 102pub(super) fn os_string_from_vec(string: Vec<u8>) -> Result<OsString> { 103 from_bytes(&string).map(|os_string| { 104 os_string.unwrap_or_else(|| { 105 // SAFETY: This slice was validated to be UTF-8. 106 unsafe { String::from_utf8_unchecked(string) }.into() 107 }) 108 }) 109} 110 111pub(super) fn os_string_into_vec(os_string: OsString) -> Vec<u8> { 112 to_bytes(&os_string) 113} 114