1// These functions are necessarily inefficient, because they must revert
2// encoding conversions performed by the standard library. However, there is
3// currently no better alternative.
4
5use std::borrow::Cow;
6use std::error::Error;
7use std::ffi::OsStr;
8use std::ffi::OsString;
9use std::fmt;
10use std::fmt::Display;
11use std::fmt::Formatter;
12use std::ops::Not;
13use std::os::windows::ffi::OsStrExt;
14use std::os::windows::ffi::OsStringExt;
15use std::result;
16use std::str;
17
18if_raw_str! {
19    pub(super) mod raw;
20}
21
22mod wtf8;
23use wtf8::DecodeWide;
24
25#[cfg(test)]
26mod tests;
27
/// Error produced when a byte sequence is not representable in the platform
/// encoding. Each variant identifies where the problem was found.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub(super) enum EncodingError {
    /// The error is located at this byte.
    Byte(u8),
    /// The error is located at this code point.
    CodePoint(u32),
    /// The error is located at the end of the string.
    // This is a tuple-like variant with no fields, so it is constructed and
    // matched as `End()`.
    End(),
}
34
35impl EncodingError {
36    fn position(&self) -> Cow<'_, str> {
37        match self {
38            Self::Byte(byte) => Cow::Owned(format!("byte b'\\x{:02X}'", byte)),
39            Self::CodePoint(code_point) => {
40                Cow::Owned(format!("code point U+{:04X}", code_point))
41            }
42            Self::End() => Cow::Borrowed("end of string"),
43        }
44    }
45}
46
47impl Display for EncodingError {
48    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
49        write!(
50            f,
51            "byte sequence is not representable in the platform encoding; \
52             error at {}",
53            self.position(),
54        )
55    }
56}
57
// No methods are overridden; the trait's default implementations are used.
impl Error for EncodingError {}
59
/// Shorthand for a result whose error type is [`EncodingError`].
type Result<T> = result::Result<T, EncodingError>;
61
62fn from_bytes(string: &[u8]) -> Result<Option<OsString>> {
63    let mut encoder = wtf8::encode_wide(string);
64
65    // Collecting an iterator into a result ignores the size hint:
66    // https://github.com/rust-lang/rust/issues/48994
67    let mut encoded_string = Vec::with_capacity(encoder.size_hint().0);
68    for wchar in &mut encoder {
69        encoded_string.push(wchar?);
70    }
71
72    debug_assert_eq!(str::from_utf8(string).is_ok(), encoder.is_still_utf8());
73    Ok(encoder
74        .is_still_utf8()
75        .not()
76        .then(|| OsStringExt::from_wide(&encoded_string)))
77}
78
79fn to_bytes(os_string: &OsStr) -> Vec<u8> {
80    let encoder = OsStrExt::encode_wide(os_string);
81
82    let mut string = Vec::with_capacity(encoder.size_hint().0);
83    string.extend(DecodeWide::new(encoder));
84    string
85}
86
87pub(super) fn os_str_from_bytes(string: &[u8]) -> Result<Cow<'_, OsStr>> {
88    from_bytes(string).map(|os_string| {
89        os_string.map(Cow::Owned).unwrap_or_else(|| {
90            // SAFETY: This slice was validated to be UTF-8.
91            Cow::Borrowed(OsStr::new(unsafe {
92                str::from_utf8_unchecked(string)
93            }))
94        })
95    })
96}
97
98pub(super) fn os_str_to_bytes(os_string: &OsStr) -> Cow<'_, [u8]> {
99    Cow::Owned(to_bytes(os_string))
100}
101
102pub(super) fn os_string_from_vec(string: Vec<u8>) -> Result<OsString> {
103    from_bytes(&string).map(|os_string| {
104        os_string.unwrap_or_else(|| {
105            // SAFETY: This slice was validated to be UTF-8.
106            unsafe { String::from_utf8_unchecked(string) }.into()
107        })
108    })
109}
110
111pub(super) fn os_string_into_vec(os_string: OsString) -> Vec<u8> {
112    to_bytes(&os_string)
113}
114