1c67d6573Sopenharmony_ciuse std::char;
2c67d6573Sopenharmony_ciuse std::cmp::Ordering;
3c67d6573Sopenharmony_ciuse std::fmt;
4c67d6573Sopenharmony_ciuse std::ops;
5c67d6573Sopenharmony_ciuse std::u32;
6c67d6573Sopenharmony_ci
7c67d6573Sopenharmony_ciuse crate::literal::LiteralSearcher;
8c67d6573Sopenharmony_ciuse crate::prog::InstEmptyLook;
9c67d6573Sopenharmony_ciuse crate::utf8::{decode_last_utf8, decode_utf8};
10c67d6573Sopenharmony_ci
11c67d6573Sopenharmony_ci/// Represents a location in the input.
12c67d6573Sopenharmony_ci#[derive(Clone, Copy, Debug)]
13c67d6573Sopenharmony_cipub struct InputAt {
14c67d6573Sopenharmony_ci    pos: usize,
15c67d6573Sopenharmony_ci    c: Char,
16c67d6573Sopenharmony_ci    byte: Option<u8>,
17c67d6573Sopenharmony_ci    len: usize,
18c67d6573Sopenharmony_ci}
19c67d6573Sopenharmony_ci
20c67d6573Sopenharmony_ciimpl InputAt {
21c67d6573Sopenharmony_ci    /// Returns true iff this position is at the beginning of the input.
22c67d6573Sopenharmony_ci    pub fn is_start(&self) -> bool {
23c67d6573Sopenharmony_ci        self.pos == 0
24c67d6573Sopenharmony_ci    }
25c67d6573Sopenharmony_ci
26c67d6573Sopenharmony_ci    /// Returns true iff this position is past the end of the input.
27c67d6573Sopenharmony_ci    pub fn is_end(&self) -> bool {
28c67d6573Sopenharmony_ci        self.c.is_none() && self.byte.is_none()
29c67d6573Sopenharmony_ci    }
30c67d6573Sopenharmony_ci
31c67d6573Sopenharmony_ci    /// Returns the character at this position.
32c67d6573Sopenharmony_ci    ///
33c67d6573Sopenharmony_ci    /// If this position is just before or after the input, then an absent
34c67d6573Sopenharmony_ci    /// character is returned.
35c67d6573Sopenharmony_ci    pub fn char(&self) -> Char {
36c67d6573Sopenharmony_ci        self.c
37c67d6573Sopenharmony_ci    }
38c67d6573Sopenharmony_ci
39c67d6573Sopenharmony_ci    /// Returns the byte at this position.
40c67d6573Sopenharmony_ci    pub fn byte(&self) -> Option<u8> {
41c67d6573Sopenharmony_ci        self.byte
42c67d6573Sopenharmony_ci    }
43c67d6573Sopenharmony_ci
44c67d6573Sopenharmony_ci    /// Returns the UTF-8 width of the character at this position.
45c67d6573Sopenharmony_ci    pub fn len(&self) -> usize {
46c67d6573Sopenharmony_ci        self.len
47c67d6573Sopenharmony_ci    }
48c67d6573Sopenharmony_ci
49c67d6573Sopenharmony_ci    /// Returns whether the UTF-8 width of the character at this position
50c67d6573Sopenharmony_ci    /// is zero.
51c67d6573Sopenharmony_ci    pub fn is_empty(&self) -> bool {
52c67d6573Sopenharmony_ci        self.len == 0
53c67d6573Sopenharmony_ci    }
54c67d6573Sopenharmony_ci
55c67d6573Sopenharmony_ci    /// Returns the byte offset of this position.
56c67d6573Sopenharmony_ci    pub fn pos(&self) -> usize {
57c67d6573Sopenharmony_ci        self.pos
58c67d6573Sopenharmony_ci    }
59c67d6573Sopenharmony_ci
60c67d6573Sopenharmony_ci    /// Returns the byte offset of the next position in the input.
61c67d6573Sopenharmony_ci    pub fn next_pos(&self) -> usize {
62c67d6573Sopenharmony_ci        self.pos + self.len
63c67d6573Sopenharmony_ci    }
64c67d6573Sopenharmony_ci}
65c67d6573Sopenharmony_ci
66c67d6573Sopenharmony_ci/// An abstraction over input used in the matching engines.
67c67d6573Sopenharmony_cipub trait Input: fmt::Debug {
68c67d6573Sopenharmony_ci    /// Return an encoding of the position at byte offset `i`.
69c67d6573Sopenharmony_ci    fn at(&self, i: usize) -> InputAt;
70c67d6573Sopenharmony_ci
71c67d6573Sopenharmony_ci    /// Return the Unicode character occurring next to `at`.
72c67d6573Sopenharmony_ci    ///
73c67d6573Sopenharmony_ci    /// If no such character could be decoded, then `Char` is absent.
74c67d6573Sopenharmony_ci    fn next_char(&self, at: InputAt) -> Char;
75c67d6573Sopenharmony_ci
76c67d6573Sopenharmony_ci    /// Return the Unicode character occurring previous to `at`.
77c67d6573Sopenharmony_ci    ///
78c67d6573Sopenharmony_ci    /// If no such character could be decoded, then `Char` is absent.
79c67d6573Sopenharmony_ci    fn previous_char(&self, at: InputAt) -> Char;
80c67d6573Sopenharmony_ci
81c67d6573Sopenharmony_ci    /// Return true if the given empty width instruction matches at the
82c67d6573Sopenharmony_ci    /// input position given.
83c67d6573Sopenharmony_ci    fn is_empty_match(&self, at: InputAt, empty: &InstEmptyLook) -> bool;
84c67d6573Sopenharmony_ci
85c67d6573Sopenharmony_ci    /// Scan the input for a matching prefix.
86c67d6573Sopenharmony_ci    fn prefix_at(
87c67d6573Sopenharmony_ci        &self,
88c67d6573Sopenharmony_ci        prefixes: &LiteralSearcher,
89c67d6573Sopenharmony_ci        at: InputAt,
90c67d6573Sopenharmony_ci    ) -> Option<InputAt>;
91c67d6573Sopenharmony_ci
92c67d6573Sopenharmony_ci    /// The number of bytes in the input.
93c67d6573Sopenharmony_ci    fn len(&self) -> usize;
94c67d6573Sopenharmony_ci
95c67d6573Sopenharmony_ci    /// Whether the input is empty.
96c67d6573Sopenharmony_ci    fn is_empty(&self) -> bool {
97c67d6573Sopenharmony_ci        self.len() == 0
98c67d6573Sopenharmony_ci    }
99c67d6573Sopenharmony_ci
100c67d6573Sopenharmony_ci    /// Return the given input as a sequence of bytes.
101c67d6573Sopenharmony_ci    fn as_bytes(&self) -> &[u8];
102c67d6573Sopenharmony_ci}
103c67d6573Sopenharmony_ci
104c67d6573Sopenharmony_ciimpl<'a, T: Input> Input for &'a T {
105c67d6573Sopenharmony_ci    fn at(&self, i: usize) -> InputAt {
106c67d6573Sopenharmony_ci        (**self).at(i)
107c67d6573Sopenharmony_ci    }
108c67d6573Sopenharmony_ci
109c67d6573Sopenharmony_ci    fn next_char(&self, at: InputAt) -> Char {
110c67d6573Sopenharmony_ci        (**self).next_char(at)
111c67d6573Sopenharmony_ci    }
112c67d6573Sopenharmony_ci
113c67d6573Sopenharmony_ci    fn previous_char(&self, at: InputAt) -> Char {
114c67d6573Sopenharmony_ci        (**self).previous_char(at)
115c67d6573Sopenharmony_ci    }
116c67d6573Sopenharmony_ci
117c67d6573Sopenharmony_ci    fn is_empty_match(&self, at: InputAt, empty: &InstEmptyLook) -> bool {
118c67d6573Sopenharmony_ci        (**self).is_empty_match(at, empty)
119c67d6573Sopenharmony_ci    }
120c67d6573Sopenharmony_ci
121c67d6573Sopenharmony_ci    fn prefix_at(
122c67d6573Sopenharmony_ci        &self,
123c67d6573Sopenharmony_ci        prefixes: &LiteralSearcher,
124c67d6573Sopenharmony_ci        at: InputAt,
125c67d6573Sopenharmony_ci    ) -> Option<InputAt> {
126c67d6573Sopenharmony_ci        (**self).prefix_at(prefixes, at)
127c67d6573Sopenharmony_ci    }
128c67d6573Sopenharmony_ci
129c67d6573Sopenharmony_ci    fn len(&self) -> usize {
130c67d6573Sopenharmony_ci        (**self).len()
131c67d6573Sopenharmony_ci    }
132c67d6573Sopenharmony_ci
133c67d6573Sopenharmony_ci    fn as_bytes(&self) -> &[u8] {
134c67d6573Sopenharmony_ci        (**self).as_bytes()
135c67d6573Sopenharmony_ci    }
136c67d6573Sopenharmony_ci}
137c67d6573Sopenharmony_ci
/// Input reader that walks the underlying bytes one character at a time.
///
/// It is a thin wrapper around a borrowed byte slice.
#[derive(Debug, Copy, Clone)]
pub struct CharInput<'t>(&'t [u8]);
141c67d6573Sopenharmony_ci
142c67d6573Sopenharmony_ciimpl<'t> CharInput<'t> {
143c67d6573Sopenharmony_ci    /// Return a new character input reader for the given string.
144c67d6573Sopenharmony_ci    pub fn new(s: &'t [u8]) -> CharInput<'t> {
145c67d6573Sopenharmony_ci        CharInput(s)
146c67d6573Sopenharmony_ci    }
147c67d6573Sopenharmony_ci}
148c67d6573Sopenharmony_ci
149c67d6573Sopenharmony_ciimpl<'t> ops::Deref for CharInput<'t> {
150c67d6573Sopenharmony_ci    type Target = [u8];
151c67d6573Sopenharmony_ci
152c67d6573Sopenharmony_ci    fn deref(&self) -> &[u8] {
153c67d6573Sopenharmony_ci        self.0
154c67d6573Sopenharmony_ci    }
155c67d6573Sopenharmony_ci}
156c67d6573Sopenharmony_ci
157c67d6573Sopenharmony_ciimpl<'t> Input for CharInput<'t> {
158c67d6573Sopenharmony_ci    fn at(&self, i: usize) -> InputAt {
159c67d6573Sopenharmony_ci        if i >= self.len() {
160c67d6573Sopenharmony_ci            InputAt { pos: self.len(), c: None.into(), byte: None, len: 0 }
161c67d6573Sopenharmony_ci        } else {
162c67d6573Sopenharmony_ci            let c = decode_utf8(&self[i..]).map(|(c, _)| c).into();
163c67d6573Sopenharmony_ci            InputAt { pos: i, c, byte: None, len: c.len_utf8() }
164c67d6573Sopenharmony_ci        }
165c67d6573Sopenharmony_ci    }
166c67d6573Sopenharmony_ci
167c67d6573Sopenharmony_ci    fn next_char(&self, at: InputAt) -> Char {
168c67d6573Sopenharmony_ci        at.char()
169c67d6573Sopenharmony_ci    }
170c67d6573Sopenharmony_ci
171c67d6573Sopenharmony_ci    fn previous_char(&self, at: InputAt) -> Char {
172c67d6573Sopenharmony_ci        decode_last_utf8(&self[..at.pos()]).map(|(c, _)| c).into()
173c67d6573Sopenharmony_ci    }
174c67d6573Sopenharmony_ci
175c67d6573Sopenharmony_ci    fn is_empty_match(&self, at: InputAt, empty: &InstEmptyLook) -> bool {
176c67d6573Sopenharmony_ci        use crate::prog::EmptyLook::*;
177c67d6573Sopenharmony_ci        match empty.look {
178c67d6573Sopenharmony_ci            StartLine => {
179c67d6573Sopenharmony_ci                let c = self.previous_char(at);
180c67d6573Sopenharmony_ci                at.pos() == 0 || c == '\n'
181c67d6573Sopenharmony_ci            }
182c67d6573Sopenharmony_ci            EndLine => {
183c67d6573Sopenharmony_ci                let c = self.next_char(at);
184c67d6573Sopenharmony_ci                at.pos() == self.len() || c == '\n'
185c67d6573Sopenharmony_ci            }
186c67d6573Sopenharmony_ci            StartText => at.pos() == 0,
187c67d6573Sopenharmony_ci            EndText => at.pos() == self.len(),
188c67d6573Sopenharmony_ci            WordBoundary => {
189c67d6573Sopenharmony_ci                let (c1, c2) = (self.previous_char(at), self.next_char(at));
190c67d6573Sopenharmony_ci                c1.is_word_char() != c2.is_word_char()
191c67d6573Sopenharmony_ci            }
192c67d6573Sopenharmony_ci            NotWordBoundary => {
193c67d6573Sopenharmony_ci                let (c1, c2) = (self.previous_char(at), self.next_char(at));
194c67d6573Sopenharmony_ci                c1.is_word_char() == c2.is_word_char()
195c67d6573Sopenharmony_ci            }
196c67d6573Sopenharmony_ci            WordBoundaryAscii => {
197c67d6573Sopenharmony_ci                let (c1, c2) = (self.previous_char(at), self.next_char(at));
198c67d6573Sopenharmony_ci                c1.is_word_byte() != c2.is_word_byte()
199c67d6573Sopenharmony_ci            }
200c67d6573Sopenharmony_ci            NotWordBoundaryAscii => {
201c67d6573Sopenharmony_ci                let (c1, c2) = (self.previous_char(at), self.next_char(at));
202c67d6573Sopenharmony_ci                c1.is_word_byte() == c2.is_word_byte()
203c67d6573Sopenharmony_ci            }
204c67d6573Sopenharmony_ci        }
205c67d6573Sopenharmony_ci    }
206c67d6573Sopenharmony_ci
207c67d6573Sopenharmony_ci    fn prefix_at(
208c67d6573Sopenharmony_ci        &self,
209c67d6573Sopenharmony_ci        prefixes: &LiteralSearcher,
210c67d6573Sopenharmony_ci        at: InputAt,
211c67d6573Sopenharmony_ci    ) -> Option<InputAt> {
212c67d6573Sopenharmony_ci        prefixes.find(&self[at.pos()..]).map(|(s, _)| self.at(at.pos() + s))
213c67d6573Sopenharmony_ci    }
214c67d6573Sopenharmony_ci
215c67d6573Sopenharmony_ci    fn len(&self) -> usize {
216c67d6573Sopenharmony_ci        self.0.len()
217c67d6573Sopenharmony_ci    }
218c67d6573Sopenharmony_ci
219c67d6573Sopenharmony_ci    fn as_bytes(&self) -> &[u8] {
220c67d6573Sopenharmony_ci        self.0
221c67d6573Sopenharmony_ci    }
222c67d6573Sopenharmony_ci}
223c67d6573Sopenharmony_ci
/// Input reader that walks the underlying bytes one byte at a time.
#[derive(Debug, Copy, Clone)]
pub struct ByteInput<'t> {
    /// The borrowed bytes being read.
    text: &'t [u8],
    /// When set, the ASCII word-boundary assertions refuse to match next to
    /// bytes that do not decode as UTF-8.
    only_utf8: bool,
}
230c67d6573Sopenharmony_ci
231c67d6573Sopenharmony_ciimpl<'t> ByteInput<'t> {
232c67d6573Sopenharmony_ci    /// Return a new byte-based input reader for the given string.
233c67d6573Sopenharmony_ci    pub fn new(text: &'t [u8], only_utf8: bool) -> ByteInput<'t> {
234c67d6573Sopenharmony_ci        ByteInput { text, only_utf8 }
235c67d6573Sopenharmony_ci    }
236c67d6573Sopenharmony_ci}
237c67d6573Sopenharmony_ci
238c67d6573Sopenharmony_ciimpl<'t> ops::Deref for ByteInput<'t> {
239c67d6573Sopenharmony_ci    type Target = [u8];
240c67d6573Sopenharmony_ci
241c67d6573Sopenharmony_ci    fn deref(&self) -> &[u8] {
242c67d6573Sopenharmony_ci        self.text
243c67d6573Sopenharmony_ci    }
244c67d6573Sopenharmony_ci}
245c67d6573Sopenharmony_ci
246c67d6573Sopenharmony_ciimpl<'t> Input for ByteInput<'t> {
247c67d6573Sopenharmony_ci    fn at(&self, i: usize) -> InputAt {
248c67d6573Sopenharmony_ci        if i >= self.len() {
249c67d6573Sopenharmony_ci            InputAt { pos: self.len(), c: None.into(), byte: None, len: 0 }
250c67d6573Sopenharmony_ci        } else {
251c67d6573Sopenharmony_ci            InputAt {
252c67d6573Sopenharmony_ci                pos: i,
253c67d6573Sopenharmony_ci                c: None.into(),
254c67d6573Sopenharmony_ci                byte: self.get(i).cloned(),
255c67d6573Sopenharmony_ci                len: 1,
256c67d6573Sopenharmony_ci            }
257c67d6573Sopenharmony_ci        }
258c67d6573Sopenharmony_ci    }
259c67d6573Sopenharmony_ci
260c67d6573Sopenharmony_ci    fn next_char(&self, at: InputAt) -> Char {
261c67d6573Sopenharmony_ci        decode_utf8(&self[at.pos()..]).map(|(c, _)| c).into()
262c67d6573Sopenharmony_ci    }
263c67d6573Sopenharmony_ci
264c67d6573Sopenharmony_ci    fn previous_char(&self, at: InputAt) -> Char {
265c67d6573Sopenharmony_ci        decode_last_utf8(&self[..at.pos()]).map(|(c, _)| c).into()
266c67d6573Sopenharmony_ci    }
267c67d6573Sopenharmony_ci
268c67d6573Sopenharmony_ci    fn is_empty_match(&self, at: InputAt, empty: &InstEmptyLook) -> bool {
269c67d6573Sopenharmony_ci        use crate::prog::EmptyLook::*;
270c67d6573Sopenharmony_ci        match empty.look {
271c67d6573Sopenharmony_ci            StartLine => {
272c67d6573Sopenharmony_ci                let c = self.previous_char(at);
273c67d6573Sopenharmony_ci                at.pos() == 0 || c == '\n'
274c67d6573Sopenharmony_ci            }
275c67d6573Sopenharmony_ci            EndLine => {
276c67d6573Sopenharmony_ci                let c = self.next_char(at);
277c67d6573Sopenharmony_ci                at.pos() == self.len() || c == '\n'
278c67d6573Sopenharmony_ci            }
279c67d6573Sopenharmony_ci            StartText => at.pos() == 0,
280c67d6573Sopenharmony_ci            EndText => at.pos() == self.len(),
281c67d6573Sopenharmony_ci            WordBoundary => {
282c67d6573Sopenharmony_ci                let (c1, c2) = (self.previous_char(at), self.next_char(at));
283c67d6573Sopenharmony_ci                c1.is_word_char() != c2.is_word_char()
284c67d6573Sopenharmony_ci            }
285c67d6573Sopenharmony_ci            NotWordBoundary => {
286c67d6573Sopenharmony_ci                let (c1, c2) = (self.previous_char(at), self.next_char(at));
287c67d6573Sopenharmony_ci                c1.is_word_char() == c2.is_word_char()
288c67d6573Sopenharmony_ci            }
289c67d6573Sopenharmony_ci            WordBoundaryAscii => {
290c67d6573Sopenharmony_ci                let (c1, c2) = (self.previous_char(at), self.next_char(at));
291c67d6573Sopenharmony_ci                if self.only_utf8 {
292c67d6573Sopenharmony_ci                    // If we must match UTF-8, then we can't match word
293c67d6573Sopenharmony_ci                    // boundaries at invalid UTF-8.
294c67d6573Sopenharmony_ci                    if c1.is_none() && !at.is_start() {
295c67d6573Sopenharmony_ci                        return false;
296c67d6573Sopenharmony_ci                    }
297c67d6573Sopenharmony_ci                    if c2.is_none() && !at.is_end() {
298c67d6573Sopenharmony_ci                        return false;
299c67d6573Sopenharmony_ci                    }
300c67d6573Sopenharmony_ci                }
301c67d6573Sopenharmony_ci                c1.is_word_byte() != c2.is_word_byte()
302c67d6573Sopenharmony_ci            }
303c67d6573Sopenharmony_ci            NotWordBoundaryAscii => {
304c67d6573Sopenharmony_ci                let (c1, c2) = (self.previous_char(at), self.next_char(at));
305c67d6573Sopenharmony_ci                if self.only_utf8 {
306c67d6573Sopenharmony_ci                    // If we must match UTF-8, then we can't match word
307c67d6573Sopenharmony_ci                    // boundaries at invalid UTF-8.
308c67d6573Sopenharmony_ci                    if c1.is_none() && !at.is_start() {
309c67d6573Sopenharmony_ci                        return false;
310c67d6573Sopenharmony_ci                    }
311c67d6573Sopenharmony_ci                    if c2.is_none() && !at.is_end() {
312c67d6573Sopenharmony_ci                        return false;
313c67d6573Sopenharmony_ci                    }
314c67d6573Sopenharmony_ci                }
315c67d6573Sopenharmony_ci                c1.is_word_byte() == c2.is_word_byte()
316c67d6573Sopenharmony_ci            }
317c67d6573Sopenharmony_ci        }
318c67d6573Sopenharmony_ci    }
319c67d6573Sopenharmony_ci
320c67d6573Sopenharmony_ci    fn prefix_at(
321c67d6573Sopenharmony_ci        &self,
322c67d6573Sopenharmony_ci        prefixes: &LiteralSearcher,
323c67d6573Sopenharmony_ci        at: InputAt,
324c67d6573Sopenharmony_ci    ) -> Option<InputAt> {
325c67d6573Sopenharmony_ci        prefixes.find(&self[at.pos()..]).map(|(s, _)| self.at(at.pos() + s))
326c67d6573Sopenharmony_ci    }
327c67d6573Sopenharmony_ci
328c67d6573Sopenharmony_ci    fn len(&self) -> usize {
329c67d6573Sopenharmony_ci        self.text.len()
330c67d6573Sopenharmony_ci    }
331c67d6573Sopenharmony_ci
332c67d6573Sopenharmony_ci    fn as_bytes(&self) -> &[u8] {
333c67d6573Sopenharmony_ci        self.text
334c67d6573Sopenharmony_ci    }
335c67d6573Sopenharmony_ci}
336c67d6573Sopenharmony_ci
/// `Option<char>` packed into a single `u32`.
///
/// Storing the optional character inline means it can be ordered against
/// other characters directly, with no case analysis on `Option<char>`.
///
/// (The optionality has nothing to do with encoding: the matching engines
/// use the absent value to stand for the boundaries at the beginning and
/// end of the search text.)
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Char(u32);
347c67d6573Sopenharmony_ci
348c67d6573Sopenharmony_ciimpl fmt::Debug for Char {
349c67d6573Sopenharmony_ci    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
350c67d6573Sopenharmony_ci        match char::from_u32(self.0) {
351c67d6573Sopenharmony_ci            None => write!(f, "Empty"),
352c67d6573Sopenharmony_ci            Some(c) => write!(f, "{:?}", c),
353c67d6573Sopenharmony_ci        }
354c67d6573Sopenharmony_ci    }
355c67d6573Sopenharmony_ci}
356c67d6573Sopenharmony_ci
357c67d6573Sopenharmony_ciimpl Char {
358c67d6573Sopenharmony_ci    /// Returns true iff the character is absent.
359c67d6573Sopenharmony_ci    #[inline]
360c67d6573Sopenharmony_ci    pub fn is_none(self) -> bool {
361c67d6573Sopenharmony_ci        self.0 == u32::MAX
362c67d6573Sopenharmony_ci    }
363c67d6573Sopenharmony_ci
364c67d6573Sopenharmony_ci    /// Returns the length of the character's UTF-8 encoding.
365c67d6573Sopenharmony_ci    ///
366c67d6573Sopenharmony_ci    /// If the character is absent, then `1` is returned.
367c67d6573Sopenharmony_ci    #[inline]
368c67d6573Sopenharmony_ci    pub fn len_utf8(self) -> usize {
369c67d6573Sopenharmony_ci        char::from_u32(self.0).map_or(1, |c| c.len_utf8())
370c67d6573Sopenharmony_ci    }
371c67d6573Sopenharmony_ci
372c67d6573Sopenharmony_ci    /// Returns true iff the character is a word character.
373c67d6573Sopenharmony_ci    ///
374c67d6573Sopenharmony_ci    /// If the character is absent, then false is returned.
375c67d6573Sopenharmony_ci    pub fn is_word_char(self) -> bool {
376c67d6573Sopenharmony_ci        // is_word_character can panic if the Unicode data for \w isn't
377c67d6573Sopenharmony_ci        // available. However, our compiler ensures that if a Unicode word
378c67d6573Sopenharmony_ci        // boundary is used, then the data must also be available. If it isn't,
379c67d6573Sopenharmony_ci        // then the compiler returns an error.
380c67d6573Sopenharmony_ci        char::from_u32(self.0).map_or(false, regex_syntax::is_word_character)
381c67d6573Sopenharmony_ci    }
382c67d6573Sopenharmony_ci
383c67d6573Sopenharmony_ci    /// Returns true iff the byte is a word byte.
384c67d6573Sopenharmony_ci    ///
385c67d6573Sopenharmony_ci    /// If the byte is absent, then false is returned.
386c67d6573Sopenharmony_ci    pub fn is_word_byte(self) -> bool {
387c67d6573Sopenharmony_ci        match char::from_u32(self.0) {
388c67d6573Sopenharmony_ci            Some(c) if c <= '\u{7F}' => regex_syntax::is_word_byte(c as u8),
389c67d6573Sopenharmony_ci            None | Some(_) => false,
390c67d6573Sopenharmony_ci        }
391c67d6573Sopenharmony_ci    }
392c67d6573Sopenharmony_ci}
393c67d6573Sopenharmony_ci
394c67d6573Sopenharmony_ciimpl From<char> for Char {
395c67d6573Sopenharmony_ci    fn from(c: char) -> Char {
396c67d6573Sopenharmony_ci        Char(c as u32)
397c67d6573Sopenharmony_ci    }
398c67d6573Sopenharmony_ci}
399c67d6573Sopenharmony_ci
400c67d6573Sopenharmony_ciimpl From<Option<char>> for Char {
401c67d6573Sopenharmony_ci    fn from(c: Option<char>) -> Char {
402c67d6573Sopenharmony_ci        c.map_or(Char(u32::MAX), |c| c.into())
403c67d6573Sopenharmony_ci    }
404c67d6573Sopenharmony_ci}
405c67d6573Sopenharmony_ci
406c67d6573Sopenharmony_ciimpl PartialEq<char> for Char {
407c67d6573Sopenharmony_ci    #[inline]
408c67d6573Sopenharmony_ci    fn eq(&self, other: &char) -> bool {
409c67d6573Sopenharmony_ci        self.0 == *other as u32
410c67d6573Sopenharmony_ci    }
411c67d6573Sopenharmony_ci}
412c67d6573Sopenharmony_ci
413c67d6573Sopenharmony_ciimpl PartialEq<Char> for char {
414c67d6573Sopenharmony_ci    #[inline]
415c67d6573Sopenharmony_ci    fn eq(&self, other: &Char) -> bool {
416c67d6573Sopenharmony_ci        *self as u32 == other.0
417c67d6573Sopenharmony_ci    }
418c67d6573Sopenharmony_ci}
419c67d6573Sopenharmony_ci
420c67d6573Sopenharmony_ciimpl PartialOrd<char> for Char {
421c67d6573Sopenharmony_ci    #[inline]
422c67d6573Sopenharmony_ci    fn partial_cmp(&self, other: &char) -> Option<Ordering> {
423c67d6573Sopenharmony_ci        self.0.partial_cmp(&(*other as u32))
424c67d6573Sopenharmony_ci    }
425c67d6573Sopenharmony_ci}
426c67d6573Sopenharmony_ci
427c67d6573Sopenharmony_ciimpl PartialOrd<Char> for char {
428c67d6573Sopenharmony_ci    #[inline]
429c67d6573Sopenharmony_ci    fn partial_cmp(&self, other: &Char) -> Option<Ordering> {
430c67d6573Sopenharmony_ci        (*self as u32).partial_cmp(&other.0)
431c67d6573Sopenharmony_ci    }
432c67d6573Sopenharmony_ci}
433