1c67d6573Sopenharmony_ciuse std::char; 2c67d6573Sopenharmony_ciuse std::cmp::Ordering; 3c67d6573Sopenharmony_ciuse std::fmt; 4c67d6573Sopenharmony_ciuse std::ops; 5c67d6573Sopenharmony_ciuse std::u32; 6c67d6573Sopenharmony_ci 7c67d6573Sopenharmony_ciuse crate::literal::LiteralSearcher; 8c67d6573Sopenharmony_ciuse crate::prog::InstEmptyLook; 9c67d6573Sopenharmony_ciuse crate::utf8::{decode_last_utf8, decode_utf8}; 10c67d6573Sopenharmony_ci 11c67d6573Sopenharmony_ci/// Represents a location in the input. 12c67d6573Sopenharmony_ci#[derive(Clone, Copy, Debug)] 13c67d6573Sopenharmony_cipub struct InputAt { 14c67d6573Sopenharmony_ci pos: usize, 15c67d6573Sopenharmony_ci c: Char, 16c67d6573Sopenharmony_ci byte: Option<u8>, 17c67d6573Sopenharmony_ci len: usize, 18c67d6573Sopenharmony_ci} 19c67d6573Sopenharmony_ci 20c67d6573Sopenharmony_ciimpl InputAt { 21c67d6573Sopenharmony_ci /// Returns true iff this position is at the beginning of the input. 22c67d6573Sopenharmony_ci pub fn is_start(&self) -> bool { 23c67d6573Sopenharmony_ci self.pos == 0 24c67d6573Sopenharmony_ci } 25c67d6573Sopenharmony_ci 26c67d6573Sopenharmony_ci /// Returns true iff this position is past the end of the input. 27c67d6573Sopenharmony_ci pub fn is_end(&self) -> bool { 28c67d6573Sopenharmony_ci self.c.is_none() && self.byte.is_none() 29c67d6573Sopenharmony_ci } 30c67d6573Sopenharmony_ci 31c67d6573Sopenharmony_ci /// Returns the character at this position. 32c67d6573Sopenharmony_ci /// 33c67d6573Sopenharmony_ci /// If this position is just before or after the input, then an absent 34c67d6573Sopenharmony_ci /// character is returned. 35c67d6573Sopenharmony_ci pub fn char(&self) -> Char { 36c67d6573Sopenharmony_ci self.c 37c67d6573Sopenharmony_ci } 38c67d6573Sopenharmony_ci 39c67d6573Sopenharmony_ci /// Returns the byte at this position. 40c67d6573Sopenharmony_ci pub fn byte(&self) -> Option<u8> { 41c67d6573Sopenharmony_ci self.byte 42c67d6573Sopenharmony_ci } 43c67d6573Sopenharmony_ci 44c67d6573Sopenharmony_ci /// Returns the UTF-8 width of the character at this position. 45c67d6573Sopenharmony_ci pub fn len(&self) -> usize { 46c67d6573Sopenharmony_ci self.len 47c67d6573Sopenharmony_ci } 48c67d6573Sopenharmony_ci 49c67d6573Sopenharmony_ci /// Returns whether the UTF-8 width of the character at this position 50c67d6573Sopenharmony_ci /// is zero. 51c67d6573Sopenharmony_ci pub fn is_empty(&self) -> bool { 52c67d6573Sopenharmony_ci self.len == 0 53c67d6573Sopenharmony_ci } 54c67d6573Sopenharmony_ci 55c67d6573Sopenharmony_ci /// Returns the byte offset of this position. 56c67d6573Sopenharmony_ci pub fn pos(&self) -> usize { 57c67d6573Sopenharmony_ci self.pos 58c67d6573Sopenharmony_ci } 59c67d6573Sopenharmony_ci 60c67d6573Sopenharmony_ci /// Returns the byte offset of the next position in the input. 61c67d6573Sopenharmony_ci pub fn next_pos(&self) -> usize { 62c67d6573Sopenharmony_ci self.pos + self.len 63c67d6573Sopenharmony_ci } 64c67d6573Sopenharmony_ci} 65c67d6573Sopenharmony_ci 66c67d6573Sopenharmony_ci/// An abstraction over input used in the matching engines. 67c67d6573Sopenharmony_cipub trait Input: fmt::Debug { 68c67d6573Sopenharmony_ci /// Return an encoding of the position at byte offset `i`. 69c67d6573Sopenharmony_ci fn at(&self, i: usize) -> InputAt; 70c67d6573Sopenharmony_ci 71c67d6573Sopenharmony_ci /// Return the Unicode character occurring next to `at`. 72c67d6573Sopenharmony_ci /// 73c67d6573Sopenharmony_ci /// If no such character could be decoded, then `Char` is absent. 74c67d6573Sopenharmony_ci fn next_char(&self, at: InputAt) -> Char; 75c67d6573Sopenharmony_ci 76c67d6573Sopenharmony_ci /// Return the Unicode character occurring previous to `at`. 77c67d6573Sopenharmony_ci /// 78c67d6573Sopenharmony_ci /// If no such character could be decoded, then `Char` is absent. 79c67d6573Sopenharmony_ci fn previous_char(&self, at: InputAt) -> Char; 80c67d6573Sopenharmony_ci 81c67d6573Sopenharmony_ci /// Return true if the given empty width instruction matches at the 82c67d6573Sopenharmony_ci /// input position given. 83c67d6573Sopenharmony_ci fn is_empty_match(&self, at: InputAt, empty: &InstEmptyLook) -> bool; 84c67d6573Sopenharmony_ci 85c67d6573Sopenharmony_ci /// Scan the input for a matching prefix. 86c67d6573Sopenharmony_ci fn prefix_at( 87c67d6573Sopenharmony_ci &self, 88c67d6573Sopenharmony_ci prefixes: &LiteralSearcher, 89c67d6573Sopenharmony_ci at: InputAt, 90c67d6573Sopenharmony_ci ) -> Option<InputAt>; 91c67d6573Sopenharmony_ci 92c67d6573Sopenharmony_ci /// The number of bytes in the input. 93c67d6573Sopenharmony_ci fn len(&self) -> usize; 94c67d6573Sopenharmony_ci 95c67d6573Sopenharmony_ci /// Whether the input is empty. 96c67d6573Sopenharmony_ci fn is_empty(&self) -> bool { 97c67d6573Sopenharmony_ci self.len() == 0 98c67d6573Sopenharmony_ci } 99c67d6573Sopenharmony_ci 100c67d6573Sopenharmony_ci /// Return the given input as a sequence of bytes. 101c67d6573Sopenharmony_ci fn as_bytes(&self) -> &[u8]; 102c67d6573Sopenharmony_ci} 103c67d6573Sopenharmony_ci 104c67d6573Sopenharmony_ciimpl<'a, T: Input> Input for &'a T { 105c67d6573Sopenharmony_ci fn at(&self, i: usize) -> InputAt { 106c67d6573Sopenharmony_ci (**self).at(i) 107c67d6573Sopenharmony_ci } 108c67d6573Sopenharmony_ci 109c67d6573Sopenharmony_ci fn next_char(&self, at: InputAt) -> Char { 110c67d6573Sopenharmony_ci (**self).next_char(at) 111c67d6573Sopenharmony_ci } 112c67d6573Sopenharmony_ci 113c67d6573Sopenharmony_ci fn previous_char(&self, at: InputAt) -> Char { 114c67d6573Sopenharmony_ci (**self).previous_char(at) 115c67d6573Sopenharmony_ci } 116c67d6573Sopenharmony_ci 117c67d6573Sopenharmony_ci fn is_empty_match(&self, at: InputAt, empty: &InstEmptyLook) -> bool { 118c67d6573Sopenharmony_ci (**self).is_empty_match(at, empty) 119c67d6573Sopenharmony_ci } 120c67d6573Sopenharmony_ci 121c67d6573Sopenharmony_ci fn prefix_at( 122c67d6573Sopenharmony_ci &self, 123c67d6573Sopenharmony_ci prefixes: &LiteralSearcher, 124c67d6573Sopenharmony_ci at: InputAt, 125c67d6573Sopenharmony_ci ) -> Option<InputAt> { 126c67d6573Sopenharmony_ci (**self).prefix_at(prefixes, at) 127c67d6573Sopenharmony_ci } 128c67d6573Sopenharmony_ci 129c67d6573Sopenharmony_ci fn len(&self) -> usize { 130c67d6573Sopenharmony_ci (**self).len() 131c67d6573Sopenharmony_ci } 132c67d6573Sopenharmony_ci 133c67d6573Sopenharmony_ci fn as_bytes(&self) -> &[u8] { 134c67d6573Sopenharmony_ci (**self).as_bytes() 135c67d6573Sopenharmony_ci } 136c67d6573Sopenharmony_ci} 137c67d6573Sopenharmony_ci 138c67d6573Sopenharmony_ci/// An input reader over characters. 139c67d6573Sopenharmony_ci#[derive(Clone, Copy, Debug)] 140c67d6573Sopenharmony_cipub struct CharInput<'t>(&'t [u8]); 141c67d6573Sopenharmony_ci 142c67d6573Sopenharmony_ciimpl<'t> CharInput<'t> { 143c67d6573Sopenharmony_ci /// Return a new character input reader for the given string. 144c67d6573Sopenharmony_ci pub fn new(s: &'t [u8]) -> CharInput<'t> { 145c67d6573Sopenharmony_ci CharInput(s) 146c67d6573Sopenharmony_ci } 147c67d6573Sopenharmony_ci} 148c67d6573Sopenharmony_ci 149c67d6573Sopenharmony_ciimpl<'t> ops::Deref for CharInput<'t> { 150c67d6573Sopenharmony_ci type Target = [u8]; 151c67d6573Sopenharmony_ci 152c67d6573Sopenharmony_ci fn deref(&self) -> &[u8] { 153c67d6573Sopenharmony_ci self.0 154c67d6573Sopenharmony_ci } 155c67d6573Sopenharmony_ci} 156c67d6573Sopenharmony_ci 157c67d6573Sopenharmony_ciimpl<'t> Input for CharInput<'t> { 158c67d6573Sopenharmony_ci fn at(&self, i: usize) -> InputAt { 159c67d6573Sopenharmony_ci if i >= self.len() { 160c67d6573Sopenharmony_ci InputAt { pos: self.len(), c: None.into(), byte: None, len: 0 } 161c67d6573Sopenharmony_ci } else { 162c67d6573Sopenharmony_ci let c = decode_utf8(&self[i..]).map(|(c, _)| c).into(); 163c67d6573Sopenharmony_ci InputAt { pos: i, c, byte: None, len: c.len_utf8() } 164c67d6573Sopenharmony_ci } 165c67d6573Sopenharmony_ci } 166c67d6573Sopenharmony_ci 167c67d6573Sopenharmony_ci fn next_char(&self, at: InputAt) -> Char { 168c67d6573Sopenharmony_ci at.char() 169c67d6573Sopenharmony_ci } 170c67d6573Sopenharmony_ci 171c67d6573Sopenharmony_ci fn previous_char(&self, at: InputAt) -> Char { 172c67d6573Sopenharmony_ci decode_last_utf8(&self[..at.pos()]).map(|(c, _)| c).into() 173c67d6573Sopenharmony_ci } 174c67d6573Sopenharmony_ci 175c67d6573Sopenharmony_ci fn is_empty_match(&self, at: InputAt, empty: &InstEmptyLook) -> bool { 176c67d6573Sopenharmony_ci use crate::prog::EmptyLook::*; 177c67d6573Sopenharmony_ci match empty.look { 178c67d6573Sopenharmony_ci StartLine => { 179c67d6573Sopenharmony_ci let c = self.previous_char(at); 180c67d6573Sopenharmony_ci at.pos() == 0 || c == '\n' 181c67d6573Sopenharmony_ci } 182c67d6573Sopenharmony_ci EndLine => { 183c67d6573Sopenharmony_ci let c = self.next_char(at); 184c67d6573Sopenharmony_ci at.pos() == self.len() || c == '\n' 185c67d6573Sopenharmony_ci } 186c67d6573Sopenharmony_ci StartText => at.pos() == 0, 187c67d6573Sopenharmony_ci EndText => at.pos() == self.len(), 188c67d6573Sopenharmony_ci WordBoundary => { 189c67d6573Sopenharmony_ci let (c1, c2) = (self.previous_char(at), self.next_char(at)); 190c67d6573Sopenharmony_ci c1.is_word_char() != c2.is_word_char() 191c67d6573Sopenharmony_ci } 192c67d6573Sopenharmony_ci NotWordBoundary => { 193c67d6573Sopenharmony_ci let (c1, c2) = (self.previous_char(at), self.next_char(at)); 194c67d6573Sopenharmony_ci c1.is_word_char() == c2.is_word_char() 195c67d6573Sopenharmony_ci } 196c67d6573Sopenharmony_ci WordBoundaryAscii => { 197c67d6573Sopenharmony_ci let (c1, c2) = (self.previous_char(at), self.next_char(at)); 198c67d6573Sopenharmony_ci c1.is_word_byte() != c2.is_word_byte() 199c67d6573Sopenharmony_ci } 200c67d6573Sopenharmony_ci NotWordBoundaryAscii => { 201c67d6573Sopenharmony_ci let (c1, c2) = (self.previous_char(at), self.next_char(at)); 202c67d6573Sopenharmony_ci c1.is_word_byte() == c2.is_word_byte() 203c67d6573Sopenharmony_ci } 204c67d6573Sopenharmony_ci } 205c67d6573Sopenharmony_ci } 206c67d6573Sopenharmony_ci 207c67d6573Sopenharmony_ci fn prefix_at( 208c67d6573Sopenharmony_ci &self, 209c67d6573Sopenharmony_ci prefixes: &LiteralSearcher, 210c67d6573Sopenharmony_ci at: InputAt, 211c67d6573Sopenharmony_ci ) -> Option<InputAt> { 212c67d6573Sopenharmony_ci prefixes.find(&self[at.pos()..]).map(|(s, _)| self.at(at.pos() + s)) 213c67d6573Sopenharmony_ci } 214c67d6573Sopenharmony_ci 215c67d6573Sopenharmony_ci fn len(&self) -> usize { 216c67d6573Sopenharmony_ci self.0.len() 217c67d6573Sopenharmony_ci } 218c67d6573Sopenharmony_ci 219c67d6573Sopenharmony_ci fn as_bytes(&self) -> &[u8] { 220c67d6573Sopenharmony_ci self.0 221c67d6573Sopenharmony_ci } 222c67d6573Sopenharmony_ci} 223c67d6573Sopenharmony_ci 224c67d6573Sopenharmony_ci/// An input reader over bytes. 225c67d6573Sopenharmony_ci#[derive(Clone, Copy, Debug)] 226c67d6573Sopenharmony_cipub struct ByteInput<'t> { 227c67d6573Sopenharmony_ci text: &'t [u8], 228c67d6573Sopenharmony_ci only_utf8: bool, 229c67d6573Sopenharmony_ci} 230c67d6573Sopenharmony_ci 231c67d6573Sopenharmony_ciimpl<'t> ByteInput<'t> { 232c67d6573Sopenharmony_ci /// Return a new byte-based input reader for the given string. 233c67d6573Sopenharmony_ci pub fn new(text: &'t [u8], only_utf8: bool) -> ByteInput<'t> { 234c67d6573Sopenharmony_ci ByteInput { text, only_utf8 } 235c67d6573Sopenharmony_ci } 236c67d6573Sopenharmony_ci} 237c67d6573Sopenharmony_ci 238c67d6573Sopenharmony_ciimpl<'t> ops::Deref for ByteInput<'t> { 239c67d6573Sopenharmony_ci type Target = [u8]; 240c67d6573Sopenharmony_ci 241c67d6573Sopenharmony_ci fn deref(&self) -> &[u8] { 242c67d6573Sopenharmony_ci self.text 243c67d6573Sopenharmony_ci } 244c67d6573Sopenharmony_ci} 245c67d6573Sopenharmony_ci 246c67d6573Sopenharmony_ciimpl<'t> Input for ByteInput<'t> { 247c67d6573Sopenharmony_ci fn at(&self, i: usize) -> InputAt { 248c67d6573Sopenharmony_ci if i >= self.len() { 249c67d6573Sopenharmony_ci InputAt { pos: self.len(), c: None.into(), byte: None, len: 0 } 250c67d6573Sopenharmony_ci } else { 251c67d6573Sopenharmony_ci InputAt { 252c67d6573Sopenharmony_ci pos: i, 253c67d6573Sopenharmony_ci c: None.into(), 254c67d6573Sopenharmony_ci byte: self.get(i).cloned(), 255c67d6573Sopenharmony_ci len: 1, 256c67d6573Sopenharmony_ci } 257c67d6573Sopenharmony_ci } 258c67d6573Sopenharmony_ci } 259c67d6573Sopenharmony_ci 260c67d6573Sopenharmony_ci fn next_char(&self, at: InputAt) -> Char { 261c67d6573Sopenharmony_ci decode_utf8(&self[at.pos()..]).map(|(c, _)| c).into() 262c67d6573Sopenharmony_ci } 263c67d6573Sopenharmony_ci 264c67d6573Sopenharmony_ci fn previous_char(&self, at: InputAt) -> Char { 265c67d6573Sopenharmony_ci decode_last_utf8(&self[..at.pos()]).map(|(c, _)| c).into() 266c67d6573Sopenharmony_ci } 267c67d6573Sopenharmony_ci 268c67d6573Sopenharmony_ci fn is_empty_match(&self, at: InputAt, empty: &InstEmptyLook) -> bool { 269c67d6573Sopenharmony_ci use crate::prog::EmptyLook::*; 270c67d6573Sopenharmony_ci match empty.look { 271c67d6573Sopenharmony_ci StartLine => { 272c67d6573Sopenharmony_ci let c = self.previous_char(at); 273c67d6573Sopenharmony_ci at.pos() == 0 || c == '\n' 274c67d6573Sopenharmony_ci } 275c67d6573Sopenharmony_ci EndLine => { 276c67d6573Sopenharmony_ci let c = self.next_char(at); 277c67d6573Sopenharmony_ci at.pos() == self.len() || c == '\n' 278c67d6573Sopenharmony_ci } 279c67d6573Sopenharmony_ci StartText => at.pos() == 0, 280c67d6573Sopenharmony_ci EndText => at.pos() == self.len(), 281c67d6573Sopenharmony_ci WordBoundary => { 282c67d6573Sopenharmony_ci let (c1, c2) = (self.previous_char(at), self.next_char(at)); 283c67d6573Sopenharmony_ci c1.is_word_char() != c2.is_word_char() 284c67d6573Sopenharmony_ci } 285c67d6573Sopenharmony_ci NotWordBoundary => { 286c67d6573Sopenharmony_ci let (c1, c2) = (self.previous_char(at), self.next_char(at)); 287c67d6573Sopenharmony_ci c1.is_word_char() == c2.is_word_char() 288c67d6573Sopenharmony_ci } 289c67d6573Sopenharmony_ci WordBoundaryAscii => { 290c67d6573Sopenharmony_ci let (c1, c2) = (self.previous_char(at), self.next_char(at)); 291c67d6573Sopenharmony_ci if self.only_utf8 { 292c67d6573Sopenharmony_ci // If we must match UTF-8, then we can't match word 293c67d6573Sopenharmony_ci // boundaries at invalid UTF-8. 294c67d6573Sopenharmony_ci if c1.is_none() && !at.is_start() { 295c67d6573Sopenharmony_ci return false; 296c67d6573Sopenharmony_ci } 297c67d6573Sopenharmony_ci if c2.is_none() && !at.is_end() { 298c67d6573Sopenharmony_ci return false; 299c67d6573Sopenharmony_ci } 300c67d6573Sopenharmony_ci } 301c67d6573Sopenharmony_ci c1.is_word_byte() != c2.is_word_byte() 302c67d6573Sopenharmony_ci } 303c67d6573Sopenharmony_ci NotWordBoundaryAscii => { 304c67d6573Sopenharmony_ci let (c1, c2) = (self.previous_char(at), self.next_char(at)); 305c67d6573Sopenharmony_ci if self.only_utf8 { 306c67d6573Sopenharmony_ci // If we must match UTF-8, then we can't match word 307c67d6573Sopenharmony_ci // boundaries at invalid UTF-8. 308c67d6573Sopenharmony_ci if c1.is_none() && !at.is_start() { 309c67d6573Sopenharmony_ci return false; 310c67d6573Sopenharmony_ci } 311c67d6573Sopenharmony_ci if c2.is_none() && !at.is_end() { 312c67d6573Sopenharmony_ci return false; 313c67d6573Sopenharmony_ci } 314c67d6573Sopenharmony_ci } 315c67d6573Sopenharmony_ci c1.is_word_byte() == c2.is_word_byte() 316c67d6573Sopenharmony_ci } 317c67d6573Sopenharmony_ci } 318c67d6573Sopenharmony_ci } 319c67d6573Sopenharmony_ci 320c67d6573Sopenharmony_ci fn prefix_at( 321c67d6573Sopenharmony_ci &self, 322c67d6573Sopenharmony_ci prefixes: &LiteralSearcher, 323c67d6573Sopenharmony_ci at: InputAt, 324c67d6573Sopenharmony_ci ) -> Option<InputAt> { 325c67d6573Sopenharmony_ci prefixes.find(&self[at.pos()..]).map(|(s, _)| self.at(at.pos() + s)) 326c67d6573Sopenharmony_ci } 327c67d6573Sopenharmony_ci 328c67d6573Sopenharmony_ci fn len(&self) -> usize { 329c67d6573Sopenharmony_ci self.text.len() 330c67d6573Sopenharmony_ci } 331c67d6573Sopenharmony_ci 332c67d6573Sopenharmony_ci fn as_bytes(&self) -> &[u8] { 333c67d6573Sopenharmony_ci self.text 334c67d6573Sopenharmony_ci } 335c67d6573Sopenharmony_ci} 336c67d6573Sopenharmony_ci 337c67d6573Sopenharmony_ci/// An inline representation of `Option<char>`. 338c67d6573Sopenharmony_ci/// 339c67d6573Sopenharmony_ci/// This eliminates the need to do case analysis on `Option<char>` to determine 340c67d6573Sopenharmony_ci/// ordinality with other characters. 341c67d6573Sopenharmony_ci/// 342c67d6573Sopenharmony_ci/// (The `Option<char>` is not related to encoding. Instead, it is used in the 343c67d6573Sopenharmony_ci/// matching engines to represent the beginning and ending boundaries of the 344c67d6573Sopenharmony_ci/// search text.) 345c67d6573Sopenharmony_ci#[derive(Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)] 346c67d6573Sopenharmony_cipub struct Char(u32); 347c67d6573Sopenharmony_ci 348c67d6573Sopenharmony_ciimpl fmt::Debug for Char { 349c67d6573Sopenharmony_ci fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 350c67d6573Sopenharmony_ci match char::from_u32(self.0) { 351c67d6573Sopenharmony_ci None => write!(f, "Empty"), 352c67d6573Sopenharmony_ci Some(c) => write!(f, "{:?}", c), 353c67d6573Sopenharmony_ci } 354c67d6573Sopenharmony_ci } 355c67d6573Sopenharmony_ci} 356c67d6573Sopenharmony_ci 357c67d6573Sopenharmony_ciimpl Char { 358c67d6573Sopenharmony_ci /// Returns true iff the character is absent. 359c67d6573Sopenharmony_ci #[inline] 360c67d6573Sopenharmony_ci pub fn is_none(self) -> bool { 361c67d6573Sopenharmony_ci self.0 == u32::MAX 362c67d6573Sopenharmony_ci } 363c67d6573Sopenharmony_ci 364c67d6573Sopenharmony_ci /// Returns the length of the character's UTF-8 encoding. 365c67d6573Sopenharmony_ci /// 366c67d6573Sopenharmony_ci /// If the character is absent, then `1` is returned. 367c67d6573Sopenharmony_ci #[inline] 368c67d6573Sopenharmony_ci pub fn len_utf8(self) -> usize { 369c67d6573Sopenharmony_ci char::from_u32(self.0).map_or(1, |c| c.len_utf8()) 370c67d6573Sopenharmony_ci } 371c67d6573Sopenharmony_ci 372c67d6573Sopenharmony_ci /// Returns true iff the character is a word character. 373c67d6573Sopenharmony_ci /// 374c67d6573Sopenharmony_ci /// If the character is absent, then false is returned. 375c67d6573Sopenharmony_ci pub fn is_word_char(self) -> bool { 376c67d6573Sopenharmony_ci // is_word_character can panic if the Unicode data for \w isn't 377c67d6573Sopenharmony_ci // available. However, our compiler ensures that if a Unicode word 378c67d6573Sopenharmony_ci // boundary is used, then the data must also be available. If it isn't, 379c67d6573Sopenharmony_ci // then the compiler returns an error. 380c67d6573Sopenharmony_ci char::from_u32(self.0).map_or(false, regex_syntax::is_word_character) 381c67d6573Sopenharmony_ci } 382c67d6573Sopenharmony_ci 383c67d6573Sopenharmony_ci /// Returns true iff the byte is a word byte. 384c67d6573Sopenharmony_ci /// 385c67d6573Sopenharmony_ci /// If the byte is absent, then false is returned. 386c67d6573Sopenharmony_ci pub fn is_word_byte(self) -> bool { 387c67d6573Sopenharmony_ci match char::from_u32(self.0) { 388c67d6573Sopenharmony_ci Some(c) if c <= '\u{7F}' => regex_syntax::is_word_byte(c as u8), 389c67d6573Sopenharmony_ci None | Some(_) => false, 390c67d6573Sopenharmony_ci } 391c67d6573Sopenharmony_ci } 392c67d6573Sopenharmony_ci} 393c67d6573Sopenharmony_ci 394c67d6573Sopenharmony_ciimpl From<char> for Char { 395c67d6573Sopenharmony_ci fn from(c: char) -> Char { 396c67d6573Sopenharmony_ci Char(c as u32) 397c67d6573Sopenharmony_ci } 398c67d6573Sopenharmony_ci} 399c67d6573Sopenharmony_ci 400c67d6573Sopenharmony_ciimpl From<Option<char>> for Char { 401c67d6573Sopenharmony_ci fn from(c: Option<char>) -> Char { 402c67d6573Sopenharmony_ci c.map_or(Char(u32::MAX), |c| c.into()) 403c67d6573Sopenharmony_ci } 404c67d6573Sopenharmony_ci} 405c67d6573Sopenharmony_ci 406c67d6573Sopenharmony_ciimpl PartialEq<char> for Char { 407c67d6573Sopenharmony_ci #[inline] 408c67d6573Sopenharmony_ci fn eq(&self, other: &char) -> bool { 409c67d6573Sopenharmony_ci self.0 == *other as u32 410c67d6573Sopenharmony_ci } 411c67d6573Sopenharmony_ci} 412c67d6573Sopenharmony_ci 413c67d6573Sopenharmony_ciimpl PartialEq<Char> for char { 414c67d6573Sopenharmony_ci #[inline] 415c67d6573Sopenharmony_ci fn eq(&self, other: &Char) -> bool { 416c67d6573Sopenharmony_ci *self as u32 == other.0 417c67d6573Sopenharmony_ci } 418c67d6573Sopenharmony_ci} 419c67d6573Sopenharmony_ci 420c67d6573Sopenharmony_ciimpl PartialOrd<char> for Char { 421c67d6573Sopenharmony_ci #[inline] 422c67d6573Sopenharmony_ci fn partial_cmp(&self, other: &char) -> Option<Ordering> { 423c67d6573Sopenharmony_ci self.0.partial_cmp(&(*other as u32)) 424c67d6573Sopenharmony_ci } 425c67d6573Sopenharmony_ci} 426c67d6573Sopenharmony_ci 427c67d6573Sopenharmony_ciimpl PartialOrd<Char> for char { 428c67d6573Sopenharmony_ci #[inline] 429c67d6573Sopenharmony_ci fn partial_cmp(&self, other: &Char) -> Option<Ordering> { 430c67d6573Sopenharmony_ci (*self as u32).partial_cmp(&other.0) 431c67d6573Sopenharmony_ci } 432c67d6573Sopenharmony_ci} 433