use std::collections::BTreeSet as Set;
use std::fs;
use std::io::{self, Write};
use std::path::Path;
use std::process;

/// Largest value a code point may take; larger numbers in the data file are
/// rejected as malformed instead of being expanded into the sets.
const MAX_CODEPOINT: u32 = char::MAX as u32;

/// Sets of code points listed under the `XID_Start` and `XID_Continue`
/// property names in `DerivedCoreProperties.txt`.
#[derive(Debug)]
pub struct Properties {
    xid_start: Set<u32>,
    xid_continue: Set<u32>,
}

impl Properties {
    /// Returns `true` if `ch` was listed under `XID_Start`.
    pub fn is_xid_start(&self, ch: char) -> bool {
        self.xid_start.contains(&u32::from(ch))
    }

    /// Returns `true` if `ch` was listed under `XID_Continue`.
    pub fn is_xid_continue(&self, ch: char) -> bool {
        self.xid_continue.contains(&u32::from(ch))
    }
}

/// A non-comment line of the data file that could not be parsed.
#[derive(Debug, PartialEq)]
struct UnexpectedLine<'a> {
    /// One-based line number, as a text editor would show it.
    number: usize,
    /// The offending line, verbatim.
    text: &'a str,
}

/// Reads `DerivedCoreProperties.txt` from `ucd_dir` and collects the code
/// points tagged `XID_Start` and `XID_Continue`.
///
/// This is a command-line style entry point: if the file cannot be read, or a
/// data line is malformed, a diagnostic is written to stderr and the process
/// exits with status 1.
pub fn parse_xid_properties(ucd_dir: &Path) -> Properties {
    let filename = "DerivedCoreProperties.txt";
    let path = ucd_dir.join(filename);
    let contents = fs::read_to_string(&path).unwrap_or_else(|err| {
        let suggestion =
            "Download from https://www.unicode.org/Public/zipped/15.0.0/UCD.zip and unzip.";
        // Report the file that actually failed to open, not just its directory.
        let _ = writeln!(io::stderr(), "{}: {err}\n{suggestion}", path.display());
        process::exit(1);
    });

    parse_contents(&contents).unwrap_or_else(|unexpected| {
        let _ = writeln!(
            io::stderr(),
            "{filename} line {} is unexpected:\n{}",
            unexpected.number,
            unexpected.text,
        );
        process::exit(1);
    })
}

/// Parses the text of the data file into [`Properties`].
///
/// Lines starting with `#` and blank lines are skipped, and lines naming any
/// other property are ignored. The first line that fails to parse is returned
/// as the error, with a one-based line number.
fn parse_contents<'a>(contents: &'a str) -> Result<Properties, UnexpectedLine<'a>> {
    let mut properties = Properties {
        xid_start: Set::new(),
        xid_continue: Set::new(),
    };

    for (index, line) in contents.lines().enumerate() {
        if line.starts_with('#') || line.trim().is_empty() {
            continue;
        }
        // `enumerate` counts from zero; people count lines from one.
        let (lo, hi, name) = parse_line(line).ok_or(UnexpectedLine {
            number: index + 1,
            text: line,
        })?;
        let set = match name {
            "XID_Start" => &mut properties.xid_start,
            "XID_Continue" => &mut properties.xid_continue,
            _ => continue,
        };
        set.extend(lo..=hi);
    }

    Ok(properties)
}

/// Splits one data line of the form `LO[..HI] ; Name # comment` into its
/// inclusive code point range and property name.
///
/// Returns `None` when there is no `;` separator, a code point is not valid
/// hexadecimal, or the range is reversed (`LO > HI`).
fn parse_line(line: &str) -> Option<(u32, u32, &str)> {
    let (codepoints, rest) = line.split_once(';')?;
    let codepoints = codepoints.trim();

    let (lo, hi) = match codepoints.split_once("..") {
        Some((first, last)) => (parse_codepoint(first)?, parse_codepoint(last)?),
        None => {
            let single = parse_codepoint(codepoints)?;
            (single, single)
        }
    };
    // A reversed range would expand to nothing and silently drop the entry.
    if lo > hi {
        return None;
    }

    // Everything after the first `#` is a trailing comment.
    let name = rest.split('#').next()?.trim();
    Some((lo, hi, name))
}

/// Parses a bare hexadecimal code point such as `0041`.
///
/// Returns `None` for empty input, for anything other than hex digits
/// (including the leading `+` that `from_str_radix` would accept), and for
/// values above U+10FFFF.
fn parse_codepoint(s: &str) -> Option<u32> {
    if !s.bytes().all(|b| b.is_ascii_hexdigit()) {
        return None;
    }
    u32::from_str_radix(s, 16)
        .ok()
        .filter(|&codepoint| codepoint <= MAX_CODEPOINT)
}