1use std::collections::BTreeSet as Set;
2use std::fs;
3use std::io::{self, Write};
4use std::path::Path;
5use std::process;
6
/// Code point sets for the two identifier properties this file parses.
///
/// Each set stores code points as raw `u32` values; membership is queried
/// through [`Properties::is_xid_start`] and [`Properties::is_xid_continue`].
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Properties {
    /// Code points collected from entries named `XID_Start`.
    xid_start: Set<u32>,
    /// Code points collected from entries named `XID_Continue`.
    xid_continue: Set<u32>,
}
11
12impl Properties {
13    pub fn is_xid_start(&self, ch: char) -> bool {
14        self.xid_start.contains(&(ch as u32))
15    }
16
17    pub fn is_xid_continue(&self, ch: char) -> bool {
18        self.xid_continue.contains(&(ch as u32))
19    }
20}
21
22pub fn parse_xid_properties(ucd_dir: &Path) -> Properties {
23    let mut properties = Properties {
24        xid_start: Set::new(),
25        xid_continue: Set::new(),
26    };
27
28    let filename = "DerivedCoreProperties.txt";
29    let path = ucd_dir.join(filename);
30    let contents = fs::read_to_string(path).unwrap_or_else(|err| {
31        let suggestion =
32            "Download from https://www.unicode.org/Public/zipped/l5.0.0/UCD.zip and unzip.";
33        let _ = writeln!(io::stderr(), "{}: {err}\n{suggestion}", ucd_dir.display());
34        process::exit(1);
35    });
36
37    for (i, line) in contents.lines().enumerate() {
38        if line.starts_with('#') || line.trim().is_empty() {
39            continue;
40        }
41        let (lo, hi, name) = parse_line(line).unwrap_or_else(|| {
42            let _ = writeln!(io::stderr(), "{filename} line {i} is unexpected:\n{line}");
43            process::exit(1);
44        });
45        let set = match name {
46            "XID_Start" => &mut properties.xid_start,
47            "XID_Continue" => &mut properties.xid_continue,
48            _ => continue,
49        };
50        set.extend(lo..=hi);
51    }
52
53    properties
54}
55
56fn parse_line(line: &str) -> Option<(u32, u32, &str)> {
57    let (mut codepoint, rest) = line.split_once(';')?;
58
59    let (lo, hi);
60    codepoint = codepoint.trim();
61    if let Some((a, b)) = codepoint.split_once("..") {
62        lo = parse_codepoint(a)?;
63        hi = parse_codepoint(b)?;
64    } else {
65        lo = parse_codepoint(codepoint)?;
66        hi = lo;
67    }
68
69    let name = rest.trim().split('#').next()?.trim_end();
70    Some((lo, hi, name))
71}
72
/// Parses a code point written as bare hexadecimal digits, e.g. `1F600`.
///
/// Returns `None` for empty input, for input containing anything other than
/// ASCII hex digits (including a sign or whitespace), and for values above
/// U+10FFFF, the largest value a `char` can hold.
fn parse_codepoint(s: &str) -> Option<u32> {
    // `from_str_radix` on its own would accept a leading `+`.
    if !s.bytes().all(|b| b.is_ascii_hexdigit()) {
        return None;
    }
    u32::from_str_radix(s, 16)
        .ok()
        .filter(|&value| value <= u32::from(char::MAX))
}
76