/// Splits `line` into words, breaking only after runs of ASCII space (`' '`).
///
/// Each yielded slice is a (possibly empty) run of non-space characters
/// followed by every space that directly trails it, so concatenating the
/// yielded slices reproduces `line` exactly. Consequences of that rule:
///
/// * leading spaces come out as a word of their own (`"  foo"` gives
///   `"  "` and then `"foo"`);
/// * trailing spaces stay attached to the last word (`"foo "` gives `"foo "`);
/// * an empty `line` yields nothing;
/// * tabs, newlines, non-breaking spaces and any other character that is not
///   `' '` never cause a break.
///
/// The iterator is lazy and keeps returning `None` once it is exhausted.
pub(crate) fn find_words_ascii_space(line: &str) -> impl Iterator<Item = &'_ str> + '_ {
    // The part of `line` that has not been handed out yet.
    let mut rest = line;

    std::iter::from_fn(move || {
        let current = rest;
        if current.is_empty() {
            return None;
        }

        // Length of the non-space prefix (zero when `current` starts with a space).
        let body_len = current.find(' ').unwrap_or(current.len());

        // Number of spaces directly following that prefix. A space is a single
        // byte in UTF-8, so counting bytes keeps the split on a char boundary.
        let space_len = current[body_len..]
            .bytes()
            .take_while(|&byte| byte == b' ')
            .count();

        let (word, tail) = current.split_at(body_len + space_len);
        rest = tail;
        Some(word)
    })
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Generates one `#[test]` function named `$test_fn` that checks every
    /// `[input, expected_words]` pair against `find_words_ascii_space`.
    macro_rules! test_find_words {
        ($test_fn:ident, $([ $input:expr, $expected:expr ]),+) => {
            #[test]
            fn $test_fn() {
                $(
                    let want: Vec<&str> = $expected.to_vec();
                    let got: Vec<&str> = find_words_ascii_space($input).collect();
                    assert_eq!(got, want, "Line: {:?}", $input);
                )+
            }
        };
    }

    test_find_words!(ascii_space_empty, ["", []]);

    test_find_words!(ascii_single_word, ["foo", ["foo"]]);

    test_find_words!(ascii_two_words, ["foo bar", ["foo ", "bar"]]);

    test_find_words!(
        ascii_multiple_words,
        ["foo bar", ["foo ", "bar"]],
        ["x y z", ["x ", "y ", "z"]]
    );

    test_find_words!(ascii_only_whitespace, [" ", [" "]], [" ", [" "]]);

    test_find_words!(
        ascii_inter_word_whitespace,
        ["foo bar", ["foo ", "bar"]]
    );

    test_find_words!(ascii_trailing_whitespace, ["foo ", ["foo "]]);

    test_find_words!(ascii_leading_whitespace, [" foo", [" ", "foo"]]);

    test_find_words!(
        ascii_multi_column_char,
        ["\u{1f920}", ["\u{1f920}"]] // cowboy emoji
    );

    // A hyphen is an ordinary character here: only spaces separate words.
    test_find_words!(
        ascii_hyphens,
        ["foo-bar", ["foo-bar"]],
        ["foo- bar", ["foo- ", "bar"]],
        ["foo - bar", ["foo ", "- ", "bar"]],
        ["foo -bar", ["foo ", "-bar"]]
    );

    // Whitespace other than U+0020 does not split words.
    test_find_words!(ascii_newline, ["foo\nbar", ["foo\nbar"]]);

    test_find_words!(ascii_tab, ["foo\tbar", ["foo\tbar"]]);

    test_find_words!(
        ascii_non_breaking_space,
        ["foo\u{00A0}bar", ["foo\u{00A0}bar"]]
    );
}