182e69de5Sopenharmony_ci//! This library implements string similarity metrics.
282e69de5Sopenharmony_ci
382e69de5Sopenharmony_ci#![forbid(unsafe_code)]
482e69de5Sopenharmony_ci
582e69de5Sopenharmony_ciuse std::char;
682e69de5Sopenharmony_ciuse std::cmp::{max, min};
782e69de5Sopenharmony_ciuse std::collections::HashMap;
882e69de5Sopenharmony_ciuse std::error::Error;
982e69de5Sopenharmony_ciuse std::fmt::{self, Display, Formatter};
1082e69de5Sopenharmony_ciuse std::hash::Hash;
1182e69de5Sopenharmony_ciuse std::str::Chars;
1282e69de5Sopenharmony_ci
1382e69de5Sopenharmony_ci#[derive(Debug, PartialEq)]
1482e69de5Sopenharmony_cipub enum StrSimError {
1582e69de5Sopenharmony_ci    DifferentLengthArgs,
1682e69de5Sopenharmony_ci}
1782e69de5Sopenharmony_ci
1882e69de5Sopenharmony_ciimpl Display for StrSimError {
1982e69de5Sopenharmony_ci    fn fmt(&self, fmt: &mut Formatter) -> Result<(), fmt::Error> {
2082e69de5Sopenharmony_ci        let text = match self {
2182e69de5Sopenharmony_ci            StrSimError::DifferentLengthArgs => "Differing length arguments provided",
2282e69de5Sopenharmony_ci        };
2382e69de5Sopenharmony_ci
2482e69de5Sopenharmony_ci        write!(fmt, "{}", text)
2582e69de5Sopenharmony_ci    }
2682e69de5Sopenharmony_ci}
2782e69de5Sopenharmony_ci
2882e69de5Sopenharmony_ciimpl Error for StrSimError {}
2982e69de5Sopenharmony_ci
3082e69de5Sopenharmony_cipub type HammingResult = Result<usize, StrSimError>;
3182e69de5Sopenharmony_ci
3282e69de5Sopenharmony_ci/// Calculates the number of positions in the two sequences where the elements
3382e69de5Sopenharmony_ci/// differ. Returns an error if the sequences have different lengths.
3482e69de5Sopenharmony_cipub fn generic_hamming<Iter1, Iter2, Elem1, Elem2>(a: Iter1, b: Iter2) -> HammingResult
3582e69de5Sopenharmony_ci    where Iter1: IntoIterator<Item=Elem1>,
3682e69de5Sopenharmony_ci          Iter2: IntoIterator<Item=Elem2>,
3782e69de5Sopenharmony_ci          Elem1: PartialEq<Elem2> {
3882e69de5Sopenharmony_ci    let (mut ita, mut itb) = (a.into_iter(), b.into_iter());
3982e69de5Sopenharmony_ci    let mut count = 0;
4082e69de5Sopenharmony_ci    loop {
4182e69de5Sopenharmony_ci        match (ita.next(), itb.next()){
4282e69de5Sopenharmony_ci            (Some(x), Some(y)) => if x != y { count += 1 },
4382e69de5Sopenharmony_ci            (None, None) => return Ok(count),
4482e69de5Sopenharmony_ci            _ => return Err(StrSimError::DifferentLengthArgs),
4582e69de5Sopenharmony_ci        }
4682e69de5Sopenharmony_ci    }
4782e69de5Sopenharmony_ci}
4882e69de5Sopenharmony_ci
4982e69de5Sopenharmony_ci/// Calculates the number of positions in the two strings where the characters
5082e69de5Sopenharmony_ci/// differ. Returns an error if the strings have different lengths.
5182e69de5Sopenharmony_ci///
5282e69de5Sopenharmony_ci/// ```
5382e69de5Sopenharmony_ci/// use strsim::{hamming, StrSimError::DifferentLengthArgs};
5482e69de5Sopenharmony_ci///
5582e69de5Sopenharmony_ci/// assert_eq!(Ok(3), hamming("hamming", "hammers"));
5682e69de5Sopenharmony_ci///
5782e69de5Sopenharmony_ci/// assert_eq!(Err(DifferentLengthArgs), hamming("hamming", "ham"));
5882e69de5Sopenharmony_ci/// ```
5982e69de5Sopenharmony_cipub fn hamming(a: &str, b: &str) -> HammingResult {
6082e69de5Sopenharmony_ci    generic_hamming(a.chars(), b.chars())
6182e69de5Sopenharmony_ci}
6282e69de5Sopenharmony_ci
6382e69de5Sopenharmony_ci/// Calculates the Jaro similarity between two sequences. The returned value
6482e69de5Sopenharmony_ci/// is between 0.0 and 1.0 (higher value means more similar).
6582e69de5Sopenharmony_cipub fn generic_jaro<'a, 'b, Iter1, Iter2, Elem1, Elem2>(a: &'a Iter1, b: &'b Iter2) -> f64
6682e69de5Sopenharmony_ci    where &'a Iter1: IntoIterator<Item=Elem1>,
6782e69de5Sopenharmony_ci          &'b Iter2: IntoIterator<Item=Elem2>,
6882e69de5Sopenharmony_ci          Elem1: PartialEq<Elem2> {
6982e69de5Sopenharmony_ci    let a_len = a.into_iter().count();
7082e69de5Sopenharmony_ci    let b_len = b.into_iter().count();
7182e69de5Sopenharmony_ci
7282e69de5Sopenharmony_ci    // The check for lengths of one here is to prevent integer overflow when
7382e69de5Sopenharmony_ci    // calculating the search range.
7482e69de5Sopenharmony_ci    if a_len == 0 && b_len == 0 {
7582e69de5Sopenharmony_ci        return 1.0;
7682e69de5Sopenharmony_ci    } else if a_len == 0 || b_len == 0 {
7782e69de5Sopenharmony_ci        return 0.0;
7882e69de5Sopenharmony_ci    } else if a_len == 1 && b_len == 1 {
7982e69de5Sopenharmony_ci        return if a.into_iter().eq(b.into_iter()) { 1.0} else { 0.0 };
8082e69de5Sopenharmony_ci    }
8182e69de5Sopenharmony_ci
8282e69de5Sopenharmony_ci    let search_range = (max(a_len, b_len) / 2) - 1;
8382e69de5Sopenharmony_ci
8482e69de5Sopenharmony_ci    let mut b_consumed = Vec::with_capacity(b_len);
8582e69de5Sopenharmony_ci    for _ in 0..b_len {
8682e69de5Sopenharmony_ci        b_consumed.push(false);
8782e69de5Sopenharmony_ci    }
8882e69de5Sopenharmony_ci    let mut matches = 0.0;
8982e69de5Sopenharmony_ci
9082e69de5Sopenharmony_ci    let mut transpositions = 0.0;
9182e69de5Sopenharmony_ci    let mut b_match_index = 0;
9282e69de5Sopenharmony_ci
9382e69de5Sopenharmony_ci    for (i, a_elem) in a.into_iter().enumerate() {
9482e69de5Sopenharmony_ci        let min_bound =
9582e69de5Sopenharmony_ci            // prevent integer wrapping
9682e69de5Sopenharmony_ci            if i > search_range {
9782e69de5Sopenharmony_ci                max(0, i - search_range)
9882e69de5Sopenharmony_ci            } else {
9982e69de5Sopenharmony_ci                0
10082e69de5Sopenharmony_ci            };
10182e69de5Sopenharmony_ci
10282e69de5Sopenharmony_ci        let max_bound = min(b_len - 1, i + search_range);
10382e69de5Sopenharmony_ci
10482e69de5Sopenharmony_ci        if min_bound > max_bound {
10582e69de5Sopenharmony_ci            continue;
10682e69de5Sopenharmony_ci        }
10782e69de5Sopenharmony_ci
10882e69de5Sopenharmony_ci        for (j, b_elem) in b.into_iter().enumerate() {
10982e69de5Sopenharmony_ci            if min_bound <= j && j <= max_bound && a_elem == b_elem &&
11082e69de5Sopenharmony_ci                !b_consumed[j] {
11182e69de5Sopenharmony_ci                b_consumed[j] = true;
11282e69de5Sopenharmony_ci                matches += 1.0;
11382e69de5Sopenharmony_ci
11482e69de5Sopenharmony_ci                if j < b_match_index {
11582e69de5Sopenharmony_ci                    transpositions += 1.0;
11682e69de5Sopenharmony_ci                }
11782e69de5Sopenharmony_ci                b_match_index = j;
11882e69de5Sopenharmony_ci
11982e69de5Sopenharmony_ci                break;
12082e69de5Sopenharmony_ci            }
12182e69de5Sopenharmony_ci        }
12282e69de5Sopenharmony_ci    }
12382e69de5Sopenharmony_ci
12482e69de5Sopenharmony_ci    if matches == 0.0 {
12582e69de5Sopenharmony_ci        0.0
12682e69de5Sopenharmony_ci    } else {
12782e69de5Sopenharmony_ci        (1.0 / 3.0) * ((matches / a_len as f64) +
12882e69de5Sopenharmony_ci            (matches / b_len as f64) +
12982e69de5Sopenharmony_ci            ((matches - transpositions) / matches))
13082e69de5Sopenharmony_ci    }
13182e69de5Sopenharmony_ci}
13282e69de5Sopenharmony_ci
13382e69de5Sopenharmony_cistruct StringWrapper<'a>(&'a str);
13482e69de5Sopenharmony_ci
13582e69de5Sopenharmony_ciimpl<'a, 'b> IntoIterator for &'a StringWrapper<'b> {
13682e69de5Sopenharmony_ci    type Item = char;
13782e69de5Sopenharmony_ci    type IntoIter = Chars<'b>;
13882e69de5Sopenharmony_ci
13982e69de5Sopenharmony_ci    fn into_iter(self) -> Self::IntoIter {
14082e69de5Sopenharmony_ci        self.0.chars()
14182e69de5Sopenharmony_ci    }
14282e69de5Sopenharmony_ci}
14382e69de5Sopenharmony_ci
14482e69de5Sopenharmony_ci/// Calculates the Jaro similarity between two strings. The returned value
14582e69de5Sopenharmony_ci/// is between 0.0 and 1.0 (higher value means more similar).
14682e69de5Sopenharmony_ci///
14782e69de5Sopenharmony_ci/// ```
14882e69de5Sopenharmony_ci/// use strsim::jaro;
14982e69de5Sopenharmony_ci///
15082e69de5Sopenharmony_ci/// assert!((0.392 - jaro("Friedrich Nietzsche", "Jean-Paul Sartre")).abs() <
15182e69de5Sopenharmony_ci///         0.001);
15282e69de5Sopenharmony_ci/// ```
15382e69de5Sopenharmony_cipub fn jaro(a: &str, b: &str) -> f64 {
15482e69de5Sopenharmony_ci    generic_jaro(&StringWrapper(a), &StringWrapper(b))
15582e69de5Sopenharmony_ci}
15682e69de5Sopenharmony_ci
15782e69de5Sopenharmony_ci/// Like Jaro but gives a boost to sequences that have a common prefix.
15882e69de5Sopenharmony_cipub fn generic_jaro_winkler<'a, 'b, Iter1, Iter2, Elem1, Elem2>(a: &'a Iter1, b: &'b Iter2) -> f64
15982e69de5Sopenharmony_ci    where &'a Iter1: IntoIterator<Item=Elem1>,
16082e69de5Sopenharmony_ci          &'b Iter2: IntoIterator<Item=Elem2>,
16182e69de5Sopenharmony_ci          Elem1: PartialEq<Elem2> {
16282e69de5Sopenharmony_ci    let jaro_distance = generic_jaro(a, b);
16382e69de5Sopenharmony_ci
16482e69de5Sopenharmony_ci    // Don't limit the length of the common prefix
16582e69de5Sopenharmony_ci    let prefix_length = a.into_iter()
16682e69de5Sopenharmony_ci        .zip(b.into_iter())
16782e69de5Sopenharmony_ci        .take_while(|&(ref a_elem, ref b_elem)| a_elem == b_elem)
16882e69de5Sopenharmony_ci        .count();
16982e69de5Sopenharmony_ci
17082e69de5Sopenharmony_ci    let jaro_winkler_distance =
17182e69de5Sopenharmony_ci        jaro_distance + (0.1 * prefix_length as f64 * (1.0 - jaro_distance));
17282e69de5Sopenharmony_ci
17382e69de5Sopenharmony_ci    if jaro_winkler_distance <= 1.0 {
17482e69de5Sopenharmony_ci        jaro_winkler_distance
17582e69de5Sopenharmony_ci    } else {
17682e69de5Sopenharmony_ci        1.0
17782e69de5Sopenharmony_ci    }
17882e69de5Sopenharmony_ci}
17982e69de5Sopenharmony_ci
18082e69de5Sopenharmony_ci/// Like Jaro but gives a boost to strings that have a common prefix.
18182e69de5Sopenharmony_ci///
18282e69de5Sopenharmony_ci/// ```
18382e69de5Sopenharmony_ci/// use strsim::jaro_winkler;
18482e69de5Sopenharmony_ci///
18582e69de5Sopenharmony_ci/// assert!((0.911 - jaro_winkler("cheeseburger", "cheese fries")).abs() <
18682e69de5Sopenharmony_ci///         0.001);
18782e69de5Sopenharmony_ci/// ```
18882e69de5Sopenharmony_cipub fn jaro_winkler(a: &str, b: &str) -> f64 {
18982e69de5Sopenharmony_ci    generic_jaro_winkler(&StringWrapper(a), &StringWrapper(b))
19082e69de5Sopenharmony_ci}
19182e69de5Sopenharmony_ci
19282e69de5Sopenharmony_ci/// Calculates the minimum number of insertions, deletions, and substitutions
19382e69de5Sopenharmony_ci/// required to change one sequence into the other.
19482e69de5Sopenharmony_ci///
19582e69de5Sopenharmony_ci/// ```
19682e69de5Sopenharmony_ci/// use strsim::generic_levenshtein;
19782e69de5Sopenharmony_ci///
19882e69de5Sopenharmony_ci/// assert_eq!(3, generic_levenshtein(&[1,2,3], &[1,2,3,4,5,6]));
19982e69de5Sopenharmony_ci/// ```
20082e69de5Sopenharmony_cipub fn generic_levenshtein<'a, 'b, Iter1, Iter2, Elem1, Elem2>(a: &'a Iter1, b: &'b Iter2) -> usize
20182e69de5Sopenharmony_ci    where &'a Iter1: IntoIterator<Item=Elem1>,
20282e69de5Sopenharmony_ci          &'b Iter2: IntoIterator<Item=Elem2>,
20382e69de5Sopenharmony_ci          Elem1: PartialEq<Elem2> {
20482e69de5Sopenharmony_ci    let b_len = b.into_iter().count();
20582e69de5Sopenharmony_ci
20682e69de5Sopenharmony_ci    if a.into_iter().next().is_none() { return b_len; }
20782e69de5Sopenharmony_ci
20882e69de5Sopenharmony_ci    let mut cache: Vec<usize> = (1..b_len+1).collect();
20982e69de5Sopenharmony_ci
21082e69de5Sopenharmony_ci    let mut result = 0;
21182e69de5Sopenharmony_ci
21282e69de5Sopenharmony_ci    for (i, a_elem) in a.into_iter().enumerate() {
21382e69de5Sopenharmony_ci        result = i + 1;
21482e69de5Sopenharmony_ci        let mut distance_b = i;
21582e69de5Sopenharmony_ci
21682e69de5Sopenharmony_ci        for (j, b_elem) in b.into_iter().enumerate() {
21782e69de5Sopenharmony_ci            let cost = if a_elem == b_elem { 0usize } else { 1usize };
21882e69de5Sopenharmony_ci            let distance_a = distance_b + cost;
21982e69de5Sopenharmony_ci            distance_b = cache[j];
22082e69de5Sopenharmony_ci            result = min(result + 1, min(distance_a, distance_b + 1));
22182e69de5Sopenharmony_ci            cache[j] = result;
22282e69de5Sopenharmony_ci        }
22382e69de5Sopenharmony_ci    }
22482e69de5Sopenharmony_ci
22582e69de5Sopenharmony_ci    result
22682e69de5Sopenharmony_ci}
22782e69de5Sopenharmony_ci
22882e69de5Sopenharmony_ci/// Calculates the minimum number of insertions, deletions, and substitutions
22982e69de5Sopenharmony_ci/// required to change one string into the other.
23082e69de5Sopenharmony_ci///
23182e69de5Sopenharmony_ci/// ```
23282e69de5Sopenharmony_ci/// use strsim::levenshtein;
23382e69de5Sopenharmony_ci///
23482e69de5Sopenharmony_ci/// assert_eq!(3, levenshtein("kitten", "sitting"));
23582e69de5Sopenharmony_ci/// ```
23682e69de5Sopenharmony_cipub fn levenshtein(a: &str, b: &str) -> usize {
23782e69de5Sopenharmony_ci    generic_levenshtein(&StringWrapper(a), &StringWrapper(b))
23882e69de5Sopenharmony_ci}
23982e69de5Sopenharmony_ci
24082e69de5Sopenharmony_ci/// Calculates a normalized score of the Levenshtein algorithm between 0.0 and
24182e69de5Sopenharmony_ci/// 1.0 (inclusive), where 1.0 means the strings are the same.
24282e69de5Sopenharmony_ci///
24382e69de5Sopenharmony_ci/// ```
24482e69de5Sopenharmony_ci/// use strsim::normalized_levenshtein;
24582e69de5Sopenharmony_ci///
24682e69de5Sopenharmony_ci/// assert!((normalized_levenshtein("kitten", "sitting") - 0.57142).abs() < 0.00001);
24782e69de5Sopenharmony_ci/// assert!((normalized_levenshtein("", "") - 1.0).abs() < 0.00001);
24882e69de5Sopenharmony_ci/// assert!(normalized_levenshtein("", "second").abs() < 0.00001);
24982e69de5Sopenharmony_ci/// assert!(normalized_levenshtein("first", "").abs() < 0.00001);
25082e69de5Sopenharmony_ci/// assert!((normalized_levenshtein("string", "string") - 1.0).abs() < 0.00001);
25182e69de5Sopenharmony_ci/// ```
25282e69de5Sopenharmony_cipub fn normalized_levenshtein(a: &str, b: &str) -> f64 {
25382e69de5Sopenharmony_ci    if a.is_empty() && b.is_empty() {
25482e69de5Sopenharmony_ci        return 1.0;
25582e69de5Sopenharmony_ci    }
25682e69de5Sopenharmony_ci    1.0 - (levenshtein(a, b) as f64) / (a.chars().count().max(b.chars().count()) as f64)
25782e69de5Sopenharmony_ci}
25882e69de5Sopenharmony_ci
25982e69de5Sopenharmony_ci/// Like Levenshtein but allows for adjacent transpositions. Each substring can
26082e69de5Sopenharmony_ci/// only be edited once.
26182e69de5Sopenharmony_ci///
26282e69de5Sopenharmony_ci/// ```
26382e69de5Sopenharmony_ci/// use strsim::osa_distance;
26482e69de5Sopenharmony_ci///
26582e69de5Sopenharmony_ci/// assert_eq!(3, osa_distance("ab", "bca"));
26682e69de5Sopenharmony_ci/// ```
26782e69de5Sopenharmony_cipub fn osa_distance(a: &str, b: &str) -> usize {
26882e69de5Sopenharmony_ci    let a_len = a.chars().count();
26982e69de5Sopenharmony_ci    let b_len = b.chars().count();
27082e69de5Sopenharmony_ci    if a == b { return 0; }
27182e69de5Sopenharmony_ci    else if a_len == 0 { return b_len; }
27282e69de5Sopenharmony_ci    else if b_len == 0 { return a_len; }
27382e69de5Sopenharmony_ci
27482e69de5Sopenharmony_ci    let mut prev_two_distances: Vec<usize> = Vec::with_capacity(b_len + 1);
27582e69de5Sopenharmony_ci    let mut prev_distances: Vec<usize> = Vec::with_capacity(b_len + 1);
27682e69de5Sopenharmony_ci    let mut curr_distances: Vec<usize> = Vec::with_capacity(b_len + 1);
27782e69de5Sopenharmony_ci
27882e69de5Sopenharmony_ci    let mut prev_a_char = char::MAX;
27982e69de5Sopenharmony_ci    let mut prev_b_char = char::MAX;
28082e69de5Sopenharmony_ci
28182e69de5Sopenharmony_ci    for i in 0..(b_len + 1) {
28282e69de5Sopenharmony_ci        prev_two_distances.push(i);
28382e69de5Sopenharmony_ci        prev_distances.push(i);
28482e69de5Sopenharmony_ci        curr_distances.push(0);
28582e69de5Sopenharmony_ci    }
28682e69de5Sopenharmony_ci
28782e69de5Sopenharmony_ci    for (i, a_char) in a.chars().enumerate() {
28882e69de5Sopenharmony_ci        curr_distances[0] = i + 1;
28982e69de5Sopenharmony_ci
29082e69de5Sopenharmony_ci        for (j, b_char) in b.chars().enumerate() {
29182e69de5Sopenharmony_ci            let cost = if a_char == b_char { 0 } else { 1 };
29282e69de5Sopenharmony_ci            curr_distances[j + 1] = min(curr_distances[j] + 1,
29382e69de5Sopenharmony_ci                                        min(prev_distances[j + 1] + 1,
29482e69de5Sopenharmony_ci                                            prev_distances[j] + cost));
29582e69de5Sopenharmony_ci            if i > 0 && j > 0 && a_char != b_char &&
29682e69de5Sopenharmony_ci                a_char == prev_b_char && b_char == prev_a_char {
29782e69de5Sopenharmony_ci                curr_distances[j + 1] = min(curr_distances[j + 1],
29882e69de5Sopenharmony_ci                                            prev_two_distances[j - 1] + 1);
29982e69de5Sopenharmony_ci            }
30082e69de5Sopenharmony_ci
30182e69de5Sopenharmony_ci            prev_b_char = b_char;
30282e69de5Sopenharmony_ci        }
30382e69de5Sopenharmony_ci
30482e69de5Sopenharmony_ci        prev_two_distances.clone_from(&prev_distances);
30582e69de5Sopenharmony_ci        prev_distances.clone_from(&curr_distances);
30682e69de5Sopenharmony_ci        prev_a_char = a_char;
30782e69de5Sopenharmony_ci    }
30882e69de5Sopenharmony_ci
30982e69de5Sopenharmony_ci    curr_distances[b_len]
31082e69de5Sopenharmony_ci
31182e69de5Sopenharmony_ci}
31282e69de5Sopenharmony_ci
31382e69de5Sopenharmony_ci/* Returns the final index for a value in a single vector that represents a fixed
31482e69de5Sopenharmony_ci   2d grid */
31582e69de5Sopenharmony_cifn flat_index(i: usize, j: usize, width: usize) -> usize {
31682e69de5Sopenharmony_ci    j * width + i
31782e69de5Sopenharmony_ci}
31882e69de5Sopenharmony_ci
31982e69de5Sopenharmony_ci/// Like optimal string alignment, but substrings can be edited an unlimited
32082e69de5Sopenharmony_ci/// number of times, and the triangle inequality holds.
32182e69de5Sopenharmony_ci///
32282e69de5Sopenharmony_ci/// ```
32382e69de5Sopenharmony_ci/// use strsim::generic_damerau_levenshtein;
32482e69de5Sopenharmony_ci///
32582e69de5Sopenharmony_ci/// assert_eq!(2, generic_damerau_levenshtein(&[1,2], &[2,3,1]));
32682e69de5Sopenharmony_ci/// ```
32782e69de5Sopenharmony_cipub fn generic_damerau_levenshtein<Elem>(a_elems: &[Elem], b_elems: &[Elem]) -> usize
32882e69de5Sopenharmony_ci    where Elem: Eq + Hash + Clone {
32982e69de5Sopenharmony_ci    let a_len = a_elems.len();
33082e69de5Sopenharmony_ci    let b_len = b_elems.len();
33182e69de5Sopenharmony_ci
33282e69de5Sopenharmony_ci    if a_len == 0 { return b_len; }
33382e69de5Sopenharmony_ci    if b_len == 0 { return a_len; }
33482e69de5Sopenharmony_ci
33582e69de5Sopenharmony_ci    let width = a_len + 2;
33682e69de5Sopenharmony_ci    let mut distances = vec![0; (a_len + 2) * (b_len + 2)];
33782e69de5Sopenharmony_ci    let max_distance = a_len + b_len;
33882e69de5Sopenharmony_ci    distances[0] = max_distance;
33982e69de5Sopenharmony_ci
34082e69de5Sopenharmony_ci    for i in 0..(a_len + 1) {
34182e69de5Sopenharmony_ci        distances[flat_index(i + 1, 0, width)] = max_distance;
34282e69de5Sopenharmony_ci        distances[flat_index(i + 1, 1, width)] = i;
34382e69de5Sopenharmony_ci    }
34482e69de5Sopenharmony_ci
34582e69de5Sopenharmony_ci    for j in 0..(b_len + 1) {
34682e69de5Sopenharmony_ci        distances[flat_index(0, j + 1, width)] = max_distance;
34782e69de5Sopenharmony_ci        distances[flat_index(1, j + 1, width)] = j;
34882e69de5Sopenharmony_ci    }
34982e69de5Sopenharmony_ci
35082e69de5Sopenharmony_ci    let mut elems: HashMap<Elem, usize> = HashMap::with_capacity(64);
35182e69de5Sopenharmony_ci
35282e69de5Sopenharmony_ci    for i in 1..(a_len + 1) {
35382e69de5Sopenharmony_ci        let mut db = 0;
35482e69de5Sopenharmony_ci
35582e69de5Sopenharmony_ci        for j in 1..(b_len + 1) {
35682e69de5Sopenharmony_ci            let k = match elems.get(&b_elems[j - 1]) {
35782e69de5Sopenharmony_ci                Some(&value) => value,
35882e69de5Sopenharmony_ci                None => 0
35982e69de5Sopenharmony_ci            };
36082e69de5Sopenharmony_ci
36182e69de5Sopenharmony_ci            let insertion_cost = distances[flat_index(i, j + 1, width)] + 1;
36282e69de5Sopenharmony_ci            let deletion_cost = distances[flat_index(i + 1, j, width)] + 1;
36382e69de5Sopenharmony_ci            let transposition_cost = distances[flat_index(k, db, width)] +
36482e69de5Sopenharmony_ci                (i - k - 1) + 1 + (j - db - 1);
36582e69de5Sopenharmony_ci
36682e69de5Sopenharmony_ci            let mut substitution_cost = distances[flat_index(i, j, width)] + 1;
36782e69de5Sopenharmony_ci            if a_elems[i - 1] == b_elems[j - 1] {
36882e69de5Sopenharmony_ci                db = j;
36982e69de5Sopenharmony_ci                substitution_cost -= 1;
37082e69de5Sopenharmony_ci            }
37182e69de5Sopenharmony_ci
37282e69de5Sopenharmony_ci            distances[flat_index(i + 1, j + 1, width)] = min(substitution_cost,
37382e69de5Sopenharmony_ci                min(insertion_cost, min(deletion_cost, transposition_cost)));
37482e69de5Sopenharmony_ci        }
37582e69de5Sopenharmony_ci
37682e69de5Sopenharmony_ci        elems.insert(a_elems[i - 1].clone(), i);
37782e69de5Sopenharmony_ci    }
37882e69de5Sopenharmony_ci
37982e69de5Sopenharmony_ci    distances[flat_index(a_len + 1, b_len + 1, width)]
38082e69de5Sopenharmony_ci}
38182e69de5Sopenharmony_ci
38282e69de5Sopenharmony_ci/// Like optimal string alignment, but substrings can be edited an unlimited
38382e69de5Sopenharmony_ci/// number of times, and the triangle inequality holds.
38482e69de5Sopenharmony_ci///
38582e69de5Sopenharmony_ci/// ```
38682e69de5Sopenharmony_ci/// use strsim::damerau_levenshtein;
38782e69de5Sopenharmony_ci///
38882e69de5Sopenharmony_ci/// assert_eq!(2, damerau_levenshtein("ab", "bca"));
38982e69de5Sopenharmony_ci/// ```
39082e69de5Sopenharmony_cipub fn damerau_levenshtein(a: &str, b: &str) -> usize {
39182e69de5Sopenharmony_ci    let (x, y): (Vec<_>, Vec<_>) = (a.chars().collect(), b.chars().collect());
39282e69de5Sopenharmony_ci    generic_damerau_levenshtein(x.as_slice(), y.as_slice())
39382e69de5Sopenharmony_ci}
39482e69de5Sopenharmony_ci
39582e69de5Sopenharmony_ci/// Calculates a normalized score of the Damerau–Levenshtein algorithm between
39682e69de5Sopenharmony_ci/// 0.0 and 1.0 (inclusive), where 1.0 means the strings are the same.
39782e69de5Sopenharmony_ci///
39882e69de5Sopenharmony_ci/// ```
39982e69de5Sopenharmony_ci/// use strsim::normalized_damerau_levenshtein;
40082e69de5Sopenharmony_ci///
40182e69de5Sopenharmony_ci/// assert!((normalized_damerau_levenshtein("levenshtein", "löwenbräu") - 0.27272).abs() < 0.00001);
40282e69de5Sopenharmony_ci/// assert!((normalized_damerau_levenshtein("", "") - 1.0).abs() < 0.00001);
40382e69de5Sopenharmony_ci/// assert!(normalized_damerau_levenshtein("", "flower").abs() < 0.00001);
40482e69de5Sopenharmony_ci/// assert!(normalized_damerau_levenshtein("tree", "").abs() < 0.00001);
40582e69de5Sopenharmony_ci/// assert!((normalized_damerau_levenshtein("sunglasses", "sunglasses") - 1.0).abs() < 0.00001);
40682e69de5Sopenharmony_ci/// ```
40782e69de5Sopenharmony_cipub fn normalized_damerau_levenshtein(a: &str, b: &str) -> f64 {
40882e69de5Sopenharmony_ci    if a.is_empty() && b.is_empty() {
40982e69de5Sopenharmony_ci        return 1.0;
41082e69de5Sopenharmony_ci    }
41182e69de5Sopenharmony_ci    1.0 - (damerau_levenshtein(a, b) as f64) / (a.chars().count().max(b.chars().count()) as f64)
41282e69de5Sopenharmony_ci}
41382e69de5Sopenharmony_ci
41482e69de5Sopenharmony_ci/// Returns an Iterator of char tuples.
41582e69de5Sopenharmony_cifn bigrams(s: &str) -> impl Iterator<Item=(char, char)> + '_ {
41682e69de5Sopenharmony_ci    s.chars().zip(s.chars().skip(1))
41782e69de5Sopenharmony_ci}
41882e69de5Sopenharmony_ci
41982e69de5Sopenharmony_ci
42082e69de5Sopenharmony_ci/// Calculates a Sørensen-Dice similarity distance using bigrams.
42182e69de5Sopenharmony_ci/// See http://en.wikipedia.org/wiki/S%C3%B8rensen%E2%80%93Dice_coefficient.
42282e69de5Sopenharmony_ci///
42382e69de5Sopenharmony_ci/// ```
42482e69de5Sopenharmony_ci/// use strsim::sorensen_dice;
42582e69de5Sopenharmony_ci///
42682e69de5Sopenharmony_ci/// assert_eq!(1.0, sorensen_dice("", ""));
42782e69de5Sopenharmony_ci/// assert_eq!(0.0, sorensen_dice("", "a"));
42882e69de5Sopenharmony_ci/// assert_eq!(0.0, sorensen_dice("french", "quebec"));
42982e69de5Sopenharmony_ci/// assert_eq!(1.0, sorensen_dice("ferris", "ferris"));
43082e69de5Sopenharmony_ci/// assert_eq!(1.0, sorensen_dice("ferris", "ferris"));
43182e69de5Sopenharmony_ci/// assert_eq!(0.8888888888888888, sorensen_dice("feris", "ferris"));
43282e69de5Sopenharmony_ci/// ```
43382e69de5Sopenharmony_cipub fn sorensen_dice(a: &str, b: &str) -> f64 {
43482e69de5Sopenharmony_ci    // implementation guided by
43582e69de5Sopenharmony_ci    // https://github.com/aceakash/string-similarity/blob/f83ba3cd7bae874c20c429774e911ae8cff8bced/src/index.js#L6
43682e69de5Sopenharmony_ci
43782e69de5Sopenharmony_ci    let a: String = a.chars().filter(|&x| !char::is_whitespace(x)).collect();
43882e69de5Sopenharmony_ci    let b: String = b.chars().filter(|&x| !char::is_whitespace(x)).collect();
43982e69de5Sopenharmony_ci
44082e69de5Sopenharmony_ci    if a.len() == 0 && b.len() == 0 {
44182e69de5Sopenharmony_ci        return 1.0;
44282e69de5Sopenharmony_ci    }
44382e69de5Sopenharmony_ci
44482e69de5Sopenharmony_ci    if a.len() == 0 || b.len() == 0 {
44582e69de5Sopenharmony_ci        return 0.0;
44682e69de5Sopenharmony_ci    }
44782e69de5Sopenharmony_ci
44882e69de5Sopenharmony_ci    if a == b {
44982e69de5Sopenharmony_ci        return 1.0;
45082e69de5Sopenharmony_ci    }
45182e69de5Sopenharmony_ci
45282e69de5Sopenharmony_ci    if a.len() == 1 && b.len() == 1 {
45382e69de5Sopenharmony_ci        return 0.0;
45482e69de5Sopenharmony_ci    }
45582e69de5Sopenharmony_ci
45682e69de5Sopenharmony_ci    if a.len() < 2 || b.len() < 2 {
45782e69de5Sopenharmony_ci        return 0.0;
45882e69de5Sopenharmony_ci    }
45982e69de5Sopenharmony_ci
46082e69de5Sopenharmony_ci    let mut a_bigrams: HashMap<(char, char), usize> = HashMap::new();
46182e69de5Sopenharmony_ci
46282e69de5Sopenharmony_ci    for bigram in bigrams(&a) {
46382e69de5Sopenharmony_ci        *a_bigrams.entry(bigram).or_insert(0) += 1;
46482e69de5Sopenharmony_ci    }
46582e69de5Sopenharmony_ci
46682e69de5Sopenharmony_ci    let mut intersection_size = 0;
46782e69de5Sopenharmony_ci
46882e69de5Sopenharmony_ci    for bigram in bigrams(&b) {
46982e69de5Sopenharmony_ci        a_bigrams.entry(bigram).and_modify(|bi| {
47082e69de5Sopenharmony_ci            if *bi > 0 {
47182e69de5Sopenharmony_ci                *bi -= 1;
47282e69de5Sopenharmony_ci                intersection_size += 1;
47382e69de5Sopenharmony_ci            }
47482e69de5Sopenharmony_ci        });
47582e69de5Sopenharmony_ci    }
47682e69de5Sopenharmony_ci
47782e69de5Sopenharmony_ci    (2 * intersection_size) as f64 / (a.len() + b.len() - 2) as f64
47882e69de5Sopenharmony_ci}
47982e69de5Sopenharmony_ci
48082e69de5Sopenharmony_ci
48182e69de5Sopenharmony_ci#[cfg(test)]
48282e69de5Sopenharmony_cimod tests {
48382e69de5Sopenharmony_ci    use super::*;
48482e69de5Sopenharmony_ci
48582e69de5Sopenharmony_ci    #[test]
48682e69de5Sopenharmony_ci    fn bigrams_iterator() {
48782e69de5Sopenharmony_ci        let mut bi = bigrams("abcde");
48882e69de5Sopenharmony_ci
48982e69de5Sopenharmony_ci        assert_eq!(Some(('a', 'b')), bi.next());
49082e69de5Sopenharmony_ci        assert_eq!(Some(('b', 'c')), bi.next());
49182e69de5Sopenharmony_ci        assert_eq!(Some(('c', 'd')), bi.next());
49282e69de5Sopenharmony_ci        assert_eq!(Some(('d', 'e')), bi.next());
49382e69de5Sopenharmony_ci        assert_eq!(None, bi.next());
49482e69de5Sopenharmony_ci    }
49582e69de5Sopenharmony_ci
49682e69de5Sopenharmony_ci    fn assert_hamming_dist(dist: usize, str1: &str, str2: &str) {
49782e69de5Sopenharmony_ci        assert_eq!(Ok(dist), hamming(str1, str2));
49882e69de5Sopenharmony_ci    }
49982e69de5Sopenharmony_ci
50082e69de5Sopenharmony_ci    #[test]
50182e69de5Sopenharmony_ci    fn hamming_empty() {
50282e69de5Sopenharmony_ci        assert_hamming_dist(0, "", "")
50382e69de5Sopenharmony_ci    }
50482e69de5Sopenharmony_ci
50582e69de5Sopenharmony_ci    #[test]
50682e69de5Sopenharmony_ci    fn hamming_same() {
50782e69de5Sopenharmony_ci        assert_hamming_dist(0, "hamming", "hamming")
50882e69de5Sopenharmony_ci    }
50982e69de5Sopenharmony_ci
51082e69de5Sopenharmony_ci    #[test]
51182e69de5Sopenharmony_ci    fn hamming_numbers() {
51282e69de5Sopenharmony_ci        assert_eq!(Ok(1), generic_hamming(&[1, 2, 4], &[1, 2, 3]));
51382e69de5Sopenharmony_ci    }
51482e69de5Sopenharmony_ci
51582e69de5Sopenharmony_ci    #[test]
51682e69de5Sopenharmony_ci    fn hamming_diff() {
51782e69de5Sopenharmony_ci        assert_hamming_dist(3, "hamming", "hammers")
51882e69de5Sopenharmony_ci    }
51982e69de5Sopenharmony_ci
52082e69de5Sopenharmony_ci    #[test]
52182e69de5Sopenharmony_ci    fn hamming_diff_multibyte() {
52282e69de5Sopenharmony_ci        assert_hamming_dist(2, "hamming", "h香mmüng");
52382e69de5Sopenharmony_ci    }
52482e69de5Sopenharmony_ci
52582e69de5Sopenharmony_ci    #[test]
52682e69de5Sopenharmony_ci    fn hamming_unequal_length() {
52782e69de5Sopenharmony_ci        assert_eq!(
52882e69de5Sopenharmony_ci            Err(StrSimError::DifferentLengthArgs),
52982e69de5Sopenharmony_ci            generic_hamming("ham".chars(), "hamming".chars())
53082e69de5Sopenharmony_ci        );
53182e69de5Sopenharmony_ci    }
53282e69de5Sopenharmony_ci
53382e69de5Sopenharmony_ci    #[test]
53482e69de5Sopenharmony_ci    fn hamming_names() {
53582e69de5Sopenharmony_ci        assert_hamming_dist(14, "Friedrich Nietzs", "Jean-Paul Sartre")
53682e69de5Sopenharmony_ci    }
53782e69de5Sopenharmony_ci
53882e69de5Sopenharmony_ci    #[test]
53982e69de5Sopenharmony_ci    fn jaro_both_empty() {
54082e69de5Sopenharmony_ci        assert_eq!(1.0, jaro("", ""));
54182e69de5Sopenharmony_ci    }
54282e69de5Sopenharmony_ci
54382e69de5Sopenharmony_ci    #[test]
54482e69de5Sopenharmony_ci    fn jaro_first_empty() {
54582e69de5Sopenharmony_ci        assert_eq!(0.0, jaro("", "jaro"));
54682e69de5Sopenharmony_ci    }
54782e69de5Sopenharmony_ci
54882e69de5Sopenharmony_ci    #[test]
54982e69de5Sopenharmony_ci    fn jaro_second_empty() {
55082e69de5Sopenharmony_ci        assert_eq!(0.0, jaro("distance", ""));
55182e69de5Sopenharmony_ci    }
55282e69de5Sopenharmony_ci
55382e69de5Sopenharmony_ci    #[test]
55482e69de5Sopenharmony_ci    fn jaro_same() {
55582e69de5Sopenharmony_ci        assert_eq!(1.0, jaro("jaro", "jaro"));
55682e69de5Sopenharmony_ci    }
55782e69de5Sopenharmony_ci
55882e69de5Sopenharmony_ci    #[test]
55982e69de5Sopenharmony_ci    fn jaro_multibyte() {
56082e69de5Sopenharmony_ci        assert!((0.818 - jaro("testabctest", "testöঙ香test")) < 0.001);
56182e69de5Sopenharmony_ci        assert!((0.818 - jaro("testöঙ香test", "testabctest")) < 0.001);
56282e69de5Sopenharmony_ci    }
56382e69de5Sopenharmony_ci
56482e69de5Sopenharmony_ci    #[test]
56582e69de5Sopenharmony_ci    fn jaro_diff_short() {
56682e69de5Sopenharmony_ci        assert!((0.767 - jaro("dixon", "dicksonx")).abs() < 0.001);
56782e69de5Sopenharmony_ci    }
56882e69de5Sopenharmony_ci
56982e69de5Sopenharmony_ci    #[test]
57082e69de5Sopenharmony_ci    fn jaro_diff_one_character() {
57182e69de5Sopenharmony_ci        assert_eq!(0.0, jaro("a", "b"));
57282e69de5Sopenharmony_ci    }
57382e69de5Sopenharmony_ci
57482e69de5Sopenharmony_ci    #[test]
57582e69de5Sopenharmony_ci    fn jaro_same_one_character() {
57682e69de5Sopenharmony_ci        assert_eq!(1.0, jaro("a", "a"));
57782e69de5Sopenharmony_ci    }
57882e69de5Sopenharmony_ci
57982e69de5Sopenharmony_ci    #[test]
58082e69de5Sopenharmony_ci    fn generic_jaro_diff() {
58182e69de5Sopenharmony_ci        assert_eq!(0.0, generic_jaro(&[1, 2], &[3, 4]));
58282e69de5Sopenharmony_ci    }
58382e69de5Sopenharmony_ci
58482e69de5Sopenharmony_ci    #[test]
58582e69de5Sopenharmony_ci    fn jaro_diff_one_and_two() {
58682e69de5Sopenharmony_ci        assert!((0.83 - jaro("a", "ab")).abs() < 0.01);
58782e69de5Sopenharmony_ci    }
58882e69de5Sopenharmony_ci
58982e69de5Sopenharmony_ci    #[test]
59082e69de5Sopenharmony_ci    fn jaro_diff_two_and_one() {
59182e69de5Sopenharmony_ci        assert!((0.83 - jaro("ab", "a")).abs() < 0.01);
59282e69de5Sopenharmony_ci    }
59382e69de5Sopenharmony_ci
59482e69de5Sopenharmony_ci    #[test]
59582e69de5Sopenharmony_ci    fn jaro_diff_no_transposition() {
59682e69de5Sopenharmony_ci        assert!((0.822 - jaro("dwayne", "duane")).abs() < 0.001);
59782e69de5Sopenharmony_ci    }
59882e69de5Sopenharmony_ci
59982e69de5Sopenharmony_ci    #[test]
60082e69de5Sopenharmony_ci    fn jaro_diff_with_transposition() {
60182e69de5Sopenharmony_ci        assert!((0.944 - jaro("martha", "marhta")).abs() < 0.001);
60282e69de5Sopenharmony_ci    }
60382e69de5Sopenharmony_ci
60482e69de5Sopenharmony_ci    #[test]
60582e69de5Sopenharmony_ci    fn jaro_names() {
60682e69de5Sopenharmony_ci        assert!((0.392 - jaro("Friedrich Nietzsche",
60782e69de5Sopenharmony_ci                              "Jean-Paul Sartre")).abs() < 0.001);
60882e69de5Sopenharmony_ci    }
60982e69de5Sopenharmony_ci
61082e69de5Sopenharmony_ci    #[test]
61182e69de5Sopenharmony_ci    fn jaro_winkler_both_empty() {
61282e69de5Sopenharmony_ci        assert_eq!(1.0, jaro_winkler("", ""));
61382e69de5Sopenharmony_ci    }
61482e69de5Sopenharmony_ci
61582e69de5Sopenharmony_ci    #[test]
61682e69de5Sopenharmony_ci    fn jaro_winkler_first_empty() {
61782e69de5Sopenharmony_ci        assert_eq!(0.0, jaro_winkler("", "jaro-winkler"));
61882e69de5Sopenharmony_ci    }
61982e69de5Sopenharmony_ci
62082e69de5Sopenharmony_ci    #[test]
62182e69de5Sopenharmony_ci    fn jaro_winkler_second_empty() {
62282e69de5Sopenharmony_ci        assert_eq!(0.0, jaro_winkler("distance", ""));
62382e69de5Sopenharmony_ci    }
62482e69de5Sopenharmony_ci
62582e69de5Sopenharmony_ci    #[test]
62682e69de5Sopenharmony_ci    fn jaro_winkler_same() {
62782e69de5Sopenharmony_ci        assert_eq!(1.0, jaro_winkler("Jaro-Winkler", "Jaro-Winkler"));
62882e69de5Sopenharmony_ci    }
62982e69de5Sopenharmony_ci
63082e69de5Sopenharmony_ci    #[test]
63182e69de5Sopenharmony_ci    fn jaro_winkler_multibyte() {
63282e69de5Sopenharmony_ci        assert!((0.89 - jaro_winkler("testabctest", "testöঙ香test")).abs() <
63382e69de5Sopenharmony_ci            0.001);
63482e69de5Sopenharmony_ci        assert!((0.89 - jaro_winkler("testöঙ香test", "testabctest")).abs() <
63582e69de5Sopenharmony_ci            0.001);
63682e69de5Sopenharmony_ci    }
63782e69de5Sopenharmony_ci
63882e69de5Sopenharmony_ci    #[test]
63982e69de5Sopenharmony_ci    fn jaro_winkler_diff_short() {
64082e69de5Sopenharmony_ci        assert!((0.813 - jaro_winkler("dixon", "dicksonx")).abs() < 0.001);
64182e69de5Sopenharmony_ci        assert!((0.813 - jaro_winkler("dicksonx", "dixon")).abs() < 0.001);
64282e69de5Sopenharmony_ci    }
64382e69de5Sopenharmony_ci
64482e69de5Sopenharmony_ci    #[test]
64582e69de5Sopenharmony_ci    fn jaro_winkler_diff_one_character() {
64682e69de5Sopenharmony_ci        assert_eq!(0.0, jaro_winkler("a", "b"));
64782e69de5Sopenharmony_ci    }
64882e69de5Sopenharmony_ci
64982e69de5Sopenharmony_ci    #[test]
65082e69de5Sopenharmony_ci    fn jaro_winkler_same_one_character() {
65182e69de5Sopenharmony_ci        assert_eq!(1.0, jaro_winkler("a", "a"));
65282e69de5Sopenharmony_ci    }
65382e69de5Sopenharmony_ci
65482e69de5Sopenharmony_ci    #[test]
65582e69de5Sopenharmony_ci    fn jaro_winkler_diff_no_transposition() {
65682e69de5Sopenharmony_ci        assert!((0.840 - jaro_winkler("dwayne", "duane")).abs() < 0.001);
65782e69de5Sopenharmony_ci    }
65882e69de5Sopenharmony_ci
65982e69de5Sopenharmony_ci    #[test]
66082e69de5Sopenharmony_ci    fn jaro_winkler_diff_with_transposition() {
66182e69de5Sopenharmony_ci        assert!((0.961 - jaro_winkler("martha", "marhta")).abs() < 0.001);
66282e69de5Sopenharmony_ci    }
66382e69de5Sopenharmony_ci
66482e69de5Sopenharmony_ci    #[test]
66582e69de5Sopenharmony_ci    fn jaro_winkler_names() {
66682e69de5Sopenharmony_ci        assert!((0.562 - jaro_winkler("Friedrich Nietzsche",
66782e69de5Sopenharmony_ci                                      "Fran-Paul Sartre")).abs() < 0.001);
66882e69de5Sopenharmony_ci    }
66982e69de5Sopenharmony_ci
67082e69de5Sopenharmony_ci    #[test]
67182e69de5Sopenharmony_ci    fn jaro_winkler_long_prefix() {
67282e69de5Sopenharmony_ci        assert!((0.911 - jaro_winkler("cheeseburger", "cheese fries")).abs() <
67382e69de5Sopenharmony_ci            0.001);
67482e69de5Sopenharmony_ci    }
67582e69de5Sopenharmony_ci
67682e69de5Sopenharmony_ci    #[test]
67782e69de5Sopenharmony_ci    fn jaro_winkler_more_names() {
67882e69de5Sopenharmony_ci        assert!((0.868 - jaro_winkler("Thorkel", "Thorgier")).abs() < 0.001);
67982e69de5Sopenharmony_ci    }
68082e69de5Sopenharmony_ci
68182e69de5Sopenharmony_ci    #[test]
68282e69de5Sopenharmony_ci    fn jaro_winkler_length_of_one() {
68382e69de5Sopenharmony_ci        assert!((0.738 - jaro_winkler("Dinsdale", "D")).abs() < 0.001);
68482e69de5Sopenharmony_ci    }
68582e69de5Sopenharmony_ci
68682e69de5Sopenharmony_ci    #[test]
68782e69de5Sopenharmony_ci    fn jaro_winkler_very_long_prefix() {
68882e69de5Sopenharmony_ci        assert!((1.0 - jaro_winkler("thequickbrownfoxjumpedoverx",
68982e69de5Sopenharmony_ci                                    "thequickbrownfoxjumpedovery")).abs() <
69082e69de5Sopenharmony_ci            0.001);
69182e69de5Sopenharmony_ci    }
69282e69de5Sopenharmony_ci
69382e69de5Sopenharmony_ci    #[test]
69482e69de5Sopenharmony_ci    fn levenshtein_empty() {
69582e69de5Sopenharmony_ci        assert_eq!(0, levenshtein("", ""));
69682e69de5Sopenharmony_ci    }
69782e69de5Sopenharmony_ci
69882e69de5Sopenharmony_ci    #[test]
69982e69de5Sopenharmony_ci    fn levenshtein_same() {
70082e69de5Sopenharmony_ci        assert_eq!(0, levenshtein("levenshtein", "levenshtein"));
70182e69de5Sopenharmony_ci    }
70282e69de5Sopenharmony_ci
70382e69de5Sopenharmony_ci    #[test]
70482e69de5Sopenharmony_ci    fn levenshtein_diff_short() {
70582e69de5Sopenharmony_ci        assert_eq!(3, levenshtein("kitten", "sitting"));
70682e69de5Sopenharmony_ci    }
70782e69de5Sopenharmony_ci
70882e69de5Sopenharmony_ci    #[test]
70982e69de5Sopenharmony_ci    fn levenshtein_diff_with_space() {
71082e69de5Sopenharmony_ci        assert_eq!(5, levenshtein("hello, world", "bye, world"));
71182e69de5Sopenharmony_ci    }
71282e69de5Sopenharmony_ci
71382e69de5Sopenharmony_ci    #[test]
71482e69de5Sopenharmony_ci    fn levenshtein_diff_multibyte() {
71582e69de5Sopenharmony_ci        assert_eq!(3, levenshtein("öঙ香", "abc"));
71682e69de5Sopenharmony_ci        assert_eq!(3, levenshtein("abc", "öঙ香"));
71782e69de5Sopenharmony_ci    }
71882e69de5Sopenharmony_ci
71982e69de5Sopenharmony_ci    #[test]
72082e69de5Sopenharmony_ci    fn levenshtein_diff_longer() {
72182e69de5Sopenharmony_ci        let a = "The quick brown fox jumped over the angry dog.";
72282e69de5Sopenharmony_ci        let b = "Lorem ipsum dolor sit amet, dicta latine an eam.";
72382e69de5Sopenharmony_ci        assert_eq!(37, levenshtein(a, b));
72482e69de5Sopenharmony_ci    }
72582e69de5Sopenharmony_ci
72682e69de5Sopenharmony_ci    #[test]
72782e69de5Sopenharmony_ci    fn levenshtein_first_empty() {
72882e69de5Sopenharmony_ci        assert_eq!(7, levenshtein("", "sitting"));
72982e69de5Sopenharmony_ci    }
73082e69de5Sopenharmony_ci
73182e69de5Sopenharmony_ci    #[test]
73282e69de5Sopenharmony_ci    fn levenshtein_second_empty() {
73382e69de5Sopenharmony_ci        assert_eq!(6, levenshtein("kitten", ""));
73482e69de5Sopenharmony_ci    }
73582e69de5Sopenharmony_ci
73682e69de5Sopenharmony_ci    #[test]
73782e69de5Sopenharmony_ci    fn normalized_levenshtein_diff_short() {
73882e69de5Sopenharmony_ci        assert!((normalized_levenshtein("kitten", "sitting") - 0.57142).abs() < 0.00001);
73982e69de5Sopenharmony_ci    }
74082e69de5Sopenharmony_ci
74182e69de5Sopenharmony_ci    #[test]
74282e69de5Sopenharmony_ci    fn normalized_levenshtein_for_empty_strings() {
74382e69de5Sopenharmony_ci        assert!((normalized_levenshtein("", "") - 1.0).abs() < 0.00001);
74482e69de5Sopenharmony_ci    }
74582e69de5Sopenharmony_ci
74682e69de5Sopenharmony_ci    #[test]
74782e69de5Sopenharmony_ci    fn normalized_levenshtein_first_empty() {
74882e69de5Sopenharmony_ci        assert!(normalized_levenshtein("", "second").abs() < 0.00001);
74982e69de5Sopenharmony_ci    }
75082e69de5Sopenharmony_ci
75182e69de5Sopenharmony_ci    #[test]
75282e69de5Sopenharmony_ci    fn normalized_levenshtein_second_empty() {
75382e69de5Sopenharmony_ci        assert!(normalized_levenshtein("first", "").abs() < 0.00001);
75482e69de5Sopenharmony_ci    }
75582e69de5Sopenharmony_ci
75682e69de5Sopenharmony_ci    #[test]
75782e69de5Sopenharmony_ci    fn normalized_levenshtein_identical_strings() {
75882e69de5Sopenharmony_ci        assert!((normalized_levenshtein("identical", "identical") - 1.0).abs() < 0.00001);
75982e69de5Sopenharmony_ci    }
76082e69de5Sopenharmony_ci
76182e69de5Sopenharmony_ci    #[test]
76282e69de5Sopenharmony_ci    fn osa_distance_empty() {
76382e69de5Sopenharmony_ci        assert_eq!(0, osa_distance("", ""));
76482e69de5Sopenharmony_ci    }
76582e69de5Sopenharmony_ci
76682e69de5Sopenharmony_ci    #[test]
76782e69de5Sopenharmony_ci    fn osa_distance_same() {
76882e69de5Sopenharmony_ci        assert_eq!(0, osa_distance("damerau", "damerau"));
76982e69de5Sopenharmony_ci    }
77082e69de5Sopenharmony_ci
77182e69de5Sopenharmony_ci    #[test]
77282e69de5Sopenharmony_ci    fn osa_distance_first_empty() {
77382e69de5Sopenharmony_ci        assert_eq!(7, osa_distance("", "damerau"));
77482e69de5Sopenharmony_ci    }
77582e69de5Sopenharmony_ci
77682e69de5Sopenharmony_ci    #[test]
77782e69de5Sopenharmony_ci    fn osa_distance_second_empty() {
77882e69de5Sopenharmony_ci        assert_eq!(7, osa_distance("damerau", ""));
77982e69de5Sopenharmony_ci    }
78082e69de5Sopenharmony_ci
78182e69de5Sopenharmony_ci    #[test]
78282e69de5Sopenharmony_ci    fn osa_distance_diff() {
78382e69de5Sopenharmony_ci        assert_eq!(3, osa_distance("ca", "abc"));
78482e69de5Sopenharmony_ci    }
78582e69de5Sopenharmony_ci
78682e69de5Sopenharmony_ci    #[test]
78782e69de5Sopenharmony_ci    fn osa_distance_diff_short() {
78882e69de5Sopenharmony_ci        assert_eq!(3, osa_distance("damerau", "aderua"));
78982e69de5Sopenharmony_ci    }
79082e69de5Sopenharmony_ci
79182e69de5Sopenharmony_ci    #[test]
79282e69de5Sopenharmony_ci    fn osa_distance_diff_reversed() {
79382e69de5Sopenharmony_ci        assert_eq!(3, osa_distance("aderua", "damerau"));
79482e69de5Sopenharmony_ci    }
79582e69de5Sopenharmony_ci
79682e69de5Sopenharmony_ci    #[test]
79782e69de5Sopenharmony_ci    fn osa_distance_diff_multibyte() {
79882e69de5Sopenharmony_ci        assert_eq!(3, osa_distance("öঙ香", "abc"));
79982e69de5Sopenharmony_ci        assert_eq!(3, osa_distance("abc", "öঙ香"));
80082e69de5Sopenharmony_ci    }
80182e69de5Sopenharmony_ci
80282e69de5Sopenharmony_ci    #[test]
80382e69de5Sopenharmony_ci    fn osa_distance_diff_unequal_length() {
80482e69de5Sopenharmony_ci        assert_eq!(6, osa_distance("damerau", "aderuaxyz"));
80582e69de5Sopenharmony_ci    }
80682e69de5Sopenharmony_ci
80782e69de5Sopenharmony_ci    #[test]
80882e69de5Sopenharmony_ci    fn osa_distance_diff_unequal_length_reversed() {
80982e69de5Sopenharmony_ci        assert_eq!(6, osa_distance("aderuaxyz", "damerau"));
81082e69de5Sopenharmony_ci    }
81182e69de5Sopenharmony_ci
81282e69de5Sopenharmony_ci    #[test]
81382e69de5Sopenharmony_ci    fn osa_distance_diff_comedians() {
81482e69de5Sopenharmony_ci        assert_eq!(5, osa_distance("Stewart", "Colbert"));
81582e69de5Sopenharmony_ci    }
81682e69de5Sopenharmony_ci
81782e69de5Sopenharmony_ci    #[test]
81882e69de5Sopenharmony_ci    fn osa_distance_many_transpositions() {
81982e69de5Sopenharmony_ci        assert_eq!(4, osa_distance("abcdefghijkl", "bacedfgihjlk"));
82082e69de5Sopenharmony_ci    }
82182e69de5Sopenharmony_ci
82282e69de5Sopenharmony_ci    #[test]
82382e69de5Sopenharmony_ci    fn osa_distance_diff_longer() {
82482e69de5Sopenharmony_ci        let a = "The quick brown fox jumped over the angry dog.";
82582e69de5Sopenharmony_ci        let b = "Lehem ipsum dolor sit amet, dicta latine an eam.";
82682e69de5Sopenharmony_ci        assert_eq!(36, osa_distance(a, b));
82782e69de5Sopenharmony_ci    }
82882e69de5Sopenharmony_ci
82982e69de5Sopenharmony_ci    #[test]
83082e69de5Sopenharmony_ci    fn osa_distance_beginning_transposition() {
83182e69de5Sopenharmony_ci        assert_eq!(1, osa_distance("foobar", "ofobar"));
83282e69de5Sopenharmony_ci    }
83382e69de5Sopenharmony_ci
83482e69de5Sopenharmony_ci    #[test]
83582e69de5Sopenharmony_ci    fn osa_distance_end_transposition() {
83682e69de5Sopenharmony_ci        assert_eq!(1, osa_distance("specter", "spectre"));
83782e69de5Sopenharmony_ci    }
83882e69de5Sopenharmony_ci
83982e69de5Sopenharmony_ci    #[test]
84082e69de5Sopenharmony_ci    fn osa_distance_restricted_edit() {
84182e69de5Sopenharmony_ci        assert_eq!(4, osa_distance("a cat", "an abct"));
84282e69de5Sopenharmony_ci    }
84382e69de5Sopenharmony_ci
84482e69de5Sopenharmony_ci    #[test]
84582e69de5Sopenharmony_ci    fn damerau_levenshtein_empty() {
84682e69de5Sopenharmony_ci        assert_eq!(0, damerau_levenshtein("", ""));
84782e69de5Sopenharmony_ci    }
84882e69de5Sopenharmony_ci
84982e69de5Sopenharmony_ci    #[test]
85082e69de5Sopenharmony_ci    fn damerau_levenshtein_same() {
85182e69de5Sopenharmony_ci        assert_eq!(0, damerau_levenshtein("damerau", "damerau"));
85282e69de5Sopenharmony_ci    }
85382e69de5Sopenharmony_ci
85482e69de5Sopenharmony_ci    #[test]
85582e69de5Sopenharmony_ci    fn damerau_levenshtein_first_empty() {
85682e69de5Sopenharmony_ci        assert_eq!(7, damerau_levenshtein("", "damerau"));
85782e69de5Sopenharmony_ci    }
85882e69de5Sopenharmony_ci
85982e69de5Sopenharmony_ci    #[test]
86082e69de5Sopenharmony_ci    fn damerau_levenshtein_second_empty() {
86182e69de5Sopenharmony_ci        assert_eq!(7, damerau_levenshtein("damerau", ""));
86282e69de5Sopenharmony_ci    }
86382e69de5Sopenharmony_ci
86482e69de5Sopenharmony_ci    #[test]
86582e69de5Sopenharmony_ci    fn damerau_levenshtein_diff() {
86682e69de5Sopenharmony_ci        assert_eq!(2, damerau_levenshtein("ca", "abc"));
86782e69de5Sopenharmony_ci    }
86882e69de5Sopenharmony_ci
86982e69de5Sopenharmony_ci    #[test]
87082e69de5Sopenharmony_ci    fn damerau_levenshtein_diff_short() {
87182e69de5Sopenharmony_ci        assert_eq!(3, damerau_levenshtein("damerau", "aderua"));
87282e69de5Sopenharmony_ci    }
87382e69de5Sopenharmony_ci
87482e69de5Sopenharmony_ci    #[test]
87582e69de5Sopenharmony_ci    fn damerau_levenshtein_diff_reversed() {
87682e69de5Sopenharmony_ci        assert_eq!(3, damerau_levenshtein("aderua", "damerau"));
87782e69de5Sopenharmony_ci    }
87882e69de5Sopenharmony_ci
87982e69de5Sopenharmony_ci    #[test]
88082e69de5Sopenharmony_ci    fn damerau_levenshtein_diff_multibyte() {
88182e69de5Sopenharmony_ci        assert_eq!(3, damerau_levenshtein("öঙ香", "abc"));
88282e69de5Sopenharmony_ci        assert_eq!(3, damerau_levenshtein("abc", "öঙ香"));
88382e69de5Sopenharmony_ci    }
88482e69de5Sopenharmony_ci
88582e69de5Sopenharmony_ci    #[test]
88682e69de5Sopenharmony_ci    fn damerau_levenshtein_diff_unequal_length() {
88782e69de5Sopenharmony_ci        assert_eq!(6, damerau_levenshtein("damerau", "aderuaxyz"));
88882e69de5Sopenharmony_ci    }
88982e69de5Sopenharmony_ci
89082e69de5Sopenharmony_ci    #[test]
89182e69de5Sopenharmony_ci    fn damerau_levenshtein_diff_unequal_length_reversed() {
89282e69de5Sopenharmony_ci        assert_eq!(6, damerau_levenshtein("aderuaxyz", "damerau"));
89382e69de5Sopenharmony_ci    }
89482e69de5Sopenharmony_ci
89582e69de5Sopenharmony_ci    #[test]
89682e69de5Sopenharmony_ci    fn damerau_levenshtein_diff_comedians() {
89782e69de5Sopenharmony_ci        assert_eq!(5, damerau_levenshtein("Stewart", "Colbert"));
89882e69de5Sopenharmony_ci    }
89982e69de5Sopenharmony_ci
90082e69de5Sopenharmony_ci    #[test]
90182e69de5Sopenharmony_ci    fn damerau_levenshtein_many_transpositions() {
90282e69de5Sopenharmony_ci        assert_eq!(4, damerau_levenshtein("abcdefghijkl", "bacedfgihjlk"));
90382e69de5Sopenharmony_ci    }
90482e69de5Sopenharmony_ci
90582e69de5Sopenharmony_ci    #[test]
90682e69de5Sopenharmony_ci    fn damerau_levenshtein_diff_longer() {
90782e69de5Sopenharmony_ci        let a = "The quick brown fox jumped over the angry dog.";
90882e69de5Sopenharmony_ci        let b = "Lehem ipsum dolor sit amet, dicta latine an eam.";
90982e69de5Sopenharmony_ci        assert_eq!(36, damerau_levenshtein(a, b));
91082e69de5Sopenharmony_ci    }
91182e69de5Sopenharmony_ci
91282e69de5Sopenharmony_ci    #[test]
91382e69de5Sopenharmony_ci    fn damerau_levenshtein_beginning_transposition() {
91482e69de5Sopenharmony_ci        assert_eq!(1, damerau_levenshtein("foobar", "ofobar"));
91582e69de5Sopenharmony_ci    }
91682e69de5Sopenharmony_ci
91782e69de5Sopenharmony_ci    #[test]
91882e69de5Sopenharmony_ci    fn damerau_levenshtein_end_transposition() {
91982e69de5Sopenharmony_ci        assert_eq!(1, damerau_levenshtein("specter", "spectre"));
92082e69de5Sopenharmony_ci    }
92182e69de5Sopenharmony_ci
92282e69de5Sopenharmony_ci    #[test]
92382e69de5Sopenharmony_ci    fn damerau_levenshtein_unrestricted_edit() {
92482e69de5Sopenharmony_ci        assert_eq!(3, damerau_levenshtein("a cat", "an abct"));
92582e69de5Sopenharmony_ci    }
92682e69de5Sopenharmony_ci
92782e69de5Sopenharmony_ci    #[test]
92882e69de5Sopenharmony_ci    fn normalized_damerau_levenshtein_diff_short() {
92982e69de5Sopenharmony_ci        assert!((normalized_damerau_levenshtein("levenshtein", "löwenbräu") - 0.27272).abs() < 0.00001);
93082e69de5Sopenharmony_ci    }
93182e69de5Sopenharmony_ci
93282e69de5Sopenharmony_ci    #[test]
93382e69de5Sopenharmony_ci    fn normalized_damerau_levenshtein_for_empty_strings() {
93482e69de5Sopenharmony_ci        assert!((normalized_damerau_levenshtein("", "") - 1.0).abs() < 0.00001);
93582e69de5Sopenharmony_ci    }
93682e69de5Sopenharmony_ci
93782e69de5Sopenharmony_ci    #[test]
93882e69de5Sopenharmony_ci    fn normalized_damerau_levenshtein_first_empty() {
93982e69de5Sopenharmony_ci        assert!(normalized_damerau_levenshtein("", "flower").abs() < 0.00001);
94082e69de5Sopenharmony_ci    }
94182e69de5Sopenharmony_ci
94282e69de5Sopenharmony_ci    #[test]
94382e69de5Sopenharmony_ci    fn normalized_damerau_levenshtein_second_empty() {
94482e69de5Sopenharmony_ci        assert!(normalized_damerau_levenshtein("tree", "").abs() < 0.00001);
94582e69de5Sopenharmony_ci    }
94682e69de5Sopenharmony_ci
94782e69de5Sopenharmony_ci    #[test]
94882e69de5Sopenharmony_ci    fn normalized_damerau_levenshtein_identical_strings() {
94982e69de5Sopenharmony_ci        assert!((normalized_damerau_levenshtein("sunglasses", "sunglasses") - 1.0).abs() < 0.00001);
95082e69de5Sopenharmony_ci    }
95182e69de5Sopenharmony_ci
95282e69de5Sopenharmony_ci    #[test]
95382e69de5Sopenharmony_ci    fn sorensen_dice_all() {
95482e69de5Sopenharmony_ci        // test cases taken from
95582e69de5Sopenharmony_ci        // https://github.com/aceakash/string-similarity/blob/f83ba3cd7bae874c20c429774e911ae8cff8bced/src/spec/index.spec.js#L11
95682e69de5Sopenharmony_ci
95782e69de5Sopenharmony_ci        assert_eq!(1.0, sorensen_dice("a", "a"));
95882e69de5Sopenharmony_ci        assert_eq!(0.0, sorensen_dice("a", "b"));
95982e69de5Sopenharmony_ci        assert_eq!(1.0, sorensen_dice("", ""));
96082e69de5Sopenharmony_ci        assert_eq!(0.0, sorensen_dice("a", ""));
96182e69de5Sopenharmony_ci        assert_eq!(0.0, sorensen_dice("", "a"));
96282e69de5Sopenharmony_ci        assert_eq!(1.0, sorensen_dice("apple event", "apple    event"));
96382e69de5Sopenharmony_ci        assert_eq!(0.9090909090909091, sorensen_dice("iphone", "iphone x"));
96482e69de5Sopenharmony_ci        assert_eq!(0.0, sorensen_dice("french", "quebec"));
96582e69de5Sopenharmony_ci        assert_eq!(1.0, sorensen_dice("france", "france"));
96682e69de5Sopenharmony_ci        assert_eq!(0.2, sorensen_dice("fRaNce", "france"));
96782e69de5Sopenharmony_ci        assert_eq!(0.8, sorensen_dice("healed", "sealed"));
96882e69de5Sopenharmony_ci        assert_eq!(
96982e69de5Sopenharmony_ci            0.7878787878787878,
97082e69de5Sopenharmony_ci            sorensen_dice("web applications", "applications of the web")
97182e69de5Sopenharmony_ci        );
97282e69de5Sopenharmony_ci        assert_eq!(
97382e69de5Sopenharmony_ci            0.92,
97482e69de5Sopenharmony_ci            sorensen_dice(
97582e69de5Sopenharmony_ci                "this will have a typo somewhere",
97682e69de5Sopenharmony_ci                "this will huve a typo somewhere"
97782e69de5Sopenharmony_ci            )
97882e69de5Sopenharmony_ci        );
97982e69de5Sopenharmony_ci        assert_eq!(
98082e69de5Sopenharmony_ci            0.6060606060606061,
98182e69de5Sopenharmony_ci            sorensen_dice(
98282e69de5Sopenharmony_ci                "Olive-green table for sale, in extremely good condition.",
98382e69de5Sopenharmony_ci                "For sale: table in very good  condition, olive green in colour."
98482e69de5Sopenharmony_ci            )
98582e69de5Sopenharmony_ci        );
98682e69de5Sopenharmony_ci        assert_eq!(
98782e69de5Sopenharmony_ci            0.2558139534883721,
98882e69de5Sopenharmony_ci            sorensen_dice(
98982e69de5Sopenharmony_ci                "Olive-green table for sale, in extremely good condition.",
99082e69de5Sopenharmony_ci                "For sale: green Subaru Impreza, 210,000 miles"
99182e69de5Sopenharmony_ci            )
99282e69de5Sopenharmony_ci        );
99382e69de5Sopenharmony_ci        assert_eq!(
99482e69de5Sopenharmony_ci            0.1411764705882353,
99582e69de5Sopenharmony_ci            sorensen_dice(
99682e69de5Sopenharmony_ci                "Olive-green table for sale, in extremely good condition.",
99782e69de5Sopenharmony_ci                "Wanted: mountain bike with at least 21 gears."
99882e69de5Sopenharmony_ci            )
99982e69de5Sopenharmony_ci        );
100082e69de5Sopenharmony_ci        assert_eq!(
100182e69de5Sopenharmony_ci            0.7741935483870968,
100282e69de5Sopenharmony_ci            sorensen_dice("this has one extra word", "this has one word")
100382e69de5Sopenharmony_ci        );
100482e69de5Sopenharmony_ci    }
100582e69de5Sopenharmony_ci}
1006