1fb6c1f39Sopenharmony_ciuse crate::data; 2fb6c1f39Sopenharmony_ci 3fb6c1f39Sopenharmony_ci#[derive(Clone, Copy, Debug)] 4fb6c1f39Sopenharmony_cipub struct Input { 5fb6c1f39Sopenharmony_ci /// A name describing the corpus, used to identify it in benchmarks. 6fb6c1f39Sopenharmony_ci pub name: &'static str, 7fb6c1f39Sopenharmony_ci /// The haystack to search. 8fb6c1f39Sopenharmony_ci pub corpus: &'static str, 9fb6c1f39Sopenharmony_ci /// Queries that are expected to never occur. 10fb6c1f39Sopenharmony_ci pub never: &'static [Query], 11fb6c1f39Sopenharmony_ci /// Queries that are expected to occur rarely. 12fb6c1f39Sopenharmony_ci pub rare: &'static [Query], 13fb6c1f39Sopenharmony_ci /// Queries that are expected to fairly common. 14fb6c1f39Sopenharmony_ci pub common: &'static [Query], 15fb6c1f39Sopenharmony_ci} 16fb6c1f39Sopenharmony_ci 17fb6c1f39Sopenharmony_ci/// A substring search query for a particular haystack. 18fb6c1f39Sopenharmony_ci#[derive(Clone, Copy, Debug)] 19fb6c1f39Sopenharmony_cipub struct Query { 20fb6c1f39Sopenharmony_ci /// A name for this query, used to identify it in benchmarks. 21fb6c1f39Sopenharmony_ci pub name: &'static str, 22fb6c1f39Sopenharmony_ci /// The needle to search for. 23fb6c1f39Sopenharmony_ci pub needle: &'static str, 24fb6c1f39Sopenharmony_ci /// The expected number of occurrences. 25fb6c1f39Sopenharmony_ci pub count: usize, 26fb6c1f39Sopenharmony_ci} 27fb6c1f39Sopenharmony_ci 28fb6c1f39Sopenharmony_cipub const INPUTS: &'static [Input] = &[ 29fb6c1f39Sopenharmony_ci Input { 30fb6c1f39Sopenharmony_ci name: "code-rust-library", 31fb6c1f39Sopenharmony_ci corpus: data::CODE_RUST_LIBRARY, 32fb6c1f39Sopenharmony_ci never: &[ 33fb6c1f39Sopenharmony_ci Query { name: "fn-strength", needle: "fn strength", count: 0 }, 34fb6c1f39Sopenharmony_ci Query { 35fb6c1f39Sopenharmony_ci name: "fn-strength-paren", 36fb6c1f39Sopenharmony_ci needle: "fn strength(", 37fb6c1f39Sopenharmony_ci count: 0, 38fb6c1f39Sopenharmony_ci }, 39fb6c1f39Sopenharmony_ci Query { name: "fn-quux", needle: "fn quux(", count: 0 }, 40fb6c1f39Sopenharmony_ci ], 41fb6c1f39Sopenharmony_ci rare: &[ 42fb6c1f39Sopenharmony_ci Query { 43fb6c1f39Sopenharmony_ci name: "fn-from-str", 44fb6c1f39Sopenharmony_ci needle: "pub fn from_str(", 45fb6c1f39Sopenharmony_ci count: 1, 46fb6c1f39Sopenharmony_ci }, 47fb6c1f39Sopenharmony_ci ], 48fb6c1f39Sopenharmony_ci common: &[ 49fb6c1f39Sopenharmony_ci Query { name: "fn-is-empty", needle: "fn is_empty(", count: 17 }, 50fb6c1f39Sopenharmony_ci Query { name: "fn", needle: "fn", count: 2985 }, 51fb6c1f39Sopenharmony_ci Query { name: "paren", needle: "(", count: 30193 }, 52fb6c1f39Sopenharmony_ci Query { name: "let", needle: "let", count: 4737 }, 53fb6c1f39Sopenharmony_ci ], 54fb6c1f39Sopenharmony_ci }, 55fb6c1f39Sopenharmony_ci Input { 56fb6c1f39Sopenharmony_ci name: "huge-en", 57fb6c1f39Sopenharmony_ci corpus: data::SUBTITLE_EN_HUGE, 58fb6c1f39Sopenharmony_ci never: &[ 59fb6c1f39Sopenharmony_ci Query { name: "john-watson", needle: "John Watson", count: 0 }, 60fb6c1f39Sopenharmony_ci Query { name: "all-common-bytes", needle: "sternness", count: 0 }, 61fb6c1f39Sopenharmony_ci Query { name: "some-rare-bytes", needle: "quartz", count: 0 }, 62fb6c1f39Sopenharmony_ci Query { name: "two-space", needle: " ", count: 0 }, 63fb6c1f39Sopenharmony_ci ], 64fb6c1f39Sopenharmony_ci rare: &[ 65fb6c1f39Sopenharmony_ci Query { 66fb6c1f39Sopenharmony_ci name: "sherlock-holmes", 67fb6c1f39Sopenharmony_ci needle: "Sherlock Holmes", 68fb6c1f39Sopenharmony_ci count: 1, 69fb6c1f39Sopenharmony_ci }, 70fb6c1f39Sopenharmony_ci Query { name: "sherlock", needle: "Sherlock", count: 1 }, 71fb6c1f39Sopenharmony_ci Query { 72fb6c1f39Sopenharmony_ci name: "medium-needle", 73fb6c1f39Sopenharmony_ci needle: "homer, marge, bart, lisa, maggie", 74fb6c1f39Sopenharmony_ci count: 1, 75fb6c1f39Sopenharmony_ci }, 76fb6c1f39Sopenharmony_ci Query { 77fb6c1f39Sopenharmony_ci name: "long-needle", 78fb6c1f39Sopenharmony_ci needle: "I feel afraid of Mostafa\nHe is stronger and older than I am, and more experienced\nShould I turn back?\nDoc you're beginning to sound like Sherlock Holmes.", 79fb6c1f39Sopenharmony_ci count: 1, 80fb6c1f39Sopenharmony_ci }, 81fb6c1f39Sopenharmony_ci Query { 82fb6c1f39Sopenharmony_ci name: "huge-needle", 83fb6c1f39Sopenharmony_ci needle: "Since we will meet anyway, then the sooner, the better\nTomorrow at 4:30 in front of the Horse-Riding Club\nNo, 4:30\nI am confused, almost lost\nAs if an invisible hand pushed me towards an unknown fate\nI needed someone by my side\nI needed someone to guide me to the path of security\nBut I had no one\nI couldn't ask my father's opinion, nor his wife's\nI felt just as lonely as I had before\nI feel afraid of Mostafa\nHe is stronger and older than I am, and more experienced\nShould I turn back?\nDoc you're beginning to sound like Sherlock Holmes.", 84fb6c1f39Sopenharmony_ci count: 1, 85fb6c1f39Sopenharmony_ci }, 86fb6c1f39Sopenharmony_ci ], 87fb6c1f39Sopenharmony_ci common: &[ 88fb6c1f39Sopenharmony_ci Query { name: "that", needle: "that", count: 865 }, 89fb6c1f39Sopenharmony_ci Query { name: "one-space", needle: " ", count: 96606 }, 90fb6c1f39Sopenharmony_ci Query { name: "you", needle: "you", count: 5009 }, 91fb6c1f39Sopenharmony_ci // It would be nice to benchmark this case, although it's not 92fb6c1f39Sopenharmony_ci // terribly important. The problem is that std's substring 93fb6c1f39Sopenharmony_ci // implementation (correctly) never returns match offsets that 94fb6c1f39Sopenharmony_ci // split an encoded codepoint, where as memmem on bytes will. So 95fb6c1f39Sopenharmony_ci // the counts differ. We could modify our harness to skip this on 96fb6c1f39Sopenharmony_ci // std, but it seems like much ado about nothing. 97fb6c1f39Sopenharmony_ci // Query { name: "empty", needle: "", count: 613655 }, 98fb6c1f39Sopenharmony_ci ], 99fb6c1f39Sopenharmony_ci }, 100fb6c1f39Sopenharmony_ci Input { 101fb6c1f39Sopenharmony_ci name: "huge-ru", 102fb6c1f39Sopenharmony_ci corpus: data::SUBTITLE_RU_HUGE, 103fb6c1f39Sopenharmony_ci never: &[Query { 104fb6c1f39Sopenharmony_ci name: "john-watson", 105fb6c1f39Sopenharmony_ci needle: "Джон Уотсон", 106fb6c1f39Sopenharmony_ci count: 0, 107fb6c1f39Sopenharmony_ci }], 108fb6c1f39Sopenharmony_ci rare: &[ 109fb6c1f39Sopenharmony_ci Query { 110fb6c1f39Sopenharmony_ci name: "sherlock-holmes", 111fb6c1f39Sopenharmony_ci needle: "Шерлок Холмс", 112fb6c1f39Sopenharmony_ci count: 1, 113fb6c1f39Sopenharmony_ci }, 114fb6c1f39Sopenharmony_ci Query { name: "sherlock", needle: "Шерлок", count: 1 }, 115fb6c1f39Sopenharmony_ci ], 116fb6c1f39Sopenharmony_ci common: &[ 117fb6c1f39Sopenharmony_ci Query { name: "that", needle: "что", count: 998 }, 118fb6c1f39Sopenharmony_ci Query { name: "not", needle: "не", count: 3092 }, 119fb6c1f39Sopenharmony_ci Query { name: "one-space", needle: " ", count: 46941 }, 120fb6c1f39Sopenharmony_ci ], 121fb6c1f39Sopenharmony_ci }, 122fb6c1f39Sopenharmony_ci Input { 123fb6c1f39Sopenharmony_ci name: "huge-zh", 124fb6c1f39Sopenharmony_ci corpus: data::SUBTITLE_ZH_HUGE, 125fb6c1f39Sopenharmony_ci never: &[Query { 126fb6c1f39Sopenharmony_ci name: "john-watson", needle: "约翰·沃森", count: 0 127fb6c1f39Sopenharmony_ci }], 128fb6c1f39Sopenharmony_ci rare: &[ 129fb6c1f39Sopenharmony_ci Query { 130fb6c1f39Sopenharmony_ci name: "sherlock-holmes", 131fb6c1f39Sopenharmony_ci needle: "夏洛克·福尔摩斯", 132fb6c1f39Sopenharmony_ci count: 1, 133fb6c1f39Sopenharmony_ci }, 134fb6c1f39Sopenharmony_ci Query { name: "sherlock", needle: "夏洛克", count: 1 }, 135fb6c1f39Sopenharmony_ci ], 136fb6c1f39Sopenharmony_ci common: &[ 137fb6c1f39Sopenharmony_ci Query { name: "that", needle: "那", count: 1056 }, 138fb6c1f39Sopenharmony_ci Query { name: "do-not", needle: "不", count: 2751 }, 139fb6c1f39Sopenharmony_ci Query { name: "one-space", needle: " ", count: 17232 }, 140fb6c1f39Sopenharmony_ci ], 141fb6c1f39Sopenharmony_ci }, 142fb6c1f39Sopenharmony_ci Input { 143fb6c1f39Sopenharmony_ci name: "teeny-en", 144fb6c1f39Sopenharmony_ci corpus: data::SUBTITLE_EN_TEENY, 145fb6c1f39Sopenharmony_ci never: &[ 146fb6c1f39Sopenharmony_ci Query { name: "john-watson", needle: "John Watson", count: 0 }, 147fb6c1f39Sopenharmony_ci Query { name: "all-common-bytes", needle: "sternness", count: 0 }, 148fb6c1f39Sopenharmony_ci Query { name: "some-rare-bytes", needle: "quartz", count: 0 }, 149fb6c1f39Sopenharmony_ci Query { name: "two-space", needle: " ", count: 0 }, 150fb6c1f39Sopenharmony_ci ], 151fb6c1f39Sopenharmony_ci rare: &[ 152fb6c1f39Sopenharmony_ci Query { 153fb6c1f39Sopenharmony_ci name: "sherlock-holmes", 154fb6c1f39Sopenharmony_ci needle: "Sherlock Holmes", 155fb6c1f39Sopenharmony_ci count: 1, 156fb6c1f39Sopenharmony_ci }, 157fb6c1f39Sopenharmony_ci Query { name: "sherlock", needle: "Sherlock", count: 1 }, 158fb6c1f39Sopenharmony_ci ], 159fb6c1f39Sopenharmony_ci common: &[], 160fb6c1f39Sopenharmony_ci }, 161fb6c1f39Sopenharmony_ci Input { 162fb6c1f39Sopenharmony_ci name: "teeny-ru", 163fb6c1f39Sopenharmony_ci corpus: data::SUBTITLE_RU_TEENY, 164fb6c1f39Sopenharmony_ci never: &[Query { 165fb6c1f39Sopenharmony_ci name: "john-watson", 166fb6c1f39Sopenharmony_ci needle: "Джон Уотсон", 167fb6c1f39Sopenharmony_ci count: 0, 168fb6c1f39Sopenharmony_ci }], 169fb6c1f39Sopenharmony_ci rare: &[ 170fb6c1f39Sopenharmony_ci Query { 171fb6c1f39Sopenharmony_ci name: "sherlock-holmes", 172fb6c1f39Sopenharmony_ci needle: "Шерлок Холмс", 173fb6c1f39Sopenharmony_ci count: 1, 174fb6c1f39Sopenharmony_ci }, 175fb6c1f39Sopenharmony_ci Query { name: "sherlock", needle: "Шерлок", count: 1 }, 176fb6c1f39Sopenharmony_ci ], 177fb6c1f39Sopenharmony_ci common: &[], 178fb6c1f39Sopenharmony_ci }, 179fb6c1f39Sopenharmony_ci Input { 180fb6c1f39Sopenharmony_ci name: "teeny-zh", 181fb6c1f39Sopenharmony_ci corpus: data::SUBTITLE_ZH_TEENY, 182fb6c1f39Sopenharmony_ci never: &[Query { 183fb6c1f39Sopenharmony_ci name: "john-watson", needle: "约翰·沃森", count: 0 184fb6c1f39Sopenharmony_ci }], 185fb6c1f39Sopenharmony_ci rare: &[ 186fb6c1f39Sopenharmony_ci Query { 187fb6c1f39Sopenharmony_ci name: "sherlock-holmes", 188fb6c1f39Sopenharmony_ci needle: "夏洛克·福尔摩斯", 189fb6c1f39Sopenharmony_ci count: 1, 190fb6c1f39Sopenharmony_ci }, 191fb6c1f39Sopenharmony_ci Query { name: "sherlock", needle: "夏洛克", count: 1 }, 192fb6c1f39Sopenharmony_ci ], 193fb6c1f39Sopenharmony_ci common: &[], 194fb6c1f39Sopenharmony_ci }, 195fb6c1f39Sopenharmony_ci Input { 196fb6c1f39Sopenharmony_ci name: "pathological-md5-huge", 197fb6c1f39Sopenharmony_ci corpus: data::PATHOLOGICAL_MD5_HUGE, 198fb6c1f39Sopenharmony_ci never: &[Query { 199fb6c1f39Sopenharmony_ci name: "no-hash", 200fb6c1f39Sopenharmony_ci needle: "61a1a40effcf97de24505f154a306597", 201fb6c1f39Sopenharmony_ci count: 0, 202fb6c1f39Sopenharmony_ci }], 203fb6c1f39Sopenharmony_ci rare: &[Query { 204fb6c1f39Sopenharmony_ci name: "last-hash", 205fb6c1f39Sopenharmony_ci needle: "831df319d8597f5bc793d690f08b159b", 206fb6c1f39Sopenharmony_ci count: 1, 207fb6c1f39Sopenharmony_ci }], 208fb6c1f39Sopenharmony_ci common: &[Query { name: "two-bytes", needle: "fe", count: 520 }], 209fb6c1f39Sopenharmony_ci }, 210fb6c1f39Sopenharmony_ci Input { 211fb6c1f39Sopenharmony_ci name: "pathological-repeated-rare-huge", 212fb6c1f39Sopenharmony_ci corpus: data::PATHOLOGICAL_REPEATED_RARE_HUGE, 213fb6c1f39Sopenharmony_ci never: &[Query { name: "tricky", needle: "abczdef", count: 0 }], 214fb6c1f39Sopenharmony_ci rare: &[], 215fb6c1f39Sopenharmony_ci common: &[Query { name: "match", needle: "zzzzzzzzzz", count: 50010 }], 216fb6c1f39Sopenharmony_ci }, 217fb6c1f39Sopenharmony_ci Input { 218fb6c1f39Sopenharmony_ci name: "pathological-repeated-rare-small", 219fb6c1f39Sopenharmony_ci corpus: data::PATHOLOGICAL_REPEATED_RARE_SMALL, 220fb6c1f39Sopenharmony_ci never: &[Query { name: "tricky", needle: "abczdef", count: 0 }], 221fb6c1f39Sopenharmony_ci rare: &[], 222fb6c1f39Sopenharmony_ci common: &[Query { name: "match", needle: "zzzzzzzzzz", count: 100 }], 223fb6c1f39Sopenharmony_ci }, 224fb6c1f39Sopenharmony_ci Input { 225fb6c1f39Sopenharmony_ci name: "pathological-defeat-simple-vector", 226fb6c1f39Sopenharmony_ci corpus: data::PATHOLOGICAL_DEFEAT_SIMPLE_VECTOR, 227fb6c1f39Sopenharmony_ci never: &[], 228fb6c1f39Sopenharmony_ci rare: &[Query { 229fb6c1f39Sopenharmony_ci name: "alphabet", 230fb6c1f39Sopenharmony_ci needle: "qbz", 231fb6c1f39Sopenharmony_ci count: 1, 232fb6c1f39Sopenharmony_ci }], 233fb6c1f39Sopenharmony_ci common: &[], 234fb6c1f39Sopenharmony_ci }, 235fb6c1f39Sopenharmony_ci Input { 236fb6c1f39Sopenharmony_ci name: "pathological-defeat-simple-vector-freq", 237fb6c1f39Sopenharmony_ci corpus: data::PATHOLOGICAL_DEFEAT_SIMPLE_VECTOR_FREQ, 238fb6c1f39Sopenharmony_ci never: &[], 239fb6c1f39Sopenharmony_ci rare: &[Query { 240fb6c1f39Sopenharmony_ci name: "alphabet", 241fb6c1f39Sopenharmony_ci needle: "qjaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaz", 242fb6c1f39Sopenharmony_ci count: 1, 243fb6c1f39Sopenharmony_ci }], 244fb6c1f39Sopenharmony_ci common: &[], 245fb6c1f39Sopenharmony_ci }, 246fb6c1f39Sopenharmony_ci Input { 247fb6c1f39Sopenharmony_ci name: "pathological-defeat-simple-vector-repeated", 248fb6c1f39Sopenharmony_ci corpus: data::PATHOLOGICAL_DEFEAT_SIMPLE_VECTOR_REPEATED, 249fb6c1f39Sopenharmony_ci never: &[], 250fb6c1f39Sopenharmony_ci rare: &[Query { 251fb6c1f39Sopenharmony_ci name: "alphabet", 252fb6c1f39Sopenharmony_ci needle: "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzaz", 253fb6c1f39Sopenharmony_ci count: 1, 254fb6c1f39Sopenharmony_ci }], 255fb6c1f39Sopenharmony_ci common: &[], 256fb6c1f39Sopenharmony_ci }, 257fb6c1f39Sopenharmony_ci]; 258