18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * lib/ts_kmp.c Knuth-Morris-Pratt text search implementation 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * Authors: Thomas Graf <tgraf@suug.ch> 68c2ecf20Sopenharmony_ci * 78c2ecf20Sopenharmony_ci * ========================================================================== 88c2ecf20Sopenharmony_ci * 98c2ecf20Sopenharmony_ci * Implements a linear-time string-matching algorithm due to Knuth, 108c2ecf20Sopenharmony_ci * Morris, and Pratt [1]. Their algorithm avoids the explicit 118c2ecf20Sopenharmony_ci * computation of the transition function DELTA altogether. Its 128c2ecf20Sopenharmony_ci * matching time is O(n), for n being length(text), using just an 138c2ecf20Sopenharmony_ci * auxiliary function PI[1..m], for m being length(pattern), 148c2ecf20Sopenharmony_ci * precomputed from the pattern in time O(m). The array PI allows 158c2ecf20Sopenharmony_ci * the transition function DELTA to be computed efficiently 168c2ecf20Sopenharmony_ci * "on the fly" as needed. Roughly speaking, for any state 178c2ecf20Sopenharmony_ci * "q" = 0,1,...,m and any character "a" in SIGMA, the value 188c2ecf20Sopenharmony_ci * PI["q"] contains the information that is independent of "a" and 198c2ecf20Sopenharmony_ci * is needed to compute DELTA("q", "a") [2]. Since the array PI 208c2ecf20Sopenharmony_ci * has only m entries, whereas DELTA has O(m|SIGMA|) entries, we 218c2ecf20Sopenharmony_ci * save a factor of |SIGMA| in the preprocessing time by computing 228c2ecf20Sopenharmony_ci * PI rather than DELTA. 238c2ecf20Sopenharmony_ci * 248c2ecf20Sopenharmony_ci * [1] Cormen, Leiserson, Rivest, Stein 258c2ecf20Sopenharmony_ci * Introdcution to Algorithms, 2nd Edition, MIT Press 268c2ecf20Sopenharmony_ci * [2] See finite automaton theory 278c2ecf20Sopenharmony_ci */ 288c2ecf20Sopenharmony_ci 298c2ecf20Sopenharmony_ci#include <linux/module.h> 308c2ecf20Sopenharmony_ci#include <linux/types.h> 318c2ecf20Sopenharmony_ci#include <linux/string.h> 328c2ecf20Sopenharmony_ci#include <linux/ctype.h> 338c2ecf20Sopenharmony_ci#include <linux/textsearch.h> 348c2ecf20Sopenharmony_ci 358c2ecf20Sopenharmony_cistruct ts_kmp 368c2ecf20Sopenharmony_ci{ 378c2ecf20Sopenharmony_ci u8 * pattern; 388c2ecf20Sopenharmony_ci unsigned int pattern_len; 398c2ecf20Sopenharmony_ci unsigned int prefix_tbl[]; 408c2ecf20Sopenharmony_ci}; 418c2ecf20Sopenharmony_ci 428c2ecf20Sopenharmony_cistatic unsigned int kmp_find(struct ts_config *conf, struct ts_state *state) 438c2ecf20Sopenharmony_ci{ 448c2ecf20Sopenharmony_ci struct ts_kmp *kmp = ts_config_priv(conf); 458c2ecf20Sopenharmony_ci unsigned int i, q = 0, text_len, consumed = state->offset; 468c2ecf20Sopenharmony_ci const u8 *text; 478c2ecf20Sopenharmony_ci const int icase = conf->flags & TS_IGNORECASE; 488c2ecf20Sopenharmony_ci 498c2ecf20Sopenharmony_ci for (;;) { 508c2ecf20Sopenharmony_ci text_len = conf->get_next_block(consumed, &text, conf, state); 518c2ecf20Sopenharmony_ci 528c2ecf20Sopenharmony_ci if (unlikely(text_len == 0)) 538c2ecf20Sopenharmony_ci break; 548c2ecf20Sopenharmony_ci 558c2ecf20Sopenharmony_ci for (i = 0; i < text_len; i++) { 568c2ecf20Sopenharmony_ci while (q > 0 && kmp->pattern[q] 578c2ecf20Sopenharmony_ci != (icase ? toupper(text[i]) : text[i])) 588c2ecf20Sopenharmony_ci q = kmp->prefix_tbl[q - 1]; 598c2ecf20Sopenharmony_ci if (kmp->pattern[q] 608c2ecf20Sopenharmony_ci == (icase ? toupper(text[i]) : text[i])) 618c2ecf20Sopenharmony_ci q++; 628c2ecf20Sopenharmony_ci if (unlikely(q == kmp->pattern_len)) { 638c2ecf20Sopenharmony_ci state->offset = consumed + i + 1; 648c2ecf20Sopenharmony_ci return state->offset - kmp->pattern_len; 658c2ecf20Sopenharmony_ci } 668c2ecf20Sopenharmony_ci } 678c2ecf20Sopenharmony_ci 688c2ecf20Sopenharmony_ci consumed += text_len; 698c2ecf20Sopenharmony_ci } 708c2ecf20Sopenharmony_ci 718c2ecf20Sopenharmony_ci return UINT_MAX; 728c2ecf20Sopenharmony_ci} 738c2ecf20Sopenharmony_ci 748c2ecf20Sopenharmony_cistatic inline void compute_prefix_tbl(const u8 *pattern, unsigned int len, 758c2ecf20Sopenharmony_ci unsigned int *prefix_tbl, int flags) 768c2ecf20Sopenharmony_ci{ 778c2ecf20Sopenharmony_ci unsigned int k, q; 788c2ecf20Sopenharmony_ci const u8 icase = flags & TS_IGNORECASE; 798c2ecf20Sopenharmony_ci 808c2ecf20Sopenharmony_ci for (k = 0, q = 1; q < len; q++) { 818c2ecf20Sopenharmony_ci while (k > 0 && (icase ? toupper(pattern[k]) : pattern[k]) 828c2ecf20Sopenharmony_ci != (icase ? toupper(pattern[q]) : pattern[q])) 838c2ecf20Sopenharmony_ci k = prefix_tbl[k-1]; 848c2ecf20Sopenharmony_ci if ((icase ? toupper(pattern[k]) : pattern[k]) 858c2ecf20Sopenharmony_ci == (icase ? toupper(pattern[q]) : pattern[q])) 868c2ecf20Sopenharmony_ci k++; 878c2ecf20Sopenharmony_ci prefix_tbl[q] = k; 888c2ecf20Sopenharmony_ci } 898c2ecf20Sopenharmony_ci} 908c2ecf20Sopenharmony_ci 918c2ecf20Sopenharmony_cistatic struct ts_config *kmp_init(const void *pattern, unsigned int len, 928c2ecf20Sopenharmony_ci gfp_t gfp_mask, int flags) 938c2ecf20Sopenharmony_ci{ 948c2ecf20Sopenharmony_ci struct ts_config *conf; 958c2ecf20Sopenharmony_ci struct ts_kmp *kmp; 968c2ecf20Sopenharmony_ci int i; 978c2ecf20Sopenharmony_ci unsigned int prefix_tbl_len = len * sizeof(unsigned int); 988c2ecf20Sopenharmony_ci size_t priv_size = sizeof(*kmp) + len + prefix_tbl_len; 998c2ecf20Sopenharmony_ci 1008c2ecf20Sopenharmony_ci conf = alloc_ts_config(priv_size, gfp_mask); 1018c2ecf20Sopenharmony_ci if (IS_ERR(conf)) 1028c2ecf20Sopenharmony_ci return conf; 1038c2ecf20Sopenharmony_ci 1048c2ecf20Sopenharmony_ci conf->flags = flags; 1058c2ecf20Sopenharmony_ci kmp = ts_config_priv(conf); 1068c2ecf20Sopenharmony_ci kmp->pattern_len = len; 1078c2ecf20Sopenharmony_ci compute_prefix_tbl(pattern, len, kmp->prefix_tbl, flags); 1088c2ecf20Sopenharmony_ci kmp->pattern = (u8 *) kmp->prefix_tbl + prefix_tbl_len; 1098c2ecf20Sopenharmony_ci if (flags & TS_IGNORECASE) 1108c2ecf20Sopenharmony_ci for (i = 0; i < len; i++) 1118c2ecf20Sopenharmony_ci kmp->pattern[i] = toupper(((u8 *)pattern)[i]); 1128c2ecf20Sopenharmony_ci else 1138c2ecf20Sopenharmony_ci memcpy(kmp->pattern, pattern, len); 1148c2ecf20Sopenharmony_ci 1158c2ecf20Sopenharmony_ci return conf; 1168c2ecf20Sopenharmony_ci} 1178c2ecf20Sopenharmony_ci 1188c2ecf20Sopenharmony_cistatic void *kmp_get_pattern(struct ts_config *conf) 1198c2ecf20Sopenharmony_ci{ 1208c2ecf20Sopenharmony_ci struct ts_kmp *kmp = ts_config_priv(conf); 1218c2ecf20Sopenharmony_ci return kmp->pattern; 1228c2ecf20Sopenharmony_ci} 1238c2ecf20Sopenharmony_ci 1248c2ecf20Sopenharmony_cistatic unsigned int kmp_get_pattern_len(struct ts_config *conf) 1258c2ecf20Sopenharmony_ci{ 1268c2ecf20Sopenharmony_ci struct ts_kmp *kmp = ts_config_priv(conf); 1278c2ecf20Sopenharmony_ci return kmp->pattern_len; 1288c2ecf20Sopenharmony_ci} 1298c2ecf20Sopenharmony_ci 1308c2ecf20Sopenharmony_cistatic struct ts_ops kmp_ops = { 1318c2ecf20Sopenharmony_ci .name = "kmp", 1328c2ecf20Sopenharmony_ci .find = kmp_find, 1338c2ecf20Sopenharmony_ci .init = kmp_init, 1348c2ecf20Sopenharmony_ci .get_pattern = kmp_get_pattern, 1358c2ecf20Sopenharmony_ci .get_pattern_len = kmp_get_pattern_len, 1368c2ecf20Sopenharmony_ci .owner = THIS_MODULE, 1378c2ecf20Sopenharmony_ci .list = LIST_HEAD_INIT(kmp_ops.list) 1388c2ecf20Sopenharmony_ci}; 1398c2ecf20Sopenharmony_ci 1408c2ecf20Sopenharmony_cistatic int __init init_kmp(void) 1418c2ecf20Sopenharmony_ci{ 1428c2ecf20Sopenharmony_ci return textsearch_register(&kmp_ops); 1438c2ecf20Sopenharmony_ci} 1448c2ecf20Sopenharmony_ci 1458c2ecf20Sopenharmony_cistatic void __exit exit_kmp(void) 1468c2ecf20Sopenharmony_ci{ 1478c2ecf20Sopenharmony_ci textsearch_unregister(&kmp_ops); 1488c2ecf20Sopenharmony_ci} 1498c2ecf20Sopenharmony_ci 1508c2ecf20Sopenharmony_ciMODULE_LICENSE("GPL"); 1518c2ecf20Sopenharmony_ci 1528c2ecf20Sopenharmony_cimodule_init(init_kmp); 1538c2ecf20Sopenharmony_cimodule_exit(exit_kmp); 154