18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * lib/ts_kmp.c		Knuth-Morris-Pratt text search implementation
48c2ecf20Sopenharmony_ci *
58c2ecf20Sopenharmony_ci * Authors:	Thomas Graf <tgraf@suug.ch>
68c2ecf20Sopenharmony_ci *
78c2ecf20Sopenharmony_ci * ==========================================================================
88c2ecf20Sopenharmony_ci *
98c2ecf20Sopenharmony_ci *   Implements a linear-time string-matching algorithm due to Knuth,
108c2ecf20Sopenharmony_ci *   Morris, and Pratt [1]. Their algorithm avoids the explicit
118c2ecf20Sopenharmony_ci *   computation of the transition function DELTA altogether. Its
128c2ecf20Sopenharmony_ci *   matching time is O(n), for n being length(text), using just an
138c2ecf20Sopenharmony_ci *   auxiliary function PI[1..m], for m being length(pattern),
148c2ecf20Sopenharmony_ci *   precomputed from the pattern in time O(m). The array PI allows
158c2ecf20Sopenharmony_ci *   the transition function DELTA to be computed efficiently
168c2ecf20Sopenharmony_ci *   "on the fly" as needed. Roughly speaking, for any state
178c2ecf20Sopenharmony_ci *   "q" = 0,1,...,m and any character "a" in SIGMA, the value
188c2ecf20Sopenharmony_ci *   PI["q"] contains the information that is independent of "a" and
198c2ecf20Sopenharmony_ci *   is needed to compute DELTA("q", "a") [2]. Since the array PI
208c2ecf20Sopenharmony_ci *   has only m entries, whereas DELTA has O(m|SIGMA|) entries, we
218c2ecf20Sopenharmony_ci *   save a factor of |SIGMA| in the preprocessing time by computing
228c2ecf20Sopenharmony_ci *   PI rather than DELTA.
238c2ecf20Sopenharmony_ci *
248c2ecf20Sopenharmony_ci *   [1] Cormen, Leiserson, Rivest, Stein
258c2ecf20Sopenharmony_ci *       Introdcution to Algorithms, 2nd Edition, MIT Press
268c2ecf20Sopenharmony_ci *   [2] See finite automaton theory
278c2ecf20Sopenharmony_ci */
288c2ecf20Sopenharmony_ci
298c2ecf20Sopenharmony_ci#include <linux/module.h>
308c2ecf20Sopenharmony_ci#include <linux/types.h>
318c2ecf20Sopenharmony_ci#include <linux/string.h>
328c2ecf20Sopenharmony_ci#include <linux/ctype.h>
338c2ecf20Sopenharmony_ci#include <linux/textsearch.h>
348c2ecf20Sopenharmony_ci
358c2ecf20Sopenharmony_cistruct ts_kmp
368c2ecf20Sopenharmony_ci{
378c2ecf20Sopenharmony_ci	u8 *		pattern;
388c2ecf20Sopenharmony_ci	unsigned int	pattern_len;
398c2ecf20Sopenharmony_ci	unsigned int	prefix_tbl[];
408c2ecf20Sopenharmony_ci};
418c2ecf20Sopenharmony_ci
428c2ecf20Sopenharmony_cistatic unsigned int kmp_find(struct ts_config *conf, struct ts_state *state)
438c2ecf20Sopenharmony_ci{
448c2ecf20Sopenharmony_ci	struct ts_kmp *kmp = ts_config_priv(conf);
458c2ecf20Sopenharmony_ci	unsigned int i, q = 0, text_len, consumed = state->offset;
468c2ecf20Sopenharmony_ci	const u8 *text;
478c2ecf20Sopenharmony_ci	const int icase = conf->flags & TS_IGNORECASE;
488c2ecf20Sopenharmony_ci
498c2ecf20Sopenharmony_ci	for (;;) {
508c2ecf20Sopenharmony_ci		text_len = conf->get_next_block(consumed, &text, conf, state);
518c2ecf20Sopenharmony_ci
528c2ecf20Sopenharmony_ci		if (unlikely(text_len == 0))
538c2ecf20Sopenharmony_ci			break;
548c2ecf20Sopenharmony_ci
558c2ecf20Sopenharmony_ci		for (i = 0; i < text_len; i++) {
568c2ecf20Sopenharmony_ci			while (q > 0 && kmp->pattern[q]
578c2ecf20Sopenharmony_ci			    != (icase ? toupper(text[i]) : text[i]))
588c2ecf20Sopenharmony_ci				q = kmp->prefix_tbl[q - 1];
598c2ecf20Sopenharmony_ci			if (kmp->pattern[q]
608c2ecf20Sopenharmony_ci			    == (icase ? toupper(text[i]) : text[i]))
618c2ecf20Sopenharmony_ci				q++;
628c2ecf20Sopenharmony_ci			if (unlikely(q == kmp->pattern_len)) {
638c2ecf20Sopenharmony_ci				state->offset = consumed + i + 1;
648c2ecf20Sopenharmony_ci				return state->offset - kmp->pattern_len;
658c2ecf20Sopenharmony_ci			}
668c2ecf20Sopenharmony_ci		}
678c2ecf20Sopenharmony_ci
688c2ecf20Sopenharmony_ci		consumed += text_len;
698c2ecf20Sopenharmony_ci	}
708c2ecf20Sopenharmony_ci
718c2ecf20Sopenharmony_ci	return UINT_MAX;
728c2ecf20Sopenharmony_ci}
738c2ecf20Sopenharmony_ci
748c2ecf20Sopenharmony_cistatic inline void compute_prefix_tbl(const u8 *pattern, unsigned int len,
758c2ecf20Sopenharmony_ci				      unsigned int *prefix_tbl, int flags)
768c2ecf20Sopenharmony_ci{
778c2ecf20Sopenharmony_ci	unsigned int k, q;
788c2ecf20Sopenharmony_ci	const u8 icase = flags & TS_IGNORECASE;
798c2ecf20Sopenharmony_ci
808c2ecf20Sopenharmony_ci	for (k = 0, q = 1; q < len; q++) {
818c2ecf20Sopenharmony_ci		while (k > 0 && (icase ? toupper(pattern[k]) : pattern[k])
828c2ecf20Sopenharmony_ci		    != (icase ? toupper(pattern[q]) : pattern[q]))
838c2ecf20Sopenharmony_ci			k = prefix_tbl[k-1];
848c2ecf20Sopenharmony_ci		if ((icase ? toupper(pattern[k]) : pattern[k])
858c2ecf20Sopenharmony_ci		    == (icase ? toupper(pattern[q]) : pattern[q]))
868c2ecf20Sopenharmony_ci			k++;
878c2ecf20Sopenharmony_ci		prefix_tbl[q] = k;
888c2ecf20Sopenharmony_ci	}
898c2ecf20Sopenharmony_ci}
908c2ecf20Sopenharmony_ci
918c2ecf20Sopenharmony_cistatic struct ts_config *kmp_init(const void *pattern, unsigned int len,
928c2ecf20Sopenharmony_ci				  gfp_t gfp_mask, int flags)
938c2ecf20Sopenharmony_ci{
948c2ecf20Sopenharmony_ci	struct ts_config *conf;
958c2ecf20Sopenharmony_ci	struct ts_kmp *kmp;
968c2ecf20Sopenharmony_ci	int i;
978c2ecf20Sopenharmony_ci	unsigned int prefix_tbl_len = len * sizeof(unsigned int);
988c2ecf20Sopenharmony_ci	size_t priv_size = sizeof(*kmp) + len + prefix_tbl_len;
998c2ecf20Sopenharmony_ci
1008c2ecf20Sopenharmony_ci	conf = alloc_ts_config(priv_size, gfp_mask);
1018c2ecf20Sopenharmony_ci	if (IS_ERR(conf))
1028c2ecf20Sopenharmony_ci		return conf;
1038c2ecf20Sopenharmony_ci
1048c2ecf20Sopenharmony_ci	conf->flags = flags;
1058c2ecf20Sopenharmony_ci	kmp = ts_config_priv(conf);
1068c2ecf20Sopenharmony_ci	kmp->pattern_len = len;
1078c2ecf20Sopenharmony_ci	compute_prefix_tbl(pattern, len, kmp->prefix_tbl, flags);
1088c2ecf20Sopenharmony_ci	kmp->pattern = (u8 *) kmp->prefix_tbl + prefix_tbl_len;
1098c2ecf20Sopenharmony_ci	if (flags & TS_IGNORECASE)
1108c2ecf20Sopenharmony_ci		for (i = 0; i < len; i++)
1118c2ecf20Sopenharmony_ci			kmp->pattern[i] = toupper(((u8 *)pattern)[i]);
1128c2ecf20Sopenharmony_ci	else
1138c2ecf20Sopenharmony_ci		memcpy(kmp->pattern, pattern, len);
1148c2ecf20Sopenharmony_ci
1158c2ecf20Sopenharmony_ci	return conf;
1168c2ecf20Sopenharmony_ci}
1178c2ecf20Sopenharmony_ci
1188c2ecf20Sopenharmony_cistatic void *kmp_get_pattern(struct ts_config *conf)
1198c2ecf20Sopenharmony_ci{
1208c2ecf20Sopenharmony_ci	struct ts_kmp *kmp = ts_config_priv(conf);
1218c2ecf20Sopenharmony_ci	return kmp->pattern;
1228c2ecf20Sopenharmony_ci}
1238c2ecf20Sopenharmony_ci
1248c2ecf20Sopenharmony_cistatic unsigned int kmp_get_pattern_len(struct ts_config *conf)
1258c2ecf20Sopenharmony_ci{
1268c2ecf20Sopenharmony_ci	struct ts_kmp *kmp = ts_config_priv(conf);
1278c2ecf20Sopenharmony_ci	return kmp->pattern_len;
1288c2ecf20Sopenharmony_ci}
1298c2ecf20Sopenharmony_ci
1308c2ecf20Sopenharmony_cistatic struct ts_ops kmp_ops = {
1318c2ecf20Sopenharmony_ci	.name		  = "kmp",
1328c2ecf20Sopenharmony_ci	.find		  = kmp_find,
1338c2ecf20Sopenharmony_ci	.init		  = kmp_init,
1348c2ecf20Sopenharmony_ci	.get_pattern	  = kmp_get_pattern,
1358c2ecf20Sopenharmony_ci	.get_pattern_len  = kmp_get_pattern_len,
1368c2ecf20Sopenharmony_ci	.owner		  = THIS_MODULE,
1378c2ecf20Sopenharmony_ci	.list		  = LIST_HEAD_INIT(kmp_ops.list)
1388c2ecf20Sopenharmony_ci};
1398c2ecf20Sopenharmony_ci
1408c2ecf20Sopenharmony_cistatic int __init init_kmp(void)
1418c2ecf20Sopenharmony_ci{
1428c2ecf20Sopenharmony_ci	return textsearch_register(&kmp_ops);
1438c2ecf20Sopenharmony_ci}
1448c2ecf20Sopenharmony_ci
1458c2ecf20Sopenharmony_cistatic void __exit exit_kmp(void)
1468c2ecf20Sopenharmony_ci{
1478c2ecf20Sopenharmony_ci	textsearch_unregister(&kmp_ops);
1488c2ecf20Sopenharmony_ci}
1498c2ecf20Sopenharmony_ci
1508c2ecf20Sopenharmony_ciMODULE_LICENSE("GPL");
1518c2ecf20Sopenharmony_ci
1528c2ecf20Sopenharmony_cimodule_init(init_kmp);
1538c2ecf20Sopenharmony_cimodule_exit(exit_kmp);
154