162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * lib/textsearch.c Generic text search interface 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * Authors: Thomas Graf <tgraf@suug.ch> 662306a36Sopenharmony_ci * Pablo Neira Ayuso <pablo@netfilter.org> 762306a36Sopenharmony_ci * 862306a36Sopenharmony_ci * ========================================================================== 962306a36Sopenharmony_ci */ 1062306a36Sopenharmony_ci 1162306a36Sopenharmony_ci/** 1262306a36Sopenharmony_ci * DOC: ts_intro 1362306a36Sopenharmony_ci * INTRODUCTION 1462306a36Sopenharmony_ci * 1562306a36Sopenharmony_ci * The textsearch infrastructure provides text searching facilities for 1662306a36Sopenharmony_ci * both linear and non-linear data. Individual search algorithms are 1762306a36Sopenharmony_ci * implemented in modules and chosen by the user. 1862306a36Sopenharmony_ci * 1962306a36Sopenharmony_ci * ARCHITECTURE 2062306a36Sopenharmony_ci * 2162306a36Sopenharmony_ci * .. code-block:: none 2262306a36Sopenharmony_ci * 2362306a36Sopenharmony_ci * User 2462306a36Sopenharmony_ci * +----------------+ 2562306a36Sopenharmony_ci * | finish()|<--------------(6)-----------------+ 2662306a36Sopenharmony_ci * |get_next_block()|<--------------(5)---------------+ | 2762306a36Sopenharmony_ci * | | Algorithm | | 2862306a36Sopenharmony_ci * | | +------------------------------+ 2962306a36Sopenharmony_ci * | | | init() find() destroy() | 3062306a36Sopenharmony_ci * | | +------------------------------+ 3162306a36Sopenharmony_ci * | | Core API ^ ^ ^ 3262306a36Sopenharmony_ci * | | +---------------+ (2) (4) (8) 3362306a36Sopenharmony_ci * | (1)|----->| prepare() |---+ | | 3462306a36Sopenharmony_ci * | (3)|----->| find()/next() |-----------+ | 3562306a36Sopenharmony_ci * | (7)|----->| destroy() |----------------------+ 3662306a36Sopenharmony_ci * +----------------+ +---------------+ 3762306a36Sopenharmony_ci * 3862306a36Sopenharmony_ci * (1) User configures a search by calling textsearch_prepare() specifying 3962306a36Sopenharmony_ci * the search parameters such as the pattern and algorithm name. 4062306a36Sopenharmony_ci * (2) Core requests the algorithm to allocate and initialize a search 4162306a36Sopenharmony_ci * configuration according to the specified parameters. 4262306a36Sopenharmony_ci * (3) User starts the search(es) by calling textsearch_find() or 4362306a36Sopenharmony_ci * textsearch_next() to fetch subsequent occurrences. A state variable 4462306a36Sopenharmony_ci * is provided to the algorithm to store persistent variables. 4562306a36Sopenharmony_ci * (4) Core eventually resets the search offset and forwards the find() 4662306a36Sopenharmony_ci * request to the algorithm. 4762306a36Sopenharmony_ci * (5) Algorithm calls get_next_block() provided by the user continuously 4862306a36Sopenharmony_ci * to fetch the data to be searched in block by block. 4962306a36Sopenharmony_ci * (6) Algorithm invokes finish() after the last call to get_next_block 5062306a36Sopenharmony_ci * to clean up any leftovers from get_next_block. (Optional) 5162306a36Sopenharmony_ci * (7) User destroys the configuration by calling textsearch_destroy(). 5262306a36Sopenharmony_ci * (8) Core notifies the algorithm to destroy algorithm specific 5362306a36Sopenharmony_ci * allocations. (Optional) 5462306a36Sopenharmony_ci * 5562306a36Sopenharmony_ci * USAGE 5662306a36Sopenharmony_ci * 5762306a36Sopenharmony_ci * Before a search can be performed, a configuration must be created 5862306a36Sopenharmony_ci * by calling textsearch_prepare() specifying the searching algorithm, 5962306a36Sopenharmony_ci * the pattern to look for and flags. As a flag, you can set TS_IGNORECASE 6062306a36Sopenharmony_ci * to perform case insensitive matching. But it might slow down 6162306a36Sopenharmony_ci * performance of algorithm, so you should use it at own your risk. 6262306a36Sopenharmony_ci * The returned configuration may then be used for an arbitrary 6362306a36Sopenharmony_ci * amount of times and even in parallel as long as a separate struct 6462306a36Sopenharmony_ci * ts_state variable is provided to every instance. 6562306a36Sopenharmony_ci * 6662306a36Sopenharmony_ci * The actual search is performed by either calling 6762306a36Sopenharmony_ci * textsearch_find_continuous() for linear data or by providing 6862306a36Sopenharmony_ci * an own get_next_block() implementation and 6962306a36Sopenharmony_ci * calling textsearch_find(). Both functions return 7062306a36Sopenharmony_ci * the position of the first occurrence of the pattern or UINT_MAX if 7162306a36Sopenharmony_ci * no match was found. Subsequent occurrences can be found by calling 7262306a36Sopenharmony_ci * textsearch_next() regardless of the linearity of the data. 7362306a36Sopenharmony_ci * 7462306a36Sopenharmony_ci * Once you're done using a configuration it must be given back via 7562306a36Sopenharmony_ci * textsearch_destroy. 7662306a36Sopenharmony_ci * 7762306a36Sopenharmony_ci * EXAMPLE:: 7862306a36Sopenharmony_ci * 7962306a36Sopenharmony_ci * int pos; 8062306a36Sopenharmony_ci * struct ts_config *conf; 8162306a36Sopenharmony_ci * struct ts_state state; 8262306a36Sopenharmony_ci * const char *pattern = "chicken"; 8362306a36Sopenharmony_ci * const char *example = "We dance the funky chicken"; 8462306a36Sopenharmony_ci * 8562306a36Sopenharmony_ci * conf = textsearch_prepare("kmp", pattern, strlen(pattern), 8662306a36Sopenharmony_ci * GFP_KERNEL, TS_AUTOLOAD); 8762306a36Sopenharmony_ci * if (IS_ERR(conf)) { 8862306a36Sopenharmony_ci * err = PTR_ERR(conf); 8962306a36Sopenharmony_ci * goto errout; 9062306a36Sopenharmony_ci * } 9162306a36Sopenharmony_ci * 9262306a36Sopenharmony_ci * pos = textsearch_find_continuous(conf, &state, example, strlen(example)); 9362306a36Sopenharmony_ci * if (pos != UINT_MAX) 9462306a36Sopenharmony_ci * panic("Oh my god, dancing chickens at %d\n", pos); 9562306a36Sopenharmony_ci * 9662306a36Sopenharmony_ci * textsearch_destroy(conf); 9762306a36Sopenharmony_ci */ 9862306a36Sopenharmony_ci/* ========================================================================== */ 9962306a36Sopenharmony_ci 10062306a36Sopenharmony_ci#include <linux/module.h> 10162306a36Sopenharmony_ci#include <linux/types.h> 10262306a36Sopenharmony_ci#include <linux/string.h> 10362306a36Sopenharmony_ci#include <linux/init.h> 10462306a36Sopenharmony_ci#include <linux/rculist.h> 10562306a36Sopenharmony_ci#include <linux/rcupdate.h> 10662306a36Sopenharmony_ci#include <linux/err.h> 10762306a36Sopenharmony_ci#include <linux/textsearch.h> 10862306a36Sopenharmony_ci#include <linux/slab.h> 10962306a36Sopenharmony_ci 11062306a36Sopenharmony_cistatic LIST_HEAD(ts_ops); 11162306a36Sopenharmony_cistatic DEFINE_SPINLOCK(ts_mod_lock); 11262306a36Sopenharmony_ci 11362306a36Sopenharmony_cistatic inline struct ts_ops *lookup_ts_algo(const char *name) 11462306a36Sopenharmony_ci{ 11562306a36Sopenharmony_ci struct ts_ops *o; 11662306a36Sopenharmony_ci 11762306a36Sopenharmony_ci rcu_read_lock(); 11862306a36Sopenharmony_ci list_for_each_entry_rcu(o, &ts_ops, list) { 11962306a36Sopenharmony_ci if (!strcmp(name, o->name)) { 12062306a36Sopenharmony_ci if (!try_module_get(o->owner)) 12162306a36Sopenharmony_ci o = NULL; 12262306a36Sopenharmony_ci rcu_read_unlock(); 12362306a36Sopenharmony_ci return o; 12462306a36Sopenharmony_ci } 12562306a36Sopenharmony_ci } 12662306a36Sopenharmony_ci rcu_read_unlock(); 12762306a36Sopenharmony_ci 12862306a36Sopenharmony_ci return NULL; 12962306a36Sopenharmony_ci} 13062306a36Sopenharmony_ci 13162306a36Sopenharmony_ci/** 13262306a36Sopenharmony_ci * textsearch_register - register a textsearch module 13362306a36Sopenharmony_ci * @ops: operations lookup table 13462306a36Sopenharmony_ci * 13562306a36Sopenharmony_ci * This function must be called by textsearch modules to announce 13662306a36Sopenharmony_ci * their presence. The specified &@ops must have %name set to a 13762306a36Sopenharmony_ci * unique identifier and the callbacks find(), init(), get_pattern(), 13862306a36Sopenharmony_ci * and get_pattern_len() must be implemented. 13962306a36Sopenharmony_ci * 14062306a36Sopenharmony_ci * Returns 0 or -EEXISTS if another module has already registered 14162306a36Sopenharmony_ci * with same name. 14262306a36Sopenharmony_ci */ 14362306a36Sopenharmony_ciint textsearch_register(struct ts_ops *ops) 14462306a36Sopenharmony_ci{ 14562306a36Sopenharmony_ci int err = -EEXIST; 14662306a36Sopenharmony_ci struct ts_ops *o; 14762306a36Sopenharmony_ci 14862306a36Sopenharmony_ci if (ops->name == NULL || ops->find == NULL || ops->init == NULL || 14962306a36Sopenharmony_ci ops->get_pattern == NULL || ops->get_pattern_len == NULL) 15062306a36Sopenharmony_ci return -EINVAL; 15162306a36Sopenharmony_ci 15262306a36Sopenharmony_ci spin_lock(&ts_mod_lock); 15362306a36Sopenharmony_ci list_for_each_entry(o, &ts_ops, list) { 15462306a36Sopenharmony_ci if (!strcmp(ops->name, o->name)) 15562306a36Sopenharmony_ci goto errout; 15662306a36Sopenharmony_ci } 15762306a36Sopenharmony_ci 15862306a36Sopenharmony_ci list_add_tail_rcu(&ops->list, &ts_ops); 15962306a36Sopenharmony_ci err = 0; 16062306a36Sopenharmony_cierrout: 16162306a36Sopenharmony_ci spin_unlock(&ts_mod_lock); 16262306a36Sopenharmony_ci return err; 16362306a36Sopenharmony_ci} 16462306a36Sopenharmony_ciEXPORT_SYMBOL(textsearch_register); 16562306a36Sopenharmony_ci 16662306a36Sopenharmony_ci/** 16762306a36Sopenharmony_ci * textsearch_unregister - unregister a textsearch module 16862306a36Sopenharmony_ci * @ops: operations lookup table 16962306a36Sopenharmony_ci * 17062306a36Sopenharmony_ci * This function must be called by textsearch modules to announce 17162306a36Sopenharmony_ci * their disappearance for examples when the module gets unloaded. 17262306a36Sopenharmony_ci * The &ops parameter must be the same as the one during the 17362306a36Sopenharmony_ci * registration. 17462306a36Sopenharmony_ci * 17562306a36Sopenharmony_ci * Returns 0 on success or -ENOENT if no matching textsearch 17662306a36Sopenharmony_ci * registration was found. 17762306a36Sopenharmony_ci */ 17862306a36Sopenharmony_ciint textsearch_unregister(struct ts_ops *ops) 17962306a36Sopenharmony_ci{ 18062306a36Sopenharmony_ci int err = 0; 18162306a36Sopenharmony_ci struct ts_ops *o; 18262306a36Sopenharmony_ci 18362306a36Sopenharmony_ci spin_lock(&ts_mod_lock); 18462306a36Sopenharmony_ci list_for_each_entry(o, &ts_ops, list) { 18562306a36Sopenharmony_ci if (o == ops) { 18662306a36Sopenharmony_ci list_del_rcu(&o->list); 18762306a36Sopenharmony_ci goto out; 18862306a36Sopenharmony_ci } 18962306a36Sopenharmony_ci } 19062306a36Sopenharmony_ci 19162306a36Sopenharmony_ci err = -ENOENT; 19262306a36Sopenharmony_ciout: 19362306a36Sopenharmony_ci spin_unlock(&ts_mod_lock); 19462306a36Sopenharmony_ci return err; 19562306a36Sopenharmony_ci} 19662306a36Sopenharmony_ciEXPORT_SYMBOL(textsearch_unregister); 19762306a36Sopenharmony_ci 19862306a36Sopenharmony_cistruct ts_linear_state 19962306a36Sopenharmony_ci{ 20062306a36Sopenharmony_ci unsigned int len; 20162306a36Sopenharmony_ci const void *data; 20262306a36Sopenharmony_ci}; 20362306a36Sopenharmony_ci 20462306a36Sopenharmony_cistatic unsigned int get_linear_data(unsigned int consumed, const u8 **dst, 20562306a36Sopenharmony_ci struct ts_config *conf, 20662306a36Sopenharmony_ci struct ts_state *state) 20762306a36Sopenharmony_ci{ 20862306a36Sopenharmony_ci struct ts_linear_state *st = (struct ts_linear_state *) state->cb; 20962306a36Sopenharmony_ci 21062306a36Sopenharmony_ci if (likely(consumed < st->len)) { 21162306a36Sopenharmony_ci *dst = st->data + consumed; 21262306a36Sopenharmony_ci return st->len - consumed; 21362306a36Sopenharmony_ci } 21462306a36Sopenharmony_ci 21562306a36Sopenharmony_ci return 0; 21662306a36Sopenharmony_ci} 21762306a36Sopenharmony_ci 21862306a36Sopenharmony_ci/** 21962306a36Sopenharmony_ci * textsearch_find_continuous - search a pattern in continuous/linear data 22062306a36Sopenharmony_ci * @conf: search configuration 22162306a36Sopenharmony_ci * @state: search state 22262306a36Sopenharmony_ci * @data: data to search in 22362306a36Sopenharmony_ci * @len: length of data 22462306a36Sopenharmony_ci * 22562306a36Sopenharmony_ci * A simplified version of textsearch_find() for continuous/linear data. 22662306a36Sopenharmony_ci * Call textsearch_next() to retrieve subsequent matches. 22762306a36Sopenharmony_ci * 22862306a36Sopenharmony_ci * Returns the position of first occurrence of the pattern or 22962306a36Sopenharmony_ci * %UINT_MAX if no occurrence was found. 23062306a36Sopenharmony_ci */ 23162306a36Sopenharmony_ciunsigned int textsearch_find_continuous(struct ts_config *conf, 23262306a36Sopenharmony_ci struct ts_state *state, 23362306a36Sopenharmony_ci const void *data, unsigned int len) 23462306a36Sopenharmony_ci{ 23562306a36Sopenharmony_ci struct ts_linear_state *st = (struct ts_linear_state *) state->cb; 23662306a36Sopenharmony_ci 23762306a36Sopenharmony_ci conf->get_next_block = get_linear_data; 23862306a36Sopenharmony_ci st->data = data; 23962306a36Sopenharmony_ci st->len = len; 24062306a36Sopenharmony_ci 24162306a36Sopenharmony_ci return textsearch_find(conf, state); 24262306a36Sopenharmony_ci} 24362306a36Sopenharmony_ciEXPORT_SYMBOL(textsearch_find_continuous); 24462306a36Sopenharmony_ci 24562306a36Sopenharmony_ci/** 24662306a36Sopenharmony_ci * textsearch_prepare - Prepare a search 24762306a36Sopenharmony_ci * @algo: name of search algorithm 24862306a36Sopenharmony_ci * @pattern: pattern data 24962306a36Sopenharmony_ci * @len: length of pattern 25062306a36Sopenharmony_ci * @gfp_mask: allocation mask 25162306a36Sopenharmony_ci * @flags: search flags 25262306a36Sopenharmony_ci * 25362306a36Sopenharmony_ci * Looks up the search algorithm module and creates a new textsearch 25462306a36Sopenharmony_ci * configuration for the specified pattern. 25562306a36Sopenharmony_ci * 25662306a36Sopenharmony_ci * Note: The format of the pattern may not be compatible between 25762306a36Sopenharmony_ci * the various search algorithms. 25862306a36Sopenharmony_ci * 25962306a36Sopenharmony_ci * Returns a new textsearch configuration according to the specified 26062306a36Sopenharmony_ci * parameters or a ERR_PTR(). If a zero length pattern is passed, this 26162306a36Sopenharmony_ci * function returns EINVAL. 26262306a36Sopenharmony_ci */ 26362306a36Sopenharmony_cistruct ts_config *textsearch_prepare(const char *algo, const void *pattern, 26462306a36Sopenharmony_ci unsigned int len, gfp_t gfp_mask, int flags) 26562306a36Sopenharmony_ci{ 26662306a36Sopenharmony_ci int err = -ENOENT; 26762306a36Sopenharmony_ci struct ts_config *conf; 26862306a36Sopenharmony_ci struct ts_ops *ops; 26962306a36Sopenharmony_ci 27062306a36Sopenharmony_ci if (len == 0) 27162306a36Sopenharmony_ci return ERR_PTR(-EINVAL); 27262306a36Sopenharmony_ci 27362306a36Sopenharmony_ci ops = lookup_ts_algo(algo); 27462306a36Sopenharmony_ci#ifdef CONFIG_MODULES 27562306a36Sopenharmony_ci /* 27662306a36Sopenharmony_ci * Why not always autoload you may ask. Some users are 27762306a36Sopenharmony_ci * in a situation where requesting a module may deadlock, 27862306a36Sopenharmony_ci * especially when the module is located on a NFS mount. 27962306a36Sopenharmony_ci */ 28062306a36Sopenharmony_ci if (ops == NULL && flags & TS_AUTOLOAD) { 28162306a36Sopenharmony_ci request_module("ts_%s", algo); 28262306a36Sopenharmony_ci ops = lookup_ts_algo(algo); 28362306a36Sopenharmony_ci } 28462306a36Sopenharmony_ci#endif 28562306a36Sopenharmony_ci 28662306a36Sopenharmony_ci if (ops == NULL) 28762306a36Sopenharmony_ci goto errout; 28862306a36Sopenharmony_ci 28962306a36Sopenharmony_ci conf = ops->init(pattern, len, gfp_mask, flags); 29062306a36Sopenharmony_ci if (IS_ERR(conf)) { 29162306a36Sopenharmony_ci err = PTR_ERR(conf); 29262306a36Sopenharmony_ci goto errout; 29362306a36Sopenharmony_ci } 29462306a36Sopenharmony_ci 29562306a36Sopenharmony_ci conf->ops = ops; 29662306a36Sopenharmony_ci return conf; 29762306a36Sopenharmony_ci 29862306a36Sopenharmony_cierrout: 29962306a36Sopenharmony_ci if (ops) 30062306a36Sopenharmony_ci module_put(ops->owner); 30162306a36Sopenharmony_ci 30262306a36Sopenharmony_ci return ERR_PTR(err); 30362306a36Sopenharmony_ci} 30462306a36Sopenharmony_ciEXPORT_SYMBOL(textsearch_prepare); 30562306a36Sopenharmony_ci 30662306a36Sopenharmony_ci/** 30762306a36Sopenharmony_ci * textsearch_destroy - destroy a search configuration 30862306a36Sopenharmony_ci * @conf: search configuration 30962306a36Sopenharmony_ci * 31062306a36Sopenharmony_ci * Releases all references of the configuration and frees 31162306a36Sopenharmony_ci * up the memory. 31262306a36Sopenharmony_ci */ 31362306a36Sopenharmony_civoid textsearch_destroy(struct ts_config *conf) 31462306a36Sopenharmony_ci{ 31562306a36Sopenharmony_ci if (conf->ops) { 31662306a36Sopenharmony_ci if (conf->ops->destroy) 31762306a36Sopenharmony_ci conf->ops->destroy(conf); 31862306a36Sopenharmony_ci module_put(conf->ops->owner); 31962306a36Sopenharmony_ci } 32062306a36Sopenharmony_ci 32162306a36Sopenharmony_ci kfree(conf); 32262306a36Sopenharmony_ci} 32362306a36Sopenharmony_ciEXPORT_SYMBOL(textsearch_destroy); 324