18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * lib/textsearch.c Generic text search interface 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * Authors: Thomas Graf <tgraf@suug.ch> 68c2ecf20Sopenharmony_ci * Pablo Neira Ayuso <pablo@netfilter.org> 78c2ecf20Sopenharmony_ci * 88c2ecf20Sopenharmony_ci * ========================================================================== 98c2ecf20Sopenharmony_ci */ 108c2ecf20Sopenharmony_ci 118c2ecf20Sopenharmony_ci/** 128c2ecf20Sopenharmony_ci * DOC: ts_intro 138c2ecf20Sopenharmony_ci * INTRODUCTION 148c2ecf20Sopenharmony_ci * 158c2ecf20Sopenharmony_ci * The textsearch infrastructure provides text searching facilities for 168c2ecf20Sopenharmony_ci * both linear and non-linear data. Individual search algorithms are 178c2ecf20Sopenharmony_ci * implemented in modules and chosen by the user. 188c2ecf20Sopenharmony_ci * 198c2ecf20Sopenharmony_ci * ARCHITECTURE 208c2ecf20Sopenharmony_ci * 218c2ecf20Sopenharmony_ci * .. code-block:: none 228c2ecf20Sopenharmony_ci * 238c2ecf20Sopenharmony_ci * User 248c2ecf20Sopenharmony_ci * +----------------+ 258c2ecf20Sopenharmony_ci * | finish()|<--------------(6)-----------------+ 268c2ecf20Sopenharmony_ci * |get_next_block()|<--------------(5)---------------+ | 278c2ecf20Sopenharmony_ci * | | Algorithm | | 288c2ecf20Sopenharmony_ci * | | +------------------------------+ 298c2ecf20Sopenharmony_ci * | | | init() find() destroy() | 308c2ecf20Sopenharmony_ci * | | +------------------------------+ 318c2ecf20Sopenharmony_ci * | | Core API ^ ^ ^ 328c2ecf20Sopenharmony_ci * | | +---------------+ (2) (4) (8) 338c2ecf20Sopenharmony_ci * | (1)|----->| prepare() |---+ | | 348c2ecf20Sopenharmony_ci * | (3)|----->| find()/next() |-----------+ | 358c2ecf20Sopenharmony_ci * | (7)|----->| destroy() |----------------------+ 368c2ecf20Sopenharmony_ci * +----------------+ +---------------+ 378c2ecf20Sopenharmony_ci * 388c2ecf20Sopenharmony_ci * (1) User configures a search by calling textsearch_prepare() specifying 398c2ecf20Sopenharmony_ci * the search parameters such as the pattern and algorithm name. 408c2ecf20Sopenharmony_ci * (2) Core requests the algorithm to allocate and initialize a search 418c2ecf20Sopenharmony_ci * configuration according to the specified parameters. 428c2ecf20Sopenharmony_ci * (3) User starts the search(es) by calling textsearch_find() or 438c2ecf20Sopenharmony_ci * textsearch_next() to fetch subsequent occurrences. A state variable 448c2ecf20Sopenharmony_ci * is provided to the algorithm to store persistent variables. 458c2ecf20Sopenharmony_ci * (4) Core eventually resets the search offset and forwards the find() 468c2ecf20Sopenharmony_ci * request to the algorithm. 478c2ecf20Sopenharmony_ci * (5) Algorithm calls get_next_block() provided by the user continuously 488c2ecf20Sopenharmony_ci * to fetch the data to be searched in block by block. 498c2ecf20Sopenharmony_ci * (6) Algorithm invokes finish() after the last call to get_next_block 508c2ecf20Sopenharmony_ci * to clean up any leftovers from get_next_block. (Optional) 518c2ecf20Sopenharmony_ci * (7) User destroys the configuration by calling textsearch_destroy(). 528c2ecf20Sopenharmony_ci * (8) Core notifies the algorithm to destroy algorithm specific 538c2ecf20Sopenharmony_ci * allocations. (Optional) 548c2ecf20Sopenharmony_ci * 558c2ecf20Sopenharmony_ci * USAGE 568c2ecf20Sopenharmony_ci * 578c2ecf20Sopenharmony_ci * Before a search can be performed, a configuration must be created 588c2ecf20Sopenharmony_ci * by calling textsearch_prepare() specifying the searching algorithm, 598c2ecf20Sopenharmony_ci * the pattern to look for and flags. As a flag, you can set TS_IGNORECASE 608c2ecf20Sopenharmony_ci * to perform case insensitive matching. But it might slow down 618c2ecf20Sopenharmony_ci * performance of algorithm, so you should use it at own your risk. 628c2ecf20Sopenharmony_ci * The returned configuration may then be used for an arbitrary 638c2ecf20Sopenharmony_ci * amount of times and even in parallel as long as a separate struct 648c2ecf20Sopenharmony_ci * ts_state variable is provided to every instance. 658c2ecf20Sopenharmony_ci * 668c2ecf20Sopenharmony_ci * The actual search is performed by either calling 678c2ecf20Sopenharmony_ci * textsearch_find_continuous() for linear data or by providing 688c2ecf20Sopenharmony_ci * an own get_next_block() implementation and 698c2ecf20Sopenharmony_ci * calling textsearch_find(). Both functions return 708c2ecf20Sopenharmony_ci * the position of the first occurrence of the pattern or UINT_MAX if 718c2ecf20Sopenharmony_ci * no match was found. Subsequent occurrences can be found by calling 728c2ecf20Sopenharmony_ci * textsearch_next() regardless of the linearity of the data. 738c2ecf20Sopenharmony_ci * 748c2ecf20Sopenharmony_ci * Once you're done using a configuration it must be given back via 758c2ecf20Sopenharmony_ci * textsearch_destroy. 768c2ecf20Sopenharmony_ci * 778c2ecf20Sopenharmony_ci * EXAMPLE:: 788c2ecf20Sopenharmony_ci * 798c2ecf20Sopenharmony_ci * int pos; 808c2ecf20Sopenharmony_ci * struct ts_config *conf; 818c2ecf20Sopenharmony_ci * struct ts_state state; 828c2ecf20Sopenharmony_ci * const char *pattern = "chicken"; 838c2ecf20Sopenharmony_ci * const char *example = "We dance the funky chicken"; 848c2ecf20Sopenharmony_ci * 858c2ecf20Sopenharmony_ci * conf = textsearch_prepare("kmp", pattern, strlen(pattern), 868c2ecf20Sopenharmony_ci * GFP_KERNEL, TS_AUTOLOAD); 878c2ecf20Sopenharmony_ci * if (IS_ERR(conf)) { 888c2ecf20Sopenharmony_ci * err = PTR_ERR(conf); 898c2ecf20Sopenharmony_ci * goto errout; 908c2ecf20Sopenharmony_ci * } 918c2ecf20Sopenharmony_ci * 928c2ecf20Sopenharmony_ci * pos = textsearch_find_continuous(conf, &state, example, strlen(example)); 938c2ecf20Sopenharmony_ci * if (pos != UINT_MAX) 948c2ecf20Sopenharmony_ci * panic("Oh my god, dancing chickens at %d\n", pos); 958c2ecf20Sopenharmony_ci * 968c2ecf20Sopenharmony_ci * textsearch_destroy(conf); 978c2ecf20Sopenharmony_ci */ 988c2ecf20Sopenharmony_ci/* ========================================================================== */ 998c2ecf20Sopenharmony_ci 1008c2ecf20Sopenharmony_ci#include <linux/module.h> 1018c2ecf20Sopenharmony_ci#include <linux/types.h> 1028c2ecf20Sopenharmony_ci#include <linux/string.h> 1038c2ecf20Sopenharmony_ci#include <linux/init.h> 1048c2ecf20Sopenharmony_ci#include <linux/rculist.h> 1058c2ecf20Sopenharmony_ci#include <linux/rcupdate.h> 1068c2ecf20Sopenharmony_ci#include <linux/err.h> 1078c2ecf20Sopenharmony_ci#include <linux/textsearch.h> 1088c2ecf20Sopenharmony_ci#include <linux/slab.h> 1098c2ecf20Sopenharmony_ci 1108c2ecf20Sopenharmony_cistatic LIST_HEAD(ts_ops); 1118c2ecf20Sopenharmony_cistatic DEFINE_SPINLOCK(ts_mod_lock); 1128c2ecf20Sopenharmony_ci 1138c2ecf20Sopenharmony_cistatic inline struct ts_ops *lookup_ts_algo(const char *name) 1148c2ecf20Sopenharmony_ci{ 1158c2ecf20Sopenharmony_ci struct ts_ops *o; 1168c2ecf20Sopenharmony_ci 1178c2ecf20Sopenharmony_ci rcu_read_lock(); 1188c2ecf20Sopenharmony_ci list_for_each_entry_rcu(o, &ts_ops, list) { 1198c2ecf20Sopenharmony_ci if (!strcmp(name, o->name)) { 1208c2ecf20Sopenharmony_ci if (!try_module_get(o->owner)) 1218c2ecf20Sopenharmony_ci o = NULL; 1228c2ecf20Sopenharmony_ci rcu_read_unlock(); 1238c2ecf20Sopenharmony_ci return o; 1248c2ecf20Sopenharmony_ci } 1258c2ecf20Sopenharmony_ci } 1268c2ecf20Sopenharmony_ci rcu_read_unlock(); 1278c2ecf20Sopenharmony_ci 1288c2ecf20Sopenharmony_ci return NULL; 1298c2ecf20Sopenharmony_ci} 1308c2ecf20Sopenharmony_ci 1318c2ecf20Sopenharmony_ci/** 1328c2ecf20Sopenharmony_ci * textsearch_register - register a textsearch module 1338c2ecf20Sopenharmony_ci * @ops: operations lookup table 1348c2ecf20Sopenharmony_ci * 1358c2ecf20Sopenharmony_ci * This function must be called by textsearch modules to announce 1368c2ecf20Sopenharmony_ci * their presence. The specified &@ops must have %name set to a 1378c2ecf20Sopenharmony_ci * unique identifier and the callbacks find(), init(), get_pattern(), 1388c2ecf20Sopenharmony_ci * and get_pattern_len() must be implemented. 1398c2ecf20Sopenharmony_ci * 1408c2ecf20Sopenharmony_ci * Returns 0 or -EEXISTS if another module has already registered 1418c2ecf20Sopenharmony_ci * with same name. 1428c2ecf20Sopenharmony_ci */ 1438c2ecf20Sopenharmony_ciint textsearch_register(struct ts_ops *ops) 1448c2ecf20Sopenharmony_ci{ 1458c2ecf20Sopenharmony_ci int err = -EEXIST; 1468c2ecf20Sopenharmony_ci struct ts_ops *o; 1478c2ecf20Sopenharmony_ci 1488c2ecf20Sopenharmony_ci if (ops->name == NULL || ops->find == NULL || ops->init == NULL || 1498c2ecf20Sopenharmony_ci ops->get_pattern == NULL || ops->get_pattern_len == NULL) 1508c2ecf20Sopenharmony_ci return -EINVAL; 1518c2ecf20Sopenharmony_ci 1528c2ecf20Sopenharmony_ci spin_lock(&ts_mod_lock); 1538c2ecf20Sopenharmony_ci list_for_each_entry(o, &ts_ops, list) { 1548c2ecf20Sopenharmony_ci if (!strcmp(ops->name, o->name)) 1558c2ecf20Sopenharmony_ci goto errout; 1568c2ecf20Sopenharmony_ci } 1578c2ecf20Sopenharmony_ci 1588c2ecf20Sopenharmony_ci list_add_tail_rcu(&ops->list, &ts_ops); 1598c2ecf20Sopenharmony_ci err = 0; 1608c2ecf20Sopenharmony_cierrout: 1618c2ecf20Sopenharmony_ci spin_unlock(&ts_mod_lock); 1628c2ecf20Sopenharmony_ci return err; 1638c2ecf20Sopenharmony_ci} 1648c2ecf20Sopenharmony_ciEXPORT_SYMBOL(textsearch_register); 1658c2ecf20Sopenharmony_ci 1668c2ecf20Sopenharmony_ci/** 1678c2ecf20Sopenharmony_ci * textsearch_unregister - unregister a textsearch module 1688c2ecf20Sopenharmony_ci * @ops: operations lookup table 1698c2ecf20Sopenharmony_ci * 1708c2ecf20Sopenharmony_ci * This function must be called by textsearch modules to announce 1718c2ecf20Sopenharmony_ci * their disappearance for examples when the module gets unloaded. 1728c2ecf20Sopenharmony_ci * The &ops parameter must be the same as the one during the 1738c2ecf20Sopenharmony_ci * registration. 1748c2ecf20Sopenharmony_ci * 1758c2ecf20Sopenharmony_ci * Returns 0 on success or -ENOENT if no matching textsearch 1768c2ecf20Sopenharmony_ci * registration was found. 1778c2ecf20Sopenharmony_ci */ 1788c2ecf20Sopenharmony_ciint textsearch_unregister(struct ts_ops *ops) 1798c2ecf20Sopenharmony_ci{ 1808c2ecf20Sopenharmony_ci int err = 0; 1818c2ecf20Sopenharmony_ci struct ts_ops *o; 1828c2ecf20Sopenharmony_ci 1838c2ecf20Sopenharmony_ci spin_lock(&ts_mod_lock); 1848c2ecf20Sopenharmony_ci list_for_each_entry(o, &ts_ops, list) { 1858c2ecf20Sopenharmony_ci if (o == ops) { 1868c2ecf20Sopenharmony_ci list_del_rcu(&o->list); 1878c2ecf20Sopenharmony_ci goto out; 1888c2ecf20Sopenharmony_ci } 1898c2ecf20Sopenharmony_ci } 1908c2ecf20Sopenharmony_ci 1918c2ecf20Sopenharmony_ci err = -ENOENT; 1928c2ecf20Sopenharmony_ciout: 1938c2ecf20Sopenharmony_ci spin_unlock(&ts_mod_lock); 1948c2ecf20Sopenharmony_ci return err; 1958c2ecf20Sopenharmony_ci} 1968c2ecf20Sopenharmony_ciEXPORT_SYMBOL(textsearch_unregister); 1978c2ecf20Sopenharmony_ci 1988c2ecf20Sopenharmony_cistruct ts_linear_state 1998c2ecf20Sopenharmony_ci{ 2008c2ecf20Sopenharmony_ci unsigned int len; 2018c2ecf20Sopenharmony_ci const void *data; 2028c2ecf20Sopenharmony_ci}; 2038c2ecf20Sopenharmony_ci 2048c2ecf20Sopenharmony_cistatic unsigned int get_linear_data(unsigned int consumed, const u8 **dst, 2058c2ecf20Sopenharmony_ci struct ts_config *conf, 2068c2ecf20Sopenharmony_ci struct ts_state *state) 2078c2ecf20Sopenharmony_ci{ 2088c2ecf20Sopenharmony_ci struct ts_linear_state *st = (struct ts_linear_state *) state->cb; 2098c2ecf20Sopenharmony_ci 2108c2ecf20Sopenharmony_ci if (likely(consumed < st->len)) { 2118c2ecf20Sopenharmony_ci *dst = st->data + consumed; 2128c2ecf20Sopenharmony_ci return st->len - consumed; 2138c2ecf20Sopenharmony_ci } 2148c2ecf20Sopenharmony_ci 2158c2ecf20Sopenharmony_ci return 0; 2168c2ecf20Sopenharmony_ci} 2178c2ecf20Sopenharmony_ci 2188c2ecf20Sopenharmony_ci/** 2198c2ecf20Sopenharmony_ci * textsearch_find_continuous - search a pattern in continuous/linear data 2208c2ecf20Sopenharmony_ci * @conf: search configuration 2218c2ecf20Sopenharmony_ci * @state: search state 2228c2ecf20Sopenharmony_ci * @data: data to search in 2238c2ecf20Sopenharmony_ci * @len: length of data 2248c2ecf20Sopenharmony_ci * 2258c2ecf20Sopenharmony_ci * A simplified version of textsearch_find() for continuous/linear data. 2268c2ecf20Sopenharmony_ci * Call textsearch_next() to retrieve subsequent matches. 2278c2ecf20Sopenharmony_ci * 2288c2ecf20Sopenharmony_ci * Returns the position of first occurrence of the pattern or 2298c2ecf20Sopenharmony_ci * %UINT_MAX if no occurrence was found. 2308c2ecf20Sopenharmony_ci */ 2318c2ecf20Sopenharmony_ciunsigned int textsearch_find_continuous(struct ts_config *conf, 2328c2ecf20Sopenharmony_ci struct ts_state *state, 2338c2ecf20Sopenharmony_ci const void *data, unsigned int len) 2348c2ecf20Sopenharmony_ci{ 2358c2ecf20Sopenharmony_ci struct ts_linear_state *st = (struct ts_linear_state *) state->cb; 2368c2ecf20Sopenharmony_ci 2378c2ecf20Sopenharmony_ci conf->get_next_block = get_linear_data; 2388c2ecf20Sopenharmony_ci st->data = data; 2398c2ecf20Sopenharmony_ci st->len = len; 2408c2ecf20Sopenharmony_ci 2418c2ecf20Sopenharmony_ci return textsearch_find(conf, state); 2428c2ecf20Sopenharmony_ci} 2438c2ecf20Sopenharmony_ciEXPORT_SYMBOL(textsearch_find_continuous); 2448c2ecf20Sopenharmony_ci 2458c2ecf20Sopenharmony_ci/** 2468c2ecf20Sopenharmony_ci * textsearch_prepare - Prepare a search 2478c2ecf20Sopenharmony_ci * @algo: name of search algorithm 2488c2ecf20Sopenharmony_ci * @pattern: pattern data 2498c2ecf20Sopenharmony_ci * @len: length of pattern 2508c2ecf20Sopenharmony_ci * @gfp_mask: allocation mask 2518c2ecf20Sopenharmony_ci * @flags: search flags 2528c2ecf20Sopenharmony_ci * 2538c2ecf20Sopenharmony_ci * Looks up the search algorithm module and creates a new textsearch 2548c2ecf20Sopenharmony_ci * configuration for the specified pattern. 2558c2ecf20Sopenharmony_ci * 2568c2ecf20Sopenharmony_ci * Note: The format of the pattern may not be compatible between 2578c2ecf20Sopenharmony_ci * the various search algorithms. 2588c2ecf20Sopenharmony_ci * 2598c2ecf20Sopenharmony_ci * Returns a new textsearch configuration according to the specified 2608c2ecf20Sopenharmony_ci * parameters or a ERR_PTR(). If a zero length pattern is passed, this 2618c2ecf20Sopenharmony_ci * function returns EINVAL. 2628c2ecf20Sopenharmony_ci */ 2638c2ecf20Sopenharmony_cistruct ts_config *textsearch_prepare(const char *algo, const void *pattern, 2648c2ecf20Sopenharmony_ci unsigned int len, gfp_t gfp_mask, int flags) 2658c2ecf20Sopenharmony_ci{ 2668c2ecf20Sopenharmony_ci int err = -ENOENT; 2678c2ecf20Sopenharmony_ci struct ts_config *conf; 2688c2ecf20Sopenharmony_ci struct ts_ops *ops; 2698c2ecf20Sopenharmony_ci 2708c2ecf20Sopenharmony_ci if (len == 0) 2718c2ecf20Sopenharmony_ci return ERR_PTR(-EINVAL); 2728c2ecf20Sopenharmony_ci 2738c2ecf20Sopenharmony_ci ops = lookup_ts_algo(algo); 2748c2ecf20Sopenharmony_ci#ifdef CONFIG_MODULES 2758c2ecf20Sopenharmony_ci /* 2768c2ecf20Sopenharmony_ci * Why not always autoload you may ask. Some users are 2778c2ecf20Sopenharmony_ci * in a situation where requesting a module may deadlock, 2788c2ecf20Sopenharmony_ci * especially when the module is located on a NFS mount. 2798c2ecf20Sopenharmony_ci */ 2808c2ecf20Sopenharmony_ci if (ops == NULL && flags & TS_AUTOLOAD) { 2818c2ecf20Sopenharmony_ci request_module("ts_%s", algo); 2828c2ecf20Sopenharmony_ci ops = lookup_ts_algo(algo); 2838c2ecf20Sopenharmony_ci } 2848c2ecf20Sopenharmony_ci#endif 2858c2ecf20Sopenharmony_ci 2868c2ecf20Sopenharmony_ci if (ops == NULL) 2878c2ecf20Sopenharmony_ci goto errout; 2888c2ecf20Sopenharmony_ci 2898c2ecf20Sopenharmony_ci conf = ops->init(pattern, len, gfp_mask, flags); 2908c2ecf20Sopenharmony_ci if (IS_ERR(conf)) { 2918c2ecf20Sopenharmony_ci err = PTR_ERR(conf); 2928c2ecf20Sopenharmony_ci goto errout; 2938c2ecf20Sopenharmony_ci } 2948c2ecf20Sopenharmony_ci 2958c2ecf20Sopenharmony_ci conf->ops = ops; 2968c2ecf20Sopenharmony_ci return conf; 2978c2ecf20Sopenharmony_ci 2988c2ecf20Sopenharmony_cierrout: 2998c2ecf20Sopenharmony_ci if (ops) 3008c2ecf20Sopenharmony_ci module_put(ops->owner); 3018c2ecf20Sopenharmony_ci 3028c2ecf20Sopenharmony_ci return ERR_PTR(err); 3038c2ecf20Sopenharmony_ci} 3048c2ecf20Sopenharmony_ciEXPORT_SYMBOL(textsearch_prepare); 3058c2ecf20Sopenharmony_ci 3068c2ecf20Sopenharmony_ci/** 3078c2ecf20Sopenharmony_ci * textsearch_destroy - destroy a search configuration 3088c2ecf20Sopenharmony_ci * @conf: search configuration 3098c2ecf20Sopenharmony_ci * 3108c2ecf20Sopenharmony_ci * Releases all references of the configuration and frees 3118c2ecf20Sopenharmony_ci * up the memory. 3128c2ecf20Sopenharmony_ci */ 3138c2ecf20Sopenharmony_civoid textsearch_destroy(struct ts_config *conf) 3148c2ecf20Sopenharmony_ci{ 3158c2ecf20Sopenharmony_ci if (conf->ops) { 3168c2ecf20Sopenharmony_ci if (conf->ops->destroy) 3178c2ecf20Sopenharmony_ci conf->ops->destroy(conf); 3188c2ecf20Sopenharmony_ci module_put(conf->ops->owner); 3198c2ecf20Sopenharmony_ci } 3208c2ecf20Sopenharmony_ci 3218c2ecf20Sopenharmony_ci kfree(conf); 3228c2ecf20Sopenharmony_ci} 3238c2ecf20Sopenharmony_ciEXPORT_SYMBOL(textsearch_destroy); 324