#ifndef SRC_REGEX_H_
#define SRC_REGEX_H_

#include <stdbool.h>
#include <stdio.h>

#ifdef USE_PCRE2
#include <pcre2.h>
#else
#include <pcre.h>
#endif


/**
 * Result codes returned by regex_match.
 *
 * The first three enumerators take the implicit values 0, 1 and 2;
 * REGEX_ERROR is explicitly -1 so that it can never collide with one of
 * the non-error results.
 */
enum {
	REGEX_MATCH,		/* 0: a match was found */
	REGEX_MATCH_PARTIAL,	/* 1: a partial match was found */
	REGEX_NO_MATCH,		/* 2: no match was found */
	REGEX_ERROR = -1,	/* an error occurred while matching */
};

/*
 * Incomplete type: this header only ever passes pointers to it, so the
 * layout is not needed here.
 */
struct regex_data;

/**
 * Error information filled in by regex_prepare_data and consumed by
 * regex_format_error. The layout depends on the back-end selected at
 * build time through USE_PCRE2.
 */
#ifdef USE_PCRE2
struct regex_error_data {
	int error_code;			/* numeric error code */
	PCRE2_SIZE error_offset;	/* offset associated with the error */
};
#else
struct regex_error_data {
	char const *error_buffer;	/* error message text */
	int error_offset;		/* offset associated with the error */
};
#endif

/*
 * Incomplete type: only a pointer to it is used in this header
 * (see regex_load_mmap).
 */
struct mmap_area;

/**
 * regex_arch_string returns a string that represents the pointer width, the
 * width of what the backend considers a size type, and the endianness of the
 * system that this library was built for (e.g. for x86_64: "8-8-el").
 * This is required when loading stored regular expressions. PCRE2 regular
 * expressions are not portable across architectures that do not have a
 * matching arch-string.
 */
char const *regex_arch_string(void);

/**
 * regex_version returns the version string of the underlying regular
 * expressions library. In the case of PCRE it just returns the
 * result of pcre_version(). In the case of PCRE2, the very first time this
 * function is called it allocates a buffer large enough to hold the version
 * string and reads the PCRE2_CONFIG_VERSION option to fill the buffer.
 * The allocated buffer will linger in memory until the calling process is
 * reaped.
 *
 * It may return NULL on error.
 */
char const *regex_version(void);
/**
 * This constructor function allocates a buffer for a regex_data structure.
 * The buffer is initialized with zeroes. The complementary destructor is
 * regex_data_free.
 */
struct regex_data *regex_data_create(void);
/**
 * This complementary destructor function frees the given regex_data buffer.
 * It also frees any non-NULL member pointers with the appropriate pcreX_X_free
 * function. For PCRE this function respects the extra_owned field and frees
 * the pcre_extra data conditionally. Calling this function on a NULL pointer
 * is safe.
 *
 * @arg regex The structure to free; may be NULL.
 */
void regex_data_free(struct regex_data *regex);
/**
 * This function compiles the regular expression. Additionally, it prepares
 * data structures required by the different underlying engines. For PCRE
 * it calls pcre_study to generate optional data required for optimized
 * execution of the compiled pattern. In the case of PCRE2, it allocates
 * a pcre2_match_data structure of appropriate size to hold all possible
 * matches created by the pattern.
 *
 * @arg regex If successful, the structure returned through *regex was
 *            allocated with regex_data_create and must be freed with
 *            regex_data_free.
 * @arg pattern_string The pattern string that is to be compiled.
 * @arg errordata A pointer to a regex_error_data structure must be passed
 *                to this function. This structure depends on the underlying
 *                implementation. It can be passed to regex_format_error
 *                to generate a human-readable error message.
 * @retval 0 on success
 * @retval -1 on error
 */
int regex_prepare_data(struct regex_data **regex, char const *pattern_string,
		       struct regex_error_data *errordata);
/**
 * This function loads a serialized precompiled pattern from a contiguous
 * data region given by map_area.
 *
 * @arg map_area Description of the memory region holding a serialized
 *               representation of the precompiled pattern.
 * @arg regex If successful, the structure returned through *regex was
 *            allocated with regex_data_create and must be freed with
 *            regex_data_free.
 * @arg do_load_precompregex If non-zero, precompiled patterns get loaded
 *                           from the mmap region (ignored by the PCRE1
 *                           back-end).
 * @arg regex_compiled Set to true if a precompiled pattern was loaded
 *                     into regex, otherwise set to false to indicate that
 *                     later compilation must occur.
 *
 * @retval 0 on success
 * @retval -1 on error
 */
int regex_load_mmap(struct mmap_area *map_area,
		    struct regex_data **regex,
		    int do_load_precompregex,
		    bool *regex_compiled);
/**
 * This function stores a precompiled regular expression to a file.
 * In the case of PCRE, it just dumps the binary representation of the
 * precompiled pattern into a file. In the case of PCRE2, it uses the
 * serialization function provided by the library.
 *
 * @arg regex The precompiled regular expression data.
 * @arg fp A file stream specifying the output file.
 * @arg do_write_precompregex If non-zero, precompiled patterns are written
 *                            to the output file (ignored by the PCRE1
 *                            back-end).
 *
 * NOTE(review): the meaning of the int return value is not documented in
 * this header; presumably 0 on success and -1 on error like the sibling
 * functions -- confirm against the implementation.
 */
int regex_writef(struct regex_data *regex, FILE *fp,
		 int do_write_precompregex);
/**
 * This function applies a precompiled pattern to a subject string and
 * returns whether or not a match was found.
 *
 * @arg regex The precompiled pattern.
 * @arg subject The subject string.
 * @arg partial Boolean indicating if partial matches are wanted. A nonzero
 *              value is equivalent to specifying PCRE[2]_PARTIAL_SOFT as
 *              option to pcre_exec or pcre2_match.
 * @retval REGEX_MATCH if a match was found
 * @retval REGEX_MATCH_PARTIAL if a partial match was found
 * @retval REGEX_NO_MATCH if no match was found
 * @retval REGEX_ERROR if an error was encountered during the execution of the
 *                     regular expression
 */
int regex_match(struct regex_data *regex, char const *subject,
		int partial);
/**
 * This function compares two compiled regular expressions (regex1 and regex2).
 * It compares the binary representations of the compiled patterns. It is a
 * very crude approximation because the binary representation holds data, such
 * as reference counters, that has nothing to do with the actual state machine.
 *
 * @arg regex1 First compiled pattern.
 * @arg regex2 Second compiled pattern.
 * @retval SELABEL_EQUAL if the patterns' binary representations are exactly
 *                       the same
 * @retval SELABEL_INCOMPARABLE otherwise
 */
int regex_cmp(struct regex_data *regex1, struct regex_data *regex2);
/**
 * This function takes the error data returned by regex_prepare_data and turns
 * it into a human-readable error message.
 * If the buffer given to hold the error message is too small, it truncates the
 * message and indicates the truncation with an ellipsis ("...") at the end of
 * the buffer.
 *
 * @arg error_data Error data as returned by regex_prepare_data.
 * @arg buffer String buffer to hold the formatted error string.
 * @arg buf_size Total size of the given buffer in bytes.
 */
void regex_format_error(struct regex_error_data const *error_data, char *buffer,
			size_t buf_size);
#endif /* SRC_REGEX_H_ */