xref: /third_party/selinux/libselinux/src/regex.c (revision 6cd6a6ac)
16cd6a6acSopenharmony_ci#include <assert.h>
26cd6a6acSopenharmony_ci#include <pthread.h>
36cd6a6acSopenharmony_ci#include <stdint.h>
46cd6a6acSopenharmony_ci#include <stdio.h>
56cd6a6acSopenharmony_ci#include <string.h>
66cd6a6acSopenharmony_ci
76cd6a6acSopenharmony_ci#include "regex.h"
86cd6a6acSopenharmony_ci#include "label_file.h"
96cd6a6acSopenharmony_ci#include "selinux_internal.h"
106cd6a6acSopenharmony_ci
116cd6a6acSopenharmony_ci#ifdef USE_PCRE2
126cd6a6acSopenharmony_ci#define REGEX_ARCH_SIZE_T PCRE2_SIZE
136cd6a6acSopenharmony_ci#else
146cd6a6acSopenharmony_ci#define REGEX_ARCH_SIZE_T size_t
156cd6a6acSopenharmony_ci#endif
166cd6a6acSopenharmony_ci
#ifndef __BYTE_ORDER__

/*
 * If the compiler doesn't define __BYTE_ORDER__, try to use the C
 * library <endian.h> header definitions.
 *
 * The aliases must be spelled exactly like the compiler built-ins (with the
 * trailing double underscore), because regex_arch_string() compares
 * __BYTE_ORDER__ against __ORDER_LITTLE_ENDIAN__ / __ORDER_BIG_ENDIAN__.
 */
#include <endian.h>
#ifndef __BYTE_ORDER
#error Neither __BYTE_ORDER__ nor __BYTE_ORDER defined. Unable to determine endianness.
#endif

/* Historical spellings without the trailing underscores, kept for compatibility. */
#define __ORDER_LITTLE_ENDIAN __LITTLE_ENDIAN
#define __ORDER_BIG_ENDIAN __BIG_ENDIAN

#ifndef __ORDER_LITTLE_ENDIAN__
#define __ORDER_LITTLE_ENDIAN__ __LITTLE_ENDIAN
#endif
#ifndef __ORDER_BIG_ENDIAN__
#define __ORDER_BIG_ENDIAN__ __BIG_ENDIAN
#endif
#define __BYTE_ORDER__ __BYTE_ORDER

#endif
316cd6a6acSopenharmony_ci
326cd6a6acSopenharmony_ci#ifdef USE_PCRE2
/* Serializes every update of match_data_key_initialized. */
static pthread_mutex_t key_mutex = PTHREAD_MUTEX_INITIALIZER;
/* Thread-specific-data key under which each thread keeps its match data. */
static pthread_key_t match_data_key;
/*
 * -1 until regex_data_create() has attempted to create match_data_key.
 * Afterwards it holds the logical negation of the key-creation return value,
 * is incremented by regex_match() for each thread that stores match data
 * under the key, and is decremented again by match_data_thread_free().
 */
static int match_data_key_initialized = -1;
/* Non-zero once the current thread has stored match data under the key. */
static __thread char match_data_initialized;
376cd6a6acSopenharmony_ci
386cd6a6acSopenharmony_cichar const *regex_arch_string(void)
396cd6a6acSopenharmony_ci{
406cd6a6acSopenharmony_ci	static char arch_string_buffer[32];
416cd6a6acSopenharmony_ci	static char const *arch_string = "";
426cd6a6acSopenharmony_ci	char const *endianness = NULL;
436cd6a6acSopenharmony_ci	int rc;
446cd6a6acSopenharmony_ci
456cd6a6acSopenharmony_ci	if (arch_string[0] == '\0') {
466cd6a6acSopenharmony_ci		if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
476cd6a6acSopenharmony_ci			endianness = "el";
486cd6a6acSopenharmony_ci		else if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
496cd6a6acSopenharmony_ci			endianness = "eb";
506cd6a6acSopenharmony_ci
516cd6a6acSopenharmony_ci		if (!endianness)
526cd6a6acSopenharmony_ci			return NULL;
536cd6a6acSopenharmony_ci
546cd6a6acSopenharmony_ci		rc = snprintf(arch_string_buffer, sizeof(arch_string_buffer),
556cd6a6acSopenharmony_ci				"%zu-%zu-%s", sizeof(void *),
566cd6a6acSopenharmony_ci				sizeof(REGEX_ARCH_SIZE_T),
576cd6a6acSopenharmony_ci				endianness);
586cd6a6acSopenharmony_ci		if (rc < 0)
596cd6a6acSopenharmony_ci			abort();
606cd6a6acSopenharmony_ci
616cd6a6acSopenharmony_ci		arch_string = &arch_string_buffer[0];
626cd6a6acSopenharmony_ci	}
636cd6a6acSopenharmony_ci	return arch_string;
646cd6a6acSopenharmony_ci}
656cd6a6acSopenharmony_ci
666cd6a6acSopenharmony_cistruct regex_data {
676cd6a6acSopenharmony_ci	pcre2_code *regex; /* compiled regular expression */
686cd6a6acSopenharmony_ci};
696cd6a6acSopenharmony_ci
706cd6a6acSopenharmony_ciint regex_prepare_data(struct regex_data **regex, char const *pattern_string,
716cd6a6acSopenharmony_ci		       struct regex_error_data *errordata)
726cd6a6acSopenharmony_ci{
736cd6a6acSopenharmony_ci	memset(errordata, 0, sizeof(struct regex_error_data));
746cd6a6acSopenharmony_ci
756cd6a6acSopenharmony_ci	*regex = regex_data_create();
766cd6a6acSopenharmony_ci	if (!(*regex))
776cd6a6acSopenharmony_ci		return -1;
786cd6a6acSopenharmony_ci
796cd6a6acSopenharmony_ci	(*regex)->regex = pcre2_compile(
806cd6a6acSopenharmony_ci	    (PCRE2_SPTR)pattern_string, PCRE2_ZERO_TERMINATED, PCRE2_DOTALL,
816cd6a6acSopenharmony_ci	    &errordata->error_code, &errordata->error_offset, NULL);
826cd6a6acSopenharmony_ci	if (!(*regex)->regex) {
836cd6a6acSopenharmony_ci		goto err;
846cd6a6acSopenharmony_ci	}
856cd6a6acSopenharmony_ci
866cd6a6acSopenharmony_ci	return 0;
876cd6a6acSopenharmony_ci
886cd6a6acSopenharmony_cierr:
896cd6a6acSopenharmony_ci	regex_data_free(*regex);
906cd6a6acSopenharmony_ci	*regex = NULL;
916cd6a6acSopenharmony_ci	return -1;
926cd6a6acSopenharmony_ci}
936cd6a6acSopenharmony_ci
946cd6a6acSopenharmony_cichar const *regex_version(void)
956cd6a6acSopenharmony_ci{
966cd6a6acSopenharmony_ci	static char version_buf[256];
976cd6a6acSopenharmony_ci	size_t len = pcre2_config(PCRE2_CONFIG_VERSION, NULL);
986cd6a6acSopenharmony_ci	if (len <= 0 || len > sizeof(version_buf))
996cd6a6acSopenharmony_ci		return NULL;
1006cd6a6acSopenharmony_ci
1016cd6a6acSopenharmony_ci	pcre2_config(PCRE2_CONFIG_VERSION, version_buf);
1026cd6a6acSopenharmony_ci	return version_buf;
1036cd6a6acSopenharmony_ci}
1046cd6a6acSopenharmony_ci
1056cd6a6acSopenharmony_ciint regex_load_mmap(struct mmap_area *mmap_area, struct regex_data **regex,
1066cd6a6acSopenharmony_ci		    int do_load_precompregex, bool *regex_compiled)
1076cd6a6acSopenharmony_ci{
1086cd6a6acSopenharmony_ci	int rc;
1096cd6a6acSopenharmony_ci	uint32_t entry_len;
1106cd6a6acSopenharmony_ci
1116cd6a6acSopenharmony_ci	*regex_compiled = false;
1126cd6a6acSopenharmony_ci	rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
1136cd6a6acSopenharmony_ci	if (rc < 0)
1146cd6a6acSopenharmony_ci		return -1;
1156cd6a6acSopenharmony_ci
1166cd6a6acSopenharmony_ci	if (entry_len && do_load_precompregex) {
1176cd6a6acSopenharmony_ci		/*
1186cd6a6acSopenharmony_ci		 * this should yield exactly one because we store one pattern at
1196cd6a6acSopenharmony_ci		 * a time
1206cd6a6acSopenharmony_ci		 */
1216cd6a6acSopenharmony_ci		rc = pcre2_serialize_get_number_of_codes(mmap_area->next_addr);
1226cd6a6acSopenharmony_ci		if (rc != 1)
1236cd6a6acSopenharmony_ci			return -1;
1246cd6a6acSopenharmony_ci
1256cd6a6acSopenharmony_ci		*regex = regex_data_create();
1266cd6a6acSopenharmony_ci		if (!*regex)
1276cd6a6acSopenharmony_ci			return -1;
1286cd6a6acSopenharmony_ci
1296cd6a6acSopenharmony_ci		rc = pcre2_serialize_decode(&(*regex)->regex, 1,
1306cd6a6acSopenharmony_ci					    (PCRE2_SPTR)mmap_area->next_addr,
1316cd6a6acSopenharmony_ci					    NULL);
1326cd6a6acSopenharmony_ci		if (rc != 1)
1336cd6a6acSopenharmony_ci			goto err;
1346cd6a6acSopenharmony_ci
1356cd6a6acSopenharmony_ci		*regex_compiled = true;
1366cd6a6acSopenharmony_ci	}
1376cd6a6acSopenharmony_ci
1386cd6a6acSopenharmony_ci	/* and skip the decoded bit */
1396cd6a6acSopenharmony_ci	rc = next_entry(NULL, mmap_area, entry_len);
1406cd6a6acSopenharmony_ci	if (rc < 0)
1416cd6a6acSopenharmony_ci		goto err;
1426cd6a6acSopenharmony_ci
1436cd6a6acSopenharmony_ci	return 0;
1446cd6a6acSopenharmony_cierr:
1456cd6a6acSopenharmony_ci	regex_data_free(*regex);
1466cd6a6acSopenharmony_ci	*regex = NULL;
1476cd6a6acSopenharmony_ci	return -1;
1486cd6a6acSopenharmony_ci}
1496cd6a6acSopenharmony_ci
1506cd6a6acSopenharmony_ciint regex_writef(struct regex_data *regex, FILE *fp, int do_write_precompregex)
1516cd6a6acSopenharmony_ci{
1526cd6a6acSopenharmony_ci	int rc = 0;
1536cd6a6acSopenharmony_ci	size_t len;
1546cd6a6acSopenharmony_ci	PCRE2_SIZE serialized_size;
1556cd6a6acSopenharmony_ci	uint32_t to_write = 0;
1566cd6a6acSopenharmony_ci	PCRE2_UCHAR *bytes = NULL;
1576cd6a6acSopenharmony_ci
1586cd6a6acSopenharmony_ci	if (do_write_precompregex) {
1596cd6a6acSopenharmony_ci		/* encode the pattern for serialization */
1606cd6a6acSopenharmony_ci		rc = pcre2_serialize_encode((const pcre2_code **)&regex->regex,
1616cd6a6acSopenharmony_ci					    1, &bytes, &serialized_size, NULL);
1626cd6a6acSopenharmony_ci		if (rc != 1) {
1636cd6a6acSopenharmony_ci			rc = -1;
1646cd6a6acSopenharmony_ci			goto out;
1656cd6a6acSopenharmony_ci		}
1666cd6a6acSopenharmony_ci		to_write = serialized_size;
1676cd6a6acSopenharmony_ci	}
1686cd6a6acSopenharmony_ci
1696cd6a6acSopenharmony_ci	/* write serialized pattern's size */
1706cd6a6acSopenharmony_ci	len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
1716cd6a6acSopenharmony_ci	if (len != 1) {
1726cd6a6acSopenharmony_ci		rc = -1;
1736cd6a6acSopenharmony_ci		goto out;
1746cd6a6acSopenharmony_ci	}
1756cd6a6acSopenharmony_ci
1766cd6a6acSopenharmony_ci	if (do_write_precompregex) {
1776cd6a6acSopenharmony_ci		/* write serialized pattern */
1786cd6a6acSopenharmony_ci		len = fwrite(bytes, 1, to_write, fp);
1796cd6a6acSopenharmony_ci		if (len != to_write)
1806cd6a6acSopenharmony_ci			rc = -1;
1816cd6a6acSopenharmony_ci	}
1826cd6a6acSopenharmony_ci
1836cd6a6acSopenharmony_ciout:
1846cd6a6acSopenharmony_ci	if (bytes)
1856cd6a6acSopenharmony_ci		pcre2_serialize_free(bytes);
1866cd6a6acSopenharmony_ci
1876cd6a6acSopenharmony_ci	return rc;
1886cd6a6acSopenharmony_ci}
1896cd6a6acSopenharmony_ci
1906cd6a6acSopenharmony_cistatic void __attribute__((destructor)) match_data_thread_free(void *key)
1916cd6a6acSopenharmony_ci{
1926cd6a6acSopenharmony_ci	void *value;
1936cd6a6acSopenharmony_ci	pcre2_match_data *match_data;
1946cd6a6acSopenharmony_ci
1956cd6a6acSopenharmony_ci	if (match_data_key_initialized <= 0 || !match_data_initialized)
1966cd6a6acSopenharmony_ci		return;
1976cd6a6acSopenharmony_ci
1986cd6a6acSopenharmony_ci	value = __selinux_getspecific(match_data_key);
1996cd6a6acSopenharmony_ci	match_data = value ? value : key;
2006cd6a6acSopenharmony_ci
2016cd6a6acSopenharmony_ci	pcre2_match_data_free(match_data);
2026cd6a6acSopenharmony_ci
2036cd6a6acSopenharmony_ci	__pthread_mutex_lock(&key_mutex);
2046cd6a6acSopenharmony_ci	if (--match_data_key_initialized == 1) {
2056cd6a6acSopenharmony_ci		__selinux_key_delete(match_data_key);
2066cd6a6acSopenharmony_ci		match_data_key_initialized = -1;
2076cd6a6acSopenharmony_ci	}
2086cd6a6acSopenharmony_ci	__pthread_mutex_unlock(&key_mutex);
2096cd6a6acSopenharmony_ci}
2106cd6a6acSopenharmony_ci
2116cd6a6acSopenharmony_civoid regex_data_free(struct regex_data *regex)
2126cd6a6acSopenharmony_ci{
2136cd6a6acSopenharmony_ci	if (regex) {
2146cd6a6acSopenharmony_ci		if (regex->regex)
2156cd6a6acSopenharmony_ci			pcre2_code_free(regex->regex);
2166cd6a6acSopenharmony_ci		free(regex);
2176cd6a6acSopenharmony_ci	}
2186cd6a6acSopenharmony_ci}
2196cd6a6acSopenharmony_ci
2206cd6a6acSopenharmony_ciint regex_match(struct regex_data *regex, char const *subject, int partial)
2216cd6a6acSopenharmony_ci{
2226cd6a6acSopenharmony_ci	int rc;
2236cd6a6acSopenharmony_ci	bool slow;
2246cd6a6acSopenharmony_ci	pcre2_match_data *match_data = NULL;
2256cd6a6acSopenharmony_ci
2266cd6a6acSopenharmony_ci	if (match_data_key_initialized > 0) {
2276cd6a6acSopenharmony_ci		if (match_data_initialized == 0) {
2286cd6a6acSopenharmony_ci			match_data = pcre2_match_data_create(1, NULL);
2296cd6a6acSopenharmony_ci			if (match_data) {
2306cd6a6acSopenharmony_ci				match_data_initialized = 1;
2316cd6a6acSopenharmony_ci				__selinux_setspecific(match_data_key,
2326cd6a6acSopenharmony_ci							match_data);
2336cd6a6acSopenharmony_ci				__pthread_mutex_lock(&key_mutex);
2346cd6a6acSopenharmony_ci				match_data_key_initialized++;
2356cd6a6acSopenharmony_ci				__pthread_mutex_unlock(&key_mutex);
2366cd6a6acSopenharmony_ci			}
2376cd6a6acSopenharmony_ci		} else
2386cd6a6acSopenharmony_ci			match_data = __selinux_getspecific(match_data_key);
2396cd6a6acSopenharmony_ci	}
2406cd6a6acSopenharmony_ci
2416cd6a6acSopenharmony_ci	slow = (match_data_key_initialized <= 0 || match_data == NULL);
2426cd6a6acSopenharmony_ci	if (slow) {
2436cd6a6acSopenharmony_ci		match_data = pcre2_match_data_create_from_pattern(regex->regex,
2446cd6a6acSopenharmony_ci									NULL);
2456cd6a6acSopenharmony_ci		if (!match_data)
2466cd6a6acSopenharmony_ci			return REGEX_ERROR;
2476cd6a6acSopenharmony_ci	}
2486cd6a6acSopenharmony_ci
2496cd6a6acSopenharmony_ci	rc = pcre2_match(
2506cd6a6acSopenharmony_ci	    regex->regex, (PCRE2_SPTR)subject, PCRE2_ZERO_TERMINATED, 0,
2516cd6a6acSopenharmony_ci	    partial ? PCRE2_PARTIAL_SOFT : 0, match_data, NULL);
2526cd6a6acSopenharmony_ci
2536cd6a6acSopenharmony_ci	if (slow)
2546cd6a6acSopenharmony_ci		pcre2_match_data_free(match_data);
2556cd6a6acSopenharmony_ci
2566cd6a6acSopenharmony_ci	if (rc >= 0)
2576cd6a6acSopenharmony_ci		return REGEX_MATCH;
2586cd6a6acSopenharmony_ci	switch (rc) {
2596cd6a6acSopenharmony_ci	case PCRE2_ERROR_PARTIAL:
2606cd6a6acSopenharmony_ci		return REGEX_MATCH_PARTIAL;
2616cd6a6acSopenharmony_ci	case PCRE2_ERROR_NOMATCH:
2626cd6a6acSopenharmony_ci		return REGEX_NO_MATCH;
2636cd6a6acSopenharmony_ci	default:
2646cd6a6acSopenharmony_ci		return REGEX_ERROR;
2656cd6a6acSopenharmony_ci	}
2666cd6a6acSopenharmony_ci}
2676cd6a6acSopenharmony_ci
2686cd6a6acSopenharmony_ci/*
2696cd6a6acSopenharmony_ci * TODO Replace this compare function with something that actually compares the
2706cd6a6acSopenharmony_ci * regular expressions.
2716cd6a6acSopenharmony_ci * This compare function basically just compares the binary representations of
2726cd6a6acSopenharmony_ci * the automatons, and because this representation contains pointers and
2736cd6a6acSopenharmony_ci * metadata, it can only return a match if regex1 == regex2.
2746cd6a6acSopenharmony_ci * Preferably, this function would be replaced with an algorithm that computes
2756cd6a6acSopenharmony_ci * the equivalence of the automatons systematically.
2766cd6a6acSopenharmony_ci */
2776cd6a6acSopenharmony_ciint regex_cmp(struct regex_data *regex1, struct regex_data *regex2)
2786cd6a6acSopenharmony_ci{
2796cd6a6acSopenharmony_ci	int rc;
2806cd6a6acSopenharmony_ci	size_t len1, len2;
2816cd6a6acSopenharmony_ci	rc = pcre2_pattern_info(regex1->regex, PCRE2_INFO_SIZE, &len1);
2826cd6a6acSopenharmony_ci	assert(rc == 0);
2836cd6a6acSopenharmony_ci	rc = pcre2_pattern_info(regex2->regex, PCRE2_INFO_SIZE, &len2);
2846cd6a6acSopenharmony_ci	assert(rc == 0);
2856cd6a6acSopenharmony_ci	if (len1 != len2 || memcmp(regex1->regex, regex2->regex, len1))
2866cd6a6acSopenharmony_ci		return SELABEL_INCOMPARABLE;
2876cd6a6acSopenharmony_ci
2886cd6a6acSopenharmony_ci	return SELABEL_EQUAL;
2896cd6a6acSopenharmony_ci}
2906cd6a6acSopenharmony_ci
2916cd6a6acSopenharmony_cistruct regex_data *regex_data_create(void)
2926cd6a6acSopenharmony_ci{
2936cd6a6acSopenharmony_ci	struct regex_data *regex_data =
2946cd6a6acSopenharmony_ci		(struct regex_data *)calloc(1, sizeof(struct regex_data));
2956cd6a6acSopenharmony_ci	if (!regex_data)
2966cd6a6acSopenharmony_ci		return NULL;
2976cd6a6acSopenharmony_ci
2986cd6a6acSopenharmony_ci	__pthread_mutex_lock(&key_mutex);
2996cd6a6acSopenharmony_ci	if (match_data_key_initialized < 0) {
3006cd6a6acSopenharmony_ci		match_data_key_initialized = !__selinux_key_create(
3016cd6a6acSopenharmony_ci							&match_data_key,
3026cd6a6acSopenharmony_ci							match_data_thread_free);
3036cd6a6acSopenharmony_ci	}
3046cd6a6acSopenharmony_ci	__pthread_mutex_unlock(&key_mutex);
3056cd6a6acSopenharmony_ci
3066cd6a6acSopenharmony_ci	return regex_data;
3076cd6a6acSopenharmony_ci}
3086cd6a6acSopenharmony_ci
3096cd6a6acSopenharmony_ci#else // !USE_PCRE2
/* This build (without USE_PCRE2) reports a fixed "N/A" architecture string. */
char const *regex_arch_string(void)
{
	static char const not_applicable[] = "N/A";

	return not_applicable;
}
3146cd6a6acSopenharmony_ci
3156cd6a6acSopenharmony_ci/* Prior to version 8.20, libpcre did not have pcre_free_study() */
3166cd6a6acSopenharmony_ci#if (PCRE_MAJOR < 8 || (PCRE_MAJOR == 8 && PCRE_MINOR < 20))
3176cd6a6acSopenharmony_ci#define pcre_free_study pcre_free
3186cd6a6acSopenharmony_ci#endif
3196cd6a6acSopenharmony_ci
3206cd6a6acSopenharmony_cistruct regex_data {
3216cd6a6acSopenharmony_ci	int owned;   /*
3226cd6a6acSopenharmony_ci		      * non zero if regex and pcre_extra is owned by this
3236cd6a6acSopenharmony_ci		      * structure and thus must be freed on destruction.
3246cd6a6acSopenharmony_ci		      */
3256cd6a6acSopenharmony_ci	pcre *regex; /* compiled regular expression */
3266cd6a6acSopenharmony_ci	union {
3276cd6a6acSopenharmony_ci		pcre_extra *sd; /* pointer to extra compiled stuff */
3286cd6a6acSopenharmony_ci		pcre_extra lsd; /* used to hold the mmap'd version */
3296cd6a6acSopenharmony_ci	};
3306cd6a6acSopenharmony_ci};
3316cd6a6acSopenharmony_ci
3326cd6a6acSopenharmony_ciint regex_prepare_data(struct regex_data **regex, char const *pattern_string,
3336cd6a6acSopenharmony_ci		       struct regex_error_data *errordata)
3346cd6a6acSopenharmony_ci{
3356cd6a6acSopenharmony_ci	memset(errordata, 0, sizeof(struct regex_error_data));
3366cd6a6acSopenharmony_ci
3376cd6a6acSopenharmony_ci	*regex = regex_data_create();
3386cd6a6acSopenharmony_ci	if (!(*regex))
3396cd6a6acSopenharmony_ci		return -1;
3406cd6a6acSopenharmony_ci
3416cd6a6acSopenharmony_ci	(*regex)->regex =
3426cd6a6acSopenharmony_ci	    pcre_compile(pattern_string, PCRE_DOTALL, &errordata->error_buffer,
3436cd6a6acSopenharmony_ci			 &errordata->error_offset, NULL);
3446cd6a6acSopenharmony_ci	if (!(*regex)->regex)
3456cd6a6acSopenharmony_ci		goto err;
3466cd6a6acSopenharmony_ci
3476cd6a6acSopenharmony_ci	(*regex)->owned = 1;
3486cd6a6acSopenharmony_ci
3496cd6a6acSopenharmony_ci	(*regex)->sd = pcre_study((*regex)->regex, 0, &errordata->error_buffer);
3506cd6a6acSopenharmony_ci	if (!(*regex)->sd && errordata->error_buffer)
3516cd6a6acSopenharmony_ci		goto err;
3526cd6a6acSopenharmony_ci
3536cd6a6acSopenharmony_ci	return 0;
3546cd6a6acSopenharmony_ci
3556cd6a6acSopenharmony_cierr:
3566cd6a6acSopenharmony_ci	regex_data_free(*regex);
3576cd6a6acSopenharmony_ci	*regex = NULL;
3586cd6a6acSopenharmony_ci	return -1;
3596cd6a6acSopenharmony_ci}
3606cd6a6acSopenharmony_ci
/* Return the version string reported by pcre_version(). */
char const *regex_version(void)
{
	char const *version = pcre_version();

	return version;
}
3656cd6a6acSopenharmony_ci
3666cd6a6acSopenharmony_ciint regex_load_mmap(struct mmap_area *mmap_area, struct regex_data **regex,
3676cd6a6acSopenharmony_ci		    int do_load_precompregex __attribute__((unused)), bool *regex_compiled)
3686cd6a6acSopenharmony_ci{
3696cd6a6acSopenharmony_ci	int rc;
3706cd6a6acSopenharmony_ci	uint32_t entry_len;
3716cd6a6acSopenharmony_ci	size_t info_len;
3726cd6a6acSopenharmony_ci
3736cd6a6acSopenharmony_ci	rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
3746cd6a6acSopenharmony_ci	if (rc < 0 || !entry_len)
3756cd6a6acSopenharmony_ci		return -1;
3766cd6a6acSopenharmony_ci
3776cd6a6acSopenharmony_ci	*regex = regex_data_create();
3786cd6a6acSopenharmony_ci	if (!(*regex))
3796cd6a6acSopenharmony_ci		return -1;
3806cd6a6acSopenharmony_ci
3816cd6a6acSopenharmony_ci	(*regex)->owned = 0;
3826cd6a6acSopenharmony_ci	(*regex)->regex = (pcre *)mmap_area->next_addr;
3836cd6a6acSopenharmony_ci	rc = next_entry(NULL, mmap_area, entry_len);
3846cd6a6acSopenharmony_ci	if (rc < 0)
3856cd6a6acSopenharmony_ci		goto err;
3866cd6a6acSopenharmony_ci
3876cd6a6acSopenharmony_ci	/*
3886cd6a6acSopenharmony_ci	 * Check that regex lengths match. pcre_fullinfo()
3896cd6a6acSopenharmony_ci	 * also validates its magic number.
3906cd6a6acSopenharmony_ci	 */
3916cd6a6acSopenharmony_ci	rc = pcre_fullinfo((*regex)->regex, NULL, PCRE_INFO_SIZE, &info_len);
3926cd6a6acSopenharmony_ci	if (rc < 0 || info_len != entry_len)
3936cd6a6acSopenharmony_ci		goto err;
3946cd6a6acSopenharmony_ci
3956cd6a6acSopenharmony_ci	rc = next_entry(&entry_len, mmap_area, sizeof(uint32_t));
3966cd6a6acSopenharmony_ci	if (rc < 0)
3976cd6a6acSopenharmony_ci		goto err;
3986cd6a6acSopenharmony_ci
3996cd6a6acSopenharmony_ci	if (entry_len) {
4006cd6a6acSopenharmony_ci		(*regex)->lsd.study_data = (void *)mmap_area->next_addr;
4016cd6a6acSopenharmony_ci		(*regex)->lsd.flags |= PCRE_EXTRA_STUDY_DATA;
4026cd6a6acSopenharmony_ci		rc = next_entry(NULL, mmap_area, entry_len);
4036cd6a6acSopenharmony_ci		if (rc < 0)
4046cd6a6acSopenharmony_ci			goto err;
4056cd6a6acSopenharmony_ci
4066cd6a6acSopenharmony_ci		/* Check that study data lengths match. */
4076cd6a6acSopenharmony_ci		rc = pcre_fullinfo((*regex)->regex, &(*regex)->lsd,
4086cd6a6acSopenharmony_ci				   PCRE_INFO_STUDYSIZE, &info_len);
4096cd6a6acSopenharmony_ci		if (rc < 0 || info_len != entry_len)
4106cd6a6acSopenharmony_ci			goto err;
4116cd6a6acSopenharmony_ci	}
4126cd6a6acSopenharmony_ci
4136cd6a6acSopenharmony_ci	*regex_compiled = true;
4146cd6a6acSopenharmony_ci	return 0;
4156cd6a6acSopenharmony_ci
4166cd6a6acSopenharmony_cierr:
4176cd6a6acSopenharmony_ci	regex_data_free(*regex);
4186cd6a6acSopenharmony_ci	*regex = NULL;
4196cd6a6acSopenharmony_ci	return -1;
4206cd6a6acSopenharmony_ci}
4216cd6a6acSopenharmony_ci
4226cd6a6acSopenharmony_cistatic inline pcre_extra *get_pcre_extra(struct regex_data *regex)
4236cd6a6acSopenharmony_ci{
4246cd6a6acSopenharmony_ci	if (!regex) return NULL;
4256cd6a6acSopenharmony_ci	if (regex->owned) {
4266cd6a6acSopenharmony_ci		return regex->sd;
4276cd6a6acSopenharmony_ci	} else if (regex->lsd.study_data) {
4286cd6a6acSopenharmony_ci		return &regex->lsd;
4296cd6a6acSopenharmony_ci	} else {
4306cd6a6acSopenharmony_ci		return NULL;
4316cd6a6acSopenharmony_ci	}
4326cd6a6acSopenharmony_ci}
4336cd6a6acSopenharmony_ci
4346cd6a6acSopenharmony_ciint regex_writef(struct regex_data *regex, FILE *fp,
4356cd6a6acSopenharmony_ci		 int do_write_precompregex __attribute__((unused)))
4366cd6a6acSopenharmony_ci{
4376cd6a6acSopenharmony_ci	int rc;
4386cd6a6acSopenharmony_ci	size_t len;
4396cd6a6acSopenharmony_ci	uint32_t to_write;
4406cd6a6acSopenharmony_ci	size_t size;
4416cd6a6acSopenharmony_ci	pcre_extra *sd = get_pcre_extra(regex);
4426cd6a6acSopenharmony_ci
4436cd6a6acSopenharmony_ci	/* determine the size of the pcre data in bytes */
4446cd6a6acSopenharmony_ci	rc = pcre_fullinfo(regex->regex, NULL, PCRE_INFO_SIZE, &size);
4456cd6a6acSopenharmony_ci	if (rc < 0)
4466cd6a6acSopenharmony_ci		return -1;
4476cd6a6acSopenharmony_ci
4486cd6a6acSopenharmony_ci	/* write the number of bytes in the pcre data */
4496cd6a6acSopenharmony_ci	to_write = size;
4506cd6a6acSopenharmony_ci	len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
4516cd6a6acSopenharmony_ci	if (len != 1)
4526cd6a6acSopenharmony_ci		return -1;
4536cd6a6acSopenharmony_ci
4546cd6a6acSopenharmony_ci	/* write the actual pcre data as a char array */
4556cd6a6acSopenharmony_ci	len = fwrite(regex->regex, 1, to_write, fp);
4566cd6a6acSopenharmony_ci	if (len != to_write)
4576cd6a6acSopenharmony_ci		return -1;
4586cd6a6acSopenharmony_ci
4596cd6a6acSopenharmony_ci	if (sd) {
4606cd6a6acSopenharmony_ci		/* determine the size of the pcre study info */
4616cd6a6acSopenharmony_ci		rc =
4626cd6a6acSopenharmony_ci		    pcre_fullinfo(regex->regex, sd, PCRE_INFO_STUDYSIZE, &size);
4636cd6a6acSopenharmony_ci		if (rc < 0)
4646cd6a6acSopenharmony_ci			return -1;
4656cd6a6acSopenharmony_ci	} else
4666cd6a6acSopenharmony_ci		size = 0;
4676cd6a6acSopenharmony_ci
4686cd6a6acSopenharmony_ci	/* write the number of bytes in the pcre study data */
4696cd6a6acSopenharmony_ci	to_write = size;
4706cd6a6acSopenharmony_ci	len = fwrite(&to_write, sizeof(uint32_t), 1, fp);
4716cd6a6acSopenharmony_ci	if (len != 1)
4726cd6a6acSopenharmony_ci		return -1;
4736cd6a6acSopenharmony_ci
4746cd6a6acSopenharmony_ci	if (sd) {
4756cd6a6acSopenharmony_ci		/* write the actual pcre study data as a char array */
4766cd6a6acSopenharmony_ci		len = fwrite(sd->study_data, 1, to_write, fp);
4776cd6a6acSopenharmony_ci		if (len != to_write)
4786cd6a6acSopenharmony_ci			return -1;
4796cd6a6acSopenharmony_ci	}
4806cd6a6acSopenharmony_ci
4816cd6a6acSopenharmony_ci	return 0;
4826cd6a6acSopenharmony_ci}
4836cd6a6acSopenharmony_ci
4846cd6a6acSopenharmony_civoid regex_data_free(struct regex_data *regex)
4856cd6a6acSopenharmony_ci{
4866cd6a6acSopenharmony_ci	if (regex) {
4876cd6a6acSopenharmony_ci		if (regex->owned) {
4886cd6a6acSopenharmony_ci			if (regex->regex)
4896cd6a6acSopenharmony_ci				pcre_free(regex->regex);
4906cd6a6acSopenharmony_ci			if (regex->sd)
4916cd6a6acSopenharmony_ci				pcre_free_study(regex->sd);
4926cd6a6acSopenharmony_ci		}
4936cd6a6acSopenharmony_ci		free(regex);
4946cd6a6acSopenharmony_ci	}
4956cd6a6acSopenharmony_ci}
4966cd6a6acSopenharmony_ci
4976cd6a6acSopenharmony_ciint regex_match(struct regex_data *regex, char const *subject, int partial)
4986cd6a6acSopenharmony_ci{
4996cd6a6acSopenharmony_ci	int rc;
5006cd6a6acSopenharmony_ci
5016cd6a6acSopenharmony_ci	rc = pcre_exec(regex->regex, get_pcre_extra(regex),
5026cd6a6acSopenharmony_ci		       subject, strlen(subject), 0,
5036cd6a6acSopenharmony_ci		       partial ? PCRE_PARTIAL_SOFT : 0, NULL, 0);
5046cd6a6acSopenharmony_ci	switch (rc) {
5056cd6a6acSopenharmony_ci	case 0:
5066cd6a6acSopenharmony_ci		return REGEX_MATCH;
5076cd6a6acSopenharmony_ci	case PCRE_ERROR_PARTIAL:
5086cd6a6acSopenharmony_ci		return REGEX_MATCH_PARTIAL;
5096cd6a6acSopenharmony_ci	case PCRE_ERROR_NOMATCH:
5106cd6a6acSopenharmony_ci		return REGEX_NO_MATCH;
5116cd6a6acSopenharmony_ci	default:
5126cd6a6acSopenharmony_ci		return REGEX_ERROR;
5136cd6a6acSopenharmony_ci	}
5146cd6a6acSopenharmony_ci}
5156cd6a6acSopenharmony_ci
5166cd6a6acSopenharmony_ci/*
5176cd6a6acSopenharmony_ci * TODO Replace this compare function with something that actually compares the
5186cd6a6acSopenharmony_ci * regular expressions.
5196cd6a6acSopenharmony_ci * This compare function basically just compares the binary representations of
5206cd6a6acSopenharmony_ci * the automatons, and because this representation contains pointers and
5216cd6a6acSopenharmony_ci * metadata, it can only return a match if regex1 == regex2.
5226cd6a6acSopenharmony_ci * Preferably, this function would be replaced with an algorithm that computes
5236cd6a6acSopenharmony_ci * the equivalence of the automatons systematically.
5246cd6a6acSopenharmony_ci */
5256cd6a6acSopenharmony_ciint regex_cmp(struct regex_data *regex1, struct regex_data *regex2)
5266cd6a6acSopenharmony_ci{
5276cd6a6acSopenharmony_ci	int rc;
5286cd6a6acSopenharmony_ci	size_t len1, len2;
5296cd6a6acSopenharmony_ci	rc = pcre_fullinfo(regex1->regex, NULL, PCRE_INFO_SIZE, &len1);
5306cd6a6acSopenharmony_ci	assert(rc == 0);
5316cd6a6acSopenharmony_ci	rc = pcre_fullinfo(regex2->regex, NULL, PCRE_INFO_SIZE, &len2);
5326cd6a6acSopenharmony_ci	assert(rc == 0);
5336cd6a6acSopenharmony_ci	if (len1 != len2 || memcmp(regex1->regex, regex2->regex, len1))
5346cd6a6acSopenharmony_ci		return SELABEL_INCOMPARABLE;
5356cd6a6acSopenharmony_ci
5366cd6a6acSopenharmony_ci	return SELABEL_EQUAL;
5376cd6a6acSopenharmony_ci}
5386cd6a6acSopenharmony_ci
5396cd6a6acSopenharmony_cistruct regex_data *regex_data_create(void)
5406cd6a6acSopenharmony_ci{
5416cd6a6acSopenharmony_ci	return (struct regex_data *)calloc(1, sizeof(struct regex_data));
5426cd6a6acSopenharmony_ci}
5436cd6a6acSopenharmony_ci
5446cd6a6acSopenharmony_ci#endif
5456cd6a6acSopenharmony_ci
5466cd6a6acSopenharmony_civoid regex_format_error(struct regex_error_data const *error_data, char *buffer,
5476cd6a6acSopenharmony_ci			size_t buf_size)
5486cd6a6acSopenharmony_ci{
5496cd6a6acSopenharmony_ci	unsigned the_end_length = buf_size > 4 ? 4 : buf_size;
5506cd6a6acSopenharmony_ci	char *ptr = &buffer[buf_size - the_end_length];
5516cd6a6acSopenharmony_ci	int rc = 0;
5526cd6a6acSopenharmony_ci	size_t pos = 0;
5536cd6a6acSopenharmony_ci	if (!buffer || !buf_size)
5546cd6a6acSopenharmony_ci		return;
5556cd6a6acSopenharmony_ci	rc = snprintf(buffer, buf_size, "REGEX back-end error: ");
5566cd6a6acSopenharmony_ci	if (rc < 0)
5576cd6a6acSopenharmony_ci		/*
5586cd6a6acSopenharmony_ci		 * If snprintf fails it constitutes a logical error that needs
5596cd6a6acSopenharmony_ci		 * fixing.
5606cd6a6acSopenharmony_ci		 */
5616cd6a6acSopenharmony_ci		abort();
5626cd6a6acSopenharmony_ci
5636cd6a6acSopenharmony_ci	pos += rc;
5646cd6a6acSopenharmony_ci	if (pos >= buf_size)
5656cd6a6acSopenharmony_ci		goto truncated;
5666cd6a6acSopenharmony_ci
5676cd6a6acSopenharmony_ci	/* Return early if there is no error to format */
5686cd6a6acSopenharmony_ci#ifdef USE_PCRE2
5696cd6a6acSopenharmony_ci	if (!error_data->error_code) {
5706cd6a6acSopenharmony_ci		rc = snprintf(buffer + pos, buf_size - pos, "no error code");
5716cd6a6acSopenharmony_ci		if (rc < 0)
5726cd6a6acSopenharmony_ci			abort();
5736cd6a6acSopenharmony_ci		pos += rc;
5746cd6a6acSopenharmony_ci		if (pos >= buf_size)
5756cd6a6acSopenharmony_ci			goto truncated;
5766cd6a6acSopenharmony_ci		return;
5776cd6a6acSopenharmony_ci	}
5786cd6a6acSopenharmony_ci#else
5796cd6a6acSopenharmony_ci	if (!error_data->error_buffer) {
5806cd6a6acSopenharmony_ci		rc = snprintf(buffer + pos, buf_size - pos, "empty error");
5816cd6a6acSopenharmony_ci		if (rc < 0)
5826cd6a6acSopenharmony_ci			abort();
5836cd6a6acSopenharmony_ci		pos += rc;
5846cd6a6acSopenharmony_ci		if (pos >= buf_size)
5856cd6a6acSopenharmony_ci			goto truncated;
5866cd6a6acSopenharmony_ci		return;
5876cd6a6acSopenharmony_ci	}
5886cd6a6acSopenharmony_ci#endif
5896cd6a6acSopenharmony_ci
5906cd6a6acSopenharmony_ci	if (error_data->error_offset > 0) {
5916cd6a6acSopenharmony_ci#ifdef USE_PCRE2
5926cd6a6acSopenharmony_ci		rc = snprintf(buffer + pos, buf_size - pos, "At offset %zu: ",
5936cd6a6acSopenharmony_ci			      error_data->error_offset);
5946cd6a6acSopenharmony_ci#else
5956cd6a6acSopenharmony_ci		rc = snprintf(buffer + pos, buf_size - pos, "At offset %d: ",
5966cd6a6acSopenharmony_ci			      error_data->error_offset);
5976cd6a6acSopenharmony_ci#endif
5986cd6a6acSopenharmony_ci		if (rc < 0)
5996cd6a6acSopenharmony_ci			abort();
6006cd6a6acSopenharmony_ci		pos += rc;
6016cd6a6acSopenharmony_ci		if (pos >= buf_size)
6026cd6a6acSopenharmony_ci			goto truncated;
6036cd6a6acSopenharmony_ci	}
6046cd6a6acSopenharmony_ci
6056cd6a6acSopenharmony_ci#ifdef USE_PCRE2
6066cd6a6acSopenharmony_ci	rc = pcre2_get_error_message(error_data->error_code,
6076cd6a6acSopenharmony_ci				     (PCRE2_UCHAR *)(buffer + pos),
6086cd6a6acSopenharmony_ci				     buf_size - pos);
6096cd6a6acSopenharmony_ci	if (rc == PCRE2_ERROR_NOMEMORY)
6106cd6a6acSopenharmony_ci		goto truncated;
6116cd6a6acSopenharmony_ci#else
6126cd6a6acSopenharmony_ci	rc = snprintf(buffer + pos, buf_size - pos, "%s",
6136cd6a6acSopenharmony_ci		      error_data->error_buffer);
6146cd6a6acSopenharmony_ci	if (rc < 0)
6156cd6a6acSopenharmony_ci		abort();
6166cd6a6acSopenharmony_ci
6176cd6a6acSopenharmony_ci	if ((size_t)rc < strlen(error_data->error_buffer))
6186cd6a6acSopenharmony_ci		goto truncated;
6196cd6a6acSopenharmony_ci#endif
6206cd6a6acSopenharmony_ci
6216cd6a6acSopenharmony_ci	return;
6226cd6a6acSopenharmony_ci
6236cd6a6acSopenharmony_citruncated:
6246cd6a6acSopenharmony_ci	/* replace end of string with "..." to indicate that it was truncated */
6256cd6a6acSopenharmony_ci	switch (the_end_length) {
6266cd6a6acSopenharmony_ci	/* no break statements, fall-through is intended */
6276cd6a6acSopenharmony_ci	case 4:
6286cd6a6acSopenharmony_ci		*ptr++ = '.';
6296cd6a6acSopenharmony_ci		/* FALLTHRU */
6306cd6a6acSopenharmony_ci	case 3:
6316cd6a6acSopenharmony_ci		*ptr++ = '.';
6326cd6a6acSopenharmony_ci		/* FALLTHRU */
6336cd6a6acSopenharmony_ci	case 2:
6346cd6a6acSopenharmony_ci		*ptr++ = '.';
6356cd6a6acSopenharmony_ci		/* FALLTHRU */
6366cd6a6acSopenharmony_ci	case 1:
6376cd6a6acSopenharmony_ci		*ptr++ = '\0';
6386cd6a6acSopenharmony_ci		/* FALLTHRU */
6396cd6a6acSopenharmony_ci	default:
6406cd6a6acSopenharmony_ci		break;
6416cd6a6acSopenharmony_ci	}
6426cd6a6acSopenharmony_ci	return;
6436cd6a6acSopenharmony_ci}
644