18c2ecf20Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0 */
28c2ecf20Sopenharmony_ci#include <linux/module.h>
38c2ecf20Sopenharmony_ci#include <linux/kernel.h>
48c2ecf20Sopenharmony_ci#include <linux/string.h>
58c2ecf20Sopenharmony_ci#include <linux/slab.h>
68c2ecf20Sopenharmony_ci#include <linux/parser.h>
78c2ecf20Sopenharmony_ci#include <linux/errno.h>
88c2ecf20Sopenharmony_ci#include <linux/unicode.h>
98c2ecf20Sopenharmony_ci#include <linux/stringhash.h>
108c2ecf20Sopenharmony_ci
118c2ecf20Sopenharmony_ci#include "utf8n.h"
128c2ecf20Sopenharmony_ci
138c2ecf20Sopenharmony_ciint utf8_validate(const struct unicode_map *um, const struct qstr *str)
148c2ecf20Sopenharmony_ci{
158c2ecf20Sopenharmony_ci	const struct utf8data *data = utf8nfdi(um->version);
168c2ecf20Sopenharmony_ci
178c2ecf20Sopenharmony_ci	if (utf8nlen(data, str->name, str->len) < 0)
188c2ecf20Sopenharmony_ci		return -1;
198c2ecf20Sopenharmony_ci	return 0;
208c2ecf20Sopenharmony_ci}
218c2ecf20Sopenharmony_ciEXPORT_SYMBOL(utf8_validate);
228c2ecf20Sopenharmony_ci
238c2ecf20Sopenharmony_ciint utf8_strncmp(const struct unicode_map *um,
248c2ecf20Sopenharmony_ci		 const struct qstr *s1, const struct qstr *s2)
258c2ecf20Sopenharmony_ci{
268c2ecf20Sopenharmony_ci	const struct utf8data *data = utf8nfdi(um->version);
278c2ecf20Sopenharmony_ci	struct utf8cursor cur1, cur2;
288c2ecf20Sopenharmony_ci	int c1, c2;
298c2ecf20Sopenharmony_ci
308c2ecf20Sopenharmony_ci	if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0)
318c2ecf20Sopenharmony_ci		return -EINVAL;
328c2ecf20Sopenharmony_ci
338c2ecf20Sopenharmony_ci	if (utf8ncursor(&cur2, data, s2->name, s2->len) < 0)
348c2ecf20Sopenharmony_ci		return -EINVAL;
358c2ecf20Sopenharmony_ci
368c2ecf20Sopenharmony_ci	do {
378c2ecf20Sopenharmony_ci		c1 = utf8byte(&cur1);
388c2ecf20Sopenharmony_ci		c2 = utf8byte(&cur2);
398c2ecf20Sopenharmony_ci
408c2ecf20Sopenharmony_ci		if (c1 < 0 || c2 < 0)
418c2ecf20Sopenharmony_ci			return -EINVAL;
428c2ecf20Sopenharmony_ci		if (c1 != c2)
438c2ecf20Sopenharmony_ci			return 1;
448c2ecf20Sopenharmony_ci	} while (c1);
458c2ecf20Sopenharmony_ci
468c2ecf20Sopenharmony_ci	return 0;
478c2ecf20Sopenharmony_ci}
488c2ecf20Sopenharmony_ciEXPORT_SYMBOL(utf8_strncmp);
498c2ecf20Sopenharmony_ci
508c2ecf20Sopenharmony_ciint utf8_strncasecmp(const struct unicode_map *um,
518c2ecf20Sopenharmony_ci		     const struct qstr *s1, const struct qstr *s2)
528c2ecf20Sopenharmony_ci{
538c2ecf20Sopenharmony_ci	const struct utf8data *data = utf8nfdicf(um->version);
548c2ecf20Sopenharmony_ci	struct utf8cursor cur1, cur2;
558c2ecf20Sopenharmony_ci	int c1, c2;
568c2ecf20Sopenharmony_ci
578c2ecf20Sopenharmony_ci	if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0)
588c2ecf20Sopenharmony_ci		return -EINVAL;
598c2ecf20Sopenharmony_ci
608c2ecf20Sopenharmony_ci	if (utf8ncursor(&cur2, data, s2->name, s2->len) < 0)
618c2ecf20Sopenharmony_ci		return -EINVAL;
628c2ecf20Sopenharmony_ci
638c2ecf20Sopenharmony_ci	do {
648c2ecf20Sopenharmony_ci		c1 = utf8byte(&cur1);
658c2ecf20Sopenharmony_ci		c2 = utf8byte(&cur2);
668c2ecf20Sopenharmony_ci
678c2ecf20Sopenharmony_ci		if (c1 < 0 || c2 < 0)
688c2ecf20Sopenharmony_ci			return -EINVAL;
698c2ecf20Sopenharmony_ci		if (c1 != c2)
708c2ecf20Sopenharmony_ci			return 1;
718c2ecf20Sopenharmony_ci	} while (c1);
728c2ecf20Sopenharmony_ci
738c2ecf20Sopenharmony_ci	return 0;
748c2ecf20Sopenharmony_ci}
758c2ecf20Sopenharmony_ciEXPORT_SYMBOL(utf8_strncasecmp);
768c2ecf20Sopenharmony_ci
778c2ecf20Sopenharmony_ci/* String cf is expected to be a valid UTF-8 casefolded
788c2ecf20Sopenharmony_ci * string.
798c2ecf20Sopenharmony_ci */
808c2ecf20Sopenharmony_ciint utf8_strncasecmp_folded(const struct unicode_map *um,
818c2ecf20Sopenharmony_ci			    const struct qstr *cf,
828c2ecf20Sopenharmony_ci			    const struct qstr *s1)
838c2ecf20Sopenharmony_ci{
848c2ecf20Sopenharmony_ci	const struct utf8data *data = utf8nfdicf(um->version);
858c2ecf20Sopenharmony_ci	struct utf8cursor cur1;
868c2ecf20Sopenharmony_ci	int c1, c2;
878c2ecf20Sopenharmony_ci	int i = 0;
888c2ecf20Sopenharmony_ci
898c2ecf20Sopenharmony_ci	if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0)
908c2ecf20Sopenharmony_ci		return -EINVAL;
918c2ecf20Sopenharmony_ci
928c2ecf20Sopenharmony_ci	do {
938c2ecf20Sopenharmony_ci		c1 = utf8byte(&cur1);
948c2ecf20Sopenharmony_ci		c2 = cf->name[i++];
958c2ecf20Sopenharmony_ci		if (c1 < 0)
968c2ecf20Sopenharmony_ci			return -EINVAL;
978c2ecf20Sopenharmony_ci		if (c1 != c2)
988c2ecf20Sopenharmony_ci			return 1;
998c2ecf20Sopenharmony_ci	} while (c1);
1008c2ecf20Sopenharmony_ci
1018c2ecf20Sopenharmony_ci	return 0;
1028c2ecf20Sopenharmony_ci}
1038c2ecf20Sopenharmony_ciEXPORT_SYMBOL(utf8_strncasecmp_folded);
1048c2ecf20Sopenharmony_ci
1058c2ecf20Sopenharmony_ciint utf8_casefold(const struct unicode_map *um, const struct qstr *str,
1068c2ecf20Sopenharmony_ci		  unsigned char *dest, size_t dlen)
1078c2ecf20Sopenharmony_ci{
1088c2ecf20Sopenharmony_ci	const struct utf8data *data = utf8nfdicf(um->version);
1098c2ecf20Sopenharmony_ci	struct utf8cursor cur;
1108c2ecf20Sopenharmony_ci	size_t nlen = 0;
1118c2ecf20Sopenharmony_ci
1128c2ecf20Sopenharmony_ci	if (utf8ncursor(&cur, data, str->name, str->len) < 0)
1138c2ecf20Sopenharmony_ci		return -EINVAL;
1148c2ecf20Sopenharmony_ci
1158c2ecf20Sopenharmony_ci	for (nlen = 0; nlen < dlen; nlen++) {
1168c2ecf20Sopenharmony_ci		int c = utf8byte(&cur);
1178c2ecf20Sopenharmony_ci
1188c2ecf20Sopenharmony_ci		dest[nlen] = c;
1198c2ecf20Sopenharmony_ci		if (!c)
1208c2ecf20Sopenharmony_ci			return nlen;
1218c2ecf20Sopenharmony_ci		if (c == -1)
1228c2ecf20Sopenharmony_ci			break;
1238c2ecf20Sopenharmony_ci	}
1248c2ecf20Sopenharmony_ci	return -EINVAL;
1258c2ecf20Sopenharmony_ci}
1268c2ecf20Sopenharmony_ciEXPORT_SYMBOL(utf8_casefold);
1278c2ecf20Sopenharmony_ci
1288c2ecf20Sopenharmony_ciint utf8_casefold_hash(const struct unicode_map *um, const void *salt,
1298c2ecf20Sopenharmony_ci		       struct qstr *str)
1308c2ecf20Sopenharmony_ci{
1318c2ecf20Sopenharmony_ci	const struct utf8data *data = utf8nfdicf(um->version);
1328c2ecf20Sopenharmony_ci	struct utf8cursor cur;
1338c2ecf20Sopenharmony_ci	int c;
1348c2ecf20Sopenharmony_ci	unsigned long hash = init_name_hash(salt);
1358c2ecf20Sopenharmony_ci
1368c2ecf20Sopenharmony_ci	if (utf8ncursor(&cur, data, str->name, str->len) < 0)
1378c2ecf20Sopenharmony_ci		return -EINVAL;
1388c2ecf20Sopenharmony_ci
1398c2ecf20Sopenharmony_ci	while ((c = utf8byte(&cur))) {
1408c2ecf20Sopenharmony_ci		if (c < 0)
1418c2ecf20Sopenharmony_ci			return -EINVAL;
1428c2ecf20Sopenharmony_ci		hash = partial_name_hash((unsigned char)c, hash);
1438c2ecf20Sopenharmony_ci	}
1448c2ecf20Sopenharmony_ci	str->hash = end_name_hash(hash);
1458c2ecf20Sopenharmony_ci	return 0;
1468c2ecf20Sopenharmony_ci}
1478c2ecf20Sopenharmony_ciEXPORT_SYMBOL(utf8_casefold_hash);
1488c2ecf20Sopenharmony_ci
1498c2ecf20Sopenharmony_ciint utf8_normalize(const struct unicode_map *um, const struct qstr *str,
1508c2ecf20Sopenharmony_ci		   unsigned char *dest, size_t dlen)
1518c2ecf20Sopenharmony_ci{
1528c2ecf20Sopenharmony_ci	const struct utf8data *data = utf8nfdi(um->version);
1538c2ecf20Sopenharmony_ci	struct utf8cursor cur;
1548c2ecf20Sopenharmony_ci	ssize_t nlen = 0;
1558c2ecf20Sopenharmony_ci
1568c2ecf20Sopenharmony_ci	if (utf8ncursor(&cur, data, str->name, str->len) < 0)
1578c2ecf20Sopenharmony_ci		return -EINVAL;
1588c2ecf20Sopenharmony_ci
1598c2ecf20Sopenharmony_ci	for (nlen = 0; nlen < dlen; nlen++) {
1608c2ecf20Sopenharmony_ci		int c = utf8byte(&cur);
1618c2ecf20Sopenharmony_ci
1628c2ecf20Sopenharmony_ci		dest[nlen] = c;
1638c2ecf20Sopenharmony_ci		if (!c)
1648c2ecf20Sopenharmony_ci			return nlen;
1658c2ecf20Sopenharmony_ci		if (c == -1)
1668c2ecf20Sopenharmony_ci			break;
1678c2ecf20Sopenharmony_ci	}
1688c2ecf20Sopenharmony_ci	return -EINVAL;
1698c2ecf20Sopenharmony_ci}
1708c2ecf20Sopenharmony_ci
1718c2ecf20Sopenharmony_ciEXPORT_SYMBOL(utf8_normalize);
1728c2ecf20Sopenharmony_ci
1738c2ecf20Sopenharmony_cistatic int utf8_parse_version(const char *version, unsigned int *maj,
1748c2ecf20Sopenharmony_ci			      unsigned int *min, unsigned int *rev)
1758c2ecf20Sopenharmony_ci{
1768c2ecf20Sopenharmony_ci	substring_t args[3];
1778c2ecf20Sopenharmony_ci	char version_string[12];
1788c2ecf20Sopenharmony_ci	static const struct match_token token[] = {
1798c2ecf20Sopenharmony_ci		{1, "%d.%d.%d"},
1808c2ecf20Sopenharmony_ci		{0, NULL}
1818c2ecf20Sopenharmony_ci	};
1828c2ecf20Sopenharmony_ci
1838c2ecf20Sopenharmony_ci	strncpy(version_string, version, sizeof(version_string));
1848c2ecf20Sopenharmony_ci
1858c2ecf20Sopenharmony_ci	if (match_token(version_string, token, args) != 1)
1868c2ecf20Sopenharmony_ci		return -EINVAL;
1878c2ecf20Sopenharmony_ci
1888c2ecf20Sopenharmony_ci	if (match_int(&args[0], maj) || match_int(&args[1], min) ||
1898c2ecf20Sopenharmony_ci	    match_int(&args[2], rev))
1908c2ecf20Sopenharmony_ci		return -EINVAL;
1918c2ecf20Sopenharmony_ci
1928c2ecf20Sopenharmony_ci	return 0;
1938c2ecf20Sopenharmony_ci}
1948c2ecf20Sopenharmony_ci
1958c2ecf20Sopenharmony_cistruct unicode_map *utf8_load(const char *version)
1968c2ecf20Sopenharmony_ci{
1978c2ecf20Sopenharmony_ci	struct unicode_map *um = NULL;
1988c2ecf20Sopenharmony_ci	int unicode_version;
1998c2ecf20Sopenharmony_ci
2008c2ecf20Sopenharmony_ci	if (version) {
2018c2ecf20Sopenharmony_ci		unsigned int maj, min, rev;
2028c2ecf20Sopenharmony_ci
2038c2ecf20Sopenharmony_ci		if (utf8_parse_version(version, &maj, &min, &rev) < 0)
2048c2ecf20Sopenharmony_ci			return ERR_PTR(-EINVAL);
2058c2ecf20Sopenharmony_ci
2068c2ecf20Sopenharmony_ci		if (!utf8version_is_supported(maj, min, rev))
2078c2ecf20Sopenharmony_ci			return ERR_PTR(-EINVAL);
2088c2ecf20Sopenharmony_ci
2098c2ecf20Sopenharmony_ci		unicode_version = UNICODE_AGE(maj, min, rev);
2108c2ecf20Sopenharmony_ci	} else {
2118c2ecf20Sopenharmony_ci		unicode_version = utf8version_latest();
2128c2ecf20Sopenharmony_ci		printk(KERN_WARNING"UTF-8 version not specified. "
2138c2ecf20Sopenharmony_ci		       "Assuming latest supported version (%d.%d.%d).",
2148c2ecf20Sopenharmony_ci		       (unicode_version >> 16) & 0xff,
2158c2ecf20Sopenharmony_ci		       (unicode_version >> 8) & 0xff,
2168c2ecf20Sopenharmony_ci		       (unicode_version & 0xff));
2178c2ecf20Sopenharmony_ci	}
2188c2ecf20Sopenharmony_ci
2198c2ecf20Sopenharmony_ci	um = kzalloc(sizeof(struct unicode_map), GFP_KERNEL);
2208c2ecf20Sopenharmony_ci	if (!um)
2218c2ecf20Sopenharmony_ci		return ERR_PTR(-ENOMEM);
2228c2ecf20Sopenharmony_ci
2238c2ecf20Sopenharmony_ci	um->charset = "UTF-8";
2248c2ecf20Sopenharmony_ci	um->version = unicode_version;
2258c2ecf20Sopenharmony_ci
2268c2ecf20Sopenharmony_ci	return um;
2278c2ecf20Sopenharmony_ci}
2288c2ecf20Sopenharmony_ciEXPORT_SYMBOL(utf8_load);
2298c2ecf20Sopenharmony_ci
/*
 * utf8_unload - release a unicode map.
 * @um: map to release; the pointer is handed straight to kfree()
 */
void utf8_unload(struct unicode_map *um)
{
	kfree(um);
}
EXPORT_SYMBOL(utf8_unload);
2358c2ecf20Sopenharmony_ci
2368c2ecf20Sopenharmony_ciMODULE_LICENSE("GPL v2");
237