18c2ecf20Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0 */ 28c2ecf20Sopenharmony_ci#include <linux/module.h> 38c2ecf20Sopenharmony_ci#include <linux/kernel.h> 48c2ecf20Sopenharmony_ci#include <linux/string.h> 58c2ecf20Sopenharmony_ci#include <linux/slab.h> 68c2ecf20Sopenharmony_ci#include <linux/parser.h> 78c2ecf20Sopenharmony_ci#include <linux/errno.h> 88c2ecf20Sopenharmony_ci#include <linux/unicode.h> 98c2ecf20Sopenharmony_ci#include <linux/stringhash.h> 108c2ecf20Sopenharmony_ci 118c2ecf20Sopenharmony_ci#include "utf8n.h" 128c2ecf20Sopenharmony_ci 138c2ecf20Sopenharmony_ciint utf8_validate(const struct unicode_map *um, const struct qstr *str) 148c2ecf20Sopenharmony_ci{ 158c2ecf20Sopenharmony_ci const struct utf8data *data = utf8nfdi(um->version); 168c2ecf20Sopenharmony_ci 178c2ecf20Sopenharmony_ci if (utf8nlen(data, str->name, str->len) < 0) 188c2ecf20Sopenharmony_ci return -1; 198c2ecf20Sopenharmony_ci return 0; 208c2ecf20Sopenharmony_ci} 218c2ecf20Sopenharmony_ciEXPORT_SYMBOL(utf8_validate); 228c2ecf20Sopenharmony_ci 238c2ecf20Sopenharmony_ciint utf8_strncmp(const struct unicode_map *um, 248c2ecf20Sopenharmony_ci const struct qstr *s1, const struct qstr *s2) 258c2ecf20Sopenharmony_ci{ 268c2ecf20Sopenharmony_ci const struct utf8data *data = utf8nfdi(um->version); 278c2ecf20Sopenharmony_ci struct utf8cursor cur1, cur2; 288c2ecf20Sopenharmony_ci int c1, c2; 298c2ecf20Sopenharmony_ci 308c2ecf20Sopenharmony_ci if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0) 318c2ecf20Sopenharmony_ci return -EINVAL; 328c2ecf20Sopenharmony_ci 338c2ecf20Sopenharmony_ci if (utf8ncursor(&cur2, data, s2->name, s2->len) < 0) 348c2ecf20Sopenharmony_ci return -EINVAL; 358c2ecf20Sopenharmony_ci 368c2ecf20Sopenharmony_ci do { 378c2ecf20Sopenharmony_ci c1 = utf8byte(&cur1); 388c2ecf20Sopenharmony_ci c2 = utf8byte(&cur2); 398c2ecf20Sopenharmony_ci 408c2ecf20Sopenharmony_ci if (c1 < 0 || c2 < 0) 418c2ecf20Sopenharmony_ci return -EINVAL; 428c2ecf20Sopenharmony_ci if (c1 != c2) 438c2ecf20Sopenharmony_ci return 1; 448c2ecf20Sopenharmony_ci } while (c1); 458c2ecf20Sopenharmony_ci 468c2ecf20Sopenharmony_ci return 0; 478c2ecf20Sopenharmony_ci} 488c2ecf20Sopenharmony_ciEXPORT_SYMBOL(utf8_strncmp); 498c2ecf20Sopenharmony_ci 508c2ecf20Sopenharmony_ciint utf8_strncasecmp(const struct unicode_map *um, 518c2ecf20Sopenharmony_ci const struct qstr *s1, const struct qstr *s2) 528c2ecf20Sopenharmony_ci{ 538c2ecf20Sopenharmony_ci const struct utf8data *data = utf8nfdicf(um->version); 548c2ecf20Sopenharmony_ci struct utf8cursor cur1, cur2; 558c2ecf20Sopenharmony_ci int c1, c2; 568c2ecf20Sopenharmony_ci 578c2ecf20Sopenharmony_ci if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0) 588c2ecf20Sopenharmony_ci return -EINVAL; 598c2ecf20Sopenharmony_ci 608c2ecf20Sopenharmony_ci if (utf8ncursor(&cur2, data, s2->name, s2->len) < 0) 618c2ecf20Sopenharmony_ci return -EINVAL; 628c2ecf20Sopenharmony_ci 638c2ecf20Sopenharmony_ci do { 648c2ecf20Sopenharmony_ci c1 = utf8byte(&cur1); 658c2ecf20Sopenharmony_ci c2 = utf8byte(&cur2); 668c2ecf20Sopenharmony_ci 678c2ecf20Sopenharmony_ci if (c1 < 0 || c2 < 0) 688c2ecf20Sopenharmony_ci return -EINVAL; 698c2ecf20Sopenharmony_ci if (c1 != c2) 708c2ecf20Sopenharmony_ci return 1; 718c2ecf20Sopenharmony_ci } while (c1); 728c2ecf20Sopenharmony_ci 738c2ecf20Sopenharmony_ci return 0; 748c2ecf20Sopenharmony_ci} 758c2ecf20Sopenharmony_ciEXPORT_SYMBOL(utf8_strncasecmp); 768c2ecf20Sopenharmony_ci 778c2ecf20Sopenharmony_ci/* String cf is expected to be a valid UTF-8 casefolded 788c2ecf20Sopenharmony_ci * string. 798c2ecf20Sopenharmony_ci */ 808c2ecf20Sopenharmony_ciint utf8_strncasecmp_folded(const struct unicode_map *um, 818c2ecf20Sopenharmony_ci const struct qstr *cf, 828c2ecf20Sopenharmony_ci const struct qstr *s1) 838c2ecf20Sopenharmony_ci{ 848c2ecf20Sopenharmony_ci const struct utf8data *data = utf8nfdicf(um->version); 858c2ecf20Sopenharmony_ci struct utf8cursor cur1; 868c2ecf20Sopenharmony_ci int c1, c2; 878c2ecf20Sopenharmony_ci int i = 0; 888c2ecf20Sopenharmony_ci 898c2ecf20Sopenharmony_ci if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0) 908c2ecf20Sopenharmony_ci return -EINVAL; 918c2ecf20Sopenharmony_ci 928c2ecf20Sopenharmony_ci do { 938c2ecf20Sopenharmony_ci c1 = utf8byte(&cur1); 948c2ecf20Sopenharmony_ci c2 = cf->name[i++]; 958c2ecf20Sopenharmony_ci if (c1 < 0) 968c2ecf20Sopenharmony_ci return -EINVAL; 978c2ecf20Sopenharmony_ci if (c1 != c2) 988c2ecf20Sopenharmony_ci return 1; 998c2ecf20Sopenharmony_ci } while (c1); 1008c2ecf20Sopenharmony_ci 1018c2ecf20Sopenharmony_ci return 0; 1028c2ecf20Sopenharmony_ci} 1038c2ecf20Sopenharmony_ciEXPORT_SYMBOL(utf8_strncasecmp_folded); 1048c2ecf20Sopenharmony_ci 1058c2ecf20Sopenharmony_ciint utf8_casefold(const struct unicode_map *um, const struct qstr *str, 1068c2ecf20Sopenharmony_ci unsigned char *dest, size_t dlen) 1078c2ecf20Sopenharmony_ci{ 1088c2ecf20Sopenharmony_ci const struct utf8data *data = utf8nfdicf(um->version); 1098c2ecf20Sopenharmony_ci struct utf8cursor cur; 1108c2ecf20Sopenharmony_ci size_t nlen = 0; 1118c2ecf20Sopenharmony_ci 1128c2ecf20Sopenharmony_ci if (utf8ncursor(&cur, data, str->name, str->len) < 0) 1138c2ecf20Sopenharmony_ci return -EINVAL; 1148c2ecf20Sopenharmony_ci 1158c2ecf20Sopenharmony_ci for (nlen = 0; nlen < dlen; nlen++) { 1168c2ecf20Sopenharmony_ci int c = utf8byte(&cur); 1178c2ecf20Sopenharmony_ci 1188c2ecf20Sopenharmony_ci dest[nlen] = c; 1198c2ecf20Sopenharmony_ci if (!c) 1208c2ecf20Sopenharmony_ci return nlen; 1218c2ecf20Sopenharmony_ci if (c == -1) 1228c2ecf20Sopenharmony_ci break; 1238c2ecf20Sopenharmony_ci } 1248c2ecf20Sopenharmony_ci return -EINVAL; 1258c2ecf20Sopenharmony_ci} 1268c2ecf20Sopenharmony_ciEXPORT_SYMBOL(utf8_casefold); 1278c2ecf20Sopenharmony_ci 1288c2ecf20Sopenharmony_ciint utf8_casefold_hash(const struct unicode_map *um, const void *salt, 1298c2ecf20Sopenharmony_ci struct qstr *str) 1308c2ecf20Sopenharmony_ci{ 1318c2ecf20Sopenharmony_ci const struct utf8data *data = utf8nfdicf(um->version); 1328c2ecf20Sopenharmony_ci struct utf8cursor cur; 1338c2ecf20Sopenharmony_ci int c; 1348c2ecf20Sopenharmony_ci unsigned long hash = init_name_hash(salt); 1358c2ecf20Sopenharmony_ci 1368c2ecf20Sopenharmony_ci if (utf8ncursor(&cur, data, str->name, str->len) < 0) 1378c2ecf20Sopenharmony_ci return -EINVAL; 1388c2ecf20Sopenharmony_ci 1398c2ecf20Sopenharmony_ci while ((c = utf8byte(&cur))) { 1408c2ecf20Sopenharmony_ci if (c < 0) 1418c2ecf20Sopenharmony_ci return -EINVAL; 1428c2ecf20Sopenharmony_ci hash = partial_name_hash((unsigned char)c, hash); 1438c2ecf20Sopenharmony_ci } 1448c2ecf20Sopenharmony_ci str->hash = end_name_hash(hash); 1458c2ecf20Sopenharmony_ci return 0; 1468c2ecf20Sopenharmony_ci} 1478c2ecf20Sopenharmony_ciEXPORT_SYMBOL(utf8_casefold_hash); 1488c2ecf20Sopenharmony_ci 1498c2ecf20Sopenharmony_ciint utf8_normalize(const struct unicode_map *um, const struct qstr *str, 1508c2ecf20Sopenharmony_ci unsigned char *dest, size_t dlen) 1518c2ecf20Sopenharmony_ci{ 1528c2ecf20Sopenharmony_ci const struct utf8data *data = utf8nfdi(um->version); 1538c2ecf20Sopenharmony_ci struct utf8cursor cur; 1548c2ecf20Sopenharmony_ci ssize_t nlen = 0; 1558c2ecf20Sopenharmony_ci 1568c2ecf20Sopenharmony_ci if (utf8ncursor(&cur, data, str->name, str->len) < 0) 1578c2ecf20Sopenharmony_ci return -EINVAL; 1588c2ecf20Sopenharmony_ci 1598c2ecf20Sopenharmony_ci for (nlen = 0; nlen < dlen; nlen++) { 1608c2ecf20Sopenharmony_ci int c = utf8byte(&cur); 1618c2ecf20Sopenharmony_ci 1628c2ecf20Sopenharmony_ci dest[nlen] = c; 1638c2ecf20Sopenharmony_ci if (!c) 1648c2ecf20Sopenharmony_ci return nlen; 1658c2ecf20Sopenharmony_ci if (c == -1) 1668c2ecf20Sopenharmony_ci break; 1678c2ecf20Sopenharmony_ci } 1688c2ecf20Sopenharmony_ci return -EINVAL; 1698c2ecf20Sopenharmony_ci} 1708c2ecf20Sopenharmony_ci 1718c2ecf20Sopenharmony_ciEXPORT_SYMBOL(utf8_normalize); 1728c2ecf20Sopenharmony_ci 1738c2ecf20Sopenharmony_cistatic int utf8_parse_version(const char *version, unsigned int *maj, 1748c2ecf20Sopenharmony_ci unsigned int *min, unsigned int *rev) 1758c2ecf20Sopenharmony_ci{ 1768c2ecf20Sopenharmony_ci substring_t args[3]; 1778c2ecf20Sopenharmony_ci char version_string[12]; 1788c2ecf20Sopenharmony_ci static const struct match_token token[] = { 1798c2ecf20Sopenharmony_ci {1, "%d.%d.%d"}, 1808c2ecf20Sopenharmony_ci {0, NULL} 1818c2ecf20Sopenharmony_ci }; 1828c2ecf20Sopenharmony_ci 1838c2ecf20Sopenharmony_ci strncpy(version_string, version, sizeof(version_string)); 1848c2ecf20Sopenharmony_ci 1858c2ecf20Sopenharmony_ci if (match_token(version_string, token, args) != 1) 1868c2ecf20Sopenharmony_ci return -EINVAL; 1878c2ecf20Sopenharmony_ci 1888c2ecf20Sopenharmony_ci if (match_int(&args[0], maj) || match_int(&args[1], min) || 1898c2ecf20Sopenharmony_ci match_int(&args[2], rev)) 1908c2ecf20Sopenharmony_ci return -EINVAL; 1918c2ecf20Sopenharmony_ci 1928c2ecf20Sopenharmony_ci return 0; 1938c2ecf20Sopenharmony_ci} 1948c2ecf20Sopenharmony_ci 1958c2ecf20Sopenharmony_cistruct unicode_map *utf8_load(const char *version) 1968c2ecf20Sopenharmony_ci{ 1978c2ecf20Sopenharmony_ci struct unicode_map *um = NULL; 1988c2ecf20Sopenharmony_ci int unicode_version; 1998c2ecf20Sopenharmony_ci 2008c2ecf20Sopenharmony_ci if (version) { 2018c2ecf20Sopenharmony_ci unsigned int maj, min, rev; 2028c2ecf20Sopenharmony_ci 2038c2ecf20Sopenharmony_ci if (utf8_parse_version(version, &maj, &min, &rev) < 0) 2048c2ecf20Sopenharmony_ci return ERR_PTR(-EINVAL); 2058c2ecf20Sopenharmony_ci 2068c2ecf20Sopenharmony_ci if (!utf8version_is_supported(maj, min, rev)) 2078c2ecf20Sopenharmony_ci return ERR_PTR(-EINVAL); 2088c2ecf20Sopenharmony_ci 2098c2ecf20Sopenharmony_ci unicode_version = UNICODE_AGE(maj, min, rev); 2108c2ecf20Sopenharmony_ci } else { 2118c2ecf20Sopenharmony_ci unicode_version = utf8version_latest(); 2128c2ecf20Sopenharmony_ci printk(KERN_WARNING"UTF-8 version not specified. " 2138c2ecf20Sopenharmony_ci "Assuming latest supported version (%d.%d.%d).", 2148c2ecf20Sopenharmony_ci (unicode_version >> 16) & 0xff, 2158c2ecf20Sopenharmony_ci (unicode_version >> 8) & 0xff, 2168c2ecf20Sopenharmony_ci (unicode_version & 0xff)); 2178c2ecf20Sopenharmony_ci } 2188c2ecf20Sopenharmony_ci 2198c2ecf20Sopenharmony_ci um = kzalloc(sizeof(struct unicode_map), GFP_KERNEL); 2208c2ecf20Sopenharmony_ci if (!um) 2218c2ecf20Sopenharmony_ci return ERR_PTR(-ENOMEM); 2228c2ecf20Sopenharmony_ci 2238c2ecf20Sopenharmony_ci um->charset = "UTF-8"; 2248c2ecf20Sopenharmony_ci um->version = unicode_version; 2258c2ecf20Sopenharmony_ci 2268c2ecf20Sopenharmony_ci return um; 2278c2ecf20Sopenharmony_ci} 2288c2ecf20Sopenharmony_ciEXPORT_SYMBOL(utf8_load); 2298c2ecf20Sopenharmony_ci 2308c2ecf20Sopenharmony_civoid utf8_unload(struct unicode_map *um) 2318c2ecf20Sopenharmony_ci{ 2328c2ecf20Sopenharmony_ci kfree(um); 2338c2ecf20Sopenharmony_ci} 2348c2ecf20Sopenharmony_ciEXPORT_SYMBOL(utf8_unload); 2358c2ecf20Sopenharmony_ci 2368c2ecf20Sopenharmony_ciMODULE_LICENSE("GPL v2"); 237