xref: /kernel/linux/linux-5.10/fs/unicode/utf8-core.c (revision 8c2ecf20)
1/* SPDX-License-Identifier: GPL-2.0 */
2#include <linux/module.h>
3#include <linux/kernel.h>
4#include <linux/string.h>
5#include <linux/slab.h>
6#include <linux/parser.h>
7#include <linux/errno.h>
8#include <linux/unicode.h>
9#include <linux/stringhash.h>
10
11#include "utf8n.h"
12
13int utf8_validate(const struct unicode_map *um, const struct qstr *str)
14{
15	const struct utf8data *data = utf8nfdi(um->version);
16
17	if (utf8nlen(data, str->name, str->len) < 0)
18		return -1;
19	return 0;
20}
21EXPORT_SYMBOL(utf8_validate);
22
23int utf8_strncmp(const struct unicode_map *um,
24		 const struct qstr *s1, const struct qstr *s2)
25{
26	const struct utf8data *data = utf8nfdi(um->version);
27	struct utf8cursor cur1, cur2;
28	int c1, c2;
29
30	if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0)
31		return -EINVAL;
32
33	if (utf8ncursor(&cur2, data, s2->name, s2->len) < 0)
34		return -EINVAL;
35
36	do {
37		c1 = utf8byte(&cur1);
38		c2 = utf8byte(&cur2);
39
40		if (c1 < 0 || c2 < 0)
41			return -EINVAL;
42		if (c1 != c2)
43			return 1;
44	} while (c1);
45
46	return 0;
47}
48EXPORT_SYMBOL(utf8_strncmp);
49
50int utf8_strncasecmp(const struct unicode_map *um,
51		     const struct qstr *s1, const struct qstr *s2)
52{
53	const struct utf8data *data = utf8nfdicf(um->version);
54	struct utf8cursor cur1, cur2;
55	int c1, c2;
56
57	if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0)
58		return -EINVAL;
59
60	if (utf8ncursor(&cur2, data, s2->name, s2->len) < 0)
61		return -EINVAL;
62
63	do {
64		c1 = utf8byte(&cur1);
65		c2 = utf8byte(&cur2);
66
67		if (c1 < 0 || c2 < 0)
68			return -EINVAL;
69		if (c1 != c2)
70			return 1;
71	} while (c1);
72
73	return 0;
74}
75EXPORT_SYMBOL(utf8_strncasecmp);
76
77/* String cf is expected to be a valid UTF-8 casefolded
78 * string.
79 */
80int utf8_strncasecmp_folded(const struct unicode_map *um,
81			    const struct qstr *cf,
82			    const struct qstr *s1)
83{
84	const struct utf8data *data = utf8nfdicf(um->version);
85	struct utf8cursor cur1;
86	int c1, c2;
87	int i = 0;
88
89	if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0)
90		return -EINVAL;
91
92	do {
93		c1 = utf8byte(&cur1);
94		c2 = cf->name[i++];
95		if (c1 < 0)
96			return -EINVAL;
97		if (c1 != c2)
98			return 1;
99	} while (c1);
100
101	return 0;
102}
103EXPORT_SYMBOL(utf8_strncasecmp_folded);
104
105int utf8_casefold(const struct unicode_map *um, const struct qstr *str,
106		  unsigned char *dest, size_t dlen)
107{
108	const struct utf8data *data = utf8nfdicf(um->version);
109	struct utf8cursor cur;
110	size_t nlen = 0;
111
112	if (utf8ncursor(&cur, data, str->name, str->len) < 0)
113		return -EINVAL;
114
115	for (nlen = 0; nlen < dlen; nlen++) {
116		int c = utf8byte(&cur);
117
118		dest[nlen] = c;
119		if (!c)
120			return nlen;
121		if (c == -1)
122			break;
123	}
124	return -EINVAL;
125}
126EXPORT_SYMBOL(utf8_casefold);
127
128int utf8_casefold_hash(const struct unicode_map *um, const void *salt,
129		       struct qstr *str)
130{
131	const struct utf8data *data = utf8nfdicf(um->version);
132	struct utf8cursor cur;
133	int c;
134	unsigned long hash = init_name_hash(salt);
135
136	if (utf8ncursor(&cur, data, str->name, str->len) < 0)
137		return -EINVAL;
138
139	while ((c = utf8byte(&cur))) {
140		if (c < 0)
141			return -EINVAL;
142		hash = partial_name_hash((unsigned char)c, hash);
143	}
144	str->hash = end_name_hash(hash);
145	return 0;
146}
147EXPORT_SYMBOL(utf8_casefold_hash);
148
149int utf8_normalize(const struct unicode_map *um, const struct qstr *str,
150		   unsigned char *dest, size_t dlen)
151{
152	const struct utf8data *data = utf8nfdi(um->version);
153	struct utf8cursor cur;
154	ssize_t nlen = 0;
155
156	if (utf8ncursor(&cur, data, str->name, str->len) < 0)
157		return -EINVAL;
158
159	for (nlen = 0; nlen < dlen; nlen++) {
160		int c = utf8byte(&cur);
161
162		dest[nlen] = c;
163		if (!c)
164			return nlen;
165		if (c == -1)
166			break;
167	}
168	return -EINVAL;
169}
170
171EXPORT_SYMBOL(utf8_normalize);
172
173static int utf8_parse_version(const char *version, unsigned int *maj,
174			      unsigned int *min, unsigned int *rev)
175{
176	substring_t args[3];
177	char version_string[12];
178	static const struct match_token token[] = {
179		{1, "%d.%d.%d"},
180		{0, NULL}
181	};
182
183	strncpy(version_string, version, sizeof(version_string));
184
185	if (match_token(version_string, token, args) != 1)
186		return -EINVAL;
187
188	if (match_int(&args[0], maj) || match_int(&args[1], min) ||
189	    match_int(&args[2], rev))
190		return -EINVAL;
191
192	return 0;
193}
194
195struct unicode_map *utf8_load(const char *version)
196{
197	struct unicode_map *um = NULL;
198	int unicode_version;
199
200	if (version) {
201		unsigned int maj, min, rev;
202
203		if (utf8_parse_version(version, &maj, &min, &rev) < 0)
204			return ERR_PTR(-EINVAL);
205
206		if (!utf8version_is_supported(maj, min, rev))
207			return ERR_PTR(-EINVAL);
208
209		unicode_version = UNICODE_AGE(maj, min, rev);
210	} else {
211		unicode_version = utf8version_latest();
212		printk(KERN_WARNING"UTF-8 version not specified. "
213		       "Assuming latest supported version (%d.%d.%d).",
214		       (unicode_version >> 16) & 0xff,
215		       (unicode_version >> 8) & 0xff,
216		       (unicode_version & 0xff));
217	}
218
219	um = kzalloc(sizeof(struct unicode_map), GFP_KERNEL);
220	if (!um)
221		return ERR_PTR(-ENOMEM);
222
223	um->charset = "UTF-8";
224	um->version = unicode_version;
225
226	return um;
227}
228EXPORT_SYMBOL(utf8_load);
229
/**
 * utf8_unload - release a unicode map
 * @um: map to release; it is handed straight to kfree()
 */
void utf8_unload(struct unicode_map *um)
{
	kfree(um);
}
EXPORT_SYMBOL(utf8_unload);
235
236MODULE_LICENSE("GPL v2");
237