/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2014 SGI.
 * All rights reserved.
 */

762306a36Sopenharmony_ci#ifndef UTF8NORM_H
862306a36Sopenharmony_ci#define UTF8NORM_H
962306a36Sopenharmony_ci
1062306a36Sopenharmony_ci#include <linux/types.h>
1162306a36Sopenharmony_ci#include <linux/export.h>
1262306a36Sopenharmony_ci#include <linux/string.h>
1362306a36Sopenharmony_ci#include <linux/module.h>
1462306a36Sopenharmony_ci#include <linux/unicode.h>
1562306a36Sopenharmony_ci
/*
 * Check whether the given Unicode version is supported by the map.
 * NOTE(review): the return convention is not visible from this header;
 * presumably non-zero means "supported" -- confirm against the definition.
 */
int utf8version_is_supported(const struct unicode_map *um, unsigned int version);

/*
 * Determine the length of the normalized form of the string,
 * excluding any terminating NUL byte.
 * Returns 0 if only ignorable code points are present.
 * Returns -1 if the input is not valid UTF-8.
 */
ssize_t utf8nlen(const struct unicode_map *um, enum utf8_normalization n,
		const char *s, size_t len);

/*
 * Size, in bytes, of the hangul[] scratch buffer embedded in
 * struct utf8cursor; it must be defined before that structure.
 */
#define UTF8HANGULLEAF	(12)

3062306a36Sopenharmony_ci/*
3162306a36Sopenharmony_ci * Cursor structure used by the normalizer.
3262306a36Sopenharmony_ci */
3362306a36Sopenharmony_cistruct utf8cursor {
3462306a36Sopenharmony_ci	const struct unicode_map *um;
3562306a36Sopenharmony_ci	enum utf8_normalization n;
3662306a36Sopenharmony_ci	const char	*s;
3762306a36Sopenharmony_ci	const char	*p;
3862306a36Sopenharmony_ci	const char	*ss;
3962306a36Sopenharmony_ci	const char	*sp;
4062306a36Sopenharmony_ci	unsigned int	len;
4162306a36Sopenharmony_ci	unsigned int	slen;
4262306a36Sopenharmony_ci	short int	ccc;
4362306a36Sopenharmony_ci	short int	nccc;
4462306a36Sopenharmony_ci	unsigned char	hangul[UTF8HANGULLEAF];
4562306a36Sopenharmony_ci};
4662306a36Sopenharmony_ci
/*
 * Initialize a utf8cursor to normalize the string s of length len
 * with normalization n, using the map um.
 * Returns 0 on success.
 * Returns -1 on failure.
 */
int utf8ncursor(struct utf8cursor *u8c, const struct unicode_map *um,
		enum utf8_normalization n, const char *s, size_t len);

/*
 * Get the next byte in the normalization.
 * Returns a value > 0 && < 256 on success.
 * Returns 0 when the end of the normalization is reached.
 * Returns -1 if the string being normalized is not valid UTF-8.
 */
int utf8byte(struct utf8cursor *u8c);

/*
 * One entry of the normalization data index: a pair of unsigned values.
 * NOTE(review): presumably maxage is the highest Unicode age the entry
 * covers and offset locates its data -- confirm against the table
 * generator and the lookup code.
 */
struct utf8data {
	unsigned int maxage;
	unsigned int offset;
};

/*
 * Bundle of pointers to the normalization tables.
 *
 * Three tables are described by a pointer plus an int size field; the
 * final member is a bare pointer to unsigned bytes with no size.
 * NOTE(review): the *_size fields are presumably element counts --
 * confirm against the code that fills in and reads this table.
 */
struct utf8data_table {
	const unsigned int *utf8agetab;
	int utf8agetab_size;

	const struct utf8data *utf8nfdicfdata;
	int utf8nfdicfdata_size;

	const struct utf8data *utf8nfdidata;
	int utf8nfdidata_size;

	const unsigned char *utf8data;
};

/* The table instance itself; declared here only, defined elsewhere. */
extern struct utf8data_table utf8_data_table;

8362306a36Sopenharmony_ci#endif /* UTF8NORM_H */
84