162306a36Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0-only */ 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Copyright (c) 2014 SGI. 462306a36Sopenharmony_ci * All rights reserved. 562306a36Sopenharmony_ci */ 662306a36Sopenharmony_ci 762306a36Sopenharmony_ci#ifndef UTF8NORM_H 862306a36Sopenharmony_ci#define UTF8NORM_H 962306a36Sopenharmony_ci 1062306a36Sopenharmony_ci#include <linux/types.h> 1162306a36Sopenharmony_ci#include <linux/export.h> 1262306a36Sopenharmony_ci#include <linux/string.h> 1362306a36Sopenharmony_ci#include <linux/module.h> 1462306a36Sopenharmony_ci#include <linux/unicode.h> 1562306a36Sopenharmony_ci 1662306a36Sopenharmony_ciint utf8version_is_supported(const struct unicode_map *um, unsigned int version); 1762306a36Sopenharmony_ci 1862306a36Sopenharmony_ci/* 1962306a36Sopenharmony_ci * Determine the length of the normalized from of the string, 2062306a36Sopenharmony_ci * excluding any terminating NULL byte. 2162306a36Sopenharmony_ci * Returns 0 if only ignorable code points are present. 2262306a36Sopenharmony_ci * Returns -1 if the input is not valid UTF-8. 2362306a36Sopenharmony_ci */ 2462306a36Sopenharmony_cissize_t utf8nlen(const struct unicode_map *um, enum utf8_normalization n, 2562306a36Sopenharmony_ci const char *s, size_t len); 2662306a36Sopenharmony_ci 2762306a36Sopenharmony_ci/* Needed in struct utf8cursor below. */ 2862306a36Sopenharmony_ci#define UTF8HANGULLEAF (12) 2962306a36Sopenharmony_ci 3062306a36Sopenharmony_ci/* 3162306a36Sopenharmony_ci * Cursor structure used by the normalizer. 3262306a36Sopenharmony_ci */ 3362306a36Sopenharmony_cistruct utf8cursor { 3462306a36Sopenharmony_ci const struct unicode_map *um; 3562306a36Sopenharmony_ci enum utf8_normalization n; 3662306a36Sopenharmony_ci const char *s; 3762306a36Sopenharmony_ci const char *p; 3862306a36Sopenharmony_ci const char *ss; 3962306a36Sopenharmony_ci const char *sp; 4062306a36Sopenharmony_ci unsigned int len; 4162306a36Sopenharmony_ci unsigned int slen; 4262306a36Sopenharmony_ci short int ccc; 4362306a36Sopenharmony_ci short int nccc; 4462306a36Sopenharmony_ci unsigned char hangul[UTF8HANGULLEAF]; 4562306a36Sopenharmony_ci}; 4662306a36Sopenharmony_ci 4762306a36Sopenharmony_ci/* 4862306a36Sopenharmony_ci * Initialize a utf8cursor to normalize a string. 4962306a36Sopenharmony_ci * Returns 0 on success. 5062306a36Sopenharmony_ci * Returns -1 on failure. 5162306a36Sopenharmony_ci */ 5262306a36Sopenharmony_ciint utf8ncursor(struct utf8cursor *u8c, const struct unicode_map *um, 5362306a36Sopenharmony_ci enum utf8_normalization n, const char *s, size_t len); 5462306a36Sopenharmony_ci 5562306a36Sopenharmony_ci/* 5662306a36Sopenharmony_ci * Get the next byte in the normalization. 5762306a36Sopenharmony_ci * Returns a value > 0 && < 256 on success. 5862306a36Sopenharmony_ci * Returns 0 when the end of the normalization is reached. 5962306a36Sopenharmony_ci * Returns -1 if the string being normalized is not valid UTF-8. 6062306a36Sopenharmony_ci */ 6162306a36Sopenharmony_ciextern int utf8byte(struct utf8cursor *u8c); 6262306a36Sopenharmony_ci 6362306a36Sopenharmony_cistruct utf8data { 6462306a36Sopenharmony_ci unsigned int maxage; 6562306a36Sopenharmony_ci unsigned int offset; 6662306a36Sopenharmony_ci}; 6762306a36Sopenharmony_ci 6862306a36Sopenharmony_cistruct utf8data_table { 6962306a36Sopenharmony_ci const unsigned int *utf8agetab; 7062306a36Sopenharmony_ci int utf8agetab_size; 7162306a36Sopenharmony_ci 7262306a36Sopenharmony_ci const struct utf8data *utf8nfdicfdata; 7362306a36Sopenharmony_ci int utf8nfdicfdata_size; 7462306a36Sopenharmony_ci 7562306a36Sopenharmony_ci const struct utf8data *utf8nfdidata; 7662306a36Sopenharmony_ci int utf8nfdidata_size; 7762306a36Sopenharmony_ci 7862306a36Sopenharmony_ci const unsigned char *utf8data; 7962306a36Sopenharmony_ci}; 8062306a36Sopenharmony_ci 8162306a36Sopenharmony_ciextern struct utf8data_table utf8_data_table; 8262306a36Sopenharmony_ci 8362306a36Sopenharmony_ci#endif /* UTF8NORM_H */ 84