12e5b6d6dSopenharmony_ci// © 2016 and later: Unicode, Inc. and others. 22e5b6d6dSopenharmony_ci// License & terms of use: http://www.unicode.org/copyright.html 32e5b6d6dSopenharmony_ci/* 42e5b6d6dSopenharmony_ci******************************************************************************* 52e5b6d6dSopenharmony_ci* 62e5b6d6dSopenharmony_ci* Copyright (C) 2009-2015, International Business Machines 72e5b6d6dSopenharmony_ci* Corporation and others. All Rights Reserved. 82e5b6d6dSopenharmony_ci* 92e5b6d6dSopenharmony_ci******************************************************************************* 102e5b6d6dSopenharmony_ci* file name: unorm2.h 112e5b6d6dSopenharmony_ci* encoding: UTF-8 122e5b6d6dSopenharmony_ci* tab size: 8 (not used) 132e5b6d6dSopenharmony_ci* indentation:4 142e5b6d6dSopenharmony_ci* 152e5b6d6dSopenharmony_ci* created on: 2009dec15 162e5b6d6dSopenharmony_ci* created by: Markus W. Scherer 172e5b6d6dSopenharmony_ci*/ 182e5b6d6dSopenharmony_ci 192e5b6d6dSopenharmony_ci#ifndef __UNORM2_H__ 202e5b6d6dSopenharmony_ci#define __UNORM2_H__ 212e5b6d6dSopenharmony_ci 222e5b6d6dSopenharmony_ci/** 232e5b6d6dSopenharmony_ci * \file 242e5b6d6dSopenharmony_ci * \brief C API: New API for Unicode Normalization. 252e5b6d6dSopenharmony_ci * 262e5b6d6dSopenharmony_ci * Unicode normalization functionality for standard Unicode normalization or 272e5b6d6dSopenharmony_ci * for using custom mapping tables. 282e5b6d6dSopenharmony_ci * All instances of UNormalizer2 are unmodifiable/immutable. 292e5b6d6dSopenharmony_ci * Instances returned by unorm2_getInstance() are singletons that must not be deleted by the caller. 302e5b6d6dSopenharmony_ci * For more details see the Normalizer2 C++ class. 
312e5b6d6dSopenharmony_ci */ 322e5b6d6dSopenharmony_ci 332e5b6d6dSopenharmony_ci#include "unicode/utypes.h" 342e5b6d6dSopenharmony_ci#include "unicode/stringoptions.h" 352e5b6d6dSopenharmony_ci#include "unicode/uset.h" 362e5b6d6dSopenharmony_ci 372e5b6d6dSopenharmony_ci#if U_SHOW_CPLUSPLUS_API 382e5b6d6dSopenharmony_ci#include "unicode/localpointer.h" 392e5b6d6dSopenharmony_ci#endif // U_SHOW_CPLUSPLUS_API 402e5b6d6dSopenharmony_ci 412e5b6d6dSopenharmony_ci/** 422e5b6d6dSopenharmony_ci * Constants for normalization modes. 432e5b6d6dSopenharmony_ci * For details about standard Unicode normalization forms 442e5b6d6dSopenharmony_ci * and about the algorithms which are also used with custom mapping tables 452e5b6d6dSopenharmony_ci * see http://www.unicode.org/unicode/reports/tr15/ 462e5b6d6dSopenharmony_ci * @stable ICU 4.4 472e5b6d6dSopenharmony_ci */ 482e5b6d6dSopenharmony_citypedef enum { 492e5b6d6dSopenharmony_ci /** 502e5b6d6dSopenharmony_ci * Decomposition followed by composition. 512e5b6d6dSopenharmony_ci * Same as standard NFC when using an "nfc" instance. 522e5b6d6dSopenharmony_ci * Same as standard NFKC when using an "nfkc" instance. 532e5b6d6dSopenharmony_ci * For details about standard Unicode normalization forms 542e5b6d6dSopenharmony_ci * see http://www.unicode.org/unicode/reports/tr15/ 552e5b6d6dSopenharmony_ci * @stable ICU 4.4 562e5b6d6dSopenharmony_ci */ 572e5b6d6dSopenharmony_ci UNORM2_COMPOSE, 582e5b6d6dSopenharmony_ci /** 592e5b6d6dSopenharmony_ci * Map, and reorder canonically. 602e5b6d6dSopenharmony_ci * Same as standard NFD when using an "nfc" instance. 612e5b6d6dSopenharmony_ci * Same as standard NFKD when using an "nfkc" instance. 
622e5b6d6dSopenharmony_ci * For details about standard Unicode normalization forms 632e5b6d6dSopenharmony_ci * see http://www.unicode.org/unicode/reports/tr15/ 642e5b6d6dSopenharmony_ci * @stable ICU 4.4 652e5b6d6dSopenharmony_ci */ 662e5b6d6dSopenharmony_ci UNORM2_DECOMPOSE, 672e5b6d6dSopenharmony_ci /** 682e5b6d6dSopenharmony_ci * "Fast C or D" form. 692e5b6d6dSopenharmony_ci * If a string is in this form, then further decomposition <i>without reordering</i> 702e5b6d6dSopenharmony_ci * would yield the same form as DECOMPOSE. 712e5b6d6dSopenharmony_ci * Text in "Fast C or D" form can be processed efficiently with data tables 722e5b6d6dSopenharmony_ci * that are "canonically closed", that is, that provide equivalent data for 732e5b6d6dSopenharmony_ci * equivalent text, without having to be fully normalized. 742e5b6d6dSopenharmony_ci * Not a standard Unicode normalization form. 752e5b6d6dSopenharmony_ci * Not a unique form: Different FCD strings can be canonically equivalent. 762e5b6d6dSopenharmony_ci * For details see http://www.unicode.org/notes/tn5/#FCD 772e5b6d6dSopenharmony_ci * @stable ICU 4.4 782e5b6d6dSopenharmony_ci */ 792e5b6d6dSopenharmony_ci UNORM2_FCD, 802e5b6d6dSopenharmony_ci /** 812e5b6d6dSopenharmony_ci * Compose only contiguously. 822e5b6d6dSopenharmony_ci * Also known as "FCC" or "Fast C Contiguous". 832e5b6d6dSopenharmony_ci * The result will often but not always be in NFC. 842e5b6d6dSopenharmony_ci * The result will conform to FCD which is useful for processing. 852e5b6d6dSopenharmony_ci * Not a standard Unicode normalization form. 862e5b6d6dSopenharmony_ci * For details see http://www.unicode.org/notes/tn5/#FCC 872e5b6d6dSopenharmony_ci * @stable ICU 4.4 882e5b6d6dSopenharmony_ci */ 892e5b6d6dSopenharmony_ci UNORM2_COMPOSE_CONTIGUOUS 902e5b6d6dSopenharmony_ci} UNormalization2Mode; 912e5b6d6dSopenharmony_ci 922e5b6d6dSopenharmony_ci/** 932e5b6d6dSopenharmony_ci * Result values for normalization quick check functions. 
942e5b6d6dSopenharmony_ci * For details see http://www.unicode.org/reports/tr15/#Detecting_Normalization_Forms 952e5b6d6dSopenharmony_ci * @stable ICU 2.0 962e5b6d6dSopenharmony_ci */ 972e5b6d6dSopenharmony_citypedef enum UNormalizationCheckResult { 982e5b6d6dSopenharmony_ci /** 992e5b6d6dSopenharmony_ci * The input string is not in the normalization form. 1002e5b6d6dSopenharmony_ci * @stable ICU 2.0 1012e5b6d6dSopenharmony_ci */ 1022e5b6d6dSopenharmony_ci UNORM_NO, 1032e5b6d6dSopenharmony_ci /** 1042e5b6d6dSopenharmony_ci * The input string is in the normalization form. 1052e5b6d6dSopenharmony_ci * @stable ICU 2.0 1062e5b6d6dSopenharmony_ci */ 1072e5b6d6dSopenharmony_ci UNORM_YES, 1082e5b6d6dSopenharmony_ci /** 1092e5b6d6dSopenharmony_ci * The input string may or may not be in the normalization form. 1102e5b6d6dSopenharmony_ci * This value is only returned for composition forms like NFC and FCC, 1112e5b6d6dSopenharmony_ci * when a backward-combining character is found for which the surrounding text 1122e5b6d6dSopenharmony_ci * would have to be analyzed further. 1132e5b6d6dSopenharmony_ci * @stable ICU 2.0 1142e5b6d6dSopenharmony_ci */ 1152e5b6d6dSopenharmony_ci UNORM_MAYBE 1162e5b6d6dSopenharmony_ci} UNormalizationCheckResult; 1172e5b6d6dSopenharmony_ci 1182e5b6d6dSopenharmony_ci/** 1192e5b6d6dSopenharmony_ci * Opaque C service object type for the new normalization API. 1202e5b6d6dSopenharmony_ci * @stable ICU 4.4 1212e5b6d6dSopenharmony_ci */ 1222e5b6d6dSopenharmony_cistruct UNormalizer2; 1232e5b6d6dSopenharmony_citypedef struct UNormalizer2 UNormalizer2; /**< C typedef for struct UNormalizer2. @stable ICU 4.4 */ 1242e5b6d6dSopenharmony_ci 1252e5b6d6dSopenharmony_ci#if !UCONFIG_NO_NORMALIZATION 1262e5b6d6dSopenharmony_ci 1272e5b6d6dSopenharmony_ci/** 1282e5b6d6dSopenharmony_ci * Returns a UNormalizer2 instance for Unicode NFC normalization. 1292e5b6d6dSopenharmony_ci * Same as unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE, pErrorCode). 
1302e5b6d6dSopenharmony_ci * Returns an unmodifiable singleton instance. Do not delete it. 1312e5b6d6dSopenharmony_ci * @param pErrorCode Standard ICU error code. Its input value must 1322e5b6d6dSopenharmony_ci * pass the U_SUCCESS() test, or else the function returns 1332e5b6d6dSopenharmony_ci * immediately. Check for U_FAILURE() on output or use with 1342e5b6d6dSopenharmony_ci * function chaining. (See User Guide for details.) 1352e5b6d6dSopenharmony_ci * @return the requested Normalizer2, if successful 1362e5b6d6dSopenharmony_ci * @stable ICU 49 1372e5b6d6dSopenharmony_ci */ 1382e5b6d6dSopenharmony_ciU_CAPI const UNormalizer2 * U_EXPORT2 1392e5b6d6dSopenharmony_ciunorm2_getNFCInstance(UErrorCode *pErrorCode); 1402e5b6d6dSopenharmony_ci 1412e5b6d6dSopenharmony_ci/** 1422e5b6d6dSopenharmony_ci * Returns a UNormalizer2 instance for Unicode NFD normalization. 1432e5b6d6dSopenharmony_ci * Same as unorm2_getInstance(NULL, "nfc", UNORM2_DECOMPOSE, pErrorCode). 1442e5b6d6dSopenharmony_ci * Returns an unmodifiable singleton instance. Do not delete it. 1452e5b6d6dSopenharmony_ci * @param pErrorCode Standard ICU error code. Its input value must 1462e5b6d6dSopenharmony_ci * pass the U_SUCCESS() test, or else the function returns 1472e5b6d6dSopenharmony_ci * immediately. Check for U_FAILURE() on output or use with 1482e5b6d6dSopenharmony_ci * function chaining. (See User Guide for details.) 1492e5b6d6dSopenharmony_ci * @return the requested Normalizer2, if successful 1502e5b6d6dSopenharmony_ci * @stable ICU 49 1512e5b6d6dSopenharmony_ci */ 1522e5b6d6dSopenharmony_ciU_CAPI const UNormalizer2 * U_EXPORT2 1532e5b6d6dSopenharmony_ciunorm2_getNFDInstance(UErrorCode *pErrorCode); 1542e5b6d6dSopenharmony_ci 1552e5b6d6dSopenharmony_ci/** 1562e5b6d6dSopenharmony_ci * Returns a UNormalizer2 instance for Unicode NFKC normalization. 1572e5b6d6dSopenharmony_ci * Same as unorm2_getInstance(NULL, "nfkc", UNORM2_COMPOSE, pErrorCode). 
1582e5b6d6dSopenharmony_ci * Returns an unmodifiable singleton instance. Do not delete it. 1592e5b6d6dSopenharmony_ci * @param pErrorCode Standard ICU error code. Its input value must 1602e5b6d6dSopenharmony_ci * pass the U_SUCCESS() test, or else the function returns 1612e5b6d6dSopenharmony_ci * immediately. Check for U_FAILURE() on output or use with 1622e5b6d6dSopenharmony_ci * function chaining. (See User Guide for details.) 1632e5b6d6dSopenharmony_ci * @return the requested Normalizer2, if successful 1642e5b6d6dSopenharmony_ci * @stable ICU 49 1652e5b6d6dSopenharmony_ci */ 1662e5b6d6dSopenharmony_ciU_CAPI const UNormalizer2 * U_EXPORT2 1672e5b6d6dSopenharmony_ciunorm2_getNFKCInstance(UErrorCode *pErrorCode); 1682e5b6d6dSopenharmony_ci 1692e5b6d6dSopenharmony_ci/** 1702e5b6d6dSopenharmony_ci * Returns a UNormalizer2 instance for Unicode NFKD normalization. 1712e5b6d6dSopenharmony_ci * Same as unorm2_getInstance(NULL, "nfkc", UNORM2_DECOMPOSE, pErrorCode). 1722e5b6d6dSopenharmony_ci * Returns an unmodifiable singleton instance. Do not delete it. 1732e5b6d6dSopenharmony_ci * @param pErrorCode Standard ICU error code. Its input value must 1742e5b6d6dSopenharmony_ci * pass the U_SUCCESS() test, or else the function returns 1752e5b6d6dSopenharmony_ci * immediately. Check for U_FAILURE() on output or use with 1762e5b6d6dSopenharmony_ci * function chaining. (See User Guide for details.) 1772e5b6d6dSopenharmony_ci * @return the requested Normalizer2, if successful 1782e5b6d6dSopenharmony_ci * @stable ICU 49 1792e5b6d6dSopenharmony_ci */ 1802e5b6d6dSopenharmony_ciU_CAPI const UNormalizer2 * U_EXPORT2 1812e5b6d6dSopenharmony_ciunorm2_getNFKDInstance(UErrorCode *pErrorCode); 1822e5b6d6dSopenharmony_ci 1832e5b6d6dSopenharmony_ci/** 1842e5b6d6dSopenharmony_ci * Returns a UNormalizer2 instance for Unicode NFKC_Casefold normalization. 1852e5b6d6dSopenharmony_ci * Same as unorm2_getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, pErrorCode). 
1862e5b6d6dSopenharmony_ci * Returns an unmodifiable singleton instance. Do not delete it. 1872e5b6d6dSopenharmony_ci * @param pErrorCode Standard ICU error code. Its input value must 1882e5b6d6dSopenharmony_ci * pass the U_SUCCESS() test, or else the function returns 1892e5b6d6dSopenharmony_ci * immediately. Check for U_FAILURE() on output or use with 1902e5b6d6dSopenharmony_ci * function chaining. (See User Guide for details.) 1912e5b6d6dSopenharmony_ci * @return the requested Normalizer2, if successful 1922e5b6d6dSopenharmony_ci * @stable ICU 49 1932e5b6d6dSopenharmony_ci */ 1942e5b6d6dSopenharmony_ciU_CAPI const UNormalizer2 * U_EXPORT2 1952e5b6d6dSopenharmony_ciunorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode); 1962e5b6d6dSopenharmony_ci 1972e5b6d6dSopenharmony_ci/** 1982e5b6d6dSopenharmony_ci * Returns a UNormalizer2 instance which uses the specified data file 1992e5b6d6dSopenharmony_ci * (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle) 2002e5b6d6dSopenharmony_ci * and which composes or decomposes text according to the specified mode. 2012e5b6d6dSopenharmony_ci * Returns an unmodifiable singleton instance. Do not delete it. 2022e5b6d6dSopenharmony_ci * 2032e5b6d6dSopenharmony_ci * Use packageName=NULL for data files that are part of ICU's own data. 2042e5b6d6dSopenharmony_ci * Use name="nfc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFC/NFD. 2052e5b6d6dSopenharmony_ci * Use name="nfkc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFKC/NFKD. 2062e5b6d6dSopenharmony_ci * Use name="nfkc_cf" and UNORM2_COMPOSE for Unicode standard NFKC_CF=NFKC_Casefold. 2072e5b6d6dSopenharmony_ci * 2082e5b6d6dSopenharmony_ci * @param packageName NULL for ICU built-in data, otherwise application data package name 2092e5b6d6dSopenharmony_ci * @param name "nfc" or "nfkc" or "nfkc_cf" or name of custom data file 2102e5b6d6dSopenharmony_ci * @param mode normalization mode (compose or decompose etc.) 
2112e5b6d6dSopenharmony_ci * @param pErrorCode Standard ICU error code. Its input value must 2122e5b6d6dSopenharmony_ci * pass the U_SUCCESS() test, or else the function returns 2132e5b6d6dSopenharmony_ci * immediately. Check for U_FAILURE() on output or use with 2142e5b6d6dSopenharmony_ci * function chaining. (See User Guide for details.) 2152e5b6d6dSopenharmony_ci * @return the requested UNormalizer2, if successful 2162e5b6d6dSopenharmony_ci * @stable ICU 4.4 2172e5b6d6dSopenharmony_ci */ 2182e5b6d6dSopenharmony_ciU_CAPI const UNormalizer2 * U_EXPORT2 2192e5b6d6dSopenharmony_ciunorm2_getInstance(const char *packageName, 2202e5b6d6dSopenharmony_ci const char *name, 2212e5b6d6dSopenharmony_ci UNormalization2Mode mode, 2222e5b6d6dSopenharmony_ci UErrorCode *pErrorCode); 2232e5b6d6dSopenharmony_ci 2242e5b6d6dSopenharmony_ci/** 2252e5b6d6dSopenharmony_ci * Constructs a filtered normalizer wrapping any UNormalizer2 instance 2262e5b6d6dSopenharmony_ci * and a filter set. 2272e5b6d6dSopenharmony_ci * Both are aliased and must not be modified or deleted while this object 2282e5b6d6dSopenharmony_ci * is used. 2292e5b6d6dSopenharmony_ci * The filter set should be frozen; otherwise the performance will suffer greatly. 2302e5b6d6dSopenharmony_ci * @param norm2 wrapped UNormalizer2 instance 2312e5b6d6dSopenharmony_ci * @param filterSet USet which determines the characters to be normalized 2322e5b6d6dSopenharmony_ci * @param pErrorCode Standard ICU error code. Its input value must 2332e5b6d6dSopenharmony_ci * pass the U_SUCCESS() test, or else the function returns 2342e5b6d6dSopenharmony_ci * immediately. Check for U_FAILURE() on output or use with 2352e5b6d6dSopenharmony_ci * function chaining. (See User Guide for details.) 
2362e5b6d6dSopenharmony_ci * @return the requested UNormalizer2, if successful 2372e5b6d6dSopenharmony_ci * @stable ICU 4.4 2382e5b6d6dSopenharmony_ci */ 2392e5b6d6dSopenharmony_ciU_CAPI UNormalizer2 * U_EXPORT2 2402e5b6d6dSopenharmony_ciunorm2_openFiltered(const UNormalizer2 *norm2, const USet *filterSet, UErrorCode *pErrorCode); 2412e5b6d6dSopenharmony_ci 2422e5b6d6dSopenharmony_ci/** 2432e5b6d6dSopenharmony_ci * Closes a UNormalizer2 instance from unorm2_openFiltered(). 2442e5b6d6dSopenharmony_ci * Do not close instances from unorm2_getInstance()! 2452e5b6d6dSopenharmony_ci * @param norm2 UNormalizer2 instance to be closed 2462e5b6d6dSopenharmony_ci * @stable ICU 4.4 2472e5b6d6dSopenharmony_ci */ 2482e5b6d6dSopenharmony_ciU_CAPI void U_EXPORT2 2492e5b6d6dSopenharmony_ciunorm2_close(UNormalizer2 *norm2); 2502e5b6d6dSopenharmony_ci 2512e5b6d6dSopenharmony_ci#if U_SHOW_CPLUSPLUS_API 2522e5b6d6dSopenharmony_ci 2532e5b6d6dSopenharmony_ciU_NAMESPACE_BEGIN 2542e5b6d6dSopenharmony_ci 2552e5b6d6dSopenharmony_ci/** 2562e5b6d6dSopenharmony_ci * \class LocalUNormalizer2Pointer 2572e5b6d6dSopenharmony_ci * "Smart pointer" class, closes a UNormalizer2 via unorm2_close(). 2582e5b6d6dSopenharmony_ci * For most methods see the LocalPointerBase base class. 2592e5b6d6dSopenharmony_ci * 2602e5b6d6dSopenharmony_ci * @see LocalPointerBase 2612e5b6d6dSopenharmony_ci * @see LocalPointer 2622e5b6d6dSopenharmony_ci * @stable ICU 4.4 2632e5b6d6dSopenharmony_ci */ 2642e5b6d6dSopenharmony_ciU_DEFINE_LOCAL_OPEN_POINTER(LocalUNormalizer2Pointer, UNormalizer2, unorm2_close); 2652e5b6d6dSopenharmony_ci 2662e5b6d6dSopenharmony_ciU_NAMESPACE_END 2672e5b6d6dSopenharmony_ci 2682e5b6d6dSopenharmony_ci#endif 2692e5b6d6dSopenharmony_ci 2702e5b6d6dSopenharmony_ci/** 2712e5b6d6dSopenharmony_ci * Writes the normalized form of the source string to the destination string 2722e5b6d6dSopenharmony_ci * (replacing its contents) and returns the length of the destination string. 
2732e5b6d6dSopenharmony_ci * The source and destination strings must be different buffers. 2742e5b6d6dSopenharmony_ci * @param norm2 UNormalizer2 instance 2752e5b6d6dSopenharmony_ci * @param src source string 2762e5b6d6dSopenharmony_ci * @param length length of the source string, or -1 if NUL-terminated 2772e5b6d6dSopenharmony_ci * @param dest destination string; its contents is replaced with normalized src 2782e5b6d6dSopenharmony_ci * @param capacity number of UChars that can be written to dest 2792e5b6d6dSopenharmony_ci * @param pErrorCode Standard ICU error code. Its input value must 2802e5b6d6dSopenharmony_ci * pass the U_SUCCESS() test, or else the function returns 2812e5b6d6dSopenharmony_ci * immediately. Check for U_FAILURE() on output or use with 2822e5b6d6dSopenharmony_ci * function chaining. (See User Guide for details.) 2832e5b6d6dSopenharmony_ci * @return dest 2842e5b6d6dSopenharmony_ci * @stable ICU 4.4 2852e5b6d6dSopenharmony_ci */ 2862e5b6d6dSopenharmony_ciU_CAPI int32_t U_EXPORT2 2872e5b6d6dSopenharmony_ciunorm2_normalize(const UNormalizer2 *norm2, 2882e5b6d6dSopenharmony_ci const UChar *src, int32_t length, 2892e5b6d6dSopenharmony_ci UChar *dest, int32_t capacity, 2902e5b6d6dSopenharmony_ci UErrorCode *pErrorCode); 2912e5b6d6dSopenharmony_ci/** 2922e5b6d6dSopenharmony_ci * Appends the normalized form of the second string to the first string 2932e5b6d6dSopenharmony_ci * (merging them at the boundary) and returns the length of the first string. 2942e5b6d6dSopenharmony_ci * The result is normalized if the first string was normalized. 2952e5b6d6dSopenharmony_ci * The first and second strings must be different buffers. 
2962e5b6d6dSopenharmony_ci * @param norm2 UNormalizer2 instance 2972e5b6d6dSopenharmony_ci * @param first string, should be normalized 2982e5b6d6dSopenharmony_ci * @param firstLength length of the first string, or -1 if NUL-terminated 2992e5b6d6dSopenharmony_ci * @param firstCapacity number of UChars that can be written to first 3002e5b6d6dSopenharmony_ci * @param second string, will be normalized 3012e5b6d6dSopenharmony_ci * @param secondLength length of the source string, or -1 if NUL-terminated 3022e5b6d6dSopenharmony_ci * @param pErrorCode Standard ICU error code. Its input value must 3032e5b6d6dSopenharmony_ci * pass the U_SUCCESS() test, or else the function returns 3042e5b6d6dSopenharmony_ci * immediately. Check for U_FAILURE() on output or use with 3052e5b6d6dSopenharmony_ci * function chaining. (See User Guide for details.) 3062e5b6d6dSopenharmony_ci * @return first 3072e5b6d6dSopenharmony_ci * @stable ICU 4.4 3082e5b6d6dSopenharmony_ci */ 3092e5b6d6dSopenharmony_ciU_CAPI int32_t U_EXPORT2 3102e5b6d6dSopenharmony_ciunorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2, 3112e5b6d6dSopenharmony_ci UChar *first, int32_t firstLength, int32_t firstCapacity, 3122e5b6d6dSopenharmony_ci const UChar *second, int32_t secondLength, 3132e5b6d6dSopenharmony_ci UErrorCode *pErrorCode); 3142e5b6d6dSopenharmony_ci/** 3152e5b6d6dSopenharmony_ci * Appends the second string to the first string 3162e5b6d6dSopenharmony_ci * (merging them at the boundary) and returns the length of the first string. 3172e5b6d6dSopenharmony_ci * The result is normalized if both the strings were normalized. 3182e5b6d6dSopenharmony_ci * The first and second strings must be different buffers. 
3192e5b6d6dSopenharmony_ci * @param norm2 UNormalizer2 instance 3202e5b6d6dSopenharmony_ci * @param first string, should be normalized 3212e5b6d6dSopenharmony_ci * @param firstLength length of the first string, or -1 if NUL-terminated 3222e5b6d6dSopenharmony_ci * @param firstCapacity number of UChars that can be written to first 3232e5b6d6dSopenharmony_ci * @param second string, should be normalized 3242e5b6d6dSopenharmony_ci * @param secondLength length of the source string, or -1 if NUL-terminated 3252e5b6d6dSopenharmony_ci * @param pErrorCode Standard ICU error code. Its input value must 3262e5b6d6dSopenharmony_ci * pass the U_SUCCESS() test, or else the function returns 3272e5b6d6dSopenharmony_ci * immediately. Check for U_FAILURE() on output or use with 3282e5b6d6dSopenharmony_ci * function chaining. (See User Guide for details.) 3292e5b6d6dSopenharmony_ci * @return first 3302e5b6d6dSopenharmony_ci * @stable ICU 4.4 3312e5b6d6dSopenharmony_ci */ 3322e5b6d6dSopenharmony_ciU_CAPI int32_t U_EXPORT2 3332e5b6d6dSopenharmony_ciunorm2_append(const UNormalizer2 *norm2, 3342e5b6d6dSopenharmony_ci UChar *first, int32_t firstLength, int32_t firstCapacity, 3352e5b6d6dSopenharmony_ci const UChar *second, int32_t secondLength, 3362e5b6d6dSopenharmony_ci UErrorCode *pErrorCode); 3372e5b6d6dSopenharmony_ci 3382e5b6d6dSopenharmony_ci/** 3392e5b6d6dSopenharmony_ci * Gets the decomposition mapping of c. 3402e5b6d6dSopenharmony_ci * Roughly equivalent to normalizing the String form of c 3412e5b6d6dSopenharmony_ci * on a UNORM2_DECOMPOSE UNormalizer2 instance, but much faster, and except that this function 3422e5b6d6dSopenharmony_ci * returns a negative value and does not write a string 3432e5b6d6dSopenharmony_ci * if c does not have a decomposition mapping in this instance's data. 3442e5b6d6dSopenharmony_ci * This function is independent of the mode of the UNormalizer2. 
3452e5b6d6dSopenharmony_ci * @param norm2 UNormalizer2 instance 3462e5b6d6dSopenharmony_ci * @param c code point 3472e5b6d6dSopenharmony_ci * @param decomposition String buffer which will be set to c's 3482e5b6d6dSopenharmony_ci * decomposition mapping, if there is one. 3492e5b6d6dSopenharmony_ci * @param capacity number of UChars that can be written to decomposition 3502e5b6d6dSopenharmony_ci * @param pErrorCode Standard ICU error code. Its input value must 3512e5b6d6dSopenharmony_ci * pass the U_SUCCESS() test, or else the function returns 3522e5b6d6dSopenharmony_ci * immediately. Check for U_FAILURE() on output or use with 3532e5b6d6dSopenharmony_ci * function chaining. (See User Guide for details.) 3542e5b6d6dSopenharmony_ci * @return the non-negative length of c's decomposition, if there is one; otherwise a negative value 3552e5b6d6dSopenharmony_ci * @stable ICU 4.6 3562e5b6d6dSopenharmony_ci */ 3572e5b6d6dSopenharmony_ciU_CAPI int32_t U_EXPORT2 3582e5b6d6dSopenharmony_ciunorm2_getDecomposition(const UNormalizer2 *norm2, 3592e5b6d6dSopenharmony_ci UChar32 c, UChar *decomposition, int32_t capacity, 3602e5b6d6dSopenharmony_ci UErrorCode *pErrorCode); 3612e5b6d6dSopenharmony_ci 3622e5b6d6dSopenharmony_ci/** 3632e5b6d6dSopenharmony_ci * Gets the raw decomposition mapping of c. 3642e5b6d6dSopenharmony_ci * 3652e5b6d6dSopenharmony_ci * This is similar to the unorm2_getDecomposition() function but returns the 3662e5b6d6dSopenharmony_ci * raw decomposition mapping as specified in UnicodeData.txt or 3672e5b6d6dSopenharmony_ci * (for custom data) in the mapping files processed by the gennorm2 tool. 3682e5b6d6dSopenharmony_ci * By contrast, unorm2_getDecomposition() returns the processed, 3692e5b6d6dSopenharmony_ci * recursively-decomposed version of this mapping. 
 *
 * When used on a standard NFKC Normalizer2 instance,
 * unorm2_getRawDecomposition() returns the Unicode Decomposition_Mapping (dm) property.
 *
 * When used on a standard NFC Normalizer2 instance,
 * it returns the Decomposition_Mapping only if the Decomposition_Type (dt) is Canonical (Can);
 * in this case, the result contains either one or two code points (=1..4 UChars).
 *
 * This function is independent of the mode of the UNormalizer2.
 * @param norm2 UNormalizer2 instance
 * @param c code point
 * @param decomposition String buffer which will be set to c's
 *                      raw decomposition mapping, if there is one.
 * @param capacity number of UChars that can be written to decomposition
 * @param pErrorCode Standard ICU error code. Its input value must
 *                   pass the U_SUCCESS() test, or else the function returns
 *                   immediately. Check for U_FAILURE() on output or use with
 *                   function chaining. (See User Guide for details.)
3882e5b6d6dSopenharmony_ci * @return the non-negative length of c's raw decomposition, if there is one; otherwise a negative value 3892e5b6d6dSopenharmony_ci * @stable ICU 49 3902e5b6d6dSopenharmony_ci */ 3912e5b6d6dSopenharmony_ciU_CAPI int32_t U_EXPORT2 3922e5b6d6dSopenharmony_ciunorm2_getRawDecomposition(const UNormalizer2 *norm2, 3932e5b6d6dSopenharmony_ci UChar32 c, UChar *decomposition, int32_t capacity, 3942e5b6d6dSopenharmony_ci UErrorCode *pErrorCode); 3952e5b6d6dSopenharmony_ci 3962e5b6d6dSopenharmony_ci/** 3972e5b6d6dSopenharmony_ci * Performs pairwise composition of a & b and returns the composite if there is one. 3982e5b6d6dSopenharmony_ci * 3992e5b6d6dSopenharmony_ci * Returns a composite code point c only if c has a two-way mapping to a+b. 4002e5b6d6dSopenharmony_ci * In standard Unicode normalization, this means that 4012e5b6d6dSopenharmony_ci * c has a canonical decomposition to a+b 4022e5b6d6dSopenharmony_ci * and c does not have the Full_Composition_Exclusion property. 4032e5b6d6dSopenharmony_ci * 4042e5b6d6dSopenharmony_ci * This function is independent of the mode of the UNormalizer2. 4052e5b6d6dSopenharmony_ci * @param norm2 UNormalizer2 instance 4062e5b6d6dSopenharmony_ci * @param a A (normalization starter) code point. 4072e5b6d6dSopenharmony_ci * @param b Another code point. 4082e5b6d6dSopenharmony_ci * @return The non-negative composite code point if there is one; otherwise a negative value. 4092e5b6d6dSopenharmony_ci * @stable ICU 49 4102e5b6d6dSopenharmony_ci */ 4112e5b6d6dSopenharmony_ciU_CAPI UChar32 U_EXPORT2 4122e5b6d6dSopenharmony_ciunorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b); 4132e5b6d6dSopenharmony_ci 4142e5b6d6dSopenharmony_ci/** 4152e5b6d6dSopenharmony_ci * Gets the combining class of c. 4162e5b6d6dSopenharmony_ci * The default implementation returns 0 4172e5b6d6dSopenharmony_ci * but all standard implementations return the Unicode Canonical_Combining_Class value. 
4182e5b6d6dSopenharmony_ci * @param norm2 UNormalizer2 instance 4192e5b6d6dSopenharmony_ci * @param c code point 4202e5b6d6dSopenharmony_ci * @return c's combining class 4212e5b6d6dSopenharmony_ci * @stable ICU 49 4222e5b6d6dSopenharmony_ci */ 4232e5b6d6dSopenharmony_ciU_CAPI uint8_t U_EXPORT2 4242e5b6d6dSopenharmony_ciunorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c); 4252e5b6d6dSopenharmony_ci 4262e5b6d6dSopenharmony_ci/** 4272e5b6d6dSopenharmony_ci * Tests if the string is normalized. 4282e5b6d6dSopenharmony_ci * Internally, in cases where the quickCheck() method would return "maybe" 4292e5b6d6dSopenharmony_ci * (which is only possible for the two COMPOSE modes) this method 4302e5b6d6dSopenharmony_ci * resolves to "yes" or "no" to provide a definitive result, 4312e5b6d6dSopenharmony_ci * at the cost of doing more work in those cases. 4322e5b6d6dSopenharmony_ci * @param norm2 UNormalizer2 instance 4332e5b6d6dSopenharmony_ci * @param s input string 4342e5b6d6dSopenharmony_ci * @param length length of the string, or -1 if NUL-terminated 4352e5b6d6dSopenharmony_ci * @param pErrorCode Standard ICU error code. Its input value must 4362e5b6d6dSopenharmony_ci * pass the U_SUCCESS() test, or else the function returns 4372e5b6d6dSopenharmony_ci * immediately. Check for U_FAILURE() on output or use with 4382e5b6d6dSopenharmony_ci * function chaining. (See User Guide for details.) 4392e5b6d6dSopenharmony_ci * @return true if s is normalized 4402e5b6d6dSopenharmony_ci * @stable ICU 4.4 4412e5b6d6dSopenharmony_ci */ 4422e5b6d6dSopenharmony_ciU_CAPI UBool U_EXPORT2 4432e5b6d6dSopenharmony_ciunorm2_isNormalized(const UNormalizer2 *norm2, 4442e5b6d6dSopenharmony_ci const UChar *s, int32_t length, 4452e5b6d6dSopenharmony_ci UErrorCode *pErrorCode); 4462e5b6d6dSopenharmony_ci 4472e5b6d6dSopenharmony_ci/** 4482e5b6d6dSopenharmony_ci * Tests if the string is normalized. 
4492e5b6d6dSopenharmony_ci * For the two COMPOSE modes, the result could be "maybe" in cases that 4502e5b6d6dSopenharmony_ci * would take a little more work to resolve definitively. 4512e5b6d6dSopenharmony_ci * Use spanQuickCheckYes() and normalizeSecondAndAppend() for a faster 4522e5b6d6dSopenharmony_ci * combination of quick check + normalization, to avoid 4532e5b6d6dSopenharmony_ci * re-checking the "yes" prefix. 4542e5b6d6dSopenharmony_ci * @param norm2 UNormalizer2 instance 4552e5b6d6dSopenharmony_ci * @param s input string 4562e5b6d6dSopenharmony_ci * @param length length of the string, or -1 if NUL-terminated 4572e5b6d6dSopenharmony_ci * @param pErrorCode Standard ICU error code. Its input value must 4582e5b6d6dSopenharmony_ci * pass the U_SUCCESS() test, or else the function returns 4592e5b6d6dSopenharmony_ci * immediately. Check for U_FAILURE() on output or use with 4602e5b6d6dSopenharmony_ci * function chaining. (See User Guide for details.) 4612e5b6d6dSopenharmony_ci * @return UNormalizationCheckResult 4622e5b6d6dSopenharmony_ci * @stable ICU 4.4 4632e5b6d6dSopenharmony_ci */ 4642e5b6d6dSopenharmony_ciU_CAPI UNormalizationCheckResult U_EXPORT2 4652e5b6d6dSopenharmony_ciunorm2_quickCheck(const UNormalizer2 *norm2, 4662e5b6d6dSopenharmony_ci const UChar *s, int32_t length, 4672e5b6d6dSopenharmony_ci UErrorCode *pErrorCode); 4682e5b6d6dSopenharmony_ci 4692e5b6d6dSopenharmony_ci/** 4702e5b6d6dSopenharmony_ci * Returns the end of the normalized substring of the input string. 4712e5b6d6dSopenharmony_ci * In other words, with <code>end=spanQuickCheckYes(s, ec);</code> 4722e5b6d6dSopenharmony_ci * the substring <code>UnicodeString(s, 0, end)</code> 4732e5b6d6dSopenharmony_ci * will pass the quick check with a "yes" result. 4742e5b6d6dSopenharmony_ci * 4752e5b6d6dSopenharmony_ci * The returned end index is usually one or more characters before the 4762e5b6d6dSopenharmony_ci * "no" or "maybe" character: The end index is at a normalization boundary. 
4772e5b6d6dSopenharmony_ci * (See the class documentation for more about normalization boundaries.) 4782e5b6d6dSopenharmony_ci * 4792e5b6d6dSopenharmony_ci * When the goal is a normalized string and most input strings are expected 4802e5b6d6dSopenharmony_ci * to be normalized already, then call this method, 4812e5b6d6dSopenharmony_ci * and if it returns a prefix shorter than the input string, 4822e5b6d6dSopenharmony_ci * copy that prefix and use normalizeSecondAndAppend() for the remainder. 4832e5b6d6dSopenharmony_ci * @param norm2 UNormalizer2 instance 4842e5b6d6dSopenharmony_ci * @param s input string 4852e5b6d6dSopenharmony_ci * @param length length of the string, or -1 if NUL-terminated 4862e5b6d6dSopenharmony_ci * @param pErrorCode Standard ICU error code. Its input value must 4872e5b6d6dSopenharmony_ci * pass the U_SUCCESS() test, or else the function returns 4882e5b6d6dSopenharmony_ci * immediately. Check for U_FAILURE() on output or use with 4892e5b6d6dSopenharmony_ci * function chaining. (See User Guide for details.) 4902e5b6d6dSopenharmony_ci * @return "yes" span end index 4912e5b6d6dSopenharmony_ci * @stable ICU 4.4 4922e5b6d6dSopenharmony_ci */ 4932e5b6d6dSopenharmony_ciU_CAPI int32_t U_EXPORT2 4942e5b6d6dSopenharmony_ciunorm2_spanQuickCheckYes(const UNormalizer2 *norm2, 4952e5b6d6dSopenharmony_ci const UChar *s, int32_t length, 4962e5b6d6dSopenharmony_ci UErrorCode *pErrorCode); 4972e5b6d6dSopenharmony_ci 4982e5b6d6dSopenharmony_ci/** 4992e5b6d6dSopenharmony_ci * Tests if the character always has a normalization boundary before it, 5002e5b6d6dSopenharmony_ci * regardless of context. 5012e5b6d6dSopenharmony_ci * For details see the Normalizer2 base class documentation. 
5022e5b6d6dSopenharmony_ci * @param norm2 UNormalizer2 instance 5032e5b6d6dSopenharmony_ci * @param c character to test 5042e5b6d6dSopenharmony_ci * @return true if c has a normalization boundary before it 5052e5b6d6dSopenharmony_ci * @stable ICU 4.4 5062e5b6d6dSopenharmony_ci */ 5072e5b6d6dSopenharmony_ciU_CAPI UBool U_EXPORT2 5082e5b6d6dSopenharmony_ciunorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c); 5092e5b6d6dSopenharmony_ci 5102e5b6d6dSopenharmony_ci/** 5112e5b6d6dSopenharmony_ci * Tests if the character always has a normalization boundary after it, 5122e5b6d6dSopenharmony_ci * regardless of context. 5132e5b6d6dSopenharmony_ci * For details see the Normalizer2 base class documentation. 5142e5b6d6dSopenharmony_ci * @param norm2 UNormalizer2 instance 5152e5b6d6dSopenharmony_ci * @param c character to test 5162e5b6d6dSopenharmony_ci * @return true if c has a normalization boundary after it 5172e5b6d6dSopenharmony_ci * @stable ICU 4.4 5182e5b6d6dSopenharmony_ci */ 5192e5b6d6dSopenharmony_ciU_CAPI UBool U_EXPORT2 5202e5b6d6dSopenharmony_ciunorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c); 5212e5b6d6dSopenharmony_ci 5222e5b6d6dSopenharmony_ci/** 5232e5b6d6dSopenharmony_ci * Tests if the character is normalization-inert. 5242e5b6d6dSopenharmony_ci * For details see the Normalizer2 base class documentation. 5252e5b6d6dSopenharmony_ci * @param norm2 UNormalizer2 instance 5262e5b6d6dSopenharmony_ci * @param c character to test 5272e5b6d6dSopenharmony_ci * @return true if c is normalization-inert 5282e5b6d6dSopenharmony_ci * @stable ICU 4.4 5292e5b6d6dSopenharmony_ci */ 5302e5b6d6dSopenharmony_ciU_CAPI UBool U_EXPORT2 5312e5b6d6dSopenharmony_ciunorm2_isInert(const UNormalizer2 *norm2, UChar32 c); 5322e5b6d6dSopenharmony_ci 5332e5b6d6dSopenharmony_ci/** 5342e5b6d6dSopenharmony_ci * Compares two strings for canonical equivalence. 
5352e5b6d6dSopenharmony_ci * Further options include case-insensitive comparison and 5362e5b6d6dSopenharmony_ci * code point order (as opposed to code unit order). 5372e5b6d6dSopenharmony_ci * 5382e5b6d6dSopenharmony_ci * Canonical equivalence between two strings is defined as their normalized 5392e5b6d6dSopenharmony_ci * forms (NFD or NFC) being identical. 5402e5b6d6dSopenharmony_ci * This function compares strings incrementally instead of normalizing 5412e5b6d6dSopenharmony_ci * (and optionally case-folding) both strings entirely, 5422e5b6d6dSopenharmony_ci * improving performance significantly. 5432e5b6d6dSopenharmony_ci * 5442e5b6d6dSopenharmony_ci * Bulk normalization is only necessary if the strings do not fulfill the FCD 5452e5b6d6dSopenharmony_ci * conditions. Only in this case, and only if the strings are relatively long, 5462e5b6d6dSopenharmony_ci * is memory allocated temporarily. 5472e5b6d6dSopenharmony_ci * For FCD strings and short non-FCD strings there is no memory allocation. 5482e5b6d6dSopenharmony_ci * 5492e5b6d6dSopenharmony_ci * Semantically, this is equivalent to 5502e5b6d6dSopenharmony_ci * strcmp[CodePointOrder](NFD(foldCase(NFD(s1))), NFD(foldCase(NFD(s2)))) 5512e5b6d6dSopenharmony_ci * where code point order and foldCase are all optional. 5522e5b6d6dSopenharmony_ci * 5532e5b6d6dSopenharmony_ci * UAX 21 2.5 Caseless Matching specifies that for a canonical caseless match 5542e5b6d6dSopenharmony_ci * the case folding must be performed first, then the normalization. 5552e5b6d6dSopenharmony_ci * 5562e5b6d6dSopenharmony_ci * @param s1 First source string. 5572e5b6d6dSopenharmony_ci * @param length1 Length of first source string, or -1 if NUL-terminated. 5582e5b6d6dSopenharmony_ci * 5592e5b6d6dSopenharmony_ci * @param s2 Second source string. 5602e5b6d6dSopenharmony_ci * @param length2 Length of second source string, or -1 if NUL-terminated. 
5612e5b6d6dSopenharmony_ci * 5622e5b6d6dSopenharmony_ci * @param options A bit set of options: 5632e5b6d6dSopenharmony_ci * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 5642e5b6d6dSopenharmony_ci * Case-sensitive comparison in code unit order, and the input strings 5652e5b6d6dSopenharmony_ci * are quick-checked for FCD. 5662e5b6d6dSopenharmony_ci * 5672e5b6d6dSopenharmony_ci * - UNORM_INPUT_IS_FCD 5682e5b6d6dSopenharmony_ci * Set if the caller knows that both s1 and s2 fulfill the FCD conditions. 5692e5b6d6dSopenharmony_ci * If not set, the function will quickCheck for FCD 5702e5b6d6dSopenharmony_ci * and normalize if necessary. 5712e5b6d6dSopenharmony_ci * 5722e5b6d6dSopenharmony_ci * - U_COMPARE_CODE_POINT_ORDER 5732e5b6d6dSopenharmony_ci * Set to choose code point order instead of code unit order 5742e5b6d6dSopenharmony_ci * (see u_strCompare for details). 5752e5b6d6dSopenharmony_ci * 5762e5b6d6dSopenharmony_ci * - U_COMPARE_IGNORE_CASE 5772e5b6d6dSopenharmony_ci * Set to compare strings case-insensitively using case folding, 5782e5b6d6dSopenharmony_ci * instead of case-sensitively. 5792e5b6d6dSopenharmony_ci * If set, then the following case folding options are used. 5802e5b6d6dSopenharmony_ci * 5812e5b6d6dSopenharmony_ci * - Options as used with case-insensitive comparisons, currently: 5822e5b6d6dSopenharmony_ci * 5832e5b6d6dSopenharmony_ci * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 5842e5b6d6dSopenharmony_ci * (see u_strCaseCompare for details) 5852e5b6d6dSopenharmony_ci * 5862e5b6d6dSopenharmony_ci * - regular normalization options shifted left by UNORM_COMPARE_NORM_OPTIONS_SHIFT 5872e5b6d6dSopenharmony_ci * 5882e5b6d6dSopenharmony_ci * @param pErrorCode ICU error code in/out parameter. 5892e5b6d6dSopenharmony_ci * Must fulfill U_SUCCESS before the function call. 
5902e5b6d6dSopenharmony_ci * @return <0 or 0 or >0 as usual for string comparisons 5912e5b6d6dSopenharmony_ci * 5922e5b6d6dSopenharmony_ci * @see unorm_normalize 5932e5b6d6dSopenharmony_ci * @see UNORM_FCD 5942e5b6d6dSopenharmony_ci * @see u_strCompare 5952e5b6d6dSopenharmony_ci * @see u_strCaseCompare 5962e5b6d6dSopenharmony_ci * 5972e5b6d6dSopenharmony_ci * @stable ICU 2.2 5982e5b6d6dSopenharmony_ci */ 5992e5b6d6dSopenharmony_ciU_CAPI int32_t U_EXPORT2 6002e5b6d6dSopenharmony_ciunorm_compare(const UChar *s1, int32_t length1, 6012e5b6d6dSopenharmony_ci const UChar *s2, int32_t length2, 6022e5b6d6dSopenharmony_ci uint32_t options, 6032e5b6d6dSopenharmony_ci UErrorCode *pErrorCode); 6042e5b6d6dSopenharmony_ci 6052e5b6d6dSopenharmony_ci#endif /* !UCONFIG_NO_NORMALIZATION */ 6062e5b6d6dSopenharmony_ci#endif /* __UNORM2_H__ */ 607