12e5b6d6dSopenharmony_ci// © 2016 and later: Unicode, Inc. and others. 22e5b6d6dSopenharmony_ci// License & terms of use: http://www.unicode.org/copyright.html 32e5b6d6dSopenharmony_ci/* 42e5b6d6dSopenharmony_ci ******************************************************************************* 52e5b6d6dSopenharmony_ci * 62e5b6d6dSopenharmony_ci * Copyright (C) 2003-2011, International Business Machines 72e5b6d6dSopenharmony_ci * Corporation and others. All Rights Reserved. 82e5b6d6dSopenharmony_ci * 92e5b6d6dSopenharmony_ci ******************************************************************************* 102e5b6d6dSopenharmony_ci * file name: idnaref.h 112e5b6d6dSopenharmony_ci * encoding: UTF-8 122e5b6d6dSopenharmony_ci * tab size: 8 (not used) 132e5b6d6dSopenharmony_ci * indentation:4 142e5b6d6dSopenharmony_ci * 152e5b6d6dSopenharmony_ci * created on: 2003feb1 162e5b6d6dSopenharmony_ci * created by: Ram Viswanadha 172e5b6d6dSopenharmony_ci */ 182e5b6d6dSopenharmony_ci 192e5b6d6dSopenharmony_ci#ifndef __IDNAREF_H__ 202e5b6d6dSopenharmony_ci#define __IDNAREF_H__ 212e5b6d6dSopenharmony_ci 222e5b6d6dSopenharmony_ci#include "unicode/utypes.h" 232e5b6d6dSopenharmony_ci 242e5b6d6dSopenharmony_ci#if !UCONFIG_NO_IDNA 252e5b6d6dSopenharmony_ci 262e5b6d6dSopenharmony_ci#include "unicode/parseerr.h" 272e5b6d6dSopenharmony_ci 282e5b6d6dSopenharmony_ci#define IDNAREF_DEFAULT 0x0000 292e5b6d6dSopenharmony_ci#define IDNAREF_ALLOW_UNASSIGNED 0x0001 302e5b6d6dSopenharmony_ci#define IDNAREF_USE_STD3_RULES 0x0002 312e5b6d6dSopenharmony_ci 322e5b6d6dSopenharmony_ci/** 332e5b6d6dSopenharmony_ci * This function implements the ToASCII operation as defined in the IDNA draft. 342e5b6d6dSopenharmony_ci * This operation is done on <b>single labels</b> before sending it to something that expects 352e5b6d6dSopenharmony_ci * ASCII names. A label is an individual part of a domain name. Labels are usually 362e5b6d6dSopenharmony_ci * separated by dots; for e.g." "www.example.com" is composed of 3 labels 372e5b6d6dSopenharmony_ci * "www","example", and "com". 382e5b6d6dSopenharmony_ci * 392e5b6d6dSopenharmony_ci * 402e5b6d6dSopenharmony_ci * @param src Input Unicode label. 412e5b6d6dSopenharmony_ci * @param srcLength Number of UChars in src, or -1 if NUL-terminated. 422e5b6d6dSopenharmony_ci * @param dest Output Unicode array with ACE encoded ASCII label. 432e5b6d6dSopenharmony_ci * @param destCapacity Size of dest. 442e5b6d6dSopenharmony_ci * @param options A bit set of options: 452e5b6d6dSopenharmony_ci * 462e5b6d6dSopenharmony_ci * - idnaref_UNASSIGNED Unassigned values can be converted to ASCII for query operations 472e5b6d6dSopenharmony_ci * If true unassigned values are treated as normal Unicode code points. 482e5b6d6dSopenharmony_ci * If false the operation fails with U_UNASSIGNED_CODE_POINT_FOUND error code. 492e5b6d6dSopenharmony_ci * - idnaref_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 502e5b6d6dSopenharmony_ci * If true and the input does not satisfy STD3 rules, the operation 512e5b6d6dSopenharmony_ci * will fail with U_IDNA_STD3_ASCII_RULES_ERROR 522e5b6d6dSopenharmony_ci * 532e5b6d6dSopenharmony_ci * @param parseError Pointer to UParseError struct to receive information on position 542e5b6d6dSopenharmony_ci * of error if an error is encountered. Can be NULL. 552e5b6d6dSopenharmony_ci * @param status ICU in/out error code parameter. 562e5b6d6dSopenharmony_ci * U_INVALID_CHAR_FOUND if src contains 572e5b6d6dSopenharmony_ci * unmatched single surrogates. 582e5b6d6dSopenharmony_ci * U_INDEX_OUTOFBOUNDS_ERROR if src contains 592e5b6d6dSopenharmony_ci * too many code points. 602e5b6d6dSopenharmony_ci * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough 612e5b6d6dSopenharmony_ci * @return Number of ASCII characters converted. 622e5b6d6dSopenharmony_ci */ 632e5b6d6dSopenharmony_ciU_CFUNC int32_t U_EXPORT2 642e5b6d6dSopenharmony_ciidnaref_toASCII(const UChar* src, int32_t srcLength, 652e5b6d6dSopenharmony_ci UChar* dest, int32_t destCapacity, 662e5b6d6dSopenharmony_ci int32_t options, 672e5b6d6dSopenharmony_ci UParseError* parseError, 682e5b6d6dSopenharmony_ci UErrorCode* status); 692e5b6d6dSopenharmony_ci 702e5b6d6dSopenharmony_ci 712e5b6d6dSopenharmony_ci/** 722e5b6d6dSopenharmony_ci * This function implements the ToUnicode operation as defined in the IDNA draft. 732e5b6d6dSopenharmony_ci * This operation is done on <b>single labels</b> before sending it to something that expects 742e5b6d6dSopenharmony_ci * ASCII names. A label is an individual part of a domain name. Labels are usually 752e5b6d6dSopenharmony_ci * separated by dots; for e.g." "www.example.com" is composed of 3 labels 762e5b6d6dSopenharmony_ci * "www","example", and "com". 772e5b6d6dSopenharmony_ci * 782e5b6d6dSopenharmony_ci * @param src Input ASCII (ACE encoded) label. 792e5b6d6dSopenharmony_ci * @param srcLength Number of UChars in src, or -1 if NUL-terminated. 802e5b6d6dSopenharmony_ci * @param dest Output Converted Unicode array. 812e5b6d6dSopenharmony_ci * @param destCapacity Size of dest. 822e5b6d6dSopenharmony_ci * @param options A bit set of options: 832e5b6d6dSopenharmony_ci * 842e5b6d6dSopenharmony_ci * - idnaref_UNASSIGNED Unassigned values can be converted to ASCII for query operations 852e5b6d6dSopenharmony_ci * If true unassigned values are treated as normal Unicode code points. 862e5b6d6dSopenharmony_ci * If false the operation fails with U_UNASSIGNED_CODE_POINT_FOUND error code. 872e5b6d6dSopenharmony_ci * - idnaref_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 882e5b6d6dSopenharmony_ci * If true and the input does not satisfy STD3 rules, the operation 892e5b6d6dSopenharmony_ci * will fail with U_IDNA_STD3_ASCII_RULES_ERROR 902e5b6d6dSopenharmony_ci * 912e5b6d6dSopenharmony_ci * @param parseError Pointer to UParseError struct to receive information on position 922e5b6d6dSopenharmony_ci * of error if an error is encountered. Can be NULL. 932e5b6d6dSopenharmony_ci * @param status ICU in/out error code parameter. 942e5b6d6dSopenharmony_ci * U_INVALID_CHAR_FOUND if src contains 952e5b6d6dSopenharmony_ci * unmatched single surrogates. 962e5b6d6dSopenharmony_ci * U_INDEX_OUTOFBOUNDS_ERROR if src contains 972e5b6d6dSopenharmony_ci * too many code points. 982e5b6d6dSopenharmony_ci * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough 992e5b6d6dSopenharmony_ci * @return Number of Unicode characters converted. 1002e5b6d6dSopenharmony_ci */ 1012e5b6d6dSopenharmony_ciU_CFUNC int32_t U_EXPORT2 1022e5b6d6dSopenharmony_ciidnaref_toUnicode(const UChar* src, int32_t srcLength, 1032e5b6d6dSopenharmony_ci UChar* dest, int32_t destCapacity, 1042e5b6d6dSopenharmony_ci int32_t options, 1052e5b6d6dSopenharmony_ci UParseError* parseError, 1062e5b6d6dSopenharmony_ci UErrorCode* status); 1072e5b6d6dSopenharmony_ci 1082e5b6d6dSopenharmony_ci 1092e5b6d6dSopenharmony_ci/** 1102e5b6d6dSopenharmony_ci * Convenience function that implements the IDNToASCII operation as defined in the IDNA draft. 1112e5b6d6dSopenharmony_ci * This operation is done on complete domain names, e.g: "www.example.com". 1122e5b6d6dSopenharmony_ci * It is important to note that this operation can fail. If it fails, then the input 1132e5b6d6dSopenharmony_ci * domain name cannot be used as an Internationalized Domain Name and the application 1142e5b6d6dSopenharmony_ci * should have methods defined to deal with the failure. 1152e5b6d6dSopenharmony_ci * 1162e5b6d6dSopenharmony_ci * <b>Note:</b> IDNA draft specifies that a conformant application should divide a domain name 1172e5b6d6dSopenharmony_ci * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each, 1182e5b6d6dSopenharmony_ci * and then convert. This function does not offer that level of granularity. The options once 1192e5b6d6dSopenharmony_ci * set will apply to all labels in the domain name 1202e5b6d6dSopenharmony_ci * 1212e5b6d6dSopenharmony_ci * @param src Input ASCII IDN. 1222e5b6d6dSopenharmony_ci * @param srcLength Number of UChars in src, or -1 if NUL-terminated. 1232e5b6d6dSopenharmony_ci * @param dest Output Unicode array. 1242e5b6d6dSopenharmony_ci * @param destCapacity Size of dest. 1252e5b6d6dSopenharmony_ci * @param options A bit set of options: 1262e5b6d6dSopenharmony_ci * 1272e5b6d6dSopenharmony_ci * - idnaref_UNASSIGNED Unassigned values can be converted to ASCII for query operations 1282e5b6d6dSopenharmony_ci * If true unassigned values are treated as normal Unicode code points. 1292e5b6d6dSopenharmony_ci * If false the operation fails with U_UNASSIGNED_CODE_POINT_FOUND error code. 1302e5b6d6dSopenharmony_ci * - idnaref_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 1312e5b6d6dSopenharmony_ci * If true and the input does not satisfy STD3 rules, the operation 1322e5b6d6dSopenharmony_ci * will fail with U_IDNA_STD3_ASCII_RULES_ERROR 1332e5b6d6dSopenharmony_ci * 1342e5b6d6dSopenharmony_ci * @param parseError Pointer to UParseError struct to receive information on position 1352e5b6d6dSopenharmony_ci * of error if an error is encountered. Can be NULL. 1362e5b6d6dSopenharmony_ci * @param status ICU in/out error code parameter. 1372e5b6d6dSopenharmony_ci * U_INVALID_CHAR_FOUND if src contains 1382e5b6d6dSopenharmony_ci * unmatched single surrogates. 1392e5b6d6dSopenharmony_ci * U_INDEX_OUTOFBOUNDS_ERROR if src contains 1402e5b6d6dSopenharmony_ci * too many code points. 1412e5b6d6dSopenharmony_ci * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough 1422e5b6d6dSopenharmony_ci * @return Number of ASCII characters converted. 1432e5b6d6dSopenharmony_ci */ 1442e5b6d6dSopenharmony_ciU_CFUNC int32_t U_EXPORT2 1452e5b6d6dSopenharmony_ciidnaref_IDNToASCII( const UChar* src, int32_t srcLength, 1462e5b6d6dSopenharmony_ci UChar* dest, int32_t destCapacity, 1472e5b6d6dSopenharmony_ci int32_t options, 1482e5b6d6dSopenharmony_ci UParseError* parseError, 1492e5b6d6dSopenharmony_ci UErrorCode* status); 1502e5b6d6dSopenharmony_ci 1512e5b6d6dSopenharmony_ci/** 1522e5b6d6dSopenharmony_ci * Convenience function that implements the IDNToUnicode operation as defined in the IDNA draft. 1532e5b6d6dSopenharmony_ci * This operation is done on complete domain names, e.g: "www.example.com". 1542e5b6d6dSopenharmony_ci * 1552e5b6d6dSopenharmony_ci * <b>Note:</b> IDNA draft specifies that a conformant application should divide a domain name 1562e5b6d6dSopenharmony_ci * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each, 1572e5b6d6dSopenharmony_ci * and then convert. This function does not offer that level of granularity. The options once 1582e5b6d6dSopenharmony_ci * set will apply to all labels in the domain name 1592e5b6d6dSopenharmony_ci * 1602e5b6d6dSopenharmony_ci * @param src Input Unicode IDN. 1612e5b6d6dSopenharmony_ci * @param srcLength Number of UChars in src, or -1 if NUL-terminated. 1622e5b6d6dSopenharmony_ci * @param dest Output ASCII array. 1632e5b6d6dSopenharmony_ci * @param destCapacity Size of dest. 1642e5b6d6dSopenharmony_ci * @param options A bit set of options: 1652e5b6d6dSopenharmony_ci * 1662e5b6d6dSopenharmony_ci * - idnaref_UNASSIGNED Unassigned values can be converted to ASCII for query operations 1672e5b6d6dSopenharmony_ci * If true unassigned values are treated as normal Unicode code points. 1682e5b6d6dSopenharmony_ci * If false the operation fails with U_UNASSIGNED_CODE_POINT_FOUND error code. 1692e5b6d6dSopenharmony_ci * - idnaref_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 1702e5b6d6dSopenharmony_ci * If true and the input does not satisfy STD3 rules, the operation 1712e5b6d6dSopenharmony_ci * will fail with U_IDNA_STD3_ASCII_RULES_ERROR 1722e5b6d6dSopenharmony_ci * 1732e5b6d6dSopenharmony_ci * @param parseError Pointer to UParseError struct to receive information on position 1742e5b6d6dSopenharmony_ci * of error if an error is encountered. Can be NULL. 1752e5b6d6dSopenharmony_ci * @param status ICU in/out error code parameter. 1762e5b6d6dSopenharmony_ci * U_INVALID_CHAR_FOUND if src contains 1772e5b6d6dSopenharmony_ci * unmatched single surrogates. 1782e5b6d6dSopenharmony_ci * U_INDEX_OUTOFBOUNDS_ERROR if src contains 1792e5b6d6dSopenharmony_ci * too many code points. 1802e5b6d6dSopenharmony_ci * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough 1812e5b6d6dSopenharmony_ci * @return Number of ASCII characters converted. 1822e5b6d6dSopenharmony_ci */ 1832e5b6d6dSopenharmony_ciU_CFUNC int32_t U_EXPORT2 1842e5b6d6dSopenharmony_ciidnaref_IDNToUnicode( const UChar* src, int32_t srcLength, 1852e5b6d6dSopenharmony_ci UChar* dest, int32_t destCapacity, 1862e5b6d6dSopenharmony_ci int32_t options, 1872e5b6d6dSopenharmony_ci UParseError* parseError, 1882e5b6d6dSopenharmony_ci UErrorCode* status); 1892e5b6d6dSopenharmony_ci 1902e5b6d6dSopenharmony_ci/** 1912e5b6d6dSopenharmony_ci * Compare two strings for IDNs for equivalence. 1922e5b6d6dSopenharmony_ci * This function splits the domain names into labels and compares them. 1932e5b6d6dSopenharmony_ci * According to IDN draft, whenever two labels are compared, they are 1942e5b6d6dSopenharmony_ci * considered equal if and only if their ASCII forms (obtained by 1952e5b6d6dSopenharmony_ci * applying toASCII) match using an case-insensitive ASCII comparison. 1962e5b6d6dSopenharmony_ci * Two domain names are considered a match if and only if all labels 1972e5b6d6dSopenharmony_ci * match regardless of whether label separators match. 1982e5b6d6dSopenharmony_ci * 1992e5b6d6dSopenharmony_ci * @param s1 First source string. 2002e5b6d6dSopenharmony_ci * @param length1 Length of first source string, or -1 if NUL-terminated. 2012e5b6d6dSopenharmony_ci * 2022e5b6d6dSopenharmony_ci * @param s2 Second source string. 2032e5b6d6dSopenharmony_ci * @param length2 Length of second source string, or -1 if NUL-terminated. 2042e5b6d6dSopenharmony_ci * @param options A bit set of options: 2052e5b6d6dSopenharmony_ci * 2062e5b6d6dSopenharmony_ci * - idnaref_UNASSIGNED Unassigned values can be converted to ASCII for query operations 2072e5b6d6dSopenharmony_ci * If true unassigned values are treated as normal Unicode code points. 2082e5b6d6dSopenharmony_ci * If false the operation fails with U_UNASSIGNED_CODE_POINT_FOUND error code. 2092e5b6d6dSopenharmony_ci * - idnaref_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 2102e5b6d6dSopenharmony_ci * If true and the input does not satisfy STD3 rules, the operation 2112e5b6d6dSopenharmony_ci * will fail with U_IDNA_STD3_ASCII_RULES_ERROR 2122e5b6d6dSopenharmony_ci * 2132e5b6d6dSopenharmony_ci * @param status ICU error code in/out parameter. 2142e5b6d6dSopenharmony_ci * Must fulfill U_SUCCESS before the function call. 2152e5b6d6dSopenharmony_ci * @return <0 or 0 or >0 as usual for string comparisons 2162e5b6d6dSopenharmony_ci */ 2172e5b6d6dSopenharmony_ciU_CFUNC int32_t U_EXPORT2 2182e5b6d6dSopenharmony_ciidnaref_compare( const UChar *s1, int32_t length1, 2192e5b6d6dSopenharmony_ci const UChar *s2, int32_t length2, 2202e5b6d6dSopenharmony_ci int32_t options, 2212e5b6d6dSopenharmony_ci UErrorCode* status); 2222e5b6d6dSopenharmony_ci 2232e5b6d6dSopenharmony_ci#endif /* #if !UCONFIG_NO_IDNA */ 2242e5b6d6dSopenharmony_ci 2252e5b6d6dSopenharmony_ci#endif 226