// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
 *******************************************************************************
 *
 *   Copyright (C) 2003-2011, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
 *   file name:  idnaref.h
 *   encoding:   UTF-8
 *   tab size:   8 (not used)
 *   indentation:4
 *
 *   created on: 2003feb1
 *   created by: Ram Viswanadha
 */
182e5b6d6dSopenharmony_ci
192e5b6d6dSopenharmony_ci#ifndef __IDNAREF_H__
202e5b6d6dSopenharmony_ci#define __IDNAREF_H__
212e5b6d6dSopenharmony_ci
222e5b6d6dSopenharmony_ci#include "unicode/utypes.h"
232e5b6d6dSopenharmony_ci
242e5b6d6dSopenharmony_ci#if !UCONFIG_NO_IDNA
252e5b6d6dSopenharmony_ci
262e5b6d6dSopenharmony_ci#include "unicode/parseerr.h"
272e5b6d6dSopenharmony_ci
/*
 * Option bits accepted by the `options` parameter of the idnaref_* functions.
 * The non-zero values are distinct bits and may be combined with bitwise OR.
 */

/** No option bits set. */
#define IDNAREF_DEFAULT          0x0000
/** Treat unassigned code points as normal Unicode code points instead of failing. */
#define IDNAREF_ALLOW_UNASSIGNED 0x0001
/** Apply STD3 ASCII rules for host name syntax restrictions. */
#define IDNAREF_USE_STD3_RULES   0x0002
312e5b6d6dSopenharmony_ci
322e5b6d6dSopenharmony_ci/**
332e5b6d6dSopenharmony_ci * This function implements the ToASCII operation as defined in the IDNA draft.
342e5b6d6dSopenharmony_ci * This operation is done on <b>single labels</b> before sending it to something that expects
352e5b6d6dSopenharmony_ci * ASCII names. A label is an individual part of a domain name. Labels are usually
362e5b6d6dSopenharmony_ci * separated by dots; for e.g." "www.example.com" is composed of 3 labels
372e5b6d6dSopenharmony_ci * "www","example", and "com".
382e5b6d6dSopenharmony_ci *
392e5b6d6dSopenharmony_ci *
402e5b6d6dSopenharmony_ci * @param src               Input Unicode label.
412e5b6d6dSopenharmony_ci * @param srcLength         Number of UChars in src, or -1 if NUL-terminated.
422e5b6d6dSopenharmony_ci * @param dest              Output Unicode array with ACE encoded ASCII label.
432e5b6d6dSopenharmony_ci * @param destCapacity      Size of dest.
442e5b6d6dSopenharmony_ci * @param options           A bit set of options:
452e5b6d6dSopenharmony_ci *
462e5b6d6dSopenharmony_ci *  - idnaref_UNASSIGNED        Unassigned values can be converted to ASCII for query operations
472e5b6d6dSopenharmony_ci *                          If true unassigned values are treated as normal Unicode code points.
482e5b6d6dSopenharmony_ci *                          If false the operation fails with U_UNASSIGNED_CODE_POINT_FOUND error code.
492e5b6d6dSopenharmony_ci *  - idnaref_USE_STD3_RULES    Use STD3 ASCII rules for host name syntax restrictions
502e5b6d6dSopenharmony_ci *                          If true and the input does not satisfy STD3 rules, the operation
512e5b6d6dSopenharmony_ci *                          will fail with U_IDNA_STD3_ASCII_RULES_ERROR
522e5b6d6dSopenharmony_ci *
532e5b6d6dSopenharmony_ci * @param parseError        Pointer to UParseError struct to receive information on position
542e5b6d6dSopenharmony_ci *                          of error if an error is encountered. Can be NULL.
552e5b6d6dSopenharmony_ci * @param status            ICU in/out error code parameter.
562e5b6d6dSopenharmony_ci *                          U_INVALID_CHAR_FOUND if src contains
572e5b6d6dSopenharmony_ci *                          unmatched single surrogates.
582e5b6d6dSopenharmony_ci *                          U_INDEX_OUTOFBOUNDS_ERROR if src contains
592e5b6d6dSopenharmony_ci *                          too many code points.
602e5b6d6dSopenharmony_ci *                          U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
612e5b6d6dSopenharmony_ci * @return                  Number of ASCII characters converted.
622e5b6d6dSopenharmony_ci */
632e5b6d6dSopenharmony_ciU_CFUNC int32_t U_EXPORT2
642e5b6d6dSopenharmony_ciidnaref_toASCII(const UChar* src, int32_t srcLength,
652e5b6d6dSopenharmony_ci              UChar* dest, int32_t destCapacity,
662e5b6d6dSopenharmony_ci              int32_t options,
672e5b6d6dSopenharmony_ci              UParseError* parseError,
682e5b6d6dSopenharmony_ci              UErrorCode* status);
692e5b6d6dSopenharmony_ci
702e5b6d6dSopenharmony_ci
712e5b6d6dSopenharmony_ci/**
722e5b6d6dSopenharmony_ci * This function implements the ToUnicode operation as defined in the IDNA draft.
732e5b6d6dSopenharmony_ci * This operation is done on <b>single labels</b> before sending it to something that expects
742e5b6d6dSopenharmony_ci * ASCII names. A label is an individual part of a domain name. Labels are usually
752e5b6d6dSopenharmony_ci * separated by dots; for e.g." "www.example.com" is composed of 3 labels
762e5b6d6dSopenharmony_ci * "www","example", and "com".
772e5b6d6dSopenharmony_ci *
782e5b6d6dSopenharmony_ci * @param src               Input ASCII (ACE encoded) label.
792e5b6d6dSopenharmony_ci * @param srcLength         Number of UChars in src, or -1 if NUL-terminated.
802e5b6d6dSopenharmony_ci * @param dest Output       Converted Unicode array.
812e5b6d6dSopenharmony_ci * @param destCapacity      Size of dest.
822e5b6d6dSopenharmony_ci * @param options           A bit set of options:
832e5b6d6dSopenharmony_ci *
842e5b6d6dSopenharmony_ci *  - idnaref_UNASSIGNED        Unassigned values can be converted to ASCII for query operations
852e5b6d6dSopenharmony_ci *                          If true unassigned values are treated as normal Unicode code points.
862e5b6d6dSopenharmony_ci *                          If false the operation fails with U_UNASSIGNED_CODE_POINT_FOUND error code.
872e5b6d6dSopenharmony_ci *  - idnaref_USE_STD3_RULES    Use STD3 ASCII rules for host name syntax restrictions
882e5b6d6dSopenharmony_ci *                          If true and the input does not satisfy STD3 rules, the operation
892e5b6d6dSopenharmony_ci *                          will fail with U_IDNA_STD3_ASCII_RULES_ERROR
902e5b6d6dSopenharmony_ci *
912e5b6d6dSopenharmony_ci * @param parseError        Pointer to UParseError struct to receive information on position
922e5b6d6dSopenharmony_ci *                          of error if an error is encountered. Can be NULL.
932e5b6d6dSopenharmony_ci * @param status            ICU in/out error code parameter.
942e5b6d6dSopenharmony_ci *                          U_INVALID_CHAR_FOUND if src contains
952e5b6d6dSopenharmony_ci *                          unmatched single surrogates.
962e5b6d6dSopenharmony_ci *                          U_INDEX_OUTOFBOUNDS_ERROR if src contains
972e5b6d6dSopenharmony_ci *                          too many code points.
982e5b6d6dSopenharmony_ci *                          U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
992e5b6d6dSopenharmony_ci * @return                  Number of Unicode characters converted.
1002e5b6d6dSopenharmony_ci */
1012e5b6d6dSopenharmony_ciU_CFUNC int32_t U_EXPORT2
1022e5b6d6dSopenharmony_ciidnaref_toUnicode(const UChar* src, int32_t srcLength,
1032e5b6d6dSopenharmony_ci                UChar* dest, int32_t destCapacity,
1042e5b6d6dSopenharmony_ci                int32_t options,
1052e5b6d6dSopenharmony_ci                UParseError* parseError,
1062e5b6d6dSopenharmony_ci                UErrorCode* status);
1072e5b6d6dSopenharmony_ci
1082e5b6d6dSopenharmony_ci
1092e5b6d6dSopenharmony_ci/**
1102e5b6d6dSopenharmony_ci * Convenience function that implements the IDNToASCII operation as defined in the IDNA draft.
1112e5b6d6dSopenharmony_ci * This operation is done on complete domain names, e.g: "www.example.com".
1122e5b6d6dSopenharmony_ci * It is important to note that this operation can fail. If it fails, then the input
1132e5b6d6dSopenharmony_ci * domain name cannot be used as an Internationalized Domain Name and the application
1142e5b6d6dSopenharmony_ci * should have methods defined to deal with the failure.
1152e5b6d6dSopenharmony_ci *
1162e5b6d6dSopenharmony_ci * <b>Note:</b> IDNA draft specifies that a conformant application should divide a domain name
1172e5b6d6dSopenharmony_ci * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
1182e5b6d6dSopenharmony_ci * and then convert. This function does not offer that level of granularity. The options once
1192e5b6d6dSopenharmony_ci * set will apply to all labels in the domain name
1202e5b6d6dSopenharmony_ci *
1212e5b6d6dSopenharmony_ci * @param src               Input ASCII IDN.
1222e5b6d6dSopenharmony_ci * @param srcLength         Number of UChars in src, or -1 if NUL-terminated.
1232e5b6d6dSopenharmony_ci * @param dest Output       Unicode array.
1242e5b6d6dSopenharmony_ci * @param destCapacity      Size of dest.
1252e5b6d6dSopenharmony_ci * @param options           A bit set of options:
1262e5b6d6dSopenharmony_ci *
1272e5b6d6dSopenharmony_ci *  - idnaref_UNASSIGNED        Unassigned values can be converted to ASCII for query operations
1282e5b6d6dSopenharmony_ci *                          If true unassigned values are treated as normal Unicode code points.
1292e5b6d6dSopenharmony_ci *                          If false the operation fails with U_UNASSIGNED_CODE_POINT_FOUND error code.
1302e5b6d6dSopenharmony_ci *  - idnaref_USE_STD3_RULES    Use STD3 ASCII rules for host name syntax restrictions
1312e5b6d6dSopenharmony_ci *                          If true and the input does not satisfy STD3 rules, the operation
1322e5b6d6dSopenharmony_ci *                          will fail with U_IDNA_STD3_ASCII_RULES_ERROR
1332e5b6d6dSopenharmony_ci *
1342e5b6d6dSopenharmony_ci * @param parseError        Pointer to UParseError struct to receive information on position
1352e5b6d6dSopenharmony_ci *                          of error if an error is encountered. Can be NULL.
1362e5b6d6dSopenharmony_ci * @param status            ICU in/out error code parameter.
1372e5b6d6dSopenharmony_ci *                          U_INVALID_CHAR_FOUND if src contains
1382e5b6d6dSopenharmony_ci *                          unmatched single surrogates.
1392e5b6d6dSopenharmony_ci *                          U_INDEX_OUTOFBOUNDS_ERROR if src contains
1402e5b6d6dSopenharmony_ci *                          too many code points.
1412e5b6d6dSopenharmony_ci *                          U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
1422e5b6d6dSopenharmony_ci * @return                  Number of ASCII characters converted.
1432e5b6d6dSopenharmony_ci */
1442e5b6d6dSopenharmony_ciU_CFUNC int32_t U_EXPORT2
1452e5b6d6dSopenharmony_ciidnaref_IDNToASCII(  const UChar* src, int32_t srcLength,
1462e5b6d6dSopenharmony_ci                   UChar* dest, int32_t destCapacity,
1472e5b6d6dSopenharmony_ci                   int32_t options,
1482e5b6d6dSopenharmony_ci                   UParseError* parseError,
1492e5b6d6dSopenharmony_ci                   UErrorCode* status);
1502e5b6d6dSopenharmony_ci
1512e5b6d6dSopenharmony_ci/**
1522e5b6d6dSopenharmony_ci * Convenience function that implements the IDNToUnicode operation as defined in the IDNA draft.
1532e5b6d6dSopenharmony_ci * This operation is done on complete domain names, e.g: "www.example.com".
1542e5b6d6dSopenharmony_ci *
1552e5b6d6dSopenharmony_ci * <b>Note:</b> IDNA draft specifies that a conformant application should divide a domain name
1562e5b6d6dSopenharmony_ci * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
1572e5b6d6dSopenharmony_ci * and then convert. This function does not offer that level of granularity. The options once
1582e5b6d6dSopenharmony_ci * set will apply to all labels in the domain name
1592e5b6d6dSopenharmony_ci *
1602e5b6d6dSopenharmony_ci * @param src               Input Unicode IDN.
1612e5b6d6dSopenharmony_ci * @param srcLength         Number of UChars in src, or -1 if NUL-terminated.
1622e5b6d6dSopenharmony_ci * @param dest Output       ASCII array.
1632e5b6d6dSopenharmony_ci * @param destCapacity      Size of dest.
1642e5b6d6dSopenharmony_ci * @param options           A bit set of options:
1652e5b6d6dSopenharmony_ci *
1662e5b6d6dSopenharmony_ci *  - idnaref_UNASSIGNED        Unassigned values can be converted to ASCII for query operations
1672e5b6d6dSopenharmony_ci *                          If true unassigned values are treated as normal Unicode code points.
1682e5b6d6dSopenharmony_ci *                          If false the operation fails with U_UNASSIGNED_CODE_POINT_FOUND error code.
1692e5b6d6dSopenharmony_ci *  - idnaref_USE_STD3_RULES    Use STD3 ASCII rules for host name syntax restrictions
1702e5b6d6dSopenharmony_ci *                          If true and the input does not satisfy STD3 rules, the operation
1712e5b6d6dSopenharmony_ci *                          will fail with U_IDNA_STD3_ASCII_RULES_ERROR
1722e5b6d6dSopenharmony_ci *
1732e5b6d6dSopenharmony_ci * @param parseError        Pointer to UParseError struct to receive information on position
1742e5b6d6dSopenharmony_ci *                          of error if an error is encountered. Can be NULL.
1752e5b6d6dSopenharmony_ci * @param status            ICU in/out error code parameter.
1762e5b6d6dSopenharmony_ci *                          U_INVALID_CHAR_FOUND if src contains
1772e5b6d6dSopenharmony_ci *                          unmatched single surrogates.
1782e5b6d6dSopenharmony_ci *                          U_INDEX_OUTOFBOUNDS_ERROR if src contains
1792e5b6d6dSopenharmony_ci *                          too many code points.
1802e5b6d6dSopenharmony_ci *                          U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
1812e5b6d6dSopenharmony_ci * @return                  Number of ASCII characters converted.
1822e5b6d6dSopenharmony_ci */
1832e5b6d6dSopenharmony_ciU_CFUNC int32_t U_EXPORT2
1842e5b6d6dSopenharmony_ciidnaref_IDNToUnicode(  const UChar* src, int32_t srcLength,
1852e5b6d6dSopenharmony_ci                     UChar* dest, int32_t destCapacity,
1862e5b6d6dSopenharmony_ci                     int32_t options,
1872e5b6d6dSopenharmony_ci                     UParseError* parseError,
1882e5b6d6dSopenharmony_ci                     UErrorCode* status);
1892e5b6d6dSopenharmony_ci
1902e5b6d6dSopenharmony_ci/**
1912e5b6d6dSopenharmony_ci * Compare two strings for IDNs for equivalence.
1922e5b6d6dSopenharmony_ci * This function splits the domain names into labels and compares them.
1932e5b6d6dSopenharmony_ci * According to IDN draft, whenever two labels are compared, they are
1942e5b6d6dSopenharmony_ci * considered equal if and only if their ASCII forms (obtained by
1952e5b6d6dSopenharmony_ci * applying toASCII) match using an case-insensitive ASCII comparison.
1962e5b6d6dSopenharmony_ci * Two domain names are considered a match if and only if all labels
1972e5b6d6dSopenharmony_ci * match regardless of whether label separators match.
1982e5b6d6dSopenharmony_ci *
1992e5b6d6dSopenharmony_ci * @param s1                First source string.
2002e5b6d6dSopenharmony_ci * @param length1           Length of first source string, or -1 if NUL-terminated.
2012e5b6d6dSopenharmony_ci *
2022e5b6d6dSopenharmony_ci * @param s2                Second source string.
2032e5b6d6dSopenharmony_ci * @param length2           Length of second source string, or -1 if NUL-terminated.
2042e5b6d6dSopenharmony_ci * @param options           A bit set of options:
2052e5b6d6dSopenharmony_ci *
2062e5b6d6dSopenharmony_ci *  - idnaref_UNASSIGNED        Unassigned values can be converted to ASCII for query operations
2072e5b6d6dSopenharmony_ci *                          If true unassigned values are treated as normal Unicode code points.
2082e5b6d6dSopenharmony_ci *                          If false the operation fails with U_UNASSIGNED_CODE_POINT_FOUND error code.
2092e5b6d6dSopenharmony_ci *  - idnaref_USE_STD3_RULES    Use STD3 ASCII rules for host name syntax restrictions
2102e5b6d6dSopenharmony_ci *                          If true and the input does not satisfy STD3 rules, the operation
2112e5b6d6dSopenharmony_ci *                          will fail with U_IDNA_STD3_ASCII_RULES_ERROR
2122e5b6d6dSopenharmony_ci *
2132e5b6d6dSopenharmony_ci * @param status            ICU error code in/out parameter.
2142e5b6d6dSopenharmony_ci *                          Must fulfill U_SUCCESS before the function call.
2152e5b6d6dSopenharmony_ci * @return <0 or 0 or >0 as usual for string comparisons
2162e5b6d6dSopenharmony_ci */
2172e5b6d6dSopenharmony_ciU_CFUNC int32_t U_EXPORT2
2182e5b6d6dSopenharmony_ciidnaref_compare(  const UChar *s1, int32_t length1,
2192e5b6d6dSopenharmony_ci                const UChar *s2, int32_t length2,
2202e5b6d6dSopenharmony_ci                int32_t options,
2212e5b6d6dSopenharmony_ci                UErrorCode* status);
2222e5b6d6dSopenharmony_ci
2232e5b6d6dSopenharmony_ci#endif /* #if !UCONFIG_NO_IDNA */
2242e5b6d6dSopenharmony_ci
2252e5b6d6dSopenharmony_ci#endif
226