1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * 6 * Copyright (C) 2003-2014, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 * 9 ******************************************************************************* 10 * file name: uidna.h 11 * encoding: UTF-8 12 * tab size: 8 (not used) 13 * indentation:4 14 * 15 * created on: 2003feb1 16 * created by: Ram Viswanadha 17 */ 18 19 #ifndef __UIDNA_H__ 20 #define __UIDNA_H__ 21 22 #include "unicode/utypes.h" 23 24 #if !UCONFIG_NO_IDNA 25 26 #include <stdbool.h> 27 #include "unicode/parseerr.h" 28 29 /** 30 * \file 31 * \brief C API: Internationalizing Domain Names in Applications (IDNA) 32 * 33 * IDNA2008 is implemented according to UTS #46, see the IDNA C++ class in idna.h. 34 * 35 * The C API functions which do take a UIDNA * service object pointer 36 * implement UTS #46 and IDNA2008. 37 * 38 * IDNA2003 is obsolete. 39 * The C API functions which do not take a service object pointer 40 * implement IDNA2003. They are all deprecated. 41 */ 42 43 /* 44 * IDNA option bit set values. 45 */ 46 enum { 47 /** 48 * Default options value: None of the other options are set. 49 * For use in static worker and factory methods. 50 * @stable ICU 2.6 51 */ 52 UIDNA_DEFAULT=0, 53 /** 54 * Option to check whether the input conforms to the STD3 ASCII rules, 55 * for example the restriction of labels to LDH characters 56 * (ASCII Letters, Digits and Hyphen-Minus). 57 * For use in static worker and factory methods. 58 * @stable ICU 2.6 59 */ 60 UIDNA_USE_STD3_RULES=2, 61 /** 62 * IDNA option to check for whether the input conforms to the BiDi rules. 63 * For use in static worker and factory methods. 64 * <p>This option is ignored by the IDNA2003 implementation. 65 * (IDNA2003 always performs a BiDi check.) 66 * @stable ICU 4.6 67 */ 68 UIDNA_CHECK_BIDI=4, 69 /** 70 * IDNA option to check for whether the input conforms to the CONTEXTJ rules. 71 * For use in static worker and factory methods. 72 * <p>This option is ignored by the IDNA2003 implementation. 73 * (The CONTEXTJ check is new in IDNA2008.) 74 * @stable ICU 4.6 75 */ 76 UIDNA_CHECK_CONTEXTJ=8, 77 /** 78 * IDNA option for nontransitional processing in ToASCII(). 79 * For use in static worker and factory methods. 80 * <p>By default, ToASCII() uses transitional processing. 81 * <p>This option is ignored by the IDNA2003 implementation. 82 * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.) 83 * @stable ICU 4.6 84 */ 85 UIDNA_NONTRANSITIONAL_TO_ASCII=0x10, 86 /** 87 * IDNA option for nontransitional processing in ToUnicode(). 88 * For use in static worker and factory methods. 89 * <p>By default, ToUnicode() uses transitional processing. 90 * <p>This option is ignored by the IDNA2003 implementation. 91 * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.) 92 * @stable ICU 4.6 93 */ 94 UIDNA_NONTRANSITIONAL_TO_UNICODE=0x20, 95 /** 96 * IDNA option to check for whether the input conforms to the CONTEXTO rules. 97 * For use in static worker and factory methods. 98 * <p>This option is ignored by the IDNA2003 implementation. 99 * (The CONTEXTO check is new in IDNA2008.) 100 * <p>This is for use by registries for IDNA2008 conformance. 101 * UTS #46 does not require the CONTEXTO check. 102 * @stable ICU 49 103 */ 104 UIDNA_CHECK_CONTEXTO=0x40 105 }; 106 107 /** 108 * Opaque C service object type for the new IDNA API. 109 * @stable ICU 4.6 110 */ 111 struct UIDNA; 112 typedef struct UIDNA UIDNA; /**< C typedef for struct UIDNA. @stable ICU 4.6 */ 113 114 /** 115 * Returns a UIDNA instance which implements UTS #46. 116 * Returns an unmodifiable instance, owned by the caller. 117 * Cache it for multiple operations, and uidna_close() it when done. 118 * The instance is thread-safe, that is, it can be used concurrently. 119 * 120 * For details about the UTS #46 implementation see the IDNA C++ class in idna.h. 121 * 122 * @param options Bit set to modify the processing and error checking. 123 * See option bit set values in uidna.h. 124 * @param pErrorCode Standard ICU error code. Its input value must 125 * pass the U_SUCCESS() test, or else the function returns 126 * immediately. Check for U_FAILURE() on output or use with 127 * function chaining. (See User Guide for details.) 128 * @return the UTS #46 UIDNA instance, if successful 129 * @stable ICU 4.6 130 */ 131 U_CAPI UIDNA * U_EXPORT2 132 uidna_openUTS46(uint32_t options, UErrorCode *pErrorCode); 133 134 /** 135 * Closes a UIDNA instance. 136 * @param idna UIDNA instance to be closed 137 * @stable ICU 4.6 138 */ 139 U_CAPI void U_EXPORT2 140 uidna_close(UIDNA *idna); 141 142 /** 143 * Output container for IDNA processing errors. 144 * Initialize with UIDNA_INFO_INITIALIZER: 145 * \code 146 * UIDNAInfo info = UIDNA_INFO_INITIALIZER; 147 * int32_t length = uidna_nameToASCII(..., &info, &errorCode); 148 * if(U_SUCCESS(errorCode) && info.errors!=0) { ... } 149 * \endcode 150 * @stable ICU 4.6 151 */ 152 typedef struct UIDNAInfo { 153 /** sizeof(UIDNAInfo) @stable ICU 4.6 */ 154 int16_t size; 155 /** 156 * Set to true if transitional and nontransitional processing produce different results. 157 * For details see C++ IDNAInfo::isTransitionalDifferent(). 158 * @stable ICU 4.6 159 */ 160 UBool isTransitionalDifferent; 161 UBool reservedB3; /**< Reserved field, do not use. @internal */ 162 /** 163 * Bit set indicating IDNA processing errors. 0 if no errors. 164 * See UIDNA_ERROR_... constants. 165 * @stable ICU 4.6 166 */ 167 uint32_t errors; 168 int32_t reservedI2; /**< Reserved field, do not use. @internal */ 169 int32_t reservedI3; /**< Reserved field, do not use. @internal */ 170 } UIDNAInfo; 171 172 /** 173 * Static initializer for a UIDNAInfo struct. 174 * @stable ICU 4.6 175 */ 176 #define UIDNA_INFO_INITIALIZER { \ 177 (int16_t)sizeof(UIDNAInfo), \ 178 false, false, \ 179 0, 0, 0 } 180 181 /** 182 * Converts a single domain name label into its ASCII form for DNS lookup. 183 * If any processing step fails, then pInfo->errors will be non-zero and 184 * the result might not be an ASCII string. 185 * The label might be modified according to the types of errors. 186 * Labels with severe errors will be left in (or turned into) their Unicode form. 187 * 188 * The UErrorCode indicates an error only in exceptional cases, 189 * such as a U_MEMORY_ALLOCATION_ERROR. 190 * 191 * @param idna UIDNA instance 192 * @param label Input domain name label 193 * @param length Label length, or -1 if NUL-terminated 194 * @param dest Destination string buffer 195 * @param capacity Destination buffer capacity 196 * @param pInfo Output container of IDNA processing details. 197 * @param pErrorCode Standard ICU error code. Its input value must 198 * pass the U_SUCCESS() test, or else the function returns 199 * immediately. Check for U_FAILURE() on output or use with 200 * function chaining. (See User Guide for details.) 201 * @return destination string length 202 * @stable ICU 4.6 203 */ 204 U_CAPI int32_t U_EXPORT2 205 uidna_labelToASCII(const UIDNA *idna, 206 const UChar *label, int32_t length, 207 UChar *dest, int32_t capacity, 208 UIDNAInfo *pInfo, UErrorCode *pErrorCode); 209 210 /** 211 * Converts a single domain name label into its Unicode form for human-readable display. 212 * If any processing step fails, then pInfo->errors will be non-zero. 213 * The label might be modified according to the types of errors. 214 * 215 * The UErrorCode indicates an error only in exceptional cases, 216 * such as a U_MEMORY_ALLOCATION_ERROR. 217 * 218 * @param idna UIDNA instance 219 * @param label Input domain name label 220 * @param length Label length, or -1 if NUL-terminated 221 * @param dest Destination string buffer 222 * @param capacity Destination buffer capacity 223 * @param pInfo Output container of IDNA processing details. 224 * @param pErrorCode Standard ICU error code. Its input value must 225 * pass the U_SUCCESS() test, or else the function returns 226 * immediately. Check for U_FAILURE() on output or use with 227 * function chaining. (See User Guide for details.) 228 * @return destination string length 229 * @stable ICU 4.6 230 */ 231 U_CAPI int32_t U_EXPORT2 232 uidna_labelToUnicode(const UIDNA *idna, 233 const UChar *label, int32_t length, 234 UChar *dest, int32_t capacity, 235 UIDNAInfo *pInfo, UErrorCode *pErrorCode); 236 237 /** 238 * Converts a whole domain name into its ASCII form for DNS lookup. 239 * If any processing step fails, then pInfo->errors will be non-zero and 240 * the result might not be an ASCII string. 241 * The domain name might be modified according to the types of errors. 242 * Labels with severe errors will be left in (or turned into) their Unicode form. 243 * 244 * The UErrorCode indicates an error only in exceptional cases, 245 * such as a U_MEMORY_ALLOCATION_ERROR. 246 * 247 * @param idna UIDNA instance 248 * @param name Input domain name 249 * @param length Domain name length, or -1 if NUL-terminated 250 * @param dest Destination string buffer 251 * @param capacity Destination buffer capacity 252 * @param pInfo Output container of IDNA processing details. 253 * @param pErrorCode Standard ICU error code. Its input value must 254 * pass the U_SUCCESS() test, or else the function returns 255 * immediately. Check for U_FAILURE() on output or use with 256 * function chaining. (See User Guide for details.) 257 * @return destination string length 258 * @stable ICU 4.6 259 */ 260 U_CAPI int32_t U_EXPORT2 261 uidna_nameToASCII(const UIDNA *idna, 262 const UChar *name, int32_t length, 263 UChar *dest, int32_t capacity, 264 UIDNAInfo *pInfo, UErrorCode *pErrorCode); 265 266 /** 267 * Converts a whole domain name into its Unicode form for human-readable display. 268 * If any processing step fails, then pInfo->errors will be non-zero. 269 * The domain name might be modified according to the types of errors. 270 * 271 * The UErrorCode indicates an error only in exceptional cases, 272 * such as a U_MEMORY_ALLOCATION_ERROR. 273 * 274 * @param idna UIDNA instance 275 * @param name Input domain name 276 * @param length Domain name length, or -1 if NUL-terminated 277 * @param dest Destination string buffer 278 * @param capacity Destination buffer capacity 279 * @param pInfo Output container of IDNA processing details. 280 * @param pErrorCode Standard ICU error code. Its input value must 281 * pass the U_SUCCESS() test, or else the function returns 282 * immediately. Check for U_FAILURE() on output or use with 283 * function chaining. (See User Guide for details.) 284 * @return destination string length 285 * @stable ICU 4.6 286 */ 287 U_CAPI int32_t U_EXPORT2 288 uidna_nameToUnicode(const UIDNA *idna, 289 const UChar *name, int32_t length, 290 UChar *dest, int32_t capacity, 291 UIDNAInfo *pInfo, UErrorCode *pErrorCode); 292 293 /* UTF-8 versions of the processing methods --------------------------------- */ 294 295 /** 296 * Converts a single domain name label into its ASCII form for DNS lookup. 297 * UTF-8 version of uidna_labelToASCII(), same behavior. 298 * 299 * @param idna UIDNA instance 300 * @param label Input domain name label 301 * @param length Label length, or -1 if NUL-terminated 302 * @param dest Destination string buffer 303 * @param capacity Destination buffer capacity 304 * @param pInfo Output container of IDNA processing details. 305 * @param pErrorCode Standard ICU error code. Its input value must 306 * pass the U_SUCCESS() test, or else the function returns 307 * immediately. Check for U_FAILURE() on output or use with 308 * function chaining. (See User Guide for details.) 309 * @return destination string length 310 * @stable ICU 4.6 311 */ 312 U_CAPI int32_t U_EXPORT2 313 uidna_labelToASCII_UTF8(const UIDNA *idna, 314 const char *label, int32_t length, 315 char *dest, int32_t capacity, 316 UIDNAInfo *pInfo, UErrorCode *pErrorCode); 317 318 /** 319 * Converts a single domain name label into its Unicode form for human-readable display. 320 * UTF-8 version of uidna_labelToUnicode(), same behavior. 321 * 322 * @param idna UIDNA instance 323 * @param label Input domain name label 324 * @param length Label length, or -1 if NUL-terminated 325 * @param dest Destination string buffer 326 * @param capacity Destination buffer capacity 327 * @param pInfo Output container of IDNA processing details. 328 * @param pErrorCode Standard ICU error code. Its input value must 329 * pass the U_SUCCESS() test, or else the function returns 330 * immediately. Check for U_FAILURE() on output or use with 331 * function chaining. (See User Guide for details.) 332 * @return destination string length 333 * @stable ICU 4.6 334 */ 335 U_CAPI int32_t U_EXPORT2 336 uidna_labelToUnicodeUTF8(const UIDNA *idna, 337 const char *label, int32_t length, 338 char *dest, int32_t capacity, 339 UIDNAInfo *pInfo, UErrorCode *pErrorCode); 340 341 /** 342 * Converts a whole domain name into its ASCII form for DNS lookup. 343 * UTF-8 version of uidna_nameToASCII(), same behavior. 344 * 345 * @param idna UIDNA instance 346 * @param name Input domain name 347 * @param length Domain name length, or -1 if NUL-terminated 348 * @param dest Destination string buffer 349 * @param capacity Destination buffer capacity 350 * @param pInfo Output container of IDNA processing details. 351 * @param pErrorCode Standard ICU error code. Its input value must 352 * pass the U_SUCCESS() test, or else the function returns 353 * immediately. Check for U_FAILURE() on output or use with 354 * function chaining. (See User Guide for details.) 355 * @return destination string length 356 * @stable ICU 4.6 357 */ 358 U_CAPI int32_t U_EXPORT2 359 uidna_nameToASCII_UTF8(const UIDNA *idna, 360 const char *name, int32_t length, 361 char *dest, int32_t capacity, 362 UIDNAInfo *pInfo, UErrorCode *pErrorCode); 363 364 /** 365 * Converts a whole domain name into its Unicode form for human-readable display. 366 * UTF-8 version of uidna_nameToUnicode(), same behavior. 367 * 368 * @param idna UIDNA instance 369 * @param name Input domain name 370 * @param length Domain name length, or -1 if NUL-terminated 371 * @param dest Destination string buffer 372 * @param capacity Destination buffer capacity 373 * @param pInfo Output container of IDNA processing details. 374 * @param pErrorCode Standard ICU error code. Its input value must 375 * pass the U_SUCCESS() test, or else the function returns 376 * immediately. Check for U_FAILURE() on output or use with 377 * function chaining. (See User Guide for details.) 378 * @return destination string length 379 * @stable ICU 4.6 380 */ 381 U_CAPI int32_t U_EXPORT2 382 uidna_nameToUnicodeUTF8(const UIDNA *idna, 383 const char *name, int32_t length, 384 char *dest, int32_t capacity, 385 UIDNAInfo *pInfo, UErrorCode *pErrorCode); 386 387 /* 388 * IDNA error bit set values. 389 * When a domain name or label fails a processing step or does not meet the 390 * validity criteria, then one or more of these error bits are set. 391 */ 392 enum { 393 /** 394 * A non-final domain name label (or the whole domain name) is empty. 395 * @stable ICU 4.6 396 */ 397 UIDNA_ERROR_EMPTY_LABEL=1, 398 /** 399 * A domain name label is longer than 63 bytes. 400 * (See STD13/RFC1034 3.1. Name space specifications and terminology.) 401 * This is only checked in ToASCII operations, and only if the output label is all-ASCII. 402 * @stable ICU 4.6 403 */ 404 UIDNA_ERROR_LABEL_TOO_LONG=2, 405 /** 406 * A domain name is longer than 255 bytes in its storage form. 407 * (See STD13/RFC1034 3.1. Name space specifications and terminology.) 408 * This is only checked in ToASCII operations, and only if the output domain name is all-ASCII. 409 * @stable ICU 4.6 410 */ 411 UIDNA_ERROR_DOMAIN_NAME_TOO_LONG=4, 412 /** 413 * A label starts with a hyphen-minus ('-'). 414 * @stable ICU 4.6 415 */ 416 UIDNA_ERROR_LEADING_HYPHEN=8, 417 /** 418 * A label ends with a hyphen-minus ('-'). 419 * @stable ICU 4.6 420 */ 421 UIDNA_ERROR_TRAILING_HYPHEN=0x10, 422 /** 423 * A label contains hyphen-minus ('-') in the third and fourth positions. 424 * @stable ICU 4.6 425 */ 426 UIDNA_ERROR_HYPHEN_3_4=0x20, 427 /** 428 * A label starts with a combining mark. 429 * @stable ICU 4.6 430 */ 431 UIDNA_ERROR_LEADING_COMBINING_MARK=0x40, 432 /** 433 * A label or domain name contains disallowed characters. 434 * @stable ICU 4.6 435 */ 436 UIDNA_ERROR_DISALLOWED=0x80, 437 /** 438 * A label starts with "xn--" but does not contain valid Punycode. 439 * That is, an xn-- label failed Punycode decoding. 440 * @stable ICU 4.6 441 */ 442 UIDNA_ERROR_PUNYCODE=0x100, 443 /** 444 * A label contains a dot=full stop. 445 * This can occur in an input string for a single-label function. 446 * @stable ICU 4.6 447 */ 448 UIDNA_ERROR_LABEL_HAS_DOT=0x200, 449 /** 450 * An ACE label does not contain a valid label string. 451 * The label was successfully ACE (Punycode) decoded but the resulting 452 * string had severe validation errors. For example, 453 * it might contain characters that are not allowed in ACE labels, 454 * or it might not be normalized. 455 * @stable ICU 4.6 456 */ 457 UIDNA_ERROR_INVALID_ACE_LABEL=0x400, 458 /** 459 * A label does not meet the IDNA BiDi requirements (for right-to-left characters). 460 * @stable ICU 4.6 461 */ 462 UIDNA_ERROR_BIDI=0x800, 463 /** 464 * A label does not meet the IDNA CONTEXTJ requirements. 465 * @stable ICU 4.6 466 */ 467 UIDNA_ERROR_CONTEXTJ=0x1000, 468 /** 469 * A label does not meet the IDNA CONTEXTO requirements for punctuation characters. 470 * Some punctuation characters "Would otherwise have been DISALLOWED" 471 * but are allowed in certain contexts. (RFC 5892) 472 * @stable ICU 49 473 */ 474 UIDNA_ERROR_CONTEXTO_PUNCTUATION=0x2000, 475 /** 476 * A label does not meet the IDNA CONTEXTO requirements for digits. 477 * Arabic-Indic Digits (U+066x) must not be mixed with Extended Arabic-Indic Digits (U+06Fx). 478 * @stable ICU 49 479 */ 480 UIDNA_ERROR_CONTEXTO_DIGITS=0x4000 481 }; 482 #endif /* #if !UCONFIG_NO_IDNA */ 483 484 #endif 485