1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4  *******************************************************************************
5  *
6  *   Copyright (C) 2003-2014, International Business Machines
7  *   Corporation and others.  All Rights Reserved.
8  *
9  *******************************************************************************
10  *   file name:  uidna.h
11  *   encoding:   UTF-8
12  *   tab size:   8 (not used)
13  *   indentation:4
14  *
15  *   created on: 2003feb1
16  *   created by: Ram Viswanadha
17  */
18 
19 #ifndef __UIDNA_H__
20 #define __UIDNA_H__
21 
22 #include "unicode/utypes.h"
23 
24 #if !UCONFIG_NO_IDNA
25 
26 #include <stdbool.h>
27 #include "unicode/parseerr.h"
28 
29 /**
30  * \file
31  * \brief C API: Internationalizing Domain Names in Applications (IDNA)
32  *
33  * IDNA2008 is implemented according to UTS #46, see the IDNA C++ class in idna.h.
34  *
35  * The C API functions which do take a UIDNA * service object pointer
36  * implement UTS #46 and IDNA2008.
37  *
38  * IDNA2003 is obsolete.
39  * The C API functions which do not take a service object pointer
40  * implement IDNA2003. They are all deprecated.
41  */
42 
/*
 * IDNA option bit set values.
 * The values are distinct bits so that several options can be combined
 * into the one "options" bit set passed to the factory function.
 */
enum {
    /**
     * Default options value: None of the other options are set.
     * For use in static worker and factory methods.
     * @stable ICU 2.6
     */
    UIDNA_DEFAULT=0,
    /**
     * Option to check whether the input conforms to the STD3 ASCII rules,
     * for example the restriction of labels to LDH characters
     * (ASCII Letters, Digits and Hyphen-Minus).
     * For use in static worker and factory methods.
     * @stable ICU 2.6
     */
    UIDNA_USE_STD3_RULES=2,
    /**
     * IDNA option to check whether the input conforms to the BiDi rules.
     * For use in static worker and factory methods.
     * <p>This option is ignored by the IDNA2003 implementation.
     * (IDNA2003 always performs a BiDi check.)
     * @stable ICU 4.6
     */
    UIDNA_CHECK_BIDI=4,
    /**
     * IDNA option to check whether the input conforms to the CONTEXTJ rules.
     * For use in static worker and factory methods.
     * <p>This option is ignored by the IDNA2003 implementation.
     * (The CONTEXTJ check is new in IDNA2008.)
     * @stable ICU 4.6
     */
    UIDNA_CHECK_CONTEXTJ=8,
    /**
     * IDNA option for nontransitional processing in ToASCII().
     * For use in static worker and factory methods.
     * <p>By default, ToASCII() uses transitional processing.
     * <p>This option is ignored by the IDNA2003 implementation.
     * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
     * @stable ICU 4.6
     */
    UIDNA_NONTRANSITIONAL_TO_ASCII=0x10,
    /**
     * IDNA option for nontransitional processing in ToUnicode().
     * For use in static worker and factory methods.
     * <p>By default, ToUnicode() uses transitional processing.
     * <p>This option is ignored by the IDNA2003 implementation.
     * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
     * @stable ICU 4.6
     */
    UIDNA_NONTRANSITIONAL_TO_UNICODE=0x20,
    /**
     * IDNA option to check whether the input conforms to the CONTEXTO rules.
     * For use in static worker and factory methods.
     * <p>This option is ignored by the IDNA2003 implementation.
     * (The CONTEXTO check is new in IDNA2008.)
     * <p>This is for use by registries for IDNA2008 conformance.
     * UTS #46 does not require the CONTEXTO check.
     * @stable ICU 49
     */
    UIDNA_CHECK_CONTEXTO=0x40
};
106 
/**
 * Opaque C service object type for the new IDNA API.
 * Only declared here, never defined: callers handle it exclusively
 * through UIDNA * pointers.
 * @stable ICU 4.6
 */
struct UIDNA;
typedef struct UIDNA UIDNA;  /**< C typedef for struct UIDNA. @stable ICU 4.6 */
113 
114 /**
115  * Returns a UIDNA instance which implements UTS #46.
116  * Returns an unmodifiable instance, owned by the caller.
117  * Cache it for multiple operations, and uidna_close() it when done.
118  * The instance is thread-safe, that is, it can be used concurrently.
119  *
120  * For details about the UTS #46 implementation see the IDNA C++ class in idna.h.
121  *
122  * @param options Bit set to modify the processing and error checking.
123  *                See option bit set values in uidna.h.
124  * @param pErrorCode Standard ICU error code. Its input value must
125  *                  pass the U_SUCCESS() test, or else the function returns
126  *                  immediately. Check for U_FAILURE() on output or use with
127  *                  function chaining. (See User Guide for details.)
128  * @return the UTS #46 UIDNA instance, if successful
129  * @stable ICU 4.6
130  */
131 U_CAPI UIDNA * U_EXPORT2
132 uidna_openUTS46(uint32_t options, UErrorCode *pErrorCode);
133 
134 /**
135  * Closes a UIDNA instance.
136  * @param idna UIDNA instance to be closed
137  * @stable ICU 4.6
138  */
139 U_CAPI void U_EXPORT2
140 uidna_close(UIDNA *idna);
141 
142 /**
143  * Output container for IDNA processing errors.
144  * Initialize with UIDNA_INFO_INITIALIZER:
145  * \code
146  * UIDNAInfo info = UIDNA_INFO_INITIALIZER;
147  * int32_t length = uidna_nameToASCII(..., &info, &errorCode);
148  * if(U_SUCCESS(errorCode) && info.errors!=0) { ... }
149  * \endcode
150  * @stable ICU 4.6
151  */
152 typedef struct UIDNAInfo {
153     /** sizeof(UIDNAInfo) @stable ICU 4.6 */
154     int16_t size;
155     /**
156      * Set to true if transitional and nontransitional processing produce different results.
157      * For details see C++ IDNAInfo::isTransitionalDifferent().
158      * @stable ICU 4.6
159      */
160     UBool isTransitionalDifferent;
161     UBool reservedB3;  /**< Reserved field, do not use. @internal */
162     /**
163      * Bit set indicating IDNA processing errors. 0 if no errors.
164      * See UIDNA_ERROR_... constants.
165      * @stable ICU 4.6
166      */
167     uint32_t errors;
168     int32_t reservedI2;  /**< Reserved field, do not use. @internal */
169     int32_t reservedI3;  /**< Reserved field, do not use. @internal */
170 } UIDNAInfo;
171 
/**
 * Static initializer for a UIDNAInfo struct.
 * Values are listed in field declaration order: size is set to
 * sizeof(UIDNAInfo), the two UBool fields to false, and the errors bit set
 * and the two reserved integers to 0.
 * @stable ICU 4.6
 */
#define UIDNA_INFO_INITIALIZER { \
    (int16_t)sizeof(UIDNAInfo), \
    false, false, \
    0, 0, 0 }
180 
181 /**
182  * Converts a single domain name label into its ASCII form for DNS lookup.
183  * If any processing step fails, then pInfo->errors will be non-zero and
184  * the result might not be an ASCII string.
185  * The label might be modified according to the types of errors.
186  * Labels with severe errors will be left in (or turned into) their Unicode form.
187  *
188  * The UErrorCode indicates an error only in exceptional cases,
189  * such as a U_MEMORY_ALLOCATION_ERROR.
190  *
191  * @param idna UIDNA instance
192  * @param label Input domain name label
193  * @param length Label length, or -1 if NUL-terminated
194  * @param dest Destination string buffer
195  * @param capacity Destination buffer capacity
196  * @param pInfo Output container of IDNA processing details.
197  * @param pErrorCode Standard ICU error code. Its input value must
198  *                  pass the U_SUCCESS() test, or else the function returns
199  *                  immediately. Check for U_FAILURE() on output or use with
200  *                  function chaining. (See User Guide for details.)
201  * @return destination string length
202  * @stable ICU 4.6
203  */
204 U_CAPI int32_t U_EXPORT2
205 uidna_labelToASCII(const UIDNA *idna,
206                    const UChar *label, int32_t length,
207                    UChar *dest, int32_t capacity,
208                    UIDNAInfo *pInfo, UErrorCode *pErrorCode);
209 
210 /**
211  * Converts a single domain name label into its Unicode form for human-readable display.
212  * If any processing step fails, then pInfo->errors will be non-zero.
213  * The label might be modified according to the types of errors.
214  *
215  * The UErrorCode indicates an error only in exceptional cases,
216  * such as a U_MEMORY_ALLOCATION_ERROR.
217  *
218  * @param idna UIDNA instance
219  * @param label Input domain name label
220  * @param length Label length, or -1 if NUL-terminated
221  * @param dest Destination string buffer
222  * @param capacity Destination buffer capacity
223  * @param pInfo Output container of IDNA processing details.
224  * @param pErrorCode Standard ICU error code. Its input value must
225  *                  pass the U_SUCCESS() test, or else the function returns
226  *                  immediately. Check for U_FAILURE() on output or use with
227  *                  function chaining. (See User Guide for details.)
228  * @return destination string length
229  * @stable ICU 4.6
230  */
231 U_CAPI int32_t U_EXPORT2
232 uidna_labelToUnicode(const UIDNA *idna,
233                      const UChar *label, int32_t length,
234                      UChar *dest, int32_t capacity,
235                      UIDNAInfo *pInfo, UErrorCode *pErrorCode);
236 
237 /**
238  * Converts a whole domain name into its ASCII form for DNS lookup.
239  * If any processing step fails, then pInfo->errors will be non-zero and
240  * the result might not be an ASCII string.
241  * The domain name might be modified according to the types of errors.
242  * Labels with severe errors will be left in (or turned into) their Unicode form.
243  *
244  * The UErrorCode indicates an error only in exceptional cases,
245  * such as a U_MEMORY_ALLOCATION_ERROR.
246  *
247  * @param idna UIDNA instance
248  * @param name Input domain name
249  * @param length Domain name length, or -1 if NUL-terminated
250  * @param dest Destination string buffer
251  * @param capacity Destination buffer capacity
252  * @param pInfo Output container of IDNA processing details.
253  * @param pErrorCode Standard ICU error code. Its input value must
254  *                  pass the U_SUCCESS() test, or else the function returns
255  *                  immediately. Check for U_FAILURE() on output or use with
256  *                  function chaining. (See User Guide for details.)
257  * @return destination string length
258  * @stable ICU 4.6
259  */
260 U_CAPI int32_t U_EXPORT2
261 uidna_nameToASCII(const UIDNA *idna,
262                   const UChar *name, int32_t length,
263                   UChar *dest, int32_t capacity,
264                   UIDNAInfo *pInfo, UErrorCode *pErrorCode);
265 
266 /**
267  * Converts a whole domain name into its Unicode form for human-readable display.
268  * If any processing step fails, then pInfo->errors will be non-zero.
269  * The domain name might be modified according to the types of errors.
270  *
271  * The UErrorCode indicates an error only in exceptional cases,
272  * such as a U_MEMORY_ALLOCATION_ERROR.
273  *
274  * @param idna UIDNA instance
275  * @param name Input domain name
276  * @param length Domain name length, or -1 if NUL-terminated
277  * @param dest Destination string buffer
278  * @param capacity Destination buffer capacity
279  * @param pInfo Output container of IDNA processing details.
280  * @param pErrorCode Standard ICU error code. Its input value must
281  *                  pass the U_SUCCESS() test, or else the function returns
282  *                  immediately. Check for U_FAILURE() on output or use with
283  *                  function chaining. (See User Guide for details.)
284  * @return destination string length
285  * @stable ICU 4.6
286  */
287 U_CAPI int32_t U_EXPORT2
288 uidna_nameToUnicode(const UIDNA *idna,
289                     const UChar *name, int32_t length,
290                     UChar *dest, int32_t capacity,
291                     UIDNAInfo *pInfo, UErrorCode *pErrorCode);
292 
293 /* UTF-8 versions of the processing methods --------------------------------- */
294 
295 /**
296  * Converts a single domain name label into its ASCII form for DNS lookup.
297  * UTF-8 version of uidna_labelToASCII(), same behavior.
298  *
299  * @param idna UIDNA instance
300  * @param label Input domain name label
301  * @param length Label length, or -1 if NUL-terminated
302  * @param dest Destination string buffer
303  * @param capacity Destination buffer capacity
304  * @param pInfo Output container of IDNA processing details.
305  * @param pErrorCode Standard ICU error code. Its input value must
306  *                  pass the U_SUCCESS() test, or else the function returns
307  *                  immediately. Check for U_FAILURE() on output or use with
308  *                  function chaining. (See User Guide for details.)
309  * @return destination string length
310  * @stable ICU 4.6
311  */
312 U_CAPI int32_t U_EXPORT2
313 uidna_labelToASCII_UTF8(const UIDNA *idna,
314                         const char *label, int32_t length,
315                         char *dest, int32_t capacity,
316                         UIDNAInfo *pInfo, UErrorCode *pErrorCode);
317 
318 /**
319  * Converts a single domain name label into its Unicode form for human-readable display.
320  * UTF-8 version of uidna_labelToUnicode(), same behavior.
321  *
322  * @param idna UIDNA instance
323  * @param label Input domain name label
324  * @param length Label length, or -1 if NUL-terminated
325  * @param dest Destination string buffer
326  * @param capacity Destination buffer capacity
327  * @param pInfo Output container of IDNA processing details.
328  * @param pErrorCode Standard ICU error code. Its input value must
329  *                  pass the U_SUCCESS() test, or else the function returns
330  *                  immediately. Check for U_FAILURE() on output or use with
331  *                  function chaining. (See User Guide for details.)
332  * @return destination string length
333  * @stable ICU 4.6
334  */
335 U_CAPI int32_t U_EXPORT2
336 uidna_labelToUnicodeUTF8(const UIDNA *idna,
337                          const char *label, int32_t length,
338                          char *dest, int32_t capacity,
339                          UIDNAInfo *pInfo, UErrorCode *pErrorCode);
340 
341 /**
342  * Converts a whole domain name into its ASCII form for DNS lookup.
343  * UTF-8 version of uidna_nameToASCII(), same behavior.
344  *
345  * @param idna UIDNA instance
346  * @param name Input domain name
347  * @param length Domain name length, or -1 if NUL-terminated
348  * @param dest Destination string buffer
349  * @param capacity Destination buffer capacity
350  * @param pInfo Output container of IDNA processing details.
351  * @param pErrorCode Standard ICU error code. Its input value must
352  *                  pass the U_SUCCESS() test, or else the function returns
353  *                  immediately. Check for U_FAILURE() on output or use with
354  *                  function chaining. (See User Guide for details.)
355  * @return destination string length
356  * @stable ICU 4.6
357  */
358 U_CAPI int32_t U_EXPORT2
359 uidna_nameToASCII_UTF8(const UIDNA *idna,
360                        const char *name, int32_t length,
361                        char *dest, int32_t capacity,
362                        UIDNAInfo *pInfo, UErrorCode *pErrorCode);
363 
364 /**
365  * Converts a whole domain name into its Unicode form for human-readable display.
366  * UTF-8 version of uidna_nameToUnicode(), same behavior.
367  *
368  * @param idna UIDNA instance
369  * @param name Input domain name
370  * @param length Domain name length, or -1 if NUL-terminated
371  * @param dest Destination string buffer
372  * @param capacity Destination buffer capacity
373  * @param pInfo Output container of IDNA processing details.
374  * @param pErrorCode Standard ICU error code. Its input value must
375  *                  pass the U_SUCCESS() test, or else the function returns
376  *                  immediately. Check for U_FAILURE() on output or use with
377  *                  function chaining. (See User Guide for details.)
378  * @return destination string length
379  * @stable ICU 4.6
380  */
381 U_CAPI int32_t U_EXPORT2
382 uidna_nameToUnicodeUTF8(const UIDNA *idna,
383                         const char *name, int32_t length,
384                         char *dest, int32_t capacity,
385                         UIDNAInfo *pInfo, UErrorCode *pErrorCode);
386 
/*
 * IDNA error bit set values.
 * When a domain name or label fails a processing step or does not meet the
 * validity criteria, then one or more of these error bits are set.
 * Each value is a distinct bit, so several errors can be reported at once.
 */
enum {
    /**
     * A non-final domain name label (or the whole domain name) is empty.
     * @stable ICU 4.6
     */
    UIDNA_ERROR_EMPTY_LABEL=1,
    /**
     * A domain name label is longer than 63 bytes.
     * (See STD13/RFC1034 3.1. Name space specifications and terminology.)
     * This is only checked in ToASCII operations, and only if the output label is all-ASCII.
     * @stable ICU 4.6
     */
    UIDNA_ERROR_LABEL_TOO_LONG=2,
    /**
     * A domain name is longer than 255 bytes in its storage form.
     * (See STD13/RFC1034 3.1. Name space specifications and terminology.)
     * This is only checked in ToASCII operations, and only if the output domain name is all-ASCII.
     * @stable ICU 4.6
     */
    UIDNA_ERROR_DOMAIN_NAME_TOO_LONG=4,
    /**
     * A label starts with a hyphen-minus ('-').
     * @stable ICU 4.6
     */
    UIDNA_ERROR_LEADING_HYPHEN=8,
    /**
     * A label ends with a hyphen-minus ('-').
     * @stable ICU 4.6
     */
    UIDNA_ERROR_TRAILING_HYPHEN=0x10,
    /**
     * A label contains hyphen-minus ('-') in the third and fourth positions.
     * @stable ICU 4.6
     */
    UIDNA_ERROR_HYPHEN_3_4=0x20,
    /**
     * A label starts with a combining mark.
     * @stable ICU 4.6
     */
    UIDNA_ERROR_LEADING_COMBINING_MARK=0x40,
    /**
     * A label or domain name contains disallowed characters.
     * @stable ICU 4.6
     */
    UIDNA_ERROR_DISALLOWED=0x80,
    /**
     * A label starts with "xn--" but does not contain valid Punycode.
     * That is, an xn-- label failed Punycode decoding.
     * @stable ICU 4.6
     */
    UIDNA_ERROR_PUNYCODE=0x100,
    /**
     * A label contains a dot=full stop.
     * This can occur in an input string for a single-label function.
     * @stable ICU 4.6
     */
    UIDNA_ERROR_LABEL_HAS_DOT=0x200,
    /**
     * An ACE label does not contain a valid label string.
     * The label was successfully ACE (Punycode) decoded but the resulting
     * string had severe validation errors. For example,
     * it might contain characters that are not allowed in ACE labels,
     * or it might not be normalized.
     * @stable ICU 4.6
     */
    UIDNA_ERROR_INVALID_ACE_LABEL=0x400,
    /**
     * A label does not meet the IDNA BiDi requirements (for right-to-left characters).
     * @stable ICU 4.6
     */
    UIDNA_ERROR_BIDI=0x800,
    /**
     * A label does not meet the IDNA CONTEXTJ requirements.
     * @stable ICU 4.6
     */
    UIDNA_ERROR_CONTEXTJ=0x1000,
    /**
     * A label does not meet the IDNA CONTEXTO requirements for punctuation characters.
     * Some punctuation characters "Would otherwise have been DISALLOWED"
     * but are allowed in certain contexts. (RFC 5892)
     * @stable ICU 49
     */
    UIDNA_ERROR_CONTEXTO_PUNCTUATION=0x2000,
    /**
     * A label does not meet the IDNA CONTEXTO requirements for digits.
     * Arabic-Indic Digits (U+066x) must not be mixed with Extended Arabic-Indic Digits (U+06Fx).
     * @stable ICU 49
     */
    UIDNA_ERROR_CONTEXTO_DIGITS=0x4000
};
482 #endif /* #if !UCONFIG_NO_IDNA */
483 
484 #endif
485