1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4 *******************************************************************************
5 *
6 *   Copyright (C) 2003-2014, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 *******************************************************************************
10 *   file name:  uidna.h
11 *   encoding:   UTF-8
12 *   tab size:   8 (not used)
13 *   indentation:4
14 *
15 *   created on: 2003feb1
16 *   created by: Ram Viswanadha
17 */
18
19#ifndef __UIDNA_H__
20#define __UIDNA_H__
21
22#include "unicode/utypes.h"
23
24#if !UCONFIG_NO_IDNA
25
26#include <stdbool.h>
27#include "unicode/parseerr.h"
28
29/**
30 * \file
31 * \brief C API: Internationalizing Domain Names in Applications (IDNA)
32 *
 * IDNA2008 is implemented according to UTS #46; see the IDNA C++ class in idna.h.
34 *
35 * The C API functions which do take a UIDNA * service object pointer
36 * implement UTS #46 and IDNA2008.
37 *
38 * IDNA2003 is obsolete.
39 * The C API functions which do not take a service object pointer
40 * implement IDNA2003. They are all deprecated.
41 */
42
/*
 * Values for the IDNA options bit set.
 */
enum {
    /**
     * Default options value: no other option is set.
     * For use in static worker and factory methods.
     * @stable ICU 2.6
     */
    UIDNA_DEFAULT                    = 0x00,
    /**
     * Option to check that the input conforms to the STD3 ASCII rules,
     * such as the restriction of labels to LDH characters
     * (ASCII Letters, Digits and Hyphen-Minus).
     * For use in static worker and factory methods.
     * @stable ICU 2.6
     */
    UIDNA_USE_STD3_RULES             = 0x02,
    /**
     * IDNA option to check that the input conforms to the BiDi rules.
     * For use in static worker and factory methods.
     * <p>The IDNA2003 implementation ignores this option.
     * (IDNA2003 always performs a BiDi check.)
     * @stable ICU 4.6
     */
    UIDNA_CHECK_BIDI                 = 0x04,
    /**
     * IDNA option to check that the input conforms to the CONTEXTJ rules.
     * For use in static worker and factory methods.
     * <p>The IDNA2003 implementation ignores this option.
     * (The CONTEXTJ check is new in IDNA2008.)
     * @stable ICU 4.6
     */
    UIDNA_CHECK_CONTEXTJ             = 0x08,
    /**
     * IDNA option that selects nontransitional processing in ToASCII().
     * For use in static worker and factory methods.
     * <p>Without this option, ToASCII() uses transitional processing.
     * <p>The IDNA2003 implementation ignores this option.
     * (It only matters for compatibility of newer IDNA implementations with IDNA2003.)
     * @stable ICU 4.6
     */
    UIDNA_NONTRANSITIONAL_TO_ASCII   = 0x10,
    /**
     * IDNA option that selects nontransitional processing in ToUnicode().
     * For use in static worker and factory methods.
     * <p>Without this option, ToUnicode() uses transitional processing.
     * <p>The IDNA2003 implementation ignores this option.
     * (It only matters for compatibility of newer IDNA implementations with IDNA2003.)
     * @stable ICU 4.6
     */
    UIDNA_NONTRANSITIONAL_TO_UNICODE = 0x20,
    /**
     * IDNA option to check that the input conforms to the CONTEXTO rules.
     * For use in static worker and factory methods.
     * <p>The IDNA2003 implementation ignores this option.
     * (The CONTEXTO check is new in IDNA2008.)
     * <p>Intended for registries that need IDNA2008 conformance.
     * UTS #46 does not require the CONTEXTO check.
     * @stable ICU 49
     */
    UIDNA_CHECK_CONTEXTO             = 0x40
};
106
/**
 * Opaque C service object type for the new IDNA API, together with its
 * C typedef. The typedef also acts as the forward declaration of the
 * incomplete struct type.
 * @stable ICU 4.6
 */
typedef struct UIDNA UIDNA;
113
114/**
115 * Returns a UIDNA instance which implements UTS #46.
116 * Returns an unmodifiable instance, owned by the caller.
117 * Cache it for multiple operations, and uidna_close() it when done.
118 * The instance is thread-safe, that is, it can be used concurrently.
119 *
120 * For details about the UTS #46 implementation see the IDNA C++ class in idna.h.
121 *
122 * @param options Bit set to modify the processing and error checking.
123 *                See option bit set values in uidna.h.
124 * @param pErrorCode Standard ICU error code. Its input value must
125 *                  pass the U_SUCCESS() test, or else the function returns
126 *                  immediately. Check for U_FAILURE() on output or use with
127 *                  function chaining. (See User Guide for details.)
128 * @return the UTS #46 UIDNA instance, if successful
129 * @stable ICU 4.6
130 */
131U_CAPI UIDNA * U_EXPORT2
132uidna_openUTS46(uint32_t options, UErrorCode *pErrorCode);
133
134/**
135 * Closes a UIDNA instance.
136 * @param idna UIDNA instance to be closed
137 * @stable ICU 4.6
138 */
139U_CAPI void U_EXPORT2
140uidna_close(UIDNA *idna);
141
142/**
143 * Output container for IDNA processing errors.
144 * Initialize with UIDNA_INFO_INITIALIZER:
145 * \code
146 * UIDNAInfo info = UIDNA_INFO_INITIALIZER;
147 * int32_t length = uidna_nameToASCII(..., &info, &errorCode);
148 * if(U_SUCCESS(errorCode) && info.errors!=0) { ... }
149 * \endcode
150 * @stable ICU 4.6
151 */
152typedef struct UIDNAInfo {
153    /** sizeof(UIDNAInfo) @stable ICU 4.6 */
154    int16_t size;
155    /**
156     * Set to true if transitional and nontransitional processing produce different results.
157     * For details see C++ IDNAInfo::isTransitionalDifferent().
158     * @stable ICU 4.6
159     */
160    UBool isTransitionalDifferent;
161    UBool reservedB3;  /**< Reserved field, do not use. @internal */
162    /**
163     * Bit set indicating IDNA processing errors. 0 if no errors.
164     * See UIDNA_ERROR_... constants.
165     * @stable ICU 4.6
166     */
167    uint32_t errors;
168    int32_t reservedI2;  /**< Reserved field, do not use. @internal */
169    int32_t reservedI3;  /**< Reserved field, do not use. @internal */
170} UIDNAInfo;
171
/**
 * Static initializer for a UIDNAInfo struct.
 * Sets the size field to sizeof(UIDNAInfo) and clears every other field.
 * @stable ICU 4.6
 */
#define UIDNA_INFO_INITIALIZER { \
    (int16_t)sizeof(UIDNAInfo), /* size */ \
    false,                      /* isTransitionalDifferent */ \
    false,                      /* reservedB3 */ \
    0,                          /* errors */ \
    0,                          /* reservedI2 */ \
    0                           /* reservedI3 */ \
}
180
181/**
182 * Converts a single domain name label into its ASCII form for DNS lookup.
183 * If any processing step fails, then pInfo->errors will be non-zero and
184 * the result might not be an ASCII string.
185 * The label might be modified according to the types of errors.
186 * Labels with severe errors will be left in (or turned into) their Unicode form.
187 *
188 * The UErrorCode indicates an error only in exceptional cases,
189 * such as a U_MEMORY_ALLOCATION_ERROR.
190 *
191 * @param idna UIDNA instance
192 * @param label Input domain name label
193 * @param length Label length, or -1 if NUL-terminated
194 * @param dest Destination string buffer
195 * @param capacity Destination buffer capacity
196 * @param pInfo Output container of IDNA processing details.
197 * @param pErrorCode Standard ICU error code. Its input value must
198 *                  pass the U_SUCCESS() test, or else the function returns
199 *                  immediately. Check for U_FAILURE() on output or use with
200 *                  function chaining. (See User Guide for details.)
201 * @return destination string length
202 * @stable ICU 4.6
203 */
204U_CAPI int32_t U_EXPORT2
205uidna_labelToASCII(const UIDNA *idna,
206                   const UChar *label, int32_t length,
207                   UChar *dest, int32_t capacity,
208                   UIDNAInfo *pInfo, UErrorCode *pErrorCode);
209
210/**
211 * Converts a single domain name label into its Unicode form for human-readable display.
212 * If any processing step fails, then pInfo->errors will be non-zero.
213 * The label might be modified according to the types of errors.
214 *
215 * The UErrorCode indicates an error only in exceptional cases,
216 * such as a U_MEMORY_ALLOCATION_ERROR.
217 *
218 * @param idna UIDNA instance
219 * @param label Input domain name label
220 * @param length Label length, or -1 if NUL-terminated
221 * @param dest Destination string buffer
222 * @param capacity Destination buffer capacity
223 * @param pInfo Output container of IDNA processing details.
224 * @param pErrorCode Standard ICU error code. Its input value must
225 *                  pass the U_SUCCESS() test, or else the function returns
226 *                  immediately. Check for U_FAILURE() on output or use with
227 *                  function chaining. (See User Guide for details.)
228 * @return destination string length
229 * @stable ICU 4.6
230 */
231U_CAPI int32_t U_EXPORT2
232uidna_labelToUnicode(const UIDNA *idna,
233                     const UChar *label, int32_t length,
234                     UChar *dest, int32_t capacity,
235                     UIDNAInfo *pInfo, UErrorCode *pErrorCode);
236
237/**
238 * Converts a whole domain name into its ASCII form for DNS lookup.
239 * If any processing step fails, then pInfo->errors will be non-zero and
240 * the result might not be an ASCII string.
241 * The domain name might be modified according to the types of errors.
242 * Labels with severe errors will be left in (or turned into) their Unicode form.
243 *
244 * The UErrorCode indicates an error only in exceptional cases,
245 * such as a U_MEMORY_ALLOCATION_ERROR.
246 *
247 * @param idna UIDNA instance
248 * @param name Input domain name
249 * @param length Domain name length, or -1 if NUL-terminated
250 * @param dest Destination string buffer
251 * @param capacity Destination buffer capacity
252 * @param pInfo Output container of IDNA processing details.
253 * @param pErrorCode Standard ICU error code. Its input value must
254 *                  pass the U_SUCCESS() test, or else the function returns
255 *                  immediately. Check for U_FAILURE() on output or use with
256 *                  function chaining. (See User Guide for details.)
257 * @return destination string length
258 * @stable ICU 4.6
259 */
260U_CAPI int32_t U_EXPORT2
261uidna_nameToASCII(const UIDNA *idna,
262                  const UChar *name, int32_t length,
263                  UChar *dest, int32_t capacity,
264                  UIDNAInfo *pInfo, UErrorCode *pErrorCode);
265
266/**
267 * Converts a whole domain name into its Unicode form for human-readable display.
268 * If any processing step fails, then pInfo->errors will be non-zero.
269 * The domain name might be modified according to the types of errors.
270 *
271 * The UErrorCode indicates an error only in exceptional cases,
272 * such as a U_MEMORY_ALLOCATION_ERROR.
273 *
274 * @param idna UIDNA instance
275 * @param name Input domain name
276 * @param length Domain name length, or -1 if NUL-terminated
277 * @param dest Destination string buffer
278 * @param capacity Destination buffer capacity
279 * @param pInfo Output container of IDNA processing details.
280 * @param pErrorCode Standard ICU error code. Its input value must
281 *                  pass the U_SUCCESS() test, or else the function returns
282 *                  immediately. Check for U_FAILURE() on output or use with
283 *                  function chaining. (See User Guide for details.)
284 * @return destination string length
285 * @stable ICU 4.6
286 */
287U_CAPI int32_t U_EXPORT2
288uidna_nameToUnicode(const UIDNA *idna,
289                    const UChar *name, int32_t length,
290                    UChar *dest, int32_t capacity,
291                    UIDNAInfo *pInfo, UErrorCode *pErrorCode);
292
293/* UTF-8 versions of the processing methods --------------------------------- */
294
295/**
296 * Converts a single domain name label into its ASCII form for DNS lookup.
297 * UTF-8 version of uidna_labelToASCII(), same behavior.
298 *
299 * @param idna UIDNA instance
300 * @param label Input domain name label
301 * @param length Label length, or -1 if NUL-terminated
302 * @param dest Destination string buffer
303 * @param capacity Destination buffer capacity
304 * @param pInfo Output container of IDNA processing details.
305 * @param pErrorCode Standard ICU error code. Its input value must
306 *                  pass the U_SUCCESS() test, or else the function returns
307 *                  immediately. Check for U_FAILURE() on output or use with
308 *                  function chaining. (See User Guide for details.)
309 * @return destination string length
310 * @stable ICU 4.6
311 */
312U_CAPI int32_t U_EXPORT2
313uidna_labelToASCII_UTF8(const UIDNA *idna,
314                        const char *label, int32_t length,
315                        char *dest, int32_t capacity,
316                        UIDNAInfo *pInfo, UErrorCode *pErrorCode);
317
318/**
319 * Converts a single domain name label into its Unicode form for human-readable display.
320 * UTF-8 version of uidna_labelToUnicode(), same behavior.
321 *
322 * @param idna UIDNA instance
323 * @param label Input domain name label
324 * @param length Label length, or -1 if NUL-terminated
325 * @param dest Destination string buffer
326 * @param capacity Destination buffer capacity
327 * @param pInfo Output container of IDNA processing details.
328 * @param pErrorCode Standard ICU error code. Its input value must
329 *                  pass the U_SUCCESS() test, or else the function returns
330 *                  immediately. Check for U_FAILURE() on output or use with
331 *                  function chaining. (See User Guide for details.)
332 * @return destination string length
333 * @stable ICU 4.6
334 */
335U_CAPI int32_t U_EXPORT2
336uidna_labelToUnicodeUTF8(const UIDNA *idna,
337                         const char *label, int32_t length,
338                         char *dest, int32_t capacity,
339                         UIDNAInfo *pInfo, UErrorCode *pErrorCode);
340
341/**
342 * Converts a whole domain name into its ASCII form for DNS lookup.
343 * UTF-8 version of uidna_nameToASCII(), same behavior.
344 *
345 * @param idna UIDNA instance
346 * @param name Input domain name
347 * @param length Domain name length, or -1 if NUL-terminated
348 * @param dest Destination string buffer
349 * @param capacity Destination buffer capacity
350 * @param pInfo Output container of IDNA processing details.
351 * @param pErrorCode Standard ICU error code. Its input value must
352 *                  pass the U_SUCCESS() test, or else the function returns
353 *                  immediately. Check for U_FAILURE() on output or use with
354 *                  function chaining. (See User Guide for details.)
355 * @return destination string length
356 * @stable ICU 4.6
357 */
358U_CAPI int32_t U_EXPORT2
359uidna_nameToASCII_UTF8(const UIDNA *idna,
360                       const char *name, int32_t length,
361                       char *dest, int32_t capacity,
362                       UIDNAInfo *pInfo, UErrorCode *pErrorCode);
363
364/**
365 * Converts a whole domain name into its Unicode form for human-readable display.
366 * UTF-8 version of uidna_nameToUnicode(), same behavior.
367 *
368 * @param idna UIDNA instance
369 * @param name Input domain name
370 * @param length Domain name length, or -1 if NUL-terminated
371 * @param dest Destination string buffer
372 * @param capacity Destination buffer capacity
373 * @param pInfo Output container of IDNA processing details.
374 * @param pErrorCode Standard ICU error code. Its input value must
375 *                  pass the U_SUCCESS() test, or else the function returns
376 *                  immediately. Check for U_FAILURE() on output or use with
377 *                  function chaining. (See User Guide for details.)
378 * @return destination string length
379 * @stable ICU 4.6
380 */
381U_CAPI int32_t U_EXPORT2
382uidna_nameToUnicodeUTF8(const UIDNA *idna,
383                        const char *name, int32_t length,
384                        char *dest, int32_t capacity,
385                        UIDNAInfo *pInfo, UErrorCode *pErrorCode);
386
/*
 * Values for the IDNA error bit set.
 * One or more of these error bits are set when a domain name or label
 * fails a processing step or does not meet the validity criteria.
 */
enum {
    /**
     * The whole domain name, or a non-final domain name label, is empty.
     * @stable ICU 4.6
     */
    UIDNA_ERROR_EMPTY_LABEL            = 0x0001,
    /**
     * A domain name label exceeds 63 bytes.
     * (See STD13/RFC1034 3.1. Name space specifications and terminology.)
     * Checked only in ToASCII operations, and only if the output label is all-ASCII.
     * @stable ICU 4.6
     */
    UIDNA_ERROR_LABEL_TOO_LONG         = 0x0002,
    /**
     * A domain name exceeds 255 bytes in its storage form.
     * (See STD13/RFC1034 3.1. Name space specifications and terminology.)
     * Checked only in ToASCII operations, and only if the output domain name is all-ASCII.
     * @stable ICU 4.6
     */
    UIDNA_ERROR_DOMAIN_NAME_TOO_LONG   = 0x0004,
    /**
     * A label begins with a hyphen-minus ('-').
     * @stable ICU 4.6
     */
    UIDNA_ERROR_LEADING_HYPHEN         = 0x0008,
    /**
     * A label finishes with a hyphen-minus ('-').
     * @stable ICU 4.6
     */
    UIDNA_ERROR_TRAILING_HYPHEN        = 0x0010,
    /**
     * A label has hyphen-minus ('-') in both the third and fourth positions.
     * @stable ICU 4.6
     */
    UIDNA_ERROR_HYPHEN_3_4             = 0x0020,
    /**
     * A label begins with a combining mark.
     * @stable ICU 4.6
     */
    UIDNA_ERROR_LEADING_COMBINING_MARK = 0x0040,
    /**
     * Disallowed characters occur in a label or domain name.
     * @stable ICU 4.6
     */
    UIDNA_ERROR_DISALLOWED             = 0x0080,
    /**
     * A label begins with "xn--" but does not contain valid Punycode.
     * In other words, Punycode decoding of an xn-- label failed.
     * @stable ICU 4.6
     */
    UIDNA_ERROR_PUNYCODE               = 0x0100,
    /**
     * A label contains a dot (full stop).
     * An input string for a single-label function can trigger this.
     * @stable ICU 4.6
     */
    UIDNA_ERROR_LABEL_HAS_DOT          = 0x0200,
    /**
     * An ACE label does not contain a valid label string.
     * ACE (Punycode) decoding of the label succeeded, but the resulting
     * string had severe validation errors. For example,
     * it might contain characters that are not allowed in ACE labels,
     * or it might not be normalized.
     * @stable ICU 4.6
     */
    UIDNA_ERROR_INVALID_ACE_LABEL      = 0x0400,
    /**
     * A label fails the IDNA BiDi requirements (for right-to-left characters).
     * @stable ICU 4.6
     */
    UIDNA_ERROR_BIDI                   = 0x0800,
    /**
     * A label fails the IDNA CONTEXTJ requirements.
     * @stable ICU 4.6
     */
    UIDNA_ERROR_CONTEXTJ               = 0x1000,
    /**
     * A label fails the IDNA CONTEXTO requirements for punctuation characters.
     * Some punctuation characters "Would otherwise have been DISALLOWED"
     * but are allowed in certain contexts. (RFC 5892)
     * @stable ICU 49
     */
    UIDNA_ERROR_CONTEXTO_PUNCTUATION   = 0x2000,
    /**
     * A label fails the IDNA CONTEXTO requirements for digits.
     * Arabic-Indic Digits (U+066x) must not be mixed with Extended Arabic-Indic Digits (U+06Fx).
     * @stable ICU 49
     */
    UIDNA_ERROR_CONTEXTO_DIGITS        = 0x4000
};
482#endif /* #if !UCONFIG_NO_IDNA */
483
484#endif
485