1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4*******************************************************************************
5*   Copyright (C) 2010-2012, International Business Machines
6*   Corporation and others.  All Rights Reserved.
7*******************************************************************************
8*   file name:  idna.h
9*   encoding:   UTF-8
10*   tab size:   8 (not used)
11*   indentation:4
12*
13*   created on: 2010mar05
14*   created by: Markus W. Scherer
15*/
16
17#ifndef __IDNA_H__
18#define __IDNA_H__
19
20/**
21 * \file
22 * \brief C++ API: Internationalizing Domain Names in Applications (IDNA)
23 */
24
25#include "unicode/utypes.h"
26
27#if U_SHOW_CPLUSPLUS_API
28
29#if !UCONFIG_NO_IDNA
30
31#include "unicode/bytestream.h"
32#include "unicode/stringpiece.h"
33#include "unicode/uidna.h"
34#include "unicode/unistr.h"
35
36U_NAMESPACE_BEGIN
37
38class IDNAInfo;
39
40/**
41 * Abstract base class for IDNA processing.
42 * See http://www.unicode.org/reports/tr46/
43 * and http://www.ietf.org/rfc/rfc3490.txt
44 *
45 * The IDNA class is not intended for public subclassing.
46 *
47 * This C++ API currently only implements UTS #46.
48 * The uidna.h C API implements both UTS #46 (functions using UIDNA service object)
49 * and IDNA2003 (functions that do not use a service object).
50 * @stable ICU 4.6
51 */
52class U_COMMON_API IDNA : public UObject {
53public:
54    /**
55     * Destructor.
56     * @stable ICU 4.6
57     */
58    ~IDNA();
59
60    /**
61     * Returns an IDNA instance which implements UTS #46.
62     * Returns an unmodifiable instance, owned by the caller.
63     * Cache it for multiple operations, and delete it when done.
64     * The instance is thread-safe, that is, it can be used concurrently.
65     *
66     * UTS #46 defines Unicode IDNA Compatibility Processing,
67     * updated to the latest version of Unicode and compatible with both
68     * IDNA2003 and IDNA2008.
69     *
70     * The worker functions use transitional processing, including deviation mappings,
71     * unless UIDNA_NONTRANSITIONAL_TO_ASCII or UIDNA_NONTRANSITIONAL_TO_UNICODE
72     * is used in which case the deviation characters are passed through without change.
73     *
74     * Disallowed characters are mapped to U+FFFD.
75     *
76     * For available options see the uidna.h header.
77     * Operations with the UTS #46 instance do not support the
78     * UIDNA_ALLOW_UNASSIGNED option.
79     *
80     * By default, the UTS #46 implementation allows all ASCII characters (as valid or mapped).
81     * When the UIDNA_USE_STD3_RULES option is used, ASCII characters other than
82     * letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD.
83     *
84     * @param options Bit set to modify the processing and error checking.
85     *                See option bit set values in uidna.h.
86     * @param errorCode Standard ICU error code. Its input value must
87     *                  pass the U_SUCCESS() test, or else the function returns
88     *                  immediately. Check for U_FAILURE() on output or use with
89     *                  function chaining. (See User Guide for details.)
90     * @return the UTS #46 IDNA instance, if successful
91     * @stable ICU 4.6
92     */
93    static IDNA *
94    createUTS46Instance(uint32_t options, UErrorCode &errorCode);
95
96    /**
97     * Converts a single domain name label into its ASCII form for DNS lookup.
98     * If any processing step fails, then info.hasErrors() will be true and
99     * the result might not be an ASCII string.
100     * The label might be modified according to the types of errors.
101     * Labels with severe errors will be left in (or turned into) their Unicode form.
102     *
103     * The UErrorCode indicates an error only in exceptional cases,
104     * such as a U_MEMORY_ALLOCATION_ERROR.
105     *
106     * @param label Input domain name label
107     * @param dest Destination string object
108     * @param info Output container of IDNA processing details.
109     * @param errorCode Standard ICU error code. Its input value must
110     *                  pass the U_SUCCESS() test, or else the function returns
111     *                  immediately. Check for U_FAILURE() on output or use with
112     *                  function chaining. (See User Guide for details.)
113     * @return dest
114     * @stable ICU 4.6
115     */
116    virtual UnicodeString &
117    labelToASCII(const UnicodeString &label, UnicodeString &dest,
118                 IDNAInfo &info, UErrorCode &errorCode) const = 0;
119
120    /**
121     * Converts a single domain name label into its Unicode form for human-readable display.
122     * If any processing step fails, then info.hasErrors() will be true.
123     * The label might be modified according to the types of errors.
124     *
125     * The UErrorCode indicates an error only in exceptional cases,
126     * such as a U_MEMORY_ALLOCATION_ERROR.
127     *
128     * @param label Input domain name label
129     * @param dest Destination string object
130     * @param info Output container of IDNA processing details.
131     * @param errorCode Standard ICU error code. Its input value must
132     *                  pass the U_SUCCESS() test, or else the function returns
133     *                  immediately. Check for U_FAILURE() on output or use with
134     *                  function chaining. (See User Guide for details.)
135     * @return dest
136     * @stable ICU 4.6
137     */
138    virtual UnicodeString &
139    labelToUnicode(const UnicodeString &label, UnicodeString &dest,
140                   IDNAInfo &info, UErrorCode &errorCode) const = 0;
141
142    /**
143     * Converts a whole domain name into its ASCII form for DNS lookup.
144     * If any processing step fails, then info.hasErrors() will be true and
145     * the result might not be an ASCII string.
146     * The domain name might be modified according to the types of errors.
147     * Labels with severe errors will be left in (or turned into) their Unicode form.
148     *
149     * The UErrorCode indicates an error only in exceptional cases,
150     * such as a U_MEMORY_ALLOCATION_ERROR.
151     *
152     * @param name Input domain name
153     * @param dest Destination string object
154     * @param info Output container of IDNA processing details.
155     * @param errorCode Standard ICU error code. Its input value must
156     *                  pass the U_SUCCESS() test, or else the function returns
157     *                  immediately. Check for U_FAILURE() on output or use with
158     *                  function chaining. (See User Guide for details.)
159     * @return dest
160     * @stable ICU 4.6
161     */
162    virtual UnicodeString &
163    nameToASCII(const UnicodeString &name, UnicodeString &dest,
164                IDNAInfo &info, UErrorCode &errorCode) const = 0;
165
166    /**
167     * Converts a whole domain name into its Unicode form for human-readable display.
168     * If any processing step fails, then info.hasErrors() will be true.
169     * The domain name might be modified according to the types of errors.
170     *
171     * The UErrorCode indicates an error only in exceptional cases,
172     * such as a U_MEMORY_ALLOCATION_ERROR.
173     *
174     * @param name Input domain name
175     * @param dest Destination string object
176     * @param info Output container of IDNA processing details.
177     * @param errorCode Standard ICU error code. Its input value must
178     *                  pass the U_SUCCESS() test, or else the function returns
179     *                  immediately. Check for U_FAILURE() on output or use with
180     *                  function chaining. (See User Guide for details.)
181     * @return dest
182     * @stable ICU 4.6
183     */
184    virtual UnicodeString &
185    nameToUnicode(const UnicodeString &name, UnicodeString &dest,
186                  IDNAInfo &info, UErrorCode &errorCode) const = 0;
187
188    // UTF-8 versions of the processing methods ---------------------------- ***
189
190    /**
191     * Converts a single domain name label into its ASCII form for DNS lookup.
192     * UTF-8 version of labelToASCII(), same behavior.
193     *
194     * @param label Input domain name label
195     * @param dest Destination byte sink; Flush()ed if successful
196     * @param info Output container of IDNA processing details.
197     * @param errorCode Standard ICU error code. Its input value must
198     *                  pass the U_SUCCESS() test, or else the function returns
199     *                  immediately. Check for U_FAILURE() on output or use with
200     *                  function chaining. (See User Guide for details.)
201     * @return dest
202     * @stable ICU 4.6
203     */
204    virtual void
205    labelToASCII_UTF8(StringPiece label, ByteSink &dest,
206                      IDNAInfo &info, UErrorCode &errorCode) const;
207
208    /**
209     * Converts a single domain name label into its Unicode form for human-readable display.
210     * UTF-8 version of labelToUnicode(), same behavior.
211     *
212     * @param label Input domain name label
213     * @param dest Destination byte sink; Flush()ed if successful
214     * @param info Output container of IDNA processing details.
215     * @param errorCode Standard ICU error code. Its input value must
216     *                  pass the U_SUCCESS() test, or else the function returns
217     *                  immediately. Check for U_FAILURE() on output or use with
218     *                  function chaining. (See User Guide for details.)
219     * @return dest
220     * @stable ICU 4.6
221     */
222    virtual void
223    labelToUnicodeUTF8(StringPiece label, ByteSink &dest,
224                       IDNAInfo &info, UErrorCode &errorCode) const;
225
226    /**
227     * Converts a whole domain name into its ASCII form for DNS lookup.
228     * UTF-8 version of nameToASCII(), same behavior.
229     *
230     * @param name Input domain name
231     * @param dest Destination byte sink; Flush()ed if successful
232     * @param info Output container of IDNA processing details.
233     * @param errorCode Standard ICU error code. Its input value must
234     *                  pass the U_SUCCESS() test, or else the function returns
235     *                  immediately. Check for U_FAILURE() on output or use with
236     *                  function chaining. (See User Guide for details.)
237     * @return dest
238     * @stable ICU 4.6
239     */
240    virtual void
241    nameToASCII_UTF8(StringPiece name, ByteSink &dest,
242                     IDNAInfo &info, UErrorCode &errorCode) const;
243
244    /**
245     * Converts a whole domain name into its Unicode form for human-readable display.
246     * UTF-8 version of nameToUnicode(), same behavior.
247     *
248     * @param name Input domain name
249     * @param dest Destination byte sink; Flush()ed if successful
250     * @param info Output container of IDNA processing details.
251     * @param errorCode Standard ICU error code. Its input value must
252     *                  pass the U_SUCCESS() test, or else the function returns
253     *                  immediately. Check for U_FAILURE() on output or use with
254     *                  function chaining. (See User Guide for details.)
255     * @return dest
256     * @stable ICU 4.6
257     */
258    virtual void
259    nameToUnicodeUTF8(StringPiece name, ByteSink &dest,
260                      IDNAInfo &info, UErrorCode &errorCode) const;
261};
262
263class UTS46;
264
265/**
266 * Output container for IDNA processing errors.
267 * The IDNAInfo class is not suitable for subclassing.
268 * @stable ICU 4.6
269 */
270class U_COMMON_API IDNAInfo : public UMemory {
271public:
272    /**
273     * Constructor for stack allocation.
274     * @stable ICU 4.6
275     */
276    IDNAInfo() : errors(0), labelErrors(0), isTransDiff(false), isBiDi(false), isOkBiDi(true) {}
277    /**
278     * Were there IDNA processing errors?
279     * @return true if there were processing errors
280     * @stable ICU 4.6
281     */
282    UBool hasErrors() const { return errors!=0; }
283    /**
284     * Returns a bit set indicating IDNA processing errors.
285     * See UIDNA_ERROR_... constants in uidna.h.
286     * @return bit set of processing errors
287     * @stable ICU 4.6
288     */
289    uint32_t getErrors() const { return errors; }
290    /**
291     * Returns true if transitional and nontransitional processing produce different results.
292     * This is the case when the input label or domain name contains
293     * one or more deviation characters outside a Punycode label (see UTS #46).
294     * <ul>
295     * <li>With nontransitional processing, such characters are
296     * copied to the destination string.
297     * <li>With transitional processing, such characters are
298     * mapped (sharp s/sigma) or removed (joiner/nonjoiner).
299     * </ul>
300     * @return true if transitional and nontransitional processing produce different results
301     * @stable ICU 4.6
302     */
303    UBool isTransitionalDifferent() const { return isTransDiff; }
304
305private:
306    friend class UTS46;
307
308    IDNAInfo(const IDNAInfo &other) = delete;  // no copying
309    IDNAInfo &operator=(const IDNAInfo &other) = delete;  // no copying
310
311    void reset() {
312        errors=labelErrors=0;
313        isTransDiff=false;
314        isBiDi=false;
315        isOkBiDi=true;
316    }
317
318    uint32_t errors, labelErrors;
319    UBool isTransDiff;
320    UBool isBiDi;
321    UBool isOkBiDi;
322};
323
324U_NAMESPACE_END
325
326#endif  // UCONFIG_NO_IDNA
327
328#endif /* U_SHOW_CPLUSPLUS_API */
329
330#endif  // __IDNA_H__
331