1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4 *******************************************************************************
5 *
6 *   Copyright (C) 2003-2011, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 *******************************************************************************
10 *   file name:  nptrans.h
11 *   encoding:   UTF-8
12 *   tab size:   8 (not used)
13 *   indentation:4
14 *
15 *   created on: 2003feb1
16 *   created by: Ram Viswanadha
17 */
18
19#ifndef NPTRANS_H
20#define NPTRANS_H
21
22#include "unicode/utypes.h"
23
24#if !UCONFIG_NO_IDNA
25#if !UCONFIG_NO_TRANSLITERATION
26
27#include "unicode/uniset.h"
28#include "unicode/ures.h"
29#include "unicode/translit.h"
30
31#include "intltest.h"
32
33
/* Code point U+0020, the ASCII space character. */
#define ASCII_SPACE 0x0020
35
36class NamePrepTransform {
37
38private :
39    Transliterator *mapping;
40    UnicodeSet unassigned;
41    UnicodeSet prohibited;
42    UnicodeSet labelSeparatorSet;
43    UResourceBundle *bundle;
44    NamePrepTransform(UParseError& parseError, UErrorCode& status);
45
46
47public :
48
49    static NamePrepTransform* createInstance(UParseError& parseError, UErrorCode& status);
50
51    virtual ~NamePrepTransform();
52
53
54    inline UBool isProhibited(UChar32 ch);
55
56    /**
57     * ICU "poor man's RTTI", returns a UClassID for the actual class.
58     */
59    inline UClassID getDynamicClassID() const { return getStaticClassID(); }
60
61    /**
62     * ICU "poor man's RTTI", returns a UClassID for this class.
63     */
64    static inline UClassID getStaticClassID() { return (UClassID)&fgClassID; }
65
66    /**
67     * Map every character in input stream with mapping character
68     * in the mapping table and populate the output stream.
69     * For any individual character the mapping table may specify
70     * that that a character be mapped to nothing, mapped to one
71     * other character or to a string of other characters.
72     *
73     * @param src           Pointer to UChar buffer containing a single label
74     * @param srcLength     Number of characters in the source label
75     * @param dest          Pointer to the destination buffer to receive the output
76     * @param destCapacity  The capacity of destination array
77     * @param allowUnassigned   Unassigned values can be converted to ASCII for query operations
78     *                          If true unassigned values are treated as normal Unicode code point.
79     *                          If false the operation fails with U_UNASSIGNED_CODE_POINT_FOUND error code.
80     * @param status        ICU error code in/out parameter.
81     *                      Must fulfill U_SUCCESS before the function call.
82     * @return The number of UChars in the destination buffer
83     */
84    int32_t map(const UChar* src, int32_t srcLength,
85                        UChar* dest, int32_t destCapacity,
86                        UBool allowUnassigned,
87                        UParseError* parseError,
88                        UErrorCode& status );
89
90    /**
91     * Prepare the input stream with for use. This operation maps, normalizes(NFKC),
92     * checks for prohited and BiDi characters in the order defined by RFC 3454
93     *
94     * @param src           Pointer to UChar buffer containing a single label
95     * @param srcLength     Number of characters in the source label
96     * @param dest          Pointer to the destination buffer to receive the output
97     * @param destCapacity  The capacity of destination array
98     * @param allowUnassigned   Unassigned values can be converted to ASCII for query operations
99     *                          If true unassigned values are treated as normal Unicode code point.
100     *                          If false the operation fails with U_UNASSIGNED_CODE_POINT error code.
101     * @param status        ICU error code in/out parameter.
102     *                      Must fulfill U_SUCCESS before the function call.
103     * @return The number of UChars in the destination buffer
104     */
105    int32_t process(const UChar* src, int32_t srcLength,
106                            UChar* dest, int32_t destCapacity,
107                            UBool allowUnassigned,
108                            UParseError* parseError,
109                            UErrorCode& status );
110
111    /**
112     * Ascertain if the given code point is a label separator as specified by IDNA
113     *
114     * @return true is the code point is a label separator
115     */
116    UBool isLabelSeparator(UChar32 ch, UErrorCode& status);
117
118    inline UBool isLDHChar(UChar32 ch);
119
120private:
121    /**
122     * The address of this static class variable serves as this class's ID
123     * for ICU "poor man's RTTI".
124     */
125    static const char fgClassID;
126};
127
128inline UBool NamePrepTransform::isLDHChar(UChar32 ch){
129    // high runner case
130    if(ch>0x007A){
131        return false;
132    }
133    //[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A]
134    if( (ch==0x002D) ||
135        (0x0030 <= ch && ch <= 0x0039) ||
136        (0x0041 <= ch && ch <= 0x005A) ||
137        (0x0061 <= ch && ch <= 0x007A)
138      ){
139        return true;
140    }
141    return false;
142}
143
144#endif /* #if !UCONFIG_NO_TRANSLITERATION */
145#else
// Empty placeholder compiled when UCONFIG_NO_IDNA is set, so that the name
// NamePrepTransform is still defined (without any members) in that build.
// NOTE(review): no definition at all is provided when IDNA is enabled but
// UCONFIG_NO_TRANSLITERATION is set - confirm that this is intended.
class NamePrepTransform {};
148#endif /* #if !UCONFIG_NO_IDNA */
149
150#endif
151
152/*
153 * Hey, Emacs, please set the following:
154 *
155 * Local Variables:
156 * indent-tabs-mode: nil
157 * End:
158 *
159 */
160