xref: /third_party/icu/icu4c/source/i18n/esctrn.cpp (revision 2e5b6d6d)
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4**********************************************************************
5*   Copyright (c) 2001-2011, International Business Machines
6*   Corporation and others.  All Rights Reserved.
7**********************************************************************
8*   Date        Name        Description
9*   11/19/2001  aliu        Creation.
10**********************************************************************
11*/
12
13#include "unicode/utypes.h"
14
15#if !UCONFIG_NO_TRANSLITERATION
16
17#include "unicode/utf16.h"
18#include "esctrn.h"
19#include "util.h"
20
21U_NAMESPACE_BEGIN
22
23static const UChar UNIPRE[] = {85,43,0}; // "U+"
24static const UChar BS_u[] = {92,117,0}; // "\\u"
25static const UChar BS_U[] = {92,85,0}; // "\\U"
26static const UChar XMLPRE[] = {38,35,120,0}; // "&#x"
27static const UChar XML10PRE[] = {38,35,0}; // "&#"
28static const UChar PERLPRE[] = {92,120,123,0}; // "\\x{"
29static const UChar SEMI[] = {59,0}; // ";"
30static const UChar RBRACE[] = {125,0}; // "}"
31
32UOBJECT_DEFINE_RTTI_IMPLEMENTATION(EscapeTransliterator)
33
34/**
35 * Factory methods
36 */
37static Transliterator* _createEscUnicode(const UnicodeString& ID, Transliterator::Token /*context*/) {
38    // Unicode: "U+10FFFF" hex, min=4, max=6
39    return new EscapeTransliterator(ID, UnicodeString(true, UNIPRE, 2), UnicodeString(), 16, 4, true, NULL);
40}
41static Transliterator* _createEscJava(const UnicodeString& ID, Transliterator::Token /*context*/) {
42    // Java: "\\uFFFF" hex, min=4, max=4
43    return new EscapeTransliterator(ID, UnicodeString(true, BS_u, 2), UnicodeString(), 16, 4, false, NULL);
44}
45static Transliterator* _createEscC(const UnicodeString& ID, Transliterator::Token /*context*/) {
46    // C: "\\uFFFF" hex, min=4, max=4; \\U0010FFFF hex, min=8, max=8
47    return new EscapeTransliterator(ID, UnicodeString(true, BS_u, 2), UnicodeString(), 16, 4, true,
48             new EscapeTransliterator(UnicodeString(), UnicodeString(true, BS_U, 2), UnicodeString(), 16, 8, true, NULL));
49}
50static Transliterator* _createEscXML(const UnicodeString& ID, Transliterator::Token /*context*/) {
51    // XML: "" hex, min=1, max=6
52    return new EscapeTransliterator(ID, UnicodeString(true, XMLPRE, 3), UnicodeString(SEMI[0]), 16, 1, true, NULL);
53}
54static Transliterator* _createEscXML10(const UnicodeString& ID, Transliterator::Token /*context*/) {
55    // XML10: "&1114111;" dec, min=1, max=7 (not really "Any-Hex")
56    return new EscapeTransliterator(ID, UnicodeString(true, XML10PRE, 2), UnicodeString(SEMI[0]), 10, 1, true, NULL);
57}
58static Transliterator* _createEscPerl(const UnicodeString& ID, Transliterator::Token /*context*/) {
59    // Perl: "\\x{263A}" hex, min=1, max=6
60    return new EscapeTransliterator(ID, UnicodeString(true, PERLPRE, 3), UnicodeString(RBRACE[0]), 16, 1, true, NULL);
61}
62
63/**
64 * Registers standard variants with the system.  Called by
65 * Transliterator during initialization.
66 */
67void EscapeTransliterator::registerIDs() {
68    Token t = integerToken(0);
69
70    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Unicode"), _createEscUnicode, t);
71
72    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Java"), _createEscJava, t);
73
74    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/C"), _createEscC, t);
75
76    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/XML"), _createEscXML, t);
77
78    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/XML10"), _createEscXML10, t);
79
80    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Perl"), _createEscPerl, t);
81
82    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex"), _createEscJava, t);
83}
84
85/**
86 * Constructs an escape transliterator with the given ID and
87 * parameters.  See the class member documentation for details.
88 */
89EscapeTransliterator::EscapeTransliterator(const UnicodeString& newID,
90                         const UnicodeString& _prefix, const UnicodeString& _suffix,
91                         int32_t _radix, int32_t _minDigits,
92                         UBool _grokSupplementals,
93                         EscapeTransliterator* adoptedSupplementalHandler) :
94    Transliterator(newID, NULL)
95{
96    this->prefix = _prefix;
97    this->suffix = _suffix;
98    this->radix = _radix;
99    this->minDigits = _minDigits;
100    this->grokSupplementals = _grokSupplementals;
101    this->supplementalHandler = adoptedSupplementalHandler;
102}
103
104/**
105 * Copy constructor.
106 */
107EscapeTransliterator::EscapeTransliterator(const EscapeTransliterator& o) :
108    Transliterator(o),
109    prefix(o.prefix),
110    suffix(o.suffix),
111    radix(o.radix),
112    minDigits(o.minDigits),
113    grokSupplementals(o.grokSupplementals) {
114    supplementalHandler = (o.supplementalHandler != 0) ?
115        new EscapeTransliterator(*o.supplementalHandler) : NULL;
116}
117
118EscapeTransliterator::~EscapeTransliterator() {
119    delete supplementalHandler;
120}
121
122/**
123 * Transliterator API.
124 */
125EscapeTransliterator* EscapeTransliterator::clone() const {
126    return new EscapeTransliterator(*this);
127}
128
129/**
130 * Implements {@link Transliterator#handleTransliterate}.
131 */
132void EscapeTransliterator::handleTransliterate(Replaceable& text,
133                                               UTransPosition& pos,
134                                               UBool /*isIncremental*/) const
135{
136    /* TODO: Verify that isIncremental can be ignored */
137    int32_t start = pos.start;
138    int32_t limit = pos.limit;
139
140    UnicodeString buf(prefix);
141    int32_t prefixLen = prefix.length();
142    UBool redoPrefix = false;
143
144    while (start < limit) {
145        int32_t c = grokSupplementals ? text.char32At(start) : text.charAt(start);
146        int32_t charLen = grokSupplementals ? U16_LENGTH(c) : 1;
147
148        if ((c & 0xFFFF0000) != 0 && supplementalHandler != NULL) {
149            buf.truncate(0);
150            buf.append(supplementalHandler->prefix);
151            ICU_Utility::appendNumber(buf, c, supplementalHandler->radix,
152                                  supplementalHandler->minDigits);
153            buf.append(supplementalHandler->suffix);
154            redoPrefix = true;
155        } else {
156            if (redoPrefix) {
157                buf.truncate(0);
158                buf.append(prefix);
159                redoPrefix = false;
160            } else {
161                buf.truncate(prefixLen);
162            }
163            ICU_Utility::appendNumber(buf, c, radix, minDigits);
164            buf.append(suffix);
165        }
166
167        text.handleReplaceBetween(start, start + charLen, buf);
168        start += buf.length();
169        limit += buf.length() - charLen;
170    }
171
172    pos.contextLimit += limit - pos.limit;
173    pos.limit = limit;
174    pos.start = start;
175}
176
177U_NAMESPACE_END
178
179#endif /* #if !UCONFIG_NO_TRANSLITERATION */
180
181//eof
182