xref: /third_party/icu/icu4c/source/i18n/esctrn.cpp (revision 2e5b6d6d)
12e5b6d6dSopenharmony_ci// © 2016 and later: Unicode, Inc. and others.
22e5b6d6dSopenharmony_ci// License & terms of use: http://www.unicode.org/copyright.html
32e5b6d6dSopenharmony_ci/*
42e5b6d6dSopenharmony_ci**********************************************************************
52e5b6d6dSopenharmony_ci*   Copyright (c) 2001-2011, International Business Machines
62e5b6d6dSopenharmony_ci*   Corporation and others.  All Rights Reserved.
72e5b6d6dSopenharmony_ci**********************************************************************
82e5b6d6dSopenharmony_ci*   Date        Name        Description
92e5b6d6dSopenharmony_ci*   11/19/2001  aliu        Creation.
102e5b6d6dSopenharmony_ci**********************************************************************
112e5b6d6dSopenharmony_ci*/
122e5b6d6dSopenharmony_ci
132e5b6d6dSopenharmony_ci#include "unicode/utypes.h"
142e5b6d6dSopenharmony_ci
152e5b6d6dSopenharmony_ci#if !UCONFIG_NO_TRANSLITERATION
162e5b6d6dSopenharmony_ci
172e5b6d6dSopenharmony_ci#include "unicode/utf16.h"
182e5b6d6dSopenharmony_ci#include "esctrn.h"
192e5b6d6dSopenharmony_ci#include "util.h"
202e5b6d6dSopenharmony_ci
212e5b6d6dSopenharmony_ciU_NAMESPACE_BEGIN
222e5b6d6dSopenharmony_ci
232e5b6d6dSopenharmony_cistatic const UChar UNIPRE[] = {85,43,0}; // "U+"
242e5b6d6dSopenharmony_cistatic const UChar BS_u[] = {92,117,0}; // "\\u"
252e5b6d6dSopenharmony_cistatic const UChar BS_U[] = {92,85,0}; // "\\U"
262e5b6d6dSopenharmony_cistatic const UChar XMLPRE[] = {38,35,120,0}; // "&#x"
272e5b6d6dSopenharmony_cistatic const UChar XML10PRE[] = {38,35,0}; // "&#"
282e5b6d6dSopenharmony_cistatic const UChar PERLPRE[] = {92,120,123,0}; // "\\x{"
292e5b6d6dSopenharmony_cistatic const UChar SEMI[] = {59,0}; // ";"
302e5b6d6dSopenharmony_cistatic const UChar RBRACE[] = {125,0}; // "}"
312e5b6d6dSopenharmony_ci
322e5b6d6dSopenharmony_ciUOBJECT_DEFINE_RTTI_IMPLEMENTATION(EscapeTransliterator)
332e5b6d6dSopenharmony_ci
342e5b6d6dSopenharmony_ci/**
352e5b6d6dSopenharmony_ci * Factory methods
362e5b6d6dSopenharmony_ci */
372e5b6d6dSopenharmony_cistatic Transliterator* _createEscUnicode(const UnicodeString& ID, Transliterator::Token /*context*/) {
382e5b6d6dSopenharmony_ci    // Unicode: "U+10FFFF" hex, min=4, max=6
392e5b6d6dSopenharmony_ci    return new EscapeTransliterator(ID, UnicodeString(true, UNIPRE, 2), UnicodeString(), 16, 4, true, NULL);
402e5b6d6dSopenharmony_ci}
412e5b6d6dSopenharmony_cistatic Transliterator* _createEscJava(const UnicodeString& ID, Transliterator::Token /*context*/) {
422e5b6d6dSopenharmony_ci    // Java: "\\uFFFF" hex, min=4, max=4
432e5b6d6dSopenharmony_ci    return new EscapeTransliterator(ID, UnicodeString(true, BS_u, 2), UnicodeString(), 16, 4, false, NULL);
442e5b6d6dSopenharmony_ci}
452e5b6d6dSopenharmony_cistatic Transliterator* _createEscC(const UnicodeString& ID, Transliterator::Token /*context*/) {
462e5b6d6dSopenharmony_ci    // C: "\\uFFFF" hex, min=4, max=4; \\U0010FFFF hex, min=8, max=8
472e5b6d6dSopenharmony_ci    return new EscapeTransliterator(ID, UnicodeString(true, BS_u, 2), UnicodeString(), 16, 4, true,
482e5b6d6dSopenharmony_ci             new EscapeTransliterator(UnicodeString(), UnicodeString(true, BS_U, 2), UnicodeString(), 16, 8, true, NULL));
492e5b6d6dSopenharmony_ci}
502e5b6d6dSopenharmony_cistatic Transliterator* _createEscXML(const UnicodeString& ID, Transliterator::Token /*context*/) {
512e5b6d6dSopenharmony_ci    // XML: "" hex, min=1, max=6
522e5b6d6dSopenharmony_ci    return new EscapeTransliterator(ID, UnicodeString(true, XMLPRE, 3), UnicodeString(SEMI[0]), 16, 1, true, NULL);
532e5b6d6dSopenharmony_ci}
542e5b6d6dSopenharmony_cistatic Transliterator* _createEscXML10(const UnicodeString& ID, Transliterator::Token /*context*/) {
552e5b6d6dSopenharmony_ci    // XML10: "&1114111;" dec, min=1, max=7 (not really "Any-Hex")
562e5b6d6dSopenharmony_ci    return new EscapeTransliterator(ID, UnicodeString(true, XML10PRE, 2), UnicodeString(SEMI[0]), 10, 1, true, NULL);
572e5b6d6dSopenharmony_ci}
582e5b6d6dSopenharmony_cistatic Transliterator* _createEscPerl(const UnicodeString& ID, Transliterator::Token /*context*/) {
592e5b6d6dSopenharmony_ci    // Perl: "\\x{263A}" hex, min=1, max=6
602e5b6d6dSopenharmony_ci    return new EscapeTransliterator(ID, UnicodeString(true, PERLPRE, 3), UnicodeString(RBRACE[0]), 16, 1, true, NULL);
612e5b6d6dSopenharmony_ci}
622e5b6d6dSopenharmony_ci
632e5b6d6dSopenharmony_ci/**
642e5b6d6dSopenharmony_ci * Registers standard variants with the system.  Called by
652e5b6d6dSopenharmony_ci * Transliterator during initialization.
662e5b6d6dSopenharmony_ci */
672e5b6d6dSopenharmony_civoid EscapeTransliterator::registerIDs() {
682e5b6d6dSopenharmony_ci    Token t = integerToken(0);
692e5b6d6dSopenharmony_ci
702e5b6d6dSopenharmony_ci    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Unicode"), _createEscUnicode, t);
712e5b6d6dSopenharmony_ci
722e5b6d6dSopenharmony_ci    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Java"), _createEscJava, t);
732e5b6d6dSopenharmony_ci
742e5b6d6dSopenharmony_ci    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/C"), _createEscC, t);
752e5b6d6dSopenharmony_ci
762e5b6d6dSopenharmony_ci    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/XML"), _createEscXML, t);
772e5b6d6dSopenharmony_ci
782e5b6d6dSopenharmony_ci    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/XML10"), _createEscXML10, t);
792e5b6d6dSopenharmony_ci
802e5b6d6dSopenharmony_ci    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Perl"), _createEscPerl, t);
812e5b6d6dSopenharmony_ci
822e5b6d6dSopenharmony_ci    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex"), _createEscJava, t);
832e5b6d6dSopenharmony_ci}
842e5b6d6dSopenharmony_ci
852e5b6d6dSopenharmony_ci/**
862e5b6d6dSopenharmony_ci * Constructs an escape transliterator with the given ID and
872e5b6d6dSopenharmony_ci * parameters.  See the class member documentation for details.
882e5b6d6dSopenharmony_ci */
892e5b6d6dSopenharmony_ciEscapeTransliterator::EscapeTransliterator(const UnicodeString& newID,
902e5b6d6dSopenharmony_ci                         const UnicodeString& _prefix, const UnicodeString& _suffix,
912e5b6d6dSopenharmony_ci                         int32_t _radix, int32_t _minDigits,
922e5b6d6dSopenharmony_ci                         UBool _grokSupplementals,
932e5b6d6dSopenharmony_ci                         EscapeTransliterator* adoptedSupplementalHandler) :
942e5b6d6dSopenharmony_ci    Transliterator(newID, NULL)
952e5b6d6dSopenharmony_ci{
962e5b6d6dSopenharmony_ci    this->prefix = _prefix;
972e5b6d6dSopenharmony_ci    this->suffix = _suffix;
982e5b6d6dSopenharmony_ci    this->radix = _radix;
992e5b6d6dSopenharmony_ci    this->minDigits = _minDigits;
1002e5b6d6dSopenharmony_ci    this->grokSupplementals = _grokSupplementals;
1012e5b6d6dSopenharmony_ci    this->supplementalHandler = adoptedSupplementalHandler;
1022e5b6d6dSopenharmony_ci}
1032e5b6d6dSopenharmony_ci
1042e5b6d6dSopenharmony_ci/**
1052e5b6d6dSopenharmony_ci * Copy constructor.
1062e5b6d6dSopenharmony_ci */
1072e5b6d6dSopenharmony_ciEscapeTransliterator::EscapeTransliterator(const EscapeTransliterator& o) :
1082e5b6d6dSopenharmony_ci    Transliterator(o),
1092e5b6d6dSopenharmony_ci    prefix(o.prefix),
1102e5b6d6dSopenharmony_ci    suffix(o.suffix),
1112e5b6d6dSopenharmony_ci    radix(o.radix),
1122e5b6d6dSopenharmony_ci    minDigits(o.minDigits),
1132e5b6d6dSopenharmony_ci    grokSupplementals(o.grokSupplementals) {
1142e5b6d6dSopenharmony_ci    supplementalHandler = (o.supplementalHandler != 0) ?
1152e5b6d6dSopenharmony_ci        new EscapeTransliterator(*o.supplementalHandler) : NULL;
1162e5b6d6dSopenharmony_ci}
1172e5b6d6dSopenharmony_ci
1182e5b6d6dSopenharmony_ciEscapeTransliterator::~EscapeTransliterator() {
1192e5b6d6dSopenharmony_ci    delete supplementalHandler;
1202e5b6d6dSopenharmony_ci}
1212e5b6d6dSopenharmony_ci
1222e5b6d6dSopenharmony_ci/**
1232e5b6d6dSopenharmony_ci * Transliterator API.
1242e5b6d6dSopenharmony_ci */
1252e5b6d6dSopenharmony_ciEscapeTransliterator* EscapeTransliterator::clone() const {
1262e5b6d6dSopenharmony_ci    return new EscapeTransliterator(*this);
1272e5b6d6dSopenharmony_ci}
1282e5b6d6dSopenharmony_ci
1292e5b6d6dSopenharmony_ci/**
1302e5b6d6dSopenharmony_ci * Implements {@link Transliterator#handleTransliterate}.
1312e5b6d6dSopenharmony_ci */
1322e5b6d6dSopenharmony_civoid EscapeTransliterator::handleTransliterate(Replaceable& text,
1332e5b6d6dSopenharmony_ci                                               UTransPosition& pos,
1342e5b6d6dSopenharmony_ci                                               UBool /*isIncremental*/) const
1352e5b6d6dSopenharmony_ci{
1362e5b6d6dSopenharmony_ci    /* TODO: Verify that isIncremental can be ignored */
1372e5b6d6dSopenharmony_ci    int32_t start = pos.start;
1382e5b6d6dSopenharmony_ci    int32_t limit = pos.limit;
1392e5b6d6dSopenharmony_ci
1402e5b6d6dSopenharmony_ci    UnicodeString buf(prefix);
1412e5b6d6dSopenharmony_ci    int32_t prefixLen = prefix.length();
1422e5b6d6dSopenharmony_ci    UBool redoPrefix = false;
1432e5b6d6dSopenharmony_ci
1442e5b6d6dSopenharmony_ci    while (start < limit) {
1452e5b6d6dSopenharmony_ci        int32_t c = grokSupplementals ? text.char32At(start) : text.charAt(start);
1462e5b6d6dSopenharmony_ci        int32_t charLen = grokSupplementals ? U16_LENGTH(c) : 1;
1472e5b6d6dSopenharmony_ci
1482e5b6d6dSopenharmony_ci        if ((c & 0xFFFF0000) != 0 && supplementalHandler != NULL) {
1492e5b6d6dSopenharmony_ci            buf.truncate(0);
1502e5b6d6dSopenharmony_ci            buf.append(supplementalHandler->prefix);
1512e5b6d6dSopenharmony_ci            ICU_Utility::appendNumber(buf, c, supplementalHandler->radix,
1522e5b6d6dSopenharmony_ci                                  supplementalHandler->minDigits);
1532e5b6d6dSopenharmony_ci            buf.append(supplementalHandler->suffix);
1542e5b6d6dSopenharmony_ci            redoPrefix = true;
1552e5b6d6dSopenharmony_ci        } else {
1562e5b6d6dSopenharmony_ci            if (redoPrefix) {
1572e5b6d6dSopenharmony_ci                buf.truncate(0);
1582e5b6d6dSopenharmony_ci                buf.append(prefix);
1592e5b6d6dSopenharmony_ci                redoPrefix = false;
1602e5b6d6dSopenharmony_ci            } else {
1612e5b6d6dSopenharmony_ci                buf.truncate(prefixLen);
1622e5b6d6dSopenharmony_ci            }
1632e5b6d6dSopenharmony_ci            ICU_Utility::appendNumber(buf, c, radix, minDigits);
1642e5b6d6dSopenharmony_ci            buf.append(suffix);
1652e5b6d6dSopenharmony_ci        }
1662e5b6d6dSopenharmony_ci
1672e5b6d6dSopenharmony_ci        text.handleReplaceBetween(start, start + charLen, buf);
1682e5b6d6dSopenharmony_ci        start += buf.length();
1692e5b6d6dSopenharmony_ci        limit += buf.length() - charLen;
1702e5b6d6dSopenharmony_ci    }
1712e5b6d6dSopenharmony_ci
1722e5b6d6dSopenharmony_ci    pos.contextLimit += limit - pos.limit;
1732e5b6d6dSopenharmony_ci    pos.limit = limit;
1742e5b6d6dSopenharmony_ci    pos.start = start;
1752e5b6d6dSopenharmony_ci}
1762e5b6d6dSopenharmony_ci
1772e5b6d6dSopenharmony_ciU_NAMESPACE_END
1782e5b6d6dSopenharmony_ci
1792e5b6d6dSopenharmony_ci#endif /* #if !UCONFIG_NO_TRANSLITERATION */
1802e5b6d6dSopenharmony_ci
1812e5b6d6dSopenharmony_ci//eof
182