11cb0ef41Sopenharmony_ci// © 2016 and later: Unicode, Inc. and others.
21cb0ef41Sopenharmony_ci// License & terms of use: http://www.unicode.org/copyright.html
31cb0ef41Sopenharmony_ci/*
41cb0ef41Sopenharmony_ci**********************************************************************
51cb0ef41Sopenharmony_ci*   Copyright (c) 2001-2011, International Business Machines
61cb0ef41Sopenharmony_ci*   Corporation and others.  All Rights Reserved.
71cb0ef41Sopenharmony_ci**********************************************************************
81cb0ef41Sopenharmony_ci*   Date        Name        Description
91cb0ef41Sopenharmony_ci*   11/19/2001  aliu        Creation.
101cb0ef41Sopenharmony_ci**********************************************************************
111cb0ef41Sopenharmony_ci*/
121cb0ef41Sopenharmony_ci
131cb0ef41Sopenharmony_ci#include "unicode/utypes.h"
141cb0ef41Sopenharmony_ci
151cb0ef41Sopenharmony_ci#if !UCONFIG_NO_TRANSLITERATION
161cb0ef41Sopenharmony_ci
171cb0ef41Sopenharmony_ci#include "unicode/utf16.h"
181cb0ef41Sopenharmony_ci#include "esctrn.h"
191cb0ef41Sopenharmony_ci#include "util.h"
201cb0ef41Sopenharmony_ci
211cb0ef41Sopenharmony_ciU_NAMESPACE_BEGIN
221cb0ef41Sopenharmony_ci
// NUL-terminated UTF-16 pieces of the escape syntaxes built by the factory
// functions in this file.  The factories pass an explicit length (or a single
// element), so the trailing NUL is never part of the generated text.
static const char16_t UNIPRE[] = {u'U', u'+', 0}; // "U+"
static const char16_t BS_u[] = {u'\\', u'u', 0}; // "\\u"
static const char16_t BS_U[] = {u'\\', u'U', 0}; // "\\U"
static const char16_t XMLPRE[] = {u'&', u'#', u'x', 0}; // "&#x"
static const char16_t XML10PRE[] = {u'&', u'#', 0}; // "&#"
static const char16_t PERLPRE[] = {u'\\', u'x', u'{', 0}; // "\\x{"
static const char16_t SEMI[] = {u';', 0}; // ";"
static const char16_t RBRACE[] = {u'}', 0}; // "}"
311cb0ef41Sopenharmony_ci
321cb0ef41Sopenharmony_ciUOBJECT_DEFINE_RTTI_IMPLEMENTATION(EscapeTransliterator)
331cb0ef41Sopenharmony_ci
341cb0ef41Sopenharmony_ci/**
351cb0ef41Sopenharmony_ci * Factory methods
361cb0ef41Sopenharmony_ci */
371cb0ef41Sopenharmony_cistatic Transliterator* _createEscUnicode(const UnicodeString& ID, Transliterator::Token /*context*/) {
381cb0ef41Sopenharmony_ci    // Unicode: "U+10FFFF" hex, min=4, max=6
391cb0ef41Sopenharmony_ci    return new EscapeTransliterator(ID, UnicodeString(true, UNIPRE, 2), UnicodeString(), 16, 4, true, nullptr);
401cb0ef41Sopenharmony_ci}
411cb0ef41Sopenharmony_cistatic Transliterator* _createEscJava(const UnicodeString& ID, Transliterator::Token /*context*/) {
421cb0ef41Sopenharmony_ci    // Java: "\\uFFFF" hex, min=4, max=4
431cb0ef41Sopenharmony_ci    return new EscapeTransliterator(ID, UnicodeString(true, BS_u, 2), UnicodeString(), 16, 4, false, nullptr);
441cb0ef41Sopenharmony_ci}
451cb0ef41Sopenharmony_cistatic Transliterator* _createEscC(const UnicodeString& ID, Transliterator::Token /*context*/) {
461cb0ef41Sopenharmony_ci    // C: "\\uFFFF" hex, min=4, max=4; \\U0010FFFF hex, min=8, max=8
471cb0ef41Sopenharmony_ci    return new EscapeTransliterator(ID, UnicodeString(true, BS_u, 2), UnicodeString(), 16, 4, true,
481cb0ef41Sopenharmony_ci             new EscapeTransliterator(UnicodeString(), UnicodeString(true, BS_U, 2), UnicodeString(), 16, 8, true, nullptr));
491cb0ef41Sopenharmony_ci}
501cb0ef41Sopenharmony_cistatic Transliterator* _createEscXML(const UnicodeString& ID, Transliterator::Token /*context*/) {
511cb0ef41Sopenharmony_ci    // XML: "" hex, min=1, max=6
521cb0ef41Sopenharmony_ci    return new EscapeTransliterator(ID, UnicodeString(true, XMLPRE, 3), UnicodeString(SEMI[0]), 16, 1, true, nullptr);
531cb0ef41Sopenharmony_ci}
541cb0ef41Sopenharmony_cistatic Transliterator* _createEscXML10(const UnicodeString& ID, Transliterator::Token /*context*/) {
551cb0ef41Sopenharmony_ci    // XML10: "&1114111;" dec, min=1, max=7 (not really "Any-Hex")
561cb0ef41Sopenharmony_ci    return new EscapeTransliterator(ID, UnicodeString(true, XML10PRE, 2), UnicodeString(SEMI[0]), 10, 1, true, nullptr);
571cb0ef41Sopenharmony_ci}
581cb0ef41Sopenharmony_cistatic Transliterator* _createEscPerl(const UnicodeString& ID, Transliterator::Token /*context*/) {
591cb0ef41Sopenharmony_ci    // Perl: "\\x{263A}" hex, min=1, max=6
601cb0ef41Sopenharmony_ci    return new EscapeTransliterator(ID, UnicodeString(true, PERLPRE, 3), UnicodeString(RBRACE[0]), 16, 1, true, nullptr);
611cb0ef41Sopenharmony_ci}
621cb0ef41Sopenharmony_ci
631cb0ef41Sopenharmony_ci/**
641cb0ef41Sopenharmony_ci * Registers standard variants with the system.  Called by
651cb0ef41Sopenharmony_ci * Transliterator during initialization.
661cb0ef41Sopenharmony_ci */
671cb0ef41Sopenharmony_civoid EscapeTransliterator::registerIDs() {
681cb0ef41Sopenharmony_ci    Token t = integerToken(0);
691cb0ef41Sopenharmony_ci
701cb0ef41Sopenharmony_ci    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Unicode"), _createEscUnicode, t);
711cb0ef41Sopenharmony_ci
721cb0ef41Sopenharmony_ci    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Java"), _createEscJava, t);
731cb0ef41Sopenharmony_ci
741cb0ef41Sopenharmony_ci    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/C"), _createEscC, t);
751cb0ef41Sopenharmony_ci
761cb0ef41Sopenharmony_ci    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/XML"), _createEscXML, t);
771cb0ef41Sopenharmony_ci
781cb0ef41Sopenharmony_ci    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/XML10"), _createEscXML10, t);
791cb0ef41Sopenharmony_ci
801cb0ef41Sopenharmony_ci    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Perl"), _createEscPerl, t);
811cb0ef41Sopenharmony_ci
821cb0ef41Sopenharmony_ci    Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex"), _createEscJava, t);
831cb0ef41Sopenharmony_ci}
841cb0ef41Sopenharmony_ci
851cb0ef41Sopenharmony_ci/**
861cb0ef41Sopenharmony_ci * Constructs an escape transliterator with the given ID and
871cb0ef41Sopenharmony_ci * parameters.  See the class member documentation for details.
881cb0ef41Sopenharmony_ci */
891cb0ef41Sopenharmony_ciEscapeTransliterator::EscapeTransliterator(const UnicodeString& newID,
901cb0ef41Sopenharmony_ci                         const UnicodeString& _prefix, const UnicodeString& _suffix,
911cb0ef41Sopenharmony_ci                         int32_t _radix, int32_t _minDigits,
921cb0ef41Sopenharmony_ci                         UBool _grokSupplementals,
931cb0ef41Sopenharmony_ci                         EscapeTransliterator* adoptedSupplementalHandler) :
941cb0ef41Sopenharmony_ci    Transliterator(newID, nullptr)
951cb0ef41Sopenharmony_ci{
961cb0ef41Sopenharmony_ci    this->prefix = _prefix;
971cb0ef41Sopenharmony_ci    this->suffix = _suffix;
981cb0ef41Sopenharmony_ci    this->radix = _radix;
991cb0ef41Sopenharmony_ci    this->minDigits = _minDigits;
1001cb0ef41Sopenharmony_ci    this->grokSupplementals = _grokSupplementals;
1011cb0ef41Sopenharmony_ci    this->supplementalHandler = adoptedSupplementalHandler;
1021cb0ef41Sopenharmony_ci}
1031cb0ef41Sopenharmony_ci
1041cb0ef41Sopenharmony_ci/**
1051cb0ef41Sopenharmony_ci * Copy constructor.
1061cb0ef41Sopenharmony_ci */
1071cb0ef41Sopenharmony_ciEscapeTransliterator::EscapeTransliterator(const EscapeTransliterator& o) :
1081cb0ef41Sopenharmony_ci    Transliterator(o),
1091cb0ef41Sopenharmony_ci    prefix(o.prefix),
1101cb0ef41Sopenharmony_ci    suffix(o.suffix),
1111cb0ef41Sopenharmony_ci    radix(o.radix),
1121cb0ef41Sopenharmony_ci    minDigits(o.minDigits),
1131cb0ef41Sopenharmony_ci    grokSupplementals(o.grokSupplementals) {
1141cb0ef41Sopenharmony_ci    supplementalHandler = (o.supplementalHandler != 0) ?
1151cb0ef41Sopenharmony_ci        new EscapeTransliterator(*o.supplementalHandler) : nullptr;
1161cb0ef41Sopenharmony_ci}
1171cb0ef41Sopenharmony_ci
1181cb0ef41Sopenharmony_ciEscapeTransliterator::~EscapeTransliterator() {
1191cb0ef41Sopenharmony_ci    delete supplementalHandler;
1201cb0ef41Sopenharmony_ci}
1211cb0ef41Sopenharmony_ci
1221cb0ef41Sopenharmony_ci/**
1231cb0ef41Sopenharmony_ci * Transliterator API.
1241cb0ef41Sopenharmony_ci */
1251cb0ef41Sopenharmony_ciEscapeTransliterator* EscapeTransliterator::clone() const {
1261cb0ef41Sopenharmony_ci    return new EscapeTransliterator(*this);
1271cb0ef41Sopenharmony_ci}
1281cb0ef41Sopenharmony_ci
1291cb0ef41Sopenharmony_ci/**
1301cb0ef41Sopenharmony_ci * Implements {@link Transliterator#handleTransliterate}.
1311cb0ef41Sopenharmony_ci */
1321cb0ef41Sopenharmony_civoid EscapeTransliterator::handleTransliterate(Replaceable& text,
1331cb0ef41Sopenharmony_ci                                               UTransPosition& pos,
1341cb0ef41Sopenharmony_ci                                               UBool /*isIncremental*/) const
1351cb0ef41Sopenharmony_ci{
1361cb0ef41Sopenharmony_ci    /* TODO: Verify that isIncremental can be ignored */
1371cb0ef41Sopenharmony_ci    int32_t start = pos.start;
1381cb0ef41Sopenharmony_ci    int32_t limit = pos.limit;
1391cb0ef41Sopenharmony_ci
1401cb0ef41Sopenharmony_ci    UnicodeString buf(prefix);
1411cb0ef41Sopenharmony_ci    int32_t prefixLen = prefix.length();
1421cb0ef41Sopenharmony_ci    UBool redoPrefix = false;
1431cb0ef41Sopenharmony_ci
1441cb0ef41Sopenharmony_ci    while (start < limit) {
1451cb0ef41Sopenharmony_ci        int32_t c = grokSupplementals ? text.char32At(start) : text.charAt(start);
1461cb0ef41Sopenharmony_ci        int32_t charLen = grokSupplementals ? U16_LENGTH(c) : 1;
1471cb0ef41Sopenharmony_ci
1481cb0ef41Sopenharmony_ci        if ((c & 0xFFFF0000) != 0 && supplementalHandler != nullptr) {
1491cb0ef41Sopenharmony_ci            buf.truncate(0);
1501cb0ef41Sopenharmony_ci            buf.append(supplementalHandler->prefix);
1511cb0ef41Sopenharmony_ci            ICU_Utility::appendNumber(buf, c, supplementalHandler->radix,
1521cb0ef41Sopenharmony_ci                                  supplementalHandler->minDigits);
1531cb0ef41Sopenharmony_ci            buf.append(supplementalHandler->suffix);
1541cb0ef41Sopenharmony_ci            redoPrefix = true;
1551cb0ef41Sopenharmony_ci        } else {
1561cb0ef41Sopenharmony_ci            if (redoPrefix) {
1571cb0ef41Sopenharmony_ci                buf.truncate(0);
1581cb0ef41Sopenharmony_ci                buf.append(prefix);
1591cb0ef41Sopenharmony_ci                redoPrefix = false;
1601cb0ef41Sopenharmony_ci            } else {
1611cb0ef41Sopenharmony_ci                buf.truncate(prefixLen);
1621cb0ef41Sopenharmony_ci            }
1631cb0ef41Sopenharmony_ci            ICU_Utility::appendNumber(buf, c, radix, minDigits);
1641cb0ef41Sopenharmony_ci            buf.append(suffix);
1651cb0ef41Sopenharmony_ci        }
1661cb0ef41Sopenharmony_ci
1671cb0ef41Sopenharmony_ci        text.handleReplaceBetween(start, start + charLen, buf);
1681cb0ef41Sopenharmony_ci        start += buf.length();
1691cb0ef41Sopenharmony_ci        limit += buf.length() - charLen;
1701cb0ef41Sopenharmony_ci    }
1711cb0ef41Sopenharmony_ci
1721cb0ef41Sopenharmony_ci    pos.contextLimit += limit - pos.limit;
1731cb0ef41Sopenharmony_ci    pos.limit = limit;
1741cb0ef41Sopenharmony_ci    pos.start = start;
1751cb0ef41Sopenharmony_ci}
1761cb0ef41Sopenharmony_ci
1771cb0ef41Sopenharmony_ciU_NAMESPACE_END
1781cb0ef41Sopenharmony_ci
1791cb0ef41Sopenharmony_ci#endif /* #if !UCONFIG_NO_TRANSLITERATION */
1801cb0ef41Sopenharmony_ci
1811cb0ef41Sopenharmony_ci//eof
182