11cb0ef41Sopenharmony_ci// © 2016 and later: Unicode, Inc. and others. 21cb0ef41Sopenharmony_ci// License & terms of use: http://www.unicode.org/copyright.html 31cb0ef41Sopenharmony_ci/* 41cb0ef41Sopenharmony_ci********************************************************************** 51cb0ef41Sopenharmony_ci* Copyright (c) 2001-2011, International Business Machines 61cb0ef41Sopenharmony_ci* Corporation and others. All Rights Reserved. 71cb0ef41Sopenharmony_ci********************************************************************** 81cb0ef41Sopenharmony_ci* Date Name Description 91cb0ef41Sopenharmony_ci* 11/19/2001 aliu Creation. 101cb0ef41Sopenharmony_ci********************************************************************** 111cb0ef41Sopenharmony_ci*/ 121cb0ef41Sopenharmony_ci 131cb0ef41Sopenharmony_ci#include "unicode/utypes.h" 141cb0ef41Sopenharmony_ci 151cb0ef41Sopenharmony_ci#if !UCONFIG_NO_TRANSLITERATION 161cb0ef41Sopenharmony_ci 171cb0ef41Sopenharmony_ci#include "unicode/utf16.h" 181cb0ef41Sopenharmony_ci#include "esctrn.h" 191cb0ef41Sopenharmony_ci#include "util.h" 201cb0ef41Sopenharmony_ci 211cb0ef41Sopenharmony_ciU_NAMESPACE_BEGIN 221cb0ef41Sopenharmony_ci 231cb0ef41Sopenharmony_cistatic const char16_t UNIPRE[] = {85,43,0}; // "U+" 241cb0ef41Sopenharmony_cistatic const char16_t BS_u[] = {92,117,0}; // "\\u" 251cb0ef41Sopenharmony_cistatic const char16_t BS_U[] = {92,85,0}; // "\\U" 261cb0ef41Sopenharmony_cistatic const char16_t XMLPRE[] = {38,35,120,0}; // "&#x" 271cb0ef41Sopenharmony_cistatic const char16_t XML10PRE[] = {38,35,0}; // "&#" 281cb0ef41Sopenharmony_cistatic const char16_t PERLPRE[] = {92,120,123,0}; // "\\x{" 291cb0ef41Sopenharmony_cistatic const char16_t SEMI[] = {59,0}; // ";" 301cb0ef41Sopenharmony_cistatic const char16_t RBRACE[] = {125,0}; // "}" 311cb0ef41Sopenharmony_ci 321cb0ef41Sopenharmony_ciUOBJECT_DEFINE_RTTI_IMPLEMENTATION(EscapeTransliterator) 331cb0ef41Sopenharmony_ci 341cb0ef41Sopenharmony_ci/** 351cb0ef41Sopenharmony_ci * Factory methods 361cb0ef41Sopenharmony_ci */ 371cb0ef41Sopenharmony_cistatic Transliterator* _createEscUnicode(const UnicodeString& ID, Transliterator::Token /*context*/) { 381cb0ef41Sopenharmony_ci // Unicode: "U+10FFFF" hex, min=4, max=6 391cb0ef41Sopenharmony_ci return new EscapeTransliterator(ID, UnicodeString(true, UNIPRE, 2), UnicodeString(), 16, 4, true, nullptr); 401cb0ef41Sopenharmony_ci} 411cb0ef41Sopenharmony_cistatic Transliterator* _createEscJava(const UnicodeString& ID, Transliterator::Token /*context*/) { 421cb0ef41Sopenharmony_ci // Java: "\\uFFFF" hex, min=4, max=4 431cb0ef41Sopenharmony_ci return new EscapeTransliterator(ID, UnicodeString(true, BS_u, 2), UnicodeString(), 16, 4, false, nullptr); 441cb0ef41Sopenharmony_ci} 451cb0ef41Sopenharmony_cistatic Transliterator* _createEscC(const UnicodeString& ID, Transliterator::Token /*context*/) { 461cb0ef41Sopenharmony_ci // C: "\\uFFFF" hex, min=4, max=4; \\U0010FFFF hex, min=8, max=8 471cb0ef41Sopenharmony_ci return new EscapeTransliterator(ID, UnicodeString(true, BS_u, 2), UnicodeString(), 16, 4, true, 481cb0ef41Sopenharmony_ci new EscapeTransliterator(UnicodeString(), UnicodeString(true, BS_U, 2), UnicodeString(), 16, 8, true, nullptr)); 491cb0ef41Sopenharmony_ci} 501cb0ef41Sopenharmony_cistatic Transliterator* _createEscXML(const UnicodeString& ID, Transliterator::Token /*context*/) { 511cb0ef41Sopenharmony_ci // XML: "" hex, min=1, max=6 521cb0ef41Sopenharmony_ci return new EscapeTransliterator(ID, UnicodeString(true, XMLPRE, 3), UnicodeString(SEMI[0]), 16, 1, true, nullptr); 531cb0ef41Sopenharmony_ci} 541cb0ef41Sopenharmony_cistatic Transliterator* _createEscXML10(const UnicodeString& ID, Transliterator::Token /*context*/) { 551cb0ef41Sopenharmony_ci // XML10: "&1114111;" dec, min=1, max=7 (not really "Any-Hex") 561cb0ef41Sopenharmony_ci return new EscapeTransliterator(ID, UnicodeString(true, XML10PRE, 2), UnicodeString(SEMI[0]), 10, 1, true, nullptr); 571cb0ef41Sopenharmony_ci} 581cb0ef41Sopenharmony_cistatic Transliterator* _createEscPerl(const UnicodeString& ID, Transliterator::Token /*context*/) { 591cb0ef41Sopenharmony_ci // Perl: "\\x{263A}" hex, min=1, max=6 601cb0ef41Sopenharmony_ci return new EscapeTransliterator(ID, UnicodeString(true, PERLPRE, 3), UnicodeString(RBRACE[0]), 16, 1, true, nullptr); 611cb0ef41Sopenharmony_ci} 621cb0ef41Sopenharmony_ci 631cb0ef41Sopenharmony_ci/** 641cb0ef41Sopenharmony_ci * Registers standard variants with the system. Called by 651cb0ef41Sopenharmony_ci * Transliterator during initialization. 661cb0ef41Sopenharmony_ci */ 671cb0ef41Sopenharmony_civoid EscapeTransliterator::registerIDs() { 681cb0ef41Sopenharmony_ci Token t = integerToken(0); 691cb0ef41Sopenharmony_ci 701cb0ef41Sopenharmony_ci Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Unicode"), _createEscUnicode, t); 711cb0ef41Sopenharmony_ci 721cb0ef41Sopenharmony_ci Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Java"), _createEscJava, t); 731cb0ef41Sopenharmony_ci 741cb0ef41Sopenharmony_ci Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/C"), _createEscC, t); 751cb0ef41Sopenharmony_ci 761cb0ef41Sopenharmony_ci Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/XML"), _createEscXML, t); 771cb0ef41Sopenharmony_ci 781cb0ef41Sopenharmony_ci Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/XML10"), _createEscXML10, t); 791cb0ef41Sopenharmony_ci 801cb0ef41Sopenharmony_ci Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Perl"), _createEscPerl, t); 811cb0ef41Sopenharmony_ci 821cb0ef41Sopenharmony_ci Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex"), _createEscJava, t); 831cb0ef41Sopenharmony_ci} 841cb0ef41Sopenharmony_ci 851cb0ef41Sopenharmony_ci/** 861cb0ef41Sopenharmony_ci * Constructs an escape transliterator with the given ID and 871cb0ef41Sopenharmony_ci * parameters. See the class member documentation for details. 881cb0ef41Sopenharmony_ci */ 891cb0ef41Sopenharmony_ciEscapeTransliterator::EscapeTransliterator(const UnicodeString& newID, 901cb0ef41Sopenharmony_ci const UnicodeString& _prefix, const UnicodeString& _suffix, 911cb0ef41Sopenharmony_ci int32_t _radix, int32_t _minDigits, 921cb0ef41Sopenharmony_ci UBool _grokSupplementals, 931cb0ef41Sopenharmony_ci EscapeTransliterator* adoptedSupplementalHandler) : 941cb0ef41Sopenharmony_ci Transliterator(newID, nullptr) 951cb0ef41Sopenharmony_ci{ 961cb0ef41Sopenharmony_ci this->prefix = _prefix; 971cb0ef41Sopenharmony_ci this->suffix = _suffix; 981cb0ef41Sopenharmony_ci this->radix = _radix; 991cb0ef41Sopenharmony_ci this->minDigits = _minDigits; 1001cb0ef41Sopenharmony_ci this->grokSupplementals = _grokSupplementals; 1011cb0ef41Sopenharmony_ci this->supplementalHandler = adoptedSupplementalHandler; 1021cb0ef41Sopenharmony_ci} 1031cb0ef41Sopenharmony_ci 1041cb0ef41Sopenharmony_ci/** 1051cb0ef41Sopenharmony_ci * Copy constructor. 1061cb0ef41Sopenharmony_ci */ 1071cb0ef41Sopenharmony_ciEscapeTransliterator::EscapeTransliterator(const EscapeTransliterator& o) : 1081cb0ef41Sopenharmony_ci Transliterator(o), 1091cb0ef41Sopenharmony_ci prefix(o.prefix), 1101cb0ef41Sopenharmony_ci suffix(o.suffix), 1111cb0ef41Sopenharmony_ci radix(o.radix), 1121cb0ef41Sopenharmony_ci minDigits(o.minDigits), 1131cb0ef41Sopenharmony_ci grokSupplementals(o.grokSupplementals) { 1141cb0ef41Sopenharmony_ci supplementalHandler = (o.supplementalHandler != 0) ? 1151cb0ef41Sopenharmony_ci new EscapeTransliterator(*o.supplementalHandler) : nullptr; 1161cb0ef41Sopenharmony_ci} 1171cb0ef41Sopenharmony_ci 1181cb0ef41Sopenharmony_ciEscapeTransliterator::~EscapeTransliterator() { 1191cb0ef41Sopenharmony_ci delete supplementalHandler; 1201cb0ef41Sopenharmony_ci} 1211cb0ef41Sopenharmony_ci 1221cb0ef41Sopenharmony_ci/** 1231cb0ef41Sopenharmony_ci * Transliterator API. 1241cb0ef41Sopenharmony_ci */ 1251cb0ef41Sopenharmony_ciEscapeTransliterator* EscapeTransliterator::clone() const { 1261cb0ef41Sopenharmony_ci return new EscapeTransliterator(*this); 1271cb0ef41Sopenharmony_ci} 1281cb0ef41Sopenharmony_ci 1291cb0ef41Sopenharmony_ci/** 1301cb0ef41Sopenharmony_ci * Implements {@link Transliterator#handleTransliterate}. 1311cb0ef41Sopenharmony_ci */ 1321cb0ef41Sopenharmony_civoid EscapeTransliterator::handleTransliterate(Replaceable& text, 1331cb0ef41Sopenharmony_ci UTransPosition& pos, 1341cb0ef41Sopenharmony_ci UBool /*isIncremental*/) const 1351cb0ef41Sopenharmony_ci{ 1361cb0ef41Sopenharmony_ci /* TODO: Verify that isIncremental can be ignored */ 1371cb0ef41Sopenharmony_ci int32_t start = pos.start; 1381cb0ef41Sopenharmony_ci int32_t limit = pos.limit; 1391cb0ef41Sopenharmony_ci 1401cb0ef41Sopenharmony_ci UnicodeString buf(prefix); 1411cb0ef41Sopenharmony_ci int32_t prefixLen = prefix.length(); 1421cb0ef41Sopenharmony_ci UBool redoPrefix = false; 1431cb0ef41Sopenharmony_ci 1441cb0ef41Sopenharmony_ci while (start < limit) { 1451cb0ef41Sopenharmony_ci int32_t c = grokSupplementals ? text.char32At(start) : text.charAt(start); 1461cb0ef41Sopenharmony_ci int32_t charLen = grokSupplementals ? U16_LENGTH(c) : 1; 1471cb0ef41Sopenharmony_ci 1481cb0ef41Sopenharmony_ci if ((c & 0xFFFF0000) != 0 && supplementalHandler != nullptr) { 1491cb0ef41Sopenharmony_ci buf.truncate(0); 1501cb0ef41Sopenharmony_ci buf.append(supplementalHandler->prefix); 1511cb0ef41Sopenharmony_ci ICU_Utility::appendNumber(buf, c, supplementalHandler->radix, 1521cb0ef41Sopenharmony_ci supplementalHandler->minDigits); 1531cb0ef41Sopenharmony_ci buf.append(supplementalHandler->suffix); 1541cb0ef41Sopenharmony_ci redoPrefix = true; 1551cb0ef41Sopenharmony_ci } else { 1561cb0ef41Sopenharmony_ci if (redoPrefix) { 1571cb0ef41Sopenharmony_ci buf.truncate(0); 1581cb0ef41Sopenharmony_ci buf.append(prefix); 1591cb0ef41Sopenharmony_ci redoPrefix = false; 1601cb0ef41Sopenharmony_ci } else { 1611cb0ef41Sopenharmony_ci buf.truncate(prefixLen); 1621cb0ef41Sopenharmony_ci } 1631cb0ef41Sopenharmony_ci ICU_Utility::appendNumber(buf, c, radix, minDigits); 1641cb0ef41Sopenharmony_ci buf.append(suffix); 1651cb0ef41Sopenharmony_ci } 1661cb0ef41Sopenharmony_ci 1671cb0ef41Sopenharmony_ci text.handleReplaceBetween(start, start + charLen, buf); 1681cb0ef41Sopenharmony_ci start += buf.length(); 1691cb0ef41Sopenharmony_ci limit += buf.length() - charLen; 1701cb0ef41Sopenharmony_ci } 1711cb0ef41Sopenharmony_ci 1721cb0ef41Sopenharmony_ci pos.contextLimit += limit - pos.limit; 1731cb0ef41Sopenharmony_ci pos.limit = limit; 1741cb0ef41Sopenharmony_ci pos.start = start; 1751cb0ef41Sopenharmony_ci} 1761cb0ef41Sopenharmony_ci 1771cb0ef41Sopenharmony_ciU_NAMESPACE_END 1781cb0ef41Sopenharmony_ci 1791cb0ef41Sopenharmony_ci#endif /* #if !UCONFIG_NO_TRANSLITERATION */ 1801cb0ef41Sopenharmony_ci 1811cb0ef41Sopenharmony_ci//eof 182