12e5b6d6dSopenharmony_ci// © 2016 and later: Unicode, Inc. and others. 22e5b6d6dSopenharmony_ci// License & terms of use: http://www.unicode.org/copyright.html 32e5b6d6dSopenharmony_ci/* 42e5b6d6dSopenharmony_ci********************************************************************** 52e5b6d6dSopenharmony_ci* Copyright (c) 2001-2011, International Business Machines 62e5b6d6dSopenharmony_ci* Corporation and others. All Rights Reserved. 72e5b6d6dSopenharmony_ci********************************************************************** 82e5b6d6dSopenharmony_ci* Date Name Description 92e5b6d6dSopenharmony_ci* 11/19/2001 aliu Creation. 102e5b6d6dSopenharmony_ci********************************************************************** 112e5b6d6dSopenharmony_ci*/ 122e5b6d6dSopenharmony_ci 132e5b6d6dSopenharmony_ci#include "unicode/utypes.h" 142e5b6d6dSopenharmony_ci 152e5b6d6dSopenharmony_ci#if !UCONFIG_NO_TRANSLITERATION 162e5b6d6dSopenharmony_ci 172e5b6d6dSopenharmony_ci#include "unicode/utf16.h" 182e5b6d6dSopenharmony_ci#include "esctrn.h" 192e5b6d6dSopenharmony_ci#include "util.h" 202e5b6d6dSopenharmony_ci 212e5b6d6dSopenharmony_ciU_NAMESPACE_BEGIN 222e5b6d6dSopenharmony_ci 232e5b6d6dSopenharmony_cistatic const UChar UNIPRE[] = {85,43,0}; // "U+" 242e5b6d6dSopenharmony_cistatic const UChar BS_u[] = {92,117,0}; // "\\u" 252e5b6d6dSopenharmony_cistatic const UChar BS_U[] = {92,85,0}; // "\\U" 262e5b6d6dSopenharmony_cistatic const UChar XMLPRE[] = {38,35,120,0}; // "&#x" 272e5b6d6dSopenharmony_cistatic const UChar XML10PRE[] = {38,35,0}; // "&#" 282e5b6d6dSopenharmony_cistatic const UChar PERLPRE[] = {92,120,123,0}; // "\\x{" 292e5b6d6dSopenharmony_cistatic const UChar SEMI[] = {59,0}; // ";" 302e5b6d6dSopenharmony_cistatic const UChar RBRACE[] = {125,0}; // "}" 312e5b6d6dSopenharmony_ci 322e5b6d6dSopenharmony_ciUOBJECT_DEFINE_RTTI_IMPLEMENTATION(EscapeTransliterator) 332e5b6d6dSopenharmony_ci 342e5b6d6dSopenharmony_ci/** 352e5b6d6dSopenharmony_ci * Factory methods 362e5b6d6dSopenharmony_ci */ 372e5b6d6dSopenharmony_cistatic Transliterator* _createEscUnicode(const UnicodeString& ID, Transliterator::Token /*context*/) { 382e5b6d6dSopenharmony_ci // Unicode: "U+10FFFF" hex, min=4, max=6 392e5b6d6dSopenharmony_ci return new EscapeTransliterator(ID, UnicodeString(true, UNIPRE, 2), UnicodeString(), 16, 4, true, NULL); 402e5b6d6dSopenharmony_ci} 412e5b6d6dSopenharmony_cistatic Transliterator* _createEscJava(const UnicodeString& ID, Transliterator::Token /*context*/) { 422e5b6d6dSopenharmony_ci // Java: "\\uFFFF" hex, min=4, max=4 432e5b6d6dSopenharmony_ci return new EscapeTransliterator(ID, UnicodeString(true, BS_u, 2), UnicodeString(), 16, 4, false, NULL); 442e5b6d6dSopenharmony_ci} 452e5b6d6dSopenharmony_cistatic Transliterator* _createEscC(const UnicodeString& ID, Transliterator::Token /*context*/) { 462e5b6d6dSopenharmony_ci // C: "\\uFFFF" hex, min=4, max=4; \\U0010FFFF hex, min=8, max=8 472e5b6d6dSopenharmony_ci return new EscapeTransliterator(ID, UnicodeString(true, BS_u, 2), UnicodeString(), 16, 4, true, 482e5b6d6dSopenharmony_ci new EscapeTransliterator(UnicodeString(), UnicodeString(true, BS_U, 2), UnicodeString(), 16, 8, true, NULL)); 492e5b6d6dSopenharmony_ci} 502e5b6d6dSopenharmony_cistatic Transliterator* _createEscXML(const UnicodeString& ID, Transliterator::Token /*context*/) { 512e5b6d6dSopenharmony_ci // XML: "" hex, min=1, max=6 522e5b6d6dSopenharmony_ci return new EscapeTransliterator(ID, UnicodeString(true, XMLPRE, 3), UnicodeString(SEMI[0]), 16, 1, true, NULL); 532e5b6d6dSopenharmony_ci} 542e5b6d6dSopenharmony_cistatic Transliterator* _createEscXML10(const UnicodeString& ID, Transliterator::Token /*context*/) { 552e5b6d6dSopenharmony_ci // XML10: "&1114111;" dec, min=1, max=7 (not really "Any-Hex") 562e5b6d6dSopenharmony_ci return new EscapeTransliterator(ID, UnicodeString(true, XML10PRE, 2), UnicodeString(SEMI[0]), 10, 1, true, NULL); 572e5b6d6dSopenharmony_ci} 582e5b6d6dSopenharmony_cistatic Transliterator* _createEscPerl(const UnicodeString& ID, Transliterator::Token /*context*/) { 592e5b6d6dSopenharmony_ci // Perl: "\\x{263A}" hex, min=1, max=6 602e5b6d6dSopenharmony_ci return new EscapeTransliterator(ID, UnicodeString(true, PERLPRE, 3), UnicodeString(RBRACE[0]), 16, 1, true, NULL); 612e5b6d6dSopenharmony_ci} 622e5b6d6dSopenharmony_ci 632e5b6d6dSopenharmony_ci/** 642e5b6d6dSopenharmony_ci * Registers standard variants with the system. Called by 652e5b6d6dSopenharmony_ci * Transliterator during initialization. 662e5b6d6dSopenharmony_ci */ 672e5b6d6dSopenharmony_civoid EscapeTransliterator::registerIDs() { 682e5b6d6dSopenharmony_ci Token t = integerToken(0); 692e5b6d6dSopenharmony_ci 702e5b6d6dSopenharmony_ci Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Unicode"), _createEscUnicode, t); 712e5b6d6dSopenharmony_ci 722e5b6d6dSopenharmony_ci Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Java"), _createEscJava, t); 732e5b6d6dSopenharmony_ci 742e5b6d6dSopenharmony_ci Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/C"), _createEscC, t); 752e5b6d6dSopenharmony_ci 762e5b6d6dSopenharmony_ci Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/XML"), _createEscXML, t); 772e5b6d6dSopenharmony_ci 782e5b6d6dSopenharmony_ci Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/XML10"), _createEscXML10, t); 792e5b6d6dSopenharmony_ci 802e5b6d6dSopenharmony_ci Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Perl"), _createEscPerl, t); 812e5b6d6dSopenharmony_ci 822e5b6d6dSopenharmony_ci Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex"), _createEscJava, t); 832e5b6d6dSopenharmony_ci} 842e5b6d6dSopenharmony_ci 852e5b6d6dSopenharmony_ci/** 862e5b6d6dSopenharmony_ci * Constructs an escape transliterator with the given ID and 872e5b6d6dSopenharmony_ci * parameters. See the class member documentation for details. 882e5b6d6dSopenharmony_ci */ 892e5b6d6dSopenharmony_ciEscapeTransliterator::EscapeTransliterator(const UnicodeString& newID, 902e5b6d6dSopenharmony_ci const UnicodeString& _prefix, const UnicodeString& _suffix, 912e5b6d6dSopenharmony_ci int32_t _radix, int32_t _minDigits, 922e5b6d6dSopenharmony_ci UBool _grokSupplementals, 932e5b6d6dSopenharmony_ci EscapeTransliterator* adoptedSupplementalHandler) : 942e5b6d6dSopenharmony_ci Transliterator(newID, NULL) 952e5b6d6dSopenharmony_ci{ 962e5b6d6dSopenharmony_ci this->prefix = _prefix; 972e5b6d6dSopenharmony_ci this->suffix = _suffix; 982e5b6d6dSopenharmony_ci this->radix = _radix; 992e5b6d6dSopenharmony_ci this->minDigits = _minDigits; 1002e5b6d6dSopenharmony_ci this->grokSupplementals = _grokSupplementals; 1012e5b6d6dSopenharmony_ci this->supplementalHandler = adoptedSupplementalHandler; 1022e5b6d6dSopenharmony_ci} 1032e5b6d6dSopenharmony_ci 1042e5b6d6dSopenharmony_ci/** 1052e5b6d6dSopenharmony_ci * Copy constructor. 1062e5b6d6dSopenharmony_ci */ 1072e5b6d6dSopenharmony_ciEscapeTransliterator::EscapeTransliterator(const EscapeTransliterator& o) : 1082e5b6d6dSopenharmony_ci Transliterator(o), 1092e5b6d6dSopenharmony_ci prefix(o.prefix), 1102e5b6d6dSopenharmony_ci suffix(o.suffix), 1112e5b6d6dSopenharmony_ci radix(o.radix), 1122e5b6d6dSopenharmony_ci minDigits(o.minDigits), 1132e5b6d6dSopenharmony_ci grokSupplementals(o.grokSupplementals) { 1142e5b6d6dSopenharmony_ci supplementalHandler = (o.supplementalHandler != 0) ? 1152e5b6d6dSopenharmony_ci new EscapeTransliterator(*o.supplementalHandler) : NULL; 1162e5b6d6dSopenharmony_ci} 1172e5b6d6dSopenharmony_ci 1182e5b6d6dSopenharmony_ciEscapeTransliterator::~EscapeTransliterator() { 1192e5b6d6dSopenharmony_ci delete supplementalHandler; 1202e5b6d6dSopenharmony_ci} 1212e5b6d6dSopenharmony_ci 1222e5b6d6dSopenharmony_ci/** 1232e5b6d6dSopenharmony_ci * Transliterator API. 1242e5b6d6dSopenharmony_ci */ 1252e5b6d6dSopenharmony_ciEscapeTransliterator* EscapeTransliterator::clone() const { 1262e5b6d6dSopenharmony_ci return new EscapeTransliterator(*this); 1272e5b6d6dSopenharmony_ci} 1282e5b6d6dSopenharmony_ci 1292e5b6d6dSopenharmony_ci/** 1302e5b6d6dSopenharmony_ci * Implements {@link Transliterator#handleTransliterate}. 1312e5b6d6dSopenharmony_ci */ 1322e5b6d6dSopenharmony_civoid EscapeTransliterator::handleTransliterate(Replaceable& text, 1332e5b6d6dSopenharmony_ci UTransPosition& pos, 1342e5b6d6dSopenharmony_ci UBool /*isIncremental*/) const 1352e5b6d6dSopenharmony_ci{ 1362e5b6d6dSopenharmony_ci /* TODO: Verify that isIncremental can be ignored */ 1372e5b6d6dSopenharmony_ci int32_t start = pos.start; 1382e5b6d6dSopenharmony_ci int32_t limit = pos.limit; 1392e5b6d6dSopenharmony_ci 1402e5b6d6dSopenharmony_ci UnicodeString buf(prefix); 1412e5b6d6dSopenharmony_ci int32_t prefixLen = prefix.length(); 1422e5b6d6dSopenharmony_ci UBool redoPrefix = false; 1432e5b6d6dSopenharmony_ci 1442e5b6d6dSopenharmony_ci while (start < limit) { 1452e5b6d6dSopenharmony_ci int32_t c = grokSupplementals ? text.char32At(start) : text.charAt(start); 1462e5b6d6dSopenharmony_ci int32_t charLen = grokSupplementals ? U16_LENGTH(c) : 1; 1472e5b6d6dSopenharmony_ci 1482e5b6d6dSopenharmony_ci if ((c & 0xFFFF0000) != 0 && supplementalHandler != NULL) { 1492e5b6d6dSopenharmony_ci buf.truncate(0); 1502e5b6d6dSopenharmony_ci buf.append(supplementalHandler->prefix); 1512e5b6d6dSopenharmony_ci ICU_Utility::appendNumber(buf, c, supplementalHandler->radix, 1522e5b6d6dSopenharmony_ci supplementalHandler->minDigits); 1532e5b6d6dSopenharmony_ci buf.append(supplementalHandler->suffix); 1542e5b6d6dSopenharmony_ci redoPrefix = true; 1552e5b6d6dSopenharmony_ci } else { 1562e5b6d6dSopenharmony_ci if (redoPrefix) { 1572e5b6d6dSopenharmony_ci buf.truncate(0); 1582e5b6d6dSopenharmony_ci buf.append(prefix); 1592e5b6d6dSopenharmony_ci redoPrefix = false; 1602e5b6d6dSopenharmony_ci } else { 1612e5b6d6dSopenharmony_ci buf.truncate(prefixLen); 1622e5b6d6dSopenharmony_ci } 1632e5b6d6dSopenharmony_ci ICU_Utility::appendNumber(buf, c, radix, minDigits); 1642e5b6d6dSopenharmony_ci buf.append(suffix); 1652e5b6d6dSopenharmony_ci } 1662e5b6d6dSopenharmony_ci 1672e5b6d6dSopenharmony_ci text.handleReplaceBetween(start, start + charLen, buf); 1682e5b6d6dSopenharmony_ci start += buf.length(); 1692e5b6d6dSopenharmony_ci limit += buf.length() - charLen; 1702e5b6d6dSopenharmony_ci } 1712e5b6d6dSopenharmony_ci 1722e5b6d6dSopenharmony_ci pos.contextLimit += limit - pos.limit; 1732e5b6d6dSopenharmony_ci pos.limit = limit; 1742e5b6d6dSopenharmony_ci pos.start = start; 1752e5b6d6dSopenharmony_ci} 1762e5b6d6dSopenharmony_ci 1772e5b6d6dSopenharmony_ciU_NAMESPACE_END 1782e5b6d6dSopenharmony_ci 1792e5b6d6dSopenharmony_ci#endif /* #if !UCONFIG_NO_TRANSLITERATION */ 1802e5b6d6dSopenharmony_ci 1812e5b6d6dSopenharmony_ci//eof 182