11cb0ef41Sopenharmony_ci// © 2016 and later: Unicode, Inc. and others. 21cb0ef41Sopenharmony_ci// License & terms of use: http://www.unicode.org/copyright.html 31cb0ef41Sopenharmony_ci/* 41cb0ef41Sopenharmony_ci*************************************************************************** 51cb0ef41Sopenharmony_ci* Copyright (C) 2008-2015, International Business Machines Corporation 61cb0ef41Sopenharmony_ci* and others. All Rights Reserved. 71cb0ef41Sopenharmony_ci*************************************************************************** 81cb0ef41Sopenharmony_ci* file name: uspoof.cpp 91cb0ef41Sopenharmony_ci* encoding: UTF-8 101cb0ef41Sopenharmony_ci* tab size: 8 (not used) 111cb0ef41Sopenharmony_ci* indentation:4 121cb0ef41Sopenharmony_ci* 131cb0ef41Sopenharmony_ci* created on: 2008Feb13 141cb0ef41Sopenharmony_ci* created by: Andy Heninger 151cb0ef41Sopenharmony_ci* 161cb0ef41Sopenharmony_ci* Unicode Spoof Detection 171cb0ef41Sopenharmony_ci*/ 181cb0ef41Sopenharmony_ci#include "unicode/ubidi.h" 191cb0ef41Sopenharmony_ci#include "unicode/utypes.h" 201cb0ef41Sopenharmony_ci#include "unicode/normalizer2.h" 211cb0ef41Sopenharmony_ci#include "unicode/uspoof.h" 221cb0ef41Sopenharmony_ci#include "unicode/ustring.h" 231cb0ef41Sopenharmony_ci#include "unicode/utf16.h" 241cb0ef41Sopenharmony_ci#include "cmemory.h" 251cb0ef41Sopenharmony_ci#include "cstring.h" 261cb0ef41Sopenharmony_ci#include "mutex.h" 271cb0ef41Sopenharmony_ci#include "scriptset.h" 281cb0ef41Sopenharmony_ci#include "uassert.h" 291cb0ef41Sopenharmony_ci#include "ucln_in.h" 301cb0ef41Sopenharmony_ci#include "uspoof_impl.h" 311cb0ef41Sopenharmony_ci#include "umutex.h" 321cb0ef41Sopenharmony_ci 331cb0ef41Sopenharmony_ci 341cb0ef41Sopenharmony_ci#if !UCONFIG_NO_NORMALIZATION 351cb0ef41Sopenharmony_ci 361cb0ef41Sopenharmony_ciU_NAMESPACE_USE 371cb0ef41Sopenharmony_ci 381cb0ef41Sopenharmony_ci 391cb0ef41Sopenharmony_ci// 401cb0ef41Sopenharmony_ci// Static Objects used by the spoof impl, their thread safe initialization and their cleanup. 411cb0ef41Sopenharmony_ci// 421cb0ef41Sopenharmony_cistatic UnicodeSet *gInclusionSet = nullptr; 431cb0ef41Sopenharmony_cistatic UnicodeSet *gRecommendedSet = nullptr; 441cb0ef41Sopenharmony_cistatic const Normalizer2 *gNfdNormalizer = nullptr; 451cb0ef41Sopenharmony_cistatic UInitOnce gSpoofInitStaticsOnce {}; 461cb0ef41Sopenharmony_ci 471cb0ef41Sopenharmony_cinamespace { 481cb0ef41Sopenharmony_ci 491cb0ef41Sopenharmony_ciUBool U_CALLCONV 501cb0ef41Sopenharmony_ciuspoof_cleanup() { 511cb0ef41Sopenharmony_ci delete gInclusionSet; 521cb0ef41Sopenharmony_ci gInclusionSet = nullptr; 531cb0ef41Sopenharmony_ci delete gRecommendedSet; 541cb0ef41Sopenharmony_ci gRecommendedSet = nullptr; 551cb0ef41Sopenharmony_ci gNfdNormalizer = nullptr; 561cb0ef41Sopenharmony_ci gSpoofInitStaticsOnce.reset(); 571cb0ef41Sopenharmony_ci return true; 581cb0ef41Sopenharmony_ci} 591cb0ef41Sopenharmony_ci 601cb0ef41Sopenharmony_civoid U_CALLCONV initializeStatics(UErrorCode &status) { 611cb0ef41Sopenharmony_ci static const char16_t *inclusionPat = 621cb0ef41Sopenharmony_ci u"['\\-.\\:\\u00B7\\u0375\\u058A\\u05F3\\u05F4\\u06FD\\u06FE\\u0F0B\\u2010" 631cb0ef41Sopenharmony_ci u"\\u2019\\u2027\\u30A0\\u30FB]"; 641cb0ef41Sopenharmony_ci gInclusionSet = new UnicodeSet(UnicodeString(inclusionPat), status); 651cb0ef41Sopenharmony_ci if (gInclusionSet == nullptr) { 661cb0ef41Sopenharmony_ci status = U_MEMORY_ALLOCATION_ERROR; 671cb0ef41Sopenharmony_ci return; 681cb0ef41Sopenharmony_ci } 691cb0ef41Sopenharmony_ci gInclusionSet->freeze(); 701cb0ef41Sopenharmony_ci 711cb0ef41Sopenharmony_ci // Note: data from IdentifierStatus.txt & IdentifierType.txt 721cb0ef41Sopenharmony_ci // There is tooling to generate this constant in the unicodetools project: 731cb0ef41Sopenharmony_ci // org.unicode.text.tools.RecommendedSetGenerator 741cb0ef41Sopenharmony_ci // It will print the Java and C++ code to the console for easy copy-paste into this file. 751cb0ef41Sopenharmony_ci static const char16_t *recommendedPat = 761cb0ef41Sopenharmony_ci u"[0-9A-Z_a-z\\u00C0-\\u00D6\\u00D8-\\u00F6\\u00F8-\\u0131\\u0134-\\u013E" 771cb0ef41Sopenharmony_ci u"\\u0141-\\u0148\\u014A-\\u017E\\u018F\\u01A0\\u01A1\\u01AF\\u01B0\\u01CD-" 781cb0ef41Sopenharmony_ci u"\\u01DC\\u01DE-\\u01E3\\u01E6-\\u01F0\\u01F4\\u01F5\\u01F8-\\u021B\\u021E" 791cb0ef41Sopenharmony_ci u"\\u021F\\u0226-\\u0233\\u0259\\u02BB\\u02BC\\u02EC\\u0300-\\u0304\\u0306-" 801cb0ef41Sopenharmony_ci u"\\u030C\\u030F-\\u0311\\u0313\\u0314\\u031B\\u0323-\\u0328\\u032D\\u032E" 811cb0ef41Sopenharmony_ci u"\\u0330\\u0331\\u0335\\u0338\\u0339\\u0342\\u0345\\u037B-\\u037D\\u0386" 821cb0ef41Sopenharmony_ci u"\\u0388-\\u038A\\u038C\\u038E-\\u03A1\\u03A3-\\u03CE\\u03FC-\\u045F\\u048A-" 831cb0ef41Sopenharmony_ci u"\\u04FF\\u0510-\\u0529\\u052E\\u052F\\u0531-\\u0556\\u0559\\u0561-\\u0586" 841cb0ef41Sopenharmony_ci u"\\u05B4\\u05D0-\\u05EA\\u05EF-\\u05F2\\u0620-\\u063F\\u0641-\\u0655\\u0660-" 851cb0ef41Sopenharmony_ci u"\\u0669\\u0670-\\u0672\\u0674\\u0679-\\u068D\\u068F-\\u06A0\\u06A2-\\u06D3" 861cb0ef41Sopenharmony_ci u"\\u06D5\\u06E5\\u06E6\\u06EE-\\u06FC\\u06FF\\u0750-\\u07B1\\u0870-\\u0887" 871cb0ef41Sopenharmony_ci u"\\u0889-\\u088E\\u08A0-\\u08AC\\u08B2\\u08B5-\\u08C9\\u0901-\\u094D\\u094F" 881cb0ef41Sopenharmony_ci u"\\u0950\\u0956\\u0957\\u0960-\\u0963\\u0966-\\u096F\\u0971-\\u0977\\u0979-" 891cb0ef41Sopenharmony_ci u"\\u097F\\u0981-\\u0983\\u0985-\\u098C\\u098F\\u0990\\u0993-\\u09A8\\u09AA-" 901cb0ef41Sopenharmony_ci u"\\u09B0\\u09B2\\u09B6-\\u09B9\\u09BC-\\u09C4\\u09C7\\u09C8\\u09CB-\\u09CE" 911cb0ef41Sopenharmony_ci u"\\u09D7\\u09E0-\\u09E3\\u09E6-\\u09F1\\u09FE\\u0A01-\\u0A03\\u0A05-\\u0A0A" 921cb0ef41Sopenharmony_ci u"\\u0A0F\\u0A10\\u0A13-\\u0A28\\u0A2A-\\u0A30\\u0A32\\u0A35\\u0A38\\u0A39" 931cb0ef41Sopenharmony_ci u"\\u0A3C\\u0A3E-\\u0A42\\u0A47\\u0A48\\u0A4B-\\u0A4D\\u0A5C\\u0A66-\\u0A74" 941cb0ef41Sopenharmony_ci u"\\u0A81-\\u0A83\\u0A85-\\u0A8D\\u0A8F-\\u0A91\\u0A93-\\u0AA8\\u0AAA-\\u0AB0" 951cb0ef41Sopenharmony_ci u"\\u0AB2\\u0AB3\\u0AB5-\\u0AB9\\u0ABC-\\u0AC5\\u0AC7-\\u0AC9\\u0ACB-\\u0ACD" 961cb0ef41Sopenharmony_ci u"\\u0AD0\\u0AE0-\\u0AE3\\u0AE6-\\u0AEF\\u0AFA-\\u0AFF\\u0B01-\\u0B03\\u0B05-" 971cb0ef41Sopenharmony_ci u"\\u0B0C\\u0B0F\\u0B10\\u0B13-\\u0B28\\u0B2A-\\u0B30\\u0B32\\u0B33\\u0B35-" 981cb0ef41Sopenharmony_ci u"\\u0B39\\u0B3C-\\u0B43\\u0B47\\u0B48\\u0B4B-\\u0B4D\\u0B55-\\u0B57\\u0B5F-" 991cb0ef41Sopenharmony_ci u"\\u0B61\\u0B66-\\u0B6F\\u0B71\\u0B82\\u0B83\\u0B85-\\u0B8A\\u0B8E-\\u0B90" 1001cb0ef41Sopenharmony_ci u"\\u0B92-\\u0B95\\u0B99\\u0B9A\\u0B9C\\u0B9E\\u0B9F\\u0BA3\\u0BA4\\u0BA8-" 1011cb0ef41Sopenharmony_ci u"\\u0BAA\\u0BAE-\\u0BB9\\u0BBE-\\u0BC2\\u0BC6-\\u0BC8\\u0BCA-\\u0BCD\\u0BD0" 1021cb0ef41Sopenharmony_ci u"\\u0BD7\\u0BE6-\\u0BEF\\u0C01-\\u0C0C\\u0C0E-\\u0C10\\u0C12-\\u0C28\\u0C2A-" 1031cb0ef41Sopenharmony_ci u"\\u0C33\\u0C35-\\u0C39\\u0C3C-\\u0C44\\u0C46-\\u0C48\\u0C4A-\\u0C4D\\u0C55" 1041cb0ef41Sopenharmony_ci u"\\u0C56\\u0C5D\\u0C60\\u0C61\\u0C66-\\u0C6F\\u0C80\\u0C82\\u0C83\\u0C85-" 1051cb0ef41Sopenharmony_ci u"\\u0C8C\\u0C8E-\\u0C90\\u0C92-\\u0CA8\\u0CAA-\\u0CB3\\u0CB5-\\u0CB9\\u0CBC-" 1061cb0ef41Sopenharmony_ci u"\\u0CC4\\u0CC6-\\u0CC8\\u0CCA-\\u0CCD\\u0CD5\\u0CD6\\u0CDD\\u0CE0-\\u0CE3" 1071cb0ef41Sopenharmony_ci u"\\u0CE6-\\u0CEF\\u0CF1-\\u0CF3\\u0D00\\u0D02\\u0D03\\u0D05-\\u0D0C\\u0D0E-" 1081cb0ef41Sopenharmony_ci u"\\u0D10\\u0D12-\\u0D3A\\u0D3D-\\u0D43\\u0D46-\\u0D48\\u0D4A-\\u0D4E\\u0D54-" 1091cb0ef41Sopenharmony_ci u"\\u0D57\\u0D60\\u0D61\\u0D66-\\u0D6F\\u0D7A-\\u0D7F\\u0D82\\u0D83\\u0D85-" 1101cb0ef41Sopenharmony_ci u"\\u0D8E\\u0D91-\\u0D96\\u0D9A-\\u0DA5\\u0DA7-\\u0DB1\\u0DB3-\\u0DBB\\u0DBD" 1111cb0ef41Sopenharmony_ci u"\\u0DC0-\\u0DC6\\u0DCA\\u0DCF-\\u0DD4\\u0DD6\\u0DD8-\\u0DDE\\u0DF2\\u0E01-" 1121cb0ef41Sopenharmony_ci u"\\u0E32\\u0E34-\\u0E3A\\u0E40-\\u0E4E\\u0E50-\\u0E59\\u0E81\\u0E82\\u0E84" 1131cb0ef41Sopenharmony_ci u"\\u0E86-\\u0E8A\\u0E8C-\\u0EA3\\u0EA5\\u0EA7-\\u0EB2\\u0EB4-\\u0EBD\\u0EC0-" 1141cb0ef41Sopenharmony_ci u"\\u0EC4\\u0EC6\\u0EC8-\\u0ECE\\u0ED0-\\u0ED9\\u0EDE\\u0EDF\\u0F00\\u0F20-" 1151cb0ef41Sopenharmony_ci u"\\u0F29\\u0F35\\u0F37\\u0F3E-\\u0F42\\u0F44-\\u0F47\\u0F49-\\u0F4C\\u0F4E-" 1161cb0ef41Sopenharmony_ci u"\\u0F51\\u0F53-\\u0F56\\u0F58-\\u0F5B\\u0F5D-\\u0F68\\u0F6A-\\u0F6C\\u0F71" 1171cb0ef41Sopenharmony_ci u"\\u0F72\\u0F74\\u0F7A-\\u0F80\\u0F82-\\u0F84\\u0F86-\\u0F92\\u0F94-\\u0F97" 1181cb0ef41Sopenharmony_ci u"\\u0F99-\\u0F9C\\u0F9E-\\u0FA1\\u0FA3-\\u0FA6\\u0FA8-\\u0FAB\\u0FAD-\\u0FB8" 1191cb0ef41Sopenharmony_ci u"\\u0FBA-\\u0FBC\\u0FC6\\u1000-\\u1049\\u1050-\\u109D\\u10C7\\u10CD\\u10D0-" 1201cb0ef41Sopenharmony_ci u"\\u10F0\\u10F7-\\u10FA\\u10FD-\\u10FF\\u1200-\\u1248\\u124A-\\u124D\\u1250-" 1211cb0ef41Sopenharmony_ci u"\\u1256\\u1258\\u125A-\\u125D\\u1260-\\u1288\\u128A-\\u128D\\u1290-\\u12B0" 1221cb0ef41Sopenharmony_ci u"\\u12B2-\\u12B5\\u12B8-\\u12BE\\u12C0\\u12C2-\\u12C5\\u12C8-\\u12D6\\u12D8-" 1231cb0ef41Sopenharmony_ci u"\\u1310\\u1312-\\u1315\\u1318-\\u135A\\u135D-\\u135F\\u1380-\\u138F\\u1780-" 1241cb0ef41Sopenharmony_ci u"\\u17A2\\u17A5-\\u17A7\\u17A9-\\u17B3\\u17B6-\\u17CD\\u17D0\\u17D2\\u17D7" 1251cb0ef41Sopenharmony_ci u"\\u17DC\\u17E0-\\u17E9\\u1C90-\\u1CBA\\u1CBD-\\u1CBF\\u1E00-\\u1E99\\u1E9E" 1261cb0ef41Sopenharmony_ci u"\\u1EA0-\\u1EF9\\u1F00-\\u1F15\\u1F18-\\u1F1D\\u1F20-\\u1F45\\u1F48-\\u1F4D" 1271cb0ef41Sopenharmony_ci u"\\u1F50-\\u1F57\\u1F59\\u1F5B\\u1F5D\\u1F5F-\\u1F70\\u1F72\\u1F74\\u1F76" 1281cb0ef41Sopenharmony_ci u"\\u1F78\\u1F7A\\u1F7C\\u1F80-\\u1FB4\\u1FB6-\\u1FBA\\u1FBC\\u1FC2-\\u1FC4" 1291cb0ef41Sopenharmony_ci u"\\u1FC6-\\u1FC8\\u1FCA\\u1FCC\\u1FD0-\\u1FD2\\u1FD6-\\u1FDA\\u1FE0-\\u1FE2" 1301cb0ef41Sopenharmony_ci u"\\u1FE4-\\u1FEA\\u1FEC\\u1FF2-\\u1FF4\\u1FF6-\\u1FF8\\u1FFA\\u1FFC\\u2D27" 1311cb0ef41Sopenharmony_ci u"\\u2D2D\\u2D80-\\u2D96\\u2DA0-\\u2DA6\\u2DA8-\\u2DAE\\u2DB0-\\u2DB6\\u2DB8-" 1321cb0ef41Sopenharmony_ci u"\\u2DBE\\u2DC0-\\u2DC6\\u2DC8-\\u2DCE\\u2DD0-\\u2DD6\\u2DD8-\\u2DDE\\u3005-" 1331cb0ef41Sopenharmony_ci u"\\u3007\\u3041-\\u3096\\u3099\\u309A\\u309D\\u309E\\u30A1-\\u30FA\\u30FC-" 1341cb0ef41Sopenharmony_ci u"\\u30FE\\u3105-\\u312D\\u312F\\u31A0-\\u31BF\\u3400-\\u4DBF\\u4E00-\\u9FFF" 1351cb0ef41Sopenharmony_ci u"\\uA67F\\uA717-\\uA71F\\uA788\\uA78D\\uA792\\uA793\\uA7AA\\uA7C0-\\uA7CA" 1361cb0ef41Sopenharmony_ci u"\\uA7D0\\uA7D1\\uA7D3\\uA7D5-\\uA7D9\\uA9E7-\\uA9FE\\uAA60-\\uAA76\\uAA7A-" 1371cb0ef41Sopenharmony_ci u"\\uAA7F\\uAB01-\\uAB06\\uAB09-\\uAB0E\\uAB11-\\uAB16\\uAB20-\\uAB26\\uAB28-" 1381cb0ef41Sopenharmony_ci u"\\uAB2E\\uAB66\\uAB67\\uAC00-\\uD7A3\\uFA0E\\uFA0F\\uFA11\\uFA13\\uFA14" 1391cb0ef41Sopenharmony_ci u"\\uFA1F\\uFA21\\uFA23\\uFA24\\uFA27-\\uFA29\\U00011301\\U00011303" 1401cb0ef41Sopenharmony_ci u"\\U0001133B\\U0001133C\\U00016FF0\\U00016FF1\\U0001B11F-\\U0001B122" 1411cb0ef41Sopenharmony_ci u"\\U0001B132\\U0001B150-\\U0001B152\\U0001B155\\U0001B164-\\U0001B167" 1421cb0ef41Sopenharmony_ci u"\\U0001DF00-\\U0001DF1E\\U0001DF25-\\U0001DF2A\\U0001E08F\\U0001E7E0-" 1431cb0ef41Sopenharmony_ci u"\\U0001E7E6\\U0001E7E8-\\U0001E7EB\\U0001E7ED\\U0001E7EE\\U0001E7F0-" 1441cb0ef41Sopenharmony_ci u"\\U0001E7FE\\U00020000-\\U0002A6DF\\U0002A700-\\U0002B739\\U0002B740-" 1451cb0ef41Sopenharmony_ci u"\\U0002B81D\\U0002B820-\\U0002CEA1\\U0002CEB0-\\U0002EBE0\\U0002EBF0-" 1461cb0ef41Sopenharmony_ci u"\\U0002EE5D\\U00030000-\\U0003134A\\U00031350-\\U000323AF]"; 1471cb0ef41Sopenharmony_ci 1481cb0ef41Sopenharmony_ci gRecommendedSet = new UnicodeSet(UnicodeString(recommendedPat), status); 1491cb0ef41Sopenharmony_ci if (gRecommendedSet == nullptr) { 1501cb0ef41Sopenharmony_ci status = U_MEMORY_ALLOCATION_ERROR; 1511cb0ef41Sopenharmony_ci delete gInclusionSet; 1521cb0ef41Sopenharmony_ci return; 1531cb0ef41Sopenharmony_ci } 1541cb0ef41Sopenharmony_ci gRecommendedSet->freeze(); 1551cb0ef41Sopenharmony_ci gNfdNormalizer = Normalizer2::getNFDInstance(status); 1561cb0ef41Sopenharmony_ci ucln_i18n_registerCleanup(UCLN_I18N_SPOOF, uspoof_cleanup); 1571cb0ef41Sopenharmony_ci} 1581cb0ef41Sopenharmony_ci 1591cb0ef41Sopenharmony_ci} // namespace 1601cb0ef41Sopenharmony_ci 1611cb0ef41Sopenharmony_ciU_CFUNC void uspoof_internalInitStatics(UErrorCode *status) { 1621cb0ef41Sopenharmony_ci umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status); 1631cb0ef41Sopenharmony_ci} 1641cb0ef41Sopenharmony_ci 1651cb0ef41Sopenharmony_ciU_CAPI USpoofChecker * U_EXPORT2 1661cb0ef41Sopenharmony_ciuspoof_open(UErrorCode *status) { 1671cb0ef41Sopenharmony_ci umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status); 1681cb0ef41Sopenharmony_ci if (U_FAILURE(*status)) { 1691cb0ef41Sopenharmony_ci return nullptr; 1701cb0ef41Sopenharmony_ci } 1711cb0ef41Sopenharmony_ci SpoofImpl *si = new SpoofImpl(*status); 1721cb0ef41Sopenharmony_ci if (si == nullptr) { 1731cb0ef41Sopenharmony_ci *status = U_MEMORY_ALLOCATION_ERROR; 1741cb0ef41Sopenharmony_ci return nullptr; 1751cb0ef41Sopenharmony_ci } 1761cb0ef41Sopenharmony_ci if (U_FAILURE(*status)) { 1771cb0ef41Sopenharmony_ci delete si; 1781cb0ef41Sopenharmony_ci return nullptr; 1791cb0ef41Sopenharmony_ci } 1801cb0ef41Sopenharmony_ci return si->asUSpoofChecker(); 1811cb0ef41Sopenharmony_ci} 1821cb0ef41Sopenharmony_ci 1831cb0ef41Sopenharmony_ci 1841cb0ef41Sopenharmony_ciU_CAPI USpoofChecker * U_EXPORT2 1851cb0ef41Sopenharmony_ciuspoof_openFromSerialized(const void *data, int32_t length, int32_t *pActualLength, 1861cb0ef41Sopenharmony_ci UErrorCode *status) { 1871cb0ef41Sopenharmony_ci if (U_FAILURE(*status)) { 1881cb0ef41Sopenharmony_ci return nullptr; 1891cb0ef41Sopenharmony_ci } 1901cb0ef41Sopenharmony_ci 1911cb0ef41Sopenharmony_ci if (data == nullptr) { 1921cb0ef41Sopenharmony_ci *status = U_ILLEGAL_ARGUMENT_ERROR; 1931cb0ef41Sopenharmony_ci return nullptr; 1941cb0ef41Sopenharmony_ci } 1951cb0ef41Sopenharmony_ci 1961cb0ef41Sopenharmony_ci umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status); 1971cb0ef41Sopenharmony_ci if (U_FAILURE(*status)) 1981cb0ef41Sopenharmony_ci { 1991cb0ef41Sopenharmony_ci return nullptr; 2001cb0ef41Sopenharmony_ci } 2011cb0ef41Sopenharmony_ci 2021cb0ef41Sopenharmony_ci SpoofData *sd = new SpoofData(data, length, *status); 2031cb0ef41Sopenharmony_ci if (sd == nullptr) { 2041cb0ef41Sopenharmony_ci *status = U_MEMORY_ALLOCATION_ERROR; 2051cb0ef41Sopenharmony_ci return nullptr; 2061cb0ef41Sopenharmony_ci } 2071cb0ef41Sopenharmony_ci 2081cb0ef41Sopenharmony_ci if (U_FAILURE(*status)) { 2091cb0ef41Sopenharmony_ci delete sd; 2101cb0ef41Sopenharmony_ci return nullptr; 2111cb0ef41Sopenharmony_ci } 2121cb0ef41Sopenharmony_ci 2131cb0ef41Sopenharmony_ci SpoofImpl *si = new SpoofImpl(sd, *status); 2141cb0ef41Sopenharmony_ci if (si == nullptr) { 2151cb0ef41Sopenharmony_ci *status = U_MEMORY_ALLOCATION_ERROR; 2161cb0ef41Sopenharmony_ci delete sd; // explicit delete as the destructor for si won't be called. 2171cb0ef41Sopenharmony_ci return nullptr; 2181cb0ef41Sopenharmony_ci } 2191cb0ef41Sopenharmony_ci 2201cb0ef41Sopenharmony_ci if (U_FAILURE(*status)) { 2211cb0ef41Sopenharmony_ci delete si; // no delete for sd, as the si destructor will delete it. 2221cb0ef41Sopenharmony_ci return nullptr; 2231cb0ef41Sopenharmony_ci } 2241cb0ef41Sopenharmony_ci 2251cb0ef41Sopenharmony_ci if (pActualLength != nullptr) { 2261cb0ef41Sopenharmony_ci *pActualLength = sd->size(); 2271cb0ef41Sopenharmony_ci } 2281cb0ef41Sopenharmony_ci return si->asUSpoofChecker(); 2291cb0ef41Sopenharmony_ci} 2301cb0ef41Sopenharmony_ci 2311cb0ef41Sopenharmony_ci 2321cb0ef41Sopenharmony_ciU_CAPI USpoofChecker * U_EXPORT2 2331cb0ef41Sopenharmony_ciuspoof_clone(const USpoofChecker *sc, UErrorCode *status) { 2341cb0ef41Sopenharmony_ci const SpoofImpl *src = SpoofImpl::validateThis(sc, *status); 2351cb0ef41Sopenharmony_ci if (src == nullptr) { 2361cb0ef41Sopenharmony_ci return nullptr; 2371cb0ef41Sopenharmony_ci } 2381cb0ef41Sopenharmony_ci SpoofImpl *result = new SpoofImpl(*src, *status); // copy constructor 2391cb0ef41Sopenharmony_ci if (result == nullptr) { 2401cb0ef41Sopenharmony_ci *status = U_MEMORY_ALLOCATION_ERROR; 2411cb0ef41Sopenharmony_ci return nullptr; 2421cb0ef41Sopenharmony_ci } 2431cb0ef41Sopenharmony_ci if (U_FAILURE(*status)) { 2441cb0ef41Sopenharmony_ci delete result; 2451cb0ef41Sopenharmony_ci result = nullptr; 2461cb0ef41Sopenharmony_ci } 2471cb0ef41Sopenharmony_ci return result->asUSpoofChecker(); 2481cb0ef41Sopenharmony_ci} 2491cb0ef41Sopenharmony_ci 2501cb0ef41Sopenharmony_ci 2511cb0ef41Sopenharmony_ciU_CAPI void U_EXPORT2 2521cb0ef41Sopenharmony_ciuspoof_close(USpoofChecker *sc) { 2531cb0ef41Sopenharmony_ci UErrorCode status = U_ZERO_ERROR; 2541cb0ef41Sopenharmony_ci SpoofImpl *This = SpoofImpl::validateThis(sc, status); 2551cb0ef41Sopenharmony_ci delete This; 2561cb0ef41Sopenharmony_ci} 2571cb0ef41Sopenharmony_ci 2581cb0ef41Sopenharmony_ci 2591cb0ef41Sopenharmony_ciU_CAPI void U_EXPORT2 2601cb0ef41Sopenharmony_ciuspoof_setChecks(USpoofChecker *sc, int32_t checks, UErrorCode *status) { 2611cb0ef41Sopenharmony_ci SpoofImpl *This = SpoofImpl::validateThis(sc, *status); 2621cb0ef41Sopenharmony_ci if (This == nullptr) { 2631cb0ef41Sopenharmony_ci return; 2641cb0ef41Sopenharmony_ci } 2651cb0ef41Sopenharmony_ci 2661cb0ef41Sopenharmony_ci // Verify that the requested checks are all ones (bits) that 2671cb0ef41Sopenharmony_ci // are acceptable, known values. 2681cb0ef41Sopenharmony_ci if (checks & ~(USPOOF_ALL_CHECKS | USPOOF_AUX_INFO)) { 2691cb0ef41Sopenharmony_ci *status = U_ILLEGAL_ARGUMENT_ERROR; 2701cb0ef41Sopenharmony_ci return; 2711cb0ef41Sopenharmony_ci } 2721cb0ef41Sopenharmony_ci 2731cb0ef41Sopenharmony_ci This->fChecks = checks; 2741cb0ef41Sopenharmony_ci} 2751cb0ef41Sopenharmony_ci 2761cb0ef41Sopenharmony_ci 2771cb0ef41Sopenharmony_ciU_CAPI int32_t U_EXPORT2 2781cb0ef41Sopenharmony_ciuspoof_getChecks(const USpoofChecker *sc, UErrorCode *status) { 2791cb0ef41Sopenharmony_ci const SpoofImpl *This = SpoofImpl::validateThis(sc, *status); 2801cb0ef41Sopenharmony_ci if (This == nullptr) { 2811cb0ef41Sopenharmony_ci return 0; 2821cb0ef41Sopenharmony_ci } 2831cb0ef41Sopenharmony_ci return This->fChecks; 2841cb0ef41Sopenharmony_ci} 2851cb0ef41Sopenharmony_ci 2861cb0ef41Sopenharmony_ciU_CAPI void U_EXPORT2 2871cb0ef41Sopenharmony_ciuspoof_setRestrictionLevel(USpoofChecker *sc, URestrictionLevel restrictionLevel) { 2881cb0ef41Sopenharmony_ci UErrorCode status = U_ZERO_ERROR; 2891cb0ef41Sopenharmony_ci SpoofImpl *This = SpoofImpl::validateThis(sc, status); 2901cb0ef41Sopenharmony_ci if (This != nullptr) { 2911cb0ef41Sopenharmony_ci This->fRestrictionLevel = restrictionLevel; 2921cb0ef41Sopenharmony_ci This->fChecks |= USPOOF_RESTRICTION_LEVEL; 2931cb0ef41Sopenharmony_ci } 2941cb0ef41Sopenharmony_ci} 2951cb0ef41Sopenharmony_ci 2961cb0ef41Sopenharmony_ciU_CAPI URestrictionLevel U_EXPORT2 2971cb0ef41Sopenharmony_ciuspoof_getRestrictionLevel(const USpoofChecker *sc) { 2981cb0ef41Sopenharmony_ci UErrorCode status = U_ZERO_ERROR; 2991cb0ef41Sopenharmony_ci const SpoofImpl *This = SpoofImpl::validateThis(sc, status); 3001cb0ef41Sopenharmony_ci if (This == nullptr) { 3011cb0ef41Sopenharmony_ci return USPOOF_UNRESTRICTIVE; 3021cb0ef41Sopenharmony_ci } 3031cb0ef41Sopenharmony_ci return This->fRestrictionLevel; 3041cb0ef41Sopenharmony_ci} 3051cb0ef41Sopenharmony_ci 3061cb0ef41Sopenharmony_ciU_CAPI void U_EXPORT2 3071cb0ef41Sopenharmony_ciuspoof_setAllowedLocales(USpoofChecker *sc, const char *localesList, UErrorCode *status) { 3081cb0ef41Sopenharmony_ci SpoofImpl *This = SpoofImpl::validateThis(sc, *status); 3091cb0ef41Sopenharmony_ci if (This == nullptr) { 3101cb0ef41Sopenharmony_ci return; 3111cb0ef41Sopenharmony_ci } 3121cb0ef41Sopenharmony_ci This->setAllowedLocales(localesList, *status); 3131cb0ef41Sopenharmony_ci} 3141cb0ef41Sopenharmony_ci 3151cb0ef41Sopenharmony_ciU_CAPI const char * U_EXPORT2 3161cb0ef41Sopenharmony_ciuspoof_getAllowedLocales(USpoofChecker *sc, UErrorCode *status) { 3171cb0ef41Sopenharmony_ci SpoofImpl *This = SpoofImpl::validateThis(sc, *status); 3181cb0ef41Sopenharmony_ci if (This == nullptr) { 3191cb0ef41Sopenharmony_ci return nullptr; 3201cb0ef41Sopenharmony_ci } 3211cb0ef41Sopenharmony_ci return This->getAllowedLocales(*status); 3221cb0ef41Sopenharmony_ci} 3231cb0ef41Sopenharmony_ci 3241cb0ef41Sopenharmony_ci 3251cb0ef41Sopenharmony_ciU_CAPI const USet * U_EXPORT2 3261cb0ef41Sopenharmony_ciuspoof_getAllowedChars(const USpoofChecker *sc, UErrorCode *status) { 3271cb0ef41Sopenharmony_ci const UnicodeSet *result = uspoof_getAllowedUnicodeSet(sc, status); 3281cb0ef41Sopenharmony_ci return result->toUSet(); 3291cb0ef41Sopenharmony_ci} 3301cb0ef41Sopenharmony_ci 3311cb0ef41Sopenharmony_ciU_CAPI const UnicodeSet * U_EXPORT2 3321cb0ef41Sopenharmony_ciuspoof_getAllowedUnicodeSet(const USpoofChecker *sc, UErrorCode *status) { 3331cb0ef41Sopenharmony_ci const SpoofImpl *This = SpoofImpl::validateThis(sc, *status); 3341cb0ef41Sopenharmony_ci if (This == nullptr) { 3351cb0ef41Sopenharmony_ci return nullptr; 3361cb0ef41Sopenharmony_ci } 3371cb0ef41Sopenharmony_ci return This->fAllowedCharsSet; 3381cb0ef41Sopenharmony_ci} 3391cb0ef41Sopenharmony_ci 3401cb0ef41Sopenharmony_ci 3411cb0ef41Sopenharmony_ciU_CAPI void U_EXPORT2 3421cb0ef41Sopenharmony_ciuspoof_setAllowedChars(USpoofChecker *sc, const USet *chars, UErrorCode *status) { 3431cb0ef41Sopenharmony_ci const UnicodeSet *set = UnicodeSet::fromUSet(chars); 3441cb0ef41Sopenharmony_ci uspoof_setAllowedUnicodeSet(sc, set, status); 3451cb0ef41Sopenharmony_ci} 3461cb0ef41Sopenharmony_ci 3471cb0ef41Sopenharmony_ci 3481cb0ef41Sopenharmony_ciU_CAPI void U_EXPORT2 3491cb0ef41Sopenharmony_ciuspoof_setAllowedUnicodeSet(USpoofChecker *sc, const UnicodeSet *chars, UErrorCode *status) { 3501cb0ef41Sopenharmony_ci SpoofImpl *This = SpoofImpl::validateThis(sc, *status); 3511cb0ef41Sopenharmony_ci if (This == nullptr) { 3521cb0ef41Sopenharmony_ci return; 3531cb0ef41Sopenharmony_ci } 3541cb0ef41Sopenharmony_ci if (chars->isBogus()) { 3551cb0ef41Sopenharmony_ci *status = U_ILLEGAL_ARGUMENT_ERROR; 3561cb0ef41Sopenharmony_ci return; 3571cb0ef41Sopenharmony_ci } 3581cb0ef41Sopenharmony_ci UnicodeSet *clonedSet = chars->clone(); 3591cb0ef41Sopenharmony_ci if (clonedSet == nullptr || clonedSet->isBogus()) { 3601cb0ef41Sopenharmony_ci *status = U_MEMORY_ALLOCATION_ERROR; 3611cb0ef41Sopenharmony_ci return; 3621cb0ef41Sopenharmony_ci } 3631cb0ef41Sopenharmony_ci clonedSet->freeze(); 3641cb0ef41Sopenharmony_ci delete This->fAllowedCharsSet; 3651cb0ef41Sopenharmony_ci This->fAllowedCharsSet = clonedSet; 3661cb0ef41Sopenharmony_ci This->fChecks |= USPOOF_CHAR_LIMIT; 3671cb0ef41Sopenharmony_ci} 3681cb0ef41Sopenharmony_ci 3691cb0ef41Sopenharmony_ci 3701cb0ef41Sopenharmony_ciU_CAPI int32_t U_EXPORT2 3711cb0ef41Sopenharmony_ciuspoof_check(const USpoofChecker *sc, 3721cb0ef41Sopenharmony_ci const char16_t *id, int32_t length, 3731cb0ef41Sopenharmony_ci int32_t *position, 3741cb0ef41Sopenharmony_ci UErrorCode *status) { 3751cb0ef41Sopenharmony_ci 3761cb0ef41Sopenharmony_ci // Backwards compatibility: 3771cb0ef41Sopenharmony_ci if (position != nullptr) { 3781cb0ef41Sopenharmony_ci *position = 0; 3791cb0ef41Sopenharmony_ci } 3801cb0ef41Sopenharmony_ci 3811cb0ef41Sopenharmony_ci // Delegate to uspoof_check2 3821cb0ef41Sopenharmony_ci return uspoof_check2(sc, id, length, nullptr, status); 3831cb0ef41Sopenharmony_ci} 3841cb0ef41Sopenharmony_ci 3851cb0ef41Sopenharmony_ci 3861cb0ef41Sopenharmony_ciU_CAPI int32_t U_EXPORT2 3871cb0ef41Sopenharmony_ciuspoof_check2(const USpoofChecker *sc, 3881cb0ef41Sopenharmony_ci const char16_t* id, int32_t length, 3891cb0ef41Sopenharmony_ci USpoofCheckResult* checkResult, 3901cb0ef41Sopenharmony_ci UErrorCode *status) { 3911cb0ef41Sopenharmony_ci 3921cb0ef41Sopenharmony_ci const SpoofImpl *This = SpoofImpl::validateThis(sc, *status); 3931cb0ef41Sopenharmony_ci if (This == nullptr) { 3941cb0ef41Sopenharmony_ci return 0; 3951cb0ef41Sopenharmony_ci } 3961cb0ef41Sopenharmony_ci if (length < -1) { 3971cb0ef41Sopenharmony_ci *status = U_ILLEGAL_ARGUMENT_ERROR; 3981cb0ef41Sopenharmony_ci return 0; 3991cb0ef41Sopenharmony_ci } 4001cb0ef41Sopenharmony_ci UnicodeString idStr((length == -1), id, length); // Aliasing constructor. 4011cb0ef41Sopenharmony_ci int32_t result = uspoof_check2UnicodeString(sc, idStr, checkResult, status); 4021cb0ef41Sopenharmony_ci return result; 4031cb0ef41Sopenharmony_ci} 4041cb0ef41Sopenharmony_ci 4051cb0ef41Sopenharmony_ci 4061cb0ef41Sopenharmony_ciU_CAPI int32_t U_EXPORT2 4071cb0ef41Sopenharmony_ciuspoof_checkUTF8(const USpoofChecker *sc, 4081cb0ef41Sopenharmony_ci const char *id, int32_t length, 4091cb0ef41Sopenharmony_ci int32_t *position, 4101cb0ef41Sopenharmony_ci UErrorCode *status) { 4111cb0ef41Sopenharmony_ci 4121cb0ef41Sopenharmony_ci // Backwards compatibility: 4131cb0ef41Sopenharmony_ci if (position != nullptr) { 4141cb0ef41Sopenharmony_ci *position = 0; 4151cb0ef41Sopenharmony_ci } 4161cb0ef41Sopenharmony_ci 4171cb0ef41Sopenharmony_ci // Delegate to uspoof_check2 4181cb0ef41Sopenharmony_ci return uspoof_check2UTF8(sc, id, length, nullptr, status); 4191cb0ef41Sopenharmony_ci} 4201cb0ef41Sopenharmony_ci 4211cb0ef41Sopenharmony_ci 4221cb0ef41Sopenharmony_ciU_CAPI int32_t U_EXPORT2 4231cb0ef41Sopenharmony_ciuspoof_check2UTF8(const USpoofChecker *sc, 4241cb0ef41Sopenharmony_ci const char *id, int32_t length, 4251cb0ef41Sopenharmony_ci USpoofCheckResult* checkResult, 4261cb0ef41Sopenharmony_ci UErrorCode *status) { 4271cb0ef41Sopenharmony_ci 4281cb0ef41Sopenharmony_ci if (U_FAILURE(*status)) { 4291cb0ef41Sopenharmony_ci return 0; 4301cb0ef41Sopenharmony_ci } 4311cb0ef41Sopenharmony_ci UnicodeString idStr = UnicodeString::fromUTF8(StringPiece(id, length>=0 ? length : static_cast<int32_t>(uprv_strlen(id)))); 4321cb0ef41Sopenharmony_ci int32_t result = uspoof_check2UnicodeString(sc, idStr, checkResult, status); 4331cb0ef41Sopenharmony_ci return result; 4341cb0ef41Sopenharmony_ci} 4351cb0ef41Sopenharmony_ci 4361cb0ef41Sopenharmony_ci 4371cb0ef41Sopenharmony_ciU_CAPI int32_t U_EXPORT2 4381cb0ef41Sopenharmony_ciuspoof_areConfusable(const USpoofChecker *sc, 4391cb0ef41Sopenharmony_ci const char16_t *id1, int32_t length1, 4401cb0ef41Sopenharmony_ci const char16_t *id2, int32_t length2, 4411cb0ef41Sopenharmony_ci UErrorCode *status) { 4421cb0ef41Sopenharmony_ci SpoofImpl::validateThis(sc, *status); 4431cb0ef41Sopenharmony_ci if (U_FAILURE(*status)) { 4441cb0ef41Sopenharmony_ci return 0; 4451cb0ef41Sopenharmony_ci } 4461cb0ef41Sopenharmony_ci if (length1 < -1 || length2 < -1) { 4471cb0ef41Sopenharmony_ci *status = U_ILLEGAL_ARGUMENT_ERROR; 4481cb0ef41Sopenharmony_ci return 0; 4491cb0ef41Sopenharmony_ci } 4501cb0ef41Sopenharmony_ci 4511cb0ef41Sopenharmony_ci UnicodeString id1Str((length1==-1), id1, length1); // Aliasing constructor 4521cb0ef41Sopenharmony_ci UnicodeString id2Str((length2==-1), id2, length2); // Aliasing constructor 4531cb0ef41Sopenharmony_ci return uspoof_areConfusableUnicodeString(sc, id1Str, id2Str, status); 4541cb0ef41Sopenharmony_ci} 4551cb0ef41Sopenharmony_ci 4561cb0ef41Sopenharmony_ci 4571cb0ef41Sopenharmony_ciU_CAPI int32_t U_EXPORT2 4581cb0ef41Sopenharmony_ciuspoof_areConfusableUTF8(const USpoofChecker *sc, 4591cb0ef41Sopenharmony_ci const char *id1, int32_t length1, 4601cb0ef41Sopenharmony_ci const char *id2, int32_t length2, 4611cb0ef41Sopenharmony_ci UErrorCode *status) { 4621cb0ef41Sopenharmony_ci SpoofImpl::validateThis(sc, *status); 4631cb0ef41Sopenharmony_ci if (U_FAILURE(*status)) { 4641cb0ef41Sopenharmony_ci return 0; 4651cb0ef41Sopenharmony_ci } 4661cb0ef41Sopenharmony_ci if (length1 < -1 || length2 < -1) { 4671cb0ef41Sopenharmony_ci *status = U_ILLEGAL_ARGUMENT_ERROR; 4681cb0ef41Sopenharmony_ci return 0; 4691cb0ef41Sopenharmony_ci } 4701cb0ef41Sopenharmony_ci UnicodeString id1Str = UnicodeString::fromUTF8(StringPiece(id1, length1>=0? length1 : static_cast<int32_t>(uprv_strlen(id1)))); 4711cb0ef41Sopenharmony_ci UnicodeString id2Str = UnicodeString::fromUTF8(StringPiece(id2, length2>=0? length2 : static_cast<int32_t>(uprv_strlen(id2)))); 4721cb0ef41Sopenharmony_ci int32_t results = uspoof_areConfusableUnicodeString(sc, id1Str, id2Str, status); 4731cb0ef41Sopenharmony_ci return results; 4741cb0ef41Sopenharmony_ci} 4751cb0ef41Sopenharmony_ci 4761cb0ef41Sopenharmony_ci 4771cb0ef41Sopenharmony_ciU_CAPI int32_t U_EXPORT2 4781cb0ef41Sopenharmony_ciuspoof_areConfusableUnicodeString(const USpoofChecker *sc, 4791cb0ef41Sopenharmony_ci const icu::UnicodeString &id1, 4801cb0ef41Sopenharmony_ci const icu::UnicodeString &id2, 4811cb0ef41Sopenharmony_ci UErrorCode *status) { 4821cb0ef41Sopenharmony_ci const SpoofImpl *This = SpoofImpl::validateThis(sc, *status); 4831cb0ef41Sopenharmony_ci if (U_FAILURE(*status)) { 4841cb0ef41Sopenharmony_ci return 0; 4851cb0ef41Sopenharmony_ci } 4861cb0ef41Sopenharmony_ci // 4871cb0ef41Sopenharmony_ci // See section 4 of UAX 39 for the algorithm for checking whether two strings are confusable, 4881cb0ef41Sopenharmony_ci // and for definitions of the types (single, whole, mixed-script) of confusables. 4891cb0ef41Sopenharmony_ci 4901cb0ef41Sopenharmony_ci // We only care about a few of the check flags. Ignore the others. 4911cb0ef41Sopenharmony_ci // If no tests relevant to this function have been specified, return an error. 4921cb0ef41Sopenharmony_ci // TODO: is this really the right thing to do? It's probably an error on the caller's part, 4931cb0ef41Sopenharmony_ci // but logically we would just return 0 (no error). 4941cb0ef41Sopenharmony_ci if ((This->fChecks & USPOOF_CONFUSABLE) == 0) { 4951cb0ef41Sopenharmony_ci *status = U_INVALID_STATE_ERROR; 4961cb0ef41Sopenharmony_ci return 0; 4971cb0ef41Sopenharmony_ci } 4981cb0ef41Sopenharmony_ci 4991cb0ef41Sopenharmony_ci // Compute the skeletons and check for confusability. 5001cb0ef41Sopenharmony_ci UnicodeString id1Skeleton; 5011cb0ef41Sopenharmony_ci uspoof_getSkeletonUnicodeString(sc, 0 /* deprecated */, id1, id1Skeleton, status); 5021cb0ef41Sopenharmony_ci UnicodeString id2Skeleton; 5031cb0ef41Sopenharmony_ci uspoof_getSkeletonUnicodeString(sc, 0 /* deprecated */, id2, id2Skeleton, status); 5041cb0ef41Sopenharmony_ci if (U_FAILURE(*status)) { return 0; } 5051cb0ef41Sopenharmony_ci if (id1Skeleton != id2Skeleton) { 5061cb0ef41Sopenharmony_ci return 0; 5071cb0ef41Sopenharmony_ci } 5081cb0ef41Sopenharmony_ci 5091cb0ef41Sopenharmony_ci // If we get here, the strings are confusable. Now we just need to set the flags for the appropriate classes 5101cb0ef41Sopenharmony_ci // of confusables according to UTS 39 section 4. 5111cb0ef41Sopenharmony_ci // Start by computing the resolved script sets of id1 and id2. 5121cb0ef41Sopenharmony_ci ScriptSet id1RSS; 5131cb0ef41Sopenharmony_ci This->getResolvedScriptSet(id1, id1RSS, *status); 5141cb0ef41Sopenharmony_ci ScriptSet id2RSS; 5151cb0ef41Sopenharmony_ci This->getResolvedScriptSet(id2, id2RSS, *status); 5161cb0ef41Sopenharmony_ci 5171cb0ef41Sopenharmony_ci // Turn on all applicable flags 5181cb0ef41Sopenharmony_ci int32_t result = 0; 5191cb0ef41Sopenharmony_ci if (id1RSS.intersects(id2RSS)) { 5201cb0ef41Sopenharmony_ci result |= USPOOF_SINGLE_SCRIPT_CONFUSABLE; 5211cb0ef41Sopenharmony_ci } else { 5221cb0ef41Sopenharmony_ci result |= USPOOF_MIXED_SCRIPT_CONFUSABLE; 5231cb0ef41Sopenharmony_ci if (!id1RSS.isEmpty() && !id2RSS.isEmpty()) { 5241cb0ef41Sopenharmony_ci result |= USPOOF_WHOLE_SCRIPT_CONFUSABLE; 5251cb0ef41Sopenharmony_ci } 5261cb0ef41Sopenharmony_ci } 5271cb0ef41Sopenharmony_ci 5281cb0ef41Sopenharmony_ci // Turn off flags that the user doesn't want 5291cb0ef41Sopenharmony_ci if ((This->fChecks & USPOOF_SINGLE_SCRIPT_CONFUSABLE) == 0) { 5301cb0ef41Sopenharmony_ci result &= ~USPOOF_SINGLE_SCRIPT_CONFUSABLE; 5311cb0ef41Sopenharmony_ci } 5321cb0ef41Sopenharmony_ci if ((This->fChecks & USPOOF_MIXED_SCRIPT_CONFUSABLE) == 0) { 5331cb0ef41Sopenharmony_ci result &= ~USPOOF_MIXED_SCRIPT_CONFUSABLE; 5341cb0ef41Sopenharmony_ci } 5351cb0ef41Sopenharmony_ci if ((This->fChecks & USPOOF_WHOLE_SCRIPT_CONFUSABLE) == 0) { 5361cb0ef41Sopenharmony_ci result &= ~USPOOF_WHOLE_SCRIPT_CONFUSABLE; 5371cb0ef41Sopenharmony_ci } 5381cb0ef41Sopenharmony_ci 5391cb0ef41Sopenharmony_ci return result; 5401cb0ef41Sopenharmony_ci} 5411cb0ef41Sopenharmony_ci 5421cb0ef41Sopenharmony_ciU_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusable(const USpoofChecker *sc, UBiDiDirection direction, 5431cb0ef41Sopenharmony_ci const char16_t *id1, int32_t length1, 5441cb0ef41Sopenharmony_ci const char16_t *id2, int32_t length2, 5451cb0ef41Sopenharmony_ci UErrorCode *status) { 5461cb0ef41Sopenharmony_ci UnicodeString id1Str((length1 == -1), id1, length1); // Aliasing constructor 5471cb0ef41Sopenharmony_ci UnicodeString id2Str((length2 == -1), id2, length2); // Aliasing constructor 5481cb0ef41Sopenharmony_ci if (id1Str.isBogus() || id2Str.isBogus()) { 5491cb0ef41Sopenharmony_ci *status = U_ILLEGAL_ARGUMENT_ERROR; 5501cb0ef41Sopenharmony_ci return 0; 5511cb0ef41Sopenharmony_ci } 5521cb0ef41Sopenharmony_ci return uspoof_areBidiConfusableUnicodeString(sc, direction, id1Str, id2Str, status); 5531cb0ef41Sopenharmony_ci} 5541cb0ef41Sopenharmony_ci 5551cb0ef41Sopenharmony_ciU_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusableUTF8(const USpoofChecker *sc, UBiDiDirection direction, 5561cb0ef41Sopenharmony_ci const char *id1, int32_t length1, const char *id2, 5571cb0ef41Sopenharmony_ci int32_t length2, UErrorCode *status) { 5581cb0ef41Sopenharmony_ci if (length1 < -1 || length2 < -1) { 5591cb0ef41Sopenharmony_ci *status = U_ILLEGAL_ARGUMENT_ERROR; 5601cb0ef41Sopenharmony_ci return 0; 5611cb0ef41Sopenharmony_ci } 5621cb0ef41Sopenharmony_ci UnicodeString id1Str = UnicodeString::fromUTF8( 5631cb0ef41Sopenharmony_ci StringPiece(id1, length1 >= 0 ? length1 : static_cast<int32_t>(uprv_strlen(id1)))); 5641cb0ef41Sopenharmony_ci UnicodeString id2Str = UnicodeString::fromUTF8( 5651cb0ef41Sopenharmony_ci StringPiece(id2, length2 >= 0 ? length2 : static_cast<int32_t>(uprv_strlen(id2)))); 5661cb0ef41Sopenharmony_ci return uspoof_areBidiConfusableUnicodeString(sc, direction, id1Str, id2Str, status); 5671cb0ef41Sopenharmony_ci} 5681cb0ef41Sopenharmony_ci 5691cb0ef41Sopenharmony_ciU_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusableUnicodeString(const USpoofChecker *sc, 5701cb0ef41Sopenharmony_ci UBiDiDirection direction, 5711cb0ef41Sopenharmony_ci const icu::UnicodeString &id1, 5721cb0ef41Sopenharmony_ci const icu::UnicodeString &id2, 5731cb0ef41Sopenharmony_ci UErrorCode *status) { 5741cb0ef41Sopenharmony_ci const SpoofImpl *This = SpoofImpl::validateThis(sc, *status); 5751cb0ef41Sopenharmony_ci if (U_FAILURE(*status)) { 5761cb0ef41Sopenharmony_ci return 0; 5771cb0ef41Sopenharmony_ci } 5781cb0ef41Sopenharmony_ci // 5791cb0ef41Sopenharmony_ci // See section 4 of UTS 39 for the algorithm for checking whether two strings are confusable, 5801cb0ef41Sopenharmony_ci // and for definitions of the types (single, whole, mixed-script) of confusables. 5811cb0ef41Sopenharmony_ci 5821cb0ef41Sopenharmony_ci // We only care about a few of the check flags. Ignore the others. 5831cb0ef41Sopenharmony_ci // If no tests relevant to this function have been specified, return an error. 5841cb0ef41Sopenharmony_ci // TODO: is this really the right thing to do? It's probably an error on the caller's part, 5851cb0ef41Sopenharmony_ci // but logically we would just return 0 (no error). 5861cb0ef41Sopenharmony_ci if ((This->fChecks & USPOOF_CONFUSABLE) == 0) { 5871cb0ef41Sopenharmony_ci *status = U_INVALID_STATE_ERROR; 5881cb0ef41Sopenharmony_ci return 0; 5891cb0ef41Sopenharmony_ci } 5901cb0ef41Sopenharmony_ci 5911cb0ef41Sopenharmony_ci // Compute the skeletons and check for confusability. 5921cb0ef41Sopenharmony_ci UnicodeString id1Skeleton; 5931cb0ef41Sopenharmony_ci uspoof_getBidiSkeletonUnicodeString(sc, direction, id1, id1Skeleton, status); 5941cb0ef41Sopenharmony_ci UnicodeString id2Skeleton; 5951cb0ef41Sopenharmony_ci uspoof_getBidiSkeletonUnicodeString(sc, direction, id2, id2Skeleton, status); 5961cb0ef41Sopenharmony_ci if (U_FAILURE(*status)) { 5971cb0ef41Sopenharmony_ci return 0; 5981cb0ef41Sopenharmony_ci } 5991cb0ef41Sopenharmony_ci if (id1Skeleton != id2Skeleton) { 6001cb0ef41Sopenharmony_ci return 0; 6011cb0ef41Sopenharmony_ci } 6021cb0ef41Sopenharmony_ci 6031cb0ef41Sopenharmony_ci // If we get here, the strings are confusable. Now we just need to set the flags for the appropriate 6041cb0ef41Sopenharmony_ci // classes of confusables according to UTS 39 section 4. Start by computing the resolved script sets 6051cb0ef41Sopenharmony_ci // of id1 and id2. 6061cb0ef41Sopenharmony_ci ScriptSet id1RSS; 6071cb0ef41Sopenharmony_ci This->getResolvedScriptSet(id1, id1RSS, *status); 6081cb0ef41Sopenharmony_ci ScriptSet id2RSS; 6091cb0ef41Sopenharmony_ci This->getResolvedScriptSet(id2, id2RSS, *status); 6101cb0ef41Sopenharmony_ci 6111cb0ef41Sopenharmony_ci // Turn on all applicable flags 6121cb0ef41Sopenharmony_ci uint32_t result = 0; 6131cb0ef41Sopenharmony_ci if (id1RSS.intersects(id2RSS)) { 6141cb0ef41Sopenharmony_ci result |= USPOOF_SINGLE_SCRIPT_CONFUSABLE; 6151cb0ef41Sopenharmony_ci } else { 6161cb0ef41Sopenharmony_ci result |= USPOOF_MIXED_SCRIPT_CONFUSABLE; 6171cb0ef41Sopenharmony_ci if (!id1RSS.isEmpty() && !id2RSS.isEmpty()) { 6181cb0ef41Sopenharmony_ci result |= USPOOF_WHOLE_SCRIPT_CONFUSABLE; 6191cb0ef41Sopenharmony_ci } 6201cb0ef41Sopenharmony_ci } 6211cb0ef41Sopenharmony_ci 6221cb0ef41Sopenharmony_ci // Turn off flags that the user doesn't want 6231cb0ef41Sopenharmony_ci return result & This->fChecks; 6241cb0ef41Sopenharmony_ci} 6251cb0ef41Sopenharmony_ci 6261cb0ef41Sopenharmony_ci 6271cb0ef41Sopenharmony_ciU_CAPI int32_t U_EXPORT2 6281cb0ef41Sopenharmony_ciuspoof_checkUnicodeString(const USpoofChecker *sc, 6291cb0ef41Sopenharmony_ci const icu::UnicodeString &id, 6301cb0ef41Sopenharmony_ci int32_t *position, 6311cb0ef41Sopenharmony_ci UErrorCode *status) { 6321cb0ef41Sopenharmony_ci 6331cb0ef41Sopenharmony_ci // Backwards compatibility: 6341cb0ef41Sopenharmony_ci if (position != nullptr) { 6351cb0ef41Sopenharmony_ci *position = 0; 6361cb0ef41Sopenharmony_ci } 6371cb0ef41Sopenharmony_ci 6381cb0ef41Sopenharmony_ci // Delegate to uspoof_check2 6391cb0ef41Sopenharmony_ci return uspoof_check2UnicodeString(sc, id, nullptr, status); 6401cb0ef41Sopenharmony_ci} 6411cb0ef41Sopenharmony_ci 6421cb0ef41Sopenharmony_cinamespace { 6431cb0ef41Sopenharmony_ci 6441cb0ef41Sopenharmony_ciint32_t checkImpl(const SpoofImpl* This, const UnicodeString& id, CheckResult* checkResult, UErrorCode* status) { 6451cb0ef41Sopenharmony_ci U_ASSERT(This != nullptr); 6461cb0ef41Sopenharmony_ci U_ASSERT(checkResult != nullptr); 6471cb0ef41Sopenharmony_ci checkResult->clear(); 6481cb0ef41Sopenharmony_ci int32_t result = 0; 6491cb0ef41Sopenharmony_ci 6501cb0ef41Sopenharmony_ci if (0 != (This->fChecks & USPOOF_RESTRICTION_LEVEL)) { 6511cb0ef41Sopenharmony_ci URestrictionLevel idRestrictionLevel = This->getRestrictionLevel(id, *status); 6521cb0ef41Sopenharmony_ci if (idRestrictionLevel > This->fRestrictionLevel) { 6531cb0ef41Sopenharmony_ci result |= USPOOF_RESTRICTION_LEVEL; 6541cb0ef41Sopenharmony_ci } 6551cb0ef41Sopenharmony_ci checkResult->fRestrictionLevel = idRestrictionLevel; 6561cb0ef41Sopenharmony_ci } 6571cb0ef41Sopenharmony_ci 6581cb0ef41Sopenharmony_ci if (0 != (This->fChecks & USPOOF_MIXED_NUMBERS)) { 6591cb0ef41Sopenharmony_ci UnicodeSet numerics; 6601cb0ef41Sopenharmony_ci This->getNumerics(id, numerics, *status); 6611cb0ef41Sopenharmony_ci if (numerics.size() > 1) { 6621cb0ef41Sopenharmony_ci result |= USPOOF_MIXED_NUMBERS; 6631cb0ef41Sopenharmony_ci } 6641cb0ef41Sopenharmony_ci checkResult->fNumerics = numerics; // UnicodeSet::operator= 6651cb0ef41Sopenharmony_ci } 6661cb0ef41Sopenharmony_ci 6671cb0ef41Sopenharmony_ci if (0 != (This->fChecks & USPOOF_HIDDEN_OVERLAY)) { 6681cb0ef41Sopenharmony_ci int32_t index = This->findHiddenOverlay(id, *status); 6691cb0ef41Sopenharmony_ci if (index != -1) { 6701cb0ef41Sopenharmony_ci result |= USPOOF_HIDDEN_OVERLAY; 6711cb0ef41Sopenharmony_ci } 6721cb0ef41Sopenharmony_ci } 6731cb0ef41Sopenharmony_ci 6741cb0ef41Sopenharmony_ci 6751cb0ef41Sopenharmony_ci if (0 != (This->fChecks & USPOOF_CHAR_LIMIT)) { 6761cb0ef41Sopenharmony_ci int32_t i; 6771cb0ef41Sopenharmony_ci UChar32 c; 6781cb0ef41Sopenharmony_ci int32_t length = id.length(); 6791cb0ef41Sopenharmony_ci for (i=0; i<length ;) { 6801cb0ef41Sopenharmony_ci c = id.char32At(i); 6811cb0ef41Sopenharmony_ci i += U16_LENGTH(c); 6821cb0ef41Sopenharmony_ci if (!This->fAllowedCharsSet->contains(c)) { 6831cb0ef41Sopenharmony_ci result |= USPOOF_CHAR_LIMIT; 6841cb0ef41Sopenharmony_ci break; 6851cb0ef41Sopenharmony_ci } 6861cb0ef41Sopenharmony_ci } 6871cb0ef41Sopenharmony_ci } 6881cb0ef41Sopenharmony_ci 6891cb0ef41Sopenharmony_ci if (0 != (This->fChecks & USPOOF_INVISIBLE)) { 6901cb0ef41Sopenharmony_ci // This check needs to be done on NFD input 6911cb0ef41Sopenharmony_ci UnicodeString nfdText; 6921cb0ef41Sopenharmony_ci gNfdNormalizer->normalize(id, nfdText, *status); 6931cb0ef41Sopenharmony_ci int32_t nfdLength = nfdText.length(); 6941cb0ef41Sopenharmony_ci 6951cb0ef41Sopenharmony_ci // scan for more than one occurrence of the same non-spacing mark 6961cb0ef41Sopenharmony_ci // in a sequence of non-spacing marks. 6971cb0ef41Sopenharmony_ci int32_t i; 6981cb0ef41Sopenharmony_ci UChar32 c; 6991cb0ef41Sopenharmony_ci UChar32 firstNonspacingMark = 0; 7001cb0ef41Sopenharmony_ci UBool haveMultipleMarks = false; 7011cb0ef41Sopenharmony_ci UnicodeSet marksSeenSoFar; // Set of combining marks in a single combining sequence. 7021cb0ef41Sopenharmony_ci 7031cb0ef41Sopenharmony_ci for (i=0; i<nfdLength ;) { 7041cb0ef41Sopenharmony_ci c = nfdText.char32At(i); 7051cb0ef41Sopenharmony_ci i += U16_LENGTH(c); 7061cb0ef41Sopenharmony_ci if (u_charType(c) != U_NON_SPACING_MARK) { 7071cb0ef41Sopenharmony_ci firstNonspacingMark = 0; 7081cb0ef41Sopenharmony_ci if (haveMultipleMarks) { 7091cb0ef41Sopenharmony_ci marksSeenSoFar.clear(); 7101cb0ef41Sopenharmony_ci haveMultipleMarks = false; 7111cb0ef41Sopenharmony_ci } 7121cb0ef41Sopenharmony_ci continue; 7131cb0ef41Sopenharmony_ci } 7141cb0ef41Sopenharmony_ci if (firstNonspacingMark == 0) { 7151cb0ef41Sopenharmony_ci firstNonspacingMark = c; 7161cb0ef41Sopenharmony_ci continue; 7171cb0ef41Sopenharmony_ci } 7181cb0ef41Sopenharmony_ci if (!haveMultipleMarks) { 7191cb0ef41Sopenharmony_ci marksSeenSoFar.add(firstNonspacingMark); 7201cb0ef41Sopenharmony_ci haveMultipleMarks = true; 7211cb0ef41Sopenharmony_ci } 7221cb0ef41Sopenharmony_ci if (marksSeenSoFar.contains(c)) { 7231cb0ef41Sopenharmony_ci // report the error, and stop scanning. 7241cb0ef41Sopenharmony_ci // No need to find more than the first failure. 7251cb0ef41Sopenharmony_ci result |= USPOOF_INVISIBLE; 7261cb0ef41Sopenharmony_ci break; 7271cb0ef41Sopenharmony_ci } 7281cb0ef41Sopenharmony_ci marksSeenSoFar.add(c); 7291cb0ef41Sopenharmony_ci } 7301cb0ef41Sopenharmony_ci } 7311cb0ef41Sopenharmony_ci 7321cb0ef41Sopenharmony_ci checkResult->fChecks = result; 7331cb0ef41Sopenharmony_ci return checkResult->toCombinedBitmask(This->fChecks); 7341cb0ef41Sopenharmony_ci} 7351cb0ef41Sopenharmony_ci 7361cb0ef41Sopenharmony_ci} // namespace 7371cb0ef41Sopenharmony_ci 7381cb0ef41Sopenharmony_ciU_CAPI int32_t U_EXPORT2 7391cb0ef41Sopenharmony_ciuspoof_check2UnicodeString(const USpoofChecker *sc, 7401cb0ef41Sopenharmony_ci const icu::UnicodeString &id, 7411cb0ef41Sopenharmony_ci USpoofCheckResult* checkResult, 7421cb0ef41Sopenharmony_ci UErrorCode *status) { 7431cb0ef41Sopenharmony_ci const SpoofImpl *This = SpoofImpl::validateThis(sc, *status); 7441cb0ef41Sopenharmony_ci if (This == nullptr) { 7451cb0ef41Sopenharmony_ci return false; 7461cb0ef41Sopenharmony_ci } 7471cb0ef41Sopenharmony_ci 7481cb0ef41Sopenharmony_ci if (checkResult != nullptr) { 7491cb0ef41Sopenharmony_ci CheckResult* ThisCheckResult = CheckResult::validateThis(checkResult, *status); 7501cb0ef41Sopenharmony_ci if (ThisCheckResult == nullptr) { 7511cb0ef41Sopenharmony_ci return false; 7521cb0ef41Sopenharmony_ci } 7531cb0ef41Sopenharmony_ci return checkImpl(This, id, ThisCheckResult, status); 7541cb0ef41Sopenharmony_ci } else { 7551cb0ef41Sopenharmony_ci // Stack-allocate the checkResult since this method doesn't return it 7561cb0ef41Sopenharmony_ci CheckResult stackCheckResult; 7571cb0ef41Sopenharmony_ci return checkImpl(This, id, &stackCheckResult, status); 7581cb0ef41Sopenharmony_ci } 7591cb0ef41Sopenharmony_ci} 7601cb0ef41Sopenharmony_ci 7611cb0ef41Sopenharmony_ci 7621cb0ef41Sopenharmony_ciU_CAPI int32_t U_EXPORT2 7631cb0ef41Sopenharmony_ciuspoof_getSkeleton(const USpoofChecker *sc, 7641cb0ef41Sopenharmony_ci uint32_t type, 7651cb0ef41Sopenharmony_ci const char16_t *id, int32_t length, 7661cb0ef41Sopenharmony_ci char16_t *dest, int32_t destCapacity, 7671cb0ef41Sopenharmony_ci UErrorCode *status) { 7681cb0ef41Sopenharmony_ci 7691cb0ef41Sopenharmony_ci SpoofImpl::validateThis(sc, *status); 7701cb0ef41Sopenharmony_ci if (U_FAILURE(*status)) { 7711cb0ef41Sopenharmony_ci return 0; 7721cb0ef41Sopenharmony_ci } 7731cb0ef41Sopenharmony_ci if (length<-1 || destCapacity<0 || (destCapacity==0 && dest!=nullptr)) { 7741cb0ef41Sopenharmony_ci *status = U_ILLEGAL_ARGUMENT_ERROR; 7751cb0ef41Sopenharmony_ci return 0; 7761cb0ef41Sopenharmony_ci } 7771cb0ef41Sopenharmony_ci 7781cb0ef41Sopenharmony_ci UnicodeString idStr((length==-1), id, length); // Aliasing constructor 7791cb0ef41Sopenharmony_ci UnicodeString destStr; 7801cb0ef41Sopenharmony_ci uspoof_getSkeletonUnicodeString(sc, type, idStr, destStr, status); 7811cb0ef41Sopenharmony_ci destStr.extract(dest, destCapacity, *status); 7821cb0ef41Sopenharmony_ci return destStr.length(); 7831cb0ef41Sopenharmony_ci} 7841cb0ef41Sopenharmony_ci 7851cb0ef41Sopenharmony_ciU_CAPI int32_t U_EXPORT2 uspoof_getBidiSkeleton(const USpoofChecker *sc, UBiDiDirection direction, 7861cb0ef41Sopenharmony_ci const UChar *id, int32_t length, UChar *dest, 7871cb0ef41Sopenharmony_ci int32_t destCapacity, UErrorCode *status) { 7881cb0ef41Sopenharmony_ci UnicodeString idStr((length == -1), id, length); // Aliasing constructor 7891cb0ef41Sopenharmony_ci if (idStr.isBogus()) { 7901cb0ef41Sopenharmony_ci *status = U_ILLEGAL_ARGUMENT_ERROR; 7911cb0ef41Sopenharmony_ci return 0; 7921cb0ef41Sopenharmony_ci } 7931cb0ef41Sopenharmony_ci UnicodeString destStr; 7941cb0ef41Sopenharmony_ci uspoof_getBidiSkeletonUnicodeString(sc, direction, idStr, destStr, status); 7951cb0ef41Sopenharmony_ci return destStr.extract(dest, destCapacity, *status); 7961cb0ef41Sopenharmony_ci} 7971cb0ef41Sopenharmony_ci 7981cb0ef41Sopenharmony_ci 7991cb0ef41Sopenharmony_ci 8001cb0ef41Sopenharmony_ciU_I18N_API UnicodeString &U_EXPORT2 uspoof_getBidiSkeletonUnicodeString(const USpoofChecker *sc, 8011cb0ef41Sopenharmony_ci UBiDiDirection direction, 8021cb0ef41Sopenharmony_ci const UnicodeString &id, 8031cb0ef41Sopenharmony_ci UnicodeString &dest, 8041cb0ef41Sopenharmony_ci UErrorCode *status) { 8051cb0ef41Sopenharmony_ci dest.remove(); 8061cb0ef41Sopenharmony_ci if (direction != UBIDI_LTR && direction != UBIDI_RTL) { 8071cb0ef41Sopenharmony_ci *status = U_ILLEGAL_ARGUMENT_ERROR; 8081cb0ef41Sopenharmony_ci return dest; 8091cb0ef41Sopenharmony_ci } 8101cb0ef41Sopenharmony_ci UBiDi *bidi = ubidi_open(); 8111cb0ef41Sopenharmony_ci ubidi_setPara(bidi, id.getBuffer(), id.length(), direction, 8121cb0ef41Sopenharmony_ci /*embeddingLevels*/ nullptr, status); 8131cb0ef41Sopenharmony_ci if (U_FAILURE(*status)) { 8141cb0ef41Sopenharmony_ci ubidi_close(bidi); 8151cb0ef41Sopenharmony_ci return dest; 8161cb0ef41Sopenharmony_ci } 8171cb0ef41Sopenharmony_ci UnicodeString reordered; 8181cb0ef41Sopenharmony_ci int32_t const size = ubidi_getProcessedLength(bidi); 8191cb0ef41Sopenharmony_ci UChar* const reorderedBuffer = reordered.getBuffer(size); 8201cb0ef41Sopenharmony_ci if (reorderedBuffer == nullptr) { 8211cb0ef41Sopenharmony_ci *status = U_MEMORY_ALLOCATION_ERROR; 8221cb0ef41Sopenharmony_ci ubidi_close(bidi); 8231cb0ef41Sopenharmony_ci return dest; 8241cb0ef41Sopenharmony_ci } 8251cb0ef41Sopenharmony_ci ubidi_writeReordered(bidi, reorderedBuffer, size, 8261cb0ef41Sopenharmony_ci UBIDI_KEEP_BASE_COMBINING | UBIDI_DO_MIRRORING, status); 8271cb0ef41Sopenharmony_ci reordered.releaseBuffer(size); 8281cb0ef41Sopenharmony_ci ubidi_close(bidi); 8291cb0ef41Sopenharmony_ci 8301cb0ef41Sopenharmony_ci if (U_FAILURE(*status)) { 8311cb0ef41Sopenharmony_ci return dest; 8321cb0ef41Sopenharmony_ci } 8331cb0ef41Sopenharmony_ci 8341cb0ef41Sopenharmony_ci // The type parameter is deprecated since ICU 58; any number may be passed. 8351cb0ef41Sopenharmony_ci constexpr uint32_t deprecatedType = 58; 8361cb0ef41Sopenharmony_ci return uspoof_getSkeletonUnicodeString(sc, deprecatedType, reordered, dest, status); 8371cb0ef41Sopenharmony_ci} 8381cb0ef41Sopenharmony_ci 8391cb0ef41Sopenharmony_ci 8401cb0ef41Sopenharmony_ci 8411cb0ef41Sopenharmony_ciU_I18N_API UnicodeString & U_EXPORT2 8421cb0ef41Sopenharmony_ciuspoof_getSkeletonUnicodeString(const USpoofChecker *sc, 8431cb0ef41Sopenharmony_ci uint32_t /*type*/, 8441cb0ef41Sopenharmony_ci const UnicodeString &id, 8451cb0ef41Sopenharmony_ci UnicodeString &dest, 8461cb0ef41Sopenharmony_ci UErrorCode *status) { 8471cb0ef41Sopenharmony_ci const SpoofImpl *This = SpoofImpl::validateThis(sc, *status); 8481cb0ef41Sopenharmony_ci if (U_FAILURE(*status)) { 8491cb0ef41Sopenharmony_ci return dest; 8501cb0ef41Sopenharmony_ci } 8511cb0ef41Sopenharmony_ci 8521cb0ef41Sopenharmony_ci UnicodeString nfdId; 8531cb0ef41Sopenharmony_ci gNfdNormalizer->normalize(id, nfdId, *status); 8541cb0ef41Sopenharmony_ci 8551cb0ef41Sopenharmony_ci // Apply the skeleton mapping to the NFD normalized input string 8561cb0ef41Sopenharmony_ci // Accumulate the skeleton, possibly unnormalized, in a UnicodeString. 8571cb0ef41Sopenharmony_ci int32_t inputIndex = 0; 8581cb0ef41Sopenharmony_ci UnicodeString skelStr; 8591cb0ef41Sopenharmony_ci int32_t normalizedLen = nfdId.length(); 8601cb0ef41Sopenharmony_ci for (inputIndex=0; inputIndex < normalizedLen; ) { 8611cb0ef41Sopenharmony_ci UChar32 c = nfdId.char32At(inputIndex); 8621cb0ef41Sopenharmony_ci inputIndex += U16_LENGTH(c); 8631cb0ef41Sopenharmony_ci if (!u_hasBinaryProperty(c, UCHAR_DEFAULT_IGNORABLE_CODE_POINT)) { 8641cb0ef41Sopenharmony_ci This->fSpoofData->confusableLookup(c, skelStr); 8651cb0ef41Sopenharmony_ci } 8661cb0ef41Sopenharmony_ci } 8671cb0ef41Sopenharmony_ci 8681cb0ef41Sopenharmony_ci gNfdNormalizer->normalize(skelStr, dest, *status); 8691cb0ef41Sopenharmony_ci return dest; 8701cb0ef41Sopenharmony_ci} 8711cb0ef41Sopenharmony_ci 8721cb0ef41Sopenharmony_ciU_CAPI int32_t U_EXPORT2 uspoof_getSkeletonUTF8(const USpoofChecker *sc, uint32_t type, const char *id, 8731cb0ef41Sopenharmony_ci int32_t length, char *dest, int32_t destCapacity, 8741cb0ef41Sopenharmony_ci UErrorCode *status) { 8751cb0ef41Sopenharmony_ci SpoofImpl::validateThis(sc, *status); 8761cb0ef41Sopenharmony_ci if (U_FAILURE(*status)) { 8771cb0ef41Sopenharmony_ci return 0; 8781cb0ef41Sopenharmony_ci } 8791cb0ef41Sopenharmony_ci if (length<-1 || destCapacity<0 || (destCapacity==0 && dest!=nullptr)) { 8801cb0ef41Sopenharmony_ci *status = U_ILLEGAL_ARGUMENT_ERROR; 8811cb0ef41Sopenharmony_ci return 0; 8821cb0ef41Sopenharmony_ci } 8831cb0ef41Sopenharmony_ci 8841cb0ef41Sopenharmony_ci UnicodeString srcStr = UnicodeString::fromUTF8( 8851cb0ef41Sopenharmony_ci StringPiece(id, length >= 0 ? length : static_cast<int32_t>(uprv_strlen(id)))); 8861cb0ef41Sopenharmony_ci UnicodeString destStr; 8871cb0ef41Sopenharmony_ci uspoof_getSkeletonUnicodeString(sc, type, srcStr, destStr, status); 8881cb0ef41Sopenharmony_ci if (U_FAILURE(*status)) { 8891cb0ef41Sopenharmony_ci return 0; 8901cb0ef41Sopenharmony_ci } 8911cb0ef41Sopenharmony_ci 8921cb0ef41Sopenharmony_ci int32_t lengthInUTF8 = 0; 8931cb0ef41Sopenharmony_ci u_strToUTF8(dest, destCapacity, &lengthInUTF8, destStr.getBuffer(), destStr.length(), status); 8941cb0ef41Sopenharmony_ci return lengthInUTF8; 8951cb0ef41Sopenharmony_ci} 8961cb0ef41Sopenharmony_ci 8971cb0ef41Sopenharmony_ciU_CAPI int32_t U_EXPORT2 uspoof_getBidiSkeletonUTF8(const USpoofChecker *sc, UBiDiDirection direction, 8981cb0ef41Sopenharmony_ci const char *id, int32_t length, char *dest, 8991cb0ef41Sopenharmony_ci int32_t destCapacity, UErrorCode *status) { 9001cb0ef41Sopenharmony_ci if (length < -1) { 9011cb0ef41Sopenharmony_ci *status = U_ILLEGAL_ARGUMENT_ERROR; 9021cb0ef41Sopenharmony_ci return 0; 9031cb0ef41Sopenharmony_ci } 9041cb0ef41Sopenharmony_ci 9051cb0ef41Sopenharmony_ci UnicodeString srcStr = UnicodeString::fromUTF8( 9061cb0ef41Sopenharmony_ci StringPiece(id, length >= 0 ? length : static_cast<int32_t>(uprv_strlen(id)))); 9071cb0ef41Sopenharmony_ci UnicodeString destStr; 9081cb0ef41Sopenharmony_ci uspoof_getBidiSkeletonUnicodeString(sc, direction, srcStr, destStr, status); 9091cb0ef41Sopenharmony_ci if (U_FAILURE(*status)) { 9101cb0ef41Sopenharmony_ci return 0; 9111cb0ef41Sopenharmony_ci } 9121cb0ef41Sopenharmony_ci 9131cb0ef41Sopenharmony_ci int32_t lengthInUTF8 = 0; 9141cb0ef41Sopenharmony_ci u_strToUTF8(dest, destCapacity, &lengthInUTF8, destStr.getBuffer(), destStr.length(), status); 9151cb0ef41Sopenharmony_ci return lengthInUTF8; 9161cb0ef41Sopenharmony_ci} 9171cb0ef41Sopenharmony_ci 9181cb0ef41Sopenharmony_ci 9191cb0ef41Sopenharmony_ciU_CAPI int32_t U_EXPORT2 9201cb0ef41Sopenharmony_ciuspoof_serialize(USpoofChecker *sc,void *buf, int32_t capacity, UErrorCode *status) { 9211cb0ef41Sopenharmony_ci SpoofImpl *This = SpoofImpl::validateThis(sc, *status); 9221cb0ef41Sopenharmony_ci if (This == nullptr) { 9231cb0ef41Sopenharmony_ci U_ASSERT(U_FAILURE(*status)); 9241cb0ef41Sopenharmony_ci return 0; 9251cb0ef41Sopenharmony_ci } 9261cb0ef41Sopenharmony_ci 9271cb0ef41Sopenharmony_ci return This->fSpoofData->serialize(buf, capacity, *status); 9281cb0ef41Sopenharmony_ci} 9291cb0ef41Sopenharmony_ci 9301cb0ef41Sopenharmony_ciU_CAPI const USet * U_EXPORT2 9311cb0ef41Sopenharmony_ciuspoof_getInclusionSet(UErrorCode *status) { 9321cb0ef41Sopenharmony_ci umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status); 9331cb0ef41Sopenharmony_ci return gInclusionSet->toUSet(); 9341cb0ef41Sopenharmony_ci} 9351cb0ef41Sopenharmony_ci 9361cb0ef41Sopenharmony_ciU_CAPI const USet * U_EXPORT2 9371cb0ef41Sopenharmony_ciuspoof_getRecommendedSet(UErrorCode *status) { 9381cb0ef41Sopenharmony_ci umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status); 9391cb0ef41Sopenharmony_ci return gRecommendedSet->toUSet(); 9401cb0ef41Sopenharmony_ci} 9411cb0ef41Sopenharmony_ci 9421cb0ef41Sopenharmony_ciU_I18N_API const UnicodeSet * U_EXPORT2 9431cb0ef41Sopenharmony_ciuspoof_getInclusionUnicodeSet(UErrorCode *status) { 9441cb0ef41Sopenharmony_ci umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status); 9451cb0ef41Sopenharmony_ci return gInclusionSet; 9461cb0ef41Sopenharmony_ci} 9471cb0ef41Sopenharmony_ci 9481cb0ef41Sopenharmony_ciU_I18N_API const UnicodeSet * U_EXPORT2 9491cb0ef41Sopenharmony_ciuspoof_getRecommendedUnicodeSet(UErrorCode *status) { 9501cb0ef41Sopenharmony_ci umtx_initOnce(gSpoofInitStaticsOnce, &initializeStatics, *status); 9511cb0ef41Sopenharmony_ci return gRecommendedSet; 9521cb0ef41Sopenharmony_ci} 9531cb0ef41Sopenharmony_ci 9541cb0ef41Sopenharmony_ci//------------------ 9551cb0ef41Sopenharmony_ci// CheckResult APIs 9561cb0ef41Sopenharmony_ci//------------------ 9571cb0ef41Sopenharmony_ci 9581cb0ef41Sopenharmony_ciU_CAPI USpoofCheckResult* U_EXPORT2 9591cb0ef41Sopenharmony_ciuspoof_openCheckResult(UErrorCode *status) { 9601cb0ef41Sopenharmony_ci CheckResult* checkResult = new CheckResult(); 9611cb0ef41Sopenharmony_ci if (checkResult == nullptr) { 9621cb0ef41Sopenharmony_ci *status = U_MEMORY_ALLOCATION_ERROR; 9631cb0ef41Sopenharmony_ci return nullptr; 9641cb0ef41Sopenharmony_ci } 9651cb0ef41Sopenharmony_ci return checkResult->asUSpoofCheckResult(); 9661cb0ef41Sopenharmony_ci} 9671cb0ef41Sopenharmony_ci 9681cb0ef41Sopenharmony_ciU_CAPI void U_EXPORT2 9691cb0ef41Sopenharmony_ciuspoof_closeCheckResult(USpoofCheckResult* checkResult) { 9701cb0ef41Sopenharmony_ci UErrorCode status = U_ZERO_ERROR; 9711cb0ef41Sopenharmony_ci CheckResult* This = CheckResult::validateThis(checkResult, status); 9721cb0ef41Sopenharmony_ci delete This; 9731cb0ef41Sopenharmony_ci} 9741cb0ef41Sopenharmony_ci 9751cb0ef41Sopenharmony_ciU_CAPI int32_t U_EXPORT2 9761cb0ef41Sopenharmony_ciuspoof_getCheckResultChecks(const USpoofCheckResult *checkResult, UErrorCode *status) { 9771cb0ef41Sopenharmony_ci const CheckResult* This = CheckResult::validateThis(checkResult, *status); 9781cb0ef41Sopenharmony_ci if (U_FAILURE(*status)) { return 0; } 9791cb0ef41Sopenharmony_ci return This->fChecks; 9801cb0ef41Sopenharmony_ci} 9811cb0ef41Sopenharmony_ci 9821cb0ef41Sopenharmony_ciU_CAPI URestrictionLevel U_EXPORT2 9831cb0ef41Sopenharmony_ciuspoof_getCheckResultRestrictionLevel(const USpoofCheckResult *checkResult, UErrorCode *status) { 9841cb0ef41Sopenharmony_ci const CheckResult* This = CheckResult::validateThis(checkResult, *status); 9851cb0ef41Sopenharmony_ci if (U_FAILURE(*status)) { return USPOOF_UNRESTRICTIVE; } 9861cb0ef41Sopenharmony_ci return This->fRestrictionLevel; 9871cb0ef41Sopenharmony_ci} 9881cb0ef41Sopenharmony_ci 9891cb0ef41Sopenharmony_ciU_CAPI const USet* U_EXPORT2 9901cb0ef41Sopenharmony_ciuspoof_getCheckResultNumerics(const USpoofCheckResult *checkResult, UErrorCode *status) { 9911cb0ef41Sopenharmony_ci const CheckResult* This = CheckResult::validateThis(checkResult, *status); 9921cb0ef41Sopenharmony_ci if (U_FAILURE(*status)) { return nullptr; } 9931cb0ef41Sopenharmony_ci return This->fNumerics.toUSet(); 9941cb0ef41Sopenharmony_ci} 9951cb0ef41Sopenharmony_ci 9961cb0ef41Sopenharmony_ci 9971cb0ef41Sopenharmony_ci 9981cb0ef41Sopenharmony_ci#endif // !UCONFIG_NO_NORMALIZATION 999