/*
                            __  __            _
                         ___\ \/ /_ __   __ _| |_
                        / _ \\  /| '_ \ / _` | __|
                       |  __//  \| |_) | (_| | |_
                        \___/_/\_\ .__/ \__,_|\__|
                                 |_| XML parser

   Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
   Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net>
   Copyright (c) 2001-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
   Copyright (c) 2002 Greg Stein <gstein@users.sourceforge.net>
   Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net>
   Copyright (c) 2005-2009 Steven Solie <steven@solie.ca>
   Copyright (c) 2016-2022 Sebastian Pipping <sebastian@pipping.org>
   Copyright (c) 2016 Pascal Cuoq <cuoq@trust-in-soft.com>
   Copyright (c) 2016 Don Lewis <truckman@apache.org>
   Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk>
   Copyright (c) 2017 Alexander Bluhm <alexander.bluhm@gmx.net>
   Copyright (c) 2017 Benbuck Nason <bnason@netflix.com>
   Copyright (c) 2017 José Gutiérrez de la Concha <jose@zeroc.com>
   Copyright (c) 2019 David Loffredo <loffredo@steptools.com>
   Copyright (c) 2021 Dong-hee Na <donghee.na@python.org>
   Copyright (c) 2022 Martin Ettl <ettl.martin78@googlemail.com>
   Licensed under the MIT license:

   Permission is hereby granted, free of charge, to any person obtaining
   a copy of this software and associated documentation files (the
   "Software"), to deal in the Software without restriction, including
   without limitation the rights to use, copy, modify, merge, publish,
   distribute, sublicense, and/or sell copies of the Software, and to permit
   persons to whom the Software is furnished to do so, subject to the
   following conditions:

   The above copyright notice and this permission notice shall be included
   in all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
   NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
   DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
   OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
   USE OR OTHER DEALINGS IN THE SOFTWARE.
457db96d56Sopenharmony_ci*/ 467db96d56Sopenharmony_ci 477db96d56Sopenharmony_ci#include <expat_config.h> 487db96d56Sopenharmony_ci 497db96d56Sopenharmony_ci#include <stddef.h> 507db96d56Sopenharmony_ci#include <string.h> /* memcpy */ 517db96d56Sopenharmony_ci#include <stdbool.h> 527db96d56Sopenharmony_ci 537db96d56Sopenharmony_ci#ifdef _WIN32 547db96d56Sopenharmony_ci# include "winconfig.h" 557db96d56Sopenharmony_ci#endif 567db96d56Sopenharmony_ci 577db96d56Sopenharmony_ci#include "expat_external.h" 587db96d56Sopenharmony_ci#include "internal.h" 597db96d56Sopenharmony_ci#include "xmltok.h" 607db96d56Sopenharmony_ci#include "nametab.h" 617db96d56Sopenharmony_ci 627db96d56Sopenharmony_ci#ifdef XML_DTD 637db96d56Sopenharmony_ci# define IGNORE_SECTION_TOK_VTABLE , PREFIX(ignoreSectionTok) 647db96d56Sopenharmony_ci#else 657db96d56Sopenharmony_ci# define IGNORE_SECTION_TOK_VTABLE /* as nothing */ 667db96d56Sopenharmony_ci#endif 677db96d56Sopenharmony_ci 687db96d56Sopenharmony_ci#define VTABLE1 \ 697db96d56Sopenharmony_ci {PREFIX(prologTok), PREFIX(contentTok), \ 707db96d56Sopenharmony_ci PREFIX(cdataSectionTok) IGNORE_SECTION_TOK_VTABLE}, \ 717db96d56Sopenharmony_ci {PREFIX(attributeValueTok), PREFIX(entityValueTok)}, \ 727db96d56Sopenharmony_ci PREFIX(nameMatchesAscii), PREFIX(nameLength), PREFIX(skipS), \ 737db96d56Sopenharmony_ci PREFIX(getAtts), PREFIX(charRefNumber), PREFIX(predefinedEntityName), \ 747db96d56Sopenharmony_ci PREFIX(updatePosition), PREFIX(isPublicId) 757db96d56Sopenharmony_ci 767db96d56Sopenharmony_ci#define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16) 777db96d56Sopenharmony_ci 787db96d56Sopenharmony_ci#define UCS2_GET_NAMING(pages, hi, lo) \ 797db96d56Sopenharmony_ci (namingBitmap[(pages[hi] << 3) + ((lo) >> 5)] & (1u << ((lo)&0x1F))) 807db96d56Sopenharmony_ci 817db96d56Sopenharmony_ci/* A 2 byte UTF-8 representation splits the characters 11 bits between 827db96d56Sopenharmony_ci the bottom 5 and 6 bits of the bytes. 
We need 8 bits to index into 837db96d56Sopenharmony_ci pages, 3 bits to add to that index and 5 bits to generate the mask. 847db96d56Sopenharmony_ci*/ 857db96d56Sopenharmony_ci#define UTF8_GET_NAMING2(pages, byte) \ 867db96d56Sopenharmony_ci (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \ 877db96d56Sopenharmony_ci + ((((byte)[0]) & 3) << 1) + ((((byte)[1]) >> 5) & 1)] \ 887db96d56Sopenharmony_ci & (1u << (((byte)[1]) & 0x1F))) 897db96d56Sopenharmony_ci 907db96d56Sopenharmony_ci/* A 3 byte UTF-8 representation splits the characters 16 bits between 917db96d56Sopenharmony_ci the bottom 4, 6 and 6 bits of the bytes. We need 8 bits to index 927db96d56Sopenharmony_ci into pages, 3 bits to add to that index and 5 bits to generate the 937db96d56Sopenharmony_ci mask. 947db96d56Sopenharmony_ci*/ 957db96d56Sopenharmony_ci#define UTF8_GET_NAMING3(pages, byte) \ 967db96d56Sopenharmony_ci (namingBitmap \ 977db96d56Sopenharmony_ci [((pages)[((((byte)[0]) & 0xF) << 4) + ((((byte)[1]) >> 2) & 0xF)] \ 987db96d56Sopenharmony_ci << 3) \ 997db96d56Sopenharmony_ci + ((((byte)[1]) & 3) << 1) + ((((byte)[2]) >> 5) & 1)] \ 1007db96d56Sopenharmony_ci & (1u << (((byte)[2]) & 0x1F))) 1017db96d56Sopenharmony_ci 1027db96d56Sopenharmony_ci/* Detection of invalid UTF-8 sequences is based on Table 3.1B 1037db96d56Sopenharmony_ci of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/ 1047db96d56Sopenharmony_ci with the additional restriction of not allowing the Unicode 1057db96d56Sopenharmony_ci code points 0xFFFF and 0xFFFE (sequences EF,BF,BF and EF,BF,BE). 
1067db96d56Sopenharmony_ci Implementation details: 1077db96d56Sopenharmony_ci (A & 0x80) == 0 means A < 0x80 1087db96d56Sopenharmony_ci and 1097db96d56Sopenharmony_ci (A & 0xC0) == 0xC0 means A > 0xBF 1107db96d56Sopenharmony_ci*/ 1117db96d56Sopenharmony_ci 1127db96d56Sopenharmony_ci#define UTF8_INVALID2(p) \ 1137db96d56Sopenharmony_ci ((*p) < 0xC2 || ((p)[1] & 0x80) == 0 || ((p)[1] & 0xC0) == 0xC0) 1147db96d56Sopenharmony_ci 1157db96d56Sopenharmony_ci#define UTF8_INVALID3(p) \ 1167db96d56Sopenharmony_ci (((p)[2] & 0x80) == 0 \ 1177db96d56Sopenharmony_ci || ((*p) == 0xEF && (p)[1] == 0xBF ? (p)[2] > 0xBD \ 1187db96d56Sopenharmony_ci : ((p)[2] & 0xC0) == 0xC0) \ 1197db96d56Sopenharmony_ci || ((*p) == 0xE0 \ 1207db96d56Sopenharmony_ci ? (p)[1] < 0xA0 || ((p)[1] & 0xC0) == 0xC0 \ 1217db96d56Sopenharmony_ci : ((p)[1] & 0x80) == 0 \ 1227db96d56Sopenharmony_ci || ((*p) == 0xED ? (p)[1] > 0x9F : ((p)[1] & 0xC0) == 0xC0))) 1237db96d56Sopenharmony_ci 1247db96d56Sopenharmony_ci#define UTF8_INVALID4(p) \ 1257db96d56Sopenharmony_ci (((p)[3] & 0x80) == 0 || ((p)[3] & 0xC0) == 0xC0 || ((p)[2] & 0x80) == 0 \ 1267db96d56Sopenharmony_ci || ((p)[2] & 0xC0) == 0xC0 \ 1277db96d56Sopenharmony_ci || ((*p) == 0xF0 \ 1287db96d56Sopenharmony_ci ? (p)[1] < 0x90 || ((p)[1] & 0xC0) == 0xC0 \ 1297db96d56Sopenharmony_ci : ((p)[1] & 0x80) == 0 \ 1307db96d56Sopenharmony_ci || ((*p) == 0xF4 ? 
(p)[1] > 0x8F : ((p)[1] & 0xC0) == 0xC0))) 1317db96d56Sopenharmony_ci 1327db96d56Sopenharmony_cistatic int PTRFASTCALL 1337db96d56Sopenharmony_ciisNever(const ENCODING *enc, const char *p) { 1347db96d56Sopenharmony_ci UNUSED_P(enc); 1357db96d56Sopenharmony_ci UNUSED_P(p); 1367db96d56Sopenharmony_ci return 0; 1377db96d56Sopenharmony_ci} 1387db96d56Sopenharmony_ci 1397db96d56Sopenharmony_cistatic int PTRFASTCALL 1407db96d56Sopenharmony_ciutf8_isName2(const ENCODING *enc, const char *p) { 1417db96d56Sopenharmony_ci UNUSED_P(enc); 1427db96d56Sopenharmony_ci return UTF8_GET_NAMING2(namePages, (const unsigned char *)p); 1437db96d56Sopenharmony_ci} 1447db96d56Sopenharmony_ci 1457db96d56Sopenharmony_cistatic int PTRFASTCALL 1467db96d56Sopenharmony_ciutf8_isName3(const ENCODING *enc, const char *p) { 1477db96d56Sopenharmony_ci UNUSED_P(enc); 1487db96d56Sopenharmony_ci return UTF8_GET_NAMING3(namePages, (const unsigned char *)p); 1497db96d56Sopenharmony_ci} 1507db96d56Sopenharmony_ci 1517db96d56Sopenharmony_ci#define utf8_isName4 isNever 1527db96d56Sopenharmony_ci 1537db96d56Sopenharmony_cistatic int PTRFASTCALL 1547db96d56Sopenharmony_ciutf8_isNmstrt2(const ENCODING *enc, const char *p) { 1557db96d56Sopenharmony_ci UNUSED_P(enc); 1567db96d56Sopenharmony_ci return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p); 1577db96d56Sopenharmony_ci} 1587db96d56Sopenharmony_ci 1597db96d56Sopenharmony_cistatic int PTRFASTCALL 1607db96d56Sopenharmony_ciutf8_isNmstrt3(const ENCODING *enc, const char *p) { 1617db96d56Sopenharmony_ci UNUSED_P(enc); 1627db96d56Sopenharmony_ci return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p); 1637db96d56Sopenharmony_ci} 1647db96d56Sopenharmony_ci 1657db96d56Sopenharmony_ci#define utf8_isNmstrt4 isNever 1667db96d56Sopenharmony_ci 1677db96d56Sopenharmony_cistatic int PTRFASTCALL 1687db96d56Sopenharmony_ciutf8_isInvalid2(const ENCODING *enc, const char *p) { 1697db96d56Sopenharmony_ci UNUSED_P(enc); 1707db96d56Sopenharmony_ci return 
UTF8_INVALID2((const unsigned char *)p); 1717db96d56Sopenharmony_ci} 1727db96d56Sopenharmony_ci 1737db96d56Sopenharmony_cistatic int PTRFASTCALL 1747db96d56Sopenharmony_ciutf8_isInvalid3(const ENCODING *enc, const char *p) { 1757db96d56Sopenharmony_ci UNUSED_P(enc); 1767db96d56Sopenharmony_ci return UTF8_INVALID3((const unsigned char *)p); 1777db96d56Sopenharmony_ci} 1787db96d56Sopenharmony_ci 1797db96d56Sopenharmony_cistatic int PTRFASTCALL 1807db96d56Sopenharmony_ciutf8_isInvalid4(const ENCODING *enc, const char *p) { 1817db96d56Sopenharmony_ci UNUSED_P(enc); 1827db96d56Sopenharmony_ci return UTF8_INVALID4((const unsigned char *)p); 1837db96d56Sopenharmony_ci} 1847db96d56Sopenharmony_ci 1857db96d56Sopenharmony_cistruct normal_encoding { 1867db96d56Sopenharmony_ci ENCODING enc; 1877db96d56Sopenharmony_ci unsigned char type[256]; 1887db96d56Sopenharmony_ci#ifdef XML_MIN_SIZE 1897db96d56Sopenharmony_ci int(PTRFASTCALL *byteType)(const ENCODING *, const char *); 1907db96d56Sopenharmony_ci int(PTRFASTCALL *isNameMin)(const ENCODING *, const char *); 1917db96d56Sopenharmony_ci int(PTRFASTCALL *isNmstrtMin)(const ENCODING *, const char *); 1927db96d56Sopenharmony_ci int(PTRFASTCALL *byteToAscii)(const ENCODING *, const char *); 1937db96d56Sopenharmony_ci int(PTRCALL *charMatches)(const ENCODING *, const char *, int); 1947db96d56Sopenharmony_ci#endif /* XML_MIN_SIZE */ 1957db96d56Sopenharmony_ci int(PTRFASTCALL *isName2)(const ENCODING *, const char *); 1967db96d56Sopenharmony_ci int(PTRFASTCALL *isName3)(const ENCODING *, const char *); 1977db96d56Sopenharmony_ci int(PTRFASTCALL *isName4)(const ENCODING *, const char *); 1987db96d56Sopenharmony_ci int(PTRFASTCALL *isNmstrt2)(const ENCODING *, const char *); 1997db96d56Sopenharmony_ci int(PTRFASTCALL *isNmstrt3)(const ENCODING *, const char *); 2007db96d56Sopenharmony_ci int(PTRFASTCALL *isNmstrt4)(const ENCODING *, const char *); 2017db96d56Sopenharmony_ci int(PTRFASTCALL *isInvalid2)(const ENCODING *, const char *); 
2027db96d56Sopenharmony_ci int(PTRFASTCALL *isInvalid3)(const ENCODING *, const char *); 2037db96d56Sopenharmony_ci int(PTRFASTCALL *isInvalid4)(const ENCODING *, const char *); 2047db96d56Sopenharmony_ci}; 2057db96d56Sopenharmony_ci 2067db96d56Sopenharmony_ci#define AS_NORMAL_ENCODING(enc) ((const struct normal_encoding *)(enc)) 2077db96d56Sopenharmony_ci 2087db96d56Sopenharmony_ci#ifdef XML_MIN_SIZE 2097db96d56Sopenharmony_ci 2107db96d56Sopenharmony_ci# define STANDARD_VTABLE(E) \ 2117db96d56Sopenharmony_ci E##byteType, E##isNameMin, E##isNmstrtMin, E##byteToAscii, E##charMatches, 2127db96d56Sopenharmony_ci 2137db96d56Sopenharmony_ci#else 2147db96d56Sopenharmony_ci 2157db96d56Sopenharmony_ci# define STANDARD_VTABLE(E) /* as nothing */ 2167db96d56Sopenharmony_ci 2177db96d56Sopenharmony_ci#endif 2187db96d56Sopenharmony_ci 2197db96d56Sopenharmony_ci#define NORMAL_VTABLE(E) \ 2207db96d56Sopenharmony_ci E##isName2, E##isName3, E##isName4, E##isNmstrt2, E##isNmstrt3, \ 2217db96d56Sopenharmony_ci E##isNmstrt4, E##isInvalid2, E##isInvalid3, E##isInvalid4 2227db96d56Sopenharmony_ci 2237db96d56Sopenharmony_ci#define NULL_VTABLE \ 2247db96d56Sopenharmony_ci /* isName2 */ NULL, /* isName3 */ NULL, /* isName4 */ NULL, \ 2257db96d56Sopenharmony_ci /* isNmstrt2 */ NULL, /* isNmstrt3 */ NULL, /* isNmstrt4 */ NULL, \ 2267db96d56Sopenharmony_ci /* isInvalid2 */ NULL, /* isInvalid3 */ NULL, /* isInvalid4 */ NULL 2277db96d56Sopenharmony_ci 2287db96d56Sopenharmony_cistatic int FASTCALL checkCharRefNumber(int); 2297db96d56Sopenharmony_ci 2307db96d56Sopenharmony_ci#include "xmltok_impl.h" 2317db96d56Sopenharmony_ci#include "ascii.h" 2327db96d56Sopenharmony_ci 2337db96d56Sopenharmony_ci#ifdef XML_MIN_SIZE 2347db96d56Sopenharmony_ci# define sb_isNameMin isNever 2357db96d56Sopenharmony_ci# define sb_isNmstrtMin isNever 2367db96d56Sopenharmony_ci#endif 2377db96d56Sopenharmony_ci 2387db96d56Sopenharmony_ci#ifdef XML_MIN_SIZE 2397db96d56Sopenharmony_ci# define MINBPC(enc) 
((enc)->minBytesPerChar) 2407db96d56Sopenharmony_ci#else 2417db96d56Sopenharmony_ci/* minimum bytes per character */ 2427db96d56Sopenharmony_ci# define MINBPC(enc) 1 2437db96d56Sopenharmony_ci#endif 2447db96d56Sopenharmony_ci 2457db96d56Sopenharmony_ci#define SB_BYTE_TYPE(enc, p) \ 2467db96d56Sopenharmony_ci (((struct normal_encoding *)(enc))->type[(unsigned char)*(p)]) 2477db96d56Sopenharmony_ci 2487db96d56Sopenharmony_ci#ifdef XML_MIN_SIZE 2497db96d56Sopenharmony_cistatic int PTRFASTCALL 2507db96d56Sopenharmony_cisb_byteType(const ENCODING *enc, const char *p) { 2517db96d56Sopenharmony_ci return SB_BYTE_TYPE(enc, p); 2527db96d56Sopenharmony_ci} 2537db96d56Sopenharmony_ci# define BYTE_TYPE(enc, p) (AS_NORMAL_ENCODING(enc)->byteType(enc, p)) 2547db96d56Sopenharmony_ci#else 2557db96d56Sopenharmony_ci# define BYTE_TYPE(enc, p) SB_BYTE_TYPE(enc, p) 2567db96d56Sopenharmony_ci#endif 2577db96d56Sopenharmony_ci 2587db96d56Sopenharmony_ci#ifdef XML_MIN_SIZE 2597db96d56Sopenharmony_ci# define BYTE_TO_ASCII(enc, p) (AS_NORMAL_ENCODING(enc)->byteToAscii(enc, p)) 2607db96d56Sopenharmony_cistatic int PTRFASTCALL 2617db96d56Sopenharmony_cisb_byteToAscii(const ENCODING *enc, const char *p) { 2627db96d56Sopenharmony_ci UNUSED_P(enc); 2637db96d56Sopenharmony_ci return *p; 2647db96d56Sopenharmony_ci} 2657db96d56Sopenharmony_ci#else 2667db96d56Sopenharmony_ci# define BYTE_TO_ASCII(enc, p) (*(p)) 2677db96d56Sopenharmony_ci#endif 2687db96d56Sopenharmony_ci 2697db96d56Sopenharmony_ci#define IS_NAME_CHAR(enc, p, n) (AS_NORMAL_ENCODING(enc)->isName##n(enc, p)) 2707db96d56Sopenharmony_ci#define IS_NMSTRT_CHAR(enc, p, n) (AS_NORMAL_ENCODING(enc)->isNmstrt##n(enc, p)) 2717db96d56Sopenharmony_ci#ifdef XML_MIN_SIZE 2727db96d56Sopenharmony_ci# define IS_INVALID_CHAR(enc, p, n) \ 2737db96d56Sopenharmony_ci (AS_NORMAL_ENCODING(enc)->isInvalid##n \ 2747db96d56Sopenharmony_ci && AS_NORMAL_ENCODING(enc)->isInvalid##n(enc, p)) 2757db96d56Sopenharmony_ci#else 2767db96d56Sopenharmony_ci# define 
IS_INVALID_CHAR(enc, p, n) \ 2777db96d56Sopenharmony_ci (AS_NORMAL_ENCODING(enc)->isInvalid##n(enc, p)) 2787db96d56Sopenharmony_ci#endif 2797db96d56Sopenharmony_ci 2807db96d56Sopenharmony_ci#ifdef XML_MIN_SIZE 2817db96d56Sopenharmony_ci# define IS_NAME_CHAR_MINBPC(enc, p) \ 2827db96d56Sopenharmony_ci (AS_NORMAL_ENCODING(enc)->isNameMin(enc, p)) 2837db96d56Sopenharmony_ci# define IS_NMSTRT_CHAR_MINBPC(enc, p) \ 2847db96d56Sopenharmony_ci (AS_NORMAL_ENCODING(enc)->isNmstrtMin(enc, p)) 2857db96d56Sopenharmony_ci#else 2867db96d56Sopenharmony_ci# define IS_NAME_CHAR_MINBPC(enc, p) (0) 2877db96d56Sopenharmony_ci# define IS_NMSTRT_CHAR_MINBPC(enc, p) (0) 2887db96d56Sopenharmony_ci#endif 2897db96d56Sopenharmony_ci 2907db96d56Sopenharmony_ci#ifdef XML_MIN_SIZE 2917db96d56Sopenharmony_ci# define CHAR_MATCHES(enc, p, c) \ 2927db96d56Sopenharmony_ci (AS_NORMAL_ENCODING(enc)->charMatches(enc, p, c)) 2937db96d56Sopenharmony_cistatic int PTRCALL 2947db96d56Sopenharmony_cisb_charMatches(const ENCODING *enc, const char *p, int c) { 2957db96d56Sopenharmony_ci UNUSED_P(enc); 2967db96d56Sopenharmony_ci return *p == c; 2977db96d56Sopenharmony_ci} 2987db96d56Sopenharmony_ci#else 2997db96d56Sopenharmony_ci/* c is an ASCII character */ 3007db96d56Sopenharmony_ci# define CHAR_MATCHES(enc, p, c) (*(p) == (c)) 3017db96d56Sopenharmony_ci#endif 3027db96d56Sopenharmony_ci 3037db96d56Sopenharmony_ci#define PREFIX(ident) normal_##ident 3047db96d56Sopenharmony_ci#define XML_TOK_IMPL_C 3057db96d56Sopenharmony_ci#include "xmltok_impl.c" 3067db96d56Sopenharmony_ci#undef XML_TOK_IMPL_C 3077db96d56Sopenharmony_ci 3087db96d56Sopenharmony_ci#undef MINBPC 3097db96d56Sopenharmony_ci#undef BYTE_TYPE 3107db96d56Sopenharmony_ci#undef BYTE_TO_ASCII 3117db96d56Sopenharmony_ci#undef CHAR_MATCHES 3127db96d56Sopenharmony_ci#undef IS_NAME_CHAR 3137db96d56Sopenharmony_ci#undef IS_NAME_CHAR_MINBPC 3147db96d56Sopenharmony_ci#undef IS_NMSTRT_CHAR 3157db96d56Sopenharmony_ci#undef IS_NMSTRT_CHAR_MINBPC 
3167db96d56Sopenharmony_ci#undef IS_INVALID_CHAR 3177db96d56Sopenharmony_ci 3187db96d56Sopenharmony_cienum { /* UTF8_cvalN is value of masked first byte of N byte sequence */ 3197db96d56Sopenharmony_ci UTF8_cval1 = 0x00, 3207db96d56Sopenharmony_ci UTF8_cval2 = 0xc0, 3217db96d56Sopenharmony_ci UTF8_cval3 = 0xe0, 3227db96d56Sopenharmony_ci UTF8_cval4 = 0xf0 3237db96d56Sopenharmony_ci}; 3247db96d56Sopenharmony_ci 3257db96d56Sopenharmony_civoid 3267db96d56Sopenharmony_ci_INTERNAL_trim_to_complete_utf8_characters(const char *from, 3277db96d56Sopenharmony_ci const char **fromLimRef) { 3287db96d56Sopenharmony_ci const char *fromLim = *fromLimRef; 3297db96d56Sopenharmony_ci size_t walked = 0; 3307db96d56Sopenharmony_ci for (; fromLim > from; fromLim--, walked++) { 3317db96d56Sopenharmony_ci const unsigned char prev = (unsigned char)fromLim[-1]; 3327db96d56Sopenharmony_ci if ((prev & 0xf8u) 3337db96d56Sopenharmony_ci == 0xf0u) { /* 4-byte character, lead by 0b11110xxx byte */ 3347db96d56Sopenharmony_ci if (walked + 1 >= 4) { 3357db96d56Sopenharmony_ci fromLim += 4 - 1; 3367db96d56Sopenharmony_ci break; 3377db96d56Sopenharmony_ci } else { 3387db96d56Sopenharmony_ci walked = 0; 3397db96d56Sopenharmony_ci } 3407db96d56Sopenharmony_ci } else if ((prev & 0xf0u) 3417db96d56Sopenharmony_ci == 0xe0u) { /* 3-byte character, lead by 0b1110xxxx byte */ 3427db96d56Sopenharmony_ci if (walked + 1 >= 3) { 3437db96d56Sopenharmony_ci fromLim += 3 - 1; 3447db96d56Sopenharmony_ci break; 3457db96d56Sopenharmony_ci } else { 3467db96d56Sopenharmony_ci walked = 0; 3477db96d56Sopenharmony_ci } 3487db96d56Sopenharmony_ci } else if ((prev & 0xe0u) 3497db96d56Sopenharmony_ci == 0xc0u) { /* 2-byte character, lead by 0b110xxxxx byte */ 3507db96d56Sopenharmony_ci if (walked + 1 >= 2) { 3517db96d56Sopenharmony_ci fromLim += 2 - 1; 3527db96d56Sopenharmony_ci break; 3537db96d56Sopenharmony_ci } else { 3547db96d56Sopenharmony_ci walked = 0; 3557db96d56Sopenharmony_ci } 3567db96d56Sopenharmony_ci } else if 
((prev & 0x80u) 3577db96d56Sopenharmony_ci == 0x00u) { /* 1-byte character, matching 0b0xxxxxxx */ 3587db96d56Sopenharmony_ci break; 3597db96d56Sopenharmony_ci } 3607db96d56Sopenharmony_ci } 3617db96d56Sopenharmony_ci *fromLimRef = fromLim; 3627db96d56Sopenharmony_ci} 3637db96d56Sopenharmony_ci 3647db96d56Sopenharmony_cistatic enum XML_Convert_Result PTRCALL 3657db96d56Sopenharmony_ciutf8_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim, 3667db96d56Sopenharmony_ci char **toP, const char *toLim) { 3677db96d56Sopenharmony_ci bool input_incomplete = false; 3687db96d56Sopenharmony_ci bool output_exhausted = false; 3697db96d56Sopenharmony_ci 3707db96d56Sopenharmony_ci /* Avoid copying partial characters (due to limited space). */ 3717db96d56Sopenharmony_ci const ptrdiff_t bytesAvailable = fromLim - *fromP; 3727db96d56Sopenharmony_ci const ptrdiff_t bytesStorable = toLim - *toP; 3737db96d56Sopenharmony_ci UNUSED_P(enc); 3747db96d56Sopenharmony_ci if (bytesAvailable > bytesStorable) { 3757db96d56Sopenharmony_ci fromLim = *fromP + bytesStorable; 3767db96d56Sopenharmony_ci output_exhausted = true; 3777db96d56Sopenharmony_ci } 3787db96d56Sopenharmony_ci 3797db96d56Sopenharmony_ci /* Avoid copying partial characters (from incomplete input). 
*/ 3807db96d56Sopenharmony_ci { 3817db96d56Sopenharmony_ci const char *const fromLimBefore = fromLim; 3827db96d56Sopenharmony_ci _INTERNAL_trim_to_complete_utf8_characters(*fromP, &fromLim); 3837db96d56Sopenharmony_ci if (fromLim < fromLimBefore) { 3847db96d56Sopenharmony_ci input_incomplete = true; 3857db96d56Sopenharmony_ci } 3867db96d56Sopenharmony_ci } 3877db96d56Sopenharmony_ci 3887db96d56Sopenharmony_ci { 3897db96d56Sopenharmony_ci const ptrdiff_t bytesToCopy = fromLim - *fromP; 3907db96d56Sopenharmony_ci memcpy(*toP, *fromP, bytesToCopy); 3917db96d56Sopenharmony_ci *fromP += bytesToCopy; 3927db96d56Sopenharmony_ci *toP += bytesToCopy; 3937db96d56Sopenharmony_ci } 3947db96d56Sopenharmony_ci 3957db96d56Sopenharmony_ci if (output_exhausted) /* needs to go first */ 3967db96d56Sopenharmony_ci return XML_CONVERT_OUTPUT_EXHAUSTED; 3977db96d56Sopenharmony_ci else if (input_incomplete) 3987db96d56Sopenharmony_ci return XML_CONVERT_INPUT_INCOMPLETE; 3997db96d56Sopenharmony_ci else 4007db96d56Sopenharmony_ci return XML_CONVERT_COMPLETED; 4017db96d56Sopenharmony_ci} 4027db96d56Sopenharmony_ci 4037db96d56Sopenharmony_cistatic enum XML_Convert_Result PTRCALL 4047db96d56Sopenharmony_ciutf8_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim, 4057db96d56Sopenharmony_ci unsigned short **toP, const unsigned short *toLim) { 4067db96d56Sopenharmony_ci enum XML_Convert_Result res = XML_CONVERT_COMPLETED; 4077db96d56Sopenharmony_ci unsigned short *to = *toP; 4087db96d56Sopenharmony_ci const char *from = *fromP; 4097db96d56Sopenharmony_ci while (from < fromLim && to < toLim) { 4107db96d56Sopenharmony_ci switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) { 4117db96d56Sopenharmony_ci case BT_LEAD2: 4127db96d56Sopenharmony_ci if (fromLim - from < 2) { 4137db96d56Sopenharmony_ci res = XML_CONVERT_INPUT_INCOMPLETE; 4147db96d56Sopenharmony_ci goto after; 4157db96d56Sopenharmony_ci } 4167db96d56Sopenharmony_ci *to++ = (unsigned short)(((from[0] & 0x1f) 
<< 6) | (from[1] & 0x3f)); 4177db96d56Sopenharmony_ci from += 2; 4187db96d56Sopenharmony_ci break; 4197db96d56Sopenharmony_ci case BT_LEAD3: 4207db96d56Sopenharmony_ci if (fromLim - from < 3) { 4217db96d56Sopenharmony_ci res = XML_CONVERT_INPUT_INCOMPLETE; 4227db96d56Sopenharmony_ci goto after; 4237db96d56Sopenharmony_ci } 4247db96d56Sopenharmony_ci *to++ = (unsigned short)(((from[0] & 0xf) << 12) | ((from[1] & 0x3f) << 6) 4257db96d56Sopenharmony_ci | (from[2] & 0x3f)); 4267db96d56Sopenharmony_ci from += 3; 4277db96d56Sopenharmony_ci break; 4287db96d56Sopenharmony_ci case BT_LEAD4: { 4297db96d56Sopenharmony_ci unsigned long n; 4307db96d56Sopenharmony_ci if (toLim - to < 2) { 4317db96d56Sopenharmony_ci res = XML_CONVERT_OUTPUT_EXHAUSTED; 4327db96d56Sopenharmony_ci goto after; 4337db96d56Sopenharmony_ci } 4347db96d56Sopenharmony_ci if (fromLim - from < 4) { 4357db96d56Sopenharmony_ci res = XML_CONVERT_INPUT_INCOMPLETE; 4367db96d56Sopenharmony_ci goto after; 4377db96d56Sopenharmony_ci } 4387db96d56Sopenharmony_ci n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12) 4397db96d56Sopenharmony_ci | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f); 4407db96d56Sopenharmony_ci n -= 0x10000; 4417db96d56Sopenharmony_ci to[0] = (unsigned short)((n >> 10) | 0xD800); 4427db96d56Sopenharmony_ci to[1] = (unsigned short)((n & 0x3FF) | 0xDC00); 4437db96d56Sopenharmony_ci to += 2; 4447db96d56Sopenharmony_ci from += 4; 4457db96d56Sopenharmony_ci } break; 4467db96d56Sopenharmony_ci default: 4477db96d56Sopenharmony_ci *to++ = *from++; 4487db96d56Sopenharmony_ci break; 4497db96d56Sopenharmony_ci } 4507db96d56Sopenharmony_ci } 4517db96d56Sopenharmony_ci if (from < fromLim) 4527db96d56Sopenharmony_ci res = XML_CONVERT_OUTPUT_EXHAUSTED; 4537db96d56Sopenharmony_ciafter: 4547db96d56Sopenharmony_ci *fromP = from; 4557db96d56Sopenharmony_ci *toP = to; 4567db96d56Sopenharmony_ci return res; 4577db96d56Sopenharmony_ci} 4587db96d56Sopenharmony_ci 4597db96d56Sopenharmony_ci#ifdef XML_NS 
4607db96d56Sopenharmony_cistatic const struct normal_encoding utf8_encoding_ns 4617db96d56Sopenharmony_ci = {{VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0}, 4627db96d56Sopenharmony_ci { 4637db96d56Sopenharmony_ci# include "asciitab.h" 4647db96d56Sopenharmony_ci# include "utf8tab.h" 4657db96d56Sopenharmony_ci }, 4667db96d56Sopenharmony_ci STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)}; 4677db96d56Sopenharmony_ci#endif 4687db96d56Sopenharmony_ci 4697db96d56Sopenharmony_cistatic const struct normal_encoding utf8_encoding 4707db96d56Sopenharmony_ci = {{VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0}, 4717db96d56Sopenharmony_ci { 4727db96d56Sopenharmony_ci#define BT_COLON BT_NMSTRT 4737db96d56Sopenharmony_ci#include "asciitab.h" 4747db96d56Sopenharmony_ci#undef BT_COLON 4757db96d56Sopenharmony_ci#include "utf8tab.h" 4767db96d56Sopenharmony_ci }, 4777db96d56Sopenharmony_ci STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)}; 4787db96d56Sopenharmony_ci 4797db96d56Sopenharmony_ci#ifdef XML_NS 4807db96d56Sopenharmony_ci 4817db96d56Sopenharmony_cistatic const struct normal_encoding internal_utf8_encoding_ns 4827db96d56Sopenharmony_ci = {{VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0}, 4837db96d56Sopenharmony_ci { 4847db96d56Sopenharmony_ci# include "iasciitab.h" 4857db96d56Sopenharmony_ci# include "utf8tab.h" 4867db96d56Sopenharmony_ci }, 4877db96d56Sopenharmony_ci STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)}; 4887db96d56Sopenharmony_ci 4897db96d56Sopenharmony_ci#endif 4907db96d56Sopenharmony_ci 4917db96d56Sopenharmony_cistatic const struct normal_encoding internal_utf8_encoding 4927db96d56Sopenharmony_ci = {{VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0}, 4937db96d56Sopenharmony_ci { 4947db96d56Sopenharmony_ci#define BT_COLON BT_NMSTRT 4957db96d56Sopenharmony_ci#include "iasciitab.h" 4967db96d56Sopenharmony_ci#undef BT_COLON 4977db96d56Sopenharmony_ci#include "utf8tab.h" 4987db96d56Sopenharmony_ci }, 4997db96d56Sopenharmony_ci STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)}; 5007db96d56Sopenharmony_ci 
5017db96d56Sopenharmony_cistatic enum XML_Convert_Result PTRCALL 5027db96d56Sopenharmony_cilatin1_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim, 5037db96d56Sopenharmony_ci char **toP, const char *toLim) { 5047db96d56Sopenharmony_ci UNUSED_P(enc); 5057db96d56Sopenharmony_ci for (;;) { 5067db96d56Sopenharmony_ci unsigned char c; 5077db96d56Sopenharmony_ci if (*fromP == fromLim) 5087db96d56Sopenharmony_ci return XML_CONVERT_COMPLETED; 5097db96d56Sopenharmony_ci c = (unsigned char)**fromP; 5107db96d56Sopenharmony_ci if (c & 0x80) { 5117db96d56Sopenharmony_ci if (toLim - *toP < 2) 5127db96d56Sopenharmony_ci return XML_CONVERT_OUTPUT_EXHAUSTED; 5137db96d56Sopenharmony_ci *(*toP)++ = (char)((c >> 6) | UTF8_cval2); 5147db96d56Sopenharmony_ci *(*toP)++ = (char)((c & 0x3f) | 0x80); 5157db96d56Sopenharmony_ci (*fromP)++; 5167db96d56Sopenharmony_ci } else { 5177db96d56Sopenharmony_ci if (*toP == toLim) 5187db96d56Sopenharmony_ci return XML_CONVERT_OUTPUT_EXHAUSTED; 5197db96d56Sopenharmony_ci *(*toP)++ = *(*fromP)++; 5207db96d56Sopenharmony_ci } 5217db96d56Sopenharmony_ci } 5227db96d56Sopenharmony_ci} 5237db96d56Sopenharmony_ci 5247db96d56Sopenharmony_cistatic enum XML_Convert_Result PTRCALL 5257db96d56Sopenharmony_cilatin1_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim, 5267db96d56Sopenharmony_ci unsigned short **toP, const unsigned short *toLim) { 5277db96d56Sopenharmony_ci UNUSED_P(enc); 5287db96d56Sopenharmony_ci while (*fromP < fromLim && *toP < toLim) 5297db96d56Sopenharmony_ci *(*toP)++ = (unsigned char)*(*fromP)++; 5307db96d56Sopenharmony_ci 5317db96d56Sopenharmony_ci if ((*toP == toLim) && (*fromP < fromLim)) 5327db96d56Sopenharmony_ci return XML_CONVERT_OUTPUT_EXHAUSTED; 5337db96d56Sopenharmony_ci else 5347db96d56Sopenharmony_ci return XML_CONVERT_COMPLETED; 5357db96d56Sopenharmony_ci} 5367db96d56Sopenharmony_ci 5377db96d56Sopenharmony_ci#ifdef XML_NS 5387db96d56Sopenharmony_ci 5397db96d56Sopenharmony_cistatic const struct 
normal_encoding latin1_encoding_ns 5407db96d56Sopenharmony_ci = {{VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0}, 5417db96d56Sopenharmony_ci { 5427db96d56Sopenharmony_ci# include "asciitab.h" 5437db96d56Sopenharmony_ci# include "latin1tab.h" 5447db96d56Sopenharmony_ci }, 5457db96d56Sopenharmony_ci STANDARD_VTABLE(sb_) NULL_VTABLE}; 5467db96d56Sopenharmony_ci 5477db96d56Sopenharmony_ci#endif 5487db96d56Sopenharmony_ci 5497db96d56Sopenharmony_cistatic const struct normal_encoding latin1_encoding 5507db96d56Sopenharmony_ci = {{VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0}, 5517db96d56Sopenharmony_ci { 5527db96d56Sopenharmony_ci#define BT_COLON BT_NMSTRT 5537db96d56Sopenharmony_ci#include "asciitab.h" 5547db96d56Sopenharmony_ci#undef BT_COLON 5557db96d56Sopenharmony_ci#include "latin1tab.h" 5567db96d56Sopenharmony_ci }, 5577db96d56Sopenharmony_ci STANDARD_VTABLE(sb_) NULL_VTABLE}; 5587db96d56Sopenharmony_ci 5597db96d56Sopenharmony_cistatic enum XML_Convert_Result PTRCALL 5607db96d56Sopenharmony_ciascii_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim, 5617db96d56Sopenharmony_ci char **toP, const char *toLim) { 5627db96d56Sopenharmony_ci UNUSED_P(enc); 5637db96d56Sopenharmony_ci while (*fromP < fromLim && *toP < toLim) 5647db96d56Sopenharmony_ci *(*toP)++ = *(*fromP)++; 5657db96d56Sopenharmony_ci 5667db96d56Sopenharmony_ci if ((*toP == toLim) && (*fromP < fromLim)) 5677db96d56Sopenharmony_ci return XML_CONVERT_OUTPUT_EXHAUSTED; 5687db96d56Sopenharmony_ci else 5697db96d56Sopenharmony_ci return XML_CONVERT_COMPLETED; 5707db96d56Sopenharmony_ci} 5717db96d56Sopenharmony_ci 5727db96d56Sopenharmony_ci#ifdef XML_NS 5737db96d56Sopenharmony_ci 5747db96d56Sopenharmony_cistatic const struct normal_encoding ascii_encoding_ns 5757db96d56Sopenharmony_ci = {{VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0}, 5767db96d56Sopenharmony_ci { 5777db96d56Sopenharmony_ci# include "asciitab.h" 5787db96d56Sopenharmony_ci /* BT_NONXML == 0 */ 5797db96d56Sopenharmony_ci }, 
5807db96d56Sopenharmony_ci STANDARD_VTABLE(sb_) NULL_VTABLE}; 5817db96d56Sopenharmony_ci 5827db96d56Sopenharmony_ci#endif 5837db96d56Sopenharmony_ci 5847db96d56Sopenharmony_cistatic const struct normal_encoding ascii_encoding 5857db96d56Sopenharmony_ci = {{VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0}, 5867db96d56Sopenharmony_ci { 5877db96d56Sopenharmony_ci#define BT_COLON BT_NMSTRT 5887db96d56Sopenharmony_ci#include "asciitab.h" 5897db96d56Sopenharmony_ci#undef BT_COLON 5907db96d56Sopenharmony_ci /* BT_NONXML == 0 */ 5917db96d56Sopenharmony_ci }, 5927db96d56Sopenharmony_ci STANDARD_VTABLE(sb_) NULL_VTABLE}; 5937db96d56Sopenharmony_ci 5947db96d56Sopenharmony_cistatic int PTRFASTCALL 5957db96d56Sopenharmony_ciunicode_byte_type(char hi, char lo) { 5967db96d56Sopenharmony_ci switch ((unsigned char)hi) { 5977db96d56Sopenharmony_ci /* 0xD800-0xDBFF first 16-bit code unit or high surrogate (W1) */ 5987db96d56Sopenharmony_ci case 0xD8: 5997db96d56Sopenharmony_ci case 0xD9: 6007db96d56Sopenharmony_ci case 0xDA: 6017db96d56Sopenharmony_ci case 0xDB: 6027db96d56Sopenharmony_ci return BT_LEAD4; 6037db96d56Sopenharmony_ci /* 0xDC00-0xDFFF second 16-bit code unit or low surrogate (W2) */ 6047db96d56Sopenharmony_ci case 0xDC: 6057db96d56Sopenharmony_ci case 0xDD: 6067db96d56Sopenharmony_ci case 0xDE: 6077db96d56Sopenharmony_ci case 0xDF: 6087db96d56Sopenharmony_ci return BT_TRAIL; 6097db96d56Sopenharmony_ci case 0xFF: 6107db96d56Sopenharmony_ci switch ((unsigned char)lo) { 6117db96d56Sopenharmony_ci case 0xFF: /* noncharacter-FFFF */ 6127db96d56Sopenharmony_ci case 0xFE: /* noncharacter-FFFE */ 6137db96d56Sopenharmony_ci return BT_NONXML; 6147db96d56Sopenharmony_ci } 6157db96d56Sopenharmony_ci break; 6167db96d56Sopenharmony_ci } 6177db96d56Sopenharmony_ci return BT_NONASCII; 6187db96d56Sopenharmony_ci} 6197db96d56Sopenharmony_ci 6207db96d56Sopenharmony_ci#define DEFINE_UTF16_TO_UTF8(E) \ 6217db96d56Sopenharmony_ci static enum XML_Convert_Result PTRCALL E##toUtf8( \ 
6227db96d56Sopenharmony_ci const ENCODING *enc, const char **fromP, const char *fromLim, \ 6237db96d56Sopenharmony_ci char **toP, const char *toLim) { \ 6247db96d56Sopenharmony_ci const char *from = *fromP; \ 6257db96d56Sopenharmony_ci UNUSED_P(enc); \ 6267db96d56Sopenharmony_ci fromLim = from + (((fromLim - from) >> 1) << 1); /* shrink to even */ \ 6277db96d56Sopenharmony_ci for (; from < fromLim; from += 2) { \ 6287db96d56Sopenharmony_ci int plane; \ 6297db96d56Sopenharmony_ci unsigned char lo2; \ 6307db96d56Sopenharmony_ci unsigned char lo = GET_LO(from); \ 6317db96d56Sopenharmony_ci unsigned char hi = GET_HI(from); \ 6327db96d56Sopenharmony_ci switch (hi) { \ 6337db96d56Sopenharmony_ci case 0: \ 6347db96d56Sopenharmony_ci if (lo < 0x80) { \ 6357db96d56Sopenharmony_ci if (*toP == toLim) { \ 6367db96d56Sopenharmony_ci *fromP = from; \ 6377db96d56Sopenharmony_ci return XML_CONVERT_OUTPUT_EXHAUSTED; \ 6387db96d56Sopenharmony_ci } \ 6397db96d56Sopenharmony_ci *(*toP)++ = lo; \ 6407db96d56Sopenharmony_ci break; \ 6417db96d56Sopenharmony_ci } \ 6427db96d56Sopenharmony_ci /* fall through */ \ 6437db96d56Sopenharmony_ci case 0x1: \ 6447db96d56Sopenharmony_ci case 0x2: \ 6457db96d56Sopenharmony_ci case 0x3: \ 6467db96d56Sopenharmony_ci case 0x4: \ 6477db96d56Sopenharmony_ci case 0x5: \ 6487db96d56Sopenharmony_ci case 0x6: \ 6497db96d56Sopenharmony_ci case 0x7: \ 6507db96d56Sopenharmony_ci if (toLim - *toP < 2) { \ 6517db96d56Sopenharmony_ci *fromP = from; \ 6527db96d56Sopenharmony_ci return XML_CONVERT_OUTPUT_EXHAUSTED; \ 6537db96d56Sopenharmony_ci } \ 6547db96d56Sopenharmony_ci *(*toP)++ = ((lo >> 6) | (hi << 2) | UTF8_cval2); \ 6557db96d56Sopenharmony_ci *(*toP)++ = ((lo & 0x3f) | 0x80); \ 6567db96d56Sopenharmony_ci break; \ 6577db96d56Sopenharmony_ci default: \ 6587db96d56Sopenharmony_ci if (toLim - *toP < 3) { \ 6597db96d56Sopenharmony_ci *fromP = from; \ 6607db96d56Sopenharmony_ci return XML_CONVERT_OUTPUT_EXHAUSTED; \ 6617db96d56Sopenharmony_ci } \ 
6627db96d56Sopenharmony_ci /* 16 bits divided 4, 6, 6 amongst 3 bytes */ \ 6637db96d56Sopenharmony_ci *(*toP)++ = ((hi >> 4) | UTF8_cval3); \ 6647db96d56Sopenharmony_ci *(*toP)++ = (((hi & 0xf) << 2) | (lo >> 6) | 0x80); \ 6657db96d56Sopenharmony_ci *(*toP)++ = ((lo & 0x3f) | 0x80); \ 6667db96d56Sopenharmony_ci break; \ 6677db96d56Sopenharmony_ci case 0xD8: \ 6687db96d56Sopenharmony_ci case 0xD9: \ 6697db96d56Sopenharmony_ci case 0xDA: \ 6707db96d56Sopenharmony_ci case 0xDB: \ 6717db96d56Sopenharmony_ci if (toLim - *toP < 4) { \ 6727db96d56Sopenharmony_ci *fromP = from; \ 6737db96d56Sopenharmony_ci return XML_CONVERT_OUTPUT_EXHAUSTED; \ 6747db96d56Sopenharmony_ci } \ 6757db96d56Sopenharmony_ci if (fromLim - from < 4) { \ 6767db96d56Sopenharmony_ci *fromP = from; \ 6777db96d56Sopenharmony_ci return XML_CONVERT_INPUT_INCOMPLETE; \ 6787db96d56Sopenharmony_ci } \ 6797db96d56Sopenharmony_ci plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1; \ 6807db96d56Sopenharmony_ci *(*toP)++ = (char)((plane >> 2) | UTF8_cval4); \ 6817db96d56Sopenharmony_ci *(*toP)++ = (((lo >> 2) & 0xF) | ((plane & 0x3) << 4) | 0x80); \ 6827db96d56Sopenharmony_ci from += 2; \ 6837db96d56Sopenharmony_ci lo2 = GET_LO(from); \ 6847db96d56Sopenharmony_ci *(*toP)++ = (((lo & 0x3) << 4) | ((GET_HI(from) & 0x3) << 2) \ 6857db96d56Sopenharmony_ci | (lo2 >> 6) | 0x80); \ 6867db96d56Sopenharmony_ci *(*toP)++ = ((lo2 & 0x3f) | 0x80); \ 6877db96d56Sopenharmony_ci break; \ 6887db96d56Sopenharmony_ci } \ 6897db96d56Sopenharmony_ci } \ 6907db96d56Sopenharmony_ci *fromP = from; \ 6917db96d56Sopenharmony_ci if (from < fromLim) \ 6927db96d56Sopenharmony_ci return XML_CONVERT_INPUT_INCOMPLETE; \ 6937db96d56Sopenharmony_ci else \ 6947db96d56Sopenharmony_ci return XML_CONVERT_COMPLETED; \ 6957db96d56Sopenharmony_ci } 6967db96d56Sopenharmony_ci 6977db96d56Sopenharmony_ci#define DEFINE_UTF16_TO_UTF16(E) \ 6987db96d56Sopenharmony_ci static enum XML_Convert_Result PTRCALL E##toUtf16( \ 6997db96d56Sopenharmony_ci const 
ENCODING *enc, const char **fromP, const char *fromLim, \ 7007db96d56Sopenharmony_ci unsigned short **toP, const unsigned short *toLim) { \ 7017db96d56Sopenharmony_ci enum XML_Convert_Result res = XML_CONVERT_COMPLETED; \ 7027db96d56Sopenharmony_ci UNUSED_P(enc); \ 7037db96d56Sopenharmony_ci fromLim = *fromP + (((fromLim - *fromP) >> 1) << 1); /* shrink to even */ \ 7047db96d56Sopenharmony_ci /* Avoid copying first half only of surrogate */ \ 7057db96d56Sopenharmony_ci if (fromLim - *fromP > ((toLim - *toP) << 1) \ 7067db96d56Sopenharmony_ci && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) { \ 7077db96d56Sopenharmony_ci fromLim -= 2; \ 7087db96d56Sopenharmony_ci res = XML_CONVERT_INPUT_INCOMPLETE; \ 7097db96d56Sopenharmony_ci } \ 7107db96d56Sopenharmony_ci for (; *fromP < fromLim && *toP < toLim; *fromP += 2) \ 7117db96d56Sopenharmony_ci *(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP); \ 7127db96d56Sopenharmony_ci if ((*toP == toLim) && (*fromP < fromLim)) \ 7137db96d56Sopenharmony_ci return XML_CONVERT_OUTPUT_EXHAUSTED; \ 7147db96d56Sopenharmony_ci else \ 7157db96d56Sopenharmony_ci return res; \ 7167db96d56Sopenharmony_ci } 7177db96d56Sopenharmony_ci 7187db96d56Sopenharmony_ci#define SET2(ptr, ch) (((ptr)[0] = ((ch)&0xff)), ((ptr)[1] = ((ch) >> 8))) 7197db96d56Sopenharmony_ci#define GET_LO(ptr) ((unsigned char)(ptr)[0]) 7207db96d56Sopenharmony_ci#define GET_HI(ptr) ((unsigned char)(ptr)[1]) 7217db96d56Sopenharmony_ci 7227db96d56Sopenharmony_ciDEFINE_UTF16_TO_UTF8(little2_) 7237db96d56Sopenharmony_ciDEFINE_UTF16_TO_UTF16(little2_) 7247db96d56Sopenharmony_ci 7257db96d56Sopenharmony_ci#undef SET2 7267db96d56Sopenharmony_ci#undef GET_LO 7277db96d56Sopenharmony_ci#undef GET_HI 7287db96d56Sopenharmony_ci 7297db96d56Sopenharmony_ci#define SET2(ptr, ch) (((ptr)[0] = ((ch) >> 8)), ((ptr)[1] = ((ch)&0xFF))) 7307db96d56Sopenharmony_ci#define GET_LO(ptr) ((unsigned char)(ptr)[1]) 7317db96d56Sopenharmony_ci#define GET_HI(ptr) ((unsigned char)(ptr)[0]) 7327db96d56Sopenharmony_ci 
7337db96d56Sopenharmony_ciDEFINE_UTF16_TO_UTF8(big2_) 7347db96d56Sopenharmony_ciDEFINE_UTF16_TO_UTF16(big2_) 7357db96d56Sopenharmony_ci 7367db96d56Sopenharmony_ci#undef SET2 7377db96d56Sopenharmony_ci#undef GET_LO 7387db96d56Sopenharmony_ci#undef GET_HI 7397db96d56Sopenharmony_ci 7407db96d56Sopenharmony_ci#define LITTLE2_BYTE_TYPE(enc, p) \ 7417db96d56Sopenharmony_ci ((p)[1] == 0 ? ((struct normal_encoding *)(enc))->type[(unsigned char)*(p)] \ 7427db96d56Sopenharmony_ci : unicode_byte_type((p)[1], (p)[0])) 7437db96d56Sopenharmony_ci#define LITTLE2_BYTE_TO_ASCII(p) ((p)[1] == 0 ? (p)[0] : -1) 7447db96d56Sopenharmony_ci#define LITTLE2_CHAR_MATCHES(p, c) ((p)[1] == 0 && (p)[0] == (c)) 7457db96d56Sopenharmony_ci#define LITTLE2_IS_NAME_CHAR_MINBPC(p) \ 7467db96d56Sopenharmony_ci UCS2_GET_NAMING(namePages, (unsigned char)p[1], (unsigned char)p[0]) 7477db96d56Sopenharmony_ci#define LITTLE2_IS_NMSTRT_CHAR_MINBPC(p) \ 7487db96d56Sopenharmony_ci UCS2_GET_NAMING(nmstrtPages, (unsigned char)p[1], (unsigned char)p[0]) 7497db96d56Sopenharmony_ci 7507db96d56Sopenharmony_ci#ifdef XML_MIN_SIZE 7517db96d56Sopenharmony_ci 7527db96d56Sopenharmony_cistatic int PTRFASTCALL 7537db96d56Sopenharmony_cilittle2_byteType(const ENCODING *enc, const char *p) { 7547db96d56Sopenharmony_ci return LITTLE2_BYTE_TYPE(enc, p); 7557db96d56Sopenharmony_ci} 7567db96d56Sopenharmony_ci 7577db96d56Sopenharmony_cistatic int PTRFASTCALL 7587db96d56Sopenharmony_cilittle2_byteToAscii(const ENCODING *enc, const char *p) { 7597db96d56Sopenharmony_ci UNUSED_P(enc); 7607db96d56Sopenharmony_ci return LITTLE2_BYTE_TO_ASCII(p); 7617db96d56Sopenharmony_ci} 7627db96d56Sopenharmony_ci 7637db96d56Sopenharmony_cistatic int PTRCALL 7647db96d56Sopenharmony_cilittle2_charMatches(const ENCODING *enc, const char *p, int c) { 7657db96d56Sopenharmony_ci UNUSED_P(enc); 7667db96d56Sopenharmony_ci return LITTLE2_CHAR_MATCHES(p, c); 7677db96d56Sopenharmony_ci} 7687db96d56Sopenharmony_ci 7697db96d56Sopenharmony_cistatic int 
PTRFASTCALL 7707db96d56Sopenharmony_cilittle2_isNameMin(const ENCODING *enc, const char *p) { 7717db96d56Sopenharmony_ci UNUSED_P(enc); 7727db96d56Sopenharmony_ci return LITTLE2_IS_NAME_CHAR_MINBPC(p); 7737db96d56Sopenharmony_ci} 7747db96d56Sopenharmony_ci 7757db96d56Sopenharmony_cistatic int PTRFASTCALL 7767db96d56Sopenharmony_cilittle2_isNmstrtMin(const ENCODING *enc, const char *p) { 7777db96d56Sopenharmony_ci UNUSED_P(enc); 7787db96d56Sopenharmony_ci return LITTLE2_IS_NMSTRT_CHAR_MINBPC(p); 7797db96d56Sopenharmony_ci} 7807db96d56Sopenharmony_ci 7817db96d56Sopenharmony_ci# undef VTABLE 7827db96d56Sopenharmony_ci# define VTABLE VTABLE1, little2_toUtf8, little2_toUtf16 7837db96d56Sopenharmony_ci 7847db96d56Sopenharmony_ci#else /* not XML_MIN_SIZE */ 7857db96d56Sopenharmony_ci 7867db96d56Sopenharmony_ci# undef PREFIX 7877db96d56Sopenharmony_ci# define PREFIX(ident) little2_##ident 7887db96d56Sopenharmony_ci# define MINBPC(enc) 2 7897db96d56Sopenharmony_ci/* CHAR_MATCHES is guaranteed to have MINBPC bytes available. 
*/ 7907db96d56Sopenharmony_ci# define BYTE_TYPE(enc, p) LITTLE2_BYTE_TYPE(enc, p) 7917db96d56Sopenharmony_ci# define BYTE_TO_ASCII(enc, p) LITTLE2_BYTE_TO_ASCII(p) 7927db96d56Sopenharmony_ci# define CHAR_MATCHES(enc, p, c) LITTLE2_CHAR_MATCHES(p, c) 7937db96d56Sopenharmony_ci# define IS_NAME_CHAR(enc, p, n) 0 7947db96d56Sopenharmony_ci# define IS_NAME_CHAR_MINBPC(enc, p) LITTLE2_IS_NAME_CHAR_MINBPC(p) 7957db96d56Sopenharmony_ci# define IS_NMSTRT_CHAR(enc, p, n) (0) 7967db96d56Sopenharmony_ci# define IS_NMSTRT_CHAR_MINBPC(enc, p) LITTLE2_IS_NMSTRT_CHAR_MINBPC(p) 7977db96d56Sopenharmony_ci 7987db96d56Sopenharmony_ci# define XML_TOK_IMPL_C 7997db96d56Sopenharmony_ci# include "xmltok_impl.c" 8007db96d56Sopenharmony_ci# undef XML_TOK_IMPL_C 8017db96d56Sopenharmony_ci 8027db96d56Sopenharmony_ci# undef MINBPC 8037db96d56Sopenharmony_ci# undef BYTE_TYPE 8047db96d56Sopenharmony_ci# undef BYTE_TO_ASCII 8057db96d56Sopenharmony_ci# undef CHAR_MATCHES 8067db96d56Sopenharmony_ci# undef IS_NAME_CHAR 8077db96d56Sopenharmony_ci# undef IS_NAME_CHAR_MINBPC 8087db96d56Sopenharmony_ci# undef IS_NMSTRT_CHAR 8097db96d56Sopenharmony_ci# undef IS_NMSTRT_CHAR_MINBPC 8107db96d56Sopenharmony_ci# undef IS_INVALID_CHAR 8117db96d56Sopenharmony_ci 8127db96d56Sopenharmony_ci#endif /* not XML_MIN_SIZE */ 8137db96d56Sopenharmony_ci 8147db96d56Sopenharmony_ci#ifdef XML_NS 8157db96d56Sopenharmony_ci 8167db96d56Sopenharmony_cistatic const struct normal_encoding little2_encoding_ns 8177db96d56Sopenharmony_ci = {{VTABLE, 2, 0, 8187db96d56Sopenharmony_ci# if BYTEORDER == 1234 8197db96d56Sopenharmony_ci 1 8207db96d56Sopenharmony_ci# else 8217db96d56Sopenharmony_ci 0 8227db96d56Sopenharmony_ci# endif 8237db96d56Sopenharmony_ci }, 8247db96d56Sopenharmony_ci { 8257db96d56Sopenharmony_ci# include "asciitab.h" 8267db96d56Sopenharmony_ci# include "latin1tab.h" 8277db96d56Sopenharmony_ci }, 8287db96d56Sopenharmony_ci STANDARD_VTABLE(little2_) NULL_VTABLE}; 8297db96d56Sopenharmony_ci 
8307db96d56Sopenharmony_ci#endif 8317db96d56Sopenharmony_ci 8327db96d56Sopenharmony_cistatic const struct normal_encoding little2_encoding 8337db96d56Sopenharmony_ci = {{VTABLE, 2, 0, 8347db96d56Sopenharmony_ci#if BYTEORDER == 1234 8357db96d56Sopenharmony_ci 1 8367db96d56Sopenharmony_ci#else 8377db96d56Sopenharmony_ci 0 8387db96d56Sopenharmony_ci#endif 8397db96d56Sopenharmony_ci }, 8407db96d56Sopenharmony_ci { 8417db96d56Sopenharmony_ci#define BT_COLON BT_NMSTRT 8427db96d56Sopenharmony_ci#include "asciitab.h" 8437db96d56Sopenharmony_ci#undef BT_COLON 8447db96d56Sopenharmony_ci#include "latin1tab.h" 8457db96d56Sopenharmony_ci }, 8467db96d56Sopenharmony_ci STANDARD_VTABLE(little2_) NULL_VTABLE}; 8477db96d56Sopenharmony_ci 8487db96d56Sopenharmony_ci#if BYTEORDER != 4321 8497db96d56Sopenharmony_ci 8507db96d56Sopenharmony_ci# ifdef XML_NS 8517db96d56Sopenharmony_ci 8527db96d56Sopenharmony_cistatic const struct normal_encoding internal_little2_encoding_ns 8537db96d56Sopenharmony_ci = {{VTABLE, 2, 0, 1}, 8547db96d56Sopenharmony_ci { 8557db96d56Sopenharmony_ci# include "iasciitab.h" 8567db96d56Sopenharmony_ci# include "latin1tab.h" 8577db96d56Sopenharmony_ci }, 8587db96d56Sopenharmony_ci STANDARD_VTABLE(little2_) NULL_VTABLE}; 8597db96d56Sopenharmony_ci 8607db96d56Sopenharmony_ci# endif 8617db96d56Sopenharmony_ci 8627db96d56Sopenharmony_cistatic const struct normal_encoding internal_little2_encoding 8637db96d56Sopenharmony_ci = {{VTABLE, 2, 0, 1}, 8647db96d56Sopenharmony_ci { 8657db96d56Sopenharmony_ci# define BT_COLON BT_NMSTRT 8667db96d56Sopenharmony_ci# include "iasciitab.h" 8677db96d56Sopenharmony_ci# undef BT_COLON 8687db96d56Sopenharmony_ci# include "latin1tab.h" 8697db96d56Sopenharmony_ci }, 8707db96d56Sopenharmony_ci STANDARD_VTABLE(little2_) NULL_VTABLE}; 8717db96d56Sopenharmony_ci 8727db96d56Sopenharmony_ci#endif 8737db96d56Sopenharmony_ci 8747db96d56Sopenharmony_ci#define BIG2_BYTE_TYPE(enc, p) \ 8757db96d56Sopenharmony_ci ((p)[0] == 0 \ 
8767db96d56Sopenharmony_ci ? ((struct normal_encoding *)(enc))->type[(unsigned char)(p)[1]] \ 8777db96d56Sopenharmony_ci : unicode_byte_type((p)[0], (p)[1])) 8787db96d56Sopenharmony_ci#define BIG2_BYTE_TO_ASCII(p) ((p)[0] == 0 ? (p)[1] : -1) 8797db96d56Sopenharmony_ci#define BIG2_CHAR_MATCHES(p, c) ((p)[0] == 0 && (p)[1] == (c)) 8807db96d56Sopenharmony_ci#define BIG2_IS_NAME_CHAR_MINBPC(p) \ 8817db96d56Sopenharmony_ci UCS2_GET_NAMING(namePages, (unsigned char)p[0], (unsigned char)p[1]) 8827db96d56Sopenharmony_ci#define BIG2_IS_NMSTRT_CHAR_MINBPC(p) \ 8837db96d56Sopenharmony_ci UCS2_GET_NAMING(nmstrtPages, (unsigned char)p[0], (unsigned char)p[1]) 8847db96d56Sopenharmony_ci 8857db96d56Sopenharmony_ci#ifdef XML_MIN_SIZE 8867db96d56Sopenharmony_ci 8877db96d56Sopenharmony_cistatic int PTRFASTCALL 8887db96d56Sopenharmony_cibig2_byteType(const ENCODING *enc, const char *p) { 8897db96d56Sopenharmony_ci return BIG2_BYTE_TYPE(enc, p); 8907db96d56Sopenharmony_ci} 8917db96d56Sopenharmony_ci 8927db96d56Sopenharmony_cistatic int PTRFASTCALL 8937db96d56Sopenharmony_cibig2_byteToAscii(const ENCODING *enc, const char *p) { 8947db96d56Sopenharmony_ci UNUSED_P(enc); 8957db96d56Sopenharmony_ci return BIG2_BYTE_TO_ASCII(p); 8967db96d56Sopenharmony_ci} 8977db96d56Sopenharmony_ci 8987db96d56Sopenharmony_cistatic int PTRCALL 8997db96d56Sopenharmony_cibig2_charMatches(const ENCODING *enc, const char *p, int c) { 9007db96d56Sopenharmony_ci UNUSED_P(enc); 9017db96d56Sopenharmony_ci return BIG2_CHAR_MATCHES(p, c); 9027db96d56Sopenharmony_ci} 9037db96d56Sopenharmony_ci 9047db96d56Sopenharmony_cistatic int PTRFASTCALL 9057db96d56Sopenharmony_cibig2_isNameMin(const ENCODING *enc, const char *p) { 9067db96d56Sopenharmony_ci UNUSED_P(enc); 9077db96d56Sopenharmony_ci return BIG2_IS_NAME_CHAR_MINBPC(p); 9087db96d56Sopenharmony_ci} 9097db96d56Sopenharmony_ci 9107db96d56Sopenharmony_cistatic int PTRFASTCALL 9117db96d56Sopenharmony_cibig2_isNmstrtMin(const ENCODING *enc, const char *p) { 
9127db96d56Sopenharmony_ci UNUSED_P(enc); 9137db96d56Sopenharmony_ci return BIG2_IS_NMSTRT_CHAR_MINBPC(p); 9147db96d56Sopenharmony_ci} 9157db96d56Sopenharmony_ci 9167db96d56Sopenharmony_ci# undef VTABLE 9177db96d56Sopenharmony_ci# define VTABLE VTABLE1, big2_toUtf8, big2_toUtf16 9187db96d56Sopenharmony_ci 9197db96d56Sopenharmony_ci#else /* not XML_MIN_SIZE */ 9207db96d56Sopenharmony_ci 9217db96d56Sopenharmony_ci# undef PREFIX 9227db96d56Sopenharmony_ci# define PREFIX(ident) big2_##ident 9237db96d56Sopenharmony_ci# define MINBPC(enc) 2 9247db96d56Sopenharmony_ci/* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */ 9257db96d56Sopenharmony_ci# define BYTE_TYPE(enc, p) BIG2_BYTE_TYPE(enc, p) 9267db96d56Sopenharmony_ci# define BYTE_TO_ASCII(enc, p) BIG2_BYTE_TO_ASCII(p) 9277db96d56Sopenharmony_ci# define CHAR_MATCHES(enc, p, c) BIG2_CHAR_MATCHES(p, c) 9287db96d56Sopenharmony_ci# define IS_NAME_CHAR(enc, p, n) 0 9297db96d56Sopenharmony_ci# define IS_NAME_CHAR_MINBPC(enc, p) BIG2_IS_NAME_CHAR_MINBPC(p) 9307db96d56Sopenharmony_ci# define IS_NMSTRT_CHAR(enc, p, n) (0) 9317db96d56Sopenharmony_ci# define IS_NMSTRT_CHAR_MINBPC(enc, p) BIG2_IS_NMSTRT_CHAR_MINBPC(p) 9327db96d56Sopenharmony_ci 9337db96d56Sopenharmony_ci# define XML_TOK_IMPL_C 9347db96d56Sopenharmony_ci# include "xmltok_impl.c" 9357db96d56Sopenharmony_ci# undef XML_TOK_IMPL_C 9367db96d56Sopenharmony_ci 9377db96d56Sopenharmony_ci# undef MINBPC 9387db96d56Sopenharmony_ci# undef BYTE_TYPE 9397db96d56Sopenharmony_ci# undef BYTE_TO_ASCII 9407db96d56Sopenharmony_ci# undef CHAR_MATCHES 9417db96d56Sopenharmony_ci# undef IS_NAME_CHAR 9427db96d56Sopenharmony_ci# undef IS_NAME_CHAR_MINBPC 9437db96d56Sopenharmony_ci# undef IS_NMSTRT_CHAR 9447db96d56Sopenharmony_ci# undef IS_NMSTRT_CHAR_MINBPC 9457db96d56Sopenharmony_ci# undef IS_INVALID_CHAR 9467db96d56Sopenharmony_ci 9477db96d56Sopenharmony_ci#endif /* not XML_MIN_SIZE */ 9487db96d56Sopenharmony_ci 9497db96d56Sopenharmony_ci#ifdef XML_NS 9507db96d56Sopenharmony_ci 
9517db96d56Sopenharmony_cistatic const struct normal_encoding big2_encoding_ns 9527db96d56Sopenharmony_ci = {{VTABLE, 2, 0, 9537db96d56Sopenharmony_ci# if BYTEORDER == 4321 9547db96d56Sopenharmony_ci 1 9557db96d56Sopenharmony_ci# else 9567db96d56Sopenharmony_ci 0 9577db96d56Sopenharmony_ci# endif 9587db96d56Sopenharmony_ci }, 9597db96d56Sopenharmony_ci { 9607db96d56Sopenharmony_ci# include "asciitab.h" 9617db96d56Sopenharmony_ci# include "latin1tab.h" 9627db96d56Sopenharmony_ci }, 9637db96d56Sopenharmony_ci STANDARD_VTABLE(big2_) NULL_VTABLE}; 9647db96d56Sopenharmony_ci 9657db96d56Sopenharmony_ci#endif 9667db96d56Sopenharmony_ci 9677db96d56Sopenharmony_cistatic const struct normal_encoding big2_encoding 9687db96d56Sopenharmony_ci = {{VTABLE, 2, 0, 9697db96d56Sopenharmony_ci#if BYTEORDER == 4321 9707db96d56Sopenharmony_ci 1 9717db96d56Sopenharmony_ci#else 9727db96d56Sopenharmony_ci 0 9737db96d56Sopenharmony_ci#endif 9747db96d56Sopenharmony_ci }, 9757db96d56Sopenharmony_ci { 9767db96d56Sopenharmony_ci#define BT_COLON BT_NMSTRT 9777db96d56Sopenharmony_ci#include "asciitab.h" 9787db96d56Sopenharmony_ci#undef BT_COLON 9797db96d56Sopenharmony_ci#include "latin1tab.h" 9807db96d56Sopenharmony_ci }, 9817db96d56Sopenharmony_ci STANDARD_VTABLE(big2_) NULL_VTABLE}; 9827db96d56Sopenharmony_ci 9837db96d56Sopenharmony_ci#if BYTEORDER != 1234 9847db96d56Sopenharmony_ci 9857db96d56Sopenharmony_ci# ifdef XML_NS 9867db96d56Sopenharmony_ci 9877db96d56Sopenharmony_cistatic const struct normal_encoding internal_big2_encoding_ns 9887db96d56Sopenharmony_ci = {{VTABLE, 2, 0, 1}, 9897db96d56Sopenharmony_ci { 9907db96d56Sopenharmony_ci# include "iasciitab.h" 9917db96d56Sopenharmony_ci# include "latin1tab.h" 9927db96d56Sopenharmony_ci }, 9937db96d56Sopenharmony_ci STANDARD_VTABLE(big2_) NULL_VTABLE}; 9947db96d56Sopenharmony_ci 9957db96d56Sopenharmony_ci# endif 9967db96d56Sopenharmony_ci 9977db96d56Sopenharmony_cistatic const struct normal_encoding internal_big2_encoding 
9987db96d56Sopenharmony_ci = {{VTABLE, 2, 0, 1}, 9997db96d56Sopenharmony_ci { 10007db96d56Sopenharmony_ci# define BT_COLON BT_NMSTRT 10017db96d56Sopenharmony_ci# include "iasciitab.h" 10027db96d56Sopenharmony_ci# undef BT_COLON 10037db96d56Sopenharmony_ci# include "latin1tab.h" 10047db96d56Sopenharmony_ci }, 10057db96d56Sopenharmony_ci STANDARD_VTABLE(big2_) NULL_VTABLE}; 10067db96d56Sopenharmony_ci 10077db96d56Sopenharmony_ci#endif 10087db96d56Sopenharmony_ci 10097db96d56Sopenharmony_ci#undef PREFIX 10107db96d56Sopenharmony_ci 10117db96d56Sopenharmony_cistatic int FASTCALL 10127db96d56Sopenharmony_cistreqci(const char *s1, const char *s2) { 10137db96d56Sopenharmony_ci for (;;) { 10147db96d56Sopenharmony_ci char c1 = *s1++; 10157db96d56Sopenharmony_ci char c2 = *s2++; 10167db96d56Sopenharmony_ci if (ASCII_a <= c1 && c1 <= ASCII_z) 10177db96d56Sopenharmony_ci c1 += ASCII_A - ASCII_a; 10187db96d56Sopenharmony_ci if (ASCII_a <= c2 && c2 <= ASCII_z) 10197db96d56Sopenharmony_ci /* The following line will never get executed. streqci() is 10207db96d56Sopenharmony_ci * only called from two places, both of which guarantee to put 10217db96d56Sopenharmony_ci * upper-case strings into s2. 10227db96d56Sopenharmony_ci */ 10237db96d56Sopenharmony_ci c2 += ASCII_A - ASCII_a; /* LCOV_EXCL_LINE */ 10247db96d56Sopenharmony_ci if (c1 != c2) 10257db96d56Sopenharmony_ci return 0; 10267db96d56Sopenharmony_ci if (! 
c1) 10277db96d56Sopenharmony_ci break; 10287db96d56Sopenharmony_ci } 10297db96d56Sopenharmony_ci return 1; 10307db96d56Sopenharmony_ci} 10317db96d56Sopenharmony_ci 10327db96d56Sopenharmony_cistatic void PTRCALL 10337db96d56Sopenharmony_ciinitUpdatePosition(const ENCODING *enc, const char *ptr, const char *end, 10347db96d56Sopenharmony_ci POSITION *pos) { 10357db96d56Sopenharmony_ci UNUSED_P(enc); 10367db96d56Sopenharmony_ci normal_updatePosition(&utf8_encoding.enc, ptr, end, pos); 10377db96d56Sopenharmony_ci} 10387db96d56Sopenharmony_ci 10397db96d56Sopenharmony_cistatic int 10407db96d56Sopenharmony_citoAscii(const ENCODING *enc, const char *ptr, const char *end) { 10417db96d56Sopenharmony_ci char buf[1]; 10427db96d56Sopenharmony_ci char *p = buf; 10437db96d56Sopenharmony_ci XmlUtf8Convert(enc, &ptr, end, &p, p + 1); 10447db96d56Sopenharmony_ci if (p == buf) 10457db96d56Sopenharmony_ci return -1; 10467db96d56Sopenharmony_ci else 10477db96d56Sopenharmony_ci return buf[0]; 10487db96d56Sopenharmony_ci} 10497db96d56Sopenharmony_ci 10507db96d56Sopenharmony_cistatic int FASTCALL 10517db96d56Sopenharmony_ciisSpace(int c) { 10527db96d56Sopenharmony_ci switch (c) { 10537db96d56Sopenharmony_ci case 0x20: 10547db96d56Sopenharmony_ci case 0xD: 10557db96d56Sopenharmony_ci case 0xA: 10567db96d56Sopenharmony_ci case 0x9: 10577db96d56Sopenharmony_ci return 1; 10587db96d56Sopenharmony_ci } 10597db96d56Sopenharmony_ci return 0; 10607db96d56Sopenharmony_ci} 10617db96d56Sopenharmony_ci 10627db96d56Sopenharmony_ci/* Return 1 if there's just optional white space or there's an S 10637db96d56Sopenharmony_ci followed by name=val. 
10647db96d56Sopenharmony_ci*/ 10657db96d56Sopenharmony_cistatic int 10667db96d56Sopenharmony_ciparsePseudoAttribute(const ENCODING *enc, const char *ptr, const char *end, 10677db96d56Sopenharmony_ci const char **namePtr, const char **nameEndPtr, 10687db96d56Sopenharmony_ci const char **valPtr, const char **nextTokPtr) { 10697db96d56Sopenharmony_ci int c; 10707db96d56Sopenharmony_ci char open; 10717db96d56Sopenharmony_ci if (ptr == end) { 10727db96d56Sopenharmony_ci *namePtr = NULL; 10737db96d56Sopenharmony_ci return 1; 10747db96d56Sopenharmony_ci } 10757db96d56Sopenharmony_ci if (! isSpace(toAscii(enc, ptr, end))) { 10767db96d56Sopenharmony_ci *nextTokPtr = ptr; 10777db96d56Sopenharmony_ci return 0; 10787db96d56Sopenharmony_ci } 10797db96d56Sopenharmony_ci do { 10807db96d56Sopenharmony_ci ptr += enc->minBytesPerChar; 10817db96d56Sopenharmony_ci } while (isSpace(toAscii(enc, ptr, end))); 10827db96d56Sopenharmony_ci if (ptr == end) { 10837db96d56Sopenharmony_ci *namePtr = NULL; 10847db96d56Sopenharmony_ci return 1; 10857db96d56Sopenharmony_ci } 10867db96d56Sopenharmony_ci *namePtr = ptr; 10877db96d56Sopenharmony_ci for (;;) { 10887db96d56Sopenharmony_ci c = toAscii(enc, ptr, end); 10897db96d56Sopenharmony_ci if (c == -1) { 10907db96d56Sopenharmony_ci *nextTokPtr = ptr; 10917db96d56Sopenharmony_ci return 0; 10927db96d56Sopenharmony_ci } 10937db96d56Sopenharmony_ci if (c == ASCII_EQUALS) { 10947db96d56Sopenharmony_ci *nameEndPtr = ptr; 10957db96d56Sopenharmony_ci break; 10967db96d56Sopenharmony_ci } 10977db96d56Sopenharmony_ci if (isSpace(c)) { 10987db96d56Sopenharmony_ci *nameEndPtr = ptr; 10997db96d56Sopenharmony_ci do { 11007db96d56Sopenharmony_ci ptr += enc->minBytesPerChar; 11017db96d56Sopenharmony_ci } while (isSpace(c = toAscii(enc, ptr, end))); 11027db96d56Sopenharmony_ci if (c != ASCII_EQUALS) { 11037db96d56Sopenharmony_ci *nextTokPtr = ptr; 11047db96d56Sopenharmony_ci return 0; 11057db96d56Sopenharmony_ci } 11067db96d56Sopenharmony_ci break; 
11077db96d56Sopenharmony_ci } 11087db96d56Sopenharmony_ci ptr += enc->minBytesPerChar; 11097db96d56Sopenharmony_ci } 11107db96d56Sopenharmony_ci if (ptr == *namePtr) { 11117db96d56Sopenharmony_ci *nextTokPtr = ptr; 11127db96d56Sopenharmony_ci return 0; 11137db96d56Sopenharmony_ci } 11147db96d56Sopenharmony_ci ptr += enc->minBytesPerChar; 11157db96d56Sopenharmony_ci c = toAscii(enc, ptr, end); 11167db96d56Sopenharmony_ci while (isSpace(c)) { 11177db96d56Sopenharmony_ci ptr += enc->minBytesPerChar; 11187db96d56Sopenharmony_ci c = toAscii(enc, ptr, end); 11197db96d56Sopenharmony_ci } 11207db96d56Sopenharmony_ci if (c != ASCII_QUOT && c != ASCII_APOS) { 11217db96d56Sopenharmony_ci *nextTokPtr = ptr; 11227db96d56Sopenharmony_ci return 0; 11237db96d56Sopenharmony_ci } 11247db96d56Sopenharmony_ci open = (char)c; 11257db96d56Sopenharmony_ci ptr += enc->minBytesPerChar; 11267db96d56Sopenharmony_ci *valPtr = ptr; 11277db96d56Sopenharmony_ci for (;; ptr += enc->minBytesPerChar) { 11287db96d56Sopenharmony_ci c = toAscii(enc, ptr, end); 11297db96d56Sopenharmony_ci if (c == open) 11307db96d56Sopenharmony_ci break; 11317db96d56Sopenharmony_ci if (! (ASCII_a <= c && c <= ASCII_z) && ! (ASCII_A <= c && c <= ASCII_Z) 11327db96d56Sopenharmony_ci && ! 
(ASCII_0 <= c && c <= ASCII_9) && c != ASCII_PERIOD 11337db96d56Sopenharmony_ci && c != ASCII_MINUS && c != ASCII_UNDERSCORE) { 11347db96d56Sopenharmony_ci *nextTokPtr = ptr; 11357db96d56Sopenharmony_ci return 0; 11367db96d56Sopenharmony_ci } 11377db96d56Sopenharmony_ci } 11387db96d56Sopenharmony_ci *nextTokPtr = ptr + enc->minBytesPerChar; 11397db96d56Sopenharmony_ci return 1; 11407db96d56Sopenharmony_ci} 11417db96d56Sopenharmony_ci 11427db96d56Sopenharmony_cistatic const char KW_version[] 11437db96d56Sopenharmony_ci = {ASCII_v, ASCII_e, ASCII_r, ASCII_s, ASCII_i, ASCII_o, ASCII_n, '\0'}; 11447db96d56Sopenharmony_ci 11457db96d56Sopenharmony_cistatic const char KW_encoding[] = {ASCII_e, ASCII_n, ASCII_c, ASCII_o, ASCII_d, 11467db96d56Sopenharmony_ci ASCII_i, ASCII_n, ASCII_g, '\0'}; 11477db96d56Sopenharmony_ci 11487db96d56Sopenharmony_cistatic const char KW_standalone[] 11497db96d56Sopenharmony_ci = {ASCII_s, ASCII_t, ASCII_a, ASCII_n, ASCII_d, ASCII_a, 11507db96d56Sopenharmony_ci ASCII_l, ASCII_o, ASCII_n, ASCII_e, '\0'}; 11517db96d56Sopenharmony_ci 11527db96d56Sopenharmony_cistatic const char KW_yes[] = {ASCII_y, ASCII_e, ASCII_s, '\0'}; 11537db96d56Sopenharmony_ci 11547db96d56Sopenharmony_cistatic const char KW_no[] = {ASCII_n, ASCII_o, '\0'}; 11557db96d56Sopenharmony_ci 11567db96d56Sopenharmony_cistatic int 11577db96d56Sopenharmony_cidoParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *, const char *, 11587db96d56Sopenharmony_ci const char *), 11597db96d56Sopenharmony_ci int isGeneralTextEntity, const ENCODING *enc, const char *ptr, 11607db96d56Sopenharmony_ci const char *end, const char **badPtr, const char **versionPtr, 11617db96d56Sopenharmony_ci const char **versionEndPtr, const char **encodingName, 11627db96d56Sopenharmony_ci const ENCODING **encoding, int *standalone) { 11637db96d56Sopenharmony_ci const char *val = NULL; 11647db96d56Sopenharmony_ci const char *name = NULL; 11657db96d56Sopenharmony_ci const char *nameEnd = NULL; 
11667db96d56Sopenharmony_ci ptr += 5 * enc->minBytesPerChar; 11677db96d56Sopenharmony_ci end -= 2 * enc->minBytesPerChar; 11687db96d56Sopenharmony_ci if (! parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr) 11697db96d56Sopenharmony_ci || ! name) { 11707db96d56Sopenharmony_ci *badPtr = ptr; 11717db96d56Sopenharmony_ci return 0; 11727db96d56Sopenharmony_ci } 11737db96d56Sopenharmony_ci if (! XmlNameMatchesAscii(enc, name, nameEnd, KW_version)) { 11747db96d56Sopenharmony_ci if (! isGeneralTextEntity) { 11757db96d56Sopenharmony_ci *badPtr = name; 11767db96d56Sopenharmony_ci return 0; 11777db96d56Sopenharmony_ci } 11787db96d56Sopenharmony_ci } else { 11797db96d56Sopenharmony_ci if (versionPtr) 11807db96d56Sopenharmony_ci *versionPtr = val; 11817db96d56Sopenharmony_ci if (versionEndPtr) 11827db96d56Sopenharmony_ci *versionEndPtr = ptr; 11837db96d56Sopenharmony_ci if (! parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) { 11847db96d56Sopenharmony_ci *badPtr = ptr; 11857db96d56Sopenharmony_ci return 0; 11867db96d56Sopenharmony_ci } 11877db96d56Sopenharmony_ci if (! name) { 11887db96d56Sopenharmony_ci if (isGeneralTextEntity) { 11897db96d56Sopenharmony_ci /* a TextDecl must have an EncodingDecl */ 11907db96d56Sopenharmony_ci *badPtr = ptr; 11917db96d56Sopenharmony_ci return 0; 11927db96d56Sopenharmony_ci } 11937db96d56Sopenharmony_ci return 1; 11947db96d56Sopenharmony_ci } 11957db96d56Sopenharmony_ci } 11967db96d56Sopenharmony_ci if (XmlNameMatchesAscii(enc, name, nameEnd, KW_encoding)) { 11977db96d56Sopenharmony_ci int c = toAscii(enc, val, end); 11987db96d56Sopenharmony_ci if (! (ASCII_a <= c && c <= ASCII_z) && ! 
(ASCII_A <= c && c <= ASCII_Z)) { 11997db96d56Sopenharmony_ci *badPtr = val; 12007db96d56Sopenharmony_ci return 0; 12017db96d56Sopenharmony_ci } 12027db96d56Sopenharmony_ci if (encodingName) 12037db96d56Sopenharmony_ci *encodingName = val; 12047db96d56Sopenharmony_ci if (encoding) 12057db96d56Sopenharmony_ci *encoding = encodingFinder(enc, val, ptr - enc->minBytesPerChar); 12067db96d56Sopenharmony_ci if (! parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) { 12077db96d56Sopenharmony_ci *badPtr = ptr; 12087db96d56Sopenharmony_ci return 0; 12097db96d56Sopenharmony_ci } 12107db96d56Sopenharmony_ci if (! name) 12117db96d56Sopenharmony_ci return 1; 12127db96d56Sopenharmony_ci } 12137db96d56Sopenharmony_ci if (! XmlNameMatchesAscii(enc, name, nameEnd, KW_standalone) 12147db96d56Sopenharmony_ci || isGeneralTextEntity) { 12157db96d56Sopenharmony_ci *badPtr = name; 12167db96d56Sopenharmony_ci return 0; 12177db96d56Sopenharmony_ci } 12187db96d56Sopenharmony_ci if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_yes)) { 12197db96d56Sopenharmony_ci if (standalone) 12207db96d56Sopenharmony_ci *standalone = 1; 12217db96d56Sopenharmony_ci } else if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_no)) { 12227db96d56Sopenharmony_ci if (standalone) 12237db96d56Sopenharmony_ci *standalone = 0; 12247db96d56Sopenharmony_ci } else { 12257db96d56Sopenharmony_ci *badPtr = val; 12267db96d56Sopenharmony_ci return 0; 12277db96d56Sopenharmony_ci } 12287db96d56Sopenharmony_ci while (isSpace(toAscii(enc, ptr, end))) 12297db96d56Sopenharmony_ci ptr += enc->minBytesPerChar; 12307db96d56Sopenharmony_ci if (ptr != end) { 12317db96d56Sopenharmony_ci *badPtr = ptr; 12327db96d56Sopenharmony_ci return 0; 12337db96d56Sopenharmony_ci } 12347db96d56Sopenharmony_ci return 1; 12357db96d56Sopenharmony_ci} 12367db96d56Sopenharmony_ci 12377db96d56Sopenharmony_cistatic int FASTCALL 12387db96d56Sopenharmony_cicheckCharRefNumber(int result) { 12397db96d56Sopenharmony_ci 
switch (result >> 8) { 12407db96d56Sopenharmony_ci case 0xD8: 12417db96d56Sopenharmony_ci case 0xD9: 12427db96d56Sopenharmony_ci case 0xDA: 12437db96d56Sopenharmony_ci case 0xDB: 12447db96d56Sopenharmony_ci case 0xDC: 12457db96d56Sopenharmony_ci case 0xDD: 12467db96d56Sopenharmony_ci case 0xDE: 12477db96d56Sopenharmony_ci case 0xDF: 12487db96d56Sopenharmony_ci return -1; 12497db96d56Sopenharmony_ci case 0: 12507db96d56Sopenharmony_ci if (latin1_encoding.type[result] == BT_NONXML) 12517db96d56Sopenharmony_ci return -1; 12527db96d56Sopenharmony_ci break; 12537db96d56Sopenharmony_ci case 0xFF: 12547db96d56Sopenharmony_ci if (result == 0xFFFE || result == 0xFFFF) 12557db96d56Sopenharmony_ci return -1; 12567db96d56Sopenharmony_ci break; 12577db96d56Sopenharmony_ci } 12587db96d56Sopenharmony_ci return result; 12597db96d56Sopenharmony_ci} 12607db96d56Sopenharmony_ci 12617db96d56Sopenharmony_ciint FASTCALL 12627db96d56Sopenharmony_ciXmlUtf8Encode(int c, char *buf) { 12637db96d56Sopenharmony_ci enum { 12647db96d56Sopenharmony_ci /* minN is minimum legal resulting value for N byte sequence */ 12657db96d56Sopenharmony_ci min2 = 0x80, 12667db96d56Sopenharmony_ci min3 = 0x800, 12677db96d56Sopenharmony_ci min4 = 0x10000 12687db96d56Sopenharmony_ci }; 12697db96d56Sopenharmony_ci 12707db96d56Sopenharmony_ci if (c < 0) 12717db96d56Sopenharmony_ci return 0; /* LCOV_EXCL_LINE: this case is always eliminated beforehand */ 12727db96d56Sopenharmony_ci if (c < min2) { 12737db96d56Sopenharmony_ci buf[0] = (char)(c | UTF8_cval1); 12747db96d56Sopenharmony_ci return 1; 12757db96d56Sopenharmony_ci } 12767db96d56Sopenharmony_ci if (c < min3) { 12777db96d56Sopenharmony_ci buf[0] = (char)((c >> 6) | UTF8_cval2); 12787db96d56Sopenharmony_ci buf[1] = (char)((c & 0x3f) | 0x80); 12797db96d56Sopenharmony_ci return 2; 12807db96d56Sopenharmony_ci } 12817db96d56Sopenharmony_ci if (c < min4) { 12827db96d56Sopenharmony_ci buf[0] = (char)((c >> 12) | UTF8_cval3); 12837db96d56Sopenharmony_ci buf[1] = 
(char)(((c >> 6) & 0x3f) | 0x80); 12847db96d56Sopenharmony_ci buf[2] = (char)((c & 0x3f) | 0x80); 12857db96d56Sopenharmony_ci return 3; 12867db96d56Sopenharmony_ci } 12877db96d56Sopenharmony_ci if (c < 0x110000) { 12887db96d56Sopenharmony_ci buf[0] = (char)((c >> 18) | UTF8_cval4); 12897db96d56Sopenharmony_ci buf[1] = (char)(((c >> 12) & 0x3f) | 0x80); 12907db96d56Sopenharmony_ci buf[2] = (char)(((c >> 6) & 0x3f) | 0x80); 12917db96d56Sopenharmony_ci buf[3] = (char)((c & 0x3f) | 0x80); 12927db96d56Sopenharmony_ci return 4; 12937db96d56Sopenharmony_ci } 12947db96d56Sopenharmony_ci return 0; /* LCOV_EXCL_LINE: this case too is eliminated before calling */ 12957db96d56Sopenharmony_ci} 12967db96d56Sopenharmony_ci 12977db96d56Sopenharmony_ciint FASTCALL 12987db96d56Sopenharmony_ciXmlUtf16Encode(int charNum, unsigned short *buf) { 12997db96d56Sopenharmony_ci if (charNum < 0) 13007db96d56Sopenharmony_ci return 0; 13017db96d56Sopenharmony_ci if (charNum < 0x10000) { 13027db96d56Sopenharmony_ci buf[0] = (unsigned short)charNum; 13037db96d56Sopenharmony_ci return 1; 13047db96d56Sopenharmony_ci } 13057db96d56Sopenharmony_ci if (charNum < 0x110000) { 13067db96d56Sopenharmony_ci charNum -= 0x10000; 13077db96d56Sopenharmony_ci buf[0] = (unsigned short)((charNum >> 10) + 0xD800); 13087db96d56Sopenharmony_ci buf[1] = (unsigned short)((charNum & 0x3FF) + 0xDC00); 13097db96d56Sopenharmony_ci return 2; 13107db96d56Sopenharmony_ci } 13117db96d56Sopenharmony_ci return 0; 13127db96d56Sopenharmony_ci} 13137db96d56Sopenharmony_ci 13147db96d56Sopenharmony_cistruct unknown_encoding { 13157db96d56Sopenharmony_ci struct normal_encoding normal; 13167db96d56Sopenharmony_ci CONVERTER convert; 13177db96d56Sopenharmony_ci void *userData; 13187db96d56Sopenharmony_ci unsigned short utf16[256]; 13197db96d56Sopenharmony_ci char utf8[256][4]; 13207db96d56Sopenharmony_ci}; 13217db96d56Sopenharmony_ci 13227db96d56Sopenharmony_ci#define AS_UNKNOWN_ENCODING(enc) ((const struct unknown_encoding *)(enc)) 
13237db96d56Sopenharmony_ci 13247db96d56Sopenharmony_ciint 13257db96d56Sopenharmony_ciXmlSizeOfUnknownEncoding(void) { 13267db96d56Sopenharmony_ci return sizeof(struct unknown_encoding); 13277db96d56Sopenharmony_ci} 13287db96d56Sopenharmony_ci 13297db96d56Sopenharmony_cistatic int PTRFASTCALL 13307db96d56Sopenharmony_ciunknown_isName(const ENCODING *enc, const char *p) { 13317db96d56Sopenharmony_ci const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); 13327db96d56Sopenharmony_ci int c = uenc->convert(uenc->userData, p); 13337db96d56Sopenharmony_ci if (c & ~0xFFFF) 13347db96d56Sopenharmony_ci return 0; 13357db96d56Sopenharmony_ci return UCS2_GET_NAMING(namePages, c >> 8, c & 0xFF); 13367db96d56Sopenharmony_ci} 13377db96d56Sopenharmony_ci 13387db96d56Sopenharmony_cistatic int PTRFASTCALL 13397db96d56Sopenharmony_ciunknown_isNmstrt(const ENCODING *enc, const char *p) { 13407db96d56Sopenharmony_ci const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); 13417db96d56Sopenharmony_ci int c = uenc->convert(uenc->userData, p); 13427db96d56Sopenharmony_ci if (c & ~0xFFFF) 13437db96d56Sopenharmony_ci return 0; 13447db96d56Sopenharmony_ci return UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xFF); 13457db96d56Sopenharmony_ci} 13467db96d56Sopenharmony_ci 13477db96d56Sopenharmony_cistatic int PTRFASTCALL 13487db96d56Sopenharmony_ciunknown_isInvalid(const ENCODING *enc, const char *p) { 13497db96d56Sopenharmony_ci const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); 13507db96d56Sopenharmony_ci int c = uenc->convert(uenc->userData, p); 13517db96d56Sopenharmony_ci return (c & ~0xFFFF) || checkCharRefNumber(c) < 0; 13527db96d56Sopenharmony_ci} 13537db96d56Sopenharmony_ci 13547db96d56Sopenharmony_cistatic enum XML_Convert_Result PTRCALL 13557db96d56Sopenharmony_ciunknown_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim, 13567db96d56Sopenharmony_ci char **toP, const char *toLim) { 13577db96d56Sopenharmony_ci const struct unknown_encoding *uenc 
= AS_UNKNOWN_ENCODING(enc); 13587db96d56Sopenharmony_ci char buf[XML_UTF8_ENCODE_MAX]; 13597db96d56Sopenharmony_ci for (;;) { 13607db96d56Sopenharmony_ci const char *utf8; 13617db96d56Sopenharmony_ci int n; 13627db96d56Sopenharmony_ci if (*fromP == fromLim) 13637db96d56Sopenharmony_ci return XML_CONVERT_COMPLETED; 13647db96d56Sopenharmony_ci utf8 = uenc->utf8[(unsigned char)**fromP]; 13657db96d56Sopenharmony_ci n = *utf8++; 13667db96d56Sopenharmony_ci if (n == 0) { 13677db96d56Sopenharmony_ci int c = uenc->convert(uenc->userData, *fromP); 13687db96d56Sopenharmony_ci n = XmlUtf8Encode(c, buf); 13697db96d56Sopenharmony_ci if (n > toLim - *toP) 13707db96d56Sopenharmony_ci return XML_CONVERT_OUTPUT_EXHAUSTED; 13717db96d56Sopenharmony_ci utf8 = buf; 13727db96d56Sopenharmony_ci *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP] 13737db96d56Sopenharmony_ci - (BT_LEAD2 - 2)); 13747db96d56Sopenharmony_ci } else { 13757db96d56Sopenharmony_ci if (n > toLim - *toP) 13767db96d56Sopenharmony_ci return XML_CONVERT_OUTPUT_EXHAUSTED; 13777db96d56Sopenharmony_ci (*fromP)++; 13787db96d56Sopenharmony_ci } 13797db96d56Sopenharmony_ci memcpy(*toP, utf8, n); 13807db96d56Sopenharmony_ci *toP += n; 13817db96d56Sopenharmony_ci } 13827db96d56Sopenharmony_ci} 13837db96d56Sopenharmony_ci 13847db96d56Sopenharmony_cistatic enum XML_Convert_Result PTRCALL 13857db96d56Sopenharmony_ciunknown_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim, 13867db96d56Sopenharmony_ci unsigned short **toP, const unsigned short *toLim) { 13877db96d56Sopenharmony_ci const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); 13887db96d56Sopenharmony_ci while (*fromP < fromLim && *toP < toLim) { 13897db96d56Sopenharmony_ci unsigned short c = uenc->utf16[(unsigned char)**fromP]; 13907db96d56Sopenharmony_ci if (c == 0) { 13917db96d56Sopenharmony_ci c = (unsigned short)uenc->convert(uenc->userData, *fromP); 13927db96d56Sopenharmony_ci *fromP += 
(AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP] 13937db96d56Sopenharmony_ci - (BT_LEAD2 - 2)); 13947db96d56Sopenharmony_ci } else 13957db96d56Sopenharmony_ci (*fromP)++; 13967db96d56Sopenharmony_ci *(*toP)++ = c; 13977db96d56Sopenharmony_ci } 13987db96d56Sopenharmony_ci 13997db96d56Sopenharmony_ci if ((*toP == toLim) && (*fromP < fromLim)) 14007db96d56Sopenharmony_ci return XML_CONVERT_OUTPUT_EXHAUSTED; 14017db96d56Sopenharmony_ci else 14027db96d56Sopenharmony_ci return XML_CONVERT_COMPLETED; 14037db96d56Sopenharmony_ci} 14047db96d56Sopenharmony_ci 14057db96d56Sopenharmony_ciENCODING * 14067db96d56Sopenharmony_ciXmlInitUnknownEncoding(void *mem, int *table, CONVERTER convert, 14077db96d56Sopenharmony_ci void *userData) { 14087db96d56Sopenharmony_ci int i; 14097db96d56Sopenharmony_ci struct unknown_encoding *e = (struct unknown_encoding *)mem; 14107db96d56Sopenharmony_ci memcpy(mem, &latin1_encoding, sizeof(struct normal_encoding)); 14117db96d56Sopenharmony_ci for (i = 0; i < 128; i++) 14127db96d56Sopenharmony_ci if (latin1_encoding.type[i] != BT_OTHER 14137db96d56Sopenharmony_ci && latin1_encoding.type[i] != BT_NONXML && table[i] != i) 14147db96d56Sopenharmony_ci return 0; 14157db96d56Sopenharmony_ci for (i = 0; i < 256; i++) { 14167db96d56Sopenharmony_ci int c = table[i]; 14177db96d56Sopenharmony_ci if (c == -1) { 14187db96d56Sopenharmony_ci e->normal.type[i] = BT_MALFORM; 14197db96d56Sopenharmony_ci /* This shouldn't really get used. */ 14207db96d56Sopenharmony_ci e->utf16[i] = 0xFFFF; 14217db96d56Sopenharmony_ci e->utf8[i][0] = 1; 14227db96d56Sopenharmony_ci e->utf8[i][1] = 0; 14237db96d56Sopenharmony_ci } else if (c < 0) { 14247db96d56Sopenharmony_ci if (c < -4) 14257db96d56Sopenharmony_ci return 0; 14267db96d56Sopenharmony_ci /* Multi-byte sequences need a converter function */ 14277db96d56Sopenharmony_ci if (! 
convert) 14287db96d56Sopenharmony_ci return 0; 14297db96d56Sopenharmony_ci e->normal.type[i] = (unsigned char)(BT_LEAD2 - (c + 2)); 14307db96d56Sopenharmony_ci e->utf8[i][0] = 0; 14317db96d56Sopenharmony_ci e->utf16[i] = 0; 14327db96d56Sopenharmony_ci } else if (c < 0x80) { 14337db96d56Sopenharmony_ci if (latin1_encoding.type[c] != BT_OTHER 14347db96d56Sopenharmony_ci && latin1_encoding.type[c] != BT_NONXML && c != i) 14357db96d56Sopenharmony_ci return 0; 14367db96d56Sopenharmony_ci e->normal.type[i] = latin1_encoding.type[c]; 14377db96d56Sopenharmony_ci e->utf8[i][0] = 1; 14387db96d56Sopenharmony_ci e->utf8[i][1] = (char)c; 14397db96d56Sopenharmony_ci e->utf16[i] = (unsigned short)(c == 0 ? 0xFFFF : c); 14407db96d56Sopenharmony_ci } else if (checkCharRefNumber(c) < 0) { 14417db96d56Sopenharmony_ci e->normal.type[i] = BT_NONXML; 14427db96d56Sopenharmony_ci /* This shouldn't really get used. */ 14437db96d56Sopenharmony_ci e->utf16[i] = 0xFFFF; 14447db96d56Sopenharmony_ci e->utf8[i][0] = 1; 14457db96d56Sopenharmony_ci e->utf8[i][1] = 0; 14467db96d56Sopenharmony_ci } else { 14477db96d56Sopenharmony_ci if (c > 0xFFFF) 14487db96d56Sopenharmony_ci return 0; 14497db96d56Sopenharmony_ci if (UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xff)) 14507db96d56Sopenharmony_ci e->normal.type[i] = BT_NMSTRT; 14517db96d56Sopenharmony_ci else if (UCS2_GET_NAMING(namePages, c >> 8, c & 0xff)) 14527db96d56Sopenharmony_ci e->normal.type[i] = BT_NAME; 14537db96d56Sopenharmony_ci else 14547db96d56Sopenharmony_ci e->normal.type[i] = BT_OTHER; 14557db96d56Sopenharmony_ci e->utf8[i][0] = (char)XmlUtf8Encode(c, e->utf8[i] + 1); 14567db96d56Sopenharmony_ci e->utf16[i] = (unsigned short)c; 14577db96d56Sopenharmony_ci } 14587db96d56Sopenharmony_ci } 14597db96d56Sopenharmony_ci e->userData = userData; 14607db96d56Sopenharmony_ci e->convert = convert; 14617db96d56Sopenharmony_ci if (convert) { 14627db96d56Sopenharmony_ci e->normal.isName2 = unknown_isName; 14637db96d56Sopenharmony_ci e->normal.isName3 
= unknown_isName; 14647db96d56Sopenharmony_ci e->normal.isName4 = unknown_isName; 14657db96d56Sopenharmony_ci e->normal.isNmstrt2 = unknown_isNmstrt; 14667db96d56Sopenharmony_ci e->normal.isNmstrt3 = unknown_isNmstrt; 14677db96d56Sopenharmony_ci e->normal.isNmstrt4 = unknown_isNmstrt; 14687db96d56Sopenharmony_ci e->normal.isInvalid2 = unknown_isInvalid; 14697db96d56Sopenharmony_ci e->normal.isInvalid3 = unknown_isInvalid; 14707db96d56Sopenharmony_ci e->normal.isInvalid4 = unknown_isInvalid; 14717db96d56Sopenharmony_ci } 14727db96d56Sopenharmony_ci e->normal.enc.utf8Convert = unknown_toUtf8; 14737db96d56Sopenharmony_ci e->normal.enc.utf16Convert = unknown_toUtf16; 14747db96d56Sopenharmony_ci return &(e->normal.enc); 14757db96d56Sopenharmony_ci} 14767db96d56Sopenharmony_ci 14777db96d56Sopenharmony_ci/* If this enumeration is changed, getEncodingIndex and encodings 14787db96d56Sopenharmony_cimust also be changed. */ 14797db96d56Sopenharmony_cienum { 14807db96d56Sopenharmony_ci UNKNOWN_ENC = -1, 14817db96d56Sopenharmony_ci ISO_8859_1_ENC = 0, 14827db96d56Sopenharmony_ci US_ASCII_ENC, 14837db96d56Sopenharmony_ci UTF_8_ENC, 14847db96d56Sopenharmony_ci UTF_16_ENC, 14857db96d56Sopenharmony_ci UTF_16BE_ENC, 14867db96d56Sopenharmony_ci UTF_16LE_ENC, 14877db96d56Sopenharmony_ci /* must match encodingNames up to here */ 14887db96d56Sopenharmony_ci NO_ENC 14897db96d56Sopenharmony_ci}; 14907db96d56Sopenharmony_ci 14917db96d56Sopenharmony_cistatic const char KW_ISO_8859_1[] 14927db96d56Sopenharmony_ci = {ASCII_I, ASCII_S, ASCII_O, ASCII_MINUS, ASCII_8, ASCII_8, 14937db96d56Sopenharmony_ci ASCII_5, ASCII_9, ASCII_MINUS, ASCII_1, '\0'}; 14947db96d56Sopenharmony_cistatic const char KW_US_ASCII[] 14957db96d56Sopenharmony_ci = {ASCII_U, ASCII_S, ASCII_MINUS, ASCII_A, ASCII_S, 14967db96d56Sopenharmony_ci ASCII_C, ASCII_I, ASCII_I, '\0'}; 14977db96d56Sopenharmony_cistatic const char KW_UTF_8[] 14987db96d56Sopenharmony_ci = {ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_8, '\0'}; 
14997db96d56Sopenharmony_cistatic const char KW_UTF_16[] 15007db96d56Sopenharmony_ci = {ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, '\0'}; 15017db96d56Sopenharmony_cistatic const char KW_UTF_16BE[] 15027db96d56Sopenharmony_ci = {ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, 15037db96d56Sopenharmony_ci ASCII_6, ASCII_B, ASCII_E, '\0'}; 15047db96d56Sopenharmony_cistatic const char KW_UTF_16LE[] 15057db96d56Sopenharmony_ci = {ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, 15067db96d56Sopenharmony_ci ASCII_6, ASCII_L, ASCII_E, '\0'}; 15077db96d56Sopenharmony_ci 15087db96d56Sopenharmony_cistatic int FASTCALL 15097db96d56Sopenharmony_cigetEncodingIndex(const char *name) { 15107db96d56Sopenharmony_ci static const char *const encodingNames[] = { 15117db96d56Sopenharmony_ci KW_ISO_8859_1, KW_US_ASCII, KW_UTF_8, KW_UTF_16, KW_UTF_16BE, KW_UTF_16LE, 15127db96d56Sopenharmony_ci }; 15137db96d56Sopenharmony_ci int i; 15147db96d56Sopenharmony_ci if (name == NULL) 15157db96d56Sopenharmony_ci return NO_ENC; 15167db96d56Sopenharmony_ci for (i = 0; i < (int)(sizeof(encodingNames) / sizeof(encodingNames[0])); i++) 15177db96d56Sopenharmony_ci if (streqci(name, encodingNames[i])) 15187db96d56Sopenharmony_ci return i; 15197db96d56Sopenharmony_ci return UNKNOWN_ENC; 15207db96d56Sopenharmony_ci} 15217db96d56Sopenharmony_ci 15227db96d56Sopenharmony_ci/* For binary compatibility, we store the index of the encoding 15237db96d56Sopenharmony_ci specified at initialization in the isUtf16 member. 15247db96d56Sopenharmony_ci*/ 15257db96d56Sopenharmony_ci 15267db96d56Sopenharmony_ci#define INIT_ENC_INDEX(enc) ((int)(enc)->initEnc.isUtf16) 15277db96d56Sopenharmony_ci#define SET_INIT_ENC_INDEX(enc, i) ((enc)->initEnc.isUtf16 = (char)i) 15287db96d56Sopenharmony_ci 15297db96d56Sopenharmony_ci/* This is what detects the encoding. 
encodingTable maps from 15307db96d56Sopenharmony_ci encoding indices to encodings; INIT_ENC_INDEX(enc) is the index of 15317db96d56Sopenharmony_ci the external (protocol) specified encoding; state is 15327db96d56Sopenharmony_ci XML_CONTENT_STATE if we're parsing an external text entity, and 15337db96d56Sopenharmony_ci XML_PROLOG_STATE otherwise. 15347db96d56Sopenharmony_ci*/ 15357db96d56Sopenharmony_ci 15367db96d56Sopenharmony_cistatic int 15377db96d56Sopenharmony_ciinitScan(const ENCODING *const *encodingTable, const INIT_ENCODING *enc, 15387db96d56Sopenharmony_ci int state, const char *ptr, const char *end, const char **nextTokPtr) { 15397db96d56Sopenharmony_ci const ENCODING **encPtr; 15407db96d56Sopenharmony_ci 15417db96d56Sopenharmony_ci if (ptr >= end) 15427db96d56Sopenharmony_ci return XML_TOK_NONE; 15437db96d56Sopenharmony_ci encPtr = enc->encPtr; 15447db96d56Sopenharmony_ci if (ptr + 1 == end) { 15457db96d56Sopenharmony_ci /* only a single byte available for auto-detection */ 15467db96d56Sopenharmony_ci#ifndef XML_DTD /* FIXME */ 15477db96d56Sopenharmony_ci /* a well-formed document entity must have more than one byte */ 15487db96d56Sopenharmony_ci if (state != XML_CONTENT_STATE) 15497db96d56Sopenharmony_ci return XML_TOK_PARTIAL; 15507db96d56Sopenharmony_ci#endif 15517db96d56Sopenharmony_ci /* so we're parsing an external text entity... 
*/ 15527db96d56Sopenharmony_ci /* if UTF-16 was externally specified, then we need at least 2 bytes */ 15537db96d56Sopenharmony_ci switch (INIT_ENC_INDEX(enc)) { 15547db96d56Sopenharmony_ci case UTF_16_ENC: 15557db96d56Sopenharmony_ci case UTF_16LE_ENC: 15567db96d56Sopenharmony_ci case UTF_16BE_ENC: 15577db96d56Sopenharmony_ci return XML_TOK_PARTIAL; 15587db96d56Sopenharmony_ci } 15597db96d56Sopenharmony_ci switch ((unsigned char)*ptr) { 15607db96d56Sopenharmony_ci case 0xFE: 15617db96d56Sopenharmony_ci case 0xFF: 15627db96d56Sopenharmony_ci case 0xEF: /* possibly first byte of UTF-8 BOM */ 15637db96d56Sopenharmony_ci if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC && state == XML_CONTENT_STATE) 15647db96d56Sopenharmony_ci break; 15657db96d56Sopenharmony_ci /* fall through */ 15667db96d56Sopenharmony_ci case 0x00: 15677db96d56Sopenharmony_ci case 0x3C: 15687db96d56Sopenharmony_ci return XML_TOK_PARTIAL; 15697db96d56Sopenharmony_ci } 15707db96d56Sopenharmony_ci } else { 15717db96d56Sopenharmony_ci switch (((unsigned char)ptr[0] << 8) | (unsigned char)ptr[1]) { 15727db96d56Sopenharmony_ci case 0xFEFF: 15737db96d56Sopenharmony_ci if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC && state == XML_CONTENT_STATE) 15747db96d56Sopenharmony_ci break; 15757db96d56Sopenharmony_ci *nextTokPtr = ptr + 2; 15767db96d56Sopenharmony_ci *encPtr = encodingTable[UTF_16BE_ENC]; 15777db96d56Sopenharmony_ci return XML_TOK_BOM; 15787db96d56Sopenharmony_ci /* 00 3C is handled in the default case */ 15797db96d56Sopenharmony_ci case 0x3C00: 15807db96d56Sopenharmony_ci if ((INIT_ENC_INDEX(enc) == UTF_16BE_ENC 15817db96d56Sopenharmony_ci || INIT_ENC_INDEX(enc) == UTF_16_ENC) 15827db96d56Sopenharmony_ci && state == XML_CONTENT_STATE) 15837db96d56Sopenharmony_ci break; 15847db96d56Sopenharmony_ci *encPtr = encodingTable[UTF_16LE_ENC]; 15857db96d56Sopenharmony_ci return XmlTok(*encPtr, state, ptr, end, nextTokPtr); 15867db96d56Sopenharmony_ci case 0xFFFE: 15877db96d56Sopenharmony_ci if (INIT_ENC_INDEX(enc) == 
ISO_8859_1_ENC && state == XML_CONTENT_STATE) 15887db96d56Sopenharmony_ci break; 15897db96d56Sopenharmony_ci *nextTokPtr = ptr + 2; 15907db96d56Sopenharmony_ci *encPtr = encodingTable[UTF_16LE_ENC]; 15917db96d56Sopenharmony_ci return XML_TOK_BOM; 15927db96d56Sopenharmony_ci case 0xEFBB: 15937db96d56Sopenharmony_ci /* Maybe a UTF-8 BOM (EF BB BF) */ 15947db96d56Sopenharmony_ci /* If there's an explicitly specified (external) encoding 15957db96d56Sopenharmony_ci of ISO-8859-1 or some flavour of UTF-16 15967db96d56Sopenharmony_ci and this is an external text entity, 15977db96d56Sopenharmony_ci don't look for the BOM, 15987db96d56Sopenharmony_ci because it might be a legal data. 15997db96d56Sopenharmony_ci */ 16007db96d56Sopenharmony_ci if (state == XML_CONTENT_STATE) { 16017db96d56Sopenharmony_ci int e = INIT_ENC_INDEX(enc); 16027db96d56Sopenharmony_ci if (e == ISO_8859_1_ENC || e == UTF_16BE_ENC || e == UTF_16LE_ENC 16037db96d56Sopenharmony_ci || e == UTF_16_ENC) 16047db96d56Sopenharmony_ci break; 16057db96d56Sopenharmony_ci } 16067db96d56Sopenharmony_ci if (ptr + 2 == end) 16077db96d56Sopenharmony_ci return XML_TOK_PARTIAL; 16087db96d56Sopenharmony_ci if ((unsigned char)ptr[2] == 0xBF) { 16097db96d56Sopenharmony_ci *nextTokPtr = ptr + 3; 16107db96d56Sopenharmony_ci *encPtr = encodingTable[UTF_8_ENC]; 16117db96d56Sopenharmony_ci return XML_TOK_BOM; 16127db96d56Sopenharmony_ci } 16137db96d56Sopenharmony_ci break; 16147db96d56Sopenharmony_ci default: 16157db96d56Sopenharmony_ci if (ptr[0] == '\0') { 16167db96d56Sopenharmony_ci /* 0 isn't a legal data character. Furthermore a document 16177db96d56Sopenharmony_ci entity can only start with ASCII characters. So the only 16187db96d56Sopenharmony_ci way this can fail to be big-endian UTF-16 if it it's an 16197db96d56Sopenharmony_ci external parsed general entity that's labelled as 16207db96d56Sopenharmony_ci UTF-16LE. 
16217db96d56Sopenharmony_ci */ 16227db96d56Sopenharmony_ci if (state == XML_CONTENT_STATE && INIT_ENC_INDEX(enc) == UTF_16LE_ENC) 16237db96d56Sopenharmony_ci break; 16247db96d56Sopenharmony_ci *encPtr = encodingTable[UTF_16BE_ENC]; 16257db96d56Sopenharmony_ci return XmlTok(*encPtr, state, ptr, end, nextTokPtr); 16267db96d56Sopenharmony_ci } else if (ptr[1] == '\0') { 16277db96d56Sopenharmony_ci /* We could recover here in the case: 16287db96d56Sopenharmony_ci - parsing an external entity 16297db96d56Sopenharmony_ci - second byte is 0 16307db96d56Sopenharmony_ci - no externally specified encoding 16317db96d56Sopenharmony_ci - no encoding declaration 16327db96d56Sopenharmony_ci by assuming UTF-16LE. But we don't, because this would mean when 16337db96d56Sopenharmony_ci presented just with a single byte, we couldn't reliably determine 16347db96d56Sopenharmony_ci whether we needed further bytes. 16357db96d56Sopenharmony_ci */ 16367db96d56Sopenharmony_ci if (state == XML_CONTENT_STATE) 16377db96d56Sopenharmony_ci break; 16387db96d56Sopenharmony_ci *encPtr = encodingTable[UTF_16LE_ENC]; 16397db96d56Sopenharmony_ci return XmlTok(*encPtr, state, ptr, end, nextTokPtr); 16407db96d56Sopenharmony_ci } 16417db96d56Sopenharmony_ci break; 16427db96d56Sopenharmony_ci } 16437db96d56Sopenharmony_ci } 16447db96d56Sopenharmony_ci *encPtr = encodingTable[INIT_ENC_INDEX(enc)]; 16457db96d56Sopenharmony_ci return XmlTok(*encPtr, state, ptr, end, nextTokPtr); 16467db96d56Sopenharmony_ci} 16477db96d56Sopenharmony_ci 16487db96d56Sopenharmony_ci#define NS(x) x 16497db96d56Sopenharmony_ci#define ns(x) x 16507db96d56Sopenharmony_ci#define XML_TOK_NS_C 16517db96d56Sopenharmony_ci#include "xmltok_ns.c" 16527db96d56Sopenharmony_ci#undef XML_TOK_NS_C 16537db96d56Sopenharmony_ci#undef NS 16547db96d56Sopenharmony_ci#undef ns 16557db96d56Sopenharmony_ci 16567db96d56Sopenharmony_ci#ifdef XML_NS 16577db96d56Sopenharmony_ci 16587db96d56Sopenharmony_ci# define NS(x) x##NS 16597db96d56Sopenharmony_ci# 
define ns(x) x##_ns 16607db96d56Sopenharmony_ci 16617db96d56Sopenharmony_ci# define XML_TOK_NS_C 16627db96d56Sopenharmony_ci# include "xmltok_ns.c" 16637db96d56Sopenharmony_ci# undef XML_TOK_NS_C 16647db96d56Sopenharmony_ci 16657db96d56Sopenharmony_ci# undef NS 16667db96d56Sopenharmony_ci# undef ns 16677db96d56Sopenharmony_ci 16687db96d56Sopenharmony_ciENCODING * 16697db96d56Sopenharmony_ciXmlInitUnknownEncodingNS(void *mem, int *table, CONVERTER convert, 16707db96d56Sopenharmony_ci void *userData) { 16717db96d56Sopenharmony_ci ENCODING *enc = XmlInitUnknownEncoding(mem, table, convert, userData); 16727db96d56Sopenharmony_ci if (enc) 16737db96d56Sopenharmony_ci ((struct normal_encoding *)enc)->type[ASCII_COLON] = BT_COLON; 16747db96d56Sopenharmony_ci return enc; 16757db96d56Sopenharmony_ci} 16767db96d56Sopenharmony_ci 16777db96d56Sopenharmony_ci#endif /* XML_NS */ 1678