17db96d56Sopenharmony_ci/*
27db96d56Sopenharmony_ci                            __  __            _
37db96d56Sopenharmony_ci                         ___\ \/ /_ __   __ _| |_
47db96d56Sopenharmony_ci                        / _ \\  /| '_ \ / _` | __|
57db96d56Sopenharmony_ci                       |  __//  \| |_) | (_| | |_
67db96d56Sopenharmony_ci                        \___/_/\_\ .__/ \__,_|\__|
77db96d56Sopenharmony_ci                                 |_| XML parser
87db96d56Sopenharmony_ci
97db96d56Sopenharmony_ci   Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
107db96d56Sopenharmony_ci   Copyright (c) 2000      Clark Cooper <coopercc@users.sourceforge.net>
117db96d56Sopenharmony_ci   Copyright (c) 2001-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
127db96d56Sopenharmony_ci   Copyright (c) 2002      Greg Stein <gstein@users.sourceforge.net>
137db96d56Sopenharmony_ci   Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net>
147db96d56Sopenharmony_ci   Copyright (c) 2005-2009 Steven Solie <steven@solie.ca>
157db96d56Sopenharmony_ci   Copyright (c) 2016-2022 Sebastian Pipping <sebastian@pipping.org>
167db96d56Sopenharmony_ci   Copyright (c) 2016      Pascal Cuoq <cuoq@trust-in-soft.com>
177db96d56Sopenharmony_ci   Copyright (c) 2016      Don Lewis <truckman@apache.org>
187db96d56Sopenharmony_ci   Copyright (c) 2017      Rhodri James <rhodri@wildebeest.org.uk>
197db96d56Sopenharmony_ci   Copyright (c) 2017      Alexander Bluhm <alexander.bluhm@gmx.net>
207db96d56Sopenharmony_ci   Copyright (c) 2017      Benbuck Nason <bnason@netflix.com>
217db96d56Sopenharmony_ci   Copyright (c) 2017      José Gutiérrez de la Concha <jose@zeroc.com>
227db96d56Sopenharmony_ci   Copyright (c) 2019      David Loffredo <loffredo@steptools.com>
237db96d56Sopenharmony_ci   Copyright (c) 2021      Dong-hee Na <donghee.na@python.org>
247db96d56Sopenharmony_ci   Copyright (c) 2022      Martin Ettl <ettl.martin78@googlemail.com>
257db96d56Sopenharmony_ci   Licensed under the MIT license:
267db96d56Sopenharmony_ci
277db96d56Sopenharmony_ci   Permission is  hereby granted,  free of charge,  to any  person obtaining
287db96d56Sopenharmony_ci   a  copy  of  this  software   and  associated  documentation  files  (the
297db96d56Sopenharmony_ci   "Software"),  to  deal in  the  Software  without restriction,  including
307db96d56Sopenharmony_ci   without  limitation the  rights  to use,  copy,  modify, merge,  publish,
317db96d56Sopenharmony_ci   distribute, sublicense, and/or sell copies of the Software, and to permit
327db96d56Sopenharmony_ci   persons  to whom  the Software  is  furnished to  do so,  subject to  the
337db96d56Sopenharmony_ci   following conditions:
347db96d56Sopenharmony_ci
357db96d56Sopenharmony_ci   The above copyright  notice and this permission notice  shall be included
367db96d56Sopenharmony_ci   in all copies or substantial portions of the Software.
377db96d56Sopenharmony_ci
387db96d56Sopenharmony_ci   THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
397db96d56Sopenharmony_ci   EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
407db96d56Sopenharmony_ci   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
417db96d56Sopenharmony_ci   NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
427db96d56Sopenharmony_ci   DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
437db96d56Sopenharmony_ci   OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
447db96d56Sopenharmony_ci   USE OR OTHER DEALINGS IN THE SOFTWARE.
457db96d56Sopenharmony_ci*/
467db96d56Sopenharmony_ci
477db96d56Sopenharmony_ci#include <expat_config.h>
487db96d56Sopenharmony_ci
497db96d56Sopenharmony_ci#include <stddef.h>
507db96d56Sopenharmony_ci#include <string.h> /* memcpy */
517db96d56Sopenharmony_ci#include <stdbool.h>
527db96d56Sopenharmony_ci
537db96d56Sopenharmony_ci#ifdef _WIN32
547db96d56Sopenharmony_ci#  include "winconfig.h"
557db96d56Sopenharmony_ci#endif
567db96d56Sopenharmony_ci
577db96d56Sopenharmony_ci#include "expat_external.h"
587db96d56Sopenharmony_ci#include "internal.h"
597db96d56Sopenharmony_ci#include "xmltok.h"
607db96d56Sopenharmony_ci#include "nametab.h"
617db96d56Sopenharmony_ci
/* Optional last entry of the first brace group in VTABLE1.  The leading
   comma is part of the macro so that the entry vanishes completely when
   XML_DTD is not defined. */
#ifndef XML_DTD
#  define IGNORE_SECTION_TOK_VTABLE /* as nothing */
#else
#  define IGNORE_SECTION_TOK_VTABLE , PREFIX(ignoreSectionTok)
#endif

/* Initializer fragment naming the per-encoding functions.  Every name is
   passed through PREFIX(), so the PREFIX definition that is active where
   the fragment is expanded selects the function family.  The fragment is
   made of two brace-enclosed groups followed by eight single entries. */
#define VTABLE1 \
  { \
    PREFIX(prologTok), \
    PREFIX(contentTok), \
    PREFIX(cdataSectionTok) IGNORE_SECTION_TOK_VTABLE \
  }, \
  { \
    PREFIX(attributeValueTok), \
    PREFIX(entityValueTok) \
  }, \
  PREFIX(nameMatchesAscii), \
  PREFIX(nameLength), \
  PREFIX(skipS), \
  PREFIX(getAtts), \
  PREFIX(charRefNumber), \
  PREFIX(predefinedEntityName), \
  PREFIX(updatePosition), \
  PREFIX(isPublicId)

/* VTABLE1 plus the two conversion functions of the same PREFIX family. */
#define VTABLE VTABLE1, PREFIX(toUtf8), PREFIX(toUtf16)
777db96d56Sopenharmony_ci
/* Test one bit of namingBitmap.  (pages)[hi] selects a group of eight
   consecutive bitmap entries, (lo) >> 5 picks the entry inside that group
   (three bits for an 8-bit lo) and the low five bits of lo pick the bit.
   The result is non-zero when the bit is set.  All arguments are
   parenthesized so that pointer expressions such as `table + 1` expand
   correctly. */
#define UCS2_GET_NAMING(pages, hi, lo) \
  (namingBitmap[((pages)[(hi)] << 3) + ((lo) >> 5)] & (1u << ((lo) & 0x1F)))

/* A 2 byte UTF-8 representation splits the characters 11 bits between
   the bottom 5 and 6 bits of the bytes.  We need 8 bits to index into
   pages, 3 bits to add to that index and 5 bits to generate the mask.
   `byte` must point to unsigned bytes.
*/
#define UTF8_GET_NAMING2(pages, byte) \
  (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \
                + ((((byte)[0]) & 3) << 1) + ((((byte)[1]) >> 5) & 1)] \
   & (1u << (((byte)[1]) & 0x1F)))

/* A 3 byte UTF-8 representation splits the characters 16 bits between
   the bottom 4, 6 and 6 bits of the bytes.  We need 8 bits to index
   into pages, 3 bits to add to that index and 5 bits to generate the
   mask.  `byte` must point to unsigned bytes.
*/
#define UTF8_GET_NAMING3(pages, byte) \
  (namingBitmap \
       [((pages)[((((byte)[0]) & 0xF) << 4) + ((((byte)[1]) >> 2) & 0xF)] \
         << 3) \
        + ((((byte)[1]) & 3) << 1) + ((((byte)[2]) >> 5) & 1)] \
   & (1u << (((byte)[2]) & 0x1F)))
1017db96d56Sopenharmony_ci
/* Detection of invalid UTF-8 sequences is based on Table 3.1B
   of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/
   with the additional restriction of not allowing the Unicode
   code points 0xFFFF and 0xFFFE (sequences EF,BF,BF and EF,BF,BE).
   Implementation details:
     (A & 0x80) == 0     means A < 0x80
   and
     (A & 0xC0) == 0xC0  means A > 0xBF

   Each macro yields non-zero when the sequence starting at p is NOT
   acceptable.  p must point to unsigned bytes (the comparisons against
   0xC2, 0xBD, ... rely on that) and may be any pointer expression: the
   argument is parenthesized at every use, including the first byte.
*/

/* 2-byte sequence: lead byte at least 0xC2, second byte in 0x80..0xBF. */
#define UTF8_INVALID2(p) \
  ((p)[0] < 0xC2 || ((p)[1] & 0x80) == 0 || ((p)[1] & 0xC0) == 0xC0)

/* 3-byte sequence: after EF,BF the third byte may not exceed 0xBD; after
   E0 the second byte must be at least 0xA0; after ED it may not exceed
   0x9F; otherwise second and third bytes must lie in 0x80..0xBF. */
#define UTF8_INVALID3(p) \
  (((p)[2] & 0x80) == 0 \
   || ((p)[0] == 0xEF && (p)[1] == 0xBF ? (p)[2] > 0xBD \
                                        : ((p)[2] & 0xC0) == 0xC0) \
   || ((p)[0] == 0xE0 \
           ? (p)[1] < 0xA0 || ((p)[1] & 0xC0) == 0xC0 \
           : ((p)[1] & 0x80) == 0 \
                 || ((p)[0] == 0xED ? (p)[1] > 0x9F \
                                    : ((p)[1] & 0xC0) == 0xC0)))

/* 4-byte sequence: after F0 the second byte must be at least 0x90; after
   F4 it may not exceed 0x8F; all other trailing bytes must lie in
   0x80..0xBF. */
#define UTF8_INVALID4(p) \
  (((p)[3] & 0x80) == 0 || ((p)[3] & 0xC0) == 0xC0 || ((p)[2] & 0x80) == 0 \
   || ((p)[2] & 0xC0) == 0xC0 \
   || ((p)[0] == 0xF0 \
           ? (p)[1] < 0x90 || ((p)[1] & 0xC0) == 0xC0 \
           : ((p)[1] & 0x80) == 0 \
                 || ((p)[0] == 0xF4 ? (p)[1] > 0x8F \
                                    : ((p)[1] & 0xC0) == 0xC0)))
1317db96d56Sopenharmony_ci
1327db96d56Sopenharmony_cistatic int PTRFASTCALL
1337db96d56Sopenharmony_ciisNever(const ENCODING *enc, const char *p) {
1347db96d56Sopenharmony_ci  UNUSED_P(enc);
1357db96d56Sopenharmony_ci  UNUSED_P(p);
1367db96d56Sopenharmony_ci  return 0;
1377db96d56Sopenharmony_ci}
1387db96d56Sopenharmony_ci
1397db96d56Sopenharmony_cistatic int PTRFASTCALL
1407db96d56Sopenharmony_ciutf8_isName2(const ENCODING *enc, const char *p) {
1417db96d56Sopenharmony_ci  UNUSED_P(enc);
1427db96d56Sopenharmony_ci  return UTF8_GET_NAMING2(namePages, (const unsigned char *)p);
1437db96d56Sopenharmony_ci}
1447db96d56Sopenharmony_ci
1457db96d56Sopenharmony_cistatic int PTRFASTCALL
1467db96d56Sopenharmony_ciutf8_isName3(const ENCODING *enc, const char *p) {
1477db96d56Sopenharmony_ci  UNUSED_P(enc);
1487db96d56Sopenharmony_ci  return UTF8_GET_NAMING3(namePages, (const unsigned char *)p);
1497db96d56Sopenharmony_ci}
1507db96d56Sopenharmony_ci
1517db96d56Sopenharmony_ci#define utf8_isName4 isNever
1527db96d56Sopenharmony_ci
1537db96d56Sopenharmony_cistatic int PTRFASTCALL
1547db96d56Sopenharmony_ciutf8_isNmstrt2(const ENCODING *enc, const char *p) {
1557db96d56Sopenharmony_ci  UNUSED_P(enc);
1567db96d56Sopenharmony_ci  return UTF8_GET_NAMING2(nmstrtPages, (const unsigned char *)p);
1577db96d56Sopenharmony_ci}
1587db96d56Sopenharmony_ci
1597db96d56Sopenharmony_cistatic int PTRFASTCALL
1607db96d56Sopenharmony_ciutf8_isNmstrt3(const ENCODING *enc, const char *p) {
1617db96d56Sopenharmony_ci  UNUSED_P(enc);
1627db96d56Sopenharmony_ci  return UTF8_GET_NAMING3(nmstrtPages, (const unsigned char *)p);
1637db96d56Sopenharmony_ci}
1647db96d56Sopenharmony_ci
1657db96d56Sopenharmony_ci#define utf8_isNmstrt4 isNever
1667db96d56Sopenharmony_ci
1677db96d56Sopenharmony_cistatic int PTRFASTCALL
1687db96d56Sopenharmony_ciutf8_isInvalid2(const ENCODING *enc, const char *p) {
1697db96d56Sopenharmony_ci  UNUSED_P(enc);
1707db96d56Sopenharmony_ci  return UTF8_INVALID2((const unsigned char *)p);
1717db96d56Sopenharmony_ci}
1727db96d56Sopenharmony_ci
1737db96d56Sopenharmony_cistatic int PTRFASTCALL
1747db96d56Sopenharmony_ciutf8_isInvalid3(const ENCODING *enc, const char *p) {
1757db96d56Sopenharmony_ci  UNUSED_P(enc);
1767db96d56Sopenharmony_ci  return UTF8_INVALID3((const unsigned char *)p);
1777db96d56Sopenharmony_ci}
1787db96d56Sopenharmony_ci
1797db96d56Sopenharmony_cistatic int PTRFASTCALL
1807db96d56Sopenharmony_ciutf8_isInvalid4(const ENCODING *enc, const char *p) {
1817db96d56Sopenharmony_ci  UNUSED_P(enc);
1827db96d56Sopenharmony_ci  return UTF8_INVALID4((const unsigned char *)p);
1837db96d56Sopenharmony_ci}
1847db96d56Sopenharmony_ci
1857db96d56Sopenharmony_cistruct normal_encoding {
1867db96d56Sopenharmony_ci  ENCODING enc;
1877db96d56Sopenharmony_ci  unsigned char type[256];
1887db96d56Sopenharmony_ci#ifdef XML_MIN_SIZE
1897db96d56Sopenharmony_ci  int(PTRFASTCALL *byteType)(const ENCODING *, const char *);
1907db96d56Sopenharmony_ci  int(PTRFASTCALL *isNameMin)(const ENCODING *, const char *);
1917db96d56Sopenharmony_ci  int(PTRFASTCALL *isNmstrtMin)(const ENCODING *, const char *);
1927db96d56Sopenharmony_ci  int(PTRFASTCALL *byteToAscii)(const ENCODING *, const char *);
1937db96d56Sopenharmony_ci  int(PTRCALL *charMatches)(const ENCODING *, const char *, int);
1947db96d56Sopenharmony_ci#endif /* XML_MIN_SIZE */
1957db96d56Sopenharmony_ci  int(PTRFASTCALL *isName2)(const ENCODING *, const char *);
1967db96d56Sopenharmony_ci  int(PTRFASTCALL *isName3)(const ENCODING *, const char *);
1977db96d56Sopenharmony_ci  int(PTRFASTCALL *isName4)(const ENCODING *, const char *);
1987db96d56Sopenharmony_ci  int(PTRFASTCALL *isNmstrt2)(const ENCODING *, const char *);
1997db96d56Sopenharmony_ci  int(PTRFASTCALL *isNmstrt3)(const ENCODING *, const char *);
2007db96d56Sopenharmony_ci  int(PTRFASTCALL *isNmstrt4)(const ENCODING *, const char *);
2017db96d56Sopenharmony_ci  int(PTRFASTCALL *isInvalid2)(const ENCODING *, const char *);
2027db96d56Sopenharmony_ci  int(PTRFASTCALL *isInvalid3)(const ENCODING *, const char *);
2037db96d56Sopenharmony_ci  int(PTRFASTCALL *isInvalid4)(const ENCODING *, const char *);
2047db96d56Sopenharmony_ci};
2057db96d56Sopenharmony_ci
2067db96d56Sopenharmony_ci#define AS_NORMAL_ENCODING(enc) ((const struct normal_encoding *)(enc))
2077db96d56Sopenharmony_ci
2087db96d56Sopenharmony_ci#ifdef XML_MIN_SIZE
2097db96d56Sopenharmony_ci
2107db96d56Sopenharmony_ci#  define STANDARD_VTABLE(E)                                                   \
2117db96d56Sopenharmony_ci    E##byteType, E##isNameMin, E##isNmstrtMin, E##byteToAscii, E##charMatches,
2127db96d56Sopenharmony_ci
2137db96d56Sopenharmony_ci#else
2147db96d56Sopenharmony_ci
2157db96d56Sopenharmony_ci#  define STANDARD_VTABLE(E) /* as nothing */
2167db96d56Sopenharmony_ci
2177db96d56Sopenharmony_ci#endif
2187db96d56Sopenharmony_ci
2197db96d56Sopenharmony_ci#define NORMAL_VTABLE(E)                                                       \
2207db96d56Sopenharmony_ci  E##isName2, E##isName3, E##isName4, E##isNmstrt2, E##isNmstrt3,              \
2217db96d56Sopenharmony_ci      E##isNmstrt4, E##isInvalid2, E##isInvalid3, E##isInvalid4
2227db96d56Sopenharmony_ci
2237db96d56Sopenharmony_ci#define NULL_VTABLE                                                            \
2247db96d56Sopenharmony_ci  /* isName2 */ NULL, /* isName3 */ NULL, /* isName4 */ NULL,                  \
2257db96d56Sopenharmony_ci      /* isNmstrt2 */ NULL, /* isNmstrt3 */ NULL, /* isNmstrt4 */ NULL,        \
2267db96d56Sopenharmony_ci      /* isInvalid2 */ NULL, /* isInvalid3 */ NULL, /* isInvalid4 */ NULL
2277db96d56Sopenharmony_ci
2287db96d56Sopenharmony_cistatic int FASTCALL checkCharRefNumber(int);
2297db96d56Sopenharmony_ci
2307db96d56Sopenharmony_ci#include "xmltok_impl.h"
2317db96d56Sopenharmony_ci#include "ascii.h"
2327db96d56Sopenharmony_ci
#ifdef XML_MIN_SIZE
/* The *Min predicates of the sb_ function family never match. */
#  define sb_isNameMin isNever
#  define sb_isNmstrtMin isNever
/* Minimum bytes per character, read from the encoding object. */
#  define MINBPC(enc) ((enc)->minBytesPerChar)
#else
/* minimum bytes per character */
#  define MINBPC(enc) 1
#endif
2447db96d56Sopenharmony_ci
/* Byte-type code of the byte at p, read from the type[] table of the
   struct normal_encoding that enc points to.  The byte is viewed as
   unsigned char so that values >= 0x80 index the upper half of the table
   even where plain char is signed.  The cast keeps the pointee const: the
   table is only read here. */
#define SB_BYTE_TYPE(enc, p) \
  (((const struct normal_encoding *)(enc))->type[(unsigned char)*(p)])

/* BYTE_TYPE is what the tokenizer template uses.  XML_MIN_SIZE builds go
   through the byteType function pointer of the encoding; all other builds
   read the table directly. */
#ifdef XML_MIN_SIZE
static int PTRFASTCALL
sb_byteType(const ENCODING *enc, const char *p) {
  return SB_BYTE_TYPE(enc, p);
}
#  define BYTE_TYPE(enc, p) (AS_NORMAL_ENCODING(enc)->byteType(enc, p))
#else
#  define BYTE_TYPE(enc, p) SB_BYTE_TYPE(enc, p)
#endif
2577db96d56Sopenharmony_ci
/* BYTE_TO_ASCII yields the char stored at p.  XML_MIN_SIZE builds call
   through the byteToAscii pointer of the encoding (sb_byteToAscii is the
   single-byte implementation); other builds dereference p directly. */
#ifndef XML_MIN_SIZE
#  define BYTE_TO_ASCII(enc, p) (*(p))
#else
static int PTRFASTCALL
sb_byteToAscii(const ENCODING *enc, const char *p) {
  UNUSED_P(enc);
  return *p;
}
#  define BYTE_TO_ASCII(enc, p) (AS_NORMAL_ENCODING(enc)->byteToAscii(enc, p))
#endif
2687db96d56Sopenharmony_ci
/* Character-class tests for an n-byte sequence at p (n is pasted into the
   member name, so it must be a literal 2, 3 or 4).  They dispatch through
   the predicates stored in the struct normal_encoding behind enc. */
#define IS_NAME_CHAR(enc, p, n) (AS_NORMAL_ENCODING(enc)->isName##n(enc, p))
#define IS_NMSTRT_CHAR(enc, p, n) (AS_NORMAL_ENCODING(enc)->isNmstrt##n(enc, p))

#ifdef XML_MIN_SIZE
/* The isInvalid pointers may be unset here, so test before calling. */
#  define IS_INVALID_CHAR(enc, p, n) \
    (AS_NORMAL_ENCODING(enc)->isInvalid##n \
     && AS_NORMAL_ENCODING(enc)->isInvalid##n(enc, p))
/* Minimum-width character tests go through the encoding as well. */
#  define IS_NAME_CHAR_MINBPC(enc, p) \
    (AS_NORMAL_ENCODING(enc)->isNameMin(enc, p))
#  define IS_NMSTRT_CHAR_MINBPC(enc, p) \
    (AS_NORMAL_ENCODING(enc)->isNmstrtMin(enc, p))
#else
#  define IS_INVALID_CHAR(enc, p, n) \
    (AS_NORMAL_ENCODING(enc)->isInvalid##n(enc, p))
/* Without XML_MIN_SIZE these two tests are constant false. */
#  define IS_NAME_CHAR_MINBPC(enc, p) (0)
#  define IS_NMSTRT_CHAR_MINBPC(enc, p) (0)
#endif
2897db96d56Sopenharmony_ci
/* CHAR_MATCHES tests whether the char at p equals c.  XML_MIN_SIZE builds
   call through the charMatches pointer of the encoding (sb_charMatches is
   the single-byte implementation); other builds compare in place. */
#ifndef XML_MIN_SIZE
/* c is an ASCII character */
#  define CHAR_MATCHES(enc, p, c) (*(p) == (c))
#else
static int PTRCALL
sb_charMatches(const ENCODING *enc, const char *p, int c) {
  UNUSED_P(enc);
  return *p == c;
}
#  define CHAR_MATCHES(enc, p, c) \
    (AS_NORMAL_ENCODING(enc)->charMatches(enc, p, c))
#endif
3027db96d56Sopenharmony_ci
3037db96d56Sopenharmony_ci#define PREFIX(ident) normal_##ident
3047db96d56Sopenharmony_ci#define XML_TOK_IMPL_C
3057db96d56Sopenharmony_ci#include "xmltok_impl.c"
3067db96d56Sopenharmony_ci#undef XML_TOK_IMPL_C
3077db96d56Sopenharmony_ci
3087db96d56Sopenharmony_ci#undef MINBPC
3097db96d56Sopenharmony_ci#undef BYTE_TYPE
3107db96d56Sopenharmony_ci#undef BYTE_TO_ASCII
3117db96d56Sopenharmony_ci#undef CHAR_MATCHES
3127db96d56Sopenharmony_ci#undef IS_NAME_CHAR
3137db96d56Sopenharmony_ci#undef IS_NAME_CHAR_MINBPC
3147db96d56Sopenharmony_ci#undef IS_NMSTRT_CHAR
3157db96d56Sopenharmony_ci#undef IS_NMSTRT_CHAR_MINBPC
3167db96d56Sopenharmony_ci#undef IS_INVALID_CHAR
3177db96d56Sopenharmony_ci
/* UTF8_cvalN is value of masked first byte of N byte sequence */
enum {
  UTF8_cval1 = 0x00, /* 0b0xxxxxxx */
  UTF8_cval2 = 0xc0, /* 0b110xxxxx */
  UTF8_cval3 = 0xe0, /* 0b1110xxxx */
  UTF8_cval4 = 0xf0  /* 0b11110xxx */
};
3247db96d56Sopenharmony_ci
/* Classify one byte by the UTF-8 sequence it would start:
     1  for 0b0xxxxxxx (single-byte character),
     2  for 0b110xxxxx, 3 for 0b1110xxxx, 4 for 0b11110xxx,
     0  for anything else (continuation bytes and 0xF8..0xFF). */
static size_t
utf8SequenceLengthFromLeadByte(unsigned char byte) {
  if ((byte & 0x80u) == 0x00u)
    return 1;
  if ((byte & 0xe0u) == 0xc0u)
    return 2;
  if ((byte & 0xf0u) == 0xe0u)
    return 3;
  if ((byte & 0xf8u) == 0xf0u)
    return 4;
  return 0;
}

/* Move *fromLimRef backwards (never before `from`) so that the range
   [from, *fromLimRef) does not end in the middle of a UTF-8 character.

   The range is scanned from its end towards `from`:
     - a single-byte character ends the scan, nothing further is cut;
     - a lead byte with enough bytes behind it ends the scan; the limit is
       placed right after that character;
     - a lead byte with too few bytes behind it is dropped together with
       those bytes and the scan goes on;
     - any other byte is stepped over.
   If the scan reaches `from` without finding a stopping point, the limit
   ends up equal to `from`.

   NOTE(review): after dropping an incomplete character `walked` restarts
   at 0 and is then bumped to 1 by the loop increment.  That is the
   historical behaviour and is preserved here; it makes no difference for
   well-formed input that is merely truncated at the end. */
void
_INTERNAL_trim_to_complete_utf8_characters(const char *from,
                                           const char **fromLimRef) {
  const char *fromLim = *fromLimRef;
  size_t walked = 0; /* bytes stepped over since the last restart */

  for (; fromLim > from; fromLim--, walked++) {
    const size_t seqLen
        = utf8SequenceLengthFromLeadByte((unsigned char)fromLim[-1]);

    if (seqLen == 0)
      continue; /* not a lead byte: keep walking backwards */
    if (seqLen == 1)
      break; /* single-byte character right before the limit */
    if (walked + 1 >= seqLen) {
      /* Complete character: put the limit right behind its last byte. */
      fromLim += seqLen - 1;
      break;
    }
    walked = 0; /* incomplete character: drop it and continue */
  }
  *fromLimRef = fromLim;
}
3637db96d56Sopenharmony_ci
3647db96d56Sopenharmony_cistatic enum XML_Convert_Result PTRCALL
3657db96d56Sopenharmony_ciutf8_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim,
3667db96d56Sopenharmony_ci            char **toP, const char *toLim) {
3677db96d56Sopenharmony_ci  bool input_incomplete = false;
3687db96d56Sopenharmony_ci  bool output_exhausted = false;
3697db96d56Sopenharmony_ci
3707db96d56Sopenharmony_ci  /* Avoid copying partial characters (due to limited space). */
3717db96d56Sopenharmony_ci  const ptrdiff_t bytesAvailable = fromLim - *fromP;
3727db96d56Sopenharmony_ci  const ptrdiff_t bytesStorable = toLim - *toP;
3737db96d56Sopenharmony_ci  UNUSED_P(enc);
3747db96d56Sopenharmony_ci  if (bytesAvailable > bytesStorable) {
3757db96d56Sopenharmony_ci    fromLim = *fromP + bytesStorable;
3767db96d56Sopenharmony_ci    output_exhausted = true;
3777db96d56Sopenharmony_ci  }
3787db96d56Sopenharmony_ci
3797db96d56Sopenharmony_ci  /* Avoid copying partial characters (from incomplete input). */
3807db96d56Sopenharmony_ci  {
3817db96d56Sopenharmony_ci    const char *const fromLimBefore = fromLim;
3827db96d56Sopenharmony_ci    _INTERNAL_trim_to_complete_utf8_characters(*fromP, &fromLim);
3837db96d56Sopenharmony_ci    if (fromLim < fromLimBefore) {
3847db96d56Sopenharmony_ci      input_incomplete = true;
3857db96d56Sopenharmony_ci    }
3867db96d56Sopenharmony_ci  }
3877db96d56Sopenharmony_ci
3887db96d56Sopenharmony_ci  {
3897db96d56Sopenharmony_ci    const ptrdiff_t bytesToCopy = fromLim - *fromP;
3907db96d56Sopenharmony_ci    memcpy(*toP, *fromP, bytesToCopy);
3917db96d56Sopenharmony_ci    *fromP += bytesToCopy;
3927db96d56Sopenharmony_ci    *toP += bytesToCopy;
3937db96d56Sopenharmony_ci  }
3947db96d56Sopenharmony_ci
3957db96d56Sopenharmony_ci  if (output_exhausted) /* needs to go first */
3967db96d56Sopenharmony_ci    return XML_CONVERT_OUTPUT_EXHAUSTED;
3977db96d56Sopenharmony_ci  else if (input_incomplete)
3987db96d56Sopenharmony_ci    return XML_CONVERT_INPUT_INCOMPLETE;
3997db96d56Sopenharmony_ci  else
4007db96d56Sopenharmony_ci    return XML_CONVERT_COMPLETED;
4017db96d56Sopenharmony_ci}
4027db96d56Sopenharmony_ci
4037db96d56Sopenharmony_cistatic enum XML_Convert_Result PTRCALL
4047db96d56Sopenharmony_ciutf8_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim,
4057db96d56Sopenharmony_ci             unsigned short **toP, const unsigned short *toLim) {
4067db96d56Sopenharmony_ci  enum XML_Convert_Result res = XML_CONVERT_COMPLETED;
4077db96d56Sopenharmony_ci  unsigned short *to = *toP;
4087db96d56Sopenharmony_ci  const char *from = *fromP;
4097db96d56Sopenharmony_ci  while (from < fromLim && to < toLim) {
4107db96d56Sopenharmony_ci    switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) {
4117db96d56Sopenharmony_ci    case BT_LEAD2:
4127db96d56Sopenharmony_ci      if (fromLim - from < 2) {
4137db96d56Sopenharmony_ci        res = XML_CONVERT_INPUT_INCOMPLETE;
4147db96d56Sopenharmony_ci        goto after;
4157db96d56Sopenharmony_ci      }
4167db96d56Sopenharmony_ci      *to++ = (unsigned short)(((from[0] & 0x1f) << 6) | (from[1] & 0x3f));
4177db96d56Sopenharmony_ci      from += 2;
4187db96d56Sopenharmony_ci      break;
4197db96d56Sopenharmony_ci    case BT_LEAD3:
4207db96d56Sopenharmony_ci      if (fromLim - from < 3) {
4217db96d56Sopenharmony_ci        res = XML_CONVERT_INPUT_INCOMPLETE;
4227db96d56Sopenharmony_ci        goto after;
4237db96d56Sopenharmony_ci      }
4247db96d56Sopenharmony_ci      *to++ = (unsigned short)(((from[0] & 0xf) << 12) | ((from[1] & 0x3f) << 6)
4257db96d56Sopenharmony_ci                               | (from[2] & 0x3f));
4267db96d56Sopenharmony_ci      from += 3;
4277db96d56Sopenharmony_ci      break;
4287db96d56Sopenharmony_ci    case BT_LEAD4: {
4297db96d56Sopenharmony_ci      unsigned long n;
4307db96d56Sopenharmony_ci      if (toLim - to < 2) {
4317db96d56Sopenharmony_ci        res = XML_CONVERT_OUTPUT_EXHAUSTED;
4327db96d56Sopenharmony_ci        goto after;
4337db96d56Sopenharmony_ci      }
4347db96d56Sopenharmony_ci      if (fromLim - from < 4) {
4357db96d56Sopenharmony_ci        res = XML_CONVERT_INPUT_INCOMPLETE;
4367db96d56Sopenharmony_ci        goto after;
4377db96d56Sopenharmony_ci      }
4387db96d56Sopenharmony_ci      n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12)
4397db96d56Sopenharmony_ci          | ((from[2] & 0x3f) << 6) | (from[3] & 0x3f);
4407db96d56Sopenharmony_ci      n -= 0x10000;
4417db96d56Sopenharmony_ci      to[0] = (unsigned short)((n >> 10) | 0xD800);
4427db96d56Sopenharmony_ci      to[1] = (unsigned short)((n & 0x3FF) | 0xDC00);
4437db96d56Sopenharmony_ci      to += 2;
4447db96d56Sopenharmony_ci      from += 4;
4457db96d56Sopenharmony_ci    } break;
4467db96d56Sopenharmony_ci    default:
4477db96d56Sopenharmony_ci      *to++ = *from++;
4487db96d56Sopenharmony_ci      break;
4497db96d56Sopenharmony_ci    }
4507db96d56Sopenharmony_ci  }
4517db96d56Sopenharmony_ci  if (from < fromLim)
4527db96d56Sopenharmony_ci    res = XML_CONVERT_OUTPUT_EXHAUSTED;
4537db96d56Sopenharmony_ciafter:
4547db96d56Sopenharmony_ci  *fromP = from;
4557db96d56Sopenharmony_ci  *toP = to;
4567db96d56Sopenharmony_ci  return res;
4577db96d56Sopenharmony_ci}
4587db96d56Sopenharmony_ci
4597db96d56Sopenharmony_ci#ifdef XML_NS
4607db96d56Sopenharmony_cistatic const struct normal_encoding utf8_encoding_ns
4617db96d56Sopenharmony_ci    = {{VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0},
4627db96d56Sopenharmony_ci       {
4637db96d56Sopenharmony_ci#  include "asciitab.h"
4647db96d56Sopenharmony_ci#  include "utf8tab.h"
4657db96d56Sopenharmony_ci       },
4667db96d56Sopenharmony_ci       STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)};
4677db96d56Sopenharmony_ci#endif
4687db96d56Sopenharmony_ci
4697db96d56Sopenharmony_cistatic const struct normal_encoding utf8_encoding
4707db96d56Sopenharmony_ci    = {{VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0},
4717db96d56Sopenharmony_ci       {
4727db96d56Sopenharmony_ci#define BT_COLON BT_NMSTRT
4737db96d56Sopenharmony_ci#include "asciitab.h"
4747db96d56Sopenharmony_ci#undef BT_COLON
4757db96d56Sopenharmony_ci#include "utf8tab.h"
4767db96d56Sopenharmony_ci       },
4777db96d56Sopenharmony_ci       STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)};
4787db96d56Sopenharmony_ci
4797db96d56Sopenharmony_ci#ifdef XML_NS
4807db96d56Sopenharmony_ci
4817db96d56Sopenharmony_cistatic const struct normal_encoding internal_utf8_encoding_ns
4827db96d56Sopenharmony_ci    = {{VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0},
4837db96d56Sopenharmony_ci       {
4847db96d56Sopenharmony_ci#  include "iasciitab.h"
4857db96d56Sopenharmony_ci#  include "utf8tab.h"
4867db96d56Sopenharmony_ci       },
4877db96d56Sopenharmony_ci       STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)};
4887db96d56Sopenharmony_ci
4897db96d56Sopenharmony_ci#endif
4907db96d56Sopenharmony_ci
4917db96d56Sopenharmony_cistatic const struct normal_encoding internal_utf8_encoding
4927db96d56Sopenharmony_ci    = {{VTABLE1, utf8_toUtf8, utf8_toUtf16, 1, 1, 0},
4937db96d56Sopenharmony_ci       {
4947db96d56Sopenharmony_ci#define BT_COLON BT_NMSTRT
4957db96d56Sopenharmony_ci#include "iasciitab.h"
4967db96d56Sopenharmony_ci#undef BT_COLON
4977db96d56Sopenharmony_ci#include "utf8tab.h"
4987db96d56Sopenharmony_ci       },
4997db96d56Sopenharmony_ci       STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_)};
5007db96d56Sopenharmony_ci
5017db96d56Sopenharmony_cistatic enum XML_Convert_Result PTRCALL
5027db96d56Sopenharmony_cilatin1_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim,
5037db96d56Sopenharmony_ci              char **toP, const char *toLim) {
5047db96d56Sopenharmony_ci  UNUSED_P(enc);
5057db96d56Sopenharmony_ci  for (;;) {
5067db96d56Sopenharmony_ci    unsigned char c;
5077db96d56Sopenharmony_ci    if (*fromP == fromLim)
5087db96d56Sopenharmony_ci      return XML_CONVERT_COMPLETED;
5097db96d56Sopenharmony_ci    c = (unsigned char)**fromP;
5107db96d56Sopenharmony_ci    if (c & 0x80) {
5117db96d56Sopenharmony_ci      if (toLim - *toP < 2)
5127db96d56Sopenharmony_ci        return XML_CONVERT_OUTPUT_EXHAUSTED;
5137db96d56Sopenharmony_ci      *(*toP)++ = (char)((c >> 6) | UTF8_cval2);
5147db96d56Sopenharmony_ci      *(*toP)++ = (char)((c & 0x3f) | 0x80);
5157db96d56Sopenharmony_ci      (*fromP)++;
5167db96d56Sopenharmony_ci    } else {
5177db96d56Sopenharmony_ci      if (*toP == toLim)
5187db96d56Sopenharmony_ci        return XML_CONVERT_OUTPUT_EXHAUSTED;
5197db96d56Sopenharmony_ci      *(*toP)++ = *(*fromP)++;
5207db96d56Sopenharmony_ci    }
5217db96d56Sopenharmony_ci  }
5227db96d56Sopenharmony_ci}
5237db96d56Sopenharmony_ci
5247db96d56Sopenharmony_cistatic enum XML_Convert_Result PTRCALL
5257db96d56Sopenharmony_cilatin1_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim,
5267db96d56Sopenharmony_ci               unsigned short **toP, const unsigned short *toLim) {
5277db96d56Sopenharmony_ci  UNUSED_P(enc);
5287db96d56Sopenharmony_ci  while (*fromP < fromLim && *toP < toLim)
5297db96d56Sopenharmony_ci    *(*toP)++ = (unsigned char)*(*fromP)++;
5307db96d56Sopenharmony_ci
5317db96d56Sopenharmony_ci  if ((*toP == toLim) && (*fromP < fromLim))
5327db96d56Sopenharmony_ci    return XML_CONVERT_OUTPUT_EXHAUSTED;
5337db96d56Sopenharmony_ci  else
5347db96d56Sopenharmony_ci    return XML_CONVERT_COMPLETED;
5357db96d56Sopenharmony_ci}
5367db96d56Sopenharmony_ci
#ifdef XML_NS

/* Latin-1 encoding descriptor, variant built only when XML_NS is defined.
 *
 * First aggregate: VTABLE1 entries, the Latin-1 converters, then three
 * integer fields (1, 0, 0) -- presumably minBytesPerChar / isUtf8 /
 * isUtf16; confirm against the ENCODING declaration.
 * Second aggregate: the per-byte type table, taken from asciitab.h followed
 * by latin1tab.h.  BT_COLON is NOT redefined here, unlike in
 * latin1_encoding.
 * The trailing entries come from STANDARD_VTABLE(sb_) and NULL_VTABLE.
 */
static const struct normal_encoding latin1_encoding_ns
    = {{VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0},
       {
#  include "asciitab.h"
#  include "latin1tab.h"
       },
       STANDARD_VTABLE(sb_) NULL_VTABLE};

#endif
5487db96d56Sopenharmony_ci
/* Latin-1 encoding descriptor.
 *
 * Same layout as the XML_NS variant (VTABLE1 entries, Latin-1 converters,
 * integer fields 1, 0, 0, then the byte type table and the sb_ function
 * table), except that BT_COLON is defined as BT_NMSTRT while asciitab.h is
 * included, so any table entry spelled BT_COLON becomes BT_NMSTRT
 * (presumably the ':' entry -- confirm in asciitab.h).
 */
static const struct normal_encoding latin1_encoding
    = {{VTABLE1, latin1_toUtf8, latin1_toUtf16, 1, 0, 0},
       {
#define BT_COLON BT_NMSTRT
#include "asciitab.h"
#undef BT_COLON
#include "latin1tab.h"
       },
       STANDARD_VTABLE(sb_) NULL_VTABLE};
5587db96d56Sopenharmony_ci
5597db96d56Sopenharmony_cistatic enum XML_Convert_Result PTRCALL
5607db96d56Sopenharmony_ciascii_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim,
5617db96d56Sopenharmony_ci             char **toP, const char *toLim) {
5627db96d56Sopenharmony_ci  UNUSED_P(enc);
5637db96d56Sopenharmony_ci  while (*fromP < fromLim && *toP < toLim)
5647db96d56Sopenharmony_ci    *(*toP)++ = *(*fromP)++;
5657db96d56Sopenharmony_ci
5667db96d56Sopenharmony_ci  if ((*toP == toLim) && (*fromP < fromLim))
5677db96d56Sopenharmony_ci    return XML_CONVERT_OUTPUT_EXHAUSTED;
5687db96d56Sopenharmony_ci  else
5697db96d56Sopenharmony_ci    return XML_CONVERT_COMPLETED;
5707db96d56Sopenharmony_ci}
5717db96d56Sopenharmony_ci
#ifdef XML_NS

/* ASCII encoding descriptor, variant built only when XML_NS is defined.
 *
 * Uses ascii_toUtf8 for UTF-8 output and shares latin1_toUtf16 for UTF-16
 * output.  The integer fields are 1, 1, 0 (the middle one differs from the
 * Latin-1 descriptors; presumably an "is UTF-8" flag -- confirm against the
 * ENCODING declaration).
 * Only asciitab.h populates the byte type table; the remaining entries are
 * left zero-initialized, which per the note below equals BT_NONXML.
 */
static const struct normal_encoding ascii_encoding_ns
    = {{VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0},
       {
#  include "asciitab.h"
           /* BT_NONXML == 0 */
       },
       STANDARD_VTABLE(sb_) NULL_VTABLE};

#endif
5837db96d56Sopenharmony_ci
/* ASCII encoding descriptor.
 *
 * Same layout as the XML_NS variant (ascii_toUtf8, latin1_toUtf16, integer
 * fields 1, 1, 0), except that BT_COLON is defined as BT_NMSTRT while
 * asciitab.h is included.  Table entries not supplied by asciitab.h stay
 * zero-initialized, which per the note below equals BT_NONXML.
 */
static const struct normal_encoding ascii_encoding
    = {{VTABLE1, ascii_toUtf8, latin1_toUtf16, 1, 1, 0},
       {
#define BT_COLON BT_NMSTRT
#include "asciitab.h"
#undef BT_COLON
           /* BT_NONXML == 0 */
       },
       STANDARD_VTABLE(sb_) NULL_VTABLE};
5937db96d56Sopenharmony_ci
5947db96d56Sopenharmony_cistatic int PTRFASTCALL
5957db96d56Sopenharmony_ciunicode_byte_type(char hi, char lo) {
5967db96d56Sopenharmony_ci  switch ((unsigned char)hi) {
5977db96d56Sopenharmony_ci  /* 0xD800-0xDBFF first 16-bit code unit or high surrogate (W1) */
5987db96d56Sopenharmony_ci  case 0xD8:
5997db96d56Sopenharmony_ci  case 0xD9:
6007db96d56Sopenharmony_ci  case 0xDA:
6017db96d56Sopenharmony_ci  case 0xDB:
6027db96d56Sopenharmony_ci    return BT_LEAD4;
6037db96d56Sopenharmony_ci  /* 0xDC00-0xDFFF second 16-bit code unit or low surrogate (W2) */
6047db96d56Sopenharmony_ci  case 0xDC:
6057db96d56Sopenharmony_ci  case 0xDD:
6067db96d56Sopenharmony_ci  case 0xDE:
6077db96d56Sopenharmony_ci  case 0xDF:
6087db96d56Sopenharmony_ci    return BT_TRAIL;
6097db96d56Sopenharmony_ci  case 0xFF:
6107db96d56Sopenharmony_ci    switch ((unsigned char)lo) {
6117db96d56Sopenharmony_ci    case 0xFF: /* noncharacter-FFFF */
6127db96d56Sopenharmony_ci    case 0xFE: /* noncharacter-FFFE */
6137db96d56Sopenharmony_ci      return BT_NONXML;
6147db96d56Sopenharmony_ci    }
6157db96d56Sopenharmony_ci    break;
6167db96d56Sopenharmony_ci  }
6177db96d56Sopenharmony_ci  return BT_NONASCII;
6187db96d56Sopenharmony_ci}
6197db96d56Sopenharmony_ci
/* DEFINE_UTF16_TO_UTF8(E) expands to the definition of E##toUtf8(), a
 * converter from 2-byte UTF-16 code units to UTF-8.  Byte order is not fixed
 * here: it comes from whichever GET_LO / GET_HI macros are in effect where
 * the macro is expanded.
 *
 * Behavior of the generated function:
 *  - The input limit is first rounded down to a whole number of 2-byte
 *    units.
 *  - hi == 0 and lo < 0x80 produces 1 output byte; any other unit with
 *    hi in 0x00..0x07 produces 2 bytes; a unit with hi in 0xD8..0xDB is
 *    combined with the unit that follows it into 4 bytes; every other unit
 *    produces 3 bytes.  The unit following a 0xD8..0xDB lead is consumed
 *    without being checked.
 *  - Before writing a sequence the remaining output space is checked; if the
 *    whole sequence does not fit, *fromP is set to the unit that was not
 *    converted and XML_CONVERT_OUTPUT_EXHAUSTED is returned.
 *  - For a 0xD8..0xDB lead with fewer than 4 input bytes remaining, *fromP
 *    is set to that lead and XML_CONVERT_INPUT_INCOMPLETE is returned.  The
 *    output-space check happens first.
 *  - *toP is advanced as bytes are written; *fromP is only stored on return.
 * The enc argument is unused.
 */
#define DEFINE_UTF16_TO_UTF8(E)                                                \
  static enum XML_Convert_Result PTRCALL E##toUtf8(                            \
      const ENCODING *enc, const char **fromP, const char *fromLim,            \
      char **toP, const char *toLim) {                                         \
    const char *from = *fromP;                                                 \
    UNUSED_P(enc);                                                             \
    fromLim = from + (((fromLim - from) >> 1) << 1); /* shrink to even */      \
    for (; from < fromLim; from += 2) {                                        \
      int plane;                                                               \
      unsigned char lo2;                                                       \
      unsigned char lo = GET_LO(from);                                         \
      unsigned char hi = GET_HI(from);                                         \
      switch (hi) {                                                            \
      case 0:                                                                  \
        if (lo < 0x80) {                                                       \
          if (*toP == toLim) {                                                 \
            *fromP = from;                                                     \
            return XML_CONVERT_OUTPUT_EXHAUSTED;                               \
          }                                                                    \
          *(*toP)++ = lo;                                                      \
          break;                                                               \
        }                                                                      \
        /* fall through */                                                     \
      case 0x1:                                                                \
      case 0x2:                                                                \
      case 0x3:                                                                \
      case 0x4:                                                                \
      case 0x5:                                                                \
      case 0x6:                                                                \
      case 0x7:                                                                \
        if (toLim - *toP < 2) {                                                \
          *fromP = from;                                                       \
          return XML_CONVERT_OUTPUT_EXHAUSTED;                                 \
        }                                                                      \
        *(*toP)++ = ((lo >> 6) | (hi << 2) | UTF8_cval2);                      \
        *(*toP)++ = ((lo & 0x3f) | 0x80);                                      \
        break;                                                                 \
      default:                                                                 \
        if (toLim - *toP < 3) {                                                \
          *fromP = from;                                                       \
          return XML_CONVERT_OUTPUT_EXHAUSTED;                                 \
        }                                                                      \
        /* 16 bits divided 4, 6, 6 amongst 3 bytes */                          \
        *(*toP)++ = ((hi >> 4) | UTF8_cval3);                                  \
        *(*toP)++ = (((hi & 0xf) << 2) | (lo >> 6) | 0x80);                    \
        *(*toP)++ = ((lo & 0x3f) | 0x80);                                      \
        break;                                                                 \
      case 0xD8:                                                               \
      case 0xD9:                                                               \
      case 0xDA:                                                               \
      case 0xDB:                                                               \
        if (toLim - *toP < 4) {                                                \
          *fromP = from;                                                       \
          return XML_CONVERT_OUTPUT_EXHAUSTED;                                 \
        }                                                                      \
        if (fromLim - from < 4) {                                              \
          *fromP = from;                                                       \
          return XML_CONVERT_INPUT_INCOMPLETE;                                 \
        }                                                                      \
        plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1;                   \
        *(*toP)++ = (char)((plane >> 2) | UTF8_cval4);                         \
        *(*toP)++ = (((lo >> 2) & 0xF) | ((plane & 0x3) << 4) | 0x80);         \
        from += 2;                                                             \
        lo2 = GET_LO(from);                                                    \
        *(*toP)++ = (((lo & 0x3) << 4) | ((GET_HI(from) & 0x3) << 2)           \
                     | (lo2 >> 6) | 0x80);                                     \
        *(*toP)++ = ((lo2 & 0x3f) | 0x80);                                     \
        break;                                                                 \
      }                                                                        \
    }                                                                          \
    *fromP = from;                                                             \
    if (from < fromLim)                                                        \
      return XML_CONVERT_INPUT_INCOMPLETE;                                     \
    else                                                                       \
      return XML_CONVERT_COMPLETED;                                            \
  }
6967db96d56Sopenharmony_ci
/* DEFINE_UTF16_TO_UTF16(E) expands to the definition of E##toUtf16(), which
 * copies 2-byte UTF-16 code units into an array of unsigned short.  Each
 * unit is assembled as (GET_HI << 8) | GET_LO, so the byte order of the
 * input is determined by the GET_LO / GET_HI macros in effect where the
 * macro is expanded.
 *
 * Behavior of the generated function:
 *  - The input limit is first rounded down to a whole number of 2-byte
 *    units.
 *  - If the input holds more bytes than the output has room for
 *    (2 bytes per output slot) and the high byte of the last input unit
 *    satisfies (hi & 0xF8) == 0xD8, that last unit is excluded and the
 *    pending result becomes XML_CONVERT_INPUT_INCOMPLETE.
 *  - Units are then copied while both input and output have room; *fromP and
 *    *toP are advanced as the copy proceeds.
 *  - Returns XML_CONVERT_OUTPUT_EXHAUSTED when the output is exactly full
 *    while input remains; otherwise returns the pending result
 *    (XML_CONVERT_COMPLETED unless changed as described above).
 * The enc argument is unused.
 */
#define DEFINE_UTF16_TO_UTF16(E)                                               \
  static enum XML_Convert_Result PTRCALL E##toUtf16(                           \
      const ENCODING *enc, const char **fromP, const char *fromLim,            \
      unsigned short **toP, const unsigned short *toLim) {                     \
    enum XML_Convert_Result res = XML_CONVERT_COMPLETED;                       \
    UNUSED_P(enc);                                                             \
    fromLim = *fromP + (((fromLim - *fromP) >> 1) << 1); /* shrink to even */  \
    /* Avoid copying first half only of surrogate */                           \
    if (fromLim - *fromP > ((toLim - *toP) << 1)                               \
        && (GET_HI(fromLim - 2) & 0xF8) == 0xD8) {                             \
      fromLim -= 2;                                                            \
      res = XML_CONVERT_INPUT_INCOMPLETE;                                      \
    }                                                                          \
    for (; *fromP < fromLim && *toP < toLim; *fromP += 2)                      \
      *(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP);                      \
    if ((*toP == toLim) && (*fromP < fromLim))                                 \
      return XML_CONVERT_OUTPUT_EXHAUSTED;                                     \
    else                                                                       \
      return res;                                                              \
  }
7177db96d56Sopenharmony_ci
/* Little-endian byte accessors: byte 0 of a unit is the low byte and byte 1
 * the high byte.  SET2 stores a 16-bit value in that order.  The accessors
 * are defined only for the duration of the two expansions below, which
 * produce little2_toUtf8() and little2_toUtf16(), and are removed again
 * afterwards.
 */
#define SET2(ptr, ch) (((ptr)[0] = ((ch)&0xff)), ((ptr)[1] = ((ch) >> 8)))
#define GET_LO(ptr) ((unsigned char)(ptr)[0])
#define GET_HI(ptr) ((unsigned char)(ptr)[1])

DEFINE_UTF16_TO_UTF8(little2_)
DEFINE_UTF16_TO_UTF16(little2_)

#undef SET2
#undef GET_LO
#undef GET_HI
7287db96d56Sopenharmony_ci
/* Big-endian byte accessors: byte 0 of a unit is the high byte and byte 1
 * the low byte.  SET2 stores a 16-bit value in that order.  The accessors
 * are defined only for the duration of the two expansions below, which
 * produce big2_toUtf8() and big2_toUtf16(), and are removed again
 * afterwards.
 */
#define SET2(ptr, ch) (((ptr)[0] = ((ch) >> 8)), ((ptr)[1] = ((ch)&0xFF)))
#define GET_LO(ptr) ((unsigned char)(ptr)[1])
#define GET_HI(ptr) ((unsigned char)(ptr)[0])

DEFINE_UTF16_TO_UTF8(big2_)
DEFINE_UTF16_TO_UTF16(big2_)

#undef SET2
#undef GET_LO
#undef GET_HI
7397db96d56Sopenharmony_ci
/* Per-character helpers for little-endian 2-byte input, where p points at a
 * unit whose byte 0 is the low byte and byte 1 the high byte.
 *
 *  LITTLE2_BYTE_TYPE             high byte zero: look the low byte up in the
 *                                encoding's type table; otherwise classify
 *                                the unit with unicode_byte_type().
 *  LITTLE2_BYTE_TO_ASCII         low byte when the high byte is zero, else -1.
 *  LITTLE2_CHAR_MATCHES          true when the unit equals (0, c).
 *  LITTLE2_IS_NAME_CHAR_MINBPC   namePages lookup via UCS2_GET_NAMING.
 *  LITTLE2_IS_NMSTRT_CHAR_MINBPC nmstrtPages lookup via UCS2_GET_NAMING.
 *
 * Every use of the macro parameters is parenthesized so that expression
 * arguments (e.g. ptr + 2) expand correctly, and the encoding pointer is
 * only read, so it is cast to a pointer-to-const.
 */
#define LITTLE2_BYTE_TYPE(enc, p)                                              \
  ((p)[1] == 0                                                                 \
       ? ((const struct normal_encoding *)(enc))->type[(unsigned char)(p)[0]]  \
       : unicode_byte_type((p)[1], (p)[0]))
#define LITTLE2_BYTE_TO_ASCII(p) ((p)[1] == 0 ? (p)[0] : -1)
#define LITTLE2_CHAR_MATCHES(p, c) ((p)[1] == 0 && (p)[0] == (c))
#define LITTLE2_IS_NAME_CHAR_MINBPC(p)                                         \
  UCS2_GET_NAMING(namePages, (unsigned char)(p)[1], (unsigned char)(p)[0])
#define LITTLE2_IS_NMSTRT_CHAR_MINBPC(p)                                       \
  UCS2_GET_NAMING(nmstrtPages, (unsigned char)(p)[1], (unsigned char)(p)[0])
7497db96d56Sopenharmony_ci
/* Binding of the little-endian 2-byte helpers, in one of two forms.
 *
 * With XML_MIN_SIZE: the LITTLE2_* macros are wrapped in real functions
 * (little2_byteType, little2_byteToAscii, little2_charMatches,
 * little2_isNameMin, little2_isNmstrtMin) and VTABLE is redefined to
 * VTABLE1 followed by the little2_ converters.
 *
 * Without XML_MIN_SIZE: PREFIX, MINBPC and the BYTE_TYPE / CHAR_MATCHES /
 * IS_NAME_CHAR... macros are pointed at the LITTLE2_* helpers and
 * xmltok_impl.c is included with those definitions in effect (presumably
 * generating the little2_-prefixed tokenizer functions -- confirm in
 * xmltok_impl.c).  The helper macros are undefined again afterwards; PREFIX
 * is left defined.
 */
#ifdef XML_MIN_SIZE

/* Byte type of the 2-byte unit at p; see LITTLE2_BYTE_TYPE. */
static int PTRFASTCALL
little2_byteType(const ENCODING *enc, const char *p) {
  return LITTLE2_BYTE_TYPE(enc, p);
}

/* Low byte of the unit at p when its high byte is zero, else -1. */
static int PTRFASTCALL
little2_byteToAscii(const ENCODING *enc, const char *p) {
  UNUSED_P(enc);
  return LITTLE2_BYTE_TO_ASCII(p);
}

/* Non-zero when the unit at p has a zero high byte and low byte equal to c. */
static int PTRCALL
little2_charMatches(const ENCODING *enc, const char *p, int c) {
  UNUSED_P(enc);
  return LITTLE2_CHAR_MATCHES(p, c);
}

/* namePages lookup for the unit at p. */
static int PTRFASTCALL
little2_isNameMin(const ENCODING *enc, const char *p) {
  UNUSED_P(enc);
  return LITTLE2_IS_NAME_CHAR_MINBPC(p);
}

/* nmstrtPages lookup for the unit at p. */
static int PTRFASTCALL
little2_isNmstrtMin(const ENCODING *enc, const char *p) {
  UNUSED_P(enc);
  return LITTLE2_IS_NMSTRT_CHAR_MINBPC(p);
}

#  undef VTABLE
#  define VTABLE VTABLE1, little2_toUtf8, little2_toUtf16

#else /* not XML_MIN_SIZE */

#  undef PREFIX
#  define PREFIX(ident) little2_##ident
#  define MINBPC(enc) 2
/* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */
#  define BYTE_TYPE(enc, p) LITTLE2_BYTE_TYPE(enc, p)
#  define BYTE_TO_ASCII(enc, p) LITTLE2_BYTE_TO_ASCII(p)
#  define CHAR_MATCHES(enc, p, c) LITTLE2_CHAR_MATCHES(p, c)
/* The n-byte name checks are constant 0 for this encoding; only the MINBPC
 * forms perform a lookup. */
#  define IS_NAME_CHAR(enc, p, n) 0
#  define IS_NAME_CHAR_MINBPC(enc, p) LITTLE2_IS_NAME_CHAR_MINBPC(p)
#  define IS_NMSTRT_CHAR(enc, p, n) (0)
#  define IS_NMSTRT_CHAR_MINBPC(enc, p) LITTLE2_IS_NMSTRT_CHAR_MINBPC(p)

#  define XML_TOK_IMPL_C
#  include "xmltok_impl.c"
#  undef XML_TOK_IMPL_C

#  undef MINBPC
#  undef BYTE_TYPE
#  undef BYTE_TO_ASCII
#  undef CHAR_MATCHES
#  undef IS_NAME_CHAR
#  undef IS_NAME_CHAR_MINBPC
#  undef IS_NMSTRT_CHAR
#  undef IS_NMSTRT_CHAR_MINBPC
#  undef IS_INVALID_CHAR

#endif /* not XML_MIN_SIZE */
8137db96d56Sopenharmony_ci
#ifdef XML_NS

/* Little-endian 2-byte encoding descriptor, variant built only when XML_NS
 * is defined.
 *
 * The integer fields after VTABLE are 2, 0 and a flag that is 1 only when
 * BYTEORDER == 1234 (presumably minBytesPerChar / isUtf8 / isUtf16 --
 * confirm against the ENCODING declaration).  The byte type table comes from
 * asciitab.h followed by latin1tab.h, with BT_COLON left as is.
 */
static const struct normal_encoding little2_encoding_ns
    = {{VTABLE, 2, 0,
#  if BYTEORDER == 1234
        1
#  else
        0
#  endif
       },
       {
#  include "asciitab.h"
#  include "latin1tab.h"
       },
       STANDARD_VTABLE(little2_) NULL_VTABLE};

#endif
8317db96d56Sopenharmony_ci
/* Little-endian 2-byte encoding descriptor.
 *
 * Same layout as the XML_NS variant: integer fields 2, 0 and a flag that is
 * 1 only when BYTEORDER == 1234.  The difference is that BT_COLON is defined
 * as BT_NMSTRT while asciitab.h is included.
 */
static const struct normal_encoding little2_encoding
    = {{VTABLE, 2, 0,
#if BYTEORDER == 1234
        1
#else
        0
#endif
       },
       {
#define BT_COLON BT_NMSTRT
#include "asciitab.h"
#undef BT_COLON
#include "latin1tab.h"
       },
       STANDARD_VTABLE(little2_) NULL_VTABLE};
8477db96d56Sopenharmony_ci
/* "Internal" little-endian descriptors, compiled unless BYTEORDER == 4321.
 *
 * They differ from little2_encoding / little2_encoding_ns in two visible
 * ways: the last integer field is unconditionally 1, and the ASCII part of
 * the byte type table comes from iasciitab.h instead of asciitab.h.  As with
 * the other descriptors, the non-XML_NS variant maps BT_COLON to BT_NMSTRT
 * while the table header is included.
 */
#if BYTEORDER != 4321

#  ifdef XML_NS

static const struct normal_encoding internal_little2_encoding_ns
    = {{VTABLE, 2, 0, 1},
       {
#    include "iasciitab.h"
#    include "latin1tab.h"
       },
       STANDARD_VTABLE(little2_) NULL_VTABLE};

#  endif

static const struct normal_encoding internal_little2_encoding
    = {{VTABLE, 2, 0, 1},
       {
#  define BT_COLON BT_NMSTRT
#  include "iasciitab.h"
#  undef BT_COLON
#  include "latin1tab.h"
       },
       STANDARD_VTABLE(little2_) NULL_VTABLE};

#endif
8737db96d56Sopenharmony_ci
/* Per-character helpers for big-endian 2-byte input, where p points at a
 * unit whose byte 0 is the high byte and byte 1 the low byte.
 *
 *  BIG2_BYTE_TYPE             high byte zero: look the low byte up in the
 *                             encoding's type table; otherwise classify the
 *                             unit with unicode_byte_type().
 *  BIG2_BYTE_TO_ASCII         low byte when the high byte is zero, else -1.
 *  BIG2_CHAR_MATCHES          true when the unit equals (0, c).
 *  BIG2_IS_NAME_CHAR_MINBPC   namePages lookup via UCS2_GET_NAMING.
 *  BIG2_IS_NMSTRT_CHAR_MINBPC nmstrtPages lookup via UCS2_GET_NAMING.
 *
 * Every use of the macro parameters is parenthesized so that expression
 * arguments (e.g. ptr + 2) expand correctly, and the encoding pointer is
 * only read, so it is cast to a pointer-to-const.
 */
#define BIG2_BYTE_TYPE(enc, p)                                                 \
  ((p)[0] == 0                                                                 \
       ? ((const struct normal_encoding *)(enc))->type[(unsigned char)(p)[1]]  \
       : unicode_byte_type((p)[0], (p)[1]))
#define BIG2_BYTE_TO_ASCII(p) ((p)[0] == 0 ? (p)[1] : -1)
#define BIG2_CHAR_MATCHES(p, c) ((p)[0] == 0 && (p)[1] == (c))
#define BIG2_IS_NAME_CHAR_MINBPC(p)                                            \
  UCS2_GET_NAMING(namePages, (unsigned char)(p)[0], (unsigned char)(p)[1])
#define BIG2_IS_NMSTRT_CHAR_MINBPC(p)                                          \
  UCS2_GET_NAMING(nmstrtPages, (unsigned char)(p)[0], (unsigned char)(p)[1])
8847db96d56Sopenharmony_ci
/* Binding of the big-endian 2-byte helpers, in one of two forms.
 *
 * With XML_MIN_SIZE: the BIG2_* macros are wrapped in real functions
 * (big2_byteType, big2_byteToAscii, big2_charMatches, big2_isNameMin,
 * big2_isNmstrtMin) and VTABLE is redefined to VTABLE1 followed by the
 * big2_ converters.
 *
 * Without XML_MIN_SIZE: PREFIX, MINBPC and the BYTE_TYPE / CHAR_MATCHES /
 * IS_NAME_CHAR... macros are pointed at the BIG2_* helpers and
 * xmltok_impl.c is included with those definitions in effect (presumably
 * generating the big2_-prefixed tokenizer functions -- confirm in
 * xmltok_impl.c).  The helper macros are undefined again afterwards; PREFIX
 * is left defined.
 */
#ifdef XML_MIN_SIZE

/* Byte type of the 2-byte unit at p; see BIG2_BYTE_TYPE. */
static int PTRFASTCALL
big2_byteType(const ENCODING *enc, const char *p) {
  return BIG2_BYTE_TYPE(enc, p);
}

/* Low byte of the unit at p when its high byte is zero, else -1. */
static int PTRFASTCALL
big2_byteToAscii(const ENCODING *enc, const char *p) {
  UNUSED_P(enc);
  return BIG2_BYTE_TO_ASCII(p);
}

/* Non-zero when the unit at p has a zero high byte and low byte equal to c. */
static int PTRCALL
big2_charMatches(const ENCODING *enc, const char *p, int c) {
  UNUSED_P(enc);
  return BIG2_CHAR_MATCHES(p, c);
}

/* namePages lookup for the unit at p. */
static int PTRFASTCALL
big2_isNameMin(const ENCODING *enc, const char *p) {
  UNUSED_P(enc);
  return BIG2_IS_NAME_CHAR_MINBPC(p);
}

/* nmstrtPages lookup for the unit at p. */
static int PTRFASTCALL
big2_isNmstrtMin(const ENCODING *enc, const char *p) {
  UNUSED_P(enc);
  return BIG2_IS_NMSTRT_CHAR_MINBPC(p);
}

#  undef VTABLE
#  define VTABLE VTABLE1, big2_toUtf8, big2_toUtf16

#else /* not XML_MIN_SIZE */

#  undef PREFIX
#  define PREFIX(ident) big2_##ident
#  define MINBPC(enc) 2
/* CHAR_MATCHES is guaranteed to have MINBPC bytes available. */
#  define BYTE_TYPE(enc, p) BIG2_BYTE_TYPE(enc, p)
#  define BYTE_TO_ASCII(enc, p) BIG2_BYTE_TO_ASCII(p)
#  define CHAR_MATCHES(enc, p, c) BIG2_CHAR_MATCHES(p, c)
/* The n-byte name checks are constant 0 for this encoding; only the MINBPC
 * forms perform a lookup. */
#  define IS_NAME_CHAR(enc, p, n) 0
#  define IS_NAME_CHAR_MINBPC(enc, p) BIG2_IS_NAME_CHAR_MINBPC(p)
#  define IS_NMSTRT_CHAR(enc, p, n) (0)
#  define IS_NMSTRT_CHAR_MINBPC(enc, p) BIG2_IS_NMSTRT_CHAR_MINBPC(p)

#  define XML_TOK_IMPL_C
#  include "xmltok_impl.c"
#  undef XML_TOK_IMPL_C

#  undef MINBPC
#  undef BYTE_TYPE
#  undef BYTE_TO_ASCII
#  undef CHAR_MATCHES
#  undef IS_NAME_CHAR
#  undef IS_NAME_CHAR_MINBPC
#  undef IS_NMSTRT_CHAR
#  undef IS_NMSTRT_CHAR_MINBPC
#  undef IS_INVALID_CHAR

#endif /* not XML_MIN_SIZE */
9487db96d56Sopenharmony_ci
#ifdef XML_NS

/* Big-endian 2-byte encoding descriptor, variant built only when XML_NS is
 * defined.
 *
 * The integer fields after VTABLE are 2, 0 and a flag that is 1 only when
 * BYTEORDER == 4321 (presumably minBytesPerChar / isUtf8 / isUtf16 --
 * confirm against the ENCODING declaration).  The byte type table comes from
 * asciitab.h followed by latin1tab.h, with BT_COLON left as is.
 */
static const struct normal_encoding big2_encoding_ns
    = {{VTABLE, 2, 0,
#  if BYTEORDER == 4321
        1
#  else
        0
#  endif
       },
       {
#  include "asciitab.h"
#  include "latin1tab.h"
       },
       STANDARD_VTABLE(big2_) NULL_VTABLE};

#endif
9667db96d56Sopenharmony_ci
/* Big-endian 2-byte encoding descriptor.
 *
 * Same layout as the XML_NS variant: integer fields 2, 0 and a flag that is
 * 1 only when BYTEORDER == 4321.  The difference is that BT_COLON is defined
 * as BT_NMSTRT while asciitab.h is included.
 */
static const struct normal_encoding big2_encoding
    = {{VTABLE, 2, 0,
#if BYTEORDER == 4321
        1
#else
        0
#endif
       },
       {
#define BT_COLON BT_NMSTRT
#include "asciitab.h"
#undef BT_COLON
#include "latin1tab.h"
       },
       STANDARD_VTABLE(big2_) NULL_VTABLE};
9827db96d56Sopenharmony_ci
/* "Internal" big-endian descriptors, compiled unless BYTEORDER == 1234.
 *
 * They differ from big2_encoding / big2_encoding_ns in two visible ways:
 * the last integer field is unconditionally 1, and the ASCII part of the
 * byte type table comes from iasciitab.h instead of asciitab.h.  As with the
 * other descriptors, the non-XML_NS variant maps BT_COLON to BT_NMSTRT while
 * the table header is included.
 */
#if BYTEORDER != 1234

#  ifdef XML_NS

static const struct normal_encoding internal_big2_encoding_ns
    = {{VTABLE, 2, 0, 1},
       {
#    include "iasciitab.h"
#    include "latin1tab.h"
       },
       STANDARD_VTABLE(big2_) NULL_VTABLE};

#  endif

static const struct normal_encoding internal_big2_encoding
    = {{VTABLE, 2, 0, 1},
       {
#  define BT_COLON BT_NMSTRT
#  include "iasciitab.h"
#  undef BT_COLON
#  include "latin1tab.h"
       },
       STANDARD_VTABLE(big2_) NULL_VTABLE};

#endif

/* Done with the per-encoding function-name prefix. */
#undef PREFIX
10107db96d56Sopenharmony_ci
10117db96d56Sopenharmony_cistatic int FASTCALL
10127db96d56Sopenharmony_cistreqci(const char *s1, const char *s2) {
10137db96d56Sopenharmony_ci  for (;;) {
10147db96d56Sopenharmony_ci    char c1 = *s1++;
10157db96d56Sopenharmony_ci    char c2 = *s2++;
10167db96d56Sopenharmony_ci    if (ASCII_a <= c1 && c1 <= ASCII_z)
10177db96d56Sopenharmony_ci      c1 += ASCII_A - ASCII_a;
10187db96d56Sopenharmony_ci    if (ASCII_a <= c2 && c2 <= ASCII_z)
10197db96d56Sopenharmony_ci      /* The following line will never get executed.  streqci() is
10207db96d56Sopenharmony_ci       * only called from two places, both of which guarantee to put
10217db96d56Sopenharmony_ci       * upper-case strings into s2.
10227db96d56Sopenharmony_ci       */
10237db96d56Sopenharmony_ci      c2 += ASCII_A - ASCII_a; /* LCOV_EXCL_LINE */
10247db96d56Sopenharmony_ci    if (c1 != c2)
10257db96d56Sopenharmony_ci      return 0;
10267db96d56Sopenharmony_ci    if (! c1)
10277db96d56Sopenharmony_ci      break;
10287db96d56Sopenharmony_ci  }
10297db96d56Sopenharmony_ci  return 1;
10307db96d56Sopenharmony_ci}
10317db96d56Sopenharmony_ci
/* Position update that ignores the encoding it is called with and always
 * delegates to normal_updatePosition() using the UTF-8 encoding object.
 *
 * ptr / end delimit the bytes to account for; pos is the position record
 * passed through to normal_updatePosition().
 * NOTE(review): presumably used while the document's real encoding is not
 * yet known -- confirm at the place this function is installed.
 */
static void PTRCALL
initUpdatePosition(const ENCODING *enc, const char *ptr, const char *end,
                   POSITION *pos) {
  UNUSED_P(enc);
  normal_updatePosition(&utf8_encoding.enc, ptr, end, pos);
}
10387db96d56Sopenharmony_ci
10397db96d56Sopenharmony_cistatic int
10407db96d56Sopenharmony_citoAscii(const ENCODING *enc, const char *ptr, const char *end) {
10417db96d56Sopenharmony_ci  char buf[1];
10427db96d56Sopenharmony_ci  char *p = buf;
10437db96d56Sopenharmony_ci  XmlUtf8Convert(enc, &ptr, end, &p, p + 1);
10447db96d56Sopenharmony_ci  if (p == buf)
10457db96d56Sopenharmony_ci    return -1;
10467db96d56Sopenharmony_ci  else
10477db96d56Sopenharmony_ci    return buf[0];
10487db96d56Sopenharmony_ci}
10497db96d56Sopenharmony_ci
10507db96d56Sopenharmony_cistatic int FASTCALL
10517db96d56Sopenharmony_ciisSpace(int c) {
10527db96d56Sopenharmony_ci  switch (c) {
10537db96d56Sopenharmony_ci  case 0x20:
10547db96d56Sopenharmony_ci  case 0xD:
10557db96d56Sopenharmony_ci  case 0xA:
10567db96d56Sopenharmony_ci  case 0x9:
10577db96d56Sopenharmony_ci    return 1;
10587db96d56Sopenharmony_ci  }
10597db96d56Sopenharmony_ci  return 0;
10607db96d56Sopenharmony_ci}
10617db96d56Sopenharmony_ci
10627db96d56Sopenharmony_ci/* Return 1 if there's just optional white space or there's an S
10637db96d56Sopenharmony_ci   followed by name=val.
10647db96d56Sopenharmony_ci*/
10657db96d56Sopenharmony_cistatic int
10667db96d56Sopenharmony_ciparsePseudoAttribute(const ENCODING *enc, const char *ptr, const char *end,
10677db96d56Sopenharmony_ci                     const char **namePtr, const char **nameEndPtr,
10687db96d56Sopenharmony_ci                     const char **valPtr, const char **nextTokPtr) {
10697db96d56Sopenharmony_ci  int c;
10707db96d56Sopenharmony_ci  char open;
10717db96d56Sopenharmony_ci  if (ptr == end) {
10727db96d56Sopenharmony_ci    *namePtr = NULL;
10737db96d56Sopenharmony_ci    return 1;
10747db96d56Sopenharmony_ci  }
10757db96d56Sopenharmony_ci  if (! isSpace(toAscii(enc, ptr, end))) {
10767db96d56Sopenharmony_ci    *nextTokPtr = ptr;
10777db96d56Sopenharmony_ci    return 0;
10787db96d56Sopenharmony_ci  }
10797db96d56Sopenharmony_ci  do {
10807db96d56Sopenharmony_ci    ptr += enc->minBytesPerChar;
10817db96d56Sopenharmony_ci  } while (isSpace(toAscii(enc, ptr, end)));
10827db96d56Sopenharmony_ci  if (ptr == end) {
10837db96d56Sopenharmony_ci    *namePtr = NULL;
10847db96d56Sopenharmony_ci    return 1;
10857db96d56Sopenharmony_ci  }
10867db96d56Sopenharmony_ci  *namePtr = ptr;
10877db96d56Sopenharmony_ci  for (;;) {
10887db96d56Sopenharmony_ci    c = toAscii(enc, ptr, end);
10897db96d56Sopenharmony_ci    if (c == -1) {
10907db96d56Sopenharmony_ci      *nextTokPtr = ptr;
10917db96d56Sopenharmony_ci      return 0;
10927db96d56Sopenharmony_ci    }
10937db96d56Sopenharmony_ci    if (c == ASCII_EQUALS) {
10947db96d56Sopenharmony_ci      *nameEndPtr = ptr;
10957db96d56Sopenharmony_ci      break;
10967db96d56Sopenharmony_ci    }
10977db96d56Sopenharmony_ci    if (isSpace(c)) {
10987db96d56Sopenharmony_ci      *nameEndPtr = ptr;
10997db96d56Sopenharmony_ci      do {
11007db96d56Sopenharmony_ci        ptr += enc->minBytesPerChar;
11017db96d56Sopenharmony_ci      } while (isSpace(c = toAscii(enc, ptr, end)));
11027db96d56Sopenharmony_ci      if (c != ASCII_EQUALS) {
11037db96d56Sopenharmony_ci        *nextTokPtr = ptr;
11047db96d56Sopenharmony_ci        return 0;
11057db96d56Sopenharmony_ci      }
11067db96d56Sopenharmony_ci      break;
11077db96d56Sopenharmony_ci    }
11087db96d56Sopenharmony_ci    ptr += enc->minBytesPerChar;
11097db96d56Sopenharmony_ci  }
11107db96d56Sopenharmony_ci  if (ptr == *namePtr) {
11117db96d56Sopenharmony_ci    *nextTokPtr = ptr;
11127db96d56Sopenharmony_ci    return 0;
11137db96d56Sopenharmony_ci  }
11147db96d56Sopenharmony_ci  ptr += enc->minBytesPerChar;
11157db96d56Sopenharmony_ci  c = toAscii(enc, ptr, end);
11167db96d56Sopenharmony_ci  while (isSpace(c)) {
11177db96d56Sopenharmony_ci    ptr += enc->minBytesPerChar;
11187db96d56Sopenharmony_ci    c = toAscii(enc, ptr, end);
11197db96d56Sopenharmony_ci  }
11207db96d56Sopenharmony_ci  if (c != ASCII_QUOT && c != ASCII_APOS) {
11217db96d56Sopenharmony_ci    *nextTokPtr = ptr;
11227db96d56Sopenharmony_ci    return 0;
11237db96d56Sopenharmony_ci  }
11247db96d56Sopenharmony_ci  open = (char)c;
11257db96d56Sopenharmony_ci  ptr += enc->minBytesPerChar;
11267db96d56Sopenharmony_ci  *valPtr = ptr;
11277db96d56Sopenharmony_ci  for (;; ptr += enc->minBytesPerChar) {
11287db96d56Sopenharmony_ci    c = toAscii(enc, ptr, end);
11297db96d56Sopenharmony_ci    if (c == open)
11307db96d56Sopenharmony_ci      break;
11317db96d56Sopenharmony_ci    if (! (ASCII_a <= c && c <= ASCII_z) && ! (ASCII_A <= c && c <= ASCII_Z)
11327db96d56Sopenharmony_ci        && ! (ASCII_0 <= c && c <= ASCII_9) && c != ASCII_PERIOD
11337db96d56Sopenharmony_ci        && c != ASCII_MINUS && c != ASCII_UNDERSCORE) {
11347db96d56Sopenharmony_ci      *nextTokPtr = ptr;
11357db96d56Sopenharmony_ci      return 0;
11367db96d56Sopenharmony_ci    }
11377db96d56Sopenharmony_ci  }
11387db96d56Sopenharmony_ci  *nextTokPtr = ptr + enc->minBytesPerChar;
11397db96d56Sopenharmony_ci  return 1;
11407db96d56Sopenharmony_ci}
11417db96d56Sopenharmony_ci
11427db96d56Sopenharmony_cistatic const char KW_version[]
11437db96d56Sopenharmony_ci    = {ASCII_v, ASCII_e, ASCII_r, ASCII_s, ASCII_i, ASCII_o, ASCII_n, '\0'};
11447db96d56Sopenharmony_ci
11457db96d56Sopenharmony_cistatic const char KW_encoding[] = {ASCII_e, ASCII_n, ASCII_c, ASCII_o, ASCII_d,
11467db96d56Sopenharmony_ci                                   ASCII_i, ASCII_n, ASCII_g, '\0'};
11477db96d56Sopenharmony_ci
11487db96d56Sopenharmony_cistatic const char KW_standalone[]
11497db96d56Sopenharmony_ci    = {ASCII_s, ASCII_t, ASCII_a, ASCII_n, ASCII_d, ASCII_a,
11507db96d56Sopenharmony_ci       ASCII_l, ASCII_o, ASCII_n, ASCII_e, '\0'};
11517db96d56Sopenharmony_ci
11527db96d56Sopenharmony_cistatic const char KW_yes[] = {ASCII_y, ASCII_e, ASCII_s, '\0'};
11537db96d56Sopenharmony_ci
11547db96d56Sopenharmony_cistatic const char KW_no[] = {ASCII_n, ASCII_o, '\0'};
11557db96d56Sopenharmony_ci
11567db96d56Sopenharmony_cistatic int
11577db96d56Sopenharmony_cidoParseXmlDecl(const ENCODING *(*encodingFinder)(const ENCODING *, const char *,
11587db96d56Sopenharmony_ci                                                 const char *),
11597db96d56Sopenharmony_ci               int isGeneralTextEntity, const ENCODING *enc, const char *ptr,
11607db96d56Sopenharmony_ci               const char *end, const char **badPtr, const char **versionPtr,
11617db96d56Sopenharmony_ci               const char **versionEndPtr, const char **encodingName,
11627db96d56Sopenharmony_ci               const ENCODING **encoding, int *standalone) {
11637db96d56Sopenharmony_ci  const char *val = NULL;
11647db96d56Sopenharmony_ci  const char *name = NULL;
11657db96d56Sopenharmony_ci  const char *nameEnd = NULL;
11667db96d56Sopenharmony_ci  ptr += 5 * enc->minBytesPerChar;
11677db96d56Sopenharmony_ci  end -= 2 * enc->minBytesPerChar;
11687db96d56Sopenharmony_ci  if (! parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)
11697db96d56Sopenharmony_ci      || ! name) {
11707db96d56Sopenharmony_ci    *badPtr = ptr;
11717db96d56Sopenharmony_ci    return 0;
11727db96d56Sopenharmony_ci  }
11737db96d56Sopenharmony_ci  if (! XmlNameMatchesAscii(enc, name, nameEnd, KW_version)) {
11747db96d56Sopenharmony_ci    if (! isGeneralTextEntity) {
11757db96d56Sopenharmony_ci      *badPtr = name;
11767db96d56Sopenharmony_ci      return 0;
11777db96d56Sopenharmony_ci    }
11787db96d56Sopenharmony_ci  } else {
11797db96d56Sopenharmony_ci    if (versionPtr)
11807db96d56Sopenharmony_ci      *versionPtr = val;
11817db96d56Sopenharmony_ci    if (versionEndPtr)
11827db96d56Sopenharmony_ci      *versionEndPtr = ptr;
11837db96d56Sopenharmony_ci    if (! parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) {
11847db96d56Sopenharmony_ci      *badPtr = ptr;
11857db96d56Sopenharmony_ci      return 0;
11867db96d56Sopenharmony_ci    }
11877db96d56Sopenharmony_ci    if (! name) {
11887db96d56Sopenharmony_ci      if (isGeneralTextEntity) {
11897db96d56Sopenharmony_ci        /* a TextDecl must have an EncodingDecl */
11907db96d56Sopenharmony_ci        *badPtr = ptr;
11917db96d56Sopenharmony_ci        return 0;
11927db96d56Sopenharmony_ci      }
11937db96d56Sopenharmony_ci      return 1;
11947db96d56Sopenharmony_ci    }
11957db96d56Sopenharmony_ci  }
11967db96d56Sopenharmony_ci  if (XmlNameMatchesAscii(enc, name, nameEnd, KW_encoding)) {
11977db96d56Sopenharmony_ci    int c = toAscii(enc, val, end);
11987db96d56Sopenharmony_ci    if (! (ASCII_a <= c && c <= ASCII_z) && ! (ASCII_A <= c && c <= ASCII_Z)) {
11997db96d56Sopenharmony_ci      *badPtr = val;
12007db96d56Sopenharmony_ci      return 0;
12017db96d56Sopenharmony_ci    }
12027db96d56Sopenharmony_ci    if (encodingName)
12037db96d56Sopenharmony_ci      *encodingName = val;
12047db96d56Sopenharmony_ci    if (encoding)
12057db96d56Sopenharmony_ci      *encoding = encodingFinder(enc, val, ptr - enc->minBytesPerChar);
12067db96d56Sopenharmony_ci    if (! parsePseudoAttribute(enc, ptr, end, &name, &nameEnd, &val, &ptr)) {
12077db96d56Sopenharmony_ci      *badPtr = ptr;
12087db96d56Sopenharmony_ci      return 0;
12097db96d56Sopenharmony_ci    }
12107db96d56Sopenharmony_ci    if (! name)
12117db96d56Sopenharmony_ci      return 1;
12127db96d56Sopenharmony_ci  }
12137db96d56Sopenharmony_ci  if (! XmlNameMatchesAscii(enc, name, nameEnd, KW_standalone)
12147db96d56Sopenharmony_ci      || isGeneralTextEntity) {
12157db96d56Sopenharmony_ci    *badPtr = name;
12167db96d56Sopenharmony_ci    return 0;
12177db96d56Sopenharmony_ci  }
12187db96d56Sopenharmony_ci  if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_yes)) {
12197db96d56Sopenharmony_ci    if (standalone)
12207db96d56Sopenharmony_ci      *standalone = 1;
12217db96d56Sopenharmony_ci  } else if (XmlNameMatchesAscii(enc, val, ptr - enc->minBytesPerChar, KW_no)) {
12227db96d56Sopenharmony_ci    if (standalone)
12237db96d56Sopenharmony_ci      *standalone = 0;
12247db96d56Sopenharmony_ci  } else {
12257db96d56Sopenharmony_ci    *badPtr = val;
12267db96d56Sopenharmony_ci    return 0;
12277db96d56Sopenharmony_ci  }
12287db96d56Sopenharmony_ci  while (isSpace(toAscii(enc, ptr, end)))
12297db96d56Sopenharmony_ci    ptr += enc->minBytesPerChar;
12307db96d56Sopenharmony_ci  if (ptr != end) {
12317db96d56Sopenharmony_ci    *badPtr = ptr;
12327db96d56Sopenharmony_ci    return 0;
12337db96d56Sopenharmony_ci  }
12347db96d56Sopenharmony_ci  return 1;
12357db96d56Sopenharmony_ci}
12367db96d56Sopenharmony_ci
12377db96d56Sopenharmony_cistatic int FASTCALL
12387db96d56Sopenharmony_cicheckCharRefNumber(int result) {
12397db96d56Sopenharmony_ci  switch (result >> 8) {
12407db96d56Sopenharmony_ci  case 0xD8:
12417db96d56Sopenharmony_ci  case 0xD9:
12427db96d56Sopenharmony_ci  case 0xDA:
12437db96d56Sopenharmony_ci  case 0xDB:
12447db96d56Sopenharmony_ci  case 0xDC:
12457db96d56Sopenharmony_ci  case 0xDD:
12467db96d56Sopenharmony_ci  case 0xDE:
12477db96d56Sopenharmony_ci  case 0xDF:
12487db96d56Sopenharmony_ci    return -1;
12497db96d56Sopenharmony_ci  case 0:
12507db96d56Sopenharmony_ci    if (latin1_encoding.type[result] == BT_NONXML)
12517db96d56Sopenharmony_ci      return -1;
12527db96d56Sopenharmony_ci    break;
12537db96d56Sopenharmony_ci  case 0xFF:
12547db96d56Sopenharmony_ci    if (result == 0xFFFE || result == 0xFFFF)
12557db96d56Sopenharmony_ci      return -1;
12567db96d56Sopenharmony_ci    break;
12577db96d56Sopenharmony_ci  }
12587db96d56Sopenharmony_ci  return result;
12597db96d56Sopenharmony_ci}
12607db96d56Sopenharmony_ci
12617db96d56Sopenharmony_ciint FASTCALL
12627db96d56Sopenharmony_ciXmlUtf8Encode(int c, char *buf) {
12637db96d56Sopenharmony_ci  enum {
12647db96d56Sopenharmony_ci    /* minN is minimum legal resulting value for N byte sequence */
12657db96d56Sopenharmony_ci    min2 = 0x80,
12667db96d56Sopenharmony_ci    min3 = 0x800,
12677db96d56Sopenharmony_ci    min4 = 0x10000
12687db96d56Sopenharmony_ci  };
12697db96d56Sopenharmony_ci
12707db96d56Sopenharmony_ci  if (c < 0)
12717db96d56Sopenharmony_ci    return 0; /* LCOV_EXCL_LINE: this case is always eliminated beforehand */
12727db96d56Sopenharmony_ci  if (c < min2) {
12737db96d56Sopenharmony_ci    buf[0] = (char)(c | UTF8_cval1);
12747db96d56Sopenharmony_ci    return 1;
12757db96d56Sopenharmony_ci  }
12767db96d56Sopenharmony_ci  if (c < min3) {
12777db96d56Sopenharmony_ci    buf[0] = (char)((c >> 6) | UTF8_cval2);
12787db96d56Sopenharmony_ci    buf[1] = (char)((c & 0x3f) | 0x80);
12797db96d56Sopenharmony_ci    return 2;
12807db96d56Sopenharmony_ci  }
12817db96d56Sopenharmony_ci  if (c < min4) {
12827db96d56Sopenharmony_ci    buf[0] = (char)((c >> 12) | UTF8_cval3);
12837db96d56Sopenharmony_ci    buf[1] = (char)(((c >> 6) & 0x3f) | 0x80);
12847db96d56Sopenharmony_ci    buf[2] = (char)((c & 0x3f) | 0x80);
12857db96d56Sopenharmony_ci    return 3;
12867db96d56Sopenharmony_ci  }
12877db96d56Sopenharmony_ci  if (c < 0x110000) {
12887db96d56Sopenharmony_ci    buf[0] = (char)((c >> 18) | UTF8_cval4);
12897db96d56Sopenharmony_ci    buf[1] = (char)(((c >> 12) & 0x3f) | 0x80);
12907db96d56Sopenharmony_ci    buf[2] = (char)(((c >> 6) & 0x3f) | 0x80);
12917db96d56Sopenharmony_ci    buf[3] = (char)((c & 0x3f) | 0x80);
12927db96d56Sopenharmony_ci    return 4;
12937db96d56Sopenharmony_ci  }
12947db96d56Sopenharmony_ci  return 0; /* LCOV_EXCL_LINE: this case too is eliminated before calling */
12957db96d56Sopenharmony_ci}
12967db96d56Sopenharmony_ci
/* Encode character number charNum as UTF-16 code units into buf, which
   must have room for two units.  Returns the number of units written:
   1 for values below 0x10000, 2 (a surrogate pair) for values below
   0x110000, and 0 when charNum is negative or too large.
*/
int FASTCALL
XmlUtf16Encode(int charNum, unsigned short *buf) {
  int offset;

  if (charNum < 0 || charNum >= 0x110000)
    return 0;
  if (charNum < 0x10000) {
    buf[0] = (unsigned short)charNum;
    return 1;
  }
  /* Split the 20-bit offset into the high and low surrogate. */
  offset = charNum - 0x10000;
  buf[0] = (unsigned short)(0xD800 + (offset >> 10));
  buf[1] = (unsigned short)(0xDC00 + (offset & 0x3FF));
  return 2;
}
13137db96d56Sopenharmony_ci
13147db96d56Sopenharmony_cistruct unknown_encoding {
13157db96d56Sopenharmony_ci  struct normal_encoding normal;
13167db96d56Sopenharmony_ci  CONVERTER convert;
13177db96d56Sopenharmony_ci  void *userData;
13187db96d56Sopenharmony_ci  unsigned short utf16[256];
13197db96d56Sopenharmony_ci  char utf8[256][4];
13207db96d56Sopenharmony_ci};
13217db96d56Sopenharmony_ci
13227db96d56Sopenharmony_ci#define AS_UNKNOWN_ENCODING(enc) ((const struct unknown_encoding *)(enc))
13237db96d56Sopenharmony_ci
13247db96d56Sopenharmony_ciint
13257db96d56Sopenharmony_ciXmlSizeOfUnknownEncoding(void) {
13267db96d56Sopenharmony_ci  return sizeof(struct unknown_encoding);
13277db96d56Sopenharmony_ci}
13287db96d56Sopenharmony_ci
13297db96d56Sopenharmony_cistatic int PTRFASTCALL
13307db96d56Sopenharmony_ciunknown_isName(const ENCODING *enc, const char *p) {
13317db96d56Sopenharmony_ci  const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
13327db96d56Sopenharmony_ci  int c = uenc->convert(uenc->userData, p);
13337db96d56Sopenharmony_ci  if (c & ~0xFFFF)
13347db96d56Sopenharmony_ci    return 0;
13357db96d56Sopenharmony_ci  return UCS2_GET_NAMING(namePages, c >> 8, c & 0xFF);
13367db96d56Sopenharmony_ci}
13377db96d56Sopenharmony_ci
13387db96d56Sopenharmony_cistatic int PTRFASTCALL
13397db96d56Sopenharmony_ciunknown_isNmstrt(const ENCODING *enc, const char *p) {
13407db96d56Sopenharmony_ci  const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
13417db96d56Sopenharmony_ci  int c = uenc->convert(uenc->userData, p);
13427db96d56Sopenharmony_ci  if (c & ~0xFFFF)
13437db96d56Sopenharmony_ci    return 0;
13447db96d56Sopenharmony_ci  return UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xFF);
13457db96d56Sopenharmony_ci}
13467db96d56Sopenharmony_ci
13477db96d56Sopenharmony_cistatic int PTRFASTCALL
13487db96d56Sopenharmony_ciunknown_isInvalid(const ENCODING *enc, const char *p) {
13497db96d56Sopenharmony_ci  const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
13507db96d56Sopenharmony_ci  int c = uenc->convert(uenc->userData, p);
13517db96d56Sopenharmony_ci  return (c & ~0xFFFF) || checkCharRefNumber(c) < 0;
13527db96d56Sopenharmony_ci}
13537db96d56Sopenharmony_ci
13547db96d56Sopenharmony_cistatic enum XML_Convert_Result PTRCALL
13557db96d56Sopenharmony_ciunknown_toUtf8(const ENCODING *enc, const char **fromP, const char *fromLim,
13567db96d56Sopenharmony_ci               char **toP, const char *toLim) {
13577db96d56Sopenharmony_ci  const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
13587db96d56Sopenharmony_ci  char buf[XML_UTF8_ENCODE_MAX];
13597db96d56Sopenharmony_ci  for (;;) {
13607db96d56Sopenharmony_ci    const char *utf8;
13617db96d56Sopenharmony_ci    int n;
13627db96d56Sopenharmony_ci    if (*fromP == fromLim)
13637db96d56Sopenharmony_ci      return XML_CONVERT_COMPLETED;
13647db96d56Sopenharmony_ci    utf8 = uenc->utf8[(unsigned char)**fromP];
13657db96d56Sopenharmony_ci    n = *utf8++;
13667db96d56Sopenharmony_ci    if (n == 0) {
13677db96d56Sopenharmony_ci      int c = uenc->convert(uenc->userData, *fromP);
13687db96d56Sopenharmony_ci      n = XmlUtf8Encode(c, buf);
13697db96d56Sopenharmony_ci      if (n > toLim - *toP)
13707db96d56Sopenharmony_ci        return XML_CONVERT_OUTPUT_EXHAUSTED;
13717db96d56Sopenharmony_ci      utf8 = buf;
13727db96d56Sopenharmony_ci      *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP]
13737db96d56Sopenharmony_ci                 - (BT_LEAD2 - 2));
13747db96d56Sopenharmony_ci    } else {
13757db96d56Sopenharmony_ci      if (n > toLim - *toP)
13767db96d56Sopenharmony_ci        return XML_CONVERT_OUTPUT_EXHAUSTED;
13777db96d56Sopenharmony_ci      (*fromP)++;
13787db96d56Sopenharmony_ci    }
13797db96d56Sopenharmony_ci    memcpy(*toP, utf8, n);
13807db96d56Sopenharmony_ci    *toP += n;
13817db96d56Sopenharmony_ci  }
13827db96d56Sopenharmony_ci}
13837db96d56Sopenharmony_ci
13847db96d56Sopenharmony_cistatic enum XML_Convert_Result PTRCALL
13857db96d56Sopenharmony_ciunknown_toUtf16(const ENCODING *enc, const char **fromP, const char *fromLim,
13867db96d56Sopenharmony_ci                unsigned short **toP, const unsigned short *toLim) {
13877db96d56Sopenharmony_ci  const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc);
13887db96d56Sopenharmony_ci  while (*fromP < fromLim && *toP < toLim) {
13897db96d56Sopenharmony_ci    unsigned short c = uenc->utf16[(unsigned char)**fromP];
13907db96d56Sopenharmony_ci    if (c == 0) {
13917db96d56Sopenharmony_ci      c = (unsigned short)uenc->convert(uenc->userData, *fromP);
13927db96d56Sopenharmony_ci      *fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP]
13937db96d56Sopenharmony_ci                 - (BT_LEAD2 - 2));
13947db96d56Sopenharmony_ci    } else
13957db96d56Sopenharmony_ci      (*fromP)++;
13967db96d56Sopenharmony_ci    *(*toP)++ = c;
13977db96d56Sopenharmony_ci  }
13987db96d56Sopenharmony_ci
13997db96d56Sopenharmony_ci  if ((*toP == toLim) && (*fromP < fromLim))
14007db96d56Sopenharmony_ci    return XML_CONVERT_OUTPUT_EXHAUSTED;
14017db96d56Sopenharmony_ci  else
14027db96d56Sopenharmony_ci    return XML_CONVERT_COMPLETED;
14037db96d56Sopenharmony_ci}
14047db96d56Sopenharmony_ci
14057db96d56Sopenharmony_ciENCODING *
14067db96d56Sopenharmony_ciXmlInitUnknownEncoding(void *mem, int *table, CONVERTER convert,
14077db96d56Sopenharmony_ci                       void *userData) {
14087db96d56Sopenharmony_ci  int i;
14097db96d56Sopenharmony_ci  struct unknown_encoding *e = (struct unknown_encoding *)mem;
14107db96d56Sopenharmony_ci  memcpy(mem, &latin1_encoding, sizeof(struct normal_encoding));
14117db96d56Sopenharmony_ci  for (i = 0; i < 128; i++)
14127db96d56Sopenharmony_ci    if (latin1_encoding.type[i] != BT_OTHER
14137db96d56Sopenharmony_ci        && latin1_encoding.type[i] != BT_NONXML && table[i] != i)
14147db96d56Sopenharmony_ci      return 0;
14157db96d56Sopenharmony_ci  for (i = 0; i < 256; i++) {
14167db96d56Sopenharmony_ci    int c = table[i];
14177db96d56Sopenharmony_ci    if (c == -1) {
14187db96d56Sopenharmony_ci      e->normal.type[i] = BT_MALFORM;
14197db96d56Sopenharmony_ci      /* This shouldn't really get used. */
14207db96d56Sopenharmony_ci      e->utf16[i] = 0xFFFF;
14217db96d56Sopenharmony_ci      e->utf8[i][0] = 1;
14227db96d56Sopenharmony_ci      e->utf8[i][1] = 0;
14237db96d56Sopenharmony_ci    } else if (c < 0) {
14247db96d56Sopenharmony_ci      if (c < -4)
14257db96d56Sopenharmony_ci        return 0;
14267db96d56Sopenharmony_ci      /* Multi-byte sequences need a converter function */
14277db96d56Sopenharmony_ci      if (! convert)
14287db96d56Sopenharmony_ci        return 0;
14297db96d56Sopenharmony_ci      e->normal.type[i] = (unsigned char)(BT_LEAD2 - (c + 2));
14307db96d56Sopenharmony_ci      e->utf8[i][0] = 0;
14317db96d56Sopenharmony_ci      e->utf16[i] = 0;
14327db96d56Sopenharmony_ci    } else if (c < 0x80) {
14337db96d56Sopenharmony_ci      if (latin1_encoding.type[c] != BT_OTHER
14347db96d56Sopenharmony_ci          && latin1_encoding.type[c] != BT_NONXML && c != i)
14357db96d56Sopenharmony_ci        return 0;
14367db96d56Sopenharmony_ci      e->normal.type[i] = latin1_encoding.type[c];
14377db96d56Sopenharmony_ci      e->utf8[i][0] = 1;
14387db96d56Sopenharmony_ci      e->utf8[i][1] = (char)c;
14397db96d56Sopenharmony_ci      e->utf16[i] = (unsigned short)(c == 0 ? 0xFFFF : c);
14407db96d56Sopenharmony_ci    } else if (checkCharRefNumber(c) < 0) {
14417db96d56Sopenharmony_ci      e->normal.type[i] = BT_NONXML;
14427db96d56Sopenharmony_ci      /* This shouldn't really get used. */
14437db96d56Sopenharmony_ci      e->utf16[i] = 0xFFFF;
14447db96d56Sopenharmony_ci      e->utf8[i][0] = 1;
14457db96d56Sopenharmony_ci      e->utf8[i][1] = 0;
14467db96d56Sopenharmony_ci    } else {
14477db96d56Sopenharmony_ci      if (c > 0xFFFF)
14487db96d56Sopenharmony_ci        return 0;
14497db96d56Sopenharmony_ci      if (UCS2_GET_NAMING(nmstrtPages, c >> 8, c & 0xff))
14507db96d56Sopenharmony_ci        e->normal.type[i] = BT_NMSTRT;
14517db96d56Sopenharmony_ci      else if (UCS2_GET_NAMING(namePages, c >> 8, c & 0xff))
14527db96d56Sopenharmony_ci        e->normal.type[i] = BT_NAME;
14537db96d56Sopenharmony_ci      else
14547db96d56Sopenharmony_ci        e->normal.type[i] = BT_OTHER;
14557db96d56Sopenharmony_ci      e->utf8[i][0] = (char)XmlUtf8Encode(c, e->utf8[i] + 1);
14567db96d56Sopenharmony_ci      e->utf16[i] = (unsigned short)c;
14577db96d56Sopenharmony_ci    }
14587db96d56Sopenharmony_ci  }
14597db96d56Sopenharmony_ci  e->userData = userData;
14607db96d56Sopenharmony_ci  e->convert = convert;
14617db96d56Sopenharmony_ci  if (convert) {
14627db96d56Sopenharmony_ci    e->normal.isName2 = unknown_isName;
14637db96d56Sopenharmony_ci    e->normal.isName3 = unknown_isName;
14647db96d56Sopenharmony_ci    e->normal.isName4 = unknown_isName;
14657db96d56Sopenharmony_ci    e->normal.isNmstrt2 = unknown_isNmstrt;
14667db96d56Sopenharmony_ci    e->normal.isNmstrt3 = unknown_isNmstrt;
14677db96d56Sopenharmony_ci    e->normal.isNmstrt4 = unknown_isNmstrt;
14687db96d56Sopenharmony_ci    e->normal.isInvalid2 = unknown_isInvalid;
14697db96d56Sopenharmony_ci    e->normal.isInvalid3 = unknown_isInvalid;
14707db96d56Sopenharmony_ci    e->normal.isInvalid4 = unknown_isInvalid;
14717db96d56Sopenharmony_ci  }
14727db96d56Sopenharmony_ci  e->normal.enc.utf8Convert = unknown_toUtf8;
14737db96d56Sopenharmony_ci  e->normal.enc.utf16Convert = unknown_toUtf16;
14747db96d56Sopenharmony_ci  return &(e->normal.enc);
14757db96d56Sopenharmony_ci}
14767db96d56Sopenharmony_ci
/* Indices of the built-in encodings.

   If this enumeration is changed, getEncodingIndex and encodings
   must also be changed.
*/
enum {
  UNKNOWN_ENC = -1,
  ISO_8859_1_ENC = 0,
  US_ASCII_ENC = 1,
  UTF_8_ENC = 2,
  UTF_16_ENC = 3,
  UTF_16BE_ENC = 4,
  UTF_16LE_ENC = 5,
  /* must match encodingNames up to here */
  NO_ENC = 6
};
14907db96d56Sopenharmony_ci
14917db96d56Sopenharmony_cistatic const char KW_ISO_8859_1[]
14927db96d56Sopenharmony_ci    = {ASCII_I, ASCII_S, ASCII_O,     ASCII_MINUS, ASCII_8, ASCII_8,
14937db96d56Sopenharmony_ci       ASCII_5, ASCII_9, ASCII_MINUS, ASCII_1,     '\0'};
14947db96d56Sopenharmony_cistatic const char KW_US_ASCII[]
14957db96d56Sopenharmony_ci    = {ASCII_U, ASCII_S, ASCII_MINUS, ASCII_A, ASCII_S,
14967db96d56Sopenharmony_ci       ASCII_C, ASCII_I, ASCII_I,     '\0'};
14977db96d56Sopenharmony_cistatic const char KW_UTF_8[]
14987db96d56Sopenharmony_ci    = {ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_8, '\0'};
14997db96d56Sopenharmony_cistatic const char KW_UTF_16[]
15007db96d56Sopenharmony_ci    = {ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1, ASCII_6, '\0'};
15017db96d56Sopenharmony_cistatic const char KW_UTF_16BE[]
15027db96d56Sopenharmony_ci    = {ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1,
15037db96d56Sopenharmony_ci       ASCII_6, ASCII_B, ASCII_E, '\0'};
15047db96d56Sopenharmony_cistatic const char KW_UTF_16LE[]
15057db96d56Sopenharmony_ci    = {ASCII_U, ASCII_T, ASCII_F, ASCII_MINUS, ASCII_1,
15067db96d56Sopenharmony_ci       ASCII_6, ASCII_L, ASCII_E, '\0'};
15077db96d56Sopenharmony_ci
15087db96d56Sopenharmony_cistatic int FASTCALL
15097db96d56Sopenharmony_cigetEncodingIndex(const char *name) {
15107db96d56Sopenharmony_ci  static const char *const encodingNames[] = {
15117db96d56Sopenharmony_ci      KW_ISO_8859_1, KW_US_ASCII, KW_UTF_8, KW_UTF_16, KW_UTF_16BE, KW_UTF_16LE,
15127db96d56Sopenharmony_ci  };
15137db96d56Sopenharmony_ci  int i;
15147db96d56Sopenharmony_ci  if (name == NULL)
15157db96d56Sopenharmony_ci    return NO_ENC;
15167db96d56Sopenharmony_ci  for (i = 0; i < (int)(sizeof(encodingNames) / sizeof(encodingNames[0])); i++)
15177db96d56Sopenharmony_ci    if (streqci(name, encodingNames[i]))
15187db96d56Sopenharmony_ci      return i;
15197db96d56Sopenharmony_ci  return UNKNOWN_ENC;
15207db96d56Sopenharmony_ci}
15217db96d56Sopenharmony_ci
/* For binary compatibility, we store the index of the encoding
   specified at initialization in the isUtf16 member.
*/

/* Read the stored encoding index back as an int. */
#define INIT_ENC_INDEX(enc) ((int)(enc)->initEnc.isUtf16)
/* Store encoding index i, narrowed to char.  The argument is
   parenthesized so that an expression argument (for example
   `a ? b : c`) is evaluated as a whole before the cast is applied. */
#define SET_INIT_ENC_INDEX(enc, i) ((enc)->initEnc.isUtf16 = (char)(i))
15287db96d56Sopenharmony_ci
15297db96d56Sopenharmony_ci/* This is what detects the encoding.  encodingTable maps from
15307db96d56Sopenharmony_ci   encoding indices to encodings; INIT_ENC_INDEX(enc) is the index of
15317db96d56Sopenharmony_ci   the external (protocol) specified encoding; state is
15327db96d56Sopenharmony_ci   XML_CONTENT_STATE if we're parsing an external text entity, and
15337db96d56Sopenharmony_ci   XML_PROLOG_STATE otherwise.
15347db96d56Sopenharmony_ci*/
15357db96d56Sopenharmony_ci
15367db96d56Sopenharmony_cistatic int
15377db96d56Sopenharmony_ciinitScan(const ENCODING *const *encodingTable, const INIT_ENCODING *enc,
15387db96d56Sopenharmony_ci         int state, const char *ptr, const char *end, const char **nextTokPtr) {
15397db96d56Sopenharmony_ci  const ENCODING **encPtr;
15407db96d56Sopenharmony_ci
15417db96d56Sopenharmony_ci  if (ptr >= end)
15427db96d56Sopenharmony_ci    return XML_TOK_NONE;
15437db96d56Sopenharmony_ci  encPtr = enc->encPtr;
15447db96d56Sopenharmony_ci  if (ptr + 1 == end) {
15457db96d56Sopenharmony_ci    /* only a single byte available for auto-detection */
15467db96d56Sopenharmony_ci#ifndef XML_DTD /* FIXME */
15477db96d56Sopenharmony_ci    /* a well-formed document entity must have more than one byte */
15487db96d56Sopenharmony_ci    if (state != XML_CONTENT_STATE)
15497db96d56Sopenharmony_ci      return XML_TOK_PARTIAL;
15507db96d56Sopenharmony_ci#endif
15517db96d56Sopenharmony_ci    /* so we're parsing an external text entity... */
15527db96d56Sopenharmony_ci    /* if UTF-16 was externally specified, then we need at least 2 bytes */
15537db96d56Sopenharmony_ci    switch (INIT_ENC_INDEX(enc)) {
15547db96d56Sopenharmony_ci    case UTF_16_ENC:
15557db96d56Sopenharmony_ci    case UTF_16LE_ENC:
15567db96d56Sopenharmony_ci    case UTF_16BE_ENC:
15577db96d56Sopenharmony_ci      return XML_TOK_PARTIAL;
15587db96d56Sopenharmony_ci    }
15597db96d56Sopenharmony_ci    switch ((unsigned char)*ptr) {
15607db96d56Sopenharmony_ci    case 0xFE:
15617db96d56Sopenharmony_ci    case 0xFF:
15627db96d56Sopenharmony_ci    case 0xEF: /* possibly first byte of UTF-8 BOM */
15637db96d56Sopenharmony_ci      if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC && state == XML_CONTENT_STATE)
15647db96d56Sopenharmony_ci        break;
15657db96d56Sopenharmony_ci      /* fall through */
15667db96d56Sopenharmony_ci    case 0x00:
15677db96d56Sopenharmony_ci    case 0x3C:
15687db96d56Sopenharmony_ci      return XML_TOK_PARTIAL;
15697db96d56Sopenharmony_ci    }
15707db96d56Sopenharmony_ci  } else {
15717db96d56Sopenharmony_ci    switch (((unsigned char)ptr[0] << 8) | (unsigned char)ptr[1]) {
15727db96d56Sopenharmony_ci    case 0xFEFF:
15737db96d56Sopenharmony_ci      if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC && state == XML_CONTENT_STATE)
15747db96d56Sopenharmony_ci        break;
15757db96d56Sopenharmony_ci      *nextTokPtr = ptr + 2;
15767db96d56Sopenharmony_ci      *encPtr = encodingTable[UTF_16BE_ENC];
15777db96d56Sopenharmony_ci      return XML_TOK_BOM;
15787db96d56Sopenharmony_ci    /* 00 3C is handled in the default case */
15797db96d56Sopenharmony_ci    case 0x3C00:
15807db96d56Sopenharmony_ci      if ((INIT_ENC_INDEX(enc) == UTF_16BE_ENC
15817db96d56Sopenharmony_ci           || INIT_ENC_INDEX(enc) == UTF_16_ENC)
15827db96d56Sopenharmony_ci          && state == XML_CONTENT_STATE)
15837db96d56Sopenharmony_ci        break;
15847db96d56Sopenharmony_ci      *encPtr = encodingTable[UTF_16LE_ENC];
15857db96d56Sopenharmony_ci      return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
15867db96d56Sopenharmony_ci    case 0xFFFE:
15877db96d56Sopenharmony_ci      if (INIT_ENC_INDEX(enc) == ISO_8859_1_ENC && state == XML_CONTENT_STATE)
15887db96d56Sopenharmony_ci        break;
15897db96d56Sopenharmony_ci      *nextTokPtr = ptr + 2;
15907db96d56Sopenharmony_ci      *encPtr = encodingTable[UTF_16LE_ENC];
15917db96d56Sopenharmony_ci      return XML_TOK_BOM;
15927db96d56Sopenharmony_ci    case 0xEFBB:
15937db96d56Sopenharmony_ci      /* Maybe a UTF-8 BOM (EF BB BF) */
15947db96d56Sopenharmony_ci      /* If there's an explicitly specified (external) encoding
15957db96d56Sopenharmony_ci         of ISO-8859-1 or some flavour of UTF-16
15967db96d56Sopenharmony_ci         and this is an external text entity,
15977db96d56Sopenharmony_ci         don't look for the BOM,
15987db96d56Sopenharmony_ci         because it might be a legal data.
15997db96d56Sopenharmony_ci      */
16007db96d56Sopenharmony_ci      if (state == XML_CONTENT_STATE) {
16017db96d56Sopenharmony_ci        int e = INIT_ENC_INDEX(enc);
16027db96d56Sopenharmony_ci        if (e == ISO_8859_1_ENC || e == UTF_16BE_ENC || e == UTF_16LE_ENC
16037db96d56Sopenharmony_ci            || e == UTF_16_ENC)
16047db96d56Sopenharmony_ci          break;
16057db96d56Sopenharmony_ci      }
16067db96d56Sopenharmony_ci      if (ptr + 2 == end)
16077db96d56Sopenharmony_ci        return XML_TOK_PARTIAL;
16087db96d56Sopenharmony_ci      if ((unsigned char)ptr[2] == 0xBF) {
16097db96d56Sopenharmony_ci        *nextTokPtr = ptr + 3;
16107db96d56Sopenharmony_ci        *encPtr = encodingTable[UTF_8_ENC];
16117db96d56Sopenharmony_ci        return XML_TOK_BOM;
16127db96d56Sopenharmony_ci      }
16137db96d56Sopenharmony_ci      break;
16147db96d56Sopenharmony_ci    default:
16157db96d56Sopenharmony_ci      if (ptr[0] == '\0') {
16167db96d56Sopenharmony_ci        /* 0 isn't a legal data character. Furthermore a document
16177db96d56Sopenharmony_ci           entity can only start with ASCII characters.  So the only
16187db96d56Sopenharmony_ci           way this can fail to be big-endian UTF-16 if it it's an
16197db96d56Sopenharmony_ci           external parsed general entity that's labelled as
16207db96d56Sopenharmony_ci           UTF-16LE.
16217db96d56Sopenharmony_ci        */
16227db96d56Sopenharmony_ci        if (state == XML_CONTENT_STATE && INIT_ENC_INDEX(enc) == UTF_16LE_ENC)
16237db96d56Sopenharmony_ci          break;
16247db96d56Sopenharmony_ci        *encPtr = encodingTable[UTF_16BE_ENC];
16257db96d56Sopenharmony_ci        return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
16267db96d56Sopenharmony_ci      } else if (ptr[1] == '\0') {
16277db96d56Sopenharmony_ci        /* We could recover here in the case:
16287db96d56Sopenharmony_ci            - parsing an external entity
16297db96d56Sopenharmony_ci            - second byte is 0
16307db96d56Sopenharmony_ci            - no externally specified encoding
16317db96d56Sopenharmony_ci            - no encoding declaration
16327db96d56Sopenharmony_ci           by assuming UTF-16LE.  But we don't, because this would mean when
16337db96d56Sopenharmony_ci           presented just with a single byte, we couldn't reliably determine
16347db96d56Sopenharmony_ci           whether we needed further bytes.
16357db96d56Sopenharmony_ci        */
16367db96d56Sopenharmony_ci        if (state == XML_CONTENT_STATE)
16377db96d56Sopenharmony_ci          break;
16387db96d56Sopenharmony_ci        *encPtr = encodingTable[UTF_16LE_ENC];
16397db96d56Sopenharmony_ci        return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
16407db96d56Sopenharmony_ci      }
16417db96d56Sopenharmony_ci      break;
16427db96d56Sopenharmony_ci    }
16437db96d56Sopenharmony_ci  }
16447db96d56Sopenharmony_ci  *encPtr = encodingTable[INIT_ENC_INDEX(enc)];
16457db96d56Sopenharmony_ci  return XmlTok(*encPtr, state, ptr, end, nextTokPtr);
16467db96d56Sopenharmony_ci}
16477db96d56Sopenharmony_ci
/* First inclusion of xmltok_ns.c.  NS(x) and ns(x) both expand to x
   unchanged for this pass.  XML_TOK_NS_C is defined only around the
   #include, and all three macros are undefined again afterwards so
   the file can be included a second time with different definitions.
   NOTE(review): presumably xmltok_ns.c wraps the names it defines in
   NS()/ns() and checks XML_TOK_NS_C -- confirm in that file.
*/
16487db96d56Sopenharmony_ci#define NS(x) x
16497db96d56Sopenharmony_ci#define ns(x) x
16507db96d56Sopenharmony_ci#define XML_TOK_NS_C
16517db96d56Sopenharmony_ci#include "xmltok_ns.c"
16527db96d56Sopenharmony_ci#undef XML_TOK_NS_C
16537db96d56Sopenharmony_ci#undef NS
16547db96d56Sopenharmony_ci#undef ns
16557db96d56Sopenharmony_ci
16567db96d56Sopenharmony_ci#ifdef XML_NS
16577db96d56Sopenharmony_ci
/* Second inclusion of xmltok_ns.c, compiled only when XML_NS is
   defined.  For this pass NS(x) pastes the suffix "NS" onto x and
   ns(x) pastes the suffix "_ns" onto x.  XML_TOK_NS_C is again
   defined only around the #include, and NS/ns are undefined
   afterwards.
   NOTE(review): presumably this yields a second, suffixed set of the
   names xmltok_ns.c defines -- confirm in that file.
*/
16587db96d56Sopenharmony_ci#  define NS(x) x##NS
16597db96d56Sopenharmony_ci#  define ns(x) x##_ns
16607db96d56Sopenharmony_ci
16617db96d56Sopenharmony_ci#  define XML_TOK_NS_C
16627db96d56Sopenharmony_ci#  include "xmltok_ns.c"
16637db96d56Sopenharmony_ci#  undef XML_TOK_NS_C
16647db96d56Sopenharmony_ci
16657db96d56Sopenharmony_ci#  undef NS
16667db96d56Sopenharmony_ci#  undef ns
16677db96d56Sopenharmony_ci
16687db96d56Sopenharmony_ciENCODING *
16697db96d56Sopenharmony_ciXmlInitUnknownEncodingNS(void *mem, int *table, CONVERTER convert,
16707db96d56Sopenharmony_ci                         void *userData) {
16717db96d56Sopenharmony_ci  ENCODING *enc = XmlInitUnknownEncoding(mem, table, convert, userData);
16727db96d56Sopenharmony_ci  if (enc)
16737db96d56Sopenharmony_ci    ((struct normal_encoding *)enc)->type[ASCII_COLON] = BT_COLON;
16747db96d56Sopenharmony_ci  return enc;
16757db96d56Sopenharmony_ci}
16767db96d56Sopenharmony_ci
16777db96d56Sopenharmony_ci#endif /* XML_NS */
1678