From 4ad71c2d72beef0d10cf75aa417db10d77846f75 Mon Sep 17 00:00:00 2001
From: Nick Wellnhofer <wellnhofer@aevum.de>
Date: Sat, 20 Aug 2022 16:19:34 +0200
Subject: [PATCH] Fix xmlCtxtReadDoc with encoding

xmlCtxtReadDoc used to create an input stream involving
xmlNewStringInputStream. This would create a stream without an input
buffer, causing problems with encodings (see #34).

After commit aab584dc3, an error was returned even with UTF-8 encodings
which happened to work before.

Make xmlCtxtReadDoc call xmlCtxtReadMemory which doesn't suffer from
these issues. Also fix htmlCtxtReadDoc.

Fixes #397.
Reference:https://github.com/GNOME/libxml2/commit/4ad71c2d72beef0d10cf75aa417db10d77846f75
Conflict:NA
---
 HTMLparser.c | 17 ++++-------------
 parser.c     | 16 +++-------------
 2 files changed, 7 insertions(+), 26 deletions(-)

diff --git a/HTMLparser.c b/HTMLparser.c
index 98d73f3..a4168f3 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -7087,22 +7087,13 @@ htmlDocPtr
 htmlCtxtReadDoc(htmlParserCtxtPtr ctxt, const xmlChar * cur,
                const char *URL, const char *encoding, int options)
 {
-    xmlParserInputPtr stream;
+    const char *buf;
 
     if (cur == NULL)
         return (NULL);
-    if (ctxt == NULL)
-        return (NULL);
-    xmlInitParser();
-
-    htmlCtxtReset(ctxt);
-
-    stream = xmlNewStringInputStream(ctxt, cur);
-    if (stream == NULL) {
-        return (NULL);
-    }
-    inputPush(ctxt, stream);
-    return (htmlDoRead(ctxt, URL, encoding, options, 1));
+    buf = (const char *) cur;
+    return (htmlCtxtReadMemory(ctxt, buf, strlen(buf), URL, encoding,
+                               options));
 }
 
 /**
diff --git a/parser.c b/parser.c
index 6b04bbf..fbeb7af 100644
--- a/parser.c
+++ b/parser.c
@@ -15374,22 +15374,12 @@ xmlDocPtr
 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
                const char *URL, const char *encoding, int options)
 {
-    xmlParserInputPtr stream;
+    const char *buf;
 
     if (cur == NULL)
         return (NULL);
-    if (ctxt == NULL)
-        return (NULL);
-    xmlInitParser();
-
-    xmlCtxtReset(ctxt);
-
-    stream = xmlNewStringInputStream(ctxt, cur);
-    if (stream == NULL) {
-        return (NULL);
-    }
-    inputPush(ctxt, stream);
-    return (xmlDoRead(ctxt, URL, encoding, options, 1));
+    buf = (const char *) cur;
+    return (xmlCtxtReadMemory(ctxt, buf, strlen(buf), URL, encoding, options));
 }
 
 /**
-- 
2.27.0
