From 4ad71c2d72beef0d10cf75aa417db10d77846f75 Mon Sep 17 00:00:00 2001
From: Nick Wellnhofer <wellnhofer@aevum.de>
Date: Sat, 20 Aug 2022 16:19:34 +0200
Subject: [PATCH] Fix xmlCtxtReadDoc with encoding

xmlCtxtReadDoc used to create an input stream involving
xmlNewStringInputStream. This would create a stream without an input
buffer, causing problems with encodings (see #34).

After commit aab584dc3, an error was returned even with UTF-8 encodings
which happened to work before.

Make xmlCtxtReadDoc call xmlCtxtReadMemory which doesn't suffer from
these issues. Also fix htmlCtxtReadDoc.

Fixes #397.
Reference:https://github.com/GNOME/libxml2/commit/4ad71c2d72beef0d10cf75aa417db10d77846f75
Conflict:NA
---
 HTMLparser.c | 17 ++++-------------
 parser.c     | 16 +++-------------
 2 files changed, 7 insertions(+), 26 deletions(-)

diff --git a/HTMLparser.c b/HTMLparser.c
index 98d73f3..a4168f3 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -7087,22 +7087,13 @@ htmlDocPtr
 htmlCtxtReadDoc(htmlParserCtxtPtr ctxt, const xmlChar * cur,
                 const char *URL, const char *encoding, int options)
 {
-    xmlParserInputPtr stream;
+    const char *buf;
 
     if (cur == NULL)
         return (NULL);
-    if (ctxt == NULL)
-        return (NULL);
-    xmlInitParser();
-
-    htmlCtxtReset(ctxt);
-
-    stream = xmlNewStringInputStream(ctxt, cur);
-    if (stream == NULL) {
-        return (NULL);
-    }
-    inputPush(ctxt, stream);
-    return (htmlDoRead(ctxt, URL, encoding, options, 1));
+    buf = (const char *) cur;
+    return (htmlCtxtReadMemory(ctxt, buf, strlen(buf), URL, encoding,
+                               options));
 }
 
 /**
diff --git a/parser.c b/parser.c
index 6b04bbf..fbeb7af 100644
--- a/parser.c
+++ b/parser.c
@@ -15374,22 +15374,12 @@ xmlDocPtr
 xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar * cur,
                const char *URL, const char *encoding, int options)
 {
-    xmlParserInputPtr stream;
+    const char *buf;
 
     if (cur == NULL)
         return (NULL);
-    if (ctxt == NULL)
-        return (NULL);
-    xmlInitParser();
-
-    xmlCtxtReset(ctxt);
-
-    stream = xmlNewStringInputStream(ctxt, cur);
-    if (stream == NULL) {
-        return (NULL);
-    }
-    inputPush(ctxt, stream);
-    return (xmlDoRead(ctxt, URL, encoding, options, 1));
+    buf = (const char *) cur;
+    return (xmlCtxtReadMemory(ctxt, buf, strlen(buf), URL, encoding, options));
 }
 
 /**
-- 
2.27.0
