From 38f04779f7afd758db6210123ec0b64c489595c5 Mon Sep 17 00:00:00 2001
From: Nick Wellnhofer <wellnhofer@aevum.de>
Date: Mon, 22 Aug 2022 13:33:35 +0200
Subject: [PATCH] Fix HTML parser with threads and --without-legacy

If the legacy functions are disabled, the default "V1" HTML SAX handler
isn't initialized in threads other than the main thread.
htmlInitParserCtxt would later use the empty V1 SAX handler, resulting
in NULL documents.

Change htmlInitParserCtxt to initialize the HTML SAX handler by calling
xmlSAX2InitHtmlDefaultSAXHandler. This removes the ability to change the
default handler but is more in line with the XML parser which
initializes the SAX handler by calling xmlSAXVersion, ignoring the V1
default handler.

Fixes #399.
Reference:https://github.com/GNOME/libxml2/commit/38f04779f7afd758db6210123ec0b64c489595c5
Conflict:NA
---
 HTMLparser.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/HTMLparser.c b/HTMLparser.c
index e95d86b..98d73f3 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -5039,8 +5039,7 @@ htmlInitParserCtxt(htmlParserCtxtPtr ctxt)
         htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n");
         return(-1);
     }
-    else
-        memset(sax, 0, sizeof(htmlSAXHandler));
+    memset(sax, 0, sizeof(htmlSAXHandler));
 
     /* Allocate the Input stack */
     ctxt->inputTab = (htmlParserInputPtr *)
@@ -5099,11 +5098,9 @@ htmlInitParserCtxt(htmlParserCtxtPtr ctxt)
     ctxt->nodeInfoNr = 0;
     ctxt->nodeInfoMax = 0;
 
-    if (sax == NULL) ctxt->sax = (xmlSAXHandlerPtr) &htmlDefaultSAXHandler;
-    else {
-        ctxt->sax = sax;
-        memcpy(sax, &htmlDefaultSAXHandler, sizeof(xmlSAXHandlerV1));
-    }
+    ctxt->sax = sax;
+    xmlSAX2InitHtmlDefaultSAXHandler(sax);
+
     ctxt->userData = ctxt;
     ctxt->myDoc = NULL;
     ctxt->wellFormed = 1;
-- 
2.27.0
