153aa9179Sopenharmony_ciFrom 38f04779f7afd758db6210123ec0b64c489595c5 Mon Sep 17 00:00:00 2001
253aa9179Sopenharmony_ciFrom: Nick Wellnhofer <wellnhofer@aevum.de>
353aa9179Sopenharmony_ciDate: Mon, 22 Aug 2022 13:33:35 +0200
453aa9179Sopenharmony_ciSubject: [PATCH] Fix HTML parser with threads and --without-legacy
553aa9179Sopenharmony_ci
653aa9179Sopenharmony_ciIf the legacy functions are disabled, the default "V1" HTML SAX handler
753aa9179Sopenharmony_ciisn't initialized in threads other than the main thread.
853aa9179Sopenharmony_cihtmlInitParserCtxt would later use the empty V1 SAX handler, resulting
953aa9179Sopenharmony_ciin NULL documents.
1053aa9179Sopenharmony_ci
1153aa9179Sopenharmony_ciChange htmlInitParserCtxt to initialize the HTML SAX handler by calling
1253aa9179Sopenharmony_cixmlSAX2InitHtmlDefaultSAXHandler. This removes the ability to change the
1353aa9179Sopenharmony_cidefault handler but is more in line with the XML parser which
1453aa9179Sopenharmony_ciinitializes the SAX handler by calling xmlSAXVersion, ignoring the V1
1553aa9179Sopenharmony_cidefault handler.
1653aa9179Sopenharmony_ci
1753aa9179Sopenharmony_ciFixes #399.
1853aa9179Sopenharmony_ciReference:https://github.com/GNOME/libxml2/commit/38f04779f7afd758db6210123ec0b64c489595c5
1953aa9179Sopenharmony_ciConflict:NA
2053aa9179Sopenharmony_ci---
2153aa9179Sopenharmony_ci HTMLparser.c | 11 ++++-------
2253aa9179Sopenharmony_ci 1 file changed, 4 insertions(+), 7 deletions(-)
2353aa9179Sopenharmony_ci
2453aa9179Sopenharmony_cidiff --git a/HTMLparser.c b/HTMLparser.c
2553aa9179Sopenharmony_ciindex e95d86b..98d73f3 100644
2653aa9179Sopenharmony_ci--- a/HTMLparser.c
2753aa9179Sopenharmony_ci+++ b/HTMLparser.c
2853aa9179Sopenharmony_ci@@ -5039,8 +5039,7 @@ htmlInitParserCtxt(htmlParserCtxtPtr ctxt)
2953aa9179Sopenharmony_ci         htmlErrMemory(NULL, "htmlInitParserCtxt: out of memory\n");
3053aa9179Sopenharmony_ci 	return(-1);
3153aa9179Sopenharmony_ci     }
3253aa9179Sopenharmony_ci-    else
3353aa9179Sopenharmony_ci-        memset(sax, 0, sizeof(htmlSAXHandler));
3453aa9179Sopenharmony_ci+    memset(sax, 0, sizeof(htmlSAXHandler));
3553aa9179Sopenharmony_ci 
3653aa9179Sopenharmony_ci     /* Allocate the Input stack */
3753aa9179Sopenharmony_ci     ctxt->inputTab = (htmlParserInputPtr *)
3853aa9179Sopenharmony_ci@@ -5099,11 +5098,9 @@ htmlInitParserCtxt(htmlParserCtxtPtr ctxt)
3953aa9179Sopenharmony_ci     ctxt->nodeInfoNr  = 0;
4053aa9179Sopenharmony_ci     ctxt->nodeInfoMax = 0;
4153aa9179Sopenharmony_ci 
4253aa9179Sopenharmony_ci-    if (sax == NULL) ctxt->sax = (xmlSAXHandlerPtr) &htmlDefaultSAXHandler;
4353aa9179Sopenharmony_ci-    else {
4453aa9179Sopenharmony_ci-        ctxt->sax = sax;
4553aa9179Sopenharmony_ci-	memcpy(sax, &htmlDefaultSAXHandler, sizeof(xmlSAXHandlerV1));
4653aa9179Sopenharmony_ci-    }
4753aa9179Sopenharmony_ci+    ctxt->sax = sax;
4853aa9179Sopenharmony_ci+    xmlSAX2InitHtmlDefaultSAXHandler(sax);
4953aa9179Sopenharmony_ci+
5053aa9179Sopenharmony_ci     ctxt->userData = ctxt;
5153aa9179Sopenharmony_ci     ctxt->myDoc = NULL;
5253aa9179Sopenharmony_ci     ctxt->wellFormed = 1;
5353aa9179Sopenharmony_ci-- 
5453aa9179Sopenharmony_ci2.27.0
5553aa9179Sopenharmony_ci
56