153aa9179Sopenharmony_ciFrom a06eaa6119ca5b296b8105dc8c9a34ed5fc1f338 Mon Sep 17 00:00:00 2001 253aa9179Sopenharmony_ciFrom: Nick Wellnhofer <wellnhofer@aevum.de> 353aa9179Sopenharmony_ciDate: Thu, 9 Mar 2023 06:58:24 +0100 453aa9179Sopenharmony_ciSubject: [PATCH] regexp: Fix determinism checks 553aa9179Sopenharmony_ci 653aa9179Sopenharmony_ciSwap arguments in initial call to xmlFARecurseDeterminism. 753aa9179Sopenharmony_ci 853aa9179Sopenharmony_ciFix the check whether we revisit the initial state in 953aa9179Sopenharmony_cixmlFARecurseDeterminism. 1053aa9179Sopenharmony_ci 1153aa9179Sopenharmony_ciIf there are transitions with equal atoms and targets but different 1253aa9179Sopenharmony_cicounters, treat the regex as deterministic but mark the transitions as 1353aa9179Sopenharmony_cinon-deterministic internally. 1453aa9179Sopenharmony_ci 1553aa9179Sopenharmony_ciDon't overwrite zero return value of xmlFAComputesDeterminism 1653aa9179Sopenharmony_ciwith non-zero value from xmlFARecurseDeterminism. 1753aa9179Sopenharmony_ci 1853aa9179Sopenharmony_ciMost of these errors lead to non-deterministic regexes not being 1953aa9179Sopenharmony_cidetected which typically isn't an issue. The improved code may break 2053aa9179Sopenharmony_ciusers who relied on buggy behavior or cause other bugs to become 2153aa9179Sopenharmony_civisible. 2253aa9179Sopenharmony_ci 2353aa9179Sopenharmony_ciFixes #469. 2453aa9179Sopenharmony_ci 2553aa9179Sopenharmony_ciReference:https://github.com/GNOME/libxml2/commit/a06eaa6119ca5b296b8105dc8c9a34ed5fc1f338 2653aa9179Sopenharmony_ciConflict:NA 2753aa9179Sopenharmony_ci 2853aa9179Sopenharmony_ci--- 2953aa9179Sopenharmony_ci xmlregexp.c | 34 +++++++++++++++++++++++----------- 3053aa9179Sopenharmony_ci 1 file changed, 23 insertions(+), 11 deletions(-) 3153aa9179Sopenharmony_ci 3253aa9179Sopenharmony_cidiff --git a/xmlregexp.c b/xmlregexp.c 3353aa9179Sopenharmony_ciindex df0626c..c89f0c7 100644 3453aa9179Sopenharmony_ci--- a/xmlregexp.c 3553aa9179Sopenharmony_ci+++ b/xmlregexp.c 3653aa9179Sopenharmony_ci@@ -2665,7 +2665,7 @@ not_determinist: 3753aa9179Sopenharmony_ci */ 3853aa9179Sopenharmony_ci static int 3953aa9179Sopenharmony_ci xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state, 4053aa9179Sopenharmony_ci- int to, xmlRegAtomPtr atom) { 4153aa9179Sopenharmony_ci+ int fromnr, int tonr, xmlRegAtomPtr atom) { 4253aa9179Sopenharmony_ci int ret = 1; 4353aa9179Sopenharmony_ci int res; 4453aa9179Sopenharmony_ci int transnr, nbTrans; 4553aa9179Sopenharmony_ci@@ -2690,21 +2690,23 @@ xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state, 4653aa9179Sopenharmony_ci /* 4753aa9179Sopenharmony_ci * check transitions conflicting with the one looked at 4853aa9179Sopenharmony_ci */ 4953aa9179Sopenharmony_ci+ if ((t1->to < 0) || (t1->to == fromnr)) 5053aa9179Sopenharmony_ci+ continue; 5153aa9179Sopenharmony_ci if (t1->atom == NULL) { 5253aa9179Sopenharmony_ci- if (t1->to < 0) 5353aa9179Sopenharmony_ci- continue; 5453aa9179Sopenharmony_ci state->markd = XML_REGEXP_MARK_VISITED; 5553aa9179Sopenharmony_ci res = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to], 5653aa9179Sopenharmony_ci- to, atom); 5753aa9179Sopenharmony_ci+ fromnr, tonr, atom); 5853aa9179Sopenharmony_ci if (res == 0) { 5953aa9179Sopenharmony_ci ret = 0; 6053aa9179Sopenharmony_ci /* t1->nd = 1; */ 6153aa9179Sopenharmony_ci } 6253aa9179Sopenharmony_ci continue; 6353aa9179Sopenharmony_ci } 6453aa9179Sopenharmony_ci- if (t1->to != to) 6553aa9179Sopenharmony_ci- continue; 6653aa9179Sopenharmony_ci if (xmlFACompareAtoms(t1->atom, atom, deep)) { 6753aa9179Sopenharmony_ci+ /* Treat equal transitions as deterministic. */ 6853aa9179Sopenharmony_ci+ if ((t1->to != tonr) || 6953aa9179Sopenharmony_ci+ (!xmlFAEqualAtoms(t1->atom, atom, deep))) 7053aa9179Sopenharmony_ci+ ret = 0; 7153aa9179Sopenharmony_ci ret = 0; 7253aa9179Sopenharmony_ci /* mark the transition as non-deterministic */ 7353aa9179Sopenharmony_ci t1->nd = 1; 7453aa9179Sopenharmony_ci@@ -2837,29 +2839,39 @@ xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) { 7553aa9179Sopenharmony_ci * find transitions which indicate a conflict 7653aa9179Sopenharmony_ci */ 7753aa9179Sopenharmony_ci if (xmlFACompareAtoms(t1->atom, t2->atom, 1)) { 7853aa9179Sopenharmony_ci- ret = 0; 7953aa9179Sopenharmony_ci+ /* 8053aa9179Sopenharmony_ci+ * Treat equal counter transitions that couldn't be 8153aa9179Sopenharmony_ci+ * eliminated as deterministic. 8253aa9179Sopenharmony_ci+ */ 8353aa9179Sopenharmony_ci+ if ((t1->to != t2->to) || 8453aa9179Sopenharmony_ci+ (t1->counter == t2->counter) || 8553aa9179Sopenharmony_ci+ (!xmlFAEqualAtoms(t1->atom, t2->atom, deep))) 8653aa9179Sopenharmony_ci+ ret = 0; 8753aa9179Sopenharmony_ci /* mark the transitions as non-deterministic ones */ 8853aa9179Sopenharmony_ci t1->nd = 1; 8953aa9179Sopenharmony_ci t2->nd = 1; 9053aa9179Sopenharmony_ci last = t1; 9153aa9179Sopenharmony_ci } 9253aa9179Sopenharmony_ci } else { 9353aa9179Sopenharmony_ci+ int res; 9453aa9179Sopenharmony_ci+ 9553aa9179Sopenharmony_ci /* 9653aa9179Sopenharmony_ci * do the closure in case of remaining specific 9753aa9179Sopenharmony_ci * epsilon transitions like choices or all 9853aa9179Sopenharmony_ci */ 9953aa9179Sopenharmony_ci- ret = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to], 10053aa9179Sopenharmony_ci- t2->to, t2->atom); 10153aa9179Sopenharmony_ci- xmlFAFinishRecurseDeterminism(ctxt, ctxt->states[t1->to]); 10253aa9179Sopenharmony_ci+ res = xmlFARecurseDeterminism(ctxt, ctxt->states[t2->to], 10353aa9179Sopenharmony_ci+ statenr, t1->to, t1->atom); 10453aa9179Sopenharmony_ci+ xmlFAFinishRecurseDeterminism(ctxt, ctxt->states[t2->to]); 10553aa9179Sopenharmony_ci /* don't shortcut the computation so all non deterministic 10653aa9179Sopenharmony_ci transition get marked down 10753aa9179Sopenharmony_ci if (ret == 0) 10853aa9179Sopenharmony_ci return(0); 10953aa9179Sopenharmony_ci */ 11053aa9179Sopenharmony_ci- if (ret == 0) { 11153aa9179Sopenharmony_ci+ if (res == 0) { 11253aa9179Sopenharmony_ci t1->nd = 1; 11353aa9179Sopenharmony_ci /* t2->nd = 1; */ 11453aa9179Sopenharmony_ci last = t1; 11553aa9179Sopenharmony_ci+ ret = 0; 11653aa9179Sopenharmony_ci } 11753aa9179Sopenharmony_ci } 11853aa9179Sopenharmony_ci } 11953aa9179Sopenharmony_ci-- 12053aa9179Sopenharmony_ci2.27.0 12153aa9179Sopenharmony_ci 122