153aa9179Sopenharmony_ciFrom a06eaa6119ca5b296b8105dc8c9a34ed5fc1f338 Mon Sep 17 00:00:00 2001
253aa9179Sopenharmony_ciFrom: Nick Wellnhofer <wellnhofer@aevum.de>
353aa9179Sopenharmony_ciDate: Thu, 9 Mar 2023 06:58:24 +0100
453aa9179Sopenharmony_ciSubject: [PATCH] regexp: Fix determinism checks
553aa9179Sopenharmony_ci
653aa9179Sopenharmony_ciSwap the arguments in the initial call to xmlFARecurseDeterminism.
753aa9179Sopenharmony_ci
853aa9179Sopenharmony_ciFix the check for whether we revisit the initial state in
953aa9179Sopenharmony_cixmlFARecurseDeterminism.
1053aa9179Sopenharmony_ci
1153aa9179Sopenharmony_ciIf there are transitions with equal atoms and targets but different
1253aa9179Sopenharmony_cicounters, treat the regex as deterministic but mark the transitions as
1353aa9179Sopenharmony_cinon-deterministic internally.
1453aa9179Sopenharmony_ci
1553aa9179Sopenharmony_ciDon't overwrite a zero return value of xmlFAComputesDeterminism
1653aa9179Sopenharmony_ciwith a non-zero value from xmlFARecurseDeterminism.
1753aa9179Sopenharmony_ci
1853aa9179Sopenharmony_ciMost of these errors lead to non-deterministic regexes not being
1953aa9179Sopenharmony_cidetected, which typically isn't an issue. The improved code may break
2053aa9179Sopenharmony_ciusers who relied on the buggy behavior or cause other bugs to become
2153aa9179Sopenharmony_civisible.
2253aa9179Sopenharmony_ci
2353aa9179Sopenharmony_ciFixes #469.
2453aa9179Sopenharmony_ci
2553aa9179Sopenharmony_ciReference:https://github.com/GNOME/libxml2/commit/a06eaa6119ca5b296b8105dc8c9a34ed5fc1f338
2653aa9179Sopenharmony_ciConflict:NA
2753aa9179Sopenharmony_ci
2853aa9179Sopenharmony_ci---
2953aa9179Sopenharmony_ci xmlregexp.c | 34 +++++++++++++++++++++++-----------
3053aa9179Sopenharmony_ci 1 file changed, 23 insertions(+), 11 deletions(-)
3153aa9179Sopenharmony_ci
3253aa9179Sopenharmony_cidiff --git a/xmlregexp.c b/xmlregexp.c
3353aa9179Sopenharmony_ciindex df0626c..c89f0c7 100644
3453aa9179Sopenharmony_ci--- a/xmlregexp.c
3553aa9179Sopenharmony_ci+++ b/xmlregexp.c
3653aa9179Sopenharmony_ci@@ -2665,7 +2665,7 @@ not_determinist:
3753aa9179Sopenharmony_ci  */
3853aa9179Sopenharmony_ci static int
3953aa9179Sopenharmony_ci xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
4053aa9179Sopenharmony_ci-	                 int to, xmlRegAtomPtr atom) {
4153aa9179Sopenharmony_ci+	                int fromnr, int tonr, xmlRegAtomPtr atom) {
4253aa9179Sopenharmony_ci     int ret = 1;
4353aa9179Sopenharmony_ci     int res;
4453aa9179Sopenharmony_ci     int transnr, nbTrans;
4553aa9179Sopenharmony_ci@@ -2690,21 +2690,23 @@ xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
4653aa9179Sopenharmony_ci 	/*
4753aa9179Sopenharmony_ci 	 * check transitions conflicting with the one looked at
4853aa9179Sopenharmony_ci 	 */
4953aa9179Sopenharmony_ci+        if ((t1->to < 0) || (t1->to == fromnr))
5053aa9179Sopenharmony_ci+            continue;
5153aa9179Sopenharmony_ci 	if (t1->atom == NULL) {
5253aa9179Sopenharmony_ci-	    if (t1->to < 0)
5353aa9179Sopenharmony_ci-		continue;
5453aa9179Sopenharmony_ci 	    state->markd = XML_REGEXP_MARK_VISITED;
5553aa9179Sopenharmony_ci 	    res = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
5653aa9179Sopenharmony_ci-		                           to, atom);
5753aa9179Sopenharmony_ci+		                          fromnr, tonr, atom);
5853aa9179Sopenharmony_ci 	    if (res == 0) {
5953aa9179Sopenharmony_ci 	        ret = 0;
6053aa9179Sopenharmony_ci 		/* t1->nd = 1; */
6153aa9179Sopenharmony_ci 	    }
6253aa9179Sopenharmony_ci 	    continue;
6353aa9179Sopenharmony_ci 	}
6453aa9179Sopenharmony_ci-	if (t1->to != to)
6553aa9179Sopenharmony_ci-	    continue;
6653aa9179Sopenharmony_ci 	if (xmlFACompareAtoms(t1->atom, atom, deep)) {
6753aa9179Sopenharmony_ci+            /* Treat equal transitions as deterministic. */
6853aa9179Sopenharmony_ci+            if ((t1->to != tonr) ||
6953aa9179Sopenharmony_ci+                (!xmlFAEqualAtoms(t1->atom, atom, deep)))
7053aa9179Sopenharmony_ci+                ret = 0;
7153aa9179Sopenharmony_ci 	    ret = 0;
7253aa9179Sopenharmony_ci 	    /* mark the transition as non-deterministic */
7353aa9179Sopenharmony_ci 	    t1->nd = 1;
7453aa9179Sopenharmony_ci@@ -2837,29 +2839,39 @@ xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) {
7553aa9179Sopenharmony_ci                      * find transitions which indicate a conflict
7653aa9179Sopenharmony_ci                      */
7753aa9179Sopenharmony_ci 		    if (xmlFACompareAtoms(t1->atom, t2->atom, 1)) {
7853aa9179Sopenharmony_ci-			ret = 0;
7953aa9179Sopenharmony_ci+                        /*
8053aa9179Sopenharmony_ci+                         * Treat equal counter transitions that couldn't be
8153aa9179Sopenharmony_ci+                         * eliminated as deterministic.
8253aa9179Sopenharmony_ci+                         */
8353aa9179Sopenharmony_ci+                        if ((t1->to != t2->to) ||
8453aa9179Sopenharmony_ci+                            (t1->counter == t2->counter) ||
8553aa9179Sopenharmony_ci+                            (!xmlFAEqualAtoms(t1->atom, t2->atom, deep)))
8653aa9179Sopenharmony_ci+                            ret = 0;
8753aa9179Sopenharmony_ci 			/* mark the transitions as non-deterministic ones */
8853aa9179Sopenharmony_ci 			t1->nd = 1;
8953aa9179Sopenharmony_ci 			t2->nd = 1;
9053aa9179Sopenharmony_ci 			last = t1;
9153aa9179Sopenharmony_ci 		    }
9253aa9179Sopenharmony_ci 		} else {
9353aa9179Sopenharmony_ci+                    int res;
9453aa9179Sopenharmony_ci+
9553aa9179Sopenharmony_ci 		    /*
9653aa9179Sopenharmony_ci 		     * do the closure in case of remaining specific
9753aa9179Sopenharmony_ci 		     * epsilon transitions like choices or all
9853aa9179Sopenharmony_ci 		     */
9953aa9179Sopenharmony_ci-		    ret = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
10053aa9179Sopenharmony_ci-						   t2->to, t2->atom);
10153aa9179Sopenharmony_ci-                    xmlFAFinishRecurseDeterminism(ctxt, ctxt->states[t1->to]);
10253aa9179Sopenharmony_ci+		    res = xmlFARecurseDeterminism(ctxt, ctxt->states[t2->to],
10353aa9179Sopenharmony_ci+						  statenr, t1->to, t1->atom);
10453aa9179Sopenharmony_ci+                    xmlFAFinishRecurseDeterminism(ctxt, ctxt->states[t2->to]);
10553aa9179Sopenharmony_ci 		    /* don't shortcut the computation so all non deterministic
10653aa9179Sopenharmony_ci 		       transition get marked down
10753aa9179Sopenharmony_ci 		    if (ret == 0)
10853aa9179Sopenharmony_ci 			return(0);
10953aa9179Sopenharmony_ci 		     */
11053aa9179Sopenharmony_ci-		    if (ret == 0) {
11153aa9179Sopenharmony_ci+		    if (res == 0) {
11253aa9179Sopenharmony_ci 			t1->nd = 1;
11353aa9179Sopenharmony_ci 			/* t2->nd = 1; */
11453aa9179Sopenharmony_ci 			last = t1;
11553aa9179Sopenharmony_ci+                        ret = 0;
11653aa9179Sopenharmony_ci 		    }
11753aa9179Sopenharmony_ci 		}
11853aa9179Sopenharmony_ci 	    }
11953aa9179Sopenharmony_ci-- 
12053aa9179Sopenharmony_ci2.27.0
12153aa9179Sopenharmony_ci
122