1b5975d6bSopenharmony_ciFrom bec68b2d74853de5e23ee40c890433fa336ffbc5 Mon Sep 17 00:00:00 2001
2b5975d6bSopenharmony_ciFrom: =?UTF-8?q?Marco=20Trevisan=20=28Trevi=C3=B1o=29?= <mail@3v1n0.net>
3b5975d6bSopenharmony_ciDate: Fri, 9 Sep 2022 18:30:15 +0200
4b5975d6bSopenharmony_ciSubject: [PATCH] glib/regex: Do not use JIT when using unsupported match
5b5975d6bSopenharmony_ci options
6b5975d6bSopenharmony_ci
7b5975d6bSopenharmony_ciStore the JIT status of a regex only during its initial compilation.
8b5975d6bSopenharmony_ciAfter that, decide whether to use it depending on matching options.
9b5975d6bSopenharmony_ci
10b5975d6bSopenharmony_ciIn fact there are some matching options that are incompatible with JIT,
11b5975d6bSopenharmony_cias the PCRE2 docs state:
12b5975d6bSopenharmony_ci
13b5975d6bSopenharmony_ci  Setting PCRE2_ANCHORED or PCRE2_ENDANCHORED at match time is not
14b5975d6bSopenharmony_ci  supported by the just-in-time (JIT) compiler. If it is set, JIT
15b5975d6bSopenharmony_ci  matching is disabled and the interpretive code in pcre2_match() is
16b5975d6bSopenharmony_ci  run. Apart from PCRE2_NO_JIT (obviously), the remaining options are
17b5975d6bSopenharmony_ci  supported for JIT matching.
18b5975d6bSopenharmony_ci
19b5975d6bSopenharmony_ciFixes: GNOME/gtksourceview#283
20b5975d6bSopenharmony_ci---
21b5975d6bSopenharmony_ci glib/gregex.c      | 38 ++++++++++++++++---------
22b5975d6bSopenharmony_ci glib/tests/regex.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++
23b5975d6bSopenharmony_ci 2 files changed, 94 insertions(+), 13 deletions(-)
24b5975d6bSopenharmony_ci
25b5975d6bSopenharmony_cidiff --git a/glib/gregex.c b/glib/gregex.c
26b5975d6bSopenharmony_ciindex fe7473e628..220a1a11ac 100644
27b5975d6bSopenharmony_ci--- a/glib/gregex.c
28b5975d6bSopenharmony_ci+++ b/glib/gregex.c
29b5975d6bSopenharmony_ci@@ -201,6 +201,13 @@
30b5975d6bSopenharmony_ci                               PCRE2_NEWLINE_CRLF |   \
31b5975d6bSopenharmony_ci                               PCRE2_NEWLINE_ANYCRLF)
32b5975d6bSopenharmony_ci 
33b5975d6bSopenharmony_ci+/* Some match options are not supported when using JIT as stated in the
34b5975d6bSopenharmony_ci+ * pcre2jit man page under the «UNSUPPORTED OPTIONS AND PATTERN ITEMS» section:
35b5975d6bSopenharmony_ci+ *   https://www.pcre.org/current/doc/html/pcre2jit.html#SEC5
36b5975d6bSopenharmony_ci+ */
37b5975d6bSopenharmony_ci+#define G_REGEX_PCRE2_JIT_UNSUPPORTED_OPTIONS (PCRE2_ANCHORED | \
38b5975d6bSopenharmony_ci+                                               PCRE2_ENDANCHORED)
39b5975d6bSopenharmony_ci+
40b5975d6bSopenharmony_ci #define G_REGEX_COMPILE_NEWLINE_MASK (G_REGEX_NEWLINE_CR      | \
41b5975d6bSopenharmony_ci                                       G_REGEX_NEWLINE_LF      | \
42b5975d6bSopenharmony_ci                                       G_REGEX_NEWLINE_CRLF    | \
43b5975d6bSopenharmony_ci@@ -869,7 +876,7 @@ recalc_match_offsets (GMatchInfo *match_info,
44b5975d6bSopenharmony_ci   return TRUE;
45b5975d6bSopenharmony_ci }
46b5975d6bSopenharmony_ci 
47b5975d6bSopenharmony_ci-static void
48b5975d6bSopenharmony_ci+static JITStatus
49b5975d6bSopenharmony_ci enable_jit_with_match_options (GRegex   *regex,
50b5975d6bSopenharmony_ci                                uint32_t  match_options)
51b5975d6bSopenharmony_ci {
52b5975d6bSopenharmony_ci@@ -877,9 +884,13 @@ enable_jit_with_match_options (GRegex   *regex,
53b5975d6bSopenharmony_ci   uint32_t old_jit_options, new_jit_options;
54b5975d6bSopenharmony_ci 
55b5975d6bSopenharmony_ci   if (!(regex->orig_compile_opts & G_REGEX_OPTIMIZE))
56b5975d6bSopenharmony_ci-    return;
57b5975d6bSopenharmony_ci+    return JIT_STATUS_DISABLED;
58b5975d6bSopenharmony_ci+
59b5975d6bSopenharmony_ci   if (regex->jit_status == JIT_STATUS_DISABLED)
60b5975d6bSopenharmony_ci-    return;
61b5975d6bSopenharmony_ci+    return JIT_STATUS_DISABLED;
62b5975d6bSopenharmony_ci+
63b5975d6bSopenharmony_ci+  if (match_options & G_REGEX_PCRE2_JIT_UNSUPPORTED_OPTIONS)
64b5975d6bSopenharmony_ci+    return JIT_STATUS_DISABLED;
65b5975d6bSopenharmony_ci 
66b5975d6bSopenharmony_ci   old_jit_options = regex->jit_options;
67b5975d6bSopenharmony_ci   new_jit_options = old_jit_options | PCRE2_JIT_COMPLETE;
68b5975d6bSopenharmony_ci@@ -890,34 +901,34 @@ enable_jit_with_match_options (GRegex   *regex,
69b5975d6bSopenharmony_ci 
70b5975d6bSopenharmony_ci   /* no new options enabled */
71b5975d6bSopenharmony_ci   if (new_jit_options == old_jit_options)
72b5975d6bSopenharmony_ci-    return;
73b5975d6bSopenharmony_ci+    return regex->jit_status;
74b5975d6bSopenharmony_ci 
75b5975d6bSopenharmony_ci   retval = pcre2_jit_compile (regex->pcre_re, new_jit_options);
76b5975d6bSopenharmony_ci   switch (retval)
77b5975d6bSopenharmony_ci     {
78b5975d6bSopenharmony_ci     case 0: /* JIT enabled successfully */
79b5975d6bSopenharmony_ci-      regex->jit_status = JIT_STATUS_ENABLED;
80b5975d6bSopenharmony_ci       regex->jit_options = new_jit_options;
81b5975d6bSopenharmony_ci-      break;
82b5975d6bSopenharmony_ci+      return JIT_STATUS_ENABLED;
83b5975d6bSopenharmony_ci     case PCRE2_ERROR_NOMEMORY:
84b5975d6bSopenharmony_ci       g_debug ("JIT compilation was requested with G_REGEX_OPTIMIZE, "
85b5975d6bSopenharmony_ci                "but JIT was unable to allocate executable memory for the "
86b5975d6bSopenharmony_ci                "compiler. Falling back to interpretive code.");
87b5975d6bSopenharmony_ci-      regex->jit_status = JIT_STATUS_DISABLED;
88b5975d6bSopenharmony_ci-      break;
89b5975d6bSopenharmony_ci+      return JIT_STATUS_DISABLED;
90b5975d6bSopenharmony_ci     case PCRE2_ERROR_JIT_BADOPTION:
91b5975d6bSopenharmony_ci       g_debug ("JIT compilation was requested with G_REGEX_OPTIMIZE, "
92b5975d6bSopenharmony_ci                "but JIT support is not available. Falling back to "
93b5975d6bSopenharmony_ci                "interpretive code.");
94b5975d6bSopenharmony_ci-      regex->jit_status = JIT_STATUS_DISABLED;
95b5975d6bSopenharmony_ci+      return JIT_STATUS_DISABLED;
96b5975d6bSopenharmony_ci       break;
97b5975d6bSopenharmony_ci     default:
98b5975d6bSopenharmony_ci       g_debug ("JIT compilation was requested with G_REGEX_OPTIMIZE, "
99b5975d6bSopenharmony_ci                "but request for JIT support had unexpectedly failed (error %d). "
100b5975d6bSopenharmony_ci                "Falling back to interpretive code.", retval);
101b5975d6bSopenharmony_ci-      regex->jit_status = JIT_STATUS_DISABLED;
102b5975d6bSopenharmony_ci+      return JIT_STATUS_DISABLED;
103b5975d6bSopenharmony_ci       break;
104b5975d6bSopenharmony_ci     }
105b5975d6bSopenharmony_ci+
106b5975d6bSopenharmony_ci+  return regex->jit_status;
107b5975d6bSopenharmony_ci }
108b5975d6bSopenharmony_ci 
109b5975d6bSopenharmony_ci /**
110b5975d6bSopenharmony_ci@@ -1039,6 +1050,7 @@ gboolean
111b5975d6bSopenharmony_ci g_match_info_next (GMatchInfo  *match_info,
112b5975d6bSopenharmony_ci                    GError     **error)
113b5975d6bSopenharmony_ci {
114b5975d6bSopenharmony_ci+  JITStatus jit_status;
115b5975d6bSopenharmony_ci   gint prev_match_start;
116b5975d6bSopenharmony_ci   gint prev_match_end;
117b5975d6bSopenharmony_ci   uint32_t opts;
118b5975d6bSopenharmony_ci@@ -1060,8 +1072,8 @@ g_match_info_next (GMatchInfo  *match_info,
119b5975d6bSopenharmony_ci 
120b5975d6bSopenharmony_ci   opts = match_info->regex->match_opts | match_info->match_opts;
121b5975d6bSopenharmony_ci 
122b5975d6bSopenharmony_ci-  enable_jit_with_match_options (match_info->regex, opts);
123b5975d6bSopenharmony_ci-  if (match_info->regex->jit_status == JIT_STATUS_ENABLED)
124b5975d6bSopenharmony_ci+  jit_status = enable_jit_with_match_options (match_info->regex, opts);
125b5975d6bSopenharmony_ci+  if (jit_status == JIT_STATUS_ENABLED)
126b5975d6bSopenharmony_ci     {
127b5975d6bSopenharmony_ci       match_info->matches = pcre2_jit_match (match_info->regex->pcre_re,
128b5975d6bSopenharmony_ci                                              (PCRE2_SPTR8) match_info->string,
129b5975d6bSopenharmony_ci@@ -1727,7 +1739,7 @@ g_regex_new (const gchar         *pattern,
130b5975d6bSopenharmony_ci   regex->orig_compile_opts = compile_options;
131b5975d6bSopenharmony_ci   regex->match_opts = pcre_match_options;
132b5975d6bSopenharmony_ci   regex->orig_match_opts = match_options;
133b5975d6bSopenharmony_ci-  enable_jit_with_match_options (regex, regex->match_opts);
134b5975d6bSopenharmony_ci+  regex->jit_status = enable_jit_with_match_options (regex, regex->match_opts);
135b5975d6bSopenharmony_ci 
136b5975d6bSopenharmony_ci   return regex;
137b5975d6bSopenharmony_ci }
138b5975d6bSopenharmony_cidiff --git a/glib/tests/regex.c b/glib/tests/regex.c
139b5975d6bSopenharmony_ciindex 26844d63a7..2052ba0204 100644
140b5975d6bSopenharmony_ci--- a/glib/tests/regex.c
141b5975d6bSopenharmony_ci+++ b/glib/tests/regex.c
142b5975d6bSopenharmony_ci@@ -2334,6 +2334,67 @@ test_compile_errors (void)
143b5975d6bSopenharmony_ci   g_clear_error (&error);
144b5975d6bSopenharmony_ci }
145b5975d6bSopenharmony_ci 
146b5975d6bSopenharmony_ci+static void
147b5975d6bSopenharmony_ci+test_jit_unsupported_matching_options (void)
148b5975d6bSopenharmony_ci+{
149b5975d6bSopenharmony_ci+  GRegex *regex;
150b5975d6bSopenharmony_ci+  GMatchInfo *info;
151b5975d6bSopenharmony_ci+  gchar *substring;
152b5975d6bSopenharmony_ci+
153b5975d6bSopenharmony_ci+  regex = g_regex_new ("(\\w+)#(\\w+)", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, NULL);
154b5975d6bSopenharmony_ci+
155b5975d6bSopenharmony_ci+  g_assert_true (g_regex_match (regex, "aa#bb cc#dd", G_REGEX_MATCH_DEFAULT, &info));
156b5975d6bSopenharmony_ci+  g_assert_cmpint (g_match_info_get_match_count (info), ==, 3);
157b5975d6bSopenharmony_ci+  substring = g_match_info_fetch (info, 1);
158b5975d6bSopenharmony_ci+  g_assert_cmpstr (substring, ==, "aa");
159b5975d6bSopenharmony_ci+  g_clear_pointer (&substring, g_free);
160b5975d6bSopenharmony_ci+  substring = g_match_info_fetch (info, 2);
161b5975d6bSopenharmony_ci+  g_assert_cmpstr (substring, ==, "bb");
162b5975d6bSopenharmony_ci+  g_clear_pointer (&substring, g_free);
163b5975d6bSopenharmony_ci+  g_assert_true (g_match_info_next (info, NULL));
164b5975d6bSopenharmony_ci+  g_assert_cmpint (g_match_info_get_match_count (info), ==, 3);
165b5975d6bSopenharmony_ci+  substring = g_match_info_fetch (info, 1);
166b5975d6bSopenharmony_ci+  g_assert_cmpstr (substring, ==, "cc");
167b5975d6bSopenharmony_ci+  g_clear_pointer (&substring, g_free);
168b5975d6bSopenharmony_ci+  substring = g_match_info_fetch (info, 2);
169b5975d6bSopenharmony_ci+  g_assert_cmpstr (substring, ==, "dd");
170b5975d6bSopenharmony_ci+  g_clear_pointer (&substring, g_free);
171b5975d6bSopenharmony_ci+  g_assert_false (g_match_info_next (info, NULL));
172b5975d6bSopenharmony_ci+  g_match_info_free (info);
173b5975d6bSopenharmony_ci+
174b5975d6bSopenharmony_ci+  g_assert_true (g_regex_match (regex, "aa#bb cc#dd", G_REGEX_MATCH_ANCHORED, &info));
175b5975d6bSopenharmony_ci+  g_assert_cmpint (g_match_info_get_match_count (info), ==, 3);
176b5975d6bSopenharmony_ci+  substring = g_match_info_fetch (info, 1);
177b5975d6bSopenharmony_ci+  g_assert_cmpstr (substring, ==, "aa");
178b5975d6bSopenharmony_ci+  g_clear_pointer (&substring, g_free);
179b5975d6bSopenharmony_ci+  substring = g_match_info_fetch (info, 2);
180b5975d6bSopenharmony_ci+  g_assert_cmpstr (substring, ==, "bb");
181b5975d6bSopenharmony_ci+  g_clear_pointer (&substring, g_free);
182b5975d6bSopenharmony_ci+  g_assert_false (g_match_info_next (info, NULL));
183b5975d6bSopenharmony_ci+  g_match_info_free (info);
184b5975d6bSopenharmony_ci+
185b5975d6bSopenharmony_ci+  g_assert_true (g_regex_match (regex, "aa#bb cc#dd", G_REGEX_MATCH_DEFAULT, &info));
186b5975d6bSopenharmony_ci+  g_assert_cmpint (g_match_info_get_match_count (info), ==, 3);
187b5975d6bSopenharmony_ci+  substring = g_match_info_fetch (info, 1);
188b5975d6bSopenharmony_ci+  g_assert_cmpstr (substring, ==, "aa");
189b5975d6bSopenharmony_ci+  g_clear_pointer (&substring, g_free);
190b5975d6bSopenharmony_ci+  substring = g_match_info_fetch (info, 2);
191b5975d6bSopenharmony_ci+  g_assert_cmpstr (substring, ==, "bb");
192b5975d6bSopenharmony_ci+  g_clear_pointer (&substring, g_free);
193b5975d6bSopenharmony_ci+  g_assert_true (g_match_info_next (info, NULL));
194b5975d6bSopenharmony_ci+  g_assert_cmpint (g_match_info_get_match_count (info), ==, 3);
195b5975d6bSopenharmony_ci+  substring = g_match_info_fetch (info, 1);
196b5975d6bSopenharmony_ci+  g_assert_cmpstr (substring, ==, "cc");
197b5975d6bSopenharmony_ci+  g_clear_pointer (&substring, g_free);
198b5975d6bSopenharmony_ci+  substring = g_match_info_fetch (info, 2);
199b5975d6bSopenharmony_ci+  g_assert_cmpstr (substring, ==, "dd");
200b5975d6bSopenharmony_ci+  g_clear_pointer (&substring, g_free);
201b5975d6bSopenharmony_ci+  g_assert_false (g_match_info_next (info, NULL));
202b5975d6bSopenharmony_ci+  g_match_info_free (info);
203b5975d6bSopenharmony_ci+
204b5975d6bSopenharmony_ci+  g_regex_unref (regex);
205b5975d6bSopenharmony_ci+}
206b5975d6bSopenharmony_ci+
207b5975d6bSopenharmony_ci int
208b5975d6bSopenharmony_ci main (int argc, char *argv[])
209b5975d6bSopenharmony_ci {
210b5975d6bSopenharmony_ci@@ -2352,6 +2413,7 @@ main (int argc, char *argv[])
211b5975d6bSopenharmony_ci   g_test_add_func ("/regex/explicit-crlf", test_explicit_crlf);
212b5975d6bSopenharmony_ci   g_test_add_func ("/regex/max-lookbehind", test_max_lookbehind);
213b5975d6bSopenharmony_ci   g_test_add_func ("/regex/compile-errors", test_compile_errors);
214b5975d6bSopenharmony_ci+  g_test_add_func ("/regex/jit-unsupported-matching", test_jit_unsupported_matching_options);
215b5975d6bSopenharmony_ci 
216b5975d6bSopenharmony_ci   /* TEST_NEW(pattern, compile_opts, match_opts) */
217b5975d6bSopenharmony_ci   TEST_NEW("[A-Z]+", G_REGEX_CASELESS | G_REGEX_EXTENDED | G_REGEX_OPTIMIZE, G_REGEX_MATCH_NOTBOL | G_REGEX_MATCH_PARTIAL);
218b5975d6bSopenharmony_ci@@ -2488,6 +2550,7 @@ main (int argc, char *argv[])
219b5975d6bSopenharmony_ci   TEST_MATCH_SIMPLE("a", "ab", 0, G_REGEX_MATCH_ANCHORED, TRUE);
220b5975d6bSopenharmony_ci   TEST_MATCH_SIMPLE("a", "a", G_REGEX_CASELESS, 0, TRUE);
221b5975d6bSopenharmony_ci   TEST_MATCH_SIMPLE("a", "A", G_REGEX_CASELESS, 0, TRUE);
222b5975d6bSopenharmony_ci+  TEST_MATCH_SIMPLE("\\C\\C", "ab", G_REGEX_OPTIMIZE | G_REGEX_RAW, 0, TRUE);
223b5975d6bSopenharmony_ci   /* These are needed to test extended properties. */
224b5975d6bSopenharmony_ci   TEST_MATCH_SIMPLE(AGRAVE, AGRAVE, G_REGEX_CASELESS, 0, TRUE);
225b5975d6bSopenharmony_ci   TEST_MATCH_SIMPLE(AGRAVE, AGRAVE_UPPER, G_REGEX_CASELESS, 0, TRUE);
226b5975d6bSopenharmony_ci@@ -2947,6 +3010,12 @@ main (int argc, char *argv[])
227b5975d6bSopenharmony_ci   TEST_REPLACE("\\S+", "hello world", 0, "\\U-\\0-", "-HELLO- -WORLD-");
228b5975d6bSopenharmony_ci   TEST_REPLACE(".", "a", 0, "\\A", NULL);
229b5975d6bSopenharmony_ci   TEST_REPLACE(".", "a", 0, "\\g", NULL);
230b5975d6bSopenharmony_ci+  TEST_REPLACE_OPTIONS("(\\w+)#(\\w+)", "aa#bb cc#dd", 0, "\\2#\\1", "bb#aa dd#cc",
231b5975d6bSopenharmony_ci+                       G_REGEX_OPTIMIZE|G_REGEX_MULTILINE|G_REGEX_CASELESS,
232b5975d6bSopenharmony_ci+                       0);
233b5975d6bSopenharmony_ci+  TEST_REPLACE_OPTIONS("(\\w+)#(\\w+)", "aa#bb cc#dd", 0, "\\2#\\1", "bb#aa cc#dd",
234b5975d6bSopenharmony_ci+                       G_REGEX_OPTIMIZE|G_REGEX_MULTILINE|G_REGEX_CASELESS,
235b5975d6bSopenharmony_ci+                       G_REGEX_MATCH_ANCHORED);
236b5975d6bSopenharmony_ci 
237b5975d6bSopenharmony_ci   /* TEST_REPLACE_LIT(pattern, string, start_position, replacement, expected) */
238b5975d6bSopenharmony_ci   TEST_REPLACE_LIT("a", "ababa", 0, "A", "AbAbA");
239b5975d6bSopenharmony_ci-- 
240b5975d6bSopenharmony_ciGitLab
241b5975d6bSopenharmony_ci
242