1b5975d6bSopenharmony_ciFrom bec68b2d74853de5e23ee40c890433fa336ffbc5 Mon Sep 17 00:00:00 2001 2b5975d6bSopenharmony_ciFrom: =?UTF-8?q?Marco=20Trevisan=20=28Trevi=C3=B1o=29?= <mail@3v1n0.net> 3b5975d6bSopenharmony_ciDate: Fri, 9 Sep 2022 18:30:15 +0200 4b5975d6bSopenharmony_ciSubject: [PATCH] glib/regex: Do not use JIT when using unsupported match 5b5975d6bSopenharmony_ci options 6b5975d6bSopenharmony_ci 7b5975d6bSopenharmony_ciDo not store jit status for regex unless during initial compilation. 8b5975d6bSopenharmony_ciAfter that, decide whether to use it depending on matching options. 9b5975d6bSopenharmony_ci 10b5975d6bSopenharmony_ciIn fact there are some matching options that are incompatible with JIT, 11b5975d6bSopenharmony_cias the PCRE2 docs state: 12b5975d6bSopenharmony_ci 13b5975d6bSopenharmony_ci Setting PCRE2_ANCHORED or PCRE2_ENDANCHORED at match time is not 14b5975d6bSopenharmony_ci supported by the just-in-time (JIT) compiler. If it is set, JIT 15b5975d6bSopenharmony_ci matching is disabled and the interpretive code in pcre2_match() is 16b5975d6bSopenharmony_ci run. Apart from PCRE2_NO_JIT (obviously), the remaining options are 17b5975d6bSopenharmony_ci supported for JIT matching. 
18b5975d6bSopenharmony_ci 19b5975d6bSopenharmony_ciFixes: GNOME/gtksourceview#283 20b5975d6bSopenharmony_ci--- 21b5975d6bSopenharmony_ci glib/gregex.c | 38 ++++++++++++++++--------- 22b5975d6bSopenharmony_ci glib/tests/regex.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++ 23b5975d6bSopenharmony_ci 2 files changed, 94 insertions(+), 13 deletions(-) 24b5975d6bSopenharmony_ci 25b5975d6bSopenharmony_cidiff --git a/glib/gregex.c b/glib/gregex.c 26b5975d6bSopenharmony_ciindex fe7473e628..220a1a11ac 100644 27b5975d6bSopenharmony_ci--- a/glib/gregex.c 28b5975d6bSopenharmony_ci+++ b/glib/gregex.c 29b5975d6bSopenharmony_ci@@ -201,6 +201,13 @@ 30b5975d6bSopenharmony_ci PCRE2_NEWLINE_CRLF | \ 31b5975d6bSopenharmony_ci PCRE2_NEWLINE_ANYCRLF) 32b5975d6bSopenharmony_ci 33b5975d6bSopenharmony_ci+/* Some match options are not supported when using JIT as stated in the 34b5975d6bSopenharmony_ci+ * pcre2jit man page under the «UNSUPPORTED OPTIONS AND PATTERN ITEMS» section: 35b5975d6bSopenharmony_ci+ * https://www.pcre.org/current/doc/html/pcre2jit.html#SEC5 36b5975d6bSopenharmony_ci+ */ 37b5975d6bSopenharmony_ci+#define G_REGEX_PCRE2_JIT_UNSUPPORTED_OPTIONS (PCRE2_ANCHORED | \ 38b5975d6bSopenharmony_ci+ PCRE2_ENDANCHORED) 39b5975d6bSopenharmony_ci+ 40b5975d6bSopenharmony_ci #define G_REGEX_COMPILE_NEWLINE_MASK (G_REGEX_NEWLINE_CR | \ 41b5975d6bSopenharmony_ci G_REGEX_NEWLINE_LF | \ 42b5975d6bSopenharmony_ci G_REGEX_NEWLINE_CRLF | \ 43b5975d6bSopenharmony_ci@@ -869,7 +876,7 @@ recalc_match_offsets (GMatchInfo *match_info, 44b5975d6bSopenharmony_ci return TRUE; 45b5975d6bSopenharmony_ci } 46b5975d6bSopenharmony_ci 47b5975d6bSopenharmony_ci-static void 48b5975d6bSopenharmony_ci+static JITStatus 49b5975d6bSopenharmony_ci enable_jit_with_match_options (GRegex *regex, 50b5975d6bSopenharmony_ci uint32_t match_options) 51b5975d6bSopenharmony_ci { 52b5975d6bSopenharmony_ci@@ -877,9 +884,13 @@ enable_jit_with_match_options (GRegex *regex, 53b5975d6bSopenharmony_ci uint32_t 
old_jit_options, new_jit_options; 54b5975d6bSopenharmony_ci 55b5975d6bSopenharmony_ci if (!(regex->orig_compile_opts & G_REGEX_OPTIMIZE)) 56b5975d6bSopenharmony_ci- return; 57b5975d6bSopenharmony_ci+ return JIT_STATUS_DISABLED; 58b5975d6bSopenharmony_ci+ 59b5975d6bSopenharmony_ci if (regex->jit_status == JIT_STATUS_DISABLED) 60b5975d6bSopenharmony_ci- return; 61b5975d6bSopenharmony_ci+ return JIT_STATUS_DISABLED; 62b5975d6bSopenharmony_ci+ 63b5975d6bSopenharmony_ci+ if (match_options & G_REGEX_PCRE2_JIT_UNSUPPORTED_OPTIONS) 64b5975d6bSopenharmony_ci+ return JIT_STATUS_DISABLED; 65b5975d6bSopenharmony_ci 66b5975d6bSopenharmony_ci old_jit_options = regex->jit_options; 67b5975d6bSopenharmony_ci new_jit_options = old_jit_options | PCRE2_JIT_COMPLETE; 68b5975d6bSopenharmony_ci@@ -890,34 +901,34 @@ enable_jit_with_match_options (GRegex *regex, 69b5975d6bSopenharmony_ci 70b5975d6bSopenharmony_ci /* no new options enabled */ 71b5975d6bSopenharmony_ci if (new_jit_options == old_jit_options) 72b5975d6bSopenharmony_ci- return; 73b5975d6bSopenharmony_ci+ return regex->jit_status; 74b5975d6bSopenharmony_ci 75b5975d6bSopenharmony_ci retval = pcre2_jit_compile (regex->pcre_re, new_jit_options); 76b5975d6bSopenharmony_ci switch (retval) 77b5975d6bSopenharmony_ci { 78b5975d6bSopenharmony_ci case 0: /* JIT enabled successfully */ 79b5975d6bSopenharmony_ci- regex->jit_status = JIT_STATUS_ENABLED; 80b5975d6bSopenharmony_ci regex->jit_options = new_jit_options; 81b5975d6bSopenharmony_ci- break; 82b5975d6bSopenharmony_ci+ return JIT_STATUS_ENABLED; 83b5975d6bSopenharmony_ci case PCRE2_ERROR_NOMEMORY: 84b5975d6bSopenharmony_ci g_debug ("JIT compilation was requested with G_REGEX_OPTIMIZE, " 85b5975d6bSopenharmony_ci "but JIT was unable to allocate executable memory for the " 86b5975d6bSopenharmony_ci "compiler. 
Falling back to interpretive code."); 87b5975d6bSopenharmony_ci- regex->jit_status = JIT_STATUS_DISABLED; 88b5975d6bSopenharmony_ci- break; 89b5975d6bSopenharmony_ci+ return JIT_STATUS_DISABLED; 90b5975d6bSopenharmony_ci case PCRE2_ERROR_JIT_BADOPTION: 91b5975d6bSopenharmony_ci g_debug ("JIT compilation was requested with G_REGEX_OPTIMIZE, " 92b5975d6bSopenharmony_ci "but JIT support is not available. Falling back to " 93b5975d6bSopenharmony_ci "interpretive code."); 94b5975d6bSopenharmony_ci- regex->jit_status = JIT_STATUS_DISABLED; 95b5975d6bSopenharmony_ci+ return JIT_STATUS_DISABLED; 96b5975d6bSopenharmony_ci break; 97b5975d6bSopenharmony_ci default: 98b5975d6bSopenharmony_ci g_debug ("JIT compilation was requested with G_REGEX_OPTIMIZE, " 99b5975d6bSopenharmony_ci "but request for JIT support had unexpectedly failed (error %d). " 100b5975d6bSopenharmony_ci "Falling back to interpretive code.", retval); 101b5975d6bSopenharmony_ci- regex->jit_status = JIT_STATUS_DISABLED; 102b5975d6bSopenharmony_ci+ return JIT_STATUS_DISABLED; 103b5975d6bSopenharmony_ci break; 104b5975d6bSopenharmony_ci } 105b5975d6bSopenharmony_ci+ 106b5975d6bSopenharmony_ci+ return regex->jit_status; 107b5975d6bSopenharmony_ci } 108b5975d6bSopenharmony_ci 109b5975d6bSopenharmony_ci /** 110b5975d6bSopenharmony_ci@@ -1039,6 +1050,7 @@ gboolean 111b5975d6bSopenharmony_ci g_match_info_next (GMatchInfo *match_info, 112b5975d6bSopenharmony_ci GError **error) 113b5975d6bSopenharmony_ci { 114b5975d6bSopenharmony_ci+ JITStatus jit_status; 115b5975d6bSopenharmony_ci gint prev_match_start; 116b5975d6bSopenharmony_ci gint prev_match_end; 117b5975d6bSopenharmony_ci uint32_t opts; 118b5975d6bSopenharmony_ci@@ -1060,8 +1072,8 @@ g_match_info_next (GMatchInfo *match_info, 119b5975d6bSopenharmony_ci 120b5975d6bSopenharmony_ci opts = match_info->regex->match_opts | match_info->match_opts; 121b5975d6bSopenharmony_ci 122b5975d6bSopenharmony_ci- enable_jit_with_match_options (match_info->regex, opts); 
123b5975d6bSopenharmony_ci- if (match_info->regex->jit_status == JIT_STATUS_ENABLED) 124b5975d6bSopenharmony_ci+ jit_status = enable_jit_with_match_options (match_info->regex, opts); 125b5975d6bSopenharmony_ci+ if (jit_status == JIT_STATUS_ENABLED) 126b5975d6bSopenharmony_ci { 127b5975d6bSopenharmony_ci match_info->matches = pcre2_jit_match (match_info->regex->pcre_re, 128b5975d6bSopenharmony_ci (PCRE2_SPTR8) match_info->string, 129b5975d6bSopenharmony_ci@@ -1727,7 +1739,7 @@ g_regex_new (const gchar *pattern, 130b5975d6bSopenharmony_ci regex->orig_compile_opts = compile_options; 131b5975d6bSopenharmony_ci regex->match_opts = pcre_match_options; 132b5975d6bSopenharmony_ci regex->orig_match_opts = match_options; 133b5975d6bSopenharmony_ci- enable_jit_with_match_options (regex, regex->match_opts); 134b5975d6bSopenharmony_ci+ regex->jit_status = enable_jit_with_match_options (regex, regex->match_opts); 135b5975d6bSopenharmony_ci 136b5975d6bSopenharmony_ci return regex; 137b5975d6bSopenharmony_ci } 138b5975d6bSopenharmony_cidiff --git a/glib/tests/regex.c b/glib/tests/regex.c 139b5975d6bSopenharmony_ciindex 26844d63a7..2052ba0204 100644 140b5975d6bSopenharmony_ci--- a/glib/tests/regex.c 141b5975d6bSopenharmony_ci+++ b/glib/tests/regex.c 142b5975d6bSopenharmony_ci@@ -2334,6 +2334,67 @@ test_compile_errors (void) 143b5975d6bSopenharmony_ci g_clear_error (&error); 144b5975d6bSopenharmony_ci } 145b5975d6bSopenharmony_ci 146b5975d6bSopenharmony_ci+static void 147b5975d6bSopenharmony_ci+test_jit_unsupported_matching_options (void) 148b5975d6bSopenharmony_ci+{ 149b5975d6bSopenharmony_ci+ GRegex *regex; 150b5975d6bSopenharmony_ci+ GMatchInfo *info; 151b5975d6bSopenharmony_ci+ gchar *substring; 152b5975d6bSopenharmony_ci+ 153b5975d6bSopenharmony_ci+ regex = g_regex_new ("(\\w+)#(\\w+)", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, NULL); 154b5975d6bSopenharmony_ci+ 155b5975d6bSopenharmony_ci+ g_assert_true (g_regex_match (regex, "aa#bb cc#dd", G_REGEX_MATCH_DEFAULT, &info)); 
156b5975d6bSopenharmony_ci+ g_assert_cmpint (g_match_info_get_match_count (info), ==, 3); 157b5975d6bSopenharmony_ci+ substring = g_match_info_fetch (info, 1); 158b5975d6bSopenharmony_ci+ g_assert_cmpstr (substring, ==, "aa"); 159b5975d6bSopenharmony_ci+ g_clear_pointer (&substring, g_free); 160b5975d6bSopenharmony_ci+ substring = g_match_info_fetch (info, 2); 161b5975d6bSopenharmony_ci+ g_assert_cmpstr (substring, ==, "bb"); 162b5975d6bSopenharmony_ci+ g_clear_pointer (&substring, g_free); 163b5975d6bSopenharmony_ci+ g_assert_true (g_match_info_next (info, NULL)); 164b5975d6bSopenharmony_ci+ g_assert_cmpint (g_match_info_get_match_count (info), ==, 3); 165b5975d6bSopenharmony_ci+ substring = g_match_info_fetch (info, 1); 166b5975d6bSopenharmony_ci+ g_assert_cmpstr (substring, ==, "cc"); 167b5975d6bSopenharmony_ci+ g_clear_pointer (&substring, g_free); 168b5975d6bSopenharmony_ci+ substring = g_match_info_fetch (info, 2); 169b5975d6bSopenharmony_ci+ g_assert_cmpstr (substring, ==, "dd"); 170b5975d6bSopenharmony_ci+ g_clear_pointer (&substring, g_free); 171b5975d6bSopenharmony_ci+ g_assert_false (g_match_info_next (info, NULL)); 172b5975d6bSopenharmony_ci+ g_match_info_free (info); 173b5975d6bSopenharmony_ci+ 174b5975d6bSopenharmony_ci+ g_assert_true (g_regex_match (regex, "aa#bb cc#dd", G_REGEX_MATCH_ANCHORED, &info)); 175b5975d6bSopenharmony_ci+ g_assert_cmpint (g_match_info_get_match_count (info), ==, 3); 176b5975d6bSopenharmony_ci+ substring = g_match_info_fetch (info, 1); 177b5975d6bSopenharmony_ci+ g_assert_cmpstr (substring, ==, "aa"); 178b5975d6bSopenharmony_ci+ g_clear_pointer (&substring, g_free); 179b5975d6bSopenharmony_ci+ substring = g_match_info_fetch (info, 2); 180b5975d6bSopenharmony_ci+ g_assert_cmpstr (substring, ==, "bb"); 181b5975d6bSopenharmony_ci+ g_clear_pointer (&substring, g_free); 182b5975d6bSopenharmony_ci+ g_assert_false (g_match_info_next (info, NULL)); 183b5975d6bSopenharmony_ci+ g_match_info_free (info); 184b5975d6bSopenharmony_ci+ 
185b5975d6bSopenharmony_ci+ g_assert_true (g_regex_match (regex, "aa#bb cc#dd", G_REGEX_MATCH_DEFAULT, &info)); 186b5975d6bSopenharmony_ci+ g_assert_cmpint (g_match_info_get_match_count (info), ==, 3); 187b5975d6bSopenharmony_ci+ substring = g_match_info_fetch (info, 1); 188b5975d6bSopenharmony_ci+ g_assert_cmpstr (substring, ==, "aa"); 189b5975d6bSopenharmony_ci+ g_clear_pointer (&substring, g_free); 190b5975d6bSopenharmony_ci+ substring = g_match_info_fetch (info, 2); 191b5975d6bSopenharmony_ci+ g_assert_cmpstr (substring, ==, "bb"); 192b5975d6bSopenharmony_ci+ g_clear_pointer (&substring, g_free); 193b5975d6bSopenharmony_ci+ g_assert_true (g_match_info_next (info, NULL)); 194b5975d6bSopenharmony_ci+ g_assert_cmpint (g_match_info_get_match_count (info), ==, 3); 195b5975d6bSopenharmony_ci+ substring = g_match_info_fetch (info, 1); 196b5975d6bSopenharmony_ci+ g_assert_cmpstr (substring, ==, "cc"); 197b5975d6bSopenharmony_ci+ g_clear_pointer (&substring, g_free); 198b5975d6bSopenharmony_ci+ substring = g_match_info_fetch (info, 2); 199b5975d6bSopenharmony_ci+ g_assert_cmpstr (substring, ==, "dd"); 200b5975d6bSopenharmony_ci+ g_clear_pointer (&substring, g_free); 201b5975d6bSopenharmony_ci+ g_assert_false (g_match_info_next (info, NULL)); 202b5975d6bSopenharmony_ci+ g_match_info_free (info); 203b5975d6bSopenharmony_ci+ 204b5975d6bSopenharmony_ci+ g_regex_unref (regex); 205b5975d6bSopenharmony_ci+} 206b5975d6bSopenharmony_ci+ 207b5975d6bSopenharmony_ci int 208b5975d6bSopenharmony_ci main (int argc, char *argv[]) 209b5975d6bSopenharmony_ci { 210b5975d6bSopenharmony_ci@@ -2352,6 +2413,7 @@ main (int argc, char *argv[]) 211b5975d6bSopenharmony_ci g_test_add_func ("/regex/explicit-crlf", test_explicit_crlf); 212b5975d6bSopenharmony_ci g_test_add_func ("/regex/max-lookbehind", test_max_lookbehind); 213b5975d6bSopenharmony_ci g_test_add_func ("/regex/compile-errors", test_compile_errors); 214b5975d6bSopenharmony_ci+ g_test_add_func ("/regex/jit-unsupported-matching", 
test_jit_unsupported_matching_options); 215b5975d6bSopenharmony_ci 216b5975d6bSopenharmony_ci /* TEST_NEW(pattern, compile_opts, match_opts) */ 217b5975d6bSopenharmony_ci TEST_NEW("[A-Z]+", G_REGEX_CASELESS | G_REGEX_EXTENDED | G_REGEX_OPTIMIZE, G_REGEX_MATCH_NOTBOL | G_REGEX_MATCH_PARTIAL); 218b5975d6bSopenharmony_ci@@ -2488,6 +2550,7 @@ main (int argc, char *argv[]) 219b5975d6bSopenharmony_ci TEST_MATCH_SIMPLE("a", "ab", 0, G_REGEX_MATCH_ANCHORED, TRUE); 220b5975d6bSopenharmony_ci TEST_MATCH_SIMPLE("a", "a", G_REGEX_CASELESS, 0, TRUE); 221b5975d6bSopenharmony_ci TEST_MATCH_SIMPLE("a", "A", G_REGEX_CASELESS, 0, TRUE); 222b5975d6bSopenharmony_ci+ TEST_MATCH_SIMPLE("\\C\\C", "ab", G_REGEX_OPTIMIZE | G_REGEX_RAW, 0, TRUE); 223b5975d6bSopenharmony_ci /* These are needed to test extended properties. */ 224b5975d6bSopenharmony_ci TEST_MATCH_SIMPLE(AGRAVE, AGRAVE, G_REGEX_CASELESS, 0, TRUE); 225b5975d6bSopenharmony_ci TEST_MATCH_SIMPLE(AGRAVE, AGRAVE_UPPER, G_REGEX_CASELESS, 0, TRUE); 226b5975d6bSopenharmony_ci@@ -2947,6 +3010,12 @@ main (int argc, char *argv[]) 227b5975d6bSopenharmony_ci TEST_REPLACE("\\S+", "hello world", 0, "\\U-\\0-", "-HELLO- -WORLD-"); 228b5975d6bSopenharmony_ci TEST_REPLACE(".", "a", 0, "\\A", NULL); 229b5975d6bSopenharmony_ci TEST_REPLACE(".", "a", 0, "\\g", NULL); 230b5975d6bSopenharmony_ci+ TEST_REPLACE_OPTIONS("(\\w+)#(\\w+)", "aa#bb cc#dd", 0, "\\2#\\1", "bb#aa dd#cc", 231b5975d6bSopenharmony_ci+ G_REGEX_OPTIMIZE|G_REGEX_MULTILINE|G_REGEX_CASELESS, 232b5975d6bSopenharmony_ci+ 0); 233b5975d6bSopenharmony_ci+ TEST_REPLACE_OPTIONS("(\\w+)#(\\w+)", "aa#bb cc#dd", 0, "\\2#\\1", "bb#aa cc#dd", 234b5975d6bSopenharmony_ci+ G_REGEX_OPTIMIZE|G_REGEX_MULTILINE|G_REGEX_CASELESS, 235b5975d6bSopenharmony_ci+ G_REGEX_MATCH_ANCHORED); 236b5975d6bSopenharmony_ci 237b5975d6bSopenharmony_ci /* TEST_REPLACE_LIT(pattern, string, start_position, replacement, expected) */ 238b5975d6bSopenharmony_ci TEST_REPLACE_LIT("a", "ababa", 0, "A", "AbAbA"); 
239b5975d6bSopenharmony_ci-- 240b5975d6bSopenharmony_ciGitLab 241b5975d6bSopenharmony_ci 242