1b5975d6bSopenharmony_ciFrom 6caf952e48dbed40b5dcff01a94f57ba079b526c Mon Sep 17 00:00:00 2001 2b5975d6bSopenharmony_ciFrom: =?UTF-8?q?Marco=20Trevisan=20=28Trevi=C3=B1o=29?= <mail@3v1n0.net> 3b5975d6bSopenharmony_ciDate: Tue, 20 Sep 2022 18:06:35 +0200 4b5975d6bSopenharmony_ciSubject: [PATCH] gregex: Use pcre2 error messages if we don't provide a 5b5975d6bSopenharmony_ci specific one 6b5975d6bSopenharmony_ci 7b5975d6bSopenharmony_ciIn case we got a compilation or match error we should try to provide 8b5975d6bSopenharmony_cisome useful error message, if possible, before returning a quite obscure 9b5975d6bSopenharmony_ci"internal error" or "unknown error" string. 10b5975d6bSopenharmony_ci 11b5975d6bSopenharmony_ciSo rely on PCRE2 strings even if they're not translated they can provide 12b5975d6bSopenharmony_cibetter information than the ones we're currently giving. 13b5975d6bSopenharmony_ci 14b5975d6bSopenharmony_ciRelated to: https://gitlab.gnome.org/GNOME/glib/-/issues/2691 15b5975d6bSopenharmony_ciRelated to: https://gitlab.gnome.org/GNOME/glib/-/issues/2760 16b5975d6bSopenharmony_ci 17b5975d6bSopenharmony_ciConflict:NA 18b5975d6bSopenharmony_ciReference:https://gitlab.gnome.org/GNOME/glib/-/commit/6caf952e48dbed40b5dcff01a94f57ba079b526c 19b5975d6bSopenharmony_ci 20b5975d6bSopenharmony_ci--- 21b5975d6bSopenharmony_ci glib/gregex.c | 64 ++++++++++++++++++++++++++++++++++++++++------ 22b5975d6bSopenharmony_ci glib/tests/regex.c | 2 ++ 23b5975d6bSopenharmony_ci 2 files changed, 58 insertions(+), 8 deletions(-) 24b5975d6bSopenharmony_ci 25b5975d6bSopenharmony_cidiff --git a/glib/gregex.c b/glib/gregex.c 26b5975d6bSopenharmony_ciindex 220a1a11ac..fcc28d62f4 100644 27b5975d6bSopenharmony_ci--- a/glib/gregex.c 28b5975d6bSopenharmony_ci+++ b/glib/gregex.c 29b5975d6bSopenharmony_ci@@ -456,8 +456,25 @@ get_pcre2_bsr_match_options (GRegexMatchFlags match_flags) 30b5975d6bSopenharmony_ci return 0; 31b5975d6bSopenharmony_ci } 32b5975d6bSopenharmony_ci 33b5975d6bSopenharmony_ci+static char * 34b5975d6bSopenharmony_ci+get_pcre2_error_string (int errcode) 35b5975d6bSopenharmony_ci+{ 36b5975d6bSopenharmony_ci+ PCRE2_UCHAR8 error_msg[2048]; 37b5975d6bSopenharmony_ci+ int err_length; 38b5975d6bSopenharmony_ci+ 39b5975d6bSopenharmony_ci+ err_length = pcre2_get_error_message (errcode, error_msg, 40b5975d6bSopenharmony_ci+ G_N_ELEMENTS (error_msg)); 41b5975d6bSopenharmony_ci+ 42b5975d6bSopenharmony_ci+ if (err_length <= 0) 43b5975d6bSopenharmony_ci+ return NULL; 44b5975d6bSopenharmony_ci+ 45b5975d6bSopenharmony_ci+ /* The array is always filled with a trailing zero */ 46b5975d6bSopenharmony_ci+ g_assert ((size_t) err_length < G_N_ELEMENTS (error_msg)); 47b5975d6bSopenharmony_ci+ return g_memdup2 (error_msg, err_length + 1); 48b5975d6bSopenharmony_ci+} 49b5975d6bSopenharmony_ci+ 50b5975d6bSopenharmony_ci static const gchar * 51b5975d6bSopenharmony_ci-match_error (gint errcode) 52b5975d6bSopenharmony_ci+translate_match_error (gint errcode) 53b5975d6bSopenharmony_ci { 54b5975d6bSopenharmony_ci switch (errcode) 55b5975d6bSopenharmony_ci { 56b5975d6bSopenharmony_ci@@ -511,7 +528,24 @@ match_error (gint errcode) 57b5975d6bSopenharmony_ci default: 58b5975d6bSopenharmony_ci break; 59b5975d6bSopenharmony_ci } 60b5975d6bSopenharmony_ci- return _("unknown error"); 61b5975d6bSopenharmony_ci+ return NULL; 62b5975d6bSopenharmony_ci+} 63b5975d6bSopenharmony_ci+ 64b5975d6bSopenharmony_ci+static char * 65b5975d6bSopenharmony_ci+get_match_error_message (int errcode) 66b5975d6bSopenharmony_ci+{ 67b5975d6bSopenharmony_ci+ const char *msg = translate_match_error (errcode); 68b5975d6bSopenharmony_ci+ char *error_string; 69b5975d6bSopenharmony_ci+ 70b5975d6bSopenharmony_ci+ if (msg) 71b5975d6bSopenharmony_ci+ return g_strdup (msg); 72b5975d6bSopenharmony_ci+ 73b5975d6bSopenharmony_ci+ error_string = get_pcre2_error_string (errcode); 74b5975d6bSopenharmony_ci+ 75b5975d6bSopenharmony_ci+ if (error_string) 76b5975d6bSopenharmony_ci+ return error_string; 77b5975d6bSopenharmony_ci+ 78b5975d6bSopenharmony_ci+ return g_strdup (_("unknown error")); 79b5975d6bSopenharmony_ci } 80b5975d6bSopenharmony_ci 81b5975d6bSopenharmony_ci static void 82b5975d6bSopenharmony_ci@@ -743,7 +777,6 @@ translate_compile_error (gint *errcode, const gchar **errmsg) 83b5975d6bSopenharmony_ci case PCRE2_ERROR_INTERNAL_BAD_CODE: 84b5975d6bSopenharmony_ci case PCRE2_ERROR_INTERNAL_BAD_CODE_IN_SKIP: 85b5975d6bSopenharmony_ci *errcode = G_REGEX_ERROR_INTERNAL; 86b5975d6bSopenharmony_ci- *errmsg = _("internal error"); 87b5975d6bSopenharmony_ci break; 88b5975d6bSopenharmony_ci case PCRE2_ERROR_INVALID_SUBPATTERN_NAME: 89b5975d6bSopenharmony_ci case PCRE2_ERROR_CLASS_INVALID_RANGE: 90b5975d6bSopenharmony_ci@@ -772,12 +805,10 @@ translate_compile_error (gint *errcode, const gchar **errmsg) 91b5975d6bSopenharmony_ci case PCRE2_ERROR_BAD_LITERAL_OPTIONS: 92b5975d6bSopenharmony_ci default: 93b5975d6bSopenharmony_ci *errcode = G_REGEX_ERROR_COMPILE; 94b5975d6bSopenharmony_ci- *errmsg = _("internal error"); 95b5975d6bSopenharmony_ci break; 96b5975d6bSopenharmony_ci } 97b5975d6bSopenharmony_ci 98b5975d6bSopenharmony_ci g_assert (*errcode != -1); 99b5975d6bSopenharmony_ci- g_assert (*errmsg != NULL); 100b5975d6bSopenharmony_ci } 101b5975d6bSopenharmony_ci 102b5975d6bSopenharmony_ci /* GMatchInfo */ 103b5975d6bSopenharmony_ci@@ -1096,9 +1127,12 @@ g_match_info_next (GMatchInfo *match_info, 104b5975d6bSopenharmony_ci 105b5975d6bSopenharmony_ci if (IS_PCRE2_ERROR (match_info->matches)) 106b5975d6bSopenharmony_ci { 107b5975d6bSopenharmony_ci+ gchar *error_msg = get_match_error_message (match_info->matches); 108b5975d6bSopenharmony_ci+ 109b5975d6bSopenharmony_ci g_set_error (error, G_REGEX_ERROR, G_REGEX_ERROR_MATCH, 110b5975d6bSopenharmony_ci _("Error while matching regular expression %s: %s"), 111b5975d6bSopenharmony_ci- match_info->regex->pattern, match_error (match_info->matches)); 112b5975d6bSopenharmony_ci+ match_info->regex->pattern, error_msg); 113b5975d6bSopenharmony_ci+ g_clear_pointer (&error_msg, g_free); 114b5975d6bSopenharmony_ci return FALSE; 115b5975d6bSopenharmony_ci } 116b5975d6bSopenharmony_ci else if (match_info->matches == 0) 117b5975d6bSopenharmony_ci@@ -1800,11 +1834,20 @@ regex_compile (const gchar *pattern, 118b5975d6bSopenharmony_ci { 119b5975d6bSopenharmony_ci GError *tmp_error; 120b5975d6bSopenharmony_ci gchar *offset_str; 121b5975d6bSopenharmony_ci+ gchar *pcre2_errmsg = NULL; 122b5975d6bSopenharmony_ci+ int original_errcode; 123b5975d6bSopenharmony_ci 124b5975d6bSopenharmony_ci /* Translate the PCRE error code to GRegexError and use a translated 125b5975d6bSopenharmony_ci * error message if possible */ 126b5975d6bSopenharmony_ci+ original_errcode = errcode; 127b5975d6bSopenharmony_ci translate_compile_error (&errcode, &errmsg); 128b5975d6bSopenharmony_ci 129b5975d6bSopenharmony_ci+ if (!errmsg) 130b5975d6bSopenharmony_ci+ { 131b5975d6bSopenharmony_ci+ errmsg = _("unknown error"); 132b5975d6bSopenharmony_ci+ pcre2_errmsg = get_pcre2_error_string (original_errcode); 133b5975d6bSopenharmony_ci+ } 134b5975d6bSopenharmony_ci+ 135b5975d6bSopenharmony_ci /* PCRE uses byte offsets but we want to show character offsets */ 136b5975d6bSopenharmony_ci erroffset = g_utf8_pointer_to_offset (pattern, &pattern[erroffset]); 137b5975d6bSopenharmony_ci 138b5975d6bSopenharmony_ci@@ -1812,9 +1855,11 @@ regex_compile (const gchar *pattern, 139b5975d6bSopenharmony_ci tmp_error = g_error_new (G_REGEX_ERROR, errcode, 140b5975d6bSopenharmony_ci _("Error while compiling regular expression ‘%s’ " 141b5975d6bSopenharmony_ci "at char %s: %s"), 142b5975d6bSopenharmony_ci- pattern, offset_str, errmsg); 143b5975d6bSopenharmony_ci+ pattern, offset_str, 144b5975d6bSopenharmony_ci+ pcre2_errmsg ? pcre2_errmsg : errmsg); 145b5975d6bSopenharmony_ci g_propagate_error (error, tmp_error); 146b5975d6bSopenharmony_ci g_free (offset_str); 147b5975d6bSopenharmony_ci+ g_clear_pointer (&pcre2_errmsg, g_free); 148b5975d6bSopenharmony_ci 149b5975d6bSopenharmony_ci return NULL; 150b5975d6bSopenharmony_ci } 151b5975d6bSopenharmony_ci@@ -2402,9 +2447,12 @@ g_regex_match_all_full (const GRegex *regex, 152b5975d6bSopenharmony_ci } 153b5975d6bSopenharmony_ci else if (IS_PCRE2_ERROR (info->matches)) 154b5975d6bSopenharmony_ci { 155b5975d6bSopenharmony_ci+ gchar *error_msg = get_match_error_message (info->matches); 156b5975d6bSopenharmony_ci+ 157b5975d6bSopenharmony_ci g_set_error (error, G_REGEX_ERROR, G_REGEX_ERROR_MATCH, 158b5975d6bSopenharmony_ci _("Error while matching regular expression %s: %s"), 159b5975d6bSopenharmony_ci- regex->pattern, match_error (info->matches)); 160b5975d6bSopenharmony_ci+ regex->pattern, error_msg); 161b5975d6bSopenharmony_ci+ g_clear_pointer (&error_msg, g_free); 162b5975d6bSopenharmony_ci } 163b5975d6bSopenharmony_ci else if (info->matches != PCRE2_ERROR_NOMATCH) 164b5975d6bSopenharmony_ci { 165b5975d6bSopenharmony_cidiff --git a/glib/tests/regex.c b/glib/tests/regex.c 166b5975d6bSopenharmony_ciindex 9803d49659..52af212f29 100644 167b5975d6bSopenharmony_ci--- a/glib/tests/regex.c 168b5975d6bSopenharmony_ci+++ b/glib/tests/regex.c 169b5975d6bSopenharmony_ci@@ -2560,6 +2560,7 @@ main (int argc, char *argv[]) 170b5975d6bSopenharmony_ci TEST_NEW_FAIL ("[a-z", 0, G_REGEX_ERROR_UNTERMINATED_CHARACTER_CLASS); 171b5975d6bSopenharmony_ci TEST_NEW_FAIL ("[\\B]", 0, G_REGEX_ERROR_INVALID_ESCAPE_IN_CHARACTER_CLASS); 172b5975d6bSopenharmony_ci TEST_NEW_FAIL ("[z-a]", 0, G_REGEX_ERROR_RANGE_OUT_OF_ORDER); 173b5975d6bSopenharmony_ci+ TEST_NEW_FAIL ("^[[:alnum:]-_.]+$", 0, G_REGEX_ERROR_COMPILE); 174b5975d6bSopenharmony_ci TEST_NEW_FAIL ("{2,4}", 0, G_REGEX_ERROR_NOTHING_TO_REPEAT); 175b5975d6bSopenharmony_ci TEST_NEW_FAIL ("a(?u)", 0, G_REGEX_ERROR_UNRECOGNIZED_CHARACTER); 176b5975d6bSopenharmony_ci TEST_NEW_FAIL ("a(?<$foo)bar", 0, G_REGEX_ERROR_MISSING_SUBPATTERN_NAME); 177b5975d6bSopenharmony_ci@@ -2636,6 +2637,7 @@ main (int argc, char *argv[]) 178b5975d6bSopenharmony_ci TEST_MATCH_SIMPLE("a", "a", G_REGEX_CASELESS, 0, TRUE); 179b5975d6bSopenharmony_ci TEST_MATCH_SIMPLE("a", "A", G_REGEX_CASELESS, 0, TRUE); 180b5975d6bSopenharmony_ci TEST_MATCH_SIMPLE("\\C\\C", "ab", G_REGEX_OPTIMIZE | G_REGEX_RAW, 0, TRUE); 181b5975d6bSopenharmony_ci+ TEST_MATCH_SIMPLE("^[[:alnum:]\\-_.]+$", "admin-foo", 0, 0, TRUE); 182b5975d6bSopenharmony_ci /* These are needed to test extended properties. */ 183b5975d6bSopenharmony_ci TEST_MATCH_SIMPLE(AGRAVE, AGRAVE, G_REGEX_CASELESS, 0, TRUE); 184b5975d6bSopenharmony_ci TEST_MATCH_SIMPLE(AGRAVE, AGRAVE_UPPER, G_REGEX_CASELESS, 0, TRUE); 185b5975d6bSopenharmony_ci-- 186b5975d6bSopenharmony_ciGitLab 187b5975d6bSopenharmony_ci 188