1b5975d6bSopenharmony_ciFrom 6caf952e48dbed40b5dcff01a94f57ba079b526c Mon Sep 17 00:00:00 2001
2b5975d6bSopenharmony_ciFrom: =?UTF-8?q?Marco=20Trevisan=20=28Trevi=C3=B1o=29?= <mail@3v1n0.net>
3b5975d6bSopenharmony_ciDate: Tue, 20 Sep 2022 18:06:35 +0200
4b5975d6bSopenharmony_ciSubject: [PATCH] gregex: Use pcre2 error messages if we don't provide a
5b5975d6bSopenharmony_ci specific one
6b5975d6bSopenharmony_ci
7b5975d6bSopenharmony_ciIn case we get a compilation or match error, we should try to provide
8b5975d6bSopenharmony_cisome useful error message, if possible, before returning a quite obscure
9b5975d6bSopenharmony_ci"internal error" or "unknown error" string.
10b5975d6bSopenharmony_ci
11b5975d6bSopenharmony_ciSo rely on PCRE2 strings: even if they're not translated, they can
12b5975d6bSopenharmony_ciprovide better information than the ones we're currently giving.
13b5975d6bSopenharmony_ci
14b5975d6bSopenharmony_ciRelated to: https://gitlab.gnome.org/GNOME/glib/-/issues/2691
15b5975d6bSopenharmony_ciRelated to: https://gitlab.gnome.org/GNOME/glib/-/issues/2760
16b5975d6bSopenharmony_ci
17b5975d6bSopenharmony_ciConflict:NA
18b5975d6bSopenharmony_ciReference:https://gitlab.gnome.org/GNOME/glib/-/commit/6caf952e48dbed40b5dcff01a94f57ba079b526c
19b5975d6bSopenharmony_ci
20b5975d6bSopenharmony_ci---
21b5975d6bSopenharmony_ci glib/gregex.c      | 64 ++++++++++++++++++++++++++++++++++++++++------
22b5975d6bSopenharmony_ci glib/tests/regex.c |  2 ++
23b5975d6bSopenharmony_ci 2 files changed, 58 insertions(+), 8 deletions(-)
24b5975d6bSopenharmony_ci
25b5975d6bSopenharmony_cidiff --git a/glib/gregex.c b/glib/gregex.c
26b5975d6bSopenharmony_ciindex 220a1a11ac..fcc28d62f4 100644
27b5975d6bSopenharmony_ci--- a/glib/gregex.c
28b5975d6bSopenharmony_ci+++ b/glib/gregex.c
29b5975d6bSopenharmony_ci@@ -456,8 +456,25 @@ get_pcre2_bsr_match_options (GRegexMatchFlags match_flags)
30b5975d6bSopenharmony_ci   return 0;
31b5975d6bSopenharmony_ci }
32b5975d6bSopenharmony_ci 
33b5975d6bSopenharmony_ci+static char *
34b5975d6bSopenharmony_ci+get_pcre2_error_string (int errcode)
35b5975d6bSopenharmony_ci+{
36b5975d6bSopenharmony_ci+  PCRE2_UCHAR8 error_msg[2048];
37b5975d6bSopenharmony_ci+  int err_length;
38b5975d6bSopenharmony_ci+
39b5975d6bSopenharmony_ci+  err_length = pcre2_get_error_message (errcode, error_msg,
40b5975d6bSopenharmony_ci+                                        G_N_ELEMENTS (error_msg));
41b5975d6bSopenharmony_ci+
42b5975d6bSopenharmony_ci+  if (err_length <= 0)
43b5975d6bSopenharmony_ci+    return NULL;
44b5975d6bSopenharmony_ci+
45b5975d6bSopenharmony_ci+  /* The array is always filled with a trailing zero */
46b5975d6bSopenharmony_ci+  g_assert ((size_t) err_length < G_N_ELEMENTS (error_msg));
47b5975d6bSopenharmony_ci+  return g_memdup2 (error_msg, err_length + 1);
48b5975d6bSopenharmony_ci+}
49b5975d6bSopenharmony_ci+
50b5975d6bSopenharmony_ci static const gchar *
51b5975d6bSopenharmony_ci-match_error (gint errcode)
52b5975d6bSopenharmony_ci+translate_match_error (gint errcode)
53b5975d6bSopenharmony_ci {
54b5975d6bSopenharmony_ci   switch (errcode)
55b5975d6bSopenharmony_ci     {
56b5975d6bSopenharmony_ci@@ -511,7 +528,24 @@ match_error (gint errcode)
57b5975d6bSopenharmony_ci     default:
58b5975d6bSopenharmony_ci       break;
59b5975d6bSopenharmony_ci     }
60b5975d6bSopenharmony_ci-  return _("unknown error");
61b5975d6bSopenharmony_ci+  return NULL;
62b5975d6bSopenharmony_ci+}
63b5975d6bSopenharmony_ci+
64b5975d6bSopenharmony_ci+static char *
65b5975d6bSopenharmony_ci+get_match_error_message (int errcode)
66b5975d6bSopenharmony_ci+{
67b5975d6bSopenharmony_ci+  const char *msg = translate_match_error (errcode);
68b5975d6bSopenharmony_ci+  char *error_string;
69b5975d6bSopenharmony_ci+
70b5975d6bSopenharmony_ci+  if (msg)
71b5975d6bSopenharmony_ci+    return g_strdup (msg);
72b5975d6bSopenharmony_ci+
73b5975d6bSopenharmony_ci+  error_string = get_pcre2_error_string (errcode);
74b5975d6bSopenharmony_ci+
75b5975d6bSopenharmony_ci+  if (error_string)
76b5975d6bSopenharmony_ci+    return error_string;
77b5975d6bSopenharmony_ci+
78b5975d6bSopenharmony_ci+  return g_strdup (_("unknown error"));
79b5975d6bSopenharmony_ci }
80b5975d6bSopenharmony_ci 
81b5975d6bSopenharmony_ci static void
82b5975d6bSopenharmony_ci@@ -743,7 +777,6 @@ translate_compile_error (gint *errcode, const gchar **errmsg)
83b5975d6bSopenharmony_ci     case PCRE2_ERROR_INTERNAL_BAD_CODE:
84b5975d6bSopenharmony_ci     case PCRE2_ERROR_INTERNAL_BAD_CODE_IN_SKIP:
85b5975d6bSopenharmony_ci       *errcode = G_REGEX_ERROR_INTERNAL;
86b5975d6bSopenharmony_ci-      *errmsg = _("internal error");
87b5975d6bSopenharmony_ci       break;
88b5975d6bSopenharmony_ci     case PCRE2_ERROR_INVALID_SUBPATTERN_NAME:
89b5975d6bSopenharmony_ci     case PCRE2_ERROR_CLASS_INVALID_RANGE:
90b5975d6bSopenharmony_ci@@ -772,12 +805,10 @@ translate_compile_error (gint *errcode, const gchar **errmsg)
91b5975d6bSopenharmony_ci     case PCRE2_ERROR_BAD_LITERAL_OPTIONS:
92b5975d6bSopenharmony_ci     default:
93b5975d6bSopenharmony_ci       *errcode = G_REGEX_ERROR_COMPILE;
94b5975d6bSopenharmony_ci-      *errmsg = _("internal error");
95b5975d6bSopenharmony_ci       break;
96b5975d6bSopenharmony_ci     }
97b5975d6bSopenharmony_ci 
98b5975d6bSopenharmony_ci   g_assert (*errcode != -1);
99b5975d6bSopenharmony_ci-  g_assert (*errmsg != NULL);
100b5975d6bSopenharmony_ci }
101b5975d6bSopenharmony_ci 
102b5975d6bSopenharmony_ci /* GMatchInfo */
103b5975d6bSopenharmony_ci@@ -1096,9 +1127,12 @@ g_match_info_next (GMatchInfo  *match_info,
104b5975d6bSopenharmony_ci 
105b5975d6bSopenharmony_ci   if (IS_PCRE2_ERROR (match_info->matches))
106b5975d6bSopenharmony_ci     {
107b5975d6bSopenharmony_ci+      gchar *error_msg = get_match_error_message (match_info->matches);
108b5975d6bSopenharmony_ci+
109b5975d6bSopenharmony_ci       g_set_error (error, G_REGEX_ERROR, G_REGEX_ERROR_MATCH,
110b5975d6bSopenharmony_ci                    _("Error while matching regular expression %s: %s"),
111b5975d6bSopenharmony_ci-                   match_info->regex->pattern, match_error (match_info->matches));
112b5975d6bSopenharmony_ci+                   match_info->regex->pattern, error_msg);
113b5975d6bSopenharmony_ci+      g_clear_pointer (&error_msg, g_free);
114b5975d6bSopenharmony_ci       return FALSE;
115b5975d6bSopenharmony_ci     }
116b5975d6bSopenharmony_ci   else if (match_info->matches == 0)
117b5975d6bSopenharmony_ci@@ -1800,11 +1834,20 @@ regex_compile (const gchar  *pattern,
118b5975d6bSopenharmony_ci     {
119b5975d6bSopenharmony_ci       GError *tmp_error;
120b5975d6bSopenharmony_ci       gchar *offset_str;
121b5975d6bSopenharmony_ci+      gchar *pcre2_errmsg = NULL;
122b5975d6bSopenharmony_ci+      int original_errcode;
123b5975d6bSopenharmony_ci 
124b5975d6bSopenharmony_ci       /* Translate the PCRE error code to GRegexError and use a translated
125b5975d6bSopenharmony_ci        * error message if possible */
126b5975d6bSopenharmony_ci+      original_errcode = errcode;
127b5975d6bSopenharmony_ci       translate_compile_error (&errcode, &errmsg);
128b5975d6bSopenharmony_ci 
129b5975d6bSopenharmony_ci+      if (!errmsg)
130b5975d6bSopenharmony_ci+        {
131b5975d6bSopenharmony_ci+          errmsg = _("unknown error");
132b5975d6bSopenharmony_ci+          pcre2_errmsg = get_pcre2_error_string (original_errcode);
133b5975d6bSopenharmony_ci+        }
134b5975d6bSopenharmony_ci+
135b5975d6bSopenharmony_ci       /* PCRE uses byte offsets but we want to show character offsets */
136b5975d6bSopenharmony_ci       erroffset = g_utf8_pointer_to_offset (pattern, &pattern[erroffset]);
137b5975d6bSopenharmony_ci 
138b5975d6bSopenharmony_ci@@ -1812,9 +1855,11 @@ regex_compile (const gchar  *pattern,
139b5975d6bSopenharmony_ci       tmp_error = g_error_new (G_REGEX_ERROR, errcode,
140b5975d6bSopenharmony_ci                                _("Error while compiling regular expression ‘%s’ "
141b5975d6bSopenharmony_ci                                  "at char %s: %s"),
142b5975d6bSopenharmony_ci-                               pattern, offset_str, errmsg);
143b5975d6bSopenharmony_ci+                               pattern, offset_str,
144b5975d6bSopenharmony_ci+                               pcre2_errmsg ? pcre2_errmsg : errmsg);
145b5975d6bSopenharmony_ci       g_propagate_error (error, tmp_error);
146b5975d6bSopenharmony_ci       g_free (offset_str);
147b5975d6bSopenharmony_ci+      g_clear_pointer (&pcre2_errmsg, g_free);
148b5975d6bSopenharmony_ci 
149b5975d6bSopenharmony_ci       return NULL;
150b5975d6bSopenharmony_ci     }
151b5975d6bSopenharmony_ci@@ -2402,9 +2447,12 @@ g_regex_match_all_full (const GRegex      *regex,
152b5975d6bSopenharmony_ci         }
153b5975d6bSopenharmony_ci       else if (IS_PCRE2_ERROR (info->matches))
154b5975d6bSopenharmony_ci         {
155b5975d6bSopenharmony_ci+          gchar *error_msg = get_match_error_message (info->matches);
156b5975d6bSopenharmony_ci+
157b5975d6bSopenharmony_ci           g_set_error (error, G_REGEX_ERROR, G_REGEX_ERROR_MATCH,
158b5975d6bSopenharmony_ci                        _("Error while matching regular expression %s: %s"),
159b5975d6bSopenharmony_ci-                       regex->pattern, match_error (info->matches));
160b5975d6bSopenharmony_ci+                       regex->pattern, error_msg);
161b5975d6bSopenharmony_ci+          g_clear_pointer (&error_msg, g_free);
162b5975d6bSopenharmony_ci         }
163b5975d6bSopenharmony_ci       else if (info->matches != PCRE2_ERROR_NOMATCH)
164b5975d6bSopenharmony_ci         {
165b5975d6bSopenharmony_cidiff --git a/glib/tests/regex.c b/glib/tests/regex.c
166b5975d6bSopenharmony_ciindex 9803d49659..52af212f29 100644
167b5975d6bSopenharmony_ci--- a/glib/tests/regex.c
168b5975d6bSopenharmony_ci+++ b/glib/tests/regex.c
169b5975d6bSopenharmony_ci@@ -2560,6 +2560,7 @@ main (int argc, char *argv[])
170b5975d6bSopenharmony_ci   TEST_NEW_FAIL ("[a-z", 0, G_REGEX_ERROR_UNTERMINATED_CHARACTER_CLASS);
171b5975d6bSopenharmony_ci   TEST_NEW_FAIL ("[\\B]", 0, G_REGEX_ERROR_INVALID_ESCAPE_IN_CHARACTER_CLASS);
172b5975d6bSopenharmony_ci   TEST_NEW_FAIL ("[z-a]", 0, G_REGEX_ERROR_RANGE_OUT_OF_ORDER);
173b5975d6bSopenharmony_ci+  TEST_NEW_FAIL ("^[[:alnum:]-_.]+$", 0, G_REGEX_ERROR_COMPILE);
174b5975d6bSopenharmony_ci   TEST_NEW_FAIL ("{2,4}", 0, G_REGEX_ERROR_NOTHING_TO_REPEAT);
175b5975d6bSopenharmony_ci   TEST_NEW_FAIL ("a(?u)", 0, G_REGEX_ERROR_UNRECOGNIZED_CHARACTER);
176b5975d6bSopenharmony_ci   TEST_NEW_FAIL ("a(?<$foo)bar", 0, G_REGEX_ERROR_MISSING_SUBPATTERN_NAME);
177b5975d6bSopenharmony_ci@@ -2636,6 +2637,7 @@ main (int argc, char *argv[])
178b5975d6bSopenharmony_ci   TEST_MATCH_SIMPLE("a", "a", G_REGEX_CASELESS, 0, TRUE);
179b5975d6bSopenharmony_ci   TEST_MATCH_SIMPLE("a", "A", G_REGEX_CASELESS, 0, TRUE);
180b5975d6bSopenharmony_ci   TEST_MATCH_SIMPLE("\\C\\C", "ab", G_REGEX_OPTIMIZE | G_REGEX_RAW, 0, TRUE);
181b5975d6bSopenharmony_ci+  TEST_MATCH_SIMPLE("^[[:alnum:]\\-_.]+$", "admin-foo", 0, 0, TRUE);
182b5975d6bSopenharmony_ci   /* These are needed to test extended properties. */
183b5975d6bSopenharmony_ci   TEST_MATCH_SIMPLE(AGRAVE, AGRAVE, G_REGEX_CASELESS, 0, TRUE);
184b5975d6bSopenharmony_ci   TEST_MATCH_SIMPLE(AGRAVE, AGRAVE_UPPER, G_REGEX_CASELESS, 0, TRUE);
185b5975d6bSopenharmony_ci-- 
186b5975d6bSopenharmony_ciGitLab
187b5975d6bSopenharmony_ci
188