1b5975d6bSopenharmony_ciFrom d639c4ec009537b743dcd2209184638d9f5d68b9 Mon Sep 17 00:00:00 2001
2b5975d6bSopenharmony_ciFrom: =?UTF-8?q?Marco=20Trevisan=20=28Trevi=C3=B1o=29?= <mail@3v1n0.net>
3b5975d6bSopenharmony_ciDate: Tue, 6 Sep 2022 14:49:10 +0200
4b5975d6bSopenharmony_ciSubject: [PATCH] regex: Do not mix PCRE2 Compile, Match, Newline and BSR flags
5b5975d6bSopenharmony_ci
6b5975d6bSopenharmony_ciAfter the PCRE2 port we were still trying to map the old GRegex flags
7b5975d6bSopenharmony_ci(PCRE1-based) to the new PCRE2 ones, but in doing so we were also
8b5975d6bSopenharmony_cimixing flags with enums, leading to unexpected behavior when trying to
9b5975d6bSopenharmony_ciextract newline and BSR options from larger flag bitmasks.
10b5975d6bSopenharmony_ci
11b5975d6bSopenharmony_ciSo, avoid doing any mapping and store the values as native PCRE2 flags
12b5975d6bSopenharmony_ciinternally, converting them back only when requested.
13b5975d6bSopenharmony_ci
14b5975d6bSopenharmony_ciThis fixes some regressions on newline handling.
15b5975d6bSopenharmony_ci
16b5975d6bSopenharmony_ciFixes: #2729
17b5975d6bSopenharmony_ciFixes: #2688
18b5975d6bSopenharmony_ciFixes: GNOME/gtksourceview#278
19b5975d6bSopenharmony_ci---
20b5975d6bSopenharmony_ci glib/gregex.c      | 637 +++++++++++++++++++++++----------------------
21b5975d6bSopenharmony_ci glib/tests/regex.c |  18 ++
22b5975d6bSopenharmony_ci 2 files changed, 341 insertions(+), 314 deletions(-)
23b5975d6bSopenharmony_ci
24b5975d6bSopenharmony_cidiff --git a/glib/gregex.c b/glib/gregex.c
25b5975d6bSopenharmony_ciindex a16ea98..95695f7 100644
26b5975d6bSopenharmony_ci--- a/glib/gregex.c
27b5975d6bSopenharmony_ci+++ b/glib/gregex.c
28b5975d6bSopenharmony_ci@@ -3,6 +3,7 @@
29b5975d6bSopenharmony_ci  * Copyright (C) 1999, 2000 Scott Wimer
30b5975d6bSopenharmony_ci  * Copyright (C) 2004, Matthias Clasen <mclasen@redhat.com>
31b5975d6bSopenharmony_ci  * Copyright (C) 2005 - 2007, Marco Barisione <marco@barisione.org>
32b5975d6bSopenharmony_ci+ * Copyright (C) 2022, Marco Trevisan <marco.trevisan@canonical.com>
33b5975d6bSopenharmony_ci  *
34b5975d6bSopenharmony_ci  * This library is free software; you can redistribute it and/or
35b5975d6bSopenharmony_ci  * modify it under the terms of the GNU Lesser General Public
36b5975d6bSopenharmony_ci@@ -108,62 +109,105 @@
37b5975d6bSopenharmony_ci  * library written by Philip Hazel.
38b5975d6bSopenharmony_ci  */
39b5975d6bSopenharmony_ci 
40b5975d6bSopenharmony_ci-/* Signifies that flags have already been converted from pcre1 to pcre2. The
41b5975d6bSopenharmony_ci- * value 0x04000000u is also the value of PCRE2_MATCH_INVALID_UTF in pcre2.h,
42b5975d6bSopenharmony_ci- * but it is not used in gregex, so we can reuse it for this flag.
43b5975d6bSopenharmony_ci- */
44b5975d6bSopenharmony_ci-#define G_REGEX_FLAGS_CONVERTED 0x04000000u
45b5975d6bSopenharmony_ci+#define G_REGEX_PCRE_GENERIC_MASK (PCRE2_ANCHORED       | \
46b5975d6bSopenharmony_ci+                                   PCRE2_NO_UTF_CHECK   | \
47b5975d6bSopenharmony_ci+                                   PCRE2_ENDANCHORED)
48b5975d6bSopenharmony_ci+
49b5975d6bSopenharmony_ci /* Mask of all the possible values for GRegexCompileFlags. */
50b5975d6bSopenharmony_ci-#define G_REGEX_COMPILE_MASK (PCRE2_CASELESS |        \
51b5975d6bSopenharmony_ci-                              PCRE2_MULTILINE |       \
52b5975d6bSopenharmony_ci-                              PCRE2_DOTALL |          \
53b5975d6bSopenharmony_ci-                              PCRE2_EXTENDED |        \
54b5975d6bSopenharmony_ci-                              PCRE2_ANCHORED |        \
55b5975d6bSopenharmony_ci-                              PCRE2_DOLLAR_ENDONLY |  \
56b5975d6bSopenharmony_ci-                              PCRE2_UNGREEDY |        \
57b5975d6bSopenharmony_ci-                              PCRE2_UTF |             \
58b5975d6bSopenharmony_ci-                              PCRE2_NO_AUTO_CAPTURE | \
59b5975d6bSopenharmony_ci-                              PCRE2_FIRSTLINE |       \
60b5975d6bSopenharmony_ci-                              PCRE2_DUPNAMES |        \
61b5975d6bSopenharmony_ci-                              PCRE2_NEWLINE_CR |      \
62b5975d6bSopenharmony_ci-                              PCRE2_NEWLINE_LF |      \
63b5975d6bSopenharmony_ci-                              PCRE2_NEWLINE_CRLF |    \
64b5975d6bSopenharmony_ci-                              PCRE2_NEWLINE_ANYCRLF | \
65b5975d6bSopenharmony_ci-                              PCRE2_BSR_ANYCRLF |     \
66b5975d6bSopenharmony_ci-                              G_REGEX_FLAGS_CONVERTED)
67b5975d6bSopenharmony_ci-
68b5975d6bSopenharmony_ci-/* Mask of all GRegexCompileFlags values that are (not) passed trough to PCRE */
69b5975d6bSopenharmony_ci-#define G_REGEX_COMPILE_PCRE_MASK (G_REGEX_COMPILE_MASK & ~G_REGEX_COMPILE_NONPCRE_MASK)
70b5975d6bSopenharmony_ci-#define G_REGEX_COMPILE_NONPCRE_MASK (PCRE2_UTF | \
71b5975d6bSopenharmony_ci-                                      G_REGEX_FLAGS_CONVERTED)
72b5975d6bSopenharmony_ci+#define G_REGEX_COMPILE_MASK (G_REGEX_DEFAULT          | \
73b5975d6bSopenharmony_ci+                              G_REGEX_CASELESS         | \
74b5975d6bSopenharmony_ci+                              G_REGEX_MULTILINE        | \
75b5975d6bSopenharmony_ci+                              G_REGEX_DOTALL           | \
76b5975d6bSopenharmony_ci+                              G_REGEX_EXTENDED         | \
77b5975d6bSopenharmony_ci+                              G_REGEX_ANCHORED         | \
78b5975d6bSopenharmony_ci+                              G_REGEX_DOLLAR_ENDONLY   | \
79b5975d6bSopenharmony_ci+                              G_REGEX_UNGREEDY         | \
80b5975d6bSopenharmony_ci+                              G_REGEX_RAW              | \
81b5975d6bSopenharmony_ci+                              G_REGEX_NO_AUTO_CAPTURE  | \
82b5975d6bSopenharmony_ci+                              G_REGEX_OPTIMIZE         | \
83b5975d6bSopenharmony_ci+                              G_REGEX_FIRSTLINE        | \
84b5975d6bSopenharmony_ci+                              G_REGEX_DUPNAMES         | \
85b5975d6bSopenharmony_ci+                              G_REGEX_NEWLINE_CR       | \
86b5975d6bSopenharmony_ci+                              G_REGEX_NEWLINE_LF       | \
87b5975d6bSopenharmony_ci+                              G_REGEX_NEWLINE_CRLF     | \
88b5975d6bSopenharmony_ci+                              G_REGEX_NEWLINE_ANYCRLF  | \
89b5975d6bSopenharmony_ci+                              G_REGEX_BSR_ANYCRLF)
90b5975d6bSopenharmony_ci+
91b5975d6bSopenharmony_ci+#define G_REGEX_PCRE2_COMPILE_MASK (PCRE2_ALLOW_EMPTY_CLASS    | \
92b5975d6bSopenharmony_ci+                                    PCRE2_ALT_BSUX             | \
93b5975d6bSopenharmony_ci+                                    PCRE2_AUTO_CALLOUT         | \
94b5975d6bSopenharmony_ci+                                    PCRE2_CASELESS             | \
95b5975d6bSopenharmony_ci+                                    PCRE2_DOLLAR_ENDONLY       | \
96b5975d6bSopenharmony_ci+                                    PCRE2_DOTALL               | \
97b5975d6bSopenharmony_ci+                                    PCRE2_DUPNAMES             | \
98b5975d6bSopenharmony_ci+                                    PCRE2_EXTENDED             | \
99b5975d6bSopenharmony_ci+                                    PCRE2_FIRSTLINE            | \
100b5975d6bSopenharmony_ci+                                    PCRE2_MATCH_UNSET_BACKREF  | \
101b5975d6bSopenharmony_ci+                                    PCRE2_MULTILINE            | \
102b5975d6bSopenharmony_ci+                                    PCRE2_NEVER_UCP            | \
103b5975d6bSopenharmony_ci+                                    PCRE2_NEVER_UTF            | \
104b5975d6bSopenharmony_ci+                                    PCRE2_NO_AUTO_CAPTURE      | \
105b5975d6bSopenharmony_ci+                                    PCRE2_NO_AUTO_POSSESS      | \
106b5975d6bSopenharmony_ci+                                    PCRE2_NO_DOTSTAR_ANCHOR    | \
107b5975d6bSopenharmony_ci+                                    PCRE2_NO_START_OPTIMIZE    | \
108b5975d6bSopenharmony_ci+                                    PCRE2_UCP                  | \
109b5975d6bSopenharmony_ci+                                    PCRE2_UNGREEDY             | \
110b5975d6bSopenharmony_ci+                                    PCRE2_UTF                  | \
111b5975d6bSopenharmony_ci+                                    PCRE2_NEVER_BACKSLASH_C    | \
112b5975d6bSopenharmony_ci+                                    PCRE2_ALT_CIRCUMFLEX       | \
113b5975d6bSopenharmony_ci+                                    PCRE2_ALT_VERBNAMES        | \
114b5975d6bSopenharmony_ci+                                    PCRE2_USE_OFFSET_LIMIT     | \
115b5975d6bSopenharmony_ci+                                    PCRE2_EXTENDED_MORE        | \
116b5975d6bSopenharmony_ci+                                    PCRE2_LITERAL              | \
117b5975d6bSopenharmony_ci+                                    PCRE2_MATCH_INVALID_UTF    | \
118b5975d6bSopenharmony_ci+                                    G_REGEX_PCRE_GENERIC_MASK)
119b5975d6bSopenharmony_ci+
120b5975d6bSopenharmony_ci+#define G_REGEX_COMPILE_NONPCRE_MASK (PCRE2_UTF)
121b5975d6bSopenharmony_ci 
122b5975d6bSopenharmony_ci /* Mask of all the possible values for GRegexMatchFlags. */
123b5975d6bSopenharmony_ci-#define G_REGEX_MATCH_MASK (PCRE2_ANCHORED |         \
124b5975d6bSopenharmony_ci-                            PCRE2_NOTBOL |           \
125b5975d6bSopenharmony_ci-                            PCRE2_NOTEOL |           \
126b5975d6bSopenharmony_ci-                            PCRE2_NOTEMPTY |         \
127b5975d6bSopenharmony_ci-                            PCRE2_NEWLINE_CR |       \
128b5975d6bSopenharmony_ci-                            PCRE2_NEWLINE_LF |       \
129b5975d6bSopenharmony_ci-                            PCRE2_NEWLINE_CRLF |     \
130b5975d6bSopenharmony_ci-                            PCRE2_NEWLINE_ANY |      \
131b5975d6bSopenharmony_ci-                            PCRE2_NEWLINE_ANYCRLF |  \
132b5975d6bSopenharmony_ci-                            PCRE2_BSR_ANYCRLF |      \
133b5975d6bSopenharmony_ci-                            PCRE2_BSR_UNICODE |      \
134b5975d6bSopenharmony_ci-                            PCRE2_PARTIAL_SOFT |     \
135b5975d6bSopenharmony_ci-                            PCRE2_PARTIAL_HARD |     \
136b5975d6bSopenharmony_ci-                            PCRE2_NOTEMPTY_ATSTART | \
137b5975d6bSopenharmony_ci-                            G_REGEX_FLAGS_CONVERTED)
138b5975d6bSopenharmony_ci-
139b5975d6bSopenharmony_ci+#define G_REGEX_MATCH_MASK (G_REGEX_MATCH_DEFAULT          | \
140b5975d6bSopenharmony_ci+                            G_REGEX_MATCH_ANCHORED         | \
141b5975d6bSopenharmony_ci+                            G_REGEX_MATCH_NOTBOL           | \
142b5975d6bSopenharmony_ci+                            G_REGEX_MATCH_NOTEOL           | \
143b5975d6bSopenharmony_ci+                            G_REGEX_MATCH_NOTEMPTY         | \
144b5975d6bSopenharmony_ci+                            G_REGEX_MATCH_PARTIAL          | \
145b5975d6bSopenharmony_ci+                            G_REGEX_MATCH_NEWLINE_CR       | \
146b5975d6bSopenharmony_ci+                            G_REGEX_MATCH_NEWLINE_LF       | \
147b5975d6bSopenharmony_ci+                            G_REGEX_MATCH_NEWLINE_CRLF     | \
148b5975d6bSopenharmony_ci+                            G_REGEX_MATCH_NEWLINE_ANY      | \
149b5975d6bSopenharmony_ci+                            G_REGEX_MATCH_NEWLINE_ANYCRLF  | \
150b5975d6bSopenharmony_ci+                            G_REGEX_MATCH_BSR_ANYCRLF      | \
151b5975d6bSopenharmony_ci+                            G_REGEX_MATCH_BSR_ANY          | \
152b5975d6bSopenharmony_ci+                            G_REGEX_MATCH_PARTIAL_SOFT     | \
153b5975d6bSopenharmony_ci+                            G_REGEX_MATCH_PARTIAL_HARD     | \
154b5975d6bSopenharmony_ci+                            G_REGEX_MATCH_NOTEMPTY_ATSTART)
155b5975d6bSopenharmony_ci+
156b5975d6bSopenharmony_ci+#define G_REGEX_PCRE2_MATCH_MASK (PCRE2_NOTBOL                      |\
157b5975d6bSopenharmony_ci+                                  PCRE2_NOTEOL                      |\
158b5975d6bSopenharmony_ci+                                  PCRE2_NOTEMPTY                    |\
159b5975d6bSopenharmony_ci+                                  PCRE2_NOTEMPTY_ATSTART            |\
160b5975d6bSopenharmony_ci+                                  PCRE2_PARTIAL_SOFT                |\
161b5975d6bSopenharmony_ci+                                  PCRE2_PARTIAL_HARD                |\
162b5975d6bSopenharmony_ci+                                  PCRE2_NO_JIT                      |\
163b5975d6bSopenharmony_ci+                                  PCRE2_COPY_MATCHED_SUBJECT        |\
164b5975d6bSopenharmony_ci+                                  G_REGEX_PCRE_GENERIC_MASK)
165b5975d6bSopenharmony_ci+
166b5975d6bSopenharmony_ci+/* TODO: Support PCRE2_NEWLINE_NUL */
167b5975d6bSopenharmony_ci #define G_REGEX_NEWLINE_MASK (PCRE2_NEWLINE_CR |     \
168b5975d6bSopenharmony_ci                               PCRE2_NEWLINE_LF |     \
169b5975d6bSopenharmony_ci                               PCRE2_NEWLINE_CRLF |   \
170b5975d6bSopenharmony_ci                               PCRE2_NEWLINE_ANYCRLF)
171b5975d6bSopenharmony_ci 
172b5975d6bSopenharmony_ci-#define G_REGEX_MATCH_NEWLINE_MASK (PCRE2_NEWLINE_CR |      \
173b5975d6bSopenharmony_ci-                                    PCRE2_NEWLINE_LF |      \
174b5975d6bSopenharmony_ci-                                    PCRE2_NEWLINE_CRLF |    \
175b5975d6bSopenharmony_ci-                                    PCRE2_NEWLINE_ANYCRLF | \
176b5975d6bSopenharmony_ci-                                    PCRE2_NEWLINE_ANY)
177b5975d6bSopenharmony_ci+#define G_REGEX_COMPILE_NEWLINE_MASK (G_REGEX_NEWLINE_CR      | \
178b5975d6bSopenharmony_ci+                                      G_REGEX_NEWLINE_LF      | \
179b5975d6bSopenharmony_ci+                                      G_REGEX_NEWLINE_CRLF    | \
180b5975d6bSopenharmony_ci+                                      G_REGEX_NEWLINE_ANYCRLF)
181b5975d6bSopenharmony_ci+
182b5975d6bSopenharmony_ci+#define G_REGEX_MATCH_NEWLINE_MASK (G_REGEX_MATCH_NEWLINE_CR      | \
183b5975d6bSopenharmony_ci+                                    G_REGEX_MATCH_NEWLINE_LF      | \
184b5975d6bSopenharmony_ci+                                    G_REGEX_MATCH_NEWLINE_CRLF    | \
185b5975d6bSopenharmony_ci+                                    G_REGEX_MATCH_NEWLINE_ANY    | \
186b5975d6bSopenharmony_ci+                                    G_REGEX_MATCH_NEWLINE_ANYCRLF)
187b5975d6bSopenharmony_ci 
188b5975d6bSopenharmony_ci /* if the string is in UTF-8 use g_utf8_ functions, else use
189b5975d6bSopenharmony_ci  * use just +/- 1. */
190b5975d6bSopenharmony_ci@@ -178,7 +222,7 @@ struct _GMatchInfo
191b5975d6bSopenharmony_ci {
192b5975d6bSopenharmony_ci   gint ref_count;               /* the ref count (atomic) */
193b5975d6bSopenharmony_ci   GRegex *regex;                /* the regex */
194b5975d6bSopenharmony_ci-  GRegexMatchFlags match_opts;  /* options used at match time on the regex */
195b5975d6bSopenharmony_ci+  uint32_t match_opts;          /* pcre match options used at match time on the regex */
196b5975d6bSopenharmony_ci   gint matches;                 /* number of matching sub patterns, guaranteed to be <= (n_subpatterns + 1) if doing a single match (rather than matching all) */
197b5975d6bSopenharmony_ci   gint n_subpatterns;           /* total number of sub patterns in the regex */
198b5975d6bSopenharmony_ci   gint pos;                     /* position in the string where last match left off */
199b5975d6bSopenharmony_ci@@ -204,9 +248,10 @@ struct _GRegex
200b5975d6bSopenharmony_ci   gint ref_count;               /* the ref count for the immutable part (atomic) */
201b5975d6bSopenharmony_ci   gchar *pattern;               /* the pattern */
202b5975d6bSopenharmony_ci   pcre2_code *pcre_re;          /* compiled form of the pattern */
203b5975d6bSopenharmony_ci-  GRegexCompileFlags compile_opts;      /* options used at compile time on the pattern, pcre2 values */
204b5975d6bSopenharmony_ci+  uint32_t compile_opts;        /* options used at compile time on the pattern, pcre2 values */
205b5975d6bSopenharmony_ci   GRegexCompileFlags orig_compile_opts; /* options used at compile time on the pattern, gregex values */
206b5975d6bSopenharmony_ci-  GRegexMatchFlags match_opts;  /* options used at match time on the regex */
207b5975d6bSopenharmony_ci+  uint32_t match_opts;          /* pcre2 options used at match time on the regex */
208b5975d6bSopenharmony_ci+  GRegexMatchFlags orig_match_opts; /* options used as default match options, gregex values */
209b5975d6bSopenharmony_ci   gint jit_options;             /* options which were enabled for jit compiler */
210b5975d6bSopenharmony_ci   JITStatus jit_status;         /* indicates the status of jit compiler for this compiled regex */
211b5975d6bSopenharmony_ci };
212b5975d6bSopenharmony_ci@@ -223,197 +268,182 @@ static GList    *split_replacement              (const gchar *replacement,
213b5975d6bSopenharmony_ci                                                  GError **error);
214b5975d6bSopenharmony_ci static void      free_interpolation_data        (InterpolationData *data);
215b5975d6bSopenharmony_ci 
216b5975d6bSopenharmony_ci-static gint
217b5975d6bSopenharmony_ci-map_to_pcre2_compile_flags (gint pcre1_flags)
218b5975d6bSopenharmony_ci+static uint32_t
219b5975d6bSopenharmony_ci+get_pcre2_compile_options (GRegexCompileFlags compile_flags)
220b5975d6bSopenharmony_ci {
221b5975d6bSopenharmony_ci-  /* Maps compile flags from pcre1 to pcre2 values
222b5975d6bSopenharmony_ci-   */
223b5975d6bSopenharmony_ci-  gint pcre2_flags = G_REGEX_FLAGS_CONVERTED;
224b5975d6bSopenharmony_ci-
225b5975d6bSopenharmony_ci-  if (pcre1_flags & G_REGEX_FLAGS_CONVERTED)
226b5975d6bSopenharmony_ci-    return pcre1_flags;
227b5975d6bSopenharmony_ci+  /* Maps compile flags to pcre2 values */
228b5975d6bSopenharmony_ci+  uint32_t pcre2_flags = 0;
229b5975d6bSopenharmony_ci 
230b5975d6bSopenharmony_ci-  if (pcre1_flags & G_REGEX_CASELESS)
231b5975d6bSopenharmony_ci+  if (compile_flags & G_REGEX_CASELESS)
232b5975d6bSopenharmony_ci     pcre2_flags |= PCRE2_CASELESS;
233b5975d6bSopenharmony_ci-  if (pcre1_flags & G_REGEX_MULTILINE)
234b5975d6bSopenharmony_ci+  if (compile_flags & G_REGEX_MULTILINE)
235b5975d6bSopenharmony_ci     pcre2_flags |= PCRE2_MULTILINE;
236b5975d6bSopenharmony_ci-  if (pcre1_flags & G_REGEX_DOTALL)
237b5975d6bSopenharmony_ci+  if (compile_flags & G_REGEX_DOTALL)
238b5975d6bSopenharmony_ci     pcre2_flags |= PCRE2_DOTALL;
239b5975d6bSopenharmony_ci-  if (pcre1_flags & G_REGEX_EXTENDED)
240b5975d6bSopenharmony_ci+  if (compile_flags & G_REGEX_EXTENDED)
241b5975d6bSopenharmony_ci     pcre2_flags |= PCRE2_EXTENDED;
242b5975d6bSopenharmony_ci-  if (pcre1_flags & G_REGEX_ANCHORED)
243b5975d6bSopenharmony_ci+  if (compile_flags & G_REGEX_ANCHORED)
244b5975d6bSopenharmony_ci     pcre2_flags |= PCRE2_ANCHORED;
245b5975d6bSopenharmony_ci-  if (pcre1_flags & G_REGEX_DOLLAR_ENDONLY)
246b5975d6bSopenharmony_ci+  if (compile_flags & G_REGEX_DOLLAR_ENDONLY)
247b5975d6bSopenharmony_ci     pcre2_flags |= PCRE2_DOLLAR_ENDONLY;
248b5975d6bSopenharmony_ci-  if (pcre1_flags & G_REGEX_UNGREEDY)
249b5975d6bSopenharmony_ci+  if (compile_flags & G_REGEX_UNGREEDY)
250b5975d6bSopenharmony_ci     pcre2_flags |= PCRE2_UNGREEDY;
251b5975d6bSopenharmony_ci-  if (!(pcre1_flags & G_REGEX_RAW))
252b5975d6bSopenharmony_ci+  if (!(compile_flags & G_REGEX_RAW))
253b5975d6bSopenharmony_ci     pcre2_flags |= PCRE2_UTF;
254b5975d6bSopenharmony_ci-  if (pcre1_flags & G_REGEX_NO_AUTO_CAPTURE)
255b5975d6bSopenharmony_ci+  if (compile_flags & G_REGEX_NO_AUTO_CAPTURE)
256b5975d6bSopenharmony_ci     pcre2_flags |= PCRE2_NO_AUTO_CAPTURE;
257b5975d6bSopenharmony_ci-  if (pcre1_flags & G_REGEX_FIRSTLINE)
258b5975d6bSopenharmony_ci+  if (compile_flags & G_REGEX_FIRSTLINE)
259b5975d6bSopenharmony_ci     pcre2_flags |= PCRE2_FIRSTLINE;
260b5975d6bSopenharmony_ci-  if (pcre1_flags & G_REGEX_DUPNAMES)
261b5975d6bSopenharmony_ci+  if (compile_flags & G_REGEX_DUPNAMES)
262b5975d6bSopenharmony_ci     pcre2_flags |= PCRE2_DUPNAMES;
263b5975d6bSopenharmony_ci-  if (pcre1_flags & G_REGEX_NEWLINE_CR)
264b5975d6bSopenharmony_ci-    pcre2_flags |= PCRE2_NEWLINE_CR;
265b5975d6bSopenharmony_ci-  if (pcre1_flags & G_REGEX_NEWLINE_LF)
266b5975d6bSopenharmony_ci-    pcre2_flags |= PCRE2_NEWLINE_LF;
267b5975d6bSopenharmony_ci-  /* Check for exact match for a composite flag */
268b5975d6bSopenharmony_ci-  if ((pcre1_flags & G_REGEX_NEWLINE_CRLF) == G_REGEX_NEWLINE_CRLF)
269b5975d6bSopenharmony_ci-    pcre2_flags |= PCRE2_NEWLINE_CRLF;
270b5975d6bSopenharmony_ci-  /* Check for exact match for a composite flag */
271b5975d6bSopenharmony_ci-  if ((pcre1_flags & G_REGEX_NEWLINE_ANYCRLF) == G_REGEX_NEWLINE_ANYCRLF)
272b5975d6bSopenharmony_ci-    pcre2_flags |= PCRE2_NEWLINE_ANYCRLF;
273b5975d6bSopenharmony_ci-  if (pcre1_flags & G_REGEX_BSR_ANYCRLF)
274b5975d6bSopenharmony_ci-    pcre2_flags |= PCRE2_BSR_ANYCRLF;
275b5975d6bSopenharmony_ci-
276b5975d6bSopenharmony_ci-  /* these are not available in pcre2, but we use G_REGEX_OPTIMIZE as a special
277b5975d6bSopenharmony_ci-   * case to request JIT compilation */
278b5975d6bSopenharmony_ci-  if (pcre1_flags & G_REGEX_OPTIMIZE)
279b5975d6bSopenharmony_ci-    pcre2_flags |= 0;
280b5975d6bSopenharmony_ci-G_GNUC_BEGIN_IGNORE_DEPRECATIONS
281b5975d6bSopenharmony_ci-  if (pcre1_flags & G_REGEX_JAVASCRIPT_COMPAT)
282b5975d6bSopenharmony_ci-    pcre2_flags |= 0;
283b5975d6bSopenharmony_ci-G_GNUC_END_IGNORE_DEPRECATIONS
284b5975d6bSopenharmony_ci-
285b5975d6bSopenharmony_ci-  return pcre2_flags;
286b5975d6bSopenharmony_ci+
287b5975d6bSopenharmony_ci+  return pcre2_flags & G_REGEX_PCRE2_COMPILE_MASK;
288b5975d6bSopenharmony_ci }
289b5975d6bSopenharmony_ci 
290b5975d6bSopenharmony_ci-static gint
291b5975d6bSopenharmony_ci-map_to_pcre2_match_flags (gint pcre1_flags)
292b5975d6bSopenharmony_ci+static uint32_t
293b5975d6bSopenharmony_ci+get_pcre2_match_options (GRegexMatchFlags   match_flags,
294b5975d6bSopenharmony_ci+                         GRegexCompileFlags compile_flags)
295b5975d6bSopenharmony_ci {
296b5975d6bSopenharmony_ci-  /* Maps match flags from pcre1 to pcre2 values
297b5975d6bSopenharmony_ci-   */
298b5975d6bSopenharmony_ci-  gint pcre2_flags = G_REGEX_FLAGS_CONVERTED;
299b5975d6bSopenharmony_ci-
300b5975d6bSopenharmony_ci-  if (pcre1_flags & G_REGEX_FLAGS_CONVERTED)
301b5975d6bSopenharmony_ci-    return pcre1_flags;
302b5975d6bSopenharmony_ci+  /* Maps match flags to pcre2 values */
303b5975d6bSopenharmony_ci+  uint32_t pcre2_flags = 0;
304b5975d6bSopenharmony_ci 
305b5975d6bSopenharmony_ci-  if (pcre1_flags & G_REGEX_MATCH_ANCHORED)
306b5975d6bSopenharmony_ci+  if (match_flags & G_REGEX_MATCH_ANCHORED)
307b5975d6bSopenharmony_ci     pcre2_flags |= PCRE2_ANCHORED;
308b5975d6bSopenharmony_ci-  if (pcre1_flags & G_REGEX_MATCH_NOTBOL)
309b5975d6bSopenharmony_ci+  if (match_flags & G_REGEX_MATCH_NOTBOL)
310b5975d6bSopenharmony_ci     pcre2_flags |= PCRE2_NOTBOL;
311b5975d6bSopenharmony_ci-  if (pcre1_flags & G_REGEX_MATCH_NOTEOL)
312b5975d6bSopenharmony_ci+  if (match_flags & G_REGEX_MATCH_NOTEOL)
313b5975d6bSopenharmony_ci     pcre2_flags |= PCRE2_NOTEOL;
314b5975d6bSopenharmony_ci-  if (pcre1_flags & G_REGEX_MATCH_NOTEMPTY)
315b5975d6bSopenharmony_ci+  if (match_flags & G_REGEX_MATCH_NOTEMPTY)
316b5975d6bSopenharmony_ci     pcre2_flags |= PCRE2_NOTEMPTY;
317b5975d6bSopenharmony_ci-  if (pcre1_flags & G_REGEX_MATCH_NEWLINE_CR)
318b5975d6bSopenharmony_ci-    pcre2_flags |= PCRE2_NEWLINE_CR;
319b5975d6bSopenharmony_ci-  if (pcre1_flags & G_REGEX_MATCH_NEWLINE_LF)
320b5975d6bSopenharmony_ci-    pcre2_flags |= PCRE2_NEWLINE_LF;
321b5975d6bSopenharmony_ci-  /* Check for exact match for a composite flag */
322b5975d6bSopenharmony_ci-  if ((pcre1_flags & G_REGEX_MATCH_NEWLINE_CRLF) == G_REGEX_MATCH_NEWLINE_CRLF)
323b5975d6bSopenharmony_ci-    pcre2_flags |= PCRE2_NEWLINE_CRLF;
324b5975d6bSopenharmony_ci-  if (pcre1_flags & G_REGEX_MATCH_NEWLINE_ANY)
325b5975d6bSopenharmony_ci-    pcre2_flags |= PCRE2_NEWLINE_ANY;
326b5975d6bSopenharmony_ci-  /* Check for exact match for a composite flag */
327b5975d6bSopenharmony_ci-  if ((pcre1_flags & G_REGEX_MATCH_NEWLINE_ANYCRLF) == G_REGEX_MATCH_NEWLINE_ANYCRLF)
328b5975d6bSopenharmony_ci-    pcre2_flags |= PCRE2_NEWLINE_ANYCRLF;
329b5975d6bSopenharmony_ci-  if (pcre1_flags & G_REGEX_MATCH_BSR_ANYCRLF)
330b5975d6bSopenharmony_ci-    pcre2_flags |= PCRE2_BSR_ANYCRLF;
331b5975d6bSopenharmony_ci-  if (pcre1_flags & G_REGEX_MATCH_BSR_ANY)
332b5975d6bSopenharmony_ci-    pcre2_flags |= PCRE2_BSR_UNICODE;
333b5975d6bSopenharmony_ci-  if (pcre1_flags & G_REGEX_MATCH_PARTIAL_SOFT)
334b5975d6bSopenharmony_ci+  if (match_flags & G_REGEX_MATCH_PARTIAL_SOFT)
335b5975d6bSopenharmony_ci     pcre2_flags |= PCRE2_PARTIAL_SOFT;
336b5975d6bSopenharmony_ci-  if (pcre1_flags & G_REGEX_MATCH_PARTIAL_HARD)
337b5975d6bSopenharmony_ci+  if (match_flags & G_REGEX_MATCH_PARTIAL_HARD)
338b5975d6bSopenharmony_ci     pcre2_flags |= PCRE2_PARTIAL_HARD;
339b5975d6bSopenharmony_ci-  if (pcre1_flags & G_REGEX_MATCH_NOTEMPTY_ATSTART)
340b5975d6bSopenharmony_ci+  if (match_flags & G_REGEX_MATCH_NOTEMPTY_ATSTART)
341b5975d6bSopenharmony_ci     pcre2_flags |= PCRE2_NOTEMPTY_ATSTART;
342b5975d6bSopenharmony_ci 
343b5975d6bSopenharmony_ci-  return pcre2_flags;
344b5975d6bSopenharmony_ci+  if (compile_flags & G_REGEX_RAW)
345b5975d6bSopenharmony_ci+    pcre2_flags |= PCRE2_NO_UTF_CHECK;
346b5975d6bSopenharmony_ci+
347b5975d6bSopenharmony_ci+  return pcre2_flags & G_REGEX_PCRE2_MATCH_MASK;
348b5975d6bSopenharmony_ci }
349b5975d6bSopenharmony_ci 
350b5975d6bSopenharmony_ci-static gint
351b5975d6bSopenharmony_ci-map_to_pcre1_compile_flags (gint pcre2_flags)
352b5975d6bSopenharmony_ci+static GRegexCompileFlags
353b5975d6bSopenharmony_ci+g_regex_compile_flags_from_pcre2 (uint32_t pcre2_flags)
354b5975d6bSopenharmony_ci {
355b5975d6bSopenharmony_ci-  /* Maps compile flags from pcre2 to pcre1 values
356b5975d6bSopenharmony_ci-   */
357b5975d6bSopenharmony_ci-  gint pcre1_flags = 0;
358b5975d6bSopenharmony_ci-
359b5975d6bSopenharmony_ci-  if (!(pcre2_flags & G_REGEX_FLAGS_CONVERTED))
360b5975d6bSopenharmony_ci-    return pcre2_flags;
361b5975d6bSopenharmony_ci+  GRegexCompileFlags compile_flags = G_REGEX_DEFAULT;
362b5975d6bSopenharmony_ci 
363b5975d6bSopenharmony_ci   if (pcre2_flags & PCRE2_CASELESS)
364b5975d6bSopenharmony_ci-    pcre1_flags |= G_REGEX_CASELESS;
365b5975d6bSopenharmony_ci+    compile_flags |= G_REGEX_CASELESS;
366b5975d6bSopenharmony_ci   if (pcre2_flags & PCRE2_MULTILINE)
367b5975d6bSopenharmony_ci-    pcre1_flags |= G_REGEX_MULTILINE;
368b5975d6bSopenharmony_ci+    compile_flags |= G_REGEX_MULTILINE;
369b5975d6bSopenharmony_ci   if (pcre2_flags & PCRE2_DOTALL)
370b5975d6bSopenharmony_ci-    pcre1_flags |= G_REGEX_DOTALL;
371b5975d6bSopenharmony_ci+    compile_flags |= G_REGEX_DOTALL;
372b5975d6bSopenharmony_ci   if (pcre2_flags & PCRE2_EXTENDED)
373b5975d6bSopenharmony_ci-    pcre1_flags |= G_REGEX_EXTENDED;
374b5975d6bSopenharmony_ci+    compile_flags |= G_REGEX_EXTENDED;
375b5975d6bSopenharmony_ci   if (pcre2_flags & PCRE2_ANCHORED)
376b5975d6bSopenharmony_ci-    pcre1_flags |= G_REGEX_ANCHORED;
377b5975d6bSopenharmony_ci+    compile_flags |= G_REGEX_ANCHORED;
378b5975d6bSopenharmony_ci   if (pcre2_flags & PCRE2_DOLLAR_ENDONLY)
379b5975d6bSopenharmony_ci-    pcre1_flags |= G_REGEX_DOLLAR_ENDONLY;
380b5975d6bSopenharmony_ci+    compile_flags |= G_REGEX_DOLLAR_ENDONLY;
381b5975d6bSopenharmony_ci   if (pcre2_flags & PCRE2_UNGREEDY)
382b5975d6bSopenharmony_ci-    pcre1_flags |= G_REGEX_UNGREEDY;
383b5975d6bSopenharmony_ci+    compile_flags |= G_REGEX_UNGREEDY;
384b5975d6bSopenharmony_ci   if (!(pcre2_flags & PCRE2_UTF))
385b5975d6bSopenharmony_ci-    pcre1_flags |= G_REGEX_RAW;
386b5975d6bSopenharmony_ci+    compile_flags |= G_REGEX_RAW;
387b5975d6bSopenharmony_ci   if (pcre2_flags & PCRE2_NO_AUTO_CAPTURE)
388b5975d6bSopenharmony_ci-    pcre1_flags |= G_REGEX_NO_AUTO_CAPTURE;
389b5975d6bSopenharmony_ci+    compile_flags |= G_REGEX_NO_AUTO_CAPTURE;
390b5975d6bSopenharmony_ci   if (pcre2_flags & PCRE2_FIRSTLINE)
391b5975d6bSopenharmony_ci-    pcre1_flags |= G_REGEX_FIRSTLINE;
392b5975d6bSopenharmony_ci+    compile_flags |= G_REGEX_FIRSTLINE;
393b5975d6bSopenharmony_ci   if (pcre2_flags & PCRE2_DUPNAMES)
394b5975d6bSopenharmony_ci-    pcre1_flags |= G_REGEX_DUPNAMES;
395b5975d6bSopenharmony_ci-  if (pcre2_flags & PCRE2_NEWLINE_CR)
396b5975d6bSopenharmony_ci-    pcre1_flags |= G_REGEX_NEWLINE_CR;
397b5975d6bSopenharmony_ci-  if (pcre2_flags & PCRE2_NEWLINE_LF)
398b5975d6bSopenharmony_ci-    pcre1_flags |= G_REGEX_NEWLINE_LF;
399b5975d6bSopenharmony_ci-  /* Check for exact match for a composite flag */
400b5975d6bSopenharmony_ci-  if ((pcre2_flags & PCRE2_NEWLINE_CRLF) == PCRE2_NEWLINE_CRLF)
401b5975d6bSopenharmony_ci-    pcre1_flags |= G_REGEX_NEWLINE_CRLF;
402b5975d6bSopenharmony_ci-  /* Check for exact match for a composite flag */
403b5975d6bSopenharmony_ci-  if ((pcre2_flags & PCRE2_NEWLINE_ANYCRLF) == PCRE2_NEWLINE_ANYCRLF)
404b5975d6bSopenharmony_ci-    pcre1_flags |= G_REGEX_NEWLINE_ANYCRLF;
405b5975d6bSopenharmony_ci-  if (pcre2_flags & PCRE2_BSR_ANYCRLF)
406b5975d6bSopenharmony_ci-    pcre1_flags |= G_REGEX_BSR_ANYCRLF;
407b5975d6bSopenharmony_ci-
408b5975d6bSopenharmony_ci-  return pcre1_flags;
409b5975d6bSopenharmony_ci+    compile_flags |= G_REGEX_DUPNAMES;
410b5975d6bSopenharmony_ci+
411b5975d6bSopenharmony_ci+  return compile_flags & G_REGEX_COMPILE_MASK;
412b5975d6bSopenharmony_ci }
413b5975d6bSopenharmony_ci 
414b5975d6bSopenharmony_ci-static gint
415b5975d6bSopenharmony_ci-map_to_pcre1_match_flags (gint pcre2_flags)
416b5975d6bSopenharmony_ci+static GRegexMatchFlags
417b5975d6bSopenharmony_ci+g_regex_match_flags_from_pcre2 (uint32_t pcre2_flags)
418b5975d6bSopenharmony_ci {
419b5975d6bSopenharmony_ci-  /* Maps match flags from pcre2 to pcre1 values
420b5975d6bSopenharmony_ci-   */
421b5975d6bSopenharmony_ci-  gint pcre1_flags = 0;
422b5975d6bSopenharmony_ci-
423b5975d6bSopenharmony_ci-  if (!(pcre2_flags & G_REGEX_FLAGS_CONVERTED))
424b5975d6bSopenharmony_ci-    return pcre2_flags;
425b5975d6bSopenharmony_ci+  GRegexMatchFlags match_flags = G_REGEX_MATCH_DEFAULT;
426b5975d6bSopenharmony_ci 
427b5975d6bSopenharmony_ci   if (pcre2_flags & PCRE2_ANCHORED)
428b5975d6bSopenharmony_ci-    pcre1_flags |= G_REGEX_MATCH_ANCHORED;
429b5975d6bSopenharmony_ci+    match_flags |= G_REGEX_MATCH_ANCHORED;
430b5975d6bSopenharmony_ci   if (pcre2_flags & PCRE2_NOTBOL)
431b5975d6bSopenharmony_ci-    pcre1_flags |= G_REGEX_MATCH_NOTBOL;
432b5975d6bSopenharmony_ci+    match_flags |= G_REGEX_MATCH_NOTBOL;
433b5975d6bSopenharmony_ci   if (pcre2_flags & PCRE2_NOTEOL)
434b5975d6bSopenharmony_ci-    pcre1_flags |= G_REGEX_MATCH_NOTEOL;
435b5975d6bSopenharmony_ci+    match_flags |= G_REGEX_MATCH_NOTEOL;
436b5975d6bSopenharmony_ci   if (pcre2_flags & PCRE2_NOTEMPTY)
437b5975d6bSopenharmony_ci-    pcre1_flags |= G_REGEX_MATCH_NOTEMPTY;
438b5975d6bSopenharmony_ci-  if (pcre2_flags & PCRE2_NEWLINE_CR)
439b5975d6bSopenharmony_ci-    pcre1_flags |= G_REGEX_MATCH_NEWLINE_CR;
440b5975d6bSopenharmony_ci-  if (pcre2_flags & PCRE2_NEWLINE_LF)
441b5975d6bSopenharmony_ci-    pcre1_flags |= G_REGEX_MATCH_NEWLINE_LF;
442b5975d6bSopenharmony_ci-  /* Check for exact match for a composite flag */
443b5975d6bSopenharmony_ci-  if ((pcre2_flags & PCRE2_NEWLINE_CRLF) == PCRE2_NEWLINE_CRLF)
444b5975d6bSopenharmony_ci-    pcre1_flags |= G_REGEX_MATCH_NEWLINE_CRLF;
445b5975d6bSopenharmony_ci-  if (pcre2_flags & PCRE2_NEWLINE_ANY)
446b5975d6bSopenharmony_ci-    pcre1_flags |= G_REGEX_MATCH_NEWLINE_ANY;
447b5975d6bSopenharmony_ci-  /* Check for exact match for a composite flag */
448b5975d6bSopenharmony_ci-  if ((pcre2_flags & PCRE2_NEWLINE_ANYCRLF) == PCRE2_NEWLINE_ANYCRLF)
449b5975d6bSopenharmony_ci-    pcre1_flags |= G_REGEX_MATCH_NEWLINE_ANYCRLF;
450b5975d6bSopenharmony_ci-  if (pcre2_flags & PCRE2_BSR_ANYCRLF)
451b5975d6bSopenharmony_ci-    pcre1_flags |= G_REGEX_MATCH_BSR_ANYCRLF;
452b5975d6bSopenharmony_ci-  if (pcre2_flags & PCRE2_BSR_UNICODE)
453b5975d6bSopenharmony_ci-    pcre1_flags |= G_REGEX_MATCH_BSR_ANY;
454b5975d6bSopenharmony_ci+    match_flags |= G_REGEX_MATCH_NOTEMPTY;
455b5975d6bSopenharmony_ci   if (pcre2_flags & PCRE2_PARTIAL_SOFT)
456b5975d6bSopenharmony_ci-    pcre1_flags |= G_REGEX_MATCH_PARTIAL_SOFT;
457b5975d6bSopenharmony_ci+    match_flags |= G_REGEX_MATCH_PARTIAL_SOFT;
458b5975d6bSopenharmony_ci   if (pcre2_flags & PCRE2_PARTIAL_HARD)
459b5975d6bSopenharmony_ci-    pcre1_flags |= G_REGEX_MATCH_PARTIAL_HARD;
460b5975d6bSopenharmony_ci+    match_flags |= G_REGEX_MATCH_PARTIAL_HARD;
461b5975d6bSopenharmony_ci   if (pcre2_flags & PCRE2_NOTEMPTY_ATSTART)
462b5975d6bSopenharmony_ci-    pcre1_flags |= G_REGEX_MATCH_NOTEMPTY_ATSTART;
463b5975d6bSopenharmony_ci+    match_flags |= G_REGEX_MATCH_NOTEMPTY_ATSTART;
464b5975d6bSopenharmony_ci+
465b5975d6bSopenharmony_ci+  return (match_flags & G_REGEX_MATCH_MASK);
466b5975d6bSopenharmony_ci+}
467b5975d6bSopenharmony_ci+
468b5975d6bSopenharmony_ci+static uint32_t
469b5975d6bSopenharmony_ci+get_pcre2_newline_compile_options (GRegexCompileFlags compile_flags)
470b5975d6bSopenharmony_ci+{
471b5975d6bSopenharmony_ci+  compile_flags &= G_REGEX_COMPILE_NEWLINE_MASK;
472b5975d6bSopenharmony_ci+
473b5975d6bSopenharmony_ci+  switch (compile_flags)
474b5975d6bSopenharmony_ci+    {
475b5975d6bSopenharmony_ci+    case G_REGEX_NEWLINE_CR:
476b5975d6bSopenharmony_ci+      return PCRE2_NEWLINE_CR;
477b5975d6bSopenharmony_ci+    case G_REGEX_NEWLINE_LF:
478b5975d6bSopenharmony_ci+      return PCRE2_NEWLINE_LF;
479b5975d6bSopenharmony_ci+    case G_REGEX_NEWLINE_CRLF:
480b5975d6bSopenharmony_ci+      return PCRE2_NEWLINE_CRLF;
481b5975d6bSopenharmony_ci+    case G_REGEX_NEWLINE_ANYCRLF:
482b5975d6bSopenharmony_ci+      return PCRE2_NEWLINE_ANYCRLF;
483b5975d6bSopenharmony_ci+    default:
484b5975d6bSopenharmony_ci+      if (compile_flags != 0)
485b5975d6bSopenharmony_ci+        return 0;
486b5975d6bSopenharmony_ci+
487b5975d6bSopenharmony_ci+      return PCRE2_NEWLINE_ANY;
488b5975d6bSopenharmony_ci+    }
489b5975d6bSopenharmony_ci+}
490b5975d6bSopenharmony_ci+
491b5975d6bSopenharmony_ci+static uint32_t
492b5975d6bSopenharmony_ci+get_pcre2_newline_match_options (GRegexMatchFlags match_flags)
493b5975d6bSopenharmony_ci+{
494b5975d6bSopenharmony_ci+  switch (match_flags & G_REGEX_MATCH_NEWLINE_MASK)
495b5975d6bSopenharmony_ci+    {
496b5975d6bSopenharmony_ci+    case G_REGEX_MATCH_NEWLINE_CR:
497b5975d6bSopenharmony_ci+      return PCRE2_NEWLINE_CR;
498b5975d6bSopenharmony_ci+    case G_REGEX_MATCH_NEWLINE_LF:
499b5975d6bSopenharmony_ci+      return PCRE2_NEWLINE_LF;
500b5975d6bSopenharmony_ci+    case G_REGEX_MATCH_NEWLINE_CRLF:
501b5975d6bSopenharmony_ci+      return PCRE2_NEWLINE_CRLF;
502b5975d6bSopenharmony_ci+    case G_REGEX_MATCH_NEWLINE_ANY:
503b5975d6bSopenharmony_ci+      return PCRE2_NEWLINE_ANY;
504b5975d6bSopenharmony_ci+    case G_REGEX_MATCH_NEWLINE_ANYCRLF:
505b5975d6bSopenharmony_ci+      return PCRE2_NEWLINE_ANYCRLF;
506b5975d6bSopenharmony_ci+    default:
507b5975d6bSopenharmony_ci+      return 0;
508b5975d6bSopenharmony_ci+    }
509b5975d6bSopenharmony_ci+}
510b5975d6bSopenharmony_ci+
511b5975d6bSopenharmony_ci+static uint32_t
512b5975d6bSopenharmony_ci+get_pcre2_bsr_compile_options (GRegexCompileFlags compile_flags)
513b5975d6bSopenharmony_ci+{
514b5975d6bSopenharmony_ci+  if (compile_flags & G_REGEX_BSR_ANYCRLF)
515b5975d6bSopenharmony_ci+    return PCRE2_BSR_ANYCRLF;
516b5975d6bSopenharmony_ci 
517b5975d6bSopenharmony_ci-  return pcre1_flags;
518b5975d6bSopenharmony_ci+  return PCRE2_BSR_UNICODE;
519b5975d6bSopenharmony_ci+}
520b5975d6bSopenharmony_ci+
521b5975d6bSopenharmony_ci+static uint32_t
522b5975d6bSopenharmony_ci+get_pcre2_bsr_match_options (GRegexMatchFlags match_flags)
523b5975d6bSopenharmony_ci+{
524b5975d6bSopenharmony_ci+  if (match_flags & G_REGEX_MATCH_BSR_ANYCRLF)
525b5975d6bSopenharmony_ci+    return PCRE2_BSR_ANYCRLF;
526b5975d6bSopenharmony_ci+
527b5975d6bSopenharmony_ci+  if (match_flags & G_REGEX_MATCH_BSR_ANY)
528b5975d6bSopenharmony_ci+    return PCRE2_BSR_UNICODE;
529b5975d6bSopenharmony_ci+
530b5975d6bSopenharmony_ci+  return 0;
531b5975d6bSopenharmony_ci }
532b5975d6bSopenharmony_ci 
533b5975d6bSopenharmony_ci static const gchar *
534b5975d6bSopenharmony_ci@@ -742,12 +772,12 @@ translate_compile_error (gint *errcode, const gchar **errmsg)
535b5975d6bSopenharmony_ci /* GMatchInfo */
536b5975d6bSopenharmony_ci 
537b5975d6bSopenharmony_ci static GMatchInfo *
538b5975d6bSopenharmony_ci-match_info_new (const GRegex *regex,
539b5975d6bSopenharmony_ci-                const gchar  *string,
540b5975d6bSopenharmony_ci-                gint          string_len,
541b5975d6bSopenharmony_ci-                gint          start_position,
542b5975d6bSopenharmony_ci-                gint          match_options,
543b5975d6bSopenharmony_ci-                gboolean      is_dfa)
544b5975d6bSopenharmony_ci+match_info_new (const GRegex     *regex,
545b5975d6bSopenharmony_ci+                const gchar      *string,
546b5975d6bSopenharmony_ci+                gint              string_len,
547b5975d6bSopenharmony_ci+                gint              start_position,
548b5975d6bSopenharmony_ci+                GRegexMatchFlags  match_options,
549b5975d6bSopenharmony_ci+                gboolean          is_dfa)
550b5975d6bSopenharmony_ci {
551b5975d6bSopenharmony_ci   GMatchInfo *match_info;
552b5975d6bSopenharmony_ci 
553b5975d6bSopenharmony_ci@@ -761,7 +791,8 @@ match_info_new (const GRegex *regex,
554b5975d6bSopenharmony_ci   match_info->string_len = string_len;
555b5975d6bSopenharmony_ci   match_info->matches = PCRE2_ERROR_NOMATCH;
556b5975d6bSopenharmony_ci   match_info->pos = start_position;
557b5975d6bSopenharmony_ci-  match_info->match_opts = match_options;
558b5975d6bSopenharmony_ci+  match_info->match_opts =
559b5975d6bSopenharmony_ci+    get_pcre2_match_options (match_options, regex->orig_compile_opts);
560b5975d6bSopenharmony_ci 
561b5975d6bSopenharmony_ci   pcre2_pattern_info (regex->pcre_re, PCRE2_INFO_CAPTURECOUNT,
562b5975d6bSopenharmony_ci                       &match_info->n_subpatterns);
563b5975d6bSopenharmony_ci@@ -822,8 +853,8 @@ recalc_match_offsets (GMatchInfo *match_info,
564b5975d6bSopenharmony_ci }
565b5975d6bSopenharmony_ci 
566b5975d6bSopenharmony_ci static void
567b5975d6bSopenharmony_ci-enable_jit_with_match_options (GRegex *regex,
568b5975d6bSopenharmony_ci-                               GRegexMatchFlags match_options)
569b5975d6bSopenharmony_ci+enable_jit_with_match_options (GRegex   *regex,
570b5975d6bSopenharmony_ci+                               uint32_t  match_options)
571b5975d6bSopenharmony_ci {
572b5975d6bSopenharmony_ci   gint old_jit_options, new_jit_options, retval;
573b5975d6bSopenharmony_ci 
574b5975d6bSopenharmony_ci@@ -1009,7 +1040,7 @@ g_match_info_next (GMatchInfo  *match_info,
575b5975d6bSopenharmony_ci       return FALSE;
576b5975d6bSopenharmony_ci     }
577b5975d6bSopenharmony_ci 
578b5975d6bSopenharmony_ci-  opts = map_to_pcre2_match_flags (match_info->regex->match_opts | match_info->match_opts);
579b5975d6bSopenharmony_ci+  opts = match_info->regex->match_opts | match_info->match_opts;
580b5975d6bSopenharmony_ci 
581b5975d6bSopenharmony_ci   enable_jit_with_match_options (match_info->regex, opts);
582b5975d6bSopenharmony_ci   if (match_info->regex->jit_status == JIT_STATUS_ENABLED)
583b5975d6bSopenharmony_ci@@ -1018,7 +1049,7 @@ g_match_info_next (GMatchInfo  *match_info,
584b5975d6bSopenharmony_ci                                              (PCRE2_SPTR8) match_info->string,
585b5975d6bSopenharmony_ci                                              match_info->string_len,
586b5975d6bSopenharmony_ci                                              match_info->pos,
587b5975d6bSopenharmony_ci-                                             opts & ~G_REGEX_FLAGS_CONVERTED,
588b5975d6bSopenharmony_ci+                                             opts,
589b5975d6bSopenharmony_ci                                              match_info->match_data,
590b5975d6bSopenharmony_ci                                              match_info->match_context);
591b5975d6bSopenharmony_ci     }
592b5975d6bSopenharmony_ci@@ -1028,7 +1059,7 @@ g_match_info_next (GMatchInfo  *match_info,
593b5975d6bSopenharmony_ci                                          (PCRE2_SPTR8) match_info->string,
594b5975d6bSopenharmony_ci                                          match_info->string_len,
595b5975d6bSopenharmony_ci                                          match_info->pos,
596b5975d6bSopenharmony_ci-                                         opts & ~G_REGEX_FLAGS_CONVERTED,
597b5975d6bSopenharmony_ci+                                         opts,
598b5975d6bSopenharmony_ci                                          match_info->match_data,
599b5975d6bSopenharmony_ci                                          match_info->match_context);
600b5975d6bSopenharmony_ci     }
601b5975d6bSopenharmony_ci@@ -1563,14 +1594,14 @@ g_regex_unref (GRegex *regex)
602b5975d6bSopenharmony_ci     }
603b5975d6bSopenharmony_ci }
604b5975d6bSopenharmony_ci 
605b5975d6bSopenharmony_ci-/*
606b5975d6bSopenharmony_ci- * @match_options: (inout) (optional):
607b5975d6bSopenharmony_ci- */
608b5975d6bSopenharmony_ci-static pcre2_code *regex_compile (const gchar *pattern,
609b5975d6bSopenharmony_ci-                                  GRegexCompileFlags compile_options,
610b5975d6bSopenharmony_ci-                                  GRegexCompileFlags *compile_options_out,
611b5975d6bSopenharmony_ci-                                  GRegexMatchFlags *match_options,
612b5975d6bSopenharmony_ci-                                  GError **error);
613b5975d6bSopenharmony_ci+static pcre2_code * regex_compile (const gchar  *pattern,
614b5975d6bSopenharmony_ci+                                   uint32_t      compile_options,
615b5975d6bSopenharmony_ci+                                   uint32_t      newline_options,
616b5975d6bSopenharmony_ci+                                   uint32_t      bsr_options,
617b5975d6bSopenharmony_ci+                                   GError      **error);
618b5975d6bSopenharmony_ci+
619b5975d6bSopenharmony_ci+static uint32_t get_pcre2_inline_compile_options (pcre2_code *re,
620b5975d6bSopenharmony_ci+                                                  uint32_t    compile_options);
621b5975d6bSopenharmony_ci 
622b5975d6bSopenharmony_ci /**
623b5975d6bSopenharmony_ci  * g_regex_new:
624b5975d6bSopenharmony_ci@@ -1596,11 +1627,10 @@ g_regex_new (const gchar         *pattern,
625b5975d6bSopenharmony_ci   GRegex *regex;
626b5975d6bSopenharmony_ci   pcre2_code *re;
627b5975d6bSopenharmony_ci   static gsize initialised = 0;
628b5975d6bSopenharmony_ci-  GRegexCompileFlags orig_compile_opts;
629b5975d6bSopenharmony_ci-
630b5975d6bSopenharmony_ci-  orig_compile_opts = compile_options;
631b5975d6bSopenharmony_ci-  compile_options = map_to_pcre2_compile_flags (compile_options);
632b5975d6bSopenharmony_ci-  match_options = map_to_pcre2_match_flags (match_options);
633b5975d6bSopenharmony_ci+  uint32_t pcre_compile_options;
634b5975d6bSopenharmony_ci+  uint32_t pcre_match_options;
635b5975d6bSopenharmony_ci+  uint32_t newline_options;
636b5975d6bSopenharmony_ci+  uint32_t bsr_options;
637b5975d6bSopenharmony_ci 
638b5975d6bSopenharmony_ci   g_return_val_if_fail (pattern != NULL, NULL);
639b5975d6bSopenharmony_ci   g_return_val_if_fail (error == NULL || *error == NULL, NULL);
640b5975d6bSopenharmony_ci@@ -1618,113 +1648,97 @@ g_regex_new (const gchar         *pattern,
641b5975d6bSopenharmony_ci       g_once_init_leave (&initialised, supports_utf8 ? 1 : 2);
642b5975d6bSopenharmony_ci     }
643b5975d6bSopenharmony_ci 
644b5975d6bSopenharmony_ci-  if (G_UNLIKELY (initialised != 1)) 
645b5975d6bSopenharmony_ci+  if (G_UNLIKELY (initialised != 1))
646b5975d6bSopenharmony_ci     {
647b5975d6bSopenharmony_ci       g_set_error_literal (error, G_REGEX_ERROR, G_REGEX_ERROR_COMPILE, 
648b5975d6bSopenharmony_ci                            _("PCRE library is compiled with incompatible options"));
649b5975d6bSopenharmony_ci       return NULL;
650b5975d6bSopenharmony_ci     }
651b5975d6bSopenharmony_ci 
652b5975d6bSopenharmony_ci-  switch (compile_options & G_REGEX_NEWLINE_MASK)
653b5975d6bSopenharmony_ci+  pcre_compile_options = get_pcre2_compile_options (compile_options);
654b5975d6bSopenharmony_ci+  pcre_match_options = get_pcre2_match_options (match_options, compile_options);
655b5975d6bSopenharmony_ci+
656b5975d6bSopenharmony_ci+  newline_options = get_pcre2_newline_match_options (match_options);
657b5975d6bSopenharmony_ci+  if (newline_options == 0)
658b5975d6bSopenharmony_ci+    newline_options = get_pcre2_newline_compile_options (compile_options);
659b5975d6bSopenharmony_ci+
660b5975d6bSopenharmony_ci+  if (newline_options == 0)
661b5975d6bSopenharmony_ci     {
662b5975d6bSopenharmony_ci-    case 0: /* PCRE2_NEWLINE_ANY */
663b5975d6bSopenharmony_ci-    case PCRE2_NEWLINE_CR:
664b5975d6bSopenharmony_ci-    case PCRE2_NEWLINE_LF:
665b5975d6bSopenharmony_ci-    case PCRE2_NEWLINE_CRLF:
666b5975d6bSopenharmony_ci-    case PCRE2_NEWLINE_ANYCRLF:
667b5975d6bSopenharmony_ci-      break;
668b5975d6bSopenharmony_ci-    default:
669b5975d6bSopenharmony_ci       g_set_error (error, G_REGEX_ERROR, G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS,
670b5975d6bSopenharmony_ci                    "Invalid newline flags");
671b5975d6bSopenharmony_ci       return NULL;
672b5975d6bSopenharmony_ci     }
673b5975d6bSopenharmony_ci 
674b5975d6bSopenharmony_ci-  re = regex_compile (pattern, compile_options, &compile_options,
675b5975d6bSopenharmony_ci-                      &match_options, error);
676b5975d6bSopenharmony_ci+  bsr_options = get_pcre2_bsr_match_options (match_options);
677b5975d6bSopenharmony_ci+  if (!bsr_options)
678b5975d6bSopenharmony_ci+    bsr_options = get_pcre2_bsr_compile_options (compile_options);
679b5975d6bSopenharmony_ci+
680b5975d6bSopenharmony_ci+  re = regex_compile (pattern, pcre_compile_options,
681b5975d6bSopenharmony_ci+                      newline_options, bsr_options, error);
682b5975d6bSopenharmony_ci   if (re == NULL)
683b5975d6bSopenharmony_ci     return NULL;
684b5975d6bSopenharmony_ci 
685b5975d6bSopenharmony_ci+  pcre_compile_options |=
686b5975d6bSopenharmony_ci+    get_pcre2_inline_compile_options (re, pcre_compile_options);
687b5975d6bSopenharmony_ci+
688b5975d6bSopenharmony_ci   regex = g_new0 (GRegex, 1);
689b5975d6bSopenharmony_ci   regex->ref_count = 1;
690b5975d6bSopenharmony_ci   regex->pattern = g_strdup (pattern);
691b5975d6bSopenharmony_ci   regex->pcre_re = re;
692b5975d6bSopenharmony_ci-  regex->compile_opts = compile_options;
693b5975d6bSopenharmony_ci-  regex->orig_compile_opts = orig_compile_opts;
694b5975d6bSopenharmony_ci-  regex->match_opts = match_options;
695b5975d6bSopenharmony_ci+  regex->compile_opts = pcre_compile_options;
696b5975d6bSopenharmony_ci+  regex->orig_compile_opts = compile_options;
697b5975d6bSopenharmony_ci+  regex->match_opts = pcre_match_options;
698b5975d6bSopenharmony_ci+  regex->orig_match_opts = match_options;
699b5975d6bSopenharmony_ci   enable_jit_with_match_options (regex, regex->match_opts);
700b5975d6bSopenharmony_ci 
701b5975d6bSopenharmony_ci   return regex;
702b5975d6bSopenharmony_ci }
703b5975d6bSopenharmony_ci 
704b5975d6bSopenharmony_ci-static gint
705b5975d6bSopenharmony_ci-extract_newline_options (const GRegexCompileFlags compile_options,
706b5975d6bSopenharmony_ci-                         const GRegexMatchFlags *match_options)
707b5975d6bSopenharmony_ci-{
708b5975d6bSopenharmony_ci-  gint newline_options = PCRE2_NEWLINE_ANY;
709b5975d6bSopenharmony_ci-
710b5975d6bSopenharmony_ci-  if (compile_options & G_REGEX_NEWLINE_MASK)
711b5975d6bSopenharmony_ci-    newline_options = compile_options & G_REGEX_NEWLINE_MASK;
712b5975d6bSopenharmony_ci-  if (match_options && *match_options & G_REGEX_MATCH_NEWLINE_MASK)
713b5975d6bSopenharmony_ci-    newline_options = *match_options & G_REGEX_MATCH_NEWLINE_MASK;
714b5975d6bSopenharmony_ci-
715b5975d6bSopenharmony_ci-  return newline_options;
716b5975d6bSopenharmony_ci-}
717b5975d6bSopenharmony_ci-
718b5975d6bSopenharmony_ci-static gint
719b5975d6bSopenharmony_ci-extract_bsr_options (const GRegexCompileFlags compile_options,
720b5975d6bSopenharmony_ci-                     const GRegexMatchFlags *match_options)
721b5975d6bSopenharmony_ci-{
722b5975d6bSopenharmony_ci-  gint bsr_options = PCRE2_BSR_UNICODE;
723b5975d6bSopenharmony_ci-
724b5975d6bSopenharmony_ci-  if (compile_options & PCRE2_BSR_ANYCRLF)
725b5975d6bSopenharmony_ci-    bsr_options = PCRE2_BSR_ANYCRLF;
726b5975d6bSopenharmony_ci-  if (match_options && *match_options & PCRE2_BSR_ANYCRLF)
727b5975d6bSopenharmony_ci-    bsr_options = PCRE2_BSR_ANYCRLF;
728b5975d6bSopenharmony_ci-  if (match_options && *match_options & PCRE2_BSR_UNICODE)
729b5975d6bSopenharmony_ci-    bsr_options = PCRE2_BSR_UNICODE;
730b5975d6bSopenharmony_ci-
731b5975d6bSopenharmony_ci-  return bsr_options;
732b5975d6bSopenharmony_ci-}
733b5975d6bSopenharmony_ci-
734b5975d6bSopenharmony_ci static pcre2_code *
735b5975d6bSopenharmony_ci-regex_compile (const gchar *pattern,
736b5975d6bSopenharmony_ci-               GRegexCompileFlags compile_options,
737b5975d6bSopenharmony_ci-               GRegexCompileFlags *compile_options_out,
738b5975d6bSopenharmony_ci-               GRegexMatchFlags *match_options,
739b5975d6bSopenharmony_ci-               GError **error)
740b5975d6bSopenharmony_ci+regex_compile (const gchar  *pattern,
741b5975d6bSopenharmony_ci+               uint32_t      compile_options,
742b5975d6bSopenharmony_ci+               uint32_t      newline_options,
743b5975d6bSopenharmony_ci+               uint32_t      bsr_options,
744b5975d6bSopenharmony_ci+               GError      **error)
745b5975d6bSopenharmony_ci {
746b5975d6bSopenharmony_ci   pcre2_code *re;
747b5975d6bSopenharmony_ci   pcre2_compile_context *context;
748b5975d6bSopenharmony_ci   const gchar *errmsg;
749b5975d6bSopenharmony_ci   PCRE2_SIZE erroffset;
750b5975d6bSopenharmony_ci   gint errcode;
751b5975d6bSopenharmony_ci-  GRegexCompileFlags nonpcre_compile_options;
752b5975d6bSopenharmony_ci-  uint32_t pcre_compile_options;
753b5975d6bSopenharmony_ci-
754b5975d6bSopenharmony_ci-  nonpcre_compile_options = compile_options & G_REGEX_COMPILE_NONPCRE_MASK;
755b5975d6bSopenharmony_ci 
756b5975d6bSopenharmony_ci   context = pcre2_compile_context_create (NULL);
757b5975d6bSopenharmony_ci 
758b5975d6bSopenharmony_ci   /* set newline options */
759b5975d6bSopenharmony_ci-  pcre2_set_newline (context, extract_newline_options (compile_options, match_options));
760b5975d6bSopenharmony_ci+  if (pcre2_set_newline (context, newline_options) != 0)
761b5975d6bSopenharmony_ci+    {
762b5975d6bSopenharmony_ci+      g_set_error (error, G_REGEX_ERROR,
763b5975d6bSopenharmony_ci+                   G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS,
764b5975d6bSopenharmony_ci+                   "Invalid newline flags");
765b5975d6bSopenharmony_ci+      pcre2_compile_context_free (context);
766b5975d6bSopenharmony_ci+      return NULL;
767b5975d6bSopenharmony_ci+    }
768b5975d6bSopenharmony_ci 
769b5975d6bSopenharmony_ci   /* set bsr options */
770b5975d6bSopenharmony_ci-  pcre2_set_bsr (context, extract_bsr_options (compile_options, match_options));
771b5975d6bSopenharmony_ci+  if (pcre2_set_bsr (context, bsr_options) != 0)
772b5975d6bSopenharmony_ci+    {
773b5975d6bSopenharmony_ci+      g_set_error (error, G_REGEX_ERROR,
774b5975d6bSopenharmony_ci+                   G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS,
775b5975d6bSopenharmony_ci+                   "Invalid BSR flags");
776b5975d6bSopenharmony_ci+      pcre2_compile_context_free (context);
777b5975d6bSopenharmony_ci+      return NULL;
778b5975d6bSopenharmony_ci+    }
779b5975d6bSopenharmony_ci 
780b5975d6bSopenharmony_ci   /* In case UTF-8 mode is used, also set PCRE2_NO_UTF_CHECK */
781b5975d6bSopenharmony_ci   if (compile_options & PCRE2_UTF)
782b5975d6bSopenharmony_ci-    {
783b5975d6bSopenharmony_ci-      compile_options |= PCRE2_NO_UTF_CHECK;
784b5975d6bSopenharmony_ci-      if (match_options != NULL)
785b5975d6bSopenharmony_ci-        *match_options |= PCRE2_NO_UTF_CHECK;
786b5975d6bSopenharmony_ci-    }
787b5975d6bSopenharmony_ci+    compile_options |= PCRE2_NO_UTF_CHECK;
788b5975d6bSopenharmony_ci 
789b5975d6bSopenharmony_ci   compile_options |= PCRE2_UCP;
790b5975d6bSopenharmony_ci 
791b5975d6bSopenharmony_ci   /* compile the pattern */
792b5975d6bSopenharmony_ci   re = pcre2_compile ((PCRE2_SPTR8) pattern,
793b5975d6bSopenharmony_ci                       PCRE2_ZERO_TERMINATED,
794b5975d6bSopenharmony_ci-                      compile_options & ~G_REGEX_FLAGS_CONVERTED,
795b5975d6bSopenharmony_ci+                      compile_options,
796b5975d6bSopenharmony_ci                       &errcode,
797b5975d6bSopenharmony_ci                       &erroffset,
798b5975d6bSopenharmony_ci                       context);
799b5975d6bSopenharmony_ci@@ -1755,16 +1769,22 @@ regex_compile (const gchar *pattern,
800b5975d6bSopenharmony_ci       return NULL;
801b5975d6bSopenharmony_ci     }
802b5975d6bSopenharmony_ci 
803b5975d6bSopenharmony_ci+  return re;
804b5975d6bSopenharmony_ci+}
805b5975d6bSopenharmony_ci+
806b5975d6bSopenharmony_ci+static uint32_t
807b5975d6bSopenharmony_ci+get_pcre2_inline_compile_options (pcre2_code *re,
808b5975d6bSopenharmony_ci+                                  uint32_t    compile_options)
809b5975d6bSopenharmony_ci+{
810b5975d6bSopenharmony_ci+  uint32_t pcre_compile_options;
811b5975d6bSopenharmony_ci+  uint32_t nonpcre_compile_options;
812b5975d6bSopenharmony_ci+
813b5975d6bSopenharmony_ci   /* For options set at the beginning of the pattern, pcre puts them into
814b5975d6bSopenharmony_ci    * compile options, e.g. "(?i)foo" will make the pcre structure store
815b5975d6bSopenharmony_ci    * PCRE2_CASELESS even though it wasn't explicitly given for compilation. */
816b5975d6bSopenharmony_ci+  nonpcre_compile_options = compile_options & G_REGEX_COMPILE_NONPCRE_MASK;
817b5975d6bSopenharmony_ci   pcre2_pattern_info (re, PCRE2_INFO_ALLOPTIONS, &pcre_compile_options);
818b5975d6bSopenharmony_ci-  compile_options = pcre_compile_options & G_REGEX_COMPILE_PCRE_MASK;
819b5975d6bSopenharmony_ci-
820b5975d6bSopenharmony_ci-  /* Don't leak PCRE2_NEWLINE_ANY, which is part of PCRE2_NEWLINE_ANYCRLF */
821b5975d6bSopenharmony_ci-  if ((pcre_compile_options & PCRE2_NEWLINE_ANYCRLF) != PCRE2_NEWLINE_ANYCRLF)
822b5975d6bSopenharmony_ci-    compile_options &= ~PCRE2_NEWLINE_ANY;
823b5975d6bSopenharmony_ci-
824b5975d6bSopenharmony_ci+  compile_options = pcre_compile_options & G_REGEX_PCRE2_COMPILE_MASK;
825b5975d6bSopenharmony_ci   compile_options |= nonpcre_compile_options;
826b5975d6bSopenharmony_ci 
827b5975d6bSopenharmony_ci   if (!(compile_options & PCRE2_DUPNAMES))
828b5975d6bSopenharmony_ci@@ -1775,10 +1795,7 @@ regex_compile (const gchar *pattern,
829b5975d6bSopenharmony_ci         compile_options |= PCRE2_DUPNAMES;
830b5975d6bSopenharmony_ci     }
831b5975d6bSopenharmony_ci 
832b5975d6bSopenharmony_ci-  if (compile_options_out != 0)
833b5975d6bSopenharmony_ci-    *compile_options_out = compile_options;
834b5975d6bSopenharmony_ci-
835b5975d6bSopenharmony_ci-  return re;
836b5975d6bSopenharmony_ci+  return compile_options;
837b5975d6bSopenharmony_ci }
838b5975d6bSopenharmony_ci 
839b5975d6bSopenharmony_ci /**
840b5975d6bSopenharmony_ci@@ -1940,7 +1957,7 @@ g_regex_get_compile_flags (const GRegex *regex)
841b5975d6bSopenharmony_ci       break;
842b5975d6bSopenharmony_ci     }
843b5975d6bSopenharmony_ci 
844b5975d6bSopenharmony_ci-  return map_to_pcre1_compile_flags (regex->compile_opts) | extra_flags;
845b5975d6bSopenharmony_ci+  return g_regex_compile_flags_from_pcre2 (regex->compile_opts) | extra_flags;
846b5975d6bSopenharmony_ci }
847b5975d6bSopenharmony_ci 
848b5975d6bSopenharmony_ci /**
849b5975d6bSopenharmony_ci@@ -1956,9 +1973,15 @@ g_regex_get_compile_flags (const GRegex *regex)
850b5975d6bSopenharmony_ci GRegexMatchFlags
851b5975d6bSopenharmony_ci g_regex_get_match_flags (const GRegex *regex)
852b5975d6bSopenharmony_ci {
853b5975d6bSopenharmony_ci+  uint32_t flags;
854b5975d6bSopenharmony_ci+
855b5975d6bSopenharmony_ci   g_return_val_if_fail (regex != NULL, 0);
856b5975d6bSopenharmony_ci 
857b5975d6bSopenharmony_ci-  return map_to_pcre1_match_flags (regex->match_opts & G_REGEX_MATCH_MASK);
858b5975d6bSopenharmony_ci+  flags = g_regex_match_flags_from_pcre2 (regex->match_opts);
859b5975d6bSopenharmony_ci+  flags |= (regex->orig_match_opts & G_REGEX_MATCH_NEWLINE_MASK);
860b5975d6bSopenharmony_ci+  flags |= (regex->orig_match_opts & (G_REGEX_MATCH_BSR_ANY | G_REGEX_MATCH_BSR_ANYCRLF));
861b5975d6bSopenharmony_ci+
862b5975d6bSopenharmony_ci+  return flags;
863b5975d6bSopenharmony_ci }
864b5975d6bSopenharmony_ci 
865b5975d6bSopenharmony_ci /**
866b5975d6bSopenharmony_ci@@ -1992,9 +2015,6 @@ g_regex_match_simple (const gchar        *pattern,
867b5975d6bSopenharmony_ci   GRegex *regex;
868b5975d6bSopenharmony_ci   gboolean result;
869b5975d6bSopenharmony_ci 
870b5975d6bSopenharmony_ci-  compile_options = map_to_pcre2_compile_flags (compile_options);
871b5975d6bSopenharmony_ci-  match_options = map_to_pcre2_match_flags (match_options);
872b5975d6bSopenharmony_ci-
873b5975d6bSopenharmony_ci   regex = g_regex_new (pattern, compile_options, G_REGEX_MATCH_DEFAULT, NULL);
874b5975d6bSopenharmony_ci   if (!regex)
875b5975d6bSopenharmony_ci     return FALSE;
876b5975d6bSopenharmony_ci@@ -2062,8 +2082,6 @@ g_regex_match (const GRegex      *regex,
877b5975d6bSopenharmony_ci                GRegexMatchFlags   match_options,
878b5975d6bSopenharmony_ci                GMatchInfo       **match_info)
879b5975d6bSopenharmony_ci {
880b5975d6bSopenharmony_ci-  match_options = map_to_pcre2_match_flags (match_options);
881b5975d6bSopenharmony_ci-
882b5975d6bSopenharmony_ci   return g_regex_match_full (regex, string, -1, 0, match_options,
883b5975d6bSopenharmony_ci                              match_info, NULL);
884b5975d6bSopenharmony_ci }
885b5975d6bSopenharmony_ci@@ -2147,8 +2165,6 @@ g_regex_match_full (const GRegex      *regex,
886b5975d6bSopenharmony_ci   GMatchInfo *info;
887b5975d6bSopenharmony_ci   gboolean match_ok;
888b5975d6bSopenharmony_ci 
889b5975d6bSopenharmony_ci-  match_options = map_to_pcre2_match_flags (match_options);
890b5975d6bSopenharmony_ci-
891b5975d6bSopenharmony_ci   g_return_val_if_fail (regex != NULL, FALSE);
892b5975d6bSopenharmony_ci   g_return_val_if_fail (string != NULL, FALSE);
893b5975d6bSopenharmony_ci   g_return_val_if_fail (start_position >= 0, FALSE);
894b5975d6bSopenharmony_ci@@ -2199,8 +2215,6 @@ g_regex_match_all (const GRegex      *regex,
895b5975d6bSopenharmony_ci                    GRegexMatchFlags   match_options,
896b5975d6bSopenharmony_ci                    GMatchInfo       **match_info)
897b5975d6bSopenharmony_ci {
898b5975d6bSopenharmony_ci-  match_options = map_to_pcre2_match_flags (match_options);
899b5975d6bSopenharmony_ci-
900b5975d6bSopenharmony_ci   return g_regex_match_all_full (regex, string, -1, 0, match_options,
901b5975d6bSopenharmony_ci                                  match_info, NULL);
902b5975d6bSopenharmony_ci }
903b5975d6bSopenharmony_ci@@ -2272,8 +2286,8 @@ g_regex_match_all_full (const GRegex      *regex,
904b5975d6bSopenharmony_ci   gboolean done;
905b5975d6bSopenharmony_ci   pcre2_code *pcre_re;
906b5975d6bSopenharmony_ci   gboolean retval;
907b5975d6bSopenharmony_ci-
908b5975d6bSopenharmony_ci-  match_options = map_to_pcre2_match_flags (match_options);
909b5975d6bSopenharmony_ci+  uint32_t newline_options;
910b5975d6bSopenharmony_ci+  uint32_t bsr_options;
911b5975d6bSopenharmony_ci 
912b5975d6bSopenharmony_ci   g_return_val_if_fail (regex != NULL, FALSE);
913b5975d6bSopenharmony_ci   g_return_val_if_fail (string != NULL, FALSE);
914b5975d6bSopenharmony_ci@@ -2281,6 +2295,14 @@ g_regex_match_all_full (const GRegex      *regex,
915b5975d6bSopenharmony_ci   g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
916b5975d6bSopenharmony_ci   g_return_val_if_fail ((match_options & ~G_REGEX_MATCH_MASK) == 0, FALSE);
917b5975d6bSopenharmony_ci 
918b5975d6bSopenharmony_ci+  newline_options = get_pcre2_newline_match_options (match_options);
919b5975d6bSopenharmony_ci+  if (!newline_options)
920b5975d6bSopenharmony_ci+    newline_options = get_pcre2_newline_compile_options (regex->orig_compile_opts);
921b5975d6bSopenharmony_ci+
922b5975d6bSopenharmony_ci+  bsr_options = get_pcre2_bsr_match_options (match_options);
923b5975d6bSopenharmony_ci+  if (!bsr_options)
924b5975d6bSopenharmony_ci+    bsr_options = get_pcre2_bsr_compile_options (regex->orig_compile_opts);
925b5975d6bSopenharmony_ci+
926b5975d6bSopenharmony_ci   /* For PCRE2 we need to turn off PCRE2_NO_AUTO_POSSESS, which is an
927b5975d6bSopenharmony_ci    * optimization for normal regex matching, but results in omitting some
928b5975d6bSopenharmony_ci    * shorter matches here, and an observable behaviour change.
929b5975d6bSopenharmony_ci@@ -2289,7 +2311,7 @@ g_regex_match_all_full (const GRegex      *regex,
930b5975d6bSopenharmony_ci    * codesearch.debian.net, so don't bother caching the recompiled RE. */
931b5975d6bSopenharmony_ci   pcre_re = regex_compile (regex->pattern,
932b5975d6bSopenharmony_ci                            regex->compile_opts | PCRE2_NO_AUTO_POSSESS,
933b5975d6bSopenharmony_ci-                           NULL, NULL, error);
934b5975d6bSopenharmony_ci+                           newline_options, bsr_options, error);
935b5975d6bSopenharmony_ci   if (pcre_re == NULL)
936b5975d6bSopenharmony_ci     return FALSE;
937b5975d6bSopenharmony_ci 
938b5975d6bSopenharmony_ci@@ -2303,7 +2325,7 @@ g_regex_match_all_full (const GRegex      *regex,
939b5975d6bSopenharmony_ci       info->matches = pcre2_dfa_match (pcre_re,
940b5975d6bSopenharmony_ci                                        (PCRE2_SPTR8) info->string, info->string_len,
941b5975d6bSopenharmony_ci                                        info->pos,
942b5975d6bSopenharmony_ci-                                       (regex->match_opts | match_options | PCRE2_NO_UTF_CHECK) & ~G_REGEX_FLAGS_CONVERTED,
943b5975d6bSopenharmony_ci+                                       (regex->match_opts | info->match_opts),
944b5975d6bSopenharmony_ci                                        info->match_data,
945b5975d6bSopenharmony_ci                                        info->match_context,
946b5975d6bSopenharmony_ci                                        info->workspace, info->n_workspace);
947b5975d6bSopenharmony_ci@@ -2436,9 +2458,6 @@ g_regex_split_simple (const gchar        *pattern,
948b5975d6bSopenharmony_ci   GRegex *regex;
949b5975d6bSopenharmony_ci   gchar **result;
950b5975d6bSopenharmony_ci 
951b5975d6bSopenharmony_ci-  compile_options = map_to_pcre2_compile_flags (compile_options);
952b5975d6bSopenharmony_ci-  match_options = map_to_pcre2_match_flags (match_options);
953b5975d6bSopenharmony_ci-
954b5975d6bSopenharmony_ci   regex = g_regex_new (pattern, compile_options, 0, NULL);
955b5975d6bSopenharmony_ci   if (!regex)
956b5975d6bSopenharmony_ci     return NULL;
957b5975d6bSopenharmony_ci@@ -2482,8 +2501,6 @@ g_regex_split (const GRegex     *regex,
958b5975d6bSopenharmony_ci                const gchar      *string,
959b5975d6bSopenharmony_ci                GRegexMatchFlags  match_options)
960b5975d6bSopenharmony_ci {
961b5975d6bSopenharmony_ci-  match_options = map_to_pcre2_match_flags (match_options);
962b5975d6bSopenharmony_ci-
963b5975d6bSopenharmony_ci   return g_regex_split_full (regex, string, -1, 0,
964b5975d6bSopenharmony_ci                              match_options, 0, NULL);
965b5975d6bSopenharmony_ci }
966b5975d6bSopenharmony_ci@@ -2548,8 +2565,6 @@ g_regex_split_full (const GRegex      *regex,
967b5975d6bSopenharmony_ci   /* the returned array of char **s */
968b5975d6bSopenharmony_ci   gchar **string_list;
969b5975d6bSopenharmony_ci 
970b5975d6bSopenharmony_ci-  match_options = map_to_pcre2_match_flags (match_options);
971b5975d6bSopenharmony_ci-
972b5975d6bSopenharmony_ci   g_return_val_if_fail (regex != NULL, NULL);
973b5975d6bSopenharmony_ci   g_return_val_if_fail (string != NULL, NULL);
974b5975d6bSopenharmony_ci   g_return_val_if_fail (start_position >= 0, NULL);
975b5975d6bSopenharmony_ci@@ -3174,8 +3189,6 @@ g_regex_replace (const GRegex      *regex,
976b5975d6bSopenharmony_ci   GList *list;
977b5975d6bSopenharmony_ci   GError *tmp_error = NULL;
978b5975d6bSopenharmony_ci 
979b5975d6bSopenharmony_ci-  match_options = map_to_pcre2_match_flags (match_options);
980b5975d6bSopenharmony_ci-
981b5975d6bSopenharmony_ci   g_return_val_if_fail (regex != NULL, NULL);
982b5975d6bSopenharmony_ci   g_return_val_if_fail (string != NULL, NULL);
983b5975d6bSopenharmony_ci   g_return_val_if_fail (start_position >= 0, NULL);
984b5975d6bSopenharmony_ci@@ -3245,8 +3258,6 @@ g_regex_replace_literal (const GRegex      *regex,
985b5975d6bSopenharmony_ci                          GRegexMatchFlags   match_options,
986b5975d6bSopenharmony_ci                          GError           **error)
987b5975d6bSopenharmony_ci {
988b5975d6bSopenharmony_ci-  match_options = map_to_pcre2_match_flags (match_options);
989b5975d6bSopenharmony_ci-
990b5975d6bSopenharmony_ci   g_return_val_if_fail (replacement != NULL, NULL);
991b5975d6bSopenharmony_ci   g_return_val_if_fail ((match_options & ~G_REGEX_MATCH_MASK) == 0, NULL);
992b5975d6bSopenharmony_ci 
993b5975d6bSopenharmony_ci@@ -3335,8 +3346,6 @@ g_regex_replace_eval (const GRegex        *regex,
994b5975d6bSopenharmony_ci   gboolean done = FALSE;
995b5975d6bSopenharmony_ci   GError *tmp_error = NULL;
996b5975d6bSopenharmony_ci 
997b5975d6bSopenharmony_ci-  match_options = map_to_pcre2_match_flags (match_options);
998b5975d6bSopenharmony_ci-
999b5975d6bSopenharmony_ci   g_return_val_if_fail (regex != NULL, NULL);
1000b5975d6bSopenharmony_ci   g_return_val_if_fail (string != NULL, NULL);
1001b5975d6bSopenharmony_ci   g_return_val_if_fail (start_position >= 0, NULL);
1002b5975d6bSopenharmony_cidiff --git a/glib/tests/regex.c b/glib/tests/regex.c
1003b5975d6bSopenharmony_ciindex 0d01d59..79e6b4a 100644
1004b5975d6bSopenharmony_ci--- a/glib/tests/regex.c
1005b5975d6bSopenharmony_ci+++ b/glib/tests/regex.c
1006b5975d6bSopenharmony_ci@@ -1,6 +1,7 @@
1007b5975d6bSopenharmony_ci /*
1008b5975d6bSopenharmony_ci  * Copyright (C) 2005 - 2006, Marco Barisione <marco@barisione.org>
1009b5975d6bSopenharmony_ci  * Copyright (C) 2010 Red Hat, Inc.
1010b5975d6bSopenharmony_ci+ * Copyright (C) 2022, Marco Trevisan <marco.trevisan@canonical.com>
1011b5975d6bSopenharmony_ci  *
1012b5975d6bSopenharmony_ci  * This library is free software; you can redistribute it and/or
1013b5975d6bSopenharmony_ci  * modify it under the terms of the GNU Lesser General Public
1014b5975d6bSopenharmony_ci@@ -2353,7 +2354,13 @@ main (int argc, char *argv[])
1015b5975d6bSopenharmony_ci 
1016b5975d6bSopenharmony_ci   /* TEST_NEW_CHECK_FLAGS(pattern, compile_opts, match_ops, real_compile_opts, real_match_opts) */
1017b5975d6bSopenharmony_ci   TEST_NEW_CHECK_FLAGS ("a", G_REGEX_OPTIMIZE, 0, G_REGEX_OPTIMIZE, 0);
1018b5975d6bSopenharmony_ci+  TEST_NEW_CHECK_FLAGS ("a", G_REGEX_OPTIMIZE, G_REGEX_MATCH_NOTEMPTY,
1019b5975d6bSopenharmony_ci+                        G_REGEX_OPTIMIZE, G_REGEX_MATCH_NOTEMPTY);
1020b5975d6bSopenharmony_ci+  TEST_NEW_CHECK_FLAGS ("a", 0, G_REGEX_MATCH_NEWLINE_ANYCRLF | G_REGEX_MATCH_BSR_ANYCRLF,
1021b5975d6bSopenharmony_ci+                        G_REGEX_NEWLINE_ANYCRLF | G_REGEX_BSR_ANYCRLF,
1022b5975d6bSopenharmony_ci+                        G_REGEX_MATCH_NEWLINE_ANYCRLF | G_REGEX_MATCH_BSR_ANYCRLF);
1023b5975d6bSopenharmony_ci   TEST_NEW_CHECK_FLAGS ("a", G_REGEX_RAW, 0, G_REGEX_RAW, 0);
1024b5975d6bSopenharmony_ci+  TEST_NEW_CHECK_FLAGS ("(?J)a", 0, 0, G_REGEX_DUPNAMES, 0);
1025b5975d6bSopenharmony_ci   TEST_NEW_CHECK_FLAGS ("^.*", 0, 0, G_REGEX_ANCHORED, 0);
1026b5975d6bSopenharmony_ci   TEST_NEW_CHECK_FLAGS ("(*UTF8)a", 0, 0, 0 /* this is the default in GRegex */, 0);
1027b5975d6bSopenharmony_ci   TEST_NEW_CHECK_FLAGS ("(*UCP)a", 0, 0, 0 /* this always on in GRegex */, 0);
1028b5975d6bSopenharmony_ci@@ -2559,6 +2566,8 @@ main (int argc, char *argv[])
1029b5975d6bSopenharmony_ci   TEST_MATCH("^b$", G_REGEX_MULTILINE | G_REGEX_NEWLINE_CR, 0, "a\rb\rc", -1, 0, 0, TRUE);
1030b5975d6bSopenharmony_ci   TEST_MATCH("^b$", G_REGEX_MULTILINE | G_REGEX_NEWLINE_LF, 0, "a\rb\rc", -1, 0, 0, FALSE);
1031b5975d6bSopenharmony_ci   TEST_MATCH("^b$", G_REGEX_MULTILINE | G_REGEX_NEWLINE_CRLF, 0, "a\rb\rc", -1, 0, 0, FALSE);
1032b5975d6bSopenharmony_ci+  TEST_MATCH("^b$", G_REGEX_MULTILINE | G_REGEX_NEWLINE_ANYCRLF, 0, "a\r\nb\nc", -1, 0, 0, TRUE);
1033b5975d6bSopenharmony_ci+  TEST_MATCH("^b$", G_REGEX_MULTILINE | G_REGEX_NEWLINE_ANYCRLF, 0, "a\r\nb\rc", -1, 0, 0, TRUE);
1034b5975d6bSopenharmony_ci   TEST_MATCH("^b$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_CR, "a\nb\nc", -1, 0, 0, FALSE);
1035b5975d6bSopenharmony_ci   TEST_MATCH("^b$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_LF, "a\nb\nc", -1, 0, 0, TRUE);
1036b5975d6bSopenharmony_ci   TEST_MATCH("^b$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_CRLF, "a\nb\nc", -1, 0, 0, FALSE);
1037b5975d6bSopenharmony_ci@@ -2568,6 +2577,8 @@ main (int argc, char *argv[])
1038b5975d6bSopenharmony_ci   TEST_MATCH("^b$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_CR, "a\rb\rc", -1, 0, 0, TRUE);
1039b5975d6bSopenharmony_ci   TEST_MATCH("^b$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_LF, "a\rb\rc", -1, 0, 0, FALSE);
1040b5975d6bSopenharmony_ci   TEST_MATCH("^b$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_CRLF, "a\rb\rc", -1, 0, 0, FALSE);
1041b5975d6bSopenharmony_ci+  TEST_MATCH("^b$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_ANYCRLF, "a\r\nb\rc", -1, 0, 0, TRUE);
1042b5975d6bSopenharmony_ci+  TEST_MATCH("^b$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_ANYCRLF, "a\r\nb\nc", -1, 0, 0, TRUE);
1043b5975d6bSopenharmony_ci 
1044b5975d6bSopenharmony_ci   TEST_MATCH("^b$", G_REGEX_MULTILINE | G_REGEX_NEWLINE_CR, G_REGEX_MATCH_NEWLINE_ANY, "a\nb\nc", -1, 0, 0, TRUE);
1045b5975d6bSopenharmony_ci   TEST_MATCH("^b$", G_REGEX_MULTILINE | G_REGEX_NEWLINE_CR, G_REGEX_MATCH_NEWLINE_ANY, "a\rb\rc", -1, 0, 0, TRUE);
1046b5975d6bSopenharmony_ci@@ -2577,6 +2588,13 @@ main (int argc, char *argv[])
1047b5975d6bSopenharmony_ci   TEST_MATCH("^b$", G_REGEX_MULTILINE | G_REGEX_NEWLINE_CR, G_REGEX_MATCH_NEWLINE_CRLF, "a\r\nb\r\nc", -1, 0, 0, TRUE);
1048b5975d6bSopenharmony_ci   TEST_MATCH("^b$", G_REGEX_MULTILINE | G_REGEX_NEWLINE_CR, G_REGEX_MATCH_NEWLINE_CRLF, "a\rb\rc", -1, 0, 0, FALSE);
1049b5975d6bSopenharmony_ci 
1050b5975d6bSopenharmony_ci+  /* See https://gitlab.gnome.org/GNOME/glib/-/issues/2729#note_1544130 */
1051b5975d6bSopenharmony_ci+  TEST_MATCH("^a$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_ANY, "a", -1, 0, 0, TRUE);
1052b5975d6bSopenharmony_ci+  TEST_MATCH("^a$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_LF, "a", -1, 0, 0, TRUE);
1053b5975d6bSopenharmony_ci+  TEST_MATCH("^a$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_CR, "a", -1, 0, 0, TRUE);
1054b5975d6bSopenharmony_ci+  TEST_MATCH("^a$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_CRLF, "a", -1, 0, 0, TRUE);
1055b5975d6bSopenharmony_ci+  TEST_MATCH("^a$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_ANYCRLF, "a", -1, 0, 0, TRUE);
1056b5975d6bSopenharmony_ci+
1057b5975d6bSopenharmony_ci   TEST_MATCH("a#\nb", G_REGEX_EXTENDED, 0, "a", -1, 0, 0, FALSE);
1058b5975d6bSopenharmony_ci   TEST_MATCH("a#\r\nb", G_REGEX_EXTENDED, 0, "a", -1, 0, 0, FALSE);
1059b5975d6bSopenharmony_ci   TEST_MATCH("a#\rb", G_REGEX_EXTENDED, 0, "a", -1, 0, 0, FALSE);
1060b5975d6bSopenharmony_ci-- 
1061b5975d6bSopenharmony_ci2.33.0
1062b5975d6bSopenharmony_ciGitLab
1063