1b5975d6bSopenharmony_ciFrom d639c4ec009537b743dcd2209184638d9f5d68b9 Mon Sep 17 00:00:00 2001 2b5975d6bSopenharmony_ciFrom: =?UTF-8?q?Marco=20Trevisan=20=28Trevi=C3=B1o=29?= <mail@3v1n0.net> 3b5975d6bSopenharmony_ciDate: Tue, 6 Sep 2022 14:49:10 +0200 4b5975d6bSopenharmony_ciSubject: [PATCH] regex: Do not mix PCRE2 Compile, Match, Newline and BSR flags 5b5975d6bSopenharmony_ci 6b5975d6bSopenharmony_ciAs per the PCRE2 port we still used to try to map the old GRegex flags 7b5975d6bSopenharmony_ci(PCRE1 based) with the new PCRE2 ones, but doing that we were also 8b5975d6bSopenharmony_cimixing flags with enums, leading to unexpected behaviors when trying to 9b5975d6bSopenharmony_ciget new line and BSR options out of bigger flags arrays. 10b5975d6bSopenharmony_ci 11b5975d6bSopenharmony_ciSo, avoid doing any mapping and store the values as native PCRE2 flags 12b5975d6bSopenharmony_ciinternally and converting them back only when requested. 13b5975d6bSopenharmony_ci 14b5975d6bSopenharmony_ciThis fixes some regressions on newline handling. 15b5975d6bSopenharmony_ci 16b5975d6bSopenharmony_ciFixes: #2729 17b5975d6bSopenharmony_ciFixes: #2688 18b5975d6bSopenharmony_ciFixes: GNOME/gtksourceview#278 19b5975d6bSopenharmony_ci--- 20b5975d6bSopenharmony_ci glib/gregex.c | 637 +++++++++++++++++++++++---------------------- 21b5975d6bSopenharmony_ci glib/tests/regex.c | 18 ++ 22b5975d6bSopenharmony_ci 2 files changed, 341 insertions(+), 314 deletions(-) 23b5975d6bSopenharmony_ci 24b5975d6bSopenharmony_cidiff --git a/glib/gregex.c b/glib/gregex.c 25b5975d6bSopenharmony_ciindex a16ea98..95695f7 100644 26b5975d6bSopenharmony_ci--- a/glib/gregex.c 27b5975d6bSopenharmony_ci+++ b/glib/gregex.c 28b5975d6bSopenharmony_ci@@ -3,6 +3,7 @@ 29b5975d6bSopenharmony_ci * Copyright (C) 1999, 2000 Scott Wimer 30b5975d6bSopenharmony_ci * Copyright (C) 2004, Matthias Clasen <mclasen@redhat.com> 31b5975d6bSopenharmony_ci * Copyright (C) 2005 - 2007, Marco Barisione <marco@barisione.org> 32b5975d6bSopenharmony_ci+ * Copyright (C) 2022, Marco Trevisan <marco.trevisan@canonical.com> 33b5975d6bSopenharmony_ci * 34b5975d6bSopenharmony_ci * This library is free software; you can redistribute it and/or 35b5975d6bSopenharmony_ci * modify it under the terms of the GNU Lesser General Public 36b5975d6bSopenharmony_ci@@ -108,62 +109,105 @@ 37b5975d6bSopenharmony_ci * library written by Philip Hazel. 38b5975d6bSopenharmony_ci */ 39b5975d6bSopenharmony_ci 40b5975d6bSopenharmony_ci-/* Signifies that flags have already been converted from pcre1 to pcre2. The 41b5975d6bSopenharmony_ci- * value 0x04000000u is also the value of PCRE2_MATCH_INVALID_UTF in pcre2.h, 42b5975d6bSopenharmony_ci- * but it is not used in gregex, so we can reuse it for this flag. 43b5975d6bSopenharmony_ci- */ 44b5975d6bSopenharmony_ci-#define G_REGEX_FLAGS_CONVERTED 0x04000000u 45b5975d6bSopenharmony_ci+#define G_REGEX_PCRE_GENERIC_MASK (PCRE2_ANCHORED | \ 46b5975d6bSopenharmony_ci+ PCRE2_NO_UTF_CHECK | \ 47b5975d6bSopenharmony_ci+ PCRE2_ENDANCHORED) 48b5975d6bSopenharmony_ci+ 49b5975d6bSopenharmony_ci /* Mask of all the possible values for GRegexCompileFlags. */ 50b5975d6bSopenharmony_ci-#define G_REGEX_COMPILE_MASK (PCRE2_CASELESS | \ 51b5975d6bSopenharmony_ci- PCRE2_MULTILINE | \ 52b5975d6bSopenharmony_ci- PCRE2_DOTALL | \ 53b5975d6bSopenharmony_ci- PCRE2_EXTENDED | \ 54b5975d6bSopenharmony_ci- PCRE2_ANCHORED | \ 55b5975d6bSopenharmony_ci- PCRE2_DOLLAR_ENDONLY | \ 56b5975d6bSopenharmony_ci- PCRE2_UNGREEDY | \ 57b5975d6bSopenharmony_ci- PCRE2_UTF | \ 58b5975d6bSopenharmony_ci- PCRE2_NO_AUTO_CAPTURE | \ 59b5975d6bSopenharmony_ci- PCRE2_FIRSTLINE | \ 60b5975d6bSopenharmony_ci- PCRE2_DUPNAMES | \ 61b5975d6bSopenharmony_ci- PCRE2_NEWLINE_CR | \ 62b5975d6bSopenharmony_ci- PCRE2_NEWLINE_LF | \ 63b5975d6bSopenharmony_ci- PCRE2_NEWLINE_CRLF | \ 64b5975d6bSopenharmony_ci- PCRE2_NEWLINE_ANYCRLF | \ 65b5975d6bSopenharmony_ci- PCRE2_BSR_ANYCRLF | \ 66b5975d6bSopenharmony_ci- G_REGEX_FLAGS_CONVERTED) 67b5975d6bSopenharmony_ci- 68b5975d6bSopenharmony_ci-/* Mask of all GRegexCompileFlags values that are (not) passed trough to PCRE */ 69b5975d6bSopenharmony_ci-#define G_REGEX_COMPILE_PCRE_MASK (G_REGEX_COMPILE_MASK & ~G_REGEX_COMPILE_NONPCRE_MASK) 70b5975d6bSopenharmony_ci-#define G_REGEX_COMPILE_NONPCRE_MASK (PCRE2_UTF | \ 71b5975d6bSopenharmony_ci- G_REGEX_FLAGS_CONVERTED) 72b5975d6bSopenharmony_ci+#define G_REGEX_COMPILE_MASK (G_REGEX_DEFAULT | \ 73b5975d6bSopenharmony_ci+ G_REGEX_CASELESS | \ 74b5975d6bSopenharmony_ci+ G_REGEX_MULTILINE | \ 75b5975d6bSopenharmony_ci+ G_REGEX_DOTALL | \ 76b5975d6bSopenharmony_ci+ G_REGEX_EXTENDED | \ 77b5975d6bSopenharmony_ci+ G_REGEX_ANCHORED | \ 78b5975d6bSopenharmony_ci+ G_REGEX_DOLLAR_ENDONLY | \ 79b5975d6bSopenharmony_ci+ G_REGEX_UNGREEDY | \ 80b5975d6bSopenharmony_ci+ G_REGEX_RAW | \ 81b5975d6bSopenharmony_ci+ G_REGEX_NO_AUTO_CAPTURE | \ 82b5975d6bSopenharmony_ci+ G_REGEX_OPTIMIZE | \ 83b5975d6bSopenharmony_ci+ G_REGEX_FIRSTLINE | \ 84b5975d6bSopenharmony_ci+ G_REGEX_DUPNAMES | \ 85b5975d6bSopenharmony_ci+ G_REGEX_NEWLINE_CR | \ 86b5975d6bSopenharmony_ci+ G_REGEX_NEWLINE_LF | \ 87b5975d6bSopenharmony_ci+ G_REGEX_NEWLINE_CRLF | \ 88b5975d6bSopenharmony_ci+ G_REGEX_NEWLINE_ANYCRLF | \ 89b5975d6bSopenharmony_ci+ G_REGEX_BSR_ANYCRLF) 90b5975d6bSopenharmony_ci+ 91b5975d6bSopenharmony_ci+#define G_REGEX_PCRE2_COMPILE_MASK (PCRE2_ALLOW_EMPTY_CLASS | \ 92b5975d6bSopenharmony_ci+ PCRE2_ALT_BSUX | \ 93b5975d6bSopenharmony_ci+ PCRE2_AUTO_CALLOUT | \ 94b5975d6bSopenharmony_ci+ PCRE2_CASELESS | \ 95b5975d6bSopenharmony_ci+ PCRE2_DOLLAR_ENDONLY | \ 96b5975d6bSopenharmony_ci+ PCRE2_DOTALL | \ 97b5975d6bSopenharmony_ci+ PCRE2_DUPNAMES | \ 98b5975d6bSopenharmony_ci+ PCRE2_EXTENDED | \ 99b5975d6bSopenharmony_ci+ PCRE2_FIRSTLINE | \ 100b5975d6bSopenharmony_ci+ PCRE2_MATCH_UNSET_BACKREF | \ 101b5975d6bSopenharmony_ci+ PCRE2_MULTILINE | \ 102b5975d6bSopenharmony_ci+ PCRE2_NEVER_UCP | \ 103b5975d6bSopenharmony_ci+ PCRE2_NEVER_UTF | \ 104b5975d6bSopenharmony_ci+ PCRE2_NO_AUTO_CAPTURE | \ 105b5975d6bSopenharmony_ci+ PCRE2_NO_AUTO_POSSESS | \ 106b5975d6bSopenharmony_ci+ PCRE2_NO_DOTSTAR_ANCHOR | \ 107b5975d6bSopenharmony_ci+ PCRE2_NO_START_OPTIMIZE | \ 108b5975d6bSopenharmony_ci+ PCRE2_UCP | \ 109b5975d6bSopenharmony_ci+ PCRE2_UNGREEDY | \ 110b5975d6bSopenharmony_ci+ PCRE2_UTF | \ 111b5975d6bSopenharmony_ci+ PCRE2_NEVER_BACKSLASH_C | \ 112b5975d6bSopenharmony_ci+ PCRE2_ALT_CIRCUMFLEX | \ 113b5975d6bSopenharmony_ci+ PCRE2_ALT_VERBNAMES | \ 114b5975d6bSopenharmony_ci+ PCRE2_USE_OFFSET_LIMIT | \ 115b5975d6bSopenharmony_ci+ PCRE2_EXTENDED_MORE | \ 116b5975d6bSopenharmony_ci+ PCRE2_LITERAL | \ 117b5975d6bSopenharmony_ci+ PCRE2_MATCH_INVALID_UTF | \ 118b5975d6bSopenharmony_ci+ G_REGEX_PCRE_GENERIC_MASK) 119b5975d6bSopenharmony_ci+ 120b5975d6bSopenharmony_ci+#define G_REGEX_COMPILE_NONPCRE_MASK (PCRE2_UTF) 121b5975d6bSopenharmony_ci 122b5975d6bSopenharmony_ci /* Mask of all the possible values for GRegexMatchFlags. */ 123b5975d6bSopenharmony_ci-#define G_REGEX_MATCH_MASK (PCRE2_ANCHORED | \ 124b5975d6bSopenharmony_ci- PCRE2_NOTBOL | \ 125b5975d6bSopenharmony_ci- PCRE2_NOTEOL | \ 126b5975d6bSopenharmony_ci- PCRE2_NOTEMPTY | \ 127b5975d6bSopenharmony_ci- PCRE2_NEWLINE_CR | \ 128b5975d6bSopenharmony_ci- PCRE2_NEWLINE_LF | \ 129b5975d6bSopenharmony_ci- PCRE2_NEWLINE_CRLF | \ 130b5975d6bSopenharmony_ci- PCRE2_NEWLINE_ANY | \ 131b5975d6bSopenharmony_ci- PCRE2_NEWLINE_ANYCRLF | \ 132b5975d6bSopenharmony_ci- PCRE2_BSR_ANYCRLF | \ 133b5975d6bSopenharmony_ci- PCRE2_BSR_UNICODE | \ 134b5975d6bSopenharmony_ci- PCRE2_PARTIAL_SOFT | \ 135b5975d6bSopenharmony_ci- PCRE2_PARTIAL_HARD | \ 136b5975d6bSopenharmony_ci- PCRE2_NOTEMPTY_ATSTART | \ 137b5975d6bSopenharmony_ci- G_REGEX_FLAGS_CONVERTED) 138b5975d6bSopenharmony_ci- 139b5975d6bSopenharmony_ci+#define G_REGEX_MATCH_MASK (G_REGEX_MATCH_DEFAULT | \ 140b5975d6bSopenharmony_ci+ G_REGEX_MATCH_ANCHORED | \ 141b5975d6bSopenharmony_ci+ G_REGEX_MATCH_NOTBOL | \ 142b5975d6bSopenharmony_ci+ G_REGEX_MATCH_NOTEOL | \ 143b5975d6bSopenharmony_ci+ G_REGEX_MATCH_NOTEMPTY | \ 144b5975d6bSopenharmony_ci+ G_REGEX_MATCH_PARTIAL | \ 145b5975d6bSopenharmony_ci+ G_REGEX_MATCH_NEWLINE_CR | \ 146b5975d6bSopenharmony_ci+ G_REGEX_MATCH_NEWLINE_LF | \ 147b5975d6bSopenharmony_ci+ G_REGEX_MATCH_NEWLINE_CRLF | \ 148b5975d6bSopenharmony_ci+ G_REGEX_MATCH_NEWLINE_ANY | \ 149b5975d6bSopenharmony_ci+ G_REGEX_MATCH_NEWLINE_ANYCRLF | \ 150b5975d6bSopenharmony_ci+ G_REGEX_MATCH_BSR_ANYCRLF | \ 151b5975d6bSopenharmony_ci+ G_REGEX_MATCH_BSR_ANY | \ 152b5975d6bSopenharmony_ci+ G_REGEX_MATCH_PARTIAL_SOFT | \ 153b5975d6bSopenharmony_ci+ G_REGEX_MATCH_PARTIAL_HARD | \ 154b5975d6bSopenharmony_ci+ G_REGEX_MATCH_NOTEMPTY_ATSTART) 155b5975d6bSopenharmony_ci+ 156b5975d6bSopenharmony_ci+#define G_REGEX_PCRE2_MATCH_MASK (PCRE2_NOTBOL |\ 157b5975d6bSopenharmony_ci+ PCRE2_NOTEOL |\ 158b5975d6bSopenharmony_ci+ PCRE2_NOTEMPTY |\ 159b5975d6bSopenharmony_ci+ PCRE2_NOTEMPTY_ATSTART |\ 160b5975d6bSopenharmony_ci+ PCRE2_PARTIAL_SOFT |\ 161b5975d6bSopenharmony_ci+ PCRE2_PARTIAL_HARD |\ 162b5975d6bSopenharmony_ci+ PCRE2_NO_JIT |\ 163b5975d6bSopenharmony_ci+ PCRE2_COPY_MATCHED_SUBJECT |\ 164b5975d6bSopenharmony_ci+ G_REGEX_PCRE_GENERIC_MASK) 165b5975d6bSopenharmony_ci+ 166b5975d6bSopenharmony_ci+/* TODO: Support PCRE2_NEWLINE_NUL */ 167b5975d6bSopenharmony_ci #define G_REGEX_NEWLINE_MASK (PCRE2_NEWLINE_CR | \ 168b5975d6bSopenharmony_ci PCRE2_NEWLINE_LF | \ 169b5975d6bSopenharmony_ci PCRE2_NEWLINE_CRLF | \ 170b5975d6bSopenharmony_ci PCRE2_NEWLINE_ANYCRLF) 171b5975d6bSopenharmony_ci 172b5975d6bSopenharmony_ci-#define G_REGEX_MATCH_NEWLINE_MASK (PCRE2_NEWLINE_CR | \ 173b5975d6bSopenharmony_ci- PCRE2_NEWLINE_LF | \ 174b5975d6bSopenharmony_ci- PCRE2_NEWLINE_CRLF | \ 175b5975d6bSopenharmony_ci- PCRE2_NEWLINE_ANYCRLF | \ 176b5975d6bSopenharmony_ci- PCRE2_NEWLINE_ANY) 177b5975d6bSopenharmony_ci+#define G_REGEX_COMPILE_NEWLINE_MASK (G_REGEX_NEWLINE_CR | \ 178b5975d6bSopenharmony_ci+ G_REGEX_NEWLINE_LF | \ 179b5975d6bSopenharmony_ci+ G_REGEX_NEWLINE_CRLF | \ 180b5975d6bSopenharmony_ci+ G_REGEX_NEWLINE_ANYCRLF) 181b5975d6bSopenharmony_ci+ 182b5975d6bSopenharmony_ci+#define G_REGEX_MATCH_NEWLINE_MASK (G_REGEX_MATCH_NEWLINE_CR | \ 183b5975d6bSopenharmony_ci+ G_REGEX_MATCH_NEWLINE_LF | \ 184b5975d6bSopenharmony_ci+ G_REGEX_MATCH_NEWLINE_CRLF | \ 185b5975d6bSopenharmony_ci+ G_REGEX_MATCH_NEWLINE_ANY | \ 186b5975d6bSopenharmony_ci+ G_REGEX_MATCH_NEWLINE_ANYCRLF) 187b5975d6bSopenharmony_ci 188b5975d6bSopenharmony_ci /* if the string is in UTF-8 use g_utf8_ functions, else use 189b5975d6bSopenharmony_ci * use just +/- 1. */ 190b5975d6bSopenharmony_ci@@ -178,7 +222,7 @@ struct _GMatchInfo 191b5975d6bSopenharmony_ci { 192b5975d6bSopenharmony_ci gint ref_count; /* the ref count (atomic) */ 193b5975d6bSopenharmony_ci GRegex *regex; /* the regex */ 194b5975d6bSopenharmony_ci- GRegexMatchFlags match_opts; /* options used at match time on the regex */ 195b5975d6bSopenharmony_ci+ uint32_t match_opts; /* pcre match options used at match time on the regex */ 196b5975d6bSopenharmony_ci gint matches; /* number of matching sub patterns, guaranteed to be <= (n_subpatterns + 1) if doing a single match (rather than matching all) */ 197b5975d6bSopenharmony_ci gint n_subpatterns; /* total number of sub patterns in the regex */ 198b5975d6bSopenharmony_ci gint pos; /* position in the string where last match left off */ 199b5975d6bSopenharmony_ci@@ -204,9 +248,10 @@ struct _GRegex 200b5975d6bSopenharmony_ci gint ref_count; /* the ref count for the immutable part (atomic) */ 201b5975d6bSopenharmony_ci gchar *pattern; /* the pattern */ 202b5975d6bSopenharmony_ci pcre2_code *pcre_re; /* compiled form of the pattern */ 203b5975d6bSopenharmony_ci- GRegexCompileFlags compile_opts; /* options used at compile time on the pattern, pcre2 values */ 204b5975d6bSopenharmony_ci+ uint32_t compile_opts; /* options used at compile time on the pattern, pcre2 values */ 205b5975d6bSopenharmony_ci GRegexCompileFlags orig_compile_opts; /* options used at compile time on the pattern, gregex values */ 206b5975d6bSopenharmony_ci- GRegexMatchFlags match_opts; /* options used at match time on the regex */ 207b5975d6bSopenharmony_ci+ uint32_t match_opts; /* pcre2 options used at match time on the regex */ 208b5975d6bSopenharmony_ci+ GRegexMatchFlags orig_match_opts; /* options used as default match options, gregex values */ 209b5975d6bSopenharmony_ci gint jit_options; /* options which were enabled for jit compiler */ 210b5975d6bSopenharmony_ci JITStatus jit_status; /* indicates the status of jit compiler for this compiled regex */ 211b5975d6bSopenharmony_ci }; 212b5975d6bSopenharmony_ci@@ -223,197 +268,182 @@ static GList *split_replacement (const gchar *replacement, 213b5975d6bSopenharmony_ci GError **error); 214b5975d6bSopenharmony_ci static void free_interpolation_data (InterpolationData *data); 215b5975d6bSopenharmony_ci 216b5975d6bSopenharmony_ci-static gint 217b5975d6bSopenharmony_ci-map_to_pcre2_compile_flags (gint pcre1_flags) 218b5975d6bSopenharmony_ci+static uint32_t 219b5975d6bSopenharmony_ci+get_pcre2_compile_options (GRegexCompileFlags compile_flags) 220b5975d6bSopenharmony_ci { 221b5975d6bSopenharmony_ci- /* Maps compile flags from pcre1 to pcre2 values 222b5975d6bSopenharmony_ci- */ 223b5975d6bSopenharmony_ci- gint pcre2_flags = G_REGEX_FLAGS_CONVERTED; 224b5975d6bSopenharmony_ci- 225b5975d6bSopenharmony_ci- if (pcre1_flags & G_REGEX_FLAGS_CONVERTED) 226b5975d6bSopenharmony_ci- return pcre1_flags; 227b5975d6bSopenharmony_ci+ /* Maps compile flags to pcre2 values */ 228b5975d6bSopenharmony_ci+ uint32_t pcre2_flags = 0; 229b5975d6bSopenharmony_ci 230b5975d6bSopenharmony_ci- if (pcre1_flags & G_REGEX_CASELESS) 231b5975d6bSopenharmony_ci+ if (compile_flags & G_REGEX_CASELESS) 232b5975d6bSopenharmony_ci pcre2_flags |= PCRE2_CASELESS; 233b5975d6bSopenharmony_ci- if (pcre1_flags & G_REGEX_MULTILINE) 234b5975d6bSopenharmony_ci+ if (compile_flags & G_REGEX_MULTILINE) 235b5975d6bSopenharmony_ci pcre2_flags |= PCRE2_MULTILINE; 236b5975d6bSopenharmony_ci- if (pcre1_flags & G_REGEX_DOTALL) 237b5975d6bSopenharmony_ci+ if (compile_flags & G_REGEX_DOTALL) 238b5975d6bSopenharmony_ci pcre2_flags |= PCRE2_DOTALL; 239b5975d6bSopenharmony_ci- if (pcre1_flags & G_REGEX_EXTENDED) 240b5975d6bSopenharmony_ci+ if (compile_flags & G_REGEX_EXTENDED) 241b5975d6bSopenharmony_ci pcre2_flags |= PCRE2_EXTENDED; 242b5975d6bSopenharmony_ci- if (pcre1_flags & G_REGEX_ANCHORED) 243b5975d6bSopenharmony_ci+ if (compile_flags & G_REGEX_ANCHORED) 244b5975d6bSopenharmony_ci pcre2_flags |= PCRE2_ANCHORED; 245b5975d6bSopenharmony_ci- if (pcre1_flags & G_REGEX_DOLLAR_ENDONLY) 246b5975d6bSopenharmony_ci+ if (compile_flags & G_REGEX_DOLLAR_ENDONLY) 247b5975d6bSopenharmony_ci pcre2_flags |= PCRE2_DOLLAR_ENDONLY; 248b5975d6bSopenharmony_ci- if (pcre1_flags & G_REGEX_UNGREEDY) 249b5975d6bSopenharmony_ci+ if (compile_flags & G_REGEX_UNGREEDY) 250b5975d6bSopenharmony_ci pcre2_flags |= PCRE2_UNGREEDY; 251b5975d6bSopenharmony_ci- if (!(pcre1_flags & G_REGEX_RAW)) 252b5975d6bSopenharmony_ci+ if (!(compile_flags & G_REGEX_RAW)) 253b5975d6bSopenharmony_ci pcre2_flags |= PCRE2_UTF; 254b5975d6bSopenharmony_ci- if (pcre1_flags & G_REGEX_NO_AUTO_CAPTURE) 255b5975d6bSopenharmony_ci+ if (compile_flags & G_REGEX_NO_AUTO_CAPTURE) 256b5975d6bSopenharmony_ci pcre2_flags |= PCRE2_NO_AUTO_CAPTURE; 257b5975d6bSopenharmony_ci- if (pcre1_flags & G_REGEX_FIRSTLINE) 258b5975d6bSopenharmony_ci+ if (compile_flags & G_REGEX_FIRSTLINE) 259b5975d6bSopenharmony_ci pcre2_flags |= PCRE2_FIRSTLINE; 260b5975d6bSopenharmony_ci- if (pcre1_flags & G_REGEX_DUPNAMES) 261b5975d6bSopenharmony_ci+ if (compile_flags & G_REGEX_DUPNAMES) 262b5975d6bSopenharmony_ci pcre2_flags |= PCRE2_DUPNAMES; 263b5975d6bSopenharmony_ci- if (pcre1_flags & G_REGEX_NEWLINE_CR) 264b5975d6bSopenharmony_ci- pcre2_flags |= PCRE2_NEWLINE_CR; 265b5975d6bSopenharmony_ci- if (pcre1_flags & G_REGEX_NEWLINE_LF) 266b5975d6bSopenharmony_ci- pcre2_flags |= PCRE2_NEWLINE_LF; 267b5975d6bSopenharmony_ci- /* Check for exact match for a composite flag */ 268b5975d6bSopenharmony_ci- if ((pcre1_flags & G_REGEX_NEWLINE_CRLF) == G_REGEX_NEWLINE_CRLF) 269b5975d6bSopenharmony_ci- pcre2_flags |= PCRE2_NEWLINE_CRLF; 270b5975d6bSopenharmony_ci- /* Check for exact match for a composite flag */ 271b5975d6bSopenharmony_ci- if ((pcre1_flags & G_REGEX_NEWLINE_ANYCRLF) == G_REGEX_NEWLINE_ANYCRLF) 272b5975d6bSopenharmony_ci- pcre2_flags |= PCRE2_NEWLINE_ANYCRLF; 273b5975d6bSopenharmony_ci- if (pcre1_flags & G_REGEX_BSR_ANYCRLF) 274b5975d6bSopenharmony_ci- pcre2_flags |= PCRE2_BSR_ANYCRLF; 275b5975d6bSopenharmony_ci- 276b5975d6bSopenharmony_ci- /* these are not available in pcre2, but we use G_REGEX_OPTIMIZE as a special 277b5975d6bSopenharmony_ci- * case to request JIT compilation */ 278b5975d6bSopenharmony_ci- if (pcre1_flags & G_REGEX_OPTIMIZE) 279b5975d6bSopenharmony_ci- pcre2_flags |= 0; 280b5975d6bSopenharmony_ci-G_GNUC_BEGIN_IGNORE_DEPRECATIONS 281b5975d6bSopenharmony_ci- if (pcre1_flags & G_REGEX_JAVASCRIPT_COMPAT) 282b5975d6bSopenharmony_ci- pcre2_flags |= 0; 283b5975d6bSopenharmony_ci-G_GNUC_END_IGNORE_DEPRECATIONS 284b5975d6bSopenharmony_ci- 285b5975d6bSopenharmony_ci- return pcre2_flags; 286b5975d6bSopenharmony_ci+ 287b5975d6bSopenharmony_ci+ return pcre2_flags & G_REGEX_PCRE2_COMPILE_MASK; 288b5975d6bSopenharmony_ci } 289b5975d6bSopenharmony_ci 290b5975d6bSopenharmony_ci-static gint 291b5975d6bSopenharmony_ci-map_to_pcre2_match_flags (gint pcre1_flags) 292b5975d6bSopenharmony_ci+static uint32_t 293b5975d6bSopenharmony_ci+get_pcre2_match_options (GRegexMatchFlags match_flags, 294b5975d6bSopenharmony_ci+ GRegexCompileFlags compile_flags) 295b5975d6bSopenharmony_ci { 296b5975d6bSopenharmony_ci- /* Maps match flags from pcre1 to pcre2 values 297b5975d6bSopenharmony_ci- */ 298b5975d6bSopenharmony_ci- gint pcre2_flags = G_REGEX_FLAGS_CONVERTED; 299b5975d6bSopenharmony_ci- 300b5975d6bSopenharmony_ci- if (pcre1_flags & G_REGEX_FLAGS_CONVERTED) 301b5975d6bSopenharmony_ci- return pcre1_flags; 302b5975d6bSopenharmony_ci+ /* Maps match flags to pcre2 values */ 303b5975d6bSopenharmony_ci+ uint32_t pcre2_flags = 0; 304b5975d6bSopenharmony_ci 305b5975d6bSopenharmony_ci- if (pcre1_flags & G_REGEX_MATCH_ANCHORED) 306b5975d6bSopenharmony_ci+ if (match_flags & G_REGEX_MATCH_ANCHORED) 307b5975d6bSopenharmony_ci pcre2_flags |= PCRE2_ANCHORED; 308b5975d6bSopenharmony_ci- if (pcre1_flags & G_REGEX_MATCH_NOTBOL) 309b5975d6bSopenharmony_ci+ if (match_flags & G_REGEX_MATCH_NOTBOL) 310b5975d6bSopenharmony_ci pcre2_flags |= PCRE2_NOTBOL; 311b5975d6bSopenharmony_ci- if (pcre1_flags & G_REGEX_MATCH_NOTEOL) 312b5975d6bSopenharmony_ci+ if (match_flags & G_REGEX_MATCH_NOTEOL) 313b5975d6bSopenharmony_ci pcre2_flags |= PCRE2_NOTEOL; 314b5975d6bSopenharmony_ci- if (pcre1_flags & G_REGEX_MATCH_NOTEMPTY) 315b5975d6bSopenharmony_ci+ if (match_flags & G_REGEX_MATCH_NOTEMPTY) 316b5975d6bSopenharmony_ci pcre2_flags |= PCRE2_NOTEMPTY; 317b5975d6bSopenharmony_ci- if (pcre1_flags & G_REGEX_MATCH_NEWLINE_CR) 318b5975d6bSopenharmony_ci- pcre2_flags |= PCRE2_NEWLINE_CR; 319b5975d6bSopenharmony_ci- if (pcre1_flags & G_REGEX_MATCH_NEWLINE_LF) 320b5975d6bSopenharmony_ci- pcre2_flags |= PCRE2_NEWLINE_LF; 321b5975d6bSopenharmony_ci- /* Check for exact match for a composite flag */ 322b5975d6bSopenharmony_ci- if ((pcre1_flags & G_REGEX_MATCH_NEWLINE_CRLF) == G_REGEX_MATCH_NEWLINE_CRLF) 323b5975d6bSopenharmony_ci- pcre2_flags |= PCRE2_NEWLINE_CRLF; 324b5975d6bSopenharmony_ci- if (pcre1_flags & G_REGEX_MATCH_NEWLINE_ANY) 325b5975d6bSopenharmony_ci- pcre2_flags |= PCRE2_NEWLINE_ANY; 326b5975d6bSopenharmony_ci- /* Check for exact match for a composite flag */ 327b5975d6bSopenharmony_ci- if ((pcre1_flags & G_REGEX_MATCH_NEWLINE_ANYCRLF) == G_REGEX_MATCH_NEWLINE_ANYCRLF) 328b5975d6bSopenharmony_ci- pcre2_flags |= PCRE2_NEWLINE_ANYCRLF; 329b5975d6bSopenharmony_ci- if (pcre1_flags & G_REGEX_MATCH_BSR_ANYCRLF) 330b5975d6bSopenharmony_ci- pcre2_flags |= PCRE2_BSR_ANYCRLF; 331b5975d6bSopenharmony_ci- if (pcre1_flags & G_REGEX_MATCH_BSR_ANY) 332b5975d6bSopenharmony_ci- pcre2_flags |= PCRE2_BSR_UNICODE; 333b5975d6bSopenharmony_ci- if (pcre1_flags & G_REGEX_MATCH_PARTIAL_SOFT) 334b5975d6bSopenharmony_ci+ if (match_flags & G_REGEX_MATCH_PARTIAL_SOFT) 335b5975d6bSopenharmony_ci pcre2_flags |= PCRE2_PARTIAL_SOFT; 336b5975d6bSopenharmony_ci- if (pcre1_flags & G_REGEX_MATCH_PARTIAL_HARD) 337b5975d6bSopenharmony_ci+ if (match_flags & G_REGEX_MATCH_PARTIAL_HARD) 338b5975d6bSopenharmony_ci pcre2_flags |= PCRE2_PARTIAL_HARD; 339b5975d6bSopenharmony_ci- if (pcre1_flags & G_REGEX_MATCH_NOTEMPTY_ATSTART) 340b5975d6bSopenharmony_ci+ if (match_flags & G_REGEX_MATCH_NOTEMPTY_ATSTART) 341b5975d6bSopenharmony_ci pcre2_flags |= PCRE2_NOTEMPTY_ATSTART; 342b5975d6bSopenharmony_ci 343b5975d6bSopenharmony_ci- return pcre2_flags; 344b5975d6bSopenharmony_ci+ if (compile_flags & G_REGEX_RAW) 345b5975d6bSopenharmony_ci+ pcre2_flags |= PCRE2_NO_UTF_CHECK; 346b5975d6bSopenharmony_ci+ 347b5975d6bSopenharmony_ci+ return pcre2_flags & G_REGEX_PCRE2_MATCH_MASK; 348b5975d6bSopenharmony_ci } 349b5975d6bSopenharmony_ci 350b5975d6bSopenharmony_ci-static gint 351b5975d6bSopenharmony_ci-map_to_pcre1_compile_flags (gint pcre2_flags) 352b5975d6bSopenharmony_ci+static GRegexCompileFlags 353b5975d6bSopenharmony_ci+g_regex_compile_flags_from_pcre2 (uint32_t pcre2_flags) 354b5975d6bSopenharmony_ci { 355b5975d6bSopenharmony_ci- /* Maps compile flags from pcre2 to pcre1 values 356b5975d6bSopenharmony_ci- */ 357b5975d6bSopenharmony_ci- gint pcre1_flags = 0; 358b5975d6bSopenharmony_ci- 359b5975d6bSopenharmony_ci- if (!(pcre2_flags & G_REGEX_FLAGS_CONVERTED)) 360b5975d6bSopenharmony_ci- return pcre2_flags; 361b5975d6bSopenharmony_ci+ GRegexCompileFlags compile_flags = G_REGEX_DEFAULT; 362b5975d6bSopenharmony_ci 363b5975d6bSopenharmony_ci if (pcre2_flags & PCRE2_CASELESS) 364b5975d6bSopenharmony_ci- pcre1_flags |= G_REGEX_CASELESS; 365b5975d6bSopenharmony_ci+ compile_flags |= G_REGEX_CASELESS; 366b5975d6bSopenharmony_ci if (pcre2_flags & PCRE2_MULTILINE) 367b5975d6bSopenharmony_ci- pcre1_flags |= G_REGEX_MULTILINE; 368b5975d6bSopenharmony_ci+ compile_flags |= G_REGEX_MULTILINE; 369b5975d6bSopenharmony_ci if (pcre2_flags & PCRE2_DOTALL) 370b5975d6bSopenharmony_ci- pcre1_flags |= G_REGEX_DOTALL; 371b5975d6bSopenharmony_ci+ compile_flags |= G_REGEX_DOTALL; 372b5975d6bSopenharmony_ci if (pcre2_flags & PCRE2_EXTENDED) 373b5975d6bSopenharmony_ci- pcre1_flags |= G_REGEX_EXTENDED; 374b5975d6bSopenharmony_ci+ compile_flags |= G_REGEX_EXTENDED; 375b5975d6bSopenharmony_ci if (pcre2_flags & PCRE2_ANCHORED) 376b5975d6bSopenharmony_ci- pcre1_flags |= G_REGEX_ANCHORED; 377b5975d6bSopenharmony_ci+ compile_flags |= G_REGEX_ANCHORED; 378b5975d6bSopenharmony_ci if (pcre2_flags & PCRE2_DOLLAR_ENDONLY) 379b5975d6bSopenharmony_ci- pcre1_flags |= G_REGEX_DOLLAR_ENDONLY; 380b5975d6bSopenharmony_ci+ compile_flags |= G_REGEX_DOLLAR_ENDONLY; 381b5975d6bSopenharmony_ci if (pcre2_flags & PCRE2_UNGREEDY) 382b5975d6bSopenharmony_ci- pcre1_flags |= G_REGEX_UNGREEDY; 383b5975d6bSopenharmony_ci+ compile_flags |= G_REGEX_UNGREEDY; 384b5975d6bSopenharmony_ci if (!(pcre2_flags & PCRE2_UTF)) 385b5975d6bSopenharmony_ci- pcre1_flags |= G_REGEX_RAW; 386b5975d6bSopenharmony_ci+ compile_flags |= G_REGEX_RAW; 387b5975d6bSopenharmony_ci if (pcre2_flags & PCRE2_NO_AUTO_CAPTURE) 388b5975d6bSopenharmony_ci- pcre1_flags |= G_REGEX_NO_AUTO_CAPTURE; 389b5975d6bSopenharmony_ci+ compile_flags |= G_REGEX_NO_AUTO_CAPTURE; 390b5975d6bSopenharmony_ci if (pcre2_flags & PCRE2_FIRSTLINE) 391b5975d6bSopenharmony_ci- pcre1_flags |= G_REGEX_FIRSTLINE; 392b5975d6bSopenharmony_ci+ compile_flags |= G_REGEX_FIRSTLINE; 393b5975d6bSopenharmony_ci if (pcre2_flags & PCRE2_DUPNAMES) 394b5975d6bSopenharmony_ci- pcre1_flags |= G_REGEX_DUPNAMES; 395b5975d6bSopenharmony_ci- if (pcre2_flags & PCRE2_NEWLINE_CR) 396b5975d6bSopenharmony_ci- pcre1_flags |= G_REGEX_NEWLINE_CR; 397b5975d6bSopenharmony_ci- if (pcre2_flags & PCRE2_NEWLINE_LF) 398b5975d6bSopenharmony_ci- pcre1_flags |= G_REGEX_NEWLINE_LF; 399b5975d6bSopenharmony_ci- /* Check for exact match for a composite flag */ 400b5975d6bSopenharmony_ci- if ((pcre2_flags & PCRE2_NEWLINE_CRLF) == PCRE2_NEWLINE_CRLF) 401b5975d6bSopenharmony_ci- pcre1_flags |= G_REGEX_NEWLINE_CRLF; 402b5975d6bSopenharmony_ci- /* Check for exact match for a composite flag */ 403b5975d6bSopenharmony_ci- if ((pcre2_flags & PCRE2_NEWLINE_ANYCRLF) == PCRE2_NEWLINE_ANYCRLF) 404b5975d6bSopenharmony_ci- pcre1_flags |= G_REGEX_NEWLINE_ANYCRLF; 405b5975d6bSopenharmony_ci- if (pcre2_flags & PCRE2_BSR_ANYCRLF) 406b5975d6bSopenharmony_ci- pcre1_flags |= G_REGEX_BSR_ANYCRLF; 407b5975d6bSopenharmony_ci- 408b5975d6bSopenharmony_ci- return pcre1_flags; 409b5975d6bSopenharmony_ci+ compile_flags |= G_REGEX_DUPNAMES; 410b5975d6bSopenharmony_ci+ 411b5975d6bSopenharmony_ci+ return compile_flags & G_REGEX_COMPILE_MASK; 412b5975d6bSopenharmony_ci } 413b5975d6bSopenharmony_ci 414b5975d6bSopenharmony_ci-static gint 415b5975d6bSopenharmony_ci-map_to_pcre1_match_flags (gint pcre2_flags) 416b5975d6bSopenharmony_ci+static GRegexMatchFlags 417b5975d6bSopenharmony_ci+g_regex_match_flags_from_pcre2 (uint32_t pcre2_flags) 418b5975d6bSopenharmony_ci { 419b5975d6bSopenharmony_ci- /* Maps match flags from pcre2 to pcre1 values 420b5975d6bSopenharmony_ci- */ 421b5975d6bSopenharmony_ci- gint pcre1_flags = 0; 422b5975d6bSopenharmony_ci- 423b5975d6bSopenharmony_ci- if (!(pcre2_flags & G_REGEX_FLAGS_CONVERTED)) 424b5975d6bSopenharmony_ci- return pcre2_flags; 425b5975d6bSopenharmony_ci+ GRegexMatchFlags match_flags = G_REGEX_MATCH_DEFAULT; 426b5975d6bSopenharmony_ci 427b5975d6bSopenharmony_ci if (pcre2_flags & PCRE2_ANCHORED) 428b5975d6bSopenharmony_ci- pcre1_flags |= G_REGEX_MATCH_ANCHORED; 429b5975d6bSopenharmony_ci+ match_flags |= G_REGEX_MATCH_ANCHORED; 430b5975d6bSopenharmony_ci if (pcre2_flags & PCRE2_NOTBOL) 431b5975d6bSopenharmony_ci- pcre1_flags |= G_REGEX_MATCH_NOTBOL; 432b5975d6bSopenharmony_ci+ match_flags |= G_REGEX_MATCH_NOTBOL; 433b5975d6bSopenharmony_ci if (pcre2_flags & PCRE2_NOTEOL) 434b5975d6bSopenharmony_ci- pcre1_flags |= G_REGEX_MATCH_NOTEOL; 435b5975d6bSopenharmony_ci+ match_flags |= G_REGEX_MATCH_NOTEOL; 436b5975d6bSopenharmony_ci if (pcre2_flags & PCRE2_NOTEMPTY) 437b5975d6bSopenharmony_ci- pcre1_flags |= G_REGEX_MATCH_NOTEMPTY; 438b5975d6bSopenharmony_ci- if (pcre2_flags & PCRE2_NEWLINE_CR) 439b5975d6bSopenharmony_ci- pcre1_flags |= G_REGEX_MATCH_NEWLINE_CR; 440b5975d6bSopenharmony_ci- if (pcre2_flags & PCRE2_NEWLINE_LF) 441b5975d6bSopenharmony_ci- pcre1_flags |= G_REGEX_MATCH_NEWLINE_LF; 442b5975d6bSopenharmony_ci- /* Check for exact match for a composite flag */ 443b5975d6bSopenharmony_ci- if ((pcre2_flags & PCRE2_NEWLINE_CRLF) == PCRE2_NEWLINE_CRLF) 444b5975d6bSopenharmony_ci- pcre1_flags |= G_REGEX_MATCH_NEWLINE_CRLF; 445b5975d6bSopenharmony_ci- if (pcre2_flags & PCRE2_NEWLINE_ANY) 446b5975d6bSopenharmony_ci- pcre1_flags |= G_REGEX_MATCH_NEWLINE_ANY; 447b5975d6bSopenharmony_ci- /* Check for exact match for a composite flag */ 448b5975d6bSopenharmony_ci- if ((pcre2_flags & PCRE2_NEWLINE_ANYCRLF) == PCRE2_NEWLINE_ANYCRLF) 449b5975d6bSopenharmony_ci- pcre1_flags |= G_REGEX_MATCH_NEWLINE_ANYCRLF; 450b5975d6bSopenharmony_ci- if (pcre2_flags & PCRE2_BSR_ANYCRLF) 451b5975d6bSopenharmony_ci- pcre1_flags |= G_REGEX_MATCH_BSR_ANYCRLF; 452b5975d6bSopenharmony_ci- if (pcre2_flags & PCRE2_BSR_UNICODE) 453b5975d6bSopenharmony_ci- pcre1_flags |= G_REGEX_MATCH_BSR_ANY; 454b5975d6bSopenharmony_ci+ match_flags |= G_REGEX_MATCH_NOTEMPTY; 455b5975d6bSopenharmony_ci if (pcre2_flags & PCRE2_PARTIAL_SOFT) 456b5975d6bSopenharmony_ci- pcre1_flags |= G_REGEX_MATCH_PARTIAL_SOFT; 457b5975d6bSopenharmony_ci+ match_flags |= G_REGEX_MATCH_PARTIAL_SOFT; 458b5975d6bSopenharmony_ci if (pcre2_flags & PCRE2_PARTIAL_HARD) 459b5975d6bSopenharmony_ci- pcre1_flags |= G_REGEX_MATCH_PARTIAL_HARD; 460b5975d6bSopenharmony_ci+ match_flags |= G_REGEX_MATCH_PARTIAL_HARD; 461b5975d6bSopenharmony_ci if (pcre2_flags & PCRE2_NOTEMPTY_ATSTART) 462b5975d6bSopenharmony_ci- pcre1_flags |= G_REGEX_MATCH_NOTEMPTY_ATSTART; 463b5975d6bSopenharmony_ci+ match_flags |= G_REGEX_MATCH_NOTEMPTY_ATSTART; 464b5975d6bSopenharmony_ci+ 465b5975d6bSopenharmony_ci+ return (match_flags & G_REGEX_MATCH_MASK); 466b5975d6bSopenharmony_ci+} 467b5975d6bSopenharmony_ci+ 468b5975d6bSopenharmony_ci+static uint32_t 469b5975d6bSopenharmony_ci+get_pcre2_newline_compile_options (GRegexCompileFlags compile_flags) 470b5975d6bSopenharmony_ci+{ 471b5975d6bSopenharmony_ci+ compile_flags &= G_REGEX_COMPILE_NEWLINE_MASK; 472b5975d6bSopenharmony_ci+ 473b5975d6bSopenharmony_ci+ switch (compile_flags) 474b5975d6bSopenharmony_ci+ { 475b5975d6bSopenharmony_ci+ case G_REGEX_NEWLINE_CR: 476b5975d6bSopenharmony_ci+ return PCRE2_NEWLINE_CR; 477b5975d6bSopenharmony_ci+ case G_REGEX_NEWLINE_LF: 478b5975d6bSopenharmony_ci+ return PCRE2_NEWLINE_LF; 479b5975d6bSopenharmony_ci+ case G_REGEX_NEWLINE_CRLF: 480b5975d6bSopenharmony_ci+ return PCRE2_NEWLINE_CRLF; 481b5975d6bSopenharmony_ci+ case G_REGEX_NEWLINE_ANYCRLF: 482b5975d6bSopenharmony_ci+ return PCRE2_NEWLINE_ANYCRLF; 483b5975d6bSopenharmony_ci+ default: 484b5975d6bSopenharmony_ci+ if (compile_flags != 0) 485b5975d6bSopenharmony_ci+ return 0; 486b5975d6bSopenharmony_ci+ 487b5975d6bSopenharmony_ci+ return PCRE2_NEWLINE_ANY; 488b5975d6bSopenharmony_ci+ } 489b5975d6bSopenharmony_ci+} 490b5975d6bSopenharmony_ci+ 491b5975d6bSopenharmony_ci+static uint32_t 492b5975d6bSopenharmony_ci+get_pcre2_newline_match_options (GRegexMatchFlags match_flags) 493b5975d6bSopenharmony_ci+{ 494b5975d6bSopenharmony_ci+ switch (match_flags & G_REGEX_MATCH_NEWLINE_MASK) 495b5975d6bSopenharmony_ci+ { 496b5975d6bSopenharmony_ci+ case G_REGEX_MATCH_NEWLINE_CR: 497b5975d6bSopenharmony_ci+ return PCRE2_NEWLINE_CR; 498b5975d6bSopenharmony_ci+ case G_REGEX_MATCH_NEWLINE_LF: 499b5975d6bSopenharmony_ci+ return PCRE2_NEWLINE_LF; 500b5975d6bSopenharmony_ci+ case G_REGEX_MATCH_NEWLINE_CRLF: 501b5975d6bSopenharmony_ci+ return PCRE2_NEWLINE_CRLF; 502b5975d6bSopenharmony_ci+ case G_REGEX_MATCH_NEWLINE_ANY: 503b5975d6bSopenharmony_ci+ return PCRE2_NEWLINE_ANY; 504b5975d6bSopenharmony_ci+ case G_REGEX_MATCH_NEWLINE_ANYCRLF: 505b5975d6bSopenharmony_ci+ return PCRE2_NEWLINE_ANYCRLF; 506b5975d6bSopenharmony_ci+ default: 507b5975d6bSopenharmony_ci+ return 0; 508b5975d6bSopenharmony_ci+ } 509b5975d6bSopenharmony_ci+} 510b5975d6bSopenharmony_ci+ 511b5975d6bSopenharmony_ci+static uint32_t 512b5975d6bSopenharmony_ci+get_pcre2_bsr_compile_options (GRegexCompileFlags compile_flags) 513b5975d6bSopenharmony_ci+{ 514b5975d6bSopenharmony_ci+ if (compile_flags & G_REGEX_BSR_ANYCRLF) 515b5975d6bSopenharmony_ci+ return PCRE2_BSR_ANYCRLF; 516b5975d6bSopenharmony_ci 517b5975d6bSopenharmony_ci- return pcre1_flags; 518b5975d6bSopenharmony_ci+ return PCRE2_BSR_UNICODE; 519b5975d6bSopenharmony_ci+} 520b5975d6bSopenharmony_ci+ 521b5975d6bSopenharmony_ci+static uint32_t 522b5975d6bSopenharmony_ci+get_pcre2_bsr_match_options (GRegexMatchFlags match_flags) 523b5975d6bSopenharmony_ci+{ 524b5975d6bSopenharmony_ci+ if (match_flags & G_REGEX_MATCH_BSR_ANYCRLF) 525b5975d6bSopenharmony_ci+ return PCRE2_BSR_ANYCRLF; 526b5975d6bSopenharmony_ci+ 527b5975d6bSopenharmony_ci+ if (match_flags & G_REGEX_MATCH_BSR_ANY) 528b5975d6bSopenharmony_ci+ return PCRE2_BSR_UNICODE; 529b5975d6bSopenharmony_ci+ 530b5975d6bSopenharmony_ci+ return 0; 531b5975d6bSopenharmony_ci } 532b5975d6bSopenharmony_ci 533b5975d6bSopenharmony_ci static const gchar * 534b5975d6bSopenharmony_ci@@ -742,12 +772,12 @@ translate_compile_error (gint *errcode, const gchar **errmsg) 535b5975d6bSopenharmony_ci /* GMatchInfo */ 536b5975d6bSopenharmony_ci 537b5975d6bSopenharmony_ci static GMatchInfo * 538b5975d6bSopenharmony_ci-match_info_new (const GRegex *regex, 539b5975d6bSopenharmony_ci- const gchar *string, 540b5975d6bSopenharmony_ci- gint string_len, 541b5975d6bSopenharmony_ci- gint start_position, 542b5975d6bSopenharmony_ci- gint match_options, 543b5975d6bSopenharmony_ci- gboolean is_dfa) 544b5975d6bSopenharmony_ci+match_info_new (const GRegex *regex, 545b5975d6bSopenharmony_ci+ const gchar *string, 546b5975d6bSopenharmony_ci+ gint string_len, 547b5975d6bSopenharmony_ci+ gint start_position, 548b5975d6bSopenharmony_ci+ GRegexMatchFlags match_options, 549b5975d6bSopenharmony_ci+ gboolean is_dfa) 550b5975d6bSopenharmony_ci { 551b5975d6bSopenharmony_ci GMatchInfo *match_info; 552b5975d6bSopenharmony_ci 553b5975d6bSopenharmony_ci@@ -761,7 +791,8 @@ match_info_new (const GRegex *regex, 554b5975d6bSopenharmony_ci match_info->string_len = string_len; 555b5975d6bSopenharmony_ci match_info->matches = PCRE2_ERROR_NOMATCH; 556b5975d6bSopenharmony_ci match_info->pos = start_position; 557b5975d6bSopenharmony_ci- match_info->match_opts = match_options; 558b5975d6bSopenharmony_ci+ match_info->match_opts = 559b5975d6bSopenharmony_ci+ get_pcre2_match_options (match_options, regex->orig_compile_opts); 560b5975d6bSopenharmony_ci 561b5975d6bSopenharmony_ci pcre2_pattern_info (regex->pcre_re, PCRE2_INFO_CAPTURECOUNT, 562b5975d6bSopenharmony_ci &match_info->n_subpatterns); 563b5975d6bSopenharmony_ci@@ -822,8 +853,8 @@ recalc_match_offsets (GMatchInfo *match_info, 564b5975d6bSopenharmony_ci } 565b5975d6bSopenharmony_ci 566b5975d6bSopenharmony_ci static void 567b5975d6bSopenharmony_ci-enable_jit_with_match_options (GRegex *regex, 568b5975d6bSopenharmony_ci- GRegexMatchFlags match_options) 569b5975d6bSopenharmony_ci+enable_jit_with_match_options (GRegex *regex, 570b5975d6bSopenharmony_ci+ uint32_t match_options) 571b5975d6bSopenharmony_ci { 572b5975d6bSopenharmony_ci gint old_jit_options, new_jit_options, retval; 573b5975d6bSopenharmony_ci 574b5975d6bSopenharmony_ci@@ -1009,7 +1040,7 @@ g_match_info_next (GMatchInfo *match_info, 575b5975d6bSopenharmony_ci return FALSE; 576b5975d6bSopenharmony_ci } 577b5975d6bSopenharmony_ci 578b5975d6bSopenharmony_ci- opts = map_to_pcre2_match_flags (match_info->regex->match_opts | match_info->match_opts); 579b5975d6bSopenharmony_ci+ opts = match_info->regex->match_opts | match_info->match_opts; 580b5975d6bSopenharmony_ci 581b5975d6bSopenharmony_ci enable_jit_with_match_options (match_info->regex, opts); 582b5975d6bSopenharmony_ci if (match_info->regex->jit_status == JIT_STATUS_ENABLED) 583b5975d6bSopenharmony_ci@@ -1018,7 +1049,7 @@ g_match_info_next (GMatchInfo *match_info, 584b5975d6bSopenharmony_ci (PCRE2_SPTR8) match_info->string, 585b5975d6bSopenharmony_ci match_info->string_len, 586b5975d6bSopenharmony_ci match_info->pos, 587b5975d6bSopenharmony_ci- opts & ~G_REGEX_FLAGS_CONVERTED, 588b5975d6bSopenharmony_ci+ opts, 589b5975d6bSopenharmony_ci match_info->match_data, 590b5975d6bSopenharmony_ci match_info->match_context); 591b5975d6bSopenharmony_ci } 592b5975d6bSopenharmony_ci@@ -1028,7 +1059,7 @@ g_match_info_next (GMatchInfo *match_info, 593b5975d6bSopenharmony_ci (PCRE2_SPTR8) match_info->string, 594b5975d6bSopenharmony_ci match_info->string_len, 595b5975d6bSopenharmony_ci match_info->pos, 596b5975d6bSopenharmony_ci- opts & ~G_REGEX_FLAGS_CONVERTED, 597b5975d6bSopenharmony_ci+ opts, 598b5975d6bSopenharmony_ci match_info->match_data, 599b5975d6bSopenharmony_ci match_info->match_context); 600b5975d6bSopenharmony_ci } 601b5975d6bSopenharmony_ci@@ -1563,14 +1594,14 @@ g_regex_unref (GRegex *regex) 602b5975d6bSopenharmony_ci } 603b5975d6bSopenharmony_ci } 604b5975d6bSopenharmony_ci 605b5975d6bSopenharmony_ci-/* 606b5975d6bSopenharmony_ci- * @match_options: (inout) (optional): 607b5975d6bSopenharmony_ci- */ 608b5975d6bSopenharmony_ci-static pcre2_code *regex_compile (const gchar *pattern, 609b5975d6bSopenharmony_ci- GRegexCompileFlags compile_options, 610b5975d6bSopenharmony_ci- GRegexCompileFlags *compile_options_out, 611b5975d6bSopenharmony_ci- GRegexMatchFlags *match_options, 612b5975d6bSopenharmony_ci- GError **error); 613b5975d6bSopenharmony_ci+static pcre2_code * regex_compile (const gchar *pattern, 614b5975d6bSopenharmony_ci+ uint32_t compile_options, 615b5975d6bSopenharmony_ci+ uint32_t newline_options, 616b5975d6bSopenharmony_ci+ uint32_t bsr_options, 617b5975d6bSopenharmony_ci+ GError **error); 618b5975d6bSopenharmony_ci+ 619b5975d6bSopenharmony_ci+static uint32_t get_pcre2_inline_compile_options (pcre2_code *re, 620b5975d6bSopenharmony_ci+ uint32_t compile_options); 621b5975d6bSopenharmony_ci 622b5975d6bSopenharmony_ci /** 623b5975d6bSopenharmony_ci * g_regex_new: 624b5975d6bSopenharmony_ci@@ -1596,11 +1627,10 @@ g_regex_new (const gchar *pattern, 625b5975d6bSopenharmony_ci GRegex *regex; 626b5975d6bSopenharmony_ci pcre2_code *re; 627b5975d6bSopenharmony_ci static gsize initialised = 0; 628b5975d6bSopenharmony_ci- GRegexCompileFlags orig_compile_opts; 629b5975d6bSopenharmony_ci- 630b5975d6bSopenharmony_ci- orig_compile_opts = compile_options; 631b5975d6bSopenharmony_ci- compile_options = map_to_pcre2_compile_flags (compile_options); 632b5975d6bSopenharmony_ci- match_options = map_to_pcre2_match_flags (match_options); 633b5975d6bSopenharmony_ci+ uint32_t pcre_compile_options; 634b5975d6bSopenharmony_ci+ uint32_t pcre_match_options; 635b5975d6bSopenharmony_ci+ uint32_t newline_options; 636b5975d6bSopenharmony_ci+ uint32_t bsr_options; 637b5975d6bSopenharmony_ci 638b5975d6bSopenharmony_ci g_return_val_if_fail (pattern != NULL, NULL); 639b5975d6bSopenharmony_ci g_return_val_if_fail (error == NULL || *error == NULL, NULL); 640b5975d6bSopenharmony_ci@@ -1618,113 +1648,97 @@ g_regex_new (const gchar *pattern, 641b5975d6bSopenharmony_ci g_once_init_leave (&initialised, supports_utf8 ? 1 : 2); 642b5975d6bSopenharmony_ci } 643b5975d6bSopenharmony_ci 644b5975d6bSopenharmony_ci- if (G_UNLIKELY (initialised != 1)) 645b5975d6bSopenharmony_ci+ if (G_UNLIKELY (initialised != 1)) 646b5975d6bSopenharmony_ci { 647b5975d6bSopenharmony_ci g_set_error_literal (error, G_REGEX_ERROR, G_REGEX_ERROR_COMPILE, 648b5975d6bSopenharmony_ci _("PCRE library is compiled with incompatible options")); 649b5975d6bSopenharmony_ci return NULL; 650b5975d6bSopenharmony_ci } 651b5975d6bSopenharmony_ci 652b5975d6bSopenharmony_ci- switch (compile_options & G_REGEX_NEWLINE_MASK) 653b5975d6bSopenharmony_ci+ pcre_compile_options = get_pcre2_compile_options (compile_options); 654b5975d6bSopenharmony_ci+ pcre_match_options = get_pcre2_match_options (match_options, compile_options); 655b5975d6bSopenharmony_ci+ 656b5975d6bSopenharmony_ci+ newline_options = get_pcre2_newline_match_options (match_options); 657b5975d6bSopenharmony_ci+ if (newline_options == 0) 658b5975d6bSopenharmony_ci+ newline_options = get_pcre2_newline_compile_options (compile_options); 659b5975d6bSopenharmony_ci+ 660b5975d6bSopenharmony_ci+ if (newline_options == 0) 661b5975d6bSopenharmony_ci { 662b5975d6bSopenharmony_ci- case 0: /* PCRE2_NEWLINE_ANY */ 663b5975d6bSopenharmony_ci- case PCRE2_NEWLINE_CR: 664b5975d6bSopenharmony_ci- case PCRE2_NEWLINE_LF: 665b5975d6bSopenharmony_ci- case PCRE2_NEWLINE_CRLF: 666b5975d6bSopenharmony_ci- case PCRE2_NEWLINE_ANYCRLF: 667b5975d6bSopenharmony_ci- break; 668b5975d6bSopenharmony_ci- default: 669b5975d6bSopenharmony_ci g_set_error (error, G_REGEX_ERROR, G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS, 670b5975d6bSopenharmony_ci "Invalid newline flags"); 671b5975d6bSopenharmony_ci return NULL; 672b5975d6bSopenharmony_ci } 673b5975d6bSopenharmony_ci 674b5975d6bSopenharmony_ci- re = regex_compile (pattern, compile_options, &compile_options, 675b5975d6bSopenharmony_ci- &match_options, error); 676b5975d6bSopenharmony_ci+ bsr_options = get_pcre2_bsr_match_options (match_options); 677b5975d6bSopenharmony_ci+ if (!bsr_options) 678b5975d6bSopenharmony_ci+ bsr_options = get_pcre2_bsr_compile_options (compile_options); 679b5975d6bSopenharmony_ci+ 680b5975d6bSopenharmony_ci+ re = regex_compile (pattern, pcre_compile_options, 681b5975d6bSopenharmony_ci+ newline_options, bsr_options, error); 682b5975d6bSopenharmony_ci if (re == NULL) 683b5975d6bSopenharmony_ci return NULL; 684b5975d6bSopenharmony_ci 685b5975d6bSopenharmony_ci+ pcre_compile_options |= 686b5975d6bSopenharmony_ci+ get_pcre2_inline_compile_options (re, pcre_compile_options); 687b5975d6bSopenharmony_ci+ 688b5975d6bSopenharmony_ci regex = g_new0 (GRegex, 1); 689b5975d6bSopenharmony_ci regex->ref_count = 1; 690b5975d6bSopenharmony_ci regex->pattern = g_strdup (pattern); 691b5975d6bSopenharmony_ci regex->pcre_re = re; 692b5975d6bSopenharmony_ci- regex->compile_opts = compile_options; 693b5975d6bSopenharmony_ci- regex->orig_compile_opts = orig_compile_opts; 694b5975d6bSopenharmony_ci- regex->match_opts = match_options; 695b5975d6bSopenharmony_ci+ regex->compile_opts = pcre_compile_options; 696b5975d6bSopenharmony_ci+ regex->orig_compile_opts = compile_options; 697b5975d6bSopenharmony_ci+ regex->match_opts = pcre_match_options; 698b5975d6bSopenharmony_ci+ regex->orig_match_opts = match_options; 699b5975d6bSopenharmony_ci enable_jit_with_match_options (regex, regex->match_opts); 700b5975d6bSopenharmony_ci 701b5975d6bSopenharmony_ci return regex; 702b5975d6bSopenharmony_ci } 703b5975d6bSopenharmony_ci 704b5975d6bSopenharmony_ci-static gint 705b5975d6bSopenharmony_ci-extract_newline_options (const GRegexCompileFlags compile_options, 706b5975d6bSopenharmony_ci- const GRegexMatchFlags *match_options) 707b5975d6bSopenharmony_ci-{ 708b5975d6bSopenharmony_ci- gint newline_options = PCRE2_NEWLINE_ANY; 709b5975d6bSopenharmony_ci- 710b5975d6bSopenharmony_ci- if (compile_options & G_REGEX_NEWLINE_MASK) 711b5975d6bSopenharmony_ci- newline_options = compile_options & G_REGEX_NEWLINE_MASK; 712b5975d6bSopenharmony_ci- if (match_options && *match_options & G_REGEX_MATCH_NEWLINE_MASK) 713b5975d6bSopenharmony_ci- newline_options = *match_options & G_REGEX_MATCH_NEWLINE_MASK; 714b5975d6bSopenharmony_ci- 715b5975d6bSopenharmony_ci- return newline_options; 716b5975d6bSopenharmony_ci-} 717b5975d6bSopenharmony_ci- 718b5975d6bSopenharmony_ci-static gint 719b5975d6bSopenharmony_ci-extract_bsr_options (const GRegexCompileFlags compile_options, 720b5975d6bSopenharmony_ci- const GRegexMatchFlags *match_options) 721b5975d6bSopenharmony_ci-{ 722b5975d6bSopenharmony_ci- gint bsr_options = PCRE2_BSR_UNICODE; 723b5975d6bSopenharmony_ci- 724b5975d6bSopenharmony_ci- if (compile_options & PCRE2_BSR_ANYCRLF) 725b5975d6bSopenharmony_ci- bsr_options = PCRE2_BSR_ANYCRLF; 726b5975d6bSopenharmony_ci- if (match_options && *match_options & PCRE2_BSR_ANYCRLF) 727b5975d6bSopenharmony_ci- bsr_options = PCRE2_BSR_ANYCRLF; 728b5975d6bSopenharmony_ci- if (match_options && *match_options & PCRE2_BSR_UNICODE) 729b5975d6bSopenharmony_ci- bsr_options = PCRE2_BSR_UNICODE; 730b5975d6bSopenharmony_ci- 731b5975d6bSopenharmony_ci- return bsr_options; 732b5975d6bSopenharmony_ci-} 733b5975d6bSopenharmony_ci- 734b5975d6bSopenharmony_ci static pcre2_code * 735b5975d6bSopenharmony_ci-regex_compile (const gchar *pattern, 736b5975d6bSopenharmony_ci- GRegexCompileFlags compile_options, 737b5975d6bSopenharmony_ci- GRegexCompileFlags *compile_options_out, 738b5975d6bSopenharmony_ci- GRegexMatchFlags *match_options, 739b5975d6bSopenharmony_ci- GError **error) 740b5975d6bSopenharmony_ci+regex_compile (const gchar *pattern, 741b5975d6bSopenharmony_ci+ uint32_t compile_options, 742b5975d6bSopenharmony_ci+ uint32_t newline_options, 743b5975d6bSopenharmony_ci+ uint32_t bsr_options, 744b5975d6bSopenharmony_ci+ GError **error) 745b5975d6bSopenharmony_ci { 746b5975d6bSopenharmony_ci pcre2_code *re; 747b5975d6bSopenharmony_ci pcre2_compile_context *context; 748b5975d6bSopenharmony_ci const gchar *errmsg; 749b5975d6bSopenharmony_ci PCRE2_SIZE erroffset; 750b5975d6bSopenharmony_ci gint errcode; 751b5975d6bSopenharmony_ci- GRegexCompileFlags nonpcre_compile_options; 752b5975d6bSopenharmony_ci- uint32_t pcre_compile_options; 753b5975d6bSopenharmony_ci- 754b5975d6bSopenharmony_ci- nonpcre_compile_options = compile_options & G_REGEX_COMPILE_NONPCRE_MASK; 755b5975d6bSopenharmony_ci 756b5975d6bSopenharmony_ci context = pcre2_compile_context_create (NULL); 757b5975d6bSopenharmony_ci 758b5975d6bSopenharmony_ci /* set newline options */ 759b5975d6bSopenharmony_ci- pcre2_set_newline (context, extract_newline_options (compile_options, match_options)); 760b5975d6bSopenharmony_ci+ if (pcre2_set_newline (context, newline_options) != 0) 761b5975d6bSopenharmony_ci+ { 762b5975d6bSopenharmony_ci+ g_set_error (error, G_REGEX_ERROR, 763b5975d6bSopenharmony_ci+ G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS, 764b5975d6bSopenharmony_ci+ "Invalid newline flags"); 765b5975d6bSopenharmony_ci+ pcre2_compile_context_free (context); 766b5975d6bSopenharmony_ci+ return NULL; 767b5975d6bSopenharmony_ci+ } 768b5975d6bSopenharmony_ci 769b5975d6bSopenharmony_ci /* set bsr options */ 770b5975d6bSopenharmony_ci- pcre2_set_bsr (context, extract_bsr_options (compile_options, match_options)); 771b5975d6bSopenharmony_ci+ if (pcre2_set_bsr (context, bsr_options) != 0) 772b5975d6bSopenharmony_ci+ { 773b5975d6bSopenharmony_ci+ g_set_error (error, G_REGEX_ERROR, 774b5975d6bSopenharmony_ci+ G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS, 775b5975d6bSopenharmony_ci+ "Invalid BSR flags"); 776b5975d6bSopenharmony_ci+ pcre2_compile_context_free (context); 777b5975d6bSopenharmony_ci+ return NULL; 778b5975d6bSopenharmony_ci+ } 779b5975d6bSopenharmony_ci 780b5975d6bSopenharmony_ci /* In case UTF-8 mode is used, also set PCRE2_NO_UTF_CHECK */ 781b5975d6bSopenharmony_ci if (compile_options & PCRE2_UTF) 782b5975d6bSopenharmony_ci- { 783b5975d6bSopenharmony_ci- compile_options |= PCRE2_NO_UTF_CHECK; 784b5975d6bSopenharmony_ci- if (match_options != NULL) 785b5975d6bSopenharmony_ci- *match_options |= PCRE2_NO_UTF_CHECK; 786b5975d6bSopenharmony_ci- } 787b5975d6bSopenharmony_ci+ compile_options |= PCRE2_NO_UTF_CHECK; 788b5975d6bSopenharmony_ci 789b5975d6bSopenharmony_ci compile_options |= PCRE2_UCP; 790b5975d6bSopenharmony_ci 791b5975d6bSopenharmony_ci /* compile the pattern */ 792b5975d6bSopenharmony_ci re = pcre2_compile ((PCRE2_SPTR8) pattern, 793b5975d6bSopenharmony_ci PCRE2_ZERO_TERMINATED, 794b5975d6bSopenharmony_ci- compile_options & ~G_REGEX_FLAGS_CONVERTED, 795b5975d6bSopenharmony_ci+ compile_options, 796b5975d6bSopenharmony_ci &errcode, 797b5975d6bSopenharmony_ci &erroffset, 798b5975d6bSopenharmony_ci context); 799b5975d6bSopenharmony_ci@@ -1755,16 +1769,22 @@ regex_compile (const gchar *pattern, 800b5975d6bSopenharmony_ci return NULL; 801b5975d6bSopenharmony_ci } 802b5975d6bSopenharmony_ci 803b5975d6bSopenharmony_ci+ return re; 804b5975d6bSopenharmony_ci+} 805b5975d6bSopenharmony_ci+ 806b5975d6bSopenharmony_ci+static uint32_t 807b5975d6bSopenharmony_ci+get_pcre2_inline_compile_options (pcre2_code *re, 808b5975d6bSopenharmony_ci+ uint32_t compile_options) 809b5975d6bSopenharmony_ci+{ 810b5975d6bSopenharmony_ci+ uint32_t pcre_compile_options; 811b5975d6bSopenharmony_ci+ uint32_t nonpcre_compile_options; 812b5975d6bSopenharmony_ci+ 813b5975d6bSopenharmony_ci /* For options set at the beginning of the pattern, pcre puts them into 814b5975d6bSopenharmony_ci * compile options, e.g. "(?i)foo" will make the pcre structure store 815b5975d6bSopenharmony_ci * PCRE2_CASELESS even though it wasn't explicitly given for compilation. */ 816b5975d6bSopenharmony_ci+ nonpcre_compile_options = compile_options & G_REGEX_COMPILE_NONPCRE_MASK; 817b5975d6bSopenharmony_ci pcre2_pattern_info (re, PCRE2_INFO_ALLOPTIONS, &pcre_compile_options); 818b5975d6bSopenharmony_ci- compile_options = pcre_compile_options & G_REGEX_COMPILE_PCRE_MASK; 819b5975d6bSopenharmony_ci- 820b5975d6bSopenharmony_ci- /* Don't leak PCRE2_NEWLINE_ANY, which is part of PCRE2_NEWLINE_ANYCRLF */ 821b5975d6bSopenharmony_ci- if ((pcre_compile_options & PCRE2_NEWLINE_ANYCRLF) != PCRE2_NEWLINE_ANYCRLF) 822b5975d6bSopenharmony_ci- compile_options &= ~PCRE2_NEWLINE_ANY; 823b5975d6bSopenharmony_ci- 824b5975d6bSopenharmony_ci+ compile_options = pcre_compile_options & G_REGEX_PCRE2_COMPILE_MASK; 825b5975d6bSopenharmony_ci compile_options |= nonpcre_compile_options; 826b5975d6bSopenharmony_ci 827b5975d6bSopenharmony_ci if (!(compile_options & PCRE2_DUPNAMES)) 828b5975d6bSopenharmony_ci@@ -1775,10 +1795,7 @@ regex_compile (const gchar *pattern, 829b5975d6bSopenharmony_ci compile_options |= PCRE2_DUPNAMES; 830b5975d6bSopenharmony_ci } 831b5975d6bSopenharmony_ci 832b5975d6bSopenharmony_ci- if (compile_options_out != 0) 833b5975d6bSopenharmony_ci- *compile_options_out = compile_options; 834b5975d6bSopenharmony_ci- 835b5975d6bSopenharmony_ci- return re; 836b5975d6bSopenharmony_ci+ return compile_options; 837b5975d6bSopenharmony_ci } 838b5975d6bSopenharmony_ci 839b5975d6bSopenharmony_ci /** 840b5975d6bSopenharmony_ci@@ -1940,7 +1957,7 @@ g_regex_get_compile_flags (const GRegex *regex) 841b5975d6bSopenharmony_ci break; 842b5975d6bSopenharmony_ci } 843b5975d6bSopenharmony_ci 844b5975d6bSopenharmony_ci- return map_to_pcre1_compile_flags (regex->compile_opts) | extra_flags; 845b5975d6bSopenharmony_ci+ return g_regex_compile_flags_from_pcre2 (regex->compile_opts) | extra_flags; 846b5975d6bSopenharmony_ci } 847b5975d6bSopenharmony_ci 848b5975d6bSopenharmony_ci /** 849b5975d6bSopenharmony_ci@@ -1956,9 +1973,15 @@ g_regex_get_compile_flags (const GRegex *regex) 850b5975d6bSopenharmony_ci GRegexMatchFlags 851b5975d6bSopenharmony_ci g_regex_get_match_flags (const GRegex *regex) 852b5975d6bSopenharmony_ci { 853b5975d6bSopenharmony_ci+ uint32_t flags; 854b5975d6bSopenharmony_ci+ 855b5975d6bSopenharmony_ci g_return_val_if_fail (regex != NULL, 0); 856b5975d6bSopenharmony_ci 857b5975d6bSopenharmony_ci- return map_to_pcre1_match_flags (regex->match_opts & G_REGEX_MATCH_MASK); 858b5975d6bSopenharmony_ci+ flags = g_regex_match_flags_from_pcre2 (regex->match_opts); 859b5975d6bSopenharmony_ci+ flags |= (regex->orig_match_opts & G_REGEX_MATCH_NEWLINE_MASK); 860b5975d6bSopenharmony_ci+ flags |= (regex->orig_match_opts & (G_REGEX_MATCH_BSR_ANY | G_REGEX_MATCH_BSR_ANYCRLF)); 861b5975d6bSopenharmony_ci+ 862b5975d6bSopenharmony_ci+ return flags; 863b5975d6bSopenharmony_ci } 864b5975d6bSopenharmony_ci 865b5975d6bSopenharmony_ci /** 866b5975d6bSopenharmony_ci@@ -1992,9 +2015,6 @@ g_regex_match_simple (const gchar *pattern, 867b5975d6bSopenharmony_ci GRegex *regex; 868b5975d6bSopenharmony_ci gboolean result; 869b5975d6bSopenharmony_ci 870b5975d6bSopenharmony_ci- compile_options = map_to_pcre2_compile_flags (compile_options); 871b5975d6bSopenharmony_ci- match_options = map_to_pcre2_match_flags (match_options); 872b5975d6bSopenharmony_ci- 873b5975d6bSopenharmony_ci regex = g_regex_new (pattern, compile_options, G_REGEX_MATCH_DEFAULT, NULL); 874b5975d6bSopenharmony_ci if (!regex) 875b5975d6bSopenharmony_ci return FALSE; 876b5975d6bSopenharmony_ci@@ -2062,8 +2082,6 @@ g_regex_match (const GRegex *regex, 877b5975d6bSopenharmony_ci GRegexMatchFlags match_options, 878b5975d6bSopenharmony_ci GMatchInfo **match_info) 879b5975d6bSopenharmony_ci { 880b5975d6bSopenharmony_ci- match_options = map_to_pcre2_match_flags (match_options); 881b5975d6bSopenharmony_ci- 882b5975d6bSopenharmony_ci return g_regex_match_full (regex, string, -1, 0, match_options, 883b5975d6bSopenharmony_ci match_info, NULL); 884b5975d6bSopenharmony_ci } 885b5975d6bSopenharmony_ci@@ -2147,8 +2165,6 @@ g_regex_match_full (const GRegex *regex, 886b5975d6bSopenharmony_ci GMatchInfo *info; 887b5975d6bSopenharmony_ci gboolean match_ok; 888b5975d6bSopenharmony_ci 889b5975d6bSopenharmony_ci- match_options = map_to_pcre2_match_flags (match_options); 890b5975d6bSopenharmony_ci- 891b5975d6bSopenharmony_ci g_return_val_if_fail (regex != NULL, FALSE); 892b5975d6bSopenharmony_ci g_return_val_if_fail (string != NULL, FALSE); 893b5975d6bSopenharmony_ci g_return_val_if_fail (start_position >= 0, FALSE); 894b5975d6bSopenharmony_ci@@ -2199,8 +2215,6 @@ g_regex_match_all (const GRegex *regex, 895b5975d6bSopenharmony_ci GRegexMatchFlags match_options, 896b5975d6bSopenharmony_ci GMatchInfo **match_info) 897b5975d6bSopenharmony_ci { 898b5975d6bSopenharmony_ci- match_options = map_to_pcre2_match_flags (match_options); 899b5975d6bSopenharmony_ci- 900b5975d6bSopenharmony_ci return g_regex_match_all_full (regex, string, -1, 0, match_options, 901b5975d6bSopenharmony_ci match_info, NULL); 902b5975d6bSopenharmony_ci } 903b5975d6bSopenharmony_ci@@ -2272,8 +2286,8 @@ g_regex_match_all_full (const GRegex *regex, 904b5975d6bSopenharmony_ci gboolean done; 905b5975d6bSopenharmony_ci pcre2_code *pcre_re; 906b5975d6bSopenharmony_ci gboolean retval; 907b5975d6bSopenharmony_ci- 908b5975d6bSopenharmony_ci- match_options = map_to_pcre2_match_flags (match_options); 909b5975d6bSopenharmony_ci+ uint32_t newline_options; 910b5975d6bSopenharmony_ci+ uint32_t bsr_options; 911b5975d6bSopenharmony_ci 912b5975d6bSopenharmony_ci g_return_val_if_fail (regex != NULL, FALSE); 913b5975d6bSopenharmony_ci g_return_val_if_fail (string != NULL, FALSE); 914b5975d6bSopenharmony_ci@@ -2281,6 +2295,14 @@ g_regex_match_all_full (const GRegex *regex, 915b5975d6bSopenharmony_ci g_return_val_if_fail (error == NULL || *error == NULL, FALSE); 916b5975d6bSopenharmony_ci g_return_val_if_fail ((match_options & ~G_REGEX_MATCH_MASK) == 0, FALSE); 917b5975d6bSopenharmony_ci 918b5975d6bSopenharmony_ci+ newline_options = get_pcre2_newline_match_options (match_options); 919b5975d6bSopenharmony_ci+ if (!newline_options) 920b5975d6bSopenharmony_ci+ newline_options = get_pcre2_newline_compile_options (regex->orig_compile_opts); 921b5975d6bSopenharmony_ci+ 922b5975d6bSopenharmony_ci+ bsr_options = get_pcre2_bsr_match_options (match_options); 923b5975d6bSopenharmony_ci+ if (!bsr_options) 924b5975d6bSopenharmony_ci+ bsr_options = get_pcre2_bsr_compile_options (regex->orig_compile_opts); 925b5975d6bSopenharmony_ci+ 926b5975d6bSopenharmony_ci /* For PCRE2 we need to turn off PCRE2_NO_AUTO_POSSESS, which is an 927b5975d6bSopenharmony_ci * optimization for normal regex matching, but results in omitting some 928b5975d6bSopenharmony_ci * shorter matches here, and an observable behaviour change. 929b5975d6bSopenharmony_ci@@ -2289,7 +2311,7 @@ g_regex_match_all_full (const GRegex *regex, 930b5975d6bSopenharmony_ci * codesearch.debian.net, so don't bother caching the recompiled RE. */ 931b5975d6bSopenharmony_ci pcre_re = regex_compile (regex->pattern, 932b5975d6bSopenharmony_ci regex->compile_opts | PCRE2_NO_AUTO_POSSESS, 933b5975d6bSopenharmony_ci- NULL, NULL, error); 934b5975d6bSopenharmony_ci+ newline_options, bsr_options, error); 935b5975d6bSopenharmony_ci if (pcre_re == NULL) 936b5975d6bSopenharmony_ci return FALSE; 937b5975d6bSopenharmony_ci 938b5975d6bSopenharmony_ci@@ -2303,7 +2325,7 @@ g_regex_match_all_full (const GRegex *regex, 939b5975d6bSopenharmony_ci info->matches = pcre2_dfa_match (pcre_re, 940b5975d6bSopenharmony_ci (PCRE2_SPTR8) info->string, info->string_len, 941b5975d6bSopenharmony_ci info->pos, 942b5975d6bSopenharmony_ci- (regex->match_opts | match_options | PCRE2_NO_UTF_CHECK) & ~G_REGEX_FLAGS_CONVERTED, 943b5975d6bSopenharmony_ci+ (regex->match_opts | info->match_opts), 944b5975d6bSopenharmony_ci info->match_data, 945b5975d6bSopenharmony_ci info->match_context, 946b5975d6bSopenharmony_ci info->workspace, info->n_workspace); 947b5975d6bSopenharmony_ci@@ -2436,9 +2458,6 @@ g_regex_split_simple (const gchar *pattern, 948b5975d6bSopenharmony_ci GRegex *regex; 949b5975d6bSopenharmony_ci gchar **result; 950b5975d6bSopenharmony_ci 951b5975d6bSopenharmony_ci- compile_options = map_to_pcre2_compile_flags (compile_options); 952b5975d6bSopenharmony_ci- match_options = map_to_pcre2_match_flags (match_options); 953b5975d6bSopenharmony_ci- 954b5975d6bSopenharmony_ci regex = g_regex_new (pattern, compile_options, 0, NULL); 955b5975d6bSopenharmony_ci if (!regex) 956b5975d6bSopenharmony_ci return NULL; 957b5975d6bSopenharmony_ci@@ -2482,8 +2501,6 @@ g_regex_split (const GRegex *regex, 958b5975d6bSopenharmony_ci const gchar *string, 959b5975d6bSopenharmony_ci GRegexMatchFlags match_options) 960b5975d6bSopenharmony_ci { 961b5975d6bSopenharmony_ci- match_options = map_to_pcre2_match_flags (match_options); 962b5975d6bSopenharmony_ci- 963b5975d6bSopenharmony_ci return g_regex_split_full (regex, string, -1, 0, 964b5975d6bSopenharmony_ci match_options, 0, NULL); 965b5975d6bSopenharmony_ci } 966b5975d6bSopenharmony_ci@@ -2548,8 +2565,6 @@ g_regex_split_full (const GRegex *regex, 967b5975d6bSopenharmony_ci /* the returned array of char **s */ 968b5975d6bSopenharmony_ci gchar **string_list; 969b5975d6bSopenharmony_ci 970b5975d6bSopenharmony_ci- match_options = map_to_pcre2_match_flags (match_options); 971b5975d6bSopenharmony_ci- 972b5975d6bSopenharmony_ci g_return_val_if_fail (regex != NULL, NULL); 973b5975d6bSopenharmony_ci g_return_val_if_fail (string != NULL, NULL); 974b5975d6bSopenharmony_ci g_return_val_if_fail (start_position >= 0, NULL); 975b5975d6bSopenharmony_ci@@ -3174,8 +3189,6 @@ g_regex_replace (const GRegex *regex, 976b5975d6bSopenharmony_ci GList *list; 977b5975d6bSopenharmony_ci GError *tmp_error = NULL; 978b5975d6bSopenharmony_ci 979b5975d6bSopenharmony_ci- match_options = map_to_pcre2_match_flags (match_options); 980b5975d6bSopenharmony_ci- 981b5975d6bSopenharmony_ci g_return_val_if_fail (regex != NULL, NULL); 982b5975d6bSopenharmony_ci g_return_val_if_fail (string != NULL, NULL); 983b5975d6bSopenharmony_ci g_return_val_if_fail (start_position >= 0, NULL); 984b5975d6bSopenharmony_ci@@ -3245,8 +3258,6 @@ g_regex_replace_literal (const GRegex *regex, 985b5975d6bSopenharmony_ci GRegexMatchFlags match_options, 986b5975d6bSopenharmony_ci GError **error) 987b5975d6bSopenharmony_ci { 988b5975d6bSopenharmony_ci- match_options = map_to_pcre2_match_flags (match_options); 989b5975d6bSopenharmony_ci- 990b5975d6bSopenharmony_ci g_return_val_if_fail (replacement != NULL, NULL); 991b5975d6bSopenharmony_ci g_return_val_if_fail ((match_options & ~G_REGEX_MATCH_MASK) == 0, NULL); 992b5975d6bSopenharmony_ci 993b5975d6bSopenharmony_ci@@ -3335,8 +3346,6 @@ g_regex_replace_eval (const GRegex *regex, 994b5975d6bSopenharmony_ci gboolean done = FALSE; 995b5975d6bSopenharmony_ci GError *tmp_error = NULL; 996b5975d6bSopenharmony_ci 997b5975d6bSopenharmony_ci- match_options = map_to_pcre2_match_flags (match_options); 998b5975d6bSopenharmony_ci- 999b5975d6bSopenharmony_ci g_return_val_if_fail (regex != NULL, NULL); 1000b5975d6bSopenharmony_ci g_return_val_if_fail (string != NULL, NULL); 1001b5975d6bSopenharmony_ci g_return_val_if_fail (start_position >= 0, NULL); 1002b5975d6bSopenharmony_cidiff --git a/glib/tests/regex.c b/glib/tests/regex.c 1003b5975d6bSopenharmony_ciindex 0d01d59..79e6b4a 100644 1004b5975d6bSopenharmony_ci--- a/glib/tests/regex.c 1005b5975d6bSopenharmony_ci+++ b/glib/tests/regex.c 1006b5975d6bSopenharmony_ci@@ -1,6 +1,7 @@ 1007b5975d6bSopenharmony_ci /* 1008b5975d6bSopenharmony_ci * Copyright (C) 2005 - 2006, Marco Barisione <marco@barisione.org> 1009b5975d6bSopenharmony_ci * Copyright (C) 2010 Red Hat, Inc. 1010b5975d6bSopenharmony_ci+ * Copyright (C) 2022, Marco Trevisan <marco.trevisan@canonical.com> 1011b5975d6bSopenharmony_ci * 1012b5975d6bSopenharmony_ci * This library is free software; you can redistribute it and/or 1013b5975d6bSopenharmony_ci * modify it under the terms of the GNU Lesser General Public 1014b5975d6bSopenharmony_ci@@ -2353,7 +2354,13 @@ main (int argc, char *argv[]) 1015b5975d6bSopenharmony_ci 1016b5975d6bSopenharmony_ci /* TEST_NEW_CHECK_FLAGS(pattern, compile_opts, match_ops, real_compile_opts, real_match_opts) */ 1017b5975d6bSopenharmony_ci TEST_NEW_CHECK_FLAGS ("a", G_REGEX_OPTIMIZE, 0, G_REGEX_OPTIMIZE, 0); 1018b5975d6bSopenharmony_ci+ TEST_NEW_CHECK_FLAGS ("a", G_REGEX_OPTIMIZE, G_REGEX_MATCH_NOTEMPTY, 1019b5975d6bSopenharmony_ci+ G_REGEX_OPTIMIZE, G_REGEX_MATCH_NOTEMPTY); 1020b5975d6bSopenharmony_ci+ TEST_NEW_CHECK_FLAGS ("a", 0, G_REGEX_MATCH_NEWLINE_ANYCRLF | G_REGEX_MATCH_BSR_ANYCRLF, 1021b5975d6bSopenharmony_ci+ G_REGEX_NEWLINE_ANYCRLF | G_REGEX_BSR_ANYCRLF, 1022b5975d6bSopenharmony_ci+ G_REGEX_MATCH_NEWLINE_ANYCRLF | G_REGEX_MATCH_BSR_ANYCRLF); 1023b5975d6bSopenharmony_ci TEST_NEW_CHECK_FLAGS ("a", G_REGEX_RAW, 0, G_REGEX_RAW, 0); 1024b5975d6bSopenharmony_ci+ TEST_NEW_CHECK_FLAGS ("(?J)a", 0, 0, G_REGEX_DUPNAMES, 0); 1025b5975d6bSopenharmony_ci TEST_NEW_CHECK_FLAGS ("^.*", 0, 0, G_REGEX_ANCHORED, 0); 1026b5975d6bSopenharmony_ci TEST_NEW_CHECK_FLAGS ("(*UTF8)a", 0, 0, 0 /* this is the default in GRegex */, 0); 1027b5975d6bSopenharmony_ci TEST_NEW_CHECK_FLAGS ("(*UCP)a", 0, 0, 0 /* this always on in GRegex */, 0); 1028b5975d6bSopenharmony_ci@@ -2559,6 +2566,8 @@ main (int argc, char *argv[]) 1029b5975d6bSopenharmony_ci TEST_MATCH("^b$", G_REGEX_MULTILINE | G_REGEX_NEWLINE_CR, 0, "a\rb\rc", -1, 0, 0, TRUE); 1030b5975d6bSopenharmony_ci TEST_MATCH("^b$", G_REGEX_MULTILINE | G_REGEX_NEWLINE_LF, 0, "a\rb\rc", -1, 0, 0, FALSE); 1031b5975d6bSopenharmony_ci TEST_MATCH("^b$", G_REGEX_MULTILINE | G_REGEX_NEWLINE_CRLF, 0, "a\rb\rc", -1, 0, 0, FALSE); 1032b5975d6bSopenharmony_ci+ TEST_MATCH("^b$", G_REGEX_MULTILINE | G_REGEX_NEWLINE_ANYCRLF, 0, "a\r\nb\nc", -1, 0, 0, TRUE); 1033b5975d6bSopenharmony_ci+ TEST_MATCH("^b$", G_REGEX_MULTILINE | G_REGEX_NEWLINE_ANYCRLF, 0, "a\r\nb\rc", -1, 0, 0, TRUE); 1034b5975d6bSopenharmony_ci TEST_MATCH("^b$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_CR, "a\nb\nc", -1, 0, 0, FALSE); 1035b5975d6bSopenharmony_ci TEST_MATCH("^b$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_LF, "a\nb\nc", -1, 0, 0, TRUE); 1036b5975d6bSopenharmony_ci TEST_MATCH("^b$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_CRLF, "a\nb\nc", -1, 0, 0, FALSE); 1037b5975d6bSopenharmony_ci@@ -2568,6 +2577,8 @@ main (int argc, char *argv[]) 1038b5975d6bSopenharmony_ci TEST_MATCH("^b$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_CR, "a\rb\rc", -1, 0, 0, TRUE); 1039b5975d6bSopenharmony_ci TEST_MATCH("^b$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_LF, "a\rb\rc", -1, 0, 0, FALSE); 1040b5975d6bSopenharmony_ci TEST_MATCH("^b$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_CRLF, "a\rb\rc", -1, 0, 0, FALSE); 1041b5975d6bSopenharmony_ci+ TEST_MATCH("^b$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_ANYCRLF, "a\r\nb\rc", -1, 0, 0, TRUE); 1042b5975d6bSopenharmony_ci+ TEST_MATCH("^b$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_ANYCRLF, "a\r\nb\nc", -1, 0, 0, TRUE); 1043b5975d6bSopenharmony_ci 1044b5975d6bSopenharmony_ci TEST_MATCH("^b$", G_REGEX_MULTILINE | G_REGEX_NEWLINE_CR, G_REGEX_MATCH_NEWLINE_ANY, "a\nb\nc", -1, 0, 0, TRUE); 1045b5975d6bSopenharmony_ci TEST_MATCH("^b$", G_REGEX_MULTILINE | G_REGEX_NEWLINE_CR, G_REGEX_MATCH_NEWLINE_ANY, "a\rb\rc", -1, 0, 0, TRUE); 1046b5975d6bSopenharmony_ci@@ -2577,6 +2588,13 @@ main (int argc, char *argv[]) 1047b5975d6bSopenharmony_ci TEST_MATCH("^b$", G_REGEX_MULTILINE | G_REGEX_NEWLINE_CR, G_REGEX_MATCH_NEWLINE_CRLF, "a\r\nb\r\nc", -1, 0, 0, TRUE); 1048b5975d6bSopenharmony_ci TEST_MATCH("^b$", G_REGEX_MULTILINE | G_REGEX_NEWLINE_CR, G_REGEX_MATCH_NEWLINE_CRLF, "a\rb\rc", -1, 0, 0, FALSE); 1049b5975d6bSopenharmony_ci 1050b5975d6bSopenharmony_ci+ /* See https://gitlab.gnome.org/GNOME/glib/-/issues/2729#note_1544130 */ 1051b5975d6bSopenharmony_ci+ TEST_MATCH("^a$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_ANY, "a", -1, 0, 0, TRUE); 1052b5975d6bSopenharmony_ci+ TEST_MATCH("^a$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_LF, "a", -1, 0, 0, TRUE); 1053b5975d6bSopenharmony_ci+ TEST_MATCH("^a$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_CR, "a", -1, 0, 0, TRUE); 1054b5975d6bSopenharmony_ci+ TEST_MATCH("^a$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_CRLF, "a", -1, 0, 0, TRUE); 1055b5975d6bSopenharmony_ci+ TEST_MATCH("^a$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_ANYCRLF, "a", -1, 0, 0, TRUE); 1056b5975d6bSopenharmony_ci+ 1057b5975d6bSopenharmony_ci TEST_MATCH("a#\nb", G_REGEX_EXTENDED, 0, "a", -1, 0, 0, FALSE); 1058b5975d6bSopenharmony_ci TEST_MATCH("a#\r\nb", G_REGEX_EXTENDED, 0, "a", -1, 0, 0, FALSE); 1059b5975d6bSopenharmony_ci TEST_MATCH("a#\rb", G_REGEX_EXTENDED, 0, "a", -1, 0, 0, FALSE); 1060b5975d6bSopenharmony_ci-- 1061b5975d6bSopenharmony_ci2.33.0 1062b5975d6bSopenharmony_ciGitLab 1063