1/************************************************* 2* Perl-Compatible Regular Expressions * 3*************************************************/ 4 5/* PCRE is a library of functions to support regular expressions whose syntax 6and semantics are as close as possible to those of the Perl 5 language. 7 8 Written by Philip Hazel 9 Original API code Copyright (c) 1997-2012 University of Cambridge 10 New API code Copyright (c) 2016 University of Cambridge 11 12----------------------------------------------------------------------------- 13Redistribution and use in source and binary forms, with or without 14modification, are permitted provided that the following conditions are met: 15 16 * Redistributions of source code must retain the above copyright notice, 17 this list of conditions and the following disclaimer. 18 19 * Redistributions in binary form must reproduce the above copyright 20 notice, this list of conditions and the following disclaimer in the 21 documentation and/or other materials provided with the distribution. 22 23 * Neither the name of the University of Cambridge nor the names of its 24 contributors may be used to endorse or promote products derived from 25 this software without specific prior written permission. 26 27THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 28AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 31LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37POSSIBILITY OF SUCH DAMAGE. 38----------------------------------------------------------------------------- 39*/ 40 41#ifdef HAVE_CONFIG_H 42#include "config.h" 43#endif 44 45#include <stdio.h> 46#include <string.h> 47 48#define PCRE2_CODE_UNIT_WIDTH 0 49#include "pcre2.h" 50 51/* 52 Letter characters: 53 \xe6\x92\xad = 0x64ad = 25773 (kanji) 54 Non-letter characters: 55 \xc2\xa1 = 0xa1 = 161 (Inverted Exclamation Mark) 56 \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888 57 \xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character) 58 \xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character) 59 Newlines: 60 \xc2\x85 = 0x85 = 133 (NExt Line = NEL) 61 \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator) 62 Othercase pairs: 63 \xc3\xa9 = 0xe9 = 233 (e') 64 \xc3\x89 = 0xc9 = 201 (E') 65 \xc3\xa1 = 0xe1 = 225 (a') 66 \xc3\x81 = 0xc1 = 193 (A') 67 \x53 = 0x53 = S 68 \x73 = 0x73 = s 69 \xc5\xbf = 0x17f = 383 (long S) 70 \xc8\xba = 0x23a = 570 71 \xe2\xb1\xa5 = 0x2c65 = 11365 72 \xe1\xbd\xb8 = 0x1f78 = 8056 73 \xe1\xbf\xb8 = 0x1ff8 = 8184 74 \xf0\x90\x90\x80 = 0x10400 = 66560 75 \xf0\x90\x90\xa8 = 0x10428 = 66600 76 \xc7\x84 = 0x1c4 = 452 77 \xc7\x85 = 0x1c5 = 453 78 \xc7\x86 = 0x1c6 = 454 79 Caseless sets: 80 ucp_Armenian - \x{531}-\x{556} -> \x{561}-\x{586} 81 ucp_Coptic - \x{2c80}-\x{2ce3} -> caseless: XOR 0x1 82 ucp_Latin - \x{ff21}-\x{ff3a} -> \x{ff41}-\x{ff5a} 83 84 Mark property: 85 \xcc\x8d = 0x30d = 781 86 Special: 87 \xc2\x80 = 0x80 = 128 (lowest 2 byte character) 88 \xdf\xbf = 0x7ff = 2047 (highest 2
byte character) 89 \xe0\xa0\x80 = 0x800 = 2048 (lowest 2 byte character) 90 \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character) 91 \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character) 92 \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character) 93*/ 94 95static int regression_tests(void); 96static int invalid_utf8_regression_tests(void); 97static int invalid_utf16_regression_tests(void); 98static int invalid_utf32_regression_tests(void); 99 100int main(void) 101{ 102 int jit = 0; 103#if defined SUPPORT_PCRE2_8 104 pcre2_config_8(PCRE2_CONFIG_JIT, &jit); 105#elif defined SUPPORT_PCRE2_16 106 pcre2_config_16(PCRE2_CONFIG_JIT, &jit); 107#elif defined SUPPORT_PCRE2_32 108 pcre2_config_32(PCRE2_CONFIG_JIT, &jit); 109#endif 110 if (!jit) { 111 printf("JIT must be enabled to run pcre2_jit_test\n"); 112 return 1; 113 } 114 return regression_tests() 115 | invalid_utf8_regression_tests() 116 | invalid_utf16_regression_tests() 117 | invalid_utf32_regression_tests(); 118} 119 120/* --------------------------------------------------------------------------------------- */ 121 122#if !(defined SUPPORT_PCRE2_8) && !(defined SUPPORT_PCRE2_16) && !(defined SUPPORT_PCRE2_32) 123#error SUPPORT_PCRE2_8 or SUPPORT_PCRE2_16 or SUPPORT_PCRE2_32 must be defined 124#endif 125 126#define MU (PCRE2_MULTILINE | PCRE2_UTF) 127#define MUP (PCRE2_MULTILINE | PCRE2_UTF | PCRE2_UCP) 128#define CMU (PCRE2_CASELESS | PCRE2_MULTILINE | PCRE2_UTF) 129#define CMUP (PCRE2_CASELESS | PCRE2_MULTILINE | PCRE2_UTF | PCRE2_UCP) 130#define M (PCRE2_MULTILINE) 131#define MP (PCRE2_MULTILINE | PCRE2_UCP) 132#define U (PCRE2_UTF) 133#define CM (PCRE2_CASELESS | PCRE2_MULTILINE) 134 135#define BSR(x) ((x) << 16) 136#define A PCRE2_NEWLINE_ANYCRLF 137 138#define GET_NEWLINE(x) ((x) & 0xffff) 139#define GET_BSR(x) ((x) >> 16) 140 141#define OFFSET_MASK 0x00ffff 142#define F_NO8 0x010000 143#define F_NO16 0x020000 144#define F_NO32 0x020000 145#define F_NOMATCH 0x040000 146#define F_DIFF 
0x080000 147#define F_FORCECONV 0x100000 148#define F_PROPERTY 0x200000 149 150struct regression_test_case { 151 int compile_options; 152 int newline; 153 int match_options; 154 int start_offset; 155 const char *pattern; 156 const char *input; 157}; 158 159static struct regression_test_case regression_test_cases[] = { 160 /* Constant strings. */ 161 { MU, A, 0, 0, "AbC", "AbAbC" }, 162 { MU, A, 0, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" }, 163 { CMU, A, 0, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" }, 164 { M, A, 0, 0, "[^a]", "aAbB" }, 165 { CM, A, 0, 0, "[^m]", "mMnN" }, 166 { M, A, 0, 0, "a[^b][^#]", "abacd" }, 167 { CM, A, 0, 0, "A[^B][^E]", "abacd" }, 168 { CMU, A, 0, 0, "[^x][^#]", "XxBll" }, 169 { MU, A, 0, 0, "[^a]", "aaa\xc3\xa1#Ab" }, 170 { CMU, A, 0, 0, "[^A]", "aA\xe6\x92\xad" }, 171 { MU, A, 0, 0, "\\W(\\W)?\\w", "\r\n+bc" }, 172 { MU, A, 0, 0, "\\W(\\W)?\\w", "\n\r+bc" }, 173 { MU, A, 0, 0, "\\W(\\W)?\\w", "\r\r+bc" }, 174 { MU, A, 0, 0, "\\W(\\W)?\\w", "\n\n+bc" }, 175 { MU, A, 0, 0, "[axd]", "sAXd" }, 176 { CMU, A, 0, 0, "[axd]", "sAXd" }, 177 { CMU, A, 0, 0 | F_NOMATCH, "[^axd]", "DxA" }, 178 { MU, A, 0, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" }, 179 { MU, A, 0, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" }, 180 { CMU, A, 0, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." }, 181 { MU, A, 0, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." 
}, 182 { MU, A, 0, 0, "[^a]", "\xc2\x80[]" }, 183 { CMU, A, 0, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" }, 184 { CM, A, 0, 0, "1a2b3c4", "1a2B3c51A2B3C4" }, 185 { PCRE2_CASELESS, 0, 0, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" }, 186 { PCRE2_CASELESS, 0, 0, 0, "\xfe", "\xff\xfc#\xfe\xfe" }, 187 { PCRE2_CASELESS, 0, 0, 0, "a1", "Aa1" }, 188#ifndef NEVER_BACKSLASH_C 189 { M, A, 0, 0, "\\Ca", "cda" }, 190 { CM, A, 0, 0, "\\Ca", "CDA" }, 191 { M, A, 0, 0 | F_NOMATCH, "\\Cx", "cda" }, 192 { CM, A, 0, 0 | F_NOMATCH, "\\Cx", "CDA" }, 193#endif /* !NEVER_BACKSLASH_C */ 194 { CMUP, A, 0, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" }, 195 { CMUP, A, 0, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" }, 196 { CMUP, A, 0, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" }, 197 { CMUP, A, 0, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" }, 198 { M, A, 0, 0, "[3-57-9]", "5" }, 199 { PCRE2_AUTO_CALLOUT, A, 0, 0, "12345678901234567890123456789012345678901234567890123456789012345678901234567890", 200 "12345678901234567890123456789012345678901234567890123456789012345678901234567890" }, 201 202 /* Assertions. */ 203 { MU, A, 0, 0, "\\b[^A]", "A_B#" }, 204 { M, A, 0, 0 | F_NOMATCH, "\\b\\W", "\n*" }, 205 { MU, A, 0, 0, "\\B[^,]\\b[^s]\\b", "#X" }, 206 { MP, A, 0, 0, "\\B", "_\xa1" }, 207 { MP, A, 0, 0 | F_PROPERTY, "\\b_\\b[,A]\\B", "_," }, 208 { MUP, A, 0, 0, "\\b", "\xe6\x92\xad!" 
}, 209 { MUP, A, 0, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" }, 210 { MUP, A, 0, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" }, 211 { MUP, A, 0, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" }, 212 { MU, A, 0, 0 | F_NOMATCH, "\\b.", "\xcd\xbe" }, 213 { CMUP, A, 0, 0, "\\By", "\xf0\x90\x90\xa8y" }, 214 { M, A, 0, 0 | F_NOMATCH, "\\R^", "\n" }, 215 { M, A, 0, 1 | F_NOMATCH, "^", "\n" }, 216 { 0, 0, 0, 0, "^ab", "ab" }, 217 { 0, 0, 0, 0 | F_NOMATCH, "^ab", "aab" }, 218 { M, PCRE2_NEWLINE_CRLF, 0, 0, "^a", "\r\raa\n\naa\r\naa" }, 219 { MU, A, 0, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" }, 220 { M, PCRE2_NEWLINE_ANY, 0, 0, "^-", "a--b--\x85--" }, 221 { MU, PCRE2_NEWLINE_ANY, 0, 0, "^-", "a--\xe2\x80\xa8--" }, 222 { MU, PCRE2_NEWLINE_ANY, 0, 0, "^-", "a--\xc2\x85--" }, 223 { 0, 0, 0, 0, "ab$", "ab" }, 224 { 0, 0, 0, 0 | F_NOMATCH, "ab$", "abab\n\n" }, 225 { PCRE2_DOLLAR_ENDONLY, 0, 0, 0 | F_NOMATCH, "ab$", "abab\r\n" }, 226 { M, PCRE2_NEWLINE_CRLF, 0, 0, "a$", "\r\raa\n\naa\r\naa" }, 227 { M, PCRE2_NEWLINE_ANY, 0, 0, "a$", "aaa" }, 228 { MU, PCRE2_NEWLINE_ANYCRLF, 0, 0, "#$", "#\xc2\x85###\r#" }, 229 { MU, PCRE2_NEWLINE_ANY, 0, 0, "#$", "#\xe2\x80\xa9" }, 230 { 0, PCRE2_NEWLINE_ANY, PCRE2_NOTBOL, 0 | F_NOMATCH, "^a", "aa\naa" }, 231 { M, PCRE2_NEWLINE_ANY, PCRE2_NOTBOL, 0, "^a", "aa\naa" }, 232 { 0, PCRE2_NEWLINE_ANY, PCRE2_NOTEOL, 0 | F_NOMATCH, "a$", "aa\naa" }, 233 { 0, PCRE2_NEWLINE_ANY, PCRE2_NOTEOL, 0 | F_NOMATCH, "a$", "aa\r\n" }, 234 { U | PCRE2_DOLLAR_ENDONLY, PCRE2_NEWLINE_ANY, 0, 0 | F_PROPERTY, "\\p{Any}{2,}$", "aa\r\n" }, 235 { M, PCRE2_NEWLINE_ANY, PCRE2_NOTEOL, 0, "a$", "aa\naa" }, 236 { 0, PCRE2_NEWLINE_CR, 0, 0, ".\\Z", "aaa" }, 237 { U, PCRE2_NEWLINE_CR, 0, 0, "a\\Z", "aaa\r" }, 238 { 0, PCRE2_NEWLINE_CR, 0, 0, ".\\Z", "aaa\n" }, 239 { 0, PCRE2_NEWLINE_CRLF, 0, 0, ".\\Z", "aaa\r" }, 240 { U, PCRE2_NEWLINE_CRLF, 0, 0, ".\\Z", "aaa\n" }, 241 { 0, PCRE2_NEWLINE_CRLF, 0, 0, ".\\Z", "aaa\r\n" }, 242 { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, 
".\\Z", "aaa" }, 243 { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r" }, 244 { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\n" }, 245 { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r\n" }, 246 { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\xe2\x80\xa8" }, 247 { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa" }, 248 { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r" }, 249 { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\n" }, 250 { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r\n" }, 251 { U, PCRE2_NEWLINE_ANY, 0, 0, ".\\Z", "aaa\xc2\x85" }, 252 { U, PCRE2_NEWLINE_ANY, 0, 0, ".\\Z", "aaa\xe2\x80\xa8" }, 253 { M, A, 0, 0, "\\Aa", "aaa" }, 254 { M, A, 0, 1 | F_NOMATCH, "\\Aa", "aaa" }, 255 { M, A, 0, 1, "\\Ga", "aaa" }, 256 { M, A, 0, 1 | F_NOMATCH, "\\Ga", "aba" }, 257 { M, A, 0, 0, "a\\z", "aaa" }, 258 { M, A, 0, 0 | F_NOMATCH, "a\\z", "aab" }, 259 260 /* Brackets and alternatives. */ 261 { MU, A, 0, 0, "(ab|bb|cd)", "bacde" }, 262 { MU, A, 0, 0, "(?:ab|a)(bc|c)", "ababc" }, 263 { MU, A, 0, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" }, 264 { CMU, A, 0, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" }, 265 { MU, A, 0, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" }, 266 { MU, A, 0, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" }, 267 { MU, A, 0, 0, "\xc7\x82|\xc6\x82", "\xf1\x83\x82\x82\xc7\x82\xc7\x83" }, 268 { MU, A, 0, 0, "=\xc7\x82|#\xc6\x82", "\xf1\x83\x82\x82=\xc7\x82\xc7\x83" }, 269 { MU, A, 0, 0, "\xc7\x82\xc7\x83|\xc6\x82\xc6\x82", "\xf1\x83\x82\x82\xc7\x82\xc7\x83" }, 270 { MU, A, 0, 0, "\xc6\x82\xc6\x82|\xc7\x83\xc7\x83|\xc8\x84\xc8\x84", "\xf1\x83\x82\x82\xc8\x84\xc8\x84" }, 271 { U, A, 0, 0, "\xe1\x81\x80|\xe2\x82\x80|\xe4\x84\x80", "\xdf\xbf\xc2\x80\xe4\x84\x80" }, 272 { U, A, 0, 0, "(?:\xe1\x81\x80|\xe2\x82\x80|\xe4\x84\x80)#", "\xdf\xbf\xc2\x80#\xe4\x84\x80#" }, 273 { CM, A, 0, 0, "ab|cd", "CD" }, 274 { CM, A, 0, 0, "a1277|a1377|bX487", "bx487" }, 275 { CM, A, 0, 0, "a1277|a1377|bx487", "bX487" }, 276 277 /* Greedy and non-greedy ? operators. 
*/ 278 { MU, A, 0, 0, "(?:a)?a", "laab" }, 279 { CMU, A, 0, 0, "(A)?A", "llaab" }, 280 { MU, A, 0, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trygraphs in GCC. */ 281 { MU, A, 0, 0, "(a)?a", "manm" }, 282 { CMU, A, 0, 0, "(a|b)?\?d((?:e)?)", "ABABdx" }, 283 { MU, A, 0, 0, "(a|b)?\?d((?:e)?)", "abcde" }, 284 { MU, A, 0, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" }, 285 286 /* Greedy and non-greedy + operators */ 287 { MU, A, 0, 0, "(aa)+aa", "aaaaaaa" }, 288 { MU, A, 0, 0, "(aa)+?aa", "aaaaaaa" }, 289 { MU, A, 0, 0, "(?:aba|ab|a)+l", "ababamababal" }, 290 { MU, A, 0, 0, "(?:aba|ab|a)+?l", "ababamababal" }, 291 { MU, A, 0, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" }, 292 { MU, A, 0, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" }, 293 { MU, A, 0, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" }, 294 { MU, A, 0, 0, "(aa|bb){8,1000}", "abaabbaabbaabbaab_aabbaabbaabbaabbaabbaabb_" }, 295 296 /* Greedy and non-greedy * operators */ 297 { CMU, A, 0, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" }, 298 { MU, A, 0, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" }, 299 { MU, A, 0, 0, "(aa|ab)*ab", "aaabaaab" }, 300 { CMU, A, 0, 0, "(aa|Ab)*?aB", "aaabaaab" }, 301 { MU, A, 0, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" }, 302 { MU, A, 0, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" }, 303 { M, A, 0, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" }, 304 { M, A, 0, 0, "((?:a|)*){0}a", "a" }, 305 306 /* Combining ? + * operators */ 307 { MU, A, 0, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" }, 308 { MU, A, 0, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" }, 309 { MU, A, 0, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" }, 310 { MU, A, 0, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" }, 311 { MU, A, 0, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" }, 312 313 /* Single character iterators. 
*/ 314 { MU, A, 0, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" }, 315 { MU, A, 0, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" }, 316 { MU, A, 0, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" }, 317 { MU, A, 0, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" }, 318 { MU, A, 0, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" }, 319 { MU, A, 0, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" }, 320 { MU, A, 0, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" }, 321 { MU, A, 0, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" }, 322 { MU, A, 0, 0, "(ba{2})+c", "baabaaabacbaabaac" }, 323 { MU, A, 0, 0, "(a*+bc++)+", "aaabbcaaabcccab" }, 324 { MU, A, 0, 0, "(a?+[^b])+", "babaacacb" }, 325 { MU, A, 0, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" }, 326 { CMU, A, 0, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" }, 327 { CMU, A, 0, 0, "[c-f]+k", "DemmFke" }, 328 { MU, A, 0, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" }, 329 { MU, A, 0, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" }, 330 { CMU, A, 0, 0, "[ace]{3,7}", "AcbDAcEEcEd" }, 331 { CMU, A, 0, 0, "[ace]{3,7}?", "AcbDAcEEcEd" }, 332 { CMU, A, 0, 0, "[ace]{3,}", "AcbDAcEEcEd" }, 333 { CMU, A, 0, 0, "[ace]{3,}?", "AcbDAcEEcEd" }, 334 { MU, A, 0, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" }, 335 { CMU, A, 0, 0, "[ace]{5}?", "AcCebDAcEEcEd" }, 336 { MU, A, 0, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" }, 337 { MU, A, 0, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" }, 338 { MU, A, 0, 0, "\\b\\w+\\B", "x,a_cd" }, 339 { MUP, A, 0, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" }, 340 { CMU, A, 0, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" }, 341 { CMUP, A, 0, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" }, 342 { CMU, A, 0, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" }, 343 { CMU, A, 0, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" }, 344 { MU, A, 0, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", 
"\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" }, 345 { MU, A, 0, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" }, 346 { MU, A, 0, 0, "\\d+123", "987654321,01234" }, 347 { MU, A, 0, 0, "abcd*|\\w+xy", "aaaaa,abxyz" }, 348 { MU, A, 0, 0, "(?:abc|((?:amc|\\b\\w*xy)))", "aaaaa,abxyz" }, 349 { MU, A, 0, 0, "a(?R)|([a-z]++)#", ".abcd.abcd#."}, 350 { MU, A, 0, 0, "a(?R)|([a-z]++)#", ".abcd.mbcd#."}, 351 { MU, A, 0, 0, ".[ab]*.", "xx" }, 352 { MU, A, 0, 0, ".[ab]*a", "xxa" }, 353 { MU, A, 0, 0, ".[ab]?.", "xx" }, 354 { MU, A, 0, 0, "_[ab]+_*a", "_aa" }, 355 { MU, A, 0, 0, "#(A+)#\\d+", "#A#A#0" }, 356 { MU, A, 0, 0, "(?P<size>\\d+)m|M", "4M" }, 357 { M, PCRE2_NEWLINE_CRLF, 0, 0, "\\n?.+#", "\n,\n,#" }, 358 359 /* Bracket repeats with limit. */ 360 { MU, A, 0, 0, "(?:(ab){2}){5}M", "abababababababababababM" }, 361 { MU, A, 0, 0, "(?:ab|abab){1,5}M", "abababababababababababM" }, 362 { MU, A, 0, 0, "(?>ab|abab){1,5}M", "abababababababababababM" }, 363 { MU, A, 0, 0, "(?:ab|abab){1,5}?M", "abababababababababababM" }, 364 { MU, A, 0, 0, "(?>ab|abab){1,5}?M", "abababababababababababM" }, 365 { MU, A, 0, 0, "(?:(ab){1,4}?){1,3}?M", "abababababababababababababM" }, 366 { MU, A, 0, 0, "(?:(ab){1,4}){1,3}abababababababababababM", "ababababababababababababM" }, 367 { MU, A, 0, 0 | F_NOMATCH, "(?:(ab){1,4}){1,3}abababababababababababM", "abababababababababababM" }, 368 { MU, A, 0, 0, "(ab){4,6}?M", "abababababababM" }, 369 370 /* Basic character sets. 
*/ 371 { MU, A, 0, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " }, 372 { MU, A, 0, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" }, 373 { MU, A, 0, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" }, 374 { MU, A, 0, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" }, 375 { MU, A, 0, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" }, 376 { MU, A, 0, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" }, 377 { MU, A, 0, 0, "x[bcef]+", "xaxdxecbfg" }, 378 { MU, A, 0, 0, "x[bcdghij]+", "xaxexfxdgbjk" }, 379 { MU, A, 0, 0, "x[^befg]+", "xbxexacdhg" }, 380 { MU, A, 0, 0, "x[^bcdl]+", "xlxbxaekmd" }, 381 { MU, A, 0, 0, "x[^bcdghi]+", "xbxdxgxaefji" }, 382 { MU, A, 0, 0, "x[B-Fb-f]+", "xaxAxgxbfBFG" }, 383 { CMU, A, 0, 0, "\\x{e9}+", "#\xf0\x90\x90\xa8\xc3\xa8\xc3\xa9\xc3\x89\xc3\x88" }, 384 { CMU, A, 0, 0, "[^\\x{e9}]+", "\xc3\xa9#\xf0\x90\x90\xa8\xc3\xa8\xc3\x88\xc3\x89" }, 385 { MU, A, 0, 0, "[\\x02\\x7e]+", "\xc3\x81\xe1\xbf\xb8\xf0\x90\x90\xa8\x01\x02\x7e\x7f" }, 386 { MU, A, 0, 0, "[^\\x02\\x7e]+", "\x02\xc3\x81\xe1\xbf\xb8\xf0\x90\x90\xa8\x01\x7f\x7e" }, 387 { MU, A, 0, 0, "[\\x{81}-\\x{7fe}]+", "#\xe1\xbf\xb8\xf0\x90\x90\xa8\xc2\x80\xc2\x81\xdf\xbe\xdf\xbf" }, 388 { MU, A, 0, 0, "[^\\x{81}-\\x{7fe}]+", "\xc2\x81#\xe1\xbf\xb8\xf0\x90\x90\xa8\xc2\x80\xdf\xbf\xdf\xbe" }, 389 { MU, A, 0, 0, "[\\x{801}-\\x{fffe}]+", "#\xc3\xa9\xf0\x90\x90\x80\xe0\xa0\x80\xe0\xa0\x81\xef\xbf\xbe\xef\xbf\xbf" }, 390 { MU, A, 0, 0, "[^\\x{801}-\\x{fffe}]+", "\xe0\xa0\x81#\xc3\xa9\xf0\x90\x90\x80\xe0\xa0\x80\xef\xbf\xbf\xef\xbf\xbe" }, 391 { MU, A, 0, 0, "[\\x{10001}-\\x{10fffe}]+", "#\xc3\xa9\xe2\xb1\xa5\xf0\x90\x80\x80\xf0\x90\x80\x81\xf4\x8f\xbf\xbe\xf4\x8f\xbf\xbf" }, 392 { MU, A, 0, 0, "[^\\x{10001}-\\x{10fffe}]+", "\xf0\x90\x80\x81#\xc3\xa9\xe2\xb1\xa5\xf0\x90\x80\x80\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbe" }, 393 { CMU, A, 0, 0 | F_NOMATCH, "^[\\x{0100}-\\x{017f}]", " " }, 394 395 /* Unicode properties. 
*/ 396 { MUP, A, 0, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" }, 397 { MUP, A, 0, 0 | F_PROPERTY, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" }, 398 { MUP, A, 0, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" }, 399 { MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}]", "abc" }, 400 { MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}]", "abc" }, 401 { MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" }, 402 { MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" }, 403 { MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" }, 404 { MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" }, 405 { MUP, A, 0, 0 | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" }, 406 { MUP, A, 0, 0 | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" }, 407 { MUP, A, 0, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" }, 408 { CMUP, A, 0, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" }, 409 { MUP, A, 0, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" }, 410 { MUP, A, 0, 0 | F_PROPERTY, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" }, 411 { MU, A, 0, 0 | F_PROPERTY, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" }, 412 { CMUP, A, 0, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" }, 413 { MUP, A, 0, 0 | F_PROPERTY, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" }, 414 { MUP, A, 0, 0 | F_PROPERTY, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" }, 415 { PCRE2_UCP, 0, 0, 0 | F_PROPERTY, "[a-b\\s]{2,5}[^a]", "AB baaa" }, 416 { MUP, 0, 0, 0 | F_NOMATCH, "[^\\p{Hangul}\\p{Z}]", " " }, 417 { MUP, 0, 0, 0, "[\\p{Lu}\\P{Latin}]+", "c\xEA\xA4\xAE,A,b" }, 418 { MUP, 0, 0, 0, "[\\x{a92e}\\p{Lu}\\P{Latin}]+", "c\xEA\xA4\xAE,A,b" }, 419 { CMUP, 0, 0, 0, "[^S]\\B", 
"\xe2\x80\x8a" }, 420 421 /* Possible empty brackets. */ 422 { MU, A, 0, 0, "(?:|ab||bc|a)+d", "abcxabcabd" }, 423 { MU, A, 0, 0, "(|ab||bc|a)+d", "abcxabcabd" }, 424 { MU, A, 0, 0, "(?:|ab||bc|a)*d", "abcxabcabd" }, 425 { MU, A, 0, 0, "(|ab||bc|a)*d", "abcxabcabd" }, 426 { MU, A, 0, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" }, 427 { MU, A, 0, 0, "(|ab||bc|a)+?d", "abcxabcabd" }, 428 { MU, A, 0, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" }, 429 { MU, A, 0, 0, "(|ab||bc|a)*?d", "abcxabcabd" }, 430 { MU, A, 0, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" }, 431 { MU, A, 0, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" }, 432 433 /* Start offset. */ 434 { MU, A, 0, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" }, 435 { MU, A, 0, 4 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" }, 436 { MU, A, 0, 2 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" }, 437 { MU, A, 0, 1, "(\\w\\W\\w)+", "ab#d" }, 438 439 /* Newline. */ 440 { M, PCRE2_NEWLINE_CRLF, 0, 0, "\\W{0,2}[^#]{3}", "\r\n#....." }, 441 { M, PCRE2_NEWLINE_CR, 0, 0, "\\W{0,2}[^#]{3}", "\r\n#....." }, 442 { M, PCRE2_NEWLINE_CRLF, 0, 0, "\\W{1,3}[^#]", "\r\n##...." }, 443 { MU, A, PCRE2_NO_UTF_CHECK, 1, "^.a", "\n\x80\nxa" }, 444 { MU, A, 0, 1, "^", "\r\n" }, 445 { M, PCRE2_NEWLINE_CRLF, 0, 1 | F_NOMATCH, "^", "\r\n" }, 446 { M, PCRE2_NEWLINE_CRLF, 0, 1, "^", "\r\na" }, 447 448 /* Any character except newline or any newline. 
*/ 449 { 0, PCRE2_NEWLINE_CRLF, 0, 0, ".", "\r" }, 450 { U, PCRE2_NEWLINE_CRLF, 0, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" }, 451 { 0, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" }, 452 { U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" }, 453 { U, PCRE2_NEWLINE_ANY, 0, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" }, 454 { U, PCRE2_NEWLINE_ANYCRLF, 0, 0 | F_NOMATCH, ".(.).", "\xe2\x80\xa8\nb\r" }, 455 { 0, PCRE2_NEWLINE_ANY, 0, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" }, 456 { U, PCRE2_NEWLINE_ANY, 0, 0, "(.+)#", "#\rMn\xc2\x85#\n###" }, 457 { 0, BSR(PCRE2_BSR_ANYCRLF), 0, 0, "\\R", "\r" }, 458 { 0, BSR(PCRE2_BSR_ANYCRLF), 0, 0, "\\R", "\x85#\r\n#" }, 459 { U, BSR(PCRE2_BSR_UNICODE), 0, 0, "\\R", "ab\xe2\x80\xa8#c" }, 460 { U, BSR(PCRE2_BSR_UNICODE), 0, 0, "\\R", "ab\r\nc" }, 461 { U, PCRE2_NEWLINE_CRLF | BSR(PCRE2_BSR_UNICODE), 0, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" }, 462 { MU, A, 0, 0 | F_NOMATCH, "\\R+", "ab" }, 463 { MU, A, 0, 0, "\\R+", "ab\r\n\r" }, 464 { MU, A, 0, 0, "\\R*", "ab\r\n\r" }, 465 { MU, A, 0, 0, "\\R*", "\r\n\r" }, 466 { MU, A, 0, 0, "\\R{2,4}", "\r\nab\r\r" }, 467 { MU, A, 0, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" }, 468 { MU, A, 0, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" }, 469 { MU, A, 0, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" }, 470 { MU, A, 0, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" }, 471 { MU, A, 0, 0, "\\R+\\R\\R", "\r\r\r" }, 472 { MU, A, 0, 0, "\\R*\\R\\R", "\n\r" }, 473 { MU, A, 0, 0 | F_NOMATCH, "\\R{2,4}\\R\\R", "\r\r\r" }, 474 { MU, A, 0, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" }, 475 476 /* Atomic groups (no fallback from "next" direction). 
*/ 477 { MU, A, 0, 0 | F_NOMATCH, "(?>ab)ab", "bab" }, 478 { MU, A, 0, 0 | F_NOMATCH, "(?>(ab))ab", "bab" }, 479 { MU, A, 0, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op", 480 "bababcdedefgheijijklmlmnop" }, 481 { MU, A, 0, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" }, 482 { MU, A, 0, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" }, 483 { MU, A, 0, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" }, 484 { MU, A, 0, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" }, 485 { MU, A, 0, 0, "((?>a|)+?)b", "aaacaaab" }, 486 { MU, A, 0, 0, "(?>x|)*$", "aaa" }, 487 { MU, A, 0, 0, "(?>(x)|)*$", "aaa" }, 488 { MU, A, 0, 0, "(?>x|())*$", "aaa" }, 489 { MU, A, 0, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" }, 490 { MU, A, 0, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" }, 491 { MU, A, 0, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" }, 492 { MU, A, 0, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" }, 493 { MU, A, 0, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" }, 494 { MU, A, 0, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" }, 495 { MU, A, 0, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" }, 496 { MU, A, 0, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" }, 497 { MU, A, 0, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" }, 498 { MU, A, 0, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" }, 499 { MU, A, 0, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" }, 500 { MU, A, 0, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" }, 501 { MU, A, 0, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" }, 502 { MU, A, 0, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" }, 503 { CM, A, 0, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" }, 504 { MU, A, 0, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" }, 505 { MU, A, 0, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd 
aacaabdbdcdcaaaadaabcbaadd" }, 506 { MU, A, 0, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d" }, 507 { MU, A, 0, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" }, 508 { MU, A, 0, 0 | F_PROPERTY, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" }, 509 { MU, A, 0, 0 | F_PROPERTY, "\\X{2,4}", "abcdef" }, 510 { MU, A, 0, 0 | F_PROPERTY, "\\X{2,4}?", "abcdef" }, 511 { MU, A, 0, 0 | F_NOMATCH | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d##" }, 512 { MU, A, 0, 0 | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" }, 513 { MU, A, 0, 0, "(c(ab)?+ab)+", "cabcababcab" }, 514 { MU, A, 0, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" }, 515 516 /* Possessive quantifiers. */ 517 { MU, A, 0, 0, "(?:a|b)++m", "mababbaaxababbaam" }, 518 { MU, A, 0, 0, "(?:a|b)*+m", "mababbaaxababbaam" }, 519 { MU, A, 0, 0, "(?:a|b)*+m", "ababbaaxababbaam" }, 520 { MU, A, 0, 0, "(a|b)++m", "mababbaaxababbaam" }, 521 { MU, A, 0, 0, "(a|b)*+m", "mababbaaxababbaam" }, 522 { MU, A, 0, 0, "(a|b)*+m", "ababbaaxababbaam" }, 523 { MU, A, 0, 0, "(a|b(*ACCEPT))++m", "maaxab" }, 524 { MU, A, 0, 0, "(?:b*)++m", "bxbbxbbbxm" }, 525 { MU, A, 0, 0, "(?:b*)++m", "bxbbxbbbxbbm" }, 526 { MU, A, 0, 0, "(?:b*)*+m", "bxbbxbbbxm" }, 527 { MU, A, 0, 0, "(?:b*)*+m", "bxbbxbbbxbbm" }, 528 { MU, A, 0, 0, "(b*)++m", "bxbbxbbbxm" }, 529 { MU, A, 0, 0, "(b*)++m", "bxbbxbbbxbbm" }, 530 { MU, A, 0, 0, "(b*)*+m", "bxbbxbbbxm" }, 531 { MU, A, 0, 0, "(b*)*+m", "bxbbxbbbxbbm" }, 532 { MU, A, 0, 0, "(?:a|(b))++m", "mababbaaxababbaam" }, 533 { MU, A, 0, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" }, 534 { MU, A, 0, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" }, 535 { MU, A, 0, 0, "(a|(b))++m", "mababbaaxababbaam" }, 536 { MU, A, 0, 0, "((a)|b)*+m", "mababbaaxababbaam" }, 537 { MU, A, 0, 0, "((a)|(b))*+m", "ababbaaxababbaam" }, 538 { MU, A, 0, 0, "(a|(b)(*ACCEPT))++m", "maaxab" }, 539 { MU, A, 0, 0, "(?:(b*))++m", "bxbbxbbbxm" }, 540 { MU, A, 0, 0, "(?:(b*))++m", "bxbbxbbbxbbm" }, 541 { MU, A, 0, 0, "(?:(b*))*+m", "bxbbxbbbxm" }, 542 { MU, A, 0, 0, 
"(?:(b*))*+m", "bxbbxbbbxbbm" }, 543 { MU, A, 0, 0, "((b*))++m", "bxbbxbbbxm" }, 544 { MU, A, 0, 0, "((b*))++m", "bxbbxbbbxbbm" }, 545 { MU, A, 0, 0, "((b*))*+m", "bxbbxbbbxm" }, 546 { MU, A, 0, 0, "((b*))*+m", "bxbbxbbbxbbm" }, 547 { MU, A, 0, 0 | F_NOMATCH, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" }, 548 { MU, A, 0, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" }, 549 { MU, A, 0, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" }, 550 { MU, A, 0, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" }, 551 { MU, A, 0, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" }, 552 553 /* Back references. */ 554 { MU, A, 0, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" }, 555 { CMU, A, 0, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" }, 556 { CM, A, 0, 0, "(a{2,4})\\1", "AaAaaAaA" }, 557 { MU, A, 0, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" }, 558 { MU, A, 0, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" }, 559 { MU, A, 0, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" }, 560 { MU, A, 0, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" }, 561 { MU, A, 0, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" }, 562 { MU, A, 0, 0, "(?:(aa)|b)\\1?b", "bb" }, 563 { CMU, A, 0, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" }, 564 { MU, A, 0, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" }, 565 { CMU, A, 0, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" }, 566 { MU, A, 0, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" }, 567 { CM, A, 0, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" }, 568 { MU, A, 0, 0, 
"(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" }, 569 { MU, A, 0, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" }, 570 { M, A, 0, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" }, 571 { MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." }, 572 { MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{0,2}", "wwwww." }, 573 { MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwww" }, 574 { MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwwww" }, 575 { PCRE2_UCP, 0, 0, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." }, 576 { CMUP, A, 0, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" }, 577 { MU | PCRE2_DUPNAMES, A, 0, 0 | F_NOMATCH, "\\k<A>{1,3}(?<A>aa)(?<A>bb)", "aabb" }, 578 { MU | PCRE2_DUPNAMES | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\k<A>{1,3}(?<A>aa)(?<A>bb)", "aabb" }, 579 { MU | PCRE2_DUPNAMES | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\k<A>*(?<A>aa)(?<A>bb)", "aabb" }, 580 { MU | PCRE2_DUPNAMES, A, 0, 0, "(?<A>aa)(?<A>bb)\\k<A>{0,3}aaaaaa", "aabbaaaaaa" }, 581 { MU | PCRE2_DUPNAMES, A, 0, 0, "(?<A>aa)(?<A>bb)\\k<A>{2,5}bb", "aabbaaaabb" }, 582 { MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{0,3}m", "aaaaaaaabbbbaabbbbm" }, 583 { MU | PCRE2_DUPNAMES, A, 0, 0 | F_NOMATCH, "\\k<A>{1,3}?(?<A>aa)(?<A>bb)", "aabb" }, 584 { MU | PCRE2_DUPNAMES | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\k<A>{1,3}?(?<A>aa)(?<A>bb)", "aabb" }, 585 { MU | PCRE2_DUPNAMES, A, 0, 0, "\\k<A>*?(?<A>aa)(?<A>bb)", "aabb" }, 586 { MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{0,3}?m", "aaaaaabbbbbbaabbbbbbbbbbm" }, 587 { MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>*?m", "aaaaaabbbbbbaabbbbbbbbbbm" }, 588 { MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" }, 589 { CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}M", "aaaaaaaabbbbaabbbbm" }, 590 { CMU | 
PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{1,3}M", "aaaaaaaabbbbaabbbbm" }, 591 { CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}?M", "aaaaaabbbbbbaabbbbbbbbbbm" }, 592 { CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" }, 593 594 /* Assertions. */ 595 { MU, A, 0, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" }, 596 { MU, A, 0, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" }, 597 { MU, A, 0, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" }, 598 { MU, A, 0, 0, "(?<=aaa|aa|a)a", "aaa" }, 599 { MU, A, 0, 2, "(?<=aaa|aa|a)a", "aaa" }, 600 { M, A, 0, 0, "(?<=aaa|aa|a)a", "aaa" }, 601 { M, A, 0, 2, "(?<=aaa|aa|a)a", "aaa" }, 602 { MU, A, 0, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" }, 603 { MU, A, 0, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" }, 604 { MU, A, 0, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" }, 605 { MU, A, 0, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" }, 606 { MU, A, 0, 0, "((?(?=(a))a)+k)", "bbak" }, 607 { MU, A, 0, 0, "((?(?=a)a)+k)", "bbak" }, 608 { MU, A, 0, 0 | F_NOMATCH, "(?=(?>(a))m)amk", "a k" }, 609 { MU, A, 0, 0 | F_NOMATCH, "(?!(?>(a))m)amk", "a k" }, 610 { MU, A, 0, 0 | F_NOMATCH, "(?>(?=(a))am)amk", "a k" }, 611 { MU, A, 0, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" }, 612 { MU, A, 0, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" }, 613 { MU, A, 0, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" }, 614 { MU, A, 0, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" }, 615 { MU, A, 0, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" }, 616 { MU, A, 0, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" }, 617 { MU, A, 0, 0, "((?!a)?(?!([^a]))?)+$", "acbab" }, 618 { MU, A, 0, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" }, 619 { MU, A, 0, 0, "a(?=(?C)\\B(?C`x`))b", "ab" }, 620 { MU, A, 0, 0, 
"a(?!(?C)\\B(?C`x`))bb|ab", "abb" }, 621 { MU, A, 0, 0, "a(?=\\b|(?C)\\B(?C`x`))b", "ab" }, 622 { MU, A, 0, 0, "a(?!\\b|(?C)\\B(?C`x`))bb|ab", "abb" }, 623 { MU, A, 0, 0, "c(?(?=(?C)\\B(?C`x`))ab|a)", "cab" }, 624 { MU, A, 0, 0, "c(?(?!(?C)\\B(?C`x`))ab|a)", "cab" }, 625 { MU, A, 0, 0, "c(?(?=\\b|(?C)\\B(?C`x`))ab|a)", "cab" }, 626 { MU, A, 0, 0, "c(?(?!\\b|(?C)\\B(?C`x`))ab|a)", "cab" }, 627 { MU, A, 0, 0, "a(?=)b", "ab" }, 628 { MU, A, 0, 0 | F_NOMATCH, "a(?!)b", "ab" }, 629 630 /* Not empty, ACCEPT, FAIL */ 631 { MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "a*", "bcx" }, 632 { MU, A, PCRE2_NOTEMPTY, 0, "a*", "bcaad" }, 633 { MU, A, PCRE2_NOTEMPTY, 0, "a*?", "bcaad" }, 634 { MU, A, PCRE2_NOTEMPTY_ATSTART, 0, "a*", "bcaad" }, 635 { MU, A, 0, 0, "a(*ACCEPT)b", "ab" }, 636 { MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "a*(*ACCEPT)b", "bcx" }, 637 { MU, A, PCRE2_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" }, 638 { MU, A, PCRE2_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" }, 639 { MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "(?:z|a*(*ACCEPT)b)", "bcx" }, 640 { MU, A, PCRE2_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" }, 641 { MU, A, PCRE2_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" }, 642 { MU, A, PCRE2_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" }, 643 { MU, A, PCRE2_NOTEMPTY_ATSTART, 0 | F_NOMATCH, "a*(*ACCEPT)b", "" }, 644 { MU, A, 0, 0, "((a(*ACCEPT)b))", "ab" }, 645 { MU, A, 0, 0, "(a(*FAIL)a|a)", "aaa" }, 646 { MU, A, 0, 0, "(?=ab(*ACCEPT)b)a", "ab" }, 647 { MU, A, 0, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" }, 648 { MU, A, 0, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" }, 649 { MU, A, PCRE2_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" }, 650 { MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "(?=A)", "AB" }, 651 652 /* Conditional blocks. 
*/ 653 { MU, A, 0, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" }, 654 { MU, A, 0, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" }, 655 { MU, A, 0, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" }, 656 { MU, A, 0, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" }, 657 { MU, A, 0, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" }, 658 { MU, A, 0, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" }, 659 { MU, A, 0, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" }, 660 { MU, A, 0, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" }, 661 { MU, A, 0, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" }, 662 { MU, A, 0, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" }, 663 { MU, A, 0, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" }, 664 { MU, A, 0, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" }, 665 { MU, A, 0, 0, "(?(?=a)ab)", "a" }, 666 { MU, A, 0, 0, "(?(?<!b)c)", "b" }, 667 { MU, A, 0, 0, "(?(DEFINE)a(b))", "a" }, 668 { MU, A, 0, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" }, 669 { MU, A, 0, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" }, 670 { MU, A, 0, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" }, 671 { MU, A, 0, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" }, 672 { MU, A, 0, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" }, 673 { MU, A, 0, 0, "(c)?\?(?(1)a|b)", "cdcaa" }, 674 { MU, A, 0, 0, "(c)?\?(?(1)a|b)", "cbb" }, 675 { MU, A, 0, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" }, 676 { MU, A, 0, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" }, 677 { MU, A, 0, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" }, 678 { MU, A, 0, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" }, 679 { MU, A, 0, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" }, 680 { MU, A, 0, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" }, 681 { MU, A, 0, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" }, 682 { MU, A, 0, 0, 
"(?(?!b)a*)+aak", "aaaaab aaaaak" }, 683 { MU, A, 0, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" }, 684 { MU, A, 0, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" }, 685 { MU, A, 0, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" }, 686 { MU, A, 0, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" }, 687 { MU, A, 0, 0, "((?:a|aa)(?(1)aaa))x", "aax" }, 688 { MU, A, 0, 0, "(?(?!)a|b)", "ab" }, 689 { MU, A, 0, 0, "(?(?!)a)", "ab" }, 690 { MU, A, 0, 0 | F_NOMATCH, "(?(?!)a|b)", "ac" }, 691 692 /* Set start of match. */ 693 { MU, A, 0, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" }, 694 { MU, A, 0, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" }, 695 { MU, A, 0, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" }, 696 { MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "a\\K(*ACCEPT)b", "aa" }, 697 { MU, A, PCRE2_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" }, 698 699 /* First line. */ 700 { MU | PCRE2_FIRSTLINE, A, 0, 0 | F_PROPERTY, "\\p{Any}a", "bb\naaa" }, 701 { MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}a", "bb\r\naaa" }, 702 { MU | PCRE2_FIRSTLINE, A, 0, 0, "(?<=a)", "a" }, 703 { MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "[^a][^b]", "ab" }, 704 { MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "a", "\na" }, 705 { MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "[abc]", "\na" }, 706 { MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "^a", "\na" }, 707 { MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "^(?<=\n)", "\na" }, 708 { MU | PCRE2_FIRSTLINE, A, 0, 0, "\xf0\x90\x90\x80", "\xf0\x90\x90\x80" }, 709 { MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_ANY, 0, 0 | F_NOMATCH, "#", "\xc2\x85#" }, 710 { M | PCRE2_FIRSTLINE, PCRE2_NEWLINE_ANY, 0, 0 | F_NOMATCH, "#", "\x85#" }, 711 { MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_ANY, 0, 0 | F_NOMATCH, "^#", "\xe2\x80\xa8#" }, 712 { MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0 | F_PROPERTY, "\\p{Any}", "\r\na" }, 713 { MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0, 
".", "\r" }, 714 { MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0, "a", "\ra" }, 715 { MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0 | F_NOMATCH, "ba", "bbb\r\nba" }, 716 { MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}{4}|a", "\r\na" }, 717 { MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 1, ".", "\r\n" }, 718 { PCRE2_FIRSTLINE | PCRE2_DOTALL, PCRE2_NEWLINE_LF, 0, 0 | F_NOMATCH, "ab.", "ab" }, 719 { MU | PCRE2_FIRSTLINE, A, 0, 1 | F_NOMATCH, "^[a-d0-9]", "\nxx\nd" }, 720 { PCRE2_FIRSTLINE | PCRE2_DOTALL, PCRE2_NEWLINE_ANY, 0, 0, "....a", "012\n0a" }, 721 { MU | PCRE2_FIRSTLINE, A, 0, 0, "[aC]", "a" }, 722 723 /* Recurse. */ 724 { MU, A, 0, 0, "(a)(?1)", "aa" }, 725 { MU, A, 0, 0, "((a))(?1)", "aa" }, 726 { MU, A, 0, 0, "(b|a)(?1)", "aa" }, 727 { MU, A, 0, 0, "(b|(a))(?1)", "aa" }, 728 { MU, A, 0, 0 | F_NOMATCH, "((a)(b)(?:a*))(?1)", "aba" }, 729 { MU, A, 0, 0, "((a)(b)(?:a*))(?1)", "abab" }, 730 { MU, A, 0, 0, "((a+)c(?2))b(?1)", "aacaabaca" }, 731 { MU, A, 0, 0, "((?2)b|(a)){2}(?1)", "aabab" }, 732 { MU, A, 0, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" }, 733 { MU, A, 0, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" }, 734 { MU, A, 0, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" }, 735 { MU, A, 0, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" }, 736 { MU, A, 0, 0, "(?(DEFINE)(aa|a))(?1)ab", "aab" }, 737 { MU, A, 0, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" }, 738 { MU, A, 0, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" }, 739 { MU, A, 0, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" }, 740 { MU, A, 0, 0, "b|<(?R)*>", "<<b>" }, 741 { MU, A, 0, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" }, 742 { MU, A, 0, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" }, 743 { MU, A, 0, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" }, 744 { MU, A, 0, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" }, 745 { MU, A, 0, 0, "(a)((?(R1)a|b))(?2)", "ababba" }, 746 { MU, A, 0, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" }, 747 { MU, A, 0, 0, 
"((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" }, 748 { MU, A, 0, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" }, 749 { MU, A, 0, 0, "((?(R)a|(?1)){3})", "XaaaaaaaaaX" }, 750 { MU, A, 0, 0, "((?:(?(R)a|(?1))){3})", "XaaaaaaaaaX" }, 751 { MU, A, 0, 0, "((?(R)a|(?1)){1,3})aaaaaa", "aaaaaaaaXaaaaaaaaa" }, 752 { MU, A, 0, 0, "((?(R)a|(?1)){1,3}?)M", "aaaM" }, 753 { MU, A, 0, 0, "((.)(?:.|\\2(?1))){0}#(?1)#", "#aabbccdde# #aabbccddee#" }, 754 { MU, A, 0, 0, "((.)(?:\\2|\\2{4}b)){0}#(?:(?1))+#", "#aaaab# #aaaaab#" }, 755 { MU, A, 0, 0 | F_NOMATCH, "(?1)$((.|\\2xx){1,2})", "abc" }, 756 757 /* 16 bit specific tests. */ 758 { CM, A, 0, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" }, 759 { CM, A, 0, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" }, 760 { CM, A, 0, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" }, 761 { CM, A, 0, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" }, 762 { CM, A, 0, 0 | F_FORCECONV, "[a-\xed\xb0\x80]", "A" }, 763 { CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[a-\\x{dc00}]", "B" }, 764 { CM, A, 0, 0 | F_NO8 | F_NOMATCH | F_FORCECONV, "[b-\\x{dc00}]", "a" }, 765 { CM, A, 0, 0 | F_NO8 | F_FORCECONV, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" }, 766 { CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" }, 767 { CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" }, 768 { CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" }, 769 { CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" }, 770 { CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" }, 771 { CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" }, 772 { CM, A, 0, 0 | F_FORCECONV, 
"[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" }, 773 { CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" }, 774 { M, A, 0, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" }, 775 { M, A, 0, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" }, 776 { CM, A, 0, 0 | F_FORCECONV, ".\\B.", "\xed\xa0\x80\xed\xb0\x80" }, 777 { CM, A, 0, 0 | F_FORCECONV, "\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" }, 778 { CM, A, 0, 0 | F_FORCECONV, "\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80" }, 779 { CM, A, 0, 0 | F_FORCECONV | F_NOMATCH, "\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" }, 780 { CM | PCRE2_EXTENDED, A, 0, 0 | F_FORCECONV, "\xed\xa0\x80 \xed\xb0\x80 !", "\xed\xa0\x80\xed\xb0\x80!" }, 781 { CM, A, 0, 0 | F_FORCECONV, "\xed\xa0\x80+#[^#]+\xed\xa0\x80", "\xed\xa0\x80#a\xed\xa0\x80" }, 782 { CM, A, 0, 0 | F_FORCECONV, "(\xed\xa0\x80+)#\\1", "\xed\xa0\x80\xed\xa0\x80#\xed\xa0\x80\xed\xa0\x80" }, 783 { M, PCRE2_NEWLINE_ANY, 0, 0 | F_NO8 | F_FORCECONV, "^-", "a--\xe2\x80\xa8--" }, 784 { 0, BSR(PCRE2_BSR_UNICODE), 0, 0 | F_NO8 | F_FORCECONV, "\\R", "ab\xe2\x80\xa8" }, 785 { 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\v", "ab\xe2\x80\xa9" }, 786 { 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\h", "ab\xe1\xa0\x8e" }, 787 { 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\v+?\\V+?#", "\xe2\x80\xa9\xe2\x80\xa9\xef\xbf\xbf\xef\xbf\xbf#" }, 788 { 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\h+?\\H+?#", "\xe1\xa0\x8e\xe1\xa0\x8e\xef\xbf\xbf\xef\xbf\xbf#" }, 789 790 /* Partial matching. 
*/ 791 { MU, A, PCRE2_PARTIAL_SOFT, 0, "ab", "a" }, 792 { MU, A, PCRE2_PARTIAL_SOFT, 0, "ab|a", "a" }, 793 { MU, A, PCRE2_PARTIAL_HARD, 0, "ab|a", "a" }, 794 { MU, A, PCRE2_PARTIAL_SOFT, 0, "\\b#", "a" }, 795 { MU, A, PCRE2_PARTIAL_SOFT, 0, "(?<=a)b", "a" }, 796 { MU, A, PCRE2_PARTIAL_SOFT, 0, "abc|(?<=xxa)bc", "xxab" }, 797 { MU, A, PCRE2_PARTIAL_SOFT, 0, "a\\B", "a" }, 798 { MU, A, PCRE2_PARTIAL_HARD, 0, "a\\b", "a" }, 799 800 /* (*MARK) verb. */ 801 { MU, A, 0, 0, "a(*MARK:aa)a", "ababaa" }, 802 { MU, A, 0, 0 | F_NOMATCH, "a(*:aa)a", "abab" }, 803 { MU, A, 0, 0, "a(*:aa)(b(*:bb)b|bc)", "abc" }, 804 { MU, A, 0, 0 | F_NOMATCH, "a(*:1)x|b(*:2)y", "abc" }, 805 { MU, A, 0, 0, "(?>a(*:aa))b|ac", "ac" }, 806 { MU, A, 0, 0, "(?(DEFINE)(a(*:aa)))(?1)", "a" }, 807 { MU, A, 0, 0 | F_NOMATCH, "(?(DEFINE)((a)(*:aa)))(?1)b", "aa" }, 808 { MU, A, 0, 0, "(?(DEFINE)(a(*:aa)))a(?1)b|aac", "aac" }, 809 { MU, A, 0, 0, "(a(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" }, 810 { MU, A, 0, 0, "(a(*:aa)){0}(?:b(?1)b)+", "babba" }, 811 { MU, A, 0, 0 | F_NOMATCH, "(a(*:aa)){0}(?:b(?1)b)+", "ba" }, 812 { MU, A, 0, 0, "(a\\K(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" }, 813 { MU, A, 0, 0, "(a\\K(*:aa)){0}(?:b(?1)b)+", "babba" }, 814 { MU, A, 0, 0 | F_NOMATCH, "(a\\K(*:aa)){0}(?:b(?1)b)+", "ba" }, 815 { MU, A, 0, 0 | F_NOMATCH, "(*:mark)m", "a" }, 816 817 /* (*COMMIT) verb. */ 818 { MU, A, 0, 0 | F_NOMATCH, "a(*COMMIT)b", "ac" }, 819 { MU, A, 0, 0, "aa(*COMMIT)b", "xaxaab" }, 820 { MU, A, 0, 0 | F_NOMATCH, "a(*COMMIT)(*:msg)b|ac", "ac" }, 821 { MU, A, 0, 0 | F_NOMATCH, "(a(*COMMIT)b)++", "abac" }, 822 { MU, A, 0, 0 | F_NOMATCH, "((a)(*COMMIT)b)++", "abac" }, 823 { MU, A, 0, 0 | F_NOMATCH, "(?=a(*COMMIT)b)ab|ad", "ad" }, 824 825 /* (*PRUNE) verb. 
*/ 826 { MU, A, 0, 0, "aa\\K(*PRUNE)b", "aaab" }, 827 { MU, A, 0, 0, "aa(*PRUNE:bb)b|a", "aa" }, 828 { MU, A, 0, 0, "(a)(a)(*PRUNE)b|(a)", "aa" }, 829 { MU, A, 0, 0, "(a)(a)(a)(a)(a)(a)(a)(a)(*PRUNE)b|(a)", "aaaaaaaa" }, 830 { MU, A, PCRE2_PARTIAL_SOFT, 0, "a(*PRUNE)a|", "a" }, 831 { MU, A, PCRE2_PARTIAL_SOFT, 0, "a(*PRUNE)a|m", "a" }, 832 { MU, A, 0, 0 | F_NOMATCH, "(?=a(*PRUNE)b)ab|ad", "ad" }, 833 { MU, A, 0, 0, "a(*COMMIT)(*PRUNE)d|bc", "abc" }, 834 { MU, A, 0, 0, "(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" }, 835 { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" }, 836 { MU, A, 0, 0, "(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" }, 837 { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" }, 838 { MU, A, 0, 0, "(a(*COMMIT)b){0}a(?1)(*PRUNE)c|bc", "abc" }, 839 { MU, A, 0, 0 | F_NOMATCH, "(a(*COMMIT)b){0}a(*COMMIT)(?1)(*PRUNE)c|bc", "abc" }, 840 { MU, A, 0, 0, "(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" }, 841 { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" }, 842 { MU, A, 0, 0, "((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" }, 843 { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" }, 844 { MU, A, 0, 0, "(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" }, 845 { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" }, 846 { MU, A, 0, 0, "(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" }, 847 { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" }, 848 { MU, A, 0, 0, "(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" }, 849 { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" }, 850 { MU, A, 0, 0, "(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" }, 851 { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" }, 852 { MU, A, 0, 0, "(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" }, 853 { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" }, 854 { MU, A, 0, 0, "(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" }, 
855 { MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" }, 856 857 /* (*SKIP) verb. */ 858 { MU, A, 0, 0 | F_NOMATCH, "(?=a(*SKIP)b)ab|ad", "ad" }, 859 { MU, A, 0, 0, "(\\w+(*SKIP)#)", "abcd,xyz#," }, 860 { MU, A, 0, 0, "\\w+(*SKIP)#|mm", "abcd,xyz#," }, 861 { MU, A, 0, 0 | F_NOMATCH, "b+(?<=(*SKIP)#c)|b+", "#bbb" }, 862 863 /* (*THEN) verb. */ 864 { MU, A, 0, 0, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcaabcaabcaabcnacm" }, 865 { MU, A, 0, 0 | F_NOMATCH, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcm" }, 866 { MU, A, 0, 0, "((?:a(*THEN)|aab)c|a+)+m", "aabcaabcnmaabcaabcm" }, 867 { MU, A, 0, 0, "((?:a|aab)(*THEN)c|a+)+m", "aam" }, 868 { MU, A, 0, 0, "((?:a(*COMMIT)|aab)(*THEN)c|a+)+m", "aam" }, 869 { MU, A, 0, 0, "(?(?=a(*THEN)b)ab|ad)", "ad" }, 870 { MU, A, 0, 0, "(?(?!a(*THEN)b)ad|add)", "add" }, 871 { MU, A, 0, 0 | F_NOMATCH, "(?(?=a)a(*THEN)b|ad)", "ad" }, 872 { MU, A, 0, 0, "(?!(?(?=a)ab|b(*THEN)d))bn|bnn", "bnn" }, 873 { MU, A, 0, 0, "(?=(*THEN: ))* ", " " }, 874 { MU, A, 0, 0, "a(*THEN)(?R) |", "a" }, 875 876 /* Recurse and control verbs. */ 877 { MU, A, 0, 0, "(a(*ACCEPT)b){0}a(?1)b", "aacaabb" }, 878 { MU, A, 0, 0, "((a)\\2(*ACCEPT)b){0}a(?1)b", "aaacaaabb" }, 879 { MU, A, 0, 0, "((ab|a(*ACCEPT)x)+|ababababax){0}_(?1)_", "_ababababax_ _ababababa_" }, 880 { MU, A, 0, 0, "((.)(?:A(*ACCEPT)|(?1)\\2)){0}_(?1)_", "_bcdaAdcb_bcdaAdcb_" }, 881 { MU, A, 0, 0, "((*MARK:m)(?:a|a(*COMMIT)b|aa)){0}_(?1)_", "_ab_" }, 882 { MU, A, 0, 0, "((*MARK:m)(?:a|a(*COMMIT)b|aa)){0}_(?1)_|(_aa_)", "_aa_" }, 883 { MU, A, 0, 0, "(a(*COMMIT)(?:b|bb)|c(*ACCEPT)d|dd){0}_(?1)+_", "_ax_ _cd_ _abbb_ _abcd_ _abbcdd_" }, 884 { MU, A, 0, 0, "((.)(?:.|(*COMMIT)\\2{3}(*ACCEPT).*|.*)){0}_(?1){0,4}_", "_aaaabbbbccccddd_ _aaaabbbbccccdddd_" }, 885 886#ifdef SUPPORT_UNICODE 887 /* Script runs and iterations. 
*/ 888 { MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)*#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" }, 889 { MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)+#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" }, 890 { MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)*?#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" }, 891 { MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)+?#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" }, 892 { MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)*+#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" }, 893 { MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)++#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" }, 894 { MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)?#", "!ab!abc!ab!ab#" }, 895 { MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)??#", "!ab!abc!ab!ab#" }, 896#endif 897 898 /* Deep recursion. */ 899 { MU, A, 0, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " }, 900 { MU, A, 0, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " }, 901 { MU, A, 0, 0, "((a?)+)+b", "aaaaaaaaaaaa b" }, 902 903 /* Deep recursion: Stack limit reached. 
*/ 904 { M, A, 0, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" }, 905 { M, A, 0, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" }, 906 { M, A, 0, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" }, 907 { M, A, 0, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" }, 908 { M, A, 0, 0 | F_NOMATCH, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" }, 909 910 { 0, 0, 0, 0, NULL, NULL } 911}; 912 913#ifdef SUPPORT_PCRE2_8 914static pcre2_jit_stack_8* callback8(void *arg) 915{ 916 return (pcre2_jit_stack_8 *)arg; 917} 918#endif 919 920#ifdef SUPPORT_PCRE2_16 921static pcre2_jit_stack_16* callback16(void *arg) 922{ 923 return (pcre2_jit_stack_16 *)arg; 924} 925#endif 926 927#ifdef SUPPORT_PCRE2_32 928static pcre2_jit_stack_32* callback32(void *arg) 929{ 930 return (pcre2_jit_stack_32 *)arg; 931} 932#endif 933 934#ifdef SUPPORT_PCRE2_8 935static pcre2_jit_stack_8 *stack8; 936 937static pcre2_jit_stack_8 *getstack8(void) 938{ 939 if (!stack8) 940 stack8 = pcre2_jit_stack_create_8(1, 1024 * 1024, NULL); 941 return stack8; 942} 943 944static void setstack8(pcre2_match_context_8 *mcontext) 945{ 946 if (!mcontext) { 947 if (stack8) 948 pcre2_jit_stack_free_8(stack8); 949 stack8 = NULL; 950 return; 951 } 952 953 pcre2_jit_stack_assign_8(mcontext, callback8, getstack8()); 954} 955#endif /* SUPPORT_PCRE2_8 */ 956 957#ifdef SUPPORT_PCRE2_16 958static pcre2_jit_stack_16 *stack16; 959 960static pcre2_jit_stack_16 *getstack16(void) 961{ 962 if (!stack16) 963 stack16 = pcre2_jit_stack_create_16(1, 1024 * 1024, NULL); 964 return stack16; 965} 966 967static void setstack16(pcre2_match_context_16 *mcontext) 968{ 969 if (!mcontext) { 970 if (stack16) 971 pcre2_jit_stack_free_16(stack16); 972 stack16 = NULL; 973 return; 974 } 975 976 pcre2_jit_stack_assign_16(mcontext, callback16, getstack16()); 977} 978#endif /* SUPPORT_PCRE2_16 */ 979 980#ifdef SUPPORT_PCRE2_32 981static pcre2_jit_stack_32 *stack32; 982 
983static pcre2_jit_stack_32 *getstack32(void) 984{ 985 if (!stack32) 986 stack32 = pcre2_jit_stack_create_32(1, 1024 * 1024, NULL); 987 return stack32; 988} 989 990static void setstack32(pcre2_match_context_32 *mcontext) 991{ 992 if (!mcontext) { 993 if (stack32) 994 pcre2_jit_stack_free_32(stack32); 995 stack32 = NULL; 996 return; 997 } 998 999 pcre2_jit_stack_assign_32(mcontext, callback32, getstack32()); 1000} 1001#endif /* SUPPORT_PCRE2_32 */ 1002 1003#ifdef SUPPORT_PCRE2_16 1004 1005static int convert_utf8_to_utf16(PCRE2_SPTR8 input, PCRE2_UCHAR16 *output, int *offsetmap, int max_length) 1006{ 1007 PCRE2_SPTR8 iptr = input; 1008 PCRE2_UCHAR16 *optr = output; 1009 unsigned int c; 1010 1011 if (max_length == 0) 1012 return 0; 1013 1014 while (*iptr && max_length > 1) { 1015 c = 0; 1016 if (offsetmap) 1017 *offsetmap++ = (int)(iptr - (unsigned char*)input); 1018 1019 if (*iptr < 0xc0) 1020 c = *iptr++; 1021 else if (!(*iptr & 0x20)) { 1022 c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f); 1023 iptr += 2; 1024 } else if (!(*iptr & 0x10)) { 1025 c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f); 1026 iptr += 3; 1027 } else if (!(*iptr & 0x08)) { 1028 c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f); 1029 iptr += 4; 1030 } 1031 1032 if (c < 65536) { 1033 *optr++ = c; 1034 max_length--; 1035 } else if (max_length <= 2) { 1036 *optr = '\0'; 1037 return (int)(optr - output); 1038 } else { 1039 c -= 0x10000; 1040 *optr++ = 0xd800 | ((c >> 10) & 0x3ff); 1041 *optr++ = 0xdc00 | (c & 0x3ff); 1042 max_length -= 2; 1043 if (offsetmap) 1044 offsetmap++; 1045 } 1046 } 1047 if (offsetmap) 1048 *offsetmap = (int)(iptr - (unsigned char*)input); 1049 *optr = '\0'; 1050 return (int)(optr - output); 1051} 1052 1053static int copy_char8_to_char16(PCRE2_SPTR8 input, PCRE2_UCHAR16 *output, int max_length) 1054{ 1055 PCRE2_SPTR8 iptr = input; 1056 PCRE2_UCHAR16 *optr = output; 1057 1058 if (max_length == 0) 1059 
return 0; 1060 1061 while (*iptr && max_length > 1) { 1062 *optr++ = *iptr++; 1063 max_length--; 1064 } 1065 *optr = '\0'; 1066 return (int)(optr - output); 1067} 1068 1069#define REGTEST_MAX_LENGTH16 4096 1070static PCRE2_UCHAR16 regtest_buf16[REGTEST_MAX_LENGTH16]; 1071static int regtest_offsetmap16[REGTEST_MAX_LENGTH16]; 1072 1073#endif /* SUPPORT_PCRE2_16 */ 1074 1075#ifdef SUPPORT_PCRE2_32 1076 1077static int convert_utf8_to_utf32(PCRE2_SPTR8 input, PCRE2_UCHAR32 *output, int *offsetmap, int max_length) 1078{ 1079 PCRE2_SPTR8 iptr = input; 1080 PCRE2_UCHAR32 *optr = output; 1081 unsigned int c; 1082 1083 if (max_length == 0) 1084 return 0; 1085 1086 while (*iptr && max_length > 1) { 1087 c = 0; 1088 if (offsetmap) 1089 *offsetmap++ = (int)(iptr - (unsigned char*)input); 1090 1091 if (*iptr < 0xc0) 1092 c = *iptr++; 1093 else if (!(*iptr & 0x20)) { 1094 c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f); 1095 iptr += 2; 1096 } else if (!(*iptr & 0x10)) { 1097 c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f); 1098 iptr += 3; 1099 } else if (!(*iptr & 0x08)) { 1100 c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f); 1101 iptr += 4; 1102 } 1103 1104 *optr++ = c; 1105 max_length--; 1106 } 1107 if (offsetmap) 1108 *offsetmap = (int)(iptr - (unsigned char*)input); 1109 *optr = 0; 1110 return (int)(optr - output); 1111} 1112 1113static int copy_char8_to_char32(PCRE2_SPTR8 input, PCRE2_UCHAR32 *output, int max_length) 1114{ 1115 PCRE2_SPTR8 iptr = input; 1116 PCRE2_UCHAR32 *optr = output; 1117 1118 if (max_length == 0) 1119 return 0; 1120 1121 while (*iptr && max_length > 1) { 1122 *optr++ = *iptr++; 1123 max_length--; 1124 } 1125 *optr = '\0'; 1126 return (int)(optr - output); 1127} 1128 1129#define REGTEST_MAX_LENGTH32 4096 1130static PCRE2_UCHAR32 regtest_buf32[REGTEST_MAX_LENGTH32]; 1131static int regtest_offsetmap32[REGTEST_MAX_LENGTH32]; 1132 1133#endif /* SUPPORT_PCRE2_32 */ 1134 1135static 
int check_ascii(const char *input) 1136{ 1137 const unsigned char *ptr = (unsigned char *)input; 1138 while (*ptr) { 1139 if (*ptr > 127) 1140 return 0; 1141 ptr++; 1142 } 1143 return 1; 1144} 1145 1146#define OVECTOR_SIZE 15 1147 1148static int regression_tests(void) 1149{ 1150 struct regression_test_case *current = regression_test_cases; 1151 int error; 1152 PCRE2_SIZE err_offs; 1153 int is_successful; 1154 int is_ascii; 1155 int total = 0; 1156 int successful = 0; 1157 int successful_row = 0; 1158 int counter = 0; 1159 int jit_compile_mode; 1160 int utf = 0; 1161 int disabled_options = 0; 1162 int i; 1163#ifdef SUPPORT_PCRE2_8 1164 pcre2_code_8 *re8; 1165 pcre2_compile_context_8 *ccontext8; 1166 pcre2_match_data_8 *mdata8_1; 1167 pcre2_match_data_8 *mdata8_2; 1168 pcre2_match_context_8 *mcontext8; 1169 PCRE2_SIZE *ovector8_1 = NULL; 1170 PCRE2_SIZE *ovector8_2 = NULL; 1171 int return_value8[2]; 1172#endif 1173#ifdef SUPPORT_PCRE2_16 1174 pcre2_code_16 *re16; 1175 pcre2_compile_context_16 *ccontext16; 1176 pcre2_match_data_16 *mdata16_1; 1177 pcre2_match_data_16 *mdata16_2; 1178 pcre2_match_context_16 *mcontext16; 1179 PCRE2_SIZE *ovector16_1 = NULL; 1180 PCRE2_SIZE *ovector16_2 = NULL; 1181 int return_value16[2]; 1182 int length16; 1183#endif 1184#ifdef SUPPORT_PCRE2_32 1185 pcre2_code_32 *re32; 1186 pcre2_compile_context_32 *ccontext32; 1187 pcre2_match_data_32 *mdata32_1; 1188 pcre2_match_data_32 *mdata32_2; 1189 pcre2_match_context_32 *mcontext32; 1190 PCRE2_SIZE *ovector32_1 = NULL; 1191 PCRE2_SIZE *ovector32_2 = NULL; 1192 int return_value32[2]; 1193 int length32; 1194#endif 1195 1196#if defined SUPPORT_PCRE2_8 1197 PCRE2_UCHAR8 cpu_info[128]; 1198#elif defined SUPPORT_PCRE2_16 1199 PCRE2_UCHAR16 cpu_info[128]; 1200#elif defined SUPPORT_PCRE2_32 1201 PCRE2_UCHAR32 cpu_info[128]; 1202#endif 1203#if defined SUPPORT_UNICODE && ((defined(SUPPORT_PCRE2_8) + defined(SUPPORT_PCRE2_16) + defined(SUPPORT_PCRE2_32)) >= 2) 1204 int return_value; 1205#endif 1206 1207 
/* This test compares the behaviour of interpreter and JIT. Although disabling 1208 utf or ucp may make tests fail, if the pcre2_match result is the SAME, it is 1209 still considered successful from pcre2_jit_test point of view. */ 1210 1211#if defined SUPPORT_PCRE2_8 1212 pcre2_config_8(PCRE2_CONFIG_JITTARGET, &cpu_info); 1213#elif defined SUPPORT_PCRE2_16 1214 pcre2_config_16(PCRE2_CONFIG_JITTARGET, &cpu_info); 1215#elif defined SUPPORT_PCRE2_32 1216 pcre2_config_32(PCRE2_CONFIG_JITTARGET, &cpu_info); 1217#endif 1218 1219 printf("Running JIT regression tests\n"); 1220 printf(" target CPU of SLJIT compiler: "); 1221 for (i = 0; cpu_info[i]; i++) 1222 printf("%c", (char)(cpu_info[i])); 1223 printf("\n"); 1224 1225#if defined SUPPORT_PCRE2_8 1226 pcre2_config_8(PCRE2_CONFIG_UNICODE, &utf); 1227#elif defined SUPPORT_PCRE2_16 1228 pcre2_config_16(PCRE2_CONFIG_UNICODE, &utf); 1229#elif defined SUPPORT_PCRE2_32 1230 pcre2_config_32(PCRE2_CONFIG_UNICODE, &utf); 1231#endif 1232 1233 if (!utf) 1234 disabled_options |= PCRE2_UTF; 1235#ifdef SUPPORT_PCRE2_8 1236 printf(" in 8 bit mode with UTF-8 %s:\n", utf ? "enabled" : "disabled"); 1237#endif 1238#ifdef SUPPORT_PCRE2_16 1239 printf(" in 16 bit mode with UTF-16 %s:\n", utf ? "enabled" : "disabled"); 1240#endif 1241#ifdef SUPPORT_PCRE2_32 1242 printf(" in 32 bit mode with UTF-32 %s:\n", utf ? 
"enabled" : "disabled"); 1243#endif 1244 1245 while (current->pattern) { 1246 /* printf("\nPattern: %s :\n", current->pattern); */ 1247 total++; 1248 is_ascii = 0; 1249 if (!(current->start_offset & F_PROPERTY)) 1250 is_ascii = check_ascii(current->pattern) && check_ascii(current->input); 1251 1252 if (current->match_options & PCRE2_PARTIAL_SOFT) 1253 jit_compile_mode = PCRE2_JIT_PARTIAL_SOFT; 1254 else if (current->match_options & PCRE2_PARTIAL_HARD) 1255 jit_compile_mode = PCRE2_JIT_PARTIAL_HARD; 1256 else 1257 jit_compile_mode = PCRE2_JIT_COMPLETE; 1258 error = 0; 1259#ifdef SUPPORT_PCRE2_8 1260 re8 = NULL; 1261 ccontext8 = pcre2_compile_context_create_8(NULL); 1262 if (ccontext8) { 1263 if (GET_NEWLINE(current->newline)) 1264 pcre2_set_newline_8(ccontext8, GET_NEWLINE(current->newline)); 1265 if (GET_BSR(current->newline)) 1266 pcre2_set_bsr_8(ccontext8, GET_BSR(current->newline)); 1267 1268 if (!(current->start_offset & F_NO8)) { 1269 re8 = pcre2_compile_8((PCRE2_SPTR8)current->pattern, PCRE2_ZERO_TERMINATED, 1270 current->compile_options & ~disabled_options, 1271 &error, &err_offs, ccontext8); 1272 1273 if (!re8 && (utf || is_ascii)) 1274 printf("\n8 bit: Cannot compile pattern \"%s\": %d\n", current->pattern, error); 1275 } 1276 pcre2_compile_context_free_8(ccontext8); 1277 } 1278 else 1279 printf("\n8 bit: Cannot allocate compile context\n"); 1280#endif 1281#ifdef SUPPORT_PCRE2_16 1282 if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV)) 1283 convert_utf8_to_utf16((PCRE2_SPTR8)current->pattern, regtest_buf16, NULL, REGTEST_MAX_LENGTH16); 1284 else 1285 copy_char8_to_char16((PCRE2_SPTR8)current->pattern, regtest_buf16, REGTEST_MAX_LENGTH16); 1286 1287 re16 = NULL; 1288 ccontext16 = pcre2_compile_context_create_16(NULL); 1289 if (ccontext16) { 1290 if (GET_NEWLINE(current->newline)) 1291 pcre2_set_newline_16(ccontext16, GET_NEWLINE(current->newline)); 1292 if (GET_BSR(current->newline)) 1293 pcre2_set_bsr_16(ccontext16, 
GET_BSR(current->newline)); 1294 1295 if (!(current->start_offset & F_NO16)) { 1296 re16 = pcre2_compile_16(regtest_buf16, PCRE2_ZERO_TERMINATED, 1297 current->compile_options & ~disabled_options, 1298 &error, &err_offs, ccontext16); 1299 1300 if (!re16 && (utf || is_ascii)) 1301 printf("\n16 bit: Cannot compile pattern \"%s\": %d\n", current->pattern, error); 1302 } 1303 pcre2_compile_context_free_16(ccontext16); 1304 } 1305 else 1306 printf("\n16 bit: Cannot allocate compile context\n"); 1307#endif 1308#ifdef SUPPORT_PCRE2_32 1309 if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV)) 1310 convert_utf8_to_utf32((PCRE2_SPTR8)current->pattern, regtest_buf32, NULL, REGTEST_MAX_LENGTH32); 1311 else 1312 copy_char8_to_char32((PCRE2_SPTR8)current->pattern, regtest_buf32, REGTEST_MAX_LENGTH32); 1313 1314 re32 = NULL; 1315 ccontext32 = pcre2_compile_context_create_32(NULL); 1316 if (ccontext32) { 1317 if (GET_NEWLINE(current->newline)) 1318 pcre2_set_newline_32(ccontext32, GET_NEWLINE(current->newline)); 1319 if (GET_BSR(current->newline)) 1320 pcre2_set_bsr_32(ccontext32, GET_BSR(current->newline)); 1321 1322 if (!(current->start_offset & F_NO32)) { 1323 re32 = pcre2_compile_32(regtest_buf32, PCRE2_ZERO_TERMINATED, 1324 current->compile_options & ~disabled_options, 1325 &error, &err_offs, ccontext32); 1326 1327 if (!re32 && (utf || is_ascii)) 1328 printf("\n32 bit: Cannot compile pattern \"%s\": %d\n", current->pattern, error); 1329 } 1330 pcre2_compile_context_free_32(ccontext32); 1331 } 1332 else 1333 printf("\n32 bit: Cannot allocate compile context\n"); 1334#endif 1335 1336 counter++; 1337 if ((counter & 0x3) != 0) { 1338#ifdef SUPPORT_PCRE2_8 1339 setstack8(NULL); 1340#endif 1341#ifdef SUPPORT_PCRE2_16 1342 setstack16(NULL); 1343#endif 1344#ifdef SUPPORT_PCRE2_32 1345 setstack32(NULL); 1346#endif 1347 } 1348 1349#ifdef SUPPORT_PCRE2_8 1350 return_value8[0] = -1000; 1351 return_value8[1] = -1000; 1352 mdata8_1 = 
pcre2_match_data_create_8(OVECTOR_SIZE, NULL); 1353 mdata8_2 = pcre2_match_data_create_8(OVECTOR_SIZE, NULL); 1354 mcontext8 = pcre2_match_context_create_8(NULL); 1355 if (!mdata8_1 || !mdata8_2 || !mcontext8) { 1356 printf("\n8 bit: Cannot allocate match data\n"); 1357 pcre2_match_data_free_8(mdata8_1); 1358 pcre2_match_data_free_8(mdata8_2); 1359 pcre2_match_context_free_8(mcontext8); 1360 pcre2_code_free_8(re8); 1361 re8 = NULL; 1362 } else { 1363 ovector8_1 = pcre2_get_ovector_pointer_8(mdata8_1); 1364 ovector8_2 = pcre2_get_ovector_pointer_8(mdata8_2); 1365 for (i = 0; i < OVECTOR_SIZE * 2; ++i) 1366 ovector8_1[i] = -2; 1367 for (i = 0; i < OVECTOR_SIZE * 2; ++i) 1368 ovector8_2[i] = -2; 1369 pcre2_set_match_limit_8(mcontext8, 10000000); 1370 } 1371 if (re8) { 1372 return_value8[1] = pcre2_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input), 1373 current->start_offset & OFFSET_MASK, current->match_options, mdata8_2, mcontext8); 1374 1375 if (pcre2_jit_compile_8(re8, jit_compile_mode)) { 1376 printf("\n8 bit: JIT compiler does not support \"%s\"\n", current->pattern); 1377 } else if ((counter & 0x1) != 0) { 1378 setstack8(mcontext8); 1379 return_value8[0] = pcre2_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input), 1380 current->start_offset & OFFSET_MASK, current->match_options, mdata8_1, mcontext8); 1381 } else { 1382 pcre2_jit_stack_assign_8(mcontext8, NULL, getstack8()); 1383 return_value8[0] = pcre2_jit_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input), 1384 current->start_offset & OFFSET_MASK, current->match_options, mdata8_1, mcontext8); 1385 } 1386 } 1387#endif 1388 1389#ifdef SUPPORT_PCRE2_16 1390 return_value16[0] = -1000; 1391 return_value16[1] = -1000; 1392 mdata16_1 = pcre2_match_data_create_16(OVECTOR_SIZE, NULL); 1393 mdata16_2 = pcre2_match_data_create_16(OVECTOR_SIZE, NULL); 1394 mcontext16 = pcre2_match_context_create_16(NULL); 1395 if (!mdata16_1 || !mdata16_2 || !mcontext16) { 1396 printf("\n16 bit: 
Cannot allocate match data\n"); 1397 pcre2_match_data_free_16(mdata16_1); 1398 pcre2_match_data_free_16(mdata16_2); 1399 pcre2_match_context_free_16(mcontext16); 1400 pcre2_code_free_16(re16); 1401 re16 = NULL; 1402 } else { 1403 ovector16_1 = pcre2_get_ovector_pointer_16(mdata16_1); 1404 ovector16_2 = pcre2_get_ovector_pointer_16(mdata16_2); 1405 for (i = 0; i < OVECTOR_SIZE * 2; ++i) 1406 ovector16_1[i] = -2; 1407 for (i = 0; i < OVECTOR_SIZE * 2; ++i) 1408 ovector16_2[i] = -2; 1409 pcre2_set_match_limit_16(mcontext16, 10000000); 1410 } 1411 if (re16) { 1412 if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV)) 1413 length16 = convert_utf8_to_utf16((PCRE2_SPTR8)current->input, regtest_buf16, regtest_offsetmap16, REGTEST_MAX_LENGTH16); 1414 else 1415 length16 = copy_char8_to_char16((PCRE2_SPTR8)current->input, regtest_buf16, REGTEST_MAX_LENGTH16); 1416 1417 return_value16[1] = pcre2_match_16(re16, regtest_buf16, length16, 1418 current->start_offset & OFFSET_MASK, current->match_options, mdata16_2, mcontext16); 1419 1420 if (pcre2_jit_compile_16(re16, jit_compile_mode)) { 1421 printf("\n16 bit: JIT compiler does not support \"%s\"\n", current->pattern); 1422 } else if ((counter & 0x1) != 0) { 1423 setstack16(mcontext16); 1424 return_value16[0] = pcre2_match_16(re16, regtest_buf16, length16, 1425 current->start_offset & OFFSET_MASK, current->match_options, mdata16_1, mcontext16); 1426 } else { 1427 pcre2_jit_stack_assign_16(mcontext16, NULL, getstack16()); 1428 return_value16[0] = pcre2_jit_match_16(re16, regtest_buf16, length16, 1429 current->start_offset & OFFSET_MASK, current->match_options, mdata16_1, mcontext16); 1430 } 1431 } 1432#endif 1433 1434#ifdef SUPPORT_PCRE2_32 1435 return_value32[0] = -1000; 1436 return_value32[1] = -1000; 1437 mdata32_1 = pcre2_match_data_create_32(OVECTOR_SIZE, NULL); 1438 mdata32_2 = pcre2_match_data_create_32(OVECTOR_SIZE, NULL); 1439 mcontext32 = pcre2_match_context_create_32(NULL); 1440 if 
(!mdata32_1 || !mdata32_2 || !mcontext32) { 1441 printf("\n32 bit: Cannot allocate match data\n"); 1442 pcre2_match_data_free_32(mdata32_1); 1443 pcre2_match_data_free_32(mdata32_2); 1444 pcre2_match_context_free_32(mcontext32); 1445 pcre2_code_free_32(re32); 1446 re32 = NULL; 1447 } else { 1448 ovector32_1 = pcre2_get_ovector_pointer_32(mdata32_1); 1449 ovector32_2 = pcre2_get_ovector_pointer_32(mdata32_2); 1450 for (i = 0; i < OVECTOR_SIZE * 2; ++i) 1451 ovector32_1[i] = -2; 1452 for (i = 0; i < OVECTOR_SIZE * 2; ++i) 1453 ovector32_2[i] = -2; 1454 pcre2_set_match_limit_32(mcontext32, 10000000); 1455 } 1456 if (re32) { 1457 if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV)) 1458 length32 = convert_utf8_to_utf32((PCRE2_SPTR8)current->input, regtest_buf32, regtest_offsetmap32, REGTEST_MAX_LENGTH32); 1459 else 1460 length32 = copy_char8_to_char32((PCRE2_SPTR8)current->input, regtest_buf32, REGTEST_MAX_LENGTH32); 1461 1462 return_value32[1] = pcre2_match_32(re32, regtest_buf32, length32, 1463 current->start_offset & OFFSET_MASK, current->match_options, mdata32_2, mcontext32); 1464 1465 if (pcre2_jit_compile_32(re32, jit_compile_mode)) { 1466 printf("\n32 bit: JIT compiler does not support \"%s\"\n", current->pattern); 1467 } else if ((counter & 0x1) != 0) { 1468 setstack32(mcontext32); 1469 return_value32[0] = pcre2_match_32(re32, regtest_buf32, length32, 1470 current->start_offset & OFFSET_MASK, current->match_options, mdata32_1, mcontext32); 1471 } else { 1472 pcre2_jit_stack_assign_32(mcontext32, NULL, getstack32()); 1473 return_value32[0] = pcre2_jit_match_32(re32, regtest_buf32, length32, 1474 current->start_offset & OFFSET_MASK, current->match_options, mdata32_1, mcontext32); 1475 } 1476 } 1477#endif 1478 1479 /* printf("[%d-%d-%d|%d-%d|%d-%d|%d-%d]%s", 1480 return_value8[0], return_value16[0], return_value32[0], 1481 (int)ovector8_1[0], (int)ovector8_1[1], 1482 (int)ovector16_1[0], (int)ovector16_1[1], 1483 
(int)ovector32_1[0], (int)ovector32_1[1], 1484 (current->compile_options & PCRE2_CASELESS) ? "C" : ""); */ 1485 1486 /* If F_DIFF is set, just run the test, but do not compare the results. 1487 Segfaults can still be captured. */ 1488 1489 is_successful = 1; 1490 if (!(current->start_offset & F_DIFF)) { 1491#if defined SUPPORT_UNICODE && ((defined(SUPPORT_PCRE2_8) + defined(SUPPORT_PCRE2_16) + defined(SUPPORT_PCRE2_32)) >= 2) 1492 if (!(current->start_offset & F_FORCECONV)) { 1493 1494 /* All results must be the same. */ 1495#ifdef SUPPORT_PCRE2_8 1496 if ((return_value = return_value8[0]) != return_value8[1]) { 1497 printf("\n8 bit: Return value differs(J8:%d,I8:%d): [%d] '%s' @ '%s'\n", 1498 return_value8[0], return_value8[1], total, current->pattern, current->input); 1499 is_successful = 0; 1500 } else 1501#endif 1502#ifdef SUPPORT_PCRE2_16 1503 if ((return_value = return_value16[0]) != return_value16[1]) { 1504 printf("\n16 bit: Return value differs(J16:%d,I16:%d): [%d] '%s' @ '%s'\n", 1505 return_value16[0], return_value16[1], total, current->pattern, current->input); 1506 is_successful = 0; 1507 } else 1508#endif 1509#ifdef SUPPORT_PCRE2_32 1510 if ((return_value = return_value32[0]) != return_value32[1]) { 1511 printf("\n32 bit: Return value differs(J32:%d,I32:%d): [%d] '%s' @ '%s'\n", 1512 return_value32[0], return_value32[1], total, current->pattern, current->input); 1513 is_successful = 0; 1514 } else 1515#endif 1516#if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_16 1517 if (return_value8[0] != return_value16[0]) { 1518 printf("\n8 and 16 bit: Return value differs(J8:%d,J16:%d): [%d] '%s' @ '%s'\n", 1519 return_value8[0], return_value16[0], 1520 total, current->pattern, current->input); 1521 is_successful = 0; 1522 } else 1523#endif 1524#if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_32 1525 if (return_value8[0] != return_value32[0]) { 1526 printf("\n8 and 32 bit: Return value differs(J8:%d,J32:%d): [%d] '%s' @ '%s'\n", 1527 return_value8[0], 
return_value32[0], 1528 total, current->pattern, current->input); 1529 is_successful = 0; 1530 } else 1531#endif 1532#if defined SUPPORT_PCRE2_16 && defined SUPPORT_PCRE2_32 1533 if (return_value16[0] != return_value32[0]) { 1534 printf("\n16 and 32 bit: Return value differs(J16:%d,J32:%d): [%d] '%s' @ '%s'\n", 1535 return_value16[0], return_value32[0], 1536 total, current->pattern, current->input); 1537 is_successful = 0; 1538 } else 1539#endif 1540 if (return_value >= 0 || return_value == PCRE2_ERROR_PARTIAL) { 1541 if (return_value == PCRE2_ERROR_PARTIAL) { 1542 return_value = 2; 1543 } else { 1544 return_value *= 2; 1545 } 1546#ifdef SUPPORT_PCRE2_8 1547 return_value8[0] = return_value; 1548#endif 1549#ifdef SUPPORT_PCRE2_16 1550 return_value16[0] = return_value; 1551#endif 1552#ifdef SUPPORT_PCRE2_32 1553 return_value32[0] = return_value; 1554#endif 1555 /* Transform back the results. */ 1556 if (current->compile_options & PCRE2_UTF) { 1557#ifdef SUPPORT_PCRE2_16 1558 for (i = 0; i < return_value; ++i) { 1559 if (ovector16_1[i] != PCRE2_UNSET) 1560 ovector16_1[i] = regtest_offsetmap16[ovector16_1[i]]; 1561 if (ovector16_2[i] != PCRE2_UNSET) 1562 ovector16_2[i] = regtest_offsetmap16[ovector16_2[i]]; 1563 } 1564#endif 1565#ifdef SUPPORT_PCRE2_32 1566 for (i = 0; i < return_value; ++i) { 1567 if (ovector32_1[i] != PCRE2_UNSET) 1568 ovector32_1[i] = regtest_offsetmap32[ovector32_1[i]]; 1569 if (ovector32_2[i] != PCRE2_UNSET) 1570 ovector32_2[i] = regtest_offsetmap32[ovector32_2[i]]; 1571 } 1572#endif 1573 } 1574 1575 for (i = 0; i < return_value; ++i) { 1576#if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_16 1577 if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) { 1578 printf("\n8 and 16 bit: Ovector[%d] value differs(J8:%d,I8:%d,J16:%d,I16:%d): [%d] '%s' @ '%s' \n", 1579 i, (int)ovector8_1[i], (int)ovector8_2[i], (int)ovector16_1[i], (int)ovector16_2[i], 1580 total, current->pattern, current->input); 
1581 is_successful = 0; 1582 } 1583#endif 1584#if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_32 1585 if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector32_1[i] || ovector8_1[i] != ovector32_2[i]) { 1586 printf("\n8 and 32 bit: Ovector[%d] value differs(J8:%d,I8:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n", 1587 i, (int)ovector8_1[i], (int)ovector8_2[i], (int)ovector32_1[i], (int)ovector32_2[i], 1588 total, current->pattern, current->input); 1589 is_successful = 0; 1590 } 1591#endif 1592#if defined SUPPORT_PCRE2_16 && defined SUPPORT_PCRE2_32 1593 if (ovector16_1[i] != ovector16_2[i] || ovector16_1[i] != ovector32_1[i] || ovector16_1[i] != ovector32_2[i]) { 1594 printf("\n16 and 32 bit: Ovector[%d] value differs(J16:%d,I16:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n", 1595 i, (int)ovector16_1[i], (int)ovector16_2[i], (int)ovector32_1[i], (int)ovector32_2[i], 1596 total, current->pattern, current->input); 1597 is_successful = 0; 1598 } 1599#endif 1600 } 1601 } 1602 } else 1603#endif /* more than one of SUPPORT_PCRE2_8, SUPPORT_PCRE2_16 and SUPPORT_PCRE2_32 */ 1604 { 1605#ifdef SUPPORT_PCRE2_8 1606 if (return_value8[0] != return_value8[1]) { 1607 printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n", 1608 return_value8[0], return_value8[1], total, current->pattern, current->input); 1609 is_successful = 0; 1610 } else if (return_value8[0] >= 0 || return_value8[0] == PCRE2_ERROR_PARTIAL) { 1611 if (return_value8[0] == PCRE2_ERROR_PARTIAL) 1612 return_value8[0] = 2; 1613 else 1614 return_value8[0] *= 2; 1615 1616 for (i = 0; i < return_value8[0]; ++i) 1617 if (ovector8_1[i] != ovector8_2[i]) { 1618 printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n", 1619 i, (int)ovector8_1[i], (int)ovector8_2[i], total, current->pattern, current->input); 1620 is_successful = 0; 1621 } 1622 } 1623#endif 1624 1625#ifdef SUPPORT_PCRE2_16 1626 if (return_value16[0] != return_value16[1]) { 1627 printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ 
'%s'\n", 1628 return_value16[0], return_value16[1], total, current->pattern, current->input); 1629 is_successful = 0; 1630 } else if (return_value16[0] >= 0 || return_value16[0] == PCRE2_ERROR_PARTIAL) { 1631 if (return_value16[0] == PCRE2_ERROR_PARTIAL) 1632 return_value16[0] = 2; 1633 else 1634 return_value16[0] *= 2; 1635 1636 for (i = 0; i < return_value16[0]; ++i) 1637 if (ovector16_1[i] != ovector16_2[i]) { 1638 printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n", 1639 i, (int)ovector16_1[i], (int)ovector16_2[i], total, current->pattern, current->input); 1640 is_successful = 0; 1641 } 1642 } 1643#endif 1644 1645#ifdef SUPPORT_PCRE2_32 1646 if (return_value32[0] != return_value32[1]) { 1647 printf("\n32 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n", 1648 return_value32[0], return_value32[1], total, current->pattern, current->input); 1649 is_successful = 0; 1650 } else if (return_value32[0] >= 0 || return_value32[0] == PCRE2_ERROR_PARTIAL) { 1651 if (return_value32[0] == PCRE2_ERROR_PARTIAL) 1652 return_value32[0] = 2; 1653 else 1654 return_value32[0] *= 2; 1655 1656 for (i = 0; i < return_value32[0]; ++i) 1657 if (ovector32_1[i] != ovector32_2[i]) { 1658 printf("\n32 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n", 1659 i, (int)ovector32_1[i], (int)ovector32_2[i], total, current->pattern, current->input); 1660 is_successful = 0; 1661 } 1662 } 1663#endif 1664 } 1665 } 1666 1667 if (is_successful) { 1668#ifdef SUPPORT_PCRE2_8 1669 if (!(current->start_offset & F_NO8) && (utf || is_ascii)) { 1670 if (return_value8[0] < 0 && !(current->start_offset & F_NOMATCH)) { 1671 printf("8 bit: Test should match: [%d] '%s' @ '%s'\n", 1672 total, current->pattern, current->input); 1673 is_successful = 0; 1674 } 1675 1676 if (return_value8[0] >= 0 && (current->start_offset & F_NOMATCH)) { 1677 printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n", 1678 total, current->pattern, current->input); 1679 is_successful = 0; 1680 } 1681 } 
1682#endif 1683#ifdef SUPPORT_PCRE2_16 1684 if (!(current->start_offset & F_NO16) && (utf || is_ascii)) { 1685 if (return_value16[0] < 0 && !(current->start_offset & F_NOMATCH)) { 1686 printf("16 bit: Test should match: [%d] '%s' @ '%s'\n", 1687 total, current->pattern, current->input); 1688 is_successful = 0; 1689 } 1690 1691 if (return_value16[0] >= 0 && (current->start_offset & F_NOMATCH)) { 1692 printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n", 1693 total, current->pattern, current->input); 1694 is_successful = 0; 1695 } 1696 } 1697#endif 1698#ifdef SUPPORT_PCRE2_32 1699 if (!(current->start_offset & F_NO32) && (utf || is_ascii)) { 1700 if (return_value32[0] < 0 && !(current->start_offset & F_NOMATCH)) { 1701 printf("32 bit: Test should match: [%d] '%s' @ '%s'\n", 1702 total, current->pattern, current->input); 1703 is_successful = 0; 1704 } 1705 1706 if (return_value32[0] >= 0 && (current->start_offset & F_NOMATCH)) { 1707 printf("32 bit: Test should not match: [%d] '%s' @ '%s'\n", 1708 total, current->pattern, current->input); 1709 is_successful = 0; 1710 } 1711 } 1712#endif 1713 } 1714 1715 if (is_successful) { 1716#ifdef SUPPORT_PCRE2_8 1717 if (re8 && !(current->start_offset & F_NO8) && pcre2_get_mark_8(mdata8_1) != pcre2_get_mark_8(mdata8_2)) { 1718 printf("8 bit: Mark value mismatch: [%d] '%s' @ '%s'\n", 1719 total, current->pattern, current->input); 1720 is_successful = 0; 1721 } 1722#endif 1723#ifdef SUPPORT_PCRE2_16 1724 if (re16 && !(current->start_offset & F_NO16) && pcre2_get_mark_16(mdata16_1) != pcre2_get_mark_16(mdata16_2)) { 1725 printf("16 bit: Mark value mismatch: [%d] '%s' @ '%s'\n", 1726 total, current->pattern, current->input); 1727 is_successful = 0; 1728 } 1729#endif 1730#ifdef SUPPORT_PCRE2_32 1731 if (re32 && !(current->start_offset & F_NO32) && pcre2_get_mark_32(mdata32_1) != pcre2_get_mark_32(mdata32_2)) { 1732 printf("32 bit: Mark value mismatch: [%d] '%s' @ '%s'\n", 1733 total, current->pattern, current->input); 1734 
is_successful = 0; 1735 } 1736#endif 1737 } 1738 1739#ifdef SUPPORT_PCRE2_8 1740 pcre2_code_free_8(re8); 1741 pcre2_match_data_free_8(mdata8_1); 1742 pcre2_match_data_free_8(mdata8_2); 1743 pcre2_match_context_free_8(mcontext8); 1744#endif 1745#ifdef SUPPORT_PCRE2_16 1746 pcre2_code_free_16(re16); 1747 pcre2_match_data_free_16(mdata16_1); 1748 pcre2_match_data_free_16(mdata16_2); 1749 pcre2_match_context_free_16(mcontext16); 1750#endif 1751#ifdef SUPPORT_PCRE2_32 1752 pcre2_code_free_32(re32); 1753 pcre2_match_data_free_32(mdata32_1); 1754 pcre2_match_data_free_32(mdata32_2); 1755 pcre2_match_context_free_32(mcontext32); 1756#endif 1757 1758 if (is_successful) { 1759 successful++; 1760 successful_row++; 1761 printf("."); 1762 if (successful_row >= 60) { 1763 successful_row = 0; 1764 printf("\n"); 1765 } 1766 } else 1767 successful_row = 0; 1768 1769 fflush(stdout); 1770 current++; 1771 } 1772#ifdef SUPPORT_PCRE2_8 1773 setstack8(NULL); 1774#endif 1775#ifdef SUPPORT_PCRE2_16 1776 setstack16(NULL); 1777#endif 1778#ifdef SUPPORT_PCRE2_32 1779 setstack32(NULL); 1780#endif 1781 1782 if (total == successful) { 1783 printf("\nAll JIT regression tests are successfully passed.\n"); 1784 return 0; 1785 } else { 1786 printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful); 1787 return 1; 1788 } 1789} 1790 1791#if defined SUPPORT_UNICODE 1792 1793static int check_invalid_utf_result(int pattern_index, const char *type, int result, 1794 int match_start, int match_end, PCRE2_SIZE *ovector) 1795{ 1796 if (match_start < 0) { 1797 if (result != -1) { 1798 printf("Pattern[%d] %s result is not -1.\n", pattern_index, type); 1799 return 1; 1800 } 1801 return 0; 1802 } 1803 1804 if (result <= 0) { 1805 printf("Pattern[%d] %s result (%d) is not greater than 0.\n", pattern_index, type, result); 1806 return 1; 1807 } 1808 1809 if (ovector[0] != (PCRE2_SIZE)match_start) { 1810 printf("Pattern[%d] %s ovector[0] is unexpected (%d instead of %d)\n", 
1811 pattern_index, type, (int)ovector[0], match_start); 1812 return 1; 1813 } 1814 1815 if (ovector[1] != (PCRE2_SIZE)match_end) { 1816 printf("Pattern[%d] %s ovector[1] is unexpected (%d instead of %d)\n", 1817 pattern_index, type, (int)ovector[1], match_end); 1818 return 1; 1819 } 1820 1821 return 0; 1822} 1823 1824#endif /* SUPPORT_UNICODE */ 1825 1826#if defined SUPPORT_UNICODE && defined SUPPORT_PCRE2_8 1827 1828#define UDA (PCRE2_UTF | PCRE2_DOTALL | PCRE2_ANCHORED) 1829#define CI (PCRE2_JIT_COMPLETE | PCRE2_JIT_INVALID_UTF) 1830#define CPI (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_INVALID_UTF) 1831 1832struct invalid_utf8_regression_test_case { 1833 int compile_options; 1834 int jit_compile_options; 1835 int start_offset; 1836 int skip_left; 1837 int skip_right; 1838 int match_start; 1839 int match_end; 1840 const char *pattern[2]; 1841 const char *input; 1842}; 1843 1844static const char invalid_utf8_newline_cr; 1845 1846static const struct invalid_utf8_regression_test_case invalid_utf8_regression_test_cases[] = { 1847 { UDA, CI, 0, 0, 0, 0, 4, { ".", NULL }, "\xf4\x8f\xbf\xbf" }, 1848 { UDA, CI, 0, 0, 0, 0, 4, { ".", NULL }, "\xf0\x90\x80\x80" }, 1849 { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf4\x90\x80\x80" }, 1850 { UDA, CI, 0, 0, 1, -1, -1, { ".", NULL }, "\xf4\x8f\xbf\xbf" }, 1851 { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf0\x90\x80\x7f" }, 1852 { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf0\x90\x80\xc0" }, 1853 { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf0\x8f\xbf\xbf" }, 1854 { UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xef\xbf\xbf#" }, 1855 { UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xef\xbf\xbf" }, 1856 { UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xe0\xa0\x80#" }, 1857 { UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xe0\xa0\x80" }, 1858 { UDA, CI, 0, 0, 2, -1, -1, { ".", NULL }, "\xef\xbf\xbf#" }, 1859 { UDA, CI, 0, 0, 1, -1, -1, { ".", NULL }, "\xef\xbf\xbf" }, 1860 { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xef\xbf\x7f#" }, 
1861 { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xef\xbf\xc0" }, 1862 { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x9f\xbf#" }, 1863 { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x9f\xbf" }, 1864 { UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xed\x9f\xbf#" }, 1865 { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xed\xa0\x80#" }, 1866 { UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xee\x80\x80#" }, 1867 { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xed\xbf\xbf#" }, 1868 { UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xdf\xbf##" }, 1869 { UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xdf\xbf#" }, 1870 { UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xdf\xbf" }, 1871 { UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xc2\x80##" }, 1872 { UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xc2\x80#" }, 1873 { UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xc2\x80" }, 1874 { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x80##" }, 1875 { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xdf\xc0##" }, 1876 { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x80" }, 1877 { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xdf\xc0" }, 1878 { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xc1\xbf##" }, 1879 { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xc1\xbf" }, 1880 { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\x80###" }, 1881 { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\x80" }, 1882 { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf8###" }, 1883 { UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf8" }, 1884 { UDA, CI, 0, 0, 0, 0, 1, { ".", NULL }, "\x7f" }, 1885 1886 { UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf4\x8f\xbf\xbf#" }, 1887 { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\xa0\x80\x80\xf4\xa0\x80\x80" }, 1888 { UDA, CPI, 4, 1, 1, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbf" }, 1889 { UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "#\xef\xbf\xbf#" }, 1890 { UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "#\xe0\xa0\x80#" }, 1891 { UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf0\x90\x80\x80#" }, 1892 { UDA, CPI, 4, 0, 0, 4, 4, 
{ "\\B", NULL }, "\xf3\xbf\xbf\xbf#" }, 1893 { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf0\x8f\xbf\xbf\xf0\x8f\xbf\xbf" }, 1894 { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf5\x80\x80\x80\xf5\x80\x80\x80" }, 1895 { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x90\x80\x80\xf4\x90\x80\x80" }, 1896 { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xbf\xff\xf4\x8f\xbf\xff" }, 1897 { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xff\xbf\xf4\x8f\xff\xbf" }, 1898 { UDA, CPI, 4, 0, 1, -1, -1, { "\\B", "\\b" }, "\xef\x80\x80\x80\xef\x80\x80" }, 1899 { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\x80\x80\x80\x80\x80\x80\x80\x80" }, 1900 { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xe0\x9f\xbf\xe0\x9f\xbf#" }, 1901 { UDA, CPI, 4, 2, 2, -1, -1, { "\\B", "\\b" }, "#\xe0\xa0\x80\xe0\xa0\x80#" }, 1902 { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xf0\x80\x80\xf0\x80\x80#" }, 1903 { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xed\xa0\x80\xed\xa0\x80#" }, 1904 { UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "##\xdf\xbf#" }, 1905 { UDA, CPI, 4, 2, 0, 2, 2, { "\\B", NULL }, "##\xdf\xbf#" }, 1906 { UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "##\xc2\x80#" }, 1907 { UDA, CPI, 4, 2, 0, 2, 2, { "\\B", NULL }, "##\xc2\x80#" }, 1908 { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xc1\xbf\xc1\xbf##" }, 1909 { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xdf\xc0\xdf\xc0##" }, 1910 { UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xe0\x80\xe0\x80##" }, 1911 1912 { UDA, CPI, 3, 0, 0, 3, 3, { "\\B", NULL }, "\xef\xbf\xbf#" }, 1913 { UDA, CPI, 3, 0, 0, 3, 3, { "\\B", NULL }, "\xe0\xa0\x80#" }, 1914 { UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xe0\x9f\xbf\xe0\x9f\xbf" }, 1915 { UDA, CPI, 3, 1, 1, -1, -1, { "\\B", "\\b" }, "\xef\xbf\xbf\xef\xbf\xbf" }, 1916 { UDA, CPI, 3, 0, 1, -1, -1, { "\\B", "\\b" }, "\xdf\x80\x80\xdf\x80" }, 1917 { UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xef\xbf\xff\xef\xbf\xff" }, 1918 { UDA, CPI, 3, 0, 0, -1, -1, 
{ "\\B", "\\b" }, "\xef\xff\xbf\xef\xff\xbf" }, 1919 { UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xed\xbf\xbf\xed\xbf\xbf" }, 1920 1921 { UDA, CPI, 2, 0, 0, 2, 2, { "\\B", NULL }, "\xdf\xbf#" }, 1922 { UDA, CPI, 2, 0, 0, 2, 2, { "\\B", NULL }, "\xc2\x80#" }, 1923 { UDA, CPI, 2, 1, 1, -1, -1, { "\\B", "\\b" }, "\xdf\xbf\xdf\xbf" }, 1924 { UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xc1\xbf\xc1\xbf" }, 1925 { UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xe0\x80\xe0\x80" }, 1926 { UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xdf\xff\xdf\xff" }, 1927 { UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xff\xbf\xff\xbf" }, 1928 1929 { UDA, CPI, 1, 0, 0, 1, 1, { "\\B", NULL }, "\x7f#" }, 1930 { UDA, CPI, 1, 0, 0, 1, 1, { "\\B", NULL }, "\x01#" }, 1931 { UDA, CPI, 1, 0, 0, -1, -1, { "\\B", "\\b" }, "\x80\x80" }, 1932 { UDA, CPI, 1, 0, 0, -1, -1, { "\\B", "\\b" }, "\xb0\xb0" }, 1933 1934 { UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { "(.)\\1", NULL }, "aA" }, 1935 { UDA | PCRE2_CASELESS, CPI, 0, 0, 0, -1, -1, { "(.)\\1", NULL }, "a\xff" }, 1936 { UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 4, { "(.)\\1", NULL }, "\xc3\xa1\xc3\x81" }, 1937 { UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { "(.)\\1", NULL }, "\xc3\xa1\xc3\x81" }, 1938 { UDA | PCRE2_CASELESS, CPI, 0, 0, 0, -1, -1, { "(.)\\1", NULL }, "\xc2\x80\x80" }, 1939 { UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 6, { "(.)\\1", NULL }, "\xe1\xbd\xb8\xe1\xbf\xb8" }, 1940 { UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { "(.)\\1", NULL }, "\xe1\xbd\xb8\xe1\xbf\xb8" }, 1941 { UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 8, { "(.)\\1", NULL }, "\xf0\x90\x90\x80\xf0\x90\x90\xa8" }, 1942 { UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { "(.)\\1", NULL }, "\xf0\x90\x90\x80\xf0\x90\x90\xa8" }, 1943 1944 { UDA, CPI, 0, 0, 0, 0, 1, { "\\X", NULL }, "A" }, 1945 { UDA, CPI, 0, 0, 0, -1, -1, { "\\X", NULL }, "\xff" }, 1946 { UDA, CPI, 0, 0, 0, 0, 2, { "\\X", NULL }, "\xc3\xa1" }, 1947 { UDA, CPI, 0, 0, 1, -1, -1, { "\\X", NULL }, "\xc3\xa1" }, 1948 { 
UDA, CPI, 0, 0, 0, -1, -1, { "\\X", NULL }, "\xc3\x7f" }, 1949 { UDA, CPI, 0, 0, 0, 0, 3, { "\\X", NULL }, "\xe1\xbd\xb8" }, 1950 { UDA, CPI, 0, 0, 1, -1, -1, { "\\X", NULL }, "\xe1\xbd\xb8" }, 1951 { UDA, CPI, 0, 0, 0, 0, 4, { "\\X", NULL }, "\xf0\x90\x90\x80" }, 1952 { UDA, CPI, 0, 0, 1, -1, -1, { "\\X", NULL }, "\xf0\x90\x90\x80" }, 1953 1954 { UDA, CPI, 0, 0, 0, -1, -1, { "[^#]", NULL }, "#" }, 1955 { UDA, CPI, 0, 0, 0, 0, 4, { "[^#]", NULL }, "\xf4\x8f\xbf\xbf" }, 1956 { UDA, CPI, 0, 0, 0, -1, -1, { "[^#]", NULL }, "\xf4\x90\x80\x80" }, 1957 { UDA, CPI, 0, 0, 0, -1, -1, { "[^#]", NULL }, "\xc1\x80" }, 1958 1959 { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 2, 3, { "^\\W", NULL }, " \x0a#"}, 1960 { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 14, 15, { "^\\W", NULL }, " \xc0\x8a#\xe0\x80\x8a#\xf0\x80\x80\x8a#\x0a#"}, 1961 { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xf8\x0a#"}, 1962 { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xc3\x0a#"}, 1963 { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xf1\x0a#"}, 1964 { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 4, 5, { "^\\W", NULL }, " \xf2\xbf\x0a#"}, 1965 { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 5, 6, { "^\\W", NULL }, " \xf2\xbf\xbf\x0a#"}, 1966 { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xef\x0a#"}, 1967 { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 4, 5, { "^\\W", NULL }, " \xef\xbf\x0a#"}, 1968 { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 5, 6, { "^\\W", NULL }, " \x85#\xc2\x85#"}, 1969 { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 7, 8, { "^\\W", NULL }, " \xe2\x80\xf8\xe2\x80\xa8#"}, 1970 1971 { PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, -1, -1, { "#", NULL }, "\xe2\x80\xf8\xe2\x80\xa8#"}, 1972 { PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, 3, 4, { "#", NULL }, "\xe2\x80\xf8#\xe2\x80\xa8#"}, 1973 { PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, -1, -1, { "#", NULL }, "abcd\xc2\x85#"}, 1974 { PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 
0, 0, 1, 2, { "#", NULL }, "\x85#\xc2\x85#"}, 1975 { PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, 5, 6, { "#", NULL }, "\xef,\x80,\xf8#\x0a"}, 1976 { PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, -1, -1, { "#", NULL }, "\xef,\x80,\xf8\x0a#"}, 1977 1978 { PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 4, 8, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7#\xc7\x85#" }, 1979 { PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 7, 11, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7\x80\x80\x80#\xc7\x85#" }, 1980 { PCRE2_UTF, CI, 0, 0, 0, 4, 8, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7#\xc7\x85#" }, 1981 { PCRE2_UTF, CI, 0, 0, 0, 7, 11, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7\x80\x80\x80#\xc7\x85#" }, 1982 1983 { PCRE2_UTF | PCRE2_UCP, CI, 0, 0, 0, -1, -1, { "[\\s]", NULL }, "\xed\xa0\x80" }, 1984 1985 /* These two are not invalid UTF tests, but this infrastructure fits better for them. */ 1986 { 0, PCRE2_JIT_COMPLETE, 0, 0, 1, -1, -1, { "\\X{2}", NULL }, "\r\n\n" }, 1987 { 0, PCRE2_JIT_COMPLETE, 0, 0, 1, -1, -1, { "\\R{2}", NULL }, "\r\n\n" }, 1988 1989 { PCRE2_UTF | PCRE2_MULTILINE, CI, 0, 0, 0, -1, -1, { "^.a", &invalid_utf8_newline_cr }, "\xc3\xa7#a" }, 1990 1991 { 0, 0, 0, 0, 0, 0, 0, { NULL, NULL }, NULL } 1992}; 1993 1994#undef UDA 1995#undef CI 1996#undef CPI 1997 1998static int run_invalid_utf8_test(const struct invalid_utf8_regression_test_case *current, 1999 int pattern_index, int i, pcre2_compile_context_8 *ccontext, pcre2_match_data_8 *mdata) 2000{ 2001 pcre2_code_8 *code; 2002 int result, errorcode; 2003 PCRE2_SIZE length, erroroffset; 2004 PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_8(mdata); 2005 2006 if (current->pattern[i] == NULL) 2007 return 1; 2008 2009 code = pcre2_compile_8((PCRE2_UCHAR8*)current->pattern[i], PCRE2_ZERO_TERMINATED, 2010 current->compile_options, &errorcode, &erroroffset, ccontext); 2011 2012 if (!code) { 2013 printf("Pattern[%d:0] cannot be compiled. 
Error offset: %d\n", pattern_index, (int)erroroffset); 2014 return 0; 2015 } 2016 2017 if (pcre2_jit_compile_8(code, current->jit_compile_options) != 0) { 2018 printf("Pattern[%d:0] cannot be compiled by the JIT compiler.\n", pattern_index); 2019 pcre2_code_free_8(code); 2020 return 0; 2021 } 2022 2023 length = (PCRE2_SIZE)(strlen(current->input) - current->skip_left - current->skip_right); 2024 2025 if (current->jit_compile_options & PCRE2_JIT_COMPLETE) { 2026 result = pcre2_jit_match_8(code, (PCRE2_UCHAR8*)(current->input + current->skip_left), 2027 length, current->start_offset - current->skip_left, 0, mdata, NULL); 2028 2029 if (check_invalid_utf_result(pattern_index, "match", result, current->match_start, current->match_end, ovector)) { 2030 pcre2_code_free_8(code); 2031 return 0; 2032 } 2033 } 2034 2035 if (current->jit_compile_options & PCRE2_JIT_PARTIAL_SOFT) { 2036 result = pcre2_jit_match_8(code, (PCRE2_UCHAR8*)(current->input + current->skip_left), 2037 length, current->start_offset - current->skip_left, PCRE2_PARTIAL_SOFT, mdata, NULL); 2038 2039 if (check_invalid_utf_result(pattern_index, "partial match", result, current->match_start, current->match_end, ovector)) { 2040 pcre2_code_free_8(code); 2041 return 0; 2042 } 2043 } 2044 2045 pcre2_code_free_8(code); 2046 return 1; 2047} 2048 2049static int invalid_utf8_regression_tests(void) 2050{ 2051 const struct invalid_utf8_regression_test_case *current; 2052 pcre2_compile_context_8 *ccontext; 2053 pcre2_match_data_8 *mdata; 2054 int total = 0, successful = 0; 2055 int result; 2056 2057 printf("\nRunning invalid-utf8 JIT regression tests\n"); 2058 2059 ccontext = pcre2_compile_context_create_8(NULL); 2060 pcre2_set_newline_8(ccontext, PCRE2_NEWLINE_ANY); 2061 mdata = pcre2_match_data_create_8(4, NULL); 2062 2063 for (current = invalid_utf8_regression_test_cases; current->pattern[0]; current++) { 2064 /* printf("\nPattern: %s :\n", current->pattern); */ 2065 total++; 2066 2067 result = 1; 2068 if 
(current->pattern[1] != &invalid_utf8_newline_cr) 2069 { 2070 if (!run_invalid_utf8_test(current, total - 1, 0, ccontext, mdata)) 2071 result = 0; 2072 if (!run_invalid_utf8_test(current, total - 1, 1, ccontext, mdata)) 2073 result = 0; 2074 } else { 2075 pcre2_set_newline_8(ccontext, PCRE2_NEWLINE_CR); 2076 if (!run_invalid_utf8_test(current, total - 1, 0, ccontext, mdata)) 2077 result = 0; 2078 pcre2_set_newline_8(ccontext, PCRE2_NEWLINE_ANY); 2079 } 2080 2081 if (result) { 2082 successful++; 2083 } 2084 2085 printf("."); 2086 if ((total % 60) == 0) 2087 printf("\n"); 2088 } 2089 2090 if ((total % 60) != 0) 2091 printf("\n"); 2092 2093 pcre2_match_data_free_8(mdata); 2094 pcre2_compile_context_free_8(ccontext); 2095 2096 if (total == successful) { 2097 printf("\nAll invalid UTF8 JIT regression tests are successfully passed.\n"); 2098 return 0; 2099 } else { 2100 printf("\nInvalid UTF8 successful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful); 2101 return 1; 2102 } 2103} 2104 2105#else /* !SUPPORT_UNICODE || !SUPPORT_PCRE2_8 */ 2106 2107static int invalid_utf8_regression_tests(void) 2108{ 2109 return 0; 2110} 2111 2112#endif /* SUPPORT_UNICODE && SUPPORT_PCRE2_8 */ 2113 2114#if defined SUPPORT_UNICODE && defined SUPPORT_PCRE2_16 2115 2116#define UDA (PCRE2_UTF | PCRE2_DOTALL | PCRE2_ANCHORED) 2117#define CI (PCRE2_JIT_COMPLETE | PCRE2_JIT_INVALID_UTF) 2118#define CPI (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_INVALID_UTF) 2119 2120struct invalid_utf16_regression_test_case { 2121 int compile_options; 2122 int jit_compile_options; 2123 int start_offset; 2124 int skip_left; 2125 int skip_right; 2126 int match_start; 2127 int match_end; 2128 const PCRE2_UCHAR16 *pattern[2]; 2129 const PCRE2_UCHAR16 *input; 2130}; 2131 2132static PCRE2_UCHAR16 allany16[] = { '.', 0 }; 2133static PCRE2_UCHAR16 non_word_boundary16[] = { '\\', 'B', 0 }; 2134static PCRE2_UCHAR16 word_boundary16[] = { '\\', 'b', 0 }; 2135static PCRE2_UCHAR16 
backreference16[] = { '(', '.', ')', '\\', '1', 0 }; 2136static PCRE2_UCHAR16 grapheme16[] = { '\\', 'X', 0 }; 2137static PCRE2_UCHAR16 nothashmark16[] = { '[', '^', '#', ']', 0 }; 2138static PCRE2_UCHAR16 afternl16[] = { '^', '\\', 'W', 0 }; 2139static PCRE2_UCHAR16 generic16[] = { '#', 0xd800, 0xdc00, '#', 0 }; 2140static PCRE2_UCHAR16 test16_1[] = { 0xd7ff, 0xe000, 0xffff, 0x01, '#', 0 }; 2141static PCRE2_UCHAR16 test16_2[] = { 0xd800, 0xdc00, 0xd800, 0xdc00, 0 }; 2142static PCRE2_UCHAR16 test16_3[] = { 0xdbff, 0xdfff, 0xdbff, 0xdfff, 0 }; 2143static PCRE2_UCHAR16 test16_4[] = { 0xd800, 0xdbff, 0xd800, 0xdbff, 0 }; 2144static PCRE2_UCHAR16 test16_5[] = { '#', 0xd800, 0xdc00, '#', 0 }; 2145static PCRE2_UCHAR16 test16_6[] = { 'a', 'A', 0xdc28, 0 }; 2146static PCRE2_UCHAR16 test16_7[] = { 0xd801, 0xdc00, 0xd801, 0xdc28, 0 }; 2147static PCRE2_UCHAR16 test16_8[] = { '#', 0xd800, 0xdc00, 0 }; 2148static PCRE2_UCHAR16 test16_9[] = { ' ', 0x2028, '#', 0 }; 2149static PCRE2_UCHAR16 test16_10[] = { ' ', 0xdc00, 0xd800, 0x2028, '#', 0 }; 2150static PCRE2_UCHAR16 test16_11[] = { 0xdc00, 0xdc00, 0xd800, 0xdc00, 0xdc00, '#', 0xd800, 0xdc00, '#', 0 }; 2151static PCRE2_UCHAR16 test16_12[] = { '#', 0xd800, 0xdc00, 0xd800, '#', 0xd800, 0xdc00, 0xdc00, 0xdc00, '#', 0xd800, 0xdc00, '#', 0 }; 2152 2153static const struct invalid_utf16_regression_test_case invalid_utf16_regression_test_cases[] = { 2154 { UDA, CI, 0, 0, 0, 0, 1, { allany16, NULL }, test16_1 }, 2155 { UDA, CI, 1, 0, 0, 1, 2, { allany16, NULL }, test16_1 }, 2156 { UDA, CI, 2, 0, 0, 2, 3, { allany16, NULL }, test16_1 }, 2157 { UDA, CI, 3, 0, 0, 3, 4, { allany16, NULL }, test16_1 }, 2158 { UDA, CI, 0, 0, 0, 0, 2, { allany16, NULL }, test16_2 }, 2159 { UDA, CI, 0, 0, 3, -1, -1, { allany16, NULL }, test16_2 }, 2160 { UDA, CI, 1, 0, 0, -1, -1, { allany16, NULL }, test16_2 }, 2161 { UDA, CI, 0, 0, 0, 0, 2, { allany16, NULL }, test16_3 }, 2162 { UDA, CI, 0, 0, 3, -1, -1, { allany16, NULL }, test16_3 }, 2163 { UDA, CI, 1, 0, 0, 
-1, -1, { allany16, NULL }, test16_3 }, 2164 2165 { UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary16, NULL }, test16_1 }, 2166 { UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary16, NULL }, test16_1 }, 2167 { UDA, CPI, 3, 0, 0, 3, 3, { non_word_boundary16, NULL }, test16_1 }, 2168 { UDA, CPI, 4, 0, 0, 4, 4, { non_word_boundary16, NULL }, test16_1 }, 2169 { UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary16, NULL }, test16_2 }, 2170 { UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary16, NULL }, test16_3 }, 2171 { UDA, CPI, 2, 1, 1, -1, -1, { non_word_boundary16, word_boundary16 }, test16_2 }, 2172 { UDA, CPI, 2, 1, 1, -1, -1, { non_word_boundary16, word_boundary16 }, test16_3 }, 2173 { UDA, CPI, 2, 0, 0, -1, -1, { non_word_boundary16, word_boundary16 }, test16_4 }, 2174 { UDA, CPI, 2, 0, 0, -1, -1, { non_word_boundary16, word_boundary16 }, test16_5 }, 2175 2176 { UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { backreference16, NULL }, test16_6 }, 2177 { UDA | PCRE2_CASELESS, CPI, 1, 0, 0, -1, -1, { backreference16, NULL }, test16_6 }, 2178 { UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 4, { backreference16, NULL }, test16_7 }, 2179 { UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { backreference16, NULL }, test16_7 }, 2180 2181 { UDA, CPI, 0, 0, 0, 0, 1, { grapheme16, NULL }, test16_6 }, 2182 { UDA, CPI, 1, 0, 0, 1, 2, { grapheme16, NULL }, test16_6 }, 2183 { UDA, CPI, 2, 0, 0, -1, -1, { grapheme16, NULL }, test16_6 }, 2184 { UDA, CPI, 0, 0, 0, 0, 2, { grapheme16, NULL }, test16_7 }, 2185 { UDA, CPI, 2, 0, 0, 2, 4, { grapheme16, NULL }, test16_7 }, 2186 { UDA, CPI, 1, 0, 0, -1, -1, { grapheme16, NULL }, test16_7 }, 2187 2188 { UDA, CPI, 0, 0, 0, -1, -1, { nothashmark16, NULL }, test16_8 }, 2189 { UDA, CPI, 1, 0, 0, 1, 3, { nothashmark16, NULL }, test16_8 }, 2190 { UDA, CPI, 2, 0, 0, -1, -1, { nothashmark16, NULL }, test16_8 }, 2191 2192 { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 2, 3, { afternl16, NULL }, test16_9 }, 2193 { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 4, 5, { afternl16, NULL }, 
test16_10 }, 2194 2195 { PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 5, 9, { generic16, NULL }, test16_11 }, 2196 { PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 9, 13, { generic16, NULL }, test16_12 }, 2197 { PCRE2_UTF, CI, 0, 0, 0, 5, 9, { generic16, NULL }, test16_11 }, 2198 { PCRE2_UTF, CI, 0, 0, 0, 9, 13, { generic16, NULL }, test16_12 }, 2199 2200 { 0, 0, 0, 0, 0, 0, 0, { NULL, NULL }, NULL } 2201}; 2202 2203#undef UDA 2204#undef CI 2205#undef CPI 2206 2207static int run_invalid_utf16_test(const struct invalid_utf16_regression_test_case *current, 2208 int pattern_index, int i, pcre2_compile_context_16 *ccontext, pcre2_match_data_16 *mdata) 2209{ 2210 pcre2_code_16 *code; 2211 int result, errorcode; 2212 PCRE2_SIZE length, erroroffset; 2213 const PCRE2_UCHAR16 *input; 2214 PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_16(mdata); 2215 2216 if (current->pattern[i] == NULL) 2217 return 1; 2218 2219 code = pcre2_compile_16(current->pattern[i], PCRE2_ZERO_TERMINATED, 2220 current->compile_options, &errorcode, &erroroffset, ccontext); 2221 2222 if (!code) { 2223 printf("Pattern[%d:0] cannot be compiled. 
Error offset: %d\n", pattern_index, (int)erroroffset); 2224 return 0; 2225 } 2226 2227 if (pcre2_jit_compile_16(code, current->jit_compile_options) != 0) { 2228 printf("Pattern[%d:0] cannot be compiled by the JIT compiler.\n", pattern_index); 2229 pcre2_code_free_16(code); 2230 return 0; 2231 } 2232 2233 input = current->input; 2234 length = 0; 2235 2236 while (*input++ != 0) 2237 length++; 2238 2239 length -= current->skip_left + current->skip_right; 2240 2241 if (current->jit_compile_options & PCRE2_JIT_COMPLETE) { 2242 result = pcre2_jit_match_16(code, (current->input + current->skip_left), 2243 length, current->start_offset - current->skip_left, 0, mdata, NULL); 2244 2245 if (check_invalid_utf_result(pattern_index, "match", result, current->match_start, current->match_end, ovector)) { 2246 pcre2_code_free_16(code); 2247 return 0; 2248 } 2249 } 2250 2251 if (current->jit_compile_options & PCRE2_JIT_PARTIAL_SOFT) { 2252 result = pcre2_jit_match_16(code, (current->input + current->skip_left), 2253 length, current->start_offset - current->skip_left, PCRE2_PARTIAL_SOFT, mdata, NULL); 2254 2255 if (check_invalid_utf_result(pattern_index, "partial match", result, current->match_start, current->match_end, ovector)) { 2256 pcre2_code_free_16(code); 2257 return 0; 2258 } 2259 } 2260 2261 pcre2_code_free_16(code); 2262 return 1; 2263} 2264 2265static int invalid_utf16_regression_tests(void) 2266{ 2267 const struct invalid_utf16_regression_test_case *current; 2268 pcre2_compile_context_16 *ccontext; 2269 pcre2_match_data_16 *mdata; 2270 int total = 0, successful = 0; 2271 int result; 2272 2273 printf("\nRunning invalid-utf16 JIT regression tests\n"); 2274 2275 ccontext = pcre2_compile_context_create_16(NULL); 2276 pcre2_set_newline_16(ccontext, PCRE2_NEWLINE_ANY); 2277 mdata = pcre2_match_data_create_16(4, NULL); 2278 2279 for (current = invalid_utf16_regression_test_cases; current->pattern[0]; current++) { 2280 /* printf("\nPattern: %s :\n", current->pattern); */ 2281 
total++; 2282 2283 result = 1; 2284 if (!run_invalid_utf16_test(current, total - 1, 0, ccontext, mdata)) 2285 result = 0; 2286 if (!run_invalid_utf16_test(current, total - 1, 1, ccontext, mdata)) 2287 result = 0; 2288 2289 if (result) { 2290 successful++; 2291 } 2292 2293 printf("."); 2294 if ((total % 60) == 0) 2295 printf("\n"); 2296 } 2297 2298 if ((total % 60) != 0) 2299 printf("\n"); 2300 2301 pcre2_match_data_free_16(mdata); 2302 pcre2_compile_context_free_16(ccontext); 2303 2304 if (total == successful) { 2305 printf("\nAll invalid UTF16 JIT regression tests are successfully passed.\n"); 2306 return 0; 2307 } else { 2308 printf("\nInvalid UTF16 successful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful); 2309 return 1; 2310 } 2311} 2312 2313#else /* !SUPPORT_UNICODE || !SUPPORT_PCRE2_16 */ 2314 2315static int invalid_utf16_regression_tests(void) 2316{ 2317 return 0; 2318} 2319 2320#endif /* SUPPORT_UNICODE && SUPPORT_PCRE2_16 */ 2321 2322#if defined SUPPORT_UNICODE && defined SUPPORT_PCRE2_32 2323 2324#define UDA (PCRE2_UTF | PCRE2_DOTALL | PCRE2_ANCHORED) 2325#define CI (PCRE2_JIT_COMPLETE | PCRE2_JIT_INVALID_UTF) 2326#define CPI (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_INVALID_UTF) 2327 2328struct invalid_utf32_regression_test_case { 2329 int compile_options; 2330 int jit_compile_options; 2331 int start_offset; 2332 int skip_left; 2333 int skip_right; 2334 int match_start; 2335 int match_end; 2336 const PCRE2_UCHAR32 *pattern[2]; 2337 const PCRE2_UCHAR32 *input; 2338}; 2339 2340static PCRE2_UCHAR32 allany32[] = { '.', 0 }; 2341static PCRE2_UCHAR32 non_word_boundary32[] = { '\\', 'B', 0 }; 2342static PCRE2_UCHAR32 word_boundary32[] = { '\\', 'b', 0 }; 2343static PCRE2_UCHAR32 backreference32[] = { '(', '.', ')', '\\', '1', 0 }; 2344static PCRE2_UCHAR32 grapheme32[] = { '\\', 'X', 0 }; 2345static PCRE2_UCHAR32 nothashmark32[] = { '[', '^', '#', ']', 0 }; 2346static PCRE2_UCHAR32 afternl32[] = { '^', '\\', 'W', 0 
}; 2347static PCRE2_UCHAR32 test32_1[] = { 0x10ffff, 0x10ffff, 0x110000, 0x110000, 0x10ffff, 0 }; 2348static PCRE2_UCHAR32 test32_2[] = { 0xd7ff, 0xe000, 0xd800, 0xdfff, 0xe000, 0xdfff, 0xd800, 0 }; 2349static PCRE2_UCHAR32 test32_3[] = { 'a', 'A', 0x110000, 0 }; 2350static PCRE2_UCHAR32 test32_4[] = { '#', 0x10ffff, 0x110000, 0 }; 2351static PCRE2_UCHAR32 test32_5[] = { ' ', 0x2028, '#', 0 }; 2352static PCRE2_UCHAR32 test32_6[] = { ' ', 0x110000, 0x2028, '#', 0 }; 2353 2354static const struct invalid_utf32_regression_test_case invalid_utf32_regression_test_cases[] = { 2355 { UDA, CI, 0, 0, 0, 0, 1, { allany32, NULL }, test32_1 }, 2356 { UDA, CI, 2, 0, 0, -1, -1, { allany32, NULL }, test32_1 }, 2357 { UDA, CI, 0, 0, 0, 0, 1, { allany32, NULL }, test32_2 }, 2358 { UDA, CI, 1, 0, 0, 1, 2, { allany32, NULL }, test32_2 }, 2359 { UDA, CI, 2, 0, 0, -1, -1, { allany32, NULL }, test32_2 }, 2360 { UDA, CI, 3, 0, 0, -1, -1, { allany32, NULL }, test32_2 }, 2361 2362 { UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary32, NULL }, test32_1 }, 2363 { UDA, CPI, 3, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_1 }, 2364 { UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary32, NULL }, test32_2 }, 2365 { UDA, CPI, 3, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_2 }, 2366 { UDA, CPI, 6, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_2 }, 2367 2368 { UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { backreference32, NULL }, test32_3 }, 2369 { UDA | PCRE2_CASELESS, CPI, 1, 0, 0, -1, -1, { backreference32, NULL }, test32_3 }, 2370 2371 { UDA, CPI, 0, 0, 0, 0, 1, { grapheme32, NULL }, test32_1 }, 2372 { UDA, CPI, 2, 0, 0, -1, -1, { grapheme32, NULL }, test32_1 }, 2373 { UDA, CPI, 1, 0, 0, 1, 2, { grapheme32, NULL }, test32_2 }, 2374 { UDA, CPI, 2, 0, 0, -1, -1, { grapheme32, NULL }, test32_2 }, 2375 { UDA, CPI, 3, 0, 0, -1, -1, { grapheme32, NULL }, test32_2 }, 2376 { UDA, CPI, 4, 0, 0, 4, 5, { grapheme32, NULL }, test32_2 }, 2377 2378 { UDA, CPI, 0, 0, 0, 
-1, -1, { nothashmark32, NULL }, test32_4 }, 2379 { UDA, CPI, 1, 0, 0, 1, 2, { nothashmark32, NULL }, test32_4 }, 2380 { UDA, CPI, 2, 0, 0, -1, -1, { nothashmark32, NULL }, test32_4 }, 2381 { UDA, CPI, 1, 0, 0, 1, 2, { nothashmark32, NULL }, test32_2 }, 2382 { UDA, CPI, 2, 0, 0, -1, -1, { nothashmark32, NULL }, test32_2 }, 2383 2384 { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 2, 3, { afternl32, NULL }, test32_5 }, 2385 { PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { afternl32, NULL }, test32_6 }, 2386 2387 { 0, 0, 0, 0, 0, 0, 0, { NULL, NULL }, NULL } 2388}; 2389 2390#undef UDA 2391#undef CI 2392#undef CPI 2393 2394static int run_invalid_utf32_test(const struct invalid_utf32_regression_test_case *current, 2395 int pattern_index, int i, pcre2_compile_context_32 *ccontext, pcre2_match_data_32 *mdata) 2396{ 2397 pcre2_code_32 *code; 2398 int result, errorcode; 2399 PCRE2_SIZE length, erroroffset; 2400 const PCRE2_UCHAR32 *input; 2401 PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_32(mdata); 2402 2403 if (current->pattern[i] == NULL) 2404 return 1; 2405 2406 code = pcre2_compile_32(current->pattern[i], PCRE2_ZERO_TERMINATED, 2407 current->compile_options, &errorcode, &erroroffset, ccontext); 2408 2409 if (!code) { 2410 printf("Pattern[%d:0] cannot be compiled. 
Error offset: %d\n", pattern_index, (int)erroroffset); 2411 return 0; 2412 } 2413 2414 if (pcre2_jit_compile_32(code, current->jit_compile_options) != 0) { 2415 printf("Pattern[%d:0] cannot be compiled by the JIT compiler.\n", pattern_index); 2416 pcre2_code_free_32(code); 2417 return 0; 2418 } 2419 2420 input = current->input; 2421 length = 0; 2422 2423 while (*input++ != 0) 2424 length++; 2425 2426 length -= current->skip_left + current->skip_right; 2427 2428 if (current->jit_compile_options & PCRE2_JIT_COMPLETE) { 2429 result = pcre2_jit_match_32(code, (current->input + current->skip_left), 2430 length, current->start_offset - current->skip_left, 0, mdata, NULL); 2431 2432 if (check_invalid_utf_result(pattern_index, "match", result, current->match_start, current->match_end, ovector)) { 2433 pcre2_code_free_32(code); 2434 return 0; 2435 } 2436 } 2437 2438 if (current->jit_compile_options & PCRE2_JIT_PARTIAL_SOFT) { 2439 result = pcre2_jit_match_32(code, (current->input + current->skip_left), 2440 length, current->start_offset - current->skip_left, PCRE2_PARTIAL_SOFT, mdata, NULL); 2441 2442 if (check_invalid_utf_result(pattern_index, "partial match", result, current->match_start, current->match_end, ovector)) { 2443 pcre2_code_free_32(code); 2444 return 0; 2445 } 2446 } 2447 2448 pcre2_code_free_32(code); 2449 return 1; 2450} 2451 2452static int invalid_utf32_regression_tests(void) 2453{ 2454 const struct invalid_utf32_regression_test_case *current; 2455 pcre2_compile_context_32 *ccontext; 2456 pcre2_match_data_32 *mdata; 2457 int total = 0, successful = 0; 2458 int result; 2459 2460 printf("\nRunning invalid-utf32 JIT regression tests\n"); 2461 2462 ccontext = pcre2_compile_context_create_32(NULL); 2463 pcre2_set_newline_32(ccontext, PCRE2_NEWLINE_ANY); 2464 mdata = pcre2_match_data_create_32(4, NULL); 2465 2466 for (current = invalid_utf32_regression_test_cases; current->pattern[0]; current++) { 2467 /* printf("\nPattern: %s :\n", current->pattern); */ 2468 
total++; 2469 2470 result = 1; 2471 if (!run_invalid_utf32_test(current, total - 1, 0, ccontext, mdata)) 2472 result = 0; 2473 if (!run_invalid_utf32_test(current, total - 1, 1, ccontext, mdata)) 2474 result = 0; 2475 2476 if (result) { 2477 successful++; 2478 } 2479 2480 printf("."); 2481 if ((total % 60) == 0) 2482 printf("\n"); 2483 } 2484 2485 if ((total % 60) != 0) 2486 printf("\n"); 2487 2488 pcre2_match_data_free_32(mdata); 2489 pcre2_compile_context_free_32(ccontext); 2490 2491 if (total == successful) { 2492 printf("\nAll invalid UTF32 JIT regression tests are successfully passed.\n"); 2493 return 0; 2494 } else { 2495 printf("\nInvalid UTF32 successful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful); 2496 return 1; 2497 } 2498} 2499 2500#else /* !SUPPORT_UNICODE || !SUPPORT_PCRE2_32 */ 2501 2502static int invalid_utf32_regression_tests(void) 2503{ 2504 return 0; 2505} 2506 2507#endif /* SUPPORT_UNICODE && SUPPORT_PCRE2_32 */ 2508 2509/* End of pcre2_jit_test.c */ 2510