/*************************************************
*      Perl-Compatible Regular Expressions       *
*************************************************/

/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
     Original API code Copyright (c) 1997-2012 University of Cambridge
         New API code Copyright (c) 2016 University of Cambridge

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of the University of Cambridge nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
40
41#ifdef HAVE_CONFIG_H
42#include "config.h"
43#endif
44
45#include <stdio.h>
46#include <string.h>
47
48#define PCRE2_CODE_UNIT_WIDTH 0
49#include "pcre2.h"
50
/*
 Letter characters:
   \xe6\x92\xad = 0x64ad = 25773 (kanji)
 Non-letter characters:
   \xc2\xa1 = 0xa1 = 161 (Inverted Exclamation Mark)
   \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
   \xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character)
   \xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character)
 Newlines:
   \xc2\x85 = 0x85 = 133 (Next Line = NEL)
   \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
 Othercase pairs:
   \xc3\xa9 = 0xe9 = 233 (e')
      \xc3\x89 = 0xc9 = 201 (E')
   \xc3\xa1 = 0xe1 = 225 (a')
      \xc3\x81 = 0xc1 = 193 (A')
   \x53 = 0x53 = S
     \x73 = 0x73 = s
     \xc5\xbf = 0x17f = 383 (long S)
   \xc8\xba = 0x23a = 570
      \xe2\xb1\xa5 = 0x2c65 = 11365
   \xe1\xbd\xb8 = 0x1f78 = 8056
      \xe1\xbf\xb8 = 0x1ff8 = 8184
   \xf0\x90\x90\x80 = 0x10400 = 66560
      \xf0\x90\x90\xa8 = 0x10428 = 66600
   \xc7\x84 = 0x1c4 = 452
     \xc7\x85 = 0x1c5 = 453
     \xc7\x86 = 0x1c6 = 454
 Caseless sets:
   ucp_Armenian - \x{531}-\x{556} -> \x{561}-\x{586}
   ucp_Coptic - \x{2c80}-\x{2ce3} -> caseless: XOR 0x1
   ucp_Latin - \x{ff21}-\x{ff3a} -> \x{ff41}-\x{ff5a}

 Mark property:
   \xcc\x8d = 0x30d = 781
 Special:
   \xc2\x80 = 0x80 = 128 (lowest 2 byte character)
   \xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
   \xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character)
   \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
   \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
   \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
*/
94
/* Test suite entry points. Being static, each must be defined elsewhere in
this translation unit. main() ORs their int results together to form the
process exit status. */
static int regression_tests(void);
static int invalid_utf8_regression_tests(void);
static int invalid_utf16_regression_tests(void);
static int invalid_utf32_regression_tests(void);

/* Program entry point.

Queries PCRE2_CONFIG_JIT through the first code unit width that was compiled
in (8, then 16, then 32 bit). If the library does not report JIT support, a
message is printed and 1 is returned without running any suite. Otherwise all
four suites are run and the bitwise OR of their results is returned. */
int main(void)
{
	int jit = 0;
	int result;

	/* Only one query is needed; jit keeps its initial 0 if no width is
	selected here, which leads to the "JIT must be enabled" exit below. */
#if defined SUPPORT_PCRE2_8
	pcre2_config_8(PCRE2_CONFIG_JIT, &jit);
#elif defined SUPPORT_PCRE2_16
	pcre2_config_16(PCRE2_CONFIG_JIT, &jit);
#elif defined SUPPORT_PCRE2_32
	pcre2_config_32(PCRE2_CONFIG_JIT, &jit);
#endif
	if (!jit) {
		printf("JIT must be enabled to run pcre2_jit_test\n");
		return 1;
	}

	/* Run the suites as separate statements. As operands of a single '|'
	expression their call order would be unspecified, so the order of
	anything they print could differ between compilers. Every suite still
	runs regardless of earlier results, exactly as with the '|' chain. */
	result = regression_tests();
	result |= invalid_utf8_regression_tests();
	result |= invalid_utf16_regression_tests();
	result |= invalid_utf32_regression_tests();
	return result;
}
119
120/* --------------------------------------------------------------------------------------- */
121
122#if !(defined SUPPORT_PCRE2_8) && !(defined SUPPORT_PCRE2_16) && !(defined SUPPORT_PCRE2_32)
123#error SUPPORT_PCRE2_8 or SUPPORT_PCRE2_16 or SUPPORT_PCRE2_32 must be defined
124#endif
125
/* Shorthand for the compile option combinations used in the first column of
the regression test table. Letters: C = caseless, M = multiline, U = UTF,
P = UCP. */
#define MU	(PCRE2_MULTILINE | PCRE2_UTF)
#define MUP	(PCRE2_MULTILINE | PCRE2_UTF | PCRE2_UCP)
#define CMU	(PCRE2_CASELESS | PCRE2_MULTILINE | PCRE2_UTF)
#define CMUP	(PCRE2_CASELESS | PCRE2_MULTILINE | PCRE2_UTF | PCRE2_UCP)
#define M	(PCRE2_MULTILINE)
#define MP	(PCRE2_MULTILINE | PCRE2_UCP)
#define U	(PCRE2_UTF)
#define CM	(PCRE2_CASELESS | PCRE2_MULTILINE)

/* The "newline" column packs two settings into one int: the newline
convention in the low 16 bits and an optional \R (BSR) convention in the
bits above. BSR() moves a BSR value into the upper part; A is the newline
convention most table entries use. */
#define BSR(x)	((x) << 16)
#define A	PCRE2_NEWLINE_ANYCRLF

/* Unpack the two halves of a packed "newline" value. */
#define GET_NEWLINE(x)	((x) & 0xffff)
#define GET_BSR(x)	((x) >> 16)

/* The "start_offset" column holds a numeric offset in its low 16 bits
(OFFSET_MASK) with per-test flag bits OR-ed in above it.
NOTE(review): the flag meanings are inferred from their names only; the code
that consumes them is outside this view -- confirm against the test runner.
NOTE(review): F_NO32 has the same value as F_NO16, so the two flags cannot be
told apart; confirm whether that is intentional before changing either. */
#define OFFSET_MASK	0x00ffff
#define F_NO8		0x010000
#define F_NO16		0x020000
#define F_NO32		0x020000
#define F_NOMATCH	0x040000
#define F_DIFF		0x080000
#define F_FORCECONV	0x100000
#define F_PROPERTY	0x200000
149
/* One row of the regression test table. The table is written with positional
initializers, so the order of the fields below must not change. */
struct regression_test_case {
	/* PCRE2 compile option bits (for example MU, CMUP, PCRE2_CASELESS). */
	int compile_options;
	/* Newline convention in the low 16 bits, optionally combined with a
	BSR(...) value in the bits above (see GET_NEWLINE / GET_BSR). */
	int newline;
	/* Match-time option bits (for example PCRE2_NOTBOL, PCRE2_NO_UTF_CHECK). */
	int match_options;
	/* Start offset in the low 16 bits (OFFSET_MASK), OR-ed with F_* flags. */
	int start_offset;
	/* Pattern to compile, as a C string. */
	const char *pattern;
	/* Subject string the pattern is run against. */
	const char *input;
};
158
159static struct regression_test_case regression_test_cases[] = {
160	/* Constant strings. */
161	{ MU, A, 0, 0, "AbC", "AbAbC" },
162	{ MU, A, 0, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
163	{ CMU, A, 0, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
164	{ M, A, 0, 0, "[^a]", "aAbB" },
165	{ CM, A, 0, 0, "[^m]", "mMnN" },
166	{ M, A, 0, 0, "a[^b][^#]", "abacd" },
167	{ CM, A, 0, 0, "A[^B][^E]", "abacd" },
168	{ CMU, A, 0, 0, "[^x][^#]", "XxBll" },
169	{ MU, A, 0, 0, "[^a]", "aaa\xc3\xa1#Ab" },
170	{ CMU, A, 0, 0, "[^A]", "aA\xe6\x92\xad" },
171	{ MU, A, 0, 0, "\\W(\\W)?\\w", "\r\n+bc" },
172	{ MU, A, 0, 0, "\\W(\\W)?\\w", "\n\r+bc" },
173	{ MU, A, 0, 0, "\\W(\\W)?\\w", "\r\r+bc" },
174	{ MU, A, 0, 0, "\\W(\\W)?\\w", "\n\n+bc" },
175	{ MU, A, 0, 0, "[axd]", "sAXd" },
176	{ CMU, A, 0, 0, "[axd]", "sAXd" },
177	{ CMU, A, 0, 0 | F_NOMATCH, "[^axd]", "DxA" },
178	{ MU, A, 0, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
179	{ MU, A, 0, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
180	{ CMU, A, 0, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
181	{ MU, A, 0, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
182	{ MU, A, 0, 0, "[^a]", "\xc2\x80[]" },
183	{ CMU, A, 0, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
184	{ CM, A, 0, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
185	{ PCRE2_CASELESS, 0, 0, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
186	{ PCRE2_CASELESS, 0, 0, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
187	{ PCRE2_CASELESS, 0, 0, 0, "a1", "Aa1" },
188#ifndef NEVER_BACKSLASH_C
189	{ M, A, 0, 0, "\\Ca", "cda" },
190	{ CM, A, 0, 0, "\\Ca", "CDA" },
191	{ M, A, 0, 0 | F_NOMATCH, "\\Cx", "cda" },
192	{ CM, A, 0, 0 | F_NOMATCH, "\\Cx", "CDA" },
193#endif /* !NEVER_BACKSLASH_C */
194	{ CMUP, A, 0, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
195	{ CMUP, A, 0, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
196	{ CMUP, A, 0, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
197	{ CMUP, A, 0, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
198	{ M, A, 0, 0, "[3-57-9]", "5" },
199	{ PCRE2_AUTO_CALLOUT, A, 0, 0, "12345678901234567890123456789012345678901234567890123456789012345678901234567890",
200		"12345678901234567890123456789012345678901234567890123456789012345678901234567890" },
201
202	/* Assertions. */
203	{ MU, A, 0, 0, "\\b[^A]", "A_B#" },
204	{ M, A, 0, 0 | F_NOMATCH, "\\b\\W", "\n*" },
205	{ MU, A, 0, 0, "\\B[^,]\\b[^s]\\b", "#X" },
206	{ MP, A, 0, 0, "\\B", "_\xa1" },
207	{ MP, A, 0, 0 | F_PROPERTY, "\\b_\\b[,A]\\B", "_," },
208	{ MUP, A, 0, 0, "\\b", "\xe6\x92\xad!" },
209	{ MUP, A, 0, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
210	{ MUP, A, 0, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
211	{ MUP, A, 0, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
212	{ MU, A, 0, 0 | F_NOMATCH, "\\b.", "\xcd\xbe" },
213	{ CMUP, A, 0, 0, "\\By", "\xf0\x90\x90\xa8y" },
214	{ M, A, 0, 0 | F_NOMATCH, "\\R^", "\n" },
215	{ M, A, 0, 1 | F_NOMATCH, "^", "\n" },
216	{ 0, 0, 0, 0, "^ab", "ab" },
217	{ 0, 0, 0, 0 | F_NOMATCH, "^ab", "aab" },
218	{ M, PCRE2_NEWLINE_CRLF, 0, 0, "^a", "\r\raa\n\naa\r\naa" },
219	{ MU, A, 0, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
220	{ M, PCRE2_NEWLINE_ANY, 0, 0, "^-", "a--b--\x85--" },
221	{ MU, PCRE2_NEWLINE_ANY, 0, 0, "^-", "a--\xe2\x80\xa8--" },
222	{ MU, PCRE2_NEWLINE_ANY, 0, 0, "^-", "a--\xc2\x85--" },
223	{ 0, 0, 0, 0, "ab$", "ab" },
224	{ 0, 0, 0, 0 | F_NOMATCH, "ab$", "abab\n\n" },
225	{ PCRE2_DOLLAR_ENDONLY, 0, 0, 0 | F_NOMATCH, "ab$", "abab\r\n" },
226	{ M, PCRE2_NEWLINE_CRLF, 0, 0, "a$", "\r\raa\n\naa\r\naa" },
227	{ M, PCRE2_NEWLINE_ANY, 0, 0, "a$", "aaa" },
228	{ MU, PCRE2_NEWLINE_ANYCRLF, 0, 0, "#$", "#\xc2\x85###\r#" },
229	{ MU, PCRE2_NEWLINE_ANY, 0, 0, "#$", "#\xe2\x80\xa9" },
230	{ 0, PCRE2_NEWLINE_ANY, PCRE2_NOTBOL, 0 | F_NOMATCH, "^a", "aa\naa" },
231	{ M, PCRE2_NEWLINE_ANY, PCRE2_NOTBOL, 0, "^a", "aa\naa" },
232	{ 0, PCRE2_NEWLINE_ANY, PCRE2_NOTEOL, 0 | F_NOMATCH, "a$", "aa\naa" },
233	{ 0, PCRE2_NEWLINE_ANY, PCRE2_NOTEOL, 0 | F_NOMATCH, "a$", "aa\r\n" },
234	{ U | PCRE2_DOLLAR_ENDONLY, PCRE2_NEWLINE_ANY, 0, 0 | F_PROPERTY, "\\p{Any}{2,}$", "aa\r\n" },
235	{ M, PCRE2_NEWLINE_ANY, PCRE2_NOTEOL, 0, "a$", "aa\naa" },
236	{ 0, PCRE2_NEWLINE_CR, 0, 0, ".\\Z", "aaa" },
237	{ U, PCRE2_NEWLINE_CR, 0, 0, "a\\Z", "aaa\r" },
238	{ 0, PCRE2_NEWLINE_CR, 0, 0, ".\\Z", "aaa\n" },
239	{ 0, PCRE2_NEWLINE_CRLF, 0, 0, ".\\Z", "aaa\r" },
240	{ U, PCRE2_NEWLINE_CRLF, 0, 0, ".\\Z", "aaa\n" },
241	{ 0, PCRE2_NEWLINE_CRLF, 0, 0, ".\\Z", "aaa\r\n" },
242	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa" },
243	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r" },
244	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\n" },
245	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r\n" },
246	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\xe2\x80\xa8" },
247	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa" },
248	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r" },
249	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\n" },
250	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".\\Z", "aaa\r\n" },
251	{ U, PCRE2_NEWLINE_ANY, 0, 0, ".\\Z", "aaa\xc2\x85" },
252	{ U, PCRE2_NEWLINE_ANY, 0, 0, ".\\Z", "aaa\xe2\x80\xa8" },
253	{ M, A, 0, 0, "\\Aa", "aaa" },
254	{ M, A, 0, 1 | F_NOMATCH, "\\Aa", "aaa" },
255	{ M, A, 0, 1, "\\Ga", "aaa" },
256	{ M, A, 0, 1 | F_NOMATCH, "\\Ga", "aba" },
257	{ M, A, 0, 0, "a\\z", "aaa" },
258	{ M, A, 0, 0 | F_NOMATCH, "a\\z", "aab" },
259
260	/* Brackets and alternatives. */
261	{ MU, A, 0, 0, "(ab|bb|cd)", "bacde" },
262	{ MU, A, 0, 0, "(?:ab|a)(bc|c)", "ababc" },
263	{ MU, A, 0, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
264	{ CMU, A, 0, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
265	{ MU, A, 0, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
266	{ MU, A, 0, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
267	{ MU, A, 0, 0, "\xc7\x82|\xc6\x82", "\xf1\x83\x82\x82\xc7\x82\xc7\x83" },
268	{ MU, A, 0, 0, "=\xc7\x82|#\xc6\x82", "\xf1\x83\x82\x82=\xc7\x82\xc7\x83" },
269	{ MU, A, 0, 0, "\xc7\x82\xc7\x83|\xc6\x82\xc6\x82", "\xf1\x83\x82\x82\xc7\x82\xc7\x83" },
270	{ MU, A, 0, 0, "\xc6\x82\xc6\x82|\xc7\x83\xc7\x83|\xc8\x84\xc8\x84", "\xf1\x83\x82\x82\xc8\x84\xc8\x84" },
271	{ U, A, 0, 0, "\xe1\x81\x80|\xe2\x82\x80|\xe4\x84\x80", "\xdf\xbf\xc2\x80\xe4\x84\x80" },
272	{ U, A, 0, 0, "(?:\xe1\x81\x80|\xe2\x82\x80|\xe4\x84\x80)#", "\xdf\xbf\xc2\x80#\xe4\x84\x80#" },
273	{ CM, A, 0, 0, "ab|cd", "CD" },
274	{ CM, A, 0, 0, "a1277|a1377|bX487", "bx487" },
275	{ CM, A, 0, 0, "a1277|a1377|bx487", "bX487" },
276
277	/* Greedy and non-greedy ? operators. */
278	{ MU, A, 0, 0, "(?:a)?a", "laab" },
279	{ CMU, A, 0, 0, "(A)?A", "llaab" },
280	{ MU, A, 0, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trygraphs in GCC. */
281	{ MU, A, 0, 0, "(a)?a", "manm" },
282	{ CMU, A, 0, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
283	{ MU, A, 0, 0, "(a|b)?\?d((?:e)?)", "abcde" },
284	{ MU, A, 0, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
285
286	/* Greedy and non-greedy + operators */
287	{ MU, A, 0, 0, "(aa)+aa", "aaaaaaa" },
288	{ MU, A, 0, 0, "(aa)+?aa", "aaaaaaa" },
289	{ MU, A, 0, 0, "(?:aba|ab|a)+l", "ababamababal" },
290	{ MU, A, 0, 0, "(?:aba|ab|a)+?l", "ababamababal" },
291	{ MU, A, 0, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
292	{ MU, A, 0, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
293	{ MU, A, 0, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
294	{ MU, A, 0, 0, "(aa|bb){8,1000}", "abaabbaabbaabbaab_aabbaabbaabbaabbaabbaabb_" },
295
296	/* Greedy and non-greedy * operators */
297	{ CMU, A, 0, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
298	{ MU, A, 0, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
299	{ MU, A, 0, 0, "(aa|ab)*ab", "aaabaaab" },
300	{ CMU, A, 0, 0, "(aa|Ab)*?aB", "aaabaaab" },
301	{ MU, A, 0, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
302	{ MU, A, 0, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
303	{ M, A, 0, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
304	{ M, A, 0, 0, "((?:a|)*){0}a", "a" },
305
306	/* Combining ? + * operators */
307	{ MU, A, 0, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
308	{ MU, A, 0, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
309	{ MU, A, 0, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
310	{ MU, A, 0, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
311	{ MU, A, 0, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
312
313	/* Single character iterators. */
314	{ MU, A, 0, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
315	{ MU, A, 0, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
316	{ MU, A, 0, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
317	{ MU, A, 0, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
318	{ MU, A, 0, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
319	{ MU, A, 0, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
320	{ MU, A, 0, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
321	{ MU, A, 0, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
322	{ MU, A, 0, 0, "(ba{2})+c", "baabaaabacbaabaac" },
323	{ MU, A, 0, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
324	{ MU, A, 0, 0, "(a?+[^b])+", "babaacacb" },
325	{ MU, A, 0, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
326	{ CMU, A, 0, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
327	{ CMU, A, 0, 0, "[c-f]+k", "DemmFke" },
328	{ MU, A, 0, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
329	{ MU, A, 0, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
330	{ CMU, A, 0, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
331	{ CMU, A, 0, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
332	{ CMU, A, 0, 0, "[ace]{3,}", "AcbDAcEEcEd" },
333	{ CMU, A, 0, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
334	{ MU, A, 0, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
335	{ CMU, A, 0, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
336	{ MU, A, 0, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
337	{ MU, A, 0, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
338	{ MU, A, 0, 0, "\\b\\w+\\B", "x,a_cd" },
339	{ MUP, A, 0, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
340	{ CMU, A, 0, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
341	{ CMUP, A, 0, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
342	{ CMU, A, 0, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" },
343	{ CMU, A, 0, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
344	{ MU, A, 0, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
345	{ MU, A, 0, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" },
346	{ MU, A, 0, 0, "\\d+123", "987654321,01234" },
347	{ MU, A, 0, 0, "abcd*|\\w+xy", "aaaaa,abxyz" },
348	{ MU, A, 0, 0, "(?:abc|((?:amc|\\b\\w*xy)))", "aaaaa,abxyz" },
349	{ MU, A, 0, 0, "a(?R)|([a-z]++)#", ".abcd.abcd#."},
350	{ MU, A, 0, 0, "a(?R)|([a-z]++)#", ".abcd.mbcd#."},
351	{ MU, A, 0, 0, ".[ab]*.", "xx" },
352	{ MU, A, 0, 0, ".[ab]*a", "xxa" },
353	{ MU, A, 0, 0, ".[ab]?.", "xx" },
354	{ MU, A, 0, 0, "_[ab]+_*a", "_aa" },
355	{ MU, A, 0, 0, "#(A+)#\\d+", "#A#A#0" },
356	{ MU, A, 0, 0, "(?P<size>\\d+)m|M", "4M" },
357	{ M, PCRE2_NEWLINE_CRLF, 0, 0, "\\n?.+#", "\n,\n,#" },
358
359	/* Bracket repeats with limit. */
360	{ MU, A, 0, 0, "(?:(ab){2}){5}M", "abababababababababababM" },
361	{ MU, A, 0, 0, "(?:ab|abab){1,5}M", "abababababababababababM" },
362	{ MU, A, 0, 0, "(?>ab|abab){1,5}M", "abababababababababababM" },
363	{ MU, A, 0, 0, "(?:ab|abab){1,5}?M", "abababababababababababM" },
364	{ MU, A, 0, 0, "(?>ab|abab){1,5}?M", "abababababababababababM" },
365	{ MU, A, 0, 0, "(?:(ab){1,4}?){1,3}?M", "abababababababababababababM" },
366	{ MU, A, 0, 0, "(?:(ab){1,4}){1,3}abababababababababababM", "ababababababababababababM" },
367	{ MU, A, 0, 0 | F_NOMATCH, "(?:(ab){1,4}){1,3}abababababababababababM", "abababababababababababM" },
368	{ MU, A, 0, 0, "(ab){4,6}?M", "abababababababM" },
369
370	/* Basic character sets. */
371	{ MU, A, 0, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
372	{ MU, A, 0, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
373	{ MU, A, 0, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
374	{ MU, A, 0, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
375	{ MU, A, 0, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
376	{ MU, A, 0, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
377	{ MU, A, 0, 0, "x[bcef]+", "xaxdxecbfg" },
378	{ MU, A, 0, 0, "x[bcdghij]+", "xaxexfxdgbjk" },
379	{ MU, A, 0, 0, "x[^befg]+", "xbxexacdhg" },
380	{ MU, A, 0, 0, "x[^bcdl]+", "xlxbxaekmd" },
381	{ MU, A, 0, 0, "x[^bcdghi]+", "xbxdxgxaefji" },
382	{ MU, A, 0, 0, "x[B-Fb-f]+", "xaxAxgxbfBFG" },
383	{ CMU, A, 0, 0, "\\x{e9}+", "#\xf0\x90\x90\xa8\xc3\xa8\xc3\xa9\xc3\x89\xc3\x88" },
384	{ CMU, A, 0, 0, "[^\\x{e9}]+", "\xc3\xa9#\xf0\x90\x90\xa8\xc3\xa8\xc3\x88\xc3\x89" },
385	{ MU, A, 0, 0, "[\\x02\\x7e]+", "\xc3\x81\xe1\xbf\xb8\xf0\x90\x90\xa8\x01\x02\x7e\x7f" },
386	{ MU, A, 0, 0, "[^\\x02\\x7e]+", "\x02\xc3\x81\xe1\xbf\xb8\xf0\x90\x90\xa8\x01\x7f\x7e" },
387	{ MU, A, 0, 0, "[\\x{81}-\\x{7fe}]+", "#\xe1\xbf\xb8\xf0\x90\x90\xa8\xc2\x80\xc2\x81\xdf\xbe\xdf\xbf" },
388	{ MU, A, 0, 0, "[^\\x{81}-\\x{7fe}]+", "\xc2\x81#\xe1\xbf\xb8\xf0\x90\x90\xa8\xc2\x80\xdf\xbf\xdf\xbe" },
389	{ MU, A, 0, 0, "[\\x{801}-\\x{fffe}]+", "#\xc3\xa9\xf0\x90\x90\x80\xe0\xa0\x80\xe0\xa0\x81\xef\xbf\xbe\xef\xbf\xbf" },
390	{ MU, A, 0, 0, "[^\\x{801}-\\x{fffe}]+", "\xe0\xa0\x81#\xc3\xa9\xf0\x90\x90\x80\xe0\xa0\x80\xef\xbf\xbf\xef\xbf\xbe" },
391	{ MU, A, 0, 0, "[\\x{10001}-\\x{10fffe}]+", "#\xc3\xa9\xe2\xb1\xa5\xf0\x90\x80\x80\xf0\x90\x80\x81\xf4\x8f\xbf\xbe\xf4\x8f\xbf\xbf" },
392	{ MU, A, 0, 0, "[^\\x{10001}-\\x{10fffe}]+", "\xf0\x90\x80\x81#\xc3\xa9\xe2\xb1\xa5\xf0\x90\x80\x80\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbe" },
393	{ CMU, A, 0, 0 | F_NOMATCH, "^[\\x{0100}-\\x{017f}]", " " },
394
395	/* Unicode properties. */
396	{ MUP, A, 0, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
397	{ MUP, A, 0, 0 | F_PROPERTY, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
398	{ MUP, A, 0, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
399	{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}]", "abc" },
400	{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}]", "abc" },
401	{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
402	{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
403	{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
404	{ MUP, A, 0, 0 | F_NOMATCH | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
405	{ MUP, A, 0, 0 | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
406	{ MUP, A, 0, 0 | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
407	{ MUP, A, 0, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
408	{ CMUP, A, 0, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
409	{ MUP, A, 0, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
410	{ MUP, A, 0, 0 | F_PROPERTY, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
411	{ MU, A, 0, 0 | F_PROPERTY, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
412	{ CMUP, A, 0, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
413	{ MUP, A, 0, 0 | F_PROPERTY, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
414	{ MUP, A, 0, 0 | F_PROPERTY, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
415	{ PCRE2_UCP, 0, 0, 0 | F_PROPERTY, "[a-b\\s]{2,5}[^a]", "AB  baaa" },
416	{ MUP, 0, 0, 0 | F_NOMATCH, "[^\\p{Hangul}\\p{Z}]", " " },
417	{ MUP, 0, 0, 0, "[\\p{Lu}\\P{Latin}]+", "c\xEA\xA4\xAE,A,b" },
418	{ MUP, 0, 0, 0, "[\\x{a92e}\\p{Lu}\\P{Latin}]+", "c\xEA\xA4\xAE,A,b" },
419	{ CMUP, 0, 0, 0, "[^S]\\B", "\xe2\x80\x8a" },
420
421	/* Possible empty brackets. */
422	{ MU, A, 0, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
423	{ MU, A, 0, 0, "(|ab||bc|a)+d", "abcxabcabd" },
424	{ MU, A, 0, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
425	{ MU, A, 0, 0, "(|ab||bc|a)*d", "abcxabcabd" },
426	{ MU, A, 0, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
427	{ MU, A, 0, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
428	{ MU, A, 0, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
429	{ MU, A, 0, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
430	{ MU, A, 0, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
431	{ MU, A, 0, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
432
433	/* Start offset. */
434	{ MU, A, 0, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
435	{ MU, A, 0, 4 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
436	{ MU, A, 0, 2 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
437	{ MU, A, 0, 1, "(\\w\\W\\w)+", "ab#d" },
438
439	/* Newline. */
440	{ M, PCRE2_NEWLINE_CRLF, 0, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
441	{ M, PCRE2_NEWLINE_CR, 0, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
442	{ M, PCRE2_NEWLINE_CRLF, 0, 0, "\\W{1,3}[^#]", "\r\n##...." },
443	{ MU, A, PCRE2_NO_UTF_CHECK, 1, "^.a", "\n\x80\nxa" },
444	{ MU, A, 0, 1, "^", "\r\n" },
445	{ M, PCRE2_NEWLINE_CRLF, 0, 1 | F_NOMATCH, "^", "\r\n" },
446	{ M, PCRE2_NEWLINE_CRLF, 0, 1, "^", "\r\na" },
447
448	/* Any character except newline or any newline. */
449	{ 0, PCRE2_NEWLINE_CRLF, 0, 0, ".", "\r" },
450	{ U, PCRE2_NEWLINE_CRLF, 0, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
451	{ 0, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
452	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
453	{ U, PCRE2_NEWLINE_ANY, 0, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
454	{ U, PCRE2_NEWLINE_ANYCRLF, 0, 0 | F_NOMATCH, ".(.).", "\xe2\x80\xa8\nb\r" },
455	{ 0, PCRE2_NEWLINE_ANY, 0, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
456	{ U, PCRE2_NEWLINE_ANY, 0, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
457	{ 0, BSR(PCRE2_BSR_ANYCRLF), 0, 0, "\\R", "\r" },
458	{ 0, BSR(PCRE2_BSR_ANYCRLF), 0, 0, "\\R", "\x85#\r\n#" },
459	{ U, BSR(PCRE2_BSR_UNICODE), 0, 0, "\\R", "ab\xe2\x80\xa8#c" },
460	{ U, BSR(PCRE2_BSR_UNICODE), 0, 0, "\\R", "ab\r\nc" },
461	{ U, PCRE2_NEWLINE_CRLF | BSR(PCRE2_BSR_UNICODE), 0, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
462	{ MU, A, 0, 0 | F_NOMATCH, "\\R+", "ab" },
463	{ MU, A, 0, 0, "\\R+", "ab\r\n\r" },
464	{ MU, A, 0, 0, "\\R*", "ab\r\n\r" },
465	{ MU, A, 0, 0, "\\R*", "\r\n\r" },
466	{ MU, A, 0, 0, "\\R{2,4}", "\r\nab\r\r" },
467	{ MU, A, 0, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
468	{ MU, A, 0, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
469	{ MU, A, 0, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
470	{ MU, A, 0, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" },
471	{ MU, A, 0, 0, "\\R+\\R\\R", "\r\r\r" },
472	{ MU, A, 0, 0, "\\R*\\R\\R", "\n\r" },
473	{ MU, A, 0, 0 | F_NOMATCH, "\\R{2,4}\\R\\R", "\r\r\r" },
474	{ MU, A, 0, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
475
476	/* Atomic groups (no fallback from "next" direction). */
477	{ MU, A, 0, 0 | F_NOMATCH, "(?>ab)ab", "bab" },
478	{ MU, A, 0, 0 | F_NOMATCH, "(?>(ab))ab", "bab" },
479	{ MU, A, 0, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
480			"bababcdedefgheijijklmlmnop" },
481	{ MU, A, 0, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
482	{ MU, A, 0, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
483	{ MU, A, 0, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
484	{ MU, A, 0, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
485	{ MU, A, 0, 0, "((?>a|)+?)b", "aaacaaab" },
486	{ MU, A, 0, 0, "(?>x|)*$", "aaa" },
487	{ MU, A, 0, 0, "(?>(x)|)*$", "aaa" },
488	{ MU, A, 0, 0, "(?>x|())*$", "aaa" },
489	{ MU, A, 0, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
490	{ MU, A, 0, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
491	{ MU, A, 0, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
492	{ MU, A, 0, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
493	{ MU, A, 0, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
494	{ MU, A, 0, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
495	{ MU, A, 0, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
496	{ MU, A, 0, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
497	{ MU, A, 0, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
498	{ MU, A, 0, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
499	{ MU, A, 0, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
500	{ MU, A, 0, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
501	{ MU, A, 0, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
502	{ MU, A, 0, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
503	{ CM, A, 0, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
504	{ MU, A, 0, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
505	{ MU, A, 0, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
506	{ MU, A, 0, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d" },
507	{ MU, A, 0, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
508	{ MU, A, 0, 0 | F_PROPERTY, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
509	{ MU, A, 0, 0 | F_PROPERTY, "\\X{2,4}", "abcdef" },
510	{ MU, A, 0, 0 | F_PROPERTY, "\\X{2,4}?", "abcdef" },
511	{ MU, A, 0, 0 | F_NOMATCH | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d##" },
512	{ MU, A, 0, 0 | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
513	{ MU, A, 0, 0, "(c(ab)?+ab)+", "cabcababcab" },
514	{ MU, A, 0, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
515
516	/* Possessive quantifiers. */
517	{ MU, A, 0, 0, "(?:a|b)++m", "mababbaaxababbaam" },
518	{ MU, A, 0, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
519	{ MU, A, 0, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
520	{ MU, A, 0, 0, "(a|b)++m", "mababbaaxababbaam" },
521	{ MU, A, 0, 0, "(a|b)*+m", "mababbaaxababbaam" },
522	{ MU, A, 0, 0, "(a|b)*+m", "ababbaaxababbaam" },
523	{ MU, A, 0, 0, "(a|b(*ACCEPT))++m", "maaxab" },
524	{ MU, A, 0, 0, "(?:b*)++m", "bxbbxbbbxm" },
525	{ MU, A, 0, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
526	{ MU, A, 0, 0, "(?:b*)*+m", "bxbbxbbbxm" },
527	{ MU, A, 0, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
528	{ MU, A, 0, 0, "(b*)++m", "bxbbxbbbxm" },
529	{ MU, A, 0, 0, "(b*)++m", "bxbbxbbbxbbm" },
530	{ MU, A, 0, 0, "(b*)*+m", "bxbbxbbbxm" },
531	{ MU, A, 0, 0, "(b*)*+m", "bxbbxbbbxbbm" },
532	{ MU, A, 0, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
533	{ MU, A, 0, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
534	{ MU, A, 0, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
535	{ MU, A, 0, 0, "(a|(b))++m", "mababbaaxababbaam" },
536	{ MU, A, 0, 0, "((a)|b)*+m", "mababbaaxababbaam" },
537	{ MU, A, 0, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
538	{ MU, A, 0, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
539	{ MU, A, 0, 0, "(?:(b*))++m", "bxbbxbbbxm" },
540	{ MU, A, 0, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
541	{ MU, A, 0, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
542	{ MU, A, 0, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
543	{ MU, A, 0, 0, "((b*))++m", "bxbbxbbbxm" },
544	{ MU, A, 0, 0, "((b*))++m", "bxbbxbbbxbbm" },
545	{ MU, A, 0, 0, "((b*))*+m", "bxbbxbbbxm" },
546	{ MU, A, 0, 0, "((b*))*+m", "bxbbxbbbxbbm" },
547	{ MU, A, 0, 0 | F_NOMATCH, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
548	{ MU, A, 0, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
549	{ MU, A, 0, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
550	{ MU, A, 0, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
551	{ MU, A, 0, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
552
553	/* Back references. */
554	{ MU, A, 0, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
555	{ CMU, A, 0, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
556	{ CM, A, 0, 0, "(a{2,4})\\1", "AaAaaAaA" },
557	{ MU, A, 0, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
558	{ MU, A, 0, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
559	{ MU, A, 0, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
560	{ MU, A, 0, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
561	{ MU, A, 0, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
562	{ MU, A, 0, 0, "(?:(aa)|b)\\1?b", "bb" },
563	{ CMU, A, 0, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
564	{ MU, A, 0, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
565	{ CMU, A, 0, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
566	{ MU, A, 0, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
567	{ CM, A, 0, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
568	{ MU, A, 0, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
569	{ MU, A, 0, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
570	{ M, A, 0, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
571	{ MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
572	{ MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{0,2}", "wwwww." },
573	{ MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwww" },
574	{ MUP, A, 0, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwwww" },
575	{ PCRE2_UCP, 0, 0, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
576	{ CMUP, A, 0, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
577	{ MU | PCRE2_DUPNAMES, A, 0, 0 | F_NOMATCH, "\\k<A>{1,3}(?<A>aa)(?<A>bb)", "aabb" },
578	{ MU | PCRE2_DUPNAMES | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\k<A>{1,3}(?<A>aa)(?<A>bb)", "aabb" },
579	{ MU | PCRE2_DUPNAMES | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\k<A>*(?<A>aa)(?<A>bb)", "aabb" },
580	{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?<A>aa)(?<A>bb)\\k<A>{0,3}aaaaaa", "aabbaaaaaa" },
581	{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?<A>aa)(?<A>bb)\\k<A>{2,5}bb", "aabbaaaabb" },
582	{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{0,3}m", "aaaaaaaabbbbaabbbbm" },
583	{ MU | PCRE2_DUPNAMES, A, 0, 0 | F_NOMATCH, "\\k<A>{1,3}?(?<A>aa)(?<A>bb)", "aabb" },
584	{ MU | PCRE2_DUPNAMES | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\k<A>{1,3}?(?<A>aa)(?<A>bb)", "aabb" },
585	{ MU | PCRE2_DUPNAMES, A, 0, 0, "\\k<A>*?(?<A>aa)(?<A>bb)", "aabb" },
586	{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{0,3}?m", "aaaaaabbbbbbaabbbbbbbbbbm" },
587	{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>*?m", "aaaaaabbbbbbaabbbbbbbbbbm" },
588	{ MU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>aa)|(?<A>bb))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" },
589	{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}M", "aaaaaaaabbbbaabbbbm" },
590	{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{1,3}M", "aaaaaaaabbbbaabbbbm" },
591	{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}?M", "aaaaaabbbbbbaabbbbbbbbbbm" },
592	{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" },
593
594	/* Assertions. */
595	{ MU, A, 0, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
596	{ MU, A, 0, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
597	{ MU, A, 0, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
598	{ MU, A, 0, 0, "(?<=aaa|aa|a)a", "aaa" },
599	{ MU, A, 0, 2, "(?<=aaa|aa|a)a", "aaa" },
600	{ M, A, 0, 0, "(?<=aaa|aa|a)a", "aaa" },
601	{ M, A, 0, 2, "(?<=aaa|aa|a)a", "aaa" },
602	{ MU, A, 0, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
603	{ MU, A, 0, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
604	{ MU, A, 0, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
605	{ MU, A, 0, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
606	{ MU, A, 0, 0, "((?(?=(a))a)+k)", "bbak" },
607	{ MU, A, 0, 0, "((?(?=a)a)+k)", "bbak" },
608	{ MU, A, 0, 0 | F_NOMATCH, "(?=(?>(a))m)amk", "a k" },
609	{ MU, A, 0, 0 | F_NOMATCH, "(?!(?>(a))m)amk", "a k" },
610	{ MU, A, 0, 0 | F_NOMATCH, "(?>(?=(a))am)amk", "a k" },
611	{ MU, A, 0, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
612	{ MU, A, 0, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
613	{ MU, A, 0, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
614	{ MU, A, 0, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
615	{ MU, A, 0, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
616	{ MU, A, 0, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
617	{ MU, A, 0, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
618	{ MU, A, 0, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
619	{ MU, A, 0, 0, "a(?=(?C)\\B(?C`x`))b", "ab" },
620	{ MU, A, 0, 0, "a(?!(?C)\\B(?C`x`))bb|ab", "abb" },
621	{ MU, A, 0, 0, "a(?=\\b|(?C)\\B(?C`x`))b", "ab" },
622	{ MU, A, 0, 0, "a(?!\\b|(?C)\\B(?C`x`))bb|ab", "abb" },
623	{ MU, A, 0, 0, "c(?(?=(?C)\\B(?C`x`))ab|a)", "cab" },
624	{ MU, A, 0, 0, "c(?(?!(?C)\\B(?C`x`))ab|a)", "cab" },
625	{ MU, A, 0, 0, "c(?(?=\\b|(?C)\\B(?C`x`))ab|a)", "cab" },
626	{ MU, A, 0, 0, "c(?(?!\\b|(?C)\\B(?C`x`))ab|a)", "cab" },
627	{ MU, A, 0, 0, "a(?=)b", "ab" },
628	{ MU, A, 0, 0 | F_NOMATCH, "a(?!)b", "ab" },
629
630	/* Not empty, ACCEPT, FAIL */
631	{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "a*", "bcx" },
632	{ MU, A, PCRE2_NOTEMPTY, 0, "a*", "bcaad" },
633	{ MU, A, PCRE2_NOTEMPTY, 0, "a*?", "bcaad" },
634	{ MU, A, PCRE2_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
635	{ MU, A, 0, 0, "a(*ACCEPT)b", "ab" },
636	{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "a*(*ACCEPT)b", "bcx" },
637	{ MU, A, PCRE2_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
638	{ MU, A, PCRE2_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
639	{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "(?:z|a*(*ACCEPT)b)", "bcx" },
640	{ MU, A, PCRE2_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
641	{ MU, A, PCRE2_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
642	{ MU, A, PCRE2_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
643	{ MU, A, PCRE2_NOTEMPTY_ATSTART, 0 | F_NOMATCH, "a*(*ACCEPT)b", "" },
644	{ MU, A, 0, 0, "((a(*ACCEPT)b))", "ab" },
645	{ MU, A, 0, 0, "(a(*FAIL)a|a)", "aaa" },
646	{ MU, A, 0, 0, "(?=ab(*ACCEPT)b)a", "ab" },
647	{ MU, A, 0, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
648	{ MU, A, 0, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
649	{ MU, A, PCRE2_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
650	{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "(?=A)", "AB" },
651
652	/* Conditional blocks. */
653	{ MU, A, 0, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
654	{ MU, A, 0, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
655	{ MU, A, 0, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
656	{ MU, A, 0, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
657	{ MU, A, 0, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
658	{ MU, A, 0, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
659	{ MU, A, 0, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
660	{ MU, A, 0, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
661	{ MU, A, 0, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
662	{ MU, A, 0, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
663	{ MU, A, 0, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
664	{ MU, A, 0, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
665	{ MU, A, 0, 0, "(?(?=a)ab)", "a" },
666	{ MU, A, 0, 0, "(?(?<!b)c)", "b" },
667	{ MU, A, 0, 0, "(?(DEFINE)a(b))", "a" },
668	{ MU, A, 0, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
669	{ MU, A, 0, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
670	{ MU, A, 0, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
671	{ MU, A, 0, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
672	{ MU, A, 0, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
673	{ MU, A, 0, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
674	{ MU, A, 0, 0, "(c)?\?(?(1)a|b)", "cbb" },
675	{ MU, A, 0, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
676	{ MU, A, 0, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
677	{ MU, A, 0, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
678	{ MU, A, 0, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
679	{ MU, A, 0, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
680	{ MU, A, 0, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
681	{ MU, A, 0, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
682	{ MU, A, 0, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
683	{ MU, A, 0, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
684	{ MU, A, 0, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" },
685	{ MU, A, 0, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" },
686	{ MU, A, 0, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" },
687	{ MU, A, 0, 0, "((?:a|aa)(?(1)aaa))x", "aax" },
688	{ MU, A, 0, 0, "(?(?!)a|b)", "ab" },
689	{ MU, A, 0, 0, "(?(?!)a)", "ab" },
690	{ MU, A, 0, 0 | F_NOMATCH, "(?(?!)a|b)", "ac" },
691
692	/* Set start of match. */
693	{ MU, A, 0, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
694	{ MU, A, 0, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
695	{ MU, A, 0, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
696	{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "a\\K(*ACCEPT)b", "aa" },
697	{ MU, A, PCRE2_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
698
699	/* First line. */
700	{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_PROPERTY, "\\p{Any}a", "bb\naaa" },
701	{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}a", "bb\r\naaa" },
702	{ MU | PCRE2_FIRSTLINE, A, 0, 0, "(?<=a)", "a" },
703	{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "[^a][^b]", "ab" },
704	{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "a", "\na" },
705	{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "[abc]", "\na" },
706	{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "^a", "\na" },
707	{ MU | PCRE2_FIRSTLINE, A, 0, 0 | F_NOMATCH, "^(?<=\n)", "\na" },
708	{ MU | PCRE2_FIRSTLINE, A, 0, 0, "\xf0\x90\x90\x80", "\xf0\x90\x90\x80" },
709	{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_ANY, 0, 0 | F_NOMATCH, "#", "\xc2\x85#" },
710	{ M | PCRE2_FIRSTLINE, PCRE2_NEWLINE_ANY, 0, 0 | F_NOMATCH, "#", "\x85#" },
711	{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_ANY, 0, 0 | F_NOMATCH, "^#", "\xe2\x80\xa8#" },
712	{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0 | F_PROPERTY, "\\p{Any}", "\r\na" },
713	{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0, ".", "\r" },
714	{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0, "a", "\ra" },
715	{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0 | F_NOMATCH, "ba", "bbb\r\nba" },
716	{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}{4}|a", "\r\na" },
717	{ MU | PCRE2_FIRSTLINE, PCRE2_NEWLINE_CRLF, 0, 1, ".", "\r\n" },
718	{ PCRE2_FIRSTLINE | PCRE2_DOTALL, PCRE2_NEWLINE_LF, 0, 0 | F_NOMATCH, "ab.", "ab" },
719	{ MU | PCRE2_FIRSTLINE, A, 0, 1 | F_NOMATCH, "^[a-d0-9]", "\nxx\nd" },
720	{ PCRE2_FIRSTLINE | PCRE2_DOTALL, PCRE2_NEWLINE_ANY, 0, 0, "....a", "012\n0a" },
721	{ MU | PCRE2_FIRSTLINE, A, 0, 0, "[aC]", "a" },
722
723	/* Recurse. */
724	{ MU, A, 0, 0, "(a)(?1)", "aa" },
725	{ MU, A, 0, 0, "((a))(?1)", "aa" },
726	{ MU, A, 0, 0, "(b|a)(?1)", "aa" },
727	{ MU, A, 0, 0, "(b|(a))(?1)", "aa" },
728	{ MU, A, 0, 0 | F_NOMATCH, "((a)(b)(?:a*))(?1)", "aba" },
729	{ MU, A, 0, 0, "((a)(b)(?:a*))(?1)", "abab" },
730	{ MU, A, 0, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
731	{ MU, A, 0, 0, "((?2)b|(a)){2}(?1)", "aabab" },
732	{ MU, A, 0, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
733	{ MU, A, 0, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
734	{ MU, A, 0, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
735	{ MU, A, 0, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
736	{ MU, A, 0, 0, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
737	{ MU, A, 0, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
738	{ MU, A, 0, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
739	{ MU, A, 0, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
740	{ MU, A, 0, 0, "b|<(?R)*>", "<<b>" },
741	{ MU, A, 0, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
742	{ MU, A, 0, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
743	{ MU, A, 0, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" },
744	{ MU, A, 0, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" },
745	{ MU, A, 0, 0, "(a)((?(R1)a|b))(?2)", "ababba" },
746	{ MU, A, 0, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" },
747	{ MU, A, 0, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
748	{ MU, A, 0, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
749	{ MU, A, 0, 0, "((?(R)a|(?1)){3})", "XaaaaaaaaaX" },
750	{ MU, A, 0, 0, "((?:(?(R)a|(?1))){3})", "XaaaaaaaaaX" },
751	{ MU, A, 0, 0, "((?(R)a|(?1)){1,3})aaaaaa", "aaaaaaaaXaaaaaaaaa" },
752	{ MU, A, 0, 0, "((?(R)a|(?1)){1,3}?)M", "aaaM" },
753	{ MU, A, 0, 0, "((.)(?:.|\\2(?1))){0}#(?1)#", "#aabbccdde# #aabbccddee#" },
754	{ MU, A, 0, 0, "((.)(?:\\2|\\2{4}b)){0}#(?:(?1))+#", "#aaaab# #aaaaab#" },
755	{ MU, A, 0, 0 | F_NOMATCH, "(?1)$((.|\\2xx){1,2})", "abc" },
756
757	/* 16 bit specific tests. */
758	{ CM, A, 0, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
759	{ CM, A, 0, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
760	{ CM, A, 0, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" },
761	{ CM, A, 0, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" },
762	{ CM, A, 0, 0 | F_FORCECONV, "[a-\xed\xb0\x80]", "A" },
763	{ CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[a-\\x{dc00}]", "B" },
764	{ CM, A, 0, 0 | F_NO8 | F_NOMATCH | F_FORCECONV, "[b-\\x{dc00}]", "a" },
765	{ CM, A, 0, 0 | F_NO8 | F_FORCECONV, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" },
766	{ CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" },
767	{ CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" },
768	{ CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" },
769	{ CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" },
770	{ CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
771	{ CM, A, 0, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
772	{ CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
773	{ CM, A, 0, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" },
774	{ M, A, 0, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
775	{ M, A, 0, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
776	{ CM, A, 0, 0 | F_FORCECONV, ".\\B.", "\xed\xa0\x80\xed\xb0\x80" },
777	{ CM, A, 0, 0 | F_FORCECONV, "\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" },
778	{ CM, A, 0, 0 | F_FORCECONV, "\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80" },
779	{ CM, A, 0, 0 | F_FORCECONV | F_NOMATCH, "\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" },
780	{ CM | PCRE2_EXTENDED, A, 0, 0 | F_FORCECONV, "\xed\xa0\x80 \xed\xb0\x80 !", "\xed\xa0\x80\xed\xb0\x80!" },
781	{ CM, A, 0, 0 | F_FORCECONV, "\xed\xa0\x80+#[^#]+\xed\xa0\x80", "\xed\xa0\x80#a\xed\xa0\x80" },
782	{ CM, A, 0, 0 | F_FORCECONV, "(\xed\xa0\x80+)#\\1", "\xed\xa0\x80\xed\xa0\x80#\xed\xa0\x80\xed\xa0\x80" },
783	{ M, PCRE2_NEWLINE_ANY, 0, 0 | F_NO8 | F_FORCECONV, "^-", "a--\xe2\x80\xa8--" },
784	{ 0, BSR(PCRE2_BSR_UNICODE), 0, 0 | F_NO8 | F_FORCECONV, "\\R", "ab\xe2\x80\xa8" },
785	{ 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\v", "ab\xe2\x80\xa9" },
786	{ 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\h", "ab\xe1\xa0\x8e" },
787	{ 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\v+?\\V+?#", "\xe2\x80\xa9\xe2\x80\xa9\xef\xbf\xbf\xef\xbf\xbf#" },
788	{ 0, 0, 0, 0 | F_NO8 | F_FORCECONV, "\\h+?\\H+?#", "\xe1\xa0\x8e\xe1\xa0\x8e\xef\xbf\xbf\xef\xbf\xbf#" },
789
790	/* Partial matching. */
791	{ MU, A, PCRE2_PARTIAL_SOFT, 0, "ab", "a" },
792	{ MU, A, PCRE2_PARTIAL_SOFT, 0, "ab|a", "a" },
793	{ MU, A, PCRE2_PARTIAL_HARD, 0, "ab|a", "a" },
794	{ MU, A, PCRE2_PARTIAL_SOFT, 0, "\\b#", "a" },
795	{ MU, A, PCRE2_PARTIAL_SOFT, 0, "(?<=a)b", "a" },
796	{ MU, A, PCRE2_PARTIAL_SOFT, 0, "abc|(?<=xxa)bc", "xxab" },
797	{ MU, A, PCRE2_PARTIAL_SOFT, 0, "a\\B", "a" },
798	{ MU, A, PCRE2_PARTIAL_HARD, 0, "a\\b", "a" },
799
800	/* (*MARK) verb. */
801	{ MU, A, 0, 0, "a(*MARK:aa)a", "ababaa" },
802	{ MU, A, 0, 0 | F_NOMATCH, "a(*:aa)a", "abab" },
803	{ MU, A, 0, 0, "a(*:aa)(b(*:bb)b|bc)", "abc" },
804	{ MU, A, 0, 0 | F_NOMATCH, "a(*:1)x|b(*:2)y", "abc" },
805	{ MU, A, 0, 0, "(?>a(*:aa))b|ac", "ac" },
806	{ MU, A, 0, 0, "(?(DEFINE)(a(*:aa)))(?1)", "a" },
807	{ MU, A, 0, 0 | F_NOMATCH, "(?(DEFINE)((a)(*:aa)))(?1)b", "aa" },
808	{ MU, A, 0, 0, "(?(DEFINE)(a(*:aa)))a(?1)b|aac", "aac" },
809	{ MU, A, 0, 0, "(a(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
810	{ MU, A, 0, 0, "(a(*:aa)){0}(?:b(?1)b)+", "babba" },
811	{ MU, A, 0, 0 | F_NOMATCH, "(a(*:aa)){0}(?:b(?1)b)+", "ba" },
812	{ MU, A, 0, 0, "(a\\K(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
813	{ MU, A, 0, 0, "(a\\K(*:aa)){0}(?:b(?1)b)+", "babba" },
814	{ MU, A, 0, 0 | F_NOMATCH, "(a\\K(*:aa)){0}(?:b(?1)b)+", "ba" },
815	{ MU, A, 0, 0 | F_NOMATCH, "(*:mark)m", "a" },
816
817	/* (*COMMIT) verb. */
818	{ MU, A, 0, 0 | F_NOMATCH, "a(*COMMIT)b", "ac" },
819	{ MU, A, 0, 0, "aa(*COMMIT)b", "xaxaab" },
820	{ MU, A, 0, 0 | F_NOMATCH, "a(*COMMIT)(*:msg)b|ac", "ac" },
821	{ MU, A, 0, 0 | F_NOMATCH, "(a(*COMMIT)b)++", "abac" },
822	{ MU, A, 0, 0 | F_NOMATCH, "((a)(*COMMIT)b)++", "abac" },
823	{ MU, A, 0, 0 | F_NOMATCH, "(?=a(*COMMIT)b)ab|ad", "ad" },
824
825	/* (*PRUNE) verb. */
826	{ MU, A, 0, 0, "aa\\K(*PRUNE)b", "aaab" },
827	{ MU, A, 0, 0, "aa(*PRUNE:bb)b|a", "aa" },
828	{ MU, A, 0, 0, "(a)(a)(*PRUNE)b|(a)", "aa" },
829	{ MU, A, 0, 0, "(a)(a)(a)(a)(a)(a)(a)(a)(*PRUNE)b|(a)", "aaaaaaaa" },
830	{ MU, A, PCRE2_PARTIAL_SOFT, 0, "a(*PRUNE)a|", "a" },
831	{ MU, A, PCRE2_PARTIAL_SOFT, 0, "a(*PRUNE)a|m", "a" },
832	{ MU, A, 0, 0 | F_NOMATCH, "(?=a(*PRUNE)b)ab|ad", "ad" },
833	{ MU, A, 0, 0, "a(*COMMIT)(*PRUNE)d|bc", "abc" },
834	{ MU, A, 0, 0, "(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
835	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?=a(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
836	{ MU, A, 0, 0, "(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
837	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?=(a)(*COMMIT)b)a(*PRUNE)c|bc", "abc" },
838	{ MU, A, 0, 0, "(a(*COMMIT)b){0}a(?1)(*PRUNE)c|bc", "abc" },
839	{ MU, A, 0, 0 | F_NOMATCH, "(a(*COMMIT)b){0}a(*COMMIT)(?1)(*PRUNE)c|bc", "abc" },
840	{ MU, A, 0, 0, "(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
841	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(a(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
842	{ MU, A, 0, 0, "((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
843	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)((a)(*COMMIT)b)++(*PRUNE)d|c", "ababc" },
844	{ MU, A, 0, 0, "(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" },
845	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*abab(*PRUNE)d|ba", "ababab" },
846	{ MU, A, 0, 0, "(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" },
847	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+abab(*PRUNE)d|ba", "ababab" },
848	{ MU, A, 0, 0, "(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" },
849	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)?ab(*PRUNE)d|ba", "aba" },
850	{ MU, A, 0, 0, "(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" },
851	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)*?n(*PRUNE)d|ba", "abababn" },
852	{ MU, A, 0, 0, "(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" },
853	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)+?n(*PRUNE)d|ba", "abababn" },
854	{ MU, A, 0, 0, "(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" },
855	{ MU, A, 0, 0 | F_NOMATCH, "(*COMMIT)(?>a(*COMMIT)b)??n(*PRUNE)d|bn", "abn" },
856
857	/* (*SKIP) verb. */
858	{ MU, A, 0, 0 | F_NOMATCH, "(?=a(*SKIP)b)ab|ad", "ad" },
859	{ MU, A, 0, 0, "(\\w+(*SKIP)#)", "abcd,xyz#," },
860	{ MU, A, 0, 0, "\\w+(*SKIP)#|mm", "abcd,xyz#," },
861	{ MU, A, 0, 0 | F_NOMATCH, "b+(?<=(*SKIP)#c)|b+", "#bbb" },
862
863	/* (*THEN) verb. */
864	{ MU, A, 0, 0, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcaabcaabcaabcnacm" },
865	{ MU, A, 0, 0 | F_NOMATCH, "((?:a(*THEN)|aab)(*THEN)c|a+)+m", "aabcm" },
866	{ MU, A, 0, 0, "((?:a(*THEN)|aab)c|a+)+m", "aabcaabcnmaabcaabcm" },
867	{ MU, A, 0, 0, "((?:a|aab)(*THEN)c|a+)+m", "aam" },
868	{ MU, A, 0, 0, "((?:a(*COMMIT)|aab)(*THEN)c|a+)+m", "aam" },
869	{ MU, A, 0, 0, "(?(?=a(*THEN)b)ab|ad)", "ad" },
870	{ MU, A, 0, 0, "(?(?!a(*THEN)b)ad|add)", "add" },
871	{ MU, A, 0, 0 | F_NOMATCH, "(?(?=a)a(*THEN)b|ad)", "ad" },
872	{ MU, A, 0, 0, "(?!(?(?=a)ab|b(*THEN)d))bn|bnn", "bnn" },
873	{ MU, A, 0, 0, "(?=(*THEN: ))* ", " " },
874	{ MU, A, 0, 0, "a(*THEN)(?R) |", "a" },
875
876	/* Recurse and control verbs. */
877	{ MU, A, 0, 0, "(a(*ACCEPT)b){0}a(?1)b", "aacaabb" },
878	{ MU, A, 0, 0, "((a)\\2(*ACCEPT)b){0}a(?1)b", "aaacaaabb" },
879	{ MU, A, 0, 0, "((ab|a(*ACCEPT)x)+|ababababax){0}_(?1)_", "_ababababax_ _ababababa_" },
880	{ MU, A, 0, 0, "((.)(?:A(*ACCEPT)|(?1)\\2)){0}_(?1)_", "_bcdaAdcb_bcdaAdcb_" },
881	{ MU, A, 0, 0, "((*MARK:m)(?:a|a(*COMMIT)b|aa)){0}_(?1)_", "_ab_" },
882	{ MU, A, 0, 0, "((*MARK:m)(?:a|a(*COMMIT)b|aa)){0}_(?1)_|(_aa_)", "_aa_" },
883	{ MU, A, 0, 0, "(a(*COMMIT)(?:b|bb)|c(*ACCEPT)d|dd){0}_(?1)+_", "_ax_ _cd_ _abbb_ _abcd_ _abbcdd_" },
884	{ MU, A, 0, 0, "((.)(?:.|(*COMMIT)\\2{3}(*ACCEPT).*|.*)){0}_(?1){0,4}_", "_aaaabbbbccccddd_ _aaaabbbbccccdddd_" },
885
886#ifdef SUPPORT_UNICODE
887	/* Script runs and iterations. */
888	{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)*#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
889	{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)+#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
890	{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)*?#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
891	{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)+?#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
892	{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)*+#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
893	{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)++#", "!abcdefghijklmno!abcdefghijklmno!abcdef#" },
894	{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)?#", "!ab!abc!ab!ab#" },
895	{ MU, A, 0, 0, "!(*sr:\\w\\w|\\w\\w\\w)??#", "!ab!abc!ab!ab#" },
896#endif
897
898	/* Deep recursion. */
899	{ MU, A, 0, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
900	{ MU, A, 0, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
901	{ MU, A, 0, 0, "((a?)+)+b", "aaaaaaaaaaaa b" },
902
903	/* Deep recursion: Stack limit reached. */
904	{ M, A, 0, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
905	{ M, A, 0, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
906	{ M, A, 0, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
907	{ M, A, 0, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
908	{ M, A, 0, 0 | F_NOMATCH, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
909
910	{ 0, 0, 0, 0, NULL, NULL }
911};
912
913#ifdef SUPPORT_PCRE2_8
914static pcre2_jit_stack_8* callback8(void *arg)
915{
916	return (pcre2_jit_stack_8 *)arg;
917}
918#endif
919
920#ifdef SUPPORT_PCRE2_16
921static pcre2_jit_stack_16* callback16(void *arg)
922{
923	return (pcre2_jit_stack_16 *)arg;
924}
925#endif
926
927#ifdef SUPPORT_PCRE2_32
928static pcre2_jit_stack_32* callback32(void *arg)
929{
930	return (pcre2_jit_stack_32 *)arg;
931}
932#endif
933
934#ifdef SUPPORT_PCRE2_8
935static pcre2_jit_stack_8 *stack8;
936
937static pcre2_jit_stack_8 *getstack8(void)
938{
939	if (!stack8)
940		stack8 = pcre2_jit_stack_create_8(1, 1024 * 1024, NULL);
941	return stack8;
942}
943
944static void setstack8(pcre2_match_context_8 *mcontext)
945{
946	if (!mcontext) {
947		if (stack8)
948			pcre2_jit_stack_free_8(stack8);
949		stack8 = NULL;
950		return;
951	}
952
953	pcre2_jit_stack_assign_8(mcontext, callback8, getstack8());
954}
955#endif /* SUPPORT_PCRE2_8 */
956
957#ifdef SUPPORT_PCRE2_16
958static pcre2_jit_stack_16 *stack16;
959
960static pcre2_jit_stack_16 *getstack16(void)
961{
962	if (!stack16)
963		stack16 = pcre2_jit_stack_create_16(1, 1024 * 1024, NULL);
964	return stack16;
965}
966
967static void setstack16(pcre2_match_context_16 *mcontext)
968{
969	if (!mcontext) {
970		if (stack16)
971			pcre2_jit_stack_free_16(stack16);
972		stack16 = NULL;
973		return;
974	}
975
976	pcre2_jit_stack_assign_16(mcontext, callback16, getstack16());
977}
978#endif /* SUPPORT_PCRE2_16 */
979
980#ifdef SUPPORT_PCRE2_32
981static pcre2_jit_stack_32 *stack32;
982
983static pcre2_jit_stack_32 *getstack32(void)
984{
985	if (!stack32)
986		stack32 = pcre2_jit_stack_create_32(1, 1024 * 1024, NULL);
987	return stack32;
988}
989
990static void setstack32(pcre2_match_context_32 *mcontext)
991{
992	if (!mcontext) {
993		if (stack32)
994			pcre2_jit_stack_free_32(stack32);
995		stack32 = NULL;
996		return;
997	}
998
999	pcre2_jit_stack_assign_32(mcontext, callback32, getstack32());
1000}
1001#endif /* SUPPORT_PCRE2_32 */
1002
1003#ifdef SUPPORT_PCRE2_16
1004
1005static int convert_utf8_to_utf16(PCRE2_SPTR8 input, PCRE2_UCHAR16 *output, int *offsetmap, int max_length)
1006{
1007	PCRE2_SPTR8 iptr = input;
1008	PCRE2_UCHAR16 *optr = output;
1009	unsigned int c;
1010
1011	if (max_length == 0)
1012		return 0;
1013
1014	while (*iptr && max_length > 1) {
1015		c = 0;
1016		if (offsetmap)
1017			*offsetmap++ = (int)(iptr - (unsigned char*)input);
1018
1019		if (*iptr < 0xc0)
1020			c = *iptr++;
1021		else if (!(*iptr & 0x20)) {
1022			c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
1023			iptr += 2;
1024		} else if (!(*iptr & 0x10)) {
1025			c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
1026			iptr += 3;
1027		} else if (!(*iptr & 0x08)) {
1028			c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
1029			iptr += 4;
1030		}
1031
1032		if (c < 65536) {
1033			*optr++ = c;
1034			max_length--;
1035		} else if (max_length <= 2) {
1036			*optr = '\0';
1037			return (int)(optr - output);
1038		} else {
1039			c -= 0x10000;
1040			*optr++ = 0xd800 | ((c >> 10) & 0x3ff);
1041			*optr++ = 0xdc00 | (c & 0x3ff);
1042			max_length -= 2;
1043			if (offsetmap)
1044				offsetmap++;
1045		}
1046	}
1047	if (offsetmap)
1048		*offsetmap = (int)(iptr - (unsigned char*)input);
1049	*optr = '\0';
1050	return (int)(optr - output);
1051}
1052
1053static int copy_char8_to_char16(PCRE2_SPTR8 input, PCRE2_UCHAR16 *output, int max_length)
1054{
1055	PCRE2_SPTR8 iptr = input;
1056	PCRE2_UCHAR16 *optr = output;
1057
1058	if (max_length == 0)
1059		return 0;
1060
1061	while (*iptr && max_length > 1) {
1062		*optr++ = *iptr++;
1063		max_length--;
1064	}
1065	*optr = '\0';
1066	return (int)(optr - output);
1067}
1068
/* Capacity, in elements, of the two 16 bit scratch arrays below. */
#define REGTEST_MAX_LENGTH16 4096
/* Scratch buffer that regression_tests() fills with the pattern converted or
   copied to 16 bit code units before compiling it. */
static PCRE2_UCHAR16 regtest_buf16[REGTEST_MAX_LENGTH16];
/* NOTE(review): presumably the offsetmap target of convert_utf8_to_utf16()
   for subject strings; its use lies outside this section - confirm. */
static int regtest_offsetmap16[REGTEST_MAX_LENGTH16];
1072
1073#endif /* SUPPORT_PCRE2_16 */
1074
1075#ifdef SUPPORT_PCRE2_32
1076
1077static int convert_utf8_to_utf32(PCRE2_SPTR8 input, PCRE2_UCHAR32 *output, int *offsetmap, int max_length)
1078{
1079	PCRE2_SPTR8 iptr = input;
1080	PCRE2_UCHAR32 *optr = output;
1081	unsigned int c;
1082
1083	if (max_length == 0)
1084		return 0;
1085
1086	while (*iptr && max_length > 1) {
1087		c = 0;
1088		if (offsetmap)
1089			*offsetmap++ = (int)(iptr - (unsigned char*)input);
1090
1091		if (*iptr < 0xc0)
1092			c = *iptr++;
1093		else if (!(*iptr & 0x20)) {
1094			c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
1095			iptr += 2;
1096		} else if (!(*iptr & 0x10)) {
1097			c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
1098			iptr += 3;
1099		} else if (!(*iptr & 0x08)) {
1100			c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
1101			iptr += 4;
1102		}
1103
1104		*optr++ = c;
1105		max_length--;
1106	}
1107	if (offsetmap)
1108		*offsetmap = (int)(iptr - (unsigned char*)input);
1109	*optr = 0;
1110	return (int)(optr - output);
1111}
1112
1113static int copy_char8_to_char32(PCRE2_SPTR8 input, PCRE2_UCHAR32 *output, int max_length)
1114{
1115	PCRE2_SPTR8 iptr = input;
1116	PCRE2_UCHAR32 *optr = output;
1117
1118	if (max_length == 0)
1119		return 0;
1120
1121	while (*iptr && max_length > 1) {
1122		*optr++ = *iptr++;
1123		max_length--;
1124	}
1125	*optr = '\0';
1126	return (int)(optr - output);
1127}
1128
/* Capacity, in elements, of the two 32 bit scratch arrays below. */
#define REGTEST_MAX_LENGTH32 4096
/* Scratch buffer that regression_tests() fills with the pattern converted or
   copied to 32 bit code units before compiling it. */
static PCRE2_UCHAR32 regtest_buf32[REGTEST_MAX_LENGTH32];
/* NOTE(review): presumably the offsetmap target of convert_utf8_to_utf32()
   for subject strings; its use lies outside this section - confirm. */
static int regtest_offsetmap32[REGTEST_MAX_LENGTH32];
1132
1133#endif /* SUPPORT_PCRE2_32 */
1134
/* Returns 1 when every byte of the zero terminated string `input` is in the
   7 bit ASCII range (0x00-0x7f), and 0 as soon as a byte above 127 is found.
   The empty string counts as ASCII. */
static int check_ascii(const char *input)
{
	const unsigned char *byte;

	for (byte = (const unsigned char *)input; *byte != 0; byte++) {
		if (*byte >= 0x80)
			return 0;
	}
	return 1;
}
1145
1146#define OVECTOR_SIZE 15
1147
1148static int regression_tests(void)
1149{
1150	struct regression_test_case *current = regression_test_cases;
1151	int error;
1152	PCRE2_SIZE err_offs;
1153	int is_successful;
1154	int is_ascii;
1155	int total = 0;
1156	int successful = 0;
1157	int successful_row = 0;
1158	int counter = 0;
1159	int jit_compile_mode;
1160	int utf = 0;
1161	int disabled_options = 0;
1162	int i;
1163#ifdef SUPPORT_PCRE2_8
1164	pcre2_code_8 *re8;
1165	pcre2_compile_context_8 *ccontext8;
1166	pcre2_match_data_8 *mdata8_1;
1167	pcre2_match_data_8 *mdata8_2;
1168	pcre2_match_context_8 *mcontext8;
1169	PCRE2_SIZE *ovector8_1 = NULL;
1170	PCRE2_SIZE *ovector8_2 = NULL;
1171	int return_value8[2];
1172#endif
1173#ifdef SUPPORT_PCRE2_16
1174	pcre2_code_16 *re16;
1175	pcre2_compile_context_16 *ccontext16;
1176	pcre2_match_data_16 *mdata16_1;
1177	pcre2_match_data_16 *mdata16_2;
1178	pcre2_match_context_16 *mcontext16;
1179	PCRE2_SIZE *ovector16_1 = NULL;
1180	PCRE2_SIZE *ovector16_2 = NULL;
1181	int return_value16[2];
1182	int length16;
1183#endif
1184#ifdef SUPPORT_PCRE2_32
1185	pcre2_code_32 *re32;
1186	pcre2_compile_context_32 *ccontext32;
1187	pcre2_match_data_32 *mdata32_1;
1188	pcre2_match_data_32 *mdata32_2;
1189	pcre2_match_context_32 *mcontext32;
1190	PCRE2_SIZE *ovector32_1 = NULL;
1191	PCRE2_SIZE *ovector32_2 = NULL;
1192	int return_value32[2];
1193	int length32;
1194#endif
1195
1196#if defined SUPPORT_PCRE2_8
1197	PCRE2_UCHAR8 cpu_info[128];
1198#elif defined SUPPORT_PCRE2_16
1199	PCRE2_UCHAR16 cpu_info[128];
1200#elif defined SUPPORT_PCRE2_32
1201	PCRE2_UCHAR32 cpu_info[128];
1202#endif
1203#if defined SUPPORT_UNICODE && ((defined(SUPPORT_PCRE2_8) + defined(SUPPORT_PCRE2_16) + defined(SUPPORT_PCRE2_32)) >= 2)
1204	int return_value;
1205#endif
1206
1207	/* This test compares the behaviour of interpreter and JIT. Although disabling
1208	utf or ucp may make tests fail, if the pcre2_match result is the SAME, it is
1209	still considered successful from pcre2_jit_test point of view. */
1210
1211#if defined SUPPORT_PCRE2_8
1212	pcre2_config_8(PCRE2_CONFIG_JITTARGET, &cpu_info);
1213#elif defined SUPPORT_PCRE2_16
1214	pcre2_config_16(PCRE2_CONFIG_JITTARGET, &cpu_info);
1215#elif defined SUPPORT_PCRE2_32
1216	pcre2_config_32(PCRE2_CONFIG_JITTARGET, &cpu_info);
1217#endif
1218
1219	printf("Running JIT regression tests\n");
1220	printf("  target CPU of SLJIT compiler: ");
1221	for (i = 0; cpu_info[i]; i++)
1222		printf("%c", (char)(cpu_info[i]));
1223	printf("\n");
1224
1225#if defined SUPPORT_PCRE2_8
1226	pcre2_config_8(PCRE2_CONFIG_UNICODE, &utf);
1227#elif defined SUPPORT_PCRE2_16
1228	pcre2_config_16(PCRE2_CONFIG_UNICODE, &utf);
1229#elif defined SUPPORT_PCRE2_32
1230	pcre2_config_32(PCRE2_CONFIG_UNICODE, &utf);
1231#endif
1232
1233	if (!utf)
1234		disabled_options |= PCRE2_UTF;
1235#ifdef SUPPORT_PCRE2_8
1236	printf("  in  8 bit mode with UTF-8  %s:\n", utf ? "enabled" : "disabled");
1237#endif
1238#ifdef SUPPORT_PCRE2_16
1239	printf("  in 16 bit mode with UTF-16 %s:\n", utf ? "enabled" : "disabled");
1240#endif
1241#ifdef SUPPORT_PCRE2_32
1242	printf("  in 32 bit mode with UTF-32 %s:\n", utf ? "enabled" : "disabled");
1243#endif
1244
1245	while (current->pattern) {
1246		/* printf("\nPattern: %s :\n", current->pattern); */
1247		total++;
1248		is_ascii = 0;
1249		if (!(current->start_offset & F_PROPERTY))
1250			is_ascii = check_ascii(current->pattern) && check_ascii(current->input);
1251
1252		if (current->match_options & PCRE2_PARTIAL_SOFT)
1253			jit_compile_mode = PCRE2_JIT_PARTIAL_SOFT;
1254		else if (current->match_options & PCRE2_PARTIAL_HARD)
1255			jit_compile_mode = PCRE2_JIT_PARTIAL_HARD;
1256		else
1257			jit_compile_mode = PCRE2_JIT_COMPLETE;
1258		error = 0;
1259#ifdef SUPPORT_PCRE2_8
1260		re8 = NULL;
1261		ccontext8 = pcre2_compile_context_create_8(NULL);
1262		if (ccontext8) {
1263			if (GET_NEWLINE(current->newline))
1264				pcre2_set_newline_8(ccontext8, GET_NEWLINE(current->newline));
1265			if (GET_BSR(current->newline))
1266				pcre2_set_bsr_8(ccontext8, GET_BSR(current->newline));
1267
1268			if (!(current->start_offset & F_NO8)) {
1269				re8 = pcre2_compile_8((PCRE2_SPTR8)current->pattern, PCRE2_ZERO_TERMINATED,
1270					current->compile_options & ~disabled_options,
1271					&error, &err_offs, ccontext8);
1272
1273				if (!re8 && (utf || is_ascii))
1274					printf("\n8 bit: Cannot compile pattern \"%s\": %d\n", current->pattern, error);
1275			}
1276			pcre2_compile_context_free_8(ccontext8);
1277		}
1278		else
1279			printf("\n8 bit: Cannot allocate compile context\n");
1280#endif
1281#ifdef SUPPORT_PCRE2_16
1282		if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
1283			convert_utf8_to_utf16((PCRE2_SPTR8)current->pattern, regtest_buf16, NULL, REGTEST_MAX_LENGTH16);
1284		else
1285			copy_char8_to_char16((PCRE2_SPTR8)current->pattern, regtest_buf16, REGTEST_MAX_LENGTH16);
1286
1287		re16 = NULL;
1288		ccontext16 = pcre2_compile_context_create_16(NULL);
1289		if (ccontext16) {
1290			if (GET_NEWLINE(current->newline))
1291				pcre2_set_newline_16(ccontext16, GET_NEWLINE(current->newline));
1292			if (GET_BSR(current->newline))
1293				pcre2_set_bsr_16(ccontext16, GET_BSR(current->newline));
1294
1295			if (!(current->start_offset & F_NO16)) {
1296				re16 = pcre2_compile_16(regtest_buf16, PCRE2_ZERO_TERMINATED,
1297					current->compile_options & ~disabled_options,
1298					&error, &err_offs, ccontext16);
1299
1300				if (!re16 && (utf || is_ascii))
1301					printf("\n16 bit: Cannot compile pattern \"%s\": %d\n", current->pattern, error);
1302			}
1303			pcre2_compile_context_free_16(ccontext16);
1304		}
1305		else
1306			printf("\n16 bit: Cannot allocate compile context\n");
1307#endif
1308#ifdef SUPPORT_PCRE2_32
1309		if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
1310			convert_utf8_to_utf32((PCRE2_SPTR8)current->pattern, regtest_buf32, NULL, REGTEST_MAX_LENGTH32);
1311		else
1312			copy_char8_to_char32((PCRE2_SPTR8)current->pattern, regtest_buf32, REGTEST_MAX_LENGTH32);
1313
1314		re32 = NULL;
1315		ccontext32 = pcre2_compile_context_create_32(NULL);
1316		if (ccontext32) {
1317			if (GET_NEWLINE(current->newline))
1318				pcre2_set_newline_32(ccontext32, GET_NEWLINE(current->newline));
1319			if (GET_BSR(current->newline))
1320				pcre2_set_bsr_32(ccontext32, GET_BSR(current->newline));
1321
1322			if (!(current->start_offset & F_NO32)) {
1323				re32 = pcre2_compile_32(regtest_buf32, PCRE2_ZERO_TERMINATED,
1324					current->compile_options & ~disabled_options,
1325					&error, &err_offs, ccontext32);
1326
1327				if (!re32 && (utf || is_ascii))
1328					printf("\n32 bit: Cannot compile pattern \"%s\": %d\n", current->pattern, error);
1329			}
1330			pcre2_compile_context_free_32(ccontext32);
1331		}
1332		else
1333			printf("\n32 bit: Cannot allocate compile context\n");
1334#endif
1335
1336		counter++;
1337		if ((counter & 0x3) != 0) {
1338#ifdef SUPPORT_PCRE2_8
1339			setstack8(NULL);
1340#endif
1341#ifdef SUPPORT_PCRE2_16
1342			setstack16(NULL);
1343#endif
1344#ifdef SUPPORT_PCRE2_32
1345			setstack32(NULL);
1346#endif
1347		}
1348
1349#ifdef SUPPORT_PCRE2_8
1350		return_value8[0] = -1000;
1351		return_value8[1] = -1000;
1352		mdata8_1 = pcre2_match_data_create_8(OVECTOR_SIZE, NULL);
1353		mdata8_2 = pcre2_match_data_create_8(OVECTOR_SIZE, NULL);
1354		mcontext8 = pcre2_match_context_create_8(NULL);
1355		if (!mdata8_1 || !mdata8_2 || !mcontext8) {
1356			printf("\n8 bit: Cannot allocate match data\n");
1357			pcre2_match_data_free_8(mdata8_1);
1358			pcre2_match_data_free_8(mdata8_2);
1359			pcre2_match_context_free_8(mcontext8);
1360			pcre2_code_free_8(re8);
1361			re8 = NULL;
1362		} else {
1363			ovector8_1 = pcre2_get_ovector_pointer_8(mdata8_1);
1364			ovector8_2 = pcre2_get_ovector_pointer_8(mdata8_2);
1365			for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1366				ovector8_1[i] = -2;
1367			for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1368				ovector8_2[i] = -2;
1369			pcre2_set_match_limit_8(mcontext8, 10000000);
1370		}
1371		if (re8) {
1372			return_value8[1] = pcre2_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input),
1373				current->start_offset & OFFSET_MASK, current->match_options, mdata8_2, mcontext8);
1374
1375			if (pcre2_jit_compile_8(re8, jit_compile_mode)) {
1376				printf("\n8 bit: JIT compiler does not support \"%s\"\n", current->pattern);
1377			} else if ((counter & 0x1) != 0) {
1378				setstack8(mcontext8);
1379				return_value8[0] = pcre2_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input),
1380					current->start_offset & OFFSET_MASK, current->match_options, mdata8_1, mcontext8);
1381			} else {
1382				pcre2_jit_stack_assign_8(mcontext8, NULL, getstack8());
1383				return_value8[0] = pcre2_jit_match_8(re8, (PCRE2_SPTR8)current->input, strlen(current->input),
1384					current->start_offset & OFFSET_MASK, current->match_options, mdata8_1, mcontext8);
1385			}
1386		}
1387#endif
1388
1389#ifdef SUPPORT_PCRE2_16
1390		return_value16[0] = -1000;
1391		return_value16[1] = -1000;
1392		mdata16_1 = pcre2_match_data_create_16(OVECTOR_SIZE, NULL);
1393		mdata16_2 = pcre2_match_data_create_16(OVECTOR_SIZE, NULL);
1394		mcontext16 = pcre2_match_context_create_16(NULL);
1395		if (!mdata16_1 || !mdata16_2 || !mcontext16) {
1396			printf("\n16 bit: Cannot allocate match data\n");
1397			pcre2_match_data_free_16(mdata16_1);
1398			pcre2_match_data_free_16(mdata16_2);
1399			pcre2_match_context_free_16(mcontext16);
1400			pcre2_code_free_16(re16);
1401			re16 = NULL;
1402		} else {
1403			ovector16_1 = pcre2_get_ovector_pointer_16(mdata16_1);
1404			ovector16_2 = pcre2_get_ovector_pointer_16(mdata16_2);
1405			for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1406				ovector16_1[i] = -2;
1407			for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1408				ovector16_2[i] = -2;
1409			pcre2_set_match_limit_16(mcontext16, 10000000);
1410		}
1411		if (re16) {
1412			if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
1413				length16 = convert_utf8_to_utf16((PCRE2_SPTR8)current->input, regtest_buf16, regtest_offsetmap16, REGTEST_MAX_LENGTH16);
1414			else
1415				length16 = copy_char8_to_char16((PCRE2_SPTR8)current->input, regtest_buf16, REGTEST_MAX_LENGTH16);
1416
1417			return_value16[1] = pcre2_match_16(re16, regtest_buf16, length16,
1418				current->start_offset & OFFSET_MASK, current->match_options, mdata16_2, mcontext16);
1419
1420			if (pcre2_jit_compile_16(re16, jit_compile_mode)) {
1421				printf("\n16 bit: JIT compiler does not support \"%s\"\n", current->pattern);
1422			} else if ((counter & 0x1) != 0) {
1423				setstack16(mcontext16);
1424				return_value16[0] = pcre2_match_16(re16, regtest_buf16, length16,
1425					current->start_offset & OFFSET_MASK, current->match_options, mdata16_1, mcontext16);
1426			} else {
1427				pcre2_jit_stack_assign_16(mcontext16, NULL, getstack16());
1428				return_value16[0] = pcre2_jit_match_16(re16, regtest_buf16, length16,
1429					current->start_offset & OFFSET_MASK, current->match_options, mdata16_1, mcontext16);
1430			}
1431		}
1432#endif
1433
1434#ifdef SUPPORT_PCRE2_32
1435		return_value32[0] = -1000;
1436		return_value32[1] = -1000;
1437		mdata32_1 = pcre2_match_data_create_32(OVECTOR_SIZE, NULL);
1438		mdata32_2 = pcre2_match_data_create_32(OVECTOR_SIZE, NULL);
1439		mcontext32 = pcre2_match_context_create_32(NULL);
1440		if (!mdata32_1 || !mdata32_2 || !mcontext32) {
1441			printf("\n32 bit: Cannot allocate match data\n");
1442			pcre2_match_data_free_32(mdata32_1);
1443			pcre2_match_data_free_32(mdata32_2);
1444			pcre2_match_context_free_32(mcontext32);
1445			pcre2_code_free_32(re32);
1446			re32 = NULL;
1447		} else {
1448			ovector32_1 = pcre2_get_ovector_pointer_32(mdata32_1);
1449			ovector32_2 = pcre2_get_ovector_pointer_32(mdata32_2);
1450			for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1451				ovector32_1[i] = -2;
1452			for (i = 0; i < OVECTOR_SIZE * 2; ++i)
1453				ovector32_2[i] = -2;
1454			pcre2_set_match_limit_32(mcontext32, 10000000);
1455		}
1456		if (re32) {
1457			if ((current->compile_options & PCRE2_UTF) || (current->start_offset & F_FORCECONV))
1458				length32 = convert_utf8_to_utf32((PCRE2_SPTR8)current->input, regtest_buf32, regtest_offsetmap32, REGTEST_MAX_LENGTH32);
1459			else
1460				length32 = copy_char8_to_char32((PCRE2_SPTR8)current->input, regtest_buf32, REGTEST_MAX_LENGTH32);
1461
1462			return_value32[1] = pcre2_match_32(re32, regtest_buf32, length32,
1463				current->start_offset & OFFSET_MASK, current->match_options, mdata32_2, mcontext32);
1464
1465			if (pcre2_jit_compile_32(re32, jit_compile_mode)) {
1466				printf("\n32 bit: JIT compiler does not support \"%s\"\n", current->pattern);
1467			} else if ((counter & 0x1) != 0) {
1468				setstack32(mcontext32);
1469				return_value32[0] = pcre2_match_32(re32, regtest_buf32, length32,
1470					current->start_offset & OFFSET_MASK, current->match_options, mdata32_1, mcontext32);
1471			} else {
1472				pcre2_jit_stack_assign_32(mcontext32, NULL, getstack32());
1473				return_value32[0] = pcre2_jit_match_32(re32, regtest_buf32, length32,
1474					current->start_offset & OFFSET_MASK, current->match_options, mdata32_1, mcontext32);
1475			}
1476		}
1477#endif
1478
1479		/* printf("[%d-%d-%d|%d-%d|%d-%d|%d-%d]%s",
1480			return_value8[0], return_value16[0], return_value32[0],
1481			(int)ovector8_1[0], (int)ovector8_1[1],
1482			(int)ovector16_1[0], (int)ovector16_1[1],
1483			(int)ovector32_1[0], (int)ovector32_1[1],
1484			(current->compile_options & PCRE2_CASELESS) ? "C" : ""); */
1485
1486		/* If F_DIFF is set, just run the test, but do not compare the results.
1487		Segfaults can still be captured. */
1488
1489		is_successful = 1;
1490		if (!(current->start_offset & F_DIFF)) {
1491#if defined SUPPORT_UNICODE && ((defined(SUPPORT_PCRE2_8) + defined(SUPPORT_PCRE2_16) + defined(SUPPORT_PCRE2_32)) >= 2)
1492			if (!(current->start_offset & F_FORCECONV)) {
1493
1494				/* All results must be the same. */
1495#ifdef SUPPORT_PCRE2_8
1496				if ((return_value = return_value8[0]) != return_value8[1]) {
1497					printf("\n8 bit: Return value differs(J8:%d,I8:%d): [%d] '%s' @ '%s'\n",
1498						return_value8[0], return_value8[1], total, current->pattern, current->input);
1499					is_successful = 0;
1500				} else
1501#endif
1502#ifdef SUPPORT_PCRE2_16
1503				if ((return_value = return_value16[0]) != return_value16[1]) {
1504					printf("\n16 bit: Return value differs(J16:%d,I16:%d): [%d] '%s' @ '%s'\n",
1505						return_value16[0], return_value16[1], total, current->pattern, current->input);
1506					is_successful = 0;
1507				} else
1508#endif
1509#ifdef SUPPORT_PCRE2_32
1510				if ((return_value = return_value32[0]) != return_value32[1]) {
1511					printf("\n32 bit: Return value differs(J32:%d,I32:%d): [%d] '%s' @ '%s'\n",
1512						return_value32[0], return_value32[1], total, current->pattern, current->input);
1513					is_successful = 0;
1514				} else
1515#endif
1516#if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_16
1517				if (return_value8[0] != return_value16[0]) {
1518					printf("\n8 and 16 bit: Return value differs(J8:%d,J16:%d): [%d] '%s' @ '%s'\n",
1519						return_value8[0], return_value16[0],
1520						total, current->pattern, current->input);
1521					is_successful = 0;
1522				} else
1523#endif
1524#if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_32
1525				if (return_value8[0] != return_value32[0]) {
1526					printf("\n8 and 32 bit: Return value differs(J8:%d,J32:%d): [%d] '%s' @ '%s'\n",
1527						return_value8[0], return_value32[0],
1528						total, current->pattern, current->input);
1529					is_successful = 0;
1530				} else
1531#endif
1532#if defined SUPPORT_PCRE2_16 && defined SUPPORT_PCRE2_32
1533				if (return_value16[0] != return_value32[0]) {
1534					printf("\n16 and 32 bit: Return value differs(J16:%d,J32:%d): [%d] '%s' @ '%s'\n",
1535						return_value16[0], return_value32[0],
1536						total, current->pattern, current->input);
1537					is_successful = 0;
1538				} else
1539#endif
1540				if (return_value >= 0 || return_value == PCRE2_ERROR_PARTIAL) {
1541					if (return_value == PCRE2_ERROR_PARTIAL) {
1542						return_value = 2;
1543					} else {
1544						return_value *= 2;
1545					}
1546#ifdef SUPPORT_PCRE2_8
1547					return_value8[0] = return_value;
1548#endif
1549#ifdef SUPPORT_PCRE2_16
1550					return_value16[0] = return_value;
1551#endif
1552#ifdef SUPPORT_PCRE2_32
1553					return_value32[0] = return_value;
1554#endif
1555					/* Transform back the results. */
1556					if (current->compile_options & PCRE2_UTF) {
1557#ifdef SUPPORT_PCRE2_16
1558						for (i = 0; i < return_value; ++i) {
1559							if (ovector16_1[i] != PCRE2_UNSET)
1560								ovector16_1[i] = regtest_offsetmap16[ovector16_1[i]];
1561							if (ovector16_2[i] != PCRE2_UNSET)
1562								ovector16_2[i] = regtest_offsetmap16[ovector16_2[i]];
1563						}
1564#endif
1565#ifdef SUPPORT_PCRE2_32
1566						for (i = 0; i < return_value; ++i) {
1567							if (ovector32_1[i] != PCRE2_UNSET)
1568								ovector32_1[i] = regtest_offsetmap32[ovector32_1[i]];
1569							if (ovector32_2[i] != PCRE2_UNSET)
1570								ovector32_2[i] = regtest_offsetmap32[ovector32_2[i]];
1571						}
1572#endif
1573					}
1574
1575					for (i = 0; i < return_value; ++i) {
1576#if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_16
1577						if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
1578							printf("\n8 and 16 bit: Ovector[%d] value differs(J8:%d,I8:%d,J16:%d,I16:%d): [%d] '%s' @ '%s' \n",
1579								i, (int)ovector8_1[i], (int)ovector8_2[i], (int)ovector16_1[i], (int)ovector16_2[i],
1580								total, current->pattern, current->input);
1581							is_successful = 0;
1582						}
1583#endif
1584#if defined SUPPORT_PCRE2_8 && defined SUPPORT_PCRE2_32
1585						if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector32_1[i] || ovector8_1[i] != ovector32_2[i]) {
1586							printf("\n8 and 32 bit: Ovector[%d] value differs(J8:%d,I8:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1587								i, (int)ovector8_1[i], (int)ovector8_2[i], (int)ovector32_1[i], (int)ovector32_2[i],
1588								total, current->pattern, current->input);
1589							is_successful = 0;
1590						}
1591#endif
1592#if defined SUPPORT_PCRE2_16 && defined SUPPORT_PCRE2_32
1593						if (ovector16_1[i] != ovector16_2[i] || ovector16_1[i] != ovector32_1[i] || ovector16_1[i] != ovector32_2[i]) {
1594							printf("\n16 and 32 bit: Ovector[%d] value differs(J16:%d,I16:%d,J32:%d,I32:%d): [%d] '%s' @ '%s' \n",
1595								i, (int)ovector16_1[i], (int)ovector16_2[i], (int)ovector32_1[i], (int)ovector32_2[i],
1596								total, current->pattern, current->input);
1597							is_successful = 0;
1598						}
1599#endif
1600					}
1601				}
1602			} else
1603#endif /* more than one of SUPPORT_PCRE2_8, SUPPORT_PCRE2_16 and SUPPORT_PCRE2_32 */
1604			{
1605#ifdef SUPPORT_PCRE2_8
1606				if (return_value8[0] != return_value8[1]) {
1607					printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1608						return_value8[0], return_value8[1], total, current->pattern, current->input);
1609					is_successful = 0;
1610				} else if (return_value8[0] >= 0 || return_value8[0] == PCRE2_ERROR_PARTIAL) {
1611					if (return_value8[0] == PCRE2_ERROR_PARTIAL)
1612						return_value8[0] = 2;
1613					else
1614						return_value8[0] *= 2;
1615
1616					for (i = 0; i < return_value8[0]; ++i)
1617						if (ovector8_1[i] != ovector8_2[i]) {
1618							printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1619								i, (int)ovector8_1[i], (int)ovector8_2[i], total, current->pattern, current->input);
1620							is_successful = 0;
1621						}
1622				}
1623#endif
1624
1625#ifdef SUPPORT_PCRE2_16
1626				if (return_value16[0] != return_value16[1]) {
1627					printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1628						return_value16[0], return_value16[1], total, current->pattern, current->input);
1629					is_successful = 0;
1630				} else if (return_value16[0] >= 0 || return_value16[0] == PCRE2_ERROR_PARTIAL) {
1631					if (return_value16[0] == PCRE2_ERROR_PARTIAL)
1632						return_value16[0] = 2;
1633					else
1634						return_value16[0] *= 2;
1635
1636					for (i = 0; i < return_value16[0]; ++i)
1637						if (ovector16_1[i] != ovector16_2[i]) {
1638							printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1639								i, (int)ovector16_1[i], (int)ovector16_2[i], total, current->pattern, current->input);
1640							is_successful = 0;
1641						}
1642				}
1643#endif
1644
1645#ifdef SUPPORT_PCRE2_32
1646				if (return_value32[0] != return_value32[1]) {
1647					printf("\n32 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1648						return_value32[0], return_value32[1], total, current->pattern, current->input);
1649					is_successful = 0;
1650				} else if (return_value32[0] >= 0 || return_value32[0] == PCRE2_ERROR_PARTIAL) {
1651					if (return_value32[0] == PCRE2_ERROR_PARTIAL)
1652						return_value32[0] = 2;
1653					else
1654						return_value32[0] *= 2;
1655
1656					for (i = 0; i < return_value32[0]; ++i)
1657						if (ovector32_1[i] != ovector32_2[i]) {
1658							printf("\n32 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1659								i, (int)ovector32_1[i], (int)ovector32_2[i], total, current->pattern, current->input);
1660							is_successful = 0;
1661						}
1662				}
1663#endif
1664			}
1665		}
1666
1667		if (is_successful) {
1668#ifdef SUPPORT_PCRE2_8
1669			if (!(current->start_offset & F_NO8) && (utf || is_ascii)) {
1670				if (return_value8[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1671					printf("8 bit: Test should match: [%d] '%s' @ '%s'\n",
1672						total, current->pattern, current->input);
1673					is_successful = 0;
1674				}
1675
1676				if (return_value8[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1677					printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n",
1678						total, current->pattern, current->input);
1679					is_successful = 0;
1680				}
1681			}
1682#endif
1683#ifdef SUPPORT_PCRE2_16
1684			if (!(current->start_offset & F_NO16) && (utf || is_ascii)) {
1685				if (return_value16[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1686					printf("16 bit: Test should match: [%d] '%s' @ '%s'\n",
1687						total, current->pattern, current->input);
1688					is_successful = 0;
1689				}
1690
1691				if (return_value16[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1692					printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n",
1693						total, current->pattern, current->input);
1694					is_successful = 0;
1695				}
1696			}
1697#endif
1698#ifdef SUPPORT_PCRE2_32
1699			if (!(current->start_offset & F_NO32) && (utf || is_ascii)) {
1700				if (return_value32[0] < 0 && !(current->start_offset & F_NOMATCH)) {
1701					printf("32 bit: Test should match: [%d] '%s' @ '%s'\n",
1702						total, current->pattern, current->input);
1703					is_successful = 0;
1704				}
1705
1706				if (return_value32[0] >= 0 && (current->start_offset & F_NOMATCH)) {
1707					printf("32 bit: Test should not match: [%d] '%s' @ '%s'\n",
1708						total, current->pattern, current->input);
1709					is_successful = 0;
1710				}
1711			}
1712#endif
1713		}
1714
1715		if (is_successful) {
1716#ifdef SUPPORT_PCRE2_8
1717			if (re8 && !(current->start_offset & F_NO8) && pcre2_get_mark_8(mdata8_1) != pcre2_get_mark_8(mdata8_2)) {
1718				printf("8 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1719					total, current->pattern, current->input);
1720				is_successful = 0;
1721			}
1722#endif
1723#ifdef SUPPORT_PCRE2_16
1724			if (re16 && !(current->start_offset & F_NO16) && pcre2_get_mark_16(mdata16_1) != pcre2_get_mark_16(mdata16_2)) {
1725				printf("16 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1726					total, current->pattern, current->input);
1727				is_successful = 0;
1728			}
1729#endif
1730#ifdef SUPPORT_PCRE2_32
1731			if (re32 && !(current->start_offset & F_NO32) && pcre2_get_mark_32(mdata32_1) != pcre2_get_mark_32(mdata32_2)) {
1732				printf("32 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1733					total, current->pattern, current->input);
1734				is_successful = 0;
1735			}
1736#endif
1737		}
1738
1739#ifdef SUPPORT_PCRE2_8
1740		pcre2_code_free_8(re8);
1741		pcre2_match_data_free_8(mdata8_1);
1742		pcre2_match_data_free_8(mdata8_2);
1743		pcre2_match_context_free_8(mcontext8);
1744#endif
1745#ifdef SUPPORT_PCRE2_16
1746		pcre2_code_free_16(re16);
1747		pcre2_match_data_free_16(mdata16_1);
1748		pcre2_match_data_free_16(mdata16_2);
1749		pcre2_match_context_free_16(mcontext16);
1750#endif
1751#ifdef SUPPORT_PCRE2_32
1752		pcre2_code_free_32(re32);
1753		pcre2_match_data_free_32(mdata32_1);
1754		pcre2_match_data_free_32(mdata32_2);
1755		pcre2_match_context_free_32(mcontext32);
1756#endif
1757
1758		if (is_successful) {
1759			successful++;
1760			successful_row++;
1761			printf(".");
1762			if (successful_row >= 60) {
1763				successful_row = 0;
1764				printf("\n");
1765			}
1766		} else
1767			successful_row = 0;
1768
1769		fflush(stdout);
1770		current++;
1771	}
1772#ifdef SUPPORT_PCRE2_8
1773	setstack8(NULL);
1774#endif
1775#ifdef SUPPORT_PCRE2_16
1776	setstack16(NULL);
1777#endif
1778#ifdef SUPPORT_PCRE2_32
1779	setstack32(NULL);
1780#endif
1781
1782	if (total == successful) {
1783		printf("\nAll JIT regression tests are successfully passed.\n");
1784		return 0;
1785	} else {
1786		printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
1787		return 1;
1788	}
1789}
1790
1791#if defined SUPPORT_UNICODE
1792
1793static int check_invalid_utf_result(int pattern_index, const char *type, int result,
1794	int match_start, int match_end, PCRE2_SIZE *ovector)
1795{
1796	if (match_start < 0) {
1797		if (result != -1) {
1798			printf("Pattern[%d] %s result is not -1.\n", pattern_index, type);
1799			return 1;
1800		}
1801		return 0;
1802	}
1803
1804	if (result <= 0) {
1805		printf("Pattern[%d] %s result (%d) is not greater than 0.\n", pattern_index, type, result);
1806		return 1;
1807	}
1808
1809	if (ovector[0] != (PCRE2_SIZE)match_start) {
1810		printf("Pattern[%d] %s ovector[0] is unexpected (%d instead of %d)\n",
1811			pattern_index, type, (int)ovector[0], match_start);
1812		return 1;
1813	}
1814
1815	if (ovector[1] != (PCRE2_SIZE)match_end) {
1816		printf("Pattern[%d] %s ovector[1] is unexpected (%d instead of %d)\n",
1817			pattern_index, type, (int)ovector[1], match_end);
1818		return 1;
1819	}
1820
1821	return 0;
1822}
1823
1824#endif /* SUPPORT_UNICODE */
1825
1826#if defined SUPPORT_UNICODE && defined SUPPORT_PCRE2_8
1827
/* Shorthand option sets used by the rows of the invalid UTF-8 test table.
They are #undef'd again right after the table.

  UDA  compile options: UTF + DOTALL + ANCHORED
  CI   JIT options:     COMPLETE + INVALID_UTF
  CPI  JIT options:     COMPLETE + PARTIAL_SOFT + INVALID_UTF */

#define UDA (PCRE2_UTF | PCRE2_DOTALL | PCRE2_ANCHORED)
#define CI (PCRE2_JIT_COMPLETE | PCRE2_JIT_INVALID_UTF)
#define CPI (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_INVALID_UTF)

/* One row of the invalid UTF-8 regression table. The member order is
significant because the table uses positional initializers. */

struct invalid_utf8_regression_test_case {
	/* Option bits passed to pcre2_compile_8(). */
	int compile_options;
	/* Option bits passed to pcre2_jit_compile_8(); the PCRE2_JIT_COMPLETE
	bit also decides whether a complete match is attempted. */
	int jit_compile_options;
	/* Start offset measured from the beginning of input; skip_left is
	subtracted from it before it is handed to the match function. */
	int start_offset;
	/* Number of bytes dropped from the beginning and from the end of input
	before matching. */
	int skip_left, skip_right;
	/* Expected ovector[0] and ovector[1]. A negative match_start means
	that the match is expected to fail. */
	int match_start, match_end;
	/* Patterns to compile; a NULL entry is skipped by the runner. */
	const char *pattern[2];
	/* Subject string, zero terminated (its length is taken with strlen). */
	const char *input;
};
1843
1844static const char invalid_utf8_newline_cr;
1845
1846static const struct invalid_utf8_regression_test_case invalid_utf8_regression_test_cases[] = {
1847	{ UDA, CI, 0, 0, 0, 0, 4, { ".", NULL }, "\xf4\x8f\xbf\xbf" },
1848	{ UDA, CI, 0, 0, 0, 0, 4, { ".", NULL }, "\xf0\x90\x80\x80" },
1849	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf4\x90\x80\x80" },
1850	{ UDA, CI, 0, 0, 1, -1, -1, { ".", NULL }, "\xf4\x8f\xbf\xbf" },
1851	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf0\x90\x80\x7f" },
1852	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf0\x90\x80\xc0" },
1853	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf0\x8f\xbf\xbf" },
1854	{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xef\xbf\xbf#" },
1855	{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xef\xbf\xbf" },
1856	{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xe0\xa0\x80#" },
1857	{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xe0\xa0\x80" },
1858	{ UDA, CI, 0, 0, 2, -1, -1, { ".", NULL }, "\xef\xbf\xbf#" },
1859	{ UDA, CI, 0, 0, 1, -1, -1, { ".", NULL }, "\xef\xbf\xbf" },
1860	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xef\xbf\x7f#" },
1861	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xef\xbf\xc0" },
1862	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x9f\xbf#" },
1863	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x9f\xbf" },
1864	{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xed\x9f\xbf#" },
1865	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xed\xa0\x80#" },
1866	{ UDA, CI, 0, 0, 0, 0, 3, { ".", NULL }, "\xee\x80\x80#" },
1867	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xed\xbf\xbf#" },
1868	{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xdf\xbf##" },
1869	{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xdf\xbf#" },
1870	{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xdf\xbf" },
1871	{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xc2\x80##" },
1872	{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xc2\x80#" },
1873	{ UDA, CI, 0, 0, 0, 0, 2, { ".", NULL }, "\xc2\x80" },
1874	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x80##" },
1875	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xdf\xc0##" },
1876	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xe0\x80" },
1877	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xdf\xc0" },
1878	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xc1\xbf##" },
1879	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xc1\xbf" },
1880	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\x80###" },
1881	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\x80" },
1882	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf8###" },
1883	{ UDA, CI, 0, 0, 0, -1, -1, { ".", NULL }, "\xf8" },
1884	{ UDA, CI, 0, 0, 0, 0, 1, { ".", NULL }, "\x7f" },
1885
1886	{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf4\x8f\xbf\xbf#" },
1887	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\xa0\x80\x80\xf4\xa0\x80\x80" },
1888	{ UDA, CPI, 4, 1, 1, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbf" },
1889	{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "#\xef\xbf\xbf#" },
1890	{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "#\xe0\xa0\x80#" },
1891	{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf0\x90\x80\x80#" },
1892	{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "\xf3\xbf\xbf\xbf#" },
1893	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf0\x8f\xbf\xbf\xf0\x8f\xbf\xbf" },
1894	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf5\x80\x80\x80\xf5\x80\x80\x80" },
1895	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x90\x80\x80\xf4\x90\x80\x80" },
1896	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xbf\xff\xf4\x8f\xbf\xff" },
1897	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\xf4\x8f\xff\xbf\xf4\x8f\xff\xbf" },
1898	{ UDA, CPI, 4, 0, 1, -1, -1, { "\\B", "\\b" }, "\xef\x80\x80\x80\xef\x80\x80" },
1899	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "\x80\x80\x80\x80\x80\x80\x80\x80" },
1900	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xe0\x9f\xbf\xe0\x9f\xbf#" },
1901	{ UDA, CPI, 4, 2, 2, -1, -1, { "\\B", "\\b" }, "#\xe0\xa0\x80\xe0\xa0\x80#" },
1902	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xf0\x80\x80\xf0\x80\x80#" },
1903	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "#\xed\xa0\x80\xed\xa0\x80#" },
1904	{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "##\xdf\xbf#" },
1905	{ UDA, CPI, 4, 2, 0, 2, 2, { "\\B", NULL }, "##\xdf\xbf#" },
1906	{ UDA, CPI, 4, 0, 0, 4, 4, { "\\B", NULL }, "##\xc2\x80#" },
1907	{ UDA, CPI, 4, 2, 0, 2, 2, { "\\B", NULL }, "##\xc2\x80#" },
1908	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xc1\xbf\xc1\xbf##" },
1909	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xdf\xc0\xdf\xc0##" },
1910	{ UDA, CPI, 4, 0, 0, -1, -1, { "\\B", "\\b" }, "##\xe0\x80\xe0\x80##" },
1911
1912	{ UDA, CPI, 3, 0, 0, 3, 3, { "\\B", NULL }, "\xef\xbf\xbf#" },
1913	{ UDA, CPI, 3, 0, 0, 3, 3, { "\\B", NULL }, "\xe0\xa0\x80#" },
1914	{ UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xe0\x9f\xbf\xe0\x9f\xbf" },
1915	{ UDA, CPI, 3, 1, 1, -1, -1, { "\\B", "\\b" }, "\xef\xbf\xbf\xef\xbf\xbf" },
1916	{ UDA, CPI, 3, 0, 1, -1, -1, { "\\B", "\\b" }, "\xdf\x80\x80\xdf\x80" },
1917	{ UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xef\xbf\xff\xef\xbf\xff" },
1918	{ UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xef\xff\xbf\xef\xff\xbf" },
1919	{ UDA, CPI, 3, 0, 0, -1, -1, { "\\B", "\\b" }, "\xed\xbf\xbf\xed\xbf\xbf" },
1920
1921	{ UDA, CPI, 2, 0, 0, 2, 2, { "\\B", NULL }, "\xdf\xbf#" },
1922	{ UDA, CPI, 2, 0, 0, 2, 2, { "\\B", NULL }, "\xc2\x80#" },
1923	{ UDA, CPI, 2, 1, 1, -1, -1, { "\\B", "\\b" }, "\xdf\xbf\xdf\xbf" },
1924	{ UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xc1\xbf\xc1\xbf" },
1925	{ UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xe0\x80\xe0\x80" },
1926	{ UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xdf\xff\xdf\xff" },
1927	{ UDA, CPI, 2, 0, 0, -1, -1, { "\\B", "\\b" }, "\xff\xbf\xff\xbf" },
1928
1929	{ UDA, CPI, 1, 0, 0, 1, 1, { "\\B", NULL }, "\x7f#" },
1930	{ UDA, CPI, 1, 0, 0, 1, 1, { "\\B", NULL }, "\x01#" },
1931	{ UDA, CPI, 1, 0, 0, -1, -1, { "\\B", "\\b" }, "\x80\x80" },
1932	{ UDA, CPI, 1, 0, 0, -1, -1, { "\\B", "\\b" }, "\xb0\xb0" },
1933
1934	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { "(.)\\1", NULL }, "aA" },
1935	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, -1, -1, { "(.)\\1", NULL }, "a\xff" },
1936	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 4, { "(.)\\1", NULL }, "\xc3\xa1\xc3\x81" },
1937	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { "(.)\\1", NULL }, "\xc3\xa1\xc3\x81" },
1938	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, -1, -1, { "(.)\\1", NULL }, "\xc2\x80\x80" },
1939	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 6, { "(.)\\1", NULL }, "\xe1\xbd\xb8\xe1\xbf\xb8" },
1940	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { "(.)\\1", NULL }, "\xe1\xbd\xb8\xe1\xbf\xb8" },
1941	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 8, { "(.)\\1", NULL }, "\xf0\x90\x90\x80\xf0\x90\x90\xa8" },
1942	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { "(.)\\1", NULL }, "\xf0\x90\x90\x80\xf0\x90\x90\xa8" },
1943
1944	{ UDA, CPI, 0, 0, 0, 0, 1, { "\\X", NULL }, "A" },
1945	{ UDA, CPI, 0, 0, 0, -1, -1, { "\\X", NULL }, "\xff" },
1946	{ UDA, CPI, 0, 0, 0, 0, 2, { "\\X", NULL }, "\xc3\xa1" },
1947	{ UDA, CPI, 0, 0, 1, -1, -1, { "\\X", NULL }, "\xc3\xa1" },
1948	{ UDA, CPI, 0, 0, 0, -1, -1, { "\\X", NULL }, "\xc3\x7f" },
1949	{ UDA, CPI, 0, 0, 0, 0, 3, { "\\X", NULL }, "\xe1\xbd\xb8" },
1950	{ UDA, CPI, 0, 0, 1, -1, -1, { "\\X", NULL }, "\xe1\xbd\xb8" },
1951	{ UDA, CPI, 0, 0, 0, 0, 4, { "\\X", NULL }, "\xf0\x90\x90\x80" },
1952	{ UDA, CPI, 0, 0, 1, -1, -1, { "\\X", NULL }, "\xf0\x90\x90\x80" },
1953
1954	{ UDA, CPI, 0, 0, 0, -1, -1, { "[^#]", NULL }, "#" },
1955	{ UDA, CPI, 0, 0, 0, 0, 4, { "[^#]", NULL }, "\xf4\x8f\xbf\xbf" },
1956	{ UDA, CPI, 0, 0, 0, -1, -1, { "[^#]", NULL }, "\xf4\x90\x80\x80" },
1957	{ UDA, CPI, 0, 0, 0, -1, -1, { "[^#]", NULL }, "\xc1\x80" },
1958
1959	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 2, 3, { "^\\W", NULL }, " \x0a#"},
1960	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 14, 15, { "^\\W", NULL }, " \xc0\x8a#\xe0\x80\x8a#\xf0\x80\x80\x8a#\x0a#"},
1961	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xf8\x0a#"},
1962	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xc3\x0a#"},
1963	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xf1\x0a#"},
1964	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 4, 5, { "^\\W", NULL }, " \xf2\xbf\x0a#"},
1965	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 5, 6, { "^\\W", NULL }, " \xf2\xbf\xbf\x0a#"},
1966	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { "^\\W", NULL }, " \xef\x0a#"},
1967	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 4, 5, { "^\\W", NULL }, " \xef\xbf\x0a#"},
1968	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 5, 6, { "^\\W", NULL }, " \x85#\xc2\x85#"},
1969	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 7, 8, { "^\\W", NULL }, " \xe2\x80\xf8\xe2\x80\xa8#"},
1970
1971	{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, -1, -1, { "#", NULL }, "\xe2\x80\xf8\xe2\x80\xa8#"},
1972	{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, 3, 4, { "#", NULL }, "\xe2\x80\xf8#\xe2\x80\xa8#"},
1973	{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, -1, -1, { "#", NULL }, "abcd\xc2\x85#"},
1974	{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, 1, 2, { "#", NULL }, "\x85#\xc2\x85#"},
1975	{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, 5, 6, { "#", NULL }, "\xef,\x80,\xf8#\x0a"},
1976	{ PCRE2_UTF | PCRE2_FIRSTLINE, CI, 0, 0, 0, -1, -1, { "#", NULL }, "\xef,\x80,\xf8\x0a#"},
1977
1978	{ PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 4, 8, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7#\xc7\x85#" },
1979	{ PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 7, 11, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7\x80\x80\x80#\xc7\x85#" },
1980	{ PCRE2_UTF, CI, 0, 0, 0, 4, 8, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7#\xc7\x85#" },
1981	{ PCRE2_UTF, CI, 0, 0, 0, 7, 11, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7\x80\x80\x80#\xc7\x85#" },
1982
1983	{ PCRE2_UTF | PCRE2_UCP, CI, 0, 0, 0, -1, -1, { "[\\s]", NULL }, "\xed\xa0\x80" },
1984
1985	/* These two are not invalid UTF tests, but this infrastructure fits better for them. */
1986	{ 0, PCRE2_JIT_COMPLETE, 0, 0, 1, -1, -1, { "\\X{2}", NULL }, "\r\n\n" },
1987	{ 0, PCRE2_JIT_COMPLETE, 0, 0, 1, -1, -1, { "\\R{2}", NULL }, "\r\n\n" },
1988
1989	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 0, 0, 0, -1, -1, { "^.a", &invalid_utf8_newline_cr }, "\xc3\xa7#a" },
1990
1991	{ 0, 0, 0, 0, 0, 0, 0, { NULL, NULL }, NULL }
1992};
1993
/* Drop the option shorthands used by the test table above; the UTF-16 and
UTF-32 sections further down define their own copies. */
1994#undef UDA
1995#undef CI
1996#undef CPI
1997
1998static int run_invalid_utf8_test(const struct invalid_utf8_regression_test_case *current,
1999	int pattern_index, int i, pcre2_compile_context_8 *ccontext, pcre2_match_data_8 *mdata)
2000{
2001	pcre2_code_8 *code;
2002	int result, errorcode;
2003	PCRE2_SIZE length, erroroffset;
2004	PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_8(mdata);
2005
2006	if (current->pattern[i] == NULL)
2007		return 1;
2008
2009	code = pcre2_compile_8((PCRE2_UCHAR8*)current->pattern[i], PCRE2_ZERO_TERMINATED,
2010		current->compile_options, &errorcode, &erroroffset, ccontext);
2011
2012	if (!code) {
2013		printf("Pattern[%d:0] cannot be compiled. Error offset: %d\n", pattern_index, (int)erroroffset);
2014		return 0;
2015	}
2016
2017	if (pcre2_jit_compile_8(code, current->jit_compile_options) != 0) {
2018		printf("Pattern[%d:0] cannot be compiled by the JIT compiler.\n", pattern_index);
2019		pcre2_code_free_8(code);
2020		return 0;
2021	}
2022
2023	length = (PCRE2_SIZE)(strlen(current->input) - current->skip_left - current->skip_right);
2024
2025	if (current->jit_compile_options & PCRE2_JIT_COMPLETE) {
2026		result = pcre2_jit_match_8(code, (PCRE2_UCHAR8*)(current->input + current->skip_left),
2027			length, current->start_offset - current->skip_left, 0, mdata, NULL);
2028
2029		if (check_invalid_utf_result(pattern_index, "match", result, current->match_start, current->match_end, ovector)) {
2030			pcre2_code_free_8(code);
2031			return 0;
2032		}
2033	}
2034
2035	if (current->jit_compile_options & PCRE2_JIT_PARTIAL_SOFT) {
2036		result = pcre2_jit_match_8(code, (PCRE2_UCHAR8*)(current->input + current->skip_left),
2037			length, current->start_offset - current->skip_left, PCRE2_PARTIAL_SOFT, mdata, NULL);
2038
2039		if (check_invalid_utf_result(pattern_index, "partial match", result, current->match_start, current->match_end, ovector)) {
2040			pcre2_code_free_8(code);
2041			return 0;
2042		}
2043	}
2044
2045	pcre2_code_free_8(code);
2046	return 1;
2047}
2048
2049static int invalid_utf8_regression_tests(void)
2050{
2051	const struct invalid_utf8_regression_test_case *current;
2052	pcre2_compile_context_8 *ccontext;
2053	pcre2_match_data_8 *mdata;
2054	int total = 0, successful = 0;
2055	int result;
2056
2057	printf("\nRunning invalid-utf8 JIT regression tests\n");
2058
2059	ccontext = pcre2_compile_context_create_8(NULL);
2060	pcre2_set_newline_8(ccontext, PCRE2_NEWLINE_ANY);
2061	mdata = pcre2_match_data_create_8(4, NULL);
2062
2063	for (current = invalid_utf8_regression_test_cases; current->pattern[0]; current++) {
2064		/* printf("\nPattern: %s :\n", current->pattern); */
2065		total++;
2066
2067		result = 1;
2068		if (current->pattern[1] != &invalid_utf8_newline_cr)
2069		{
2070			if (!run_invalid_utf8_test(current, total - 1, 0, ccontext, mdata))
2071				result = 0;
2072			if (!run_invalid_utf8_test(current, total - 1, 1, ccontext, mdata))
2073				result = 0;
2074		} else {
2075			pcre2_set_newline_8(ccontext, PCRE2_NEWLINE_CR);
2076			if (!run_invalid_utf8_test(current, total - 1, 0, ccontext, mdata))
2077				result = 0;
2078			pcre2_set_newline_8(ccontext, PCRE2_NEWLINE_ANY);
2079		}
2080
2081		if (result) {
2082			successful++;
2083		}
2084
2085		printf(".");
2086		if ((total % 60) == 0)
2087			printf("\n");
2088	}
2089
2090	if ((total % 60) != 0)
2091		printf("\n");
2092
2093	pcre2_match_data_free_8(mdata);
2094	pcre2_compile_context_free_8(ccontext);
2095
2096	if (total == successful) {
2097		printf("\nAll invalid UTF8 JIT regression tests are successfully passed.\n");
2098		return 0;
2099	} else {
2100		printf("\nInvalid UTF8 successful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
2101		return 1;
2102	}
2103}
2104
2105#else /* !SUPPORT_UNICODE || !SUPPORT_PCRE2_8 */
2106
/* Placeholder used when Unicode or 8-bit support is not compiled in: there is
nothing to run, so report that no test failed. */
static int invalid_utf8_regression_tests(void)
{
	const int failed = 0;

	return failed;
}
2111
2112#endif /* SUPPORT_UNICODE && SUPPORT_PCRE2_8 */
2113
2114#if defined SUPPORT_UNICODE && defined SUPPORT_PCRE2_16
2115
/* Shorthands for the option sets used in the UTF-16 test table below. They
are undefined again once the table has been defined.
  UDA  compile options: UTF, dotall and anchored
  CI   JIT options: complete matching with invalid UTF support
  CPI  as CI, plus soft partial matching */
#define UDA (PCRE2_ANCHORED | PCRE2_DOTALL | PCRE2_UTF)
#define CI (PCRE2_JIT_INVALID_UTF | PCRE2_JIT_COMPLETE)
#define CPI (PCRE2_JIT_INVALID_UTF | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_COMPLETE)
2119
2120struct invalid_utf16_regression_test_case {
2121	int compile_options;
2122	int jit_compile_options;
2123	int start_offset;
2124	int skip_left;
2125	int skip_right;
2126	int match_start;
2127	int match_end;
2128	const PCRE2_UCHAR16 *pattern[2];
2129	const PCRE2_UCHAR16 *input;
2130};
2131
2132static PCRE2_UCHAR16 allany16[] = { '.', 0 };
2133static PCRE2_UCHAR16 non_word_boundary16[] = { '\\', 'B', 0 };
2134static PCRE2_UCHAR16 word_boundary16[] = { '\\', 'b', 0 };
2135static PCRE2_UCHAR16 backreference16[] = { '(', '.', ')', '\\', '1', 0 };
2136static PCRE2_UCHAR16 grapheme16[] = { '\\', 'X', 0 };
2137static PCRE2_UCHAR16 nothashmark16[] = { '[', '^', '#', ']', 0 };
2138static PCRE2_UCHAR16 afternl16[] = { '^', '\\', 'W', 0 };
2139static PCRE2_UCHAR16 generic16[] = { '#', 0xd800, 0xdc00, '#', 0 };
2140static PCRE2_UCHAR16 test16_1[] = { 0xd7ff, 0xe000, 0xffff, 0x01, '#', 0 };
2141static PCRE2_UCHAR16 test16_2[] = { 0xd800, 0xdc00, 0xd800, 0xdc00, 0 };
2142static PCRE2_UCHAR16 test16_3[] = { 0xdbff, 0xdfff, 0xdbff, 0xdfff, 0 };
2143static PCRE2_UCHAR16 test16_4[] = { 0xd800, 0xdbff, 0xd800, 0xdbff, 0 };
2144static PCRE2_UCHAR16 test16_5[] = { '#', 0xd800, 0xdc00, '#', 0 };
2145static PCRE2_UCHAR16 test16_6[] = { 'a', 'A', 0xdc28, 0 };
2146static PCRE2_UCHAR16 test16_7[] = { 0xd801, 0xdc00, 0xd801, 0xdc28, 0 };
2147static PCRE2_UCHAR16 test16_8[] = { '#', 0xd800, 0xdc00, 0 };
2148static PCRE2_UCHAR16 test16_9[] = { ' ', 0x2028, '#', 0 };
2149static PCRE2_UCHAR16 test16_10[] = { ' ', 0xdc00, 0xd800, 0x2028, '#', 0 };
2150static PCRE2_UCHAR16 test16_11[] = { 0xdc00, 0xdc00, 0xd800, 0xdc00, 0xdc00, '#', 0xd800, 0xdc00, '#', 0 };
2151static PCRE2_UCHAR16 test16_12[] = { '#', 0xd800, 0xdc00, 0xd800, '#', 0xd800, 0xdc00, 0xdc00, 0xdc00, '#', 0xd800, 0xdc00, '#', 0 };
2152
/* Invalid UTF-16 test cases. The columns follow the member order of struct
invalid_utf16_regression_test_case: compile options, JIT compile options, start
offset, skip left, skip right, match start, match end, the two patterns and the
input. The last row, whose first pattern is NULL, terminates the table.
NOTE(review): -1/-1 in the match columns presumably means that no match is
expected - confirm against check_invalid_utf_result(). */
2153static const struct invalid_utf16_regression_test_case invalid_utf16_regression_test_cases[] = {
	/* "." at various start offsets. */
2154	{ UDA, CI, 0, 0, 0, 0, 1, { allany16, NULL }, test16_1 },
2155	{ UDA, CI, 1, 0, 0, 1, 2, { allany16, NULL }, test16_1 },
2156	{ UDA, CI, 2, 0, 0, 2, 3, { allany16, NULL }, test16_1 },
2157	{ UDA, CI, 3, 0, 0, 3, 4, { allany16, NULL }, test16_1 },
2158	{ UDA, CI, 0, 0, 0, 0, 2, { allany16, NULL }, test16_2 },
2159	{ UDA, CI, 0, 0, 3, -1, -1, { allany16, NULL }, test16_2 },
2160	{ UDA, CI, 1, 0, 0, -1, -1, { allany16, NULL }, test16_2 },
2161	{ UDA, CI, 0, 0, 0, 0, 2, { allany16, NULL }, test16_3 },
2162	{ UDA, CI, 0, 0, 3, -1, -1, { allany16, NULL }, test16_3 },
2163	{ UDA, CI, 1, 0, 0, -1, -1, { allany16, NULL }, test16_3 },
2164
	/* "\B", and in some rows "\b" as the second pattern. */
2165	{ UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary16, NULL }, test16_1 },
2166	{ UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary16, NULL }, test16_1 },
2167	{ UDA, CPI, 3, 0, 0, 3, 3, { non_word_boundary16, NULL }, test16_1 },
2168	{ UDA, CPI, 4, 0, 0, 4, 4, { non_word_boundary16, NULL }, test16_1 },
2169	{ UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary16, NULL }, test16_2 },
2170	{ UDA, CPI, 2, 0, 0, 2, 2, { non_word_boundary16, NULL }, test16_3 },
2171	{ UDA, CPI, 2, 1, 1, -1, -1, { non_word_boundary16, word_boundary16 }, test16_2 },
2172	{ UDA, CPI, 2, 1, 1, -1, -1, { non_word_boundary16, word_boundary16 }, test16_3 },
2173	{ UDA, CPI, 2, 0, 0, -1, -1, { non_word_boundary16, word_boundary16 }, test16_4 },
2174	{ UDA, CPI, 2, 0, 0, -1, -1, { non_word_boundary16, word_boundary16 }, test16_5 },
2175
	/* Caseless "(.)\1". */
2176	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { backreference16, NULL }, test16_6 },
2177	{ UDA | PCRE2_CASELESS, CPI, 1, 0, 0, -1, -1, { backreference16, NULL }, test16_6 },
2178	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 4, { backreference16, NULL }, test16_7 },
2179	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 1, -1, -1, { backreference16, NULL }, test16_7 },
2180
	/* "\X". */
2181	{ UDA, CPI, 0, 0, 0, 0, 1, { grapheme16, NULL }, test16_6 },
2182	{ UDA, CPI, 1, 0, 0, 1, 2, { grapheme16, NULL }, test16_6 },
2183	{ UDA, CPI, 2, 0, 0, -1, -1, { grapheme16, NULL }, test16_6 },
2184	{ UDA, CPI, 0, 0, 0, 0, 2, { grapheme16, NULL }, test16_7 },
2185	{ UDA, CPI, 2, 0, 0, 2, 4, { grapheme16, NULL }, test16_7 },
2186	{ UDA, CPI, 1, 0, 0, -1, -1, { grapheme16, NULL }, test16_7 },
2187
	/* "[^#]". */
2188	{ UDA, CPI, 0, 0, 0, -1, -1, { nothashmark16, NULL }, test16_8 },
2189	{ UDA, CPI, 1, 0, 0, 1, 3, { nothashmark16, NULL }, test16_8 },
2190	{ UDA, CPI, 2, 0, 0, -1, -1, { nothashmark16, NULL }, test16_8 },
2191
	/* Multiline "^\W". */
2192	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 2, 3, { afternl16, NULL }, test16_9 },
2193	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 4, 5, { afternl16, NULL }, test16_10 },
2194
	/* Unanchored generic16 pattern, with and without PCRE2_NO_START_OPTIMIZE. */
2195	{ PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 5, 9, { generic16, NULL }, test16_11 },
2196	{ PCRE2_UTF | PCRE2_NO_START_OPTIMIZE, CI, 0, 0, 0, 9, 13, { generic16, NULL }, test16_12 },
2197	{ PCRE2_UTF, CI, 0, 0, 0, 5, 9, { generic16, NULL }, test16_11 },
2198	{ PCRE2_UTF, CI, 0, 0, 0, 9, 13, { generic16, NULL }, test16_12 },
2199
	/* Terminator: the test loop stops at the first NULL pattern[0]. */
2200	{ 0, 0, 0, 0, 0, 0, 0, { NULL, NULL }, NULL }
2201};
2202
/* Drop the option shorthands used by the table above. */
2203#undef UDA
2204#undef CI
2205#undef CPI
2206
2207static int run_invalid_utf16_test(const struct invalid_utf16_regression_test_case *current,
2208	int pattern_index, int i, pcre2_compile_context_16 *ccontext, pcre2_match_data_16 *mdata)
2209{
2210	pcre2_code_16 *code;
2211	int result, errorcode;
2212	PCRE2_SIZE length, erroroffset;
2213	const PCRE2_UCHAR16 *input;
2214	PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_16(mdata);
2215
2216	if (current->pattern[i] == NULL)
2217		return 1;
2218
2219	code = pcre2_compile_16(current->pattern[i], PCRE2_ZERO_TERMINATED,
2220		current->compile_options, &errorcode, &erroroffset, ccontext);
2221
2222	if (!code) {
2223		printf("Pattern[%d:0] cannot be compiled. Error offset: %d\n", pattern_index, (int)erroroffset);
2224		return 0;
2225	}
2226
2227	if (pcre2_jit_compile_16(code, current->jit_compile_options) != 0) {
2228		printf("Pattern[%d:0] cannot be compiled by the JIT compiler.\n", pattern_index);
2229		pcre2_code_free_16(code);
2230		return 0;
2231	}
2232
2233	input = current->input;
2234	length = 0;
2235
2236	while (*input++ != 0)
2237		length++;
2238
2239	length -= current->skip_left + current->skip_right;
2240
2241	if (current->jit_compile_options & PCRE2_JIT_COMPLETE) {
2242		result = pcre2_jit_match_16(code, (current->input + current->skip_left),
2243			length, current->start_offset - current->skip_left, 0, mdata, NULL);
2244
2245		if (check_invalid_utf_result(pattern_index, "match", result, current->match_start, current->match_end, ovector)) {
2246			pcre2_code_free_16(code);
2247			return 0;
2248		}
2249	}
2250
2251	if (current->jit_compile_options & PCRE2_JIT_PARTIAL_SOFT) {
2252		result = pcre2_jit_match_16(code, (current->input + current->skip_left),
2253			length, current->start_offset - current->skip_left, PCRE2_PARTIAL_SOFT, mdata, NULL);
2254
2255		if (check_invalid_utf_result(pattern_index, "partial match", result, current->match_start, current->match_end, ovector)) {
2256			pcre2_code_free_16(code);
2257			return 0;
2258		}
2259	}
2260
2261	pcre2_code_free_16(code);
2262	return 1;
2263}
2264
2265static int invalid_utf16_regression_tests(void)
2266{
2267	const struct invalid_utf16_regression_test_case *current;
2268	pcre2_compile_context_16 *ccontext;
2269	pcre2_match_data_16 *mdata;
2270	int total = 0, successful = 0;
2271	int result;
2272
2273	printf("\nRunning invalid-utf16 JIT regression tests\n");
2274
2275	ccontext = pcre2_compile_context_create_16(NULL);
2276	pcre2_set_newline_16(ccontext, PCRE2_NEWLINE_ANY);
2277	mdata = pcre2_match_data_create_16(4, NULL);
2278
2279	for (current = invalid_utf16_regression_test_cases; current->pattern[0]; current++) {
2280		/* printf("\nPattern: %s :\n", current->pattern); */
2281		total++;
2282
2283		result = 1;
2284		if (!run_invalid_utf16_test(current, total - 1, 0, ccontext, mdata))
2285			result = 0;
2286		if (!run_invalid_utf16_test(current, total - 1, 1, ccontext, mdata))
2287			result = 0;
2288
2289		if (result) {
2290			successful++;
2291		}
2292
2293		printf(".");
2294		if ((total % 60) == 0)
2295			printf("\n");
2296	}
2297
2298	if ((total % 60) != 0)
2299		printf("\n");
2300
2301	pcre2_match_data_free_16(mdata);
2302	pcre2_compile_context_free_16(ccontext);
2303
2304	if (total == successful) {
2305		printf("\nAll invalid UTF16 JIT regression tests are successfully passed.\n");
2306		return 0;
2307	} else {
2308		printf("\nInvalid UTF16 successful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
2309		return 1;
2310	}
2311}
2312
2313#else /* !SUPPORT_UNICODE || !SUPPORT_PCRE2_16 */
2314
/* Placeholder used when Unicode or 16-bit support is not compiled in: there is
nothing to run, so report that no test failed. */
static int invalid_utf16_regression_tests(void)
{
	const int failed = 0;

	return failed;
}
2319
2320#endif /* SUPPORT_UNICODE && SUPPORT_PCRE2_16 */
2321
2322#if defined SUPPORT_UNICODE && defined SUPPORT_PCRE2_32
2323
/* Shorthands for the option sets used in the UTF-32 test table below. They
are undefined again once the table has been defined.
  UDA  compile options: UTF, dotall and anchored
  CI   JIT options: complete matching with invalid UTF support
  CPI  as CI, plus soft partial matching */
#define UDA (PCRE2_ANCHORED | PCRE2_DOTALL | PCRE2_UTF)
#define CI (PCRE2_JIT_INVALID_UTF | PCRE2_JIT_COMPLETE)
#define CPI (PCRE2_JIT_INVALID_UTF | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_COMPLETE)
2327
2328struct invalid_utf32_regression_test_case {
2329	int compile_options;
2330	int jit_compile_options;
2331	int start_offset;
2332	int skip_left;
2333	int skip_right;
2334	int match_start;
2335	int match_end;
2336	const PCRE2_UCHAR32 *pattern[2];
2337	const PCRE2_UCHAR32 *input;
2338};
2339
2340static PCRE2_UCHAR32 allany32[] = { '.', 0 };
2341static PCRE2_UCHAR32 non_word_boundary32[] = { '\\', 'B', 0 };
2342static PCRE2_UCHAR32 word_boundary32[] = { '\\', 'b', 0 };
2343static PCRE2_UCHAR32 backreference32[] = { '(', '.', ')', '\\', '1', 0 };
2344static PCRE2_UCHAR32 grapheme32[] = { '\\', 'X', 0 };
2345static PCRE2_UCHAR32 nothashmark32[] = { '[', '^', '#', ']', 0 };
2346static PCRE2_UCHAR32 afternl32[] = { '^', '\\', 'W', 0 };
2347static PCRE2_UCHAR32 test32_1[] = { 0x10ffff, 0x10ffff, 0x110000, 0x110000, 0x10ffff, 0 };
2348static PCRE2_UCHAR32 test32_2[] = { 0xd7ff, 0xe000, 0xd800, 0xdfff, 0xe000, 0xdfff, 0xd800, 0 };
2349static PCRE2_UCHAR32 test32_3[] = { 'a', 'A', 0x110000, 0 };
2350static PCRE2_UCHAR32 test32_4[] = { '#', 0x10ffff, 0x110000, 0 };
2351static PCRE2_UCHAR32 test32_5[] = { ' ', 0x2028, '#', 0 };
2352static PCRE2_UCHAR32 test32_6[] = { ' ', 0x110000, 0x2028, '#', 0 };
2353
/* Invalid UTF-32 test cases. The columns follow the member order of struct
invalid_utf32_regression_test_case: compile options, JIT compile options, start
offset, skip left, skip right, match start, match end, the two patterns and the
input. The last row, whose first pattern is NULL, terminates the table.
NOTE(review): -1/-1 in the match columns presumably means that no match is
expected - confirm against check_invalid_utf_result(). */
2354static const struct invalid_utf32_regression_test_case invalid_utf32_regression_test_cases[] = {
	/* "." at various start offsets. */
2355	{ UDA, CI, 0, 0, 0, 0, 1, { allany32, NULL }, test32_1 },
2356	{ UDA, CI, 2, 0, 0, -1, -1, { allany32, NULL }, test32_1 },
2357	{ UDA, CI, 0, 0, 0, 0, 1, { allany32, NULL }, test32_2 },
2358	{ UDA, CI, 1, 0, 0, 1, 2, { allany32, NULL }, test32_2 },
2359	{ UDA, CI, 2, 0, 0, -1, -1, { allany32, NULL }, test32_2 },
2360	{ UDA, CI, 3, 0, 0, -1, -1, { allany32, NULL }, test32_2 },
2361
	/* "\B", and in some rows "\b" as the second pattern. */
2362	{ UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary32, NULL }, test32_1 },
2363	{ UDA, CPI, 3, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_1 },
2364	{ UDA, CPI, 1, 0, 0, 1, 1, { non_word_boundary32, NULL }, test32_2 },
2365	{ UDA, CPI, 3, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_2 },
2366	{ UDA, CPI, 6, 0, 0, -1, -1, { non_word_boundary32, word_boundary32 }, test32_2 },
2367
	/* Caseless "(.)\1". */
2368	{ UDA | PCRE2_CASELESS, CPI, 0, 0, 0, 0, 2, { backreference32, NULL }, test32_3 },
2369	{ UDA | PCRE2_CASELESS, CPI, 1, 0, 0, -1, -1, { backreference32, NULL }, test32_3 },
2370
	/* "\X". */
2371	{ UDA, CPI, 0, 0, 0, 0, 1, { grapheme32, NULL }, test32_1 },
2372	{ UDA, CPI, 2, 0, 0, -1, -1, { grapheme32, NULL }, test32_1 },
2373	{ UDA, CPI, 1, 0, 0, 1, 2, { grapheme32, NULL }, test32_2 },
2374	{ UDA, CPI, 2, 0, 0, -1, -1, { grapheme32, NULL }, test32_2 },
2375	{ UDA, CPI, 3, 0, 0, -1, -1, { grapheme32, NULL }, test32_2 },
2376	{ UDA, CPI, 4, 0, 0, 4, 5, { grapheme32, NULL }, test32_2 },
2377
	/* "[^#]". */
2378	{ UDA, CPI, 0, 0, 0, -1, -1, { nothashmark32, NULL }, test32_4 },
2379	{ UDA, CPI, 1, 0, 0, 1, 2, { nothashmark32, NULL }, test32_4 },
2380	{ UDA, CPI, 2, 0, 0, -1, -1, { nothashmark32, NULL }, test32_4 },
2381	{ UDA, CPI, 1, 0, 0, 1, 2, { nothashmark32, NULL }, test32_2 },
2382	{ UDA, CPI, 2, 0, 0, -1, -1, { nothashmark32, NULL }, test32_2 },
2383
	/* Multiline "^\W". */
2384	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 2, 3, { afternl32, NULL }, test32_5 },
2385	{ PCRE2_UTF | PCRE2_MULTILINE, CI, 1, 0, 0, 3, 4, { afternl32, NULL }, test32_6 },
2386
	/* Terminator: the test loop stops at the first NULL pattern[0]. */
2387	{ 0, 0, 0, 0, 0, 0, 0, { NULL, NULL }, NULL }
2388};
2389
/* Drop the option shorthands used by the table above. */
2390#undef UDA
2391#undef CI
2392#undef CPI
2393
2394static int run_invalid_utf32_test(const struct invalid_utf32_regression_test_case *current,
2395	int pattern_index, int i, pcre2_compile_context_32 *ccontext, pcre2_match_data_32 *mdata)
2396{
2397	pcre2_code_32 *code;
2398	int result, errorcode;
2399	PCRE2_SIZE length, erroroffset;
2400	const PCRE2_UCHAR32 *input;
2401	PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_32(mdata);
2402
2403	if (current->pattern[i] == NULL)
2404		return 1;
2405
2406	code = pcre2_compile_32(current->pattern[i], PCRE2_ZERO_TERMINATED,
2407		current->compile_options, &errorcode, &erroroffset, ccontext);
2408
2409	if (!code) {
2410		printf("Pattern[%d:0] cannot be compiled. Error offset: %d\n", pattern_index, (int)erroroffset);
2411		return 0;
2412	}
2413
2414	if (pcre2_jit_compile_32(code, current->jit_compile_options) != 0) {
2415		printf("Pattern[%d:0] cannot be compiled by the JIT compiler.\n", pattern_index);
2416		pcre2_code_free_32(code);
2417		return 0;
2418	}
2419
2420	input = current->input;
2421	length = 0;
2422
2423	while (*input++ != 0)
2424		length++;
2425
2426	length -= current->skip_left + current->skip_right;
2427
2428	if (current->jit_compile_options & PCRE2_JIT_COMPLETE) {
2429		result = pcre2_jit_match_32(code, (current->input + current->skip_left),
2430			length, current->start_offset - current->skip_left, 0, mdata, NULL);
2431
2432		if (check_invalid_utf_result(pattern_index, "match", result, current->match_start, current->match_end, ovector)) {
2433			pcre2_code_free_32(code);
2434			return 0;
2435		}
2436	}
2437
2438	if (current->jit_compile_options & PCRE2_JIT_PARTIAL_SOFT) {
2439		result = pcre2_jit_match_32(code, (current->input + current->skip_left),
2440			length, current->start_offset - current->skip_left, PCRE2_PARTIAL_SOFT, mdata, NULL);
2441
2442		if (check_invalid_utf_result(pattern_index, "partial match", result, current->match_start, current->match_end, ovector)) {
2443			pcre2_code_free_32(code);
2444			return 0;
2445		}
2446	}
2447
2448	pcre2_code_free_32(code);
2449	return 1;
2450}
2451
2452static int invalid_utf32_regression_tests(void)
2453{
2454	const struct invalid_utf32_regression_test_case *current;
2455	pcre2_compile_context_32 *ccontext;
2456	pcre2_match_data_32 *mdata;
2457	int total = 0, successful = 0;
2458	int result;
2459
2460	printf("\nRunning invalid-utf32 JIT regression tests\n");
2461
2462	ccontext = pcre2_compile_context_create_32(NULL);
2463	pcre2_set_newline_32(ccontext, PCRE2_NEWLINE_ANY);
2464	mdata = pcre2_match_data_create_32(4, NULL);
2465
2466	for (current = invalid_utf32_regression_test_cases; current->pattern[0]; current++) {
2467		/* printf("\nPattern: %s :\n", current->pattern); */
2468		total++;
2469
2470		result = 1;
2471		if (!run_invalid_utf32_test(current, total - 1, 0, ccontext, mdata))
2472			result = 0;
2473		if (!run_invalid_utf32_test(current, total - 1, 1, ccontext, mdata))
2474			result = 0;
2475
2476		if (result) {
2477			successful++;
2478		}
2479
2480		printf(".");
2481		if ((total % 60) == 0)
2482			printf("\n");
2483	}
2484
2485	if ((total % 60) != 0)
2486		printf("\n");
2487
2488	pcre2_match_data_free_32(mdata);
2489	pcre2_compile_context_free_32(ccontext);
2490
2491	if (total == successful) {
2492		printf("\nAll invalid UTF32 JIT regression tests are successfully passed.\n");
2493		return 0;
2494	} else {
2495		printf("\nInvalid UTF32 successful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
2496		return 1;
2497	}
2498}
2499
2500#else /* !SUPPORT_UNICODE || !SUPPORT_PCRE2_32 */
2501
/* Placeholder used when Unicode or 32-bit support is not compiled in: there is
nothing to run, so report that no test failed. */
static int invalid_utf32_regression_tests(void)
{
	const int failed = 0;

	return failed;
}
2506
2507#endif /* SUPPORT_UNICODE && SUPPORT_PCRE2_32 */
2508
2509/* End of pcre2_jit_test.c */
2510