1/*************************************************
2*      Perl-Compatible Regular Expressions       *
3*************************************************/
4
5/* PCRE is a library of functions to support regular expressions whose syntax
6and semantics are as close as possible to those of the Perl 5 language.
7
8                       Written by Philip Hazel
9                    This module by Zoltan Herczeg
10     Original API code Copyright (c) 1997-2012 University of Cambridge
11          New API code Copyright (c) 2016-2021 University of Cambridge
12
13-----------------------------------------------------------------------------
14Redistribution and use in source and binary forms, with or without
15modification, are permitted provided that the following conditions are met:
16
17    * Redistributions of source code must retain the above copyright notice,
18      this list of conditions and the following disclaimer.
19
20    * Redistributions in binary form must reproduce the above copyright
21      notice, this list of conditions and the following disclaimer in the
22      documentation and/or other materials provided with the distribution.
23
24    * Neither the name of the University of Cambridge nor the names of its
25      contributors may be used to endorse or promote products derived from
26      this software without specific prior written permission.
27
28THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
29AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38POSSIBILITY OF SUCH DAMAGE.
39-----------------------------------------------------------------------------
40*/
41
42#ifdef HAVE_CONFIG_H
43#include "config.h"
44#endif
45
46#include "pcre2_internal.h"
47
48#ifdef SUPPORT_JIT
49
50/* All-in-one: Since we use the JIT compiler only from here,
51we just include it. This way we don't need to touch the build
52system files. */
53
/* sljit configuration. These macros have to be defined before
sljit/sljitLir.c is included further down. */
#define SLJIT_CONFIG_AUTO 1
#define SLJIT_CONFIG_STATIC 1
#define SLJIT_VERBOSE 0

/* SLJIT_DEBUG follows the PCRE2_DEBUG build setting. */
#ifdef PCRE2_DEBUG
#define SLJIT_DEBUG 1
#else
#define SLJIT_DEBUG 0
#endif

/* Route the sljit allocation hooks to the pcre2_memctl based wrappers
pcre2_jit_malloc() and pcre2_jit_free(). */
#define SLJIT_MALLOC(size, allocator_data) pcre2_jit_malloc(size, allocator_data)
#define SLJIT_FREE(ptr, allocator_data) pcre2_jit_free(ptr, allocator_data)
66
67static void * pcre2_jit_malloc(size_t size, void *allocator_data)
68{
69pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
70return allocator->malloc(size, allocator->memory_data);
71}
72
73static void pcre2_jit_free(void *ptr, void *allocator_data)
74{
75pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
76allocator->free(ptr, allocator->memory_data);
77}
78
79#include "sljit/sljitLir.c"
80
81#if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
82#error Unsupported architecture
83#endif
84
85/* Defines for debugging purposes. */
86
87/* 1 - Use unoptimized capturing brackets.
88   2 - Enable capture_last_ptr (includes option 1). */
89/* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
90
91/* 1 - Always have a control head. */
92/* #define DEBUG_FORCE_CONTROL_HEAD 1 */
93
94/* Allocate memory for the regex stack on the real machine stack.
95Fast, but limited size. */
96#define MACHINE_STACK_SIZE 32768
97
/* Growth rate for stack allocated by the OS. Should be a multiple
of the page size. */
100#define STACK_GROWTH_RATE 8192
101
102/* Enable to check that the allocation could destroy temporaries. */
103#if defined SLJIT_DEBUG && SLJIT_DEBUG
104#define DESTROY_REGISTERS 1
105#endif
106
107/*
A short summary of the backtracking mechanism employed by the JIT code generator:
109
The code generator follows the recursive nature of the PERL compatible regular
expressions. The basic blocks of regular expressions are condition checkers
which execute different commands depending on the result of the condition check.
The relationship between the operators can be horizontal (concatenation) and
vertical (sub-expression) (See struct backtrack_common for more details).
115
116  'ab' - 'a' and 'b' regexps are concatenated
117  'a+' - 'a' is the sub-expression of the '+' operator
118
The condition checkers are boolean (true/false) checkers. Machine code is generated
for the checker itself and for the actions depending on the result of the checker.
The 'true' case is called the matching path (expected path), and the other is called
the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
branches on the matching path.
124
125 Greedy star operator (*) :
126   Matching path: match happens.
127   Backtrack path: match failed.
128 Non-greedy star operator (*?) :
129   Matching path: no need to perform a match.
130   Backtrack path: match is required.
131
The following example shows the code generated for a capturing bracket
with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
consider the following regular expression:
135
136   A(B|C)D
137
138The generated code will be the following:
139
140 A matching path
141 '(' matching path (pushing arguments to the stack)
142 B matching path
143 ')' matching path (pushing arguments to the stack)
144 D matching path
145 return with successful match
146
147 D backtrack path
148 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
149 B backtrack path
150 C expected path
151 jump to D matching path
152 C backtrack path
153 A backtrack path
154
 Notice that the order of the backtrack code paths is the opposite of the fast
 code paths. In this way the topmost value on the stack always belongs
 to the current backtrack code path. The backtrack path must check
 whether there is a next alternative. If so, it needs to jump back to
 the matching path eventually. Otherwise it needs to clear out its own stack
 frame and continue the execution on the backtrack code paths.
161*/
162
163/*
164Saved stack frames:
165
Atomic blocks and asserts require reloading the values of private data
when backtracking is performed. Because of OP_RECURSE, the data
are not necessarily known at compile time, thus we need a dynamic restore
mechanism.
170
171The stack frames are stored in a chain list, and have the following format:
172([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
173
174Thus we can restore the private data to a particular point in the stack.
175*/
176
/* Argument block for the compiled code: functions of type jit_function
(declared further down) take a pointer to this structure. */
typedef struct jit_arguments {
  /* Pointers first. */
  struct sljit_stack *stack;
  PCRE2_SPTR str;
  PCRE2_SPTR begin;
  PCRE2_SPTR end;
  pcre2_match_data *match_data;
  PCRE2_SPTR startchar_ptr;
  PCRE2_UCHAR *mark_ptr;
  /* Callout function pointer and an untyped pointer stored next to it
  (presumably the user data passed back to the callout - confirm). */
  int (*callout)(pcre2_callout_block *, void *);
  void *callout_data;
  /* Everything else after. */
  sljit_uw offset_limit;
  sljit_u32 limit_match;
  sljit_u32 oveccount;
  sljit_u32 options;
} jit_arguments;
194
/* Number of entries in each per-mode array of executable_functions. */
#define JIT_NUMBER_OF_COMPILE_MODES 3

/* Bookkeeping for generated code. Each of the three arrays has
JIT_NUMBER_OF_COMPILE_MODES entries (presumably indexed by compile mode, so
that entry i of every array belongs to the same compilation - confirm). */
typedef struct executable_functions {
  void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
  void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
  sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
  sljit_u32 top_bracket;
  sljit_u32 limit_match;
} executable_functions;
204
/* Node of a singly linked list of sljit jumps; next links to the following
node. */
typedef struct jump_list {
  struct sljit_jump *jump;
  struct jump_list *next;
} jump_list;
209
/* Node of a singly linked list of stubs. Each node pairs a jump (start)
with a label (quit). */
typedef struct stub_list {
  struct sljit_jump *start;
  struct sljit_label *quit;
  struct stub_list *next;
} stub_list;
215
/* Negative sentinel values. Presumably stored where a frame size is
expected (the framesize members below are documented as "less than 0" when
no frame is needed) - confirm against the users of these constants. */
enum frame_types {
  no_frame = -1,
  no_stack = -2
};
220
/* Type tags for control entries (presumably the entries of the control
verb chain referred to by control_head_ptr - confirm). */
enum control_types {
  type_mark = 0,
  type_then_trap = 1
};
225
/* Kinds of early fail handling. NOTE(review): the users of these values
are not visible in this part of the file. */
enum  early_fail_types {
  type_skip = 0,
  type_fail = 1,
  type_fail_range = 2
};
231
/* Pointer to a function that takes a jit_arguments block and returns int,
declared with the SLJIT_FUNC calling convention macro. */
typedef int (SLJIT_FUNC *jit_function)(jit_arguments *args);
233
/* The following structure is the key data type for the recursive
code generator. It is allocated by compile_matchingpath, and contains
the arguments for compile_backtrackingpath. Must be the first member
of its descendants (the *_backtrack structures below embed it as their
first field, named "common"). */
typedef struct backtrack_common {
  /* Concatenation stack. */
  struct backtrack_common *prev;
  jump_list *nextbacktracks;
  /* Internal stack (for component operators). */
  struct backtrack_common *top;
  jump_list *topbacktracks;
  /* Opcode pointer. */
  PCRE2_SPTR cc;
} backtrack_common;
248
/* Backtrack data for assertions; extends backtrack_common. */
typedef struct assert_backtrack {
  backtrack_common common;
  jump_list *condfailed;
  /* Less than 0 if a frame is not needed. */
  int framesize;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
  /* For iterators. */
  struct sljit_label *matchingpath;
} assert_backtrack;
259
/* Backtrack data for brackets; extends backtrack_common. The members of u
share storage (it is a union), so only the one matching the bracket kind
noted in its comment may be used. */
typedef struct bracket_backtrack {
  backtrack_common common;
  /* Where to continue if an alternative is successfully matched. */
  struct sljit_label *alternative_matchingpath;
  /* For rmin and rmax iterators. */
  struct sljit_label *recursive_matchingpath;
  /* For greedy ? operator. */
  struct sljit_label *zero_matchingpath;
  /* Contains the branches of a failed condition. */
  union {
    /* Both for OP_COND, OP_SCOND. */
    jump_list *condfailed;
    assert_backtrack *assert;
    /* For OP_ONCE. Less than 0 if not needed. */
    int framesize;
    /* For brackets with >3 alternatives. */
    struct sljit_put_label *matching_put_label;
  } u;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
} bracket_backtrack;
281
/* Backtrack data extending backtrack_common (judging by the name, for the
possessive *POS bracket opcodes - confirm). */
typedef struct bracketpos_backtrack {
  backtrack_common common;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
  /* Reverting stack is needed. */
  int framesize;
  /* Allocated stack size. */
  int stacksize;
} bracketpos_backtrack;
291
/* Backtrack data extending backtrack_common with a single matching path
label (judging by the name, for OP_BRAMINZERO - confirm). */
typedef struct braminzero_backtrack {
  backtrack_common common;
  struct sljit_label *matchingpath;
} braminzero_backtrack;
296
/* Backtrack data for character iterators; extends backtrack_common. u is
a union: either a jump list (backtracks) or the charpos record is stored,
never both. */
typedef struct char_iterator_backtrack {
  backtrack_common common;
  /* Next iteration. */
  struct sljit_label *matchingpath;
  union {
    jump_list *backtracks;
    struct {
      unsigned int othercasebit;
      PCRE2_UCHAR chr;
      BOOL enabled;
    } charpos;
  } u;
} char_iterator_backtrack;
310
/* Backtrack data extending backtrack_common (judging by the name, for
repeated back references - confirm). */
typedef struct ref_iterator_backtrack {
  backtrack_common common;
  /* Next iteration. */
  struct sljit_label *matchingpath;
} ref_iterator_backtrack;
316
/* Node of a singly linked list describing one recursively called
function. */
typedef struct recurse_entry {
  struct recurse_entry *next;
  /* Contains the function entry label. */
  struct sljit_label *entry_label;
  /* Contains the backtrack label of the function. */
  struct sljit_label *backtrack_label;
  /* Collects the entry calls until the function is created. */
  jump_list *entry_calls;
  /* Collects the backtrack calls until the function is created. */
  jump_list *backtrack_calls;
  /* Points to the starting opcode. */
  sljit_sw start;
} recurse_entry;
330
/* Backtrack data for recursion; extends backtrack_common and refers to the
recurse_entry of the called pattern. */
typedef struct recurse_backtrack {
  backtrack_common common;
  /* Return to the matching path. */
  struct sljit_label *matchingpath;
  /* Recursive pattern. */
  recurse_entry *entry;
  /* Pattern is inlined. */
  BOOL inlined_pattern;
} recurse_backtrack;
340
/* Extra opcode value that reuses OP_TABLE_LENGTH (presumably the first
value after the real opcodes, so that it cannot clash with them - confirm). */
#define OP_THEN_TRAP OP_TABLE_LENGTH

/* Backtrack data for a then trap; extends backtrack_common. */
typedef struct then_trap_backtrack {
  backtrack_common common;
  /* If then_trap is not NULL, this structure contains the real
  then_trap for the backtracking path. */
  struct then_trap_backtrack *then_trap;
  /* Points to the starting opcode. */
  sljit_sw start;
  /* Exit point for the then opcodes of this alternative. */
  jump_list *quit;
  /* Frame size of the current alternative. */
  int framesize;
} then_trap_backtrack;
355
/* MAX_DIFF_CHARS is the capacity of fast_forward_char_data.chars.
NOTE(review): the use of MAX_N_CHARS is not visible in this part of the
file. */
#define MAX_N_CHARS 12
#define MAX_DIFF_CHARS 5

/* Set of characters that may occur at one position, holding at most
MAX_DIFF_CHARS distinct values. */
typedef struct fast_forward_char_data {
  /* Number of characters in the chars array, 255 for any character. */
  sljit_u8 count;
  /* Number of last UTF-8 characters in the chars array. */
  sljit_u8 last_count;
  /* Available characters in the current position. */
  PCRE2_UCHAR chars[MAX_DIFF_CHARS];
} fast_forward_char_data;
367
368#define MAX_CLASS_RANGE_SIZE 4
369#define MAX_CLASS_CHARS_SIZE 3
370
/* State shared by the code generator functions. They receive it through a
pointer named "common"; several macros in this file (OVECTOR_START,
OVECTOR_PRIV, PRIVATE_DATA, DEFINE_COMPILER) dereference a variable of that
name. Members whose name ends in _ptr and whose type is sljit_s32 hold
offsets, not C pointers. */
typedef struct compiler_common {
  /* The sljit generic compiler. */
  struct sljit_compiler *compiler;
  /* Compiled regular expression. */
  pcre2_real_code *re;
  /* First byte code. */
  PCRE2_SPTR start;
  /* Maps private data offset to each opcode. */
  sljit_s32 *private_data_ptrs;
  /* Chain list of read-only data ptrs. */
  void *read_only_data_head;
  /* Tells whether the capturing bracket is optimized. */
  sljit_u8 *optimized_cbracket;
  /* Tells whether the starting offset is a target of then. */
  sljit_u8 *then_offsets;
  /* Current position where a THEN must jump. */
  then_trap_backtrack *then_trap;
  /* Starting offset of private data for capturing brackets. */
  sljit_s32 cbra_ptr;
  /* Output vector starting point. Must be divisible by 2. */
  sljit_s32 ovector_start;
  /* Points to the starting character of the current match. */
  sljit_s32 start_ptr;
  /* Last known position of the requested byte. */
  sljit_s32 req_char_ptr;
  /* Head of the last recursion. */
  sljit_s32 recursive_head_ptr;
  /* First inspected character for partial matching.
     (Needed for avoiding zero length partial matches.) */
  sljit_s32 start_used_ptr;
  /* Starting pointer for partial soft matches. */
  sljit_s32 hit_start;
  /* Pointer of the match end position. */
  sljit_s32 match_end_ptr;
  /* Points to the marked string. */
  sljit_s32 mark_ptr;
  /* Recursive control verb management chain. */
  sljit_s32 control_head_ptr;
  /* Points to the last matched capture block index. */
  sljit_s32 capture_last_ptr;
  /* Fast forward skipping byte code pointer. */
  PCRE2_SPTR fast_forward_bc_ptr;
  /* Locals used by fast fail optimization. */
  sljit_s32 early_fail_start_ptr;
  sljit_s32 early_fail_end_ptr;
  /* Variables used by recursive call generator. */
  sljit_s32 recurse_bitset_size;
  uint8_t *recurse_bitset;

  /* Flipped and lower case tables. */
  const sljit_u8 *fcc;
  sljit_sw lcc;
  /* Mode can be PCRE2_JIT_COMPLETE and others. */
  int mode;
  /* TRUE, when empty match is accepted for partial matching. */
  BOOL allow_empty_partial;
  /* TRUE, when minlength is greater than 0.
     NOTE(review): the member name suggests the opposite condition, and
     check_opcode_types sets it to TRUE when it finds OP_SET_SOM - confirm
     which meaning is intended. */
  BOOL might_be_empty;
  /* \K is found in the pattern. */
  BOOL has_set_som;
  /* (*SKIP:arg) is found in the pattern. */
  BOOL has_skip_arg;
  /* (*THEN) is found in the pattern. */
  BOOL has_then;
  /* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */
  BOOL has_skip_in_assert_back;
  /* Quit is redirected by recurse, negative assertion, or positive assertion in conditional block. */
  BOOL local_quit_available;
  /* Currently in a positive assertion. */
  BOOL in_positive_assertion;
  /* Newline control. */
  int nltype;
  sljit_u32 nlmax;
  sljit_u32 nlmin;
  int newline;
  int bsr_nltype;
  sljit_u32 bsr_nlmax;
  sljit_u32 bsr_nlmin;
  /* Dollar endonly. */
  int endonly;
  /* Tables. */
  sljit_sw ctypes;
  /* Named capturing brackets. */
  PCRE2_SPTR name_table;
  sljit_sw name_count;
  sljit_sw name_entry_size;

  /* Labels and jump lists. */
  struct sljit_label *partialmatchlabel;
  struct sljit_label *quit_label;
  struct sljit_label *abort_label;
  struct sljit_label *accept_label;
  struct sljit_label *ff_newline_shortcut;
  stub_list *stubs;
  recurse_entry *entries;
  recurse_entry *currententry;
  jump_list *partialmatch;
  jump_list *quit;
  jump_list *positive_assertion_quit;
  jump_list *abort;
  jump_list *failed_match;
  jump_list *accept;
  jump_list *calllimit;
  jump_list *stackalloc;
  jump_list *revertframes;
  jump_list *wordboundary;
  jump_list *anynewline;
  jump_list *hspace;
  jump_list *vspace;
  jump_list *casefulcmp;
  jump_list *caselesscmp;
  jump_list *reset_match;
  BOOL unset_backref;
  BOOL alt_circumflex;
#ifdef SUPPORT_UNICODE
  BOOL utf;
  BOOL invalid_utf;
  BOOL ucp;
  /* Points to saving area for iref. */
  sljit_s32 iref_ptr;
  jump_list *getucd;
  jump_list *getucdtype;
#if PCRE2_CODE_UNIT_WIDTH == 8
  jump_list *utfreadchar;
  jump_list *utfreadtype8;
  jump_list *utfpeakcharback;
#endif
#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
  jump_list *utfreadchar_invalid;
  jump_list *utfreadnewline_invalid;
  jump_list *utfmoveback_invalid;
  jump_list *utfpeakcharback_invalid;
#endif
#endif /* SUPPORT_UNICODE */
} compiler_common;
506
/* For byte_sequence_compare. Only length and sourcereg exist on every
configuration; the remaining members are compiled in when SLJIT_UNALIGNED
is defined and non-zero. The unions c and oc have identical layouts: the
same storage viewed as one sljit_s32, one sljit_u16, or an array of code
units whose element type follows PCRE2_CODE_UNIT_WIDTH. */

typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if PCRE2_CODE_UNIT_WIDTH == 8
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif PCRE2_CODE_UNIT_WIDTH == 16
    sljit_u16 asuchars[2];
#elif PCRE2_CODE_UNIT_WIDTH == 32
    sljit_u32 asuchars[1];
#endif
  } c;
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if PCRE2_CODE_UNIT_WIDTH == 8
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif PCRE2_CODE_UNIT_WIDTH == 16
    sljit_u16 asuchars[2];
#elif PCRE2_CODE_UNIT_WIDTH == 32
    sljit_u32 asuchars[1];
#endif
  } oc;
#endif
} compare_context;
540
/* Undefine sljit macros. CMP is defined again further down as a shortcut
for sljit_emit_cmp. */
#undef CMP

/* Used for accessing the elements of the stack: scales the slot index i
by SSIZE_OF(sw). */
#define STACK(i)      ((i) * SSIZE_OF(sw))
546
/* Register assignment. When sljit announces a preferred shift register it
must be SLJIT_R2 or SLJIT_R3 (anything else is a compile error); TMP2 and
TMP3 are swapped in the R3 case so that TMP2 is always the preferred one. */
#ifdef SLJIT_PREF_SHIFT_REG
#if SLJIT_PREF_SHIFT_REG == SLJIT_R2
/* Nothing. */
#elif SLJIT_PREF_SHIFT_REG == SLJIT_R3
#define SHIFT_REG_IS_R3
#else
#error "Unsupported shift register"
#endif
#endif

#define TMP1          SLJIT_R0
#ifdef SHIFT_REG_IS_R3
#define TMP2          SLJIT_R3
#define TMP3          SLJIT_R2
#else
#define TMP2          SLJIT_R2
#define TMP3          SLJIT_R3
#endif
#define STR_PTR       SLJIT_R1
#define STR_END       SLJIT_S0
#define STACK_TOP     SLJIT_S1
#define STACK_LIMIT   SLJIT_S2
#define COUNT_MATCH   SLJIT_S3
#define ARGUMENTS     SLJIT_S4
#define RETURN_ADDR   SLJIT_R4
572
/* HAS_VIRTUAL_REGISTERS is 1 only for the x86-32 configuration and 0 for
every other one. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#define HAS_VIRTUAL_REGISTERS 1
#else
#define HAS_VIRTUAL_REGISTERS 0
#endif
578
/* Local space layout. The first five machine words (offsets 0 to 4, in
units of sizeof(sljit_sw)) are fixed; the remaining offsets are taken from
the compiler_common instance named "common" that must be in scope where
the OVECTOR* and PRIVATE_DATA macros are used. */
/* These two locals can be used by the current opcode. */
#define LOCALS0          (0 * sizeof(sljit_sw))
#define LOCALS1          (1 * sizeof(sljit_sw))
/* Two local variables for possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0      (2 * sizeof(sljit_sw))
#define POSSESSIVE1      (3 * sizeof(sljit_sw))
/* Max limit of recursions. */
#define LIMIT_MATCH      (4 * sizeof(sljit_sw))
/* The output vector is stored on the stack, and contains pointers
to characters. The vector data is divided into two groups: the first
group contains the start / end character pointers, and the second is
the start pointers when the end of the capturing group has not yet reached. */
#define OVECTOR_START    (common->ovector_start)
#define OVECTOR(i)       (OVECTOR_START + (i) * SSIZE_OF(sw))
#define OVECTOR_PRIV(i)  (common->cbra_ptr + (i) * SSIZE_OF(sw))
/* Looks up the private data offset of the opcode at cc; the table is indexed
by the distance of cc from common->start. */
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
596
/* Code unit width dependent helpers:
  MOV_UCHAR    the sljit move opcode for one code unit
  UCHAR_SHIFT  shift count matching the multiplier used by IN_UCHARS;
               not defined for the 8-bit width
  IN_UCHARS(x) x code units expressed in bytes */
#if PCRE2_CODE_UNIT_WIDTH == 8
#define MOV_UCHAR  SLJIT_MOV_U8
#define IN_UCHARS(x) (x)
#elif PCRE2_CODE_UNIT_WIDTH == 16
#define MOV_UCHAR  SLJIT_MOV_U16
#define UCHAR_SHIFT (1)
#define IN_UCHARS(x) ((x) * 2)
#elif PCRE2_CODE_UNIT_WIDTH == 32
#define MOV_UCHAR  SLJIT_MOV_U32
#define UCHAR_SHIFT (2)
#define IN_UCHARS(x) ((x) * 4)
#else
#error Unsupported compiling mode
#endif
611
/* Shortcuts. DEFINE_COMPILER declares a local variable "compiler" from
common->compiler; every other macro below passes that variable to the
corresponding sljit call, so it must be in scope where they are used.
JUMPTO and CMPTO emit the jump and bind it to an existing label; JUMPHERE
binds an earlier jump to a label emitted at the current position. */
#define DEFINE_COMPILER \
  struct sljit_compiler *compiler = common->compiler
#define OP1(op, dst, dstw, src, srcw) \
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
#define OP2U(op, src1, src1w, src2, src2w) \
  sljit_emit_op2u(compiler, (op), (src1), (src1w), (src2), (src2w))
#define OP_SRC(op, src, srcw) \
  sljit_emit_op_src(compiler, (op), (src), (srcw))
#define LABEL() \
  sljit_emit_label(compiler)
#define JUMP(type) \
  sljit_emit_jump(compiler, (type))
#define JUMPTO(type, label) \
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
#define JUMPHERE(jump) \
  sljit_set_label((jump), sljit_emit_label(compiler))
#define SET_LABEL(jump, label) \
  sljit_set_label((jump), (label))
#define CMP(type, src1, src1w, src2, src2w) \
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
#define CMPTO(type, src1, src1w, src2, src2w, label) \
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
#define OP_FLAGS(op, dst, dstw, type) \
  sljit_emit_op_flags(compiler, (op), (dst), (dstw), (type))
#define CMOV(type, dst_reg, src, srcw) \
  sljit_emit_cmov(compiler, (type), (dst_reg), (src), (srcw))
#define GET_LOCAL_BASE(dst, dstw, offset) \
  sljit_get_local_base(compiler, (dst), (dstw), (offset))
643
/* Character related constants. NOTE(review): their users are not visible
in this part of the file; judging by the names, READ_CHAR_MAX is an upper
bound for character reads and the other two are special character values -
confirm before relying on this. */
#define READ_CHAR_MAX 0x7fffffff

#define INVALID_UTF_CHAR -1
#define UNASSIGNED_UTF_CHAR 888
648
649#if defined SUPPORT_UNICODE
650#if PCRE2_CODE_UNIT_WIDTH == 8
651
/* Reads one character of possibly invalid UTF-8, moving forward.

  c               lvalue that receives the decoded code point
  ptr             read pointer (lvalue); ptr[0] is read unconditionally
  end             limit: ptr[k] (k > 0) is only read after ptr + k < end
  invalid_action  statement executed when the sequence is rejected

On success ptr is moved past the character (1 to 4 bytes). Two byte
sequences need a lead byte of 0xc2..0xdf, which already excludes overlong
forms. Three and four byte sequences are range checked after decoding
(overlong forms, surrogates, values of 0x110000 and above); when one of
these checks fails, invalid_action runs with ptr ALREADY advanced by 3 or
4. For every other rejection ptr is unchanged, but c may hold a partial
value. The arguments are evaluated several times. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if (ptr[0] <= 0x7f) \
    c = *ptr++; \
  else if (ptr + 1 < end && ptr[1] >= 0x80 && ptr[1] < 0xc0) \
    { \
    c = ptr[1] - 0x80; \
    \
    if (ptr[0] >= 0xc2 && ptr[0] <= 0xdf) \
      { \
      c |= (ptr[0] - 0xc0) << 6; \
      ptr += 2; \
      } \
    else if (ptr + 2 < end && ptr[2] >= 0x80 && ptr[2] < 0xc0) \
      { \
      c = c << 6 | (ptr[2] - 0x80); \
      \
      if (ptr[0] >= 0xe0 && ptr[0] <= 0xef) \
        { \
        c |= (ptr[0] - 0xe0) << 12; \
        ptr += 3; \
        \
        if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) \
          { \
          invalid_action; \
          } \
        } \
      else if (ptr + 3 < end && ptr[3] >= 0x80 && ptr[3] < 0xc0) \
        { \
        c = c << 6 | (ptr[3] - 0x80); \
        \
        if (ptr[0] >= 0xf0 && ptr[0] <= 0xf4) \
          { \
          c |= (ptr[0] - 0xf0) << 18; \
          ptr += 4; \
          \
          if (c >= 0x110000 || c < 0x10000) \
            { \
            invalid_action; \
            } \
          } \
        else \
          { \
          invalid_action; \
          } \
        } \
      else \
        { \
        invalid_action; \
        } \
      } \
    else \
      { \
      invalid_action; \
      } \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
713
/* Reads the character that ends just before ptr in possibly invalid UTF-8,
moving backward.

  c               lvalue that receives the decoded code point
  ptr             read pointer (lvalue); ptr[-1] is read unconditionally,
                  so the caller must guarantee ptr > start
  start           limit: ptr[-k-1] is only read after ptr - k > start
  invalid_action  statement executed when the sequence is rejected

On success ptr is moved to the first byte of the character. The bytes are
met in reverse order, so c starts out with the LOWEST six bits (taken from
the last continuation byte) and every byte found further back contributes
six bits at the next higher position (<< 6, << 12, << 18). The already
collected bits must therefore not be shifted as they are when reading
forward; doing so swaps the continuation bytes, e.g. E2 82 AC would give
0x2B02 instead of U+20AC and every valid four byte character would fail
the range check.

As in GETCHARINC_INVALID, the overlong / surrogate / out of range checks of
three and four byte sequences run after ptr has been moved, so
invalid_action sees ptr already moved back by 3 or 4 in those cases. The
expansion is a plain brace block (not do/while) so that a control statement
passed as invalid_action applies to the code surrounding the macro. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  c = ptr[-1]; \
  if (c <= 0x7f) \
    ptr--; \
  else if (ptr - 1 > start && ptr[-1] >= 0x80 && ptr[-1] < 0xc0) \
    { \
    c -= 0x80; \
    \
    if (ptr[-2] >= 0xc2 && ptr[-2] <= 0xdf) \
      { \
      c |= (ptr[-2] - 0xc0) << 6; \
      ptr -= 2; \
      } \
    else if (ptr - 2 > start && ptr[-2] >= 0x80 && ptr[-2] < 0xc0) \
      { \
      c |= (ptr[-2] - 0x80) << 6; \
      \
      if (ptr[-3] >= 0xe0 && ptr[-3] <= 0xef) \
        { \
        c |= (ptr[-3] - 0xe0) << 12; \
        ptr -= 3; \
        \
        if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) \
          { \
          invalid_action; \
          } \
        } \
      else if (ptr - 3 > start && ptr[-3] >= 0x80 && ptr[-3] < 0xc0) \
        { \
        c |= (ptr[-3] - 0x80) << 12; \
        \
        if (ptr[-4] >= 0xf0 && ptr[-4] <= 0xf4) \
          { \
          c |= (ptr[-4] - 0xf0) << 18; \
          ptr -= 4; \
          \
          if (c >= 0x110000 || c < 0x10000) \
            { \
            invalid_action; \
            } \
          } \
        else \
          { \
          invalid_action; \
          } \
        } \
      else \
        { \
        invalid_action; \
        } \
      } \
    else \
      { \
      invalid_action; \
      } \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
776
777#elif PCRE2_CODE_UNIT_WIDTH == 16
778
/* Reads one character of possibly invalid UTF-16, moving forward.

  c               lvalue that receives the code point
  ptr             read pointer (lvalue); ptr[0] is read unconditionally
  end             limit: ptr[1] is only read after ptr + 1 < end
  invalid_action  statement executed for an unpaired surrogate

A code unit outside 0xd800..0xdfff is returned as is. A high surrogate
followed (before end) by a low surrogate is combined into a value of
0x10000 or above and ptr moves by two. Everything else is rejected with ptr
unchanged. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if (ptr[0] >= 0xd800 && ptr[0] < 0xe000) \
    { \
    if (ptr[0] < 0xdc00 && ptr + 1 < end && ptr[1] >= 0xdc00 && ptr[1] < 0xe000) \
      { \
      c = 0x10000 + (((ptr[0] - 0xd800) << 10) | (ptr[1] - 0xdc00)); \
      ptr += 2; \
      } \
    else \
      { \
      invalid_action; \
      } \
    } \
  else \
    c = *ptr++; \
  }
793
/* Reads the character that ends just before ptr in possibly invalid UTF-16,
moving backward.

  c               lvalue that receives the code point; it is always
                  assigned ptr[-1] first
  ptr             read pointer (lvalue); ptr[-1] is read unconditionally
  start           limit: ptr[-2] is only read after ptr - 1 > start
  invalid_action  statement executed for an unpaired surrogate

A low surrogate preceded (not before start) by a high surrogate is combined
and ptr moves back by two; a non-surrogate moves ptr back by one. Everything
else is rejected with ptr unchanged. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  c = ptr[-1]; \
  if (c >= 0xd800 && c < 0xe000) \
    { \
    if (c >= 0xdc00 && ptr - 1 > start && ptr[-2] >= 0xd800 && ptr[-2] < 0xdc00) \
      { \
      c = 0x10000 + (((ptr[-2] - 0xd800) << 10) | (c - 0xdc00)); \
      ptr -= 2; \
      } \
    else \
      { \
      invalid_action; \
      } \
    } \
  else \
    ptr--; \
  }
809
810
811#elif PCRE2_CODE_UNIT_WIDTH == 32
812
/* Reads one character of possibly invalid UTF-32, moving forward.

  c               lvalue that receives the code point
  ptr             read pointer (lvalue); ptr[0] is read unconditionally
  end             not used by this variant
  invalid_action  statement executed for a surrogate (0xd800..0xdfff) or a
                  value of 0x110000 and above; ptr is unchanged then */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if ((ptr[0] >= 0xd800 && ptr[0] < 0xe000) || ptr[0] >= 0x110000) \
    { \
    invalid_action; \
    } \
  else \
    c = *ptr++; \
  }
822
/* Reads the character just before ptr in possibly invalid UTF-32, moving
backward.

  c               lvalue that receives ptr[-1], also when it is rejected
  ptr             read pointer (lvalue); ptr[-1] is read unconditionally
  start           not used by this variant
  invalid_action  statement executed for a surrogate (0xd800..0xdfff) or a
                  value of 0x110000 and above; ptr is unchanged then */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  c = ptr[-1]; \
  if ((ptr[-1] >= 0xd800 && ptr[-1] < 0xe000) || ptr[-1] >= 0x110000) \
    { \
    invalid_action; \
    } \
  else \
    ptr--; \
  }
833
834#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
835#endif /* SUPPORT_UNICODE */
836
837static PCRE2_SPTR bracketend(PCRE2_SPTR cc)
838{
839SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
840do cc += GET(cc, 1); while (*cc == OP_ALT);
841SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
842cc += 1 + LINK_SIZE;
843return cc;
844}
845
846static int no_alternatives(PCRE2_SPTR cc)
847{
848int count = 0;
849SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
850do
851  {
852  cc += GET(cc, 1);
853  count++;
854  }
855while (*cc == OP_ALT);
856SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
857return count;
858}
859
/* Functions which might need modification for every newly supported opcode:
861 next_opcode
862 check_opcode_types
863 set_private_data_ptrs
864 get_framesize
865 init_frame
866 get_recurse_data_length
867 copy_recurse_data
868 compile_matchingpath
869 compile_backtrackingpath
870*/
871
/* Returns a pointer to the opcode that follows the one at cc.

  common  compiler state; only its utf flag is read, and only when
          SUPPORT_UNICODE is defined
  cc      points to the opcode to step over

NULL is returned when the opcode cannot be stepped over: OP_ANYBYTE in UTF
mode, or an opcode that is not listed below (that case also triggers
SLJIT_UNREACHABLE). */
static PCRE2_SPTR next_opcode(compiler_common *common, PCRE2_SPTR cc)
{
SLJIT_UNUSED_ARG(common);
switch(*cc)
  {
  /* Opcodes whose length is given directly by the OP_lengths table. */
  case OP_SOD:
  case OP_SOM:
  case OP_SET_SOM:
  case OP_NOT_WORD_BOUNDARY:
  case OP_WORD_BOUNDARY:
  case OP_NOT_DIGIT:
  case OP_DIGIT:
  case OP_NOT_WHITESPACE:
  case OP_WHITESPACE:
  case OP_NOT_WORDCHAR:
  case OP_WORDCHAR:
  case OP_ANY:
  case OP_ALLANY:
  case OP_NOTPROP:
  case OP_PROP:
  case OP_ANYNL:
  case OP_NOT_HSPACE:
  case OP_HSPACE:
  case OP_NOT_VSPACE:
  case OP_VSPACE:
  case OP_EXTUNI:
  case OP_EODN:
  case OP_EOD:
  case OP_CIRC:
  case OP_CIRCM:
  case OP_DOLL:
  case OP_DOLLM:
  case OP_CRSTAR:
  case OP_CRMINSTAR:
  case OP_CRPLUS:
  case OP_CRMINPLUS:
  case OP_CRQUERY:
  case OP_CRMINQUERY:
  case OP_CRRANGE:
  case OP_CRMINRANGE:
  case OP_CRPOSSTAR:
  case OP_CRPOSPLUS:
  case OP_CRPOSQUERY:
  case OP_CRPOSRANGE:
  case OP_CLASS:
  case OP_NCLASS:
  case OP_REF:
  case OP_REFI:
  case OP_DNREF:
  case OP_DNREFI:
  case OP_RECURSE:
  case OP_CALLOUT:
  case OP_ALT:
  case OP_KET:
  case OP_KETRMAX:
  case OP_KETRMIN:
  case OP_KETRPOS:
  case OP_REVERSE:
  case OP_ASSERT:
  case OP_ASSERT_NOT:
  case OP_ASSERTBACK:
  case OP_ASSERTBACK_NOT:
  case OP_ASSERT_NA:
  case OP_ASSERTBACK_NA:
  case OP_ONCE:
  case OP_SCRIPT_RUN:
  case OP_BRA:
  case OP_BRAPOS:
  case OP_CBRA:
  case OP_CBRAPOS:
  case OP_COND:
  case OP_SBRA:
  case OP_SBRAPOS:
  case OP_SCBRA:
  case OP_SCBRAPOS:
  case OP_SCOND:
  case OP_CREF:
  case OP_DNCREF:
  case OP_RREF:
  case OP_DNRREF:
  case OP_FALSE:
  case OP_TRUE:
  case OP_BRAZERO:
  case OP_BRAMINZERO:
  case OP_BRAPOSZERO:
  case OP_PRUNE:
  case OP_SKIP:
  case OP_THEN:
  case OP_COMMIT:
  case OP_FAIL:
  case OP_ACCEPT:
  case OP_ASSERT_ACCEPT:
  case OP_CLOSE:
  case OP_SKIPZERO:
  return cc + PRIV(OP_lengths)[*cc];

  /* Opcodes that end with a character: the table length, plus, in UTF
  mode, the extra code units announced by the last code unit covered by
  the table length. */
  case OP_CHAR:
  case OP_CHARI:
  case OP_NOT:
  case OP_NOTI:
  case OP_STAR:
  case OP_MINSTAR:
  case OP_PLUS:
  case OP_MINPLUS:
  case OP_QUERY:
  case OP_MINQUERY:
  case OP_UPTO:
  case OP_MINUPTO:
  case OP_EXACT:
  case OP_POSSTAR:
  case OP_POSPLUS:
  case OP_POSQUERY:
  case OP_POSUPTO:
  case OP_STARI:
  case OP_MINSTARI:
  case OP_PLUSI:
  case OP_MINPLUSI:
  case OP_QUERYI:
  case OP_MINQUERYI:
  case OP_UPTOI:
  case OP_MINUPTOI:
  case OP_EXACTI:
  case OP_POSSTARI:
  case OP_POSPLUSI:
  case OP_POSQUERYI:
  case OP_POSUPTOI:
  case OP_NOTSTAR:
  case OP_NOTMINSTAR:
  case OP_NOTPLUS:
  case OP_NOTMINPLUS:
  case OP_NOTQUERY:
  case OP_NOTMINQUERY:
  case OP_NOTUPTO:
  case OP_NOTMINUPTO:
  case OP_NOTEXACT:
  case OP_NOTPOSSTAR:
  case OP_NOTPOSPLUS:
  case OP_NOTPOSQUERY:
  case OP_NOTPOSUPTO:
  case OP_NOTSTARI:
  case OP_NOTMINSTARI:
  case OP_NOTPLUSI:
  case OP_NOTMINPLUSI:
  case OP_NOTQUERYI:
  case OP_NOTMINQUERYI:
  case OP_NOTUPTOI:
  case OP_NOTMINUPTOI:
  case OP_NOTEXACTI:
  case OP_NOTPOSSTARI:
  case OP_NOTPOSPLUSI:
  case OP_NOTPOSQUERYI:
  case OP_NOTPOSUPTOI:
  cc += PRIV(OP_lengths)[*cc];
#ifdef SUPPORT_UNICODE
  if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
  return cc;

  /* Special cases. */
  /* Type repeats advance by one code unit less than the table length
  (presumably so that the character type opcode at the end is visited as an
  opcode of its own - confirm). */
  case OP_TYPESTAR:
  case OP_TYPEMINSTAR:
  case OP_TYPEPLUS:
  case OP_TYPEMINPLUS:
  case OP_TYPEQUERY:
  case OP_TYPEMINQUERY:
  case OP_TYPEUPTO:
  case OP_TYPEMINUPTO:
  case OP_TYPEEXACT:
  case OP_TYPEPOSSTAR:
  case OP_TYPEPOSPLUS:
  case OP_TYPEPOSQUERY:
  case OP_TYPEPOSUPTO:
  return cc + PRIV(OP_lengths)[*cc] - 1;

  /* A single code unit, but refused (NULL) in UTF mode. */
  case OP_ANYBYTE:
#ifdef SUPPORT_UNICODE
  if (common->utf) return NULL;
#endif
  return cc + 1;

  /* Variable length: the distance to the next opcode is read from the
  opcode itself, at offset 1 + 2*LINK_SIZE. */
  case OP_CALLOUT_STR:
  return cc + GET(cc, 1 + 2*LINK_SIZE);

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
  /* Variable length: the distance is read from offset 1. */
  case OP_XCLASS:
  return cc + GET(cc, 1);
#endif

  /* Verbs with an argument: cc[1] is used as a length, giving
  1 + 2 + cc[1] code units in total (presumably opcode, length unit and a
  terminator around the name - confirm). */
  case OP_MARK:
  case OP_COMMIT_ARG:
  case OP_PRUNE_ARG:
  case OP_SKIP_ARG:
  case OP_THEN_ARG:
  return cc + 1 + 2 + cc[1];

  default:
  SLJIT_UNREACHABLE();
  return NULL;
  }
}
1072
1073static BOOL check_opcode_types(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend)
1074{
1075int count;
1076PCRE2_SPTR slot;
1077PCRE2_SPTR assert_back_end = cc - 1;
1078PCRE2_SPTR assert_na_end = cc - 1;
1079
1080/* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
1081while (cc < ccend)
1082  {
1083  switch(*cc)
1084    {
1085    case OP_SET_SOM:
1086    common->has_set_som = TRUE;
1087    common->might_be_empty = TRUE;
1088    cc += 1;
1089    break;
1090
1091    case OP_REFI:
1092#ifdef SUPPORT_UNICODE
1093    if (common->iref_ptr == 0)
1094      {
1095      common->iref_ptr = common->ovector_start;
1096      common->ovector_start += 3 * sizeof(sljit_sw);
1097      }
1098#endif /* SUPPORT_UNICODE */
1099    /* Fall through. */
1100    case OP_REF:
1101    common->optimized_cbracket[GET2(cc, 1)] = 0;
1102    cc += 1 + IMM2_SIZE;
1103    break;
1104
1105    case OP_ASSERT_NA:
1106    case OP_ASSERTBACK_NA:
1107    slot = bracketend(cc);
1108    if (slot > assert_na_end)
1109      assert_na_end = slot;
1110    cc += 1 + LINK_SIZE;
1111    break;
1112
1113    case OP_CBRAPOS:
1114    case OP_SCBRAPOS:
1115    common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
1116    cc += 1 + LINK_SIZE + IMM2_SIZE;
1117    break;
1118
1119    case OP_COND:
1120    case OP_SCOND:
1121    /* Only AUTO_CALLOUT can insert this opcode. We do
1122       not intend to support this case. */
1123    if (cc[1 + LINK_SIZE] == OP_CALLOUT || cc[1 + LINK_SIZE] == OP_CALLOUT_STR)
1124      return FALSE;
1125    cc += 1 + LINK_SIZE;
1126    break;
1127
1128    case OP_CREF:
1129    common->optimized_cbracket[GET2(cc, 1)] = 0;
1130    cc += 1 + IMM2_SIZE;
1131    break;
1132
1133    case OP_DNREF:
1134    case OP_DNREFI:
1135    case OP_DNCREF:
1136    count = GET2(cc, 1 + IMM2_SIZE);
1137    slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
1138    while (count-- > 0)
1139      {
1140      common->optimized_cbracket[GET2(slot, 0)] = 0;
1141      slot += common->name_entry_size;
1142      }
1143    cc += 1 + 2 * IMM2_SIZE;
1144    break;
1145
1146    case OP_RECURSE:
1147    /* Set its value only once. */
1148    if (common->recursive_head_ptr == 0)
1149      {
1150      common->recursive_head_ptr = common->ovector_start;
1151      common->ovector_start += sizeof(sljit_sw);
1152      }
1153    cc += 1 + LINK_SIZE;
1154    break;
1155
1156    case OP_CALLOUT:
1157    case OP_CALLOUT_STR:
1158    if (common->capture_last_ptr == 0)
1159      {
1160      common->capture_last_ptr = common->ovector_start;
1161      common->ovector_start += sizeof(sljit_sw);
1162      }
1163    cc += (*cc == OP_CALLOUT) ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2*LINK_SIZE);
1164    break;
1165
1166    case OP_ASSERTBACK:
1167    slot = bracketend(cc);
1168    if (slot > assert_back_end)
1169      assert_back_end = slot;
1170    cc += 1 + LINK_SIZE;
1171    break;
1172
1173    case OP_THEN_ARG:
1174    common->has_then = TRUE;
1175    common->control_head_ptr = 1;
1176    /* Fall through. */
1177
1178    case OP_COMMIT_ARG:
1179    case OP_PRUNE_ARG:
1180    if (cc < assert_na_end)
1181      return FALSE;
1182    /* Fall through */
1183    case OP_MARK:
1184    if (common->mark_ptr == 0)
1185      {
1186      common->mark_ptr = common->ovector_start;
1187      common->ovector_start += sizeof(sljit_sw);
1188      }
1189    cc += 1 + 2 + cc[1];
1190    break;
1191
1192    case OP_THEN:
1193    common->has_then = TRUE;
1194    common->control_head_ptr = 1;
1195    cc += 1;
1196    break;
1197
1198    case OP_SKIP:
1199    if (cc < assert_back_end)
1200      common->has_skip_in_assert_back = TRUE;
1201    if (cc < assert_na_end)
1202      return FALSE;
1203    cc += 1;
1204    break;
1205
1206    case OP_SKIP_ARG:
1207    common->control_head_ptr = 1;
1208    common->has_skip_arg = TRUE;
1209    if (cc < assert_back_end)
1210      common->has_skip_in_assert_back = TRUE;
1211    if (cc < assert_na_end)
1212      return FALSE;
1213    cc += 1 + 2 + cc[1];
1214    break;
1215
1216    case OP_PRUNE:
1217    case OP_COMMIT:
1218    case OP_ASSERT_ACCEPT:
1219    if (cc < assert_na_end)
1220      return FALSE;
1221    cc++;
1222    break;
1223
1224    default:
1225    cc = next_opcode(common, cc);
1226    if (cc == NULL)
1227      return FALSE;
1228    break;
1229    }
1230  }
1231return TRUE;
1232}
1233
/* Upper bound for the counter used by detect_early_fail(). The function
returns this value when no further iterator can be enhanced. */
#define EARLY_FAIL_ENHANCE_MAX (1 + 3)

/*
Scans the leading items of every alternative of the bracket at cc (which
must be OP_ONCE, OP_BRA or an optimized OP_CBRA) and tags suitable
character iterators for the "early fail" / "skip" enhancement.

A tagged iterator gets an entry in common->private_data_ptrs, stored at the
index of the code unit following its opcode. The entry is the private data
offset shifted left by 3 and or-ed with type_skip, type_fail or
type_fail_range. *private_data_start is advanced by the space consumed.

start:
  0 - skip / early fail allowed
  1 - only early fail with range allowed
  >1 - (start - 1) early fail is processed

depth: nesting level of the bracket; brackets are not entered once depth
  reaches 4.

fast_forward_allowed: when TRUE the first iterator may use the skip
  variant (type_skip) and is recorded in common->fast_forward_bc_ptr.

return: current number of iterators enhanced with fast fail
  (EARLY_FAIL_ENHANCE_MAX when scanning has to stop, including the case
  when *private_data_start grows beyond SLJIT_MAX_LOCAL_SIZE)
*/
static int detect_early_fail(compiler_common *common, PCRE2_SPTR cc, int *private_data_start,
   sljit_s32 depth, int start, BOOL fast_forward_allowed)
{
PCRE2_SPTR begin = cc;
PCRE2_SPTR next_alt;
PCRE2_SPTR end;
PCRE2_SPTR accelerated_start;
BOOL prev_fast_forward_allowed;
int result = 0;
int count;

SLJIT_ASSERT(*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA);
SLJIT_ASSERT(*cc != OP_CBRA || common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] != 0);
SLJIT_ASSERT(start < EARLY_FAIL_ENHANCE_MAX);

/* The skip variant is not used when the bracket has several alternatives. */
next_alt = cc + GET(cc, 1);
if (*next_alt == OP_ALT)
  fast_forward_allowed = FALSE;

/* One pass of the outer loop per alternative. */
do
  {
  /* Every alternative starts counting from the caller supplied value. */
  count = start;
  cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);

  /* Inside the loop "continue" moves on to the next opcode, while "break"
     out of the switch reaches the accelerated_start handling below. */
  while (TRUE)
    {
    accelerated_start = NULL;

    switch(*cc)
      {
      case OP_SOD:
      case OP_SOM:
      case OP_SET_SOM:
      case OP_NOT_WORD_BOUNDARY:
      case OP_WORD_BOUNDARY:
      case OP_EODN:
      case OP_EOD:
      case OP_CIRC:
      case OP_CIRCM:
      case OP_DOLL:
      case OP_DOLLM:
      /* Zero width assertions. */
      cc++;
      continue;

      case OP_NOT_DIGIT:
      case OP_DIGIT:
      case OP_NOT_WHITESPACE:
      case OP_WHITESPACE:
      case OP_NOT_WORDCHAR:
      case OP_WORDCHAR:
      case OP_ANY:
      case OP_ALLANY:
      case OP_ANYBYTE:
      case OP_NOT_HSPACE:
      case OP_HSPACE:
      case OP_NOT_VSPACE:
      case OP_VSPACE:
      /* A single character type: scanning goes on, but the skip variant
         is no longer available for later iterators. */
      fast_forward_allowed = FALSE;
      cc++;
      continue;

      case OP_ANYNL:
      case OP_EXTUNI:
      /* Same as above, and additionally only the range variant remains. */
      fast_forward_allowed = FALSE;
      if (count == 0)
        count = 1;
      cc++;
      continue;

      case OP_NOTPROP:
      case OP_PROP:
      fast_forward_allowed = FALSE;
      cc += 1 + 2;
      continue;

      case OP_CHAR:
      case OP_CHARI:
      case OP_NOT:
      case OP_NOTI:
      fast_forward_allowed = FALSE;
      cc += 2;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      continue;

      case OP_TYPESTAR:
      case OP_TYPEMINSTAR:
      case OP_TYPEPLUS:
      case OP_TYPEMINPLUS:
      case OP_TYPEPOSSTAR:
      case OP_TYPEPOSPLUS:
      /* The type or prop opcode is skipped in the next iteration. */
      cc += 1;

      /* Unbounded type iterators are candidates, except for the
         OP_ANYNL and OP_EXTUNI types. */
      if (cc[0] != OP_ANYNL && cc[0] != OP_EXTUNI)
        {
        accelerated_start = cc - 1;
        break;
        }

      if (count == 0)
        count = 1;
      fast_forward_allowed = FALSE;
      continue;

      case OP_TYPEUPTO:
      case OP_TYPEMINUPTO:
      case OP_TYPEEXACT:
      case OP_TYPEPOSUPTO:
      cc += IMM2_SIZE;
      /* Fall through */

      case OP_TYPEQUERY:
      case OP_TYPEMINQUERY:
      case OP_TYPEPOSQUERY:
      /* The type or prop opcode is skipped in the next iteration. */
      fast_forward_allowed = FALSE;
      if (count == 0)
        count = 1;
      cc += 1;
      continue;

      case OP_STAR:
      case OP_MINSTAR:
      case OP_PLUS:
      case OP_MINPLUS:
      case OP_POSSTAR:
      case OP_POSPLUS:

      case OP_STARI:
      case OP_MINSTARI:
      case OP_PLUSI:
      case OP_MINPLUSI:
      case OP_POSSTARI:
      case OP_POSPLUSI:

      case OP_NOTSTAR:
      case OP_NOTMINSTAR:
      case OP_NOTPLUS:
      case OP_NOTMINPLUS:
      case OP_NOTPOSSTAR:
      case OP_NOTPOSPLUS:

      case OP_NOTSTARI:
      case OP_NOTMINSTARI:
      case OP_NOTPLUSI:
      case OP_NOTMINPLUSI:
      case OP_NOTPOSSTARI:
      case OP_NOTPOSPLUSI:
      /* Unbounded single character iterators are candidates. */
      accelerated_start = cc;
      cc += 2;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      break;

      case OP_UPTO:
      case OP_MINUPTO:
      case OP_EXACT:
      case OP_POSUPTO:
      case OP_UPTOI:
      case OP_MINUPTOI:
      case OP_EXACTI:
      case OP_POSUPTOI:
      case OP_NOTUPTO:
      case OP_NOTMINUPTO:
      case OP_NOTEXACT:
      case OP_NOTPOSUPTO:
      case OP_NOTUPTOI:
      case OP_NOTMINUPTOI:
      case OP_NOTEXACTI:
      case OP_NOTPOSUPTOI:
      cc += IMM2_SIZE;
      /* Fall through */

      case OP_QUERY:
      case OP_MINQUERY:
      case OP_POSQUERY:
      case OP_QUERYI:
      case OP_MINQUERYI:
      case OP_POSQUERYI:
      case OP_NOTQUERY:
      case OP_NOTMINQUERY:
      case OP_NOTPOSQUERY:
      case OP_NOTQUERYI:
      case OP_NOTMINQUERYI:
      case OP_NOTPOSQUERYI:
      /* Bounded iterators are stepped over and never enhanced. */
      fast_forward_allowed = FALSE;
      if (count == 0)
        count = 1;
      cc += 2;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      continue;

      case OP_CLASS:
      case OP_NCLASS:
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
      case OP_XCLASS:
      accelerated_start = cc;
      cc += ((*cc == OP_XCLASS) ? GET(cc, 1) : (unsigned int)(1 + (32 / sizeof(PCRE2_UCHAR))));
#else
      accelerated_start = cc;
      cc += (1 + (32 / sizeof(PCRE2_UCHAR)));
#endif

      /* The repeat opcode following the class decides whether the class
         is a candidate. */
      switch (*cc)
        {
        case OP_CRSTAR:
        case OP_CRMINSTAR:
        case OP_CRPLUS:
        case OP_CRMINPLUS:
        case OP_CRPOSSTAR:
        case OP_CRPOSPLUS:
        cc++;
        break;

        case OP_CRRANGE:
        case OP_CRMINRANGE:
        case OP_CRPOSRANGE:
        cc += 2 * IMM2_SIZE;
        /* Fall through */
        case OP_CRQUERY:
        case OP_CRMINQUERY:
        case OP_CRPOSQUERY:
        cc++;
        if (count == 0)
          count = 1;
        /* Fall through */
        default:
        /* Bounded or non-repeated class: not a candidate, keep scanning. */
        accelerated_start = NULL;
        fast_forward_allowed = FALSE;
        continue;
        }
      break;

      case OP_ONCE:
      case OP_BRA:
      case OP_CBRA:
      end = cc + GET(cc, 1);

      prev_fast_forward_allowed = fast_forward_allowed;
      fast_forward_allowed = FALSE;
      /* Nested brackets are only entered up to a fixed depth. */
      if (depth >= 4)
        break;

      /* Only brackets closed by a plain OP_KET are entered, and capturing
         brackets only when they are still marked as optimized. */
      end = bracketend(cc) - (1 + LINK_SIZE);
      if (*end != OP_KET || (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0))
        break;

      count = detect_early_fail(common, cc, private_data_start, depth + 1, count, prev_fast_forward_allowed);

      /* Propagate the mark set on the nested bracket to this bracket. */
      if (PRIVATE_DATA(cc) != 0)
        common->private_data_ptrs[begin - common->start] = 1;

      if (count < EARLY_FAIL_ENHANCE_MAX)
        {
        cc = end + (1 + LINK_SIZE);
        continue;
        }
      break;

      case OP_KET:
      SLJIT_ASSERT(PRIVATE_DATA(cc) == 0);
      /* Stop at the end of the current alternative; a KET located before
         it is stepped over. */
      if (cc >= next_alt)
        break;
      cc += 1 + LINK_SIZE;
      continue;
      }

    if (accelerated_start != NULL)
      {
      if (count == 0)
        {
        /* First candidate: use the skip or the plain early fail variant,
           both of which take one word. */
        count++;

        if (fast_forward_allowed)
          {
          common->fast_forward_bc_ptr = accelerated_start;
          common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_skip;
          *private_data_start += sizeof(sljit_sw);
          }
        else
          {
          common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail;

          if (common->early_fail_start_ptr == 0)
            common->early_fail_start_ptr = *private_data_start;

          *private_data_start += sizeof(sljit_sw);
          common->early_fail_end_ptr = *private_data_start;

          if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
            return EARLY_FAIL_ENHANCE_MAX;
          }
        }
      else
        {
        /* Later candidates use the range variant, which takes two words. */
        common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail_range;

        if (common->early_fail_start_ptr == 0)
          common->early_fail_start_ptr = *private_data_start;

        *private_data_start += 2 * sizeof(sljit_sw);
        common->early_fail_end_ptr = *private_data_start;

        if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
          return EARLY_FAIL_ENHANCE_MAX;
        }

      /* Cannot be part of a repeat. */
      common->private_data_ptrs[begin - common->start] = 1;
      count++;

      if (count < EARLY_FAIL_ENHANCE_MAX)
        continue;
      }

    break;
    }

  /* An alternative which was not scanned to its end closes the counter
     for the caller. Otherwise the largest count of the alternatives
     is reported. */
  if (*cc != OP_ALT && *cc != OP_KET)
    result = EARLY_FAIL_ENHANCE_MAX;
  else if (result < count)
    result = count;

  cc = next_alt;
  next_alt = cc + GET(cc, 1);
  }
while (*cc == OP_ALT);

return result;
}
1580
1581static int get_class_iterator_size(PCRE2_SPTR cc)
1582{
1583sljit_u32 min;
1584sljit_u32 max;
1585switch(*cc)
1586  {
1587  case OP_CRSTAR:
1588  case OP_CRPLUS:
1589  return 2;
1590
1591  case OP_CRMINSTAR:
1592  case OP_CRMINPLUS:
1593  case OP_CRQUERY:
1594  case OP_CRMINQUERY:
1595  return 1;
1596
1597  case OP_CRRANGE:
1598  case OP_CRMINRANGE:
1599  min = GET2(cc, 1);
1600  max = GET2(cc, 1 + IMM2_SIZE);
1601  if (max == 0)
1602    return (*cc == OP_CRRANGE) ? 2 : 1;
1603  max -= min;
1604  if (max > 2)
1605    max = 2;
1606  return max;
1607
1608  default:
1609  return 0;
1610  }
1611}
1612
1613static BOOL detect_repeat(compiler_common *common, PCRE2_SPTR begin)
1614{
1615PCRE2_SPTR end = bracketend(begin);
1616PCRE2_SPTR next;
1617PCRE2_SPTR next_end;
1618PCRE2_SPTR max_end;
1619PCRE2_UCHAR type;
1620sljit_sw length = end - begin;
1621sljit_s32 min, max, i;
1622
1623/* Detect fixed iterations first. */
1624if (end[-(1 + LINK_SIZE)] != OP_KET || PRIVATE_DATA(begin) != 0)
1625  return FALSE;
1626
1627/* /(?:AB){4,6}/ is currently converted to /(?:AB){3}(?AB){1,3}/
1628 * Skip the check of the second part. */
1629if (PRIVATE_DATA(end - LINK_SIZE) != 0)
1630  return TRUE;
1631
1632next = end;
1633min = 1;
1634while (1)
1635  {
1636  if (*next != *begin)
1637    break;
1638  next_end = bracketend(next);
1639  if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
1640    break;
1641  next = next_end;
1642  min++;
1643  }
1644
1645if (min == 2)
1646  return FALSE;
1647
1648max = 0;
1649max_end = next;
1650if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
1651  {
1652  type = *next;
1653  while (1)
1654    {
1655    if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
1656      break;
1657    next_end = bracketend(next + 2 + LINK_SIZE);
1658    if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
1659      break;
1660    next = next_end;
1661    max++;
1662    }
1663
1664  if (next[0] == type && next[1] == *begin && max >= 1)
1665    {
1666    next_end = bracketend(next + 1);
1667    if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
1668      {
1669      for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
1670        if (*next_end != OP_KET)
1671          break;
1672
1673      if (i == max)
1674        {
1675        common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
1676        common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
1677        /* +2 the original and the last. */
1678        common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
1679        if (min == 1)
1680          return TRUE;
1681        min--;
1682        max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
1683        }
1684      }
1685    }
1686  }
1687
1688if (min >= 3)
1689  {
1690  common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
1691  common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
1692  common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
1693  return TRUE;
1694  }
1695
1696return FALSE;
1697}
1698
/* Case label groups for character iterator opcodes. Each macro expands to
a list of "case" labels, so it is used in place of a label inside a switch.
The groups are formed by the way set_private_data_ptrs() sizes them:
  ..._1   one private data word
  ..._2A  two private data words
  ..._2B  two private data words, opcode followed by an IMM2_SIZE count
The TYPE variants list the matching character type iterators. */

/* Single character iterators with one private data word. */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: \
    case OP_MINPLUS: \
    case OP_QUERY: \
    case OP_MINQUERY: \
    case OP_MINSTARI: \
    case OP_MINPLUSI: \
    case OP_QUERYI: \
    case OP_MINQUERYI: \
    case OP_NOTMINSTAR: \
    case OP_NOTMINPLUS: \
    case OP_NOTQUERY: \
    case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: \
    case OP_NOTMINPLUSI: \
    case OP_NOTQUERYI: \
    case OP_NOTMINQUERYI:

/* Greedy single character star / plus iterators: two private data words. */
#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: \
    case OP_PLUS: \
    case OP_STARI: \
    case OP_PLUSI: \
    case OP_NOTSTAR: \
    case OP_NOTPLUS: \
    case OP_NOTSTARI: \
    case OP_NOTPLUSI:

/* Single character "up to" iterators: two private data words. */
#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: \
    case OP_MINUPTO: \
    case OP_UPTOI: \
    case OP_MINUPTOI: \
    case OP_NOTUPTO: \
    case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: \
    case OP_NOTMINUPTOI:

/* Character type iterators with one private data word. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: \
    case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: \
    case OP_TYPEMINQUERY:

/* Greedy character type star / plus iterators. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: \
    case OP_TYPEPLUS:

/* Character type "up to" iterators. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: \
    case OP_TYPEMINUPTO:
1750
/* Assigns private data offsets to the brackets and character iterators of
the pattern. The code is scanned from common->start up to ccend, and the
offset given to an opcode is stored in common->private_data_ptrs at the
index of that opcode. Offsets are handed out starting from
*private_data_start, and the first unused offset is written back to
*private_data_start on return. Scanning stops early once the offset grows
beyond SLJIT_MAX_LOCAL_SIZE. */
static void set_private_data_ptrs(compiler_common *common, int *private_data_start, PCRE2_SPTR ccend)
{
PCRE2_SPTR cc = common->start;
PCRE2_SPTR alternative;
/* Character iterators located before 'end' do not get a private slot. */
PCRE2_SPTR end = NULL;
int private_data_ptr = *private_data_start;
/* space: private data words needed by a character iterator.
   size: code length of a non-bracket item; a negative value marks an item
     which ends with a character (it may have extra UTF code units).
   bracketlen: code length of a bracket opcode. */
int space, size, bracketlen;
BOOL repeat_check = TRUE;

while (cc < ccend)
  {
  space = 0;
  size = 0;
  bracketlen = 0;
  if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
    break;

  /* When the bracket is prefixed by a zero iteration, skip the repeat check (at this point). */
  if (repeat_check && (*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
    {
    if (detect_repeat(common, cc))
      {
      /* These brackets are converted to repeats, so no global
      based single character repeat is allowed. */
      if (cc >= end)
        end = bracketend(cc);
      }
    }
  repeat_check = TRUE;

  switch(*cc)
    {
    case OP_KET:
    /* A non-zero entry after the opcode was stored by detect_repeat():
       the KET gets a slot and the repeated copies are stepped over. */
    if (common->private_data_ptrs[cc + 1 - common->start] != 0)
      {
      common->private_data_ptrs[cc - common->start] = private_data_ptr;
      private_data_ptr += sizeof(sljit_sw);
      cc += common->private_data_ptrs[cc + 1 - common->start];
      }
    cc += 1 + LINK_SIZE;
    break;

    /* Brackets which always get one private data word. */
    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    case OP_ONCE:
    case OP_SCRIPT_RUN:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);
    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    common->private_data_ptrs[cc - common->start] = 0;
    alternative = cc + GET(cc, 1);
    if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
      {
      common->private_data_ptrs[cc - common->start] = private_data_ptr;
      private_data_ptr += sizeof(sljit_sw);
      }
    bracketlen = 1 + LINK_SIZE;
    break;

    /* Brackets without private data. */
    case OP_BRA:
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_BRAZERO:
    case OP_BRAMINZERO:
    case OP_BRAPOSZERO:
    size = 1;
    repeat_check = FALSE;
    break;

    CASE_ITERATOR_PRIVATE_DATA_1
    size = -2;
    space = 1;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    size = -2;
    space = 2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    size = -(2 + IMM2_SIZE);
    space = 2;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    size = 1;
    space = 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    size = 1;
    /* No private slot when the repeated type is OP_ANYNL or OP_EXTUNI. */
    if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
      space = 2;
    break;

    case OP_TYPEUPTO:
    size = 1 + IMM2_SIZE;
    if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
      space = 2;
    break;

    case OP_TYPEMINUPTO:
    size = 1 + IMM2_SIZE;
    space = 2;
    break;

    case OP_CLASS:
    case OP_NCLASS:
    /* The repeat opcode after the class bitmap decides the slot count. */
    size = 1 + 32 / sizeof(PCRE2_UCHAR);
    space = get_class_iterator_size(cc + size);
    break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
    size = GET(cc, 1);
    space = get_class_iterator_size(cc + size);
    break;
#endif

    default:
    /* No private data: cc is advanced here, size and bracketlen stay 0. */
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

  /* Character iterators, which are not inside a repeated bracket,
     gets a private slot instead of allocating it on the stack. */
  if (space > 0 && cc >= end)
    {
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw) * space;
    }

  if (size != 0)
    {
    if (size < 0)
      {
      /* The item ends with a character, which may be followed
         by extra code units in UTF mode. */
      cc += -size;
#ifdef SUPPORT_UNICODE
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      }
    else
      cc += size;
    }

  if (bracketlen > 0)
    {
    /* Outside of a protected region: a bracket closed by a plain OP_KET
       clears 'end', any other closing opcode makes the whole bracket a
       protected region. */
    if (cc >= end)
      {
      end = bracketend(cc);
      if (end[-1 - LINK_SIZE] == OP_KET)
        end = NULL;
      }
    cc += bracketlen;
    }
  }
*private_data_start = private_data_ptr;
}
1935
1936/* Returns with a frame_types (always < 0) if no need for frame. */
1937static int get_framesize(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, BOOL recursive, BOOL *needs_control_head)
1938{
1939int length = 0;
1940int possessive = 0;
1941BOOL stack_restore = FALSE;
1942BOOL setsom_found = recursive;
1943BOOL setmark_found = recursive;
1944/* The last capture is a local variable even for recursions. */
1945BOOL capture_last_found = FALSE;
1946
1947#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1948SLJIT_ASSERT(common->control_head_ptr != 0);
1949*needs_control_head = TRUE;
1950#else
1951*needs_control_head = FALSE;
1952#endif
1953
1954if (ccend == NULL)
1955  {
1956  ccend = bracketend(cc) - (1 + LINK_SIZE);
1957  if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1958    {
1959    possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1960    /* This is correct regardless of common->capture_last_ptr. */
1961    capture_last_found = TRUE;
1962    }
1963  cc = next_opcode(common, cc);
1964  }
1965
1966SLJIT_ASSERT(cc != NULL);
1967while (cc < ccend)
1968  switch(*cc)
1969    {
1970    case OP_SET_SOM:
1971    SLJIT_ASSERT(common->has_set_som);
1972    stack_restore = TRUE;
1973    if (!setsom_found)
1974      {
1975      length += 2;
1976      setsom_found = TRUE;
1977      }
1978    cc += 1;
1979    break;
1980
1981    case OP_MARK:
1982    case OP_COMMIT_ARG:
1983    case OP_PRUNE_ARG:
1984    case OP_THEN_ARG:
1985    SLJIT_ASSERT(common->mark_ptr != 0);
1986    stack_restore = TRUE;
1987    if (!setmark_found)
1988      {
1989      length += 2;
1990      setmark_found = TRUE;
1991      }
1992    if (common->control_head_ptr != 0)
1993      *needs_control_head = TRUE;
1994    cc += 1 + 2 + cc[1];
1995    break;
1996
1997    case OP_RECURSE:
1998    stack_restore = TRUE;
1999    if (common->has_set_som && !setsom_found)
2000      {
2001      length += 2;
2002      setsom_found = TRUE;
2003      }
2004    if (common->mark_ptr != 0 && !setmark_found)
2005      {
2006      length += 2;
2007      setmark_found = TRUE;
2008      }
2009    if (common->capture_last_ptr != 0 && !capture_last_found)
2010      {
2011      length += 2;
2012      capture_last_found = TRUE;
2013      }
2014    cc += 1 + LINK_SIZE;
2015    break;
2016
2017    case OP_CBRA:
2018    case OP_CBRAPOS:
2019    case OP_SCBRA:
2020    case OP_SCBRAPOS:
2021    stack_restore = TRUE;
2022    if (common->capture_last_ptr != 0 && !capture_last_found)
2023      {
2024      length += 2;
2025      capture_last_found = TRUE;
2026      }
2027    length += 3;
2028    cc += 1 + LINK_SIZE + IMM2_SIZE;
2029    break;
2030
2031    case OP_THEN:
2032    stack_restore = TRUE;
2033    if (common->control_head_ptr != 0)
2034      *needs_control_head = TRUE;
2035    cc ++;
2036    break;
2037
2038    default:
2039    stack_restore = TRUE;
2040    /* Fall through. */
2041
2042    case OP_NOT_WORD_BOUNDARY:
2043    case OP_WORD_BOUNDARY:
2044    case OP_NOT_DIGIT:
2045    case OP_DIGIT:
2046    case OP_NOT_WHITESPACE:
2047    case OP_WHITESPACE:
2048    case OP_NOT_WORDCHAR:
2049    case OP_WORDCHAR:
2050    case OP_ANY:
2051    case OP_ALLANY:
2052    case OP_ANYBYTE:
2053    case OP_NOTPROP:
2054    case OP_PROP:
2055    case OP_ANYNL:
2056    case OP_NOT_HSPACE:
2057    case OP_HSPACE:
2058    case OP_NOT_VSPACE:
2059    case OP_VSPACE:
2060    case OP_EXTUNI:
2061    case OP_EODN:
2062    case OP_EOD:
2063    case OP_CIRC:
2064    case OP_CIRCM:
2065    case OP_DOLL:
2066    case OP_DOLLM:
2067    case OP_CHAR:
2068    case OP_CHARI:
2069    case OP_NOT:
2070    case OP_NOTI:
2071
2072    case OP_EXACT:
2073    case OP_POSSTAR:
2074    case OP_POSPLUS:
2075    case OP_POSQUERY:
2076    case OP_POSUPTO:
2077
2078    case OP_EXACTI:
2079    case OP_POSSTARI:
2080    case OP_POSPLUSI:
2081    case OP_POSQUERYI:
2082    case OP_POSUPTOI:
2083
2084    case OP_NOTEXACT:
2085    case OP_NOTPOSSTAR:
2086    case OP_NOTPOSPLUS:
2087    case OP_NOTPOSQUERY:
2088    case OP_NOTPOSUPTO:
2089
2090    case OP_NOTEXACTI:
2091    case OP_NOTPOSSTARI:
2092    case OP_NOTPOSPLUSI:
2093    case OP_NOTPOSQUERYI:
2094    case OP_NOTPOSUPTOI:
2095
2096    case OP_TYPEEXACT:
2097    case OP_TYPEPOSSTAR:
2098    case OP_TYPEPOSPLUS:
2099    case OP_TYPEPOSQUERY:
2100    case OP_TYPEPOSUPTO:
2101
2102    case OP_CLASS:
2103    case OP_NCLASS:
2104    case OP_XCLASS:
2105
2106    case OP_CALLOUT:
2107    case OP_CALLOUT_STR:
2108
2109    cc = next_opcode(common, cc);
2110    SLJIT_ASSERT(cc != NULL);
2111    break;
2112    }
2113
2114/* Possessive quantifiers can use a special case. */
2115if (SLJIT_UNLIKELY(possessive == length))
2116  return stack_restore ? no_frame : no_stack;
2117
2118if (length > 0)
2119  return length + 1;
2120return stack_restore ? no_frame : no_stack;
2121}
2122
/* Generates the code which fills a frame on the stack for the code range
[cc, ccend). When ccend is NULL, cc must point to a bracket and its body is
scanned; the bracket opcode itself is only kept in the scan when it is
OP_CBRAPOS or OP_SCBRAPOS.

The frame is written downwards from STACK(stackpos). Each saved item starts
with an identifier word followed by the saved value(s):
  - start of match, mark and last capture: the negated offset of the
    local, then its current value (two words, saved at most once each)
  - capturing bracket: OVECTOR(offset), then the two ovector values
    (three words)
A zero word closes the frame, and it must land on STACK(stacktop). The
item sizes match the ones counted by get_framesize(). */
static void init_frame(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, int stackpos, int stacktop)
{
DEFINE_COMPILER;
BOOL setsom_found = FALSE;
BOOL setmark_found = FALSE;
/* The last capture is a local variable even for recursions. */
BOOL capture_last_found = FALSE;
int offset;

/* >= 1 + shortest item size (2) */
SLJIT_UNUSED_ARG(stacktop);
SLJIT_ASSERT(stackpos >= stacktop + 2);

/* From here stackpos is the offset passed to the generated stores. */
stackpos = STACK(stackpos);
if (ccend == NULL)
  {
  ccend = bracketend(cc) - (1 + LINK_SIZE);
  if (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS)
    cc = next_opcode(common, cc);
  }

SLJIT_ASSERT(cc != NULL);
while (cc < ccend)
  switch(*cc)
    {
    case OP_SET_SOM:
    SLJIT_ASSERT(common->has_set_som);
    if (!setsom_found)
      {
      /* Identifier word first, then the current start of match. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
      stackpos -= SSIZE_OF(sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos -= SSIZE_OF(sw);
      setsom_found = TRUE;
      }
    cc += 1;
    break;

    case OP_MARK:
    case OP_COMMIT_ARG:
    case OP_PRUNE_ARG:
    case OP_THEN_ARG:
    SLJIT_ASSERT(common->mark_ptr != 0);
    if (!setmark_found)
      {
      /* Identifier word first, then the current mark. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
      stackpos -= SSIZE_OF(sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos -= SSIZE_OF(sw);
      setmark_found = TRUE;
      }
    cc += 1 + 2 + cc[1];
    break;

    case OP_RECURSE:
    /* A recursion saves every tracked local which is in use
       and was not saved before. */
    if (common->has_set_som && !setsom_found)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
      stackpos -= SSIZE_OF(sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos -= SSIZE_OF(sw);
      setsom_found = TRUE;
      }
    if (common->mark_ptr != 0 && !setmark_found)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
      stackpos -= SSIZE_OF(sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos -= SSIZE_OF(sw);
      setmark_found = TRUE;
      }
    if (common->capture_last_ptr != 0 && !capture_last_found)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
      stackpos -= SSIZE_OF(sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos -= SSIZE_OF(sw);
      capture_last_found = TRUE;
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_CBRAPOS:
    case OP_SCBRA:
    case OP_SCBRAPOS:
    if (common->capture_last_ptr != 0 && !capture_last_found)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
      stackpos -= SSIZE_OF(sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos -= SSIZE_OF(sw);
      capture_last_found = TRUE;
      }
    /* The group number selects a pair of ovector entries. The identifier
       word is followed by the two values of the pair. */
    offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
    stackpos -= SSIZE_OF(sw);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
    stackpos -= SSIZE_OF(sw);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
    stackpos -= SSIZE_OF(sw);

    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    default:
    /* Nothing to save for other items. */
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

/* A zero word closes the frame. */
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
SLJIT_ASSERT(stackpos == STACK(stacktop));
}
2245
/* Number of scratch registers the delayed memory copy helpers rotate through. */
#define RECURSE_TMP_REG_COUNT 3

/* State of a small pipeline of pending memory-to-memory moves. Every slot
owns one scratch register: a value is loaded into it at once, but the matching
store is only emitted when the slot is reused or the pipeline is finished. */
typedef struct delayed_mem_copy_status delayed_mem_copy_status;

struct delayed_mem_copy_status {
  /* Compiler that receives the emitted instructions. */
  struct sljit_compiler *compiler;
  /* Base register of the pending store of each slot; -1 when the slot is empty. */
  int store_bases[RECURSE_TMP_REG_COUNT];
  /* Offset (relative to store_bases[]) of the pending store of each slot. */
  int store_offsets[RECURSE_TMP_REG_COUNT];
  /* Scratch register used by each slot. */
  int tmp_regs[RECURSE_TMP_REG_COUNT];
  /* Where the previous content of tmp_regs[] is parked while the slot is in
  use; only written when sljit_get_register_index() reports a negative index
  for it (a "virtual" register). */
  int saved_tmp_regs[RECURSE_TMP_REG_COUNT];
  /* Index of the slot the next move is going to use. */
  int next_tmp_reg;
};
2256
2257static void delayed_mem_copy_init(delayed_mem_copy_status *status, compiler_common *common)
2258{
2259int i;
2260
2261for (i = 0; i < RECURSE_TMP_REG_COUNT; i++)
2262  {
2263  SLJIT_ASSERT(status->tmp_regs[i] >= 0);
2264  SLJIT_ASSERT(sljit_get_register_index(status->saved_tmp_regs[i]) < 0 || status->tmp_regs[i] == status->saved_tmp_regs[i]);
2265
2266  status->store_bases[i] = -1;
2267  }
2268status->next_tmp_reg = 0;
2269status->compiler = common->compiler;
2270}
2271
2272static void delayed_mem_copy_move(delayed_mem_copy_status *status, int load_base, sljit_sw load_offset,
2273  int store_base, sljit_sw store_offset)
2274{
2275struct sljit_compiler *compiler = status->compiler;
2276int next_tmp_reg = status->next_tmp_reg;
2277int tmp_reg = status->tmp_regs[next_tmp_reg];
2278
2279SLJIT_ASSERT(load_base > 0 && store_base > 0);
2280
2281if (status->store_bases[next_tmp_reg] == -1)
2282  {
2283  /* Preserve virtual registers. */
2284  if (sljit_get_register_index(status->saved_tmp_regs[next_tmp_reg]) < 0)
2285    OP1(SLJIT_MOV, status->saved_tmp_regs[next_tmp_reg], 0, tmp_reg, 0);
2286  }
2287else
2288  OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[next_tmp_reg]), status->store_offsets[next_tmp_reg], tmp_reg, 0);
2289
2290OP1(SLJIT_MOV, tmp_reg, 0, SLJIT_MEM1(load_base), load_offset);
2291status->store_bases[next_tmp_reg] = store_base;
2292status->store_offsets[next_tmp_reg] = store_offset;
2293
2294status->next_tmp_reg = (next_tmp_reg + 1) % RECURSE_TMP_REG_COUNT;
2295}
2296
2297static void delayed_mem_copy_finish(delayed_mem_copy_status *status)
2298{
2299struct sljit_compiler *compiler = status->compiler;
2300int next_tmp_reg = status->next_tmp_reg;
2301int tmp_reg, saved_tmp_reg, i;
2302
2303for (i = 0; i < RECURSE_TMP_REG_COUNT; i++)
2304  {
2305  if (status->store_bases[next_tmp_reg] != -1)
2306    {
2307    tmp_reg = status->tmp_regs[next_tmp_reg];
2308    saved_tmp_reg = status->saved_tmp_regs[next_tmp_reg];
2309
2310    OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[next_tmp_reg]), status->store_offsets[next_tmp_reg], tmp_reg, 0);
2311
2312    /* Restore virtual registers. */
2313    if (sljit_get_register_index(saved_tmp_reg) < 0)
2314      OP1(SLJIT_MOV, tmp_reg, 0, saved_tmp_reg, 0);
2315    }
2316
2317  next_tmp_reg = (next_tmp_reg + 1) % RECURSE_TMP_REG_COUNT;
2318  }
2319}
2320
2321#undef RECURSE_TMP_REG_COUNT
2322
2323static BOOL recurse_check_bit(compiler_common *common, sljit_sw bit_index)
2324{
2325uint8_t *byte;
2326uint8_t mask;
2327
2328SLJIT_ASSERT((bit_index & (sizeof(sljit_sw) - 1)) == 0);
2329
2330bit_index >>= SLJIT_WORD_SHIFT;
2331
2332SLJIT_ASSERT((bit_index >> 3) < common->recurse_bitset_size);
2333
2334mask = 1 << (bit_index & 0x7);
2335byte = common->recurse_bitset + (bit_index >> 3);
2336
2337if (*byte & mask)
2338  return FALSE;
2339
2340*byte |= mask;
2341return TRUE;
2342}
2343
/* Bit flags collected by get_recurse_data_length() while scanning a bytecode
range, and later passed to copy_recurse_data(). */
enum get_recurse_flags {
  /* OP_PRUNE, OP_SKIP, OP_COMMIT, OP_THEN, OP_SKIP_ARG or one of the
  OP_COMMIT_ARG / OP_PRUNE_ARG / OP_THEN_ARG verbs was seen. */
  recurse_flag_quit_found = 0x01,
  /* OP_ACCEPT or OP_ASSERT_ACCEPT was seen. */
  recurse_flag_accept_found = 0x02,
  /* OP_SET_SOM was seen, or OP_RECURSE while common->has_set_som is set. */
  recurse_flag_setsom_found = 0x04,
  /* OP_MARK or an ..._ARG verb was seen, or OP_RECURSE while
  common->mark_ptr is non-zero. */
  recurse_flag_setmark_found = 0x08,
  /* OP_THEN was seen, or OP_MARK or an ..._ARG verb while
  common->control_head_ptr is non-zero. */
  recurse_flag_control_head_found = 0x10
};
2351
2352static int get_recurse_data_length(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, uint32_t *result_flags)
2353{
2354int length = 1;
2355int size, offset;
2356PCRE2_SPTR alternative;
2357uint32_t recurse_flags = 0;
2358
2359memset(common->recurse_bitset, 0, common->recurse_bitset_size);
2360
2361#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2362SLJIT_ASSERT(common->control_head_ptr != 0);
2363recurse_flags |= recurse_flag_control_head_found;
2364#endif
2365
2366/* Calculate the sum of the private machine words. */
2367while (cc < ccend)
2368  {
2369  size = 0;
2370  switch(*cc)
2371    {
2372    case OP_SET_SOM:
2373    SLJIT_ASSERT(common->has_set_som);
2374    recurse_flags |= recurse_flag_setsom_found;
2375    cc += 1;
2376    break;
2377
2378    case OP_RECURSE:
2379    if (common->has_set_som)
2380      recurse_flags |= recurse_flag_setsom_found;
2381    if (common->mark_ptr != 0)
2382      recurse_flags |= recurse_flag_setmark_found;
2383    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
2384      length++;
2385    cc += 1 + LINK_SIZE;
2386    break;
2387
2388    case OP_KET:
2389    offset = PRIVATE_DATA(cc);
2390    if (offset != 0)
2391      {
2392      if (recurse_check_bit(common, offset))
2393        length++;
2394      SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
2395      cc += PRIVATE_DATA(cc + 1);
2396      }
2397    cc += 1 + LINK_SIZE;
2398    break;
2399
2400    case OP_ASSERT:
2401    case OP_ASSERT_NOT:
2402    case OP_ASSERTBACK:
2403    case OP_ASSERTBACK_NOT:
2404    case OP_ASSERT_NA:
2405    case OP_ASSERTBACK_NA:
2406    case OP_ONCE:
2407    case OP_SCRIPT_RUN:
2408    case OP_BRAPOS:
2409    case OP_SBRA:
2410    case OP_SBRAPOS:
2411    case OP_SCOND:
2412    SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
2413    if (recurse_check_bit(common, PRIVATE_DATA(cc)))
2414      length++;
2415    cc += 1 + LINK_SIZE;
2416    break;
2417
2418    case OP_CBRA:
2419    case OP_SCBRA:
2420    offset = GET2(cc, 1 + LINK_SIZE);
2421    if (recurse_check_bit(common, OVECTOR(offset << 1)))
2422      {
2423      SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((offset << 1) + 1)));
2424      length += 2;
2425      }
2426    if (common->optimized_cbracket[offset] == 0 && recurse_check_bit(common, OVECTOR_PRIV(offset)))
2427      length++;
2428    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
2429      length++;
2430    cc += 1 + LINK_SIZE + IMM2_SIZE;
2431    break;
2432
2433    case OP_CBRAPOS:
2434    case OP_SCBRAPOS:
2435    offset = GET2(cc, 1 + LINK_SIZE);
2436    if (recurse_check_bit(common, OVECTOR(offset << 1)))
2437      {
2438      SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((offset << 1) + 1)));
2439      length += 2;
2440      }
2441    if (recurse_check_bit(common, OVECTOR_PRIV(offset)))
2442      length++;
2443    if (recurse_check_bit(common, PRIVATE_DATA(cc)))
2444      length++;
2445    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
2446      length++;
2447    cc += 1 + LINK_SIZE + IMM2_SIZE;
2448    break;
2449
2450    case OP_COND:
2451    /* Might be a hidden SCOND. */
2452    alternative = cc + GET(cc, 1);
2453    if ((*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) && recurse_check_bit(common, PRIVATE_DATA(cc)))
2454      length++;
2455    cc += 1 + LINK_SIZE;
2456    break;
2457
2458    CASE_ITERATOR_PRIVATE_DATA_1
2459    offset = PRIVATE_DATA(cc);
2460    if (offset != 0 && recurse_check_bit(common, offset))
2461      length++;
2462    cc += 2;
2463#ifdef SUPPORT_UNICODE
2464    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2465#endif
2466    break;
2467
2468    CASE_ITERATOR_PRIVATE_DATA_2A
2469    offset = PRIVATE_DATA(cc);
2470    if (offset != 0 && recurse_check_bit(common, offset))
2471      {
2472      SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
2473      length += 2;
2474      }
2475    cc += 2;
2476#ifdef SUPPORT_UNICODE
2477    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2478#endif
2479    break;
2480
2481    CASE_ITERATOR_PRIVATE_DATA_2B
2482    offset = PRIVATE_DATA(cc);
2483    if (offset != 0 && recurse_check_bit(common, offset))
2484      {
2485      SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
2486      length += 2;
2487      }
2488    cc += 2 + IMM2_SIZE;
2489#ifdef SUPPORT_UNICODE
2490    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2491#endif
2492    break;
2493
2494    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2495    offset = PRIVATE_DATA(cc);
2496    if (offset != 0 && recurse_check_bit(common, offset))
2497      length++;
2498    cc += 1;
2499    break;
2500
2501    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2502    offset = PRIVATE_DATA(cc);
2503    if (offset != 0 && recurse_check_bit(common, offset))
2504      {
2505      SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
2506      length += 2;
2507      }
2508    cc += 1;
2509    break;
2510
2511    CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2512    offset = PRIVATE_DATA(cc);
2513    if (offset != 0 && recurse_check_bit(common, offset))
2514      {
2515      SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
2516      length += 2;
2517      }
2518    cc += 1 + IMM2_SIZE;
2519    break;
2520
2521    case OP_CLASS:
2522    case OP_NCLASS:
2523#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
2524    case OP_XCLASS:
2525    size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
2526#else
2527    size = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
2528#endif
2529
2530    offset = PRIVATE_DATA(cc);
2531    if (offset != 0 && recurse_check_bit(common, offset))
2532      length += get_class_iterator_size(cc + size);
2533    cc += size;
2534    break;
2535
2536    case OP_MARK:
2537    case OP_COMMIT_ARG:
2538    case OP_PRUNE_ARG:
2539    case OP_THEN_ARG:
2540    SLJIT_ASSERT(common->mark_ptr != 0);
2541    recurse_flags |= recurse_flag_setmark_found;
2542    if (common->control_head_ptr != 0)
2543      recurse_flags |= recurse_flag_control_head_found;
2544    if (*cc != OP_MARK)
2545      recurse_flags |= recurse_flag_quit_found;
2546
2547    cc += 1 + 2 + cc[1];
2548    break;
2549
2550    case OP_PRUNE:
2551    case OP_SKIP:
2552    case OP_COMMIT:
2553    recurse_flags |= recurse_flag_quit_found;
2554    cc++;
2555    break;
2556
2557    case OP_SKIP_ARG:
2558    recurse_flags |= recurse_flag_quit_found;
2559    cc += 1 + 2 + cc[1];
2560    break;
2561
2562    case OP_THEN:
2563    SLJIT_ASSERT(common->control_head_ptr != 0);
2564    recurse_flags |= recurse_flag_quit_found | recurse_flag_control_head_found;
2565    cc++;
2566    break;
2567
2568    case OP_ACCEPT:
2569    case OP_ASSERT_ACCEPT:
2570    recurse_flags |= recurse_flag_accept_found;
2571    cc++;
2572    break;
2573
2574    default:
2575    cc = next_opcode(common, cc);
2576    SLJIT_ASSERT(cc != NULL);
2577    break;
2578    }
2579  }
2580SLJIT_ASSERT(cc == ccend);
2581
2582if (recurse_flags & recurse_flag_control_head_found)
2583  length++;
2584if (recurse_flags & recurse_flag_quit_found)
2585  {
2586  if (recurse_flags & recurse_flag_setsom_found)
2587    length++;
2588  if (recurse_flags & recurse_flag_setmark_found)
2589    length++;
2590  }
2591
2592*result_flags = recurse_flags;
2593return length;
2594}
2595
/* Transfer modes understood by copy_recurse_data(). "Global" refers to the
SLJIT_SP relative locals; the other side is the saved block on the stack. */
enum copy_recurse_data_types {
  /* Locals -> saved block: recursive head, private, shared and kept shared. */
  recurse_copy_from_global = 0,
  /* Saved block -> locals: recursive head and private words only. */
  recurse_copy_private_to_global = 1,
  /* Saved block -> locals: shared and kept shared words only. */
  recurse_copy_shared_to_global = 2,
  /* Saved block -> locals: kept shared words only. */
  recurse_copy_kept_shared_to_global = 3,
  /* Both directions at once, saved block addressed through TMP2:
  recursive head, private and shared words. */
  recurse_swap_global = 4
};
2603
2604static void copy_recurse_data(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
2605  int type, int stackptr, int stacktop, uint32_t recurse_flags)
2606{
2607delayed_mem_copy_status status;
2608PCRE2_SPTR alternative;
2609sljit_sw private_srcw[2];
2610sljit_sw shared_srcw[3];
2611sljit_sw kept_shared_srcw[2];
2612int private_count, shared_count, kept_shared_count;
2613int from_sp, base_reg, offset, i;
2614
2615memset(common->recurse_bitset, 0, common->recurse_bitset_size);
2616
2617#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2618SLJIT_ASSERT(common->control_head_ptr != 0);
2619recurse_check_bit(common, common->control_head_ptr);
2620#endif
2621
2622switch (type)
2623  {
2624  case recurse_copy_from_global:
2625  from_sp = TRUE;
2626  base_reg = STACK_TOP;
2627  break;
2628
2629  case recurse_copy_private_to_global:
2630  case recurse_copy_shared_to_global:
2631  case recurse_copy_kept_shared_to_global:
2632  from_sp = FALSE;
2633  base_reg = STACK_TOP;
2634  break;
2635
2636  default:
2637  SLJIT_ASSERT(type == recurse_swap_global);
2638  from_sp = FALSE;
2639  base_reg = TMP2;
2640  break;
2641  }
2642
2643stackptr = STACK(stackptr);
2644stacktop = STACK(stacktop);
2645
2646status.tmp_regs[0] = TMP1;
2647status.saved_tmp_regs[0] = TMP1;
2648
2649if (base_reg != TMP2)
2650  {
2651  status.tmp_regs[1] = TMP2;
2652  status.saved_tmp_regs[1] = TMP2;
2653  }
2654else
2655  {
2656  status.saved_tmp_regs[1] = RETURN_ADDR;
2657  if (HAS_VIRTUAL_REGISTERS)
2658    status.tmp_regs[1] = STR_PTR;
2659  else
2660    status.tmp_regs[1] = RETURN_ADDR;
2661  }
2662
2663status.saved_tmp_regs[2] = TMP3;
2664if (HAS_VIRTUAL_REGISTERS)
2665  status.tmp_regs[2] = STR_END;
2666else
2667  status.tmp_regs[2] = TMP3;
2668
2669delayed_mem_copy_init(&status, common);
2670
2671if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
2672  {
2673  SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);
2674
2675  if (!from_sp)
2676    delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->recursive_head_ptr);
2677
2678  if (from_sp || type == recurse_swap_global)
2679    delayed_mem_copy_move(&status, SLJIT_SP, common->recursive_head_ptr, base_reg, stackptr);
2680  }
2681
2682stackptr += sizeof(sljit_sw);
2683
2684#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2685if (type != recurse_copy_shared_to_global)
2686  {
2687  if (!from_sp)
2688    delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->control_head_ptr);
2689
2690  if (from_sp || type == recurse_swap_global)
2691    delayed_mem_copy_move(&status, SLJIT_SP, common->control_head_ptr, base_reg, stackptr);
2692  }
2693
2694stackptr += sizeof(sljit_sw);
2695#endif
2696
2697while (cc < ccend)
2698  {
2699  private_count = 0;
2700  shared_count = 0;
2701  kept_shared_count = 0;
2702
2703  switch(*cc)
2704    {
2705    case OP_SET_SOM:
2706    SLJIT_ASSERT(common->has_set_som);
2707    if ((recurse_flags & recurse_flag_quit_found) && recurse_check_bit(common, OVECTOR(0)))
2708      {
2709      kept_shared_srcw[0] = OVECTOR(0);
2710      kept_shared_count = 1;
2711      }
2712    cc += 1;
2713    break;
2714
2715    case OP_RECURSE:
2716    if (recurse_flags & recurse_flag_quit_found)
2717      {
2718      if (common->has_set_som && recurse_check_bit(common, OVECTOR(0)))
2719        {
2720        kept_shared_srcw[0] = OVECTOR(0);
2721        kept_shared_count = 1;
2722        }
2723      if (common->mark_ptr != 0 && recurse_check_bit(common, common->mark_ptr))
2724        {
2725        kept_shared_srcw[kept_shared_count] = common->mark_ptr;
2726        kept_shared_count++;
2727        }
2728      }
2729    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
2730      {
2731      shared_srcw[0] = common->capture_last_ptr;
2732      shared_count = 1;
2733      }
2734    cc += 1 + LINK_SIZE;
2735    break;
2736
2737    case OP_KET:
2738    private_srcw[0] = PRIVATE_DATA(cc);
2739    if (private_srcw[0] != 0)
2740      {
2741      if (recurse_check_bit(common, private_srcw[0]))
2742        private_count = 1;
2743      SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
2744      cc += PRIVATE_DATA(cc + 1);
2745      }
2746    cc += 1 + LINK_SIZE;
2747    break;
2748
2749    case OP_ASSERT:
2750    case OP_ASSERT_NOT:
2751    case OP_ASSERTBACK:
2752    case OP_ASSERTBACK_NOT:
2753    case OP_ASSERT_NA:
2754    case OP_ASSERTBACK_NA:
2755    case OP_ONCE:
2756    case OP_SCRIPT_RUN:
2757    case OP_BRAPOS:
2758    case OP_SBRA:
2759    case OP_SBRAPOS:
2760    case OP_SCOND:
2761    private_srcw[0] = PRIVATE_DATA(cc);
2762    if (recurse_check_bit(common, private_srcw[0]))
2763      private_count = 1;
2764    cc += 1 + LINK_SIZE;
2765    break;
2766
2767    case OP_CBRA:
2768    case OP_SCBRA:
2769    offset = GET2(cc, 1 + LINK_SIZE);
2770    shared_srcw[0] = OVECTOR(offset << 1);
2771    if (recurse_check_bit(common, shared_srcw[0]))
2772      {
2773      shared_srcw[1] = shared_srcw[0] + sizeof(sljit_sw);
2774      SLJIT_ASSERT(recurse_check_bit(common, shared_srcw[1]));
2775      shared_count = 2;
2776      }
2777
2778    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
2779      {
2780      shared_srcw[shared_count] = common->capture_last_ptr;
2781      shared_count++;
2782      }
2783
2784    if (common->optimized_cbracket[offset] == 0)
2785      {
2786      private_srcw[0] = OVECTOR_PRIV(offset);
2787      if (recurse_check_bit(common, private_srcw[0]))
2788        private_count = 1;
2789      }
2790
2791    cc += 1 + LINK_SIZE + IMM2_SIZE;
2792    break;
2793
2794    case OP_CBRAPOS:
2795    case OP_SCBRAPOS:
2796    offset = GET2(cc, 1 + LINK_SIZE);
2797    shared_srcw[0] = OVECTOR(offset << 1);
2798    if (recurse_check_bit(common, shared_srcw[0]))
2799      {
2800      shared_srcw[1] = shared_srcw[0] + sizeof(sljit_sw);
2801      SLJIT_ASSERT(recurse_check_bit(common, shared_srcw[1]));
2802      shared_count = 2;
2803      }
2804
2805    if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
2806      {
2807      shared_srcw[shared_count] = common->capture_last_ptr;
2808      shared_count++;
2809      }
2810
2811    private_srcw[0] = PRIVATE_DATA(cc);
2812    if (recurse_check_bit(common, private_srcw[0]))
2813      private_count = 1;
2814
2815    offset = OVECTOR_PRIV(offset);
2816    if (recurse_check_bit(common, offset))
2817      {
2818      private_srcw[private_count] = offset;
2819      private_count++;
2820      }
2821    cc += 1 + LINK_SIZE + IMM2_SIZE;
2822    break;
2823
2824    case OP_COND:
2825    /* Might be a hidden SCOND. */
2826    alternative = cc + GET(cc, 1);
2827    if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
2828      {
2829      private_srcw[0] = PRIVATE_DATA(cc);
2830      if (recurse_check_bit(common, private_srcw[0]))
2831        private_count = 1;
2832      }
2833    cc += 1 + LINK_SIZE;
2834    break;
2835
2836    CASE_ITERATOR_PRIVATE_DATA_1
2837    private_srcw[0] = PRIVATE_DATA(cc);
2838    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
2839      private_count = 1;
2840    cc += 2;
2841#ifdef SUPPORT_UNICODE
2842    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2843#endif
2844    break;
2845
2846    CASE_ITERATOR_PRIVATE_DATA_2A
2847    private_srcw[0] = PRIVATE_DATA(cc);
2848    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
2849      {
2850      private_count = 2;
2851      private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2852      SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
2853      }
2854    cc += 2;
2855#ifdef SUPPORT_UNICODE
2856    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2857#endif
2858    break;
2859
2860    CASE_ITERATOR_PRIVATE_DATA_2B
2861    private_srcw[0] = PRIVATE_DATA(cc);
2862    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
2863      {
2864      private_count = 2;
2865      private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2866      SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
2867      }
2868    cc += 2 + IMM2_SIZE;
2869#ifdef SUPPORT_UNICODE
2870    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2871#endif
2872    break;
2873
2874    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2875    private_srcw[0] = PRIVATE_DATA(cc);
2876    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
2877      private_count = 1;
2878    cc += 1;
2879    break;
2880
2881    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2882    private_srcw[0] = PRIVATE_DATA(cc);
2883    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
2884      {
2885      private_count = 2;
2886      private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2887      SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
2888      }
2889    cc += 1;
2890    break;
2891
2892    CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2893    private_srcw[0] = PRIVATE_DATA(cc);
2894    if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
2895      {
2896      private_count = 2;
2897      private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2898      SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
2899      }
2900    cc += 1 + IMM2_SIZE;
2901    break;
2902
2903    case OP_CLASS:
2904    case OP_NCLASS:
2905#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
2906    case OP_XCLASS:
2907    i = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
2908#else
2909    i = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
2910#endif
2911    if (PRIVATE_DATA(cc) != 0)
2912      {
2913      private_count = 1;
2914      private_srcw[0] = PRIVATE_DATA(cc);
2915      switch(get_class_iterator_size(cc + i))
2916        {
2917        case 1:
2918        break;
2919
2920        case 2:
2921        if (recurse_check_bit(common, private_srcw[0]))
2922          {
2923          private_count = 2;
2924          private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2925          SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
2926          }
2927        break;
2928
2929        default:
2930        SLJIT_UNREACHABLE();
2931        break;
2932        }
2933      }
2934    cc += i;
2935    break;
2936
2937    case OP_MARK:
2938    case OP_COMMIT_ARG:
2939    case OP_PRUNE_ARG:
2940    case OP_THEN_ARG:
2941    SLJIT_ASSERT(common->mark_ptr != 0);
2942    if ((recurse_flags & recurse_flag_quit_found) && recurse_check_bit(common, common->mark_ptr))
2943      {
2944      kept_shared_srcw[0] = common->mark_ptr;
2945      kept_shared_count = 1;
2946      }
2947    if (common->control_head_ptr != 0 && recurse_check_bit(common, common->control_head_ptr))
2948      {
2949      private_srcw[0] = common->control_head_ptr;
2950      private_count = 1;
2951      }
2952    cc += 1 + 2 + cc[1];
2953    break;
2954
2955    case OP_THEN:
2956    SLJIT_ASSERT(common->control_head_ptr != 0);
2957    if (recurse_check_bit(common, common->control_head_ptr))
2958      {
2959      private_srcw[0] = common->control_head_ptr;
2960      private_count = 1;
2961      }
2962    cc++;
2963    break;
2964
2965    default:
2966    cc = next_opcode(common, cc);
2967    SLJIT_ASSERT(cc != NULL);
2968    continue;
2969    }
2970
2971  if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
2972    {
2973    SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);
2974
2975    for (i = 0; i < private_count; i++)
2976      {
2977      SLJIT_ASSERT(private_srcw[i] != 0);
2978
2979      if (!from_sp)
2980        delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, private_srcw[i]);
2981
2982      if (from_sp || type == recurse_swap_global)
2983        delayed_mem_copy_move(&status, SLJIT_SP, private_srcw[i], base_reg, stackptr);
2984
2985      stackptr += sizeof(sljit_sw);
2986      }
2987    }
2988  else
2989    stackptr += sizeof(sljit_sw) * private_count;
2990
2991  if (type != recurse_copy_private_to_global && type != recurse_copy_kept_shared_to_global)
2992    {
2993    SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_swap_global);
2994
2995    for (i = 0; i < shared_count; i++)
2996      {
2997      SLJIT_ASSERT(shared_srcw[i] != 0);
2998
2999      if (!from_sp)
3000        delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, shared_srcw[i]);
3001
3002      if (from_sp || type == recurse_swap_global)
3003        delayed_mem_copy_move(&status, SLJIT_SP, shared_srcw[i], base_reg, stackptr);
3004
3005      stackptr += sizeof(sljit_sw);
3006      }
3007    }
3008  else
3009    stackptr += sizeof(sljit_sw) * shared_count;
3010
3011  if (type != recurse_copy_private_to_global && type != recurse_swap_global)
3012    {
3013    SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_copy_kept_shared_to_global);
3014
3015    for (i = 0; i < kept_shared_count; i++)
3016      {
3017      SLJIT_ASSERT(kept_shared_srcw[i] != 0);
3018
3019      if (!from_sp)
3020        delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, kept_shared_srcw[i]);
3021
3022      if (from_sp || type == recurse_swap_global)
3023        delayed_mem_copy_move(&status, SLJIT_SP, kept_shared_srcw[i], base_reg, stackptr);
3024
3025      stackptr += sizeof(sljit_sw);
3026      }
3027    }
3028  else
3029    stackptr += sizeof(sljit_sw) * kept_shared_count;
3030  }
3031
3032SLJIT_ASSERT(cc == ccend && stackptr == stacktop);
3033
3034delayed_mem_copy_finish(&status);
3035}
3036
3037static SLJIT_INLINE PCRE2_SPTR set_then_offsets(compiler_common *common, PCRE2_SPTR cc, sljit_u8 *current_offset)
3038{
3039PCRE2_SPTR end = bracketend(cc);
3040BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
3041
3042/* Assert captures then. */
3043if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA)
3044  current_offset = NULL;
3045/* Conditional block does not. */
3046if (*cc == OP_COND || *cc == OP_SCOND)
3047  has_alternatives = FALSE;
3048
3049cc = next_opcode(common, cc);
3050if (has_alternatives)
3051  current_offset = common->then_offsets + (cc - common->start);
3052
3053while (cc < end)
3054  {
3055  if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
3056    cc = set_then_offsets(common, cc, current_offset);
3057  else
3058    {
3059    if (*cc == OP_ALT && has_alternatives)
3060      current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
3061    if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
3062      *current_offset = 1;
3063    cc = next_opcode(common, cc);
3064    }
3065  }
3066
3067return end;
3068}
3069
3070#undef CASE_ITERATOR_PRIVATE_DATA_1
3071#undef CASE_ITERATOR_PRIVATE_DATA_2A
3072#undef CASE_ITERATOR_PRIVATE_DATA_2B
3073#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
3074#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
3075#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
3076
3077static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
3078{
3079return (value & (value - 1)) == 0;
3080}
3081
3082static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
3083{
3084while (list)
3085  {
3086  /* sljit_set_label is clever enough to do nothing
3087  if either the jump or the label is NULL. */
3088  SET_LABEL(list->jump, label);
3089  list = list->next;
3090  }
3091}
3092
3093static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
3094{
3095jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
3096if (list_item)
3097  {
3098  list_item->next = *list;
3099  list_item->jump = jump;
3100  *list = list_item;
3101  }
3102}
3103
3104static void add_stub(compiler_common *common, struct sljit_jump *start)
3105{
3106DEFINE_COMPILER;
3107stub_list *list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
3108
3109if (list_item)
3110  {
3111  list_item->start = start;
3112  list_item->quit = LABEL();
3113  list_item->next = common->stubs;
3114  common->stubs = list_item;
3115  }
3116}
3117
3118static void flush_stubs(compiler_common *common)
3119{
3120DEFINE_COMPILER;
3121stub_list *list_item = common->stubs;
3122
3123while (list_item)
3124  {
3125  JUMPHERE(list_item->start);
3126  add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
3127  JUMPTO(SLJIT_JUMP, list_item->quit);
3128  list_item = list_item->next;
3129  }
3130common->stubs = NULL;
3131}
3132
3133static SLJIT_INLINE void count_match(compiler_common *common)
3134{
3135DEFINE_COMPILER;
3136
3137OP2(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
3138add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
3139}
3140
3141static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
3142{
3143/* May destroy all locals and registers except TMP2. */
3144DEFINE_COMPILER;
3145
3146SLJIT_ASSERT(size > 0);
3147OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * SSIZE_OF(sw));
3148#ifdef DESTROY_REGISTERS
3149OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
3150OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3151OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
3152OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
3153OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
3154#endif
3155add_stub(common, CMP(SLJIT_LESS, STACK_TOP, 0, STACK_LIMIT, 0));
3156}
3157
3158static SLJIT_INLINE void free_stack(compiler_common *common, int size)
3159{
3160DEFINE_COMPILER;
3161
3162SLJIT_ASSERT(size > 0);
3163OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * SSIZE_OF(sw));
3164}
3165
3166static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
3167{
3168DEFINE_COMPILER;
3169sljit_uw *result;
3170
3171if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
3172  return NULL;
3173
3174result = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
3175if (SLJIT_UNLIKELY(result == NULL))
3176  {
3177  sljit_set_compiler_memory_error(compiler);
3178  return NULL;
3179  }
3180
3181*(void**)result = common->read_only_data_head;
3182common->read_only_data_head = (void *)result;
3183return result + 1;
3184}
3185
3186static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
3187{
3188DEFINE_COMPILER;
3189struct sljit_label *loop;
3190sljit_s32 i;
3191
3192/* At this point we can freely use all temporary registers. */
3193SLJIT_ASSERT(length > 1);
3194/* TMP1 returns with begin - 1. */
3195OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
3196if (length < 8)
3197  {
3198  for (i = 1; i < length; i++)
3199    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
3200  }
3201else
3202  {
3203  if (sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS)
3204    {
3205    GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
3206    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
3207    loop = LABEL();
3208    sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw));
3209    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
3210    JUMPTO(SLJIT_NOT_ZERO, loop);
3211    }
3212  else
3213    {
3214    GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START + sizeof(sljit_sw));
3215    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
3216    loop = LABEL();
3217    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0);
3218    OP2(SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, sizeof(sljit_sw));
3219    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
3220    JUMPTO(SLJIT_NOT_ZERO, loop);
3221    }
3222  }
3223}
3224
static SLJIT_INLINE void reset_early_fail(compiler_common *common)
{
/* Emits code that stores zero into every machine word of the local area
between common->early_fail_start_ptr and common->early_fail_end_ptr.
Three strategies are used depending on the size of the area: a single store,
a fully unrolled sequence (up to six words), or a loop that clears three
words per iteration followed by up to two trailing stores.
The loop variant uses TMP1 and TMP2; TMP3 may be loaded with zero. */
DEFINE_COMPILER;
sljit_u32 size = (sljit_u32)(common->early_fail_end_ptr - common->early_fail_start_ptr);
sljit_u32 uncleared_size;
sljit_s32 src = SLJIT_IMM;
sljit_s32 i;
struct sljit_label *loop;

SLJIT_ASSERT(common->early_fail_start_ptr < common->early_fail_end_ptr);

/* Exactly one word: a single immediate store is enough. */
if (size == sizeof(sljit_sw))
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->early_fail_start_ptr, SLJIT_IMM, 0);
  return;
  }

/* Several stores follow. Keep the zero in TMP3 when it has a register index
and the CPU does not report a zero register; otherwise the source stays an
immediate 0. */
if (sljit_get_register_index(TMP3) >= 0 && !sljit_has_cpu_feature(SLJIT_HAS_ZERO_REGISTER))
  {
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
  src = TMP3;
  }

/* Small area: unrolled stores addressed directly from the locals base. */
if (size <= 6 * sizeof(sljit_sw))
  {
  for (i = common->early_fail_start_ptr; i < common->early_fail_end_ptr; i += sizeof(sljit_sw))
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, src, 0);
  return;
  }

/* Large area: TMP1 walks through the area. */
GET_LOCAL_BASE(TMP1, 0, common->early_fail_start_ptr);

/* Number of bytes (0, 1 or 2 words) that do not fit the three-word stride. */
uncleared_size = ((size / sizeof(sljit_sw)) % 3) * sizeof(sljit_sw);

/* TMP2 is the end address of the part handled by the loop. */
OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, size - uncleared_size);

loop = LABEL();
/* The first word is stored before TMP1 is advanced, the other two are
addressed with negative offsets from the advanced pointer. */
OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -2 * SSIZE_OF(sw), src, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -1 * SSIZE_OF(sw), src, 0);
CMPTO(SLJIT_LESS, TMP1, 0, TMP2, 0, loop);

/* Trailing words that were left out of the loop. */
if (uncleared_size >= sizeof(sljit_sw))
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0);

if (uncleared_size >= 2 * sizeof(sljit_sw))
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), sizeof(sljit_sw), src, 0);
}
3274
static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
{
/* Emits code that restores the matcher state for a fresh attempt:
  - copies the value held in OVECTOR(1) into OVECTOR(2) .. OVECTOR(length - 1),
  - zeroes the mark_ptr and control_head_ptr locals when they are in use,
  - reloads STACK_TOP from the 'end' field of the sljit_stack referenced by
    jit_arguments.stack,
  - leaves the start_ptr local in TMP1.
TMP2 and STACK_TOP are used as scratch registers in the loop variant. */
DEFINE_COMPILER;
struct sljit_label *loop;
int i;

SLJIT_ASSERT(length > 1);
/* OVECTOR(1) contains the "string begin - 1" constant. */
/* With length == 2 there is nothing to overwrite, so the load is skipped. */
if (length > 2)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
if (length < 8)
  {
  /* Short vector: unrolled stores. */
  for (i = 2; i < length; i++)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
  }
else
  {
  /* The SLJIT_MEM_SUPP call only asks whether a pre-update store is
  available; the store itself is emitted inside the loop. */
  if (sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)) == SLJIT_SUCCESS)
    {
    /* Pre-update form: start one word before OVECTOR(2). */
    GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
    /* STACK_TOP serves as the loop counter; it is reloaded below. */
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
    loop = LABEL();
    sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
    OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, loop);
    }
  else
    {
    /* Plain form: start at OVECTOR(2) and advance the pointer manually. */
    GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + 2 * sizeof(sljit_sw));
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
    loop = LABEL();
    OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(sljit_sw));
    OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, loop);
    }
  }

/* Fetch jit_arguments.stack into STACK_TOP. With virtual registers the
ARGUMENTS value is first copied into STACK_TOP and dereferenced a few
instructions later. */
if (!HAS_VIRTUAL_REGISTERS)
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, stack));
else
  OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);

/* A zero offset means the corresponding local is not allocated. */
if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
if (common->control_head_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
if (HAS_VIRTUAL_REGISTERS)
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));

OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
/* STACK_TOP currently points at the sljit_stack; replace it with its 'end'. */
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, end));
}
3328
3329static sljit_sw SLJIT_FUNC do_search_mark(sljit_sw *current, PCRE2_SPTR skip_arg)
3330{
3331while (current != NULL)
3332  {
3333  switch (current[1])
3334    {
3335    case type_then_trap:
3336    break;
3337
3338    case type_mark:
3339    if (PRIV(strcmp)(skip_arg, (PCRE2_SPTR)current[2]) == 0)
3340      return current[3];
3341    break;
3342
3343    default:
3344    SLJIT_UNREACHABLE();
3345    break;
3346    }
3347  SLJIT_ASSERT(current[0] == 0 || current < (sljit_sw*)current[0]);
3348  current = (sljit_sw*)current[0];
3349  }
3350return 0;
3351}
3352
static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
{
/* Emits the code that publishes a successful match:
  - the start_ptr local is stored into jit_arguments.startchar_ptr and, when
    a mark local exists, it is stored into jit_arguments.mark_ptr,
  - jit_arguments.oveccount words of the local ovector are converted from
    pointers to offsets relative to jit_arguments.begin and written to the
    ovector of the pcre2_match_data block,
  - SLJIT_RETURN_REG receives the match count computed by scanning the local
    ovector backwards (or the constant 1 when topbracket <= 1). */
DEFINE_COMPILER;
struct sljit_label *loop;
BOOL has_pre;

/* At this point we can freely use all registers. */
/* Keep the previous OVECTOR(1) value in SLJIT_S2; the backwards scan below
compares slots against it. OVECTOR(1) itself is overwritten with STR_PTR. */
OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);

/* Both variants finish with: SLJIT_R1 = oveccount, and SLJIT_R2 = address of
the output ovector minus one PCRE2_SIZE (it is advanced before each store). */
if (HAS_VIRTUAL_REGISTERS)
  {
  /* ARGUMENTS is copied into SLJIT_R0 and accessed through that register. */
  OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
  if (common->mark_ptr != 0)
    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
  OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, oveccount));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
  if (common->mark_ptr != 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
  OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, match_data),
    SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
  }
else
  {
  OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
  OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, match_data));
  if (common->mark_ptr != 0)
    OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
  OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, oveccount));
  OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
  if (common->mark_ptr != 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R0, 0);
  OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
  }

/* SLJIT_MEM_SUPP: only query whether a load with pre-update is available. */
has_pre = sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS;

/* SLJIT_S0 walks the local ovector (one word early for the pre-update form),
SLJIT_R0 holds jit_arguments.begin. */
GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START - (has_pre ? sizeof(sljit_sw) : 0));
OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? SLJIT_R0 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));

/* Copy loop: executed oveccount (SLJIT_R1) times. */
loop = LABEL();

if (has_pre)
  sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw));
else
  {
  OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0);
  OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
  }

/* Advance the output pointer, then turn the pointer into a byte offset. */
OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, sizeof(PCRE2_SIZE));
OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_R0, 0);
/* Copy the integer value to the output buffer */
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
/* Presumably converts the byte offset into a code unit offset. */
OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif

SLJIT_ASSERT(sizeof(PCRE2_SIZE) == 4 || sizeof(PCRE2_SIZE) == 8);
OP1(((sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV), SLJIT_MEM1(SLJIT_R2), 0, SLJIT_S1, 0);

OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, loop);

/* Calculate the return value, which is the maximum ovector value. */
/* The scan moves backwards two words at a time, decrementing SLJIT_R1 from
topbracket + 1 for every slot visited, and stops at the first slot that
differs from the value saved in SLJIT_S2. */
if (topbracket > 1)
  {
  if (sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * SSIZE_OF(sw))) == SLJIT_SUCCESS)
    {
    /* Pre-update form: start one pair past the first slot examined. */
    GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
    OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);

    /* OVECTOR(0) is never equal to SLJIT_S2. */
    loop = LABEL();
    sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * SSIZE_OF(sw)));
    OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
    CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
    OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
    }
  else
    {
    GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + (topbracket - 1) * 2 * sizeof(sljit_sw));
    OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);

    /* OVECTOR(0) is never equal to SLJIT_S2. */
    loop = LABEL();
    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), 0);
    OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * SSIZE_OF(sw));
    OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
    CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
    OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
    }
  }
else
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
}
3449
3450static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
3451{
3452DEFINE_COMPILER;
3453sljit_s32 mov_opcode;
3454sljit_s32 arguments_reg = !HAS_VIRTUAL_REGISTERS ? ARGUMENTS : SLJIT_R1;
3455
3456SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S0, str_end_must_be_saved_reg0);
3457SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
3458  && (common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start != 0 : common->hit_start == 0));
3459
3460if (arguments_reg != ARGUMENTS)
3461  OP1(SLJIT_MOV, arguments_reg, 0, ARGUMENTS, 0);
3462OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP),
3463  common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start : common->start_ptr);
3464OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_PARTIAL);
3465
3466/* Store match begin and end. */
3467OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, begin));
3468OP1(SLJIT_MOV, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_R2, 0);
3469OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, match_data));
3470
3471mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
3472
3473OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S1, 0);
3474#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
3475OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
3476#endif
3477OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector), SLJIT_R2, 0);
3478
3479OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_S1, 0);
3480#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
3481OP2(SLJIT_ASHR, STR_END, 0, STR_END, 0, SLJIT_IMM, UCHAR_SHIFT);
3482#endif
3483OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector) + sizeof(PCRE2_SIZE), STR_END, 0);
3484
3485JUMPTO(SLJIT_JUMP, quit);
3486}
3487
3488static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
3489{
3490/* May destroy TMP1. */
3491DEFINE_COMPILER;
3492struct sljit_jump *jump;
3493
3494if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
3495  {
3496  /* The value of -1 must be kept for start_used_ptr! */
3497  OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
3498  /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
3499  is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
3500  jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
3501  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3502  JUMPHERE(jump);
3503  }
3504else if (common->mode == PCRE2_JIT_PARTIAL_HARD)
3505  {
3506  jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3507  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3508  JUMPHERE(jump);
3509  }
3510}
3511
3512static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, PCRE2_SPTR cc)
3513{
3514/* Detects if the character has an othercase. */
3515unsigned int c;
3516
3517#ifdef SUPPORT_UNICODE
3518if (common->utf || common->ucp)
3519  {
3520  if (common->utf)
3521    {
3522    GETCHAR(c, cc);
3523    }
3524  else
3525    c = *cc;
3526
3527  if (c > 127)
3528    return c != UCD_OTHERCASE(c);
3529
3530  return common->fcc[c] != c;
3531  }
3532else
3533#endif
3534  c = *cc;
3535return MAX_255(c) ? common->fcc[c] != c : FALSE;
3536}
3537
3538static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
3539{
3540/* Returns with the othercase. */
3541#ifdef SUPPORT_UNICODE
3542if ((common->utf || common->ucp) && c > 127)
3543  return UCD_OTHERCASE(c);
3544#endif
3545return TABLE_GET(c, common->fcc, c);
3546}
3547
static unsigned int char_get_othercase_bit(compiler_common *common, PCRE2_SPTR cc)
{
/* Detects if the character and its othercase has only 1 bit difference. */
/* Returns 0 when the two cases differ in more than one bit. Otherwise the
result is (position << 8) | mask, where mask is the differing bit reduced to
an 8 bit value and position selects where that bit lives inside the encoded
character (presumably a byte offset - confirm against the callers).
The character at cc must have an other case (asserted below). */
unsigned int c, oc, bit;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
int n;
#endif

/* Fetch the character and its other case. */
#ifdef SUPPORT_UNICODE
if (common->utf || common->ucp)
  {
  if (common->utf)
    {
    GETCHAR(c, cc);
    }
  else
    c = *cc;

  if (c <= 127)
    oc = common->fcc[c];
  else
    oc = UCD_OTHERCASE(c);
  }
else
  {
  c = *cc;
  oc = TABLE_GET(c, common->fcc, c);
  }
#else
c = *cc;
oc = TABLE_GET(c, common->fcc, c);
#endif

SLJIT_ASSERT(c != oc);

bit = c ^ oc;
/* Optimized for English alphabet. */
if (c <= 127 && bit == 0x20)
  return (0 << 8) | 0x20;

/* Since c != oc, they must have at least 1 bit difference. */
if (!is_powerof2(bit))
  return 0;

#if PCRE2_CODE_UNIT_WIDTH == 8

#ifdef SUPPORT_UNICODE
if (common->utf && c > 127)
  {
  /* Start from the GET_EXTRALEN value of the first code unit and step back
  once for every six low bits that do not contain the differing bit. */
  n = GET_EXTRALEN(*cc);
  while ((bit & 0x3f) == 0)
    {
    n--;
    bit >>= 6;
    }
  return (n << 8) | bit;
  }
#endif /* SUPPORT_UNICODE */
return (0 << 8) | bit;

#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32

#ifdef SUPPORT_UNICODE
if (common->utf && c > 65535)
  {
  /* A difference at bit 10 or above is shifted down and then handled by
  the generic return below (positions 0/1); a difference in the low ten
  bits is reported with positions 2/3. */
  if (bit >= (1u << 10))
    bit >>= 10;
  else
    return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
  }
#endif /* SUPPORT_UNICODE */
return (bit < 256) ? ((0u << 8) | bit) : ((1u << 8) | (bit >> 8));

#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
}
3623
3624static void check_partial(compiler_common *common, BOOL force)
3625{
3626/* Checks whether a partial matching is occurred. Does not modify registers. */
3627DEFINE_COMPILER;
3628struct sljit_jump *jump = NULL;
3629
3630SLJIT_ASSERT(!force || common->mode != PCRE2_JIT_COMPLETE);
3631
3632if (common->mode == PCRE2_JIT_COMPLETE)
3633  return;
3634
3635if (!force && !common->allow_empty_partial)
3636  jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3637else if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
3638  jump = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
3639
3640if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
3641  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
3642else
3643  {
3644  if (common->partialmatchlabel != NULL)
3645    JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
3646  else
3647    add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
3648  }
3649
3650if (jump != NULL)
3651  JUMPHERE(jump);
3652}
3653
3654static void check_str_end(compiler_common *common, jump_list **end_reached)
3655{
3656/* Does not affect registers. Usually used in a tight spot. */
3657DEFINE_COMPILER;
3658struct sljit_jump *jump;
3659
3660if (common->mode == PCRE2_JIT_COMPLETE)
3661  {
3662  add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
3663  return;
3664  }
3665
3666jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
3667if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
3668  {
3669  add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
3670  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
3671  add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
3672  }
3673else
3674  {
3675  add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
3676  if (common->partialmatchlabel != NULL)
3677    JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
3678  else
3679    add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
3680  }
3681JUMPHERE(jump);
3682}
3683
3684static void detect_partial_match(compiler_common *common, jump_list **backtracks)
3685{
3686DEFINE_COMPILER;
3687struct sljit_jump *jump;
3688
3689if (common->mode == PCRE2_JIT_COMPLETE)
3690  {
3691  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
3692  return;
3693  }
3694
3695/* Partial matching mode. */
3696jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
3697if (!common->allow_empty_partial)
3698  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
3699else if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
3700  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1));
3701
3702if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
3703  {
3704  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
3705  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
3706  }
3707else
3708  {
3709  if (common->partialmatchlabel != NULL)
3710    JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
3711  else
3712    add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
3713  }
3714JUMPHERE(jump);
3715}
3716
3717static void process_partial_match(compiler_common *common)
3718{
3719DEFINE_COMPILER;
3720struct sljit_jump *jump;
3721
3722/* Partial matching mode. */
3723if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
3724  {
3725  jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3726  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
3727  JUMPHERE(jump);
3728  }
3729else if (common->mode == PCRE2_JIT_PARTIAL_HARD)
3730  {
3731  if (common->partialmatchlabel != NULL)
3732    CMPTO(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0, common->partialmatchlabel);
3733  else
3734    add_jump(compiler, &common->partialmatch, CMP(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
3735  }
3736}
3737
static void detect_partial_match_to(compiler_common *common, struct sljit_label *label)
{
/* Emits a jump to 'label' that is taken while STR_PTR is below STR_END.
The code emitted by process_partial_match() follows on the fall-through
path, i.e. when the end of the subject has been reached. */
DEFINE_COMPILER;

CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, label);
process_partial_match(common);
}
3745
static void peek_char(compiler_common *common, sljit_u32 max, sljit_s32 dst, sljit_sw dstw, jump_list **backtracks)
{
/* Reads the character into TMP1, keeps STR_PTR.
Does not check STR_END. TMP2, dst, RETURN_ADDR Destroyed. */
/* Parameters:
  max        - when it is below the first value that needs multi-unit
               decoding (128 for 8 bit, 0xd800 for 16/32 bit), only the
               plain code unit load is emitted.
  dst, dstw  - operand used to save STR_PTR around the helper calls that
               advance it.
  backtracks - optional list; with invalid_utf set, a jump is added that is
               taken when the character is invalid. */
DEFINE_COMPILER;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *jump;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */

SLJIT_UNUSED_ARG(max);
SLJIT_UNUSED_ARG(dst);
SLJIT_UNUSED_ARG(dstw);
SLJIT_UNUSED_ARG(backtracks);

/* Load the code unit at STR_PTR. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  if (max < 128) return;

  /* Values below 0x80 need no further decoding. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
  /* STR_PTR is saved in dst, advanced for the helper call, then restored. */
  OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  add_jump(compiler, common->invalid_utf ? &common->utfreadchar_invalid : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);
  if (backtracks && common->invalid_utf)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
  JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  if (max < 0xd800) return;

  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);

  if (common->invalid_utf)
    {
    /* Only values in the 0xd800..0xdfff range are sent to the helper. */
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
    OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
    OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);
    if (backtracks && common->invalid_utf)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    }
  else
    {
    /* TMP2 contains the high surrogate. */
    /* Combine it inline with the following code unit. */
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
    }

  JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf)
  {
  if (max < 0xd800) return;

  /* A value is invalid when it is >= 0x110000 or inside 0xd800..0xdfff. */
  if (backtracks != NULL)
    {
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
    }
  else
    {
    /* No backtrack list: replace an invalid value by INVALID_UTF_CHAR
    using conditional moves. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000);
    CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
    CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
    }
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
}
3829
static void peek_char_back(compiler_common *common, sljit_u32 max, jump_list **backtracks)
{
/* Reads one character back without moving STR_PTR. TMP2 must
contain the start of the subject buffer. Affects TMP1, TMP2, and RETURN_ADDR. */
/* Parameters:
  max        - when it is below the first value that needs multi-unit
               decoding (128 for 8 bit, 0xd800 for 16 bit), only the plain
               code unit load is emitted.
  backtracks - list that receives a jump taken when the character is
               invalid (invalid_utf mode only). It is tested against NULL in
               the 8 and 16 bit paths but used unconditionally in the 32 bit
               path. */
DEFINE_COMPILER;

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *jump;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */

SLJIT_UNUSED_ARG(max);
SLJIT_UNUSED_ARG(backtracks);

/* Load the code unit just before STR_PTR. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
  {
  if (max < 128) return;

  /* Values below 0x80 need no further decoding; everything else is
  decoded by a helper routine. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
  if (common->invalid_utf)
    {
    add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    }
  else
    add_jump(compiler, &common->utfpeakcharback, JUMP(SLJIT_FAST_CALL));
  JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  if (max < 0xd800) return;

  if (common->invalid_utf)
    {
    /* Values below 0xd800 are accepted as they are. */
    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
    add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
    }
  else
    {
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xdc00);
    /* TMP2 contains the low surrogate. */
    /* Combine it inline with the code unit two positions back. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
    }
    JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf)
  {
  /* A value is invalid when it is >= 0x110000 or inside 0xd800..0xdfff. */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
}
3897
/* Option bits for the 'options' argument of read_char(). */

/* STR_PTR must be advanced over the whole character even when the character
value itself does not have to be decoded precisely. */
#define READ_CHAR_UPDATE_STR_PTR 0x1
/* In invalid UTF mode, use the newline specific helper
(utfreadnewline_invalid) instead of utfreadchar_invalid. */
#define READ_CHAR_UTF8_NEWLINE 0x2
/* Both of the above bits combined. */
#define READ_CHAR_NEWLINE (READ_CHAR_UPDATE_STR_PTR | READ_CHAR_UTF8_NEWLINE)
/* Skip the invalid UTF handling even when common->invalid_utf is set
(presumably because the caller knows the input is valid here). */
#define READ_CHAR_VALID_UTF 0x4
3902
3903static void read_char(compiler_common *common, sljit_u32 min, sljit_u32 max,
3904  jump_list **backtracks, sljit_u32 options)
3905{
3906/* Reads the precise value of a character into TMP1, if the character is
3907between min and max (c >= min && c <= max). Otherwise it returns with a value
3908outside the range. Does not check STR_END. */
3909DEFINE_COMPILER;
3910#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
3911struct sljit_jump *jump;
3912#endif
3913#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
3914struct sljit_jump *jump2;
3915#endif
3916
3917SLJIT_UNUSED_ARG(min);
3918SLJIT_UNUSED_ARG(max);
3919SLJIT_UNUSED_ARG(backtracks);
3920SLJIT_UNUSED_ARG(options);
3921SLJIT_ASSERT(min <= max);
3922
3923OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3924OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3925
3926#ifdef SUPPORT_UNICODE
3927#if PCRE2_CODE_UNIT_WIDTH == 8
3928if (common->utf)
3929  {
3930  if (max < 128 && !(options & READ_CHAR_UPDATE_STR_PTR)) return;
3931
3932  if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))
3933    {
3934    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
3935
3936    if (options & READ_CHAR_UTF8_NEWLINE)
3937      add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL));
3938    else
3939      add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
3940
3941    if (backtracks != NULL)
3942      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
3943    JUMPHERE(jump);
3944    return;
3945    }
3946
3947  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3948  if (min >= 0x10000)
3949    {
3950    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
3951    if (options & READ_CHAR_UPDATE_STR_PTR)
3952      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3953    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3954    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
3955    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3956    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3957    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3958    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3959    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3960    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3961    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3962    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
3963    if (!(options & READ_CHAR_UPDATE_STR_PTR))
3964      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
3965    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3966    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3967    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3968    JUMPHERE(jump2);
3969    if (options & READ_CHAR_UPDATE_STR_PTR)
3970      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
3971    }
3972  else if (min >= 0x800 && max <= 0xffff)
3973    {
3974    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
3975    if (options & READ_CHAR_UPDATE_STR_PTR)
3976      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3977    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3978    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
3979    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3980    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3981    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3982    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3983    if (!(options & READ_CHAR_UPDATE_STR_PTR))
3984      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3985    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3986    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3987    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3988    JUMPHERE(jump2);
3989    if (options & READ_CHAR_UPDATE_STR_PTR)
3990      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
3991    }
3992  else if (max >= 0x800)
3993    {
3994    add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
3995    }
3996  else if (max < 128)
3997    {
3998    OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3999    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4000    }
4001  else
4002    {
4003    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4004    if (!(options & READ_CHAR_UPDATE_STR_PTR))
4005      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4006    else
4007      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4008    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
4009    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4010    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
4011    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4012    if (options & READ_CHAR_UPDATE_STR_PTR)
4013      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
4014    }
4015  JUMPHERE(jump);
4016  }
4017#elif PCRE2_CODE_UNIT_WIDTH == 16
4018if (common->utf)
4019  {
4020  if (max < 0xd800 && !(options & READ_CHAR_UPDATE_STR_PTR)) return;
4021
4022  if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))
4023    {
4024    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4025    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
4026
4027    if (options & READ_CHAR_UTF8_NEWLINE)
4028      add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL));
4029    else
4030      add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
4031
4032    if (backtracks != NULL)
4033      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
4034    JUMPHERE(jump);
4035    return;
4036    }
4037
4038  if (max >= 0x10000)
4039    {
4040    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4041    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
4042    /* TMP2 contains the high surrogate. */
4043    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4044    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
4045    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4046    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
4047    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
4048    JUMPHERE(jump);
4049    return;
4050    }
4051
4052  /* Skip low surrogate if necessary. */
4053  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4054
4055  if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)
4056    {
4057    if (options & READ_CHAR_UPDATE_STR_PTR)
4058      OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4059    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400);
4060    if (options & READ_CHAR_UPDATE_STR_PTR)
4061      CMOV(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0);
4062    if (max >= 0xd800)
4063      CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, 0x10000);
4064    }
4065  else
4066    {
4067    jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
4068    if (options & READ_CHAR_UPDATE_STR_PTR)
4069      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4070    if (max >= 0xd800)
4071      OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
4072    JUMPHERE(jump);
4073    }
4074  }
4075#elif PCRE2_CODE_UNIT_WIDTH == 32
4076if (common->invalid_utf)
4077  {
4078  if (backtracks != NULL)
4079    {
4080    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4081    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
4082    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
4083    }
4084  else
4085    {
4086    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4087    OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000);
4088    CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
4089    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
4090    CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
4091    }
4092  }
4093#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
4094#endif /* SUPPORT_UNICODE */
4095}
4096
4097#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
4098
4099static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass)
4100{
4101/* Tells whether the character codes below 128 are enough
4102to determine a match. */
4103const sljit_u8 value = nclass ? 0xff : 0;
4104const sljit_u8 *end = bitset + 32;
4105
4106bitset += 16;
4107do
4108  {
4109  if (*bitset++ != value)
4110    return FALSE;
4111  }
4112while (bitset < end);
4113return TRUE;
4114}
4115
4116static void read_char7_type(compiler_common *common, jump_list **backtracks, BOOL negated)
4117{
4118/* Reads the precise character type of a character into TMP1, if the character
4119is less than 128. Otherwise it returns with zero. Does not check STR_END. The
4120full_read argument tells whether characters above max are accepted or not. */
4121DEFINE_COMPILER;
4122struct sljit_jump *jump;
4123
4124SLJIT_ASSERT(common->utf);
4125
4126OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
4127OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4128
4129/* All values > 127 are zero in ctypes. */
4130OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
4131
4132if (negated)
4133  {
4134  jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);
4135
4136  if (common->invalid_utf)
4137    {
4138    add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
4139    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
4140    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
4141    }
4142  else
4143    {
4144    OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4145    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4146    }
4147  JUMPHERE(jump);
4148  }
4149}
4150
4151#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
4152
4153static void read_char8_type(compiler_common *common, jump_list **backtracks, BOOL negated)
4154{
4155/* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
4156DEFINE_COMPILER;
4157#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
4158struct sljit_jump *jump;
4159#endif
4160#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
4161struct sljit_jump *jump2;
4162#endif
4163
4164SLJIT_UNUSED_ARG(backtracks);
4165SLJIT_UNUSED_ARG(negated);
4166
4167OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
4168OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4169
4170#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
4171if (common->utf)
4172  {
4173  /* The result of this read may be unused, but saves an "else" part. */
4174  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
4175  jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);
4176
4177  if (!negated)
4178    {
4179    if (common->invalid_utf)
4180      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4181
4182    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4183    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4184    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
4185    if (common->invalid_utf)
4186      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe0 - 0xc2));
4187
4188    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
4189    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
4190    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
4191    if (common->invalid_utf)
4192      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40));
4193
4194    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
4195    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
4196    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
4197    JUMPHERE(jump2);
4198    }
4199  else if (common->invalid_utf)
4200    {
4201    add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
4202    OP1(SLJIT_MOV, TMP2, 0, TMP1, 0);
4203    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
4204
4205    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
4206    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
4207    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
4208    JUMPHERE(jump2);
4209    }
4210  else
4211    add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
4212
4213  JUMPHERE(jump);
4214  return;
4215  }
4216#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
4217
4218#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32
4219if (common->invalid_utf && negated)
4220  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x110000));
4221#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32 */
4222
4223#if PCRE2_CODE_UNIT_WIDTH != 8
4224/* The ctypes array contains only 256 values. */
4225OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
4226jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
4227#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
4228OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
4229#if PCRE2_CODE_UNIT_WIDTH != 8
4230JUMPHERE(jump);
4231#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
4232
4233#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
4234if (common->utf && negated)
4235  {
4236  /* Skip low surrogate if necessary. */
4237  if (!common->invalid_utf)
4238    {
4239    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
4240
4241    if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)
4242      {
4243      OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4244      OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400);
4245      CMOV(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0);
4246      }
4247    else
4248      {
4249      jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
4250      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4251      JUMPHERE(jump);
4252      }
4253    return;
4254    }
4255
4256  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
4257  jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
4258  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));
4259  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4260
4261  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4262  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4263  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
4264  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));
4265
4266  JUMPHERE(jump);
4267  return;
4268  }
4269#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16 */
4270}
4271
static void move_back(compiler_common *common, jump_list **backtracks, BOOL must_be_valid)
{
/* Goes one character back. Affects STR_PTR and TMP1. If must_be_valid is TRUE,
TMP2 is not used. Otherwise TMP2 must contain the start of the subject buffer,
and it is destroyed. Does not modify STR_PTR for invalid character sequences.

When invalid UTF checking is active (invalid_utf is set and must_be_valid is
FALSE) and backtracks is not NULL, a jump is added to backtracks for an
invalid sequence. Without UTF support, or when UTF is off, this is a plain
decrement of STR_PTR by one code unit. */
DEFINE_COMPILER;

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_jump *jump;
#endif

#ifdef SUPPORT_UNICODE
#if PCRE2_CODE_UNIT_WIDTH == 8
struct sljit_label *label;

if (common->utf)
  {
  if (!must_be_valid && common->invalid_utf)
    {
    /* Step back one byte. A byte below 0x80 is a complete character,
    anything else is handled by the utfmoveback_invalid helper, which
    leaves zero in TMP1 for an invalid sequence. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
    add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
    JUMPHERE(jump);
    return;
    }

  /* Valid UTF-8: keep stepping back while the byte just passed is a
  continuation byte (10xxxxxx). */
  label = LABEL();
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
  return;
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->utf)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  if (!must_be_valid && common->invalid_utf)
    {
    /* A code unit outside the 0xd800-0xdfff surrogate range is a complete
    character, anything else is handled by the utfmoveback_invalid helper. */
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000 - 0xd800);
    add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
    if (backtracks != NULL)
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
    JUMPHERE(jump);
    return;
    }

  /* Skip low surrogate if necessary. TMP1 becomes 1 when the code unit just
  passed is in the 0xdc00-0xdfff range and 0 otherwise, then it is scaled
  to a byte offset and subtracted from STR_PTR. */
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xdc00);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  return;
  }
#elif PCRE2_CODE_UNIT_WIDTH == 32
if (common->invalid_utf && !must_be_valid)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  if (backtracks != NULL)
    {
    /* Backtrack before STR_PTR is changed when the previous code unit
    is not below 0x110000. */
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    return;
    }

  /* No backtrack list: TMP1 becomes 1 when the previous code unit is below
  0x110000 and 0 otherwise, so STR_PTR only moves in the former case. */
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x110000);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
  OP2(SLJIT_SHL,  TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  return;
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */

SLJIT_UNUSED_ARG(backtracks);
SLJIT_UNUSED_ARG(must_be_valid);

/* Fixed width characters: go back one code unit. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
4358
4359static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
4360{
4361/* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
4362DEFINE_COMPILER;
4363struct sljit_jump *jump;
4364
4365if (nltype == NLTYPE_ANY)
4366  {
4367  add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
4368  sljit_set_current_flags(compiler, SLJIT_SET_Z);
4369  add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO));
4370  }
4371else if (nltype == NLTYPE_ANYCRLF)
4372  {
4373  if (jumpifmatch)
4374    {
4375    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
4376    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
4377    }
4378  else
4379    {
4380    jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4381    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
4382    JUMPHERE(jump);
4383    }
4384  }
4385else
4386  {
4387  SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
4388  add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
4389  }
4390}
4391
4392#ifdef SUPPORT_UNICODE
4393
4394#if PCRE2_CODE_UNIT_WIDTH == 8
static void do_utfreadchar(compiler_common *common)
{
/* Fast decoding a UTF-8 character. TMP1 contains the first byte
of the character (>= 0xc0). Return char value in TMP1.

On entry STR_PTR addresses the second byte of the character; on return it has
been advanced past the last byte. TMP2 is destroyed. No validity or STR_END
checks are emitted here. */
DEFINE_COMPILER;
struct sljit_jump *jump;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Merge the low six bits of the second byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Searching for the first zero. Bit 0x800 is bit 0x20 of the first byte
after the shift; it is zero for a two byte sequence. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. The xor removes the shifted 0xc0 marker bits. */
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(jump);
/* Merge the low six bits of the third byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Bit 0x10000 is bit 0x10 of the first byte after two shifts; it is zero
for a three byte sequence. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x10000);
jump = JUMP(SLJIT_NOT_ZERO);
/* Three byte sequence. The xor removes the shifted 0xe0 marker bits. */
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Four byte sequence. The shifted 0xf0 marker bits are removed before the
low six bits of the fourth byte are merged. */
JUMPHERE(jump);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0000);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4439
static void do_utfreadtype8(compiler_common *common)
{
/* Fast decoding a UTF-8 character type. TMP2 contains the first byte
of the character (>= 0xc0). Return value in TMP1.

The returned type is the ctypes entry for characters below 256 and zero for
all other characters. On entry STR_PTR addresses the second byte of the
character; on return it has been advanced past the character. TMP2 is
destroyed. No validity or STR_END checks are emitted here. */
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_jump *compare;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Bit 0x20 of the first byte is zero only for two byte sequences. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0x20);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
/* The upper 5 bits are known at this point. A value above 3 means that the
character is at least 4 << 6 = 256. */
compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Two byte sequence of a character above 255: its type is zero. */
JUMPHERE(compare);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* We only have types for characters less than 256. The value loaded from
utf8_table4 (indexed by the first byte minus 0xc0) is added to STR_PTR;
presumably it is the number of bytes following the first byte - confirm
against the table definition. */
JUMPHERE(jump);
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4475
static void do_utfreadchar_invalid(compiler_common *common)
{
/* Slow decoding a UTF-8 character. TMP1 contains the first byte
of the character (>= 0xc0). Return char value in TMP1. STR_PTR is
undefined for invalid characters.

INVALID_UTF_CHAR is returned in TMP1 for an invalid or truncated sequence.
On entry STR_PTR addresses the byte after the first byte. Bytes at or after
STR_END are never read. TMP2 is destroyed. */
DEFINE_COMPILER;
sljit_s32 i;
sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
struct sljit_jump *jump;
struct sljit_jump *buffer_end_close;
struct sljit_label *three_byte_entry;
struct sljit_label *exit_invalid_label;
struct sljit_jump *exit_invalid[11];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Rebase the first byte, so the accepted 0xc2-0xf4 range starts at zero. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc2);

/* Usually more than 3 characters remained in the subject buffer. STR_PTR is
advanced as if a four byte sequence was read, and the shorter sequences
move it back before returning. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));

/* Not a valid start of a multi-byte sequence, no more bytes read. */
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xf5 - 0xc2);

/* Less than three bytes follow the first byte: use the slower path below. */
buffer_end_close = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
/* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

/* Bit 0x800 is set when the first byte was 0xe0 or above. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);

/* Two-byte sequence: TMP1 is the character, two bytes were not used. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(jump);

/* Three-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
if (has_cmov)
  {
  /* For an invalid third byte TMP1 is replaced by 0x20000, which is
  rejected by the 0x800 lower bound check of the three byte path. */
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0x20000);
  exit_invalid[2] = NULL;
  }
else
  exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

/* Bit 0x10000 is set when the first byte was 0xf0 or above. */
OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x10000);
jump = JUMP(SLJIT_NOT_ZERO);

three_byte_entry = LABEL();

/* TMP1 is the character plus 0x20000 here. Characters in the 0xd800-0xdfff
range are rejected first. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2d800);
if (has_cmov)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
  CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0xd800);
  exit_invalid[3] = NULL;
  }
else
  exit_invalid[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
/* TMP1 is the character from now on. One byte was not used. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Characters below 0x800 must not be encoded in three bytes. */
if (has_cmov)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
  CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
  exit_invalid[4] = NULL;
  }
else
  exit_invalid[4] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(jump);

/* Four-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
if (has_cmov)
  {
  /* For an invalid fourth byte TMP1 is replaced by zero, which is
  rejected by the range check below. */
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0);
  exit_invalid[5] = NULL;
  }
else
  exit_invalid[5] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

/* TMP1 is the character plus 0xc00000 here. After the subtraction only the
0x10000-0x10ffff character range is below 0x100000. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc10000);
if (has_cmov)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000);
  exit_invalid[6] = NULL;
  }
else
  exit_invalid[6] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);

OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Close to the end of the subject: at most two bytes follow the first byte.
STR_PTR is moved back to address the byte after the second byte. */
JUMPHERE(buffer_end_close);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
exit_invalid[7] = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);

/* Two-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
/* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
exit_invalid[8] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Three-byte sequence. */
JUMPHERE(jump);
exit_invalid[9] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
if (has_cmov)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
  exit_invalid[10] = NULL;
  }
else
  exit_invalid[10] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);

/* One will be subtracted from STR_PTR later. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));

/* Four byte sequences are not possible. Anything which is not below 0x30000
falls through to the invalid exit. */
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x30000, three_byte_entry);

/* Common exit for all invalid sequences. Some entries of exit_invalid are
NULL when conditional moves are used; NOTE(review): this relies on
sljit_set_label accepting a NULL jump - confirm. */
exit_invalid_label = LABEL();
for (i = 0; i < 11; i++)
  sljit_set_label(exit_invalid[i], exit_invalid_label);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4634
static void do_utfreadnewline_invalid(compiler_common *common)
{
/* Slow decoding a UTF-8 character, specialized for newlines.
TMP1 contains the first byte of the character (>= 0xc0). Return
char value in TMP1.

Only the sequences which may encode a newline are decoded: 0xc2 0x85 is
returned as 0x85, and a three byte sequence starting with 0xe2 0x80 is
returned as a value in the 0x2000-0x203f range. For everything else the
following 10xxxxxx bytes are skipped and INVALID_UTF_CHAR is returned. On
entry STR_PTR addresses the byte after the first byte. TMP2 is destroyed. */
DEFINE_COMPILER;
struct sljit_label *loop;
struct sljit_label *skip_start;
struct sljit_label *three_byte_exit;
struct sljit_jump *jump[5];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

if (common->nltype != NLTYPE_ANY)
  {
  SLJIT_ASSERT(common->nltype != NLTYPE_FIXED || common->newline < 128);

  /* All newlines are ascii, just skip intermediate octets. */
  jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  loop = LABEL();
  /* Load a byte and advance STR_PTR. NOTE(review): the first call with
  SLJIT_MEM_SUPP presumably only checks whether the post-update form is
  available - confirm against the sljit documentation. */
  if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)) == SLJIT_SUCCESS)
    sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  else
    {
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }

  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop);
  /* The last byte read is not a 10xxxxxx byte, step back before it. */
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  JUMPHERE(jump[0]);

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
  OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
  return;
  }

/* Read the second byte, unless the end of the subject is reached. */
jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Only these two first bytes can start a multi-byte newline. */
jump[1] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xc2);
jump[2] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xe2);

/* TMP2 contains the last byte read when control arrives here. */
skip_start = LABEL();
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
jump[3] = CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80);

/* Skip intermediate octets. */
loop = LABEL();
jump[4] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop);

/* The last byte read is not a 10xxxxxx byte, step back before it. */
JUMPHERE(jump[3]);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Exit used when the end of the subject is reached: STR_PTR is not changed. */
three_byte_exit = LABEL();
JUMPHERE(jump[0]);
JUMPHERE(jump[4]);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Two byte long newline: 0x85. */
JUMPHERE(jump[1]);
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x85, skip_start);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x85);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Three byte long newlines: 0x2028 and 0x2029. */
JUMPHERE(jump[2]);
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, skip_start);
CMPTO(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0, three_byte_exit);

OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* The third byte must be in the 0x80-0xbf range. */
OP2(SLJIT_SUB, TMP1, 0, TMP2, 0, SLJIT_IMM, 0x80);
CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40, skip_start);

/* The result is 0x2000 plus the low six bits of the third byte. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0x2000);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4725
static void do_utfmoveback_invalid(compiler_common *common)
{
/* Goes one character back. On entry STR_PTR addresses the last byte of the
character, TMP1 contains that byte (>= 0x80), and TMP2 contains the start of
the subject buffer. Only the lead byte / 10xxxxxx byte structure is checked
here.

On success TMP1 is set to 1 and STR_PTR addresses the first byte of the
character. On failure TMP1 is set to 0 and STR_PTR is set to one byte after
its entry value. Bytes before TMP2 are never read. */
DEFINE_COMPILER;
sljit_s32 i;
struct sljit_jump *jump;
struct sljit_jump *buffer_start_close;
struct sljit_label *exit_ok_label;
struct sljit_label *exit_invalid_label;
struct sljit_jump *exit_invalid[7];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* STR_PTR is moved back as if a four byte sequence was found, and the
shorter sequences move it forward before returning. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
/* The last byte of a multi-byte character cannot be 0xc0 or above. */
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);

/* Two-byte sequence. */
/* Less than three bytes precede the last byte: use the slower path below. */
buffer_start_close = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));

/* A byte in the 0xc0-0xdf range starts a two byte sequence. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x20);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Three-byte sequence. */
JUMPHERE(jump);
/* The comparison is unsigned: only the 0x80-0xbf bytes, which are in the
-0x40..-1 range after the subtraction above, are accepted. */
exit_invalid[1] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));

/* A byte in the 0xe0-0xef range starts a three byte sequence. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x10);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Four-byte sequence. */
JUMPHERE(jump);
/* TMP1 becomes the byte minus 0x80, which must be below 0x40. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40);

/* The first byte must be in the 0xf0-0xf4 range. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0);
exit_invalid[3] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x05);

/* Successful exit without STR_PTR adjustment. */
exit_ok_label = LABEL();
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Two-byte sequence. */
/* Close to the start of the subject. STR_PTR is moved forward to address
the byte before the last byte. */
JUMPHERE(buffer_start_close);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));

exit_invalid[4] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20, exit_ok_label);

/* Three-byte sequence. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
exit_invalid[5] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);
exit_invalid[6] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));

OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10, exit_ok_label);

/* Four-byte sequences are not possible. */

/* Invalid exit for the close to start path: STR_PTR is two bytes before
the last byte here, so three is added. */
exit_invalid_label = LABEL();
sljit_set_label(exit_invalid[5], exit_invalid_label);
sljit_set_label(exit_invalid[6], exit_invalid_label);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

JUMPHERE(exit_invalid[4]);
/* -2 + 4 = 2 */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));

/* Invalid exit for the normal path: STR_PTR is three bytes before the last
byte here, so four is added. */
exit_invalid_label = LABEL();
for (i = 0; i < 4; i++)
  sljit_set_label(exit_invalid[i], exit_invalid_label);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(4));
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4821
static void do_utfpeakcharback(compiler_common *common)
{
/* Peek a character back. Does not modify STR_PTR.

Decodes the two, three or four byte UTF-8 sequence which ends just before
STR_PTR, and returns the character in TMP1. TMP2 is destroyed. No validity
or buffer start checks are emitted here. Single byte characters are not
handled by this code; presumably the caller filters them out - confirm
against the call sites. */
DEFINE_COMPILER;
struct sljit_jump *jump[2];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Two byte sequence: the byte at -2 is in the 0xc0-0xdf range. TMP1 keeps
its low five bits. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20);

/* Three byte sequence: the byte at -3 is in the 0xe0-0xef range. TMP1
keeps its low four bits. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
jump[1] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10);

/* Four byte sequence: the byte at -3 is a 10xxxxxx byte, so TMP1 is
changed to the byte minus 0x80, and combined with the first byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Append the low six bits of the byte at -2. */
JUMPHERE(jump[1]);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Append the low six bits of the byte at -1. */
JUMPHERE(jump[0]);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4858
static void do_utfpeakcharback_invalid(compiler_common *common)
{
/* Peek at the character that ends just before STR_PTR, validating the UTF-8
sequence while decoding it. Does not modify STR_PTR. The result is left in
TMP1; every rejected sequence yields INVALID_UTF_CHAR.

NOTE(review): TMP1 and TMP2 are both read before they are written, so they
are inputs. Presumably TMP1 holds the code unit at STR_PTR[-1] (already known
to be >= 0x80) and TMP2 the lowest address that may be read - confirm against
the call sites. */
DEFINE_COMPILER;
sljit_s32 i;
sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
struct sljit_jump *jump[2];
struct sljit_label *two_byte_entry;
struct sljit_label *three_byte_entry;
struct sljit_label *exit_invalid_label;
/* Forward jumps to the common failure exit. Slots 2, 3 and 5 stay NULL when
a conditional move is used instead of a branch. */
struct sljit_jump *exit_invalid[8];

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* TMP2 + 3 units >= STR_PTR selects the restricted paths near the end of
this function, which look back fewer than four units. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);
jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* Two-byte sequence. */
/* Accepted lead units are 0xc2..0xdf (0x1e values starting at 0xc2). */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
jump[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x1e);

/* Shared by the restricted paths below once they find a two-byte lead. */
two_byte_entry = LABEL();
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
/* If TMP1 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Not a two-byte lead: the unit at -2 must then be a trailing unit
(0x80..0xbf). Both units are reduced to their six payload bits and merged. */
JUMPHERE(jump[1]);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Three-byte sequence. */
/* Accepted lead units are 0xe0..0xef. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
jump[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x10);

/* Shared by the restricted three-unit path below. */
three_byte_entry = LABEL();
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Reject 0xd800..0xdfff. With a conditional move the value is replaced by
-0xd800, which becomes 0 after the ADD below and is then caught by the
"less than 0x800" test that follows. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
if (has_cmov)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
  CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, -0xd800);
  exit_invalid[2] = NULL;
  }
else
  exit_invalid[2] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);

/* Reject values below 0x800, which do not need three units. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
if (has_cmov)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
  CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
  exit_invalid[3] = NULL;
  }
else
  exit_invalid[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Not a three-byte lead: the unit at -3 must be a trailing unit. */
JUMPHERE(jump[1]);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0 - 0x80);
exit_invalid[4] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Four-byte sequence. */
/* The value is biased by -0x10000 so that a single comparison against
0x100000 covers the whole accepted range 0x10000..0x10ffff. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 18);
/* ADD is used instead of OR because of the SUB 0x10000 above. */
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);

if (has_cmov)
  {
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
  /* Pre-biased so that the ADD below produces INVALID_UTF_CHAR. */
  CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000);
  exit_invalid[5] = NULL;
  }
else
  exit_invalid[5] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);

OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Restricted path: the four-unit look-back is not allowed. TMP2 is lowered
to "entry value + 2 units" to test whether three units may be read. */
JUMPHERE(jump[0]);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);

/* Two-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_entry);

OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
exit_invalid[6] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Three-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x10, three_byte_entry);

/* No unit at -4 may be read on this path, so a longer sequence fails. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Most restricted path: fail when TMP2 is above STR_PTR, otherwise only
the unit at -2 is examined. */
JUMPHERE(jump[0]);
exit_invalid[7] = CMP(SLJIT_GREATER, TMP2, 0, STR_PTR, 0);

/* Two-byte sequence. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_entry);

/* Common failure exit; execution also falls into it from the test above.
The NULL entries of exit_invalid are passed to sljit_set_label as they are. */
exit_invalid_label = LABEL();
for (i = 0; i < 8; i++)
  sljit_set_label(exit_invalid[i], exit_invalid_label);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
4990
4991#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
4992
4993#if PCRE2_CODE_UNIT_WIDTH == 16
4994
4995static void do_utfreadchar_invalid(compiler_common *common)
4996{
4997/* Slow decoding a UTF-16 character. TMP1 contains the first half
4998of the character (>= 0xd800). Return char value in TMP1. STR_PTR is
4999undefined for invalid characters. */
5000DEFINE_COMPILER;
5001struct sljit_jump *exit_invalid[3];
5002
5003sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5004
5005/* TMP2 contains the high surrogate. */
5006exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);
5007exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5008
5009OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5010OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
5011OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5012
5013OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
5014OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
5015exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);
5016
5017OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5018OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5019
5020JUMPHERE(exit_invalid[0]);
5021JUMPHERE(exit_invalid[1]);
5022JUMPHERE(exit_invalid[2]);
5023OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
5024OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5025}
5026
5027static void do_utfreadnewline_invalid(compiler_common *common)
5028{
5029/* Slow decoding a UTF-16 character, specialized for newlines.
5030TMP1 contains the first half of the character (>= 0xd800). Return
5031char value in TMP1. */
5032
5033DEFINE_COMPILER;
5034struct sljit_jump *exit_invalid[2];
5035
5036sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5037
5038/* TMP2 contains the high surrogate. */
5039exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5040
5041OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5042exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);
5043
5044OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
5045OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400);
5046OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
5047OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
5048OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
5049OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5050
5051OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5052
5053JUMPHERE(exit_invalid[0]);
5054JUMPHERE(exit_invalid[1]);
5055OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
5056OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5057}
5058
5059static void do_utfmoveback_invalid(compiler_common *common)
5060{
5061/* Goes one character back. */
5062DEFINE_COMPILER;
5063struct sljit_jump *exit_invalid[3];
5064
5065sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5066
5067exit_invalid[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400);
5068exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);
5069
5070OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5071OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
5072exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);
5073
5074OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5075OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
5076OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5077
5078JUMPHERE(exit_invalid[0]);
5079JUMPHERE(exit_invalid[1]);
5080JUMPHERE(exit_invalid[2]);
5081
5082OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5083OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
5084OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5085}
5086
5087static void do_utfpeakcharback_invalid(compiler_common *common)
5088{
5089/* Peak a character back. Does not modify STR_PTR. */
5090DEFINE_COMPILER;
5091struct sljit_jump *jump;
5092struct sljit_jump *exit_invalid[3];
5093
5094sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5095
5096jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000);
5097OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
5098exit_invalid[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
5099exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);
5100
5101OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5102OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
5103OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
5104exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
5105OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
5106OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5107
5108JUMPHERE(jump);
5109OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5110
5111JUMPHERE(exit_invalid[0]);
5112JUMPHERE(exit_invalid[1]);
5113JUMPHERE(exit_invalid[2]);
5114
5115OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
5116OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5117}
5118
5119#endif /* PCRE2_CODE_UNIT_WIDTH == 16 */
5120
5121/* UCD_BLOCK_SIZE must be 128 (see the assert below). */
5122#define UCD_BLOCK_MASK 127
5123#define UCD_BLOCK_SHIFT 7
5124
static void do_getucd(compiler_common *common)
{
/* Emits the two-stage table lookup for the character that comes in TMP1.
On return TMP2 holds the 16 bit value loaded from the stage 2 table and TMP1
holds the stage 2 index that was used; the original character in TMP1 is
lost. No field of the UCD record itself is loaded here. */
DEFINE_COMPILER;
#if PCRE2_CODE_UNIT_WIDTH == 32
struct sljit_jump *jump;
#endif

#if defined SLJIT_DEBUG && SLJIT_DEBUG
/* dummy_ucd_record */
const ucd_record *record = GET_UCD(UNASSIGNED_UTF_CHAR);
SLJIT_ASSERT(record->script == ucp_Unknown && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
#endif

/* The shifts and the mask used below are only valid for this layout. */
SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

#if PCRE2_CODE_UNIT_WIDTH == 32
if (!common->utf)
  {
  /* Values above MAX_UTF_CODE_POINT are replaced by UNASSIGNED_UTF_CHAR
  before they are used as a table index. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
  JUMPHERE(jump);
  }
#endif

/* Stage 1: indexed by the block number (character >> UCD_BLOCK_SHIFT); the
shift by 1 turns the index into a byte offset for the 16 bit load. */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
/* Stage 2 index: (stage 1 value << UCD_BLOCK_SHIFT) + offset inside the block. */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
/* 16 bit load from the stage 2 table; the final argument of the two-register
address is the shift applied to TMP1. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5164
static void do_getucdtype(compiler_common *common)
{
/* Search the UCD record for the character comes in TMP1.
Returns chartype in TMP1. TMP2 is left holding the stage 2 value (the
record index) multiplied by 4. */
DEFINE_COMPILER;
#if PCRE2_CODE_UNIT_WIDTH == 32
struct sljit_jump *jump;
#endif

#if defined SLJIT_DEBUG && SLJIT_DEBUG
/* dummy_ucd_record */
const ucd_record *record = GET_UCD(UNASSIGNED_UTF_CHAR);
SLJIT_ASSERT(record->script == ucp_Unknown && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
#endif

/* The shifts, the mask and the multiplication by 12 below are only valid
for this layout. */
SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

#if PCRE2_CODE_UNIT_WIDTH == 32
if (!common->utf)
  {
  /* Values above MAX_UTF_CODE_POINT are replaced by UNASSIGNED_UTF_CHAR
  before they are used as a table index. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
  JUMPHERE(jump);
  }
#endif

/* Stage 1: indexed by the block number (character >> UCD_BLOCK_SHIFT); the
shift by 1 turns the index into a byte offset for the 16 bit load. */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
/* Stage 2 index: (stage 1 value << UCD_BLOCK_SHIFT) + offset inside the block. */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);

/* TMP2 is multiplied by 12. Same as (TMP2 << 2) + ((TMP2 << 2) << 1). */
/* TMP1 = address of the chartype field of record 0, plus TMP2 * 4; the
load below adds the remaining TMP2 * 8 through the address shift of 1. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 1);

OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
}
5211
5212#endif /* SUPPORT_UNICODE */
5213
static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common)
{
/* Emits the set-up that runs before the first match attempt and the code
that advances STR_PTR to the next starting position.

With PCRE2_FIRSTLINE or PCRE2_USE_OFFSET_LIMIT a limit pointer is computed
and stored in the stack slot common->match_end_ptr. After that an
unconditional jump skips the advance code, so the first pass through the
generated code does not move STR_PTR.

Returns the label that marks the beginning of the advance code. */
DEFINE_COMPILER;
struct sljit_label *mainloop;
struct sljit_label *newlinelabel = NULL;
struct sljit_jump *start;
struct sljit_jump *end = NULL;
struct sljit_jump *end2 = NULL;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
struct sljit_label *loop;
struct sljit_jump *jump;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
jump_list *newline = NULL;
sljit_u32 overall_options = common->re->overall_options;
BOOL hascrorlf = (common->re->flags & PCRE2_HASCRORLF) != 0;
BOOL newlinecheck = FALSE;
BOOL readuchar = FALSE;

/* The advance code steps over a whole newline sequence only when neither
PCRE2_HASCRORLF nor PCRE2_FIRSTLINE is set and the newline type is "any",
"anycrlf" or a fixed sequence stored in more than eight bits. */
if (!(hascrorlf || (overall_options & PCRE2_FIRSTLINE) != 0)
    && (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
  newlinecheck = TRUE;

SLJIT_ASSERT(common->abort_label == NULL);

if ((overall_options & PCRE2_FIRSTLINE) != 0)
  {
  /* Search for the end of the first line. */
  SLJIT_ASSERT(common->match_end_ptr != 0);
  /* STR_PTR is used for the scan; TMP3 keeps its value so that it can be
  restored afterwards. */
  OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);

  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Fixed two unit newline: compare the units at -1 and 0 against the
    high and low byte of common->newline after each one unit step. */
    mainloop = LABEL();
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
    CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
    JUMPHERE(end);
    /* STR_PTR is one unit past the position to record. */
    OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }
  else
    {
    end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
    mainloop = LABEL();
    /* Continual stores does not cause data dependency. */
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
    read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
    check_newlinechar(common, common->nltype, &newline, TRUE);
    CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
    JUMPHERE(end);
    /* Reached without a newline hit: record the current STR_PTR. The jumps
    collected in "newline" land after this store, so they keep the position
    stored at the top of the loop. */
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
    set_jumps(newline, LABEL());
    }

  OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
  }
else if ((overall_options & PCRE2_USE_OFFSET_LIMIT) != 0)
  {
  /* Check whether offset limit is set and valid. */
  SLJIT_ASSERT(common->match_end_ptr != 0);

  /* TMP1 = arguments->offset_limit. */
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, offset_limit));
    }
  else
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, offset_limit));

  /* With an unset limit STR_END is the value that gets stored. */
  OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
  end = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw) PCRE2_UNSET);
  /* TMP2 = arguments->begin (completed by the second load below when
  HAS_VIRTUAL_REGISTERS is true). */
  if (HAS_VIRTUAL_REGISTERS)
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
  else
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));

#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  /* The limit is scaled by the code unit size before the addition. */
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
  if (HAS_VIRTUAL_REGISTERS)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));

  /* TMP2 = begin + limit, clamped to STR_END. */
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
  end2 = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
  OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
  JUMPHERE(end2);
  /* A limit below the current position ends the match with NOMATCH. */
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
  add_jump(compiler, &common->abort, CMP(SLJIT_LESS, TMP2, 0, STR_PTR, 0));
  JUMPHERE(end);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, TMP2, 0);
  }

/* Skips all of the advance code emitted below; bound at the very end. */
start = JUMP(SLJIT_JUMP);

if (newlinecheck)
  {
  /* Reached from the advance code when the current unit equals the high
  byte of common->newline. Steps over that unit and, if the next unit equals
  the low byte, over that one as well (TMP1 is 0 or 1, scaled to the code
  unit size). */
  newlinelabel = LABEL();
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  end2 = JUMP(SLJIT_JUMP);
  }

/* The returned label: start of the advance code. */
mainloop = LABEL();

/* Increasing the STR_PTR here requires one less jump in the most common case. */
/* The current unit is loaded into TMP1 when either the UTF length
computation below or the newline comparison needs it. */
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && !common->invalid_utf) readuchar = TRUE;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
if (newlinecheck) readuchar = TRUE;

if (readuchar)
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);

if (newlinecheck)
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->invalid_utf)
  {
  /* Skip continuation code units. */
  /* The loop ends at STR_END or at the first unit outside 0x80..0xbf; in
  the latter case the unit that was read is given back. */
  loop = LABEL();
  jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x40, loop);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPHERE(jump);
  }
else if (common->utf)
  {
  /* For a unit of 0xc0 or above, add the value that utf8_table4 holds for
  it to STR_PTR. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  JUMPHERE(jump);
  }
#elif PCRE2_CODE_UNIT_WIDTH == 16
if (common->invalid_utf)
  {
  /* Skip continuation code units. */
  /* Same loop as in the 8 bit case, for units in 0xdc00..0xdfff. */
  loop = LABEL();
  jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400, loop);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPHERE(jump);
  }
else if (common->utf)
  {
  /* Advance one more unit when the unit just passed is in 0xd800..0xdbff,
  either with a conditional move or by adding a scaled 0/1 flag. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);

  if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
    {
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x400);
    CMOV(SLJIT_LESS, STR_PTR, TMP2, 0);
    }
  else
    {
    OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x400);
    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
    }
  }
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
/* Target of the initial skip. */
JUMPHERE(start);

if (newlinecheck)
  {
  /* Both exits of the newline helper above continue here. */
  JUMPHERE(end);
  JUMPHERE(end2);
  }

return mainloop;
}
5404
5405
5406static SLJIT_INLINE void add_prefix_char(PCRE2_UCHAR chr, fast_forward_char_data *chars, BOOL last)
5407{
5408sljit_u32 i, count = chars->count;
5409
5410if (count == 255)
5411  return;
5412
5413if (count == 0)
5414  {
5415  chars->count = 1;
5416  chars->chars[0] = chr;
5417
5418  if (last)
5419    chars->last_count = 1;
5420  return;
5421  }
5422
5423for (i = 0; i < count; i++)
5424  if (chars->chars[i] == chr)
5425    return;
5426
5427if (count >= MAX_DIFF_CHARS)
5428  {
5429  chars->count = 255;
5430  return;
5431  }
5432
5433chars->chars[count] = chr;
5434chars->count = count + 1;
5435
5436if (last)
5437  chars->last_count++;
5438}
5439
static int scan_prefix(compiler_common *common, PCRE2_SPTR cc, fast_forward_char_data *chars, int max_chars, sljit_u32 *rec_count)
{
/* Recursive function, which scans prefix literals. */
/* Walks the compiled pattern from cc and records, position by position, the
code units that can occur there.

  common     compiler state (utf and ucp flags, character tables)
  cc         first opcode to examine
  chars      one entry per position; filled in through add_prefix_char, or
             set to count 255 where anything may occur
  max_chars  number of entries that may still be filled
  rec_count  shared budget, decremented once per loop iteration of every
             invocation

Returns the number of positions consumed by this invocation, or 0 as soon as
*rec_count is exhausted. The result of each recursive call is assigned back
to max_chars, and a result of 0 ends the scan. */
BOOL last, any, class, caseless;
int len, repeat, len_save, consumed = 0;
sljit_u32 chr; /* Any unicode character. */
sljit_u8 *bytes, *bytes_end, byte;
PCRE2_SPTR alternative, cc_save, oc;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
PCRE2_UCHAR othercase[4];
#elif defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
PCRE2_UCHAR othercase[2];
#else
PCRE2_UCHAR othercase[1];
#endif

repeat = 1;
while (TRUE)
  {
  if (*rec_count == 0)
    return 0;
  (*rec_count)--;

  /* Per-item state. "last" stays TRUE for items after which the scan has
  to stop; "any" and "class" select one of the two special paths below. */
  last = TRUE;
  any = FALSE;
  class = FALSE;
  caseless = FALSE;

  /* Classify the opcode. A "continue" restarts the outer loop at the next
  opcode; a "break" goes on to the any / class / literal handling. */
  switch (*cc)
    {
    case OP_CHARI:
    caseless = TRUE;
    /* Fall through */
    case OP_CHAR:
    last = FALSE;
    cc++;
    break;

    case OP_SOD:
    case OP_SOM:
    case OP_SET_SOM:
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_EODN:
    case OP_EOD:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_DOLL:
    case OP_DOLLM:
    /* Zero width assertions. */
    cc++;
    continue;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ASSERT_NA:
    case OP_ASSERTBACK_NA:
    /* The whole assertion group is stepped over. */
    cc = bracketend(cc);
    continue;

    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_POSPLUSI:
    caseless = TRUE;
    /* Fall through */
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_POSPLUS:
    /* One occurrence is recorded; "last" stays TRUE, so the scan stops
    after it. */
    cc++;
    break;

    case OP_EXACTI:
    caseless = TRUE;
    /* Fall through */
    case OP_EXACT:
    repeat = GET2(cc, 1);
    last = FALSE;
    cc += 1 + IMM2_SIZE;
    break;

    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_POSQUERYI:
    caseless = TRUE;
    /* Fall through */
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_POSQUERY:
    /* Optional character: first scan what follows it into the same
    positions, then record the character itself. */
    len = 1;
    cc++;
#ifdef SUPPORT_UNICODE
    if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
#endif
    max_chars = scan_prefix(common, cc + len, chars, max_chars, rec_count);
    if (max_chars == 0)
      return consumed;
    last = FALSE;
    break;

    case OP_KET:
    cc += 1 + LINK_SIZE;
    continue;

    case OP_ALT:
    cc += GET(cc, 1);
    continue;

    case OP_ONCE:
    case OP_BRA:
    case OP_BRAPOS:
    case OP_CBRA:
    case OP_CBRAPOS:
    /* Every alternative after the first is scanned recursively into the
    same positions; the first one is then scanned by this loop. */
    alternative = cc + GET(cc, 1);
    while (*alternative == OP_ALT)
      {
      max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars, rec_count);
      if (max_chars == 0)
        return consumed;
      alternative += GET(alternative, 1);
      }

    if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
      cc += IMM2_SIZE;
    cc += 1 + LINK_SIZE;
    continue;

    case OP_CLASS:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE))
      return consumed;
#endif
    class = TRUE;
    break;

    case OP_NCLASS:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    class = TRUE;
    break;

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
    case OP_XCLASS:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc += GET(cc, 1);
    break;
#endif

    case OP_DIGIT:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
      return consumed;
#endif
    any = TRUE;
    cc++;
    break;

    case OP_WHITESPACE:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
      return consumed;
#endif
    any = TRUE;
    cc++;
    break;

    case OP_WORDCHAR:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
      return consumed;
#endif
    any = TRUE;
    cc++;
    break;

    case OP_NOT:
    case OP_NOTI:
    /* Extra step for the character operand. */
    cc++;
    /* Fall through. */
    case OP_NOT_DIGIT:
    case OP_NOT_WHITESPACE:
    case OP_NOT_WORDCHAR:
    case OP_ANY:
    case OP_ALLANY:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc++;
    break;

#ifdef SUPPORT_UNICODE
    case OP_NOTPROP:
    case OP_PROP:
#if PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    cc += 1 + 2;
    break;
#endif

    case OP_TYPEEXACT:
    /* Only the count is taken here; the type opcode that follows is
    handled by the next iteration, which still sees this repeat value. */
    repeat = GET2(cc, 1);
    cc += 1 + IMM2_SIZE;
    continue;

    case OP_NOTEXACT:
    case OP_NOTEXACTI:
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    if (common->utf) return consumed;
#endif
    any = TRUE;
    repeat = GET2(cc, 1);
    cc += 1 + IMM2_SIZE + 1;
    break;

    default:
    return consumed;
    }

  if (any)
    {
    /* Mark "repeat" consecutive positions as accepting anything. */
    do
      {
      chars->count = 255;

      consumed++;
      if (--max_chars == 0)
        return consumed;
      chars++;
      }
    while (--repeat > 0);

    repeat = 1;
    continue;
    }

  if (class)
    {
    /* The 32 byte bitmap follows the opcode; cc is moved to whatever comes
    after the bitmap, which may be a repeat opcode. */
    bytes = (sljit_u8*) (cc + 1);
    cc += 1 + 32 / sizeof(PCRE2_UCHAR);

    switch (*cc)
      {
      case OP_CRSTAR:
      case OP_CRMINSTAR:
      case OP_CRPOSSTAR:
      case OP_CRQUERY:
      case OP_CRMINQUERY:
      case OP_CRPOSQUERY:
      /* The class may match nothing, so what follows it is scanned into
      the same positions first. */
      max_chars = scan_prefix(common, cc + 1, chars, max_chars, rec_count);
      if (max_chars == 0)
        return consumed;
      break;

      default:
      case OP_CRPLUS:
      case OP_CRMINPLUS:
      case OP_CRPOSPLUS:
      break;

      case OP_CRRANGE:
      case OP_CRMINRANGE:
      case OP_CRPOSRANGE:
      repeat = GET2(cc, 1);
      if (repeat <= 0)
        return consumed;
      break;
      }

    do
      {
      /* A set top bit of the bitmap makes the position accept anything. */
      if (bytes[31] & 0x80)
        chars->count = 255;
      else if (chars->count != 255)
        {
        /* Add every set bit of the bitmap; chr tracks the bit index. */
        bytes_end = bytes + 32;
        chr = 0;
        do
          {
          byte = *bytes++;
          SLJIT_ASSERT((chr & 0x7) == 0);
          if (byte == 0)
            chr += 8;
          else
            {
            do
              {
              if ((byte & 0x1) != 0)
                add_prefix_char(chr, chars, TRUE);
              byte >>= 1;
              chr++;
              }
            while (byte != 0);
            /* Round up to the first bit of the next bitmap byte. */
            chr = (chr + 7) & ~7;
            }
          }
        while (chars->count != 255 && bytes < bytes_end);
        /* Rewind to the start of the bitmap for the next repetition. */
        bytes = bytes_end - 32;
        }

      consumed++;
      if (--max_chars == 0)
        return consumed;
      chars++;
      }
    while (--repeat > 0);

    /* Decide how to go on after the class. Opcodes without a case here
    leave cc unchanged and are classified by the next iteration. */
    switch (*cc)
      {
      case OP_CRSTAR:
      case OP_CRMINSTAR:
      case OP_CRPOSSTAR:
      return consumed;

      case OP_CRQUERY:
      case OP_CRMINQUERY:
      case OP_CRPOSQUERY:
      cc++;
      break;

      case OP_CRRANGE:
      case OP_CRMINRANGE:
      case OP_CRPOSRANGE:
      /* Only a range with equal minimum and maximum can be followed. */
      if (GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE))
        return consumed;
      cc += 1 + 2 * IMM2_SIZE;
      break;
      }

    repeat = 1;
    continue;
    }

  /* Literal character: len is its length in code units. */
  len = 1;
#ifdef SUPPORT_UNICODE
  if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
#endif

  if (caseless && char_has_othercase(common, cc))
    {
#ifdef SUPPORT_UNICODE
    if (common->utf)
      {
      /* The other case is usable only if it encodes to the same number of
      code units. */
      GETCHAR(chr, cc);
      if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
        return consumed;
      }
    else
#endif
      {
      chr = *cc;
#ifdef SUPPORT_UNICODE
      if (common->ucp && chr > 127)
        othercase[0] = UCD_OTHERCASE(chr);
      else
#endif
        othercase[0] = TABLE_GET(chr, common->fcc, chr);
      }
    }
  else
    {
    caseless = FALSE;
    othercase[0] = 0; /* Stops compiler warning - PH */
    }

  /* Record the character "repeat" times, one position per code unit. The
  "last" argument of add_prefix_char is TRUE for the final code unit of the
  character only. */
  len_save = len;
  cc_save = cc;
  while (TRUE)
    {
    oc = othercase;
    do
      {
      len--;
      consumed++;

      /* NOTE(review): the value stored in chr here does not appear to be
      read again before chr is reassigned. */
      chr = *cc;
      add_prefix_char(*cc, chars, len == 0);

      if (caseless)
        add_prefix_char(*oc, chars, len == 0);

      if (--max_chars == 0)
        return consumed;
      chars++;
      cc++;
      oc++;
      }
    while (len > 0);

    if (--repeat == 0)
      break;

    /* Rewind to the first code unit for the next repetition. */
    len = len_save;
    cc = cc_save;
    }

  repeat = 1;
  if (last)
    return consumed;
  }
}
5848
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
static void jumpto_if_not_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg, struct sljit_label *label)
{
/* Emits a test on the code unit held in reg and a jump to label when that
unit is a trailing unit rather than the first unit of a character: a UTF-8
unit of the form 10xxxxxx, or a UTF-16 unit in 0xdc00..0xdfff. The contents
of reg are destroyed by the masking. */
#if PCRE2_CODE_UNIT_WIDTH == 8
enum { unit_kind_mask = 0xc0, trailing_unit_kind = 0x80 };
#elif PCRE2_CODE_UNIT_WIDTH == 16
enum { unit_kind_mask = 0xfc00, trailing_unit_kind = 0xdc00 };
#else
#error "Unknown code width"
#endif

OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, unit_kind_mask);
CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, trailing_unit_kind, label);
}
#endif
5863
5864#include "pcre2_jit_simd_inc.h"
5865
#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD

/* Selects two prefix positions for the SIMD character pair search and emits
that search when a suitable pair exists.

Arguments:
  common   compiler state, passed on to fast_forward_char_pair_simd()
  chars    prefix data; last_count must already hold the priority computed
           by fast_forward_first_n_chars(), and chars[0] / chars[1] the
           (up to two) accepted code units
  max      number of valid entries in chars

A pair (hi, lo) qualifies when lo < hi, lo is not further below hi than
max_fast_forward_char_pair_offset(), both priorities are greater than 2, and
the two positions have no code unit in common. Among the qualifying pairs the
one with the largest priority sum wins; on equal sums the pair visited last
(lower hi, then higher lo) replaces the earlier one.

Returns: TRUE when the search code has been emitted, FALSE otherwise */
static BOOL check_fast_forward_char_pair_simd(compiler_common *common, fast_forward_char_data *chars, int max)
{
sljit_s32 hi, lo;
sljit_s32 best_hi = 0, best_lo = 0;
sljit_u32 best_pri = 0, pair_pri;
PCRE2_UCHAR hi_c0, hi_c1, hi_pri, lo_c0, lo_c1, lo_pri;

for (hi = max - 1; hi >= 1; hi--)
  {
  if (chars[hi].last_count <= 2)
    continue;

  hi_c0 = chars[hi].chars[0];
  hi_c1 = chars[hi].chars[1];
  hi_pri = chars[hi].last_count;

  /* The partner position must be within the supported distance. */
  lo = hi - max_fast_forward_char_pair_offset();
  if (lo < 0)
    lo = 0;

  for (; lo < hi; lo++)
    {
    lo_pri = chars[lo].last_count;
    if (lo_pri <= 2)
      continue;

    pair_pri = (sljit_u32)hi_pri + (sljit_u32)lo_pri;
    if (pair_pri < best_pri)
      continue;

    lo_c0 = chars[lo].chars[0];
    lo_c1 = chars[lo].chars[1];

    /* The two positions must not share any code unit. */
    if (hi_c0 == lo_c0 || hi_c0 == lo_c1 || hi_c1 == lo_c0 || hi_c1 == lo_c1)
      continue;

    best_pri = pair_pri;
    best_hi = hi;
    best_lo = lo;
    }
  }

/* A zero priority means that no pair has been accepted. */
if (best_pri == 0)
  return FALSE;

fast_forward_char_pair_simd(common, best_hi, chars[best_hi].chars[0], chars[best_hi].chars[1], best_lo, chars[best_lo].chars[0], chars[best_lo].chars[1]);
return TRUE;
}

#endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */
5914
/* Emits code that moves STR_PTR forward to the first position where the code
unit located "offset" units after it equals char1 or char2. Passing the same
value in both arguments searches for a single code unit. A non-zero offset is
only accepted in PCRE2_JIT_COMPLETE mode (see the assertion below).

Arguments:
  common   compiler state
  char1    first accepted code unit
  char2    second accepted code unit (may be equal to char1)
  offset   distance (in code units) of the tested unit from the match start

In the scalar loop, running out of subject jumps to common->failed_match in
PCRE2_JIT_COMPLETE mode; in the other modes the generated code continues
after the loop with STR_PTR left where the end check stopped it.

TMP1 is used as scratch; TMP3 holds the saved STR_END while
common->match_end_ptr is in use. */
static void fast_forward_first_char2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
{
DEFINE_COMPILER;
struct sljit_label *start;
struct sljit_jump *match;
struct sljit_jump *partial_quit;
PCRE2_UCHAR mask;
BOOL has_match_end = (common->match_end_ptr != 0);

SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE || offset == 0);

/* TMP1 = value stored in the match_end_ptr local. */
if (has_match_end)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);

/* From here on STR_PTR addresses the code unit that is actually tested. */
if (offset > 0)
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));

if (has_match_end)
  {
  /* Save STR_END, then clamp it: STR_END = min(STR_END, TMP1 + offset + 1). */
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);

  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offset + 1));
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0);
  CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
  }

#ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD

if (JIT_HAS_FAST_FORWARD_CHAR_SIMD)
  {
  /* The whole search is delegated to the SIMD implementation. */
  fast_forward_char_simd(common, char1, char2, offset);

  /* Undo the offset adjustment made above. */
  if (offset > 0)
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));

  if (has_match_end)
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  return;
  }

#endif

/* Scalar loop: one code unit is tested per iteration. */
start = LABEL();

partial_quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, partial_quit);

/* Load the current code unit and step over it before comparing. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

if (char1 == char2)
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1, start);
else
  {
  mask = char1 ^ char2;
  if (is_powerof2(mask))
    {
    /* When is_powerof2() accepts the mask (presumably: the two values differ
    in a single bit), forcing the mask bits on lets one compare cover both. */
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | mask, start);
    }
  else
    {
    /* Two separate compares: leave on char1, retry unless char2. */
    match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1);
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char2, start);
    JUMPHERE(match);
    }
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
if (common->utf && offset > 0)
  {
  /* STR_PTR is now offset + 1 units past the candidate match start. Retry
  when the unit at the candidate start is not the first unit of a character. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-(offset + 1)));
  jumpto_if_not_utf_char_start(compiler, TMP1, start);
  }
#endif

/* Step back to the candidate match start. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset + 1));

/* Outside PCRE2_JIT_COMPLETE mode the end-of-subject exit lands here, after
the subtraction, so STR_PTR is not moved back on that path. */
if (common->mode != PCRE2_JIT_COMPLETE)
  JUMPHERE(partial_quit);

if (has_match_end)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}
6000
/* Tries to emit a fast forward search based on the code units that
scan_prefix() reports for the first MAX_N_CHARS positions of the pattern.

Three strategies are tried in this order:
  1. a SIMD character pair search (when available and a pair qualifies);
  2. a table driven skip loop over a run of at least four consecutive
     positions which have fewer than 255 possible code units, optionally
     followed by a check of one extra high priority position;
  3. a plain one or two character search at the best position
     (fast_forward_first_char2).

Returns: FALSE when nothing has been emitted (no prefix data, or no usable
         position), TRUE otherwise. TRUE is also returned when the skip table
         cannot be allocated. */
static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_label *start;
struct sljit_jump *match;
fast_forward_char_data chars[MAX_N_CHARS];
sljit_s32 offset;
PCRE2_UCHAR mask;
PCRE2_UCHAR *char_set, *char_set_end;
int i, max, from;
int range_right = -1, range_len;
sljit_u8 *update_table = NULL;
BOOL in_range;
sljit_u32 rec_count;

for (i = 0; i < MAX_N_CHARS; i++)
  {
  chars[i].count = 0;
  chars[i].last_count = 0;
  }

/* rec_count is a budget handed to scan_prefix() (presumably limiting the
amount of work it does - confirm against scan_prefix). */
rec_count = 10000;
max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &rec_count);

if (max < 1)
  return FALSE;

/* Convert last_count to priority. The resulting values are: 7 or 5 for a
single code unit, 6 or 4 for two code units accepted by is_powerof2() on
their xor, 3 or 2 for two other code units, 1 for more than two code units
and 0 when count is 255. The higher value of each pair is used when
last_count is equal to count. */
for (i = 0; i < max; i++)
  {
  SLJIT_ASSERT(chars[i].count > 0 && chars[i].last_count <= chars[i].count);

  if (chars[i].count == 1)
    {
    chars[i].last_count = (chars[i].last_count == 1) ? 7 : 5;
    /* Simplifies algorithms later. */
    chars[i].chars[1] = chars[i].chars[0];
    }
  else if (chars[i].count == 2)
    {
    SLJIT_ASSERT(chars[i].chars[0] != chars[i].chars[1]);

    if (is_powerof2(chars[i].chars[0] ^ chars[i].chars[1]))
      chars[i].last_count = (chars[i].last_count == 2) ? 6 : 4;
    else
      chars[i].last_count = (chars[i].last_count == 2) ? 3 : 2;
    }
  else
    chars[i].last_count = (chars[i].count == 255) ? 0 : 1;
  }

#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && check_fast_forward_char_pair_simd(common, chars, max))
  return TRUE;
#endif

/* Find the longest run of consecutive positions whose count is below 255.
Only runs longer than three positions are accepted. On exit range_right is
the index of the last position of the run (or -1) and range_len its length.
The loop goes up to i == max so that a run reaching the end is closed too. */
in_range = FALSE;
/* Prevent compiler "uninitialized" warning */
from = 0;
range_len = 4 /* minimum length */ - 1;
for (i = 0; i <= max; i++)
  {
  if (in_range && (i - from) > range_len && (chars[i - 1].count < 255))
    {
    range_len = i - from;
    range_right = i - 1;
    }

  if (i < max && chars[i].count < 255)
    {
    SLJIT_ASSERT(chars[i].count > 0);
    if (!in_range)
      {
      in_range = TRUE;
      from = i;
      }
    }
  else
    in_range = FALSE;
  }

/* Build the 256 entry skip table. It is indexed by the low 8 bits of the code
unit read at position range_right and gives the distance (in bytes) by which
STR_PTR can be advanced: the full run length by default, or the smallest
distance i at which the value is accepted at position range_right - i. */
if (range_right >= 0)
  {
  update_table = (sljit_u8 *)allocate_read_only_data(common, 256);
  /* NOTE(review): TRUE is returned on allocation failure; presumably
  allocate_read_only_data() records the error for the caller - confirm. */
  if (update_table == NULL)
    return TRUE;
  memset(update_table, IN_UCHARS(range_len), 256);

  for (i = 0; i < range_len; i++)
    {
    SLJIT_ASSERT(chars[range_right - i].count > 0 && chars[range_right - i].count < 255);

    char_set = chars[range_right - i].chars;
    char_set_end = char_set + chars[range_right - i].count;
    do
      {
      if (update_table[(*char_set) & 0xff] > IN_UCHARS(i))
        update_table[(*char_set) & 0xff] = IN_UCHARS(i);
      char_set++;
      }
    while (char_set < char_set_end);
    }
  }

/* Select the position, other than range_right, which has the highest
priority. A position is only considered when its priority is at least 2, and
the earliest one is kept when priorities are equal. */
offset = -1;
/* Scan forward. */
for (i = 0; i < max; i++)
  {
  if (range_right == i)
    continue;

  if (offset == -1)
    {
    if (chars[i].last_count >= 2)
      offset = i;
    }
  else if (chars[offset].last_count < chars[i].last_count)
    offset = i;
  }

SLJIT_ASSERT(offset == -1 || (chars[offset].count >= 1 && chars[offset].count <= 2));

/* No skip table: fall back to the one or two character search. */
if (range_right < 0)
  {
  if (offset < 0)
    return FALSE;
  /* Works regardless the value is 1 or 2. */
  fast_forward_first_char2(common, chars[offset].chars[0], chars[offset].chars[1], offset);
  return TRUE;
  }

SLJIT_ASSERT(range_right != offset);

/* STR_END is decreased by max code units for the duration of the loop, and
the match fails when the subtraction reports LESS. When match_end_ptr is in
use, the original STR_END is saved in TMP3 and the decreased value is further
limited to the value loaded from match_end_ptr. */
if (common->match_end_ptr != 0)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
  OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
  add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0);
  CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
  }
else
  {
  OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
  add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));
  }

SLJIT_ASSERT(range_right >= 0);

/* Without virtual registers the table address is kept in RETURN_ADDR. */
if (!HAS_VIRTUAL_REGISTERS)
  OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);

start = LABEL();
add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));

/* Load a single byte of the code unit at position range_right: the first
byte in 8 bit or little endian builds, the last byte of the unit otherwise. */
#if PCRE2_CODE_UNIT_WIDTH == 8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
#else
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
#endif

/* TMP1 = update_table[TMP1] */
if (!HAS_VIRTUAL_REGISTERS)
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
else
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);

/* Advance by the table value and repeat until the value is zero. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);

/* Extra check of the selected high priority position. STR_PTR is increased
first, so a failed compare restarts the loop one code unit further. */
if (offset >= 0)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset));
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  if (chars[offset].count == 1)
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0], start);
  else
    {
    mask = chars[offset].chars[0] ^ chars[offset].chars[1];
    if (is_powerof2(mask))
      {
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0] | mask, start);
      }
    else
      {
      match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0]);
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[1], start);
      JUMPHERE(match);
      }
    }
  }

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
/* In UTF mode the candidate position must be the start of a character. This
is skipped when offset is 0, since the first code unit has just been compared
with the expected value(s). */
if (common->utf && offset != 0)
  {
  if (offset < 0)
    {
    /* No extra check was emitted: read the unit at the candidate position
    and step over it, so that a retry continues one code unit further. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }
  else
    /* STR_PTR has already been increased by the extra check above. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));

  jumpto_if_not_utf_char_start(compiler, TMP1, start);

  if (offset < 0)
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  }
#endif

/* Undo the increment made by the extra check. */
if (offset >= 0)
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Restore the original STR_END. */
if (common->match_end_ptr != 0)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
else
  OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
return TRUE;
}
6222
6223static SLJIT_INLINE void fast_forward_first_char(compiler_common *common)
6224{
6225PCRE2_UCHAR first_char = (PCRE2_UCHAR)(common->re->first_codeunit);
6226PCRE2_UCHAR oc;
6227
6228oc = first_char;
6229if ((common->re->flags & PCRE2_FIRSTCASELESS) != 0)
6230  {
6231  oc = TABLE_GET(first_char, common->fcc, first_char);
6232#if defined SUPPORT_UNICODE
6233  if (first_char > 127 && (common->utf || common->ucp))
6234    oc = UCD_OTHERCASE(first_char);
6235#endif
6236  }
6237
6238fast_forward_first_char2(common, first_char, oc, 0);
6239}
6240
/* Emits code that moves STR_PTR forward to the position just after the next
newline sequence. Nothing is skipped when STR_PTR is not greater than the
"str" field of jit_arguments (the "firstchar" jumps). The search starts
before the current position (it steps back first), so a newline which ends
right at the current position is also found.

Two code paths exist:
  - fixed two code unit newlines (nltype is NLTYPE_FIXED and newline > 255),
    which are handled by the first block and return early;
  - every other newline type, handled by the rest of the function.

While match_end_ptr is in use, STR_END is replaced by the value stored in
that local and the original STR_END is preserved in TMP3. */
static SLJIT_INLINE void fast_forward_newline(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_label *loop;
struct sljit_jump *lastchar = NULL;
struct sljit_jump *firstchar;
struct sljit_jump *quit = NULL;
struct sljit_jump *foundcr = NULL;
struct sljit_jump *notfoundnl;
jump_list *newline = NULL;

if (common->match_end_ptr != 0)
  {
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
  OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  }

/* Fixed newline which consists of two code units: the first unit is stored
in bits 8-15 of common->newline, the second one in bits 0-7. */
if (common->nltype == NLTYPE_FIXED && common->newline > 255)
  {
#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
  if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && common->mode == PCRE2_JIT_COMPLETE)
    {
    /* TMP2 = arguments->str, TMP1 = arguments->begin */
    if (HAS_VIRTUAL_REGISTERS)
      {
      OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
      }
    else
      {
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
      }
    firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);

    /* Step back one code unit, and a second one unless the first step has
    reached "begin" (TMP1 becomes 1 when STR_PTR != begin, 0 otherwise). */
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2U(SLJIT_SUB | SLJIT_SET_Z, STR_PTR, 0, TMP1, 0);
    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_NOT_EQUAL);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);

    /* Search for the two code units of the newline, then step over them. */
    fast_forward_char_pair_simd(common, 1, common->newline & 0xff, common->newline & 0xff, 0, (common->newline >> 8) & 0xff, (common->newline >> 8) & 0xff);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    }
  else
#endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */
    {
    lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
    /* TMP2 = arguments->str, TMP1 = arguments->begin */
    if (HAS_VIRTUAL_REGISTERS)
      {
      OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
      }
    else
      {
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
      }
    firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);

    /* Step back one code unit when STR_PTR >= begin + 2, so that the first
    iteration (which increments STR_PTR) inspects the two units before the
    original position. */
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
    OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, STR_PTR, 0, TMP1, 0);
    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

    /* Advance one code unit at a time until the two units before STR_PTR
    are the newline sequence, or until STR_END is reached. */
    loop = LABEL();
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
    CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);

    JUMPHERE(quit);
    JUMPHERE(lastchar);
    }

  JUMPHERE(firstchar);

  if (common->match_end_ptr != 0)
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
  return;
  }

/* All other newline types. TMP2 = arguments->str */
if (HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
  }
else
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));

/* Example: match /^/ to \r\n from offset 1. */
firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);

/* Step back to the previous character (NLTYPE_ANY) or code unit. */
if (common->nltype == NLTYPE_ANY)
  move_back(common, NULL, FALSE);
else
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* The loop entry is also published in common->ff_newline_shortcut. */
loop = LABEL();
common->ff_newline_shortcut = loop;

#ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD
if (JIT_HAS_FAST_FORWARD_CHAR_SIMD && (common->nltype == NLTYPE_FIXED || common->nltype == NLTYPE_ANYCRLF))
  {
  if (common->nltype == NLTYPE_ANYCRLF)
    {
    /* Find a CR or an LF. Anything other than CR leaves through "quit";
    a CR falls through to the CR LF check emitted further below. */
    fast_forward_char_simd(common, CHAR_CR, CHAR_LF, 0);
    if (common->mode != PCRE2_JIT_COMPLETE)
      lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    quit = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
    }
   else
    {
    /* Single code unit fixed newline: find it and step over it. */
    fast_forward_char_simd(common, common->newline, common->newline, 0);

    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    if (common->mode != PCRE2_JIT_COMPLETE)
      {
      /* STR_PTR = min(STR_PTR, STR_END) */
      OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
      CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
      }
    }
  }
else
#endif /* JIT_HAS_FAST_FORWARD_CHAR_SIMD */
  {
  /* Generic loop: read a character, leave when the end is reached, handle
  CR separately for the ANY and ANYCRLF types, and go back to "loop" through
  the jumps which check_newlinechar() adds to the "newline" list (presumably
  the not-a-newline exits, since its last argument is FALSE - confirm). */
  read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
  lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
    foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
  check_newlinechar(common, common->nltype, &newline, FALSE);
  set_jumps(newline, loop);
  }

if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
  {
  /* In the generic loop case the fall through path skips the CR LF check
  and only the "foundcr" jump enters it. */
  if (quit == NULL)
    {
    quit = JUMP(SLJIT_JUMP);
    JUMPHERE(foundcr);
    }

  /* After a CR: step over one more code unit when it is an LF. TMP1 becomes
  1 when the next code unit is CHAR_NL and 0 otherwise. */
  notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_NL);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  JUMPHERE(notfoundnl);
  JUMPHERE(quit);
  }

if (lastchar)
  JUMPHERE(lastchar);
JUMPHERE(firstchar);

if (common->match_end_ptr != 0)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
}
6413
6414static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
6415
/* Emits a loop that moves STR_PTR forward to the first code unit which is
accepted by the start bitmap of the compiled pattern (re->start_bitmap, one
bit for each of the values 0-255).

The test itself is emitted by optimize_class() when it can handle the bitmap;
otherwise the bit is read from the bitmap at run time.

Running out of subject jumps to common->failed_match in PCRE2_JIT_COMPLETE
mode; in the other modes the generated code continues after the loop with
STR_PTR left where the end check stopped it.

While match_end_ptr is in use, STR_END is limited to one code unit past the
value stored in that local, and the original STR_END is kept in RETURN_ADDR. */
static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common)
{
DEFINE_COMPILER;
const sljit_u8 *start_bits = common->re->start_bitmap;
struct sljit_label *start;
struct sljit_jump *partial_quit;
#if PCRE2_CODE_UNIT_WIDTH != 8
struct sljit_jump *found = NULL;
#endif
jump_list *matches = NULL;

if (common->match_end_ptr != 0)
  {
  /* STR_END = min(STR_END, match end + 1 code unit) */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0);
  CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
  }

start = LABEL();

partial_quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
if (common->mode == PCRE2_JIT_COMPLETE)
  add_jump(compiler, &common->failed_match, partial_quit);

/* Load the current code unit and step over it before testing. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* The third argument is the state of the bit which belongs to value 255. */
if (!optimize_class(common, start_bits, (start_bits[31] & 0x80) != 0, FALSE, &matches))
  {
#if PCRE2_CODE_UNIT_WIDTH != 8
  /* Code units >= 255 have no bit of their own: they are all accepted or
  all rejected depending on the bit of 255. */
  if ((start_bits[31] & 0x80) != 0)
    found = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255);
  else
    CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255, start);
#elif defined SUPPORT_UNICODE
  /* Code units above 127 are rejected without a bitmap lookup when
  is_char7_bitset() accepts the bitmap. */
  if (common->utf && is_char7_bitset(start_bits, FALSE))
    CMPTO(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 127, start);
#endif
  /* TMP2 = bit index (value & 7), TMP1 = start_bits[value >> 3] */
  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
  /* Test bit (1 << TMP2) of the loaded byte. */
  if (!HAS_VIRTUAL_REGISTERS)
    {
    OP2(SLJIT_SHL, TMP3, 0, SLJIT_IMM, 1, TMP2, 0);
    OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP3, 0);
    }
  else
    {
    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
    OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
    }
  /* The bit is clear: try the next code unit. */
  JUMPTO(SLJIT_ZERO, start);
  }
else
  /* Jumps collected by optimize_class() continue with the next code unit. */
  set_jumps(matches, start);

#if PCRE2_CODE_UNIT_WIDTH != 8
if (found != NULL)
  JUMPHERE(found);
#endif

/* Step back to the accepted code unit. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

/* Outside PCRE2_JIT_COMPLETE mode the end-of-subject exit lands here, after
the subtraction. */
if (common->mode != PCRE2_JIT_COMPLETE)
  JUMPHERE(partial_quit);

if (common->match_end_ptr != 0)
  OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
}
6487
/* Emits code that looks for the requested code unit in the subject, starting
at STR_PTR (or one code unit later when has_firstchar is set).

Arguments:
  common         compiler state; common->req_char_ptr must be non-zero
  req_char       the requested code unit
  caseless       when TRUE, the other case of req_char is accepted as well
  has_firstchar  when TRUE, the search starts one code unit after STR_PTR

The search is skipped when more than IN_UCHARS(REQ_CU_MAX) * 100 bytes remain
before STR_END, or when STR_PTR is below the position stored in the
req_char_ptr local. Otherwise the position where the code unit has been found
is stored in that local.

Returns: list of the jumps taken when the code unit is not found; nothing in
         this function binds them. TMP1 and TMP2 are used as scratch. */
static SLJIT_INLINE jump_list *search_requested_char(compiler_common *common, PCRE2_UCHAR req_char, BOOL caseless, BOOL has_firstchar)
{
DEFINE_COMPILER;
struct sljit_label *loop;
struct sljit_jump *toolong;
struct sljit_jump *already_found;
struct sljit_jump *found;
struct sljit_jump *found_oc = NULL;
jump_list *not_found = NULL;
sljit_u32 oc, bit;

SLJIT_ASSERT(common->req_char_ptr != 0);
/* TMP2 = STR_PTR + search limit, TMP1 = position stored by an earlier search */
OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(REQ_CU_MAX) * 100);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
toolong = CMP(SLJIT_LESS, TMP2, 0, STR_END, 0);
already_found = CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0);

/* TMP1 = position where the search starts */
if (has_firstchar)
  OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
else
  OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);

/* Compute the other case: from the fcc table, or from the Unicode data for
values above 127 in UTF or UCP mode. */
oc = req_char;
if (caseless)
  {
  oc = TABLE_GET(req_char, common->fcc, req_char);
#if defined SUPPORT_UNICODE
  if (req_char > 127 && (common->utf || common->ucp))
    oc = UCD_OTHERCASE(req_char);
#endif
  }

#ifdef JIT_HAS_FAST_REQUESTED_CHAR_SIMD
if (JIT_HAS_FAST_REQUESTED_CHAR_SIMD)
  {
  not_found = fast_requested_char_simd(common, req_char, oc);
  }
else
#endif
  {
  /* Scalar loop: TMP1 walks the subject, TMP2 holds the current code unit. */
  loop = LABEL();
  add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));

  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);

  if (req_char == oc)
    found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
  else
    {
    bit = req_char ^ oc;
    if (is_powerof2(bit))
      {
      /* When is_powerof2() accepts the xor of the two cases, forcing those
      bits on lets a single compare cover both. */
       OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
      found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
      }
    else
      {
      found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
      found_oc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
      }
    }
  /* No match at this position: try the next code unit. */
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPTO(SLJIT_JUMP, loop);

  JUMPHERE(found);
  if (found_oc)
    JUMPHERE(found_oc);
  }

/* Remember the position of the code unit for later calls. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);

/* Both early exits skip the store above. */
JUMPHERE(already_found);
JUMPHERE(toolong);
return not_found;
}
6563
/* Emits a helper routine (entered with sljit_emit_fast_enter, left with
SLJIT_FAST_RETURN) which restores local variables from the records found
below STACK_TOP.

The machine word at STACK_TOP[-1] selects the record type:
  > 0   offset of a local: the words at STACK_TOP[-2] and STACK_TOP[-3] are
        copied to that local and to the word after it, and three words are
        dropped from the stack;
  < 0   negated offset of a local: the word at STACK_TOP[-2] is copied to
        that local, and two words are dropped from the stack;
  == 0  end of the records: the routine returns, STACK_TOP is left unchanged.

The return address is kept in RETURN_ADDR; TMP1, TMP2 (and TMP3 when there
are no virtual registers) are used as scratch. */
static void do_revertframes(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *mainloop;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* TMP1 = base address of the locals */
GET_LOCAL_BASE(TMP1, 0, 0);

/* Drop frames until we reach STACK_TOP. */
mainloop = LABEL();
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -SSIZE_OF(sw));
jump = CMP(SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0);

/* Positive value: TMP2 = address of the local, two words are restored. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
if (HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(STACK_TOP), -(3 * SSIZE_OF(sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * SSIZE_OF(sw));
  }
else
  {
  /* The values go through registers here. TMP1 is borrowed for the first
  one, so the base address of the locals is reloaded afterwards. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(3 * SSIZE_OF(sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * SSIZE_OF(sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
  GET_LOCAL_BASE(TMP1, 0, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP3, 0);
  }
JUMPTO(SLJIT_JUMP, mainloop);

/* Zero or negative value. Non-zero can only mean negative at this point. */
JUMPHERE(jump);
jump = CMP(SLJIT_NOT_ZERO /* SIG_LESS */, TMP2, 0, SLJIT_IMM, 0);
/* End of reverting values. */
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);

/* Negative value: TMP2 = base + (-value), a single word is restored. */
JUMPHERE(jump);
OP2(SLJIT_SUB, TMP2, 0, SLJIT_IMM, 0, TMP2, 0);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
if (HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * SSIZE_OF(sw));
  }
else
  {
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * SSIZE_OF(sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP3, 0);
  }
JUMPTO(SLJIT_JUMP, mainloop);
}
6617
/* Emits a helper routine (entered with sljit_emit_fast_enter, left with
SLJIT_FAST_RETURN) which tests whether STR_PTR is at a word boundary.

The routine computes two 0/1 values: TMP3 tells whether the character before
STR_PTR is a word character (0 when STR_PTR is not after the "begin" field of
jit_arguments), and TMP2 tells the same about the character at STR_PTR (0 at
the end of the subject). On the normal exit TMP2 is replaced by TMP2 ^ TMP3
and the zero flag is set from that result, so a non-zero TMP2 means that the
two sides differ.

Word characters are: in UCP mode the underscore and the characters whose
Unicode type is in the ranges ucp_Ll..ucp_Lu or ucp_Nd..ucp_No; otherwise
the characters below 256 which have the ctype_word bit set in common->ctypes.

With invalid UTF support there are two extra exits which do not set flags:
TMP2 is set to -1 when both the previous and the current characters are
invalid, and to TMP3 when only the current character is invalid.

The return address is saved in LOCALS0; LOCALS1 is passed to peek_char(). */
static void check_wordboundary(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *skipread;
jump_list *skipread_list = NULL;
#ifdef SUPPORT_UNICODE
struct sljit_label *valid_utf;
jump_list *invalid_utf1 = NULL;
#endif /* SUPPORT_UNICODE */
jump_list *invalid_utf2 = NULL;
#if PCRE2_CODE_UNIT_WIDTH != 8 || defined SUPPORT_UNICODE
struct sljit_jump *jump;
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 || SUPPORT_UNICODE */

/* The shift by 4 used below relies on this value. */
SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
/* Get type of the previous char, and put it to TMP3. */
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
/* No previous character: TMP3 stays 0. */
skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);

#ifdef SUPPORT_UNICODE
if (common->invalid_utf)
  {
  peek_char_back(common, READ_CHAR_MAX, &invalid_utf1);

  if (common->mode != PCRE2_JIT_COMPLETE)
    {
    /* The character (TMP1) and STR_PTR are preserved in RETURN_ADDR and
    TMP2 around the move_back() / check_start_used_ptr() calls. */
    OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
    OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
    move_back(common, NULL, TRUE);
    check_start_used_ptr(common);
    OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0);
    }
  }
else
#endif /* SUPPORT_UNICODE */
  {
  if (common->mode == PCRE2_JIT_COMPLETE)
    peek_char_back(common, READ_CHAR_MAX, NULL);
  else
    {
    /* Step back, call check_start_used_ptr(), then read the character
    again, which moves STR_PTR forward (READ_CHAR_UPDATE_STR_PTR). */
    move_back(common, NULL, TRUE);
    check_start_used_ptr(common);
    read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR);
    }
  }

/* Testing char type. */
#ifdef SUPPORT_UNICODE
if (common->ucp)
  {
  /* TMP2 = 1 is the result for the underscore, which skips the lookup.
  Otherwise the Unicode type is fetched through the getucdtype helper and
  TMP2 = (type in ucp_Ll..ucp_Lu) | (type in ucp_Nd..ucp_No). */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
  jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
  add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
  OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
  OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  JUMPHERE(jump);
  OP1(SLJIT_MOV, TMP3, 0, TMP2, 0);
  }
else
#endif /* SUPPORT_UNICODE */
  {
  /* Characters above 255 have no entry in the ctypes table: the lookup is
  skipped for them and TMP3 keeps its zero value. */
#if PCRE2_CODE_UNIT_WIDTH != 8
  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UNICODE
  /* Here TMP3 has already been zeroed. */
  jump = NULL;
  if (common->utf)
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  /* TMP3 = (ctypes[character] >> 4) & 1, which is the ctype_word bit. */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP3, 0, TMP1, 0, SLJIT_IMM, 1);
#if PCRE2_CODE_UNIT_WIDTH != 8
  JUMPHERE(jump);
#elif defined SUPPORT_UNICODE
  if (jump != NULL)
    JUMPHERE(jump);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  }
JUMPHERE(skipread);

/* Get type of the current char, and put it to TMP2. It stays 0 when the end
of the subject has been reached (skipread_list). */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
check_str_end(common, &skipread_list);
peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, &invalid_utf2);

/* Testing char type. This is a code duplication. */
#ifdef SUPPORT_UNICODE

/* The invalid UTF handler emitted at the end of this function jumps back to
this label when the current character is valid. */
valid_utf = LABEL();

if (common->ucp)
  {
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
  jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
  add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
  OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
  OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
  JUMPHERE(jump);
  }
else
#endif /* SUPPORT_UNICODE */
  {
#if PCRE2_CODE_UNIT_WIDTH != 8
  /* TMP2 may be destroyed by peek_char. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UNICODE
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  jump = NULL;
  if (common->utf)
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif
  /* TMP2 = (ctypes[character] >> 4) & 1, which is the ctype_word bit. */
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
#if PCRE2_CODE_UNIT_WIDTH != 8
  JUMPHERE(jump);
#elif defined SUPPORT_UNICODE
  if (jump != NULL)
    JUMPHERE(jump);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
  }
set_jumps(skipread_list, LABEL());

/* Normal exit: TMP2 = TMP2 ^ TMP3 with the zero flag set from the result. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_XOR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, TMP3, 0);
OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);

#ifdef SUPPORT_UNICODE
if (common->invalid_utf)
  {
  /* The previous character is invalid. When the current character is valid,
  the test continues at valid_utf (TMP3 is still 0); otherwise the routine
  returns with TMP2 set to -1. */
  set_jumps(invalid_utf1, LABEL());

  peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, NULL);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR, valid_utf);

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, -1);
  OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);

  /* Only the current character is invalid: return the type of the previous
  character in TMP2. */
  set_jumps(invalid_utf2, LABEL());
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  OP1(SLJIT_MOV, TMP2, 0, TMP3, 0);
  OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
  }
#endif /* SUPPORT_UNICODE */
}
6778
6779static BOOL optimize_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
6780{
6781/* May destroy TMP1. */
6782DEFINE_COMPILER;
6783int ranges[MAX_CLASS_RANGE_SIZE];
6784sljit_u8 bit, cbit, all;
6785int i, byte, length = 0;
6786
6787bit = bits[0] & 0x1;
6788/* All bits will be zero or one (since bit is zero or one). */
6789all = -bit;
6790
6791for (i = 0; i < 256; )
6792  {
6793  byte = i >> 3;
6794  if ((i & 0x7) == 0 && bits[byte] == all)
6795    i += 8;
6796  else
6797    {
6798    cbit = (bits[byte] >> (i & 0x7)) & 0x1;
6799    if (cbit != bit)
6800      {
6801      if (length >= MAX_CLASS_RANGE_SIZE)
6802        return FALSE;
6803      ranges[length] = i;
6804      length++;
6805      bit = cbit;
6806      all = -cbit;
6807      }
6808    i++;
6809    }
6810  }
6811
6812if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
6813  {
6814  if (length >= MAX_CLASS_RANGE_SIZE)
6815    return FALSE;
6816  ranges[length] = 256;
6817  length++;
6818  }
6819
6820if (length < 0 || length > 4)
6821  return FALSE;
6822
6823bit = bits[0] & 0x1;
6824if (invert) bit ^= 0x1;
6825
6826/* No character is accepted. */
6827if (length == 0 && bit == 0)
6828  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6829
6830switch(length)
6831  {
6832  case 0:
6833  /* When bit != 0, all characters are accepted. */
6834  return TRUE;
6835
6836  case 1:
6837  add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
6838  return TRUE;
6839
6840  case 2:
6841  if (ranges[0] + 1 != ranges[1])
6842    {
6843    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
6844    add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
6845    }
6846  else
6847    add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
6848  return TRUE;
6849
6850  case 3:
6851  if (bit != 0)
6852    {
6853    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
6854    if (ranges[0] + 1 != ranges[1])
6855      {
6856      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
6857      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
6858      }
6859    else
6860      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
6861    return TRUE;
6862    }
6863
6864  add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
6865  if (ranges[1] + 1 != ranges[2])
6866    {
6867    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
6868    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
6869    }
6870  else
6871    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
6872  return TRUE;
6873
6874  case 4:
6875  if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
6876      && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
6877      && (ranges[1] & (ranges[2] - ranges[0])) == 0
6878      && is_powerof2(ranges[2] - ranges[0]))
6879    {
6880    SLJIT_ASSERT((ranges[0] & (ranges[2] - ranges[0])) == 0 && (ranges[2] & ranges[3] & (ranges[2] - ranges[0])) != 0);
6881    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
6882    if (ranges[2] + 1 != ranges[3])
6883      {
6884      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
6885      add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
6886      }
6887    else
6888      add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
6889    return TRUE;
6890    }
6891
6892  if (bit != 0)
6893    {
6894    i = 0;
6895    if (ranges[0] + 1 != ranges[1])
6896      {
6897      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
6898      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
6899      i = ranges[0];
6900      }
6901    else
6902      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
6903
6904    if (ranges[2] + 1 != ranges[3])
6905      {
6906      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
6907      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
6908      }
6909    else
6910      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
6911    return TRUE;
6912    }
6913
6914  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
6915  add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
6916  if (ranges[1] + 1 != ranges[2])
6917    {
6918    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
6919    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
6920    }
6921  else
6922    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
6923  return TRUE;
6924
6925  default:
6926  SLJIT_UNREACHABLE();
6927  return FALSE;
6928  }
6929}
6930
6931static BOOL optimize_class_chars(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
6932{
6933/* May destroy TMP1. */
6934DEFINE_COMPILER;
6935uint16_t char_list[MAX_CLASS_CHARS_SIZE];
6936uint8_t byte;
6937sljit_s32 type;
6938int i, j, k, len, c;
6939
6940if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV))
6941  return FALSE;
6942
6943len = 0;
6944
6945for (i = 0; i < 32; i++)
6946  {
6947  byte = bits[i];
6948
6949  if (nclass)
6950    byte = ~byte;
6951
6952  j = 0;
6953  while (byte != 0)
6954    {
6955    if (byte & 0x1)
6956      {
6957      c = i * 8 + j;
6958
6959      k = len;
6960
6961      if ((c & 0x20) != 0)
6962        {
6963        for (k = 0; k < len; k++)
6964          if (char_list[k] == c - 0x20)
6965            {
6966            char_list[k] |= 0x120;
6967            break;
6968            }
6969        }
6970
6971      if (k == len)
6972        {
6973        if (len >= MAX_CLASS_CHARS_SIZE)
6974          return FALSE;
6975
6976        char_list[len++] = (uint16_t) c;
6977        }
6978      }
6979
6980    byte >>= 1;
6981    j++;
6982    }
6983  }
6984
6985if (len == 0) return FALSE;  /* Should never occur, but stops analyzers complaining. */
6986
6987i = 0;
6988j = 0;
6989
6990if (char_list[0] == 0)
6991  {
6992  i++;
6993  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0);
6994  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_ZERO);
6995  }
6996else
6997  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
6998
6999while (i < len)
7000  {
7001  if ((char_list[i] & 0x100) != 0)
7002    j++;
7003  else
7004    {
7005    OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, char_list[i]);
7006    CMOV(SLJIT_ZERO, TMP2, TMP1, 0);
7007    }
7008  i++;
7009  }
7010
7011if (j != 0)
7012  {
7013  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x20);
7014
7015  for (i = 0; i < len; i++)
7016    if ((char_list[i] & 0x100) != 0)
7017      {
7018      j--;
7019      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, char_list[i] & 0xff);
7020      CMOV(SLJIT_ZERO, TMP2, TMP1, 0);
7021      }
7022  }
7023
7024if (invert)
7025  nclass = !nclass;
7026
7027type = nclass ? SLJIT_NOT_EQUAL : SLJIT_EQUAL;
7028add_jump(compiler, backtracks, CMP(type, TMP2, 0, SLJIT_IMM, 0));
7029return TRUE;
7030}
7031
7032static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
7033{
7034/* May destroy TMP1. */
7035if (optimize_class_ranges(common, bits, nclass, invert, backtracks))
7036  return TRUE;
7037return optimize_class_chars(common, bits, nclass, invert, backtracks);
7038}
7039
7040static void check_anynewline(compiler_common *common)
7041{
7042/* Check whether TMP1 contains a newline character. TMP2 destroyed. */
7043DEFINE_COMPILER;
7044
7045sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
7046
7047OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
7048OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
7049OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
7050OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
7051#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
7052#if PCRE2_CODE_UNIT_WIDTH == 8
7053if (common->utf)
7054  {
7055#endif
7056  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7057  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
7058  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
7059#if PCRE2_CODE_UNIT_WIDTH == 8
7060  }
7061#endif
7062#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
7063OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
7064OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
7065}
7066
7067static void check_hspace(compiler_common *common)
7068{
7069/* Check whether TMP1 contains a newline character. TMP2 destroyed. */
7070DEFINE_COMPILER;
7071
7072sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
7073
7074OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x09);
7075OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
7076OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x20);
7077OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7078OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xa0);
7079#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
7080#if PCRE2_CODE_UNIT_WIDTH == 8
7081if (common->utf)
7082  {
7083#endif
7084  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7085  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x1680);
7086  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7087  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e);
7088  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7089  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
7090  OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
7091  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
7092  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
7093  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7094  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
7095  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7096  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
7097#if PCRE2_CODE_UNIT_WIDTH == 8
7098  }
7099#endif
7100#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
7101OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
7102
7103OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
7104}
7105
7106static void check_vspace(compiler_common *common)
7107{
7108/* Check whether TMP1 contains a newline character. TMP2 destroyed. */
7109DEFINE_COMPILER;
7110
7111sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
7112
7113OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
7114OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
7115OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
7116OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
7117#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
7118#if PCRE2_CODE_UNIT_WIDTH == 8
7119if (common->utf)
7120  {
7121#endif
7122  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7123  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
7124  OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
7125#if PCRE2_CODE_UNIT_WIDTH == 8
7126  }
7127#endif
7128#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
7129OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
7130
7131OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
7132}
7133
7134static void do_casefulcmp(compiler_common *common)
7135{
7136DEFINE_COMPILER;
7137struct sljit_jump *jump;
7138struct sljit_label *label;
7139int char1_reg;
7140int char2_reg;
7141
7142if (HAS_VIRTUAL_REGISTERS)
7143  {
7144  char1_reg = STR_END;
7145  char2_reg = STACK_TOP;
7146  }
7147else
7148  {
7149  char1_reg = TMP3;
7150  char2_reg = RETURN_ADDR;
7151  }
7152
7153sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
7154OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
7155
7156if (char1_reg == STR_END)
7157  {
7158  OP1(SLJIT_MOV, TMP3, 0, char1_reg, 0);
7159  OP1(SLJIT_MOV, RETURN_ADDR, 0, char2_reg, 0);
7160  }
7161
7162if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
7163  {
7164  label = LABEL();
7165  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
7166  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
7167  jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
7168  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
7169  JUMPTO(SLJIT_NOT_ZERO, label);
7170
7171  JUMPHERE(jump);
7172  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
7173  }
7174else if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
7175  {
7176  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
7177  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7178
7179  label = LABEL();
7180  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
7181  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
7182  jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
7183  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
7184  JUMPTO(SLJIT_NOT_ZERO, label);
7185
7186  JUMPHERE(jump);
7187  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
7188  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7189  }
7190else
7191  {
7192  label = LABEL();
7193  OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
7194  OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
7195  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
7196  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7197  jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
7198  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
7199  JUMPTO(SLJIT_NOT_ZERO, label);
7200
7201  JUMPHERE(jump);
7202  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
7203  }
7204
7205if (char1_reg == STR_END)
7206  {
7207  OP1(SLJIT_MOV, char1_reg, 0, TMP3, 0);
7208  OP1(SLJIT_MOV, char2_reg, 0, RETURN_ADDR, 0);
7209  }
7210
7211OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
7212}
7213
7214static void do_caselesscmp(compiler_common *common)
7215{
7216DEFINE_COMPILER;
7217struct sljit_jump *jump;
7218struct sljit_label *label;
7219int char1_reg = STR_END;
7220int char2_reg;
7221int lcc_table;
7222int opt_type = 0;
7223
7224if (HAS_VIRTUAL_REGISTERS)
7225  {
7226  char2_reg = STACK_TOP;
7227  lcc_table = STACK_LIMIT;
7228  }
7229else
7230  {
7231  char2_reg = RETURN_ADDR;
7232  lcc_table = TMP3;
7233  }
7234
7235if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
7236  opt_type = 1;
7237else if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
7238  opt_type = 2;
7239
7240sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
7241OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
7242
7243OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, char1_reg, 0);
7244
7245if (char2_reg == STACK_TOP)
7246  {
7247  OP1(SLJIT_MOV, TMP3, 0, char2_reg, 0);
7248  OP1(SLJIT_MOV, RETURN_ADDR, 0, lcc_table, 0);
7249  }
7250
7251OP1(SLJIT_MOV, lcc_table, 0, SLJIT_IMM, common->lcc);
7252
7253if (opt_type == 1)
7254  {
7255  label = LABEL();
7256  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
7257  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
7258  }
7259else if (opt_type == 2)
7260  {
7261  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
7262  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7263
7264  label = LABEL();
7265  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
7266  sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
7267  }
7268else
7269  {
7270  label = LABEL();
7271  OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
7272  OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
7273  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
7274  }
7275
7276#if PCRE2_CODE_UNIT_WIDTH != 8
7277jump = CMP(SLJIT_GREATER, char1_reg, 0, SLJIT_IMM, 255);
7278#endif
7279OP1(SLJIT_MOV_U8, char1_reg, 0, SLJIT_MEM2(lcc_table, char1_reg), 0);
7280#if PCRE2_CODE_UNIT_WIDTH != 8
7281JUMPHERE(jump);
7282jump = CMP(SLJIT_GREATER, char2_reg, 0, SLJIT_IMM, 255);
7283#endif
7284OP1(SLJIT_MOV_U8, char2_reg, 0, SLJIT_MEM2(lcc_table, char2_reg), 0);
7285#if PCRE2_CODE_UNIT_WIDTH != 8
7286JUMPHERE(jump);
7287#endif
7288
7289if (opt_type == 0)
7290  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7291
7292jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
7293OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
7294JUMPTO(SLJIT_NOT_ZERO, label);
7295
7296JUMPHERE(jump);
7297OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
7298
7299if (opt_type == 2)
7300  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7301
7302if (char2_reg == STACK_TOP)
7303  {
7304  OP1(SLJIT_MOV, char2_reg, 0, TMP3, 0);
7305  OP1(SLJIT_MOV, lcc_table, 0, RETURN_ADDR, 0);
7306  }
7307
7308OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
7309OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
7310}
7311
7312static PCRE2_SPTR byte_sequence_compare(compiler_common *common, BOOL caseless, PCRE2_SPTR cc,
7313    compare_context *context, jump_list **backtracks)
7314{
7315DEFINE_COMPILER;
7316unsigned int othercasebit = 0;
7317PCRE2_SPTR othercasechar = NULL;
7318#ifdef SUPPORT_UNICODE
7319int utflength;
7320#endif
7321
7322if (caseless && char_has_othercase(common, cc))
7323  {
7324  othercasebit = char_get_othercase_bit(common, cc);
7325  SLJIT_ASSERT(othercasebit);
7326  /* Extracting bit difference info. */
7327#if PCRE2_CODE_UNIT_WIDTH == 8
7328  othercasechar = cc + (othercasebit >> 8);
7329  othercasebit &= 0xff;
7330#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
7331  /* Note that this code only handles characters in the BMP. If there
7332  ever are characters outside the BMP whose othercase differs in only one
7333  bit from itself (there currently are none), this code will need to be
7334  revised for PCRE2_CODE_UNIT_WIDTH == 32. */
7335  othercasechar = cc + (othercasebit >> 9);
7336  if ((othercasebit & 0x100) != 0)
7337    othercasebit = (othercasebit & 0xff) << 8;
7338  else
7339    othercasebit &= 0xff;
7340#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
7341  }
7342
7343if (context->sourcereg == -1)
7344  {
7345#if PCRE2_CODE_UNIT_WIDTH == 8
7346#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
7347  if (context->length >= 4)
7348    OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
7349  else if (context->length >= 2)
7350    OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
7351  else
7352#endif
7353    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
7354#elif PCRE2_CODE_UNIT_WIDTH == 16
7355#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
7356  if (context->length >= 4)
7357    OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
7358  else
7359#endif
7360    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
7361#elif PCRE2_CODE_UNIT_WIDTH == 32
7362  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
7363#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
7364  context->sourcereg = TMP2;
7365  }
7366
7367#ifdef SUPPORT_UNICODE
7368utflength = 1;
7369if (common->utf && HAS_EXTRALEN(*cc))
7370  utflength += GET_EXTRALEN(*cc);
7371
7372do
7373  {
7374#endif
7375
7376  context->length -= IN_UCHARS(1);
7377#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
7378
7379  /* Unaligned read is supported. */
7380  if (othercasebit != 0 && othercasechar == cc)
7381    {
7382    context->c.asuchars[context->ucharptr] = *cc | othercasebit;
7383    context->oc.asuchars[context->ucharptr] = othercasebit;
7384    }
7385  else
7386    {
7387    context->c.asuchars[context->ucharptr] = *cc;
7388    context->oc.asuchars[context->ucharptr] = 0;
7389    }
7390  context->ucharptr++;
7391
7392#if PCRE2_CODE_UNIT_WIDTH == 8
7393  if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
7394#else
7395  if (context->ucharptr >= 2 || context->length == 0)
7396#endif
7397    {
7398    if (context->length >= 4)
7399      OP1(SLJIT_MOV_S32, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
7400    else if (context->length >= 2)
7401      OP1(SLJIT_MOV_U16, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
7402#if PCRE2_CODE_UNIT_WIDTH == 8
7403    else if (context->length >= 1)
7404      OP1(SLJIT_MOV_U8, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
7405#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
7406    context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
7407
7408    switch(context->ucharptr)
7409      {
7410      case 4 / sizeof(PCRE2_UCHAR):
7411      if (context->oc.asint != 0)
7412        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
7413      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
7414      break;
7415
7416      case 2 / sizeof(PCRE2_UCHAR):
7417      if (context->oc.asushort != 0)
7418        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
7419      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
7420      break;
7421
7422#if PCRE2_CODE_UNIT_WIDTH == 8
7423      case 1:
7424      if (context->oc.asbyte != 0)
7425        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
7426      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
7427      break;
7428#endif
7429
7430      default:
7431      SLJIT_UNREACHABLE();
7432      break;
7433      }
7434    context->ucharptr = 0;
7435    }
7436
7437#else
7438
7439  /* Unaligned read is unsupported or in 32 bit mode. */
7440  if (context->length >= 1)
7441    OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
7442
7443  context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
7444
7445  if (othercasebit != 0 && othercasechar == cc)
7446    {
7447    OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
7448    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
7449    }
7450  else
7451    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
7452
7453#endif
7454
7455  cc++;
7456#ifdef SUPPORT_UNICODE
7457  utflength--;
7458  }
7459while (utflength > 0);
7460#endif
7461
7462return cc;
7463}
7464
7465#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
7466
/* Changes the bias of typereg from the current typeoffset to (value): the
ADD or SUB which moves typereg by the difference is emitted only when the
two offsets differ, then the new offset is recorded. The typereg and
typeoffset variables must be in scope where the macro is used. The body is
wrapped in do { } while (0), so the macro expands to a single statement and
remains correct inside an unbraced if / else. Note that (value) is
evaluated more than once. */
#define SET_TYPE_OFFSET(value) \
  do \
    { \
    if ((value) != typeoffset) \
      { \
      if ((value) < typeoffset) \
        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
      else \
        OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
      } \
    typeoffset = (value); \
    } \
  while (0)
7476
/* Changes the bias of the character in TMP1 from the current charoffset to
(value): the ADD or SUB which moves TMP1 by the difference is emitted only
when the two offsets differ, then the new offset is recorded. The
charoffset variable must be in scope where the macro is used. The body is
wrapped in do { } while (0), so the macro expands to a single statement and
remains correct inside an unbraced if / else. Note that (value) is
evaluated more than once. */
#define SET_CHAR_OFFSET(value) \
  do \
    { \
    if ((value) != charoffset) \
      { \
      if ((value) < charoffset) \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
      else \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
      } \
    charoffset = (value); \
    } \
  while (0)
7486
7487static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr);
7488
7489#ifdef SUPPORT_UNICODE
/* Bits of the unicode_status variable of compile_xclass_matchingpath(). */

/* The class has items which need the original character later. */
#define XCLASS_SAVE_CHAR (1 << 0)
/* The character has already been copied to RETURN_ADDR. */
#define XCLASS_CHAR_SAVED (1 << 1)
/* The class has a property which is checked using the character type. */
#define XCLASS_HAS_TYPE (1 << 2)
/* The class has a script (PT_SC) or a non-negated PT_SCX property. */
#define XCLASS_HAS_SCRIPT (1 << 3)
/* The class has a script extension (PT_SCX) property. */
#define XCLASS_HAS_SCRIPT_EXTENSION (1 << 4)
/* The class has a boolean (PT_BOOL) property. */
#define XCLASS_HAS_BOOL (1 << 5)
/* The class has a bidi class (PT_BIDICL) property. */
#define XCLASS_HAS_BIDICL (1 << 6)
/* Any of these requires loading the UCD record of the character. */
#define XCLASS_NEEDS_UCD \
  (XCLASS_HAS_TYPE | XCLASS_HAS_SCRIPT | XCLASS_HAS_SCRIPT_EXTENSION \
    | XCLASS_HAS_BOOL | XCLASS_HAS_BIDICL)
/* The class has a negated (XCL_NOTPROP) script extension property. */
#define XCLASS_SCRIPT_EXTENSION_NOTPROP (1 << 7)
/* NOTE(review): the next two flags are not used in the code which is close
to these definitions; judging by their names they record that RETURN_ADDR
or LOCALS0 must be restored after the script extension check - confirm. */
#define XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR (1 << 8)
#define XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0 (1 << 9)
7501
7502#endif /* SUPPORT_UNICODE */
7503
7504static void compile_xclass_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
7505{
7506DEFINE_COMPILER;
7507jump_list *found = NULL;
7508jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
7509sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
7510struct sljit_jump *jump = NULL;
7511PCRE2_SPTR ccbegin;
7512int compares, invertcmp, numberofcmps;
7513#if defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
7514BOOL utf = common->utf;
7515#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */
7516
7517#ifdef SUPPORT_UNICODE
7518sljit_u32 unicode_status = 0;
7519int typereg = TMP1;
7520const sljit_u32 *other_cases;
7521sljit_uw typeoffset;
7522#endif /* SUPPORT_UNICODE */
7523
7524/* Scanning the necessary info. */
7525cc++;
7526ccbegin = cc;
7527compares = 0;
7528
7529if (cc[-1] & XCL_MAP)
7530  {
7531  min = 0;
7532  cc += 32 / sizeof(PCRE2_UCHAR);
7533  }
7534
7535while (*cc != XCL_END)
7536  {
7537  compares++;
7538  if (*cc == XCL_SINGLE)
7539    {
7540    cc ++;
7541    GETCHARINCTEST(c, cc);
7542    if (c > max) max = c;
7543    if (c < min) min = c;
7544#ifdef SUPPORT_UNICODE
7545    unicode_status |= XCLASS_SAVE_CHAR;
7546#endif /* SUPPORT_UNICODE */
7547    }
7548  else if (*cc == XCL_RANGE)
7549    {
7550    cc ++;
7551    GETCHARINCTEST(c, cc);
7552    if (c < min) min = c;
7553    GETCHARINCTEST(c, cc);
7554    if (c > max) max = c;
7555#ifdef SUPPORT_UNICODE
7556    unicode_status |= XCLASS_SAVE_CHAR;
7557#endif /* SUPPORT_UNICODE */
7558    }
7559#ifdef SUPPORT_UNICODE
7560  else
7561    {
7562    SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
7563    cc++;
7564    if (*cc == PT_CLIST && cc[-1] == XCL_PROP)
7565      {
7566      other_cases = PRIV(ucd_caseless_sets) + cc[1];
7567      while (*other_cases != NOTACHAR)
7568        {
7569        if (*other_cases > max) max = *other_cases;
7570        if (*other_cases < min) min = *other_cases;
7571        other_cases++;
7572        }
7573      }
7574    else
7575      {
7576      max = READ_CHAR_MAX;
7577      min = 0;
7578      }
7579
7580    switch(*cc)
7581      {
7582      case PT_ANY:
7583      /* Any either accepts everything or ignored. */
7584      if (cc[-1] == XCL_PROP)
7585        {
7586        compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
7587        if (list == backtracks)
7588          add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
7589        return;
7590        }
7591      break;
7592
7593      case PT_LAMP:
7594      case PT_GC:
7595      case PT_PC:
7596      case PT_ALNUM:
7597      unicode_status |= XCLASS_HAS_TYPE;
7598      break;
7599
7600      case PT_SCX:
7601      unicode_status |= XCLASS_HAS_SCRIPT_EXTENSION;
7602      if (cc[-1] == XCL_NOTPROP)
7603        {
7604        unicode_status |= XCLASS_SCRIPT_EXTENSION_NOTPROP;
7605        break;
7606        }
7607      compares++;
7608      /* Fall through */
7609
7610      case PT_SC:
7611      unicode_status |= XCLASS_HAS_SCRIPT;
7612      break;
7613
7614      case PT_SPACE:
7615      case PT_PXSPACE:
7616      case PT_WORD:
7617      case PT_PXGRAPH:
7618      case PT_PXPRINT:
7619      case PT_PXPUNCT:
7620      unicode_status |= XCLASS_SAVE_CHAR | XCLASS_HAS_TYPE;
7621      break;
7622
7623      case PT_CLIST:
7624      case PT_UCNC:
7625      unicode_status |= XCLASS_SAVE_CHAR;
7626      break;
7627
7628      case PT_BOOL:
7629      unicode_status |= XCLASS_HAS_BOOL;
7630      break;
7631
7632      case PT_BIDICL:
7633      unicode_status |= XCLASS_HAS_BIDICL;
7634      break;
7635
7636      default:
7637      SLJIT_UNREACHABLE();
7638      break;
7639      }
7640    cc += 2;
7641    }
7642#endif /* SUPPORT_UNICODE */
7643  }
7644SLJIT_ASSERT(compares > 0);
7645
7646/* We are not necessary in utf mode even in 8 bit mode. */
7647cc = ccbegin;
7648if ((cc[-1] & XCL_NOT) != 0)
7649  read_char(common, min, max, backtracks, READ_CHAR_UPDATE_STR_PTR);
7650else
7651  {
7652#ifdef SUPPORT_UNICODE
7653  read_char(common, min, max, (unicode_status & XCLASS_NEEDS_UCD) ? backtracks : NULL, 0);
7654#else /* !SUPPORT_UNICODE */
7655  read_char(common, min, max, NULL, 0);
7656#endif /* SUPPORT_UNICODE */
7657  }
7658
7659if ((cc[-1] & XCL_HASPROP) == 0)
7660  {
7661  if ((cc[-1] & XCL_MAP) != 0)
7662    {
7663    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
7664    if (!optimize_class(common, (const sljit_u8 *)cc, (((const sljit_u8 *)cc)[31] & 0x80) != 0, TRUE, &found))
7665      {
7666      OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
7667      OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
7668      OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
7669      OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
7670      OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
7671      add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
7672      }
7673
7674    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
7675    JUMPHERE(jump);
7676
7677    cc += 32 / sizeof(PCRE2_UCHAR);
7678    }
7679  else
7680    {
7681    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
7682    add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
7683    }
7684  }
7685else if ((cc[-1] & XCL_MAP) != 0)
7686  {
7687  OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
7688#ifdef SUPPORT_UNICODE
7689  unicode_status |= XCLASS_CHAR_SAVED;
7690#endif /* SUPPORT_UNICODE */
7691  if (!optimize_class(common, (const sljit_u8 *)cc, FALSE, TRUE, list))
7692    {
7693#if PCRE2_CODE_UNIT_WIDTH == 8
7694    jump = NULL;
7695    if (common->utf)
7696#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
7697      jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
7698
7699    OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
7700    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
7701    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
7702    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
7703    OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
7704    add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));
7705
7706#if PCRE2_CODE_UNIT_WIDTH == 8
7707    if (common->utf)
7708#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
7709      JUMPHERE(jump);
7710    }
7711
7712  OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
7713  cc += 32 / sizeof(PCRE2_UCHAR);
7714  }
7715
7716#ifdef SUPPORT_UNICODE
7717if (unicode_status & XCLASS_NEEDS_UCD)
7718  {
7719  if ((unicode_status & (XCLASS_SAVE_CHAR | XCLASS_CHAR_SAVED)) == XCLASS_SAVE_CHAR)
7720    OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
7721
7722#if PCRE2_CODE_UNIT_WIDTH == 32
7723  if (!common->utf)
7724    {
7725    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
7726    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
7727    JUMPHERE(jump);
7728    }
7729#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
7730
7731  OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
7732  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
7733  OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
7734  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
7735  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
7736  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
7737  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
7738  OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
7739  OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3);
7740  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
7741  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
7742
7743  ccbegin = cc;
7744
7745  if (unicode_status & XCLASS_HAS_BIDICL)
7746    {
7747    OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass));
7748    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BIDICLASS_SHIFT);
7749
7750    while (*cc != XCL_END)
7751      {
7752      if (*cc == XCL_SINGLE)
7753        {
7754        cc ++;
7755        GETCHARINCTEST(c, cc);
7756        }
7757      else if (*cc == XCL_RANGE)
7758        {
7759        cc ++;
7760        GETCHARINCTEST(c, cc);
7761        GETCHARINCTEST(c, cc);
7762        }
7763      else
7764        {
7765        SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
7766        cc++;
7767        if (*cc == PT_BIDICL)
7768          {
7769          compares--;
7770          invertcmp = (compares == 0 && list != backtracks);
7771          if (cc[-1] == XCL_NOTPROP)
7772            invertcmp ^= 0x1;
7773          jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]);
7774          add_jump(compiler, compares > 0 ? list : backtracks, jump);
7775          }
7776        cc += 2;
7777        }
7778      }
7779
7780    cc = ccbegin;
7781    }
7782
7783  if (unicode_status & XCLASS_HAS_BOOL)
7784    {
7785    OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, bprops));
7786    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BPROPS_MASK);
7787    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);
7788
7789    while (*cc != XCL_END)
7790      {
7791      if (*cc == XCL_SINGLE)
7792        {
7793        cc ++;
7794        GETCHARINCTEST(c, cc);
7795        }
7796      else if (*cc == XCL_RANGE)
7797        {
7798        cc ++;
7799        GETCHARINCTEST(c, cc);
7800        GETCHARINCTEST(c, cc);
7801        }
7802      else
7803        {
7804        SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
7805        cc++;
7806        if (*cc == PT_BOOL)
7807          {
7808          compares--;
7809          invertcmp = (compares == 0 && list != backtracks);
7810          if (cc[-1] == XCL_NOTPROP)
7811            invertcmp ^= 0x1;
7812
7813          OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), (sljit_sw)(PRIV(ucd_boolprop_sets) + (cc[1] >> 5)), SLJIT_IMM, (sljit_sw)1 << (cc[1] & 0x1f));
7814          add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp));
7815          }
7816        cc += 2;
7817        }
7818      }
7819
7820    cc = ccbegin;
7821    }
7822
7823  if (unicode_status & XCLASS_HAS_SCRIPT)
7824    {
7825    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
7826
7827    while (*cc != XCL_END)
7828      {
7829      if (*cc == XCL_SINGLE)
7830        {
7831        cc ++;
7832        GETCHARINCTEST(c, cc);
7833        }
7834      else if (*cc == XCL_RANGE)
7835        {
7836        cc ++;
7837        GETCHARINCTEST(c, cc);
7838        GETCHARINCTEST(c, cc);
7839        }
7840      else
7841        {
7842        SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
7843        cc++;
7844        switch (*cc)
7845          {
7846          case PT_SCX:
7847          if (cc[-1] == XCL_NOTPROP)
7848            break;
7849          /* Fall through */
7850
7851          case PT_SC:
7852          compares--;
7853          invertcmp = (compares == 0 && list != backtracks);
7854          if (cc[-1] == XCL_NOTPROP)
7855            invertcmp ^= 0x1;
7856
7857          add_jump(compiler, compares > 0 ? list : backtracks, CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]));
7858          }
7859        cc += 2;
7860        }
7861      }
7862
7863    cc = ccbegin;
7864    }
7865
7866  if (unicode_status & XCLASS_HAS_SCRIPT_EXTENSION)
7867    {
7868    OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass));
7869    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_SCRIPTX_MASK);
7870    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);
7871
7872    if (unicode_status & XCLASS_SCRIPT_EXTENSION_NOTPROP)
7873      {
7874      if (unicode_status & XCLASS_HAS_TYPE)
7875        {
7876        if (unicode_status & XCLASS_SAVE_CHAR)
7877          {
7878          OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP2, 0);
7879          unicode_status |= XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0;
7880          }
7881        else
7882          {
7883          OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP2, 0);
7884          unicode_status |= XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR;
7885          }
7886        }
7887      OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
7888      }
7889
7890    while (*cc != XCL_END)
7891      {
7892      if (*cc == XCL_SINGLE)
7893        {
7894        cc ++;
7895        GETCHARINCTEST(c, cc);
7896        }
7897      else if (*cc == XCL_RANGE)
7898        {
7899        cc ++;
7900        GETCHARINCTEST(c, cc);
7901        GETCHARINCTEST(c, cc);
7902        }
7903      else
7904        {
7905        SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
7906        cc++;
7907        if (*cc == PT_SCX)
7908          {
7909          compares--;
7910          invertcmp = (compares == 0 && list != backtracks);
7911
7912          jump = NULL;
7913          if (cc[-1] == XCL_NOTPROP)
7914            {
7915            jump = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, (int)cc[1]);
7916            if (invertcmp)
7917              {
7918              add_jump(compiler, backtracks, jump);
7919              jump = NULL;
7920              }
7921            invertcmp ^= 0x1;
7922            }
7923
7924          OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), (sljit_sw)(PRIV(ucd_script_sets) + (cc[1] >> 5)), SLJIT_IMM, (sljit_sw)1 << (cc[1] & 0x1f));
7925          add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp));
7926
7927          if (jump != NULL)
7928            JUMPHERE(jump);
7929          }
7930        cc += 2;
7931        }
7932      }
7933
7934    if (unicode_status & XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0)
7935      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
7936    else if (unicode_status & XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR)
7937      OP1(SLJIT_MOV, TMP2, 0, RETURN_ADDR, 0);
7938    cc = ccbegin;
7939    }
7940
7941  if (unicode_status & XCLASS_SAVE_CHAR)
7942    OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
7943
7944  if (unicode_status & XCLASS_HAS_TYPE)
7945    {
7946    if (unicode_status & XCLASS_SAVE_CHAR)
7947      typereg = RETURN_ADDR;
7948
7949    OP1(SLJIT_MOV_U8, typereg, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
7950    }
7951  }
7952#endif /* SUPPORT_UNICODE */
7953
7954/* Generating code. */
7955charoffset = 0;
7956numberofcmps = 0;
7957#ifdef SUPPORT_UNICODE
7958typeoffset = 0;
7959#endif /* SUPPORT_UNICODE */
7960
7961while (*cc != XCL_END)
7962  {
7963  compares--;
7964  invertcmp = (compares == 0 && list != backtracks);
7965  jump = NULL;
7966
7967  if (*cc == XCL_SINGLE)
7968    {
7969    cc ++;
7970    GETCHARINCTEST(c, cc);
7971
7972    if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
7973      {
7974      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
7975      OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7976      numberofcmps++;
7977      }
7978    else if (numberofcmps > 0)
7979      {
7980      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
7981      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
7982      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
7983      numberofcmps = 0;
7984      }
7985    else
7986      {
7987      jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
7988      numberofcmps = 0;
7989      }
7990    }
7991  else if (*cc == XCL_RANGE)
7992    {
7993    cc ++;
7994    GETCHARINCTEST(c, cc);
7995    SET_CHAR_OFFSET(c);
7996    GETCHARINCTEST(c, cc);
7997
7998    if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
7999      {
8000      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
8001      OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
8002      numberofcmps++;
8003      }
8004    else if (numberofcmps > 0)
8005      {
8006      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
8007      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
8008      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8009      numberofcmps = 0;
8010      }
8011    else
8012      {
8013      jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
8014      numberofcmps = 0;
8015      }
8016    }
8017#ifdef SUPPORT_UNICODE
8018  else
8019    {
8020    SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
8021    if (*cc == XCL_NOTPROP)
8022      invertcmp ^= 0x1;
8023    cc++;
8024    switch(*cc)
8025      {
8026      case PT_ANY:
8027      if (!invertcmp)
8028        jump = JUMP(SLJIT_JUMP);
8029      break;
8030
8031      case PT_LAMP:
8032      OP2U(SLJIT_SUB | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
8033      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
8034      OP2U(SLJIT_SUB | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
8035      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8036      OP2U(SLJIT_SUB | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
8037      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
8038      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8039      break;
8040
8041      case PT_GC:
8042      c = PRIV(ucp_typerange)[(int)cc[1] * 2];
8043      SET_TYPE_OFFSET(c);
8044      jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
8045      break;
8046
8047      case PT_PC:
8048      jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
8049      break;
8050
8051      case PT_SC:
8052      case PT_SCX:
8053      case PT_BOOL:
8054      case PT_BIDICL:
8055      compares++;
8056      /* Do nothing. */
8057      break;
8058
8059      case PT_SPACE:
8060      case PT_PXSPACE:
8061      SET_CHAR_OFFSET(9);
8062      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
8063      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
8064
8065      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
8066      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8067
8068      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
8069      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8070
8071      SET_TYPE_OFFSET(ucp_Zl);
8072      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
8073      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
8074      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8075      break;
8076
8077      case PT_WORD:
8078      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
8079      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
8080      /* Fall through. */
8081
8082      case PT_ALNUM:
8083      SET_TYPE_OFFSET(ucp_Ll);
8084      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
8085      OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
8086      SET_TYPE_OFFSET(ucp_Nd);
8087      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
8088      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
8089      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8090      break;
8091
8092      case PT_CLIST:
8093      other_cases = PRIV(ucd_caseless_sets) + cc[1];
8094
8095      /* At least three characters are required.
8096         Otherwise this case would be handled by the normal code path. */
8097      SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
8098      SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
8099
8100      /* Optimizing character pairs, if their difference is power of 2. */
8101      if (is_powerof2(other_cases[1] ^ other_cases[0]))
8102        {
8103        if (charoffset == 0)
8104          OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
8105        else
8106          {
8107          OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
8108          OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
8109          }
8110        OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, other_cases[1]);
8111        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
8112        other_cases += 2;
8113        }
8114      else if (is_powerof2(other_cases[2] ^ other_cases[1]))
8115        {
8116        if (charoffset == 0)
8117          OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
8118        else
8119          {
8120          OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
8121          OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
8122          }
8123        OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, other_cases[2]);
8124        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
8125
8126        OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
8127        OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
8128
8129        other_cases += 3;
8130        }
8131      else
8132        {
8133        OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
8134        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
8135        }
8136
8137      while (*other_cases != NOTACHAR)
8138        {
8139        OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
8140        OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
8141        }
8142      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8143      break;
8144
8145      case PT_UCNC:
8146      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
8147      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
8148      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
8149      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8150      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
8151      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8152
8153      SET_CHAR_OFFSET(0xa0);
8154      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
8155      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
8156      SET_CHAR_OFFSET(0);
8157      OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
8158      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_GREATER_EQUAL);
8159      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8160      break;
8161
8162      case PT_PXGRAPH:
8163      /* C and Z groups are the farthest two groups. */
8164      SET_TYPE_OFFSET(ucp_Ll);
8165      OP2U(SLJIT_SUB | SLJIT_SET_GREATER, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
8166      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);
8167
8168      jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
8169
8170      /* In case of ucp_Cf, we overwrite the result. */
8171      SET_CHAR_OFFSET(0x2066);
8172      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
8173      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
8174
8175      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
8176      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8177
8178      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
8179      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8180
8181      JUMPHERE(jump);
8182      jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
8183      break;
8184
8185      case PT_PXPRINT:
8186      /* C and Z groups are the farthest two groups. */
8187      SET_TYPE_OFFSET(ucp_Ll);
8188      OP2U(SLJIT_SUB | SLJIT_SET_GREATER, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
8189      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);
8190
8191      OP2U(SLJIT_SUB | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
8192      OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_NOT_EQUAL);
8193
8194      jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
8195
8196      /* In case of ucp_Cf, we overwrite the result. */
8197      SET_CHAR_OFFSET(0x2066);
8198      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
8199      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
8200
8201      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
8202      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8203
8204      JUMPHERE(jump);
8205      jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
8206      break;
8207
8208      case PT_PXPUNCT:
8209      SET_TYPE_OFFSET(ucp_Sc);
8210      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
8211      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
8212
8213      SET_CHAR_OFFSET(0);
8214      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x7f);
8215      OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_LESS_EQUAL);
8216
8217      SET_TYPE_OFFSET(ucp_Pc);
8218      OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
8219      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
8220      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8221      break;
8222
8223      default:
8224      SLJIT_UNREACHABLE();
8225      break;
8226      }
8227    cc += 2;
8228    }
8229#endif /* SUPPORT_UNICODE */
8230
8231  if (jump != NULL)
8232    add_jump(compiler, compares > 0 ? list : backtracks, jump);
8233  }
8234
8235if (found != NULL)
8236  set_jumps(found, LABEL());
8237}
8238
8239#undef SET_TYPE_OFFSET
8240#undef SET_CHAR_OFFSET
8241
8242#endif
8243
/* Emits the matching-path code for one simple, zero-width assertion opcode.
Every failure path is queued on the backtracks list; falling out of the
emitted code means the assertion held.

Arguments:
  common      shared compiler state; this function reads its utf,
              invalid_utf, mode, nltype, newline, nlmin, nlmax, endonly
              and alt_circumflex fields and its wordboundary/anynewline
              jump lists
  type        the opcode to compile: OP_SOD, OP_SOM, OP_NOT_WORD_BOUNDARY,
              OP_WORD_BOUNDARY, OP_EODN, OP_EOD, OP_DOLL, OP_DOLLM,
              OP_CIRC, OP_CIRCM or OP_REVERSE
  cc          position in the compiled pattern; only OP_REVERSE reads from
              it (its length operand, at offset 0)
  backtracks  jump list that receives every "assertion failed" jump

Returns:      cc unchanged, except for OP_REVERSE which returns
              cc + LINK_SIZE. Any other opcode hits SLJIT_UNREACHABLE().
*/
static PCRE2_SPTR compile_simple_assertion_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks)
{
DEFINE_COMPILER;
int length;
struct sljit_jump *jump[4];
#ifdef SUPPORT_UNICODE
struct sljit_label *label;
#endif /* SUPPORT_UNICODE */

switch(type)
  {
  case OP_SOD:
  /* Fail unless STR_PTR equals the "begin" field of the jit_arguments. With
  virtual registers ARGUMENTS is first copied into TMP1 before it is used
  as a base address. */
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
    }
  else
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
  return cc;

  case OP_SOM:
  /* Same shape as OP_SOD, but compares STR_PTR with the "str" field. */
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
    }
  else
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
  return cc;

  case OP_NOT_WORD_BOUNDARY:
  case OP_WORD_BOUNDARY:
  /* Fast-call the shared word boundary routine (the call site is queued on
  common->wordboundary), then branch on what it left behind. */
  add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
#ifdef SUPPORT_UNICODE
  if (common->invalid_utf)
    {
    /* Here the result is tested as a signed value in TMP2: \B fails when it
    is non-zero, \b fails when it is <= 0.
    NOTE(review): presumably a negative value reports an invalid sequence -
    confirm against the word boundary routine. */
    add_jump(compiler, backtracks, CMP((type == OP_NOT_WORD_BOUNDARY) ? SLJIT_NOT_EQUAL : SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0));
    return cc;
    }
#endif /* SUPPORT_UNICODE */
  /* Otherwise the zero flag is declared current and used directly: \B fails
  on non-zero, \b fails on zero. */
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return cc;

  case OP_EODN:
  /* Requires rather complex checks. jump[0] is taken when STR_PTR is already
  at (or past) STR_END; the remaining code handles a newline that must be
  the very last thing in the subject. */
  jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Fixed newline made of two code units: TMP2 = STR_PTR + 2 units,
    TMP1 = the code unit at STR_PTR. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (common->mode == PCRE2_JIT_COMPLETE)
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
    else
      {
      /* Partial matching: exactly two units left goes on to the full
      comparison below (jump[1]). Otherwise fail if more than two units
      remain or the single remaining unit is not the first newline unit;
      if it is, run check_partial and then fail. */
      jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
      OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, STR_END, 0);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
      OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_EQUAL);
      add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
      check_partial(common, TRUE);
      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
      JUMPHERE(jump[1]);
      }
    /* Both units must match the high and low bytes of common->newline. */
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
    }
  else if (common->nltype == NLTYPE_FIXED)
    {
    /* Fixed single-unit newline: it must be the last unit of the subject. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
    }
  else
    {
    /* Non-fixed newline types. A leading CR is handled first: jump[1] skips
    this part when the current unit is not CR. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
    /* Compare STR_PTR + 2 units with STR_END: greater means the CR is the
    last unit (success via jump[2]); less means too much data follows. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP2U(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, TMP2, 0, STR_END, 0);
    jump[2] = JUMP(SLJIT_GREATER);
    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL) /* LESS */);
    /* Equal: exactly two units remain, so the second one must be NL. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));

    JUMPHERE(jump[1]);
    if (common->nltype == NLTYPE_ANYCRLF)
      {
      /* Not CR: the only other accepted ending is a final NL. */
      OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
      }
    else
      {
      /* Save STR_PTR in TMP3, read one character (advancing STR_PTR), insist
      that it ends the subject, then fast-call the shared anynewline routine
      and fail when it leaves the zero flag set. STR_PTR is restored
      afterwards because the assertion consumes nothing. */
      OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
      read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
      add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
      sljit_set_current_flags(compiler, SLJIT_SET_Z);
      add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
      OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
      }
    JUMPHERE(jump[2]);
    JUMPHERE(jump[3]);
    }
  JUMPHERE(jump[0]);
  /* All success paths meet here; partial modes run check_partial. */
  if (common->mode != PCRE2_JIT_COMPLETE)
    check_partial(common, TRUE);
  return cc;

  case OP_EOD:
  /* Fail while any subject data remains. */
  add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
  if (common->mode != PCRE2_JIT_COMPLETE)
    check_partial(common, TRUE);
  return cc;

  case OP_DOLL:
  /* Fail immediately when the PCRE2_NOTEOL bit is set in the options field
  of the jit_arguments. */
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
    OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
    }
  else
    OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));

  /* Without common->endonly the rest is exactly the OP_EODN code (its return
  value is ignored); with it, only the true end of the subject matches. */
  if (!common->endonly)
    compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);
  else
    {
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
    check_partial(common, FALSE);
    }
  return cc;

  case OP_DOLLM:
  /* jump[1] is taken when data remains. Otherwise we are at the end of the
  subject: fail if PCRE2_NOTEOL is set, else succeed through jump[0]. */
  jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
    OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
    }
  else
    OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
  check_partial(common, FALSE);
  jump[0] = JUMP(SLJIT_JUMP);
  JUMPHERE(jump[1]);

  /* Data remains: a newline must start at STR_PTR. */
  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Two-unit fixed newline: TMP2 = STR_PTR + 2 units, TMP1 = first unit. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (common->mode == PCRE2_JIT_COMPLETE)
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
    else
      {
      /* Partial matching: when only one unit is left and it equals the first
      newline unit, run check_partial before failing. */
      jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
      /* STR_PTR = STR_END - IN_UCHARS(1) */
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
      check_partial(common, TRUE);
      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
      JUMPHERE(jump[1]);
      }

    /* Both units must match the high and low bytes of common->newline. */
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
    }
  else
    {
    /* Other newline types: peek at the next character and let
    check_newlinechar queue the failure jumps. */
    peek_char(common, common->nlmax, TMP3, 0, NULL);
    check_newlinechar(common, common->nltype, backtracks, FALSE);
    }
  JUMPHERE(jump[0]);
  return cc;

  case OP_CIRC:
  /* Fail if STR_PTR is beyond the "begin" field, or if the PCRE2_NOTBOL bit
  is set in the options field. */
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
    OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
    }
  else
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
    OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
    add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
    }
  return cc;

  case OP_CIRCM:
  /* TMP2 might be used by peek_char_back. It holds the "begin" field until
  then. jump[1] is taken when STR_PTR is beyond begin; otherwise the
  PCRE2_NOTBOL test decides and success leaves through jump[0]. */
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
    jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0);
    OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
    }
  else
    {
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
    jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0);
    OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
    }
  add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
  jump[0] = JUMP(SLJIT_JUMP);
  JUMPHERE(jump[1]);

  /* Past the beginning: unless alt_circumflex is set, the position at the
  very end of the subject never matches. */
  if (!common->alt_circumflex)
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Two-unit fixed newline: there must be two units between begin (still
    in TMP2) and STR_PTR, and they must equal the newline. */
    OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, TMP2, 0));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
    }
  else
    {
    /* Other newline types: inspect the previous character. */
    peek_char_back(common, common->nlmax, backtracks);
    check_newlinechar(common, common->nltype, backtracks, FALSE);
    }
  JUMPHERE(jump[0]);
  return cc;

  case OP_REVERSE:
  /* Move STR_PTR back by the count stored at cc. A zero count emits no code
  at all; in both cases the operand (LINK_SIZE units) is skipped. */
  length = GET(cc, 0);
  if (length == 0)
    return cc + LINK_SIZE;
  /* TMP2 = the "begin" field, used as the lower bound below. */
  if (HAS_VIRTUAL_REGISTERS)
    {
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
    }
  else
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
#ifdef SUPPORT_UNICODE
  if (common->utf)
    {
    /* UTF: emit a loop with TMP3 as the countdown; each pass fails if
    STR_PTR has reached begin, otherwise calls move_back once. */
    OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, length);
    label = LABEL();
    add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0));
    move_back(common, backtracks, FALSE);
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, label);
    }
  else
#endif
    {
    /* Non-UTF: a single subtraction of length code units, then a bounds
    check against begin. */
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
    add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0));
    }
  check_start_used_ptr(common);
  return cc + LINK_SIZE;
  }
/* Every supported opcode returns from inside the switch. */
SLJIT_UNREACHABLE();
return cc;
}
8518
8519#ifdef SUPPORT_UNICODE
8520
8521#if PCRE2_CODE_UNIT_WIDTH != 32
8522
8523static PCRE2_SPTR SLJIT_FUNC do_extuni_utf(jit_arguments *args, PCRE2_SPTR cc)
8524{
8525PCRE2_SPTR start_subject = args->begin;
8526PCRE2_SPTR end_subject = args->end;
8527int lgb, rgb, ricount;
8528PCRE2_SPTR prevcc, endcc, bptr;
8529BOOL first = TRUE;
8530uint32_t c;
8531
8532prevcc = cc;
8533endcc = NULL;
8534do
8535  {
8536  GETCHARINC(c, cc);
8537  rgb = UCD_GRAPHBREAK(c);
8538
8539  if (first)
8540    {
8541    lgb = rgb;
8542    endcc = cc;
8543    first = FALSE;
8544    continue;
8545    }
8546
8547  if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8548    break;
8549
8550  /* Not breaking between Regional Indicators is allowed only if there
8551  are an even number of preceding RIs. */
8552
8553  if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator)
8554    {
8555    ricount = 0;
8556    bptr = prevcc;
8557
8558    /* bptr is pointing to the left-hand character */
8559    while (bptr > start_subject)
8560      {
8561      bptr--;
8562      BACKCHAR(bptr);
8563      GETCHAR(c, bptr);
8564
8565      if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator)
8566        break;
8567
8568      ricount++;
8569      }
8570
8571    if ((ricount & 1) != 0) break;  /* Grapheme break required */
8572    }
8573
8574  /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
8575  allows any number of them before a following Extended_Pictographic. */
8576
8577  if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
8578       lgb != ucp_gbExtended_Pictographic)
8579    lgb = rgb;
8580
8581  prevcc = endcc;
8582  endcc = cc;
8583  }
8584while (cc < end_subject);
8585
8586return endcc;
8587}
8588
8589#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
8590
8591static PCRE2_SPTR SLJIT_FUNC do_extuni_utf_invalid(jit_arguments *args, PCRE2_SPTR cc)
8592{
8593PCRE2_SPTR start_subject = args->begin;
8594PCRE2_SPTR end_subject = args->end;
8595int lgb, rgb, ricount;
8596PCRE2_SPTR prevcc, endcc, bptr;
8597BOOL first = TRUE;
8598uint32_t c;
8599
8600prevcc = cc;
8601endcc = NULL;
8602do
8603  {
8604  GETCHARINC_INVALID(c, cc, end_subject, break);
8605  rgb = UCD_GRAPHBREAK(c);
8606
8607  if (first)
8608    {
8609    lgb = rgb;
8610    endcc = cc;
8611    first = FALSE;
8612    continue;
8613    }
8614
8615  if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8616    break;
8617
8618  /* Not breaking between Regional Indicators is allowed only if there
8619  are an even number of preceding RIs. */
8620
8621  if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator)
8622    {
8623    ricount = 0;
8624    bptr = prevcc;
8625
8626    /* bptr is pointing to the left-hand character */
8627    while (bptr > start_subject)
8628      {
8629      GETCHARBACK_INVALID(c, bptr, start_subject, break);
8630
8631      if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator)
8632        break;
8633
8634      ricount++;
8635      }
8636
8637    if ((ricount & 1) != 0)
8638      break;  /* Grapheme break required */
8639    }
8640
8641  /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
8642  allows any number of them before a following Extended_Pictographic. */
8643
8644  if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
8645       lgb != ucp_gbExtended_Pictographic)
8646    lgb = rgb;
8647
8648  prevcc = endcc;
8649  endcc = cc;
8650  }
8651while (cc < end_subject);
8652
8653return endcc;
8654}
8655
8656static PCRE2_SPTR SLJIT_FUNC do_extuni_no_utf(jit_arguments *args, PCRE2_SPTR cc)
8657{
8658PCRE2_SPTR start_subject = args->begin;
8659PCRE2_SPTR end_subject = args->end;
8660int lgb, rgb, ricount;
8661PCRE2_SPTR bptr;
8662uint32_t c;
8663
8664/* Patch by PH */
8665/* GETCHARINC(c, cc); */
8666c = *cc++;
8667
8668#if PCRE2_CODE_UNIT_WIDTH == 32
8669if (c >= 0x110000)
8670  return NULL;
8671#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8672lgb = UCD_GRAPHBREAK(c);
8673
8674while (cc < end_subject)
8675  {
8676  c = *cc;
8677#if PCRE2_CODE_UNIT_WIDTH == 32
8678  if (c >= 0x110000)
8679    break;
8680#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8681  rgb = UCD_GRAPHBREAK(c);
8682
8683  if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8684    break;
8685
8686  /* Not breaking between Regional Indicators is allowed only if there
8687  are an even number of preceding RIs. */
8688
8689  if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator)
8690    {
8691    ricount = 0;
8692    bptr = cc - 1;
8693
8694    /* bptr is pointing to the left-hand character */
8695    while (bptr > start_subject)
8696      {
8697      bptr--;
8698      c = *bptr;
8699#if PCRE2_CODE_UNIT_WIDTH == 32
8700      if (c >= 0x110000)
8701        break;
8702#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8703
8704      if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator) break;
8705
8706      ricount++;
8707      }
8708
8709    if ((ricount & 1) != 0)
8710      break;  /* Grapheme break required */
8711    }
8712
8713  /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
8714  allows any number of them before a following Extended_Pictographic. */
8715
8716  if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
8717       lgb != ucp_gbExtended_Pictographic)
8718    lgb = rgb;
8719
8720  cc++;
8721  }
8722
8723return cc;
8724}
8725
8726#endif /* SUPPORT_UNICODE */
8727
/* Emit the matching code for one single-character item.

Arguments:
  common         the JIT compiler state
  type           the opcode of the item (selects the case below)
  cc             pattern pointer just after the opcode, i.e. at its data
  backtracks     list that receives the jumps taken when the item fails
  check_str_ptr  when TRUE, detect_partial_match() is emitted before the
                 subject is read (OP_CHAR/OP_CHARI use a cheaper inline
                 end check in PCRE2_JIT_COMPLETE mode instead)

Returns: the pattern pointer after the data belonging to the item. */

static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr)
{
DEFINE_COMPILER;
int length;
unsigned int c, oc, bit;
compare_context context;
struct sljit_jump *jump[3];
jump_list *end_list;
#ifdef SUPPORT_UNICODE
PCRE2_UCHAR propdata[5];
#endif /* SUPPORT_UNICODE */

switch(type)
  {
  /* \d, \s and \w and their negations: load the character's type bits into
  TMP1 and test the relevant ctype_* bit. */
  case OP_NOT_DIGIT:
  case OP_DIGIT:
  /* Digits are usually 0-9, so it is worth to optimize them. */
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_digit, FALSE))
    read_char7_type(common, backtracks, type == OP_NOT_DIGIT);
  else
#endif
    read_char8_type(common, backtracks, type == OP_NOT_DIGIT);
    /* Flip the starting bit in the negative case. */
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_digit);
  add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
  return cc;

  case OP_NOT_WHITESPACE:
  case OP_WHITESPACE:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_space, FALSE))
    read_char7_type(common, backtracks, type == OP_NOT_WHITESPACE);
  else
#endif
    read_char8_type(common, backtracks, type == OP_NOT_WHITESPACE);
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_space);
  add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
  return cc;

  case OP_NOT_WORDCHAR:
  case OP_WORDCHAR:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_word, FALSE))
    read_char7_type(common, backtracks, type == OP_NOT_WORDCHAR);
  else
#endif
    read_char8_type(common, backtracks, type == OP_NOT_WORDCHAR);
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_word);
  add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
  return cc;

  /* Dot without dotall: read one character and fail when it is a newline. */
  case OP_ANY:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* The fixed newline has two code units. Only when the character just
    read equals the high byte of common->newline is the next code unit
    compared with the low byte; hitting the end of the subject skips that
    second comparison. */
    jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
    end_list = NULL;
    if (common->mode != PCRE2_JIT_PARTIAL_HARD)
      add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
    else
      check_str_end(common, &end_list);

    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
    set_jumps(end_list, LABEL());
    JUMPHERE(jump[0]);
    }
  else
    check_newlinechar(common, common->nltype, backtracks, TRUE);
  return cc;

  /* Dot with dotall: advance STR_PTR over one whole character. */
  case OP_ALLANY:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
#ifdef SUPPORT_UNICODE
  if (common->utf)
    {
    if (common->invalid_utf)
      {
      /* With possibly invalid UTF the generic reader is used, which gets
      the backtrack list for its failure jumps. */
      read_char(common, 0, READ_CHAR_MAX, backtracks, READ_CHAR_UPDATE_STR_PTR);
      return cc;
      }

#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if PCRE2_CODE_UNIT_WIDTH == 8
    /* A first byte of 0xc0 or above indexes utf8_table4; the value loaded
    from it is added to STR_PTR. */
    jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#elif PCRE2_CODE_UNIT_WIDTH == 16
    /* When (unit & 0xfc00) == 0xd800, the flag value (1) is shifted left by
    one and added to STR_PTR, skipping one more 16-bit unit. */
    jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
    OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xd800);
    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
    JUMPHERE(jump[0]);
    return cc;
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
    }
#endif /* SUPPORT_UNICODE */
  /* Non-UTF subjects (and, with 32-bit code units, UTF subjects that are not
  in invalid_utf mode): one character is one code unit. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  return cc;

  /* \C: always exactly one code unit. */
  case OP_ANYBYTE:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  return cc;

#ifdef SUPPORT_UNICODE
  /* \p and \P: build a temporary extended-class description holding the one
  property and let the XCLASS compiler handle it. The two data units at cc
  are copied into it and skipped on return. */
  case OP_NOTPROP:
  case OP_PROP:
  propdata[0] = XCL_HASPROP;
  propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
  propdata[2] = cc[0];
  propdata[3] = cc[1];
  propdata[4] = XCL_END;
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  compile_xclass_matchingpath(common, propdata, backtracks);
  return cc + 2;
#endif

  /* \R: a CR followed by NL consumes both; otherwise the character must pass
  check_newlinechar() for the \R newline type. */
  case OP_ANYNL:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  read_char(common, common->bsr_nlmin, common->bsr_nlmax, NULL, 0);
  jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
  /* We don't need to handle soft partial matching case. */
  end_list = NULL;
  if (common->mode != PCRE2_JIT_PARTIAL_HARD)
    add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  else
    check_str_end(common, &end_list);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  jump[2] = JUMP(SLJIT_JUMP);
  JUMPHERE(jump[0]);
  check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
  set_jumps(end_list, LABEL());
  JUMPHERE(jump[1]);
  JUMPHERE(jump[2]);
  return cc;

  /* \h and \H: the shared hspace routine is called and the zero flag it
  leaves decides the outcome. */
  case OP_NOT_HSPACE:
  case OP_HSPACE:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);

  if (type == OP_NOT_HSPACE)
    read_char(common, 0x9, 0x3000, backtracks, READ_CHAR_UPDATE_STR_PTR);
  else
    read_char(common, 0x9, 0x3000, NULL, 0);

  add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return cc;

  /* \v and \V: same scheme as \h, using the shared vspace routine. */
  case OP_NOT_VSPACE:
  case OP_VSPACE:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);

  if (type == OP_NOT_VSPACE)
    read_char(common, 0xa, 0x2029, backtracks, READ_CHAR_UPDATE_STR_PTR);
  else
    read_char(common, 0xa, 0x2029, NULL, 0);

  add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return cc;

#ifdef SUPPORT_UNICODE
  /* \X: call one of the native do_extuni_* helpers with the match arguments
  in R0 and the subject position in R1. The returned pointer becomes the new
  STR_PTR; a zero return is treated as a failed match in the configurations
  tested below. */
  case OP_EXTUNI:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);

  SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
  OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);

#if PCRE2_CODE_UNIT_WIDTH != 32
  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM,
    common->utf ? (common->invalid_utf ? SLJIT_FUNC_ADDR(do_extuni_utf_invalid) : SLJIT_FUNC_ADDR(do_extuni_utf)) : SLJIT_FUNC_ADDR(do_extuni_no_utf));
  if (common->invalid_utf)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
#else
  /* With 32-bit code units do_extuni_no_utf() is also used for valid UTF
  and may itself return NULL for a code unit of 0x110000 or above. */
  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM,
    common->invalid_utf ? SLJIT_FUNC_ADDR(do_extuni_utf_invalid) : SLJIT_FUNC_ADDR(do_extuni_no_utf));
  if (!common->utf || common->invalid_utf)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
#endif

  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);

  if (common->mode == PCRE2_JIT_PARTIAL_HARD)
    {
    jump[0] = CMP(SLJIT_LESS, SLJIT_RETURN_REG, 0, STR_END, 0);
    /* Since we successfully read a char above, partial matching must occur. */
    check_partial(common, TRUE);
    JUMPHERE(jump[0]);
    }
  return cc;
#endif

  /* A literal character, caseful or caseless. */
  case OP_CHAR:
  case OP_CHARI:
  length = 1;
#ifdef SUPPORT_UNICODE
  if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
#endif

  if (check_str_ptr && common->mode != PCRE2_JIT_COMPLETE)
    detect_partial_match(common, backtracks);

  /* Caseful characters, characters without another case, and characters for
  which char_get_othercase_bit() returns non-zero are compared as a
  fixed-length code unit sequence. */
  if (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0)
    {
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
    if (length > 1 || (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE))
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));

    context.length = IN_UCHARS(length);
    context.sourcereg = -1;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
    context.ucharptr = 0;
#endif
    return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
    }

  /* Remaining case: caseless character whose other case cannot be handled
  by the sequence compare. Read the subject character and accept either c
  or oc. */
#ifdef SUPPORT_UNICODE
  if (common->utf)
    {
    GETCHAR(c, cc);
    }
  else
#endif
    c = *cc;

  SLJIT_ASSERT(type == OP_CHARI && char_has_othercase(common, cc));

  if (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE)
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

  oc = char_othercase(common, c);
  read_char(common, c < oc ? c : oc, c > oc ? c : oc, NULL, 0);

  SLJIT_ASSERT(!is_powerof2(c ^ oc));

  if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
    {
    /* Branch-free form: replace TMP1 by c when it equals oc, then a single
    compare with c remains. */
    OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, oc);
    CMOV(SLJIT_EQUAL, TMP1, SLJIT_IMM, c);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
    }
  else
    {
    jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
    JUMPHERE(jump[0]);
    }
  return cc + length;

  /* A negated literal character: any one character except c (and, for
  OP_NOTI, except its other case). */
  case OP_NOT:
  case OP_NOTI:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);

  length = 1;
#ifdef SUPPORT_UNICODE
  if (common->utf)
    {
#if PCRE2_CODE_UNIT_WIDTH == 8
    c = *cc;
    if (c < 128 && !common->invalid_utf)
      {
      /* Fast path for a one-byte pattern character: only the first subject
      byte has to be compared. */
      OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
      if (type == OP_NOT || !char_has_othercase(common, cc))
        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
      else
        {
        /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
        OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
        }
      /* Skip the variable-length character. */
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
      JUMPHERE(jump[0]);
      return cc + 1;
      }
    else
#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
      {
      GETCHARLEN(c, cc, length);
      }
    }
  else
#endif /* SUPPORT_UNICODE */
    c = *cc;

  if (type == OP_NOT || !char_has_othercase(common, cc))
    {
    read_char(common, c, c, backtracks, READ_CHAR_UPDATE_STR_PTR);
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
    }
  else
    {
    oc = char_othercase(common, c);
    read_char(common, c < oc ? c : oc, c > oc ? c : oc, backtracks, READ_CHAR_UPDATE_STR_PTR);
    bit = c ^ oc;
    if (is_powerof2(bit))
      {
      /* The two cases differ in a single bit: force that bit on and compare
      once. */
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
      }
    else
      {
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
      add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
      }
    }
  return cc + length;

  /* Bitmap character classes. The map at cc occupies 32 bytes, which is the
  amount skipped on return. */
  case OP_CLASS:
  case OP_NCLASS:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);

#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  bit = (common->utf && is_char7_bitset((const sljit_u8 *)cc, type == OP_NCLASS)) ? 127 : 255;
  if (type == OP_NCLASS)
    read_char(common, 0, bit, backtracks, READ_CHAR_UPDATE_STR_PTR);
  else
    read_char(common, 0, bit, NULL, 0);
#else
  if (type == OP_NCLASS)
    read_char(common, 0, 255, backtracks, READ_CHAR_UPDATE_STR_PTR);
  else
    read_char(common, 0, 255, NULL, 0);
#endif

  /* optimize_class() returns non-zero when it has already emitted the whole
  test. */
  if (optimize_class(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks))
    return cc + 32 / sizeof(PCRE2_UCHAR);

  /* Characters above the range covered by the map: OP_CLASS fails for them,
  OP_NCLASS accepts them by jumping over the map lookup. */
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
  jump[0] = NULL;
  if (common->utf)
    {
    jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
    if (type == OP_CLASS)
      {
      add_jump(compiler, backtracks, jump[0]);
      jump[0] = NULL;
      }
    }
#elif PCRE2_CODE_UNIT_WIDTH != 8
  jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
  if (type == OP_CLASS)
    {
    add_jump(compiler, backtracks, jump[0]);
    jump[0] = NULL;
    }
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */

  /* TMP2 = 1 << (ch & 7), TMP1 = map byte at index ch >> 3; fail when the
  selected bit is clear. */
  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
  add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
  if (jump[0] != NULL)
    JUMPHERE(jump[0]);
#endif
  return cc + 32 / sizeof(PCRE2_UCHAR);

#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
  /* Extended class: the data starts after the LINK_SIZE length field, whose
  value GET(cc, 0) gives the distance to the next item. */
  case OP_XCLASS:
  if (check_str_ptr)
    detect_partial_match(common, backtracks);
  compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
  return cc + GET(cc, 0) - 1;
#endif
  }
/* Every opcode handled here returns from inside the switch. */
SLJIT_UNREACHABLE();
return cc;
}
9132
9133static SLJIT_INLINE PCRE2_SPTR compile_charn_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, jump_list **backtracks)
9134{
9135/* This function consumes at least one input character. */
9136/* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
9137DEFINE_COMPILER;
9138PCRE2_SPTR ccbegin = cc;
9139compare_context context;
9140int size;
9141
9142context.length = 0;
9143do
9144  {
9145  if (cc >= ccend)
9146    break;
9147
9148  if (*cc == OP_CHAR)
9149    {
9150    size = 1;
9151#ifdef SUPPORT_UNICODE
9152    if (common->utf && HAS_EXTRALEN(cc[1]))
9153      size += GET_EXTRALEN(cc[1]);
9154#endif
9155    }
9156  else if (*cc == OP_CHARI)
9157    {
9158    size = 1;
9159#ifdef SUPPORT_UNICODE
9160    if (common->utf)
9161      {
9162      if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
9163        size = 0;
9164      else if (HAS_EXTRALEN(cc[1]))
9165        size += GET_EXTRALEN(cc[1]);
9166      }
9167    else
9168#endif
9169    if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
9170      size = 0;
9171    }
9172  else
9173    size = 0;
9174
9175  cc += 1 + size;
9176  context.length += IN_UCHARS(size);
9177  }
9178while (size > 0 && context.length <= 128);
9179
9180cc = ccbegin;
9181if (context.length > 0)
9182  {
9183  /* We have a fixed-length byte sequence. */
9184  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
9185  add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
9186
9187  context.sourcereg = -1;
9188#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
9189  context.ucharptr = 0;
9190#endif
9191  do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
9192  return cc;
9193  }
9194
9195/* A non-fixed length character will be checked if length == 0. */
9196return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);
9197}
9198
/* Forward definitions. */
static void compile_matchingpath(compiler_common *, PCRE2_SPTR, PCRE2_SPTR, backtrack_common *);
static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);

/* Allocate a zero-filled backtrack record of "size" bytes from the compiler's
memory and push it onto the parent's backtrack chain. The macro relies on the
names "compiler", "parent" and "backtrack" being in scope where it is
expanded, and assigns the new record to "backtrack". When the compiler is in
an error state after the allocation, the enclosing function returns "error"
immediately.

  size     size of the record in bytes
  ccstart  pattern position stored in the record's cc field
  error    value returned from the enclosing function on failure */

#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, size); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)

/* Same as PUSH_BACKTRACK, for use in functions returning void: on failure a
plain "return" is executed. */

#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, size); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)

/* View the in-scope "backtrack" pointer as a pointer to the given record
type. */

#define BACKTRACK_AS(type) ((type *)backtrack)
9230
9231static void compile_dnref_search(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
9232{
9233/* The OVECTOR offset goes to TMP2. */
9234DEFINE_COMPILER;
9235int count = GET2(cc, 1 + IMM2_SIZE);
9236PCRE2_SPTR slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
9237unsigned int offset;
9238jump_list *found = NULL;
9239
9240SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);
9241
9242OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
9243
9244count--;
9245while (count-- > 0)
9246  {
9247  offset = GET2(slot, 0) << 1;
9248  GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
9249  add_jump(compiler, &found, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
9250  slot += common->name_entry_size;
9251  }
9252
9253offset = GET2(slot, 0) << 1;
9254GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
9255if (backtracks != NULL && !common->unset_backref)
9256  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
9257
9258set_jumps(found, LABEL());
9259}
9260
9261static void compile_ref_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
9262{
9263DEFINE_COMPILER;
9264BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
9265int offset = 0;
9266struct sljit_jump *jump = NULL;
9267struct sljit_jump *partial;
9268struct sljit_jump *nopartial;
9269#if defined SUPPORT_UNICODE
9270struct sljit_label *loop;
9271struct sljit_label *caseless_loop;
9272jump_list *no_match = NULL;
9273int source_reg = COUNT_MATCH;
9274int source_end_reg = ARGUMENTS;
9275int char1_reg = STACK_LIMIT;
9276#endif /* SUPPORT_UNICODE */
9277
9278if (ref)
9279  {
9280  offset = GET2(cc, 1) << 1;
9281  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9282  /* OVECTOR(1) contains the "string begin - 1" constant. */
9283  if (withchecks && !common->unset_backref)
9284    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
9285  }
9286else
9287  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9288
9289#if defined SUPPORT_UNICODE
9290if (common->utf && *cc == OP_REFI)
9291  {
9292  SLJIT_ASSERT(common->iref_ptr != 0);
9293
9294  if (ref)
9295    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9296  else
9297    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9298
9299  if (withchecks && emptyfail)
9300    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0));
9301
9302  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr, source_reg, 0);
9303  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw), source_end_reg, 0);
9304  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2, char1_reg, 0);
9305
9306  OP1(SLJIT_MOV, source_reg, 0, TMP1, 0);
9307  OP1(SLJIT_MOV, source_end_reg, 0, TMP2, 0);
9308
9309  loop = LABEL();
9310  jump = CMP(SLJIT_GREATER_EQUAL, source_reg, 0, source_end_reg, 0);
9311  partial = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
9312
9313  /* Read original character. It must be a valid UTF character. */
9314  OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
9315  OP1(SLJIT_MOV, STR_PTR, 0, source_reg, 0);
9316
9317  read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR | READ_CHAR_VALID_UTF);
9318
9319  OP1(SLJIT_MOV, source_reg, 0, STR_PTR, 0);
9320  OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
9321  OP1(SLJIT_MOV, char1_reg, 0, TMP1, 0);
9322
9323  /* Read second character. */
9324  read_char(common, 0, READ_CHAR_MAX, &no_match, READ_CHAR_UPDATE_STR_PTR);
9325
9326  CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);
9327
9328  OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
9329
9330  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
9331
9332  OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
9333  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
9334  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
9335
9336  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records));
9337
9338  OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, other_case));
9339  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, caseset));
9340  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0);
9341  CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);
9342
9343  add_jump(compiler, &no_match, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
9344  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
9345  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_caseless_sets));
9346
9347  caseless_loop = LABEL();
9348  OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9349  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(uint32_t));
9350  OP2U(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, TMP1, 0, char1_reg, 0);
9351  JUMPTO(SLJIT_EQUAL, loop);
9352  JUMPTO(SLJIT_LESS, caseless_loop);
9353
9354  set_jumps(no_match, LABEL());
9355  if (common->mode == PCRE2_JIT_COMPLETE)
9356    JUMPHERE(partial);
9357
9358  OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
9359  OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
9360  OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
9361  add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
9362
9363  if (common->mode != PCRE2_JIT_COMPLETE)
9364    {
9365    JUMPHERE(partial);
9366    OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
9367    OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
9368    OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
9369
9370    check_partial(common, FALSE);
9371    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
9372    }
9373
9374  JUMPHERE(jump);
9375  OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
9376  OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
9377  OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
9378  return;
9379  }
9380else
9381#endif /* SUPPORT_UNICODE */
9382  {
9383  if (ref)
9384    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
9385  else
9386    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
9387
9388  if (withchecks)
9389    jump = JUMP(SLJIT_ZERO);
9390
9391  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
9392  partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
9393  if (common->mode == PCRE2_JIT_COMPLETE)
9394    add_jump(compiler, backtracks, partial);
9395
9396  add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
9397  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
9398
9399  if (common->mode != PCRE2_JIT_COMPLETE)
9400    {
9401    nopartial = JUMP(SLJIT_JUMP);
9402    JUMPHERE(partial);
9403    /* TMP2 -= STR_END - STR_PTR */
9404    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
9405    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
9406    partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
9407    OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
9408    add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
9409    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
9410    JUMPHERE(partial);
9411    check_partial(common, FALSE);
9412    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
9413    JUMPHERE(nopartial);
9414    }
9415  }
9416
9417if (jump != NULL)
9418  {
9419  if (emptyfail)
9420    add_jump(compiler, backtracks, jump);
9421  else
9422    JUMPHERE(jump);
9423  }
9424}
9425
9426static SLJIT_INLINE PCRE2_SPTR compile_ref_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9427{
9428DEFINE_COMPILER;
9429BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
9430backtrack_common *backtrack;
9431PCRE2_UCHAR type;
9432int offset = 0;
9433struct sljit_label *label;
9434struct sljit_jump *zerolength;
9435struct sljit_jump *jump = NULL;
9436PCRE2_SPTR ccbegin = cc;
9437int min = 0, max = 0;
9438BOOL minimize;
9439
9440PUSH_BACKTRACK(sizeof(ref_iterator_backtrack), cc, NULL);
9441
9442if (ref)
9443  offset = GET2(cc, 1) << 1;
9444else
9445  cc += IMM2_SIZE;
9446type = cc[1 + IMM2_SIZE];
9447
9448SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
9449minimize = (type & 0x1) != 0;
9450switch(type)
9451  {
9452  case OP_CRSTAR:
9453  case OP_CRMINSTAR:
9454  min = 0;
9455  max = 0;
9456  cc += 1 + IMM2_SIZE + 1;
9457  break;
9458  case OP_CRPLUS:
9459  case OP_CRMINPLUS:
9460  min = 1;
9461  max = 0;
9462  cc += 1 + IMM2_SIZE + 1;
9463  break;
9464  case OP_CRQUERY:
9465  case OP_CRMINQUERY:
9466  min = 0;
9467  max = 1;
9468  cc += 1 + IMM2_SIZE + 1;
9469  break;
9470  case OP_CRRANGE:
9471  case OP_CRMINRANGE:
9472  min = GET2(cc, 1 + IMM2_SIZE + 1);
9473  max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
9474  cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
9475  break;
9476  default:
9477  SLJIT_UNREACHABLE();
9478  break;
9479  }
9480
9481if (!minimize)
9482  {
9483  if (min == 0)
9484    {
9485    allocate_stack(common, 2);
9486    if (ref)
9487      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9488    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9489    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
9490    /* Temporary release of STR_PTR. */
9491    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9492    /* Handles both invalid and empty cases. Since the minimum repeat,
9493    is zero the invalid case is basically the same as an empty case. */
9494    if (ref)
9495      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9496    else
9497      {
9498      compile_dnref_search(common, ccbegin, NULL);
9499      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9500      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
9501      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9502      }
9503    /* Restore if not zero length. */
9504    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9505    }
9506  else
9507    {
9508    allocate_stack(common, 1);
9509    if (ref)
9510      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9511    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9512    if (ref)
9513      {
9514      add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
9515      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9516      }
9517    else
9518      {
9519      compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
9520      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9521      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
9522      zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9523      }
9524    }
9525
9526  if (min > 1 || max > 1)
9527    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);
9528
9529  label = LABEL();
9530  if (!ref)
9531    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
9532  compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
9533
9534  if (min > 1 || max > 1)
9535    {
9536    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
9537    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
9538    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
9539    if (min > 1)
9540      CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
9541    if (max > 1)
9542      {
9543      jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
9544      allocate_stack(common, 1);
9545      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9546      JUMPTO(SLJIT_JUMP, label);
9547      JUMPHERE(jump);
9548      }
9549    }
9550
9551  if (max == 0)
9552    {
9553    /* Includes min > 1 case as well. */
9554    allocate_stack(common, 1);
9555    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9556    JUMPTO(SLJIT_JUMP, label);
9557    }
9558
9559  JUMPHERE(zerolength);
9560  BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
9561
9562  count_match(common);
9563  return cc;
9564  }
9565
9566allocate_stack(common, ref ? 2 : 3);
9567if (ref)
9568  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9569OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9570if (type != OP_CRMINSTAR)
9571  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
9572
9573if (min == 0)
9574  {
9575  /* Handles both invalid and empty cases. Since the minimum repeat,
9576  is zero the invalid case is basically the same as an empty case. */
9577  if (ref)
9578    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9579  else
9580    {
9581    compile_dnref_search(common, ccbegin, NULL);
9582    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9583    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
9584    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9585    }
9586  /* Length is non-zero, we can match real repeats. */
9587  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9588  jump = JUMP(SLJIT_JUMP);
9589  }
9590else
9591  {
9592  if (ref)
9593    {
9594    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
9595    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9596    }
9597  else
9598    {
9599    compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
9600    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9601    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
9602    zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9603    }
9604  }
9605
9606BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
9607if (max > 0)
9608  add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
9609
9610if (!ref)
9611  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
9612compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
9613OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9614
9615if (min > 1)
9616  {
9617  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9618  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
9619  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
9620  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(ref_iterator_backtrack)->matchingpath);
9621  }
9622else if (max > 0)
9623  OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
9624
9625if (jump != NULL)
9626  JUMPHERE(jump);
9627JUMPHERE(zerolength);
9628
9629count_match(common);
9630return cc;
9631}
9632
9633static SLJIT_INLINE PCRE2_SPTR compile_recurse_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9634{
9635DEFINE_COMPILER;
9636backtrack_common *backtrack;
9637recurse_entry *entry = common->entries;
9638recurse_entry *prev = NULL;
9639sljit_sw start = GET(cc, 1);
9640PCRE2_SPTR start_cc;
9641BOOL needs_control_head;
9642
9643PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
9644
9645/* Inlining simple patterns. */
9646if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
9647  {
9648  start_cc = common->start + start;
9649  compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
9650  BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
9651  return cc + 1 + LINK_SIZE;
9652  }
9653
9654while (entry != NULL)
9655  {
9656  if (entry->start == start)
9657    break;
9658  prev = entry;
9659  entry = entry->next;
9660  }
9661
9662if (entry == NULL)
9663  {
9664  entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
9665  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9666    return NULL;
9667  entry->next = NULL;
9668  entry->entry_label = NULL;
9669  entry->backtrack_label = NULL;
9670  entry->entry_calls = NULL;
9671  entry->backtrack_calls = NULL;
9672  entry->start = start;
9673
9674  if (prev != NULL)
9675    prev->next = entry;
9676  else
9677    common->entries = entry;
9678  }
9679
9680BACKTRACK_AS(recurse_backtrack)->entry = entry;
9681
9682if (entry->entry_label == NULL)
9683  add_jump(compiler, &entry->entry_calls, JUMP(SLJIT_FAST_CALL));
9684else
9685  JUMPTO(SLJIT_FAST_CALL, entry->entry_label);
9686/* Leave if the match is failed. */
9687add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
9688BACKTRACK_AS(recurse_backtrack)->matchingpath = LABEL();
9689return cc + 1 + LINK_SIZE;
9690}
9691
9692static sljit_s32 SLJIT_FUNC do_callout_jit(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector)
9693{
9694PCRE2_SPTR begin;
9695PCRE2_SIZE *ovector;
9696sljit_u32 oveccount, capture_top;
9697
9698if (arguments->callout == NULL)
9699  return 0;
9700
9701SLJIT_COMPILE_ASSERT(sizeof (PCRE2_SIZE) <= sizeof (sljit_sw), pcre2_size_must_be_lower_than_sljit_sw_size);
9702
9703begin = arguments->begin;
9704ovector = (PCRE2_SIZE*)(callout_block + 1);
9705oveccount = callout_block->capture_top;
9706
9707SLJIT_ASSERT(oveccount >= 1);
9708
9709callout_block->version = 2;
9710callout_block->callout_flags = 0;
9711
9712/* Offsets in subject. */
9713callout_block->subject_length = arguments->end - arguments->begin;
9714callout_block->start_match = jit_ovector[0] - begin;
9715callout_block->current_position = (PCRE2_SPTR)callout_block->offset_vector - begin;
9716callout_block->subject = begin;
9717
9718/* Convert and copy the JIT offset vector to the ovector array. */
9719callout_block->capture_top = 1;
9720callout_block->offset_vector = ovector;
9721
9722ovector[0] = PCRE2_UNSET;
9723ovector[1] = PCRE2_UNSET;
9724ovector += 2;
9725jit_ovector += 2;
9726capture_top = 1;
9727
9728/* Convert pointers to sizes. */
9729while (--oveccount != 0)
9730  {
9731  capture_top++;
9732
9733  ovector[0] = (PCRE2_SIZE)(jit_ovector[0] - begin);
9734  ovector[1] = (PCRE2_SIZE)(jit_ovector[1] - begin);
9735
9736  if (ovector[0] != PCRE2_UNSET)
9737    callout_block->capture_top = capture_top;
9738
9739  ovector += 2;
9740  jit_ovector += 2;
9741  }
9742
9743return (arguments->callout)(callout_block, arguments->callout_data);
9744}
9745
9746#define CALLOUT_ARG_OFFSET(arg) \
9747    SLJIT_OFFSETOF(pcre2_callout_block, arg)
9748
9749static SLJIT_INLINE PCRE2_SPTR compile_callout_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9750{
9751DEFINE_COMPILER;
9752backtrack_common *backtrack;
9753sljit_s32 mov_opcode;
9754unsigned int callout_length = (*cc == OP_CALLOUT)
9755    ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2 * LINK_SIZE);
9756sljit_sw value1;
9757sljit_sw value2;
9758sljit_sw value3;
9759sljit_uw callout_arg_size = (common->re->top_bracket + 1) * 2 * SSIZE_OF(sw);
9760
9761PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
9762
9763callout_arg_size = (sizeof(pcre2_callout_block) + callout_arg_size + sizeof(sljit_sw) - 1) / sizeof(sljit_sw);
9764
9765allocate_stack(common, callout_arg_size);
9766
9767SLJIT_ASSERT(common->capture_last_ptr != 0);
9768OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
9769OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
9770value1 = (*cc == OP_CALLOUT) ? cc[1 + 2 * LINK_SIZE] : 0;
9771OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, value1);
9772OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
9773OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_top), SLJIT_IMM, common->re->top_bracket + 1);
9774
9775/* These pointer sized fields temporarly stores internal variables. */
9776OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
9777
9778if (common->mark_ptr != 0)
9779  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
9780mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
9781OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 1));
9782OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 1 + LINK_SIZE));
9783
9784if (*cc == OP_CALLOUT)
9785  {
9786  value1 = 0;
9787  value2 = 0;
9788  value3 = 0;
9789  }
9790else
9791  {
9792  value1 = (sljit_sw) (cc + (1 + 4*LINK_SIZE) + 1);
9793  value2 = (callout_length - (1 + 4*LINK_SIZE + 2));
9794  value3 = (sljit_sw) (GET(cc, 1 + 3*LINK_SIZE));
9795  }
9796
9797OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string), SLJIT_IMM, value1);
9798OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_length), SLJIT_IMM, value2);
9799OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_offset), SLJIT_IMM, value3);
9800OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
9801
9802SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
9803
9804/* Needed to save important temporary registers. */
9805OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0);
9806/* SLJIT_R0 = arguments */
9807OP1(SLJIT_MOV, SLJIT_R1, 0, STACK_TOP, 0);
9808GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
9809sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS3(32, W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_callout_jit));
9810OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
9811free_stack(common, callout_arg_size);
9812
9813/* Check return value. */
9814OP2U(SLJIT_SUB32 | SLJIT_SET_Z | SLJIT_SET_SIG_GREATER, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
9815add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER));
9816if (common->abort_label == NULL)
9817  add_jump(compiler, &common->abort, JUMP(SLJIT_NOT_EQUAL) /* SIG_LESS */);
9818else
9819  JUMPTO(SLJIT_NOT_EQUAL /* SIG_LESS */, common->abort_label);
9820return cc + callout_length;
9821}
9822
9823#undef CALLOUT_ARG_SIZE
9824#undef CALLOUT_ARG_OFFSET
9825
9826static SLJIT_INLINE BOOL assert_needs_str_ptr_saving(PCRE2_SPTR cc)
9827{
9828while (TRUE)
9829  {
9830  switch (*cc)
9831    {
9832    case OP_CALLOUT_STR:
9833    cc += GET(cc, 1 + 2*LINK_SIZE);
9834    break;
9835
9836    case OP_NOT_WORD_BOUNDARY:
9837    case OP_WORD_BOUNDARY:
9838    case OP_CIRC:
9839    case OP_CIRCM:
9840    case OP_DOLL:
9841    case OP_DOLLM:
9842    case OP_CALLOUT:
9843    case OP_ALT:
9844    cc += PRIV(OP_lengths)[*cc];
9845    break;
9846
9847    case OP_KET:
9848    return FALSE;
9849
9850    default:
9851    return TRUE;
9852    }
9853  }
9854}
9855
/* Generates the matching path of an assertion. After an optional leading
OP_BRAZERO / OP_BRAMINZERO (not allowed when 'conditional' is set), cc must
point to an opcode in the OP_ASSERT .. OP_ASSERTBACK_NOT range.

Arguments:
  common       the compiler state; its quit / accept / then_trap / positive
               assertion fields are replaced while the assertion is compiled
               and restored before every return
  cc           the first opcode of the assertion
  backtrack    receives framesize, private_data_ptr and, for the zero
               repeated forms, the matchingpath label
  conditional  when TRUE, the failure jumps are collected in
               backtrack->condfailed instead of backtrack->common.topbacktracks

Every alternative (separated by OP_ALT) is compiled together with its
backtracking path. After the last alternative the code handling the overall
result of the assertion is emitted.

Returns NULL if the compiler reports an error after compiling the matching or
the backtracking path of an alternative, otherwise cc + 1 + LINK_SIZE where cc
has been advanced past the last alternative. */
9856static PCRE2_SPTR compile_assert_matchingpath(compiler_common *common, PCRE2_SPTR cc, assert_backtrack *backtrack, BOOL conditional)
9857{
9858DEFINE_COMPILER;
9859int framesize;
9860int extrasize;
9861BOOL local_quit_available = FALSE;
9862BOOL needs_control_head;
9863int private_data_ptr;
9864backtrack_common altbacktrack;
9865PCRE2_SPTR ccbegin;
9866PCRE2_UCHAR opcode;
9867PCRE2_UCHAR bra = OP_BRA;
9868jump_list *tmp = NULL;
9869jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
9870jump_list **found;
9871/* Saving previous accept variables. */
9872BOOL save_local_quit_available = common->local_quit_available;
9873BOOL save_in_positive_assertion = common->in_positive_assertion;
9874then_trap_backtrack *save_then_trap = common->then_trap;
9875struct sljit_label *save_quit_label = common->quit_label;
9876struct sljit_label *save_accept_label = common->accept_label;
9877jump_list *save_quit = common->quit;
9878jump_list *save_positive_assertion_quit = common->positive_assertion_quit;
9879jump_list *save_accept = common->accept;
9880struct sljit_jump *jump;
9881struct sljit_jump *brajump = NULL;
9882
9883/* Assert captures then. */
9884common->then_trap = NULL;
9885
/* Remember a leading zero-repeat opcode and step over it. */
9886if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
9887  {
9888  SLJIT_ASSERT(!conditional);
9889  bra = *cc;
9890  cc++;
9891  }
9892private_data_ptr = PRIVATE_DATA(cc);
9893SLJIT_ASSERT(private_data_ptr != 0);
9894framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
9895backtrack->framesize = framesize;
9896backtrack->private_data_ptr = private_data_ptr;
9897opcode = *cc;
9898SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* For positive assertions the jumps taken after a matching alternative are
collected in the local 'tmp' list, for negative ones they go to 'target'. */
9899found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
/* ccbegin is the start of the current alternative, cc is its end. */
9900ccbegin = cc;
9901cc += GET(cc, 1);
9902
9903if (bra == OP_BRAMINZERO)
9904  {
9905  /* This is a braminzero backtrack path. */
9906  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9907  free_stack(common, 1);
9908  brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
9909  }
9910
/* Save the state needed to undo the assertion. extrasize is the number of
stack slots used for STR_PTR, the control head and (when a frame is needed)
the previous private data value. */
9911if (framesize < 0)
9912  {
9913  extrasize = 1;
9914  if (bra == OP_BRA && !assert_needs_str_ptr_saving(ccbegin + 1 + LINK_SIZE))
9915    extrasize = 0;
9916
9917  if (needs_control_head)
9918    extrasize++;
9919
9920  if (framesize == no_frame)
9921    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
9922
9923  if (extrasize > 0)
9924    allocate_stack(common, extrasize);
9925
9926  if (needs_control_head)
9927    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9928
9929  if (extrasize > 0)
9930    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9931
9932  if (needs_control_head)
9933    {
9934    SLJIT_ASSERT(extrasize == 2);
9935    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
9936    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
9937    }
9938  }
9939else
9940  {
9941  extrasize = needs_control_head ? 3 : 2;
9942  allocate_stack(common, framesize + extrasize);
9943
9944  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9945  OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
9946  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
9947  if (needs_control_head)
9948    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9949  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9950
9951  if (needs_control_head)
9952    {
9953    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
9954    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
9955    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
9956    }
9957  else
9958    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
9959
9960  init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize);
9961  }
9962
9963memset(&altbacktrack, 0, sizeof(backtrack_common));
9964if (conditional || (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT))
9965  {
9966  /* Control verbs cannot escape from these asserts. */
9967  local_quit_available = TRUE;
9968  common->local_quit_available = TRUE;
9969  common->quit_label = NULL;
9970  common->quit = NULL;
9971  }
9972
9973common->in_positive_assertion = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK);
9974common->positive_assertion_quit = NULL;
9975
/* Compile the alternatives one by one. */
9976while (1)
9977  {
9978  common->accept_label = NULL;
9979  common->accept = NULL;
9980  altbacktrack.top = NULL;
9981  altbacktrack.topbacktracks = NULL;
9982
  /* Second and later alternatives reload the saved STR_PTR. */
9983  if (*ccbegin == OP_ALT && extrasize > 0)
9984    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9985
9986  altbacktrack.cc = ccbegin;
9987  compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
9988  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9989    {
    /* Restore the saved compiler state before reporting the error. */
9990    if (local_quit_available)
9991      {
9992      common->local_quit_available = save_local_quit_available;
9993      common->quit_label = save_quit_label;
9994      common->quit = save_quit;
9995      }
9996    common->in_positive_assertion = save_in_positive_assertion;
9997    common->then_trap = save_then_trap;
9998    common->accept_label = save_accept_label;
9999    common->positive_assertion_quit = save_positive_assertion_quit;
10000    common->accept = save_accept;
10001    return NULL;
10002    }
  /* Accept jumps recorded while compiling this alternative land here. */
10003  common->accept_label = LABEL();
10004  if (common->accept != NULL)
10005    set_jumps(common->accept, common->accept_label);
10006
10007  /* Reset stack. */
10008  if (framesize < 0)
10009    {
10010    if (framesize == no_frame)
10011      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10012    else if (extrasize > 0)
10013      free_stack(common, extrasize);
10014
10015    if (needs_control_head)
10016      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
10017    }
10018  else
10019    {
10020    if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
10021      {
10022      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
10023      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
10024      if (needs_control_head)
10025        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
10026      }
10027    else
10028      {
10029      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10030      if (needs_control_head)
10031        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 2));
10032      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10033      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
10034      }
10035    }
10036
10037  if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
10038    {
10039    /* We know that STR_PTR was stored on the top of the stack. */
10040    if (conditional)
10041      {
10042      if (extrasize > 0)
10043        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? STACK(-2) : STACK(-1));
10044      }
10045    else if (bra == OP_BRAZERO)
10046      {
10047      if (framesize < 0)
10048        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
10049      else
10050        {
10051        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
10052        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize));
10053        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
10054        }
10055      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
10056      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10057      }
10058    else if (framesize >= 0)
10059      {
10060      /* For OP_BRA and OP_BRAMINZERO. */
10061      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
10062      }
10063    }
  /* The alternative matched: leave through the 'found' list. */
10064  add_jump(compiler, found, JUMP(SLJIT_JUMP));
10065
10066  compile_backtrackingpath(common, altbacktrack.top);
10067  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10068    {
    /* Restore the saved compiler state before reporting the error. */
10069    if (local_quit_available)
10070      {
10071      common->local_quit_available = save_local_quit_available;
10072      common->quit_label = save_quit_label;
10073      common->quit = save_quit;
10074      }
10075    common->in_positive_assertion = save_in_positive_assertion;
10076    common->then_trap = save_then_trap;
10077    common->accept_label = save_accept_label;
10078    common->positive_assertion_quit = save_positive_assertion_quit;
10079    common->accept = save_accept;
10080    return NULL;
10081    }
10082  set_jumps(altbacktrack.topbacktracks, LABEL());
10083
10084  if (*cc != OP_ALT)
10085    break;
10086
  /* Advance to the next alternative. */
10087  ccbegin = cc;
10088  cc += GET(cc, 1);
10089  }
10090
10091if (local_quit_available)
10092  {
10093  SLJIT_ASSERT(common->positive_assertion_quit == NULL);
10094  /* Makes the check less complicated below. */
10095  common->positive_assertion_quit = common->quit;
10096  }
10097
10098/* None of them matched. */
10099if (common->positive_assertion_quit != NULL)
10100  {
  /* The fall-through path skips the stack restoring code, which is only
  executed by the recorded quit jumps. */
10101  jump = JUMP(SLJIT_JUMP);
10102  set_jumps(common->positive_assertion_quit, LABEL());
10103  SLJIT_ASSERT(framesize != no_stack);
10104  if (framesize < 0)
10105    OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
10106  else
10107    {
10108    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10109    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10110    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (extrasize + 1) * sizeof(sljit_sw));
10111    }
10112  JUMPHERE(jump);
10113  }
10114
10115if (needs_control_head)
10116  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
10117
10118if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
10119  {
10120  /* Assert is failed. */
10121  if ((conditional && extrasize > 0) || bra == OP_BRAZERO)
10122    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10123
10124  if (framesize < 0)
10125    {
10126    /* The topmost item should be 0. */
10127    if (bra == OP_BRAZERO)
10128      {
10129      if (extrasize == 2)
10130        free_stack(common, 1);
10131      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10132      }
10133    else if (extrasize > 0)
10134      free_stack(common, extrasize);
10135    }
10136  else
10137    {
10138    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
10139    /* The topmost item should be 0. */
10140    if (bra == OP_BRAZERO)
10141      {
10142      free_stack(common, framesize + extrasize - 1);
10143      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10144      }
10145    else
10146      free_stack(common, framesize + extrasize);
10147    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
10148    }
  /* For OP_BRAZERO this jump is bound to the matching path label below,
  otherwise it is added to the failure list. */
10149  jump = JUMP(SLJIT_JUMP);
10150  if (bra != OP_BRAZERO)
10151    add_jump(compiler, target, jump);
10152
10153  /* Assert is successful. */
10154  set_jumps(tmp, LABEL());
10155  if (framesize < 0)
10156    {
10157    /* We know that STR_PTR was stored on the top of the stack. */
10158    if (extrasize > 0)
10159      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
10160
10161    /* Keep the STR_PTR on the top of the stack. */
10162    if (bra == OP_BRAZERO)
10163      {
10164      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
10165      if (extrasize == 2)
10166        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
10167      }
10168    else if (bra == OP_BRAMINZERO)
10169      {
10170      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
10171      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10172      }
10173    }
10174  else
10175    {
10176    if (bra == OP_BRA)
10177      {
10178      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
10179      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
10180      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1));
10181      }
10182    else
10183      {
10184      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
10185      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
10186      if (extrasize == 2)
10187        {
10188        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10189        if (bra == OP_BRAMINZERO)
10190          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10191        }
10192      else
10193        {
10194        SLJIT_ASSERT(extrasize == 3);
10195        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
10196        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
10197        }
10198      }
10199    }
10200
10201  if (bra == OP_BRAZERO)
10202    {
10203    backtrack->matchingpath = LABEL();
10204    SET_LABEL(jump, backtrack->matchingpath);
10205    }
10206  else if (bra == OP_BRAMINZERO)
10207    {
10208    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
10209    JUMPHERE(brajump);
10210    if (framesize >= 0)
10211      {
10212      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10213      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10214      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
10215      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
10216      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
10217      }
10218    set_jumps(backtrack->common.topbacktracks, LABEL());
10219    }
10220  }
10221else
10222  {
10223  /* AssertNot is successful. */
10224  if (framesize < 0)
10225    {
10226    if (extrasize > 0)
10227      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10228
10229    if (bra != OP_BRA)
10230      {
10231      if (extrasize == 2)
10232        free_stack(common, 1);
10233      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10234      }
10235    else if (extrasize > 0)
10236      free_stack(common, extrasize);
10237    }
10238  else
10239    {
10240    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10241    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
10242    /* The topmost item should be 0. */
10243    if (bra != OP_BRA)
10244      {
10245      free_stack(common, framesize + extrasize - 1);
10246      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10247      }
10248    else
10249      free_stack(common, framesize + extrasize);
10250    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
10251    }
10252
10253  if (bra == OP_BRAZERO)
10254    backtrack->matchingpath = LABEL();
10255  else if (bra == OP_BRAMINZERO)
10256    {
10257    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
10258    JUMPHERE(brajump);
10259    }
10260
10261  if (bra != OP_BRA)
10262    {
10263    SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
10264    set_jumps(backtrack->common.topbacktracks, LABEL());
10265    backtrack->common.topbacktracks = NULL;
10266    }
10267  }
10268
/* Restore the compiler state saved on entry. */
10269if (local_quit_available)
10270  {
10271  common->local_quit_available = save_local_quit_available;
10272  common->quit_label = save_quit_label;
10273  common->quit = save_quit;
10274  }
10275common->in_positive_assertion = save_in_positive_assertion;
10276common->then_trap = save_then_trap;
10277common->accept_label = save_accept_label;
10278common->positive_assertion_quit = save_positive_assertion_quit;
10279common->accept = save_accept;
10280return cc + 1 + LINK_SIZE;
10281}
10282
10283static SLJIT_INLINE void match_once_common(compiler_common *common, PCRE2_UCHAR ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
10284{
10285DEFINE_COMPILER;
10286int stacksize;
10287
10288if (framesize < 0)
10289  {
10290  if (framesize == no_frame)
10291    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10292  else
10293    {
10294    stacksize = needs_control_head ? 1 : 0;
10295    if (ket != OP_KET || has_alternatives)
10296      stacksize++;
10297
10298    if (stacksize > 0)
10299      free_stack(common, stacksize);
10300    }
10301
10302  if (needs_control_head)
10303    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? STACK(-2) : STACK(-1));
10304
10305  /* TMP2 which is set here used by OP_KETRMAX below. */
10306  if (ket == OP_KETRMAX)
10307    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
10308  else if (ket == OP_KETRMIN)
10309    {
10310    /* Move the STR_PTR to the private_data_ptr. */
10311    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
10312    }
10313  }
10314else
10315  {
10316  stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
10317  OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
10318  if (needs_control_head)
10319    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
10320
10321  if (ket == OP_KETRMAX)
10322    {
10323    /* TMP2 which is set here used by OP_KETRMAX below. */
10324    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10325    }
10326  }
10327if (needs_control_head)
10328  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
10329}
10330
10331static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
10332{
10333DEFINE_COMPILER;
10334
10335if (common->capture_last_ptr != 0)
10336  {
10337  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
10338  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
10339  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10340  stacksize++;
10341  }
10342if (common->optimized_cbracket[offset >> 1] == 0)
10343  {
10344  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
10345  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
10346  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10347  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10348  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
10349  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
10350  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
10351  stacksize += 2;
10352  }
10353return stacksize;
10354}
10355
10356static PCRE2_SPTR SLJIT_FUNC do_script_run(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
10357{
10358  if (PRIV(script_run)(ptr, endptr, FALSE))
10359    return endptr;
10360  return NULL;
10361}
10362
10363#ifdef SUPPORT_UNICODE
10364
10365static PCRE2_SPTR SLJIT_FUNC do_script_run_utf(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
10366{
10367  if (PRIV(script_run)(ptr, endptr, TRUE))
10368    return endptr;
10369  return NULL;
10370}
10371
10372#endif /* SUPPORT_UNICODE */
10373
10374static SLJIT_INLINE void match_script_run_common(compiler_common *common, int private_data_ptr, backtrack_common *parent)
10375{
10376DEFINE_COMPILER;
10377
10378SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
10379
10380OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10381#ifdef SUPPORT_UNICODE
10382sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM,
10383  common->utf ? SLJIT_FUNC_ADDR(do_script_run_utf) : SLJIT_FUNC_ADDR(do_script_run));
10384#else
10385sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_script_run));
10386#endif
10387
10388OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
10389add_jump(compiler, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
10390}
10391
10392/*
10393  Handling bracketed expressions is probably the most complex part.
10394
10395  Stack layout naming characters:
10396    S - Push the current STR_PTR
10397    0 - Push a 0 (NULL)
10398    A - Push the current STR_PTR. Needed for restoring the STR_PTR
10399        before the next alternative. Not pushed if there are no alternatives.
10400    M - Any values pushed by the current alternative. Can be empty, or anything.
10401    C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
10402    L - Push the previous local (pointed by localptr) to the stack
   () - optional values stored on the stack
  ()* - optional, can be stored multiple times
10405
10406  The following list shows the regular expression templates, their PCRE byte codes
10407  and stack layout supported by pcre-sljit.
10408
10409  (?:)                     OP_BRA     | OP_KET                A M
10410  ()                       OP_CBRA    | OP_KET                C M
10411  (?:)+                    OP_BRA     | OP_KETRMAX        0   A M S   ( A M S )*
10412                           OP_SBRA    | OP_KETRMAX        0   L M S   ( L M S )*
10413  (?:)+?                   OP_BRA     | OP_KETRMIN        0   A M S   ( A M S )*
10414                           OP_SBRA    | OP_KETRMIN        0   L M S   ( L M S )*
10415  ()+                      OP_CBRA    | OP_KETRMAX        0   C M S   ( C M S )*
10416                           OP_SCBRA   | OP_KETRMAX        0   C M S   ( C M S )*
10417  ()+?                     OP_CBRA    | OP_KETRMIN        0   C M S   ( C M S )*
10418                           OP_SCBRA   | OP_KETRMIN        0   C M S   ( C M S )*
10419  (?:)?    OP_BRAZERO    | OP_BRA     | OP_KET            S ( A M 0 )
10420  (?:)??   OP_BRAMINZERO | OP_BRA     | OP_KET            S ( A M 0 )
10421  ()?      OP_BRAZERO    | OP_CBRA    | OP_KET            S ( C M 0 )
10422  ()??     OP_BRAMINZERO | OP_CBRA    | OP_KET            S ( C M 0 )
10423  (?:)*    OP_BRAZERO    | OP_BRA     | OP_KETRMAX      S 0 ( A M S )*
10424           OP_BRAZERO    | OP_SBRA    | OP_KETRMAX      S 0 ( L M S )*
10425  (?:)*?   OP_BRAMINZERO | OP_BRA     | OP_KETRMIN      S 0 ( A M S )*
10426           OP_BRAMINZERO | OP_SBRA    | OP_KETRMIN      S 0 ( L M S )*
10427  ()*      OP_BRAZERO    | OP_CBRA    | OP_KETRMAX      S 0 ( C M S )*
10428           OP_BRAZERO    | OP_SCBRA   | OP_KETRMAX      S 0 ( C M S )*
10429  ()*?     OP_BRAMINZERO | OP_CBRA    | OP_KETRMIN      S 0 ( C M S )*
10430           OP_BRAMINZERO | OP_SCBRA   | OP_KETRMIN      S 0 ( C M S )*
10431
10432
10433  Stack layout naming characters:
10434    A - Push the alternative index (starting from 0) on the stack.
        Not pushed if there are no alternatives.
10436    M - Any values pushed by the current alternative. Can be empty, or anything.
10437
10438  The next list shows the possible content of a bracket:
10439  (|)     OP_*BRA    | OP_ALT ...         M A
10440  (?()|)  OP_*COND   | OP_ALT             M A
10441  (?>|)   OP_ONCE    | OP_ALT ...         [stack trace] M A
10442                                          Or nothing, if trace is unnecessary
10443*/
10444
/* Emits the matching-path code for one bracketed group that starts at cc,
   optionally preceded by OP_BRAZERO or OP_BRAMINZERO.

   The first alternative (or, for a condition that is resolved to false at
   compile time, the "else" branch) is compiled through compile_matchingpath();
   the remaining OP_ALT branches are only skipped and counted here. Labels and
   jumps that the backtracking path needs are recorded in the
   bracket_backtrack node created by PUSH_BACKTRACK.

   Arguments:
     common   shared state of the JIT compiler
     cc       the (optional zero prefix and) opening bracket opcode
     parent   enclosing backtrack node; for OP_BRAMINZERO it is accessed as
              a braminzero_backtrack

   Returns the position after the closing ket advanced by repeat_length, or
   NULL when the sljit compiler reports an error. */
static PCRE2_SPTR compile_bracket_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
PCRE2_UCHAR opcode;
int private_data_ptr = 0;
int offset = 0;
int i, stacksize;
int repeat_ptr = 0, repeat_length = 0;
int repeat_type = 0, repeat_count = 0;
PCRE2_SPTR ccbegin;
PCRE2_SPTR matchingpath;
PCRE2_SPTR slot;
PCRE2_UCHAR bra = OP_BRA;
PCRE2_UCHAR ket;
assert_backtrack *assert;
BOOL has_alternatives;
BOOL needs_control_head = FALSE;
struct sljit_jump *jump;
struct sljit_jump *skip;
struct sljit_label *rmax_label = NULL;
struct sljit_jump *braminzero = NULL;

/* NOTE(review): PUSH_BACKTRACK presumably allocates the node and assigns
   `backtrack` (it is used below without any other assignment) - confirm
   against the macro definition. */
PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);

/* Remember a leading zero-repeat prefix in `bra` and step over it. */
if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
  {
  bra = *cc;
  cc++;
  /* The same assignment is repeated unconditionally right after this block. */
  opcode = *cc;
  }

opcode = *cc;
ccbegin = cc;
/* Temporarily use matchingpath to inspect the closing ket of the group. */
matchingpath = bracketend(cc) - 1 - LINK_SIZE;
ket = *matchingpath;
/* An OP_KET with private data describes a counted repeat; the four private
   data entries hold the counter location, length, type and count. */
if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
  {
  repeat_ptr = PRIVATE_DATA(matchingpath);
  repeat_length = PRIVATE_DATA(matchingpath + 1);
  repeat_type = PRIVATE_DATA(matchingpath + 2);
  repeat_count = PRIVATE_DATA(matchingpath + 3);
  SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
  /* Bounded repeats are handled like the corresponding unbounded ket. */
  if (repeat_type == OP_UPTO)
    ket = OP_KETRMAX;
  if (repeat_type == OP_MINUPTO)
    ket = OP_KETRMIN;
  }

/* From here matchingpath points after the opening opcode and its link. */
matchingpath = ccbegin + 1 + LINK_SIZE;
SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
/* Follow the link: cc now points at the first OP_ALT or at the ket. */
cc += GET(cc, 1);

has_alternatives = *cc == OP_ALT;
if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
  {
  SLJIT_COMPILE_ASSERT(OP_DNRREF == OP_RREF + 1 && OP_FALSE == OP_RREF + 2 && OP_TRUE == OP_RREF + 3,
    compile_time_checks_must_be_grouped_together);
  /* Conditions in the OP_RREF..OP_TRUE range and OP_FAIL are decided below at
     compile time, so such a group is treated as having no alternatives. */
  has_alternatives = ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL) ? FALSE : TRUE;
  }

/* A repeated OP_COND is processed as OP_SCOND. */
if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
  opcode = OP_SCOND;

if (opcode == OP_CBRA || opcode == OP_SCBRA)
  {
  /* Capturing brackets have a pre-allocated space. */
  offset = GET2(ccbegin, 1 + LINK_SIZE);
  /* After this block `offset` is the ovector index (bracket number * 2). */
  if (common->optimized_cbracket[offset] == 0)
    {
    private_data_ptr = OVECTOR_PRIV(offset);
    offset <<= 1;
    }
  else
    {
    offset <<= 1;
    private_data_ptr = OVECTOR(offset);
    }
  BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
  matchingpath += IMM2_SIZE;
  }
else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_ONCE || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
  {
  /* Other brackets simply allocate the next entry. */
  private_data_ptr = PRIVATE_DATA(ccbegin);
  SLJIT_ASSERT(private_data_ptr != 0);
  BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
  if (opcode == OP_ONCE)
    BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
  }

/* Instructions before the first alternative. */
/* First pass: count the slots needed, second pass: fill them. */
stacksize = 0;
if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
  stacksize++;
if (bra == OP_BRAZERO)
  stacksize++;

if (stacksize > 0)
  allocate_stack(common, stacksize);

stacksize = 0;
if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
  {
  /* The "0" entry of the stack layout table. */
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
  stacksize++;
  }

/* The "S" entry pushed for OP_BRAZERO. */
if (bra == OP_BRAZERO)
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);

if (bra == OP_BRAMINZERO)
  {
  /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  if (ket != OP_KETRMIN)
    {
    free_stack(common, 1);
    /* The jump target is set near the end of this function. */
    braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
    }
  else if (opcode == OP_ONCE || opcode >= OP_SBRA)
    {
    jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    /* Nothing stored during the first run. */
    skip = JUMP(SLJIT_JUMP);
    JUMPHERE(jump);
    /* Checking zero-length iteration. */
    if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
      {
      /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
      braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      }
    else
      {
      /* Except when the whole stack frame must be saved. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-BACKTRACK_AS(bracket_backtrack)->u.framesize - 2));
      }
    JUMPHERE(skip);
    }
  else
    {
    jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    JUMPHERE(jump);
    }
  }

/* Counted repeat: load the counter; OP_EXACT loops back to this point. */
if (repeat_type != 0)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count);
  if (repeat_type == OP_EXACT)
    rmax_label = LABEL();
  }

if (ket == OP_KETRMIN)
  BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();

if (ket == OP_KETRMAX)
  {
  /* Start of one iteration of the greedy loop. */
  rmax_label = LABEL();
  if (has_alternatives && opcode >= OP_BRA && opcode < OP_SBRA && repeat_type == 0)
    BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
  }

/* Handling capturing brackets and alternatives. */
if (opcode == OP_ONCE)
  {
  stacksize = 0;
  if (needs_control_head)
    {
    /* TMP2 carries the control head until it is stored on the stack below. */
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    stacksize++;
    }

  if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
    {
    /* Neither capturing brackets nor recursions are found in the block. */
    if (ket == OP_KETRMIN)
      {
      stacksize += 2;
      /* TMP2 is free here only when it does not hold the control head. */
      if (!needs_control_head)
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      }
    else
      {
      if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
      if (ket == OP_KETRMAX || has_alternatives)
        stacksize++;
      }

    if (stacksize > 0)
      allocate_stack(common, stacksize);

    stacksize = 0;
    if (needs_control_head)
      {
      stacksize++;
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
      }

    if (ket == OP_KETRMIN)
      {
      /* The control head has been stored, so TMP2 can be reused now. */
      if (needs_control_head)
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
      if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
        OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
      }
    else if (ket == OP_KETRMAX || has_alternatives)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
    }
  else
    {
    /* A full frame is required: reserve room for it plus the extra entries. */
    if (ket != OP_KET || has_alternatives)
      stacksize++;

    stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
    allocate_stack(common, stacksize);

    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);

    /* TMP1: previous private data value, TMP2: new value stored into it. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));

    stacksize = needs_control_head ? 1 : 0;
    if (ket != OP_KET || has_alternatives)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
      stacksize++;
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
      }
    else
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
      }
    init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1);
    }
  }
else if (opcode == OP_CBRA || opcode == OP_SCBRA)
  {
  /* Saving the previous values. */
  if (common->optimized_cbracket[offset >> 1] != 0)
    {
    /* Optimized bracket: private_data_ptr is the ovector start slot itself,
       and the slot following it is saved as well. */
    SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
    allocate_stack(common, 2);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
    }
  else
    {
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    allocate_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
    }
  }
else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
  {
  /* Saving the previous value. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  allocate_stack(common, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
  }
else if (has_alternatives)
  {
  /* Pushing the starting string pointer. */
  allocate_stack(common, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  }

/* Generating code for the first alternative. */
if (opcode == OP_COND || opcode == OP_SCOND)
  {
  if (*matchingpath == OP_CREF)
    {
    /* The condition fails when the referenced ovector start entry equals
       the value stored in OVECTOR(1). */
    SLJIT_ASSERT(has_alternatives);
    add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
      CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
    matchingpath += 1 + IMM2_SIZE;
    }
  else if (*matchingpath == OP_DNCREF)
    {
    SLJIT_ASSERT(has_alternatives);

    /* i: number of name table entries to test, slot: the first entry.
       STR_PTR is used as a scratch register and restored from TMP3. */
    i = GET2(matchingpath, 1 + IMM2_SIZE);
    slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
    OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
    slot += common->name_entry_size;
    i--;
    /* TMP2 accumulates (entry - OVECTOR(1)) with OR; it stays zero only
       when every tested entry equals OVECTOR(1). */
    while (i-- > 0)
      {
      OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
      OP2(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, STR_PTR, 0);
      slot += common->name_entry_size;
      }
    OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
    add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_ZERO));
    matchingpath += 1 + 2 * IMM2_SIZE;
    }
  else if ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL)
    {
    /* Never has other case. */
    BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
    SLJIT_ASSERT(!has_alternatives);

    /* In this branch stacksize is reused as the compile-time result of the
       condition: non-zero selects the first branch, zero the "else" case. */
    if (*matchingpath == OP_TRUE)
      {
      stacksize = 1;
      matchingpath++;
      }
    else if (*matchingpath == OP_FALSE || *matchingpath == OP_FAIL)
      stacksize = 0;
    else if (*matchingpath == OP_RREF)
      {
      stacksize = GET2(matchingpath, 1);
      if (common->currententry == NULL)
        stacksize = 0;
      else if (stacksize == RREF_ANY)
        stacksize = 1;
      else if (common->currententry->start == 0)
        stacksize = stacksize == 0;
      else
        stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);

      if (stacksize != 0)
        matchingpath += 1 + IMM2_SIZE;
      }
    else
      {
      /* Remaining opcode of the range (OP_DNRREF): search the name table
         entries for the group number of the current entry. */
      if (common->currententry == NULL || common->currententry->start == 0)
        stacksize = 0;
      else
        {
        stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
        slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
        i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
        while (stacksize > 0)
          {
          if ((int)GET2(slot, 0) == i)
            break;
          slot += common->name_entry_size;
          stacksize--;
          }
        }

      if (stacksize != 0)
        matchingpath += 1 + 2 * IMM2_SIZE;
      }

      /* The stacksize == 0 is a common "else" case. */
      /* Compile the second branch when present, otherwise an empty range. */
      if (stacksize == 0)
        {
        if (*cc == OP_ALT)
          {
          matchingpath = cc + 1 + LINK_SIZE;
          cc += GET(cc, 1);
          }
        else
          matchingpath = cc;
        }
    }
  else
    {
    SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
    /* Similar code as PUSH_BACKTRACK macro. */
    assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
      return NULL;
    memset(assert, 0, sizeof(assert_backtrack));
    assert->common.cc = matchingpath;
    BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
    matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
    }
  }

/* Compile the selected alternative: the opcodes in [matchingpath, cc). */
compile_matchingpath(common, matchingpath, cc, backtrack);
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  return NULL;

/* Restore STR_PTR from the value saved at private_data_ptr before the body. */
if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA)
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);

if (opcode == OP_ONCE)
  match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);

if (opcode == OP_SCRIPT_RUN)
  match_script_run_common(common, private_data_ptr, backtrack);

/* Count the slots pushed after the alternative, then fill them in the same
   order: repeat counter, end marker, capture data, alternative index. */
stacksize = 0;
if (repeat_type == OP_MINUPTO)
  {
  /* We need to preserve the counter. TMP2 will be used below. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
  stacksize++;
  }
if (ket != OP_KET || bra != OP_BRA)
  stacksize++;
if (offset != 0)
  {
  if (common->capture_last_ptr != 0)
    stacksize++;
  if (common->optimized_cbracket[offset >> 1] == 0)
    stacksize += 2;
  }
if (has_alternatives && opcode != OP_ONCE)
  stacksize++;

if (stacksize > 0)
  allocate_stack(common, stacksize);

stacksize = 0;
if (repeat_type == OP_MINUPTO)
  {
  /* TMP2 was set above. */
  OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
  stacksize++;
  }

if (ket != OP_KET || bra != OP_BRA)
  {
  /* Repeated groups push STR_PTR ("S"), a zero-prefixed OP_KET pushes 0. */
  if (ket != OP_KET)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
  else
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
  stacksize++;
  }

if (offset != 0)
  stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);

/* Skip and count the other alternatives. */
i = 1;
while (*cc == OP_ALT)
  {
  cc += GET(cc, 1);
  i++;
  }

if (has_alternatives)
  {
  if (opcode != OP_ONCE)
    {
    /* With few alternatives a constant 0 is stored; otherwise a put_label
       is emitted and kept in u.matching_put_label. */
    if (i <= 3)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
    else
      BACKTRACK_AS(bracket_backtrack)->u.matching_put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize));
    }
  if (ket != OP_KETRMAX)
    BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
  }

/* Must be after the matchingpath label. */
if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
  {
  SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
  }

if (ket == OP_KETRMAX)
  {
  if (repeat_type != 0)
    {
    if (has_alternatives)
      BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
    /* Decrement the counter and iterate again while it is non-zero. */
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, rmax_label);
    /* Drop STR_PTR for greedy plus quantifier. */
    if (opcode != OP_ONCE)
      free_stack(common, 1);
    }
  else if (opcode < OP_BRA || opcode >= OP_SBRA)
    {
    if (has_alternatives)
      BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();

    /* Checking zero-length iteration. */
    if (opcode != OP_ONCE)
      {
      /* This case includes opcodes such as OP_SCRIPT_RUN. */
      CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label);
      /* Drop STR_PTR for greedy plus quantifier. */
      if (bra != OP_BRAZERO)
        free_stack(common, 1);
      }
    else
      /* TMP2 must contain the starting STR_PTR. */
      CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
    }
  else
    JUMPTO(SLJIT_JUMP, rmax_label);
  BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
  }

if (repeat_type == OP_EXACT)
  {
  /* Exact repeat: loop back to rmax_label until the counter reaches zero. */
  count_match(common);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  JUMPTO(SLJIT_NOT_ZERO, rmax_label);
  }
else if (repeat_type == OP_UPTO)
  {
  /* We need to preserve the counter. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
  allocate_stack(common, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
  }

if (bra == OP_BRAZERO)
  BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();

if (bra == OP_BRAMINZERO)
  {
  /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
  JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
  if (braminzero != NULL)
    {
    /* Target of the compare jump emitted before the first alternative. */
    JUMPHERE(braminzero);
    /* We need to release the end pointer to perform the
    backtrack for the zero-length iteration. When
    framesize is < 0, OP_ONCE will do the release itself. */
    if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw));
      }
    else if (ket == OP_KETRMIN && opcode != OP_ONCE)
      free_stack(common, 1);
    }
  /* Continue to the normal backtrack. */
  }

if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
  count_match(common);

/* Step over the closing ket and its link. */
cc += 1 + LINK_SIZE;

if (opcode == OP_ONCE)
  {
  /* We temporarily encode the needs_control_head in the lowest bit.
     Note: on the target architectures of SLJIT the ((x << 1) >> 1) returns
     the same value for small signed numbers (including negative numbers). */
  BACKTRACK_AS(bracket_backtrack)->u.framesize = (int)((unsigned)BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
  }
/* repeat_length is zero unless the ket carried counted-repeat data. */
return cc + repeat_length;
}
11005
11006static PCRE2_SPTR compile_bracketpos_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
11007{
11008DEFINE_COMPILER;
11009backtrack_common *backtrack;
11010PCRE2_UCHAR opcode;
11011int private_data_ptr;
11012int cbraprivptr = 0;
11013BOOL needs_control_head;
11014int framesize;
11015int stacksize;
11016int offset = 0;
11017BOOL zero = FALSE;
11018PCRE2_SPTR ccbegin = NULL;
11019int stack; /* Also contains the offset of control head. */
11020struct sljit_label *loop = NULL;
11021struct jump_list *emptymatch = NULL;
11022
11023PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
11024if (*cc == OP_BRAPOSZERO)
11025  {
11026  zero = TRUE;
11027  cc++;
11028  }
11029
11030opcode = *cc;
11031private_data_ptr = PRIVATE_DATA(cc);
11032SLJIT_ASSERT(private_data_ptr != 0);
11033BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
11034switch(opcode)
11035  {
11036  case OP_BRAPOS:
11037  case OP_SBRAPOS:
11038  ccbegin = cc + 1 + LINK_SIZE;
11039  break;
11040
11041  case OP_CBRAPOS:
11042  case OP_SCBRAPOS:
11043  offset = GET2(cc, 1 + LINK_SIZE);
11044  /* This case cannot be optimized in the same was as
11045  normal capturing brackets. */
11046  SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
11047  cbraprivptr = OVECTOR_PRIV(offset);
11048  offset <<= 1;
11049  ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
11050  break;
11051
11052  default:
11053  SLJIT_UNREACHABLE();
11054  break;
11055  }
11056
11057framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
11058BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
11059if (framesize < 0)
11060  {
11061  if (offset != 0)
11062    {
11063    stacksize = 2;
11064    if (common->capture_last_ptr != 0)
11065      stacksize++;
11066    }
11067  else
11068    stacksize = 1;
11069
11070  if (needs_control_head)
11071    stacksize++;
11072  if (!zero)
11073    stacksize++;
11074
11075  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
11076  allocate_stack(common, stacksize);
11077  if (framesize == no_frame)
11078    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
11079
11080  stack = 0;
11081  if (offset != 0)
11082    {
11083    stack = 2;
11084    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
11085    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
11086    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
11087    if (common->capture_last_ptr != 0)
11088      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
11089    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
11090    if (needs_control_head)
11091      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
11092    if (common->capture_last_ptr != 0)
11093      {
11094      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
11095      stack = 3;
11096      }
11097    }
11098  else
11099    {
11100    if (needs_control_head)
11101      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
11102    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11103    stack = 1;
11104    }
11105
11106  if (needs_control_head)
11107    stack++;
11108  if (!zero)
11109    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
11110  if (needs_control_head)
11111    {
11112    stack--;
11113    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
11114    }
11115  }
11116else
11117  {
11118  stacksize = framesize + 1;
11119  if (!zero)
11120    stacksize++;
11121  if (needs_control_head)
11122    stacksize++;
11123  if (offset == 0)
11124    stacksize++;
11125  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
11126
11127  allocate_stack(common, stacksize);
11128  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11129  if (needs_control_head)
11130    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
11131  OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
11132
11133  stack = 0;
11134  if (!zero)
11135    {
11136    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
11137    stack = 1;
11138    }
11139  if (needs_control_head)
11140    {
11141    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
11142    stack++;
11143    }
11144  if (offset == 0)
11145    {
11146    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
11147    stack++;
11148    }
11149  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
11150  init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize);
11151  stack -= 1 + (offset == 0);
11152  }
11153
11154if (offset != 0)
11155  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
11156
11157loop = LABEL();
11158while (*cc != OP_KETRPOS)
11159  {
11160  backtrack->top = NULL;
11161  backtrack->topbacktracks = NULL;
11162  cc += GET(cc, 1);
11163
11164  compile_matchingpath(common, ccbegin, cc, backtrack);
11165  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11166    return NULL;
11167
11168  if (framesize < 0)
11169    {
11170    if (framesize == no_frame)
11171      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11172
11173    if (offset != 0)
11174      {
11175      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
11176      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
11177      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
11178      if (common->capture_last_ptr != 0)
11179        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
11180      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
11181      }
11182    else
11183      {
11184      if (opcode == OP_SBRAPOS)
11185        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
11186      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11187      }
11188
11189    /* Even if the match is empty, we need to reset the control head. */
11190    if (needs_control_head)
11191      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
11192
11193    if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
11194      add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));
11195
11196    if (!zero)
11197      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
11198    }
11199  else
11200    {
11201    if (offset != 0)
11202      {
11203      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
11204      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
11205      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
11206      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
11207      if (common->capture_last_ptr != 0)
11208        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
11209      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
11210      }
11211    else
11212      {
11213      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11214      OP2(SLJIT_SUB, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
11215      if (opcode == OP_SBRAPOS)
11216        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
11217      OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(-framesize - 2), STR_PTR, 0);
11218      }
11219
11220    /* Even if the match is empty, we need to reset the control head. */
11221    if (needs_control_head)
11222      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
11223
11224    if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
11225      add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));
11226
11227    if (!zero)
11228      {
11229      if (framesize < 0)
11230        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
11231      else
11232        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
11233      }
11234    }
11235
11236  JUMPTO(SLJIT_JUMP, loop);
11237  flush_stubs(common);
11238
11239  compile_backtrackingpath(common, backtrack->top);
11240  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11241    return NULL;
11242  set_jumps(backtrack->topbacktracks, LABEL());
11243
11244  if (framesize < 0)
11245    {
11246    if (offset != 0)
11247      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
11248    else
11249      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
11250    }
11251  else
11252    {
11253    if (offset != 0)
11254      {
11255      /* Last alternative. */
11256      if (*cc == OP_KETRPOS)
11257        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11258      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
11259      }
11260    else
11261      {
11262      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11263      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
11264      }
11265    }
11266
11267  if (*cc == OP_KETRPOS)
11268    break;
11269  ccbegin = cc + 1 + LINK_SIZE;
11270  }
11271
11272/* We don't have to restore the control head in case of a failed match. */
11273
11274backtrack->topbacktracks = NULL;
11275if (!zero)
11276  {
11277  if (framesize < 0)
11278    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
11279  else /* TMP2 is set to [private_data_ptr] above. */
11280    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), STACK(-stacksize), SLJIT_IMM, 0));
11281  }
11282
11283/* None of them matched. */
11284set_jumps(emptymatch, LABEL());
11285count_match(common);
11286return cc + 1 + LINK_SIZE;
11287}
11288
11289static SLJIT_INLINE PCRE2_SPTR get_iterator_parameters(compiler_common *common, PCRE2_SPTR cc, PCRE2_UCHAR *opcode, PCRE2_UCHAR *type, sljit_u32 *max, sljit_u32 *exact, PCRE2_SPTR *end)
11290{
11291int class_len;
11292
11293*opcode = *cc;
11294*exact = 0;
11295
11296if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
11297  {
11298  cc++;
11299  *type = OP_CHAR;
11300  }
11301else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
11302  {
11303  cc++;
11304  *type = OP_CHARI;
11305  *opcode -= OP_STARI - OP_STAR;
11306  }
11307else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
11308  {
11309  cc++;
11310  *type = OP_NOT;
11311  *opcode -= OP_NOTSTAR - OP_STAR;
11312  }
11313else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
11314  {
11315  cc++;
11316  *type = OP_NOTI;
11317  *opcode -= OP_NOTSTARI - OP_STAR;
11318  }
11319else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
11320  {
11321  cc++;
11322  *opcode -= OP_TYPESTAR - OP_STAR;
11323  *type = OP_END;
11324  }
11325else
11326  {
11327  SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS);
11328  *type = *opcode;
11329  cc++;
11330  class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(PCRE2_UCHAR))) : GET(cc, 0);
11331  *opcode = cc[class_len - 1];
11332
11333  if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
11334    {
11335    *opcode -= OP_CRSTAR - OP_STAR;
11336    *end = cc + class_len;
11337
11338    if (*opcode == OP_PLUS || *opcode == OP_MINPLUS)
11339      {
11340      *exact = 1;
11341      *opcode -= OP_PLUS - OP_STAR;
11342      }
11343    }
11344  else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)
11345    {
11346    *opcode -= OP_CRPOSSTAR - OP_POSSTAR;
11347    *end = cc + class_len;
11348
11349    if (*opcode == OP_POSPLUS)
11350      {
11351      *exact = 1;
11352      *opcode = OP_POSSTAR;
11353      }
11354    }
11355  else
11356    {
11357    SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE);
11358    *max = GET2(cc, (class_len + IMM2_SIZE));
11359    *exact = GET2(cc, class_len);
11360
11361    if (*max == 0)
11362      {
11363      if (*opcode == OP_CRPOSRANGE)
11364        *opcode = OP_POSSTAR;
11365      else
11366        *opcode -= OP_CRRANGE - OP_STAR;
11367      }
11368    else
11369      {
11370      *max -= *exact;
11371      if (*max == 0)
11372        *opcode = OP_EXACT;
11373      else if (*max == 1)
11374        {
11375        if (*opcode == OP_CRPOSRANGE)
11376          *opcode = OP_POSQUERY;
11377        else
11378          *opcode -= OP_CRRANGE - OP_QUERY;
11379        }
11380      else
11381        {
11382        if (*opcode == OP_CRPOSRANGE)
11383          *opcode = OP_POSUPTO;
11384        else
11385          *opcode -= OP_CRRANGE - OP_UPTO;
11386        }
11387      }
11388    *end = cc + class_len + 2 * IMM2_SIZE;
11389    }
11390  return cc;
11391  }
11392
11393switch(*opcode)
11394  {
11395  case OP_EXACT:
11396  *exact = GET2(cc, 0);
11397  cc += IMM2_SIZE;
11398  break;
11399
11400  case OP_PLUS:
11401  case OP_MINPLUS:
11402  *exact = 1;
11403  *opcode -= OP_PLUS - OP_STAR;
11404  break;
11405
11406  case OP_POSPLUS:
11407  *exact = 1;
11408  *opcode = OP_POSSTAR;
11409  break;
11410
11411  case OP_UPTO:
11412  case OP_MINUPTO:
11413  case OP_POSUPTO:
11414  *max = GET2(cc, 0);
11415  cc += IMM2_SIZE;
11416  break;
11417  }
11418
11419if (*type == OP_END)
11420  {
11421  *type = *cc;
11422  *end = next_opcode(common, cc);
11423  cc++;
11424  return cc;
11425  }
11426
11427*end = cc + 1;
11428#ifdef SUPPORT_UNICODE
11429if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
11430#endif
11431return cc;
11432}
11433
/* Emits the matching path for a repeated single item (character, negated
character, character type or character class).

The repeat is decoded by get_iterator_parameters() into a normalized opcode,
an item type, a mandatory count (exact) and an optional limit (max). Code for
the mandatory part is emitted first, then opcode specific code for the
optional part.

Arguments:
  common   compiler state
  cc       points to the repeat opcode
  parent   not referenced directly in this function; presumably used by the
           PUSH_BACKTRACK macro to link the new entry (macro not visible here)

Returns:   the end position computed by get_iterator_parameters(), moved past
           a following literal when the charpos path below consumes it.
           PUSH_BACKTRACK is given NULL as its error value.
*/
11434static PCRE2_SPTR compile_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
11435{
11436DEFINE_COMPILER;
11437backtrack_common *backtrack;
11438PCRE2_UCHAR opcode;
11439PCRE2_UCHAR type;
11440sljit_u32 max = 0, exact;
11441sljit_s32 early_fail_ptr = PRIVATE_DATA(cc + 1);
11442sljit_s32 early_fail_type;
11443BOOL charpos_enabled;
11444PCRE2_UCHAR charpos_char;
11445unsigned int charpos_othercasebit;
11446PCRE2_SPTR end;
11447jump_list *no_match = NULL;
11448jump_list *no_char1_match = NULL;
11449struct sljit_jump *jump = NULL;
11450struct sljit_label *label;
11451int private_data_ptr = PRIVATE_DATA(cc);
/* Without private data the two saved values live on the backtrack stack
(STACK(0) / STACK(1)); otherwise they live in two consecutive words
addressed relative to SLJIT_SP. */
11452int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
11453int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
11454int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + SSIZE_OF(sw);
11455int tmp_base, tmp_offset;
11456#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11457BOOL use_tmp;
11458#endif
11459
11460PUSH_BACKTRACK(sizeof(char_iterator_backtrack), cc, NULL);
11461
/* The private data of cc + 1 packs the early fail type into the low three
bits and the slot offset into the remaining bits. */
11462early_fail_type = (early_fail_ptr & 0x7);
11463early_fail_ptr >>= 3;
11464
11465/* During recursion, these optimizations are disabled. */
11466if (common->early_fail_start_ptr == 0 && common->fast_forward_bc_ptr == NULL)
11467  {
11468  early_fail_ptr = 0;
11469  early_fail_type = type_skip;
11470  }
11471
11472SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || early_fail_ptr == 0
11473  || (early_fail_ptr >= common->early_fail_start_ptr && early_fail_ptr <= common->early_fail_end_ptr));
11474
/* Backtrack at once when STR_PTR is not greater than the position stored in
the early fail slot. */
11475if (early_fail_type == type_fail)
11476  add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr));
11477
11478cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
11479
/* Scratch location for loop counters and saved positions: TMP3 in general,
the POSSESSIVE0 slot when the item is OP_EXTUNI. */
11480if (type != OP_EXTUNI)
11481  {
11482  tmp_base = TMP3;
11483  tmp_offset = 0;
11484  }
11485else
11486  {
11487  tmp_base = SLJIT_MEM1(SLJIT_SP);
11488  tmp_offset = POSSESSIVE0;
11489  }
11490
11491/* Handle fixed part first. */
11492if (exact > 1)
11493  {
11494  SLJIT_ASSERT(early_fail_ptr == 0);
11495
  /* In complete mode without UTF (and for items other than OP_ANYNL and
  OP_EXTUNI) the subject length is tested once for all "exact" items, and
  the item matcher is called with FALSE as its last argument (presumably
  the per-item end of subject check - confirm at compile_char1_matchingpath). */
11496  if (common->mode == PCRE2_JIT_COMPLETE
11497#ifdef SUPPORT_UNICODE
11498      && !common->utf
11499#endif
11500      && type != OP_ANYNL && type != OP_EXTUNI)
11501    {
11502    OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact));
11503    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER, TMP1, 0, STR_END, 0));
11504    OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
11505    label = LABEL();
11506    compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
11507    OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11508    JUMPTO(SLJIT_NOT_ZERO, label);
11509    }
11510  else
11511    {
11512    OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
11513    label = LABEL();
11514    compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
11515    OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11516    JUMPTO(SLJIT_NOT_ZERO, label);
11517    }
11518  }
11519else if (exact == 1)
11520  compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
11521
11522if (early_fail_type == type_fail_range)
11523  {
11524  /* Range end first, followed by range start. */
11525  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr);
11526  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + SSIZE_OF(sw));
  /* TMP1 = end - start, TMP2 = STR_PTR - start; backtrack when the second
  value is not greater than the first. */
11527  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, TMP2, 0);
11528  OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, TMP2, 0);
11529  add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, TMP2, 0, TMP1, 0));
11530
  /* Both words of the slot are reset to the current position. */
11531  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11532  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + SSIZE_OF(sw), STR_PTR, 0);
11533  }
11534
/* The optional part of the repeat. */
11535switch(opcode)
11536  {
11537  case OP_STAR:
11538  case OP_UPTO:
11539  SLJIT_ASSERT(early_fail_ptr == 0 || opcode == OP_STAR);
11540
11541  if (type == OP_ANYNL || type == OP_EXTUNI)
11542    {
11543    SLJIT_ASSERT(private_data_ptr == 0);
11544    SLJIT_ASSERT(early_fail_ptr == 0);
11545
    /* The starting position and a zero word are pushed first, then one
    stack word holding STR_PTR is pushed after every matched item. */
11546    allocate_stack(common, 2);
11547    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11548    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
11549
11550    if (opcode == OP_UPTO)
11551      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, max);
11552
11553    label = LABEL();
11554    compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
11555    if (opcode == OP_UPTO)
11556      {
      /* Leave the loop when the remaining count reaches zero. */
11557      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
11558      OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
11559      jump = JUMP(SLJIT_ZERO);
11560      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
11561      }
11562
11563    /* We cannot use TMP3 because of allocate_stack. */
11564    allocate_stack(common, 1);
11565    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11566    JUMPTO(SLJIT_JUMP, label);
11567    if (jump != NULL)
11568      JUMPHERE(jump);
11569    BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11570    break;
11571    }
11572#ifdef SUPPORT_UNICODE
11573  else if (type == OP_ALLANY && !common->invalid_utf)
11574#else
11575  else if (type == OP_ALLANY)
11576#endif
11577    {
11578    if (opcode == OP_STAR)
11579      {
      /* No loop is needed: STR_PTR is set to STR_END directly. offset0
      receives STR_END and offset1 the starting position. */
11580      if (private_data_ptr == 0)
11581        allocate_stack(common, 2);
11582
11583      OP1(SLJIT_MOV, base, offset0, STR_END, 0);
11584      OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11585
11586      OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
11587      process_partial_match(common);
11588
11589      if (early_fail_ptr != 0)
11590        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);
11591      BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11592      break;
11593      }
11594#ifdef SUPPORT_UNICODE
11595    else if (!common->utf)
11596#else
11597    else
11598#endif
11599      {
      /* OP_UPTO without UTF: advance by max code units in one step. In
      complete mode the result is clamped to STR_END, otherwise
      process_partial_match() is emitted for the case where STR_PTR
      went past STR_END. */
11600      if (private_data_ptr == 0)
11601        allocate_stack(common, 2);
11602
11603      OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11604      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max));
11605
11606      if (common->mode == PCRE2_JIT_COMPLETE)
11607        {
11608        OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
11609        CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
11610        }
11611      else
11612        {
11613        jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0);
11614        process_partial_match(common);
11615        JUMPHERE(jump);
11616        }
11617
11618      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11619
11620      if (early_fail_ptr != 0)
11621        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11622      BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11623      break;
11624      }
11625    }
11626
  /* Generic greedy path. When the repeat is directly followed by a literal
  (OP_CHAR / OP_CHARI) and the repeated item is not a literal itself, the
  "charpos" variant below is generated: it only records positions where
  the next code unit equals that literal. */
11627  charpos_enabled = FALSE;
11628  charpos_char = 0;
11629  charpos_othercasebit = 0;
11630
11631  if ((type != OP_CHAR && type != OP_CHARI) && (*end == OP_CHAR || *end == OP_CHARI))
11632    {
11633#ifdef SUPPORT_UNICODE
11634    charpos_enabled = !common->utf || !HAS_EXTRALEN(end[1]);
11635#else
11636    charpos_enabled = TRUE;
11637#endif
    /* A caseless literal is only usable when char_get_othercase_bit()
    returns a non-zero bit for it. */
11638    if (charpos_enabled && *end == OP_CHARI && char_has_othercase(common, end + 1))
11639      {
11640      charpos_othercasebit = char_get_othercase_bit(common, end + 1);
11641      if (charpos_othercasebit == 0)
11642        charpos_enabled = FALSE;
11643      }
11644
11645    if (charpos_enabled)
11646      {
11647      charpos_char = end[1];
11648      /* Consume the OP_CHAR opcode. */
11649      end += 2;
11650#if PCRE2_CODE_UNIT_WIDTH == 8
11651      SLJIT_ASSERT((charpos_othercasebit >> 8) == 0);
11652#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
11653      SLJIT_ASSERT((charpos_othercasebit >> 9) == 0);
      /* When bit 0x100 is set, the low byte is moved into bits 8-15. */
11654      if ((charpos_othercasebit & 0x100) != 0)
11655        charpos_othercasebit = (charpos_othercasebit & 0xff) << 8;
11656#endif
11657      if (charpos_othercasebit != 0)
11658        charpos_char |= charpos_othercasebit;
11659
11660      BACKTRACK_AS(char_iterator_backtrack)->u.charpos.enabled = TRUE;
11661      BACKTRACK_AS(char_iterator_backtrack)->u.charpos.chr = charpos_char;
11662      BACKTRACK_AS(char_iterator_backtrack)->u.charpos.othercasebit = charpos_othercasebit;
11663      }
11664    }
11665
11666  if (charpos_enabled)
11667    {
11668    if (opcode == OP_UPTO)
11669      OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max + 1);
11670
11671    /* Search the first instance of charpos_char. */
    /* The initial jump enters the loop at the comparison, so the current
    position is tested before any item is consumed. */
11672    jump = JUMP(SLJIT_JUMP);
11673    label = LABEL();
11674    if (opcode == OP_UPTO)
11675      {
11676      OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11677      add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_ZERO));
11678      }
11679    compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
11680    if (early_fail_ptr != 0)
11681      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11682    JUMPHERE(jump);
11683
11684    detect_partial_match(common, &backtrack->topbacktracks);
11685    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
11686    if (charpos_othercasebit != 0)
11687      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
11688    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
11689
    /* First candidate found: both saved words start out as this position. */
11690    if (private_data_ptr == 0)
11691      allocate_stack(common, 2);
11692    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11693    OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11694
11695    if (opcode == OP_UPTO)
11696      {
11697      OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11698      add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11699      }
11700
11701    /* Search the last instance of charpos_char. */
11702    label = LABEL();
11703    compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
11704    if (early_fail_ptr != 0)
11705      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11706    detect_partial_match(common, &no_match);
11707    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
11708    if (charpos_othercasebit != 0)
11709      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
11710
    /* offset0 is overwritten each time the literal is seen again. */
11711    if (opcode == OP_STAR)
11712      {
11713      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
11714      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11715      JUMPTO(SLJIT_JUMP, label);
11716      }
11717    else
11718      {
11719      jump = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char);
11720      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11721      JUMPHERE(jump);
11722      OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11723      JUMPTO(SLJIT_NOT_ZERO, label);
11724      }
11725
    /* Continue one code unit after the last recorded position, i.e. just
    after the literal that was consumed from the pattern above. */
11726    set_jumps(no_match, LABEL());
11727    OP2(SLJIT_ADD, STR_PTR, 0, base, offset0, SLJIT_IMM, IN_UCHARS(1));
11728    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11729    }
11730  else
11731    {
    /* Plain greedy loop: offset1 keeps the starting position and offset0
    receives the position reached when the loop ends. */
11732    if (private_data_ptr == 0)
11733      allocate_stack(common, 2);
11734
11735    OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11736#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
    /* In UTF mode the position after the last complete item is tracked
    explicitly, in TMP3 when use_tmp is set and in offset0 otherwise. */
11737    use_tmp = (!HAS_VIRTUAL_REGISTERS && opcode == OP_STAR);
11738    SLJIT_ASSERT(!use_tmp || tmp_base == TMP3);
11739
11740    if (common->utf)
11741      OP1(SLJIT_MOV, use_tmp ? TMP3 : base, use_tmp ? 0 : offset0, STR_PTR, 0);
11742#endif
11743    if (opcode == OP_UPTO)
11744      OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
11745
11746    detect_partial_match(common, &no_match);
11747    label = LABEL();
11748    compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11749#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11750    if (common->utf)
11751      OP1(SLJIT_MOV, use_tmp ? TMP3 : base, use_tmp ? 0 : offset0, STR_PTR, 0);
11752#endif
11753
11754    if (opcode == OP_UPTO)
11755      {
11756      OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11757      add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11758      }
11759
11760    detect_partial_match_to(common, label);
    /* The add below is undone by the subtract in the non-UTF exit code,
    which the no_char1_match jumps also pass through. */
11761    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11762
11763    set_jumps(no_char1_match, LABEL());
11764#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11765    if (common->utf)
11766      {
11767      set_jumps(no_match, LABEL());
11768      if (use_tmp)
11769        {
11770        OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
11771        OP1(SLJIT_MOV, base, offset0, TMP3, 0);
11772        }
11773      else
11774        OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
11775      }
11776    else
11777#endif
11778      {
11779      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11780      set_jumps(no_match, LABEL());
11781      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11782      }
11783
11784    if (early_fail_ptr != 0)
11785      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11786    }
11787
11788  BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11789  break;
11790
  /* Only the current position is saved; no item is matched here. */
11791  case OP_MINSTAR:
11792  if (private_data_ptr == 0)
11793    allocate_stack(common, 1);
11794  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11795  BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11796  if (early_fail_ptr != 0)
11797    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11798  break;
11799
  /* Saves the current position and max + 1 as the second word. */
11800  case OP_MINUPTO:
11801  SLJIT_ASSERT(early_fail_ptr == 0);
11802  if (private_data_ptr == 0)
11803    allocate_stack(common, 2);
11804  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11805  OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, max + 1);
11806  BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11807  break;
11808
  /* Saves the current position; only OP_QUERY tries to match the item on
  the matching path. */
11809  case OP_QUERY:
11810  case OP_MINQUERY:
11811  SLJIT_ASSERT(early_fail_ptr == 0);
11812  if (private_data_ptr == 0)
11813    allocate_stack(common, 1);
11814  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11815  if (opcode == OP_QUERY)
11816    compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
11817  BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11818  break;
11819
  /* Nothing is left to do: the fixed part above covers everything. */
11820  case OP_EXACT:
11821  break;
11822
  /* The possessive cases below neither allocate stack words nor set a
  matchingpath label. */
11823  case OP_POSSTAR:
11824#if defined SUPPORT_UNICODE
11825  if (type == OP_ALLANY && !common->invalid_utf)
11826#else
11827  if (type == OP_ALLANY)
11828#endif
11829    {
11830    OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
11831    process_partial_match(common);
11832    if (early_fail_ptr != 0)
11833      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);
11834    break;
11835    }
11836
11837#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11838  if (common->utf)
11839    {
    /* The scratch location holds the position after the last complete
    item; STR_PTR is restored from it when the loop stops. */
11840    OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11841    detect_partial_match(common, &no_match);
11842    label = LABEL();
11843    compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
11844    OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11845    detect_partial_match_to(common, label);
11846
11847    set_jumps(no_match, LABEL());
11848    OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
11849    if (early_fail_ptr != 0)
11850      {
11851      if (!HAS_VIRTUAL_REGISTERS && tmp_base == TMP3)
11852        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, TMP3, 0);
11853      else
11854        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11855      }
11856    break;
11857    }
11858#endif
11859
11860  detect_partial_match(common, &no_match);
11861  label = LABEL();
11862  compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11863  detect_partial_match_to(common, label);
  /* Same add / subtract pairing as in the greedy loop above. */
11864  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11865
11866  set_jumps(no_char1_match, LABEL());
11867  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11868  set_jumps(no_match, LABEL());
11869  if (early_fail_ptr != 0)
11870    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11871  break;
11872
11873  case OP_POSUPTO:
11874  SLJIT_ASSERT(early_fail_ptr == 0);
11875#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11876  if (common->utf)
11877    {
    /* POSSESSIVE1 holds the position after the last complete item, the
    scratch location holds the remaining count. */
11878    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
11879    OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
11880
11881    detect_partial_match(common, &no_match);
11882    label = LABEL();
11883    compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
11884    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
11885    OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11886    add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11887    detect_partial_match_to(common, label);
11888
11889    set_jumps(no_match, LABEL());
11890    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
11891    break;
11892    }
11893#endif
11894
11895  if (type == OP_ALLANY)
11896    {
    /* Advance by max code units at once, handling the end of the subject
    the same way as the greedy OP_UPTO case above. */
11897    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max));
11898
11899    if (common->mode == PCRE2_JIT_COMPLETE)
11900      {
11901      OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
11902      CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
11903      }
11904    else
11905      {
11906      jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0);
11907      process_partial_match(common);
11908      JUMPHERE(jump);
11909      }
11910    break;
11911    }
11912
11913  OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
11914
11915  detect_partial_match(common, &no_match);
11916  label = LABEL();
11917  compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11918  OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11919  add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11920  detect_partial_match_to(common, label);
11921  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11922
11923  set_jumps(no_char1_match, LABEL());
11924  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11925  set_jumps(no_match, LABEL());
11926  break;
11927
  /* The scratch location keeps the position to continue from: the start
  when the item fails, the position after the item when it matches. */
11928  case OP_POSQUERY:
11929  SLJIT_ASSERT(early_fail_ptr == 0);
11930  OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11931  compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
11932  OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11933  set_jumps(no_match, LABEL());
11934  OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
11935  break;
11936
11937  default:
11938  SLJIT_UNREACHABLE();
11939  break;
11940  }
11941
11942count_match(common);
11943return end;
11944}
11945
11946static SLJIT_INLINE PCRE2_SPTR compile_fail_accept_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
11947{
11948DEFINE_COMPILER;
11949backtrack_common *backtrack;
11950
11951PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
11952
11953if (*cc == OP_FAIL)
11954  {
11955  add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
11956  return cc + 1;
11957  }
11958
11959if (*cc == OP_ACCEPT && common->currententry == NULL && (common->re->overall_options & PCRE2_ENDANCHORED) != 0)
11960  add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
11961
11962if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty)
11963  {
11964  /* No need to check notempty conditions. */
11965  if (common->accept_label == NULL)
11966    add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
11967  else
11968    JUMPTO(SLJIT_JUMP, common->accept_label);
11969  return cc + 1;
11970  }
11971
11972if (common->accept_label == NULL)
11973  add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)));
11974else
11975  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label);
11976
11977if (HAS_VIRTUAL_REGISTERS)
11978  {
11979  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
11980  OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
11981  }
11982else
11983  OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options));
11984
11985OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
11986add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_NOT_ZERO));
11987OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
11988if (common->accept_label == NULL)
11989  add_jump(compiler, &common->accept, JUMP(SLJIT_ZERO));
11990else
11991  JUMPTO(SLJIT_ZERO, common->accept_label);
11992
11993OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
11994if (common->accept_label == NULL)
11995  add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
11996else
11997  CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
11998add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
11999return cc + 1;
12000}
12001
12002static SLJIT_INLINE PCRE2_SPTR compile_close_matchingpath(compiler_common *common, PCRE2_SPTR cc)
12003{
12004DEFINE_COMPILER;
12005int offset = GET2(cc, 1);
12006BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0;
12007
12008/* Data will be discarded anyway... */
12009if (common->currententry != NULL)
12010  return cc + 1 + IMM2_SIZE;
12011
12012if (!optimized_cbracket)
12013  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(offset));
12014offset <<= 1;
12015OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
12016if (!optimized_cbracket)
12017  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
12018return cc + 1 + IMM2_SIZE;
12019}
12020
12021static SLJIT_INLINE PCRE2_SPTR compile_control_verb_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
12022{
12023DEFINE_COMPILER;
12024backtrack_common *backtrack;
12025PCRE2_UCHAR opcode = *cc;
12026PCRE2_SPTR ccend = cc + 1;
12027
12028if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG ||
12029    opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
12030  ccend += 2 + cc[1];
12031
12032PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
12033
12034if (opcode == OP_SKIP)
12035  {
12036  allocate_stack(common, 1);
12037  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
12038  return ccend;
12039  }
12040
12041if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)
12042  {
12043  if (HAS_VIRTUAL_REGISTERS)
12044    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
12045  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
12046  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
12047  OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
12048  }
12049
12050return ccend;
12051}
12052
12053static PCRE2_UCHAR then_trap_opcode[1] = { OP_THEN_TRAP };
12054
12055static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
12056{
12057DEFINE_COMPILER;
12058backtrack_common *backtrack;
12059BOOL needs_control_head;
12060int size;
12061
12062PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
12063common->then_trap = BACKTRACK_AS(then_trap_backtrack);
12064BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
12065BACKTRACK_AS(then_trap_backtrack)->start = (sljit_sw)(cc - common->start);
12066BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head);
12067
12068size = BACKTRACK_AS(then_trap_backtrack)->framesize;
12069size = 3 + (size < 0 ? 0 : size);
12070
12071OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
12072allocate_stack(common, size);
12073if (size > 3)
12074  OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw));
12075else
12076  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
12077OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start);
12078OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap);
12079OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0);
12080
12081size = BACKTRACK_AS(then_trap_backtrack)->framesize;
12082if (size >= 0)
12083  init_frame(common, cc, ccend, size - 1, 0);
12084}
12085
/* Emits the matching-path code for every opcode in the range cc..ccend.

Each opcode is dispatched to the corresponding compile_*_matchingpath helper;
OP_SET_SOM, OP_BRAMINZERO and OP_MARK are emitted inline. Every helper
returns the position of the next opcode, and a NULL result makes this
function return immediately.

Helpers that take a jump list receive &parent->top->nextbacktracks when
parent->top is not NULL and &parent->topbacktracks otherwise.

When common->has_then is set and common->then_offsets has a non-zero entry
for the start of the range, a then trap is opened before the loop
(compile_then_trap_matchingpath) and a second then_trap_backtrack record is
pushed after it; common->then_trap is restored to its previous value at the
end.

Arguments:
  common      the compiler state
  cc          start of the code range
  ccend       end of the code range (must point at OP_END or an opcode in the
              OP_ALT..OP_KETRPOS range, see the assertion below)
  parent      backtrack record whose lists receive the failure jumps
*/
12086static void compile_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
12087{
12088DEFINE_COMPILER;
/* 'backtrack' is never assigned explicitly below; presumably it is set by the
PUSH_BACKTRACK_NOVALUE macro and read by BACKTRACK_AS (confirm against the
macro definitions). */
12089backtrack_common *backtrack;
12090BOOL has_then_trap = FALSE;
12091then_trap_backtrack *save_then_trap = NULL;
12092
12093SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS));
12094
/* A non-zero then_offsets entry for the start position requests a then trap
around this range. The previous trap is saved so it can be restored below. */
12095if (common->has_then && common->then_offsets[cc - common->start] != 0)
12096  {
12097  SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0);
12098  has_then_trap = TRUE;
12099  save_then_trap = common->then_trap;
12100  /* Tail item on backtrack. */
12101  compile_then_trap_matchingpath(common, cc, ccend, parent);
12102  }
12103
12104while (cc < ccend)
12105  {
12106  switch(*cc)
12107    {
    /* Position assertions and OP_REVERSE. */
12108    case OP_SOD:
12109    case OP_SOM:
12110    case OP_NOT_WORD_BOUNDARY:
12111    case OP_WORD_BOUNDARY:
12112    case OP_EODN:
12113    case OP_EOD:
12114    case OP_DOLL:
12115    case OP_DOLLM:
12116    case OP_CIRC:
12117    case OP_CIRCM:
12118    case OP_REVERSE:
12119    cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
12120    break;
12121
    /* Character types and negated single characters. */
12122    case OP_NOT_DIGIT:
12123    case OP_DIGIT:
12124    case OP_NOT_WHITESPACE:
12125    case OP_WHITESPACE:
12126    case OP_NOT_WORDCHAR:
12127    case OP_WORDCHAR:
12128    case OP_ANY:
12129    case OP_ALLANY:
12130    case OP_ANYBYTE:
12131    case OP_NOTPROP:
12132    case OP_PROP:
12133    case OP_ANYNL:
12134    case OP_NOT_HSPACE:
12135    case OP_HSPACE:
12136    case OP_NOT_VSPACE:
12137    case OP_VSPACE:
12138    case OP_EXTUNI:
12139    case OP_NOT:
12140    case OP_NOTI:
12141    cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
12142    break;
12143
    /* The previous OVECTOR(0) value is pushed on the stack and OVECTOR(0) is
    replaced by the current STR_PTR. */
12144    case OP_SET_SOM:
12145    PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
12146    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
12147    allocate_stack(common, 1);
12148    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
12149    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
12150    cc++;
12151    break;
12152
    /* In PCRE2_JIT_COMPLETE mode compile_charn_matchingpath() is used (it
    receives ccend); in every other mode the single-character helper is used. */
12153    case OP_CHAR:
12154    case OP_CHARI:
12155    if (common->mode == PCRE2_JIT_COMPLETE)
12156      cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
12157    else
12158      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
12159    break;
12160
    /* Repeated single items. */
12161    case OP_STAR:
12162    case OP_MINSTAR:
12163    case OP_PLUS:
12164    case OP_MINPLUS:
12165    case OP_QUERY:
12166    case OP_MINQUERY:
12167    case OP_UPTO:
12168    case OP_MINUPTO:
12169    case OP_EXACT:
12170    case OP_POSSTAR:
12171    case OP_POSPLUS:
12172    case OP_POSQUERY:
12173    case OP_POSUPTO:
12174    case OP_STARI:
12175    case OP_MINSTARI:
12176    case OP_PLUSI:
12177    case OP_MINPLUSI:
12178    case OP_QUERYI:
12179    case OP_MINQUERYI:
12180    case OP_UPTOI:
12181    case OP_MINUPTOI:
12182    case OP_EXACTI:
12183    case OP_POSSTARI:
12184    case OP_POSPLUSI:
12185    case OP_POSQUERYI:
12186    case OP_POSUPTOI:
12187    case OP_NOTSTAR:
12188    case OP_NOTMINSTAR:
12189    case OP_NOTPLUS:
12190    case OP_NOTMINPLUS:
12191    case OP_NOTQUERY:
12192    case OP_NOTMINQUERY:
12193    case OP_NOTUPTO:
12194    case OP_NOTMINUPTO:
12195    case OP_NOTEXACT:
12196    case OP_NOTPOSSTAR:
12197    case OP_NOTPOSPLUS:
12198    case OP_NOTPOSQUERY:
12199    case OP_NOTPOSUPTO:
12200    case OP_NOTSTARI:
12201    case OP_NOTMINSTARI:
12202    case OP_NOTPLUSI:
12203    case OP_NOTMINPLUSI:
12204    case OP_NOTQUERYI:
12205    case OP_NOTMINQUERYI:
12206    case OP_NOTUPTOI:
12207    case OP_NOTMINUPTOI:
12208    case OP_NOTEXACTI:
12209    case OP_NOTPOSSTARI:
12210    case OP_NOTPOSPLUSI:
12211    case OP_NOTPOSQUERYI:
12212    case OP_NOTPOSUPTOI:
12213    case OP_TYPESTAR:
12214    case OP_TYPEMINSTAR:
12215    case OP_TYPEPLUS:
12216    case OP_TYPEMINPLUS:
12217    case OP_TYPEQUERY:
12218    case OP_TYPEMINQUERY:
12219    case OP_TYPEUPTO:
12220    case OP_TYPEMINUPTO:
12221    case OP_TYPEEXACT:
12222    case OP_TYPEPOSSTAR:
12223    case OP_TYPEPOSPLUS:
12224    case OP_TYPEPOSQUERY:
12225    case OP_TYPEPOSUPTO:
12226    cc = compile_iterator_matchingpath(common, cc, parent);
12227    break;
12228
    /* The code unit that follows the 32 bytes of class data tells whether the
    class is repeated (an opcode in the OP_CRSTAR..OP_CRPOSRANGE range). */
12229    case OP_CLASS:
12230    case OP_NCLASS:
12231    if (cc[1 + (32 / sizeof(PCRE2_UCHAR))] >= OP_CRSTAR && cc[1 + (32 / sizeof(PCRE2_UCHAR))] <= OP_CRPOSRANGE)
12232      cc = compile_iterator_matchingpath(common, cc, parent);
12233    else
12234      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
12235    break;
12236
12237#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
    /* Same test as above; here the opcode to inspect is GET(cc, 1) code units
    away from cc. */
12238    case OP_XCLASS:
12239    if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE)
12240      cc = compile_iterator_matchingpath(common, cc, parent);
12241    else
12242      cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
12243    break;
12244#endif
12245
    /* Back references: a repeat opcode after the IMM2_SIZE operand selects the
    iterator variant. compile_ref_matchingpath() does not return a position,
    so cc is advanced here. */
12246    case OP_REF:
12247    case OP_REFI:
12248    if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRPOSRANGE)
12249      cc = compile_ref_iterator_matchingpath(common, cc, parent);
12250    else
12251      {
12252      compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
12253      cc += 1 + IMM2_SIZE;
12254      }
12255    break;
12256
    /* Same as above with two IMM2_SIZE operands; the non-repeated form calls
    compile_dnref_search() before compile_ref_matchingpath(). */
12257    case OP_DNREF:
12258    case OP_DNREFI:
12259    if (cc[1 + 2 * IMM2_SIZE] >= OP_CRSTAR && cc[1 + 2 * IMM2_SIZE] <= OP_CRPOSRANGE)
12260      cc = compile_ref_iterator_matchingpath(common, cc, parent);
12261    else
12262      {
12263      compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
12264      compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
12265      cc += 1 + 2 * IMM2_SIZE;
12266      }
12267    break;
12268
12269    case OP_RECURSE:
12270    cc = compile_recurse_matchingpath(common, cc, parent);
12271    break;
12272
12273    case OP_CALLOUT:
12274    case OP_CALLOUT_STR:
12275    cc = compile_callout_matchingpath(common, cc, parent);
12276    break;
12277
    /* Assertions get their own assert_backtrack record. */
12278    case OP_ASSERT:
12279    case OP_ASSERT_NOT:
12280    case OP_ASSERTBACK:
12281    case OP_ASSERTBACK_NOT:
12282    PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
12283    cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
12284    break;
12285
    /* cc is moved to the end of the bracket that follows (bracketend); the
    opcode at cc - 1 - LINK_SIZE is then inspected. Unless it is OP_KETRMIN one
    slot holding STR_PTR is pushed; for OP_KETRMIN two slots are pushed (zero
    and STR_PTR). The label recorded here is the record's matchingpath. */
12286    case OP_BRAMINZERO:
12287    PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
12288    cc = bracketend(cc + 1);
12289    if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
12290      {
12291      allocate_stack(common, 1);
12292      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
12293      }
12294    else
12295      {
12296      allocate_stack(common, 2);
12297      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
12298      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
12299      }
12300    BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
12301    count_match(common);
12302    break;
12303
12304    case OP_ASSERT_NA:
12305    case OP_ASSERTBACK_NA:
12306    case OP_ONCE:
12307    case OP_SCRIPT_RUN:
12308    case OP_BRA:
12309    case OP_CBRA:
12310    case OP_COND:
12311    case OP_SBRA:
12312    case OP_SCBRA:
12313    case OP_SCOND:
12314    cc = compile_bracket_matchingpath(common, cc, parent);
12315    break;
12316
    /* The opcode after OP_BRAZERO selects the handler: values above
    OP_ASSERTBACK_NOT go to the bracket compiler, the rest are compiled as
    assertions. */
12317    case OP_BRAZERO:
12318    if (cc[1] > OP_ASSERTBACK_NOT)
12319      cc = compile_bracket_matchingpath(common, cc, parent);
12320    else
12321      {
12322      PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
12323      cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
12324      }
12325    break;
12326
12327    case OP_BRAPOS:
12328    case OP_CBRAPOS:
12329    case OP_SBRAPOS:
12330    case OP_SCBRAPOS:
12331    case OP_BRAPOSZERO:
12332    cc = compile_bracketpos_matchingpath(common, cc, parent);
12333    break;
12334
    /* The previous mark pointer is saved on the stack (slot 4 when
    has_skip_arg is set, slot 0 otherwise) and cc + 2 becomes the new value,
    stored both in the local mark_ptr slot and in jit_arguments.mark_ptr. When
    has_skip_arg is set, five slots are allocated instead of one and a
    type_mark entry (old control head, type, cc + 2, STR_PTR) is written and
    made the new control head. */
12335    case OP_MARK:
12336    PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
12337    SLJIT_ASSERT(common->mark_ptr != 0);
12338    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
12339    allocate_stack(common, common->has_skip_arg ? 5 : 1);
12340    if (HAS_VIRTUAL_REGISTERS)
12341      OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
12342    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);
12343    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
12344    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
12345    OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
12346    if (common->has_skip_arg)
12347      {
12348      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
12349      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
12350      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);
12351      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2));
12352      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
12353      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
12354      }
    /* Skip the opcode, two more code units and the cc[1] code units that
    follow them. */
12355    cc += 1 + 2 + cc[1];
12356    break;
12357
12358    case OP_PRUNE:
12359    case OP_PRUNE_ARG:
12360    case OP_SKIP:
12361    case OP_SKIP_ARG:
12362    case OP_THEN:
12363    case OP_THEN_ARG:
12364    case OP_COMMIT:
12365    case OP_COMMIT_ARG:
12366    cc = compile_control_verb_matchingpath(common, cc, parent);
12367    break;
12368
12369    case OP_FAIL:
12370    case OP_ACCEPT:
12371    case OP_ASSERT_ACCEPT:
12372    cc = compile_fail_accept_matchingpath(common, cc, parent);
12373    break;
12374
12375    case OP_CLOSE:
12376    cc = compile_close_matchingpath(common, cc);
12377    break;
12378
    /* Nothing is emitted; cc jumps to the end of the bracket that follows. */
12379    case OP_SKIPZERO:
12380    cc = bracketend(cc + 1);
12381    break;
12382
12383    default:
12384    SLJIT_UNREACHABLE();
12385    return;
12386    }
  /* A helper returned NULL: stop compiling this range. */
12387  if (cc == NULL)
12388    return;
12389  }
12390
/* Close the then trap opened above: the new record keeps a pointer to the
trap created for this range, and the previously active trap is restored. */
12391if (has_then_trap)
12392  {
12393  /* Head item on backtrack. */
12394  PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
12395  BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
12396  BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap;
12397  common->then_trap = save_then_trap;
12398  }
12399SLJIT_ASSERT(cc == ccend);
12400}
12401
/* The matching-path helper macros are not used past this point. */
12402#undef PUSH_BACKTRACK
12403#undef PUSH_BACKTRACK_NOVALUE
12404#undef BACKTRACK_AS
12405
/* Calls compile_backtrackingpath() for the given record and returns from the
enclosing function when the sljit compiler reports an error. It expands to a
bare "return;" and relies on the identifiers "common" and "compiler" being in
scope at the point of use. */
12406#define COMPILE_BACKTRACKINGPATH(current) \
12407  do \
12408    { \
12409    compile_backtrackingpath(common, (current)); \
12410    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
12411      return; \
12412    } \
12413  while (0)
12414
/* Casts the identifier "current" (which must be in scope) to a pointer to the
given backtrack record type. */
12415#define CURRENT_AS(type) ((type *)current)
12416
/* Emits the backtracking-path code for a repeated single item. The record is
accessed as a char_iterator_backtrack.

The iterator state is kept either on the backtrack stack (STACK(0) and
STACK(1) relative to STACK_TOP) when PRIVATE_DATA(cc) is zero, or in the
private data slots addressed through SLJIT_SP otherwise. The base, offset0
and offset1 locals hide this difference; stack slots are released only in the
former case.

Arguments:
  common      the compiler state
  current     the backtrack record of the iterator
*/
12417static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12418{
12419DEFINE_COMPILER;
12420PCRE2_SPTR cc = current->cc;
12421PCRE2_UCHAR opcode;
12422PCRE2_UCHAR type;
12423sljit_u32 max = 0, exact;
12424struct sljit_label *label = NULL;
12425struct sljit_jump *jump = NULL;
12426jump_list *jumplist = NULL;
12427PCRE2_SPTR end;
12428int private_data_ptr = PRIVATE_DATA(cc);
12429int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
12430int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
12431int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + SSIZE_OF(sw);
12432
/* Only opcode, type and the returned cc are used below; max, exact and end
are outputs that this function ignores. */
12433cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
12434
12435switch(opcode)
12436  {
12437  case OP_STAR:
12438  case OP_UPTO:
12439  if (type == OP_ANYNL || type == OP_EXTUNI)
12440    {
    /* These two types always keep their state on the stack. The collected
    backtracks land here; the saved STR_PTR is popped and matching resumes
    unless the popped value is zero. */
12441    SLJIT_ASSERT(private_data_ptr == 0);
12442    set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
12443    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12444    free_stack(common, 1);
12445    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12446    }
12447  else
12448    {
12449    if (CURRENT_AS(char_iterator_backtrack)->u.charpos.enabled)
12450      {
      /* offset0 holds the current position and offset1 the limit. Step back
      one code unit, then loop: the code unit just before STR_PTR is compared
      (after OR-ing othercasebit, when there is one) with charpos.chr, and the
      matching path is resumed on equality. Otherwise move_back() is called
      and the loop repeats while STR_PTR is greater than the limit. */
12451      OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12452      OP1(SLJIT_MOV, TMP2, 0, base, offset1);
12453      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
12454
12455      jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
12456      label = LABEL();
12457      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
12458      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12459      if (CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit != 0)
12460        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit);
12461      CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.chr, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12462      move_back(common, NULL, TRUE);
12463      CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP2, 0, label);
12464      }
12465    else
12466      {
      /* Give up when the current position is not greater than the limit in
      offset1; otherwise call move_back(), save the new position and resume
      the matching path. */
12467      OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12468      jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1);
12469      move_back(common, NULL, TRUE);
12470      OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12471      JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12472      }
    /* Limit reached: both variants exit here. */
12473    JUMPHERE(jump);
12474    if (private_data_ptr == 0)
12475      free_stack(common, 2);
12476    }
12477  break;
12478
  /* Reload the saved position, emit a match of one more item, save the new
  position and resume the matching path. The jumps that
  compile_char1_matchingpath() adds to jumplist are bound after the resume
  jump, where the stack slot (if any) is released. */
12479  case OP_MINSTAR:
12480  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12481  compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
12482  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12483  JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12484  set_jumps(jumplist, LABEL());
12485  if (private_data_ptr == 0)
12486    free_stack(common, 1);
12487  break;
12488
  /* Like OP_MINSTAR, with a counter in offset1: it is decremented first, and
  a result of zero joins the failure exit without matching another item. */
12489  case OP_MINUPTO:
12490  OP1(SLJIT_MOV, TMP1, 0, base, offset1);
12491  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12492  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
12493  add_jump(compiler, &jumplist, JUMP(SLJIT_ZERO));
12494
12495  OP1(SLJIT_MOV, base, offset1, TMP1, 0);
12496  compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
12497  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12498  JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12499
12500  set_jumps(jumplist, LABEL());
12501  if (private_data_ptr == 0)
12502    free_stack(common, 2);
12503  break;
12504
  /* Two entry points. The first reloads the saved position, clears the slot
  and resumes the matching path when the reloaded value is non-zero;
  otherwise it jumps to the exit. The u.backtracks jumps enter at the second
  one, which performs the same reload and clear and resumes unconditionally. */
12505  case OP_QUERY:
12506  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12507  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
12508  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12509  jump = JUMP(SLJIT_JUMP);
12510  set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
12511  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12512  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
12513  JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12514  JUMPHERE(jump);
12515  if (private_data_ptr == 0)
12516    free_stack(common, 1);
12517  break;
12518
  /* Reload the saved position and clear the slot. A zero value goes straight
  to the exit; otherwise one item is matched and the matching path resumes.
  Match failures join the same exit. */
12519  case OP_MINQUERY:
12520  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12521  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
12522  jump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
12523  compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
12524  JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12525  set_jumps(jumplist, LABEL());
12526  JUMPHERE(jump);
12527  if (private_data_ptr == 0)
12528    free_stack(common, 1);
12529  break;
12530
  /* No backtracking code is emitted for these opcodes. */
12531  case OP_EXACT:
12532  case OP_POSSTAR:
12533  case OP_POSQUERY:
12534  case OP_POSUPTO:
12535  break;
12536
12537  default:
12538  SLJIT_UNREACHABLE();
12539  break;
12540  }
12541
/* Jumps collected in topbacktracks continue after the code emitted above. */
12542set_jumps(current->topbacktracks, LABEL());
12543}
12544
/* Emits the backtracking-path code for a repeated back reference. The record
is accessed as a ref_iterator_backtrack.

The repeat opcode is read from the code unit after the reference operands
(one IMM2_SIZE operand for OP_REF/OP_REFI, two otherwise), and its lowest bit
selects one of two code shapes.

Arguments:
  common      the compiler state
  current     the backtrack record of the iterator
*/
12545static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12546{
12547DEFINE_COMPILER;
12548PCRE2_SPTR cc = current->cc;
12549BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
12550PCRE2_UCHAR type;
12551
12552type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE];
12553
12554if ((type & 0x1) == 0)
12555  {
12556  /* Maximize case. */
  /* The topbacktracks jumps land here. One saved STR_PTR is popped, and the
  matching path is resumed unless the popped value is zero. */
12557  set_jumps(current->topbacktracks, LABEL());
12558  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12559  free_stack(common, 1);
12560  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
12561  return;
12562  }
12563
/* Odd repeat opcode (presumably the minimizing form; confirm against the
opcode table). STR_PTR is reloaded without popping, and the matching path is
resumed when it is non-zero. Otherwise, and for the topbacktracks jumps, the
iterator's stack slots are released: 2 for OP_REF/OP_REFI, 3 for the other
reference opcodes. */
12564OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12565CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
12566set_jumps(current->topbacktracks, LABEL());
12567free_stack(common, ref ? 2 : 3);
12568}
12569
/* Emits the backtracking-path code for a recursion. The record is accessed
as a recurse_backtrack.

When the pattern was not inlined, a fast call to the recursion entry's
backtrack code is emitted, and the matching path is resumed if TMP1 is
non-zero afterwards. When the pattern was inlined, the backtracking path of
the nested records (current->top) is compiled instead.

Arguments:
  common      the compiler state
  current     the backtrack record of the recursion
*/
12570static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12571{
12572DEFINE_COMPILER;
12573recurse_entry *entry;
12574
12575if (!CURRENT_AS(recurse_backtrack)->inlined_pattern)
12576  {
12577  entry = CURRENT_AS(recurse_backtrack)->entry;
  /* Without a backtrack label yet, the call is queued in
  entry->backtrack_calls; otherwise the call targets the label directly. */
12578  if (entry->backtrack_label == NULL)
12579    add_jump(compiler, &entry->backtrack_calls, JUMP(SLJIT_FAST_CALL));
12580  else
12581    JUMPTO(SLJIT_FAST_CALL, entry->backtrack_label);
12582  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(recurse_backtrack)->matchingpath);
12583  }
12584else
12585  compile_backtrackingpath(common, current->top);
12586
12587set_jumps(current->topbacktracks, LABEL());
12588}
12589
/* Emits the backtracking-path code for an assertion, optionally preceded by
OP_BRAZERO. The record is accessed as an assert_backtrack.

Without OP_BRAZERO the code only binds the topbacktracks jumps, preceded by a
frame revert for OP_ASSERT and OP_ASSERTBACK when framesize is not negative.
With OP_BRAZERO the top stack slot holds a saved STR_PTR: a non-zero value
makes the code store zero in the slot and resume the matching path; a zero
value ends the backtracking of this item.

Arguments:
  common      the compiler state
  current     the backtrack record of the assertion
*/
12590static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12591{
12592DEFINE_COMPILER;
12593PCRE2_SPTR cc = current->cc;
12594PCRE2_UCHAR bra = OP_BRA;
12595struct sljit_jump *brajump = NULL;
12596
12597SLJIT_ASSERT(*cc != OP_BRAMINZERO);
/* Step over an OP_BRAZERO prefix so that cc points at the assertion opcode. */
12598if (*cc == OP_BRAZERO)
12599  {
12600  bra = *cc;
12601  cc++;
12602  }
12603
12604if (bra == OP_BRAZERO)
12605  {
12606  SLJIT_ASSERT(current->topbacktracks == NULL);
12607  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12608  }
12609
/* Negative framesize: there is no frame to revert. */
12610if (CURRENT_AS(assert_backtrack)->framesize < 0)
12611  {
12612  set_jumps(current->topbacktracks, LABEL());
12613
12614  if (bra == OP_BRAZERO)
12615    {
    /* Zero the slot and resume the matching path if the reloaded STR_PTR was
    non-zero; otherwise release the slot. */
12616    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
12617    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
12618    free_stack(common, 1);
12619    }
12620  return;
12621  }
12622
12623if (bra == OP_BRAZERO)
12624  {
  /* Negative assertions take the same route as the frameless case above. */
12625  if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
12626    {
12627    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
12628    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
12629    free_stack(common, 1);
12630    return;
12631    }
  /* Positive assertion: release the slot now and skip everything below when
  the saved STR_PTR was zero. */
12632  free_stack(common, 1);
12633  brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
12634  }
12635
12636if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
12637  {
  /* Revert the frame: STACK_TOP is reloaded from the private data slot, the
  revertframes routine is called, the value at STACK(-2) is written back to
  the private data slot and STACK_TOP is advanced by framesize - 1 words. */
12638  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr);
12639  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12640  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
12641  OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(assert_backtrack)->framesize - 1) * sizeof(sljit_sw));
12642  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr, TMP1, 0);
12643
12644  set_jumps(current->topbacktracks, LABEL());
12645  }
12646else
12647  set_jumps(current->topbacktracks, LABEL());
12648
12649if (bra == OP_BRAZERO)
12650  {
12651  /* We know there is enough space on the stack. */
  /* The slot released above is taken back by adjusting STACK_TOP directly
  (allocate_stack() is not used), zero is stored in it and the matching path
  is resumed. brajump, taken when the saved STR_PTR was zero, lands after
  this code. */
12652  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
12653  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
12654  JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_backtrack)->matchingpath);
12655  JUMPHERE(brajump);
12656  }
12657}
12658
12659static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12660{
12661DEFINE_COMPILER;
12662int opcode, stacksize, alt_count, alt_max;
12663int offset = 0;
12664int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;
12665int repeat_ptr = 0, repeat_type = 0, repeat_count = 0;
12666PCRE2_SPTR cc = current->cc;
12667PCRE2_SPTR ccbegin;
12668PCRE2_SPTR ccprev;
12669PCRE2_UCHAR bra = OP_BRA;
12670PCRE2_UCHAR ket;
12671assert_backtrack *assert;
12672BOOL has_alternatives;
12673BOOL needs_control_head = FALSE;
12674struct sljit_jump *brazero = NULL;
12675struct sljit_jump *next_alt = NULL;
12676struct sljit_jump *once = NULL;
12677struct sljit_jump *cond = NULL;
12678struct sljit_label *rmin_label = NULL;
12679struct sljit_label *exact_label = NULL;
12680struct sljit_put_label *put_label = NULL;
12681
12682if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
12683  {
12684  bra = *cc;
12685  cc++;
12686  }
12687
12688opcode = *cc;
12689ccbegin = bracketend(cc) - 1 - LINK_SIZE;
12690ket = *ccbegin;
12691if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0)
12692  {
12693  repeat_ptr = PRIVATE_DATA(ccbegin);
12694  repeat_type = PRIVATE_DATA(ccbegin + 2);
12695  repeat_count = PRIVATE_DATA(ccbegin + 3);
12696  SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0);
12697  if (repeat_type == OP_UPTO)
12698    ket = OP_KETRMAX;
12699  if (repeat_type == OP_MINUPTO)
12700    ket = OP_KETRMIN;
12701  }
12702ccbegin = cc;
12703cc += GET(cc, 1);
12704has_alternatives = *cc == OP_ALT;
12705if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
12706  has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL;
12707if (opcode == OP_CBRA || opcode == OP_SCBRA)
12708  offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
12709if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
12710  opcode = OP_SCOND;
12711
12712alt_max = has_alternatives ? no_alternatives(ccbegin) : 0;
12713
12714/* Decoding the needs_control_head in framesize. */
12715if (opcode == OP_ONCE)
12716  {
12717  needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0;
12718  CURRENT_AS(bracket_backtrack)->u.framesize >>= 1;
12719  }
12720
12721if (ket != OP_KET && repeat_type != 0)
12722  {
12723  /* TMP1 is used in OP_KETRMIN below. */
12724  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12725  free_stack(common, 1);
12726  if (repeat_type == OP_UPTO)
12727    OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0, SLJIT_IMM, 1);
12728  else
12729    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
12730  }
12731
12732if (ket == OP_KETRMAX)
12733  {
12734  if (bra == OP_BRAZERO)
12735    {
12736    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12737    free_stack(common, 1);
12738    brazero = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
12739    }
12740  }
12741else if (ket == OP_KETRMIN)
12742  {
12743  if (bra != OP_BRAMINZERO)
12744    {
12745    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12746    if (repeat_type != 0)
12747      {
12748      /* TMP1 was set a few lines above. */
12749      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12750      /* Drop STR_PTR for non-greedy plus quantifier. */
12751      if (opcode != OP_ONCE)
12752        free_stack(common, 1);
12753      }
12754    else if (opcode >= OP_SBRA || opcode == OP_ONCE)
12755      {
12756      /* Checking zero-length iteration. */
12757      if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)
12758        CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12759      else
12760        {
12761        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12762        CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 2), CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12763        }
12764      /* Drop STR_PTR for non-greedy plus quantifier. */
12765      if (opcode != OP_ONCE)
12766        free_stack(common, 1);
12767      }
12768    else
12769      JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12770    }
12771  rmin_label = LABEL();
12772  if (repeat_type != 0)
12773    OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
12774  }
12775else if (bra == OP_BRAZERO)
12776  {
12777  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12778  free_stack(common, 1);
12779  brazero = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
12780  }
12781else if (repeat_type == OP_EXACT)
12782  {
12783  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
12784  exact_label = LABEL();
12785  }
12786
12787if (offset != 0)
12788  {
12789  if (common->capture_last_ptr != 0)
12790    {
12791    SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0);
12792    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12793    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12794    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
12795    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
12796    free_stack(common, 3);
12797    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP2, 0);
12798    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
12799    }
12800  else if (common->optimized_cbracket[offset >> 1] == 0)
12801    {
12802    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12803    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12804    free_stack(common, 2);
12805    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
12806    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
12807    }
12808  }
12809
12810if (SLJIT_UNLIKELY(opcode == OP_ONCE))
12811  {
12812  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
12813    {
12814    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12815    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12816    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw));
12817    }
12818  once = JUMP(SLJIT_JUMP);
12819  }
12820else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
12821  {
12822  if (has_alternatives)
12823    {
12824    /* Always exactly one alternative. */
12825    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12826    free_stack(common, 1);
12827
12828    alt_max = 2;
12829    next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
12830    }
12831  }
12832else if (has_alternatives)
12833  {
12834  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12835  free_stack(common, 1);
12836
12837  if (alt_max > 3)
12838    {
12839    sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);
12840
12841    SLJIT_ASSERT(CURRENT_AS(bracket_backtrack)->u.matching_put_label);
12842    sljit_set_put_label(CURRENT_AS(bracket_backtrack)->u.matching_put_label, LABEL());
12843    sljit_emit_op0(compiler, SLJIT_ENDBR);
12844    }
12845  else
12846    next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
12847  }
12848
12849COMPILE_BACKTRACKINGPATH(current->top);
12850if (current->topbacktracks)
12851  set_jumps(current->topbacktracks, LABEL());
12852
12853if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
12854  {
12855  /* Conditional block always has at most one alternative. */
12856  if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
12857    {
12858    SLJIT_ASSERT(has_alternatives);
12859    assert = CURRENT_AS(bracket_backtrack)->u.assert;
12860    if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
12861      {
12862      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
12863      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12864      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
12865      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
12866      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
12867      }
12868    cond = JUMP(SLJIT_JUMP);
12869    set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL());
12870    }
12871  else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL)
12872    {
12873    SLJIT_ASSERT(has_alternatives);
12874    cond = JUMP(SLJIT_JUMP);
12875    set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL());
12876    }
12877  else
12878    SLJIT_ASSERT(!has_alternatives);
12879  }
12880
12881if (has_alternatives)
12882  {
12883  alt_count = 1;
12884  do
12885    {
12886    current->top = NULL;
12887    current->topbacktracks = NULL;
12888    current->nextbacktracks = NULL;
12889    /* Conditional blocks always have an additional alternative, even if it is empty. */
12890    if (*cc == OP_ALT)
12891      {
12892      ccprev = cc + 1 + LINK_SIZE;
12893      cc += GET(cc, 1);
12894      if (opcode != OP_COND && opcode != OP_SCOND)
12895        {
12896        if (opcode != OP_ONCE)
12897          {
12898          if (private_data_ptr != 0)
12899            OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12900          else
12901            OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12902          }
12903        else
12904          OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0));
12905        }
12906      compile_matchingpath(common, ccprev, cc, current);
12907      if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
12908        return;
12909
12910      if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA)
12911        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12912
12913      if (opcode == OP_SCRIPT_RUN)
12914        match_script_run_common(common, private_data_ptr, current);
12915      }
12916
12917    /* Instructions after the current alternative is successfully matched. */
12918    /* There is a similar code in compile_bracket_matchingpath. */
12919    if (opcode == OP_ONCE)
12920      match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
12921
12922    stacksize = 0;
12923    if (repeat_type == OP_MINUPTO)
12924      {
12925      /* We need to preserve the counter. TMP2 will be used below. */
12926      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
12927      stacksize++;
12928      }
12929    if (ket != OP_KET || bra != OP_BRA)
12930      stacksize++;
12931    if (offset != 0)
12932      {
12933      if (common->capture_last_ptr != 0)
12934        stacksize++;
12935      if (common->optimized_cbracket[offset >> 1] == 0)
12936        stacksize += 2;
12937      }
12938    if (opcode != OP_ONCE)
12939      stacksize++;
12940
12941    if (stacksize > 0)
12942      allocate_stack(common, stacksize);
12943
12944    stacksize = 0;
12945    if (repeat_type == OP_MINUPTO)
12946      {
12947      /* TMP2 was set above. */
12948      OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
12949      stacksize++;
12950      }
12951
12952    if (ket != OP_KET || bra != OP_BRA)
12953      {
12954      if (ket != OP_KET)
12955        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
12956      else
12957        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
12958      stacksize++;
12959      }
12960
12961    if (offset != 0)
12962      stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
12963
12964    if (opcode != OP_ONCE)
12965      {
12966      if (alt_max <= 3)
12967        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count);
12968      else
12969        put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize));
12970      }
12971
12972    if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)
12973      {
12974      /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */
12975      SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
12976      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
12977      }
12978
12979    JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath);
12980
12981    if (opcode != OP_ONCE)
12982      {
12983      if (alt_max <= 3)
12984        {
12985        JUMPHERE(next_alt);
12986        alt_count++;
12987        if (alt_count < alt_max)
12988          {
12989          SLJIT_ASSERT(alt_count == 2 && alt_max == 3);
12990          next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);
12991          }
12992        }
12993      else
12994        {
12995        sljit_set_put_label(put_label, LABEL());
12996        sljit_emit_op0(compiler, SLJIT_ENDBR);
12997        }
12998      }
12999
13000    COMPILE_BACKTRACKINGPATH(current->top);
13001    if (current->topbacktracks)
13002      set_jumps(current->topbacktracks, LABEL());
13003    SLJIT_ASSERT(!current->nextbacktracks);
13004    }
13005  while (*cc == OP_ALT);
13006
13007  if (cond != NULL)
13008    {
13009    SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
13010    assert = CURRENT_AS(bracket_backtrack)->u.assert;
13011    if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0)
13012      {
13013      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
13014      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13015      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
13016      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
13017      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
13018      }
13019    JUMPHERE(cond);
13020    }
13021
13022  /* Free the STR_PTR. */
13023  if (private_data_ptr == 0)
13024    free_stack(common, 1);
13025  }
13026
13027if (offset != 0)
13028  {
13029  /* Using both tmp register is better for instruction scheduling. */
13030  if (common->optimized_cbracket[offset >> 1] != 0)
13031    {
13032    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13033    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13034    free_stack(common, 2);
13035    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
13036    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
13037    }
13038  else
13039    {
13040    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13041    free_stack(common, 1);
13042    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
13043    }
13044  }
13045else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
13046  {
13047  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
13048  free_stack(common, 1);
13049  }
13050else if (opcode == OP_ONCE)
13051  {
13052  cc = ccbegin + GET(ccbegin, 1);
13053  stacksize = needs_control_head ? 1 : 0;
13054
13055  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
13056    {
13057    /* Reset head and drop saved frame. */
13058    stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1);
13059    }
13060  else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
13061    {
13062    /* The STR_PTR must be released. */
13063    stacksize++;
13064    }
13065
13066  if (stacksize > 0)
13067    free_stack(common, stacksize);
13068
13069  JUMPHERE(once);
13070  /* Restore previous private_data_ptr */
13071  if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
13072    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 1));
13073  else if (ket == OP_KETRMIN)
13074    {
13075    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13076    /* See the comment below. */
13077    free_stack(common, 2);
13078    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
13079    }
13080  }
13081
13082if (repeat_type == OP_EXACT)
13083  {
13084  OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
13085  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
13086  CMPTO(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label);
13087  }
13088else if (ket == OP_KETRMAX)
13089  {
13090  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13091  if (bra != OP_BRAZERO)
13092    free_stack(common, 1);
13093
13094  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
13095  if (bra == OP_BRAZERO)
13096    {
13097    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13098    JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
13099    JUMPHERE(brazero);
13100    free_stack(common, 1);
13101    }
13102  }
13103else if (ket == OP_KETRMIN)
13104  {
13105  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13106
13107  /* OP_ONCE removes everything in case of a backtrack, so we don't
13108  need to explicitly release the STR_PTR. The extra release would
13109  affect badly the free_stack(2) above. */
13110  if (opcode != OP_ONCE)
13111    free_stack(common, 1);
13112  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label);
13113  if (opcode == OP_ONCE)
13114    free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
13115  else if (bra == OP_BRAMINZERO)
13116    free_stack(common, 1);
13117  }
13118else if (bra == OP_BRAZERO)
13119  {
13120  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13121  JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
13122  JUMPHERE(brazero);
13123  }
13124}
13125
13126static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13127{
13128DEFINE_COMPILER;
13129int offset;
13130struct sljit_jump *jump;
13131
13132if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
13133  {
13134  if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS)
13135    {
13136    offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
13137    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13138    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13139    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
13140    if (common->capture_last_ptr != 0)
13141      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
13142    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
13143    if (common->capture_last_ptr != 0)
13144      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
13145    }
13146  set_jumps(current->topbacktracks, LABEL());
13147  free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
13148  return;
13149  }
13150
13151OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);
13152add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13153OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracketpos_backtrack)->framesize - 1) * sizeof(sljit_sw));
13154
13155if (current->topbacktracks)
13156  {
13157  jump = JUMP(SLJIT_JUMP);
13158  set_jumps(current->topbacktracks, LABEL());
13159  /* Drop the stack frame. */
13160  free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
13161  JUMPHERE(jump);
13162  }
13163OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracketpos_backtrack)->framesize - 1));
13164}
13165
13166static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13167{
13168assert_backtrack backtrack;
13169
13170current->top = NULL;
13171current->topbacktracks = NULL;
13172current->nextbacktracks = NULL;
13173if (current->cc[1] > OP_ASSERTBACK_NOT)
13174  {
13175  /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */
13176  compile_bracket_matchingpath(common, current->cc, current);
13177  compile_bracket_backtrackingpath(common, current->top);
13178  }
13179else
13180  {
13181  memset(&backtrack, 0, sizeof(backtrack));
13182  backtrack.common.cc = current->cc;
13183  backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath;
13184  /* Manual call of compile_assert_matchingpath. */
13185  compile_assert_matchingpath(common, current->cc, &backtrack, FALSE);
13186  }
13187SLJIT_ASSERT(!current->nextbacktracks && !current->topbacktracks);
13188}
13189
13190static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13191{
13192DEFINE_COMPILER;
13193PCRE2_UCHAR opcode = *current->cc;
13194struct sljit_label *loop;
13195struct sljit_jump *jump;
13196
13197if (opcode == OP_THEN || opcode == OP_THEN_ARG)
13198  {
13199  if (common->then_trap != NULL)
13200    {
13201    SLJIT_ASSERT(common->control_head_ptr != 0);
13202
13203    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
13204    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);
13205    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);
13206    jump = JUMP(SLJIT_JUMP);
13207
13208    loop = LABEL();
13209    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13210    JUMPHERE(jump);
13211    CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0, loop);
13212    CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0, loop);
13213    add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));
13214    return;
13215    }
13216  else if (!common->local_quit_available && common->in_positive_assertion)
13217    {
13218    add_jump(compiler, &common->positive_assertion_quit, JUMP(SLJIT_JUMP));
13219    return;
13220    }
13221  }
13222
13223if (common->local_quit_available)
13224  {
13225  /* Abort match with a fail. */
13226  if (common->quit_label == NULL)
13227    add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
13228  else
13229    JUMPTO(SLJIT_JUMP, common->quit_label);
13230  return;
13231  }
13232
13233if (opcode == OP_SKIP_ARG)
13234  {
13235  SLJIT_ASSERT(common->control_head_ptr != 0 && TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
13236  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
13237  OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
13238  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_search_mark));
13239
13240  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_R0, 0);
13241  add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0));
13242  return;
13243  }
13244
13245if (opcode == OP_SKIP)
13246  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13247else
13248  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);
13249add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
13250}
13251
13252static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13253{
13254DEFINE_COMPILER;
13255struct sljit_jump *jump;
13256int size;
13257
13258if (CURRENT_AS(then_trap_backtrack)->then_trap)
13259  {
13260  common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap;
13261  return;
13262  }
13263
13264size = CURRENT_AS(then_trap_backtrack)->framesize;
13265size = 3 + (size < 0 ? 0 : size);
13266
13267OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(size - 3));
13268free_stack(common, size);
13269jump = JUMP(SLJIT_JUMP);
13270
13271set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL());
13272/* STACK_TOP is set by THEN. */
13273if (CURRENT_AS(then_trap_backtrack)->framesize >= 0)
13274  {
13275  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13276  OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(then_trap_backtrack)->framesize - 1) * sizeof(sljit_sw));
13277  }
13278OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13279free_stack(common, 3);
13280
13281JUMPHERE(jump);
13282OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
13283}
13284
13285static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13286{
13287DEFINE_COMPILER;
13288then_trap_backtrack *save_then_trap = common->then_trap;
13289
13290while (current)
13291  {
13292  if (current->nextbacktracks != NULL)
13293    set_jumps(current->nextbacktracks, LABEL());
13294  switch(*current->cc)
13295    {
13296    case OP_SET_SOM:
13297    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13298    free_stack(common, 1);
13299    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0);
13300    break;
13301
13302    case OP_STAR:
13303    case OP_MINSTAR:
13304    case OP_PLUS:
13305    case OP_MINPLUS:
13306    case OP_QUERY:
13307    case OP_MINQUERY:
13308    case OP_UPTO:
13309    case OP_MINUPTO:
13310    case OP_EXACT:
13311    case OP_POSSTAR:
13312    case OP_POSPLUS:
13313    case OP_POSQUERY:
13314    case OP_POSUPTO:
13315    case OP_STARI:
13316    case OP_MINSTARI:
13317    case OP_PLUSI:
13318    case OP_MINPLUSI:
13319    case OP_QUERYI:
13320    case OP_MINQUERYI:
13321    case OP_UPTOI:
13322    case OP_MINUPTOI:
13323    case OP_EXACTI:
13324    case OP_POSSTARI:
13325    case OP_POSPLUSI:
13326    case OP_POSQUERYI:
13327    case OP_POSUPTOI:
13328    case OP_NOTSTAR:
13329    case OP_NOTMINSTAR:
13330    case OP_NOTPLUS:
13331    case OP_NOTMINPLUS:
13332    case OP_NOTQUERY:
13333    case OP_NOTMINQUERY:
13334    case OP_NOTUPTO:
13335    case OP_NOTMINUPTO:
13336    case OP_NOTEXACT:
13337    case OP_NOTPOSSTAR:
13338    case OP_NOTPOSPLUS:
13339    case OP_NOTPOSQUERY:
13340    case OP_NOTPOSUPTO:
13341    case OP_NOTSTARI:
13342    case OP_NOTMINSTARI:
13343    case OP_NOTPLUSI:
13344    case OP_NOTMINPLUSI:
13345    case OP_NOTQUERYI:
13346    case OP_NOTMINQUERYI:
13347    case OP_NOTUPTOI:
13348    case OP_NOTMINUPTOI:
13349    case OP_NOTEXACTI:
13350    case OP_NOTPOSSTARI:
13351    case OP_NOTPOSPLUSI:
13352    case OP_NOTPOSQUERYI:
13353    case OP_NOTPOSUPTOI:
13354    case OP_TYPESTAR:
13355    case OP_TYPEMINSTAR:
13356    case OP_TYPEPLUS:
13357    case OP_TYPEMINPLUS:
13358    case OP_TYPEQUERY:
13359    case OP_TYPEMINQUERY:
13360    case OP_TYPEUPTO:
13361    case OP_TYPEMINUPTO:
13362    case OP_TYPEEXACT:
13363    case OP_TYPEPOSSTAR:
13364    case OP_TYPEPOSPLUS:
13365    case OP_TYPEPOSQUERY:
13366    case OP_TYPEPOSUPTO:
13367    case OP_CLASS:
13368    case OP_NCLASS:
13369#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
13370    case OP_XCLASS:
13371#endif
13372    compile_iterator_backtrackingpath(common, current);
13373    break;
13374
13375    case OP_REF:
13376    case OP_REFI:
13377    case OP_DNREF:
13378    case OP_DNREFI:
13379    compile_ref_iterator_backtrackingpath(common, current);
13380    break;
13381
13382    case OP_RECURSE:
13383    compile_recurse_backtrackingpath(common, current);
13384    break;
13385
13386    case OP_ASSERT:
13387    case OP_ASSERT_NOT:
13388    case OP_ASSERTBACK:
13389    case OP_ASSERTBACK_NOT:
13390    compile_assert_backtrackingpath(common, current);
13391    break;
13392
13393    case OP_ASSERT_NA:
13394    case OP_ASSERTBACK_NA:
13395    case OP_ONCE:
13396    case OP_SCRIPT_RUN:
13397    case OP_BRA:
13398    case OP_CBRA:
13399    case OP_COND:
13400    case OP_SBRA:
13401    case OP_SCBRA:
13402    case OP_SCOND:
13403    compile_bracket_backtrackingpath(common, current);
13404    break;
13405
13406    case OP_BRAZERO:
13407    if (current->cc[1] > OP_ASSERTBACK_NOT)
13408      compile_bracket_backtrackingpath(common, current);
13409    else
13410      compile_assert_backtrackingpath(common, current);
13411    break;
13412
13413    case OP_BRAPOS:
13414    case OP_CBRAPOS:
13415    case OP_SBRAPOS:
13416    case OP_SCBRAPOS:
13417    case OP_BRAPOSZERO:
13418    compile_bracketpos_backtrackingpath(common, current);
13419    break;
13420
13421    case OP_BRAMINZERO:
13422    compile_braminzero_backtrackingpath(common, current);
13423    break;
13424
13425    case OP_MARK:
13426    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0));
13427    if (common->has_skip_arg)
13428      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13429    free_stack(common, common->has_skip_arg ? 5 : 1);
13430    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
13431    if (common->has_skip_arg)
13432      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
13433    break;
13434
13435    case OP_THEN:
13436    case OP_THEN_ARG:
13437    case OP_PRUNE:
13438    case OP_PRUNE_ARG:
13439    case OP_SKIP:
13440    case OP_SKIP_ARG:
13441    compile_control_verb_backtrackingpath(common, current);
13442    break;
13443
13444    case OP_COMMIT:
13445    case OP_COMMIT_ARG:
13446    if (!common->local_quit_available)
13447      OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
13448    if (common->quit_label == NULL)
13449      add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
13450    else
13451      JUMPTO(SLJIT_JUMP, common->quit_label);
13452    break;
13453
13454    case OP_CALLOUT:
13455    case OP_CALLOUT_STR:
13456    case OP_FAIL:
13457    case OP_ACCEPT:
13458    case OP_ASSERT_ACCEPT:
13459    set_jumps(current->topbacktracks, LABEL());
13460    break;
13461
13462    case OP_THEN_TRAP:
13463    /* A virtual opcode for then traps. */
13464    compile_then_trap_backtrackingpath(common, current);
13465    break;
13466
13467    default:
13468    SLJIT_UNREACHABLE();
13469    break;
13470    }
13471  current = current->prev;
13472  }
13473common->then_trap = save_then_trap;
13474}
13475
13476static SLJIT_INLINE void compile_recurse(compiler_common *common)
13477{
13478DEFINE_COMPILER;
13479PCRE2_SPTR cc = common->start + common->currententry->start;
13480PCRE2_SPTR ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
13481PCRE2_SPTR ccend = bracketend(cc) - (1 + LINK_SIZE);
13482uint32_t recurse_flags = 0;
13483int private_data_size = get_recurse_data_length(common, ccbegin, ccend, &recurse_flags);
13484int alt_count, alt_max, local_size;
13485backtrack_common altbacktrack;
13486jump_list *match = NULL;
13487struct sljit_jump *next_alt = NULL;
13488struct sljit_jump *accept_exit = NULL;
13489struct sljit_label *quit;
13490struct sljit_put_label *put_label = NULL;
13491
13492/* Recurse captures then. */
13493common->then_trap = NULL;
13494
13495SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
13496
13497alt_max = no_alternatives(cc);
13498alt_count = 0;
13499
13500/* Matching path. */
13501SLJIT_ASSERT(common->currententry->entry_label == NULL && common->recursive_head_ptr != 0);
13502common->currententry->entry_label = LABEL();
13503set_jumps(common->currententry->entry_calls, common->currententry->entry_label);
13504
13505sljit_emit_fast_enter(compiler, TMP2, 0);
13506count_match(common);
13507
13508local_size = (alt_max > 1) ? 2 : 1;
13509
13510/* (Reversed) stack layout:
13511   [private data][return address][optional: str ptr] ... [optional: alternative index][recursive_head_ptr] */
13512
13513allocate_stack(common, private_data_size + local_size);
13514/* Save return address. */
13515OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP2, 0);
13516
13517copy_recurse_data(common, ccbegin, ccend, recurse_copy_from_global, local_size, private_data_size + local_size, recurse_flags);
13518
13519/* This variable is saved and restored all time when we enter or exit from a recursive context. */
13520OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);
13521
13522if (recurse_flags & recurse_flag_control_head_found)
13523  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
13524
13525if (alt_max > 1)
13526  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
13527
13528memset(&altbacktrack, 0, sizeof(backtrack_common));
13529common->quit_label = NULL;
13530common->accept_label = NULL;
13531common->quit = NULL;
13532common->accept = NULL;
13533altbacktrack.cc = ccbegin;
13534cc += GET(cc, 1);
13535while (1)
13536  {
13537  altbacktrack.top = NULL;
13538  altbacktrack.topbacktracks = NULL;
13539
13540  if (altbacktrack.cc != ccbegin)
13541    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13542
13543  compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack);
13544  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13545    return;
13546
13547  allocate_stack(common, (alt_max > 1 || (recurse_flags & recurse_flag_accept_found)) ? 2 : 1);
13548  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
13549
13550  if (alt_max > 1 || (recurse_flags & recurse_flag_accept_found))
13551    {
13552    if (alt_max > 3)
13553      put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(1));
13554    else
13555      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_count);
13556    }
13557
13558  add_jump(compiler, &match, JUMP(SLJIT_JUMP));
13559
13560  if (alt_count == 0)
13561    {
13562    /* Backtracking path entry. */
13563    SLJIT_ASSERT(common->currententry->backtrack_label == NULL);
13564    common->currententry->backtrack_label = LABEL();
13565    set_jumps(common->currententry->backtrack_calls, common->currententry->backtrack_label);
13566
13567    sljit_emit_fast_enter(compiler, TMP1, 0);
13568
13569    if (recurse_flags & recurse_flag_accept_found)
13570      accept_exit = CMP(SLJIT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);
13571
13572    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13573    /* Save return address. */
13574    OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(local_size - 1), TMP1, 0);
13575
13576    copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, recurse_flags);
13577
13578    if (alt_max > 1)
13579      {
13580      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13581      free_stack(common, 2);
13582
13583      if (alt_max > 3)
13584        {
13585        sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);
13586        sljit_set_put_label(put_label, LABEL());
13587        sljit_emit_op0(compiler, SLJIT_ENDBR);
13588        }
13589      else
13590        next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
13591      }
13592    else
13593      free_stack(common, (recurse_flags & recurse_flag_accept_found) ? 2 : 1);
13594    }
13595  else if (alt_max > 3)
13596    {
13597    sljit_set_put_label(put_label, LABEL());
13598    sljit_emit_op0(compiler, SLJIT_ENDBR);
13599    }
13600  else
13601    {
13602    JUMPHERE(next_alt);
13603    if (alt_count + 1 < alt_max)
13604      {
13605      SLJIT_ASSERT(alt_count == 1 && alt_max == 3);
13606      next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);
13607      }
13608    }
13609
13610  alt_count++;
13611
13612  compile_backtrackingpath(common, altbacktrack.top);
13613  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13614    return;
13615  set_jumps(altbacktrack.topbacktracks, LABEL());
13616
13617  if (*cc != OP_ALT)
13618    break;
13619
13620  altbacktrack.cc = cc + 1 + LINK_SIZE;
13621  cc += GET(cc, 1);
13622  }
13623
13624/* No alternative is matched. */
13625
13626quit = LABEL();
13627
13628copy_recurse_data(common, ccbegin, ccend, recurse_copy_private_to_global, local_size, private_data_size + local_size, recurse_flags);
13629
13630OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
13631free_stack(common, private_data_size + local_size);
13632OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
13633OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
13634
13635if (common->quit != NULL)
13636  {
13637  SLJIT_ASSERT(recurse_flags & recurse_flag_quit_found);
13638
13639  set_jumps(common->quit, LABEL());
13640  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
13641  copy_recurse_data(common, ccbegin, ccend, recurse_copy_shared_to_global, local_size, private_data_size + local_size, recurse_flags);
13642  JUMPTO(SLJIT_JUMP, quit);
13643  }
13644
13645if (recurse_flags & recurse_flag_accept_found)
13646  {
13647  JUMPHERE(accept_exit);
13648  free_stack(common, 2);
13649
13650  /* Save return address. */
13651  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP1, 0);
13652
13653  copy_recurse_data(common, ccbegin, ccend, recurse_copy_kept_shared_to_global, local_size, private_data_size + local_size, recurse_flags);
13654
13655  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
13656  free_stack(common, private_data_size + local_size);
13657  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
13658  OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
13659  }
13660
13661if (common->accept != NULL)
13662  {
13663  SLJIT_ASSERT(recurse_flags & recurse_flag_accept_found);
13664
13665  set_jumps(common->accept, LABEL());
13666
13667  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
13668  OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0);
13669
13670  allocate_stack(common, 2);
13671  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);
13672  }
13673
13674set_jumps(match, LABEL());
13675
13676OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
13677
13678copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, recurse_flags);
13679
13680OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), STACK(local_size - 1));
13681OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
13682OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
13683}
13684
/* The helper macros of the backtracking path generators are not used past
this point. */
#undef COMPILE_BACKTRACKINGPATH
#undef CURRENT_AS

/* Bits of the mode argument of jit_compile() that configure the compilation
instead of selecting a matching mode. jit_compile() reads them and then clears
them from mode before the remaining value is stored and compared. */
#define PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS \
  (PCRE2_JIT_INVALID_UTF)
13690
13691static int jit_compile(pcre2_code *code, sljit_u32 mode)
13692{
13693pcre2_real_code *re = (pcre2_real_code *)code;
13694struct sljit_compiler *compiler;
13695backtrack_common rootbacktrack;
13696compiler_common common_data;
13697compiler_common *common = &common_data;
13698const sljit_u8 *tables = re->tables;
13699void *allocator_data = &re->memctl;
13700int private_data_size;
13701PCRE2_SPTR ccend;
13702executable_functions *functions;
13703void *executable_func;
13704sljit_uw executable_size;
13705sljit_uw total_length;
13706struct sljit_label *mainloop_label = NULL;
13707struct sljit_label *continue_match_label;
13708struct sljit_label *empty_match_found_label = NULL;
13709struct sljit_label *empty_match_backtrack_label = NULL;
13710struct sljit_label *reset_match_label;
13711struct sljit_label *quit_label;
13712struct sljit_jump *jump;
13713struct sljit_jump *minlength_check_failed = NULL;
13714struct sljit_jump *empty_match = NULL;
13715struct sljit_jump *end_anchor_failed = NULL;
13716jump_list *reqcu_not_found = NULL;
13717
13718SLJIT_ASSERT(tables);
13719
13720#if HAS_VIRTUAL_REGISTERS == 1
13721SLJIT_ASSERT(sljit_get_register_index(TMP3) < 0 && sljit_get_register_index(ARGUMENTS) < 0 && sljit_get_register_index(RETURN_ADDR) < 0);
13722#elif HAS_VIRTUAL_REGISTERS == 0
13723SLJIT_ASSERT(sljit_get_register_index(TMP3) >= 0 && sljit_get_register_index(ARGUMENTS) >= 0 && sljit_get_register_index(RETURN_ADDR) >= 0);
13724#else
13725#error "Invalid value for HAS_VIRTUAL_REGISTERS"
13726#endif
13727
13728memset(&rootbacktrack, 0, sizeof(backtrack_common));
13729memset(common, 0, sizeof(compiler_common));
13730common->re = re;
13731common->name_table = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
13732rootbacktrack.cc = common->name_table + re->name_count * re->name_entry_size;
13733
13734#ifdef SUPPORT_UNICODE
13735common->invalid_utf = (mode & PCRE2_JIT_INVALID_UTF) != 0;
13736#endif /* SUPPORT_UNICODE */
13737mode &= ~PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS;
13738
13739common->start = rootbacktrack.cc;
13740common->read_only_data_head = NULL;
13741common->fcc = tables + fcc_offset;
13742common->lcc = (sljit_sw)(tables + lcc_offset);
13743common->mode = mode;
13744common->might_be_empty = (re->minlength == 0) || (re->flags & PCRE2_MATCH_EMPTY);
13745common->allow_empty_partial = (re->max_lookbehind > 0) || (re->flags & PCRE2_MATCH_EMPTY);
13746common->nltype = NLTYPE_FIXED;
13747switch(re->newline_convention)
13748  {
13749  case PCRE2_NEWLINE_CR: common->newline = CHAR_CR; break;
13750  case PCRE2_NEWLINE_LF: common->newline = CHAR_NL; break;
13751  case PCRE2_NEWLINE_CRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
13752  case PCRE2_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
13753  case PCRE2_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
13754  case PCRE2_NEWLINE_NUL: common->newline = CHAR_NUL; break;
13755  default: return PCRE2_ERROR_INTERNAL;
13756  }
13757common->nlmax = READ_CHAR_MAX;
13758common->nlmin = 0;
13759if (re->bsr_convention == PCRE2_BSR_UNICODE)
13760  common->bsr_nltype = NLTYPE_ANY;
13761else if (re->bsr_convention == PCRE2_BSR_ANYCRLF)
13762  common->bsr_nltype = NLTYPE_ANYCRLF;
13763else
13764  {
13765#ifdef BSR_ANYCRLF
13766  common->bsr_nltype = NLTYPE_ANYCRLF;
13767#else
13768  common->bsr_nltype = NLTYPE_ANY;
13769#endif
13770  }
13771common->bsr_nlmax = READ_CHAR_MAX;
13772common->bsr_nlmin = 0;
13773common->endonly = (re->overall_options & PCRE2_DOLLAR_ENDONLY) != 0;
13774common->ctypes = (sljit_sw)(tables + ctypes_offset);
13775common->name_count = re->name_count;
13776common->name_entry_size = re->name_entry_size;
13777common->unset_backref = (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) != 0;
13778common->alt_circumflex = (re->overall_options & PCRE2_ALT_CIRCUMFLEX) != 0;
13779#ifdef SUPPORT_UNICODE
13780/* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
13781common->utf = (re->overall_options & PCRE2_UTF) != 0;
13782common->ucp = (re->overall_options & PCRE2_UCP) != 0;
13783if (common->utf)
13784  {
13785  if (common->nltype == NLTYPE_ANY)
13786    common->nlmax = 0x2029;
13787  else if (common->nltype == NLTYPE_ANYCRLF)
13788    common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
13789  else
13790    {
13791    /* We only care about the first newline character. */
13792    common->nlmax = common->newline & 0xff;
13793    }
13794
13795  if (common->nltype == NLTYPE_FIXED)
13796    common->nlmin = common->newline & 0xff;
13797  else
13798    common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
13799
13800  if (common->bsr_nltype == NLTYPE_ANY)
13801    common->bsr_nlmax = 0x2029;
13802  else
13803    common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
13804  common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
13805  }
13806else
13807  common->invalid_utf = FALSE;
13808#endif /* SUPPORT_UNICODE */
13809ccend = bracketend(common->start);
13810
13811/* Calculate the local space size on the stack. */
13812common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw);
13813common->optimized_cbracket = (sljit_u8 *)SLJIT_MALLOC(re->top_bracket + 1, allocator_data);
13814if (!common->optimized_cbracket)
13815  return PCRE2_ERROR_NOMEMORY;
13816#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1
13817memset(common->optimized_cbracket, 0, re->top_bracket + 1);
13818#else
13819memset(common->optimized_cbracket, 1, re->top_bracket + 1);
13820#endif
13821
13822SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
13823#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2
13824common->capture_last_ptr = common->ovector_start;
13825common->ovector_start += sizeof(sljit_sw);
13826#endif
13827if (!check_opcode_types(common, common->start, ccend))
13828  {
13829  SLJIT_FREE(common->optimized_cbracket, allocator_data);
13830  return PCRE2_ERROR_NOMEMORY;
13831  }
13832
13833/* Checking flags and updating ovector_start. */
13834if (mode == PCRE2_JIT_COMPLETE && (re->flags & PCRE2_LASTSET) != 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
13835  {
13836  common->req_char_ptr = common->ovector_start;
13837  common->ovector_start += sizeof(sljit_sw);
13838  }
13839if (mode != PCRE2_JIT_COMPLETE)
13840  {
13841  common->start_used_ptr = common->ovector_start;
13842  common->ovector_start += sizeof(sljit_sw);
13843  if (mode == PCRE2_JIT_PARTIAL_SOFT)
13844    {
13845    common->hit_start = common->ovector_start;
13846    common->ovector_start += sizeof(sljit_sw);
13847    }
13848  }
13849if ((re->overall_options & (PCRE2_FIRSTLINE | PCRE2_USE_OFFSET_LIMIT)) != 0)
13850  {
13851  common->match_end_ptr = common->ovector_start;
13852  common->ovector_start += sizeof(sljit_sw);
13853  }
13854#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
13855common->control_head_ptr = 1;
13856#endif
13857if (common->control_head_ptr != 0)
13858  {
13859  common->control_head_ptr = common->ovector_start;
13860  common->ovector_start += sizeof(sljit_sw);
13861  }
13862if (common->has_set_som)
13863  {
13864  /* Saving the real start pointer is necessary. */
13865  common->start_ptr = common->ovector_start;
13866  common->ovector_start += sizeof(sljit_sw);
13867  }
13868
13869/* Aligning ovector to even number of sljit words. */
13870if ((common->ovector_start & sizeof(sljit_sw)) != 0)
13871  common->ovector_start += sizeof(sljit_sw);
13872
13873if (common->start_ptr == 0)
13874  common->start_ptr = OVECTOR(0);
13875
13876/* Capturing brackets cannot be optimized if callouts are allowed. */
13877if (common->capture_last_ptr != 0)
13878  memset(common->optimized_cbracket, 0, re->top_bracket + 1);
13879
13880SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
13881common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);
13882
13883total_length = ccend - common->start;
13884common->private_data_ptrs = (sljit_s32*)SLJIT_MALLOC(total_length * (sizeof(sljit_s32) + (common->has_then ? 1 : 0)), allocator_data);
13885if (!common->private_data_ptrs)
13886  {
13887  SLJIT_FREE(common->optimized_cbracket, allocator_data);
13888  return PCRE2_ERROR_NOMEMORY;
13889  }
13890memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32));
13891
13892private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
13893
13894if ((re->overall_options & PCRE2_ANCHORED) == 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 && !common->has_skip_in_assert_back)
13895  detect_early_fail(common, common->start, &private_data_size, 0, 0, TRUE);
13896
13897set_private_data_ptrs(common, &private_data_size, ccend);
13898
13899SLJIT_ASSERT(common->early_fail_start_ptr <= common->early_fail_end_ptr);
13900
13901if (private_data_size > SLJIT_MAX_LOCAL_SIZE)
13902  {
13903  SLJIT_FREE(common->private_data_ptrs, allocator_data);
13904  SLJIT_FREE(common->optimized_cbracket, allocator_data);
13905  return PCRE2_ERROR_NOMEMORY;
13906  }
13907
13908if (common->has_then)
13909  {
13910  common->then_offsets = (sljit_u8 *)(common->private_data_ptrs + total_length);
13911  memset(common->then_offsets, 0, total_length);
13912  set_then_offsets(common, common->start, NULL);
13913  }
13914
13915compiler = sljit_create_compiler(allocator_data, NULL);
13916if (!compiler)
13917  {
13918  SLJIT_FREE(common->optimized_cbracket, allocator_data);
13919  SLJIT_FREE(common->private_data_ptrs, allocator_data);
13920  return PCRE2_ERROR_NOMEMORY;
13921  }
13922common->compiler = compiler;
13923
13924/* Main pcre2_jit_exec entry. */
13925SLJIT_ASSERT((private_data_size & (sizeof(sljit_sw) - 1)) == 0);
13926sljit_emit_enter(compiler, 0, SLJIT_ARGS1(W, W), 5, 5, 0, 0, private_data_size);
13927
13928/* Register init. */
13929reset_ovector(common, (re->top_bracket + 1) * 2);
13930if (common->req_char_ptr != 0)
13931  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0);
13932
13933OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0);
13934OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0);
13935OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
13936OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
13937OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
13938OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
13939OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, end));
13940OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, start));
13941OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
13942OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0);
13943
13944if (common->early_fail_start_ptr < common->early_fail_end_ptr)
13945  reset_early_fail(common);
13946
13947if (mode == PCRE2_JIT_PARTIAL_SOFT)
13948  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
13949if (common->mark_ptr != 0)
13950  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
13951if (common->control_head_ptr != 0)
13952  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
13953
13954/* Main part of the matching */
13955if ((re->overall_options & PCRE2_ANCHORED) == 0)
13956  {
13957  mainloop_label = mainloop_entry(common);
13958  continue_match_label = LABEL();
13959  /* Forward search if possible. */
13960  if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
13961    {
13962    if (mode == PCRE2_JIT_COMPLETE && fast_forward_first_n_chars(common))
13963      ;
13964    else if ((re->flags & PCRE2_FIRSTSET) != 0)
13965      fast_forward_first_char(common);
13966    else if ((re->flags & PCRE2_STARTLINE) != 0)
13967      fast_forward_newline(common);
13968    else if ((re->flags & PCRE2_FIRSTMAPSET) != 0)
13969      fast_forward_start_bits(common);
13970    }
13971  }
13972else
13973  continue_match_label = LABEL();
13974
13975if (mode == PCRE2_JIT_COMPLETE && re->minlength > 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
13976  {
13977  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
13978  OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(re->minlength));
13979  minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0);
13980  }
13981if (common->req_char_ptr != 0)
13982  reqcu_not_found = search_requested_char(common, (PCRE2_UCHAR)(re->last_codeunit), (re->flags & PCRE2_LASTCASELESS) != 0, (re->flags & PCRE2_FIRSTSET) != 0);
13983
13984/* Store the current STR_PTR in OVECTOR(0). */
13985OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
13986/* Copy the limit of allowed recursions. */
13987OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH);
13988if (common->capture_last_ptr != 0)
13989  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, 0);
13990if (common->fast_forward_bc_ptr != NULL)
13991  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3, STR_PTR, 0);
13992
13993if (common->start_ptr != OVECTOR(0))
13994  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0);
13995
13996/* Copy the beginning of the string. */
13997if (mode == PCRE2_JIT_PARTIAL_SOFT)
13998  {
13999  jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
14000  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
14001  JUMPHERE(jump);
14002  }
14003else if (mode == PCRE2_JIT_PARTIAL_HARD)
14004  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
14005
14006compile_matchingpath(common, common->start, ccend, &rootbacktrack);
14007if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
14008  {
14009  sljit_free_compiler(compiler);
14010  SLJIT_FREE(common->optimized_cbracket, allocator_data);
14011  SLJIT_FREE(common->private_data_ptrs, allocator_data);
14012  PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14013  return PCRE2_ERROR_NOMEMORY;
14014  }
14015
14016if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
14017  end_anchor_failed = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0);
14018
14019if (common->might_be_empty)
14020  {
14021  empty_match = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
14022  empty_match_found_label = LABEL();
14023  }
14024
14025common->accept_label = LABEL();
14026if (common->accept != NULL)
14027  set_jumps(common->accept, common->accept_label);
14028
14029/* This means we have a match. Update the ovector. */
14030copy_ovector(common, re->top_bracket + 1);
14031common->quit_label = common->abort_label = LABEL();
14032if (common->quit != NULL)
14033  set_jumps(common->quit, common->quit_label);
14034if (common->abort != NULL)
14035  set_jumps(common->abort, common->abort_label);
14036if (minlength_check_failed != NULL)
14037  SET_LABEL(minlength_check_failed, common->abort_label);
14038
14039sljit_emit_op0(compiler, SLJIT_SKIP_FRAMES_BEFORE_RETURN);
14040sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);
14041
14042if (common->failed_match != NULL)
14043  {
14044  SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
14045  set_jumps(common->failed_match, LABEL());
14046  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
14047  JUMPTO(SLJIT_JUMP, common->abort_label);
14048  }
14049
14050if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
14051  JUMPHERE(end_anchor_failed);
14052
14053if (mode != PCRE2_JIT_COMPLETE)
14054  {
14055  common->partialmatchlabel = LABEL();
14056  set_jumps(common->partialmatch, common->partialmatchlabel);
14057  return_with_partial_match(common, common->quit_label);
14058  }
14059
14060if (common->might_be_empty)
14061  empty_match_backtrack_label = LABEL();
14062compile_backtrackingpath(common, rootbacktrack.top);
14063if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
14064  {
14065  sljit_free_compiler(compiler);
14066  SLJIT_FREE(common->optimized_cbracket, allocator_data);
14067  SLJIT_FREE(common->private_data_ptrs, allocator_data);
14068  PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14069  return PCRE2_ERROR_NOMEMORY;
14070  }
14071
14072SLJIT_ASSERT(rootbacktrack.prev == NULL);
14073reset_match_label = LABEL();
14074
14075if (mode == PCRE2_JIT_PARTIAL_SOFT)
14076  {
14077  /* Update hit_start only in the first time. */
14078  jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
14079  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
14080  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
14081  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0);
14082  JUMPHERE(jump);
14083  }
14084
14085/* Check we have remaining characters. */
14086if ((re->overall_options & PCRE2_ANCHORED) == 0 && common->match_end_ptr != 0)
14087  {
14088  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
14089  }
14090
14091OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP),
14092    (common->fast_forward_bc_ptr != NULL) ? (PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3) : common->start_ptr);
14093
14094if ((re->overall_options & PCRE2_ANCHORED) == 0)
14095  {
14096  if (common->ff_newline_shortcut != NULL)
14097    {
14098    /* There cannot be more newlines if PCRE2_FIRSTLINE is set. */
14099    if ((re->overall_options & PCRE2_FIRSTLINE) == 0)
14100      {
14101      if (common->match_end_ptr != 0)
14102        {
14103        OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
14104        OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
14105        CMPTO(SLJIT_LESS, STR_PTR, 0, TMP1, 0, common->ff_newline_shortcut);
14106        OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
14107        }
14108      else
14109        CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut);
14110      }
14111    }
14112  else
14113    CMPTO(SLJIT_LESS, STR_PTR, 0, (common->match_end_ptr == 0) ? STR_END : TMP1, 0, mainloop_label);
14114  }
14115
14116/* No more remaining characters. */
14117if (reqcu_not_found != NULL)
14118  set_jumps(reqcu_not_found, LABEL());
14119
14120if (mode == PCRE2_JIT_PARTIAL_SOFT)
14121  CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);
14122
14123OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
14124JUMPTO(SLJIT_JUMP, common->quit_label);
14125
14126flush_stubs(common);
14127
14128if (common->might_be_empty)
14129  {
14130  JUMPHERE(empty_match);
14131  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
14132  OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
14133  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
14134  JUMPTO(SLJIT_NOT_ZERO, empty_match_backtrack_label);
14135  OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
14136  JUMPTO(SLJIT_ZERO, empty_match_found_label);
14137  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
14138  CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
14139  JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);
14140  }
14141
14142common->fast_forward_bc_ptr = NULL;
14143common->early_fail_start_ptr = 0;
14144common->early_fail_end_ptr = 0;
14145common->currententry = common->entries;
14146common->local_quit_available = TRUE;
14147quit_label = common->quit_label;
14148if (common->currententry != NULL)
14149  {
14150  /* A free bit for each private data. */
14151  common->recurse_bitset_size = ((private_data_size / SSIZE_OF(sw)) + 7) >> 3;
14152  SLJIT_ASSERT(common->recurse_bitset_size > 0);
14153  common->recurse_bitset = (sljit_u8*)SLJIT_MALLOC(common->recurse_bitset_size, allocator_data);;
14154
14155  if (common->recurse_bitset != NULL)
14156    {
14157    do
14158      {
14159      /* Might add new entries. */
14160      compile_recurse(common);
14161      if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
14162        break;
14163      flush_stubs(common);
14164      common->currententry = common->currententry->next;
14165      }
14166    while (common->currententry != NULL);
14167
14168    SLJIT_FREE(common->recurse_bitset, allocator_data);
14169    }
14170
14171  if (common->currententry != NULL)
14172    {
14173    /* The common->recurse_bitset has been freed. */
14174    SLJIT_ASSERT(sljit_get_compiler_error(compiler) || common->recurse_bitset == NULL);
14175
14176    sljit_free_compiler(compiler);
14177    SLJIT_FREE(common->optimized_cbracket, allocator_data);
14178    SLJIT_FREE(common->private_data_ptrs, allocator_data);
14179    PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14180    return PCRE2_ERROR_NOMEMORY;
14181    }
14182  }
14183common->local_quit_available = FALSE;
14184common->quit_label = quit_label;
14185
/* Allocating stack; returns with PCRE2_ERROR_JIT_STACKLIMIT if it fails. */
14187/* This is a (really) rare case. */
14188set_jumps(common->stackalloc, LABEL());
14189/* RETURN_ADDR is not a saved register. */
14190sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
14191
14192SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
14193
14194OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
14195OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
14196OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_LIMIT, 0, SLJIT_IMM, STACK_GROWTH_RATE);
14197OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, stack));
14198OP1(SLJIT_MOV, STACK_LIMIT, 0, TMP2, 0);
14199
14200sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(sljit_stack_resize));
14201
14202jump = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
14203OP1(SLJIT_MOV, TMP2, 0, STACK_LIMIT, 0);
14204OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_RETURN_REG, 0);
14205OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
14206OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
14207OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
14208
14209/* Allocation failed. */
14210JUMPHERE(jump);
14211/* We break the return address cache here, but this is a really rare case. */
14212OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_JIT_STACKLIMIT);
14213JUMPTO(SLJIT_JUMP, common->quit_label);
14214
14215/* Call limit reached. */
14216set_jumps(common->calllimit, LABEL());
14217OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_MATCHLIMIT);
14218JUMPTO(SLJIT_JUMP, common->quit_label);
14219
14220if (common->revertframes != NULL)
14221  {
14222  set_jumps(common->revertframes, LABEL());
14223  do_revertframes(common);
14224  }
14225if (common->wordboundary != NULL)
14226  {
14227  set_jumps(common->wordboundary, LABEL());
14228  check_wordboundary(common);
14229  }
14230if (common->anynewline != NULL)
14231  {
14232  set_jumps(common->anynewline, LABEL());
14233  check_anynewline(common);
14234  }
14235if (common->hspace != NULL)
14236  {
14237  set_jumps(common->hspace, LABEL());
14238  check_hspace(common);
14239  }
14240if (common->vspace != NULL)
14241  {
14242  set_jumps(common->vspace, LABEL());
14243  check_vspace(common);
14244  }
14245if (common->casefulcmp != NULL)
14246  {
14247  set_jumps(common->casefulcmp, LABEL());
14248  do_casefulcmp(common);
14249  }
14250if (common->caselesscmp != NULL)
14251  {
14252  set_jumps(common->caselesscmp, LABEL());
14253  do_caselesscmp(common);
14254  }
14255if (common->reset_match != NULL)
14256  {
14257  set_jumps(common->reset_match, LABEL());
14258  do_reset_match(common, (re->top_bracket + 1) * 2);
14259  CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
14260  OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
14261  JUMPTO(SLJIT_JUMP, reset_match_label);
14262  }
14263#ifdef SUPPORT_UNICODE
14264#if PCRE2_CODE_UNIT_WIDTH == 8
14265if (common->utfreadchar != NULL)
14266  {
14267  set_jumps(common->utfreadchar, LABEL());
14268  do_utfreadchar(common);
14269  }
14270if (common->utfreadtype8 != NULL)
14271  {
14272  set_jumps(common->utfreadtype8, LABEL());
14273  do_utfreadtype8(common);
14274  }
14275if (common->utfpeakcharback != NULL)
14276  {
14277  set_jumps(common->utfpeakcharback, LABEL());
14278  do_utfpeakcharback(common);
14279  }
14280#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
14281#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
14282if (common->utfreadchar_invalid != NULL)
14283  {
14284  set_jumps(common->utfreadchar_invalid, LABEL());
14285  do_utfreadchar_invalid(common);
14286  }
14287if (common->utfreadnewline_invalid != NULL)
14288  {
14289  set_jumps(common->utfreadnewline_invalid, LABEL());
14290  do_utfreadnewline_invalid(common);
14291  }
14292if (common->utfmoveback_invalid)
14293  {
14294  set_jumps(common->utfmoveback_invalid, LABEL());
14295  do_utfmoveback_invalid(common);
14296  }
14297if (common->utfpeakcharback_invalid)
14298  {
14299  set_jumps(common->utfpeakcharback_invalid, LABEL());
14300  do_utfpeakcharback_invalid(common);
14301  }
14302#endif /* PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16 */
14303if (common->getucd != NULL)
14304  {
14305  set_jumps(common->getucd, LABEL());
14306  do_getucd(common);
14307  }
14308if (common->getucdtype != NULL)
14309  {
14310  set_jumps(common->getucdtype, LABEL());
14311  do_getucdtype(common);
14312  }
14313#endif /* SUPPORT_UNICODE */
14314
14315SLJIT_FREE(common->optimized_cbracket, allocator_data);
14316SLJIT_FREE(common->private_data_ptrs, allocator_data);
14317
14318executable_func = sljit_generate_code(compiler);
14319executable_size = sljit_get_generated_code_size(compiler);
14320sljit_free_compiler(compiler);
14321
14322if (executable_func == NULL)
14323  {
14324  PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14325  return PCRE2_ERROR_NOMEMORY;
14326  }
14327
14328/* Reuse the function descriptor if possible. */
14329if (re->executable_jit != NULL)
14330  functions = (executable_functions *)re->executable_jit;
14331else
14332  {
14333  functions = SLJIT_MALLOC(sizeof(executable_functions), allocator_data);
14334  if (functions == NULL)
14335    {
14336    /* This case is highly unlikely since we just recently
14337    freed a lot of memory. Not impossible though. */
14338    sljit_free_code(executable_func, NULL);
14339    PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14340    return PCRE2_ERROR_NOMEMORY;
14341    }
14342  memset(functions, 0, sizeof(executable_functions));
14343  functions->top_bracket = re->top_bracket + 1;
14344  functions->limit_match = re->limit_match;
14345  re->executable_jit = functions;
14346  }
14347
14348/* Turn mode into an index. */
14349if (mode == PCRE2_JIT_COMPLETE)
14350  mode = 0;
14351else
14352  mode = (mode == PCRE2_JIT_PARTIAL_SOFT) ? 1 : 2;
14353
14354SLJIT_ASSERT(mode < JIT_NUMBER_OF_COMPILE_MODES);
14355functions->executable_funcs[mode] = executable_func;
14356functions->read_only_data_heads[mode] = common->read_only_data_head;
14357functions->executable_sizes[mode] = executable_size;
14358return 0;
14359}
14360
14361#endif
14362
14363/*************************************************
14364*        JIT compile a Regular Expression        *
14365*************************************************/
14366
/* This function uses JIT to convert a previously-compiled pattern into machine
code.
14369
14370Arguments:
14371  code          a compiled pattern
14372  options       JIT option bits
14373
14374Returns:        0: success or (*NOJIT) was used
14375               <0: an error code
14376*/
14377
/* Every option bit that pcre2_jit_compile() accepts. A call that sets any bit
outside this mask is rejected with PCRE2_ERROR_JIT_BADOPTION. */

#define PUBLIC_JIT_COMPILE_OPTIONS \
  (PCRE2_JIT_COMPLETE     | \
   PCRE2_JIT_PARTIAL_SOFT | \
   PCRE2_JIT_PARTIAL_HARD | \
   PCRE2_JIT_INVALID_UTF)
14380
14381PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
14382pcre2_jit_compile(pcre2_code *code, uint32_t options)
14383{
14384pcre2_real_code *re = (pcre2_real_code *)code;
14385#ifdef SUPPORT_JIT
14386executable_functions *functions;
14387static int executable_allocator_is_working = -1;
14388#endif
14389
14390if (code == NULL)
14391  return PCRE2_ERROR_NULL;
14392
14393if ((options & ~PUBLIC_JIT_COMPILE_OPTIONS) != 0)
14394  return PCRE2_ERROR_JIT_BADOPTION;
14395
14396/* Support for invalid UTF was first introduced in JIT, with the option
14397PCRE2_JIT_INVALID_UTF. Later, support was added to the interpreter, and the
14398compile-time option PCRE2_MATCH_INVALID_UTF was created. This is now the
14399preferred feature, with the earlier option deprecated. However, for backward
14400compatibility, if the earlier option is set, it forces the new option so that
14401if JIT matching falls back to the interpreter, there is still support for
14402invalid UTF. However, if this function has already been successfully called
14403without PCRE2_JIT_INVALID_UTF and without PCRE2_MATCH_INVALID_UTF (meaning that
14404non-invalid-supporting JIT code was compiled), give an error.
14405
14406If in the future support for PCRE2_JIT_INVALID_UTF is withdrawn, the following
14407actions are needed:
14408
14409  1. Remove the definition from pcre2.h.in and from the list in
14410     PUBLIC_JIT_COMPILE_OPTIONS above.
14411
14412  2. Replace PCRE2_JIT_INVALID_UTF with a local flag in this module.
14413
14414  3. Replace PCRE2_JIT_INVALID_UTF in pcre2_jit_test.c.
14415
14416  4. Delete the following short block of code. The setting of "re" and
14417     "functions" can be moved into the JIT-only block below, but if that is
14418     done, (void)re and (void)functions will be needed in the non-JIT case, to
14419     avoid compiler warnings.
14420*/
14421
14422#ifdef SUPPORT_JIT
14423functions = (executable_functions *)re->executable_jit;
14424#endif
14425
14426if ((options & PCRE2_JIT_INVALID_UTF) != 0)
14427  {
14428  if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) == 0)
14429    {
14430#ifdef SUPPORT_JIT
14431    if (functions != NULL) return PCRE2_ERROR_JIT_BADOPTION;
14432#endif
14433    re->overall_options |= PCRE2_MATCH_INVALID_UTF;
14434    }
14435  }
14436
14437/* The above tests are run with and without JIT support. This means that
14438PCRE2_JIT_INVALID_UTF propagates back into the regex options (ensuring
14439interpreter support) even in the absence of JIT. But now, if there is no JIT
14440support, give an error return. */
14441
14442#ifndef SUPPORT_JIT
14443return PCRE2_ERROR_JIT_BADOPTION;
14444#else  /* SUPPORT_JIT */
14445
14446/* There is JIT support. Do the necessary. */
14447
14448if ((re->flags & PCRE2_NOJIT) != 0) return 0;
14449
14450if (executable_allocator_is_working == -1)
14451  {
14452  /* Checks whether the executable allocator is working. This check
14453     might run multiple times in multi-threaded environments, but the
14454     result should not be affected by it. */
14455  void *ptr = SLJIT_MALLOC_EXEC(32, NULL);
14456  if (ptr != NULL)
14457    {
14458    SLJIT_FREE_EXEC(((sljit_u8*)(ptr)) + SLJIT_EXEC_OFFSET(ptr), NULL);
14459    executable_allocator_is_working = 1;
14460    }
14461  else executable_allocator_is_working = 0;
14462  }
14463
14464if (!executable_allocator_is_working)
14465  return PCRE2_ERROR_NOMEMORY;
14466
14467if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0)
14468  options |= PCRE2_JIT_INVALID_UTF;
14469
14470if ((options & PCRE2_JIT_COMPLETE) != 0 && (functions == NULL
14471    || functions->executable_funcs[0] == NULL)) {
14472  uint32_t excluded_options = (PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_PARTIAL_HARD);
14473  int result = jit_compile(code, options & ~excluded_options);
14474  if (result != 0)
14475    return result;
14476  }
14477
14478if ((options & PCRE2_JIT_PARTIAL_SOFT) != 0 && (functions == NULL
14479    || functions->executable_funcs[1] == NULL)) {
14480  uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_HARD);
14481  int result = jit_compile(code, options & ~excluded_options);
14482  if (result != 0)
14483    return result;
14484  }
14485
14486if ((options & PCRE2_JIT_PARTIAL_HARD) != 0 && (functions == NULL
14487    || functions->executable_funcs[2] == NULL)) {
14488  uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT);
14489  int result = jit_compile(code, options & ~excluded_options);
14490  if (result != 0)
14491    return result;
14492  }
14493
14494return 0;
14495
14496#endif  /* SUPPORT_JIT */
14497}
14498
14499/* JIT compiler uses an all-in-one approach. This improves security,
14500   since the code generator functions are not exported. */
14501
14502#define INCLUDED_FROM_PCRE2_JIT_COMPILE
14503
14504#include "pcre2_jit_match.c"
14505#include "pcre2_jit_misc.c"
14506
14507/* End of pcre2_jit_compile.c */
14508