1// Copyright 2013 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#if V8_TARGET_ARCH_ARM64
6
7#include "src/regexp/arm64/regexp-macro-assembler-arm64.h"
8
9#include "src/codegen/arm64/macro-assembler-arm64-inl.h"
10#include "src/codegen/macro-assembler.h"
11#include "src/logging/log.h"
12#include "src/objects/objects-inl.h"
13#include "src/regexp/regexp-macro-assembler.h"
14#include "src/regexp/regexp-stack.h"
15#include "src/snapshot/embedded/embedded-data.h"
16#include "src/strings/unicode.h"
17
18namespace v8 {
19namespace internal {
20
21/*
22 * This assembler uses the following register assignment convention:
 * - w19     : Used to temporarily store a value before a call to C code.
24 *             See CheckNotBackReferenceIgnoreCase.
25 * - x20     : Pointer to the current Code object,
26 *             it includes the heap object tag.
27 * - w21     : Current position in input, as negative offset from
28 *             the end of the string. Please notice that this is
29 *             the byte offset, not the character offset!
30 * - w22     : Currently loaded character. Must be loaded using
31 *             LoadCurrentCharacter before using any of the dispatch methods.
32 * - x23     : Points to tip of backtrack stack.
33 * - w24     : Position of the first character minus one: non_position_value.
34 *             Used to initialize capture registers.
35 * - x25     : Address at the end of the input string: input_end.
36 *             Points to byte after last character in input.
37 * - x26     : Address at the start of the input string: input_start.
38 * - w27     : Where to start in the input string.
39 * - x28     : Output array pointer.
40 * - x29/fp  : Frame pointer. Used to access arguments, local variables and
41 *             RegExp registers.
42 * - x16/x17 : IP registers, used by assembler. Very volatile.
43 * - sp      : Points to tip of C stack.
44 *
45 * - x0-x7   : Used as a cache to store 32 bit capture registers. These
46 *             registers need to be retained every time a call to C code
47 *             is done.
48 *
49 * The remaining registers are free for computations.
50 * Each call to a public method should retain this convention.
51 *
52 * The stack will have the following structure:
53 *
54 *  Location     Name               Description
55 *               (as referred to
56 *               in the code)
57 *
58 *  - fp[104]    Address regexp     Address of the JSRegExp object. Unused in
59 *                                  native code, passed to match signature of
60 *                                  the interpreter.
61 *  - fp[96]     isolate            Address of the current isolate.
62 *  ^^^^^^^^^ sp when called ^^^^^^^^^
63 *  - fp[16..88] r19-r28            Backup of CalleeSaved registers.
64 *  - fp[8]      lr                 Return from the RegExp code.
65 *  - fp[0]      fp                 Old frame pointer.
66 *  ^^^^^^^^^ fp ^^^^^^^^^
67 *  - fp[-8]     direct_call        1 => Direct call from JavaScript code.
68 *                                  0 => Call through the runtime system.
69 *  - fp[-16]    output_size        Output may fit multiple sets of matches.
70 *  - fp[-24]    input              Handle containing the input string.
71 *  - fp[-32]    success_counter
72 *  ^^^^^^^^^^^^^ From here and downwards we store 32 bit values ^^^^^^^^^^^^^
73 *  - fp[-40]    register N         Capture registers initialized with
74 *  - fp[-44]    register N + 1     non_position_value.
75 *               ...                The first kNumCachedRegisters (N) registers
76 *               ...                are cached in x0 to x7.
77 *               ...                Only positions must be stored in the first
78 *  -            ...                num_saved_registers_ registers.
79 *  -            ...
80 *  -            register N + num_registers - 1
81 *  ^^^^^^^^^ sp ^^^^^^^^^
82 *
83 * The first num_saved_registers_ registers are initialized to point to
84 * "character -1" in the string (i.e., char_size() bytes before the first
85 * character of the string). The remaining registers start out as garbage.
86 *
87 * The data up to the return address must be placed there by the calling
88 * code and the remaining arguments are passed in registers, e.g. by calling the
89 * code entry as cast to a function with the signature:
90 * int (*match)(String input_string,
91 *              int start_index,
92 *              Address start,
93 *              Address end,
94 *              int* capture_output_array,
95 *              int num_capture_registers,
96 *              bool direct_call = false,
97 *              Isolate* isolate,
98 *              Address regexp);
99 * The call is performed by NativeRegExpMacroAssembler::Execute()
100 * (in regexp-macro-assembler.cc) via the GeneratedCode wrapper.
101 */
102
103#define __ ACCESS_MASM(masm_)
104
105const int RegExpMacroAssemblerARM64::kRegExpCodeSize;
106
107RegExpMacroAssemblerARM64::RegExpMacroAssemblerARM64(Isolate* isolate,
108                                                     Zone* zone, Mode mode,
109                                                     int registers_to_save)
110    : NativeRegExpMacroAssembler(isolate, zone),
111      masm_(std::make_unique<MacroAssembler>(
112          isolate, CodeObjectRequired::kYes,
113          NewAssemblerBuffer(kRegExpCodeSize))),
114      no_root_array_scope_(masm_.get()),
115      mode_(mode),
116      num_registers_(registers_to_save),
117      num_saved_registers_(registers_to_save),
118      entry_label_(),
119      start_label_(),
120      success_label_(),
121      backtrack_label_(),
122      exit_label_() {
123  DCHECK_EQ(0, registers_to_save % 2);
124  // We can cache at most 16 W registers in x0-x7.
125  STATIC_ASSERT(kNumCachedRegisters <= 16);
126  STATIC_ASSERT((kNumCachedRegisters % 2) == 0);
127  __ CallTarget();
128
129  __ B(&entry_label_);   // We'll write the entry code later.
130  __ Bind(&start_label_);  // And then continue from here.
131}
132
133RegExpMacroAssemblerARM64::~RegExpMacroAssemblerARM64() = default;
134
135void RegExpMacroAssemblerARM64::AbortedCodeGeneration() {
136  masm_->AbortedCodeGeneration();
137  entry_label_.Unuse();
138  start_label_.Unuse();
139  success_label_.Unuse();
140  backtrack_label_.Unuse();
141  exit_label_.Unuse();
142  check_preempt_label_.Unuse();
143  stack_overflow_label_.Unuse();
144  fallback_label_.Unuse();
145}
146
147int RegExpMacroAssemblerARM64::stack_limit_slack()  {
148  return RegExpStack::kStackLimitSlack;
149}
150
151
152void RegExpMacroAssemblerARM64::AdvanceCurrentPosition(int by) {
153  if (by != 0) {
154    __ Add(current_input_offset(),
155           current_input_offset(), by * char_size());
156  }
157}
158
159
160void RegExpMacroAssemblerARM64::AdvanceRegister(int reg, int by) {
161  DCHECK((reg >= 0) && (reg < num_registers_));
162  if (by != 0) {
163    RegisterState register_state = GetRegisterState(reg);
164    switch (register_state) {
165      case STACKED:
166        __ Ldr(w10, register_location(reg));
167        __ Add(w10, w10, by);
168        __ Str(w10, register_location(reg));
169        break;
170      case CACHED_LSW: {
171        Register to_advance = GetCachedRegister(reg);
172        __ Add(to_advance, to_advance, by);
173        break;
174      }
175      case CACHED_MSW: {
176        Register to_advance = GetCachedRegister(reg);
177        // Sign-extend to int64, shift as uint64, cast back to int64.
178        __ Add(
179            to_advance, to_advance,
180            static_cast<int64_t>(static_cast<uint64_t>(static_cast<int64_t>(by))
181                                 << kWRegSizeInBits));
182        break;
183      }
184      default:
185        UNREACHABLE();
186    }
187  }
188}
189
190
191void RegExpMacroAssemblerARM64::Backtrack() {
192  CheckPreemption();
193  if (has_backtrack_limit()) {
194    Label next;
195    UseScratchRegisterScope temps(masm_.get());
196    Register scratch = temps.AcquireW();
197    __ Ldr(scratch, MemOperand(frame_pointer(), kBacktrackCount));
198    __ Add(scratch, scratch, 1);
199    __ Str(scratch, MemOperand(frame_pointer(), kBacktrackCount));
200    __ Cmp(scratch, Operand(backtrack_limit()));
201    __ B(ne, &next);
202
203    // Backtrack limit exceeded.
204    if (can_fallback()) {
205      __ B(&fallback_label_);
206    } else {
207      // Can't fallback, so we treat it as a failed match.
208      Fail();
209    }
210
211    __ bind(&next);
212  }
213  Pop(w10);
214  __ Add(x10, code_pointer(), Operand(w10, UXTW));
215  __ Br(x10);
216}
217
218
219void RegExpMacroAssemblerARM64::Bind(Label* label) {
220  __ Bind(label);
221}
222
223void RegExpMacroAssemblerARM64::BindJumpTarget(Label* label) {
224  __ BindJumpTarget(label);
225}
226
227void RegExpMacroAssemblerARM64::CheckCharacter(uint32_t c, Label* on_equal) {
228  CompareAndBranchOrBacktrack(current_character(), c, eq, on_equal);
229}
230
231void RegExpMacroAssemblerARM64::CheckCharacterGT(base::uc16 limit,
232                                                 Label* on_greater) {
233  CompareAndBranchOrBacktrack(current_character(), limit, hi, on_greater);
234}
235
236void RegExpMacroAssemblerARM64::CheckAtStart(int cp_offset,
237                                             Label* on_at_start) {
238  __ Add(w10, current_input_offset(),
239         Operand(-char_size() + cp_offset * char_size()));
240  __ Cmp(w10, string_start_minus_one());
241  BranchOrBacktrack(eq, on_at_start);
242}
243
244void RegExpMacroAssemblerARM64::CheckNotAtStart(int cp_offset,
245                                                Label* on_not_at_start) {
246  __ Add(w10, current_input_offset(),
247         Operand(-char_size() + cp_offset * char_size()));
248  __ Cmp(w10, string_start_minus_one());
249  BranchOrBacktrack(ne, on_not_at_start);
250}
251
252void RegExpMacroAssemblerARM64::CheckCharacterLT(base::uc16 limit,
253                                                 Label* on_less) {
254  CompareAndBranchOrBacktrack(current_character(), limit, lo, on_less);
255}
256
257void RegExpMacroAssemblerARM64::CheckCharacters(
258    base::Vector<const base::uc16> str, int cp_offset, Label* on_failure,
259    bool check_end_of_string) {
260  // This method is only ever called from the cctests.
261
262  if (check_end_of_string) {
263    // Is last character of required match inside string.
264    CheckPosition(cp_offset + str.length() - 1, on_failure);
265  }
266
267  Register characters_address = x11;
268
269  __ Add(characters_address,
270         input_end(),
271         Operand(current_input_offset(), SXTW));
272  if (cp_offset != 0) {
273    __ Add(characters_address, characters_address, cp_offset * char_size());
274  }
275
276  for (int i = 0; i < str.length(); i++) {
277    if (mode_ == LATIN1) {
278      __ Ldrb(w10, MemOperand(characters_address, 1, PostIndex));
279      DCHECK_GE(String::kMaxOneByteCharCode, str[i]);
280    } else {
281      __ Ldrh(w10, MemOperand(characters_address, 2, PostIndex));
282    }
283    CompareAndBranchOrBacktrack(w10, str[i], ne, on_failure);
284  }
285}
286
287void RegExpMacroAssemblerARM64::CheckGreedyLoop(Label* on_equal) {
288  __ Ldr(w10, MemOperand(backtrack_stackpointer()));
289  __ Cmp(current_input_offset(), w10);
290  __ Cset(x11, eq);
291  __ Add(backtrack_stackpointer(),
292         backtrack_stackpointer(), Operand(x11, LSL, kWRegSizeLog2));
293  BranchOrBacktrack(eq, on_equal);
294}
295
296void RegExpMacroAssemblerARM64::PushCachedRegisters() {
297  CPURegList cached_registers(CPURegister::kRegister, kXRegSizeInBits, 0, 7);
298  DCHECK_EQ(kNumCachedRegisters, cached_registers.Count() * 2);
299  __ PushCPURegList(cached_registers);
300}
301
302void RegExpMacroAssemblerARM64::PopCachedRegisters() {
303  CPURegList cached_registers(CPURegister::kRegister, kXRegSizeInBits, 0, 7);
304  DCHECK_EQ(kNumCachedRegisters, cached_registers.Count() * 2);
305  __ PopCPURegList(cached_registers);
306}
307
308void RegExpMacroAssemblerARM64::CheckNotBackReferenceIgnoreCase(
309    int start_reg, bool read_backward, bool unicode, Label* on_no_match) {
310  Label fallthrough;
311
312  Register capture_start_offset = w10;
313  // Save the capture length in a callee-saved register so it will
314  // be preserved if we call a C helper.
315  Register capture_length = w19;
316  DCHECK(kCalleeSaved.IncludesAliasOf(capture_length));
317
318  // Find length of back-referenced capture.
319  DCHECK_EQ(0, start_reg % 2);
320  if (start_reg < kNumCachedRegisters) {
321    __ Mov(capture_start_offset.X(), GetCachedRegister(start_reg));
322    __ Lsr(x11, GetCachedRegister(start_reg), kWRegSizeInBits);
323  } else {
324    __ Ldp(w11, capture_start_offset, capture_location(start_reg, x10));
325  }
326  __ Sub(capture_length, w11, capture_start_offset);  // Length to check.
327
328  // At this point, the capture registers are either both set or both cleared.
329  // If the capture length is zero, then the capture is either empty or cleared.
330  // Fall through in both cases.
331  __ CompareAndBranch(capture_length, Operand(0), eq, &fallthrough);
332
333  // Check that there are enough characters left in the input.
334  if (read_backward) {
335    __ Add(w12, string_start_minus_one(), capture_length);
336    __ Cmp(current_input_offset(), w12);
337    BranchOrBacktrack(le, on_no_match);
338  } else {
339    __ Cmn(capture_length, current_input_offset());
340    BranchOrBacktrack(gt, on_no_match);
341  }
342
343  if (mode_ == LATIN1) {
344    Label success;
345    Label fail;
346    Label loop_check;
347
348    Register capture_start_address = x12;
349    Register capture_end_addresss = x13;
350    Register current_position_address = x14;
351
352    __ Add(capture_start_address,
353           input_end(),
354           Operand(capture_start_offset, SXTW));
355    __ Add(capture_end_addresss,
356           capture_start_address,
357           Operand(capture_length, SXTW));
358    __ Add(current_position_address,
359           input_end(),
360           Operand(current_input_offset(), SXTW));
361    if (read_backward) {
362      // Offset by length when matching backwards.
363      __ Sub(current_position_address, current_position_address,
364             Operand(capture_length, SXTW));
365    }
366
367    Label loop;
368    __ Bind(&loop);
369    __ Ldrb(w10, MemOperand(capture_start_address, 1, PostIndex));
370    __ Ldrb(w11, MemOperand(current_position_address, 1, PostIndex));
371    __ Cmp(w10, w11);
372    __ B(eq, &loop_check);
373
374    // Mismatch, try case-insensitive match (converting letters to lower-case).
375    __ Orr(w10, w10, 0x20);  // Convert capture character to lower-case.
376    __ Orr(w11, w11, 0x20);  // Also convert input character.
377    __ Cmp(w11, w10);
378    __ B(ne, &fail);
379    __ Sub(w10, w10, 'a');
380    __ Cmp(w10, 'z' - 'a');  // Is w10 a lowercase letter?
381    __ B(ls, &loop_check);  // In range 'a'-'z'.
382    // Latin-1: Check for values in range [224,254] but not 247.
383    __ Sub(w10, w10, 224 - 'a');
384    __ Cmp(w10, 254 - 224);
385    __ Ccmp(w10, 247 - 224, ZFlag, ls);  // Check for 247.
386    __ B(eq, &fail);  // Weren't Latin-1 letters.
387
388    __ Bind(&loop_check);
389    __ Cmp(capture_start_address, capture_end_addresss);
390    __ B(lt, &loop);
391    __ B(&success);
392
393    __ Bind(&fail);
394    BranchOrBacktrack(al, on_no_match);
395
396    __ Bind(&success);
397    // Compute new value of character position after the matched part.
398    __ Sub(current_input_offset().X(), current_position_address, input_end());
399    if (read_backward) {
400      __ Sub(current_input_offset().X(), current_input_offset().X(),
401             Operand(capture_length, SXTW));
402    }
403    if (FLAG_debug_code) {
404      __ Cmp(current_input_offset().X(), Operand(current_input_offset(), SXTW));
405      __ Ccmp(current_input_offset(), 0, NoFlag, eq);
406      // The current input offset should be <= 0, and fit in a W register.
407      __ Check(le, AbortReason::kOffsetOutOfRange);
408    }
409  } else {
410    DCHECK(mode_ == UC16);
411    int argument_count = 4;
412
413    PushCachedRegisters();
414
415    // Put arguments into arguments registers.
416    // Parameters are
417    //   x0: Address byte_offset1 - Address captured substring's start.
418    //   x1: Address byte_offset2 - Address of current character position.
419    //   w2: size_t byte_length - length of capture in bytes(!)
420    //   x3: Isolate* isolate.
421
422    // Address of start of capture.
423    __ Add(x0, input_end(), Operand(capture_start_offset, SXTW));
424    // Length of capture.
425    __ Mov(w2, capture_length);
426    // Address of current input position.
427    __ Add(x1, input_end(), Operand(current_input_offset(), SXTW));
428    if (read_backward) {
429      __ Sub(x1, x1, Operand(capture_length, SXTW));
430    }
431    // Isolate.
432    __ Mov(x3, ExternalReference::isolate_address(isolate()));
433
434    {
435      AllowExternalCallThatCantCauseGC scope(masm_.get());
436      ExternalReference function =
437          unicode
438              ? ExternalReference::re_case_insensitive_compare_unicode()
439              : ExternalReference::re_case_insensitive_compare_non_unicode();
440      __ CallCFunction(function, argument_count);
441    }
442
443    // Check if function returned non-zero for success or zero for failure.
444    // x0 is one of the registers used as a cache so it must be tested before
445    // the cache is restored.
446    __ Cmp(x0, 0);
447    PopCachedRegisters();
448    BranchOrBacktrack(eq, on_no_match);
449
450    // On success, advance position by length of capture.
451    if (read_backward) {
452      __ Sub(current_input_offset(), current_input_offset(), capture_length);
453    } else {
454      __ Add(current_input_offset(), current_input_offset(), capture_length);
455    }
456  }
457
458  __ Bind(&fallthrough);
459}
460
461void RegExpMacroAssemblerARM64::CheckNotBackReference(int start_reg,
462                                                      bool read_backward,
463                                                      Label* on_no_match) {
464  Label fallthrough;
465
466  Register capture_start_address = x12;
467  Register capture_end_address = x13;
468  Register current_position_address = x14;
469  Register capture_length = w15;
470
471  // Find length of back-referenced capture.
472  DCHECK_EQ(0, start_reg % 2);
473  if (start_reg < kNumCachedRegisters) {
474    __ Mov(x10, GetCachedRegister(start_reg));
475    __ Lsr(x11, GetCachedRegister(start_reg), kWRegSizeInBits);
476  } else {
477    __ Ldp(w11, w10, capture_location(start_reg, x10));
478  }
479  __ Sub(capture_length, w11, w10);  // Length to check.
480
481  // At this point, the capture registers are either both set or both cleared.
482  // If the capture length is zero, then the capture is either empty or cleared.
483  // Fall through in both cases.
484  __ CompareAndBranch(capture_length, Operand(0), eq, &fallthrough);
485
486  // Check that there are enough characters left in the input.
487  if (read_backward) {
488    __ Add(w12, string_start_minus_one(), capture_length);
489    __ Cmp(current_input_offset(), w12);
490    BranchOrBacktrack(le, on_no_match);
491  } else {
492    __ Cmn(capture_length, current_input_offset());
493    BranchOrBacktrack(gt, on_no_match);
494  }
495
496  // Compute pointers to match string and capture string
497  __ Add(capture_start_address, input_end(), Operand(w10, SXTW));
498  __ Add(capture_end_address,
499         capture_start_address,
500         Operand(capture_length, SXTW));
501  __ Add(current_position_address,
502         input_end(),
503         Operand(current_input_offset(), SXTW));
504  if (read_backward) {
505    // Offset by length when matching backwards.
506    __ Sub(current_position_address, current_position_address,
507           Operand(capture_length, SXTW));
508  }
509
510  Label loop;
511  __ Bind(&loop);
512  if (mode_ == LATIN1) {
513    __ Ldrb(w10, MemOperand(capture_start_address, 1, PostIndex));
514    __ Ldrb(w11, MemOperand(current_position_address, 1, PostIndex));
515  } else {
516    DCHECK(mode_ == UC16);
517    __ Ldrh(w10, MemOperand(capture_start_address, 2, PostIndex));
518    __ Ldrh(w11, MemOperand(current_position_address, 2, PostIndex));
519  }
520  __ Cmp(w10, w11);
521  BranchOrBacktrack(ne, on_no_match);
522  __ Cmp(capture_start_address, capture_end_address);
523  __ B(lt, &loop);
524
525  // Move current character position to position after match.
526  __ Sub(current_input_offset().X(), current_position_address, input_end());
527  if (read_backward) {
528    __ Sub(current_input_offset().X(), current_input_offset().X(),
529           Operand(capture_length, SXTW));
530  }
531
532  if (FLAG_debug_code) {
533    __ Cmp(current_input_offset().X(), Operand(current_input_offset(), SXTW));
534    __ Ccmp(current_input_offset(), 0, NoFlag, eq);
535    // The current input offset should be <= 0, and fit in a W register.
536    __ Check(le, AbortReason::kOffsetOutOfRange);
537  }
538  __ Bind(&fallthrough);
539}
540
541
542void RegExpMacroAssemblerARM64::CheckNotCharacter(unsigned c,
543                                                  Label* on_not_equal) {
544  CompareAndBranchOrBacktrack(current_character(), c, ne, on_not_equal);
545}
546
547
548void RegExpMacroAssemblerARM64::CheckCharacterAfterAnd(uint32_t c,
549                                                       uint32_t mask,
550                                                       Label* on_equal) {
551  __ And(w10, current_character(), mask);
552  CompareAndBranchOrBacktrack(w10, c, eq, on_equal);
553}
554
555
556void RegExpMacroAssemblerARM64::CheckNotCharacterAfterAnd(unsigned c,
557                                                          unsigned mask,
558                                                          Label* on_not_equal) {
559  __ And(w10, current_character(), mask);
560  CompareAndBranchOrBacktrack(w10, c, ne, on_not_equal);
561}
562
563void RegExpMacroAssemblerARM64::CheckNotCharacterAfterMinusAnd(
564    base::uc16 c, base::uc16 minus, base::uc16 mask, Label* on_not_equal) {
565  DCHECK_GT(String::kMaxUtf16CodeUnit, minus);
566  __ Sub(w10, current_character(), minus);
567  __ And(w10, w10, mask);
568  CompareAndBranchOrBacktrack(w10, c, ne, on_not_equal);
569}
570
571void RegExpMacroAssemblerARM64::CheckCharacterInRange(base::uc16 from,
572                                                      base::uc16 to,
573                                                      Label* on_in_range) {
574  __ Sub(w10, current_character(), from);
575  // Unsigned lower-or-same condition.
576  CompareAndBranchOrBacktrack(w10, to - from, ls, on_in_range);
577}
578
579void RegExpMacroAssemblerARM64::CheckCharacterNotInRange(
580    base::uc16 from, base::uc16 to, Label* on_not_in_range) {
581  __ Sub(w10, current_character(), from);
582  // Unsigned higher condition.
583  CompareAndBranchOrBacktrack(w10, to - from, hi, on_not_in_range);
584}
585
586void RegExpMacroAssemblerARM64::CallIsCharacterInRangeArray(
587    const ZoneList<CharacterRange>* ranges) {
588  static const int kNumArguments = 3;
589  __ Mov(w0, current_character());
590  __ Mov(x1, GetOrAddRangeArray(ranges));
591  __ Mov(x2, ExternalReference::isolate_address(isolate()));
592
593  {
594    // We have a frame (set up in GetCode), but the assembler doesn't know.
595    FrameScope scope(masm_.get(), StackFrame::MANUAL);
596    __ CallCFunction(ExternalReference::re_is_character_in_range_array(),
597                     kNumArguments);
598  }
599
600  __ Mov(code_pointer(), Operand(masm_->CodeObject()));
601}
602
603bool RegExpMacroAssemblerARM64::CheckCharacterInRangeArray(
604    const ZoneList<CharacterRange>* ranges, Label* on_in_range) {
605  // Note: due to the arm64 oddity of x0 being a 'cached register',
606  // pushing/popping registers must happen outside of CallIsCharacterInRange
607  // s.t. we can compare the return value to 0 before popping x0.
608  PushCachedRegisters();
609  CallIsCharacterInRangeArray(ranges);
610  __ Cmp(x0, 0);
611  PopCachedRegisters();
612  BranchOrBacktrack(ne, on_in_range);
613  return true;
614}
615
616bool RegExpMacroAssemblerARM64::CheckCharacterNotInRangeArray(
617    const ZoneList<CharacterRange>* ranges, Label* on_not_in_range) {
618  // Note: due to the arm64 oddity of x0 being a 'cached register',
619  // pushing/popping registers must happen outside of CallIsCharacterInRange
620  // s.t. we can compare the return value to 0 before popping x0.
621  PushCachedRegisters();
622  CallIsCharacterInRangeArray(ranges);
623  __ Cmp(x0, 0);
624  PopCachedRegisters();
625  BranchOrBacktrack(eq, on_not_in_range);
626  return true;
627}
628
629void RegExpMacroAssemblerARM64::CheckBitInTable(
630    Handle<ByteArray> table,
631    Label* on_bit_set) {
632  __ Mov(x11, Operand(table));
633  if ((mode_ != LATIN1) || (kTableMask != String::kMaxOneByteCharCode)) {
634    __ And(w10, current_character(), kTableMask);
635    __ Add(w10, w10, ByteArray::kHeaderSize - kHeapObjectTag);
636  } else {
637    __ Add(w10, current_character(), ByteArray::kHeaderSize - kHeapObjectTag);
638  }
639  __ Ldrb(w11, MemOperand(x11, w10, UXTW));
640  CompareAndBranchOrBacktrack(w11, 0, ne, on_bit_set);
641}
642
643bool RegExpMacroAssemblerARM64::CheckSpecialCharacterClass(
644    StandardCharacterSet type, Label* on_no_match) {
645  // Range checks (c in min..max) are generally implemented by an unsigned
646  // (c - min) <= (max - min) check
647  // TODO(jgruber): No custom implementation (yet): s(UC16), S(UC16).
648  switch (type) {
649    case StandardCharacterSet::kWhitespace:
650      // Match space-characters.
651      if (mode_ == LATIN1) {
652        // One byte space characters are '\t'..'\r', ' ' and \u00a0.
653        Label success;
654        // Check for ' ' or 0x00A0.
655        __ Cmp(current_character(), ' ');
656        __ Ccmp(current_character(), 0x00A0, ZFlag, ne);
657        __ B(eq, &success);
658        // Check range 0x09..0x0D.
659        __ Sub(w10, current_character(), '\t');
660        CompareAndBranchOrBacktrack(w10, '\r' - '\t', hi, on_no_match);
661        __ Bind(&success);
662        return true;
663      }
664      return false;
665    case StandardCharacterSet::kNotWhitespace:
666      // The emitted code for generic character classes is good enough.
667      return false;
668    case StandardCharacterSet::kDigit:
669      // Match ASCII digits ('0'..'9').
670      __ Sub(w10, current_character(), '0');
671      CompareAndBranchOrBacktrack(w10, '9' - '0', hi, on_no_match);
672      return true;
673    case StandardCharacterSet::kNotDigit:
674      // Match ASCII non-digits.
675      __ Sub(w10, current_character(), '0');
676      CompareAndBranchOrBacktrack(w10, '9' - '0', ls, on_no_match);
677      return true;
678    case StandardCharacterSet::kNotLineTerminator: {
679      // Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
680      // Here we emit the conditional branch only once at the end to make branch
681      // prediction more efficient, even though we could branch out of here
682      // as soon as a character matches.
683      __ Cmp(current_character(), 0x0A);
684      __ Ccmp(current_character(), 0x0D, ZFlag, ne);
685      if (mode_ == UC16) {
686        __ Sub(w10, current_character(), 0x2028);
687        // If the Z flag was set we clear the flags to force a branch.
688        __ Ccmp(w10, 0x2029 - 0x2028, NoFlag, ne);
689        // ls -> !((C==1) && (Z==0))
690        BranchOrBacktrack(ls, on_no_match);
691      } else {
692        BranchOrBacktrack(eq, on_no_match);
693      }
694      return true;
695    }
696    case StandardCharacterSet::kLineTerminator: {
697      // Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029)
698      // We have to check all 4 newline characters before emitting
699      // the conditional branch.
700      __ Cmp(current_character(), 0x0A);
701      __ Ccmp(current_character(), 0x0D, ZFlag, ne);
702      if (mode_ == UC16) {
703        __ Sub(w10, current_character(), 0x2028);
704        // If the Z flag was set we clear the flags to force a fall-through.
705        __ Ccmp(w10, 0x2029 - 0x2028, NoFlag, ne);
706        // hi -> (C==1) && (Z==0)
707        BranchOrBacktrack(hi, on_no_match);
708      } else {
709        BranchOrBacktrack(ne, on_no_match);
710      }
711      return true;
712    }
713    case StandardCharacterSet::kWord: {
714      if (mode_ != LATIN1) {
715        // Table is 256 entries, so all Latin1 characters can be tested.
716        CompareAndBranchOrBacktrack(current_character(), 'z', hi, on_no_match);
717      }
718      ExternalReference map = ExternalReference::re_word_character_map();
719      __ Mov(x10, map);
720      __ Ldrb(w10, MemOperand(x10, current_character(), UXTW));
721      CompareAndBranchOrBacktrack(w10, 0, eq, on_no_match);
722      return true;
723    }
724    case StandardCharacterSet::kNotWord: {
725      Label done;
726      if (mode_ != LATIN1) {
727        // Table is 256 entries, so all Latin1 characters can be tested.
728        __ Cmp(current_character(), 'z');
729        __ B(hi, &done);
730      }
731      ExternalReference map = ExternalReference::re_word_character_map();
732      __ Mov(x10, map);
733      __ Ldrb(w10, MemOperand(x10, current_character(), UXTW));
734      CompareAndBranchOrBacktrack(w10, 0, ne, on_no_match);
735      __ Bind(&done);
736      return true;
737    }
738    case StandardCharacterSet::kEverything:
739      // Match any character.
740      return true;
741  }
742}
743
744void RegExpMacroAssemblerARM64::Fail() {
745  __ Mov(w0, FAILURE);
746  __ B(&exit_label_);
747}
748
749void RegExpMacroAssemblerARM64::LoadRegExpStackPointerFromMemory(Register dst) {
750  ExternalReference ref =
751      ExternalReference::address_of_regexp_stack_stack_pointer(isolate());
752  __ Mov(dst, ref);
753  __ Ldr(dst, MemOperand(dst));
754}
755
756void RegExpMacroAssemblerARM64::StoreRegExpStackPointerToMemory(
757    Register src, Register scratch) {
758  ExternalReference ref =
759      ExternalReference::address_of_regexp_stack_stack_pointer(isolate());
760  __ Mov(scratch, ref);
761  __ Str(src, MemOperand(scratch));
762}
763
764void RegExpMacroAssemblerARM64::PushRegExpBasePointer(Register stack_pointer,
765                                                      Register scratch) {
766  ExternalReference ref =
767      ExternalReference::address_of_regexp_stack_memory_top_address(isolate());
768  __ Mov(scratch, ref);
769  __ Ldr(scratch, MemOperand(scratch));
770  __ Sub(scratch, stack_pointer, scratch);
771  __ Str(scratch, MemOperand(frame_pointer(), kRegExpStackBasePointer));
772}
773
774void RegExpMacroAssemblerARM64::PopRegExpBasePointer(Register stack_pointer_out,
775                                                     Register scratch) {
776  ExternalReference ref =
777      ExternalReference::address_of_regexp_stack_memory_top_address(isolate());
778  __ Ldr(stack_pointer_out,
779         MemOperand(frame_pointer(), kRegExpStackBasePointer));
780  __ Mov(scratch, ref);
781  __ Ldr(scratch, MemOperand(scratch));
782  __ Add(stack_pointer_out, stack_pointer_out, scratch);
783  StoreRegExpStackPointerToMemory(stack_pointer_out, scratch);
784}
785
// Finalizes code generation for this regexp and returns the resulting Code
// object.
//
// The function emits, in order:
//   1. the entry sequence bound to entry_label_ (frame setup, initialization
//      of the fixed-purpose registers, stack-space check and allocation),
//   2. the shared backtrack, success and exit paths (each only if its label
//      has been linked by previously generated code),
//   3. the common return sequence bound to the local label return_w0,
//   4. the out-of-line handlers for preemption checks, backtrack-stack
//      overflow, exceptions and fallback (again only if linked).
// It then builds a Code object of kind REGEXP from the assembled buffer.
//
// |source| is only forwarded to the RegExpCodeCreateEvent profiling hook.
Handle<HeapObject> RegExpMacroAssemblerARM64::GetCode(Handle<String> source) {
  // Common exit: every path that leaves the generated code branches here with
  // the result already in w0.
  Label return_w0;
  // Finalize code - write the entry point code now we know how many
  // registers we need.

  // Entry code:
  __ Bind(&entry_label_);

  // Arguments on entry:
  // x0:  String   input
  // x1:  int      start_offset
  // x2:  byte*    input_start
  // x3:  byte*    input_end
  // x4:  int*     output array
  // x5:  int      output array size
  // x6:  int      direct_call
  // x7:  Isolate* isolate
  //
  // sp[0]:  secondary link/return address used by native call

  // Tell the system that we have a stack frame.  Because the type is MANUAL, no
  // code is generated.
  FrameScope scope(masm_.get(), StackFrame::MANUAL);

  // Push registers on the stack, only push the argument registers that we need.
  // x1-x4 are not pushed; they are copied into dedicated registers below.
  CPURegList argument_registers(x0, x5, x6, x7);

  CPURegList registers_to_retain = kCalleeSaved;
  DCHECK_EQ(registers_to_retain.Count(), kNumCalleeSavedRegisters);

  __ PushCPURegList(registers_to_retain);
  __ Push<TurboAssembler::kSignLR>(lr, fp);
  __ PushCPURegList(argument_registers);

  // Set frame pointer in place: it points just above the pushed argument
  // registers, i.e. at the saved fp/lr pair.
  __ Add(frame_pointer(), sp, argument_registers.Count() * kSystemPointerSize);

  // Initialize callee-saved registers.
  __ Mov(start_offset(), w1);
  __ Mov(input_start(), x2);
  __ Mov(input_end(), x3);
  __ Mov(output_array(), x4);

  // Make sure the stack alignment will be respected.
  // align_mask is expressed in units of W registers.
  const int alignment = masm_->ActivationFrameAlignment();
  DCHECK_EQ(alignment % 16, 0);
  const int align_mask = (alignment / kWRegSize) - 1;

  // Make room for stack locals.
  static constexpr int kWRegPerXReg = kXRegSize / kWRegSize;
  // The locals must already occupy an aligned amount of space.
  DCHECK_EQ(kNumberOfStackLocals * kWRegPerXReg,
            ((kNumberOfStackLocals * kWRegPerXReg) + align_mask) & ~align_mask);
  __ Claim(kNumberOfStackLocals * kWRegPerXReg);

  // Initialize backtrack stack pointer. It must not be clobbered from here on.
  // Note the backtrack_stackpointer is callee-saved.
  STATIC_ASSERT(backtrack_stackpointer() == x23);
  LoadRegExpStackPointerFromMemory(backtrack_stackpointer());

  // Store the regexp base pointer - we'll later restore it / write it to
  // memory when returning from this irregexp code object.
  PushRegExpBasePointer(backtrack_stackpointer(), x11);

  // Set the number of registers we will need to allocate, that is:
  //   - (num_registers_ - kNumCachedRegisters) (W registers)
  // rounded up to the stack alignment.
  const int num_stack_registers =
      std::max(0, num_registers_ - kNumCachedRegisters);
  const int num_wreg_to_allocate =
      (num_stack_registers + align_mask) & ~align_mask;

  {
    // Check if we have space on the stack.
    Label stack_limit_hit, stack_ok;

    ExternalReference stack_limit =
        ExternalReference::address_of_jslimit(isolate());
    __ Mov(x10, stack_limit);
    __ Ldr(x10, MemOperand(x10));
    // x10 <- sp - limit; the flags set here drive the branch below.
    __ Subs(x10, sp, x10);

    // Handle it if the stack pointer is already below the stack limit.
    __ B(ls, &stack_limit_hit);

    // Check if there is room for the variable number of registers above
    // the stack limit.
    __ Cmp(x10, num_wreg_to_allocate * kWRegSize);
    __ B(hs, &stack_ok);

    // Exit with OutOfMemory exception. There is not enough space on the stack
    // for our working registers.
    __ Mov(w0, EXCEPTION);
    __ B(&return_w0);

    __ Bind(&stack_limit_hit);
    CallCheckStackGuardState(x10);
    // If returned value is non-zero, we exit with the returned value as result.
    __ Cbnz(w0, &return_w0);

    __ Bind(&stack_ok);
  }

  // Allocate space on stack.
  __ Claim(num_wreg_to_allocate, kWRegSize);

  // Initialize success_counter and kBacktrackCount with 0.
  __ Str(wzr, MemOperand(frame_pointer(), kSuccessCounter));
  __ Str(wzr, MemOperand(frame_pointer(), kBacktrackCount));

  // Find negative length (offset of start relative to end).
  __ Sub(x10, input_start(), input_end());
  if (FLAG_debug_code) {
    // Check that the size of the input string chars is in range.
    __ Neg(x11, x10);
    __ Cmp(x11, SeqTwoByteString::kMaxCharsSize);
    __ Check(ls, AbortReason::kInputStringTooLong);
  }
  __ Mov(current_input_offset(), w10);

  // The non-position value is used as a clearing value for the
  // capture registers, it corresponds to the position of the first character
  // minus one.
  __ Sub(string_start_minus_one(), current_input_offset(), char_size());
  // start_offset is a character count; scale it to bytes for two-byte input.
  __ Sub(string_start_minus_one(), string_start_minus_one(),
         Operand(start_offset(), LSL, (mode_ == UC16) ? 1 : 0));
  // We can store this value twice in an X register for initializing
  // on-stack registers later.
  __ Orr(twice_non_position_value(), string_start_minus_one().X(),
         Operand(string_start_minus_one().X(), LSL, kWRegSizeInBits));

  // Initialize code pointer register.
  __ Mov(code_pointer(), Operand(masm_->CodeObject()));

  Label load_char_start_regexp;
  {
    Label start_regexp;
    // Load newline if index is at start, previous character otherwise.
    __ Cbnz(start_offset(), &load_char_start_regexp);
    __ Mov(current_character(), '\n');
    __ B(&start_regexp);

    // Global regexp restarts matching here.
    __ Bind(&load_char_start_regexp);
    // Load previous char as initial value of current character register.
    LoadCurrentCharacterUnchecked(-1, 1);
    __ Bind(&start_regexp);
  }

  // Initialize on-stack registers.
  if (num_saved_registers_ > 0) {
    ClearRegisters(0, num_saved_registers_ - 1);
  }

  // Execute.
  __ B(&start_label_);

  // Shared backtrack sequence, emitted only if something branched to it.
  if (backtrack_label_.is_linked()) {
    __ Bind(&backtrack_label_);
    Backtrack();
  }

  if (success_label_.is_linked()) {
    Register first_capture_start = w15;

    // Save captures when successful.
    __ Bind(&success_label_);

    if (num_saved_registers_ > 0) {
      // V8 expects the output to be an int32_t array.
      Register capture_start = w12;
      Register capture_end = w13;
      Register input_length = w14;

      // Copy captures to output.

      // Get string length.
      __ Sub(x10, input_end(), input_start());
      if (FLAG_debug_code) {
        // Check that the size of the input string chars is in range.
        __ Cmp(x10, SeqTwoByteString::kMaxCharsSize);
        __ Check(ls, AbortReason::kInputStringTooLong);
      }
      // input_start has a start_offset offset on entry. We need to include
      // it when computing the length of the whole string.
      if (mode_ == UC16) {
        // Convert the byte length to a character count for two-byte input.
        __ Add(input_length, start_offset(), Operand(w10, LSR, 1));
      } else {
        __ Add(input_length, start_offset(), w10);
      }

      // Copy the results to the output array from the cached registers first.
      // Each cached X register holds a pair of captures: start in the low
      // word, end in the high word.
      for (int i = 0;
           (i < num_saved_registers_) && (i < kNumCachedRegisters);
           i += 2) {
        __ Mov(capture_start.X(), GetCachedRegister(i));
        __ Lsr(capture_end.X(), capture_start.X(), kWRegSizeInBits);
        if ((i == 0) && global_with_zero_length_check()) {
          // Keep capture start for the zero-length check later.
          __ Mov(first_capture_start, capture_start);
        }
        // Offsets need to be relative to the start of the string.
        if (mode_ == UC16) {
          __ Add(capture_start, input_length, Operand(capture_start, ASR, 1));
          __ Add(capture_end, input_length, Operand(capture_end, ASR, 1));
        } else {
          __ Add(capture_start, input_length, capture_start);
          __ Add(capture_end, input_length, capture_end);
        }
        // The output pointer advances for a possible global match.
        __ Stp(capture_start, capture_end,
               MemOperand(output_array(), kSystemPointerSize, PostIndex));
      }

      // Only carry on if there are more than kNumCachedRegisters capture
      // registers.
      int num_registers_left_on_stack =
          num_saved_registers_ - kNumCachedRegisters;
      if (num_registers_left_on_stack > 0) {
        Register base = x10;
        // There are always an even number of capture registers. A couple of
        // registers determine one match with two offsets.
        DCHECK_EQ(0, num_registers_left_on_stack % 2);
        __ Add(base, frame_pointer(), kFirstCaptureOnStack);

        // We can unroll the loop here, we should not unroll for less than 2
        // registers.
        STATIC_ASSERT(kNumRegistersToUnroll > 2);
        if (num_registers_left_on_stack <= kNumRegistersToUnroll) {
          for (int i = 0; i < num_registers_left_on_stack / 2; i++) {
            // The pair is loaded as (end, start) while walking down the frame.
            __ Ldp(capture_end, capture_start,
                   MemOperand(base, -kSystemPointerSize, PostIndex));
            if ((i == 0) && global_with_zero_length_check()) {
              // Keep capture start for the zero-length check later.
              __ Mov(first_capture_start, capture_start);
            }
            // Offsets need to be relative to the start of the string.
            if (mode_ == UC16) {
              __ Add(capture_start,
                     input_length,
                     Operand(capture_start, ASR, 1));
              __ Add(capture_end, input_length, Operand(capture_end, ASR, 1));
            } else {
              __ Add(capture_start, input_length, capture_start);
              __ Add(capture_end, input_length, capture_end);
            }
            // The output pointer advances for a possible global match.
            __ Stp(capture_start, capture_end,
                   MemOperand(output_array(), kSystemPointerSize, PostIndex));
          }
        } else {
          // Too many registers to unroll: emit a run-time loop. x11 counts the
          // registers still to copy and is decremented by 2 per iteration.
          Label loop, start;
          __ Mov(x11, num_registers_left_on_stack);

          // The first pair is loaded outside the loop so that its start can be
          // kept for the zero-length check.
          __ Ldp(capture_end, capture_start,
                 MemOperand(base, -kSystemPointerSize, PostIndex));
          if (global_with_zero_length_check()) {
            __ Mov(first_capture_start, capture_start);
          }
          __ B(&start);

          __ Bind(&loop);
          __ Ldp(capture_end, capture_start,
                 MemOperand(base, -kSystemPointerSize, PostIndex));
          __ Bind(&start);
          if (mode_ == UC16) {
            __ Add(capture_start, input_length, Operand(capture_start, ASR, 1));
            __ Add(capture_end, input_length, Operand(capture_end, ASR, 1));
          } else {
            __ Add(capture_start, input_length, capture_start);
            __ Add(capture_end, input_length, capture_end);
          }
          // The output pointer advances for a possible global match.
          __ Stp(capture_start, capture_end,
                 MemOperand(output_array(), kSystemPointerSize, PostIndex));
          __ Sub(x11, x11, 2);
          __ Cbnz(x11, &loop);
        }
      }
    }

    if (global()) {
      Register success_counter = w0;
      Register output_size = x10;
      // Restart matching if the regular expression is flagged as global.

      // Increment success counter.
      __ Ldr(success_counter, MemOperand(frame_pointer(), kSuccessCounter));
      __ Add(success_counter, success_counter, 1);
      __ Str(success_counter, MemOperand(frame_pointer(), kSuccessCounter));

      // Capture results have been stored, so the number of remaining global
      // output registers is reduced by the number of stored captures.
      __ Ldr(output_size, MemOperand(frame_pointer(), kOutputSize));
      __ Sub(output_size, output_size, num_saved_registers_);
      // Check whether we have enough room for another set of capture results.
      // If not, return; w0 already holds the updated success counter.
      __ Cmp(output_size, num_saved_registers_);
      __ B(lt, &return_w0);

      // The output pointer is already set to the next field in the output
      // array.
      // Update output size on the frame before we restart matching.
      __ Str(output_size, MemOperand(frame_pointer(), kOutputSize));

      // Restore the original regexp stack pointer value (effectively, pop the
      // stored base pointer).
      PopRegExpBasePointer(backtrack_stackpointer(), x11);

      if (global_with_zero_length_check()) {
        // Special case for zero-length matches.
        __ Cmp(current_input_offset(), first_capture_start);
        // Not a zero-length match, restart.
        __ B(ne, &load_char_start_regexp);
        // Offset from the end is zero if we already reached the end.
        __ Cbz(current_input_offset(), &return_w0);
        // Advance current position after a zero-length match.
        Label advance;
        __ bind(&advance);
        __ Add(current_input_offset(), current_input_offset(),
               Operand((mode_ == UC16) ? 2 : 1));
        // For global unicode regexps, keep advancing while the position is
        // inside a surrogate pair.
        if (global_unicode()) CheckNotInSurrogatePair(0, &advance);
      }

      __ B(&load_char_start_regexp);
    } else {
      __ Mov(w0, SUCCESS);
    }
  }

  if (exit_label_.is_linked()) {
    // Exit and return w0.
    __ Bind(&exit_label_);
    if (global()) {
      // For global regexps the result is the number of successful matches.
      __ Ldr(w0, MemOperand(frame_pointer(), kSuccessCounter));
    }
  }

  __ Bind(&return_w0);
  // Restore the original regexp stack pointer value (effectively, pop the
  // stored base pointer).
  PopRegExpBasePointer(backtrack_stackpointer(), x11);

  // Set stack pointer back to first register to retain.
  __ Mov(sp, fp);
  __ Pop<TurboAssembler::kAuthLR>(fp, lr);

  // Restore registers.
  __ PopCPURegList(registers_to_retain);

  __ Ret();

  // Out-of-line handlers follow. Each one is emitted only if some generated
  // code has linked its label.
  Label exit_with_exception;
  if (check_preempt_label_.is_linked()) {
    __ Bind(&check_preempt_label_);

    StoreRegExpStackPointerToMemory(backtrack_stackpointer(), x10);

    SaveLinkRegister();
    PushCachedRegisters();
    CallCheckStackGuardState(x10);
    // Returning from the regexp code restores the stack (sp <- fp)
    // so we don't need to drop the link register from it before exiting.
    __ Cbnz(w0, &return_w0);
    // Reset the cached registers.
    PopCachedRegisters();

    LoadRegExpStackPointerFromMemory(backtrack_stackpointer());

    RestoreLinkRegister();
    __ Ret();
  }

  if (stack_overflow_label_.is_linked()) {
    __ Bind(&stack_overflow_label_);

    StoreRegExpStackPointerToMemory(backtrack_stackpointer(), x10);

    SaveLinkRegister();
    PushCachedRegisters();
    // Call GrowStack(isolate).
    static constexpr int kNumArguments = 1;
    __ Mov(x0, ExternalReference::isolate_address(isolate()));
    __ CallCFunction(ExternalReference::re_grow_stack(), kNumArguments);
    // If it returned nullptr, we have failed to grow the stack, and must exit
    // with a stack-overflow exception.  Returning from the regexp code
    // restores the stack (sp <- fp) so we don't need to drop the link register
    // from it before exiting.
    // NOTE(review): only the low 32 bits (w0) of the result are tested for
    // zero, while the full x0 is used as the new stack pointer below - confirm
    // this is intended.
    __ Cbz(w0, &exit_with_exception);
    // Otherwise use return value as new stack pointer.
    __ Mov(backtrack_stackpointer(), x0);
    PopCachedRegisters();
    RestoreLinkRegister();
    __ Ret();
  }

  if (exit_with_exception.is_linked()) {
    __ Bind(&exit_with_exception);
    __ Mov(w0, EXCEPTION);
    __ B(&return_w0);
  }

  if (fallback_label_.is_linked()) {
    __ Bind(&fallback_label_);
    __ Mov(w0, FALLBACK_TO_EXPERIMENTAL);
    __ B(&return_w0);
  }

  // Assemble the buffer into a Code object. The self reference is the handle
  // that generated code used to refer to its own Code object.
  CodeDesc code_desc;
  masm_->GetCode(isolate(), &code_desc);
  Handle<Code> code =
      Factory::CodeBuilder(isolate(), code_desc, CodeKind::REGEXP)
          .set_self_reference(masm_->CodeObject())
          .Build();
  PROFILE(masm_->isolate(),
          RegExpCodeCreateEvent(Handle<AbstractCode>::cast(code), source));
  return Handle<HeapObject>::cast(code);
}
1201
1202
1203void RegExpMacroAssemblerARM64::GoTo(Label* to) {
1204  BranchOrBacktrack(al, to);
1205}
1206
1207void RegExpMacroAssemblerARM64::IfRegisterGE(int reg, int comparand,
1208                                             Label* if_ge) {
1209  Register to_compare = GetRegister(reg, w10);
1210  CompareAndBranchOrBacktrack(to_compare, comparand, ge, if_ge);
1211}
1212
1213
1214void RegExpMacroAssemblerARM64::IfRegisterLT(int reg, int comparand,
1215                                             Label* if_lt) {
1216  Register to_compare = GetRegister(reg, w10);
1217  CompareAndBranchOrBacktrack(to_compare, comparand, lt, if_lt);
1218}
1219
1220
1221void RegExpMacroAssemblerARM64::IfRegisterEqPos(int reg, Label* if_eq) {
1222  Register to_compare = GetRegister(reg, w10);
1223  __ Cmp(to_compare, current_input_offset());
1224  BranchOrBacktrack(eq, if_eq);
1225}
1226
1227RegExpMacroAssembler::IrregexpImplementation
1228    RegExpMacroAssemblerARM64::Implementation() {
1229  return kARM64Implementation;
1230}
1231
1232
1233void RegExpMacroAssemblerARM64::PopCurrentPosition() {
1234  Pop(current_input_offset());
1235}
1236
1237
1238void RegExpMacroAssemblerARM64::PopRegister(int register_index) {
1239  Pop(w10);
1240  StoreRegister(register_index, w10);
1241}
1242
1243
1244void RegExpMacroAssemblerARM64::PushBacktrack(Label* label) {
1245  if (label->is_bound()) {
1246    int target = label->pos();
1247    __ Mov(w10, target + Code::kHeaderSize - kHeapObjectTag);
1248  } else {
1249    __ Adr(x10, label, MacroAssembler::kAdrFar);
1250    __ Sub(x10, x10, code_pointer());
1251    if (FLAG_debug_code) {
1252      __ Cmp(x10, kWRegMask);
1253      // The code offset has to fit in a W register.
1254      __ Check(ls, AbortReason::kOffsetOutOfRange);
1255    }
1256  }
1257  Push(w10);
1258  CheckStackLimit();
1259}
1260
1261
1262void RegExpMacroAssemblerARM64::PushCurrentPosition() {
1263  Push(current_input_offset());
1264}
1265
1266
1267void RegExpMacroAssemblerARM64::PushRegister(int register_index,
1268                                             StackCheckFlag check_stack_limit) {
1269  Register to_push = GetRegister(register_index, w10);
1270  Push(to_push);
1271  if (check_stack_limit) CheckStackLimit();
1272}
1273
1274
1275void RegExpMacroAssemblerARM64::ReadCurrentPositionFromRegister(int reg) {
1276  RegisterState register_state = GetRegisterState(reg);
1277  switch (register_state) {
1278    case STACKED:
1279      __ Ldr(current_input_offset(), register_location(reg));
1280      break;
1281    case CACHED_LSW:
1282      __ Mov(current_input_offset(), GetCachedRegister(reg).W());
1283      break;
1284    case CACHED_MSW:
1285      __ Lsr(current_input_offset().X(), GetCachedRegister(reg),
1286             kWRegSizeInBits);
1287      break;
1288    default:
1289      UNREACHABLE();
1290  }
1291}
1292
1293void RegExpMacroAssemblerARM64::WriteStackPointerToRegister(int reg) {
1294  ExternalReference ref =
1295      ExternalReference::address_of_regexp_stack_memory_top_address(isolate());
1296  __ Mov(x10, ref);
1297  __ Ldr(x10, MemOperand(x10));
1298  __ Sub(x10, backtrack_stackpointer(), x10);
1299  if (FLAG_debug_code) {
1300    __ Cmp(x10, Operand(w10, SXTW));
1301    // The stack offset needs to fit in a W register.
1302    __ Check(eq, AbortReason::kOffsetOutOfRange);
1303  }
1304  StoreRegister(reg, w10);
1305}
1306
1307void RegExpMacroAssemblerARM64::ReadStackPointerFromRegister(int reg) {
1308  ExternalReference ref =
1309      ExternalReference::address_of_regexp_stack_memory_top_address(isolate());
1310  Register read_from = GetRegister(reg, w10);
1311  __ Mov(x11, ref);
1312  __ Ldr(x11, MemOperand(x11));
1313  __ Add(backtrack_stackpointer(), x11, Operand(read_from, SXTW));
1314}
1315
1316void RegExpMacroAssemblerARM64::SetCurrentPositionFromEnd(int by) {
1317  Label after_position;
1318  __ Cmp(current_input_offset(), -by * char_size());
1319  __ B(ge, &after_position);
1320  __ Mov(current_input_offset(), -by * char_size());
1321  // On RegExp code entry (where this operation is used), the character before
1322  // the current position is expected to be already loaded.
1323  // We have advanced the position, so it's safe to read backwards.
1324  LoadCurrentCharacterUnchecked(-1, 1);
1325  __ Bind(&after_position);
1326}
1327
1328
1329void RegExpMacroAssemblerARM64::SetRegister(int register_index, int to) {
1330  DCHECK(register_index >= num_saved_registers_);  // Reserved for positions!
1331  Register set_to = wzr;
1332  if (to != 0) {
1333    set_to = w10;
1334    __ Mov(set_to, to);
1335  }
1336  StoreRegister(register_index, set_to);
1337}
1338
1339
1340bool RegExpMacroAssemblerARM64::Succeed() {
1341  __ B(&success_label_);
1342  return global();
1343}
1344
1345
1346void RegExpMacroAssemblerARM64::WriteCurrentPositionToRegister(int reg,
1347                                                               int cp_offset) {
1348  Register position = current_input_offset();
1349  if (cp_offset != 0) {
1350    position = w10;
1351    __ Add(position, current_input_offset(), cp_offset * char_size());
1352  }
1353  StoreRegister(reg, position);
1354}
1355
1356
1357void RegExpMacroAssemblerARM64::ClearRegisters(int reg_from, int reg_to) {
1358  DCHECK(reg_from <= reg_to);
1359  int num_registers = reg_to - reg_from + 1;
1360
1361  // If the first capture register is cached in a hardware register but not
1362  // aligned on a 64-bit one, we need to clear the first one specifically.
1363  if ((reg_from < kNumCachedRegisters) && ((reg_from % 2) != 0)) {
1364    StoreRegister(reg_from, string_start_minus_one());
1365    num_registers--;
1366    reg_from++;
1367  }
1368
1369  // Clear cached registers in pairs as far as possible.
1370  while ((num_registers >= 2) && (reg_from < kNumCachedRegisters)) {
1371    DCHECK(GetRegisterState(reg_from) == CACHED_LSW);
1372    __ Mov(GetCachedRegister(reg_from), twice_non_position_value());
1373    reg_from += 2;
1374    num_registers -= 2;
1375  }
1376
1377  if ((num_registers % 2) == 1) {
1378    StoreRegister(reg_from, string_start_minus_one());
1379    num_registers--;
1380    reg_from++;
1381  }
1382
1383  if (num_registers > 0) {
1384    // If there are some remaining registers, they are stored on the stack.
1385    DCHECK_LE(kNumCachedRegisters, reg_from);
1386
1387    // Move down the indexes of the registers on stack to get the correct offset
1388    // in memory.
1389    reg_from -= kNumCachedRegisters;
1390    reg_to -= kNumCachedRegisters;
1391    // We should not unroll the loop for less than 2 registers.
1392    STATIC_ASSERT(kNumRegistersToUnroll > 2);
1393    // We position the base pointer to (reg_from + 1).
1394    int base_offset = kFirstRegisterOnStack -
1395        kWRegSize - (kWRegSize * reg_from);
1396    if (num_registers > kNumRegistersToUnroll) {
1397      Register base = x10;
1398      __ Add(base, frame_pointer(), base_offset);
1399
1400      Label loop;
1401      __ Mov(x11, num_registers);
1402      __ Bind(&loop);
1403      __ Str(twice_non_position_value(),
1404             MemOperand(base, -kSystemPointerSize, PostIndex));
1405      __ Sub(x11, x11, 2);
1406      __ Cbnz(x11, &loop);
1407    } else {
1408      for (int i = reg_from; i <= reg_to; i += 2) {
1409        __ Str(twice_non_position_value(),
1410               MemOperand(frame_pointer(), base_offset));
1411        base_offset -= kWRegSize * 2;
1412      }
1413    }
1414  }
1415}
1416
1417// Helper function for reading a value out of a stack frame.
1418template <typename T>
1419static T& frame_entry(Address re_frame, int frame_offset) {
1420  return *reinterpret_cast<T*>(re_frame + frame_offset);
1421}
1422
1423
1424template <typename T>
1425static T* frame_entry_address(Address re_frame, int frame_offset) {
1426  return reinterpret_cast<T*>(re_frame + frame_offset);
1427}
1428
1429int RegExpMacroAssemblerARM64::CheckStackGuardState(
1430    Address* return_address, Address raw_code, Address re_frame,
1431    int start_index, const byte** input_start, const byte** input_end) {
1432  Code re_code = Code::cast(Object(raw_code));
1433  return NativeRegExpMacroAssembler::CheckStackGuardState(
1434      frame_entry<Isolate*>(re_frame, kIsolate), start_index,
1435      static_cast<RegExp::CallOrigin>(frame_entry<int>(re_frame, kDirectCall)),
1436      return_address, re_code, frame_entry_address<Address>(re_frame, kInput),
1437      input_start, input_end);
1438}
1439
1440
1441void RegExpMacroAssemblerARM64::CheckPosition(int cp_offset,
1442                                              Label* on_outside_input) {
1443  if (cp_offset >= 0) {
1444    CompareAndBranchOrBacktrack(current_input_offset(),
1445                                -cp_offset * char_size(), ge, on_outside_input);
1446  } else {
1447    __ Add(w12, current_input_offset(), Operand(cp_offset * char_size()));
1448    __ Cmp(w12, string_start_minus_one());
1449    BranchOrBacktrack(le, on_outside_input);
1450  }
1451}
1452
1453
1454// Private methods:
1455
1456void RegExpMacroAssemblerARM64::CallCheckStackGuardState(Register scratch) {
1457  DCHECK(!isolate()->IsGeneratingEmbeddedBuiltins());
1458  DCHECK(!masm_->options().isolate_independent_code);
1459
1460  // Allocate space on the stack to store the return address. The
1461  // CheckStackGuardState C++ function will override it if the code
1462  // moved. Allocate extra space for 2 arguments passed by pointers.
1463  // AAPCS64 requires the stack to be 16 byte aligned.
1464  int alignment = masm_->ActivationFrameAlignment();
1465  DCHECK_EQ(alignment % 16, 0);
1466  int align_mask = (alignment / kXRegSize) - 1;
1467  int xreg_to_claim = (3 + align_mask) & ~align_mask;
1468
1469  __ Claim(xreg_to_claim);
1470
1471  // CheckStackGuardState needs the end and start addresses of the input string.
1472  __ Poke(input_end(), 2 * kSystemPointerSize);
1473  __ Add(x5, sp, 2 * kSystemPointerSize);
1474  __ Poke(input_start(), kSystemPointerSize);
1475  __ Add(x4, sp, kSystemPointerSize);
1476
1477  __ Mov(w3, start_offset());
1478  // RegExp code frame pointer.
1479  __ Mov(x2, frame_pointer());
1480  // Code of self.
1481  __ Mov(x1, Operand(masm_->CodeObject()));
1482
1483  // We need to pass a pointer to the return address as first argument.
1484  // DirectCEntry will place the return address on the stack before calling so
1485  // the stack pointer will point to it.
1486  __ Mov(x0, sp);
1487
1488  DCHECK_EQ(scratch, x10);
1489  ExternalReference check_stack_guard_state =
1490      ExternalReference::re_check_stack_guard_state();
1491  __ Mov(scratch, check_stack_guard_state);
1492
1493  __ CallBuiltin(Builtin::kDirectCEntry);
1494
1495  // The input string may have been moved in memory, we need to reload it.
1496  __ Peek(input_start(), kSystemPointerSize);
1497  __ Peek(input_end(), 2 * kSystemPointerSize);
1498
1499  __ Drop(xreg_to_claim);
1500
1501  // Reload the Code pointer.
1502  __ Mov(code_pointer(), Operand(masm_->CodeObject()));
1503}
1504
1505void RegExpMacroAssemblerARM64::BranchOrBacktrack(Condition condition,
1506                                                  Label* to) {
1507  if (condition == al) {  // Unconditional.
1508    if (to == nullptr) {
1509      Backtrack();
1510      return;
1511    }
1512    __ B(to);
1513    return;
1514  }
1515  if (to == nullptr) {
1516    to = &backtrack_label_;
1517  }
1518  __ B(condition, to);
1519}
1520
1521void RegExpMacroAssemblerARM64::CompareAndBranchOrBacktrack(Register reg,
1522                                                            int immediate,
1523                                                            Condition condition,
1524                                                            Label* to) {
1525  if ((immediate == 0) && ((condition == eq) || (condition == ne))) {
1526    if (to == nullptr) {
1527      to = &backtrack_label_;
1528    }
1529    if (condition == eq) {
1530      __ Cbz(reg, to);
1531    } else {
1532      __ Cbnz(reg, to);
1533    }
1534  } else {
1535    __ Cmp(reg, immediate);
1536    BranchOrBacktrack(condition, to);
1537  }
1538}
1539
1540
1541void RegExpMacroAssemblerARM64::CheckPreemption() {
1542  // Check for preemption.
1543  ExternalReference stack_limit =
1544      ExternalReference::address_of_jslimit(isolate());
1545  __ Mov(x10, stack_limit);
1546  __ Ldr(x10, MemOperand(x10));
1547  __ Cmp(sp, x10);
1548  CallIf(&check_preempt_label_, ls);
1549}
1550
1551
1552void RegExpMacroAssemblerARM64::CheckStackLimit() {
1553  ExternalReference stack_limit =
1554      ExternalReference::address_of_regexp_stack_limit_address(isolate());
1555  __ Mov(x10, stack_limit);
1556  __ Ldr(x10, MemOperand(x10));
1557  __ Cmp(backtrack_stackpointer(), x10);
1558  CallIf(&stack_overflow_label_, ls);
1559}
1560
1561
1562void RegExpMacroAssemblerARM64::Push(Register source) {
1563  DCHECK(source.Is32Bits());
1564  DCHECK_NE(source, backtrack_stackpointer());
1565  __ Str(source,
1566         MemOperand(backtrack_stackpointer(),
1567                    -static_cast<int>(kWRegSize),
1568                    PreIndex));
1569}
1570
1571
1572void RegExpMacroAssemblerARM64::Pop(Register target) {
1573  DCHECK(target.Is32Bits());
1574  DCHECK_NE(target, backtrack_stackpointer());
1575  __ Ldr(target,
1576         MemOperand(backtrack_stackpointer(), kWRegSize, PostIndex));
1577}
1578
1579
1580Register RegExpMacroAssemblerARM64::GetCachedRegister(int register_index) {
1581  DCHECK_GT(kNumCachedRegisters, register_index);
1582  return Register::Create(register_index / 2, kXRegSizeInBits);
1583}
1584
1585
1586Register RegExpMacroAssemblerARM64::GetRegister(int register_index,
1587                                                Register maybe_result) {
1588  DCHECK(maybe_result.Is32Bits());
1589  DCHECK_LE(0, register_index);
1590  if (num_registers_ <= register_index) {
1591    num_registers_ = register_index + 1;
1592  }
1593  Register result = NoReg;
1594  RegisterState register_state = GetRegisterState(register_index);
1595  switch (register_state) {
1596    case STACKED:
1597      __ Ldr(maybe_result, register_location(register_index));
1598      result = maybe_result;
1599      break;
1600    case CACHED_LSW:
1601      result = GetCachedRegister(register_index).W();
1602      break;
1603    case CACHED_MSW:
1604      __ Lsr(maybe_result.X(), GetCachedRegister(register_index),
1605             kWRegSizeInBits);
1606      result = maybe_result;
1607      break;
1608    default:
1609      UNREACHABLE();
1610  }
1611  DCHECK(result.Is32Bits());
1612  return result;
1613}
1614
1615
1616void RegExpMacroAssemblerARM64::StoreRegister(int register_index,
1617                                              Register source) {
1618  DCHECK(source.Is32Bits());
1619  DCHECK_LE(0, register_index);
1620  if (num_registers_ <= register_index) {
1621    num_registers_ = register_index + 1;
1622  }
1623
1624  RegisterState register_state = GetRegisterState(register_index);
1625  switch (register_state) {
1626    case STACKED:
1627      __ Str(source, register_location(register_index));
1628      break;
1629    case CACHED_LSW: {
1630      Register cached_register = GetCachedRegister(register_index);
1631      if (source != cached_register.W()) {
1632        __ Bfi(cached_register, source.X(), 0, kWRegSizeInBits);
1633      }
1634      break;
1635    }
1636    case CACHED_MSW: {
1637      Register cached_register = GetCachedRegister(register_index);
1638      __ Bfi(cached_register, source.X(), kWRegSizeInBits, kWRegSizeInBits);
1639      break;
1640    }
1641    default:
1642      UNREACHABLE();
1643  }
1644}
1645
1646
1647void RegExpMacroAssemblerARM64::CallIf(Label* to, Condition condition) {
1648  Label skip_call;
1649  if (condition != al) __ B(&skip_call, NegateCondition(condition));
1650  __ Bl(to);
1651  __ Bind(&skip_call);
1652}
1653
1654
1655void RegExpMacroAssemblerARM64::RestoreLinkRegister() {
1656  // TODO(v8:10026): Remove when we stop compacting for code objects that are
1657  // active on the call stack.
1658  __ Pop<TurboAssembler::kAuthLR>(padreg, lr);
1659  __ Add(lr, lr, Operand(masm_->CodeObject()));
1660}
1661
1662
1663void RegExpMacroAssemblerARM64::SaveLinkRegister() {
1664  __ Sub(lr, lr, Operand(masm_->CodeObject()));
1665  __ Push<TurboAssembler::kSignLR>(lr, padreg);
1666}
1667
1668
1669MemOperand RegExpMacroAssemblerARM64::register_location(int register_index) {
1670  DCHECK(register_index < (1<<30));
1671  DCHECK_LE(kNumCachedRegisters, register_index);
1672  if (num_registers_ <= register_index) {
1673    num_registers_ = register_index + 1;
1674  }
1675  register_index -= kNumCachedRegisters;
1676  int offset = kFirstRegisterOnStack - register_index * kWRegSize;
1677  return MemOperand(frame_pointer(), offset);
1678}
1679
1680MemOperand RegExpMacroAssemblerARM64::capture_location(int register_index,
1681                                                     Register scratch) {
1682  DCHECK(register_index < (1<<30));
1683  DCHECK(register_index < num_saved_registers_);
1684  DCHECK_LE(kNumCachedRegisters, register_index);
1685  DCHECK_EQ(register_index % 2, 0);
1686  register_index -= kNumCachedRegisters;
1687  int offset = kFirstCaptureOnStack - register_index * kWRegSize;
1688  // capture_location is used with Stp instructions to load/store 2 registers.
1689  // The immediate field in the encoding is limited to 7 bits (signed).
1690  if (is_int7(offset)) {
1691    return MemOperand(frame_pointer(), offset);
1692  } else {
1693    __ Add(scratch, frame_pointer(), offset);
1694    return MemOperand(scratch);
1695  }
1696}
1697
1698void RegExpMacroAssemblerARM64::LoadCurrentCharacterUnchecked(int cp_offset,
1699                                                              int characters) {
1700  Register offset = current_input_offset();
1701
1702  // The ldr, str, ldrh, strh instructions can do unaligned accesses, if the CPU
1703  // and the operating system running on the target allow it.
1704  // If unaligned load/stores are not supported then this function must only
1705  // be used to load a single character at a time.
1706
1707  // ARMv8 supports unaligned accesses but V8 or the kernel can decide to
1708  // disable it.
1709  // TODO(pielan): See whether or not we should disable unaligned accesses.
1710  if (!CanReadUnaligned()) {
1711    DCHECK_EQ(1, characters);
1712  }
1713
1714  if (cp_offset != 0) {
1715    if (FLAG_debug_code) {
1716      __ Mov(x10, cp_offset * char_size());
1717      __ Add(x10, x10, Operand(current_input_offset(), SXTW));
1718      __ Cmp(x10, Operand(w10, SXTW));
1719      // The offset needs to fit in a W register.
1720      __ Check(eq, AbortReason::kOffsetOutOfRange);
1721    } else {
1722      __ Add(w10, current_input_offset(), cp_offset * char_size());
1723    }
1724    offset = w10;
1725  }
1726
1727  if (mode_ == LATIN1) {
1728    if (characters == 4) {
1729      __ Ldr(current_character(), MemOperand(input_end(), offset, SXTW));
1730    } else if (characters == 2) {
1731      __ Ldrh(current_character(), MemOperand(input_end(), offset, SXTW));
1732    } else {
1733      DCHECK_EQ(1, characters);
1734      __ Ldrb(current_character(), MemOperand(input_end(), offset, SXTW));
1735    }
1736  } else {
1737    DCHECK(mode_ == UC16);
1738    if (characters == 2) {
1739      __ Ldr(current_character(), MemOperand(input_end(), offset, SXTW));
1740    } else {
1741      DCHECK_EQ(1, characters);
1742      __ Ldrh(current_character(), MemOperand(input_end(), offset, SXTW));
1743    }
1744  }
1745}
1746
1747}  // namespace internal
1748}  // namespace v8
1749
1750#undef __
1751
1752#endif  // V8_TARGET_ARCH_ARM64
1753