1// Copyright 2021 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#if V8_TARGET_ARCH_RISCV64
6
7#include "src/regexp/riscv64/regexp-macro-assembler-riscv64.h"
8
9#include "src/codegen/assembler-inl.h"
10#include "src/codegen/macro-assembler.h"
11#include "src/logging/log.h"
12#include "src/objects/objects-inl.h"
13#include "src/regexp/regexp-macro-assembler.h"
14#include "src/regexp/regexp-stack.h"
15#include "src/snapshot/embedded/embedded-data-inl.h"
16#include "src/strings/unicode.h"
17
18namespace v8 {
19namespace internal {
20
21/* clang-format off
22 *
23 * This assembler uses the following register assignment convention
24 * - s3 : kScratchReg. Temporarily stores the index of capture start after a matching pass
25 *        for a global regexp.
26 * - s4 : Pointer to current Code object including heap object tag.
27 * - s1 : Current position in input, as negative offset from end of string.
28 *        Please notice that this is the byte offset, not the character offset!
29 * - s2 : Currently loaded character. Must be loaded using
30 *        LoadCurrentCharacter before using any of the dispatch methods.
31 * - t0 : Points to tip of backtrack stack
32 * - t1 : Unused.
33 * - t2 : End of input (points to byte after last character in input).
34 * - fp : Frame pointer. Used to access arguments, local variables and
35 *         RegExp registers.
36 * - sp : Points to tip of C stack.
37 *
38 * The remaining registers are free for computations.
39 * Each call to a public method should retain this convention.
40 *
41 * The stack will have the following structure:
42 *
43 *                                                                              kStackFrameHeader
44 *  --- sp when called ---
45 *  - fp[72]  ra                 Return from RegExp code (ra).                  kReturnAddress
46 *  - fp[64]  s9, old-fp         Old fp, callee saved(s9).
47 *  - fp[0..63]  fp..s7          Callee-saved registers fp..s7.
48 *  --- frame pointer ----
49 *  - fp[-8]  Isolate* isolate   (address of the current isolate)               kIsolate
50 *  - fp[-16] direct_call        (1 = direct call from JS, 0 = from runtime)    kDirectCall
51 *  - fp[-24] output_size (may fit multiple sets of matches)                    kNumOutputRegisters
52 *  - fp[-32] int* output (int[num_saved_registers_], for output).              kRegisterOutput
53 *  - fp[-40] end of input       (address of end of string).                    kInputEnd
54 *  - fp[-48] start of input     (address of first character in string).        kInputStart
55 *  - fp[-56] start index        (character index of start).                    kStartIndex
56 *  - fp[-64] void* input_string (location of a handle containing the string).  kInputString
57 *  - fp[-72] success counter    (only for global regexps to count matches).    kSuccessfulCaptures
58 *  - fp[-80] Offset of location before start of input (effectively character   kStringStartMinusOne
59 *            position -1). Used to initialize capture registers to a
60 *            non-position.
61 *  --------- The following output registers are 32-bit values. ---------
62 *  - fp[-88] register 0         (Only positions must be stored in the first    kRegisterZero
63 *  -         register 1          num_saved_registers_ registers)
64 *  -         ...
65 *  -         register num_registers-1
66 *  --- sp ---
67 *
68 * The first num_saved_registers_ registers are initialized to point to
69 * "character -1" in the string (i.e., char_size() bytes before the first
70 * character of the string). The remaining registers start out as garbage.
71 *
72 * The data up to the return address must be placed there by the calling
73 * code and the remaining arguments are passed in registers, e.g. by calling the
74 * code entry as cast to a function with the signature:
75 * int (*match)(String input_string,
76 *              int start_index,
77 *              Address start,
78 *              Address end,
79 *              int* output,
80 *              int output_size,
81 *              bool direct_call = false,
82 *              Isolate* isolate,
83 *              Address regexp);
84 * The call is performed by NativeRegExpMacroAssembler::Execute()
85 * (in regexp-macro-assembler.cc) via the GeneratedCode wrapper.
86 *
87 * clang-format on
88 */
89
// Shorthand used throughout this file: every `__ foo(...)` statement is
// routed through ACCESS_MASM(masm_), i.e. it is issued on this assembler's
// masm_ member.
#define __ ACCESS_MASM(masm_)

// Out-of-class definition of the class constant kRegExpCodeSize; the
// constructor below passes it to NewAssemblerBuffer().
const int RegExpMacroAssemblerRISCV::kRegExpCodeSize;
93
94RegExpMacroAssemblerRISCV::RegExpMacroAssemblerRISCV(Isolate* isolate,
95                                                     Zone* zone, Mode mode,
96                                                     int registers_to_save)
97    : NativeRegExpMacroAssembler(isolate, zone),
98      masm_(std::make_unique<MacroAssembler>(
99          isolate, CodeObjectRequired::kYes,
100          NewAssemblerBuffer(kRegExpCodeSize))),
101      no_root_array_scope_(masm_.get()),
102      mode_(mode),
103      num_registers_(registers_to_save),
104      num_saved_registers_(registers_to_save),
105      entry_label_(),
106      start_label_(),
107      success_label_(),
108      backtrack_label_(),
109      exit_label_(),
110      internal_failure_label_() {
111  DCHECK_EQ(0, registers_to_save % 2);
112  __ jmp(&entry_label_);  // We'll write the entry code later.
113  // If the code gets too big or corrupted, an internal exception will be
114  // raised, and we will exit right away.
115  __ bind(&internal_failure_label_);
116  __ li(a0, Operand(FAILURE));
117  __ Ret();
118  __ bind(&start_label_);  // And then continue from here.
119}
120
121RegExpMacroAssemblerRISCV::~RegExpMacroAssemblerRISCV() {
122  // Unuse labels in case we throw away the assembler without calling GetCode.
123  entry_label_.Unuse();
124  start_label_.Unuse();
125  success_label_.Unuse();
126  backtrack_label_.Unuse();
127  exit_label_.Unuse();
128  check_preempt_label_.Unuse();
129  stack_overflow_label_.Unuse();
130  internal_failure_label_.Unuse();
131  fallback_label_.Unuse();
132}
133
134int RegExpMacroAssemblerRISCV::stack_limit_slack() {
135  return RegExpStack::kStackLimitSlack;
136}
137
138void RegExpMacroAssemblerRISCV::AdvanceCurrentPosition(int by) {
139  if (by != 0) {
140    __ Add64(current_input_offset(), current_input_offset(),
141             Operand(by * char_size()));
142  }
143}
144
145void RegExpMacroAssemblerRISCV::AdvanceRegister(int reg, int by) {
146  DCHECK_LE(0, reg);
147  DCHECK_GT(num_registers_, reg);
148  if (by != 0) {
149    __ Ld(a0, register_location(reg));
150    __ Add64(a0, a0, Operand(by));
151    __ Sd(a0, register_location(reg));
152  }
153}
154
155void RegExpMacroAssemblerRISCV::Backtrack() {
156  CheckPreemption();
157  if (has_backtrack_limit()) {
158    Label next;
159    __ Ld(a0, MemOperand(frame_pointer(), kBacktrackCount));
160    __ Add64(a0, a0, Operand(1));
161    __ Sd(a0, MemOperand(frame_pointer(), kBacktrackCount));
162    __ BranchShort(&next, ne, a0, Operand(backtrack_limit()));
163
164    // Backtrack limit exceeded.
165    if (can_fallback()) {
166      __ jmp(&fallback_label_);
167    } else {
168      // Can't fallback, so we treat it as a failed match.
169      Fail();
170    }
171
172    __ bind(&next);
173  }
174  // Pop Code offset from backtrack stack, add Code and jump to location.
175  Pop(a0);
176  __ Add64(a0, a0, code_pointer());
177  __ Jump(a0);
178}
179
180void RegExpMacroAssemblerRISCV::Bind(Label* label) { __ bind(label); }
181
182void RegExpMacroAssemblerRISCV::CheckCharacter(uint32_t c, Label* on_equal) {
183  BranchOrBacktrack(on_equal, eq, current_character(), Operand(c));
184}
185
186void RegExpMacroAssemblerRISCV::CheckCharacterGT(base::uc16 limit,
187                                                 Label* on_greater) {
188  BranchOrBacktrack(on_greater, gt, current_character(), Operand(limit));
189}
190
191void RegExpMacroAssemblerRISCV::CheckAtStart(int cp_offset,
192                                             Label* on_at_start) {
193  __ Ld(a1, MemOperand(frame_pointer(), kStringStartMinusOne));
194  __ Add64(a0, current_input_offset(),
195           Operand(-char_size() + cp_offset * char_size()));
196  BranchOrBacktrack(on_at_start, eq, a0, Operand(a1));
197}
198
199void RegExpMacroAssemblerRISCV::CheckNotAtStart(int cp_offset,
200                                                Label* on_not_at_start) {
201  __ Ld(a1, MemOperand(frame_pointer(), kStringStartMinusOne));
202  __ Add64(a0, current_input_offset(),
203           Operand(-char_size() + cp_offset * char_size()));
204  BranchOrBacktrack(on_not_at_start, ne, a0, Operand(a1));
205}
206
207void RegExpMacroAssemblerRISCV::CheckCharacterLT(base::uc16 limit,
208                                                 Label* on_less) {
209  BranchOrBacktrack(on_less, lt, current_character(), Operand(limit));
210}
211
212void RegExpMacroAssemblerRISCV::CheckGreedyLoop(Label* on_equal) {
213  Label backtrack_non_equal;
214  __ Lw(a0, MemOperand(backtrack_stackpointer(), 0));
215  __ BranchShort(&backtrack_non_equal, ne, current_input_offset(), Operand(a0));
216  __ Add64(backtrack_stackpointer(), backtrack_stackpointer(),
217           Operand(kIntSize));
218  __ bind(&backtrack_non_equal);
219  BranchOrBacktrack(on_equal, eq, current_input_offset(), Operand(a0));
220}
221
222// Push (pop) caller-saved registers used by irregexp.
223void RegExpMacroAssemblerRISCV::PushCallerSavedRegisters() {
224  RegList caller_saved_regexp = {current_input_offset(), current_character(),
225                                 end_of_input_address(),
226                                 backtrack_stackpointer()};
227  __ MultiPush(caller_saved_regexp);
228}
229
230void RegExpMacroAssemblerRISCV::PopCallerSavedRegisters() {
231  RegList caller_saved_regexp = {current_input_offset(), current_character(),
232                                 end_of_input_address(),
233                                 backtrack_stackpointer()};
234  __ MultiPop(caller_saved_regexp);
235}
236
237void RegExpMacroAssemblerRISCV::CallIsCharacterInRangeArray(
238    const ZoneList<CharacterRange>* ranges) {
239  PushCallerSavedRegisters();
240  static const int kNumArguments = 3;
241  __ PrepareCallCFunction(kNumArguments, a0);
242
243  __ mv(a0, current_character());
244  __ li(a1, Operand(GetOrAddRangeArray(ranges)));
245  __ li(a2, Operand(ExternalReference::isolate_address(isolate())));
246
247  {
248    // We have a frame (set up in GetCode), but the assembler doesn't know.
249    FrameScope scope(masm_.get(), StackFrame::MANUAL);
250    __ CallCFunction(ExternalReference::re_is_character_in_range_array(),
251                     kNumArguments);
252  }
253  PopCallerSavedRegisters();
254  __ li(code_pointer(), Operand(masm_->CodeObject()));
255}
256
257bool RegExpMacroAssemblerRISCV::CheckCharacterInRangeArray(
258    const ZoneList<CharacterRange>* ranges, Label* on_in_range) {
259  CallIsCharacterInRangeArray(ranges);
260  BranchOrBacktrack(on_in_range, ne, a0, Operand(zero_reg));
261  return true;
262}
263
264bool RegExpMacroAssemblerRISCV::CheckCharacterNotInRangeArray(
265    const ZoneList<CharacterRange>* ranges, Label* on_not_in_range) {
266  CallIsCharacterInRangeArray(ranges);
267  BranchOrBacktrack(on_not_in_range, eq, a0, Operand(zero_reg));
268  return true;
269}
270
271void RegExpMacroAssemblerRISCV::CheckNotBackReferenceIgnoreCase(
272    int start_reg, bool read_backward, bool unicode, Label* on_no_match) {
273  Label fallthrough;
274  __ Ld(a0, register_location(start_reg));      // Index of start of capture.
275  __ Ld(a1, register_location(start_reg + 1));  // Index of end of capture.
276  __ Sub64(a1, a1, a0);                         // Length of capture.
277
278  // At this point, the capture registers are either both set or both cleared.
279  // If the capture length is zero, then the capture is either empty or cleared.
280  // Fall through in both cases.
281  __ BranchShort(&fallthrough, eq, a1, Operand(zero_reg));
282
283  if (read_backward) {
284    __ Ld(t1, MemOperand(frame_pointer(), kStringStartMinusOne));
285    __ Add64(t1, t1, a1);
286    BranchOrBacktrack(on_no_match, le, current_input_offset(), Operand(t1));
287  } else {
288    __ Add64(t1, a1, current_input_offset());
289    // Check that there are enough characters left in the input.
290    BranchOrBacktrack(on_no_match, gt, t1, Operand(zero_reg));
291  }
292
293  if (mode_ == LATIN1) {
294    Label success;
295    Label fail;
296    Label loop_check;
297
298    // a0 - offset of start of capture.
299    // a1 - length of capture.
300    __ Add64(a0, a0, Operand(end_of_input_address()));
301    __ Add64(a2, end_of_input_address(), Operand(current_input_offset()));
302    if (read_backward) {
303      __ Sub64(a2, a2, Operand(a1));
304    }
305    __ Add64(a1, a0, Operand(a1));
306
307    // a0 - Address of start of capture.
308    // a1 - Address of end of capture.
309    // a2 - Address of current input position.
310
311    Label loop;
312    __ bind(&loop);
313    __ Lbu(a3, MemOperand(a0, 0));
314    __ addi(a0, a0, char_size());
315    __ Lbu(a4, MemOperand(a2, 0));
316    __ addi(a2, a2, char_size());
317
318    __ BranchShort(&loop_check, eq, a4, Operand(a3));
319
320    // Mismatch, try case-insensitive match (converting letters to lower-case).
321    __ Or(a3, a3, Operand(0x20));  // Convert capture character to lower-case.
322    __ Or(a4, a4, Operand(0x20));  // Also convert input character.
323    __ BranchShort(&fail, ne, a4, Operand(a3));
324    __ Sub64(a3, a3, Operand('a'));
325    __ BranchShort(&loop_check, Uless_equal, a3, Operand('z' - 'a'));
326    // Latin-1: Check for values in range [224,254] but not 247.
327    __ Sub64(a3, a3, Operand(224 - 'a'));
328    // Weren't Latin-1 letters.
329    __ BranchShort(&fail, Ugreater, a3, Operand(254 - 224));
330    // Check for 247.
331    __ BranchShort(&fail, eq, a3, Operand(247 - 224));
332
333    __ bind(&loop_check);
334    __ Branch(&loop, lt, a0, Operand(a1));
335    __ jmp(&success);
336
337    __ bind(&fail);
338    GoTo(on_no_match);
339
340    __ bind(&success);
341    // Compute new value of character position after the matched part.
342    __ Sub64(current_input_offset(), a2, end_of_input_address());
343    if (read_backward) {
344      __ Ld(t1, register_location(start_reg));  // Index of start of capture.
345      __ Ld(a2, register_location(start_reg + 1));  // Index of end of capture.
346      __ Add64(current_input_offset(), current_input_offset(), Operand(t1));
347      __ Sub64(current_input_offset(), current_input_offset(), Operand(a2));
348    }
349  } else {
350    DCHECK(mode_ == UC16);
351    PushCallerSavedRegisters();
352
353    int argument_count = 4;
354    __ PrepareCallCFunction(argument_count, a2);
355
356    // a0 - offset of start of capture.
357    // a1 - length of capture.
358
359    // Put arguments into arguments registers.
360    // Parameters are
361    //   a0: Address byte_offset1 - Address captured substring's start.
362    //   a1: Address byte_offset2 - Address of current character position.
363    //   a2: size_t byte_length - length of capture in bytes(!).
364    //   a3: Isolate* isolate.
365
366    // Address of start of capture.
367    __ Add64(a0, a0, Operand(end_of_input_address()));
368    // Length of capture.
369    __ mv(a2, a1);
370    // Save length in callee-save register for use on return.
371    __ mv(s3, a1);
372    // Address of current input position.
373    __ Add64(a1, current_input_offset(), Operand(end_of_input_address()));
374    if (read_backward) {
375      __ Sub64(a1, a1, Operand(s3));
376    }
377    // Isolate.
378    __ li(a3, Operand(ExternalReference::isolate_address(masm_->isolate())));
379
380    {
381      AllowExternalCallThatCantCauseGC scope(masm_.get());
382      ExternalReference function =
383          unicode
384              ? ExternalReference::re_case_insensitive_compare_unicode()
385              : ExternalReference::re_case_insensitive_compare_non_unicode();
386      __ CallCFunction(function, argument_count);
387    }
388
389    // Restore regexp engine registers.
390    PopCallerSavedRegisters();
391    __ li(code_pointer(), Operand(masm_->CodeObject()), CONSTANT_SIZE);
392    __ Ld(end_of_input_address(), MemOperand(frame_pointer(), kInputEnd));
393
394    // Check if function returned non-zero for success or zero for failure.
395    BranchOrBacktrack(on_no_match, eq, a0, Operand(zero_reg));
396    // On success, increment position by length of capture.
397    if (read_backward) {
398      __ Sub64(current_input_offset(), current_input_offset(), Operand(s3));
399    } else {
400      __ Add64(current_input_offset(), current_input_offset(), Operand(s3));
401    }
402  }
403
404  __ bind(&fallthrough);
405}
406
407void RegExpMacroAssemblerRISCV::CheckNotBackReference(int start_reg,
408                                                      bool read_backward,
409                                                      Label* on_no_match) {
410  Label fallthrough;
411
412  // Find length of back-referenced capture.
413  __ Ld(a0, register_location(start_reg));
414  __ Ld(a1, register_location(start_reg + 1));
415  __ Sub64(a1, a1, a0);  // Length to check.
416
417  // At this point, the capture registers are either both set or both cleared.
418  // If the capture length is zero, then the capture is either empty or cleared.
419  // Fall through in both cases.
420  __ BranchShort(&fallthrough, eq, a1, Operand(zero_reg));
421
422  if (read_backward) {
423    __ Ld(t1, MemOperand(frame_pointer(), kStringStartMinusOne));
424    __ Add64(t1, t1, a1);
425    BranchOrBacktrack(on_no_match, le, current_input_offset(), Operand(t1));
426  } else {
427    __ Add64(t1, a1, current_input_offset());
428    // Check that there are enough characters left in the input.
429    BranchOrBacktrack(on_no_match, gt, t1, Operand(zero_reg));
430  }
431
432  // Compute pointers to match string and capture string.
433  __ Add64(a0, a0, Operand(end_of_input_address()));
434  __ Add64(a2, end_of_input_address(), Operand(current_input_offset()));
435  if (read_backward) {
436    __ Sub64(a2, a2, Operand(a1));
437  }
438  __ Add64(a1, a1, Operand(a0));
439
440  Label loop;
441  __ bind(&loop);
442  if (mode_ == LATIN1) {
443    __ Lbu(a3, MemOperand(a0, 0));
444    __ addi(a0, a0, char_size());
445    __ Lbu(a4, MemOperand(a2, 0));
446    __ addi(a2, a2, char_size());
447  } else {
448    DCHECK(mode_ == UC16);
449    __ Lhu(a3, MemOperand(a0, 0));
450    __ addi(a0, a0, char_size());
451    __ Lhu(a4, MemOperand(a2, 0));
452    __ addi(a2, a2, char_size());
453  }
454  BranchOrBacktrack(on_no_match, ne, a3, Operand(a4));
455  __ Branch(&loop, lt, a0, Operand(a1));
456
457  // Move current character position to position after match.
458  __ Sub64(current_input_offset(), a2, end_of_input_address());
459  if (read_backward) {
460    __ Ld(t1, register_location(start_reg));      // Index of start of capture.
461    __ Ld(a2, register_location(start_reg + 1));  // Index of end of capture.
462    __ Add64(current_input_offset(), current_input_offset(), Operand(t1));
463    __ Sub64(current_input_offset(), current_input_offset(), Operand(a2));
464  }
465  __ bind(&fallthrough);
466}
467
468void RegExpMacroAssemblerRISCV::CheckNotCharacter(uint32_t c,
469                                                  Label* on_not_equal) {
470  BranchOrBacktrack(on_not_equal, ne, current_character(), Operand(c));
471}
472
473void RegExpMacroAssemblerRISCV::CheckCharacterAfterAnd(uint32_t c,
474                                                       uint32_t mask,
475                                                       Label* on_equal) {
476  __ And(a0, current_character(), Operand(mask));
477  Operand rhs = (c == 0) ? Operand(zero_reg) : Operand(c);
478  BranchOrBacktrack(on_equal, eq, a0, rhs);
479}
480
481void RegExpMacroAssemblerRISCV::CheckNotCharacterAfterAnd(uint32_t c,
482                                                          uint32_t mask,
483                                                          Label* on_not_equal) {
484  __ And(a0, current_character(), Operand(mask));
485  Operand rhs = (c == 0) ? Operand(zero_reg) : Operand(c);
486  BranchOrBacktrack(on_not_equal, ne, a0, rhs);
487}
488
489void RegExpMacroAssemblerRISCV::CheckNotCharacterAfterMinusAnd(
490    base::uc16 c, base::uc16 minus, base::uc16 mask, Label* on_not_equal) {
491  DCHECK_GT(String::kMaxUtf16CodeUnit, minus);
492  __ Sub64(a0, current_character(), Operand(minus));
493  __ And(a0, a0, Operand(mask));
494  BranchOrBacktrack(on_not_equal, ne, a0, Operand(c));
495}
496
497void RegExpMacroAssemblerRISCV::CheckCharacterInRange(base::uc16 from,
498                                                      base::uc16 to,
499                                                      Label* on_in_range) {
500  __ Sub64(a0, current_character(), Operand(from));
501  // Unsigned lower-or-same condition.
502  BranchOrBacktrack(on_in_range, Uless_equal, a0, Operand(to - from));
503}
504
505void RegExpMacroAssemblerRISCV::CheckCharacterNotInRange(
506    base::uc16 from, base::uc16 to, Label* on_not_in_range) {
507  __ Sub64(a0, current_character(), Operand(from));
508  // Unsigned higher condition.
509  BranchOrBacktrack(on_not_in_range, Ugreater, a0, Operand(to - from));
510}
511
512void RegExpMacroAssemblerRISCV::CheckBitInTable(Handle<ByteArray> table,
513                                                Label* on_bit_set) {
514  __ li(a0, Operand(table));
515  if (mode_ != LATIN1 || kTableMask != String::kMaxOneByteCharCode) {
516    __ And(a1, current_character(), Operand(kTableSize - 1));
517    __ Add64(a0, a0, a1);
518  } else {
519    __ Add64(a0, a0, current_character());
520  }
521
522  __ Lbu(a0, FieldMemOperand(a0, ByteArray::kHeaderSize));
523  BranchOrBacktrack(on_bit_set, ne, a0, Operand(zero_reg));
524}
525
526bool RegExpMacroAssemblerRISCV::CheckSpecialCharacterClass(
527    StandardCharacterSet type, Label* on_no_match) {
528  // Range checks (c in min..max) are generally implemented by an unsigned
529  // (c - min) <= (max - min) check.
530  switch (type) {
531    case StandardCharacterSet::kWhitespace:
532      // Match space-characters.
533      if (mode_ == LATIN1) {
534        // One byte space characters are '\t'..'\r', ' ' and \u00a0.
535        Label success;
536        __ BranchShort(&success, eq, current_character(), Operand(' '));
537        // Check range 0x09..0x0D.
538        __ Sub64(a0, current_character(), Operand('\t'));
539        __ BranchShort(&success, Uless_equal, a0, Operand('\r' - '\t'));
540        // \u00a0 (NBSP).
541        BranchOrBacktrack(on_no_match, ne, a0, Operand(0x00A0 - '\t'));
542        __ bind(&success);
543        return true;
544      }
545      return false;
546    case StandardCharacterSet::kNotWhitespace:
547      // The emitted code for generic character classes is good enough.
548      return false;
549    case StandardCharacterSet::kDigit:
550      // Match Latin1 digits ('0'..'9').
551      __ Sub64(a0, current_character(), Operand('0'));
552      BranchOrBacktrack(on_no_match, Ugreater, a0, Operand('9' - '0'));
553      return true;
554    case StandardCharacterSet::kNotDigit:
555      // Match non Latin1-digits.
556      __ Sub64(a0, current_character(), Operand('0'));
557      BranchOrBacktrack(on_no_match, Uless_equal, a0, Operand('9' - '0'));
558      return true;
559    case StandardCharacterSet::kNotLineTerminator: {
560      // Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029).
561      __ Xor(a0, current_character(), Operand(0x01));
562      // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C.
563      __ Sub64(a0, a0, Operand(0x0B));
564      BranchOrBacktrack(on_no_match, Uless_equal, a0, Operand(0x0C - 0x0B));
565      if (mode_ == UC16) {
566        // Compare original value to 0x2028 and 0x2029, using the already
567        // computed (current_char ^ 0x01 - 0x0B). I.e., check for
568        // 0x201D (0x2028 - 0x0B) or 0x201E.
569        __ Sub64(a0, a0, Operand(0x2028 - 0x0B));
570        BranchOrBacktrack(on_no_match, Uless_equal, a0, Operand(1));
571      }
572      return true;
573    }
574    case StandardCharacterSet::kLineTerminator: {
575      // Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029).
576      __ Xor(a0, current_character(), Operand(0x01));
577      // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C.
578      __ Sub64(a0, a0, Operand(0x0B));
579      if (mode_ == LATIN1) {
580        BranchOrBacktrack(on_no_match, Ugreater, a0, Operand(0x0C - 0x0B));
581      } else {
582        Label done;
583        BranchOrBacktrack(&done, Uless_equal, a0, Operand(0x0C - 0x0B));
584        // Compare original value to 0x2028 and 0x2029, using the already
585        // computed (current_char ^ 0x01 - 0x0B). I.e., check for
586        // 0x201D (0x2028 - 0x0B) or 0x201E.
587        __ Sub64(a0, a0, Operand(0x2028 - 0x0B));
588        BranchOrBacktrack(on_no_match, Ugreater, a0, Operand(1));
589        __ bind(&done);
590      }
591      return true;
592    }
593    case StandardCharacterSet::kWord: {
594      if (mode_ != LATIN1) {
595        // Table is 256 entries, so all Latin1 characters can be tested.
596        BranchOrBacktrack(on_no_match, Ugreater, current_character(),
597                          Operand('z'));
598      }
599      ExternalReference map = ExternalReference::re_word_character_map();
600      __ li(a0, Operand(map));
601      __ Add64(a0, a0, current_character());
602      __ Lbu(a0, MemOperand(a0, 0));
603      BranchOrBacktrack(on_no_match, eq, a0, Operand(zero_reg));
604      return true;
605    }
606    case StandardCharacterSet::kNotWord: {
607      Label done;
608      if (mode_ != LATIN1) {
609        // Table is 256 entries, so all Latin1 characters can be tested.
610        __ BranchShort(&done, Ugreater, current_character(), Operand('z'));
611      }
612      ExternalReference map = ExternalReference::re_word_character_map();
613      __ li(a0, Operand(map));
614      __ Add64(a0, a0, current_character());
615      __ Lbu(a0, MemOperand(a0, 0));
616      BranchOrBacktrack(on_no_match, ne, a0, Operand(zero_reg));
617      if (mode_ != LATIN1) {
618        __ bind(&done);
619      }
620      return true;
621    }
622    case StandardCharacterSet::kEverything:
623      // Match any character.
624      return true;
625    // No custom implementation (yet): s(UC16), S(UC16).
626    default:
627      return false;
628  }
629}
630
631void RegExpMacroAssemblerRISCV::Fail() {
632  __ li(a0, Operand(FAILURE));
633  __ jmp(&exit_label_);
634}
635
636void RegExpMacroAssemblerRISCV::LoadRegExpStackPointerFromMemory(Register dst) {
637  ExternalReference ref =
638      ExternalReference::address_of_regexp_stack_stack_pointer(isolate());
639  __ li(dst, Operand(ref));
640  __ Ld(dst, MemOperand(dst));
641}
642
643void RegExpMacroAssemblerRISCV::StoreRegExpStackPointerToMemory(
644    Register src, Register scratch) {
645  ExternalReference ref =
646      ExternalReference::address_of_regexp_stack_stack_pointer(isolate());
647  __ li(scratch, Operand(ref));
648  __ Sd(src, MemOperand(scratch));
649}
650
651void RegExpMacroAssemblerRISCV::PushRegExpBasePointer(Register scratch1,
652                                                      Register scratch2) {
653  LoadRegExpStackPointerFromMemory(scratch1);
654  ExternalReference ref =
655      ExternalReference::address_of_regexp_stack_memory_top_address(isolate());
656  __ li(scratch2, Operand(ref));
657  __ Ld(scratch2, MemOperand(scratch2));
658  __ Sub64(scratch2, scratch1, scratch2);
659  __ Sd(scratch2, MemOperand(frame_pointer(), kRegExpStackBasePointer));
660}
661
662void RegExpMacroAssemblerRISCV::PopRegExpBasePointer(Register scratch1,
663                                                     Register scratch2) {
664  ExternalReference ref =
665      ExternalReference::address_of_regexp_stack_memory_top_address(isolate());
666  __ Ld(scratch1, MemOperand(frame_pointer(), kRegExpStackBasePointer));
667  __ li(scratch2, ref);
668  __ Ld(scratch2, MemOperand(scratch2));
669  __ Add64(scratch1, scratch1, scratch2);
670  StoreRegExpStackPointerToMemory(scratch1, scratch2);
671}
672
673Handle<HeapObject> RegExpMacroAssemblerRISCV::GetCode(Handle<String> source) {
674  Label return_a0;
675  if (masm_->has_exception()) {
676    // If the code gets corrupted due to long regular expressions and lack of
677    // space on trampolines, an internal exception flag is set. If this case
678    // is detected, we will jump into exit sequence right away.
679    __ bind_to(&entry_label_, internal_failure_label_.pos());
680  } else {
681    // Finalize code - write the entry point code now we know how many
682    // registers we need.
683
684    // Entry code:
685    __ bind(&entry_label_);
686
    // Tell the system that we have a stack frame.  Because the type is MANUAL,
    // no frame-setup code is generated.
689    FrameScope scope(masm_.get(), StackFrame::MANUAL);
690
691    // Actually emit code to start a new stack frame.
692    // Push arguments
693    // Save callee-save registers.
694    // Start new stack frame.
695    // Store link register in existing stack-cell.
696    // Order here should correspond to order of offset constants in header file.
697    // TODO(plind): we save fp..s11, but ONLY use s3 here - use the regs
698    // or dont save.
699    RegList registers_to_retain = {fp, s1, s2, s3, s4,
700                                   s5, s6, s7, s8 /*, s9, s10, s11*/};
701    DCHECK(registers_to_retain.Count() == kNumCalleeRegsToRetain);
702
703    // The remaining arguments are passed in registers, e.g.by calling the code
704    // entry as cast to a function with the signature:
705    //
706    // *int(*match)(String input_string,      // a0
707    //             int start_offset,          // a1
708    //             byte* input_start,         // a2
709    //             byte* input_end,           // a3
710    //             int* output,               // a4
711    //             int output_size,           // a5
712    //             int call_origin,           // a6
713    //             Isolate* isolate,          // a7
714    //             Address regexp);           // on the stack
715    RegList argument_registers = {a0, a1, a2, a3, a4, a5, a6, a7};
716
717    // According to MultiPush implementation, registers will be pushed in the
718    // order of ra, fp, then s8, ..., s1, and finally a7,...a0
719    __ MultiPush(RegList{ra} | registers_to_retain | argument_registers);
720
721    // Set frame pointer in space for it if this is not a direct call
722    // from generated code.
723    __ Add64(frame_pointer(), sp,
724             Operand(argument_registers.Count() * kSystemPointerSize));
725
726    STATIC_ASSERT(kSuccessfulCaptures == kInputString - kSystemPointerSize);
727    __ mv(a0, zero_reg);
728    __ push(a0);  // Make room for success counter and initialize it to 0.
729    STATIC_ASSERT(kStringStartMinusOne ==
730                  kSuccessfulCaptures - kSystemPointerSize);
731    __ push(a0);  // Make room for "string start - 1" constant.
732    STATIC_ASSERT(kBacktrackCount == kStringStartMinusOne - kSystemPointerSize);
733    __ push(a0);  // The backtrack counter
734    STATIC_ASSERT(kRegExpStackBasePointer ==
735                  kBacktrackCount - kSystemPointerSize);
736    __ push(a0);  // The regexp stack base ptr.
737    // Store the regexp base pointer - we'll later restore it / write it to
738    // memory when returning from this irregexp code object.
739    PushRegExpBasePointer(a0, a1);
740
741    // Check if we have space on the stack for registers.
742    Label stack_limit_hit;
743    Label stack_ok;
744
745    ExternalReference stack_limit =
746        ExternalReference::address_of_jslimit(masm_->isolate());
747    __ li(a0, Operand(stack_limit));
748    __ Ld(a0, MemOperand(a0));
749    __ Sub64(a0, sp, a0);
750    // Handle it if the stack pointer is already below the stack limit.
751    __ BranchShort(&stack_limit_hit, le, a0, Operand(zero_reg));
752    // Check if there is room for the variable number of registers above
753    // the stack limit.
754    __ BranchShort(&stack_ok, Ugreater_equal, a0,
755                   Operand(num_registers_ * kSystemPointerSize));
756    // Exit with OutOfMemory exception. There is not enough space on the stack
757    // for our working registers.
758    __ li(a0, Operand(EXCEPTION));
759    __ jmp(&return_a0);
760
761    __ bind(&stack_limit_hit);
762    CallCheckStackGuardState(a0);
763    // If returned value is non-zero, we exit with the returned value as result.
764    __ Branch(&return_a0, ne, a0, Operand(zero_reg));
765
766    __ bind(&stack_ok);
767    // Allocate space on stack for registers.
768    __ Sub64(sp, sp, Operand(num_registers_ * kSystemPointerSize));
769    // Load string end.
770    __ Ld(end_of_input_address(), MemOperand(frame_pointer(), kInputEnd));
771    // Load input start.
772    __ Ld(a0, MemOperand(frame_pointer(), kInputStart));
773    // Find negative length (offset of start relative to end).
774    __ Sub64(current_input_offset(), a0, end_of_input_address());
775    // Set a0 to address of char before start of the input string
776    // (effectively string position -1).
777    __ Ld(a1, MemOperand(frame_pointer(), kStartIndex));
778    __ Sub64(a0, current_input_offset(), Operand(char_size()));
779    __ slli(t1, a1, (mode_ == UC16) ? 1 : 0);
780    __ Sub64(a0, a0, t1);
781    // Store this value in a local variable, for use when clearing
782    // position registers.
783    __ Sd(a0, MemOperand(frame_pointer(), kStringStartMinusOne));
784
785    // Initialize code pointer register
786    __ li(code_pointer(), Operand(masm_->CodeObject()), CONSTANT_SIZE);
787
788    Label load_char_start_regexp, start_regexp;
789    // Load newline if index is at start, previous character otherwise.
790    __ BranchShort(&load_char_start_regexp, ne, a1, Operand(zero_reg));
791    __ li(current_character(), Operand('\n'));
792    __ jmp(&start_regexp);
793
794    // Global regexp restarts matching here.
795    __ bind(&load_char_start_regexp);
796    // Load previous char as initial value of current character register.
797    LoadCurrentCharacterUnchecked(-1, 1);
798    __ bind(&start_regexp);
799
800    // Initialize on-stack registers.
801    if (num_saved_registers_ > 0) {  // Always is, if generated from a regexp.
802      // Fill saved registers with initial value = start offset - 1.
803      if (num_saved_registers_ > 8) {
804        // Address of register 0.
805        __ Add64(a1, frame_pointer(), Operand(kRegisterZero));
806        __ li(a2, Operand(num_saved_registers_));
807        Label init_loop;
808        __ bind(&init_loop);
809        __ Sd(a0, MemOperand(a1));
810        __ Add64(a1, a1, Operand(-kSystemPointerSize));
811        __ Sub64(a2, a2, Operand(1));
812        __ Branch(&init_loop, ne, a2, Operand(zero_reg));
813      } else {
814        for (int i = 0; i < num_saved_registers_; i++) {
815          __ Sd(a0, register_location(i));
816        }
817      }
818    }
819
820    // Initialize backtrack stack pointer.
821    LoadRegExpStackPointerFromMemory(backtrack_stackpointer());
822
823    __ jmp(&start_label_);
824
825    // Exit code:
826    if (success_label_.is_linked()) {
827      // Save captures when successful.
828      __ bind(&success_label_);
829      if (num_saved_registers_ > 0) {
830        // Copy captures to output.
831        __ Ld(a1, MemOperand(frame_pointer(), kInputStart));
832        __ Ld(a0, MemOperand(frame_pointer(), kRegisterOutput));
833        __ Ld(a2, MemOperand(frame_pointer(), kStartIndex));
834        __ Sub64(a1, end_of_input_address(), a1);
835        // a1 is length of input in bytes.
836        if (mode_ == UC16) {
837          __ srli(a1, a1, 1);
838        }
839        // a1 is length of input in characters.
840        __ Add64(a1, a1, Operand(a2));
841        // a1 is length of string in characters.
842
843        DCHECK_EQ(0, num_saved_registers_ % 2);
844        // Always an even number of capture registers. This allows us to
845        // unroll the loop once to add an operation between a load of a
846        // register and the following use of that register.
847        for (int i = 0; i < num_saved_registers_; i += 2) {
848          __ Ld(a2, register_location(i));
849          __ Ld(a3, register_location(i + 1));
850          if (i == 0 && global_with_zero_length_check()) {
            // Keep capture start in s3 for the zero-length check later.
852            __ mv(s3, a2);
853          }
854          if (mode_ == UC16) {
855            __ srai(a2, a2, 1);
856            __ Add64(a2, a2, a1);
857            __ srai(a3, a3, 1);
858            __ Add64(a3, a3, a1);
859          } else {
860            __ Add64(a2, a1, Operand(a2));
861            __ Add64(a3, a1, Operand(a3));
862          }
863          // V8 expects the output to be an int32_t array.
864          __ Sw(a2, MemOperand(a0));
865          __ Add64(a0, a0, kIntSize);
866          __ Sw(a3, MemOperand(a0));
867          __ Add64(a0, a0, kIntSize);
868        }
869      }
870
871      if (global()) {
872        // Restart matching if the regular expression is flagged as global.
873        __ Ld(a0, MemOperand(frame_pointer(), kSuccessfulCaptures));
874        __ Ld(a1, MemOperand(frame_pointer(), kNumOutputRegisters));
875        __ Ld(a2, MemOperand(frame_pointer(), kRegisterOutput));
876        // Increment success counter.
877        __ Add64(a0, a0, 1);
878        __ Sd(a0, MemOperand(frame_pointer(), kSuccessfulCaptures));
879        // Capture results have been stored, so the number of remaining global
880        // output registers is reduced by the number of stored captures.
881        __ Sub64(a1, a1, num_saved_registers_);
882        // Check whether we have enough room for another set of capture results.
883        __ Branch(&return_a0, lt, a1, Operand(num_saved_registers_));
884
885        __ Sd(a1, MemOperand(frame_pointer(), kNumOutputRegisters));
886        // Advance the location for output.
887        __ Add64(a2, a2, num_saved_registers_ * kIntSize);
888        __ Sd(a2, MemOperand(frame_pointer(), kRegisterOutput));
889
890        // Prepare a0 to initialize registers with its value in the next run.
891        __ Ld(a0, MemOperand(frame_pointer(), kStringStartMinusOne));
892
893        if (global_with_zero_length_check()) {
894          // Special case for zero-length matches.
895          // s3: capture start index
896          // Not a zero-length match, restart.
897          __ Branch(&load_char_start_regexp, ne, current_input_offset(),
898                    Operand(s3));
899          // Offset from the end is zero if we already reached the end.
900          __ Branch(&exit_label_, eq, current_input_offset(),
901                    Operand(zero_reg));
902          // Advance current position after a zero-length match.
903          Label advance;
904          __ bind(&advance);
905          __ Add64(current_input_offset(), current_input_offset(),
906                   Operand((mode_ == UC16) ? 2 : 1));
907          if (global_unicode()) CheckNotInSurrogatePair(0, &advance);
908        }
909
910        __ Branch(&load_char_start_regexp);
911      } else {
912        __ li(a0, Operand(SUCCESS));
913      }
914    }
915    // Exit and return a0.
916    __ bind(&exit_label_);
917    if (global()) {
918      __ Ld(a0, MemOperand(frame_pointer(), kSuccessfulCaptures));
919    }
920
921    __ bind(&return_a0);
922    // Restore the original regexp stack pointer value (effectively, pop the
923    // stored base pointer).
924    PopRegExpBasePointer(a1, a2);
925    // Skip sp past regexp registers and local variables..
926    __ mv(sp, frame_pointer());
927
928    // Restore registers fp..s11 and return (restoring ra to pc).
929    __ MultiPop(registers_to_retain | ra);
930
931    __ Ret();
932
933    // Backtrack code (branch target for conditional backtracks).
934    if (backtrack_label_.is_linked()) {
935      __ bind(&backtrack_label_);
936      Backtrack();
937    }
938
939    Label exit_with_exception;
940
941    // Preempt-code.
942    if (check_preempt_label_.is_linked()) {
943      SafeCallTarget(&check_preempt_label_);
944      StoreRegExpStackPointerToMemory(backtrack_stackpointer(), a1);
945      // Put regexp engine registers on stack.
946      PushCallerSavedRegisters();
947      CallCheckStackGuardState(a0);
948      PopCallerSavedRegisters();
949      // If returning non-zero, we should end execution with the given
950      // result as return value.
951      __ Branch(&return_a0, ne, a0, Operand(zero_reg));
952      LoadRegExpStackPointerFromMemory(backtrack_stackpointer());
953      // String might have moved: Reload end of string from frame.
954      __ Ld(end_of_input_address(), MemOperand(frame_pointer(), kInputEnd));
955      __ li(code_pointer(), Operand(masm_->CodeObject()), CONSTANT_SIZE);
956      SafeReturn();
957    }
958
959    // Backtrack stack overflow code.
960    if (stack_overflow_label_.is_linked()) {
961      SafeCallTarget(&stack_overflow_label_);
962      // Call GrowStack(isolate).
963      StoreRegExpStackPointerToMemory(backtrack_stackpointer(),
964                                      a1);
965
966      static constexpr int kNumArguments = 1;
967      __ PrepareCallCFunction(kNumArguments, 0, a0);
968      __ li(a0, ExternalReference::isolate_address(isolate()));
969      ExternalReference grow_stack = ExternalReference::re_grow_stack();
970      __ CallCFunction(grow_stack, kNumArguments);
971      // If nullptr is returned, we have failed to grow the stack, and must exit
972      // with a stack-overflow exception.
973      __ BranchShort(&exit_with_exception, eq, a0, Operand(zero_reg));
974      // Otherwise use return value as new stack pointer.
975      __ mv(backtrack_stackpointer(), a0);
976      // Restore saved registers and continue.
977      __ li(code_pointer(), Operand(masm_->CodeObject()), CONSTANT_SIZE);
978      __ Ld(end_of_input_address(), MemOperand(frame_pointer(), kInputEnd));
979      SafeReturn();
980    }
981
982    if (exit_with_exception.is_linked()) {
983      // If any of the code above needed to exit with an exception.
984      __ bind(&exit_with_exception);
985      // Exit with Result EXCEPTION(-1) to signal thrown exception.
986      __ li(a0, Operand(EXCEPTION));
987      __ jmp(&return_a0);
988    }
989
990    if (fallback_label_.is_linked()) {
991      __ bind(&fallback_label_);
992      __ li(a0, Operand(FALLBACK_TO_EXPERIMENTAL));
993      __ jmp(&return_a0);
994    }
995  }
996
997  CodeDesc code_desc;
998  masm_->GetCode(isolate(), &code_desc);
999  Handle<Code> code =
1000      Factory::CodeBuilder(isolate(), code_desc, CodeKind::REGEXP)
1001          .set_self_reference(masm_->CodeObject())
1002          .Build();
1003  LOG(masm_->isolate(),
1004      RegExpCodeCreateEvent(Handle<AbstractCode>::cast(code), source));
1005  return Handle<HeapObject>::cast(code);
1006}
1007
1008void RegExpMacroAssemblerRISCV::GoTo(Label* to) {
1009  if (to == nullptr) {
1010    Backtrack();
1011    return;
1012  }
1013  __ jmp(to);
1014  return;
1015}
1016
1017void RegExpMacroAssemblerRISCV::IfRegisterGE(int reg, int comparand,
1018                                             Label* if_ge) {
1019  __ Ld(a0, register_location(reg));
1020  BranchOrBacktrack(if_ge, ge, a0, Operand(comparand));
1021}
1022
1023void RegExpMacroAssemblerRISCV::IfRegisterLT(int reg, int comparand,
1024                                             Label* if_lt) {
1025  __ Ld(a0, register_location(reg));
1026  BranchOrBacktrack(if_lt, lt, a0, Operand(comparand));
1027}
1028
1029void RegExpMacroAssemblerRISCV::IfRegisterEqPos(int reg, Label* if_eq) {
1030  __ Ld(a0, register_location(reg));
1031  BranchOrBacktrack(if_eq, eq, a0, Operand(current_input_offset()));
1032}
1033
// Identifies this macro assembler to callers: always reports
// kRISCVImplementation.
RegExpMacroAssembler::IrregexpImplementation
RegExpMacroAssemblerRISCV::Implementation() {
  return kRISCVImplementation;
}
1038
1039void RegExpMacroAssemblerRISCV::PopCurrentPosition() {
1040  Pop(current_input_offset());
1041}
1042
1043void RegExpMacroAssemblerRISCV::PopRegister(int register_index) {
1044  Pop(a0);
1045  __ Sd(a0, register_location(register_index));
1046}
1047
1048void RegExpMacroAssemblerRISCV::PushBacktrack(Label* label) {
1049  if (label->is_bound()) {
1050    int target = label->pos();
1051    __ li(a0, Operand(target + Code::kHeaderSize - kHeapObjectTag));
1052  } else {
1053    Assembler::BlockTrampolinePoolScope block_trampoline_pool(masm_.get());
1054    Label after_constant;
1055    __ BranchShort(&after_constant);
1056    int offset = masm_->pc_offset();
1057    int cp_offset = offset + Code::kHeaderSize - kHeapObjectTag;
1058    __ emit(0);
1059    masm_->label_at_put(label, offset);
1060    __ bind(&after_constant);
1061    if (is_int16(cp_offset)) {
1062      __ Lwu(a0, MemOperand(code_pointer(), cp_offset));
1063    } else {
1064      __ Add64(a0, code_pointer(), cp_offset);
1065      __ Lwu(a0, MemOperand(a0, 0));
1066    }
1067  }
1068  Push(a0);
1069  CheckStackLimit();
1070}
1071
1072void RegExpMacroAssemblerRISCV::PushCurrentPosition() {
1073  Push(current_input_offset());
1074}
1075
1076void RegExpMacroAssemblerRISCV::PushRegister(int register_index,
1077                                             StackCheckFlag check_stack_limit) {
1078  __ Ld(a0, register_location(register_index));
1079  Push(a0);
1080  if (check_stack_limit) CheckStackLimit();
1081}
1082
1083void RegExpMacroAssemblerRISCV::ReadCurrentPositionFromRegister(int reg) {
1084  __ Ld(current_input_offset(), register_location(reg));
1085}
1086
1087void RegExpMacroAssemblerRISCV::WriteStackPointerToRegister(int reg) {
1088  ExternalReference ref =
1089      ExternalReference::address_of_regexp_stack_memory_top_address(isolate());
1090  __ li(a0, ref);
1091  __ Ld(a0, MemOperand(a0));
1092  __ Sub64(a0, backtrack_stackpointer(), a0);
1093  __ Sw(a0, register_location(reg));
1094}
1095
1096void RegExpMacroAssemblerRISCV::ReadStackPointerFromRegister(int reg) {
1097  ExternalReference ref =
1098      ExternalReference::address_of_regexp_stack_memory_top_address(isolate());
1099  __ li(a1, ref);
1100  __ Ld(a1, MemOperand(a1));
1101  __ Lw(backtrack_stackpointer(), register_location(reg));
1102  __ Add64(backtrack_stackpointer(), backtrack_stackpointer(), a1);
1103}
1104
1105void RegExpMacroAssemblerRISCV::SetCurrentPositionFromEnd(int by) {
1106  Label after_position;
1107  __ BranchShort(&after_position, ge, current_input_offset(),
1108                 Operand(-by * char_size()));
1109  __ li(current_input_offset(), -by * char_size());
1110  // On RegExp code entry (where this operation is used), the character before
1111  // the current position is expected to be already loaded.
1112  // We have advanced the position, so it's safe to read backwards.
1113  LoadCurrentCharacterUnchecked(-1, 1);
1114  __ bind(&after_position);
1115}
1116
1117void RegExpMacroAssemblerRISCV::SetRegister(int register_index, int to) {
1118  DCHECK(register_index >= num_saved_registers_);  // Reserved for positions!
1119  __ li(a0, Operand(to));
1120  __ Sd(a0, register_location(register_index));
1121}
1122
1123bool RegExpMacroAssemblerRISCV::Succeed() {
1124  __ jmp(&success_label_);
1125  return global();
1126}
1127
1128void RegExpMacroAssemblerRISCV::WriteCurrentPositionToRegister(int reg,
1129                                                               int cp_offset) {
1130  if (cp_offset == 0) {
1131    __ Sd(current_input_offset(), register_location(reg));
1132  } else {
1133    __ Add64(a0, current_input_offset(), Operand(cp_offset * char_size()));
1134    __ Sd(a0, register_location(reg));
1135  }
1136}
1137
1138void RegExpMacroAssemblerRISCV::ClearRegisters(int reg_from, int reg_to) {
1139  DCHECK(reg_from <= reg_to);
1140  __ Ld(a0, MemOperand(frame_pointer(), kStringStartMinusOne));
1141  for (int reg = reg_from; reg <= reg_to; reg++) {
1142    __ Sd(a0, register_location(reg));
1143  }
1144}
#ifdef RISCV_HAS_NO_UNALIGNED
// Only compiled when RISCV_HAS_NO_UNALIGNED is defined; in that configuration
// this always reports that unaligned reads must not be used.
bool RegExpMacroAssemblerRISCV::CanReadUnaligned() const { return false; }
#endif
1148// Private methods:
1149
// Emits a call to the stack-guard check (re_check_stack_guard_state) through
// the DirectCEntry builtin.
//
// |scratch| is overwritten: it carries the incoming sp while the stack is
// being re-aligned. The emitted code sets a0 to the current sp (the slot the
// return address goes into), a1 to this code object, a2 to the frame pointer
// and t6 to the address of the C function, then calls DirectCEntry.
// Afterwards sp is reloaded from the saved slot and code_pointer() is
// re-materialized.
void RegExpMacroAssemblerRISCV::CallCheckStackGuardState(Register scratch) {
  DCHECK(!isolate()->IsGeneratingEmbeddedBuiltins());
  DCHECK(!masm_->options().isolate_independent_code);

  int stack_alignment = base::OS::ActivationFrameAlignment();

  // Align the stack pointer and save the original sp value on the stack.
  __ mv(scratch, sp);
  __ Sub64(sp, sp, Operand(kSystemPointerSize));
  DCHECK(base::bits::IsPowerOfTwo(stack_alignment));
  __ And(sp, sp, Operand(-stack_alignment));
  __ Sd(scratch, MemOperand(sp));

  // Third argument: the regexp frame pointer.
  __ mv(a2, frame_pointer());
  // Code of self.
  __ li(a1, Operand(masm_->CodeObject()), CONSTANT_SIZE);

  // We need to make room for the return address on the stack.
  DCHECK(IsAligned(stack_alignment, kSystemPointerSize));
  __ Sub64(sp, sp, Operand(stack_alignment));

  // The stack pointer now points to cell where the return address will be
  // written. Arguments are in registers, meaning we treat the return address as
  // argument 5. Since DirectCEntry will handle allocating space for the C
  // argument slots, we don't need to care about that here. This is how the
  // stack will look (sp meaning the value of sp at this moment):
  // [sp + 3] - empty slot if needed for alignment.
  // [sp + 2] - saved sp.
  // [sp + 1] - second word reserved for return value.
  // [sp + 0] - first word reserved for return value.

  // a0 will point to the return address, placed by DirectCEntry.
  __ mv(a0, sp);

  // t6 carries the address of the C function to be invoked.
  ExternalReference stack_guard_check =
      ExternalReference::re_check_stack_guard_state();
  __ li(t6, Operand(stack_guard_check));

  // Call the DirectCEntry builtin at its off-heap entry point in the embedded
  // blob.
  EmbeddedData d = EmbeddedData::FromBlob();
  CHECK(Builtins::IsIsolateIndependent(Builtin::kDirectCEntry));
  Address entry = d.InstructionStartOfBuiltin(Builtin::kDirectCEntry);
  __ li(kScratchReg, Operand(entry, RelocInfo::OFF_HEAP_TARGET));
  __ Call(kScratchReg);

  // DirectCEntry allocated space for the C argument slots so we have to
  // drop them with the return address from the stack with loading saved sp.
  // At this point stack must look:
  // [sp + 7] - empty slot if needed for alignment.
  // [sp + 6] - saved sp.
  // [sp + 5] - second word reserved for return value.
  // [sp + 4] - first word reserved for return value.
  // [sp + 3] - C argument slot.
  // [sp + 2] - C argument slot.
  // [sp + 1] - C argument slot.
  // [sp + 0] - C argument slot.
  __ Ld(sp, MemOperand(sp, stack_alignment + kCArgsSlotsSize));

  // Re-materialize the code pointer register after the call.
  __ li(code_pointer(), Operand(masm_->CodeObject()));
}
1209
1210// Helper function for reading a value out of a stack frame.
1211template <typename T>
1212static T& frame_entry(Address re_frame, int frame_offset) {
1213  return reinterpret_cast<T&>(Memory<int32_t>(re_frame + frame_offset));
1214}
1215
1216template <typename T>
1217static T* frame_entry_address(Address re_frame, int frame_offset) {
1218  return reinterpret_cast<T*>(re_frame + frame_offset);
1219}
1220
1221int64_t RegExpMacroAssemblerRISCV::CheckStackGuardState(Address* return_address,
1222                                                        Address raw_code,
1223                                                        Address re_frame) {
1224  Code re_code = Code::cast(Object(raw_code));
1225  return NativeRegExpMacroAssembler::CheckStackGuardState(
1226      frame_entry<Isolate*>(re_frame, kIsolate),
1227      static_cast<int>(frame_entry<int64_t>(re_frame, kStartIndex)),
1228      static_cast<RegExp::CallOrigin>(
1229          frame_entry<int64_t>(re_frame, kDirectCall)),
1230      return_address, re_code,
1231      frame_entry_address<Address>(re_frame, kInputString),
1232      frame_entry_address<const byte*>(re_frame, kInputStart),
1233      frame_entry_address<const byte*>(re_frame, kInputEnd));
1234}
1235
1236MemOperand RegExpMacroAssemblerRISCV::register_location(int register_index) {
1237  DCHECK(register_index < (1 << 30));
1238  if (num_registers_ <= register_index) {
1239    num_registers_ = register_index + 1;
1240  }
1241  return MemOperand(frame_pointer(),
1242                    kRegisterZero - register_index * kSystemPointerSize);
1243}
1244
1245void RegExpMacroAssemblerRISCV::CheckPosition(int cp_offset,
1246                                              Label* on_outside_input) {
1247  if (cp_offset >= 0) {
1248    BranchOrBacktrack(on_outside_input, ge, current_input_offset(),
1249                      Operand(-cp_offset * char_size()));
1250  } else {
1251    __ Ld(a1, MemOperand(frame_pointer(), kStringStartMinusOne));
1252    __ Add64(a0, current_input_offset(), Operand(cp_offset * char_size()));
1253    BranchOrBacktrack(on_outside_input, le, a0, Operand(a1));
1254  }
1255}
1256
1257void RegExpMacroAssemblerRISCV::BranchOrBacktrack(Label* to,
1258                                                  Condition condition,
1259                                                  Register rs,
1260                                                  const Operand& rt) {
1261  if (condition == al) {  // Unconditional.
1262    if (to == nullptr) {
1263      Backtrack();
1264      return;
1265    }
1266    __ jmp(to);
1267    return;
1268  }
1269  if (to == nullptr) {
1270    __ Branch(&backtrack_label_, condition, rs, rt);
1271    return;
1272  }
1273  __ Branch(to, condition, rs, rt);
1274}
1275
// Emits a conditional branch-and-link to |to|, taken when |cond| holds for
// |rs| compared with |rt|.
void RegExpMacroAssemblerRISCV::SafeCall(Label* to, Condition cond, Register rs,
                                         const Operand& rt) {
  __ BranchAndLink(to, cond, rs, rt);
}
1280
1281void RegExpMacroAssemblerRISCV::SafeReturn() {
1282  __ pop(ra);
1283  __ Add64(t1, ra, Operand(masm_->CodeObject()));
1284  __ Jump(t1);
1285}
1286
1287void RegExpMacroAssemblerRISCV::SafeCallTarget(Label* name) {
1288  __ bind(name);
1289  __ Sub64(ra, ra, Operand(masm_->CodeObject()));
1290  __ push(ra);
1291}
1292
1293void RegExpMacroAssemblerRISCV::Push(Register source) {
1294  DCHECK(source != backtrack_stackpointer());
1295  __ Add64(backtrack_stackpointer(), backtrack_stackpointer(),
1296           Operand(-kIntSize));
1297  __ Sw(source, MemOperand(backtrack_stackpointer()));
1298}
1299
1300void RegExpMacroAssemblerRISCV::Pop(Register target) {
1301  DCHECK(target != backtrack_stackpointer());
1302  __ Lw(target, MemOperand(backtrack_stackpointer()));
1303  __ Add64(backtrack_stackpointer(), backtrack_stackpointer(), kIntSize);
1304}
1305
1306void RegExpMacroAssemblerRISCV::CheckPreemption() {
1307  // Check for preemption.
1308  ExternalReference stack_limit =
1309      ExternalReference::address_of_jslimit(masm_->isolate());
1310  __ li(a0, Operand(stack_limit));
1311  __ Ld(a0, MemOperand(a0));
1312  SafeCall(&check_preempt_label_, Uless_equal, sp, Operand(a0));
1313}
1314
1315void RegExpMacroAssemblerRISCV::CheckStackLimit() {
1316  ExternalReference stack_limit =
1317      ExternalReference::address_of_regexp_stack_limit_address(
1318          masm_->isolate());
1319
1320  __ li(a0, Operand(stack_limit));
1321  __ Ld(a0, MemOperand(a0));
1322  SafeCall(&stack_overflow_label_, Uless_equal, backtrack_stackpointer(),
1323           Operand(a0));
1324}
1325
1326void RegExpMacroAssemblerRISCV::LoadCurrentCharacterUnchecked(int cp_offset,
1327                                                              int characters) {
1328  Register offset = current_input_offset();
1329
1330  // If unaligned load/stores are not supported then this function must only
1331  // be used to load a single character at a time.
1332  if (!CanReadUnaligned()) {
1333    DCHECK_EQ(1, characters);
1334  }
1335  if (cp_offset != 0) {
1336    // t3 is not being used to store the capture start index at this point.
1337    __ Add64(t3, current_input_offset(), Operand(cp_offset * char_size()));
1338    offset = t3;
1339  }
1340
1341  if (mode_ == LATIN1) {
1342    if (characters == 4) {
1343      __ Add64(kScratchReg, end_of_input_address(), offset);
1344      __ Lwu(current_character(), MemOperand(kScratchReg));
1345    } else if (characters == 2) {
1346      __ Add64(kScratchReg, end_of_input_address(), offset);
1347      __ Lhu(current_character(), MemOperand(kScratchReg));
1348    } else {
1349  DCHECK_EQ(1, characters);
1350      __ Add64(kScratchReg, end_of_input_address(), offset);
1351      __ Lbu(current_character(), MemOperand(kScratchReg));
1352    }
1353  } else {
1354    DCHECK(mode_ == UC16);
1355    if (characters == 2) {
1356      __ Add64(kScratchReg, end_of_input_address(), offset);
1357      __ Lwu(current_character(), MemOperand(kScratchReg));
1358    } else {
1359      DCHECK_EQ(1, characters);
1360      __ Add64(kScratchReg, end_of_input_address(), offset);
1361      __ Lhu(current_character(), MemOperand(kScratchReg));
1362    }
1363  }
1364}
1365
1366#undef __
1367
1368}  // namespace internal
1369}  // namespace v8
1370
1371#endif  // V8_TARGET_ARCH_RISCV64
1372