1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #if V8_TARGET_ARCH_X64
6
7 #include "src/regexp/x64/regexp-macro-assembler-x64.h"
8
9 #include "src/codegen/code-desc.h"
10 #include "src/codegen/macro-assembler.h"
11 #include "src/heap/factory.h"
12 #include "src/logging/log.h"
13 #include "src/objects/code-inl.h"
14 #include "src/regexp/regexp-macro-assembler.h"
15 #include "src/regexp/regexp-stack.h"
16
17 namespace v8 {
18 namespace internal {
19
20 /*
21 * This assembler uses the following register assignment convention
22 * - rdx : Currently loaded character(s) as Latin1 or UC16. Must be loaded
23 * using LoadCurrentCharacter before using any of the dispatch methods.
24 * Temporarily stores the index of capture start after a matching pass
25 * for a global regexp.
26 * - rdi : Current position in input, as negative offset from end of string.
27 * Please notice that this is the byte offset, not the character
28 * offset! Is always a 32-bit signed (negative) offset, but must be
29 * maintained sign-extended to 64 bits, since it is used as index.
30 * - rsi : End of input (points to byte after last character in input),
31 * so that rsi+rdi points to the current character.
32 * - rbp : Frame pointer. Used to access arguments, local variables and
33 * RegExp registers.
34 * - rsp : Points to tip of C stack.
35 * - rcx : Points to tip of backtrack stack. The backtrack stack contains
36 * only 32-bit values. Most are offsets from some base (e.g., character
37 * positions from end of string or code location from Code pointer).
38 * - r8 : Code object pointer. Used to convert between absolute and
39 * code-object-relative addresses.
40 *
41 * The registers rax, rbx, r9 and r11 are free to use for computations.
42 * If changed to use r12+, they should be saved as callee-save registers.
43 * The macro assembler special register r13 (kRootRegister) isn't special
44 * during execution of RegExp code (it doesn't hold the value assumed when
45 * creating JS code), so Root related macro operations can be used.
46 *
47 * Each call to a C++ method should retain these registers.
48 *
49 * The stack will have the following content, in some order, indexable from the
50 * frame pointer (see, e.g., kDirectCall):
51 * - Address regexp (address of the JSRegExp object; unused in native
52 * code, passed to match signature of interpreter)
53 * - Isolate* isolate (address of the current isolate)
54 * - direct_call (if 1, direct call from JavaScript code, if 0 call
55 * through the runtime system)
56 * - capture array size (may fit multiple sets of matches)
57 * - int* capture_array (int[num_saved_registers_], for output).
58 * - end of input (address of end of string)
59 * - start of input (address of first character in string)
60 * - start index (character index of start)
61 * - String input_string (input string)
62 * - return address
63 * - backup of callee save registers (rbx, possibly rsi and rdi).
64 * - success counter (only useful for global regexp to count matches)
65 * - Offset of location before start of input (effectively character
66 * string start - 1). Used to initialize capture registers to a
67 * non-position.
68 * - At start of string (if 1, we are starting at the start of the
69 * string, otherwise 0)
70 * - register 0 rbp[-n] (Only positions must be stored in the first
71 * - register 1 rbp[-n-8] num_saved_registers_ registers)
72 * - ...
73 *
 * The first num_saved_registers_ registers are initialized to point to
 * "character -1" in the string (i.e., char_size() bytes before the first
 * character of the string). The remaining registers start out uninitialized.
77 *
78 * The argument values must be provided by the calling code by calling the
79 * code's entry address cast to a function pointer with the following signature:
80 * int (*match)(String input_string,
81 * int start_index,
82 * Address start,
83 * Address end,
84 * int* capture_output_array,
85 * int num_capture_registers,
86 * bool direct_call = false,
87 * Isolate* isolate,
88 * Address regexp);
89 */
90
// Shorthand used throughout this file: `__ op(...)` goes through ACCESS_MASM
// (defined elsewhere) applied to this assembler's masm_ member.
#define __ ACCESS_MASM((&masm_))

// Namespace-scope definition of the class constant. It is the size passed to
// NewAssemblerBuffer in the constructor.
const int RegExpMacroAssemblerX64::kRegExpCodeSize;
94
// Sets up the underlying assembler with a buffer of kRegExpCodeSize bytes and
// emits the first two pieces of generated code: a jump to entry_label_ and the
// binding of start_label_.
//
// registers_to_save initializes both num_registers_ and num_saved_registers_
// and must be even (see the DCHECK).
RegExpMacroAssemblerX64::RegExpMacroAssemblerX64(Isolate* isolate, Zone* zone,
                                                 Mode mode,
                                                 int registers_to_save)
    : NativeRegExpMacroAssembler(isolate, zone),
      masm_(isolate, CodeObjectRequired::kYes,
            NewAssemblerBuffer(kRegExpCodeSize)),
      no_root_array_scope_(&masm_),
      code_relative_fixup_positions_(zone),
      mode_(mode),
      num_registers_(registers_to_save),
      num_saved_registers_(registers_to_save),
      entry_label_(),
      start_label_(),
      success_label_(),
      backtrack_label_(),
      exit_label_() {
  DCHECK_EQ(0, registers_to_save % 2);
  __ jmp(&entry_label_);   // We'll write the entry code when we know more.
  __ bind(&start_label_);  // And then continue from here.
}
115
~RegExpMacroAssemblerX64()116 RegExpMacroAssemblerX64::~RegExpMacroAssemblerX64() {
117 // Unuse labels in case we throw away the assembler without calling GetCode.
118 entry_label_.Unuse();
119 start_label_.Unuse();
120 success_label_.Unuse();
121 backtrack_label_.Unuse();
122 exit_label_.Unuse();
123 check_preempt_label_.Unuse();
124 stack_overflow_label_.Unuse();
125 fallback_label_.Unuse();
126 }
127
128
stack_limit_slack()129 int RegExpMacroAssemblerX64::stack_limit_slack() {
130 return RegExpStack::kStackLimitSlack;
131 }
132
133
AdvanceCurrentPosition(int by)134 void RegExpMacroAssemblerX64::AdvanceCurrentPosition(int by) {
135 if (by != 0) {
136 __ addq(rdi, Immediate(by * char_size()));
137 }
138 }
139
140
AdvanceRegister(int reg, int by)141 void RegExpMacroAssemblerX64::AdvanceRegister(int reg, int by) {
142 DCHECK_LE(0, reg);
143 DCHECK_GT(num_registers_, reg);
144 if (by != 0) {
145 __ addq(register_location(reg), Immediate(by));
146 }
147 }
148
149
Backtrack()150 void RegExpMacroAssemblerX64::Backtrack() {
151 CheckPreemption();
152 if (has_backtrack_limit()) {
153 Label next;
154 __ incq(Operand(rbp, kBacktrackCount));
155 __ cmpq(Operand(rbp, kBacktrackCount), Immediate(backtrack_limit()));
156 __ j(not_equal, &next);
157
158 // Backtrack limit exceeded.
159 if (can_fallback()) {
160 __ jmp(&fallback_label_);
161 } else {
162 // Can't fallback, so we treat it as a failed match.
163 Fail();
164 }
165
166 __ bind(&next);
167 }
168 // Pop Code offset from backtrack stack, add Code and jump to location.
169 Pop(rbx);
170 __ addq(rbx, code_object_pointer());
171 __ jmp(rbx);
172 }
173
174
// Binds `label` at the current position of the underlying assembler.
void RegExpMacroAssemblerX64::Bind(Label* label) {
  __ bind(label);
}
178
179
CheckCharacter(uint32_t c, Label* on_equal)180 void RegExpMacroAssemblerX64::CheckCharacter(uint32_t c, Label* on_equal) {
181 __ cmpl(current_character(), Immediate(c));
182 BranchOrBacktrack(equal, on_equal);
183 }
184
CheckCharacterGT(base::uc16 limit, Label* on_greater)185 void RegExpMacroAssemblerX64::CheckCharacterGT(base::uc16 limit,
186 Label* on_greater) {
187 __ cmpl(current_character(), Immediate(limit));
188 BranchOrBacktrack(greater, on_greater);
189 }
190
CheckAtStart(int cp_offset, Label* on_at_start)191 void RegExpMacroAssemblerX64::CheckAtStart(int cp_offset, Label* on_at_start) {
192 __ leaq(rax, Operand(rdi, -char_size() + cp_offset * char_size()));
193 __ cmpq(rax, Operand(rbp, kStringStartMinusOne));
194 BranchOrBacktrack(equal, on_at_start);
195 }
196
CheckNotAtStart(int cp_offset, Label* on_not_at_start)197 void RegExpMacroAssemblerX64::CheckNotAtStart(int cp_offset,
198 Label* on_not_at_start) {
199 __ leaq(rax, Operand(rdi, -char_size() + cp_offset * char_size()));
200 __ cmpq(rax, Operand(rbp, kStringStartMinusOne));
201 BranchOrBacktrack(not_equal, on_not_at_start);
202 }
203
CheckCharacterLT(base::uc16 limit, Label* on_less)204 void RegExpMacroAssemblerX64::CheckCharacterLT(base::uc16 limit,
205 Label* on_less) {
206 __ cmpl(current_character(), Immediate(limit));
207 BranchOrBacktrack(less, on_less);
208 }
209
CheckGreedyLoop(Label* on_equal)210 void RegExpMacroAssemblerX64::CheckGreedyLoop(Label* on_equal) {
211 Label fallthrough;
212 __ cmpl(rdi, Operand(backtrack_stackpointer(), 0));
213 __ j(not_equal, &fallthrough);
214 Drop();
215 BranchOrBacktrack(no_condition, on_equal);
216 __ bind(&fallthrough);
217 }
218
219 // Push (pop) caller-saved registers used by irregexp.
PushCallerSavedRegisters()220 void RegExpMacroAssemblerX64::PushCallerSavedRegisters() {
221 #ifndef V8_TARGET_OS_WIN
222 // Callee-save in Microsoft 64-bit ABI, but not in AMD64 ABI.
223 __ pushq(rsi);
224 __ pushq(rdi);
225 #endif
226 __ pushq(rcx);
227 }
228
PopCallerSavedRegisters()229 void RegExpMacroAssemblerX64::PopCallerSavedRegisters() {
230 __ popq(rcx);
231 #ifndef V8_TARGET_OS_WIN
232 __ popq(rdi);
233 __ popq(rsi);
234 #endif
235 }
236
CheckNotBackReferenceIgnoreCase( int start_reg, bool read_backward, bool unicode, Label* on_no_match)237 void RegExpMacroAssemblerX64::CheckNotBackReferenceIgnoreCase(
238 int start_reg, bool read_backward, bool unicode, Label* on_no_match) {
239 Label fallthrough;
240 ReadPositionFromRegister(rdx, start_reg); // Offset of start of capture
241 ReadPositionFromRegister(rbx, start_reg + 1); // Offset of end of capture
242 __ subq(rbx, rdx); // Length of capture.
243
244 // -----------------------
245 // rdx = Start offset of capture.
246 // rbx = Length of capture
247
248 // At this point, the capture registers are either both set or both cleared.
249 // If the capture length is zero, then the capture is either empty or cleared.
250 // Fall through in both cases.
251 __ j(equal, &fallthrough);
252
253 // -----------------------
254 // rdx - Start of capture
255 // rbx - length of capture
256 // Check that there are sufficient characters left in the input.
257 if (read_backward) {
258 __ movl(rax, Operand(rbp, kStringStartMinusOne));
259 __ addl(rax, rbx);
260 __ cmpl(rdi, rax);
261 BranchOrBacktrack(less_equal, on_no_match);
262 } else {
263 __ movl(rax, rdi);
264 __ addl(rax, rbx);
265 BranchOrBacktrack(greater, on_no_match);
266 }
267
268 if (mode_ == LATIN1) {
269 Label loop_increment;
270 if (on_no_match == nullptr) {
271 on_no_match = &backtrack_label_;
272 }
273
274 __ leaq(r9, Operand(rsi, rdx, times_1, 0));
275 __ leaq(r11, Operand(rsi, rdi, times_1, 0));
276 if (read_backward) {
277 __ subq(r11, rbx); // Offset by length when matching backwards.
278 }
279 __ addq(rbx, r9); // End of capture
280 // ---------------------
281 // r11 - current input character address
282 // r9 - current capture character address
283 // rbx - end of capture
284
285 Label loop;
286 __ bind(&loop);
287 __ movzxbl(rdx, Operand(r9, 0));
288 __ movzxbl(rax, Operand(r11, 0));
289 // al - input character
290 // dl - capture character
291 __ cmpb(rax, rdx);
292 __ j(equal, &loop_increment);
293
294 // Mismatch, try case-insensitive match (converting letters to lower-case).
295 // I.e., if or-ing with 0x20 makes values equal and in range 'a'-'z', it's
296 // a match.
297 __ orq(rax, Immediate(0x20)); // Convert match character to lower-case.
298 __ orq(rdx, Immediate(0x20)); // Convert capture character to lower-case.
299 __ cmpb(rax, rdx);
300 __ j(not_equal, on_no_match); // Definitely not equal.
301 __ subb(rax, Immediate('a'));
302 __ cmpb(rax, Immediate('z' - 'a'));
303 __ j(below_equal, &loop_increment); // In range 'a'-'z'.
304 // Latin-1: Check for values in range [224,254] but not 247.
305 __ subb(rax, Immediate(224 - 'a'));
306 __ cmpb(rax, Immediate(254 - 224));
307 __ j(above, on_no_match); // Weren't Latin-1 letters.
308 __ cmpb(rax, Immediate(247 - 224)); // Check for 247.
309 __ j(equal, on_no_match);
310 __ bind(&loop_increment);
311 // Increment pointers into match and capture strings.
312 __ addq(r11, Immediate(1));
313 __ addq(r9, Immediate(1));
314 // Compare to end of capture, and loop if not done.
315 __ cmpq(r9, rbx);
316 __ j(below, &loop);
317
318 // Compute new value of character position after the matched part.
319 __ movq(rdi, r11);
320 __ subq(rdi, rsi);
321 if (read_backward) {
322 // Subtract match length if we matched backward.
323 __ addq(rdi, register_location(start_reg));
324 __ subq(rdi, register_location(start_reg + 1));
325 }
326 } else {
327 DCHECK(mode_ == UC16);
328 PushCallerSavedRegisters();
329
330 static const int num_arguments = 4;
331 __ PrepareCallCFunction(num_arguments);
332
333 // Put arguments into parameter registers. Parameters are
334 // Address byte_offset1 - Address captured substring's start.
335 // Address byte_offset2 - Address of current character position.
336 // size_t byte_length - length of capture in bytes(!)
337 // Isolate* isolate.
338 #ifdef V8_TARGET_OS_WIN
339 DCHECK(rcx == arg_reg_1);
340 DCHECK(rdx == arg_reg_2);
341 // Compute and set byte_offset1 (start of capture).
342 __ leaq(rcx, Operand(rsi, rdx, times_1, 0));
343 // Set byte_offset2.
344 __ leaq(rdx, Operand(rsi, rdi, times_1, 0));
345 if (read_backward) {
346 __ subq(rdx, rbx);
347 }
348 #else // AMD64 calling convention
349 DCHECK(rdi == arg_reg_1);
350 DCHECK(rsi == arg_reg_2);
351 // Compute byte_offset2 (current position = rsi+rdi).
352 __ leaq(rax, Operand(rsi, rdi, times_1, 0));
353 // Compute and set byte_offset1 (start of capture).
354 __ leaq(rdi, Operand(rsi, rdx, times_1, 0));
355 // Set byte_offset2.
356 __ movq(rsi, rax);
357 if (read_backward) {
358 __ subq(rsi, rbx);
359 }
360 #endif // V8_TARGET_OS_WIN
361
362 // Set byte_length.
363 __ movq(arg_reg_3, rbx);
364 // Isolate.
365 __ LoadAddress(arg_reg_4, ExternalReference::isolate_address(isolate()));
366
367 {
368 AllowExternalCallThatCantCauseGC scope(&masm_);
369 ExternalReference compare =
370 unicode
371 ? ExternalReference::re_case_insensitive_compare_unicode()
372 : ExternalReference::re_case_insensitive_compare_non_unicode();
373 __ CallCFunction(compare, num_arguments);
374 }
375
376 // Restore original values before reacting on result value.
377 __ Move(code_object_pointer(), masm_.CodeObject());
378 PopCallerSavedRegisters();
379
380 // Check if function returned non-zero for success or zero for failure.
381 __ testq(rax, rax);
382 BranchOrBacktrack(zero, on_no_match);
383 // On success, advance position by length of capture.
384 // Requires that rbx is callee save (true for both Win64 and AMD64 ABIs).
385 if (read_backward) {
386 __ subq(rdi, rbx);
387 } else {
388 __ addq(rdi, rbx);
389 }
390 }
391 __ bind(&fallthrough);
392 }
393
CheckNotBackReference(int start_reg, bool read_backward, Label* on_no_match)394 void RegExpMacroAssemblerX64::CheckNotBackReference(int start_reg,
395 bool read_backward,
396 Label* on_no_match) {
397 Label fallthrough;
398
399 // Find length of back-referenced capture.
400 ReadPositionFromRegister(rdx, start_reg); // Offset of start of capture
401 ReadPositionFromRegister(rax, start_reg + 1); // Offset of end of capture
402 __ subq(rax, rdx); // Length to check.
403
404 // At this point, the capture registers are either both set or both cleared.
405 // If the capture length is zero, then the capture is either empty or cleared.
406 // Fall through in both cases.
407 __ j(equal, &fallthrough);
408
409 // -----------------------
410 // rdx - Start of capture
411 // rax - length of capture
412 // Check that there are sufficient characters left in the input.
413 if (read_backward) {
414 __ movl(rbx, Operand(rbp, kStringStartMinusOne));
415 __ addl(rbx, rax);
416 __ cmpl(rdi, rbx);
417 BranchOrBacktrack(less_equal, on_no_match);
418 } else {
419 __ movl(rbx, rdi);
420 __ addl(rbx, rax);
421 BranchOrBacktrack(greater, on_no_match);
422 }
423
424 // Compute pointers to match string and capture string
425 __ leaq(rbx, Operand(rsi, rdi, times_1, 0)); // Start of match.
426 if (read_backward) {
427 __ subq(rbx, rax); // Offset by length when matching backwards.
428 }
429 __ addq(rdx, rsi); // Start of capture.
430 __ leaq(r9, Operand(rdx, rax, times_1, 0)); // End of capture
431
432 // -----------------------
433 // rbx - current capture character address.
434 // rbx - current input character address .
435 // r9 - end of input to match (capture length after rbx).
436
437 Label loop;
438 __ bind(&loop);
439 if (mode_ == LATIN1) {
440 __ movzxbl(rax, Operand(rdx, 0));
441 __ cmpb(rax, Operand(rbx, 0));
442 } else {
443 DCHECK(mode_ == UC16);
444 __ movzxwl(rax, Operand(rdx, 0));
445 __ cmpw(rax, Operand(rbx, 0));
446 }
447 BranchOrBacktrack(not_equal, on_no_match);
448 // Increment pointers into capture and match string.
449 __ addq(rbx, Immediate(char_size()));
450 __ addq(rdx, Immediate(char_size()));
451 // Check if we have reached end of match area.
452 __ cmpq(rdx, r9);
453 __ j(below, &loop);
454
455 // Success.
456 // Set current character position to position after match.
457 __ movq(rdi, rbx);
458 __ subq(rdi, rsi);
459 if (read_backward) {
460 // Subtract match length if we matched backward.
461 __ addq(rdi, register_location(start_reg));
462 __ subq(rdi, register_location(start_reg + 1));
463 }
464
465 __ bind(&fallthrough);
466 }
467
468
CheckNotCharacter(uint32_t c, Label* on_not_equal)469 void RegExpMacroAssemblerX64::CheckNotCharacter(uint32_t c,
470 Label* on_not_equal) {
471 __ cmpl(current_character(), Immediate(c));
472 BranchOrBacktrack(not_equal, on_not_equal);
473 }
474
475
CheckCharacterAfterAnd(uint32_t c, uint32_t mask, Label* on_equal)476 void RegExpMacroAssemblerX64::CheckCharacterAfterAnd(uint32_t c,
477 uint32_t mask,
478 Label* on_equal) {
479 if (c == 0) {
480 __ testl(current_character(), Immediate(mask));
481 } else {
482 __ Move(rax, mask);
483 __ andq(rax, current_character());
484 __ cmpl(rax, Immediate(c));
485 }
486 BranchOrBacktrack(equal, on_equal);
487 }
488
489
CheckNotCharacterAfterAnd(uint32_t c, uint32_t mask, Label* on_not_equal)490 void RegExpMacroAssemblerX64::CheckNotCharacterAfterAnd(uint32_t c,
491 uint32_t mask,
492 Label* on_not_equal) {
493 if (c == 0) {
494 __ testl(current_character(), Immediate(mask));
495 } else {
496 __ Move(rax, mask);
497 __ andq(rax, current_character());
498 __ cmpl(rax, Immediate(c));
499 }
500 BranchOrBacktrack(not_equal, on_not_equal);
501 }
502
CheckNotCharacterAfterMinusAnd( base::uc16 c, base::uc16 minus, base::uc16 mask, Label* on_not_equal)503 void RegExpMacroAssemblerX64::CheckNotCharacterAfterMinusAnd(
504 base::uc16 c, base::uc16 minus, base::uc16 mask, Label* on_not_equal) {
505 DCHECK_GT(String::kMaxUtf16CodeUnit, minus);
506 __ leal(rax, Operand(current_character(), -minus));
507 __ andl(rax, Immediate(mask));
508 __ cmpl(rax, Immediate(c));
509 BranchOrBacktrack(not_equal, on_not_equal);
510 }
511
CheckCharacterInRange(base::uc16 from, base::uc16 to, Label* on_in_range)512 void RegExpMacroAssemblerX64::CheckCharacterInRange(base::uc16 from,
513 base::uc16 to,
514 Label* on_in_range) {
515 __ leal(rax, Operand(current_character(), -from));
516 __ cmpl(rax, Immediate(to - from));
517 BranchOrBacktrack(below_equal, on_in_range);
518 }
519
CheckCharacterNotInRange(base::uc16 from, base::uc16 to, Label* on_not_in_range)520 void RegExpMacroAssemblerX64::CheckCharacterNotInRange(base::uc16 from,
521 base::uc16 to,
522 Label* on_not_in_range) {
523 __ leal(rax, Operand(current_character(), -from));
524 __ cmpl(rax, Immediate(to - from));
525 BranchOrBacktrack(above, on_not_in_range);
526 }
527
CallIsCharacterInRangeArray( const ZoneList<CharacterRange>* ranges)528 void RegExpMacroAssemblerX64::CallIsCharacterInRangeArray(
529 const ZoneList<CharacterRange>* ranges) {
530 PushCallerSavedRegisters();
531
532 static const int kNumArguments = 3;
533 __ PrepareCallCFunction(kNumArguments);
534
535 __ Move(arg_reg_1, current_character());
536 __ Move(arg_reg_2, GetOrAddRangeArray(ranges));
537 __ LoadAddress(arg_reg_3, ExternalReference::isolate_address(isolate()));
538
539 {
540 // We have a frame (set up in GetCode), but the assembler doesn't know.
541 FrameScope scope(&masm_, StackFrame::MANUAL);
542 __ CallCFunction(ExternalReference::re_is_character_in_range_array(),
543 kNumArguments);
544 }
545
546 PopCallerSavedRegisters();
547 __ Move(code_object_pointer(), masm_.CodeObject());
548 }
549
// Goes to on_in_range when the range-array helper call leaves a non-zero
// value in rax. Always returns true.
bool RegExpMacroAssemblerX64::CheckCharacterInRangeArray(
    const ZoneList<CharacterRange>* ranges, Label* on_in_range) {
  CallIsCharacterInRangeArray(ranges);
  // rax holds the result of the call emitted above.
  __ testq(rax, rax);
  BranchOrBacktrack(not_zero, on_in_range);
  return true;
}
557
// Goes to on_not_in_range when the range-array helper call leaves zero in
// rax. Always returns true.
bool RegExpMacroAssemblerX64::CheckCharacterNotInRangeArray(
    const ZoneList<CharacterRange>* ranges, Label* on_not_in_range) {
  CallIsCharacterInRangeArray(ranges);
  // rax holds the result of the call emitted above.
  __ testq(rax, rax);
  BranchOrBacktrack(zero, on_not_in_range);
  return true;
}
565
CheckBitInTable( Handle<ByteArray> table, Label* on_bit_set)566 void RegExpMacroAssemblerX64::CheckBitInTable(
567 Handle<ByteArray> table,
568 Label* on_bit_set) {
569 __ Move(rax, table);
570 Register index = current_character();
571 if (mode_ != LATIN1 || kTableMask != String::kMaxOneByteCharCode) {
572 __ movq(rbx, current_character());
573 __ andq(rbx, Immediate(kTableMask));
574 index = rbx;
575 }
576 __ cmpb(FieldOperand(rax, index, times_1, ByteArray::kHeaderSize),
577 Immediate(0));
578 BranchOrBacktrack(not_equal, on_bit_set);
579 }
580
CheckSpecialCharacterClass( StandardCharacterSet type, Label* on_no_match)581 bool RegExpMacroAssemblerX64::CheckSpecialCharacterClass(
582 StandardCharacterSet type, Label* on_no_match) {
583 // Range checks (c in min..max) are generally implemented by an unsigned
584 // (c - min) <= (max - min) check, using the sequence:
585 // leal(rax, Operand(current_character(), -min)) or sub(rax, Immediate(min))
586 // cmpl(rax, Immediate(max - min))
587 // TODO(jgruber): No custom implementation (yet): s(UC16), S(UC16).
588 switch (type) {
589 case StandardCharacterSet::kWhitespace:
590 // Match space-characters.
591 if (mode_ == LATIN1) {
592 // One byte space characters are '\t'..'\r', ' ' and \u00a0.
593 Label success;
594 __ cmpl(current_character(), Immediate(' '));
595 __ j(equal, &success, Label::kNear);
596 // Check range 0x09..0x0D.
597 __ leal(rax, Operand(current_character(), -'\t'));
598 __ cmpl(rax, Immediate('\r' - '\t'));
599 __ j(below_equal, &success, Label::kNear);
600 // \u00a0 (NBSP).
601 __ cmpl(rax, Immediate(0x00A0 - '\t'));
602 BranchOrBacktrack(not_equal, on_no_match);
603 __ bind(&success);
604 return true;
605 }
606 return false;
607 case StandardCharacterSet::kNotWhitespace:
608 // The emitted code for generic character classes is good enough.
609 return false;
610 case StandardCharacterSet::kDigit:
611 // Match ASCII digits ('0'..'9').
612 __ leal(rax, Operand(current_character(), -'0'));
613 __ cmpl(rax, Immediate('9' - '0'));
614 BranchOrBacktrack(above, on_no_match);
615 return true;
616 case StandardCharacterSet::kNotDigit:
617 // Match non ASCII-digits.
618 __ leal(rax, Operand(current_character(), -'0'));
619 __ cmpl(rax, Immediate('9' - '0'));
620 BranchOrBacktrack(below_equal, on_no_match);
621 return true;
622 case StandardCharacterSet::kNotLineTerminator: {
623 // Match non-newlines (not 0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029).
624 __ movl(rax, current_character());
625 __ xorl(rax, Immediate(0x01));
626 // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C.
627 __ subl(rax, Immediate(0x0B));
628 __ cmpl(rax, Immediate(0x0C - 0x0B));
629 BranchOrBacktrack(below_equal, on_no_match);
630 if (mode_ == UC16) {
631 // Compare original value to 0x2028 and 0x2029, using the already
632 // computed (current_char ^ 0x01 - 0x0B). I.e., check for
633 // 0x201D (0x2028 - 0x0B) or 0x201E.
634 __ subl(rax, Immediate(0x2028 - 0x0B));
635 __ cmpl(rax, Immediate(0x2029 - 0x2028));
636 BranchOrBacktrack(below_equal, on_no_match);
637 }
638 return true;
639 }
640 case StandardCharacterSet::kLineTerminator: {
641 // Match newlines (0x0A('\n'), 0x0D('\r'), 0x2028 and 0x2029).
642 __ movl(rax, current_character());
643 __ xorl(rax, Immediate(0x01));
644 // See if current character is '\n'^1 or '\r'^1, i.e., 0x0B or 0x0C.
645 __ subl(rax, Immediate(0x0B));
646 __ cmpl(rax, Immediate(0x0C - 0x0B));
647 if (mode_ == LATIN1) {
648 BranchOrBacktrack(above, on_no_match);
649 } else {
650 Label done;
651 BranchOrBacktrack(below_equal, &done);
652 // Compare original value to 0x2028 and 0x2029, using the already
653 // computed (current_char ^ 0x01 - 0x0B). I.e., check for
654 // 0x201D (0x2028 - 0x0B) or 0x201E.
655 __ subl(rax, Immediate(0x2028 - 0x0B));
656 __ cmpl(rax, Immediate(0x2029 - 0x2028));
657 BranchOrBacktrack(above, on_no_match);
658 __ bind(&done);
659 }
660 return true;
661 }
662 case StandardCharacterSet::kWord: {
663 if (mode_ != LATIN1) {
664 // Table is 256 entries, so all Latin1 characters can be tested.
665 __ cmpl(current_character(), Immediate('z'));
666 BranchOrBacktrack(above, on_no_match);
667 }
668 __ Move(rbx, ExternalReference::re_word_character_map());
669 DCHECK_EQ(0,
670 word_character_map[0]); // Character '\0' is not a word char.
671 __ testb(Operand(rbx, current_character(), times_1, 0),
672 current_character());
673 BranchOrBacktrack(zero, on_no_match);
674 return true;
675 }
676 case StandardCharacterSet::kNotWord: {
677 Label done;
678 if (mode_ != LATIN1) {
679 // Table is 256 entries, so all Latin1 characters can be tested.
680 __ cmpl(current_character(), Immediate('z'));
681 __ j(above, &done);
682 }
683 __ Move(rbx, ExternalReference::re_word_character_map());
684 DCHECK_EQ(0,
685 word_character_map[0]); // Character '\0' is not a word char.
686 __ testb(Operand(rbx, current_character(), times_1, 0),
687 current_character());
688 BranchOrBacktrack(not_zero, on_no_match);
689 if (mode_ != LATIN1) {
690 __ bind(&done);
691 }
692 return true;
693 }
694
695 case StandardCharacterSet::kEverything:
696 // Match any character.
697 return true;
698 }
699 }
700
// Emits the failure exit: jumps to exit_label_, first loading FAILURE into
// rax unless this is a global regexp.
void RegExpMacroAssemblerX64::Fail() {
  STATIC_ASSERT(FAILURE == 0);  // Return value for failure is zero.
  if (!global()) {
    __ Move(rax, FAILURE);
  }
  __ jmp(&exit_label_);
}
708
LoadRegExpStackPointerFromMemory(Register dst)709 void RegExpMacroAssemblerX64::LoadRegExpStackPointerFromMemory(Register dst) {
710 ExternalReference ref =
711 ExternalReference::address_of_regexp_stack_stack_pointer(isolate());
712 __ movq(dst, __ ExternalReferenceAsOperand(ref, dst));
713 }
714
StoreRegExpStackPointerToMemory( Register src, Register scratch)715 void RegExpMacroAssemblerX64::StoreRegExpStackPointerToMemory(
716 Register src, Register scratch) {
717 ExternalReference ref =
718 ExternalReference::address_of_regexp_stack_stack_pointer(isolate());
719 __ movq(__ ExternalReferenceAsOperand(ref, scratch), src);
720 }
721
PushRegExpBasePointer(Register stack_pointer, Register scratch)722 void RegExpMacroAssemblerX64::PushRegExpBasePointer(Register stack_pointer,
723 Register scratch) {
724 ExternalReference ref =
725 ExternalReference::address_of_regexp_stack_memory_top_address(isolate());
726 __ movq(scratch, __ ExternalReferenceAsOperand(ref, scratch));
727 __ subq(scratch, stack_pointer);
728 __ movq(Operand(rbp, kRegExpStackBasePointer), scratch);
729 }
730
PopRegExpBasePointer(Register stack_pointer_out, Register scratch)731 void RegExpMacroAssemblerX64::PopRegExpBasePointer(Register stack_pointer_out,
732 Register scratch) {
733 ExternalReference ref =
734 ExternalReference::address_of_regexp_stack_memory_top_address(isolate());
735 __ movq(scratch, Operand(rbp, kRegExpStackBasePointer));
736 __ movq(stack_pointer_out,
737 __ ExternalReferenceAsOperand(ref, stack_pointer_out));
738 __ subq(stack_pointer_out, scratch);
739 StoreRegExpStackPointerToMemory(stack_pointer_out, scratch);
740 }
741
GetCode(Handle<String> source)742 Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) {
743 Label return_rax;
744 // Finalize code - write the entry point code now we know how many registers
745 // we need.
746 __ bind(&entry_label_);
747
748 // Tell the system that we have a stack frame. Because the type is MANUAL, no
749 // physical frame is generated.
750 FrameScope scope(&masm_, StackFrame::MANUAL);
751
752 // Actually emit code to start a new stack frame.
753 __ pushq(rbp);
754 __ movq(rbp, rsp);
755
756 // Save parameters and callee-save registers. Order here should correspond
757 // to order of kBackup_ebx etc.
758 #ifdef V8_TARGET_OS_WIN
759 // MSVC passes arguments in rcx, rdx, r8, r9, with backing stack slots.
760 // Store register parameters in pre-allocated stack slots.
761 __ movq(Operand(rbp, kInputString), arg_reg_1);
762 __ movq(Operand(rbp, kStartIndex), arg_reg_2); // Passed as int32 in edx.
763 __ movq(Operand(rbp, kInputStart), arg_reg_3);
764 __ movq(Operand(rbp, kInputEnd), arg_reg_4);
765
766 STATIC_ASSERT(kNumCalleeSaveRegisters == 3);
767 __ pushq(rsi);
768 __ pushq(rdi);
769 __ pushq(rbx);
770 #else
771 // GCC passes arguments in rdi, rsi, rdx, rcx, r8, r9 (and then on stack).
772 // Push register parameters on stack for reference.
773 DCHECK_EQ(kInputString, -1 * kSystemPointerSize);
774 DCHECK_EQ(kStartIndex, -2 * kSystemPointerSize);
775 DCHECK_EQ(kInputStart, -3 * kSystemPointerSize);
776 DCHECK_EQ(kInputEnd, -4 * kSystemPointerSize);
777 DCHECK_EQ(kRegisterOutput, -5 * kSystemPointerSize);
778 DCHECK_EQ(kNumOutputRegisters, -6 * kSystemPointerSize);
779 __ pushq(arg_reg_1);
780 __ pushq(arg_reg_2);
781 __ pushq(arg_reg_3);
782 __ pushq(arg_reg_4);
783 __ pushq(r8);
784 __ pushq(r9);
785
786 STATIC_ASSERT(kNumCalleeSaveRegisters == 1);
787 __ pushq(rbx);
788 #endif
789
790 STATIC_ASSERT(kSuccessfulCaptures ==
791 kLastCalleeSaveRegister - kSystemPointerSize);
792 __ Push(Immediate(0)); // Number of successful matches in a global regexp.
793 STATIC_ASSERT(kStringStartMinusOne ==
794 kSuccessfulCaptures - kSystemPointerSize);
795 __ Push(Immediate(0)); // Make room for "string start - 1" constant.
796 STATIC_ASSERT(kBacktrackCount == kStringStartMinusOne - kSystemPointerSize);
797 __ Push(Immediate(0)); // The backtrack counter.
798 STATIC_ASSERT(kRegExpStackBasePointer ==
799 kBacktrackCount - kSystemPointerSize);
800 __ Push(Immediate(0)); // The regexp stack base ptr.
801
802 // Initialize backtrack stack pointer. It must not be clobbered from here on.
803 // Note the backtrack_stackpointer is *not* callee-saved.
804 STATIC_ASSERT(backtrack_stackpointer() == rcx);
805 LoadRegExpStackPointerFromMemory(backtrack_stackpointer());
806
807 // Store the regexp base pointer - we'll later restore it / write it to
808 // memory when returning from this irregexp code object.
809 PushRegExpBasePointer(backtrack_stackpointer(), kScratchRegister);
810
811 {
812 // Check if we have space on the stack for registers.
813 Label stack_limit_hit, stack_ok;
814
815 ExternalReference stack_limit =
816 ExternalReference::address_of_jslimit(isolate());
817 __ movq(r9, rsp);
818 __ Move(kScratchRegister, stack_limit);
819 __ subq(r9, Operand(kScratchRegister, 0));
820 // Handle it if the stack pointer is already below the stack limit.
821 __ j(below_equal, &stack_limit_hit);
822 // Check if there is room for the variable number of registers above
823 // the stack limit.
824 __ cmpq(r9, Immediate(num_registers_ * kSystemPointerSize));
825 __ j(above_equal, &stack_ok);
826 // Exit with OutOfMemory exception. There is not enough space on the stack
827 // for our working registers.
828 __ Move(rax, EXCEPTION);
829 __ jmp(&return_rax);
830
831 __ bind(&stack_limit_hit);
832 __ Move(code_object_pointer(), masm_.CodeObject());
833 __ pushq(backtrack_stackpointer());
834 CallCheckStackGuardState(); // Preserves no registers beside rbp and rsp.
835 __ popq(backtrack_stackpointer());
836 __ testq(rax, rax);
837 // If returned value is non-zero, we exit with the returned value as result.
838 __ j(not_zero, &return_rax);
839
840 __ bind(&stack_ok);
841 }
842
843 // Allocate space on stack for registers.
844 __ AllocateStackSpace(num_registers_ * kSystemPointerSize);
  // Load end of input (address of the byte after the last character).
846 __ movq(rsi, Operand(rbp, kInputEnd));
847 // Load input position.
848 __ movq(rdi, Operand(rbp, kInputStart));
849 // Set up rdi to be negative offset from string end.
850 __ subq(rdi, rsi);
851 // Set rax to address of char before start of the string
852 // (effectively string position -1).
853 __ movq(rbx, Operand(rbp, kStartIndex));
854 __ negq(rbx);
855 if (mode_ == UC16) {
856 __ leaq(rax, Operand(rdi, rbx, times_2, -char_size()));
857 } else {
858 __ leaq(rax, Operand(rdi, rbx, times_1, -char_size()));
859 }
860 // Store this value in a local variable, for use when clearing
861 // position registers.
862 __ movq(Operand(rbp, kStringStartMinusOne), rax);
863
864 // Initialize code object pointer.
865 __ Move(code_object_pointer(), masm_.CodeObject());
866
867 Label load_char_start_regexp; // Execution restarts here for global regexps.
868 {
869 Label start_regexp;
870
871 // Load newline if index is at start, previous character otherwise.
872 __ cmpl(Operand(rbp, kStartIndex), Immediate(0));
873 __ j(not_equal, &load_char_start_regexp, Label::kNear);
874 __ Move(current_character(), '\n');
875 __ jmp(&start_regexp, Label::kNear);
876
877 // Global regexp restarts matching here.
878 __ bind(&load_char_start_regexp);
879 // Load previous char as initial value of current character register.
880 LoadCurrentCharacterUnchecked(-1, 1);
881
882 __ bind(&start_regexp);
883 }
884
885 // Initialize on-stack registers.
886 if (num_saved_registers_ > 0) {
887 // Fill saved registers with initial value = start offset - 1
888 // Fill in stack push order, to avoid accessing across an unwritten
889 // page (a problem on Windows).
890 if (num_saved_registers_ > 8) {
891 __ Move(r9, kRegisterZero);
892 Label init_loop;
893 __ bind(&init_loop);
894 __ movq(Operand(rbp, r9, times_1, 0), rax);
895 __ subq(r9, Immediate(kSystemPointerSize));
896 __ cmpq(r9, Immediate(kRegisterZero -
897 num_saved_registers_ * kSystemPointerSize));
898 __ j(greater, &init_loop);
899 } else { // Unroll the loop.
900 for (int i = 0; i < num_saved_registers_; i++) {
901 __ movq(register_location(i), rax);
902 }
903 }
904 }
905
906 __ jmp(&start_label_);
907
908 // Exit code:
909 if (success_label_.is_linked()) {
910 // Save captures when successful.
911 __ bind(&success_label_);
912 if (num_saved_registers_ > 0) {
913 // copy captures to output
914 __ movq(rdx, Operand(rbp, kStartIndex));
915 __ movq(rbx, Operand(rbp, kRegisterOutput));
916 __ movq(rcx, Operand(rbp, kInputEnd));
917 __ subq(rcx, Operand(rbp, kInputStart));
918 if (mode_ == UC16) {
919 __ leaq(rcx, Operand(rcx, rdx, times_2, 0));
920 } else {
921 __ addq(rcx, rdx);
922 }
923 for (int i = 0; i < num_saved_registers_; i++) {
924 __ movq(rax, register_location(i));
925 if (i == 0 && global_with_zero_length_check()) {
926 // Keep capture start in rdx for the zero-length check later.
927 __ movq(rdx, rax);
928 }
929 __ addq(rax, rcx); // Convert to index from start, not end.
930 if (mode_ == UC16) {
931 __ sarq(rax, Immediate(1)); // Convert byte index to character index.
932 }
933 __ movl(Operand(rbx, i * kIntSize), rax);
934 }
935 }
936
937 if (global()) {
938 // Restart matching if the regular expression is flagged as global.
939 // Increment success counter.
940 __ incq(Operand(rbp, kSuccessfulCaptures));
941 // Capture results have been stored, so the number of remaining global
942 // output registers is reduced by the number of stored captures.
943 __ movsxlq(rcx, Operand(rbp, kNumOutputRegisters));
944 __ subq(rcx, Immediate(num_saved_registers_));
945 // Check whether we have enough room for another set of capture results.
946 __ cmpq(rcx, Immediate(num_saved_registers_));
947 __ j(less, &exit_label_);
948
949 __ movq(Operand(rbp, kNumOutputRegisters), rcx);
950 // Advance the location for output.
951 __ addq(Operand(rbp, kRegisterOutput),
952 Immediate(num_saved_registers_ * kIntSize));
953
954 // Prepare rax to initialize registers with its value in the next run.
955 __ movq(rax, Operand(rbp, kStringStartMinusOne));
956
957 // Restore the original regexp stack pointer value (effectively, pop the
958 // stored base pointer).
959 PopRegExpBasePointer(backtrack_stackpointer(), kScratchRegister);
960
961 if (global_with_zero_length_check()) {
962 // Special case for zero-length matches.
963 // rdx: capture start index
964 __ cmpq(rdi, rdx);
965 // Not a zero-length match, restart.
966 __ j(not_equal, &load_char_start_regexp);
967 // rdi (offset from the end) is zero if we already reached the end.
968 __ testq(rdi, rdi);
969 __ j(zero, &exit_label_, Label::kNear);
970 // Advance current position after a zero-length match.
971 Label advance;
972 __ bind(&advance);
973 if (mode_ == UC16) {
974 __ addq(rdi, Immediate(2));
975 } else {
976 __ incq(rdi);
977 }
978 if (global_unicode()) CheckNotInSurrogatePair(0, &advance);
979 }
980
981 __ jmp(&load_char_start_regexp);
982 } else {
983 __ Move(rax, SUCCESS);
984 }
985 }
986
987 __ bind(&exit_label_);
988 if (global()) {
989 // Return the number of successful captures.
990 __ movq(rax, Operand(rbp, kSuccessfulCaptures));
991 }
992
993 __ bind(&return_rax);
994 // Restore the original regexp stack pointer value (effectively, pop the
995 // stored base pointer).
996 PopRegExpBasePointer(backtrack_stackpointer(), kScratchRegister);
997
998 #ifdef V8_TARGET_OS_WIN
999 // Restore callee save registers.
1000 __ leaq(rsp, Operand(rbp, kLastCalleeSaveRegister));
1001 STATIC_ASSERT(kNumCalleeSaveRegisters == 3);
1002 __ popq(rbx);
1003 __ popq(rdi);
1004 __ popq(rsi);
1005 // Stack now at rbp.
1006 #else
1007 // Restore callee save register.
1008 STATIC_ASSERT(kNumCalleeSaveRegisters == 1);
1009 __ movq(rbx, Operand(rbp, kBackup_rbx));
1010 // Skip rsp to rbp.
1011 __ movq(rsp, rbp);
1012 #endif
1013
1014 // Exit function frame, restore previous one.
1015 __ popq(rbp);
1016 __ ret(0);
1017
1018 // Backtrack code (branch target for conditional backtracks).
1019 if (backtrack_label_.is_linked()) {
1020 __ bind(&backtrack_label_);
1021 Backtrack();
1022 }
1023
1024 Label exit_with_exception;
1025
1026 // Preempt-code
1027 if (check_preempt_label_.is_linked()) {
1028 SafeCallTarget(&check_preempt_label_);
1029
1030 __ pushq(rdi);
1031
1032 StoreRegExpStackPointerToMemory(backtrack_stackpointer(), kScratchRegister);
1033
1034 CallCheckStackGuardState();
1035 __ testq(rax, rax);
1036 // If returning non-zero, we should end execution with the given
1037 // result as return value.
1038 __ j(not_zero, &return_rax);
1039
1040 // Restore registers.
1041 __ Move(code_object_pointer(), masm_.CodeObject());
1042 __ popq(rdi);
1043
1044 LoadRegExpStackPointerFromMemory(backtrack_stackpointer());
1045
    // String might have moved: Reload rsi from frame.
1047 __ movq(rsi, Operand(rbp, kInputEnd));
1048 SafeReturn();
1049 }
1050
1051 // Backtrack stack overflow code.
1052 if (stack_overflow_label_.is_linked()) {
1053 SafeCallTarget(&stack_overflow_label_);
1054 // Reached if the backtrack-stack limit has been hit.
1055
1056 PushCallerSavedRegisters();
1057
1058 // Call GrowStack(isolate).
1059
1060 StoreRegExpStackPointerToMemory(backtrack_stackpointer(), kScratchRegister);
1061
1062 static constexpr int kNumArguments = 1;
1063 __ PrepareCallCFunction(kNumArguments);
1064 __ LoadAddress(arg_reg_1, ExternalReference::isolate_address(isolate()));
1065
1066 ExternalReference grow_stack = ExternalReference::re_grow_stack();
1067 __ CallCFunction(grow_stack, kNumArguments);
1068 // If nullptr is returned, we have failed to grow the stack, and must exit
1069 // with a stack-overflow exception.
1070 __ testq(rax, rax);
1071 __ j(equal, &exit_with_exception);
1072 PopCallerSavedRegisters();
1073 // Otherwise use return value as new stack pointer.
1074 __ movq(backtrack_stackpointer(), rax);
1075 // Restore saved registers and continue.
1076 __ Move(code_object_pointer(), masm_.CodeObject());
1077 SafeReturn();
1078 }
1079
1080 if (exit_with_exception.is_linked()) {
1081 // If any of the code above needed to exit with an exception.
1082 __ bind(&exit_with_exception);
1083 // Exit with Result EXCEPTION(-1) to signal thrown exception.
1084 __ Move(rax, EXCEPTION);
1085 __ jmp(&return_rax);
1086 }
1087
1088 if (fallback_label_.is_linked()) {
1089 __ bind(&fallback_label_);
1090 __ Move(rax, FALLBACK_TO_EXPERIMENTAL);
1091 __ jmp(&return_rax);
1092 }
1093
1094 FixupCodeRelativePositions();
1095
1096 CodeDesc code_desc;
1097 Isolate* isolate = this->isolate();
1098 masm_.GetCode(isolate, &code_desc);
1099 Handle<Code> code = Factory::CodeBuilder(isolate, code_desc, CodeKind::REGEXP)
1100 .set_self_reference(masm_.CodeObject())
1101 .Build();
1102 PROFILE(isolate,
1103 RegExpCodeCreateEvent(Handle<AbstractCode>::cast(code), source));
1104 return Handle<HeapObject>::cast(code);
1105 }
1106
1107
GoTo(Label* to)1108 void RegExpMacroAssemblerX64::GoTo(Label* to) {
1109 BranchOrBacktrack(no_condition, to);
1110 }
1111
1112
IfRegisterGE(int reg, int comparand, Label* if_ge)1113 void RegExpMacroAssemblerX64::IfRegisterGE(int reg,
1114 int comparand,
1115 Label* if_ge) {
1116 __ cmpq(register_location(reg), Immediate(comparand));
1117 BranchOrBacktrack(greater_equal, if_ge);
1118 }
1119
1120
IfRegisterLT(int reg, int comparand, Label* if_lt)1121 void RegExpMacroAssemblerX64::IfRegisterLT(int reg,
1122 int comparand,
1123 Label* if_lt) {
1124 __ cmpq(register_location(reg), Immediate(comparand));
1125 BranchOrBacktrack(less, if_lt);
1126 }
1127
1128
IfRegisterEqPos(int reg, Label* if_eq)1129 void RegExpMacroAssemblerX64::IfRegisterEqPos(int reg,
1130 Label* if_eq) {
1131 __ cmpq(rdi, register_location(reg));
1132 BranchOrBacktrack(equal, if_eq);
1133 }
1134
1135
1136 RegExpMacroAssembler::IrregexpImplementation
Implementation()1137 RegExpMacroAssemblerX64::Implementation() {
1138 return kX64Implementation;
1139 }
1140
1141
PopCurrentPosition()1142 void RegExpMacroAssemblerX64::PopCurrentPosition() {
1143 Pop(rdi);
1144 }
1145
1146
PopRegister(int register_index)1147 void RegExpMacroAssemblerX64::PopRegister(int register_index) {
1148 Pop(rax);
1149 __ movq(register_location(register_index), rax);
1150 }
1151
1152
PushBacktrack(Label* label)1153 void RegExpMacroAssemblerX64::PushBacktrack(Label* label) {
1154 Push(label);
1155 CheckStackLimit();
1156 }
1157
1158
PushCurrentPosition()1159 void RegExpMacroAssemblerX64::PushCurrentPosition() {
1160 Push(rdi);
1161 }
1162
1163
PushRegister(int register_index, StackCheckFlag check_stack_limit)1164 void RegExpMacroAssemblerX64::PushRegister(int register_index,
1165 StackCheckFlag check_stack_limit) {
1166 __ movq(rax, register_location(register_index));
1167 Push(rax);
1168 if (check_stack_limit) CheckStackLimit();
1169 }
1170
ReadCurrentPositionFromRegister(int reg)1171 void RegExpMacroAssemblerX64::ReadCurrentPositionFromRegister(int reg) {
1172 __ movq(rdi, register_location(reg));
1173 }
1174
1175
ReadPositionFromRegister(Register dst, int reg)1176 void RegExpMacroAssemblerX64::ReadPositionFromRegister(Register dst, int reg) {
1177 __ movq(dst, register_location(reg));
1178 }
1179
1180 // Preserves a position-independent representation of the stack pointer in reg:
1181 // reg = top - sp.
WriteStackPointerToRegister(int reg)1182 void RegExpMacroAssemblerX64::WriteStackPointerToRegister(int reg) {
1183 ExternalReference stack_top_address =
1184 ExternalReference::address_of_regexp_stack_memory_top_address(isolate());
1185 __ movq(rax, __ ExternalReferenceAsOperand(stack_top_address, rax));
1186 __ subq(rax, backtrack_stackpointer());
1187 __ movq(register_location(reg), rax);
1188 }
1189
ReadStackPointerFromRegister(int reg)1190 void RegExpMacroAssemblerX64::ReadStackPointerFromRegister(int reg) {
1191 ExternalReference stack_top_address =
1192 ExternalReference::address_of_regexp_stack_memory_top_address(isolate());
1193 __ movq(backtrack_stackpointer(),
1194 __ ExternalReferenceAsOperand(stack_top_address,
1195 backtrack_stackpointer()));
1196 __ subq(backtrack_stackpointer(), register_location(reg));
1197 }
1198
SetCurrentPositionFromEnd(int by)1199 void RegExpMacroAssemblerX64::SetCurrentPositionFromEnd(int by) {
1200 Label after_position;
1201 __ cmpq(rdi, Immediate(-by * char_size()));
1202 __ j(greater_equal, &after_position, Label::kNear);
1203 __ Move(rdi, -by * char_size());
1204 // On RegExp code entry (where this operation is used), the character before
1205 // the current position is expected to be already loaded.
1206 // We have advanced the position, so it's safe to read backwards.
1207 LoadCurrentCharacterUnchecked(-1, 1);
1208 __ bind(&after_position);
1209 }
1210
1211
SetRegister(int register_index, int to)1212 void RegExpMacroAssemblerX64::SetRegister(int register_index, int to) {
1213 DCHECK(register_index >= num_saved_registers_); // Reserved for positions!
1214 __ movq(register_location(register_index), Immediate(to));
1215 }
1216
1217
Succeed()1218 bool RegExpMacroAssemblerX64::Succeed() {
1219 __ jmp(&success_label_);
1220 return global();
1221 }
1222
1223
WriteCurrentPositionToRegister(int reg, int cp_offset)1224 void RegExpMacroAssemblerX64::WriteCurrentPositionToRegister(int reg,
1225 int cp_offset) {
1226 if (cp_offset == 0) {
1227 __ movq(register_location(reg), rdi);
1228 } else {
1229 __ leaq(rax, Operand(rdi, cp_offset * char_size()));
1230 __ movq(register_location(reg), rax);
1231 }
1232 }
1233
1234
ClearRegisters(int reg_from, int reg_to)1235 void RegExpMacroAssemblerX64::ClearRegisters(int reg_from, int reg_to) {
1236 DCHECK(reg_from <= reg_to);
1237 __ movq(rax, Operand(rbp, kStringStartMinusOne));
1238 for (int reg = reg_from; reg <= reg_to; reg++) {
1239 __ movq(register_location(reg), rax);
1240 }
1241 }
1242
1243 // Private methods:
1244
CallCheckStackGuardState()1245 void RegExpMacroAssemblerX64::CallCheckStackGuardState() {
1246 // This function call preserves no register values. Caller should
1247 // store anything volatile in a C call or overwritten by this function.
1248 static const int num_arguments = 3;
1249 __ PrepareCallCFunction(num_arguments);
1250 #ifdef V8_TARGET_OS_WIN
1251 // Second argument: Code of self. (Do this before overwriting r8).
1252 __ movq(rdx, code_object_pointer());
1253 // Third argument: RegExp code frame pointer.
1254 __ movq(r8, rbp);
1255 // First argument: Next address on the stack (will be address of
1256 // return address).
1257 __ leaq(rcx, Operand(rsp, -kSystemPointerSize));
1258 #else
1259 // Third argument: RegExp code frame pointer.
1260 __ movq(rdx, rbp);
1261 // Second argument: Code of self.
1262 __ movq(rsi, code_object_pointer());
1263 // First argument: Next address on the stack (will be address of
1264 // return address).
1265 __ leaq(rdi, Operand(rsp, -kSystemPointerSize));
1266 #endif
1267 ExternalReference stack_check =
1268 ExternalReference::re_check_stack_guard_state();
1269 __ CallCFunction(stack_check, num_arguments);
1270 }
1271
1272
1273 // Helper function for reading a value out of a stack frame.
1274 template <typename T>
frame_entry(Address re_frame, int frame_offset)1275 static T& frame_entry(Address re_frame, int frame_offset) {
1276 return reinterpret_cast<T&>(Memory<int32_t>(re_frame + frame_offset));
1277 }
1278
1279
1280 template <typename T>
frame_entry_address(Address re_frame, int frame_offset)1281 static T* frame_entry_address(Address re_frame, int frame_offset) {
1282 return reinterpret_cast<T*>(re_frame + frame_offset);
1283 }
1284
CheckStackGuardState(Address* return_address, Address raw_code, Address re_frame)1285 int RegExpMacroAssemblerX64::CheckStackGuardState(Address* return_address,
1286 Address raw_code,
1287 Address re_frame) {
1288 Code re_code = Code::cast(Object(raw_code));
1289 return NativeRegExpMacroAssembler::CheckStackGuardState(
1290 frame_entry<Isolate*>(re_frame, kIsolate),
1291 frame_entry<int>(re_frame, kStartIndex),
1292 static_cast<RegExp::CallOrigin>(frame_entry<int>(re_frame, kDirectCall)),
1293 return_address, re_code,
1294 frame_entry_address<Address>(re_frame, kInputString),
1295 frame_entry_address<const byte*>(re_frame, kInputStart),
1296 frame_entry_address<const byte*>(re_frame, kInputEnd));
1297 }
1298
1299
register_location(int register_index)1300 Operand RegExpMacroAssemblerX64::register_location(int register_index) {
1301 DCHECK(register_index < (1<<30));
1302 if (num_registers_ <= register_index) {
1303 num_registers_ = register_index + 1;
1304 }
1305 return Operand(rbp, kRegisterZero - register_index * kSystemPointerSize);
1306 }
1307
1308
1309 void RegExpMacroAssemblerX64::CheckPosition(int cp_offset,
1310 Label* on_outside_input) {
1311 if (cp_offset >= 0) {
1312 __ cmpl(rdi, Immediate(-cp_offset * char_size()));
1313 BranchOrBacktrack(greater_equal, on_outside_input);
1314 } else {
1315 __ leaq(rax, Operand(rdi, cp_offset * char_size()));
1316 __ cmpq(rax, Operand(rbp, kStringStartMinusOne));
1317 BranchOrBacktrack(less_equal, on_outside_input);
1318 }
1319 }
1320
1321
1322 void RegExpMacroAssemblerX64::BranchOrBacktrack(Condition condition,
1323 Label* to) {
1324 if (condition < 0) { // No condition
1325 if (to == nullptr) {
1326 Backtrack();
1327 return;
1328 }
1329 __ jmp(to);
1330 return;
1331 }
1332 if (to == nullptr) {
1333 __ j(condition, &backtrack_label_);
1334 return;
1335 }
1336 __ j(condition, to);
1337 }
1338
1339
1340 void RegExpMacroAssemblerX64::SafeCall(Label* to) {
1341 __ call(to);
1342 }
1343
1344
1345 void RegExpMacroAssemblerX64::SafeCallTarget(Label* label) {
1346 __ bind(label);
1347 __ subq(Operand(rsp, 0), code_object_pointer());
1348 }
1349
1350
1351 void RegExpMacroAssemblerX64::SafeReturn() {
1352 __ addq(Operand(rsp, 0), code_object_pointer());
1353 __ ret(0);
1354 }
1355
1356
1357 void RegExpMacroAssemblerX64::Push(Register source) {
1358 DCHECK(source != backtrack_stackpointer());
1359 // Notice: This updates flags, unlike normal Push.
1360 __ subq(backtrack_stackpointer(), Immediate(kIntSize));
1361 __ movl(Operand(backtrack_stackpointer(), 0), source);
1362 }
1363
1364
1365 void RegExpMacroAssemblerX64::Push(Immediate value) {
1366 // Notice: This updates flags, unlike normal Push.
1367 __ subq(backtrack_stackpointer(), Immediate(kIntSize));
1368 __ movl(Operand(backtrack_stackpointer(), 0), value);
1369 }
1370
1371
1372 void RegExpMacroAssemblerX64::FixupCodeRelativePositions() {
1373 for (int position : code_relative_fixup_positions_) {
1374 // The position succeeds a relative label offset from position.
1375 // Patch the relative offset to be relative to the Code object pointer
1376 // instead.
1377 int patch_position = position - kIntSize;
1378 int offset = masm_.long_at(patch_position);
1379 masm_.long_at_put(patch_position,
1380 offset
1381 + position
1382 + Code::kHeaderSize
1383 - kHeapObjectTag);
1384 }
1385 code_relative_fixup_positions_.Rewind(0);
1386 }
1387
1388
1389 void RegExpMacroAssemblerX64::Push(Label* backtrack_target) {
1390 __ subq(backtrack_stackpointer(), Immediate(kIntSize));
1391 __ movl(Operand(backtrack_stackpointer(), 0), backtrack_target);
1392 MarkPositionForCodeRelativeFixup();
1393 }
1394
1395
1396 void RegExpMacroAssemblerX64::Pop(Register target) {
1397 DCHECK(target != backtrack_stackpointer());
1398 __ movsxlq(target, Operand(backtrack_stackpointer(), 0));
1399 // Notice: This updates flags, unlike normal Pop.
1400 __ addq(backtrack_stackpointer(), Immediate(kIntSize));
1401 }
1402
1403
1404 void RegExpMacroAssemblerX64::Drop() {
1405 __ addq(backtrack_stackpointer(), Immediate(kIntSize));
1406 }
1407
1408
1409 void RegExpMacroAssemblerX64::CheckPreemption() {
1410 // Check for preemption.
1411 Label no_preempt;
1412 ExternalReference stack_limit =
1413 ExternalReference::address_of_jslimit(isolate());
1414 __ load_rax(stack_limit);
1415 __ cmpq(rsp, rax);
1416 __ j(above, &no_preempt);
1417
1418 SafeCall(&check_preempt_label_);
1419
1420 __ bind(&no_preempt);
1421 }
1422
1423
1424 void RegExpMacroAssemblerX64::CheckStackLimit() {
1425 Label no_stack_overflow;
1426 ExternalReference stack_limit =
1427 ExternalReference::address_of_regexp_stack_limit_address(isolate());
1428 __ load_rax(stack_limit);
1429 __ cmpq(backtrack_stackpointer(), rax);
1430 __ j(above, &no_stack_overflow);
1431
1432 SafeCall(&stack_overflow_label_);
1433
1434 __ bind(&no_stack_overflow);
1435 }
1436
1437
1438 void RegExpMacroAssemblerX64::LoadCurrentCharacterUnchecked(int cp_offset,
1439 int characters) {
1440 if (mode_ == LATIN1) {
1441 if (characters == 4) {
1442 __ movl(current_character(), Operand(rsi, rdi, times_1, cp_offset));
1443 } else if (characters == 2) {
1444 __ movzxwl(current_character(), Operand(rsi, rdi, times_1, cp_offset));
1445 } else {
1446 DCHECK_EQ(1, characters);
1447 __ movzxbl(current_character(), Operand(rsi, rdi, times_1, cp_offset));
1448 }
1449 } else {
1450 DCHECK(mode_ == UC16);
1451 if (characters == 2) {
1452 __ movl(current_character(),
1453 Operand(rsi, rdi, times_1, cp_offset * sizeof(base::uc16)));
1454 } else {
1455 DCHECK_EQ(1, characters);
1456 __ movzxwl(current_character(),
1457 Operand(rsi, rdi, times_1, cp_offset * sizeof(base::uc16)));
1458 }
1459 }
1460 }
1461
1462 #undef __
1463
1464 } // namespace internal
1465 } // namespace v8
1466
1467 #endif // V8_TARGET_ARCH_X64
1468