1 // Copyright 2019 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "src/regexp/regexp-bytecode-peephole.h"
6 
7 #include "src/flags/flags.h"
8 #include "src/objects/fixed-array-inl.h"
9 #include "src/regexp/regexp-bytecodes.h"
10 #include "src/utils/memcopy.h"
11 #include "src/utils/utils.h"
12 #include "src/zone/zone-containers.h"
13 #include "src/zone/zone.h"
14 
15 namespace v8 {
16 namespace internal {
17 
18 namespace {
19 
20 struct BytecodeArgument {
21   int offset;
22   int length;
23 
BytecodeArgumentv8::internal::__anon14964::BytecodeArgument24   BytecodeArgument(int offset, int length) : offset(offset), length(length) {}
25 };
26 
27 struct BytecodeArgumentMapping : BytecodeArgument {
28   int new_length;
29 
BytecodeArgumentMappingv8::internal::__anon14964::BytecodeArgumentMapping30   BytecodeArgumentMapping(int offset, int length, int new_length)
31       : BytecodeArgument(offset, length), new_length(new_length) {}
32 };
33 
34 struct BytecodeArgumentCheck : BytecodeArgument {
35   enum CheckType { kCheckAddress = 0, kCheckValue };
36   CheckType type;
37   int check_offset;
38   int check_length;
39 
BytecodeArgumentCheckv8::internal::__anon14964::BytecodeArgumentCheck40   BytecodeArgumentCheck(int offset, int length, int check_offset)
41       : BytecodeArgument(offset, length),
42         type(kCheckAddress),
43         check_offset(check_offset) {}
BytecodeArgumentCheckv8::internal::__anon14964::BytecodeArgumentCheck44   BytecodeArgumentCheck(int offset, int length, int check_offset,
45                         int check_length)
46       : BytecodeArgument(offset, length),
47         type(kCheckValue),
48         check_offset(check_offset),
49         check_length(check_length) {}
50 };
51 
52 // Trie-Node for storing bytecode sequences we want to optimize.
53 class BytecodeSequenceNode {
54  public:
55   // Dummy bytecode used when we need to store/return a bytecode but it's not a
56   // valid bytecode in the current context.
57   static constexpr int kDummyBytecode = -1;
58 
59   BytecodeSequenceNode(int bytecode, Zone* zone);
60   // Adds a new node as child of the current node if it isn't a child already.
61   BytecodeSequenceNode& FollowedBy(int bytecode);
62   // Marks the end of a sequence and sets optimized bytecode to replace all
63   // bytecodes of the sequence with.
64   BytecodeSequenceNode& ReplaceWith(int bytecode);
65   // Maps arguments of bytecodes in the sequence to the optimized bytecode.
66   // Order of invocation determines order of arguments in the optimized
67   // bytecode.
68   // Invoking this method is only allowed on nodes that mark the end of a valid
69   // sequence (i.e. after ReplaceWith()).
70   // bytecode_index_in_sequence: Zero-based index of the referred bytecode
71   // within the sequence (e.g. the bytecode passed to CreateSequence() has
72   // index 0).
73   // argument_offset: Zero-based offset to the argument within the bytecode
74   // (e.g. the first argument that's not packed with the bytecode has offset 4).
75   // argument_byte_length: Length of the argument.
76   // new_argument_byte_length: Length of the argument in the new bytecode
77   // (= argument_byte_length if omitted).
78   BytecodeSequenceNode& MapArgument(int bytecode_index_in_sequence,
79                                     int argument_offset,
80                                     int argument_byte_length,
81                                     int new_argument_byte_length = 0);
82   // Adds a check to the sequence node making it only a valid sequence when the
83   // argument of the current bytecode at the specified offset matches the offset
84   // to check against.
85   // argument_offset: Zero-based offset to the argument within the bytecode
86   // (e.g. the first argument that's not packed with the bytecode has offset 4).
87   // argument_byte_length: Length of the argument.
88   // check_byte_offset: Zero-based offset relative to the beginning of the
89   // sequence that needs to match the value given by argument_offset. (e.g.
90   // check_byte_offset 0 matches the address of the first bytecode in the
91   // sequence).
92   BytecodeSequenceNode& IfArgumentEqualsOffset(int argument_offset,
93                                                int argument_byte_length,
94                                                int check_byte_offset);
95   // Adds a check to the sequence node making it only a valid sequence when the
96   // argument of the current bytecode at the specified offset matches the
97   // argument of another bytecode in the sequence.
98   // This is similar to IfArgumentEqualsOffset, except that this method matches
99   // the values of both arguments.
100   BytecodeSequenceNode& IfArgumentEqualsValueAtOffset(
101       int argument_offset, int argument_byte_length,
102       int other_bytecode_index_in_sequence, int other_argument_offset,
103       int other_argument_byte_length);
104   // Marks an argument as unused.
105   // All arguments that are not mapped explicitly have to be marked as unused.
106   // bytecode_index_in_sequence: Zero-based index of the referred bytecode
107   // within the sequence (e.g. the bytecode passed to CreateSequence() has
108   // index 0).
109   // argument_offset: Zero-based offset to the argument within the bytecode
110   // (e.g. the first argument that's not packed with the bytecode has offset 4).
111   // argument_byte_length: Length of the argument.
112   BytecodeSequenceNode& IgnoreArgument(int bytecode_index_in_sequence,
113                                        int argument_offset,
114                                        int argument_byte_length);
115   // Checks if the current node is valid for the sequence. I.e. all conditions
116   // set by IfArgumentEqualsOffset and IfArgumentEquals are fulfilled by this
117   // node for the actual bytecode sequence.
118   bool CheckArguments(const byte* bytecode, int pc);
119   // Returns whether this node marks the end of a valid sequence (i.e. can be
120   // replaced with an optimized bytecode).
121   bool IsSequence() const;
122   // Returns the length of the sequence in bytes.
123   int SequenceLength() const;
124   // Returns the optimized bytecode for the node or kDummyBytecode if it is not
125   // the end of a valid sequence.
126   int OptimizedBytecode() const;
127   // Returns the child of the current node matching the given bytecode or
128   // nullptr if no such child is found.
129   BytecodeSequenceNode* Find(int bytecode) const;
130   // Returns number of arguments mapped to the current node.
131   // Invoking this method is only allowed on nodes that mark the end of a valid
132   // sequence (i.e. if IsSequence())
133   size_t ArgumentSize() const;
134   // Returns the argument-mapping of the argument at index.
135   // Invoking this method is only allowed on nodes that mark the end of a valid
136   // sequence (i.e. if IsSequence())
137   BytecodeArgumentMapping ArgumentMapping(size_t index) const;
138   // Returns an iterator to begin of ignored arguments.
139   // Invoking this method is only allowed on nodes that mark the end of a valid
140   // sequence (i.e. if IsSequence())
141   ZoneLinkedList<BytecodeArgument>::iterator ArgumentIgnoredBegin() const;
142   // Returns an iterator to end of ignored arguments.
143   // Invoking this method is only allowed on nodes that mark the end of a valid
144   // sequence (i.e. if IsSequence())
145   ZoneLinkedList<BytecodeArgument>::iterator ArgumentIgnoredEnd() const;
146   // Returns whether the current node has ignored argument or not.
147   bool HasIgnoredArguments() const;
148 
149  private:
150   // Returns a node in the sequence specified by its index within the sequence.
151   BytecodeSequenceNode& GetNodeByIndexInSequence(int index_in_sequence);
152   Zone* zone() const;
153 
154   int bytecode_;
155   int bytecode_replacement_;
156   int index_in_sequence_;
157   int start_offset_;
158   BytecodeSequenceNode* parent_;
159   ZoneUnorderedMap<int, BytecodeSequenceNode*> children_;
160   ZoneVector<BytecodeArgumentMapping>* argument_mapping_;
161   ZoneLinkedList<BytecodeArgumentCheck>* argument_check_;
162   ZoneLinkedList<BytecodeArgument>* argument_ignored_;
163 
164   Zone* zone_;
165 };
166 
167 // These definitions are here in order to please the linker, which in debug mode
168 // sometimes requires static constants to be defined in .cc files.
169 constexpr int BytecodeSequenceNode::kDummyBytecode;
170 
171 class RegExpBytecodePeephole {
172  public:
173   RegExpBytecodePeephole(Zone* zone, size_t buffer_size,
174                          const ZoneUnorderedMap<int, int>& jump_edges);
175 
176   // Parses bytecode and fills the internal buffer with the potentially
177   // optimized bytecode. Returns true when optimizations were performed, false
178   // otherwise.
179   bool OptimizeBytecode(const byte* bytecode, int length);
180   // Copies the internal bytecode buffer to another buffer. The caller is
181   // responsible for allocating/freeing the memory.
182   void CopyOptimizedBytecode(byte* to_address) const;
183   int Length() const;
184 
185  private:
186   // Sets up all sequences that are going to be used.
187   void DefineStandardSequences();
188   // Starts a new bytecode sequence.
189   BytecodeSequenceNode& CreateSequence(int bytecode);
190   // Checks for optimization candidates at pc and emits optimized bytecode to
191   // the internal buffer. Returns the length of replaced bytecodes in bytes.
192   int TryOptimizeSequence(const byte* bytecode, int bytecode_length,
193                           int start_pc);
194   // Emits optimized bytecode to the internal buffer. start_pc points to the
195   // start of the sequence in bytecode and last_node is the last
196   // BytecodeSequenceNode of the matching sequence found.
197   void EmitOptimization(int start_pc, const byte* bytecode,
198                         const BytecodeSequenceNode& last_node);
199   // Adds a relative jump source fixup at pos.
200   // Jump source fixups are used to find offsets in the new bytecode that
201   // contain jump sources.
202   void AddJumpSourceFixup(int fixup, int pos);
203   // Adds a relative jump destination fixup at pos.
204   // Jump destination fixups are used to find offsets in the new bytecode that
205   // can be jumped to.
206   void AddJumpDestinationFixup(int fixup, int pos);
207   // Sets an absolute jump destination fixup at pos.
208   void SetJumpDestinationFixup(int fixup, int pos);
209   // Prepare internal structures used to fixup jumps.
210   void PrepareJumpStructures(const ZoneUnorderedMap<int, int>& jump_edges);
211   // Updates all jump targets in the new bytecode.
212   void FixJumps();
213   // Update a single jump.
214   void FixJump(int jump_source, int jump_destination);
215   void AddSentinelFixups(int pos);
216   template <typename T>
217   void EmitValue(T value);
218   template <typename T>
219   void OverwriteValue(int offset, T value);
220   void CopyRangeToOutput(const byte* orig_bytecode, int start, int length);
221   void SetRange(byte value, int count);
222   void EmitArgument(int start_pc, const byte* bytecode,
223                     BytecodeArgumentMapping arg);
224   int pc() const;
225   Zone* zone() const;
226 
227   ZoneVector<byte> optimized_bytecode_buffer_;
228   BytecodeSequenceNode* sequences_;
229   // Jumps used in old bytecode.
230   // Key: Jump source (offset where destination is stored in old bytecode)
231   // Value: Destination
232   ZoneMap<int, int> jump_edges_;
233   // Jumps used in new bytecode.
234   // Key: Jump source (offset where destination is stored in new bytecode)
235   // Value: Destination
236   ZoneMap<int, int> jump_edges_mapped_;
237   // Number of times a jump destination is used within the bytecode.
238   // Key: Jump destination (offset in old bytecode).
239   // Value: Number of times jump destination is used.
240   ZoneMap<int, int> jump_usage_counts_;
241   // Maps offsets in old bytecode to fixups of sources (delta to new bytecode).
242   // Key: Offset in old bytecode from where the fixup is valid.
243   // Value: Delta to map jump source from old bytecode to new bytecode in bytes.
244   ZoneMap<int, int> jump_source_fixups_;
245   // Maps offsets in old bytecode to fixups of destinations (delta to new
246   // bytecode).
247   // Key: Offset in old bytecode from where the fixup is valid.
248   // Value: Delta to map jump destinations from old bytecode to new bytecode in
249   // bytes.
250   ZoneMap<int, int> jump_destination_fixups_;
251 
252   Zone* zone_;
253 
254   DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpBytecodePeephole);
255 };
256 
257 template <typename T>
GetValue(const byte* buffer, int pos)258 T GetValue(const byte* buffer, int pos) {
259   DCHECK(IsAligned(reinterpret_cast<Address>(buffer + pos), alignof(T)));
260   return *reinterpret_cast<const T*>(buffer + pos);
261 }
262 
GetArgumentValue(const byte* bytecode, int offset, int length)263 int32_t GetArgumentValue(const byte* bytecode, int offset, int length) {
264   switch (length) {
265     case 1:
266       return GetValue<byte>(bytecode, offset);
267     case 2:
268       return GetValue<int16_t>(bytecode, offset);
269     case 4:
270       return GetValue<int32_t>(bytecode, offset);
271     default:
272       UNREACHABLE();
273   }
274 }
275 
BytecodeSequenceNode(int bytecode, Zone* zone)276 BytecodeSequenceNode::BytecodeSequenceNode(int bytecode, Zone* zone)
277     : bytecode_(bytecode),
278       bytecode_replacement_(kDummyBytecode),
279       index_in_sequence_(0),
280       start_offset_(0),
281       parent_(nullptr),
282       children_(ZoneUnorderedMap<int, BytecodeSequenceNode*>(zone)),
283       argument_mapping_(zone->New<ZoneVector<BytecodeArgumentMapping>>(zone)),
284       argument_check_(zone->New<ZoneLinkedList<BytecodeArgumentCheck>>(zone)),
285       argument_ignored_(zone->New<ZoneLinkedList<BytecodeArgument>>(zone)),
286       zone_(zone) {}
287 
FollowedBy(int bytecode)288 BytecodeSequenceNode& BytecodeSequenceNode::FollowedBy(int bytecode) {
289   DCHECK(0 <= bytecode && bytecode < kRegExpBytecodeCount);
290 
291   if (children_.find(bytecode) == children_.end()) {
292     BytecodeSequenceNode* new_node =
293         zone()->New<BytecodeSequenceNode>(bytecode, zone());
294     // If node is not the first in the sequence, set offsets and parent.
295     if (bytecode_ != kDummyBytecode) {
296       new_node->start_offset_ = start_offset_ + RegExpBytecodeLength(bytecode_);
297       new_node->index_in_sequence_ = index_in_sequence_ + 1;
298       new_node->parent_ = this;
299     }
300     children_[bytecode] = new_node;
301   }
302 
303   return *children_[bytecode];
304 }
305 
ReplaceWith(int bytecode)306 BytecodeSequenceNode& BytecodeSequenceNode::ReplaceWith(int bytecode) {
307   DCHECK(0 <= bytecode && bytecode < kRegExpBytecodeCount);
308 
309   bytecode_replacement_ = bytecode;
310 
311   return *this;
312 }
313 
MapArgument( int bytecode_index_in_sequence, int argument_offset, int argument_byte_length, int new_argument_byte_length)314 BytecodeSequenceNode& BytecodeSequenceNode::MapArgument(
315     int bytecode_index_in_sequence, int argument_offset,
316     int argument_byte_length, int new_argument_byte_length) {
317   DCHECK(IsSequence());
318   DCHECK_LE(bytecode_index_in_sequence, index_in_sequence_);
319 
320   BytecodeSequenceNode& ref_node =
321       GetNodeByIndexInSequence(bytecode_index_in_sequence);
322   DCHECK_LT(argument_offset, RegExpBytecodeLength(ref_node.bytecode_));
323 
324   int absolute_offset = ref_node.start_offset_ + argument_offset;
325   if (new_argument_byte_length == 0) {
326     new_argument_byte_length = argument_byte_length;
327   }
328 
329   argument_mapping_->push_back(BytecodeArgumentMapping{
330       absolute_offset, argument_byte_length, new_argument_byte_length});
331 
332   return *this;
333 }
334 
IfArgumentEqualsOffset( int argument_offset, int argument_byte_length, int check_byte_offset)335 BytecodeSequenceNode& BytecodeSequenceNode::IfArgumentEqualsOffset(
336     int argument_offset, int argument_byte_length, int check_byte_offset) {
337   DCHECK_LT(argument_offset, RegExpBytecodeLength(bytecode_));
338   DCHECK(argument_byte_length == 1 || argument_byte_length == 2 ||
339          argument_byte_length == 4);
340 
341   int absolute_offset = start_offset_ + argument_offset;
342 
343   argument_check_->push_back(BytecodeArgumentCheck{
344       absolute_offset, argument_byte_length, check_byte_offset});
345 
346   return *this;
347 }
348 
IfArgumentEqualsValueAtOffset( int argument_offset, int argument_byte_length, int other_bytecode_index_in_sequence, int other_argument_offset, int other_argument_byte_length)349 BytecodeSequenceNode& BytecodeSequenceNode::IfArgumentEqualsValueAtOffset(
350     int argument_offset, int argument_byte_length,
351     int other_bytecode_index_in_sequence, int other_argument_offset,
352     int other_argument_byte_length) {
353   DCHECK_LT(argument_offset, RegExpBytecodeLength(bytecode_));
354   DCHECK_LE(other_bytecode_index_in_sequence, index_in_sequence_);
355   DCHECK_EQ(argument_byte_length, other_argument_byte_length);
356 
357   BytecodeSequenceNode& ref_node =
358       GetNodeByIndexInSequence(other_bytecode_index_in_sequence);
359   DCHECK_LT(other_argument_offset, RegExpBytecodeLength(ref_node.bytecode_));
360 
361   int absolute_offset = start_offset_ + argument_offset;
362   int other_absolute_offset = ref_node.start_offset_ + other_argument_offset;
363 
364   argument_check_->push_back(
365       BytecodeArgumentCheck{absolute_offset, argument_byte_length,
366                             other_absolute_offset, other_argument_byte_length});
367 
368   return *this;
369 }
370 
IgnoreArgument( int bytecode_index_in_sequence, int argument_offset, int argument_byte_length)371 BytecodeSequenceNode& BytecodeSequenceNode::IgnoreArgument(
372     int bytecode_index_in_sequence, int argument_offset,
373     int argument_byte_length) {
374   DCHECK(IsSequence());
375   DCHECK_LE(bytecode_index_in_sequence, index_in_sequence_);
376 
377   BytecodeSequenceNode& ref_node =
378       GetNodeByIndexInSequence(bytecode_index_in_sequence);
379   DCHECK_LT(argument_offset, RegExpBytecodeLength(ref_node.bytecode_));
380 
381   int absolute_offset = ref_node.start_offset_ + argument_offset;
382 
383   argument_ignored_->push_back(
384       BytecodeArgument{absolute_offset, argument_byte_length});
385 
386   return *this;
387 }
388 
CheckArguments(const byte* bytecode, int pc)389 bool BytecodeSequenceNode::CheckArguments(const byte* bytecode, int pc) {
390   bool is_valid = true;
391   for (auto check_iter = argument_check_->begin();
392        check_iter != argument_check_->end() && is_valid; check_iter++) {
393     auto value =
394         GetArgumentValue(bytecode, pc + check_iter->offset, check_iter->length);
395     if (check_iter->type == BytecodeArgumentCheck::kCheckAddress) {
396       is_valid &= value == pc + check_iter->check_offset;
397     } else if (check_iter->type == BytecodeArgumentCheck::kCheckValue) {
398       auto other_value = GetArgumentValue(
399           bytecode, pc + check_iter->check_offset, check_iter->check_length);
400       is_valid &= value == other_value;
401     } else {
402       UNREACHABLE();
403     }
404   }
405   return is_valid;
406 }
407 
IsSequence() const408 bool BytecodeSequenceNode::IsSequence() const {
409   return bytecode_replacement_ != kDummyBytecode;
410 }
411 
SequenceLength() const412 int BytecodeSequenceNode::SequenceLength() const {
413   return start_offset_ + RegExpBytecodeLength(bytecode_);
414 }
415 
OptimizedBytecode() const416 int BytecodeSequenceNode::OptimizedBytecode() const {
417   return bytecode_replacement_;
418 }
419 
Find(int bytecode) const420 BytecodeSequenceNode* BytecodeSequenceNode::Find(int bytecode) const {
421   auto found = children_.find(bytecode);
422   if (found == children_.end()) return nullptr;
423   return found->second;
424 }
425 
ArgumentSize() const426 size_t BytecodeSequenceNode::ArgumentSize() const {
427   DCHECK(IsSequence());
428   return argument_mapping_->size();
429 }
430 
ArgumentMapping( size_t index) const431 BytecodeArgumentMapping BytecodeSequenceNode::ArgumentMapping(
432     size_t index) const {
433   DCHECK(IsSequence());
434   DCHECK(argument_mapping_ != nullptr);
435   DCHECK_LT(index, argument_mapping_->size());
436 
437   return argument_mapping_->at(index);
438 }
439 
440 ZoneLinkedList<BytecodeArgument>::iterator
ArgumentIgnoredBegin() const441 BytecodeSequenceNode::ArgumentIgnoredBegin() const {
442   DCHECK(IsSequence());
443   DCHECK(argument_ignored_ != nullptr);
444   return argument_ignored_->begin();
445 }
446 
447 ZoneLinkedList<BytecodeArgument>::iterator
ArgumentIgnoredEnd() const448 BytecodeSequenceNode::ArgumentIgnoredEnd() const {
449   DCHECK(IsSequence());
450   DCHECK(argument_ignored_ != nullptr);
451   return argument_ignored_->end();
452 }
453 
HasIgnoredArguments() const454 bool BytecodeSequenceNode::HasIgnoredArguments() const {
455   return argument_ignored_ != nullptr;
456 }
457 
GetNodeByIndexInSequence( int index_in_sequence)458 BytecodeSequenceNode& BytecodeSequenceNode::GetNodeByIndexInSequence(
459     int index_in_sequence) {
460   DCHECK_LE(index_in_sequence, index_in_sequence_);
461 
462   if (index_in_sequence < index_in_sequence_) {
463     DCHECK(parent_ != nullptr);
464     return parent_->GetNodeByIndexInSequence(index_in_sequence);
465   } else {
466     return *this;
467   }
468 }
469 
zone() const470 Zone* BytecodeSequenceNode::zone() const { return zone_; }
471 
RegExpBytecodePeephole( Zone* zone, size_t buffer_size, const ZoneUnorderedMap<int, int>& jump_edges)472 RegExpBytecodePeephole::RegExpBytecodePeephole(
473     Zone* zone, size_t buffer_size,
474     const ZoneUnorderedMap<int, int>& jump_edges)
475     : optimized_bytecode_buffer_(zone),
476       sequences_(zone->New<BytecodeSequenceNode>(
477           BytecodeSequenceNode::kDummyBytecode, zone)),
478       jump_edges_(zone),
479       jump_edges_mapped_(zone),
480       jump_usage_counts_(zone),
481       jump_source_fixups_(zone),
482       jump_destination_fixups_(zone),
483       zone_(zone) {
484   optimized_bytecode_buffer_.reserve(buffer_size);
485   PrepareJumpStructures(jump_edges);
486   DefineStandardSequences();
487   // Sentinel fixups at beginning of bytecode (position -1) so we don't have to
488   // check for end of iterator inside the fixup loop.
489   // In general fixups are deltas of original offsets of jump
490   // sources/destinations (in the old bytecode) to find them in the new
491   // bytecode. All jump targets are fixed after the new bytecode is fully
492   // emitted in the internal buffer.
493   AddSentinelFixups(-1);
494   // Sentinel fixups at end of (old) bytecode so we don't have to check for
495   // end of iterator inside the fixup loop.
496   DCHECK_LE(buffer_size, std::numeric_limits<int>::max());
497   AddSentinelFixups(static_cast<int>(buffer_size));
498 }
499 
DefineStandardSequences()500 void RegExpBytecodePeephole::DefineStandardSequences() {
501   // Commonly used sequences can be found by creating regexp bytecode traces
502   // (--trace-regexp-bytecodes) and using v8/tools/regexp-sequences.py.
503   CreateSequence(BC_LOAD_CURRENT_CHAR)
504       .FollowedBy(BC_CHECK_BIT_IN_TABLE)
505       .FollowedBy(BC_ADVANCE_CP_AND_GOTO)
506       // Sequence is only valid if the jump target of ADVANCE_CP_AND_GOTO is the
507       // first bytecode in this sequence.
508       .IfArgumentEqualsOffset(4, 4, 0)
509       .ReplaceWith(BC_SKIP_UNTIL_BIT_IN_TABLE)
510       .MapArgument(0, 1, 3)      // load offset
511       .MapArgument(2, 1, 3, 4)   // advance by
512       .MapArgument(1, 8, 16)     // bit table
513       .MapArgument(1, 4, 4)      // goto when match
514       .MapArgument(0, 4, 4)      // goto on failure
515       .IgnoreArgument(2, 4, 4);  // loop jump
516 
517   CreateSequence(BC_CHECK_CURRENT_POSITION)
518       .FollowedBy(BC_LOAD_CURRENT_CHAR_UNCHECKED)
519       .FollowedBy(BC_CHECK_CHAR)
520       .FollowedBy(BC_ADVANCE_CP_AND_GOTO)
521       // Sequence is only valid if the jump target of ADVANCE_CP_AND_GOTO is the
522       // first bytecode in this sequence.
523       .IfArgumentEqualsOffset(4, 4, 0)
524       .ReplaceWith(BC_SKIP_UNTIL_CHAR_POS_CHECKED)
525       .MapArgument(1, 1, 3)      // load offset
526       .MapArgument(3, 1, 3, 2)   // advance_by
527       .MapArgument(2, 1, 3, 2)   // c
528       .MapArgument(0, 1, 3, 4)   // eats at least
529       .MapArgument(2, 4, 4)      // goto when match
530       .MapArgument(0, 4, 4)      // goto on failure
531       .IgnoreArgument(3, 4, 4);  // loop jump
532 
533   CreateSequence(BC_CHECK_CURRENT_POSITION)
534       .FollowedBy(BC_LOAD_CURRENT_CHAR_UNCHECKED)
535       .FollowedBy(BC_AND_CHECK_CHAR)
536       .FollowedBy(BC_ADVANCE_CP_AND_GOTO)
537       // Sequence is only valid if the jump target of ADVANCE_CP_AND_GOTO is the
538       // first bytecode in this sequence.
539       .IfArgumentEqualsOffset(4, 4, 0)
540       .ReplaceWith(BC_SKIP_UNTIL_CHAR_AND)
541       .MapArgument(1, 1, 3)      // load offset
542       .MapArgument(3, 1, 3, 2)   // advance_by
543       .MapArgument(2, 1, 3, 2)   // c
544       .MapArgument(2, 4, 4)      // mask
545       .MapArgument(0, 1, 3, 4)   // eats at least
546       .MapArgument(2, 8, 4)      // goto when match
547       .MapArgument(0, 4, 4)      // goto on failure
548       .IgnoreArgument(3, 4, 4);  // loop jump
549 
550   // TODO(pthier): It might make sense for short sequences like this one to only
551   // optimize them if the resulting optimization is not longer than the current
552   // one. This could be the case if there are jumps inside the sequence and we
553   // have to replicate parts of the sequence. A method to mark such sequences
554   // might be useful.
555   CreateSequence(BC_LOAD_CURRENT_CHAR)
556       .FollowedBy(BC_CHECK_CHAR)
557       .FollowedBy(BC_ADVANCE_CP_AND_GOTO)
558       // Sequence is only valid if the jump target of ADVANCE_CP_AND_GOTO is the
559       // first bytecode in this sequence.
560       .IfArgumentEqualsOffset(4, 4, 0)
561       .ReplaceWith(BC_SKIP_UNTIL_CHAR)
562       .MapArgument(0, 1, 3)      // load offset
563       .MapArgument(2, 1, 3, 2)   // advance by
564       .MapArgument(1, 1, 3, 2)   // character
565       .MapArgument(1, 4, 4)      // goto when match
566       .MapArgument(0, 4, 4)      // goto on failure
567       .IgnoreArgument(2, 4, 4);  // loop jump
568 
569   CreateSequence(BC_LOAD_CURRENT_CHAR)
570       .FollowedBy(BC_CHECK_CHAR)
571       .FollowedBy(BC_CHECK_CHAR)
572       // Sequence is only valid if the jump targets of both CHECK_CHAR bytecodes
573       // are equal.
574       .IfArgumentEqualsValueAtOffset(4, 4, 1, 4, 4)
575       .FollowedBy(BC_ADVANCE_CP_AND_GOTO)
576       // Sequence is only valid if the jump target of ADVANCE_CP_AND_GOTO is the
577       // first bytecode in this sequence.
578       .IfArgumentEqualsOffset(4, 4, 0)
579       .ReplaceWith(BC_SKIP_UNTIL_CHAR_OR_CHAR)
580       .MapArgument(0, 1, 3)      // load offset
581       .MapArgument(3, 1, 3, 4)   // advance by
582       .MapArgument(1, 1, 3, 2)   // character 1
583       .MapArgument(2, 1, 3, 2)   // character 2
584       .MapArgument(1, 4, 4)      // goto when match
585       .MapArgument(0, 4, 4)      // goto on failure
586       .IgnoreArgument(2, 4, 4)   // goto when match 2
587       .IgnoreArgument(3, 4, 4);  // loop jump
588 
589   CreateSequence(BC_LOAD_CURRENT_CHAR)
590       .FollowedBy(BC_CHECK_GT)
591       // Sequence is only valid if the jump target of CHECK_GT is the first
592       // bytecode AFTER the whole sequence.
593       .IfArgumentEqualsOffset(4, 4, 56)
594       .FollowedBy(BC_CHECK_BIT_IN_TABLE)
595       // Sequence is only valid if the jump target of CHECK_BIT_IN_TABLE is
596       // the ADVANCE_CP_AND_GOTO bytecode at the end of the sequence.
597       .IfArgumentEqualsOffset(4, 4, 48)
598       .FollowedBy(BC_GOTO)
599       // Sequence is only valid if the jump target of GOTO is the same as the
600       // jump target of CHECK_GT (i.e. both jump to the first bytecode AFTER the
601       // whole sequence.
602       .IfArgumentEqualsValueAtOffset(4, 4, 1, 4, 4)
603       .FollowedBy(BC_ADVANCE_CP_AND_GOTO)
604       // Sequence is only valid if the jump target of ADVANCE_CP_AND_GOTO is the
605       // first bytecode in this sequence.
606       .IfArgumentEqualsOffset(4, 4, 0)
607       .ReplaceWith(BC_SKIP_UNTIL_GT_OR_NOT_BIT_IN_TABLE)
608       .MapArgument(0, 1, 3)      // load offset
609       .MapArgument(4, 1, 3, 2)   // advance by
610       .MapArgument(1, 1, 3, 2)   // character
611       .MapArgument(2, 8, 16)     // bit table
612       .MapArgument(1, 4, 4)      // goto when match
613       .MapArgument(0, 4, 4)      // goto on failure
614       .IgnoreArgument(2, 4, 4)   // indirect loop jump
615       .IgnoreArgument(3, 4, 4)   // jump out of loop
616       .IgnoreArgument(4, 4, 4);  // loop jump
617 }
618 
OptimizeBytecode(const byte* bytecode, int length)619 bool RegExpBytecodePeephole::OptimizeBytecode(const byte* bytecode,
620                                               int length) {
621   int old_pc = 0;
622   bool did_optimize = false;
623 
624   while (old_pc < length) {
625     int replaced_len = TryOptimizeSequence(bytecode, length, old_pc);
626     if (replaced_len > 0) {
627       old_pc += replaced_len;
628       did_optimize = true;
629     } else {
630       int bc = bytecode[old_pc];
631       int bc_len = RegExpBytecodeLength(bc);
632       CopyRangeToOutput(bytecode, old_pc, bc_len);
633       old_pc += bc_len;
634     }
635   }
636 
637   if (did_optimize) {
638     FixJumps();
639   }
640 
641   return did_optimize;
642 }
643 
CopyOptimizedBytecode(byte* to_address) const644 void RegExpBytecodePeephole::CopyOptimizedBytecode(byte* to_address) const {
645   MemCopy(to_address, &(*optimized_bytecode_buffer_.begin()), Length());
646 }
647 
Length() const648 int RegExpBytecodePeephole::Length() const { return pc(); }
649 
CreateSequence(int bytecode)650 BytecodeSequenceNode& RegExpBytecodePeephole::CreateSequence(int bytecode) {
651   DCHECK(sequences_ != nullptr);
652   DCHECK(0 <= bytecode && bytecode < kRegExpBytecodeCount);
653 
654   return sequences_->FollowedBy(bytecode);
655 }
656 
TryOptimizeSequence(const byte* bytecode, int bytecode_length, int start_pc)657 int RegExpBytecodePeephole::TryOptimizeSequence(const byte* bytecode,
658                                                 int bytecode_length,
659                                                 int start_pc) {
660   BytecodeSequenceNode* seq_node = sequences_;
661   BytecodeSequenceNode* valid_seq_end = nullptr;
662 
663   int current_pc = start_pc;
664 
665   // Check for the longest valid sequence matching any of the pre-defined
666   // sequences in the Trie data structure.
667   while (current_pc < bytecode_length) {
668     seq_node = seq_node->Find(bytecode[current_pc]);
669     if (seq_node == nullptr) break;
670     if (!seq_node->CheckArguments(bytecode, start_pc)) break;
671 
672     if (seq_node->IsSequence()) valid_seq_end = seq_node;
673     current_pc += RegExpBytecodeLength(bytecode[current_pc]);
674   }
675 
676   if (valid_seq_end) {
677     EmitOptimization(start_pc, bytecode, *valid_seq_end);
678     return valid_seq_end->SequenceLength();
679   }
680 
681   return 0;
682 }
683 
EmitOptimization( int start_pc, const byte* bytecode, const BytecodeSequenceNode& last_node)684 void RegExpBytecodePeephole::EmitOptimization(
685     int start_pc, const byte* bytecode, const BytecodeSequenceNode& last_node) {
686 #ifdef DEBUG
687   int optimized_start_pc = pc();
688 #endif
689   // Jump sources that are mapped or marked as unused will be deleted at the end
690   // of this method. We don't delete them immediately as we might need the
691   // information when we have to preserve bytecodes at the end.
692   // TODO(pthier): Replace with a stack-allocated data structure.
693   ZoneLinkedList<int> delete_jumps = ZoneLinkedList<int>(zone());
694 
695   uint32_t bc = last_node.OptimizedBytecode();
696   EmitValue(bc);
697 
698   for (size_t arg = 0; arg < last_node.ArgumentSize(); arg++) {
699     BytecodeArgumentMapping arg_map = last_node.ArgumentMapping(arg);
700     int arg_pos = start_pc + arg_map.offset;
701     // If we map any jump source we mark the old source for deletion and insert
702     // a new jump.
703     auto jump_edge_iter = jump_edges_.find(arg_pos);
704     if (jump_edge_iter != jump_edges_.end()) {
705       int jump_source = jump_edge_iter->first;
706       int jump_destination = jump_edge_iter->second;
707       // Add new jump edge add current position.
708       jump_edges_mapped_.emplace(Length(), jump_destination);
709       // Mark old jump edge for deletion.
710       delete_jumps.push_back(jump_source);
711       // Decrement usage count of jump destination.
712       auto jump_count_iter = jump_usage_counts_.find(jump_destination);
713       DCHECK(jump_count_iter != jump_usage_counts_.end());
714       int& usage_count = jump_count_iter->second;
715       --usage_count;
716     }
717     // TODO(pthier): DCHECK that mapped arguments are never sources of jumps
718     // to destinations inside the sequence.
719     EmitArgument(start_pc, bytecode, arg_map);
720   }
721   DCHECK_EQ(pc(), optimized_start_pc +
722                       RegExpBytecodeLength(last_node.OptimizedBytecode()));
723 
724   // Remove jumps from arguments we ignore.
725   if (last_node.HasIgnoredArguments()) {
726     for (auto ignored_arg = last_node.ArgumentIgnoredBegin();
727          ignored_arg != last_node.ArgumentIgnoredEnd(); ignored_arg++) {
728       auto jump_edge_iter = jump_edges_.find(start_pc + ignored_arg->offset);
729       if (jump_edge_iter != jump_edges_.end()) {
730         int jump_source = jump_edge_iter->first;
731         int jump_destination = jump_edge_iter->second;
732         // Mark old jump edge for deletion.
733         delete_jumps.push_back(jump_source);
734         // Decrement usage count of jump destination.
735         auto jump_count_iter = jump_usage_counts_.find(jump_destination);
736         DCHECK(jump_count_iter != jump_usage_counts_.end());
737         int& usage_count = jump_count_iter->second;
738         --usage_count;
739       }
740     }
741   }
742 
743   int fixup_length = RegExpBytecodeLength(bc) - last_node.SequenceLength();
744 
745   // Check if there are any jumps inside the old sequence.
746   // If so we have to keep the bytecodes that are jumped to around.
747   auto jump_destination_candidate = jump_usage_counts_.upper_bound(start_pc);
748   int jump_candidate_destination = jump_destination_candidate->first;
749   int jump_candidate_count = jump_destination_candidate->second;
750   // Jump destinations only jumped to from inside the sequence will be ignored.
751   while (jump_destination_candidate != jump_usage_counts_.end() &&
752          jump_candidate_count == 0) {
753     ++jump_destination_candidate;
754     jump_candidate_destination = jump_destination_candidate->first;
755     jump_candidate_count = jump_destination_candidate->second;
756   }
757 
758   int preserve_from = start_pc + last_node.SequenceLength();
759   if (jump_destination_candidate != jump_usage_counts_.end() &&
760       jump_candidate_destination < start_pc + last_node.SequenceLength()) {
761     preserve_from = jump_candidate_destination;
762     // Check if any jump in the sequence we are preserving has a jump
763     // destination inside the optimized sequence before the current position we
764     // want to preserve. If so we have to preserve all bytecodes starting at
765     // this jump destination.
766     for (auto jump_iter = jump_edges_.lower_bound(preserve_from);
767          jump_iter != jump_edges_.end() &&
768          jump_iter->first /* jump source */ <
769              start_pc + last_node.SequenceLength();
770          ++jump_iter) {
771       int jump_destination = jump_iter->second;
772       if (jump_destination > start_pc && jump_destination < preserve_from) {
773         preserve_from = jump_destination;
774       }
775     }
776 
777     // We preserve everything to the end of the sequence. This is conservative
778     // since it would be enough to preserve all bytecudes up to an unconditional
779     // jump.
780     int preserve_length = start_pc + last_node.SequenceLength() - preserve_from;
781     fixup_length += preserve_length;
782     // Jumps after the start of the preserved sequence need fixup.
783     AddJumpSourceFixup(fixup_length,
784                        start_pc + last_node.SequenceLength() - preserve_length);
785     // All jump targets after the start of the optimized sequence need to be
786     // fixed relative to the length of the optimized sequence including
787     // bytecodes we preserved.
788     AddJumpDestinationFixup(fixup_length, start_pc + 1);
789     // Jumps to the sequence we preserved need absolute fixup as they could
790     // occur before or after the sequence.
791     SetJumpDestinationFixup(pc() - preserve_from, preserve_from);
792     CopyRangeToOutput(bytecode, preserve_from, preserve_length);
793   } else {
794     AddJumpDestinationFixup(fixup_length, start_pc + 1);
795     // Jumps after the end of the old sequence need fixup.
796     AddJumpSourceFixup(fixup_length, start_pc + last_node.SequenceLength());
797   }
798 
799   // Delete jumps we definitely don't need anymore
800   for (int del : delete_jumps) {
801     if (del < preserve_from) {
802       jump_edges_.erase(del);
803     }
804   }
805 }
806 
AddJumpSourceFixup(int fixup, int pos)807 void RegExpBytecodePeephole::AddJumpSourceFixup(int fixup, int pos) {
808   auto previous_fixup = jump_source_fixups_.lower_bound(pos);
809   DCHECK(previous_fixup != jump_source_fixups_.end());
810   DCHECK(previous_fixup != jump_source_fixups_.begin());
811 
812   int previous_fixup_value = (--previous_fixup)->second;
813   jump_source_fixups_[pos] = previous_fixup_value + fixup;
814 }
815 
AddJumpDestinationFixup(int fixup, int pos)816 void RegExpBytecodePeephole::AddJumpDestinationFixup(int fixup, int pos) {
817   auto previous_fixup = jump_destination_fixups_.lower_bound(pos);
818   DCHECK(previous_fixup != jump_destination_fixups_.end());
819   DCHECK(previous_fixup != jump_destination_fixups_.begin());
820 
821   int previous_fixup_value = (--previous_fixup)->second;
822   jump_destination_fixups_[pos] = previous_fixup_value + fixup;
823 }
824 
SetJumpDestinationFixup(int fixup, int pos)825 void RegExpBytecodePeephole::SetJumpDestinationFixup(int fixup, int pos) {
826   auto previous_fixup = jump_destination_fixups_.lower_bound(pos);
827   DCHECK(previous_fixup != jump_destination_fixups_.end());
828   DCHECK(previous_fixup != jump_destination_fixups_.begin());
829 
830   int previous_fixup_value = (--previous_fixup)->second;
831   jump_destination_fixups_.emplace(pos, fixup);
832   jump_destination_fixups_.emplace(pos + 1, previous_fixup_value);
833 }
834 
PrepareJumpStructures( const ZoneUnorderedMap<int, int>& jump_edges)835 void RegExpBytecodePeephole::PrepareJumpStructures(
836     const ZoneUnorderedMap<int, int>& jump_edges) {
837   for (auto jump_edge : jump_edges) {
838     int jump_source = jump_edge.first;
839     int jump_destination = jump_edge.second;
840 
841     jump_edges_.emplace(jump_source, jump_destination);
842     jump_usage_counts_[jump_destination]++;
843   }
844 }
845 
FixJumps()846 void RegExpBytecodePeephole::FixJumps() {
847   int position_fixup = 0;
848   // Next position where fixup changes.
849   auto next_source_fixup = jump_source_fixups_.lower_bound(0);
850   int next_source_fixup_offset = next_source_fixup->first;
851   int next_source_fixup_value = next_source_fixup->second;
852 
853   for (auto jump_edge : jump_edges_) {
854     int jump_source = jump_edge.first;
855     int jump_destination = jump_edge.second;
856     while (jump_source >= next_source_fixup_offset) {
857       position_fixup = next_source_fixup_value;
858       ++next_source_fixup;
859       next_source_fixup_offset = next_source_fixup->first;
860       next_source_fixup_value = next_source_fixup->second;
861     }
862     jump_source += position_fixup;
863 
864     FixJump(jump_source, jump_destination);
865   }
866 
867   // Mapped jump edges don't need source fixups, as the position already is an
868   // offset in the new bytecode.
869   for (auto jump_edge : jump_edges_mapped_) {
870     int jump_source = jump_edge.first;
871     int jump_destination = jump_edge.second;
872 
873     FixJump(jump_source, jump_destination);
874   }
875 }
876 
FixJump(int jump_source, int jump_destination)877 void RegExpBytecodePeephole::FixJump(int jump_source, int jump_destination) {
878   int fixed_jump_destination =
879       jump_destination +
880       (--jump_destination_fixups_.upper_bound(jump_destination))->second;
881   DCHECK_LT(fixed_jump_destination, Length());
882 #ifdef DEBUG
883   // TODO(pthier): This check could be better if we track the bytecodes
884   // actually used and check if we jump to one of them.
885   byte jump_bc = optimized_bytecode_buffer_[fixed_jump_destination];
886   DCHECK_GT(jump_bc, 0);
887   DCHECK_LT(jump_bc, kRegExpBytecodeCount);
888 #endif
889 
890   if (jump_destination != fixed_jump_destination) {
891     OverwriteValue<uint32_t>(jump_source, fixed_jump_destination);
892   }
893 }
894 
AddSentinelFixups(int pos)895 void RegExpBytecodePeephole::AddSentinelFixups(int pos) {
896   jump_source_fixups_.emplace(pos, 0);
897   jump_destination_fixups_.emplace(pos, 0);
898 }
899 
900 template <typename T>
EmitValue(T value)901 void RegExpBytecodePeephole::EmitValue(T value) {
902   DCHECK(optimized_bytecode_buffer_.begin() + pc() ==
903          optimized_bytecode_buffer_.end());
904   byte* value_byte_iter = reinterpret_cast<byte*>(&value);
905   optimized_bytecode_buffer_.insert(optimized_bytecode_buffer_.end(),
906                                     value_byte_iter,
907                                     value_byte_iter + sizeof(T));
908 }
909 
910 template <typename T>
OverwriteValue(int offset, T value)911 void RegExpBytecodePeephole::OverwriteValue(int offset, T value) {
912   byte* value_byte_iter = reinterpret_cast<byte*>(&value);
913   byte* value_byte_iter_end = value_byte_iter + sizeof(T);
914   while (value_byte_iter < value_byte_iter_end) {
915     optimized_bytecode_buffer_[offset++] = *value_byte_iter++;
916   }
917 }
918 
CopyRangeToOutput(const byte* orig_bytecode, int start, int length)919 void RegExpBytecodePeephole::CopyRangeToOutput(const byte* orig_bytecode,
920                                                int start, int length) {
921   DCHECK(optimized_bytecode_buffer_.begin() + pc() ==
922          optimized_bytecode_buffer_.end());
923   optimized_bytecode_buffer_.insert(optimized_bytecode_buffer_.end(),
924                                     orig_bytecode + start,
925                                     orig_bytecode + start + length);
926 }
927 
SetRange(byte value, int count)928 void RegExpBytecodePeephole::SetRange(byte value, int count) {
929   DCHECK(optimized_bytecode_buffer_.begin() + pc() ==
930          optimized_bytecode_buffer_.end());
931   optimized_bytecode_buffer_.insert(optimized_bytecode_buffer_.end(), count,
932                                     value);
933 }
934 
EmitArgument(int start_pc, const byte* bytecode, BytecodeArgumentMapping arg)935 void RegExpBytecodePeephole::EmitArgument(int start_pc, const byte* bytecode,
936                                           BytecodeArgumentMapping arg) {
937   int arg_pos = start_pc + arg.offset;
938   switch (arg.length) {
939     case 1:
940       DCHECK_EQ(arg.new_length, arg.length);
941       EmitValue(GetValue<byte>(bytecode, arg_pos));
942       break;
943     case 2:
944       DCHECK_EQ(arg.new_length, arg.length);
945       EmitValue(GetValue<uint16_t>(bytecode, arg_pos));
946       break;
947     case 3: {
948       // Length 3 only occurs in 'packed' arguments where the lowermost byte is
949       // the current bytecode, and the remaining 3 bytes are the packed value.
950       //
951       // We load 4 bytes from position - 1 and shift out the bytecode.
952 #ifdef V8_TARGET_BIG_ENDIAN
953       UNIMPLEMENTED();
954       int32_t val = 0;
955 #else
956       int32_t val = GetValue<int32_t>(bytecode, arg_pos - 1) >> kBitsPerByte;
957 #endif  // V8_TARGET_BIG_ENDIAN
958 
959       switch (arg.new_length) {
960         case 2:
961           EmitValue<uint16_t>(val);
962           break;
963         case 3: {
964           // Pack with previously emitted value.
965           auto prev_val =
966               GetValue<int32_t>(&(*optimized_bytecode_buffer_.begin()),
967                                 Length() - sizeof(uint32_t));
968 #ifdef V8_TARGET_BIG_ENDIAN
969       UNIMPLEMENTED();
970       USE(prev_val);
971 #else
972           DCHECK_EQ(prev_val & 0xFFFFFF00, 0);
973           OverwriteValue<uint32_t>(
974               pc() - sizeof(uint32_t),
975               (static_cast<uint32_t>(val) << 8) | (prev_val & 0xFF));
976 #endif  // V8_TARGET_BIG_ENDIAN
977           break;
978         }
979         case 4:
980           EmitValue<uint32_t>(val);
981           break;
982       }
983       break;
984     }
985     case 4:
986       DCHECK_EQ(arg.new_length, arg.length);
987       EmitValue(GetValue<uint32_t>(bytecode, arg_pos));
988       break;
989     case 8:
990       DCHECK_EQ(arg.new_length, arg.length);
991       EmitValue(GetValue<uint64_t>(bytecode, arg_pos));
992       break;
993     default:
994       CopyRangeToOutput(bytecode, arg_pos,
995                         std::min(arg.length, arg.new_length));
996       if (arg.length < arg.new_length) {
997         SetRange(0x00, arg.new_length - arg.length);
998       }
999       break;
1000   }
1001 }
1002 
pc() const1003 int RegExpBytecodePeephole::pc() const {
1004   DCHECK_LE(optimized_bytecode_buffer_.size(), std::numeric_limits<int>::max());
1005   return static_cast<int>(optimized_bytecode_buffer_.size());
1006 }
1007 
zone() const1008 Zone* RegExpBytecodePeephole::zone() const { return zone_; }
1009 
1010 }  // namespace
1011 
1012 // static
OptimizeBytecode( Isolate* isolate, Zone* zone, Handle<String> source, const byte* bytecode, int length, const ZoneUnorderedMap<int, int>& jump_edges)1013 Handle<ByteArray> RegExpBytecodePeepholeOptimization::OptimizeBytecode(
1014     Isolate* isolate, Zone* zone, Handle<String> source, const byte* bytecode,
1015     int length, const ZoneUnorderedMap<int, int>& jump_edges) {
1016   RegExpBytecodePeephole peephole(zone, length, jump_edges);
1017   bool did_optimize = peephole.OptimizeBytecode(bytecode, length);
1018   Handle<ByteArray> array = isolate->factory()->NewByteArray(peephole.Length());
1019   peephole.CopyOptimizedBytecode(array->GetDataStartAddress());
1020 
1021   if (did_optimize && FLAG_trace_regexp_peephole_optimization) {
1022     PrintF("Original Bytecode:\n");
1023     RegExpBytecodeDisassemble(bytecode, length, source->ToCString().get());
1024     PrintF("Optimized Bytecode:\n");
1025     RegExpBytecodeDisassemble(array->GetDataStartAddress(), peephole.Length(),
1026                               source->ToCString().get());
1027   }
1028 
1029   return array;
1030 }
1031 
1032 }  // namespace internal
1033 }  // namespace v8
1034