11cb0ef41Sopenharmony_ci// Copyright 2019 the V8 project authors. All rights reserved. 21cb0ef41Sopenharmony_ci// Use of this source code is governed by a BSD-style license that can be 31cb0ef41Sopenharmony_ci// found in the LICENSE file. 41cb0ef41Sopenharmony_ci 51cb0ef41Sopenharmony_ci#include "src/regexp/regexp-bytecode-peephole.h" 61cb0ef41Sopenharmony_ci 71cb0ef41Sopenharmony_ci#include "src/flags/flags.h" 81cb0ef41Sopenharmony_ci#include "src/objects/fixed-array-inl.h" 91cb0ef41Sopenharmony_ci#include "src/regexp/regexp-bytecodes.h" 101cb0ef41Sopenharmony_ci#include "src/utils/memcopy.h" 111cb0ef41Sopenharmony_ci#include "src/utils/utils.h" 121cb0ef41Sopenharmony_ci#include "src/zone/zone-containers.h" 131cb0ef41Sopenharmony_ci#include "src/zone/zone.h" 141cb0ef41Sopenharmony_ci 151cb0ef41Sopenharmony_cinamespace v8 { 161cb0ef41Sopenharmony_cinamespace internal { 171cb0ef41Sopenharmony_ci 181cb0ef41Sopenharmony_cinamespace { 191cb0ef41Sopenharmony_ci 201cb0ef41Sopenharmony_cistruct BytecodeArgument { 211cb0ef41Sopenharmony_ci int offset; 221cb0ef41Sopenharmony_ci int length; 231cb0ef41Sopenharmony_ci 241cb0ef41Sopenharmony_ci BytecodeArgument(int offset, int length) : offset(offset), length(length) {} 251cb0ef41Sopenharmony_ci}; 261cb0ef41Sopenharmony_ci 271cb0ef41Sopenharmony_cistruct BytecodeArgumentMapping : BytecodeArgument { 281cb0ef41Sopenharmony_ci int new_length; 291cb0ef41Sopenharmony_ci 301cb0ef41Sopenharmony_ci BytecodeArgumentMapping(int offset, int length, int new_length) 311cb0ef41Sopenharmony_ci : BytecodeArgument(offset, length), new_length(new_length) {} 321cb0ef41Sopenharmony_ci}; 331cb0ef41Sopenharmony_ci 341cb0ef41Sopenharmony_cistruct BytecodeArgumentCheck : BytecodeArgument { 351cb0ef41Sopenharmony_ci enum CheckType { kCheckAddress = 0, kCheckValue }; 361cb0ef41Sopenharmony_ci CheckType type; 371cb0ef41Sopenharmony_ci int check_offset; 381cb0ef41Sopenharmony_ci int check_length; 391cb0ef41Sopenharmony_ci 401cb0ef41Sopenharmony_ci BytecodeArgumentCheck(int offset, int length, int check_offset) 411cb0ef41Sopenharmony_ci : BytecodeArgument(offset, length), 421cb0ef41Sopenharmony_ci type(kCheckAddress), 431cb0ef41Sopenharmony_ci check_offset(check_offset) {} 441cb0ef41Sopenharmony_ci BytecodeArgumentCheck(int offset, int length, int check_offset, 451cb0ef41Sopenharmony_ci int check_length) 461cb0ef41Sopenharmony_ci : BytecodeArgument(offset, length), 471cb0ef41Sopenharmony_ci type(kCheckValue), 481cb0ef41Sopenharmony_ci check_offset(check_offset), 491cb0ef41Sopenharmony_ci check_length(check_length) {} 501cb0ef41Sopenharmony_ci}; 511cb0ef41Sopenharmony_ci 521cb0ef41Sopenharmony_ci// Trie-Node for storing bytecode sequences we want to optimize. 531cb0ef41Sopenharmony_ciclass BytecodeSequenceNode { 541cb0ef41Sopenharmony_ci public: 551cb0ef41Sopenharmony_ci // Dummy bytecode used when we need to store/return a bytecode but it's not a 561cb0ef41Sopenharmony_ci // valid bytecode in the current context. 571cb0ef41Sopenharmony_ci static constexpr int kDummyBytecode = -1; 581cb0ef41Sopenharmony_ci 591cb0ef41Sopenharmony_ci BytecodeSequenceNode(int bytecode, Zone* zone); 601cb0ef41Sopenharmony_ci // Adds a new node as child of the current node if it isn't a child already. 611cb0ef41Sopenharmony_ci BytecodeSequenceNode& FollowedBy(int bytecode); 621cb0ef41Sopenharmony_ci // Marks the end of a sequence and sets optimized bytecode to replace all 631cb0ef41Sopenharmony_ci // bytecodes of the sequence with. 641cb0ef41Sopenharmony_ci BytecodeSequenceNode& ReplaceWith(int bytecode); 651cb0ef41Sopenharmony_ci // Maps arguments of bytecodes in the sequence to the optimized bytecode. 661cb0ef41Sopenharmony_ci // Order of invocation determines order of arguments in the optimized 671cb0ef41Sopenharmony_ci // bytecode. 681cb0ef41Sopenharmony_ci // Invoking this method is only allowed on nodes that mark the end of a valid 691cb0ef41Sopenharmony_ci // sequence (i.e. after ReplaceWith()). 701cb0ef41Sopenharmony_ci // bytecode_index_in_sequence: Zero-based index of the referred bytecode 711cb0ef41Sopenharmony_ci // within the sequence (e.g. the bytecode passed to CreateSequence() has 721cb0ef41Sopenharmony_ci // index 0). 731cb0ef41Sopenharmony_ci // argument_offset: Zero-based offset to the argument within the bytecode 741cb0ef41Sopenharmony_ci // (e.g. the first argument that's not packed with the bytecode has offset 4). 751cb0ef41Sopenharmony_ci // argument_byte_length: Length of the argument. 761cb0ef41Sopenharmony_ci // new_argument_byte_length: Length of the argument in the new bytecode 771cb0ef41Sopenharmony_ci // (= argument_byte_length if omitted). 781cb0ef41Sopenharmony_ci BytecodeSequenceNode& MapArgument(int bytecode_index_in_sequence, 791cb0ef41Sopenharmony_ci int argument_offset, 801cb0ef41Sopenharmony_ci int argument_byte_length, 811cb0ef41Sopenharmony_ci int new_argument_byte_length = 0); 821cb0ef41Sopenharmony_ci // Adds a check to the sequence node making it only a valid sequence when the 831cb0ef41Sopenharmony_ci // argument of the current bytecode at the specified offset matches the offset 841cb0ef41Sopenharmony_ci // to check against. 851cb0ef41Sopenharmony_ci // argument_offset: Zero-based offset to the argument within the bytecode 861cb0ef41Sopenharmony_ci // (e.g. the first argument that's not packed with the bytecode has offset 4). 871cb0ef41Sopenharmony_ci // argument_byte_length: Length of the argument. 881cb0ef41Sopenharmony_ci // check_byte_offset: Zero-based offset relative to the beginning of the 891cb0ef41Sopenharmony_ci // sequence that needs to match the value given by argument_offset. (e.g. 901cb0ef41Sopenharmony_ci // check_byte_offset 0 matches the address of the first bytecode in the 911cb0ef41Sopenharmony_ci // sequence). 921cb0ef41Sopenharmony_ci BytecodeSequenceNode& IfArgumentEqualsOffset(int argument_offset, 931cb0ef41Sopenharmony_ci int argument_byte_length, 941cb0ef41Sopenharmony_ci int check_byte_offset); 951cb0ef41Sopenharmony_ci // Adds a check to the sequence node making it only a valid sequence when the 961cb0ef41Sopenharmony_ci // argument of the current bytecode at the specified offset matches the 971cb0ef41Sopenharmony_ci // argument of another bytecode in the sequence. 981cb0ef41Sopenharmony_ci // This is similar to IfArgumentEqualsOffset, except that this method matches 991cb0ef41Sopenharmony_ci // the values of both arguments. 1001cb0ef41Sopenharmony_ci BytecodeSequenceNode& IfArgumentEqualsValueAtOffset( 1011cb0ef41Sopenharmony_ci int argument_offset, int argument_byte_length, 1021cb0ef41Sopenharmony_ci int other_bytecode_index_in_sequence, int other_argument_offset, 1031cb0ef41Sopenharmony_ci int other_argument_byte_length); 1041cb0ef41Sopenharmony_ci // Marks an argument as unused. 1051cb0ef41Sopenharmony_ci // All arguments that are not mapped explicitly have to be marked as unused. 1061cb0ef41Sopenharmony_ci // bytecode_index_in_sequence: Zero-based index of the referred bytecode 1071cb0ef41Sopenharmony_ci // within the sequence (e.g. the bytecode passed to CreateSequence() has 1081cb0ef41Sopenharmony_ci // index 0). 1091cb0ef41Sopenharmony_ci // argument_offset: Zero-based offset to the argument within the bytecode 1101cb0ef41Sopenharmony_ci // (e.g. the first argument that's not packed with the bytecode has offset 4). 1111cb0ef41Sopenharmony_ci // argument_byte_length: Length of the argument. 1121cb0ef41Sopenharmony_ci BytecodeSequenceNode& IgnoreArgument(int bytecode_index_in_sequence, 1131cb0ef41Sopenharmony_ci int argument_offset, 1141cb0ef41Sopenharmony_ci int argument_byte_length); 1151cb0ef41Sopenharmony_ci // Checks if the current node is valid for the sequence. I.e. all conditions 1161cb0ef41Sopenharmony_ci // set by IfArgumentEqualsOffset and IfArgumentEquals are fulfilled by this 1171cb0ef41Sopenharmony_ci // node for the actual bytecode sequence. 1181cb0ef41Sopenharmony_ci bool CheckArguments(const byte* bytecode, int pc); 1191cb0ef41Sopenharmony_ci // Returns whether this node marks the end of a valid sequence (i.e. can be 1201cb0ef41Sopenharmony_ci // replaced with an optimized bytecode). 1211cb0ef41Sopenharmony_ci bool IsSequence() const; 1221cb0ef41Sopenharmony_ci // Returns the length of the sequence in bytes. 1231cb0ef41Sopenharmony_ci int SequenceLength() const; 1241cb0ef41Sopenharmony_ci // Returns the optimized bytecode for the node or kDummyBytecode if it is not 1251cb0ef41Sopenharmony_ci // the end of a valid sequence. 1261cb0ef41Sopenharmony_ci int OptimizedBytecode() const; 1271cb0ef41Sopenharmony_ci // Returns the child of the current node matching the given bytecode or 1281cb0ef41Sopenharmony_ci // nullptr if no such child is found. 1291cb0ef41Sopenharmony_ci BytecodeSequenceNode* Find(int bytecode) const; 1301cb0ef41Sopenharmony_ci // Returns number of arguments mapped to the current node. 1311cb0ef41Sopenharmony_ci // Invoking this method is only allowed on nodes that mark the end of a valid 1321cb0ef41Sopenharmony_ci // sequence (i.e. if IsSequence()) 1331cb0ef41Sopenharmony_ci size_t ArgumentSize() const; 1341cb0ef41Sopenharmony_ci // Returns the argument-mapping of the argument at index. 1351cb0ef41Sopenharmony_ci // Invoking this method is only allowed on nodes that mark the end of a valid 1361cb0ef41Sopenharmony_ci // sequence (i.e. if IsSequence()) 1371cb0ef41Sopenharmony_ci BytecodeArgumentMapping ArgumentMapping(size_t index) const; 1381cb0ef41Sopenharmony_ci // Returns an iterator to begin of ignored arguments. 1391cb0ef41Sopenharmony_ci // Invoking this method is only allowed on nodes that mark the end of a valid 1401cb0ef41Sopenharmony_ci // sequence (i.e. if IsSequence()) 1411cb0ef41Sopenharmony_ci ZoneLinkedList<BytecodeArgument>::iterator ArgumentIgnoredBegin() const; 1421cb0ef41Sopenharmony_ci // Returns an iterator to end of ignored arguments. 1431cb0ef41Sopenharmony_ci // Invoking this method is only allowed on nodes that mark the end of a valid 1441cb0ef41Sopenharmony_ci // sequence (i.e. if IsSequence()) 1451cb0ef41Sopenharmony_ci ZoneLinkedList<BytecodeArgument>::iterator ArgumentIgnoredEnd() const; 1461cb0ef41Sopenharmony_ci // Returns whether the current node has ignored argument or not. 1471cb0ef41Sopenharmony_ci bool HasIgnoredArguments() const; 1481cb0ef41Sopenharmony_ci 1491cb0ef41Sopenharmony_ci private: 1501cb0ef41Sopenharmony_ci // Returns a node in the sequence specified by its index within the sequence. 1511cb0ef41Sopenharmony_ci BytecodeSequenceNode& GetNodeByIndexInSequence(int index_in_sequence); 1521cb0ef41Sopenharmony_ci Zone* zone() const; 1531cb0ef41Sopenharmony_ci 1541cb0ef41Sopenharmony_ci int bytecode_; 1551cb0ef41Sopenharmony_ci int bytecode_replacement_; 1561cb0ef41Sopenharmony_ci int index_in_sequence_; 1571cb0ef41Sopenharmony_ci int start_offset_; 1581cb0ef41Sopenharmony_ci BytecodeSequenceNode* parent_; 1591cb0ef41Sopenharmony_ci ZoneUnorderedMap<int, BytecodeSequenceNode*> children_; 1601cb0ef41Sopenharmony_ci ZoneVector<BytecodeArgumentMapping>* argument_mapping_; 1611cb0ef41Sopenharmony_ci ZoneLinkedList<BytecodeArgumentCheck>* argument_check_; 1621cb0ef41Sopenharmony_ci ZoneLinkedList<BytecodeArgument>* argument_ignored_; 1631cb0ef41Sopenharmony_ci 1641cb0ef41Sopenharmony_ci Zone* zone_; 1651cb0ef41Sopenharmony_ci}; 1661cb0ef41Sopenharmony_ci 1671cb0ef41Sopenharmony_ci// These definitions are here in order to please the linker, which in debug mode 1681cb0ef41Sopenharmony_ci// sometimes requires static constants to be defined in .cc files. 1691cb0ef41Sopenharmony_ciconstexpr int BytecodeSequenceNode::kDummyBytecode; 1701cb0ef41Sopenharmony_ci 1711cb0ef41Sopenharmony_ciclass RegExpBytecodePeephole { 1721cb0ef41Sopenharmony_ci public: 1731cb0ef41Sopenharmony_ci RegExpBytecodePeephole(Zone* zone, size_t buffer_size, 1741cb0ef41Sopenharmony_ci const ZoneUnorderedMap<int, int>& jump_edges); 1751cb0ef41Sopenharmony_ci 1761cb0ef41Sopenharmony_ci // Parses bytecode and fills the internal buffer with the potentially 1771cb0ef41Sopenharmony_ci // optimized bytecode. Returns true when optimizations were performed, false 1781cb0ef41Sopenharmony_ci // otherwise. 1791cb0ef41Sopenharmony_ci bool OptimizeBytecode(const byte* bytecode, int length); 1801cb0ef41Sopenharmony_ci // Copies the internal bytecode buffer to another buffer. The caller is 1811cb0ef41Sopenharmony_ci // responsible for allocating/freeing the memory. 1821cb0ef41Sopenharmony_ci void CopyOptimizedBytecode(byte* to_address) const; 1831cb0ef41Sopenharmony_ci int Length() const; 1841cb0ef41Sopenharmony_ci 1851cb0ef41Sopenharmony_ci private: 1861cb0ef41Sopenharmony_ci // Sets up all sequences that are going to be used. 1871cb0ef41Sopenharmony_ci void DefineStandardSequences(); 1881cb0ef41Sopenharmony_ci // Starts a new bytecode sequence. 1891cb0ef41Sopenharmony_ci BytecodeSequenceNode& CreateSequence(int bytecode); 1901cb0ef41Sopenharmony_ci // Checks for optimization candidates at pc and emits optimized bytecode to 1911cb0ef41Sopenharmony_ci // the internal buffer. Returns the length of replaced bytecodes in bytes. 1921cb0ef41Sopenharmony_ci int TryOptimizeSequence(const byte* bytecode, int bytecode_length, 1931cb0ef41Sopenharmony_ci int start_pc); 1941cb0ef41Sopenharmony_ci // Emits optimized bytecode to the internal buffer. start_pc points to the 1951cb0ef41Sopenharmony_ci // start of the sequence in bytecode and last_node is the last 1961cb0ef41Sopenharmony_ci // BytecodeSequenceNode of the matching sequence found. 1971cb0ef41Sopenharmony_ci void EmitOptimization(int start_pc, const byte* bytecode, 1981cb0ef41Sopenharmony_ci const BytecodeSequenceNode& last_node); 1991cb0ef41Sopenharmony_ci // Adds a relative jump source fixup at pos. 2001cb0ef41Sopenharmony_ci // Jump source fixups are used to find offsets in the new bytecode that 2011cb0ef41Sopenharmony_ci // contain jump sources. 2021cb0ef41Sopenharmony_ci void AddJumpSourceFixup(int fixup, int pos); 2031cb0ef41Sopenharmony_ci // Adds a relative jump destination fixup at pos. 2041cb0ef41Sopenharmony_ci // Jump destination fixups are used to find offsets in the new bytecode that 2051cb0ef41Sopenharmony_ci // can be jumped to. 2061cb0ef41Sopenharmony_ci void AddJumpDestinationFixup(int fixup, int pos); 2071cb0ef41Sopenharmony_ci // Sets an absolute jump destination fixup at pos. 2081cb0ef41Sopenharmony_ci void SetJumpDestinationFixup(int fixup, int pos); 2091cb0ef41Sopenharmony_ci // Prepare internal structures used to fixup jumps. 2101cb0ef41Sopenharmony_ci void PrepareJumpStructures(const ZoneUnorderedMap<int, int>& jump_edges); 2111cb0ef41Sopenharmony_ci // Updates all jump targets in the new bytecode. 2121cb0ef41Sopenharmony_ci void FixJumps(); 2131cb0ef41Sopenharmony_ci // Update a single jump. 2141cb0ef41Sopenharmony_ci void FixJump(int jump_source, int jump_destination); 2151cb0ef41Sopenharmony_ci void AddSentinelFixups(int pos); 2161cb0ef41Sopenharmony_ci template <typename T> 2171cb0ef41Sopenharmony_ci void EmitValue(T value); 2181cb0ef41Sopenharmony_ci template <typename T> 2191cb0ef41Sopenharmony_ci void OverwriteValue(int offset, T value); 2201cb0ef41Sopenharmony_ci void CopyRangeToOutput(const byte* orig_bytecode, int start, int length); 2211cb0ef41Sopenharmony_ci void SetRange(byte value, int count); 2221cb0ef41Sopenharmony_ci void EmitArgument(int start_pc, const byte* bytecode, 2231cb0ef41Sopenharmony_ci BytecodeArgumentMapping arg); 2241cb0ef41Sopenharmony_ci int pc() const; 2251cb0ef41Sopenharmony_ci Zone* zone() const; 2261cb0ef41Sopenharmony_ci 2271cb0ef41Sopenharmony_ci ZoneVector<byte> optimized_bytecode_buffer_; 2281cb0ef41Sopenharmony_ci BytecodeSequenceNode* sequences_; 2291cb0ef41Sopenharmony_ci // Jumps used in old bytecode. 2301cb0ef41Sopenharmony_ci // Key: Jump source (offset where destination is stored in old bytecode) 2311cb0ef41Sopenharmony_ci // Value: Destination 2321cb0ef41Sopenharmony_ci ZoneMap<int, int> jump_edges_; 2331cb0ef41Sopenharmony_ci // Jumps used in new bytecode. 2341cb0ef41Sopenharmony_ci // Key: Jump source (offset where destination is stored in new bytecode) 2351cb0ef41Sopenharmony_ci // Value: Destination 2361cb0ef41Sopenharmony_ci ZoneMap<int, int> jump_edges_mapped_; 2371cb0ef41Sopenharmony_ci // Number of times a jump destination is used within the bytecode. 2381cb0ef41Sopenharmony_ci // Key: Jump destination (offset in old bytecode). 2391cb0ef41Sopenharmony_ci // Value: Number of times jump destination is used. 2401cb0ef41Sopenharmony_ci ZoneMap<int, int> jump_usage_counts_; 2411cb0ef41Sopenharmony_ci // Maps offsets in old bytecode to fixups of sources (delta to new bytecode). 2421cb0ef41Sopenharmony_ci // Key: Offset in old bytecode from where the fixup is valid. 2431cb0ef41Sopenharmony_ci // Value: Delta to map jump source from old bytecode to new bytecode in bytes. 2441cb0ef41Sopenharmony_ci ZoneMap<int, int> jump_source_fixups_; 2451cb0ef41Sopenharmony_ci // Maps offsets in old bytecode to fixups of destinations (delta to new 2461cb0ef41Sopenharmony_ci // bytecode). 2471cb0ef41Sopenharmony_ci // Key: Offset in old bytecode from where the fixup is valid. 2481cb0ef41Sopenharmony_ci // Value: Delta to map jump destinations from old bytecode to new bytecode in 2491cb0ef41Sopenharmony_ci // bytes. 2501cb0ef41Sopenharmony_ci ZoneMap<int, int> jump_destination_fixups_; 2511cb0ef41Sopenharmony_ci 2521cb0ef41Sopenharmony_ci Zone* zone_; 2531cb0ef41Sopenharmony_ci 2541cb0ef41Sopenharmony_ci DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpBytecodePeephole); 2551cb0ef41Sopenharmony_ci}; 2561cb0ef41Sopenharmony_ci 2571cb0ef41Sopenharmony_citemplate <typename T> 2581cb0ef41Sopenharmony_ciT GetValue(const byte* buffer, int pos) { 2591cb0ef41Sopenharmony_ci DCHECK(IsAligned(reinterpret_cast<Address>(buffer + pos), alignof(T))); 2601cb0ef41Sopenharmony_ci return *reinterpret_cast<const T*>(buffer + pos); 2611cb0ef41Sopenharmony_ci} 2621cb0ef41Sopenharmony_ci 2631cb0ef41Sopenharmony_ciint32_t GetArgumentValue(const byte* bytecode, int offset, int length) { 2641cb0ef41Sopenharmony_ci switch (length) { 2651cb0ef41Sopenharmony_ci case 1: 2661cb0ef41Sopenharmony_ci return GetValue<byte>(bytecode, offset); 2671cb0ef41Sopenharmony_ci case 2: 2681cb0ef41Sopenharmony_ci return GetValue<int16_t>(bytecode, offset); 2691cb0ef41Sopenharmony_ci case 4: 2701cb0ef41Sopenharmony_ci return GetValue<int32_t>(bytecode, offset); 2711cb0ef41Sopenharmony_ci default: 2721cb0ef41Sopenharmony_ci UNREACHABLE(); 2731cb0ef41Sopenharmony_ci } 2741cb0ef41Sopenharmony_ci} 2751cb0ef41Sopenharmony_ci 2761cb0ef41Sopenharmony_ciBytecodeSequenceNode::BytecodeSequenceNode(int bytecode, Zone* zone) 2771cb0ef41Sopenharmony_ci : bytecode_(bytecode), 2781cb0ef41Sopenharmony_ci bytecode_replacement_(kDummyBytecode), 2791cb0ef41Sopenharmony_ci index_in_sequence_(0), 2801cb0ef41Sopenharmony_ci start_offset_(0), 2811cb0ef41Sopenharmony_ci parent_(nullptr), 2821cb0ef41Sopenharmony_ci children_(ZoneUnorderedMap<int, BytecodeSequenceNode*>(zone)), 2831cb0ef41Sopenharmony_ci argument_mapping_(zone->New<ZoneVector<BytecodeArgumentMapping>>(zone)), 2841cb0ef41Sopenharmony_ci argument_check_(zone->New<ZoneLinkedList<BytecodeArgumentCheck>>(zone)), 2851cb0ef41Sopenharmony_ci argument_ignored_(zone->New<ZoneLinkedList<BytecodeArgument>>(zone)), 2861cb0ef41Sopenharmony_ci zone_(zone) {} 2871cb0ef41Sopenharmony_ci 2881cb0ef41Sopenharmony_ciBytecodeSequenceNode& BytecodeSequenceNode::FollowedBy(int bytecode) { 2891cb0ef41Sopenharmony_ci DCHECK(0 <= bytecode && bytecode < kRegExpBytecodeCount); 2901cb0ef41Sopenharmony_ci 2911cb0ef41Sopenharmony_ci if (children_.find(bytecode) == children_.end()) { 2921cb0ef41Sopenharmony_ci BytecodeSequenceNode* new_node = 2931cb0ef41Sopenharmony_ci zone()->New<BytecodeSequenceNode>(bytecode, zone()); 2941cb0ef41Sopenharmony_ci // If node is not the first in the sequence, set offsets and parent. 2951cb0ef41Sopenharmony_ci if (bytecode_ != kDummyBytecode) { 2961cb0ef41Sopenharmony_ci new_node->start_offset_ = start_offset_ + RegExpBytecodeLength(bytecode_); 2971cb0ef41Sopenharmony_ci new_node->index_in_sequence_ = index_in_sequence_ + 1; 2981cb0ef41Sopenharmony_ci new_node->parent_ = this; 2991cb0ef41Sopenharmony_ci } 3001cb0ef41Sopenharmony_ci children_[bytecode] = new_node; 3011cb0ef41Sopenharmony_ci } 3021cb0ef41Sopenharmony_ci 3031cb0ef41Sopenharmony_ci return *children_[bytecode]; 3041cb0ef41Sopenharmony_ci} 3051cb0ef41Sopenharmony_ci 3061cb0ef41Sopenharmony_ciBytecodeSequenceNode& BytecodeSequenceNode::ReplaceWith(int bytecode) { 3071cb0ef41Sopenharmony_ci DCHECK(0 <= bytecode && bytecode < kRegExpBytecodeCount); 3081cb0ef41Sopenharmony_ci 3091cb0ef41Sopenharmony_ci bytecode_replacement_ = bytecode; 3101cb0ef41Sopenharmony_ci 3111cb0ef41Sopenharmony_ci return *this; 3121cb0ef41Sopenharmony_ci} 3131cb0ef41Sopenharmony_ci 3141cb0ef41Sopenharmony_ciBytecodeSequenceNode& BytecodeSequenceNode::MapArgument( 3151cb0ef41Sopenharmony_ci int bytecode_index_in_sequence, int argument_offset, 3161cb0ef41Sopenharmony_ci int argument_byte_length, int new_argument_byte_length) { 3171cb0ef41Sopenharmony_ci DCHECK(IsSequence()); 3181cb0ef41Sopenharmony_ci DCHECK_LE(bytecode_index_in_sequence, index_in_sequence_); 3191cb0ef41Sopenharmony_ci 3201cb0ef41Sopenharmony_ci BytecodeSequenceNode& ref_node = 3211cb0ef41Sopenharmony_ci GetNodeByIndexInSequence(bytecode_index_in_sequence); 3221cb0ef41Sopenharmony_ci DCHECK_LT(argument_offset, RegExpBytecodeLength(ref_node.bytecode_)); 3231cb0ef41Sopenharmony_ci 3241cb0ef41Sopenharmony_ci int absolute_offset = ref_node.start_offset_ + argument_offset; 3251cb0ef41Sopenharmony_ci if (new_argument_byte_length == 0) { 3261cb0ef41Sopenharmony_ci new_argument_byte_length = argument_byte_length; 3271cb0ef41Sopenharmony_ci } 3281cb0ef41Sopenharmony_ci 3291cb0ef41Sopenharmony_ci argument_mapping_->push_back(BytecodeArgumentMapping{ 3301cb0ef41Sopenharmony_ci absolute_offset, argument_byte_length, new_argument_byte_length}); 3311cb0ef41Sopenharmony_ci 3321cb0ef41Sopenharmony_ci return *this; 3331cb0ef41Sopenharmony_ci} 3341cb0ef41Sopenharmony_ci 3351cb0ef41Sopenharmony_ciBytecodeSequenceNode& BytecodeSequenceNode::IfArgumentEqualsOffset( 3361cb0ef41Sopenharmony_ci int argument_offset, int argument_byte_length, int check_byte_offset) { 3371cb0ef41Sopenharmony_ci DCHECK_LT(argument_offset, RegExpBytecodeLength(bytecode_)); 3381cb0ef41Sopenharmony_ci DCHECK(argument_byte_length == 1 || argument_byte_length == 2 || 3391cb0ef41Sopenharmony_ci argument_byte_length == 4); 3401cb0ef41Sopenharmony_ci 3411cb0ef41Sopenharmony_ci int absolute_offset = start_offset_ + argument_offset; 3421cb0ef41Sopenharmony_ci 3431cb0ef41Sopenharmony_ci argument_check_->push_back(BytecodeArgumentCheck{ 3441cb0ef41Sopenharmony_ci absolute_offset, argument_byte_length, check_byte_offset}); 3451cb0ef41Sopenharmony_ci 3461cb0ef41Sopenharmony_ci return *this; 3471cb0ef41Sopenharmony_ci} 3481cb0ef41Sopenharmony_ci 3491cb0ef41Sopenharmony_ciBytecodeSequenceNode& BytecodeSequenceNode::IfArgumentEqualsValueAtOffset( 3501cb0ef41Sopenharmony_ci int argument_offset, int argument_byte_length, 3511cb0ef41Sopenharmony_ci int other_bytecode_index_in_sequence, int other_argument_offset, 3521cb0ef41Sopenharmony_ci int other_argument_byte_length) { 3531cb0ef41Sopenharmony_ci DCHECK_LT(argument_offset, RegExpBytecodeLength(bytecode_)); 3541cb0ef41Sopenharmony_ci DCHECK_LE(other_bytecode_index_in_sequence, index_in_sequence_); 3551cb0ef41Sopenharmony_ci DCHECK_EQ(argument_byte_length, other_argument_byte_length); 3561cb0ef41Sopenharmony_ci 3571cb0ef41Sopenharmony_ci BytecodeSequenceNode& ref_node = 3581cb0ef41Sopenharmony_ci GetNodeByIndexInSequence(other_bytecode_index_in_sequence); 3591cb0ef41Sopenharmony_ci DCHECK_LT(other_argument_offset, RegExpBytecodeLength(ref_node.bytecode_)); 3601cb0ef41Sopenharmony_ci 3611cb0ef41Sopenharmony_ci int absolute_offset = start_offset_ + argument_offset; 3621cb0ef41Sopenharmony_ci int other_absolute_offset = ref_node.start_offset_ + other_argument_offset; 3631cb0ef41Sopenharmony_ci 3641cb0ef41Sopenharmony_ci argument_check_->push_back( 3651cb0ef41Sopenharmony_ci BytecodeArgumentCheck{absolute_offset, argument_byte_length, 3661cb0ef41Sopenharmony_ci other_absolute_offset, other_argument_byte_length}); 3671cb0ef41Sopenharmony_ci 3681cb0ef41Sopenharmony_ci return *this; 3691cb0ef41Sopenharmony_ci} 3701cb0ef41Sopenharmony_ci 3711cb0ef41Sopenharmony_ciBytecodeSequenceNode& BytecodeSequenceNode::IgnoreArgument( 3721cb0ef41Sopenharmony_ci int bytecode_index_in_sequence, int argument_offset, 3731cb0ef41Sopenharmony_ci int argument_byte_length) { 3741cb0ef41Sopenharmony_ci DCHECK(IsSequence()); 3751cb0ef41Sopenharmony_ci DCHECK_LE(bytecode_index_in_sequence, index_in_sequence_); 3761cb0ef41Sopenharmony_ci 3771cb0ef41Sopenharmony_ci BytecodeSequenceNode& ref_node = 3781cb0ef41Sopenharmony_ci GetNodeByIndexInSequence(bytecode_index_in_sequence); 3791cb0ef41Sopenharmony_ci DCHECK_LT(argument_offset, RegExpBytecodeLength(ref_node.bytecode_)); 3801cb0ef41Sopenharmony_ci 3811cb0ef41Sopenharmony_ci int absolute_offset = ref_node.start_offset_ + argument_offset; 3821cb0ef41Sopenharmony_ci 3831cb0ef41Sopenharmony_ci argument_ignored_->push_back( 3841cb0ef41Sopenharmony_ci BytecodeArgument{absolute_offset, argument_byte_length}); 3851cb0ef41Sopenharmony_ci 3861cb0ef41Sopenharmony_ci return *this; 3871cb0ef41Sopenharmony_ci} 3881cb0ef41Sopenharmony_ci 3891cb0ef41Sopenharmony_cibool BytecodeSequenceNode::CheckArguments(const byte* bytecode, int pc) { 3901cb0ef41Sopenharmony_ci bool is_valid = true; 3911cb0ef41Sopenharmony_ci for (auto check_iter = argument_check_->begin(); 3921cb0ef41Sopenharmony_ci check_iter != argument_check_->end() && is_valid; check_iter++) { 3931cb0ef41Sopenharmony_ci auto value = 3941cb0ef41Sopenharmony_ci GetArgumentValue(bytecode, pc + check_iter->offset, check_iter->length); 3951cb0ef41Sopenharmony_ci if (check_iter->type == BytecodeArgumentCheck::kCheckAddress) { 3961cb0ef41Sopenharmony_ci is_valid &= value == pc + check_iter->check_offset; 3971cb0ef41Sopenharmony_ci } else if (check_iter->type == BytecodeArgumentCheck::kCheckValue) { 3981cb0ef41Sopenharmony_ci auto other_value = GetArgumentValue( 3991cb0ef41Sopenharmony_ci bytecode, pc + check_iter->check_offset, check_iter->check_length); 4001cb0ef41Sopenharmony_ci is_valid &= value == other_value; 4011cb0ef41Sopenharmony_ci } else { 4021cb0ef41Sopenharmony_ci UNREACHABLE(); 4031cb0ef41Sopenharmony_ci } 4041cb0ef41Sopenharmony_ci } 4051cb0ef41Sopenharmony_ci return is_valid; 4061cb0ef41Sopenharmony_ci} 4071cb0ef41Sopenharmony_ci 4081cb0ef41Sopenharmony_cibool BytecodeSequenceNode::IsSequence() const { 4091cb0ef41Sopenharmony_ci return bytecode_replacement_ != kDummyBytecode; 4101cb0ef41Sopenharmony_ci} 4111cb0ef41Sopenharmony_ci 4121cb0ef41Sopenharmony_ciint BytecodeSequenceNode::SequenceLength() const { 4131cb0ef41Sopenharmony_ci return start_offset_ + RegExpBytecodeLength(bytecode_); 4141cb0ef41Sopenharmony_ci} 4151cb0ef41Sopenharmony_ci 4161cb0ef41Sopenharmony_ciint BytecodeSequenceNode::OptimizedBytecode() const { 4171cb0ef41Sopenharmony_ci return bytecode_replacement_; 4181cb0ef41Sopenharmony_ci} 4191cb0ef41Sopenharmony_ci 4201cb0ef41Sopenharmony_ciBytecodeSequenceNode* BytecodeSequenceNode::Find(int bytecode) const { 4211cb0ef41Sopenharmony_ci auto found = children_.find(bytecode); 4221cb0ef41Sopenharmony_ci if (found == children_.end()) return nullptr; 4231cb0ef41Sopenharmony_ci return found->second; 4241cb0ef41Sopenharmony_ci} 4251cb0ef41Sopenharmony_ci 4261cb0ef41Sopenharmony_cisize_t BytecodeSequenceNode::ArgumentSize() const { 4271cb0ef41Sopenharmony_ci DCHECK(IsSequence()); 4281cb0ef41Sopenharmony_ci return argument_mapping_->size(); 4291cb0ef41Sopenharmony_ci} 4301cb0ef41Sopenharmony_ci 4311cb0ef41Sopenharmony_ciBytecodeArgumentMapping BytecodeSequenceNode::ArgumentMapping( 4321cb0ef41Sopenharmony_ci size_t index) const { 4331cb0ef41Sopenharmony_ci DCHECK(IsSequence()); 4341cb0ef41Sopenharmony_ci DCHECK(argument_mapping_ != nullptr); 4351cb0ef41Sopenharmony_ci DCHECK_LT(index, argument_mapping_->size()); 4361cb0ef41Sopenharmony_ci 4371cb0ef41Sopenharmony_ci return argument_mapping_->at(index); 4381cb0ef41Sopenharmony_ci} 4391cb0ef41Sopenharmony_ci 4401cb0ef41Sopenharmony_ciZoneLinkedList<BytecodeArgument>::iterator 4411cb0ef41Sopenharmony_ciBytecodeSequenceNode::ArgumentIgnoredBegin() const { 4421cb0ef41Sopenharmony_ci DCHECK(IsSequence()); 4431cb0ef41Sopenharmony_ci DCHECK(argument_ignored_ != nullptr); 4441cb0ef41Sopenharmony_ci return argument_ignored_->begin(); 4451cb0ef41Sopenharmony_ci} 4461cb0ef41Sopenharmony_ci 4471cb0ef41Sopenharmony_ciZoneLinkedList<BytecodeArgument>::iterator 4481cb0ef41Sopenharmony_ciBytecodeSequenceNode::ArgumentIgnoredEnd() const { 4491cb0ef41Sopenharmony_ci DCHECK(IsSequence()); 4501cb0ef41Sopenharmony_ci DCHECK(argument_ignored_ != nullptr); 4511cb0ef41Sopenharmony_ci return argument_ignored_->end(); 4521cb0ef41Sopenharmony_ci} 4531cb0ef41Sopenharmony_ci 4541cb0ef41Sopenharmony_cibool BytecodeSequenceNode::HasIgnoredArguments() const { 4551cb0ef41Sopenharmony_ci return argument_ignored_ != nullptr; 4561cb0ef41Sopenharmony_ci} 4571cb0ef41Sopenharmony_ci 4581cb0ef41Sopenharmony_ciBytecodeSequenceNode& BytecodeSequenceNode::GetNodeByIndexInSequence( 4591cb0ef41Sopenharmony_ci int index_in_sequence) { 4601cb0ef41Sopenharmony_ci DCHECK_LE(index_in_sequence, index_in_sequence_); 4611cb0ef41Sopenharmony_ci 4621cb0ef41Sopenharmony_ci if (index_in_sequence < index_in_sequence_) { 4631cb0ef41Sopenharmony_ci DCHECK(parent_ != nullptr); 4641cb0ef41Sopenharmony_ci return parent_->GetNodeByIndexInSequence(index_in_sequence); 4651cb0ef41Sopenharmony_ci } else { 4661cb0ef41Sopenharmony_ci return *this; 4671cb0ef41Sopenharmony_ci } 4681cb0ef41Sopenharmony_ci} 4691cb0ef41Sopenharmony_ci 4701cb0ef41Sopenharmony_ciZone* BytecodeSequenceNode::zone() const { return zone_; } 4711cb0ef41Sopenharmony_ci 4721cb0ef41Sopenharmony_ciRegExpBytecodePeephole::RegExpBytecodePeephole( 4731cb0ef41Sopenharmony_ci Zone* zone, size_t buffer_size, 4741cb0ef41Sopenharmony_ci const ZoneUnorderedMap<int, int>& jump_edges) 4751cb0ef41Sopenharmony_ci : optimized_bytecode_buffer_(zone), 4761cb0ef41Sopenharmony_ci sequences_(zone->New<BytecodeSequenceNode>( 4771cb0ef41Sopenharmony_ci BytecodeSequenceNode::kDummyBytecode, zone)), 4781cb0ef41Sopenharmony_ci jump_edges_(zone), 4791cb0ef41Sopenharmony_ci jump_edges_mapped_(zone), 4801cb0ef41Sopenharmony_ci jump_usage_counts_(zone), 4811cb0ef41Sopenharmony_ci jump_source_fixups_(zone), 4821cb0ef41Sopenharmony_ci jump_destination_fixups_(zone), 4831cb0ef41Sopenharmony_ci zone_(zone) { 4841cb0ef41Sopenharmony_ci optimized_bytecode_buffer_.reserve(buffer_size); 4851cb0ef41Sopenharmony_ci PrepareJumpStructures(jump_edges); 4861cb0ef41Sopenharmony_ci DefineStandardSequences(); 4871cb0ef41Sopenharmony_ci // Sentinel fixups at beginning of bytecode (position -1) so we don't have to 4881cb0ef41Sopenharmony_ci // check for end of iterator inside the fixup loop. 4891cb0ef41Sopenharmony_ci // In general fixups are deltas of original offsets of jump 4901cb0ef41Sopenharmony_ci // sources/destinations (in the old bytecode) to find them in the new 4911cb0ef41Sopenharmony_ci // bytecode. All jump targets are fixed after the new bytecode is fully 4921cb0ef41Sopenharmony_ci // emitted in the internal buffer. 4931cb0ef41Sopenharmony_ci AddSentinelFixups(-1); 4941cb0ef41Sopenharmony_ci // Sentinel fixups at end of (old) bytecode so we don't have to check for 4951cb0ef41Sopenharmony_ci // end of iterator inside the fixup loop. 4961cb0ef41Sopenharmony_ci DCHECK_LE(buffer_size, std::numeric_limits<int>::max()); 4971cb0ef41Sopenharmony_ci AddSentinelFixups(static_cast<int>(buffer_size)); 4981cb0ef41Sopenharmony_ci} 4991cb0ef41Sopenharmony_ci 5001cb0ef41Sopenharmony_civoid RegExpBytecodePeephole::DefineStandardSequences() { 5011cb0ef41Sopenharmony_ci // Commonly used sequences can be found by creating regexp bytecode traces 5021cb0ef41Sopenharmony_ci // (--trace-regexp-bytecodes) and using v8/tools/regexp-sequences.py. 5031cb0ef41Sopenharmony_ci CreateSequence(BC_LOAD_CURRENT_CHAR) 5041cb0ef41Sopenharmony_ci .FollowedBy(BC_CHECK_BIT_IN_TABLE) 5051cb0ef41Sopenharmony_ci .FollowedBy(BC_ADVANCE_CP_AND_GOTO) 5061cb0ef41Sopenharmony_ci // Sequence is only valid if the jump target of ADVANCE_CP_AND_GOTO is the 5071cb0ef41Sopenharmony_ci // first bytecode in this sequence. 5081cb0ef41Sopenharmony_ci .IfArgumentEqualsOffset(4, 4, 0) 5091cb0ef41Sopenharmony_ci .ReplaceWith(BC_SKIP_UNTIL_BIT_IN_TABLE) 5101cb0ef41Sopenharmony_ci .MapArgument(0, 1, 3) // load offset 5111cb0ef41Sopenharmony_ci .MapArgument(2, 1, 3, 4) // advance by 5121cb0ef41Sopenharmony_ci .MapArgument(1, 8, 16) // bit table 5131cb0ef41Sopenharmony_ci .MapArgument(1, 4, 4) // goto when match 5141cb0ef41Sopenharmony_ci .MapArgument(0, 4, 4) // goto on failure 5151cb0ef41Sopenharmony_ci .IgnoreArgument(2, 4, 4); // loop jump 5161cb0ef41Sopenharmony_ci 5171cb0ef41Sopenharmony_ci CreateSequence(BC_CHECK_CURRENT_POSITION) 5181cb0ef41Sopenharmony_ci .FollowedBy(BC_LOAD_CURRENT_CHAR_UNCHECKED) 5191cb0ef41Sopenharmony_ci .FollowedBy(BC_CHECK_CHAR) 5201cb0ef41Sopenharmony_ci .FollowedBy(BC_ADVANCE_CP_AND_GOTO) 5211cb0ef41Sopenharmony_ci // Sequence is only valid if the jump target of ADVANCE_CP_AND_GOTO is the 5221cb0ef41Sopenharmony_ci // first bytecode in this sequence. 5231cb0ef41Sopenharmony_ci .IfArgumentEqualsOffset(4, 4, 0) 5241cb0ef41Sopenharmony_ci .ReplaceWith(BC_SKIP_UNTIL_CHAR_POS_CHECKED) 5251cb0ef41Sopenharmony_ci .MapArgument(1, 1, 3) // load offset 5261cb0ef41Sopenharmony_ci .MapArgument(3, 1, 3, 2) // advance_by 5271cb0ef41Sopenharmony_ci .MapArgument(2, 1, 3, 2) // c 5281cb0ef41Sopenharmony_ci .MapArgument(0, 1, 3, 4) // eats at least 5291cb0ef41Sopenharmony_ci .MapArgument(2, 4, 4) // goto when match 5301cb0ef41Sopenharmony_ci .MapArgument(0, 4, 4) // goto on failure 5311cb0ef41Sopenharmony_ci .IgnoreArgument(3, 4, 4); // loop jump 5321cb0ef41Sopenharmony_ci 5331cb0ef41Sopenharmony_ci CreateSequence(BC_CHECK_CURRENT_POSITION) 5341cb0ef41Sopenharmony_ci .FollowedBy(BC_LOAD_CURRENT_CHAR_UNCHECKED) 5351cb0ef41Sopenharmony_ci .FollowedBy(BC_AND_CHECK_CHAR) 5361cb0ef41Sopenharmony_ci .FollowedBy(BC_ADVANCE_CP_AND_GOTO) 5371cb0ef41Sopenharmony_ci // Sequence is only valid if the jump target of ADVANCE_CP_AND_GOTO is the 5381cb0ef41Sopenharmony_ci // first bytecode in this sequence. 5391cb0ef41Sopenharmony_ci .IfArgumentEqualsOffset(4, 4, 0) 5401cb0ef41Sopenharmony_ci .ReplaceWith(BC_SKIP_UNTIL_CHAR_AND) 5411cb0ef41Sopenharmony_ci .MapArgument(1, 1, 3) // load offset 5421cb0ef41Sopenharmony_ci .MapArgument(3, 1, 3, 2) // advance_by 5431cb0ef41Sopenharmony_ci .MapArgument(2, 1, 3, 2) // c 5441cb0ef41Sopenharmony_ci .MapArgument(2, 4, 4) // mask 5451cb0ef41Sopenharmony_ci .MapArgument(0, 1, 3, 4) // eats at least 5461cb0ef41Sopenharmony_ci .MapArgument(2, 8, 4) // goto when match 5471cb0ef41Sopenharmony_ci .MapArgument(0, 4, 4) // goto on failure 5481cb0ef41Sopenharmony_ci .IgnoreArgument(3, 4, 4); // loop jump 5491cb0ef41Sopenharmony_ci 5501cb0ef41Sopenharmony_ci // TODO(pthier): It might make sense for short sequences like this one to only 5511cb0ef41Sopenharmony_ci // optimize them if the resulting optimization is not longer than the current 5521cb0ef41Sopenharmony_ci // one. This could be the case if there are jumps inside the sequence and we 5531cb0ef41Sopenharmony_ci // have to replicate parts of the sequence. A method to mark such sequences 5541cb0ef41Sopenharmony_ci // might be useful. 5551cb0ef41Sopenharmony_ci CreateSequence(BC_LOAD_CURRENT_CHAR) 5561cb0ef41Sopenharmony_ci .FollowedBy(BC_CHECK_CHAR) 5571cb0ef41Sopenharmony_ci .FollowedBy(BC_ADVANCE_CP_AND_GOTO) 5581cb0ef41Sopenharmony_ci // Sequence is only valid if the jump target of ADVANCE_CP_AND_GOTO is the 5591cb0ef41Sopenharmony_ci // first bytecode in this sequence. 5601cb0ef41Sopenharmony_ci .IfArgumentEqualsOffset(4, 4, 0) 5611cb0ef41Sopenharmony_ci .ReplaceWith(BC_SKIP_UNTIL_CHAR) 5621cb0ef41Sopenharmony_ci .MapArgument(0, 1, 3) // load offset 5631cb0ef41Sopenharmony_ci .MapArgument(2, 1, 3, 2) // advance by 5641cb0ef41Sopenharmony_ci .MapArgument(1, 1, 3, 2) // character 5651cb0ef41Sopenharmony_ci .MapArgument(1, 4, 4) // goto when match 5661cb0ef41Sopenharmony_ci .MapArgument(0, 4, 4) // goto on failure 5671cb0ef41Sopenharmony_ci .IgnoreArgument(2, 4, 4); // loop jump 5681cb0ef41Sopenharmony_ci 5691cb0ef41Sopenharmony_ci CreateSequence(BC_LOAD_CURRENT_CHAR) 5701cb0ef41Sopenharmony_ci .FollowedBy(BC_CHECK_CHAR) 5711cb0ef41Sopenharmony_ci .FollowedBy(BC_CHECK_CHAR) 5721cb0ef41Sopenharmony_ci // Sequence is only valid if the jump targets of both CHECK_CHAR bytecodes 5731cb0ef41Sopenharmony_ci // are equal. 5741cb0ef41Sopenharmony_ci .IfArgumentEqualsValueAtOffset(4, 4, 1, 4, 4) 5751cb0ef41Sopenharmony_ci .FollowedBy(BC_ADVANCE_CP_AND_GOTO) 5761cb0ef41Sopenharmony_ci // Sequence is only valid if the jump target of ADVANCE_CP_AND_GOTO is the 5771cb0ef41Sopenharmony_ci // first bytecode in this sequence. 5781cb0ef41Sopenharmony_ci .IfArgumentEqualsOffset(4, 4, 0) 5791cb0ef41Sopenharmony_ci .ReplaceWith(BC_SKIP_UNTIL_CHAR_OR_CHAR) 5801cb0ef41Sopenharmony_ci .MapArgument(0, 1, 3) // load offset 5811cb0ef41Sopenharmony_ci .MapArgument(3, 1, 3, 4) // advance by 5821cb0ef41Sopenharmony_ci .MapArgument(1, 1, 3, 2) // character 1 5831cb0ef41Sopenharmony_ci .MapArgument(2, 1, 3, 2) // character 2 5841cb0ef41Sopenharmony_ci .MapArgument(1, 4, 4) // goto when match 5851cb0ef41Sopenharmony_ci .MapArgument(0, 4, 4) // goto on failure 5861cb0ef41Sopenharmony_ci .IgnoreArgument(2, 4, 4) // goto when match 2 5871cb0ef41Sopenharmony_ci .IgnoreArgument(3, 4, 4); // loop jump 5881cb0ef41Sopenharmony_ci 5891cb0ef41Sopenharmony_ci CreateSequence(BC_LOAD_CURRENT_CHAR) 5901cb0ef41Sopenharmony_ci .FollowedBy(BC_CHECK_GT) 5911cb0ef41Sopenharmony_ci // Sequence is only valid if the jump target of CHECK_GT is the first 5921cb0ef41Sopenharmony_ci // bytecode AFTER the whole sequence. 5931cb0ef41Sopenharmony_ci .IfArgumentEqualsOffset(4, 4, 56) 5941cb0ef41Sopenharmony_ci .FollowedBy(BC_CHECK_BIT_IN_TABLE) 5951cb0ef41Sopenharmony_ci // Sequence is only valid if the jump target of CHECK_BIT_IN_TABLE is 5961cb0ef41Sopenharmony_ci // the ADVANCE_CP_AND_GOTO bytecode at the end of the sequence. 5971cb0ef41Sopenharmony_ci .IfArgumentEqualsOffset(4, 4, 48) 5981cb0ef41Sopenharmony_ci .FollowedBy(BC_GOTO) 5991cb0ef41Sopenharmony_ci // Sequence is only valid if the jump target of GOTO is the same as the 6001cb0ef41Sopenharmony_ci // jump target of CHECK_GT (i.e. both jump to the first bytecode AFTER the 6011cb0ef41Sopenharmony_ci // whole sequence. 6021cb0ef41Sopenharmony_ci .IfArgumentEqualsValueAtOffset(4, 4, 1, 4, 4) 6031cb0ef41Sopenharmony_ci .FollowedBy(BC_ADVANCE_CP_AND_GOTO) 6041cb0ef41Sopenharmony_ci // Sequence is only valid if the jump target of ADVANCE_CP_AND_GOTO is the 6051cb0ef41Sopenharmony_ci // first bytecode in this sequence. 6061cb0ef41Sopenharmony_ci .IfArgumentEqualsOffset(4, 4, 0) 6071cb0ef41Sopenharmony_ci .ReplaceWith(BC_SKIP_UNTIL_GT_OR_NOT_BIT_IN_TABLE) 6081cb0ef41Sopenharmony_ci .MapArgument(0, 1, 3) // load offset 6091cb0ef41Sopenharmony_ci .MapArgument(4, 1, 3, 2) // advance by 6101cb0ef41Sopenharmony_ci .MapArgument(1, 1, 3, 2) // character 6111cb0ef41Sopenharmony_ci .MapArgument(2, 8, 16) // bit table 6121cb0ef41Sopenharmony_ci .MapArgument(1, 4, 4) // goto when match 6131cb0ef41Sopenharmony_ci .MapArgument(0, 4, 4) // goto on failure 6141cb0ef41Sopenharmony_ci .IgnoreArgument(2, 4, 4) // indirect loop jump 6151cb0ef41Sopenharmony_ci .IgnoreArgument(3, 4, 4) // jump out of loop 6161cb0ef41Sopenharmony_ci .IgnoreArgument(4, 4, 4); // loop jump 6171cb0ef41Sopenharmony_ci} 6181cb0ef41Sopenharmony_ci 6191cb0ef41Sopenharmony_cibool RegExpBytecodePeephole::OptimizeBytecode(const byte* bytecode, 6201cb0ef41Sopenharmony_ci int length) { 6211cb0ef41Sopenharmony_ci int old_pc = 0; 6221cb0ef41Sopenharmony_ci bool did_optimize = false; 6231cb0ef41Sopenharmony_ci 6241cb0ef41Sopenharmony_ci while (old_pc < length) { 6251cb0ef41Sopenharmony_ci int replaced_len = TryOptimizeSequence(bytecode, length, old_pc); 6261cb0ef41Sopenharmony_ci if (replaced_len > 0) { 6271cb0ef41Sopenharmony_ci old_pc += replaced_len; 6281cb0ef41Sopenharmony_ci did_optimize = true; 6291cb0ef41Sopenharmony_ci } else { 6301cb0ef41Sopenharmony_ci int bc = bytecode[old_pc]; 6311cb0ef41Sopenharmony_ci int bc_len = RegExpBytecodeLength(bc); 6321cb0ef41Sopenharmony_ci CopyRangeToOutput(bytecode, old_pc, bc_len); 6331cb0ef41Sopenharmony_ci old_pc += bc_len; 6341cb0ef41Sopenharmony_ci } 6351cb0ef41Sopenharmony_ci } 6361cb0ef41Sopenharmony_ci 6371cb0ef41Sopenharmony_ci if (did_optimize) { 6381cb0ef41Sopenharmony_ci FixJumps(); 6391cb0ef41Sopenharmony_ci } 6401cb0ef41Sopenharmony_ci 6411cb0ef41Sopenharmony_ci return did_optimize; 6421cb0ef41Sopenharmony_ci} 6431cb0ef41Sopenharmony_ci 6441cb0ef41Sopenharmony_civoid RegExpBytecodePeephole::CopyOptimizedBytecode(byte* to_address) const { 6451cb0ef41Sopenharmony_ci MemCopy(to_address, &(*optimized_bytecode_buffer_.begin()), Length()); 6461cb0ef41Sopenharmony_ci} 6471cb0ef41Sopenharmony_ci 6481cb0ef41Sopenharmony_ciint RegExpBytecodePeephole::Length() const { return pc(); } 6491cb0ef41Sopenharmony_ci 6501cb0ef41Sopenharmony_ciBytecodeSequenceNode& RegExpBytecodePeephole::CreateSequence(int bytecode) { 6511cb0ef41Sopenharmony_ci DCHECK(sequences_ != nullptr); 6521cb0ef41Sopenharmony_ci DCHECK(0 <= bytecode && bytecode < kRegExpBytecodeCount); 6531cb0ef41Sopenharmony_ci 6541cb0ef41Sopenharmony_ci return sequences_->FollowedBy(bytecode); 6551cb0ef41Sopenharmony_ci} 6561cb0ef41Sopenharmony_ci 6571cb0ef41Sopenharmony_ciint RegExpBytecodePeephole::TryOptimizeSequence(const byte* bytecode, 6581cb0ef41Sopenharmony_ci int bytecode_length, 6591cb0ef41Sopenharmony_ci int start_pc) { 6601cb0ef41Sopenharmony_ci BytecodeSequenceNode* seq_node = sequences_; 6611cb0ef41Sopenharmony_ci BytecodeSequenceNode* valid_seq_end = nullptr; 6621cb0ef41Sopenharmony_ci 6631cb0ef41Sopenharmony_ci int current_pc = start_pc; 6641cb0ef41Sopenharmony_ci 6651cb0ef41Sopenharmony_ci // Check for the longest valid sequence matching any of the pre-defined 6661cb0ef41Sopenharmony_ci // sequences in the Trie data structure. 6671cb0ef41Sopenharmony_ci while (current_pc < bytecode_length) { 6681cb0ef41Sopenharmony_ci seq_node = seq_node->Find(bytecode[current_pc]); 6691cb0ef41Sopenharmony_ci if (seq_node == nullptr) break; 6701cb0ef41Sopenharmony_ci if (!seq_node->CheckArguments(bytecode, start_pc)) break; 6711cb0ef41Sopenharmony_ci 6721cb0ef41Sopenharmony_ci if (seq_node->IsSequence()) valid_seq_end = seq_node; 6731cb0ef41Sopenharmony_ci current_pc += RegExpBytecodeLength(bytecode[current_pc]); 6741cb0ef41Sopenharmony_ci } 6751cb0ef41Sopenharmony_ci 6761cb0ef41Sopenharmony_ci if (valid_seq_end) { 6771cb0ef41Sopenharmony_ci EmitOptimization(start_pc, bytecode, *valid_seq_end); 6781cb0ef41Sopenharmony_ci return valid_seq_end->SequenceLength(); 6791cb0ef41Sopenharmony_ci } 6801cb0ef41Sopenharmony_ci 6811cb0ef41Sopenharmony_ci return 0; 6821cb0ef41Sopenharmony_ci} 6831cb0ef41Sopenharmony_ci 6841cb0ef41Sopenharmony_civoid RegExpBytecodePeephole::EmitOptimization( 6851cb0ef41Sopenharmony_ci int start_pc, const byte* bytecode, const BytecodeSequenceNode& last_node) { 6861cb0ef41Sopenharmony_ci#ifdef DEBUG 6871cb0ef41Sopenharmony_ci int optimized_start_pc = pc(); 6881cb0ef41Sopenharmony_ci#endif 6891cb0ef41Sopenharmony_ci // Jump sources that are mapped or marked as unused will be deleted at the end 6901cb0ef41Sopenharmony_ci // of this method. We don't delete them immediately as we might need the 6911cb0ef41Sopenharmony_ci // information when we have to preserve bytecodes at the end. 6921cb0ef41Sopenharmony_ci // TODO(pthier): Replace with a stack-allocated data structure. 6931cb0ef41Sopenharmony_ci ZoneLinkedList<int> delete_jumps = ZoneLinkedList<int>(zone()); 6941cb0ef41Sopenharmony_ci 6951cb0ef41Sopenharmony_ci uint32_t bc = last_node.OptimizedBytecode(); 6961cb0ef41Sopenharmony_ci EmitValue(bc); 6971cb0ef41Sopenharmony_ci 6981cb0ef41Sopenharmony_ci for (size_t arg = 0; arg < last_node.ArgumentSize(); arg++) { 6991cb0ef41Sopenharmony_ci BytecodeArgumentMapping arg_map = last_node.ArgumentMapping(arg); 7001cb0ef41Sopenharmony_ci int arg_pos = start_pc + arg_map.offset; 7011cb0ef41Sopenharmony_ci // If we map any jump source we mark the old source for deletion and insert 7021cb0ef41Sopenharmony_ci // a new jump. 7031cb0ef41Sopenharmony_ci auto jump_edge_iter = jump_edges_.find(arg_pos); 7041cb0ef41Sopenharmony_ci if (jump_edge_iter != jump_edges_.end()) { 7051cb0ef41Sopenharmony_ci int jump_source = jump_edge_iter->first; 7061cb0ef41Sopenharmony_ci int jump_destination = jump_edge_iter->second; 7071cb0ef41Sopenharmony_ci // Add new jump edge add current position. 7081cb0ef41Sopenharmony_ci jump_edges_mapped_.emplace(Length(), jump_destination); 7091cb0ef41Sopenharmony_ci // Mark old jump edge for deletion. 7101cb0ef41Sopenharmony_ci delete_jumps.push_back(jump_source); 7111cb0ef41Sopenharmony_ci // Decrement usage count of jump destination. 7121cb0ef41Sopenharmony_ci auto jump_count_iter = jump_usage_counts_.find(jump_destination); 7131cb0ef41Sopenharmony_ci DCHECK(jump_count_iter != jump_usage_counts_.end()); 7141cb0ef41Sopenharmony_ci int& usage_count = jump_count_iter->second; 7151cb0ef41Sopenharmony_ci --usage_count; 7161cb0ef41Sopenharmony_ci } 7171cb0ef41Sopenharmony_ci // TODO(pthier): DCHECK that mapped arguments are never sources of jumps 7181cb0ef41Sopenharmony_ci // to destinations inside the sequence. 7191cb0ef41Sopenharmony_ci EmitArgument(start_pc, bytecode, arg_map); 7201cb0ef41Sopenharmony_ci } 7211cb0ef41Sopenharmony_ci DCHECK_EQ(pc(), optimized_start_pc + 7221cb0ef41Sopenharmony_ci RegExpBytecodeLength(last_node.OptimizedBytecode())); 7231cb0ef41Sopenharmony_ci 7241cb0ef41Sopenharmony_ci // Remove jumps from arguments we ignore. 7251cb0ef41Sopenharmony_ci if (last_node.HasIgnoredArguments()) { 7261cb0ef41Sopenharmony_ci for (auto ignored_arg = last_node.ArgumentIgnoredBegin(); 7271cb0ef41Sopenharmony_ci ignored_arg != last_node.ArgumentIgnoredEnd(); ignored_arg++) { 7281cb0ef41Sopenharmony_ci auto jump_edge_iter = jump_edges_.find(start_pc + ignored_arg->offset); 7291cb0ef41Sopenharmony_ci if (jump_edge_iter != jump_edges_.end()) { 7301cb0ef41Sopenharmony_ci int jump_source = jump_edge_iter->first; 7311cb0ef41Sopenharmony_ci int jump_destination = jump_edge_iter->second; 7321cb0ef41Sopenharmony_ci // Mark old jump edge for deletion. 7331cb0ef41Sopenharmony_ci delete_jumps.push_back(jump_source); 7341cb0ef41Sopenharmony_ci // Decrement usage count of jump destination. 7351cb0ef41Sopenharmony_ci auto jump_count_iter = jump_usage_counts_.find(jump_destination); 7361cb0ef41Sopenharmony_ci DCHECK(jump_count_iter != jump_usage_counts_.end()); 7371cb0ef41Sopenharmony_ci int& usage_count = jump_count_iter->second; 7381cb0ef41Sopenharmony_ci --usage_count; 7391cb0ef41Sopenharmony_ci } 7401cb0ef41Sopenharmony_ci } 7411cb0ef41Sopenharmony_ci } 7421cb0ef41Sopenharmony_ci 7431cb0ef41Sopenharmony_ci int fixup_length = RegExpBytecodeLength(bc) - last_node.SequenceLength(); 7441cb0ef41Sopenharmony_ci 7451cb0ef41Sopenharmony_ci // Check if there are any jumps inside the old sequence. 7461cb0ef41Sopenharmony_ci // If so we have to keep the bytecodes that are jumped to around. 7471cb0ef41Sopenharmony_ci auto jump_destination_candidate = jump_usage_counts_.upper_bound(start_pc); 7481cb0ef41Sopenharmony_ci int jump_candidate_destination = jump_destination_candidate->first; 7491cb0ef41Sopenharmony_ci int jump_candidate_count = jump_destination_candidate->second; 7501cb0ef41Sopenharmony_ci // Jump destinations only jumped to from inside the sequence will be ignored. 7511cb0ef41Sopenharmony_ci while (jump_destination_candidate != jump_usage_counts_.end() && 7521cb0ef41Sopenharmony_ci jump_candidate_count == 0) { 7531cb0ef41Sopenharmony_ci ++jump_destination_candidate; 7541cb0ef41Sopenharmony_ci jump_candidate_destination = jump_destination_candidate->first; 7551cb0ef41Sopenharmony_ci jump_candidate_count = jump_destination_candidate->second; 7561cb0ef41Sopenharmony_ci } 7571cb0ef41Sopenharmony_ci 7581cb0ef41Sopenharmony_ci int preserve_from = start_pc + last_node.SequenceLength(); 7591cb0ef41Sopenharmony_ci if (jump_destination_candidate != jump_usage_counts_.end() && 7601cb0ef41Sopenharmony_ci jump_candidate_destination < start_pc + last_node.SequenceLength()) { 7611cb0ef41Sopenharmony_ci preserve_from = jump_candidate_destination; 7621cb0ef41Sopenharmony_ci // Check if any jump in the sequence we are preserving has a jump 7631cb0ef41Sopenharmony_ci // destination inside the optimized sequence before the current position we 7641cb0ef41Sopenharmony_ci // want to preserve. If so we have to preserve all bytecodes starting at 7651cb0ef41Sopenharmony_ci // this jump destination. 7661cb0ef41Sopenharmony_ci for (auto jump_iter = jump_edges_.lower_bound(preserve_from); 7671cb0ef41Sopenharmony_ci jump_iter != jump_edges_.end() && 7681cb0ef41Sopenharmony_ci jump_iter->first /* jump source */ < 7691cb0ef41Sopenharmony_ci start_pc + last_node.SequenceLength(); 7701cb0ef41Sopenharmony_ci ++jump_iter) { 7711cb0ef41Sopenharmony_ci int jump_destination = jump_iter->second; 7721cb0ef41Sopenharmony_ci if (jump_destination > start_pc && jump_destination < preserve_from) { 7731cb0ef41Sopenharmony_ci preserve_from = jump_destination; 7741cb0ef41Sopenharmony_ci } 7751cb0ef41Sopenharmony_ci } 7761cb0ef41Sopenharmony_ci 7771cb0ef41Sopenharmony_ci // We preserve everything to the end of the sequence. This is conservative 7781cb0ef41Sopenharmony_ci // since it would be enough to preserve all bytecudes up to an unconditional 7791cb0ef41Sopenharmony_ci // jump. 7801cb0ef41Sopenharmony_ci int preserve_length = start_pc + last_node.SequenceLength() - preserve_from; 7811cb0ef41Sopenharmony_ci fixup_length += preserve_length; 7821cb0ef41Sopenharmony_ci // Jumps after the start of the preserved sequence need fixup. 7831cb0ef41Sopenharmony_ci AddJumpSourceFixup(fixup_length, 7841cb0ef41Sopenharmony_ci start_pc + last_node.SequenceLength() - preserve_length); 7851cb0ef41Sopenharmony_ci // All jump targets after the start of the optimized sequence need to be 7861cb0ef41Sopenharmony_ci // fixed relative to the length of the optimized sequence including 7871cb0ef41Sopenharmony_ci // bytecodes we preserved. 7881cb0ef41Sopenharmony_ci AddJumpDestinationFixup(fixup_length, start_pc + 1); 7891cb0ef41Sopenharmony_ci // Jumps to the sequence we preserved need absolute fixup as they could 7901cb0ef41Sopenharmony_ci // occur before or after the sequence. 7911cb0ef41Sopenharmony_ci SetJumpDestinationFixup(pc() - preserve_from, preserve_from); 7921cb0ef41Sopenharmony_ci CopyRangeToOutput(bytecode, preserve_from, preserve_length); 7931cb0ef41Sopenharmony_ci } else { 7941cb0ef41Sopenharmony_ci AddJumpDestinationFixup(fixup_length, start_pc + 1); 7951cb0ef41Sopenharmony_ci // Jumps after the end of the old sequence need fixup. 7961cb0ef41Sopenharmony_ci AddJumpSourceFixup(fixup_length, start_pc + last_node.SequenceLength()); 7971cb0ef41Sopenharmony_ci } 7981cb0ef41Sopenharmony_ci 7991cb0ef41Sopenharmony_ci // Delete jumps we definitely don't need anymore 8001cb0ef41Sopenharmony_ci for (int del : delete_jumps) { 8011cb0ef41Sopenharmony_ci if (del < preserve_from) { 8021cb0ef41Sopenharmony_ci jump_edges_.erase(del); 8031cb0ef41Sopenharmony_ci } 8041cb0ef41Sopenharmony_ci } 8051cb0ef41Sopenharmony_ci} 8061cb0ef41Sopenharmony_ci 8071cb0ef41Sopenharmony_civoid RegExpBytecodePeephole::AddJumpSourceFixup(int fixup, int pos) { 8081cb0ef41Sopenharmony_ci auto previous_fixup = jump_source_fixups_.lower_bound(pos); 8091cb0ef41Sopenharmony_ci DCHECK(previous_fixup != jump_source_fixups_.end()); 8101cb0ef41Sopenharmony_ci DCHECK(previous_fixup != jump_source_fixups_.begin()); 8111cb0ef41Sopenharmony_ci 8121cb0ef41Sopenharmony_ci int previous_fixup_value = (--previous_fixup)->second; 8131cb0ef41Sopenharmony_ci jump_source_fixups_[pos] = previous_fixup_value + fixup; 8141cb0ef41Sopenharmony_ci} 8151cb0ef41Sopenharmony_ci 8161cb0ef41Sopenharmony_civoid RegExpBytecodePeephole::AddJumpDestinationFixup(int fixup, int pos) { 8171cb0ef41Sopenharmony_ci auto previous_fixup = jump_destination_fixups_.lower_bound(pos); 8181cb0ef41Sopenharmony_ci DCHECK(previous_fixup != jump_destination_fixups_.end()); 8191cb0ef41Sopenharmony_ci DCHECK(previous_fixup != jump_destination_fixups_.begin()); 8201cb0ef41Sopenharmony_ci 8211cb0ef41Sopenharmony_ci int previous_fixup_value = (--previous_fixup)->second; 8221cb0ef41Sopenharmony_ci jump_destination_fixups_[pos] = previous_fixup_value + fixup; 8231cb0ef41Sopenharmony_ci} 8241cb0ef41Sopenharmony_ci 8251cb0ef41Sopenharmony_civoid RegExpBytecodePeephole::SetJumpDestinationFixup(int fixup, int pos) { 8261cb0ef41Sopenharmony_ci auto previous_fixup = jump_destination_fixups_.lower_bound(pos); 8271cb0ef41Sopenharmony_ci DCHECK(previous_fixup != jump_destination_fixups_.end()); 8281cb0ef41Sopenharmony_ci DCHECK(previous_fixup != jump_destination_fixups_.begin()); 8291cb0ef41Sopenharmony_ci 8301cb0ef41Sopenharmony_ci int previous_fixup_value = (--previous_fixup)->second; 8311cb0ef41Sopenharmony_ci jump_destination_fixups_.emplace(pos, fixup); 8321cb0ef41Sopenharmony_ci jump_destination_fixups_.emplace(pos + 1, previous_fixup_value); 8331cb0ef41Sopenharmony_ci} 8341cb0ef41Sopenharmony_ci 8351cb0ef41Sopenharmony_civoid RegExpBytecodePeephole::PrepareJumpStructures( 8361cb0ef41Sopenharmony_ci const ZoneUnorderedMap<int, int>& jump_edges) { 8371cb0ef41Sopenharmony_ci for (auto jump_edge : jump_edges) { 8381cb0ef41Sopenharmony_ci int jump_source = jump_edge.first; 8391cb0ef41Sopenharmony_ci int jump_destination = jump_edge.second; 8401cb0ef41Sopenharmony_ci 8411cb0ef41Sopenharmony_ci jump_edges_.emplace(jump_source, jump_destination); 8421cb0ef41Sopenharmony_ci jump_usage_counts_[jump_destination]++; 8431cb0ef41Sopenharmony_ci } 8441cb0ef41Sopenharmony_ci} 8451cb0ef41Sopenharmony_ci 8461cb0ef41Sopenharmony_civoid RegExpBytecodePeephole::FixJumps() { 8471cb0ef41Sopenharmony_ci int position_fixup = 0; 8481cb0ef41Sopenharmony_ci // Next position where fixup changes. 8491cb0ef41Sopenharmony_ci auto next_source_fixup = jump_source_fixups_.lower_bound(0); 8501cb0ef41Sopenharmony_ci int next_source_fixup_offset = next_source_fixup->first; 8511cb0ef41Sopenharmony_ci int next_source_fixup_value = next_source_fixup->second; 8521cb0ef41Sopenharmony_ci 8531cb0ef41Sopenharmony_ci for (auto jump_edge : jump_edges_) { 8541cb0ef41Sopenharmony_ci int jump_source = jump_edge.first; 8551cb0ef41Sopenharmony_ci int jump_destination = jump_edge.second; 8561cb0ef41Sopenharmony_ci while (jump_source >= next_source_fixup_offset) { 8571cb0ef41Sopenharmony_ci position_fixup = next_source_fixup_value; 8581cb0ef41Sopenharmony_ci ++next_source_fixup; 8591cb0ef41Sopenharmony_ci next_source_fixup_offset = next_source_fixup->first; 8601cb0ef41Sopenharmony_ci next_source_fixup_value = next_source_fixup->second; 8611cb0ef41Sopenharmony_ci } 8621cb0ef41Sopenharmony_ci jump_source += position_fixup; 8631cb0ef41Sopenharmony_ci 8641cb0ef41Sopenharmony_ci FixJump(jump_source, jump_destination); 8651cb0ef41Sopenharmony_ci } 8661cb0ef41Sopenharmony_ci 8671cb0ef41Sopenharmony_ci // Mapped jump edges don't need source fixups, as the position already is an 8681cb0ef41Sopenharmony_ci // offset in the new bytecode. 8691cb0ef41Sopenharmony_ci for (auto jump_edge : jump_edges_mapped_) { 8701cb0ef41Sopenharmony_ci int jump_source = jump_edge.first; 8711cb0ef41Sopenharmony_ci int jump_destination = jump_edge.second; 8721cb0ef41Sopenharmony_ci 8731cb0ef41Sopenharmony_ci FixJump(jump_source, jump_destination); 8741cb0ef41Sopenharmony_ci } 8751cb0ef41Sopenharmony_ci} 8761cb0ef41Sopenharmony_ci 8771cb0ef41Sopenharmony_civoid RegExpBytecodePeephole::FixJump(int jump_source, int jump_destination) { 8781cb0ef41Sopenharmony_ci int fixed_jump_destination = 8791cb0ef41Sopenharmony_ci jump_destination + 8801cb0ef41Sopenharmony_ci (--jump_destination_fixups_.upper_bound(jump_destination))->second; 8811cb0ef41Sopenharmony_ci DCHECK_LT(fixed_jump_destination, Length()); 8821cb0ef41Sopenharmony_ci#ifdef DEBUG 8831cb0ef41Sopenharmony_ci // TODO(pthier): This check could be better if we track the bytecodes 8841cb0ef41Sopenharmony_ci // actually used and check if we jump to one of them. 8851cb0ef41Sopenharmony_ci byte jump_bc = optimized_bytecode_buffer_[fixed_jump_destination]; 8861cb0ef41Sopenharmony_ci DCHECK_GT(jump_bc, 0); 8871cb0ef41Sopenharmony_ci DCHECK_LT(jump_bc, kRegExpBytecodeCount); 8881cb0ef41Sopenharmony_ci#endif 8891cb0ef41Sopenharmony_ci 8901cb0ef41Sopenharmony_ci if (jump_destination != fixed_jump_destination) { 8911cb0ef41Sopenharmony_ci OverwriteValue<uint32_t>(jump_source, fixed_jump_destination); 8921cb0ef41Sopenharmony_ci } 8931cb0ef41Sopenharmony_ci} 8941cb0ef41Sopenharmony_ci 8951cb0ef41Sopenharmony_civoid RegExpBytecodePeephole::AddSentinelFixups(int pos) { 8961cb0ef41Sopenharmony_ci jump_source_fixups_.emplace(pos, 0); 8971cb0ef41Sopenharmony_ci jump_destination_fixups_.emplace(pos, 0); 8981cb0ef41Sopenharmony_ci} 8991cb0ef41Sopenharmony_ci 9001cb0ef41Sopenharmony_citemplate <typename T> 9011cb0ef41Sopenharmony_civoid RegExpBytecodePeephole::EmitValue(T value) { 9021cb0ef41Sopenharmony_ci DCHECK(optimized_bytecode_buffer_.begin() + pc() == 9031cb0ef41Sopenharmony_ci optimized_bytecode_buffer_.end()); 9041cb0ef41Sopenharmony_ci byte* value_byte_iter = reinterpret_cast<byte*>(&value); 9051cb0ef41Sopenharmony_ci optimized_bytecode_buffer_.insert(optimized_bytecode_buffer_.end(), 9061cb0ef41Sopenharmony_ci value_byte_iter, 9071cb0ef41Sopenharmony_ci value_byte_iter + sizeof(T)); 9081cb0ef41Sopenharmony_ci} 9091cb0ef41Sopenharmony_ci 9101cb0ef41Sopenharmony_citemplate <typename T> 9111cb0ef41Sopenharmony_civoid RegExpBytecodePeephole::OverwriteValue(int offset, T value) { 9121cb0ef41Sopenharmony_ci byte* value_byte_iter = reinterpret_cast<byte*>(&value); 9131cb0ef41Sopenharmony_ci byte* value_byte_iter_end = value_byte_iter + sizeof(T); 9141cb0ef41Sopenharmony_ci while (value_byte_iter < value_byte_iter_end) { 9151cb0ef41Sopenharmony_ci optimized_bytecode_buffer_[offset++] = *value_byte_iter++; 9161cb0ef41Sopenharmony_ci } 9171cb0ef41Sopenharmony_ci} 9181cb0ef41Sopenharmony_ci 9191cb0ef41Sopenharmony_civoid RegExpBytecodePeephole::CopyRangeToOutput(const byte* orig_bytecode, 9201cb0ef41Sopenharmony_ci int start, int length) { 9211cb0ef41Sopenharmony_ci DCHECK(optimized_bytecode_buffer_.begin() + pc() == 9221cb0ef41Sopenharmony_ci optimized_bytecode_buffer_.end()); 9231cb0ef41Sopenharmony_ci optimized_bytecode_buffer_.insert(optimized_bytecode_buffer_.end(), 9241cb0ef41Sopenharmony_ci orig_bytecode + start, 9251cb0ef41Sopenharmony_ci orig_bytecode + start + length); 9261cb0ef41Sopenharmony_ci} 9271cb0ef41Sopenharmony_ci 9281cb0ef41Sopenharmony_civoid RegExpBytecodePeephole::SetRange(byte value, int count) { 9291cb0ef41Sopenharmony_ci DCHECK(optimized_bytecode_buffer_.begin() + pc() == 9301cb0ef41Sopenharmony_ci optimized_bytecode_buffer_.end()); 9311cb0ef41Sopenharmony_ci optimized_bytecode_buffer_.insert(optimized_bytecode_buffer_.end(), count, 9321cb0ef41Sopenharmony_ci value); 9331cb0ef41Sopenharmony_ci} 9341cb0ef41Sopenharmony_ci 9351cb0ef41Sopenharmony_civoid RegExpBytecodePeephole::EmitArgument(int start_pc, const byte* bytecode, 9361cb0ef41Sopenharmony_ci BytecodeArgumentMapping arg) { 9371cb0ef41Sopenharmony_ci int arg_pos = start_pc + arg.offset; 9381cb0ef41Sopenharmony_ci switch (arg.length) { 9391cb0ef41Sopenharmony_ci case 1: 9401cb0ef41Sopenharmony_ci DCHECK_EQ(arg.new_length, arg.length); 9411cb0ef41Sopenharmony_ci EmitValue(GetValue<byte>(bytecode, arg_pos)); 9421cb0ef41Sopenharmony_ci break; 9431cb0ef41Sopenharmony_ci case 2: 9441cb0ef41Sopenharmony_ci DCHECK_EQ(arg.new_length, arg.length); 9451cb0ef41Sopenharmony_ci EmitValue(GetValue<uint16_t>(bytecode, arg_pos)); 9461cb0ef41Sopenharmony_ci break; 9471cb0ef41Sopenharmony_ci case 3: { 9481cb0ef41Sopenharmony_ci // Length 3 only occurs in 'packed' arguments where the lowermost byte is 9491cb0ef41Sopenharmony_ci // the current bytecode, and the remaining 3 bytes are the packed value. 9501cb0ef41Sopenharmony_ci // 9511cb0ef41Sopenharmony_ci // We load 4 bytes from position - 1 and shift out the bytecode. 9521cb0ef41Sopenharmony_ci#ifdef V8_TARGET_BIG_ENDIAN 9531cb0ef41Sopenharmony_ci UNIMPLEMENTED(); 9541cb0ef41Sopenharmony_ci int32_t val = 0; 9551cb0ef41Sopenharmony_ci#else 9561cb0ef41Sopenharmony_ci int32_t val = GetValue<int32_t>(bytecode, arg_pos - 1) >> kBitsPerByte; 9571cb0ef41Sopenharmony_ci#endif // V8_TARGET_BIG_ENDIAN 9581cb0ef41Sopenharmony_ci 9591cb0ef41Sopenharmony_ci switch (arg.new_length) { 9601cb0ef41Sopenharmony_ci case 2: 9611cb0ef41Sopenharmony_ci EmitValue<uint16_t>(val); 9621cb0ef41Sopenharmony_ci break; 9631cb0ef41Sopenharmony_ci case 3: { 9641cb0ef41Sopenharmony_ci // Pack with previously emitted value. 9651cb0ef41Sopenharmony_ci auto prev_val = 9661cb0ef41Sopenharmony_ci GetValue<int32_t>(&(*optimized_bytecode_buffer_.begin()), 9671cb0ef41Sopenharmony_ci Length() - sizeof(uint32_t)); 9681cb0ef41Sopenharmony_ci#ifdef V8_TARGET_BIG_ENDIAN 9691cb0ef41Sopenharmony_ci UNIMPLEMENTED(); 9701cb0ef41Sopenharmony_ci USE(prev_val); 9711cb0ef41Sopenharmony_ci#else 9721cb0ef41Sopenharmony_ci DCHECK_EQ(prev_val & 0xFFFFFF00, 0); 9731cb0ef41Sopenharmony_ci OverwriteValue<uint32_t>( 9741cb0ef41Sopenharmony_ci pc() - sizeof(uint32_t), 9751cb0ef41Sopenharmony_ci (static_cast<uint32_t>(val) << 8) | (prev_val & 0xFF)); 9761cb0ef41Sopenharmony_ci#endif // V8_TARGET_BIG_ENDIAN 9771cb0ef41Sopenharmony_ci break; 9781cb0ef41Sopenharmony_ci } 9791cb0ef41Sopenharmony_ci case 4: 9801cb0ef41Sopenharmony_ci EmitValue<uint32_t>(val); 9811cb0ef41Sopenharmony_ci break; 9821cb0ef41Sopenharmony_ci } 9831cb0ef41Sopenharmony_ci break; 9841cb0ef41Sopenharmony_ci } 9851cb0ef41Sopenharmony_ci case 4: 9861cb0ef41Sopenharmony_ci DCHECK_EQ(arg.new_length, arg.length); 9871cb0ef41Sopenharmony_ci EmitValue(GetValue<uint32_t>(bytecode, arg_pos)); 9881cb0ef41Sopenharmony_ci break; 9891cb0ef41Sopenharmony_ci case 8: 9901cb0ef41Sopenharmony_ci DCHECK_EQ(arg.new_length, arg.length); 9911cb0ef41Sopenharmony_ci EmitValue(GetValue<uint64_t>(bytecode, arg_pos)); 9921cb0ef41Sopenharmony_ci break; 9931cb0ef41Sopenharmony_ci default: 9941cb0ef41Sopenharmony_ci CopyRangeToOutput(bytecode, arg_pos, 9951cb0ef41Sopenharmony_ci std::min(arg.length, arg.new_length)); 9961cb0ef41Sopenharmony_ci if (arg.length < arg.new_length) { 9971cb0ef41Sopenharmony_ci SetRange(0x00, arg.new_length - arg.length); 9981cb0ef41Sopenharmony_ci } 9991cb0ef41Sopenharmony_ci break; 10001cb0ef41Sopenharmony_ci } 10011cb0ef41Sopenharmony_ci} 10021cb0ef41Sopenharmony_ci 10031cb0ef41Sopenharmony_ciint RegExpBytecodePeephole::pc() const { 10041cb0ef41Sopenharmony_ci DCHECK_LE(optimized_bytecode_buffer_.size(), std::numeric_limits<int>::max()); 10051cb0ef41Sopenharmony_ci return static_cast<int>(optimized_bytecode_buffer_.size()); 10061cb0ef41Sopenharmony_ci} 10071cb0ef41Sopenharmony_ci 10081cb0ef41Sopenharmony_ciZone* RegExpBytecodePeephole::zone() const { return zone_; } 10091cb0ef41Sopenharmony_ci 10101cb0ef41Sopenharmony_ci} // namespace 10111cb0ef41Sopenharmony_ci 10121cb0ef41Sopenharmony_ci// static 10131cb0ef41Sopenharmony_ciHandle<ByteArray> RegExpBytecodePeepholeOptimization::OptimizeBytecode( 10141cb0ef41Sopenharmony_ci Isolate* isolate, Zone* zone, Handle<String> source, const byte* bytecode, 10151cb0ef41Sopenharmony_ci int length, const ZoneUnorderedMap<int, int>& jump_edges) { 10161cb0ef41Sopenharmony_ci RegExpBytecodePeephole peephole(zone, length, jump_edges); 10171cb0ef41Sopenharmony_ci bool did_optimize = peephole.OptimizeBytecode(bytecode, length); 10181cb0ef41Sopenharmony_ci Handle<ByteArray> array = isolate->factory()->NewByteArray(peephole.Length()); 10191cb0ef41Sopenharmony_ci peephole.CopyOptimizedBytecode(array->GetDataStartAddress()); 10201cb0ef41Sopenharmony_ci 10211cb0ef41Sopenharmony_ci if (did_optimize && FLAG_trace_regexp_peephole_optimization) { 10221cb0ef41Sopenharmony_ci PrintF("Original Bytecode:\n"); 10231cb0ef41Sopenharmony_ci RegExpBytecodeDisassemble(bytecode, length, source->ToCString().get()); 10241cb0ef41Sopenharmony_ci PrintF("Optimized Bytecode:\n"); 10251cb0ef41Sopenharmony_ci RegExpBytecodeDisassemble(array->GetDataStartAddress(), peephole.Length(), 10261cb0ef41Sopenharmony_ci source->ToCString().get()); 10271cb0ef41Sopenharmony_ci } 10281cb0ef41Sopenharmony_ci 10291cb0ef41Sopenharmony_ci return array; 10301cb0ef41Sopenharmony_ci} 10311cb0ef41Sopenharmony_ci 10321cb0ef41Sopenharmony_ci} // namespace internal 10331cb0ef41Sopenharmony_ci} // namespace v8 1034