11cb0ef41Sopenharmony_ci// Copyright 2012 the V8 project authors. All rights reserved.
21cb0ef41Sopenharmony_ci// Use of this source code is governed by a BSD-style license that can be
31cb0ef41Sopenharmony_ci// found in the LICENSE file.
41cb0ef41Sopenharmony_ci
51cb0ef41Sopenharmony_ci#ifndef V8_REGEXP_REGEXP_BYTECODE_GENERATOR_H_
61cb0ef41Sopenharmony_ci#define V8_REGEXP_REGEXP_BYTECODE_GENERATOR_H_
71cb0ef41Sopenharmony_ci
81cb0ef41Sopenharmony_ci#include "src/base/strings.h"
91cb0ef41Sopenharmony_ci#include "src/codegen/label.h"
101cb0ef41Sopenharmony_ci#include "src/regexp/regexp-macro-assembler.h"
111cb0ef41Sopenharmony_ci
121cb0ef41Sopenharmony_cinamespace v8 {
131cb0ef41Sopenharmony_cinamespace internal {
141cb0ef41Sopenharmony_ci
151cb0ef41Sopenharmony_ci// An assembler/generator for the Irregexp byte code.
// Derives from RegExpMacroAssembler; every public member below other than the
// constructor is declared `override`, so the contract of each operation is the
// one specified on the base class (see src/regexp/regexp-macro-assembler.h).
// Only the operations whose bodies are written inline here (the two
// Check*InRangeArray methods, stack_limit_slack, CanReadUnaligned) have
// behavior that is visible in this header.
161cb0ef41Sopenharmony_ciclass V8_EXPORT_PRIVATE RegExpBytecodeGenerator : public RegExpMacroAssembler {
171cb0ef41Sopenharmony_ci public:
181cb0ef41Sopenharmony_ci  // Create an assembler. Instructions and relocation information are emitted
191cb0ef41Sopenharmony_ci  // into a buffer, with the instructions starting from the beginning and the
201cb0ef41Sopenharmony_ci  // relocation information starting from the end of the buffer. See CodeDesc
211cb0ef41Sopenharmony_ci  // for a detailed comment on the layout (globals.h).
221cb0ef41Sopenharmony_ci  //
231cb0ef41Sopenharmony_ci  // The assembler allocates and grows its own buffer, and buffer_size
241cb0ef41Sopenharmony_ci  // determines the initial buffer size. The buffer is owned by the assembler
251cb0ef41Sopenharmony_ci  // and deallocated upon destruction of the assembler.
  //
  // NOTE(review): the comment above looks stale. This constructor takes only
  // (isolate, zone); there is no buffer_size parameter. The private constant
  // kInitialBufferSize is presumably what sizes the buffer initially, and
  // buffer_ is a ZoneVector, so its storage is presumably tied to `zone`
  // rather than freed by the destructor -- confirm against the .cc file.
261cb0ef41Sopenharmony_ci  RegExpBytecodeGenerator(Isolate* isolate, Zone* zone);
271cb0ef41Sopenharmony_ci  ~RegExpBytecodeGenerator() override;
281cb0ef41Sopenharmony_ci  // The byte-code interpreter checks on each push anyway.
  // Hence a constant slack of 1 is reported.
291cb0ef41Sopenharmony_ci  int stack_limit_slack() override { return 1; }
  // This implementation always reports that unaligned reads are unavailable.
301cb0ef41Sopenharmony_ci  bool CanReadUnaligned() const override { return false; }
  // Label binding, position/stack manipulation and control flow. Bodies are
  // not in this header.
311cb0ef41Sopenharmony_ci  void Bind(Label* label) override;
321cb0ef41Sopenharmony_ci  void AdvanceCurrentPosition(int by) override;  // Signed cp change.
331cb0ef41Sopenharmony_ci  void PopCurrentPosition() override;
341cb0ef41Sopenharmony_ci  void PushCurrentPosition() override;
351cb0ef41Sopenharmony_ci  void Backtrack() override;
361cb0ef41Sopenharmony_ci  void GoTo(Label* label) override;
371cb0ef41Sopenharmony_ci  void PushBacktrack(Label* label) override;
  // Note the asymmetry: Succeed() returns bool while Fail() returns void. The
  // meaning of the bool is defined by the base class, not by this header.
381cb0ef41Sopenharmony_ci  bool Succeed() override;
391cb0ef41Sopenharmony_ci  void Fail() override;
  // Register operations. Registers are addressed by integer index.
401cb0ef41Sopenharmony_ci  void PopRegister(int register_index) override;
411cb0ef41Sopenharmony_ci  void PushRegister(int register_index,
421cb0ef41Sopenharmony_ci                    StackCheckFlag check_stack_limit) override;
431cb0ef41Sopenharmony_ci  void AdvanceRegister(int reg, int by) override;  // r[reg] += by.
441cb0ef41Sopenharmony_ci  void SetCurrentPositionFromEnd(int by) override;
451cb0ef41Sopenharmony_ci  void SetRegister(int register_index, int to) override;
461cb0ef41Sopenharmony_ci  void WriteCurrentPositionToRegister(int reg, int cp_offset) override;
471cb0ef41Sopenharmony_ci  void ClearRegisters(int reg_from, int reg_to) override;
481cb0ef41Sopenharmony_ci  void ReadCurrentPositionFromRegister(int reg) override;
491cb0ef41Sopenharmony_ci  void WriteStackPointerToRegister(int reg) override;
501cb0ef41Sopenharmony_ci  void ReadStackPointerFromRegister(int reg) override;
  // Character loading and character tests. Each Check* method takes the label
  // to branch to when the named condition holds.
511cb0ef41Sopenharmony_ci  void LoadCurrentCharacterImpl(int cp_offset, Label* on_end_of_input,
521cb0ef41Sopenharmony_ci                                bool check_bounds, int characters,
531cb0ef41Sopenharmony_ci                                int eats_at_least) override;
541cb0ef41Sopenharmony_ci  void CheckCharacter(unsigned c, Label* on_equal) override;
551cb0ef41Sopenharmony_ci  void CheckCharacterAfterAnd(unsigned c, unsigned mask,
561cb0ef41Sopenharmony_ci                              Label* on_equal) override;
571cb0ef41Sopenharmony_ci  void CheckCharacterGT(base::uc16 limit, Label* on_greater) override;
581cb0ef41Sopenharmony_ci  void CheckCharacterLT(base::uc16 limit, Label* on_less) override;
591cb0ef41Sopenharmony_ci  void CheckGreedyLoop(Label* on_tos_equals_current_position) override;
601cb0ef41Sopenharmony_ci  void CheckAtStart(int cp_offset, Label* on_at_start) override;
611cb0ef41Sopenharmony_ci  void CheckNotAtStart(int cp_offset, Label* on_not_at_start) override;
621cb0ef41Sopenharmony_ci  void CheckNotCharacter(unsigned c, Label* on_not_equal) override;
631cb0ef41Sopenharmony_ci  void CheckNotCharacterAfterAnd(unsigned c, unsigned mask,
641cb0ef41Sopenharmony_ci                                 Label* on_not_equal) override;
651cb0ef41Sopenharmony_ci  void CheckNotCharacterAfterMinusAnd(base::uc16 c, base::uc16 minus,
661cb0ef41Sopenharmony_ci                                      base::uc16 mask,
671cb0ef41Sopenharmony_ci                                      Label* on_not_equal) override;
681cb0ef41Sopenharmony_ci  void CheckCharacterInRange(base::uc16 from, base::uc16 to,
691cb0ef41Sopenharmony_ci                             Label* on_in_range) override;
701cb0ef41Sopenharmony_ci  void CheckCharacterNotInRange(base::uc16 from, base::uc16 to,
711cb0ef41Sopenharmony_ci                                Label* on_not_in_range) override;
  // Always returns false without emitting anything and without touching
  // `ranges` or `on_in_range`; the reasons are given in the body comment.
721cb0ef41Sopenharmony_ci  bool CheckCharacterInRangeArray(const ZoneList<CharacterRange>* ranges,
731cb0ef41Sopenharmony_ci                                  Label* on_in_range) override {
741cb0ef41Sopenharmony_ci    // Disabled in the interpreter, because 1) there is no constant pool that
751cb0ef41Sopenharmony_ci    // could store the ByteArray pointer, 2) bytecode size limits are not as
761cb0ef41Sopenharmony_ci    // restrictive as code (e.g. branch distances on arm), 3) bytecode for
771cb0ef41Sopenharmony_ci    // large character classes is already quite compact.
781cb0ef41Sopenharmony_ci    // TODO(jgruber): Consider using BytecodeArrays (with a constant pool)
791cb0ef41Sopenharmony_ci    // instead of plain ByteArrays; then we could implement
801cb0ef41Sopenharmony_ci    // CheckCharacterInRangeArray in the interpreter.
811cb0ef41Sopenharmony_ci    return false;
821cb0ef41Sopenharmony_ci  }
  // Negated counterpart of CheckCharacterInRangeArray; likewise always returns
  // false and ignores both arguments.
831cb0ef41Sopenharmony_ci  bool CheckCharacterNotInRangeArray(const ZoneList<CharacterRange>* ranges,
841cb0ef41Sopenharmony_ci                                     Label* on_not_in_range) override {
851cb0ef41Sopenharmony_ci    return false;
861cb0ef41Sopenharmony_ci  }
871cb0ef41Sopenharmony_ci  void CheckBitInTable(Handle<ByteArray> table, Label* on_bit_set) override;
  // Back-reference checks; `start_reg` is the index of the first register of
  // the capture (presumably the start/end register pair -- confirm).
881cb0ef41Sopenharmony_ci  void CheckNotBackReference(int start_reg, bool read_backward,
891cb0ef41Sopenharmony_ci                             Label* on_no_match) override;
901cb0ef41Sopenharmony_ci  void CheckNotBackReferenceIgnoreCase(int start_reg, bool read_backward,
911cb0ef41Sopenharmony_ci                                       bool unicode,
921cb0ef41Sopenharmony_ci                                       Label* on_no_match) override;
  // Conditional branches on register contents.
931cb0ef41Sopenharmony_ci  void IfRegisterLT(int register_index, int comparand, Label* if_lt) override;
941cb0ef41Sopenharmony_ci  void IfRegisterGE(int register_index, int comparand, Label* if_ge) override;
951cb0ef41Sopenharmony_ci  void IfRegisterEqPos(int register_index, Label* if_eq) override;
961cb0ef41Sopenharmony_ci
  // Implementation() reports which Irregexp implementation this assembler is;
  // GetCode() returns the finished code as a heap object handle. The concrete
  // return values are determined by the out-of-line definitions.
971cb0ef41Sopenharmony_ci  IrregexpImplementation Implementation() override;
981cb0ef41Sopenharmony_ci  Handle<HeapObject> GetCode(Handle<String> source) override;
991cb0ef41Sopenharmony_ci
1001cb0ef41Sopenharmony_ci private:
  // NOTE(review): presumably grows buffer_ when it is full -- body not shown
  // in this header.
1011cb0ef41Sopenharmony_ci  void ExpandBuffer();
1021cb0ef41Sopenharmony_ci
1031cb0ef41Sopenharmony_ci  // Code and bitmap emission.
  // These helpers are declared `inline` but have no body in this header, so
  // their definitions must be provided by another file that users include.
  // Emit8 and Emit16 take a uint32_t argument despite their names; any range
  // checking or truncation happens in the definitions, not here.
1041cb0ef41Sopenharmony_ci  inline void EmitOrLink(Label* label);
1051cb0ef41Sopenharmony_ci  inline void Emit32(uint32_t x);
1061cb0ef41Sopenharmony_ci  inline void Emit16(uint32_t x);
1071cb0ef41Sopenharmony_ci  inline void Emit8(uint32_t x);
  // Two overloads differing only in the signedness of `arg`.
1081cb0ef41Sopenharmony_ci  inline void Emit(uint32_t bc, uint32_t arg);
1091cb0ef41Sopenharmony_ci  inline void Emit(uint32_t bc, int32_t arg);
1101cb0ef41Sopenharmony_ci  // Bytecode buffer.
  // NOTE(review): length() presumably reports the number of bytes emitted so
  // far and Copy() presumably copies them to `a` -- confirm in the .cc file.
1111cb0ef41Sopenharmony_ci  int length();
1121cb0ef41Sopenharmony_ci  void Copy(byte* a);
1131cb0ef41Sopenharmony_ci
1141cb0ef41Sopenharmony_ci  // The buffer into which code and relocation info are generated.
1151cb0ef41Sopenharmony_ci  static constexpr int kInitialBufferSize = 1024;
1161cb0ef41Sopenharmony_ci  ZoneVector<byte> buffer_;
1171cb0ef41Sopenharmony_ci
1181cb0ef41Sopenharmony_ci  // The program counter.
  // NOTE(review): presumably the offset in buffer_ at which the next byte is
  // emitted -- confirm.
1191cb0ef41Sopenharmony_ci  int pc_;
  // NOTE(review): a Label owned by the generator; by name it is the target
  // used for backtracking -- confirm how it is bound in the .cc file.
1201cb0ef41Sopenharmony_ci  Label backtrack_;
1211cb0ef41Sopenharmony_ci
  // NOTE(review): undocumented in this header. By name these appear to record
  // where the most recent AdvanceCurrentPosition was emitted and its offset
  // -- confirm against the .cc file.
1221cb0ef41Sopenharmony_ci  int advance_current_start_;
1231cb0ef41Sopenharmony_ci  int advance_current_offset_;
1241cb0ef41Sopenharmony_ci  int advance_current_end_;
1251cb0ef41Sopenharmony_ci
1261cb0ef41Sopenharmony_ci  // Stores jump edges emitted for the bytecode (used by
1271cb0ef41Sopenharmony_ci  // RegExpBytecodePeepholeOptimization).
1281cb0ef41Sopenharmony_ci  // Key: jump source (offset in buffer_ where jump destination is stored).
1291cb0ef41Sopenharmony_ci  // Value: jump destination (offset in buffer_ to jump to).
1301cb0ef41Sopenharmony_ci  ZoneUnorderedMap<int, int> jump_edges_;
1311cb0ef41Sopenharmony_ci
  // Non-owning raw pointer; presumably the isolate passed to the constructor.
1321cb0ef41Sopenharmony_ci  Isolate* isolate_;
1331cb0ef41Sopenharmony_ci
  // Sentinel value (-1) denoting "no valid program counter".
1341cb0ef41Sopenharmony_ci  static const int kInvalidPC = -1;
1351cb0ef41Sopenharmony_ci
  // Project macro defined elsewhere; by name it removes the implicitly
  // generated constructors for this class.
1361cb0ef41Sopenharmony_ci  DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpBytecodeGenerator);
1371cb0ef41Sopenharmony_ci};
1381cb0ef41Sopenharmony_ci
1391cb0ef41Sopenharmony_ci}  // namespace internal
1401cb0ef41Sopenharmony_ci}  // namespace v8
1411cb0ef41Sopenharmony_ci
1421cb0ef41Sopenharmony_ci#endif  // V8_REGEXP_REGEXP_BYTECODE_GENERATOR_H_
143