1// Copyright 2011 the V8 project authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#ifndef V8_REGEXP_REGEXP_BYTECODES_H_ 6#define V8_REGEXP_REGEXP_BYTECODES_H_ 7 8#include "src/base/bounds.h" 9#include "src/base/macros.h" 10#include "src/base/strings.h" 11#include "src/common/globals.h" 12 13namespace v8 { 14namespace internal { 15 16// Maximum number of bytecodes that will be used (next power of 2 of actually 17// defined bytecodes). 18// All slots between the last actually defined bytecode and maximum id will be 19// filled with BREAKs, indicating an invalid operation. This way using 20// BYTECODE_MASK guarantees no OOB access to the dispatch table. 21constexpr int kRegExpPaddedBytecodeCount = 1 << 6; 22constexpr int BYTECODE_MASK = kRegExpPaddedBytecodeCount - 1; 23// The first argument is packed in with the byte code in one word, but so it 24// has 24 bits, but it can be positive and negative so only use 23 bits for 25// positive values. 26const unsigned int MAX_FIRST_ARG = 0x7fffffu; 27const int BYTECODE_SHIFT = 8; 28STATIC_ASSERT(1 << BYTECODE_SHIFT > BYTECODE_MASK); 29 30// The list of bytecodes, in format: V(Name, Code, ByteLength). 31// TODO(pthier): Argument offsets of bytecodes should be easily accessible by 32// name or at least by position. 33// TODO(jgruber): More precise types (e.g. int32/uint32 instead of value32). 34#define BYTECODE_ITERATOR(V) \ 35 V(BREAK, 0, 4) /* bc8 */ \ 36 V(PUSH_CP, 1, 4) /* bc8 pad24 */ \ 37 V(PUSH_BT, 2, 8) /* bc8 pad24 offset32 */ \ 38 V(PUSH_REGISTER, 3, 4) /* bc8 reg_idx24 */ \ 39 V(SET_REGISTER_TO_CP, 4, 8) /* bc8 reg_idx24 offset32 */ \ 40 V(SET_CP_TO_REGISTER, 5, 4) /* bc8 reg_idx24 */ \ 41 V(SET_REGISTER_TO_SP, 6, 4) /* bc8 reg_idx24 */ \ 42 V(SET_SP_TO_REGISTER, 7, 4) /* bc8 reg_idx24 */ \ 43 V(SET_REGISTER, 8, 8) /* bc8 reg_idx24 value32 */ \ 44 V(ADVANCE_REGISTER, 9, 8) /* bc8 reg_idx24 value32 */ \ 45 V(POP_CP, 10, 4) /* bc8 pad24 */ \ 46 V(POP_BT, 11, 4) /* bc8 pad24 */ \ 47 V(POP_REGISTER, 12, 4) /* bc8 reg_idx24 */ \ 48 V(FAIL, 13, 4) /* bc8 pad24 */ \ 49 V(SUCCEED, 14, 4) /* bc8 pad24 */ \ 50 V(ADVANCE_CP, 15, 4) /* bc8 offset24 */ \ 51 /* Jump to another bytecode given its offset. */ \ 52 /* Bit Layout: */ \ 53 /* 0x00 - 0x07: 0x10 (fixed) Bytecode */ \ 54 /* 0x08 - 0x1F: 0x00 (unused) Padding */ \ 55 /* 0x20 - 0x3F: Address of bytecode to jump to */ \ 56 V(GOTO, 16, 8) /* bc8 pad24 addr32 */ \ 57 /* Check if offset is in range and load character at given offset. */ \ 58 /* Bit Layout: */ \ 59 /* 0x00 - 0x07: 0x11 (fixed) Bytecode */ \ 60 /* 0x08 - 0x1F: Offset from current position */ \ 61 /* 0x20 - 0x3F: Address of bytecode when load is out of range */ \ 62 V(LOAD_CURRENT_CHAR, 17, 8) /* bc8 offset24 addr32 */ \ 63 /* Load character at given offset without range checks. */ \ 64 /* Bit Layout: */ \ 65 /* 0x00 - 0x07: 0x12 (fixed) Bytecode */ \ 66 /* 0x08 - 0x1F: Offset from current position */ \ 67 V(LOAD_CURRENT_CHAR_UNCHECKED, 18, 4) /* bc8 offset24 */ \ 68 V(LOAD_2_CURRENT_CHARS, 19, 8) /* bc8 offset24 addr32 */ \ 69 V(LOAD_2_CURRENT_CHARS_UNCHECKED, 20, 4) /* bc8 offset24 */ \ 70 V(LOAD_4_CURRENT_CHARS, 21, 8) /* bc8 offset24 addr32 */ \ 71 V(LOAD_4_CURRENT_CHARS_UNCHECKED, 22, 4) /* bc8 offset24 */ \ 72 V(CHECK_4_CHARS, 23, 12) /* bc8 pad24 uint32 addr32 */ \ 73 /* Check if current character is equal to a given character */ \ 74 /* Bit Layout: */ \ 75 /* 0x00 - 0x07: 0x19 (fixed) Bytecode */ \ 76 /* 0x08 - 0x0F: 0x00 (unused) Padding */ \ 77 /* 0x10 - 0x1F: Character to check */ \ 78 /* 0x20 - 0x3F: Address of bytecode when matched */ \ 79 V(CHECK_CHAR, 24, 8) /* bc8 pad8 uint16 addr32 */ \ 80 V(CHECK_NOT_4_CHARS, 25, 12) /* bc8 pad24 uint32 addr32 */ \ 81 V(CHECK_NOT_CHAR, 26, 8) /* bc8 pad8 uint16 addr32 */ \ 82 V(AND_CHECK_4_CHARS, 27, 16) /* bc8 pad24 uint32 uint32 addr32 */ \ 83 /* Checks if the current character combined with mask (bitwise and) */ \ 84 /* matches a character (e.g. used when two characters in a disjunction */ \ 85 /* differ by only a single bit */ \ 86 /* Bit Layout: */ \ 87 /* 0x00 - 0x07: 0x1c (fixed) Bytecode */ \ 88 /* 0x08 - 0x0F: 0x00 (unused) Padding */ \ 89 /* 0x10 - 0x1F: Character to match against (after mask aplied) */ \ 90 /* 0x20 - 0x3F: Bitmask bitwise and combined with current character */ \ 91 /* 0x40 - 0x5F: Address of bytecode when matched */ \ 92 V(AND_CHECK_CHAR, 28, 12) /* bc8 pad8 uint16 uint32 addr32 */ \ 93 V(AND_CHECK_NOT_4_CHARS, 29, 16) /* bc8 pad24 uint32 uint32 addr32 */ \ 94 V(AND_CHECK_NOT_CHAR, 30, 12) /* bc8 pad8 uint16 uint32 addr32 */ \ 95 V(MINUS_AND_CHECK_NOT_CHAR, 31, \ 96 12) /* bc8 pad8 base::uc16 base::uc16 base::uc16 addr32 */ \ 97 V(CHECK_CHAR_IN_RANGE, 32, 12) /* bc8 pad24 base::uc16 base::uc16 addr32 */ \ 98 V(CHECK_CHAR_NOT_IN_RANGE, 33, \ 99 12) /* bc8 pad24 base::uc16 base::uc16 addr32 */ \ 100 /* Checks if the current character matches any of the characters encoded */ \ 101 /* in a bit table. Similar to/inspired by boyer moore string search */ \ 102 /* Bit Layout: */ \ 103 /* 0x00 - 0x07: 0x22 (fixed) Bytecode */ \ 104 /* 0x08 - 0x1F: 0x00 (unused) Padding */ \ 105 /* 0x20 - 0x3F: Address of bytecode when bit is set */ \ 106 /* 0x40 - 0xBF: Bit table */ \ 107 V(CHECK_BIT_IN_TABLE, 34, 24) /* bc8 pad24 addr32 bits128 */ \ 108 V(CHECK_LT, 35, 8) /* bc8 pad8 base::uc16 addr32 */ \ 109 V(CHECK_GT, 36, 8) /* bc8 pad8 base::uc16 addr32 */ \ 110 V(CHECK_NOT_BACK_REF, 37, 8) /* bc8 reg_idx24 addr32 */ \ 111 V(CHECK_NOT_BACK_REF_NO_CASE, 38, 8) /* bc8 reg_idx24 addr32 */ \ 112 V(CHECK_NOT_BACK_REF_NO_CASE_UNICODE, 39, 8) \ 113 V(CHECK_NOT_BACK_REF_BACKWARD, 40, 8) /* bc8 reg_idx24 addr32 */ \ 114 V(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD, 41, 8) /* bc8 reg_idx24 addr32 */ \ 115 V(CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD, 42, 8) \ 116 V(CHECK_NOT_REGS_EQUAL, 43, 12) /* bc8 regidx24 reg_idx32 addr32 */ \ 117 V(CHECK_REGISTER_LT, 44, 12) /* bc8 reg_idx24 value32 addr32 */ \ 118 V(CHECK_REGISTER_GE, 45, 12) /* bc8 reg_idx24 value32 addr32 */ \ 119 V(CHECK_REGISTER_EQ_POS, 46, 8) /* bc8 reg_idx24 addr32 */ \ 120 V(CHECK_AT_START, 47, 8) /* bc8 pad24 addr32 */ \ 121 V(CHECK_NOT_AT_START, 48, 8) /* bc8 offset24 addr32 */ \ 122 /* Checks if the current position matches top of backtrack stack */ \ 123 /* Bit Layout: */ \ 124 /* 0x00 - 0x07: 0x31 (fixed) Bytecode */ \ 125 /* 0x08 - 0x1F: 0x00 (unused) Padding */ \ 126 /* 0x20 - 0x3F: Address of bytecode when current matches tos */ \ 127 V(CHECK_GREEDY, 49, 8) /* bc8 pad24 addr32 */ \ 128 /* Advance character pointer by given offset and jump to another bytecode.*/ \ 129 /* Bit Layout: */ \ 130 /* 0x00 - 0x07: 0x32 (fixed) Bytecode */ \ 131 /* 0x08 - 0x1F: Number of characters to advance */ \ 132 /* 0x20 - 0x3F: Address of bytecode to jump to */ \ 133 V(ADVANCE_CP_AND_GOTO, 50, 8) /* bc8 offset24 addr32 */ \ 134 V(SET_CURRENT_POSITION_FROM_END, 51, 4) /* bc8 idx24 */ \ 135 /* Checks if current position + given offset is in range. */ \ 136 /* Bit Layout: */ \ 137 /* 0x00 - 0x07: 0x34 (fixed) Bytecode */ \ 138 /* 0x08 - 0x1F: Offset from current position */ \ 139 /* 0x20 - 0x3F: Address of bytecode when position is out of range */ \ 140 V(CHECK_CURRENT_POSITION, 52, 8) /* bc8 idx24 addr32 */ \ 141 /* Combination of: */ \ 142 /* LOAD_CURRENT_CHAR, CHECK_BIT_IN_TABLE and ADVANCE_CP_AND_GOTO */ \ 143 /* Emitted by RegExpBytecodePeepholeOptimization. */ \ 144 /* Bit Layout: */ \ 145 /* 0x00 - 0x07 0x35 (fixed) Bytecode */ \ 146 /* 0x08 - 0x1F Load character offset from current position */ \ 147 /* 0x20 - 0x3F Number of characters to advance */ \ 148 /* 0x40 - 0xBF Bit Table */ \ 149 /* 0xC0 - 0xDF Address of bytecode when character is matched */ \ 150 /* 0xE0 - 0xFF Address of bytecode when no match */ \ 151 V(SKIP_UNTIL_BIT_IN_TABLE, 53, 32) \ 152 /* Combination of: */ \ 153 /* CHECK_CURRENT_POSITION, LOAD_CURRENT_CHAR_UNCHECKED, AND_CHECK_CHAR */ \ 154 /* and ADVANCE_CP_AND_GOTO */ \ 155 /* Emitted by RegExpBytecodePeepholeOptimization. */ \ 156 /* Bit Layout: */ \ 157 /* 0x00 - 0x07 0x36 (fixed) Bytecode */ \ 158 /* 0x08 - 0x1F Load character offset from current position */ \ 159 /* 0x20 - 0x2F Number of characters to advance */ \ 160 /* 0x30 - 0x3F Character to match against (after mask applied) */ \ 161 /* 0x40 - 0x5F: Bitmask bitwise and combined with current character */ \ 162 /* 0x60 - 0x7F Minimum number of characters this pattern consumes */ \ 163 /* 0x80 - 0x9F Address of bytecode when character is matched */ \ 164 /* 0xA0 - 0xBF Address of bytecode when no match */ \ 165 V(SKIP_UNTIL_CHAR_AND, 54, 24) \ 166 /* Combination of: */ \ 167 /* LOAD_CURRENT_CHAR, CHECK_CHAR and ADVANCE_CP_AND_GOTO */ \ 168 /* Emitted by RegExpBytecodePeepholeOptimization. */ \ 169 /* Bit Layout: */ \ 170 /* 0x00 - 0x07 0x37 (fixed) Bytecode */ \ 171 /* 0x08 - 0x1F Load character offset from current position */ \ 172 /* 0x20 - 0x2F Number of characters to advance */ \ 173 /* 0x30 - 0x3F Character to match */ \ 174 /* 0x40 - 0x5F Address of bytecode when character is matched */ \ 175 /* 0x60 - 0x7F Address of bytecode when no match */ \ 176 V(SKIP_UNTIL_CHAR, 55, 16) \ 177 /* Combination of: */ \ 178 /* CHECK_CURRENT_POSITION, LOAD_CURRENT_CHAR_UNCHECKED, CHECK_CHAR */ \ 179 /* and ADVANCE_CP_AND_GOTO */ \ 180 /* Emitted by RegExpBytecodePeepholeOptimization. */ \ 181 /* Bit Layout: */ \ 182 /* 0x00 - 0x07 0x38 (fixed) Bytecode */ \ 183 /* 0x08 - 0x1F Load character offset from current position */ \ 184 /* 0x20 - 0x2F Number of characters to advance */ \ 185 /* 0x30 - 0x3F Character to match */ \ 186 /* 0x40 - 0x5F Minimum number of characters this pattern consumes */ \ 187 /* 0x60 - 0x7F Address of bytecode when character is matched */ \ 188 /* 0x80 - 0x9F Address of bytecode when no match */ \ 189 V(SKIP_UNTIL_CHAR_POS_CHECKED, 56, 20) \ 190 /* Combination of: */ \ 191 /* LOAD_CURRENT_CHAR, CHECK_CHAR, CHECK_CHAR and ADVANCE_CP_AND_GOTO */ \ 192 /* Emitted by RegExpBytecodePeepholeOptimization. */ \ 193 /* Bit Layout: */ \ 194 /* 0x00 - 0x07 0x39 (fixed) Bytecode */ \ 195 /* 0x08 - 0x1F Load character offset from current position */ \ 196 /* 0x20 - 0x3F Number of characters to advance */ \ 197 /* 0x40 - 0x4F Character to match */ \ 198 /* 0x50 - 0x5F Other Character to match */ \ 199 /* 0x60 - 0x7F Address of bytecode when either character is matched */ \ 200 /* 0x80 - 0x9F Address of bytecode when no match */ \ 201 V(SKIP_UNTIL_CHAR_OR_CHAR, 57, 20) \ 202 /* Combination of: */ \ 203 /* LOAD_CURRENT_CHAR, CHECK_GT, CHECK_BIT_IN_TABLE, GOTO and */ \ 204 /* and ADVANCE_CP_AND_GOTO */ \ 205 /* Emitted by RegExpBytecodePeepholeOptimization. */ \ 206 /* Bit Layout: */ \ 207 /* 0x00 - 0x07 0x3A (fixed) Bytecode */ \ 208 /* 0x08 - 0x1F Load character offset from current position */ \ 209 /* 0x20 - 0x2F Number of characters to advance */ \ 210 /* 0x30 - 0x3F Character to check if it is less than current char */ \ 211 /* 0x40 - 0xBF Bit Table */ \ 212 /* 0xC0 - 0xDF Address of bytecode when character is matched */ \ 213 /* 0xE0 - 0xFF Address of bytecode when no match */ \ 214 V(SKIP_UNTIL_GT_OR_NOT_BIT_IN_TABLE, 58, 32) 215 216#define COUNT(...) +1 217static constexpr int kRegExpBytecodeCount = BYTECODE_ITERATOR(COUNT); 218#undef COUNT 219 220// Just making sure we assigned values above properly. They should be 221// contiguous, strictly increasing, and start at 0. 222// TODO(jgruber): Do not explicitly assign values, instead generate them 223// implicitly from the list order. 224STATIC_ASSERT(kRegExpBytecodeCount == 59); 225 226#define DECLARE_BYTECODES(name, code, length) \ 227 static constexpr int BC_##name = code; 228BYTECODE_ITERATOR(DECLARE_BYTECODES) 229#undef DECLARE_BYTECODES 230 231static constexpr int kRegExpBytecodeLengths[] = { 232#define DECLARE_BYTECODE_LENGTH(name, code, length) length, 233 BYTECODE_ITERATOR(DECLARE_BYTECODE_LENGTH) 234#undef DECLARE_BYTECODE_LENGTH 235}; 236 237inline constexpr int RegExpBytecodeLength(int bytecode) { 238 DCHECK(base::IsInRange(bytecode, 0, kRegExpBytecodeCount - 1)); 239 return kRegExpBytecodeLengths[bytecode]; 240} 241 242static constexpr const char* const kRegExpBytecodeNames[] = { 243#define DECLARE_BYTECODE_NAME(name, ...) #name, 244 BYTECODE_ITERATOR(DECLARE_BYTECODE_NAME) 245#undef DECLARE_BYTECODE_NAME 246}; 247 248inline constexpr const char* RegExpBytecodeName(int bytecode) { 249 DCHECK(base::IsInRange(bytecode, 0, kRegExpBytecodeCount - 1)); 250 return kRegExpBytecodeNames[bytecode]; 251} 252 253void RegExpBytecodeDisassembleSingle(const byte* code_base, const byte* pc); 254void RegExpBytecodeDisassemble(const byte* code_base, int length, 255 const char* pattern); 256 257} // namespace internal 258} // namespace v8 259 260#endif // V8_REGEXP_REGEXP_BYTECODES_H_ 261