1// Copyright 2016 the V8 project authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "src/regexp/regexp-ast.h" 6#include "src/utils/ostreams.h" 7 8namespace v8 { 9namespace internal { 10 11#define MAKE_ACCEPT(Name) \ 12 void* RegExp##Name::Accept(RegExpVisitor* visitor, void* data) { \ 13 return visitor->Visit##Name(this, data); \ 14 } 15FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ACCEPT) 16#undef MAKE_ACCEPT 17 18#define MAKE_TYPE_CASE(Name) \ 19 RegExp##Name* RegExpTree::As##Name() { return nullptr; } \ 20 bool RegExpTree::Is##Name() { return false; } 21FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE) 22#undef MAKE_TYPE_CASE 23 24#define MAKE_TYPE_CASE(Name) \ 25 RegExp##Name* RegExp##Name::As##Name() { return this; } \ 26 bool RegExp##Name::Is##Name() { return true; } 27FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE) 28#undef MAKE_TYPE_CASE 29 30namespace { 31 32Interval ListCaptureRegisters(ZoneList<RegExpTree*>* children) { 33 Interval result = Interval::Empty(); 34 for (int i = 0; i < children->length(); i++) 35 result = result.Union(children->at(i)->CaptureRegisters()); 36 return result; 37} 38 39} // namespace 40 41Interval RegExpAlternative::CaptureRegisters() { 42 return ListCaptureRegisters(nodes()); 43} 44 45 46Interval RegExpDisjunction::CaptureRegisters() { 47 return ListCaptureRegisters(alternatives()); 48} 49 50 51Interval RegExpLookaround::CaptureRegisters() { 52 return body()->CaptureRegisters(); 53} 54 55 56Interval RegExpCapture::CaptureRegisters() { 57 Interval self(StartRegister(index()), EndRegister(index())); 58 return self.Union(body()->CaptureRegisters()); 59} 60 61 62Interval RegExpQuantifier::CaptureRegisters() { 63 return body()->CaptureRegisters(); 64} 65 66 67bool RegExpAssertion::IsAnchoredAtStart() { 68 return assertion_type() == RegExpAssertion::Type::START_OF_INPUT; 69} 70 71 72bool RegExpAssertion::IsAnchoredAtEnd() { 73 return assertion_type() == RegExpAssertion::Type::END_OF_INPUT; 74} 75 76 77bool RegExpAlternative::IsAnchoredAtStart() { 78 ZoneList<RegExpTree*>* nodes = this->nodes(); 79 for (int i = 0; i < nodes->length(); i++) { 80 RegExpTree* node = nodes->at(i); 81 if (node->IsAnchoredAtStart()) { 82 return true; 83 } 84 if (node->max_match() > 0) { 85 return false; 86 } 87 } 88 return false; 89} 90 91 92bool RegExpAlternative::IsAnchoredAtEnd() { 93 ZoneList<RegExpTree*>* nodes = this->nodes(); 94 for (int i = nodes->length() - 1; i >= 0; i--) { 95 RegExpTree* node = nodes->at(i); 96 if (node->IsAnchoredAtEnd()) { 97 return true; 98 } 99 if (node->max_match() > 0) { 100 return false; 101 } 102 } 103 return false; 104} 105 106 107bool RegExpDisjunction::IsAnchoredAtStart() { 108 ZoneList<RegExpTree*>* alternatives = this->alternatives(); 109 for (int i = 0; i < alternatives->length(); i++) { 110 if (!alternatives->at(i)->IsAnchoredAtStart()) return false; 111 } 112 return true; 113} 114 115 116bool RegExpDisjunction::IsAnchoredAtEnd() { 117 ZoneList<RegExpTree*>* alternatives = this->alternatives(); 118 for (int i = 0; i < alternatives->length(); i++) { 119 if (!alternatives->at(i)->IsAnchoredAtEnd()) return false; 120 } 121 return true; 122} 123 124 125bool RegExpLookaround::IsAnchoredAtStart() { 126 return is_positive() && type() == LOOKAHEAD && body()->IsAnchoredAtStart(); 127} 128 129 130bool RegExpCapture::IsAnchoredAtStart() { return body()->IsAnchoredAtStart(); } 131 132 133bool RegExpCapture::IsAnchoredAtEnd() { return body()->IsAnchoredAtEnd(); } 134 135namespace { 136 137// Convert regular expression trees to a simple sexp representation. 138// This representation should be different from the input grammar 139// in as many cases as possible, to make it more difficult for incorrect 140// parses to look as correct ones which is likely if the input and 141// output formats are alike. 142class RegExpUnparser final : public RegExpVisitor { 143 public: 144 RegExpUnparser(std::ostream& os, Zone* zone) : os_(os), zone_(zone) {} 145 void VisitCharacterRange(CharacterRange that); 146#define MAKE_CASE(Name) void* Visit##Name(RegExp##Name*, void* data) override; 147 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE) 148#undef MAKE_CASE 149 private: 150 std::ostream& os_; 151 Zone* zone_; 152}; 153 154} // namespace 155 156void* RegExpUnparser::VisitDisjunction(RegExpDisjunction* that, void* data) { 157 os_ << "(|"; 158 for (int i = 0; i < that->alternatives()->length(); i++) { 159 os_ << " "; 160 that->alternatives()->at(i)->Accept(this, data); 161 } 162 os_ << ")"; 163 return nullptr; 164} 165 166 167void* RegExpUnparser::VisitAlternative(RegExpAlternative* that, void* data) { 168 os_ << "(:"; 169 for (int i = 0; i < that->nodes()->length(); i++) { 170 os_ << " "; 171 that->nodes()->at(i)->Accept(this, data); 172 } 173 os_ << ")"; 174 return nullptr; 175} 176 177 178void RegExpUnparser::VisitCharacterRange(CharacterRange that) { 179 os_ << AsUC32(that.from()); 180 if (!that.IsSingleton()) { 181 os_ << "-" << AsUC32(that.to()); 182 } 183} 184 185 186void* RegExpUnparser::VisitCharacterClass(RegExpCharacterClass* that, 187 void* data) { 188 if (that->is_negated()) os_ << "^"; 189 os_ << "["; 190 for (int i = 0; i < that->ranges(zone_)->length(); i++) { 191 if (i > 0) os_ << " "; 192 VisitCharacterRange(that->ranges(zone_)->at(i)); 193 } 194 os_ << "]"; 195 return nullptr; 196} 197 198 199void* RegExpUnparser::VisitAssertion(RegExpAssertion* that, void* data) { 200 switch (that->assertion_type()) { 201 case RegExpAssertion::Type::START_OF_INPUT: 202 os_ << "@^i"; 203 break; 204 case RegExpAssertion::Type::END_OF_INPUT: 205 os_ << "@$i"; 206 break; 207 case RegExpAssertion::Type::START_OF_LINE: 208 os_ << "@^l"; 209 break; 210 case RegExpAssertion::Type::END_OF_LINE: 211 os_ << "@$l"; 212 break; 213 case RegExpAssertion::Type::BOUNDARY: 214 os_ << "@b"; 215 break; 216 case RegExpAssertion::Type::NON_BOUNDARY: 217 os_ << "@B"; 218 break; 219 } 220 return nullptr; 221} 222 223 224void* RegExpUnparser::VisitAtom(RegExpAtom* that, void* data) { 225 os_ << "'"; 226 base::Vector<const base::uc16> chardata = that->data(); 227 for (int i = 0; i < chardata.length(); i++) { 228 os_ << AsUC16(chardata[i]); 229 } 230 os_ << "'"; 231 return nullptr; 232} 233 234 235void* RegExpUnparser::VisitText(RegExpText* that, void* data) { 236 if (that->elements()->length() == 1) { 237 that->elements()->at(0).tree()->Accept(this, data); 238 } else { 239 os_ << "(!"; 240 for (int i = 0; i < that->elements()->length(); i++) { 241 os_ << " "; 242 that->elements()->at(i).tree()->Accept(this, data); 243 } 244 os_ << ")"; 245 } 246 return nullptr; 247} 248 249 250void* RegExpUnparser::VisitQuantifier(RegExpQuantifier* that, void* data) { 251 os_ << "(# " << that->min() << " "; 252 if (that->max() == RegExpTree::kInfinity) { 253 os_ << "- "; 254 } else { 255 os_ << that->max() << " "; 256 } 257 os_ << (that->is_greedy() ? "g " : that->is_possessive() ? "p " : "n "); 258 that->body()->Accept(this, data); 259 os_ << ")"; 260 return nullptr; 261} 262 263 264void* RegExpUnparser::VisitCapture(RegExpCapture* that, void* data) { 265 os_ << "(^ "; 266 that->body()->Accept(this, data); 267 os_ << ")"; 268 return nullptr; 269} 270 271void* RegExpUnparser::VisitGroup(RegExpGroup* that, void* data) { 272 os_ << "(?: "; 273 that->body()->Accept(this, data); 274 os_ << ")"; 275 return nullptr; 276} 277 278void* RegExpUnparser::VisitLookaround(RegExpLookaround* that, void* data) { 279 os_ << "("; 280 os_ << (that->type() == RegExpLookaround::LOOKAHEAD ? "->" : "<-"); 281 os_ << (that->is_positive() ? " + " : " - "); 282 that->body()->Accept(this, data); 283 os_ << ")"; 284 return nullptr; 285} 286 287 288void* RegExpUnparser::VisitBackReference(RegExpBackReference* that, 289 void* data) { 290 os_ << "(<- " << that->index() << ")"; 291 return nullptr; 292} 293 294 295void* RegExpUnparser::VisitEmpty(RegExpEmpty* that, void* data) { 296 os_ << '%'; 297 return nullptr; 298} 299 300std::ostream& RegExpTree::Print(std::ostream& os, Zone* zone) { 301 RegExpUnparser unparser(os, zone); 302 Accept(&unparser, nullptr); 303 return os; 304} 305 306RegExpDisjunction::RegExpDisjunction(ZoneList<RegExpTree*>* alternatives) 307 : alternatives_(alternatives) { 308 DCHECK_LT(1, alternatives->length()); 309 RegExpTree* first_alternative = alternatives->at(0); 310 min_match_ = first_alternative->min_match(); 311 max_match_ = first_alternative->max_match(); 312 for (int i = 1; i < alternatives->length(); i++) { 313 RegExpTree* alternative = alternatives->at(i); 314 min_match_ = std::min(min_match_, alternative->min_match()); 315 max_match_ = std::max(max_match_, alternative->max_match()); 316 } 317} 318 319namespace { 320 321int IncreaseBy(int previous, int increase) { 322 if (RegExpTree::kInfinity - previous < increase) { 323 return RegExpTree::kInfinity; 324 } else { 325 return previous + increase; 326 } 327} 328 329} // namespace 330 331RegExpAlternative::RegExpAlternative(ZoneList<RegExpTree*>* nodes) 332 : nodes_(nodes) { 333 DCHECK_LT(1, nodes->length()); 334 min_match_ = 0; 335 max_match_ = 0; 336 for (int i = 0; i < nodes->length(); i++) { 337 RegExpTree* node = nodes->at(i); 338 int node_min_match = node->min_match(); 339 min_match_ = IncreaseBy(min_match_, node_min_match); 340 int node_max_match = node->max_match(); 341 max_match_ = IncreaseBy(max_match_, node_max_match); 342 } 343} 344 345 346} // namespace internal 347} // namespace v8 348