1// Copyright 2016 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "src/regexp/regexp-ast.h"
6#include "src/utils/ostreams.h"
7
8namespace v8 {
9namespace internal {
10
11#define MAKE_ACCEPT(Name)                                          \
12  void* RegExp##Name::Accept(RegExpVisitor* visitor, void* data) { \
13    return visitor->Visit##Name(this, data);                       \
14  }
15FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ACCEPT)
16#undef MAKE_ACCEPT
17
18#define MAKE_TYPE_CASE(Name)                               \
19  RegExp##Name* RegExpTree::As##Name() { return nullptr; } \
20  bool RegExpTree::Is##Name() { return false; }
21FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE)
22#undef MAKE_TYPE_CASE
23
24#define MAKE_TYPE_CASE(Name)                              \
25  RegExp##Name* RegExp##Name::As##Name() { return this; } \
26  bool RegExp##Name::Is##Name() { return true; }
27FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE)
28#undef MAKE_TYPE_CASE
29
30namespace {
31
32Interval ListCaptureRegisters(ZoneList<RegExpTree*>* children) {
33  Interval result = Interval::Empty();
34  for (int i = 0; i < children->length(); i++)
35    result = result.Union(children->at(i)->CaptureRegisters());
36  return result;
37}
38
39}  // namespace
40
41Interval RegExpAlternative::CaptureRegisters() {
42  return ListCaptureRegisters(nodes());
43}
44
45
46Interval RegExpDisjunction::CaptureRegisters() {
47  return ListCaptureRegisters(alternatives());
48}
49
50
51Interval RegExpLookaround::CaptureRegisters() {
52  return body()->CaptureRegisters();
53}
54
55
56Interval RegExpCapture::CaptureRegisters() {
57  Interval self(StartRegister(index()), EndRegister(index()));
58  return self.Union(body()->CaptureRegisters());
59}
60
61
62Interval RegExpQuantifier::CaptureRegisters() {
63  return body()->CaptureRegisters();
64}
65
66
67bool RegExpAssertion::IsAnchoredAtStart() {
68  return assertion_type() == RegExpAssertion::Type::START_OF_INPUT;
69}
70
71
72bool RegExpAssertion::IsAnchoredAtEnd() {
73  return assertion_type() == RegExpAssertion::Type::END_OF_INPUT;
74}
75
76
77bool RegExpAlternative::IsAnchoredAtStart() {
78  ZoneList<RegExpTree*>* nodes = this->nodes();
79  for (int i = 0; i < nodes->length(); i++) {
80    RegExpTree* node = nodes->at(i);
81    if (node->IsAnchoredAtStart()) {
82      return true;
83    }
84    if (node->max_match() > 0) {
85      return false;
86    }
87  }
88  return false;
89}
90
91
92bool RegExpAlternative::IsAnchoredAtEnd() {
93  ZoneList<RegExpTree*>* nodes = this->nodes();
94  for (int i = nodes->length() - 1; i >= 0; i--) {
95    RegExpTree* node = nodes->at(i);
96    if (node->IsAnchoredAtEnd()) {
97      return true;
98    }
99    if (node->max_match() > 0) {
100      return false;
101    }
102  }
103  return false;
104}
105
106
107bool RegExpDisjunction::IsAnchoredAtStart() {
108  ZoneList<RegExpTree*>* alternatives = this->alternatives();
109  for (int i = 0; i < alternatives->length(); i++) {
110    if (!alternatives->at(i)->IsAnchoredAtStart()) return false;
111  }
112  return true;
113}
114
115
116bool RegExpDisjunction::IsAnchoredAtEnd() {
117  ZoneList<RegExpTree*>* alternatives = this->alternatives();
118  for (int i = 0; i < alternatives->length(); i++) {
119    if (!alternatives->at(i)->IsAnchoredAtEnd()) return false;
120  }
121  return true;
122}
123
124
125bool RegExpLookaround::IsAnchoredAtStart() {
126  return is_positive() && type() == LOOKAHEAD && body()->IsAnchoredAtStart();
127}
128
129
130bool RegExpCapture::IsAnchoredAtStart() { return body()->IsAnchoredAtStart(); }
131
132
133bool RegExpCapture::IsAnchoredAtEnd() { return body()->IsAnchoredAtEnd(); }
134
135namespace {
136
137// Convert regular expression trees to a simple sexp representation.
138// This representation should be different from the input grammar
139// in as many cases as possible, to make it more difficult for incorrect
140// parses to look as correct ones which is likely if the input and
141// output formats are alike.
142class RegExpUnparser final : public RegExpVisitor {
143 public:
144  RegExpUnparser(std::ostream& os, Zone* zone) : os_(os), zone_(zone) {}
145  void VisitCharacterRange(CharacterRange that);
146#define MAKE_CASE(Name) void* Visit##Name(RegExp##Name*, void* data) override;
147  FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE)
148#undef MAKE_CASE
149 private:
150  std::ostream& os_;
151  Zone* zone_;
152};
153
154}  // namespace
155
156void* RegExpUnparser::VisitDisjunction(RegExpDisjunction* that, void* data) {
157  os_ << "(|";
158  for (int i = 0; i < that->alternatives()->length(); i++) {
159    os_ << " ";
160    that->alternatives()->at(i)->Accept(this, data);
161  }
162  os_ << ")";
163  return nullptr;
164}
165
166
167void* RegExpUnparser::VisitAlternative(RegExpAlternative* that, void* data) {
168  os_ << "(:";
169  for (int i = 0; i < that->nodes()->length(); i++) {
170    os_ << " ";
171    that->nodes()->at(i)->Accept(this, data);
172  }
173  os_ << ")";
174  return nullptr;
175}
176
177
178void RegExpUnparser::VisitCharacterRange(CharacterRange that) {
179  os_ << AsUC32(that.from());
180  if (!that.IsSingleton()) {
181    os_ << "-" << AsUC32(that.to());
182  }
183}
184
185
186void* RegExpUnparser::VisitCharacterClass(RegExpCharacterClass* that,
187                                          void* data) {
188  if (that->is_negated()) os_ << "^";
189  os_ << "[";
190  for (int i = 0; i < that->ranges(zone_)->length(); i++) {
191    if (i > 0) os_ << " ";
192    VisitCharacterRange(that->ranges(zone_)->at(i));
193  }
194  os_ << "]";
195  return nullptr;
196}
197
198
199void* RegExpUnparser::VisitAssertion(RegExpAssertion* that, void* data) {
200  switch (that->assertion_type()) {
201    case RegExpAssertion::Type::START_OF_INPUT:
202      os_ << "@^i";
203      break;
204    case RegExpAssertion::Type::END_OF_INPUT:
205      os_ << "@$i";
206      break;
207    case RegExpAssertion::Type::START_OF_LINE:
208      os_ << "@^l";
209      break;
210    case RegExpAssertion::Type::END_OF_LINE:
211      os_ << "@$l";
212      break;
213    case RegExpAssertion::Type::BOUNDARY:
214      os_ << "@b";
215      break;
216    case RegExpAssertion::Type::NON_BOUNDARY:
217      os_ << "@B";
218      break;
219  }
220  return nullptr;
221}
222
223
224void* RegExpUnparser::VisitAtom(RegExpAtom* that, void* data) {
225  os_ << "'";
226  base::Vector<const base::uc16> chardata = that->data();
227  for (int i = 0; i < chardata.length(); i++) {
228    os_ << AsUC16(chardata[i]);
229  }
230  os_ << "'";
231  return nullptr;
232}
233
234
235void* RegExpUnparser::VisitText(RegExpText* that, void* data) {
236  if (that->elements()->length() == 1) {
237    that->elements()->at(0).tree()->Accept(this, data);
238  } else {
239    os_ << "(!";
240    for (int i = 0; i < that->elements()->length(); i++) {
241      os_ << " ";
242      that->elements()->at(i).tree()->Accept(this, data);
243    }
244    os_ << ")";
245  }
246  return nullptr;
247}
248
249
250void* RegExpUnparser::VisitQuantifier(RegExpQuantifier* that, void* data) {
251  os_ << "(# " << that->min() << " ";
252  if (that->max() == RegExpTree::kInfinity) {
253    os_ << "- ";
254  } else {
255    os_ << that->max() << " ";
256  }
257  os_ << (that->is_greedy() ? "g " : that->is_possessive() ? "p " : "n ");
258  that->body()->Accept(this, data);
259  os_ << ")";
260  return nullptr;
261}
262
263
264void* RegExpUnparser::VisitCapture(RegExpCapture* that, void* data) {
265  os_ << "(^ ";
266  that->body()->Accept(this, data);
267  os_ << ")";
268  return nullptr;
269}
270
271void* RegExpUnparser::VisitGroup(RegExpGroup* that, void* data) {
272  os_ << "(?: ";
273  that->body()->Accept(this, data);
274  os_ << ")";
275  return nullptr;
276}
277
278void* RegExpUnparser::VisitLookaround(RegExpLookaround* that, void* data) {
279  os_ << "(";
280  os_ << (that->type() == RegExpLookaround::LOOKAHEAD ? "->" : "<-");
281  os_ << (that->is_positive() ? " + " : " - ");
282  that->body()->Accept(this, data);
283  os_ << ")";
284  return nullptr;
285}
286
287
288void* RegExpUnparser::VisitBackReference(RegExpBackReference* that,
289                                         void* data) {
290  os_ << "(<- " << that->index() << ")";
291  return nullptr;
292}
293
294
295void* RegExpUnparser::VisitEmpty(RegExpEmpty* that, void* data) {
296  os_ << '%';
297  return nullptr;
298}
299
300std::ostream& RegExpTree::Print(std::ostream& os, Zone* zone) {
301  RegExpUnparser unparser(os, zone);
302  Accept(&unparser, nullptr);
303  return os;
304}
305
306RegExpDisjunction::RegExpDisjunction(ZoneList<RegExpTree*>* alternatives)
307    : alternatives_(alternatives) {
308  DCHECK_LT(1, alternatives->length());
309  RegExpTree* first_alternative = alternatives->at(0);
310  min_match_ = first_alternative->min_match();
311  max_match_ = first_alternative->max_match();
312  for (int i = 1; i < alternatives->length(); i++) {
313    RegExpTree* alternative = alternatives->at(i);
314    min_match_ = std::min(min_match_, alternative->min_match());
315    max_match_ = std::max(max_match_, alternative->max_match());
316  }
317}
318
319namespace {
320
321int IncreaseBy(int previous, int increase) {
322  if (RegExpTree::kInfinity - previous < increase) {
323    return RegExpTree::kInfinity;
324  } else {
325    return previous + increase;
326  }
327}
328
329}  // namespace
330
331RegExpAlternative::RegExpAlternative(ZoneList<RegExpTree*>* nodes)
332    : nodes_(nodes) {
333  DCHECK_LT(1, nodes->length());
334  min_match_ = 0;
335  max_match_ = 0;
336  for (int i = 0; i < nodes->length(); i++) {
337    RegExpTree* node = nodes->at(i);
338    int node_min_match = node->min_match();
339    min_match_ = IncreaseBy(min_match_, node_min_match);
340    int node_max_match = node->max_match();
341    max_match_ = IncreaseBy(max_match_, node_max_match);
342  }
343}
344
345
346}  // namespace internal
347}  // namespace v8
348